def _test_ad(self, sess, model, hp_dict, neval):
        """Test automatic differentiation.

        Args:
            sess: TensorFlow session.
            model: Model object.
            hp_dict: Dictionary mapping hyperparameter names to initial values.
        """
        hyperparams = dict(
            [(hp_name, model.optimizer.hyperparams[hp_name]) for hp_name in hp_dict.keys()])
        grads = model.optimizer.grads
        accumulators = model.optimizer.accumulators
        new_accumulators = model.optimizer.new_accumulators
        loss = model.cost

        # Build look ahead graph.
        look_ahead_ops, grad_ops, zero_out_ops = look_ahead_grads(
            hyperparams, grads, accumulators, new_accumulators, loss, dtype=self._dtype)

        # Gets variables to be checkpointed.
        ckpt_var_list = []
        for var_list in model.optimizer.accumulators.values():
            ckpt_var_list.extend(var_list)
        ckpt_var_list.append(model.global_step)

        # Build checkpoint ops.
        ckpt = build_checkpoint(ckpt_var_list)
        read_checkpoint_op = read_checkpoint(ckpt, ckpt_var_list)
        write_checkpoint_op = write_checkpoint(ckpt, ckpt_var_list)

        # Initialize weight parameters.
        sess.run(tf.global_variables_initializer())

        # Checkpoint weights and momentum parameters.
        sess.run(write_checkpoint_op)

        # Initialize hyperparameters.
        for hp_name, init_hp in hp_dict.items():
            model.optimizer.assign_hyperparam(sess, hp_name, init_hp)
        _, grad_hp = self.run_loss(sess, model, self._batch_size, look_ahead_ops, grad_ops)

        # Check the gradient of loss wrt. each hp.
        for ii, (hp_name, init_hp) in enumerate(hp_dict.items()):
            sess.run(read_checkpoint_op)
            sess.run(zero_out_ops)
            model.optimizer.assign_hyperparam(sess, hp_name, init_hp - self._eps)
            l1, _ = self.run_loss(sess, model, self._batch_size, look_ahead_ops, grad_ops)
            sess.run(read_checkpoint_op)
            sess.run(zero_out_ops)
            model.optimizer.assign_hyperparam(sess, hp_name, init_hp + self._eps)
            l2, _ = self.run_loss(sess, model, self._batch_size, look_ahead_ops, grad_ops)
            grad_hp_fd = (l2 - l1) / (2 * self._eps)
            model.optimizer.assign_hyperparam(sess, hp_name, init_hp)
            np.testing.assert_allclose(grad_hp[ii], grad_hp_fd, rtol=self._rtol, atol=self._atol)
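
The test above validates the analytic hyperparameter gradient produced by `look_ahead_grads` against a central finite-difference estimate. A minimal, self-contained sketch of that kind of check, using a hypothetical scalar loss `f` rather than the original TensorFlow graph:

import numpy as np

def finite_diff_check(f, hp, grad_analytic, eps=1e-4, rtol=1e-3, atol=1e-5):
    # Central difference: (f(hp + eps) - f(hp - eps)) / (2 * eps).
    grad_fd = (f(hp + eps) - f(hp - eps)) / (2.0 * eps)
    np.testing.assert_allclose(grad_analytic, grad_fd, rtol=rtol, atol=atol)

# Example: the loss hp**2 has gradient 2 * hp, i.e. 6.0 at hp = 3.
finite_diff_check(lambda hp: hp ** 2, 3.0, 6.0)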
Example #2
def main_raxml_runner(args, op):
    """ Run pargenes from the parsed arguments op """
    start = time.time()
    output_dir = op.output_dir
    checkpoint_index = checkpoint.read_checkpoint(output_dir)
    if (os.path.exists(output_dir) and not op.do_continue):
        logger.info(
            "[Error] The output directory " + output_dir +
            " already exists. Please use another output directory or run with --continue."
        )
        sys.exit(1)
    commons.makedirs(output_dir)
    logger.init_logger(op.output_dir)
    print_header(args)
    msas = None
    logger.timed_log("end of MSAs initializations")
    scriptdir = os.path.dirname(os.path.realpath(__file__))
    modeltest_run_path = os.path.join(output_dir, "modeltest_run")
    raxml_run_path = os.path.join(output_dir, "mlsearch_run")
    binaries_dir = os.path.join(scriptdir, "..", "pargenes_binaries")
    print("Binaries directory: " + binaries_dir)
    if (op.scheduler != "split"):
        raxml_library = os.path.join(binaries_dir, "raxml-ng")
        modeltest_library = os.path.join(binaries_dir, "modeltest-ng")
    else:
        raxml_library = os.path.join(binaries_dir, "raxml-ng-mpi.so")
        modeltest_library = os.path.join(binaries_dir, "modeltest-ng-mpi.so")
    astral_jar = os.path.join(binaries_dir, "astral.jar")
    if (len(op.raxml_binary) > 1):
        raxml_library = op.raxml_binary
    if (len(op.modeltest_binary) > 1):
        modeltest_library = op.modeltest_binary
    if (len(op.astral_jar) > 1):
        astral_jar = op.astral_jar
    astral_jar = os.path.abspath(astral_jar)
    if (checkpoint_index < 1):
        msas = commons.init_msas(op)
        raxml.run_parsing_step(msas, raxml_library, op.scheduler,
                               os.path.join(output_dir, "parse_run"), op.cores,
                               op)
        raxml.analyse_parsed_msas(msas, op)
        checkpoint.write_checkpoint(output_dir, 1)
        logger.timed_log("end of parsing mpi-scheduler run")
    else:
        msas = raxml.load_msas(op)
    if (op.dry_run):
        logger.info("End of the dry run. Exiting")
        return 0
    logger.timed_log("end of anlysing parsing results")
    if (op.use_modeltest):
        if (checkpoint_index < 2):
            modeltest.run(msas, output_dir, modeltest_library,
                          modeltest_run_path, op)
            logger.timed_log("end of modeltest mpi-scheduler run")
            modeltest.parse_modeltest_results(op.modeltest_criteria, msas,
                                              output_dir)
            logger.timed_log("end of parsing  modeltest results")
            # Then recompute the binary MSA files with the correct model, and re-evaluate the MSA sizes with the new models.
            shutil.move(os.path.join(output_dir, "parse_run"),
                        os.path.join(output_dir, "old_parse_run"))
            raxml.run_parsing_step(msas, raxml_library, op.scheduler,
                                   os.path.join(output_dir, "parse_run"),
                                   op.cores, op)
            raxml.analyse_parsed_msas(msas, op)
            logger.timed_log("end of the second parsing step")
            checkpoint.write_checkpoint(output_dir, 2)
    if (checkpoint_index < 3):
        raxml.run(msas, op.random_starting_trees, op.parsimony_starting_trees,
                  op.bootstraps, raxml_library, op.scheduler, raxml_run_path,
                  op.cores, op)
        logger.timed_log("end of mlsearch mpi-scheduler run")
        checkpoint.write_checkpoint(output_dir, 3)
    if (op.random_starting_trees + op.parsimony_starting_trees > 1):
        if (checkpoint_index < 4):
            raxml.select_best_ml_tree(msas, op)
            logger.timed_log("end of selecting the best ML tree")
            checkpoint.write_checkpoint(output_dir, 4)
    if (op.bootstraps != 0):
        if (checkpoint_index < 5):
            bootstraps.concatenate_bootstraps(output_dir, min(16, op.cores))
            logger.timed_log("end of bootstraps concatenation")
            checkpoint.write_checkpoint(output_dir, 5)
        starting_trees = op.random_starting_trees + op.parsimony_starting_trees
        if (checkpoint_index < 6 and starting_trees > 0):
            bootstraps.run(msas, output_dir, raxml_library, op.scheduler,
                           os.path.join(output_dir, "supports_run"), op.cores,
                           op)
            logger.timed_log("end of supports mpi-scheduler run")
            checkpoint.write_checkpoint(output_dir, 6)
    if (op.use_astral):
        if (checkpoint_index < 7):
            astral.run_astral_pargenes(astral_jar, op)
            checkpoint.write_checkpoint(output_dir, 7)
    all_invalid = True
    for name, msa in msas.items():
        if (msa.valid):
            all_invalid = False
    if (all_invalid):
        print("[Error] ParGenes failed to analyze all MSAs.")
        report.report_and_exit(op.output_dir, 1)
    print_stats(op)
    return 0
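
The stages above are gated by an integer checkpoint index stored in the output directory, so an interrupted run can be resumed with --continue without redoing finished stages. A minimal sketch of such a file-based counter (an assumption; the real pargenes `checkpoint` module may store it differently):

import os

def read_checkpoint(output_dir):
    # Return the index of the last completed stage, or 0 if none.
    path = os.path.join(output_dir, "checkpoint")
    if not os.path.isfile(path):
        return 0
    with open(path) as f:
        return int(f.read().strip())

def write_checkpoint(output_dir, index):
    # Record that stage `index` finished successfully.
    with open(os.path.join(output_dir, "checkpoint"), "w") as f:
        f.write(str(index))

# Usage: skip stages that already completed in a previous run.
# if read_checkpoint(output_dir) < 3:
#     run_mlsearch_stage(...)
#     write_checkpoint(output_dir, 3)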
Example #3
def online_smd(dataset_name='mnist',
               init_lr=1e-1,
               momentum=0.001,
               num_steps=20000,
               middle_decay=False,
               steps_per_update=10,
               smd=True,
               steps_look_ahead=5,
               num_meta_steps=10,
               steps_per_eval=100,
               batch_size=100,
               meta_lr=1e-2,
               print_step=False,
               effective_lr=True,
               negative_momentum=True,
               optimizer='momentum',
               stochastic=True,
               exp_folder='.'):
    """Train an MLP for MNIST.

    Args:
        dataset_name: String. Name of the dataset.
        init_lr: Float. Initial learning rate, default 0.1.
        momentum: Float. Initial momentum, default 0.001.
        num_steps: Int. Total number of steps, default 20000.
        middle_decay: Bool. Whether to apply a manual learning rate decay to 1e-4 from the midpoint, default False.
        steps_per_update: Int. Number of steps per update, default 10.
        smd: Bool. Whether to run SMD, default True.
        steps_look_ahead: Int. Number of steps to look ahead, default 5.
        num_meta_steps: Int. Number of meta steps, default 10.
        steps_per_eval: Int. Number of training steps per evaluation, default 100.
        batch_size: Int. Mini-batch size, default 100.
        meta_lr: Float. Meta learning rate, default 1e-2.
        print_step: Bool. Whether to print loss during training, default False.
        effective_lr: Bool. Whether to re-parameterize learning rate as lr / (1 - momentum), default True.
        negative_momentum: Bool. Whether to re-parameterize momentum as (1 - momentum), default True.
        optimizer: String. Name of the optimizer. Options: `momentum`, `adam`, default `momentum`.
        stochastic: Bool. Whether to do stochastic or deterministic look ahead, default True.
        exp_folder: String. Folder where experiment logs are written, default '.'.

    Returns:
        results: Results tuple object.
    """
    dataset = get_dataset(dataset_name)
    dataset_train = get_dataset(
        dataset_name)  # For evaluating training progress (full epoch).
    dataset_test = get_dataset(
        dataset_name, test=True)  # For evaluating test progress (full epoch).

    if dataset_name == 'mnist':
        input_shape = [None, 28, 28, 1]
    elif dataset_name.startswith('cifar'):
        input_shape = [None, 32, 32, 3]

    x = tf.placeholder(tf.float32, input_shape, name="x")
    y = tf.placeholder(tf.int64, [None], name="y")

    if effective_lr:
        init_lr_ = init_lr / (1.0 - momentum)
    else:
        init_lr_ = init_lr

    if negative_momentum:
        init_mom_ = 1.0 - momentum
    else:
        init_mom_ = momentum
    if dataset_name == 'mnist':
        config = get_mnist_mlp_config(
            init_lr_,
            init_mom_,
            effective_lr=effective_lr,
            negative_momentum=negative_momentum)
    elif dataset_name == 'cifar-10':
        config = get_cifar_cnn_config(
            init_lr_,
            init_mom_,
            effective_lr=effective_lr,
            negative_momentum=negative_momentum)
    else:
        raise NotImplementedError('Unknown dataset: {}'.format(dataset_name))
    with tf.name_scope('Train'):
        with tf.variable_scope('Model'):
            if dataset_name == 'mnist':
                m = get_mnist_mlp_model(
                    config, x, y, optimizer=optimizer, training=True)
                model = m
            elif dataset_name == 'cifar-10':
                m = get_cifar_cnn_model(
                    config, x, y, optimizer=optimizer, training=True)
                model = m
    with tf.name_scope('Test'):
        with tf.variable_scope('Model', reuse=True):
            if dataset_name == 'mnist':
                mtest = get_mnist_mlp_model(config, x, y, training=False)
            elif dataset_name == 'cifar-10':
                mtest = get_cifar_cnn_model(config, x, y, training=False)

    final_lr = 1e-4
    midpoint = num_steps // 2

    if dataset_name == 'mnist':
        num_train = 60000
        num_test = 10000
    elif dataset_name.startswith('cifar'):
        num_train = 50000
        num_test = 10000

    lr_ = init_lr_
    mom_ = init_mom_
    bsize = batch_size
    steps_per_epoch = num_train // bsize
    steps_test_per_epoch = num_test // bsize

    train_xent_list = []
    train_acc_list = []
    test_xent_list = []
    test_acc_list = []
    lr_list = []
    mom_list = []
    step_list = []
    log.info(
        'Applying decay at midpoint with final learning rate = {:.3e}'.format(
            final_lr))

    if 'momentum' in optimizer:
        mom_name = 'mom'
    elif 'adam' in optimizer:
        mom_name = 'beta1'
    else:
        raise ValueError('Unknown optimizer')
    hp_dict = {'lr': init_lr} #, mom_name: momentum}
    hp_names = hp_dict.keys()
    hyperparams = dict([(hp_name, model.optimizer.hyperparams[hp_name])
                        for hp_name in hp_names])
    grads = model.optimizer.grads
    accumulators = model.optimizer.accumulators
    new_accumulators = model.optimizer.new_accumulators
    loss = model.cost

    # Build look ahead graph.
    look_ahead_ops, hp_grad_ops, zero_out_ops = look_ahead_grads(
        hyperparams, grads, accumulators, new_accumulators, loss)

    # Meta optimizer, use Adam in the log space.
    meta_opt = LogOptimizer(tf.train.AdamOptimizer(meta_lr))
    hp = [model.optimizer.hyperparams[hp_name] for hp_name in hp_names]
    hp_grads_dict = {
        'lr': tf.placeholder(tf.float32, [], name='lr_grad'),
        # mom_name: tf.placeholder(
        #     tf.float32, [], name='{}_grad'.format(mom_name))
    }
    hp_grads_plh = [hp_grads_dict[hp_name] for hp_name in hp_names]
    hp_grads_and_vars = list(zip(hp_grads_plh, hp))
    cgrad = {'lr': (-1e1, 1e1)} #, mom_name: (-1e1, 1e1)}
    cval = {'lr': (1e-4, 1e1)} #, mom_name: (1e-4, 1e0)}
    cgrad_ = [cgrad[hp_name] for hp_name in hp_names]
    cval_ = [cval[hp_name] for hp_name in hp_names]
    meta_train_op = meta_opt.apply_gradients(
        hp_grads_and_vars, clip_gradients=cgrad_, clip_values=cval_)

    var_list = tf.global_variables()
    ckpt = build_checkpoint(tf.global_variables())
    write_op = write_checkpoint(ckpt, var_list)
    read_op = read_checkpoint(ckpt, var_list)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        exp_logger = _get_exp_logger(sess, exp_folder)

        def log_hp(hp_dict):
            lr_ = hp_dict['lr']
            mom_ = hp_dict['mom']
            # Log current learning rate and momentum.
            if negative_momentum:
                exp_logger.log(ii, 'mom', 1.0 - mom_)
                exp_logger.log(ii, 'log neg mom', np.log10(mom_))
                mom__ = 1.0 - mom_
            else:
                exp_logger.log(ii, 'mom', mom_)
                exp_logger.log(ii, 'log neg mom', np.log10(1.0 - mom_))
                mom__ = mom_

            if effective_lr:
                lr__ = lr_ * (1.0 - mom__)
                eflr_ = lr_
            else:
                lr__ = lr_
                eflr_ = lr_ / (1.0 - mom__)
            exp_logger.log(ii, 'eff lr', eflr_)
            exp_logger.log(ii, 'log eff lr', np.log10(eflr_))
            exp_logger.log(ii, 'lr', lr__)
            exp_logger.log(ii, 'log lr', np.log10(lr__))
            exp_logger.flush()
            return lr__, mom__

        # Assign initial learning rate and momentum.
        m.optimizer.assign_hyperparam(sess, 'lr', lr_)
        m.optimizer.assign_hyperparam(sess, 'mom', mom_)
        train_iter = six.moves.xrange(num_steps)
        if not print_step:
            train_iter = tqdm(train_iter, ncols=0)
        for ii in train_iter:
            # Meta-optimization loop.
            if ii == 0 or ii % steps_per_update == 0:
                if ii < midpoint and smd:
                    if stochastic:
                        data_list = [
                            dataset.next_batch(bsize)
                            for step in six.moves.xrange(steps_look_ahead)
                        ]
                        # Take next few batches for last step evaluation.
                        eval_data_list = [
                            dataset.next_batch(bsize)
                            for step in six.moves.xrange(steps_look_ahead)
                        ]
                    else:
                        data_entry = dataset.next_batch(bsize)
                        data_list = [data_entry] * steps_look_ahead
                        # Use the deterministic batch for last step evaluation.
                        eval_data_list = [data_list[0]]
                    sess.run(write_op)
                    for ms in six.moves.xrange(num_meta_steps):
                        cost, hp_dict = meta_step(sess, model, data_list,
                                                  look_ahead_ops, hp_grad_ops,
                                                  hp_grads_plh, meta_train_op,
                                                  eval_data_list)
                        sess.run(read_op)
                        for hpname, hpval in hp_dict.items():
                            model.optimizer.assign_hyperparam(
                                sess, hpname, hpval)
                    lr_ = hp_dict['lr']
                    # mom_ = hp_dict['mom']
                else:
                    hp_dict = sess.run(model.optimizer.hyperparams)
                lr_log, mom_log = log_hp(hp_dict)
                lr_list.append(lr_log)
                mom_list.append(mom_log)

            if ii == midpoint // 2:
                m.optimizer.assign_hyperparam(sess, 'mom', 1 - 0.9)

            if ii == midpoint:
                lr_before_mid = hp_dict['lr']
                tau = (num_steps - midpoint) / np.log(lr_before_mid / final_lr)

            if ii > midpoint:
                lr_ = np.exp(-(ii - midpoint) / tau) * lr_before_mid
                m.optimizer.assign_hyperparam(sess, 'lr', lr_)

            # Run regular training.
            if lr_ > 1e-6:
                # Use CBL for first half of training
                xd, yd = data_entry if (smd and not stochastic and ii < midpoint) else dataset.next_batch(bsize)
                cost_, _ = sess.run(
                    [m.cost, m.train_op], feed_dict={
                        m.x: xd,
                        m.y: yd
                    })
                if ii < midpoint:
                    sess.run(m._retrieve_ema_op)

            # Evaluate every certain number of steps.
            if ii == 0 or (ii + 1) % steps_per_eval == 0:
                test_acc = 0.0
                test_xent = 0.0
                train_acc = 0.0
                train_xent = 0.0

                # Report full epoch training loss.
                for jj in six.moves.xrange(steps_per_epoch):
                    xd, yd = dataset_train.next_batch(bsize)
                    xent_, acc_ = sess.run(
                        [m.cost, m.acc], feed_dict={
                            x: xd,
                            y: yd
                        })
                    train_xent += xent_ / float(steps_per_epoch)
                    train_acc += acc_ / float(steps_per_epoch)
                step_list.append(ii + 1)
                train_xent_list.append(train_xent)
                train_acc_list.append(train_acc)
                dataset_train.reset()

                # Report full epoch validation loss.
                for jj in six.moves.xrange(steps_test_per_epoch):
                    xd, yd = dataset_test.next_batch(bsize)
                    xent_, acc_ = sess.run(
                        [mtest.cost, mtest.acc], feed_dict={
                            x: xd,
                            y: yd
                        })
                    test_xent += xent_ / float(steps_test_per_epoch)
                    test_acc += acc_ / float(steps_test_per_epoch)
                test_xent_list.append(test_xent)
                test_acc_list.append(test_acc)
                dataset_test.reset()

                # Log training progress.
                exp_logger.log(ii, 'train loss', train_xent)
                exp_logger.log(ii, 'log train loss', np.log10(train_xent))
                exp_logger.log(ii, 'test loss', test_xent)
                exp_logger.log(ii, 'log test loss', np.log10(test_xent))
                exp_logger.log(ii, 'train acc', train_acc)
                exp_logger.log(ii, 'test acc', test_acc)
                exp_logger.flush()

                if print_step:
                    log.info((
                        'Steps {:d} T Xent {:.3e} T Acc {:.3f} V Xent {:.3e} V Acc {:.3f} '
                        'LR {:.3e}').format(ii + 1, train_xent,
                                            train_acc * 100.0, test_xent,
                                            test_acc * 100.0, lr_))

    return Results(
        step=np.array(step_list),
        train_xent=np.array(train_xent_list),
        train_acc=np.array(train_acc_list),
        test_xent=np.array(test_xent_list),
        test_acc=np.array(test_acc_list),
        lr=np.array(lr_list),
        momentum=np.array(mom_list))
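
After the midpoint, online_smd stops meta-optimizing and decays the learning rate exponentially toward `final_lr`; the time constant `tau` is chosen so the rate reaches `final_lr` exactly at the last step. A short numeric check of that schedule, with illustrative values:

import numpy as np

num_steps, midpoint = 20000, 10000
lr_before_mid, final_lr = 1e-1, 1e-4
tau = (num_steps - midpoint) / np.log(lr_before_mid / final_lr)
lr_end = np.exp(-(num_steps - midpoint) / tau) * lr_before_mid
assert np.isclose(lr_end, final_lr)  # decays to final_lr at the last step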
def run_offline_smd(num_steps,
                    init_lr,
                    init_decay,
                    meta_lr,
                    num_meta_steps,
                    momentum=MOMENTUM,
                    effective_lr=False,
                    negative_momentum=False,
                    pretrain_ckpt=None,
                    output_fname=None,
                    seed=0):
    """Run offline SMD experiments.

    Args:
        num_steps: Int. Number of look-ahead training steps per meta step.
        init_lr: Float. Initial learning rate.
        init_decay: Float. Initial decay constant.
        meta_lr: Float. Meta descent learning rate.
        num_meta_steps: Int. Number of meta descent steps.
        momentum: Float. Momentum.
        effective_lr: Bool. Whether to optimize in the effective LR space.
        negative_momentum: Bool. Whether to optimize in the negative momentum space.
        pretrain_ckpt: String. Path of a pre-trained checkpoint to restore, default None.
        output_fname: String. CSV file to append per-step results to, default None.
        seed: Int. Random seed, default 0.
    """
    bsize = BATCH_SIZE
    if output_fname is not None:
        log_folder = os.path.dirname(output_fname)
    else:
        log_folder = os.path.join('results', 'mnist', 'offline', 'smd')
        log_folder = os.path.join(log_folder, _get_run_number(log_folder))
    if not os.path.exists(log_folder):
        os.makedirs(log_folder)
    with tf.Graph().as_default(), tf.Session() as sess:
        dataset = get_dataset('mnist')
        exp_logger = _get_exp_logger(sess, log_folder)
        if effective_lr:
            init_lr_ = init_lr / float(1.0 - momentum)
        else:
            init_lr_ = init_lr

        if negative_momentum:
            init_mom_ = 1.0 - momentum
        else:
            init_mom_ = momentum

        config = get_mnist_mlp_config(
            init_lr_,
            init_mom_,
            decay=init_decay,
            effective_lr=effective_lr,
            negative_momentum=negative_momentum)
        x = tf.placeholder(tf.float32, [None, 28, 28, 1], name="x")
        y = tf.placeholder(tf.int64, [None], name="y")
        with tf.name_scope('Train'):
            with tf.variable_scope('Model'):
                model = get_mnist_mlp_model(
                    config,
                    x,
                    y,
                    optimizer='momentum_inv_decay',
                    training=True)
        all_vars = tf.global_variables()
        var_to_restore = list(
            filter(lambda x: 'momentum' not in x.name.lower(), all_vars))
        var_to_restore = list(
            filter(lambda x: 'global_step' not in x.name.lower(),
                   var_to_restore))
        var_to_restore = list(
            filter(lambda x: 'lr' not in x.name.lower(), var_to_restore))
        var_to_restore = list(
            filter(lambda x: 'mom' not in x.name.lower(), var_to_restore))
        var_to_restore = list(
            filter(lambda x: 'decay' not in x.name.lower(), var_to_restore))
        saver = tf.train.Saver(var_to_restore)
        rnd = np.random.RandomState(seed)

        hp_dict = {'lr': init_lr, 'decay': init_decay}
        hp_names = hp_dict.keys()
        hyperparams = dict([(hp_name, model.optimizer.hyperparams[hp_name])
                            for hp_name in hp_names])
        grads = model.optimizer.grads
        accumulators = model.optimizer.accumulators
        new_accumulators = model.optimizer.new_accumulators
        loss = model.cost

        # Build look ahead graph.
        look_ahead_ops, hp_grad_ops, zero_out_ops = look_ahead_grads(
            hyperparams, grads, accumulators, new_accumulators, loss)

        # Meta optimizer, use momentum in the log space (Adam variant commented out below).
        # meta_opt = LogOptimizer(tf.train.AdamOptimizer(meta_lr))
        meta_opt = LogOptimizer(tf.train.MomentumOptimizer(meta_lr, 0.9))
        hp = [model.optimizer.hyperparams[hp_name] for hp_name in hp_names]
        hp_grads_dict = {
            'lr': tf.placeholder(tf.float32, [], name='lr_grad'),
            'decay': tf.placeholder(tf.float32, [], name='decay_grad')
        }
        hp_grads_plh = [hp_grads_dict[hp_name] for hp_name in hp_names]
        hp_grads_and_vars = list(zip(hp_grads_plh, hp))
        cgrad = {'lr': (-1e1, 1e1), 'decay': (-1e1, 1e1)}
        cval = {'lr': (1e-4, 1e1), 'decay': (1e-4, 1e3)}
        cgrad_ = [cgrad[hp_name] for hp_name in hp_names]
        cval_ = [cval[hp_name] for hp_name in hp_names]
        meta_train_op = meta_opt.apply_gradients(
            hp_grads_and_vars, clip_gradients=cgrad_, clip_values=cval_)

        if output_fname is not None:
            msg = '{} exists, please remove previous experiment data.'.format(
                output_fname)
            assert not os.path.exists(output_fname), msg
            log.info('Writing to {}'.format(output_fname))
            with open(output_fname, 'w') as f:
                f.write('Step,LR,Mom,Decay,Loss\n')

        # Initialize all variables.
        sess.run(tf.global_variables_initializer())
        var_list = tf.global_variables()
        if pretrain_ckpt is not None:
            saver.restore(sess, pretrain_ckpt)
        ckpt = build_checkpoint(var_list)
        write_op = write_checkpoint(ckpt, var_list)
        read_op = read_checkpoint(ckpt, var_list)
        sess.run(write_op)

        # Progress bar.
        it = tqdm(
            six.moves.xrange(num_meta_steps),
            ncols=0,
            desc='look_{}_ilr_{:.0e}_decay_{:.0e}'.format(
                num_steps, init_lr, init_decay))

        for run in it:
            # Stochastic data list makes the SMD converge faster.
            data_list = [
                dataset.next_batch(bsize)
                for step in six.moves.xrange(num_steps)
            ]
            eval_data_list = [
                dataset.next_batch(bsize)
                for step in six.moves.xrange(NUM_TRAIN // bsize)
            ]
            # Run meta descent step.
            cost, hp_dict = meta_step(sess, model, data_list, look_ahead_ops,
                                      hp_grad_ops, hp_grads_plh, meta_train_op,
                                      eval_data_list)

            # Early stop if hits NaN.
            if np.isnan(cost):
                break

            # Restore parameters.
            sess.run(read_op)
            for hpname, hpval in hp_dict.items():
                model.optimizer.assign_hyperparam(sess, hpname, hpval)

            # Read out hyperparameters in normal parameterization.
            if negative_momentum:
                mom = 1 - hp_dict['mom']
            else:
                mom = hp_dict['mom']
            if effective_lr:
                lr = hp_dict['lr'] * (1 - mom)
            else:
                lr = hp_dict['lr']

            # Write to logs.
            if output_fname is not None:
                with open(output_fname, 'a') as f:
                    f.write('{:d},{:f},{:f},{:f},{:f}\n'.format(
                        run, lr, hp_dict['mom'], hp_dict['decay'], cost))
            # Log to TensorBoard.
            exp_logger.log(run, 'lr', lr)
            exp_logger.log(run, 'decay', hp_dict['decay'])
            exp_logger.log(run, 'log loss', np.log10(cost))
            exp_logger.flush()

            # Update progress bar.
            it.set_postfix(
                lr='{:.3e}'.format(lr),
                decay='{:.3e}'.format(hp_dict['decay']),
                loss='{:.3e}'.format(cost))

        exp_logger.close()
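
Both SMD routines optionally re-parameterize the hyperparameters: the optimizer stores the effective learning rate lr / (1 - momentum) and the negative momentum 1 - momentum, and the logging code inverts the mapping before reporting. A tiny sketch with illustrative values:

momentum = 0.9
init_lr = 0.1

lr_eff = init_lr / (1.0 - momentum)        # 1.0, what the optimizer stores
mom_neg = 1.0 - momentum                   # 0.1

# Reading back the normal parameterization, as in run_offline_smd:
mom_readout = 1.0 - mom_neg                # 0.9
lr_readout = lr_eff * (1.0 - mom_readout)  # 0.1
assert abs(lr_readout - init_lr) < 1e-12 and abs(mom_readout - momentum) < 1e-12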
Example #5
def main_raxml_runner(op):
    """ Run pargenes from the parsed arguments op """
    start = time.time()
    output_dir = op.output_dir
    checkpoint_index = checkpoint.read_checkpoint(output_dir)
    print("Checkpoint: " + str(checkpoint_index))
    if (os.path.exists(output_dir) and not op.do_continue):
        print(
            "[Error] The output directory " + output_dir +
            " already exists. Please use another output directory or run with --continue."
        )
        sys.exit(1)
    commons.makedirs(output_dir)
    logs = commons.get_log_file(output_dir, "pargenes_logs")
    print("Redirecting logs to " + logs)
    sys.stdout = open(logs, "w")
    print_header()
    msas = commons.init_msas(op)
    timed_print(start, "end of MSAs initializations")
    scriptdir = os.path.dirname(os.path.realpath(__file__))
    modeltest_run_path = os.path.join(output_dir, "modeltest_run")
    raxml_run_path = os.path.join(output_dir, "mlsearch_run")
    if (op.scheduler == "onecore"):
        raxml_library = os.path.join(scriptdir, "..", "raxml-ng", "bin",
                                     "raxml-ng")
        modeltest_library = os.path.join(scriptdir, "..", "modeltest", "bin",
                                         "modeltest-ng")
    else:
        raxml_library = os.path.join(scriptdir, "..", "raxml-ng", "bin",
                                     "raxml-ng-mpi.so")
        modeltest_library = os.path.join(scriptdir, "..", "modeltest", "build",
                                         "src", "modeltest-ng-mpi.so")
    if (checkpoint_index < 1):
        raxml.run_parsing_step(msas, raxml_library, op.scheduler,
                               os.path.join(output_dir, "parse_run"), op.cores,
                               op)
        checkpoint.write_checkpoint(output_dir, 1)
        timed_print(start, "end of parsing mpi-scheduler run")
    raxml.analyse_parsed_msas(msas, op, output_dir)
    if (op.dry_run):
        print("End of the dry run. Exiting")
        return 0
    timed_print(start, "end of anlysing parsing results")
    if (op.use_modeltest):
        if (checkpoint_index < 2):
            modeltest.run(msas, output_dir, modeltest_library,
                          modeltest_run_path, op)
            timed_print(start, "end of modeltest mpi-scheduler run")
            modeltest.parse_modeltest_results(op.modeltest_criteria, msas,
                                              output_dir)
            timed_print(start, "end of parsing  modeltest results")
            # Then recompute the binary MSA files with the correct model, and re-evaluate the MSA sizes with the new models.
            shutil.move(os.path.join(output_dir, "parse_run"),
                        os.path.join(output_dir, "old_parse_run"))
            raxml.run_parsing_step(msas, raxml_library, op.scheduler,
                                   os.path.join(output_dir, "parse_run"),
                                   op.cores, op)
            raxml.analyse_parsed_msas(msas, op, output_dir)
            timed_print(start, "end of the second parsing step")
            checkpoint.write_checkpoint(output_dir, 2)
    if (checkpoint_index < 3):
        raxml.run(msas, op.random_starting_trees, op.parsimony_starting_trees,
                  op.bootstraps, raxml_library, op.scheduler, raxml_run_path,
                  op.cores, op)
        timed_print(start, "end of mlsearch mpi-scheduler run")
        checkpoint.write_checkpoint(output_dir, 3)
    if (op.random_starting_trees + op.parsimony_starting_trees > 1):
        if (checkpoint_index < 4):
            raxml.select_best_ml_tree(msas, op)
            timed_print(start, "end of selecting the best ML tree")
            checkpoint.write_checkpoint(output_dir, 4)
    if (op.bootstraps != 0):
        if (checkpoint_index < 5):
            bootstraps.concatenate_bootstraps(output_dir, min(16, op.cores))
            timed_print(start, "end of bootstraps concatenation")
            checkpoint.write_checkpoint(output_dir, 5)
        if (checkpoint_index < 6):
            bootstraps.run(output_dir, raxml_library, op.scheduler,
                           os.path.join(output_dir, "supports_run"), op.cores,
                           op)
            timed_print(start, "end of supports mpi-scheduler run")
            checkpoint.write_checkpoint(output_dir, 6)
    return 0
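
This older variant logs elapsed time with `timed_print(start, message)` instead of `logger.timed_log`. A hedged sketch of what such a helper might look like (an assumption; the real implementation lives elsewhere in pargenes):

import time

def timed_print(start, message):
    # Print the message prefixed with the wall-clock time elapsed since `start`.
    elapsed = int(time.time() - start)
    print("[{:02d}:{:02d}:{:02d}] {}".format(
        elapsed // 3600, (elapsed % 3600) // 60, elapsed % 60, message))

# start = time.time(); ...; timed_print(start, "end of parsing mpi-scheduler run")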