Beispiel #1
0
def darts_search_main(hparams):
    """Entry point for DARTS architecture-search training.

    Builds the DARTS child network (wrapped in ``ParalleledDartsChildNet``
    when ``UseParallel`` is set), the criterion and a ``DartsTrainer``, takes
    the starting epoch and batch offset from ``mu.prepare_checkpoint`` and
    then trains one epoch at a time.

    The loop stops when the learning rate is no longer above
    ``hparams.min_lr``, when ``epoch`` exceeds ``hparams.max_epoch``, or when
    the trainer reports at least ``hparams.max_update`` updates. A falsy
    ``max_epoch`` / ``max_update`` is treated as "no limit".

    Args:
        hparams: Hyper-parameter namespace. Fields read directly here:
            ``device_id``, ``max_tokens``, ``max_epoch``, ``max_update``,
            ``min_lr``, ``validate_interval``, ``valid_subset``
            (comma-separated subset names) and ``no_save``.
    """
    datasets = mu.main_entry(hparams, train=True, net_code='darts')['datasets']

    logging.info('Building model')
    model = DartsChildNet(hparams)
    if UseParallel:
        model = ParalleledDartsChildNet(model, output_device=hparams.device_id)

    # [NOTE]: In DARTS, criterion is fixed to CrossEntropyLoss.
    criterion = build_criterion(
        hparams, datasets.source_dict, datasets.target_dict)
    mu.logging_model_criterion(model, criterion)

    trainer = DartsTrainer(hparams, model, criterion)
    mu.logging_training_stats(hparams)

    epoch, batch_offset = mu.prepare_checkpoint(hparams, trainer)

    # One throw-away step on a dummy batch warms the caching allocator.
    train_set = datasets.get_dataset('train')
    warmup_batch = train_set.get_dummy_batch(
        hparams.max_tokens, trainer.get_model().max_positions())
    trainer.dummy_train_step(warmup_batch)

    # Falsy limits mean "unbounded".
    max_epoch = hparams.max_epoch or math.inf
    max_update = hparams.max_update or math.inf

    lr = trainer.get_lr()
    stopwatch = StopwatchMeter()
    stopwatch.start()

    # Keep training until the learning rate gets too small.
    while lr > hparams.min_lr and epoch <= max_epoch:
        train(hparams, trainer, datasets, epoch, batch_offset)

        if epoch % hparams.validate_interval != 0:
            # Not a validation epoch: step the schedule without a loss.
            lr = trainer.lr_step(epoch)
        else:
            subset_names = hparams.valid_subset.split(',')
            for idx, subset_name in enumerate(subset_names):
                loss = mu.validate(
                    hparams, trainer, datasets, subset_name, epoch)
                if idx != 0:
                    continue

                # Only the first subset's loss drives the learning schedule.
                lr = trainer.lr_step(epoch, loss)

                # Checkpoint and net code are saved alongside that first loss.
                if hparams.no_save:
                    continue
                mu.save_checkpoint(trainer, hparams, epoch, 0, loss)
                save_net_code(trainer, hparams, epoch, 0, loss)

        epoch += 1
        batch_offset = 0

        if trainer.get_num_updates() >= max_update:
            break

    stopwatch.stop()
    logging.info('Training done in {:.1f} seconds'.format(stopwatch.sum))
Beispiel #2
0
def nao_search_main(hparams):
    """Entry point for the NAO architecture-search loop.

    Each controller step seeds the trainer, initialises the architecture
    pool, trains and evaluates the child architectures, saves the pool
    sorted by error rate (``1.0 - validation accuracy``), trains the
    controller on that sorted pool, lets it generate new architectures and
    saves the updated pool.

    The loop runs while the step counter (starting at 1) does not exceed
    ``hparams.max_ctrl_step``; a falsy value means "no limit".

    Args:
        hparams: Hyper-parameter namespace. ``max_ctrl_step`` is read here;
            the object is also forwarded to the project helpers.
    """
    datasets = mu.main_entry(
        hparams,
        train=True,
        net_code='nao_train',
        hparams_ppp=nao_utils.hparams_ppp_nao,
    )['datasets']

    logging.info('Building model')
    criterion = build_criterion(
        hparams, datasets.source_dict, datasets.target_dict)
    trainer = NAOTrainer(hparams, criterion)
    mu.logging_model_criterion(
        trainer.get_model(), criterion, logging_params=False)
    mu.logging_training_stats(hparams)

    # Debug switch: when True, child training/evaluation is skipped and
    # evenly spaced fake accuracies are used instead.
    debug_epd = False

    step_limit = hparams.max_ctrl_step or math.inf
    step = 1
    stopwatch = StopwatchMeter()
    stopwatch.start()

    while step <= step_limit:
        logging.info('Training step {}'.format(step))
        trainer.set_seed(step)

        # Prepare the pool of child architectures for this step.
        trainer.init_arch_pool()

        if not debug_epd:
            trainer.train_children(datasets)
            # Evaluate seed arches.
            accuracies = trainer.eval_children(datasets, compute_loss=False)
        else:
            accuracies = list(
                np.linspace(0.0, 1.0, len(trainer.arch_pool)))

        # Pair the current pool with its error rates.
        pool = trainer.arch_pool
        error_rates = [1.0 - acc for acc in accuracies]

        # Reorder both lists by ascending error rate.
        order = np.argsort(error_rates)
        ranked_arches = [pool[j] for j in order]
        ranked_errors = [error_rates[j] for j in order]

        # Persist the ranked pool before the controller touches it.
        nao_utils.save_arches(hparams, step, ranked_arches, ranked_errors)

        # Train encoder-predictor-decoder, then generate new arches.
        trainer.controller_train_step(ranked_arches, ranked_errors)
        trainer.controller_generate_step(ranked_arches)

        # Persist the pool as it stands after the generate step.
        nao_utils.save_arches(
            hparams,
            step,
            trainer.arch_pool,
            arches_perf=None,
            after_gen=True,
        )

        # TODO: Save shared weights.

        step += 1

    stopwatch.stop()
    logging.info('Training done in {:.1f} seconds'.format(stopwatch.sum))
Beispiel #3
0
def nao_epd_main(hparams):
    """Train (or reload) the NAO encoder-predictor-decoder from saved results.

    Reads one architecture (JSON) per line from the ``arches-*`` file and one
    score per line from the ``bleus-*`` file, converts each score ``s`` to
    ``1.0 - s / 100.0``, sorts both pools by that value in ascending order,
    then either reloads the EPD (``hparams.reload``) or trains it on the
    sorted pool. Afterwards new architectures are generated, those not
    already present in the input pool (per ``fast_eq``) are saved, and the
    EPD is saved.

    If the two files yield a different number of entries, a warning is
    logged and only the common leading pairs are used.

    Args:
        hparams: Hyper-parameter namespace. Fields read directly here:
            ``sa_iteration``, ``hparams_set`` and ``reload``.
    """
    # TODO: Add stand alone training script of NaoEpd.
    import json
    from libs.layers.net_code import NetCode

    # NOTE(review): machine-specific absolute path; should probably come from
    # hparams once the stand-alone script exists.
    DirName = 'D:/Users/v-yaf/DataTransfer/NAS4Text/arch_pool_results'
    iteration = hparams.sa_iteration
    subset = 'dev'

    components = mu.main_entry(hparams,
                               train=True,
                               net_code='nao_train_standalone',
                               hparams_ppp=nao_utils.hparams_ppp_nao)
    datasets = components['datasets']

    logging.info('Building model')
    criterion = build_criterion(hparams, datasets.source_dict,
                                datasets.target_dict)
    trainer = NAOTrainer(hparams, criterion, only_epd_cuda=True)
    model = trainer.get_model()
    mu.logging_model_criterion(model, criterion, logging_params=False)
    mu.logging_training_stats(hparams)

    # Both input files share the same "<hparams_set>-<subset>-<iteration>"
    # suffix and differ only in their prefix.
    suffix = '{}-{}-{}.txt'.format(hparams.hparams_set, subset, iteration)
    arches_path = os.path.join(DirName, 'arches-' + suffix)
    scores_path = os.path.join(DirName, 'bleus-' + suffix)

    with open(arches_path, 'r', encoding='utf-8') as f_x, \
            open(scores_path, 'r', encoding='utf-8') as f_y:
        arch_pool = [NetCode(json.loads(line)) for line in f_x]
        # Presumably scores are BLEU percentages, turned into an error-like
        # value where lower is better -- TODO confirm against the producer.
        perf_pool = [1.0 - float(line.strip()) / 100.0 for line in f_y]

    # Entries are paired by line index; make a size mismatch visible instead
    # of truncating silently or failing later with an IndexError.
    if len(arch_pool) != len(perf_pool):
        n_common = min(len(arch_pool), len(perf_pool))
        logging.warning(
            'Size mismatch: {} arches in {} but {} scores in {}; '
            'using the first {} pairs'.format(
                len(arch_pool), arches_path,
                len(perf_pool), scores_path, n_common))
        arch_pool = arch_pool[:n_common]
        perf_pool = perf_pool[:n_common]

    # Sort old arches.
    arches_sorted_indices = np.argsort(perf_pool)
    arch_pool = [arch_pool[i] for i in arches_sorted_indices]
    perf_pool = [perf_pool[i] for i in arches_sorted_indices]

    trainer.arch_pool = arch_pool

    if hparams.reload:
        trainer.load_epd()
    else:
        trainer.controller_train_step(arch_pool,
                                      perf_pool,
                                      split_test=True)

    new_arch_pool = trainer.controller_generate_step(arch_pool,
                                                     log_compare_perf=True)
    # [NOTE]: Only save unique arches.
    unique_arch_pool = [
        arch for arch in new_arch_pool
        if not any(arch.fast_eq(a) for a in arch_pool)
    ]
    nao_utils.save_arches(hparams,
                          iteration,
                          unique_arch_pool,
                          arches_perf=None,
                          after_gen=True)

    trainer.save_epd()