def run(params, error_queue):
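    # distributed training worker: one process per GPU; `params.rank` picks
    # the device, and any exception is reported back to the parent process
    # through `error_queue`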
    try:
        # start training
        logger.info(params)
        if not torch.cuda.is_available():
            raise NotImplementedError('Training on CPU is not supported')
        torch.cuda.set_device(params.rank)
        torch.manual_seed(params.seed)
        logger.info('Process %s is now running in gpu:%s', os.getpid(),
                    torch.cuda.current_device())

        data = load_data(params, 'train')
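        # debug print: shows the iterator feeding this worker's data shard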
        print(
            data.get_iterator(shuffle=True,
                              group_by_size=True,
                              partition=params.rank))

        encoder, decoder, num_updates = build_mt_model(params)
        trainer = TrainerMT(encoder, decoder, data, params, num_updates)
        for i in range(trainer.epoch, params.max_epoch):
            logger.info("==== Starting epoch %i ...====" % trainer.epoch)
            trainer.train_epoch()
            tqdm.write('Finished epoch %i.' % i)

    except KeyboardInterrupt:
        pass  # killed by parent, do nothing
    except Exception:
        # propagate exception to parent process, keeping original traceback
        import traceback
        error_queue.put((params.rank, traceback.format_exc()))
Example #2
def main(params):
    data = load_data(params, name='train')
    encoder, decoder, num_updates = build_mt_model(params)
    trainer = TrainerMT(encoder, decoder, data, params, num_updates)

    for i in range(trainer.epoch, 30):
        logger.info("==== Starting epoch %i ...====" % trainer.epoch)
        trainer.train_epoch()
        tqdm.write('Finished epoch %i.' % i)
Example #3
def perform_translation(input_file_path, translation_directory,
                        cloze_train_path, question_train_path,
                        fasttext_vectors_path, checkpoint_path):
    params = get_params(
        exp_name='translation',
        dump_path=translation_directory,
        cloze_train_path=cloze_train_path,
        question_train_path=question_train_path,
        cloze_test_path=input_file_path,
        fasttext_vectors_path=fasttext_vectors_path,
        checkpoint_path=checkpoint_path,
    )

    # check parameters
    assert params.exp_name
    check_all_data_params(params)
    check_mt_model_params(params)
    data = load_data(params, mono_only=True)
    encoder, decoder, discriminator, lm = build_mt_model(params, data)
    # initialize trainer / reload checkpoint / initialize evaluator
    trainer = TrainerMT(encoder, decoder, discriminator, lm, data, params)
    trainer.reload_checkpoint()
    trainer.test_sharing()  # check parameters sharing
    evaluator = EvaluatorMT(trainer, data, params)

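    # generate without gradient tracking: encode 'cloze' sentences and
    # decode them as 'question' sentences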
    with torch.no_grad():
        lang1, lang2 = 'cloze', 'question'

        evaluator.encoder.eval()
        evaluator.decoder.eval()
        lang1_id = evaluator.params.lang2id[lang1]
        lang2_id = evaluator.params.lang2id[lang2]

        translations = []
        dataset = evaluator.data['mono'][lang1]['test']
        dataset.batch_size = params.batch_size

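        # get_iterator(...) returns a generator factory, hence the extra ()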
        for i, (sent1, len1) in enumerate(
                dataset.get_iterator(shuffle=False, group_by_size=False)()):
            encoded = evaluator.encoder(sent1.cuda(), len1, lang1_id)
            sent2_, len2_, _ = evaluator.decoder.generate(encoded, lang2_id)
            lang1_text = convert_to_text(sent1, len1, evaluator.dico[lang1],
                                         lang1_id, evaluator.params)
            lang2_text = convert_to_text(sent2_, len2_, evaluator.dico[lang2],
                                         lang2_id, evaluator.params)
            translations += zip(lang1_text, lang2_text)

        # export sentences to hypothesis file and restore BPE segmentation
        out_name = os.path.join(translation_directory,
                                'output_translations.txt')
        with open(out_name, 'w', encoding='utf-8') as f:
            f.write('\n'.join(['\t'.join(st) for st in translations]) + '\n')
        restore_segmentation(out_name)

    return out_name
Example #4
def load(params):
    # check parameters
    assert params.exp_name
    check_all_data_params(params)
    check_mt_model_params(params)

    # initialize experiment / load data / build model
    data = load_data(params)
    encoder, decoder, discriminator, lm = build_mt_model(params, data)

    # initialize trainer / reload checkpoint / initialize evaluator
    trainer = TrainerMT(encoder, decoder, discriminator, lm, data, params)
    trainer.reload_checkpoint()
    trainer.test_sharing()  # check parameters sharing
    evaluator = EvaluatorMT(trainer, data, params)
    return trainer, evaluator
Example #5
def inference(params):
    # check parameters
    assert params.exp_name
    check_all_data_params(params)
    check_mt_model_params(params)

    # initialize experiment / load data / build model
    logger = initialize_exp(params)
    data = load_data(params)
    encoder, decoder, discriminator, lm = build_mt_model(params, data)

    # initialize trainer / reload checkpoint / initialize evaluator
    trainer = TrainerMT(encoder, decoder, discriminator, lm, data, params)
    trainer.reload_best_model()
    trainer.test_sharing()  # check parameters sharing
    evaluator = EvaluatorMT(trainer, data, params)

    # evaluation mode
    evaluator.eval_inference()
    exit()
Example #6
def main(params):
    # check parameters
    assert params.exp_name
    check_all_data_params(params)
    check_mt_model_params(params)

    # initialize experiment / load data / build model
    logger = initialize_exp(params)
    data = load_data(params)
    encoder, decoder, discriminator, lm = build_mt_model(params, data)

    # initialize trainer / reload checkpoint / initialize evaluator
    trainer = TrainerMT(encoder, decoder, discriminator, lm, data, params)
    trainer.reload_checkpoint()
    trainer.test_sharing()  # check parameters sharing
    evaluator = EvaluatorMT(trainer, data, params)

    # evaluation mode
    if params.eval_only:
        evaluator.run_all_evals(0)
        exit()

    # language model pretraining
    if params.lm_before > 0:
        logger.info("Pretraining language model for %i iterations ..." %
                    params.lm_before)
        trainer.n_sentences = 0
        for _ in range(params.lm_before):
            for lang in params.langs:
                trainer.lm_step(lang)
            trainer.iter()

    # define epoch size
    if params.epoch_size == -1:
        params.epoch_size = params.n_para
    assert params.epoch_size > 0

    # start training
    for _ in range(trainer.epoch, params.max_epoch):

        logger.info(
            "====================== Starting epoch %i ... ======================"
            % trainer.epoch)

        trainer.n_sentences = 0

        while trainer.n_sentences < params.epoch_size:

            # discriminator training
            for _ in range(params.n_dis):
                trainer.discriminator_step()

            # language model training
            if params.lambda_lm > 0:
                for _ in range(params.lm_after):
                    for lang in params.langs:
                        trainer.lm_step(lang)

            # MT training (parallel data)
            if params.lambda_xe_para > 0:
                for lang1, lang2 in params.para_directions:
                    trainer.enc_dec_step(lang1, lang2, params.lambda_xe_para)

            # MT training (back-parallel data)
            if params.lambda_xe_back > 0:
                for lang1, lang2 in params.back_directions:
                    trainer.enc_dec_step(lang1,
                                         lang2,
                                         params.lambda_xe_back,
                                         back=True)

            # autoencoder training (monolingual data)
            if params.lambda_xe_mono > 0:
                for lang in params.mono_directions:
                    trainer.enc_dec_step(lang, lang, params.lambda_xe_mono)

            # AE - MT training (on the fly back-translation)
            if params.lambda_xe_otfd > 0 or params.lambda_xe_otfa > 0:

                # start on-the-fly batch generations
                if not getattr(params, 'started_otf_batch_gen', False):
                    otf_iterator = trainer.otf_bt_gen_async()
                    params.started_otf_batch_gen = True

                # update model parameters on subprocesses
                if trainer.n_iter % params.otf_sync_params_every == 0:
                    trainer.otf_sync_params()

                # get training batch from CPU
                before_gen = time.time()
                batches = next(otf_iterator)
                trainer.gen_time += time.time() - before_gen

                # training
                for batch in batches:
                    lang1, lang2, lang3 = batch['lang1'], batch['lang2'], batch['lang3']
                    # 2-lang back-translation - autoencoding
                    if lang1 != lang2 == lang3:
                        trainer.otf_bt(batch, params.lambda_xe_otfa,
                                       params.otf_backprop_temperature)
                    # 2-lang back-translation - parallel data
                    elif lang1 == lang3 != lang2:
                        trainer.otf_bt(batch, params.lambda_xe_otfd,
                                       params.otf_backprop_temperature)
                    # 3-lang back-translation - parallel data
                    elif lang1 != lang2 and lang2 != lang3 and lang1 != lang3:
                        trainer.otf_bt(batch, params.lambda_xe_otfd,
                                       params.otf_backprop_temperature)

            trainer.iter()

        # end of epoch
        logger.info(
            "====================== End of epoch %i ======================" %
            trainer.epoch)

        # evaluate discriminator / perplexity / BLEU
        scores = evaluator.run_all_evals(trainer.epoch)

        # print / JSON log
        for k, v in scores.items():
            logger.info('%s -> %.6f' % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))

        # save best / save periodic / end epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
        trainer.test_sharing()
Example #7
parser.add_argument("--id", type=int, default=0)
parser.add_argument("--checkpoint_dir",
                    type=str,
                    default='/data2/twang/simple-fairseq/all_models/big')
params = parser.parse_args()
params.gpu_num = 1
params.seed = 1234
params.reload_model = '{}/model_epoch{}.pt'.format(params.checkpoint_dir,
                                                   params.id)
params.translate_file = 'data/valid.bpe.zh'
params.src_dico_file = 'data/dict.bpe.zh'
params.tgt_dico_file = 'data/dict.bpe.en'
params.out_file = '{}/predict_{}.en'.format(params.checkpoint_dir, params.id)
if __name__ == '__main__':
    data = load_data(params, name='test')
    encoder, decoder, _ = build_mt_model(params)
    encoder.eval()
    decoder.eval()
    iterator = data.get_iterator(shuffle=False, group_by_size=False)()
    file = open(params.out_file, 'w', encoding='utf-8')
    total = 0
    with torch.no_grad():
        for (sen1, len1) in iterator:
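            # sort sentences by decreasing length for the encoder and keep
            # the permutation so outputs can be restored to input order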
            len1, bak_order = len1.sort(descending=True)
            sen1 = sen1[:, bak_order]
            sen1 = sen1.cuda()
            encoded = encoder(sen1, len1)
            sent2, len2, _ = decoder.generate(encoded)
            total += len2.size(0)
            logger.info('Translated %i sentences.' % total)
            for j in bak_order.argsort().tolist():
                # body truncated in the original listing; presumably each
                # hypothesis is converted to text and written to `file`
                pass
Example #8
# argument name reconstructed from its help string
parser.add_argument(
    "--length_penalty",
    type=float,
    default=1.0,
    help="Length penalty: <1.0 favors shorter, >1.0 favors longer sentences")
params = parser.parse_args()

if __name__ == '__main__':

    # check parameters
    assert params.exp_name
    check_all_data_params(params)
    check_mt_model_params(params)

    # initialize experiment / load data / build model
    logger = initialize_exp(params)
    data = load_data(params)
    encoder, decoder, discriminator, lm = build_mt_model(params, data)

    # wrap the models with DataParallel for multi-GPU execution
    # (output_device must be a single device; outputs are gathered on GPU 0)
    encoder = nn.DataParallel(encoder, device_ids=[0, 1, 2], output_device=0)
    decoder = nn.DataParallel(decoder, device_ids=[0, 1, 2], output_device=0)
    if discriminator is not None:
        discriminator = nn.DataParallel(discriminator,
                                        device_ids=[0, 1, 2],
                                        output_device=0)
    if lm is not None:
        lm = nn.DataParallel(lm, device_ids=[0, 1, 2], output_device=0)
Example #9
# argument name reconstructed from its help string
parser.add_argument("--clip_grad_norm",
                    type=float,
                    default=5.0,
                    help="clip grad norm")
parser.add_argument("--id", type=int, default=0)
parser.add_argument("--checkpoint_dir", type=str)
params = parser.parse_args()
params.checkpoint_dir = 'huawei_delay4'
params.id = 29
params.reload_model = '{}/model_epoch{}.pt'.format(params.checkpoint_dir,
                                                   params.id)
params.translate_file = 'huawei/valid.bpe.zh'
params.src_dico_file = 'huawei/dict.bpe.zh'
params.tgt_dico_file = 'huawei/dict.bpe.en'

if __name__ == '__main__':
    data = load_data(params, name='test')
    encoder, decoder, _ = build_mt_model(params, cuda=False)
    encoder.eval()
    decoder.eval()

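    # dummy input for tracing: one sentence of 10 random token ids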
    test_seq = torch.LongTensor(10, 1).random_(0, params.src_n_words)
    test_seq_length = torch.LongTensor([test_seq.size()[0]])

    print(test_seq, test_seq_length)
    # Trace the model
    traced_encoder = torch.jit.trace(encoder, (test_seq, test_seq_length))
    # print(traced_encoder.graph)
    print('trace finish')
    traced_encoder.save('huawei_delay4/traced_encoder.pth')
    print('save finish')
def main(params):
    # check parameters
    assert params.exp_name
    check_all_data_params(params)
    check_mt_model_params(params)

    # import pickle
    # with open('params.pkl', 'wb') as f_in:
    #     pickle.dump(params, f_in)
    # exit()

    # initialize experiment / load data / build model
    logger = initialize_exp(params)
    data = load_data(params)

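    # either build the model components from scratch, or restore them all
    # from a single serialized checkpoint ('enc' / 'dec' / 'dis' / 'lm')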
    if params.model_path is None:
        encoder, decoder, discriminator, lm = build_mt_model(
            params, data, cuda=torch.cuda.is_available())
    else:
        model = torch.load(params.model_path)
        encoder = model['enc']
        decoder = model['dec']
        discriminator = model['dis']
        lm = model['lm']
    # initialize trainer / reload checkpoint / initialize evaluator
    trainer = TrainerMT(encoder, decoder, discriminator, lm, data, params)
    trainer.reload_checkpoint()
    trainer.test_sharing()  # check parameters sharing
    evaluator = EvaluatorMT(trainer, data, params)

    # evaluation mode
    if params.eval_only:
        scores = evaluator.run_all_evals(0, params)
        # print / JSON log
        for k, v in scores.items():
            logger.info('%s -> %.6f' % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # language model pretraining
    if params.lm_before > 0:
        logger.info("Pretraining language model for %i iterations ..." %
                    params.lm_before)
        trainer.n_sentences = 0
        for idx in range(params.lm_before):
            for lang in params.mono_directions:
                trainer.enc_dec_step(lang, lang, params.lambda_xe_mono)
            # for lang in params.langs:
            #     trainer.lm_step(lang)
            trainer.iter()
            if (idx + 1) % 10000 == 0:
                trainer.save_model('lmpre_%d' % idx)

    # define epoch size
    if params.epoch_size == -1:
        params.epoch_size = params.n_para
    assert params.epoch_size > 0

    # start training
    for epoch in range(trainer.epoch, params.max_epoch):

        logger.info(
            "====================== Starting epoch %i ... ======================"
            % trainer.epoch)

        trainer.n_sentences = 0

        while trainer.n_sentences < params.epoch_size:

            # discriminator training
            for _ in range(params.n_dis):
                trainer.discriminator_step()

            # language model training
            if params.lambda_lm > 0:
                for _ in range(params.lm_after):
                    for lang in params.langs:
                        trainer.lm_step(lang)

            # MT training (parallel data)
            if params.lambda_xe_para > 0:
                for lang1, lang2 in params.para_directions:
                    trainer.enc_dec_step(lang1, lang2, params.lambda_xe_para)

            # MT training (back-parallel data)
            if params.lambda_xe_back > 0:
                for lang1, lang2 in params.back_directions:
                    trainer.enc_dec_step(lang1,
                                         lang2,
                                         params.lambda_xe_back,
                                         back=True)

            # autoencoder training (monolingual data)
            if params.lambda_xe_mono > 0:
                for lang in params.mono_directions:
                    trainer.enc_dec_step(lang, lang, params.lambda_xe_mono)

            # AE - MT training (on the fly back-translation)
            if params.lambda_xe_otfd > 0 or params.lambda_xe_otfa > 0:

                # start on-the-fly batch generations
                if not getattr(params, 'started_otf_batch_gen', False):
                    otf_iterator = trainer.otf_bt_gen_async()
                    params.started_otf_batch_gen = True

                # update model parameters on subprocesses
                if trainer.n_iter % params.otf_sync_params_every == 0:
                    trainer.otf_sync_params()

                # get training batch from CPU
                before_gen = time.time()
                batches = next(otf_iterator)
                trainer.gen_time += time.time() - before_gen

                # training
                for batch in batches:
                    lang1, lang2, lang3 = batch['lang1'], batch['lang2'], batch['lang3']
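                    # reward terms enabled: route 2-lang back-translation
                    # through the RL step once rl_start_epoch is reached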
                    if params.reward_gamma_ap != 0 or params.reward_type_ar != 'None':
                        # 2-lang back-translation - autoencoding
                        if lang1 != lang2 == lang3:
                            trainer.otf_bt(batch, params.lambda_xe_otfa,
                                           params.otf_backprop_temperature)
                        # 2-lang back-translation - parallel data
                        elif lang1 == lang3 != lang2:
                            if params.use_rl and epoch >= params.rl_start_epoch:
                                trainer.otf_bt_rl(
                                    batch, params.lambda_xe_otfd,
                                    params.otf_backprop_temperature,
                                    params.reward_gamma_ap,
                                    params.reward_gamma_ar,
                                    params.reward_type_ar,
                                    params.reward_thresh_ar,
                                    params.reward_gamma_cv)
                            else:
                                trainer.otf_bt(batch, params.lambda_xe_otfd,
                                               params.otf_backprop_temperature)
                        # 3-lang back-translation - parallel data
                        elif lang1 != lang2 and lang2 != lang3 and lang1 != lang3:
                            trainer.otf_bt(batch, params.lambda_xe_otfd,
                                           params.otf_backprop_temperature)
                    else:
                        trainer.otf_bt(batch, params.lambda_xe_otfd,
                                       params.otf_backprop_temperature)
            trainer.iter()

        # end of epoch
        logger.info(
            "====================== End of epoch %i ======================" %
            trainer.epoch)

        # evaluate discriminator / perplexity / BLEU
        # scores=0
        scores = evaluator.run_all_evals(trainer.epoch, params)  #TODO

        # print / JSON log
        for k, v in scores.items():
            logger.info('%s -> %.6f' % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))

        # save best / save periodic / end epoch
        # trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
        trainer.test_sharing()