def run(params, error_queue):
    try:
        # start training
        logger.info(params)
        if not torch.cuda.is_available():
            raise NotImplementedError('Training on CPU is not supported')
        torch.cuda.set_device(params.rank)
        torch.manual_seed(params.seed)
        logger.info('Process %s is now running in gpu:%s',
                    os.getpid(), torch.cuda.current_device())

        data = load_data(params, 'train')
        print(data.get_iterator(shuffle=True, group_by_size=True,
                                partition=params.rank))
        encoder, decoder, num_updates = build_mt_model(params)
        trainer = TrainerMT(encoder, decoder, data, params, num_updates)

        for i in range(trainer.epoch, params.max_epoch):
            logger.info("==== Starting epoch %i ... ====" % trainer.epoch)
            trainer.train_epoch()
            tqdm.write('Finished epoch %i.' % i)

    except KeyboardInterrupt:
        pass  # killed by parent, do nothing
    except Exception:
        # propagate exception to parent process, keeping original traceback
        import traceback
        error_queue.put((params.rank, traceback.format_exc()))
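# --- Launch sketch (assumption, not part of the original source): `run` above
# expects one process per GPU, with `params.rank` set per worker and failures
# reported back through `error_queue`. A minimal spawner using
# torch.multiprocessing could look like this; `spawn_workers` and its argument
# names are hypothetical.
import copy

import torch.multiprocessing as mp


def spawn_workers(params, n_gpu):
    ctx = mp.get_context('spawn')
    error_queue = ctx.SimpleQueue()
    processes = []
    for rank in range(n_gpu):
        worker_params = copy.deepcopy(params)
        worker_params.rank = rank
        p = ctx.Process(target=run, args=(worker_params, error_queue))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
    # re-raise the first child traceback, if any worker crashed
    if not error_queue.empty():
        rank, trace = error_queue.get()
        raise RuntimeError('worker %i failed:\n%s' % (rank, trace))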
def main(params):
    data = load_data(params, name='train')
    encoder, decoder, num_updates = build_mt_model(params)
    trainer = TrainerMT(encoder, decoder, data, params, num_updates)

    for i in range(trainer.epoch, 30):
        logger.info("==== Starting epoch %i ... ====" % trainer.epoch)
        trainer.train_epoch()
        tqdm.write('Finished epoch %i.' % i)
def perform_translation(input_file_path, translation_directory, cloze_train_path,
                        question_train_path, fasttext_vectors_path, checkpoint_path):
    params = get_params(
        exp_name='translation',
        dump_path=translation_directory,
        cloze_train_path=cloze_train_path,
        question_train_path=question_train_path,
        cloze_test_path=input_file_path,
        fasttext_vectors_path=fasttext_vectors_path,
        checkpoint_path=checkpoint_path,
    )

    # check parameters
    assert params.exp_name
    check_all_data_params(params)
    check_mt_model_params(params)

    data = load_data(params, mono_only=True)
    encoder, decoder, discriminator, lm = build_mt_model(params, data)

    # initialize trainer / reload checkpoint / initialize evaluator
    trainer = TrainerMT(encoder, decoder, discriminator, lm, data, params)
    trainer.reload_checkpoint()
    trainer.test_sharing()  # check parameters sharing
    evaluator = EvaluatorMT(trainer, data, params)

    with torch.no_grad():
        lang1, lang2 = 'cloze', 'question'
        evaluator.encoder.eval()
        evaluator.decoder.eval()
        lang1_id = evaluator.params.lang2id[lang1]
        lang2_id = evaluator.params.lang2id[lang2]

        translations = []
        dataset = evaluator.data['mono'][lang1]['test']
        dataset.batch_size = params.batch_size
        for i, (sent1, len1) in enumerate(
                dataset.get_iterator(shuffle=False, group_by_size=False)()):
            encoded = evaluator.encoder(sent1.cuda(), len1, lang1_id)
            sent2_, len2_, _ = evaluator.decoder.generate(encoded, lang2_id)
            lang1_text = convert_to_text(sent1, len1, evaluator.dico[lang1],
                                         lang1_id, evaluator.params)
            lang2_text = convert_to_text(sent2_, len2_, evaluator.dico[lang2],
                                         lang2_id, evaluator.params)
            translations += zip(lang1_text, lang2_text)

    # export sentences to hypothesis file and restore BPE segmentation
    out_name = os.path.join(translation_directory, 'output_translations.txt')
    with open(out_name, 'w', encoding='utf-8') as f:
        f.write('\n'.join(['\t'.join(st) for st in translations]) + '\n')
    restore_segmentation(out_name)
    return out_name
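# --- Usage sketch (hypothetical paths, not from the original source):
#
#     out_path = perform_translation(
#         input_file_path='data/cloze.test.txt',
#         translation_directory='dumped/translation',
#         cloze_train_path='data/cloze.train.txt',
#         question_train_path='data/question.train.txt',
#         fasttext_vectors_path='data/fasttext.vec',
#         checkpoint_path='dumped/checkpoint.pth',
#     )
#
# The returned path points at 'output_translations.txt', one tab-separated
# (cloze, question) pair per line with BPE segmentation restored.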
def load(params):
    # check parameters
    assert params.exp_name
    check_all_data_params(params)
    check_mt_model_params(params)

    # initialize experiment / load data / build model
    data = load_data(params)
    encoder, decoder, discriminator, lm = build_mt_model(params, data)

    # initialize trainer / reload checkpoint / initialize evaluator
    trainer = TrainerMT(encoder, decoder, discriminator, lm, data, params)
    trainer.reload_checkpoint()
    trainer.test_sharing()  # check parameters sharing
    evaluator = EvaluatorMT(trainer, data, params)
    return trainer, evaluator
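# --- Usage sketch (not part of the original source): `load` returns a trainer
# with a reloaded checkpoint plus its evaluator, so a typical interactive use
# is to score the current checkpoint without training, e.g.:
#
#     trainer, evaluator = load(params)
#     with torch.no_grad():
#         scores = evaluator.run_all_evals(trainer.epoch)
#     for k, v in scores.items():
#         print('%s -> %.6f' % (k, v))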
def inference(params):
    # check parameters
    assert params.exp_name
    check_all_data_params(params)
    check_mt_model_params(params)

    # initialize experiment / load data / build model
    logger = initialize_exp(params)
    data = load_data(params)
    encoder, decoder, discriminator, lm = build_mt_model(params, data)

    # initialize trainer / reload checkpoint / initialize evaluator
    trainer = TrainerMT(encoder, decoder, discriminator, lm, data, params)
    trainer.reload_best_model()
    trainer.test_sharing()  # check parameters sharing
    evaluator = EvaluatorMT(trainer, data, params)

    # evaluation mode
    evaluator.eval_inference()
    exit()
def main(params):
    # check parameters
    assert params.exp_name
    check_all_data_params(params)
    check_mt_model_params(params)

    # initialize experiment / load data / build model
    logger = initialize_exp(params)
    data = load_data(params)
    encoder, decoder, discriminator, lm = build_mt_model(params, data)

    # initialize trainer / reload checkpoint / initialize evaluator
    trainer = TrainerMT(encoder, decoder, discriminator, lm, data, params)
    trainer.reload_checkpoint()
    trainer.test_sharing()  # check parameters sharing
    evaluator = EvaluatorMT(trainer, data, params)

    # evaluation mode
    if params.eval_only:
        evaluator.run_all_evals(0)
        exit()

    # language model pretraining
    if params.lm_before > 0:
        logger.info("Pretraining language model for %i iterations ..." % params.lm_before)
        trainer.n_sentences = 0
        for _ in range(params.lm_before):
            for lang in params.langs:
                trainer.lm_step(lang)
            trainer.iter()

    # define epoch size
    if params.epoch_size == -1:
        params.epoch_size = params.n_para
    assert params.epoch_size > 0

    # start training
    for _ in range(trainer.epoch, params.max_epoch):

        logger.info("====================== Starting epoch %i ... ======================" % trainer.epoch)

        trainer.n_sentences = 0

        while trainer.n_sentences < params.epoch_size:

            # discriminator training
            for _ in range(params.n_dis):
                trainer.discriminator_step()

            # language model training
            if params.lambda_lm > 0:
                for _ in range(params.lm_after):
                    for lang in params.langs:
                        trainer.lm_step(lang)

            # MT training (parallel data)
            if params.lambda_xe_para > 0:
                for lang1, lang2 in params.para_directions:
                    trainer.enc_dec_step(lang1, lang2, params.lambda_xe_para)

            # MT training (back-parallel data)
            if params.lambda_xe_back > 0:
                for lang1, lang2 in params.back_directions:
                    trainer.enc_dec_step(lang1, lang2, params.lambda_xe_back, back=True)

            # autoencoder training (monolingual data)
            if params.lambda_xe_mono > 0:
                for lang in params.mono_directions:
                    trainer.enc_dec_step(lang, lang, params.lambda_xe_mono)

            # AE - MT training (on-the-fly back-translation)
            if params.lambda_xe_otfd > 0 or params.lambda_xe_otfa > 0:

                # start on-the-fly batch generation
                if not getattr(params, 'started_otf_batch_gen', False):
                    otf_iterator = trainer.otf_bt_gen_async()
                    params.started_otf_batch_gen = True

                # update model parameters on subprocesses
                if trainer.n_iter % params.otf_sync_params_every == 0:
                    trainer.otf_sync_params()

                # get training batch from CPU
                before_gen = time.time()
                batches = next(otf_iterator)
                trainer.gen_time += time.time() - before_gen

                # training
                for batch in batches:
                    lang1, lang2, lang3 = batch['lang1'], batch['lang2'], batch['lang3']
                    # 2-lang back-translation - autoencoding
                    if lang1 != lang2 == lang3:
                        trainer.otf_bt(batch, params.lambda_xe_otfa, params.otf_backprop_temperature)
                    # 2-lang back-translation - parallel data
                    elif lang1 == lang3 != lang2:
                        trainer.otf_bt(batch, params.lambda_xe_otfd, params.otf_backprop_temperature)
                    # 3-lang back-translation - parallel data
                    elif lang1 != lang2 and lang2 != lang3 and lang1 != lang3:
                        trainer.otf_bt(batch, params.lambda_xe_otfd, params.otf_backprop_temperature)

            trainer.iter()

        # end of epoch
        logger.info("====================== End of epoch %i ======================" % trainer.epoch)

        # evaluate discriminator / perplexity / BLEU
        scores = evaluator.run_all_evals(trainer.epoch)

        # print / JSON log
        for k, v in scores.items():
            logger.info('%s -> %.6f' % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))

        # save best / save periodic / end epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
        trainer.test_sharing()
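# --- Entry-point sketch (assumption, not in the original file): this `main`
# is presumably driven from a command-line parser, along the lines of:
#
#     if __name__ == '__main__':
#         parser = get_parser()        # hypothetical argparse builder
#         params = parser.parse_args()
#         main(params)
#
# `get_parser` is a placeholder name; the real repository may build `params`
# differently (see `get_params` in `perform_translation` above).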
    # initialize trainer / reload checkpoint / initialize evaluator
    # output_device must be a single device for nn.DataParallel; gathered
    # outputs land on device 0 here.
    encoder = nn.DataParallel(encoder, device_ids=[0, 1, 2], output_device=0)
    decoder = nn.DataParallel(decoder, device_ids=[0, 1, 2], output_device=0)
    if discriminator is not None:
        discriminator = nn.DataParallel(discriminator, device_ids=[0, 1, 2], output_device=0)
    if lm is not None:
        lm = nn.DataParallel(lm, device_ids=[0, 1, 2], output_device=0)
    trainer = TrainerMT(encoder, decoder, discriminator, lm, data, params)
    trainer.reload_checkpoint()
    trainer.test_sharing()  # check parameters sharing
    evaluator = EvaluatorMT(trainer, data, params)

    # evaluation mode
    if params.eval_only:
        evaluator.run_all_evals(0)
        exit()

    # language model pretraining
    if params.lm_before > 0:
        logger.info("Pretraining language model for %i iterations ..." % params.lm_before)
        trainer.n_sentences = 0
        for _ in range(params.lm_before):
def main(params):
    # check parameters
    assert params.exp_name
    check_all_data_params(params)
    check_mt_model_params(params)

    # import pickle
    # with open('params.pkl', 'wb') as f_in:
    #     pickle.dump(params, f_in)
    # exit()

    # initialize experiment / load data / build model
    logger = initialize_exp(params)
    data = load_data(params)
    if params.model_path is None:
        encoder, decoder, discriminator, lm = build_mt_model(
            params, data, cuda=torch.cuda.is_available())
    else:
        model = torch.load(params.model_path)
        encoder = model['enc']
        decoder = model['dec']
        discriminator = model['dis']
        lm = model['lm']

    # initialize trainer / reload checkpoint / initialize evaluator
    trainer = TrainerMT(encoder, decoder, discriminator, lm, data, params)
    trainer.reload_checkpoint()
    trainer.test_sharing()  # check parameters sharing
    evaluator = EvaluatorMT(trainer, data, params)

    # evaluation mode
    if params.eval_only:
        scores = evaluator.run_all_evals(0, params)
        # print / JSON log
        for k, v in scores.items():
            logger.info('%s -> %.6f' % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # language model pretraining
    if params.lm_before > 0:
        logger.info("Pretraining language model for %i iterations ..." % params.lm_before)
        trainer.n_sentences = 0
        for idx in range(params.lm_before):
            for lang in params.mono_directions:
                trainer.enc_dec_step(lang, lang, params.lambda_xe_mono)
            # for lang in params.langs:
            #     trainer.lm_step(lang)
            trainer.iter()
            if (idx + 1) % 10000 == 0:
                trainer.save_model('lmpre_%d' % idx)

    # define epoch size
    if params.epoch_size == -1:
        params.epoch_size = params.n_para
    assert params.epoch_size > 0

    # start training
    for epoch in range(trainer.epoch, params.max_epoch):

        logger.info("====================== Starting epoch %i ... ======================" % trainer.epoch)

        trainer.n_sentences = 0

        while trainer.n_sentences < params.epoch_size:

            # discriminator training
            for _ in range(params.n_dis):
                trainer.discriminator_step()

            # language model training
            if params.lambda_lm > 0:
                for _ in range(params.lm_after):
                    for lang in params.langs:
                        trainer.lm_step(lang)

            # MT training (parallel data)
            if params.lambda_xe_para > 0:
                for lang1, lang2 in params.para_directions:
                    trainer.enc_dec_step(lang1, lang2, params.lambda_xe_para)

            # MT training (back-parallel data)
            if params.lambda_xe_back > 0:
                for lang1, lang2 in params.back_directions:
                    trainer.enc_dec_step(lang1, lang2, params.lambda_xe_back, back=True)

            # autoencoder training (monolingual data)
            if params.lambda_xe_mono > 0:
                for lang in params.mono_directions:
                    trainer.enc_dec_step(lang, lang, params.lambda_xe_mono)

            # AE - MT training (on-the-fly back-translation)
            if params.lambda_xe_otfd > 0 or params.lambda_xe_otfa > 0:

                # start on-the-fly batch generation
                if not getattr(params, 'started_otf_batch_gen', False):
                    otf_iterator = trainer.otf_bt_gen_async()
                    params.started_otf_batch_gen = True

                # update model parameters on subprocesses
                if trainer.n_iter % params.otf_sync_params_every == 0:
                    trainer.otf_sync_params()

                # get training batch from CPU
                before_gen = time.time()
                batches = next(otf_iterator)
                trainer.gen_time += time.time() - before_gen

                # training
                for batch in batches:
                    lang1, lang2, lang3 = batch['lang1'], batch['lang2'], batch['lang3']
                    if params.reward_gamma_ap != 0 or params.reward_type_ar != 'None':
                        # 2-lang back-translation - autoencoding
                        if lang1 != lang2 == lang3:
                            trainer.otf_bt(batch, params.lambda_xe_otfa, params.otf_backprop_temperature)
                        # 2-lang back-translation - parallel data
                        elif lang1 == lang3 != lang2:
                            if params.use_rl and epoch >= params.rl_start_epoch:
                                trainer.otf_bt_rl(
                                    batch, params.lambda_xe_otfd, params.otf_backprop_temperature,
                                    params.reward_gamma_ap, params.reward_gamma_ar,
                                    params.reward_type_ar, params.reward_thresh_ar,
                                    params.reward_gamma_cv)
                            else:
                                trainer.otf_bt(batch, params.lambda_xe_otfd, params.otf_backprop_temperature)
                        # 3-lang back-translation - parallel data
                        elif lang1 != lang2 and lang2 != lang3 and lang1 != lang3:
                            trainer.otf_bt(batch, params.lambda_xe_otfd, params.otf_backprop_temperature)
                    else:
                        trainer.otf_bt(batch, params.lambda_xe_otfd, params.otf_backprop_temperature)

            trainer.iter()

        # end of epoch
        logger.info("====================== End of epoch %i ======================" % trainer.epoch)

        # evaluate discriminator / perplexity / BLEU
        # scores = 0
        scores = evaluator.run_all_evals(trainer.epoch, params)  # TODO

        # print / JSON log
        for k, v in scores.items():
            logger.info('%s -> %.6f' % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))

        # save best / save periodic / end epoch
        # trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
        trainer.test_sharing()
def main(params):
    # check parameters
    assert params.exp_name
    check_all_data_params(params)  # done
    check_mt_model_params(params)

    # initialize experiment / load data / build model
    logger = initialize_exp(params)
    data = load_data(params, mono_para=params.mono_para)
    encoder, decoder_aux, decoder, discriminator, lm = build_mt_model_modified(params, data)
    # encoder, decoder1, decoder2, discriminator, lm = build_mt_model(params, data)  # add new decoder

    # initialize trainer / reload checkpoint / initialize evaluator
    trainer = TrainerMT(encoder, decoder, decoder_aux, discriminator, lm, data, params)
    trainer.reload_checkpoint()
    trainer.test_sharing()  # check parameters sharing
    evaluator = EvaluatorMT(trainer, data, params)

    # evaluation mode
    if params.eval_only:
        evaluator.run_all_evals(0)
        exit()

    # language model pretraining
    if params.lm_before > 0:
        logger.info("Pretraining language model for %i iterations ..." % params.lm_before)
        trainer.n_sentences = 0
        for _ in range(params.lm_before):
            for lang in params.langs:
                trainer.lm_step(lang)
            trainer.iter()

    # define epoch size
    if params.epoch_size == -1:
        params.epoch_size = params.n_para
    assert params.epoch_size > 0

    # start training
    for _ in range(trainer.epoch, params.max_epoch):

        logger.info("====================== Starting epoch %i ... ======================" % trainer.epoch)

        trainer.n_sentences = 0

        while trainer.n_sentences < params.epoch_size:

            # discriminator training (n_dis = 0: the discriminator is not used)
            logger.info("num of sentences %i" % trainer.n_sentences)
            for _ in range(params.n_dis):
                trainer.discriminator_step()

            # language model training (lm_after = 0: the LM is not trained)
            if params.lambda_lm > 0:
                for _ in range(params.lm_after):
                    for lang in params.langs:
                        trainer.lm_step(lang)

            # autoencoder training (monolingual data)
            if params.lambda_xe_mono > 0:
                for lang in params.mono_directions:
                    trainer.enc_dec_step(lang, lang, params.lambda_xe_mono, mono=True)

            # autoencoder training on the disfluent-fluent parallel monolingual
            # dataset: Disfluent_mono -> Mono data
            if params.lambda_xe_mono_para > 0:
                for lang1, lang2 in params.mono_para_directions:  # (en, en), (es, es)
                    if lang1 in ['en']:
                        trainer.enc_dec_step(lang1, lang2, params.lambda_xe_mono_para, mono_para=True)

            # MT training (parallel data): introduce the new decoder here. Fisher data
            # E1->D2' and E2->D1'
            if params.lambda_xe_para > 0:
                for lang1, lang2 in params.para_directions:  # (en, es), (es, en)
                    # logger.info("para : %s %s" % (lang1, lang2))
                    # logger.info("training para")
                    trainer.enc_dec_step(lang1, lang2, params.lambda_xe_para, para=True)

            # MT training on news-commentary data
            # E1->D2'->D2 and E2->D1'->D1
            if params.lambda_xe_para_aux > 0:
                for lang1, lang2 in params.para_directions:  # (en, es), (es, en)
                    trainer.enc_dec_step(lang1, lang2, params.lambda_xe_para_aux, aux=True)

            # MT training (back-parallel data): 0
            if params.lambda_xe_back > 0:
                for lang1, lang2 in params.back_directions:
                    trainer.enc_dec_step(lang1, lang2, params.lambda_xe_back, back=True)

            # AE - MT training (on-the-fly back-translation): otfd is on
            if params.lambda_xe_otfd > 0 or params.lambda_xe_otfa > 0:

                # start on-the-fly batch generation
                if not getattr(params, 'started_otf_batch_gen', False):
                    otf_iterator = trainer.otf_bt_gen_async()
                    params.started_otf_batch_gen = True

                # update model parameters on subprocesses
                if trainer.n_iter % params.otf_sync_params_every == 0:
                    trainer.otf_sync_params()

                # get training batch from CPU
                before_gen = time.time()
                batches = next(otf_iterator)
                trainer.gen_time += time.time() - before_gen

                # training
                for batch in batches:
                    lang1, lang2, lang3 = batch['lang1'], batch['lang2'], batch['lang3']
                    # 2-lang back-translation - autoencoding
                    if lang1 != lang2 == lang3:
                        trainer.otf_bt(batch, params.lambda_xe_otfa, params.otf_backprop_temperature)
                    # 2-lang back-translation - parallel data
                    elif lang1 == lang3 != lang2:
                        trainer.otf_bt(batch, params.lambda_xe_otfd, params.otf_backprop_temperature)
                    # 3-lang back-translation - parallel data
                    elif lang1 != lang2 and lang2 != lang3 and lang1 != lang3:
                        trainer.otf_bt(batch, params.lambda_xe_otfd, params.otf_backprop_temperature)

            trainer.iter()

        # end of epoch
        logger.info("====================== End of epoch %i ======================" % trainer.epoch)

        # evaluate discriminator / perplexity / BLEU
        scores = evaluator.run_all_evals(trainer.epoch)

        # print / JSON log
        for k, v in scores.items():
            logger.info('%s -> %.6f' % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))

        # save best / save periodic / end epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
        trainer.test_sharing()