def main(params):
    """Train an XLM model under a wall-clock time limit.

    Identical to the standard XLM training loop, but tracks elapsed time
    per epoch and stops cleanly before `params.time_limit` (minutes) is
    exceeded, estimating the next epoch's cost from the mean of the last
    three epochs.
    """
    # Get starting time
    start = time.time()
    total_elapsed_time_until_now = 0

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)
    logger.info('***** Starting time {} *****'.format(start))

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)
    logger.info('***** Time limit to run script: {} (min) *****'.format(
        params.time_limit))

    # build model
    if params.encoder_only:
        model = build_model(params, data['dico'])
    else:
        encoder, decoder = build_model(params, data['dico'])

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    def _param_checksum():
        # Checksum of all trainable parameters, for drift debugging.
        # BUG FIX: the original referenced `model` unconditionally, which
        # raises NameError in the encoder-decoder path (encoder_only=False).
        if params.encoder_only:
            return sum(p.sum().item() for p in model.parameters())
        return (sum(p.sum().item() for p in encoder.parameters()) +
                sum(p.sum().item() for p in decoder.parameters()))

    # evaluation
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # startup time counts against the limit
    total_elapsed_time_until_now += (time.time() - start) / 60.0
    # sliding window of the last three epoch durations (minutes)
    elapsed_time_last_three_epochs = deque(maxlen=3)
    # BUG FIX: '{:2f}' is width-2 / default precision; '{:.2f}' was intended
    logger.info('total_elapsed_time_until_now = {:.2f} (min)'.format(
        total_elapsed_time_until_now))

    # language model training
    for _ in range(params.max_epoch):

        logger.info('Checking parameters - beginning of epoch: {:8f}'.format(
            _param_checksum()))

        start = time.time()
        logger.info("============ Starting epoch %i ... ============" %
                    trainer.epoch)

        trainer.n_sentences = 0

        while trainer.n_sentences < trainer.epoch_size:

            # CLM steps
            for lang1, lang2 in shuf_order(params.clm_steps, params):
                trainer.clm_step(lang1, lang2, params.lambda_clm)

            # MLM steps (also includes TLM if lang2 is not None)
            for lang1, lang2 in shuf_order(params.mlm_steps, params):
                trainer.mlm_step(lang1, lang2, params.lambda_mlm)

            # parallel classification steps
            for lang1, lang2 in shuf_order(params.pc_steps, params):
                trainer.pc_step(lang1, lang2, params.lambda_pc)

            # denoising auto-encoder steps
            for lang in shuf_order(params.ae_steps):
                trainer.mt_step(lang, lang, params.lambda_ae)

            # machine translation steps
            for lang1, lang2 in shuf_order(params.mt_steps, params):
                trainer.mt_step(lang1, lang2, params.lambda_mt)

            # back-translation steps
            for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                trainer.bt_step(lang1, lang2, lang3, params.lambda_bt)

            trainer.iter()

        logger.info("============ End of epoch %i ============" %
                    trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)

        # Compute elapsed time
        elapsed_time_epoch = (time.time() - start) / 60.0
        elapsed_time_last_three_epochs.append(elapsed_time_epoch)
        total_elapsed_time_until_now += elapsed_time_epoch
        est_avg_time_each_epoch = np.mean(
            np.array(elapsed_time_last_three_epochs))
        logger.info('total_elapsed_time_until_now = {:.2f} (min)'.format(
            total_elapsed_time_until_now))
        logger.info('elapsed_time_last_three_epochs = {}'.format(
            elapsed_time_last_three_epochs))
        logger.info('est_avg_time_each_epoch = {:.2f} (min)'.format(
            est_avg_time_each_epoch))
        logger.info('params.time_limit = {:.2f} (min)'.format(
            params.time_limit))

        logger.info('Checking parameters - end of epoch: {:8f}'.format(
            _param_checksum()))

        # Check running time
        if params.time_limit > 0:
            # Estimated avg time for each epoch is computed using running
            # time of previous epochs
            if total_elapsed_time_until_now + est_avg_time_each_epoch < params.time_limit:
                logger.info(
                    'Total elapsed time including next epoch is estimated to be LESS than time limit.'
                )
                logger.info('CONTINUE TRAINING ...')
            else:
                logger.info(
                    'Total elapsed time including next epoch is estimated to be GREATER than time limit.'
                )
                logger.info('STOP TRAINING.')
                return
# ---- fine-tuning on transfer tasks (GLUE / XNLI / FLUE) ----

# parse and validate the comma-separated task list
params.transfer_tasks = params.transfer_tasks.split(',')
assert len(params.transfer_tasks) > 0
assert all(task in TASKS for task in params.transfer_tasks)

# reload pretrained model
embedder = SentenceEmbedder.reload(params.model_path, params)

# reload language settings from the pretrained model's saved params
for attr in ('n_langs', 'id2lang', 'lang2id'):
    setattr(params, attr, embedder.pretrain_params[attr])

# initialize the experiment / build sentence embedder
logger = initialize_exp(params)
scores = {}

# prepare trainers / evaluators (all share the same scores dict)
glue = GLUE(embedder, scores, params)
xnli = XNLI(embedder, scores, params)
flue = FLUE(embedder, scores, params)

# run each requested task on whichever benchmark suite owns it
for task in params.transfer_tasks:
    if task in GLUE_TASKS:
        glue.run(task)
    if task in XNLI_TASKS:
        xnli.run()
    if task in FLUE_TASKS:
        flue.run(task)
def main(params):
    """Translate sentences read from stdin and write them to
    `params.output_path`.

    Reloads an encoder-decoder checkpoint from `params.model_path`,
    translates stdin line-by-line in batches of `params.batch_size`
    from `params.src_lang` to `params.tgt_lang`, echoing each pair to
    stderr.
    """
    # initialize the experiment
    logger = initialize_exp(params)

    # generate parser / parse parameters
    # NOTE(review): this re-parses sys.argv and shadows the `params`
    # argument (matches upstream XLM behavior) — confirm this is intended
    # before calling main() with programmatically-built params.
    parser = get_parser()
    params = parser.parse_args()
    reloaded = torch.load(params.model_path)
    model_params = AttrDict(reloaded['params'])
    logger.info("Supported languages: %s" %
                ", ".join(model_params.lang2id.keys()))

    # update dictionary parameters from the checkpoint
    for name in [
            'n_words', 'bos_index', 'eos_index', 'pad_index', 'unk_index',
            'mask_index'
    ]:
        setattr(params, name, getattr(model_params, name))

    # build dictionary / build encoder / build decoder / reload weights
    dico = Dictionary(reloaded['dico_id2word'], reloaded['dico_word2id'],
                      reloaded['dico_counts'])
    encoder = TransformerModel(model_params,
                               dico,
                               is_encoder=True,
                               with_output=True).cuda().eval()
    decoder = TransformerModel(model_params,
                               dico,
                               is_encoder=False,
                               with_output=True).cuda().eval()
    encoder.load_state_dict(reloaded['encoder'])
    decoder.load_state_dict(reloaded['decoder'])
    params.src_id = model_params.lang2id[params.src_lang]
    params.tgt_id = model_params.lang2id[params.tgt_lang]

    # read sentences from stdin
    src_sent = []
    for line in sys.stdin.readlines():
        assert len(line.strip().split()) > 0
        src_sent.append(line)
    logger.info("Read %i sentences from stdin. Translating ..." %
                len(src_sent))

    # BUG FIX: open the output file in a `with` block so it is closed even
    # if translation raises (the original only closed it on success).
    with io.open(params.output_path, 'w', encoding='utf-8') as f:

        for i in range(0, len(src_sent), params.batch_size):

            # prepare batch: word ids per sentence, then a padded
            # (max_len x batch) tensor delimited by <eos> on both ends
            word_ids = [
                torch.LongTensor([dico.index(w) for w in s.strip().split()])
                for s in src_sent[i:i + params.batch_size]
            ]
            lengths = torch.LongTensor([len(s) + 2 for s in word_ids])
            batch = torch.LongTensor(lengths.max().item(),
                                     lengths.size(0)).fill_(params.pad_index)
            batch[0] = params.eos_index
            for j, s in enumerate(word_ids):
                if lengths[j] > 2:  # if sentence not empty
                    batch[1:lengths[j] - 1, j].copy_(s)
                batch[lengths[j] - 1, j] = params.eos_index
            langs = batch.clone().fill_(params.src_id)

            # encode source batch and translate it
            encoded = encoder('fwd',
                              x=batch.cuda(),
                              lengths=lengths.cuda(),
                              langs=langs.cuda(),
                              causal=False)
            encoded = encoded.transpose(0, 1)
            decoded, dec_lengths = decoder.generate(
                encoded,
                lengths.cuda(),
                params.tgt_id,
                max_len=int(1.5 * lengths.max().item() + 10))

            # convert sentences to words
            for j in range(decoded.size(1)):

                # remove delimiters
                sent = decoded[:, j]
                delimiters = (sent == params.eos_index).nonzero().view(-1)
                assert len(delimiters) >= 1 and delimiters[0].item() == 0
                sent = sent[1:] if len(
                    delimiters) == 1 else sent[1:delimiters[1]]

                # output translation
                source = src_sent[i + j].strip()
                target = " ".join(dico[w.item()] for w in sent)
                sys.stderr.write("%i / %i: %s -> %s\n" %
                                 (i + j, len(src_sent), source, target))
                f.write(target + "\n")
def main(params):
    """XLM training entry point instrumented with Comet ML.

    Same training loop as the standard XLM `main`, plus: a Comet
    `Experiment` for metric logging, optional adapter-only fine-tuning
    (freezes everything except adapters/embeddings/pred layer), and extra
    agreement/back-translation steps (rat / rabt / xbt).
    """
    # start a comet project
    # Comet logging is created but disabled entirely for debug runs.
    if params.debug_train:
        experiment = Experiment(workspace="hopemcgovern",
                                log_code=True,
                                disabled=True)
    else:
        experiment = Experiment(workspace="hopemcgovern", log_code=True)
    experiment.set_name(params.exp_name)
    experiment.log_parameters(params)
    experiment.add_tag('XLM')

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # going to put everything except the training, val, and test in the trainer class,
    data = load_data(params)

    # build model
    if params.encoder_only:
        model = build_model(params, data['dico'])
        if params.use_adapters:
            logger.info("Using adapters")
            # Freeze every parameter whose name does not start with
            # "adapters" ...
            for param in model.named_parameters():
                if param[0][:8] != "adapters":
                    param[1].requires_grad = False
            # ... then selectively re-enable embeddings, position
            # embeddings, the prediction layer, and the embedding
            # layer-norm so only adapters + these heads train.
            for param_name, param in model.embeddings.named_parameters():
                param.requires_grad = True
            for param_name, param in model.position_embeddings.named_parameters(
            ):
                param.requires_grad = True
            for param_name, param in model.pred_layer.named_parameters():
                param.requires_grad = True
            for param in model.layer_norm_emb.parameters():
                param.requires_grad = True
            # log the final trainable/frozen status of every parameter
            for param in model.named_parameters():
                logger.info(param[0] + ' required grad = ' +
                            str(param[1].requires_grad))
    else:
        encoder, decoder = build_model(params, data['dico'])

    # build trainer, reload potential checkpoints / build evaluator
    # (the Comet experiment is handed to the trainer for step-level logging)
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params, experiment)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params, experiment)
        evaluator = EncDecEvaluator(trainer, data, params)

    # evaluation
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    #
    # language model training
    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" %
                    trainer.epoch)

        trainer.n_sentences = 0

        while trainer.n_sentences < trainer.epoch_size:

            # CLM steps
            for lang1, lang2 in shuf_order(params.clm_steps, params):
                trainer.clm_step(lang1, lang2, params.lambda_clm)

            # MLM steps (also includes TLM if lang2 is not None)
            for lang1, lang2 in shuf_order(params.mlm_steps, params):
                trainer.mlm_step(lang1, lang2, params.lambda_mlm)

            # parallel classification steps
            for lang1, lang2 in shuf_order(params.pc_steps, params):
                trainer.pc_step(lang1, lang2, params.lambda_pc)

            # denoising auto-encoder steps
            for lang in shuf_order(params.ae_steps):
                trainer.mt_step(lang, lang, params.lambda_ae)

            # machine translation steps
            for lang1, lang2 in shuf_order(params.mt_steps, params):
                trainer.mt_step(lang1, lang2, params.lambda_mt)

            # back-translation steps
            for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                trainer.bt_step(lang1, lang2, lang3, params.lambda_bt)

            # reference-agreement-translation steps
            for lang1, lang2, lang3 in shuf_order(params.rat_steps):
                trainer.rat_step(lang1, lang2, lang3, params.lambda_rat)

            # reference-agreement-back-translation steps
            for lang1, lang2, lang3, in shuf_order(params.rabt_steps):
                trainer.rabt_step(lang1, lang2, lang3, params.lambda_rabt)

            # cross-lingual-back-translation steps
            for lang1, lang2, lang3 in shuf_order(params.xbt_steps):
                trainer.xbt_step(lang1, lang2, lang3, params.lambda_xbt)

            # progress counters pushed to Comet on every inner iteration
            experiment.log_metric('epoch', trainer.epoch)
            experiment.log_metric('n_iter', trainer.n_iter)
            experiment.log_metric('n_total_iterations', trainer.n_total_iter)
            experiment.log_metric('n_sentences', trainer.n_sentences)

            trainer.iter()

        logger.info("============ End of epoch %i ============" %
                    trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)
        experiment.log_metrics(scores, epoch=trainer.epoch)

        # print / JSON log
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
def main(params):
    """Standard XLM training entry point.

    Sets up distributed training, builds the model(s), trainer and
    evaluator, then runs `params.max_epoch` epochs of the configured
    objectives (CLM / MLM / PC / AE / MT / BT). With `params.eval_only`
    set, runs a single evaluation pass and exits.
    """
    # distributed setup, experiment bookkeeping, SLURM pre-emption handler
    init_distributed_mode(params)
    logger = initialize_exp(params)
    init_signal_handler()

    # load data
    data = load_data(params)

    # build model + trainer + evaluator (single-stack or encoder-decoder)
    if params.encoder_only:
        model = build_model(params, data['dico'])
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        encoder, decoder = build_model(params, data['dico'])
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # evaluation-only mode: score, log, and stop
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for name, value in scores.items():
            logger.info("%s -> %.6f" % (name, value))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # main training loop
    for _epoch in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" %
                    trainer.epoch)
        trainer.n_sentences = 0

        while trainer.n_sentences < trainer.epoch_size:

            # causal language modeling
            for src, tgt in shuf_order(params.clm_steps, params):
                trainer.clm_step(src, tgt, params.lambda_clm)

            # masked language modeling (TLM when tgt is not None)
            for src, tgt in shuf_order(params.mlm_steps, params):
                trainer.mlm_step(src, tgt, params.lambda_mlm)

            # parallel classification
            for src, tgt in shuf_order(params.pc_steps, params):
                trainer.pc_step(src, tgt, params.lambda_pc)

            # denoising auto-encoding (same language on both sides)
            for lang in shuf_order(params.ae_steps):
                trainer.mt_step(lang, lang, params.lambda_ae)

            # supervised machine translation
            for src, tgt in shuf_order(params.mt_steps, params):
                trainer.mt_step(src, tgt, params.lambda_mt)

            # back-translation (src -> pivot -> tgt)
            for src, pivot, tgt in shuf_order(params.bt_steps):
                trainer.bt_step(src, pivot, tgt, params.lambda_bt)

            trainer.iter()

        logger.info("============ End of epoch %i ============" %
                    trainer.epoch)

        # evaluate perplexity, then log scores (JSON line on master only)
        scores = evaluator.run_all_evals(trainer)
        for name, value in scores.items():
            logger.info("%s -> %.6f" % (name, value))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # checkpointing + epoch bookkeeping
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)