def main():
    start = time.time()
    parser = args.parse_args()

    # run some checks on arguments
    check_args(parser)

    # format logging
    log_name = os.path.join(
        parser.run_log,
        '{}_run_log_{}.log'.format(parser.experiment,
                                   dt.now().strftime("%Y%m%d_%H%M")))
    log.basicConfig(filename=log_name,
                    format='%(asctime)s | %(name)s -- %(message)s',
                    level=log.INFO)
    os.chmod(log_name, parser.access_mode)

    # use GPU if available, otherwise fall back to CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Starting experiment {} VN -> EN NMT on {}.".format(
        parser.experiment, device))
    log.info("Starting experiment {} VN -> EN NMT on {}.".format(
        parser.experiment, device))

    # set seed for replication
    random.seed(parser.seed)
    np.random.seed(parser.seed)
    torch.manual_seed(parser.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(parser.seed)
    log.info("For reproducibility, the seed is set to {}.".format(parser.seed))

    # set file paths
    source_name = parser.source_name
    target_name = parser.target_name

    # get saved models dir
    base_saved_models_dir = parser.save_dir
    saved_models_dir = os.path.join(base_saved_models_dir,
                                    source_name + '2' + target_name)
    plots_dir = parser.plots_dir

    log.info("We will save the models in this directory: {}".format(
        saved_models_dir))
    log.info("We will save the plots in this directory: {}".format(plots_dir))

    # get data dir
    main_data_path = parser.data_dir
    path_to_train_data = {
        'source': main_data_path + 'train.tok.' + source_name,
        'target': main_data_path + 'train.tok.' + target_name
    }
    path_to_dev_data = {
        'source': main_data_path + 'dev.tok.' + source_name,
        'target': main_data_path + 'dev.tok.' + target_name
    }
    path_to_test_data = {
        'source': main_data_path + 'test.tok.' + source_name,
        'target': main_data_path + 'test.tok.' + target_name
    }

    # Configuration
    bs = parser.batch_size
    log.info("Batch size = {}.".format(bs))

    enc_emb = parser.enc_emb
    enc_hidden = parser.enc_hidden
    enc_layers = parser.enc_layers
    rnn_type = parser.rnn_type

    dec_emb = parser.dec_emb
    dec_hidden = parser.dec_hidden
    dec_layers = parser.dec_layers

    learning_rate = parser.learning_rate
    num_epochs = parser.epochs
    attn_flag = parser.wo_attn
    log.info("The attention flag is set to {}.".format(attn_flag))
    beam_size = parser.beam_size
    log.info("We evaluate using beam size of {}.".format(beam_size))

    train, val, test, en_lang, vi_lang = dataset_helper.train_val_load(
        "", main_data_path)

    # get vocab sizes
    log.info('English has vocab size of: {} words.'.format(en_lang.n_words))
    log.info('Vietnamese has vocab size of: {} words.'.format(vi_lang.n_words))

    # get max sentence length (95th percentile)
    MAX_LEN = int(train['en_len'].quantile(0.95))
    log.info('We will have a max sentence length of {} (95th percentile).'.format(
        MAX_LEN))

    # set data loaders
    bs_dict = {'train': bs, 'validate': 1, 'test': 1}
    shuffle_dict = {'train': True, 'validate': False, 'test': False}

    train_used = train
    val_used = val

    collate_fn_dict = {
        'train': partial(dataset_helper.vocab_collate_func, MAX_LEN=MAX_LEN),
        'validate': dataset_helper.vocab_collate_func_val,
        'test': dataset_helper.vocab_collate_func_val
    }

    transformed_dataset = {
        'train': dataset_helper.Vietnamese(train_used),
        'validate': dataset_helper.Vietnamese(val_used, val=True),
        'test': dataset_helper.Vietnamese(test, val=True)
    }

    dataloader = {
        x: DataLoader(transformed_dataset[x],
                      batch_size=bs_dict[x],
                      collate_fn=collate_fn_dict[x],
                      shuffle=shuffle_dict[x],
                      num_workers=0)
        for x in ['train', 'validate', 'test']
    }

    # instantiate encoder/decoder
    encoder_wo_att = nnet_models.EncoderRNN(input_size=vi_lang.n_words,
                                            embed_dim=enc_emb,
                                            hidden_size=enc_hidden,
                                            n_layers=enc_layers,
                                            rnn_type=rnn_type).to(device)
    decoder_wo_att = nnet_models.AttentionDecoderRNN(output_size=en_lang.n_words,
                                                     embed_dim=dec_emb,
                                                     hidden_size=dec_hidden,
                                                     n_layers=dec_layers,
                                                     attention=attn_flag).to(device)

    # instantiate optimizers
    if parser.optimizer == 'sgd':
        encoder_optimizer = optim.SGD(encoder_wo_att.parameters(),
                                      lr=learning_rate,
                                      nesterov=True,
                                      momentum=0.99)
        decoder_optimizer = optim.SGD(decoder_wo_att.parameters(),
                                      lr=learning_rate,
                                      nesterov=True,
                                      momentum=0.99)
    elif parser.optimizer == 'adam':
        # Adam uses a fixed learning rate of 1e-4
        encoder_optimizer = optim.Adam(encoder_wo_att.parameters(), lr=1e-4)
        decoder_optimizer = optim.Adam(decoder_wo_att.parameters(), lr=1e-4)
    else:
        raise ValueError('Invalid optimizer!')

    # instantiate schedulers
    enc_scheduler = ReduceLROnPlateau(encoder_optimizer,
                                      min_lr=1e-4,
                                      factor=0.5,
                                      patience=0)
    dec_scheduler = ReduceLROnPlateau(decoder_optimizer,
                                      min_lr=1e-4,
                                      factor=0.5,
                                      patience=0)

    criterion = nn.NLLLoss(ignore_index=global_variables.PAD_IDX)

    log.info(
        "Seq2Seq Model with the following parameters: batch_size = {}, learning_rate = {}, rnn_type = {}, enc_emb = {}, enc_hidden = {}, enc_layers = {}, dec_emb = {}, dec_hidden = {}, dec_layers = {}, num_epochs = {}, source_name = {}, target_name = {}"
        .format(bs, learning_rate, rnn_type, enc_emb, enc_hidden, enc_layers,
                dec_emb, dec_hidden, dec_layers, num_epochs, source_name,
                target_name))

    # do we want to train again?
    train_again = False

    encoder_save = '{}_wo_att_{}bs_{}hs_{}_{}beam_enc_{}_layer'.format(
        rnn_type, bs, enc_hidden, parser.optimizer, beam_size, enc_layers)
    decoder_save = '{}_wo_att_{}bs_{}hs_{}_{}beam_dec_{}_layer'.format(
        rnn_type, bs, enc_hidden, parser.optimizer, beam_size, dec_layers)

    if os.path.exists(utils.get_full_filepath(
            saved_models_dir, encoder_save)) and os.path.exists(
                utils.get_full_filepath(saved_models_dir,
                                        decoder_save)) and (not train_again):
        log.info("Retrieving saved encoder from {}".format(
            utils.get_full_filepath(saved_models_dir, encoder_save)))
        log.info("Retrieving saved decoder from {}".format(
            utils.get_full_filepath(saved_models_dir, decoder_save)))
        encoder_wo_att.load_state_dict(
            torch.load(utils.get_full_filepath(saved_models_dir, encoder_save)))
        decoder_wo_att.load_state_dict(
            torch.load(utils.get_full_filepath(saved_models_dir, decoder_save)))
    else:
        log.info("Check if encoder path exists: {}".format(
            utils.get_full_filepath(saved_models_dir, encoder_save)))
        log.info("Check if decoder path exists: {}".format(
            utils.get_full_filepath(saved_models_dir, decoder_save)))
        log.info("Encoder and Decoder do not exist! Starting to train...")
        encoder_wo_att, decoder_wo_att, loss_hist, acc_hist = train_utilities.train_model(
            encoder_optimizer,
            decoder_optimizer,
            encoder_wo_att,
            decoder_wo_att,
            criterion,
            "no_attention",
            dataloader,
            en_lang,
            vi_lang,
            saved_models_dir,
            encoder_save,
            decoder_save,
            num_epochs=num_epochs,
            rm=0.95,
            enc_scheduler=enc_scheduler,
            dec_scheduler=dec_scheduler)
        log.info("Total time is: {} min : {} s".format(
            (time.time() - start) // 60, (time.time() - start) % 60))
        log.info("We will save the encoder/decoder in this directory: {}".format(
            saved_models_dir))

    # BLEU with beam search
    bleu_no_unk, att_score_wo, pred_wo, src_wo = train_utilities.validation_beam_search(
        encoder_wo_att,
        decoder_wo_att,
        dataloader['validate'],
        en_lang,
        vi_lang,
        'no_attention',
        beam_size,
        verbose=False)
    log.info("Bleu-{} Score (No UNK): {}".format(beam_size, bleu_no_unk))
    print("Bleu-{} Score (No UNK): {}".format(beam_size, bleu_no_unk))

    bleu_unk, att_score_wo, pred_wo, src_wo = train_utilities.validation_beam_search(
        encoder_wo_att,
        decoder_wo_att,
        dataloader['validate'],
        en_lang,
        vi_lang,
        'no_attention',
        beam_size,
        verbose=False,
        replace_unk=True)
    log.info("Bleu-{} Score (UNK): {}".format(beam_size, bleu_unk))
    print("Bleu-{} Score (UNK): {}".format(beam_size, bleu_unk))

    # generate 5 random predictions
    indexes = range(len(pred_wo))
    for i in np.random.choice(indexes, 5):
        print('Source: {} \nPrediction: {}\n---'.format(src_wo[i], pred_wo[i]))
        log.info('Source: {} \nPrediction: {}\n---'.format(src_wo[i], pred_wo[i]))

    # export binned BLEU score plot
    log.info("Exported Binned Bleu Score Plot to {}!".format(plots_dir))
    _, _, fig = utils.get_binned_bl_score(
        encoder=encoder_wo_att,
        decoder=decoder_wo_att,
        val_dataset=transformed_dataset['validate'],
        attn_flag=attn_flag,
        beam_size=beam_size,
        location=plots_dir,
        collate=collate_fn_dict['validate'],
        lang_en=en_lang,
        lang_vi=vi_lang)
def main():
    start = time.time()
    parser = args.parse_args()

    # run some checks on arguments
    check_args(parser)

    # format logging
    log_name = os.path.join(
        parser.run_log,
        '{}_run_log_{}.log'.format(parser.experiment,
                                   dt.now().strftime("%Y%m%d_%H%M")))
    log.basicConfig(filename=log_name,
                    format='%(asctime)s | %(name)s -- %(message)s',
                    level=log.INFO)
    os.chmod(log_name, parser.access_mode)

    # use GPU if available, otherwise fall back to CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    log.info("Starting experiment {} VN -> EN NMT on {}.".format(
        parser.experiment, device))

    # set seed for replication
    random.seed(parser.seed)
    np.random.seed(parser.seed)
    torch.manual_seed(parser.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(parser.seed)
    log.info("For reproducibility, the seed is set to {}.".format(parser.seed))

    # set file paths
    source_name = parser.source_name
    target_name = parser.target_name

    # get saved models dir
    base_saved_models_dir = parser.save_dir
    saved_models_dir = os.path.join(base_saved_models_dir,
                                    source_name + '2' + target_name)
    plots_dir = parser.plots_dir

    log.info("We will save the models in this directory: {}".format(
        saved_models_dir))
    log.info("We will save the plots in this directory: {}".format(plots_dir))

    # get data dir
    main_data_path = parser.data_dir
    path_to_train_data = {
        'source': main_data_path + 'train.' + source_name,
        'target': main_data_path + 'train.' + target_name
    }
    path_to_dev_data = {
        'source': main_data_path + 'dev.' + source_name,
        'target': main_data_path + 'dev.' + target_name
    }

    # get language objects
    saved_language_model_dir = os.path.join(saved_models_dir, 'lang_obj')

    # get dictionary of datasets
    dataset_dict = {
        'train': nmt_dataset.LanguagePair(source_name=source_name,
                                          target_name=target_name,
                                          filepath=path_to_train_data,
                                          lang_obj_path=saved_language_model_dir,
                                          minimum_count=1),
        'dev': nmt_dataset.LanguagePair(source_name=source_name,
                                        target_name=target_name,
                                        filepath=path_to_dev_data,
                                        lang_obj_path=saved_language_model_dir,
                                        minimum_count=1)
    }

    # get max sentence length (99.99th percentile)
    MAX_LEN = int(dataset_dict['train'].main_df['source_len'].quantile(0.9999))
    log.info("MAX_LEN (99.99th percentile) = {}".format(MAX_LEN))

    batchSize = parser.batch_size
    log.info("Batch size = {}.".format(batchSize))

    dataloader_dict = {
        'train': DataLoader(dataset_dict['train'],
                            batch_size=batchSize,
                            collate_fn=partial(nmt_dataset.vocab_collate_func,
                                               MAX_LEN=MAX_LEN),
                            shuffle=True,
                            num_workers=0),
        'dev': DataLoader(dataset_dict['dev'],
                          batch_size=batchSize,
                          collate_fn=partial(nmt_dataset.vocab_collate_func,
                                             MAX_LEN=MAX_LEN),
                          shuffle=True,
                          num_workers=0)
    }

    # Configuration
    source_lang_obj = dataset_dict['train'].source_lang_obj
    target_lang_obj = dataset_dict['train'].target_lang_obj

    source_vocab = dataset_dict['train'].source_lang_obj.n_words
    target_vocab = dataset_dict['train'].target_lang_obj.n_words
    hidden_size = parser.hidden_size
    rnn_layers = parser.rnn_layers
    lr = parser.learning_rate
    longest_label = parser.longest_label
    gradient_clip = parser.gradient_clip
    num_epochs = parser.epochs

    log.info(
        "The source vocab ({}) has {} words and target vocab ({}) has {} words"
        .format(source_name, source_vocab, target_name, target_vocab))

    # encoder model
    encoder_rnn = nnet_models_new.EncoderRNN(input_size=source_vocab,
                                             hidden_size=hidden_size,
                                             numlayers=rnn_layers)
    # decoder model
    decoder_rnn = nnet_models_new.DecoderRNN(output_size=target_vocab,
                                             hidden_size=hidden_size,
                                             numlayers=rnn_layers)

    # seq2seq model (pass rnn_layers, not hidden_size, as the layer count)
    nmt_rnn = nnet_models_new.seq2seq(
        encoder_rnn,
        decoder_rnn,
        lr=lr,
        hiddensize=hidden_size,
        numlayers=rnn_layers,
        target_lang=dataset_dict['train'].target_lang_obj,
        longest_label=longest_label,
        clip=gradient_clip,
        device=device)

    log.info(
        "Seq2Seq Model with the following parameters: batch_size = {}, learning_rate = {}, hidden_size = {}, rnn_layers = {}, lr = {}, longest_label = {}, gradient_clip = {}, num_epochs = {}, source_name = {}, target_name = {}"
        .format(batchSize, lr, hidden_size, rnn_layers, lr, longest_label,
                gradient_clip, num_epochs, source_name, target_name))

    # do we want to train again?
    train_again = False
    saved_file_name = 'no_attn_bs{}_lr{}_hs_{}_rnnlayer{}'.format(
        batchSize, lr, hidden_size, rnn_layers)

    # check if there is a saved model and if we want to train again
    if os.path.exists(utils.get_full_filepath(saved_models_dir,
                                              'rnn')) and (not train_again):
        log.info("Retrieving saved model from {}".format(
            utils.get_full_filepath(saved_models_dir, 'rnn')))
        nmt_rnn = torch.load(utils.get_full_filepath(saved_models_dir, 'rnn'),
                             map_location=global_variables.device)
    # train model again
    else:
        log.info("Check if this path exists: {}".format(
            utils.get_full_filepath(saved_models_dir, saved_file_name)))
        log.info("It does not exist! Starting to train...")
        utils.train_model(dataloader_dict,
                          nmt_rnn,
                          num_epochs=num_epochs,
                          saved_model_path=saved_models_dir,
                          enc_type=saved_file_name)
        log.info("Total time is: {} min : {} s".format(
            (time.time() - start) // 60, (time.time() - start) % 60))
        log.info("We will save the models in this directory: {}".format(
            saved_models_dir))

    # generate translations
    use_cuda = True
    utils.get_translation(nmt_rnn, 'I love to watch science movies on Mondays',
                          source_lang_obj, use_cuda, source_name, target_name)
    utils.get_translation(nmt_rnn, 'I want to be the best friend that I can be',
                          source_lang_obj, use_cuda, source_name, target_name)
    utils.get_translation(nmt_rnn, 'I love you', source_lang_obj, use_cuda,
                          source_name, target_name)
    utils.get_translation(
        nmt_rnn,
        'I love football, I like to watch it with my friends. It is always a great time.',
        source_lang_obj, use_cuda, source_name, target_name)
    utils.get_translation(
        nmt_rnn,
        'I do not know what I would do without pizza, it is very tasty to eat. If I could have any food in the world it would probably be pizza.',
        source_lang_obj, use_cuda, source_name, target_name)
    utils.get_translation(
        nmt_rnn,
        'Trump is the worst president in all of history. He can be a real racist and say very nasty things to people of color.',
        source_lang_obj, use_cuda, source_name, target_name)
    utils.get_translation(nmt_rnn, 'Thank you very much.', source_lang_obj,
                          use_cuda, source_name, target_name)
    utils.get_translation(nmt_rnn, 'Think about your own choices.',
                          source_lang_obj, use_cuda, source_name, target_name)
    utils.get_translation(
        nmt_rnn,
        'I recently did a survey with over 2,000 Americans , and the average number of choices that the typical American reports making is about 70 in a typical day .',
        source_lang_obj, use_cuda, source_name, target_name)

    # export plot
    log.info("Exported Binned Bleu Score Plot to {}!".format(plots_dir))
    _, _, fig = utils.get_binned_bl_score(nmt_rnn,
                                          dataset_dict['dev'],
                                          plots_dir,
                                          batchSize=batchSize)
def main():
    # parse arguments
    parser = args.parse_args()

    # set up logger
    log_path = os.path.join(parser.save_dir, "logs")
    if not os.path.exists(log_path):
        os.mkdir(log_path)

    log_fname = os.path.join(
        log_path,
        "{}_log_{}.log".format(parser.exp_name,
                               dt.now().strftime("%Y%m%d_%H%M")))
    log.basicConfig(filename=log_fname,
                    format='%(asctime)s: %(name)s || %(message)s',
                    level=log.INFO)

    # =============================================================================
    # start
    # =============================================================================
    log.info("=" * 40 + " Start Program " + "=" * 40)

    # =============================================================================
    # misc stuff
    # =============================================================================
    # use GPU if available, otherwise fall back to CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    log.info("Device is {}".format(device))

    # set random seeds
    random.seed(parser.seed)
    np.random.seed(parser.seed)
    torch.manual_seed(parser.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(parser.seed)

    # set data directory
    log.info("Data Directory is {}.".format(parser.data_dir))

    # =============================================================================
    # import data
    # =============================================================================
    task_names = parser.data_name.split(',')
    content_headers = parser.content.split(',')

    tokenizer = transformers.AutoTokenizer.from_pretrained(
        parser.model, do_lower_case=parser.do_lower_case)

    data_handler = myio.IO(
        data_dir=parser.data_dir,
        model_name=parser.model,
        task_names=task_names,
        tokenizer=tokenizer,
        max_length=parser.input_length,
        content=content_headers,
        review_key=parser.review_key,
        label_name=parser.label_name,
        val_split=parser.val_split,
        test_split=parser.test_split,
        batch_size=parser.batch_size,
        shuffle=not parser.no_shuffle,
        cache=not parser.no_cache,
    )
    data_handler.read_task()

    # =============================================================================
    # define model
    # =============================================================================
    log.info("=" * 40 + " Defining Model " + "=" * 40)
    config = transformers.AutoConfig.from_pretrained(parser.model)

    classifier = model.Model(
        model=parser.model,
        config=config,
        n_others=parser.n_others,
        n_hidden=parser.n_class_hidden,
        n_flag=parser.n_labels,
        load=parser.preload_emb,
        load_name=parser.preload_emb_name,
    )

    # =============================================================================
    # define trainer
    # =============================================================================
    log.info("Save Directory is {}.".format(parser.save_dir))
    log.info("=" * 40 + " Defining Trainer " + "=" * 40)

    # create trainer object
    trainer = learner.Learner(
        model=classifier,
        device=device,
        myio=data_handler,
        max_epochs=parser.max_epochs,
        save_path=parser.save_dir,
        lr=parser.lr,
        weight_decay=parser.weight_decay,
        pct_start=parser.pct_start,
        anneal_strategy=parser.anneal_strategy,
        cycle_momentum=parser.cycle_momentum,
        log_int=parser.log_int,
        buffer_break=not parser.no_early_stop,
        break_int=parser.patience,
        accumulate_int=parser.grad_accum,
        max_grad_norm=parser.max_grad_norm,
        n_others=parser.n_others,
        batch_size=parser.batch_size,
        check_int=parser.check_int,
        save=parser.save,
        test=parser.test,
    )

    # train model
    best = trainer.learn(
        model_name=parser.model,
        task_name=task_names[0],
        early_check=parser.early_check,
        debug=parser.debug,
    )

    best['experiment'] = parser.exp_name

    # write results to "results.jsonl"
    if not os.path.exists(parser.save_dir):
        os.mkdir(parser.save_dir)

    results_name = os.path.join(parser.save_dir, "results.jsonl")
    with open(results_name, 'a') as f:
        f.write(json.dumps(best) + "\n")

    log.info("=" * 40 + " Program Complete " + "=" * 40)
    log.info("=" * 40 + " Results written to {} ".format(results_name) + "=" * 40)
def main(): """ Main method for meta-learning """ start = time.time() repository = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) parser = args.parse_args() # run some checks on arguments check_args(parser) # format logging log_name = os.path.join( parser.run_log, '{}_meta_run_log_{}.log'.format(parser.experiment, dt.now().strftime("%Y%m%d_%H%M"))) log.basicConfig(filename=log_name, format='%(asctime)s | %(name)s -- %(message)s', level=log.INFO) os.chmod(log_name, parser.access_mode) # set devise to CPU if available device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") log.info("Device is {}".format(device)) # set seed for replication random.seed(parser.seed) np.random.seed(parser.seed) torch.manual_seed(parser.seed) if torch.cuda.is_available(): torch.cuda.manual_seed_all(parser.seed) log.info("Starting experiment {} meta learning on {} with model {}".format( parser.experiment, device, parser.model)) # set tokenizer and config from Huggingface tokenizer = transformers.AutoTokenizer.from_pretrained( parser.model, do_lower_case=parser.do_lower_case) config = transformers.AutoConfig.from_pretrained(parser.model) # create IO object and import data cache_head = os.path.join(parser.save_dir, 'cached_data') cache_dir = os.path.join(cache_head, parser.model) if not os.path.exists(cache_head): os.mkdir(cache_head) if not os.path.exists(cache_dir): os.mkdir(cache_dir) data_handler = myio.IO(parser.data_dir, cache_dir, tokenizer, parser.max_seq_length, parser.doc_stride, parser.max_query_length, batch_size=parser.batch_size, shuffle=True, cache=True) # set oml oml = meta_learner.MetaLearningClassification( update_lr=parser.meta_update_lr, meta_lr=parser.meta_meta_lr, hf_model_name=parser.model, config=config, myio=data_handler, max_grad_norm=parser.max_grad_norm, device=device) if isinstance(oml.net, nn.DataParallel): rln = oml.net.module.model.bert else: rln = oml.net.model.bert old_weights = copy.deepcopy(rln) # freeze_layers oml.freeze_rln() # do meta_learning meta_tasks = parser.meta_tasks.split(',') # create save path meta_RLN_head = os.path.join(parser.save_dir, "meta_weights") if not os.path.exists(meta_RLN_head): os.mkdir(meta_RLN_head) meta_RLN_weights = os.path.join(meta_RLN_head, parser.experiment + "_meta_weights.pt") meta_steps = trange(0, parser.meta_steps, desc='Meta Outer', mininterval=30) running_loss = 0 for step in meta_steps: # sample tasks sample_tasks = np.random.choice(meta_tasks, parser.n_meta_tasks, replace=False) # sample trajectory d_traj = [] d_rand = [] for task in sample_tasks: task_traj, task_rand = data_handler.sample_dl( task=task, samples=parser.n_meta_task_samples, use='train') d_traj += task_traj d_rand += task_rand loss = oml(d_traj, d_rand) running_loss += loss if step % parser.verbose_steps == 0: log.info( f"OML Loss is {loss} | Step {step} | Average is {running_loss/max(1,step)}" ) # check if rln weights are changing changed = False if isinstance(oml.net, nn.DataParallel): rln = oml.net.module.model.bert else: rln = oml.net.model.bert for old, new in zip(old_weights.parameters(), rln.parameters()): if not old.equal(new): changed = True break assert changed, "Weights are the same" # save every meta step # for multi-GPU if isinstance(oml.net, nn.DataParallel): weights = oml.net.module.model.bert.state_dict() else: weights = oml.net.model.bert.state_dict() torch.save(weights, meta_RLN_weights) log.info( f"Meta loss is {loss} | Step {step} | Average is {running_loss/(step+1)}" ) log.info(f"Changed weights: {changed}") log.info("Saved 
meta weights at {}".format(meta_RLN_weights)) log.info("Total time is: {} min : {} s".format((time.time() - start) // 60, (time.time() - start) % 60))
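# check_args(parser) is called by several of these scripts, but its body is not
# part of this excerpt. A hypothetical sketch of such a validator, using only
# argument names that appear above (data_dir, save_dir, run_log, batch_size,
# seed), might look like the following; the project's real check_args may differ.
import os


def check_args(parser):
    """Hypothetical argument validation; the real check_args is not shown here."""
    # directories that must already exist
    for path in (parser.data_dir, parser.save_dir, parser.run_log):
        if not os.path.isdir(path):
            raise NotADirectoryError("Expected an existing directory: {}".format(path))
    # basic sanity checks on numeric arguments
    if parser.batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    if parser.seed < 0:
        raise ValueError("seed must be non-negative")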
from args import args

args = args.parse_args()

PERCEPTION_RATINGS = {
    0: 'To Be Decided',
    1: 'Major Star',
    2: 'Star',
    3: 'Well Known',
    4: 'Recognisable',
    5: 'Unimportant'
}


def get_momentum_text(rating):
    if rating > 500:
        return 'White Hot'
    elif rating > 400:
        return 'Red Hot'
    elif rating > 300:
        return 'Very Hot'
    elif rating > 200:
        return 'Hot'
    elif rating > 100:
        return 'Very Warm'
    elif rating > 0:
        return 'Warm'
    elif rating == 0:
        return 'Neutral'
    elif rating < -300:
        return 'Very Cold'
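# Illustrative usage of the mapping above (this helper is not part of the
# original module). Note that get_momentum_text falls through every branch and
# returns None for ratings in the range [-300, 0).
def _demo_momentum_text():
    examples = (650, 450, 350, 250, 150, 50, 0, -400)
    # prints ['White Hot', 'Red Hot', 'Very Hot', 'Hot', 'Very Warm', 'Warm', 'Neutral', 'Very Cold']
    print([get_momentum_text(r) for r in examples])
    # prints None: no branch covers -300 <= rating < 0
    print(get_momentum_text(-100))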
def main():
    start = time.time()
    parser = args.parse_args()

    # run some checks on arguments
    check_args(parser)

    # format logging
    log_name = os.path.join(
        parser.run_log,
        '{}_run_log_{}.log'.format(parser.experiment,
                                   dt.now().strftime("%Y%m%d_%H%M")))
    log.basicConfig(filename=log_name,
                    format='%(asctime)s | %(name)s -- %(message)s',
                    level=log.INFO)
    os.chmod(log_name, parser.access_mode)

    # use GPU if available, otherwise fall back to CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    log.info("Starting experiment {} VN -> EN NMT on {}.".format(
        parser.experiment, device))

    # set seed for replication
    random.seed(parser.seed)
    np.random.seed(parser.seed)
    torch.manual_seed(parser.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(parser.seed)
    log.info("For reproducibility, the seed is set to {}.".format(parser.seed))

    # set file paths
    source_name = parser.source_name
    target_name = parser.target_name

    # get saved models dir
    base_saved_models_dir = parser.save_dir
    saved_models_dir = os.path.join(base_saved_models_dir,
                                    source_name + '2' + target_name)
    log.info("We will save the models in this directory: {}".format(
        saved_models_dir))

    # get data dir
    main_data_path = parser.data_dir
    path_to_train_data = {
        'source': main_data_path + 'train.' + source_name,
        'target': main_data_path + 'train.' + target_name
    }
    path_to_dev_data = {
        'source': main_data_path + 'dev.' + source_name,
        'target': main_data_path + 'dev.' + target_name
    }

    # get language objects
    saved_language_model_dir = os.path.join(saved_models_dir, 'lang_obj')

    # get dictionary of datasets
    dataset_dict = {
        'train': nmt_dataset.LanguagePair(source_name=source_name,
                                          target_name=target_name,
                                          filepath=path_to_train_data,
                                          lang_obj_path=saved_language_model_dir,
                                          minimum_count=1),
        'dev': nmt_dataset.LanguagePair(source_name=source_name,
                                        target_name=target_name,
                                        filepath=path_to_dev_data,
                                        lang_obj_path=saved_language_model_dir,
                                        minimum_count=1)
    }

    # get max sentence length (99.99th percentile)
    MAX_LEN = int(dataset_dict['train'].main_df['source_len'].quantile(0.9999))

    batchSize = parser.batch_size
    log.info("Batch size = {}.".format(batchSize))

    dataloader_dict = {
        'train': DataLoader(dataset_dict['train'],
                            batch_size=batchSize,
                            collate_fn=partial(nmt_dataset.vocab_collate_func,
                                               MAX_LEN=MAX_LEN),
                            shuffle=True,
                            num_workers=0),
        'dev': DataLoader(dataset_dict['dev'],
                          batch_size=batchSize,
                          collate_fn=partial(nmt_dataset.vocab_collate_func,
                                             MAX_LEN=MAX_LEN),
                          shuffle=True,
                          num_workers=0)
    }

    # Configuration
    source_lang_obj = dataset_dict['train'].source_lang_obj
    target_lang_obj = dataset_dict['train'].target_lang_obj

    source_vocab = dataset_dict['train'].source_lang_obj.n_words
    target_vocab = dataset_dict['train'].target_lang_obj.n_words
    hidden_size = parser.hidden_size
    rnn_layers = parser.rnn_layers
    lr = parser.learning_rate
    longest_label = parser.longest_label
    gradient_clip = parser.gradient_clip
    num_epochs = parser.epochs

    encoder_attention = parser.encoder_attention
    self_attention = parser.self_attention
    log.info("encoder_attention = {}, self_attention = {}".format(
        encoder_attention, self_attention))

    # encoder model
    encoder_transformer = nnet_models_new.EncoderTransformer(
        source_vocab, MAX_LEN, hidden_size, rnn_layers)

    # decoder model
    decoder_encoderattn = nnet_models_new.Decoder_SelfAttn(
        output_size=target_vocab,
        hidden_size=hidden_size,
        encoder_attention=encoder_attention,
        self_attention=self_attention)

    # seq2seq model (pass rnn_layers, not hidden_size, as the layer count)
    nmt_encoderattn = nnet_models_new.seq2seq(
        encoder_transformer,
        decoder_encoderattn,
        lr=lr,
        hiddensize=hidden_size,
        numlayers=rnn_layers,
        target_lang=dataset_dict['train'].target_lang_obj,
        longest_label=longest_label,
        clip=gradient_clip,
        device=device)

    log.info(
        "Seq2Seq Model with the following parameters: encoder_attention = {}, self_attention = {}, batch_size = {}, learning_rate = {}, hidden_size = {}, rnn_layers = {}, lr = {}, longest_label = {}, gradient_clip = {}, num_epochs = {}, source_name = {}, target_name = {}"
        .format(encoder_attention, self_attention, batchSize, lr, hidden_size,
                rnn_layers, lr, longest_label, gradient_clip, num_epochs,
                source_name, target_name))

    # do we want to train again?
    train_again = False
    modelname = 'encoderattn'

    # check if there is a saved model and if we want to train again
    if os.path.exists(utils.get_full_filepath(saved_models_dir,
                                              modelname)) and (not train_again):
        log.info("Retrieving saved model from {}".format(
            utils.get_full_filepath(saved_models_dir, modelname)))
        nmt_encoderattn = torch.load(
            utils.get_full_filepath(saved_models_dir, modelname))
    # train model again
    else:
        log.info("Check if this path exists: {}".format(
            utils.get_full_filepath(saved_models_dir, modelname)))
        log.info("It does not exist! Starting to train...")
        utils.train_model(dataloader_dict,
                          nmt_encoderattn,
                          num_epochs=num_epochs,
                          saved_model_path=saved_models_dir,
                          enc_type='encoderattn_test')
        log.info("Total time is: {} min : {} s".format(
            (time.time() - start) // 60, (time.time() - start) % 60))
def main(): """ Main method for experiment """ start = time.time() repository = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) parser = args.parse_args() # run some checks on arguments check_args(parser) # format logging log_name = os.path.join( parser.run_log, '{}_run_log_{}.log'.format(parser.experiment, dt.now().strftime("%Y%m%d_%H%M"))) log.basicConfig(filename=log_name, format='%(asctime)s | %(name)s -- %(message)s', level=log.INFO) os.chmod(log_name, parser.access_mode) # set devise to CPU if available device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") log.info("Device is {}".format(device)) # set seed for replication random.seed(parser.seed) np.random.seed(parser.seed) torch.manual_seed(parser.seed) if torch.cuda.is_available(): torch.cuda.manual_seed_all(parser.seed) log.info("Starting experiment {} on {} with model {}".format( parser.experiment, device, parser.model)) print("{}".format(parser.experiment)) # set tokenizer and config from Huggingface tokenizer = transformers.AutoTokenizer.from_pretrained( parser.model, do_lower_case=parser.do_lower_case) config = transformers.AutoConfig.from_pretrained(parser.model) # create IO object and import data cache_head = os.path.join(parser.save_dir, 'cached_data') cache_dir = os.path.join(cache_head, parser.model) if not os.path.exists(cache_head): os.mkdir(cache_head) if not os.path.exists(cache_dir): os.mkdir(cache_dir) data_handler = myio.IO(parser.data_dir, cache_dir, tokenizer, parser.max_seq_length, parser.doc_stride, parser.max_query_length, batch_size=parser.batch_size, shuffle=True, cache=True) # ============================================================================= # BASELINE # ============================================================================= # parse continual learning curriculum parser.continual_curriculum = parser.continual_curriculum.split(',') # create BERT model BERTmodel = model.QAModel( parser.model, config, load_rln=parser.load_rln, rln_weights=parser.rln_weights, ) # create learner object for BERT model trainer = learner.Learner( parser.access_mode, parser.fp16, parser.fp16_opt_level, BERTmodel, parser.model, device, data_handler, parser.save_dir, parser.n_best_size, parser.max_answer_length, parser.do_lower_case, parser.verbose_logging, parser.version_2_with_negative, parser.null_score_diff_threshold, max_steps=parser.fine_tune_steps, log_int=parser.logging_steps, best_int=parser.save_steps, verbose_int=parser.verbose_steps, max_grad_norm=parser.max_grad_norm, optimizer=None, weight_decay=parser.weight_decay, lr=parser.learning_rate, eps=parser.adam_epsilon, warmup_steps=parser.warmup_steps, freeze_embeddings=parser.freeze_embeddings, ) # create continual learning object and perform continual learning c_learner = cont_learning.ContLearner( parser.model, 'BERT', trainer, curriculum=parser.continual_curriculum, fine_tune_prev=not parser.no_prev_fine_tune) log.info("Starting Continual Learning") if not parser.no_cont_learning: c_learner.c_learn(rln_only=parser.carry_rln_only) if len(parser.continual_curriculum) > 1 and not parser.no_forget_eval: c_learner.evaluate_forgetting(rln_only=parser.carry_rln_only) log.info("Generating Plot") # generate BERT plot now = dt.now().strftime("%Y%m%d_%H%M") # create results folders if not generated plot_dir = os.path.join(parser.save_dir, "plots") json_dir = os.path.join(parser.save_dir, "json_results") if not os.path.exists(plot_dir): os.mkdir(plot_dir) if not os.path.exists(json_dir): os.mkdir(json_dir) # plot results and save plot = 
analyze.plot_learning(c_learner.scores, x_tick_int=2 * parser.logging_steps, iterations=parser.fine_tune_steps) plot_name = os.path.join( plot_dir, "baseline_{}_{}_{}.png".format(parser.experiment, parser.model, now)) plot.savefig(plot_name) os.chmod(plot_name, parser.access_mode) log.info("Plot saved at: {}".format(plot_name)) # write data to json baseline_results_name = os.path.join( json_dir, "baseline_{}_{}_{}.json".format(parser.experiment, parser.model, now)) with open(baseline_results_name, 'w') as fw: json.dump(c_learner.scores, fw) os.chmod(baseline_results_name, parser.access_mode) log.info( "Baseline results written to: {}".format(baseline_results_name)) log.info("Total time is: {}min : {}s".format((time.time() - start) // 60, (time.time() - start) % 60))
def main(): """ Main method for experiment """ start = time.time() repository = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) parser = args.parse_args() if parser.run_log == 'log': parser.run_log = os.path.join(parser.save_dir, 'log') if not os.path.exists(parser.run_log): os.mkdir(parser.run_log) # run some checks on arguments check_args(parser) # format logging log_name = os.path.join( parser.run_log, '{}_run_log_{}.log'.format(parser.experiment, dt.now().strftime("%Y%m%d_%H%M"))) log.basicConfig(filename=log_name, format='%(asctime)s | %(name)s -- %(message)s', level=log.INFO) os.chmod(log_name, parser.access_mode) # set devise to CPU if available device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") log.info("Device is {}".format(device)) # set seed for replication random.seed(parser.seed) np.random.seed(parser.seed) torch.manual_seed(parser.seed) if torch.cuda.is_available(): torch.cuda.manual_seed_all(parser.seed) log.info("Starting experiment {} on {}".format(parser.experiment, device)) data_handler = myio.IO( data_dir=parser.data_dir, # directory storing data batch_size=parser.batch_size, # batch size shuffle=not parser.no_shuffle, # whether to shuffle training data split=parser.val_split, # percentage of data for validation ) # TODO: # create model my_model = model.Model( road_lambda=parser.road_lambda, # relative weight of road map loss box_lambda=parser.box_lambda, # relative weight of bounding box loss preload_backbone=parser.preload, # whether to load pretrained weights backbone_weights=parser. preload_weights, # pretrained backbone weights if needed ) # create learner trainer = learner.Learner( access_mode=parser.access_mode, # os access mode for created files experiment_name=parser.experiment, # name of experiment model=my_model, # model device=device, # device to run experiment myio=data_handler, # myio.IO object for loading data save_dir=parser.save_dir, # directory to save results max_steps=parser.training_steps, # maximum number of update steps best_int=parser.save_steps, # interval for checking weights verbose_int=parser.verbose_steps, # interval for logging information max_grad_norm=parser. max_grad_norm, # maximum gradients to avoid exploding gradients optimizer=None, # optimizer for training weight_decay=parser.weight_decay, # weight decay if using lr=parser.learning_rate, # learning rate eps=parser.adam_epsilon, # epsilon to use for adam accumulate_int=parser. accumulate_int, # number of steps to accumulate gradients before stepping batch_size=parser.batch_size, # batch size warmup_pct=parser. pct_start, # percent of updates used to warm-up learning rate save=not parser.no_save, # whether to save weights patience=parser. patience, # number of checks without improvement before early stop ) # train model results = trainer.train(labeled=not parser.no_label, debug=parser.debug) results["experiment"] = parser.experiment # write results to "results.jsonl" results_name = os.path.join(parser.save_dir, "results.jsonl") with open(results_name, 'a') as f: f.write(json.dumps(results) + "\n") os.chmod(results_name, parser.access_mode) log.info("Results written to: {}".format(results_name)) log.info("Total time is: {} min : {} sec".format( (time.time() - start) // 60, (time.time() - start) % 60))