def score_converter_v1(tokenized_file, sent_list, sent_retri_top_k, sent_retri_scal_prob):
    sent_select_results_list_1 = simi_sampler.threshold_sampler_insure_unique(
        tokenized_file, sent_list, sent_retri_scal_prob, top_n=sent_retri_top_k)
    return sent_select_results_list_1
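# `threshold_sampler_insure_unique` (from simi_sampler) is the selection rule
# used throughout this file: per claim, keep the sentences whose retrieval
# probability clears the threshold, capped at `top_n`, without duplicate
# sentence ids. Below is a minimal illustrative sketch of that rule; the field
# names 'prob' and 'sid' are assumptions about the scored-sentence records, and
# the real implementation lives in simi_sampler:
def _sketch_threshold_top_k(scored_sents, prob_threshold, top_n):
    kept, seen = [], set()
    for s in sorted(scored_sents, key=lambda x: x['prob'], reverse=True):
        if len(kept) >= top_n:
            break
        if s['prob'] >= prob_threshold and s['sid'] not in seen:
            seen.add(s['sid'])
            kept.append(s)
    return kept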
def train_fever_std_ema_v1(resume_model=None, wn_feature=False):
    """
    This method is the new training script for training FEVER with span and
    probability scores.
    :param resume_model:
    :param wn_feature:
    :return:
    """
    num_epoch = 200
    seed = 12
    batch_size = 32
    lazy = True
    dev_prob_threshold = 0.1
    train_prob_threshold = 0.1
    train_sample_top_k = 8

    experiment_name = f"nsmn_sent_wise_std_ema_lr1|t_prob:{train_prob_threshold}|top_k:{train_sample_top_k}"
    # resume_model = None

    print("Do EMA:")
    print("Dev prob threshold:", dev_prob_threshold)
    print("Train prob threshold:", train_prob_threshold)
    print("Train sample top k:", train_sample_top_k)

    dev_upstream_sent_list = common.load_jsonl(
        config.RESULT_PATH /
        "sent_retri_nn/balanced_sentence_selection_results/dev_sent_pred_scores.jsonl")
    train_upstream_sent_list = common.load_jsonl(
        config.RESULT_PATH /
        "sent_retri_nn/balanced_sentence_selection_results/train_sent_scores.jsonl")

    # Prepare Data
    token_indexers = {
        'tokens': SingleIdTokenIndexer(namespace='tokens'),  # This is the raw tokens
        'elmo_chars': ELMoTokenCharactersIndexer(namespace='elmo_characters')  # This is the elmo_characters
    }

    print("Building Prob Dicts...")
    train_sent_list = common.load_jsonl(
        config.RESULT_PATH /
        "sent_retri_nn/balanced_sentence_selection_results/train_sent_scores.jsonl")
    dev_sent_list = common.load_jsonl(
        config.RESULT_PATH /
        "sent_retri_nn/balanced_sentence_selection_results/dev_sent_pred_scores.jsonl")

    selection_dict = paired_selection_score_dict(train_sent_list)
    selection_dict = paired_selection_score_dict(dev_sent_list, selection_dict)

    upstream_dev_list = threshold_sampler_insure_unique(
        config.T_FEVER_DEV_JSONL,
        dev_upstream_sent_list,
        prob_threshold=dev_prob_threshold,
        top_n=5)

    # Specify `ablation` to remove WordNet and number embeddings.
    # Note: `p_dict` (the WordNet pairwise-feature dict) is expected to be
    # loaded at module level before this function runs.
    dev_fever_data_reader = WNSIMIReader(token_indexers=token_indexers,
                                         lazy=lazy,
                                         wn_p_dict=p_dict,
                                         max_l=320,
                                         ablation=None)
    train_fever_data_reader = WNSIMIReader(token_indexers=token_indexers,
                                           lazy=lazy,
                                           wn_p_dict=p_dict,
                                           max_l=320,
                                           shuffle_sentences=False,
                                           ablation=None)

    complete_upstream_dev_data = select_sent_with_prob_for_eval(
        config.T_FEVER_DEV_JSONL,
        upstream_dev_list,
        selection_dict,
        tokenized=True)

    dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data)

    # Load Vocabulary
    biterator = BasicIterator(batch_size=batch_size)
    vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT / "vocab_cache" / "nli_basic")
    vocab.change_token_with_index_to_namespace('hidden', -2, namespace='labels')

    print(vocab.get_token_to_index_vocabulary('labels'))
    print(vocab.get_vocab_size('tokens'))

    biterator.index_with(vocab)

    # Build Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu", index=0)
    device_num = -1 if device.type == 'cpu' else 0

    model = Model(
        rnn_size_in=(1024 + 300 + dev_fever_data_reader.wn_feature_size,
                     1024 + 450 + dev_fever_data_reader.wn_feature_size),
        rnn_size_out=(450, 450),
        weight=weight_dict['glove.840B.300d'],
        vocab_size=vocab.get_vocab_size('tokens'),
        mlp_d=900,
        embedding_dim=300,
        max_l=300,
        use_extra_lex_feature=False,
        max_span_l=100)

    print("Model Max length:", model.max_l)
    if resume_model is not None:
        model.load_state_dict(torch.load(resume_model))
    model.display()
    model.to(device)

    cloned_empty_model = copy.deepcopy(model)
    ema: EMA = EMA(parameters=model.named_parameters())

    # Create Log File
    file_path_prefix, date = save_tool.gen_file_prefix(f"{experiment_name}")
    # Save the source code.
    script_name = os.path.basename(__file__)
    with open(os.path.join(file_path_prefix, script_name), 'w') as out_f, \
            open(__file__, 'r') as it:
        out_f.write(it.read())
        out_f.flush()
    # Save source code end.

    best_dev = -1
    iteration = 0

    start_lr = 0.0001
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                           lr=start_lr)
    criterion = nn.CrossEntropyLoss()

    for i_epoch in range(num_epoch):
        print("Resampling...")
        # Resampling
        train_data_with_candidate_sample_list = \
            threshold_sampler_insure_unique(config.T_FEVER_TRAIN_JSONL,
                                            train_upstream_sent_list,
                                            train_prob_threshold,
                                            top_n=train_sample_top_k)
        complete_upstream_train_data = adv_simi_sample_with_prob_v1_1(
            config.T_FEVER_TRAIN_JSONL,
            train_data_with_candidate_sample_list,
            selection_dict,
            tokenized=True)

        print("Sample data length:", len(complete_upstream_train_data))
        sampled_train_instances = train_fever_data_reader.read(complete_upstream_train_data)

        train_iter = biterator(sampled_train_instances,
                               shuffle=True,
                               num_epochs=1,
                               cuda_device=device_num)
        for i, batch in tqdm(enumerate(train_iter)):
            model.train()
            out = model(batch)
            y = batch['label']

            loss = criterion(out, y)

            # No decay
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            iteration += 1

            # EMA update
            ema(model.named_parameters())

            if i_epoch < 15:
                mod = 10000
                # mod = 10
            else:
                mod = 2000

            if iteration % mod == 0:
                # Non-EMA evaluation path, kept for reference:
                # eval_iter = biterator(dev_instances, shuffle=False, num_epochs=1,
                #                       cuda_device=device_num)
                # complete_upstream_dev_data = hidden_eval(model, eval_iter,
                #                                          complete_upstream_dev_data)
                # eval_mode = {'check_sent_id_correct': True, 'standard': True}
                # strict_score, acc_score, pr, rec, f1 = c_scorer.fever_score(
                #     complete_upstream_dev_data,
                #     common.load_jsonl(config.T_FEVER_DEV_JSONL),
                #     mode=eval_mode, verbose=False)
                # print("Fever Score(Strict/Acc./Precision/Recall/F1):",
                #       strict_score, acc_score, pr, rec, f1)
                # print(f"Dev:{strict_score}/{acc_score}")

                # EMA evaluation and saving
                eval_iter = biterator(dev_instances,
                                      shuffle=False,
                                      num_epochs=1,
                                      cuda_device=device_num)
                load_ema_to_model(cloned_empty_model, ema)
                complete_upstream_dev_data = hidden_eval(cloned_empty_model,
                                                         eval_iter,
                                                         complete_upstream_dev_data)

                eval_mode = {'check_sent_id_correct': True, 'standard': True}
                strict_score, acc_score, pr, rec, f1 = c_scorer.fever_score(
                    complete_upstream_dev_data,
                    common.load_jsonl(config.T_FEVER_DEV_JSONL),
                    mode=eval_mode,
                    verbose=False)
                print("Fever Score EMA(Strict/Acc./Precision/Recall/F1):",
                      strict_score, acc_score, pr, rec, f1)
                print(f"Dev EMA:{strict_score}/{acc_score}")

                need_save = False
                if strict_score > best_dev:
                    best_dev = strict_score
                    need_save = True

                if need_save:
                    # Plain (non-EMA) checkpointing, kept for reference:
                    # save_path = os.path.join(
                    #     file_path_prefix,
                    #     f'i({iteration})_epoch({i_epoch})_dev({strict_score})_lacc({acc_score})_seed({seed})')
                    # torch.save(model.state_dict(), save_path)
                    ema_save_path = os.path.join(
                        file_path_prefix,
                        f'ema_i({iteration})_epoch({i_epoch})_dev({strict_score})_lacc({acc_score})_seed({seed})')
                    save_ema_to_file(ema, ema_save_path)
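# The EMA object above shadows every trainable parameter and is updated once
# per optimizer step; `load_ema_to_model` then copies the averaged weights into
# `cloned_empty_model` for evaluation, and `save_ema_to_file` checkpoints them.
# A minimal sketch of the underlying update rule, assuming an exponential decay
# factor `mu` (the repo's EMA class may differ in decay schedule and storage):
class _SketchEMA:
    def __init__(self, named_parameters, mu=0.9999):
        self.mu = mu
        self.shadow = {name: p.data.clone()
                       for name, p in named_parameters if p.requires_grad}

    def __call__(self, named_parameters):
        # shadow <- mu * shadow + (1 - mu) * current_param, per parameter.
        for name, p in named_parameters:
            if p.requires_grad:
                self.shadow[name].mul_(self.mu).add_(p.data, alpha=1 - self.mu)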
def pipeline(in_file,
             eval_file=None,
             model_path_dict=default_model_path_dict,
             steps=default_steps):
    """
    :param in_file: The raw input file.
    :param eval_file: Optional gold file; when given, intermediate retrieval
        steps are evaluated along the way.
    :return:
    """
    sentence_retri_1_scale_prob = 0.5
    sentence_retri_2_scale_prob = 0.9
    sent_retri_1_top_k = 5
    sent_retri_2_top_k = 1

    sent_prob_for_2doc = 0.1
    sent_topk_for_2doc = 5
    enhance_retri_1_scale_prob = -1
    build_submission = True

    doc_retrieval_method = 'word_freq'

    haonan_docretri_object = HAONAN_DOCRETRI_OBJECT()

    if not PIPELINE_DIR.exists():
        PIPELINE_DIR.mkdir()

    if steps['s1.tokenizing']['do']:
        time_stamp = utils.get_current_time_str()
        current_pipeline_dir = PIPELINE_DIR / f"{time_stamp}_r"
    else:
        current_pipeline_dir = steps['s1.tokenizing']['out_file'].parent

    print("Current Result Root:", current_pipeline_dir)

    if not current_pipeline_dir.exists():
        current_pipeline_dir.mkdir()

    eval_list = common.load_jsonl(eval_file) if eval_file is not None else None

    in_file_stem = in_file.stem
    tokenized_file = current_pipeline_dir / f"t_{in_file_stem}.jsonl"

    # Save code into directory
    script_name = os.path.basename(__file__)
    with open(os.path.join(str(current_pipeline_dir), script_name), 'w') as out_f, \
            open(__file__, 'r') as it:
        out_f.write(it.read())
        out_f.flush()

    # Tokenizing.
    print("Step 1. Tokenizing.")
    if steps['s1.tokenizing']['do']:
        tokenized_claim(in_file, tokenized_file)  # Auto Saved
        print("Tokenized file saved to:", tokenized_file)
    else:
        tokenized_file = steps['s1.tokenizing']['out_file']
        print("Use preprocessed file:", tokenized_file)
    # Tokenizing End.

    # First Document retrieval.
    print("Step 2. First Document Retrieval")
    if steps['s2.1doc_retri']['do']:
        doc_retrieval_result_list = first_doc_retrieval(
            haonan_docretri_object, tokenized_file, method=doc_retrieval_method)
        doc_retrieval_file_1 = current_pipeline_dir / f"doc_retr_1_{in_file_stem}.jsonl"
        common.save_jsonl(doc_retrieval_result_list, doc_retrieval_file_1)
        print("First Document Retrieval file saved to:", doc_retrieval_file_1)
    else:
        doc_retrieval_file_1 = steps['s2.1doc_retri']['out_file']
        doc_retrieval_result_list = common.load_jsonl(doc_retrieval_file_1)
        print("Use preprocessed file:", doc_retrieval_file_1)

    if eval_list is not None:
        print("Evaluating 1st Doc Retrieval")
        eval_mode = {'check_doc_id_correct': True, 'standard': False}
        print(c_scorer.fever_score(doc_retrieval_result_list,
                                   eval_list,
                                   mode=eval_mode,
                                   verbose=False))
    # First Document retrieval End.

    # First Sentence Selection.
    print("Step 3. First Sentence Selection")
First Sentence Selection") if steps['s3.1sen_select']['do']: dev_sent_list_1_e0 = simple_nnmodel.pipeline_first_sent_selection( tokenized_file, doc_retrieval_file_1, model_path_dict['sselector']) dev_sent_file_1_e0 = current_pipeline_dir / f"dev_sent_score_1_{in_file_stem}.jsonl" common.save_jsonl(dev_sent_list_1_e0, dev_sent_file_1_e0) # Manual setting, delete it later # dev_sent_file_1_e0 = None # dev_sent_list_1_e0 = common.load_jsonl("/home/easonnie/projects/FunEver/results/pipeline_r/2018_07_24_11:07:41_r(new_model_v1_2_for_realtest)_scaled_0.05_selector_em/dev_sent_score_1_shared_task_test.jsonl") # End if steps['s3.1sen_select']['ensemble']: print("Ensemble!") dev_sent_list_1_e1 = simple_nnmodel.pipeline_first_sent_selection( tokenized_file, doc_retrieval_file_1, model_path_dict['sselector_1']) dev_sent_file_1_e1 = current_pipeline_dir / f"dev_sent_score_1_{in_file_stem}_e1.jsonl" common.save_jsonl(dev_sent_list_1_e1, dev_sent_file_1_e1) # exit(0) # dev_sent_list_1_e1 = common.load_jsonl(dev_sent_file_1_e1) dev_sent_list_1_e2 = simple_nnmodel.pipeline_first_sent_selection( tokenized_file, doc_retrieval_file_1, model_path_dict['sselector_2']) dev_sent_file_1_e2 = current_pipeline_dir / f"dev_sent_score_1_{in_file_stem}_e2.jsonl" common.save_jsonl(dev_sent_list_1_e2, dev_sent_file_1_e2) # exit(0) # dev_sent_list_1_e2 = common.load_jsonl(dev_sent_file_1_e2) dev_sent_list_1 = merge_sent_results( [dev_sent_list_1_e0, dev_sent_list_1_e1, dev_sent_list_1_e2]) dev_sent_file_1 = current_pipeline_dir / f"dev_sent_score_1_{in_file_stem}_ensembled.jsonl" common.save_jsonl(dev_sent_list_1, dev_sent_file_1) # exit(0) else: dev_sent_list_1 = dev_sent_list_1_e0 dev_sent_file_1 = dev_sent_file_1_e0 # Merging two results print("First Sentence Selection file saved to:", dev_sent_file_1) else: dev_sent_file_1 = steps['s3.1sen_select']['out_file'] dev_sent_list_1 = common.load_jsonl(dev_sent_file_1) print("Use preprocessed file:", dev_sent_file_1) # exit(0) if eval_list is not None: print("Evaluating 1st Sentence Selection") # sent_select_results_list_1 = simi_sampler.threshold_sampler(tokenized_file, dev_sent_full_list, # sentence_retri_scale_prob, top_n=5) # additional_dev_sent_list = common.load_jsonl("/Users/Eason/RA/FunEver/results/sent_retri_nn/2018_07_20_15-17-59_r/dev_sent_2r.jsonl") # dev_sent_full_list = dev_sent_full_list + additional_dev_sent_list sent_select_results_list_1 = simi_sampler.threshold_sampler_insure_unique( tokenized_file, dev_sent_list_1, sentence_retri_1_scale_prob, top_n=sent_retri_1_top_k) # sent_select_results_list_1 = simi_sampler.threshold_sampler_insure_unique_merge(sent_select_results_list_1, # additional_dev_sent_list, # sentence_retri_2_scale_prob, # top_n=5, add_n=1) eval_mode = {'check_sent_id_correct': True, 'standard': False} # for a, b in zip(eval_list, sent_select_results_list_1): # b['predicted_label'] = a['label'] print( c_scorer.fever_score(sent_select_results_list_1, eval_list, mode=eval_mode, verbose=False)) print("Step 4. 
Second Document Retrieval") if steps['s4.2doc_retri']['do']: dev_sent_list_1 = common.load_jsonl(dev_sent_file_1) filtered_dev_instance_1_for_doc2 = simi_sampler.threshold_sampler_insure_unique( tokenized_file, dev_sent_list_1, sent_prob_for_2doc, top_n=sent_topk_for_2doc) filtered_dev_instance_1_for_doc2_file = current_pipeline_dir / f"dev_sent_score_1_{in_file_stem}_scaled_for_doc2.jsonl" common.save_jsonl(filtered_dev_instance_1_for_doc2, filtered_dev_instance_1_for_doc2_file) dev_sent_1_result = simi_sampler.threshold_sampler_insure_unique( doc_retrieval_file_1, # Remember this name dev_sent_list_1, sentence_retri_1_scale_prob, top_n=sent_topk_for_2doc) dev_doc2_list = second_doc_retrieval( haonan_docretri_object, filtered_dev_instance_1_for_doc2_file, dev_sent_1_result) dev_doc2_file = current_pipeline_dir / f"doc_retr_2_{in_file_stem}.jsonl" common.save_jsonl(dev_doc2_list, dev_doc2_file) print("Second Document Retrieval File saved to:", dev_doc2_file) else: dev_doc2_file = steps['s4.2doc_retri']['out_file'] # dev_doc2_list = common.load_jsonl(dev_doc2_file) print("Use preprocessed file:", dev_doc2_file) print("Step 5. Second Sentence Selection") if steps['s5.2sen_select']['do']: dev_sent_2_list = get_score_multihop( tokenized_file, dev_doc2_file, model_path=model_path_dict['sselector']) dev_sent_file_2 = current_pipeline_dir / f"dev_sent_score_2_{in_file_stem}.jsonl" common.save_jsonl(dev_sent_2_list, dev_sent_file_2) print("First Sentence Selection file saved to:", dev_sent_file_2) else: dev_sent_file_2 = steps['s5.2sen_select']['out_file'] if eval_list is not None: print("Evaluating 1st Sentence Selection") dev_sent_list_1 = common.load_jsonl(dev_sent_file_1) dev_sent_list_2 = common.load_jsonl(dev_sent_file_2) sent_select_results_list_1 = simi_sampler.threshold_sampler_insure_unique( tokenized_file, dev_sent_list_1, sentence_retri_1_scale_prob, top_n=5) sent_select_results_list_1 = simi_sampler.threshold_sampler_insure_unique_merge( sent_select_results_list_1, dev_sent_list_2, sentence_retri_2_scale_prob, top_n=5, add_n=sent_retri_2_top_k) eval_mode = {'check_sent_id_correct': True, 'standard': False} # for a, b in zip(eval_list, sent_select_results_list_1): # b['predicted_label'] = a['label'] print( c_scorer.fever_score(sent_select_results_list_1, eval_list, mode=eval_mode, verbose=False)) # print("Step 6. 
NLI") # if steps['s6.nli']['do']: # dev_sent_list_1 = common.load_jsonl(dev_sent_file_1) # dev_sent_list_2 = common.load_jsonl(dev_sent_file_2) # sentence_retri_1_scale_prob = 0.05 # print("Threshold:", sentence_retri_1_scale_prob) # sent_select_results_list_1 = simi_sampler.threshold_sampler_insure_unique(tokenized_file, dev_sent_list_1, # sentence_retri_1_scale_prob, top_n=5) # # sent_select_results_list_2 = simi_sampler.threshold_sampler_insure_unique_merge(sent_select_results_list_1, # # dev_sent_list_2, # # sentence_retri_2_scale_prob, # # top_n=5, # # add_n=sent_retri_2_top_k) # nli_results = nli.mesim_wn_simi_v1_2.pipeline_nli_run(tokenized_file, # sent_select_results_list_1, # [dev_sent_file_1, dev_sent_file_2], # model_path_dict['nli'], # with_logits=True, # with_probs=True) # # nli_results_file = current_pipeline_dir / f"nli_r_{in_file_stem}.jsonl" # common.save_jsonl(nli_results, nli_results_file) # else: # nli_results_file = steps['s6.nli']['out_file'] # nli_results = common.load_jsonl(nli_results_file) # Ensemble code # dev_sent_list_1 = common.load_jsonl(dev_sent_file_1) # dev_sent_list_2 = common.load_jsonl(dev_sent_file_2) # sentence_retri_1_scale_prob = 0.05 # print("NLI sentence threshold:", sentence_retri_1_scale_prob) # sent_select_results_list_1 = simi_sampler.threshold_sampler_insure_unique(tokenized_file, dev_sent_list_1, # sentence_retri_1_scale_prob, top_n=5) # # # sent_select_results_list_2 = simi_sampler.threshold_sampler_insure_unique_merge(sent_select_results_list_1, # # dev_sent_list_2, # # sentence_retri_2_scale_prob, # # top_n=5, # # add_n=sent_retri_2_top_k) # # nli_results = nli.mesim_wn_simi_v1_2.pipeline_nli_run(tokenized_file, # # sent_select_results_list_1, # # [dev_sent_file_1, dev_sent_file_2], # # model_path_dict['nli'], with_probs=True, with_logits=True) # # # nli_results = nli.mesim_wn_simi_v1_2.pipeline_nli_run_bigger(tokenized_file, # # sent_select_results_list_1, # # [dev_sent_file_1, dev_sent_file_2], # # model_path_dict['nli_2'], # # with_probs=True, # # with_logits=True) # # nli_results = nli.mesim_wn_simi_v1_2.pipeline_nli_run_bigger(tokenized_file, # sent_select_results_list_1, # [dev_sent_file_1, dev_sent_file_2], # model_path_dict['nli_4'], # with_probs=True, # with_logits=True) # # nli_results_file = current_pipeline_dir / f"nli_r_{in_file_stem}_withlb_e4.jsonl" # common.save_jsonl(nli_results, nli_results_file) # Ensemble code end # exit(0) nli_r_e0 = common.load_jsonl(current_pipeline_dir / "nli_r_shared_task_test_withlb_e0.jsonl") nli_r_e1 = common.load_jsonl(current_pipeline_dir / "nli_r_shared_task_test_withlb_e1.jsonl") nli_r_e2 = common.load_jsonl(current_pipeline_dir / "nli_r_shared_task_test_withlb_e2.jsonl") nli_r_e3 = common.load_jsonl(current_pipeline_dir / "nli_r_shared_task_test_withlb_e3.jsonl") nli_r_e4 = common.load_jsonl(current_pipeline_dir / "nli_r_shared_task_test_withlb_e4.jsonl") nli_results = merge_nli_results( [nli_r_e0, nli_r_e1, nli_r_e2, nli_r_e3, nli_r_e4]) print("Post Processing enhancement") delete_unused_evidence(nli_results) print("Deleting Useless Evidence") dev_sent_list_1 = common.load_jsonl(dev_sent_file_1) dev_sent_list_2 = common.load_jsonl(dev_sent_file_2) print("Appending 1 of second Evidence") nli_results = simi_sampler.threshold_sampler_insure_unique_merge( nli_results, dev_sent_list_2, sentence_retri_2_scale_prob, top_n=5, add_n=sent_retri_2_top_k) delete_unused_evidence(nli_results) # High tolerance enhancement! 
print("Final High Tolerance Enhancement") print("Appending all of first Evidence") nli_results = simi_sampler.threshold_sampler_insure_unique_merge( nli_results, dev_sent_list_1, enhance_retri_1_scale_prob, top_n=100, add_n=100) delete_unused_evidence(nli_results) if build_submission: output_file = current_pipeline_dir / "predictions.jsonl" build_submission_file(nli_results, output_file)
def train_fever_ema_v1_runtest(resume_model=None):
    """
    This method is the run-test script for the bert+nsmn model: it only
    performs forward passes over the dev set.
    :param resume_model:
    :return:
    """
    num_epoch = 200
    seed = 12
    batch_size = 32
    lazy = True

    dev_prob_threshold = 0.02
    train_prob_threshold = 0.02
    train_sample_top_k = 8

    experiment_name = f"bert_nsmn_ema_lr1|t_prob:{train_prob_threshold}|top_k:{train_sample_top_k}"

    bert_type_name = "bert-large-uncased"
    bert_servant = BertServant(bert_type_name=bert_type_name)

    print("Do EMA:")
    print("Dev prob threshold:", dev_prob_threshold)
    print("Train prob threshold:", train_prob_threshold)
    print("Train sample top k:", train_sample_top_k)

    dev_upstream_sent_list = common.load_jsonl(
        config.RESULT_PATH /
        "sent_retri_nn/balanced_sentence_selection_results/dev_sent_pred_scores.jsonl")
    train_upstream_sent_list = common.load_jsonl(
        config.RESULT_PATH /
        "sent_retri_nn/balanced_sentence_selection_results/train_sent_scores.jsonl")

    # Prepare Data
    # 22 Nov 2018 03:16
    # Token indexers were removed here because tokenization is handled by the
    # Bert Servant.
    print("Building Prob Dicts...")
    train_sent_list = common.load_jsonl(
        config.RESULT_PATH /
        "sent_retri_nn/balanced_sentence_selection_results/train_sent_scores.jsonl")
    dev_sent_list = common.load_jsonl(
        config.RESULT_PATH /
        "sent_retri_nn/balanced_sentence_selection_results/dev_sent_pred_scores.jsonl")

    selection_dict = paired_selection_score_dict(train_sent_list)
    selection_dict = paired_selection_score_dict(dev_sent_list, selection_dict)

    upstream_dev_list = threshold_sampler_insure_unique(
        config.T_FEVER_DEV_JSONL,
        dev_upstream_sent_list,
        prob_threshold=dev_prob_threshold,
        top_n=5)

    dev_fever_data_reader = BertReader(bert_servant, lazy=lazy, max_l=60)
    train_fever_data_reader = BertReader(bert_servant, lazy=lazy, max_l=60)

    complete_upstream_dev_data = select_sent_with_prob_for_eval(
        config.T_FEVER_DEV_JSONL,
        upstream_dev_list,
        selection_dict,
        tokenized=True)
    dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data)

    # Load Vocabulary. With BERT handling tokenization, only the label
    # namespace is needed here.
    biterator = BasicIterator(batch_size=batch_size)

    unk_token_num = {'tokens': 2600}  # workaround for initializing the vocabulary.
    vocab = ExVocabulary(unk_token_num=unk_token_num)
    vocab.add_token_to_namespace('SUPPORTS', namespace='labels')
    vocab.add_token_to_namespace('REFUTES', namespace='labels')
    vocab.add_token_to_namespace('NOT ENOUGH INFO', namespace='labels')
    print(vocab)

    biterator.index_with(vocab)

    # Build Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu", index=0)
    device_num = -1 if device.type == 'cpu' else 0

    bert_servant.bert_model.to(device)

    # Init model here
    model = Model(bert_servant,
                  bert_batch_size=1,
                  rnn_size_in=(1024 + 2, 1024 + 2 + 300),  # probs + task indicator.
                  rnn_size_out=(300, 300),
                  max_l=250,
                  mlp_d=300,
                  num_of_class=3,
                  drop_r=0.5,
                  activation_type='gelu')
    model.to(device)

    # Create Log File
    file_path_prefix, date = save_tool.gen_file_prefix(f"{experiment_name}")
    # Save the source code.
    script_name = os.path.basename(__file__)
    with open(os.path.join(file_path_prefix, script_name), 'w') as out_f, \
            open(__file__, 'r') as it:
        out_f.write(it.read())
        out_f.flush()
    # Save source code end.

    # The optimizer and loss are intentionally disabled in this run-test.
    # best_dev = -1
    # iteration = 0
    # start_lr = 0.0001
    # optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
    #                        lr=start_lr)
    # criterion = nn.CrossEntropyLoss()

    for i_epoch in range(num_epoch):
        dev_iter = biterator(dev_instances, shuffle=False, num_epochs=1)
        for i, batch in enumerate(dev_iter):
            out = model(batch)
def train_fever_ema_v1(resume_model=None):
    """
    This method is the training script for the bert+nsmn model.
    :param resume_model:
    :return:
    """
    num_epoch = 200
    seed = 12
    batch_size = 32
    lazy = True

    dev_prob_threshold = 0.02
    train_prob_threshold = 0.02
    train_sample_top_k = 8

    experiment_name = f"bert_nsmn_ema_lr1|t_prob:{train_prob_threshold}|top_k:{train_sample_top_k}"

    bert_type_name = "bert-large-uncased"
    bert_servant = BertServant(bert_type_name=bert_type_name)

    # print("Do EMA:")
    print("Dev prob threshold:", dev_prob_threshold)
    print("Train prob threshold:", train_prob_threshold)
    print("Train sample top k:", train_sample_top_k)

    dev_upstream_sent_list = common.load_jsonl(
        config.RESULT_PATH /
        "sent_retri_nn/balanced_sentence_selection_results/dev_sent_pred_scores.jsonl")
    train_upstream_sent_list = common.load_jsonl(
        config.RESULT_PATH /
        "sent_retri_nn/balanced_sentence_selection_results/train_sent_scores.jsonl")

    # Prepare Data
    # 22 Nov 2018 03:16
    # Token indexers were removed here because tokenization is handled by the
    # Bert Servant.
    print("Building Prob Dicts...")
    train_sent_list = common.load_jsonl(
        config.RESULT_PATH /
        "sent_retri_nn/balanced_sentence_selection_results/train_sent_scores.jsonl")
    dev_sent_list = common.load_jsonl(
        config.RESULT_PATH /
        "sent_retri_nn/balanced_sentence_selection_results/dev_sent_pred_scores.jsonl")

    selection_dict = paired_selection_score_dict(train_sent_list)
    selection_dict = paired_selection_score_dict(dev_sent_list, selection_dict)

    upstream_dev_list = threshold_sampler_insure_unique(
        config.T_FEVER_DEV_JSONL,
        dev_upstream_sent_list,
        prob_threshold=dev_prob_threshold,
        top_n=5)

    dev_fever_data_reader = BertReader(bert_servant, lazy=lazy, max_l=60)
    train_fever_data_reader = BertReader(bert_servant, lazy=lazy, max_l=60)

    complete_upstream_dev_data = select_sent_with_prob_for_eval(
        config.T_FEVER_DEV_JSONL,
        upstream_dev_list,
        selection_dict,
        tokenized=True)
    dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data)

    # Load Vocabulary. With BERT handling tokenization, only the label
    # namespace is needed here.
    biterator = BasicIterator(batch_size=batch_size)

    unk_token_num = {'tokens': 2600}  # workaround for initializing the vocabulary.
    vocab = ExVocabulary(unk_token_num=unk_token_num)
    vocab.add_token_to_namespace('SUPPORTS', namespace='labels')
    vocab.add_token_to_namespace('REFUTES', namespace='labels')
    vocab.add_token_to_namespace('NOT ENOUGH INFO', namespace='labels')
    print(vocab)

    biterator.index_with(vocab)

    # Build Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu", index=0)
    device_num = -1 if device.type == 'cpu' else 0

    bert_servant.bert_model.to(device)

    # Init model here
    model = Model(bert_servant,
                  bert_batch_size=1,
                  rnn_size_in=(1024 + 2, 1024 + 2 + 300),  # probs + task indicator.
                  rnn_size_out=(300, 300),
                  max_l=250,
                  mlp_d=300,
                  num_of_class=3,
                  drop_r=0.5,
                  activation_type='gelu')
    model.to(device)

    # Create Log File
    file_path_prefix, date = save_tool.gen_file_prefix(f"{experiment_name}")
    # Save the source code.
    script_name = os.path.basename(__file__)
    with open(os.path.join(file_path_prefix, script_name), 'w') as out_f, \
            open(__file__, 'r') as it:
        out_f.write(it.read())
        out_f.flush()
    # Save source code end.
    best_dev = -1
    iteration = 0

    start_lr = 0.0001  # Must stay defined: the optimizer below uses it.
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                           lr=start_lr)
    criterion = nn.CrossEntropyLoss()

    for i_epoch in range(num_epoch):
        print("Resampling...")
        # Resampling
        train_data_with_candidate_sample_list = \
            threshold_sampler_insure_unique(config.T_FEVER_TRAIN_JSONL,
                                            train_upstream_sent_list,
                                            train_prob_threshold,
                                            top_n=train_sample_top_k)
        complete_upstream_train_data = adv_simi_sample_with_prob_v1_1(
            config.T_FEVER_TRAIN_JSONL,
            train_data_with_candidate_sample_list,
            selection_dict,
            tokenized=True)

        random.shuffle(complete_upstream_train_data)
        print("Sample data length:", len(complete_upstream_train_data))
        sampled_train_instances = train_fever_data_reader.read(complete_upstream_train_data)

        train_iter = biterator(sampled_train_instances, shuffle=True, num_epochs=1)
        for i, batch in tqdm(enumerate(train_iter)):
            model.train()
            out = model(batch)
            y = batch['label'].to(next(model.parameters()).device)

            loss = criterion(out, y)

            # No decay
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            iteration += 1

            # EMA update (disabled in this variant).
            # ema(model.named_parameters())

            if i_epoch < 15:
                mod = 20000
                # mod = 500
            else:
                mod = 2000

            if iteration % mod == 0:
                eval_iter = biterator(dev_instances, shuffle=False, num_epochs=1)
                complete_upstream_dev_data = hidden_eval(model,
                                                         eval_iter,
                                                         complete_upstream_dev_data)

                eval_mode = {'check_sent_id_correct': True, 'standard': True}
                strict_score, acc_score, pr, rec, f1 = c_scorer.fever_score(
                    complete_upstream_dev_data,
                    common.load_jsonl(config.T_FEVER_DEV_JSONL),
                    mode=eval_mode,
                    verbose=False)
                print("Fever Score(Strict/Acc./Precision/Recall/F1):",
                      strict_score, acc_score, pr, rec, f1)
                print(f"Dev:{strict_score}/{acc_score}")

                # The EMA evaluation/saving path is disabled here; it mirrors
                # the one in train_fever_std_ema_v1 (load_ema_to_model into a
                # cloned model, hidden_eval, then fever_score on the result).

                need_save = False
                if strict_score > best_dev:
                    best_dev = strict_score
                    need_save = True

                if need_save:
                    save_path = os.path.join(
                        file_path_prefix,
                        f'i({iteration})_epoch({i_epoch})_dev({strict_score})_lacc({acc_score})_seed({seed})')
                    torch.save(model.state_dict(), save_path)
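# Hypothetical usage of the bert+nsmn entry points above; a sketch only, not
# the repo's actual __main__ block, and the checkpoint path is a placeholder:
if __name__ == '__main__':
    # Forward-pass smoke test over the dev set:
    # train_fever_ema_v1_runtest()
    # Full training from scratch, or resuming from a saved state dict:
    train_fever_ema_v1(resume_model=None)
    # train_fever_ema_v1(resume_model="saved_models/some_checkpoint.pt")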