def run_thread(arguments):

    msg = ""

    # 1. compute the ranking metrics on the filtered log_probs tensor
    sample_MRR, sample_P, experiment_result, return_msg = metrics.get_ranking(
        arguments["mymodel_probs"],
        arguments["masked_indices"],
        arguments["vocab"],
        is_masked_probs=False,
        label_index=arguments["label_index"],
        print_generation=arguments["interactive"],
        topk=10000,
    )
    em, f1, is_error, no_overlap, larger_by_1, larger_by_2, larger_by_3, larger_by_4, larger_by_5_or_more = metrics.calculate_em_f1(
        arguments["target"], arguments["prediction"])

    msg += "\n" + return_msg

    sample_perplexity = 0.0
    if arguments["interactive"]:
        pprint(arguments["sample"])
        # THIS IS OPTIONAL - mainly used for debugging reasons
        # 2. compute perplexity and print predictions for the complete log_probs tensor
        sample_perplexity, return_msg = print_sentence_predictions(
            arguments["original_log_probs"],
            arguments["token_ids"],
            arguments["vocab"],
            masked_indices=arguments["masked_indices"],
            print_generation=arguments["interactive"],
        )
        input("press enter to continue...")
        msg += "\n" + return_msg

    return experiment_result, sample_MRR, sample_P, sample_perplexity, msg, em, f1, is_error, no_overlap, larger_by_1, larger_by_2, larger_by_3, larger_by_4, larger_by_5_or_more
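# Illustrative only: a minimal sketch of the `arguments` dict that run_thread expects,
# assembled from the keys read above. The concrete values come from the surrounding
# evaluation pipeline; the placeholder names on the right-hand side (filtered_log_probs,
# original_log_probs, token_ids, masked_indices, sample, label_index, gold/predicted
# answer strings) are assumptions, not names defined in this file.
#
# example_arguments = {
#     "mymodel_probs": filtered_log_probs,        # log-probs, possibly restricted to a common vocab
#     "original_log_probs": original_log_probs,   # unfiltered log-probs, used for perplexity
#     "token_ids": token_ids,
#     "masked_indices": masked_indices,
#     "vocab": model.vocab,
#     "label_index": label_index,                 # index of the gold object token in the vocab
#     "interactive": False,
#     "sample": sample,                           # raw data point, only printed in interactive mode
#     "target": gold_answer_string,
#     "prediction": predicted_answer_string,
# }
# results_tuple = run_thread(example_arguments)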
def get_results(model, sentence, paragraph):
    # maximum number of consecutive [MASK] tokens to try
    max_tokens = 3
    result_list = []
    for i in range(1, max_tokens + 1):
        if i != 1:
            # add one more [MASK] token to the cloze sentence on each iteration
            sentence = sentence.replace("[MASK]", "[MASK] [MASK]", 1)
        #print(sentence)
        if paragraph == "":
            sentences = [sentence]
        else:
            sentences = [sentence, paragraph]

        original_log_probs_list, [token_ids], [masked_indices] = model.get_batch_generation(
            [sentences], try_cuda=True)

        index_list = None
        filtered_log_probs_list = original_log_probs_list

        ret = {}
        # build the top-k ranking for this number of masks and append it to result_list
        if masked_indices and len(masked_indices) > 0:
            results = evaluation_metrics.get_ranking(
                filtered_log_probs_list[0],
                masked_indices,
                model.vocab,
                index_list=index_list)
            result_list.append(results)
    return result_list
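# Illustrative only: a sketch of how get_results could be called, assuming a LAMA-style
# connector built with build_model_by_name and a cloze claim containing a single [MASK].
# The claim text and the empty evidence paragraph are made-up example inputs.
#
# model = build_model_by_name("bert", args)
# claim = "Albert Einstein was born in [MASK]."
# evidence = ""  # optional paragraph passed as a second sentence
# rankings = get_results(model, claim, evidence)
# # rankings[0] -> ranking with 1 mask, rankings[1] -> 2 masks, rankings[2] -> 3 masks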
def main(args):

    if not args.text and not args.interactive:
        msg = "ERROR: either start LAMA eval_generation with the " \
              "interactive option (--i) or pass a piece of text as input (--t)"
        raise ValueError(msg)

    stopping_condition = True

    #print("Language Models: {}".format(args.models_names))

    models = {}
    for lm in args.models_names:
        # reuse a cached model build if one exists, otherwise build it and cache it with dill
        if os.path.exists("/data/fichtel/lm_builds/model_{}".format(lm)):
            with open("/data/fichtel/lm_builds/model_{}".format(lm), 'rb') as lm_build_file:
                models[lm] = dill.load(lm_build_file)
        else:
            models[lm] = build_model_by_name(lm, args)
            with open("/data/fichtel/lm_builds/model_{}".format(lm), 'wb') as lm_build_file:
                dill.dump(models[lm], lm_build_file)

    vocab_subset = None
    if args.common_vocab_filename is not None:
        common_vocab = load_vocab(args.common_vocab_filename)
        print("common vocabulary size: {}".format(len(common_vocab)))
        vocab_subset = [x for x in common_vocab]

    while stopping_condition:

        if args.text:
            text = args.text
            stopping_condition = False
        else:
            text = input("insert text:")

        if args.split_sentence:
            import spacy
            # use spacy to split the input text into sentences
            nlp = spacy.load(args.spacy_model)
            tokens = nlp(text)
            print(tokens)
            sentences = []
            for s in tokens.sents:
                print(" - {}".format(s))
                sentences.append(s.text)
        else:
            sentences = [text]

        if len(sentences) > 2:
            print("WARNING: only the first two sentences in the text will be considered!")
            sentences = sentences[:2]

        for model_name, model in models.items():
            #print("\n{}:".format(model_name))
            original_log_probs_list, [token_ids], [masked_indices] = model.get_batch_generation(
                [sentences], try_cuda=False)

            index_list = None
            if vocab_subset is not None:
                # filter log_probs down to the common vocabulary
                filter_logprob_indices, index_list = model.init_indices_for_filter_logprobs(
                    vocab_subset)
                filtered_log_probs_list = model.filter_logprobs(
                    original_log_probs_list, filter_logprob_indices)
                print(filtered_log_probs_list)
            else:
                filtered_log_probs_list = original_log_probs_list

            ret = {}
            # rank over the subset of the vocab (if defined) for the SINGLE masked tokens
            if masked_indices and len(masked_indices) > 0:
                ret = evaluation_metrics.get_ranking(
                    filtered_log_probs_list[0],
                    masked_indices,
                    model.vocab,
                    index_list=index_list)

            # prediction and perplexity for the whole softmax
            # print_sentence_predictions(original_log_probs_list[0], token_ids, model.vocab, masked_indices=masked_indices)

            for r in ret:
                print("%s %s" % (r, ret[r]))
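# A plausible command-line entry point for main(), modelled on LAMA's eval_generation
# script; the lama.options helpers used here are an assumption and may differ in this repo.
#
# if __name__ == "__main__":
#     import lama.options as options
#     parser = options.get_eval_generation_parser()
#     args = options.parse_args(parser)
#     main(args)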
def LAMA_results(args, data_point, models):
    # result_task1 will contain the label SUPPORT or REJECT as the result of the prediction
    result_task1 = ""

    # claimWithMask holds the claim with [MASK]; token holds the current entity
    claimWithMask, token = getClaimwithMaks(data_point)
    args.text = claimWithMask

    if not args.text and not args.interactive:
        msg = "ERROR: either start LAMA eval_generation with the " \
              "interactive option (--i) or pass a piece of text as input (--t)"
        raise ValueError(msg)

    stopping_condition = True

    #print("Language Models: {}".format(args.models_names))

    vocab_subset = None
    if args.common_vocab_filename is not None:
        common_vocab = load_vocab(args.common_vocab_filename)
        print("common vocabulary size: {}".format(len(common_vocab)))
        vocab_subset = [x for x in common_vocab]

    while stopping_condition:

        if args.text:
            text = args.text
            stopping_condition = False
        else:
            text = input("insert text:")

        if args.split_sentence:
            import spacy
            # use spacy to split the input text into sentences
            nlp = spacy.load(args.spacy_model)
            tokens = nlp(text)
            #print(tokens)
            sentences = []
            for s in tokens.sents:
                #print(" - {}".format(s))
                sentences.append(s.text)
        else:
            sentences = [text]

        if len(sentences) > 2:
            print("WARNING: only the first two sentences in the text will be considered!")
            sentences = sentences[:2]

        for model_name, model in models.items():
            #print("\n{}:".format(model_name))
            original_log_probs_list, [token_ids], [masked_indices] = model.get_batch_generation(
                [sentences], try_cuda=False)

            index_list = None
            if vocab_subset is not None:
                # filter log_probs down to the common vocabulary
                filter_logprob_indices, index_list = model.init_indices_for_filter_logprobs(
                    vocab_subset)
                filtered_log_probs_list = model.filter_logprobs(
                    original_log_probs_list, filter_logprob_indices)
            else:
                filtered_log_probs_list = original_log_probs_list

            # rank over the subset of the vocab (if defined) for the SINGLE masked tokens
            if masked_indices and len(masked_indices) > 0:
                # we modified evaluation_metrics.get_ranking so that it also returns a dataframe
                # with the same content as the printed msg: the top predictions with their
                # probabilities. We store this dataframe in df_predictions.
                a, b, c, d, df_predictions = evaluation_metrics.get_ranking(
                    filtered_log_probs_list[0],
                    masked_indices,
                    model.vocab,
                    index_list=index_list)
                list_predictions = df_predictions['prediction'].tolist()
                # lowercase all predictions because the entity is lowercased; this avoids mismatches
                list_predictions = [x.lower() for x in list_predictions]
                # if the current entity equals the first prediction, label the claim SUPPORT, otherwise REJECT
                if token == list_predictions[0]:
                    result_task1 = "SUPPORT"
                else:
                    result_task1 = "REJECT"
    return result_task1
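# Illustrative only: a sketch of how LAMA_results could be driven over a dataset to produce
# one SUPPORT/REJECT label per claim. `load_data_points` and the file name are hypothetical
# placeholders; `models` is the name->model dict built as in main() above, and `args` the
# parsed command-line arguments.
#
# labels = []
# for data_point in load_data_points("claims.jsonl"):   # hypothetical loader
#     labels.append(LAMA_results(args, data_point, models))
# print(labels[:5])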