def rescore():
    """Write the candidate sentence with the highest GPT-2 log-score.

    Reads one candidate per line from ``args.predictions_path``, scores every
    non-blank candidate with a ``GPT2LMScorer`` configured from
    ``args.model_name`` and ``args.batch_size`` (on ``cuda:0`` when CUDA is
    available, otherwise on the CPU) and writes the top-scoring line, exactly
    as it was read, to ``args.output_path``.

    Raises:
        ValueError: If the predictions file holds no non-blank candidate.
    """
    with open(args.predictions_path) as f:
        # Blank lines carry no sentence to rank, so they are not candidates.
        candidates = [line for line in f if line.strip()]
    if not candidates:
        # Fail before the model is loaded and with an explicit message,
        # instead of letting ``np.argmax`` complain about an empty sequence.
        raise ValueError(
            f"no candidate sentences found in {args.predictions_path!r}"
        )

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    model = GPT2LMScorer(
        model_name=args.model_name,
        device=device,
        batch_size=args.batch_size,
    )

    # Score the text without its line terminator: otherwise the newline is
    # scored as part of the sentence, and a final line lacking one would be
    # ranked on a different basis than the others.
    sentences = [candidate.rstrip("\n") for candidate in candidates]
    scores = np.array(model.sentence_score(sentences, log=True))
    best_candidate = candidates[int(np.argmax(scores))]

    with open(args.output_path, "w") as f:
        # The raw line is written so the output keeps its original terminator.
        f.write(best_candidate)
def should_throw_an_exception_for_an_unsupported_model_name():
    """Constructing ``GPT2LMScorer`` with the name ``"_"`` must raise ``OSError``."""
    unsupported_name = "_"
    with pytest.raises(OSError):
        GPT2LMScorer(unsupported_name)
def describe_tokens_log_prob_for_batch():
    """Specs for ``GPT2LMScorer._tokens_log_prob_for_batch`` on the "gpt2" model.

    Each nested ``should_*`` function is one test case. Every item returned by
    the method is unpacked as a ``(scores, ids, tokens)`` triple.
    """
    # pylint: disable=protected-access
    scorer = GPT2LMScorer("gpt2")

    # NOTE(review): the leading underscore is meant to keep this helper from
    # being collected as a test case (pytest-describe convention) - confirm.
    def _assert_outputs_match(outputs, expected, eps=1e-4):
        """Compare each output triple against its expected triple.

        Scores are compared with a relative tolerance of ``eps``; ids and
        tokens must match exactly.
        """
        assert len(outputs) == len(expected)
        for i, (output, exp_output) in enumerate(zip(outputs, expected)):
            scores, ids, tokens = output
            scores, ids = scores.tolist(), ids.tolist()
            exp_scores, exp_ids, exp_tokens = exp_output
            # zip() below stops at the shorter sequence, so check the count
            # first to notice a missing or an extra score.
            assert len(scores) == len(exp_scores), {"index": i}
            for j, (score, exp_score) in enumerate(zip(scores, exp_scores)):
                assert math.isclose(score, exp_score, rel_tol=eps), {
                    "index": (i, j)
                }
            assert ids == exp_ids
            assert tokens == exp_tokens

    def should_work_on_zero_sentences():
        assert scorer._tokens_log_prob_for_batch([]) == []

    def should_work_on_an_empty_sentence():
        sentence = ""
        outputs = scorer._tokens_log_prob_for_batch([sentence])
        assert len(outputs) == 1
        scores, ids, tokens = outputs[0]
        # An empty sentence is expected to produce exactly one entry.
        assert len(scores) == 1
        assert len(ids) == 1
        assert len(tokens) == 1
        assert all(score <= 0.0 for score in scores)

    def should_work_on_a_single_sentence():
        sentences = ["Hello World!"]
        expected = [
            (
                [
                    -8.293975830078125,
                    -5.755699157714844,
                    -1.4748573303222656,
                    -6.430915832519531,
                ],
                [15496, 2159, 0, 50256],
                ["Hello", "ĠWorld", "!", "<|endoftext|>"],
            )
        ]
        outputs = scorer._tokens_log_prob_for_batch(sentences)
        assert len(outputs) == len(sentences)
        _assert_outputs_match(outputs, expected)

    def should_work_on_multiple_sentences():
        sentences = [
            "Hello World!",
            "This is an example.",
            "The sky is blue today.",
        ]
        expected = [
            (
                [
                    -8.293975830078125,
                    -5.755699157714844,
                    -1.4748573303222656,
                    -6.430915832519531,
                ],
                [15496, 2159, 0, 50256],
                ["Hello", "ĠWorld", "!", "<|endoftext|>"],
            ),
            (
                [
                    -4.8580474853515625,
                    -1.6949310302734375,
                    -2.4207611083984375,
                    -3.6429824829101562,
                    -6.3849029541015625,
                    -6.194488525390625,
                ],
                [1212, 318, 281, 1672, 13, 50256],
                ["This", "Ġis", "Ġan", "Ġexample", ".", "<|endoftext|>"],
            ),
            (
                [
                    -3.2780990600585938,
                    -8.624397277832031,
                    -1.1694717407226562,
                    -2.7195663452148438,
                    -4.5889739990234375,
                    -1.297027587890625,
                    -5.4553375244140625,
                ],
                [464, 6766, 318, 4171, 1909, 13, 50256],
                [
                    "The",
                    "Ġsky",
                    "Ġis",
                    "Ġblue",
                    "Ġtoday",
                    ".",
                    "<|endoftext|>",
                ],
            ),
        ]
        outputs = scorer._tokens_log_prob_for_batch(sentences)
        assert len(outputs) == len(sentences)
        _assert_outputs_match(outputs, expected)

    @pytest.mark.xfail
    def should_work_with_a_sentence_longer_than_the_model_max_size():
        # Build a sentence with more words than the tokenizer's maximum length.
        max_input_size = scorer.tokenizer.max_len
        long_sentence = "Very" + " long" * max_input_size
        exp_ids = [16371] + [809] * max_input_size + [50256]
        exp_tokens = ["Very"] + ["Ġlong"] * max_input_size + ["<|endoftext|>"]
        _, ids, tokens = scorer._tokens_log_prob_for_batch([long_sentence])[0]
        ids = ids.tolist()
        assert ids == exp_ids
        assert tokens == exp_tokens
def should_not_be_empty():
    """``GPT2LMScorer.supported_model_names()`` must yield at least one name."""
    model_names = list(GPT2LMScorer.supported_model_names())
    assert model_names
def should_not_throw_an_exception_for_a_supported_model_name():
    """Constructing ``GPT2LMScorer`` with the name ``"gpt2"`` must not raise."""
    supported_name = "gpt2"
    GPT2LMScorer(supported_name)
def should_return_gpt2_models(mocker):
    """``AutoLMScorer.from_pretrained`` must return a ``GPT2LMScorer`` for each
    name listed by ``GPT2LMScorer.supported_model_names()``.
    """
    # Replace the constructor with a stub returning None for this test.
    mocker.patch.object(GPT2LMScorer, "__init__", return_value=None)
    supported_names = GPT2LMScorer.supported_model_names()
    for name in supported_names:
        created = AutoLMScorer.from_pretrained(name)
        # The name is the assertion message, so a failure shows which one broke.
        assert isinstance(created, GPT2LMScorer), name
def describe_sentence_score_for_english():
    """Specs comparing the "gpt2" scorer on pairs of English sentences.

    Each nested ``should_*`` case hands ``assert_score_of_sentence_pairs`` a
    list of two-sentence tuples in which the second sentence carries one error
    of the ERRANT type named in the comment above the call.
    NOTE(review): the helper presumably asserts that the first sentence of
    each pair scores higher than the second - confirm against its definition.
    """
    scorer = GPT2LMScorer("gpt2")

    def should_give_lower_score_to_sentences_with_adjectives_errors():
        # ERRANT - ADJ error
        assert_score_of_sentence_pairs(
            scorer,
            [("I have a big amount of money.", "I have a wide amount of money.")],
        )

    def should_give_lower_score_to_sentences_with_adjectives_forms_errors():
        # ERRANT - ADJ:FORM error
        assert_score_of_sentence_pairs(
            scorer,
            [
                (
                    "This is the best day of my life.",
                    "This is the goodest day of my life.",
                ),
                (
                    "I think that is the biggest apple I have ever seen!",
                    "I think that is the bigger apple I have ever seen!",
                ),
                (
                    "I think this game is easier than the one we played yesterday.",
                    "I think this game is more easy than the one we played yesterday.",
                ),
            ],
        )

    def should_give_lower_score_to_sentences_with_adverbs_errors():
        # ERRANT - ADV error
        assert_score_of_sentence_pairs(
            scorer,
            [("Let us finish this quickly!", "Let us finish this speedily!")],
        )

    def should_give_lower_score_to_sentences_with_conjunctions_errors():
        # ERRANT - CONJ error
        assert_score_of_sentence_pairs(
            scorer,
            [
                (
                    "It is a private matter between him and me.",
                    "It is a private matter between him but me.",
                ),
                (
                    "I need to finish this project, but I do not have enough time.",
                    "I need to finish this project, and I do not have enough time.",
                ),
            ],
        )

    def should_give_lower_score_to_sentences_with_determiners_errors():
        # ERRANT - DET error
        assert_score_of_sentence_pairs(
            scorer,
            [
                (
                    "The restaurant is in the middle of my home town.",
                    "The restaurant is in the middle of the my home town.",
                ),
                ("I am Italian.", "I am a Italian."),
                ("I am a teacher.", "I am teacher."),
                ("She gave me some advice.", "She gave me an advice."),
            ],
        )

    def should_give_lower_score_to_sentences_with_morphology_errors():
        # ERRANT - MORPH error
        assert_score_of_sentence_pairs(
            scorer,
            [("I will quickly solve this.", "I will quick solve this.")],
        )

    def should_give_lower_score_to_sentences_with_noun_errors():
        # ERRANT - NOUN error
        assert_score_of_sentence_pairs(
            scorer,
            [
                (
                    "Today's people have a frenetic lifestyle.",
                    "Today's person have a frenetic lifestyle.",
                ),
            ],
        )

    def should_give_lower_score_to_sentences_with_noun_inflection_errors():
        # ERRANT - NOUN:INFL error
        assert_score_of_sentence_pairs(
            scorer,
            [
                (
                    "There is too many information in this table.",
                    "There is too many informations in this table.",
                ),
            ],
        )

    def should_give_lower_score_to_sentences_with_noun_number_errors():
        # ERRANT - NOUN:NUM error
        assert_score_of_sentence_pairs(
            scorer,
            [("She has too many cats.", "She has too many cat.")],
        )

    def should_give_lower_score_to_sentences_with_noun_possissive_errors():
        # ERRANT - NOUN:POSS error
        assert_score_of_sentence_pairs(
            scorer,
            [("My friend's boss is leaving.", "My friends boss is leaving.")],
        )

    def should_give_lower_score_to_sentences_with_orthography_errors():
        # ERRANT - ORTH error
        assert_score_of_sentence_pairs(
            scorer,
            [("You are my best friend.", "You are my bestfriend.")],
        )

    def should_give_lower_score_to_sentences_with_particle_errors():
        # ERRANT - PART error
        assert_score_of_sentence_pairs(
            scorer,
            [("Can you look at the kids?", "Can you look in the kids?")],
        )

    def should_give_lower_score_to_sentences_with_preposition_errors():
        # ERRANT - PREP error
        assert_score_of_sentence_pairs(
            scorer,
            [("Can you look at them?", "Can you look in them?")],
        )

    def should_give_lower_score_to_sentences_with_pronoun_errors():
        # ERRANT - PRON error
        assert_score_of_sentence_pairs(
            scorer,
            [
                (
                    "We should believe more in ourselves.",
                    "We should believe more in ours.",
                ),
            ],
        )

    def should_give_lower_score_to_sentences_with_punctuation_errors():
        # ERRANT - PUNCT error
        assert_score_of_sentence_pairs(
            scorer,
            [
                ("I like dogs, cats, and dolphins.", "I like dogs cats and dolphins."),
                ("I can do this, but not now.", "I can do this but not now."),
            ],
        )

    def should_give_lower_score_to_sentences_with_spelling_errors():
        # ERRANT - SPELL error
        assert_score_of_sentence_pairs(
            scorer,
            [
                (
                    "They believe it is a genetic problem.",
                    "They believe it is a genectic problem.",
                ),
            ],
        )

    def should_give_lower_score_to_sentences_with_verb_errors():
        # ERRANT - VERB error
        assert_score_of_sentence_pairs(
            scorer,
            [("I can walk there.", "I can ambulate there.")],
        )

    def should_give_lower_score_to_sentences_with_verb_form_errors():
        # ERRANT - VERB:FORM error
        assert_score_of_sentence_pairs(
            scorer,
            [
                ("I danced yesterday.", "I dancing yesterday."),
                ("I am going to run a marathon.", "I am go to run a marathon."),
                (
                    "I am going to run a marathon.",
                    "I am going to running a marathon.",
                ),
            ],
        )

    def should_give_lower_score_to_sentences_with_verb_inflection_errors():
        # ERRANT - VERB:INFL error
        assert_score_of_sentence_pairs(
            scorer,
            [
                ("I got arrested yesterday.", "I getted arrested yesterday."),
                ("You flipped the wrong coin.", "You fliped the wrong coin."),
            ],
        )

    def should_give_lower_score_to_sentences_with_verb_subj_agreement_errors():
        # ERRANT - VERB:SVA error
        assert_score_of_sentence_pairs(
            scorer,
            [
                ("I think he has the virus.", "I think he have the virus."),
                ("They said he is sick.", "They said he are sick."),
            ],
        )

    def should_give_lower_score_to_sentences_with_verb_tense_errors():
        # ERRANT - VERB:TENSE error
        assert_score_of_sentence_pairs(
            scorer,
            [
                ("He ate the pie yesterday.", "He eats the pie yesterday."),
                (
                    "The pie was eaten by him yesterday.",
                    "The pie eats by him yesterday.",
                ),
            ],
        )