def sample(self):
    """Draw one paradigm of sentences built around a clause-embedding verb.

    Sentence templates:
      training 1/1  I think that John found the cat.
                    first cp_verb_first THAT D1 NP1 verb_1 THE NP2
      training 0/0  They think that John found every cat.
                    non_first cp_verb_non_first THAT D1 NP1 verb_1 D2 NP2
      test 1/0      John thinks that every cat found me.
                    D1 NP1 cp_verb_1 THAT D2 NP2 verb_2 first_acc
      test 0/1      John thinks that the cat found them.
                    D1 NP1 cp_verb_1 THAT THE NP2 verb_2 non_first_acc
      control 1/1   John thinks that the cat found me.
                    D1 NP1 cp_verb_1 THAT THE NP2 verb_2 first_acc
      control 0/0   John thinks that every cat found them.
                    D1 NP1 cp_verb_1 THAT D2 NP2 verb_2 non_first_acc

    Returns:
        A pair ``(data, track_sentence)``: whatever ``self.build_paradigm``
        returns for the six sentences, and a list of six tuples holding the
        lexical items each sentence was built from.
    """
    first, non_first, first_acc, non_first_acc = self.get_pronouns()

    subj_noun = choice(all_animate_nouns)
    obj_noun = choice(self.animate_common_nouns, avoid=subj_noun)
    subj_det = choice(get_matched_by(subj_noun, "arg_1", self.safe_dets))
    obj_det = choice(get_matched_by(obj_noun, "arg_1", self.safe_dets))

    cp_verb = choice(self.cp_verb)
    cp_aux = return_aux(cp_verb, first)
    cp_for_first = re_conjugate(cp_verb, first, cp_aux)
    cp_for_non_first = re_conjugate(cp_verb, non_first, cp_aux)
    cp_for_subj = re_conjugate(cp_verb, subj_noun, cp_aux)

    verb = choice(self.trans_verb)
    verb_aux = return_aux(verb, subj_noun)
    verb_for_subj = re_conjugate(verb, subj_noun, verb_aux)
    verb_for_obj = re_conjugate(verb, obj_noun, verb_aux)

    # Lexical keys shared by the pronoun-subject and the noun-subject frames.
    pronoun_frame_key = (cp_verb[0], subj_noun[0], verb[0], obj_noun[0])
    noun_frame_key = (subj_noun[0], cp_verb[0], obj_noun[0], verb[0])
    track_sentence = [
        (first[0],) + pronoun_frame_key,         # training 1/1
        (non_first[0],) + pronoun_frame_key,     # training 0/0
        noun_frame_key + (first_acc[0],),        # test 1/0
        noun_frame_key + (non_first_acc[0],),    # test 0/1
        noun_frame_key + (first_acc[0],),        # control 1/1
        noun_frame_key + (non_first_acc[0],),    # control 0/0
    ]

    def pronoun_subject(pronoun, cp_form, det):
        # "<pronoun> <cp verb> that <D1> <NP1> <verb> <det> <NP2>"
        return "{} {} that {} {} {} {} {}".format(
            pronoun[0], cp_form[0], subj_det[0], subj_noun[0],
            verb_for_subj[0], det, obj_noun[0])

    def noun_subject(det, object_pronoun):
        # "<D1> <NP1> <cp verb> that <det> <NP2> <verb> <object pronoun>"
        return "{} {} {} that {} {} {} {}".format(
            subj_det[0], subj_noun[0], cp_for_subj[0], det, obj_noun[0],
            verb_for_obj[0], object_pronoun[0])

    data = self.build_paradigm(
        training_1_1=pronoun_subject(first, cp_for_first, "the"),
        training_0_0=pronoun_subject(non_first, cp_for_non_first, obj_det[0]),
        test_1_0=noun_subject(obj_det[0], first_acc),
        test_0_1=noun_subject("the", non_first_acc),
        control_1_1=noun_subject("the", first_acc),
        control_0_0=noun_subject(obj_det[0], non_first_acc),
    )
    return data, track_sentence
all_documents = get_all_conjunctive([("category", "N"), ("document", "1")]) all_singular_neuter_animate_nouns = get_all_conjunctive([("category", "N"), ("sg", "1"), ("animate", "1"), ("gender", "n")]) all_safe_nouns = np.setdiff1d(all_nouns, all_singular_neuter_animate_nouns) # gather functional classes that will be accessed frequently all_frequent_quantifiers = get_all("frequent", "1", get_all("category", "(S/(S\\NP))/N")) all_reflexives = get_all("category_2", "refl") # gather potentially reflexive predicates all_transitive_verbs = get_all("category", "(S\\NP)/NP") all_anim_anim_verbs = get_matched_by( choice(all_animate_nouns), "arg_1", get_matched_by(choice(all_animate_nouns), "arg_2", all_transitive_verbs)) all_doc_doc_verbs = get_matched_by( choice(all_documents), "arg_1", get_matched_by(choice(all_documents), "arg_2", all_transitive_verbs)) all_refl_preds = np.union1d(all_anim_anim_verbs, all_doc_doc_verbs) # sample sentences until desired number for writer in [train_output, dev_output, test_output]: counter = 0 while counter < number_to_generate: # DP1 Rel V1 DP2 V2 Refl1/Refl2 # The women who like the boy see themselves/himself # D1 N1 Rel V2 Refl1/Refl2 V1 D2 N2 # The women who saw themselves/himself like the boy
def sample_nested_rc_2_rcs(self):
    """Build a pair of sentences containing nested relative clauses.

    The first sentence uses the -ing form of the main verb and attaches a
    nested relative clause to one noun phrase and an intransitive relative
    clause to the other.  The second sentence uses the conjugated main verb
    and places an -ing verb form inside one of the relative clauses instead.

    Returns:
        A pair ``(data, track_sentence)``: the two sentences as a list of
        strings, and a two-element list of identical
        ``(main clause frame, RC1, RC2)`` tuples.
    """
    main_verb = choice(self.all_non_ing_transitive_verbs)
    main_verb_ing = self.get_ing_form(main_verb)
    subj = choice(get_matches_of(main_verb, "arg_1", self.safe_nouns))
    main_verb = conjugate(main_verb, subj)
    main_verb_ing = conjugate(main_verb_ing, subj)
    subj_det = choice(get_matched_by(subj, "arg_1", all_very_common_dets))
    obj = choice(get_matches_of(main_verb, "arg_2", self.safe_nouns))
    obj_det = choice(get_matched_by(obj, "arg_1", all_very_common_dets))
    frame = " ".join([subj_det[0], subj[0], "%s", obj_det[0], obj[0]])

    def nested_rc(head):
        # One random draw picks the combination:
        #   0 -> subject RC containing a subject RC
        #   1 -> object RC containing a subject RC
        #   2 -> subject RC containing an object RC
        kind = random.randint(0, 2)
        if kind == 1:
            build_outer = self.object_relative_clause
        else:
            build_outer = self.subject_relative_clause
        if kind == 2:
            build_inner = self.object_relative_clause
        else:
            build_inner = self.subject_relative_clause
        outer, inner_head, outer_v, outer_v_ing = build_outer(head, bind=True)
        inner, _, inner_v, inner_v_ing = build_inner(inner_head, bind=False)
        return outer, outer_v, outer_v_ing, inner, inner_v, inner_v_ing

    rc1, v_rc1, v_rc1_ing, rc1_in, v_rc1_in, v_rc1_in_ing = nested_rc(subj)
    rc2, v_rc2, v_rc2_ing, rc2_in, v_rc2_in, v_rc2_in_ing = nested_rc(obj)
    rc1_iv, v_rc1_iv, v_rc1_iv_ing = \
        self.subject_relative_clause_intransitive(subj)
    rc2_iv, v_rc2_iv, v_rc2_iv_ing = \
        self.subject_relative_clause_intransitive(obj)

    track_sentence = [(frame, rc1, rc2), (frame, rc1, rc2)]

    subj_dp = [subj_det[0], subj[0]]
    obj_dp = [obj_det[0], obj[0]]

    def fill(outer, outer_verb, inner, inner_verb):
        # Plug the inner clause (with its verb) and the outer verb into the
        # outer clause template.
        return outer.format(v=outer_verb, rc=(inner % inner_verb))

    # Sentence 1: main verb in -ing form, one nested RC, one intransitive RC.
    if random.randint(0, 1) == 0:
        words = (subj_dp
                 + [fill(rc1, v_rc1, rc1_in, v_rc1_in), main_verb_ing[0]]
                 + obj_dp + [rc2_iv % v_rc2_iv])
    else:
        words = (subj_dp + [rc1_iv % v_rc1_iv, main_verb_ing[0]]
                 + obj_dp + [fill(rc2, v_rc2, rc2_in, v_rc2_in)])
    data = [" ".join(words)]

    # Sentence 2: six variants; kept lazy so only the drawn one is evaluated.
    variants = (
        lambda: subj_dp
        + [fill(rc1, v_rc1_ing, rc1_in, v_rc1_in), main_verb[0]] + obj_dp,
        lambda: subj_dp
        + [fill(rc1, v_rc1, rc1_in, v_rc1_in_ing), main_verb[0]] + obj_dp,
        lambda: subj_dp
        + [fill(rc1, v_rc1, rc1_in, v_rc1_in), main_verb[0]] + obj_dp
        + [rc2_iv % v_rc2_iv_ing],
        lambda: subj_dp + [main_verb[0]] + obj_dp
        + [fill(rc2, v_rc2_ing, rc2_in, v_rc2_in)],
        lambda: subj_dp + [main_verb[0]] + obj_dp
        + [fill(rc2, v_rc2, rc2_in, v_rc2_in_ing)],
        lambda: subj_dp + [rc1_iv % v_rc1_iv_ing, main_verb[0]] + obj_dp
        + [fill(rc2, v_rc2, rc2_in, v_rc2_in)],
    )
    data.append(" ".join(variants[random.randint(0, 5)]()))

    return data, track_sentence
def sample(self):
    """Draw one paradigm contrasting irregular past and present-plural verbs.

    Every sentence shares the same first conjunct and differs in the verb
    (and, for the test items, the object) of the second conjunct:

      training 1  The boy might see the cat and the students bought the paper
      training 0  The boy might see the cat and the students shred the paper
      test 1      The boy might see the cat and the students found the book
      test 0      The boy might see the cat and the students understand the book

    Returns:
        A pair ``(data, track_sentence)``: whatever ``self.build_paradigm``
        returns for the four sentences, and a list of four tuples holding
        the pieces each sentence was built from.

    Raises:
        MatchNotFoundError: if selecting the out-of-domain verbs or object
            raises ``IndexError`` (presumably an empty candidate pool --
            confirm against ``choice``).
    """
    V1 = choice(self.all_safe_verbs)
    subj = choice(get_matches_of(V1, "arg_1", all_common_nouns))
    aux = return_aux(V1, subj)
    D_subj = choice(get_matched_by(subj, "arg_1", all_frequent_determiners))
    obj = choice(get_matches_of(V1, "arg_2", all_common_nouns))
    D_obj = choice(get_matched_by(obj, "arg_1", all_frequent_determiners))
    S1 = " ".join(
        [D_subj[0], subj[0], aux[0], V1[0], D_obj[0], obj[0], "and"])

    # In-domain second conjunct: plural subject, irregular past verb, and a
    # present-plural verb compatible with the same subject and object.
    V_past_in = choice(self.irr_past_verbs_in_domain)
    subj2 = choice(get_matches_of(V_past_in, "arg_1", all_plural_nouns))
    D_subj2 = choice(
        get_matched_by(subj2, "arg_1", all_frequent_determiners))
    obj2_in = choice(get_matches_of(V_past_in, "arg_2", all_common_nouns))
    D_obj2_in = choice(
        get_matched_by(obj2_in, "arg_1", all_frequent_determiners))
    V_pres_in = choice(
        get_matched_by(
            subj2, "arg_1",
            get_matched_by(obj2_in, "arg_2",
                           self.present_plural_verbs_in_domain)))

    # Out-of-domain second conjunct: must fit the subject already chosen,
    # so the search can come up empty.
    try:
        V_past_out = choice(
            get_matched_by(subj2, "arg_1", self.irr_past_verbs_out_domain))
        obj2_out = choice(
            get_matches_of(V_past_out, "arg_2", all_common_nouns))
        D_obj2_out = choice(
            get_matched_by(obj2_out, "arg_1", all_frequent_determiners))
        V_pres_out = choice(
            get_matched_by(
                subj2, "arg_1",
                get_matched_by(obj2_out, "arg_2",
                               self.present_plural_verbs_out_domain)))
    except IndexError as err:
        # Say what failed and keep the original exception as the cause
        # instead of raising an empty, context-free error.
        raise MatchNotFoundError(
            "no out-of-domain verb/object combination for subject: %s"
            % subj2[0]) from err

    def pieces(verb, det_obj, obj_noun):
        # The ordered words of one sentence, without the final period.
        return (S1, D_subj2[0], subj2[0], verb[0], det_obj[0], obj_noun[0])

    past_in = pieces(V_past_in, D_obj2_in, obj2_in)
    pres_in = pieces(V_pres_in, D_obj2_in, obj2_in)
    past_out = pieces(V_past_out, D_obj2_out, obj2_out)
    pres_out = pieces(V_pres_out, D_obj2_out, obj2_out)

    track_sentence = [past_in, pres_in, past_out, pres_out]

    data = self.build_paradigm(
        training_1_1=" ".join(past_in + (".",)),
        training_0_0=" ".join(pres_in + (".",)),
        test_1_0=" ".join(past_out + (".",)),
        test_0_1=" ".join(pres_out + (".",)),
    )
    return data, track_sentence
np.append(get_all("expression", "those"), np.append(get_all("expression", "this"), get_all_conjunctive([("expression", "that"), ("category_2", "D")]))))) ever_replacements = np.array(["often", "also", "fortunately", "obviously", "clearly"]) ever_replacements_no_often = np.array(["also", "fortunately", "obviously", "clearly"]) adverb_npi_replacements = np.array(["regularly", "on weekends", "on occasion", "for a while", "as well"]) # sample sentences until desired number while len(sentences) < number_to_generate: # sentence template # D1 N1 Aux1 (Adv) ever/also V1 that D2 N2 Aux2 (Adv) V2 D3 N3 # The/a boy has (rarely) ever/also said that the/a girl has (rarely) sung the/a song # build all lexical items try: N1 = choice(all_animate_nouns) D1 = choice(get_matched_by(N1, "arg_1", all_common_dets)) Adv_freq = choice(all_freq_adverbs) if Adv_freq[0] == "often": NPI_replacement = choice(ever_replacements_no_often) else: NPI_replacement = choice(ever_replacements) Adv_nonfreq = choice(all_nonfreq_adverbs) # If nonfrequent Adv is often, don't use it as a replacement for "ever" if Adv_nonfreq[0] == "often": NPI_replacement = choice(ever_replacements_no_often) else: NPI_replacement = choice(ever_replacements) V1 = choice(get_matched_by(N1, "arg_1", all_embedding_verbs)) Aux1 = return_aux(V1, N1, allow_negated=False) N2 = choice(all_animate_nouns, [N1])
def sample(self):
    """Draw one paradigm contrasting control and raising predicates.

    Sentence templates (second conjunct only):
      training 1  John compelled Mary to leave.
                  DP1 Aux1 V_control_in DP2 TO VP
      training 0  John wanted Mary to leave.
                  DP1 Aux1 V_raising_in DP2 TO VP
      test 1      John convinced Mary to leave.
                  DP1 Aux1 V_control_out DP2 TO VP
      test 0      John considered Mary to leave.
                  DP1 Aux1 V_raising_out DP2 TO VP

    Each sentence is "<transitive clause> and <control/raising clause> ."
    with four determiner slots that are filled from ``Ds``.

    Returns:
        A pair ``(data, track_sentence)``: whatever ``self.build_paradigm``
        returns for the eight sentences, and a list of six tuples holding
        the words of the control/raising clause.

    Raises:
        Whatever the lexical lookups raise when no suitable item exists.
        In the object-control branch a failed raising-verb lookup used to
        be swallowed and then resurfaced as an unrelated
        ``UnboundLocalError``; it now propagates unchanged.
    """
    V_trans = choice(self.all_possibly_singular_transitive_verbs)
    NP_trans_1 = choice(get_matches_of(V_trans, "arg_1", self.safe_nouns))
    NP_trans_2 = choice(get_matches_of(V_trans, "arg_2", self.safe_nouns))
    D_trans_1 = choice(get_matched_by(NP_trans_1, "arg_1", self.safe_dets))
    D_trans_2 = choice(get_matched_by(NP_trans_2, "arg_1", self.safe_dets))
    Aux_trans = return_aux(V_trans, NP_trans_1)
    # First conjunct with its two determiner slots left open.
    S1_abs = " ".join([
        "%s", NP_trans_1[0], Aux_trans[0], V_trans[0], "%s", NP_trans_2[0]
    ])

    option = random.choice([1, 2, 3])
    if option == 1:
        # subject control/raising
        V_control_in = choice(self.v_control_subj_in)
        NP1 = choice(get_matches_of(V_control_in, "arg_1", self.safe_nouns))
        D1 = choice(get_matched_by(NP1, "arg_1", self.safe_dets))
        Aux1 = return_aux(V_control_in, NP1)
        V = choice(
            get_matches_of(
                V_control_in, "arg_2",
                get_matched_by(NP1, "arg_1",
                               self.all_bare_transitive_verbs)))
        NP2 = choice(get_matches_of(V, "arg_2", self.safe_nouns))
        D2 = choice(get_matched_by(NP2, "arg_1", self.safe_dets))
        V_control_out = choice(
            get_matched_by(
                NP1, "arg_1",
                get_matches_of(
                    Aux1, "arg_2",
                    get_matched_by(V, "arg_2", self.v_control_subj_out))))
        V_raising_in = choice(
            get_matched_by(
                NP1, "arg_1",
                get_matches_of(
                    Aux1, "arg_2",
                    get_matched_by(V, "arg_2", self.v_raising_subj_in))))
        V_raising_out = choice(
            get_matched_by(
                NP1, "arg_1",
                get_matches_of(
                    Aux1, "arg_2",
                    get_matched_by(V, "arg_2", self.v_raising_subj_out))))
        to = "to"
    elif option == 2:
        # object control/raising
        V_control_in = choice(self.v_control_obj_in)
        NP1 = choice(get_matches_of(V_control_in, "arg_1", self.safe_nouns))
        D1 = choice(get_matched_by(NP1, "arg_1", self.safe_dets))
        Aux1 = return_aux(V_control_in, NP1)
        control_obj = N_to_DP_mutate(
            choice(get_matches_of(V_control_in, "arg_2")))
        V = choice(
            get_matches_of(
                V_control_in, "arg_3",
                get_matched_by(NP1, "arg_1",
                               self.all_bare_transitive_verbs)))
        NP2 = choice(get_matches_of(V, "arg_2", self.safe_nouns))
        D2 = choice(get_matched_by(NP2, "arg_1", self.safe_dets))
        V_control_out = choice(
            get_matched_by(
                NP1, "arg_1",
                get_matches_of(
                    Aux1, "arg_2",
                    get_matched_by(
                        V, "arg_3",
                        get_matched_by(control_obj, "arg_2",
                                       self.v_control_obj_out)))))
        # Both raising verbs are required below, so a failed lookup must
        # not be silenced: let the original error reach the caller.
        V_raising_in = choice(
            get_matched_by(
                NP1, "arg_1",
                get_matches_of(Aux1, "arg_2", self.v_raising_obj_in)))
        V_raising_out = choice(
            get_matched_by(
                NP1, "arg_1",
                get_matches_of(Aux1, "arg_2", self.v_raising_obj_out)))
        to = control_obj[0] + " to"
    else:
        # adjective control/raising
        V_control_in = choice(self.adj_control_subj_in)
        NP1 = choice(
            get_matches_of(V_control_in, "arg_1", all_common_nouns))
        D1 = choice(get_matched_by(NP1, "arg_1", self.safe_dets))
        Aux1 = return_copula(NP1)
        V = choice(
            get_matches_of(
                V_control_in, "arg_2",
                get_matched_by(NP1, "arg_1",
                               self.all_bare_transitive_verbs)))
        NP2 = choice(get_matches_of(V, "arg_2", all_common_nouns))
        D2 = choice(get_matched_by(NP2, "arg_1", self.safe_dets))
        V_control_out = choice(
            get_matched_by(
                NP1, "arg_1",
                get_matched_by(V, "arg_2", self.adj_control_subj_out)))
        V_raising_in = choice(self.adj_raising_subj_in)
        V_raising_out = choice(self.adj_raising_subj_out)
        to = "to"

    # Ds[i] fills the four determiner slots, in order: NP_trans_1,
    # NP_trans_2, NP1, NP2.  Ds[0]/Ds[1] are the in-domain pair and
    # Ds[2]/Ds[3] the out-domain pair; within a pair the two tuples swap
    # "the" and "a".
    Ds = []
    option = random.choice([
        1, 2, 3
    ])  # There are three in-domain configurations (arbitrarily chosen)
    if option == 1:
        Ds.append(("the", "a", D1[0], D2[0]))
        Ds.append(("a", "the", D1[0], D2[0]))
    elif option == 2:
        Ds.append(("the", D_trans_2[0], D1[0], "a"))
        Ds.append(("a", D_trans_2[0], D1[0], "the"))
    else:
        Ds.append((D_trans_1[0], "the", D1[0], "a"))
        Ds.append((D_trans_1[0], "a", D1[0], "the"))

    option = random.choice([
        1, 2, 3
    ])  # There are three out-domain configurations (arbitrarily chosen)
    if option == 1:
        Ds.append(("the", D_trans_2[0], "a", D2[0]))
        Ds.append(("a", D_trans_2[0], "the", D2[0]))
    elif option == 2:
        Ds.append((D_trans_1[0], "the", "a", D2[0]))
        Ds.append((D_trans_1[0], "a", "the", D2[0]))
    else:
        Ds.append((D_trans_1[0], D_trans_2[0], "the", "a"))
        Ds.append((D_trans_1[0], D_trans_2[0], "a", "the"))

    def sentence(predicate, dets):
        # Full sentence for one main predicate and one determiner tuple.
        template = " ".join([
            S1_abs, "and", "%s", NP1[0], Aux1[0], predicate[0], to, V[0],
            "%s", NP2[0], "."
        ])
        return template % dets

    def clause_key(predicate):
        # Determiner-free words of the control/raising clause.
        return (NP1[0], Aux1[0], predicate[0], to, V[0], NP2[0], ".")

    data = self.build_paradigm(
        training_1_1=sentence(V_control_in, Ds[0]),
        training_0_0=sentence(V_raising_in, Ds[1]),
        control_1_0=sentence(V_control_in, Ds[1]),
        control_0_1=sentence(V_raising_in, Ds[0]),
        test_1_0=sentence(V_control_out, Ds[3]),
        test_0_1=sentence(V_raising_out, Ds[2]),
        control_1_1=sentence(V_control_out, Ds[2]),
        control_0_0=sentence(V_raising_out, Ds[3]),
    )

    track_sentence = [
        clause_key(V_control_in),
        clause_key(V_raising_in),
        clause_key(V_control_out),
        clause_key(V_raising_out),
        clause_key(V_control_out),
        clause_key(V_raising_out),
    ]
    return data, track_sentence
def get_pronouns(self):
    """Pick a first-person / non-first-person pair and matching accusatives.

    One of three realisations is chosen with equal probability: nominative
    pronouns, possessive determiner + noun, or possessive pronouns.

    Returns:
        A 4-tuple ``(first, non_first, first_acc, non_first_acc)``: the two
        subject items and, for each, an accusative pronoun whose ``person``
        and ``sg`` fields match the pronoun or possessive it was derived
        from.
    """
    # randomly select either a nominative pronoun, possessive determiner,
    # or possessive pronoun
    r = random.random()

    def matching_accusative(item):
        # Accusative pronoun agreeing with `item` in person and number.
        return choice(
            get_all_conjunctive([("person", item["person"]),
                                 ("sg", item["sg"])], self.acc_pronoun))

    if r < 1 / 3:
        # nominative pronoun
        first = choice(np.intersect1d(self.first, self.nom_pronoun))
        non_first = choice(np.setdiff1d(self.nom_pronoun, self.first))
        first_acc = matching_accusative(first)
        non_first_acc = matching_accusative(non_first)
    elif r < 2 / 3:
        # possessive det
        noun = choice(self.possessible_animates)
        first_det = choice(get_all("person", "1", self.poss_det))
        first = noun.copy()
        first[0] = first_det[0] + " " + first[0]
        non_first_det = choice(np.setdiff1d(self.poss_det, self.first))
        non_first = noun.copy()
        non_first[0] = non_first_det[0] + " " + non_first[0]
        first_acc = matching_accusative(first_det)
        non_first_acc = matching_accusative(non_first_det)
    else:
        # possessive pronoun
        # The records are modified below, so work on copies, as the
        # possessive-det branch does.  NOTE(review): whether `choice` hands
        # back a view of the shared vocabulary is not visible here; the
        # copy is defensive.
        first = choice(get_all("person", "1", self.poss_pronoun)).copy()
        non_first = choice(
            np.setdiff1d(self.poss_pronoun, self.first)).copy()
        # The accusatives must be looked up BEFORE sg/pl are overwritten.
        first_acc = matching_accusative(first)
        non_first_acc = matching_accusative(non_first)
        # Possessive pronouns can have either singular or plural agreement,
        # irrespective of person/number marking
        sg = random.choice(["1", "0"])
        pl = "0" if sg == "1" else "1"
        first["sg"] = sg
        first["pl"] = pl
        non_first["sg"] = sg
        non_first["pl"] = pl

    return first, non_first, first_acc, non_first_acc
def sample(self):
    """Draw one paradigm crossing "adjective present" with '"the" present'.

    Every sentence is "<transitive clause> and <copular clause>."

    Training 1/1
        The girl saw a cat and John is the tall man.
        The girl saw a cat and the tall man is in the room.
        The girl saw a cat and the man is tall.
        The girl saw a cat and the man in the room is tall.
    Training 0/0
        A girl saw a cat and John is a man.
        A girl saw a cat and John is the man in a room.
        A girl saw a cat and a man is John.
    Test 1/0
        A girl saw a cat and John is a tall man in a room.
        A girl saw a cat and John is tall.
        A girl saw a cat and a tall man is John.
        A girl saw a cat and a tall man in a room is John.
        A girl saw a cat and a tall man is president.
        A girl saw a cat and a tall man in the room is president.
    Test 0/1
        The girl saw a cat and John is in the room.
        The girl saw a cat and the man is in the room.
        The girl saw a cat and the man in the room is John.
        The girl saw a cat and John is president.
        The girl saw a cat and the man is president.
        The girl saw a cat and the man in the room is president.
    Control 1/1
        The girl saw a cat and John is a tall man in a room.
        The girl saw a cat and John is tall.
        The girl saw a cat and a tall man is John.
        The girl saw a cat and a tall man in a room is John.
        The girl saw a cat and a tall man is president.
        The girl saw a cat and a tall man in the room is president.
    Control 0/0
        A girl saw a cat and John is in a room.
        A girl saw a cat and a man is in a room.
        A girl saw a cat and a man in a room is John.
        A girl saw a cat and John is president.
        A girl saw a cat and a man is president.
        A girl saw a cat and a man in a room is president.

    Returns:
        A pair ``(data, track_sentence)``: whatever ``self.build_paradigm``
        returns for the eight sentences, and a list of six identical tuples
        holding the in-domain name, noun, adjective and locative.
    """
    v_trans = choice(all_transitive_verbs)
    subj = choice(get_matches_of(v_trans, "arg_1", all_common_nouns))
    aux = return_aux(v_trans, subj)
    D_subj = choice(get_matched_by(subj, "arg_1", self.safe_determiners))
    obj = choice(get_matches_of(v_trans, "arg_2", all_common_nouns))
    D_obj = choice(get_matched_by(obj, "arg_1", self.safe_determiners))
    # First conjunct in three flavours: sampled determiners only, "the" on
    # the subject, and "the" on the object.
    S1 = " ".join(
        [D_subj[0], subj[0], aux[0], v_trans[0], D_obj[0], obj[0], "and"])
    S1_the_subj = " ".join(
        ["the", subj[0], aux[0], v_trans[0], D_obj[0], obj[0], "and"])
    S1_the_obj = " ".join(
        [D_subj[0], subj[0], aux[0], v_trans[0], "the", obj[0], "and"])

    name_in = choice(self.names_in_domain)
    name_out = choice(self.names_out_domain)
    # Common nouns whose gender agrees with the name, is "n", or is unset.
    noun_in = choice(
        np.array([
            n for n in self.common_nouns_in_domain
            if n["gender"] == name_in["gender"] or n["gender"] == "n"
            or n["gender"] == ""
        ]))
    noun_out = choice(
        np.array([
            n for n in self.common_nouns_out_domain
            if n["gender"] == name_out["gender"] or n["gender"] == "n"
            or n["gender"] == ""
        ]))
    D_in = choice(get_matched_by(noun_in, "arg_1", self.a))
    D_out = choice(get_matched_by(noun_out, "arg_1", self.a))
    adj_in = choice(self.adjs_in_domain)
    adj_out = choice(self.adjs_out_domain)
    locative_in = build_locative(choice(self.locales_in_domain),
                                 allow_quantifiers=False,
                                 avoid=self.the)
    locative_out = build_locative(choice(self.locales_out_domain),
                                  allow_quantifiers=False,
                                  avoid=self.the)
    other_noun = choice(
        np.array([
            n for n in self.one_word_noun
            if n["gender"] == name_out["gender"] or n["gender"] == "n"
        ]))

    in_domain_key = (name_in[0], noun_in[0], adj_in[0], locative_in[0])
    track_sentence = [in_domain_key] * 6

    # Training_1_1
    option = random.randint(0, 7)
    (D, S) = ("the", S1) if option < 4 else (D_in[0], S1_the_obj)
    if option % 4 == 0:
        training_1 = " ".join(
            [S, name_in[0], "is", D, adj_in[0], noun_in[0]])
    elif option % 4 == 1:
        training_1 = " ".join(
            [S, D, adj_in[0], noun_in[0], "is", locative_in[0]])
    elif option % 4 == 2:
        training_1 = " ".join([S, D, noun_in[0], "is", adj_in[0]])
    else:
        training_1 = " ".join(
            [S, D, noun_in[0], locative_in[0], "is", adj_in[0]])

    # Training_0_0
    option = random.choice([1, 2, 3])
    if option == 1:
        training_0 = " ".join([S1, name_in[0], "is", D_in[0], noun_in[0]])
    elif option == 2:
        training_0 = " ".join(
            [S1, name_in[0], "is", D_in[0], noun_in[0], locative_in[0]])
    else:
        training_0 = " ".join([S1, D_in[0], noun_in[0], "is", name_in[0]])

    # Control_1_0
    # Adjective present, "the" absent: the first conjunct must be S1.  The
    # earlier code reused the S drawn for Training_1_1, which may be
    # S1_the_obj and would bring "the" into the sentence.
    option = random.randint(0, 3)
    if option == 0:
        control_1_0 = " ".join(
            [S1, name_in[0], "is", D_in[0], adj_in[0], noun_in[0]])
    elif option == 1:
        control_1_0 = " ".join(
            [S1, D_in[0], adj_in[0], noun_in[0], "is", locative_in[0]])
    elif option == 2:
        control_1_0 = " ".join([S1, D_in[0], noun_in[0], "is", adj_in[0]])
    else:
        control_1_0 = " ".join(
            [S1, D_in[0], noun_in[0], locative_in[0], "is", adj_in[0]])

    # Control_0_1
    option = random.randint(0, 5)
    (D, S) = ("the", S1) if option < 3 else (D_in[0], S1_the_obj)
    if option % 3 == 1:
        control_0_1 = " ".join([S, name_in[0], "is", D, noun_in[0]])
    elif option % 3 == 2:
        control_0_1 = " ".join(
            [S, name_in[0], "is", D, noun_in[0], locative_in[0]])
    else:
        control_0_1 = " ".join([S, D, noun_in[0], "is", name_in[0]])

    # Test_1_0
    option = random.choice([1, 2, 3, 4, 5])
    if option == 1:
        test_1_0 = " ".join([
            S1, name_out[0], "is", D_out[0], adj_out[0], noun_out[0],
            locative_out[0]
        ])
    elif option == 2:
        test_1_0 = " ".join(
            [S1, D_out[0], adj_out[0], noun_out[0], "is", name_out[0]])
    elif option == 3:
        test_1_0 = " ".join([
            S1, D_out[0], adj_out[0], noun_out[0], locative_out[0], "is",
            name_out[0]
        ])
    elif option == 4:
        test_1_0 = " ".join(
            [S1, D_out[0], adj_out[0], noun_out[0], "is", other_noun[0]])
    else:
        test_1_0 = " ".join([
            S1, D_out[0], adj_out[0], noun_out[0], locative_out[0], "is",
            other_noun[0]
        ])

    # Control_1_1
    option = random.randint(0, 9)
    (D, S) = ("the", S1) if option < 5 else (D_out[0], S1_the_subj)
    if option % 5 == 0:
        control_1_1 = " ".join([
            S, name_out[0], "is", D, adj_out[0], noun_out[0],
            locative_out[0]
        ])
    elif option % 5 == 1:
        control_1_1 = " ".join(
            [S, D, adj_out[0], noun_out[0], "is", name_out[0]])
    elif option % 5 == 2:
        control_1_1 = " ".join([
            S, D, adj_out[0], noun_out[0], locative_out[0], "is",
            name_out[0]
        ])
    elif option % 5 == 3:
        control_1_1 = " ".join(
            [S, D, adj_out[0], noun_out[0], "is", other_noun[0]])
    else:
        control_1_1 = " ".join([
            S, D, adj_out[0], noun_out[0], locative_out[0], "is",
            other_noun[0]
        ])

    # Test_0_1
    option = random.randint(0, 7)
    (D, S) = ("the", S1) if option < 4 else (D_out[0], S1_the_subj)
    if option % 4 == 0:
        test_0_1 = " ".join([S, D, noun_out[0], "is", locative_out[0]])
    elif option % 4 == 1:
        test_0_1 = " ".join(
            [S, D, noun_out[0], locative_out[0], "is", name_out[0]])
    elif option % 4 == 2:
        test_0_1 = " ".join([S, D, noun_out[0], "is", other_noun[0]])
    else:
        test_0_1 = " ".join(
            [S, D, noun_out[0], locative_out[0], "is", other_noun[0]])

    # Control_0_0
    option = random.choice([1, 2, 3, 4, 5, 6])
    if option == 1:
        control_0_0 = " ".join([S1, name_out[0], "is", locative_out[0]])
    elif option == 2:
        control_0_0 = " ".join(
            [S1, D_out[0], noun_out[0], "is", locative_out[0]])
    elif option == 3:
        control_0_0 = " ".join([
            S1, D_out[0], noun_out[0], locative_out[0], "is", name_out[0]
        ])
    elif option == 4:
        control_0_0 = " ".join([S1, name_out[0], "is", other_noun[0]])
    elif option == 5:
        control_0_0 = " ".join(
            [S1, D_out[0], noun_out[0], "is", other_noun[0]])
    else:
        control_0_0 = " ".join([
            S1, D_out[0], noun_out[0], locative_out[0], "is", other_noun[0]
        ])

    data = self.build_paradigm(
        training_1_1=training_1 + ".",
        training_0_0=training_0 + ".",
        control_1_0=control_1_0 + ".",
        control_0_1=control_0_1 + ".",
        test_1_0=test_1_0 + ".",
        test_0_1=test_0_1 + ".",
        control_1_1=control_1_1 + ".",
        control_0_0=control_0_0 + ".",
    )
    return data, track_sentence
def sample_verb(self):
    """Draw one paradigm contrasting a verb's antonym with a related verb.

    Each sentence has the shape
    "<transitive clause> and <Subj1 Aux1 V Obj1> and <Subj2 Aux2 V' Obj2> ."
    where V' is the antonym of V (label 1) or a verb taken from its
    ``synonym_hypernym_hyponym`` field (label 0).  Training items use the
    in-domain verb V1, test items the out-of-domain verb V2.

    Returns:
        A pair ``(data, track_sentence)``: whatever ``self.build_paradigm``
        returns for the eight sentences, and a list of six tuples holding
        the determiner-free words of the last two conjuncts.

    Raises:
        MatchNotFoundError: if no out-of-domain verb fits the chosen
            subject/object, if none of the attempts yields a verb whose
            antonym and related form accept Subj2/Obj2, or if V1 has a
            category other than transitive or intransitive.
    """
    V_trans = choice(self.all_possibly_singular_transitive_verbs)
    NP_trans_1 = choice(
        get_matches_of(V_trans, "arg_1", self.all_singular_common_nouns))
    NP_trans_2 = choice(
        get_matches_of(V_trans, "arg_2", self.all_singular_common_nouns))
    D_trans_1 = choice(get_matched_by(NP_trans_1, "arg_1", self.safe_dets))
    D_trans_2 = choice(get_matched_by(NP_trans_2, "arg_1", self.safe_dets))
    Aux_trans = return_aux(V_trans, NP_trans_1)
    # First conjunct with its two determiner slots left open.
    S1_abs = " ".join([
        "%s", NP_trans_1[0], Aux_trans[0], V_trans[0], "%s", NP_trans_2[0]
    ])

    V1 = choice(self.in_domain_verbs_main)
    V1_ant = get_same_V_form(V1["antonym"], V1)
    V1_other = get_same_V_form(V1["synonym_hypernym_hyponym"], V1)
    Subj1 = choice(
        get_matches_of(
            V1, "arg_1",
            get_matches_of(
                V1_ant, "arg_1",
                get_matches_of(V1_other, "arg_1",
                               self.all_singular_common_nouns))))
    Subj2 = choice(
        get_matches_of(
            V1_ant, "arg_1",
            get_matches_of(V1_other, "arg_1",
                           self.all_singular_common_nouns)))
    D1 = choice(get_matched_by(Subj1, "arg_1", self.safe_dets))
    D2 = choice(get_matched_by(Subj2, "arg_1", self.safe_dets))
    Aux1 = return_aux(V1, Subj1, allow_negated=False)
    Aux2 = return_aux(V1_ant, Subj2, allow_negated=False)

    if V1["category"] == "(S\\NP)/NP":
        # If the antonym is transitive, we need to generate objects that
        # match all relevant verb forms
        for _ in range(10):
            Obj1 = N_to_DP_mutate(choice(
                get_matches_of(
                    V1, "arg_2",
                    get_matches_of(
                        V1_ant, "arg_2",
                        get_matches_of(V1_other, "arg_2", all_nouns)))),
                avoid=all_very_common_dets)
            Obj2 = N_to_DP_mutate(choice(
                get_matches_of(
                    V1, "arg_2",
                    get_matches_of(
                        V1_ant, "arg_2",
                        get_matches_of(V1_other, "arg_2", all_nouns)))),
                avoid=all_very_common_dets)
            try:
                V2 = choice(
                    get_matches_of(
                        Aux1, "arg_2",
                        get_matched_by(
                            Subj1, "arg_1",
                            get_matched_by(
                                Obj1, "arg_2",
                                self.out_domain_transitive_verbs_main))))
            except Exception as err:
                raise MatchNotFoundError(
                    "fail to find V: %s %s %s %s %s" %
                    (V1[0], Subj1[0], Subj2[0], Obj1[0], Obj2[0])) from err
            V2 = get_same_V_form(V2["root"], V1)
            V2_ant = get_same_V_form(V2["antonym"], V2)
            V2_other = get_same_V_form(V2["synonym_hypernym_hyponym"], V2)
            if is_match_disj(Obj2, V2_ant["arg_2"]) and \
                    is_match_disj(Obj2, V2_other["arg_2"]) and \
                    is_match_disj(Subj2, V2_ant["arg_1"]) and \
                    is_match_disj(Subj2, V2_other["arg_1"]):
                break
        else:
            # Every attempt failed: reject the sample, as the intransitive
            # branch does, rather than emit a mismatched one.
            raise MatchNotFoundError(
                "fail to match: %s %s %s %s %s %s" %
                (V1[0], V2[0], Subj1[0], Subj2[0], Obj1[0], Obj2[0]))
    elif V1["category"] == "S\\NP":
        # If the antonym is intransitive, no objects
        try:
            V2 = choice(
                get_matches_of(
                    Aux1, "arg_2",
                    get_matched_by(
                        Subj1, "arg_1",
                        self.out_domain_intransitive_verbs_main)))
        except Exception as err:
            raise MatchNotFoundError(
                "fail to find V: %s %s %s" %
                (V1[0], Subj1[0], Subj2[0])) from err
        V2 = get_same_V_form(V2["root"], V1)
        V2_ant = get_same_V_form(V2["antonym"], V2)
        V2_other = get_same_V_form(V2["synonym_hypernym_hyponym"], V2)
        Obj1 = self.empty  # No object: this is an empty string
        Obj2 = self.empty  # No object: this is an empty string
        if not (is_match_disj(Subj2, V2_ant["arg_1"])
                and is_match_disj(Subj2, V2_other["arg_1"])):
            raise MatchNotFoundError(
                "fail to match: %s %s %s %s" %
                (V1[0], V2[0], Subj1[0], Subj2[0]))
    else:
        # Neither branch ran, so V2/Obj1/Obj2 would be unbound below.
        raise MatchNotFoundError(
            "unexpected verb category for %s: %s" % (V1[0], V1["category"]))

    # Ds[i] fills the four determiner slots, in order: NP_trans_1,
    # NP_trans_2, Subj1, Subj2.  Ds[0]/Ds[1] are the in-domain pair and
    # Ds[2]/Ds[3] the out-domain pair; within a pair the two tuples swap
    # "the" and "a".
    Ds = []
    option = random.choice([
        1, 2, 3
    ])  # There are three in-domain configurations (arbitrarily chosen)
    if option == 1:
        Ds.append(("the", "a", D1[0], D2[0]))
        Ds.append(("a", "the", D1[0], D2[0]))
    elif option == 2:
        Ds.append(("the", D_trans_2[0], D1[0], "a"))
        Ds.append(("a", D_trans_2[0], D1[0], "the"))
    else:
        Ds.append((D_trans_1[0], "the", D1[0], "a"))
        Ds.append((D_trans_1[0], "a", D1[0], "the"))

    option = random.choice([
        1, 2, 3
    ])  # There are three out-domain configurations (arbitrarily chosen)
    if option == 1:
        Ds.append(("the", D_trans_2[0], "a", D2[0]))
        Ds.append(("a", D_trans_2[0], "the", D2[0]))
    elif option == 2:
        Ds.append((D_trans_1[0], "the", "a", D2[0]))
        Ds.append((D_trans_1[0], "a", "the", D2[0]))
    else:
        Ds.append((D_trans_1[0], D_trans_2[0], "the", "a"))
        Ds.append((D_trans_1[0], D_trans_2[0], "a", "the"))

    def sentence(first_verb, second_verb, dets):
        # Full three-conjunct sentence for one verb pair and determiner set.
        template = " ".join([
            S1_abs, "and", "%s", Subj1[0], Aux1[0], first_verb[0], Obj1[0],
            "and", "%s", Subj2[0], Aux2[0], second_verb[0], Obj2[0], "."
        ])
        return template % dets

    def clause_key(first_verb, second_verb):
        # Determiner-free words of the last two conjuncts.
        return (Subj1[0], first_verb[0], Obj1[0], Subj2[0], second_verb[0],
                Obj2[0], ".")

    data = self.build_paradigm(
        training_1_1=sentence(V1, V1_ant, Ds[0]),
        training_0_0=sentence(V1, V1_other, Ds[1]),
        control_1_0=sentence(V1, V1_ant, Ds[1]),
        control_0_1=sentence(V1, V1_other, Ds[0]),
        test_1_0=sentence(V2, V2_ant, Ds[3]),
        test_0_1=sentence(V2, V2_other, Ds[2]),
        control_1_1=sentence(V2, V2_ant, Ds[2]),
        control_0_0=sentence(V2, V2_other, Ds[3]),
    )

    # Entries follow training 1/1, training 0/0, test 1/0, test 0/1,
    # control 1/1, control 0/0.  The label-0 out-of-domain entries record
    # V2_other, the verb those sentences use (they used to repeat V2_ant).
    track_sentence = [
        clause_key(V1, V1_ant),
        clause_key(V1, V1_other),
        clause_key(V2, V2_ant),
        clause_key(V2, V2_other),
        clause_key(V2, V2_ant),
        clause_key(V2, V2_other),
    ]
    return data, track_sentence
def sample_modified_NP(self):
    """Sample one four-sentence paradigm with a modified noun phrase.

    Templates (the suffix ``_k`` indexes the row of ``Ds`` that supplies the
    determiner slots for sentence ``k``):

    Training 1
        Every man who read the book told a boy to see the same movie.
        D1_1 NP1 rel Aux1 V1 D2_1 NP2 Aux2 V2 D3_1 NP3 Aux3 V3 D4_1 NP4
    Training 0
        The man who read every book told a boy to see the same movie.
        D1_2 NP1 rel Aux1 V1 D2_2 NP2 Aux2 V2 D3_2 NP3 Aux3 V3 D4_2 NP4
    Test 1
        The man told every boy reading the book to see the same movie.
        D1_3 NP1 Aux2 V2 D3_3 NP3 V1ing D2_3 NP2 Aux3 V3 D4_3 NP4
    Test 0
        The man told that boy reading every book to see the same movie.
        D1_4 NP1 Aux2 V2 D3_4 NP3 V1ing D2_4 NP2 Aux3 V3 D4_4 NP4

    One determiner slot per sentence is "every" and one is ``recip``
    ("the same" / "a different").

    Returns:
        ``(data, track_sentence)``: ``data`` is the value returned by
        ``self.build_paradigm`` and ``track_sentence`` is a one-element list
        containing the tuple of lexical items that were used.

    Raises:
        Any exception raised by ``choice`` or the matching helpers when a
        slot cannot be filled.  These used to be swallowed for NP4, V1ing
        and D1-D4, which only postponed the failure to an unrelated
        UnboundLocalError further down.
    """
    V2 = choice(self.embedding_verbs)
    NP1 = choice(
        get_matches_of(V2, "arg_1", self.all_singular_common_nouns))
    Aux2 = return_aux(V2, NP1, allow_negated=False)
    rel = choice(get_matched_by(NP1, "arg_1", all_relativizers))

    if V2["category_2"] == "V_control_object":
        NP3 = choice(
            get_matches_of(V2, "arg_2", self.all_singular_common_nouns))
        V3 = choice(
            get_matches_of(V2, "arg_3", self.all_bare_transitive_verbs))
        Aux3 = self.to
    elif V2["category_2"] == "V_raising_object":
        V3 = choice(self.all_bare_transitive_verbs)
        Aux3 = self.to
        NP3 = choice(
            get_matches_of(V3, "arg_1", self.all_singular_common_nouns))
    else:  # clause embedding verb
        # NOTE(review): this writes into the sampled item itself; presumably
        # `choice` hands back a copy rather than a view of the vocabulary --
        # confirm.
        V2[0] = V2[0] + " that"
        V3 = choice(self.all_possibly_singular_transitive_verbs)
        NP3 = choice(
            get_matches_of(V3, "arg_1", self.all_singular_common_nouns))
        Aux3 = return_aux(V3, NP3, allow_negated=False)

    # No try/except here: a failed lookup must surface at its source.
    NP4 = choice(
        get_matches_of(V3, "arg_2", self.all_singular_common_nouns))
    V1 = choice(
        get_matched_by(
            NP1, "arg_1",
            get_matched_by(NP3, "arg_1",
                           self.possibly_ing_transitive_verbs)))
    V1ing = choice(get_all("ing", "1", get_all("root", V1["root"])))
    NP2 = choice(
        get_matches_of(V1, "arg_2", self.all_singular_common_nouns))
    Aux1 = return_aux(V1, NP1, allow_negated=False)
    recip = random.choice(["the same", "a different"])

    D1 = choice(get_matched_by(NP1, "arg_1", self.singular_indefs))
    D2 = choice(get_matched_by(NP2, "arg_1", self.singular_indefs))
    D3 = choice(get_matched_by(NP3, "arg_1", self.singular_indefs))
    D4 = choice(get_matched_by(NP4, "arg_1", self.singular_indefs))

    Ds = []

    # There are three possible patterns for training example with label 1
    option = random.choice([1, 2, 3])
    if option == 1:
        Ds.append(["every", D2[0], recip, D4[0]])
    elif option == 2:
        Ds.append(["every", D2[0], D3[0], recip])
    elif option == 3:
        Ds.append([D1[0], D2[0], "every", recip])

    # There are two possible patterns for training example with label 0
    option = random.choice([1, 2])
    if option == 1:
        Ds.append([D1[0], "every", recip, D4[0]])
    elif option == 2:
        Ds.append([D1[0], "every", D3[0], recip])

    # There are four possible patterns for test example with label 1
    option = random.choice([1, 2, 3, 4])
    if option == 1:
        Ds.append(["every", recip, D2[0], D4[0]])
    elif option == 2:
        Ds.append(["every", D3[0], recip, D4[0]])
    elif option == 3:
        Ds.append(["every", D3[0], D2[0], recip])
    elif option == 4:
        Ds.append([D1[0], "every", D2[0], recip])

    # There's only one possible pattern for test example with label 0
    Ds.append([D1[0], D2[0], "every", recip])

    def _training(dets):
        # Relative clause on the subject: slots are D1, D2, D3, D4.
        return " ".join([
            dets[0], NP1[0], rel[0], Aux1[0], V1[0], dets[1], NP2[0],
            Aux2[0], V2[0], dets[2], NP3[0], Aux3[0], V3[0], dets[3],
            NP4[0], "."
        ])

    def _test(dets):
        # "-ing" modifier on the embedded NP: slots are D1, D3, D2, D4.
        return " ".join([
            dets[0], NP1[0], Aux2[0], V2[0], dets[1], NP3[0], V1ing[0],
            dets[2], NP2[0], Aux3[0], V3[0], dets[3], NP4[0], "."
        ])

    data = self.build_paradigm(training_1_1=_training(Ds[0]),
                               training_0_0=_training(Ds[1]),
                               test_1_0=_test(Ds[2]),
                               test_0_1=_test(Ds[3]))
    track_sentence = [(NP1[0], V1[0], NP2[0], V2[0], NP3[0], V3[0], NP4[0],
                       recip)]
    return data, track_sentence
get_all("frequent", "1")) any_decoys = np.concatenate( (get_all("expression", "the"), get_all_conjunctive([("expression", "that"), ("category_2", "D")]), get_all("expression", "this"), get_all("expression", "these"), get_all("expression", "those"))) # sample sentences until desired number while len(sentences) < number_to_generate: # sentence template # D1 N1 who V1 any/the/D2 N2 V2 any/the/D3 N3 # every boy who bought any/the/some apples sang any/the/a song # build all lexical items #TODO: throw in modifiers try: N1 = choice(all_animate_nouns) D1_up = choice(get_matched_by(N1, "arg_1", all_UE_UE_quantifiers)) D1_down = choice(get_matched_by(N1, "arg_1", all_DE_UE_quantifiers)) V1 = choice(get_matched_by(N1, "arg_1", all_transitive_verbs)) V1 = conjugate(V1, N1, allow_negated=False) N2 = choice(get_matches_of(V1, "arg_2", all_non_singular_nouns), [N1]) D2 = choice( get_matched_by(N2, "arg_1", all_UE_UE_quantifiers), [D1_up, D1_down] ) # restrict to UE quantifiers, otherwise there could be another licensor V2 = choice(get_matched_by(N1, "arg_1", all_transitive_verbs), [V1]) V2 = conjugate(V2, N1, allow_negated=False) N3 = choice(get_matches_of(V2, "arg_2", all_non_singular_nouns), [N1, N2]) D3 = choice(get_matched_by(N3, "arg_1", all_UE_UE_quantifiers), [D1_up, D1_down])
def sample_coordination(self):
    """Sample one four-sentence coordination paradigm.

    Templates (the suffix ``_k`` indexes the row of ``Ds`` that supplies the
    determiner slots for sentence ``k``):

    Training 1
        A man slept or every girl helped the same dog.
        D1_1 NP1 Aux1 IV OR D2_1 NP2 Aux2 V2 D3_1 NP3
    Training 0
        Every man slept or the same girl helped a dog.
        D1_2 NP1 Aux1 IV OR D2_2 NP2 Aux2 V2 D3_2 NP3
    Test 1
        Every man loved a girl or helped the same dog.
        D1_3 NP1 Aux1 TV D2_3 NP2 OR Aux2 V2 D3_3 NP3
    Test 0
        A man loved every girl or helped the same dog.
        D1_4 NP1 Aux1 TV D2_4 NP2 OR Aux2 V2 D3_4 NP3

    Returns:
        ``(data, track_sentence)``: ``data`` is the value returned by
        ``self.build_paradigm`` and ``track_sentence`` is a one-element list
        containing the tuple of lexical items that were used.

    Raises:
        Any exception raised by ``choice`` or the matching helpers when a
        slot cannot be filled.  The NP1 lookup used to swallow its error,
        which guaranteed an UnboundLocalError on the following statement.
    """
    IV = choice(self.all_singular_intransitive_verbs)
    NP1 = choice(
        get_matches_of(IV, "arg_1", self.all_singular_common_nouns))
    Aux1 = return_aux(IV, NP1, allow_negated=False)
    TV = choice(
        get_matched_by(NP1, "arg_1",
                       get_matches_of(Aux1, "arg_2", all_transitive_verbs)))
    NP2 = choice(
        get_matches_of(TV, "arg_2", self.all_singular_common_nouns))
    V2 = choice(
        get_matched_by(NP1, "arg_1",
                       get_matched_by(NP2, "arg_1", all_transitive_verbs)))
    Aux2 = return_aux(V2, NP2, allow_negated=False)
    NP3 = choice(
        get_matches_of(V2, "arg_2", self.all_singular_common_nouns))
    recip = random.choice(["the same", "a different"])

    D1 = choice(get_matched_by(NP1, "arg_1", self.singular_indefs))
    D2 = choice(get_matched_by(NP2, "arg_1", self.singular_indefs))
    D3 = choice(get_matched_by(NP3, "arg_1", self.singular_indefs))

    reverse = bool(random.choice([0, 1]))
    Ds = []

    # There is one possible pattern for training example with label 1
    Ds.append([D1[0], "every", recip])

    # There are two possible patterns for training example with label 0;
    # which pair is available depends on the clause order chosen above.
    if reverse:
        option = random.choice([1, 2])
        if option == 1:
            Ds.append(["every", recip, D3[0]])
        elif option == 2:
            Ds.append(["every", D2[0], recip])
    else:
        option = random.choice([1, 2])
        if option == 1:
            Ds.append([recip, "every", D3[0]])
        elif option == 2:
            Ds.append([recip, D2[0], "every"])

    # There are two possible patterns for test example with label 1
    option = random.choice([1, 2])
    if option == 1:
        Ds.append(["every", recip, D3[0]])
    elif option == 2:
        Ds.append(["every", D2[0], recip])

    # There's only one possible pattern for test example with label 0
    Ds.append([D1[0], "every", recip])

    # We can reverse the order of the clauses in the training example for
    # variety
    clause_1_a = " ".join([Ds[0][0], NP1[0], Aux1[0], IV[0]])
    clause_1_b = " ".join(
        [Ds[0][1], NP2[0], Aux2[0], V2[0], Ds[0][2], NP3[0]])
    clause_0_a = " ".join([Ds[1][0], NP1[0], Aux1[0], IV[0]])
    clause_0_b = " ".join(
        [Ds[1][1], NP2[0], Aux2[0], V2[0], Ds[1][2], NP3[0]])
    if reverse:
        training_1_1 = "%s or %s." % (clause_1_a, clause_1_b)
        training_0_0 = "%s or %s." % (clause_0_a, clause_0_b)
    else:
        training_1_1 = "%s or %s." % (clause_1_b, clause_1_a)
        training_0_0 = "%s or %s." % (clause_0_b, clause_0_a)

    def _test(dets):
        # Shared subject with a coordinated verb phrase.
        return " ".join([
            dets[0], NP1[0], Aux1[0], TV[0], dets[1], NP2[0], "or",
            Aux2[0], V2[0], dets[2], NP3[0], "."
        ])

    data = self.build_paradigm(training_1_1=training_1_1,
                               training_0_0=training_0_0,
                               test_1_0=_test(Ds[2]),
                               test_0_1=_test(Ds[3]))
    track_sentence = [(NP1[0], NP2[0], NP3[0], IV[0], TV[0], V2[0], recip)]
    return data, track_sentence
def sample_verb(self):
    """Sample one eight-sentence paradigm contrasting verb relations.

    Every sentence coordinates three clauses: a distractor transitive clause
    (``S1`` and its variants carrying "the") and two clauses whose verbs are
    either a verb and its antonym form or a verb and its
    "synonym_hypernym_hyponym" form.  In-domain verbs (``V1``) fill the
    training items and two of the controls; out-of-domain verbs (``V2``)
    fill the test items and the other two controls.

    Returns:
        ``(data, track_sentence)``: ``data`` is the value returned by
        ``self.build_paradigm`` and ``track_sentence`` is a list of tuples
        of the lexical items used.

    Raises:
        MatchNotFoundError: if the related forms of ``V1`` cannot be built,
            if no out-of-domain verb fits the sampled arguments, or (for
            intransitive verbs) if ``Subj2`` does not fit the out-of-domain
            verb forms.
    """
    V_trans = choice(all_transitive_verbs)
    NP_trans_1 = choice(get_matches_of(V_trans, "arg_1", all_common_nouns))
    NP_trans_2 = choice(get_matches_of(V_trans, "arg_2", all_common_nouns))
    D_trans_1 = choice(get_matched_by(NP_trans_1, "arg_1", self.safe_dets))
    D_trans_2 = choice(get_matched_by(NP_trans_2, "arg_1", self.safe_dets))
    Aux_trans = return_aux(V_trans, NP_trans_1)

    # Three versions of the distractor clause: "the" on its subject, "the"
    # on its object, and neither.
    S1_the_subj = " ".join([
        "the", NP_trans_1[0], Aux_trans[0], V_trans[0], D_trans_2[0],
        NP_trans_2[0]
    ])
    S1_the_obj = " ".join([
        D_trans_1[0], NP_trans_1[0], Aux_trans[0], V_trans[0], "the",
        NP_trans_2[0]
    ])
    S1 = " ".join([
        D_trans_1[0], NP_trans_1[0], Aux_trans[0], V_trans[0], D_trans_2[0],
        NP_trans_2[0]
    ])

    V1 = choice(self.in_domain_verbs_main)
    try:
        V1_ant = get_same_V_form(V1["antonym"], V1)
        V1_other = get_same_V_form(V1["synonym_hypernym_hyponym"], V1)
    except Exception:
        # This used to be swallowed, which only postponed the failure to an
        # UnboundLocalError on the next statement.
        raise MatchNotFoundError(
            "fail to find antonym/other form of V: %s" % V1[0])

    Subj1 = choice(
        get_matches_of(
            V1, "arg_1",
            get_matches_of(
                V1_ant, "arg_1",
                get_matches_of(V1_other, "arg_1", all_common_nouns))))
    Subj2 = choice(
        get_matches_of(V1_ant, "arg_1",
                       get_matches_of(V1_other, "arg_1", all_common_nouns)))
    D1 = choice(get_matched_by(Subj1, "arg_1", self.safe_dets))
    D2 = choice(get_matched_by(Subj2, "arg_1", self.safe_dets))
    Aux1 = return_aux(V1, Subj1, allow_negated=False)
    Aux2 = return_aux(V1_ant, Subj2, allow_negated=False)

    if V1["category"] == "(S\\NP)/NP":
        # If the antonym is transitive, we need to generate objects that
        # match all relevant verb forms
        # NOTE(review): if all 10 attempts fail, generation continues with
        # the last (mismatching) candidates, whereas the intransitive branch
        # raises -- confirm this is intended.
        for _ in range(10):
            Obj1 = N_to_DP_mutate(choice(
                get_matches_of(
                    V1, "arg_2",
                    get_matches_of(
                        V1_ant, "arg_2",
                        get_matches_of(V1_other, "arg_2", all_nouns)))),
                                  avoid=self.the)
            Obj2 = N_to_DP_mutate(choice(
                get_matches_of(
                    V1, "arg_2",
                    get_matches_of(
                        V1_ant, "arg_2",
                        get_matches_of(V1_other, "arg_2", all_nouns)))),
                                  avoid=self.the)
            try:
                V2 = choice(
                    get_matches_of(
                        Aux1, "arg_2",
                        get_matched_by(
                            Subj1, "arg_1",
                            get_matched_by(
                                Obj1, "arg_2",
                                self.out_domain_transitive_verbs_main))))
            except Exception:
                raise MatchNotFoundError(
                    "fail to find V: %s %s %s %s %s" %
                    (V1[0], Subj1[0], Subj2[0], Obj1[0], Obj2[0]))
            V2 = get_same_V_form(V2["root"], V1)
            V2_ant = get_same_V_form(V2["antonym"], V2)
            V2_other = get_same_V_form(V2["synonym_hypernym_hyponym"], V2)
            if is_match_disj(Obj2, V2_ant["arg_2"]) and is_match_disj(Obj2, V2_other["arg_2"]) and \
                    is_match_disj(Subj2, V2_ant["arg_1"]) and is_match_disj(Subj2, V2_other["arg_1"]):
                break
            else:
                print("fail to match: %s %s %s %s %s %s" %
                      (V1[0], V2[0], Subj1[0], Subj2[0], Obj1[0], Obj2[0]))
    elif V1["category"] == "S\\NP":
        # If the antonym is intransitive, no objects
        try:
            V2 = choice(
                get_matches_of(
                    Aux1, "arg_2",
                    get_matched_by(
                        Subj1, "arg_1",
                        self.out_domain_intransitive_verbs_main)))
        except Exception:
            raise MatchNotFoundError("fail to find V: %s %s %s" %
                                     (V1[0], Subj1[0], Subj2[0]))
        V2 = get_same_V_form(V2["root"], V1)
        V2_ant = get_same_V_form(V2["antonym"], V2)
        V2_other = get_same_V_form(V2["synonym_hypernym_hyponym"], V2)
        Obj1 = self.empty  # No object: this is an empty string
        Obj2 = self.empty  # No object: this is an empty string
        if is_match_disj(Subj2, V2_ant["arg_1"]) and is_match_disj(
                Subj2, V2_other["arg_1"]):
            pass
        else:
            raise MatchNotFoundError("fail to match: %s %s %s %s" %
                                     (V1[0], V2[0], Subj1[0], Subj2[0]))

    def _clauses(det_1, det_2, verb_1, verb_2):
        # Tokens of "<det_1> Subj1 ... and <det_2> Subj2 ...".
        return [
            det_1, Subj1[0], Aux1[0], verb_1[0], Obj1[0], "and", det_2,
            Subj2[0], Aux2[0], verb_2[0], Obj2[0]
        ]

    def _the_initial(verb_1, verb_2):
        # Sentence whose first word is "the" (two layouts, one coin flip).
        if choice([True, False]):
            tokens = [S1_the_subj, "and"] + _clauses(D1[0], D2[0], verb_1,
                                                     verb_2)
        else:
            tokens = _clauses("the", D2[0], verb_1, verb_2) + ["and", S1]
        return " ".join(tokens + ["."])

    def _the_non_initial(verb_1, verb_2):
        # Sentence with "the" in one non-initial determiner slot (six
        # layouts, one randint draw).
        option = random.randint(0, 5)
        if option == 0:
            tokens = [S1_the_obj, "and"] + _clauses(D1[0], D2[0], verb_1,
                                                    verb_2)
        elif option == 1:
            tokens = [S1, "and"] + _clauses("the", D2[0], verb_1, verb_2)
        elif option == 2:
            tokens = [S1, "and"] + _clauses(D1[0], "the", verb_1, verb_2)
        elif option == 3:
            tokens = _clauses(D1[0], "the", verb_1, verb_2) + ["and", S1]
        elif option == 4:
            tokens = _clauses(D1[0], D2[0], verb_1, verb_2) + [
                "and", S1_the_subj
            ]
        else:
            tokens = _clauses(D1[0], D2[0], verb_1, verb_2) + [
                "and", S1_the_obj
            ]
        return " ".join(tokens + ["."])

    # The order of these eight calls fixes the order in which random draws
    # are consumed; keep it stable so seeded runs stay reproducible.
    training_1_1 = _the_initial(V1, V1_ant)
    training_0_0 = _the_non_initial(V1, V1_other)
    control_0_1 = _the_initial(V1, V1_other)
    control_1_0 = _the_non_initial(V1, V1_ant)
    test_1_0 = _the_non_initial(V2, V2_ant)
    test_0_1 = _the_initial(V2, V2_other)
    control_0_0 = _the_non_initial(V2, V2_other)
    control_1_1 = _the_initial(V2, V2_ant)

    data = self.build_paradigm(
        training_1_1=training_1_1,
        training_0_0=training_0_0,
        test_1_0=test_1_0,
        test_0_1=test_0_1,
        control_1_1=control_1_1,
        control_0_0=control_0_0,
        control_1_0=control_1_0,
        control_0_1=control_0_1,
    )
    # NOTE(review): the last two entries are identical; the final one
    # presumably should record V2_other -- left unchanged pending
    # confirmation of how track_sentence is consumed.
    track_sentence = [
        (Subj1[0], V1[0], Obj1[0], Subj2[0], V1_ant[0], Obj2[0], "."),
        (Subj1[0], V1[0], Obj1[0], Subj2[0], V1_other[0], Obj2[0], "."),
        (Subj1[0], V2[0], Obj1[0], Subj2[0], V2_ant[0], Obj2[0], "."),
        (Subj1[0], V2[0], Obj1[0], Subj2[0], V2_ant[0], Obj2[0], "."),
    ]
    return data, track_sentence
def sample_adj(self):
    """Sample one eight-sentence paradigm contrasting adjective relations.

    A distractor transitive clause (``S1`` and its variants carrying "the")
    is coordinated with material containing two adjectives that are either
    an adjective and its antonym or an adjective and its
    "synonym_hypernym_hyponym" partner.  One of two layouts is drawn at
    random:

    * prenominal: ``D1 A Subj Aux V D2 A' Obj``
    * predicative: ``D1 Subj1 Copula1 A and D2 Subj2 Copula2 A'``

    In-domain adjectives (``A1``) fill the training items and two controls;
    out-of-domain adjectives (``A2``) fill the test items and the other two
    controls.

    Returns:
        ``(data, track_sentence)``: ``data`` is the value returned by
        ``self.build_paradigm`` and ``track_sentence`` is a list of four
        tuples of the lexical items used.

    Raises:
        MatchNotFoundError: if a subject, a linking verb, or the related
            out-of-domain adjective forms cannot be found, or if ``Obj``
            does not fit the out-of-domain adjective forms.
    """
    V_trans = choice(all_transitive_verbs)
    NP_trans_1 = choice(get_matches_of(V_trans, "arg_1", all_common_nouns))
    NP_trans_2 = choice(get_matches_of(V_trans, "arg_2", all_common_nouns))
    D_trans_1 = choice(get_matched_by(NP_trans_1, "arg_1", self.safe_dets))
    D_trans_2 = choice(get_matched_by(NP_trans_2, "arg_1", self.safe_dets))
    Aux_trans = return_aux(V_trans, NP_trans_1)

    # Three versions of the distractor clause: "the" on its subject, "the"
    # on its object, and neither.
    S1_the_subj = " ".join([
        "the", NP_trans_1[0], Aux_trans[0], V_trans[0], D_trans_2[0],
        NP_trans_2[0]
    ])
    S1_the_obj = " ".join([
        D_trans_1[0], NP_trans_1[0], Aux_trans[0], V_trans[0], "the",
        NP_trans_2[0]
    ])
    S1 = " ".join([
        D_trans_1[0], NP_trans_1[0], Aux_trans[0], V_trans[0], D_trans_2[0],
        NP_trans_2[0]
    ])

    option = random.choice([1, 2])
    if option == 1:
        # prenominal APs related by a transitive verb
        A1 = choice(self.in_domain_adjs_main)
        A1_ant = choice(
            get_all("expression", A1["antonym"], self.in_domain_adjs))
        A1_other = choice(
            get_all("expression", A1["synonym_hypernym_hyponym"],
                    self.in_domain_adjs))
        try:
            Subj = choice(get_matches_of(A1, "arg_1", all_common_nouns))
        except Exception:
            # This used to be swallowed, which only postponed the failure
            # to an UnboundLocalError a few statements later.
            raise MatchNotFoundError(
                "fail to find subject for adjective: %s" % A1[0])
        Obj = choice(
            get_matches_of(
                A1_ant, "arg_1",
                get_matches_of(A1_other, "arg_1", all_common_nouns)))
        D1 = choice(get_matched_by(Subj, "arg_1", self.safe_dets))
        D2 = choice(get_matched_by(Obj, "arg_1", self.safe_dets))
        try:
            V = choice(
                get_matched_by(
                    Subj, "arg_1",
                    get_matched_by(Obj, "arg_2", all_transitive_verbs)))
        except Exception:
            raise MatchNotFoundError(
                "fail to find verb with subj=%s and obj=%s" %
                (Subj[0], Obj[0]))
        Aux = return_aux(V, Subj, allow_negated=False)
        A2 = choice(
            get_matched_by(Subj, "arg_1", self.out_domain_adjs_main))
        try:
            A2_ant = choice(
                get_all("expression", A2["antonym"], self.out_domain_adjs))
            A2_other = choice(
                get_all("expression", A2["synonym_hypernym_hyponym"],
                        self.out_domain_adjs))
        except Exception:
            # Previously swallowed; see the note on Subj above.
            raise MatchNotFoundError(
                "fail to find antonym/other form of adjective: %s" % A2[0])
        if not (is_match_disj(Obj, A2_ant["arg_1"])
                and is_match_disj(Obj, A2_other["arg_1"])):
            raise MatchNotFoundError(
                "fail to match: %s %s %s %s %s " %
                (A2[0], A2_ant[0], A2_other[0], Subj[0], Obj[0]))

        def _body(det_1, det_2, adj_1, adj_2):
            # Tokens of "<det_1> A Subj Aux V <det_2> A' Obj".
            return [
                det_1, adj_1[0], Subj[0], Aux[0], V[0], det_2, adj_2[0],
                Obj[0]
            ]

        track_sentence = [
            (A1[0], Subj[0], V[0], A1_ant[0], Obj[0]),
            (A1[0], Subj[0], V[0], A1_other[0], Obj[0]),
            (A2[0], Subj[0], V[0], A2_ant[0], Obj[0]),
            (A2[0], Subj[0], V[0], A2_other[0], Obj[0]),
        ]
    else:
        # predicative AP
        A1 = choice(self.in_domain_adjs_main)
        A1_ant = choice(
            get_all("expression", A1["antonym"], self.in_domain_adjs))
        A1_other = choice(
            get_all("expression", A1["synonym_hypernym_hyponym"],
                    self.in_domain_adjs))
        Subj1 = choice(get_matches_of(A1, "arg_1", all_common_nouns))
        D1 = choice(get_matched_by(Subj1, "arg_1", self.safe_dets))
        Copula1 = return_copula(Subj1, allow_negated=False)
        A2 = choice(
            get_matched_by(Subj1, "arg_1", self.out_domain_adjs_main))
        A2_ant = choice(
            get_all("expression", A2["antonym"], self.out_domain_adjs))
        try:
            A2_other = choice(
                get_all("expression", A2["synonym_hypernym_hyponym"],
                        self.out_domain_adjs))
        except Exception:
            # This used to be swallowed, which only postponed the failure
            # to an UnboundLocalError on the next statement.
            raise MatchNotFoundError(
                "fail to find other form of adjective: %s" % A2[0])
        Subj2 = choice(
            get_matches_of(
                A1_ant, "arg_1",
                get_matches_of(
                    A1_other, "arg_1",
                    get_matches_of(
                        A2_ant, "arg_1",
                        get_matches_of(A2_other, "arg_1",
                                       all_common_nouns)))))
        D2 = choice(get_matched_by(Subj2, "arg_1", self.safe_dets))
        Copula2 = return_copula(Subj2, allow_negated=False)

        def _body(det_1, det_2, adj_1, adj_2):
            # Tokens of "<det_1> Subj1 Cop A and <det_2> Subj2 Cop A'".
            return [
                det_1, Subj1[0], Copula1[0], adj_1[0], "and", det_2,
                Subj2[0], Copula2[0], adj_2[0]
            ]

        # NOTE(review): the two out-of-domain rows record A1 rather than
        # A2, unlike the prenominal branch -- left unchanged pending
        # confirmation of how track_sentence is consumed.
        track_sentence = [
            (A1[0], Subj1[0], A1_ant[0], Subj2[0]),
            (A1[0], Subj1[0], A1_other[0], Subj2[0]),
            (A1[0], Subj1[0], A2_ant[0], Subj2[0]),
            (A1[0], Subj1[0], A2_other[0], Subj2[0]),
        ]

    def _the_initial(adj_1, adj_2):
        # Sentence whose first word is "the" (two layouts, one coin flip).
        if choice([True, False]):
            tokens = [S1_the_subj, "and"] + _body(D1[0], D2[0], adj_1,
                                                  adj_2)
        else:
            tokens = _body("the", D2[0], adj_1, adj_2) + ["and", S1]
        return " ".join(tokens + ["."])

    def _the_non_initial(adj_1, adj_2):
        # Sentence with "the" in one non-initial determiner slot (six
        # layouts, one randint draw).
        layout = random.randint(0, 5)
        if layout == 0:
            tokens = [S1_the_obj, "and"] + _body(D1[0], D2[0], adj_1, adj_2)
        elif layout == 1:
            tokens = [S1, "and"] + _body("the", D2[0], adj_1, adj_2)
        elif layout == 2:
            tokens = [S1, "and"] + _body(D1[0], "the", adj_1, adj_2)
        elif layout == 3:
            tokens = _body(D1[0], "the", adj_1, adj_2) + ["and", S1]
        elif layout == 4:
            tokens = _body(D1[0], D2[0], adj_1, adj_2) + [
                "and", S1_the_subj
            ]
        else:
            tokens = _body(D1[0], D2[0], adj_1, adj_2) + ["and", S1_the_obj]
        return " ".join(tokens + ["."])

    # The order of these eight calls fixes the order in which random draws
    # are consumed; keep it stable so seeded runs stay reproducible.
    training_1_1 = _the_initial(A1, A1_ant)
    training_0_0 = _the_non_initial(A1, A1_other)
    control_0_1 = _the_initial(A1, A1_other)
    control_1_0 = _the_non_initial(A1, A1_ant)
    test_1_0 = _the_non_initial(A2, A2_ant)
    test_0_1 = _the_initial(A2, A2_other)
    control_0_0 = _the_non_initial(A2, A2_other)
    control_1_1 = _the_initial(A2, A2_ant)

    data = self.build_paradigm(
        training_1_1=training_1_1,
        training_0_0=training_0_0,
        test_1_0=test_1_0,
        test_0_1=test_0_1,
        control_1_1=control_1_1,
        control_0_0=control_0_0,
        control_1_0=control_1_0,
        control_0_1=control_0_1,
    )
    return data, track_sentence
def sample(self):
    """Sample one presupposition paradigm triggered by "only".

    Example trigger: "John should only go to France".

    The sentence is rendered in five trigger environments (unembedded,
    negated, interrogative, under "might", and in an "if" clause) together
    with three candidate presuppositions (plain, negated, and one with an
    alternative subject).

    Returns:
        ``(data, presupposition)``: ``data`` is the value returned by
        ``self.build_presupposition_paradigm`` and ``presupposition`` is the
        plain "subject aux VP." sentence.
    """
    V = choice(all_verbs)
    V_args = negate_V_args(
        verb_args_from_verb(V,
                            allow_negated=False,
                            allow_modal=False,
                            allow_quantifiers=False))
    V_args = embed_V_args_under_modal(V_args)
    V_bare = get_bare_form(V)
    VP = V_to_VP_mutate(V, aux=False, args=V_args)
    N_alt = N_to_DP_mutate(choice(
        get_matches_of(
            V, "arg_1",
            get_matches_of(V_args["aux"], "arg_1", all_nominals))),
                           allow_quantifiers=False)

    # Surface strings that recur in several of the templates below.
    subj = V_args["subj"][0]
    aux = V_args["aux"][0]
    aux_neg = V_args["aux_neg"][0]
    verb_neg = V_args["verb_neg"][0]
    args_text = " ".join([x[0] for x in V_args["args"]])

    # With do-support "only" precedes the auxiliary; otherwise it follows it.
    if aux in ["does", "do", "did"]:
        unembedded_trigger = "%s only %s %s." % (subj, aux, VP[0])
    else:
        unembedded_trigger = "%s %s only %s." % (subj, aux, VP[0])

    negated_trigger = "%s %s only %s %s." % (subj, aux_neg, verb_neg,
                                             args_text)

    # Identity test against the None singleton (was `== None`).
    if V_args["aux_under_modal"] is None:
        modal_trigger = "%s might only %s." % (subj, VP[0])
    else:
        modal_trigger = "%s might %s only %s %s." % (
            subj, V_args["aux_under_modal"][0],
            V_args["verb_under_modal"][0], args_text)

    # Drop the final period before embedding the trigger in the conditional.
    conditional_trigger = "if %s, it's okay." % unembedded_trigger[:-1]

    if V["finite"] == "1":
        do = get_do_form(V)
        interrogative_trigger = "%s %s only %s %s?" % (
            do[0], subj, V_bare[0], join_args(V_args["args"]))
    else:
        interrogative_trigger = "%s %s only %s?" % (aux, subj, VP[0])

    presupposition = "%s %s %s." % (subj, aux, VP[0])
    negated_presupposition = "%s %s %s %s." % (subj, aux_neg, verb_neg,
                                               args_text)
    neutral_presupposition = "%s %s %s." % (N_alt[0], aux, VP[0])

    data = self.build_presupposition_paradigm(
        unembedded_trigger=unembedded_trigger,
        negated_trigger=negated_trigger,
        interrogative_trigger=interrogative_trigger,
        modal_trigger=modal_trigger,
        conditional_trigger=conditional_trigger,
        presupposition=presupposition,
        negated_presupposition=negated_presupposition,
        neutral_presupposition=neutral_presupposition)
    return data, presupposition
def sample_adj(self):
    """Sample one adjective (antonym vs. other relation) paradigm.

    A transitive carrier clause is built first; then, at random, either
    prenominal adjectives related by a transitive verb or predicative
    adjectives in two coordinated copular clauses are generated.

    Returns:
        tuple: ``(data, track_sentence)`` where ``data`` comes from
        ``self.build_paradigm`` and ``track_sentence`` lists the lexical
        items used in the sentences.

    Raises:
        MatchNotFoundError: when no compatible noun, verb or adjective can
            be sampled for the items chosen so far.
    """
    V_trans = choice(self.all_possibly_singular_transitive_verbs)
    NP_trans_1 = choice(
        get_matches_of(V_trans, "arg_1", self.all_singular_common_nouns))
    NP_trans_2 = choice(
        get_matches_of(V_trans, "arg_2", self.all_singular_common_nouns))
    D_trans_1 = choice(get_matched_by(NP_trans_1, "arg_1", self.safe_dets))
    D_trans_2 = choice(get_matched_by(NP_trans_2, "arg_1", self.safe_dets))
    Aux_trans = return_aux(V_trans, NP_trans_1)
    # Carrier clause with two open determiner slots.
    S1_abs = " ".join([
        "%s", NP_trans_1[0], Aux_trans[0], V_trans[0], "%s", NP_trans_2[0]
    ])

    def _determiner_configs(d1, d2):
        """Return four determiner tuples: two in-domain, two out-domain."""
        configs = []
        # There are three in-domain configurations (arbitrarily chosen)
        option = random.choice([1, 2, 3])
        if option == 1:
            configs.append(("the", "a", d1[0], d2[0]))
            configs.append(("a", "the", d1[0], d2[0]))
        elif option == 2:
            configs.append(("the", D_trans_2[0], d1[0], "a"))
            configs.append(("a", D_trans_2[0], d1[0], "the"))
        else:
            configs.append((D_trans_1[0], "the", d1[0], "a"))
            configs.append((D_trans_1[0], "a", d1[0], "the"))
        # There are three out-domain configurations (arbitrarily chosen)
        option = random.choice([1, 2, 3])
        if option == 1:
            configs.append(("the", D_trans_2[0], "a", d2[0]))
            configs.append(("a", D_trans_2[0], "the", d2[0]))
        elif option == 2:
            configs.append((D_trans_1[0], "the", "a", d2[0]))
            configs.append((D_trans_1[0], "a", "the", d2[0]))
        else:
            configs.append((D_trans_1[0], D_trans_2[0], "the", "a"))
            configs.append((D_trans_1[0], D_trans_2[0], "a", "the"))
        return configs

    if random.choice([1, 2]) == 1:
        # prenominal APs related by a transitive verb
        A1 = choice(self.in_domain_adjs_main)
        A1_ant = choice(
            get_all("expression", A1["antonym"], self.in_domain_adjs))
        A1_other = choice(
            get_all("expression", A1["synonym_hypernym_hyponym"],
                    self.in_domain_adjs))
        try:
            Subj = choice(
                get_matches_of(A1, "arg_1", self.all_singular_common_nouns))
        except Exception:
            # Previously swallowed, which only deferred the failure to an
            # UnboundLocalError on the next use of Subj.
            raise MatchNotFoundError(
                "fail to find subject noun for adjective %s" % A1[0])
        Obj = choice(
            get_matches_of(
                A1_ant, "arg_1",
                get_matches_of(A1_other, "arg_1",
                               self.all_singular_common_nouns)))
        D1 = choice(get_matched_by(Subj, "arg_1", self.safe_dets))
        D2 = choice(get_matched_by(Obj, "arg_1", self.safe_dets))
        try:
            V = choice(
                get_matched_by(
                    Subj, "arg_1",
                    get_matched_by(
                        Obj, "arg_2",
                        self.all_possibly_singular_transitive_verbs)))
        except Exception:
            raise MatchNotFoundError(
                "fail to find verb with subj=%s and obj=%s" %
                (Subj[0], Obj[0]))
        Aux = return_aux(V, Subj, allow_negated=False)
        A2 = choice(
            get_matched_by(Subj, "arg_1", self.out_domain_adjs_main))
        try:
            A2_ant = choice(
                get_all("expression", A2["antonym"], self.out_domain_adjs))
            A2_other = choice(
                get_all("expression", A2["synonym_hypernym_hyponym"],
                        self.out_domain_adjs))
        except Exception:
            raise MatchNotFoundError(
                "fail to find antonym/related adjective for %s" % A2[0])
        if not (is_match_disj(Obj, A2_ant["arg_1"])
                and is_match_disj(Obj, A2_other["arg_1"])):
            raise MatchNotFoundError(
                "fail to match: %s %s %s %s %s " %
                (A2[0], A2_ant[0], A2_other[0], Subj[0], Obj[0]))

        Ds = _determiner_configs(D1, D2)

        def _sentence(subj_adj, obj_adj):
            """Template with four determiner slots for the given adjectives."""
            return " ".join([
                S1_abs, "and", "%s", subj_adj[0], Subj[0], Aux[0], V[0],
                "%s", obj_adj[0], Obj[0], "."
            ])

        data = self.build_paradigm(
            training_1_1=_sentence(A1, A1_ant) % Ds[0],
            training_0_0=_sentence(A1, A1_other) % Ds[1],
            control_1_0=_sentence(A1, A1_ant) % Ds[1],
            control_0_1=_sentence(A1, A1_other) % Ds[0],
            test_1_0=_sentence(A2, A2_ant) % Ds[3],
            test_0_1=_sentence(A2, A2_other) % Ds[2],
            control_1_1=_sentence(A2, A2_ant) % Ds[2],
            control_0_0=_sentence(A2, A2_other) % Ds[3],
        )
        track_sentence = [
            (A1[0], Subj[0], V[0], A1_ant[0], Obj[0]),
            (A1[0], Subj[0], V[0], A1_other[0], Obj[0]),
            (A2[0], Subj[0], V[0], A2_ant[0], Obj[0]),
            (A2[0], Subj[0], V[0], A2_other[0], Obj[0]),
        ]
        return data, track_sentence

    # predicative AP
    A1 = choice(self.in_domain_adjs_main)
    A1_ant = choice(
        get_all("expression", A1["antonym"], self.in_domain_adjs))
    A1_other = choice(
        get_all("expression", A1["synonym_hypernym_hyponym"],
                self.in_domain_adjs))
    Subj1 = choice(
        get_matches_of(A1, "arg_1", self.all_singular_common_nouns))
    D1 = choice(get_matched_by(Subj1, "arg_1", self.safe_dets))
    Copula1 = return_copula(Subj1, allow_negated=False)
    A2 = choice(get_matched_by(Subj1, "arg_1", self.out_domain_adjs_main))
    A2_ant = choice(
        get_all("expression", A2["antonym"], self.out_domain_adjs))
    try:
        A2_other = choice(
            get_all("expression", A2["synonym_hypernym_hyponym"],
                    self.out_domain_adjs))
        # The second subject must be compatible with all four adjectives
        # that can appear in its predicate.
        Subj2 = choice(
            get_matches_of(
                A1_ant, "arg_1",
                get_matches_of(
                    A1_other, "arg_1",
                    get_matches_of(
                        A2_ant, "arg_1",
                        get_matches_of(A2_other, "arg_1",
                                       self.all_singular_common_nouns)))))
    except Exception:
        raise MatchNotFoundError(
            "fail to find second subject for adjectives %s %s %s" %
            (A1_ant[0], A1_other[0], A2_ant[0]))
    D2 = choice(get_matched_by(Subj2, "arg_1", self.safe_dets))
    Copula2 = return_copula(Subj2, allow_negated=False)

    Ds = _determiner_configs(D1, D2)

    def _sentence(first_adj, second_adj):
        """Template with four determiner slots for the given adjectives."""
        return " ".join([
            S1_abs, "and", "%s", Subj1[0], Copula1[0], first_adj[0], "and",
            "%s", Subj2[0], Copula2[0], second_adj[0], "."
        ])

    data = self.build_paradigm(
        training_1_1=_sentence(A1, A1_ant) % Ds[0],
        training_0_0=_sentence(A1, A1_other) % Ds[1],
        control_1_0=_sentence(A1, A1_ant) % Ds[1],
        control_0_1=_sentence(A1, A1_other) % Ds[0],
        test_1_0=_sentence(A2, A2_ant) % Ds[3],
        test_0_1=_sentence(A2, A2_other) % Ds[2],
        control_1_1=_sentence(A2, A2_ant) % Ds[2],
        control_0_0=_sentence(A2, A2_other) % Ds[3],
    )
    # The out-domain sentences use A2 as the first adjective, so record A2
    # (not A1) for them, matching the prenominal branch.
    track_sentence = [
        (A1[0], Subj1[0], A1_ant[0], Subj2[0]),
        (A1[0], Subj1[0], A1_other[0], Subj2[0]),
        (A2[0], Subj1[0], A2_ant[0], Subj2[0]),
        (A2[0], Subj1[0], A2_other[0], Subj2[0]),
    ]
    return data, track_sentence
def sample(self):
    """Sample one paradigm contrasting a relative clause on the matrix
    subject (training) with one on the embedded subject (test).

    Returns:
        tuple: ``(data, track_sentence)`` where ``data`` comes from
        ``self.build_paradigm`` and ``track_sentence`` holds one tuple of
        the lexical items per generated sentence.
    """
    # Training 1/1
    # The man who helped a girl thinks that that guy found a cat.
    # THE NP1 rel V1 D2 NP2 cp_verb THAT D3 NP3 V2 D4 NP4

    # Training 0/0
    # This man who helped a girl thinks that that guy found the cat.
    # D1 NP1 rel V1 D2 NP2 cp_verb THAT D3 NP3 V2 THE NP4

    # Test 1/0
    # The man thinks that that guy who helped a girl found a cat.
    # THE NP1 cp_verb THAT D3 NP3 rel V1 D2 NP2 V2 D4 NP4

    # Test 0/1
    # This man thinks that that guy who helped the girl found the cat.
    # D1 NP1 cp_verb THAT D3 NP3 rel V1 THE NP2 V2 THE NP4

    cp_verb = choice(self.cp_verbs)
    # A failed lookup now propagates its own error; it used to be swallowed
    # and resurfaced as an UnboundLocalError on the next line.
    NP1 = choice(get_matches_of(cp_verb, "arg_1", all_common_nouns))
    D1 = choice(get_matched_by(NP1, "arg_1", self.safe_dets))
    rel1 = choice(get_matched_by(NP1, "arg_1", all_relativizers))
    V1 = choice(get_matched_by(NP1, "arg_1", all_transitive_verbs))
    NP2 = choice(get_matches_of(V1, "arg_2", all_common_nouns))
    D2 = choice(get_matched_by(NP2, "arg_1", self.safe_dets))
    # NOTE(review): rel2 is matched against NP2 but is emitted right after
    # NP3 in the test sentences -- confirm this is intended.
    rel2 = choice(get_matched_by(NP2, "arg_1", all_relativizers))
    Aux1 = return_aux(V1, NP1)
    # NP3 must also work as the subject of V1 with the same auxiliary.
    NP3 = choice(
        get_matches_of(V1, "arg_1",
                       get_matches_of(Aux1, "arg_1", all_common_nouns)))
    D3 = choice(get_matched_by(NP3, "arg_1", self.safe_dets))
    V2 = choice(get_matched_by(NP3, "arg_1", all_transitive_verbs))
    NP4 = choice(get_matches_of(V2, "arg_2", all_common_nouns))
    D4 = choice(get_matched_by(NP4, "arg_1", self.safe_dets))
    Aux_cp = return_aux(cp_verb, NP1)
    Aux2 = return_aux(V2, NP3)

    # Every entry lists determiners in the linear order in which the nouns
    # appear in the sentence it belongs to.
    Ds = []
    # Training 1/1 (order NP1 NP2 NP3 NP4)
    Ds.append(["the", D2[0], D3[0], D4[0]])
    # There are three in-domain configurations (arbitrarily chosen)
    option = random.choice([1, 2, 3])
    if option == 1:
        Ds.append([D1[0], "the", D3[0], D4[0]])
    elif option == 2:
        Ds.append([D1[0], D2[0], "the", D4[0]])
    else:
        Ds.append([D1[0], D2[0], D3[0], "the"])
    # Test 1/0 (order NP1 NP3 NP2 NP4)
    Ds.append(["the", D3[0], D2[0], D4[0]])
    # There are three out-domain configurations (arbitrarily chosen)
    option = random.choice([1, 2, 3])
    if option == 1:
        Ds.append([D1[0], "the", D2[0], D4[0]])
    elif option == 2:
        Ds.append([D1[0], D3[0], "the", D4[0]])
    else:
        Ds.append([D1[0], D3[0], D2[0], "the"])

    lexical_items = (NP1[0], V1[0], NP2[0], cp_verb[0], NP3[0], V2[0],
                     NP4[0])
    track_sentence = [
        lexical_items,  # training 1/1
        lexical_items,  # training 0/0
        lexical_items,  # Test 1/0
        lexical_items,  # Test 0/1
    ]

    def _training(dets):
        """Relative clause attached to the matrix subject NP1."""
        return " ".join([
            dets[0], NP1[0], rel1[0], Aux1[0], V1[0], dets[1], NP2[0],
            Aux_cp[0], cp_verb[0], "that", dets[2], NP3[0], Aux2[0], V2[0],
            dets[3], NP4[0]
        ])

    def _test(dets):
        """Relative clause attached to the embedded subject NP3."""
        # dets is in linear order, so slot 1 belongs to NP3 and slot 2 to
        # NP2; the previous indexing swapped them and paired each noun
        # with the determiner selected for the other one.
        return " ".join([
            dets[0], NP1[0], Aux_cp[0], cp_verb[0], "that", dets[1], NP3[0],
            rel2[0], Aux1[0], V1[0], dets[2], NP2[0], Aux2[0], V2[0],
            dets[3], NP4[0]
        ])

    data = self.build_paradigm(
        training_1_1=_training(Ds[0]),
        training_0_0=_training(Ds[1]),
        test_1_0=_test(Ds[2]),
        test_0_1=_test(Ds[3]),
    )
    return data, track_sentence
def sample(self):
    """Sample one control vs. raising paradigm.

    A transitive carrier clause (S1) is coordinated with a clause headed by
    a control or raising predicate. Items labelled ``*_1`` on the second
    feature carry "the" on a subject; items labelled ``*_0`` carry "the" on
    an object.

    Returns:
        tuple: ``(data, track_sentence)`` where ``data`` comes from
        ``self.build_paradigm`` and ``track_sentence`` lists the lexical
        items of the control/raising clause.
    """
    # Training 1
    # John compelled Mary to leave.
    # DP1 Aux1 V_control_in DP2 TO VP

    # Training 0
    # John wanted Mary to leave.
    # DP1 Aux1 V_raising_in DP2 TO VP

    # Test/control 1 (out-of-domain predicate)
    # John convinced Mary to leave.
    # DP1 Aux1 V_control_out DP2 TO VP

    # Test/control 0 (out-of-domain predicate)
    # John considered Mary to leave.
    # DP1 Aux1 V_raising_out DP2 TO VP

    V_trans = choice(all_transitive_verbs)
    NP_trans_1 = choice(get_matches_of(V_trans, "arg_1", all_common_nouns))
    NP_trans_2 = choice(get_matches_of(V_trans, "arg_2", all_common_nouns))
    D_trans_1 = choice(get_matched_by(NP_trans_1, "arg_1", self.safe_dets))
    D_trans_2 = choice(get_matched_by(NP_trans_2, "arg_1", self.safe_dets))
    Aux_trans = return_aux(V_trans, NP_trans_1)
    S1 = " ".join([
        D_trans_1[0], NP_trans_1[0], Aux_trans[0], V_trans[0], D_trans_2[0],
        NP_trans_2[0]
    ])
    S1_the_subj = " ".join([
        "the", NP_trans_1[0], Aux_trans[0], V_trans[0], D_trans_2[0],
        NP_trans_2[0]
    ])
    S1_the_obj = " ".join([
        D_trans_1[0], NP_trans_1[0], Aux_trans[0], V_trans[0], "the",
        NP_trans_2[0]
    ])

    option = random.choice([1, 2, 3])
    if option == 1:
        # subject control/raising
        V_control_in = choice(self.v_control_subj_in)
        NP1 = choice(
            get_matches_of(V_control_in, "arg_1", all_common_nouns))
        D1 = choice(get_matched_by(NP1, "arg_1", self.safe_dets))
        Aux1 = return_aux(V_control_in, NP1)
        V = choice(
            get_matches_of(
                V_control_in, "arg_2",
                get_matched_by(NP1, "arg_1",
                               self.all_bare_transitive_verbs)))
        NP2 = choice(get_matches_of(V, "arg_2", all_common_nouns))
        D2 = choice(get_matched_by(NP2, "arg_1", self.safe_dets))
        V_control_out = choice(
            get_matched_by(
                NP1, "arg_1",
                get_matches_of(
                    Aux1, "arg_2",
                    get_matched_by(V, "arg_2", self.v_control_subj_out))))
        V_raising_in = choice(
            get_matched_by(
                NP1, "arg_1",
                get_matches_of(
                    Aux1, "arg_2",
                    get_matched_by(V, "arg_2", self.v_raising_subj_in))))
        V_raising_out = choice(
            get_matched_by(
                NP1, "arg_1",
                get_matches_of(
                    Aux1, "arg_2",
                    get_matched_by(V, "arg_2", self.v_raising_subj_out))))
        to = "to"
    elif option == 2:
        # object control/raising
        V_control_in = choice(self.v_control_obj_in)
        NP1 = choice(
            get_matches_of(V_control_in, "arg_1", all_common_nouns))
        D1 = choice(get_matched_by(NP1, "arg_1", self.safe_dets))
        Aux1 = return_aux(V_control_in, NP1)
        control_obj = N_to_DP_mutate(
            choice(get_matches_of(V_control_in, "arg_2")))
        V = choice(
            get_matches_of(
                V_control_in, "arg_3",
                get_matched_by(NP1, "arg_1",
                               self.all_bare_transitive_verbs)))
        NP2 = choice(get_matches_of(V, "arg_2", all_common_nouns))
        D2 = choice(get_matched_by(NP2, "arg_1", self.safe_dets))
        # Lookup failures propagate directly; they used to be swallowed and
        # then crashed later with an UnboundLocalError.
        V_control_out = choice(
            get_matched_by(
                NP1, "arg_1",
                get_matches_of(
                    Aux1, "arg_2",
                    get_matched_by(
                        V, "arg_3",
                        get_matched_by(control_obj, "arg_2",
                                       self.v_control_obj_out)))))
        V_raising_in = choice(
            get_matched_by(
                NP1, "arg_1",
                get_matches_of(Aux1, "arg_2", self.v_raising_obj_in)))
        V_raising_out = choice(
            get_matched_by(
                NP1, "arg_1",
                get_matches_of(Aux1, "arg_2", self.v_raising_obj_out)))
        to = control_obj[0] + " to"
    else:
        # adjective control/raising
        V_control_in = choice(self.adj_control_subj_in)
        NP1 = choice(
            get_matches_of(V_control_in, "arg_1", all_common_nouns))
        D1 = choice(get_matched_by(NP1, "arg_1", self.safe_dets))
        Aux1 = return_copula(NP1)
        V = choice(
            get_matches_of(
                V_control_in, "arg_2",
                get_matched_by(NP1, "arg_1",
                               self.all_bare_transitive_verbs)))
        NP2 = choice(get_matches_of(V, "arg_2", all_common_nouns))
        D2 = choice(get_matched_by(NP2, "arg_1", self.safe_dets))
        V_control_out = choice(
            get_matched_by(
                NP1, "arg_1",
                get_matched_by(V, "arg_2", self.adj_control_subj_out)))
        V_raising_in = choice(self.adj_raising_subj_in)
        V_raising_out = choice(self.adj_raising_subj_out)
        to = "to"

    def _the_on_subject(predicate):
        """Sentence whose only "the" sits on a subject (S1's or NP1's)."""
        if random.randint(0, 1) == 1:
            return " ".join([
                S1_the_subj, "and", D1[0], NP1[0], Aux1[0], predicate[0],
                to, V[0], D2[0], NP2[0], "."
            ])
        # The object keeps its own determiner D2. training_1_1 used to put
        # "the" here as well, which placed "the" on a subject and an object
        # at once, unlike the other subject-"the" items.
        return " ".join([
            "the", NP1[0], Aux1[0], predicate[0], to, V[0], D2[0], NP2[0],
            "and", S1, "."
        ])

    def _the_on_object(predicate):
        """Sentence whose only "the" sits on an object (S1's or NP2's)."""
        if random.randint(0, 1) == 1:
            return " ".join([
                S1_the_obj, "and", D1[0], NP1[0], Aux1[0], predicate[0], to,
                V[0], D2[0], NP2[0], "."
            ])
        return " ".join([
            D1[0], NP1[0], Aux1[0], predicate[0], to, V[0], "the", NP2[0],
            "and", S1, "."
        ])

    # One coin flip per sentence, drawn in this fixed order.
    training_1_1 = _the_on_subject(V_control_in)
    test_0_1 = _the_on_subject(V_raising_out)
    control_1_1 = _the_on_subject(V_control_out)
    control_0_1 = _the_on_subject(V_raising_in)
    training_0_0 = _the_on_object(V_raising_in)
    test_1_0 = _the_on_object(V_control_out)
    control_0_0 = _the_on_object(V_raising_out)
    control_1_0 = _the_on_object(V_control_in)

    data = self.build_paradigm(training_1_1=training_1_1,
                               training_0_0=training_0_0,
                               test_1_0=test_1_0,
                               test_0_1=test_0_1,
                               control_1_1=control_1_1,
                               control_0_0=control_0_0,
                               control_1_0=control_1_0,
                               control_0_1=control_0_1)

    track_sentence = [
        (NP1[0], Aux1[0], V_control_in[0], to, V[0], NP2[0], "."),
        (NP1[0], Aux1[0], V_raising_in[0], to, V[0], NP2[0], "."),
        (NP1[0], Aux1[0], V_control_out[0], to, V[0], NP2[0], "."),
        (NP1[0], Aux1[0], V_raising_out[0], to, V[0], NP2[0], "."),
        (NP1[0], Aux1[0], V_control_out[0], to, V[0], NP2[0], "."),
        (NP1[0], Aux1[0], V_raising_out[0], to, V[0], NP2[0], ".")
    ]
    return data, track_sentence
def sample(self):
    """Sample a good/bad pair for agreement across a relative clause.

    The subject is modified by a relative clause whose object differs from
    the subject in number; the good sentence agrees with the subject, the
    bad one does not.

    Returns:
        tuple: ``(data, data["sentence_good"])`` where ``data`` holds both
        sentences, their shared prefix and the two critical words.
    """
    # The cat that was eating the mice is sleeping
    # Subj Rel Aux_Emb V_emb Obj_emb Aux_agree V_mat_agree args

    # The cat that was eating the mice are sleeping
    # Subj Rel Aux_Emb V_emb Obj_emb Aux_not_agree V_mat_not_agree args

    embedded_verb = None
    # Resample everything until an embedded verb compatible with both the
    # subject and the distractor object exists.
    while embedded_verb is None:
        matrix_verb = choice(self.safe_mat_verbs)
        subj = N_to_DP_mutate(
            choice(get_matches_of(matrix_verb, "arg_1", self.all_reg_nouns)))
        rel = choice(
            get_matched_by(subj, "arg_1", get_all("category_2", "rel")))

        if matrix_verb["finite"] != "1":
            # Non-finite verbs show no agreement; the auxiliary carries it.
            mismatched_verb = matrix_verb
        else:
            flipped_3sg = "0" if matrix_verb["3sg"] == "1" else "1"
            mismatched_verb = choice(
                get_all_conjunctive([("pres", "1"), ("3sg", flipped_3sg)],
                                    get_all("root", matrix_verb["root"])))

        # The distractor takes the opposite number of the subject.
        if subj["pl"] == "1":
            distractor_pool = get_all_singular_nouns()
        else:
            distractor_pool = get_all_plural_nouns()
        distractor = N_to_DP_mutate(
            choice(get_matches_of(mismatched_verb, "arg_1",
                                  distractor_pool)))

        try:
            embedded_verb = choice(
                get_matched_by(
                    subj, "arg_1",
                    get_matched_by(distractor, "arg_2",
                                   self.safe_emb_verbs)))
        except IndexError:
            continue

    embedded_aux = return_aux(embedded_verb, subj)
    aux_pair = require_aux_agree(matrix_verb, subj)
    agreeing_aux = aux_pair["aux_agree"]
    clashing_aux = aux_pair["aux_nonagree"]
    matrix_args = verb_args_from_verb(matrix_verb,
                                      subj=subj,
                                      aux=agreeing_aux)

    prefix = "%s %s %s %s %s" % (subj[0], rel[0], embedded_aux[0],
                                 embedded_verb[0], distractor[0])
    if matrix_verb["finite"] == "1":
        word_good, word_bad = matrix_verb[0], mismatched_verb[0]
    else:
        word_good, word_bad = agreeing_aux, clashing_aux

    sentence_good = "%s %s %s %s." % (prefix, agreeing_aux, matrix_verb[0],
                                      join_args(matrix_args["args"]))
    sentence_bad = "%s %s %s %s." % (prefix, clashing_aux,
                                     mismatched_verb[0],
                                     join_args(matrix_args["args"]))
    data = {
        "sentence_good": sentence_good,
        "sentence_bad": sentence_bad,
        "one_prefix_prefix": prefix,
        "one_prefix_word_good": word_good,
        "one_prefix_word_bad": word_bad
    }
    return data, data["sentence_good"]
def sample(self):
    """Build one presupposition paradigm for the trigger "both".

    The subject noun is modified by a relative clause and quantified by
    "both"; the presupposition states that there are exactly two such
    individuals.

    Returns:
        tuple: ``(data, presupposition)`` where ``data`` is whatever
        ``self.build_presupposition_paradigm`` returns.
    """
    # Both cats like mice
    V = choice(all_possibly_plural_verbs)
    V_bare = get_bare_form(V)
    N_subj = choice(get_matches_of(V, "arg_1", self.safe_nouns))
    N_subj_alt = choice(get_matches_of(V, "arg_1", self.safe_nouns),
                        avoid=N_subj)
    V_args = verb_args_from_verb(V,
                                 subj=N_subj,
                                 allow_negated=False,
                                 allow_modal=False,
                                 allow_quantifiers=False)
    RC = verb_phrase_from_subj(N_subj)
    rel = choice(get_matched_by(N_subj, "arg_1", all_relativizers))
    V_neg, Aux_neg = negate_VP(V, V_args["aux"])
    V_args = embed_V_args_under_modal(V_args)

    # Pieces shared by all sentences.
    restricted_subj = "%s %s %s" % (N_subj[0], rel[0], RC[0])
    aux = V_args["aux"][0]
    verb = V_args["verb"][0]
    args_str = join_args(V_args["args"])

    unembedded_trigger = "both %s %s %s %s." % (restricted_subj, aux, verb,
                                                args_str)
    negated_trigger = "both %s %s %s %s." % (restricted_subj, Aux_neg[0],
                                             V_neg[0], args_str)

    # Identity test rather than ``== None``.
    if V_args["aux_under_modal"] is None:
        modal_trigger = "both %s might %s %s." % (restricted_subj,
                                                  V_bare[0], args_str)
    else:
        modal_trigger = "both %s might %s %s %s." % (
            restricted_subj, V_args["aux_under_modal"][0],
            V_args["verb_under_modal"][0], args_str)

    conditional_trigger = "if both %s %s %s %s, it's okay." % (
        restricted_subj, aux, verb, args_str)

    if V["finite"] == "1":
        do = get_do_form(V)
        # A question ends in "?"; this branch used to end in "." unlike the
        # non-finite branch below.
        interrogative_trigger = "%s both %s %s %s?" % (
            do[0], restricted_subj, V_bare[0], args_str)
    else:
        interrogative_trigger = "%s both %s %s %s?" % (
            aux, restricted_subj, verb, args_str)

    # NOTE(review): the presupposition strings below are inconsistent in
    # final punctuation and capitalisation; left untouched because
    # build_presupposition_paradigm may normalise them -- confirm.
    presupposition = "there are exactly two %s" % restricted_subj
    if np.random.choice([True, False]):
        negated_options = [
            "there are exactly three %s %s %s.",
            "There are more than two %s %s %s.",
            "There are dozens of %s %s %s."
        ]
        negated_presupposition = np.random.choice(negated_options) % (
            N_subj[0], rel[0], RC[0])
    else:
        negated_presupposition = "there aren't exactly two %s" % (
            restricted_subj)
    neutral_presupposition = "there are exactly two %s %s %s" % (
        N_subj_alt[0], rel[0], RC[0])

    data = self.build_presupposition_paradigm(
        unembedded_trigger=unembedded_trigger,
        negated_trigger=negated_trigger,
        interrogative_trigger=interrogative_trigger,
        modal_trigger=modal_trigger,
        conditional_trigger=conditional_trigger,
        presupposition=presupposition,
        negated_presupposition=negated_presupposition,
        neutral_presupposition=neutral_presupposition)
    return data, presupposition
def sample(self):
    """
    Sample one paradigm; each sentence is a transitive carrier clause
    coordinated with a copular clause. In the code below, items labelled
    ``*_1`` on the second feature place "the" before "a" and items labelled
    ``*_0`` place "a" before "the".

    Training 1/1
    The girl saw a cat and John is the tall man.
    The girl saw a cat and the tall man is in the room.
    The girl saw a cat and the man is tall.
    The girl saw a cat and the man in the room is tall.

    Training 0/0
    A girl saw a cat and John is a man.
    A girl saw a cat and John is the man in a room.
    A girl saw a cat and a man is John.

    Test 1/0
    A girl saw a cat and John is a tall man in a room.
    A girl saw a cat and John is tall.
    A girl saw a cat and a tall man is John.
    A girl saw a cat and a tall man in a room is John.
    A girl saw a cat and a tall man is president.
    A girl saw a cat and a tall man in the room is president.

    Test 0/1
    The girl saw a cat and John is in the room.
    The girl saw a cat and The man is in the room.
    The girl saw a cat and The man in the room is John.
    The girl saw a cat and John is president.
    The girl saw a cat and The man is president.
    The girl saw a cat and the man in the room is president.

    Control 1/1
    The girl saw a cat and John is a tall man in a room.
    The girl saw a cat and John is tall.
    The girl saw a cat and a tall man is John.
    The girl saw a cat and a tall man in a room is John.
    The girl saw a cat and a tall man is president.
    The girl saw a cat and a tall man in the room is president.

    Control 0/0
    A girl saw a cat and John is in a room.
    A girl saw a cat and a man is in a room.
    A girl saw a cat and a man in a room is John.
    A girl saw a cat and John is president.
    A girl saw a cat and a man is president.
    A girl saw a cat and a man in a room is president.

    Returns a ``(data, track_sentence)`` pair: the result of
    ``self.build_paradigm`` and a list of tuples of the in-domain lexical
    items.
    """
    v_trans = choice(self.all_possibly_singular_transitive_verbs)
    subj = choice(
        get_matches_of(v_trans, "arg_1", self.all_singular_common_nouns))
    aux = return_aux(v_trans, subj)
    D_subj = choice(get_matched_by(subj, "arg_1", self.safe_determiners))
    obj = choice(
        get_matches_of(v_trans, "arg_2", self.all_singular_common_nouns))
    D_obj = choice(get_matched_by(obj, "arg_1", self.safe_determiners))

    def _carrier(subj_det, obj_det):
        """Carrier clause with the given determiners, ending in "and"."""
        return " ".join([
            subj_det, subj[0], aux[0], v_trans[0], obj_det, obj[0], "and"
        ])

    S1 = _carrier(D_subj[0], D_obj[0])
    S1_the_subj = _carrier("the", D_obj[0])
    S1_the_obj = _carrier(D_subj[0], "the")
    S1_a_subj = _carrier("a", D_obj[0])
    S1_a_obj = _carrier(D_subj[0], "a")
    S1_the_a = _carrier("the", "a")
    S1_a_the = _carrier("a", "the")

    name_in = choice(self.names_in_domain)
    name_out = choice(self.names_out_domain)
    # Common nouns must be gender-compatible with the sampled name.
    noun_in = choice(
        np.array([
            x for x in self.common_nouns_in_domain
            if x["gender"] == name_in["gender"] or x["gender"] == "n"
            or x["gender"] == ""
        ]))
    noun_out = choice(
        np.array([
            x for x in self.common_nouns_out_domain
            if x["gender"] == name_out["gender"] or x["gender"] == "n"
            or x["gender"] == ""
        ]))
    D_in = choice(get_matched_by(noun_in, "arg_1", self.safe_determiners))
    D_out = choice(get_matched_by(noun_out, "arg_1", self.safe_determiners))
    adj_in = choice(self.adjs_in_domain)
    adj_out = choice(self.adjs_out_domain)
    locative_in = build_locative(choice(self.locales_in_domain),
                                 allow_quantifiers=False,
                                 bind_det=True)
    locative_out = build_locative(choice(self.locales_out_domain),
                                  allow_quantifiers=False,
                                  bind_det=True)
    D_loc_in = choice(
        get_matched_by(locative_in, "arg_1", self.safe_determiners))
    D_loc_out = choice(
        get_matched_by(locative_out, "arg_1", self.safe_determiners))
    # The locative string has one open determiner slot.
    locative_in_d = locative_in[0] % D_loc_in[0]
    locative_out_d = locative_out[0] % D_loc_out[0]
    locative_in_the = locative_in[0] % "the"
    locative_out_the = locative_out[0] % "the"
    locative_in_a = locative_in[0] % "a"
    locative_out_a = locative_out[0] % "a"
    other_noun = choice(
        np.array([
            x for x in self.one_word_noun
            if x["gender"] == name_out["gender"] or x["gender"] == "n"
        ]))

    # Plain-string shorthands for the sentence tables below.
    nm_in, n_in, a_in, d_in = name_in[0], noun_in[0], adj_in[0], D_in[0]
    nm_out, n_out, a_out, d_out = (name_out[0], noun_out[0], adj_out[0],
                                   D_out[0])
    other = other_noun[0]

    tracked = (nm_in, n_in, a_in, locative_in[0])
    track_sentence = [tracked] * 6

    # Each table lists the alternative word sequences for one paradigm
    # cell; one integer is drawn per cell, in the order of the tables, and
    # used as an index.

    # Training_1_1
    training_1_forms = [
        [S1_the_subj, nm_in, "is", "a", a_in, n_in],
        [S1, "the", a_in, n_in, "is", locative_in_a],
        [S1_the_subj, d_in, a_in, n_in, "is", locative_in_a],
        [S1_the_subj, "a", a_in, n_in, "is", locative_in_d],
        [S1_the_subj, "a", n_in, "is", a_in],
        [S1, "the", n_in, locative_in_a, "is", a_in],
        [S1_the_subj, "a", n_in, locative_in_d, "is", a_in],
        [S1_the_subj, d_in, n_in, locative_in_a, "is", a_in],
    ]
    training_1 = " ".join(training_1_forms[random.randint(0, 7)])

    # Training_0_0
    training_0_forms = [
        [S1_a_subj, nm_in, "is", "the", n_in],
        [S1_a_subj, nm_in, "is", "the", n_in, locative_in_d],
        [S1_a_subj, nm_in, "is", d_in, n_in, locative_in_the],
        [S1, nm_in, "is", "a", n_in, locative_in_the],
        [S1_a_subj, "the", n_in, "is", nm_in],
    ]
    training_0 = " ".join(training_0_forms[random.randint(0, 4)])

    # Control_1_0
    control_1_0_forms = [
        [S1_a_subj, nm_in, "is", "the", a_in, n_in],
        [S1, "a", a_in, n_in, "is", locative_in_the],
        [S1_a_subj, d_in, a_in, n_in, "is", locative_in_the],
        [S1_a_subj, "the", a_in, n_in, "is", locative_in_d],
        [S1_a_subj, "the", n_in, "is", a_in],
        [S1, "a", n_in, locative_in_the, "is", a_in],
        [S1_a_subj, "the", n_in, locative_in_d, "is", a_in],
        [S1_a_subj, d_in, n_in, locative_in_the, "is", a_in],
    ]
    control_1_0 = " ".join(control_1_0_forms[random.randint(0, 7)])

    # Control_0_1
    control_0_1_forms = [
        [S1_the_subj, nm_in, "is", "a", n_in],
        [S1_the_subj, nm_in, "is", "a", n_in, locative_in_d],
        [S1_the_subj, nm_in, "is", d_in, n_in, locative_in_a],
        [S1, nm_in, "is", "the", n_in, locative_in_a],
        [S1_the_subj, "a", n_in, "is", nm_in],
    ]
    control_0_1 = " ".join(control_0_1_forms[random.randint(0, 4)])

    # Test_1_0 (index 0 is the form the original if/elif ladder reached
    # through its final ``else``)
    test_1_0_forms = [
        [S1_a_the, d_out, a_out, n_out, locative_out_d, "is", other],
        [S1_a_obj, nm_out, "is", "the", a_out, n_out, locative_out_d],
        [S1_a_obj, nm_out, "is", d_out, a_out, n_out, locative_out_the],
        [S1_a_the, nm_out, "is", d_out, a_out, n_out, locative_out_the],
        [S1_a_obj, "the", a_out, n_out, "is", nm_out],
        [S1_a_the, d_out, a_out, n_out, "is", nm_out],
        [S1_a_obj, "the", a_out, n_out, locative_out_d, "is", nm_out],
        [S1_a_obj, d_out, a_out, n_out, locative_out_the, "is", nm_out],
        [S1_a_the, d_out, a_out, n_out, locative_out_d, "is", nm_out],
        [S1_a_obj, "the", a_out, n_out, "is", other],
        [S1_a_the, d_out, a_out, n_out, "is", other],
        [S1_a_obj, "the", a_out, n_out, locative_out_d, "is", other],
        [S1_a_obj, d_out, a_out, n_out, locative_out_the, "is", other],
    ]
    test_1_0 = " ".join(test_1_0_forms[random.randint(0, 12)])

    # Control_1_1
    control_1_1_forms = [
        [S1_the_obj, nm_out, "is", "a", a_out, n_out, locative_out_d],
        [S1_the_obj, nm_out, "is", d_out, a_out, n_out, locative_out_a],
        [S1_the_a, nm_out, "is", d_out, a_out, n_out, locative_out_d],
        [S1_the_obj, "a", a_out, n_out, "is", nm_out],
        [S1_the_a, d_out, a_out, n_out, "is", nm_out],
        [S1_the_obj, "a", a_out, n_out, locative_out_d, "is", nm_out],
        [S1_the_obj, d_out, a_out, n_out, locative_out_a, "is", nm_out],
        [S1_the_a, d_out, a_out, n_out, locative_out_d, "is", nm_out],
        [S1_the_obj, "a", a_out, n_out, "is", other],
        [S1_the_a, d_out, a_out, n_out, "is", other],
        [S1_the_obj, "a", a_out, n_out, locative_out_d, "is", other],
        [S1_the_obj, d_out, a_out, n_out, locative_out_a, "is", other],
        [S1_the_a, d_out, a_out, n_out, locative_out_d, "is", other],
    ]
    control_1_1 = " ".join(control_1_1_forms[random.randint(0, 12)])

    # Test_0_1
    test_0_1_forms = [
        [S1_the_obj, "a", n_out, "is", locative_out_d],
        [S1_the_obj, d_out, n_out, "is", locative_out_a],
        [S1_the_a, d_out, n_out, "is", locative_out_d],
        [S1_the_obj, "a", n_out, locative_out_d, "is", nm_out],
        [S1_the_obj, d_out, n_out, locative_out_a, "is", nm_out],
        [S1_the_a, d_out, n_out, locative_out_d, "is", nm_out],
        [S1_the_obj, "a", n_out, "is", other],
        [S1_the_a, d_out, n_out, "is", other],
        # Was "the": that left no "a" after "the", unlike every sibling
        # form and unlike the mirrored Control_0_0 form.
        [S1_the_obj, "a", n_out, locative_out_d, "is", other],
        [S1_the_obj, d_out, n_out, locative_out_a, "is", other],
        [S1_the_a, d_out, n_out, locative_out_d, "is", other],
    ]
    test_0_1 = " ".join(test_0_1_forms[random.randint(0, 10)])

    # Control_0_0
    control_0_0_forms = [
        [S1_a_obj, nm_out, "is", locative_out_the],
        [S1_a_the, nm_out, "is", locative_out_d],
        [S1_a_obj, "the", n_out, "is", locative_out_d],
        [S1_a_obj, d_out, n_out, "is", locative_out_the],
        [S1_a_the, d_out, n_out, "is", locative_out_d],
        [S1_a_obj, "the", n_out, locative_out_d, "is", nm_out],
        [S1_a_obj, d_out, n_out, locative_out_the, "is", nm_out],
        [S1_a_the, d_out, n_out, locative_out_d, "is", nm_out],
        [S1_a_the, nm_out, "is", other],
        [S1_a_obj, "the", n_out, "is", other],
        [S1_a_the, d_out, n_out, "is", other],
        [S1_a_obj, "the", n_out, locative_out_d, "is", other],
        [S1_a_obj, d_out, n_out, locative_out_the, "is", other],
        [S1_a_the, d_out, n_out, locative_out_d, "is", other],
    ]
    control_0_0 = " ".join(control_0_0_forms[random.randint(0, 13)])

    data = self.build_paradigm(
        training_1_1=training_1 + ".",
        training_0_0=training_0 + ".",
        control_1_0=control_1_0 + ".",
        control_0_1=control_0_1 + ".",
        test_1_0=test_1_0 + ".",
        test_0_1=test_0_1 + ".",
        control_1_1=control_1_1 + ".",
        control_0_0=control_0_0 + ".",
    )
    return data, track_sentence
def args_matching_3_verbs(self, v1, v2, v3, frequent=True, subj=None, aux=None,
                          allow_negated=True, allow_modal=True,
                          allow_recursion=False, allow_quantifiers=True):
    """
    Build the arguments of v1 using nouns that are compatible with v1, v2 and v3 at once.

    :param v1: vocab entry for the verb whose "category" decides which arguments are built
               and for which the auxiliary is generated.
    :param v2: second verb; generated nouns must also match its argument slots.
    :param v3: third verb; generated nouns must also match its argument slots.
    :param frequent: if True, nouns are drawn from get_all("frequent", "1"); otherwise from
                     the full vocab. Also forwarded to make_sentence for embedded clauses.
    :param subj: if supplied, the value of the subject in the returned dict. If None, a
                 subject will be generated.
    :param aux: if supplied, the value of the auxiliary in the returned dict. If None, an
                auxiliary will be generated with return_aux.
    :param allow_negated: should negated auxiliaries (e.g. hasn't) be generated?
    :param allow_modal: should modal auxiliaries (e.g. might) be generated?
    :param allow_recursion: currently unused; kept for interface compatibility.
    :param allow_quantifiers: should quantifiers (e.g. most, every) be generated as
                              determiners for DPs?
    :return: dict with keys "subj" and "aux", plus "args" (a list) when v1's category is
             intransitive, transitive or clause-embedding. For any other category the
             "args" key is not set.
    """
    args = {}
    if frequent:
        freq_vocab = get_all("frequent", "1")
    else:
        freq_vocab = vocab

    def shared_nouns(slot):
        # Nouns of the working vocabulary that fit `slot` of v3, then v2, then v1.
        nouns = get_all("category", "N", freq_vocab)
        for verb in (v3, v2, v1):
            nouns = get_matches_of(verb, slot, nouns)
        return nouns

    # all verbs have a subject
    if subj is None:
        args["subj"] = N_to_DP_mutate(choice(shared_nouns("arg_1")),
                                      allow_quantifiers=allow_quantifiers)
    else:
        args["subj"] = subj

    # all verbs have an auxiliary (or null)
    if aux is None:
        args["aux"] = return_aux(v1, args["subj"],
                                 allow_negated=allow_negated,
                                 allow_modal=allow_modal)
    else:
        args["aux"] = aux

    category = v1["category"]

    if category == "S\\NP":
        # INTRANSITIVE
        args["args"] = []
    elif category == "(S\\NP)/NP":
        # TRANSITIVE
        args["args"] = [N_to_DP_mutate(choice(shared_nouns("arg_2")),
                                       allow_quantifiers=allow_quantifiers)]
    elif category == "(S\\NP)/S":
        # CLAUSE EMBEDDING
        emb_clause = make_sentence(frequent)
        # Prepend the complementizer to element 0 of the clause; adding a str to the
        # clause object itself is a type error.
        # NOTE(review): assumes make_sentence returns a mutable, indexable entry whose
        # element 0 is its surface string -- confirm against make_sentence.
        if v1["arg_2"] == "expression_that":
            emb_clause[0] = "that " + emb_clause[0]
        if v1["arg_2"] == "expression_wh":
            emb_clause[0] = "whether " + emb_clause[0]
        args["args"] = [emb_clause]

    # NOTE: from-ing embedding, raising to object, object control, question embedding,
    # subject control and raising to subject are not handled by this method.
    return args
def sample(self):
    """
    Generate one control/raising paradigm whose sentences follow a dependent clause.

    Training 1:  DP1 Aux1 V_control_in  [DP2] TO VP   (e.g. John compelled Mary to leave.)
    Training 0:  DP1 Aux1 V_raising_in  [DP2] TO VP   (e.g. John wanted Mary to leave.)
    Test 1:      DP1 Aux1 V_control_out [DP2] TO VP   (e.g. John convinced Mary to leave.)
    Test 0:      DP1 Aux1 V_raising_out [DP2] TO VP   (e.g. John considered Mary to leave.)

    One of three constructions is picked at random: subject control/raising verbs,
    object control/raising verbs (which add DP2 before "to"), or control/raising
    adjectives (which use a copula as the auxiliary).

    :return: (data, track_sentence) where data is the result of self.build_paradigm and
             track_sentence lists the word tuples of the four base sentences.
    """
    construction = random.choice([1, 2, 3])

    if construction == 1:
        # subject control/raising
        control_in = choice(self.v_control_subj_in)
        subject = N_to_DP_mutate(choice(get_matches_of(control_in, "arg_1")))
        auxiliary = return_aux(control_in, subject)
        vp_candidates = get_matches_of(
            control_in, "arg_2",
            get_matched_by(subject, "arg_1", self.all_bare_transitive_verbs))
        embedded_vp = V_to_VP_mutate(choice(vp_candidates), aux=False)

        def compatible(pool):
            # Entries of pool selecting this VP, this auxiliary and this subject.
            by_vp = get_matched_by(embedded_vp, "arg_2", pool)
            by_aux = get_matches_of(auxiliary, "arg_2", by_vp)
            return get_matched_by(subject, "arg_1", by_aux)

        control_out = choice(compatible(self.v_control_subj_out))
        raising_in = choice(compatible(self.v_raising_subj_in))
        raising_out = choice(compatible(self.v_raising_subj_out))
        linker = "to"
    elif construction == 2:
        # object control/raising
        control_in = choice(self.v_control_obj_in)
        subject = N_to_DP_mutate(choice(get_matches_of(control_in, "arg_1")))
        auxiliary = return_aux(control_in, subject)
        direct_object = N_to_DP_mutate(choice(get_matches_of(control_in, "arg_2")))
        vp_candidates = get_matches_of(
            control_in, "arg_3",
            get_matched_by(direct_object, "arg_1", self.all_bare_transitive_verbs))
        embedded_vp = V_to_VP_mutate(choice(vp_candidates), aux=False)

        by_object = get_matched_by(direct_object, "arg_2", self.v_control_obj_out)
        by_vp = get_matched_by(embedded_vp, "arg_3", by_object)
        by_aux = get_matches_of(auxiliary, "arg_2", by_vp)
        control_out = choice(get_matched_by(subject, "arg_1", by_aux))

        raising_in = choice(get_matched_by(
            subject, "arg_1",
            get_matches_of(auxiliary, "arg_2", self.v_raising_obj_in)))
        raising_out = choice(get_matched_by(
            subject, "arg_1",
            get_matches_of(auxiliary, "arg_2", self.v_raising_obj_out)))
        linker = direct_object[0] + " to"
    else:
        # control/raising adjectives with a copula
        control_in = choice(self.adj_control_subj_in)
        subject = N_to_DP_mutate(choice(get_matches_of(control_in, "arg_1")))
        auxiliary = return_copula(subject)
        vp_candidates = get_matches_of(
            control_in, "arg_2",
            get_matched_by(subject, "arg_1", self.all_bare_transitive_verbs))
        embedded_vp = V_to_VP_mutate(choice(vp_candidates), aux=False)
        control_out = choice(get_matched_by(
            subject, "arg_1",
            get_matched_by(embedded_vp, "arg_2", self.adj_control_subj_out)))
        raising_in = choice(self.adj_raising_subj_in)
        raising_out = choice(self.adj_raising_subj_out)
        linker = "to"

    def words_for(predicate):
        # Word tuple of the base sentence built around the given predicate.
        return (subject[0], auxiliary[0], predicate[0], linker, embedded_vp[0])

    training_1_words = words_for(control_in)
    training_0_words = words_for(raising_in)
    test_1_words = words_for(control_out)
    test_0_words = words_for(raising_out)

    training_1 = " ".join(training_1_words)
    training_0 = " ".join(training_0_words)
    test_1 = " ".join(test_1_words)
    test_0 = " ".join(test_0_words)

    long_clause, short_clause = self.build_dependent_clauses(
        [training_1, training_0, test_0, test_1])

    def attach(clause, main):
        # Dependent clause, comma, main sentence, final period.
        return "%s, %s." % (clause, main)

    data = self.build_paradigm(
        training_1_1=attach(long_clause, training_1),
        training_0_0=attach(short_clause, training_0),
        control_1_0=attach(short_clause, training_1),
        control_0_1=attach(long_clause, training_0),
        test_1_0=attach(short_clause, test_1),
        test_0_1=attach(long_clause, test_0),
        control_1_1=attach(long_clause, test_1),
        control_0_0=attach(short_clause, test_0),
    )
    track_sentence = [training_1_words, training_0_words,
                      test_1_words, test_0_words]
    return data, track_sentence
def sample(self):
    """
    Generate a minimal pair differing in subject-verb agreement across an intervening DP.

    Good: D Subj S_arg Aux_agree     V_agree     args
    Bad:  D Subj S_arg Aux_not_agree V_not_agree args

    S_arg is drawn from nouns of the opposite number to the subject (singular nouns when
    subj["pl"] == "1", plural nouns otherwise). Sampling is retried until such a DP can
    be built.

    :return: (data, sentence_good) where data holds "sentence_good", "sentence_bad" and
             the "one_prefix_*" fields.
    """
    distractor = None
    while distractor is None:
        head = choice(self.safe_subjs)
        det = choice(get_matched_by(head, "arg_1", all_very_common_dets))
        verb_ok = choice(get_matched_by(head, "arg_1", self.safe_verbs))

        if verb_ok["finite"] != "1":
            # Non-finite verbs carry no agreement; the auxiliary makes the contrast.
            verb_bad = verb_ok
        else:
            # Present-tense form of the same root with the opposite 3sg value.
            flipped_3sg = "0" if verb_ok["3sg"] == "1" else "1"
            verb_bad = choice(
                get_all_conjunctive([("pres", "1"), ("3sg", flipped_3sg)],
                                    get_all("root", verb_ok["root"])))

        try:
            pool = all_singular_nouns if head["pl"] == "1" else all_plural_nouns
            distractor = N_to_DP_mutate(
                choice(get_matches_of(verb_bad, "arg_1",
                                      get_matches_of(head, "arg_1", pool))))
        except Exception:
            # No usable distractor for this subject/verb draw: sample again.
            continue

    auxiliaries = require_aux_agree(verb_ok, head)
    aux_ok = auxiliaries["aux_agree"]
    aux_bad = auxiliaries["aux_nonagree"]
    verb_args = verb_args_from_verb(verb_ok, subj=head, aux=aux_ok)

    prefix = "%s %s %s" % (det[0], head[0], distractor[0])
    if verb_ok["finite"] == "1":
        critical_good, critical_bad = verb_ok[0], verb_bad[0]
    else:
        critical_good, critical_bad = aux_ok, aux_bad

    def render(aux_word, verb_word):
        # Full sentence: shared prefix, auxiliary, verb, joined arguments, period.
        return "%s %s %s %s." % (prefix, aux_word, verb_word,
                                 join_args(verb_args["args"]))

    data = {
        "sentence_good": render(aux_ok, verb_ok[0]),
        "sentence_bad": render(aux_bad, verb_bad[0]),
        "one_prefix_prefix": prefix,
        "one_prefix_word_good": critical_good,
        "one_prefix_word_bad": critical_bad,
    }
    return data, data["sentence_good"]
def sample(self):
    """
    Generate one control/raising paradigm of plain sentences.

    Training 1:  DP1 Aux1 V_control_in  [DP2] TO VP .   (e.g. John compelled Mary to leave.)
    Training 0:  DP1 Aux1 V_raising_in  [DP2] TO VP .   (e.g. John wanted Mary to leave.)
    Test 1:      DP1 Aux1 V_control_out [DP2] TO VP .   (e.g. John convinced Mary to leave.)
    Test 0:      DP1 Aux1 V_raising_out [DP2] TO VP .   (e.g. John considered Mary to leave.)

    One of three constructions is picked at random: subject control/raising verbs,
    object control/raising verbs (which add DP2 before "to"), or control/raising
    adjectives (which use a copula as the auxiliary). The final "." is joined as its own
    space-separated token.

    :return: (data, track_sentence) where data is the result of self.build_paradigm and
             track_sentence lists the token tuples of the four sentences.
    """
    construction = random.choice([1, 2, 3])

    if construction == 1:
        # subject control/raising
        control_in = choice(self.v_control_subj_in)
        subject = N_to_DP_mutate(choice(get_matches_of(control_in, "arg_1")))
        auxiliary = return_aux(control_in, subject)
        vp_candidates = get_matches_of(
            control_in, "arg_2",
            get_matched_by(subject, "arg_1", all_bare_verbs))
        embedded_vp = V_to_VP_mutate(choice(vp_candidates), aux=False)

        def compatible(pool):
            # Entries of pool selecting this VP, this auxiliary and this subject.
            by_vp = get_matched_by(embedded_vp, "arg_2", pool)
            by_aux = get_matches_of(auxiliary, "arg_2", by_vp)
            return get_matched_by(subject, "arg_1", by_aux)

        control_out = choice(compatible(self.v_control_subj_out))
        raising_in = choice(compatible(self.v_raising_subj_in))
        raising_out = choice(compatible(self.v_raising_subj_out))
        linker = "to"
    elif construction == 2:
        # object control/raising
        control_in = choice(self.v_control_obj_in)
        subject = N_to_DP_mutate(choice(get_matches_of(control_in, "arg_1")))
        auxiliary = return_aux(control_in, subject)
        direct_object = N_to_DP_mutate(choice(get_matches_of(control_in, "arg_2")))
        vp_candidates = get_matches_of(
            control_in, "arg_3",
            get_matched_by(direct_object, "arg_1", all_bare_verbs))
        embedded_vp = V_to_VP_mutate(choice(vp_candidates), aux=False)

        by_object = get_matched_by(direct_object, "arg_2", self.v_control_obj_out)
        by_vp = get_matched_by(embedded_vp, "arg_3", by_object)
        by_aux = get_matches_of(auxiliary, "arg_2", by_vp)
        control_out = choice(get_matched_by(subject, "arg_1", by_aux))

        raising_in = choice(get_matched_by(
            subject, "arg_1",
            get_matches_of(auxiliary, "arg_2", self.v_raising_obj_in)))
        raising_out = choice(get_matched_by(
            subject, "arg_1",
            get_matches_of(auxiliary, "arg_2", self.v_raising_obj_out)))
        linker = direct_object[0] + " to"
    else:
        # control/raising adjectives with a copula
        control_in = choice(self.adj_control_subj_in)
        subject = N_to_DP_mutate(choice(get_matches_of(control_in, "arg_1")))
        auxiliary = return_copula(subject)
        vp_candidates = get_matches_of(
            control_in, "arg_2",
            get_matched_by(subject, "arg_1", all_bare_verbs))
        embedded_vp = V_to_VP_mutate(choice(vp_candidates), aux=False)
        control_out = choice(get_matched_by(
            subject, "arg_1",
            get_matched_by(embedded_vp, "arg_2", self.adj_control_subj_out)))
        raising_in = choice(self.adj_raising_subj_in)
        raising_out = choice(self.adj_raising_subj_out)
        linker = "to"

    def tokens_for(predicate):
        # Token tuple of the sentence built around the given predicate.
        return (subject[0], auxiliary[0], predicate[0], linker,
                embedded_vp[0], ".")

    training_1_tokens = tokens_for(control_in)
    training_0_tokens = tokens_for(raising_in)
    test_1_tokens = tokens_for(control_out)
    test_0_tokens = tokens_for(raising_out)

    data = self.build_paradigm(
        training_1_1=" ".join(training_1_tokens),
        training_0_0=" ".join(training_0_tokens),
        test_1_0=" ".join(test_1_tokens),
        test_0_1=" ".join(test_0_tokens),
    )
    track_sentence = [training_1_tokens, training_0_tokens,
                      test_1_tokens, test_0_tokens]
    return data, track_sentence
def sample(self):
    """
    Generate one paradigm contrasting first-person pronouns with the noun "apparition".

    Training 1/1: first     cp_verb_first     THAT D1 NP1 verb_1 Dt APPARITION
    Training 0/0: non_first cp_verb_non_first THAT D1 NP1 verb_1 D2 NP2
    Test 1/0:     D1 NP1 cp_verb_1 THAT D2 NP2        verb_2 first_acc
    Test 0/1:     D1 NP1 cp_verb_1 THAT Dt APPARITION verb_t non_first_acc
    Control 1/1:  D1 NP1 cp_verb_1 THAT Dt APPARITION verb_t first_acc
    Control 0/0:  D1 NP1 cp_verb_1 THAT D2 NP2        verb_2 non_first_acc

    :return: (data, track_sentence) where data is the result of self.build_paradigm and
             track_sentence holds one tuple of key words per paradigm cell.
    """
    first, non_first, first_acc, non_first_acc = self.get_pronouns()

    # NP1 may be any animate noun except the target word itself.
    non_target_nouns = np.setdiff1d(all_animate_nouns,
                                    get_all("expression", "apparition"))
    NP1 = choice(non_target_nouns)
    NP2 = choice(self.safe_animate_common_nouns, avoid=NP1)
    D1 = choice(get_matched_by(NP1, "arg_1", self.dets))
    D2 = choice(get_matched_by(NP2, "arg_1", self.dets))
    Dt = choice(get_matched_by(self.target_lexicon, "arg_1", self.dets))

    cp_verb = choice(self.cp_verb)
    cp_verb_aux = return_aux(cp_verb, first)
    cp_verb_first = re_conjugate(cp_verb, first, cp_verb_aux)
    cp_verb_non_first = re_conjugate(cp_verb, non_first, cp_verb_aux)
    cp_verb_1 = re_conjugate(cp_verb, NP1, cp_verb_aux)

    verb = choice(self.trans_verb)
    verb_aux = return_aux(verb, NP1)
    verb_1 = re_conjugate(verb, NP1, verb_aux)
    verb_2 = re_conjugate(verb, NP2, verb_aux)
    # "t" stands for the target lexicon entry
    verb_t = re_conjugate(verb, self.target_lexicon, verb_aux)

    matrix_words = (NP1[0], cp_verb[0])
    track_sentence = [
        (first[0], cp_verb[0], NP1[0], verb[0]),                 # Training 1/1
        (non_first[0], cp_verb[0], NP1[0], verb[0], NP2[0]),     # Training 0/0
        matrix_words + (NP2[0], verb[0], first_acc[0]),          # Test 1/0
        matrix_words + (verb[0], non_first_acc[0]),              # Test 0/1
        matrix_words + (verb[0], first_acc[0]),                  # Control 1/1
        matrix_words + (NP2[0], verb[0], non_first_acc[0]),      # Control 0/0
    ]

    # Pieces shared by several cells of the paradigm.
    embedded_np1 = "that %s %s %s" % (D1[0], NP1[0], verb_1[0])
    matrix_np1 = "%s %s %s that" % (D1[0], NP1[0], cp_verb_1[0])
    target_clause = "%s apparition %s" % (Dt[0], verb_t[0])
    other_clause = "%s %s %s" % (D2[0], NP2[0], verb_2[0])

    data = self.build_paradigm(
        training_1_1="%s %s %s %s apparition"
        % (first[0], cp_verb_first[0], embedded_np1, Dt[0]),
        training_0_0="%s %s %s %s %s"
        % (non_first[0], cp_verb_non_first[0], embedded_np1, D2[0], NP2[0]),
        test_1_0="%s %s %s" % (matrix_np1, other_clause, first_acc[0]),
        test_0_1="%s %s %s" % (matrix_np1, target_clause, non_first_acc[0]),
        control_1_1="%s %s %s" % (matrix_np1, target_clause, first_acc[0]),
        control_0_0="%s %s %s" % (matrix_np1, other_clause, non_first_acc[0]))
    return data, track_sentence
def sample_CP_verb_RC(self):
    """
    Generate two clause-embedding sentences, each containing exactly one relative clause.

    Base frame: D1 NP1 V1 that D2 NP2 V2 D3 NP3, with a relative clause attached to
    NP1, NP2 or NP3.

    First sentence: the matrix verb is V1_ing; the relative clause is filled with its
    V_RC* value and the embedded verb is V2.
    Second sentence: the matrix verb is V1; either the relative clause is filled with
    its V_RC*_ing value, or the embedded verb is V2_ing.

    :return: (data, track_sentence) where data is the list of the two sentences and
             track_sentence holds two identical (S1, RC1, RC2, RC3) tuples.
    """
    matrix_root = choice(self.CP_verbs_non_ing)
    matrix_ing_root = self.get_ing_form(matrix_root)
    NP1 = choice(get_matches_of(matrix_root, "arg_1", self.safe_nouns))
    D1 = choice(get_matched_by(NP1, "arg_1", all_very_common_dets))
    V1 = conjugate(matrix_root, NP1)
    V1_ing = conjugate(matrix_ing_root, NP1)

    embedded_root = choice(self.all_non_ing_transitive_verbs)
    embedded_ing_root = self.get_ing_form(embedded_root)
    NP2 = choice(get_matches_of(embedded_root, "arg_1", self.safe_nouns))
    V2 = conjugate(embedded_root, NP2)
    V2_ing = conjugate(embedded_ing_root, NP2)
    D2 = choice(get_matched_by(NP2, "arg_1", all_very_common_dets))
    NP3 = choice(get_matches_of(V2, "arg_2", self.safe_nouns))
    D3 = choice(get_matched_by(NP3, "arg_1", all_very_common_dets))

    def pick_relative_clause(noun):
        # Coin flip between a subject and an object relative clause on noun.
        if bool(random.randint(0, 1)):
            return self.subject_relative_clause(noun)
        return self.object_relative_clause(noun)

    RC1, _, V_RC1, V_RC1_ing = pick_relative_clause(NP1)
    RC2, _, V_RC2, V_RC2_ing = pick_relative_clause(NP2)
    RC3, _, V_RC3, V_RC3_ing = pick_relative_clause(NP3)

    S1 = " ".join([D1[0], "%s", NP1[0], "%s", V1[0], "that", D2[0], "%s",
                   NP2[0], V2[0], D3[0], "%s", NP3[0]])
    track_sentence = [(S1, RC1, RC2, RC3)] * 2

    # Fixed stretches of the frame; verbs and relative clauses go in between.
    matrix_subj = [D1[0], NP1[0]]
    embedded_subj = ["that", D2[0], NP2[0]]
    embedded_obj = [D3[0], NP3[0]]

    data = []

    # Sentence 1: -ing form on the matrix verb.
    position = random.randint(0, 2)
    if position == 0:
        words = (matrix_subj + [RC1 % V_RC1, V1_ing[0]] + embedded_subj
                 + [V2[0]] + embedded_obj)
    elif position == 1:
        words = (matrix_subj + [V1_ing[0]] + embedded_subj
                 + [RC2 % V_RC2, V2[0]] + embedded_obj)
    else:
        words = (matrix_subj + [V1_ing[0]] + embedded_subj
                 + [V2[0]] + embedded_obj + [RC3 % V_RC3])
    data.append(" ".join(words))

    # Sentence 2: -ing form inside the relative clause or on the embedded verb.
    variant = random.randint(0, 5)
    if variant == 0:
        words = (matrix_subj + [RC1 % V_RC1_ing, V1[0]] + embedded_subj
                 + [V2[0]] + embedded_obj)
    elif variant == 1:
        words = (matrix_subj + [RC1 % V_RC1, V1[0]] + embedded_subj
                 + [V2_ing[0]] + embedded_obj)
    elif variant == 2:
        words = (matrix_subj + [V1[0]] + embedded_subj
                 + [RC2 % V_RC2_ing, V2[0]] + embedded_obj)
    elif variant == 3:
        words = (matrix_subj + [V1[0]] + embedded_subj
                 + [RC2 % V_RC2, V2_ing[0]] + embedded_obj)
    elif variant == 4:
        words = (matrix_subj + [V1[0]] + embedded_subj
                 + [V2[0]] + embedded_obj + [RC3 % V_RC3_ing])
    else:
        words = (matrix_subj + [V1[0]] + embedded_subj
                 + [V2_ing[0]] + embedded_obj + [RC3 % V_RC3])
    data.append(" ".join(words))

    return data, track_sentence
def sample_CP_noun_RC(self):
    """
    Generate two sentence templates with a clause-taking noun and one relative clause.

    Frame: %s NP1 that %s NP1_emb V_emb %s NP2_emb V1 %s NP2, where each "%s" is a
    determiner slot left unfilled; the matching determiners are returned in Ds. A
    relative clause is attached to NP1_emb, NP2_emb or NP2.

    First template: the main verb is V1_ing; the relative clause is filled with its
    non-ing V_RC* value.
    Second template: the main verb is V1; either the embedded verb is V_emb_ing, or the
    relative clause is filled with its V_RC*_ing value.

    :return: (data, track_sentence, Ds) where data is the list of the two templates,
             track_sentence is [S1, S1] (plain strings, not tuples), and Ds holds two
             identical (D1, D1_emb, D2_emb, D2) determiner tuples.
    """
    NP1 = choice(self.CP_nouns)
    main_root = choice(
        get_matched_by(NP1, "arg_1", self.all_non_ing_transitive_verbs))
    main_ing_root = self.get_ing_form(main_root)
    V1 = conjugate(main_root, NP1)
    V1_ing = conjugate(main_ing_root, NP1)
    D1 = choice(get_matched_by(NP1, "arg_1", self.safe_dets))
    NP2 = choice(get_matches_of(V1, "arg_2", self.safe_nouns))
    D2 = choice(get_matched_by(NP2, "arg_1", self.safe_dets))

    emb_root = choice(self.all_non_ing_transitive_verbs)
    emb_ing_root = self.get_ing_form(emb_root)
    NP1_emb = choice(get_matches_of(emb_root, "arg_1", self.safe_nouns))
    V_emb = conjugate(emb_root, NP1_emb)
    V_emb_ing = conjugate(emb_ing_root, NP1_emb)
    D1_emb = choice(get_matched_by(NP1_emb, "arg_1", self.safe_dets))
    NP2_emb = choice(get_matches_of(V_emb, "arg_2", self.safe_nouns))
    D2_emb = choice(get_matched_by(NP2_emb, "arg_1", self.safe_dets))

    RC2, V_RC2, V_RC2_ing = self.subject_relative_clause_intransitive(NP2)
    RC1_emb, V_RC1_emb, V_RC1_emb_ing = \
        self.subject_relative_clause_intransitive(NP1_emb)
    RC2_emb, V_RC2_emb, V_RC2_emb_ing = \
        self.subject_relative_clause_intransitive(NP2_emb)

    S1 = " ".join([D1[0], NP1[0], NP1_emb[0], V_emb[0], NP2_emb[0], V1[0],
                   D2[0], NP2[0]])
    track_sentence = [S1, S1]

    def assemble(emb_verb, main_verb, rc_emb_subj=None, rc_emb_obj=None,
                 rc_obj=None):
        # Fill the frame; a relative clause left as None is simply omitted.
        words = ["%s", NP1[0], "that", "%s", NP1_emb[0]]
        if rc_emb_subj is not None:
            words.append(rc_emb_subj)
        words += [emb_verb, "%s", NP2_emb[0]]
        if rc_emb_obj is not None:
            words.append(rc_emb_obj)
        words += [main_verb, "%s", NP2[0]]
        if rc_obj is not None:
            words.append(rc_obj)
        return " ".join(words)

    data = []
    Ds = []

    # Template 1: -ing form on the main verb.
    position = random.randint(0, 2)
    if position == 0:
        data.append(assemble(V_emb[0], V1_ing[0],
                             rc_emb_subj=RC1_emb % V_RC1_emb))
    elif position == 1:
        data.append(assemble(V_emb[0], V1_ing[0],
                             rc_emb_obj=RC2_emb % V_RC2_emb))
    else:
        data.append(assemble(V_emb[0], V1_ing[0], rc_obj=RC2 % V_RC2))

    # Template 2: -ing form on the embedded verb or inside the relative clause.
    variant = random.randint(0, 5)
    if variant == 0:
        data.append(assemble(V_emb_ing[0], V1[0],
                             rc_emb_subj=RC1_emb % V_RC1_emb))
    elif variant == 1:
        data.append(assemble(V_emb_ing[0], V1[0],
                             rc_emb_obj=RC2_emb % V_RC2_emb))
    elif variant == 2:
        data.append(assemble(V_emb_ing[0], V1[0], rc_obj=RC2 % V_RC2))
    elif variant == 3:
        data.append(assemble(V_emb[0], V1[0],
                             rc_emb_subj=RC1_emb % V_RC1_emb_ing))
    elif variant == 4:
        data.append(assemble(V_emb[0], V1[0],
                             rc_emb_obj=RC2_emb % V_RC2_emb_ing))
    else:
        data.append(assemble(V_emb[0], V1[0], rc_obj=RC2 % V_RC2_ing))

    determiners = (D1[0], D1_emb[0], D2_emb[0], D2[0])
    Ds.append(determiners)
    Ds.append(determiners)
    return data, track_sentence, Ds
def sample(self):
    """
    Generate one paradigm crossing verb form (irregular past vs. present plural) with
    title-casing.

    Each clause is "D_subj2 subj2 V D_obj obj" with a plural subject. The in-domain
    clauses use verbs from the *_in_domain lists, the out-domain clauses from the
    *_out_domain lists. The cells training_1_1, test_0_1, control_0_1 and control_1_1
    are passed through titlecase; the others are left as generated.

    A separate first conjunct S1 ("D subj aux V1 D obj and") is generated as well; it
    appears only in track_sentence, not in the paradigm strings.

    :return: (data, track_sentence)
    :raises MatchNotFoundError: if sampling the out-domain clause raises IndexError.
    """
    V1 = choice(self.all_safe_verbs)
    subj = choice(get_matches_of(V1, "arg_1", all_common_nouns))
    aux = return_aux(V1, subj)
    D_subj = choice(get_matched_by(subj, "arg_1", all_frequent_determiners))
    obj = choice(get_matches_of(V1, "arg_2", all_common_nouns))
    D_obj = choice(get_matched_by(obj, "arg_1", all_frequent_determiners))
    first_conjunct = (D_subj[0], subj[0], aux[0], V1[0], D_obj[0], obj[0], "and")
    S1 = " ".join(first_conjunct)

    V_past_in = choice(self.irr_past_verbs_in_domain)
    subj2 = choice(get_matches_of(V_past_in, "arg_1", all_plural_nouns))
    D_subj2 = choice(
        get_matched_by(subj2, "arg_1", all_frequent_determiners))
    obj2_in = choice(get_matches_of(V_past_in, "arg_2", all_common_nouns))
    D_obj2_in = choice(
        get_matched_by(obj2_in, "arg_1", all_frequent_determiners))
    present_for_obj_in = get_matched_by(
        obj2_in, "arg_2", self.present_plural_verbs_in_domain)
    V_pres_in = choice(get_matched_by(subj2, "arg_1", present_for_obj_in))

    try:
        V_past_out = choice(
            get_matched_by(subj2, "arg_1", self.irr_past_verbs_out_domain))
        obj2_out = choice(
            get_matches_of(V_past_out, "arg_2", all_common_nouns))
        D_obj2_out = choice(
            get_matched_by(obj2_out, "arg_1", all_frequent_determiners))
        present_for_obj_out = get_matched_by(
            obj2_out, "arg_2", self.present_plural_verbs_out_domain)
        V_pres_out = choice(
            get_matched_by(subj2, "arg_1", present_for_obj_out))
    except IndexError:
        raise MatchNotFoundError("")

    def clause_words(verb_entry, det_entry, obj_entry):
        # Word tuple of one second-conjunct clause sharing the plural subject.
        return (D_subj2[0], subj2[0], verb_entry[0], det_entry[0], obj_entry[0])

    past_in_words = clause_words(V_past_in, D_obj2_in, obj2_in)
    pres_in_words = clause_words(V_pres_in, D_obj2_in, obj2_in)
    past_out_words = clause_words(V_past_out, D_obj2_out, obj2_out)
    pres_out_words = clause_words(V_pres_out, D_obj2_out, obj2_out)

    track_sentence = [
        (S1,) + words
        for words in (past_in_words, pres_in_words,
                      past_out_words, pres_out_words)
    ]

    in_domain_1 = " ".join(past_in_words)
    in_domain_0 = " ".join(pres_in_words)
    out_domain_1 = " ".join(past_out_words)
    out_domain_0 = " ".join(pres_out_words)

    data = self.build_paradigm(
        training_1_1=titlecase(in_domain_1),
        training_0_0=in_domain_0,
        test_1_0=out_domain_1,
        test_0_1=titlecase(out_domain_0),
        control_1_0=in_domain_1,
        control_0_1=titlecase(in_domain_0),
        control_1_1=titlecase(out_domain_1),
        control_0_0=out_domain_0,
    )
    return data, track_sentence
def sample(self):
    """
    Generate one paradigm of copular sentences, each preceded by a dependent clause.

    Training 1 (in-domain words, contains the adjective):
        John is the tall man. / The tall man is in the room. /
        The man is tall. / The man in the room is tall.
    Training 0 (in-domain words, no adjective):
        John is the man. / John is the man in the room. / The man is John.
    Test 1 (out-domain words, contains the adjective):
        John is the tall man in the room. / John is tall. / The tall man is John. /
        The tall man in the room is John. / The tall man is president. /
        The tall man in the room is president.
    Test 0 (out-domain words, no adjective):
        John is in the room. / The man is in the room. / The man in the room is John. /
        John is president. / The man is president. / The man in the room is president.

    One variant per group is chosen at random and combined with the long and short
    dependent clauses from self.build_dependent_clauses.

    :return: (data, track_sentence) where track_sentence holds six identical tuples of
             the in-domain name, noun, adjective and locative.
    """
    def gender_ok(entry, name, allow_blank):
        # Noun gender must equal the name's, be "n", or (if allowed) be empty.
        if entry["gender"] == name["gender"] or entry["gender"] == "n":
            return True
        return allow_blank and entry["gender"] == ""

    name_in = choice(self.names_in_domain)
    name_out = choice(self.names_out_domain)
    noun_in = choice(np.array(
        [n for n in self.common_nouns_in_domain if gender_ok(n, name_in, True)]))
    noun_out = choice(np.array(
        [n for n in self.common_nouns_out_domain if gender_ok(n, name_out, True)]))
    adj_in = choice(self.adjs_in_domain)
    adj_out = choice(self.adjs_out_domain)
    locative_in = build_locative(choice(self.locales_in_domain),
                                 allow_quantifiers=False)
    locative_out = build_locative(choice(self.locales_out_domain),
                                  allow_quantifiers=False)
    other_noun = choice(np.array(
        [n for n in self.one_word_noun if gender_ok(n, name_out, False)]))

    in_words = (name_in[0], noun_in[0], adj_in[0], locative_in[0])
    track_sentence = [in_words] * 6

    # Surface strings used by the sentence variants below.
    name_i, noun_i, adj_i, loc_i = in_words
    name_o, noun_o, adj_o, loc_o = (name_out[0], noun_out[0], adj_out[0],
                                    locative_out[0])
    other = other_noun[0]

    option = random.choice([1, 2, 3, 4])
    training_1 = " ".join({
        1: (name_i, "is", "the", adj_i, noun_i),
        2: ("the", adj_i, noun_i, "is", loc_i),
        3: ("the", noun_i, "is", adj_i),
        4: ("the", noun_i, loc_i, "is", adj_i),
    }[option])

    option = random.choice([1, 2, 3])
    training_0 = " ".join({
        1: (name_i, "is", "the", noun_i),
        2: (name_i, "is", "the", noun_i, loc_i),
        3: ("the", noun_i, "is", name_i),
    }[option])

    option = random.choice([1, 2, 3, 4, 5, 6])
    test_1 = " ".join({
        1: (name_o, "is", "the", adj_o, noun_o, loc_o),
        2: (name_o, "is", adj_o),
        3: ("the", adj_o, noun_o, "is", name_o),
        4: ("the", adj_o, noun_o, loc_o, "is", name_o),
        5: ("the", adj_o, noun_o, "is", other),
        6: ("the", adj_o, noun_o, loc_o, "is", other),
    }[option])

    option = random.choice([1, 2, 3, 4, 5, 6])
    test_0 = " ".join({
        1: (name_o, "is", loc_o),
        2: ("the", noun_o, "is", loc_o),
        3: ("the", noun_o, loc_o, "is", name_o),
        4: (name_o, "is", other),
        5: ("the", noun_o, "is", other),
        6: ("the", noun_o, loc_o, "is", other),
    }[option])

    long_clause, short_clause = self.build_dependent_clauses(
        [training_1, training_0, test_0, test_1])

    def attach(clause, main):
        # Dependent clause, comma, main sentence, final period.
        return "%s, %s." % (clause, main)

    data = self.build_paradigm(
        training_1_1=attach(long_clause, training_1),
        training_0_0=attach(short_clause, training_0),
        control_1_0=attach(short_clause, training_1),
        control_0_1=attach(long_clause, training_0),
        test_1_0=attach(short_clause, test_1),
        test_0_1=attach(long_clause, test_0),
        control_1_1=attach(long_clause, test_1),
        control_0_0=attach(short_clause, test_0),
    )
    return data, track_sentence