def sample(self):
        """Sample one six-sentence paradigm built around a clause-embedding verb.

        Templates (the two digits are the paradigm's feature values; the
        generated strings carry no final punctuation):

            Training 1/1   first     cp_verb_first     that D1 NP1 verb_1 the NP2
                           I         think             that    John found the cat
            Training 0/0   non_first cp_verb_non_first that D1 NP1 verb_1 D2 NP2
                           They      think             that    John found every cat
            Test 1/0       D1 NP1 cp_verb_1 that D2  NP2 verb_2 first_acc
                              John thinks   that every cat found me
            Test 0/1       D1 NP1 cp_verb_1 that the NP2 verb_2 non_first_acc
                              John thinks   that the cat found them
            Control 1/1    D1 NP1 cp_verb_1 that the NP2 verb_2 first_acc
                              John thinks   that the cat found me
            Control 0/0    D1 NP1 cp_verb_1 that D2  NP2 verb_2 non_first_acc
                              John thinks   that every cat found them

        Returns:
            A pair ``(data, track_sentence)``: ``data`` is whatever
            ``self.build_paradigm`` returns for the six sentences, and
            ``track_sentence`` is a list of six tuples holding the lexical
            items of each sentence, in the order listed above.
        """
        first, non_first, first_acc, non_first_acc = self.get_pronouns()

        # Nouns and their determiners.
        noun_1 = choice(all_animate_nouns)
        noun_2 = choice(self.animate_common_nouns, avoid=noun_1)
        det_1 = choice(get_matched_by(noun_1, "arg_1", self.safe_dets))
        det_2 = choice(get_matched_by(noun_2, "arg_1", self.safe_dets))

        # Clause-embedding verb, re-conjugated for each possible subject with
        # the auxiliary obtained for the first-person subject.
        cp = choice(self.cp_verb)
        cp_aux = return_aux(cp, first)
        cp_for_first = re_conjugate(cp, first, cp_aux)
        cp_for_non_first = re_conjugate(cp, non_first, cp_aux)
        cp_for_noun_1 = re_conjugate(cp, noun_1, cp_aux)

        # Transitive verb, re-conjugated for each noun with the auxiliary
        # obtained for noun_1.
        tv = choice(self.trans_verb)
        tv_aux = return_aux(tv, noun_1)
        tv_for_noun_1 = re_conjugate(tv, noun_1, tv_aux)
        tv_for_noun_2 = re_conjugate(tv, noun_2, tv_aux)

        # Test and control sentences share everything but the final pronoun.
        tracked_stem = (noun_1[0], cp[0], noun_2[0], tv[0])
        track_sentence = [
            (first[0], cp[0], noun_1[0], tv[0], noun_2[0]),  # training 1/1
            (non_first[0], cp[0], noun_1[0], tv[0], noun_2[0]),  # training 0/0
            tracked_stem + (first_acc[0],),  # test 1/0
            tracked_stem + (non_first_acc[0],),  # test 0/1
            tracked_stem + (first_acc[0],),  # control 1/1
            tracked_stem + (non_first_acc[0],),  # control 0/0
        ]

        embedded_train = [det_1[0], noun_1[0], tv_for_noun_1[0]]
        matrix_eval = [det_1[0], noun_1[0], cp_for_noun_1[0], "that"]
        embedded_the = ["the", noun_2[0], tv_for_noun_2[0]]
        embedded_det = [det_2[0], noun_2[0], tv_for_noun_2[0]]

        data = self.build_paradigm(
            training_1_1=" ".join(
                [first[0], cp_for_first[0], "that"] + embedded_train +
                ["the", noun_2[0]]),
            training_0_0=" ".join(
                [non_first[0], cp_for_non_first[0], "that"] + embedded_train +
                [det_2[0], noun_2[0]]),
            test_1_0=" ".join(matrix_eval + embedded_det + [first_acc[0]]),
            test_0_1=" ".join(matrix_eval + embedded_the + [non_first_acc[0]]),
            control_1_1=" ".join(matrix_eval + embedded_the + [first_acc[0]]),
            control_0_0=" ".join(
                matrix_eval + embedded_det + [non_first_acc[0]]))
        return data, track_sentence
Beispiel #2
0
# Vocabulary subsets used by the sampling loop below.
# Nouns flagged as documents.
all_documents = get_all_conjunctive([("category", "N"), ("document", "1")])
# Singular, animate nouns with gender "n"; these are removed from the noun
# pool to form all_safe_nouns.
all_singular_neuter_animate_nouns = get_all_conjunctive([("category", "N"),
                                                         ("sg", "1"),
                                                         ("animate", "1"),
                                                         ("gender", "n")])
all_safe_nouns = np.setdiff1d(all_nouns, all_singular_neuter_animate_nouns)

# gather functional classes that will be accessed frequently
all_frequent_quantifiers = get_all("frequent", "1",
                                   get_all("category", "(S/(S\\NP))/N"))
all_reflexives = get_all("category_2", "refl")

# gather potentially reflexive predicates
all_transitive_verbs = get_all("category", "(S\\NP)/NP")
# Transitive verbs matched, for both arg_1 and arg_2, against ONE randomly
# drawn animate noun each. NOTE(review): the resulting set therefore depends
# on which nouns happen to be drawn at import time -- confirm this is intended.
all_anim_anim_verbs = get_matched_by(
    choice(all_animate_nouns), "arg_1",
    get_matched_by(choice(all_animate_nouns), "arg_2", all_transitive_verbs))
# Same construction with randomly drawn document nouns for both arguments.
all_doc_doc_verbs = get_matched_by(
    choice(all_documents), "arg_1",
    get_matched_by(choice(all_documents), "arg_2", all_transitive_verbs))
# Union of the two verb sets above.
all_refl_preds = np.union1d(all_anim_anim_verbs, all_doc_doc_verbs)

# sample sentences until desired number
for writer in [train_output, dev_output, test_output]:
    counter = 0
    while counter < number_to_generate:
        # DP1       Rel V1   DP2     V2  Refl1/Refl2
        # The women who like the boy see themselves/himself

        # D1  N1    Rel V2  Refl1/Refl2        V1   D2  N2
        # The women who saw themselves/himself like the boy
Beispiel #3
0
    def sample_nested_rc_2_rcs(self):
        """Sample a sentence pair containing nested relative clauses.

        A transitive main clause ``D1 NP1 V1 D2 NP2`` is drawn first. For each
        of the two nouns a two-level relative clause is built (an outer clause
        bound to the noun, plus an inner clause attached to the outer clause's
        argument), as well as an intransitive subject relative clause.

        * First sentence: the main verb is the form derived from
          ``self.get_ing_form``; one noun carries its nested relative clause
          and the other its intransitive one, all with their plain verbs.
        * Second sentence: the main verb is the plain conjugated form and
          exactly one relative-clause verb is swapped for its ``*_ing``
          counterpart; which one is chosen at random among six layouts.

        Returns:
            ``(data, track_sentence)`` where ``data`` is the list of the two
            sentences and ``track_sentence`` holds two identical
            ``(S1, RC1, RC2)`` tuples.
        """
        main_verb = choice(self.all_non_ing_transitive_verbs)
        main_verb_ing = self.get_ing_form(main_verb)
        NP1 = choice(get_matches_of(main_verb, "arg_1", self.safe_nouns))
        main_verb = conjugate(main_verb, NP1)
        main_verb_ing = conjugate(main_verb_ing, NP1)
        D1 = choice(get_matched_by(NP1, "arg_1", all_very_common_dets))
        # The object is matched against the already conjugated main verb.
        NP2 = choice(get_matches_of(main_verb, "arg_2", self.safe_nouns))
        D2 = choice(get_matched_by(NP2, "arg_1", all_very_common_dets))
        S1 = " ".join([D1[0], NP1[0], "%s", D2[0], NP2[0]])

        # (outer builder, inner builder) for each of the three nesting layouts.
        subj_rc = self.subject_relative_clause
        obj_rc = self.object_relative_clause
        nestings = (
            (subj_rc, subj_rc),
            (obj_rc, subj_rc),
            (subj_rc, obj_rc),
        )

        outer, inner = nestings[random.randint(0, 2)]
        RC1, arg_RC1, V_RC1, V_RC1_ing = outer(NP1, bind=True)
        RC1_b, _, V_RC1_b, V_RC1_ing_b = inner(arg_RC1, bind=False)

        outer, inner = nestings[random.randint(0, 2)]
        RC2, arg_RC2, V_RC2, V_RC2_ing = outer(NP2, bind=True)
        RC2_b, _, V_RC2_b, V_RC2_ing_b = inner(arg_RC2, bind=False)

        RC1_iv, V_RC1_iv, V_RC1_iv_ing = \
            self.subject_relative_clause_intransitive(NP1)
        RC2_iv, V_RC2_iv, V_RC2_iv_ing = \
            self.subject_relative_clause_intransitive(NP2)

        track_sentence = [(S1, RC1, RC2)] * 2

        def nested_rc1(outer_verb, inner_verb):
            # Fill the nested clause on NP1 with the given verb forms.
            return RC1.format(v=outer_verb, rc=(RC1_b % inner_verb))

        def nested_rc2(outer_verb, inner_verb):
            # Fill the nested clause on NP2 with the given verb forms.
            return RC2.format(v=outer_verb, rc=(RC2_b % inner_verb))

        subject = [D1[0], NP1[0]]
        obj = [D2[0], NP2[0]]

        # Sentence 1: main verb from get_ing_form, plain relative-clause verbs.
        if random.randint(0, 1) == 0:
            words = (subject + [nested_rc1(V_RC1, V_RC1_b), main_verb_ing[0]]
                     + obj + [RC2_iv % V_RC2_iv])
        else:
            words = (subject + [RC1_iv % V_RC1_iv, main_verb_ing[0]]
                     + obj + [nested_rc2(V_RC2, V_RC2_b)])
        data = [" ".join(words)]

        # Sentence 2: plain main verb, one *_ing verb inside a relative clause.
        layout = random.randint(0, 5)
        if layout == 0:
            words = (subject + [nested_rc1(V_RC1_ing, V_RC1_b), main_verb[0]]
                     + obj)
        elif layout == 1:
            words = (subject + [nested_rc1(V_RC1, V_RC1_ing_b), main_verb[0]]
                     + obj)
        elif layout == 2:
            words = (subject + [nested_rc1(V_RC1, V_RC1_b), main_verb[0]]
                     + obj + [RC2_iv % V_RC2_iv_ing])
        elif layout == 3:
            words = (subject + [main_verb[0]] + obj
                     + [nested_rc2(V_RC2_ing, V_RC2_b)])
        elif layout == 4:
            words = (subject + [main_verb[0]] + obj
                     + [nested_rc2(V_RC2, V_RC2_ing_b)])
        else:
            words = (subject + [RC1_iv % V_RC1_iv_ing, main_verb[0]] + obj
                     + [nested_rc2(V_RC2, V_RC2_b)])
        data.append(" ".join(words))

        return data, track_sentence
    def sample(self):
        """Sample a four-sentence paradigm contrasting past and present verbs.

        Every sentence starts with the same first clause ``S1``
        ("D subj aux V1 D obj and") and continues with a second clause whose
        subject is shared and whose verb/object vary:

            Training 1: The boy might see the cat and the students bought the paper
            Training 0: The boy might see the cat and the students shred the paper
            Test 1:     The boy might see the cat and the students found the book
            Test 0:     The boy might see the cat and the students understand the book

        Training sentences use the in-domain verb lists
        (``self.irr_past_verbs_in_domain`` /
        ``self.present_plural_verbs_in_domain``), test sentences the
        out-of-domain ones.

        Returns:
            ``(data, track_sentence)``: ``data`` is the value returned by
            ``self.build_paradigm``; ``track_sentence`` lists the lexical
            items of the four sentences in the order above.

        Raises:
            MatchNotFoundError: if drawing the out-of-domain verbs, object or
                determiner for the chosen subject raises ``IndexError``.
        """
        V1 = choice(self.all_safe_verbs)
        subj = choice(get_matches_of(V1, "arg_1", all_common_nouns))
        aux = return_aux(V1, subj)
        D_subj = choice(get_matched_by(subj, "arg_1",
                                       all_frequent_determiners))
        obj = choice(get_matches_of(V1, "arg_2", all_common_nouns))
        D_obj = choice(get_matched_by(obj, "arg_1", all_frequent_determiners))
        S1 = " ".join(
            [D_subj[0], subj[0], aux[0], V1[0], D_obj[0], obj[0], "and"])

        # In-domain second clause: past verb first, then a present-tense verb
        # compatible with the same subject and object.
        V_past_in = choice(self.irr_past_verbs_in_domain)
        subj2 = choice(get_matches_of(V_past_in, "arg_1", all_plural_nouns))
        D_subj2 = choice(
            get_matched_by(subj2, "arg_1", all_frequent_determiners))
        obj2_in = choice(get_matches_of(V_past_in, "arg_2", all_common_nouns))
        D_obj2_in = choice(
            get_matched_by(obj2_in, "arg_1", all_frequent_determiners))
        V_pres_in = choice(
            get_matched_by(
                subj2, "arg_1",
                get_matched_by(obj2_in, "arg_2",
                               self.present_plural_verbs_in_domain)))

        # Out-of-domain second clause for the same subject.
        try:
            V_past_out = choice(
                get_matched_by(subj2, "arg_1", self.irr_past_verbs_out_domain))
            obj2_out = choice(
                get_matches_of(V_past_out, "arg_2", all_common_nouns))
            D_obj2_out = choice(
                get_matched_by(obj2_out, "arg_1", all_frequent_determiners))
            V_pres_out = choice(
                get_matched_by(
                    subj2, "arg_1",
                    get_matched_by(obj2_out, "arg_2",
                                   self.present_plural_verbs_out_domain)))
        except IndexError as err:
            # Keep the exception type callers see, but say what failed and
            # keep the original error attached as the cause.
            raise MatchNotFoundError(
                "no out-of-domain verb/object combination found for subject "
                "'%s'" % subj2[0]) from err

        def second_clause(verb, det, noun):
            # Lexical items of one full sentence, without final punctuation.
            return (S1, D_subj2[0], subj2[0], verb[0], det[0], noun[0])

        track_sentence = [
            second_clause(V_past_in, D_obj2_in, obj2_in),
            second_clause(V_pres_in, D_obj2_in, obj2_in),
            second_clause(V_past_out, D_obj2_out, obj2_out),
            second_clause(V_pres_out, D_obj2_out, obj2_out),
        ]

        period = (".",)
        data = self.build_paradigm(
            training_1_1=" ".join(track_sentence[0] + period),
            training_0_0=" ".join(track_sentence[1] + period),
            test_1_0=" ".join(track_sentence[2] + period),
            test_0_1=" ".join(track_sentence[3] + period),
        )
        return data, track_sentence
Beispiel #5
0
                                 np.append(get_all("expression", "those"),
                                 np.append(get_all("expression", "this"), get_all_conjunctive([("expression", "that"),
                                           ("category_2", "D")])))))
# Adverbs that can take the place of "ever" in the sentence template below.
ever_replacements = np.array(["often", "also", "fortunately", "obviously", "clearly"])
# Same list without "often"; the sampling loop uses it when the sentence's
# own adverb is already "often".
ever_replacements_no_often = np.array(["also", "fortunately", "obviously", "clearly"])
# Adverbial expressions; not referenced in the visible part of the loop.
adverb_npi_replacements = np.array(["regularly", "on weekends", "on occasion", "for a while", "as well"])

# sample sentences until desired number
while len(sentences) < number_to_generate:
    # sentence template
    # D1    N1  Aux1 (Adv)    ever/also V1   that D2    N2   Aux2 (Adv)    V2   D3    N3
    # The/a boy has  (rarely) ever/also said that the/a girl has  (rarely) sung the/a song

    # build all lexical items
    try:
        N1 = choice(all_animate_nouns)
        D1 = choice(get_matched_by(N1, "arg_1", all_common_dets))
        Adv_freq = choice(all_freq_adverbs)
        if Adv_freq[0] == "often":
            NPI_replacement = choice(ever_replacements_no_often)
        else:
            NPI_replacement = choice(ever_replacements)
        Adv_nonfreq = choice(all_nonfreq_adverbs)
        # If nonfrequent Adv is often, don't use it as a replacement for "ever"
        if Adv_nonfreq[0] == "often":
            NPI_replacement = choice(ever_replacements_no_often)
        else:
            NPI_replacement = choice(ever_replacements)
        V1 = choice(get_matched_by(N1, "arg_1", all_embedding_verbs))
        Aux1 = return_aux(V1, N1, allow_negated=False)
        N2 = choice(all_animate_nouns, [N1])
Beispiel #6
0
    def sample(self):
        """Sample one eight-sentence control/raising paradigm.

        Every sentence is a transitive first clause followed by "and" and a
        second clause whose predicate is a control or a raising item:

            Training 1   John compelled         Mary to leave.
                         DP1  Aux1 V_control_in DP2  TO VP
            Training 0   John wanted            Mary to leave.
                         DP1  Aux1 V_raising_in DP2  TO VP
            Test 1       John convinced          Mary to leave.
                         DP1  Aux1 V_control_out DP2  TO VP
            Test 0       John considered         Mary to leave.
                         DP1  Aux1 V_raising_out DP2  TO VP

        One of three predicate families is drawn at random (subject
        control/raising verbs, object control/raising verbs, or
        control/raising adjectives with a copula). The four determiner slots
        of a sentence are filled from a pair of in-domain and a pair of
        out-of-domain determiner configurations, each pair differing only in
        swapping "the" and "a".

        Returns:
            ``(data, track_sentence)``: ``data`` is the value returned by
            ``self.build_paradigm`` for the eight sentences;
            ``track_sentence`` is a list of six tuples with the lexical items
            of the second clause.

        Raises:
            Whatever the vocabulary helpers raise when no matching item
            exists. In particular a failure to find object-raising verbs is
            no longer swallowed (it used to resurface later as an unrelated
            ``UnboundLocalError``).
        """
        # First clause; its two determiners are filled in later via "%s".
        V_trans = choice(self.all_possibly_singular_transitive_verbs)
        NP_trans_1 = choice(get_matches_of(V_trans, "arg_1", self.safe_nouns))
        NP_trans_2 = choice(get_matches_of(V_trans, "arg_2", self.safe_nouns))
        D_trans_1 = choice(get_matched_by(NP_trans_1, "arg_1", self.safe_dets))
        D_trans_2 = choice(get_matched_by(NP_trans_2, "arg_1", self.safe_dets))
        Aux_trans = return_aux(V_trans, NP_trans_1)
        S1_abs = " ".join([
            "%s", NP_trans_1[0], Aux_trans[0], V_trans[0], "%s", NP_trans_2[0]
        ])

        option = random.choice([1, 2, 3])
        if option == 1:  # subject control/raising
            V_control_in = choice(self.v_control_subj_in)
            NP1 = choice(get_matches_of(V_control_in, "arg_1",
                                        self.safe_nouns))
            D1 = choice(get_matched_by(NP1, "arg_1", self.safe_dets))
            Aux1 = return_aux(V_control_in, NP1)
            V = choice(
                get_matches_of(
                    V_control_in, "arg_2",
                    get_matched_by(NP1, "arg_1",
                                   self.all_bare_transitive_verbs)))
            NP2 = choice(get_matches_of(V, "arg_2", self.safe_nouns))
            D2 = choice(get_matched_by(NP2, "arg_1", self.safe_dets))
            V_control_out = choice(
                get_matched_by(
                    NP1, "arg_1",
                    get_matches_of(
                        Aux1, "arg_2",
                        get_matched_by(V, "arg_2", self.v_control_subj_out))))
            V_raising_in = choice(
                get_matched_by(
                    NP1, "arg_1",
                    get_matches_of(
                        Aux1, "arg_2",
                        get_matched_by(V, "arg_2", self.v_raising_subj_in))))
            V_raising_out = choice(
                get_matched_by(
                    NP1, "arg_1",
                    get_matches_of(
                        Aux1, "arg_2",
                        get_matched_by(V, "arg_2", self.v_raising_subj_out))))
            to = "to"
        elif option == 2:  # object control/raising
            V_control_in = choice(self.v_control_obj_in)
            NP1 = choice(get_matches_of(V_control_in, "arg_1",
                                        self.safe_nouns))
            D1 = choice(get_matched_by(NP1, "arg_1", self.safe_dets))
            Aux1 = return_aux(V_control_in, NP1)
            control_obj = N_to_DP_mutate(
                choice(get_matches_of(V_control_in, "arg_2")))
            V = choice(
                get_matches_of(
                    V_control_in, "arg_3",
                    get_matched_by(NP1, "arg_1",
                                   self.all_bare_transitive_verbs)))
            NP2 = choice(get_matches_of(V, "arg_2", self.safe_nouns))
            D2 = choice(get_matched_by(NP2, "arg_1", self.safe_dets))
            V_control_out = choice(
                get_matched_by(
                    NP1, "arg_1",
                    get_matches_of(
                        Aux1, "arg_2",
                        get_matched_by(
                            V, "arg_3",
                            get_matched_by(control_obj, "arg_2",
                                           self.v_control_obj_out)))))

            # Both raising verbs are required below, so a failed lookup must
            # surface here with its real cause instead of being ignored.
            V_raising_in = choice(
                get_matched_by(
                    NP1, "arg_1",
                    get_matches_of(Aux1, "arg_2", self.v_raising_obj_in)))
            V_raising_out = choice(
                get_matched_by(
                    NP1, "arg_1",
                    get_matches_of(Aux1, "arg_2", self.v_raising_obj_out)))
            # The object of the control verb is spliced in before "to".
            to = control_obj[0] + " to"

        else:  # adjective control/raising
            V_control_in = choice(self.adj_control_subj_in)
            NP1 = choice(
                get_matches_of(V_control_in, "arg_1", all_common_nouns))
            D1 = choice(get_matched_by(NP1, "arg_1", self.safe_dets))
            Aux1 = return_copula(NP1)
            V = choice(
                get_matches_of(
                    V_control_in, "arg_2",
                    get_matched_by(NP1, "arg_1",
                                   self.all_bare_transitive_verbs)))
            NP2 = choice(get_matches_of(V, "arg_2", all_common_nouns))
            D2 = choice(get_matched_by(NP2, "arg_1", self.safe_dets))
            V_control_out = choice(
                get_matched_by(
                    NP1, "arg_1",
                    get_matched_by(V, "arg_2", self.adj_control_subj_out)))
            V_raising_in = choice(self.adj_raising_subj_in)
            V_raising_out = choice(self.adj_raising_subj_out)
            to = "to"

        # Determiner 4-tuples (first-clause subject, first-clause object,
        # second-clause subject, second-clause object).
        # Ds[0]/Ds[1] are the in-domain pair, Ds[2]/Ds[3] the out-domain pair.
        Ds = []
        option = random.choice([
            1, 2, 3
        ])  # There are three in-domain configurations (arbitrarily chosen)
        if option == 1:
            Ds.append(("the", "a", D1[0], D2[0]))
            Ds.append(("a", "the", D1[0], D2[0]))
        elif option == 2:
            Ds.append(("the", D_trans_2[0], D1[0], "a"))
            Ds.append(("a", D_trans_2[0], D1[0], "the"))
        else:
            Ds.append((D_trans_1[0], "the", D1[0], "a"))
            Ds.append((D_trans_1[0], "a", D1[0], "the"))

        option = random.choice([
            1, 2, 3
        ])  # There are three out-domain configurations (arbitrarily chosen)
        if option == 1:
            Ds.append(("the", D_trans_2[0], "a", D2[0]))
            Ds.append(("a", D_trans_2[0], "the", D2[0]))
        elif option == 2:
            Ds.append((D_trans_1[0], "the", "a", D2[0]))
            Ds.append((D_trans_1[0], "a", "the", D2[0]))
        else:
            Ds.append((D_trans_1[0], D_trans_2[0], "the", "a"))
            Ds.append((D_trans_1[0], D_trans_2[0], "a", "the"))

        def render(predicate, determiners):
            # Full sentence for one predicate and one determiner 4-tuple.
            template = " ".join([
                S1_abs, "and", "%s", NP1[0], Aux1[0], predicate[0], to, V[0],
                "%s", NP2[0], "."
            ])
            return template % determiners

        data = self.build_paradigm(
            training_1_1=render(V_control_in, Ds[0]),
            training_0_0=render(V_raising_in, Ds[1]),
            control_1_0=render(V_control_in, Ds[1]),
            control_0_1=render(V_raising_in, Ds[0]),
            test_1_0=render(V_control_out, Ds[3]),
            test_0_1=render(V_raising_out, Ds[2]),
            control_1_1=render(V_control_out, Ds[2]),
            control_0_0=render(V_raising_out, Ds[3]),
        )

        track_sentence = [
            (NP1[0], Aux1[0], V_control_in[0], to, V[0], NP2[0], "."),
            (NP1[0], Aux1[0], V_raising_in[0], to, V[0], NP2[0], "."),
            (NP1[0], Aux1[0], V_control_out[0], to, V[0], NP2[0], "."),
            (NP1[0], Aux1[0], V_raising_out[0], to, V[0], NP2[0], "."),
            (NP1[0], Aux1[0], V_control_out[0], to, V[0], NP2[0], "."),
            (NP1[0], Aux1[0], V_raising_out[0], to, V[0], NP2[0], ".")
        ]

        return data, track_sentence
 def get_pronouns(self):
     """Draw a first-person and a non-first-person subject plus accusatives.

     With equal probability the subjects are nominative pronouns, possessive
     determiner + noun phrases, or possessive pronouns. For each subject an
     accusative pronoun with the same "person" and "sg" values is drawn
     from ``self.acc_pronoun``.

     Returns:
         ``(first, non_first, first_acc, non_first_acc)``.
     """

     def accusative_for(entry):
         # Accusative pronoun agreeing with `entry` in person and number.
         return choice(
             get_all_conjunctive([("person", entry["person"]),
                                  ("sg", entry["sg"])], self.acc_pronoun))

     # randomly select either a nominative pronoun, possessive determiner, or possessive pronoun
     r = random.random()

     if r < 1 / 3:  # nominative pronoun
         first = choice(np.intersect1d(self.first, self.nom_pronoun))
         non_first = choice(np.setdiff1d(self.nom_pronoun, self.first))
         first_acc = accusative_for(first)
         non_first_acc = accusative_for(non_first)
         return first, non_first, first_acc, non_first_acc

     if r < 2 / 3:  # possessive det
         noun = choice(self.possessible_animates)

         def possessed_by(det):
             # Copy of the noun entry whose expression is "<det> <noun>".
             phrase = noun.copy()
             phrase[0] = det[0] + " " + phrase[0]
             return phrase

         first_det = choice(get_all("person", "1", self.poss_det))
         first = possessed_by(first_det)
         non_first_det = choice(np.setdiff1d(self.poss_det, self.first))
         non_first = possessed_by(non_first_det)
         # Agreement of the accusative follows the determiner, not the noun.
         first_acc = accusative_for(first_det)
         non_first_acc = accusative_for(non_first_det)
         return first, non_first, first_acc, non_first_acc

     # possessive pronoun
     first = choice(get_all("person", "1", self.poss_pronoun))
     non_first = choice(np.setdiff1d(self.poss_pronoun, self.first))
     first_acc = accusative_for(first)
     non_first_acc = accusative_for(non_first)
     # Possessive pronouns can have either singular or plural agreement, irrespective of person/number marking
     sg = random.choice(["1", "0"])
     pl = "0" if sg == "1" else "1"
     for pronoun in (first, non_first):
         pronoun["sg"] = sg
         pronoun["pl"] = pl
     return first, non_first, first_acc, non_first_acc
Beispiel #8
0
    def sample(self):
        """
        Sample one eight-sentence paradigm crossing "second clause contains an
        adjective" (first digit) with "sentence contains 'the'" (second digit).

        Every sentence is a transitive first clause ending in "and" followed
        by a copular second clause. Training and Control x/y (x != y)
        sentences use the in-domain names, nouns, adjectives and locales; the
        remaining sentences use the out-of-domain ones.

        Training 1/1
        The girl saw a cat and John is the tall man.
        The girl saw a cat and the tall man is in the room.
        The girl saw a cat and the man is tall.
        The girl saw a cat and the man in the room is tall.

        Training 0/0
        A girl saw a cat and John is a man.
        A girl saw a cat and John is the man in a room.
        A girl saw a cat and a man is John.

        Test 1/0
        A girl saw a cat and John is a tall man in a room.
        A girl saw a cat and John is tall.
        A girl saw a cat and a tall man is John.
        A girl saw a cat and a tall man in a room is John.
        A girl saw a cat and a tall man is president.
        A girl saw a cat and a tall man in the room is president.

        Test 0/1
        The girl saw a cat and John is in the room.
        The girl saw a cat and The man is in the room.
        The girl saw a cat and The man in the room is John.
        The girl saw a cat and John is president.
        The girl saw a cat and The man is president.
        The girl saw a cat and the man in the room is president.

        Control 1/1
        The girl saw a cat and John is a tall man in a room.
        The girl saw a cat and John is tall.
        The girl saw a cat and a tall man is John.
        The girl saw a cat and a tall man in a room is John.
        The girl saw a cat and a tall man is president.
        The girl saw a cat and a tall man in the room is president.

        Control 0/0
        A girl saw a cat and John is in a room.
        A girl saw a cat and a man is in a room.
        A girl saw a cat and a man in a room is John.
        A girl saw a cat and John is president.
        A girl saw a cat and a man is president.
        A girl saw a cat and a man in a room is president.

        Returns a pair (data, track_sentence): data is the value returned by
        self.build_paradigm; track_sentence is a list of six identical tuples
        of the in-domain name, noun, adjective and locative.
        """
        # First clause, in three variants: as sampled, with "the" forced on
        # the subject, and with "the" forced on the object.
        v_trans = choice(all_transitive_verbs)
        subj = choice(get_matches_of(v_trans, "arg_1", all_common_nouns))
        aux = return_aux(v_trans, subj)
        D_subj = choice(get_matched_by(subj, "arg_1", self.safe_determiners))
        obj = choice(get_matches_of(v_trans, "arg_2", all_common_nouns))
        D_obj = choice(get_matched_by(obj, "arg_1", self.safe_determiners))
        S1 = " ".join(
            [D_subj[0], subj[0], aux[0], v_trans[0], D_obj[0], obj[0], "and"])
        S1_the_subj = " ".join(
            ["the", subj[0], aux[0], v_trans[0], D_obj[0], obj[0], "and"])
        S1_the_obj = " ".join(
            [D_subj[0], subj[0], aux[0], v_trans[0], "the", obj[0], "and"])

        # Lexical material of the second clause. Nouns must agree in gender
        # with the name they are equated with (or carry gender "n" / "").
        name_in = choice(self.names_in_domain)
        name_out = choice(self.names_out_domain)
        noun_in = choice(
            np.array([
                x for x in self.common_nouns_in_domain
                if x["gender"] == name_in["gender"] or x["gender"] == "n"
                or x["gender"] == ""
            ]))
        noun_out = choice(
            np.array([
                x for x in self.common_nouns_out_domain
                if x["gender"] == name_out["gender"] or x["gender"] == "n"
                or x["gender"] == ""
            ]))
        D_in = choice(get_matched_by(noun_in, "arg_1", self.a))
        D_out = choice(get_matched_by(noun_out, "arg_1", self.a))
        adj_in = choice(self.adjs_in_domain)
        adj_out = choice(self.adjs_out_domain)
        locative_in = build_locative(choice(self.locales_in_domain),
                                     allow_quantifiers=False,
                                     avoid=self.the)
        locative_out = build_locative(choice(self.locales_out_domain),
                                      allow_quantifiers=False,
                                      avoid=self.the)
        # Unlike the filters above, an empty gender is not accepted here.
        other_noun = choice(
            np.array([
                x for x in self.one_word_noun
                if x["gender"] == name_out["gender"] or x["gender"] == "n"
            ]))

        lexical_items = (name_in[0], noun_in[0], adj_in[0], locative_in[0])
        track_sentence = [lexical_items] * 6

        # Training_1_1: adjective present; "the" is either the second-clause
        # determiner or the first-clause object determiner.
        option = random.randint(0, 7)
        (D, S) = ("the", S1) if option < 4 else (D_in[0], S1_the_obj)
        if option % 4 == 0:
            training_1 = " ".join(
                [S, name_in[0], "is", D, adj_in[0], noun_in[0]])
        elif option % 4 == 1:
            training_1 = " ".join(
                [S, D, adj_in[0], noun_in[0], "is", locative_in[0]])
        elif option % 4 == 2:
            training_1 = " ".join([S, D, noun_in[0], "is", adj_in[0]])
        else:
            training_1 = " ".join(
                [S, D, noun_in[0], locative_in[0], "is", adj_in[0]])

        # Training_0_0: no adjective, no "the".
        option = random.choice([1, 2, 3])
        if option == 1:
            training_0 = " ".join([S1, name_in[0], "is", D_in[0], noun_in[0]])
        elif option == 2:
            training_0 = " ".join(
                [S1, name_in[0], "is", D_in[0], noun_in[0], locative_in[0]])
        else:
            training_0 = " ".join([S1, D_in[0], noun_in[0], "is", name_in[0]])

        # Control_1_0: adjective present, no "the". The first clause must be
        # the plain S1; reusing the clause picked for Training_1_1 could
        # carry "the" into a sentence labelled as lacking it.
        option = random.randint(0, 3)
        if option == 0:
            control_1_0 = " ".join(
                [S1, name_in[0], "is", D_in[0], adj_in[0], noun_in[0]])
        elif option == 1:
            control_1_0 = " ".join(
                [S1, D_in[0], adj_in[0], noun_in[0], "is", locative_in[0]])
        elif option == 2:
            control_1_0 = " ".join([S1, D_in[0], noun_in[0], "is", adj_in[0]])
        else:
            control_1_0 = " ".join(
                [S1, D_in[0], noun_in[0], locative_in[0], "is", adj_in[0]])

        # Control_0_1: no adjective, "the" present.
        option = random.randint(0, 5)
        (D, S) = ("the", S1) if option < 3 else (D_in[0], S1_the_obj)
        if option % 3 == 1:
            control_0_1 = " ".join([S, name_in[0], "is", D, noun_in[0]])
        elif option % 3 == 2:
            control_0_1 = " ".join(
                [S, name_in[0], "is", D, noun_in[0], locative_in[0]])
        else:
            control_0_1 = " ".join([S, D, noun_in[0], "is", name_in[0]])

        # Test_1_0: out-of-domain, adjective present, no "the".
        option = random.choice([1, 2, 3, 4, 5])
        if option == 1:
            test_1_0 = " ".join([
                S1, name_out[0], "is", D_out[0], adj_out[0], noun_out[0],
                locative_out[0]
            ])
        elif option == 2:
            test_1_0 = " ".join(
                [S1, D_out[0], adj_out[0], noun_out[0], "is", name_out[0]])
        elif option == 3:
            test_1_0 = " ".join([
                S1, D_out[0], adj_out[0], noun_out[0], locative_out[0], "is",
                name_out[0]
            ])
        elif option == 4:
            test_1_0 = " ".join(
                [S1, D_out[0], adj_out[0], noun_out[0], "is", other_noun[0]])
        else:
            test_1_0 = " ".join([
                S1, D_out[0], adj_out[0], noun_out[0], locative_out[0], "is",
                other_noun[0]
            ])

        # Control_1_1: out-of-domain, adjective present, "the" present.
        option = random.randint(0, 9)
        (D, S) = ("the", S1) if option < 5 else (D_out[0], S1_the_subj)
        if option % 5 == 0:
            control_1_1 = " ".join([
                S, name_out[0], "is", D, adj_out[0], noun_out[0],
                locative_out[0]
            ])
        elif option % 5 == 1:
            control_1_1 = " ".join(
                [S, D, adj_out[0], noun_out[0], "is", name_out[0]])
        elif option % 5 == 2:
            control_1_1 = " ".join([
                S, D, adj_out[0], noun_out[0], locative_out[0], "is",
                name_out[0]
            ])
        elif option % 5 == 3:
            control_1_1 = " ".join(
                [S, D, adj_out[0], noun_out[0], "is", other_noun[0]])
        else:
            control_1_1 = " ".join([
                S, D, adj_out[0], noun_out[0], locative_out[0], "is",
                other_noun[0]
            ])

        # Test_0_1: out-of-domain, no adjective, "the" present.
        option = random.randint(0, 7)
        (D, S) = ("the", S1) if option < 4 else (D_out[0], S1_the_subj)
        if option % 4 == 0:
            test_0_1 = " ".join([S, D, noun_out[0], "is", locative_out[0]])
        elif option % 4 == 1:
            test_0_1 = " ".join(
                [S, D, noun_out[0], locative_out[0], "is", name_out[0]])
        elif option % 4 == 2:
            test_0_1 = " ".join([S, D, noun_out[0], "is", other_noun[0]])
        else:
            test_0_1 = " ".join(
                [S, D, noun_out[0], locative_out[0], "is", other_noun[0]])

        # Control_0_0: out-of-domain, no adjective, no "the".
        option = random.choice([1, 2, 3, 4, 5, 6])
        if option == 1:
            control_0_0 = " ".join([S1, name_out[0], "is", locative_out[0]])
        elif option == 2:
            control_0_0 = " ".join(
                [S1, D_out[0], noun_out[0], "is", locative_out[0]])
        elif option == 3:
            control_0_0 = " ".join([
                S1, D_out[0], noun_out[0], locative_out[0], "is", name_out[0]
            ])
        elif option == 4:
            control_0_0 = " ".join([S1, name_out[0], "is", other_noun[0]])
        elif option == 5:
            control_0_0 = " ".join(
                [S1, D_out[0], noun_out[0], "is", other_noun[0]])
        else:
            control_0_0 = " ".join([
                S1, D_out[0], noun_out[0], locative_out[0], "is", other_noun[0]
            ])

        data = self.build_paradigm(
            training_1_1=training_1 + ".",
            training_0_0=training_0 + ".",
            control_1_0=control_1_0 + ".",
            control_0_1=control_0_1 + ".",
            test_1_0=test_1_0 + ".",
            test_0_1=test_0_1 + ".",
            control_1_1=control_1_1 + ".",
            control_0_0=control_0_0 + ".",
        )
        return data, track_sentence
    def sample_verb(self):
        """Sample one eight-item paradigm made of three conjoined clauses.

        Every generated sentence has the shape::

            D NP_trans_1 Aux V_trans D NP_trans_2 and
            D Subj1 Aux1 V Obj1 and D Subj2 Aux2 V' Obj2 .

        The four determiner slots are filled from one of the tuples in
        ``Ds``.  Training items and ``control_1_0`` / ``control_0_1`` use the
        in-domain verb ``V1`` paired with ``V1_ant`` or ``V1_other``; test
        items and ``control_1_1`` / ``control_0_0`` use the out-of-domain verb
        ``V2`` paired with ``V2_ant`` or ``V2_other``.

        Returns:
            A pair ``(data, track_sentence)``.  ``data`` is whatever
            ``self.build_paradigm`` returns; ``track_sentence`` is a list of
            tuples holding the sampled content words.

        Raises:
            MatchNotFoundError: if no out-of-domain verb fits the sampled
                arguments, if all ten resampling attempts fail the
                selectional checks, or if ``V1`` has a category other than
                the two handled below.
        """
        # Carrier clause with two open determiner slots ("%s").
        V_trans = choice(self.all_possibly_singular_transitive_verbs)
        NP_trans_1 = choice(
            get_matches_of(V_trans, "arg_1", self.all_singular_common_nouns))
        NP_trans_2 = choice(
            get_matches_of(V_trans, "arg_2", self.all_singular_common_nouns))
        D_trans_1 = choice(get_matched_by(NP_trans_1, "arg_1", self.safe_dets))
        D_trans_2 = choice(get_matched_by(NP_trans_2, "arg_1", self.safe_dets))
        Aux_trans = return_aux(V_trans, NP_trans_1)
        S1_abs = " ".join([
            "%s", NP_trans_1[0], Aux_trans[0], V_trans[0], "%s", NP_trans_2[0]
        ])

        # In-domain verb with its antonym and its synonym/hypernym/hyponym.
        V1 = choice(self.in_domain_verbs_main)
        V1_ant = get_same_V_form(V1["antonym"], V1)
        V1_other = get_same_V_form(V1["synonym_hypernym_hyponym"], V1)
        Subj1 = choice(
            get_matches_of(
                V1, "arg_1",
                get_matches_of(
                    V1_ant, "arg_1",
                    get_matches_of(V1_other, "arg_1",
                                   self.all_singular_common_nouns))))
        Subj2 = choice(
            get_matches_of(
                V1_ant, "arg_1",
                get_matches_of(V1_other, "arg_1",
                               self.all_singular_common_nouns)))
        D1 = choice(get_matched_by(Subj1, "arg_1", self.safe_dets))
        D2 = choice(get_matched_by(Subj2, "arg_1", self.safe_dets))
        Aux1 = return_aux(V1, Subj1, allow_negated=False)
        Aux2 = return_aux(V1_ant, Subj2, allow_negated=False)

        def sample_object():
            # The candidate pool is deliberately rebuilt for every draw.
            # NOTE(review): N_to_DP_mutate may modify the chosen row in
            # place, so sharing one pool between draws is not obviously safe.
            return N_to_DP_mutate(choice(
                get_matches_of(
                    V1, "arg_2",
                    get_matches_of(
                        V1_ant, "arg_2",
                        get_matches_of(V1_other, "arg_2", all_nouns)))),
                                  avoid=all_very_common_dets)

        if V1["category"] == "(S\\NP)/NP":
            # Transitive: the objects must fit V1, V1_ant and V1_other, and
            # then also the out-of-domain verb family chosen below.
            for _ in range(10):
                Obj1 = sample_object()
                Obj2 = sample_object()
                try:
                    V2 = choice(
                        get_matches_of(
                            Aux1, "arg_2",
                            get_matched_by(
                                Subj1, "arg_1",
                                get_matched_by(
                                    Obj1, "arg_2",
                                    self.out_domain_transitive_verbs_main))))
                except Exception:
                    raise MatchNotFoundError(
                        "fail to find V: %s %s %s %s %s" %
                        (V1[0], Subj1[0], Subj2[0], Obj1[0], Obj2[0]))
                V2 = get_same_V_form(V2["root"], V1)
                V2_ant = get_same_V_form(V2["antonym"], V2)
                V2_other = get_same_V_form(V2["synonym_hypernym_hyponym"], V2)
                if (is_match_disj(Obj2, V2_ant["arg_2"])
                        and is_match_disj(Obj2, V2_other["arg_2"])
                        and is_match_disj(Subj2, V2_ant["arg_1"])
                        and is_match_disj(Subj2, V2_other["arg_1"])):
                    break
                print("fail to match: %s %s %s %s %s %s" %
                      (V1[0], V2[0], Subj1[0], Subj2[0], Obj1[0], Obj2[0]))
            else:
                # Every attempt failed the selectional checks.  Give up
                # instead of emitting sentences built from a mismatched
                # combination, mirroring the intransitive branch.
                raise MatchNotFoundError(
                    "fail to match after 10 attempts: %s %s %s %s %s %s" %
                    (V1[0], V2[0], Subj1[0], Subj2[0], Obj1[0], Obj2[0]))
        elif V1["category"] == "S\\NP":
            # Intransitive: there are no objects to sample.
            try:
                V2 = choice(
                    get_matches_of(
                        Aux1, "arg_2",
                        get_matched_by(
                            Subj1, "arg_1",
                            self.out_domain_intransitive_verbs_main)))
            except Exception:
                raise MatchNotFoundError("fail to find V: %s %s %s" %
                                         (V1[0], Subj1[0], Subj2[0]))
            V2 = get_same_V_form(V2["root"], V1)
            V2_ant = get_same_V_form(V2["antonym"], V2)
            V2_other = get_same_V_form(V2["synonym_hypernym_hyponym"], V2)
            Obj1 = self.empty  # No object: this is an empty string
            Obj2 = self.empty  # No object: this is an empty string
            if not (is_match_disj(Subj2, V2_ant["arg_1"])
                    and is_match_disj(Subj2, V2_other["arg_1"])):
                raise MatchNotFoundError("fail to match: %s %s %s %s" %
                                         (V1[0], V2[0], Subj1[0], Subj2[0]))
        else:
            # Without this guard V2 / Obj1 / Obj2 would be unbound below.
            raise MatchNotFoundError("unsupported verb category: %s %s" %
                                     (V1[0], V1["category"]))

        # Ds[0] / Ds[1]: in-domain determiner tuples ("the" first / "a" first).
        # Ds[2] / Ds[3]: out-of-domain determiner tuples, same ordering.
        Ds = []
        option = random.choice([
            1, 2, 3
        ])  # There are three in-domain configurations (arbitrarily chosen)
        if option == 1:
            Ds.append(("the", "a", D1[0], D2[0]))
            Ds.append(("a", "the", D1[0], D2[0]))
        elif option == 2:
            Ds.append(("the", D_trans_2[0], D1[0], "a"))
            Ds.append(("a", D_trans_2[0], D1[0], "the"))
        else:
            Ds.append((D_trans_1[0], "the", D1[0], "a"))
            Ds.append((D_trans_1[0], "a", D1[0], "the"))

        option = random.choice([
            1, 2, 3
        ])  # There are three out-domain configurations (arbitrarily chosen)
        if option == 1:
            Ds.append(("the", D_trans_2[0], "a", D2[0]))
            Ds.append(("a", D_trans_2[0], "the", D2[0]))
        elif option == 2:
            Ds.append((D_trans_1[0], "the", "a", D2[0]))
            Ds.append((D_trans_1[0], "a", "the", D2[0]))
        else:
            Ds.append((D_trans_1[0], D_trans_2[0], "the", "a"))
            Ds.append((D_trans_1[0], D_trans_2[0], "a", "the"))

        def sentence(verb_1, verb_2, dets):
            # dets fills, in order: the two carrier-clause slots, then the
            # Subj1 slot and the Subj2 slot.
            return " ".join([
                S1_abs, "and", "%s", Subj1[0], Aux1[0], verb_1[0], Obj1[0],
                "and", "%s", Subj2[0], Aux2[0], verb_2[0], Obj2[0], "."
            ]) % dets

        data = self.build_paradigm(
            training_1_1=sentence(V1, V1_ant, Ds[0]),
            training_0_0=sentence(V1, V1_other, Ds[1]),
            control_1_0=sentence(V1, V1_ant, Ds[1]),
            control_0_1=sentence(V1, V1_other, Ds[0]),
            test_1_0=sentence(V2, V2_ant, Ds[3]),
            test_0_1=sentence(V2, V2_other, Ds[2]),
            control_1_1=sentence(V2, V2_ant, Ds[2]),
            control_0_0=sentence(V2, V2_other, Ds[3]),
        )
        # NOTE(review): all four out-of-domain entries record V2_ant, although
        # half of the out-of-domain sentences are built with V2_other, and
        # there are six entries for eight paradigm items -- confirm intended.
        track_sentence = [
            (Subj1[0], V1[0], Obj1[0], Subj2[0], V1_ant[0], Obj2[0], "."),
            (Subj1[0], V1[0], Obj1[0], Subj2[0], V1_other[0], Obj2[0], "."),
            (Subj1[0], V2[0], Obj1[0], Subj2[0], V2_ant[0], Obj2[0], "."),
            (Subj1[0], V2[0], Obj1[0], Subj2[0], V2_ant[0], Obj2[0], "."),
            (Subj1[0], V2[0], Obj1[0], Subj2[0], V2_ant[0], Obj2[0], "."),
            (Subj1[0], V2[0], Obj1[0], Subj2[0], V2_ant[0], Obj2[0], "."),
        ]
        return data, track_sentence
Beispiel #10
0
    def sample_modified_NP(self):
        """Sample a four-sentence paradigm with "every" and a reciprocal determiner.

        Training sentences modify NP1 with a relative clause; test sentences
        modify NP3 with an "-ing" form of the same verb.  ``Ds`` collects one
        determiner pattern per sentence, in the order training label 1,
        training label 0, test label 1, test label 0.

        Returns:
            A pair ``(data, track_sentence)``.  ``data`` is whatever
            ``self.build_paradigm`` returns; ``track_sentence`` is a
            one-element list holding a tuple of the sampled content words
            and the reciprocal determiner.

        Raises:
            Exception: whatever the vocabulary helpers raise when a lookup
                has no candidate; these errors are no longer suppressed.
        """
        # Training 1
        # Every man who      read the  book      told a    boy to   see the  same  movie.
        # D1_1  NP1 rel Aux1 V1   D2_1 NP2  Aux2 V2   D3_1 NP3 Aux3 V3  D4_1 recip NP4

        # Training 0
        # The  man who      read every book      told a    boy to   see the  same  movie.
        # D1_2 NP1 rel Aux1 V1   D2_2  NP2  Aux2 V2   D3_2 NP3 Aux3 V3  D4_2 recip NP4

        # Test 1
        # The  man      told every boy  reading the  book to   see the  same  movie.
        # D1_3 NP1 Aux2 V2   D3_3  NP3  V1ing   D2_3 NP2  Aux3 V3  D4_3 recip NP4

        # Test 0
        # The  man      told that boy  reading every book to   see the  same  movie.
        # D1_4 NP1 Aux2 V2   D3_4 NP3  V1ing   D2_4  NP2  Aux3 V3  D4_4 recip NP4

        V2 = choice(self.embedding_verbs)
        NP1 = choice(
            get_matches_of(V2, "arg_1", self.all_singular_common_nouns))
        Aux2 = return_aux(V2, NP1, allow_negated=False)
        rel = choice(get_matched_by(NP1, "arg_1", all_relativizers))
        V2_form = V2[0]

        if V2["category_2"] == "V_control_object":
            NP3 = choice(
                get_matches_of(V2, "arg_2", self.all_singular_common_nouns))
            V3 = choice(
                get_matches_of(V2, "arg_3", self.all_bare_transitive_verbs))
            Aux3 = self.to

        elif V2["category_2"] == "V_raising_object":
            V3 = choice(self.all_bare_transitive_verbs)
            Aux3 = self.to
            NP3 = choice(
                get_matches_of(V3, "arg_1", self.all_singular_common_nouns))
        else:  # clause embedding verb
            # Build the surface form locally instead of writing it back into
            # V2: V2 was drawn from self.embedding_verbs, and an in-place
            # edit could leak " that" into later samples (presumably the row
            # aliases the shared vocabulary -- confirm).
            V2_form = V2[0] + " that"
            V3 = choice(self.all_possibly_singular_transitive_verbs)
            NP3 = choice(
                get_matches_of(V3, "arg_1", self.all_singular_common_nouns))
            Aux3 = return_aux(V3, NP3, allow_negated=False)

        # NP4, V1ing and the determiners are all needed further down, so a
        # failed lookup is allowed to propagate from here rather than being
        # swallowed and resurfacing later as an unbound-variable error.
        NP4 = choice(
            get_matches_of(V3, "arg_2", self.all_singular_common_nouns))

        V1 = choice(
            get_matched_by(
                NP1, "arg_1",
                get_matched_by(NP3, "arg_1",
                               self.possibly_ing_transitive_verbs)))
        V1ing = choice(get_all("ing", "1", get_all("root", V1["root"])))
        NP2 = choice(
            get_matches_of(V1, "arg_2", self.all_singular_common_nouns))
        Aux1 = return_aux(V1, NP1, allow_negated=False)

        recip = random.choice(["the same", "a different"])
        D1 = choice(get_matched_by(NP1, "arg_1", self.singular_indefs))
        D2 = choice(get_matched_by(NP2, "arg_1", self.singular_indefs))
        D3 = choice(get_matched_by(NP3, "arg_1", self.singular_indefs))
        D4 = choice(get_matched_by(NP4, "arg_1", self.singular_indefs))

        # Training patterns are ordered [NP1, NP2, NP3, NP4].
        # There are three possible patterns for training example with label 1
        Ds = []
        option = random.choice([1, 2, 3])
        if option == 1:
            Ds.append(["every", D2[0], recip, D4[0]])
        elif option == 2:
            Ds.append(["every", D2[0], D3[0], recip])
        elif option == 3:
            Ds.append([D1[0], D2[0], "every", recip])

        # There are two possible patterns for training example with label 0
        option = random.choice([1, 2])
        if option == 1:
            Ds.append([D1[0], "every", recip, D4[0]])
        elif option == 2:
            Ds.append([D1[0], "every", D3[0], recip])

        # Test patterns are ordered [NP1, NP3, NP2, NP4] (see the templates
        # passed to build_paradigm below).
        # There are four possible patterns for test example with label 1
        option = random.choice([1, 2, 3, 4])
        if option == 1:
            Ds.append(["every", recip, D2[0], D4[0]])
        elif option == 2:
            Ds.append(["every", D3[0], recip, D4[0]])
        elif option == 3:
            Ds.append(["every", D3[0], D2[0], recip])
        elif option == 4:
            Ds.append([D1[0], "every", D2[0], recip])

        # There's only one possible pattern for test example with label 0.
        # The second slot precedes NP3, so it takes NP3's determiner (D3).
        Ds.append([D1[0], D3[0], "every", recip])

        def training(dets):
            return " ".join([
                dets[0], NP1[0], rel[0], Aux1[0], V1[0], dets[1], NP2[0],
                Aux2[0], V2_form, dets[2], NP3[0], Aux3[0], V3[0], dets[3],
                NP4[0], "."
            ])

        def test(dets):
            return " ".join([
                dets[0], NP1[0], Aux2[0], V2_form, dets[1], NP3[0], V1ing[0],
                dets[2], NP2[0], Aux3[0], V3[0], dets[3], NP4[0], "."
            ])

        data = self.build_paradigm(
            training_1_1=training(Ds[0]),
            training_0_0=training(Ds[1]),
            test_1_0=test(Ds[2]),
            test_0_1=test(Ds[3]))

        track_sentence = [(NP1[0], V1[0], NP2[0], V2_form, NP3[0], V3[0],
                           NP4[0], recip)]

        return data, track_sentence
Beispiel #11
0
    get_all("frequent", "1"))
any_decoys = np.concatenate(
    (get_all("expression", "the"),
     get_all_conjunctive([("expression", "that"),
                          ("category_2", "D")]), get_all("expression", "this"),
     get_all("expression", "these"), get_all("expression", "those")))
# sample sentences until desired number
while len(sentences) < number_to_generate:
    # sentence template
    # D1     N1   who V1      any/the/D2    N2      V2    any/the/D3  N3
    # every  boy  who bought  any/the/some  apples  sang  any/the/a   song

    # build all lexical items
    #TODO: throw in modifiers
    try:
        N1 = choice(all_animate_nouns)
        D1_up = choice(get_matched_by(N1, "arg_1", all_UE_UE_quantifiers))
        D1_down = choice(get_matched_by(N1, "arg_1", all_DE_UE_quantifiers))
        V1 = choice(get_matched_by(N1, "arg_1", all_transitive_verbs))
        V1 = conjugate(V1, N1, allow_negated=False)
        N2 = choice(get_matches_of(V1, "arg_2", all_non_singular_nouns), [N1])
        D2 = choice(
            get_matched_by(N2, "arg_1",
                           all_UE_UE_quantifiers), [D1_up, D1_down]
        )  # restrict to UE quantifiers, otherwise there could be another licensor
        V2 = choice(get_matched_by(N1, "arg_1", all_transitive_verbs), [V1])
        V2 = conjugate(V2, N1, allow_negated=False)
        N3 = choice(get_matches_of(V2, "arg_2", all_non_singular_nouns),
                    [N1, N2])
        D3 = choice(get_matched_by(N3, "arg_1", all_UE_UE_quantifiers),
                    [D1_up, D1_down])
Beispiel #12
0
    def sample_coordination(self):
        """Sample a four-sentence paradigm built around an "or" coordination.

        Training sentences join two full clauses with "or"; test sentences
        give one subject two verb phrases joined by "or".  ``Ds`` collects
        one determiner pattern per sentence, in the order training label 1,
        training label 0, test label 1, test label 0.

        Returns:
            A pair ``(data, track_sentence)``.  ``data`` is whatever
            ``self.build_paradigm`` returns; ``track_sentence`` is a
            one-element list holding a tuple of the sampled nouns, verbs and
            the reciprocal determiner.

        Raises:
            Exception: whatever the vocabulary helpers raise when a lookup
                has no candidate; the NP1 lookup no longer hides this.
        """
        # Training 1
        # A    man      slept or every girl      helped the  same  dog.
        # D1_1 NP1 Aux1 IV    OR D2_1  NP2  Aux2 V2     D3_1       NP3

        # Training 0
        # Every man      slept or the  same girl      helped a     dog.
        # D1_2  NP1 Aux1 IV    OR D2_2      NP2  Aux2 V2     D3_2  NP3

        # Test 1
        # Every man      loved a    girl or      helped the same dog.
        # D1_3  NP1 Aux1 TV    D2_3 NP2  OR Aux2 V2     D3_3     NP3

        # Test 0
        # A    man      loved every girl or      helped the same dog.
        # D1_4 NP1 Aux1 TV    D2_4  NP2  OR Aux2 V2     D3_4     NP3

        IV = choice(self.all_singular_intransitive_verbs)
        # NP1 is used by the very next statement, so a failed lookup is
        # allowed to propagate instead of being swallowed.
        NP1 = choice(
            get_matches_of(IV, "arg_1", self.all_singular_common_nouns))
        Aux1 = return_aux(IV, NP1, allow_negated=False)
        TV = choice(
            get_matched_by(NP1, "arg_1",
                           get_matches_of(Aux1, "arg_2",
                                          all_transitive_verbs)))
        NP2 = choice(
            get_matches_of(TV, "arg_2", self.all_singular_common_nouns))
        V2 = choice(
            get_matched_by(NP1, "arg_1",
                           get_matched_by(NP2, "arg_1", all_transitive_verbs)))
        Aux2 = return_aux(V2, NP2, allow_negated=False)
        NP3 = choice(
            get_matches_of(V2, "arg_2", self.all_singular_common_nouns))

        recip = random.choice(["the same", "a different"])
        D1 = choice(get_matched_by(NP1, "arg_1", self.singular_indefs))
        D2 = choice(get_matched_by(NP2, "arg_1", self.singular_indefs))
        D3 = choice(get_matched_by(NP3, "arg_1", self.singular_indefs))

        # When `reverse` is true the intransitive clause is emitted first in
        # the training sentences; otherwise it is emitted second.
        reverse = bool(random.choice([0, 1]))

        # Each pattern is ordered [NP1, NP2, NP3].
        Ds = []
        # There is one possible pattern for training example with label 1
        Ds.append([D1[0], "every", recip])

        # There are two possible patterns for training example with label 0;
        # which pair applies depends on the clause order chosen above.
        option = random.choice([1, 2])
        if reverse:
            if option == 1:
                Ds.append(["every", recip, D3[0]])
            else:
                Ds.append(["every", D2[0], recip])
        else:
            if option == 1:
                Ds.append([recip, "every", D3[0]])
            else:
                Ds.append([recip, D2[0], "every"])

        # There are two possible patterns for test example with label 1
        option = random.choice([1, 2])
        if option == 1:
            Ds.append(["every", recip, D3[0]])
        else:
            Ds.append(["every", D2[0], recip])

        # There's only one possible pattern for test example with label 0
        Ds.append([D1[0], "every", recip])

        def intransitive_clause(dets):
            return " ".join([dets[0], NP1[0], Aux1[0], IV[0]])

        def transitive_clause(dets):
            return " ".join(
                [dets[1], NP2[0], Aux2[0], V2[0], dets[2], NP3[0]])

        def test_sentence(dets):
            return " ".join([
                dets[0], NP1[0], Aux1[0], TV[0], dets[1], NP2[0], "or",
                Aux2[0], V2[0], dets[2], NP3[0], "."
            ])

        # The clause order of the training examples varies for variety.
        clause_1_a = intransitive_clause(Ds[0])
        clause_1_b = transitive_clause(Ds[0])
        clause_0_a = intransitive_clause(Ds[1])
        clause_0_b = transitive_clause(Ds[1])

        if reverse:
            training_1_1 = "%s or %s." % (clause_1_a, clause_1_b)
            training_0_0 = "%s or %s." % (clause_0_a, clause_0_b)
        else:
            training_1_1 = "%s or %s." % (clause_1_b, clause_1_a)
            training_0_0 = "%s or %s." % (clause_0_b, clause_0_a)

        data = self.build_paradigm(training_1_1=training_1_1,
                                   training_0_0=training_0_0,
                                   test_1_0=test_sentence(Ds[2]),
                                   test_0_1=test_sentence(Ds[3]))

        track_sentence = [(NP1[0], NP2[0], NP3[0], IV[0], TV[0], V2[0], recip)]

        return data, track_sentence
Beispiel #13
0
    def sample_verb(self):
        V_trans = choice(all_transitive_verbs)
        NP_trans_1 = choice(get_matches_of(V_trans, "arg_1", all_common_nouns))
        NP_trans_2 = choice(get_matches_of(V_trans, "arg_2", all_common_nouns))
        D_trans_1 = choice(get_matched_by(NP_trans_1, "arg_1", self.safe_dets))
        D_trans_2 = choice(get_matched_by(NP_trans_2, "arg_1", self.safe_dets))
        Aux_trans = return_aux(V_trans, NP_trans_1)
        S1_the_subj = " ".join([
            "the", NP_trans_1[0], Aux_trans[0], V_trans[0], D_trans_2[0],
            NP_trans_2[0]
        ])
        S1_the_obj = " ".join([
            D_trans_1[0], NP_trans_1[0], Aux_trans[0], V_trans[0], "the",
            NP_trans_2[0]
        ])
        S1 = " ".join([
            D_trans_1[0], NP_trans_1[0], Aux_trans[0], V_trans[0],
            D_trans_2[0], NP_trans_2[0]
        ])
        V1 = choice(self.in_domain_verbs_main)
        try:
            V1_ant = get_same_V_form(V1["antonym"], V1)
            V1_other = get_same_V_form(V1["synonym_hypernym_hyponym"], V1)
        except Exception:
            pass
        Subj1 = choice(
            get_matches_of(
                V1, "arg_1",
                get_matches_of(
                    V1_ant, "arg_1",
                    get_matches_of(V1_other, "arg_1", all_common_nouns))))
        Subj2 = choice(
            get_matches_of(V1_ant, "arg_1",
                           get_matches_of(V1_other, "arg_1",
                                          all_common_nouns)))
        D1 = choice(get_matched_by(Subj1, "arg_1", self.safe_dets))
        D2 = choice(get_matched_by(Subj2, "arg_1", self.safe_dets))
        Aux1 = return_aux(V1, Subj1, allow_negated=False)
        Aux2 = return_aux(V1_ant, Subj2, allow_negated=False)
        if V1["category"] == "(S\\NP)/NP":  # If the antonym is transitive, we need to generate objects that match all relevant verb forms
            for _ in range(10):
                Obj1 = N_to_DP_mutate(choice(
                    get_matches_of(
                        V1, "arg_2",
                        get_matches_of(
                            V1_ant, "arg_2",
                            get_matches_of(V1_other, "arg_2", all_nouns)))),
                                      avoid=self.the)
                Obj2 = N_to_DP_mutate(choice(
                    get_matches_of(
                        V1, "arg_2",
                        get_matches_of(
                            V1_ant, "arg_2",
                            get_matches_of(V1_other, "arg_2", all_nouns)))),
                                      avoid=self.the)
                try:
                    V2 = choice(
                        get_matches_of(
                            Aux1, "arg_2",
                            get_matched_by(
                                Subj1, "arg_1",
                                get_matched_by(
                                    Obj1, "arg_2",
                                    self.out_domain_transitive_verbs_main))))
                except Exception:
                    raise MatchNotFoundError(
                        "fail to find V: %s %s %s %s %s" %
                        (V1[0], Subj1[0], Subj2[0], Obj1[0], Obj2[0]))
                V2 = get_same_V_form(V2["root"], V1)
                V2_ant = get_same_V_form(V2["antonym"], V2)
                V2_other = get_same_V_form(V2["synonym_hypernym_hyponym"], V2)
                if is_match_disj(Obj2, V2_ant["arg_2"]) and is_match_disj(Obj2, V2_other["arg_2"]) and \
                        is_match_disj(Subj2, V2_ant["arg_1"]) and is_match_disj(Subj2, V2_other["arg_1"]):
                    break
                else:
                    print("fail to match: %s %s %s %s %s %s" %
                          (V1[0], V2[0], Subj1[0], Subj2[0], Obj1[0], Obj2[0]))
        elif V1["category"] == "S\\NP":  # If the antonym is intransitive, no objects
            try:
                V2 = choice(
                    get_matches_of(
                        Aux1, "arg_2",
                        get_matched_by(
                            Subj1, "arg_1",
                            self.out_domain_intransitive_verbs_main)))
            except Exception:
                raise MatchNotFoundError("fail to find V: %s %s %s" %
                                         (V1[0], Subj1[0], Subj2[0]))
            V2 = get_same_V_form(V2["root"], V1)
            V2_ant = get_same_V_form(V2["antonym"], V2)
            V2_other = get_same_V_form(V2["synonym_hypernym_hyponym"], V2)
            Obj1 = self.empty  # No object: this is an empty string
            Obj2 = self.empty  # No object: this is an empty string
            if is_match_disj(Subj2, V2_ant["arg_1"]) and is_match_disj(
                    Subj2, V2_other["arg_1"]):
                pass
            else:
                raise MatchNotFoundError("fail to match: %s %s %s %s" %
                                         (V1[0], V2[0], Subj1[0], Subj2[0]))

        if choice([True, False]):
            training_1_1 = " ".join([
                S1_the_subj, "and", D1[0], Subj1[0], Aux1[0], V1[0], Obj1[0],
                "and", D2[0], Subj2[0], Aux2[0], V1_ant[0], Obj2[0], "."
            ])
        else:
            training_1_1 = " ".join([
                "the", Subj1[0], Aux1[0], V1[0], Obj1[0], "and", D2[0],
                Subj2[0], Aux2[0], V1_ant[0], Obj2[0], "and", S1, "."
            ])

        option = random.randint(0, 5)
        if option == 0:
            training_0_0 = " ".join([
                S1_the_obj, "and", D1[0], Subj1[0], Aux1[0], V1[0], Obj1[0],
                "and", D2[0], Subj2[0], Aux2[0], V1_other[0], Obj2[0], "."
            ])
        elif option == 1:
            training_0_0 = " ".join([
                S1, "and", "the", Subj1[0], Aux1[0], V1[0], Obj1[0], "and",
                D2[0], Subj2[0], Aux2[0], V1_other[0], Obj2[0], "."
            ])
        elif option == 2:
            training_0_0 = " ".join([
                S1, "and", D1[0], Subj1[0], Aux1[0], V1[0], Obj1[0], "and",
                "the", Subj2[0], Aux2[0], V1_other[0], Obj2[0], "."
            ])
        elif option == 3:
            training_0_0 = " ".join([
                D1[0], Subj1[0], Aux1[0], V1[0], Obj1[0], "and", "the",
                Subj2[0], Aux2[0], V1_other[0], Obj2[0], "and", S1, "."
            ])
        elif option == 4:
            training_0_0 = " ".join([
                D1[0], Subj1[0], Aux1[0], V1[0], Obj1[0], "and", D2[0],
                Subj2[0], Aux2[0], V1_other[0], Obj2[0], "and", S1_the_subj,
                "."
            ])
        else:
            training_0_0 = " ".join([
                D1[0], Subj1[0], Aux1[0], V1[0], Obj1[0], "and", D2[0],
                Subj2[0], Aux2[0], V1_other[0], Obj2[0], "and", S1_the_obj, "."
            ])

        if choice([True, False]):
            control_0_1 = " ".join([
                S1_the_subj, "and", D1[0], Subj1[0], Aux1[0], V1[0], Obj1[0],
                "and", D2[0], Subj2[0], Aux2[0], V1_other[0], Obj2[0], "."
            ])
        else:
            control_0_1 = " ".join([
                "the", Subj1[0], Aux1[0], V1[0], Obj1[0], "and", D2[0],
                Subj2[0], Aux2[0], V1_other[0], Obj2[0], "and", S1, "."
            ])

        option = random.randint(0, 5)
        if option == 0:
            control_1_0 = " ".join([
                S1_the_obj, "and", D1[0], Subj1[0], Aux1[0], V1[0], Obj1[0],
                "and", D2[0], Subj2[0], Aux2[0], V1_ant[0], Obj2[0], "."
            ])
        elif option == 1:
            control_1_0 = " ".join([
                S1, "and", "the", Subj1[0], Aux1[0], V1[0], Obj1[0], "and",
                D2[0], Subj2[0], Aux2[0], V1_ant[0], Obj2[0], "."
            ])
        elif option == 2:
            control_1_0 = " ".join([
                S1, "and", D1[0], Subj1[0], Aux1[0], V1[0], Obj1[0], "and",
                "the", Subj2[0], Aux2[0], V1_ant[0], Obj2[0], "."
            ])
        elif option == 3:
            control_1_0 = " ".join([
                D1[0], Subj1[0], Aux1[0], V1[0], Obj1[0], "and", "the",
                Subj2[0], Aux2[0], V1_ant[0], Obj2[0], "and", S1, "."
            ])
        elif option == 4:
            control_1_0 = " ".join([
                D1[0], Subj1[0], Aux1[0], V1[0], Obj1[0], "and", D2[0],
                Subj2[0], Aux2[0], V1_ant[0], Obj2[0], "and", S1_the_subj, "."
            ])
        else:
            control_1_0 = " ".join([
                D1[0], Subj1[0], Aux1[0], V1[0], Obj1[0], "and", D2[0],
                Subj2[0], Aux2[0], V1_ant[0], Obj2[0], "and", S1_the_obj, "."
            ])

        option = random.randint(0, 5)
        if option == 0:
            test_1_0 = " ".join([
                S1_the_obj, "and", D1[0], Subj1[0], Aux1[0], V2[0], Obj1[0],
                "and", D2[0], Subj2[0], Aux2[0], V2_ant[0], Obj2[0], "."
            ])
        elif option == 1:
            test_1_0 = " ".join([
                S1, "and", "the", Subj1[0], Aux1[0], V2[0], Obj1[0], "and",
                D2[0], Subj2[0], Aux2[0], V2_ant[0], Obj2[0], "."
            ])
        elif option == 2:
            test_1_0 = " ".join([
                S1, "and", D1[0], Subj1[0], Aux1[0], V2[0], Obj1[0], "and",
                "the", Subj2[0], Aux2[0], V2_ant[0], Obj2[0], "."
            ])
        elif option == 3:
            test_1_0 = " ".join([
                D1[0], Subj1[0], Aux1[0], V2[0], Obj1[0], "and", "the",
                Subj2[0], Aux2[0], V2_ant[0], Obj2[0], "and", S1, "."
            ])
        elif option == 4:
            test_1_0 = " ".join([
                D1[0], Subj1[0], Aux1[0], V2[0], Obj1[0], "and", D2[0],
                Subj2[0], Aux2[0], V2_ant[0], Obj2[0], "and", S1_the_subj, "."
            ])
        else:
            test_1_0 = " ".join([
                D1[0], Subj1[0], Aux1[0], V2[0], Obj1[0], "and", D2[0],
                Subj2[0], Aux2[0], V2_ant[0], Obj2[0], "and", S1_the_obj, "."
            ])

        if choice([True, False]):
            test_0_1 = " ".join([
                S1_the_subj, "and", D1[0], Subj1[0], Aux1[0], V2[0], Obj1[0],
                "and", D2[0], Subj2[0], Aux2[0], V2_other[0], Obj2[0], "."
            ])
        else:
            test_0_1 = " ".join([
                "the", Subj1[0], Aux1[0], V2[0], Obj1[0], "and", D2[0],
                Subj2[0], Aux2[0], V2_other[0], Obj2[0], "and", S1, "."
            ])

        option = random.randint(0, 5)
        if option == 0:
            control_0_0 = " ".join([
                S1_the_obj, "and", D1[0], Subj1[0], Aux1[0], V2[0], Obj1[0],
                "and", D2[0], Subj2[0], Aux2[0], V2_other[0], Obj2[0], "."
            ])
        elif option == 1:
            control_0_0 = " ".join([
                S1, "and", "the", Subj1[0], Aux1[0], V2[0], Obj1[0], "and",
                D2[0], Subj2[0], Aux2[0], V2_other[0], Obj2[0], "."
            ])
        elif option == 2:
            control_0_0 = " ".join([
                S1, "and", D1[0], Subj1[0], Aux1[0], V2[0], Obj1[0], "and",
                "the", Subj2[0], Aux2[0], V2_other[0], Obj2[0], "."
            ])
        elif option == 3:
            control_0_0 = " ".join([
                D1[0], Subj1[0], Aux1[0], V2[0], Obj1[0], "and", "the",
                Subj2[0], Aux2[0], V2_other[0], Obj2[0], "and", S1, "."
            ])
        elif option == 4:
            control_0_0 = " ".join([
                D1[0], Subj1[0], Aux1[0], V2[0], Obj1[0], "and", D2[0],
                Subj2[0], Aux2[0], V2_other[0], Obj2[0], "and", S1_the_subj,
                "."
            ])
        else:
            control_0_0 = " ".join([
                D1[0], Subj1[0], Aux1[0], V2[0], Obj1[0], "and", D2[0],
                Subj2[0], Aux2[0], V2_other[0], Obj2[0], "and", S1_the_obj, "."
            ])

        if choice([True, False]):
            control_1_1 = " ".join([
                S1_the_subj, "and", D1[0], Subj1[0], Aux1[0], V2[0], Obj1[0],
                "and", D2[0], Subj2[0], Aux2[0], V2_ant[0], Obj2[0], "."
            ])
        else:
            control_1_1 = " ".join([
                "the", Subj1[0], Aux1[0], V2[0], Obj1[0], "and", D2[0],
                Subj2[0], Aux2[0], V2_ant[0], Obj2[0], "and", S1, "."
            ])

        data = self.build_paradigm(
            training_1_1=training_1_1,
            training_0_0=training_0_0,
            test_1_0=test_1_0,
            test_0_1=test_0_1,
            control_1_1=control_1_1,
            control_0_0=control_0_0,
            control_1_0=control_1_0,
            control_0_1=control_0_1,
        )
        track_sentence = [
            (Subj1[0], V1[0], Obj1[0], Subj2[0], V1_ant[0], Obj2[0], "."),
            (Subj1[0], V1[0], Obj1[0], Subj2[0], V1_other[0], Obj2[0], "."),
            (Subj1[0], V2[0], Obj1[0], Subj2[0], V2_ant[0], Obj2[0], "."),
            (Subj1[0], V2[0], Obj1[0], Subj2[0], V2_ant[0], Obj2[0], "."),
        ]
        return data, track_sentence
# Example #14
# 0
    def sample_adj(self):
        """Sample one adjective paradigm: eight sentences plus tracking tuples.

        Every sentence conjoins a filler transitive clause with a target
        clause that relates two adjectives.  The target is drawn at random
        per call as either prenominal adjectives around a transitive verb
        ("<det> A Subj Aux V <det> A' Obj") or two coordinated copular
        clauses ("<det> Subj1 Cop A and <det> Subj2 Cop A'").  Tokens are
        joined with single spaces and every sentence ends in " .".

        Naming scheme of the sentences handed to ``self.build_paradigm``,
        as read off the construction below:

        * first digit ``1``: the second adjective comes from the first
          adjective's ``"antonym"`` field; ``0``: it comes from its
          ``"synonym_hypernym_hyponym"`` field.
        * second digit ``1``: the literal determiner "the" is the first
          word of the sentence; ``0``: the literal "the" is placed in one
          determiner slot that is not sentence-initial.
        * ``training_*``, ``control_1_0`` and ``control_0_1`` use the
          in-domain adjectives; ``test_*``, ``control_1_1`` and
          ``control_0_0`` use the out-domain adjectives.

        All other determiners are sampled from ``self.safe_dets``
        (presumably that list never contains "the" -- TODO confirm).

        Returns:
            tuple: ``(data, track_sentence)``.  ``data`` is whatever
            ``self.build_paradigm`` returns.  ``track_sentence`` is a list
            of four tuples with the content words of the in-domain antonym,
            in-domain other, out-domain antonym and out-domain other
            sentences, in that order.

        Raises:
            MatchNotFoundError: if no compatible subject noun, verb or
                out-domain adjective can be sampled for the vocabulary
                chosen so far.  Sampling calls that are not guarded below
                propagate whatever the helper raises.
        """
        # Filler clause: one transitive sentence reused by all eight items.
        V_trans = choice(all_transitive_verbs)
        NP_trans_1 = choice(get_matches_of(V_trans, "arg_1", all_common_nouns))
        NP_trans_2 = choice(get_matches_of(V_trans, "arg_2", all_common_nouns))
        D_trans_1 = choice(get_matched_by(NP_trans_1, "arg_1", self.safe_dets))
        D_trans_2 = choice(get_matched_by(NP_trans_2, "arg_1", self.safe_dets))
        Aux_trans = return_aux(V_trans, NP_trans_1)

        def filler(det_subj, det_obj):
            # The filler clause with the given subject/object determiners.
            return " ".join([
                det_subj, NP_trans_1[0], Aux_trans[0], V_trans[0], det_obj,
                NP_trans_2[0]
            ])

        S1_the_subj = filler("the", D_trans_2[0])
        S1_the_obj = filler(D_trans_1[0], "the")
        S1 = filler(D_trans_1[0], D_trans_2[0])

        if random.choice([1, 2]) == 1:
            # Prenominal APs related by a transitive verb.
            A1 = choice(self.in_domain_adjs_main)
            A1_ant = choice(
                get_all("expression", A1["antonym"], self.in_domain_adjs))
            A1_other = choice(
                get_all("expression", A1["synonym_hypernym_hyponym"],
                        self.in_domain_adjs))
            try:
                Subj = choice(get_matches_of(A1, "arg_1", all_common_nouns))
            except Exception:
                # Fail with the error this method already uses instead of
                # continuing with an unbound Subj.
                raise MatchNotFoundError(
                    "fail to find subj noun for adj=%s" % A1[0])
            Obj = choice(
                get_matches_of(
                    A1_ant, "arg_1",
                    get_matches_of(A1_other, "arg_1", all_common_nouns)))
            D1 = choice(get_matched_by(Subj, "arg_1", self.safe_dets))
            D2 = choice(get_matched_by(Obj, "arg_1", self.safe_dets))
            try:
                V = choice(
                    get_matched_by(
                        Subj, "arg_1",
                        get_matched_by(Obj, "arg_2", all_transitive_verbs)))
            except Exception:
                raise MatchNotFoundError(
                    "fail to find verb with subj=%s and obj=%s" %
                    (Subj[0], Obj[0]))
            Aux = return_aux(V, Subj, allow_negated=False)
            A2 = choice(
                get_matched_by(Subj, "arg_1", self.out_domain_adjs_main))
            try:
                A2_ant = choice(
                    get_all("expression", A2["antonym"], self.out_domain_adjs))
                A2_other = choice(
                    get_all("expression", A2["synonym_hypernym_hyponym"],
                            self.out_domain_adjs))
            except Exception:
                raise MatchNotFoundError(
                    "fail to find antonym or related adj for adj=%s" % A2[0])
            # Both out-domain partner adjectives must be able to modify Obj.
            if not (is_match_disj(Obj, A2_ant["arg_1"])
                    and is_match_disj(Obj, A2_other["arg_1"])):
                raise MatchNotFoundError(
                    "fail to match: %s %s %s %s %s " %
                    (A2[0], A2_ant[0], A2_other[0], Subj[0], Obj[0]))

            def target(adj_1, adj_2, det_1, det_2):
                # "<det_1> adj_1 Subj Aux V <det_2> adj_2 Obj"
                return " ".join([
                    det_1, adj_1[0], Subj[0], Aux[0], V[0], det_2, adj_2[0],
                    Obj[0]
                ])

            def track(adj_1, adj_2):
                return (adj_1[0], Subj[0], V[0], adj_2[0], Obj[0])
        else:
            # Predicative AP: two coordinated copular clauses.
            A1 = choice(self.in_domain_adjs_main)
            A1_ant = choice(
                get_all("expression", A1["antonym"], self.in_domain_adjs))
            A1_other = choice(
                get_all("expression", A1["synonym_hypernym_hyponym"],
                        self.in_domain_adjs))
            Subj1 = choice(get_matches_of(A1, "arg_1", all_common_nouns))
            D1 = choice(get_matched_by(Subj1, "arg_1", self.safe_dets))
            Copula1 = return_copula(Subj1, allow_negated=False)
            A2 = choice(
                get_matched_by(Subj1, "arg_1", self.out_domain_adjs_main))
            A2_ant = choice(
                get_all("expression", A2["antonym"], self.out_domain_adjs))
            try:
                A2_other = choice(
                    get_all("expression", A2["synonym_hypernym_hyponym"],
                            self.out_domain_adjs))
            except Exception:
                raise MatchNotFoundError(
                    "fail to find related adj for adj=%s" % A2[0])
            # Subj2 has to be compatible with all four partner adjectives.
            Subj2 = choice(
                get_matches_of(
                    A1_ant, "arg_1",
                    get_matches_of(
                        A1_other, "arg_1",
                        get_matches_of(
                            A2_ant, "arg_1",
                            get_matches_of(A2_other, "arg_1",
                                           all_common_nouns)))))
            D2 = choice(get_matched_by(Subj2, "arg_1", self.safe_dets))
            Copula2 = return_copula(Subj2, allow_negated=False)

            def target(adj_1, adj_2, det_1, det_2):
                # "<det_1> Subj1 Cop adj_1 and <det_2> Subj2 Cop adj_2"
                return " ".join([
                    det_1, Subj1[0], Copula1[0], adj_1[0], "and", det_2,
                    Subj2[0], Copula2[0], adj_2[0]
                ])

            def track(adj_1, adj_2):
                return (adj_1[0], Subj1[0], adj_2[0], Subj2[0])

        def the_initial(adj_1, adj_2):
            # Sentence whose first word is the literal "the": either the
            # filler leads with a "the" subject, or the target leads.
            if choice([True, False]):
                parts = [
                    S1_the_subj, "and",
                    target(adj_1, adj_2, D1[0], D2[0])
                ]
            else:
                parts = [target(adj_1, adj_2, "the", D2[0]), "and", S1]
            return " ".join(parts + ["."])

        def the_non_initial(adj_1, adj_2):
            # Sentence with the literal "the" in one of the six determiner
            # slots that are not sentence-initial.
            layout = random.randint(0, 5)
            if layout == 0:
                parts = [
                    S1_the_obj, "and",
                    target(adj_1, adj_2, D1[0], D2[0])
                ]
            elif layout == 1:
                parts = [S1, "and", target(adj_1, adj_2, "the", D2[0])]
            elif layout == 2:
                parts = [S1, "and", target(adj_1, adj_2, D1[0], "the")]
            elif layout == 3:
                parts = [target(adj_1, adj_2, D1[0], "the"), "and", S1]
            elif layout == 4:
                parts = [
                    target(adj_1, adj_2, D1[0], D2[0]), "and", S1_the_subj
                ]
            else:
                parts = [
                    target(adj_1, adj_2, D1[0], D2[0]), "and", S1_the_obj
                ]
            return " ".join(parts + ["."])

        # Each sentence consumes exactly one random draw; keep this order so
        # seeded runs draw in the same sequence as before.
        training_1_1 = the_initial(A1, A1_ant)
        training_0_0 = the_non_initial(A1, A1_other)
        control_0_1 = the_initial(A1, A1_other)
        control_1_0 = the_non_initial(A1, A1_ant)
        test_1_0 = the_non_initial(A2, A2_ant)
        test_0_1 = the_initial(A2, A2_other)
        control_0_0 = the_non_initial(A2, A2_other)
        control_1_1 = the_initial(A2, A2_ant)

        data = self.build_paradigm(
            training_1_1=training_1_1,
            training_0_0=training_0_0,
            test_1_0=test_1_0,
            test_0_1=test_0_1,
            control_1_1=control_1_1,
            control_0_0=control_0_0,
            control_1_0=control_1_0,
            control_0_1=control_0_1,
        )
        # The out-domain tuples record A2, the adjective that actually
        # occurs in those sentences, in both target shapes.
        track_sentence = [
            track(A1, A1_ant),
            track(A1, A1_other),
            track(A2, A2_ant),
            track(A2, A2_other),
        ]
        return data, track_sentence
# Example #15
# 0
    def sample(self):
        """Sample one "only" presupposition paradigm.

        Builds five trigger sentences that contain "only" (unembedded,
        negated, interrogative, under "might", and inside an "if ..., it's
        okay." conditional) together with three candidate presuppositions:
        the same clause without "only", the clause built from the
        ``aux_neg``/``verb_neg`` entries, and the clause with an
        alternative subject.  A trigger is shaped like
        "John should only go to France".

        Returns:
            tuple: ``(data, presupposition)`` where ``data`` is whatever
            ``self.build_presupposition_paradigm`` returns and
            ``presupposition`` is the "only"-free sentence string.
        """
        V = choice(all_verbs)
        V_args = negate_V_args(
            verb_args_from_verb(V,
                                allow_negated=False,
                                allow_modal=False,
                                allow_quantifiers=False))
        V_args = embed_V_args_under_modal(V_args)
        V_bare = get_bare_form(V)
        VP = V_to_VP_mutate(V, aux=False, args=V_args)
        # Alternative subject that matches both the verb and its auxiliary.
        N_alt = N_to_DP_mutate(choice(
            get_matches_of(
                V, "arg_1", get_matches_of(V_args["aux"], "arg_1",
                                           all_nominals))),
                               allow_quantifiers=False)

        aux = V_args["aux"][0]
        subj = V_args["subj"][0]
        vp = VP[0]
        # Surface string of the verb's arguments, shared by three sentences.
        args_text = " ".join(x[0] for x in V_args["args"])

        # With do-support "only" precedes the auxiliary; otherwise it
        # follows it.
        if aux in ("does", "do", "did"):
            unembedded_trigger = "%s only %s %s." % (subj, aux, vp)
        else:
            unembedded_trigger = "%s %s only %s." % (subj, aux, vp)
        negated_trigger = "%s %s only %s %s." % (
            subj, V_args["aux_neg"][0], V_args["verb_neg"][0], args_text)
        # Identity check: a missing embedded auxiliary is stored as None.
        if V_args["aux_under_modal"] is None:
            modal_trigger = "%s might only %s." % (subj, vp)
        else:
            modal_trigger = "%s might %s only %s %s." % (
                subj, V_args["aux_under_modal"][0],
                V_args["verb_under_modal"][0], args_text)
        # Drop the final period before embedding under "if".
        conditional_trigger = "if %s, it's okay." % unembedded_trigger[:-1]
        if V["finite"] == "1":
            # Finite main verb: question is formed with a do-form and the
            # bare verb.
            do = get_do_form(V)
            interrogative_trigger = "%s %s only %s %s?" % (
                do[0], subj, V_bare[0], join_args(V_args["args"]))
        else:
            # Otherwise the auxiliary is fronted.
            interrogative_trigger = "%s %s only %s?" % (aux, subj, vp)

        presupposition = "%s %s %s." % (subj, aux, vp)
        negated_presupposition = "%s %s %s %s." % (
            subj, V_args["aux_neg"][0], V_args["verb_neg"][0], args_text)
        neutral_presupposition = "%s %s %s." % (N_alt[0], aux, vp)

        data = self.build_presupposition_paradigm(
            unembedded_trigger=unembedded_trigger,
            negated_trigger=negated_trigger,
            interrogative_trigger=interrogative_trigger,
            modal_trigger=modal_trigger,
            conditional_trigger=conditional_trigger,
            presupposition=presupposition,
            negated_presupposition=negated_presupposition,
            neutral_presupposition=neutral_presupposition)
        return data, presupposition
    def sample_adj(self):
        V_trans = choice(self.all_possibly_singular_transitive_verbs)
        NP_trans_1 = choice(
            get_matches_of(V_trans, "arg_1", self.all_singular_common_nouns))
        NP_trans_2 = choice(
            get_matches_of(V_trans, "arg_2", self.all_singular_common_nouns))
        D_trans_1 = choice(get_matched_by(NP_trans_1, "arg_1", self.safe_dets))
        D_trans_2 = choice(get_matched_by(NP_trans_2, "arg_1", self.safe_dets))
        Aux_trans = return_aux(V_trans, NP_trans_1)
        S1_abs = " ".join([
            "%s", NP_trans_1[0], Aux_trans[0], V_trans[0], "%s", NP_trans_2[0]
        ])
        option = random.choice([1, 2])
        if option == 1:  # prenominal APs related by a transitive verb
            A1 = choice(self.in_domain_adjs_main)
            A1_ant = choice(
                get_all("expression", A1["antonym"], self.in_domain_adjs))
            A1_other = choice(
                get_all("expression", A1["synonym_hypernym_hyponym"],
                        self.in_domain_adjs))
            try:
                Subj = choice(
                    get_matches_of(A1, "arg_1",
                                   self.all_singular_common_nouns))
            except Exception:
                pass
            Obj = choice(
                get_matches_of(
                    A1_ant, "arg_1",
                    get_matches_of(A1_other, "arg_1",
                                   self.all_singular_common_nouns)))
            D1 = choice(get_matched_by(Subj, "arg_1", self.safe_dets))
            D2 = choice(get_matched_by(Obj, "arg_1", self.safe_dets))
            try:
                V = choice(
                    get_matched_by(
                        Subj, "arg_1",
                        get_matched_by(
                            Obj, "arg_2",
                            self.all_possibly_singular_transitive_verbs)))
            except Exception:
                raise MatchNotFoundError(
                    "fail to find verb with subj=%s and obj=%s" %
                    (Subj[0], Obj[0]))
            Aux = return_aux(V, Subj, allow_negated=False)
            A2 = choice(
                get_matched_by(Subj, "arg_1", self.out_domain_adjs_main))
            try:
                A2_ant = choice(
                    get_all("expression", A2["antonym"], self.out_domain_adjs))
                A2_other = choice(
                    get_all("expression", A2["synonym_hypernym_hyponym"],
                            self.out_domain_adjs))
            except Exception:
                pass
            if not (is_match_disj(Obj, A2_ant["arg_1"])
                    and is_match_disj(Obj, A2_other["arg_1"])):
                raise MatchNotFoundError(
                    "fail to match: %s %s %s %s %s " %
                    (A2[0], A2_ant[0], A2_other[0], Subj[0], Obj[0]))

            Ds = []
            option = random.choice([
                1, 2, 3
            ])  # There are three in-domain configurations (arbitrarily chosen)
            if option == 1:
                Ds.append(("the", "a", D1[0], D2[0]))
                Ds.append(("a", "the", D1[0], D2[0]))
            elif option == 2:
                Ds.append(("the", D_trans_2[0], D1[0], "a"))
                Ds.append(("a", D_trans_2[0], D1[0], "the"))
            else:
                Ds.append((D_trans_1[0], "the", D1[0], "a"))
                Ds.append((D_trans_1[0], "a", D1[0], "the"))

            option = random.choice(
                [1, 2, 3]
            )  # There are three out-domain configurations (arbitrarily chosen)
            if option == 1:
                Ds.append(("the", D_trans_2[0], "a", D2[0]))
                Ds.append(("a", D_trans_2[0], "the", D2[0]))
            elif option == 2:
                Ds.append((D_trans_1[0], "the", "a", D2[0]))
                Ds.append((D_trans_1[0], "a", "the", D2[0]))
            else:
                Ds.append((D_trans_1[0], D_trans_2[0], "the", "a"))
                Ds.append((D_trans_1[0], D_trans_2[0], "a", "the"))

            data = self.build_paradigm(
                training_1_1=" ".join([
                    S1_abs, "and", "%s", A1[0], Subj[0], Aux[0], V[0], "%s",
                    A1_ant[0], Obj[0], "."
                ]) % Ds[0],
                training_0_0=" ".join([
                    S1_abs, "and", "%s", A1[0], Subj[0], Aux[0], V[0], "%s",
                    A1_other[0], Obj[0], "."
                ]) % Ds[1],
                control_1_0=" ".join([
                    S1_abs, "and", "%s", A1[0], Subj[0], Aux[0], V[0], "%s",
                    A1_ant[0], Obj[0], "."
                ]) % Ds[1],
                control_0_1=" ".join([
                    S1_abs, "and", "%s", A1[0], Subj[0], Aux[0], V[0], "%s",
                    A1_other[0], Obj[0], "."
                ]) % Ds[0],
                test_1_0=" ".join([
                    S1_abs, "and", "%s", A2[0], Subj[0], Aux[0], V[0], "%s",
                    A2_ant[0], Obj[0], "."
                ]) % Ds[3],
                test_0_1=" ".join([
                    S1_abs, "and", "%s", A2[0], Subj[0], Aux[0], V[0], "%s",
                    A2_other[0], Obj[0], "."
                ]) % Ds[2],
                control_1_1=" ".join([
                    S1_abs, "and", "%s", A2[0], Subj[0], Aux[0], V[0], "%s",
                    A2_ant[0], Obj[0], "."
                ]) % Ds[2],
                control_0_0=" ".join([
                    S1_abs, "and", "%s", A2[0], Subj[0], Aux[0], V[0], "%s",
                    A2_other[0], Obj[0], "."
                ]) % Ds[3],
            )

            track_sentence = [
                (A1[0], Subj[0], V[0], A1_ant[0], Obj[0]),
                (A1[0], Subj[0], V[0], A1_other[0], Obj[0]),
                (A2[0], Subj[0], V[0], A2_ant[0], Obj[0]),
                (A2[0], Subj[0], V[0], A2_other[0], Obj[0]),
            ]
            return data, track_sentence
        else:  # predicative AP
            A1 = choice(self.in_domain_adjs_main)
            A1_ant = choice(
                get_all("expression", A1["antonym"], self.in_domain_adjs))
            A1_other = choice(
                get_all("expression", A1["synonym_hypernym_hyponym"],
                        self.in_domain_adjs))
            Subj1 = choice(
                get_matches_of(A1, "arg_1", self.all_singular_common_nouns))
            D1 = choice(get_matched_by(Subj1, "arg_1", self.safe_dets))
            Copula1 = return_copula(Subj1, allow_negated=False)
            A2 = choice(
                get_matched_by(Subj1, "arg_1", self.out_domain_adjs_main))
            A2_ant = choice(
                get_all("expression", A2["antonym"], self.out_domain_adjs))
            try:
                A2_other = choice(
                    get_all("expression", A2["synonym_hypernym_hyponym"],
                            self.out_domain_adjs))
                Subj2 = choice(
                    get_matches_of(
                        A1_ant, "arg_1",
                        get_matches_of(
                            A1_other, "arg_1",
                            get_matches_of(
                                A2_ant, "arg_1",
                                get_matches_of(
                                    A2_other, "arg_1",
                                    self.all_singular_common_nouns)))))
            except Exception:
                pass
            D2 = choice(get_matched_by(Subj2, "arg_1", self.safe_dets))
            Copula2 = return_copula(Subj2, allow_negated=False)

            Ds = []
            option = random.choice([
                1, 2, 3
            ])  # There are three in-domain configurations (arbitrarily chosen)
            if option == 1:
                Ds.append(("the", "a", D1[0], D2[0]))
                Ds.append(("a", "the", D1[0], D2[0]))
            elif option == 2:
                Ds.append(("the", D_trans_2[0], D1[0], "a"))
                Ds.append(("a", D_trans_2[0], D1[0], "the"))
            else:
                Ds.append((D_trans_1[0], "the", D1[0], "a"))
                Ds.append((D_trans_1[0], "a", D1[0], "the"))

            option = random.choice(
                [1, 2, 3]
            )  # There are three out-domain configurations (arbitrarily chosen)
            if option == 1:
                Ds.append(("the", D_trans_2[0], "a", D2[0]))
                Ds.append(("a", D_trans_2[0], "the", D2[0]))
            elif option == 2:
                Ds.append((D_trans_1[0], "the", "a", D2[0]))
                Ds.append((D_trans_1[0], "a", "the", D2[0]))
            else:
                Ds.append((D_trans_1[0], D_trans_2[0], "the", "a"))
                Ds.append((D_trans_1[0], D_trans_2[0], "a", "the"))

            data = self.build_paradigm(
                training_1_1=" ".join([
                    S1_abs, "and", "%s", Subj1[0], Copula1[0], A1[0], "and",
                    "%s", Subj2[0], Copula2[0], A1_ant[0], "."
                ]) % Ds[0],
                training_0_0=" ".join([
                    S1_abs, "and", "%s", Subj1[0], Copula1[0], A1[0], "and",
                    "%s", Subj2[0], Copula2[0], A1_other[0], "."
                ]) % Ds[1],
                control_1_0=" ".join([
                    S1_abs, "and", "%s", Subj1[0], Copula1[0], A1[0], "and",
                    "%s", Subj2[0], Copula2[0], A1_ant[0], "."
                ]) % Ds[1],
                control_0_1=" ".join([
                    S1_abs, "and", "%s", Subj1[0], Copula1[0], A1[0], "and",
                    "%s", Subj2[0], Copula2[0], A1_other[0], "."
                ]) % Ds[0],
                test_1_0=" ".join([
                    S1_abs, "and", "%s", Subj1[0], Copula1[0], A2[0], "and",
                    "%s", Subj2[0], Copula2[0], A2_ant[0], "."
                ]) % Ds[3],
                test_0_1=" ".join([
                    S1_abs, "and", "%s", Subj1[0], Copula1[0], A2[0], "and",
                    "%s", Subj2[0], Copula2[0], A2_other[0], "."
                ]) % Ds[2],
                control_1_1=" ".join([
                    S1_abs, "and", "%s", Subj1[0], Copula1[0], A2[0], "and",
                    "%s", Subj2[0], Copula2[0], A2_ant[0], "."
                ]) % Ds[2],
                control_0_0=" ".join([
                    S1_abs, "and", "%s", Subj1[0], Copula1[0], A2[0], "and",
                    "%s", Subj2[0], Copula2[0], A2_other[0], "."
                ]) % Ds[3],
            )
            track_sentence = [
                (A1[0], Subj1[0], A1_ant[0], Subj2[0]),
                (A1[0], Subj1[0], A1_other[0], Subj2[0]),
                (A1[0], Subj1[0], A2_ant[0], Subj2[0]),
                (A1[0], Subj1[0], A2_other[0], Subj2[0]),
            ]
            return data, track_sentence
    def sample(self):
        """Sample one paradigm that varies where a relative clause attaches.

        In the training sentences the relative clause ``rel V1 D NP2`` modifies
        the matrix subject NP1; in the test sentences it modifies the embedded
        subject NP3. ``training_1_1`` and ``test_1_0`` put "the" on NP1;
        ``training_0_0`` and ``test_0_1`` keep D1 on NP1 and put "the" on one
        randomly chosen other noun.

        Returns:
            tuple: ``(data, track_sentence)``. ``data`` is the result of
            ``self.build_paradigm`` for the four sentences; ``track_sentence``
            is a list of four tuples of the sampled lexical items.

        Note:
            No exception is swallowed here: if a vocabulary helper cannot
            sample a matching item, its own exception propagates to the caller.
        """
        # Training 1/1
        # The man who helped a  girl thinks  that that guy found a  cat.
        # THE NP1 rel V1     D2 NP2  cp_verb THAT D3   NP3 V2    D4 NP4

        # Training 0/0 (one of three placements of "the")
        # This man who helped a  girl thinks  that that guy found the cat.
        # D1   NP1 rel V1     D2 NP2  cp_verb THAT D3   NP3 V2    THE NP4

        # Test 1/0
        # The man thinks  that that guy who helped a  girl found a  cat.
        # THE NP1 cp_verb THAT D3   NP3 rel V1     D2 NP2  V2    D4 NP4

        # Test 0/1 (one of three placements of "the")
        # This man thinks  that that guy who helped the girl found a  cat.
        # D1   NP1 cp_verb THAT D3   NP3 rel V1     THE NP2  V2    D4 NP4

        cp_verb = choice(self.cp_verbs)
        # Previously wrapped in `try/except Exception: pass`, which only turned
        # the real sampling error into an unbound-NP1 error on the next line.
        NP1 = choice(get_matches_of(cp_verb, "arg_1", all_common_nouns))
        D1 = choice(get_matched_by(NP1, "arg_1", self.safe_dets))
        rel1 = choice(get_matched_by(NP1, "arg_1", all_relativizers))
        V1 = choice(get_matched_by(NP1, "arg_1", all_transitive_verbs))
        NP2 = choice(get_matches_of(V1, "arg_2", all_common_nouns))
        D2 = choice(get_matched_by(NP2, "arg_1", self.safe_dets))
        Aux1 = return_aux(V1, NP1)
        # NP3 must also work as the subject of Aux1 + V1, because the test
        # sentences move the relative clause onto it.
        NP3 = choice(get_matches_of(V1, "arg_1", get_matches_of(Aux1, "arg_1", all_common_nouns)))
        D3 = choice(get_matched_by(NP3, "arg_1", self.safe_dets))
        # The second relativizer heads the clause attached to NP3, so it is
        # matched against NP3 (the original matched it against NP2).
        rel2 = choice(get_matched_by(NP3, "arg_1", all_relativizers))
        V2 = choice(get_matched_by(NP3, "arg_1", all_transitive_verbs))
        NP4 = choice(get_matches_of(V2, "arg_2", all_common_nouns))
        D4 = choice(get_matched_by(NP4, "arg_1", self.safe_dets))
        Aux_cp = return_aux(cp_verb, NP1)
        Aux2 = return_aux(V2, NP3)

        # Every determiner list below is in SURFACE order for its sentence:
        #   training: NP1, NP2, NP3, NP4
        #   test:     NP1, NP3, NP2, NP4
        training_the_first = ["the", D2[0], D3[0], D4[0]]
        training_the_later = [D1[0], D2[0], D3[0], D4[0]]
        # There are three in-domain configurations (arbitrarily chosen)
        training_the_later[random.choice([1, 2, 3])] = "the"

        test_the_first = ["the", D3[0], D2[0], D4[0]]
        test_the_later = [D1[0], D3[0], D2[0], D4[0]]
        # There are three out-domain configurations (arbitrarily chosen)
        test_the_later[random.choice([1, 2, 3])] = "the"

        def relative_on_np1(dets):
            # Relative clause attached to the matrix subject NP1.
            return " ".join([
                dets[0], NP1[0], rel1[0], Aux1[0], V1[0], dets[1], NP2[0],
                Aux_cp[0], cp_verb[0], "that",
                dets[2], NP3[0], Aux2[0], V2[0], dets[3], NP4[0],
            ])

        def relative_on_np3(dets):
            # Relative clause attached to the embedded subject NP3. The
            # original indexed dets[2] for NP3 and dets[1] for NP2, which
            # paired D2 with NP3 and D3 with NP2.
            return " ".join([
                dets[0], NP1[0], Aux_cp[0], cp_verb[0], "that",
                dets[1], NP3[0], rel2[0], Aux1[0], V1[0], dets[2], NP2[0],
                Aux2[0], V2[0], dets[3], NP4[0],
            ])

        # All four sentences share the same lexical items.
        lexical_items = (NP1[0], V1[0], NP2[0], cp_verb[0], NP3[0], V2[0], NP4[0])
        track_sentence = [
            lexical_items,  # training 1/1
            lexical_items,  # training 0/0
            lexical_items,  # test 1/0
            lexical_items,  # test 0/1
        ]

        data = self.build_paradigm(
            training_1_1=relative_on_np1(training_the_first),
            training_0_0=relative_on_np1(training_the_later),
            test_1_0=relative_on_np3(test_the_first),
            test_0_1=relative_on_np3(test_the_later),
        )
        return data, track_sentence
Beispiel #18
0
    def sample(self):
        """Sample one control-vs-raising paradigm item.

        A filler transitive clause (S1) is conjoined with a clause whose
        predicate is a control or raising verb/adjective followed by
        ``to V D NP2``. One of three constructions is picked at random:
        subject control/raising, object control/raising, or adjective
        control/raising.

        Predicate used per sentence:
            V_control_in:  training_1_1, control_1_0
            V_raising_in:  training_0_0, control_0_1
            V_control_out: test_1_0, control_1_1
            V_raising_out: test_0_1, control_0_0

        "the" placement: training_1_1, test_0_1, control_1_1 and control_0_1
        put "the" on the subject of the first conjunct; the other four put it
        on the object of the first conjunct. For every sentence the order of
        the two conjuncts is chosen by an independent coin flip.

        Returns:
            tuple: ``(data, track_sentence)``. ``data`` is the result of
            ``self.build_paradigm`` for the eight sentences; ``track_sentence``
            is a list of six tuples of the sampled lexical items.

        Note:
            No exception is swallowed here: if a vocabulary helper cannot
            sample a matching item, its own exception propagates to the caller.
        """
        # V_control_in (training_1_1, control_1_0)
        # John compelled         Mary to leave.
        # DP1  Aux1 V_control_in DP2  TO VP

        # V_raising_in (training_0_0, control_0_1)
        # John wanted            Mary to leave.
        # DP1  Aux1 V_raising_in DP2  TO VP

        # V_control_out (test_1_0, control_1_1)
        # John convinced          Mary to leave.
        # DP1  Aux1 V_control_out DP2  TO VP

        # V_raising_out (test_0_1, control_0_0)
        # John considered         Mary to leave.
        # DP1  Aux1 V_raising_out DP2  TO VP

        # Filler transitive clause that is conjoined with the target clause.
        V_trans = choice(all_transitive_verbs)
        NP_trans_1 = choice(get_matches_of(V_trans, "arg_1", all_common_nouns))
        NP_trans_2 = choice(get_matches_of(V_trans, "arg_2", all_common_nouns))
        D_trans_1 = choice(get_matched_by(NP_trans_1, "arg_1", self.safe_dets))
        D_trans_2 = choice(get_matched_by(NP_trans_2, "arg_1", self.safe_dets))
        Aux_trans = return_aux(V_trans, NP_trans_1)
        S1 = " ".join([
            D_trans_1[0], NP_trans_1[0], Aux_trans[0], V_trans[0],
            D_trans_2[0], NP_trans_2[0]
        ])
        S1_the_subj = " ".join([
            "the", NP_trans_1[0], Aux_trans[0], V_trans[0], D_trans_2[0],
            NP_trans_2[0]
        ])
        S1_the_obj = " ".join([
            D_trans_1[0], NP_trans_1[0], Aux_trans[0], V_trans[0], "the",
            NP_trans_2[0]
        ])

        option = random.choice([1, 2, 3])
        if option == 1:  # subject control/raising
            V_control_in = choice(self.v_control_subj_in)
            NP1 = choice(
                get_matches_of(V_control_in, "arg_1", all_common_nouns))
            D1 = choice(get_matched_by(NP1, "arg_1", self.safe_dets))
            Aux1 = return_aux(V_control_in, NP1)
            V = choice(
                get_matches_of(
                    V_control_in, "arg_2",
                    get_matched_by(NP1, "arg_1",
                                   self.all_bare_transitive_verbs)))
            NP2 = choice(get_matches_of(V, "arg_2", all_common_nouns))
            D2 = choice(get_matched_by(NP2, "arg_1", self.safe_dets))
            V_control_out = choice(
                get_matched_by(
                    NP1, "arg_1",
                    get_matches_of(
                        Aux1, "arg_2",
                        get_matched_by(V, "arg_2", self.v_control_subj_out))))
            V_raising_in = choice(
                get_matched_by(
                    NP1, "arg_1",
                    get_matches_of(
                        Aux1, "arg_2",
                        get_matched_by(V, "arg_2", self.v_raising_subj_in))))
            V_raising_out = choice(
                get_matched_by(
                    NP1, "arg_1",
                    get_matches_of(
                        Aux1, "arg_2",
                        get_matched_by(V, "arg_2", self.v_raising_subj_out))))
            to = "to"
        elif option == 2:  # object control/raising
            V_control_in = choice(self.v_control_obj_in)
            NP1 = choice(
                get_matches_of(V_control_in, "arg_1", all_common_nouns))
            D1 = choice(get_matched_by(NP1, "arg_1", self.safe_dets))
            Aux1 = return_aux(V_control_in, NP1)
            control_obj = N_to_DP_mutate(
                choice(get_matches_of(V_control_in, "arg_2")))
            V = choice(
                get_matches_of(
                    V_control_in, "arg_3",
                    get_matched_by(NP1, "arg_1",
                                   self.all_bare_transitive_verbs)))
            NP2 = choice(get_matches_of(V, "arg_2", all_common_nouns))
            D2 = choice(get_matched_by(NP2, "arg_1", self.safe_dets))
            # These three lookups used to sit in `try/except Exception: pass`,
            # which left the names unbound and failed later with a misleading
            # NameError; the real sampling error now propagates directly.
            V_control_out = choice(
                get_matched_by(
                    NP1, "arg_1",
                    get_matches_of(
                        Aux1, "arg_2",
                        get_matched_by(
                            V, "arg_3",
                            get_matched_by(control_obj, "arg_2",
                                           self.v_control_obj_out)))))
            V_raising_in = choice(
                get_matched_by(
                    NP1, "arg_1",
                    get_matches_of(Aux1, "arg_2", self.v_raising_obj_in)))
            V_raising_out = choice(
                get_matched_by(
                    NP1, "arg_1",
                    get_matches_of(Aux1, "arg_2", self.v_raising_obj_out)))
            # The matrix object is spliced in front of the infinitival "to".
            to = control_obj[0] + " to"

        else:  # adjective control/raising
            V_control_in = choice(self.adj_control_subj_in)
            NP1 = choice(
                get_matches_of(V_control_in, "arg_1", all_common_nouns))
            D1 = choice(get_matched_by(NP1, "arg_1", self.safe_dets))
            Aux1 = return_copula(NP1)
            V = choice(
                get_matches_of(
                    V_control_in, "arg_2",
                    get_matched_by(NP1, "arg_1",
                                   self.all_bare_transitive_verbs)))
            NP2 = choice(get_matches_of(V, "arg_2", all_common_nouns))
            D2 = choice(get_matched_by(NP2, "arg_1", self.safe_dets))
            V_control_out = choice(
                get_matched_by(
                    NP1, "arg_1",
                    get_matched_by(V, "arg_2", self.adj_control_subj_out)))
            V_raising_in = choice(self.adj_raising_subj_in)
            V_raising_out = choice(self.adj_raising_subj_out)
            to = "to"

        def target_clause(subj_det, predicate, obj_det):
            # Words of the control/raising clause with the given determiners.
            return [
                subj_det, NP1[0], Aux1[0], predicate[0], to, V[0], obj_det,
                NP2[0]
            ]

        def the_on_first_subject(predicate, obj_det):
            # "the" on the subject of whichever conjunct comes first.
            if random.randint(0, 1) == 1:
                words = ([S1_the_subj, "and"] +
                         target_clause(D1[0], predicate, D2[0]) + ["."])
            else:
                words = (target_clause("the", predicate, obj_det) +
                         ["and", S1, "."])
            return " ".join(words)

        def the_on_first_object(predicate):
            # "the" on the object of whichever conjunct comes first.
            if random.randint(0, 1) == 1:
                words = ([S1_the_obj, "and"] +
                         target_clause(D1[0], predicate, D2[0]) + ["."])
            else:
                words = (target_clause(D1[0], predicate, "the") +
                         ["and", S1, "."])
            return " ".join(words)

        # The call order below matches the original coin-flip order, so seeded
        # runs produce the same sentences.
        # NOTE(review): when the target clause comes first, training_1_1 puts
        # "the" on NP2 as well as NP1, unlike the other three subject-"the"
        # sentences which use D2. Looks like a copy-paste slip; behaviour is
        # kept as-is here - confirm against the paradigm design.
        training_1_1 = the_on_first_subject(V_control_in, "the")
        test_0_1 = the_on_first_subject(V_raising_out, D2[0])
        control_1_1 = the_on_first_subject(V_control_out, D2[0])
        control_0_1 = the_on_first_subject(V_raising_in, D2[0])
        training_0_0 = the_on_first_object(V_raising_in)
        test_1_0 = the_on_first_object(V_control_out)
        control_0_0 = the_on_first_object(V_raising_out)
        control_1_0 = the_on_first_object(V_control_in)

        data = self.build_paradigm(training_1_1=training_1_1,
                                   training_0_0=training_0_0,
                                   test_1_0=test_1_0,
                                   test_0_1=test_0_1,
                                   control_1_1=control_1_1,
                                   control_0_0=control_0_0,
                                   control_1_0=control_1_0,
                                   control_0_1=control_0_1)

        track_sentence = [
            (NP1[0], Aux1[0], V_control_in[0], to, V[0], NP2[0], "."),
            (NP1[0], Aux1[0], V_raising_in[0], to, V[0], NP2[0], "."),
            (NP1[0], Aux1[0], V_control_out[0], to, V[0], NP2[0], "."),
            (NP1[0], Aux1[0], V_raising_out[0], to, V[0], NP2[0], "."),
            (NP1[0], Aux1[0], V_control_out[0], to, V[0], NP2[0], "."),
            (NP1[0], Aux1[0], V_raising_out[0], to, V[0], NP2[0], ".")
        ]

        return data, track_sentence
Beispiel #19
0
    def sample(self):
        """Sample a good/bad sentence pair for agreement across a relative clause.

        The subject is followed by a relative clause whose object is sampled
        from nouns of the opposite number; the two sentences differ in the
        matrix auxiliary and/or matrix verb form.

        Returns:
            tuple: ``(data, sentence_good)`` where ``data`` is a dict with the
            keys ``sentence_good``, ``sentence_bad``, ``one_prefix_prefix``,
            ``one_prefix_word_good`` and ``one_prefix_word_bad``.
        """
        # The cat   that was      eating the mice is        sleeping
        #     Subj  Rel  Aux_Emb  V_emb  Obj_emb  Aux_agree V_mat_agree args
        # The cat   that was      eating the mice are           sleeping
        #     Subj  Rel  Aux_Emb  V_emb  Obj_emb  Aux_not_agree V_mat_not_agree args

        V_emb = None
        # Resample everything until an embedded verb compatible with both the
        # subject and the embedded object exists.
        while V_emb is None:
            V_mat_agree = choice(self.safe_mat_verbs)
            subj_noun = choice(
                get_matches_of(V_mat_agree, "arg_1", self.all_reg_nouns))
            subj = N_to_DP_mutate(subj_noun)
            relativizers = get_all("category_2", "rel")
            rel = choice(get_matched_by(subj, "arg_1", relativizers))

            if V_mat_agree["finite"] != "1":
                # Non-finite verbs carry no agreement; only the aux differs.
                V_mat_not_agree = V_mat_agree
            else:
                # Present-tense form of the same root with the opposite 3sg value.
                opposite_3sg = "0" if V_mat_agree["3sg"] == "1" else "1"
                same_root = get_all("root", V_mat_agree["root"])
                V_mat_not_agree = choice(
                    get_all_conjunctive([("pres", "1"), ("3sg", opposite_3sg)],
                                        same_root))

            # The embedded object is drawn from nouns of the opposite number.
            if subj["pl"] == "1":
                opposite_number_nouns = get_all_singular_nouns()
            else:
                opposite_number_nouns = get_all_plural_nouns()
            obj_noun = choice(
                get_matches_of(V_mat_not_agree, "arg_1", opposite_number_nouns))
            obj_emb = N_to_DP_mutate(obj_noun)

            try:
                compatible_with_obj = get_matched_by(obj_emb, "arg_2",
                                                     self.safe_emb_verbs)
                V_emb = choice(get_matched_by(subj, "arg_1", compatible_with_obj))
            except IndexError:
                continue

        Aux_emb = return_aux(V_emb, subj)

        Auxs = require_aux_agree(V_mat_agree, subj)
        Aux_agree, Aux_not_agree = Auxs["aux_agree"], Auxs["aux_nonagree"]
        V_mat_args = verb_args_from_verb(V_mat_agree, subj=subj, aux=Aux_agree)

        # Shared left context up to (not including) the matrix auxiliary.
        prefix = "%s %s %s %s %s" % (subj[0], rel[0], Aux_emb[0], V_emb[0],
                                     obj_emb[0])

        # The critical word is the verb when it is finite, otherwise the aux.
        if V_mat_agree["finite"] == "1":
            word_good, word_bad = V_mat_agree[0], V_mat_not_agree[0]
        else:
            word_good, word_bad = Aux_agree, Aux_not_agree

        sentence_good = "%s %s %s %s." % (
            prefix, Aux_agree, V_mat_agree[0], join_args(V_mat_args["args"]))
        sentence_bad = "%s %s %s %s." % (
            prefix, Aux_not_agree, V_mat_not_agree[0],
            join_args(V_mat_args["args"]))

        data = {
            "sentence_good": sentence_good,
            "sentence_bad": sentence_bad,
            "one_prefix_prefix": prefix,
            "one_prefix_word_good": word_good,
            "one_prefix_word_bad": word_bad,
        }
        return data, data["sentence_good"]
Beispiel #20
0
    def sample(self):
        """Sample one presupposition paradigm for the trigger "both".

        Builds five trigger sentences around "both N rel RC ..." (unembedded,
        negated, interrogative, under the modal "might", and inside an
        "if ..., it's okay." conditional) plus three candidate presuppositions:
        the real one ("there are exactly two N rel RC"), a negated one, and a
        neutral one that swaps in a different subject noun.

        Returns:
            tuple: ``(data, presupposition)`` where ``data`` is the result of
            ``self.build_presupposition_paradigm``.
        """
        # Both cats like mice

        V = choice(all_possibly_plural_verbs)
        V_bare = get_bare_form(V)
        N_subj = choice(get_matches_of(V, "arg_1", self.safe_nouns))
        # A different noun for the neutral presupposition.
        N_subj_alt = choice(get_matches_of(V, "arg_1", self.safe_nouns),
                            avoid=N_subj)
        V_args = verb_args_from_verb(V,
                                     subj=N_subj,
                                     allow_negated=False,
                                     allow_modal=False,
                                     allow_quantifiers=False)
        RC = verb_phrase_from_subj(N_subj)
        rel = choice(get_matched_by(N_subj, "arg_1", all_relativizers))
        V_neg, Aux_neg = negate_VP(V, V_args["aux"])
        V_args = embed_V_args_under_modal(V_args)

        unembedded_trigger = "both %s %s %s %s %s %s." % (
            N_subj[0], rel[0], RC[0], V_args["aux"][0], V_args["verb"][0],
            join_args(V_args["args"]))
        negated_trigger = "both %s %s %s %s %s %s." % (
            N_subj[0], rel[0], RC[0], Aux_neg[0], V_neg[0],
            join_args(V_args["args"]))
        # Identity check: `== None` is not a reliable test for a missing value.
        if V_args["aux_under_modal"] is None:
            modal_trigger = "both %s %s %s might %s %s." % (
                N_subj[0], rel[0], RC[0], V_bare[0], join_args(V_args["args"]))
        else:
            modal_trigger = "both %s %s %s might %s %s %s." % (
                N_subj[0], rel[0], RC[0], V_args["aux_under_modal"][0],
                V_args["verb_under_modal"][0], join_args(V_args["args"]))
        conditional_trigger = "if both %s %s %s %s %s %s, it's okay." % (
            N_subj[0], rel[0], RC[0], V_args["aux"][0], V_args["verb"][0],
            join_args(V_args["args"]))

        if V["finite"] == "1":
            # Finite main verb: the question is formed with do-support.
            # It now ends in "?" like the aux-fronting branch below; it
            # previously ended in ".".
            do = get_do_form(V)
            interrogative_trigger = "%s both %s %s %s %s %s?" % (
                do[0], N_subj[0], rel[0], RC[0], V_bare[0],
                join_args(V_args["args"]))
        else:
            # Otherwise the existing auxiliary is fronted.
            interrogative_trigger = "%s both %s %s %s %s %s?" % (
                V_args["aux"][0], N_subj[0], rel[0], RC[0], V_args["verb"][0],
                join_args(V_args["args"]))

        presupposition = "there are exactly two %s %s %s" % (N_subj[0], rel[0],
                                                             RC[0])
        # Half of the time contradict the count with a different quantity,
        # otherwise negate the presupposition directly.
        if np.random.choice([True, False]):
            negated_options = [
                "there are exactly three %s %s %s.",
                "There are more than two %s %s %s.",
                "There are dozens of %s %s %s."
            ]
            negated_presupposition = np.random.choice(negated_options) % (
                N_subj[0], rel[0], RC[0])
        else:
            negated_presupposition = "there aren't exactly two %s %s %s" % (
                N_subj[0], rel[0], RC[0])
        neutral_presupposition = "there are exactly two %s %s %s" % (
            N_subj_alt[0], rel[0], RC[0])

        data = self.build_presupposition_paradigm(
            unembedded_trigger=unembedded_trigger,
            negated_trigger=negated_trigger,
            interrogative_trigger=interrogative_trigger,
            modal_trigger=modal_trigger,
            conditional_trigger=conditional_trigger,
            presupposition=presupposition,
            negated_presupposition=negated_presupposition,
            neutral_presupposition=neutral_presupposition)
        return data, presupposition
    def sample(self):
        """
        Training 1/1
        The girl saw a cat and John is the tall man.
        The girl saw a cat and the tall man is in the room.
        The girl saw a cat and the man is tall.
        TThe girl saw a cat and the man in the room is tall.

        Training 0/0
        A girl saw a cat and John is a man.
        A girl saw a cat and John is the man in a room.
        A girl saw a cat and a man is John.

        Test 1/0
        A girl saw a cat and John is a tall man in a room.
        A girl saw a cat and John is tall.
        A girl saw a cat and a tall man is John.
        A girl saw a cat and a tall man in a room is John.
        A girl saw a cat and a tall man is president.
        A girl saw a cat and a tall man in the room is president.

        Test 0/1
        The girl saw a cat and John is in the room.
        The girl saw a cat and The man is in the room.
        The girl saw a cat and The man in the room is John.
        The girl saw a cat and John is president.
        The girl saw a cat and The man is president.
        The girl saw a cat and the man in the room is president.

        Control 1/1
        The girl saw a cat and John is a tall man in a room.
        The girl saw a cat and John is tall.
        The girl saw a cat and a tall man is John.
        The girl saw a cat and a tall man in a room is John.
        The girl saw a cat and a tall man is president.
        The girl saw a cat and a tall man in the room is president.

        Control 0/0
        A girl saw a cat and John is in a room.
        A girl saw a cat and a man is in a room.
        A girl saw a cat and a man in a room is John.
        A girl saw a cat and John is president.
        A girl saw a cat and a man is president.
        A girl saw a cat and a man in a room is president.
        """
        v_trans = choice(self.all_possibly_singular_transitive_verbs)
        subj = choice(get_matches_of(v_trans, "arg_1", self.all_singular_common_nouns))
        aux = return_aux(v_trans, subj)
        D_subj = choice(get_matched_by(subj, "arg_1", self.safe_determiners))
        obj = choice(get_matches_of(v_trans, "arg_2", self.all_singular_common_nouns))
        D_obj = choice(get_matched_by(obj, "arg_1", self.safe_determiners))
        S1 = " ".join([D_subj[0], subj[0], aux[0], v_trans[0], D_obj[0], obj[0], "and"])
        S1_the_subj = " ".join(["the", subj[0], aux[0], v_trans[0], D_obj[0], obj[0], "and"])
        S1_the_obj = " ".join([D_subj[0], subj[0], aux[0], v_trans[0], "the", obj[0], "and"])
        S1_a_subj = " ".join(["a", subj[0], aux[0], v_trans[0], D_obj[0], obj[0], "and"])
        S1_a_obj = " ".join([D_subj[0], subj[0], aux[0], v_trans[0], "a", obj[0], "and"])
        S1_the_a = " ".join(["the", subj[0], aux[0], v_trans[0], "a", obj[0], "and"])
        S1_a_the = " ".join(["a", subj[0], aux[0], v_trans[0], "the", obj[0], "and"])
        name_in = choice(self.names_in_domain)
        name_out = choice(self.names_out_domain)
        noun_in = choice(np.array(list(
            filter(lambda x: x["gender"] == name_in["gender"] or x["gender"] == "n" or x["gender"] == "",
                   self.common_nouns_in_domain))))
        noun_out = choice(np.array(list(
            filter(lambda x: x["gender"] == name_out["gender"] or x["gender"] == "n" or x["gender"] == "",
                   self.common_nouns_out_domain))))
        D_in = choice(get_matched_by(noun_in, "arg_1", self.safe_determiners))
        D_out = choice(get_matched_by(noun_out, "arg_1", self.safe_determiners))
        adj_in = choice(self.adjs_in_domain)
        adj_out = choice(self.adjs_out_domain)
        locative_in = build_locative(choice(self.locales_in_domain), allow_quantifiers=False, bind_det=True)
        locative_out = build_locative(choice(self.locales_out_domain), allow_quantifiers=False, bind_det=True)
        D_loc_in = choice(get_matched_by(locative_in, "arg_1", self.safe_determiners))
        D_loc_out = choice(get_matched_by(locative_out, "arg_1", self.safe_determiners))
        locative_in_d = locative_in[0] % D_loc_in[0]
        locative_out_d = locative_out[0] % D_loc_out[0]
        locative_in_the = locative_in[0] % "the"
        locative_out_the = locative_out[0] % "the"
        locative_in_a = locative_in[0] % "a"
        locative_out_a = locative_out[0] % "a"
        other_noun = choice(np.array(
            list(filter(lambda x: x["gender"] == name_out["gender"] or x["gender"] == "n", self.one_word_noun))))

        track_sentence = [
            (name_in[0], noun_in[0], adj_in[0], locative_in[0]),
            (name_in[0], noun_in[0], adj_in[0], locative_in[0]),
            (name_in[0], noun_in[0], adj_in[0], locative_in[0]),
            (name_in[0], noun_in[0], adj_in[0], locative_in[0]),
            (name_in[0], noun_in[0], adj_in[0], locative_in[0]),
            (name_in[0], noun_in[0], adj_in[0], locative_in[0]),
        ]

        # Training_1_1
        option = random.randint(0, 7)
        if option == 0:
            training_1 = " ".join([S1_the_subj, name_in[0], "is", "a", adj_in[0], noun_in[0]])
        elif option == 1:
            training_1 = " ".join([S1, "the", adj_in[0], noun_in[0], "is", locative_in_a])
        elif option == 2:
            training_1 = " ".join([S1_the_subj, D_in[0], adj_in[0], noun_in[0], "is", locative_in_a])
        elif option == 3:
            training_1 = " ".join([S1_the_subj, "a", adj_in[0], noun_in[0], "is", locative_in_d])
        elif option == 4:
            training_1 = " ".join([S1_the_subj, "a", noun_in[0], "is", adj_in[0]])
        elif option == 5:
            training_1 = " ".join([S1, "the", noun_in[0], locative_in_a, "is", adj_in[0]])
        elif option == 6:
            training_1 = " ".join([S1_the_subj, "a", noun_in[0], locative_in_d, "is", adj_in[0]])
        else:
            training_1 = " ".join([S1_the_subj, D_in[0], noun_in[0], locative_in_a, "is", adj_in[0]])

        # Training_0_0
        option = random.randint(0, 4)
        if option == 0:
            training_0 = " ".join([S1_a_subj, name_in[0], "is", "the", noun_in[0]])
        elif option == 1:
            training_0 = " ".join([S1_a_subj, name_in[0], "is", "the", noun_in[0], locative_in_d])
        elif option == 2:
            training_0 = " ".join([S1_a_subj, name_in[0], "is", D_in[0], noun_in[0], locative_in_the])
        elif option == 3:
            training_0 = " ".join([S1, name_in[0], "is", "a", noun_in[0], locative_in_the])
        else:
            training_0 = " ".join([S1_a_subj, "the", noun_in[0], "is", name_in[0]])

        # Control_1_0
        option = random.randint(0, 7)
        if option == 0:
            control_1_0 = " ".join([S1_a_subj, name_in[0], "is", "the", adj_in[0], noun_in[0]])
        elif option == 1:
            control_1_0 = " ".join([S1, "a", adj_in[0], noun_in[0], "is", locative_in_the])
        elif option == 2:
            control_1_0 = " ".join([S1_a_subj, D_in[0], adj_in[0], noun_in[0], "is", locative_in_the])
        elif option == 3:
            control_1_0 = " ".join([S1_a_subj, "the", adj_in[0], noun_in[0], "is", locative_in_d])
        elif option == 4:
            control_1_0 = " ".join([S1_a_subj, "the", noun_in[0], "is", adj_in[0]])
        elif option == 5:
            control_1_0 = " ".join([S1, "a", noun_in[0], locative_in_the, "is", adj_in[0]])
        elif option == 6:
            control_1_0 = " ".join([S1_a_subj, "the", noun_in[0], locative_in_d, "is", adj_in[0]])
        else:
            control_1_0 = " ".join([S1_a_subj, D_in[0], noun_in[0], locative_in_the, "is", adj_in[0]])

        # Control_0_1
        option = random.randint(0, 4)
        if option == 0:
            control_0_1 = " ".join([S1_the_subj, name_in[0], "is", "a", noun_in[0]])
        elif option == 1:
            control_0_1 = " ".join([S1_the_subj, name_in[0], "is", "a", noun_in[0], locative_in_d])
        elif option == 2:
            control_0_1 = " ".join([S1_the_subj, name_in[0], "is", D_in[0], noun_in[0], locative_in_a])
        elif option == 3:
            control_0_1 = " ".join([S1, name_in[0], "is", "the", noun_in[0], locative_in_a])
        else:
            control_0_1 = " ".join([S1_the_subj, "a", noun_in[0], "is", name_in[0]])

        # Test_1_0
        option = random.randint(0, 12)
        if option == 1:
            test_1_0 = " ".join([S1_a_obj, name_out[0], "is", "the", adj_out[0], noun_out[0], locative_out_d])
        elif option == 2:
            test_1_0 = " ".join([S1_a_obj, name_out[0], "is", D_out[0], adj_out[0], noun_out[0], locative_out_the])
        elif option == 3:
            test_1_0 = " ".join([S1_a_the, name_out[0], "is", D_out[0], adj_out[0], noun_out[0], locative_out_the])
        elif option == 4:
            test_1_0 = " ".join([S1_a_obj, "the", adj_out[0], noun_out[0], "is", name_out[0]])
        elif option == 5:
            test_1_0 = " ".join([S1_a_the, D_out[0], adj_out[0], noun_out[0], "is", name_out[0]])
        elif option == 6:
            test_1_0 = " ".join([S1_a_obj, "the", adj_out[0], noun_out[0], locative_out_d, "is", name_out[0]])
        elif option == 7:
            test_1_0 = " ".join([S1_a_obj, D_out[0], adj_out[0], noun_out[0], locative_out_the, "is", name_out[0]])
        elif option == 8:
            test_1_0 = " ".join([S1_a_the, D_out[0], adj_out[0], noun_out[0], locative_out_d, "is", name_out[0]])
        elif option == 9:
            test_1_0 = " ".join([S1_a_obj, "the", adj_out[0], noun_out[0], "is", other_noun[0]])
        elif option == 10:
            test_1_0 = " ".join([S1_a_the, D_out[0], adj_out[0], noun_out[0], "is", other_noun[0]])
        elif option == 11:
            test_1_0 = " ".join([S1_a_obj, "the", adj_out[0], noun_out[0], locative_out_d, "is", other_noun[0]])
        elif option == 12:
            test_1_0 = " ".join([S1_a_obj, D_out[0], adj_out[0], noun_out[0], locative_out_the, "is", other_noun[0]])
        else:
            test_1_0 = " ".join([S1_a_the, D_out[0], adj_out[0], noun_out[0], locative_out_d, "is", other_noun[0]])

        # Control_1_1
        option = random.randint(0, 12)
        if option == 0:
            control_1_1 = " ".join([S1_the_obj, name_out[0], "is", "a", adj_out[0], noun_out[0], locative_out_d])
        elif option == 1:
            control_1_1 = " ".join([S1_the_obj, name_out[0], "is", D_out[0], adj_out[0], noun_out[0], locative_out_a])
        elif option == 2:
            control_1_1 = " ".join([S1_the_a, name_out[0], "is", D_out[0], adj_out[0], noun_out[0], locative_out_d])
        elif option == 3:
            control_1_1 = " ".join([S1_the_obj, "a", adj_out[0], noun_out[0], "is", name_out[0]])
        elif option == 4:
            control_1_1 = " ".join([S1_the_a, D_out[0], adj_out[0], noun_out[0], "is", name_out[0]])
        elif option == 5:
            control_1_1 = " ".join([S1_the_obj, "a", adj_out[0], noun_out[0], locative_out_d, "is", name_out[0]])
        elif option == 6:
            control_1_1 = " ".join([S1_the_obj, D_out[0], adj_out[0], noun_out[0], locative_out_a, "is", name_out[0]])
        elif option == 7:
            control_1_1 = " ".join([S1_the_a, D_out[0], adj_out[0], noun_out[0], locative_out_d, "is", name_out[0]])
        elif option == 8:
            control_1_1 = " ".join([S1_the_obj, "a", adj_out[0], noun_out[0], "is", other_noun[0]])
        elif option == 9:
            control_1_1 = " ".join([S1_the_a, D_out[0], adj_out[0], noun_out[0], "is", other_noun[0]])
        elif option == 10:
            control_1_1 = " ".join([S1_the_obj, "a", adj_out[0], noun_out[0], locative_out_d, "is", other_noun[0]])
        elif option == 11:
            control_1_1 = " ".join([S1_the_obj, D_out[0], adj_out[0], noun_out[0], locative_out_a, "is", other_noun[0]])
        else:
            control_1_1 = " ".join([S1_the_a, D_out[0], adj_out[0], noun_out[0], locative_out_d, "is", other_noun[0]])

        # Test_0_1
        option = random.randint(0, 10)
        if option == 0:
            test_0_1 = " ".join([S1_the_obj, "a", noun_out[0], "is", locative_out_d])
        elif option == 1:
            test_0_1 = " ".join([S1_the_obj, D_out[0], noun_out[0], "is", locative_out_a])
        elif option == 2:
            test_0_1 = " ".join([S1_the_a, D_out[0], noun_out[0], "is", locative_out_d])
        elif option == 3:
            test_0_1 = " ".join([S1_the_obj, "a", noun_out[0], locative_out_d, "is", name_out[0]])
        elif option == 4:
            test_0_1 = " ".join([S1_the_obj, D_out[0], noun_out[0], locative_out_a, "is", name_out[0]])
        elif option == 5:
            test_0_1 = " ".join([S1_the_a, D_out[0], noun_out[0], locative_out_d, "is", name_out[0]])
        elif option == 6:
            test_0_1 = " ".join([S1_the_obj, "a", noun_out[0], "is", other_noun[0]])
        elif option == 7:
            test_0_1 = " ".join([S1_the_a, D_out[0], noun_out[0], "is", other_noun[0]])
        elif option == 8:
            test_0_1 = " ".join([S1_the_obj, "the", noun_out[0], locative_out_d, "is", other_noun[0]])
        elif option == 9:
            test_0_1 = " ".join([S1_the_obj, D_out[0], noun_out[0], locative_out_a, "is", other_noun[0]])
        else:
            test_0_1 = " ".join([S1_the_a, D_out[0], noun_out[0], locative_out_d, "is", other_noun[0]])

        # Control_0_0
        option = random.randint(0, 13)
        if option == 0:
            control_0_0 = " ".join([S1_a_obj, name_out[0], "is", locative_out_the])
        elif option == 1:
            control_0_0 = " ".join([S1_a_the, name_out[0], "is", locative_out_d])
        elif option == 2:
            control_0_0 = " ".join([S1_a_obj, "the", noun_out[0], "is", locative_out_d])
        elif option == 3:
            control_0_0 = " ".join([S1_a_obj, D_out[0], noun_out[0], "is", locative_out_the])
        elif option == 4:
            control_0_0 = " ".join([S1_a_the, D_out[0], noun_out[0], "is", locative_out_d])
        elif option == 5:
            control_0_0 = " ".join([S1_a_obj, "the", noun_out[0], locative_out_d, "is", name_out[0]])
        elif option == 6:
            control_0_0 = " ".join([S1_a_obj, D_out[0], noun_out[0], locative_out_the, "is", name_out[0]])
        elif option == 7:
            control_0_0 = " ".join([S1_a_the, D_out[0], noun_out[0], locative_out_d, "is", name_out[0]])
        elif option == 8:
            control_0_0 = " ".join([S1_a_the, name_out[0], "is", other_noun[0]])
        elif option == 9:
            control_0_0 = " ".join([S1_a_obj, "the", noun_out[0], "is", other_noun[0]])
        elif option == 10:
            control_0_0 = " ".join([S1_a_the, D_out[0], noun_out[0], "is", other_noun[0]])
        elif option == 11:
            control_0_0 = " ".join([S1_a_obj, "the", noun_out[0], locative_out_d, "is", other_noun[0]])
        elif option == 12:
            control_0_0 = " ".join([S1_a_obj, D_out[0], noun_out[0], locative_out_the, "is", other_noun[0]])
        else:
            control_0_0 = " ".join([S1_a_the, D_out[0], noun_out[0], locative_out_d, "is", other_noun[0]])

        data = self.build_paradigm(
            training_1_1=training_1 + ".",
            training_0_0=training_0 + ".",
            control_1_0=control_1_0 + ".",
            control_0_1=control_0_1 + ".",
            test_1_0=test_1_0 + ".",
            test_0_1=test_0_1 + ".",
            control_1_1=control_1_1 + ".",
            control_0_0=control_0_0 + ".",
        )
        return data, track_sentence
Beispiel #22
0
    def args_matching_3_verbs(self, v1, v2, v3, frequent=True, subj=None, aux=None, allow_negated=True, allow_modal=True,
                            allow_recursion=False, allow_quantifiers=True):
        """
        Build a single argument frame (subject, auxiliary, complements) shared by three verbs.

        Subject and object nouns are drawn from the candidates left after filtering the noun vocabulary with
        get_matches_of for v3, then v2, then v1. The auxiliary and the kind of frame are decided by v1 alone.

        :param v1: a vocab entry for a verb; its "category" selects the frame and it is the verb handed to return_aux
        :param v2: a vocab entry for a second verb, used to filter subject/object candidates
        :param v3: a vocab entry for a third verb, used to filter subject/object candidates
        :param frequent: should only frequent vocab be generated?
        :param subj: if supplied, the value of the subject in the returned dict. If None, a subject will be generated.
        :param aux: if supplied, the value of the auxiliary in the returned dict. If None, an auxiliary will be generated.
        :param allow_negated: should negated auxiliaries (e.g. hasn't) be generated?
        :param allow_modal: should modal auxiliaries (e.g. might) be generated?
        :param allow_recursion: accepted for signature compatibility; none of the frames implemented below reads it
        :param allow_quantifiers: should quantifiers (e.g. most, every) be generated as determiners for DPs?
        :return: dict {"subj": x1, "aux": x2, "args": [arg_1, arg_2, ..., arg_n]}. The "args" key is only set when
                 v1["category"] is intransitive, transitive or clause-embedding.
        """
        args = {}
        if frequent:
            freq_vocab = get_all("frequent", "1")
        else:
            freq_vocab = vocab

        def pick_shared_dp(slot):
            # Narrow the noun vocabulary with each verb in turn (v3, v2, v1) for the given argument slot,
            # then turn one of the surviving nouns into a DP.
            nouns = get_all("category", "N", freq_vocab)
            for verb in (v3, v2, v1):
                nouns = get_matches_of(verb, slot, nouns)
            return N_to_DP_mutate(choice(nouns), allow_quantifiers=allow_quantifiers)

        # all verbs have a subject
        if subj is None:
            args["subj"] = pick_shared_dp("arg_1")
        else:
            args["subj"] = subj

        # all verbs have an auxiliary (or null)
        if aux is None:
            args["aux"] = return_aux(v1, args["subj"], allow_negated=allow_negated, allow_modal=allow_modal)
        else:
            args["aux"] = aux

        # NOTE: from-ing embedding, raising to object, object control, question embedding, subject control and
        # raising to subject are not implemented for the three-verb case; for any category other than the three
        # handled below, "args" is left unset.
        category = v1["category"]

        # INTRANSITIVE
        if category == "S\\NP":
            args["args"] = []

        # TRANSITIVE
        elif category == "(S\\NP)/NP":
            args["args"] = [pick_shared_dp("arg_2")]

        # CLAUSE EMBEDDING
        elif category == "(S\\NP)/S":
            emb_clause = make_sentence(frequent)
            # Prepend the complementizer to field 0 of the clause (the field other code in this file joins into
            # sentences), not to the entry as a whole.
            if v1["arg_2"] == "expression_that":
                emb_clause[0] = "that " + emb_clause[0]
            elif v1["arg_2"] == "expression_wh":
                emb_clause[0] = "whether " + emb_clause[0]
            args["args"] = [emb_clause]

        return args
Beispiel #23
0
    def sample(self):
        """
        Generate one control-vs-raising paradigm in which every sentence follows a subordinate clause.

        The four main clauses differ only in the embedding predicate:

            Training 1:  John compelled  Mary to leave.    DP1 Aux1 V_control_in  [DP2] TO VP
            Training 0:  John wanted     Mary to leave.    DP1 Aux1 V_raising_in  [DP2] TO VP
            Test 1:      John convinced  Mary to leave.    DP1 Aux1 V_control_out [DP2] TO VP
            Test 0:      John considered Mary to leave.    DP1 Aux1 V_raising_out [DP2] TO VP

        One of three frames is picked at random: subject control/raising verbs, object control/raising verbs
        (the only frame with a DP2), or control/raising adjectives with a copula.

        :return: (data, track_sentence) where data is the result of self.build_paradigm and track_sentence is the
                 list of the four main-clause token tuples.
        """
        frame = random.choice([1, 2, 3])

        if frame == 1:
            # Subject control / raising verbs.
            V_control_in = choice(self.v_control_subj_in)
            subject_nouns = get_matches_of(V_control_in, "arg_1")
            DP1 = N_to_DP_mutate(choice(subject_nouns))
            Aux1 = return_aux(V_control_in, DP1)
            embedded_verbs = get_matched_by(DP1, "arg_1", self.all_bare_transitive_verbs)
            embedded_verbs = get_matches_of(V_control_in, "arg_2", embedded_verbs)
            VP = V_to_VP_mutate(choice(embedded_verbs), aux=False)

            def pick_predicate(pool):
                # Filter by the embedded VP, then the auxiliary, then the subject, and sample one predicate.
                candidates = get_matched_by(VP, "arg_2", pool)
                candidates = get_matches_of(Aux1, "arg_2", candidates)
                candidates = get_matched_by(DP1, "arg_1", candidates)
                return choice(candidates)

            V_control_out = pick_predicate(self.v_control_subj_out)
            V_raising_in = pick_predicate(self.v_raising_subj_in)
            V_raising_out = pick_predicate(self.v_raising_subj_out)
            to = "to"

        elif frame == 2:
            # Object control / raising verbs: the object DP2 sits in front of "to".
            V_control_in = choice(self.v_control_obj_in)
            subject_nouns = get_matches_of(V_control_in, "arg_1")
            DP1 = N_to_DP_mutate(choice(subject_nouns))
            Aux1 = return_aux(V_control_in, DP1)
            object_nouns = get_matches_of(V_control_in, "arg_2")
            DP2 = N_to_DP_mutate(choice(object_nouns))
            embedded_verbs = get_matched_by(DP2, "arg_1", self.all_bare_transitive_verbs)
            embedded_verbs = get_matches_of(V_control_in, "arg_3", embedded_verbs)
            VP = V_to_VP_mutate(choice(embedded_verbs), aux=False)

            control_candidates = get_matched_by(DP2, "arg_2", self.v_control_obj_out)
            control_candidates = get_matched_by(VP, "arg_3", control_candidates)
            control_candidates = get_matches_of(Aux1, "arg_2", control_candidates)
            control_candidates = get_matched_by(DP1, "arg_1", control_candidates)
            V_control_out = choice(control_candidates)

            raising_in_candidates = get_matches_of(Aux1, "arg_2", self.v_raising_obj_in)
            V_raising_in = choice(get_matched_by(DP1, "arg_1", raising_in_candidates))

            raising_out_candidates = get_matches_of(Aux1, "arg_2", self.v_raising_obj_out)
            V_raising_out = choice(get_matched_by(DP1, "arg_1", raising_out_candidates))
            to = DP2[0] + " to"

        else:
            # Control / raising adjectives; the "auxiliary" is a copula.
            V_control_in = choice(self.adj_control_subj_in)
            subject_nouns = get_matches_of(V_control_in, "arg_1")
            DP1 = N_to_DP_mutate(choice(subject_nouns))
            Aux1 = return_copula(DP1)
            embedded_verbs = get_matched_by(DP1, "arg_1", self.all_bare_transitive_verbs)
            embedded_verbs = get_matches_of(V_control_in, "arg_2", embedded_verbs)
            VP = V_to_VP_mutate(choice(embedded_verbs), aux=False)

            control_candidates = get_matched_by(VP, "arg_2", self.adj_control_subj_out)
            V_control_out = choice(get_matched_by(DP1, "arg_1", control_candidates))
            V_raising_in = choice(self.adj_raising_subj_in)
            V_raising_out = choice(self.adj_raising_subj_out)
            to = "to"

        # Token tuples for the four main clauses; they double as the tracking record.
        control_in_tokens = (DP1[0], Aux1[0], V_control_in[0], to, VP[0])
        raising_in_tokens = (DP1[0], Aux1[0], V_raising_in[0], to, VP[0])
        control_out_tokens = (DP1[0], Aux1[0], V_control_out[0], to, VP[0])
        raising_out_tokens = (DP1[0], Aux1[0], V_raising_out[0], to, VP[0])

        training_1 = " ".join(control_in_tokens)
        training_0 = " ".join(raising_in_tokens)
        test_1 = " ".join(control_out_tokens)
        test_0 = " ".join(raising_out_tokens)

        long_clause, short_clause = self.build_dependent_clauses(
            [training_1, training_0, test_0, test_1])

        def with_clause(clause, main_clause):
            # "<subordinate clause>, <main clause>."
            return "%s, %s." % (clause, main_clause)

        data = self.build_paradigm(
            training_1_1=with_clause(long_clause, training_1),
            training_0_0=with_clause(short_clause, training_0),
            control_1_0=with_clause(short_clause, training_1),
            control_0_1=with_clause(long_clause, training_0),
            test_1_0=with_clause(short_clause, test_1),
            test_0_1=with_clause(long_clause, test_0),
            control_1_1=with_clause(long_clause, test_1),
            control_0_0=with_clause(short_clause, test_0),
        )

        track_sentence = [
            control_in_tokens,
            raising_in_tokens,
            control_out_tokens,
            raising_out_tokens,
        ]

        return data, track_sentence
    def sample(self):
        """
        Generate one minimal pair for subject-verb agreement across an intervening DP.

            good: The doctor of the men is  helping some people.   D Subj S_arg Aux_agree     V_agree     args
            bad:  The doctor of the men are helping some people.   D Subj S_arg Aux_not_agree V_not_agree args

        S_arg is a DP built from a noun whose number is the opposite of the subject's. Sampling is retried from
        scratch whenever building S_arg raises.

        :return: (data, good_sentence) where data holds "sentence_good", "sentence_bad" and the one-prefix fields.
        """
        S_arg = None
        while S_arg is None:
            subj = choice(self.safe_subjs)
            D = choice(get_matched_by(subj, "arg_1", all_very_common_dets))
            V_agree = choice(get_matched_by(subj, "arg_1", self.safe_verbs))

            if V_agree["finite"] == "1":
                # Finite verb: the bad form is the present-tense form of the same root with the other 3sg value.
                flipped_3sg = "0" if V_agree["3sg"] == "1" else "1"
                same_root = get_all("root", V_agree["root"])
                V_not_agree = choice(
                    get_all_conjunctive([("pres", "1"), ("3sg", flipped_3sg)], same_root))
            else:
                # Non-finite verb: the auxiliary carries the agreement contrast instead.
                V_not_agree = V_agree

            try:
                # Plural subjects get a singular intervening noun, everything else a plural one.
                intervener_pool = all_singular_nouns if subj["pl"] == "1" else all_plural_nouns
                interveners = get_matches_of(subj, "arg_1", intervener_pool)
                interveners = get_matches_of(V_not_agree, "arg_1", interveners)
                S_arg = N_to_DP_mutate(choice(interveners))
            except Exception:
                continue

        Auxs = require_aux_agree(V_agree, subj)
        Aux_agree = Auxs["aux_agree"]
        Aux_not_agree = Auxs["aux_nonagree"]
        V_args = verb_args_from_verb(V_agree, subj=subj, aux=Aux_agree)

        # The prefix is the same whether the contrast sits on the verb or on the auxiliary.
        prefix = "%s %s %s" % (D[0], subj[0], S_arg[0])
        if V_agree["finite"] == "1":
            word_good, word_bad = V_agree[0], V_not_agree[0]
        else:
            word_good, word_bad = Aux_agree, Aux_not_agree

        sentence_good = "%s %s %s %s." % (
            prefix, Aux_agree, V_agree[0], join_args(V_args["args"]))
        sentence_bad = "%s %s %s %s." % (
            prefix, Aux_not_agree, V_not_agree[0], join_args(V_args["args"]))

        data = {
            "sentence_good": sentence_good,
            "sentence_bad": sentence_bad,
            "one_prefix_prefix": prefix,
            "one_prefix_word_good": word_good,
            "one_prefix_word_bad": word_bad,
        }
        return data, data["sentence_good"]
Beispiel #25
0
    def sample(self):
        """
        Generate one control-vs-raising paradigm of bare main clauses.

        The four sentences differ only in the embedding predicate:

            Training 1:  John compelled  Mary to leave.    DP1 Aux1 V_control_in  [DP2] TO VP
            Training 0:  John wanted     Mary to leave.    DP1 Aux1 V_raising_in  [DP2] TO VP
            Test 1:      John convinced  Mary to leave.    DP1 Aux1 V_control_out [DP2] TO VP
            Test 0:      John considered Mary to leave.    DP1 Aux1 V_raising_out [DP2] TO VP

        One of three frames is picked at random: subject control/raising verbs, object control/raising verbs
        (the only frame with a DP2), or control/raising adjectives with a copula.

        :return: (data, track_sentence) where data is the result of self.build_paradigm and track_sentence is the
                 list of the four token tuples (each ending in the "." token).
        """
        frame = random.choice([1, 2, 3])

        if frame == 1:
            # Subject control / raising verbs.
            V_control_in = choice(self.v_control_subj_in)
            subject_nouns = get_matches_of(V_control_in, "arg_1")
            DP1 = N_to_DP_mutate(choice(subject_nouns))
            Aux1 = return_aux(V_control_in, DP1)
            embedded_verbs = get_matched_by(DP1, "arg_1", all_bare_verbs)
            embedded_verbs = get_matches_of(V_control_in, "arg_2", embedded_verbs)
            VP = V_to_VP_mutate(choice(embedded_verbs), aux=False)

            def pick_predicate(pool):
                # Filter by the embedded VP, then the auxiliary, then the subject, and sample one predicate.
                candidates = get_matched_by(VP, "arg_2", pool)
                candidates = get_matches_of(Aux1, "arg_2", candidates)
                candidates = get_matched_by(DP1, "arg_1", candidates)
                return choice(candidates)

            V_control_out = pick_predicate(self.v_control_subj_out)
            V_raising_in = pick_predicate(self.v_raising_subj_in)
            V_raising_out = pick_predicate(self.v_raising_subj_out)
            to = "to"

        elif frame == 2:
            # Object control / raising verbs: the object DP2 sits in front of "to".
            V_control_in = choice(self.v_control_obj_in)
            subject_nouns = get_matches_of(V_control_in, "arg_1")
            DP1 = N_to_DP_mutate(choice(subject_nouns))
            Aux1 = return_aux(V_control_in, DP1)
            object_nouns = get_matches_of(V_control_in, "arg_2")
            DP2 = N_to_DP_mutate(choice(object_nouns))
            embedded_verbs = get_matched_by(DP2, "arg_1", all_bare_verbs)
            embedded_verbs = get_matches_of(V_control_in, "arg_3", embedded_verbs)
            VP = V_to_VP_mutate(choice(embedded_verbs), aux=False)

            control_candidates = get_matched_by(DP2, "arg_2", self.v_control_obj_out)
            control_candidates = get_matched_by(VP, "arg_3", control_candidates)
            control_candidates = get_matches_of(Aux1, "arg_2", control_candidates)
            control_candidates = get_matched_by(DP1, "arg_1", control_candidates)
            V_control_out = choice(control_candidates)

            raising_in_candidates = get_matches_of(Aux1, "arg_2", self.v_raising_obj_in)
            V_raising_in = choice(get_matched_by(DP1, "arg_1", raising_in_candidates))

            raising_out_candidates = get_matches_of(Aux1, "arg_2", self.v_raising_obj_out)
            V_raising_out = choice(get_matched_by(DP1, "arg_1", raising_out_candidates))
            to = DP2[0] + " to"

        else:
            # Control / raising adjectives; the "auxiliary" is a copula.
            V_control_in = choice(self.adj_control_subj_in)
            subject_nouns = get_matches_of(V_control_in, "arg_1")
            DP1 = N_to_DP_mutate(choice(subject_nouns))
            Aux1 = return_copula(DP1)
            embedded_verbs = get_matched_by(DP1, "arg_1", all_bare_verbs)
            embedded_verbs = get_matches_of(V_control_in, "arg_2", embedded_verbs)
            VP = V_to_VP_mutate(choice(embedded_verbs), aux=False)

            control_candidates = get_matched_by(VP, "arg_2", self.adj_control_subj_out)
            V_control_out = choice(get_matched_by(DP1, "arg_1", control_candidates))
            V_raising_in = choice(self.adj_raising_subj_in)
            V_raising_out = choice(self.adj_raising_subj_out)
            to = "to"

        # Token tuples for the four sentences; the period is its own token, so the joined string has a space
        # in front of it.
        control_in_tokens = (DP1[0], Aux1[0], V_control_in[0], to, VP[0], ".")
        raising_in_tokens = (DP1[0], Aux1[0], V_raising_in[0], to, VP[0], ".")
        control_out_tokens = (DP1[0], Aux1[0], V_control_out[0], to, VP[0], ".")
        raising_out_tokens = (DP1[0], Aux1[0], V_raising_out[0], to, VP[0], ".")

        data = self.build_paradigm(
            training_1_1=" ".join(control_in_tokens),
            training_0_0=" ".join(raising_in_tokens),
            test_1_0=" ".join(control_out_tokens),
            test_0_1=" ".join(raising_out_tokens),
        )

        track_sentence = [
            control_in_tokens,
            raising_in_tokens,
            control_out_tokens,
            raising_out_tokens,
        ]

        return data, track_sentence
    def sample(self):
        """
        Generate one paradigm pairing a first-person pronoun with the target word "apparition".

            Training 1/1:  I    think that John found the apparition.
                           first cp_verb_first THAT D1 NP1 verb_1 Dt APPARITION
            Training 0/0:  They think that John found the hairdresser.
                           non_first cp_verb_non_first THAT D1 NP1 verb_1 D2 NP2
            Test 1/0:      John thinks that the hairdresser found me.
                           D1 NP1 cp_verb_1 THAT D2 NP2 verb_2 first_acc
            Test 0/1:      John thinks that the apparition found them.
                           D1 NP1 cp_verb_1 THAT Dt APPARITION verb_t non_first_acc
            Control 1/1:   John thinks that the apparition found me.
                           D1 NP1 cp_verb_1 THAT Dt APPARITION verb_t first_acc
            Control 0/0:   John thinks that the hairdresser found them.
                           D1 NP1 cp_verb_1 THAT D2 NP2 verb_2 non_first_acc

        :return: (data, track_sentence) where data is the result of self.build_paradigm and track_sentence lists
                 the sampled lexical items per condition.
        """
        first, non_first, first_acc, non_first_acc = self.get_pronouns()

        # Nouns and determiners. NP1 is never the target word itself.
        target_entries = get_all("expression", "apparition")
        NP1 = choice(np.setdiff1d(all_animate_nouns, target_entries))
        NP2 = choice(self.safe_animate_common_nouns, avoid=NP1)
        D1 = choice(get_matched_by(NP1, "arg_1", self.dets))
        D2 = choice(get_matched_by(NP2, "arg_1", self.dets))
        Dt = choice(get_matched_by(self.target_lexicon, "arg_1", self.dets))

        # Clause-embedding verb, conjugated for each possible matrix subject with one shared auxiliary.
        cp_verb = choice(self.cp_verb)
        cp_verb_aux = return_aux(cp_verb, first)
        cp_verb_first = re_conjugate(cp_verb, first, cp_verb_aux)
        cp_verb_non_first = re_conjugate(cp_verb, non_first, cp_verb_aux)
        cp_verb_1 = re_conjugate(cp_verb, NP1, cp_verb_aux)

        # Embedded transitive verb, conjugated for each possible embedded subject (t = target lexicon).
        verb = choice(self.trans_verb)
        verb_aux = return_aux(verb, NP1)
        verb_1 = re_conjugate(verb, NP1, verb_aux)
        verb_2 = re_conjugate(verb, NP2, verb_aux)
        verb_t = re_conjugate(verb, self.target_lexicon, verb_aux)

        track_sentence = [
            (first[0], cp_verb[0], NP1[0], verb[0]),  # training 1/1
            (non_first[0], cp_verb[0], NP1[0], verb[0], NP2[0]),  # training 0/0
            (NP1[0], cp_verb[0], NP2[0], verb[0], first_acc[0]),  # test 1/0
            (NP1[0], cp_verb[0], verb[0], non_first_acc[0]),  # test 0/1
            (NP1[0], cp_verb[0], verb[0], first_acc[0]),  # control 1/1
            (NP1[0], cp_verb[0], NP2[0], verb[0], non_first_acc[0]),  # control 0/0
        ]

        # Pieces shared by several sentences.
        embedded_np1_clause = ["that", D1[0], NP1[0], verb_1[0]]
        np1_matrix = [D1[0], NP1[0], cp_verb_1[0], "that"]
        np2_subject = [D2[0], NP2[0], verb_2[0]]
        target_subject = [Dt[0], "apparition", verb_t[0]]

        data = self.build_paradigm(
            training_1_1=" ".join(
                [first[0], cp_verb_first[0]] + embedded_np1_clause + [Dt[0], "apparition"]),
            training_0_0=" ".join(
                [non_first[0], cp_verb_non_first[0]] + embedded_np1_clause + [D2[0], NP2[0]]),
            test_1_0=" ".join(np1_matrix + np2_subject + [first_acc[0]]),
            test_0_1=" ".join(np1_matrix + target_subject + [non_first_acc[0]]),
            control_1_1=" ".join(np1_matrix + target_subject + [first_acc[0]]),
            control_0_0=" ".join(np1_matrix + np2_subject + [non_first_acc[0]]))
        return data, track_sentence
Beispiel #27
0
    def sample_CP_verb_RC(self):
        """Sample two sentences built around a clause-embedding verb.

        Both sentences share the skeleton (bracketed slots are optional
        relative clauses, exactly one of which is filled per sentence)::

            D1 NP1 [RC1] V1 that D2 NP2 [RC2] V2 D3 NP3 [RC3]

        * First sentence: V1 appears in the form produced by
          ``self.get_ing_form``; the relative clause uses its regular verb.
        * Second sentence: V1 stays regular; either the relative clause's
          verb or the embedded verb V2 takes the ``get_ing_form`` variant.

        Returns:
            tuple: ``(data, track_sentence)`` where ``data`` is the list of
            the two sentence strings and ``track_sentence`` holds two equal
            ``(S1, RC1, RC2, RC3)`` tuples (template string plus the three
            relative-clause templates that were drawn).
        """
        # Main clause: clause-embedding verb, its subject and determiner.
        matrix_verb = choice(self.CP_verbs_non_ing)
        matrix_verb_ing = self.get_ing_form(matrix_verb)
        subj1 = choice(get_matches_of(matrix_verb, "arg_1", self.safe_nouns))
        det1 = choice(get_matched_by(subj1, "arg_1", all_very_common_dets))
        matrix_verb = conjugate(matrix_verb, subj1)
        matrix_verb_ing = conjugate(matrix_verb_ing, subj1)

        # Embedded clause: transitive verb with its subject and object.
        emb_verb = choice(self.all_non_ing_transitive_verbs)
        emb_verb_ing = self.get_ing_form(emb_verb)
        subj2 = choice(get_matches_of(emb_verb, "arg_1", self.safe_nouns))
        emb_verb = conjugate(emb_verb, subj2)
        emb_verb_ing = conjugate(emb_verb_ing, subj2)
        det2 = choice(get_matched_by(subj2, "arg_1", all_very_common_dets))
        obj2 = choice(get_matches_of(emb_verb, "arg_2", self.safe_nouns))
        det3 = choice(get_matched_by(obj2, "arg_1", all_very_common_dets))

        def draw_relative_clause(noun):
            # Coin flip between a subject and an object relative clause.
            if random.randint(0, 1):
                builder = self.subject_relative_clause
            else:
                builder = self.object_relative_clause
            template, _, rc_verb, rc_verb_ing = builder(noun)
            return template, rc_verb, rc_verb_ing

        # Index in this list == slot the clause occupies in the skeleton.
        clauses = [draw_relative_clause(noun) for noun in (subj1, subj2, obj2)]

        skeleton = " ".join([
            det1[0], "%s", subj1[0], "%s", matrix_verb[0], "that",
            det2[0], "%s", subj2[0], emb_verb[0], det3[0], "%s", obj2[0],
        ])
        track_sentence = [
            (skeleton, clauses[0][0], clauses[1][0], clauses[2][0])
            for _ in range(2)
        ]

        def assemble(main, embedded, slot, clause_text):
            # Lay out the skeleton with `clause_text` in the requested slot.
            words = [det1[0], subj1[0]]
            if slot == 0:
                words.append(clause_text)
            words += [main[0], "that", det2[0], subj2[0]]
            if slot == 1:
                words.append(clause_text)
            words += [embedded[0], det3[0], obj2[0]]
            if slot == 2:
                words.append(clause_text)
            return " ".join(words)

        data = []

        # Sentence 1: matrix verb in its get_ing_form variant, one plain RC.
        slot = random.randint(0, 2)
        template, rc_verb, _ = clauses[slot]
        data.append(
            assemble(matrix_verb_ing, emb_verb, slot, template % rc_verb))

        # Sentence 2: six variants = 3 RC slots x (RC verb | embedded verb)
        # carrying the get_ing_form variant; even draws alter the RC verb.
        slot, alter_embedded = divmod(random.randint(0, 5), 2)
        template, rc_verb, rc_verb_ing = clauses[slot]
        if alter_embedded:
            data.append(
                assemble(matrix_verb, emb_verb_ing, slot, template % rc_verb))
        else:
            data.append(
                assemble(matrix_verb, emb_verb, slot, template % rc_verb_ing))

        return data, track_sentence
    def sample_CP_noun_RC(self):
        """Sample two sentences whose subject is a noun taking a clausal complement.

        Both sentences share the skeleton (bracketed slots are optional
        relative clauses, exactly one of which is filled per sentence; the
        literal ``%s`` tokens are left in the output where determiners go)::

            %s NP1 that %s NP1_emb [RC1_emb] V_emb %s NP2_emb [RC2_emb] V1 %s NP2 [RC2]

        * First sentence: the matrix verb V1 appears in the form produced by
          ``self.get_ing_form``; the relative clause uses its regular verb.
        * Second sentence: V1 stays regular; either the embedded verb V_emb
          or the relative clause's verb takes the ``get_ing_form`` variant.

        Returns:
            tuple: ``(data, track_sentence, Ds)``. ``data`` is the list of
            the two sentence templates, ``track_sentence`` is a list holding
            the same summary string twice, and ``Ds`` is a list of two equal
            determiner tuples ``(D1, D1_emb, D2_emb, D2)`` -- presumably
            used by the caller to fill the ``%s`` slots; verify against caller.
        """
        # Matrix clause: the CP noun is the subject of a transitive verb.
        head_noun = choice(self.CP_nouns)
        matrix_verb = choice(
            get_matched_by(head_noun, "arg_1",
                           self.all_non_ing_transitive_verbs))
        matrix_verb_ing = self.get_ing_form(matrix_verb)
        matrix_verb = conjugate(matrix_verb, head_noun)
        matrix_verb_ing = conjugate(matrix_verb_ing, head_noun)
        head_det = choice(get_matched_by(head_noun, "arg_1", self.safe_dets))
        matrix_obj = choice(
            get_matches_of(matrix_verb, "arg_2", self.safe_nouns))
        matrix_obj_det = choice(
            get_matched_by(matrix_obj, "arg_1", self.safe_dets))

        # Clause embedded under the CP noun.
        emb_verb = choice(self.all_non_ing_transitive_verbs)
        emb_verb_ing = self.get_ing_form(emb_verb)
        emb_subj = choice(get_matches_of(emb_verb, "arg_1", self.safe_nouns))
        emb_verb = conjugate(emb_verb, emb_subj)
        emb_verb_ing = conjugate(emb_verb_ing, emb_subj)
        emb_subj_det = choice(
            get_matched_by(emb_subj, "arg_1", self.safe_dets))
        emb_obj = choice(get_matches_of(emb_verb, "arg_2", self.safe_nouns))
        emb_obj_det = choice(get_matched_by(emb_obj, "arg_1", self.safe_dets))

        # Relative clauses are drawn for the matrix object first, then for
        # the embedded subject and object.
        rc_matrix_obj = self.subject_relative_clause_intransitive(matrix_obj)
        rc_emb_subj = self.subject_relative_clause_intransitive(emb_subj)
        rc_emb_obj = self.subject_relative_clause_intransitive(emb_obj)
        # Index in this list == slot the clause occupies in the skeleton.
        clauses = [rc_emb_subj, rc_emb_obj, rc_matrix_obj]

        summary = " ".join([
            head_det[0], head_noun[0], emb_subj[0], emb_verb[0], emb_obj[0],
            matrix_verb[0], matrix_obj_det[0], matrix_obj[0]
        ])
        track_sentence = [summary, summary]

        def assemble(embedded, matrix, slot, clause_text):
            # Lay out the skeleton with `clause_text` in the requested slot.
            words = ["%s", head_noun[0], "that", "%s", emb_subj[0]]
            if slot == 0:
                words.append(clause_text)
            words += [embedded[0], "%s", emb_obj[0]]
            if slot == 1:
                words.append(clause_text)
            words += [matrix[0], "%s", matrix_obj[0]]
            if slot == 2:
                words.append(clause_text)
            return " ".join(words)

        data = []
        Ds = []

        # Sentence 1: matrix verb in its get_ing_form variant, one plain RC.
        slot = random.randint(0, 2)
        template, rc_verb, _ = clauses[slot]
        data.append(
            assemble(emb_verb, matrix_verb_ing, slot, template % rc_verb))

        # Sentence 2: draws 0-2 alter the embedded verb, draws 3-5 alter the
        # relative clause's verb; the remainder selects the RC slot.
        alter_rc, slot = divmod(random.randint(0, 5), 3)
        template, rc_verb, rc_verb_ing = clauses[slot]
        if alter_rc:
            data.append(
                assemble(emb_verb, matrix_verb, slot, template % rc_verb_ing))
        else:
            data.append(
                assemble(emb_verb_ing, matrix_verb, slot, template % rc_verb))

        for _ in range(2):
            Ds.append((head_det[0], emb_subj_det[0], emb_obj_det[0],
                       matrix_obj_det[0]))

        return data, track_sentence, Ds
    def sample(self):
        """Sample one paradigm contrasting in-domain and out-of-domain verbs.

        A plural subject is combined with four verb/object pairings: a verb
        from ``self.irr_past_verbs_in_domain``, one from
        ``self.present_plural_verbs_in_domain``, and their ``*_out_domain``
        counterparts.  Half of the resulting sentences are passed through
        ``titlecase`` before being handed to ``self.build_paradigm``.

        A leading conjunct ("D subj aux V D obj and") is also generated; it
        is stored in ``track_sentence`` but is not part of the sentences in
        ``data``.

        Returns:
            tuple: ``(data, track_sentence)`` where ``data`` is whatever
            ``self.build_paradigm`` returns and ``track_sentence`` is a list
            of four tuples ``(S1, D, subj, V, D_obj, obj)``.

        Raises:
            MatchNotFoundError: if an ``IndexError`` occurs while drawing the
                out-of-domain verbs or their object.
        """
        # Leading conjunct -- recorded for tracking only.
        lead_verb = choice(self.all_safe_verbs)
        lead_subj = choice(
            get_matches_of(lead_verb, "arg_1", all_common_nouns))
        lead_aux = return_aux(lead_verb, lead_subj)
        lead_subj_det = choice(
            get_matched_by(lead_subj, "arg_1", all_frequent_determiners))
        lead_obj = choice(get_matches_of(lead_verb, "arg_2", all_common_nouns))
        lead_obj_det = choice(
            get_matched_by(lead_obj, "arg_1", all_frequent_determiners))
        prefix = " ".join([
            lead_subj_det[0], lead_subj[0], lead_aux[0], lead_verb[0],
            lead_obj_det[0], lead_obj[0], "and"
        ])

        # In-domain material; the subject is shared with the out-of-domain
        # sentences below.
        past_in = choice(self.irr_past_verbs_in_domain)
        subj = choice(get_matches_of(past_in, "arg_1", all_plural_nouns))
        subj_det = choice(
            get_matched_by(subj, "arg_1", all_frequent_determiners))
        obj_in = choice(get_matches_of(past_in, "arg_2", all_common_nouns))
        obj_in_det = choice(
            get_matched_by(obj_in, "arg_1", all_frequent_determiners))
        compatible_pres_in = get_matched_by(
            obj_in, "arg_2", self.present_plural_verbs_in_domain)
        pres_in = choice(get_matched_by(subj, "arg_1", compatible_pres_in))

        # Out-of-domain material must fit the same subject; a failed draw is
        # reported as a missing match.
        try:
            past_out = choice(
                get_matched_by(subj, "arg_1", self.irr_past_verbs_out_domain))
            obj_out = choice(
                get_matches_of(past_out, "arg_2", all_common_nouns))
            obj_out_det = choice(
                get_matched_by(obj_out, "arg_1", all_frequent_determiners))
            compatible_pres_out = get_matched_by(
                obj_out, "arg_2", self.present_plural_verbs_out_domain)
            pres_out = choice(
                get_matched_by(subj, "arg_1", compatible_pres_out))
        except IndexError:
            raise MatchNotFoundError("")

        def clause_words(verb, obj_det, obj_noun):
            # Words of one "D subj V D obj" sentence, in order.
            return (subj_det[0], subj[0], verb[0], obj_det[0], obj_noun[0])

        variants = [
            clause_words(past_in, obj_in_det, obj_in),
            clause_words(pres_in, obj_in_det, obj_in),
            clause_words(past_out, obj_out_det, obj_out),
            clause_words(pres_out, obj_out_det, obj_out),
        ]

        track_sentence = [(prefix,) + words for words in variants]
        in_domain_1, in_domain_0, out_domain_1, out_domain_0 = [
            " ".join(words) for words in variants
        ]

        data = self.build_paradigm(
            training_1_1=titlecase(in_domain_1),
            training_0_0=in_domain_0,
            test_1_0=out_domain_1,
            test_0_1=titlecase(out_domain_0),
            control_1_0=in_domain_1,
            control_0_1=titlecase(in_domain_0),
            control_1_1=titlecase(out_domain_1),
            control_0_0=out_domain_0,
        )
        return data, track_sentence
Beispiel #30
0
    def sample(self):
        """Sample one paradigm of copular sentences behind a dependent clause.

        Four main clauses are drawn (in-domain "training" material and
        out-of-domain "test" material) and each is paired with the long or
        short clause returned by ``self.build_dependent_clauses``.

        Main-clause shapes, by category::

            Training 1
                John is the tall man.
                The tall man is in the room.
                The man is tall.
                The man in the room is tall.

            Training 0
                John is the man.
                John is the man in the room.
                The man is John.

            Test 1
                John is the tall man in the room.
                John is tall.
                The tall man is John.
                The tall man in the room is John.
                The tall man is president.
                The tall man in the room is president.

            Test 0
                John is in the room.
                The man is in the room.
                The man in the room is John.
                John is president.
                The man is president.
                The man in the room is president.

        Returns:
            tuple: ``(data, track_sentence)`` where ``data`` is whatever
            ``self.build_paradigm`` returns and ``track_sentence`` is a list
            of six equal ``(name, noun, adjective, locative)`` tuples built
            from the in-domain draws.
        """

        def gender_matched(pool, name, neutral):
            # Nouns whose gender equals the name's or is listed in `neutral`.
            return np.array([
                noun for noun in pool
                if noun["gender"] == name["gender"] or noun["gender"] in neutral
            ])

        name_in = choice(self.names_in_domain)
        name_out = choice(self.names_out_domain)
        noun_in = choice(
            gender_matched(self.common_nouns_in_domain, name_in, ("n", "")))
        noun_out = choice(
            gender_matched(self.common_nouns_out_domain, name_out, ("n", "")))
        adj_in = choice(self.adjs_in_domain)
        adj_out = choice(self.adjs_out_domain)
        locative_in = build_locative(
            choice(self.locales_in_domain), allow_quantifiers=False)
        locative_out = build_locative(
            choice(self.locales_out_domain), allow_quantifiers=False)
        # Note: unlike the filters above, an empty gender is not accepted here.
        other_noun = choice(
            gender_matched(self.one_word_noun, name_out, ("n",)))

        # Surface forms used to build the sentences.
        who, what, how, where = (
            name_in[0], noun_in[0], adj_in[0], locative_in[0])
        who_out, what_out, how_out, where_out = (
            name_out[0], noun_out[0], adj_out[0], locative_out[0])
        role = other_noun[0]

        track_sentence = [(who, what, how, where) for _ in range(6)]

        # Each list holds the candidate word sequences for one category;
        # random.choice picks one of them uniformly.
        training_1 = " ".join(random.choice([
            [who, "is", "the", how, what],
            ["the", how, what, "is", where],
            ["the", what, "is", how],
            ["the", what, where, "is", how],
        ]))

        training_0 = " ".join(random.choice([
            [who, "is", "the", what],
            [who, "is", "the", what, where],
            ["the", what, "is", who],
        ]))

        test_1 = " ".join(random.choice([
            [who_out, "is", "the", how_out, what_out, where_out],
            [who_out, "is", how_out],
            ["the", how_out, what_out, "is", who_out],
            ["the", how_out, what_out, where_out, "is", who_out],
            ["the", how_out, what_out, "is", role],
            ["the", how_out, what_out, where_out, "is", role],
        ]))

        test_0 = " ".join(random.choice([
            [who_out, "is", where_out],
            ["the", what_out, "is", where_out],
            ["the", what_out, where_out, "is", who_out],
            [who_out, "is", role],
            ["the", what_out, "is", role],
            ["the", what_out, where_out, "is", role],
        ]))

        long_clause, short_clause = self.build_dependent_clauses(
            [training_1, training_0, test_0, test_1])

        def framed(dependent, main):
            # "<dependent clause>, <main clause>."
            return "%s, %s." % (dependent, main)

        data = self.build_paradigm(
            training_1_1=framed(long_clause, training_1),
            training_0_0=framed(short_clause, training_0),
            control_1_0=framed(short_clause, training_1),
            control_0_1=framed(long_clause, training_0),
            test_1_0=framed(short_clause, test_1),
            test_0_1=framed(long_clause, test_0),
            control_1_1=framed(long_clause, test_1),
            control_0_0=framed(short_clause, test_0),
        )
        return data, track_sentence