Esempio n. 1
0
def main():
    """
    Endlessly generate sentences that test number agreement between a demonstrative and its noun.

    example:
    "look at this house" vs. "look at this houses"

    Sentences are yielded in pairs: the ungrammatical member first, the grammatical member second.
    """

    nouns_s_and_p = get_legal_words(tag='NN', second_tag='NNP')
    adjectives = get_legal_words(tag='JJ')

    singular_dems = ("this", "that")
    plural_dems = ("these", "those")

    while True:

        # one (singular, plural) noun pair and one adjective are drawn per round
        singular, plural = random.choice(nouns_s_and_p)
        adjective = random.choice(adjectives)

        # each entry: (demonstrative, mismatching noun, matching noun)
        agreement_cases = [(dem, plural, singular) for dem in singular_dems]
        agreement_cases += [(dem, singular, plural) for dem in plural_dems]

        for dem, wrong_noun, right_noun in agreement_cases:
            for template in (template1, template2):
                yield template.format(dem, wrong_noun, adjective)  # bad
                yield template.format(dem, right_noun, adjective)  # good
Esempio n. 2
0
def main():
    """
    Endlessly generate sentence pairs like the example below.

    example:
    "sarah discovered the vase that the dog might take ." vs. "sarah discovered what the dog might take the vase ."

    Every round draws one random filler per slot and yields four sentences:
    template1 bad, template1 good, template2 bad, template2 good.
    """

    nouns_s = get_legal_words(tag='NN')

    excluded_verbs_base = ('put', 'run', 'say', 'be', 'give', 'tell', 'live')
    verbs_base = get_legal_words(tag='VB', exclude=excluded_verbs_base)

    excluded_verbs_past = ('started', 'let', 'told')
    verbs_past = get_legal_words(tag='VBD', exclude=excluded_verbs_past)

    # read the word lists inside a context manager so the file handles are closed right away
    with (configs.Dirs.legal_words / 'names.txt').open() as f:
        names_ = f.read().split()
    names = find_counterbalanced_subset(names_,
                                        min_size=10,
                                        max_size=len(names_))

    with (configs.Dirs.legal_words / 'animates.txt').open() as f:
        animates_ = f.read().split()
    animates = find_counterbalanced_subset(animates_,
                                           min_size=8,
                                           max_size=len(animates_))

    # base verbs that are extended with a preposition; every other verb is used unchanged
    vb2vb_with_preposition = {
        'play': 'play with',
        'point': 'point to',
        'turn': 'turn to',
        'work': 'work with',
    }

    def add_preposition_after_vb(v: str) -> str:
        """Return the verb followed by its preposition, or the verb itself if it has none."""
        return vb2vb_with_preposition.get(v, v)

    while True:

        # random choices (the order of the draws is kept stable for seeded runs)
        slot2filler = {
            'name': random.choice(names),
            'nn1': random.choice(nouns_s),
            'nn2': random.choice(animates),
            'vbd': random.choice(verbs_past),
            'vbd2': random.choice(verbs_past),  # used in template2 only
            'vb': random.choice(verbs_base),  # used in template 1 only
        }

        slot2filler['vb'] = add_preposition_after_vb(slot2filler['vb'])

        yield template1['b'].format(**slot2filler)  # bad
        yield template1['g'].format(**slot2filler)  # good

        yield template2['b'].format(**slot2filler)  # bad
        yield template2['g'].format(**slot2filler)  # good
Esempio n. 3
0
def main():
    """
    Endlessly generate sentences that test agreement between "do"/"does" and the noun.

    example:
    "where does the dog go?" vs. "where does the dogs go?"

    Sentences are yielded in pairs: the ungrammatical member first, the grammatical member second.
    """

    nouns_s_and_p = get_legal_words(tag='NN', second_tag='NNP')
    adjectives = get_legal_words(tag='JJ')

    aux_singular = ("does",)
    aux_plural = ("do",)

    while True:

        # random choices
        adjective = random.choice(adjectives)
        singular, plural = random.choice(nouns_s_and_p)

        # template4 is the only template that is additionally handed the adjective
        templates_and_extras = (
            (template1, ()),
            (template2, ()),
            (template3, ()),
            (template4, (adjective,)),
            (template5, ()),
            (template6, ()),
        )

        # each entry: (auxiliary, mismatching noun, matching noun)
        agreement_cases = [(aux, plural, singular) for aux in aux_singular]
        agreement_cases += [(aux, singular, plural) for aux in aux_plural]

        for aux, wrong_noun, right_noun in agreement_cases:
            for template, extras in templates_and_extras:
                yield template.format(aux, wrong_noun, *extras)  # bad
                yield template.format(aux, right_noun, *extras)  # good
Esempio n. 4
0
def main():
    """
    Endlessly generate sentences that test subject-copula agreement across a relative clause.

    example:
    "the dog that i like is green" vs. "the dogs that i like is green"

    Sentences are yielded in pairs: the ungrammatical member first, the grammatical member second.
    Singular and plural copulas are handled by two sibling loops, so that each copula
    contributes the same number of sentence pairs per round.
    """

    nouns_s_and_p = get_legal_words(tag='NN', second_tag='NNP')
    adjectives = get_legal_words(tag='JJ')

    copulas_singular = ["is", "was"]
    copulas_plural = ["are", "were"]

    pronouns_1p_2p = ['i', 'you', 'we']
    pronouns_3p = ['he', 'she', 'it']
    # both pronoun groups must contribute equally many sentences
    assert len(pronouns_3p) == len(pronouns_1p_2p)

    while True:

        # random choices
        noun_s, noun_p = random.choice(nouns_s_and_p)
        adj = random.choice(adjectives)

        for copula_s in copulas_singular:

            # object-relative
            for pronoun_1p_2p in pronouns_1p_2p:
                yield template1a.format(noun_p, pronoun_1p_2p, copula_s, adj)  # bad
                yield template1a.format(noun_s, pronoun_1p_2p, copula_s, adj)  # good
            for pronoun_3p in pronouns_3p:
                yield template1b.format(noun_p, pronoun_3p, copula_s, adj)
                yield template1b.format(noun_s, pronoun_3p, copula_s, adj)

            # subject-relative
            yield template2a.format(noun_p, copula_s, adj)
            yield template2a.format(noun_s, copula_s, adj)

        # NOTE: this loop used to be nested inside the singular loop above, which emitted
        # every plural-copula pair once per singular copula and skewed the balance.
        for copula_p in copulas_plural:

            # object-relative
            for pronoun_1p_2p in pronouns_1p_2p:
                yield template1a.format(noun_s, pronoun_1p_2p, copula_p, adj)  # bad
                yield template1a.format(noun_p, pronoun_1p_2p, copula_p, adj)  # good
            for pronoun_3p in pronouns_3p:
                yield template1b.format(noun_s, pronoun_3p, copula_p, adj)
                yield template1b.format(noun_p, pronoun_3p, copula_p, adj)

            # subject-relative
            yield template2b.format(noun_s, copula_p, adj)
            yield template2b.format(noun_p, copula_p, adj)
Esempio n. 5
0
File: n_bar.py Progetto: phueb/Zorro
def main():
    """
    Endlessly generate sentence pairs like the example below.

    example:
    "sam found one purple dog and karen revealed more ." vs. "sam found one dog and karen revealed more purple."

    Every round draws one random filler per slot and yields four sentences:
    template1 bad, template1 good, template2 bad, template2 good.
    """

    vbds = get_legal_words(tag='VBD')
    adjectives = get_legal_words(tag='JJ')

    # read the word lists inside a context manager so the file handles are closed right away
    with (configs.Dirs.legal_words / 'nouns_mass.txt').open() as f:
        nouns_mass = f.read().split()
    # mass nouns are excluded from the singular nouns
    nouns_s = get_legal_words(tag='NN', exclude=tuple(nouns_mass))

    with (configs.Dirs.legal_words / 'animates.txt').open() as f:
        animates_ = f.read().split()
    animates = find_counterbalanced_subset(animates_,
                                           min_size=8,
                                           max_size=len(animates_))

    with (configs.Dirs.legal_words / 'names.txt').open() as f:
        names_ = f.read().split()
    names = find_counterbalanced_subset(names_,
                                        min_size=10,
                                        max_size=len(names_))

    determiners = ['a', 'the', 'this', 'some', 'that'] + ['your', 'his', 'her']

    number_words = ['several', 'more', 'two', 'three',
                    'a lot more']  # , 'some']

    while True:

        # random choices (the order of the draws is kept stable for seeded runs)
        slot2filler = {
            'name1': random.choice(names),
            'name2': random.choice(names),
            'nn': random.choice(nouns_s),
            'nn2': random.choice(animates),
            'vbd': random.choice(vbds),
            'det': random.choice(determiners),
            'jj': random.choice(adjectives),
            'number': random.choice(number_words),
        }

        yield template1['b'].format(**slot2filler)  # bad
        yield template1['g'].format(**slot2filler)  # good

        yield template2['b'].format(**slot2filler)  # bad
        yield template2['g'].format(**slot2filler)  # good
Esempio n. 6
0
def main():
    """
    Endlessly generate sentences that test demonstrative-noun agreement across an adjective.

    example:
    "look at this green house ." vs. "look at this green houses ."
    "this green house went there ." vs. "this green houses went there."

    Sentences are yielded in pairs: odd-numbered lines are bad, even-numbered lines are good.
    """

    nouns_s_and_p = get_legal_words(tag='NN', second_tag='NNP')
    adjectives = get_legal_words(tag='JJ')

    singular_dems = ("this", "that")
    plural_dems = ("these", "those")

    while True:

        # random choices
        adjective = random.choice(adjectives)
        singular, plural = random.choice(nouns_s_and_p)

        # each entry: (demonstrative, mismatching noun, matching noun)
        agreement_cases = [(dem, plural, singular) for dem in singular_dems]
        agreement_cases += [(dem, singular, plural) for dem in plural_dems]

        for dem, wrong_noun, right_noun in agreement_cases:
            for template in (template1, template2, template3, template4):
                yield template.format(dem, adjective, wrong_noun)  # bad
                yield template.format(dem, adjective, right_noun)  # good
Esempio n. 7
0
def main():
    """
    Endlessly generate sentences that test subject-copula agreement across an intervening object noun.

    example:
    "the dog on the mats is brown" vs "the dog on the mats are brown"

    considerations:
    1. use equal proportion of sentences containing plural vs. singular subject nouns
    2. use equal proportion of sentences containing plural vs. singular object nouns
    3. subject with object number is counterbalanced such that:
        -singular subjects occur with 50:50 singular:plural objects
        -plural   subjects occur with 50:50 singular:plural objects
    """

    nouns_s_and_p = get_legal_words(tag='NN', second_tag='NNP')
    adjectives = get_legal_words(tag='JJ')

    singular_copulas = ("is", "was")
    plural_copulas = ("are", "were")

    while True:

        # subject and object nouns are drawn independently, each in both number forms
        sub_s, sub_p = random.choice(nouns_s_and_p)
        obj_s, obj_p = random.choice(nouns_s_and_p)

        # random choices
        template = random.choice([template1, template2])
        adjective = random.choice(adjectives)

        # each entry: (copula, mismatching subject, matching subject)
        agreement_cases = [(cop, sub_p, sub_s) for cop in singular_copulas]
        agreement_cases += [(cop, sub_s, sub_p) for cop in plural_copulas]

        for copula, wrong_sub, right_sub in agreement_cases:
            # the contrast is in number agreement between subject and copula;
            # it is produced once with the singular and once with the plural object
            for obj in (obj_s, obj_p):
                yield template.format(wrong_sub, obj, copula, adjective)  # bad
                yield template.format(right_sub, obj, copula, adjective)  # good
Esempio n. 8
0
def main():
    """
    Endlessly generate sentence pairs like the example below.

    example:
    "he made the van this challenge ." vs. "the van made he this challenge ."

    Every round draws one random filler per slot and yields two sentences:
    template1 bad, then template1 good.
    """

    # counterbalance both forms of verb as different forms are the contrast

    vbds = [
        'brought',
        'made',
        'built',
        'gave',
        'showed',
    ]

    nouns_s = get_legal_words(tag='NN')

    # read the word list inside a context manager so the file handle is closed right away
    with (configs.Dirs.legal_words / 'animates.txt').open() as f:
        animates_ = f.read().split()
    animates = find_counterbalanced_subset(animates_,
                                           min_size=8,
                                           max_size=len(animates_))

    personal_pronouns_obj = ['me', 'him', 'her', 'us',
                             'them']  # in the objective case
    personal_pronouns_subj = ['i', 'he', 'she', 'we',
                              'they']  # in the subjective case

    determiners = ['a', 'one', 'this', 'that', 'the', 'my', 'his', 'her']

    vowels = {'a', 'e', 'i', 'o', 'u'}

    while True:

        vbd = random.choice(vbds)  # template 1

        # random choices (the order of the draws is kept stable for seeded runs)
        slot2filler = {
            'nn': random.choice(animates),
            'nn2': random.choice(nouns_s),
            'det': random.choice(determiners),
            'prp_obj': random.choice(personal_pronouns_obj),
            'prp_subj': random.choice(personal_pronouns_subj),
            'vbd': vbd,
        }

        # "a" vs. "an", decided by the first letter of the second noun
        if slot2filler['det'] == 'a' and slot2filler['nn2'][0] in vowels:
            slot2filler['det'] += 'n'

        yield template1['b'].format(**slot2filler)  # bad
        yield template1['g'].format(**slot2filler)  # good
Esempio n. 9
0
def main():
    """
    Endlessly generate sentence pairs whose contrast is the position of "there".

    example:
    "a documentary was there looking at dogs ." vs. "there was a documentary looking at dogs ."

    note: this task is too difficult for babyBERTa
    """

    nouns_s_and_p = get_legal_words(tag='NN', second_tag='NNP')
    adjectives = get_legal_words(tag='JJ')

    quantifiers = ['each', 'most', 'all', 'every']
    plural_quantifiers = {'most', 'all'}

    copula_p = ['were', 'are', "were not", "aren't"]
    copula_s = ['was', 'is', "was not", "isn't"]

    vowels = {'a', 'e', 'i', 'o', 'u'}

    # every gerund comes with a linker; a linker can be a preposition or determiner phrase
    gerund2linker = {
        'looking': 'like a',
        'becoming': 'some kind of a',
        'falling': 'on the',
        'leaving': 'us by the',
        'eating': 'one piece of this',
        'increasing': 'the size of the',
        'moving': 'to the',
        'opening': 'the door to a',
        'existing': 'without a',
        'containing': 'a',
        'standing': 'on top of a',
        'changing': 'the',
        'surrounding': 'the',
        'adding': 'to the',
        'acting': 'like a',
    }

    # the candidate gerunds are exactly the keys of the table above, in the same order
    gerunds_ = list(gerund2linker)
    gerunds = find_counterbalanced_subset(gerunds_,
                                          min_size=8,
                                          max_size=len(gerunds_))

    while True:

        # random choices (only the singular form of the first noun pair is used)
        noun_s, _ = random.choice(nouns_s_and_p)
        adj = random.choice(adjectives)
        quantifier = random.choice(quantifiers)
        subj_s, subj_p = random.choice(nouns_s_and_p)
        gerund = random.choice(gerunds)

        # plural vs. singular copula, and the subject form that goes with it
        needs_plural = quantifier in plural_quantifiers
        copula = random.choice(copula_p if needs_plural else copula_s)
        subj1 = subj_p if needs_plural else subj_s  # for template 1

        # "a" vs. "an", decided by the first letter of the adjective
        linker = gerund2linker[gerund]
        if linker.endswith('a') and adj[0] in vowels:
            linker = linker + 'n'

        # contrast is about word order
        shared_tail = (gerund, linker, adj, noun_s)
        yield template1.format('there', copula, quantifier, subj1, *shared_tail)  # bad
        yield template1.format(quantifier, subj1, copula, 'there', *shared_tail)  # good
Esempio n. 10
0
def main():
    """
    Endlessly generate sentence pairs whose contrast is the quantifier after existential "there".

    example:
    "there was a documentary about dogs ." vs. "there was each documentary about dogs ."

    Every round yields four sentences: template1 bad, template1 good, template2 bad, template2 good.
    The copula and the number of the subject nouns follow the *good* quantifier.
    """

    nouns_s_and_p = get_legal_words(tag='NN', second_tag='NNP')
    adjectives = get_legal_words(tag='JJ')

    quantifiers_good = ['a', 'no', 'some', 'many', 'few']
    quantifiers_bad = ['each', 'most', 'all', 'every']

    template1_subjects_s_and_p = [
        ('movie', 'movies'),
        ('book', 'books'),
        ('story', 'stories'),
        ('sign', 'signs'),
    ]

    vowels = {'a', 'e', 'i', 'o', 'u'}

    copula_p = ['were', 'are', "were not", "aren't"]
    copula_s = ['was', 'is', 'was not', "isn't"]

    # read the word list inside a context manager so the file handle is closed right away
    with (configs.Dirs.legal_words / 'names.txt').open() as f:
        names_ = f.read().split()
    names = find_counterbalanced_subset(names_,
                                        min_size=10,
                                        max_size=len(names_))

    def with_article(quantifier: str, noun: str) -> str:
        """Turn the quantifier "a" into "an" when the noun that follows starts with a vowel letter."""
        if quantifier == 'a' and noun[0] in vowels:
            return 'an'
        return quantifier

    while True:

        # random choices (the order of the draws is kept stable for seeded runs)
        noun_s, noun_p = random.choice(nouns_s_and_p)
        adj = random.choice(adjectives)
        quantifier_b = random.choice(quantifiers_bad)
        quantifier_g = random.choice(quantifiers_good)
        subj_s, sub_p = random.choice(template1_subjects_s_and_p)
        name = random.choice(names)

        # plural vs. singular copula
        if quantifier_g in {'some', 'many', 'few'}:
            copula = random.choice(copula_p)
            subj1 = sub_p  # for template 1
            subj2 = noun_p  # for template 2
        else:
            copula = random.choice(copula_s)
            subj1 = subj_s
            subj2 = noun_s

        # prevent double negation.
        # the contracted copulas ("isn't", "aren't") have no space before "n't",
        # so the contraction itself must be stripped
        if quantifier_g == 'no':
            copula = copula.replace(' not', '').replace("n't", '')

        # "a" vs. "an" is decided separately for each template, because the noun
        # that follows the quantifier differs between template 1 and template 2
        yield template1.format(copula, with_article(quantifier_b, subj1), subj1, adj, noun_p)  # bad
        yield template1.format(copula, with_article(quantifier_g, subj1), subj1, adj, noun_p)  # good

        yield template2.format(copula, with_article(quantifier_b, subj2), subj2, name)  # bad
        yield template2.format(copula, with_article(quantifier_g, subj2), subj2, name)  # good
Esempio n. 11
0
def main():
    """
    Endlessly generate sentence pairs like the example below.

    example:
    "sam questioned the dog that can hurt sara ." vs "sam questioned who the dog can hurt sara."

    Every round draws one random filler per slot and yields four sentences:
    template1 bad, template1 good, template2 bad, template2 good.
    """

    nouns_s_and_p = get_legal_words(tag='NN', second_tag='NNP')

    excluded_verbs_base = ('put', 'run', 'say', 'be', 'give', 'tell', 'live')
    verbs_base = get_legal_words(tag='VB', exclude=excluded_verbs_base)

    excluded_verbs_past = ('started', 'let', 'told')
    verbs_past = get_legal_words(tag='VBD', exclude=excluded_verbs_past)

    excluded_verbs_gerund = ('saying', )
    verbs_gerund = get_legal_words(tag='VBG', exclude=excluded_verbs_gerund)

    # read the word list inside a context manager so the file handle is closed right away
    with (configs.Dirs.legal_words / 'names.txt').open() as f:
        names_ = f.read().split()
    names = find_counterbalanced_subset(names_,
                                        min_size=10,
                                        max_size=len(names_))

    # verbs grouped by the preposition that is appended to them
    preposition2verbs = {
        'like': ('acting', 'act'),
        'on': ('standing', 'stand', 'falling', 'fall', 'depending', 'depend'),
        'about': ('asking', 'ask', 'writing', 'write', 'thinking', 'think'),
        'in': ('swimming', 'swim', 'sleeping', 'sleep'),
        'to': ('driving', 'drive', 'coming', 'come', 'related', 'relate'),
        'with': ('flying', 'fly', 'working', 'work'),
    }
    verb2preposition = {
        verb: preposition
        for preposition, verbs in preposition2verbs.items()
        for verb in verbs
    }

    def add_preposition_after_vb(v: str) -> str:
        """Return the verb followed by its preposition, or the verb itself if it has none."""
        preposition = verb2preposition.get(v)
        if preposition is None:
            return v
        return f'{v} {preposition}'

    while True:

        # random choices (the order of the draws is kept stable for seeded runs).
        # the singular and the plural noun come from two independent draws
        slot2filler = {
            'name': random.choice(names),
            'nn': random.choice(nouns_s_and_p)[0],
            'nns': random.choice(nouns_s_and_p)[1],
            'vbd': random.choice(verbs_past),
            'vbg': random.choice(verbs_gerund),  # used in template2 only
            'vb': random.choice(verbs_base),  # used in template 1 only
        }

        for slot in ('vb', 'vbd', 'vbg'):
            slot2filler[slot] = add_preposition_after_vb(slot2filler[slot])

        yield template1['b'].format(**slot2filler)  # bad
        yield template1['g'].format(**slot2filler)  # good

        yield template2['b'].format(**slot2filler)  # bad
        yield template2['g'].format(**slot2filler)  # good
Esempio n. 12
0
def main():
    """
    Endlessly generate sentence pairs whose contrast is the form of a pronoun after the verb.

    example:
    "we can help him do something" vs. "we can help himself do something"

    NOTE(review): the slots filled below are an objective pronoun ('prp_obj') and a
    possessive form ('prp_poss'); confirm against the templates which form the example refers to.

    Every round yields two sentences: template1 bad, then template1 good.
    """

    # counterbalance both forms of verb as different forms are the contrast

    excluded_verbs_base = ('say', 'live')
    verbs_base = get_legal_words(tag='VB', exclude=excluded_verbs_base)

    nouns_s = get_legal_words(tag='NN')

    prps_obj_and_poss = [
        ('him', 'his'),
        ('her', 'hers'),
        ('us', 'our'),
        ('them', 'theirs'),
    ]

    # read the word list inside a context manager so the file handle is closed right away
    with (configs.Dirs.legal_words / 'animates.txt').open() as f:
        animates_ = f.read().split()
    animates = find_counterbalanced_subset(animates_,
                                           min_size=8,
                                           max_size=len(animates_))

    auxiliaries = ['can', 'could', 'will', 'would', 'must', 'should']

    def add_misc_after_prp(
        prp: str,
        v: str,
        arg1: str,
    ) -> str:
        """Return the pronoun followed by verb-specific material; unlisted verbs leave it unchanged."""
        if v in {'take'}:
            return f'{prp} to {arg1}'
        elif v in {'make'}:
            return f'{prp} do {arg1}'
        elif v in {'work', 'put'}:
            return f'{prp} on {arg1}'
        elif v in {'turn'}:
            return f'{prp} around'
        elif v in {'tell'}:
            return f'{prp} about {arg1}'
        else:
            return prp

    def add_preposition_after_vb(v: str) -> str:
        """Return the verb followed by its preposition, or the verb itself if it has none."""
        if v in {'work', 'study'}:
            return f'{v} with'
        elif v in {'point', 'run'}:
            return f'{v} to'
        elif v in {'be'}:
            return f'{v} like'
        else:
            return v

    while True:

        prp_obj, prp_poss = random.choice(prps_obj_and_poss)

        # random choices (the order of the draws is kept stable for seeded runs)
        slot2filler = {
            'aux': random.choice(auxiliaries),
            'prp_poss': prp_poss,
            'prp_obj': prp_obj,
            'nn': random.choice(animates),
            'vb': random.choice(verbs_base),
        }

        # sample argument once, so that the same argument is used by both bad and good sentences.
        # note: pronouns don't get determiners, but nouns do.
        # the noun is drawn first and the determiner added afterwards, which avoids
        # rebuilding a full list of "the <noun>" strings on every round
        argument1 = f'the {random.choice(nouns_s)}'

        # first, add some miscellaneous component (this must see the bare verb)
        slot2filler['prp_poss'] = add_misc_after_prp(prp_poss,
                                                     slot2filler['vb'],
                                                     argument1)
        slot2filler['prp_obj'] = add_misc_after_prp(prp_obj, slot2filler['vb'],
                                                    argument1)

        # lastly, add a preposition
        slot2filler['vb'] = add_preposition_after_vb(slot2filler['vb'])

        yield template1['b'].format(**slot2filler)  # bad
        yield template1['g'].format(**slot2filler)  # good
Esempio n. 13
0
def main():
    """
    Endlessly generate sentence pairs like the example below.

    example:
    "only sarah could ever talk." vs. "even sarah could ever talk"

    Every round draws one random filler per slot and yields four sentences:
    template1 bad, template1 good, template2 bad, template2 good.
    """

    vbzs = get_legal_words(tag='VBZ', exclude=('happens', 'says', ))
    vbs = get_legal_words(tag='VB')

    nouns_s = get_legal_words(tag='NN')

    # read the word lists inside a context manager so the file handles are closed right away
    with (configs.Dirs.legal_words / 'animates.txt').open() as f:
        animates_ = f.read().split()
    animates = find_counterbalanced_subset(animates_, min_size=8, max_size=len(animates_))

    with (configs.Dirs.legal_words / 'names.txt').open() as f:
        names_ = f.read().split()
    names = find_counterbalanced_subset(names_, min_size=10, max_size=len(names_))

    # candidates for the first noun slot; built once instead of on every round
    names_and_animates = names + animates

    determiners = ['the', 'this', 'some', 'that'] + ['your', 'his', 'her']

    auxiliaries = ['could', 'can', 'would', 'will']

    def add_argument_after_vb(v: str,
                              argument1: str,
                              ) -> str:
        """Return the verb followed by a preposition or by the argument; unlisted verbs are unchanged."""
        if v in {'thinks', 'reads'}:
            return f'{v} about'
        elif v in {'lives', 'falls', 'is', 'be'}:
            return f'{v} in'
        elif v in {'stands', 'turns'}:
            return f'{v} on'
        elif v in {'acts', 'looks'}:
            return f'{v} like'
        elif v in {'goes', 'comes'}:
            return f'{v} to'
        # the base form 'give' is listed next to 'gives', like the other verb pairs here
        elif v in {'gives', 'give', 'plays', 'play', 'shows', 'show', 'tells', 'tell'}:
            return f'{v} {argument1}'
        else:
            return v

    while True:

        arg1 = random.choice(['him', 'her'])
        vbz = random.choice(vbzs)
        vb = random.choice(vbs)

        # random choices (the order of the draws is kept stable for seeded runs)
        slot2filler = {
            'nn1': random.choice(names_and_animates),
            'nn2': random.choice(nouns_s),
            'vbz': add_argument_after_vb(vbz, arg1),
            'vb': add_argument_after_vb(vb, arg1),
            'det': random.choice(determiners),
            'aux': random.choice(auxiliaries)
        }

        # add determiner to animate noun (names stay bare)
        if slot2filler['nn1'] in animates:
            slot2filler['nn1'] = random.choice(determiners) + ' ' + slot2filler['nn1']

        yield template1['b'].format(**slot2filler)  # bad
        yield template1['g'].format(**slot2filler)  # good

        yield template2['b'].format(**slot2filler)  # bad
        yield template2['g'].format(**slot2filler)  # good
Esempio n. 14
0
def main():
    """
    Endlessly generate sentence pairs like the example below.

    example:
    "has sam ever worried sarah ?" vs. "jane has ever worried sarah ."

    Every round draws one random filler per slot and yields four sentences:
    template1 bad, template1 good, template2 bad, template2 good.
    Rounds that would combine the auxiliaries "did"/"does" with the verb "be" are skipped.
    """

    vbs = get_legal_words(tag='VB')

    nouns_s = get_legal_words(tag='NN')

    # read the word lists inside a context manager so the file handles are closed right away
    with (configs.Dirs.legal_words / 'animates.txt').open() as f:
        animates_ = f.read().split()
    animates = find_counterbalanced_subset(animates_,
                                           min_size=8,
                                           max_size=len(animates_))

    with (configs.Dirs.legal_words / 'names.txt').open() as f:
        names_ = f.read().split()
    names = find_counterbalanced_subset(names_,
                                        min_size=10,
                                        max_size=len(names_))

    auxiliaries = ['does', 'will', 'could', 'did', 'should', 'would']

    determiners = ['a', 'the', 'this', 'some', 'that'] + ['your', 'his', 'her']

    # candidate verb arguments; built once instead of twice on every round.
    # note: pronouns don't get determiners, but nouns do
    arguments = ['you', 'him', 'her', 'it'] + [f'the {nn}' for nn in nouns_s]

    def add_argument_after_vb(
        v: str,
        arg1: str,
        arg2: str,
    ) -> str:
        """Return the verb followed by verb-specific arguments; unlisted verbs are unchanged."""
        if v in {'say'}:
            return f'{v} something'
        elif v in {'read'}:
            return f'{v} a book'
        elif v in {'play'}:
            return f'{v} with {arg1}'
        elif v in {
                'use', 'find', 'get', 'be', 'order', 'need', 'have', 'control',
                'want', 'free', 'keep'
        }:
            return f'{v} {arg1}'
        elif v in {'tell'}:
            return f'{v} me about {arg1}'
        elif v in {'plan'}:
            return f'{v} to do something with {arg1}'
        elif v in {'take'}:
            return f'{v} {arg1} away'
        elif v in {'give', 'show', 'present'}:
            return f'{v} {arg1} to {arg2}'
        elif v in {'put'}:
            return f'{v} {arg1} on {arg2}'
        elif v in {'fall'}:
            return f'{v} in {arg1}'
        elif v in {'see'}:
            # NOTE(review): arg1 is either a pronoun or already starts with "the",
            # so this yields e.g. "see how the the dog works" - confirm this is intended
            return f'{v} how the {arg1} works'
        elif v in {'come'}:
            return f'{v} to {arg1}'
        else:
            return v

    while True:

        # sample argument once, so that the same argument is used by both bad and good sentences.
        argument1 = random.choice(arguments)
        argument2 = random.choice(arguments)

        vb = random.choice(vbs)

        # random choices (the order of the draws is kept stable for seeded runs)
        slot2filler = {
            'name': random.choice(names),
            'nn': random.choice(animates),
            'vb': add_argument_after_vb(vb, argument1, argument2),
            'aux': random.choice(auxiliaries),
            'det': random.choice(determiners),
        }

        # "did be" / "does be" is never produced
        if slot2filler['aux'] in {'did', 'does'} and vb == 'be':
            continue

        yield template1['b'].format(**slot2filler)  # bad
        yield template1['g'].format(**slot2filler)  # good

        yield template2['b'].format(**slot2filler)  # bad
        yield template2['g'].format(**slot2filler)  # good
Esempio n. 15
0
def main():
    """
    Endlessly generate sentence pairs that contrast an irregular past form with its past participle.

    example:
    "maybe the black dog was taken by him ." vs. "maybe the black dog was took by him ."


    a paradigm that uses just adjectives results in use of 2 adjectives ("taken", "broken") only,
    when the vocab size is 8192 - hence, we do not use this paradigm.
    instead, we use the passive construction which allows us to include the verb "given".
    """

    vocab = get_vocab_words()
    modifiers = ['maybe', 'i think', 'we hope that', 'he said that']
    nouns_s = get_legal_words(tag='NN')
    adjectives = get_legal_words(tag='JJ')

    determiners = ['the', 'this', 'one', 'your']

    # (simple past, past participle)
    past_and_participle = [
        ('wore', 'worn'),
        ('broke', 'broken'),
        ('hid', 'hidden'),
        ('forgot', 'forgotten'),
        ('took', 'taken'),
        ('ate', 'eaten'),
        ('drank', 'drunk'),
        ('saw', 'seen'),
        ('chose', 'chosen'),
        ('threw', 'thrown'),
        ('beat', 'beaten'),

        # ditransitive
        ('forbade', 'forbidden'),
        ('gave', 'given'),
    ]

    while True:

        # random choices
        noun = random.choice(nouns_s)
        det = random.choice(determiners)
        adj = random.choice(adjectives)
        mod = random.choice(modifiers)

        # get two contrasting irregular inflected forms.
        # past participle is always correct
        past, participle = random.choice(past_and_participle)

        # skip the round unless both forms are in the vocab and actually differ
        both_in_vocab = participle in vocab and past in vocab
        if not both_in_vocab or participle == past:
            continue

        # "given" is the exceptional case with its own pair of templates
        if participle == 'given':
            templates = (template3, template4)
        else:
            templates = (template1, template2)

        for template in templates:
            yield template.format(mod, det, adj, noun, past)  # bad
            yield template.format(mod, det, adj, noun, participle)  # good
Esempio n. 16
0
def main():
    """
    Endlessly yield sentences formatted from template1 through template4.

    example:
    "sarah thinks about herself listening to the dog." vs. "sarah thinks about herself listened to that girl."

    For every template, the bad sentence is yielded immediately before its good counterpart.
    """

    excluded_vbds = ('told', 'forgot', 'thought', 'said', 'happened')
    excluded_vbzs = ('tells', 'forgets', 'thinks', 'says', 'happens')

    vbd_vbg_pairs = get_legal_words(tag='VBD', second_tag='VBG',
                                    exclude=excluded_vbds)
    vbz_vbg_pairs = get_legal_words(tag='VBZ', second_tag='VBG',
                                    exclude=excluded_vbzs)
    singular_nouns = get_legal_words(tag='NN')

    vowels = {'a', 'e', 'i', 'o', 'u'}
    determiner_options = ['a', 'the', 'this', 'some', 'that']
    matrix_verbs = [
        'thinks about',
        'thought about',
        'did not think about',
        'could think about',
        'must think about',
        'must not think about',
    ]

    # verb forms grouped by the preposition that must follow them
    preposition2verbs = {
        'on': ('falling', 'fell'),
        'to': ('came', 'come', 'comes', 'coming',
               'went', 'go', 'goes', 'going',
               'wrote', 'write', 'writes', 'writing',
               'ran', 'run', 'runs', 'running'),
        'in': ('lived', 'live', 'lives', 'living'),
        'at': ('looked', 'look', 'looks', 'looking'),
        'for': ('reached', 'reach', 'reaches', 'reaching'),
        'off': ('showed', 'show', 'shows', 'showing'),
        'up': ('set', 'sets', 'setting'),
        'away': ('put', 'puts', 'putting'),
    }
    verb2preposition = {
        verb: preposition
        for preposition, verbs in preposition2verbs.items()
        for verb in verbs
    }

    def add_preposition_after_vb(v: str) -> str:
        """Append the preposition registered for v, or return v unchanged."""
        preposition = verb2preposition.get(v)
        if preposition is None:
            return v
        return f'{v} {preposition}'

    while True:

        vbd1, vbg1 = random.choice(vbd_vbg_pairs)
        vbz2, vbg2 = random.choice(vbz_vbg_pairs)

        # remaining random choices, drawn in a fixed order
        name_m = random.choice(names_m)
        name_f = random.choice(names_f)
        noun = random.choice(singular_nouns)
        matrix_verb = random.choice(matrix_verbs)
        determiner = random.choice(determiner_options)

        # "a" becomes "an" before a noun starting with a vowel letter
        if determiner == 'a' and noun[0] in vowels:
            determiner = determiner + 'n'

        slot2filler = dict(
            nn_m=name_m,
            nn_f=name_f,
            nn2=noun,
            vbd1=add_preposition_after_vb(vbd1),
            vbg1=add_preposition_after_vb(vbg1),
            vbz2=add_preposition_after_vb(vbz2),
            vbg2=add_preposition_after_vb(vbg2),
            vb=matrix_verb,
            det=determiner,
            prp_reflexive_m='himself',
            prp_reflexive_f='herself',
        )

        for template in (template1, template2, template3, template4):
            yield template['b'].format(**slot2filler)  # bad
            yield template['g'].format(**slot2filler)  # good
Esempio n. 17
0
def main():
    """
    Endlessly yield sentences contrasting intransitive and transitive verbs.

    example:
    "sarah laughs" vs. "sarah gives"

    Sentences are formatted from template1 and template2. Whenever a template is used,
    its bad sentence is yielded immediately before its good counterpart.

    The counterbalancing limits in configs.Data are relaxed only while the verb lists
    are retrieved, and are restored afterwards even if retrieval raises.
    """

    def read_words(file_name: str) -> list:
        """Return the whitespace-separated words stored in a legal-words file."""
        # a context manager closes the file handle instead of leaking it
        with (configs.Dirs.legal_words / file_name).open() as f:
            return f.read().split()

    # we need a lot of verbs here, so temporarily reduce restrictions imposed by counterbalancing
    temporary_num_words = {'VB': 30, 'VBZ': 50, 'VBD': 50}
    original_num_words = {tag: configs.Data.tag2num_words[tag] for tag in temporary_num_words}
    original_bias_tolerance = configs.Data.bias_tolerance
    try:
        for tag, num_words in temporary_num_words.items():
            configs.Data.tag2num_words[tag] = num_words
        configs.Data.bias_tolerance = 7000

        vbs = get_legal_words(tag='VB', exclude=('fit', 'come', 'point'))
        vbzs = get_legal_words(tag='VBZ', exclude=('points', ))
        vbds = get_legal_words(tag='VBD', exclude=('fit', 'dropped', 'signed', 'formed', 'managed'))
    finally:
        # restore the shared configuration no matter how retrieval ended
        for tag, num_words in original_num_words.items():
            configs.Data.tag2num_words[tag] = num_words
        configs.Data.bias_tolerance = original_bias_tolerance

    animates_ = read_words('animates.txt')
    animates = find_counterbalanced_subset(animates_, min_size=8, max_size=len(animates_))

    prps_s = ['she', 'he', 'it']
    prps_p = ['we', 'they']
    prps = prps_s + prps_p

    aux_s = ['does']
    auxiliaries = ['could', 'can', 'would', 'will', 'did'] + aux_s

    determiners = ['the', 'this', 'some', 'that', 'every'] + ['your', 'his', 'her']

    names_ = read_words('names.txt')
    names = find_counterbalanced_subset(names_, min_size=10, max_size=len(names_))

    vbs_intransitive = [
        'run',
        'work',
        'turn',
        'eat',
        'live',
        'read',
        'trade',
        'play',
        'know',
        'study',
        'think',
        'change',
    ]

    # each verb is listed once so that every verb is equally likely to be sampled
    vbzs_intransitive = [
        'moves',
        'lives',
        'lies',
        'knows',
        'waves',
        'changes',
        'works',
        'dies',
        'leads',
        'appears',
        'thinks',
        'falls',
        'matters',
        'turns',
        'stands',
        'runs',
        'calls',
        'races',
    ]

    vbds_intransitive = [
        'occurred',
        'married',
        'moved',
        'looked',
        'changed',
        'finished',
        'grew',
        'broke',
        'started',
        'improved',
        'worked',
        'thought',
        'came',
        'tried',
        'read',
        'lost',
        'knew',
        'lived',
        'accepted',
        'developed',
        'joined',
        'decided',
        'learned',
        'happened',
        'fell',
        'refused',
        'returned',
    ]

    vbs_transitive = [v for v in vbs if v not in vbs_intransitive]

    vbzs_transitive = [v for v in vbzs if v not in vbzs_intransitive]

    vbzs_or_vbds_intransitive = vbzs_intransitive + vbds_intransitive
    vbzs_or_vbds_transitive = [v for v in vbzs + vbds if v not in vbzs_or_vbds_intransitive]

    # the pool of subjects does not change between iterations, so build it once
    subjects = animates + names + prps

    while True:

        # random choices
        slot2filler = {
            'nn1': random.choice(subjects),
            'aux': random.choice(auxiliaries),
            'vbz_or_vbd_intransitive': random.choice(vbzs_or_vbds_intransitive),
            'vbz_or_vbd_transitive': random.choice(vbzs_or_vbds_transitive),
            'vb_intransitive': random.choice(vbs_intransitive),
            'vb_transitive': random.choice(vbs_transitive),
        }

        # handle exception: "occurred" and "happened" cannot have animate subject
        if slot2filler['vbz_or_vbd_intransitive'] in ['occurred', 'happened'] and\
                slot2filler['nn1'] not in ['it', 'that', 'this']:
            continue

        # add determiner to animate noun
        if slot2filler['nn1'] in animates:
            slot2filler['nn1'] = random.choice(determiners) + ' ' + slot2filler['nn1']

        subject_is_plural_pronoun = slot2filler['nn1'] in prps_p

        # do not use template 1 with plural pronoun and VBZ.
        # the VBZ test is grouped explicitly: "and" binds tighter than "or", so without the
        # grouping a transitive VBZ alone would suppress template 1 for every subject.
        any_verb_is_vbz = (slot2filler['vbz_or_vbd_intransitive'] in vbzs_intransitive or
                           slot2filler['vbz_or_vbd_transitive'] in vbzs_transitive)
        if not (subject_is_plural_pronoun and any_verb_is_vbz):

            yield template1['b'].format(**slot2filler)  # bad
            yield template1['g'].format(**slot2filler)  # good

        # do not use template 2 with plural pronoun and singular aux (e.g. "does")
        if not (subject_is_plural_pronoun and slot2filler['aux'] in aux_s):

            yield template2['b'].format(**slot2filler)  # bad
            yield template2['g'].format(**slot2filler)  # good
Esempio n. 18
0
def main():
    """
    Endlessly yield sentences in which a copula is paired with a singular or plural noun.

    example:
    "where is the house?" vs "where is the houses?"
    todo "where is the house?" vs "where are the house?"

    For every copula and template, the sentence with the number-mismatching noun is
    yielded immediately before the sentence with the matching noun.
    """

    singular_plural_pairs = get_legal_words(tag='NN', second_tag='NNP')
    adjective_pool = get_legal_words(tag='JJ')

    copulas_singular = ["is", "was"]
    copulas_plural = ["are", "were"]

    while True:

        # random choices
        noun_s, noun_p = random.choice(singular_plural_pairs)
        adj = random.choice(adjective_pool)

        # one row per copula: (copula, mismatching noun, matching noun, number-specific template).
        # template 5 is specific to singular copulas, template 6 to plural copulas.
        schedule = [(copula, noun_p, noun_s, template5) for copula in copulas_singular]
        schedule += [(copula, noun_s, noun_p, template6) for copula in copulas_plural]

        for copula, noun_mismatch, noun_match, template_for_number in schedule:

            for template in (template1, template2, template3):
                yield template.format(copula, noun_mismatch)
                yield template.format(copula, noun_match)

            # template 4 is the only one that also takes an adjective
            yield template4.format(copula, noun_mismatch, adj)
            yield template4.format(copula, noun_match, adj)

            for template in (template_for_number, template7, template8):
                yield template.format(copula, noun_mismatch)
                yield template.format(copula, noun_match)
Esempio n. 19
0
def main():
    """
    Endlessly yield sentences contrasting two inflected forms of the same verb.

    example:
    "is the bell ringing ?" vs "is the bell rings ?"

    Sentences are formatted from template1 (VBG vs. VBZ) and template2 (VB vs. VBZ).
    For each template, the bad sentence is yielded immediately before the good one.
    """

    def read_words(file_name: str) -> list:
        """Return the whitespace-separated words stored in a legal-words file."""
        # a context manager closes the file handle instead of leaking it
        with (configs.Dirs.legal_words / file_name).open() as f:
            return f.read().split()

    # counterbalance both forms of verb as different forms are the contrast
    vbgs_and_vbzs = get_legal_words(tag='VBG',
                                    second_tag='VBZ',
                                    exclude=('facing', 'naming', 'training',
                                             'setting', 'meaning'))
    vbs_and_vbzs = get_legal_words(tag='VB', second_tag='VBZ')

    nouns_s = get_legal_words(tag='NN')

    animates_ = read_words('animates.txt')
    animates = find_counterbalanced_subset(animates_,
                                           min_size=8,
                                           max_size=len(animates_))

    names_ = read_words('names.txt')
    names = find_counterbalanced_subset(names_,
                                        min_size=10,
                                        max_size=len(names_))

    # continuation pattern for every verb that needs one, keyed by all its inflected forms
    verb_forms2pattern = {
        ('saying', 'says', 'say'): '{v} something',
        ('using', 'uses', 'use'): '{v} {arg1}',
        ('telling', 'tells', 'tell'): '{v} me about {arg1}',
        ('making', 'makes', 'make'): '{v} {arg1} something',
        ('planning', 'plans', 'plan'): '{v} to do something with {arg1}',
        ('taking', 'takes', 'take'): '{v} {arg1} away',
        ('giving', 'gives', 'give'): '{v} {arg1} {arg2}',
        ('falling', 'falls', 'fall'): '{v} in {arg1}',
        ('showing', 'shows', 'show'): '{v} {arg1} to {arg2}',
        # arguments are pronouns or already start with "the", so no determiner is added here
        ('seeing', 'sees', 'see'): '{v} how {arg1} works',
        ('finding', 'finds', 'find'): '{v} {arg1}',
        ('coming', 'comes', 'come'): '{v} to {arg1}',
        ('getting', 'gets', 'get'): '{v} {arg1}',
        ('depending', 'depends', 'depend'): '{v} on {arg1}',
    }
    verb2pattern = {
        form: pattern
        for forms, pattern in verb_forms2pattern.items()
        for form in forms
    }

    def add_argument_after_vb(
        v: str,
        arg1: str,
        arg2: str,
    ) -> str:
        """Append the continuation registered for v, or return v unchanged."""
        pattern = verb2pattern.get(v)
        if pattern is None:
            return v
        return pattern.format(v=v, arg1=arg1, arg2=arg2)

    # candidate lists do not change between iterations, so build them once.
    # note: pronouns don't get determiners, but nouns do
    arguments = ['you', 'him', 'her', 'it'] + [f'the {nn}' for nn in nouns_s]
    nouns_and_animates = nouns_s + animates

    while True:

        vbg1, vbz1 = random.choice(vbgs_and_vbzs)  # template 1
        vb2, vbz2 = random.choice(vbs_and_vbzs)  # template 2

        # sample argument once, so that the same argument is used by both bad and good sentences.
        argument1 = random.choice(arguments)
        argument2 = random.choice(arguments)

        # random choices
        slot2filler = {
            'name': random.choice(names),
            'nn': random.choice(nouns_and_animates),
            'vb2': add_argument_after_vb(vb2, argument1, argument2),
            'vbz2': add_argument_after_vb(vbz2, argument1, argument2),
            'vbg1': add_argument_after_vb(vbg1, argument1, argument2),
            'vbz1': add_argument_after_vb(vbz1, argument1, argument2),
        }

        yield template1['b'].format(**slot2filler)  # bad
        yield template1['g'].format(**slot2filler)  # good

        yield template2['b'].format(**slot2filler)  # bad
        yield template2['g'].format(**slot2filler)  # good
Esempio n. 20
0
def main():
    """
    Endlessly yield sentences formatted from template1, template2 and template3.

    example:
    "give me the frog ." vs. "the frog gives me ."

    For each template, the bad sentence is yielded immediately before the good one.
    """

    # counterbalance both forms of verb as different forms are the contrast
    vbs_and_vbzs_1 = [
        ('give', 'gives'),
    ]

    vbs_and_vbzs_2 = [
        ('ask', 'asked'),  # "asks" is not in vocab
        ('tell', 'tells'),
    ]

    prps_obj = ['me', 'you', 'him', 'her', 'them']

    conjunctions = ['when', 'but', 'with', 'and']

    # TODO use a counterbalanced verb list

    # each entry is (verb, good continuation, bad continuation)
    vbzs3_and_continuations = [  # contains a mix of past and present tense forms
        # past tense form
        ('saw', '{prp_obj} there', '{prp_obj} by'),
        ('created', '{prp_obj}', '{det}'),
        ('told', '{prp_obj} about that', '{prp_obj} about {det}'),
        ('wrote', '{prp_obj} something', '{prp_obj} {det}'),
        ('wanted', '{prp_obj}', 'to'),
        ('asked', 'about {prp_obj}', '{prp_obj} about'),
        ('sold', '{prp_obj} that', 'that to'),
        ('changed', '{prp_obj}', '{det}'),
        # present tense form
        ('looks', 'at {prp_obj}', 'at {det}'),
        ('plays', 'with {prp_obj}', 'with {det}'),
        ('thinks', 'about {prp_obj}', 'about {det}'),
        ('moves', 'fast', 'to'),
        ('works', 'well', '{conjunction}'),
    ]

    adjectives = get_legal_words(tag='JJ')

    nouns_s = get_legal_words(tag='NN')

    # a context manager closes the file handle instead of leaking it
    with (configs.Dirs.legal_words / 'animates.txt').open() as f:
        animates_ = f.read().split()
    animates = find_counterbalanced_subset(animates_,
                                           min_size=8,
                                           max_size=len(animates_))

    personal_pronouns_obj = ['me', 'him', 'her', 'us',
                             'them']  # in the objective case
    personal_pronouns_subj = ['i', 'he', 'she', 'we',
                              'they']  # in the subjective case

    determiners = ['a', 'one', 'the', 'my', 'his',
                   'some']  # do not include "this" or "that" or "her"

    vowels = {'a', 'e', 'i', 'o', 'u'}

    while True:

        vb1, vbz1 = random.choice(vbs_and_vbzs_1)  # template 1
        vb2, vbz2 = random.choice(vbs_and_vbzs_2)  # template 2
        vbz3, cont_g, cont_b = random.choice(
            vbzs3_and_continuations)  # template 3

        # good and bad continuations
        prp_obj = random.choice(prps_obj)
        conjunction = random.choice(conjunctions)
        cont_g = cont_g.format(prp_obj=prp_obj)
        cont_b = cont_b.format(prp_obj=prp_obj,
                               det=random.choice(determiners),
                               conjunction=conjunction)

        # random choices
        slot2filler = {
            'det': random.choice(determiners),
            'jj': random.choice(adjectives),
            'nn': random.choice(animates),
            'nn2': random.choice(nouns_s),
            'prp_obj': random.choice(personal_pronouns_obj),
            'prp_subj': random.choice(personal_pronouns_subj),
            'vb1': vb1,
            'vbz1': vbz1,
            'vb2': vb2,
            'vbz2': vbz2,
            'vbz3': vbz3,
            'cont_g': cont_g,
            'cont_b': cont_b,
        }

        # "a" becomes "an" when the noun in slot nn2 starts with a vowel letter
        if slot2filler['det'] == 'a' and slot2filler['nn2'][0] in vowels:
            slot2filler['det'] += 'n'

        yield template1['b'].format(**slot2filler)  # bad
        yield template1['g'].format(**slot2filler)  # good

        yield template2['b'].format(**slot2filler)  # bad
        yield template2['g'].format(**slot2filler)  # good

        yield template3['b'].format(**slot2filler)  # bad
        yield template3['g'].format(**slot2filler)  # good
Esempio n. 21
0
def main():
    """
    Endlessly yield sentence pairs that differ only in the quantifier.

    example:
    "no cat can jump on at least two dogs ." vs. "no cat can jump on more than two dogs ."

    Both sentences of a pair are formatted from template1 with the same fillers except
    the quantifier; the bad sentence is yielded immediately before the good one.
    """

    def read_words(file_name: str) -> list:
        """Return the whitespace-separated words stored in a legal-words file."""
        # a context manager closes the file handle instead of leaking it
        with (configs.Dirs.legal_words / file_name).open() as f:
            return f.read().split()

    nouns_s_and_p = get_legal_words(tag='NN', second_tag='NNP')
    number_words_ = read_words("number_words.txt")
    number_words = find_counterbalanced_subset(number_words_,
                                               min_size=6,
                                               max_size=len(number_words_))

    # each entry is (good quantifier, bad quantifier)
    quantifiers_g_b = [
        ('more than', 'at least'),
        ('fewer than', 'at most'),
    ]

    animates_ = read_words('animates.txt')
    animates = find_counterbalanced_subset(animates_,
                                           min_size=8,
                                           max_size=len(animates_))

    verbs_ = [
        'become',
        'catch',
        'leave',
        'increase',
        'move',
        'open',
        'exist',
        'contain',
        'stand',
        'change',
        'surround',
        'carry',
        'act',
    ]
    verbs = find_counterbalanced_subset(verbs_,
                                        min_size=8,
                                        max_size=len(verbs_))

    # a linker can be a preposition or determiner phrase
    verb2linker = {
        'become': None,
        'catch': None,
        'leave': None,
        'increase': 'the size of',
        'move': 'to',
        'open': 'the door to',
        'exist': 'without',
        'contain': None,
        'stand': 'on top of',
        'change': None,
        'surround': None,
        'carry': None,
        'act': 'like',
    }

    auxiliaries = ['can', 'could']

    while True:

        # random choices
        animate = random.choice(animates)
        noun_s, noun_p = random.choice(nouns_s_and_p)
        number_word = random.choice(number_words)
        quantifier_g, quantifier_b = random.choice(quantifiers_g_b)
        verb = random.choice(verbs)
        aux = random.choice(auxiliaries)

        linker = verb2linker[verb]
        verb_and_optional_linker = verb if linker is None else f'{verb} {linker}'

        # the noun must agree in number with the number word
        noun = noun_s if number_word == 'one' else noun_p

        yield template1.format(animate, aux, verb_and_optional_linker,
                               quantifier_b, number_word, noun)  # bad
        yield template1.format(animate, aux, verb_and_optional_linker,
                               quantifier_g, number_word, noun)  # good
Esempio n. 22
0
def main():
    """
    Endlessly yield sentences formatted from template1 and template2.

    example:
    "who should sarah hug after shocking the dog ?" vs "who should sarah hug the dog after shocking ?"

    For each template, the bad sentence is yielded immediately before the good one.

    note: this task is too difficult for babyBERTa
    """

    nouns_s = get_legal_words(tag='NN')

    excluded_verbs_base = ('run', 'say', 'be', 'give', 'tell', 'live', 'force')
    verbs_base = get_legal_words(tag='VB', exclude=excluded_verbs_base)

    excluded_verbs_gerund = ('saying', )
    verbs_gerund = get_legal_words(tag='VBG', exclude=excluded_verbs_gerund)

    # a context manager closes the file handle instead of leaking it
    with (configs.Dirs.legal_words / 'names.txt').open() as f:
        names_ = f.read().split()
    names = find_counterbalanced_subset(names_,
                                        min_size=10,
                                        max_size=len(names_))

    pps = ['after', 'before', 'while', 'without']

    verb2verb_with_preposition = {
        'related': 'related to',
        'acting': 'acting like',
        'put': 'put on',
        'work': 'work for',
        'sleeping': 'sleeping in',
        'standing': 'standing on',
        'depending': 'depending on',
        'flying': 'flying over',
        'falling': 'falling on',
        # "asking" maps to exactly one phrase; a second branch returning "asking for"
        # could never be reached and has been dropped
        'asking': 'asking about',
        'swimming': 'swimming in',
        'coming': 'coming to',
    }

    def add_preposition_after_vb(v: str) -> str:
        """Return v followed by its registered preposition, or v unchanged."""
        return verb2verb_with_preposition.get(v, v)

    while True:

        # random choices
        slot2filler = {
            'name': random.choice(names),
            'nn': random.choice(nouns_s),
            'pp': random.choice(pps),
            'vb': random.choice(verbs_base),
            'vbg': random.choice(verbs_gerund),
        }

        slot2filler['vb'] = add_preposition_after_vb(slot2filler['vb'])
        slot2filler['vbg'] = add_preposition_after_vb(slot2filler['vbg'])

        yield template1['b'].format(**slot2filler)  # bad
        yield template1['g'].format(**slot2filler)  # good

        yield template2['b'].format(**slot2filler)  # bad
        yield template2['g'].format(**slot2filler)  # good
Esempio n. 23
0
def main():
    """
    Endlessly yield sentences formatted from template1 through template4.

    example:
    "katherine will help herself do something" vs. "katherine will help himself do something"

    For each template, the bad sentence is yielded immediately before the good one.
    Negation is added to the auxiliary only for templates 3 and 4.
    """

    excluded_verbs_base = ('say', )
    verbs_base = get_legal_words(tag='VB', exclude=excluded_verbs_base)

    nouns_s = get_legal_words(tag='NN')

    # a context manager closes the file handle instead of leaking it
    with (configs.Dirs.legal_words / 'names.txt').open() as f:
        names_ = f.read().split()
    names = find_counterbalanced_subset(names_,
                                        min_size=10,
                                        max_size=len(names_))

    auxiliaries = ['can', 'could', 'will', 'would', 'must', 'should']

    def add_misc_after_prp(
        prp: str,
        v: str,
        arg1: str,
    ) -> str:
        """Return prp followed by the verb-specific material for v, or prp unchanged."""
        if v == 'take':
            return f'{prp} to {arg1}'
        elif v in {'make', 'give', 'put'}:
            return f'{prp} {arg1}'
        elif v == 'work':
            return f'{prp} on {arg1}'
        elif v == 'turn':
            return f'{prp} around'
        elif v == 'tell':
            return f'{prp} about {arg1}'
        else:
            return prp

    def add_preposition_after_vb(v: str) -> str:
        """Return v followed by its preposition, or v unchanged."""
        if v in {'work', 'study', 'live'}:
            return f'{v} with'
        elif v in {'point', 'run'}:
            return f'{v} to'
        elif v == 'be':
            return f'{v} like'
        else:
            return v

    # candidate lists do not change between iterations, so build them once
    subjects_m = ([name for name in names if name in names_m] +
                  ['he', 'the man', 'a man', 'that man'])
    subjects_f = ([name for name in names if name in names_f] +
                  ['she', 'the woman', 'a woman', 'that woman'])
    # note: pronouns don't get determiners, but nouns do
    noun_arguments = [f'the {nn}' for nn in nouns_s]
    put_arguments = ['in danger', 'in this situation']

    while True:

        vb = random.choice(verbs_base)

        # random choices
        slot2filler = {
            'aux': random.choice(auxiliaries),
            'nn_m': random.choice(subjects_m),
            'nn_f': random.choice(subjects_f),
        }

        # sample argument once, so that the same argument is used by both bad and good sentences.
        if vb == 'put':
            argument1 = random.choice(put_arguments)
        else:
            argument1 = random.choice(noun_arguments)

        # first, add some miscellaneous component
        slot2filler['prp_m'] = add_misc_after_prp('himself', vb, argument1)
        slot2filler['prp_f'] = add_misc_after_prp('herself', vb, argument1)

        # second, add a preposition
        slot2filler['vb'] = add_preposition_after_vb(vb)

        yield template1['b'].format(**slot2filler)  # bad
        yield template1['g'].format(**slot2filler)  # good

        yield template2['b'].format(**slot2filler)  # bad
        yield template2['g'].format(**slot2filler)  # good

        # use negation only in non-question, in templates 3, and 4
        if random.random() < 0.5:
            slot2filler['aux'] += ' ' + 'not'
        # occasionally replace the auxiliary, negated or not, with "did not"
        if random.random() < 0.1:
            slot2filler['aux'] = 'did not'

        yield template3['b'].format(**slot2filler)  # bad
        yield template3['g'].format(**slot2filler)  # good

        yield template4['b'].format(**slot2filler)  # bad
        yield template4['g'].format(**slot2filler)  # good
Esempio n. 24
0
def main():
    """
    Endlessly yield sentences formatted from template1 through template4.

    example:
    "who must sarah and the dog kiss ?" vs "who must sarah kiss and the dog ?"

    For each template, the bad sentence is yielded immediately before the good one.
    Templates 1 and 2 are skipped for the verbs excluded in the loop below;
    templates 3 and 4 are always used.
    """

    def read_words(file_name: str) -> list:
        """Return the whitespace-separated words stored in a legal-words file."""
        # a context manager closes the file handle instead of leaking it
        with (configs.Dirs.legal_words / file_name).open() as f:
            return f.read().split()

    excluded_verbs_base = ('run', 'be', 'live', 'force', 'order')
    verbs_base = get_legal_words(tag='VB', exclude=excluded_verbs_base)

    excluded_verbs_gerund = ('',)
    verbs_gerund = get_legal_words(tag='VBG', exclude=excluded_verbs_gerund)

    animates_ = read_words('animates.txt')
    animates = find_counterbalanced_subset(animates_, min_size=8, max_size=len(animates_))

    names_ = read_words('names.txt')
    names = find_counterbalanced_subset(names_, min_size=10, max_size=len(names_))

    verb2verb_with_preposition = {
        'related': 'related to',
        'put': 'put on',
        # "work" maps to exactly one phrase; a second branch returning f'work {arg}'
        # could never be reached and has been dropped
        'work': 'work for',
        'acting': 'acting like',
        'sleeping': 'sleeping in',
        'falling': 'falling on',
        'looking': 'looking for',
        'running': 'running to',
        'talking': 'talking about',
        'thinking': 'thinking about',
        'reaching': 'reaching for',
    }

    def add_preposition_after_vb(v: str,
                                 arg: str,
                                 ) -> str:
        """
        Return v followed by its registered preposition, or v unchanged.

        arg is accepted for call compatibility; no mapping currently uses it.
        """
        return verb2verb_with_preposition.get(v, v)

    while True:

        # random choices
        slot2filler = {
            'name': random.choice(names),
            'nn': random.choice(animates),
            'vb': random.choice(verbs_base),
            'vbg': random.choice(verbs_gerund),
        }

        # still sampled so that the sequence of random draws is unchanged
        arg = random.choice(["him", "her", "them", "us"])
        slot2filler['vb'] = add_preposition_after_vb(slot2filler['vb'], arg)
        slot2filler['vbg'] = add_preposition_after_vb(slot2filler['vbg'], arg)

        # exclude bad combinations that involve "who", e.g. "saying who"
        if slot2filler['vbg'] not in {'saying', 'drinking', 'eating', 'open'}\
                and slot2filler['vb'] not in {'need', 'feel', 'open'}:

            # this replacement stays in effect for templates 3 and 4 below
            if slot2filler['vb'] == 'tell':
                slot2filler['vb'] = 'tell something'

            yield template1['b'].format(**slot2filler)  # bad
            yield template1['g'].format(**slot2filler)  # good

            yield template2['b'].format(**slot2filler)  # bad
            yield template2['g'].format(**slot2filler)  # good

        yield template3['b'].format(**slot2filler)  # bad
        yield template3['g'].format(**slot2filler)  # good

        yield template4['b'].format(**slot2filler)  # bad
        yield template4['g'].format(**slot2filler)  # good