def _default_temporal_tests(self,
                                data: Optional[Iterable[Tuple]],
                                num_test_cases=100):
        """
        Register the default tests for the "Temporal" capability.

        Two MFTs are added through ``self.add_test``:

        * present-tense vs. past-tense statements about a profession,
          labelled ``self._neutral``;
        * "before" vs. "after" orderings of two professions, labelled
          ``self._contradicts``.

        Args:
            data: Accepted for signature parity with the other default
                test builders; this method does not read it.
            num_test_cases: Passed as ``nsamples`` to every
                ``self.editor.template`` call.
        """
        # Both sentences of a pair use the ``a:`` article prefix so the
        # article agrees with the sampled profession (the second sentence
        # used to hard-code "a", yielding e.g. "a engineer").
        template = self.editor.template(
            (
                "{first_name} works as {a:professions}",
                "{first_name} used to work as {a:professions}",
            ),
            nsamples=num_test_cases,
            remove_duplicates=True,
        )

        template += self.editor.template(
            (
                "{first_name} {last_name} is {a:professions}",
                "{first_name} {last_name} was {a:professions}",
            ),
            nsamples=num_test_cases,
            remove_duplicates=True,
        )

        test = MFT(
            **template,
            labels=self._neutral,
            name=
            '"A works as P" gives no information about "A used to work as P"',
            capability="Temporal",
            description=
            'Eg. "A is a writer" gives no information about "A was a writer"',
        )

        self.add_test(test)

        # Same two professions in both sentences; only the temporal
        # connective (before / after) differs.
        template = self.editor.template(
            (
                "{first_name} was {a:professions1} before they were {a:professions2}",
                "{first_name} was {a:professions1} after they were {a:professions2}",
            ),
            nsamples=num_test_cases,
            remove_duplicates=True,
        )

        test = MFT(
            **template,
            labels=self._contradicts,
            name="Before != After",
            capability="Temporal",
            description='Eg. "A was a writer before they were a journalist" '
            'contradicts "A was a writer after they were a journalist"',
        )

        self.add_test(test)
Exemple #2
0
 def _default_taxonomy_tests(self,
                             data: Optional[Iterable[Tuple]],
                             num_test_cases=100):
     """
     Register the default "Taxonomy" test: comparative / antonym questions.

     Two context sentences and two (question, answer) pairs are filled in
     by ``self.editor.template``, combined by ``_crossproduct``, and the
     result is wrapped in a single MFT passed to ``self.add_test``.

     Args:
         data: Not read by this method.
         num_test_cases: Passed as ``nsamples`` to ``self.editor.template``.
     """
     # The same comparison stated from either person's point of view.
     comparison_contexts = [
         "{first_name} is {comp_pairs[0]} than {first_name1}.",
         "{first_name1} is {comp_pairs[1]} than {first_name}.",
     ]
     # (question, expected answer) pairs, one per side of the comparison.
     question_answer_pairs = [
         ("Who is {comp_pairs[1]}?", "{first_name1}"),
         ("Who is {comp_pairs[0]}?", "{first_name}"),
     ]

     filled = self.editor.template(
         {
             "contexts": comparison_contexts,
             "qas": question_answer_pairs
         },
         nsamples=num_test_cases,
         remove_duplicates=True,
         save=True,
     )
     combined = _crossproduct(filled)

     taxonomy_test = MFT(
         **combined,
         capability="Taxonomy",
         name="A is COMP than B. Who is antonym(COMP)? B",
         description=
         'Eg. Context: "A is taller than B", Q: "Who is shorter?", A: "B"',
     )
     self.add_test(taxonomy_test)
Exemple #3
0
    def _default_vocabulary_tests(self,
                                  data: Optional[Iterable[Tuple]],
                                  num_test_cases=100):
        """
        Register the default "Vocabulary" test: "more / less COMP" questions.

        Each example pairs one comparative context sentence with a question;
        the expected answers are supplied as label templates. The filled-in
        result is wrapped in an MFT and passed to ``self.add_test``.

        Args:
            data: Not read by this method.
            num_test_cases: Passed as ``nsamples`` to ``self.editor.template``.
        """
        # Both examples share the same context sentence.
        context = "{first_name} is {adjectives_to_compare[0]}er than {first_name1}."
        context_question_pairs = [
            (context, "Who is less {adjectives_to_compare[1]}?"),
            (context, "Who is {adjectives_to_compare[0]}er?"),
        ]
        # One answer template per (context, question) pair, in the same order.
        answers = ["{first_name1}", "{first_name}"]

        filled = self.editor.template(
            context_question_pairs,
            labels=answers,
            nsamples=num_test_cases,
            remove_duplicates=True,
            save=True,
        )
        vocabulary_test = MFT(
            **filled,
            capability="Vocabulary",
            name="A is COMP than B. Who is more / less COMP?",
            description='Eg. Context: "A is taller than B" '
            'Q: "Who is taller?" A: "A", Q: "Who is less tall?" A: "B"',
        )
        self.add_test(vocabulary_test)
    def _default_logic_tests(self,
                             data: Optional[Iterable[Tuple]],
                             num_test_cases=100):
        """
        Register the default tests for the "Logic" capability.

        Always adds an MFT in which the two sentences swap the compared
        nouns, labelled ``self._contradicts``. When ``data`` is truthy, also
        adds an MFT of (x, x) pairs built from each side of the data items,
        labelled ``self._entails``.

        Args:
            data: Optional items indexed as ``x[0]`` and ``x[1]``.
            num_test_cases: Passed as ``nsamples`` to the template and
                perturbation calls.
        """
        swapped = self.editor.template(
            (
                "{nouns1} are {compare} than {nouns2}",
                "{nouns2} are {compare} than {nouns1}",
            ),
            remove_duplicates=True,
            nsamples=num_test_cases,
        )
        self.add_test(
            MFT(
                **swapped,
                capability="Logic",
                labels=self._contradicts,
                name='"A is COMP than B" contradicts "B is COMP than A"',
                description=
                'Eg. "A is better than B" contradicts "B is better than A"',
            ))

        # The self-entailment test needs real data; nothing more to do without it.
        if not data:
            return

        def _duplicate_first(pair):
            return (pair[0], pair[0])

        def _duplicate_second(pair):
            return (pair[1], pair[1])

        identical = Perturb.perturb(data,
                                    _duplicate_first,
                                    keep_original=False,
                                    nsamples=num_test_cases)
        identical += Perturb.perturb(data,
                                     _duplicate_second,
                                     keep_original=False,
                                     nsamples=num_test_cases)

        self.add_test(
            MFT(
                **identical,
                capability="Logic",
                labels=self._entails,
                name="A entails A (premise == hypothesis)",
                description=
                "If premise and hypothesis are the same, then premise entails the hypothesis",
            ))
    def _default_ner_tests(self,
                           data: Optional[Iterable[Tuple]],
                           num_test_cases=100):
        """
        Register the default "NER" test.

        The two sentences keep the same subject and comparison but differ in
        the person compared against; the MFT is labelled ``self._neutral``
        and passed to ``self.add_test``.

        Args:
            data: Not read by this method.
            num_test_cases: Passed as ``nsamples`` to ``self.editor.template``.
        """
        sentence_pair = (
            "{first_name1} is {compare} than {first_name2}",
            "{first_name1} is {compare} than {first_name3}",
        )
        filled = self.editor.template(sentence_pair,
                                      remove_duplicates=True,
                                      nsamples=num_test_cases)

        ner_test = MFT(
            **filled,
            capability="NER",
            labels=self._neutral,
            name=
            '"A is COMP than B" gives no information about "A is COMP than C"',
            description=
            'Eg. "A is better than B" gives no information about "A is better than C"',
        )
        self.add_test(ner_test)
    def _default_negation_tests(self,
                                data: Optional[Iterable[Tuple]],
                                num_test_cases=100):
        """
        Register the default "Negation" test.

        The second sentence is the first one with "not" inserted; the MFT is
        labelled ``self._contradicts`` and passed to ``self.add_test``.

        Args:
            data: Not read by this method.
            num_test_cases: Passed as ``nsamples`` to ``self.editor.template``.
        """
        template = self.editor.template(
            (
                "{first_name1} is {compare} than {first_name2}",
                "{first_name1} is not {compare} than {first_name2}",
            ),
            nsamples=num_test_cases,
            remove_duplicates=True,
        )

        test = MFT(
            **template,
            labels=self._contradicts,
            name='"A is COMP than B" contradicts "A is not COMP than B"',
            capability="Negation",
            # Both sentences compare the same two people, so the example
            # must mention B twice (it previously ended in "C").
            description=
            "Eg. A is better than B contradicts A is not better than B",
        )

        self.add_test(test)
    def _default_vocabulary_tests(self,
                                  data: Optional[Iterable[Tuple]],
                                  num_test_cases=100):
        """
        Register the default "Vocabulary" test based on antonym pairs.

        The second sentence swaps the two people and uses the other member
        of the ``antonyms`` pair; the MFT is labelled ``self._entails`` and
        passed to ``self.add_test``.

        Args:
            data: Not read by this method.
            num_test_cases: Passed as ``nsamples`` to ``self.editor.template``.
        """
        premise = "{first_name1} is more {antonyms[0]} than {first_name2}"
        hypothesis = "{first_name2} is more {antonyms[1]} than {first_name1}"

        filled = self.editor.template((premise, hypothesis),
                                      nsamples=num_test_cases,
                                      remove_duplicates=True)

        antonym_test = MFT(
            **filled,
            capability="Vocabulary",
            labels=self._entails,
            name=
            '"A is more COMP than B" entails "B is more antonym(COMP) than A"',
            description=
            "Eg. A is more active than B implies that B is more passive than A",
        )
        self.add_test(antonym_test)
Exemple #8
0
def object_test():
    """
    Build the object-recognition CheckList tests and write them to ./tests/.

    Label codes come from the project's code dictionary:
    codeDictionary = {"D":0, "M":1, "S":2, "H":3, "F":4, "O":5, "E":6, "NA":7}
    Food examples are labelled 0, sport 6, non-drug 5 and drug 1.

    Side effects:
        * ``food_ret``, ``sport_ret``, ``drug_ret`` and ``nondrug_ret`` are
          assigned as module globals.
        * One ``./tests/<name>.txt`` file is written per returned test; the
          directory is created if it does not exist.

    Returns:
        A ``(tests, names)`` tuple of two equally long lists: the four MFT
        objects and their matching names.
    """
    global food_ret, sport_ret, drug_ret, nondrug_ret

    import os

    editor = Editor()

    # --- MFT: food (label 0) ---
    food_ret1 = editor.template('How often do you get {food}?',
                                food=food,
                                labels=0,
                                save=True)
    food_ret2 = editor.template('I can\'t stop thinking about {food}!',
                                food=food,
                                labels=0,
                                save=True)
    food_ret = food_ret1 + food_ret2

    mft_food = MFT(food_ret.data,
                   labels=food_ret.labels,
                   name='Object Rec: Food',
                   capability='Objects',
                   description='Food')

    # --- MFT: sport (label 6) ---
    sport_ret1 = editor.template('I have to participate in {sport}?',
                                 sport=sport,
                                 labels=6,
                                 save=True)
    sport_ret2 = editor.template(
        'It is good to move your body, like doing {sport}.',
        sport=sport,
        labels=6,
        save=True)
    sport_ret = sport_ret1 + sport_ret2

    mft_sport = MFT(sport_ret.data,
                    labels=sport_ret.labels,
                    name='Object Rec: Sport',
                    capability='Objects',
                    description='Sport')

    # --- MFT: non-drug (label 5); note these templates are built without save=True ---
    nondrug_ret1 = editor.template('How often do you take {nondrug}?',
                                   nondrug=nondrug,
                                   labels=5)
    nondrug_ret2 = editor.template(
        'Have you taken {nondrug} for the last five months?',
        nondrug=nondrug,
        labels=5)
    nondrug_ret = nondrug_ret1 + nondrug_ret2

    mft_nondrug = MFT(nondrug_ret.data,
                      labels=nondrug_ret.labels,
                      name='Object Rec: Non Drug',
                      capability='Objects',
                      description='Non Drug')

    # --- MFT: drug (label 1) ---
    drug_ret1 = editor.template('How often do you get {drug}?',
                                drug=drug,
                                labels=1,
                                save=True)
    drug_ret2 = editor.template(
        'Have you taken {drug} for the last five months?',
        drug=drug,
        labels=1,
        save=True)
    drug_ret = drug_ret1 + drug_ret2

    mft_drug = MFT(drug_ret.data,
                   labels=drug_ret.labels,
                   name='Object Rec: Drug',
                   capability='Objects',
                   description='Drug')

    # --- INV / DIR tests ---
    # These are still constructed but are not part of the returned list or
    # the files written below. The capability uses the same 'Objects'
    # spelling as the MFTs above so they would group together if enabled.
    # To smoke-test one locally, run it against a dummy predictor wrapped
    # with checklist.pred_wrapper.PredictorWrapper.wrap_softmax.
    nt = Perturb.perturb(nondrug_ret.data, swap_nondrug)
    inv_n = INV(**nt,
                name='swap nondrug name in both questions',
                capability='Objects',
                description='')

    dt = Perturb.perturb(drug_ret.data, swap_drug)
    inv_d = INV(**dt,
                name='swap drug name in both questions',
                capability='Objects',
                description='')

    nondrug_monodec = Expect.monotonic(label=5,
                                       increasing=False,
                                       tolerance=0.1)
    drug_monodec = Expect.monotonic(label=1, increasing=False, tolerance=0.1)

    ndt = Perturb.perturb(nondrug_ret.data, swap_nd)
    dir_nd = DIR(**ndt, expect=nondrug_monodec)

    dnt = Perturb.perturb(drug_ret.data, swap_dn)
    dir_dn = DIR(**dnt, expect=drug_monodec)

    # Keep each test next to its name so the two returned lists cannot
    # drift apart.
    named_tests = [
        ("mft_food", mft_food),  # diet
        ("mft_sport", mft_sport),  # exercise
        ("mft_nondrug", mft_nondrug),  # other
        ("mft_drug", mft_drug),  # medical
    ]
    names = [name for name, _ in named_tests]
    tests = [test for _, test in named_tests]

    # Make sure the output directory exists before writing into it.
    os.makedirs('./tests', exist_ok=True)
    for name, test in named_tests:
        test.to_raw_file('./tests/' + name + '.txt')

    return tests, names
    def _default_negation_tests(self,
                                data: Optional[Iterable[str]],
                                num_test_cases=100):
        """
        Register the default "Negation" tests for sentiment.

        Two MFTs are added through ``self.add_test``, both labelled
        ``self._negative``:

        * "Simple negations: negative" - negated positive statements;
        * "Simple negations: I thought x was positive, but it was not".

        Args:
            data: Not read by this method.
            num_test_cases: Passed as ``nsamples`` to every
                ``self.editor.template`` call.
        """
        template = self.editor.template(
            "{it} {noun} {nt} {pos_adj}.",
            it=["This", "That", "The"],
            nt=["is not", "isn't"],
            save=True,
            nsamples=num_test_cases,
        )
        template += self.editor.template(
            "{it} {benot} {a:pos_adj} {noun}.",
            it=["It", "This", "That"],
            benot=["is not", "isn't", "was not", "wasn't"],
            save=True,
            nsamples=num_test_cases,
        )
        neg = [
            "I can't say I", "I don't", "I would never say I",
            "I don't think I", "I didn't"
        ]
        template += self.editor.template(
            "{neg} {pos_verb_present} {the} {noun}.",
            neg=neg,
            the=["this", "that", "the"],
            save=True,
            nsamples=num_test_cases,
        )
        # This template has no {neg} placeholder, so no ``neg`` fill-in is
        # passed.
        template += self.editor.template(
            "No one {pos_verb_present}s {the} {noun}.",
            the=["this", "that", "the"],
            save=True,
            nsamples=num_test_cases,
        )
        test = MFT(
            template.data,
            labels=self._negative,
            templates=template.templates,
            name="Simple negations: negative",
            capability="Negation",
            description="Very simple negations of positive statements",
        )

        self.add_test(test)

        # This template has no {nt} placeholder, so only ``neg`` and ``it``
        # are supplied.
        template = self.editor.template(
            "I thought {it} {noun} would be {pos_adj}, but it {neg}.",
            neg=["was not", "wasn't"],
            it=["this", "that", "the"],
            save=True,
            nsamples=num_test_cases,
        )
        template += self.editor.template(
            "I thought I would {pos_verb_present} {the} {noun}, but I {neg}.",
            neg=["did not", "didn't"],
            the=["this", "that", "the"],
            save=True,
            nsamples=num_test_cases,
        )
        test = MFT(
            template.data,
            labels=self._negative,
            templates=template.templates,
            name="Simple negations: I thought x was positive, but it was not",
            capability="Negation",
            description="",
        )
        self.add_test(test)
    def _default_temporal_tests(self,
                                data: Optional[Iterable[str]],
                                num_test_cases=100):
        """
        Register the default "Temporal" tests for sentiment.

        Calls ``self._setup_editor()`` first, then adds two tests through
        ``self.add_test``:

        * an MFT "Used to, but now" mixing a past and a present opinion,
          where the label (``self._positive`` / ``self._negative``) follows
          the present opinion;
        * a DIR "'Used to' should reduce" pairing a plain statement with its
          "used to" variant, using ``self.monotonic_label_down`` as the
          expectation argument.

        Args:
            data: Not read by this method.
            num_test_cases: Passed as ``nsamples`` to every
                ``self.editor.template`` call.
        """
        self._setup_editor()

        change = ["but", "even though", "although", ""]

        # Past negative -> present positive (adjectives).
        template = self.editor.template(
            [
                "I used to think this {noun} was {neg_adj}, {change} now I think it is {pos_adj}.",
                "I think this {noun} is {pos_adj}, {change} I used to think it was {neg_adj}.",
                "In the past I thought this {noun} was {neg_adj}, {change} now I think it is {pos_adj}.",
                "I think this {noun} is {pos_adj}, {change} in the past I thought it was {neg_adj}.",
            ],
            change=change,
            unroll=True,
            nsamples=num_test_cases,
            save=True,
            labels=self._positive,
        )
        # Past negative -> present positive (verbs). Every verb slot uses a
        # *_present lexicon, matching the mirrored negative group below; the
        # third sentence previously used {pos_verb} after "now I".
        template += self.editor.template(
            [
                "I used to {neg_verb_present} this {noun}, {change} now I {pos_verb_present} it.",
                "I {pos_verb_present} this {noun}, {change} I used to {neg_verb_present} it.",
                "In the past I would {neg_verb_present} this {noun}, {change} now I {pos_verb_present} it.",
                "I {pos_verb_present} this {noun}, {change} in the past I would {neg_verb_present} it.",
            ],
            change=change,
            unroll=True,
            nsamples=num_test_cases,
            save=True,
            labels=self._positive,
        )

        # Past positive -> present negative (adjectives).
        template += self.editor.template(
            [
                "I used to think this {noun} was {pos_adj}, {change} now I think it is {neg_adj}.",
                "I think this {noun} is {neg_adj}, {change} I used to think it was {pos_adj}.",
                "In the past I thought this {noun} was {pos_adj}, {change} now I think it is {neg_adj}.",
                "I think this {noun} is {neg_adj}, {change} in the past I thought it was {pos_adj}.",
            ],
            change=change,
            unroll=True,
            nsamples=num_test_cases,
            save=True,
            labels=self._negative,
        )
        # Past positive -> present negative (verbs).
        template += self.editor.template(
            [
                "I used to {pos_verb_present} this {noun}, {change} now I {neg_verb_present} it.",
                "I {neg_verb_present} this {noun}, {change} I used to {pos_verb_present} it.",
                "In the past I would {pos_verb_present} this {noun}, {change} now I {neg_verb_present} it.",
                "I {neg_verb_present} this {noun}, {change} in the past I would {pos_verb_present} it.",
            ],
            change=change,
            unroll=True,
            nsamples=num_test_cases,
            save=True,
            labels=self._negative,
        )
        test = MFT(
            **template,
            name="Used to, but now",
            capability="Temporal",
            # Adjacent literals are concatenated as-is, so each fragment
            # carries its own trailing separator.
            description="Have two conflicting statements, one about the past and "
            "one about the present. "
            "Expect the present to carry the sentiment. Examples: "
            "I used to love this airline, now I hate it -> should be negative; "
            "I love this airline, although I used to hate it -> should be positive",
        )

        self.add_test(test)

        # Combined positive + negative word lists for the DIR test below.
        adjectives = self.editor.lexicons["pos_adj"] + self.editor.lexicons[
            "neg_adj"]
        verbs = self.editor.lexicons[
            "pos_verb_present"] + self.editor.lexicons["neg_verb_present"]

        # Each example is a pair: the plain statement and its "used to" form.
        template = self.editor.template(
            [
                "{it} {be} {a:adj} {noun}.",
                "I used to think {it} {be} {a:adj} {noun}."
            ],
            it=["it", "this", "that"],
            be=["is", "was"],
            adj=adjectives,
            save=True,
            nsamples=num_test_cases,
        )
        template += self.editor.template(
            ["{i} {verb} {the} {noun}.", "{i} used to {verb} {the} {noun}."],
            i=["I", "We"],
            the=["this", "that", "the"],
            verb=verbs,
            save=True,
            nsamples=num_test_cases,
        )
        test = DIR(
            template.data,
            self.monotonic_label_down,
            templates=template.templates,
            name="'Used to' should reduce",
            capability="Temporal",
            description=
            "A model should not be more confident on 'I used to think X' "
            "when compared to 'X', e.g. 'I used to love this restaurant' "
            "should have less confidence than 'I love this restaurant'",
        )

        self.add_test(test)
    def _default_vocabulary_tests(self,
                                  data: Optional[Iterable[str]],
                                  num_test_cases=100):
        """
        Register the default "Vocabulary" tests for sentiment.

        Tests added through ``self.add_test``, in order:

        1. MFT "Single Positive Words"
        2. MFT "Single Negative Words"
        3. MFT "Sentiment-laden words in context"
        4. DIR "Intensifiers" (expectation: ``self.monotonic_label``)
        5. DIR "Reducers" (expectation: ``self.monotonic_label_down``)
        6. DIR "Add positive phrases" - only when ``data`` is truthy
        7. DIR "Add negative phrases" - only when ``data`` is truthy

        Args:
            data: Optional sentences that the last two tests perturb via
                ``Perturb.perturb``.
            num_test_cases: Passed as ``nsamples`` to the sampled template
                and perturbation calls. The "in context" templates of test 3
                are built without ``nsamples``.
        """
        # --- 1 & 2: single words taken straight from the lexicons ---
        positive_words = (self.editor.lexicons["pos_adj"] +
                          self.editor.lexicons["pos_verb_present"] +
                          self.editor.lexicons["pos_verb_past"])

        test = MFT(
            positive_words,
            labels=self._positive,
            name="Single Positive Words",
            capability="Vocabulary",
            description="Correctly recognizes positive words",
        )

        self.add_test(test)

        negative_words = (self.editor.lexicons["neg_adj"] +
                          self.editor.lexicons["neg_verb_present"] +
                          self.editor.lexicons["neg_verb_past"])

        test = MFT(
            negative_words,
            labels=self._negative,
            name="Single Negative Words",
            capability="Vocabulary",
            description="Correctly recognizes negative words",
        )

        self.add_test(test)

        # --- 3: sentiment words in short sentences (labels set per template) ---
        template = self.editor.template(
            "{it} {noun} {be} {pos_adj}.",
            it=["The", "This", "That"],
            be=["is", "was"],
            labels=self._positive,
            save=True,
        )
        template += self.editor.template(
            "{it} {be} {a:pos_adj} {noun}.",
            it=["It", "This", "That"],
            be=["is", "was"],
            labels=self._positive,
            save=True,
        )
        template += self.editor.template(
            "{i} {pos_verb} {the} {noun}.",
            i=["I", "We"],
            the=["this", "that", "the"],
            labels=self._positive,
            save=True,
        )
        template += self.editor.template(
            "{it} {noun} {be} {neg_adj}.",
            it=["That", "This", "The"],
            be=["is", "was"],
            labels=self._negative,
            save=True,
        )
        template += self.editor.template(
            "{it} {be} {a:neg_adj} {noun}.",
            it=["It", "This", "That"],
            be=["is", "was"],
            labels=self._negative,
            save=True,
        )
        template += self.editor.template(
            "{i} {neg_verb} {the} {noun}.",
            i=["I", "We"],
            the=["this", "that", "the"],
            labels=self._negative,
            save=True,
        )

        test = MFT(
            **template,
            name="Sentiment-laden words in context",
            capability="Vocabulary",
            description="Use positive and negative verbs and adjectives "
            "with nouns such as product, movie, airline, etc. "
            'E.g. "This was a bad movie"',
        )

        self.add_test(test)

        # --- 4: intensifiers; each example is (plain, intensified) ---
        template = self.editor.template(
            [
                "{it} {be} {a:pos_adj} {noun}.",
                "{it} {be} {a:intens_adj} {pos_adj} {noun}."
            ],
            it=["It", "This", "That"],
            be=["is", "was"],
            nsamples=num_test_cases,
            save=True,
        )
        template += self.editor.template(
            [
                "{i} {pos_verb} {the} {noun}.",
                "{i} {intens_verb} {pos_verb} {the} {noun}."
            ],
            i=["I", "We"],
            the=["this", "that", "the"],
            nsamples=num_test_cases,
            save=True,
        )
        template += self.editor.template(
            [
                "{it} {be} {a:neg_adj} {noun}.",
                "{it} {be} {a:intens_adj} {neg_adj} {noun}."
            ],
            it=["It", "This", "That"],
            be=["is", "was"],
            nsamples=num_test_cases,
            save=True,
        )
        template += self.editor.template(
            [
                "{i} {neg_verb} {the} {noun}.",
                "{i} {intens_verb} {neg_verb} {the} {noun}."
            ],
            i=["I", "We"],
            the=["this", "that", "the"],
            nsamples=num_test_cases,
            save=True,
        )

        test = DIR(
            template.data,
            self.monotonic_label,
            templates=template.templates,
            name="Intensifiers",
            capability="Vocabulary",
            # Adjacent literals are concatenated as-is, so each fragment
            # carries its own trailing space.
            description=
            "Test is composed of pairs of sentences (x1, x2), where we add an intensifier "
            "such as 'really', or 'very' to x2 and expect the confidence to NOT go down "
            "(with tolerance=0.1). e.g.: "
            "x1 = 'That was a good movie' "
            "x2 = 'That was a very good movie'",
        )

        self.add_test(test)

        # --- 5: reducers; each example is (plain, reduced) ---
        template = self.editor.template(
            [
                "{it} {noun} {be} {pos_adj}.",
                "{it} {noun} {be} {reducer_adj} {pos_adj}."
            ],
            it=["The", "This", "That"],
            be=["is", "was"],
            nsamples=num_test_cases,
            save=True,
        )
        template += self.editor.template(
            [
                "{it} {noun} {be} {neg_adj}.",
                "{it} {noun} {be} {reducer_adj} {neg_adj}."
            ],
            it=["The", "This", "That"],
            be=["is", "was"],
            nsamples=num_test_cases,
            save=True,
        )
        test = DIR(
            template.data,
            self.monotonic_label_down,
            templates=template.templates,
            name="Reducers",
            capability="Vocabulary",
            description=
            "Test is composed of pairs of sentences (x1, x2), where we add a reducer "
            "such as 'somewhat', or 'kinda' to x2 and expect the confidence to NOT go up "
            "(with tolerance=0.1). e.g.: "
            "x1 = 'The staff was good.' "
            "x2 = 'The staff was somewhat good.'",
        )

        self.add_test(test)

        # --- 6 & 7: need real sentences to perturb ---
        if data:

            # Phrase pools handed to _add_phrase_function for the perturbation.
            positive = self.editor.template("I {pos_verb_present} you.").data
            positive += self.editor.template("You are {pos_adj}.").data

            negative = self.editor.template("I {neg_verb_present} you.").data
            negative += self.editor.template("You are {neg_adj}.").data

            template = Perturb.perturb(data,
                                       _add_phrase_function(positive),
                                       nsamples=num_test_cases)
            test = DIR(
                template.data,
                Expect.pairwise(self._diff_up),
                name="Add positive phrases",
                capability="Vocabulary",
                description=
                "Add very positive phrases (e.g. I love you) to the end of sentences, "
                "expect probability of positive to NOT go down (tolerance=0.1)",
            )

            self.add_test(test)

            template = Perturb.perturb(data,
                                       _add_phrase_function(negative),
                                       nsamples=num_test_cases)
            test = DIR(
                template.data,
                Expect.pairwise(self._diff_down),
                name="Add negative phrases",
                capability="Vocabulary",
                description=
                "Add very negative phrases (e.g. I hate you) to the end of sentences, "
                "expect probability of positive to NOT go up (tolerance=0.1)",
            )

            self.add_test(test)
Exemple #12
0

def replace_john_with_others(x, *args, **kwargs):
    """
    Swap the whole word "John" in *x* for "Luke" and for "Mark".

    Returns None when *x* does not contain "John" as a whole word;
    otherwise a two-item list: *x* with every "John" replaced by "Luke",
    then by "Mark". Extra positional and keyword arguments are accepted
    and ignored.
    """
    john = re.compile(r'\bJohn\b')
    if john.search(x) is None:
        return None

    variants = []
    for substitute in ('Luke', 'Mark'):
        variants.append(john.sub(substitute, x))
    return variants


# Toy corpus for the perturbation demo: two sentences mention "John", one does not.
dataset = ['John is a man', 'Mary is a woman', 'John is an apostle']
# Run replace_john_with_others over the corpus through Perturb.perturb.
ret = Perturb.perturb(dataset, replace_john_with_others)
# Bare expression: it has no effect when run as a script (presumably kept
# from a notebook cell, where it would display the value).
ret.data

# In[12]:

import checklist
from checklist.editor import Editor
from checklist.perturb import Perturb
from checklist.test_types import MFT, INV, DIR
# Editor instance used to fill in the template below.
editor = Editor()

# "{a:adj}" is filled from the adj list passed here. "{mask}" has no fill-in
# list in this call (presumably completed by the Editor's masked language
# model; confirm against the CheckList documentation).
t = editor.template('This is {a:adj} {mask}.',
                    adj=['good', 'great', 'excellent', 'awesome'])
# Minimum functionality test over the generated sentences, all given label 1.
test1 = MFT(t.data,
            labels=1,
            name='Simple positives',
            capability='Vocabulary',
            description='')

# In[ ]:
Exemple #13
0
 def test_mft_wo_test_id(self):
     """An MFT constructed without ``test_id`` exposes ``test_id`` as None."""
     expectation = Expect.eq()
     built = MFT(name="mft test without test id",
                 expect=expectation,
                 **self.dummy_test_data)
     assert built.test_id is None
Exemple #14
0
 def test_mft_w_test_id(self):
     """An MFT constructed with ``test_id`` exposes that same value."""
     expectation = Expect.eq()
     built = MFT(name="mft test with test id",
                 expect=expectation,
                 test_id=self.test_id,
                 **self.dummy_test_data)
     assert built.test_id == self.test_id