def _default_temporal_tests(self, data: Optional[Iterable[Tuple]], num_test_cases=100):
    """Add the default "Temporal" capability tests to the suite.

    Builds two `MFT` tests from editor templates and registers each of
    them with `self.add_test`:

    * present-tense vs. past-tense statements about a profession, labelled
      with `self._neutral`;
    * "before" vs. "after" orderings of two professions, labelled with
      `self._contradicts`.

    Args:
        data: Not read by this method.
        num_test_cases: Passed as `nsamples` to every
            `self.editor.template` call.
    """
    template = self.editor.template(
        (
            "{first_name} works as {a:professions}",
            # Use the `{a:...}` article form like every sibling template
            # instead of a hard-coded "a", which would yield e.g.
            # "a engineer" for vowel-initial fill-ins.
            "{first_name} used to work as {a:professions}",
        ),
        nsamples=num_test_cases,
        remove_duplicates=True,
    )
    template += self.editor.template(
        (
            "{first_name} {last_name} is {a:professions}",
            "{first_name} {last_name} was {a:professions}",
        ),
        nsamples=num_test_cases,
        remove_duplicates=True,
    )

    test = MFT(
        **template,
        labels=self._neutral,
        name='"A works as P" gives no information about "A used to work as P"',
        capability="Temporal",
        description='Eg. "A is a writer" gives no information about "A was a writer"',
    )
    self.add_test(test)

    template = self.editor.template(
        (
            "{first_name} was {a:professions1} before they were {a:professions2}",
            "{first_name} was {a:professions1} after they were {a:professions2}",
        ),
        nsamples=num_test_cases,
        remove_duplicates=True,
    )

    test = MFT(
        **template,
        labels=self._contradicts,
        name="Before != After",
        capability="Temporal",
        description='Eg. "A was a writer before they were a journalist" '
        'contradicts "A was a writer after they were a journalist"',
    )
    self.add_test(test)
def _default_taxonomy_tests(self, data: Optional[Iterable[Tuple]], num_test_cases=100):
    """Add the default "Taxonomy" capability test to the suite.

    Fills a contexts/questions template about two people compared with a
    pair of comparatives (`comp_pairs[0]` / `comp_pairs[1]`), passes the
    result through `_crossproduct`, wraps it in an `MFT` and registers it
    with `self.add_test`.

    Args:
        data: Not read by this method.
        num_test_cases: Passed as `nsamples` to `self.editor.template`.
    """
    context_patterns = [
        "{first_name} is {comp_pairs[0]} than {first_name1}.",
        "{first_name1} is {comp_pairs[1]} than {first_name}.",
    ]
    # Each entry is a (question, expected answer) pair.
    question_answer_patterns = [
        ("Who is {comp_pairs[1]}?", "{first_name1}"),
        ("Who is {comp_pairs[0]}?", "{first_name}"),
    ]

    filled = self.editor.template(
        {"contexts": context_patterns, "qas": question_answer_patterns},
        remove_duplicates=True,
        nsamples=num_test_cases,
        save=True,
    )
    crossed = _crossproduct(filled)

    taxonomy_test = MFT(
        **crossed,
        name="A is COMP than B. Who is antonym(COMP)? B",
        description='Eg. Context: "A is taller than B", Q: "Who is shorter?", A: "B"',
        capability="Taxonomy",
    )
    self.add_test(taxonomy_test)
def _default_vocabulary_tests(self, data: Optional[Iterable[Tuple]], num_test_cases=100):
    """Add the default "Vocabulary" capability test to the suite.

    Fills two (context, question) templates that share one comparative
    context, attaches the answer templates as labels, wraps the result in
    an `MFT` and registers it with `self.add_test`.

    Args:
        data: Not read by this method.
        num_test_cases: Passed as `nsamples` to `self.editor.template`.
    """
    less_question = (
        "{first_name} is {adjectives_to_compare[0]}er than {first_name1}.",
        "Who is less {adjectives_to_compare[1]}?",
    )
    more_question = (
        "{first_name} is {adjectives_to_compare[0]}er than {first_name1}.",
        "Who is {adjectives_to_compare[0]}er?",
    )
    # Answers are listed in the same order as the questions above.
    answers = ["{first_name1}", "{first_name}"]

    filled = self.editor.template(
        [less_question, more_question],
        labels=answers,
        remove_duplicates=True,
        nsamples=num_test_cases,
        save=True,
    )

    vocabulary_test = MFT(
        **filled,
        name="A is COMP than B. Who is more / less COMP?",
        description='Eg. Context: "A is taller than B" '
        'Q: "Who is taller?" A: "A", Q: "Who is less tall?" A: "B"',
        capability="Vocabulary",
    )
    self.add_test(vocabulary_test)
def _default_logic_tests(self, data: Optional[Iterable[Tuple]], num_test_cases=100):
    """Add the default "Logic" capability tests to the suite.

    Always registers an `MFT` whose pairs swap the two sides of a
    comparison (labelled `self._contradicts`). When `data` is truthy, also
    registers an `MFT` built by pairing each example's first element with
    itself and its second element with itself (labelled `self._entails`).

    Args:
        data: Optional examples; each one is indexed with `[0]` and `[1]`.
        num_test_cases: Passed as `nsamples` to the template and
            perturbation calls.
    """
    swapped_comparison = self.editor.template(
        (
            "{nouns1} are {compare} than {nouns2}",
            "{nouns2} are {compare} than {nouns1}",
        ),
        nsamples=num_test_cases,
        remove_duplicates=True,
    )
    self.add_test(
        MFT(
            **swapped_comparison,
            labels=self._contradicts,
            name='"A is COMP than B" contradicts "B is COMP than A"',
            capability="Logic",
            description='Eg. "A is better than B" contradicts "B is better than A"',
        )
    )

    # The identity test needs user-supplied examples.
    if not data:
        return

    identical_pairs = Perturb.perturb(
        data,
        lambda example: (example[0], example[0]),
        nsamples=num_test_cases,
        keep_original=False,
    )
    identical_pairs += Perturb.perturb(
        data,
        lambda example: (example[1], example[1]),
        nsamples=num_test_cases,
        keep_original=False,
    )
    self.add_test(
        MFT(
            **identical_pairs,
            labels=self._entails,
            name="A entails A (premise == hypothesis)",
            capability="Logic",
            description="If premise and hypothesis are the same, then premise entails the hypothesis",
        )
    )
def _default_ner_tests(self, data: Optional[Iterable[Tuple]], num_test_cases=100):
    """Add the default "NER" capability test to the suite.

    Builds pairs that compare the same first name against two different
    names, labels them `self._neutral`, and registers the resulting `MFT`
    with `self.add_test`.

    Args:
        data: Not read by this method.
        num_test_cases: Passed as `nsamples` to `self.editor.template`.
    """
    premise = "{first_name1} is {compare} than {first_name2}"
    hypothesis = "{first_name1} is {compare} than {first_name3}"

    filled = self.editor.template(
        (premise, hypothesis),
        nsamples=num_test_cases,
        remove_duplicates=True,
    )

    ner_test = MFT(
        **filled,
        labels=self._neutral,
        name='"A is COMP than B" gives no information about "A is COMP than C"',
        capability="NER",
        description='Eg. "A is better than B" gives no information about "A is better than C"',
    )
    self.add_test(ner_test)
def _default_negation_tests(self, data: Optional[Iterable[Tuple]], num_test_cases=100):
    """Add the default "Negation" capability test to the suite.

    Builds pairs of a comparison and the same comparison with "not"
    inserted, labels them `self._contradicts`, and registers the resulting
    `MFT` with `self.add_test`.

    Args:
        data: Not read by this method.
        num_test_cases: Passed as `nsamples` to `self.editor.template`.
    """
    template = self.editor.template(
        (
            "{first_name1} is {compare} than {first_name2}",
            "{first_name1} is not {compare} than {first_name2}",
        ),
        nsamples=num_test_cases,
        remove_duplicates=True,
    )

    test = MFT(
        **template,
        labels=self._contradicts,
        name='"A is COMP than B" contradicts "A is not COMP than B"',
        capability="Negation",
        # The example must negate the same pair (A, B) as the template;
        # it previously ended with "C".
        description="Eg. A is better than B contradicts A is not better than B",
    )
    self.add_test(test)
def _default_vocabulary_tests(self, data: Optional[Iterable[Tuple]], num_test_cases=100):
    """Add the default antonym "Vocabulary" capability test to the suite.

    Builds pairs where the second sentence swaps the two names and uses
    the other member of an `antonyms` pair, labels them `self._entails`,
    and registers the resulting `MFT` with `self.add_test`.

    Args:
        data: Not read by this method.
        num_test_cases: Passed as `nsamples` to `self.editor.template`.
    """
    premise = "{first_name1} is more {antonyms[0]} than {first_name2}"
    hypothesis = "{first_name2} is more {antonyms[1]} than {first_name1}"

    filled = self.editor.template(
        (premise, hypothesis),
        remove_duplicates=True,
        nsamples=num_test_cases,
    )

    antonym_test = MFT(
        **filled,
        labels=self._entails,
        name='"A is more COMP than B" entails "B is more antonym(COMP) than A"',
        capability="Vocabulary",
        description="Eg. A is more active than B implies that B is more passive than A",
    )
    self.add_test(antonym_test)
def object_test():
    """Build the "Objects" CheckList tests and dump the MFTs to raw files.

    Fills two templates each for the module-level `food`, `sport`,
    `nondrug` and `drug` word lists (labels 0, 6, 5 and 1 respectively),
    stores the combined results in the module globals `food_ret`,
    `sport_ret`, `nondrug_ret` and `drug_ret`, and wraps each in an `MFT`.
    Two `INV` and two `DIR` tests are also constructed, but they are not
    part of the returned list.

    Each returned test is written to ``./tests/<name>.txt`` through
    `to_raw_file`.

    Returns:
        A `(tests, names)` tuple: the four MFT tests and their matching
        names, in the same order.
    """
    global food_ret, sport_ret, drug_ret, nondrug_ret
    # NOTE(review): the bare string below is a no-op statement; it appears
    # to record the label codes used in this function — confirm.
    """ codeDictionary = {"D":0, "M":1, "S":2, "H":3, "F":4, "O":5, "E":6, "NA":7} """
    editor = Editor()

    # Food templates, label 0.
    food_ret1 = editor.template('How often do you get {food}?', food=food, labels=0, save=True) #, nsamples=100) a
    food_ret2 = editor.template('I can\'t stop thinking about {food}!', food=food, labels=0, save=True) #, nsamples=100)
    food_ret = food_ret1 + food_ret2
    mft_food = MFT(food_ret.data, labels=food_ret.labels, name='Object Rec: Food', capability='Objects', description='Food')

    # Sport templates, label 6.
    sport_ret1 = editor.template('I have to participate in {sport}?', sport=sport, labels=6, save=True) #, nsamples=100)
    sport_ret2 = editor.template( 'It is good to move your body, like doing {sport}.', sport=sport, labels=6, save=True) #, nsamples=100)
    sport_ret = sport_ret1 + sport_ret2
    mft_sport = MFT(sport_ret.data, labels=sport_ret.labels, name='Object Rec: Sport', capability='Objects', description='Sport')

    # Non-drug templates, label 5 (unlike the others, `save=True` is not passed).
    nondrug_ret1 = editor.template('How often do you take {nondrug}?', nondrug=nondrug, labels=5) #, save=True) #, nsamples=100)
    nondrug_ret2 = editor.template( 'Have you taken {nondrug} for the last five months?', nondrug=nondrug, labels=5) #, save=True) #, nsamples=100)
    nondrug_ret = nondrug_ret1 + nondrug_ret2
    mft_nondrug = MFT(nondrug_ret.data, labels=nondrug_ret.labels, name='Object Rec: Non Drug', capability='Objects', description='Non Drug')

    # Drug templates, label 1.
    drug_ret1 = editor.template('How often do you get {drug}?', drug=drug, labels=1, save=True) #, nsamples=100)
    drug_ret2 = editor.template( 'Have you taken {drug} for the last five months?', drug=drug, labels=1, save=True) #, nsamples=100)
    drug_ret = drug_ret1 + drug_ret2
    mft_drug = MFT(drug_ret.data, labels=drug_ret.labels, name='Object Rec: Drug', capability='Objects', description='Drug')

    #print(nondrug_ret.data)
    # Invariance test over the non-drug sentences, perturbed with the
    # module-level `swap_nondrug`.
    nt = Perturb.perturb(nondrug_ret.data, swap_nondrug)
    inv_n = INV(**nt, name='swap nondrug name in both questions', capability='objects', description='')
    #print(len(nt.data))
    #exit()
    # The bare string below is disabled example code kept as a no-op statement.
    """ import numpy as np def pp(inputs): p1 = np.array([0.5 for x in inputs]).reshape(-1, 1) p0 = 1- p1 return np.hstack((p0, p1)) from checklist.pred_wrapper import PredictorWrapper wrapped = PredictorWrapper.wrap_softmax(pp) inv_n.run(wrapped) """
    # Invariance test over the drug sentences, perturbed with `swap_drug`.
    dt = Perturb.perturb(drug_ret.data, swap_drug)
    inv_d = INV(**dt, name='swap drug name in both questions', capability='objects', description='')

    # Directional expectations for labels 5 and 1 (non-increasing, tolerance 0.1).
    nondrug_monodec = Expect.monotonic(label=5, increasing=False, tolerance=0.1)
    drug_monodec = Expect.monotonic(label=1, increasing=False, tolerance=0.1)
    ndt = Perturb.perturb(nondrug_ret.data, swap_nd)
    dir_nd = DIR(**ndt, expect=nondrug_monodec)
    dnt = Perturb.perturb(drug_ret.data, swap_dn)
    dir_dn = DIR(**dnt, expect=drug_monodec)

    # diet
    #exercise
    # other
    # medical
    # other
    # medical,
    # o -> m,
    # m->o
    # Only the four MFTs are returned; the INV/DIR tests are left out.
    tests = [mft_food, mft_sport, mft_nondrug, mft_drug] #, inv_n , inv_d, dir_nd, dir_dn ]
    # Names are obtained by splitting the literal on whitespace and stripping commas.
    names = [ x.strip(",") for x in "mft_food, mft_sport, mft_nondrug, mft_drug".split() ] #, inv_n, inv_d, dir_nd, dir_dn".split() ]
    assert (len(tests) == len(names))
    # Write every test to ./tests/<name>.txt.
    for test, name in zip(tests, names):
        test.to_raw_file('./tests/' + name + '.txt')
    return tests, names
def _default_negation_tests(self, data: Optional[Iterable[str]], num_test_cases=100):
    """Add the default "Negation" capability tests to the suite.

    Registers two `MFT` tests, both labelled `self._negative`:

    * simple negations of positive statements;
    * "I thought X would be positive, but it was not" statements.

    Args:
        data: Not read by this method.
        num_test_cases: Passed as `nsamples` to every
            `self.editor.template` call.
    """

    def sample(pattern, **fillers):
        # Every template in this method is sampled with the same settings.
        return self.editor.template(
            pattern, save=True, nsamples=num_test_cases, **fillers
        )

    first_person_negations = [
        "I can't say I",
        "I don't",
        "I would never say I",
        "I don't think I",
        "I didn't",
    ]

    simple = sample(
        "{it} {noun} {nt} {pos_adj}.",
        it=["This", "That", "The"],
        nt=["is not", "isn't"],
    )
    simple += sample(
        "{it} {benot} {a:pos_adj} {noun}.",
        it=["It", "This", "That"],
        benot=["is not", "isn't", "was not", "wasn't"],
    )
    simple += sample(
        "{neg} {pos_verb_present} {the} {noun}.",
        neg=first_person_negations,
        the=["this", "that", "the"],
    )
    simple += sample(
        "No one {pos_verb_present}s {the} {noun}.",
        neg=first_person_negations,
        the=["this", "that", "the"],
    )
    self.add_test(
        MFT(
            simple.data,
            labels=self._negative,
            templates=simple.templates,
            name="Simple negations: negative",
            capability="Negation",
            description="Very simple negations of positive statements",
        )
    )

    expectation_unmet = sample(
        "I thought {it} {noun} would be {pos_adj}, but it {neg}.",
        neg=["was not", "wasn't"],
        it=["this", "that", "the"],
        nt=["is not", "isn't"],
    )
    expectation_unmet += sample(
        "I thought I would {pos_verb_present} {the} {noun}, but I {neg}.",
        neg=["did not", "didn't"],
        the=["this", "that", "the"],
    )
    self.add_test(
        MFT(
            expectation_unmet.data,
            labels=self._negative,
            templates=expectation_unmet.templates,
            name="Simple negations: I thought x was positive, but it was not",
            capability="Negation",
            description="",
        )
    )
def _default_temporal_tests(self, data: Optional[Iterable[str]], num_test_cases=100):
    """Add the default "Temporal" capability tests to the suite.

    Calls `self._setup_editor()` and then registers two tests with
    `self.add_test`:

    * an `MFT` ("Used to, but now") of sentences that state a past opinion
      and a conflicting present one, labelled after the present opinion
      (`self._positive` or `self._negative`);
    * a `DIR` ("'Used to' should reduce") that pairs a statement with its
      "used to" variant and uses `self.monotonic_label_down` as the
      expectation.

    Args:
        data: Not read by this method.
        num_test_cases: Passed as `nsamples` to every
            `self.editor.template` call.
    """
    self._setup_editor()

    change = ["but", "even though", "although", ""]

    # Past negative -> present positive (adjectives).
    template = self.editor.template(
        [
            "I used to think this {noun} was {neg_adj}, {change} now I think it is {pos_adj}.",
            "I think this {noun} is {pos_adj}, {change} I used to think it was {neg_adj}.",
            "In the past I thought this {noun} was {neg_adj}, {change} now I think it is {pos_adj}.",
            "I think this {noun} is {pos_adj}, {change} in the past I thought it was {neg_adj}.",
        ],
        change=change,
        unroll=True,
        nsamples=num_test_cases,
        save=True,
        labels=self._positive,
    )
    # Past negative -> present positive (verbs).
    template += self.editor.template(
        [
            "I used to {neg_verb_present} this {noun}, {change} now I {pos_verb_present} it.",
            "I {pos_verb_present} this {noun}, {change} I used to {neg_verb_present} it.",
            # This slot used `{pos_verb}`; the three sibling templates and
            # the mirrored negative group below all use the `_present`
            # lexicon after "now I", so it is aligned with them.
            "In the past I would {neg_verb_present} this {noun}, {change} now I {pos_verb_present} it.",
            "I {pos_verb_present} this {noun}, {change} in the past I would {neg_verb_present} it.",
        ],
        change=change,
        unroll=True,
        nsamples=num_test_cases,
        save=True,
        labels=self._positive,
    )
    # Past positive -> present negative (adjectives).
    template += self.editor.template(
        [
            "I used to think this {noun} was {pos_adj}, {change} now I think it is {neg_adj}.",
            "I think this {noun} is {neg_adj}, {change} I used to think it was {pos_adj}.",
            "In the past I thought this {noun} was {pos_adj}, {change} now I think it is {neg_adj}.",
            "I think this {noun} is {neg_adj}, {change} in the past I thought it was {pos_adj}.",
        ],
        change=change,
        unroll=True,
        nsamples=num_test_cases,
        save=True,
        labels=self._negative,
    )
    # Past positive -> present negative (verbs).
    template += self.editor.template(
        [
            "I used to {pos_verb_present} this {noun}, {change} now I {neg_verb_present} it.",
            "I {neg_verb_present} this {noun}, {change} I used to {pos_verb_present} it.",
            "In the past I would {pos_verb_present} this {noun}, {change} now I {neg_verb_present} it.",
            "I {neg_verb_present} this {noun}, {change} in the past I would {pos_verb_present} it.",
        ],
        change=change,
        unroll=True,
        nsamples=num_test_cases,
        save=True,
        labels=self._negative,
    )

    test = MFT(
        **template,
        name="Used to, but now",
        capability="Temporal",
        # Adjacent literals are concatenated as-is, so each fragment ends
        # with its own separator.
        description="Have two conflicting statements, one about the past and "
        "one about the present. "
        "Expect the present to carry the sentiment. Examples: "
        "I used to love this airline, now I hate it -> should be negative; "
        "I love this airline, although I used to hate it -> should be positive",
    )
    self.add_test(test)

    adjectives = self.editor.lexicons["pos_adj"] + self.editor.lexicons["neg_adj"]
    verbs = (
        self.editor.lexicons["pos_verb_present"]
        + self.editor.lexicons["neg_verb_present"]
    )

    # Pairs of (statement, "used to" variant of the same statement).
    template = self.editor.template(
        ["{it} {be} {a:adj} {noun}.", "I used to think {it} {be} {a:adj} {noun}."],
        it=["it", "this", "that"],
        be=["is", "was"],
        adj=adjectives,
        save=True,
        nsamples=num_test_cases,
    )
    template += self.editor.template(
        ["{i} {verb} {the} {noun}.", "{i} used to {verb} {the} {noun}."],
        i=["I", "We"],
        the=["this", "that", "the"],
        verb=verbs,
        save=True,
        nsamples=num_test_cases,
    )

    test = DIR(
        template.data,
        self.monotonic_label_down,
        templates=template.templates,
        name="'Used to' should reduce",
        capability="Temporal",
        description="A model should not be more confident on 'I used to think X' "
        "when compared to 'X', e.g. 'I used to love this restaurant' "
        "should have less confidence than 'I love this restaurant'",
    )
    self.add_test(test)
def _default_vocabulary_tests(self, data: Optional[Iterable[str]], num_test_cases=100):
    """Add the default "Vocabulary" capability tests to the suite.

    Registers, in order:

    * two `MFT`s over single positive / negative words taken from the
      editor lexicons;
    * an `MFT` of sentiment-laden words placed in short sentences;
    * a `DIR` ("Intensifiers") using `self.monotonic_label`;
    * a `DIR` ("Reducers") using `self.monotonic_label_down`;
    * when `data` is truthy, two `DIR`s that perturb `data` with
      `_add_phrase_function` over positive and negative phrases.

    Args:
        data: Optional examples, used only for the "Add positive/negative
            phrases" tests.
        num_test_cases: Passed as `nsamples` to the sampled template and
            perturbation calls (the in-context `MFT` templates are not
            sampled).
    """
    positive_words = (
        self.editor.lexicons["pos_adj"]
        + self.editor.lexicons["pos_verb_present"]
        + self.editor.lexicons["pos_verb_past"]
    )

    test = MFT(
        positive_words,
        labels=self._positive,
        name="Single Positive Words",
        capability="Vocabulary",
        description="Correctly recognizes positive words",
    )
    self.add_test(test)

    negative_words = (
        self.editor.lexicons["neg_adj"]
        + self.editor.lexicons["neg_verb_present"]
        + self.editor.lexicons["neg_verb_past"]
    )

    test = MFT(
        negative_words,
        labels=self._negative,
        name="Single Negative Words",
        capability="Vocabulary",
        description="Correctly recognizes negative words",
    )
    self.add_test(test)

    # Sentiment words in context: three positive patterns, then three
    # negative ones. No `nsamples` is passed for these.
    template = self.editor.template(
        "{it} {noun} {be} {pos_adj}.",
        it=["The", "This", "That"],
        be=["is", "was"],
        labels=self._positive,
        save=True,
    )
    template += self.editor.template(
        "{it} {be} {a:pos_adj} {noun}.",
        it=["It", "This", "That"],
        be=["is", "was"],
        labels=self._positive,
        save=True,
    )
    template += self.editor.template(
        "{i} {pos_verb} {the} {noun}.",
        i=["I", "We"],
        the=["this", "that", "the"],
        labels=self._positive,
        save=True,
    )
    template += self.editor.template(
        "{it} {noun} {be} {neg_adj}.",
        it=["That", "This", "The"],
        be=["is", "was"],
        labels=self._negative,
        save=True,
    )
    template += self.editor.template(
        "{it} {be} {a:neg_adj} {noun}.",
        it=["It", "This", "That"],
        be=["is", "was"],
        labels=self._negative,
        save=True,
    )
    template += self.editor.template(
        "{i} {neg_verb} {the} {noun}.",
        i=["I", "We"],
        the=["this", "that", "the"],
        labels=self._negative,
        save=True,
    )

    test = MFT(
        **template,
        name="Sentiment-laden words in context",
        capability="Vocabulary",
        description="Use positive and negative verbs and adjectives "
        "with nouns such as product, movie, airline, etc. "
        'E.g. "This was a bad movie"',
    )
    self.add_test(test)

    # Intensifiers: pairs of (plain sentence, intensified sentence).
    template = self.editor.template(
        [
            "{it} {be} {a:pos_adj} {noun}.",
            "{it} {be} {a:intens_adj} {pos_adj} {noun}.",
        ],
        it=["It", "This", "That"],
        be=["is", "was"],
        nsamples=num_test_cases,
        save=True,
    )
    template += self.editor.template(
        [
            "{i} {pos_verb} {the} {noun}.",
            "{i} {intens_verb} {pos_verb} {the} {noun}.",
        ],
        i=["I", "We"],
        the=["this", "that", "the"],
        nsamples=num_test_cases,
        save=True,
    )
    template += self.editor.template(
        [
            "{it} {be} {a:neg_adj} {noun}.",
            "{it} {be} {a:intens_adj} {neg_adj} {noun}.",
        ],
        it=["It", "This", "That"],
        be=["is", "was"],
        nsamples=num_test_cases,
        save=True,
    )
    template += self.editor.template(
        [
            "{i} {neg_verb} {the} {noun}.",
            "{i} {intens_verb} {neg_verb} {the} {noun}.",
        ],
        i=["I", "We"],
        the=["this", "that", "the"],
        nsamples=num_test_cases,
        save=True,
    )

    test = DIR(
        template.data,
        self.monotonic_label,
        templates=template.templates,
        name="Intensifiers",
        capability="Vocabulary",
        # Adjacent literals are concatenated as-is, so each fragment ends
        # with its own separator.
        description="Test is composed of pairs of sentences (x1, x2), where we add an intensifier "
        "such as 'really', or 'very' to x2 and expect the confidence to NOT go down "
        "(with tolerance=0.1). e.g.: "
        "x1 = 'That was a good movie' "
        "x2 = 'That was a very good movie'",
    )
    self.add_test(test)

    # Reducers: pairs of (plain sentence, softened sentence).
    template = self.editor.template(
        [
            "{it} {noun} {be} {pos_adj}.",
            "{it} {noun} {be} {reducer_adj} {pos_adj}.",
        ],
        it=["The", "This", "That"],
        be=["is", "was"],
        nsamples=num_test_cases,
        save=True,
    )
    template += self.editor.template(
        [
            "{it} {noun} {be} {neg_adj}.",
            "{it} {noun} {be} {reducer_adj} {neg_adj}.",
        ],
        it=["The", "This", "That"],
        be=["is", "was"],
        nsamples=num_test_cases,
        save=True,
    )

    test = DIR(
        template.data,
        self.monotonic_label_down,
        templates=template.templates,
        name="Reducers",
        capability="Vocabulary",
        description="Test is composed of pairs of sentences (x1, x2), where we add a reducer "
        "such as 'somewhat', or 'kinda' to x2 and expect the confidence to NOT go up "
        "(with tolerance=0.1). e.g.: "
        "x1 = 'The staff was good.' "
        "x2 = 'The staff was somewhat good.'",
    )
    self.add_test(test)

    if data:
        # Short, strongly polarised phrases handed to `_add_phrase_function`.
        positive = self.editor.template("I {pos_verb_present} you.").data
        positive += self.editor.template("You are {pos_adj}.").data
        negative = self.editor.template("I {neg_verb_present} you.").data
        negative += self.editor.template("You are {neg_adj}.").data

        template = Perturb.perturb(
            data, _add_phrase_function(positive), nsamples=num_test_cases
        )
        test = DIR(
            template.data,
            Expect.pairwise(self._diff_up),
            name="Add positive phrases",
            capability="Vocabulary",
            description="Add very positive phrases (e.g. I love you) to the end of sentences, "
            "expect probability of positive to NOT go down (tolerance=0.1)",
        )
        self.add_test(test)

        template = Perturb.perturb(
            data, _add_phrase_function(negative), nsamples=num_test_cases
        )
        test = DIR(
            template.data,
            Expect.pairwise(self._diff_down),
            name="Add negative phrases",
            capability="Vocabulary",
            description="Add very negative phrases (e.g. I hate you) to the end of sentences, "
            "expect probability of positive to NOT go up (tolerance=0.1)",
        )
        self.add_test(test)
def replace_john_with_others(x, *args, **kwargs):
    """Return copies of `x` with the whole word "John" replaced.

    Yields one string for "Luke" and one for "Mark", in that order.
    Returns `None` when `x` does not contain "John" as a whole word.
    Extra positional and keyword arguments are accepted and ignored.
    """
    john_pattern = r'\bJohn\b'
    if re.search(john_pattern, x) is None:
        return None
    variants = []
    for substitute in ['Luke', 'Mark']:
        variants.append(re.sub(john_pattern, substitute, x))
    return variants


# Apply the perturbation to a tiny dataset and display the result.
dataset = ['John is a man', 'Mary is a woman', 'John is an apostle']
ret = Perturb.perturb(dataset, replace_john_with_others)
ret.data


# In[12]:


import checklist
from checklist.editor import Editor
from checklist.perturb import Perturb
from checklist.test_types import MFT, INV, DIR

# Build a small MFT from a masked template with four positive adjectives.
editor = Editor()
t = editor.template(
    'This is {a:adj} {mask}.',
    adj=['good', 'great', 'excellent', 'awesome'],
)
test1 = MFT(
    t.data,
    labels=1,
    name='Simple positives',
    capability='Vocabulary',
    description='',
)


# In[ ]:
def test_mft_wo_test_id(self):
    """An `MFT` built without a `test_id` argument exposes `test_id` as `None`."""
    created = MFT(
        **self.dummy_test_data,
        expect=Expect.eq(),
        name="mft test without test id",
    )
    observed_id = created.test_id
    assert observed_id is None
def test_mft_w_test_id(self):
    """An `MFT` built with `test_id` exposes that same value as `test_id`."""
    created = MFT(
        **self.dummy_test_data,
        expect=Expect.eq(),
        name="mft test with test id",
        test_id=self.test_id,
    )
    observed_id = created.test_id
    assert observed_id == self.test_id