def _default_ner_tests(self, data: Optional[Iterable[str]], num_test_cases=100): if data: template = Perturb.perturb( data, utils.spacy_wrap(Perturb.change_names, ner=True), nsamples=num_test_cases ) test = INV( template.data, name="Change names", capability="NER", description="Replace names with other common names", ) self.add_test(test) template = Perturb.perturb( data, utils.spacy_wrap(Perturb.change_location, ner=True), nsamples=num_test_cases ) test = INV( template.data, name="Change locations", capability="NER", description="Replace city or country names with other cities or countries", ) self.add_test(test) template = Perturb.perturb( data, utils.spacy_wrap(Perturb.change_number, ner=True), nsamples=num_test_cases ) test = INV( template.data, name="Change numbers", capability="NER", description="Replace integers with random integers within a 20% radius of the original", ) self.add_test(test)
def _typo_test(self, data: Iterable, num_test_cases: int):
    """
    Checks if the model is robust enough to be invariant to simple typos.

    Adds two `INV` tests: one built with `typos=1` and one with `typos=2`,
    each sampling up to `num_test_cases` examples from `data`.
    """
    # (number of typos, test name, test description), in registration order.
    typo_specs = (
        (
            1,
            "Typos",
            "Add one typo to input by swapping two adjacent characters",
        ),
        (
            2,
            "2 Typos",
            "Add two typos to input by swapping two adjacent characters twice",
        ),
    )

    for typo_count, test_name, test_description in typo_specs:
        perturbed = Perturb.perturb(
            data, self.typos(), nsamples=num_test_cases, typos=typo_count
        )
        self.add_test(
            INV(
                perturbed.data,
                name=test_name,
                capability="Robustness",
                description=test_description,
            )
        )
def robustness_test():
    """
    Build punctuation and typo invariance tests for the food and drug sentences.

    Reads the module-level `food_ret` and `drug_ret` templates, so they must be
    populated before this is called. For each of the two, `Perturb.punctuation`
    is applied to the `processor.pipe(...)` output and `Perturb.add_typos` to the
    raw sentences; originals are not kept in the perturbed data. Every test is
    also written to `./tests/<name>.txt`.

    Returns:
        A `(tests, names)` pair of parallel lists, ordered food-punctuation,
        food-typo, drug-punctuation, drug-typo.
    """
    tests = []
    names = []

    for topic, source in (('food', food_ret), ('drug', drug_ret)):
        sentences = source.data
        # Punctuation perturbation is fed the processed documents, typos the plain strings.
        processed = list(processor.pipe(sentences))
        perturbed_punct = Perturb.perturb(processed, Perturb.punctuation, keep_original=False)
        perturbed_typo = Perturb.perturb(sentences, Perturb.add_typos, keep_original=False)

        tests.append(
            INV(**perturbed_punct,
                name='Minor Changes: Punctuation',
                capability='robustness',
                description=''))
        names.append('inv_' + topic + '_punct')

        tests.append(
            INV(**perturbed_typo,
                name='Minor Changes: Typos',
                capability='robustness',
                description=''))
        names.append('inv_' + topic + '_typo')

    for test, name in zip(tests, names):
        test.to_raw_file('./tests/' + name + '.txt')

    return tests, names
def main():
    """
    Build all tests, optionally export them, then score each from its prediction file.

    Collects the tests and names returned by `object_test()` and
    `robustness_test()` (in that order). When `pred` is true, calls
    `tests_to_jsonl(names)` followed by `prediction_to_format()`. Finally each
    test is run against `./tests/<name>.jsonl.predictions.json.pres` (softmax
    format, overwriting earlier results) and its summary is printed.
    """
    pred = True  # False

    tests, names = object_test()
    robust_tests, robust_names = robustness_test()
    tests += robust_tests
    names += robust_names

    if pred:
        tests_to_jsonl(names)
        prediction_to_format()

    for test, name in zip(tests, names):
        print("\n\nBegin test:", name)
        test.run_from_file('./tests/' + name + '.jsonl.predictions.json.pres',
                           file_format='softmax',
                           overwrite=True)
        test.summary()
def _default_robustness_tests(self, data: Optional[Iterable[str]], num_test_cases=100): template = Perturb.perturb(data, utils.add_random_strings, nsamples=num_test_cases) test = INV( template.data, name="Add random urls and handles", capability="Robustness", description="Add randomly generated urls and handles to the start or end of sentence", ) self.add_test(test)
def _default_logic_tests(self, data: Optional[Iterable[Tuple]], num_test_cases=100):
    """
    Registers the logic-capability tests on this suite.

    Always adds an `MFT` built from the template pair
    "A are COMP than B" / "B are COMP than A", labelled `self._contradicts`.
    When `data` is non-empty, also adds an `MFT` labelled `self._entails`
    whose examples pair element 0 of each item with itself and element 1 of
    each item with itself.
    """
    comparison_pairs = self.editor.template(
        (
            "{nouns1} are {compare} than {nouns2}",
            "{nouns2} are {compare} than {nouns1}",
        ),
        nsamples=num_test_cases,
        remove_duplicates=True,
    )
    self.add_test(
        MFT(
            **comparison_pairs,
            labels=self._contradicts,
            name='"A is COMP than B" contradicts "B is COMP than A"',
            capability="Logic",
            description='Eg. "A is better than B" contradicts "B is better than A"',
        )
    )

    if not data:
        return

    def repeat_field(position):
        # Builds a perturbation that turns an item into (item[position], item[position]).
        return lambda item: (item[position], item[position])

    identical_pairs = Perturb.perturb(
        data, repeat_field(0), nsamples=num_test_cases, keep_original=False
    )
    identical_pairs += Perturb.perturb(
        data, repeat_field(1), nsamples=num_test_cases, keep_original=False
    )
    self.add_test(
        MFT(
            **identical_pairs,
            labels=self._entails,
            name="A entails A (premise == hypothesis)",
            capability="Logic",
            description="If premise and hypothesis are the same, then premise entails the hypothesis",
        )
    )
def _punctuation_test(self, data: Iterable, num_test_cases: int):
    """
    Checks if the model is invariant to presence/absence of punctuation.

    Perturbs `data` with the function returned by `self.punctuation()`
    (sampling up to `num_test_cases` examples) and adds the result as an
    `INV` test.
    """
    perturbed = Perturb.perturb(
        data,
        self.punctuation(),
        nsamples=num_test_cases,
    )
    self.add_test(
        INV(
            perturbed.data,
            name="Punctuation",
            description="Strip punctuation and / or add '.'",
            capability="Robustness",
        )
    )
def _contraction_test(self, data: Iterable, num_test_cases: int):
    """
    Checks if the model is invariant to contractions and expansions
    (eg. What is <-> What's).

    Perturbs `data` with the function returned by `self.contractions()`
    (sampling up to `num_test_cases` examples) and adds the result as an
    `INV` test.
    """
    perturbed = Perturb.perturb(
        data,
        self.contractions(),
        nsamples=num_test_cases,
    )
    self.add_test(
        INV(
            perturbed.data,
            name="Contractions",
            capability="Robustness",
            description="Contract or expand contractions, e.g. What is <-> What's",
        )
    )
def object_test():
    """
    Build the object-recognition MFT tests for food, sport, non-drug and drug words.

    Each category fills two sentence templates with its word list; the merged
    template is stored in the module-level globals `food_ret`, `sport_ret`,
    `nondrug_ret` and `drug_ret`. The four MFT tests are written to
    `./tests/<name>.txt`.

    Returns:
        `(tests, names)`: the four MFT tests and their file-name stems, in
        matching order (food, sport, nondrug, drug).
    """
    global food_ret, sport_ret, drug_ret, nondrug_ret
    # Label codes:
    # codeDictionary = {"D":0, "M":1, "S":2, "H":3, "F":4, "O":5, "E":6, "NA":7}

    editor = Editor()

    def fill_pair(first_sentence, second_sentence, **fill):
        # Fill both sentence templates with the same arguments and merge the results.
        first = editor.template(first_sentence, **fill)
        second = editor.template(second_sentence, **fill)
        return first + second

    food_ret = fill_pair('How often do you get {food}?',
                         "I can't stop thinking about {food}!",
                         food=food, labels=0, save=True)
    mft_food = MFT(food_ret.data,
                   labels=food_ret.labels,
                   name='Object Rec: Food',
                   capability='Objects',
                   description='Food')

    sport_ret = fill_pair('I have to participate in {sport}?',
                          'It is good to move your body, like doing {sport}.',
                          sport=sport, labels=6, save=True)
    mft_sport = MFT(sport_ret.data,
                    labels=sport_ret.labels,
                    name='Object Rec: Sport',
                    capability='Objects',
                    description='Sport')

    nondrug_ret = fill_pair('How often do you take {nondrug}?',
                            'Have you taken {nondrug} for the last five months?',
                            nondrug=nondrug, labels=5)
    mft_nondrug = MFT(nondrug_ret.data,
                      labels=nondrug_ret.labels,
                      name='Object Rec: Non Drug',
                      capability='Objects',
                      description='Non Drug')

    drug_ret = fill_pair('How often do you get {drug}?',
                         'Have you taken {drug} for the last five months?',
                         drug=drug, labels=1, save=True)
    mft_drug = MFT(drug_ret.data,
                   labels=drug_ret.labels,
                   name='Object Rec: Drug',
                   capability='Objects',
                   description='Drug')

    # The swap-based INV / DIR tests below are constructed but are not part of
    # the returned list.
    swapped_nondrug = Perturb.perturb(nondrug_ret.data, swap_nondrug)
    inv_n = INV(**swapped_nondrug,
                name='swap nondrug name in both questions',
                capability='objects',
                description='')

    swapped_drug = Perturb.perturb(drug_ret.data, swap_drug)
    inv_d = INV(**swapped_drug,
                name='swap drug name in both questions',
                capability='objects',
                description='')

    nondrug_monodec = Expect.monotonic(label=5, increasing=False, tolerance=0.1)
    drug_monodec = Expect.monotonic(label=1, increasing=False, tolerance=0.1)

    nondrug_to_drug = Perturb.perturb(nondrug_ret.data, swap_nd)
    dir_nd = DIR(**nondrug_to_drug, expect=nondrug_monodec)
    drug_to_nondrug = Perturb.perturb(drug_ret.data, swap_dn)
    dir_dn = DIR(**drug_to_nondrug, expect=drug_monodec)

    # File-name stem -> test, in the order they are returned.
    returned = {
        'mft_food': mft_food,
        'mft_sport': mft_sport,
        'mft_nondrug': mft_nondrug,
        'mft_drug': mft_drug,
    }
    tests = list(returned.values())
    names = list(returned)
    assert len(tests) == len(names)

    for name, test in returned.items():
        test.to_raw_file('./tests/' + name + '.txt')

    return tests, names
def negation(data):
    """
    Apply `Perturb.add_negation` to the first element of every item in `data`.

    The first elements are run through `processor.pipe` before perturbing, and
    the processed list is printed. Returns whatever `Perturb.perturb` returns.
    """
    first_fields = [item[0] for item in data]
    processed = list(processor.pipe(first_fields))
    print(processed)
    return Perturb.perturb(processed, Perturb.add_negation)
def _default_vocabulary_tests(self, data: Optional[Iterable[str]], num_test_cases=100):
    """
    Registers the vocabulary-capability tests on this suite.

    Always added, all generated from `self.editor`:
      * "Single Positive Words" / "Single Negative Words" (MFT over lexicon words)
      * "Sentiment-laden words in context" (MFT over filled templates)
      * "Intensifiers" and "Reducers" (DIR over sentence pairs, sampling
        `num_test_cases` examples per template)

    Added only when `data` is non-empty:
      * "Add positive phrases" / "Add negative phrases" (DIR over `data`
        perturbed via `_add_phrase_function`)
    """
    lexicons = self.editor.lexicons

    positive_words = (
        lexicons["pos_adj"] + lexicons["pos_verb_present"] + lexicons["pos_verb_past"]
    )
    test = MFT(
        positive_words,
        labels=self._positive,
        name="Single Positive Words",
        capability="Vocabulary",
        description="Correctly recognizes positive words",
    )
    self.add_test(test)

    negative_words = (
        lexicons["neg_adj"] + lexicons["neg_verb_present"] + lexicons["neg_verb_past"]
    )
    test = MFT(
        negative_words,
        labels=self._negative,
        name="Single Negative Words",
        capability="Vocabulary",
        description="Correctly recognizes negative words",
    )
    self.add_test(test)

    # Sentiment words embedded in short sentences; labels travel with the template.
    template = self.editor.template(
        "{it} {noun} {be} {pos_adj}.",
        it=["The", "This", "That"],
        be=["is", "was"],
        labels=self._positive,
        save=True,
    )
    template += self.editor.template(
        "{it} {be} {a:pos_adj} {noun}.",
        it=["It", "This", "That"],
        be=["is", "was"],
        labels=self._positive,
        save=True,
    )
    template += self.editor.template(
        "{i} {pos_verb} {the} {noun}.",
        i=["I", "We"],
        the=["this", "that", "the"],
        labels=self._positive,
        save=True,
    )
    template += self.editor.template(
        "{it} {noun} {be} {neg_adj}.",
        it=["That", "This", "The"],
        be=["is", "was"],
        labels=self._negative,
        save=True,
    )
    template += self.editor.template(
        "{it} {be} {a:neg_adj} {noun}.",
        it=["It", "This", "That"],
        be=["is", "was"],
        labels=self._negative,
        save=True,
    )
    template += self.editor.template(
        "{i} {neg_verb} {the} {noun}.",
        i=["I", "We"],
        the=["this", "that", "the"],
        labels=self._negative,
        save=True,
    )
    test = MFT(
        **template,
        name="Sentiment-laden words in context",
        capability="Vocabulary",
        description="Use positive and negative verbs and adjectives "
        "with nouns such as product, movie, airline, etc. "
        'E.g. "This was a bad movie"',
    )
    self.add_test(test)

    # Intensifiers: each example is a (plain, intensified) sentence pair.
    template = self.editor.template(
        ["{it} {be} {a:pos_adj} {noun}.", "{it} {be} {a:intens_adj} {pos_adj} {noun}."],
        it=["It", "This", "That"],
        be=["is", "was"],
        nsamples=num_test_cases,
        save=True,
    )
    template += self.editor.template(
        ["{i} {pos_verb} {the} {noun}.", "{i} {intens_verb} {pos_verb} {the} {noun}."],
        i=["I", "We"],
        the=["this", "that", "the"],
        nsamples=num_test_cases,
        save=True,
    )
    template += self.editor.template(
        ["{it} {be} {a:neg_adj} {noun}.", "{it} {be} {a:intens_adj} {neg_adj} {noun}."],
        it=["It", "This", "That"],
        be=["is", "was"],
        nsamples=num_test_cases,
        save=True,
    )
    template += self.editor.template(
        ["{i} {neg_verb} {the} {noun}.", "{i} {intens_verb} {neg_verb} {the} {noun}."],
        i=["I", "We"],
        the=["this", "that", "the"],
        nsamples=num_test_cases,
        save=True,
    )
    # Adjacent string literals are joined with no separator, so every fragment
    # below carries its own trailing space / punctuation.
    test = DIR(
        template.data,
        self.monotonic_label,
        templates=template.templates,
        name="Intensifiers",
        capability="Vocabulary",
        description="Test is composed of pairs of sentences (x1, x2), where we add an intensifier "
        "such as 'really', or 'very' to x2 and expect the confidence to NOT go down "
        "(with tolerance=0.1). e.g.: "
        "x1 = 'That was a good movie', "
        "x2 = 'That was a very good movie'",
    )
    self.add_test(test)

    # Reducers: each example is a (plain, softened) sentence pair.
    template = self.editor.template(
        ["{it} {noun} {be} {pos_adj}.", "{it} {noun} {be} {reducer_adj} {pos_adj}."],
        it=["The", "This", "That"],
        be=["is", "was"],
        nsamples=num_test_cases,
        save=True,
    )
    template += self.editor.template(
        ["{it} {noun} {be} {neg_adj}.", "{it} {noun} {be} {reducer_adj} {neg_adj}."],
        it=["The", "This", "That"],
        be=["is", "was"],
        nsamples=num_test_cases,
        save=True,
    )
    test = DIR(
        template.data,
        self.monotonic_label_down,
        templates=template.templates,
        name="Reducers",
        capability="Vocabulary",
        description="Test is composed of pairs of sentences (x1, x2), where we add a reducer "
        "such as 'somewhat', or 'kinda' to x2 and expect the confidence to NOT go up "
        "(with tolerance=0.1). e.g.: "
        "x1 = 'The staff was good.', "
        "x2 = 'The staff was somewhat good.'",
    )
    self.add_test(test)

    if data:
        positive = self.editor.template("I {pos_verb_present} you.").data
        positive += self.editor.template("You are {pos_adj}.").data

        negative = self.editor.template("I {neg_verb_present} you.").data
        negative += self.editor.template("You are {neg_adj}.").data

        template = Perturb.perturb(
            data, _add_phrase_function(positive), nsamples=num_test_cases
        )
        test = DIR(
            template.data,
            Expect.pairwise(self._diff_up),
            name="Add positive phrases",
            capability="Vocabulary",
            description="Add very positive phrases (e.g. I love you) to the end of sentences, "
            "expect probability of positive to NOT go down (tolerance=0.1)",
        )
        self.add_test(test)

        template = Perturb.perturb(
            data, _add_phrase_function(negative), nsamples=num_test_cases
        )
        test = DIR(
            template.data,
            Expect.pairwise(self._diff_down),
            name="Add negative phrases",
            capability="Vocabulary",
            description="Add very negative phrases (e.g. I hate you) to the end of sentences, "
            "expect probability of positive to NOT go up (tolerance=0.1)",
        )
        self.add_test(test)
def main():
    """
    Generate a perturbed music-review set and write it as JSON lines.

    The output (`../data/music_reviews_yucky.json`) contains, in this order:
      1. typo-perturbed reviews sampled from `../data/music_reviews_dev.json`,
         each keeping the sentiment label of the review it came from;
      2. 100 sampled sentences from each of six templates that mix positive
         and negative adjectives, with the label fixed per template.

    Every record is produced by `to_json(text, label, id)`, with ids counting
    up from 0 in output order.
    """
    n_samples = 100
    editor = Editor()

    adjpos = """ great fun lovely sexy wonderful amazing awesome good fantastic excelent masterful fabulous incredible salacious""".strip().split()
    adjneg = """terrible horrible shitty worst disasterous bad abominable atrocious crappy repulsive yucky irredeemable""".strip().split()

    rev_neg_pos = "some parts are {pos} but overall it is {neg}."
    rev_pos_neg = "some parts are {neg} but overall it is {pos}."
    out_0 = editor.template(rev_neg_pos, pos=adjpos, neg=adjneg)
    out_1 = editor.template(rev_pos_neg, pos=adjpos, neg=adjneg)

    seed = 'I had a {neg} day really {neg} but the album made it {pos}'
    ous_0 = editor.template(seed, pos=adjpos, neg=adjneg)
    seed = 'I had a {pos} day really {pos} but the album made it {neg}'
    ous_1 = editor.template(seed, pos=adjpos, neg=adjneg)

    with open('../data/music_reviews_dev.json', 'r') as f:
        reviews = [json.loads(line) for line in f]
    # Never ask for more reviews than the file holds (random.sample would raise).
    reviews = random.sample(reviews, min(n_samples, len(reviews)))

    baseline = "{male} is my {religion_adj}, {sexual_adj} friend, and he showed me this song. I found it {pos}"
    # `pos` is filled with the negative adjectives on purpose for the negative variant.
    ret_pos = editor.template(baseline, pos=adjpos)
    ret_neg = editor.template(baseline, pos=adjneg)

    # Perturb one review at a time so each typo'd text stays attached to its own
    # label; shuffling happens only after the pairs are formed.
    typo_pairs = []
    for review in reviews:
        perturbed = Perturb.perturb([review.get('reviewText')],
                                    Perturb.add_typos,
                                    keep_original=False).data
        if perturbed:  # skip the review if no perturbed entry came back for it
            typo_pairs.append((perturbed[0][0], review.get('sentiment')))
    random.shuffle(typo_pairs)

    final_data = []

    def add_record(text, label):
        # The next id is the number of records emitted so far.
        final_data.append(to_json(text, label, len(final_data)))

    for text, label in typo_pairs:
        add_record(text, label)

    labelled_templates = (
        (out_0, "negative"),
        (out_1, "positive"),
        (ous_0, "positive"),
        (ous_1, "negative"),
        (ret_pos, "positive"),
        (ret_neg, "negative"),
    )
    for template, label in labelled_templates:
        for sentence in random.sample(template.data, n_samples):
            add_record(sentence, label)

    with open('../data/music_reviews_yucky.json', 'w') as out_file:
        for record in final_data:
            out_file.write(json.dumps(record))
            out_file.write("\n")
type=str, help='Input file containing dataset') parser.add_argument('--output_file', '-o', type=str, help='Output file containing perturbed dataset') parser.add_argument('--locations', '-l', nargs='+', choices=['sent1', 'sent2', 'sent3', 'ent1', 'ent2'], help='List of positions that you want to perturb') args = parser.parse_args() logging.info(str(args)) # Load and perturb origin_data = read_dataset(args.input_file) with open(args.output_file, 'w') as f: for sample in tqdm(origin_data, desc='perturbing dataset'): ret = Perturb.perturb([sample], perturb_context_by_type, keep_original=False, locations=args.locations) # Write original sample f.write(json.dumps(sample)) f.write('\n') for sample_list in ret.data: for sample in sample_list: f.write(json.dumps(sample)) f.write('\n')
# In[11]: import re import checklist from checklist.perturb import Perturb def replace_john_with_others(x, *args, **kwargs): # Returns empty (if John is not present) or list of strings with John replaced by Luke and Mark if not re.search(r'\bJohn\b', x): return None return [re.sub(r'\bJohn\b', n, x) for n in ['Luke', 'Mark']] dataset = ['John is a man', 'Mary is a woman', 'John is an apostle'] ret = Perturb.perturb(dataset, replace_john_with_others) ret.data # In[12]: import checklist from checklist.editor import Editor from checklist.perturb import Perturb from checklist.test_types import MFT, INV, DIR editor = Editor() t = editor.template('This is {a:adj} {mask}.', adj=['good', 'great', 'excellent', 'awesome']) test1 = MFT(t.data, labels=1, name='Simple positives',