def test_FireBERT_FCT():
    """Smoke-test a FireBERT_FCT classifier on the MNLI dev set.

    Builds the model from the MNLI checkpoint with ``sample_percent`` set
    to 3, attaches the dev split as the test dataset and runs a single
    Lightning test pass. Nothing is returned.
    """
    print("Simple FireBERT_FCT tests")

    # A negative sample_percent would mean an absolute number of samples,
    # not a percentage.
    mnli_processor = MnliProcessor({'sample_percent': 3})
    model_hparams = {'batch_size': 32, 'sample_percent': 3}

    classifier = FireBERT_FCT(
        load_from='resources/models/MNLI/pytorch_model.bin',
        processor=mnli_processor,
        hparams=model_hparams,
    )

    # From here on, use the processor instance the model exposes.
    mnli_processor = classifier.get_processor()
    dev_dataset, dev_examples = mnli_processor.load_and_cache_examples(
        "data/MNLI", example_set='dev')
    classifier.set_test_dataset(dev_dataset, dev_examples)

    gpu_count = 1 if torch.cuda.is_available() else 0
    runner = pl.Trainer(gpus=gpu_count)
    runner.test(classifier)
    # The metrics attribute is read, but its value is neither returned nor logged.
    runner.tqdm_metrics
def test_FireBERT_base(task, set, reps=1, sample=1, hparams_default=None, tf=False):
    """Evaluate FireBERT_base on a regular and an adversarial example set.

    Two models are loaded from the same checkpoint: one is tested on
    ``set`` and the other on ``"adv_" + set``. One CSV row per run is
    appended to ``results/base/hparams-results.csv``.

    Args:
        task: ``"MNLI"`` or ``"IMDB"``; selects the processor class, the
            checkpoint directory and the data directory.
        set: Name of the example set (e.g. ``'dev'``). The name shadows
            the builtin but is kept because it is part of the public
            signature.
        reps: Unused here; kept for signature compatibility.
        sample: Value passed to the processors as ``sample_percent``
            (a negative number means an absolute number of samples,
            not a percentage).
        hparams_default: Hyperparameters handed to both models. ``None``
            (the default) means an empty dict created per call.
        tf: When false, ``"_on_lightning"`` is appended to the task name
            in the checkpoint path.

    Raises:
        ValueError: If ``task`` is neither ``"MNLI"`` nor ``"IMDB"``.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    hparams = {} if hparams_default is None else hparams_default

    if task == "MNLI":
        processor_cls = MnliProcessor
    elif task == "IMDB":
        processor_cls = ImdbProcessor
    else:
        raise ValueError("Unsupported task: " + repr(task))

    processor = processor_cls({'sample_percent': sample})
    adv_processor = processor_cls({'sample_percent': sample})

    lightning = "_on_lightning" if not tf else ""
    checkpoint = 'resources/models/' + task + lightning + '/pytorch_model.bin'

    # One model for the regular set ...
    model = FireBERT_base(load_from=checkpoint, processor=processor, hparams=hparams)
    dataset, examples = processor.load_and_cache_examples("data/" + task, example_set=set)
    model.set_test_dataset(dataset, examples)

    # ... and one for the adversarial set.
    # NOTE(review): the adversarial model is constructed with `processor`,
    # not `adv_processor`, exactly as before; confirm this is intended.
    model_adv = FireBERT_base(load_from=checkpoint, processor=processor, hparams=hparams)
    dataset_adv, examples_adv = adv_processor.load_and_cache_examples(
        "data/" + task, example_set="adv_" + set)
    model_adv.set_test_dataset(dataset_adv, examples_adv)

    def run_and_log(target_model, set_name):
        # Test one model with a fresh trainer and append its accuracy to the CSV.
        trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
        trainer.test(target_model)
        result = trainer.tqdm_metrics
        # The context manager closes the file even if formatting the row fails.
        with open("results/base/hparams-results.csv", "a+") as log:
            print(task, ",", set_name, ",", sample, ',"', hparams, '",',
                  result['avg_test_acc'], sep="", file=log)

    run_and_log(model, set)
    run_and_log(model_adv, "adv_" + set)

    print("baseline data logged.")
    elapsed_time()
    print()
def test():
    """Smoke-test LightningBertForSequenceClassification on the MNLI dev set.

    Loads the MNLI checkpoint with a batch size of 32, attaches the dev
    split as the test dataset and runs one Lightning test pass. Nothing
    is returned.
    """
    print("Simple base class tests")

    base_hparams = {'batch_size': 32}
    classifier = LightningBertForSequenceClassification(
        load_from='resources/models/MNLI/pytorch_model.bin',
        processor=MnliProcessor(),
        hparams=base_hparams,
    )

    # Load the dev split through the processor the model exposes.
    mnli_processor = classifier.get_processor()
    dev_dataset, dev_examples = mnli_processor.load_and_cache_examples(
        "data/MNLI", example_set='dev')
    classifier.set_test_dataset(dev_dataset, dev_examples)

    gpu_count = 1 if torch.cuda.is_available() else 0
    runner = pl.Trainer(gpus=gpu_count)
    runner.test(classifier)
    # The metrics attribute is read, but its value is neither returned nor logged.
    runner.tqdm_metrics
def test_FireBERT_FVE(task, set, reps=1, sample=1, hparams_default=None, hparams_lists=None, lightning=''):
    """Evaluate FireBERT_FVE on a regular and an adversarial example set.

    With ``hparams_lists`` left as ``None`` a single evaluation is run
    with the given hyperparameters. Otherwise, ``reps`` evaluations are
    run; before each one every key in ``hparams_lists`` gets a value
    drawn with ``random.choice`` from its candidate list. Each iteration
    appends one row to ``results/five/hparams-results.csv``.

    Args:
        task: ``"MNLI"`` or ``"IMDB"``; selects the processor class, the
            checkpoint directory and the data directory.
        set: Name of the example set; the adversarial set is
            ``"adv_" + set``. The name shadows the builtin but is kept
            because it is part of the public signature.
        reps: Number of random-search iterations.
        sample: Value passed to the processors as ``sample_percent``
            (a negative number means an absolute number of samples,
            not a percentage).
        hparams_default: Base hyperparameters. They are copied, so
            neither the caller's dict nor a shared default is mutated.
        hparams_lists: Optional mapping from hyperparameter name to a
            sequence of candidate values.
        lightning: Suffix appended to the task name in the checkpoint path.

    Raises:
        ValueError: If ``task`` is neither ``"MNLI"`` nor ``"IMDB"``.
    """
    # Work on a private copy: the search loop below assigns into this dict.
    hparams = {} if hparams_default is None else dict(hparams_default)

    if task == "MNLI":
        processor_cls = MnliProcessor
    elif task == "IMDB":
        processor_cls = ImdbProcessor
    else:
        raise ValueError("Unsupported task: " + repr(task))

    checkpoint = 'resources/models/' + task + lightning + '/pytorch_model.bin'

    # Regular set.
    processor = processor_cls({'sample_percent': sample})
    model = FireBERT_FVE(load_from=checkpoint, processor=processor, hparams=hparams)
    processor.set_tokenizer(model.tokenizer)
    dataset, examples = processor.load_and_cache_examples("data/" + task, example_set=set)
    model.set_test_dataset(dataset, examples)

    # Adversarial set.
    # NOTE(review): as before, the adversarial model is constructed with
    # `processor` and `adv_processor` gets `model.tokenizer` (not
    # `model_adv.tokenizer`); confirm this is intended.
    adv_processor = processor_cls({'sample_percent': sample})
    model_adv = FireBERT_FVE(load_from=checkpoint, processor=processor, hparams=hparams)
    adv_processor.set_tokenizer(model.tokenizer)
    dataset_adv, examples_adv = adv_processor.load_and_cache_examples(
        "data/" + task, example_set="adv_" + set)
    model_adv.set_test_dataset(dataset_adv, examples_adv)

    for i in range(reps):
        if hparams_lists is None:
            print("FireBERT_FVE specific test", task, set)
        else:
            print("FireBERT_FVE hparam test", task, set)
            print("{")
            for key, values in hparams_lists.items():
                hparams[key] = random.choice(values)
                print(" '"+key+"':",str(hparams[key])+",")
            print("}")

        # Push the (possibly re-sampled) hyperparameters into both models.
        # NOTE(review): assumed to run on every iteration, not only in the
        # random-search branch; confirm against the original layout.
        model.update_hparams(hparams)
        model_adv.update_hparams(hparams)

        trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
        trainer.test(model)
        result1 = trainer.tqdm_metrics

        trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
        trainer.test(model_adv)
        result2 = trainer.tqdm_metrics

        # The context manager closes the file even if formatting the row fails.
        with open("results/five/hparams-results.csv", "a+") as log:
            print(task, ",", "adv_"+set, ",", sample, ',"', hparams, '",',
                  result1['avg_test_acc'], ",", result2['avg_test_acc'],
                  sep="", file=log)

        print("iteration",i,"logged.")
        elapsed_time()
        print()

        # Without candidate lists there is nothing to vary, so run once.
        if hparams_lists is None:
            break
'use_full_example': use_full_example, 'leave_alone': leave_alone, 'random_out_of': random_out_of, 'judge_bert': judge_bert } print(hparams) proc_hparams = {} # delete this next line to run full 100% proc_hparams.update({'sample_percent': 3, 'randomize': True}) # instantiate the model used for SWITCH switch_model = LightningBertForSequenceClassification( load_from='resources/models/MNLI/pytorch_model.bin', processor=MnliProcessor(), hparams={'batch_size': 6}) switch_model.cuda() model = FireBERT_FCT(switch_model=switch_model, processor=MnliProcessor(hparams=proc_hparams), hparams=hparams) processor = model.get_processor() train_dataset, train_examples = processor.load_and_cache_examples( "data/MNLI", example_set='train') val_dataset, _ = processor.load_and_cache_examples("data/MNLI", example_set='dev') test_dataset, _ = processor.load_and_cache_examples("data/MNLI", example_set='test')
def test_iter_FireBERT_FSE(set='mnli', sample_pct=3):
    """Run one randomized-hyperparameter evaluation of FireBERT_FSE.

    Draws random values for several FSE hyperparameters, tests the model
    on the ``dev`` and ``adv_dev`` example sets of the selected task and
    appends one line with the drawn values and both accuracies to
    ``results/fse/<set>-hparams-results.txt``.

    Args:
        set: ``'mnli'`` or ``'imdb'``. Any other value prints
            ``"Invalid data set selected!"`` and returns without
            testing. The name shadows the builtin but is kept because
            it is part of the public signature.
        sample_pct: Value passed to the processor as ``sample_percent``
            (a negative number means an absolute number of samples,
            not a percentage).
    """
    import gc
    import random

    gc.enable()
    print("Simple FireBERT_FSE tests")

    # Draw order is kept (u, p, c, t, then the vote bit) so a seeded run
    # produces the same hyperparameters as before.
    u = random.randint(2, 20)
    p = random.randint(2, 15)
    c = random.randint(3, 15)
    t = random.randint(3, 20)
    b = 32
    v = bool(random.getrandbits(1))

    hparams = {
        'use_USE': True,
        'USE_method': "filter",
        'USE_multiplier': u,
        'stop_words': True,
        'perturb_words': p,
        'candidates_per_word': c,
        'total_alternatives': t,
        'match_pos': True,
        'batch_size': b,
        'verbose': False,
        'vote_avg_logits': v,
    }

    # Both data sets follow the same procedure; only the task name and
    # the processor class differ.
    if set == 'mnli':
        task, processor_cls = 'MNLI', MnliProcessor
    elif set == 'imdb':
        task, processor_cls = 'IMDB', ImdbProcessor
    else:
        print("Invalid data set selected!")
        return

    processor = processor_cls({'sample_percent': sample_pct})
    model = FireBERT_FSE(
        load_from='resources/models/' + task + '/pytorch_model.bin',
        processor=processor,
        hparams=hparams)
    processor = model.get_processor()

    metrics = []
    for example_set, description in (('dev', "dev set"),
                                     ('adv_dev', "adversarial dev set")):
        dataset, examples = processor.load_and_cache_examples(
            "data/" + task, example_set=example_set)
        model.set_test_dataset(dataset, examples)
        trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
        print(task + ": Start testing " + description)
        trainer.test(model)
        metrics.append(trainer.tqdm_metrics)
    resulta, resultb = metrics

    row = ("-".join(str(value) for value in (u, p, c, t, b, v))
           + "," + str(resulta['avg_test_acc'])
           + "," + str(resultb['avg_test_acc']))
    # The context manager closes the file even if the write fails.
    with open("results/fse/" + task.lower() + "-hparams-results.txt", "a") as f:
        f.write(row + "\n")

    # Drop the references to the large objects before forcing a collection.
    processor = None
    dataset = None
    examples = None
    model = None
    trainer = None
    gc.collect()
def test_param_FireBERT_FSE(hparams, dset='mnli', sample_pct=3):
    """Evaluate FireBERT_FSE with an explicit set of hyperparameters.

    This function exists specifically to reproduce results from the
    randomized control trials: pass in the hyperparameters as ``hparams``.
    The model is tested on the ``dev`` and ``adv_test`` example sets of
    the selected task. Both accuracies are printed and appended to a
    per-configuration file under ``results/fse/``.

    Args:
        hparams: Hyperparameter dict passed to the model. It must contain
            ``use_USE``, ``USE_method``, ``perturb_words``,
            ``candidates_per_word``, ``total_alternatives`` and
            ``vote_avg_logits``, which are used to name the result file.
        dset: ``'mnli'`` or ``'imdb'``. Any other value prints
            ``"Invalid data set selected!"`` and returns without testing.
        sample_pct: Value passed to the processor as ``sample_percent``
            (a negative number means an absolute number of samples,
            not a percentage).

    Raises:
        KeyError: If one of the required keys is missing from ``hparams``.
    """
    u = hparams['use_USE']
    um = hparams['USE_method']
    p = hparams['perturb_words']
    c = hparams['candidates_per_word']
    t = hparams['total_alternatives']
    v = hparams['vote_avg_logits']

    print("Simple Static FireBERT_FSE test")

    # Both data sets follow the same procedure; only the task name and
    # the processor class differ.
    if dset == 'mnli':
        task, processor_cls = 'MNLI', MnliProcessor
    elif dset == 'imdb':
        task, processor_cls = 'IMDB', ImdbProcessor
    else:
        print("Invalid data set selected!")
        return

    processor = processor_cls({'sample_percent': sample_pct})
    model = FireBERT_FSE(
        load_from='resources/models/' + task + '/pytorch_model.bin',
        processor=processor,
        hparams=hparams)
    processor = model.get_processor()

    # NOTE(review): the regular run uses 'dev' while the adversarial run
    # uses 'adv_test'; kept as in the original, confirm this pairing.
    metrics = []
    for example_set in ('dev', 'adv_test'):
        dataset, examples = processor.load_and_cache_examples(
            "data/" + task, example_set=example_set)
        model.set_test_dataset(dataset, examples)
        trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
        print("Start testing")
        trainer.test(model)
        metrics.append(trainer.tqdm_metrics)
    resulta, resultb = metrics

    print("Results: ")
    print("Regular results", resulta['avg_test_acc'])
    print("Adversarial results", resultb['avg_test_acc'])

    # The file name has no separator between use_USE and USE_method;
    # kept so existing result files are still appended to.
    fname = ("results/fse/ind-result-" + task.lower() + "-" + str(u) + str(um)
             + "-" + str(p) + "-" + str(c) + "-" + str(t) + "-" + str(v)
             + ".txt")
    row = ("-".join(str(value) for value in (u, um, p, c, t, v))
           + "," + str(resulta['avg_test_acc'])
           + "," + str(resultb['avg_test_acc']))
    # The context manager closes the file even if the write fails.
    with open(fname, "a") as f:
        f.write(row + "\n")
def debug():
    """Exercise SWITCH interactively through FireBERT_FSE models.

    First, for a handful of hard-coded MNLI premise/hypothesis pairs,
    prints the five most important non-stop-word words reported by
    SWITCH. Then, for one IMDB review, prints the candidate texts
    generated with ``perturb_words`` set to 2 and again with 5.
    """
    from processors import MnliProcessor, ImdbProcessor
    from firebert_fse import FireBERT_FSE

    print("Simple SWITCH tests (through FSE)")

    # --- MNLI: word importance -------------------------------------------
    mnli_hparams = {
        'batch_size': 32,
        'leave_alone': 0,
        'random_out_of': 0,
        'judge_bert': False
    }
    mnli_model = FireBERT_FSE(
        load_from='resources/models/MNLI/pytorch_model.bin',
        processor=MnliProcessor(),
        hparams=mnli_hparams)

    for _ in range(3):
        print()

    # (premise, hypothesis, label) triples
    samples = [
        ("Conceptually cream skimming has two basic dimensions - product and geography.",
         "Product and geography are what make cream skimming work.",
         "neutral"),
        ("He writes that it 's the first time he 's added such a track .",
         "This is the first time he 's added such a track .",
         "neutral"),
        ("The new rights are nice enough .",
         "Everyone really likes the newest benefits .",
         "neutral"),
        ("This site includes a list of all award winners and a searchable database of Government Executive articles.",
         "The Government Executive articles housed on the website are not able to be searched.",
         "contradiction"),
        ("yeah i i think my favorite restaurant is always been the one closest you know the closest as long as it's it meets the minimum criteria you know of good food",
         "My favorite restaurants are always at least a hundred miles away from my house.",
         "contradiction"),
        ("Calcutta seems to be the only other production center having any pretensions to artistic creativity at all, but ironically you're actually more likely to see the works of Satyajit Ray or Mrinal Sen shown in Europe or North America than in India itself.",
         "Most of Mrinal Sen's work can be found in European collections.",
         "neutral"),
    ]

    def top_n(index, words, n):
        # Map indices to words (skipping the -1 placeholder), drop stop
        # words, and keep the first n survivors.
        kept = [
            words[i] for i in index
            if i != -1 and words[i] not in mnli_model.switch.stop_words
        ]
        return kept[:n]

    for premise, hypothesis, gold in samples:
        # Make a "feature" tensor set out of the raw text pair.
        example, input_ids, attention_mask, token_type_ids, _ = \
            mnli_model.processor.make_single_example_with_features(
                premise, hypothesis, gold)

        # Ask SWITCH for the word importance ordering.
        word_indices, _, word_list = \
            mnli_model.switch.get_important_indices_from_example(
                example, input_ids, token_type_ids, attention_mask)

        print("Premise:", premise)
        print("Original hypothesis:", hypothesis, "(original label: ", gold, ")")
        print("Top 5 hypothesis words (new):", top_n(word_indices, word_list, 5))
        print()

    # --- IMDB: candidate generation --------------------------------------
    print()
    print()
    print("IMDB test")
    print()

    imdb_hparams = {
        'batch_size': 32,
        'leave_alone': 0,
        'random_out_of': 0,
        'judge_bert': False,
        'perturb_words': 2
    }
    imdb_model = FireBERT_FSE(
        load_from='resources/models/IMDB/pytorch_model.bin',
        processor=ImdbProcessor(),
        hparams=imdb_hparams)

    review = "This movie is truly fun for the whole family. Adults and kids will totally enjoy it!"
    review_example, _, _, _, _ = \
        imdb_model.processor.make_single_example_with_features(review, None, 1)

    candidates = imdb_model.switch.generate_candidates_from_example(review_example)
    print(review)
    for candidate in candidates:
        print(":", candidate)
    print()

    # Same settings, but perturb five words instead of two.
    imdb_model.update_hparams({**imdb_hparams, 'perturb_words': 5})

    candidates = imdb_model.switch.generate_candidates_from_example(review_example)
    print(review)
    for candidate in candidates:
        print(":", candidate)
'use_full_example': use_full_example, 'leave_alone': leave_alone, 'random_out_of': random_out_of, 'judge_bert': judge_bert } print(hparams) proc_hparams = {} # delete this next line to run full 100% proc_hparams.update({'sample_percent': 3, 'randomize': True}) # instantiate the model used for SWITCH switch_model = LightningBertForSequenceClassification(load_from = 'resources/models/MNLI/pytorch_model.bin', processor = MnliProcessor(), hparams = {'batch_size': 6 }) switch_model.cuda() model = FireBERT_FCT(switch_model=switch_model, processor=MnliProcessor(hparams=proc_hparams), hparams=hparams) processor = model.get_processor() train_dataset, train_examples = processor.load_and_cache_examples("data/MNLI", example_set='train') val_dataset, _ = processor.load_and_cache_examples("data/MNLI", example_set='dev') test_dataset, _ = processor.load_and_cache_examples("data/MNLI", example_set='test') model.set_train_dataset(train_dataset, train_examples) model.set_val_dataset(val_dataset) model.set_test_dataset(test_dataset)