예제 #1
0
def test_FireBERT_FCT():
    """Smoke-test a FireBERT_FCT classifier on a sample of the MNLI dev set.

    Builds an ``MnliProcessor`` configured with ``sample_percent`` 3, loads
    the MNLI weights into ``FireBERT_FCT``, attaches the ``dev`` example set
    as the test dataset and runs a single PyTorch Lightning test pass.
    """
    print("Simple FireBERT_FCT tests")

    # Per the original author's note, a negative 'sample_percent' would be
    # read as an absolute number of samples instead of a percentage.
    mnli_processor = MnliProcessor({'sample_percent': 3})

    # Hyperparameters handed to the model.
    model_hparams = {'batch_size': 32, 'sample_percent': 3}

    classifier = FireBERT_FCT(
        load_from='resources/models/MNLI/pytorch_model.bin',
        processor=mnli_processor,
        hparams=model_hparams,
    )

    # From here on, work with the processor the model itself reports.
    model_processor = classifier.get_processor()
    dev_dataset, dev_examples = model_processor.load_and_cache_examples(
        "data/MNLI", example_set='dev')
    classifier.set_test_dataset(dev_dataset, dev_examples)

    # Ask for one GPU when CUDA is available, none otherwise.
    if torch.cuda.is_available():
        gpu_count = 1
    else:
        gpu_count = 0
    runner = pl.Trainer(gpus=gpu_count)
    runner.test(classifier)

    # The metrics are looked up but the value is not used.
    runner.tqdm_metrics
예제 #2
0
def test_FireBERT_base(task, set, reps=1, sample=1, hparams_default=None, tf=False):
    """Evaluate FireBERT_base on a regular and an adversarial example set.

    Two models are loaded from the same checkpoint; one is tested on
    ``set`` and the other on ``"adv_" + set``. After each test pass one row
    is appended to ``results/base/hparams-results.csv``.

    Args:
        task: ``"MNLI"`` or ``"IMDB"``; selects the processor class, the
            model directory and the data directory.
        set: Name of the example set to load (passed as ``example_set``).
            The name shadows the builtin but is kept for keyword callers.
        reps: Unused; kept for signature compatibility.
        sample: Value passed to the processors as ``sample_percent``. Per
            the original author's note, a negative number means an absolute
            number of samples rather than a percentage.
        hparams_default: Hyperparameters passed to both models. ``None``
            (the default) means an empty dict.
        tf: When false, ``"_on_lightning"`` is appended to the model
            directory name.

    Raises:
        ValueError: If ``task`` is neither ``"MNLI"`` nor ``"IMDB"``.
    """
    # A fresh dict per call avoids sharing one mutable default object
    # between invocations.
    hparams = {} if hparams_default is None else hparams_default

    # Pick the processor class for the task; fail loudly on anything else
    # instead of hitting an unbound local further down.
    if task == "MNLI":
        processor_cls = MnliProcessor
    elif task == "IMDB":
        processor_cls = ImdbProcessor
    else:
        raise ValueError("unsupported task: {!r}".format(task))

    processor = processor_cls({'sample_percent': sample})
    adv_processor = processor_cls({'sample_percent': sample})

    lightning = "_on_lightning" if not tf else ""
    checkpoint = 'resources/models/' + task + lightning + '/pytorch_model.bin'

    # Model for the regular set.
    model = FireBERT_base(load_from=checkpoint,
                          processor=processor,
                          hparams=hparams)
    dataset, examples = processor.load_and_cache_examples(
        "data/" + task, example_set=set)
    model.set_test_dataset(dataset, examples)

    # Model for the adversarial set.
    # NOTE(review): this model is built with `processor`, while its data is
    # loaded through `adv_processor`; kept as in the original - confirm
    # whether `adv_processor` was intended here.
    model_adv = FireBERT_base(load_from=checkpoint,
                              processor=processor,
                              hparams=hparams)
    dataset_adv, examples_adv = adv_processor.load_and_cache_examples(
        "data/" + task, example_set="adv_" + set)
    model_adv.set_test_dataset(dataset_adv, examples_adv)

    # Test both models in turn and append one CSV row after each pass, so
    # the first result is on disk even if the second pass fails.
    for set_label, subject in ((set, model), ("adv_" + set, model_adv)):
        trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
        trainer.test(subject)
        result = trainer.tqdm_metrics

        # `with` closes the handle even when formatting or writing raises.
        # print(..., sep="") is kept so every field is rendered via str().
        with open("results/base/hparams-results.csv", "a+") as f:
            print(task, ",", set_label, ",", sample, ',"', hparams, '",',
                  result['avg_test_acc'], sep="", file=f)

    print("baseline data logged.")
    elapsed_time()
    print()
예제 #3
0
def test():
    """Smoke-test LightningBertForSequenceClassification on the MNLI dev set.

    Loads the MNLI weights with a default ``MnliProcessor``, attaches the
    ``dev`` example set as the test dataset and runs a single PyTorch
    Lightning test pass.
    """
    print("Simple base class tests")

    # Hyperparameters handed to the model.
    model_hparams = {'batch_size': 32}

    classifier = LightningBertForSequenceClassification(
        load_from='resources/models/MNLI/pytorch_model.bin',
        processor=MnliProcessor(),
        hparams=model_hparams,
    )

    # Load the data through the processor the model itself reports.
    model_processor = classifier.get_processor()
    dev_dataset, dev_examples = model_processor.load_and_cache_examples(
        "data/MNLI", example_set='dev')
    classifier.set_test_dataset(dev_dataset, dev_examples)

    # Ask for one GPU when CUDA is available, none otherwise.
    if torch.cuda.is_available():
        gpu_count = 1
    else:
        gpu_count = 0
    runner = pl.Trainer(gpus=gpu_count)
    runner.test(classifier)

    # The metrics are looked up but the value is not used.
    runner.tqdm_metrics
예제 #4
0
def test_FireBERT_FVE(task, set, reps=1, sample=1, hparams_default=None, hparams_lists=None, lightning=''):
    """Evaluate FireBERT_FVE on a regular and an adversarial example set.

    Two models are loaded from the same checkpoint; one is tested on
    ``set`` and the other on ``"adv_" + set``. Each iteration appends one
    row with both accuracies to ``results/five/hparams-results.csv``.

    Args:
        task: ``"MNLI"`` or ``"IMDB"``; selects the processor class, the
            model directory and the data directory.
        set: Name of the example set to load (passed as ``example_set``).
            The name shadows the builtin but is kept for keyword callers.
        reps: Number of iterations when ``hparams_lists`` is given. Without
            ``hparams_lists`` at most one iteration runs.
        sample: Value passed to the processors as ``sample_percent``. Per
            the original author's note, a negative number means an absolute
            number of samples rather than a percentage.
        hparams_default: Base hyperparameters. The function works on a copy,
            so neither the caller's dict nor a shared default is modified.
        hparams_lists: Optional mapping from hyperparameter name to a
            sequence of candidate values; each iteration picks one value
            per key with ``random.choice``.
        lightning: Suffix appended to ``task`` in the model directory name.

    Raises:
        ValueError: If ``task`` is neither ``"MNLI"`` nor ``"IMDB"``.
    """
    # Copy: the search loop below writes into this dict, and writing into
    # the argument (or a shared `{}` default) would leak sampled values
    # into later calls.
    hparams = {} if hparams_default is None else dict(hparams_default)

    # Pick the processor class for the task; fail loudly on anything else
    # instead of hitting an unbound local further down.
    if task == "MNLI":
        processor_cls = MnliProcessor
    elif task == "IMDB":
        processor_cls = ImdbProcessor
    else:
        raise ValueError("unsupported task: {!r}".format(task))

    checkpoint = 'resources/models/' + task + lightning + '/pytorch_model.bin'

    # Regular set.
    processor = processor_cls({'sample_percent': sample})
    model = FireBERT_FVE(load_from=checkpoint,
                         processor=processor,
                         hparams=hparams)
    processor.set_tokenizer(model.tokenizer)

    dataset, examples = processor.load_and_cache_examples(
        "data/" + task, example_set=set)
    model.set_test_dataset(dataset, examples)

    # Adversarial set.
    adv_processor = processor_cls({'sample_percent': sample})

    # NOTE(review): this model is built with `processor`, and the
    # adversarial processor receives `model.tokenizer` rather than
    # `model_adv.tokenizer`; both kept as in the original - confirm intent.
    model_adv = FireBERT_FVE(load_from=checkpoint,
                             processor=processor,
                             hparams=hparams)
    adv_processor.set_tokenizer(model.tokenizer)

    dataset_adv, examples_adv = adv_processor.load_and_cache_examples(
        "data/" + task, example_set="adv_" + set)
    model_adv.set_test_dataset(dataset_adv, examples_adv)

    for i in range(reps):
        if hparams_lists is None:
            print("FireBERT_FVE specific test", task, set)
        else:
            print("FireBERT_FVE hparam test", task, set)
            # Sample one value per listed hyperparameter and echo the pick.
            print("{")
            for key, values in hparams_lists.items():
                hparams[key] = random.choice(values)
                print("  '"+key+"':", str(hparams[key])+",")
            print("}")

        # Push the (possibly re-sampled) hyperparameters into both models.
        model.update_hparams(hparams)
        model_adv.update_hparams(hparams)

        trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
        trainer.test(model)
        result1 = trainer.tqdm_metrics

        trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
        trainer.test(model_adv)
        result2 = trainer.tqdm_metrics

        # `with` closes the handle even when formatting or writing raises.
        # print(..., sep="") is kept so every field is rendered via str().
        with open("results/five/hparams-results.csv", "a+") as f:
            print(task, ",", "adv_"+set, ",", sample, ',"', hparams, '",',
                  result1['avg_test_acc'], ",", result2['avg_test_acc'],
                  sep="", file=f)

        print("iteration", i, "logged.")
        elapsed_time()
        print()

        # Without a search space every iteration would be identical.
        if hparams_lists is None:
            break
예제 #5
0
    'use_full_example': use_full_example,
    'leave_alone': leave_alone,
    'random_out_of': random_out_of,
    'judge_bert': judge_bert
}

# Echo the hyperparameters assembled above.
print(hparams)

# Options for the MNLI processor that feeds the FireBERT_FCT model.
proc_hparams = {}
# delete this next line to run full 100%
proc_hparams.update({'sample_percent': 3, 'randomize': True})

# instantiate the model used for SWITCH
switch_model = LightningBertForSequenceClassification(
    load_from='resources/models/MNLI/pytorch_model.bin',
    processor=MnliProcessor(),
    hparams={'batch_size': 6})
# NOTE(review): unconditional .cuda() call - presumably requires a CUDA
# device to be present; confirm before running on a CPU-only machine.
switch_model.cuda()

# The FCT model receives the SWITCH model plus its own processor, which is
# the one configured with proc_hparams.
model = FireBERT_FCT(switch_model=switch_model,
                     processor=MnliProcessor(hparams=proc_hparams),
                     hparams=hparams)

# Load all three example sets through the processor the model reports.
processor = model.get_processor()

train_dataset, train_examples = processor.load_and_cache_examples(
    "data/MNLI", example_set='train')
# Only the datasets are kept for dev and test; the examples are discarded.
val_dataset, _ = processor.load_and_cache_examples("data/MNLI",
                                                   example_set='dev')
test_dataset, _ = processor.load_and_cache_examples("data/MNLI",
                                                    example_set='test')
예제 #6
0
def test_iter_FireBERT_FSE(set='mnli', sample_pct=3):
    """Run one randomized hyperparameter trial of FireBERT_FSE.

    Draws random values for several hyperparameters, tests the model on the
    ``dev`` and ``adv_dev`` example sets of the chosen task, and appends
    one line with the drawn values and both accuracies to
    ``results/fse/<task>-hparams-results.txt``.

    Args:
        set: ``'mnli'`` or ``'imdb'``. Any other value prints an error
            message and returns without testing. The name shadows the
            builtin but is kept for keyword callers.
        sample_pct: Value passed to the processor as ``sample_percent``.
            Per the original author's note, a negative number means an
            absolute number of samples rather than a percentage.
    """
    import gc
    import random
    gc.enable()

    print("Simple FireBERT_FSE tests")

    # Draw the trial's hyperparameters. The order of the draws is kept
    # fixed so that a seeded generator reproduces the same trial.
    u = random.randint(2, 20)
    p = random.randint(2, 15)
    c = random.randint(3, 15)
    t = random.randint(3, 20)
    b = 32
    v = bool(random.getrandbits(1))

    hparams = {
        'use_USE': True,
        'USE_method': "filter",
        'USE_multiplier': u,
        'stop_words': True,
        'perturb_words': p,
        'candidates_per_word': c,
        'total_alternatives': t,
        'match_pos': True,
        'batch_size': b,
        'verbose': False,
        'vote_avg_logits': v
    }

    # Both tasks follow the same procedure; only the processor class and
    # the task name used in paths and messages differ.
    if set == 'mnli':
        processor_cls, task = MnliProcessor, "MNLI"
    elif set == 'imdb':
        processor_cls, task = ImdbProcessor, "IMDB"
    else:
        print("Invalid data set selected!")
        return

    processor = processor_cls({'sample_percent': sample_pct})
    model = FireBERT_FSE(
        load_from='resources/models/' + task + '/pytorch_model.bin',
        processor=processor,
        hparams=hparams)

    processor = model.get_processor()

    # Test on the regular dev set first, then on the adversarial one.
    metrics = []
    for example_set, description in (('dev', "dev set"),
                                     ('adv_dev', "adversarial dev set")):
        dataset, examples = processor.load_and_cache_examples(
            "data/" + task, example_set=example_set)
        model.set_test_dataset(dataset, examples)

        trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
        print(task + ": Start testing " + description)
        trainer.test(model)
        metrics.append(trainer.tqdm_metrics)

    resulta, resultb = metrics

    # One line per trial: dash-joined hyperparameters, then both accuracies.
    line = ("-".join(str(value) for value in (u, p, c, t, b, v)) + "," +
            str(resulta['avg_test_acc']) + "," +
            str(resultb['avg_test_acc']))
    fname = "results/fse/" + task.lower() + "-hparams-results.txt"
    # `with` closes the handle even when the write raises.
    with open(fname, "a") as f:
        f.write(line)
        f.write("\n")

    # Drop the references to the large objects before collecting;
    # presumably so repeated trials do not accumulate memory.
    del processor, dataset, examples, model, trainer
    gc.collect()
예제 #7
0
def test_param_FireBERT_FSE(hparams, dset='mnli', sample_pct=3):
    """Test FireBERT_FSE with a fixed, caller-supplied set of hyperparameters.

    This function exists specifically to reproduce results from randomized
    control trials. It tests the model on the ``dev`` and ``adv_test``
    example sets of the chosen task, prints both accuracies and appends
    them to a per-configuration file under ``results/fse/``.

    Args:
        hparams: Hyperparameter dict passed to the model. It must contain
            the keys ``use_USE``, ``USE_method``, ``perturb_words``,
            ``candidates_per_word``, ``total_alternatives`` and
            ``vote_avg_logits``, which are used to name the result file.
        dset: ``'mnli'`` or ``'imdb'``. Any other value prints an error
            message and returns without testing.
        sample_pct: Value passed to the processor as ``sample_percent``.
            Per the original author's note, a negative number means an
            absolute number of samples rather than a percentage.

    Raises:
        KeyError: If one of the required keys is missing from ``hparams``.
    """
    # Read the values that identify this configuration up front, so a
    # missing key fails before any model is loaded.
    u = hparams['use_USE']
    um = hparams['USE_method']
    p = hparams['perturb_words']
    c = hparams['candidates_per_word']
    t = hparams['total_alternatives']
    v = hparams['vote_avg_logits']

    print("Simple Static FireBERT_FSE test")

    # Both tasks follow the same procedure; only the processor class and
    # the task name used in paths differ.
    if dset == 'mnli':
        processor_cls, task = MnliProcessor, "MNLI"
    elif dset == 'imdb':
        processor_cls, task = ImdbProcessor, "IMDB"
    else:
        print("Invalid data set selected!")
        return

    processor = processor_cls({'sample_percent': sample_pct})
    model = FireBERT_FSE(
        load_from='resources/models/' + task + '/pytorch_model.bin',
        processor=processor,
        hparams=hparams)

    processor = model.get_processor()

    # Test on the regular dev set first, then on the adversarial test set.
    metrics = []
    for example_set in ('dev', 'adv_test'):
        dataset, examples = processor.load_and_cache_examples(
            "data/" + task, example_set=example_set)
        model.set_test_dataset(dataset, examples)

        trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
        print("Start testing")
        trainer.test(model)
        metrics.append(trainer.tqdm_metrics)

    resulta, resultb = metrics

    print("Results: ")
    print("Regular results", resulta['avg_test_acc'])
    print("Adversarial results", resultb['avg_test_acc'])

    # The file name has no dash between `u` and `um`; the logged line does.
    tail = "-".join(str(value) for value in (p, c, t, v))
    fname = ("results/fse/ind-result-" + task.lower() + "-" + str(u) +
             str(um) + "-" + tail + ".txt")
    line = (str(u) + "-" + str(um) + "-" + tail + "," +
            str(resulta['avg_test_acc']) + "," +
            str(resultb['avg_test_acc']))
    # `with` closes the handle even when the write raises.
    with open(fname, "a") as f:
        f.write(line)
        f.write("\n")
예제 #8
0
def debug():
    """Print SWITCH diagnostics obtained through FireBERT_FSE models.

    First loads the MNLI model and, for a handful of hand-picked
    premise/hypothesis pairs, prints the top five non-stop-word entries of
    the word list ordered by the indices SWITCH returns. Then loads the
    IMDB model and prints the candidate texts generated for one review,
    once with ``perturb_words`` set to 2 and once with it set to 5.
    """
    from processors import MnliProcessor, ImdbProcessor
    from firebert_fse import FireBERT_FSE

    print("Simple SWITCH tests (through FSE)")

    # Settings shared by every model configuration used below; each use
    # gets its own copy.
    common_hparams = {
        'batch_size': 32,
        'leave_alone': 0,
        'random_out_of': 0,
        'judge_bert': False,
    }

    # MNLI model (an FSE classifier).
    mnli_hparams = dict(common_hparams)
    classifier = FireBERT_FSE(
        load_from='resources/models/MNLI/pytorch_model.bin',
        processor=MnliProcessor(),
        hparams=mnli_hparams)

    for _ in range(3):
        print()

    # (premise, hypothesis, label) triples.
    nli_samples = [
        ("Conceptually cream skimming has two basic dimensions - product and geography.",
         "Product and geography are what make cream skimming work.",
         "neutral"),
        ("He writes that it 's the first time he 's added such a track .",
         "This is the first time he 's added such a track .",
         "neutral"),
        ("The new rights are nice enough .",
         "Everyone really likes the newest benefits .",
         "neutral"),
        ("This site includes a list of all award winners and a searchable database of Government Executive articles.",
         "The Government Executive articles housed on the website are not able to be searched.",
         "contradiction"),
        ("yeah i i think my favorite restaurant is always been the one closest  you know the closest as long as it's it meets the minimum criteria you know of good food",
         "My favorite restaurants are always at least a hundred miles away from my house.",
         "contradiction"),
        ("Calcutta seems to be the only other production center having any pretensions to artistic creativity at all, but ironically you're actually more likely to see the works of Satyajit Ray or Mrinal Sen shown in Europe or North America than in India itself.",
         "Most of Mrinal Sen's work can be found in European collections.",
         "neutral"),
    ]

    def top_n(index, words, n):
        # Walk the indices in order, skip the -1 entries and stop words,
        # and keep at most the first n survivors.
        kept = []
        for position in index:
            if position != -1:
                candidate = words[position]
                if candidate not in classifier.switch.stop_words:
                    kept.append(candidate)
        return kept[:n]

    for premise, hypothesis, gold_label in nli_samples:
        # Turn the raw pair into an example plus its feature tensors.
        features = classifier.processor.make_single_example_with_features(
            premise, hypothesis, gold_label)
        example, input_ids, attention_mask, token_type_ids, _ = features

        # Use SWITCH to figure out word importance within the list.
        importance = classifier.switch.get_important_indices_from_example(
            example, input_ids, token_type_ids, attention_mask)
        word_indices, _token_indices, word_list = importance

        print("Premise:", premise)
        print("Original hypothesis:", hypothesis, "(original label: ",
              gold_label, ")")
        print("Top 5 hypothesis words (new):",
              top_n(word_indices, word_list, 5))
        print()

    print()

    print()
    print("IMDB test")
    print()

    # IMDB model; the same name is rebound so the MNLI model is released.
    imdb_hparams = dict(common_hparams, perturb_words=2)
    classifier = FireBERT_FSE(
        load_from='resources/models/IMDB/pytorch_model.bin',
        processor=ImdbProcessor(),
        hparams=imdb_hparams)

    text = "This movie is truly fun for the whole family. Adults and kids will totally enjoy it!"

    # Only the example itself is needed from the returned features.
    features = classifier.processor.make_single_example_with_features(
        text, None, 1)
    example, _input_ids, _attention_mask, _token_type_ids, _label = features

    def show_candidates():
        # Print the source text followed by each generated candidate.
        variants = classifier.switch.generate_candidates_from_example(example)
        print(text)
        for variant in variants:
            print(":", variant)

    show_candidates()
    print()

    # Make the model use new hyperparameters and generate again.
    classifier.update_hparams(dict(common_hparams, perturb_words=5))
    show_candidates()
예제 #9
0
            'use_full_example': use_full_example,
            'leave_alone': leave_alone,
            'random_out_of': random_out_of,
            'judge_bert': judge_bert
           }

# Echo the hyperparameters assembled above.
print(hparams)

# Options for the MNLI processor that feeds the FireBERT_FCT model.
proc_hparams = {}
# delete this next line to run full 100%
proc_hparams.update({'sample_percent': 3,
                     'randomize': True})

# instantiate the model used for SWITCH
switch_model = LightningBertForSequenceClassification(load_from = 'resources/models/MNLI/pytorch_model.bin', 
                                                      processor = MnliProcessor(), 
                                                      hparams = {'batch_size': 6 })
# NOTE(review): unconditional .cuda() call - presumably requires a CUDA
# device to be present; confirm before running on a CPU-only machine.
switch_model.cuda()

# The FCT model receives the SWITCH model plus its own processor, which is
# the one configured with proc_hparams.
model = FireBERT_FCT(switch_model=switch_model, processor=MnliProcessor(hparams=proc_hparams), hparams=hparams)

# Load all three example sets through the processor the model reports.
processor = model.get_processor()

train_dataset, train_examples = processor.load_and_cache_examples("data/MNLI", example_set='train')
# Only the datasets are kept for dev and test; the examples are discarded.
val_dataset, _ = processor.load_and_cache_examples("data/MNLI", example_set='dev')
test_dataset, _ = processor.load_and_cache_examples("data/MNLI", example_set='test')

# Attach the datasets to the model; only the training set is passed
# together with its examples.
model.set_train_dataset(train_dataset, train_examples)
model.set_val_dataset(val_dataset)
model.set_test_dataset(test_dataset)