Example #1
def test_non_bow():
    """Test if can learn programs that based of queries that require something
    stronger than a bag-of-words assumption"""
    sad = AIProgramDescription(name="sad", )
    what = AIProgramDescription(name="what", )
    weird = AIProgramDescription(name="weird", )
    traindata = [
        ("the dog bit bob", "sad"),
        ("bob bit the dog", "weird"),
        ("the bit dog bob", "what"),
        ("bob dog the bit", "what"),
    ]
    valdata = traindata

    # Do training
    train_output = train.run_with_specific_split(traindata,
                                                 valdata, [sad, what, weird],
                                                 False,
                                                 quiet_mode=True,
                                                 num_epochs=200)
    meta_model, final_state, train_iter, val_iter = train_output

    # Eval the model. Expect basically perfect program picking.
    bashmetric = BashMetric()
    train.eval_model(meta_model, train_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc(
    ) >= 0.98, "Did not fully learn non-bag-of-words"
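All of these tests score predictions through a BashMetric accumulator that is never shown in the excerpts. A minimal sketch of such an exact-match metric, written as an assumption rather than the project's actual class (how eval_model feeds it pairs is also assumed), could look like:

class BashMetric:
    # Accumulates (predicted command, reference command) pairs and reports accuracies.
    def __init__(self):
        self._exact = 0
        self._first_cmd = 0
        self._total = 0

    def add_pair(self, predicted_cmd, reference_cmd):  # hypothetical hook called by eval_model
        self._total += 1
        if predicted_cmd.split() == reference_cmd.split():
            self._exact += 1
        if predicted_cmd.split()[:1] == reference_cmd.split()[:1]:
            self._first_cmd += 1

    def exact_match_acc(self):
        return self._exact / max(self._total, 1)

    def first_cmd_acc(self):
        return self._first_cmd / max(self._total, 1)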
Example #2
def test_case_sensitive():
    posArg = Argument("aposarg", "Stringlike", position=0)
    cow = AIProgramDescription(name="hello", arguments=[posArg])
    traindata = [
        ("my name is bob", "hello bob"),
        ("my name is Bob", "hello Bob"),
        ("my name is BoB", "hello BoB"),
        ("my name is boB", "hello boB"),
        ("my name is BOB", "hello BOB"),
    ]
    valdata = traindata

    # Do training
    train_output = train.run_with_specific_split(traindata,
                                                 valdata, [cow],
                                                 False,
                                                 quiet_mode=True,
                                                 num_epochs=200)
    meta_model, final_state, train_iter, val_iter = train_output

    # Eval the model. Expect basically perfect program picking.
    bashmetric = BashMetric()
    train.eval_model(meta_model, train_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc(
    ) >= 0.98, "Did not fully learn to gen case sensitive"
Example #3
def train_and_serialize():
    aArg = Argument("a", "Stringlike")
    bArg = Argument("b", "Stringlike")
    cow = AIProgramDescription(name="cow", arguments=[aArg, bArg])
    cArg = Argument("a", "Stringlike")
    dArg = Argument("b", "Stringlike")
    dog = AIProgramDescription(name="dog", arguments=[cArg, dArg])
    data = [
        ("have a bone puppy", "dog -a bone -b woof"),
        ("have a snack puppy", "dog -a snack -b woof"),
        ("have a apple puppy", "dog -a apple -b woof"),
        ("woof please", "dog -b woof"),
        ("nothin puppy", "dog"),
        ("nothin cow", "cow"),
        ("moo please", "cow -b moo"),
        ("have a grass cow", "cow -a grass -b moo"),
        ("have a plant cow", "cow -a plant -b moo"),
    ] * 100
    random.shuffle(data)
    # Do training
    train_output = train.run_with_data_list(data, [cow, dog],
                                            False,
                                            quiet_mode=True,
                                            num_epochs=3)
    meta_model, final_state, train_iter, val_iter = train_output

    # Eval the model. Expect basically perfect program picking.
    bashmetric = BashMetric()
    train.eval_model(meta_model, val_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc() == 1.00

    fn = ".tester.pkl"
    serialize_tools.serialize(meta_model, fn)

    return val_iter, data, fn, meta_model.run_context.nl_field
Example #4
def test_copy_mechanism():
    posArg = Argument("aposarg", "Stringlike", position=0)
    cow = AIProgramDescription(name="hello", arguments=[posArg])
    traindata = [
        ("my name is bob", "hello bob"),
        ("my name is alice", "hello alice"),
        ("my name is Alice", "hello Alice"),
        ("my name is eve", "hello eve"),
        ("my name is jim", "hello jim"),
        ("my name is gregthefifth", "hello gregthefifth"),
    ]
    valdata = [
        ("my name is hoozawhatz", "hello hoozawhatz"),
        ("my name is boogieman", "hello boogieman"),
        ("my name is frankenstien", "hello frankenstien"),
        ("my name is walle", "hello walle"),
    ]

    # Do training
    train_output = train.run_with_specific_split(traindata,
                                                 valdata, [cow],
                                                 False,
                                                 quiet_mode=True,
                                                 num_epochs=200)
    meta_model, final_state, train_iter, val_iter = train_output

    # Eval the model. Expect basically perfect program picking.
    bashmetric = BashMetric()
    train.eval_model(meta_model, train_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc() >= 0.98, "Did not fully learn train"
    bashmetric = BashMetric()
    train.eval_model(meta_model, val_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc() >= 0.98, "Did not generalize to val"
Example #5
def test_copy_long_seq():
    posArg = Argument("aposarg", "Stringlike", position=0)
    cow = AIProgramDescription(name="hello", arguments=[posArg])
    traindata = [
        ("my name is a/b/c", "hello a/b/c"),
        ("my name is ab91.sb01", "hello ab91.sb01"),
        ("my name is John", "hello John"),
        ("my name is a/b/c/d/e/f/g", "hello a/b/c/d/e/f/g"),
        ("my name is a.b.cd.e.f.g", "hello a.b.cd.e.f.g"),
        ("my name is t.h.i.s.has.g.o.n.e.t.o.o.f.a.r",
         "hello t.h.i.s.has.g.o.n.e.t.o.o.f.a.r"),
    ]
    valdata = [
        ("my name is c/d/e/f/g/h/q", "hello c/d/e/f/g/h/q"),
        ("my name is how.long_can_this_be", "hello how.long_can_this_be"),
        ("my name is real.l.l.l.l.l.l.y", "hello  real.l.l.l.l.l.l.y"),
        ("my name is yo", "hello yo"),
    ]

    # Do training
    train_output = train.run_with_specific_split(traindata,
                                                 valdata, [cow],
                                                 False,
                                                 quiet_mode=True,
                                                 num_epochs=200)
    meta_model, final_state, train_iter, val_iter = train_output

    # Eval the model. Expect basically perfect program picking.
    bashmetric = BashMetric()
    train.eval_model(meta_model, train_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc() >= 0.98, "Did not fully learn train"
    bashmetric = BashMetric()
    train.eval_model(meta_model, val_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc() >= 0.98, "Did not generalize to val"
Example #6
def test_serialization(train_and_serialize):
    """Test a decently complex training task. Serialize the model. Then see if has same performance"""
    val_iter, data, fn, _ = train_and_serialize
    restored_model = serialize_tools.restore(fn)

    # Check that it still has good performance
    newmetric = BashMetric()
    train.eval_model(restored_model, val_iter, [(newmetric, 'bashmetric')])
    assert newmetric.exact_match_acc(
    ) >= 0.98, "Performance loss after restore"
Example #7
def run_test(model,
             basename,
             batch_size,
             criterion,
             subset,
             outname,
             device,
             inputdir_type0,
             inputdir_type1,
             num_examples_to_plot=100,
             num_workers=32):

    # For either multi or binary classifier use:
    testset = ActionOrNot(type0_pathname=inputdir_type0,
                          type1_pathname=inputdir_type1)

    loader = torch.utils.data.DataLoader(testset,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         pin_memory=True,
                                         num_workers=num_workers)
    print('Testing %d image examples..' % len(testset))

    if subset is not None:
        type1 = np.asarray(testset.inds_type1_examples)  # all pos
        type0 = np.random.choice(testset.inds_type0_examples,
                                 subset)  # subset neg
        loader = torch.utils.data.DataLoader(
            testset,
            batch_size=batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(
                np.concatenate((type1, type0))),
            shuffle=False,
            pin_memory=True,
            num_workers=num_workers)

    test_loss, test_acc, test_auc, misc = run_epoch(epoch=1,
                                                    loader=loader,
                                                    optimizer=None,
                                                    model=model,
                                                    criterion=criterion,
                                                    device=device,
                                                    prefix='Testing')
    test_total_outputs_class, test_total_gt_labels, test_total_filenames = misc[
        0], misc[1], misc[2]

    filename = os.path.join(outname, 'misclassifications_{}'.format(basename))
    eval_model(test_total_outputs_class,
               test_total_gt_labels,
               test_total_filenames,
               outname=filename,
               num_examples_to_plot=num_examples_to_plot)
Example #8
def test(test_version,
         test_set,
         new_model,
         eval_loss_function,
         get_true_pred,
         detach_pred,
         batch_size=32,
         just_weights=True):
    print("[info]: testing model...")
    # load model
    model, create_new = load_model.load_model(version=test_version,
                                              new_model=new_model,
                                              just_weights=just_weights,
                                              retrain=False,
                                              to_cuda=True)
    if create_new:
        print("[info]: try to test a non-trained model")
        exit(-1)

    test_loader = data.DataLoader(test_set, batch_size=batch_size)
    loss, acc = train.eval_model(model=model,
                                 data_loader=test_loader,
                                 eval_loss_function=eval_loss_function,
                                 get_true_pred=get_true_pred,
                                 detach_pred=detach_pred)
    print("[info]: test loss: {:5f}, test acc: {:4f}".format(loss, acc))
    return loss, acc
Example #9
def test_fill_pos():
    posArg = Argument("aposarg", "Stringlike", position=0)
    cow = AIProgramDescription(name="cow", arguments=[posArg])
    data = [("cow goes woof", "cow woof woof"), ("cow goes moo", "cow moo"),
            ("cow goes meow", "cow meow meow meow")] * 100
    random.shuffle(data)
    # Do training
    train_output = train.run_with_data_list(data, [cow],
                                            False,
                                            quiet_mode=True,
                                            num_epochs=3)
    meta_model, final_state, train_iter, val_iter = train_output

    # Eval the model. Expect basically perfect program picking.
    bashmetric = BashMetric()
    train.eval_model(meta_model, val_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc() >= 0.98
Example #10
def test_pipe_select():
    cow = AIProgramDescription(name="cow")
    dog = AIProgramDescription(name="dog")
    data = [("I need a plumber", "cow | dog"), ("no plumber needed", "cow"),
            ("dog in my pipes", "dog | cow")] * 100
    random.shuffle(data)
    # Do training
    train_output = train.run_with_data_list(data, [cow, dog],
                                            False,
                                            quiet_mode=True,
                                            num_epochs=5)
    meta_model, final_state, train_iter, val_iter = train_output

    # Eval the model. Expect basically perfect program picking.
    bashmetric = BashMetric()
    train.eval_model(meta_model, val_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc() >= 0.98
Example #11
def run_simulation(model, loader, device):
    model.eval()
    with torch.no_grad():
        predictions = eval_model(model,
                                 loader,
                                 device,
                                 return_predictions=True)
    return predictions
Example #12
def test_fill_multival():
    aArg = Argument("a", "Stringlike")
    bArg = Argument("b", "Stringlike")
    cow = AIProgramDescription(name="foo", arguments=[aArg, bArg])
    data = [("how are you", "foo -a good -b bad"),
            ("are you good or bad", "foo -a good -b bad"),
            ("flip please", "foo -a bad -b good")] * 100
    random.shuffle(data)
    # Do training
    train_output = train.run_with_data_list(data, [cow],
                                            False,
                                            quiet_mode=True,
                                            num_epochs=3)
    meta_model, final_state, train_iter, val_iter = train_output

    # Eval the model. Expect basically perfect program picking.
    bashmetric = BashMetric()
    train.eval_model(meta_model, val_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc() >= 0.98
Example #13
    def test_train_one_epoch(self):
        train_ds, test_ds = train.get_datasets()
        input_rng = onp.random.RandomState(0)
        model = train.create_model(random.PRNGKey(0))
        optimizer = train.create_optimizer(model, 0.1, 0.9)
        optimizer, train_metrics = train.train_epoch(optimizer, train_ds, 128,
                                                     0, input_rng)
        self.assertLessEqual(train_metrics['loss'], 0.27)
        self.assertGreaterEqual(train_metrics['accuracy'], 0.92)
        loss, accuracy = train.eval_model(optimizer.target, test_ds)
        self.assertLessEqual(loss, 0.06)
        self.assertGreaterEqual(accuracy, 0.98)
Example #14
def test_pick_arg():
    """A test to see if can correctly learn to predict arg existance"""
    aArg = Argument("a", "StoreTrue")
    bArg = Argument("b", "StoreTrue")
    cow = AIProgramDescription(name="cow", arguments=[aArg, bArg])
    # make some toy data. It is multiplied by a large number to ensure
    # everything will make it into both train and val
    data = [("give cow an apple", "cow -a"),
            ("give the cow a banana", "cow -b"),
            ("cow feast", "cow -a -b")] * 100
    random.shuffle(data)
    # Do training
    train_output = train.run_with_data_list(data, [cow],
                                            False,
                                            quiet_mode=True,
                                            num_epochs=5)
    meta_model, final_state, train_iter, val_iter = train_output

    # Eval the model. Expect basically perfect program picking.
    bashmetric = BashMetric()
    train.eval_model(meta_model, val_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc() >= 0.98
Example #15
def test_pick_program():
    """A test to see if can correctly learn to predict a program"""
    cow = AIProgramDescription(name="cow")
    dog = AIProgramDescription(name="dog")
    kitty = AIProgramDescription(  # originally tried cat, but....
        name="kitty")
    # make some toy data. It is multiplied by a large number to ensure
    # everything will make it into both train and val
    data = [("go moo", "cow"), ("go meow", "kitty"),
            ("please go woof", "dog")] * 100
    random.shuffle(data)
    # Do training
    train_output = train.run_with_data_list(data, [cow, dog, kitty],
                                            False,
                                            quiet_mode=True,
                                            num_epochs=3)
    meta_model, final_state, train_iter, val_iter = train_output

    # Eval the model. Expect basically perfect program picking.
    bashmetric = BashMetric()
    train.eval_model(meta_model, val_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.first_cmd_acc() >= 0.98
Example #16
def get_results(test_ds, type):
    dl = DataLoader(test_ds, batch_size=len(test_ds), num_workers=5)
    retdict = train.eval_model(model, dl, args.OP_tgt, 3)
    model_dir = os.path.join('models', args.modelname)
    try:
        os.makedirs(os.path.join(model_dir, f'{type}_eval_results'))
    except:
        shutil.rmtree(os.path.join(model_dir, f'{type}_eval_results'))
        os.makedirs(os.path.join(model_dir, f'{type}_eval_results'))

    retdict['modelname'] = args.modelname
    # Print
    pprint.pprint(retdict)
    utils.pkl_dump(
        retdict, os.path.join('models', args.modelname, f'{type}_report.dict'))
Example #17
def check_ensemble():
    import train
    from torch.utils import data

    model = EnsembleModel(["googlenet-4.0", "googlenet-3.0", "googlenet-1.0"],
                          ["resnet-3.0"])
    model.eval()
    default_load_data_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "get_data/data")
    train_set, valid_set, test_set = import_data.import_dataset(
        load_dir=default_load_data_dir)

    test_loader = data.DataLoader(test_set, batch_size=32)

    loss, acc = train.eval_model(
        model, test_loader, lambda pred, y, x: nn.CrossEntropyLoss()(pred, y),
        lambda x: x, lambda x: x.detach())
    print("loss: {}, acc: {}".format(loss, acc))
Example #18
    def test_single_train_step(self):
        train_ds, test_ds = train.get_datasets()
        batch_size = 32
        model = train.create_model(random.PRNGKey(0))
        optimizer = train.create_optimizer(model, 0.1, 0.9)

        # test single train step.
        optimizer, train_metrics = train.train_step(
            optimizer=optimizer,
            batch={k: v[:batch_size]
                   for k, v in train_ds.items()})
        self.assertLessEqual(train_metrics['loss'], 2.302)
        self.assertGreaterEqual(train_metrics['accuracy'], 0.0625)

        # Run eval model.
        loss, accuracy = train.eval_model(optimizer.target, test_ds)
        self.assertLess(loss, 2.252)
        self.assertGreater(accuracy, 0.2597)
Example #19
def meta_valid(model: nn.Module,
               task: Task,
               inner_optim,
               n_inner,
               support_set_size=8,
               query_set_size=8):
    model_cp, ep, task_classifier = train_support(model,
                                                  task,
                                                  inner_optim,
                                                  n_inner,
                                                  n_train=support_set_size,
                                                  n_test=query_set_size)
    results = eval_model(model_cp, task_classifier, ep["query_set"], task.loss,
                         False, False, task.n_classes == 2)

    del model_cp
    del ep
    del task_classifier
    torch.cuda.empty_cache()

    return results
Example #20
def eval_infl(setup,
              output_path,
              target_epoch,
              start_epoch,
              end_epoch,
              seed=0,
              gpu=0):
    device = 'cuda:%d' % (gpu, )

    # setup
    net_func, trainset, valset, testset = setup()
    n = len(trainset)

    # data split
    np.random.seed(seed)
    idx_val = np.random.permutation(n)[:nval]
    idx_train = np.setdiff1d(np.arange(n), idx_val)
    ntr, nte = idx_train.size, len(testset)
    idx_test = np.arange(nte)

    # model, optimizer, and loss
    model_init, optimizer_init = load_model(net_func, device, output_path,
                                            start_epoch)
    model = net_func().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    loss_fn = torch.nn.functional.nll_loss

    # infl
    infl_sgd_last = joblib.load('%s/infl_sgd_at_epoch%02d.dat' %
                                (output_path, target_epoch))[:, -1]
    infl_sgd_all = joblib.load('%s/infl_sgd_at_epoch%02d.dat' %
                               (output_path, target_epoch))[:, 0]
    infl_icml = joblib.load('%s/infl_icml_at_epoch%02d.dat' %
                            (output_path, target_epoch))
    np.random.seed(seed)
    infls = {
        'baseline': [],
        'icml': infl_icml,
        'sgd_last': infl_sgd_last,
        'sgd_all': infl_sgd_all,
        'random': np.random.rand(ntr)
    }

    # eval
    score = {}
    for k in k_list:
        for key in infls.keys():
            if key in score.keys():
                continue
            if key == 'baseline':
                skip_idx = []
            else:
                skip_idx = np.argsort(infls[key])[:k]

            # sgd
            torch.manual_seed(seed)
            model.load_state_dict(copy.deepcopy(model_init.state_dict()))
            optimizer.load_state_dict(
                copy.deepcopy(optimizer_init.state_dict()))
            model.train()
            for epoch in range(start_epoch, end_epoch):
                fn = '%s/epoch%02d_info.dat' % (output_path, epoch)
                info = joblib.load(fn)
                np.random.seed(epoch)
                model, optimizer = sgd_with_skip(info, device, model,
                                                 optimizer, loss_fn, trainset,
                                                 idx_train, skip_idx)

            # evaluation
            loss_tr, acc_tr = train.eval_model(model, loss_fn, device, valset,
                                               idx_train)
            loss_val, acc_val = train.eval_model(model, loss_fn, device,
                                                 valset, idx_val)
            loss_te, acc_te = train.eval_model(model, loss_fn, device, testset,
                                               idx_test)
            if key == 'baseline':
                score[key] = (loss_tr, loss_val, loss_te, acc_tr, acc_val,
                              acc_te)
            else:
                score[(key, k)] = (loss_tr, loss_val, loss_te, acc_tr, acc_val,
                                   acc_te)
            #print((key, k), acc_tr, acc_val, acc_te)

        # save
        fn = '%s/eval_epoch_%02d_to_%02d.dat' % (output_path, start_epoch,
                                                 end_epoch)
        joblib.dump(score, fn, compress=9)
Example #21
def main(opt):
    if torch.cuda.is_available():
        device = torch.device('cuda')
        torch.cuda.set_device(opt.gpu_id)
    else:
        device = torch.device('cpu')

    if opt.network == 'resnet':
        model = resnet(opt.classes, opt.layers)
    elif opt.network == 'resnext':
        model = resnext(opt.classes, opt.layers)
    elif opt.network == 'resnext_wsl':
        # resnext_wsl must specify the opt.battleneck_width parameter
        opt.network = 'resnext_wsl_32x' + str(opt.battleneck_width) + 'd'
        model = resnext_wsl(opt.classes, opt.battleneck_width)
    elif opt.network == 'vgg':
        model = vgg_bn(opt.classes, opt.layers)
    elif opt.network == 'densenet':
        model = densenet(opt.classes, opt.layers)
    elif opt.network == 'inception_v3':
        model = inception_v3(opt.classes, opt.layers)
    elif opt.network == 'dpn':
        model = dpn(opt.classes, opt.layers)
    elif opt.network == 'effnet':
        model = effnet(opt.classes, opt.layers)
    # elif opt.network == 'pnasnet_m':
    #     model = pnasnet_m(opt.classes, opt.layers, opt.pretrained)

    # model = nn.DataParallel(model, device_ids=[4])
    # model = nn.DataParallel(model, device_ids=[0, 1, 2, 3])
    model = nn.DataParallel(model, device_ids=[opt.gpu_id, opt.gpu_id + 1])
    # model = convert_model(model)
    model = model.to(device)

    images, names = utils.read_test_data(
        os.path.join(opt.root_dir, opt.test_dir))

    dict_ = {}
    for crop_size in [opt.crop_size]:
        if opt.tta:
            transforms = test_transform(crop_size)
        else:
            transforms = my_transform(False, crop_size)

        dataset = TestDataset(images, names, transforms)

        loader = torch.utils.data.DataLoader(dataset,
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             num_workers=4)
        state_dict = torch.load(opt.model_dir + '/' + opt.network + '-' +
                                str(opt.layers) + '-' + str(crop_size) +
                                '_model.ckpt')
        if opt.network == 'densenet':
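            # Presumably remaps legacy torchvision DenseNet checkpoint keys
            # (e.g. 'denselayer1.norm.1.weight' -> 'denselayer1.norm1.weight')
            # so they match the current module names when load_state_dict() runs.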
            pattern = re.compile(
                r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$'
            )
            for key in list(state_dict.keys()):
                res = pattern.match(key)
                if res:
                    new_key = res.group(1) + res.group(2)
                    state_dict[new_key] = state_dict[key]
                    del state_dict[key]
        model.load_state_dict(state_dict)
        if opt.vote:
            if opt.tta:
                im_names, labels = eval_model_tta(loader, model, device=device)
            else:
                im_names, labels = eval_model(loader, model, device=device)
        else:
            if opt.tta:
                im_names, labels = eval_logits_tta(loader,
                                                   model,
                                                   device=device)
            else:
                im_names, labels = eval_logits(loader, model, device)
        im_labels = []
        # print(im_names)
        for name, label in zip(im_names, labels):
            if name in dict_:
                dict_[name].append(label)
            else:
                dict_[name] = [label]

    header = ['filename', 'type']
    utils.mkdir(opt.results_dir)
    result = opt.network + '-' + str(opt.layers) + '-' + str(
        opt.crop_size) + '_result.csv'
    filename = os.path.join(opt.results_dir, result)
    with open(filename, 'w', encoding='utf-8') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(header)
        for key in dict_.keys():
            v = np.argmax(np.sum(np.array(dict_[key]), axis=0)) + 1
            # v = list(np.sum(np.array(dict_[key]), axis=0))
            f_csv.writerow([key, v])
Example #22
def main(args):
    bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    bert_model = BertModel.from_pretrained('bert-base-uncased')
    dataset_descriptors = get_dataset_paths(args.dataset_json)
    dataset_types = [k['name'] for k in dataset_descriptors]

    # Init Bert layer and Conv
    model, conv_model, sent_embedder = init_common(args, bert_model)

    task_classifier = TaskClassifier(conv_model.get_n_blocks() *
                                     args.n_filters)
    ep_maker = EpisodeMaker(
        bert_tokenizer,
        args.max_len,
        args.max_sent,
        model.cnn.get_max_kernel(),
        args.device,
        datasets=dataset_descriptors,
        sent_embedder=None if args.finetune else sent_embedder)

    task_classifier = task_classifier.to(args.device)
    model = model.to(args.device)
    optim = torch.optim.Adam(list(model.parameters()) +
                             list(task_classifier.parameters()),
                             lr=args.lr)
    best_acc = 0.

    logging.info('Multitask training starting.')
    time_log = datetime.now().strftime('%y%m%d-%H%M%S')
    writer = SummaryWriter(f'runs/multitaskep_{time_log}')
    for batch_nr in range(args.n_epochs):
        for _ in range(args.meta_batch):
            optim.zero_grad()
            dataset_type = random.choice(['gcdc', 'persuasiveness'])
            one_batch_dataset = ep_maker.get_episode(
                dataset_type=dataset_type,
                n_train=args.train_size_support)['support_set']
            binary, loss = loss_task_factory(dataset_type)
            tcw = TaskClassifierWrapper(task_classifier, dataset_type)

            train_acc, train_loss = train_model(model,
                                                tcw,
                                                one_batch_dataset,
                                                loss,
                                                optim,
                                                binary,
                                                disp_tqdm=False)
            writer.add_scalar(f'Train/{dataset_type}/multi/accuracy',
                              train_acc, batch_nr)
            writer.add_scalar(f'Train/{dataset_type}/multi/loss', train_loss,
                              batch_nr)

            logging.info("dataset_type %s, acc %.4f, loss %.4f", dataset_type,
                         train_acc, train_loss)
            logging.debug(
                "max of gradients of task_classifier: %f",
                max(p.grad.max() for p in task_classifier.parameters())
            )  # we take the max because the mean wouldn't be informative
            logging.debug(
                "avg of gradients of model: %f",
                max(p.grad.max() for p in model.parameters()
                    if p.grad is not None))

        if batch_nr % 5 == 0:
            dataset_type = 'hyperpartisan'
            model_cp = deepcopy(model)
            tcw_cp = TaskClassifierWrapper(deepcopy(task_classifier),
                                           dataset_type)
            binary_cp, loss_cp = loss_task_factory(dataset_type)
            optim_cp = torch.optim.Adam(list(model_cp.parameters()) +
                                        list(tcw_cp.parameters()),
                                        lr=args.lr)
            for k in range(args.shots):
                one_batch_dataset = ep_maker.get_episode(
                    dataset_type=dataset_type,
                    n_train=args.train_size_support,
                    n_test=args.train_size_query)['support_set']
                train_model(model_cp,
                            tcw_cp,
                            one_batch_dataset,
                            loss_cp,
                            optim_cp,
                            binary_cp,
                            disp_tqdm=False)
            one_batch_dataset = ep_maker.get_episode(
                dataset_type=dataset_type,
                n_train=args.train_size_support,
                n_test=args.train_size_query)['query_set']
            acc, avg_loss, _ = eval_model(model_cp,
                                          tcw_cp,
                                          one_batch_dataset,
                                          loss_cp,
                                          binary_cp,
                                          disp_tqdm=False)
            logging.info("Eval acc %f loss %f", acc, avg_loss)
            if acc > best_acc:
                trained_general_model = (deepcopy(model),
                                         deepcopy(task_classifier))

    dataset_type = 'fake_news'
    model_cp, task_classifier_cp = trained_general_model
    tcw_cp = TaskClassifierWrapper(task_classifier, dataset_type)
    binary_cp, loss_cp = loss_task_factory(dataset_type)
    optim_cp = torch.optim.Adam(list(model_cp.parameters()) +
                                list(tcw_cp.parameters()),
                                lr=args.lr)
    for k in range(args.shots):
        one_batch_dataset = ep_maker.get_episode(
            dataset_type=dataset_type,
            n_train=args.train_size_support,
            n_test=args.train_size_query)['support_set']
        train_model(model_cp,
                    tcw_cp,
                    one_batch_dataset,
                    loss_cp,
                    optim_cp,
                    binary_cp,
                    disp_tqdm=False)
    test_set = ep_maker.datasets[dataset_type][0]['test']
    test_set.batch_size = 1
    test_set.shuffle()
    test_set = BertPreprocessor(test_set,
                                sent_embedder,
                                conv_model.get_max_kernel(),
                                device=args.device,
                                batch_size=8)
    acc, loss, f1_stats = eval_model(model_cp,
                                     tcw_cp,
                                     test_set,
                                     loss_cp,
                                     binary_cp,
                                     disp_tqdm=False)
    logging.info("%s: accuracy %.4f; f1: %s", test_set.file, acc,
                 str(f1_stats))
Example #23
def main(args):
    bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    bert_model = BertModel.from_pretrained('bert-base-uncased')
    dataset_descriptors = get_dataset_paths(args.dataset_json)
    dataset_types = [k['name'] for k in dataset_descriptors]

    # Init Bert layer and Conv
    model, conv_model, sent_embedder = init_common(args, bert_model)

    task_classifier = TaskClassifierGCDC(conv_model.get_n_blocks() *
                                         args.n_filters)
    ep_maker = EpisodeMaker(
        bert_tokenizer,
        args.max_len,
        args.max_sent,
        model.cnn.get_max_kernel(),
        args.device,
        datasets=dataset_descriptors,
        sent_embedder=None if args.finetune else sent_embedder)

    print(ep_maker.datasets['gcdc'])

    task_classifier = task_classifier.to(args.device)
    model = model.to(args.device)
    optim = torch.optim.Adam(list(model.parameters()) +
                             list(task_classifier.parameters()),
                             lr=args.lr)

    import random
    logging.info('Multitask training starting.')
    time_log = datetime.now().strftime('%y%m%d-%H%M%S')
    # writer = SummaryWriter(f'runs/multitaskep_{time_log}')
    for batch_nr in range(args.n_epochs):
        optim.zero_grad()
        dataset_type = 'gcdc'
        one_batch_dataset = ep_maker.get_episode(
            dataset_type=dataset_type,
            n_train=args.train_size_support)['support_set']

        binary, loss = loss_task_factory(dataset_type)

        train_acc, train_loss = train_model(model,
                                            task_classifier,
                                            one_batch_dataset,
                                            loss,
                                            optim,
                                            binary,
                                            disp_tqdm=False)
        # writer.add_scalar(f'Train/{dataset_type}/multi/accuracy', train_acc, batch_nr)
        # writer.add_scalar(f'Train/{dataset_type}/multi/loss', train_loss, batch_nr)

        logging.info("dataset_type %s, acc %.4f, loss %.4f", dataset_type,
                     train_acc, train_loss)
        logging.debug(
            "max of gradients of task_classifier: %f",
            max(p.grad.max() for p in task_classifier.parameters()
                ))  # we take the max because the mean wouldn't be informative
        logging.debug(
            "avg of gradients of model: %f",
            max(p.grad.max() for p in model.parameters()
                if p.grad is not None))

    for i in range(4):
        binary, loss = loss_task_factory('gcdc')
        test_set = ep_maker.datasets['gcdc'][i]['test']
        test_set.batch_size = 1
        test_set.shuffle()
        test_set = BertPreprocessor(test_set,
                                    sent_embedder,
                                    conv_model.get_max_kernel(),
                                    device=args.device,
                                    batch_size=8)
        acc, loss, _ = eval_model(model,
                                  task_classifier,
                                  test_set,
                                  loss,
                                  binary,
                                  disp_tqdm=False)
        logging.info("%s: accuracy %.4f", test_set.file, acc)