def test_non_bow():
    """Test whether programs can be learned from queries that require
    something stronger than a bag-of-words assumption."""
    sad = AIProgramDescription(name="sad")
    what = AIProgramDescription(name="what")
    weird = AIProgramDescription(name="weird")
    traindata = [
        ("the dog bit bob", "sad"),
        ("bob bit the dog", "weird"),
        ("the bit dog bob", "what"),
        ("bob dog the bit", "what"),
    ]
    valdata = traindata
    # Do training
    train_output = train.run_with_specific_split(
        traindata, valdata, [sad, what, weird], False,
        quiet_mode=True, num_epochs=200)
    meta_model, final_state, train_iter, val_iter = train_output
    # Eval the model. Expect essentially perfect program picking.
    bashmetric = BashMetric()
    train.eval_model(meta_model, train_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc() >= 0.98, \
        "Did not fully learn non-bag-of-words"
def test_case_sensitive():
    posArg = Argument("aposarg", "Stringlike", position=0)
    cow = AIProgramDescription(name="hello", arguments=[posArg])
    traindata = [
        ("my name is bob", "hello bob"),
        ("my name is Bob", "hello Bob"),
        ("my name is BoB", "hello BoB"),
        ("my name is boB", "hello boB"),
        ("my name is BOB", "hello BOB"),
    ]
    valdata = traindata
    # Do training
    train_output = train.run_with_specific_split(
        traindata, valdata, [cow], False, quiet_mode=True, num_epochs=200)
    meta_model, final_state, train_iter, val_iter = train_output
    # Eval the model. Expect essentially perfect case-sensitive generation.
    bashmetric = BashMetric()
    train.eval_model(meta_model, train_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc() >= 0.98, \
        "Did not fully learn to generate case-sensitive output"
def train_and_serialize():
    aArg = Argument("a", "Stringlike")
    bArg = Argument("b", "Stringlike")
    cow = AIProgramDescription(name="cow", arguments=[aArg, bArg])
    cArg = Argument("a", "Stringlike")
    dArg = Argument("b", "Stringlike")
    dog = AIProgramDescription(name="dog", arguments=[cArg, dArg])
    data = [
        ("have a bone puppy", "dog -a bone -b woof"),
        ("have a snack puppy", "dog -a snack -b woof"),
        ("have a apple puppy", "dog -a apple -b woof"),
        ("woof please", "dog -b woof"),
        ("nothin puppy", "dog"),
        ("nothin cow", "cow"),
        ("moo please", "cow -b moo"),
        ("have a grass cow", "cow -a grass -b moo"),
        ("have a plant cow", "cow -a plant -b moo"),
    ] * 100
    random.shuffle(data)
    # Do training
    train_output = train.run_with_data_list(
        data, [cow, dog], False, quiet_mode=True, num_epochs=3)
    meta_model, final_state, train_iter, val_iter = train_output
    # Eval the model. Expect perfect program picking.
    bashmetric = BashMetric()
    train.eval_model(meta_model, val_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc() == 1.00
    fn = ".tester.pkl"
    serialize_tools.serialize(meta_model, fn)
    return val_iter, data, fn, meta_model.run_context.nl_field
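# Note: test_serialization below receives train_and_serialize as a parameter,
# which is pytest's fixture-injection pattern. A minimal sketch of how the
# function above could be registered, assuming pytest is the runner (the
# wrapper name is hypothetical, not from the source):
import pytest


@pytest.fixture(name="train_and_serialize")
def _train_and_serialize_fixture():
    # Train once, serialize, and hand the artifacts to any test that asks.
    return train_and_serialize()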
def test_copy_mechanism():
    posArg = Argument("aposarg", "Stringlike", position=0)
    cow = AIProgramDescription(name="hello", arguments=[posArg])
    traindata = [
        ("my name is bob", "hello bob"),
        ("my name is alice", "hello alice"),
        ("my name is Alice", "hello Alice"),
        ("my name is eve", "hello eve"),
        ("my name is jim", "hello jim"),
        ("my name is gregthefifth", "hello gregthefifth"),
    ]
    valdata = [
        ("my name is hoozawhatz", "hello hoozawhatz"),
        ("my name is boogieman", "hello boogieman"),
        ("my name is frankenstien", "hello frankenstien"),
        ("my name is walle", "hello walle"),
    ]
    # Do training
    train_output = train.run_with_specific_split(
        traindata, valdata, [cow], False, quiet_mode=True, num_epochs=200)
    meta_model, final_state, train_iter, val_iter = train_output
    # Eval the model. Expect essentially perfect copying on both splits.
    bashmetric = BashMetric()
    train.eval_model(meta_model, train_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc() >= 0.98, "Did not fully learn train"
    bashmetric = BashMetric()
    train.eval_model(meta_model, val_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc() >= 0.98, "Did not generalize to val"
def test_copy_long_seq():
    posArg = Argument("aposarg", "Stringlike", position=0)
    cow = AIProgramDescription(name="hello", arguments=[posArg])
    traindata = [
        ("my name is a/b/c", "hello a/b/c"),
        ("my name is ab91.sb01", "hello ab91.sb01"),
        ("my name is John", "hello John"),
        ("my name is a/b/c/d/e/f/g", "hello a/b/c/d/e/f/g"),
        ("my name is a.b.cd.e.f.g", "hello a.b.cd.e.f.g"),
        ("my name is t.h.i.s.has.g.o.n.e.t.o.o.f.a.r",
         "hello t.h.i.s.has.g.o.n.e.t.o.o.f.a.r"),
    ]
    valdata = [
        ("my name is c/d/e/f/g/h/q", "hello c/d/e/f/g/h/q"),
        ("my name is how.long_can_this_be", "hello how.long_can_this_be"),
        ("my name is real.l.l.l.l.l.l.y", "hello real.l.l.l.l.l.l.y"),
        ("my name is yo", "hello yo"),
    ]
    # Do training
    train_output = train.run_with_specific_split(
        traindata, valdata, [cow], False, quiet_mode=True, num_epochs=200)
    meta_model, final_state, train_iter, val_iter = train_output
    # Eval the model. Expect essentially perfect copying on both splits.
    bashmetric = BashMetric()
    train.eval_model(meta_model, train_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc() >= 0.98, "Did not fully learn train"
    bashmetric = BashMetric()
    train.eval_model(meta_model, val_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc() >= 0.98, "Did not generalize to val"
def test_serialization(train_and_serialize):
    """Test a decently complex training task, serialize the model, then check
    that the restored model has the same performance."""
    val_iter, data, fn, _ = train_and_serialize
    restored_model = serialize_tools.restore(fn)
    # Make sure performance survived the round trip.
    newmetric = BashMetric()
    train.eval_model(restored_model, val_iter, [(newmetric, 'bashmetric')])
    assert newmetric.exact_match_acc() >= 0.98, \
        "Performance loss after restore"
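# train_and_serialize leaves ".tester.pkl" in the working directory and
# nothing above removes it. A minimal teardown sketch, assuming a yield-style
# pytest fixture (the fixture name here is hypothetical):
import os
import pytest


@pytest.fixture
def serialized_model_file():
    val_iter, data, fn, nl_field = train_and_serialize()
    yield val_iter, data, fn, nl_field
    # Clean up the pickle so repeated runs start from a fresh state.
    if os.path.exists(fn):
        os.remove(fn)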
def run_test(model, basename, batch_size, criterion, subset, outname, device,
             inputdir_type0, inputdir_type1, num_examples_to_plot=100,
             num_workers=32):
    # Works for either a multi-class or a binary classifier:
    testset = ActionOrNot(type0_pathname=inputdir_type0,
                          type1_pathname=inputdir_type1)
    loader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, pin_memory=True,
                                         num_workers=num_workers)
    print('Testing %d image examples..' % len(testset))
    if subset is not None:
        type1 = np.asarray(testset.inds_type1_examples)  # all positives
        type0 = np.random.choice(testset.inds_type0_examples,
                                 subset)  # negative subset
        loader = torch.utils.data.DataLoader(
            testset, batch_size=batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(
                np.concatenate((type1, type0))),
            shuffle=False, pin_memory=True, num_workers=num_workers)
    test_loss, test_acc, test_auc, misc = run_epoch(epoch=1, loader=loader,
                                                    optimizer=None,
                                                    model=model,
                                                    criterion=criterion,
                                                    device=device,
                                                    prefix='Testing')
    test_total_outputs_class, test_total_gt_labels, test_total_filenames = \
        misc[0], misc[1], misc[2]
    filename = os.path.join(outname, 'misclassifications_{}'.format(basename))
    eval_model(test_total_outputs_class, test_total_gt_labels,
               test_total_filenames, outname=filename,
               num_examples_to_plot=num_examples_to_plot)
def test(test_version, test_set, new_model, eval_loss_function, get_true_pred,
         detach_pred, batch_size=32, just_weights=True):
    print("[info]: testing model...")
    # Load the model.
    model, create_new = load_model.load_model(version=test_version,
                                              new_model=new_model,
                                              just_weights=just_weights,
                                              retrain=False, to_cuda=True)
    if create_new:
        print("[info]: tried to test an untrained model")
        exit(-1)
    test_loader = data.DataLoader(test_set, batch_size=batch_size)
    loss, acc = train.eval_model(model=model, data_loader=test_loader,
                                 eval_loss_function=eval_loss_function,
                                 get_true_pred=get_true_pred,
                                 detach_pred=detach_pred)
    print("[info]: test loss: {:5f}, test acc: {:4f}".format(loss, acc))
    return loss, acc
def test_fill_pos():
    posArg = Argument("aposarg", "Stringlike", position=0)
    cow = AIProgramDescription(name="cow", arguments=[posArg])
    data = [
        ("cow goes woof", "cow woof woof"),
        ("cow goes moo", "cow moo"),
        ("cow goes meow", "cow meow meow meow"),
    ] * 100
    random.shuffle(data)
    # Do training
    train_output = train.run_with_data_list(data, [cow], False,
                                            quiet_mode=True, num_epochs=3)
    meta_model, final_state, train_iter, val_iter = train_output
    # Eval the model. Expect essentially perfect program picking.
    bashmetric = BashMetric()
    train.eval_model(meta_model, val_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc() >= 0.98
def test_pipe_select():
    cow = AIProgramDescription(name="cow")
    dog = AIProgramDescription(name="dog")
    data = [
        ("I need a plumber", "cow | dog"),
        ("no plumber needed", "cow"),
        ("dog in my pipes", "dog | cow"),
    ] * 100
    random.shuffle(data)
    # Do training
    train_output = train.run_with_data_list(data, [cow, dog], False,
                                            quiet_mode=True, num_epochs=5)
    meta_model, final_state, train_iter, val_iter = train_output
    # Eval the model. Expect essentially perfect program picking.
    bashmetric = BashMetric()
    train.eval_model(meta_model, val_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc() >= 0.98
def run_simulation(model, loader, device):
    model.eval()
    with torch.no_grad():
        predictions = eval_model(model, loader, device,
                                 return_predictions=True)
    return predictions
def test_fill_multival():
    aArg = Argument("a", "Stringlike")
    bArg = Argument("b", "Stringlike")
    cow = AIProgramDescription(name="foo", arguments=[aArg, bArg])
    data = [
        ("how are you", "foo -a good -b bad"),
        ("are you good or bad", "foo -a good -b bad"),
        ("flip please", "foo -a bad -b good"),
    ] * 100
    random.shuffle(data)
    # Do training
    train_output = train.run_with_data_list(data, [cow], False,
                                            quiet_mode=True, num_epochs=3)
    meta_model, final_state, train_iter, val_iter = train_output
    # Eval the model. Expect essentially perfect program picking.
    bashmetric = BashMetric()
    train.eval_model(meta_model, val_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc() >= 0.98
def test_train_one_epoch(self):
    train_ds, test_ds = train.get_datasets()
    input_rng = onp.random.RandomState(0)
    model = train.create_model(random.PRNGKey(0))
    optimizer = train.create_optimizer(model, 0.1, 0.9)
    optimizer, train_metrics = train.train_epoch(optimizer, train_ds, 128, 0,
                                                 input_rng)
    self.assertLessEqual(train_metrics['loss'], 0.27)
    self.assertGreaterEqual(train_metrics['accuracy'], 0.92)
    loss, accuracy = train.eval_model(optimizer.target, test_ds)
    self.assertLessEqual(loss, 0.06)
    self.assertGreaterEqual(accuracy, 0.98)
def test_pick_arg():
    """A test to see whether it can correctly learn to predict argument
    existence."""
    aArg = Argument("a", "StoreTrue")
    bArg = Argument("b", "StoreTrue")
    cow = AIProgramDescription(name="cow", arguments=[aArg, bArg])
    # Make some toy data. It is multiplied by a large number to ensure
    # everything will make it into both train and val.
    data = [
        ("give cow an apple", "cow -a"),
        ("give the cow a banana", "cow -b"),
        ("cow feast", "cow -a -b"),
    ] * 100
    random.shuffle(data)
    # Do training
    train_output = train.run_with_data_list(data, [cow], False,
                                            quiet_mode=True, num_epochs=5)
    meta_model, final_state, train_iter, val_iter = train_output
    # Eval the model. Expect essentially perfect program picking.
    bashmetric = BashMetric()
    train.eval_model(meta_model, val_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.exact_match_acc() >= 0.98
def test_pick_program():
    """A test to see whether it can correctly learn to predict a program."""
    cow = AIProgramDescription(name="cow")
    dog = AIProgramDescription(name="dog")
    kitty = AIProgramDescription(name="kitty")  # originally tried cat, but....
    # Make some toy data. It is multiplied by a large number to ensure
    # everything will make it into both train and val.
    data = [
        ("go moo", "cow"),
        ("go meow", "kitty"),
        ("please go woof", "dog"),
    ] * 100
    random.shuffle(data)
    # Do training
    train_output = train.run_with_data_list(data, [cow, dog, kitty], False,
                                            quiet_mode=True, num_epochs=3)
    meta_model, final_state, train_iter, val_iter = train_output
    # Eval the model. Expect essentially perfect program picking.
    bashmetric = BashMetric()
    train.eval_model(meta_model, val_iter, [(bashmetric, 'bashmetric')])
    assert bashmetric.first_cmd_acc() >= 0.98
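# These toy tests can be run individually with pytest's -k filter, assuming
# they live in a pytest-collected module (the file name below is
# hypothetical):
#
#     pytest test_train.py -k test_pick_program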
def get_results(test_ds, type):
    dl = DataLoader(test_ds, batch_size=len(test_ds), num_workers=5)
    retdict = train.eval_model(model, dl, args.OP_tgt, 3)
    model_dir = os.path.join('models', args.modelname)
    results_dir = os.path.join(model_dir, f'{type}_eval_results')
    # Start from an empty results directory.
    try:
        os.makedirs(results_dir)
    except OSError:
        shutil.rmtree(results_dir)
        os.makedirs(results_dir)
    retdict['modelname'] = args.modelname
    # Print
    pprint.pprint(retdict)
    utils.pkl_dump(retdict,
                   os.path.join('models', args.modelname,
                                f'{type}_report.dict'))
def check_ensemble():
    import train
    from torch.utils import data

    model = EnsembleModel(["googlenet-4.0", "googlenet-3.0", "googlenet-1.0"],
                          ["resnet-3.0"])
    model.eval()
    default_load_data_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "get_data/data")
    train_set, valid_set, test_set = import_data.import_dataset(
        load_dir=default_load_data_dir)
    test_loader = data.DataLoader(test_set, batch_size=32)
    loss, acc = train.eval_model(
        model, test_loader,
        lambda pred, y, x: nn.CrossEntropyLoss()(pred, y),
        lambda x: x,
        lambda x: x.detach())
    print("loss: {}, acc: {}".format(loss, acc))
def test_single_train_step(self):
    train_ds, test_ds = train.get_datasets()
    batch_size = 32
    model = train.create_model(random.PRNGKey(0))
    optimizer = train.create_optimizer(model, 0.1, 0.9)
    # Test a single train step.
    optimizer, train_metrics = train.train_step(
        optimizer=optimizer,
        batch={k: v[:batch_size] for k, v in train_ds.items()})
    self.assertLessEqual(train_metrics['loss'], 2.302)
    self.assertGreaterEqual(train_metrics['accuracy'], 0.0625)
    # Run model eval.
    loss, accuracy = train.eval_model(optimizer.target, test_ds)
    self.assertLess(loss, 2.252)
    self.assertGreater(accuracy, 0.2597)
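# test_single_train_step (and test_train_one_epoch earlier) take self and use
# self.assert*, so they presumably live inside a TestCase. A minimal sketch of
# the assumed harness (class name and runner are assumptions, not from the
# source):
from absl.testing import absltest


class TrainTest(absltest.TestCase):
    # test_train_one_epoch and test_single_train_step would be
    # defined here as methods.
    pass


if __name__ == '__main__':
    absltest.main()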
def meta_valid(model: nn.Module, task: Task, inner_optim, n_inner,
               support_set_size=8, query_set_size=8):
    model_cp, ep, task_classifier = train_support(model, task, inner_optim,
                                                  n_inner,
                                                  n_train=support_set_size,
                                                  n_test=query_set_size)
    results = eval_model(model_cp, task_classifier, ep["query_set"],
                         task.loss, False, False, task.n_classes == 2)
    # Free the copied model and episode before the next task.
    del model_cp
    del ep
    del task_classifier
    torch.cuda.empty_cache()
    return results
def eval_infl(setup, output_path, target_epoch, start_epoch, end_epoch,
              seed=0, gpu=0):
    device = 'cuda:%d' % (gpu,)

    # Setup
    net_func, trainset, valset, testset = setup()
    n = len(trainset)

    # Data split
    np.random.seed(seed)
    idx_val = np.random.permutation(n)[:nval]
    idx_train = np.setdiff1d(np.arange(n), idx_val)
    ntr, nte = idx_train.size, len(testset)
    idx_test = np.arange(nte)

    # Model, optimizer, and loss
    model_init, optimizer_init = load_model(net_func, device, output_path,
                                            start_epoch)
    model = net_func().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    loss_fn = torch.nn.functional.nll_loss

    # Influence scores
    infl_sgd_last = joblib.load('%s/infl_sgd_at_epoch%02d.dat' %
                                (output_path, target_epoch))[:, -1]
    infl_sgd_all = joblib.load('%s/infl_sgd_at_epoch%02d.dat' %
                               (output_path, target_epoch))[:, 0]
    infl_icml = joblib.load('%s/infl_icml_at_epoch%02d.dat' %
                            (output_path, target_epoch))
    np.random.seed(seed)
    infls = {
        'baseline': [],
        'icml': infl_icml,
        'sgd_last': infl_sgd_last,
        'sgd_all': infl_sgd_all,
        'random': np.random.rand(ntr),
    }

    # Eval
    score = {}
    for k in k_list:
        for key in infls.keys():
            if key in score.keys():
                continue
            if key == 'baseline':
                skip_idx = []
            else:
                skip_idx = np.argsort(infls[key])[:k]

            # SGD retraining that skips the selected examples
            torch.manual_seed(seed)
            model.load_state_dict(copy.deepcopy(model_init.state_dict()))
            optimizer.load_state_dict(
                copy.deepcopy(optimizer_init.state_dict()))
            model.train()
            for epoch in range(start_epoch, end_epoch):
                fn = '%s/epoch%02d_info.dat' % (output_path, epoch)
                info = joblib.load(fn)
                np.random.seed(epoch)
                model, optimizer = sgd_with_skip(info, device, model,
                                                 optimizer, loss_fn, trainset,
                                                 idx_train, skip_idx)

            # Evaluation
            loss_tr, acc_tr = train.eval_model(model, loss_fn, device, valset,
                                               idx_train)
            loss_val, acc_val = train.eval_model(model, loss_fn, device,
                                                 valset, idx_val)
            loss_te, acc_te = train.eval_model(model, loss_fn, device,
                                               testset, idx_test)
            if key == 'baseline':
                score[key] = (loss_tr, loss_val, loss_te,
                              acc_tr, acc_val, acc_te)
            else:
                score[(key, k)] = (loss_tr, loss_val, loss_te,
                                   acc_tr, acc_val, acc_te)
            # print((key, k), acc_tr, acc_val, acc_te)

    # Save
    fn = '%s/eval_epoch_%02d_to_%02d.dat' % (output_path, start_epoch,
                                             end_epoch)
    joblib.dump(score, fn, compress=9)
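# eval_infl persists its score dict with joblib.dump. A minimal sketch of
# reading the results back (the path below is an example, not from the
# source):
import joblib

score = joblib.load('output/eval_epoch_00_to_10.dat')
# 'baseline' maps to (loss_tr, loss_val, loss_te, acc_tr, acc_val, acc_te);
# every other entry is keyed by (influence_method, k).
print(score['baseline'])
for key, metrics in score.items():
    print(key, metrics)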
def main(opt):
    if torch.cuda.is_available():
        device = torch.device('cuda')
        torch.cuda.set_device(opt.gpu_id)
    else:
        device = torch.device('cpu')

    if opt.network == 'resnet':
        model = resnet(opt.classes, opt.layers)
    elif opt.network == 'resnext':
        model = resnext(opt.classes, opt.layers)
    elif opt.network == 'resnext_wsl':
        # resnext_wsl must specify the opt.battleneck_width parameter
        opt.network = 'resnext_wsl_32x' + str(opt.battleneck_width) + 'd'
        model = resnext_wsl(opt.classes, opt.battleneck_width)
    elif opt.network == 'vgg':
        model = vgg_bn(opt.classes, opt.layers)
    elif opt.network == 'densenet':
        model = densenet(opt.classes, opt.layers)
    elif opt.network == 'inception_v3':
        model = inception_v3(opt.classes, opt.layers)
    elif opt.network == 'dpn':
        model = dpn(opt.classes, opt.layers)
    elif opt.network == 'effnet':
        model = effnet(opt.classes, opt.layers)
    # elif opt.network == 'pnasnet_m':
    #     model = pnasnet_m(opt.classes, opt.layers, opt.pretrained)

    # model = nn.DataParallel(model, device_ids=[4])
    # model = nn.DataParallel(model, device_ids=[0, 1, 2, 3])
    model = nn.DataParallel(model, device_ids=[opt.gpu_id, opt.gpu_id + 1])
    # model = convert_model(model)
    model = model.to(device)

    images, names = utils.read_test_data(os.path.join(opt.root_dir,
                                                      opt.test_dir))
    dict_ = {}
    for crop_size in [opt.crop_size]:
        if opt.tta:
            transforms = test_transform(crop_size)
        else:
            transforms = my_transform(False, crop_size)
        dataset = TestDataset(images, names, transforms)
        loader = torch.utils.data.DataLoader(dataset,
                                             batch_size=opt.batch_size,
                                             shuffle=False, num_workers=4)
        state_dict = torch.load(opt.model_dir + '/' + opt.network + '-' +
                                str(opt.layers) + '-' + str(crop_size) +
                                '_model.ckpt')
        if opt.network == 'densenet':
            # Remap old-style DenseNet checkpoint keys (with an extra '.')
            # to the current parameter names so the checkpoint still loads.
            pattern = re.compile(
                r'^(.*denselayer\d+\.(?:norm|relu|conv))\.'
                r'((?:[12])\.(?:weight|bias|running_mean|running_var))$')
            for key in list(state_dict.keys()):
                res = pattern.match(key)
                if res:
                    new_key = res.group(1) + res.group(2)
                    state_dict[new_key] = state_dict[key]
                    del state_dict[key]
        model.load_state_dict(state_dict)

        if opt.vote:
            if opt.tta:
                im_names, labels = eval_model_tta(loader, model,
                                                  device=device)
            else:
                im_names, labels = eval_model(loader, model, device=device)
        else:
            if opt.tta:
                im_names, labels = eval_logits_tta(loader, model,
                                                   device=device)
            else:
                im_names, labels = eval_logits(loader, model, device)

        for name, label in zip(im_names, labels):
            if name in dict_:
                dict_[name].append(label)
            else:
                dict_[name] = [label]

    header = ['filename', 'type']
    utils.mkdir(opt.results_dir)
    result = (opt.network + '-' + str(opt.layers) + '-' +
              str(opt.crop_size) + '_result.csv')
    filename = os.path.join(opt.results_dir, result)
    with open(filename, 'w', encoding='utf-8') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(header)
        for key in dict_.keys():
            v = np.argmax(np.sum(np.array(dict_[key]), axis=0)) + 1
            # v = list(np.sum(np.array(dict_[key]), axis=0))
            f_csv.writerow([key, v])
def main(args):
    bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    bert_model = BertModel.from_pretrained('bert-base-uncased')

    dataset_descriptors = get_dataset_paths(args.dataset_json)
    dataset_types = [k['name'] for k in dataset_descriptors]

    # Init BERT layer and Conv
    model, conv_model, sent_embedder = init_common(args, bert_model)
    task_classifier = TaskClassifier(conv_model.get_n_blocks() *
                                     args.n_filters)
    ep_maker = EpisodeMaker(
        bert_tokenizer, args.max_len, args.max_sent,
        model.cnn.get_max_kernel(), args.device,
        datasets=dataset_descriptors,
        sent_embedder=None if args.finetune else sent_embedder)

    task_classifier = task_classifier.to(args.device)
    model = model.to(args.device)
    optim = torch.optim.Adam(list(model.parameters()) +
                             list(task_classifier.parameters()), lr=args.lr)

    best_acc = 0.
    logging.info('Multitask training starting.')
    time_log = datetime.now().strftime('%y%m%d-%H%M%S')
    writer = SummaryWriter(f'runs/multitaskep_{time_log}')
    for batch_nr in range(args.n_epochs):
        for _ in range(args.meta_batch):
            optim.zero_grad()
            dataset_type = random.choice(['gcdc', 'persuasiveness'])
            one_batch_dataset = ep_maker.get_episode(
                dataset_type=dataset_type,
                n_train=args.train_size_support)['support_set']
            binary, loss = loss_task_factory(dataset_type)
            tcw = TaskClassifierWrapper(task_classifier, dataset_type)
            train_acc, train_loss = train_model(model, tcw, one_batch_dataset,
                                                loss, optim, binary,
                                                disp_tqdm=False)
            writer.add_scalar(f'Train/{dataset_type}/multi/accuracy',
                              train_acc, batch_nr)
            writer.add_scalar(f'Train/{dataset_type}/multi/loss', train_loss,
                              batch_nr)
            logging.info("dataset_type %s, acc %.4f, loss %.4f", dataset_type,
                         train_acc, train_loss)
            # We take the max because the mean wouldn't be informative.
            logging.debug("max of gradients of task_classifier: %f",
                          max(p.grad.max()
                              for p in task_classifier.parameters()))
            logging.debug("max of gradients of model: %f",
                          max(p.grad.max() for p in model.parameters()
                              if p.grad is not None))

        if batch_nr % 5 == 0:
            # Few-shot adaptation on hyperpartisan to track generalization.
            dataset_type = 'hyperpartisan'
            model_cp = deepcopy(model)
            tcw_cp = TaskClassifierWrapper(deepcopy(task_classifier),
                                           dataset_type)
            binary_cp, loss_cp = loss_task_factory(dataset_type)
            optim_cp = torch.optim.Adam(list(model_cp.parameters()) +
                                        list(tcw_cp.parameters()), lr=args.lr)
            for k in range(args.shots):
                one_batch_dataset = ep_maker.get_episode(
                    dataset_type=dataset_type,
                    n_train=args.train_size_support,
                    n_test=args.train_size_query)['support_set']
                train_model(model_cp, tcw_cp, one_batch_dataset, loss_cp,
                            optim_cp, binary_cp, disp_tqdm=False)
            one_batch_dataset = ep_maker.get_episode(
                dataset_type=dataset_type,
                n_train=args.train_size_support,
                n_test=args.train_size_query)['query_set']
            acc, avg_loss, _ = eval_model(model_cp, tcw_cp, one_batch_dataset,
                                          loss_cp, binary_cp, disp_tqdm=False)
            logging.info("Eval acc %f loss %f", acc, avg_loss)
            if acc > best_acc:
                # Keep the best generalist checkpoint seen so far.
                best_acc = acc
                trained_general_model = (deepcopy(model),
                                         deepcopy(task_classifier))

    # Final few-shot adaptation of the best checkpoint to fake_news.
    dataset_type = 'fake_news'
    model_cp, task_classifier_cp = trained_general_model
    tcw_cp = TaskClassifierWrapper(task_classifier_cp, dataset_type)
    binary_cp, loss_cp = loss_task_factory(dataset_type)
    optim_cp = torch.optim.Adam(list(model_cp.parameters()) +
                                list(tcw_cp.parameters()), lr=args.lr)
    for k in range(args.shots):
        one_batch_dataset = ep_maker.get_episode(
            dataset_type=dataset_type,
            n_train=args.train_size_support,
            n_test=args.train_size_query)['support_set']
        train_model(model_cp, tcw_cp, one_batch_dataset, loss_cp, optim_cp,
                    binary_cp, disp_tqdm=False)

    test_set = ep_maker.datasets[dataset_type][0]['test']
    test_set.batch_size = 1
    test_set.shuffle()
    test_set = BertPreprocessor(test_set, sent_embedder,
                                conv_model.get_max_kernel(),
                                device=args.device, batch_size=8)
    acc, loss, f1_stats = eval_model(model_cp, tcw_cp, test_set, loss_cp,
                                     binary_cp, disp_tqdm=False)
    logging.info("%s: accuracy %.4f; f1: %s", test_set.file, acc,
                 str(f1_stats))
def main(args):
    bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    bert_model = BertModel.from_pretrained('bert-base-uncased')

    dataset_descriptors = get_dataset_paths(args.dataset_json)
    dataset_types = [k['name'] for k in dataset_descriptors]

    # Init BERT layer and Conv
    model, conv_model, sent_embedder = init_common(args, bert_model)
    task_classifier = TaskClassifierGCDC(conv_model.get_n_blocks() *
                                         args.n_filters)
    ep_maker = EpisodeMaker(
        bert_tokenizer, args.max_len, args.max_sent,
        model.cnn.get_max_kernel(), args.device,
        datasets=dataset_descriptors,
        sent_embedder=None if args.finetune else sent_embedder)
    print(ep_maker.datasets['gcdc'])

    task_classifier = task_classifier.to(args.device)
    model = model.to(args.device)
    optim = torch.optim.Adam(list(model.parameters()) +
                             list(task_classifier.parameters()), lr=args.lr)

    logging.info('Multitask training starting.')
    time_log = datetime.now().strftime('%y%m%d-%H%M%S')
    # writer = SummaryWriter(f'runs/multitaskep_{time_log}')
    for batch_nr in range(args.n_epochs):
        optim.zero_grad()
        dataset_type = 'gcdc'
        one_batch_dataset = ep_maker.get_episode(
            dataset_type=dataset_type,
            n_train=args.train_size_support)['support_set']
        binary, loss = loss_task_factory(dataset_type)
        train_acc, train_loss = train_model(model, task_classifier,
                                            one_batch_dataset, loss, optim,
                                            binary, disp_tqdm=False)
        # writer.add_scalar(f'Train/{dataset_type}/multi/accuracy',
        #                   train_acc, batch_nr)
        # writer.add_scalar(f'Train/{dataset_type}/multi/loss',
        #                   train_loss, batch_nr)
        logging.info("dataset_type %s, acc %.4f, loss %.4f", dataset_type,
                     train_acc, train_loss)
        # We take the max because the mean wouldn't be informative.
        logging.debug("max of gradients of task_classifier: %f",
                      max(p.grad.max()
                          for p in task_classifier.parameters()))
        logging.debug("max of gradients of model: %f",
                      max(p.grad.max() for p in model.parameters()
                          if p.grad is not None))

    for i in range(4):
        binary, loss = loss_task_factory('gcdc')
        test_set = ep_maker.datasets['gcdc'][i]['test']
        test_set.batch_size = 1
        test_set.shuffle()
        test_set = BertPreprocessor(test_set, sent_embedder,
                                    conv_model.get_max_kernel(),
                                    device=args.device, batch_size=8)
        acc, loss, _ = eval_model(model, task_classifier, test_set, loss,
                                  binary, disp_tqdm=False)
        logging.info("%s: accuracy %.4f", test_set.file, acc)