import csv
import time

from sklearn import metrics
from sklearn.model_selection import train_test_split

# Project-local imports (assumed layout): dataset I/O, feature extraction,
# paths/config and the ClassificationModel wrapper.
import config
import dataset
from features import get_features        # adjust to the actual module path
from models import ClassificationModel   # adjust to the actual module path


def test(dataset_size, model_type):
    """Opens the fit dataset, trains an SVM/LogReg/Forest model with it,
    then tests it and writes the per-case results to a CSV file."""
    print("MODEL TEST", dataset_size, model_type)

    dset = dataset.read('contradictions', dataset_size)
    data, targets = [], []
    for case in dset['content']:
        data.append(case)
        targets.append(case['contradiction'])

    # Hold out 25% of the cases for testing, then extract features per split.
    fit_cases, test_cases, fit_target, test_target = train_test_split(
        data, targets, test_size=0.25, shuffle=True, random_state=0)
    fit_data = [get_features(case['sentence'], case['hypothesis'])
                for case in fit_cases]
    test_data = [get_features(case['sentence'], case['hypothesis'])
                 for case in test_cases]

    model = ClassificationModel(model_type)
    start_time = time.time()
    model.train(fit_data, fit_target, dataset_size)
    elapsed_time = time.time() - start_time

    test_results = model.test(test_data)

    # Dump one row per test case so individual predictions can be inspected.
    results_path = config.CONTRADICTIONS_RESULTS_PATH.format(
        dataset_size, model_type)
    with open(results_path, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=',')
        csv_writer.writerow([
            'hypothesis', 'sentence', 'type', 'contradiction', 'prediction',
            'features'
        ])
        for test_case, result, features in zip(test_cases, test_results,
                                               test_data):
            csv_writer.writerow([
                test_case['hypothesis'], test_case['sentence'],
                test_case['type'], test_case['contradiction'], result,
                features
            ])

    precision = metrics.precision_score(test_target, test_results)
    recall = metrics.recall_score(test_target, test_results)
    f1_score = metrics.f1_score(test_target, test_results)
    print("FIT TIME", elapsed_time)
    print("PRECISION", precision)
    print("RECALL", recall)
    print("F1 SCORE", f1_score)

    model.save(dataset_size)
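# Hypothetical driver, not part of the original script: the dataset-size and
# model-type labels below are assumptions, chosen only to match the
# SVM/LogReg/Forest options named in test()'s docstring.
if __name__ == '__main__':
    for size in ('small', 'full'):                # assumed size labels
        for kind in ('svm', 'logreg', 'forest'):  # assumed type labels
            test(size, kind)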
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from tqdm import tqdm

# Project-local imports (assumed layout): config, tokenizer/dataset helpers,
# the model definition and the evaluate/accuracy routines.
import config as cfg
from data_utils import Tokenizer, CustomDataset, padding  # adjust paths
from model import ClassificationModel, load_custom_model  # adjust paths
from evaluation import evaluate, accuracy                 # adjust paths


def train():
    # Load the data.
    tokenizer = Tokenizer(cfg.char2idx)
    train_dataset = CustomDataset(cfg.train_data_path, tokenizer, cfg)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=cfg.batch_size,
                                  collate_fn=padding,
                                  shuffle=True,
                                  num_workers=4,
                                  pin_memory=True)

    model = ClassificationModel(len(cfg.char2idx))
    # model = load_pretrained_bert(model, cfg.pretrained_model_path,
    #                              keep_tokens=cfg.keep_tokens).to(cfg.device)
    model = load_custom_model(model, cfg.save_model_path).to(cfg.device)
    loss_function = nn.CrossEntropyLoss().to(cfg.device)
    optimizer = torch.optim.Adam(model.parameters(), lr=cfg.learn_rate)

    # Training loop.
    iteration, train_loss = 0, 0
    model.train()
    for inputs, mask, targets in tqdm(train_dataloader, position=0, leave=True):
        inputs = inputs.to(cfg.device)
        mask = mask.to(cfg.device)
        targets = targets.to(cfg.device)

        prediction = model(inputs, mask)
        loss = loss_function(prediction, targets.reshape(-1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        iteration += 1

        # Periodically report train/eval loss and accuracy, then switch the
        # model back to training mode.
        if iteration % cfg.print_loss_steps == 0:
            eval_loss = evaluate(model, tokenizer, loss_function)
            print('')
            print('train_loss:{}'.format(train_loss / cfg.print_loss_steps))
            print('evalu_loss:{}'.format(eval_loss))
            accuracy(model, tokenizer, cfg.valid_data_path)
            accuracy(model, tokenizer, cfg.test_data_path)
            model.train()
            train_loss = 0

        # Periodically checkpoint the weights.
        if iteration % cfg.save_model_steps == 0:
            torch.save(model.state_dict(), cfg.save_model_path)
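# Minimal sketch of a collate_fn like the `padding` used above; this is an
# assumption, not the project's actual implementation. It right-pads each
# batch of token-id sequences to the batch maximum length and builds the
# matching attention mask (relies on the module-level `import torch` above).
def padding_sketch(batch):
    """batch: list of (token_ids: list[int], label: int) pairs."""
    max_len = max(len(ids) for ids, _ in batch)
    inputs = torch.zeros(len(batch), max_len, dtype=torch.long)
    mask = torch.zeros(len(batch), max_len, dtype=torch.long)
    for i, (ids, _) in enumerate(batch):
        inputs[i, :len(ids)] = torch.tensor(ids, dtype=torch.long)
        mask[i, :len(ids)] = 1
    targets = torch.tensor([label for _, label in batch], dtype=torch.long)
    return inputs, mask, targets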
# TF1-style training loop: `sess`, the `img`/`labels` and `img_val`/`label_val`
# tensors, `model` and `train_dataset` are assumed to be set up earlier in the
# script; `tf` is TensorFlow 1.x (or tf.compat.v1).
epoch = 0
step = 0
while True:
    try:
        # Fetch the next batch from the dataset (tf.Tensor -> numpy array).
        _img, _label = sess.run([img, labels])

        # Debugging aids kept from the original:
        # print(_img.shape)
        # print(_img[0].shape)
        # print(_label)
        # import cv2
        # cv2.imshow("img", _img[0])
        # cv2.waitKey(0)
        # cv2.destroyAllWindows()
        # exit()

        # Feed the numpy batch into the model's placeholders.
        cost, _ = model.train(_img, _label)
        step += 1
        if step % 100 == 0:
            acc = model.get_accuracy(_img, _label)
            print('Iter:', '%02d' % step,
                  'cost =', '{:.9f}'.format(cost),
                  'acc =', acc)
    except tf.errors.OutOfRangeError:
        # Iterator exhausted: run one pass over the validation set.
        val_acc = 0.0
        for i in range(train_dataset._val_array_len):
            _img_val, _label_val = sess.run([img_val, label_val])
            val_acc += model.get_accuracy(_img_val, _label_val)
        print('Validation set acc :', val_acc / train_dataset._val_array_len)
        print("Validation End")
        break
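# Minimal standalone sketch (an assumption, not from the original script) of
# the TF1 input pipeline that makes the OutOfRangeError pattern above work: a
# finite tf.data iterator raises OutOfRangeError once it has been consumed.
import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

dummy_imgs = np.random.rand(64, 28, 28, 1).astype(np.float32)  # fake images
dummy_labels = np.random.randint(0, 10, size=(64,))            # fake labels

ds = tf.data.Dataset.from_tensor_slices((dummy_imgs, dummy_labels)).batch(8)
img, labels = tf.data.make_one_shot_iterator(ds).get_next()

with tf.Session() as sess:
    while True:
        try:
            _img, _label = sess.run([img, labels])
            print(_img.shape, _label.shape)
        except tf.errors.OutOfRangeError:
            break  # dataset exhausted, exactly as in the loop above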