def main():
    """Train the captcha CNN, checkpointing and testing it every 10 epochs.

    Builds a ``CNN`` with a ``MultiLabelSoftMarginLoss`` criterion and an Adam
    optimizer, trains it for ``num_epochs`` epochs on the training loader and,
    after every 10th epoch, saves ``model<epoch>.pkl`` and prints the accuracy
    on the test loader. The final weights are written to ``./model.pkl``.
    """
    # Query CUDA availability once instead of on every batch.
    use_cuda = torch.cuda.is_available()

    # Load net
    cnn = CNN()
    loss_func = nn.MultiLabelSoftMarginLoss()
    optimizer = optim.Adam(cnn.parameters(), lr=learning_rate)
    if use_cuda:
        cnn.cuda()
        loss_func.cuda()

    # Load data
    train_dataloader = dataset.get_train_data_loader()
    test_dataloader = dataset.get_test_data_loader()

    # Train model
    for epoch in range(num_epochs):
        # The periodic test below puts the net in eval mode, so switch back
        # to training mode at the start of every epoch.
        cnn.train()
        for i, (images, labels) in enumerate(train_dataloader):
            images = Variable(images)
            # MultiLabelSoftMarginLoss multiplies the target with float
            # log-probabilities, so feed it float targets (this also matches
            # the sibling ``train`` function in this file).
            labels = Variable(labels.float())
            if use_cuda:
                images = images.cuda()
                labels = labels.cuda()
            predict_labels = cnn(images)
            loss = loss_func(predict_labels, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if (i + 1) % 100 == 0:
                print("epoch:", epoch, "step:", i, "loss:", loss.item())

        # Save and test model
        if (epoch + 1) % 10 == 0:
            filename = "model" + str(epoch + 1) + ".pkl"
            torch.save(cnn.state_dict(), filename)
            _report_test_accuracy(cnn, test_dataloader, use_cuda)
            print("save and test model...")

    torch.save(cnn.state_dict(), "./model.pkl")  # current is model.pkl
    print("save last model")


def _report_test_accuracy(cnn, test_dataloader, use_cuda):
    """Print running and final exact-match accuracy of ``cnn`` on the test set.

    NOTE(review): only the first sample of every batch is decoded and
    compared, while ``total`` grows by the batch size, so the reported figure
    is only meaningful when the test loader yields batches of size 1.
    """
    cnn.eval()
    correct = 0
    total = 0
    # No gradients are needed for evaluation; skipping graph construction
    # saves memory and time.
    with torch.no_grad():
        for image, label in test_dataloader:
            vimage = Variable(image)
            if use_cuda:
                vimage = vimage.cuda()
            output = cnn(vimage)
            # Move the first sample's scores to the host once, then slice
            # per character on the NumPy side.
            scores = output[0].cpu().numpy()
            predict_label = "".join(
                config.CHAR_SET[np.argmax(
                    scores[k * config.CHAR_SET_LEN:
                           (k + 1) * config.CHAR_SET_LEN])]
                for k in range(4))
            true_label = one_hot.vec2text(label.numpy()[0])
            total += label.size(0)
            if predict_label == true_label:
                correct += 1
            if total % 200 == 0:
                print(
                    'Test Accuracy of the model on the %d test images: %f %%'
                    % (total, 100 * correct / total))
    # Guard against an empty test loader instead of dividing by zero.
    accuracy = 100 * correct / total if total else 0.0
    print('Test Accuracy of the model on the %d test images: %f %%' %
          (total, accuracy))
def main():
    """Train the sentiment CNN on labelled tweets, evaluate, plot and save it.

    Reads the CSV path and device choice from ``parse_args()``, trains a
    ``CNN`` with ``train_network``, evaluates it on the test split, plots the
    validation accuracy and training losses, and saves the weights and the
    loss/accuracy arrays to disk.
    """
    args = parse_args()
    twitter_csv_path = args.tweet_csv_file
    device_type = args.device

    use_bert = False
    shuffle = False
    train_data, dev_data, test_data = load_twitter_data(
        twitter_csv_path,
        test_split_percent=0.1,
        val_split_percent=0.2,
        overfit=True,
        shuffle=shuffle,
        use_bert=use_bert,
        overfit_val=12639)

    vocab_size = train_data.vocab_size
    print(vocab_size)
    print(train_data.length)
    print(dev_data.length)
    print(test_data.length)

    cnn_net = CNN(vocab_size, DIM_EMB=300, NUM_CLASSES=2)

    # Use the GPU only when it was requested AND is actually available;
    # every other case runs on the CPU.
    use_gpu = device_type == "gpu" and torch.cuda.is_available()
    device = torch.device('cuda:0' if use_gpu else 'cpu')
    if use_gpu:
        cnn_net = cnn_net.cuda()

    # Labels are shifted from {-1, 1} to {0, 1} via (y + 1) / 2.
    # The positional 10 is presumably the number of epochs -- confirm
    # against train_network's signature.
    epoch_losses, eval_accuracy = train_network(
        cnn_net,
        train_data.Xwordlist,
        (train_data.labels + 1.0) / 2.0,
        10,
        dev_data,
        lr=0.003,
        batchSize=150,
        use_gpu=use_gpu,
        device=device)

    cnn_net.eval()
    print("Test Set")
    # The former CPU-only branch passed ``batch_size=batchSize`` here, but
    # ``batchSize`` was never defined in this function, so that path raised
    # NameError. Both paths now use the same call as the GPU path.
    eval_network(test_data, cnn_net, use_gpu=use_gpu, device=device)

    plot_accuracy(eval_accuracy, "Sentiment CNN lr=0.003", train_data.length)
    plot_losses(epoch_losses, "Sentiment CNN lr=0.003", train_data.length)

    # NOTE(review): the backslash is a directory separator on Windows only;
    # on POSIX systems it becomes part of the file name.
    torch.save(cnn_net.state_dict(), "saved_models\\cnn.pth")
    np.save("cnn_train_loss_" + str(train_data.length) + ".npy",
            np.array(epoch_losses))
    np.save("cnn_validation_accuracy_" + str(train_data.length) + ".npy",
            np.array(eval_accuracy))
def train(model_name='model.pkl'):
    """Fit a freshly constructed CNN on the training loader and save it.

    Runs ``setting.TRAIN_NUM_EPOCHS`` epochs of Adam optimisation with a
    multi-label soft-margin loss, printing the last batch loss after every
    epoch.

    Args:
        model_name: File path the final ``state_dict`` is written to.
    """
    net = CNN()
    net.train()
    print('init net')

    loss_fn = nn.MultiLabelSoftMarginLoss()
    adam = torch.optim.Adam(net.parameters(), lr=setting.TRAIN_LEARNING_RATE)

    # Train the Model
    loader = dataset.get_train_data_loader()
    for epoch in range(setting.TRAIN_NUM_EPOCHS):
        for batch_images, batch_labels in loader:
            inputs = Variable(batch_images)
            targets = Variable(batch_labels.float())

            loss = loss_fn(net(inputs), targets)

            adam.zero_grad()
            loss.backward()
            adam.step()
        # Report the loss of the final batch of this epoch.
        print('epoch: % -3s loss: %s' % (epoch, loss.item()))

    torch.save(net.state_dict(), model_name)  # current is model.pkl
    print('save last model')
def train_model(embedding_size, hidden_size, filter_width, max_or_mean,
                max_num_epochs, batch_size, learning_rate, loss_margin,
                training_checkpoint, dropout_prob, eval_batch_size):
    """Train a CNN and report dev/test AUC periodically and at the end.

    The training and evaluation data (``train_data``, ``source_questions``,
    ``dev_pos_data``, ``dev_neg_data``, ``test_pos_data``, ``test_neg_data``,
    ``target_questions``) as well as ``load_model_path``, ``save_model_path``
    and ``SAVE_MODEL`` are read from module-level names.

    Args:
        embedding_size, hidden_size, filter_width, max_or_mean, dropout_prob:
            Forwarded unchanged to the ``CNN`` constructor.
        max_num_epochs: Last epoch number to train (inclusive).
        batch_size: Forwarded to ``train``.
        learning_rate: Learning rate of the Adam optimizer.
        loss_margin: Margin of the ``nn.MultiMarginLoss`` criterion.
        training_checkpoint: Every this many epochs the average loss and the
            AUCs are printed and, if ``SAVE_MODEL`` is set, a checkpoint is
            saved.
        dropout_prob: See above.
        eval_batch_size: Forwarded to ``evaluate_auc``.

    Returns:
        Tuple ``(d_auc, t_auc)`` with the final dev and test AUC.
    """
    # Generate model
    cnn = CNN(embedding_size, hidden_size, filter_width, max_or_mean,
              dropout_prob)
    optimizer = optim.Adam(cnn.parameters(), lr=learning_rate)
    criterion = nn.MultiMarginLoss(margin=loss_margin)
    init_epoch = 1

    # Load model
    if load_model_path is not None:
        print("Loading model from \"" + load_model_path + "\"...")
        init_epoch = load_model(load_model_path, cnn, optimizer)

    def checkpoint_state(epoch_number):
        # Snapshot everything needed to resume training later.
        return {
            "model": cnn.state_dict(),
            "optimizer": optimizer.state_dict(),
            "epoch": epoch_number,
        }

    # Training
    print("***************************************")
    print("Starting run with following parameters:")
    print(" --embedding size: %d" % (cnn.input_size))
    print(" --hidden size: %d" % (cnn.hidden_size))
    print(" --filter width: %d" % (cnn.n))
    print(" --dropout: %f" % (cnn.dropout_prob))
    print(" --pooling: %s" % (cnn.max_or_mean))
    print(" --initial epoch: %d" % (init_epoch))
    print(" --number of epochs: %d" % (max_num_epochs))
    print(" --batch size: %d" % (batch_size))
    print(" --learning rate: %f" % (learning_rate))
    print(" --loss margin: %f" % (loss_margin))

    start = time.time()
    current_loss = 0
    # Number of epochs accumulated in current_loss. When resuming from a
    # checkpoint that is not aligned with training_checkpoint, the first
    # reporting window is shorter, so divide by the real count.
    epochs_since_report = 0
    for epoch in range(init_epoch, max_num_epochs + 1):
        # The trailing 21 is forwarded to train() unchanged; its meaning is
        # defined there.
        current_loss += train(cnn, criterion, optimizer, train_data,
                              source_questions, batch_size, 21)
        epochs_since_report += 1
        if epoch % training_checkpoint == 0:
            print("Epoch %d: Average Train Loss: %.5f, Time: %s" %
                  (epoch, (current_loss / epochs_since_report),
                   timeSince(start)))
            d_auc = evaluate_auc(cnn, dev_pos_data, dev_neg_data,
                                 target_questions, eval_batch_size)
            t_auc = evaluate_auc(cnn, test_pos_data, test_neg_data,
                                 target_questions, eval_batch_size)
            print("Dev AUC(0.05): %.2f" % (d_auc))
            print("Test AUC(0.05): %.2f" % (t_auc))
            current_loss = 0
            epochs_since_report = 0
            if SAVE_MODEL:
                save_model(save_model_path, "cnn_dt", checkpoint_state(epoch),
                           epoch == max_num_epochs)

    # Compute final results
    print("-------")
    print("FINAL RESULTS:")
    d_auc = evaluate_auc(cnn, dev_pos_data, dev_neg_data, target_questions,
                         eval_batch_size)
    t_auc = evaluate_auc(cnn, test_pos_data, test_neg_data, target_questions,
                         eval_batch_size)
    print("Training time: %s" % (timeSince(start)))
    print("Dev AUC(0.05): %.2f" % (d_auc))
    print("Test AUC(0.05): %.2f" % (t_auc))

    if SAVE_MODEL:
        # If the loaded model had already reached max_num_epochs, keep its
        # own epoch number instead of moving it backwards.
        final_epoch = (max_num_epochs
                       if init_epoch < max_num_epochs else init_epoch)
        save_model(save_model_path, "cnn", checkpoint_state(final_epoch), True)

    return (d_auc, t_auc)
def main():
    """Run one active-learning experiment with the sentiment CNN.

    Picks the acquisition function named on the command line (falling back
    to ``least_confidence`` for unknown names), trains with
    ``train_active_learning`` starting from a seed subset of the labelled
    data, evaluates on the test split, plots and saves the validation
    accuracy, saves the weights, and writes the hand-labelled samples to a
    CSV file.
    """
    args = parse_args()
    labeled_csv = args.labeled_tweet_csv_file
    unlabeled_csv = args.unlabeled_tweet_csv_file
    device_type = args.device
    acq_name = args.acquisition_func
    human_label = args.human_label

    # Name -> scoring function; anything unknown uses least_confidence.
    acquisition_table = {
        "least_confidence": least_confidence,
        "random": random_score,
        "entropy": entropy_score,
        "tweet_count": tweet_count_norm,
    }
    acquisition_func = acquisition_table.get(acq_name, least_confidence)
    # Flag for using the model to generate inputs for the acquisition
    # function; only "tweet_count" turns it off.
    use_model_acq = acq_name != "tweet_count"

    seed_data_size = args.seed_data_size

    train_data, dev_data, test_data = load_twitter_data(
        labeled_csv,
        test_split_percent=0.1,
        val_split_percent=0.2,
        shuffle=False,
        overfit=True,
        use_bert=False,
        overfit_val=40000)
    unlabeled_tweets, pool_labels = load_unlabeled_tweet_csv(
        unlabeled_csv, num_tweets=45000)

    # Convert "unlabeled" tweets to token ids.
    X_unlabeled = train_data.convert_text_to_ids(unlabeled_tweets)
    # Map labels from {-1, 1} to {0, 1}.
    pool_labels = (pool_labels + 1.0) / 2.0

    X_seed = train_data.Xwordlist[0:seed_data_size]
    Y_seed = (train_data.labels[0:seed_data_size] + 1.0) / 2.0

    print(train_data.vocab_size)
    print(len(X_seed))
    print(dev_data.length)
    print(test_data.length)

    num_samples = args.sample_size
    cnn_net = CNN(train_data.vocab_size, DIM_EMB=300, NUM_CLASSES=2)

    # GPU only when requested and available; otherwise stay on the CPU.
    on_gpu = device_type == "gpu" and torch.cuda.is_available()
    if on_gpu:
        device = torch.device('cuda:0')
        cnn_net = cnn_net.cuda()
    else:
        device = torch.device('cpu')

    epoch_losses, eval_accuracy, hand_labeled_data = train_active_learning(
        cnn_net,
        train_data,
        X_seed,
        Y_seed,
        X_unlabeled,
        pool_labels,
        dev_data,
        use_model=use_model_acq,
        num_epochs=8,
        human_label=human_label,
        acquisition_func=acquisition_func,
        lr=0.0035,
        batchSize=150,
        num_samples=num_samples,
        use_gpu=on_gpu,
        device=device)
    cnn_net.eval()
    print("Test Set")
    test_accuracy = eval_network(test_data,
                                 cnn_net,
                                 use_gpu=on_gpu,
                                 device=device)

    plot_title = "Sentiment CNN (Active Learning) lr=0.0035 " + acq_name
    plot_accuracy(eval_accuracy, plot_title, seed_data_size)

    torch.save(cnn_net.state_dict(), "saved_models\\cnn_active_learn.pth")

    accuracy_path = (
        "human_labelling_results/cnn_active_learning_validation_accuracy_" +
        acq_name + "_" + str(seed_data_size) + "_" + str(num_samples) +
        ".npy")
    np.save(accuracy_path, np.array(eval_accuracy))

    save_labels = True
    if save_labels:
        # Each entry is (token ids, assigned label, ground-truth label).
        rows = [(train_data.convert_to_words(token_ids), assigned, truth)
                for token_ids, assigned, truth in hand_labeled_data]
        new_labeled_tweets = pd.DataFrame({
            'label': [row[1] for row in rows],
            'ground truth': [row[2] for row in rows],
            'text': [row[0] for row in rows]
        })
        new_labeled_tweets.to_csv("human_labeled_tweets_lc_rk.csv",
                                  header=True,
                                  index=False)
def main():
    """Run the active-learning ablation over acquisition functions and sizes.

    For every combination of acquisition function, seed-set size and
    per-round sample size, a fresh ``CNN`` is trained with
    ``train_active_learning``, evaluated on the test split, optionally saved,
    and its validation-accuracy curve is written to ``results_ablation/``.
    All test accuracies are finally written to
    ``ablation_test_accuracies1.txt``.

    Raises:
        ValueError: If ``sampling_functions`` names an unknown acquisition
            function.
    """
    # Parameters. Supported acquisition names are the keys of
    # ``acquisition_table`` below.
    sampling_functions = ['tweet_count']
    sampling_sizes = [5000, 10000, 15000, 20000]
    num_active_samples = [10, 25, 50]

    # name -> (scoring function, whether model outputs feed the function).
    # Resolving both per function fixes the former stale-flag problem where
    # ``use_model`` stayed False for every function after 'tweet_count'.
    acquisition_table = {
        'random_score': (random_score, True),
        'entropy_score': (entropy_score, True),
        'least_confidence': (least_confidence, True),
        'tweet_count': (tweet_count_norm, False),
    }

    args = parse_args()
    labeled_twitter_csv_path = args.labeled_tweet_csv_file
    unlabeled_twitter_csv_path = args.unlabeled_tweet_csv_file
    save_models = args.save_models

    use_bert = False
    shuffle = False
    train_data, dev_data, test_data = load_twitter_data(
        labeled_twitter_csv_path,
        test_split_percent=0.1,
        val_split_percent=0.2,
        shuffle=shuffle,
        overfit=True,
        use_bert=use_bert,
        overfit_val=40000)
    unlabeled_tweets, ground_truth_labels = load_unlabeled_tweet_csv(
        unlabeled_twitter_csv_path, num_tweets=45000)
    X_unlabeled = train_data.convert_text_to_ids(unlabeled_tweets)
    # Map labels from {-1, 1} to {0, 1}.
    ground_truth_labels = (ground_truth_labels + 1.0) / 2.0

    # Fall back to the CPU when CUDA is unavailable instead of crashing,
    # consistent with the other entry points in this file.
    use_gpu = torch.cuda.is_available()
    device = torch.device('cuda:0' if use_gpu else 'cpu')

    test_accuracies = {}
    print("Running ablation experiment on sampling functions and seed sizes")
    for af in sampling_functions:
        if af not in acquisition_table:
            raise ValueError("Unknown acquisition function: " + str(af))
        acquisition_func, use_model = acquisition_table[af]

        for seed_data_size in sampling_sizes:
            for sample_size in num_active_samples:
                param_combo = ("Acquisition_Func: " + af + " Seed Size: " +
                               str(seed_data_size) + " Sample Size: " +
                               str(sample_size))
                print(param_combo + "\n")

                X_seed = train_data.Xwordlist[0:seed_data_size]
                Y_seed = (train_data.labels[0:seed_data_size] + 1.0) / 2.0

                # Fresh model for every configuration.
                cnn_net = CNN(train_data.vocab_size, DIM_EMB=300,
                              NUM_CLASSES=2)
                if use_gpu:
                    cnn_net = cnn_net.cuda()

                print("Train active learning")
                # Pass copies of the unlabeled pool and its labels so each
                # run starts from the same, unmodified data.
                epoch_losses, eval_accuracy, hand_labeled_data = \
                    train_active_learning(
                        cnn_net,
                        train_data,
                        X_seed,
                        Y_seed,
                        copy.deepcopy(X_unlabeled),
                        np.copy(ground_truth_labels),
                        dev_data,
                        num_epochs=8,
                        use_model=use_model,
                        acquisition_func=acquisition_func,
                        lr=0.0035,
                        batchSize=150,
                        num_samples=sample_size,
                        use_gpu=use_gpu,
                        device=device)
                print("Finished Training")

                cnn_net.eval()
                print("Test Set")
                test_accuracy = eval_network(test_data,
                                             cnn_net,
                                             use_gpu=use_gpu,
                                             device=device)

                run_tag = (af + "_" + str(seed_data_size) + "_" +
                           str(sample_size))
                model_save_path = ("model_weights/cnn_active_learn_weights_" +
                                   run_tag + ".pth")
                if save_models:
                    torch.save(cnn_net.state_dict(), model_save_path)

                param_combo = ("CNN Active Learning: " +
                               " Acquisition_Func: " + af + " Seed Size: " +
                               str(seed_data_size) + " Sample Size: " +
                               str(sample_size))
                test_accuracies[param_combo] = test_accuracy

                filename = ("results_ablation/"
                            "cnn_active_learning_val_accuracy_" + run_tag +
                            ".npy")
                np.save(filename, np.array(eval_accuracy))

    print("Finished experiments")
    with open("ablation_test_accuracies1.txt", "w") as f:
        f.writelines(key + " Acc: " + str(accuracy) + "\n"
                     for key, accuracy in test_accuracies.items())