# `Helper`, `load_model`, and the `mod` module are project-local and assumed
# to be provided elsewhere in this repository.
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from PIL import Image
from sklearn.metrics import auc, roc_curve
from sklearn.metrics.pairwise import cosine_similarity
from torch import cuda
from torchvision import transforms


def evaluate(filename):
    """
    Evaluate the models saved by the run identified by `filename`.

    :param filename: Prefix of the saved models for the latest run
        (e.g. "A2_T<timestamp>_S"); the split number and the ".pt"
        extension are appended per split.
    :return:
    """
    device = torch.device("cuda:0" if cuda.is_available() else "cpu")
    helper = Helper()
    testing_set, testing_loader = helper.get_data(mode="test",
                                                  testing_batch_size=1)
    print("Starting evaluation")
    to_tensor = transforms.ToTensor()
    true_val = {}
    score = {}
    tpr = {}
    fpr = {}
    thresh = {}
    area = {}
    for set_n in range(1, 11):
        true_val[set_n] = []
        score[set_n] = []
        model = load_model(filename + str(set_n) + ".pt", device)
        model.eval()
        total_len = len(testing_loader[set_n])
        with torch.no_grad():
            for i, (list_1, list_2, labels) in enumerate(
                    testing_loader[set_n]):
                if not isinstance(list_1, list) or not isinstance(
                        list_2, list):
                    print("Issues with testing file at location {0}".format(i))
                    print(list_1)
                    print(list_2)
                    continue
                # Average the feature embeddings over all images in each list.
                l1_avg = np.zeros([1, model.features])
                l1 = 0
                l2_avg = np.zeros([1, model.features])
                l2 = 0
                for im in list_1:
                    try:
                        image = Image.open(im[0])
                        tensor_img = to_tensor(image).to(device)
                        output = model(tensor_img.unsqueeze(0))
                        l1_avg += output.cpu().numpy()
                        l1 += 1
                    except FileNotFoundError:
                        print("File {0} not found. Skipping.".format(im))
                l1_avg /= max(l1, 1)  # Guard against an empty image list.
                for im in list_2:
                    try:
                        image = Image.open(im[0])
                        tensor_img = to_tensor(image).to(device)
                        output = model(tensor_img.unsqueeze(0))
                        l2_avg += output.cpu().numpy()
                        l2 += 1
                    except FileNotFoundError:
                        print("File {0} not found. Skipping.".format(im))
                l2_avg /= max(l2, 1)  # Guard against an empty image list.
                # Cosine similarity between the two averaged embeddings is
                # the verification score for this pair.
                s = cosine_similarity(l1_avg.reshape(1, -1),
                                      l2_avg.reshape(1, -1))[0, 0]
                score[set_n].append(s)
                true_val[set_n].append(labels.item())
                if (i + 1) % 500 == 0:
                    print("Step: {0}/{1}".format(i + 1, total_len))
                    # print(score[1][i], true_val[1][i])
        # Code to evaluate the ROC graph is taken from the official
        # documentation:
        # https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html#sphx-glr-auto-examples-model-selection-plot-roc-py
        fpr[set_n], tpr[set_n], thresh[set_n] = roc_curve(
            np.asarray(true_val[set_n]), np.asarray(score[set_n]))
        area[set_n] = auc(fpr[set_n], tpr[set_n])
        plt.figure()
        plt.plot(fpr[set_n],
                 tpr[set_n],
                 color='darkorange',
                 lw=2,
                 label="ROC curve (area = {0:.2f})".format(area[set_n]))
        plt.xlim([0.0, 1.05])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver Operating Characteristic for Split {0}'.format(
            set_n))
        plt.legend(loc="lower right")
        # filename[:-1] drops the trailing split marker "S" from the prefix.
        plt.savefig(
            fname="images/{1}ROC{0}.jpg".format(set_n, filename[:-1]))

    color_list = [
        "aqua", "chocolate", "brown", "navy", "lime", "olive", "silver",
        "gold", "pink", "magenta"
    ]
    plt.figure()
    print("Thresholds acquired:")
    for set_n in range(1, 11):
        print("Split {0}".format(set_n), thresh[set_n])
        plt.plot(fpr[set_n],
                 tpr[set_n],
                 color=color_list[set_n - 1],
                 lw=1,
                 label="Split {0}".format(set_n))
    plt.xlim([0.0, 1.05])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (Consolidated)')
    plt.legend(loc="lower right")
    plt.savefig(fname="images/{0}ROC_ALL.jpg".format(filename[:-1]))

    pd.DataFrame(true_val).to_csv(
        path_or_buf="results/{0}GT.csv".format(filename[:-1]))
    pd.DataFrame(score).to_csv(
        path_or_buf="results/{0}Score.csv".format(filename[:-1]))
    # The per-split ROC arrays differ in length, so wrap each in a Series
    # and let pandas pad the shorter columns with NaN.
    pd.DataFrame({k: pd.Series(v) for k, v in tpr.items()}).to_csv(
        path_or_buf="results/{0}TPR.csv".format(filename[:-1]))
    pd.DataFrame({k: pd.Series(v) for k, v in fpr.items()}).to_csv(
        path_or_buf="results/{0}FPR.csv".format(filename[:-1]))
    pd.DataFrame({k: pd.Series(v) for k, v in thresh.items()}).to_csv(
        path_or_buf="results/{0}TH.csv".format(filename[:-1]))
    # `area` holds one scalar per split, so it needs an explicit index.
    pd.DataFrame(area, index=[0]).to_csv(
        path_or_buf="results/{0}Area.csv".format(filename[:-1]))
    print("\n\nDone")
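
# A minimal, hypothetical helper (not part of the original project) showing
# how the roc_curve outputs computed above can be turned into an operating
# point: pick the threshold that maximises Youden's J statistic (TPR - FPR),
# then accept a pair when its cosine-similarity score meets that threshold.
def pick_threshold(fpr_arr, tpr_arr, thresh_arr):
    """Return the threshold maximising Youden's J = TPR - FPR."""
    best = np.argmax(tpr_arr - fpr_arr)
    return thresh_arr[best]
# Example use against the arrays from evaluate():
#     accept = s >= pick_threshold(fpr[set_n], tpr[set_n], thresh[set_n])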
def main(batch_size, num_epochs, lr, file_write, flag_dummy, temperature,
         lr_decay, features):
    """
    Main function for training.

    :param batch_size: Batch size to use.
    :param num_epochs: Number of epochs for each split.
    :param lr: Learning rate to be set at the start of each split.
    :param file_write: Write output to stdout (default) or to a file.
    :param flag_dummy: Create a dummy file for evaluation.
    :param temperature: Default temperature for the softmax/log_softmax
        layer while training.
    :param lr_decay: Learning rate decay applied on every drop in the
        minimum observed loss.
    :param features: Number of nodes in the penultimate feature layer.
    :return:
    """
    # ---------------------Parameters---------------------
    device = torch.device("cuda:0" if cuda.is_available() else "cpu")
    # optim_name = 'SGD'
    # optim_name = 'RMS'
    optim_name = 'Adam'
    batch_print = 50
    op_dir = "pickles/"
    t_stmp = time.strftime("%Y%m%d_%H%M%S", time.gmtime())

    # Record the prefix of the latest models so evaluation can find them.
    with open("latest_t_stmp.txt", 'w') as ff:
        ff.write("A2_T{0}_S".format(t_stmp))

    helper = Helper("log/log_" + t_stmp + ".txt")
    helper.write_file(file_write)
    helper.log(msg="Starting data loading.")
    training_set, training_loader = helper.get_data(
        mode="train", training_batch_size=batch_size)
    helper.log(msg="Finished data loading. Starting main training.")

    for set_n in range(1, 11):
        init_lr = lr
        model, criterion, optimizer = mod.get_model(device,
                                                    optim_name,
                                                    lamb=0,
                                                    learning_rate=init_lr,
                                                    final_features=features)
        model.train(True)
        model.set_temperature(temperature)
        if flag_dummy:
            helper.log(msg="\nCreating dummy file.\n")
            dummy_file = {
                "model": model.state_dict(),
                "criterion": criterion.state_dict(),
                "optimizer": optimizer.state_dict(),
                "optim_name": optim_name,
                "features": features
            }
            torch.save(dummy_file, op_dir + "dummy.pt")
            flag_dummy = False
        helper.log(msg="\nStart of split {0}\n".format(set_n))
        total_len = len(training_loader[set_n])
        running_loss = 0.0
        cor = 0
        tot = 0
        cor_b = 0
        tot_b = 0
        # Initial reference loss (average of 6.0 per batch over one print
        # window).
        past_loss = 6.0 * batch_print
        for epoch in range(num_epochs):
            for i, (images, labels) in enumerate(training_loader[set_n]):
                # Move tensors to the training device (GPU if available).
                inp = images.to(device)
                lab = labels.to(device)
                # Reset gradients before processing.
                optimizer.zero_grad()
                # Get model output.
                out = model(inp)
                # Calculate loss.
                loss = criterion(out, lab)
                # Accuracy calculation; use the actual batch size, since the
                # last batch of an epoch may be smaller than batch_size.
                _, predicted = torch.max(out.data, 1)
                tot_b += lab.size(0)
                cor_b += (predicted == lab).sum().item()
                tot += lab.size(0)
                cor += (predicted == lab).sum().item()
                # Update weights.
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
                # logger.log(msg="\rLoss = {0} ".format(l), end="")
                if (i + 1) % batch_print == 0:
                    helper.log(
                        msg="Split: {3}, Epoch: {0}, step: {1}/{2} ".format(
                            epoch + 1, i + 1, total_len, set_n),
                        end="\t")
                    helper.log(
                        msg="Running Loss (avg): {0:.06f}, Past: {1:.06f}".
                        format((running_loss / batch_print),
                               (past_loss / batch_print)),
                        end="\t")
                    helper.log(
                        msg="Accuracy: (Per {2})|(Total): {0:.03f}|{1:.03f} %".
                        format((cor_b * 100) / tot_b, (cor * 100) / tot,
                               batch_size * batch_print),
                        end="\t")
                    # Decay the learning rate whenever the running loss
                    # improves on the best seen so far (floored at 0.001).
                    if running_loss < past_loss:
                        past_loss = running_loss
                        init_lr *= lr_decay
                        for params in optimizer.param_groups:
                            params['lr'] = max(init_lr, 0.001)
                    helper.log(msg="LR: {0:.06f}".format(init_lr))
                    running_loss = 0.0
                    cor_b = 0
                    tot_b = 0

        filename = op_dir + "A2_T{1}_S{0}.pt".format(set_n, t_stmp)
        # Idea for the named save file was picked up from here:
        # https://github.com/quiltdata/pytorch-examples/blob/master/imagenet/main.py
        save_file = {
            "model": model.state_dict(),
            "criterion": criterion.state_dict(),
            "optimizer": optimizer.state_dict(),
            "optim_name": optim_name,
            "features": features
        }
        torch.save(save_file, filename)
        helper.log(
            msg="\nFile {0} saved for split {1}".format(filename, set_n))
    helper.close()
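
# A minimal command-line driver is sketched below. The flag names and default
# values are illustrative assumptions, not taken from the original project,
# and load_model is assumed to resolve the saved model paths from the prefix
# that main() records in latest_t_stmp.txt.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Train the per-split models, then evaluate them.")
    parser.add_argument("--batch_size", type=int, default=64)
    parser.add_argument("--num_epochs", type=int, default=10)
    parser.add_argument("--lr", type=float, default=0.01)
    parser.add_argument("--file_write", action="store_true",
                        help="Write log output to a file instead of stdout.")
    parser.add_argument("--temperature", type=float, default=1.0)
    parser.add_argument("--lr_decay", type=float, default=0.95)
    parser.add_argument("--features", type=int, default=512)
    args = parser.parse_args()

    main(args.batch_size, args.num_epochs, args.lr, args.file_write,
         flag_dummy=True, temperature=args.temperature,
         lr_decay=args.lr_decay, features=args.features)
    # main() records the run prefix (e.g. "A2_T<timestamp>_S") in
    # latest_t_stmp.txt; evaluate() appends the split number and ".pt".
    with open("latest_t_stmp.txt") as f:
        evaluate(f.read().strip())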