def create_rand(target_term, split_size, parameter_ranges, nbrTrials, nbrEpochs, GNN, MD, MBTR):
    """Run nbrTrials random hyper-parameter trials and append each result to
    ../results/<target_term>/hyper_random_results.csv.

    parameter_ranges is a sequence of (low, high) pairs. Index 0 is the
    learning rate and is drawn as a continuous uniform float; every other
    parameter is drawn as an integer in [low, high].
    """
    with open("./hyper/dataset.pic", 'rb') as f:
        dataset = pickle.load(f)
    for trial in range(nbrTrials):
        # One single-row DataFrame per trial; object dtype so the float
        # learning rate and the int parameters can share a row.
        results = pd.DataFrame()
        results = results.astype('object')
        for p in range(len(parameter_ranges)):
            p1 = parameter_ranges[p][0]
            p2 = parameter_ranges[p][1]
            if 0 == p:
                # Learning rate: continuous sample, not an integer grid point.
                results.loc[trial, p] = random.uniform(p1, p2)
            else:
                # Degenerate range (low == high) means a fixed parameter.
                if p1 == p2:
                    randValue = p1
                else:
                    randValue = random.randrange(p1, p2 + 1)
                results.loc[trial, p] = int(randValue)
                results[p] = results[p].astype(int)
        # Rebuild the parameter list: cast everything to int except the
        # learning rate, which must stay a float.
        my_list = results.loc[trial:trial, 0:(len(parameter_ranges) - 1)].values.tolist()[0]
        lr = my_list[0]
        my_list = [int(x) for x in my_list]
        my_list[0] = lr
        train_acc, val_acc = gnn.fit_GNN(1, 0, target_term, dataset, split_size,
                                         nbrEpochs, *my_list, GNN, MD, MBTR)
        results.at[trial, len(parameter_ranges)] = round(val_acc, 5)
        print(results.loc[trial:trial, :].to_string(header=False))
        # Append this trial to the running results file; on the very first
        # trial the file does not exist (or is empty), so just create it.
        out_path = "../results/%s/hyper_random_results.csv" % target_term
        try:
            results_old = pd.read_csv(out_path, header=None)
            all_results = pd.concat([results_old, results], ignore_index=True, axis=0)
            all_results.to_csv(out_path, index=False, header=False)
        except (FileNotFoundError, pd.errors.EmptyDataError):
            results.to_csv(out_path, index=False, header=False)
def run_hyper_parallel(i, j, p, target_term, split_size, nbrEpochs, nbrGrid, param_best, GNN, MD, MBTR, show):
    """Evaluate one hyper-parameter candidate in a worker process.

    Reloads the pickled dataset, trains the GNN with param_best, reports both
    losses on stdout, and pickles them under index-j file names so the parent
    process can collect them afterwards.
    """
    with open("./hyper/dataset.pic", 'rb') as handle:
        data = pickle.load(handle)
    loss_train, loss_val = gnn.fit_GNN(1, 0, target_term, data, split_size,
                                       nbrEpochs, *param_best, GNN, MD, MBTR)
    print("param", i, "=", p, "Training loss =", round(loss_train, 6),
          "Validation loss =", round(loss_val, 6))
    # Persist both losses for the collecting parent process.
    for path, value in (("./hyper/train_loss_%s.pic" % j, loss_train),
                        ("./hyper/val_loss_%s.pic" % j, loss_val)):
        with open(path, 'wb') as handle:
            pickle.dump(value, handle)
def getLearningCurve(target_term, df_reduced, dataset, split_size, nbrEpochs, param_best):
    """Build a learning curve: train on prefixes of the dataset that double
    from 1000 up to len(dataset), recording r2 and MAE for three models
    (GNN only, GNN + molecular descriptors, descriptors only).

    Returns a DataFrame with one row per training size. Also writes the
    per-size train/test predictions to CSV under ../results/<target_term>/.
    """
    df_results = pd.DataFrame(columns = ["training_size", "r2_GNN", "r2_MD", "r2_MDGNN", "MAE_GNN", "MAE_MD", "MAE_MDGNN"])
    index = 0
    x = 1000  # starting training size; doubled each iteration
    while x<= len(dataset):
        end = x
        print("Training size =", x)
        dataset_part = dataset[:end]
        df_results.loc[index,"training_size"] = x
        df_part = df_reduced[:end]
        # GET RESULTS FOR gnn AND gnn + md
        # MD = 0 -> GNN only; MD = 1 -> GNN + molecular descriptors.
        # NOTE(review): this fit_GNN call passes fewer trailing args (no GNN /
        # MBTR flags) than the other call sites in this file — confirm the
        # signature match.
        for MD in range(2):
            trainData, testData = gnn.fit_GNN(0, 0, target_term, dataset_part, split_size, nbrEpochs, *param_best, MD)
            #gnn.plot_results(trainData, testData, target_term, show = 1)
            r2 = r2_score(testData["Target"].to_numpy(), testData["Preds"].to_numpy())
            MAE = mean_absolute_error(testData["Target"].to_numpy(), testData["Preds"].to_numpy())
            if (0 == MD):
                print("GNN only: r2 =", r2, "MAE =", MAE)
                df_results.loc[index,"r2_GNN"] = r2
                df_results.loc[index,"MAE_GNN"] = MAE
                trainData.to_csv("../results/%s/learning_size=%s_train_CNN=1_MD=0.csv" % (target_term, x))
                testData.to_csv("../results/%s/learning_size=%s_test_CNN=1_MD=0.csv" % (target_term, x))
            else:
                print("MDGNN: r2 =", r2, "MAE =", MAE)
                df_results.loc[index,"r2_MDGNN"] = r2
                df_results.loc[index,"MAE_MDGNN"] = MAE
                trainData.to_csv("../results/%s/learning_size=%s_train_CNN=1_MD=1.csv" % (target_term, x))
                testData.to_csv("../results/%s/learning_size=%s_test_CNN=1_MD=1.csv" % (target_term, x))
        # GET RESULTS FOR MD ONLY
        trainData, testData, feat_importances = molecularDescriptorsOnly(df_part, split_size, target_term, 0)
        r2 = r2_score(testData["Target"].to_numpy(), testData["Preds"].to_numpy())
        MAE = mean_absolute_error(testData["Target"].to_numpy(), testData["Preds"].to_numpy())
        print("MD only: r2 =", r2, "MAE =", MAE)
        print("\n")
        df_results.loc[index,"r2_MD"] = r2
        df_results.loc[index,"MAE_MD"] = MAE
        trainData.to_csv("../results/%s/learning_size=%s_train_CNN=0_MD=1.csv" % (target_term, x))
        testData.to_csv("../results/%s/learning_size=%s_test_CNN=0_MD=1.csv" % (target_term, x))
        x *= 2  # double the training size for the next row
        index = index + 1
    return(df_results)
def create_rand(parameter_ranges, nbrTrials, run, nbrEpochs):
    """Draw nbrTrials random integer parameter sets from parameter_ranges
    (one DataFrame row per trial); when run == 1, also evaluate each set with
    gnn.fit_GNN and store the validation loss in the last column.

    Returns the DataFrame of sampled parameters (plus losses when run == 1).
    """
    results = pd.DataFrame()
    for trial in range(nbrTrials):
        for p in range(len(parameter_ranges)):
            p1 = parameter_ranges[p][0]
            p2 = parameter_ranges[p][1]
            # NOTE(review): randrange excludes p2, unlike the sibling
            # create_rand which samples from [p1, p2] — confirm which is meant.
            randValue = random.randrange(p1, p2)
            results.loc[trial, p] = int(randValue)
        if (1 == run):
            print("trial =", trial)
            # Renamed from `list` to avoid shadowing the builtin.
            trial_params = results.loc[trial:trial, 0:(len(parameter_ranges) - 1)].values.tolist()
            array1 = np.asarray(trial_params).flatten().astype(int)
            # NOTE(review): `dataset` is not defined in this function, so it
            # must come from module scope — verify it exists before calling
            # with run == 1, otherwise this raises NameError.
            array2 = np.asarray([dataset, 0.75, nbrEpochs])
            full_array = np.concatenate((array2, array1)).flatten()
            val_acc = gnn.fit_GNN(0, *full_array)
            results.at[trial, len(parameter_ranges)] = val_acc
    return(results)
import os  # local import: used only for this setup step

# Cache the dataset on disk so worker processes can reload it.
# os.makedirs replaces `subprocess.run(["mkdir", ...])`: portable and no
# error output when the directory already exists.
os.makedirs("hyper", exist_ok=True)
with open("./hyper/dataset.pic", 'wb') as filehandle:
    pickle.dump(dataset, filehandle, protocol=4)

# RANDOM SEARCH
# hyper_batch_size, target_term, dataset1, split_size, parameter_ranges, nbrTrials, nbrEpochs, MD
param_best, param_best_5 = gnn.fit_hyperParameters_random(
    1, target_term, 0.95, param_range, 10, 15, GNN, MD, MBTR)
with open("../results/all_hyperparameters.txt", "a") as file_object:
    file_object.write("%s = %s (GNN = %s MD = %s MBTR = %s)\n" % (target_term, param_best, GNN, MD, MBTR))

######################### FINAL OPTIMIZATION
print("########## ", target_term, " GNN =", GNN, "MD = ", MD, "MBTR =", MBTR, "#############")
print("Molecular Descriptor used =", MD)
# getloss, verbose, target_term, dataset, split_size, num_epochs, lr, batch_size, p1, p2, numLayer, numFinalFeature, GNN, MD, MBTI
trainData, testData = gnn.fit_GNN(0, 1, target_term, dataset, 0.95, num_epochs, *param_best, GNN, MD, MBTR)
trainData.to_csv("../results/%s/train_CNN=%s_MD=%s_MBTR=%s.csv" % (target_term, GNN, MD, MBTR))
testData.to_csv("../results/%s/test_CNN=%s_MD=%s_MBTR=%s.csv" % (target_term, GNN, MD, MBTR))
# Round-trip through CSV so downstream plotting sees the reloaded dtypes.
trainData = pd.read_csv("../results/%s/train_CNN=%s_MD=%s_MBTR=%s.csv" % (target_term, GNN, MD, MBTR))
testData = pd.read_csv("../results/%s/test_CNN=%s_MD=%s_MBTR=%s.csv" % (target_term, GNN, MD, MBTR))
if (1 == show_plots):
    # NOTE(review): replots the loss curve once per epoch with a 30 s pause —
    # looks like a live-monitoring loop; confirm this is intentional.
    for i in range(num_epochs):
        gnn.plot_losses(target_term, GNN, MD, MBTR)
        time.sleep(30)
gnn.plot_results(trainData, testData, target_term, show=show_plots)
# Now store the final result