Code example #1
0
def create_rand(target_term, split_size, parameter_ranges, nbrTrials,
                nbrEpochs, GNN, MD, MBTR):
    """Random hyper-parameter search.

    For each trial, samples one configuration from ``parameter_ranges``,
    trains via ``gnn.fit_GNN`` and appends the rounded validation accuracy
    to ``../results/<target_term>/hyper_random_results.csv``.

    Parameter 0 is treated as the learning rate and sampled as a float via
    ``random.uniform``; every other parameter is sampled as an integer from
    its inclusive [p1, p2] range.
    """
    with open("./hyper/dataset.pic", 'rb') as f:
        dataset = pickle.load(f)
    for trial in range(nbrTrials):
        results = pd.DataFrame().astype('object')
        for p, (p1, p2) in enumerate(parameter_ranges):
            if p == 0:
                # Learning rate: keep as a float.
                results.loc[trial, p] = random.uniform(p1, p2)
            else:
                # randrange's upper bound is exclusive, hence the +1 to make
                # [p1, p2] inclusive; a degenerate range is taken verbatim.
                randValue = p1 if p1 == p2 else random.randrange(p1, p2 + 1)
                results.loc[trial, p] = int(randValue)
                results[p] = results[p].astype(int)

        my_list = results.loc[trial:trial,
                              0:(len(parameter_ranges) - 1)].values.tolist()[0]
        lr = my_list[0]
        my_list = [int(x) for x in my_list]
        my_list[0] = lr  # restore the float learning rate after the int cast
        train_acc, val_acc = gnn.fit_GNN(1, 0, target_term, dataset,
                                         split_size, nbrEpochs, *my_list, GNN,
                                         MD, MBTR)
        results.at[trial, len(parameter_ranges)] = round(val_acc, 5)
        print(results.loc[trial:trial, :].to_string(header=False))
        # Append this trial to the accumulated results file; if the file is
        # missing or empty, start a fresh one. (Was a bare `except:`, which
        # also swallowed unrelated errors such as KeyboardInterrupt.)
        out_path = "../results/%s/hyper_random_results.csv" % target_term
        try:
            results_old = pd.read_csv(out_path, header=None)
        except (FileNotFoundError, pd.errors.EmptyDataError):
            results.to_csv(out_path, index=False, header=False)
        else:
            all_results = pd.concat([results_old, results],
                                    ignore_index=True,
                                    axis=0)
            all_results.to_csv(out_path, index=False, header=False)
Code example #2
0
def run_hyper_parallel(i, j, p, target_term, split_size, nbrEpochs, nbrGrid,
                       param_best, GNN, MD, MBTR, show):
    """Evaluate one hyper-parameter configuration in a worker process.

    Loads the pickled dataset, trains via ``gnn.fit_GNN`` with the supplied
    parameters, prints the rounded losses, and dumps the train/validation
    losses to ``./hyper/{train,val}_loss_<j>.pic`` for the parent to collect.
    """
    with open("./hyper/dataset.pic", 'rb') as dataset_file:
        dataset = pickle.load(dataset_file)

    trainLoss, valLoss = gnn.fit_GNN(1, 0, target_term, dataset, split_size,
                                     nbrEpochs, *param_best, GNN, MD, MBTR)

    print("param", i, "=", p, "Training loss =", round(trainLoss, 6),
          "Validation loss =", round(valLoss, 6))

    # Persist both losses under the worker index j.
    for tag, loss in (("train_loss", trainLoss), ("val_loss", valLoss)):
        with open("./hyper/%s_%s.pic" % (tag, j), 'wb') as out:
            pickle.dump(loss, out)
Code example #3
0
def getLearningCurve(target_term, df_reduced, dataset, split_size, nbrEpochs, param_best):
    """Build a learning curve over progressively doubled training sizes.

    Starting from the first 1000 samples and doubling until the size exceeds
    the dataset, records r2/MAE for GNN-only, MD-only and MD+GNN models,
    writes per-size train/test predictions under ``../results/<target_term>/``
    and returns a DataFrame with one row per training size.
    """
    df_results = pd.DataFrame(columns=["training_size", "r2_GNN", "r2_MD",
                                       "r2_MDGNN", "MAE_GNN", "MAE_MD",
                                       "MAE_MDGNN"])
    row = 0
    size = 1000
    while size <= len(dataset):
        print("Training size =", size)
        subset = dataset[:size]
        df_results.loc[row, "training_size"] = size
        df_subset = df_reduced[:size]

        # GNN alone (MD = 0) and GNN combined with molecular descriptors (MD = 1).
        for MD in (0, 1):
            trainData, testData = gnn.fit_GNN(0, 0, target_term, subset,
                                              split_size, nbrEpochs,
                                              *param_best, MD)
            #gnn.plot_results(trainData, testData, target_term, show = 1)
            targets = testData["Target"].to_numpy()
            preds = testData["Preds"].to_numpy()
            r2 = r2_score(targets, preds)
            MAE = mean_absolute_error(targets, preds)
            if MD == 0:
                print("GNN only: r2 =", r2, "MAE =", MAE)
                df_results.loc[row, "r2_GNN"] = r2
                df_results.loc[row, "MAE_GNN"] = MAE
            else:
                print("MDGNN: r2 =", r2, "MAE =", MAE)
                df_results.loc[row, "r2_MDGNN"] = r2
                df_results.loc[row, "MAE_MDGNN"] = MAE
            trainData.to_csv("../results/%s/learning_size=%s_train_CNN=1_MD=%s.csv"
                             % (target_term, size, MD))
            testData.to_csv("../results/%s/learning_size=%s_test_CNN=1_MD=%s.csv"
                            % (target_term, size, MD))

        # Molecular descriptors only (no GNN).
        trainData, testData, feat_importances = molecularDescriptorsOnly(
            df_subset, split_size, target_term, 0)
        targets = testData["Target"].to_numpy()
        preds = testData["Preds"].to_numpy()
        r2 = r2_score(targets, preds)
        MAE = mean_absolute_error(targets, preds)
        print("MD only: r2 =", r2, "MAE =", MAE)
        print("\n")
        df_results.loc[row, "r2_MD"] = r2
        df_results.loc[row, "MAE_MD"] = MAE
        trainData.to_csv("../results/%s/learning_size=%s_train_CNN=0_MD=1.csv"
                         % (target_term, size))
        testData.to_csv("../results/%s/learning_size=%s_test_CNN=0_MD=1.csv"
                        % (target_term, size))

        size *= 2
        row += 1
    return df_results
Code example #4
0
def create_rand(parameter_ranges, nbrTrials, run, nbrEpochs):
    """Sample random integer hyper-parameter configurations.

    For each trial, draws one value per parameter with
    ``random.randrange(p1, p2)`` (upper bound exclusive) and stores it in
    row ``trial`` of the returned DataFrame. When ``run == 1``, additionally
    trains via ``gnn.fit_GNN`` on the sampled configuration and records the
    validation accuracy in the last column.

    NOTE(review): the run == 1 branch reads a module-level ``dataset``
    variable that is not a parameter — confirm it is defined at call time.
    """
    results = pd.DataFrame()
    for trial in range(nbrTrials):
        for p in range(len(parameter_ranges)):
            p1, p2 = parameter_ranges[p]
            results.loc[trial, p] = int(random.randrange(p1, p2))
        if run == 1:
            print("trial =", trial)
            # Was named `list`, shadowing the builtin; dead `list = []`
            # initialisation removed.
            params = results.loc[trial:trial,
                                 0:(len(parameter_ranges) - 1)].values.tolist()
            array1 = np.asarray(params).flatten().astype(int)
            array2 = np.asarray([dataset, 0.75, nbrEpochs])
            full_array = np.concatenate((array2, array1)).flatten()
            val_acc = gnn.fit_GNN(0, *full_array)
            results.at[trial, len(parameter_ranges)] = val_acc
    return results
Code example #5
0
File: 3-run.py  Project: obaidur-rahaman/GNN_MBTR_MD
        # Cache the dataset to disk so the hyper-parameter search workers can
        # reload it. (The enclosing if/with block starts outside this view.)
        subprocess.run(["mkdir", "hyper"])
        with open("./hyper/dataset.pic", 'wb') as filehandle:
            pickle.dump(dataset, filehandle, protocol=4)
        # RANDOM SEARCH
        #hyper_batch_size, target_term, dataset1, split_size, parameter_ranges, nbrTrials, nbrEpochs, MD
        param_best, param_best_5 = gnn.fit_hyperParameters_random(
            1, target_term, 0.95, param_range, 10, 15, GNN, MD, MBTR)
    # Record the chosen hyper-parameters for this target/configuration.
    with open("../results/all_hyperparameters.txt", "a") as file_object:
        file_object.write("%s = %s   (GNN = %s  MD = %s  MBTR = %s)\n" %
                          (target_term, param_best, GNN, MD, MBTR))
    ######################### FINAL OPTIMIZATION
    print("########## ", target_term, " GNN =", GNN, "MD = ", MD, "MBTR =",
          MBTR, "#############")
    print("Molecular Descriptor used =", MD)
    # getloss, verbose, target_term, dataset, split_size, num_epochs, lr, batch_size,  p1, p2, numLayer, numFinalFeature, GNN, MD, MBTI
    trainData, testData = gnn.fit_GNN(0, 1, target_term, dataset, 0.95,
                                      num_epochs, *param_best, GNN, MD, MBTR)
    # Persist predictions, then re-read them so the plotting below operates
    # on the exact on-disk representation.
    trainData.to_csv("../results/%s/train_CNN=%s_MD=%s_MBTR=%s.csv" %
                     (target_term, GNN, MD, MBTR))
    testData.to_csv("../results/%s/test_CNN=%s_MD=%s_MBTR=%s.csv" %
                    (target_term, GNN, MD, MBTR))
    trainData = pd.read_csv("../results/%s/train_CNN=%s_MD=%s_MBTR=%s.csv" %
                            (target_term, GNN, MD, MBTR))
    testData = pd.read_csv("../results/%s/test_CNN=%s_MD=%s_MBTR=%s.csv" %
                           (target_term, GNN, MD, MBTR))
    if (1 == show_plots):
        # NOTE(review): re-plots the losses every 30 s, once per epoch —
        # presumably to refresh plots during a concurrent training run;
        # confirm this is intentional.
        for i in range(num_epochs):
            gnn.plot_losses(target_term, GNN, MD, MBTR)
            time.sleep(30)
        gnn.plot_results(trainData, testData, target_term, show=show_plots)

    # Now store the final result