import math
import sys
import time

import numpy as np
from rdkit import Chem
from sklearn.metrics import accuracy_score, roc_auc_score

# convert_to_graph, split_train_eval_test, shuffle_two_list, np_sigmoid,
# maxSum and processUnit come from the surrounding project; hedged sketches
# of several of the undefined ones are included after the variants below.
#
# Three variants of training() are kept in this file. If it is imported as a
# single module, the last definition shadows the earlier two.


# Variant 1: Ames mutagenicity classification with MC-dropout uncertainty and
# per-atom fragment extraction written to text logs.
def training(model, FLAGS, model_name, smi_total, prop_total):
    np.set_printoptions(threshold=sys.maxsize)
    print("Start Training XD")

    # Hardcoded experiment logs: extracted fragments, rounded predictions, and
    # the aleatoric/epistemic uncertainties of the test set.
    stuff = open("C:/Users/Zac Hung/Desktop/masterthesis/experiment/baysept9thamesfirst.txt", 'w')
    stuff1 = open("C:/Users/Zac Hung/Desktop/masterthesis/experiment/bayfirstmdames.txt", 'w')
    stuff2 = open("C:/Users/Zac Hung/Desktop/masterthesis/experiment/bayamesaleat.txt", 'w')
    stuff3 = open("C:/Users/Zac Hung/Desktop/masterthesis/experiment/bayamesepist.txt", 'w')
    # model.restore("C:/Users/Zac Hung/PycharmProjects/mythesis/uq_molecule/aug12/MC-Dropout_HIV.ckpt-31")

    num_epochs = FLAGS.epoch_size
    batch_size = FLAGS.batch_size
    init_lr = FLAGS.init_lr
    total_st = time.time()

    # Split ratios (train, eval, test) = (0, 1, 0): every molecule is routed
    # to the evaluation split, so the training loop below runs on empty
    # batches and the checkpoint restored before testing does the real work.
    smi_train, smi_eval, smi_test = split_train_eval_test(smi_total, 0, 1, 0)
    prop_train, prop_eval, prop_test = split_train_eval_test(prop_total, 0, 1, 0)
    prop_eval = np.asarray(prop_eval)
    prop_test = np.asarray(prop_test)
    num_train = len(smi_train)
    num_eval = len(smi_eval)
    num_test = len(smi_test)
    smi_train = smi_train[:num_train]
    prop_train = prop_train[:num_train]
    num_batches_train = (num_train // batch_size) + 1
    num_batches_eval = (num_eval // batch_size) + 1
    num_batches_test = (num_test // batch_size) + 1
    num_sampling = 20
    total_iter = 0
    print("Number of training data:", num_train, "\t evaluation data:", num_eval, "\t test data:", num_test)

    for epoch in range(num_epochs):
        st = time.time()
        lr = init_lr * 0.5 ** (epoch // 10)   # halve the learning rate every 10 epochs
        model.assign_lr(lr)
        # smi_train, prop_train = shuffle_two_list(smi_train, prop_train)
        prop_train = np.asarray(prop_train)

        # Train
        num = 0
        train_loss = 0.0
        Y_pred_total = np.array([])
        Y_batch_total = np.array([])
        for i in range(num_batches_train):
            num += 1
            st_i = time.time()
            total_iter += 1
            A_batch, X_batch = convert_to_graph(smi_train[i * batch_size:(i + 1) * batch_size], FLAGS.max_atoms)
            Y_batch = prop_train[i * batch_size:(i + 1) * batch_size]
            # Disabled debug block: pull per-atom weights via model.get_feature,
            # pick the top-weighted atoms of each molecule, and print the
            # corresponding fragment:
            #   mtr = np.abs(model.get_feature(A_batch, X_batch, Y_batch))
            #   start = np.argpartition(mtr[count], -10)
            #   start = np.array(start[start < len(Chem.rdmolops.GetAdjacencyMatrix(iMol))]).tolist()[0:9]
            #   print(rdkit.Chem.rdmolfiles.MolFragmentToSmiles(iMol, start))
            Y_mean, _, loss = model.train(A_batch, X_batch, Y_batch)
            train_loss += loss
            Y_pred = np_sigmoid(Y_mean.flatten())
            Y_pred_total = np.concatenate((Y_pred_total, Y_pred), axis=0)
            Y_batch_total = np.concatenate((Y_batch_total, Y_batch), axis=0)
            et_i = time.time()
        train_loss /= num
        train_accuracy = accuracy_score(Y_batch_total, np.around(Y_pred_total).astype(int))
        train_auroc = 0.0
        try:
            train_auroc = roc_auc_score(Y_batch_total, Y_pred_total)
        except ValueError:
            # AUROC is undefined when the accumulated labels contain one class.
            train_auroc = 0.0

        # Eval
        Y_pred_total = np.array([])
        Y_batch_total = np.array([])
        num = 0
        eval_loss = 0.0
        for i in range(num_batches_eval):
            evalbatch = smi_eval[i * batch_size:(i + 1) * batch_size]
            A_batch, X_batch = convert_to_graph(evalbatch, FLAGS.max_atoms)
            Y_batch = prop_eval[i * batch_size:(i + 1) * batch_size]
            # Disabled debug block, as in the train loop but thresholding the
            # per-atom weights (start = start[start > 0.1]) before taking the
            # fragment SMILES.

            # MC-sampling (a single stochastic forward pass during evaluation)
            P_mean = []
            for n in range(1):
                num += 1
                Y_mean, _, loss = model.test(A_batch, X_batch, Y_batch)
                eval_loss += loss
                P_mean.append(Y_mean.flatten())
            P_mean = np_sigmoid(np.asarray(P_mean))
            mean = np.mean(P_mean, axis=0)
            Y_batch_total = np.concatenate((Y_batch_total, Y_batch), axis=0)
            Y_pred_total = np.concatenate((Y_pred_total, mean), axis=0)
        eval_loss /= num
        eval_accuracy = accuracy_score(Y_batch_total, np.around(Y_pred_total).astype(int))
        eval_auroc = 0.0
        try:
            eval_auroc = roc_auc_score(Y_batch_total, Y_pred_total)
        except ValueError:
            eval_auroc = 0.0

        # Save network!
        ckpt_path = 'save/' + model_name + '.ckpt'
        model.save(ckpt_path, epoch)
        et = time.time()

        # Print results
        print("Time for", epoch, "-th epoch: ", et - st)
        print("Loss Train:", round(train_loss, 3), "\t Evaluation:", round(eval_loss, 3))
        print("Accuracy Train:", round(train_accuracy, 3), "\t Evaluation:", round(eval_accuracy, 3))
        print("AUROC Train:", round(train_auroc, 3), "\t Evaluation:", round(eval_auroc, 3))
    total_et = time.time()
    print("Finish training! Total required time for training : ", (total_et - total_st))

    # Earlier checkpoints tried for testing:
    # model.restore("C:/Users/Zac Hung/PycharmProjects/mythesis/uq_molecule/aug15thames/MC-Dropout_HIV.ckpt-12")
    # model.restore("C:/Users/Zac Hung/PycharmProjects/mythesis/uq_molecule/amesaug16th/MC-Dropout_HIV.ckpt-7")
    # model.restore("C:/Users/Zac Hung/PycharmProjects/mythesis/uq_molecule/aug16-256/MC-Dropout_HIV.ckpt-4")
    # model.restore("C:/Users/Zac Hung/PycharmProjects/mythesis/uq_molecule/aug16thsummax/MC-Dropout_HIV.ckpt-4")
    # model.restore("C:/Users/Zac Hung/PycharmProjects/mythesis/uq_molecule/aug17japan/MC-Dropout_HIV.ckpt-9")
    # model.restore("C:/Users/Zac Hung/PycharmProjects/mythesis/uq_molecule/japanaug18bay/MC-Dropout_HIV.ckpt-8")
    model.restore("./AmesBays/MC-Dropout_HIV.ckpt-11")
    # model.restore("C:/Users/Zac Hung/PycharmProjects/mythesis/uq_molecule/japanfullbayfirstmodSept7th/MC-Dropout_HIV.ckpt-4")

    # Test
    test_st = time.time()
    Y_pred_total = np.array([])
    Y_batch_total = np.array([])
    ale_unc_total = np.array([])
    epi_unc_total = np.array([])
    tot_unc_total = np.array([])
    num = 0
    test_loss = 0.0
    for i in range(num_batches_test):
        num += 1
        testBatch = smi_test[i * batch_size:(i + 1) * batch_size]
        A_batch, X_batch = convert_to_graph(testBatch, FLAGS.max_atoms)
        Y_batch = prop_test[i * batch_size:(i + 1) * batch_size]

        # Per-atom weights used for fragment extraction.
        mtr_test = np_sigmoid(model.get_feature(A_batch, X_batch, Y_batch))
        count = -1
        for j in testBatch:
            count += 1
            iMol = Chem.MolFromSmiles(j.strip())
            adj_len = len(Chem.rdmolops.GetAdjacencyMatrix(iMol))
            # Indices of the 10-atom window with the largest summed weight.
            start = maxSum(mtr_test[count], adj_len, 10)
            # Alternatives tried and disabled: np.argpartition top-k selection,
            # thresholding (start > 1), and SMARTS output, e.g.
            #   bondNum = Chem.rdchem.Mol.GetNumBonds(iMol)
            #   tmp = rdkit.Chem.rdmolfiles.MolFragmentToSmarts(iMol, atomsToUse=start, bondsToUse=list(range(1, bondNum)), isomericSmarts=False)
            #   tmp4 = rdkit.Chem.rdmolfiles.MolFragmentToSmarts(iMol, atomsToUse=start)
            stuff.write(processUnit(iMol, start, i, batch_size, count, mtr_test[count], adj_len, 10) + "\n")
            # Uncomment this for the drawing:
            # fig = Draw.MolToFile(iMol, "./amesfirstmodImg3/" + str(i * batch_size + count) + '.png', size=size, highlightAtoms=start)

        # MC-sampling: 5 stochastic forward passes per test batch.
        P_mean = []
        for n in range(5):
            Y_mean, _, loss = model.test(A_batch, X_batch, Y_batch)
            P_mean.append(Y_mean.flatten())
        # Earlier disabled variant that sliced the SMILES string directly:
        #   mtr = np.abs(model.get_feature(A_batch, X_batch, Y_batch))
        #   start, end = maxSum(mtr[j], 503, 15)
        #   stuff.write(str(smi_test[count][start:end + 1]) + "\n")
        P_mean = np_sigmoid(np.asarray(P_mean))
        mean = np.mean(P_mean, axis=0)
        ale_unc = np.mean(P_mean * (1.0 - P_mean), axis=0)                     # aleatoric: E[p(1-p)]
        epi_unc = np.mean(P_mean ** 2, axis=0) - np.mean(P_mean, axis=0) ** 2  # epistemic: Var(p)
        tot_unc = ale_unc + epi_unc
        Y_batch_total = np.concatenate((Y_batch_total, Y_batch), axis=0)
        Y_pred_total = np.concatenate((Y_pred_total, mean), axis=0)
        ale_unc_total = np.concatenate((ale_unc_total, ale_unc), axis=0)
        epi_unc_total = np.concatenate((epi_unc_total, epi_unc), axis=0)
        tot_unc_total = np.concatenate((tot_unc_total, tot_unc), axis=0)
    # Write the accumulated predictions and uncertainties once, after the loop
    # (the original wrote the growing totals on every batch).
    stuff1.write(str(np.around(Y_pred_total)) + "\n")
    stuff2.write(str(ale_unc_total) + "\n")
    stuff3.write(str(epi_unc_total) + "\n")

    # Confusion counts and derived metrics on the rounded predictions.
    True_positive = 0
    False_positive = 0
    True_negative = 0
    False_negative = 0
    Exp = Y_batch_total
    Pred = np.around(Y_pred_total)
    for i in range(len(Exp)):
        if Exp[i] == Pred[i] and Exp[i] == 1:
            True_positive += 1
        if Exp[i] != Pred[i] and Exp[i] == 0:
            False_positive += 1
        if Exp[i] == Pred[i] and Exp[i] == 0:
            True_negative += 1
        if Exp[i] != Pred[i] and Exp[i] == 1:
            False_negative += 1
    count_TP = True_positive
    print("True Positive:", count_TP)
    count_FP = False_positive
    print("False Positive:", count_FP)
    count_FN = False_negative
    print("False Negative:", count_FN)
    count_TN = True_negative
    print("True Negative:", count_TN)
    Accuracy = (count_TP + count_TN) / (count_TP + count_FP + count_FN + count_TN)
    print("Accuracy:", Accuracy)
    MCC = (count_TP * count_TN - count_FP * count_FN) / math.sqrt(
        (count_TN + count_FP) * (count_TN + count_FN) * (count_TP + count_FP) * (count_TP + count_FN))
    print("MCC:", MCC)
    Specificity = count_TN / (count_TN + count_FP)
    print("Specificity:", Specificity)
    Precision = count_TP / (count_TP + count_FP)
    print("Precision:", Precision)
    Recall = count_TP / (count_TP + count_FN)   # sensitivity
    print("Recall:", Recall)
    Fmeasure = (2 * count_TP) / (2 * count_TP + count_FP + count_FN)   # F1
    print("Fmeasure:", Fmeasure)

    test_et = time.time()
    print("Finish Testing, Total time for test:", (test_et - test_st))
    stuff.close()
    stuff1.close()
    stuff2.close()
    stuff3.close()
    return
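
# np_sigmoid is not defined in this file. It is presumably the elementwise
# logistic function applied to the model's logits; a minimal sketch, assuming
# exactly that, with clipping so np.exp cannot overflow:
def np_sigmoid(x):
    return 1.0 / (1.0 + np.exp(-np.clip(x, -500.0, 500.0)))


# maxSum is also undefined here. Its live call, maxSum(weights, adj_len, 10),
# produces an atom-index list (fed to processUnit and, in disabled code, to
# MolFragmentToSmiles), so it is assumed to return the indices of the
# contiguous k-atom window with the largest summed weight among the first n
# entries (the real atoms, before padding). Note an older disabled call
# unpacks it as a (start, end) pair, so the signature may have changed; this
# sliding-window sketch follows the live list-of-indices usage:
def maxSum(arr, n, k):
    k = min(k, n)                       # molecules may have fewer than k atoms
    window = float(np.sum(arr[:k]))     # weight of the first window
    best, best_start = window, 0
    for i in range(k, n):               # slide the window one atom at a time
        window += float(arr[i]) - float(arr[i - k])
        if window > best:
            best, best_start = window, i - k + 1
    return list(range(best_start, best_start + k))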
# Variant 2: regression with MC-dropout. The model predicts a mean and a
# log-variance; the predicted variance gives the aleatoric term and the
# spread of the MC means gives the epistemic term.
def training(model, FLAGS, model_name, smi_total, prop_total):
    print("Start Training XD")
    num_epochs = FLAGS.epoch_size
    batch_size = FLAGS.batch_size
    init_lr = FLAGS.init_lr
    total_st = time.time()
    smi_train, smi_eval, smi_test = split_train_eval_test(smi_total, 0.8, 0.2, 0.1)
    prop_train, prop_eval, prop_test = split_train_eval_test(prop_total, 0.8, 0.2, 0.1)
    prop_eval = np.asarray(prop_eval)
    prop_test = np.asarray(prop_test)
    num_train = len(smi_train)
    num_eval = len(smi_eval)
    num_test = len(smi_test)
    smi_train = smi_train[:num_train]
    prop_train = prop_train[:num_train]
    num_batches_train = (num_train // batch_size) + 1
    num_batches_eval = (num_eval // batch_size) + 1
    num_batches_test = (num_test // batch_size) + 1
    num_sampling = 20
    total_iter = 0
    print("Number of training data:", num_train, "\t evaluation data:", num_eval, "\t test data:", num_test)

    for epoch in range(num_epochs):
        st = time.time()
        lr = init_lr * 0.5 ** (epoch // 10)   # halve the learning rate every 10 epochs
        model.assign_lr(lr)
        smi_train, prop_train = shuffle_two_list(smi_train, prop_train)
        prop_train = np.asarray(prop_train)

        # Train
        num = 0
        train_loss = 0.0
        Y_pred_total = np.array([])
        Y_batch_total = np.array([])
        for i in range(num_batches_train):
            num += 1
            st_i = time.time()
            total_iter += 1
            A_batch, X_batch = convert_to_graph(smi_train[i * batch_size:(i + 1) * batch_size], FLAGS.max_atoms)
            Y_batch = prop_train[i * batch_size:(i + 1) * batch_size]
            # Gaussian label noise as regularization, scaled by FLAGS.noise.
            Y_noise = np.random.normal(0.0, FLAGS.noise, Y_batch.shape[0])
            Y_batch = Y_batch + Y_noise
            Y_mean, Y_logvar, loss = model.train(A_batch, X_batch, Y_batch)
            train_loss += loss
            Y_pred = Y_mean.flatten()
            Y_pred_total = np.concatenate((Y_pred_total, Y_pred), axis=0)
            Y_batch_total = np.concatenate((Y_batch_total, Y_batch), axis=0)
            et_i = time.time()
            # print("train_iter : ", total_iter, ", epoch : ", epoch, ", loss : ", loss, "\t Time:", (et_i - st_i))
        train_loss /= num
        train_mae = np.mean(np.abs(Y_batch_total - Y_pred_total))

        # Eval
        Y_pred_total = np.array([])
        Y_batch_total = np.array([])
        num = 0
        eval_loss = 0.0
        for i in range(num_batches_eval):
            A_batch, X_batch = convert_to_graph(smi_eval[i * batch_size:(i + 1) * batch_size], FLAGS.max_atoms)
            Y_batch = prop_eval[i * batch_size:(i + 1) * batch_size]
            # MC-sampling: 3 stochastic forward passes per eval batch.
            P_mean = []
            for n in range(3):
                num += 1
                Y_mean, Y_logvar, loss = model.test(A_batch, X_batch, Y_batch)
                eval_loss += loss
                P_mean.append(Y_mean.flatten())
            P_mean = np.asarray(P_mean)
            mean = np.mean(P_mean, axis=0)
            Y_batch_total = np.concatenate((Y_batch_total, Y_batch), axis=0)
            Y_pred_total = np.concatenate((Y_pred_total, mean), axis=0)
        eval_loss /= num
        eval_mae = np.mean(np.abs(Y_batch_total - Y_pred_total))

        # Save network!
        ckpt_path = 'save/' + model_name + '.ckpt'
        model.save(ckpt_path, epoch)
        et = time.time()

        # Print results
        print("Time for", epoch, "-th epoch: ", et - st)
        print("Loss Train:", round(train_loss, 3), "\t Evaluation:", round(eval_loss, 3))
        print("MAE Train:", round(train_mae, 3), "\t Evaluation:", round(eval_mae, 3))
    total_et = time.time()
    print("Finish training! Total required time for training : ", (total_et - total_st))

    # Test
    test_st = time.time()
    Y_pred_total = np.array([])
    Y_batch_total = np.array([])
    ale_unc_total = np.array([])
    epi_unc_total = np.array([])
    tot_unc_total = np.array([])
    num = 0
    test_loss = 0.0
    for i in range(num_batches_test):
        num += 1
        A_batch, X_batch = convert_to_graph(smi_test[i * batch_size:(i + 1) * batch_size], FLAGS.max_atoms)
        Y_batch = prop_test[i * batch_size:(i + 1) * batch_size]
        # MC-sampling: num_sampling stochastic forward passes per test batch.
        P_mean = []
        P_logvar = []
        for n in range(num_sampling):
            Y_mean, Y_logvar, loss = model.test(A_batch, X_batch, Y_batch)
            P_mean.append(Y_mean.flatten())
            P_logvar.append(Y_logvar.flatten())
        P_mean = np.asarray(P_mean)
        P_logvar = np.exp(np.asarray(P_logvar))   # log-variance -> variance
        mean = np.mean(P_mean, axis=0)
        ale_unc = np.mean(P_logvar, axis=0)   # aleatoric: mean predicted variance
        epi_unc = np.var(P_mean, axis=0)      # epistemic: variance of the MC means
        tot_unc = ale_unc + epi_unc
        Y_batch_total = np.concatenate((Y_batch_total, Y_batch), axis=0)
        Y_pred_total = np.concatenate((Y_pred_total, mean), axis=0)
        ale_unc_total = np.concatenate((ale_unc_total, ale_unc), axis=0)
        epi_unc_total = np.concatenate((epi_unc_total, epi_unc), axis=0)
        tot_unc_total = np.concatenate((tot_unc_total, tot_unc), axis=0)
    np.save('./statistics/' + model_name + '_mc_truth.npy', Y_batch_total)
    np.save('./statistics/' + model_name + '_mc_pred.npy', Y_pred_total)
    np.save('./statistics/' + model_name + '_mc_epi_unc.npy', epi_unc_total)
    np.save('./statistics/' + model_name + '_mc_ale_unc.npy', ale_unc_total)
    np.save('./statistics/' + model_name + '_mc_tot_unc.npy', tot_unc_total)
    test_et = time.time()
    print("Finish Testing, Total time for test:", (test_et - test_st))
    return
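
# shuffle_two_list is another project helper, used above to reshuffle the
# training data each epoch. It is assumed to shuffle the SMILES list and the
# label list with one shared permutation so the pairs stay aligned; a minimal
# sketch under that assumption:
import random

def shuffle_two_list(list1, list2):
    paired = list(zip(list1, list2))   # pair items so they move together
    random.shuffle(paired)             # one permutation reorders both lists
    first, second = zip(*paired)
    return list(first), list(second)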
# Variant 3: classification with MC-dropout. Aleatoric and epistemic
# uncertainties are decomposed from the sampled sigmoid outputs.
def training(model, FLAGS, model_name, smi_total, prop_total):
    print("Start Training XD")
    num_epochs = FLAGS.epoch_size
    batch_size = FLAGS.batch_size
    init_lr = FLAGS.init_lr
    total_st = time.time()
    smi_train, smi_eval, smi_test = split_train_eval_test(smi_total, 0.8, 0.2, 0.1)
    prop_train, prop_eval, prop_test = split_train_eval_test(prop_total, 0.8, 0.2, 0.1)
    prop_eval = np.asarray(prop_eval)
    prop_test = np.asarray(prop_test)
    num_train = len(smi_train)
    num_eval = len(smi_eval)
    num_test = len(smi_test)
    smi_train = smi_train[:num_train]
    prop_train = prop_train[:num_train]
    num_batches_train = (num_train // batch_size) + 1
    num_batches_eval = (num_eval // batch_size) + 1
    num_batches_test = (num_test // batch_size) + 1
    num_sampling = 20
    total_iter = 0
    print("Number of training data:", num_train, "\t evaluation data:", num_eval, "\t test data:", num_test)

    for epoch in range(num_epochs):
        st = time.time()
        lr = init_lr * 0.5 ** (epoch // 10)   # halve the learning rate every 10 epochs
        model.assign_lr(lr)
        smi_train, prop_train = shuffle_two_list(smi_train, prop_train)
        prop_train = np.asarray(prop_train)

        # Train
        num = 0
        train_loss = 0.0
        Y_pred_total = np.array([])
        Y_batch_total = np.array([])
        for i in range(num_batches_train):
            num += 1
            st_i = time.time()
            total_iter += 1
            A_batch, X_batch = convert_to_graph(smi_train[i * batch_size:(i + 1) * batch_size], FLAGS.max_atoms)
            Y_batch = prop_train[i * batch_size:(i + 1) * batch_size]
            Y_mean, _, loss = model.train(A_batch, X_batch, Y_batch)
            train_loss += loss
            Y_pred = np_sigmoid(Y_mean.flatten())
            Y_pred_total = np.concatenate((Y_pred_total, Y_pred), axis=0)
            Y_batch_total = np.concatenate((Y_batch_total, Y_batch), axis=0)
            et_i = time.time()
        train_loss /= num
        train_accuracy = accuracy_score(Y_batch_total, np.around(Y_pred_total).astype(int))
        train_auroc = 0.0
        try:
            train_auroc = roc_auc_score(Y_batch_total, Y_pred_total)
        except ValueError:
            # AUROC is undefined when the accumulated labels contain one class.
            train_auroc = 0.0

        # Eval
        Y_pred_total = np.array([])
        Y_batch_total = np.array([])
        num = 0
        eval_loss = 0.0
        for i in range(num_batches_eval):
            A_batch, X_batch = convert_to_graph(smi_eval[i * batch_size:(i + 1) * batch_size], FLAGS.max_atoms)
            Y_batch = prop_eval[i * batch_size:(i + 1) * batch_size]
            # MC-sampling: 3 stochastic forward passes per eval batch.
            P_mean = []
            for n in range(3):
                num += 1
                Y_mean, _, loss = model.test(A_batch, X_batch, Y_batch)
                eval_loss += loss
                P_mean.append(Y_mean.flatten())
            P_mean = np_sigmoid(np.asarray(P_mean))
            mean = np.mean(P_mean, axis=0)
            Y_batch_total = np.concatenate((Y_batch_total, Y_batch), axis=0)
            Y_pred_total = np.concatenate((Y_pred_total, mean), axis=0)
        eval_loss /= num
        eval_accuracy = accuracy_score(Y_batch_total, np.around(Y_pred_total).astype(int))
        eval_auroc = 0.0
        try:
            eval_auroc = roc_auc_score(Y_batch_total, Y_pred_total)
        except ValueError:
            eval_auroc = 0.0

        # Save network!
        ckpt_path = 'save/' + model_name + '.ckpt'
        model.save(ckpt_path, epoch)
        et = time.time()

        # Print results
        print("Time for", epoch, "-th epoch: ", et - st)
        print("Loss Train:", round(train_loss, 3), "\t Evaluation:", round(eval_loss, 3))
        print("Accuracy Train:", round(train_accuracy, 3), "\t Evaluation:", round(eval_accuracy, 3))
        print("AUROC Train:", round(train_auroc, 3), "\t Evaluation:", round(eval_auroc, 3))
    total_et = time.time()
    print("Finish training! Total required time for training : ", (total_et - total_st))

    # Test
    test_st = time.time()
    Y_pred_total = np.array([])
    Y_batch_total = np.array([])
    ale_unc_total = np.array([])
    epi_unc_total = np.array([])
    tot_unc_total = np.array([])
    num = 0
    test_loss = 0.0
    for i in range(num_batches_test):
        num += 1
        A_batch, X_batch = convert_to_graph(smi_test[i * batch_size:(i + 1) * batch_size], FLAGS.max_atoms)
        Y_batch = prop_test[i * batch_size:(i + 1) * batch_size]
        # MC-sampling: num_sampling stochastic forward passes per test batch.
        P_mean = []
        for n in range(num_sampling):
            Y_mean, _, loss = model.test(A_batch, X_batch, Y_batch)
            P_mean.append(Y_mean.flatten())
        P_mean = np_sigmoid(np.asarray(P_mean))
        mean = np.mean(P_mean, axis=0)
        ale_unc = np.mean(P_mean * (1.0 - P_mean), axis=0)                     # aleatoric: E[p(1-p)]
        epi_unc = np.mean(P_mean ** 2, axis=0) - np.mean(P_mean, axis=0) ** 2  # epistemic: Var(p)
        tot_unc = ale_unc + epi_unc
        Y_batch_total = np.concatenate((Y_batch_total, Y_batch), axis=0)
        Y_pred_total = np.concatenate((Y_pred_total, mean), axis=0)
        ale_unc_total = np.concatenate((ale_unc_total, ale_unc), axis=0)
        epi_unc_total = np.concatenate((epi_unc_total, epi_unc), axis=0)
        tot_unc_total = np.concatenate((tot_unc_total, tot_unc), axis=0)
    np.save('./statistics/' + model_name + '_mc_truth.npy', Y_batch_total)
    np.save('./statistics/' + model_name + '_mc_pred.npy', Y_pred_total)
    np.save('./statistics/' + model_name + '_mc_epi_unc.npy', epi_unc_total)
    np.save('./statistics/' + model_name + '_mc_ale_unc.npy', ale_unc_total)
    np.save('./statistics/' + model_name + '_mc_tot_unc.npy', tot_unc_total)
    test_et = time.time()
    print("Finish Testing, Total time for test:", (test_et - test_st))
    return
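
# Sanity check for the classification decomposition used in variants 1 and 3:
# E[p(1-p)] + Var(p) = E[p](1 - E[p]), so the aleatoric and epistemic terms
# always sum to the binary variance of the averaged prediction. A small
# self-contained demo on synthetic MC samples (not data from this project):
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    P = rng.uniform(0.0, 1.0, size=(20, 8))                  # 20 MC samples x 8 molecules
    ale = np.mean(P * (1.0 - P), axis=0)                     # aleatoric: E[p(1-p)]
    epi = np.mean(P ** 2, axis=0) - np.mean(P, axis=0) ** 2  # epistemic: Var(p)
    mean = np.mean(P, axis=0)
    assert np.allclose(ale + epi, mean * (1.0 - mean))       # the identity holds
    print("ale + epi == mean*(1-mean):", np.allclose(ale + epi, mean * (1.0 - mean)))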