def main():
    """Load the dataset, train a neural-fingerprint model, and print test RMSE.

    Relies on module-level names defined elsewhere in this file:
    N_train / N_val / N_test (split sizes), task_params, model_params,
    load_data, Main, train_nn, optimizers, np, math.
    """
    print("Loading data...")
    traindata, valdata, testdata = load_data(
        task_params['data_file'], (N_train, N_val, N_test),
        input_name='smiles', target_name=task_params['target_name'])
    x_trains, y_trains = traindata
    x_vals, y_vals = valdata
    x_tests, y_tests = testdata
    # Reshape every split to a column vector; test targets are cast to
    # float32, presumably to match the model's output dtype — TODO confirm.
    x_trains = np.reshape(x_trains, (N_train, 1))
    y_trains = np.reshape(y_trains, (N_train, 1))
    x_vals = np.reshape(x_vals, (N_val, 1))
    y_vals = np.reshape(y_vals, (N_val, 1))
    x_tests = np.reshape(x_tests, (N_test, 1))
    y_tests = np.reshape(y_tests, (N_test, 1)).astype(np.float32)

    def run_conv_experiment():
        """Train a fresh model; return (test RMSE, training curve)."""
        # Initialize model and optimizer.
        NNFP = Main(model_params)
        optimizer = optimizers.Adam()
        optimizer.setup(NNFP)
        # Learn. (Dead commented-out GPU-transfer code removed.)
        trained_NNFP, conv_training_curve, undo_norm = \
            train_nn(NNFP, x_trains, y_trains,
                     validation_smiles=x_vals,
                     validation_raw_targets=y_vals)
        # mse() returns a Variable-like object; ._data[0] unwraps the raw
        # value so math.sqrt can turn MSE into RMSE.
        return math.sqrt(
            trained_NNFP.mse(x_tests, y_tests,
                             undo_norm)._data[0]), conv_training_curve

    print("Starting neural fingerprint experiment...")
    test_loss_neural, conv_training_curve = run_conv_experiment()
    print("Neural test RMSE", test_loss_neural)
def main():
    """CLI entry point: load data, train a neural-fingerprint model, print RMSE.

    Relies on module-level names defined elsewhere in this file:
    model_params, load_data, Main, train_nn, optimizers, np, math,
    argparse, time, and a "<dataset>_params" dict resolved via eval().
    """
    print("Loading data...")
    # Command-line arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument("input_file")
    parser.add_argument("--epochs", type=int)
    parser.add_argument("--batch_size", type=int)
    # NOTE(review): args.gpu is parsed but never read below; also, with
    # store_false, passing --gpu sets it to False (default True), which is
    # the opposite of what the flag name suggests. Kept for CLI compatibility.
    parser.add_argument("--gpu", action="store_false")
    args = parser.parse_args()
    # SECURITY: eval() on a string derived from user input — it looks up a
    # module-level "<name>_params" dict. Only run with trusted file names.
    task_params = eval(args.input_file.split(".csv")[0] + '_params')
    ALL_TIME = time.time()
    traindata, valdata, testdata = load_data(
        task_params['data_file'],
        (task_params['train'], task_params['val'], task_params['test']),
        input_name='smiles', target_name=task_params['target_name'])
    x_trains, y_trains = traindata
    x_vals, y_vals = valdata
    x_tests, y_tests = testdata
    # Reshape every split to a column vector; test targets are cast to
    # float32, presumably to match the model's output dtype — TODO confirm.
    x_trains = np.reshape(x_trains, (task_params['train'], 1))
    y_trains = np.reshape(y_trains, (task_params['train'], 1))
    x_vals = np.reshape(x_vals, (task_params['val'], 1))
    y_vals = np.reshape(y_vals, (task_params['val'], 1))
    x_tests = np.reshape(x_tests, (task_params['test'], 1))
    y_tests = np.reshape(y_tests, (task_params['test'], 1)).astype(np.float32)

    def run_conv_experiment():
        """Train a fresh model; return (test RMSE, training curve)."""
        # Initialize model and optimizer.
        NNFP = Main(model_params)
        optimizer = optimizers.Adam()
        optimizer.setup(NNFP)
        # Learn.
        trained_NNFP, conv_training_curve, undo_norm = \
            train_nn(NNFP, x_trains, y_trains, args.epochs, args.batch_size,
                     validation_smiles=x_vals,
                     validation_raw_targets=y_vals)
        # mse() returns a Variable-like object; ._data[0] unwraps the raw value.
        return math.sqrt(
            trained_NNFP.mse(x_tests, y_tests,
                             undo_norm)._data[0]), conv_training_curve

    print("Starting neural fingerprint experiment...")
    test_loss_neural, conv_training_curve = run_conv_experiment()
    print("Neural test RMSE", test_loss_neural)
    print(conv_training_curve)
    print(task_params["data_file"])
    # (fix) removed a second, duplicate "Neural test RMSE" print here.
    print("time : ", time.time() - ALL_TIME)
def main():
    """Train a model, or load a saved one and plot its input-attention values.

    With --load_npz unset: trains, saves weights, prints test RMSE.
    With --load_npz set: loads weights, evaluates, and draws the attention
    values on 1-D number lines (one figure per group of 5 columns).

    Relies on module-level names defined elsewhere in this file:
    model_params, load_data, Main, train_nn, normalize_array, optimizers,
    serializers, np, plt, pylab, math, argparse, time.
    """
    print("Loading data...")
    # Command-line arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str)
    parser.add_argument("--epochs", type=int)
    parser.add_argument("--load_npz", type=str)
    args = parser.parse_args()
    # SECURITY: eval() on a string derived from user input — it looks up a
    # module-level "<name>_params" dict. Only run with trusted file names.
    task_params = eval(args.input_file.split(".csv")[0] + '_params')
    ALL_TIME = time.time()
    traindata, valdata, testdata = load_data(
        task_params['data_file'],
        (task_params['train'], task_params['val'], task_params['test']),
        input_name='smiles', target_name=task_params['target_name'])
    x_trains, y_trains = traindata
    x_vals, y_vals = valdata
    x_tests, y_tests = testdata
    # Reshape every split to a column vector; test targets are cast to
    # float32, presumably to match the model's output dtype — TODO confirm.
    x_trains = np.reshape(x_trains, (task_params['train'], 1))
    y_trains = np.reshape(y_trains, (task_params['train'], 1))
    x_vals = np.reshape(x_vals, (task_params['val'], 1))
    y_vals = np.reshape(y_vals, (task_params['val'], 1))
    x_tests = np.reshape(x_tests, (task_params['test'], 1))
    y_tests = np.reshape(y_tests, (task_params['test'], 1)).astype(np.float32)

    def run_conv_experiment():
        """Train a fresh model, save its weights; return (test RMSE, curve)."""
        # Initialize model and optimizer.
        NNFP = Main(model_params)
        optimizer = optimizers.Adam()
        optimizer.setup(NNFP)
        # Learn. (Dead commented-out GPU-transfer code removed.)
        trained_NNFP, conv_training_curve, undo_norm = \
            train_nn(NNFP, x_trains, y_trains, args.epochs,
                     validation_smiles=x_vals,
                     validation_raw_targets=y_vals)
        save_name = "input-attention-fcfp-cep-top-remove.npz"
        serializers.save_npz(save_name, trained_NNFP)
        mse, _ = trained_NNFP.mse(x_tests, y_tests, undo_norm)
        # ._data[0] unwraps the Variable-like object returned by mse().
        return math.sqrt(mse._data[0]), conv_training_curve

    def load_model_experiment():
        """Load weights from --load_npz; return (test RMSE, input attention)."""
        trained_NNFP = Main(model_params)
        serializers.load_npz(args.load_npz, trained_NNFP)
        _, undo_norm = normalize_array(y_tests)
        mse, input_attention = trained_NNFP.mse(x_tests, y_tests, undo_norm)
        return math.sqrt(mse._data[0]), input_attention

    print("Starting neural fingerprint experiment...")
    if args.load_npz is None:  # (fix) was `== None`; use identity check
        test_loss_neural, conv_training_curve = run_conv_experiment()
    else:
        test_loss_neural, input_attention = load_model_experiment()
        x_ecfp = input_attention._data[0]
        y = [0] * len(x_ecfp)
        # Split the attention matrix into 5 column groups and draw each as a
        # scatter on a horizontal [0, 1] number line with tick marks.
        attentions = np.split(x_ecfp, 5, 1)
        xmin, xmax = 0, 1
        for i in range(len(attentions)):
            fig, ax = plt.subplots(figsize=(10, 10))
            fig.set_figheight(1)
            plt.tight_layout()
            plt.tick_params(labelbottom=True, bottom=False)
            plt.tick_params(labelleft=False, left=False)
            plt.scatter(attentions[i], y, c="red", marker="o", alpha=0.3)
            plt.hlines(y=0, xmin=xmin, xmax=xmax)
            # Major ticks every 1.0, minor ticks every 0.1.
            plt.vlines(x=[i for i in range(xmin, xmax + 1, 1)],
                       ymin=-0.04, ymax=0.04)
            plt.vlines(x=[i / 10 for i in range(xmin * 10, xmax * 10 + 1, 1)],
                       ymin=-0.02, ymax=0.02)
            line_width = 0.1
            plt.xticks(np.arange(xmin, xmax + line_width, line_width))
            pylab.box(False)
            # NOTE(review): every iteration overwrites the same file; the
            # commented-out per-index name below is probably the intent.
            #plt.savefig(args.input_file + '_attention_ecfp_' + str(i) + '.png')
            plt.savefig("test.png")
            #plt.show()
    print("Neural test RMSE", test_loss_neural)
    print("time : ", time.time() - ALL_TIME)
def main():
    """Train a model, or load one and plot ecfp/fcfp attention side by side.

    With --load_npz unset: trains (fingerprint length from --fp_length),
    saves weights, prints test RMSE.
    With --load_npz set: loads weights, dumps the ecfp attention weights to
    'weight_delaney.txt', and scatters both attention vectors on one axis.

    Relies on module-level names defined elsewhere in this file:
    model_params, load_data, Main, train_nn, normalize_array, optimizers,
    serializers, np, plt, pylab, math, argparse, time.
    """
    print("Loading data...")
    # Command-line arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str)
    parser.add_argument("--epochs", type=int)
    parser.add_argument("--fp_length", type=int)
    parser.add_argument("--i", type=int)
    parser.add_argument("--load_npz", type=str)
    args = parser.parse_args()
    # NOTE(review): if --fp_length is omitted this stores None into
    # model_params; callers downstream may not expect that — verify.
    model_params['fp_length'] = args.fp_length
    # SECURITY: eval() on a string derived from user input — it looks up a
    # module-level "<name>_params" dict. Only run with trusted file names.
    task_params = eval(args.input_file.split(".csv")[0] + '_params')
    ALL_TIME = time.time()
    traindata, valdata, testdata = load_data(
        task_params['data_file'],
        (task_params['train'], task_params['val'], task_params['test']),
        input_name='smiles', target_name=task_params['target_name'])
    x_trains, y_trains = traindata
    x_vals, y_vals = valdata
    x_tests, y_tests = testdata
    # Reshape every split to a column vector; test targets are cast to
    # float32, presumably to match the model's output dtype — TODO confirm.
    x_trains = np.reshape(x_trains, (task_params['train'], 1))
    y_trains = np.reshape(y_trains, (task_params['train'], 1))
    x_vals = np.reshape(x_vals, (task_params['val'], 1))
    y_vals = np.reshape(y_vals, (task_params['val'], 1))
    x_tests = np.reshape(x_tests, (task_params['test'], 1))
    y_tests = np.reshape(y_tests, (task_params['test'], 1)).astype(np.float32)

    def run_conv_experiment():
        """Train a fresh model, save its weights; return (test RMSE, curve)."""
        # Initialize model and optimizer.
        NNFP = Main(model_params)
        optimizer = optimizers.Adam()
        optimizer.setup(NNFP)
        # Learn.
        trained_NNFP, conv_training_curve, undo_norm = \
            train_nn(NNFP, x_trains, y_trains, args.epochs,
                     validation_smiles=x_vals,
                     validation_raw_targets=y_vals)
        # NOTE(review): hard-coded debug name; the commented-out line builds
        # the presumably intended per-run name from the CLI arguments.
        #save_name = "fp_concat_" + args.input_file + "_fp_length_" + str(args.fp_length) + "_" + str(args.i) + ".npz"
        save_name = "test_cep.npz"
        serializers.save_npz(save_name, trained_NNFP)
        mse, _, _ = trained_NNFP.mse(x_tests, y_tests, undo_norm)
        # ._data[0] unwraps the Variable-like object returned by mse().
        return math.sqrt(mse._data[0]), conv_training_curve

    def load_model_experiment():
        """Load weights from --load_npz; return (RMSE, ecfp att., fcfp att.)."""
        trained_NNFP = Main(model_params)
        serializers.load_npz(args.load_npz, trained_NNFP)
        _, undo_norm = normalize_array(y_tests)
        mse, attention_ecfp, attention_fcfp = trained_NNFP.mse(
            x_tests, y_tests, undo_norm)
        return math.sqrt(mse._data[0]), attention_ecfp, attention_fcfp

    print("Starting neural fingerprint experiment...")
    if args.load_npz is None:  # (fix) was `== None`; use identity check
        test_loss_neural, conv_training_curve = run_conv_experiment()
    else:
        test_loss_neural, attention_ecfp, attention_fcfp = load_model_experiment(
        )
        x_ecfp = attention_ecfp._data[0]
        x_fcfp = attention_fcfp._data[0]
        print(x_fcfp, x_ecfp)
        np.savetxt('weight_delaney.txt', x_ecfp, delimiter=' ')
        # Scatter both attention vectors on a horizontal [0, 1] number line.
        y = [0] * len(x_ecfp)
        fig, ax = plt.subplots(figsize=(10, 10))
        fig.set_figheight(1)
        ax.tick_params(labelbottom=True, bottom=False)
        ax.tick_params(labelleft=False, left=False)
        xmin, xmax = 0, 1
        plt.tight_layout()
        plt.scatter(x_ecfp, y, c="red", marker="o", alpha=0.3,
                    label="Input Representation 1")
        plt.scatter(x_fcfp, y, c="blue", marker="o", alpha=0.3,
                    label="Input Representation 2")
        plt.hlines(y=0, xmin=xmin, xmax=xmax)
        # Major ticks every 1.0, minor ticks every 0.1.
        plt.vlines(x=[i for i in range(xmin, xmax + 1, 1)],
                   ymin=-0.04, ymax=0.04)
        plt.vlines(x=[i / 10 for i in range(xmin * 10, xmax * 10 + 1, 1)],
                   ymin=-0.02, ymax=0.02)
        line_width = 0.1
        plt.xticks(np.arange(xmin, xmax + line_width, line_width))
        pylab.box(False)
        # NOTE(review): the figure is built but never shown or saved — all
        # output calls below are commented out, so this is currently a no-op.
        #plt.legend(loc='upper right', bbox_to_anchor=(0.2,1,0.15,0), borderaxespad=0.)
        #plt.show()
        #plt.savefig("fp_scalar_" + args.input_file + ".png")
        #plt.savefig("test.png")
    print("Neural test RMSE", test_loss_neural)
    print("time : ", time.time() - ALL_TIME)
def main():
    """Train a model, or load one and dump its one-hot input-attention weights.

    With --load_npz unset: trains, saves weights, prints test RMSE.
    With --load_npz set: loads weights, evaluates, and writes the attention
    matrix to 'ecfp_propaty_<input_file>'.

    Relies on module-level names defined elsewhere in this file:
    model_params, load_data, Main, train_nn, normalize_array, optimizers,
    serializers, np, math, argparse, time.
    """
    print("Loading data...")
    # Command-line arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str)
    parser.add_argument("--epochs", type=int)
    parser.add_argument("--load_npz", type=str)
    args = parser.parse_args()
    # SECURITY: eval() on a string derived from user input — it looks up a
    # module-level "<name>_params" dict. Only run with trusted file names.
    task_params = eval(args.input_file.split(".csv")[0] + '_params')
    ALL_TIME = time.time()
    traindata, valdata, testdata = load_data(
        task_params['data_file'],
        (task_params['train'], task_params['val'], task_params['test']),
        input_name='smiles', target_name=task_params['target_name'])
    x_trains, y_trains = traindata
    x_vals, y_vals = valdata
    x_tests, y_tests = testdata
    # Reshape every split to a column vector; test targets are cast to
    # float32, presumably to match the model's output dtype — TODO confirm.
    x_trains = np.reshape(x_trains, (task_params['train'], 1))
    y_trains = np.reshape(y_trains, (task_params['train'], 1))
    x_vals = np.reshape(x_vals, (task_params['val'], 1))
    y_vals = np.reshape(y_vals, (task_params['val'], 1))
    x_tests = np.reshape(x_tests, (task_params['test'], 1))
    y_tests = np.reshape(y_tests, (task_params['test'], 1)).astype(np.float32)

    def run_conv_experiment():
        """Train a fresh model, save its weights; return (test RMSE, curve)."""
        # Initialize model and optimizer.
        NNFP = Main(model_params)
        optimizer = optimizers.Adam()
        optimizer.setup(NNFP)
        # Learn. (Dead commented-out GPU-transfer code removed.)
        trained_NNFP, conv_training_curve, undo_norm = \
            train_nn(NNFP, x_trains, y_trains, args.epochs,
                     validation_smiles=x_vals,
                     validation_raw_targets=y_vals)
        save_name = "input_attention_one_hot_" + args.input_file + ".npz"
        serializers.save_npz(save_name, trained_NNFP)
        mse, _ = trained_NNFP.mse(x_tests, y_tests, undo_norm)
        # ._data[0] unwraps the Variable-like object returned by mse().
        return math.sqrt(mse._data[0]), conv_training_curve

    def load_model_experiment():
        """Load weights from --load_npz; return (test RMSE, input attention)."""
        trained_NNFP = Main(model_params)
        serializers.load_npz(args.load_npz, trained_NNFP)
        _, undo_norm = normalize_array(y_tests)
        mse, input_attention = trained_NNFP.mse(x_tests, y_tests, undo_norm)
        return math.sqrt(mse._data[0]), input_attention

    print("Starting neural fingerprint experiment...")
    if args.load_npz is None:  # (fix) was `== None`; use identity check
        test_loss_neural, conv_training_curve = run_conv_experiment()
    else:
        test_loss_neural, input_attention = load_model_experiment()
        x_ecfp = input_attention._data[0]
        np.savetxt('ecfp_propaty_' + args.input_file, x_ecfp, delimiter=' ')
        np.set_printoptions(threshold=np.inf)

        def figure_histgram(weight):
            """Build per-column histogram inputs from the attention matrix.

            NOTE(review): the distplot calls are commented out, so this
            currently computes hist_data/group_labels and discards them —
            effectively a no-op kept as a debugging hook.
            """
            attentions = weight.T
            hist_data = [attentions[i] + i for i in range(len(attentions))]
            group_labels = ['a', 'b', 'c', 'd', 'e']
            #fig = ff.create_distplot(hist_data, group_labels, bin_size=.2)
            #fig.show()

        figure_histgram(x_ecfp)
    print("Neural test RMSE", test_loss_neural)
    print("time : ", time.time() - ALL_TIME)