def plot_2Dimg_logp_predictions():
    """Plot predicted against true LogP for the two 2D-image models.

    The test splits of ``data/ncidb_2Dimg.npz`` and
    ``data/ncidb_experim_data_2Dimg.npz`` are loaded through ``get_data``,
    both input arrays are divided by 255 in place, and one network per
    dataset is built with ``incep_model_logp`` and restored from its weights
    file. Both sets of predictions are drawn as scatter series on a single
    figure together with a dashed red identity line, and the figure is saved
    to ``output/2Dimg_logp_predictions.png``.

    The R^2 values shown in the legend are literals inside the label
    strings; nothing in this function computes them.
    """
    # Computed LogP dataset: only the test split is kept.
    print('Loading data')
    (_, _), (calc_inputs, calc_targets) = get_data('data/ncidb_2Dimg.npz')

    # Experimental LogP dataset, requested with split=0.2.
    print('Loading data')
    (_, _), (expt_inputs, expt_targets) = get_data(
        'data/ncidb_experim_data_2Dimg.npz', split=0.2)

    # Rescale both input arrays in place by dividing by 255.
    print('Normalize input dividing it by 255')
    calc_inputs /= 255
    expt_inputs /= 255

    # Both networks are built from the dimensions of the computed dataset.
    height, width, channels = calc_inputs[0].shape
    n_outputs = calc_targets[0].shape[0]

    # Predictor for the computed LogP data.
    calc_net = incep_model_logp(height, width, channels, n_outputs, lmbda=0)
    calc_net.load_weights(
        'weights/incep_resnet_compact_v4_logp_best_val_r2.h5')

    # Predictor for the experimental LogP data (built with frozen_index=7).
    expt_net = incep_model_logp(
        height, width, channels, n_outputs, lmbda=0, frozen_index=7)
    expt_net.load_weights(
        'weights/incep_resnet_compact_v4_exp_logp_trsf_lrng_small_lr_best_val_r2.h5')

    calc_predicted = calc_net.predict(calc_inputs)
    expt_predicted = expt_net.predict(expt_inputs)

    # One scatter series per model, drawn in the same order as before.
    figure = plt.figure()
    scatter_series = (
        (calc_targets, calc_predicted,
         "LogP model (Test Set). $R^2$: 0.852"),
        (expt_targets, expt_predicted,
         "Experimental LogP model (Test Set). $R^2$: 0.964"),
    )
    for truth, predicted, legend_text in scatter_series:
        plt.scatter(truth, predicted, label=legend_text)

    plt.title("Predicted LogP values from 2D molecule images")
    plt.xlabel("True values")
    plt.ylabel("Predicted values")

    # Identity line y = x across the hard-coded range [-20, 40].
    diagonal = [-20, 40]
    plt.plot(diagonal, diagonal, '--', color='red')
    plt.legend()

    # Write the figure to disk, then release it.
    plt.savefig("output/2Dimg_logp_predictions.png")
    plt.close(figure)
def plot_fingerprints_logp_predictions():
    """Plot predicted against true LogP for the two fingerprint models.

    The test splits of ``data/ncidb_fingerprints.npz`` and
    ``data/ncidb_experim_data_fingerprints.npz`` are loaded through
    ``get_data``, and one network per dataset is built with
    ``fcnn_model_logp`` and restored from its weights file. Both sets of
    predictions are drawn as scatter series on a single figure together with
    a dashed red identity line, and the figure is saved to
    ``output/1Dfingerprints_logp_predictions.png``.

    The R^2 values shown in the legend are literals inside the label
    strings; nothing in this function computes them.
    """
    # Computed LogP dataset: only the test split is kept.
    print('Loading data')
    (_, _), (calc_inputs, calc_targets) = get_data(
        'data/ncidb_fingerprints.npz')

    # Experimental LogP dataset, requested with split=0.2.
    print('Loading data')
    (_, _), (expt_inputs, expt_targets) = get_data(
        'data/ncidb_experim_data_fingerprints.npz', split=0.2)

    # Both networks are built from the dimensions of the computed dataset.
    n_features = calc_inputs[0].shape[0]
    n_outputs = calc_targets[0].shape[0]

    # Predictor for the computed LogP data.
    calc_net = fcnn_model_logp(n_features, n_outputs, lmbda=0)
    calc_net.load_weights('weights/fcnn_logp_6l_best_val_r2.h5')

    # Predictor for the experimental LogP data (built with frozen_layers=4).
    expt_net = fcnn_model_logp(
        n_features, n_outputs, lmbda=0, frozen_layers=4)
    expt_net.load_weights('weights/fcnn_exp_logp_6l_trsf_lrng_best_val_r2.h5')

    calc_predicted = calc_net.predict(calc_inputs)
    expt_predicted = expt_net.predict(expt_inputs)

    # One scatter series per model, drawn in the same order as before.
    figure = plt.figure()
    scatter_series = (
        (calc_targets, calc_predicted,
         "LogP model (Test Set). $R^2$: 0.839"),
        (expt_targets, expt_predicted,
         "Experimental LogP model (Test Set). $R^2$: 0.923"),
    )
    for truth, predicted, legend_text in scatter_series:
        plt.scatter(truth, predicted, label=legend_text)

    plt.title("Predicted LogP values from molecular fingerprints")
    plt.xlabel("True values")
    plt.ylabel("Predicted values")

    # Identity line y = x across the hard-coded range [-20, 40].
    diagonal = [-20, 40]
    plt.plot(diagonal, diagonal, '--', color='red')
    plt.legend()

    # Write the figure to disk, then release it.
    plt.savefig("output/1Dfingerprints_logp_predictions.png")
    plt.close(figure)
def main(train=False):
    """Train or evaluate the Tox21 model built by ``incep_model_tox21``.

    The dataset ``data/tox21_10k_data_all_2Dimg_ml.npz`` is loaded through
    ``get_data`` and both input arrays are divided by 255 in place. The model
    is then either fitted (``train`` true) or restored from a saved weights
    file (``train`` false). In both cases it is afterwards evaluated on the
    test split and passed to ``each_metric``.

    Args:
        train: When true, fit for 100 epochs with two checkpoint callbacks,
            then save and plot the training history. When false, load
            ``weights/<MODEL_NAME>_100.h5`` instead.

    Raises:
        FileNotFoundError: ``train`` is false and the weights file does not
            exist. (Still caught by ``except Exception``.)
    """
    # Get train and test dataset
    print('Loading data')
    (x_train, y_train), (x_test, y_test) = get_data(
        'data/tox21_10k_data_all_2Dimg_ml.npz')

    print('Normalize input dividing it by 255')
    # Input data normalization: in-place division by 255.
    x_train /= 255
    x_test /= 255

    n_h, n_w, n_c = x_train[0].shape
    n_y = y_train[0].shape[0]

    # Build model
    incep_tox21 = incep_model_tox21(n_h, n_w, n_c, n_y, lmbda=0)

    epochs = 100

    if train:
        # Train model
        print('\ntrain the model')

        # Periodic checkpoint: weights only, written with period=5.
        weights_ckpt = keras.callbacks.ModelCheckpoint(
            'weights/%s_{epoch:d}.h5' % MODEL_NAME,
            save_weights_only=True,
            period=5)

        # Best-so-far checkpoint, selected on the maximum of val_masked_f1.
        # NOTE(review): the file name says "val_r2" although the monitored
        # quantity is val_masked_f1; left unchanged because other code may
        # read this exact path.
        best_ckpt = keras.callbacks.ModelCheckpoint(
            'weights/%s_best_val_r2.h5' % MODEL_NAME,
            monitor='val_masked_f1',
            verbose=1,
            save_best_only=True,
            mode='max')

        # Parsed as (64 * GPUs) or 32: falls back to 32 when GPUs is 0.
        batch_size = 64 * GPUs or 32

        history = incep_tox21.fit(
            x_train,
            y_train,
            epochs=epochs,
            validation_split=0.1,
            batch_size=batch_size,
            callbacks=[weights_ckpt, best_ckpt]  # Save weights
        )

        # Get data from history
        metrics = ['masked_f1', 'loss', 'masked_accuracy']
        save_history(
            history, "output/%s_%s_history.json" % (MODEL_NAME, epochs))
        plot_data(history, MODEL_NAME, epochs, metrics=metrics)
    else:
        # Load the model weights
        weights_file_path = os.path.abspath(
            os.path.join(os.curdir,
                         'weights/%s_%s.h5' % (MODEL_NAME, epochs)))
        if not os.path.exists(weights_file_path):
            # Report the missing path itself; the original interpolated the
            # result of os.path.exists(), which is always False here.
            raise FileNotFoundError(
                "The weights file path specified does not exists: %s"
                % weights_file_path)
        # The original called load_weights on `incep_mdl`, a name that is
        # never defined in this function; the model here is `incep_tox21`.
        incep_tox21.load_weights(weights_file_path)

    print('\ntest the model')
    # NOTE(review): the unpacking assumes the model is compiled with exactly
    # two metrics, F1 first and accuracy second - confirm against
    # incep_model_tox21.
    test_loss, test_f1_score, test_acc = incep_tox21.evaluate(x_test, y_test)

    print('\n#######################################')
    print('Test loss:', test_loss)
    print('Test accuracy:', test_acc)
    print('Test F1 score:', test_f1_score)

    print('\n#######################################')
    each_metric(x_test, y_test, incep_tox21)
def main(train=False):
    """Train or evaluate the LogP model built by ``fcnn_model_logp``.

    The dataset ``data/ncidb_fingerprints.npz`` is loaded through
    ``get_data``. The model is then either fitted (``train`` true) or
    restored from a saved weights file (``train`` false). In both cases it is
    afterwards evaluated on the test split and the results are printed.

    Args:
        train: When true, fit for 50 epochs with two checkpoint callbacks,
            then save and plot the training history. When false, load
            ``weights/<MODEL_NAME>_50.h5`` instead.

    Raises:
        FileNotFoundError: ``train`` is false and the weights file does not
            exist. (Still caught by ``except Exception``.)
    """
    # Get train and test dataset
    (x_train, y_train), (x_test, y_test) = get_data(
        'data/ncidb_fingerprints.npz')

    n_x = x_train[0].shape[0]
    n_y = y_train[0].shape[0]

    # Build model
    fcnn_mdl = fcnn_model_logp(n_x, n_y, lmbda=0)

    epochs = 50

    if train:
        # Train model
        print('\ntrain the model')

        # Periodic checkpoint: weights only, written with period=5.
        weights_ckpt = keras.callbacks.ModelCheckpoint(
            'weights/%s_{epoch:d}.h5' % MODEL_NAME,
            save_weights_only=True,
            period=5
        )

        # Best-so-far checkpoint, selected on the maximum of val_r_squared.
        best_ckpt = keras.callbacks.ModelCheckpoint(
            'weights/%s_best_val_r2.h5' % MODEL_NAME,
            monitor='val_r_squared',
            verbose=1,
            save_best_only=True,
            mode='max'
        )

        history = fcnn_mdl.fit(
            x_train,
            y_train,
            epochs=epochs,
            validation_split=0.1,
            callbacks=[weights_ckpt, best_ckpt]  # Save weights
        )

        # Get data from history
        metrics = ['mean_absolute_error', 'r_squared', 'loss']
        save_history(
            history, "output/%s_%s_history.json" % (MODEL_NAME, epochs))
        plot_data(history, MODEL_NAME, epochs, metrics=metrics)
    else:
        # Load the model weights
        weights_file_path = os.path.abspath(
            os.path.join(os.curdir,
                         'weights/%s_%s.h5' % (MODEL_NAME, epochs)))
        if not os.path.exists(weights_file_path):
            # Report the missing path itself; the original interpolated the
            # result of os.path.exists(), which is always False here.
            raise FileNotFoundError(
                "The weights file path specified does not exists: %s"
                % weights_file_path
            )
        fcnn_mdl.load_weights(weights_file_path)

    print('\ntest the model')
    # NOTE(review): the unpacking assumes the model is compiled with exactly
    # two metrics, MAE first and R^2 second - confirm against
    # fcnn_model_logp.
    test_loss, test_mae, test_r_squared = fcnn_mdl.evaluate(x_test, y_test)

    print('\n#######################################')
    print('Test loss:', test_loss)
    print('Test mae:', test_mae)
    print('Test R2:', test_r_squared)
def main(train=False, weights_file_path=None):
    """Train or evaluate the LogP model built by ``incep_model_logp``.

    The dataset ``data/ncidb_2Dimg.npz`` is loaded through ``get_data`` and
    both input arrays are divided by 255 in place. The model is then either
    fitted (``train`` true) or restored from a saved weights file (``train``
    false). In both cases it is afterwards evaluated on the test split and
    the results are printed.

    Args:
        train: When true, fit for 100 epochs with checkpoint and CSV-logging
            callbacks, then save and plot the training history. When false,
            only load weights.
        weights_file_path: Optional weights file. In training mode, a
            non-empty value is loaded into the model before fitting. In
            evaluation mode it replaces the default
            ``weights/<MODEL_NAME>_100.h5``.

    Raises:
        FileNotFoundError: ``train`` is false and the weights file does not
            exist. (Still caught by ``except Exception``.)
    """
    # Get train and test dataset
    print('Loading data')
    (x_train, y_train), (x_test, y_test) = get_data('data/ncidb_2Dimg.npz')

    print('Normalize input dividing it by 255')
    # Input data normalization: in-place division by 255.
    x_train /= 255
    x_test /= 255

    n_h, n_w, n_c = x_train[0].shape
    n_y = y_train[0].shape[0]

    # Build model
    incep_mdl = incep_model_logp(n_h, n_w, n_c, n_y, lmbda=0)

    epochs = 100

    if train:
        # Train model
        print('\ntrain the model')

        # Periodic checkpoint: weights only, written with period=5.
        weights_ckpt = keras.callbacks.ModelCheckpoint(
            'weights/%s_{epoch:d}.h5' % MODEL_NAME,
            save_weights_only=True,
            period=5)

        # Best-so-far checkpoint, selected on the maximum of val_r_squared.
        best_ckpt = keras.callbacks.ModelCheckpoint(
            'weights/%s_best_val_r2.h5' % MODEL_NAME,
            monitor='val_r_squared',
            verbose=1,
            save_best_only=True,
            mode='max')

        csv_logger = keras.callbacks.CSVLogger(
            'output/%s_%s_history.csv' % (MODEL_NAME, epochs))

        # Parsed as (64 * GPUs) or 32: falls back to 32 when GPUs is 0.
        batch_size = 64 * GPUs or 32

        # Start from previously saved weights when a path was supplied.
        if weights_file_path:
            incep_mdl.load_weights(weights_file_path)

        history = incep_mdl.fit(
            x_train,
            y_train,
            epochs=epochs,
            validation_split=0.1,
            batch_size=batch_size,
            callbacks=[weights_ckpt, best_ckpt, csv_logger]  # Save weights
        )

        # Get data from history
        metrics = ['mean_absolute_error', 'r_squared', 'loss']
        save_history(
            history, "output/%s_%s_history.json" % (MODEL_NAME, epochs))
        plot_data(history, MODEL_NAME, epochs, metrics=metrics)
    else:
        # Load the model weights, defaulting to the final-epoch checkpoint.
        if not weights_file_path:
            weights_file_path = os.path.abspath(
                os.path.join(os.curdir,
                             'weights/%s_%s.h5' % (MODEL_NAME, epochs)))
        if not os.path.exists(weights_file_path):
            # Report the missing path itself; the original interpolated the
            # result of os.path.exists(), which is always False here.
            raise FileNotFoundError(
                "The weights file path specified does not exists: %s"
                % weights_file_path)
        incep_mdl.load_weights(weights_file_path)

    print('\ntest the model')
    # NOTE(review): the unpacking assumes the model is compiled with exactly
    # two metrics, MAE first and R^2 second - confirm against
    # incep_model_logp.
    test_loss, test_mae, test_r_squared = incep_mdl.evaluate(x_test, y_test)

    print('\n#######################################')
    print('Test loss:', test_loss)
    print('Test mae:', test_mae)
    print('Test R2:', test_r_squared)
def main(train=False):
    """Train or evaluate the Tox21 fingerprint classifier.

    The module-level flag ``ml`` selects both the dataset and the classifier
    builder: ``data/tox21_10k_data_all_fingerprints_multi.npz`` with
    ``fcnn_classifier_tox21_ml`` when it is truthy, otherwise
    ``data/tox21_10k_data_all_fingerprints.npz`` with
    ``fcnn_classifier_tox21``. The classifier is then either fitted
    (``train`` true) or restored from a saved weights file (``train`` false).
    In both cases it is afterwards evaluated on the test split and passed to
    ``each_metric``.

    Args:
        train: When true, fit for 50 epochs with two checkpoint callbacks,
            then save and plot the training history. When false, load
            ``weights/fcnn_tox21_50.h5`` instead.

    Raises:
        FileNotFoundError: ``train`` is false and the weights file does not
            exist. (Still caught by ``except Exception``.)
    """
    # Pick the dataset and the builder up front so the data is loaded once
    # and only one classifier is built. The original loaded the default
    # dataset and built the default classifier first, then discarded both
    # when `ml` was set.
    if ml:
        # Data path used when running with missing labels.
        data_path = 'data/tox21_10k_data_all_fingerprints_multi.npz'
        build_classifier = fcnn_classifier_tox21_ml
    else:
        data_path = 'data/tox21_10k_data_all_fingerprints.npz'
        build_classifier = fcnn_classifier_tox21

    # Get train and test dataset
    (x_train, y_train), (x_test, y_test) = get_data(data_path)

    n_x = x_train[0].shape[0]
    n_y = y_train[0].shape[0]

    # Get the classifier
    fcnn_clf = build_classifier(n_x, n_y)

    epochs = 50

    if train:
        # Train classifier
        print('\ntrain the classifier')

        # Periodic checkpoint: weights only, written with period=5.
        weights_ckpt = keras.callbacks.ModelCheckpoint(
            'weights/%s_{epoch:d}.h5' % MODEL_NAME,
            save_weights_only=True,
            period=5)

        # Best-so-far checkpoint, selected on the maximum of val_f1_score.
        best_ckpt = keras.callbacks.ModelCheckpoint(
            'weights/%s_best_f1_score.h5' % MODEL_NAME,
            monitor='val_f1_score',
            verbose=1,
            save_best_only=True,
            mode='max')

        history = fcnn_clf.fit(
            x_train,
            y_train,
            epochs=epochs,
            validation_split=0.1,
            callbacks=[weights_ckpt, best_ckpt]  # Save weights
        )

        # Get data from history
        metrics = ['f1_score', 'loss']
        save_history(
            history, "output/%s_%s_history.json" % (MODEL_NAME, epochs))
        plot_data(history, MODEL_NAME, epochs, metrics=metrics)
    else:
        # Load the model weights
        # NOTE(review): this path hard-codes the "fcnn_tox21" prefix, whereas
        # the checkpoints above are written under MODEL_NAME - confirm the
        # two agree for every configuration (including ml).
        weights_file_path = os.path.abspath(
            os.path.join(os.curdir, 'weights/fcnn_tox21_%s.h5' % epochs))
        if not os.path.exists(weights_file_path):
            # Report the missing path itself; the original interpolated the
            # result of os.path.exists(), which is always False here.
            raise FileNotFoundError(
                "The weights file path specified does not exists: %s"
                % weights_file_path)
        fcnn_clf.load_weights(weights_file_path)

    print('\ntest the classifier')
    print('\n#######################################')
    # NOTE(review): the unpacking assumes the classifier is compiled with a
    # single metric (F1) - confirm against the classifier builders.
    test_loss, test_f1_score = fcnn_clf.evaluate(x_test, y_test)

    print('\n#######################################')
    print('Test loss:', test_loss)
    print('Test F1 score:', test_f1_score)

    print('\n#######################################')
    each_metric(x_test, y_test, fcnn_clf)