import os

import pandas as pd
import tensorflow as tf

# NOTE: DataSet, UGRNN, utils, config, as well as the module-level FLAGS and
# logger objects, are assumed to come from this package's own modules (e.g.
# its input_data / ugrnn / utils modules); the exact import paths depend on
# the repo layout.


def main(*args):
    model_dir = os.path.join(FLAGS.output_dir, FLAGS.model_name)
    if tf.gfile.Exists(model_dir):
        tf.gfile.DeleteRecursively(model_dir)
    tf.gfile.MakeDirs(model_dir)

    with tf.Graph().as_default():
        # Create a session for running Ops on the Graph.
        sess = tf.Session()

        logp_col_name = FLAGS.logp_col if FLAGS.add_logp else None

        logger.info('Loading training dataset from {:}'.format(FLAGS.training_file))
        train_dataset = DataSet(csv_file_path=FLAGS.training_file,
                                smile_col_name=FLAGS.smile_col,
                                target_col_name=FLAGS.target_col,
                                logp_col_name=logp_col_name,
                                contract_rings=FLAGS.contract_rings)

        logger.info('Loading validation dataset from {:}'.format(FLAGS.validation_file))
        validation_dataset = DataSet(csv_file_path=FLAGS.validation_file,
                                     smile_col_name=FLAGS.smile_col,
                                     target_col_name=FLAGS.target_col,
                                     logp_col_name=logp_col_name,
                                     contract_rings=FLAGS.contract_rings)

        logger.info('Creating graph.')
        ugrnn_model = UGRNN(FLAGS.model_name,
                            encoding_nn_hidden_size=FLAGS.model_params[0],
                            encoding_nn_output_size=FLAGS.model_params[1],
                            output_nn_hidden_size=FLAGS.model_params[2],
                            batch_size=FLAGS.batch_size,
                            learning_rate=0.001,
                            add_logp=FLAGS.add_logp,
                            clip_gradients=FLAGS.clip_gradient)
        logger.info('Successfully created graph.')

        # Run the Op that initializes all variables.
        init = tf.global_variables_initializer()
        sess.run(init)

        ugrnn_model.train(sess, FLAGS.max_epochs, train_dataset,
                          validation_dataset, model_dir)
        ugrnn_model.save_model(sess, model_dir, FLAGS.max_epochs)
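# The functions in this file read their configuration from module-level FLAGS.
# Below is a minimal sketch of how those flags could be declared with the TF1
# tf.app.flags API; the names match the FLAGS attributes used above, but the
# defaults are illustrative assumptions, not values taken from this repo.
# FLAGS.model_params is expected to be a 3-element list
# [encoding_nn_hidden_size, encoding_nn_output_size, output_nn_hidden_size],
# e.g. declared with absl.flags.DEFINE_multi_integer.
def _example_define_flags():
    flags = tf.app.flags
    flags.DEFINE_string('output_dir', 'output', 'Root directory for saved models.')
    flags.DEFINE_string('model_name', 'ugrnn_1', 'Name of this model instance.')
    flags.DEFINE_string('training_file', 'train.csv', 'CSV file with training molecules.')
    flags.DEFINE_string('validation_file', 'valid.csv', 'CSV file with validation molecules.')
    flags.DEFINE_string('test_file', 'test.csv', 'CSV file with test molecules.')
    flags.DEFINE_string('smile_col', 'smiles', 'Name of the SMILES column.')
    flags.DEFINE_string('target_col', 'target', 'Name of the target column.')
    flags.DEFINE_string('logp_col', 'logp', 'Name of the logP column (used when add_logp is set).')
    flags.DEFINE_boolean('add_logp', False, 'Feed logP as an extra input feature.')
    flags.DEFINE_boolean('contract_rings', False, 'Contract rings when building molecular graphs.')
    flags.DEFINE_boolean('clip_gradient', False, 'Clip gradients during training.')
    flags.DEFINE_boolean('enable_plotting', False, 'Plot learning curves during training.')
    flags.DEFINE_integer('batch_size', 10, 'Minibatch size.')
    flags.DEFINE_integer('max_epochs', 150, 'Number of training epochs.')
    return flags.FLAGS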
def run_once(session, output_dir, train_data, valid_data, logp_col_name,
             experiment_name=''):
    # Note: logp_col_name is accepted but not forwarded to DataSet here; the
    # original call had "logp_col_name=logp_col_name" commented out.
    train_dataset = DataSet(smiles=train_data[0], labels=train_data[1],
                            contract_rings=FLAGS.contract_rings)
    validation_dataset = DataSet(smiles=valid_data[0], labels=valid_data[1],
                                 contract_rings=FLAGS.contract_rings)

    logger.info('Creating graph.')
    ugrnn_model = UGRNN(FLAGS.model_name,
                        encoding_nn_hidden_size=FLAGS.model_params[0],
                        encoding_nn_output_size=FLAGS.model_params[1],
                        output_nn_hidden_size=FLAGS.model_params[2],
                        batch_size=FLAGS.batch_size,
                        learning_rate=0.001,
                        add_logp=FLAGS.add_logp,
                        clip_gradients=FLAGS.clip_gradient)
    logger.info('Successfully created graph.')

    # Run the Op that initializes all variables.
    init = tf.global_variables_initializer()
    session.run(init)

    logger.info('FLAGS.enable_plotting = {:}'.format(FLAGS.enable_plotting))
    ugrnn_model.train(session, FLAGS.max_epochs, train_dataset,
                      validation_dataset, output_dir,
                      enable_plotting=int(FLAGS.enable_plotting))
    ugrnn_model.save_model(session, output_dir, FLAGS.max_epochs)
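# A minimal usage sketch for run_once() (illustrative assumption, not from
# this repo): train_data and valid_data are (smiles, labels) pairs, where
# smiles is a sequence of SMILES strings and labels a NumPy array of targets.
def _example_run_once():
    import numpy as np
    train_data = (['CCO', 'c1ccccc1', 'CC(=O)O'], np.array([0.5, 1.2, 0.9]))
    valid_data = (['CCN'], np.array([0.7]))
    with tf.Graph().as_default():
        with tf.Session() as session:
            run_once(session, os.path.join(FLAGS.output_dir, FLAGS.model_name),
                     train_data, valid_data, logp_col_name=None)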
def build_and_train(logger, session, output_dir, train_data, valid_data,
                    experiment_name='', regression=True,
                    binary_classification=False, model_name='ugrnn_1',
                    batch_size=10, clip_gradient=False, model_params=None,
                    contract_rings=False, learning_rate=1e-3, max_epochs=150,
                    enable_plotting=False,
                    Targets_UnNormalization_fn=lambda x: x,
                    weight_decay_factor=0, *args, **kwargs):
    assert not (regression and binary_classification), \
        'ERROR: arguments <regression>==True and <binary_classification>==True are mutually exclusive.'

    # TODO: figure out what causes the internal TensorFlow bug that requires
    # this hack ('remove_SMILES_longer_than'). Is it due to a new ("improved")
    # TensorFlow version?
    train_data = utils.remove_SMILES_longer_than(train_data, config.max_seq_len)
    valid_data = utils.remove_SMILES_longer_than(valid_data, config.max_seq_len)

    train_labels, is_masked_t = utils.create_labels_NaN_mask(train_data[1])
    valid_labels, is_masked_v = utils.create_labels_NaN_mask(valid_data[1])

    # Infer the task setup from the data.
    is_masked = is_masked_t or is_masked_v
    multitask = (not regression) and binary_classification
    num_tasks = train_labels.shape[-1] if train_labels.ndim > 1 else 1

    # If either split contains NaN labels, force masking for both so that the
    # two label arrays have a consistent (labels, weights) layout.
    if is_masked:
        if not is_masked_t:
            train_labels, is_masked_t = utils.create_labels_NaN_mask(
                train_data[1], force_masked=1)
        if not is_masked_v:
            valid_labels, is_masked_v = utils.create_labels_NaN_mask(
                valid_data[1], force_masked=1)

    train_dataset = DataSet(smiles=train_data[0], labels=train_labels,
                            contract_rings=contract_rings)
    validation_dataset = DataSet(smiles=valid_data[0], labels=valid_labels,
                                 contract_rings=contract_rings)

    logger.info('Creating graph.')
    ugrnn_model = UGRNN(model_name,
                        encoding_nn_hidden_size=model_params[0],
                        encoding_nn_output_size=model_params[1],
                        output_nn_hidden_size=model_params[2],
                        batch_size=batch_size,
                        learning_rate=learning_rate,
                        add_logp=False,
                        clip_gradients=clip_gradient,
                        regression=regression,
                        weight_decay_factor=weight_decay_factor,
                        num_tasks=num_tasks,
                        multitask=multitask,
                        weighted_loss=is_masked)
    logger.info('Successfully created graph.')

    init = tf.global_variables_initializer()
    session.run(init)

    training_scores_dict, validation_scores_dict = ugrnn_model.train(
        session, max_epochs, train_dataset, validation_dataset, output_dir,
        enable_plotting=bool(enable_plotting),
        Targets_UnNormalization_fn=Targets_UnNormalization_fn)
    ugrnn_model.save_model(session, output_dir, max_epochs)
    return training_scores_dict, validation_scores_dict
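# A minimal usage sketch for build_and_train() (illustrative assumption): a
# single-task regression run with arbitrary small network sizes. Labels may
# contain NaNs; in that case build_and_train() switches to a masked
# (weighted) loss via utils.create_labels_NaN_mask.
def _example_build_and_train():
    import logging
    import numpy as np
    log = logging.getLogger('ugrnn_example')
    train_data = (['CCO', 'c1ccccc1', 'CC(=O)O'], np.array([0.5, np.nan, 0.9]))
    valid_data = (['CCN'], np.array([0.7]))
    with tf.Graph().as_default():
        with tf.Session() as session:
            train_scores, valid_scores = build_and_train(
                log, session, 'output/ugrnn_example', train_data, valid_data,
                regression=True,
                model_params=[7, 3, 5],  # [enc_hidden, enc_output, out_hidden]
                batch_size=2, max_epochs=5)
    return train_scores, valid_scores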
def main(*args):
    # Guard against a possible error in the UGRNN code: the external test set
    # CSV is expected to have exactly three columns; otherwise it is rewritten
    # (note that to_csv() as called here also writes the DataFrame index).
    df_ext = pd.read_csv('../../External Test Set/External_Test_Set.csv')
    if df_ext.shape[1] == 3:
        print('Moving forward.')
    else:
        df_ext.to_csv('../../External Test Set/External_Test_Set.csv')

    model_dir = os.path.join(FLAGS.output_dir, FLAGS.model_name)
    # if tf.io.gfile.exists(model_dir):
    #     tf.io.gfile.DeleteRecursively(model_dir)
    # tf.io.gfile.makedirs(model_dir)

    with tf.Graph().as_default():
        sess = tf.Session()

        logp_col_name = FLAGS.logp_col if FLAGS.add_logp else None

        logger.info('Loading training dataset from {:}'.format(FLAGS.training_file))
        train_dataset = DataSet(csv_file_path=FLAGS.training_file,
                                smile_col_name=FLAGS.smile_col,
                                target_col_name=FLAGS.target_col,
                                logp_col_name=logp_col_name,
                                contract_rings=FLAGS.contract_rings)

        logger.info('Loading validation dataset from {:}'.format(FLAGS.validation_file))
        validation_dataset = DataSet(csv_file_path=FLAGS.validation_file,
                                     smile_col_name=FLAGS.smile_col,
                                     target_col_name=FLAGS.target_col,
                                     logp_col_name=logp_col_name,
                                     contract_rings=FLAGS.contract_rings)

        logger.info('Loading test dataset from {:}'.format(FLAGS.test_file))
        test_dataset = DataSet(csv_file_path=FLAGS.test_file,
                               smile_col_name=FLAGS.smile_col,
                               target_col_name=FLAGS.target_col,
                               logp_col_name=logp_col_name,
                               contract_rings=FLAGS.contract_rings)

        logger.info('Creating graph.')
        ugrnn_model = UGRNN(FLAGS.model_name,
                            encoding_nn_hidden_size=FLAGS.model_params[0],
                            encoding_nn_output_size=FLAGS.model_params[1],
                            output_nn_hidden_size=FLAGS.model_params[2],
                            batch_size=FLAGS.batch_size,
                            learning_rate=0.001,
                            add_logp=FLAGS.add_logp,
                            clip_gradients=FLAGS.clip_gradient)
        logger.info('Successfully created graph.')

        # Run the Op that initializes all variables.
        init = tf.global_variables_initializer()
        sess.run(init)

        ugrnn_model.train(sess, FLAGS.max_epochs, train_dataset,
                          validation_dataset, model_dir)
        print('Saving model...')
        ugrnn_model.save_model(sess, model_dir, FLAGS.max_epochs)

        # Export the learned hidden (molecule-level) representations for the
        # external test set, alongside the corresponding SMILES strings.
        # hidden_train = ugrnn_model.Hidden(sess, train_dataset)
        # hidden_validate = ugrnn_model.Hidden(sess, validation_dataset)
        hidden_test = pd.DataFrame(ugrnn_model.Hidden(sess, test_dataset))
        raw_test_filtered = pd.read_csv(
            '../../External Test Set/External_Test_Set_filtered.csv')
        hidden_test['Canonical SMILES'] = raw_test_filtered['Canonical SMILES']
        print('hidden_test created!')
        # pd.DataFrame(hidden_train).to_csv('./data/DILI/Final_data/Predictions/train_HidenRepresentation.csv')
        hidden_test.to_csv('./data/DILI/Final_data/Predictions/UGRNN Encoddings.csv')
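# Typical TF1-style entry point (a sketch). Note that this file defines two
# main() variants; tf.app.run() would dispatch to whichever is defined last.
if __name__ == '__main__':
    tf.app.run(main=main)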