Example #1
0
def main(*args):
    """Train a UGRNN model from FLAGS-configured CSV files and save it.

    Side effects: deletes and recreates FLAGS.output_dir/FLAGS.model_name,
    then trains for FLAGS.max_epochs and writes the final model there.
    """
    model_dir = os.path.join(FLAGS.output_dir, FLAGS.model_name)

    # Start from a clean model directory so stale checkpoints/summaries
    # from a previous run cannot leak into this one.
    if tf.gfile.Exists(model_dir):
        tf.gfile.DeleteRecursively(model_dir)
    tf.gfile.MakeDirs(model_dir)

    with tf.Graph().as_default():
        # Context-manage the session so its resources are released even if
        # training raises (the original created it and never closed it).
        with tf.Session() as sess:
            # logP is an optional auxiliary input column; only read it
            # when the flag requests it.
            logp_col_name = FLAGS.logp_col if FLAGS.add_logp else None

            logger.info('Loading Training dataset from {:}'.format(
                FLAGS.training_file))
            train_dataset = DataSet(csv_file_path=FLAGS.training_file,
                                    smile_col_name=FLAGS.smile_col,
                                    target_col_name=FLAGS.target_col,
                                    logp_col_name=logp_col_name,
                                    contract_rings=FLAGS.contract_rings)

            logger.info('Loading validation dataset from {:}'.format(
                FLAGS.validation_file))
            validation_dataset = DataSet(csv_file_path=FLAGS.validation_file,
                                         smile_col_name=FLAGS.smile_col,
                                         target_col_name=FLAGS.target_col,
                                         logp_col_name=logp_col_name,
                                         contract_rings=FLAGS.contract_rings)

            logger.info("Creating Graph.")

            # model_params order: [encoding hidden, encoding output,
            # output hidden] — presumably; confirm against UGRNN's docs.
            ugrnn_model = UGRNN(FLAGS.model_name,
                                encoding_nn_hidden_size=FLAGS.model_params[0],
                                encoding_nn_output_size=FLAGS.model_params[1],
                                output_nn_hidden_size=FLAGS.model_params[2],
                                batch_size=FLAGS.batch_size,
                                learning_rate=0.001,
                                add_logp=FLAGS.add_logp,
                                clip_gradients=FLAGS.clip_gradient)

            logger.info("Succesfully created graph.")

            # Variables must be initialized before any training step runs.
            init = tf.global_variables_initializer()
            sess.run(init)
            logger.info('Run the Op to initialize the variables')
            ugrnn_model.train(sess, FLAGS.max_epochs, train_dataset,
                              validation_dataset, model_dir)
            ugrnn_model.save_model(sess, model_dir, FLAGS.max_epochs)
def run_once(session, output_dir, train_data, valid_data, logp_col_name, experiment_name = ''):
    """Build and train a UGRNN model once on in-memory (smiles, labels) data.

    Args:
        session: an already-created tf.Session; the caller owns its lifetime.
        output_dir: directory where training output and the saved model go.
        train_data: 2-tuple of (smiles, labels) for training.
        valid_data: 2-tuple of (smiles, labels) for validation.
        logp_col_name: unused here — kept for interface compatibility.
        experiment_name: unused here — kept for interface compatibility.
    """
    train_dataset = DataSet(smiles=train_data[0], labels=train_data[1],
                            contract_rings=FLAGS.contract_rings)
    validation_dataset = DataSet(smiles=valid_data[0], labels=valid_data[1],
                                 contract_rings=FLAGS.contract_rings)

    logger.info("Creating Graph.")
    ugrnn_model = UGRNN(FLAGS.model_name, encoding_nn_hidden_size=FLAGS.model_params[0],
                        encoding_nn_output_size=FLAGS.model_params[1], output_nn_hidden_size=FLAGS.model_params[2],
                        batch_size=FLAGS.batch_size, learning_rate=0.001, add_logp=FLAGS.add_logp,
                        clip_gradients=FLAGS.clip_gradient)
    logger.info("Succesfully created graph.")

    init = tf.global_variables_initializer()
    session.run(init)
    logger.info('Run the Op to initialize the variables')
    # Log through the module logger (lazy %-args) instead of a bare print,
    # consistent with every other message in this file.
    logger.info('FLAGS.enable_plotting %s', FLAGS.enable_plotting)
    # Keep the int() conversion: the flag may arrive as a string (e.g. '0'),
    # where bool('0') would incorrectly be truthy.
    ugrnn_model.train(session, FLAGS.max_epochs, train_dataset,
                      validation_dataset, output_dir,
                      enable_plotting=int(FLAGS.enable_plotting))
    ugrnn_model.save_model(session, output_dir, FLAGS.max_epochs)
Example #3
0
def build_and_train(logger,
                    session,
                    output_dir,
                    train_data,
                    valid_data,
                    experiment_name='',
                    regression=True,
                    binary_classification=False,
                    model_name='ugrnn_1',
                    batch_size=10,
                    clip_gradient=False,
                    model_params=None,
                    contract_rings=False,
                    learning_rate=1e-3,
                    max_epochs=150,
                    enable_plotting=False,
                    Targets_UnNormalization_fn=lambda x: x,
                    weight_decay_factor=0,
                    *args,
                    **kwargs):
    """Build a UGRNN model, train it, save it and return the score dicts.

    Args:
        logger: logger used for progress messages.
        session: an already-created tf.Session; the caller owns its lifetime.
        output_dir: directory for training output and the saved model.
        train_data, valid_data: 2-tuples of (smiles, labels).
        experiment_name: unused here — kept for interface compatibility.
        regression: train a regression model (mutually exclusive with
            binary_classification).
        binary_classification: train a binary classifier instead.
        model_params: 3-sequence [encoding hidden, encoding output,
            output hidden] layer sizes.
        Targets_UnNormalization_fn: maps normalized targets back to the
            original scale for scoring.

    Returns:
        (training_scores_dict, validation_scores_dict) as produced by
        UGRNN.train.

    Raises:
        ValueError: if both regression and binary_classification are True.
    """
    # Fail fast, before any data preprocessing.  A real exception instead
    # of `assert` (which is stripped under `python -O`) keeps the contract
    # enforceable, and checking first avoids wasted work on bad arguments.
    if regression and binary_classification:
        raise ValueError(
            'ERROR: arguments <regression>==True and <binary_classification>==True are mutually exclusive.'
        )

    # TODO: figure out what causes the internal Tensorflow bug that requires this hack ('remove_SMILES_longer_than').
    # is it due to a new ("improved") tensorflow version?
    train_data = utils.remove_SMILES_longer_than(train_data,
                                                 config.max_seq_len)
    valid_data = utils.remove_SMILES_longer_than(valid_data,
                                                 config.max_seq_len)

    # NaN entries in the labels become a mask so partially-labeled
    # multitask data can still be trained on.
    train_labels, is_masked_t = utils.create_labels_NaN_mask(train_data[1])
    valid_labels, is_masked_v = utils.create_labels_NaN_mask(valid_data[1])

    # Inferring model configuration from the data.
    is_masked = is_masked_t or is_masked_v
    multitask = (not regression) and binary_classification
    num_tasks = train_labels.shape[-1] if train_labels.ndim > 1 else 1

    # If either split is masked, force a mask on the other one too so both
    # datasets have the same label structure.
    if is_masked:
        if not is_masked_t:
            train_labels, is_masked_t = utils.create_labels_NaN_mask(
                train_data[1], force_masked=1)
        if not is_masked_v:
            valid_labels, is_masked_v = utils.create_labels_NaN_mask(
                valid_data[1], force_masked=1)

    train_dataset = DataSet(smiles=train_data[0],
                            labels=train_labels,
                            contract_rings=contract_rings)
    validation_dataset = DataSet(smiles=valid_data[0],
                                 labels=valid_labels,
                                 contract_rings=contract_rings)

    logger.info("Creating Graph.")
    ugrnn_model = UGRNN(model_name,
                        encoding_nn_hidden_size=model_params[0],
                        encoding_nn_output_size=model_params[1],
                        output_nn_hidden_size=model_params[2],
                        batch_size=batch_size,
                        learning_rate=learning_rate,
                        add_logp=False,
                        clip_gradients=clip_gradient,
                        regression=regression,
                        weight_decay_factor=weight_decay_factor,
                        num_tasks=num_tasks,
                        multitask=multitask,
                        weighted_loss=is_masked)
    logger.info("Succesfully created graph.")

    init = tf.global_variables_initializer()
    session.run(init)

    training_scores_dict, validation_scores_dict = ugrnn_model.train(
        session,
        max_epochs,
        train_dataset,
        validation_dataset,
        output_dir,
        enable_plotting=bool(enable_plotting),
        Targets_UnNormalization_fn=Targets_UnNormalization_fn)
    ugrnn_model.save_model(session, output_dir, max_epochs)
    return training_scores_dict, validation_scores_dict
Example #4
0
def main(*args):
    """Train a UGRNN model, then export hidden representations of a test set.

    Reads train/validation/test CSVs named by FLAGS, trains the model,
    saves it, and writes the test set's hidden-layer encodings (plus their
    SMILES) to a CSV.
    """
    # Next 5 lines of code - to handle a possible error that can occur in
    # the UGRNN code.  NOTE(review): the rewrite uses pandas' default
    # to_csv, which also writes the index as an extra column — presumably
    # that is the intended "fix" for the shape check; confirm the expected
    # column count of this file.
    df_ext = pd.read_csv("../../External Test Set/External_Test_Set.csv")
    if (df_ext.shape[1] == 3):
        print("Moving Forward")
    else:
        df_ext.to_csv("../../External Test Set/External_Test_Set.csv")

    model_dir = os.path.join(FLAGS.output_dir, FLAGS.model_name)

    #    if tf.io.gfile.exists(model_dir):
    #        tf.io.gfile.DeleteRecursively(model_dir)
    #    tf.io.gfile.makedirs(model_dir)

    with tf.Graph().as_default():
        # Context-manage the session so its resources are released even if
        # training raises (the original created it and never closed it).
        with tf.Session() as sess:
            # logP is an optional auxiliary input column; only read it
            # when the flag requests it.
            logp_col_name = FLAGS.logp_col if FLAGS.add_logp else None

            logger.info('Loading Training dataset from {:}'.format(
                FLAGS.training_file))
            train_dataset = DataSet(csv_file_path=FLAGS.training_file,
                                    smile_col_name=FLAGS.smile_col,
                                    target_col_name=FLAGS.target_col,
                                    logp_col_name=logp_col_name,
                                    contract_rings=FLAGS.contract_rings)

            logger.info('Loading validation dataset from {:}'.format(
                FLAGS.validation_file))
            validation_dataset = DataSet(csv_file_path=FLAGS.validation_file,
                                         smile_col_name=FLAGS.smile_col,
                                         target_col_name=FLAGS.target_col,
                                         logp_col_name=logp_col_name,
                                         contract_rings=FLAGS.contract_rings)

            logger.info('Loading test dataset from {:}'.format(FLAGS.test_file))
            test_dataset = DataSet(csv_file_path=FLAGS.test_file,
                                   smile_col_name=FLAGS.smile_col,
                                   target_col_name=FLAGS.target_col,
                                   logp_col_name=logp_col_name,
                                   contract_rings=FLAGS.contract_rings)
            logger.info("Creating Graph.")

            ugrnn_model = UGRNN(FLAGS.model_name,
                                encoding_nn_hidden_size=FLAGS.model_params[0],
                                encoding_nn_output_size=FLAGS.model_params[1],
                                output_nn_hidden_size=FLAGS.model_params[2],
                                batch_size=FLAGS.batch_size,
                                learning_rate=0.001,
                                add_logp=FLAGS.add_logp,
                                clip_gradients=FLAGS.clip_gradient)

            logger.info("Succesfully created graph.")

            init = tf.global_variables_initializer()
            sess.run(init)
            logger.info('Run the Op to initialize the variables')
            ugrnn_model.train(sess, FLAGS.max_epochs, train_dataset,
                              validation_dataset, model_dir)
            print('Saving model...')
            ugrnn_model.save_model(sess, model_dir, FLAGS.max_epochs)

            # Export the hidden-layer representation of each test molecule,
            # joined with its SMILES string from the filtered test file.
            # NOTE(review): assumes row order of Hidden() output matches
            # the filtered CSV — confirm against DataSet's ordering.
            hidden_test = pd.DataFrame(ugrnn_model.Hidden(sess, test_dataset))
            Raw_Test_filtered = pd.read_csv(
                "../../External Test Set/External_Test_Set_filtered.csv")
            hidden_test['Canonical SMILES'] = Raw_Test_filtered['Canonical SMILES']
            print('Hidden_test created!')
            hidden_test.to_csv(
                "./data/DILI/Final_data/Predictions/UGRNN Encoddings.csv")