Esempio n. 1
0
def run_fnn_training_multi(opts: opt.TrainOptions) -> None:
    """
    Train the multi-label FNN models on compressed and on uncompressed fingerprints.

    The input file is imported together with its fingerprints, an encoder is
    either trained (``opts.trainAC``) or rebuilt and restored from
    ``opts.ecWeightsFile``, the fingerprints are compressed with it, and
    ``fNN.train_nn_models_multi`` is run once with ``use_compressed=True`` and
    once with ``use_compressed=False``.

    Side effect: ``opts.ecWeightsFile`` is rewritten in place so that it points
    inside ``opts.outputDir``.

    :param opts: Options defining the details of the training
    """
    import os  # local import: only needed for building the weights path

    logging.basicConfig(format="DFPL-%(levelname)s: %(message)s",
                        level=logging.INFO)
    logging.info("Adding fingerprint to dataset")

    df = fp.importDataFile(opts.inputFile,
                           import_function=fp.importSmilesCSV,
                           fp_size=opts.fpSize)

    # Join rather than concatenate, so an outputDir without a trailing
    # separator still yields a path inside the output directory.
    opts.ecWeightsFile = os.path.join(opts.outputDir, opts.ecWeightsFile)

    if opts.trainAC:
        logging.info("Training autoencoder")
        encoder = ac.train_full_ac(df, opts)
    else:
        logging.info("Using trained autoencoder")
        (_, encoder) = ac.define_ac_model(input_size=opts.fpSize,
                                          encoding_dim=opts.encFPSize)
        # define_ac_model only builds the model; without restoring the stored
        # weights the fingerprints would be compressed by an untrained encoder.
        # NOTE(review): assumes the trained weights live at the output-dir
        # based path computed above - confirm against ac.train_full_ac.
        encoder.load_weights(opts.ecWeightsFile)

    df = ac.compress_fingerprints(df, encoder)

    # train FNNs with compressed features
    logging.info("Training the FNN using compressed input data.")
    fNN.train_nn_models_multi(df=df, opts=opts, use_compressed=True)

    # train FNNs with uncompressed features
    logging.info("Training the FNN using un-compressed input data.")
    fNN.train_nn_models_multi(df=df, opts=opts, use_compressed=False)

    logging.info("Done")
Esempio n. 2
0
def test_predictions():
    """
    Run a prediction using the module-level ``test_predict_args`` options.

    The input file is imported with fingerprints; when ``opts.acFile`` is set,
    an encoder is defined, its weights are loaded from that file and the
    fingerprints are compressed. The predicted values, without the ``fp`` and
    ``fpcompressed`` columns, are written to
    ``<outputDir>/<input file stem>.predictions.csv``.
    """
    opts = test_predict_args

    logging.basicConfig(level=logging.INFO, format="DFPL-%(levelname)s: %(message)s")
    logging.info(f"Predicting compounds in the input file {opts.inputFile} for association with target {opts.target}")

    df = fp.importDataFile(opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize)

    # Compression is applied only if an autoencoder weights file was supplied.
    use_compressed = bool(opts.acFile)
    if use_compressed:
        _, encoder = ac.define_ac_model(input_size=opts.fpSize, encoding_dim=opts.encFPSize)
        encoder.load_weights(opts.acFile)
        df = ac.compress_fingerprints(df, encoder)

    predicted = p.predict_values(df=df, opts=opts, use_compressed=use_compressed)

    # The fingerprint columns are left out of the exported table.
    fingerprint_columns = ('fp', 'fpcompressed')
    kept_columns = [col for col in predicted.columns if col not in fingerprint_columns]

    input_stem = path.basename(path.splitext(opts.inputFile)[0])
    output_file = path.join(opts.outputDir, input_stem + ".predictions.csv")
    predicted[kept_columns].to_csv(path_or_buf=output_file)

    logging.info(f"Predictions done.\nResults written to '{output_file}'.")
Esempio n. 3
0
def predict(opts: options.PredictOptions) -> None:
    """
    Predict values for the compounds of an input file and store them as CSV.

    When ``opts.ecWeightsFile`` is set, the fingerprints are first compressed
    with an encoder whose weights are loaded from that file. The result,
    without the ``fp`` and ``fpcompressed`` columns, is written to
    ``<outputDir>/<input file stem>.predictions.csv``.

    :param opts: Options defining the details of the prediction
    """
    df = fp.importDataFile(opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize)

    # Make sure the output directory is available before anything is written
    createDirectory(opts.outputDir)

    use_compressed = bool(opts.ecWeightsFile)
    if use_compressed:
        logging.info(f"Using fingerprint compression with AC {opts.ecWeightsFile}")
        # Rebuild the autoencoder, restore its weights and compress the fingerprints
        _, encoder = ac.define_ac_model(input_size=opts.fpSize, encoding_dim=opts.encFPSize)
        encoder.load_weights(opts.ecWeightsFile)
        df = ac.compress_fingerprints(df, encoder)

    result = predictions.predict_values(df=df, opts=opts, use_compressed=use_compressed)

    # Export everything except the fingerprint columns
    excluded = ('fp', 'fpcompressed')
    exported_columns = [name for name in result.columns if name not in excluded]

    stem = path.basename(path.splitext(opts.inputFile)[0])
    target_file = path.join(opts.outputDir, stem + ".predictions.csv")
    result[exported_columns].to_csv(path_or_buf=target_file)
Esempio n. 4
0
def train(opts: options.TrainOptions):
    """
    Execute the training pipeline selected by the given options.

    Steps, each guarded by its option flag: optional fingerprint compression
    (``compressFeatures``; the encoder is trained when ``trainAC`` is set,
    otherwise its weights are loaded from ``ecWeightsFile``), single-label
    model training (``trainFNN``) and multi-label model training
    (``enableMultiLabel``).

    :param opts: Options defining the details of the training
    """
    df = fp.importDataFile(opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize)

    # Make sure the output directory is available
    createDirectory(opts.outputDir)

    if opts.compressFeatures:
        if not opts.trainAC:
            # Reuse an existing autoencoder: rebuild the model and restore its weights
            _, encoder = ac.define_ac_model(input_size=opts.fpSize, encoding_dim=opts.encFPSize)
            encoder.load_weights(makePathAbsolute(opts.ecWeightsFile))
        else:
            # Fit a new autoencoder on the full feature matrix
            encoder = ac.train_full_ac(df, opts)

        # Replace the data frame by its version with compressed fingerprints
        df = ac.compress_fingerprints(df, encoder)

    # Single-label models
    if opts.trainFNN:
        fNN.train_nn_models(df=df, opts=opts)

    # Multi-label models
    if opts.enableMultiLabel:
        fNN.train_nn_models_multi(df=df, opts=opts)
Esempio n. 5
0
# Ad-hoc analysis script: compress fingerprints with a stored encoder and run
# predictions with a stored FNN model for selected compounds.
# NOTE(review): dfS, dfD, df, cid_of_interest and toxid_of_interest are not
# defined in this excerpt; they are expected to exist before this code runs.
project_directory = ""
opts = opt.PredictOptions(
    inputFile="",
    outputDir="/home/hertelj/tmp/",
    model="/home/hertelj/git-hertelj/deepFPlearn_CODE/validation/case_03/results/ER_compressed-True_sampled-None.best.FNN.model.hdf5",
    target="ER",
    fpSize=2048,
    type="smiles",
    fpType="topological")

# Rebuild the autoencoder and restore its weights from disk.
(_, encoder) = ac.define_ac_model(input_size=2048, encoding_dim=256)
encoder.load_weights(
    "/home/hertelj/git-hertelj/deepFPlearn_CODE/modeltraining/Sun_etal_dataset.encoder.hdf5"
)

# First data set: compress, predict, and keep the rows whose 'cid' is of interest.
data = ac.compress_fingerprints(dfS, encoder)
s_compressed = data[data['cid'].isin(cid_of_interest)]['fpcompressed']

df2 = predictions.predict_values(df=data, opts=opts, use_compressed=True)
s_predictions = df2[df2['cid'].isin(cid_of_interest)][['cid', 'trained']]

# Second data set: same steps, selected by 'toxid'.
data2 = ac.compress_fingerprints(dfD, encoder)
d_compressed = data2[data2['toxid'].isin(toxid_of_interest)]['fpcompressed']
df3 = predictions.predict_values(df=data2, opts=opts, use_compressed=True)
d_predictions = df3[df3['toxid'].isin(toxid_of_interest)][['toxid', 'trained']]

# Run the encoder directly on the boolean fingerprint columns of df.
# np.asarray is used instead of np.array(..., copy=False): building an array
# from a Python list always needs a copy, and NumPy >= 2.0 raises ValueError
# for copy=False when a copy cannot be avoided.
fp_matrix_S = np.asarray(df['fpSbool'].to_list(), dtype=bool)
predictions_S = encoder.predict(fp_matrix_S)
fp_matrix_D = np.asarray(df['fpDbool'].to_list(), dtype=bool)
predictions_D = encoder.predict(fp_matrix_D)
# One encoder output per row of df.
df['fpcompressedS'] = list(predictions_S)