Beispiel #1
0
def test_predictions():
    """
    Run the prediction workflow with the module-level ``test_predict_args``.

    The input file is imported through ``fp.importDataFile``. If ``acFile`` is
    set, the fingerprints are compressed with the autoencoder whose weights
    are loaded from that file. ``p.predict_values`` is then applied and every
    column except ``fp`` and ``fpcompressed`` is written to
    ``<outputDir>/<input file stem>.predictions.csv``.
    """
    # Local import: only ``path`` is known to be available in this module.
    import os

    opts = test_predict_args

    logging.basicConfig(format="DFPL-%(levelname)s: %(message)s", level=logging.INFO)
    logging.info(f"Predicting compounds in the input file {opts.inputFile} for association with target {opts.target}")

    df = fp.importDataFile(opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize)

    # Writing the CSV fails if the target directory is missing, so create it
    # up front. An empty outputDir means "current directory" for path.join
    # and must not be passed to makedirs.
    if opts.outputDir:
        os.makedirs(opts.outputDir, exist_ok=True)

    use_compressed = False
    if opts.acFile:
        use_compressed = True
        # load trained model for autoencoder
        (_, encoder) = ac.define_ac_model(input_size=opts.fpSize, encoding_dim=opts.encFPSize)
        encoder.load_weights(opts.acFile)
        # compress the fingerprints using the autoencoder
        df = ac.compress_fingerprints(df, encoder)

    # predict
    df2 = p.predict_values(df=df,
                           opts=opts,
                           use_compressed=use_compressed)

    # The fingerprint columns are not part of the report
    names_columns = [c for c in df2.columns if c not in ['fp', 'fpcompressed']]

    output_file = path.join(opts.outputDir,
                            path.basename(path.splitext(opts.inputFile)[0]) + ".predictions.csv")
    df2[names_columns].to_csv(path_or_buf=output_file)

    logging.info(f"Predictions done.\nResults written to '{output_file}'.")
Beispiel #2
0
def predict(opts: options.PredictOptions) -> None:
    """
    Predict values for the compounds of an input file and store them as CSV.

    The result file is named after the input file (extension replaced by
    ``.predictions.csv``) and placed in ``opts.outputDir``. The columns
    ``fp`` and ``fpcompressed`` are left out of the written table.

    :param opts: Options defining the details of the prediction
    """
    frame = fp.importDataFile(opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize)

    # Make sure the target directory is available before anything is written
    createDirectory(opts.outputDir)

    # Compression is requested by providing a weights file for the encoder
    compress = bool(opts.ecWeightsFile)
    if compress:
        logging.info(f"Using fingerprint compression with AC {opts.ecWeightsFile}")
        # Build the autoencoder, keep only its encoder part and restore the weights
        _, encoder = ac.define_ac_model(input_size=opts.fpSize, encoding_dim=opts.encFPSize)
        encoder.load_weights(opts.ecWeightsFile)
        # Replace the frame by the one returned from the compression step
        frame = ac.compress_fingerprints(frame, encoder)

    predicted = predictions.predict_values(df=frame, opts=opts, use_compressed=compress)

    # Everything but the fingerprint columns goes into the report
    excluded = ('fp', 'fpcompressed')
    kept_columns = [col for col in predicted.columns if col not in excluded]

    stem = path.basename(path.splitext(opts.inputFile)[0])
    output_file = path.join(opts.outputDir, stem + ".predictions.csv")
    predicted[kept_columns].to_csv(path_or_buf=output_file)
Beispiel #3
0
    outputDir=f"/home/hertelj/tmp/",
    model=
    f"/home/hertelj/git-hertelj/deepFPlearn_CODE/validation/case_03/results/ER_compressed-True_sampled-None.best.FNN.model.hdf5",
    target="ER",
    fpSize=2048,
    type="smiles",
    fpType="topological")

# Build the autoencoder (2048 -> 256) and restore the trained encoder weights
_, encoder = ac.define_ac_model(input_size=2048, encoding_dim=256)
encoder.load_weights("/home/hertelj/git-hertelj/deepFPlearn_CODE/modeltraining/Sun_etal_dataset.encoder.hdf5")

# --- dfS: compress, then keep the compressed fingerprints of the selected cids
data = ac.compress_fingerprints(dfS, encoder)
s_mask = data['cid'].isin(cid_of_interest)
s_compressed = data[s_mask]['fpcompressed']

# Predict on the compressed data and keep id + 'trained' column for the selected cids
df2 = predictions.predict_values(df=data, opts=opts, use_compressed=True)
s_pred_mask = df2['cid'].isin(cid_of_interest)
s_predictions = df2[s_pred_mask][['cid', 'trained']]

# --- dfD: same procedure, rows are selected by 'toxid' instead of 'cid'
data2 = ac.compress_fingerprints(dfD, encoder)
d_mask = data2['toxid'].isin(toxid_of_interest)
d_compressed = data2[d_mask]['fpcompressed']

df3 = predictions.predict_values(df=data2, opts=opts, use_compressed=True)
d_pred_mask = df3['toxid'].isin(toxid_of_interest)
d_predictions = df3[d_pred_mask][['toxid', 'trained']]

# Encode the boolean fingerprints of both columns directly with the encoder.
# np.asarray is used instead of np.array(..., copy=False): turning a Python
# list into an array always requires a copy, and NumPy >= 2.0 raises a
# ValueError for copy=False whenever a copy cannot be avoided.
fp_matrix_S = np.asarray(df['fpSbool'].to_list(), dtype=bool)
predictions_S = encoder.predict(fp_matrix_S)
fp_matrix_D = np.asarray(df['fpDbool'].to_list(), dtype=bool)
predictions_D = encoder.predict(fp_matrix_D)
# Store one entry of the encoder output per row of df
df['fpcompressedS'] = list(predictions_S)
df['fpcompressedD'] = list(predictions_D)

# Are the compressed fingerprints equal?