Example #1
def run_fnn_training_multi(opts: opt.TrainOptions) -> None:

    logging.basicConfig(format="DFPL-%(levelname)s: %(message)s",
                        level=logging.INFO)
    logging.info("Adding fingerprint to dataset")

    df = fp.importDataFile(opts.inputFile,
                           import_function=fp.importSmilesCSV,
                           fp_size=opts.fpSize)

    # prepend the output directory to the autoencoder weights file name
    # (assumes opts.outputDir ends with a path separator)
    opts.ecWeightsFile = opts.outputDir + opts.ecWeightsFile

    if opts.trainAC:
        logging.info("Training autoencoder")
        encoder = ac.train_full_ac(df, opts)
        # encoder.save_weights(opts.acFile)
    else:
        logging.info("Using trained autoencoder")
        (_, encoder) = ac.define_ac_model(input_size=opts.fpSize,
                                          encoding_dim=opts.encFPSize)
        # load the weights of the previously trained autoencoder
        encoder.load_weights(opts.ecWeightsFile)

    df = ac.compress_fingerprints(df, encoder)

    # train FNNs with compressed features
    logging.info("Training the FNN using compressed input data.")
    fNN.train_nn_models_multi(df=df, opts=opts, use_compressed=True)

    # train FNNs with uncompressed features
    logging.info("Training the FNN using un-compressed input data.")
    fNN.train_nn_models_multi(df=df, opts=opts, use_compressed=False)

    logging.info("Done")
Example #2
def test_predictions():
    opts = test_predict_args

    logging.basicConfig(format="DFPL-%(levelname)s: %(message)s", level=logging.INFO)
    logging.info(f"Predicting compounds in the input file {opts.inputFile} for association with target {opts.target}")

    df = fp.importDataFile(opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize)

    use_compressed = False
    if opts.acFile:
        use_compressed = True
        # load trained model for autoencoder
        (_, encoder) = ac.define_ac_model(input_size=opts.fpSize, encoding_dim=opts.encFPSize)
        encoder.load_weights(opts.acFile)
        # compress the fingerprints using the autoencoder
        df = ac.compress_fingerprints(df, encoder)

    # predict
    df2 = p.predict_values(df=df,
                           opts=opts,
                           use_compressed=use_compressed)

    names_columns = [c for c in df2.columns if c not in ['fp', 'fpcompressed']]

    output_file = path.join(opts.outputDir,
                            path.basename(path.splitext(opts.inputFile)[0]) + ".predictions.csv")
    df2[names_columns].to_csv(path_or_buf=output_file)

    logging.info(f"Predictions done.\nResults written to '{output_file}'.")
Example #3
def predict(opts: options.PredictOptions) -> None:
    """
    Run prediction given specific options
    :param opts: Options defining the details of the prediction
    """
    df = fp.importDataFile(opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize)

    # Create output dir if it doesn't exist
    createDirectory(opts.outputDir)

    use_compressed = False
    if opts.ecWeightsFile:
        logging.info(f"Using fingerprint compression with AC {opts.ecWeightsFile}")
        use_compressed = True
        # load trained model for autoencoder
        (_, encoder) = ac.define_ac_model(input_size=opts.fpSize, encoding_dim=opts.encFPSize)
        encoder.load_weights(opts.ecWeightsFile)
        # compress the fingerprints using the autoencoder
        df = ac.compress_fingerprints(df, encoder)

    # predict
    df2 = predictions.predict_values(df=df,
                                     opts=opts,
                                     use_compressed=use_compressed)

    names_columns = [c for c in df2.columns if c not in ['fp', 'fpcompressed']]

    output_file = path.join(opts.outputDir,
                            path.basename(path.splitext(opts.inputFile)[0]) + ".predictions.csv")
    df2[names_columns].to_csv(path_or_buf=output_file)
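A hedged usage sketch for the function above; the PredictOptions fields are assumptions based on the attributes it accesses, not a documented constructor.

# Hypothetical invocation sketch; field names inferred from the usage above.
predict_opts = options.PredictOptions(
    inputFile="data/compounds.csv",        # placeholder input path
    outputDir="predictions/",
    ecWeightsFile="output/ac_weights.h5",  # setting this enables compression
    fpSize=2048,
    encFPSize=256
)
predict(predict_opts)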
Example #4
def train(opts: options.TrainOptions):
    """
    Run the main training procedure
    :param opts: Options defining the details of the training
    """
    df = fp.importDataFile(opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize)

    # Create output dir if it doesn't exist
    createDirectory(opts.outputDir)

    if opts.compressFeatures:  # compress features

        if opts.trainAC:
            # train an autoencoder on the full feature matrix
            encoder = ac.train_full_ac(df, opts)
        else:
            # load trained model for autoencoder
            (_, encoder) = ac.define_ac_model(input_size=opts.fpSize, encoding_dim=opts.encFPSize)
            encoder.load_weights(makePathAbsolute(opts.ecWeightsFile))

        # compress the fingerprints using the autoencoder
        df = ac.compress_fingerprints(df, encoder)

    if opts.trainFNN:
        # train single label models
        fNN.train_nn_models(df=df, opts=opts)

    if opts.enableMultiLabel:
        # train multi-label models
        fNN.train_nn_models_multi(df=df, opts=opts)
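The flags above select which parts of the pipeline run. A hedged sketch of one possible call, assuming TrainOptions accepts these names as keyword arguments (partial keyword construction is shown in Example #6 below):

# Hypothetical invocation sketch; flag names taken from the branches above.
train_opts = options.TrainOptions(
    inputFile="data/Sun_etal_dataset.csv",
    outputDir="output/",
    compressFeatures=True,    # run fingerprints through the autoencoder
    trainAC=True,             # train the autoencoder rather than loading weights
    trainFNN=True,            # train the single-label FNN models
    enableMultiLabel=False    # skip the multi-label models
)
train(train_opts)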
Example #5
def runAutoencoder(opts: opt.TrainOptions) -> None:
    """
    Run and test auto-encoder
    """
    logging.basicConfig(format="DFPL-%(levelname)s: %(message)s", level=logging.INFO)
    logging.info("Adding fingerprint to dataset")
    df = fp.importDataFile(opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize)
    logging.info("Training autoencoder")
    ac.train_full_ac(df, opts)
    logging.info("Done")
Example #6
def test_prepare_nn_training_data():
    project_directory = pathlib.Path(__file__).parent.parent.absolute()
    df = fp.importDataFile(
        os.path.join(project_directory, "data", "Sun_etal_dataset.csv"))

    targets = ["AR", "ER", "GR", "Aromatase", "TR", "PPARg"]
    fractions = [0.5, 1.0, 2.0, 3.0]
    for f in fractions:
        o = opts.TrainOptions(compressFeatures=False, sampleFractionOnes=f)
        for t in targets:
            x, y = fNN.prepare_nn_training_data(df, t, o)
            unique, counts = np.unique(y, return_counts=True)
            assert abs(counts[1] / counts[0] - f) < 0.01
            print(
                f"Wanted \"{t}\" fraction: {f}, got sampling: {dict(zip(unique, counts))}, Result fraction: {counts[1]/counts[0]}"
            )
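The assertion checks that the resampled labels reproduce the requested ones/zeros ratio. A toy illustration of that ratio computation with np.unique:

# Toy example of the ratio check used in the assert above.
import numpy as np
y = np.array([0, 0, 0, 0, 1, 1])                   # 4 negatives, 2 positives
unique, counts = np.unique(y, return_counts=True)
print(dict(zip(unique, counts)))                    # counts per label value
print(counts[1] / counts[0])                        # 0.5, the ones/zeros fraction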
Example #7
import dfpl.fingerprint as fp
from rdkit import Chem
from rdkit import DataStructs
from rdkit.Chem import Draw

import pandas as pd
import numpy as np

from dfpl import autoencoder as ac
from dfpl import feedforwardNN as fNN
from dfpl import predictions
from dfpl import options as opt

# read both datasets
dfS = fp.importDataFile("data/S_dataset_extended.pkl",
                        import_function=fp.importSmilesCSV,
                        fp_size=2048)
dfS.dropna(axis=0, subset=['cid'], inplace=True)
dfS['cid'] = dfS['cid'].apply(int).astype(str)
dfD = fp.importDataFile("data/dsstox_20160701.pkl",
                        import_function=fp.importSmilesCSV,
                        fp_size=2048)

# ids and structures of interest
cid_of_interest = ["87587", "77328", "2734118", "2736548", "154257"]
toxid_of_interest = [
    "DTXSID3027798", "DTXSID7041461", "DTXSID9048067", "DTXSID7049344",
    "DTXSID70173593"
]
df = pd.DataFrame(list(zip(cid_of_interest, toxid_of_interest)),
                  columns=["cid", "toxid"])