Python importDataFileの例

プログラミング言語: Python

名前空間/パッケージ名: dfpl.fingerprint

メソッド/関数: importDataFile

hotexamples.comのコード掲載数: 7

Python importDataFile - 7件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのdfpl.fingerprint.importDataFileの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

def run_fnn_training_multi(opts: opt.TrainOptions) -> None:
    """
    Train multi-label FNN models on compressed and un-compressed fingerprints.

    Imports the input file, obtains an encoder (trained from scratch when
    ``opts.trainAC`` is set, otherwise defined and restored from the weights
    file), compresses the fingerprints, and then calls
    ``fNN.train_nn_models_multi`` twice: once with ``use_compressed=True``
    and once with ``use_compressed=False``.

    Side effect: ``opts.ecWeightsFile`` is overwritten with its location
    inside ``opts.outputDir``.

    :param opts: Options defining the details of the training
    """
    import os  # function-scope import: only needed to build the weights path

    logging.basicConfig(format="DFPL-%(levelname)s: %(message)s",
                        level=logging.INFO)
    logging.info("Adding fingerprint to dataset")

    df = fp.importDataFile(opts.inputFile,
                           import_function=fp.importSmilesCSV,
                           fp_size=opts.fpSize)

    # Join instead of concatenating so that an output directory given without
    # a trailing separator still yields a path inside that directory.
    opts.ecWeightsFile = os.path.join(opts.outputDir, opts.ecWeightsFile)

    if opts.trainAC:
        logging.info("Training autoencoder")
        encoder = ac.train_full_ac(df, opts)
    else:
        logging.info("Using trained autoencoder")
        (_, encoder) = ac.define_ac_model(input_size=opts.fpSize,
                                          encoding_dim=opts.encFPSize)
        # Restore the stored weights; without this the freshly defined model
        # would be used as if it were trained.
        # NOTE(review): assumes the weights were saved at opts.ecWeightsFile
        # (inside the output directory) by an earlier run - confirm.
        encoder.load_weights(opts.ecWeightsFile)

    df = ac.compress_fingerprints(df, encoder)

    # train FNNs with compressed features
    logging.info("Training the FNN using compressed input data.")
    fNN.train_nn_models_multi(df=df, opts=opts, use_compressed=True)

    # train FNNs with uncompressed features
    logging.info("Training the FNN using un-compressed input data.")
    fNN.train_nn_models_multi(df=df, opts=opts, use_compressed=False)

    logging.info("Done")

コード例 #2

ファイルを表示

def test_predictions():
    """
    Run a prediction with ``test_predict_args`` and write the result as CSV.

    When ``opts.acFile`` is set, an autoencoder is defined, its weights are
    loaded from that file, and the fingerprints are compressed before
    predicting. The columns named 'fp' and 'fpcompressed' are left out of
    the CSV, which is written to ``opts.outputDir``.
    """
    opts = test_predict_args

    logging.basicConfig(format="DFPL-%(levelname)s: %(message)s", level=logging.INFO)
    logging.info(f"Predicting compounds in the input file {opts.inputFile} for association with target {opts.target}")

    dataset = fp.importDataFile(opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize)

    # compression is used exactly when an autoencoder weights file is given
    use_compressed = bool(opts.acFile)
    if use_compressed:
        # define the autoencoder and restore its weights
        _, encoder = ac.define_ac_model(input_size=opts.fpSize, encoding_dim=opts.encFPSize)
        encoder.load_weights(opts.acFile)
        # replace the dataset by its compressed counterpart
        dataset = ac.compress_fingerprints(dataset, encoder)

    predicted = p.predict_values(df=dataset, opts=opts, use_compressed=use_compressed)

    # keep every column except the fingerprint ones
    fingerprint_columns = ('fp', 'fpcompressed')
    names_columns = [col for col in predicted.columns if col not in fingerprint_columns]

    # <outputDir>/<input file name without extension>.predictions.csv
    input_stem = path.basename(path.splitext(opts.inputFile)[0])
    output_file = path.join(opts.outputDir, input_stem + ".predictions.csv")
    predicted[names_columns].to_csv(path_or_buf=output_file)

    logging.info(f"Predictions done.\nResults written to '{output_file}'.")

コード例 #3

ファイルを表示

def predict(opts: options.PredictOptions) -> None:
    """
    Predict values for the compounds in the input file and store them as CSV.

    The input file is imported, the output directory is created if needed,
    and - when ``opts.ecWeightsFile`` is set - the fingerprints are compressed
    with an autoencoder whose weights are loaded from that file. The columns
    named 'fp' and 'fpcompressed' are left out of the written CSV.

    :param opts: Options defining the details of the prediction
    """
    dataset = fp.importDataFile(opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize)

    # make sure there is a place to write the result to
    createDirectory(opts.outputDir)

    # compression is used exactly when an autoencoder weights file is given
    use_compressed = bool(opts.ecWeightsFile)
    if use_compressed:
        logging.info(f"Using fingerprint compression with AC {opts.ecWeightsFile}")
        # define the autoencoder and restore its weights
        _, encoder = ac.define_ac_model(input_size=opts.fpSize, encoding_dim=opts.encFPSize)
        encoder.load_weights(opts.ecWeightsFile)
        # replace the dataset by its compressed counterpart
        dataset = ac.compress_fingerprints(dataset, encoder)

    predicted = predictions.predict_values(df=dataset, opts=opts, use_compressed=use_compressed)

    # keep every column except the fingerprint ones
    fingerprint_columns = ('fp', 'fpcompressed')
    names_columns = [col for col in predicted.columns if col not in fingerprint_columns]

    # <outputDir>/<input file name without extension>.predictions.csv
    input_stem = path.basename(path.splitext(opts.inputFile)[0])
    output_file = path.join(opts.outputDir, input_stem + ".predictions.csv")
    predicted[names_columns].to_csv(path_or_buf=output_file)
コード例 #4

ファイルを表示

def train(opts: options.TrainOptions):
    """
    Train the models selected by the given options.

    Imports the input file, creates the output directory, optionally
    compresses the fingerprints with an autoencoder (either trained here or
    restored from ``opts.ecWeightsFile``), and then trains the single-label
    and/or multi-label FNN models depending on ``opts.trainFNN`` and
    ``opts.enableMultiLabel``.

    :param opts: Options defining the details of the training
    """
    data = fp.importDataFile(opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize)

    # make sure there is a place to write results to
    createDirectory(opts.outputDir)

    if opts.compressFeatures:
        if not opts.trainAC:
            # reuse an existing autoencoder: define it and restore its weights
            _, encoder = ac.define_ac_model(input_size=opts.fpSize, encoding_dim=opts.encFPSize)
            encoder.load_weights(makePathAbsolute(opts.ecWeightsFile))
        else:
            # fit a new autoencoder on the full feature matrix
            encoder = ac.train_full_ac(data, opts)

        # from here on, work with the compressed fingerprints
        data = ac.compress_fingerprints(data, encoder)

    # single-label models
    if opts.trainFNN:
        fNN.train_nn_models(df=data, opts=opts)

    # multi-label models
    if opts.enableMultiLabel:
        fNN.train_nn_models_multi(df=data, opts=opts)
コード例 #5

ファイルを表示

ファイル: run_autoencoder.py プロジェクト: bernt-matthias/deepFPlearn

def runAutoencoder(opts: opt.TrainOptions) -> None:
    """
    Import the input data and train the autoencoder on it.

    The value returned by ``ac.train_full_ac`` is discarded.

    :param opts: Options providing the input file, the fingerprint size and
        the settings passed on to the autoencoder training
    """
    log_format = "DFPL-%(levelname)s: %(message)s"
    logging.basicConfig(format=log_format, level=logging.INFO)

    logging.info("Adding fingerprint to dataset")
    dataset = fp.importDataFile(
        opts.inputFile,
        import_function=fp.importSmilesCSV,
        fp_size=opts.fpSize,
    )

    logging.info("Training autoencoder")
    ac.train_full_ac(dataset, opts)

    logging.info("Done")

コード例 #6

ファイルを表示

def test_prepare_nn_training_data():
    """
    Check that the prepared training data has the requested label ratio.

    For every combination of sampling fraction and target, the ratio of the
    second to the first unique label count in ``y`` must differ from the
    requested fraction by less than 0.01.
    """
    # the data directory is looked up two levels above this file
    project_directory = pathlib.Path(__file__).parent.parent.absolute()
    dataset_path = os.path.join(project_directory, "data", "Sun_etal_dataset.csv")
    dataset = fp.importDataFile(dataset_path)

    for fraction in [0.5, 1.0, 2.0, 3.0]:
        train_opts = opts.TrainOptions(compressFeatures=False, sampleFractionOnes=fraction)
        for target in ["AR", "ER", "GR", "Aromatase", "TR", "PPARg"]:
            x, y = fNN.prepare_nn_training_data(dataset, target, train_opts)
            unique, counts = np.unique(y, return_counts=True)
            # counts follow the sorted unique labels returned by np.unique
            assert abs(counts[1] / counts[0] - fraction) < 0.01
            print(
                f"Wanted \"{target}\" fraction: {fraction}, got sampling: {dict(zip(unique, counts))}, Result fraction: {counts[1]/counts[0]}"
            )

コード例 #7

ファイルを表示

ファイル: try_fpcomparison.py プロジェクト: yigbt/deepFPlearn

import dfpl.fingerprint as fp
from rdkit import Chem
from rdkit import DataStructs
from rdkit.Chem import Draw

import pandas as pd
import numpy as np

from dfpl import autoencoder as ac
from dfpl import feedforwardNN as fNN
from dfpl import predictions
from dfpl import options as opt

# Load both datasets, requesting fingerprints of size 2048.
dfS = fp.importDataFile(
    "data/S_dataset_extended.pkl",
    import_function=fp.importSmilesCSV,
    fp_size=2048,
)
# Drop rows without a 'cid', then turn each remaining id into an int and
# finally into its string form.
dfS.dropna(axis=0, subset=['cid'], inplace=True)
dfS['cid'] = dfS['cid'].apply(int).astype(str)
dfD = fp.importDataFile(
    "data/dsstox_20160701.pkl",
    import_function=fp.importSmilesCSV,
    fp_size=2048,
)

# Identifiers of the structures of interest; the two lists are paired
# position by position.
cid_of_interest = ["87587", "77328", "2734118", "2736548", "154257"]
toxid_of_interest = [
    "DTXSID3027798",
    "DTXSID7041461",
    "DTXSID9048067",
    "DTXSID7049344",
    "DTXSID70173593",
]
# One row per (cid, toxid) pair.
df = pd.DataFrame({"cid": cid_of_interest, "toxid": toxid_of_interest})