import pathlib
import logging
from os import path  # NOTE(review): not referenced in the visible part of this file

import dfpl.options as opt
import dfpl.fingerprint as fp
import dfpl.autoencoder as ac  # NOTE(review): not referenced in the visible part of this file
import dfpl.predictions as p  # NOTE(review): not referenced in the visible part of this file

# Absolute path of the directory that contains this file; every path in the
# options below is built relative to it.
project_directory = pathlib.Path(__file__).parent.absolute()

# Options for a prediction run on the "AR" target: input CSV, output
# directory, autoencoder weights file and model file all live under
# project_directory.
test_predict_args = opt.PredictOptions(
    inputFile=f"{project_directory}/data/Sun_etal_dataset.cids.predictionSet.csv",
    outputDir=f"{project_directory}/validation/case_01/results/",
    acFile=f"{project_directory}/validation/case_01/results/Sun_etal_dataset.AC.encoder.weights.hdf5",
    model=f"{project_directory}/validation/case_01/results/AR_compressed-True.full.FNN-.model.hdf5",
    target="AR",
    fpSize=2048,
    type="smiles",
    fpType="topological"
)


def test_predictions():
    """
    Load the prediction input file described by ``test_predict_args``.

    The visible part of this function configures logging, logs which file and
    target are being processed, imports the input file through
    ``fp.importDataFile`` and initialises ``use_compressed`` to ``False``.

    NOTE(review): the body visible in this chunk stops right after the
    ``use_compressed`` assignment; any code that consumes ``df`` or
    ``use_compressed`` is not visible here - confirm against the full file.
    """
    opts = test_predict_args

    # Emit INFO-level messages with a "DFPL-<LEVEL>: " prefix.
    logging.basicConfig(format="DFPL-%(levelname)s: %(message)s", level=logging.INFO)
    logging.info(f"Predicting compounds in the input file {opts.inputFile} for association with target {opts.target}")

    # Read the input file with the SMILES-CSV importer, passing the configured
    # fingerprint size.
    df = fp.importDataFile(opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize)

    # Initial value only; nothing in the visible code changes or reads it.
    use_compressed = False
encFPSize=256, enableMultiLabel=False, testingFraction=0.2, kFolds=5, verbose=2, trainAC=True, trainFNN=True, compressFeatures=True)  # closing arguments of a call opened before this chunk

# Show INFO-level (and more severe) log messages.
logging.basicConfig(level=logging.INFO)

# Options for a prediction run on the "AR" target.
# NOTE(review): `options` and `project_directory` are bound outside the
# visible chunk.
test_predict_args = options.PredictOptions(
    inputFile=f"{project_directory}/data/Sun_etal_dataset.cids.predictionSet.csv",
    outputDir=f"{project_directory}/validation/case_01/results/",
    # NOTE(review): unlike the other paths, this one is an absolute path into
    # a user home directory (and an f-string without placeholders); it likely
    # only exists on the original author's machine - confirm before reuse.
    ecWeightsFile=f"/home/hertelj/git-hertelj/deepFPlearn_CODE/validation/case_00/results_AC_S/ac_S.encoder.hdf5",
    model=f"{project_directory}/validation/case_01/results/AR_compressed-True.full.FNN-.model.hdf5",
    target="AR",
    fpSize=2048,
    type="smiles",
    fpType="topological")


def train(opts: options.TrainOptions):
    """
    Run the main training procedure.

    :param opts: Options defining the details of the training
    """
    # Read the input file with the SMILES-CSV importer. The remaining
    # arguments of this call, and the rest of the function, lie beyond the
    # visible chunk.
    df = fp.importDataFile(opts.inputFile, import_function=fp.importSmilesCSV,
zip(df['cid'], df['tanimoto'], df['allBoolEqual'])] + \
    [t for t in df['toxid']]  # end of an expression begun before this chunk; its second part lists the 'toxid' values

# Draw the 'molS' molecules followed by the 'molD' molecules in one grid.
# molsPerRow is df.shape[0], so (assuming both columns hold df.shape[0]
# entries) the 'molS' structures fill the first row and the 'molD' structures
# the second.
# NOTE(review): `legend` is presumably the value built by the expression that
# ends above - confirm against the full file.
img = Draw.MolsToGridImage(df['molS'].to_list() + df['molD'].to_list(), molsPerRow=df.shape[0], subImgSize=(200, 200), legends=legend)
# Write the grid image to a file in the current working directory, then
# display it.
img.save('cidVStoxid.structures.png')
img.show()

# Reset to an empty string; none of the paths below use it.
project_directory = ""

# Prediction options for the "ER" target.
# NOTE(review): inputFile is an empty string, and outputDir/model are absolute
# paths into a user home directory - they likely only resolve on the original
# author's machine.
opts = opt.PredictOptions(
    inputFile=f"",
    outputDir=f"/home/hertelj/tmp/",
    model=f"/home/hertelj/git-hertelj/deepFPlearn_CODE/validation/case_03/results/ER_compressed-True_sampled-None.best.FNN.model.hdf5",
    target="ER",
    fpSize=2048,
    type="smiles",
    fpType="topological")

# Build the autoencoder for 2048-wide inputs and a 256-wide encoding; only the
# second element of the returned pair (the encoder) is kept.
(_, encoder) = ac.define_ac_model(input_size=2048, encoding_dim=256)
# Load encoder weights from a hard-coded absolute path (see note above).
encoder.load_weights(
    "/home/hertelj/git-hertelj/deepFPlearn_CODE/modeltraining/Sun_etal_dataset.encoder.hdf5"
)

# NOTE(review): `dfS`, `cid_of_interest` and `predictions` are bound outside
# the visible chunk.
data = ac.compress_fingerprints(dfS, encoder)
# 'fpcompressed' values of the rows whose 'cid' is in cid_of_interest.
s_compressed = data[data['cid'].isin(cid_of_interest)]['fpcompressed']

# Run the prediction on the compressed data, then keep the 'cid' and 'trained'
# columns for the compounds of interest.
df2 = predictions.predict_values(df=data, opts=opts, use_compressed=True)
s_predictions = df2[df2['cid'].isin(cid_of_interest)][['cid', 'trained']]