def test_predictions():
    """
    Run a full prediction pass driven by the module-level ``test_predict_args``.

    The input file is loaded through ``fp.importSmilesCSV``. When ``acFile`` is
    set on the options, the autoencoder weights are loaded from it and the
    fingerprints are compressed before predicting. Every column of the
    prediction result except ``fp`` and ``fpcompressed`` is written to
    ``<input file basename>.predictions.csv`` inside ``outputDir``.
    """
    opts = test_predict_args

    logging.basicConfig(format="DFPL-%(levelname)s: %(message)s", level=logging.INFO)
    logging.info(f"Predicting compounds in the input file {opts.inputFile} for association with target {opts.target}")

    fingerprints = fp.importDataFile(
        opts.inputFile,
        import_function=fp.importSmilesCSV,
        fp_size=opts.fpSize,
    )

    # Compression is only applied when autoencoder weights were supplied.
    use_compressed = bool(opts.acFile)
    if use_compressed:
        # Rebuild the autoencoder architecture and restore the trained encoder weights.
        _, encoder = ac.define_ac_model(
            input_size=opts.fpSize,
            encoding_dim=opts.encFPSize,
        )
        encoder.load_weights(opts.acFile)
        # Compress the fingerprints using the autoencoder.
        fingerprints = ac.compress_fingerprints(fingerprints, encoder)

    # Predict on the (possibly compressed) fingerprints.
    predicted = p.predict_values(df=fingerprints, opts=opts, use_compressed=use_compressed)

    # Leave the fingerprint columns out of the CSV.
    excluded = ('fp', 'fpcompressed')
    kept_columns = [column for column in predicted.columns if column not in excluded]

    # Output name: input file name without directory and extension, plus a fixed suffix.
    input_stem = path.splitext(opts.inputFile)[0]
    output_name = path.basename(input_stem) + ".predictions.csv"
    output_file = path.join(opts.outputDir, output_name)

    predicted[kept_columns].to_csv(path_or_buf=output_file)
    logging.info(f"Predictions done.\nResults written to '{output_file}'.")
def predict(opts: options.PredictOptions) -> None:
    """
    Predict values for the compounds in ``opts.inputFile`` and store them as CSV.

    The input is loaded through ``fp.importSmilesCSV`` and the output directory
    is created if needed. If ``opts.ecWeightsFile`` is set, the encoder weights
    are loaded from it and the fingerprints are compressed before predicting.
    All result columns except ``fp`` and ``fpcompressed`` are written to
    ``<input file basename>.predictions.csv`` inside ``opts.outputDir``.

    :param opts: Options defining the details of the prediction
    """
    fingerprints = fp.importDataFile(
        opts.inputFile,
        import_function=fp.importSmilesCSV,
        fp_size=opts.fpSize,
    )

    # Create output dir if it doesn't exist
    createDirectory(opts.outputDir)

    # Compression is only applied when encoder weights were supplied.
    use_compressed = bool(opts.ecWeightsFile)
    if use_compressed:
        logging.info(f"Using fingerprint compression with AC {opts.ecWeightsFile}")
        # Rebuild the autoencoder architecture and restore the trained encoder weights.
        _, encoder = ac.define_ac_model(
            input_size=opts.fpSize,
            encoding_dim=opts.encFPSize,
        )
        encoder.load_weights(opts.ecWeightsFile)
        # Compress the fingerprints using the autoencoder.
        fingerprints = ac.compress_fingerprints(fingerprints, encoder)

    # Predict on the (possibly compressed) fingerprints.
    predicted = predictions.predict_values(df=fingerprints, opts=opts, use_compressed=use_compressed)

    # Leave the fingerprint columns out of the CSV.
    excluded = ('fp', 'fpcompressed')
    kept_columns = [column for column in predicted.columns if column not in excluded]

    # Output name: input file name without directory and extension, plus a fixed suffix.
    input_stem = path.splitext(opts.inputFile)[0]
    output_name = path.basename(input_stem) + ".predictions.csv"
    output_file = path.join(opts.outputDir, output_name)

    predicted[kept_columns].to_csv(path_or_buf=output_file)
outputDir=f"/home/hertelj/tmp/", model= f"/home/hertelj/git-hertelj/deepFPlearn_CODE/validation/case_03/results/ER_compressed-True_sampled-None.best.FNN.model.hdf5", target="ER", fpSize=2048, type="smiles", fpType="topological") (_, encoder) = ac.define_ac_model(input_size=2048, encoding_dim=256) encoder.load_weights( "/home/hertelj/git-hertelj/deepFPlearn_CODE/modeltraining/Sun_etal_dataset.encoder.hdf5" ) data = ac.compress_fingerprints(dfS, encoder) s_compressed = data[data['cid'].isin(cid_of_interest)]['fpcompressed'] df2 = predictions.predict_values(df=data, opts=opts, use_compressed=True) s_predictions = df2[df2['cid'].isin(cid_of_interest)][['cid', 'trained']] data2 = ac.compress_fingerprints(dfD, encoder) d_compressed = data2[data2['toxid'].isin(toxid_of_interest)]['fpcompressed'] df3 = predictions.predict_values(df=data2, opts=opts, use_compressed=True) d_predictions = df3[df3['toxid'].isin(toxid_of_interest)][['toxid', 'trained']] fp_matrix_S = np.array(df['fpSbool'].to_list(), dtype=bool, copy=False) predictions_S = encoder.predict(fp_matrix_S) fp_matrix_D = np.array(df['fpDbool'].to_list(), dtype=bool, copy=False) predictions_D = encoder.predict(fp_matrix_D) df['fpcompressedS'] = [s for s in predictions_S] df['fpcompressedD'] = [s for s in predictions_D] # compressed fp equal?