def run_fnn_training_multi(opts: opt.TrainOptions) -> None:
    """
    Train the multi-label FNN on compressed and on uncompressed fingerprints.

    The input file is imported with fingerprints, an encoder is obtained
    (trained when ``opts.trainAC`` is set, otherwise rebuilt and initialised
    from ``opts.ecWeightsFile``), the fingerprints are compressed, and
    ``fNN.train_nn_models_multi`` is run once with ``use_compressed=True`` and
    once with ``use_compressed=False``.

    Note: ``opts.ecWeightsFile`` is rewritten in place to
    ``opts.outputDir + opts.ecWeightsFile``, so the caller's options object is
    modified and calling this function twice with the same object prefixes the
    path twice.

    :param opts: Options defining the details of the training
    """
    logging.basicConfig(format="DFPL-%(levelname)s: %(message)s", level=logging.INFO)
    logging.info("Adding fingerprint to dataset")
    df = fp.importDataFile(opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize)

    # Plain string concatenation (kept for backward compatibility): the result
    # only points inside the output directory if opts.outputDir already ends
    # with a path separator.
    opts.ecWeightsFile = opts.outputDir + opts.ecWeightsFile

    if opts.trainAC:
        logging.info("Training autoencoder")
        encoder = ac.train_full_ac(df, opts)
    else:
        logging.info("Using trained autoencoder")
        (_, encoder) = ac.define_ac_model(input_size=opts.fpSize, encoding_dim=opts.encFPSize)
        # A freshly defined model carries no trained weights; without loading
        # them the fingerprints would be compressed by an untrained encoder.
        # NOTE(review): assumes the weights live at the output-dir-prefixed
        # path set above -- confirm against where train_full_ac stores them.
        encoder.load_weights(opts.ecWeightsFile)

    df = ac.compress_fingerprints(df, encoder)

    # train FNNs with compressed features
    logging.info("Training the FNN using compressed input data.")
    fNN.train_nn_models_multi(df=df, opts=opts, use_compressed=True)

    # train FNNs with uncompressed features
    logging.info("Training the FNN using un-compressed input data.")
    fNN.train_nn_models_multi(df=df, opts=opts, use_compressed=False)

    logging.info("Done")
def test_predictions():
    """
    Run a prediction with the module-level ``test_predict_args`` options.

    The input file is imported with fingerprints. If ``acFile`` is set, an
    encoder is defined, its weights are loaded from that file, and the
    fingerprints are compressed before prediction. Every column except
    ``fp`` and ``fpcompressed`` is written to
    ``<outputDir>/<input file stem>.predictions.csv``.
    """
    opts = test_predict_args

    logging.basicConfig(format="DFPL-%(levelname)s: %(message)s", level=logging.INFO)
    logging.info(f"Predicting compounds in the input file {opts.inputFile} for association with target {opts.target}")

    data = fp.importDataFile(opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize)

    # Compression is requested simply by providing an autoencoder weights file.
    compress = bool(opts.acFile)
    if compress:
        _, encoder = ac.define_ac_model(input_size=opts.fpSize, encoding_dim=opts.encFPSize)
        encoder.load_weights(opts.acFile)
        data = ac.compress_fingerprints(data, encoder)

    predicted = p.predict_values(df=data, opts=opts, use_compressed=compress)

    # The fingerprint columns are left out of the written results.
    fingerprint_columns = ("fp", "fpcompressed")
    kept_columns = [col for col in predicted.columns if col not in fingerprint_columns]

    input_stem = path.basename(path.splitext(opts.inputFile)[0])
    output_file = path.join(opts.outputDir, input_stem + ".predictions.csv")
    predicted[kept_columns].to_csv(path_or_buf=output_file)

    logging.info(f"Predictions done.\nResults written to '{output_file}'.")
def predict(opts: options.PredictOptions) -> None:
    """
    Predict values for the compounds of an input file and write them as CSV.

    If ``opts.ecWeightsFile`` is set, an encoder is defined, its weights are
    loaded from that file, and the fingerprints are compressed before
    prediction. Every column except ``fp`` and ``fpcompressed`` is written to
    ``<outputDir>/<input file stem>.predictions.csv``.

    :param opts: Options defining the details of the prediction
    """
    data = fp.importDataFile(opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize)

    # Make sure the output directory is available before results are written.
    createDirectory(opts.outputDir)

    # Compression is requested simply by providing encoder weights.
    compress = bool(opts.ecWeightsFile)
    if compress:
        logging.info(f"Using fingerprint compression with AC {opts.ecWeightsFile}")
        _, encoder = ac.define_ac_model(input_size=opts.fpSize, encoding_dim=opts.encFPSize)
        encoder.load_weights(opts.ecWeightsFile)
        data = ac.compress_fingerprints(data, encoder)

    predicted = predictions.predict_values(df=data, opts=opts, use_compressed=compress)

    # The fingerprint columns are left out of the written results.
    fingerprint_columns = ("fp", "fpcompressed")
    kept_columns = [col for col in predicted.columns if col not in fingerprint_columns]

    input_stem = path.basename(path.splitext(opts.inputFile)[0])
    target_file = path.join(opts.outputDir, input_stem + ".predictions.csv")
    predicted[kept_columns].to_csv(path_or_buf=target_file)
def train(opts: options.TrainOptions):
    """
    Run the training pipeline selected by the given options.

    Steps, each gated by a flag on ``opts``:
    ``compressFeatures`` obtains an encoder (trained when ``trainAC`` is set,
    otherwise defined and initialised from ``ecWeightsFile``) and compresses
    the fingerprints; ``trainFNN`` trains the single-label models;
    ``enableMultiLabel`` trains the multi-label models.

    :param opts: Options defining the details of the training
    """
    data = fp.importDataFile(opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize)

    # Make sure the output directory is available before training starts.
    createDirectory(opts.outputDir)

    if opts.compressFeatures:
        if not opts.trainAC:
            # Reuse an existing autoencoder: rebuild it and load its weights.
            _, encoder = ac.define_ac_model(input_size=opts.fpSize, encoding_dim=opts.encFPSize)
            weights_path = makePathAbsolute(opts.ecWeightsFile)
            encoder.load_weights(weights_path)
        else:
            # Train a new autoencoder on the full feature matrix.
            encoder = ac.train_full_ac(data, opts)

        # Compress the fingerprints using the autoencoder.
        data = ac.compress_fingerprints(data, encoder)

    # Single-label models.
    if opts.trainFNN:
        fNN.train_nn_models(df=data, opts=opts)

    # Multi-label models.
    if opts.enableMultiLabel:
        fNN.train_nn_models_multi(df=data, opts=opts)
legends=legend) img.save('cidVStoxid.structures.png') img.show() project_directory = "" opts = opt.PredictOptions( inputFile=f"", outputDir=f"/home/hertelj/tmp/", model= f"/home/hertelj/git-hertelj/deepFPlearn_CODE/validation/case_03/results/ER_compressed-True_sampled-None.best.FNN.model.hdf5", target="ER", fpSize=2048, type="smiles", fpType="topological") (_, encoder) = ac.define_ac_model(input_size=2048, encoding_dim=256) encoder.load_weights( "/home/hertelj/git-hertelj/deepFPlearn_CODE/modeltraining/Sun_etal_dataset.encoder.hdf5" ) data = ac.compress_fingerprints(dfS, encoder) s_compressed = data[data['cid'].isin(cid_of_interest)]['fpcompressed'] df2 = predictions.predict_values(df=data, opts=opts, use_compressed=True) s_predictions = df2[df2['cid'].isin(cid_of_interest)][['cid', 'trained']] data2 = ac.compress_fingerprints(dfD, encoder) d_compressed = data2[data2['toxid'].isin(toxid_of_interest)]['fpcompressed'] df3 = predictions.predict_values(df=data2, opts=opts, use_compressed=True) d_predictions = df3[df3['toxid'].isin(toxid_of_interest)][['toxid', 'trained']] fp_matrix_S = np.array(df['fpSbool'].to_list(), dtype=bool, copy=False)