# optional arguments do_append = False if options.do_append: do_append = options.do_append simil_metric = 'Dice' if options.simil: simil_metric = options.simil outpath = path outpath_set = False if options.outpath: outpath_set = True outpath = path+options.outpath # check for sensible input fp_names = scor.checkFPFile(fp_file) if not fp_names: raise ValueError('No fingerprints given in', fp_file) if outpath_set: scor.checkPath(outpath, 'output') scor.checkSimil(simil_metric) scor.checkQueryMols(num_query_mols, conf.list_num_query_mols) # loop over data-set sources for dataset in conf.set_data.keys(): print dataset # loop over targets for target in conf.set_data[dataset]['ids']: print target # read in actives and calculate fps actives = [] for line in gzip.open(inpath_cmp+dataset+'/cmp_list_'+dataset+'_'+str(target)+'_actives.dat.gz', 'r'): if line[0] != '#': # structure of line: [external ID, internal ID, SMILES]]
do_append = options.do_append simil_metric = "Dice" if options.simil: simil_metric = options.simil outpath = path outpath_set = False if options.outpath: outpath_set = True # outpath = path + options.outpath # changed to absolute , by @Matt outpath = options.outpath os.system("mkdir -p {}".format(outpath)) # check for sensible input if outpath_set: scor.checkPath(outpath, "output") scor.checkSimil(simil_metric) scor.checkQueryMols(num_query_mols, conf.list_num_query_mols) # default machine-learning method variables ml_dict = dict( criterion="gini", max_features="auto", n_jobs=1, max_depth=10, min_samples_split=2, min_samples_leaf=1, num_estimators=100, ) if options.ml: ml_dict = ml_func.readMLFile(ml_dict, read_dict, path + options.ml)
# optional arguments do_append = False if options.do_append: do_append = options.do_append simil_metric = 'Dice' if options.simil: simil_metric = options.simil outpath = path outpath_set = False if options.outpath: outpath_set = True outpath = path + options.outpath # check for sensible input fp_names = scor.checkFPFile(fp_file) if not fp_names: raise ValueError('No fingerprints given in', fp_file) if outpath_set: scor.checkPath(outpath, 'output') scor.checkSimil(simil_metric) scor.checkQueryMols(num_query_mols, conf.list_num_query_mols) # loop over data-set sources for dataset in conf.set_data.keys(): print dataset # loop over targets for target in conf.set_data[dataset]['ids']: print target # read in actives and calculate fps actives = [] for line in gzip.open( inpath_cmp + dataset + '/cmp_list_' + dataset + '_' + str(target) + '_actives.dat.gz', 'r'):
dest="do_append", action="store_true", help="append to the output file (default: False)", ) ######################## MAIN PART ########################### if __name__ == "__main__": # read in command line options (options, args) = parser.parse_args() # required arguments if options.inpath: inpath = [path + i for i in options.inpath] for inp in inpath: scor.checkPath(inp, "input") else: raise RuntimeError( "one or more of the required options was not given!" ) # optional arguments method = "max" if options.method: if options.method not in ["max", "ave"]: raise ValueError( "method is unkown. supported methods are: max and ave" ) else: method = options.method remove_fps = []
parser.add_option("-a", "--append", dest="do_append", action="store_true", help="append to the output file (default: False)") ######################## MAIN PART ########################### if __name__ == '__main__': # read in command line options (options, args) = parser.parse_args() # required arguments if options.inpath: inpath = [path + i for i in options.inpath] for inp in inpath: scor.checkPath(inp, 'input') else: raise RuntimeError( 'one or more of the required options was not given!') # optional arguments method = 'max' if options.method: if options.method not in ['max', 'ave']: raise ValueError( 'method is unkown. supported methods are: max and ave') else: method = options.method remove_fps = [] if options.rm_file: remove_fps = scor.readFPs(path + options.rm_file)
do_append = options.do_append simil_metric = "Dice" if options.simil: simil_metric = options.simil outpath = path outpath_set = False if options.outpath: outpath_set = True outpath = path + options.outpath # check for sensible input fp_names = scor.checkFPFile(fp_file) if not fp_names: raise ValueError("No fingerprints given in", fp_file) if outpath_set: scor.checkPath(outpath, "output") scor.checkSimil(simil_metric) # loop over targets for target in conf.set_data: print target # read in training actives and calculate fps actives = cPickle.load(open(inpath_cmp + "ChEMBL_II/Target_no_" + str(target) + ".pkl", "r")) for k in actives.keys(): for i, m in enumerate(actives[k]): fp_dict = scor.getFPDict(fp_names, m[1]) actives[k][i] = [str(target) + "_" + str(k) + "_A_" + str(i + 1), fp_dict] # read in test actives and calculate fps div_actives = []
method = options.method MODE = "SIM" else: raise RuntimeError("the method option was not recognized") else: method = "Tanimoto" MODE = "SIM" DEFAULT_OUTDIR = os.path.join(os.getcwd(), "benchmark_results/") if options.outdir: outdir = options.outdir else: outdir = DEFAULT_OUTDIR if not os.path.exists(outdir): os.system("mkdir {path}".format(path=outdir)) scor.checkPath(outdir, "outdir") ### run the required benchmark scripts ### # first score os.chdir(scoring_path) print("cwd: ", os.getcwd()) if MODE == "ML": # need to change dir os.system( "python calculate_scored_lists_{ml_method}.py -n {ml_num} -f {ml_fp} -s {ml_sim} -o {ml_outpath}".format( ml_method=method, ml_num=num_query_mols, ml_fp=fp, ml_sim="Tanimoto", ml_outpath=os.path.join(
parser.add_option("-m", "--method", dest="method", help="method for data fusion (max or ave, default: max)") parser.add_option("-r", "--remove", dest="rm_file", metavar="FILE", help="FILE containing the fingerprints to be left out (default: all fingerprints are read)") parser.add_option("-o", "--outpath", dest="outpath", metavar="PATH", help="relative output PATH (default: pwd)") parser.add_option("-a", "--append", dest="do_append", action="store_true", help="append to the output file (default: False)") ######################## MAIN PART ########################### if __name__=='__main__': # read in command line options (options, args) = parser.parse_args() # required arguments if options.inpath: inpath = [path+i for i in options.inpath] for inp in inpath: scor.checkPath(inp, 'input') else: raise RuntimeError('one or more of the required options was not given!') # optional arguments method = 'max' if options.method: if options.method not in ['max', 'ave']: raise ValueError('method is unkown. supported methods are: max and ave') else: method = options.method remove_fps = [] if options.rm_file: remove_fps = scor.readFPs(path+options.rm_file) outpath = path if options.outpath: