# Script section: resolve the similarity metric and output path from the
# command-line options, validate them, configure the Bernoulli naive Bayes
# model, then load the training actives of every target and replace each
# entry with a named fingerprint record.

# similarity metric: 'Dice' unless overridden on the command line
simil_metric = options.simil if options.simil else 'Dice'

# output path: `path` itself, or `path` + user-supplied suffix
outpath_set = bool(options.outpath)
outpath = path + options.outpath if outpath_set else path

# check for sensible input
if outpath_set:
    scor.checkPath(outpath, 'output')
scor.checkSimil(simil_metric)

# default machine-learning method variables
ml_dict = dict(alpha=1.0, binarize=None, fit_prior=True)
if options.ml:
    # the defaults are handed to readMLFile together with the file location
    ml_dict = ml_func.readMLFile(ml_dict, read_dict, path + options.ml)

# initialize machine-learning method
ml = BernoulliNB(
    alpha=ml_dict['alpha'],
    binarize=ml_dict['binarize'],
    fit_prior=ml_dict['fit_prior'],
)

# loop over targets
for target in conf.set_data:
    print(target)

    # read in training actives and calculate fps
    # The file is opened in a `with` block so the handle is closed after
    # every target instead of being leaked once per loop iteration.
    # NOTE(review): pickle files are normally opened in binary mode ('rb');
    # the original text mode is kept here -- confirm how the files were written.
    # NOTE(review): unpickling executes arbitrary code; only load trusted files.
    with open(inpath_cmp + 'ChEMBL_II/Target_no_' + str(target) + '.pkl', 'r') as pkl_file:
        actives = cPickle.load(pkl_file)
    for k in actives:
        for i, m in enumerate(actives[k]):
            # m[1] is what the fingerprint is built from; the entry is
            # replaced in place by [name, fingerprint dict], where the name
            # is "<target>_<k>_A_<1-based index>"
            fp_dict = scor.getFP(fp_build, m[1])
            actives[k][i] = [str(target) + '_' + str(k) + '_A_' + str(i + 1), fp_dict]
# check for sensible input if outpath_set: scor.checkPath(outpath, 'output') scor.checkSimil(simil_metric) scor.checkQueryMols(num_query_mols, conf.list_num_query_mols) # default machine-learning method variables ml_dict = dict(criterion='gini', max_features='auto', n_jobs=1, max_depth=10, min_samples_split=2, min_samples_leaf=1, num_estimators=100) if options.ml: ml_dict = ml_func.readMLFile(ml_dict, read_dict, path + options.ml) # initialize machine-learning method ml = RandomForestClassifier(criterion=ml_dict['criterion'], max_features=ml_dict['max_features'], min_samples_split=ml_dict['min_samples_split'], max_depth=ml_dict['max_depth'], min_samples_leaf=ml_dict['min_samples_leaf'], n_estimators=ml_dict['num_estimators'], n_jobs=ml_dict['n_jobs']) # loop over data-set sources for dataset in conf.set_data.keys(): print dataset # loop over targets for target in conf.set_data[dataset]['ids']: