def approximator(molecules, options, sort_order=None, frameworks=None, ensemble=None):
    """
    Grow an ensemble one ranking round at a time until it reaches the
    requested size, writing the ensemble out after every round.

    :param molecules: forwarded to ``classification.get_sort_order`` and
        ``rank_queries``
    :param options: must expose ``ensemble_size``; also forwarded to
        ``rank_queries`` and ``output.write_ensemble``
    :param sort_order: computed from ``molecules`` when falsy
    :param frameworks: accepted for backward compatibility; not used by
        this function
    :param ensemble: ensemble to start from; a fresh empty list is used
        when omitted
    :return: 1 once ``len(ensemble) == options.ensemble_size``
    """
    # A mutable default ([]) would be shared between calls; since the list is
    # handed to rank_queries, any in-place change would leak into later calls.
    if ensemble is None:
        ensemble = []

    ensemble_size = options.ensemble_size

    if not sort_order:
        sort_order = classification.get_sort_order(molecules)

    # Iterating instead of recursing keeps the call depth constant, so a large
    # ensemble_size cannot hit the interpreter's recursion limit.
    # NOTE(review): like the recursive original, this only stops on an exact
    # size match -- it assumes rank_queries grows the ensemble step by step
    # up to ensemble_size; confirm against rank_queries.
    while True:
        print("Performing calculations for ensemble size {s}".format(
            s=(len(ensemble) + 1)))
        ensemble = rank_queries(molecules, ensemble, sort_order, options)

        # write stats & ensemble
        output.write_ensemble(list(ensemble), options)

        if len(ensemble) == ensemble_size:
            return 1
def approximator(molecules, options, sort_order=None, frameworks=None, ensemble=None):
    """
    Grow an ensemble one ranking round at a time until it reaches the
    requested size, writing the ensemble out after every round.

    :param molecules: forwarded to ``classification.get_sort_order`` and
        ``rank_queries``
    :param options: must expose ``ensemble_size``; also forwarded to
        ``rank_queries`` and ``output.write_ensemble``
    :param sort_order: computed from ``molecules`` when falsy
    :param frameworks: accepted for backward compatibility; not used by
        this function
    :param ensemble: ensemble to start from; a fresh empty list is used
        when omitted
    :return: 1 once ``len(ensemble) == options.ensemble_size``
    """
    # A mutable default ([]) would be shared between calls; since the list is
    # handed to rank_queries, any in-place change would leak into later calls.
    if ensemble is None:
        ensemble = []

    ensemble_size = options.ensemble_size

    if not sort_order:
        sort_order = classification.get_sort_order(molecules)

    # Iterating instead of recursing keeps the call depth constant, so a large
    # ensemble_size cannot hit the interpreter's recursion limit.
    # NOTE(review): like the recursive original, this only stops on an exact
    # size match -- it assumes rank_queries grows the ensemble step by step
    # up to ensemble_size; confirm against rank_queries.
    while True:
        print("Performing calculations for ensemble size {s}".format(s=(len(ensemble) + 1)))
        ensemble = rank_queries(molecules, ensemble, sort_order, options)

        # write stats & ensemble
        output.write_ensemble(list(ensemble), options)

        if len(ensemble) == ensemble_size:
            return 1
def optimizor(molecules, sort_order, ensemble_size, options):
    """
    Evaluate every ensemble of a fixed size, write out the best one and
    return per-ensemble statistics.

    :param molecules: forwarded to ``make_ensemble_list`` and ``evaluate``
    :param sort_order: forwarded to ``evaluate``
    :param ensemble_size: size of the ensembles to enumerate
    :param options: must expose ``ncpu`` and ``score_field``; also forwarded
        to ``evaluate``, ``screener.find_best_ensemble`` and
        ``output.write_ensemble``
    :return: tuple ``(auc_list, ef_list)`` with one entry per evaluated
        ensemble
    """
    ncpu = options.ncpu
    score_field = options.score_field

    # every candidate ensemble of the requested size
    candidates = make_ensemble_list(molecules, score_field, ensemble_size)

    # fall back to the machine's CPU count when none was configured
    ncpu = ncpu or multiprocessing.cpu_count()

    if ncpu > 1:
        print("Determining the performance of {d} ensembles using {n} processors".format(d=len(candidates), n=ncpu))

        # never ask for more workers than there are candidates
        ncpu = min(ncpu, len(candidates))

        result_queue = multiprocessing.Queue()
        workers = []
        # presumably chunker splits the candidates into ncpu pieces -- confirm
        for chunk in chunker(candidates, ncpu):
            worker = multiprocessing.Process(
                target=evaluate,
                args=(molecules, chunk, sort_order, options, result_queue),
            )
            workers.append(worker)
            worker.start()

        # one partial result is read per worker; the queue is drained before
        # joining the workers
        results = {}
        for _ in workers:
            results.update(result_queue.get())

        for worker in workers:
            worker.join()
    else:
        print("Determining the performance of {d} ensembles using {n} processor".format(d=len(candidates), n=ncpu))
        results = evaluate(molecules, candidates, sort_order, options)

    # pick the best performing ensemble and write it out
    best = screener.find_best_ensemble(results, options)
    output.write_ensemble(list(best), options)

    # Original author's note (temp 2/9/15): return auc values and ef at
    # fpf = 0.001 so a histogram can be built from them.
    ensemble_keys = results.keys()
    ef_key_sets = [results[key].ef.keys() for key in ensemble_keys]
    nd = max(ef_key_sets[0])
    n = int(round(0.001 * nd))

    ef_list = [results[key].get_prop(n, 'ef') for key in ensemble_keys]
    auc_list = [results[key].get_prop('auc') for key in ensemble_keys]

    return auc_list, ef_list
def optimizor(molecules, sort_order, ensemble_size, options):
    """
    Evaluate the performance of all ensembles of fixed size.

    The best performing ensemble is written with ``output.write_ensemble``.

    :param molecules: forwarded to ``make_ensemble_list`` and ``evaluate``
    :param sort_order: forwarded to ``evaluate``
    :param ensemble_size: size of the ensembles to enumerate
    :param options: must expose ``ncpu`` and ``score_field``; also forwarded
        to ``evaluate``, ``screener.find_best_ensemble`` and
        ``output.write_ensemble``
    :return: tuple ``(auc_list, ef_list)`` with one entry per evaluated
        ensemble
    """
    # set variables
    ncpu = options.ncpu
    score_field = options.score_field

    # generate an exhaustive list of all possible ensembles
    ensemble_list = make_ensemble_list(molecules, score_field, ensemble_size)

    # set number of processors; use every available CPU when unset
    if not ncpu:
        ncpu = multiprocessing.cpu_count()

    if ncpu > 1:
        print("Determining the performance of {d} ensembles using {n} processors".format(d=len(ensemble_list), n=ncpu))

        # never ask for more workers than there are ensembles
        # NOTE(review): an empty ensemble_list makes this 0 -- confirm that
        # chunker copes with that.
        ncpu = min(ncpu, len(ensemble_list))

        jobs = []
        output_queue = multiprocessing.Queue()
        for ensemble_chunk in chunker(ensemble_list, ncpu):
            p = multiprocessing.Process(
                target=evaluate,
                args=(molecules, ensemble_chunk, sort_order, options, output_queue),
            )
            jobs.append(p)
            p.start()

        # collect results into a dictionary: one queue item per started job,
        # read before joining the jobs
        results = {}
        for _ in jobs:
            results.update(output_queue.get())

        # stop jobs
        for j in jobs:
            j.join()
    else:
        print("Determining the performance of {d} ensembles using {n} processor".format(d=len(ensemble_list), n=ncpu))
        results = evaluate(molecules, ensemble_list, sort_order, options)

    # peel away the best performing ensemble
    ensemble = screener.find_best_ensemble(results, options)

    # write out the best performing ensemble
    output.write_ensemble(list(ensemble), options)

    # temp 2/9/15 generate and return a list of auc values and ef at
    # fpf = 0.001 to build up a histogram
    # Materialise the results once; only the first one's ``ef`` keys are
    # needed for nd, so the other results' keys are no longer copied.
    evaluated = list(results.values())
    nd = max(evaluated[0].ef.keys())
    n = int(round(0.001 * nd))

    ef_list = [result.get_prop(n, 'ef') for result in evaluated]
    auc_list = [result.get_prop('auc') for result in evaluated]

    return auc_list, ef_list
def approximator(molecules, options, sort_order=None):
    """
    Rank the queries once, then build the ensemble step by step, writing it
    out after every step.

    :param molecules: forwarded to ``classification.get_sort_order`` and
        ``rank_queries``
    :param options: must expose ``ensemble_size``; also forwarded to
        ``rank_queries``, ``construct_ensemble`` and ``output.write_ensemble``
    :param sort_order: computed from ``molecules`` when falsy
    :return: 0 as soon as the ensemble length equals
        ``options.ensemble_size``; ``None`` if the ranked results are (or
        become) falsy before that happens
    """
    target_size = options.ensemble_size
    sort_order = sort_order or classification.get_sort_order(molecules)

    print("Performing calculations")
    results = rank_queries(molecules, sort_order, options)

    selected = []
    while True:
        # presumably construct_ensemble consumes entries from results, which
        # is what lets this loop run dry -- confirm against construct_ensemble
        if not results:
            return None

        selected = construct_ensemble(results, selected, options)
        output.write_ensemble(selected, options)

        if len(selected) == target_size:
            return 0