def approximator(molecules, options, sort_order=None, frameworks=None, ensemble=None):
    """
    Grow the query ensemble round by round until it reaches the requested size.

    Each round calls ``rank_queries`` with the current ensemble, replaces the
    ensemble with its return value and writes it out with
    ``output.write_ensemble``.

    :param molecules: molecule collection handed to ``rank_queries`` and, when
        no sort order is supplied, to ``classification.get_sort_order``
    :param options: options object; ``options.ensemble_size`` is the target
        ensemble length, and the object is forwarded to the helpers
    :param sort_order: optional precomputed sort order; derived from
        ``molecules`` when falsy
    :param frameworks: not used by this function; kept so existing callers
        that pass it keep working
    :param ensemble: ensemble to start from; a fresh empty list when omitted
    :return: 1 once ``len(ensemble) == options.ensemble_size``
    """
    # Fresh lists per call: the former ``[]`` defaults were shared between
    # calls, so anything a helper appended in place leaked into the next call.
    if frameworks is None:
        frameworks = []
    if ensemble is None:
        ensemble = []

    # set variables
    ensemble_size = options.ensemble_size

    # Iterating (instead of recursing into ``approximator``) keeps the stack
    # flat and does not depend on what the module-level name is bound to.
    while True:
        if not sort_order:
            sort_order = classification.get_sort_order(molecules)

        # construct ensemble
        print("Performing calculations for ensemble size {s}".format(
            s=(len(ensemble) + 1)))
        ensemble = rank_queries(molecules, ensemble, sort_order, options)

        # write stats & ensemble
        output.write_ensemble(list(ensemble), options)

        if len(ensemble) == ensemble_size:
            return 1
def approximator(molecules, options, sort_order=None, frameworks=None, ensemble=None):
    """
    Rank queries repeatedly, extending the ensemble until it is complete.

    :param molecules: molecules passed to ``rank_queries`` (and to
        ``classification.get_sort_order`` when ``sort_order`` is falsy)
    :param options: run options; ``options.ensemble_size`` is read as the
        ensemble length at which the function stops
    :param sort_order: sort order to reuse; computed from ``molecules`` when
        not given
    :param frameworks: accepted for backward compatibility only; this function
        does not read it
    :param ensemble: starting ensemble; defaults to a new empty list
    :return: 1 when the ensemble length equals ``options.ensemble_size``
    """
    # ``None`` sentinels replace the mutable ``[]`` defaults, which Python
    # evaluates once and would otherwise share across every call.
    frameworks = [] if frameworks is None else frameworks
    ensemble = [] if ensemble is None else ensemble

    # set variables
    ensemble_size = options.ensemble_size

    # One loop pass corresponds to one former recursive call; looping avoids
    # a stack frame per ensemble member.
    finished = False
    while not finished:
        if not sort_order:
            sort_order = classification.get_sort_order(molecules)

        # construct ensemble
        print("Performing calculations for ensemble size {s}".format(s=(len(ensemble) + 1)))
        ensemble = rank_queries(molecules, ensemble, sort_order, options)

        # write stats & ensemble
        output.write_ensemble(list(ensemble), options)

        finished = len(ensemble) == ensemble_size

    return 1
def approximator(molecules, options, sort_order=None):
    """
    Rank all queries once, then assemble the ensemble from the ranked results.

    :param molecules: molecules passed to ``rank_queries`` and, when no sort
        order is given, to ``classification.get_sort_order``
    :param options: options object; ``options.ensemble_size`` is the ensemble
        length at which the function returns
    :param sort_order: optional sort order; computed from ``molecules`` when
        falsy
    :return: 0 when the ensemble length equals ``options.ensemble_size``;
        ``None`` if ``results`` is (or becomes) empty before that happens
    """
    target_size = options.ensemble_size
    order = sort_order if sort_order else classification.get_sort_order(molecules)

    print("Performing calculations")
    results = rank_queries(molecules, order, options)

    selected = []
    # NOTE(review): ``results`` is never rebound here, so the loop can only
    # end early if ``construct_ensemble`` empties it in place -- confirm.
    while results:
        selected = construct_ensemble(results, selected, options)
        output.write_ensemble(selected, options)
        if len(selected) != target_size:
            continue
        return 0

    return None
def compare(molecules, ensemble_lookup, options):
    """
    Compare the first two ensembles (by sorted key) and report the differences.

    Builds a score structure for each ensemble from its own deep copy of
    ``molecules``, computes the AUC difference and one enrichment-factor
    difference per entry of ``make_fpfList(options)``, and writes the summary
    with ``output.write_diff_summary``.  ROC data and plots are produced only
    when ``options.write_roc`` / ``options.plot`` are truthy.

    :param molecules: molecules to score; deep-copied, so the caller's
        objects are not passed to ``make_score_structure``
    :param ensemble_lookup: mapping whose keys identify the ensembles (used
        as paths for the column names) and whose values are passed to
        ``classification.make_score_structure``; only the two smallest keys
        in sorted order are compared
    :param options: options object forwarded to the helpers; ``write_roc``
        and ``plot`` are read here
    :return: None
    """
    print(" Analyzing differences ... ")
    print('')

    sort_order = classification.get_sort_order(molecules)

    ordered_keys = sorted(ensemble_lookup.keys())
    first_key = ordered_keys[0]
    second_key = ordered_keys[1]

    # Column titles: blank label column, the two ensemble names without
    # their '.csv' suffix, then the comparison columns.
    stats = {
        'header': [
            ' ',
            os.path.basename(first_key).replace('.csv', ''),
            os.path.basename(second_key).replace('.csv', ''),
            'Difference',
            '95% CI',
            'p-value',
        ],
    }

    # Each ensemble gets its own copy of the molecules.
    first_molecules = copy.deepcopy(molecules)
    second_molecules = copy.deepcopy(molecules)

    first_scores = classification.make_score_structure(
        first_molecules, ensemble_lookup[first_key])
    second_scores = classification.make_score_structure(
        second_molecules, ensemble_lookup[second_key])

    first_auc = classification.make_auc_structure(first_scores)
    second_auc = classification.make_auc_structure(second_scores)

    # calculate auc value differences
    stats['AUC'] = classification.calculate_auc_diff(
        first_auc, second_auc, sort_order)

    # calculate enrichment factor differences
    for raw_fpf in make_fpfList(options):
        fpf = float(raw_fpf)
        first_ef = classification.make_ef_structure(first_scores, fpf, sort_order)
        second_ef = classification.make_ef_structure(second_scores, fpf, sort_order)
        # A difference is only recorded when both structures are truthy.
        if not (first_ef and second_ef):
            continue
        stats['E%s' % fpf] = classification.calculate_ef_diff(
            first_ef, second_ef, fpf)

    # write results summary
    output.write_diff_summary(stats, options)

    # write roc curves
    if options.write_roc:
        print(" Writing ROC data ... ")
        print('')
        output.write_roc(first_auc, first_key, options)
        output.write_roc(second_auc, second_key, options)

    # plot
    if options.plot:
        print(" Making plots ... ")
        print('')
        plotter(molecules, ensemble_lookup, options)
def compare(molecules, ensemble_lookup, options):
    """
    Report AUC and enrichment-factor differences between two ensembles.

    The two ensembles compared are the first two keys of ``ensemble_lookup``
    in sorted order.  Results are collected in a ``stats`` dict ('header',
    'AUC' and one 'E<fpf>' entry per usable false-positive fraction) and
    passed to ``output.write_diff_summary``.

    :param molecules: molecules to score; a separate deep copy is made for
        each ensemble
    :param ensemble_lookup: mapping of ensemble key to the value handed to
        ``classification.make_score_structure``; keys are treated as paths
        when building column names
    :param options: options object; passed to ``make_fpfList`` and the
        output helpers, and read for the ``write_roc`` and ``plot`` flags
    :return: None
    """
    print(" Analyzing differences ... ")
    print('')

    sort_order = classification.get_sort_order(molecules)

    keys_in_order = sorted(ensemble_lookup.keys())
    ensemble_a = keys_in_order[0]
    ensemble_b = keys_in_order[1]

    # Display names are the file names with the '.csv' suffix removed.
    display_names = [
        os.path.basename(key).replace('.csv', '')
        for key in (ensemble_a, ensemble_b)
    ]
    stats = {}
    stats['header'] = [' '] + display_names + ['Difference', '95% CI', 'p-value']

    molecules_a = copy.deepcopy(molecules)
    molecules_b = copy.deepcopy(molecules)
    scores_a = classification.make_score_structure(molecules_a, ensemble_lookup[ensemble_a])
    scores_b = classification.make_score_structure(molecules_b, ensemble_lookup[ensemble_b])
    auc_a = classification.make_auc_structure(scores_a)
    auc_b = classification.make_auc_structure(scores_b)

    # calculate auc value differences
    stats['AUC'] = classification.calculate_auc_diff(auc_a, auc_b, sort_order)

    # calculate enrichment factor differences
    for entry in make_fpfList(options):
        fpf = float(entry)
        ef_a = classification.make_ef_structure(scores_a, fpf, sort_order)
        ef_b = classification.make_ef_structure(scores_b, fpf, sort_order)
        # Skip this fraction unless both structures are truthy.
        if ef_a and ef_b:
            title = 'E%s' % fpf
            stats[title] = classification.calculate_ef_diff(ef_a, ef_b, fpf)

    # write results summary
    output.write_diff_summary(stats, options)

    # write roc curves
    if options.write_roc:
        print(" Writing ROC data ... ")
        print('')
        for auc_structure, ensemble_key in ((auc_a, ensemble_a), (auc_b, ensemble_b)):
            output.write_roc(auc_structure, ensemble_key, options)

    # plot
    if options.plot:
        print(" Making plots ... ")
        print('')
        plotter(molecules, ensemble_lookup, options)