예제 #1
0
def approximator(molecules,
                 options,
                 sort_order=None,
                 frameworks=None,
                 ensemble=None):
    """
    Recursively grow an ensemble until it reaches ``options.ensemble_size``.

    Each call hands the current ensemble to ``rank_queries``, writes the
    ensemble it returns with ``output.write_ensemble`` and recurses until the
    ensemble length equals the requested size.

    :param molecules: molecule collection forwarded to
        ``classification.get_sort_order`` and ``rank_queries``
    :param options: run options; ``options.ensemble_size`` is the target
        length, and the object is also forwarded to ``rank_queries`` and
        ``output.write_ensemble``
    :param sort_order: sort order forwarded to ``rank_queries``; when falsy
        it is obtained from ``classification.get_sort_order(molecules)``
    :param frameworks: not used here beyond being forwarded to the recursive
        call; a fresh empty list is used when omitted
    :param ensemble: ensemble built so far; a fresh empty list is used when
        omitted
    :return: 1 once ``len(ensemble)`` equals ``options.ensemble_size``
    """

    # ``None`` sentinels replace the former ``[]`` defaults: a list literal in
    # the signature is created once and shared by every call that omits the
    # argument, so any in-place mutation would leak into later calls.
    if frameworks is None:
        frameworks = []
    if ensemble is None:
        ensemble = []

    # set variables
    ensemble_size = options.ensemble_size

    if not sort_order:
        sort_order = classification.get_sort_order(molecules)

    # construct ensemble
    print("Performing calculations for ensemble size {s}".format(
        s=(len(ensemble) + 1)))
    ensemble = rank_queries(molecules, ensemble, sort_order, options)

    # write stats & ensemble (a copy is passed, so the writer cannot alter
    # the list that is carried into the next recursion step)
    output.write_ensemble(list(ensemble), options)

    # NOTE(review): recursion stops only on exact equality; this presumably
    # relies on rank_queries growing the ensemble one entry per call - confirm.
    if len(ensemble) == ensemble_size:
        return 1
    else:
        return approximator(molecules, options, sort_order, frameworks,
                            ensemble)
예제 #2
0
파일: slowheuristic.py 프로젝트: rvswift/EB
def approximator(molecules, options, sort_order=None, frameworks=None, ensemble=None):
    """
    Recursively grow an ensemble until it reaches ``options.ensemble_size``.

    Each call hands the current ensemble to ``rank_queries``, writes the
    ensemble it returns with ``output.write_ensemble`` and recurses until the
    ensemble length equals the requested size.

    :param molecules: molecule collection forwarded to
        ``classification.get_sort_order`` and ``rank_queries``
    :param options: run options; ``options.ensemble_size`` is the target
        length, and the object is also forwarded to ``rank_queries`` and
        ``output.write_ensemble``
    :param sort_order: sort order forwarded to ``rank_queries``; when falsy
        it is obtained from ``classification.get_sort_order(molecules)``
    :param frameworks: not used here beyond being forwarded to the recursive
        call; a fresh empty list is used when omitted
    :param ensemble: ensemble built so far; a fresh empty list is used when
        omitted
    :return: 1 once ``len(ensemble)`` equals ``options.ensemble_size``
    """

    # ``None`` sentinels replace the former ``[]`` defaults: a list literal in
    # the signature is created once and shared by every call that omits the
    # argument, so any in-place mutation would leak into later calls.
    if frameworks is None:
        frameworks = []
    if ensemble is None:
        ensemble = []

    # set variables
    ensemble_size = options.ensemble_size

    if not sort_order:
        sort_order = classification.get_sort_order(molecules)

    # construct ensemble
    print("Performing calculations for ensemble size {s}".format(s=(len(ensemble) + 1)))
    ensemble = rank_queries(molecules, ensemble, sort_order, options)

    # write stats & ensemble (a copy is passed, so the writer cannot alter
    # the list that is carried into the next recursion step)
    output.write_ensemble(list(ensemble), options)

    # NOTE(review): recursion stops only on exact equality; this presumably
    # relies on rank_queries growing the ensemble one entry per call - confirm.
    if len(ensemble) == ensemble_size:
        return 1
    else:
        return approximator(molecules, options, sort_order, frameworks, ensemble)
예제 #3
0
def approximator(molecules, options, sort_order=None):
    """
    Build an ensemble iteratively from a single ranking of the queries.

    The queries are ranked once with ``rank_queries``; the ensemble is then
    rebuilt with ``construct_ensemble`` and written with
    ``output.write_ensemble`` on every pass, for as long as the ranking
    results are truthy.

    :param molecules: molecule collection forwarded to
        ``classification.get_sort_order`` and ``rank_queries``
    :param options: run options; ``options.ensemble_size`` is the target
        ensemble length
    :param sort_order: sort order for ``rank_queries``; when falsy it is
        obtained from ``classification.get_sort_order(molecules)``
    :return: 0 once the ensemble length equals ``options.ensemble_size``,
        ``None`` if the ranking results become falsy first
    """
    target_size = options.ensemble_size

    # fall back to the classification default when no order was supplied
    sort_order = sort_order or classification.get_sort_order(molecules)

    print("Performing calculations")
    ranked = rank_queries(molecules, sort_order, options)

    selected = []
    while True:
        # nothing (left) to build from: fall out without a status code
        if not ranked:
            return None

        selected = construct_ensemble(ranked, selected, options)
        output.write_ensemble(selected, options)

        if len(selected) == target_size:
            return 0
예제 #4
0
def compare(molecules, ensemble_lookup, options):
    """
    Compare the first two ensembles (in sorted key order) of a lookup.

    AUC and enrichment-factor differences are computed through the
    ``classification`` helpers, gathered in a stats dictionary and written
    with ``output.write_diff_summary``. ROC data and plots are produced when
    ``options.write_roc`` / ``options.plot`` are truthy.

    :param molecules: molecule collection; deep-copied once per ensemble
        before scoring
    :param ensemble_lookup: mapping of ensemble key to ensemble; the keys are
        passed through ``os.path.basename`` with ``.csv`` removed to label
        the summary columns
    :param options: run options, read for ``write_roc`` and ``plot`` and
        forwarded to ``make_fpfList`` and the ``output`` / ``plotter`` calls
    :return: None
    """

    print(" Analyzing differences ... ")
    print('')
    sort_order = classification.get_sort_order(molecules)

    # the pair under comparison: the two lowest keys in sorted order
    ordered_keys = sorted(ensemble_lookup.keys())
    ensemble1 = ordered_keys[0]
    ensemble2 = ordered_keys[1]
    pair = (ensemble1, ensemble2)

    # summary header: blank corner cell, one column per ensemble, then stats
    labels = [os.path.basename(key).replace('.csv', '') for key in pair]
    stats = {'header': [' '] + labels + ['Difference', '95% CI', 'p-value']}

    # each ensemble is scored against its own private copy of the molecules
    copies = [copy.deepcopy(molecules) for _ in pair]
    score_structures = [
        classification.make_score_structure(mols, ensemble_lookup[key])
        for mols, key in zip(copies, pair)
    ]
    auc_structures = [
        classification.make_auc_structure(scores)
        for scores in score_structures
    ]

    # calculate auc value differences
    auc_first, auc_second = auc_structures
    stats['AUC'] = classification.calculate_auc_diff(auc_first, auc_second,
                                                     sort_order)

    # calculate enrichment factor differences
    scores_first, scores_second = score_structures
    for fpf in map(float, make_fpfList(options)):
        ef_first = classification.make_ef_structure(scores_first, fpf,
                                                    sort_order)
        ef_second = classification.make_ef_structure(scores_second, fpf,
                                                     sort_order)

        # only report a difference when both structures are non-empty
        if not (ef_first and ef_second):
            continue
        stats['E%s' % fpf] = classification.calculate_ef_diff(
            ef_first, ef_second, fpf)

    # write results summary
    output.write_diff_summary(stats, options)

    # write roc curves
    if options.write_roc:
        print(" Writing ROC data ... ")
        print('')
        for auc_structure, key in zip(auc_structures, pair):
            output.write_roc(auc_structure, key, options)

    # plot
    if options.plot:
        print(" Making plots ... ")
        print('')
        plotter(molecules, ensemble_lookup, options)
예제 #5
0
파일: postanalysis.py 프로젝트: rvswift/EB
def compare(molecules, ensemble_lookup, options):
    """
    Compare the first two ensembles (in sorted key order) of a lookup.

    AUC and enrichment-factor differences are computed through the
    ``classification`` helpers, gathered in a stats dictionary and written
    with ``output.write_diff_summary``. ROC data and plots are produced when
    ``options.write_roc`` / ``options.plot`` are truthy.

    :param molecules: molecule collection; deep-copied once per ensemble
        before scoring
    :param ensemble_lookup: mapping of ensemble key to ensemble; the keys are
        passed through ``os.path.basename`` with ``.csv`` removed to label
        the summary columns
    :param options: run options, read for ``write_roc`` and ``plot`` and
        forwarded to ``make_fpfList`` and the ``output`` / ``plotter`` calls
    :return: None
    """

    print(" Analyzing differences ... ")
    print('')
    sort_order = classification.get_sort_order(molecules)

    # the pair under comparison: the two lowest keys in sorted order
    keys = sorted(ensemble_lookup.keys())
    first_key = keys[0]
    second_key = keys[1]

    # summary header: blank corner cell, one column per ensemble, then stats
    stats = {
        'header': [
            ' ',
            os.path.basename(first_key).replace('.csv', ''),
            os.path.basename(second_key).replace('.csv', ''),
            'Difference',
            '95% CI',
            'p-value',
        ],
    }

    # each ensemble is scored against its own private copy of the molecules
    first_copy, second_copy = copy.deepcopy(molecules), copy.deepcopy(molecules)

    first_scores = classification.make_score_structure(first_copy, ensemble_lookup[first_key])
    second_scores = classification.make_score_structure(second_copy, ensemble_lookup[second_key])

    first_auc = classification.make_auc_structure(first_scores)
    second_auc = classification.make_auc_structure(second_scores)

    # calculate auc value differences
    stats['AUC'] = classification.calculate_auc_diff(first_auc, second_auc, sort_order)

    # calculate enrichment factor differences
    for raw_fpf in make_fpfList(options):
        fpf = float(raw_fpf)
        first_ef = classification.make_ef_structure(first_scores, fpf, sort_order)
        second_ef = classification.make_ef_structure(second_scores, fpf, sort_order)

        # only report a difference when both structures are non-empty
        if not (first_ef and second_ef):
            continue
        stats['E%s' % fpf] = classification.calculate_ef_diff(first_ef, second_ef, fpf)

    # write results summary
    output.write_diff_summary(stats, options)

    # write roc curves
    if options.write_roc:
        print(" Writing ROC data ... ")
        print('')
        output.write_roc(first_auc, first_key, options)
        output.write_roc(second_auc, second_key, options)

    # plot
    if options.plot:
        print(" Making plots ... ")
        print('')
        plotter(molecules, ensemble_lookup, options)