Example #1
0
def calculate_performance(molecules, ensemble, sort_order, options):
    """
    evaluate the virtual screening performance of an ensemble and collect the metrics in an
    EnsembleStorage object.
    :param molecules: passed to classification.make_score_structure together with the ensemble
    :param ensemble: the ensemble to evaluate; stored unchanged under the 'ensemble' property
    :param sort_order: string. either 'asc' (for binding energy estimates) or 'dsc' (for similarity scores)
    :param options: passed to classification.make_fpfList (presumably selects the fpf values - confirm)
    :return: EnsembleStorage holding the 'ensemble' and 'auc' properties plus one 'ef' entry for every
             fpf value whose ef structure is non-empty
    """
    storage = EnsembleStorage()
    storage.set_prop('ensemble', ensemble)

    # every metric below is derived from the same score structure
    scores = classification.make_score_structure(molecules, ensemble)

    # area under the curve
    storage.set_prop(
        'auc',
        classification.calculate_auc(
            classification.make_auc_structure(scores), sort_order, 'no stats'))

    # one enrichment factor per fpf value
    for raw_fpf in classification.make_fpfList(options, scores):
        fraction = float(raw_fpf)
        ef_input = classification.make_ef_structure(scores, fraction, sort_order)
        if not ef_input:
            # nothing to compute for this fpf value
            continue
        ef_result = classification.calculate_ef(ef_input, fraction)
        storage.set_prop(ef_result[0], ef_result[1], 'ef')

    return storage
Example #2
0
def rank_queries(molecules, ensemble, sort_order, options):
    """
    rank queries by value added to existing ensemble, and return the best extended ensemble.

    each query found in molecules[0].scores that is not already part of the ensemble is appended to the
    ensemble in turn. every candidate ensemble gets its auc and enrichment factors calculated, and
    screener.find_best_ensemble picks the winner.
    :param molecules: sequence of molecule objects; the keys of molecules[0].scores are the available queries
    :param ensemble: sequence (list or tuple) of queries already selected. it is not modified.
    :param sort_order: passed to classification.calculate_auc and classification.make_ef_structure
    :param options: passed to classification.make_fpfList and screener.find_best_ensemble
    :return: list. the members of the best performing candidate ensemble
    """

    # freeze the current ensemble once, so that lists and tuples are both accepted and the caller's
    # object is never touched
    base_ensemble = tuple(ensemble)

    # generate query list
    query_list = [
        x for x in molecules[0].scores.keys() if x not in base_ensemble
    ]

    results = {}

    for query in query_list:
        es = EnsembleStorage()  # an ensemble storage object

        # generate test_ensemble: the existing members followed by the candidate query
        test_ensemble = base_ensemble + (query,)
        es.set_prop('ensemble', test_ensemble)

        # calculate its performance
        score_structure = classification.make_score_structure(
            molecules, test_ensemble)

        # determine auc value
        auc_structure = classification.make_auc_structure(score_structure)
        auc = classification.calculate_auc(auc_structure, sort_order,
                                           'no stats')
        es.set_prop('auc', auc)

        # record an enrichment factor for every fpf value returned by make_fpfList; fpf values that give an
        # empty ef structure are skipped
        for fpf in classification.make_fpfList(options, score_structure):
            fpf = float(fpf)
            ef_structure = classification.make_ef_structure(
                score_structure, fpf, sort_order)
            if ef_structure:
                ef = classification.calculate_ef(ef_structure, fpf)
                es.set_prop(ef[0], ef[1], 'ef')

        # store the metrics, keyed by the candidate ensemble
        results[test_ensemble] = es

    # peel away the best performing ensemble
    best_ensemble = screener.find_best_ensemble(results, options)

    return list(best_ensemble)
Example #3
0
def rank_queries(molecules, sort_order, options):
    """
    evaluate every query on its own, as a one-member ensemble.
    :param molecules: sequence of molecule objects; the keys of molecules[0].scores are the queries
    :param sort_order: passed to classification.calculate_auc and classification.make_ef_structure
    :param options: passed to classification.make_fpfList
    :return: dict. maps each one-element tuple (query,) to the EnsembleStorage holding its 'ensemble',
             'auc' and 'ef' properties
    """
    per_query = {}

    # snapshot the query names before looping
    for name in list(molecules[0].scores.keys()):
        # a single query is represented as a one-element tuple so it can serve as a dict key
        single = (name,)

        storage = EnsembleStorage()
        storage.set_prop('ensemble', single)

        structure = classification.make_score_structure(molecules, single)

        # area under the curve
        storage.set_prop(
            'auc',
            classification.calculate_auc(
                classification.make_auc_structure(structure), sort_order, 'no stats'))

        # one enrichment factor per fpf value
        for raw_fpf in classification.make_fpfList(options, structure):
            fraction = float(raw_fpf)
            ef_input = classification.make_ef_structure(structure, fraction, sort_order)
            if not ef_input:
                # nothing to compute for this fpf value
                continue
            ef_result = classification.calculate_ef(ef_input, fraction)
            storage.set_prop(ef_result[0], ef_result[1], 'ef')

        per_query[single] = storage

    return per_query