Ejemplo n.º 1
0
def calculate_metrics(molecules, ensemble_lookup, filename, options):
    """
    Compute the AUC and enrichment factor metrics of the ensemble stored under ``filename``.

    The ensemble is read from ``ensemble_lookup[filename]`` and scored against ``molecules``
    using a fixed 'asc' sort order.
    :param molecules: list [mol_object_1, mol_object_2, .... ]
    :param ensemble_lookup: container indexed by filename; the looked-up value is handed to
        classification.make_score_structure as the ensemble
    :param filename: key into ensemble_lookup, also forwarded to output.write_roc
    :param options: interface object that makes command line arguments available.
    :return: list starting with the calculate_auc result, followed by one calculate_ef result
        for every fpf that produced a non-empty ef structure
    """
    # NOTE(review): the sort order is hard-coded here rather than derived from the data -
    # confirm this is intended for every caller.
    order = 'asc'

    # score structure for the requested ensemble
    scores = classification.make_score_structure(molecules, ensemble_lookup[filename])

    # the auc result always leads the returned list
    roc_data = classification.make_auc_structure(scores)
    metrics = [classification.calculate_auc(roc_data, order)]

    # one enrichment factor entry per usable fpf
    for raw_fpf in make_fpfList(options):
        fraction = float(raw_fpf)
        ef_data = classification.make_ef_structure(scores, fraction, order)
        if not ef_data:
            # no ef structure at this fpf, so there is nothing to report
            continue
        metrics.append(
            classification.calculate_ef(ef_data, fraction, None, 'include_intervals'))

    if options.write_roc:
        output.write_roc(roc_data, filename, options)

    return metrics
Ejemplo n.º 2
0
def calculate_performance(molecules, ensemble, sort_order, options):
    """
    Evaluate the virtual screening performance of the input ensemble and collect the results
    in an EnsembleStorage object.
    :param molecules: forwarded to classification.make_score_structure
    :param ensemble: the ensemble to evaluate; also stored under the 'ensemble' property
    :param sort_order: string. either 'asc' (for binding energy estimates)  or 'dsc' (for similarity scores)
    :param options: forwarded to classification.make_fpfList to obtain the fpf values
    :return: EnsembleStorage with the 'ensemble' and 'auc' properties set, plus one 'ef'
        property for every fpf that produced a non-empty ef structure
    """
    storage = EnsembleStorage()
    storage.set_prop('ensemble', ensemble)

    # score structure shared by the auc and enrichment factor calculations
    scores = classification.make_score_structure(molecules, ensemble)

    # auc, computed with the 'no stats' option
    roc_data = classification.make_auc_structure(scores)
    storage.set_prop('auc', classification.calculate_auc(roc_data, sort_order, 'no stats'))

    # enrichment factors
    for raw_fpf in classification.make_fpfList(options, scores):
        fraction = float(raw_fpf)
        ef_data = classification.make_ef_structure(scores, fraction, sort_order)
        if not ef_data:
            continue
        ef_result = classification.calculate_ef(ef_data, fraction)
        # first two items of the result are stored as property name and value
        storage.set_prop(ef_result[0], ef_result[1], 'ef')

    return storage
Ejemplo n.º 3
0
def calculate_performance(molecules, ensemble, sort_order, options):
    """
    Measure how well the given ensemble performs in virtual screening.

    The AUC and the enrichment factors are computed from a single score structure and
    recorded in a fresh EnsembleStorage object.
    :param molecules: forwarded to classification.make_score_structure
    :param ensemble: the ensemble to evaluate; also recorded under the 'ensemble' property
    :param sort_order: string. either 'asc' (for binding energy estimates)  or 'dsc' (for similarity scores)
    :param options: forwarded to classification.make_fpfList to obtain the fpf values
    :return: the populated EnsembleStorage object
    """
    record = EnsembleStorage()
    record.set_prop('ensemble', ensemble)

    score_data = classification.make_score_structure(molecules, ensemble)

    # area under the curve, without statistics
    auc_value = classification.calculate_auc(
        classification.make_auc_structure(score_data), sort_order, 'no stats')
    record.set_prop('auc', auc_value)

    # enrichment factor at each fpf for which an ef structure exists
    for item in classification.make_fpfList(options, score_data):
        fpf_value = float(item)
        ef_data = classification.make_ef_structure(score_data, fpf_value, sort_order)
        if ef_data:
            outcome = classification.calculate_ef(ef_data, fpf_value)
            record.set_prop(outcome[0], outcome[1], 'ef')

    return record
Ejemplo n.º 4
0
def calculate_metrics(molecules, ensemble_lookup, filename, options):
    """
    Determine the virtual screening performance of the ensemble registered under ``filename``.
    :param molecules: list [mol_object_1, mol_object_2, .... ]
    :param ensemble_lookup: container indexed by filename; ``ensemble_lookup[filename]`` is the
        ensemble that gets scored
    :param filename: key into ensemble_lookup; also forwarded to output.write_roc
    :param options: interface object that makes command line arguments available.
    :return: list of metrics - the calculate_auc result first, then one calculate_ef result per
        fpf with a non-empty ef structure
    """
    # NOTE(review): sort order is fixed to 'asc' in this function - confirm that is intended.
    sort_order = 'asc'

    # build the score and auc structures for the selected ensemble
    selected = ensemble_lookup[filename]
    score_data = classification.make_score_structure(molecules, selected)
    auc_data = classification.make_auc_structure(score_data)

    # the auc entry comes first in the result
    collected = []
    collected.append(classification.calculate_auc(auc_data, sort_order))

    # enrichment factor entries follow, in the order produced by make_fpfList
    for entry in make_fpfList(options):
        fpf_value = float(entry)
        ef_data = classification.make_ef_structure(score_data, fpf_value, sort_order)
        if ef_data:
            collected.append(
                classification.calculate_ef(ef_data, fpf_value, None, 'include_intervals'))

    # optionally write the roc data
    if options.write_roc:
        output.write_roc(auc_data, filename, options)

    return collected
Ejemplo n.º 5
0
def rank_queries(molecules, ensemble, sort_order, options):
    """
    Try adding each unused query to the existing ensemble and return the best extended ensemble.

    Every key of ``molecules[0].scores`` that is not already in ``ensemble`` is appended to a
    copy of the ensemble, the extended ensemble is evaluated, and the winner is chosen by
    screener.find_best_ensemble.
    :param molecules: sequence whose first item exposes a ``scores`` mapping; forwarded to
        classification.make_score_structure
    :param ensemble: current ensemble; must support slicing into a copy that has ``append``
    :param sort_order: forwarded to classification.calculate_auc and make_ef_structure
    :param options: forwarded to classification.make_fpfList and screener.find_best_ensemble
    :return: the result of screener.find_best_ensemble, converted to a list
    """
    # queries that are not yet part of the ensemble
    candidates = [name for name in molecules[0].scores.keys() if name not in ensemble]

    evaluated = {}

    for query in candidates:
        storage = EnsembleStorage()

        # extend a slice copy so the caller's ensemble is left untouched
        extended = ensemble[0:]
        extended.append(query)
        trial = tuple(extended)
        storage.set_prop('ensemble', trial)

        # performance of the extended ensemble
        scores = classification.make_score_structure(molecules, trial)

        roc_data = classification.make_auc_structure(scores)
        storage.set_prop(
            'auc', classification.calculate_auc(roc_data, sort_order, 'no stats'))

        # store an enrichment factor for every fpf that yields an ef structure
        for raw_fpf in classification.make_fpfList(options, scores):
            fraction = float(raw_fpf)
            ef_data = classification.make_ef_structure(scores, fraction, sort_order)
            if not ef_data:
                continue
            ef_result = classification.calculate_ef(ef_data, fraction)
            storage.set_prop(ef_result[0], ef_result[1], 'ef')

        # results are keyed by the extended ensemble tuple
        evaluated[trial] = storage

    # pick the best performing extended ensemble
    winner = screener.find_best_ensemble(evaluated, options)

    return list(winner)
Ejemplo n.º 6
0
def rank_queries(molecules, ensemble, sort_order, options):
    """
    Rank queries by the value they add to the existing ensemble.

    Each key of ``molecules[0].scores`` not already present in ``ensemble`` is evaluated as an
    addition to the ensemble; screener.find_best_ensemble then selects among the candidates.
    :param molecules: sequence whose first item exposes a ``scores`` mapping; forwarded to
        classification.make_score_structure
    :param ensemble: current ensemble; sliced into a copy and extended with ``append``
    :param sort_order: forwarded to classification.calculate_auc and make_ef_structure
    :param options: forwarded to classification.make_fpfList and screener.find_best_ensemble
    :return: list built from the ensemble returned by screener.find_best_ensemble
    """
    # every score key that is not part of the ensemble yet
    remaining = [key for key in molecules[0].scores.keys() if key not in ensemble]

    by_ensemble = {}

    for query in remaining:
        record = EnsembleStorage()

        # candidate ensemble = copy of the current ensemble plus this query
        members = ensemble[0:]
        members.append(query)
        candidate = tuple(members)
        record.set_prop('ensemble', candidate)

        score_data = classification.make_score_structure(molecules, candidate)

        # auc of the candidate ensemble, without statistics
        auc_value = classification.calculate_auc(
            classification.make_auc_structure(score_data), sort_order, 'no stats')
        record.set_prop('auc', auc_value)

        # enrichment factors of the candidate ensemble
        for item in classification.make_fpfList(options, score_data):
            fpf_value = float(item)
            ef_data = classification.make_ef_structure(score_data, fpf_value, sort_order)
            if ef_data:
                outcome = classification.calculate_ef(ef_data, fpf_value)
                record.set_prop(outcome[0], outcome[1], 'ef')

        by_ensemble[candidate] = record

    # peel away the best performing ensemble
    return list(screener.find_best_ensemble(by_ensemble, options))
Ejemplo n.º 7
0
def rank_queries(molecules, sort_order, options):
    """
    Evaluate every query on its own as a single-member ensemble.
    :param molecules: sequence whose first item exposes a ``scores`` mapping; forwarded to
        classification.make_score_structure
    :param sort_order: forwarded to classification.calculate_auc and make_ef_structure
    :param options: forwarded to classification.make_fpfList
    :return: dict mapping each one-element tuple ``(query,)`` to the EnsembleStorage object
        holding its 'ensemble', 'auc' and 'ef' properties
    """
    evaluated = {}

    # iterate over a snapshot of the keys, taken before any scoring happens
    for query in list(molecules[0].scores.keys()):
        single = (query,)

        storage = EnsembleStorage()
        storage.set_prop('ensemble', single)

        scores = classification.make_score_structure(molecules, single)

        roc_data = classification.make_auc_structure(scores)
        storage.set_prop(
            'auc', classification.calculate_auc(roc_data, sort_order, 'no stats'))

        # store an enrichment factor for every fpf that yields an ef structure
        for raw_fpf in classification.make_fpfList(options, scores):
            fraction = float(raw_fpf)
            ef_data = classification.make_ef_structure(scores, fraction, sort_order)
            if not ef_data:
                continue
            ef_result = classification.calculate_ef(ef_data, fraction)
            storage.set_prop(ef_result[0], ef_result[1], 'ef')

        evaluated[single] = storage

    return evaluated
Ejemplo n.º 8
0
def compare(molecules, ensemble_lookup, options):
    """
    Compare the performance of two ensembles and write out the differences.

    The two ensembles are the first two keys of ``ensemble_lookup`` in sorted order. AUC and
    enrichment factor differences are gathered in a stats dict and handed to
    output.write_diff_summary; roc data and plots are produced when the corresponding options
    are set.
    :param molecules: forwarded to the classification helpers (deep-copied once per ensemble)
        and to plotter
    :param ensemble_lookup: mapping with at least two keys; keys are treated as file paths when
        building the header names
    :param options: provides ``write_roc`` and ``plot`` and is forwarded to make_fpfList,
        output.write_diff_summary, output.write_roc and plotter
    :return: None
    """

    print(" Analyzing differences ... ")
    print('')
    sort_order = classification.get_sort_order(molecules)

    # the two ensembles under comparison: first two keys in sorted order
    ordered_keys = sorted(ensemble_lookup.keys())
    first_key = ordered_keys[0]
    second_key = ordered_keys[1]

    # header row: blank cell, the two ensemble names without '.csv', then the statistic columns
    stats = {
        'header': [
            ' ',
            os.path.basename(first_key).replace('.csv', ''),
            os.path.basename(second_key).replace('.csv', ''),
            'Difference',
            '95% CI',
            'p-value',
        ],
    }

    # each ensemble is scored against its own deep copy of the molecules
    first_molecules = copy.deepcopy(molecules)
    second_molecules = copy.deepcopy(molecules)

    first_scores = classification.make_score_structure(
        first_molecules, ensemble_lookup[first_key])
    second_scores = classification.make_score_structure(
        second_molecules, ensemble_lookup[second_key])

    first_roc = classification.make_auc_structure(first_scores)
    second_roc = classification.make_auc_structure(second_scores)

    # auc difference
    stats['AUC'] = classification.calculate_auc_diff(first_roc, second_roc, sort_order)

    # enrichment factor differences, only where both ensembles yield an ef structure
    for raw_fpf in make_fpfList(options):
        fraction = float(raw_fpf)
        first_ef = classification.make_ef_structure(first_scores, fraction, sort_order)
        second_ef = classification.make_ef_structure(second_scores, fraction, sort_order)

        if not first_ef or not second_ef:
            continue
        stats['E%s' % fraction] = classification.calculate_ef_diff(
            first_ef, second_ef, fraction)

    # write results summary
    output.write_diff_summary(stats, options)

    # write roc curves
    if options.write_roc:
        print(" Writing ROC data ... ")
        print('')
        output.write_roc(first_roc, first_key, options)
        output.write_roc(second_roc, second_key, options)

    # plot
    if options.plot:
        print(" Making plots ... ")
        print('')
        plotter(molecules, ensemble_lookup, options)
Ejemplo n.º 9
0
def compare(molecules, ensemble_lookup, options):
    """
    Analyze the differences between two ensembles.

    The first two keys of ``ensemble_lookup`` (in sorted order) identify the ensembles. Their
    AUC difference and, for every fpf where both have an ef structure, their enrichment factor
    difference are passed to output.write_diff_summary. ROC data is written when
    ``options.write_roc`` is set and plotter is called when ``options.plot`` is set.
    :param molecules: forwarded to the classification helpers (as deep copies) and to plotter
    :param ensemble_lookup: mapping with at least two keys; each key's basename, minus
        '.csv', becomes a header column
    :param options: provides ``write_roc`` and ``plot``; also forwarded to make_fpfList and to
        the output and plotting helpers
    :return: None
    """

    print(" Analyzing differences ... ")
    print('')
    sort_order = classification.get_sort_order(molecules)

    names = sorted(ensemble_lookup.keys())
    name_a = names[0]
    name_b = names[1]

    # summary table header
    header = [' ']
    for path in (name_a, name_b):
        header.append(os.path.basename(path).replace('.csv', ''))
    header.extend(['Difference', '95% CI', 'p-value'])

    stats = {}
    stats['header'] = header

    # independent deep copies, one per ensemble
    molecules_a = copy.deepcopy(molecules)
    molecules_b = copy.deepcopy(molecules)

    scores_a = classification.make_score_structure(molecules_a, ensemble_lookup[name_a])
    scores_b = classification.make_score_structure(molecules_b, ensemble_lookup[name_b])

    roc_a = classification.make_auc_structure(scores_a)
    roc_b = classification.make_auc_structure(scores_b)

    # calculate auc value differences
    stats['AUC'] = classification.calculate_auc_diff(roc_a, roc_b, sort_order)

    # calculate enrichment factor differences
    for entry in make_fpfList(options):
        fpf_value = float(entry)
        ef_a = classification.make_ef_structure(scores_a, fpf_value, sort_order)
        ef_b = classification.make_ef_structure(scores_b, fpf_value, sort_order)

        # a difference needs an ef structure from both ensembles
        if ef_a and ef_b:
            label = 'E%s' % fpf_value
            stats[label] = classification.calculate_ef_diff(ef_a, ef_b, fpf_value)

    # write results summary
    output.write_diff_summary(stats, options)

    # write roc curves
    if options.write_roc:
        print(" Writing ROC data ... ")
        print('')
        output.write_roc(roc_a, name_a, options)
        output.write_roc(roc_b, name_b, options)

    # plot
    if options.plot:
        print(" Making plots ... ")
        print('')
        plotter(molecules, ensemble_lookup, options)