Esempio n. 1
0
def calculate_metrics(molecules, ensemble_lookup, filename, options):
    """
    Determine the virtual screening performance of a single ensemble.

    The ensemble stored under ``filename`` in ``ensemble_lookup`` is scored
    against ``molecules``. One AUC result is collected, followed by one
    enrichment factor (EF) result per false positive fraction (fpf) returned
    by ``make_fpfList(options)``.

    :param molecules: list [mol_object_1, mol_object_2, .... ]
    :param ensemble_lookup: container indexed by ``filename``; the looked-up
        value is handed to ``classification.make_score_structure``
        (presumably the tuple of receptors in the ensemble -- TODO confirm)
    :param filename: key into ``ensemble_lookup``; also forwarded to
        ``output.write_roc`` when ROC output is requested
    :param options: interface object that makes command line arguments available.
    :return: list whose first entry is the AUC result, followed by one EF
        result for every fpf that produced a truthy EF structure. According
        to the original author's note the layout is
        [(auc, auclow, auchigh), (fpf, ef, eflow, efhigh), ...]
    """
    # The sort order is fixed to 'asc' for both the AUC and EF calculations.
    ordering = 'asc'

    scores = classification.make_score_structure(
        molecules, ensemble_lookup[filename])

    # AUC always comes first in the returned list.
    roc_data = classification.make_auc_structure(scores)
    results = [classification.calculate_auc(roc_data, ordering)]

    # One enrichment factor entry per requested false positive fraction.
    for raw_fpf in make_fpfList(options):
        fraction = float(raw_fpf)
        ef_data = classification.make_ef_structure(scores, fraction, ordering)
        if not ef_data:
            # No usable EF structure for this fraction: skip it.
            continue
        results.append(
            classification.calculate_ef(ef_data, fraction, None,
                                        'include_intervals'))

    if options.write_roc:
        output.write_roc(roc_data, filename, options)

    return results
Esempio n. 2
0
def calculate_metrics(molecules, ensemble_lookup, filename, options):
    """
    Compute AUC and enrichment factor (EF) metrics for one ensemble.

    :param molecules: list [mol_object_1, mol_object_2, .... ]
    :param ensemble_lookup: container indexed by ``filename``; the selected
        entry is passed on to ``classification.make_score_structure``
        (presumably the receptors forming the ensemble -- TODO confirm)
    :param filename: selects the ensemble in ``ensemble_lookup`` and is passed
        to ``output.write_roc`` if ``options.write_roc`` is set
    :param options: interface object that makes command line arguments available.
    :return: list starting with the AUC result, then one EF result for each
        false positive fraction whose EF structure was truthy. The original
        author's note gives the layout as
        [(auc, auclow, auchigh), (fpf, ef, eflow, efhigh), ...]
    """
    fixed_order = 'asc'  # used for the AUC as well as every EF calculation
    collected = []

    # Score the molecules against the selected ensemble.
    score_data = classification.make_score_structure(molecules, ensemble_lookup[filename])

    # AUC entry.
    auc_data = classification.make_auc_structure(score_data)
    collected.append(classification.calculate_auc(auc_data, fixed_order))

    # EF entries; each fraction is converted to float lazily, one per iteration.
    for cutoff in (float(item) for item in make_fpfList(options)):
        ef_data = classification.make_ef_structure(score_data, cutoff, fixed_order)
        if ef_data:
            collected.append(
                classification.calculate_ef(ef_data, cutoff, None, 'include_intervals'))

    # Optional ROC output.
    if options.write_roc:
        output.write_roc(auc_data, filename, options)

    return collected
Esempio n. 3
0
def compare(molecules, ensemble_lookup, options):
    """
    Compare the virtual screening performance of two ensembles.

    The two ensembles are the first two keys of ``ensemble_lookup`` in sorted
    order. The AUC difference and, for every false positive fraction (fpf)
    returned by ``make_fpfList(options)``, the enrichment factor (EF)
    difference are gathered and passed to ``output.write_diff_summary``.
    ROC data and plots are produced when the corresponding options are set.

    :param molecules: molecule objects to score; each ensemble is scored on
        its own deep copy of this collection
    :param ensemble_lookup: mapping keyed by ensemble file name; needs at
        least two entries (indexing raises IndexError otherwise)
    :param options: interface object; ``write_roc`` and ``plot`` are read
        here, and the object is forwarded to the helper functions
    :return: None
    """

    print(" Analyzing differences ... ")
    print('')
    ordering = classification.get_sort_order(molecules)

    # The two ensembles under comparison are the first two keys, sorted.
    ensemble_names = sorted(ensemble_lookup.keys())
    first_name = ensemble_names[0]
    second_name = ensemble_names[1]

    # Column labels are the file names with every '.csv' occurrence removed.
    labels = [
        os.path.basename(path).replace('.csv', '')
        for path in (first_name, second_name)
    ]
    summary = {
        'header': [' '] + labels + ['Difference', '95% CI', 'p-value'],
    }

    # Score each ensemble on an independent copy of the molecules.
    first_molecules = copy.deepcopy(molecules)
    second_molecules = copy.deepcopy(molecules)

    first_scores = classification.make_score_structure(
        first_molecules, ensemble_lookup[first_name])
    second_scores = classification.make_score_structure(
        second_molecules, ensemble_lookup[second_name])

    first_auc = classification.make_auc_structure(first_scores)
    second_auc = classification.make_auc_structure(second_scores)

    # AUC difference
    summary['AUC'] = classification.calculate_auc_diff(
        first_auc, second_auc, ordering)

    # Enrichment factor differences, one row per fpf usable for both ensembles.
    for raw_fpf in make_fpfList(options):
        fraction = float(raw_fpf)
        first_ef = classification.make_ef_structure(
            first_scores, fraction, ordering)
        second_ef = classification.make_ef_structure(
            second_scores, fraction, ordering)

        if not first_ef or not second_ef:
            continue

        difference = classification.calculate_ef_diff(
            first_ef, second_ef, fraction)
        summary['E%s' % fraction] = difference

    # Results summary
    output.write_diff_summary(summary, options)

    # ROC curves
    if options.write_roc:
        print(" Writing ROC data ... ")
        print('')
        output.write_roc(first_auc, first_name, options)
        output.write_roc(second_auc, second_name, options)

    # Plots
    if options.plot:
        print(" Making plots ... ")
        print('')
        plotter(molecules, ensemble_lookup, options)
Esempio n. 4
0
def compare(molecules, ensemble_lookup, options):
    """
    Report the performance differences between two ensembles.

    The first two keys of ``ensemble_lookup`` (in sorted order) are compared:
    the AUC difference is computed first, then an enrichment factor (EF)
    difference for each false positive fraction from ``make_fpfList(options)``.
    The collected statistics go to ``output.write_diff_summary``.

    :param molecules: molecule objects to score; a separate deep copy is made
        for each of the two ensembles
    :param ensemble_lookup: mapping keyed by ensemble file name; fewer than
        two entries results in an IndexError
    :param options: interface object; its ``write_roc`` and ``plot``
        attributes control the optional ROC output and plotting
    :return: None
    """

    print(" Analyzing differences ... ")
    print('')
    order = classification.get_sort_order(molecules)

    ranked = sorted(ensemble_lookup.keys())
    ens_a, ens_b = ranked[0], ranked[1]

    # Header row: blank corner cell, both ensemble labels, then the stat columns.
    header = [' ']
    for ens in (ens_a, ens_b):
        header.append(os.path.basename(ens).replace('.csv', ''))
    header.extend(('Difference', '95% CI', 'p-value'))
    report = {'header': header}

    # Independent molecule copies, one per ensemble.
    mols_a, mols_b = copy.deepcopy(molecules), copy.deepcopy(molecules)

    scores_a = classification.make_score_structure(mols_a, ensemble_lookup[ens_a])
    scores_b = classification.make_score_structure(mols_b, ensemble_lookup[ens_b])

    auc_a = classification.make_auc_structure(scores_a)
    auc_b = classification.make_auc_structure(scores_b)

    # AUC difference
    report['AUC'] = classification.calculate_auc_diff(auc_a, auc_b, order)

    # EF differences; a fraction is reported only if both EF structures are truthy.
    for level in make_fpfList(options):
        level = float(level)
        ef_a = classification.make_ef_structure(scores_a, level, order)
        ef_b = classification.make_ef_structure(scores_b, level, order)

        if ef_a and ef_b:
            report['E%s' % level] = classification.calculate_ef_diff(ef_a, ef_b, level)

    # Summary of results
    output.write_diff_summary(report, options)

    # Optional ROC data, one file per ensemble.
    if options.write_roc:
        print(" Writing ROC data ... ")
        print('')
        for auc_data, ens in ((auc_a, ens_a), (auc_b, ens_b)):
            output.write_roc(auc_data, ens, options)

    # Optional plots
    if options.plot:
        print(" Making plots ... ")
        print('')
        plotter(molecules, ensemble_lookup, options)