Example #1
0
def performanc_by_interpro():
    """Log prediction performance grouped by top-level InterPro entry.

    Loads the pickled test predictions for ``FUNCTION`` from ``DATA_ROOT``,
    left-joins them with the protein/InterPro table on ``proteins`` and, for
    every InterPro entry that has no parents, logs one tab-separated line::

        <ipro_id> <number of proteins> <f-measure> <precision> <recall>

    Per protein, true/false positives and false negatives are derived from
    the ``labels`` and ``predictions`` columns.  False negatives are further
    increased by the number of ancestors of the protein's annotated GO terms
    (restricted to terms in ``all_functions``) that are not in ``func_set``,
    with ``GO_ID`` itself excluded.

    Recall is averaged over every protein with at least one non-zero count;
    precision is averaged only over proteins with at least one true positive.
    Entries without any such protein, or with precision + recall equal to
    zero, are not logged.

    The misspelled function name is kept so existing callers keep working.

    Returns:
        None. Results are emitted through ``logging.info``.
    """
    pred_df = pd.read_pickle(DATA_ROOT + 'test-' + FUNCTION + '-preds.pkl')
    ipro_df = load_prot_ipro()
    df = pred_df.merge(ipro_df, on='proteins', how='left')
    ipro = get_ipro()

    # Only entries without parents are reported; keep their original
    # iteration order so the log lines come out in the same sequence.
    root_ids = [ipro_id for ipro_id in ipro
                if len(ipro[ipro_id]['parents']) == 0]

    # Per-entry accumulators:
    #   [proteins seen, proteins scored, proteins with tp > 0,
    #    summed precision, summed recall]
    stats = {ipro_id: [0, 0, 0, 0, 0] for ipro_id in root_ids}

    # Single pass over the proteins.  The per-protein counts do not depend on
    # the InterPro entry, so they are computed once and then added to every
    # entry the protein belongs to (instead of re-scanning the whole frame
    # for each entry).
    for _, row in df.iterrows():
        row_ipros = row['ipros']
        # Proteins without InterPro annotation come out of the left join
        # with a non-list value here and are skipped.
        if not isinstance(row_ipros, list):
            continue
        # assumes the ids in row['ipros'] are hashable (e.g. strings) --
        # TODO confirm against load_prot_ipro()
        hits = {ipro_id for ipro_id in row_ipros if ipro_id in stats}
        if not hits:
            continue

        labels = row['labels']
        predictions = row['predictions']
        tp = np.sum(labels * predictions)
        fp = np.sum(predictions) - tp
        fn = np.sum(labels) - tp

        # Annotated terms' ancestors that fall outside func_set count as
        # additional false negatives.
        missed = set()
        for go_id in row['gos']:
            if go_id in all_functions:
                missed |= get_anchestors(go, go_id)
        missed.discard(GO_ID)
        missed -= func_set
        fn += len(missed)

        scored = not (tp == 0 and fp == 0 and fn == 0)
        if scored and tp != 0:
            precision = tp / (1.0 * (tp + fp))
            recall = tp / (1.0 * (tp + fn))

        for ipro_id in hits:
            acc = stats[ipro_id]
            acc[0] += 1
            if not scored:
                continue
            acc[1] += 1
            if tp != 0:
                acc[2] += 1
                acc[3] += precision
                acc[4] += recall

    for ipro_id in root_ids:
        n_proteins, total, p_total, pr, rc = stats[ipro_id]
        if total > 0 and p_total > 0:
            rc /= total
            pr /= p_total
            if pr + rc > 0:
                f = 2 * pr * rc / (pr + rc)
                logging.info('%s\t%d\t%f\t%f\t%f',
                             ipro_id, n_proteins, f, pr, rc)
Example #2
0
def ipro_table():
    """Print LaTeX table rows of per-InterPro results for BP, MF and CC.

    Loads the InterPro entries and the three result tables
    (``ipro_bp.res``, ``ipro_mf.res``, ``ipro_cc.res``).  Only entries that
    appear in all three tables and whose first value is at least 50 in each
    of them are kept.  One row is printed per kept entry: id, name, then
    values 1-3 of the BP, MF and CC tables, each formatted with two
    decimals.  Rows are ordered by the first BP value, largest first.
    """
    ipro = get_ipro()
    cc = get_ipro_data('ipro_cc.res')
    mf = get_ipro_data('ipro_mf.res')
    bp = get_ipro_data('ipro_bp.res')

    min_support = 50
    # Column order of the printed row: BP first, then MF, then CC.
    ontologies = (bp, mf, cc)
    shared_keys = set(cc) & set(mf) & set(bp)

    rows = []
    for ipro_id in shared_keys:
        # Element 0 of each record is compared against the threshold.
        if not all(table[ipro_id][0] >= min_support for table in ontologies):
            continue
        metrics = tuple(
            table[ipro_id][col] for table in ontologies for col in (1, 2, 3))
        rows.append((ipro_id, ipro[ipro_id]['name']) + metrics)

    # Index 2 of a row is the first BP metric.
    rows.sort(key=lambda row: row[2], reverse=True)

    row_format = (
        '%s & %s & %.2f & %.2f & %.2f & %.2f & %.2f & %.2f & %.2f & %.2f & %.2f \\\\')
    for row in rows:
        print(row_format % row)