Example #1
0
def CalcMCCF1(pred=None, truth=None, probCutoff=0.5, contactCutoff=8.0):
    """Compute confusion counts, MCC and F1 for a predicted contact matrix.

    A pair (i, j) is counted as predicted when pred[i, j] > probCutoff and
    as a true contact when 0 < truth[i, j] < contactCutoff. Entries of truth
    that are <= 0 are therefore always treated as non-contacts.

    The evaluation is repeated on three upper-triangular index sets, in this
    order: j - i >= 24, j - i >= 12 and j - i >= 6 (named LR, MLR and SMLR
    in the code).

    Args:
        pred: 2-D numpy array of predicted contact scores.
        truth: 2-D numpy array of true distances, same shape as pred.
        probCutoff: score threshold above which a pair is predicted.
        contactCutoff: distance threshold below which a pair is a contact.

    Returns:
        A 1-D numpy array with 24 entries: for each of the three index sets,
        [MCC, TP, FP, TN, FN, F1, precision, recall]. MCC, F1, precision and
        recall are whatever Metrics.MCC / Metrics.F1 return for the counts.

    Raises:
        SystemExit: with code -1 (after printing a message) when pred or
            truth is None.
        AssertionError: when pred and truth differ in shape.
    """
    # Raise SystemExit directly instead of calling exit(): that helper is
    # injected by the site module for interactive use, is missing when the
    # interpreter runs without site, and closes sys.stdin as a side effect.
    if pred is None:
        print('please provide a predicted contact matrix')
        raise SystemExit(-1)

    if truth is None:
        print('please provide a true distance matrix')
        raise SystemExit(-1)

    assert pred.shape == truth.shape, \
        'pred and truth must have the same shape'

    ## in case the matrix is not square, e.g., interfacial contact matrix
    seqLen = pred.shape[0]
    seqLen2 = pred.shape[1]

    pred_binary = (pred > probCutoff)
    truth_binary = (0 < truth) & (truth < contactCutoff)

    ## encode each pair as 2 * predicted + truth, i.e. a value in 0..3
    pred_truth = pred_binary * 2 + truth_binary

    mask_LR = np.triu_indices(seqLen, 24, m=seqLen2)
    mask_MLR = np.triu_indices(seqLen, 12, m=seqLen2)
    mask_SMLR = np.triu_indices(seqLen, 6, m=seqLen2)

    metrics = []
    for mask in [mask_LR, mask_MLR, mask_SMLR]:
        res = pred_truth[mask]
        total = res.shape[0]

        ## minlength=4 keeps all four categories present even when absent
        count = np.bincount(res, minlength=4)
        assert (total == np.sum(count))

        TN = count[0]  ## pred=0, truth=0
        FN = count[1]  ## pred=0, truth=1
        FP = count[2]  ## pred=1, truth=0
        TP = count[3]  ## pred=1, truth=1

        MCC = Metrics.MCC(TP, FP, TN, FN)
        F1, precision, recall = Metrics.F1(TP, FP, TN, FN)

        metrics.extend([MCC, TP, FP, TN, FN, F1, precision, recall])

    return np.array(metrics)
        with open(nativeFile, 'rb') as fh:
            nativeInfo = cPickle.load(fh)
        truth = nativeInfo['atomDistMatrix']['CbCb']
        truths[protein] = truth

    for prob in np.arange(5, 60, 1):
        #print "prob=", prob
        accs = []
        for protein in content:
            acc = CalcMCCF1(pred=preds[protein],
                            truth=truths[protein],
                            probCutoff=prob / 100.)
            accs.append(acc)
        avgacc = np.average(accs, axis=0)

        resultStr = 'per-target avgMCCF1 at cutoff=' + str(
            prob) + ': ' + str_display(avgacc)
        print resultStr

        lrMCC = Metrics.MCC(avgacc[1], avgacc[2], avgacc[3], avgacc[4])
        lrF1, lrprecision, lrrecall = Metrics.F1(avgacc[1], avgacc[2],
                                                 avgacc[3], avgacc[4])

        mrMCC = Metrics.MCC(avgacc[9], avgacc[10], avgacc[11], avgacc[12])
        mrF1, mrprecision, mrrecall = Metrics.F1(avgacc[9], avgacc[10],
                                                 avgacc[11], avgacc[12])
        print 'per-pair avgMCCF1 at cutoff=' + str(prob) + ': ' + str_display([
            lrMCC, lrF1, lrprecision, lrrecall, mrMCC, mrF1, mrprecision,
            mrrecall
        ])