コード例 #1
0
ファイル: Smetric.py プロジェクト: ppypp/CAFA_assessment_tool
def MI(Info, threshold):
    '''
    Compute the MI (Misinformation) of a prediction at a single threshold.

    The 'FP' value stored in Info.data for each protein of Info.prediction
    is summed, and the sum is divided by a protein count chosen by Info.mode.

    Input:
    Info      : Object      Reads prediction, data, mode, local_path and
                            ProteinInBenchmark / ProteinInPrediction
    threshold : Float       {0.00 -> 1.00}

    Output:
    [0]       : Float       Misinformation
    '''

    # Accumulate the FP (False Positive) value of every predicted protein
    fp_sum = 0
    for protein in Info.prediction:
        try:
            fp_value = Info.data[protein][threshold]['FP']
            vwrite("Protein: {},\t FP: {:.2f}\n".format(protein, fp_value), Info.local_path, 1)
            fp_sum += fp_value
        except KeyError:
            # No FP entry for this protein/threshold: report it and move on
            vprint("Protein: {} has no FP".format(protein), 1)

    # "full" mode normalizes by N (# proteins in benchmark); every other
    # mode ("partial") by m(0) (# proteins in prediction at threshold 0.00).
    # NOTE(review): a zero count is not guarded here and raises
    # ZeroDivisionError.
    if Info.mode == "full":
        denominator = Info.ProteinInBenchmark
    else:
        denominator = Info.ProteinInPrediction[0.00]
    result = fp_sum / denominator

    vwrite("Total: {:.2f},\t Ne: {},\t MI: {:.2f}\n".format(fp_sum, denominator, result), Info.local_path, 1)
    return result
コード例 #2
0
ファイル: Smetric.py プロジェクト: ppypp/CAFA_assessment_tool
def RU(Info, threshold):
    '''
    Compute the RU (Remaining Uncertainty) of a prediction at a single
    threshold.

    The 'FN' value stored in Info.data for each protein of Info.prediction
    is summed, and the sum is divided by a protein count chosen by Info.mode.

    Input:
    Info      : Object      Reads prediction, data, mode, local_path and
                            ProteinInBenchmark / ProteinInPrediction
    threshold : Float       {0.00 -> 1.00}

    Output:
    [0]       : Float       Remaining Uncertainty
    '''

    # Accumulate the FN (False Negative) value of every predicted protein
    fn_sum = 0
    for protein in Info.prediction:
        try:
            fn_value = Info.data[protein][threshold]['FN']
            vwrite("Protein: {},\t FN: {:.2f}\n".format(protein, fn_value), Info.local_path, 1)
            fn_sum += fn_value
        except KeyError:
            # No FN entry for this protein/threshold: report it and move on
            vprint("Protein: {} has no FN".format(protein), 1)

    # "full" mode normalizes by N (# proteins in benchmark); every other
    # mode ("partial") by m(0) (# proteins in prediction at threshold 0.00).
    # NOTE(review): a zero count is not guarded here and raises
    # ZeroDivisionError.
    if Info.mode == "full":
        denominator = Info.ProteinInBenchmark
    else:
        denominator = Info.ProteinInPrediction[0.00]
    result = fn_sum / denominator

    vwrite("Total: {:.2f},\t Ne: {},\t RU: {:.2f}\n".format(fn_sum, denominator, result), Info.local_path, 1)
    return result
コード例 #3
0
def RC(Info, threshold):
    '''
    Calculates the RC (Recall) for a given prediction at a threshold

    For every protein in Info.prediction the per-protein recall TP / TRUE is
    taken from Info.data_unweighted and summed; the sum is then divided by a
    protein count chosen by Info.mode.

    Proteins that have no entry for this threshold are reported and skipped
    entirely, so no intermediate line is written for them.

    Input:
    Info      : Object      Reads prediction, data_unweighted, mode,
                            local_path and ProteinInBenchmark /
                            ProteinInPrediction
    threshold : Float       {0.00 -> 1.00}

    Output:
    [0]       : Float       Recall (0 in partial mode when no protein is
                            predicted at threshold 0.00)
    '''

    # Per-protein results go to a companion file next to the threshold file
    protein_log = "{}-Protein.txt".format(Info.local_path)

    # Sum of all proteins
    total = 0
    for protein in Info.prediction:
        # Summation of Sum(TP)/ Sum(TRUE)
        try:
            # Get the TP (True Positive)
            TP = Info.data_unweighted[protein][threshold]['TP']
            vprint("TP : {}".format(TP), 15)
            # Get the TRUE (TP + FN) (Truth)
            TRUE = Info.data_unweighted[protein][threshold]['TRUE']
            vprint("TRUE : {}".format(TRUE), 15)
        # Bad Error
        except KeyError:
            vprint("Protein: {} has no TRUE".format(protein), 1)
            # TP/TRUE are unbound (first protein) or still hold the previous
            # protein's values, so nothing meaningful can be logged for this
            # protein: skip it.
            continue

        try:
            val = TP / TRUE
        except ZeroDivisionError:
            # A protein with TRUE == 0 has no true terms in the benchmark and
            # is not counted. This should NEVER happen for FMAX.
            vprint("Protein {} had a 0 TRUE @ {}".format(protein, threshold), 1)
            vwrite('Protein: {} RC : NONE\n'.format(protein), protein_log, 1)
        else:
            vwrite('Protein: {} RC : {:.2f}\n'.format(protein, val), protein_log, 1)
            total += val

        # Store intermediate data
        vwrite('Protein: {}\t TP: {}\t TRUE: {}\n'.format(protein, TP, TRUE), Info.local_path, 1)

    # If in full mode, divide by N (# proteins in benchmark)
    if Info.mode == "full":
        # ProteinInBenchmark should never be 0
        recall = total / Info.ProteinInBenchmark
    # If in partial mode, divide by m(0) (# proteins in prediction)
    else: # "partial"
        try:
            recall = total / (Info.ProteinInPrediction[0.00])
        except ZeroDivisionError:
            vprint("No protein predicted at 0", 4)
            recall = 0

    vwrite('Recall: {:.2f}\n'.format(recall), Info.local_path, 1)
    # Return the calculated Recall
    return recall
コード例 #4
0
def PR(Info, threshold):
    '''
    Calculates the PR (Precision) for a given prediction at a threshold

    For every protein in Info.prediction the per-protein precision TP / POS
    is taken from Info.data and summed; the sum is then divided by
    Info.ProteinInPrediction[threshold].

    Proteins that have no entry for this threshold are reported and skipped
    entirely, so no intermediate line is written for them.

    Input:
    Info      : Object      Reads prediction, data, local_path and
                            ProteinInPrediction
    threshold : Float       {0.00 -> 1.00}

    Output:
    [0]       : Float       Precision (0 when no protein is predicted at
                            this threshold)
    '''

    # Per-protein results go to a companion file next to the threshold file
    protein_log = "{}-Protein.txt".format(Info.local_path)

    # Sum of all proteins
    total = 0
    for protein in Info.prediction:
        # Summation of Sum(TP)/ Sum(POS)
        try:
            # Get the TP (True Positive)
            TP = Info.data[protein][threshold]['TP']
            vprint("TP : {}".format(TP), 15)
            # Get the POS (TP + FP) (Positive)
            POS = Info.data[protein][threshold]['POS']
            vprint("POS : {}".format(POS), 15)
        # Bad Error
        except KeyError:
            vprint("Protein: {} has no POS".format(protein), 1)
            # TP/POS are unbound (first protein) or still hold the previous
            # protein's values, so nothing meaningful can be logged for this
            # protein: skip it.
            continue

        try:
            val = TP / POS
        except ZeroDivisionError:
            # Nothing predicted for this protein at this threshold: it adds
            # nothing to the sum.
            vprint(
                "Protein {} had a 0 POS @ {}".format(protein, threshold),
                14)
            vwrite('Protein: {} PR : NONE\n'.format(protein), protein_log, 1)
        else:
            vwrite('Protein: {} PR : {:.2f}\n'.format(protein, val),
                   protein_log, 1)
            total += val

        # Store intermediate data
        vwrite(
            'Protein: {}\t TP: {:.2f}\t POS: {:.2f}\n'.format(
                protein, TP, POS), Info.local_path, 1)

    # Divide by m(T) (# of proteins with at least one prediction @ threshold)
    try:
        precision = total / Info.ProteinInPrediction[threshold]
    except ZeroDivisionError:
        vprint("No protein predicted at {}".format(threshold), 4)
        precision = 0
    vwrite('Precision: {:.2f}\n'.format(precision), Info.local_path, 1)

    # Return the calculated Precision
    return precision
コード例 #5
0
def WFMAX(Info):
    '''
    Find the weighted maximum harmonic mean (WFMAX) of a CAFA prediction.

    Sweeps the thresholds 0.00 .. 1.00 in steps of 0.01. For each one it
    computes PR and RC, combines them with F, writes a line to the overview
    file, and remembers the largest F value seen with its threshold.

    Side effects: Info.local_path is pointed at the data file of the
    threshold being processed and reset to "" before returning.

    Input:
    Info : Object

    Output:
    [0]  : List    [Float, Float]   F-max and the threshold it occurred at
                                    ([0, -1] if no F value exceeded 0)
    '''

    # Output locations for this ontology / type / mode
    result_dir = Info.ResultPath + "/WFMAX/{}/{}/{}".format(
        (Info.ontology.lower()), Info.Type, Info.mode)
    overview_file = "{}/WFMAX_Overview.txt".format(result_dir)
    clear(overview_file)

    # Best [F-val, threshold] found so far
    best = [0, -1]

    for raw_cutoff in numpy.arange(0.00, 1.01, 0.01, float):
        # Round away the float noise accumulated by arange
        cutoff = numpy.around(raw_cutoff, decimals=2)

        # Start from fresh per-threshold files
        data_file = "{}/{}/WFMAX_Data.txt".format(result_dir, cutoff)
        clear(data_file)
        clear("{}-Protein.txt".format(data_file))

        # PR and RC read the output location from Info
        Info.local_path = data_file
        vprint("Threshold is {}".format(cutoff), 15)

        precision = PR(Info, cutoff)
        vprint("PR is {}".format(precision), 15)

        recall = RC(Info, cutoff)
        vprint("RC is {}".format(recall), 15)

        score = F(precision, recall)
        vprint("The WF-val at {:.2f} is {}".format(cutoff, score), 15)

        overview_line = "Threshold: {:.2f},\t PR: {:.4f},\t RC: {:.4f},\t WF: {:.4f}\n".format(
            cutoff, precision, recall, score)
        vwrite(overview_line, overview_file, 1)

        # Keep the strictly largest F-val (the earliest threshold wins ties)
        if score > best[0]:
            best = [score, cutoff]

    # Clear local_path when done
    Info.local_path = ""
    return best
コード例 #6
0
def NSMIN(Info):
    '''
    Find the normalized minimum semantic distance (NSMIN) of a CAFA
    prediction.

    Sweeps the thresholds 0.00 .. 1.00 in steps of 0.01. For each one it
    computes RU and MI, combines them with S, writes a line to the overview
    file, and remembers the smallest S value seen with its threshold.

    Side effects: Info.local_path is pointed at the data file of the
    threshold being processed and reset to "" before returning.

    Input:
    Info : Object

    Output:
    [0]  : List    [Float, Float]   S-min and the threshold it occurred at
                                    ([inf, -1] if no S value was below inf)
    '''

    # Output locations for this ontology / type / mode
    result_dir = Info.ResultPath + "/NSMIN/{}/{}/{}".format(
        (Info.ontology.lower()), Info.Type, Info.mode)
    overview_file = "{}/NSMIN_Overview.txt".format(result_dir)
    clear(overview_file)

    # Best [S-val, threshold] found so far
    best = [float("inf"), -1]

    for raw_cutoff in numpy.arange(0.00, 1.01, 0.01, float):
        # Round away the float noise accumulated by arange
        cutoff = numpy.around(raw_cutoff, decimals=2)

        # Start from a fresh per-threshold file
        data_file = "{}/{}/NSMIN_Data.txt".format(result_dir, cutoff)
        clear(data_file)

        # RU and MI read the output location from Info
        Info.local_path = data_file
        vprint("Threshold is {}".format(cutoff), 15)

        remaining = RU(Info, cutoff)
        vprint("RU is {}".format(remaining), 15)

        misinfo = MI(Info, cutoff)
        vprint("MI is {}".format(misinfo), 15)

        distance = S(remaining, misinfo)
        vprint("The NS-val at {:.2f} is {}".format(cutoff, distance), 15)

        overview_line = "Threshold: {:.2f},\t RU: {:.4f},\t MI: {:.4f},\t NS: {:.4f}\n".format(
            cutoff, remaining, misinfo, distance)
        vwrite(overview_line, overview_file, 1)

        # Keep the strictly smallest S-val (the earliest threshold wins ties)
        if distance < best[0]:
            best = [distance, cutoff]

    # Clear local_path when done
    Info.local_path = ""
    return best
コード例 #7
0
from assessment_new.GOPrediction import GOPrediction
from assessment_new.Tools import Info, readOBO, vprint, vwrite, clear, getTime
import helper
import os
import gc
import pickle as cp

if __name__=='__main__':
    '''
    Main function that takes a predicition and returns calculated values
    '''
    start_time = getTime(0)
    # Read Config
    obo_path, ic_path, prediction_path, benchmark_directory, results_directory = helper.read_config_MAIN()
    # Setup workspace
    vprint('\n Evaluating {}\n'.format(prediction_path), 1 )
    
    ###################################### Prediction IN #################################3
    
    # Get predictions
    all_prediction  = GOPrediction()
    
    prediction_file = open(prediction_path, 'r')
    
    # Read in predictions, split by ontology, and save to disk
    all_prediction.read_and_split_and_write(obo_path, prediction_file)
    
    ##################################################
    # Speed up if rerunning
    #cp.dump(all_prediction, open("Temp/Prediction.all","wb"))
    #all_prediction = cp.load( open("Temp/Prediction.all","rb"))