def MI(Info, threshold):
    '''
    Compute the MI (Misinformation) of a prediction at a single threshold.

    The FP value stored in Info.data for every protein of Info.prediction
    is accumulated, then normalized by a mode-dependent protein count.

    Input:
    Info      : Object     Reads prediction, data, mode, local_path and
                           ProteinInBenchmark / ProteinInPrediction
    threshold : Float      {0.00 -> 1.00}

    Output:
    [0]       : Float      Misinformation
    '''
    fp_sum = 0
    for protein in Info.prediction:
        try:
            # FP (False Positive) recorded for this protein at this threshold
            false_pos = Info.data[protein][threshold]['FP']
            vwrite(
                "Protein: {},\t FP: {:.2f}\n".format(protein, false_pos),
                Info.local_path, 1)
            fp_sum += false_pos
        except KeyError:
            # Bad Error: no FP entry available, the protein adds nothing
            vprint("Protein: {} has no FP".format(protein), 1)

    # "full" mode    -> N    (# proteins in benchmark)
    # any other mode -> m(0) (# proteins in prediction), i.e. "partial"
    denominator = (Info.ProteinInBenchmark if Info.mode == "full"
                   else Info.ProteinInPrediction[0.00])
    result = fp_sum / denominator

    vwrite(
        "Total: {:.2f},\t Ne: {},\t MI: {:.2f}\n".format(
            fp_sum, denominator, result),
        Info.local_path, 1)
    return result
def RU(Info, threshold):
    '''
    Compute the RU (Remaining Uncertainty) of a prediction at a single
    threshold.

    The FN value stored in Info.data for every protein of Info.prediction
    is accumulated, then normalized by a mode-dependent protein count.

    Input:
    Info      : Object     Reads prediction, data, mode, local_path and
                           ProteinInBenchmark / ProteinInPrediction
    threshold : Float      {0.00 -> 1.00}

    Output:
    [0]       : Float      Remaining Uncertainty
    '''
    fn_sum = 0
    for protein in Info.prediction:
        try:
            # FN (False Negative) recorded for this protein at this threshold
            false_neg = Info.data[protein][threshold]['FN']
            vwrite(
                "Protein: {},\t FN: {:.2f}\n".format(protein, false_neg),
                Info.local_path, 1)
            fn_sum += false_neg
        except KeyError:
            # Bad Error: no FN entry available, the protein adds nothing
            vprint("Protein: {} has no FN".format(protein), 1)

    # "full" mode    -> N    (# proteins in benchmark)
    # any other mode -> m(0) (# proteins in prediction), i.e. "partial"
    denominator = (Info.ProteinInBenchmark if Info.mode == "full"
                   else Info.ProteinInPrediction[0.00])
    result = fn_sum / denominator

    vwrite(
        "Total: {:.2f},\t Ne: {},\t RU: {:.2f}\n".format(
            fn_sum, denominator, result),
        Info.local_path, 1)
    return result
def RC(Info, threshold):
    '''
    Calculates the RC (Recall) for a given prediction at a threshold

    For every protein in Info.prediction the ratio TP / TRUE is taken from
    Info.data_unweighted and summed; the sum is then divided by a
    mode-dependent protein count.

    Input:
    Info      : Object     Reads prediction, data_unweighted, mode,
                           local_path and ProteinInBenchmark /
                           ProteinInPrediction
    threshold : Float      {0.00 -> 1.00}

    Output:
    [0]       : Float      Recall (0 in partial mode when the protein
                           count at 0.00 is zero)
    '''
    protein_log = "{}-Protein.txt".format(Info.local_path)

    # Sum of all proteins
    total = 0
    for protein in Info.prediction:
        # Summation of Sum(TP)/ Sum(TRUE)
        try:
            # Get the TP (True Positive)
            TP = Info.data_unweighted[protein][threshold]['TP']
            vprint("TP : {}".format(TP), 15)
            # Get the TRUE (TP + FN) (Truth)
            TRUE = Info.data_unweighted[protein][threshold]['TRUE']
            vprint("TRUE : {}".format(TRUE), 15)
        # Bad Error
        except KeyError:
            vprint("Protein: {} has no TRUE".format(protein), 1)
            # TP / TRUE are unbound (first protein) or still hold the
            # previous protein's values here, so there is nothing valid to
            # record for this protein: skip it.
            continue

        try:
            val = TP / TRUE
        except ZeroDivisionError:
            # Every time a protein has no TRUE, i.e. TRUE: 0, don't count
            # the protein. Means there are no True terms in benchmark.
            # This should NEVER happen for FMAX
            vprint("Protein {} had a 0 TRUE @ {}".format(protein, threshold), 1)
            vwrite('Protein: {} RC : NONE\n'.format(protein), protein_log, 1)
        else:
            vwrite('Protein: {} RC : {:.2f}\n'.format(protein, val),
                   protein_log, 1)
            total += val

        # Store intermediate data
        vwrite('Protein: {}\t TP: {}\t TRUE: {}\n'.format(protein, TP, TRUE),
               Info.local_path, 1)

    # If in full mode, divide by N (# proteins in benchmark)
    if Info.mode == "full":
        # ProteinInBenchmark should never be 0
        recall = total / Info.ProteinInBenchmark
    # If in partial mode, divide by m(0) (# proteins in prediction)
    else:  # "partial"
        try:
            recall = total / (Info.ProteinInPrediction[0.00])
        except ZeroDivisionError:
            vprint("No protein predicted at 0", 4)
            recall = 0

    vwrite('Recall: {:.2f}\n'.format(recall), Info.local_path, 1)
    # Return the calculated Recall
    return recall
def PR(Info, threshold):
    '''
    Calculates the PR (Precision) for a given prediction at a threshold

    For every protein in Info.prediction the ratio TP / POS is taken from
    Info.data and summed; the sum is then divided by
    Info.ProteinInPrediction[threshold].

    Input:
    Info      : Object     Reads prediction, data, local_path and
                           ProteinInPrediction
    threshold : Float      {0.00 -> 1.00}

    Output:
    [0]       : Float      Precision (0 when the protein count at this
                           threshold is zero)
    '''
    protein_log = "{}-Protein.txt".format(Info.local_path)

    # Sum of all proteins
    total = 0
    for protein in Info.prediction:
        # Summation of Sum(TP)/ Sum(POS)
        try:
            # Get the TP (True Positive)
            TP = Info.data[protein][threshold]['TP']
            vprint("TP : {}".format(TP), 15)
            # Get the POS (TP + FP) (Positive)
            POS = Info.data[protein][threshold]['POS']
            vprint("POS : {}".format(POS), 15)
        # Bad Error
        except KeyError:
            vprint("Protein: {} has no POS".format(protein), 1)
            # TP / POS are unbound (first protein) or still hold the
            # previous protein's values here, so there is nothing valid to
            # record for this protein: skip it.
            continue

        try:
            val = TP / POS
        except ZeroDivisionError:
            # POS of 0 at this threshold: the protein is not counted
            vprint(
                "Protein {} had a 0 POS @ {}".format(protein, threshold), 14)
            vwrite('Protein: {} PR : NONE\n'.format(protein), protein_log, 1)
        else:
            vwrite('Protein: {} PR : {:.2f}\n'.format(protein, val),
                   protein_log, 1)
            total += val

        # Store intermediate data
        vwrite(
            'Protein: {}\t TP: {:.2f}\t POS: {:.2f}\n'.format(
                protein, TP, POS), Info.local_path, 1)

    # Divide by m(T) (# of proteins with at least one prediction @ threshold)
    try:
        precision = total / Info.ProteinInPrediction[threshold]
    except ZeroDivisionError:
        vprint("No protein predicted at {}".format(threshold), 4)
        precision = 0

    vwrite('Precision: {:.2f}\n'.format(precision), Info.local_path, 1)
    # Return the calculated Precision
    return precision
def WFMAX(Info):
    '''
    Find the weighted maximum harmonic mean (WFMAX) of a CAFA prediction.

    Sweeps the thresholds 0.00 .. 1.00 in steps of 0.01; at each one PR and
    RC are computed, combined with F(), logged to the overview file, and
    the largest F-value seen so far is remembered.

    Input:
    Info      : Object     Reads ResultPath, ontology, Type and mode;
                           local_path is set per threshold and reset to ""
                           once the sweep is finished

    Output:
    [0]       : List       [Fmax, threshold of Fmax]; stays [0, -1] when
                           no F-value is greater than 0
    '''
    # Output locations for this ontology / type / mode
    base_dir = Info.ResultPath + "/WFMAX/{}/{}/{}".format(
        (Info.ontology.lower()), Info.Type, Info.mode)
    overview_file = "{}/WFMAX_Overview.txt".format(base_dir)
    clear(overview_file)

    best_f = 0
    best_threshold = -1

    for raw_threshold in numpy.arange(0.00, 1.01, 0.01, float):
        threshold = numpy.around(raw_threshold, decimals=2)

        # Remove stale per-threshold output before the inner methods write
        threshold_file = "{}/{}/WFMAX_Data.txt".format(base_dir, threshold)
        clear(threshold_file)
        clear("{}-Protein.txt".format(threshold_file))

        # PR() and RC() pick their output file up from Info.local_path
        Info.local_path = threshold_file
        vprint("Threshold is {}".format(threshold), 15)

        precision = PR(Info, threshold)
        vprint("PR is {}".format(precision), 15)

        recall = RC(Info, threshold)
        vprint("RC is {}".format(recall), 15)

        f_value = F(precision, recall)
        vprint("The WF-val at {:.2f} is {}".format(threshold, f_value), 15)

        vwrite(
            "Threshold: {:.2f},\t PR: {:.4f},\t RC: {:.4f},\t WF: {:.4f}\n".
            format(threshold, precision, recall, f_value), overview_file, 1)

        # Strictly greater: on ties the earliest threshold is kept
        if f_value > best_f:
            best_f, best_threshold = f_value, threshold

    # Clear local_path when done
    Info.local_path = ""
    return [best_f, best_threshold]
def NSMIN(Info):
    '''
    Find the normalized minimum semantic distance (NSMIN) of a CAFA
    prediction.

    Sweeps the thresholds 0.00 .. 1.00 in steps of 0.01; at each one RU and
    MI are computed, combined with S(), logged to the overview file, and
    the smallest S-value seen so far is remembered.

    Input:
    Info      : Object     Reads ResultPath, ontology, Type and mode;
                           local_path is set per threshold and reset to ""
                           once the sweep is finished

    Output:
    [0]       : List       [Smin, threshold of Smin]; stays
                           [float("inf"), -1] when no S-value is smaller
                           than infinity
    '''
    # Output locations for this ontology / type / mode
    base_dir = Info.ResultPath + "/NSMIN/{}/{}/{}".format(
        (Info.ontology.lower()), Info.Type, Info.mode)
    overview_file = "{}/NSMIN_Overview.txt".format(base_dir)
    clear(overview_file)

    best_s = float("inf")
    best_threshold = -1

    for raw_threshold in numpy.arange(0.00, 1.01, 0.01, float):
        threshold = numpy.around(raw_threshold, decimals=2)

        # Remove stale per-threshold output before the inner methods write
        threshold_file = "{}/{}/NSMIN_Data.txt".format(base_dir, threshold)
        clear(threshold_file)

        # RU() and MI() pick their output file up from Info.local_path
        Info.local_path = threshold_file
        vprint("Threshold is {}".format(threshold), 15)

        remaining = RU(Info, threshold)
        vprint("RU is {}".format(remaining), 15)

        misinfo = MI(Info, threshold)
        vprint("MI is {}".format(misinfo), 15)

        s_value = S(remaining, misinfo)
        vprint("The NS-val at {:.2f} is {}".format(threshold, s_value), 15)

        vwrite(
            "Threshold: {:.2f},\t RU: {:.4f},\t MI: {:.4f},\t NS: {:.4f}\n".
            format(threshold, remaining, misinfo, s_value), overview_file, 1)

        # Strictly smaller: on ties the earliest threshold is kept
        if s_value < best_s:
            best_s, best_threshold = s_value, threshold

    # Clear local_path when done
    Info.local_path = ""
    return [best_s, best_threshold]
from assessment_new.GOPrediction import GOPrediction
from assessment_new.Tools import Info, readOBO, vprint, vwrite, clear, getTime
import helper
import os
import gc
import pickle as cp

if __name__ == '__main__':
    # Main function that takes a prediction and returns calculated values
    start_time = getTime(0)

    # Read Config
    (obo_path, ic_path, prediction_path,
     benchmark_directory, results_directory) = helper.read_config_MAIN()

    # Setup workspace
    vprint('\n Evaluating {}\n'.format(prediction_path), 1)

    ###################################### Prediction IN #################################3
    # Get predictions
    all_prediction = GOPrediction()

    # Read in predictions, split by ontology, and save to disk.
    # The context manager closes the prediction file even if reading fails.
    # NOTE(review): assumes read_and_split_and_write consumes the file
    # before returning and keeps no reference to the handle - confirm.
    with open(prediction_path, 'r') as prediction_file:
        all_prediction.read_and_split_and_write(obo_path, prediction_file)

    ##################################################
    # Speed up if rerunning
    # NOTE: pickle must only ever be loaded from files this tool wrote itself
    #cp.dump(all_prediction, open("Temp/Prediction.all","wb"))
    #all_prediction = cp.load( open("Temp/Prediction.all","rb"))