EfficiencyConstraint, OptimizationStrategy, BreakSymmetries, Silent, UniquenessConstraint, PerfectClassifier, UpperBoundFalsePos, UpperBoundFalseNeg ) if 1 : # basel classifier GateInputs = "gate_input(1,negative,g7) gate_input(2,negative,g6) gate_input(3,negative,g4) gate_input(4,negative,g3) " GateInputs+= "gate_input(5,positive,g1) gate_input(5,positive,g2) gate_input(5,positive,g8) " GateInputs+= "gate_input(6,positive,g1) gate_input(6,positive,g5) gate_input(6,positive,g8)" classifier.check_classifier(FnameCSV, GateInputs) if 0 : # without Kobi's constraints GateInputs = "gate_input(2,positive,g41) gate_input(1,negative,g4) gate_input(1,negative,g13) gate_input(1,negative,g34) gate_input(2,negative,g3) gate_input(2,negative,g18)" FnamePDF = "C2_classifier_without_kobi.pdf" classifier.gateinputs2pdf(FnamePDF, GateInputs) if 0 : FnameMAT = "casestudy01.mat" Threshold = 250 classifier.mat2csv(FnameMAT, Threshold)
def _gates_of_type(GateInputs, gate_type):
    """Collect ``"<gate number>, <miRNA>"`` strings for one gate type.

    GateInputs is split on whitespace; every atom containing ``gate_type``
    (``"negative"`` or ``"positive"``) is taken apart around
    ``",<gate_type>,"``. Atoms look like ``gate_input(1,negative,g8)``.
    """
    separator = "," + gate_type + ","
    gates = []
    for atom in GateInputs.split():
        if gate_type in atom:
            interstep = atom.split(separator)
            miRNA = interstep[1][:-1]        # drop the closing ")"
            gatenumber = interstep[0][11:]   # drop the leading "gate_input("
            gates.append(gatenumber + ", " + miRNA)
    return gates


def scores(GateInputs, FnameBinaryCSV, FnameOriginalCSV, BinThreshold):
    """Print performance scores of a classifier circuit.

    Reports the biochemical parameters in use, the average and worst
    classification margins (on log10 of the circuit output), the
    confusion counts with sensitivity/specificity, the binarization
    margins file and the area under the ROC curve.

    Parameters
    ----------
    GateInputs : str
        Whitespace-separated ``gate_input(<gate>,<positive|negative>,<miRNA>)``
        atoms describing the classifier.
    FnameBinaryCSV : str
        Passed to ``classifier.check_classifier`` to count false
        negatives and false positives.
    FnameOriginalCSV : str
        Passed to ``classifier.csv2rows`` (samples with an ``"Annots"``
        field) and to ``binaryvalue_margins``.
    BinThreshold
        Passed unchanged to ``binaryvalue_margins``.

    Returns
    -------
    None
        All results are printed.

    Raises
    ------
    ZeroDivisionError
        If the data holds no sample annotated 1 or no sample annotated 0.
    ValueError
        If a circuit output is not strictly positive (``math.log10``).

    Notes
    -----
    Margins are computed per class: samples annotated 1 feed the
    "positive" mean/minimum and samples annotated 0 feed the "negative"
    mean/maximum. Earlier revisions recognised positive samples by a
    non-zero weighted log value and started the negative maximum at 0,
    which skipped positive samples whose output is exactly 1 and capped
    the negative maximum at 0 from below.

    Output uses single-argument ``print(...)`` calls, which behave the
    same as a print statement on Python 2 and as a function on Python 3.
    """
    # INPUT: classifier gates
    NegativeGates = _gates_of_type(GateInputs, "negative")
    PositiveGates = _gates_of_type(GateInputs, "positive")
    print(PositiveGates)

    # read the original (non-binarized) CSV; only the sample rows are used
    _, data_samples = classifier.csv2rows(FnameOriginalCSV)

    # count false negatives / false positives on the binarized data
    false_neg, false_pos = classifier.check_classifier(FnameBinaryCSV, GateInputs)

    print("")
    print("------------------------------------------------------------------")
    print("SCORES:")
    print("------------------------------------------------------------------")
    print("")
    print("Biochemical parameters used:")
    print("-----------")
    print("C_1: " + str(C_1))
    print("C_2: " + str(C_2))
    print("FF4_max: " + str(FF4_max))
    print("T_max: " + str(T_max))
    print("Out_max: " + str(Out_max))
    print("")
    print("Circuit outputs:")
    print("-----------")

    # accumulators for the margins
    pos_log_sum = 0.0
    neg_log_sum = 0.0
    min_pos_log = float("inf")    # smallest log output among positive samples
    max_neg_log = float("-inf")   # largest log output among negative samples
    NumberPosSamples = 0
    NumberNegSamples = 0

    # circuit output for each sample
    annots = []
    circuit_outputs = []
    for sample in data_samples:
        annotation = int(sample["Annots"])
        annots.append(annotation)

        # calculate FF4 (0 when the circuit has no positive gates)
        FF4_value = FF4(sample, PositiveGates) if PositiveGates else 0

        # calculate circuit output
        circ_out = circuit_output(sample, NegativeGates, FF4_value)
        circuit_outputs.append(circ_out)

        log_out = float(math.log10(circ_out))
        if annotation == 1:
            NumberPosSamples += 1
            pos_log_sum += log_out
            min_pos_log = min(min_pos_log, log_out)
        elif annotation == 0:
            NumberNegSamples += 1
            neg_log_sum += log_out
            max_neg_log = max(max_neg_log, log_out)

    print("")
    print("Margins:")
    print("-----------")
    average_margin = (pos_log_sum / NumberPosSamples) - (neg_log_sum / NumberNegSamples)
    print("Average margin of Circuit (C_MarginA): " + str(average_margin))
    worst_margin = min_pos_log - max_neg_log
    print("Worst margin of Circuit (C_MarginW): " + str(worst_margin))

    # PERFORMANCE SCORE 2: margins
    MyLambda = 0.5
    score2 = (MyLambda * average_margin) + ((1 - MyLambda) * worst_margin)

    print("")
    print("Classification:")
    print("-----------")
    print("Number of positive samples (cancer) : " + str(NumberPosSamples))
    print("Number of negative samples (healthy) : " + str(NumberNegSamples))
    print("Number of false positive : " + str(false_pos))
    print("Number of false negative : " + str(false_neg))
    true_pos = NumberPosSamples - false_neg
    true_neg = NumberNegSamples - false_pos
    print("Number of true positive : " + str(true_pos))
    print("Number of true negative : " + str(true_neg))

    sensitivity = float(true_pos) / float(NumberPosSamples)
    specificity = float(true_neg) / float(NumberNegSamples)
    false_neg_rate = float(false_neg) / float(NumberPosSamples)
    false_pos_rate = float(false_pos) / float(NumberNegSamples)

    print("")
    print("Statistics:")
    print("-----------")
    print("Sensitivity : " + str(sensitivity))
    print("Specificity : " + str(specificity))
    print("False positive rate : " + str(false_pos_rate))
    print("False negative rate : " + str(false_neg_rate))

    print("")
    print("Binarization margins:")
    print("-----------")
    # called for its side effect; the returned value is not used here
    binaryvalue_margins(FnameOriginalCSV, BinThreshold)
    print("Wrote .csv file with margins for binary values:")
    print(str(FnameOriginalCSV[:-4]) + "_binarymargins.csv")

    print("")
    print("Performance:")
    print("-----------")
    y_true = np.array(annots)
    y_scores = np.array(circuit_outputs)
    auc = roc_auc_score(y_true, y_scores)
    print("First performance score: Area under the ROC curve: " + str(auc))
    print("Second performance score: Margins (lambda=0.5): " + str(score2))
    print(" Margins (lambda=1.0): " + str(average_margin))
    print("")
# Search settings handed to classifier.csv2asp below.
EfficiencyConstraint = True
OptimizationStrategy = 1
BreakSymmetries = True

import sys
# Put the parent directory first on the import path before importing
# classifier (presumably that is where the module lives).
sys.path = ["../"] + sys.path
import classifier

if __name__ == "__main__":

    # Each step is switched on (1) or off (0) by hand.

    if 1:
        # CSV data -> ASP program
        classifier.csv2asp(
            FnameCSV,
            FnameASP,
            LowerBoundInputs,
            UpperBoundInputs,
            LowerBoundGates,
            UpperBoundGates,
            GateTypes,
            EfficiencyConstraint,
            OptimizationStrategy,
            BreakSymmetries,
        )

    if 1:
        # check a hand-written classifier against the CSV data
        GateInputs = (
            "gate_input(1,negative,g8) gate_input(2,negative,g7) gate_input(3,negative,g6) gate_input(4,negative,g6) "
            "gate_input(5,positive,g1) gate_input(5,positive,g2) gate_input(5,positive,g3) "
            "gate_input(6,positive,g1) gate_input(6,positive,g3) gate_input(6,positive,g4)"
        )
        classifier.check_classifier(FnameCSV, GateInputs)

    if 0:
        # draw a classifier to PDF
        GateInputs = "gate_input(2,positive,g2) gate_input(1,negative,g1)"
        FnamePDF = "toy_classifier.pdf"
        classifier.gateinputs2pdf(FnamePDF, GateInputs)

    if 0:
        # convert a .mat file to CSV using a threshold
        FnameMAT = "casestudy01.mat"
        Threshold = 250
        classifier.mat2csv(FnameMAT, Threshold)
def test_classifiers(solutions, test_data, train_p, train_n, test_p, test_n):
    """
    Tests classifiers on test data set and prints per-solution and average scores.

    For every solution the train balanced accuracy is derived from the
    solution's stored false positives/negatives. If test data is given,
    each solution is additionally checked with
    ``classifier.check_classifier`` and test balanced accuracy, true
    positive rate and true negative rate are collected. Averages over
    all solutions are printed at the end, followed by one ``CSV``
    summary line with the columns: train bacc, train std, test bacc,
    test std, test tpr, test tnr, size.

    Without test data all four test columns of the ``CSV`` line are
    the placeholder ``0.0``.

    Parameters
    ----------
    solutions : list
        list of found solutions; each must provide ``errors``, ``fp``,
        ``fn``, ``size`` and ``solutions_str``
    test_data : str
        test data set (``None`` to skip testing)
    train_p : int
        number of positives in train data
    train_n : int
        number of negatives in train data
    test_p : int
        number of positives in test data
    test_n : int
        number of negatives in test data

    """

    print("\n\n###########################################")
    print("############TESTING CLASSIFIERS############")
    print("###########################################\n")

    bacc_train_list = []
    bacc_test_list = []
    tpr_test_list = []
    tnr_test_list = []
    size_list = []

    for solution_id, solution in enumerate(solutions, start=1):  # iterate over solutions

        # train data scores
        print("\nSOLUTION ", solution_id)  # show solution id
        print("##SUM: ", solution.errors, "##")  # show number of errors
        print("FP: ", solution.fp, "FN: ", solution.fn)  # show number of false positives and negatives
        tp = train_p - solution.fn  # calculate number of true positives
        tn = train_n - solution.fp  # calculate number of true negatives
        train_bacc = calculate_balanced_accuracy(tp, tn, train_p, train_n)  # calculate train bacc
        print("TRAIN BACC: ", train_bacc)
        bacc_train_list.append(train_bacc)
        size_list.append(solution.size)  # size of classifier in number of inputs
        print(solution.solutions_str)  # show solution

        if test_data is not None:
            # test data scores
            # calculate false positives and negatives
            fn, fp = classifier.check_classifier(test_data, solution.solutions_str)
            print("FP: ", fp, " FN: ", fn)
            tp = test_p - fn  # calculate true positives
            tpr_test_list.append(tp / test_p)  # calculate true positive rate and add to list
            tn = test_n - fp  # calculate true negatives
            tnr_test_list.append(tn / test_n)  # calculate true negative rate and add to list
            bacc = calculate_balanced_accuracy(tp, tn, test_p, test_n)  # calculate bacc
            print("TEST BACC: ", bacc)
            bacc_test_list.append(bacc)

    # disabled feature analysis:
    # path_train = test_data
    # head_tail = os.path.split(path_train)
    # path = head_tail[0]
    # file_name = head_tail[1]
    # feature_analyser.rank_features_by_frequency(solutions, path, file_name)  # analyse features

    # average results for all solutions
    print("\n\n###################################")
    print("############AVERAGE RESULTS############")
    print("###################################\n")

    avg_train_bacc = numpy.average(bacc_train_list)  # calculate average train bacc
    print("AVG TRAIN BACC: ", avg_train_bacc)
    if len(bacc_train_list) > 1:  # sample std needs more than one solution
        train_std = numpy.std(bacc_train_list, ddof=1)
    else:
        train_std = 0.0
    print("STD TRAIN BACC: ", train_std)

    # placeholders reported in the CSV line when no test data was given
    avg_test_bacc = 0.0
    test_std = 0.0
    avg_tpr = 0.0
    avg_tnr = 0.0

    if test_data is not None:
        avg_test_bacc = numpy.average(bacc_test_list)  # calculate average test bacc
        print("AVG TEST BACC: ", avg_test_bacc)
        if len(bacc_test_list) > 1:  # sample std needs more than one solution
            test_std = numpy.std(bacc_test_list, ddof=1)
        print("STD TEST BACC: ", test_std)
        avg_tpr = numpy.average(tpr_test_list)  # calculate average tpr
        avg_tnr = numpy.average(tnr_test_list)  # calculate average tnr
        print("TEST TPR: ", avg_tpr)
        print("TEST TNR: ", avg_tnr)

    avg_size = numpy.average(size_list)  # calculate average size
    print("AVG SIZE: ", avg_size)
    print("\n")

    # Single summary line. The test averages are never taken over the
    # empty lists that exist when test_data is None (that would give nan
    # and a RuntimeWarning); the 0.0 placeholders are printed instead.
    print("CSV", ";", avg_train_bacc, ";", train_std, ";", avg_test_bacc, ";", test_std, ";",
          avg_tpr, ";", avg_tnr, ";", avg_size)