コード例 #1
0
ファイル: C2.py プロジェクト: hklarner/RnaCancerClassifier
            EfficiencyConstraint,
            OptimizationStrategy,
            BreakSymmetries,
            Silent,
            UniquenessConstraint,
	    PerfectClassifier,
	    UpperBoundFalsePos,
	    UpperBoundFalseNeg
            )
            
    # Every stage below is switched on or off by hand through a literal 1/0.
    if 1:
        # Basel classifier: gates 1-4 each list one negative input, gates 5
        # and 6 each list three positive inputs.  Checked against FnameCSV.
        GateInputs = (
            "gate_input(1,negative,g7) gate_input(2,negative,g6) gate_input(3,negative,g4) gate_input(4,negative,g3) "
            "gate_input(5,positive,g1) gate_input(5,positive,g2) gate_input(5,positive,g8) "
            "gate_input(6,positive,g1) gate_input(6,positive,g5) gate_input(6,positive,g8)"
        )
        classifier.check_classifier(FnameCSV, GateInputs)

    if 0:
        # Classifier obtained without Kobi's constraints, handed to
        # classifier.gateinputs2pdf together with the target PDF name.
        FnamePDF = "C2_classifier_without_kobi.pdf"
        GateInputs = "gate_input(2,positive,g41) gate_input(1,negative,g4) gate_input(1,negative,g13) gate_input(1,negative,g34) gate_input(2,negative,g3) gate_input(2,negative,g18)"
        classifier.gateinputs2pdf(FnamePDF, GateInputs)

    if 0:
        # Hands the .mat case study and a threshold of 250 to
        # classifier.mat2csv (presumably a binarising conversion -- confirm
        # against the classifier module).
        Threshold = 250
        FnameMAT = "casestudy01.mat"
        classifier.mat2csv(FnameMAT, Threshold)

    
コード例 #2
0
def scores(GateInputs, FnameBinaryCSV, FnameOriginalCSV, BinThreshold):
    """Print a performance report for one classifier circuit.

    The report covers the biochemical parameters in use, the average and
    worst classification margins (on log10 circuit outputs), the confusion
    counts and derived rates, the binarization-margin CSV and the ROC AUC.

    Parameters
    ----------
    GateInputs : str
        Whitespace-separated tokens of the form
        ``gate_input(<gate>,<positive|negative>,<miRNA>)``.
    FnameBinaryCSV : str
        File name handed to ``classifier.check_classifier`` to count false
        negatives and false positives.
    FnameOriginalCSV : str
        File name handed to ``classifier.csv2rows`` (its sample rows must
        carry an ``"Annots"`` entry) and to ``binaryvalue_margins``.
    BinThreshold
        Forwarded unchanged to ``binaryvalue_margins``.

    Returns
    -------
    None
        Everything is reported through ``print``.

    Raises
    ------
    ValueError
        From ``math.log10`` when a circuit output is zero or negative.

    Notes
    -----
    * Relies on module-level names that are not defined in this function:
      ``C_1``, ``C_2``, ``FF4_max``, ``T_max``, ``Out_max``, ``FF4``,
      ``circuit_output``, ``binaryvalue_margins``, ``classifier``, ``math``,
      ``np`` and ``roc_auc_score``.
    * The worst margin is the smallest positive-class term minus the largest
      negative-class term.  Each extreme is tracked over its own class only,
      so a negative-class maximum below zero is kept (it used to be clamped
      at 0) and a positive sample whose log output is exactly 0 still counts.
    * Ratios whose denominator is zero (a class without samples) are
      reported as NaN instead of raising ``ZeroDivisionError``.
    * Only the single-argument ``print(...)`` form is used, which parses
      under both Python 2 and Python 3.
    """

    def _ratio(numerator, denominator):
        # An empty class makes the ratio undefined: report NaN, do not crash.
        if denominator == 0:
            return float("nan")
        return float(numerator) / float(denominator)

    # INPUT: classifier gates.  "gate_input(" is 11 characters long, so
    # [11:] leaves the gate number; [:-1] strips the closing parenthesis.
    NegativeGates = []
    PositiveGates = []
    for token in GateInputs.split():
        for sign, gates in (("negative", NegativeGates),
                            ("positive", PositiveGates)):
            if sign in token:
                interstep = token.split("," + sign + ",")
                miRNA = interstep[1][:-1]
                gatenumber = interstep[0][11:]
                gates.append(gatenumber + ", " + miRNA)

    print(PositiveGates)

    # use Hannes' function to read CSV to dictionary (only the sample rows
    # are needed here)
    _, data_samples = classifier.csv2rows(FnameOriginalCSV)
    # use Hannes' check_classifier to count false negative, false positive
    false_neg, false_pos = classifier.check_classifier(FnameBinaryCSV,
                                                       GateInputs)

    print("")
    print("------------------------------------------------------------------")
    print("SCORES:")
    print("------------------------------------------------------------------")
    print("")
    print("Biochemical parameters used:")
    print("-----------")
    print("C_1: " + str(C_1))
    print("C_2: " + str(C_2))
    print("FF4_max: " + str(FF4_max))
    print("T_max: " + str(T_max))
    print("Out_max: " + str(Out_max))
    print("")
    print("Circuit outputs:")
    print("-----------")

    # Accumulators for the margins.
    first_term_up = 0.0     # sum of annotation-weighted log outputs
    second_term_up = 0.0    # sum of (1 - annotation)-weighted log outputs
    first_term_down = 0.0   # number of positive observations
    second_term_down = 0.0  # number of negative observations

    # Extremes per class; None means "no sample of that class seen yet".
    min_first_term = None
    max_second_term = None

    NumberPosSamples = 0
    NumberNegSamples = 0

    # circuit output for each sample
    annots = []
    circuit_outputs = []
    for x in data_samples:
        annot = int(x["Annots"])
        annots.append(annot)
        if annot == 1:
            NumberPosSamples = NumberPosSamples + 1
        if annot == 0:
            NumberNegSamples = NumberNegSamples + 1

        # calculate FF4 (taken as 0 when the circuit has no positive gates)
        if PositiveGates:
            FF4_value = FF4(x, PositiveGates)
        else:
            FF4_value = 0

        # calculate circuit output
        circ_out = circuit_output(x, NegativeGates, FF4_value)
        circuit_outputs.append(circ_out)

        log_out = float(math.log10(circ_out))
        pos_weight = float(x["Annots"])
        neg_weight = float(1 - pos_weight)

        # positive-class contribution
        add_first = pos_weight * log_out
        first_term_up = first_term_up + add_first
        first_term_down = first_term_down + pos_weight
        if pos_weight != 0:
            if min_first_term is None or add_first < min_first_term:
                min_first_term = add_first

        # negative-class contribution
        add_sec = neg_weight * log_out
        second_term_up = second_term_up + add_sec
        second_term_down = second_term_down + neg_weight
        if neg_weight != 0:
            if max_second_term is None or add_sec > max_second_term:
                max_second_term = add_sec

    print("")
    print("Margins:")
    print("-----------")
    average_margin = (_ratio(first_term_up, first_term_down) -
                      _ratio(second_term_up, second_term_down))
    print("Average margin of Circuit (C_MarginA): " + str(average_margin))

    if min_first_term is None or max_second_term is None:
        # One class is missing, so the worst margin is undefined.
        worst_margin = float("nan")
    else:
        worst_margin = min_first_term - max_second_term
    print("Worst margin of Circuit (C_MarginW): " + str(worst_margin))

    # PERFORMANCE SCORE 2: Margins
    MyLambda = 0.5
    score2 = (MyLambda * average_margin) + ((1 - MyLambda) * worst_margin)

    print("")
    print("Classification:")
    print("-----------")
    print("Number of positive samples (cancer) : " + str(NumberPosSamples))
    print("Number of negative samples (healthy) : " + str(NumberNegSamples))

    print("Number of false positive : " + str(false_pos))
    print("Number of false negative : " + str(false_neg))
    true_pos = NumberPosSamples - false_neg
    true_neg = NumberNegSamples - false_pos
    print("Number of true positive : " + str(true_pos))
    print("Number of true negative : " + str(true_neg))
    sensitivity = _ratio(true_pos, NumberPosSamples)
    specificity = _ratio(true_neg, NumberNegSamples)
    false_neg_rate = _ratio(false_neg, NumberPosSamples)
    false_pos_rate = _ratio(false_pos, NumberNegSamples)

    print("")
    print("Statistics:")
    print("-----------")
    print("Sensitivity : " + str(sensitivity))
    print("Specificity : " + str(specificity))
    print("False positive rate : " + str(false_pos_rate))
    print("False negative rate : " + str(false_neg_rate))

    print("")
    print("Binarization margins:")
    print("-----------")
    # Called for its side effect (the CSV announced below); the return
    # value is not used.
    binaryvalue_margins(FnameOriginalCSV, BinThreshold)
    print("Wrote .csv file with margins for binary values:")
    print(str(FnameOriginalCSV[:-4]) + "_binarymargins.csv")

    print("")
    print("Performance:")
    print("-----------")
    y_true = np.array(annots)
    y_scores = np.array(circuit_outputs)
    auc = roc_auc_score(y_true, y_scores)
    print("First performance score: Area under the ROC curve: " + str(auc))
    print("Second performance score: Margins (lambda=0.5): " + str(score2))
    print("                          Margins (lambda=1.0): " + str(
        average_margin))
    print("")
コード例 #3
0
# Options forwarded to classifier.csv2asp in the __main__ block below.
BreakSymmetries = True
OptimizationStrategy = 1
EfficiencyConstraint = True

import sys
sys.path = ["../"] + sys.path
import classifier

if __name__ == "__main__":
    # Hand-toggled pipeline: a stage runs only while its literal flag is 1.

    if 1:
        # Hand the CSV/ASP file names, the input and gate bounds, the gate
        # types and the three solver options to classifier.csv2asp.
        classifier.csv2asp(
            FnameCSV, FnameASP,
            LowerBoundInputs, UpperBoundInputs,
            LowerBoundGates, UpperBoundGates,
            GateTypes,
            EfficiencyConstraint, OptimizationStrategy, BreakSymmetries)

    if 1:
        # Check a fixed classifier (gates 1-4 negative, gates 5-6 positive)
        # against the data in FnameCSV.
        GateInputs = (
            "gate_input(1,negative,g8) gate_input(2,negative,g7) gate_input(3,negative,g6) gate_input(4,negative,g6) "
            "gate_input(5,positive,g1) gate_input(5,positive,g2) gate_input(5,positive,g3) "
            "gate_input(6,positive,g1) gate_input(6,positive,g3) gate_input(6,positive,g4)"
        )
        classifier.check_classifier(FnameCSV, GateInputs)

    if 0:
        # Two-gate toy classifier passed to classifier.gateinputs2pdf.
        FnamePDF = "toy_classifier.pdf"
        GateInputs = "gate_input(2,positive,g2) gate_input(1,negative,g1)"
        classifier.gateinputs2pdf(FnamePDF, GateInputs)

    if 0:
        # Pass the .mat case study and a threshold of 250 to
        # classifier.mat2csv.
        Threshold = 250
        FnameMAT = "casestudy01.mat"
        classifier.mat2csv(FnameMAT, Threshold)
コード例 #4
0
def test_classifiers(solutions, test_data, train_p, train_n, test_p, test_n):
    """
    Tests classifiers on test data set.

    For every solution the train balanced accuracy is computed from the
    error counts stored on the solution; when ``test_data`` is given, the
    solution is also evaluated on it with ``classifier.check_classifier``.
    Per-solution scores, their averages and a final ``CSV ; ...`` summary
    line are printed.

    Parameters
    ----------
    solutions : list
        list of found solutions; each one must expose ``errors``, ``fp``,
        ``fn``, ``size`` and ``solutions_str``
    test_data : str
        test data set, or None to skip the evaluation on test data
    train_p : int
        number of positives in train data
    train_n : int
        number of negatives in train data
    test_p : int
        number of positives in test data
    test_n : int
        number of negatives in test data

    Returns
    -------
    None
        all results are reported through ``print``

    Notes
    -----
    Averages over an empty list are reported as nan.  This is the value
    ``numpy.average`` yields for an empty list as well, but it is produced
    here without numpy's "mean of empty slice" RuntimeWarning.  It applies
    to the test-BACC column of the CSV line whenever ``test_data`` is None,
    and to every average when ``solutions`` is empty.

    """

    def _average(values):
        # Same printed result as numpy.average, minus the warning on [].
        if not values:
            return float("nan")
        return numpy.average(values)

    print("\n\n###########################################")
    print("############TESTING CLASSIFIERS############")
    print("###########################################\n")

    bacc_train_list = []
    bacc_test_list = []
    tpr_test_list = []
    tnr_test_list = []
    size_list = []

    has_test_data = test_data is not None

    for solution_id, solution in enumerate(solutions, start=1):

        # train data scores
        print("\nSOLUTION ", solution_id)  # show solution id
        print("##SUM: ", solution.errors, "##")  # show number of errors
        # show number of false positives and negatives
        print("FP: ", solution.fp, "FN: ", solution.fn)
        tp = train_p - solution.fn  # number of true positives
        tn = train_n - solution.fp  # number of true negatives
        train_bacc = calculate_balanced_accuracy(tp, tn, train_p, train_n)
        print("TRAIN BACC: ", train_bacc)
        bacc_train_list.append(train_bacc)
        size_list.append(solution.size)  # classifier size in number of inputs
        print(solution.solutions_str)  # show solution

        if has_test_data:
            # test data scores; check_classifier's result is unpacked as
            # (false negatives, false positives)
            fn, fp = classifier.check_classifier(test_data,
                                                 solution.solutions_str)
            print("FP: ", fp, " FN: ", fn)
            tp = test_p - fn  # true positives
            tpr_test_list.append(tp / test_p)  # true positive rate
            tn = test_n - fp  # true negatives
            tnr_test_list.append(tn / test_n)  # true negative rate
            bacc = calculate_balanced_accuracy(tp, tn, test_p, test_n)
            print("TEST BACC: ", bacc)
            bacc_test_list.append(bacc)

    # Each average is computed once and reused by the summary and CSV line.
    avg_train_bacc = _average(bacc_train_list)
    avg_test_bacc = _average(bacc_test_list)
    avg_size = _average(size_list)

    # average results for all solutions
    print("\n\n###################################")
    print("############AVERAGE RESULTS############")
    print("###################################\n")
    print("AVG TRAIN BACC: ", avg_train_bacc)
    # The sample standard deviation needs more than one solution.
    if len(bacc_train_list) > 1:
        train_std = numpy.std(bacc_train_list, ddof=1)
    else:
        train_std = 0.0
    print("STD TRAIN BACC: ", train_std)

    if has_test_data:
        avg_tpr = _average(tpr_test_list)
        avg_tnr = _average(tnr_test_list)

        print("AVG TEST BACC: ", avg_test_bacc)
        if len(bacc_test_list) > 1:
            test_std = numpy.std(bacc_test_list, ddof=1)
        else:
            test_std = 0.0
        print("STD TEST BACC: ", test_std)
        print("TEST TPR: ", avg_tpr)
        print("TEST TNR: ", avg_tnr)
    print("AVG SIZE: ", avg_size)
    print("\n")

    if has_test_data:
        print("CSV", ";", avg_train_bacc, ";", train_std, ";",
              avg_test_bacc, ";", test_std, ";",
              avg_tpr, ";", avg_tnr,
              ";", avg_size)
    else:
        # Without test data the test-BACC column is nan (nothing was
        # collected) and the remaining test columns are fixed at 0.0.
        print("CSV", ";", avg_train_bacc, ";", train_std, ";",
              avg_test_bacc, ";", 0.0, ";", 0.0, ";", 0.0, ";",
              avg_size)