# Example #1
# 0
def analyseData(data):
    """Summarise the comboScore attribute across *data*.

    Args:
        data: iterable of objects exposing a numeric ``comboScore``.

    Returns:
        (npCombo, countingCombo): the stats vector produced by
        generateStatsVector over the rounded scores, and a Counter with
        the frequency of each rounded score.
    """
    # Round to 3 decimals so identical scores bucket together in the Counter.
    combo = [round(member.comboScore, 3) for member in data]
    npCombo = generateStatsVector(combo)
    countingCombo = Counter(combo)

    # Parenthesised single-argument print is valid Python 2 and Python 3.
    print("Combo Score: Mean - %.3f (+/- %.3f) Median - %.4f " % (npCombo.mean, npCombo.std, npCombo.median))
    return npCombo, countingCombo
def calculateWinner(filename1, filename2, alg, metricIndex):
    # MetricIndex => 2: Acc, 3: f1

    metricVec0 = getVector(filename1, alg, metricIndex)
    metricVec1 = getVector(filename2, alg, metricIndex)

    metricSv0 = generateStatsVector(metricVec0)
    metricSv1 = generateStatsVector(metricVec1)

    winner = -1
    if metricSv0.mean > metricSv1.mean:
        winner = 0
    elif metricSv1.mean > metricSv0.mean:
        winner = 1
   
    print metricSv0.mean, metricSv1.mean
    #from scipy import stats
    #t, p = stats.ttest_rel(metricVec0, metricVec1)
    #if p > 0.05:
    #    winner = -1

    return winner
# Example #3
# 0
def compareValues(minIndex):
    blAccs, blF1s, blwF1s, cAccs, cF1s, cwF1s = [], [], [], [], [], []
    for group in returnedValues:
        accBaseline, f1Baseline, wf1Baseline, classacc, classf1, classwf1 = group[0], group[1], group[2], group[minIndex], group[minIndex+1], group[minIndex+2]
        blAccs.append(accBaseline)
        blF1s.append(f1Baseline)
        blwF1s.append(wf1Baseline)
        cAccs.append(classacc)
        cF1s.append(classf1)
        cwF1s.append(classwf1)

    (tacc, probacc) = stats.ttest_rel(blAccs, cAccs)
    (tf1, probf1) = stats.ttest_rel(blF1s, cF1s)
    (wtf1, probwf1) = stats.ttest_rel(blwF1s, cwF1s)
    
    blAccsStat = generateStatsVector(blAccs)
    blF1sStat = generateStatsVector(blF1s)
    blwF1sStat = generateStatsVector(blwF1s)
    cAccsStat = generateStatsVector(cAccs)
    cF1sStat  = generateStatsVector(cF1s)
    cwF1sStat = generateStatsVector(cwF1s)

    print "BL = %.3f (+/- %.5f)" % (blAccsStat.mean, blAccsStat.std)
    print "F1 = %.3f (+/- %.5f)" % (blF1sStat.mean, blF1sStat.std)
    print "wF1 = %.3f (+/- %.5f)" % (blwF1sStat.mean, blwF1sStat.std)

    print "Classifier ACC = %.3f (+/- %.5f)" % (cAccsStat.mean, cAccsStat.std)
    print "Classifier F1          = %.3f (+/- %.5f)" % (cF1sStat.mean, cF1sStat.std)
    print "Classifier wF1        = %.3f (+/- %.5f)" % (cwF1sStat.mean, cwF1sStat.std)
  
    print "ACC GAIN --> %0.2f%% " % (100.0 * (cAccsStat.mean - blAccsStat.mean) / blAccsStat.mean)
    print "F1 GAIN --> %0.2f%% " %  (100.0 * (cF1sStat.mean - blF1sStat.mean) / blF1sStat.mean)
    print "WF1 GAIN --> %0.2f%% " % (100.0 * (cwF1sStat.mean - blwF1sStat.mean) / blwF1sStat.mean)



    print "Tacc, Probacc = ", tacc, probacc, probacc > 0.05
    print "Tf1, probF1   = ", tf1, probf1, probf1 > 0.05
    print "wtF1, probwf1 = ", wtf1, probwf1, probwf1 > 0.05
# Debug dump: print each classifier's raw list of measurement rows.
# NOTE(review): k keeps only the row count of whichever classifier the
# dict yields last -- presumably every classifier has the same number of
# rows; confirm against how k is used later in the script.
for classify, listOfValues in clfs.items():
    print classify, listOfValues
    k = len(listOfValues)

# Transpose each classifier's per-run rows into per-measure columns, store
# the columns in clfs2, and record a (mean, std) summary per measure.
for classify, listOfValues in clfs.items():
    vs = defaultdict(list)
    for idx in range(nMeasures):
        # Column idx = value of measure idx from every run of this classifier.
        vs[idx] = [row[idx] for row in listOfValues]
        clfs2[classify][idx] = vs[idx]

    for column in vs.values():
        summary = generateStatsVector(column)
        means[classify].append((summary.mean, summary.std))

# Compare every unordered pair of classifiers: (A,B), (A,C), (B,C), ...
for clf in list(it.combinations(clfs.keys(), 2)):
    print "=== Comparing", clf[0], "and", clf[1], "==="

    for n in range(nMeasures):
        print "============"
        print "Measure ", n
        # Raw per-run vectors and precomputed (mean, std) for this measure.
        vec1 = clfs2[clf[0]][n]
        vec2 = clfs2[clf[1]][n]
        mean0 =  means[clf[0]][n][0]
        mean1 =  means[clf[1]][n][0]
        # NOTE(review): vec1/vec2 are unused in this chunk -- presumably
        # fed to a significance test in code below this view; confirm.
        print "%s %.3f (%.3f) " % (clf[0], mean0, means[clf[0]][n][1])
        print "%s %.3f (%.3f) " % (clf[1], mean1, means[clf[1]][n][1])