def getThreshold(f):
    """Return the threshold computed by optimalFThreshold for the estimate file.

    The file whose path is stored in the module-level name ``estimate`` is
    read line by line.  Each line is split on single spaces; the second
    field is parsed as a float into the class list and the third field as a
    float into the prediction list.  Both lists are handed to
    ``optimalFThreshold(predictions, classes)`` and the fourth element of
    its result is returned.

    Args:
        f: Ignored.  NOTE(review): the original body rebound this name to
            ``open(estimate)`` straight away, so the argument never
            influenced the result.  That behaviour is preserved here;
            confirm with the callers whether ``f`` was meant to be the
            path or an already opened file.

    Returns:
        The ``threshold`` element of the tuple returned by
        ``optimalFThreshold``.

    Raises:
        IOError: if the estimate file cannot be opened.
        IndexError: if a non-blank line has fewer than three fields.
        ValueError: if a field cannot be parsed as a float.
    """
    predictions = []
    classes = []
    # The context manager closes the file even when a line fails to parse.
    with open(estimate) as handle:
        for line in handle:
            if not line.strip():
                continue  # tolerate blank lines, e.g. a trailing empty line
            # Strip the line terminator explicitly instead of slicing off
            # the last character: a final line without a newline would
            # otherwise lose its last digit.
            fields = line.rstrip("\r\n").split(" ")
            classes.append(float(fields[1]))
            predictions.append(float(fields[2]))
    F, prec, rec, threshold = optimalFThreshold(predictions, classes)
    return threshold
def getThreshold(f):
    """Compute the decision threshold for the file named by ``estimate``.

    Opens the path held in the module-level name ``estimate``, splits every
    line on single spaces, and collects the second field (as float) as the
    class value and the third field (as float) as the predicted value.  The
    two lists are passed to ``optimalFThreshold(predictions, classes)``;
    only the fourth element of its return value is used.

    Args:
        f: Ignored.  NOTE(review): the original implementation overwrote
            this parameter with ``open(estimate)`` on its first line, so it
            has never affected the result.  Kept unchanged for backward
            compatibility; verify the intended meaning against the callers.

    Returns:
        The ``threshold`` element of the tuple returned by
        ``optimalFThreshold``.

    Raises:
        IOError: if the estimate file cannot be opened.
        IndexError: if a non-blank line has fewer than three fields.
        ValueError: if a field cannot be parsed as a float.
    """
    class_values = []
    predicted_values = []
    # ``with`` guarantees the handle is released if parsing raises.
    with open(estimate) as estimate_file:
        for raw_line in estimate_file:
            if not raw_line.strip():
                continue  # skip blank lines instead of failing on them
            # Remove only the line terminator; slicing with [:-1] would cut
            # a digit off the last line when the file lacks a final newline.
            columns = raw_line.rstrip("\r\n").split(" ")
            class_values.append(float(columns[1]))
            predicted_values.append(float(columns[2]))
    F, prec, rec, threshold = optimalFThreshold(predicted_values, class_values)
    return threshold
regex = re.compile('(predict)(\d+\.?\d*)(\.out)') for corpus in corpora: print "\n" + corpus for foretell in glob.glob(base + predict + os.sep + corpus + os.sep + "predict*"): lamda = regex.search(os.path.basename(foretell)).group(2) estimate = base + predict + os.sep + corpus + os.sep + "threshold" + lamda + ".out" f = open(estimate) p = [] c = [] print estimate for line in f: #Very slow c.append(float(line.split(" ")[1])) p.append(float(line.split(" ")[2][:-1])) F, prec, rec, threshold = optimalFThreshold(p, c) f.close() f = open(foretell) auc = readAUC(f) f.close() f = open(foretell) F, prec, rec = readResults(f, threshold) f.close() f = open(foretell) TP, FP, FN, TN = getAbsoluteNumbers(f, threshold) f.close() resultFile.write(
# For every corpus, read each prediction file found under
# <base><predict>/<corpus>/<corpus>/ and store the threshold returned by
# optimalFThreshold, keyed by the lambda string taken from the file name.
p = []
c = []
# NOTE(review): `dict` shadows the builtin; the name is kept because code
# further down the script may still refer to it.
dict = {}
fulldict = {}
# Raw string with an escaped dot so that only a literal ".out" matches.
lambdaRegex = re.compile(r'(predict)(\d+\.?\d*)(\.out)')
for corpus in corpora:
    dict = {}  # thresholds of this corpus only
    # The original looped over all corpora and kept only the entry equal to
    # `corpus`, which is the same as using `corpus` twice in the path.
    for foretell in glob.glob(base + predict + os.sep + corpus + os.sep + corpus + os.sep + "*"):
        # Start from empty lists for every file; otherwise each threshold
        # would be computed over the predictions of all files read so far.
        p = []
        c = []
        # The context manager closes the file even if a line fails to parse.
        with open(foretell) as f:
            for line in f:
                if not line.strip():
                    continue  # tolerate blank lines
                # Strip the line terminator explicitly: slicing with [:-1]
                # drops a digit when the last line has no trailing newline.
                fields = line.rstrip("\r\n").split(" ")
                c.append(float(fields[1]))
                p.append(float(fields[2]))
        l = lambdaRegex.search(os.path.basename(foretell)).group(2)
        F, prec, rec, threshold = optimalFThreshold(p, c)
        dict[l] = threshold
    fulldict[corpus] = dict

# A parenthesised single argument prints the same text under the Python 2
# print statement that the rest of this script uses.
print("Step 6 estimate quality!")
# lambdaRegex compiled above is reused; the pattern is the same.
# Left open on purpose: later parts of the script write to resultFile.
resultFile = open('CC.txt', 'w')
for corpus in corpora:
    print("\n" + corpus)
    for predicted in corpora:
        if predicted != corpus:
            print(predicted)
FN = 0. TN = 0. for foldind in range(len(folds)): #Pick a fold fold = folds[foldind] fold_complement = list(set(range(tsetsize)).difference(fold)) #Calculate predictions for the holdout set HO = rls.rectangularCV(fold, fold_complement) for prediction in HO.A: predictions[loglambdaindex].extend(prediction) #For each indice in the fold if loglambdaindex == 0: for ind1 in range(len(fold)): real_inst_ind = fold[ind1] correct.append(Y[real_inst_ind, 0]) F, prec, rec, threshold = optimalFThreshold(predictions[loglambdaindex], correct) print "Threshold:", threshold print "Precision:", prec print "Recall:", rec print "F:", F results.append((F, prec, rec, threshold)) if F > bestperf: bestperf = F bestloglambda = loglambda bestindex = loglambdaindex print 'Average F-score:', F, "(best so far", bestperf, "with loglambda", bestloglambda, ")" print 'Best F1',bestperf*100,'% (loglambda', bestloglambda,')' if options.params: f = open(parameter_file,'w') for loglambda, perf in zip(loglambdavec, results): f.write("loglambda:%d threshold:%f F:%f P:%f R%f\n" %(loglambda, perf[3], perf[0], perf[1], perf[2]))