def classification(trainingName, positiveTestName, negativeTestName, cod, lenPositiveTraining, lenNegativeTraining, techniqueLow, techniqueMid):
    #AUCs = open("referral/AUCs.txt","wb")
    directory = "referral/" + techniqueLow + "/" + techniqueMid + "/classification/"
    start = timeit.default_timer()

    # define some variables
    resultFileTrain = directory + "/resultTrain.dat"
    scaledTraining = trainingName + ".scale"
    model = directory + "/referral-" + cod + ".model"
    scaledNegativeTest = negativeTestName + ".scale"
    scaledPositiveTest = positiveTestName + ".scale"
    resultFileNegative = directory + "/resultNegative" + cod + ".dat"
    resultFilePositive = directory + "/resultPositive" + cod + ".dat"
    rangeFile = directory + "/range-" + cod

    # Scale the training data
    command = SVM + "svm-scale -l -1 -u +1 -s " + rangeFile + " " + trainingName + " > " + scaledTraining
    os.system(command)

    # Scale the negative (normal) test data
    command = SVM + "svm-scale -r " + rangeFile + " " + negativeTestName + " > " + scaledNegativeTest
    os.system(command)

    # Scale the positive (diseased) test data
    command = SVM + "svm-scale -r " + rangeFile + " " + positiveTestName + " > " + scaledPositiveTest
    os.system(command)

    # Run the grid search for the best C and gamma
    print('Cross validation...')
    cmd = '%s -svmtrain "%s" -gnuplot "%s" "%s"' % (SVM + "grid.py", SVM + "svm-train", "/usr/bin/gnuplot", scaledTraining)
    os.system(cmd + " > tmp/info-referral-" + techniqueLow + "-" + techniqueMid + "-" + cod + ".txt 2> tmp/errors.txt")
    gridOutput = open("tmp/info-referral-" + techniqueLow + "-" + techniqueMid + "-" + cod + ".txt", "rb").readlines()
    last_line = gridOutput[-1]
    c, g, rate = map(float, last_line.split())
    print('Best c=%s, g=%s CV rate=%s' % (c, g, rate))

    # Class weights: each class is weighted by the relative frequency of the
    # opposite class, to compensate for class imbalance
    wnormais = float(lenPositiveTraining) / float(lenPositiveTraining + lenNegativeTraining) * 2.0
    wdoentes = 2.0 - wnormais

    # Train the classifier (RBF kernel)
    command = SVM + "svm-train -t 2 -c " + str(c) + " -g " + str(g) + " -w1 " + str(wdoentes) + " -w-1 " + str(wnormais) + " " + scaledTraining + " " + model
    os.system(command + " > tmp/info.txt 2> tmp/errors.txt")

    # Classify the training images
    command = SVM + "svm-predict " + scaledTraining + " " + model + " " + resultFileTrain
    os.system(command + " > tmp/info.txt 2> tmp/errors.txt")

    # Classify the negative test images
    command = SVM + "svm-predict " + scaledNegativeTest + " " + model + " " + resultFileNegative
    os.system(command + " > tmp/info.txt 2> tmp/errors.txt")

    # Classify the positive test images
    command = SVM + "svm-predict " + scaledPositiveTest + " " + model + " " + resultFilePositive
    os.system(command + " > tmp/info.txt 2> tmp/errors.txt")

    # Shift the hyperplane, recording the offsets to allow a future choice of
    # the most adequate operating point
    numberOfPoints = 500
    shifts = []

    result = open(resultFileTrain, "rb").readlines()
    upperBound = -9999999999
    lowerBound = 9999999999
    for i in result:
        res = float(i.split("\n")[0])
        if res > upperBound:
            upperBound = res
        if res < lowerBound:
            lowerBound = res

    interval = upperBound - lowerBound
    variationFactor = interval / numberOfPoints
    limiar = lowerBound  # the decision threshold, swept from lowerBound to upperBound

    negativeResults = open(resultFileNegative, "rb").readlines()
    positiveResults = open(resultFilePositive, "rb").readlines()

    valuesOut = []
    numero_tics = 0
    while (limiar <= upperBound) and (numero_tics < numberOfPoints):
        numero_tics += 1
        acertosNormais = 0  # correctly classified normal images
        acertosDoentes = 0  # correctly classified diseased images
        for i in negativeResults:
            if float(i.split("\n")[0]) <= limiar:
                acertosNormais += 1
        for i in positiveResults:
            if float(i.split("\n")[0]) > limiar:
                acertosDoentes += 1
        shifts.append(limiar)
        valuesOut.append(str(acertosNormais) + "\t" + str(acertosDoentes) + "\t\n")
        limiar += variationFactor

    arqout = open(directory + "/operating-points-" + cod + ".dat", "wb")
    for i in valuesOut:
        arqout.write(i)
    arqout.close()

    # Scale the operating points and calculate the area under the ROC curve (AUC)
    lines = open(directory + "/operating-points-" + cod + ".dat", "rb").readlines()
    max_x = -9999999999.0
    max_y = -9999999999.0
    for line in lines:
        line = line.split()
        if float(line[0]) > max_x:
            max_x = float(line[0])
        if float(line[1]) > max_y:
            max_y = float(line[1])

    x = []
    y = []
    last_x = -1
    last_y = -1
    operatingPointsFile = open(directory + "/operating-points-" + cod + "-scale.dat", "wb")
    shiftsSensSpec = []
    shiftsSensSpecFile = open(directory + "/shifts-sens-spec-" + cod + ".dat", "wb")
    indShifts = 0
    for line in lines:
        line = line.split()
        xx = 1 - float(line[0]) / float(max_x)  # 1 - specificity (false positive rate)
        yy = float(line[1]) / float(max_y)      # sensitivity (true positive rate)
        if xx != last_x or yy != last_y:
            x.append(xx)
            y.append(yy)
            last_x = xx
            last_y = yy
            operatingPointsFile.write(str(xx) + "\t" + str(yy) + "\n")
            if xx <= 0.5 and yy > 0.5:
                shiftsSensSpec.append((str(shifts[indShifts]), " {0:0.1f}%".format(yy * 100), " {0:0.1f}%".format((1 - xx) * 100)))
        indShifts += 1
    operatingPointsFile.close()

    # Record one shift per sensitivity level, stopping at 100% specificity
    (last_shift, last_sens, last_spec) = shiftsSensSpec[0]
    for (shift, sens, spec) in shiftsSensSpec[1:]:
        if sens != last_sens:
            shiftsSensSpecFile.write(last_shift + " " + last_sens + " " + last_spec + "\n")
            if spec == " 100.0%":
                break
        last_shift = shift
        last_sens = sens
        last_spec = spec
    shiftsSensSpecFile.write(shift + " " + sens + " " + spec + "\n")
    shiftsSensSpecFile.close()

    stop = timeit.default_timer()
    print "Model created in " + common_functions.convertTime(stop - start)

    # x decreases along the sweep, so np.trapz returns a negative area
    auc = np.trapz(y, x) * -100
    print u"AUC = {0:0.1f}%\n\n".format(auc)
    #AUCs.write(u"\nAUC = {0:0.1f}%\n\n".format(auc))

    # Clean up the grid-search artifacts
    name = trainingName.split("/")[-1]
    if os.path.exists(name + ".scale.png"):
        os.system("rm " + name + ".scale.png")
    if os.path.exists(name + ".scale.out"):
        os.system("rm " + name + ".scale.out")

    return u"\nAUC = {0:0.1f}%\n\n".format(auc)
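################################################
# A minimal, hypothetical invocation sketch of classification(): the paths,
# the "01" cod suffix and the 300/300 class counts below are illustrative
# only; SVM, np, os, timeit and common_functions are assumed to be defined at
# module level, as elsewhere in this script.
################################################
# report = classification("referral/sparse/hard/train.dat",
#                         "referral/sparse/hard/positive-test.dat",
#                         "referral/sparse/hard/negative-test.dat",
#                         "01", 300, 300, "sparse", "hard")
# print report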
else:
    listImages = [image]

for im in listImages:
    im_special = common_functions.specialName(im)
    if os.path.exists(OutDir + im[:-3] + "hist"):
        continue

    # define the output file (histogram)
    OutFile = OutDir + im[:-3] + "hist"
    f = open(OutFile, "wb")  # create/truncate the histogram file

    # get the points of interest
    PoIsTemp = open(PoIsDir + im[:-3] + "key", "rb").readlines()
    PoIs = []
    for i in range(2, len(PoIsTemp), 2):
        PoIs.append([float(p) for p in PoIsTemp[i].split()])
    PoIs = numpy.asarray(PoIs)

    sys.stdout.write(". ")
    sys.stdout.flush()

    if techniqueMid == "hard":
        hardSum(PoIs, Codebook, OutFile, size, label)
    elif techniqueMid == "soft":
        softMax(PoIs, Codebook, OutFile, size, label)
    else:  # techniqueMid == "semi"
        semiSoft(PoIs, Codebook, OutFile, size, label)

stop = timeit.default_timer()
sys.stdout.write(" Done in " + common_functions.convertTime(stop - start) + "\n")

################################################
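################################################
# hardSum, softMax and semiSoft are implemented elsewhere in this script. For
# intuition only, a minimal sketch of what hard-assignment coding typically
# computes -- each descriptor votes for its nearest codeword -- under the
# assumption that histograms are written in the libsvm format the
# classification step consumes. This is NOT the actual hardSum implementation.
################################################
def hardSumSketch(PoIs, Codebook, OutFile, size, label):
    # PoIs: (n, d) descriptor matrix; Codebook: (size, d) codeword matrix
    hist = numpy.zeros(size)
    for p in PoIs:
        dists = numpy.sum((Codebook - p) ** 2, axis=1)  # squared distance to every codeword
        hist[numpy.argmin(dists)] += 1                  # one vote for the nearest codeword
    out = open(OutFile, "wb")
    out.write(str(label) + " " + " ".join("%d:%f" % (i + 1, v) for i, v in enumerate(hist)) + "\n")
    out.close()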
"key") common_functions.filterPoints(type, technique, im) else: denseExtraction( im, im_special, technique, type, "datasets/" + type + "-images-by-lesions/" + common_functions.specialName(lesion_en)) common_functions.organizeFileSurfToDescriptor(directory + technique + "/" + type + "/" + im[:-3] + "key") stop = timeit.default_timer() sys.stdout.write(common_functions.convertTime(stop - start) + "\n") ################################################ ################################################ # Describe additional images # (with marked regions but not labeled as normal or disease) # Only when DR1 is defined as the training dataset ################################################ if train == "DR1": print "Low-level feature extraction for additional images (DR1) - used just because contain marked regions" start = timeit.default_timer() listImages = os.listdir("datasets/DR1-additional-marked-images/") for im in listImages: sys.stdout.write(". ")
listImages = os.listdir("datasets/" + type + "-images-by-lesions/" + lesion_en) else: listImages = [image] for im in listImages: im_special = common_functions.specialName(im) if os.path.exists(OutDir + im[:-3] + "hist"): continue # define the output file (histogram) OutFile = OutDir + im[:-3] + "hist" f = open(OutFile,"wb") # get the points of interest PoIsTemp = open(PoIsDir + im[:-3] + "key","rb").readlines() PoIs = [] for i in range(2,len(PoIsTemp),2): PoIs.append([ float(p) for p in PoIsTemp[i].split() ]) PoIs = numpy.asarray(PoIs) sys.stdout.write(". ") sys.stdout.flush() if techniqueMid == "hard": hardSum(PoIs, Codebook, OutFile, size, label) else: # techniqueMid == "soft": softMax(PoIs, Codebook, OutFile, size, label) stop = timeit.default_timer() sys.stdout.write(" Done in " + common_functions.convertTime(stop - start) + "\n") ################################################
            candidatesFile.close()
        else:
            common_functions.getCandidateRegions(lesion, train, technique)

        # define the parameters of k-means
        common_functions.adjustParametersKmeans(lesion, size, technique)

        # run k-means
        start = timeit.default_timer()
        if technique == "sparse":
            os.system(KMEANS + " -i codebooks/" + technique + "/" + str(size) + "-codewords-" + lesion + ".in -o codebooks/" + technique + "/codebook-" + lesion + ".cb")
        else:
            for diametro in [12, 19, 31, 50, 80, 128]:
                radius = diametro / 2.0
                os.system(KMEANS + " -i codebooks/" + technique + "/" + str(size) + "-codewords-" + lesion + "-" + str(radius) + ".in -o codebooks/" + technique + "/codebook-" + lesion + "-" + str(radius) + ".cb")
        stop = timeit.default_timer()
        sys.stdout.write(" - codebook created in " + common_functions.convertTime(stop - start) + "\n")
        sys.stdout.flush()

for lesion in lesions:
    if lesion == "imagem-normal":
        continue
    sys.stdout.write("Concatenating the codebooks")
    sys.stdout.flush()
    start = timeit.default_timer()
    for technique in techniques:
        if not sizeFlag:  # if the user did not pass a size, use the default
            if technique == "sparse":
                size = 250  # 250 * 2 (normal and disease) = 500
            else:
                size = 125  # 125 * 2 (normal and disease) * 6 (scales) = 1500
        common_functions.mergeCodebooks(lesion, size, technique)
    stop = timeit.default_timer()
    sys.stdout.write(" - done in " + common_functions.convertTime(stop - start) + "\n")
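################################################
# KMEANS is an external binary; only its -i/-o flags are used above. For
# intuition only, a rough numpy sketch of the standard Lloyd iterations such
# a clustering step performs to turn low-level descriptors into a codebook
# (not the binary's actual implementation).
################################################
def lloydKmeansSketch(points, k, iterations=20):
    # points: (n, d) array of descriptors; returns a (k, d) codebook
    centers = points[numpy.random.choice(len(points), k, replace=False)]
    for _ in range(iterations):
        # assign every point to its nearest center
        dists = ((points[:, None, :] - centers[None, :, :]) ** 2).sum(axis=2)
        labels = dists.argmin(axis=1)
        # move every center to the mean of its assigned points
        for j in range(k):
            members = points[labels == j]
            if len(members) > 0:
                centers[j] = members.mean(axis=0)
    return centers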
    im_special = common_functions.specialName(im)
    for technique in techniques:
        if os.path.exists(directory + technique + "/" + type + "/" + im[:-3] + "key"):
            continue
        fAux = open(directory + technique + "/" + type + "/" + im[:-3] + "key", "wb")  # create/truncate the descriptor file
        if technique == "sparse":
            sparseExtraction(im_special, technique, type, "datasets/" + type + "-images-by-lesions/" + common_functions.specialName(lesion_en))
            common_functions.organizeFileSurfToDescriptor(directory + technique + "/" + type + "/" + im[:-3] + "key")
            common_functions.filterPoints(type, technique, im)
        else:
            denseExtraction(im, im_special, technique, type, "datasets/" + type + "-images-by-lesions/" + common_functions.specialName(lesion_en))
            common_functions.organizeFileSurfToDescriptor(directory + technique + "/" + type + "/" + im[:-3] + "key")

stop = timeit.default_timer()
sys.stdout.write(common_functions.convertTime(stop - start) + "\n")

################################################

################################################
# Describe additional images
# (with marked regions but not labeled as normal or disease)
# Only when DR1 is defined as the training dataset
################################################
if train == "DR1":
    print "Low-level feature extraction for additional images (DR1) - used only because they contain marked regions"
    start = timeit.default_timer()
    listImages = os.listdir("datasets/DR1-additional-marked-images/")
    for im in listImages:
        sys.stdout.write(". ")
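################################################
# denseExtraction is implemented elsewhere. As an illustration only, a
# minimal sketch of dense keypoint sampling on a regular grid at the six
# diameters used for the dense codebooks above; the step value is
# hypothetical and the real routine may differ.
################################################
def denseGridSketch(width, height, step=6, diameters=(12, 19, 31, 50, 80, 128)):
    # yields one (x, y, radius) circular region per grid node and scale
    for d in diameters:
        radius = d / 2.0
        for y in range(int(radius), int(height - radius), step):
            for x in range(int(radius), int(width - radius), step):
                yield (x, y, radius)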