Example #1
def classification(trainingName, positiveTestName, negativeTestName, cod, lenPositiveTraining, lenNegativeTraining, techniqueLow, techniqueMid):
	#AUCs = open("referral/AUCs.txt","wb")
	directory = "referral/" + techniqueLow + "/" + techniqueMid + "/classification/"
	start = timeit.default_timer()

	# define some variables
	
	resultFileTrain = directory + "/resultTrain.dat"
	scaledTraining = trainingName + ".scale"
	model = directory + "/referral-" + cod + ".model"
	scaledNegativeTest = negativeTestName + ".scale"
	scaledPositiveTest = positiveTestName + ".scale"
	resultFileNegative = directory + "/resultNegative" + cod + ".dat"
	resultFilePositive = directory + "/resultPositive" + cod + ".dat"
	rangeFile = directory + "/range-" + cod

	

	# Scale the training data
	command = SVM + "svm-scale -l -1 -u +1 -s " + rangeFile + " " + trainingName + " > " + scaledTraining
	os.system(command)

	# Scale the negative (normal) test data
	command = SVM + "svm-scale -r " + rangeFile + " " + negativeTestName + " > " + scaledNegativeTest
	os.system(command)

	# Scale the positive (diseased) test data
	command = SVM + "svm-scale -r " + rangeFile + " " + positiveTestName + " > " + scaledPositiveTest
	os.system(command)


	# Run the grid-search
	print('Cross validation...')			
	cmd = '%s -svmtrain "%s" -gnuplot "%s" "%s"' % (SVM + "grid.py", SVM + "svm-train", "/usr/bin/gnuplot", scaledTraining)
	os.system(cmd + "  > tmp/info-referral-" + techniqueLow + "-" + techniqueMid + "-" + cod + ".txt 2> tmp/errors.txt")

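	# The info file captures grid.py's stdout; its last line holds the chosen
	# parameters and cross-validation accuracy in the form "c g cv-rate"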
	lines = open("tmp/info-referral-" + techniqueLow + "-" + techniqueMid + "-" + cod + ".txt","rb").readlines()
	last_line = lines[-1]
	c,g,rate = map(float,last_line.split())


	print('Best c=%s, g=%s CV rate=%s' % (c,g,rate))

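	# Class weights for svm-train: each class is weighted by the opposite
	# class's share of the training set, so errors on the minority class cost
	# more. E.g. 100 positives and 300 negatives give wnormais = 2*100/400 = 0.5
	# (applied to class -1) and wdoentes = 2 - 0.5 = 1.5 (applied to class +1).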
	wnormais = 2.0 * lenPositiveTraining / float(lenPositiveTraining + lenNegativeTraining)
	wdoentes = 2.0 - wnormais


	# Train the classifier
	command = SVM + "svm-train -t 2 -c " + str(c) + " -g " + str(g) + " -w1 " + str(wdoentes) + " -w-1 " + str(wnormais) + " " + scaledTraining + " " + model
	os.system(command + "  > tmp/info.txt 2> tmp/errors.txt")

	# Classify the training images
	command = SVM + "svm-predict " + scaledTraining + " " + model + " " + resultFileTrain
	os.system(command + "  > tmp/info.txt 2> tmp/errors.txt")

	# Classify the negative test images
	command = SVM + "svm-predict " + scaledNegativeTest + " " + model + " " + resultFileNegative
	os.system(command + "  > tmp/info.txt 2> tmp/errors.txt")

	# Classify the positive test images
	command = SVM + "svm-predict " + scaledPositiveTest + " " + model + " " + resultFilePositive
	os.system(command + "  > tmp/info.txt 2> tmp/errors.txt")



	# Shift the hyperplane
	numberOfPoints = 500

	# Record the offsets to allow a future choice of most adequate operating point
	shifts = []

	result = open(resultFileTrain,"rb").readlines()
	trainValues = [float(line) for line in result]
	upperBound = max(trainValues)
	lowerBound = min(trainValues)
	interval = upperBound - lowerBound
	variationFactor = interval/numberOfPoints
	limiar = lowerBound	# limiar: the decision threshold being swept

	negativeResults = open(resultFileNegative,"rb").readlines()
	positiveResults = open(resultFilePositive,"rb").readlines()

	valuesOut = []
	numero_tics = 0
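	# Sweep the threshold from the lowest to the highest training output in
	# numberOfPoints steps, counting the correctly classified normals
	# (res <= limiar) and diseased (res > limiar) at each step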
	while ((limiar <= upperBound) and (numero_tics < numberOfPoints)):
		numero_tics = numero_tics + 1
		acertosNormais = 0	# correctly classified normals (true negatives)
		acertosDoentes = 0	# correctly classified diseased (true positives)
		for i in negativeResults:
			if float(i) <= limiar:	acertosNormais += 1
		for i in positiveResults:
			if float(i) > limiar:	acertosDoentes += 1
		shifts.append(limiar)
		line = str(acertosNormais) + "\t" + str(acertosDoentes) + "\t\n"
		valuesOut.append(line)
		limiar = limiar + variationFactor

	arqout = open(directory + "/operating-points-" + cod + ".dat","wb")
	for i in valuesOut:
		arqout.write(i)
	arqout.close()



	# Scale the operating points and calculate the area under the ROC curve (AUC)
	lines = open(directory + "/operating-points-" + cod + ".dat", "rb").readlines()

	max_x = -9999999999.0
	max_y = -9999999999.0

	for line in lines:
		line = line.split()
		x = float(line[0])
		y = float(line[1])
		if (x > max_x):	max_x = x
		if (y > max_y):	max_y = y

	x = []
	y = []
	last_x = -1
	last_y = -1
	operatingPointsFile = open(directory + "/operating-points-" + cod + "-scale.dat", "wb")
	shiftsSensSpec = []
	shiftsSensSpecFile = open(directory + "/shifts-sens-spec-" + cod + ".dat", "wb")
	indShifts = 0

	for line in lines:
		line = line.split()
		xx = 1 - float(line[0])/float(max_x)
		yy = float(line[1])/float(max_y)
		if xx != last_x or yy != last_y:
			x.append(xx)
			y.append(yy)
			last_x = xx
			last_y = yy
			operatingPointsFile.write(str(xx) + "\t" + str(yy) + "\n")
			if xx <= 0.5 and yy > 0.5:
				shiftsSensSpec.append((str(shifts[indShifts]), " {0:0.1f}%".format(yy*100), " {0:0.1f}%".format((1-xx)*100)))
		indShifts += 1
	operatingPointsFile.close()


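	# Keep one (threshold shift, sensitivity, specificity) row per distinct
	# sensitivity value, stopping once specificity reaches 100%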
	sens_aux = ""
	(last_shift, last_sens, last_spec) = shiftsSensSpec[0]
	for (shift, sens, spec) in shiftsSensSpec[1:]:
		if sens != last_sens:
			shiftsSensSpecFile.write(last_shift + " " + last_sens + " " + last_spec + "\n")
			if spec == " 100.0%": break
		last_shift = shift
		last_sens = sens
		last_spec = spec
	shiftsSensSpecFile.write(shift + " " + sens + " " + spec + "\n")
	shiftsSensSpecFile.close()
	
	
	stop = timeit.default_timer()
	print "Model created in " + common_functions.convertTime(stop - start)

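	# x decreases as the threshold rises, so the trapezoidal integral comes out
	# negative; negating and scaling by 100 yields the AUC as a percentage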
	auc = np.trapz(y, x) * -100
	print u"AUC = {0:0.1f}%\n\n".format(auc)
	#AUCs.write(u"\nAUC = {0:0.1f}%\n\n".format(auc))


	# Clear
	name = trainingName.split("/")[-1]
	if os.path.exists(name + ".scale.png"):
		os.remove(name + ".scale.png")
	if os.path.exists(name + ".scale.out"):
		os.remove(name + ".scale.out")

		
	return u"\nAUC = {0:0.1f}%\n\n".format(auc)
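
A minimal usage sketch (not from the source): the file names, counts, and SVM path below are hypothetical; only the classification(...) signature comes from the example above.

SVM = "/usr/local/libsvm/"	# hypothetical path to the libsvm binaries

auc = classification(
	trainingName="referral/sift/hard/train.dat",		# hypothetical files
	positiveTestName="referral/sift/hard/test-positive.dat",
	negativeTestName="referral/sift/hard/test-negative.dat",
	cod="01",
	lenPositiveTraining=100,	# hypothetical class counts
	lenNegativeTraining=300,
	techniqueLow="sift",
	techniqueMid="hard")
print auc	# prints the "AUC = ..." string returned by the function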
Example #2
        # run k-means
        start = timeit.default_timer()
        if technique == "sparse":
            os.system(KMEANS + " -i codebooks/" + technique + "/" + str(size) +
                      "-codewords-" + lesion + ".in -o codebooks/" +
                      technique + "/codebook-" + lesion + ".cb")
        else:
            for diametro in [12, 19, 31, 50, 80, 128]:
                radius = diametro / 2.0
                os.system(KMEANS + " -i codebooks/" + technique + "/" +
                          str(size) + "-codewords-" + lesion + "-" +
                          str(radius) + ".in -o codebooks/" + technique +
                          "/codebook-" + lesion + "-" + str(radius) + ".cb")
        stop = timeit.default_timer()
        sys.stdout.write(" - codebook created in " +
                         common_functions.convertTime(stop - start) + "\n")
        sys.stdout.flush()
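        # For concreteness (hypothetical values): with technique = "dense",
        # size = 125, lesion = "hemorrhage", and diametro = 12 (radius 6.0),
        # the os.system call above expands to:
        #   KMEANS -i codebooks/dense/125-codewords-hemorrhage-6.0.in
        #          -o codebooks/dense/codebook-hemorrhage-6.0.cb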

for lesion in lesions:
    if lesion == "imagem-normal": continue
    sys.stdout.write("Concatenating the codebooks")
    sys.stdout.flush()
    start = timeit.default_timer()
    for technique in techniques:
        if not sizeFlag:  # if the user did not pass a size, use the default
            if technique == "sparse":
                size = 250  # 250 * 2 (normal and disease) = 500
            else:
                size = 125  # 125 * 2 (normal and disease) * 6 (scales) = 1500
            common_functions.mergeCodebooks(lesion, size, technique)
    stop = timeit.default_timer()
Example #3
					else: listImages = [image]
				
					for im in listImages:
						im_special = common_functions.specialName(im)
						if os.path.exists(OutDir + im[:-3] + "hist"): continue
						
						# define the output file (histogram)
						OutFile = OutDir + im[:-3] + "hist"
						f = open(OutFile,"wb")
					
						# get the points of interest
						PoIsTemp = open(PoIsDir + im[:-3] + "key","rb").readlines()
						PoIs = []
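						# every second line of the .key file (starting at line
						# index 2) holds the values of one point of interest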
						for i in range(2,len(PoIsTemp),2):
							PoIs.append([ float(p) for p in PoIsTemp[i].split() ])
						PoIs = numpy.asarray(PoIs)				
									
						sys.stdout.write(". ")
						sys.stdout.flush()
			
						if techniqueMid == "hard":
							hardSum(PoIs, Codebook, OutFile, size, label)
						elif techniqueMid == "soft":
							softMax(PoIs, Codebook, OutFile, size, label)
						else:				#if techniqueMid == "semi":
							semiSoft(PoIs, Codebook, OutFile, size, label)
						
				stop = timeit.default_timer()
				sys.stdout.write(" Done in " + common_functions.convertTime(stop - start) + "\n")
################################################
Example #4
                                                                  "key")
                    common_functions.filterPoints(type, technique, im)
                else:
                    denseExtraction(
                        im, im_special, technique, type,
                        "datasets/" + type + "-images-by-lesions/" +
                        common_functions.specialName(lesion_en))
                    common_functions.organizeFileSurfToDescriptor(directory +
                                                                  technique +
                                                                  "/" + type +
                                                                  "/" +
                                                                  im[:-3] +
                                                                  "key")

        stop = timeit.default_timer()
        sys.stdout.write(common_functions.convertTime(stop - start) + "\n")
################################################

################################################
# Describe additional images
# (with marked regions but not labeled as normal or disease)
# Only when DR1 is defined as the training dataset
################################################

if train == "DR1":
    print "Low-level feature extraction for additional images (DR1) - used just because contain marked regions"
    start = timeit.default_timer()

    listImages = os.listdir("datasets/DR1-additional-marked-images/")
    for im in listImages:
        sys.stdout.write(". ")
Example #5
							listImages = os.listdir("datasets/" + type + "-images-by-lesions/" + lesion_en)
					else: listImages = [image]
				
					for im in listImages:
						im_special = common_functions.specialName(im)
						if os.path.exists(OutDir + im[:-3] + "hist"): continue
						
						# define the output file (histogram)
						OutFile = OutDir + im[:-3] + "hist"
						f = open(OutFile,"wb")
					
						# get the points of interest
						PoIsTemp = open(PoIsDir + im[:-3] + "key","rb").readlines()
						PoIs = []
						for i in range(2,len(PoIsTemp),2):
							PoIs.append([ float(p) for p in PoIsTemp[i].split() ])
						PoIs = numpy.asarray(PoIs)				
									
						sys.stdout.write(". ")
						sys.stdout.flush()
			
						if techniqueMid == "hard":
							hardSum(PoIs, Codebook, OutFile, size, label)
						else:				# techniqueMid == "soft":
							softMax(PoIs, Codebook, OutFile, size, label)
					
						
				stop = timeit.default_timer()
				sys.stdout.write(" Done in " + common_functions.convertTime(stop - start) + "\n")
################################################
					candidatesFile.close()
		else:	common_functions.getCandidateRegions(lesion, train, technique)
		
		# define the parameters of k-means
		common_functions.adjustParametersKmeans(lesion, size, technique)
		
		# run k-means
		start = timeit.default_timer()
		if technique == "sparse":
			os.system(KMEANS + " -i codebooks/" + technique + "/" + str(size) + "-codewords-" + lesion + ".in -o codebooks/" + technique + "/codebook-" + lesion + ".cb")
		else:
			for diametro in [12, 19, 31, 50, 80, 128]:
				radius = diametro/2.0
				os.system(KMEANS + " -i codebooks/" + technique + "/" + str(size) + "-codewords-" + lesion + "-" + str(radius) + ".in -o codebooks/" + technique + "/codebook-" + lesion + "-" + str(radius) + ".cb")
		stop = timeit.default_timer()
		sys.stdout.write(" - codebook created in " + common_functions.convertTime(stop - start) + "\n") 
		sys.stdout.flush()
	

for lesion in lesions:
	if lesion == "imagem-normal": continue
	sys.stdout.write("Concatenating the codebooks")
	sys.stdout.flush()
	start = timeit.default_timer()
	for technique in techniques:
		if not sizeFlag:	# if the user did not pass a size, use the default
			if technique == "sparse": size = 250	# 250 * 2 (normal and disease) = 500
			else: size = 125			# 125 * 2 (normal and disease) * 6 (scales) = 1500
			common_functions.mergeCodebooks(lesion, size, technique)
	stop = timeit.default_timer()
	sys.stdout.write(" - done in " + common_functions.convertTime(stop - start) + "\n") 
			im_special = common_functions.specialName(im)
			
			for technique in techniques:
				if os.path.exists(directory + technique + "/" + type + "/" + im[:-3] + "key"): continue
				fAux = open(directory + technique + "/" + type + "/" + im[:-3] + "key","wb")
								
				if technique == "sparse":
					sparseExtraction(im_special, technique, type, "datasets/" + type + "-images-by-lesions/" + common_functions.specialName(lesion_en))
					common_functions.organizeFileSurfToDescriptor(directory + technique + "/" + type + "/" + im[:-3] + "key")
					common_functions.filterPoints(type, technique, im)
				else:
					denseExtraction(im, im_special, technique, type, "datasets/" + type + "-images-by-lesions/" + common_functions.specialName(lesion_en))
					common_functions.organizeFileSurfToDescriptor(directory + technique + "/" + type + "/" + im[:-3] + "key")
			
		stop = timeit.default_timer()
		sys.stdout.write(common_functions.convertTime(stop - start) + "\n")
################################################



################################################
# Describe additional images
# (with marked regions but not labeled as normal or disease)
# Only when DR1 is defined as the training dataset
################################################

if train == "DR1":
	print "Low-level feature extraction for additional images (DR1) - used just because contain marked regions"
	start = timeit.default_timer()
	
	listImages = os.listdir("datasets/DR1-additional-marked-images/")