Ejemplo n.º 1
0
def k_nearest_neighbor(distance_matrix,labels,exemplars,k,sourcedir,destdir):
    """Score a k-nearest-neighbour majority vote and export the datapoints that pass.

    Row ``n`` of ``distance_matrix`` belongs to ``labels[n]`` and column ``j``
    to ``exemplars[j]``. For every row the ``k`` smallest entries are taken as
    neighbours. A neighbour counts as "right" when its category equals the
    datapoint's category, where the category is whatever
    ``dynamic.categorisePayload`` returns for the text before the ``-`` in the
    label, looked up in the module-level ``categories``.

    A datapoint is a TP when strictly more neighbours are right than wrong;
    otherwise (ties included) it is an FP. TP labels are collected, sorted,
    appended to ``knn-select.txt`` and copied from ``sourcedir`` to
    ``destdir``. One ``k,exemplars,datapoints,tp,fp`` line is appended to
    ``knn.txt``.

    Args:
        distance_matrix: iterable of rows accepted by ``argsort``.
        labels: datapoint labels of the form ``<class>-<details>``.
        exemplars: exemplar labels of the same form.
        k: neighbour count; an int or a numeric string.
        sourcedir: directory holding one file per label.
        destdir: destination for the selected files.

    Raises:
        ValueError: if a label or exemplar does not contain exactly one ``-``.
    """
    # Imported here because the file's import block is not visible from this block.
    import shutil

    neighbour_count = int(k)
    tp = fp = 0
    selection = set()

    for counter, row in enumerate(distance_matrix):
        label = labels[counter]
        knn_index = argsort(row)[:neighbour_count]

        # The datapoint's category does not depend on the neighbour, so it is
        # classified once per row.
        # NOTE(review): assumes categorisePayload is a pure lookup - confirm.
        part1Class, part1Details = label.split("-")
        category1 = dynamic.categorisePayload(part1Class, categories)

        w_cat = 0  # neighbours in a different category
        r_cat = 0  # neighbours in the same category
        for x in knn_index:
            part2Class, part2Details = exemplars[x].split("-")
            category2 = dynamic.categorisePayload(part2Class, categories)
            if category1 != category2:
                w_cat += 1
                verdict = "FP"
            else:
                r_cat += 1
                verdict = "TP"
            # Single-argument print() gives the same output on Python 2 and 3.
            print("datapoint = %s, index = %s , exemplar/neighbor = %s , %s"
                  % (label, knn_index, exemplars[x], verdict))

        # Strict majority: a tie is counted as a false positive.
        if r_cat > w_cat:
            tp += 1
            print("Majority vote True:  %s" % (r_cat,))
            print("Adding to set:  %s" % (label,))
            selection.add(label)
        else:
            fp += 1
            print("Majority vote False:  %s" % (w_cat,))

    dimension_c = len(exemplars)
    dimension_r = len(labels)
    print("Exemplars:  %s" % (dimension_c,))
    print("Datapoints:  %s" % (dimension_r,))
    print("Total TP =  %s" % (tp,))
    print("Total FP =  %s" % (fp,))

    selection = sorted(selection)
    with open("knn-select.txt", 'a') as select_file:
        for item in selection:
            select_file.write("\n" + str(item))
            # Copy without a shell so odd characters in names cannot break or
            # inject into a command line. A failed copy is reported and the
            # remaining items are still processed.
            try:
                shutil.copy(sourcedir + "/" + str(item), destdir)
            except EnvironmentError as err:
                print("Could not copy %s: %s" % (item, err))
    print("Selection set size:  %s" % (len(selection),))

    # str(k) lets k be an int as well as a string.
    record = ",".join(str(value) for value in (k, dimension_c, dimension_r, tp, fp))
    with open("knn.txt", 'a') as results_file:
        results_file.write("\n" + record)
Ejemplo n.º 2
0
def sum_labels(labels, distance_matrix):
    """Split each label's distances into same-category and other-category groups.

    Every label is classified by passing the text before its ``-`` to
    ``dynamic.categorisePayload`` together with the module-level
    ``categories``. For each label, the entries of its distance-matrix row are
    then sorted into two groups according to whether the column's label has
    the same category.

    Args:
        labels: list of labels of the form ``<class>-<details>``; entry ``i``
            names both row ``i`` and column ``i`` of ``distance_matrix``.
        distance_matrix: indexable as ``distance_matrix[row][col]``.

    Returns:
        A tuple ``(possfpval, possfp_labels, res, val, match_labels)`` of
        dicts keyed by label:

        * ``val`` / ``match_labels``: distances to, and labels of, the entries
          in the same category.
        * ``possfpval`` / ``possfp_labels``: the same for entries in a
          different category (possible false positives).
        * ``res``: a list holding the sum of ``val[label]``.

    Raises:
        ValueError: if a label does not contain exactly one ``-``.
    """
    # Classify each label once up front instead of once per (label, label) pair.
    # NOTE(review): assumes categorisePayload is a pure lookup - confirm.
    label_categories = []
    for label in labels:
        label_class, _details = label.split("-")
        label_categories.append(dynamic.categorisePayload(label_class, categories))

    # Row of each label's first occurrence. This is what labels.index(label)
    # returned, without a linear scan for every matrix cell.
    first_row = {}
    for position, label in enumerate(labels):
        first_row.setdefault(label, position)

    val = {}
    match_labels = {}
    possfpval = {}
    possfp_labels = {}
    res = {}

    for label, category in zip(labels, label_categories):
        row = distance_matrix[first_row[label]]
        for i, other_category in enumerate(label_categories):
            if category == other_category:  # same main category or sub-category
                val.setdefault(label, []).append(row[i])
                match_labels.setdefault(label, []).append(labels[i])
                res.setdefault(label, [])
            else:  # different category: candidate false positive
                possfpval.setdefault(label, []).append(row[i])
                possfp_labels.setdefault(label, []).append(labels[i])

        # Left-to-right sum starting at 0, as in a manual accumulation loop.
        res[label].append(sum(val[label]))

    return possfpval, possfp_labels, res, val, match_labels
Ejemplo n.º 3
0
def k_nearest_neighbor(distance_matrix,labels,exemplars,k):
    """Score a k-nearest-neighbour majority vote and log the totals.

    Row ``n`` of ``distance_matrix`` belongs to ``labels[n]`` and column ``j``
    to ``exemplars[j]``. For every row the ``k`` smallest entries are taken as
    neighbours. A neighbour counts as "right" when its category equals the
    datapoint's category, where the category is whatever
    ``dynamic.categorisePayload`` returns for the text before the ``-`` in the
    label, looked up in the module-level ``categories``.

    A datapoint is a TP when strictly more neighbours are right than wrong;
    otherwise (ties included) it is an FP. One
    ``k,exemplars,datapoints,tp,fp`` line is appended to ``knn.txt``.

    Args:
        distance_matrix: iterable of rows accepted by ``argsort``.
        labels: datapoint labels of the form ``<class>-<details>``.
        exemplars: exemplar labels of the same form.
        k: neighbour count; an int or a numeric string.

    Raises:
        ValueError: if a label or exemplar does not contain exactly one ``-``.
    """
    neighbour_count = int(k)
    tp = fp = 0

    for counter, row in enumerate(distance_matrix):
        label = labels[counter]
        knn_index = argsort(row)[:neighbour_count]

        # The datapoint's category does not depend on the neighbour, so it is
        # classified once per row.
        # NOTE(review): assumes categorisePayload is a pure lookup - confirm.
        part1Class, part1Details = label.split("-")
        category1 = dynamic.categorisePayload(part1Class, categories)

        w_cat = 0  # neighbours in a different category
        r_cat = 0  # neighbours in the same category
        for x in knn_index:
            part2Class, part2Details = exemplars[x].split("-")
            category2 = dynamic.categorisePayload(part2Class, categories)
            if category1 != category2:
                w_cat += 1
                verdict = "FP"
            else:
                r_cat += 1
                verdict = "TP"
            # Single-argument print() gives the same output on Python 2 and 3.
            print("datapoint = %s, index = %s , exemplar/neighbor = %s , %s"
                  % (label, knn_index, exemplars[x], verdict))

        # Strict majority: a tie is counted as a false positive.
        if r_cat > w_cat:
            tp += 1
            print("Majority vote True:  %s" % (r_cat,))
        else:
            fp += 1
            print("Majority vote False:  %s" % (w_cat,))

    dimension_c = len(exemplars)
    dimension_r = len(labels)
    print("Exemplars:  %s" % (dimension_c,))
    print("Datapoints:  %s" % (dimension_r,))
    print("Total TP =  %s" % (tp,))
    print("Total FP =  %s" % (fp,))

    # str(k) lets k be an int as well as a string.
    record = ",".join(str(value) for value in (k, dimension_c, dimension_r, tp, fp))
    with open("knn.txt", 'a') as results_file:
        results_file.write("\n" + record)
Ejemplo n.º 4
0
def loadModel(filename,categories):
    """Load exemplar thresholds from a model file into the global ``threshDict``.

    Each non-blank line must have the form ``<label>,<threshold>`` where the
    label is ``<class>-<details>``. The class part is passed to
    ``dynamic.categorisePayload`` with ``categories``, and the pair
    ``[label, threshold]`` is appended to ``threshDict[category]``. The
    threshold is stored as the string read from the file, not converted.

    Args:
        filename: path of the model file to read.
        categories: category definitions handed to ``dynamic.categorisePayload``.

    Raises:
        ValueError: if a non-blank line does not contain exactly one ``,`` or
            its label does not contain exactly one ``-``.
    """
    global threshDict
    with open(filename, 'r') as model_file:
        for line in model_file:
            # Strip before testing: lines read from a file keep their newline,
            # so a blank line is "\n", never "".
            line = line.strip()
            if not line:
                continue
            ex_label, ex_thresh = line.split(',')
            part1Class, part1Details = ex_label.split("-")
            # Classify the header -> return 14 for 14.1, 14.1.1, 14.1.2
            categ = dynamic.categorisePayload(part1Class, categories)
            threshDict.setdefault(categ, []).append([ex_label, ex_thresh])
Ejemplo n.º 5
0
# Write the distance matrix through `writer`, one row per datapoint, with the
# datapoint's label prepended as the first cell.
temp = []
counter = 0
for row in distance_matrix:
    # NOTE(review): `max` is not assigned in this fragment. Unless it is
    # rebound earlier in the file this compares against the builtin function -
    # confirm the intended row limit.
    if counter <= max:
        temp = [labels[counter]] + list(row)
        writer.writerow(temp)
        counter = counter + 1

# 1-nearest-neighbour check: each datapoint is compared with the exemplar at
# the first position holding the row's minimum value.
counter = 0
tp = fp = 0
for row in distance_matrix:
    idx = nonzero(row == min(row))[0][0]
    part1Class, part1Details = labels[counter].split("-")
    # Classify the header -> return 14 for 14.1, 14.1.1, 14.1.2
    category1 = dynamic.categorisePayload(part1Class, categories)
    part2Class, part2Details = exemplars[idx].split("-")
    category2 = dynamic.categorisePayload(part2Class, categories)
    # Single-argument print() gives the same output on Python 2 and 3.
    if category1 == category2:
        print("datapoint = %s, index = %s , exemplar/neighbor = %s , TP"
              % (labels[counter], idx, exemplars[idx]))
        tp = tp + 1
    else:
        print("datapoint = %s, index = %s , exemplar/neighbor = %s , FP"
              % (labels[counter], idx, exemplars[idx]))
        fp = fp + 1
    counter = counter + 1

print("Exemplars:  %s" % (dimension_c,))
print("Datapoints:  %s" % (dimension_r,))
print("Total TP =  %s" % (tp,))
print("Total FP =  %s" % (fp,))