Ejemplo n.º 1
0
def run_classifier(name, Xtr, Ytr, Xte, Yte, init=None, ensemble_run=False):
    """Cross-validate, train and evaluate the classifier called *name*.

    Args:
        name: Identifier handed to ``classifier`` and used as the prefix of
            every printed metric line.
        Xtr, Ytr: Training inputs and targets; used for both cross-validation
            and the final fit.
        Xte, Yte: Held-out inputs and targets used for the reported metrics.
        init: Optional initialisation settings forwarded to ``classifier`` as
            ``init=``. ``None`` (the default) means "no settings" and is
            forwarded as a fresh empty dict.
        ensemble_run: When true, return the raw predictions instead of the
            metric summary.

    Returns:
        ``[name, auc, mean_cv_score, f1, precision, recall]`` normally, or
        ``[Yte_hat, mean_cv_score]`` when ``ensemble_run`` is true.
    """
    # A dict literal as the default would be created once and shared by every
    # call, so anything the classifier stored in it would leak into later
    # runs. Build a new one per call instead.
    if init is None:
        init = {}

    learner = classifier(name, init=init)
    l = learner.get_learner()
    print("learner initialized......")
    score = cross_validation(l, Xtr, Ytr)
    print("cross_validation complete.....")
    learner.train(Xtr, Ytr)
    print("learner training complete.....")
    Yte_hat = learner.predict(Xte)
    print(Yte_hat)
    # `score` is indexed by 'test_score'; the label printed below calls it a
    # 10-fold CV, but the fold count is decided inside cross_validation().
    mean_cv_score = np.mean(score['test_score'])
    auc = compute_auc(Yte, Yte_hat)
    print("{} - AUC: {}".format(name, auc))

    f1 = f1_score(Yte, Yte_hat)
    precision = precision_score(Yte, Yte_hat)
    recall = recall_score(Yte, Yte_hat)

    print("{} - Mean 10-fold CV: {}".format(name, mean_cv_score))
    print("{} - precision: {}".format(name, precision))
    print("{} - recall: {}".format(name, recall))
    print("{} - f1-score: {}".format(name, f1))
    print("{} - AUC: {}".format(name, auc))

    if ensemble_run:
        return [Yte_hat, mean_cv_score]
    return [name, auc, mean_cv_score, f1, precision, recall]
Ejemplo n.º 2
0
def runData(col):
    """Run the SVM classifier for column *col* and return ``(auc, err)``.

    The classifier is built from the module-level ``dataFile`` and
    ``castFile`` together with *col*. ``clf.SVM`` yields ``(err, auc)``;
    this function hands the pair back in the opposite order.
    """
    # The original declared `global auc_sum` / `global err_sum` here but never
    # assigned either name, so the declarations had no effect and were dropped.
    clf = classify.classifier(dataFile, castFile, col)
    err, auc = clf.SVM(C=.1)
    # Alternative left by the original author: err, auc = clf.SGD()

    return auc, err
Ejemplo n.º 3
0
def main():
    """Load ``out.txt``, build a classifier from it and run the tester on it."""
    raw = np.loadtxt("out.txt")
    # Alternative data source left by the original author:
    # raw = filefinder.getTrainingData()

    # Columns 0..127 form the sample matrix; column 128 holds the response.
    features = raw[:, :128].astype(np.float32)
    n_rows = len(features)
    # The responses are passed on as an (n_rows, 1) float32 column.
    column = raw[:, 128]
    labels = column.reshape(n_rows, 1).astype(np.float32)

    cc = classify.classifier(features, labels)

    # tester.testme(knn)
    tester.testme(cc)
    print("end of program")
Ejemplo n.º 4
0
def filterAddr(ifile):
	"""Count the distinct gadget keys in *ifile* and tally their classes.

	Each line is split at the first space into an id and the remaining text.
	The remaining text is split on the literal ``u'``. The dedup key is the
	id, a dash, and the second whitespace-separated token of the first piece
	after the split (presumably an address -- confirm against the file
	producer). Only the first line seen for a key is classified.

	Args:
		ifile: Path of the text file to read.

	Returns:
		``(counter, classCnt)``: the number of distinct keys, and a dict
		mapping each value returned by ``classifier`` to how many distinct
		keys produced it.
	"""
	# A set gives O(1) membership tests; the previous list rescanned every
	# earlier key for each input line.
	seen = set()
	classCnt = {}
	with open(ifile, "r") as f:
		for line in f:
			gid, insns = line.strip().split(" ", 1)
			insns = insns.split("u'")
			gidDaddr = gid + "-" + insns[1].split()[1]
			if gidDaddr in seen:
				continue
			seen.add(gidDaddr)

			# The first and last pieces of the split are not passed on.
			classification = classifier(getPattern(insns[1:-1]))
			classCnt[classification] = classCnt.get(classification, 0) + 1

	# Every key is added exactly once, so the set size is the old counter.
	return len(seen), classCnt
Ejemplo n.º 5
0
def displayGadget(dfile, output="b.png", min_instr=0, min_mem_in=0, count=False, inspect=False):
	"""Extract candidate gadget sub-graphs from the graph in *dfile*, filter and classify them.

	Every edge of the loaded graph whose child node name starts with "mem_"
	seeds one candidate gadget (at most one per distinct parent node). The
	candidate is grown upwards along incoming edges, then run through a series
	of filters (address range, first/last instruction, forbidden keywords,
	optional size limits). Survivors are classified with
	classifier(getPattern(gadget)) and collected.

	This block is Python 2 code (print statements, raw_input).

	Args:
		dfile: Passed straight to pgv.AGraph; presumably the path of a
			Graphviz dot file (pgv looks like pygraphviz -- confirm).
		output: File the gadget is drawn to when inspect is true.
		min_instr: If non-zero, drop gadgets with fewer than this many nodes
			whose "shape" attribute is "box".
		min_mem_in: If non-zero, drop gadgets with fewer than this many nodes
			of in-degree 0.
		count: If true, print counters for every gadget that passes the
			size limits.
		inspect: If true, print details of each kept gadget, draw it to
			output and wait for Enter before continuing.

	Returns:
		(numGadget, gadgets): the number of kept gadgets and a list of
		[gadget, gclass, rootinsn, leafinsn] entries, one per kept gadget.
	"""
	g = pgv.AGraph(dfile)


	# Parents that already seeded a gadget; each parent is used at most once.
	visited = {}
	# NOTE(review): stack and queue are never used below.
	stack = []
	queue = []
	# Look for all tail node
	numGadget = 0
	gadgets = []
	# Called without a node argument, so this walks the edges of the whole graph.
	for data_vtx in g.in_edges_iter():
		# Fresh graph that will hold just this candidate gadget.
		gadget = pgv.AGraph(directed=True, strict=False)
		gadget.node_attr.update(shape="box")
		# gadgetcol = []

		parent = data_vtx[0]
		child = data_vtx[1]

		# Only edges ending in a "mem_" node can seed a gadget.
		if not child.startswith("mem_"):
			continue

		if visited.get(parent, False):
			continue

		visited[parent] = True

		# if g.in_degree(parent) == 0 or not g.in_edges(parent)[0][0].startswith("R"):
			# continue

		# print data_vtx
		# gadgetcol.append(data_vtx)

		# Copy the seed edge and both endpoints, with their attributes, into the gadget.
		t_parent_node = g.get_node(parent)
		t_child_node = g.get_node(child)
		gadget.add_node(t_parent_node.name, **t_parent_node.attr)
		gadget.add_node(t_child_node.name, **t_child_node.attr)
		gadget.add_edge(parent, child, None, **data_vtx.attr)

		# Traverse up to split
		# rootinsn is a list of [node, node] pairs; it starts as the seed parent
		# paired with itself and is reset to None as soon as the walk takes a step.
		hasMemInPreRead = False
		rootinsn = [[t_parent_node, t_parent_node]]
		# leafinsn stays fixed at the seed parent; it feeds LastInsnFilter below.
		leafinsn = t_parent_node
		# Follow the chain upwards for as long as there is exactly one incoming edge.
		while g.in_degree(parent) == 1:
			rootinsn = None
			parent_edge = g.in_edges(parent)[0]
			parent = parent_edge[0]

			t_parent_node = g.get_node(parent)
			gadget.add_node(t_parent_node.name, **t_parent_node.attr)
			gadget.add_edge(parent_edge[0], parent_edge[1], None, **parent_edge.attr)
			# gadgetcol.append(parent_edge)

			# print "pr",parent
			# Reaching a "mem_" node ends the upward walk.
			if parent.startswith("mem_"):
				hasMemInPreRead = True
				# Pair the node reading from this mem_ node with the mem_ node's
				# own predecessor if it has one, otherwise with the mem_ node itself.
				if g.in_degree(parent) != 0:
					rootinsn = [ [g.get_node(g.in_edges(parent)[0][0]), g.get_node(parent_edge[1]) ]]
				else:
					rootinsn = [[g.get_node(parent_edge[0]), g.get_node(parent_edge[1])]]



				# print "mem in pr"
				break

			# gadget.add_edge(parent_edge)

		# The walk moved but did not stop on a mem_ node: a node with no inputs
		# becomes the root, a fork (several inputs) starts with an empty list
		# that followToMemLoad fills in below.
		if rootinsn is None:
			if g.in_degree(parent) == 0:
				t_parent_node = g.get_node(parent)
				rootinsn = [[t_parent_node, t_parent_node]]
			else:
				rootinsn = []

		# print gadget



		# print parent,child
		# print "gc",gadgetcol
		# continue
		# at the point where we have input from 2 memory loc
		# Recursive helper, redefined on every loop iteration. It copies `edge`
		# and its parent node into `gadget` and returns (code, pairs):
		#   -1  the edge's parent is a "mem_" node,
		#    0  a deeper recursive call hit a "mem_" node (only that call's pairs are returned),
		#    1  the edge's parent has no incoming edges,
		#   otherwise the sum of the codes of the recursive calls.
		def followToMemLoad(edge, g, gadget):
			parent = edge[0]
			child = edge[1]
			t_parent_node = g.get_node(parent)
			c_node = g.get_node(child)
			gadget.add_node(t_parent_node.name, **t_parent_node.attr)
			gadget.add_edge(parent, child, None, **edge.attr)


			# print parent,child
			# print "pear"
			if parent.startswith("mem_"):
				# Look for an input of `child` that does not come from a mem_ node.
				hasRegOp = False
				for edge2 in g.in_edges_iter(child):
					if not edge2[0].startswith("mem_"):
						hasRegOp = True
						# print "asdsad",edge2
						break
				if hasRegOp:
					# edge2 is still bound to the edge that triggered the break above.
					t_node = g.get_node(edge2[0])

					gadget.add_node(t_node.name, **t_node.attr)
					# NOTE(review): the added edge starts at edge2[0] but carries
					# edge.attr rather than edge2.attr -- confirm this is intended.
					gadget.add_edge(edge2[0], child, None, **edge.attr)
					if g.in_degree(parent) != 0:
			 			return -1, [[g.get_node(g.in_edges(parent)[0][0]), c_node], [t_node, c_node]]
			 		else:
			 			return -1, [[c_node, c_node], [t_node, c_node]]
				# print g.in_edges(parent)[0]

				if g.in_degree(parent) != 0:
					return -1, [[g.get_node(g.in_edges(parent)[0][0]), c_node]]
				else:
					return -1, [[c_node, c_node]]


			# print "apple"
			# noMemLoad = 0
			rval = 0
			parentCnt = 0
			rootinsn = []
			# Source nodes already followed from this parent, so parallel edges
			# from the same source are only followed once.
			visited2 = {}
			# Note: this loop rebinds the `edge` parameter.
			for edge in g.in_edges_iter(parent):

				if visited2.get(edge[0], False):
					continue

				visited2[edge[0]] = True

				# $print edge
				arval, insn = followToMemLoad(edge, g, gadget)
				# The first branch that reaches a mem_ node ends the search here.
				if arval == -1:
					return 0, insn
				rval += arval
				parentCnt += 1
				rootinsn += insn


			# No incoming edges at all: this node is itself a root.
			if parentCnt == 0:
				return 1, [[t_parent_node, t_parent_node]]
			return rval, rootinsn  # if not len(rootinsn) == 1 else rootinsn[0]


		# The upward walk did not end on a mem_ node, so explore the incoming
		# edges of the node where it stopped.
		if not hasMemInPreRead:
			# rootinsn = []
			forkHasNoMemLoad = 1
			for fork_edge in g.in_edges_iter(parent):
				forkHasNoMemLoad, rinsn = followToMemLoad(fork_edge, g, gadget)
				rootinsn += rinsn
				# print forkHasNoMemLoad
				# -1 means this fork edge comes directly from a mem_ node; stop exploring.
				if forkHasNoMemLoad == -1:
					# rootinsn += rinsn
					break
				# rootinsn += rinsn
					# raise Exception("get -1")
					# hasRegOp = False
					# for edge in g.in_edges_iter(parent):
					#  	if not edge[0].startswith("mem_"):
					#  		hasRegOp = True
					# 		# break

					# if hasRegOp:
						# forkHasNoMemLoad = 1
					# break
				# if forkHasNoMemLoad > 0: break

			# if forkHasNoMemLoad >0 : continue

		# #filtering of address

		# Hard-coded address window; the commented alternatives are upper bounds
		# the author used for other targets.
		min_addr = int("8048000",16)
		max_addr = int("808a4ff",16) #wuf
		# max_addr = int("805c86b",16) #sudo
		# max_addr = int("804e93b",16) #ghttpd
		# max_addr = int("80511f3",16) #orzhttpd
		

		# Keep the gadget only if at least one of its addresses lies inside the window.
		if not any((min_addr <= instr_addr and instr_addr <= max_addr) for instr_addr in getAddrs(gadget)):
			continue

		# Drop gadgets whose only root pair starts at one of these nodes.
		RegInFilter = ["R_ESP_0_pre"]
		if len(rootinsn) == 1 and rootinsn[0][0] in RegInFilter:
			continue

		# Drop gadgets whose leaf node label contains one of these substrings.
		LastInsnFilter = ["push"]
		if any(f in leafinsn.attr["label"] for f in LastInsnFilter):
			continue

		# Drop gadgets whose only root pair has a second node whose label
		# contains one of these substrings.
		FirstInsnFilter = ["popl"]
		# print len(rootinsn)
		# print (rootinsn)
		if len(rootinsn) == 1 and any(f in rootinsn[0][1].attr["label"] for f in FirstInsnFilter):
			continue

		instructions = fetchInstruction(gadget)
		#insnException = ["calll","%ebp","%esp","%eip"]
		# Drop the gadget if any fetched instruction contains one of these keywords.
		insnException = ["calll","%eip"]
		if any(keyword in instruction for keyword in insnException for instruction in instructions):
			continue

		# print rootinsn,leafinsn

		# Optional size limits / counting; skipped entirely when all three are falsy.
		if min_instr or min_mem_in or count:
			instrCnt = 0
			mem_in_cnt = 0
			for v in gadget.nodes():
				if (count or min_instr) and v.attr["shape"] == "box":
					instrCnt += 1
				if (count or min_mem_in) and gadget.in_degree(v) == 0:
					mem_in_cnt += 1

			if min_instr and instrCnt < min_instr:
				continue
			if min_mem_in and mem_in_cnt < min_mem_in:
				continue
			if count:
				print "instrCnt", instrCnt
				# NOTE(review): this prints the min_mem_in threshold, not the
				# computed mem_in_cnt -- confirm which one was meant.
				print "min_mem_in", min_mem_in

		# The gadget passed every filter: classify and record it.
		numGadget += 1
		gclass = classifier(getPattern(gadget))
		gadgets.append([gadget, gclass, rootinsn, leafinsn])

		# Interactive mode: dump details, render the gadget and wait for Enter.
		if inspect:
			# if gclass == (GadgetType.Unknown,GadgetClass.Unknown):
			print "insn"
			print instructions
			print 			
			print "pattern"
			print getPattern(gadget)
			print 
			print "addrs"
			print getAddrs(gadget)
			print 
			print "rootinsn"
			print rootinsn
			print 
			print "leafinsn"
			print leafinsn
			gadget.draw(output, "png", "dot")
			print "Press enter to load next gadget"
			raw_input()
			# else:
			# 	print "class",gclass
	# The source graph is cleared once every edge has been processed.
	g.clear()
	return numGadget, gadgets
import os

from flask import Flask, render_template, url_for, flash, redirect, request

from classify import classifier

# One classifier instance, created at import time and shared by all requests.
model = classifier()

app = Flask(__name__)
# A secret key committed to source control is known to anyone who can read the
# repository. Take it from the SECRET_KEY environment variable when one is
# set; the historical value is kept only as a fallback so existing setups
# keep working unchanged.
app.config['SECRET_KEY'] = os.environ.get(
    'SECRET_KEY', '5791628bb0b13ce0c676dfde280ba245')


@app.route("/", methods=['GET', 'POST'])
def home():
    """Show the input page, or the classification result for a submitted form.

    Any request that is not a POST gets ``home.html``. A POST classifies the
    submitted ``text`` form field with the shared ``model`` and renders
    ``result.html`` with the outcome as ``results``.
    """
    if request.method != "POST":
        return render_template('home.html')

    submitted_text = request.form['text']
    outcome = model.classify(submitted_text)
    return render_template('result.html', results=outcome)


@app.route("/about")
def about():
    """Render the help page (``help.html``) for the ``/about`` URL."""
    page = render_template('help.html')
    return page


@app.errorhandler(404)
def page_not_found(e):
    """Render ``404.html`` for requests that hit the 404 error handler.

    The error object *e* is accepted but not used.
    """
    body = render_template('404.html', title='404')
    # The status code is returned alongside the body so the response is sent
    # as a 404.
    not_found = 404
    return body, not_found


# Start the application only when this file is executed directly, not on import.
if __name__ == '__main__':
    # NOTE(review): debug=True enables Flask's debug mode; confirm this entry
    # point is only ever used for local development.
    app.run(debug=True)
             retval, labels, stats, centroids	=	cv2.connectedComponentsWithStats(index)
             ##to finally get the symbols of one column in a list and the column image with the bounding boxes detected
             draw_rec(crop_list[jj],stats)
             jj=jj+1
         
         #cutouts, rec = draw_rec(crop_list[index],stats)
         
         flag=0
                  
 else:       
     # Intended here: take the final result and send it to the mobile client.
     #"""with open(r'C:\Users\user\Desktop\glyphreader-master\examples\img%d.jpg'%i, 'rb') as image:
         #encode_bytes = base64.encodebytes(same_image.read())"""
         
     #calling the classifier which returns gardiner label for each Image
     glbls,imagePaths=classifier()
     i=0
     jsonList=[]
     fullMsg=''
     for lbl in glbls:
         print(*lbl)
         #img_file=open(r'C:\Users\user\Desktop\glyphreader-master\examples\img%d.jpg'%i,'rb')
         # read the image file
         #imgData = img_file.read()
         
         """# build JSON object
         outjson = {}
         outjson['img'] = str(imgData,'latin-1')
         outjson['firstLabel']=str(lbl[0])
         outjson['secondLabel']=str(lbl[1])
         outjson['thirdLabel']=str(lbl[2])