# from sklearn.feature_extraction.text import CountVectorizer import json import os questionList = os.listdir("dataset_new1") questionList.sort() print questionList for i in range(len(questionList) - 2): dirList = os.listdir("dataset_new1/" + questionList[i]) # dir is your directory path dirList.sort() os.mkdir("data/" + questionList[i]) for j in range(len(dirList) - 1): # print("**************" + dirList[j] + "*************") try: cfg = buildCFG("dataset_new1/" + questionList[i] + "/" + dirList[j]) except: cfg = buildCFG( "dataset_new1/" + questionList[i] + "/" + dirList[j], 'main') with open('graph.json', "r") as f1: dta = json.load(f1) with open( "data/" + questionList[i] + "/" + dirList[j][:-2] + ".json", "w") as f2: json.dump(dta, f2) dirList = os.listdir("dataset_new1/" + questionList[i] + "/goodset") os.mkdir("data/" + questionList[i] + "/goodset") dirList.sort() for j in range(len(dirList)): # print(dirList[j])
# NOTE(review): truncated/collapsed fragment — the opening of the enclosing
# function (apparently `extractFilenameLabel(database)`, iterating records
# `d` of a manifest whose second whitespace-separated field is a filename)
# is not visible here, so the original text is kept verbatim. Overall flow:
# collect [path, label] pairs into `dataset`, then for each file build its
# CFGs via buildCFG, flatten each function's dictionary matrix into a row
# of numbers, and append "<features> <label>" lines to CWE119/dataset.
# The bare `except:` silently drops files whose CFG construction fails.
filen = d.split(" ")[1] if checkFuncInFile(sourcefiles+filen): print len(dataset) dataset.append([sourcefiles+filen,label]) print len(data) extractFilenameLabel(database) print "Total " + str(len(dataset)) succ = [] failed = [] for f in dataset: try: cfgs = buildCFG(f[0]) print f succ.append(f) for cfg in cfgs: #content = "\n[+] Function: " + str(cfg[0]) content = cfg[1].printer() dataMatrix = dictoMatrix(content) temp = np.array(dataMatrix).flatten().tolist() data = " ".join(str(x) for x in temp) writetofile("CWE119/dataset", data + " " + str(f[1]) + "\n") except: #print f[0] + "failed" failed.append(f)
from utils import buildCFG cfgs = buildCFG('Using_freed_memory.c') print cfgs for cfg in cfgs: print cfg print "\n[+] Function: ", cfg[0] print cfg[1].printer()
from utils import buildCFG #cfgs = buildCFG('Using_freed_memory.c') filename = 'testcode.c' cfgs = buildCFG(filename) print cfgs l = [] for cfg in cfgs: print cfg print "\n[+] Function: ", cfg[0] print cfg[1].printer() with open(filename, 'r') as content_file: content = content_file.read() start = content.index(cfg[0]) start = content.index("{", start) stack = [] funcContent = "" for i in range(start,len(content)): if content[i] == "{": stack.append("{") elif content[i] == "}": stack.remove(stack[0]) if len(stack) is 0: funcContent = content[start+len(cfg[0]):i] break print funcContent
from utils import buildCFG from time import sleep PATH = '/home/gbaduz/Downloads/bash-4.3/' import os from glob import glob result = [y for x in os.walk(PATH) for y in glob(os.path.join(x[0], '*.c'))] succ = [] failed = [] for f in result: try: cfgs = buildCFG(f) print f succ.append(f) for cfg in cfgs: print "\n[+] Function: ", cfg[0] print cfg[1].printer() sleep(2) except: print f + "failed" failed.append(f) print "success " + str(len(succ)) print "failed " + str(len(failed))
# --- Commented-out scratch/experiment code (kept for reference; none of it executes) ---
# 1) Goodset conversion loop (mirrors the main dataset loop):
# for j in range(len(dirList) - 1):
#     try:
#         cfg = buildCFG(datasetLocation + questionList[i] + "/goodset/" + dirList[j])
#     except:
#         cfg = buildCFG(datasetLocation + questionList[i] + "/goodset/" + dirList[j], 'main')
#     with open('graph.json', "r") as f1:
#         dta = json.load(f1)
#     with open(dataLocation + questionList[i] + "/goodset/" + dirList[j][:-2] + ".json", "w") as f2:
#         json.dump(dta, f2)
# 2) One-off probe of test.c:
# if (isTest < 10):
#     try:
#         cfg = buildCFG("test.c")
#     except:
#         cfg = buildCFG("test.c", 'main')
#     print cfg.printer()
# cfg = buildCFG('test.c', 'main')
# 3) CountVectorizer corpus experiment over two graph JSON files:
# with open('data2/1/goodset/2.json', "r") as f1:
#     dta = json.load(f1)
# with open('data2/1/1.json', "r") as f1:
#     dta1 = json.load(f1)
# print dta['features'][0]['basic']
# featBasic = " ".join(dta['features'][0]['basic'])
# corpus = []
# corpus.append(featBasic)
# corpus.append(" ".join(dta1['features'][0]['basic']))
from utils import buildCFG cfg = buildCFG('testcode/example.c', 'addNumbers') print "[+] Size of the CFG:", str(cfg.size()) print cfg.printer()