def load_dataset(f):
    """Load a dataset from a ``.gsp`` file.

    :param f: path to the dataset file
    :return: list of graphs as produced by :func:`load_graphs`
    """
    return load_graphs(f)
def main(filename='gd10.txt', min_sup=8):
    """Match patterns from ``Malware_sub_Goodware.txt`` against every graph.

    For each (graph, pattern) pair, prints whether a subgraph isomorphism
    was found.

    :param filename: graph dataset file, resolved against the module-level
        ``filepath``
    :param min_sup: minimum support (accepted for interface compatibility;
        not used by this routine)
    """
    filename = os.path.join(filepath, filename)
    graphs = load_graphs(filename)
    with open("Malware_sub_Goodware.txt", "r") as ins:
        content = ins.read().splitlines()
    C = parse_patterns(content)
    for g, graph in enumerate(graphs):
        for p, li in enumerate(C):
            E = subgraph_isomorphisms(li, graph)
            if len(E) == 0:
                print("Graph: {} , Pattern: {} Not Matched".format(
                    g + 1, p + 1))
            else:
                print("Graph: {} , Pattern: {} Matched".format(g + 1, p + 1))


def parse_patterns(content):
    """Parse pattern-file lines into a list of edge lists.

    The file format is a literal ``Pattern`` header line followed by edge
    tuples written as ``(u, v, 'L_u', 'L_v', 'L_uv')``.

    Bug fixed vs. the original inline code: the running edge buffer was only
    flushed when the *next* ``Pattern`` header appeared, so the final
    pattern's edges were dropped and an empty list was appended for the
    first header. Both cases are handled here.

    :param content: raw lines of the pattern file
    :return: list of patterns; each pattern is a list of
        ``(u, v, L_u, L_v, L_uv)`` tuples with ``u``/``v`` as ints
    """
    patterns = []
    buffer = []
    for line in content:
        if line == 'Pattern':
            if buffer:  # flush the previous pattern; skip the leading empty buffer
                patterns.append(buffer)
            buffer = []
        else:
            line = re.sub(r'[\(\)]', '', line)
            u, v, L_u, L_v, L_uv = line.split(", ")
            buffer.append((int(u), int(v), L_u.strip("'"),
                           L_v.strip("'"), L_uv.strip("'")))
    if buffer:  # flush the trailing pattern (previously lost)
        patterns.append(buffer)
    return patterns
def main(filename='data/exampleG.txt', min_sup=2):
    """Mine frequent subgraphs from *filename* and print every pattern found.

    :param filename: graph dataset file, resolved against the module-level
        ``filepath``
    :param min_sup: minimum support threshold handed to gSpan
    """
    dataset_path = os.path.join(filepath, filename)
    dataset = load_graphs(dataset_path)
    found = []
    gSpan([], dataset, min_sup=min_sup, extensions=found)
    for idx, pattern in enumerate(found, start=1):
        print('Pattern %d' % idx)
        for edge in pattern:
            print(edge)
        print('')
def main(filename=fname, min_sup=msup):
    """Run gSpan once per width threshold in the module-level ``WDT`` list and
    write the mined patterns plus the three counters to ``output.txt``.

    NOTE(review): ``output.txt`` is opened in ``"w+"`` inside the WDT loop, so
    each iteration truncates the file and only the last threshold's results
    survive — presumably intentional, confirm. The counters are per-iteration.

    Fix vs. original: the output file was opened with a bare ``open()`` and a
    manual ``close()``, leaking the handle if any write raised; a ``with``
    block now guarantees cleanup.

    :param filename: dataset file (default: module-level ``fname``)
    :param min_sup: minimum support threshold (default: module-level ``msup``)
    """
    start_time = time.time()
    filename = os.path.join(filepath, filename)
    graphs, maxW = load_graphs(filename, rough_min, span, db_size, bin_count)
    for wdt in WDT:
        print("*************")
        print(wdt)
        n = len(graphs)
        extensions = []
        canCount = 0
        gCount = 0
        fwsCount = 0
        oc = range(len(graphs))
        fwsCount, canCount, gCount = gSpan(
            [], graphs, min_sup=min_sup, extensions=extensions, maxW=maxW,
            fwsCount=fwsCount, canCount=canCount, gCount=gCount, oc=oc)
        end_time = time.time()
        with open("output.txt", "w+") as obj:
            for i, ext in enumerate(extensions):
                obj.write('Pattern %d\n' % (i + 1))
                for _c in ext:
                    obj.write(str(_c))
                    obj.write('\n')
                obj.write('')
            obj.write("--- %s seconds ---\n" % (end_time - start_time))
            obj.write(str(fwsCount))
            obj.write('\n')
            obj.write(str(canCount))
            obj.write('\n')
            obj.write(str(gCount))
            obj.write('\n')
def gspan(train_test_split, path_to_graph_dataset, min_sup):
    """Shuffle the dataset, split it into train/test, mine frequent subgraphs
    on the train split, and persist per-graph subgraph-id lists under ``id/``.

    Bug fixed vs. original: gSpan was called with a hard-coded ``min_sup=30``,
    silently ignoring the ``min_sup`` argument; the parameter is now honored.

    :param train_test_split: fraction of graphs used for training (0..1)
    :param path_to_graph_dataset: dataset path passed to :func:`load_graphs`
    :param min_sup: minimum support threshold for gSpan
    """
    graph = load_graphs(path_to_graph_dataset)
    n = len(graph)
    idxs = np.arange(0, n, 1)
    random.shuffle(idxs)
    # Reorder the graph list by the shuffled index permutation.
    graphs = [graph[e] for e in idxs]

    # NOTE(review): this label file is read but its contents were never used
    # (the original looped over it assigning a throwaway variable). The read
    # is kept so the file is still required to exist — confirm it is needed.
    with open("/home/N1801734D/grid/graph_2D_CNN/datasets/classes/test/test_classes.txt", 'r') as f:
        y = [int(elt) for elt in f.read().splitlines()]

    # Persist the permutation so labels can be re-aligned later.
    np.save("shuffled_graph_idxs", idxs)
    split_at = int(train_test_split * n)
    train_graphs = graphs[:split_at]
    test_graphs = graphs[split_at:]

    extensions = []
    gSpan([], train_graphs, min_sup=min_sup, extensions=extensions)
    for i, ext in enumerate(extensions):
        for _c in ext:
            print(_c)
        print('')

    # Map each mined subgraph to the train graphs containing it, then invert
    # to a per-graph list of subgraph ids and persist both representations.
    support, train_graphs_list = get_graphs_for_each_subgraph(
        extensions=extensions, graphs=train_graphs)
    train_subgraphs_list = get_subgraphs_for_each_graph(
        graphs=train_graphs, graphs_list=train_graphs_list)
    for i in range(len(train_graphs)):
        np.save("id/train_np/g_" + str(i), np.array(train_subgraphs_list[i]))
        with open("id/train/g_" + str(i) + ".txt", "w") as f:
            f.writelines(["%s " % item for item in train_subgraphs_list[i]])

    # Same projection for the held-out test graphs (support is discarded).
    _, test_graphs_list = get_graphs_for_each_subgraph(
        extensions=extensions, graphs=test_graphs)
    test_subgraphs_list = get_subgraphs_for_each_graph(
        graphs=test_graphs, graphs_list=test_graphs_list)
    for i in range(len(test_graphs)):
        np.save("id/test_np/g_" + str(i), np.array(test_subgraphs_list[i]))
        with open("id/test/g_" + str(i) + ".txt", "w") as fo:
            fo.writelines(["%s " % item for item in test_subgraphs_list[i]])

    np.save("extensions", extensions)
    np.save("support", support)
def main(filename='data/Compound_422.txt', min_sup=300):
    """Mine frequent patterns on the full dataset, then on a user-chosen
    percentage sample, and report how well the sampled run recovers the
    original patterns (accuracy / precision / recall / Jaccard / F1).

    :param filename: dataset file resolved against the module-level ``filepath``
    :param min_sup: minimum support for the full-dataset mining run
    """
    filename = os.path.join(filepath, filename)
    graphs1 = load_graphs(filename)
    extensions = []
    print("original frequent pattern:")
    gSpan([], graphs1, min_sup=min_sup, extensions=extensions)
    for i, ext in enumerate(extensions):
        print('Pattern %d' % (i + 1))
        for _c in ext:
            print(_c)
        print('')
    # NOTE(review): start_time is not defined in this function — it appears to
    # be a module-level timestamp set at import time; confirm.
    print("--- %s seconds ---" % (time.time() - start_time))
    extensions1 = extensions
    print("original frequent pattern list:")
    print(extensions1)
    n = len(graphs1)
    print("total number of graph:")
    print(n)
    x = int(input('what percentage want to take:'))
    p = round((x / 100) * n)
    print("graph size now is :")
    print(p)
    graphs = random.sample(graphs1, p)
    extensions = []
    print("percentwise frequent pattern:")
    # NOTE(review): the sampled run uses a hard-coded support of 175 rather
    # than a value scaled from min_sup — kept as-is to preserve behavior.
    gSpan([], graphs, min_sup=175, extensions=extensions)
    for i, ext in enumerate(extensions):
        print('Pattern %d' % (i + 1))
        for _c in ext:
            print(_c)
        print('')
    print("percentwise frequent pattern list:")
    print(extensions)
    metrics = evaluate_patterns(extensions1, extensions)
    print("accuracy:", metrics['accuracy'])
    print("precission", metrics['precision'])
    print("recall", metrics['recall'])
    print("jaccard coefficient", metrics['jaccard'])
    print("F1Score:", metrics['f1'])


def evaluate_patterns(baseline, candidate):
    """Score a candidate pattern list against a baseline (ground-truth) list.

    Fixes vs. the original inline code: the loop variable no longer shadows
    the builtin ``list``, and true/false positives are counted in one pass.

    :param baseline: patterns mined from the full dataset
    :param candidate: patterns mined from the sample
    :return: dict with keys ``accuracy`` (%), ``precision``, ``recall``,
        ``jaccard`` (%), ``f1``
    :raises ZeroDivisionError: when baseline is empty or there are no
        positives at all (same failure mode as the original code)
    """
    tp = sum(1 for pat in candidate if pat in baseline)  # true positives
    fp = len(candidate) - tp                             # false positives
    l1 = len(baseline)
    l2 = len(candidate)
    fn = l1 - tp                                         # false negatives
    accuracy = (tp / l1) * 100
    precision = tp / (fp + tp)
    recall = tp / (tp + fn)
    jaccard = (tp / (l1 + l2)) * 100
    f1 = 2 * ((precision * recall) / (precision + recall))
    return {'accuracy': accuracy, 'precision': precision,
            'recall': recall, 'jaccard': jaccard, 'f1': f1}
def main(min_sup):
    """Mine frequent subgraphs from the generated input and return a report
    DataFrame with one row per pattern: support, unique vertices, links, and
    vertex count.

    Fixes vs. original: rows were accumulated with the per-row
    ``DataFrame.append`` (removed in pandas 2.0 and O(n^2)); they are now
    collected in a list and concatenated once.

    :param min_sup: minimum support threshold passed to gSpan
    :return: :class:`pandas.DataFrame` indexed by pattern number
    """
    allGraph2Dic = generateInput()
    gspanInput = [list(numberingIndex(graph2Dic)) for graph2Dic in allGraph2Dic]
    graphs = load_graphs(gspanInput)
    extensions = []
    extensions_sups = list()
    gSpan([], graphs, min_sup, extensions, extensions_sups)
    rows = []
    for i, ext in enumerate(extensions):
        nodelist, edgelist, nodecnt = summarize_pattern(ext)
        rows.append(pd.DataFrame(
            {
                'support': [extensions_sups[i]],
                'vertex': [nodelist],
                'link': [edgelist],
                'num_vert': [nodecnt],
            },
            index=[i]))
    if not rows:
        # Preserve the original's empty-report shape when nothing was mined.
        return pd.DataFrame()
    return pd.concat(rows)


def summarize_pattern(ext):
    """Collapse one gSpan pattern into vertex list, edge list and vertex count.

    :param ext: iterable of DFS-code edges, each a 5-tuple
        ``(u, v, L_u, L_v, L_uv)``
    :return: ``(nodelist, edgelist, nodecnt)`` where nodelist holds the
        de-duplicated ``(vertex id, vertex label)`` tuples
    """
    nodelist = []
    edgelist = []
    for _c in ext:
        nodelist.append([_c[0], _c[2]])      # (vertex id u, label of u)
        nodelist.append([_c[1], _c[3]])      # (vertex id v, label of v)
        edgelist.append([_c[2], _c[3], _c[4]])  # (label_u, label_v, edge label)
    nodelist = list(set(map(tuple, nodelist)))  # de-duplicate vertices
    return nodelist, edgelist, len(nodelist)