def main(filename='data/exampleG.txt', min_sup=2):
    """Mine frequent subgraphs in *filename* with gSpan and print them.

    Each discovered pattern is printed as a numbered section followed by
    its DFS-code edges, one per line.

    filename : path joined onto the module-level `filepath`.
    min_sup  : minimum support threshold passed to gSpan.
    """
    filename = os.path.join(filepath, filename)
    graphs = load_graphs(filename)
    extensions = []
    gSpan([], graphs, min_sup=min_sup, extensions=extensions)
    # Removed unused local `n = len(graphs)` from the original.
    for i, ext in enumerate(extensions):
        print('Pattern %d' % (i + 1))
        for _c in ext:
            print(_c)
        print('')
def main(filename=fname, min_sup=msup):
    """Run gSpan over a weighted graph database and dump results to output.txt.

    Defaults `fname` / `msup` and the tuning globals (`filepath`, `rough_min`,
    `span`, `db_size`, `bin_count`, `WDT`) come from module scope.

    NOTE(review): this function was recovered from a whitespace-collapsed
    one-liner; the extent of the `for wdt in WDT:` loop body (assumed to end
    after the gSpan call) is reconstructed — confirm against the original.
    """
    start_time = time.time()
    filename = os.path.join(filepath, filename)
    graphs, maxW = load_graphs(filename, rough_min, span, db_size, bin_count)
    for wdt in WDT:
        print("*************")
        print(wdt)
        n = len(graphs)
        extensions = []
        canCount = 0
        gCount = 0
        fwsCount = 0
        oc = range(len(graphs))
        # gSpan threads its three counters through and returns them updated.
        fwsCount, canCount, gCount = gSpan(
            [], graphs, min_sup=min_sup, extensions=extensions, maxW=maxW,
            fwsCount=fwsCount, canCount=canCount, gCount=gCount, oc=oc)
    end_time = time.time()
    # Context manager replaces manual open()/close() so the file is closed
    # even if a write raises; the original's no-op obj.write('') is dropped.
    with open("output.txt", "w+") as obj:
        for i, ext in enumerate(extensions):
            obj.write('Pattern %d\n' % (i + 1))
            for _c in ext:
                obj.write(str(_c))
                obj.write('\n')
        obj.write("--- %s seconds ---\n" % (end_time - start_time))
        obj.write(str(fwsCount))
        obj.write('\n')
        obj.write(str(canCount))
        obj.write('\n')
        obj.write(str(gCount))
        obj.write('\n')
def gspan(train_test_split, path_to_graph_dataset, min_sup):
    """Shuffle the dataset, split train/test, mine frequent subgraphs on the
    train split, and save per-graph subgraph-id files plus the patterns.

    train_test_split      : fraction of graphs used for training (0..1).
    path_to_graph_dataset : file path understood by load_graphs().
    min_sup               : minimum support for gSpan.  BUG FIX: this
                            parameter was previously ignored and the call
                            hard-coded min_sup=30; it is now honored.
    """
    graph = load_graphs(path_to_graph_dataset)
    n = len(graph)
    idxs = np.arange(0, n, 1)
    random.shuffle(idxs)
    # Reorder the graphs according to the shuffled index permutation.
    graphs = [graph[e] for e in idxs]
    # Class labels are read but not used further here; the dead
    # `for e in y: ys = y[e]` loop from the original was removed.
    with open("/home/N1801734D/grid/graph_2D_CNN/datasets/classes/test/test_classes.txt", 'r') as f:
        y = f.read().splitlines()
    y = [int(elt) for elt in y]
    # Persist the permutation so the split is reproducible.
    np.save("shuffled_graph_idxs", idxs)
    split = int(train_test_split * n)
    train_graphs = graphs[:split]
    test_graphs = graphs[split:]
    extensions = []
    gSpan([], train_graphs, min_sup=min_sup, extensions=extensions)
    for i, ext in enumerate(extensions):
        for _c in ext:
            print(_c)
        print('')
    # Map each mined subgraph to the train graphs containing it, then invert
    # to get, per graph, the list of subgraph ids it contains.
    support, train_graphs_list = get_graphs_for_each_subgraph(
        extensions=extensions, graphs=train_graphs)
    train_subgraphs_list = get_subgraphs_for_each_graph(
        graphs=train_graphs, graphs_list=train_graphs_list)
    for i in range(len(train_graphs)):
        np.save("id/train_np/g_" + str(i), np.array(train_subgraphs_list[i]))
        with open("id/train/g_" + str(i) + ".txt", "w") as f:
            f.writelines(["%s " % item for item in train_subgraphs_list[i]])
    # Same bookkeeping for the held-out test graphs (support is discarded).
    _, test_graphs_list = get_graphs_for_each_subgraph(
        extensions=extensions, graphs=test_graphs)
    test_subgraphs_list = get_subgraphs_for_each_graph(
        graphs=test_graphs, graphs_list=test_graphs_list)
    for i in range(len(test_graphs)):
        np.save("id/test_np/g_" + str(i), np.array(test_subgraphs_list[i]))
        with open("id/test/g_" + str(i) + ".txt", "w") as fo:
            fo.writelines(["%s " % item for item in test_subgraphs_list[i]])
    np.save("extensions", extensions)
    np.save("support", support)
# NOTE(review): Python 2 code (statement-form `print`).  This flat script
# loads every graph file from `group1folder` and `group2folder`, mines each
# group's frequent subgraphs with gSpan (minSup=4), logs the patterns to
# group1.out / group2.out, builds association rules from those logs, and
# predicts a label for `testgraph`.
# WARNING: the line is truncated — the final `predict(` call is missing its
# arguments and closing parenthesis, so this cannot run as-is; recover the
# full statement from the original source before editing.
group1 = [] group2 = [] for filename in os.listdir(group1folder): graph = read(group1folder + filename) group1.append(graph) for filename in os.listdir(group2folder): graph = read(group2folder + filename) group2.append(graph) testgraph = read(testgraphFile) print "Frequent subgraphs in group 1:" extensionlist = gSpan(group1, minSup=4, maxthreads=1) logPatterns(extensionlist, 'group1.out') print "Frequent subgraphs in group 2:" extensionlist = gSpan(group2, minSup=4, maxthreads=1) logPatterns(extensionlist, 'group2.out') graphs = {'group1': group1, 'group2': group2} labels = {'group1': [('group1.out', 4)], 'group2': [('group2.out', 4)]} #list of association rules derived from frequent patterns ARs = buildARfromlogs(labels, graphs) print "Predicted label for graph '" + testgraphFile + "': " + predict(
def main(filename='data/Compound_422.txt', min_sup=300):
    """Mine patterns on the full dataset, re-mine on a random percentage
    sample, and report accuracy / precision / recall / Jaccard / F1 of the
    sampled patterns against the full-run patterns.

    NOTE(review): `start_time` is read from module scope — confirm it is set
    before main() is called.  The sampled run hard-codes min_sup=175 rather
    than scaling `min_sup` — presumably intentional; verify.
    Raises ZeroDivisionError if either run yields no patterns (unchanged
    from the original behavior).
    """
    filename = os.path.join(filepath, filename)
    graphs1 = load_graphs(filename)
    extensions = []
    print("original frequent pattern:")
    gSpan([], graphs1, min_sup=min_sup, extensions=extensions)
    for i, ext in enumerate(extensions):
        print('Pattern %d' % (i + 1))
        for _c in ext:
            print(_c)
        print('')
    print("--- %s seconds ---" % (time.time() - start_time))
    extensions1 = extensions
    print("original frequent pattern list:")
    print(extensions1)
    n = len(graphs1)
    print("total number of graph:")
    print(n)
    x = int(input('what percentage want to take:'))
    p = round((x / 100) * n)
    print("graph size now is :")
    print(p)
    graphs = random.sample(graphs1, p)
    extensions = []
    print("percentwise frequent pattern:")
    gSpan([], graphs, min_sup=175, extensions=extensions)
    for i, ext in enumerate(extensions):
        print('Pattern %d' % (i + 1))
        for _c in ext:
            print(_c)
        print('')
    print("percentwise frequent pattern list:")
    print(extensions)
    l2 = len(extensions)
    # True/false positives: patterns from the sampled run that do / do not
    # also appear in the full run.  Comprehensions replace append loops, and
    # the loop variable no longer shadows the builtin `list`.
    tp1 = [pat for pat in extensions if pat in extensions1]
    fp1 = [pat for pat in extensions if pat not in extensions1]
    tp = len(tp1)
    fp = len(fp1)
    l1 = len(extensions1)
    fn = l1 - tp  # full-run patterns the sample missed
    accuracy = (tp / l1) * 100
    print("accuracy:", accuracy)
    prcn = tp / (fp + tp)
    print("precission", prcn)
    recall = tp / (tp + fn)
    print("recall", recall)
    jc = (tp / (l1 + l2)) * 100
    print("jaccard coefficient", jc)
    f1score = 2 * ((prcn * recall) / (prcn + recall))
    print("F1Score:", f1score)
def main(min_sup):
    """Mine frequent subgraphs from generateInput() and return a report.

    Returns a pandas DataFrame with one row per mined pattern:
    'support' (pattern support), 'vertex' (deduplicated [id, label] pairs),
    'link' (edge triples), 'num_vert' (distinct vertex count).
    """
    allGraph2Dic = generateInput()
    gspanInput = [list(numberingIndex(graph2Dic)) for graph2Dic in allGraph2Dic]
    graphs = load_graphs(gspanInput)
    extensions = []
    extensions_sups = []
    gSpan([], graphs, min_sup, extensions, extensions_sups)
    rows = []
    for i, ext in enumerate(extensions):
        nodelist = []
        edgelist = []
        for _c in ext:
            # _c layout (positional): _c[0]/_c[2] and _c[1]/_c[3] look like
            # the two endpoints' ids/labels, _c[4] the edge label — TODO
            # confirm against the gSpan DFS-code tuple definition.
            nodelist.append([_c[0], _c[2]])
            nodelist.append([_c[1], _c[3]])
            edgelist.append([_c[2], _c[3], _c[4]])
        # Deduplicate vertices (order follows set iteration, as before).
        nodelist = list(set(map(tuple, nodelist)))
        rows.append(pd.DataFrame(
            {
                'support': [extensions_sups[i]],
                'vertex': [nodelist],
                'link': [edgelist],
                'num_vert': [len(nodelist)],
            },
            index=[i],
        ))
    # FIX: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # build the frame once with concat (empty frame if no patterns).
    _report_df = pd.concat(rows) if rows else pd.DataFrame()
    return _report_df