def consolidate_features_add(base_graphs, k, Gcollab_delta):
    """Collect k-hop pair features, then pad with positive delta edges.

    Starts from the feature dict built by consolidate_features() and adds
    features for edges of Gcollab_delta whose endpoints are NOT already a
    k-hop pair and NOT an edge of the base collaboration graph.  At most
    len(features) such pairs are added (presumably to balance the sample
    against the k-hop pairs -- TODO confirm against the training code).

    Returns the features dict keyed by (u, v) node-id tuples.
    """
    # Get all the k-hop features
    features = consolidate_features(base_graphs, Gcollab_delta, k)
    Gcollab_base = base_graphs[graphutils.Graph.COLLAB]
    feature_graphs = graphutils.split_feat_graphs(base_graphs)
    org = len(features)
    cnt = 0
    # Add positive features for edges that are not k-hop
    for edge in Gcollab_delta.Edges():
        if cnt >= org:
            # Cap the number of added positives at the original dict size.
            break
        u = edge.GetSrcNId()
        v = edge.GetDstNId()
        # Not k-hop (in either orientation) and not an edge in base graph
        if ((u, v) not in features and (v, u) not in features
                and not Gcollab_base.IsEdge(u, v)):
            features[(u, v)] = get_all_features(feature_graphs, u, v)
            cnt += 1
    # BUG FIX: cnt itself is the number of pairs added; the original printed
    # cnt - org, which can never be positive (cnt starts at 0 and the loop
    # stops once cnt reaches org).
    print("Added %d" % cnt)
    return features
def consolidate_features(base_graphs, Gcollab_delta, k):
    """Build a feature dict for every k-hop node pair of the collab graph.

    Keys are canonically ordered (smaller node id first) tuples; each value
    starts as an empty list and is then filled by one getFeatures() pass per
    feature graph.
    """
    features = {}
    Gcollab = base_graphs[graphutils.Graph.COLLAB]
    feature_graphs = graphutils.split_feat_graphs(base_graphs)
    for node in Gcollab.Nodes():
        nodeID = node.GetId()
        for neighborID in graphutils.getKHopN(Gcollab, nodeID, k):
            # BUG FIX: the original swapped nodeID/neighborID in place via
            # add/subtract arithmetic, which clobbered nodeID for all
            # remaining neighbors of this node and produced wrong pair keys.
            # Build the canonical (min, max) key without touching the loop
            # variables instead.
            if nodeID > neighborID:
                pair = (neighborID, nodeID)
            else:
                pair = (nodeID, neighborID)
            if pair in features:
                continue
            features[pair] = []
    for graph in feature_graphs:
        features = getFeatures(Gcollab, Gcollab_delta, graph, features)
    return features
def consolidate_features(base_graphs, Gcollab_delta, k):
    """Build a feature dict for every k-hop node pair of the collab graph.

    Keys are canonically ordered (smaller node id first) tuples; each value
    starts as an empty list and is then filled by one getFeatures() pass per
    feature graph.
    """
    features = {}
    Gcollab = base_graphs[graphutils.Graph.COLLAB]
    feature_graphs = graphutils.split_feat_graphs(base_graphs)
    for node in Gcollab.Nodes():
        nodeID = node.GetId()
        for neighborID in graphutils.getKHopN(Gcollab, nodeID, k):
            # BUG FIX: the original swapped nodeID/neighborID in place via
            # add/subtract arithmetic, which clobbered nodeID for all
            # remaining neighbors of this node and produced wrong pair keys.
            # Build the canonical (min, max) key without touching the loop
            # variables instead.
            if nodeID > neighborID:
                pair = (neighborID, nodeID)
            else:
                pair = (nodeID, neighborID)
            if pair in features:
                continue
            features[pair] = []
    for graph in feature_graphs:
        features = getFeatures(Gcollab, Gcollab_delta, graph, features)
    return features
def consolidate_features_add(base_graphs, k, Gcollab_delta):
    """Collect k-hop pair features, then pad with positive delta edges.

    Starts from the feature dict built by consolidate_features() and adds
    features for edges of Gcollab_delta whose endpoints are NOT already a
    k-hop pair and NOT an edge of the base collaboration graph.  At most
    len(features) such pairs are added (presumably to balance the sample
    against the k-hop pairs -- TODO confirm against the training code).

    Returns the features dict keyed by (u, v) node-id tuples.
    """
    # Get all the k-hop features
    features = consolidate_features(base_graphs, Gcollab_delta, k)
    Gcollab_base = base_graphs[graphutils.Graph.COLLAB]
    feature_graphs = graphutils.split_feat_graphs(base_graphs)
    org = len(features)
    cnt = 0
    # Add positive features for edges that are not k-hop
    for edge in Gcollab_delta.Edges():
        if cnt >= org:
            # Cap the number of added positives at the original dict size.
            break
        u = edge.GetSrcNId()
        v = edge.GetDstNId()
        # Not k-hop (in either orientation) and not an edge in base graph
        if ((u, v) not in features and (v, u) not in features
                and not Gcollab_base.IsEdge(u, v)):
            features[(u, v)] = get_all_features(feature_graphs, u, v)
            cnt += 1
    # BUG FIX: cnt itself is the number of pairs added; the original printed
    # cnt - org, which can never be positive (cnt starts at 0 and the loop
    # stops once cnt reaches org).
    print("Added %d" % cnt)
    return features
feature_graphs = graphutils.get_feat_graphs(db, uid, None, date1) base_graphs = graphutils.get_base_dict(Gcollab_base, feature_graphs) # from base graph take a random source node in every iteration baseG = base_graphs[graphutils.Graph.COLLAB] featG = graphutils.split_feat_graphs(base_graphs) ''' followers = reader.followers() baseG = graphutils.get_db_graph(graphutils.Graph.FOLLOW, uid, followers) #Gp = snapext.EUNGraph() #featG = [Gp, Gp, Gp] featG = graphutils.split_feat_graphs( graphutils.get_feat_graphs(db, uid, None, date1)) class RequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler): def end_headers(self): self.send_head() SimpleHTTPServer.SimpleHTTPRequestHandler.end_headers(self) def send_head(self): self.send_header("Content-type", "application/json") self.send_header("Access-Control-Allow-Origin", "*") def do_GET(self): try: userid = self.path.lstrip("/")
def main(args):
    """Run the random-walk link-prediction beta sweep and log accuracy.

    args: [db, date1, date2, date3, k, OUTFILE, (optional) beta]
    When a single beta is given the sweep degenerates to that one value;
    otherwise beta ranges over frange(0, 20, 4).  Results are written to
    OUTFILE via print_and_log as "beta, recall, avg rank, prec@20" rows.
    """
    db = args[0]
    date1 = args[1]
    date2 = args[2]
    date3 = args[3]
    k = int(args[4])
    OUTFILE = args[5]
    if len(args) >= 7:
        beta = float(args[6])
        bstart = beta
        bfinish = beta
    else:
        bstart = 0
        bfinish = 20

    reader = DBReader(db)
    print("Getting uid")
    uid = reader.uid()

    print("Getting all the base graphs")
    feature_graphs = graphutils.get_feat_graphs(db, uid, None, date1)
    Gcollab_base = graphutils.get_collab_graph(db, uid, date3, date1)
    base_graphs = graphutils.get_base_dict(Gcollab_base, feature_graphs)

    print("Getting Gcollab_delta graph")
    Gcollab_delta = graphutils.get_collab_graph(db, uid, date1, date2)

    # from base graph take a random source node in every iteration
    baseG = base_graphs[graphutils.Graph.COLLAB]
    featG = graphutils.split_feat_graphs(base_graphs)

    deltaNIDs = [node.GetId() for node in Gcollab_delta.Nodes()]
    baseNIDs = [node.GetId() for node in baseG.Nodes()]
    common_nodes = list(set(deltaNIDs).intersection(baseNIDs))

    ktop = 20
    subGraphK = 10
    f = open(OUTFILE, "w")
    print_and_log("# Beta\tRecall\tAvg. Rank\tPrec 20\n", f)

    n_iterations = 10
    it = 0
    sources = []
    while it < n_iterations:
        # BUG FIX: the original never removed zero-new-edge nodes from the
        # candidate pool and never checked for exhaustion, so it could spin
        # forever re-picking rejected nodes, or crash in random.choice once
        # the pool emptied.  Remove every sampled node and stop when none
        # are left.
        if not common_nodes:
            print("Warning. Ran out of candidate source nodes")
            break
        src = random.choice(common_nodes)
        common_nodes.remove(src)
        subBaseG = graphutils.getSubGraph(baseG, src, subGraphK)
        actual = evaluate.getYList(subBaseG, Gcollab_delta, src)
        # Consider source node only if it forms at least one edge in delta
        if len(actual) > 0:
            sources.append(src)
            it += 1
        else:
            print("Warning. Ignoring node with 0 new edges")

    print("number of nodes and edges in graph: %s %s"
          % (baseG.GetNodes(), baseG.GetEdges()))

    for beta in frange(bstart, bfinish, 4):
        total_recall = 0
        total_avg_rank = 0
        total_preck = 0
        for src in sources:
            subBaseG = graphutils.getSubGraph(baseG, src, subGraphK)
            print("sub graph nodes: %s" % subBaseG.GetNodes())
            print("sub graph edges: %s" % subBaseG.GetEdges())
            topIDs = runrw.runrw(subBaseG, featG, src, beta)
            # compare topIDs with list of labels already formed
            actual = evaluate.getYList(subBaseG, Gcollab_delta, src)
            recall, preck, average_rank = evaluate.getAccuracy(
                topIDs, actual, ktop)
            print("%s %s %s" % (recall, preck, average_rank))
            total_recall += recall
            total_avg_rank += average_rank
            total_preck += len(preck)
        # BUG FIX: average over the sources actually evaluated, not the
        # requested n_iterations (they differ if the pool ran out above).
        num = float(len(sources)) if sources else 1.0
        print_and_log("%f\t%f\t%f\t%f\n"
                      % (beta, total_recall / num, total_avg_rank / num,
                         total_preck / num), f)
    f.close()
feature_graphs = graphutils.get_feat_graphs(db, uid, None, date1) base_graphs = graphutils.get_base_dict(Gcollab_base, feature_graphs) # from base graph take a random source node in every iteration baseG = base_graphs[graphutils.Graph.COLLAB] featG = graphutils.split_feat_graphs(base_graphs) ''' followers = reader.followers() baseG = graphutils.get_db_graph(graphutils.Graph.FOLLOW, uid, followers) #Gp = snapext.EUNGraph() #featG = [Gp, Gp, Gp] featG = graphutils.split_feat_graphs(graphutils.get_feat_graphs(db, uid, None, date1)) class RequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler): def end_headers(self): self.send_head() SimpleHTTPServer.SimpleHTTPRequestHandler.end_headers(self) def send_head(self): self.send_header("Content-type", "application/json") self.send_header("Access-Control-Allow-Origin", "*") def do_GET(self): try: userid = self.path.lstrip("/")
def main(args):
    """Run the random-walk link-prediction beta sweep and log accuracy.

    args: [db, date1, date2, date3, k, OUTFILE, (optional) beta]
    When a single beta is given the sweep degenerates to that one value;
    otherwise beta ranges over frange(0, 20, 4).  Results are written to
    OUTFILE via print_and_log as "beta, recall, avg rank, prec@20" rows.
    """
    db = args[0]
    date1 = args[1]
    date2 = args[2]
    date3 = args[3]
    k = int(args[4])
    OUTFILE = args[5]
    if len(args) >= 7:
        beta = float(args[6])
        bstart = beta
        bfinish = beta
    else:
        bstart = 0
        bfinish = 20

    reader = DBReader(db)
    print("Getting uid")
    uid = reader.uid()

    print("Getting all the base graphs")
    feature_graphs = graphutils.get_feat_graphs(db, uid, None, date1)
    Gcollab_base = graphutils.get_collab_graph(db, uid, date3, date1)
    base_graphs = graphutils.get_base_dict(Gcollab_base, feature_graphs)

    print("Getting Gcollab_delta graph")
    Gcollab_delta = graphutils.get_collab_graph(db, uid, date1, date2)

    # from base graph take a random source node in every iteration
    baseG = base_graphs[graphutils.Graph.COLLAB]
    featG = graphutils.split_feat_graphs(base_graphs)

    deltaNIDs = [node.GetId() for node in Gcollab_delta.Nodes()]
    baseNIDs = [node.GetId() for node in baseG.Nodes()]
    common_nodes = list(set(deltaNIDs).intersection(baseNIDs))

    ktop = 20
    subGraphK = 10
    f = open(OUTFILE, "w")
    print_and_log("# Beta\tRecall\tAvg. Rank\tPrec 20\n", f)

    n_iterations = 10
    it = 0
    sources = []
    while it < n_iterations:
        # BUG FIX: the original never removed zero-new-edge nodes from the
        # candidate pool and never checked for exhaustion, so it could spin
        # forever re-picking rejected nodes, or crash in random.choice once
        # the pool emptied.  Remove every sampled node and stop when none
        # are left.
        if not common_nodes:
            print("Warning. Ran out of candidate source nodes")
            break
        src = random.choice(common_nodes)
        common_nodes.remove(src)
        subBaseG = graphutils.getSubGraph(baseG, src, subGraphK)
        actual = evaluate.getYList(subBaseG, Gcollab_delta, src)
        # Consider source node only if it forms at least one edge in delta
        if len(actual) > 0:
            sources.append(src)
            it += 1
        else:
            print("Warning. Ignoring node with 0 new edges")

    print("number of nodes and edges in graph: %s %s"
          % (baseG.GetNodes(), baseG.GetEdges()))

    for beta in frange(bstart, bfinish, 4):
        total_recall = 0
        total_avg_rank = 0
        total_preck = 0
        for src in sources:
            subBaseG = graphutils.getSubGraph(baseG, src, subGraphK)
            print("sub graph nodes: %s" % subBaseG.GetNodes())
            print("sub graph edges: %s" % subBaseG.GetEdges())
            topIDs = runrw.runrw(subBaseG, featG, src, beta)
            # compare topIDs with list of labels already formed
            actual = evaluate.getYList(subBaseG, Gcollab_delta, src)
            recall, preck, average_rank = evaluate.getAccuracy(
                topIDs, actual, ktop)
            print("%s %s %s" % (recall, preck, average_rank))
            total_recall += recall
            total_avg_rank += average_rank
            total_preck += len(preck)
        # BUG FIX: average over the sources actually evaluated, not the
        # requested n_iterations (they differ if the pool ran out above).
        num = float(len(sources)) if sources else 1.0
        print_and_log("%f\t%f\t%f\t%f\n"
                      % (beta, total_recall / num, total_avg_rank / num,
                         total_preck / num), f)
    f.close()
import path
from learning import getFeatures
from db.interface import *
from analysis import graphutils

# Smoke-test script: load the collaboration and feature graphs for one user
# snapshot, pick a random start node, and print its random-walk features.
db = '../db/github.2013_1_20.2013_2_20.db'
reader = DBReader(db)  # DBReader is provided by the db.interface star import
uid = reader.uid()
base_graphs = graphutils.get_db_graphs(db, uid)
Gcollab = base_graphs["Gcollab"]
feature_graphs = graphutils.split_feat_graphs(base_graphs)
reader.close()
# GetRndNId picks a random node id, so output differs between runs
start = Gcollab.GetRndNId()
walk_features = getFeatures.get_walk_features(Gcollab, feature_graphs, start)
print(walk_features)