def main():
    """Generate three large synthetic networks and save each as an edge list.

    A preferential-attachment graph, a G(n, m) random graph and a small-world
    graph are generated with the same node count, and with an edge count or
    average degree derived from the constants below.  The SNAP random number
    generator is re-seeded before each generator call.  The process exits
    with status 0 when finished.
    """
    RANDOM_SEED = 23
    SYNTHETIC_NW_NODES = 4846609  # How many nodes in the fake networks.
    SYNTHETIC_NW_EDGES = 42851237  # How many edges in the fake networks.
    SYNTHETIC_NW_AVG_DEGREE = SYNTHETIC_NW_EDGES // SYNTHETIC_NW_NODES

    random.seed(RANDOM_SEED)

    def _save(graph, filename, description):
        # Announce and write one generated graph.
        print("Saving edge list to file: %s" % filename)
        snap.SaveEdgeList(graph, filename, description)

    print("Generating preferential attachment graph...")
    tRnd = snap.TRnd()
    tRnd.PutSeed(RANDOM_SEED)  # Re-seed every time.
    PAGraph = snap.GenPrefAttach(SYNTHETIC_NW_NODES, SYNTHETIC_NW_AVG_DEGREE,
                                 tRnd)
    _save(PAGraph, 'PrefAttachSynthetic-4.8M.txt',
          'Synthetic preferential attachment graph')

    print("Generating random graph...")
    tRnd.PutSeed(RANDOM_SEED)  # Re-seed every time.
    RndGraph = snap.GenRndGnm(snap.PUNGraph, SYNTHETIC_NW_NODES,
                              SYNTHETIC_NW_EDGES, False, tRnd)
    _save(RndGraph, 'GnmRandomGraph-4.8M.txt', 'Random Gnm graph')

    print("Generating small world graph...")
    tRnd.PutSeed(RANDOM_SEED)  # Re-seed every time.
    SWGraph = snap.GenSmallWorld(SYNTHETIC_NW_NODES, SYNTHETIC_NW_AVG_DEGREE,
                                 0.1, tRnd)
    # Fix: the original wrote RndGraph here, so the small-world file was a
    # second copy of the G(n, m) graph.
    _save(SWGraph, 'SmallWorldGraph-4.8M.txt',
          'Small world graph with rewire prob=0.1')

    print("Done")
    sys.exit(0)
def getFeaturesForTerm(term):
    """Build the feature table and labels for one term.

    Loads the bill data for ``term`` and the financial data for the window
    two years before it, builds the campaign, co-sponsorship and folded
    campaign graphs, writes each graph to a text edge list in the working
    directory, and delegates feature extraction to ``getFeatures``.

    Returns:
        ``(X, Y)`` as produced by ``getFeatures``, with a ``'term'`` column
        added to ``X``.
    """
    bills_df = loadBillData(term)
    first_year, last_year = common_function.getTermMapping(term)
    # Financial data comes from two years prior to the term.
    finance_df = loadFinancialData(first_year - 2, last_year - 2)

    bill_ids = bills_df['SrcNId'].unique().tolist()
    legislator_ids = bills_df['DstNId'].unique().tolist()
    committee_ids = finance_df['SrcNId'].unique().tolist()
    campaign_legislator_ids = finance_df['DstNId'].unique().tolist()

    campaign_graph = getCampaign(finance_df)
    cosponsor_graph = getSponsorLink(bills_df)
    folded_graph = getCampaign_folded(campaign_graph, campaign_legislator_ids)

    for graph, out_path in ((campaign_graph, 'G_Campaign.txt'),
                            (folded_graph, 'G_Campaign_folded.txt'),
                            (cosponsor_graph, 'G_CoSponsor.txt')):
        snap.SaveEdgeList(graph, out_path)

    # The saved folded-campaign and co-sponsor graphs can be reloaded with
    # snap.LoadEdgeList(snap.PUNGraph, <file>, 0, 1) instead of being rebuilt.

    X, Y = getFeatures(cosponsor_graph, campaign_graph, bill_ids,
                       legislator_ids, committee_ids,
                       campaign_legislator_ids, folded_graph)
    X['term'] = term
    return X, Y
def gen_data():
    """Generate three 300-node sample graphs and save them under ``../data``.

    In order: a directed G(n, m) graph with 2400 edges, a preferential
    attachment graph with out-degree 8, and a random power-law graph with
    exponent 1.2.
    """
    recipes = (
        (lambda: snap.GenRndGnm(snap.PNGraph, 300, 2400, True),
         "../data/Erdos-Renyi.txt"),
        (lambda: snap.GenPrefAttach(300, 8), "../data/PrefAttach.txt"),
        (lambda: snap.GenRndPowerLaw(300, 1.2), "../data/power-law.txt"),
    )
    # Build and save one graph at a time, in the order listed above.
    for build, out_path in recipes:
        snap.SaveEdgeList(build(), out_path)
def initialize(self):
    """Generate the train/test graphs and write them out as edge lists.

    Training graphs are saved as ``<graph_directory>/train/<i>.edgelist``.
    Test graphs go to ``<graph_directory>/test/<j>.edgelist``, where ``j``
    continues the numbering after the last training graph.

    Returns:
        ``self.graph_directory``.
    """
    self.generate_graphs(self.base_graph(), self.k)

    for index, graph in enumerate(self.train_graph_list):
        train_path = self.graph_directory + '/train/' + str(index) + '.edgelist'
        snap.SaveEdgeList(graph, train_path)

    # Test indices start where the training indices stop.
    first_test_index = len(self.train_graph_list)
    for index, graph in enumerate(self.test_graph_list, first_test_index):
        test_path = self.graph_directory + '/test/' + str(index) + '.edgelist'
        snap.SaveEdgeList(graph, test_path)

    return self.graph_directory
def get_graph_by_month(graph_path, date_username_path):
    """Remove users from the graph newest-first, saving periodic snapshots.

    Rows of the user table are visited in descending ``created_date`` order.
    For each row the user's node is deleted from the graph.  Just before the
    date changes away from a value ending in ``'01'`` (presumably the first
    day of a month -- verify against the data format), the current graph is
    saved to ``./data/graph/Graph_<date>.txt``.  Processing stops once the
    graph has fewer than 30000 nodes.  Per-row errors are printed and
    skipped.
    """
    date_username_df = load_user_attr_to_df(date_username_path)
    date_username_df.sort_values(by='created_date', ascending=False,
                                 inplace=True)
    print('date_username loaded')

    Graph, H = load_graph(graph_path)
    print(date_username_df['created_date'])

    cur_date = ''
    for _, row in date_username_df.iterrows():
        if Graph.GetNodes() < 30000:
            break
        print(row['created_date'])
        try:
            row_date = str(row['created_date'])
            if cur_date == '':
                cur_date = row_date
            if cur_date.endswith('01') and row_date != cur_date:
                snap.SaveEdgeList(Graph,
                                  './data/graph/Graph_%s.txt' % cur_date)
            cur_date = row_date
            # H maps a username to its node id in Graph.
            Graph.DelNode(H.GetDat(row['username']))
        except Exception as e:
            # Best effort: report the failing row and carry on.
            print('!')
            print(e)
def get_commits_graph(path):
    """Load a commits network from CSV and return it as a NetworkX graph.

    The edge and node tables ``<path><pname>_edges.csv`` and
    ``<path><pname>_nodes.csv`` are loaded into a SNAP network, self-edges
    are removed, the edges are written to
    ``temp/commits_temp_edgelist.csv`` and that file is parsed back with
    NetworkX.

    NOTE: ``pname`` is not a parameter; it is read from the enclosing
    (module) scope.

    Args:
        path: prefix prepended to the two CSV file names.

    Returns:
        An undirected ``nx.Graph`` with integer node ids.
    """
    context = snap.TTableContext()

    # Every column is loaded as a string attribute.
    e_schema = snap.Schema()
    for column in ("source", "target", "weight"):
        e_schema.Add(snap.TStrTAttrPr(column, snap.atStr))
    n_schema = snap.Schema()
    for column in ("id", "username", "size"):
        n_schema.Add(snap.TStrTAttrPr(column, snap.atStr))

    edgetable = snap.TTable.LoadSS(e_schema,
                                   path + '{}_edges.csv'.format(pname),
                                   context, ",", snap.TBool(True))
    nodetable = snap.TTable.LoadSS(n_schema,
                                   path + '{}_nodes.csv'.format(pname),
                                   context, ",", snap.TBool(True))

    # No edge or node attributes are copied into the network.
    edgeattrv = snap.TStrV()
    nodeattrv = snap.TStrV()
    net = snap.ToNetwork(snap.PNEANet, edgetable, "source", "target",
                         edgeattrv, nodetable, "id", nodeattrv, snap.aaFirst)
    snap.DelSelfEdges(net)
    snap.SaveEdgeList(net, 'temp/commits_temp_edgelist.csv')

    # Fix: the file handle was opened and never closed.  parse_edgelist
    # consumes the lines eagerly, so the graph is complete before the file
    # is closed.
    # NOTE(review): the `data=` spec expects a weight column in the temp
    # file, but no edge attributes are passed to ToNetwork above -- confirm
    # the saved edge list really carries a third column.
    with open('temp/commits_temp_edgelist.csv', 'r') as edge_file:
        G = nx.parse_edgelist(edge_file, delimiter='\t',
                              create_using=nx.Graph(), nodetype=int,
                              data=(('weight', float),), comments='#')
    return G
def generate(adjusted):
    """Build a directed friendship graph from the corrected CSV and save it.

    The first column of the CSV holds a person id (possibly wrapped in
    single quotes) and the last column holds that person's friend list,
    decoded by ``strToList``.  Each person becomes a node whose id is the
    row position; each listed friend that also appears in the file becomes
    a directed edge person -> friend.  Zero-degree nodes are removed and
    the graph is written to ``grafo.txt``.

    Args:
        adjusted: path of the corrected CSV file.
    """
    quote = "\'"
    # Read the corrected file.
    data = pd.read_csv(adjusted)

    def _person_key(row):
        # Person id of a row with the single quotes stripped.
        return str(data.iloc[row, 0]).replace(quote, "")

    # Build a dictionary mapping each person id to its row position.
    ids = {}
    for i in data.index:
        ids[_person_key(i)] = i

    # Build the SNAP graph.
    G = snap.TNGraph.New()

    # Fix: add every node before adding any edge.  The original added nodes
    # lazily inside the edge loop, so an edge to a person on a later row
    # failed (destination not yet in the graph) and was silently discarded
    # by a bare `except: pass`.
    for node in sorted(ids.values()):
        G.AddNode(node)

    # Add the edges.
    for i in data.index:
        node = ids[_person_key(i)]
        for friend in strToList(data.iloc[i, -1]):
            target = ids.get(friend)
            if target is None:
                # Friend does not appear in the file: nothing to link to.
                continue
            G.AddEdge(node, target)

    # Remove zero-degree nodes.
    snap.DelZeroDegNodes(G)

    # Save the graph.
    snap.SaveEdgeList(G, "grafo.txt", "Save as tab-separated list of edges")
def projection(Graph):
    """Project the chosen network onto its nodes with id below 10000.

    Two nodes ``i < j < 10000`` are linked in the projection whenever the
    source graph has at least one length-2 path between them.  Node and
    edge counts and the clustering coefficient of the projection are
    printed, and the projection is saved to
    ``../processed-data/<Graph>_projection.txt``.

    Args:
        Graph: ``'campaign'`` or ``'bill'``.

    Raises:
        ValueError: for any other value of ``Graph``.
    """
    sources = {
        'campaign': "../processed-data/campaignNetworks_v2.txt",
        'bill': "../processed-data/legislator_bill_edge_list_graph.txt",
    }
    if Graph not in sources:
        raise ValueError("Invalid graph: please use 'campaign' or 'bill'. ")
    G2 = readGraph(sources[Graph])

    node_ids = [node.GetId() for node in G2.Nodes()]

    H = snap.TUNGraph.New()
    for src in node_ids:
        for dst in node_ids:
            # 10000 is the upper limit for candidate nodes.
            if not (src < dst and dst < 10000):
                continue
            NbrV = snap.TIntV()
            if snap.GetLen2Paths(G2, src, dst, NbrV) <= 0:
                continue
            if not H.IsNode(src):
                H.AddNode(src)
            if not H.IsNode(dst):
                H.AddNode(dst)
            if not H.IsEdge(src, dst):
                H.AddEdge(src, dst)

    print("Compressed Graph Node count total: %d" % (H.GetNodes()))
    print("Compressed Edge count total: %d" % (H.GetEdges()))
    GraphClustCoeff = snap.GetClustCf(H, -1)
    print(Graph + " Network Clustering coefficient: %f" % GraphClustCoeff)
    snap.SaveEdgeList(H, "../processed-data/"+Graph+"_projection.txt", Graph + " network - Save projected network info as tab-separated list of edges, using unified candidate node IDs")
    return
def combineGraphs():
    """Merge the legislator-bill graph into the campaign graph and save it.

    Every edge of the bill graph is copied into the campaign graph, adding
    endpoints that are missing.  Newly added nodes with id below 10000 are
    counted and reported as new legislator nodes.  The merged graph is
    written to ``../processed-data/combined_network.txt``.
    """
    bill_graph = readGraph("../processed-data/legislator_bill_edge_list_graph.txt")
    # Both node tables are loaded but not consulted below.
    bill_node = pd.read_csv('../processed-data/bill_node.csv')
    legislator_node = pd.read_csv('../processed-data/legislator_node.csv')
    GC = readGraph("../processed-data/campaignNetworks_v2.txt")

    def _ensure_node(nid):
        # Add nid if absent; return 1 when the added node has id < 10000.
        if GC.IsNode(nid):
            return 0
        GC.AddNode(nid)
        return 1 if nid < 10000 else 0

    cnt = 0
    for edge in bill_graph.Edges():
        src = edge.GetSrcNId()
        dst = edge.GetDstNId()
        cnt += _ensure_node(src)
        cnt += _ensure_node(dst)
        if not GC.IsEdge(src, dst):
            GC.AddEdge(src, dst)

    print("Added %d new legislator nodes" % (cnt))
    print("Overall graph node count: %d, and edge count %d" % (GC.GetNodes(), GC.GetEdges()))
    snap.SaveEdgeList(GC, "../processed-data/combined_network.txt", "Save 1981 to 2016 combined network info as tab-separated list of edges, using unified candidate node IDs")
    return
def write_to_graph(load_file_name, save_file_name):
    """Convert a two-lines-per-record retweet file into a directed edge list.

    Odd-numbered lines supply the original user id (third field) and a
    retweet count (fourth field).  The following even-numbered line lists
    retweeting user ids at every second position.  An edge
    original -> retweeter is added for each of them.

    Args:
        load_file_name: path of the input text file.
        save_file_name: path of the edge list to write.
    """
    graph = snap.PNGraph.New()
    original_uid = None
    retweet_num = None

    with open(load_file_name) as f:
        for row_number, line in enumerate(f, 1):
            if row_number % 10000 == 0:
                print(row_number)  # progress indicator
            elements = line.split()

            if row_number % 2 == 1:
                # Header line of a record.
                original_uid = int(elements[2])
                retweet_num = int(elements[3])
                continue

            # Retweeter line of the record.
            if not graph.IsNode(original_uid):
                graph.AddNode(original_uid)
            for token in elements[::2]:
                retweet_uid = int(token)
                if not graph.IsNode(retweet_uid):
                    graph.AddNode(retweet_uid)
                graph.AddEdge(original_uid, retweet_uid)

    snap.SaveEdgeList(graph, save_file_name)
def getXYFromEmb(bill_term, fin_start_year, fin_end_year, p, q):
    """Build features and labels from embeddings of the campaign graph.

    The campaign graph for the given financial years is saved to
    ``G_campaign.txt``, embedded by ``getEmbeddings`` (walk length 80, with
    the given ``p`` and ``q``), and the embeddings are read back from
    ``embedding.emb``.  Rows of ``X`` containing any null value are dropped
    from both ``X`` and ``Y``.

    Args:
        bill_term: term whose bill data is loaded.
        fin_start_year: first year of financial data to load.
        fin_end_year: last year of financial data to load.
        p: passed through to ``getEmbeddings``.
        q: passed through to ``getEmbeddings``.

    Returns:
        ``(X, Y)`` from ``getFeatures`` with the null rows removed.
    """
    df = link_prediction.loadBillData(bill_term)
    fin_df = link_prediction.loadFinancialData(fin_start_year, fin_end_year)

    bill_node = df['SrcNId'].unique().tolist()
    legislator_node = df['DstNId'].unique().tolist()
    comm_node = fin_df['SrcNId'].unique().tolist()
    legislator_node_from_campaign = fin_df['DstNId'].unique().tolist()

    G_CoSponsor = link_prediction.getSponsorLink(df)
    G_Campaign = link_prediction.getCampaign(fin_df)
    snap.SaveEdgeList(G_Campaign, "G_campaign.txt")

    walk_length = 80
    getEmbeddings("G_campaign.txt", p, q, walk_length)
    # The first line of the embedding file is skipped (skiprows=1).
    emb = np.loadtxt('embedding.emb', skiprows=1)

    X, Y = getFeatures(G_CoSponsor, G_Campaign, bill_node, legislator_node,
                       comm_node, legislator_node_from_campaign, emb)

    # Fix: Series.nonzero() and the positional axis argument of any() are
    # gone from current pandas.  np.flatnonzero on the boolean mask yields
    # the same row positions, which drop() then uses as index labels exactly
    # as before.
    has_null = pd.isnull(X).any(axis=1)
    inds = np.flatnonzero(has_null.values)
    X = X.drop(inds)
    Y = Y.drop(inds)
    return X, Y
def main():
    """Compute motif z-scores of ``unweighted.txt`` against rewired samples.

    Motif counts of the original graph are compared with those of 10
    rewired graphs produced by ``partly_undir_rewire``.  Counting is done
    by the external SNAP ``motifs`` binary, which writes
    ``<name>-counts.tab``.  The z-score of each of the 13 motif counts is
    printed.
    """
    motifs_bin = 'snap/snap/examples/motifs/motifs'

    def _read_counts(prefix):
        # Read the 'Count' column of <prefix>-counts.tab.
        # Fix: the file is closed even if read_csv raises.
        with open(prefix + '-counts.tab') as f:
            return pd.read_csv(f, sep='\t')['Count'].values

    G = snap.LoadEdgeList(snap.PNGraph, 'unweighted.txt', 0, 1)
    os.system(motifs_bin + ' -i:unweighted.txt -o:orig')
    orig_counts = _read_counts('orig')
    print(orig_counts)

    spokes = get_spokes(G)
    motifs = np.zeros((10, 13))
    for i in range(10):
        sample_name = 'sample' + str(i)
        print(sample_name)
        rewired = partly_undir_rewire(G, spokes)
        snap.SaveEdgeList(rewired, sample_name + '.txt')
        os.system(motifs_bin + ' -i:' + sample_name + '.txt -o:' + sample_name)
        motifs[i, :] = _read_counts(sample_name)

    # NOTE(review): the first sample row is bumped by one, presumably so the
    # standard deviation is never exactly zero -- confirm the intent.
    motifs[0, :] += 1
    os.system('rm -rf sample*')  # remove the per-sample temporary files

    mean = np.mean(motifs, axis=0)
    std = np.std(motifs, axis=0)
    z = (orig_counts - mean) / std
    print(z)
def generate_graphs():
    """Generate synthetic counterparts of every graph listed in ``GRAPHS``.

    For each input graph the basic metrics are computed and printed, then
    these models are generated and saved as ``<name>_<suffix>.elist``:

    * ``er``  -- Erdos-Renyi G(n, m) with the same node and edge counts;
    * ``ws``  -- Watts-Strogatz small world, rewire probability 0.2;
    * ``ba``  -- Barabasi-Albert preferential attachment;
    * ``ff``, ``ffdiv10``, ``ffx10`` -- forest fire at 1x, 1/10x and 10x
      the node count.
    """
    for path in GRAPHS:
        name = path.split('/')[-1].split('.')[0]
        metrics = Metrics(path, True).calculate_basic()
        print(metrics)

        # Erdos-Renyi (random) graph. Args: type, num_nodes, num_edges.
        er = snap.GenRndGnm(snap.PNGraph, metrics.num_nodes,
                            metrics.num_edges)
        snap.SaveEdgeList(er, "{}_er.elist".format(name))

        # The generators take the per-node out-degree; the average degree
        # of the result is twice that value.
        half_degree = int(metrics.avg_degree) // 2

        # Watts-Strogatz (small world) graph.
        ws = snap.GenSmallWorld(metrics.num_nodes, half_degree, 0.2)
        snap.SaveEdgeList(ws, "{}_ws.elist".format(name))

        # Barabasi-Albert (scale-free, preferential attachment) graph.
        ba = snap.GenPrefAttach(metrics.num_nodes, half_degree)
        snap.SaveEdgeList(ba, "{}_ba.elist".format(name))

        # Forest Fire model. Args: num_nodes, forward_prob, backward_prob.
        # 0.3599 is the value selected for the US Airports data set.
        burn_prob = 0.3599 if name == "USairport_2010" else 0.3467
        sizes = (
            ("ff", metrics.num_nodes),
            ("ffdiv10", int(metrics.num_nodes / 10)),
            ("ffx10", metrics.num_nodes * 10),
        )
        for suffix, num_nodes in sizes:
            ff = snap.GenForestFire(num_nodes, burn_prob, burn_prob)
            snap.SaveEdgeList(ff, "{}_{}.elist".format(name, suffix))
def create_weighted_cosponsorship_graph(chamber, session):
    """Build and save the weighted cosponsorship graph for one session.

    Starting from the bipartite cosponsorship graph returned by
    ``read_bcg``, every non-bill node is linked to each node two hops away.
    For a linked pair, the directed weight ``(a, b)`` is the number of
    bills common to both divided by the number of entries in
    ``to_bills[a]``.

    Writes under ``govtrack_data/``: the edge list
    (``wcg_<chamber>_<session>.graph``), the edge-weight dictionary and the
    per-node bill counts.

    Args:
        chamber: chamber identifier used in the file names.
        session: session identifier used in the file names.
    """
    print("Creating weighted cosponsorship graph (wcg)...")
    # Fix: the file holds a pickled dict, which current NumPy refuses to
    # load unless allow_pickle=True is passed.
    # NOTE(review): unpickling is only safe because this file is produced
    # by the project's own pipeline.
    to_bills = np.load(
        'raw_data/govtrack_cosponsor_temp/to_bills_%s_%s.npy' %
        (chamber, session),
        allow_pickle=True).item()
    g, node_info, _ = read_bcg(chamber, session)

    edge_weights = {}
    sponsored_bills = {}
    wcg = snap.TUNGraph.New()

    def _cache_bill_count(nid):
        # Record how many nodes sit one hop from nid, once per node.
        if nid in sponsored_bills:
            return
        bills = snap.TIntV()
        snap.GetNodesAtHop(g, nid, 1, bills, False)
        sponsored_bills[nid] = len(bills)

    for node in tqdm(node_info, total=len(node_info), position=0):
        if node_info[node]['type'] == 'bill':
            continue
        if not wcg.IsNode(node):
            wcg.AddNode(node)
        if not g.IsNode(node):
            print("F**K WHY IS %s NOT A NODE" % (node, ))
            continue

        # Nodes exactly two hops away in the bipartite graph.
        connected = snap.TIntV()
        snap.GetNodesAtHop(g, node, 2, connected, False)
        _cache_bill_count(node)

        for node2 in connected:
            if node == node2:
                continue
            if not wcg.IsNode(node2):
                wcg.AddNode(node2)
            _cache_bill_count(node2)

            member_id = node_info[node]['info']['id']
            member2_id = node_info[node2]['info']['id']
            common_bills = len(
                get_cosponsorship(member_id, member2_id, to_bills))
            # Each direction is normalised by its own source's bill list.
            edge_weights[(node, node2)] = common_bills / len(
                to_bills[member_id])
            edge_weights[(node2, node)] = common_bills / len(
                to_bills[member2_id])
            wcg.AddEdge(node, node2)

    snap.SaveEdgeList(wcg,
                      'govtrack_data/wcg_%s_%s.graph' % (chamber, session))
    np.save('govtrack_data/wcg_edge_weights_%s_%s.npy' % (chamber, session),
            edge_weights)
    np.save('govtrack_data/wcg_sponsored_bills_%s_%s.npy' % (chamber, session),
            sponsored_bills)
    print("Completed weighted cosponsorship graph!")
def gen_ba(args):
    """Generate ``args.num_graphs`` BA (preferential attachment) graphs.

    Each graph has ``args.num_vertices`` nodes and an out-degree drawn
    uniformly from 2..5.  Graphs are saved as
    ``<args.data_loc>/BA/BA_<i>.edges``.
    """
    # Fix: one generator is shared by all graphs.  The original created a
    # fresh default-seeded TRnd inside the loop, restarting the same random
    # stream for every graph, so graphs with equal out-degree came out
    # identical.
    Rnd = snap.TRnd()
    for i in range(args.num_graphs):
        out_deg = int(np.random.uniform(2, 6))
        Graph = snap.GenPrefAttach(args.num_vertices, out_deg, Rnd)
        snap.SaveEdgeList(Graph, f'{args.data_loc}/BA/BA_{i}.edges')
        print(f"BA Graph {i} Generated and Saved")
def __init__(self, num_nodes, subgraph_path = "../data/subgraphs"):
    """Load the motifs for ``num_nodes`` nodes, creating them on first use.

    Motifs are cached as ``<subgraph_path>/<num_nodes>/<i>.txt`` edge
    lists.  When that directory holds no ``.txt`` file the motifs are built
    with ``create_motifs`` and saved; otherwise
    ``node_motifs[num_nodes]`` files are loaded back as directed graphs.

    Args:
        num_nodes: motif size; must be a key of ``node_motifs`` when the
            cache is read.
        subgraph_path: root directory of the motif cache.
    """
    # Number of motif files expected for each supported motif size.
    self.node_motifs = {2:2, 3:13, 4:199}
    self.subgraph_path = os.path.join(subgraph_path, str(num_nodes))
    if not os.path.exists(self.subgraph_path):
        os.makedirs(self.subgraph_path)
    self.num_nodes = num_nodes

    cached_files = glob.glob(os.path.join(self.subgraph_path, '*.txt'))
    if cached_files:
        self.motifs = []
        for i in range(self.node_motifs[num_nodes]):
            motif_path = os.path.join(self.subgraph_path, "{}.txt".format(i))
            self.motifs.append(
                snap.LoadEdgeList(snap.PNGraph, motif_path, 0, 1))
    else:
        self.motifs = self.create_motifs()
        for i, graph in enumerate(self.motifs):
            motif_path = os.path.join(self.subgraph_path, "{}.txt".format(i))
            snap.SaveEdgeList(graph, motif_path)
def gen_sw(args):
    """Generate ``args.num_graphs`` SW (small-world) graphs.

    Each graph has ``args.num_vertices`` nodes, out-degree 3 and a rewiring
    probability drawn uniformly from [0, 0.5).  Graphs are saved as
    ``<args.data_loc>/SW/SW_<i>.edges``.
    """
    # Fix: one generator is shared by all graphs.  The original created a
    # fresh default-seeded TRnd inside the loop, restarting the same random
    # stream for every graph.
    Rnd = snap.TRnd()
    for i in range(args.num_graphs):
        fp = np.random.uniform(0, 0.5)  # rewiring probability
        Graph = snap.GenSmallWorld(args.num_vertices, 3, fp, Rnd)
        snap.SaveEdgeList(Graph, f'{args.data_loc}/SW/SW_{i}.edges')
        print(f"SW Graph {i} Generated and Saved")
def gen_er(args):
    """Generate ``args.num_graphs`` directed ER (G(n, m)) graphs.

    The edge count of each graph is drawn uniformly between half and twice
    ``args.num_vertices`` and truncated to an integer.  Graphs are saved as
    ``<args.data_loc>/ER/ER_<i>.edges``.
    """
    for i in range(args.num_graphs):
        low = args.num_vertices / 2
        high = args.num_vertices * 2
        num_edges = int(np.random.uniform(low, high))
        snap.SaveEdgeList(
            snap.GenRndGnm(snap.PNGraph, args.num_vertices, num_edges),
            f'{args.data_loc}/ER/ER_{i}.edges')
        print(f"ER Graph {i} Generated and Saved")
def main(args):
    """Extract the largest weakly connected component of the review graph.

    Args:
        args: object with ``review`` (input edge-list path) and
            ``review_maxwcc`` (output edge-list path) attributes.
    """
    src_path, dst_path = args.review, args.review_maxwcc

    # Load as an undirected graph, using columns 0 and 1 as endpoints.
    graph = snap.LoadEdgeList(snap.PUNGraph, src_path, 0, 1)
    largest_wcc = snap.GetMxWcc(graph)
    snap.SaveEdgeList(largest_wcc, dst_path)
def gen_ff(args):
    """Generate ``args.num_graphs`` FF (forest fire) graphs.

    The forward and backward burning probabilities are each drawn
    uniformly from [0, 0.5).  Graphs are saved as
    ``<args.data_loc>/FF/FF_<i>.edges``.
    """
    for i in range(args.num_graphs):
        # Draw forward first, then backward, one value each per graph.
        forward_prob = np.random.uniform(0, 0.5)
        backward_prob = np.random.uniform(0, 0.5)
        snap.SaveEdgeList(
            snap.GenForestFire(args.num_vertices, forward_prob, backward_prob),
            f'{args.data_loc}/FF/FF_{i}.edges')
        print(f"FF Graph {i} Generated and Saved")
def genGraph(self, nodes):
    """Save a complete graph over the multiples of 6 below ``nodes``.

    A full undirected graph on ``nodes`` nodes is generated, then every
    node whose id is not a multiple of 6 is deleted.

    Args:
        nodes: number of nodes in the initial full graph.

    Returns:
        The name of the file the edge list was written to.
    """
    print("Generating random graph...")
    genFileName = 'random5000by6.txt'

    # Collect the ids to delete: everything that is not a multiple of 6.
    to_delete = snap.TIntV()
    for node_id in range(nodes):
        if node_id % 6:
            to_delete.Add(node_id)

    full_graph = snap.GenFull(snap.PUNGraph, nodes)
    snap.DelNodes(full_graph, to_delete)
    snap.SaveEdgeList(full_graph, genFileName)
    return genFileName
def gen_rm(args):
    """Generate ``args.num_graphs`` RM (R-MAT) graphs.

    For each graph the three R-MAT probabilities are drawn uniformly from
    [0, 0.3), [0, 0.1) and [0, 0.1), and the edge count from between one
    and two times ``args.num_vertices``.  Graphs are saved as
    ``<args.data_loc>/RM/RM_<i>.edges``.
    """
    for i in range(args.num_graphs):
        # Draw order matters for reproducibility under a fixed numpy seed.
        prob_a = np.random.uniform(0, 0.3)
        prob_b = np.random.uniform(0, 0.1)
        prob_c = np.random.uniform(0, 0.1)
        edge_draw = np.random.uniform((args.num_vertices),
                                      (args.num_vertices * 2))
        num_edges = int(edge_draw)
        snap.SaveEdgeList(
            snap.GenRMat(args.num_vertices, num_edges, prob_a, prob_b, prob_c),
            f'{args.data_loc}/RM/RM_{i}.edges')
        print(f"RM Graph {i} Generated and Saved")
def create_weighted_vote_graph(chamber, session):
    """Build and save the weighted co-voting graph for one session.

    Every member returned by ``get_congress_members`` becomes a node.  For
    each pair with co-vote data an undirected edge is added, weighted in
    both directions by ``agree_percent / 100``.

    Writes under ``data2/``: the edge list, the node-info and id-to-node-id
    dictionaries, the edge weights and the raw co-vote figures.

    Args:
        chamber: chamber identifier used for lookups and file names.
        session: session identifier used for lookups and file names.
    """
    print("Creating weighted vote graph (wcg)...")
    g = snap.TUNGraph.New()
    node_info = {}
    id_to_nid = {}
    covote_data = {}
    edge_weights = defaultdict(dict)
    created_nodes = set()

    def _ensure_member(member):
        # Create the graph node for this member on first sight.
        if member['id'] in created_nodes:
            return
        nid = g.GetMxNId()
        node_info[nid] = {'type': 'member', 'info': member}
        id_to_nid[member['id']] = nid
        created_nodes.add(member['id'])
        g.AddNode(nid)

    m = get_congress_members(chamber, session)
    for m1, m2 in tqdm(combinations(m, 2),
                       desc='member pairs',
                       total=comb(len(m), 2)):
        _ensure_member(m1)
        _ensure_member(m2)

        d = get_covote_data(m1['id'], m2['id'], chamber, session)
        if d is None:
            continue
        data = d['results'][0]

        # Order-independent key for the member pair.
        key = tuple(sorted([data['first_member_id'],
                            data['second_member_id']]))
        covote_data[key] = {
            field: data[field]
            for field in ('common_votes', 'disagree_votes', 'agree_percent',
                          'disagree_percent')
        }

        nid1 = id_to_nid[m1['id']]
        nid2 = id_to_nid[m2['id']]
        g.AddEdge(nid1, nid2)
        weight = float(data['agree_percent']) / 100
        edge_weights[nid1][nid2] = weight
        edge_weights[nid2][nid1] = weight

    suffix = (chamber, session)
    snap.SaveEdgeList(g, 'data2/wvg_%s_%s.graph' % suffix)
    np.save('data2/wvg_node_info_%s_%s.npy' % suffix, node_info)
    np.save('data2/wvg_id_to_nid_%s_%s.npy' % suffix, id_to_nid)
    np.save('data2/wvg_edge_weights_%s_%s.npy' % suffix, edge_weights)
    np.save('data2/wvg_covote_data_%s_%s.npy' % suffix, covote_data)
    print("Completed weighted vote graph!")
def TriClosure(G, name):
    """Save the candidate edges that would close open paths in ``G``.

    The output graph has the same nodes as ``G``.  For every path
    ``v -> u -> w`` in ``G`` with ``v != w`` and no edge between ``v`` and
    ``w`` in either direction, the edge ``v -> w`` is added to the output.

    Args:
        G: directed graph to analyse.
        name: path of the edge list to write.
    """
    closure = snap.PNGraph.New()
    for node in G.Nodes():
        closure.AddNode(node.GetId())

    for middle in G.Nodes():
        for src in middle.GetInEdges():
            for dst in middle.GetOutEdges():
                # Skip self-pairs and pairs already linked either way.
                if src == dst or G.IsEdge(src, dst) or G.IsEdge(dst, src):
                    continue
                closure.AddEdge(src, dst)

    snap.SaveEdgeList(closure, name)
def getSubgraph(self,subgraphNodeIdHV):
    """Save one induced subgraph per entry of ``subgraphNodeIdHV``.

    Each entry maps a graph id to a vector of node ids.  The subgraph of
    ``self.G`` induced by those nodes is written to
    ``<targetDir>/<graphName>_<graphId>.txt`` and its size is printed.

    Args:
        subgraphNodeIdHV: SNAP hash table from graph id to node-id vector.

    Returns:
        The list of written file paths, in iteration order.
    """
    written_files = []
    entry = subgraphNodeIdHV.BegI()
    while not entry.IsEnd():
        graphId = entry.GetKey()
        subG = snap.GetSubGraph(self.G, entry.GetDat())
        print("Network %s: (%d,%d)" % ("induced subgraph " + str(graphId), subG.GetNodes(), subG.GetEdges()))

        out_path = self.targetDir + "/" + self.graphName + "_" + str(graphId) + ".txt"
        snap.SaveEdgeList(subG, out_path)
        written_files.append(out_path)
        entry.Next()
    return written_files
def create_CCG(products, C_P_graph, cust_num, customers_int_to_string,
               products_int_to_string, customer_product_weights):
    """Create, save and return the Customer-Category graph.

    The graph and its lookup tables are built by
    ``customer_category_graph``.  The edge list is written to
    ``C_C_graph`` and three objects are pickled, in this order, to
    ``customer_category_graph.pkl``: ``categories_int_to_tuple``,
    ``categories_to_products`` and ``customer_category_weights``.

    Returns:
        ``(C_C_graph, categories_int_to_tuple, customer_category_weights)``.
    """
    (C_C_graph, categories_int_to_tuple, categories_to_products,
     customer_category_weights) = customer_category_graph(
         C_P_graph, products, cust_num, customers_int_to_string,
         products_int_to_string, customer_product_weights)

    snap.SaveEdgeList(C_C_graph, 'C_C_graph',
                      'Customer-Category graph edgeslist')

    # Fix: a context manager closes the pickle file even when a dump fails.
    with open("customer_category_graph.pkl", "wb") as p:
        pickle.dump(categories_int_to_tuple, p)
        pickle.dump(categories_to_products, p)
        pickle.dump(customer_category_weights, p)

    return C_C_graph, categories_int_to_tuple, customer_category_weights
def create_bipartite_consponsorship_graph(chamber, session):
    """Build and save the bipartite member-bill cosponsorship graph.

    Every member becomes a node.  For each pair of members, every bill
    returned by ``get_cosponsorship`` becomes a node linked to both.

    Writes the edge list and the node-info / id-to-node-id dictionaries
    under ``govtrack_data/``, and the processed ``m``, ``b`` and
    ``to_bills`` under ``raw_data/govtrack_cosponsor_temp/``.

    Args:
        chamber: chamber identifier used for lookups and file names.
        session: session identifier used for lookups and file names.
    """
    print("Creating bipartite cosponsorship graph (bcg)...")
    m, b, to_bills = process_govtrack_data(chamber, session)
    g = snap.TUNGraph.New()
    node_info = {}
    id_to_nid = {}
    created_nodes = set()

    def _ensure_node(key, kind, info):
        # Create the graph node for `key` on first sight.
        if key in created_nodes:
            return
        nid = g.GetMxNId()
        node_info[nid] = {'type': kind, 'info': info}
        id_to_nid[key] = nid
        created_nodes.add(key)
        g.AddNode(nid)

    for m1, m2 in tqdm(combinations(m, 2),
                       desc='member pairs',
                       total=comb(len(m), 2)):
        _ensure_node(m1['id'], 'member', m1)
        _ensure_node(m2['id'], 'member', m2)

        for bill in get_cosponsorship(m1['id'], m2['id'], to_bills):
            # Bills are keyed by b[bill] in the id tables.
            _ensure_node(b[bill], 'bill', bill)
            bill_nid = id_to_nid[b[bill]]
            g.AddEdge(id_to_nid[m1['id']], bill_nid)
            g.AddEdge(id_to_nid[m2['id']], bill_nid)

    suffix = (chamber, session)
    snap.SaveEdgeList(g, 'govtrack_data/bcg_%s_%s.graph' % suffix)
    np.save('govtrack_data/bcg_node_info_%s_%s.npy' % suffix, node_info)
    np.save('govtrack_data/bcg_id_to_nid_%s_%s.npy' % suffix, id_to_nid)
    np.save('raw_data/govtrack_cosponsor_temp/m_%s_%s.npy' % suffix, m)
    np.save('raw_data/govtrack_cosponsor_temp/b_%s_%s.npy' % suffix, b)
    np.save('raw_data/govtrack_cosponsor_temp/to_bills_%s_%s.npy' % suffix,
            to_bills)
    print("Completed bipartite cosponsorship graph!")
def setCategorys():
    """Overwrite the ``NCategory`` of comment nodes and save the graph.

    Loads the graph from ``../files/G.graph`` and the list file
    ``../files/list_comment_category_nbsvm_1.txt``.  For each node labelled
    ``'photo'``, walks its in-neighbours; every neighbour labelled
    ``"comment"`` receives the ``'category'`` of the next entry in that
    photo's ``"comments"`` list.  The result is saved both as a text edge
    list and as a binary graph.

    NOTE(review): the nesting below was reconstructed from a source whose
    line breaks were lost; in particular confirm which statements belong
    inside the ``NCategory == "text"`` branch.
    """
    G = getGraph("../files/G.graph")
    list_post = getListFromFile("../files/list_comment_category_nbsvm_1.txt")
    post_comments = list_post["post"]
    print "post_comments:", len(post_comments)
    i = 0  # index of the current photo within post_comments
    m = 0  # never read after this assignment
    for NI in G.Nodes():
        nid = NI.GetId()
        NLabel = G.GetStrAttrDatN(nid, "NLabel")
        # ---- photo nodes
        if NLabel == 'photo':
            c = 0  # index of the next comment of this photo to consume
            comments = post_comments[i]["comments"]
            # ---- in-edges of the photo
            for nid1 in NI.GetInEdges():
                NLabel1 = G.GetStrAttrDatN(nid1, "NLabel")
                NName1 = G.GetStrAttrDatN(nid1, "NName")
                NCategory = G.GetStrAttrDatN(nid1, "NCategory")
                eid = G.GetEId(nid1, nid)
                ETime = G.GetStrAttrDatE(eid, "ETime")
                # ---- comment nodes
                if NLabel1 == "comment":
                    if NCategory == "text":
                        G.AddStrAttrDatN(nid1, "other", 'NCategory')
                    # The category just set is replaced by the one from the
                    # list file.
                    newCategory = comments[c]['category']
                    G.AddStrAttrDatN(nid1, newCategory, 'NCategory')
                    NCategory_1 = G.GetStrAttrDatN(nid1, "NCategory")
                    print c, NCategory_1, "--", newCategory
                    c += 1
            # Report listed vs. consumed comment counts for this photo.
            print i, "-->", len(comments), "=", c
            i += 1
    # ---- save the graph as a text edge list
    snap.SaveEdgeList(G, "../files/new_G.txt", "Save as tab-separated list of edges")
    # ---- save the graph in binary form
    # NOTE(review): FOut is not flushed explicitly here -- confirm the file
    # is complete once the function returns.
    FOut = snap.TFOut("../files/new_G.graph")
    G.Save(FOut)
def process_cite_2(Lfile, Cfile):
    """Save the subgraph induced by a fixed node id from the ``Lfile`` graph.

    The edge list in ``Lfile`` is loaded as a directed graph and the
    subgraph induced by node 5193110 is written to ``Sub_cite.txt`` under
    ``config.path + config.subpath``.

    Args:
        Lfile: path of the edge list to load.
        Cfile: not used by this function.
    """
    out_dir = config.path + config.subpath

    # A previous, now disabled, step saved the largest connected graph:
    # it loaded Lfile as an undirected graph, took snap.GetMxScc of it and
    # wrote the result to out_dir + "processed_2_cite.txt".

    Graph = snap.LoadEdgeList(snap.PNGraph, Lfile, 0, 1)

    # Collect the nodes whose in-degree exceeds a threshold (200).
    LL = {node.GetId() for node in Graph.Nodes() if node.GetInDeg() > 200}

    # NOTE(review): LL is never used; the subgraph is built from a single
    # hard-coded node id instead -- confirm whether LL was meant to be
    # passed here.
    selected_nodes = snap.TIntV.GetV(5193110)
    SubG = snap.GetSubGraph(Graph, selected_nodes)
    snap.SaveEdgeList(SubG, out_dir + "Sub_cite.txt")
def create_edge_list(filename):
    """Build the recommendation graph from a ``|``-separated file and save it.

    The first field of every line is a node id.  The sixth field is a
    comma-separated list of recommended ids; an undirected edge is added to
    each one that is also a node.  Reading a line's list stops at the first
    entry that is empty or not all digits.  The edge list is written to
    ``mal-rec-graph``.

    Args:
        filename: path of the input file.
    """
    graph = snap.TUNGraph.New()
    with open(filename) as f:
        rows = [line.split("|") for line in f.readlines()]

    # First pass: every listed id becomes a node.
    for fields in rows:
        graph.AddNode(int(fields[0]))

    # Second pass: add the edges.
    for fields in rows:
        anime_id = int(fields[0])
        for rec in fields[5].split(","):
            if not (rec and rec.isdigit()):
                break
            rec_id = int(rec)
            if graph.IsNode(rec_id):
                graph.AddEdge(anime_id, rec_id)

    snap.SaveEdgeList(graph, "mal-rec-graph")