예제 #1
0
def main():
    """Generate three large synthetic graphs and save each as an edge list.

    Builds a preferential-attachment graph, a G(n, m) random graph and a
    small-world graph, all sized by the constants below, writes each one to
    its own text file in the working directory, then exits with status 0.
    The SNAP random generator is re-seeded before every generation.
    """
    RANDOM_SEED = 23

    SYNTHETIC_NW_NODES = 4846609        # How many nodes in the fake networks.
    SYNTHETIC_NW_EDGES = 42851237       # How many edges in the fake networks.
    SYNTHETIC_NW_AVG_DEGREE = int(SYNTHETIC_NW_EDGES / SYNTHETIC_NW_NODES)

    random.seed(RANDOM_SEED)

    print("Generating preferential attachment graph...")
    tRnd = snap.TRnd()
    tRnd.PutSeed(RANDOM_SEED)  # Re-seed every time.
    PAGraph = snap.GenPrefAttach(SYNTHETIC_NW_NODES, SYNTHETIC_NW_AVG_DEGREE, tRnd)
    filename = 'PrefAttachSynthetic-4.8M.txt'
    print("Saving edge list to file: %s" % filename)
    snap.SaveEdgeList(PAGraph, filename, 'Synthetic preferential attachment graph')

    print("Generating random graph...")
    tRnd.PutSeed(RANDOM_SEED)  # Re-seed every time.
    RndGraph = snap.GenRndGnm(snap.PUNGraph, SYNTHETIC_NW_NODES, SYNTHETIC_NW_EDGES, False, tRnd)
    filename = 'GnmRandomGraph-4.8M.txt'
    print("Saving edge list to file: %s" % filename)
    snap.SaveEdgeList(RndGraph, filename, 'Random Gnm graph')

    print("Generating small world graph...")
    tRnd.PutSeed(RANDOM_SEED)  # Re-seed every time.
    SWGraph = snap.GenSmallWorld(SYNTHETIC_NW_NODES, SYNTHETIC_NW_AVG_DEGREE, 0.1, tRnd)
    filename = 'SmallWorldGraph-4.8M.txt'
    print("Saving edge list to file: %s" % filename)
    # Save the small-world graph itself; the earlier code wrote RndGraph
    # here, so this file was a second copy of the G(n, m) graph.
    snap.SaveEdgeList(SWGraph, filename, 'Small world graph with rewire prob=0.1')

    print("Done")

    sys.exit(0)
예제 #2
0
def getFeaturesForTerm(term):
    """Build the feature table and labels for one term.

    Loads the bill data for ``term`` and the financial data for the window
    shifted two years earlier, derives the campaign, folded-campaign and
    co-sponsor graphs, writes the three graphs to text files in the working
    directory, and returns ``(X, Y)`` from ``getFeatures`` with a ``term``
    column added to ``X``.
    """
    bills = loadBillData(term)
    first_year, last_year = common_function.getTermMapping(term)
    # Financial data comes from two years before the term's own window.
    finance = loadFinancialData(first_year - 2, last_year - 2)

    bill_node = bills['SrcNId'].unique().tolist()
    legislator_node = bills['DstNId'].unique().tolist()
    comm_node = finance['SrcNId'].unique().tolist()
    legislator_node_from_campaign = finance['DstNId'].unique().tolist()

    campaign_graph = getCampaign(finance)
    cosponsor_graph = getSponsorLink(bills)
    folded_graph = getCampaign_folded(campaign_graph,
                                      legislator_node_from_campaign)

    exports = (
        (campaign_graph, 'G_Campaign.txt'),
        (folded_graph, 'G_Campaign_folded.txt'),
        (cosponsor_graph, 'G_CoSponsor.txt'),
    )
    for graph, out_name in exports:
        snap.SaveEdgeList(graph, out_name)

    # To reuse previously saved graphs instead of rebuilding them:
    #   folded_graph = snap.LoadEdgeList(snap.PUNGraph, 'G_Campaign_folded.txt', 0, 1)
    #   cosponsor_graph = snap.LoadEdgeList(snap.PUNGraph, 'G_CoSponsor.txt', 0, 1)

    features, labels = getFeatures(cosponsor_graph, campaign_graph, bill_node,
                                   legislator_node, comm_node,
                                   legislator_node_from_campaign, folded_graph)
    features['term'] = term
    return features, labels
예제 #3
0
def gen_data():
    """Generate three 300-node synthetic graphs and save them under ../data.

    Each graph is generated and written before the next one is built.
    """
    recipes = (
        (lambda: snap.GenRndGnm(snap.PNGraph, 300, 2400, True),
         "../data/Erdos-Renyi.txt"),
        (lambda: snap.GenPrefAttach(300, 8),
         "../data/PrefAttach.txt"),
        (lambda: snap.GenRndPowerLaw(300, 1.2),
         "../data/power-law.txt"),
    )
    for build, out_path in recipes:
        snap.SaveEdgeList(build(), out_path)
예제 #4
0
    def initialize(self):
        """Generate the graphs and write them out as numbered edge lists.

        Training graphs go to ``<graph_directory>/train/<i>.edgelist``; test
        graphs go to ``<graph_directory>/test/`` and continue the numbering
        after the last training graph. Returns ``self.graph_directory``.
        """
        self.generate_graphs(self.base_graph(), self.k)

        train_prefix = self.graph_directory + '/train/'
        for index, graph in enumerate(self.train_graph_list):
            snap.SaveEdgeList(graph, train_prefix + str(index) + '.edgelist')

        test_prefix = self.graph_directory + '/test/'
        first_test_index = len(self.train_graph_list)
        for index, graph in enumerate(self.test_graph_list, first_test_index):
            snap.SaveEdgeList(graph, test_prefix + str(index) + '.edgelist')

        return self.graph_directory
def get_graph_by_month(graph_path, date_username_path):
    """Delete users from a graph newest-first, saving snapshots along the way.

    Loads a (created_date, username) table and a graph with its username
    lookup ``H``, sorts the table by ``created_date`` descending, then walks
    the rows deleting each user's node from the graph. Stops once the graph
    has fewer than 30000 nodes. Nothing is returned; snapshots are written
    under ``./data/graph/``.

    Args:
        graph_path: passed to ``load_graph``.
        date_username_path: passed to ``load_user_attr_to_df``.
    """
    date_username_df = load_user_attr_to_df(date_username_path)
    # Newest accounts first, so nodes are removed in reverse creation order.
    date_username_df.sort_values(by='created_date',
                                 ascending=False,
                                 inplace=True)
    print 'date_username loaded'
    Graph, H = load_graph(graph_path)
    cur_date = ''  # date string of the previously processed row
    print date_username_df['created_date']
    for idx, row in date_username_df.iterrows():
        if Graph.GetNodes() < 30000:
            break
        print row['created_date']
        try:
            if cur_date == '':
                cur_date = str(row['created_date'])
            # Snapshot when the previous row's date string ends in '01' and
            # this row has moved on to a different date.
            # NOTE(review): presumably '01' is the day-of-month, making this
            # one snapshot per month -- confirm the date format.
            if cur_date[-2:] == '01' and str(row['created_date']) != cur_date:
                snap.SaveEdgeList(Graph,
                                  './data/graph/Graph_%s.txt' % cur_date)
            cur_date = str(row['created_date'])
            username = row['username']
            Node_ID = H.GetDat(username)
            Graph.DelNode(Node_ID)
        except Exception as e:
            # Best effort: report the failure and carry on with the next row.
            print '!'
            print e
def get_commits_graph(path):
    """Load a commits network from CSV tables and return it as a networkx graph.

    Reads ``<path><pname>_edges.csv`` and ``<path><pname>_nodes.csv`` into
    SNAP tables, builds a SNAP network from them, removes self-edges, writes
    the edge list to ``temp/commits_temp_edgelist.csv`` and parses that file
    back into an undirected ``nx.Graph`` with integer node ids.

    NOTE(review): ``pname`` is not defined in this function; presumably it is
    a module-level name -- confirm against the rest of the file.

    Args:
        path: prefix prepended to the CSV file names.

    Returns:
        The parsed ``nx.Graph``.
    """
    context = snap.TTableContext()

    e_schema = snap.Schema()
    e_schema.Add(snap.TStrTAttrPr("source", snap.atStr))
    e_schema.Add(snap.TStrTAttrPr("target", snap.atStr))
    e_schema.Add(snap.TStrTAttrPr("weight", snap.atStr))

    n_schema = snap.Schema()
    n_schema.Add(snap.TStrTAttrPr("id", snap.atStr))
    n_schema.Add(snap.TStrTAttrPr("username", snap.atStr))
    n_schema.Add(snap.TStrTAttrPr("size", snap.atStr))

    edgetable = snap.TTable.LoadSS(e_schema, path + '{}_edges.csv'.format(pname), context, ",", snap.TBool(True))
    nodetable = snap.TTable.LoadSS(n_schema, path + '{}_nodes.csv'.format(pname), context, ",", snap.TBool(True))

    # No edge or node attributes are carried over into the network.
    edgeattrv = snap.TStrV()
    nodeattrv = snap.TStrV()

    net = snap.ToNetwork(snap.PNEANet, edgetable, "source", "target", edgeattrv, nodetable, "id", nodeattrv, snap.aaFirst)
    snap.DelSelfEdges(net)
    snap.SaveEdgeList(net, 'temp/commits_temp_edgelist.csv')

    # Close the temporary file deterministically; it used to be left open.
    with open('temp/commits_temp_edgelist.csv', 'r') as edge_file:
        G = nx.parse_edgelist(edge_file, delimiter='\t', create_using=nx.Graph(), nodetype=int, data=(('weight', float),), comments='#')

    return G
예제 #7
0
def generate(adjusted):
    """Build a directed friendship graph from a CSV file and save it.

    The first CSV column holds a person id (single quotes are stripped) and
    the last column holds that person's friends, decoded by ``strToList``.
    Each person becomes a node numbered by its row index and each friend that
    also appears as a person becomes a directed edge. Zero-degree nodes are
    removed and the result is written to ``grafo.txt``.

    All nodes are added before any edge. Previously nodes and edges were
    added in the same pass, so an edge pointing at a person from a later row
    referenced a node that did not exist yet, and the resulting error was
    silently swallowed by a bare ``except``.

    Args:
        adjusted: path of the corrected CSV file, passed to ``pd.read_csv``.
    """
    quote = "\'"
    # Read the corrected file.
    data = pd.read_csv(adjusted)

    # Map every person id to its row index, which doubles as the node id.
    ids = {}
    for i in data.index:
        person_id = data.iloc[i, 0].replace(quote, "")
        ids[person_id] = i

    G = snap.TNGraph.New()

    # First pass: create every node, in row order.
    for i in data.index:
        node = ids[str(data.iloc[i, 0]).replace(quote, "")]
        if not G.IsNode(node):
            G.AddNode(node)

    # Second pass: create the edges now that both endpoints can exist.
    for i in data.index:
        source = ids[str(data.iloc[i, 0]).replace(quote, "")]
        for friend in strToList(data.iloc[i, -1]):
            try:
                target = ids[friend]
            except (KeyError, TypeError):
                # Friend is not a known person in this file: skip it.
                continue
            G.AddEdge(source, target)

    # Remove nodes of degree zero.
    snap.DelZeroDegNodes(G)

    # Save the graph.
    snap.SaveEdgeList(G, "grafo.txt", "Save as tab-separated list of edges")
예제 #8
0
def projection(Graph):
    """Project the 'campaign' or 'bill' network and save the result.

    Two nodes are linked in the projection when the source graph has at
    least one length-2 path between them; only pairs whose larger id is
    below 10000 are considered. Prints node/edge counts and the clustering
    coefficient, then writes the projection as an edge list.

    Args:
        Graph: either ``'campaign'`` or ``'bill'``.

    Raises:
        ValueError: for any other value of ``Graph``.
    """
    if Graph == 'campaign':
        edge_file = "../processed-data/campaignNetworks_v2.txt"
    elif Graph == 'bill':
        edge_file = "../processed-data/legislator_bill_edge_list_graph.txt"
    else:
        raise ValueError("Invalid graph: please use 'campaign' or 'bill'. ")
    G2 = readGraph(edge_file)

    H = snap.TUNGraph.New()

    for first in G2.Nodes():
        low_id = first.GetId()
        for second in G2.Nodes():
            high_id = second.GetId()
            # 10000 is the upper limit for candidate nodes.
            if not (low_id < high_id and high_id < 10000):
                continue
            shared = snap.TIntV()
            path_count = snap.GetLen2Paths(G2, low_id, high_id, shared)
            if path_count > 0:
                for endpoint in (low_id, high_id):
                    if not H.IsNode(endpoint):
                        H.AddNode(endpoint)
                if not H.IsEdge(low_id, high_id):
                    H.AddEdge(low_id, high_id)

    print("Compressed Graph Node count total: %d" % (H.GetNodes()))

    print("Compressed Edge count total: %d" % (H.GetEdges()))

    GraphClustCoeff = snap.GetClustCf(H, -1)
    print(Graph + " Network Clustering coefficient: %f" % GraphClustCoeff)

    out_path = "../processed-data/"+Graph+"_projection.txt"
    description = Graph + " network - Save projected network info as tab-separated list of edges, using unified candidate node IDs"
    snap.SaveEdgeList(H, out_path, description)
예제 #9
0
def combineGraphs():
    """Merge the legislator-bill edges into the campaign network and save it.

    Every edge of the bill graph is copied into the campaign graph, adding
    missing endpoints first. Newly added nodes with id below 10000 are
    counted and reported. The combined graph is written as an edge list.
    """
    GB = readGraph("../processed-data/legislator_bill_edge_list_graph.txt")
    # Loaded but not used below; kept so the files are still read.
    bill_node = pd.read_csv('../processed-data/bill_node.csv')
    legislator_node = pd.read_csv('../processed-data/legislator_node.csv')

    GC = readGraph("../processed-data/campaignNetworks_v2.txt")
    added_legislators = 0

    for edge in GB.Edges():
        src, dst = edge.GetSrcNId(), edge.GetDstNId()
        for endpoint in (src, dst):
            if GC.IsNode(endpoint):
                continue
            GC.AddNode(endpoint)
            # A new node with id below 10000 is counted as a legislator.
            if endpoint < 10000:
                added_legislators += 1
        if not GC.IsEdge(src, dst):
            GC.AddEdge(src, dst)

    print("Added %d new legislator nodes" % (added_legislators))
    print("Overall graph node count: %d, and edge count %d" % (GC.GetNodes(), GC.GetEdges()))

    snap.SaveEdgeList(GC, "../processed-data/combined_network.txt", "Save 1981 to 2016 combined network info as tab-separated list of edges, using unified candidate node IDs")
예제 #10
0
def write_to_graph(load_file_name, save_file_name):
    """Convert a two-line-per-record retweet file into a directed edge list.

    Odd lines carry the original user id in the third whitespace-separated
    field (and a count in the fourth); even lines carry retweeting user ids
    at every second position. An edge original -> retweeter is added for
    each retweeter, and the graph is saved to ``save_file_name``.
    """
    graph = snap.PNGraph.New()

    original_uid = None
    retweet_num = None

    with open(load_file_name) as source:
        for row_number, line in enumerate(source, 1):
            # Progress indicator.
            if row_number % 10000 == 0:
                print(row_number)
            elements = line.split()

            if row_number % 2 == 1:
                # Header line of a record.
                original_uid = int(elements[2])
                retweet_num = int(elements[3])  # parsed but not used further
                continue

            # Retweeter line of the same record.
            if not graph.IsNode(original_uid):
                graph.AddNode(original_uid)
            for token in elements[::2]:
                retweet_uid = int(token)
                if not graph.IsNode(retweet_uid):
                    graph.AddNode(retweet_uid)
                graph.AddEdge(original_uid, retweet_uid)

    snap.SaveEdgeList(graph, save_file_name)
예제 #11
0
def getXYFromEmb(bill_term, fin_start_year, fin_end_year, p, q):
    """Build features and labels for one bill term from campaign embeddings.

    Loads the bill data for ``bill_term`` and the financial data for the
    given year range, saves the campaign graph to ``G_campaign.txt``, runs
    ``getEmbeddings`` on that file with parameters ``p`` and ``q`` and a walk
    length of 80, loads ``embedding.emb`` and passes it to ``getFeatures``.
    Rows of ``X`` containing any null value are dropped from both ``X`` and
    ``Y`` before returning ``(X, Y)``.
    """
    df = link_prediction.loadBillData(bill_term)
    fin_df = link_prediction.loadFinancialData(
        fin_start_year, fin_end_year)  # financial data for the year range chosen by the caller

    bill_node = df['SrcNId'].unique().tolist()
    legislator_node = df['DstNId'].unique().tolist()
    comm_node = fin_df['SrcNId'].unique().tolist()

    legislator_node_from_campaign = fin_df['DstNId'].unique().tolist()

    G_CoSponsor = link_prediction.getSponsorLink(df)

    G_Campaign = link_prediction.getCampaign(fin_df)

    # The embedding step reads the graph back from this file.
    snap.SaveEdgeList(G_Campaign, "G_campaign.txt")
    #G_Campaign = snap.LoadEdgeList(snap.PUNGraph, "G_campaign.txt", 0, 1)

    walk_length = 80
    getEmbeddings("G_campaign.txt", p, q, walk_length)

    # skiprows=1 skips the first line of the embedding file.
    emb = np.loadtxt('embedding.emb', skiprows=1)
    node_id = emb[:, 0]  # not used below

    X, Y = getFeatures(G_CoSponsor, G_Campaign, bill_node, legislator_node,
                       comm_node, legislator_node_from_campaign, emb)

    # Positions of the rows of X that contain at least one null value.
    # NOTE(review): these are positions but drop() below removes by label, so
    # this assumes X and Y carry a default 0..n-1 index -- confirm.
    inds = pd.isnull(X).any(1).nonzero()[0]

    X = X.drop(inds)
    Y = Y.drop(inds)

    return X, Y
def main():
    """Print motif z-scores of 'unweighted.txt' against ten rewired samples.

    Runs the external SNAP ``motifs`` tool on the original graph and on ten
    graphs produced by ``partly_undir_rewire``, collects the 13 motif counts
    of each run, and prints ``(original - mean) / std`` per motif. Sample
    files are removed afterwards.
    """
    G = snap.LoadEdgeList(snap.PNGraph, 'unweighted.txt', 0, 1)
    os.system('snap/snap/examples/motifs/motifs -i:unweighted.txt -o:orig')
    # Context managers make sure the count files are closed even if parsing
    # fails.
    with open('orig-counts.tab') as f:
        df = pd.read_csv(f, sep='\t')
    orig_counts = df['Count'].values
    print(orig_counts)

    spokes = get_spokes(G)
    motifs = np.zeros((10, 13))  # one row per sample, one column per motif
    for i in range(10):
        sample_name = 'sample' + str(i)
        print(sample_name)
        rewired = partly_undir_rewire(G, spokes)
        snap.SaveEdgeList(rewired, sample_name + '.txt')
        os.system('snap/snap/examples/motifs/motifs -i:' + sample_name + '.txt -o:' + sample_name)
        with open(sample_name + '-counts.tab') as f:
            df = pd.read_csv(f, sep='\t')
        motifs[i, :] = df['Count'].values
    # NOTE(review): presumably this perturbs the first sample so that no
    # motif has zero standard deviation below -- confirm intent.
    motifs[0, :] += 1
    os.system('rm -rf sample*')
    mean = np.mean(motifs, axis=0)
    std = np.std(motifs, axis=0)
    z = (orig_counts - mean) / std
    print(z)
예제 #13
0
def generate_graphs():
    """Generate synthetic counterparts for every graph listed in GRAPHS.

    For each path the basic metrics are computed and printed, then an
    Erdos-Renyi, a Watts-Strogatz, a Barabasi-Albert and three Forest Fire
    graphs (original size, a tenth of it, ten times it) are generated and
    saved as ``<name>_<model>.elist`` in the working directory.
    """
    for path in GRAPHS:
        name = path.split('/')[-1].split('.')[0]

        metrics = Metrics(path, True).calculate_basic()

        print(metrics)

        # Erdos-Renyi (random) graph: type, num_nodes, num_edges.
        er = snap.GenRndGnm(snap.PNGraph, metrics.num_nodes, metrics.num_edges)
        snap.SaveEdgeList(er, "{}_er.elist".format(name))

        # Watts-Strogatz (small world) graph: num_nodes, node_out_degree
        # (average out degree will be twice this value), rewire_prob.
        ws_out_degree = int(metrics.avg_degree) / 2
        ws = snap.GenSmallWorld(metrics.num_nodes, ws_out_degree, 0.2)
        snap.SaveEdgeList(ws, "{}_ws.elist".format(name))

        # Barabasi-Albert (scale-free, preferential attachment) graph:
        # num_nodes, degree of each node desired.
        ba_out_degree = int(metrics.avg_degree) / 2
        ba = snap.GenPrefAttach(metrics.num_nodes, ba_out_degree)
        snap.SaveEdgeList(ba, "{}_ba.elist".format(name))

        # Forest Fire graphs: num_nodes, forward_prob, backward_prob.
        # The US Airports data set uses its own selected probability.
        if name == "USairport_2010":
            burn_prob = 0.3599
        else:
            burn_prob = 0.3467

        ff = snap.GenForestFire(metrics.num_nodes, burn_prob, burn_prob)
        snap.SaveEdgeList(ff, "{}_ff.elist".format(name))

        ff = snap.GenForestFire(int(metrics.num_nodes / 10), burn_prob,
                                burn_prob)
        snap.SaveEdgeList(ff, "{}_ffdiv10.elist".format(name))

        ff = snap.GenForestFire(metrics.num_nodes * 10, burn_prob, burn_prob)
        snap.SaveEdgeList(ff, "{}_ffx10.elist".format(name))
예제 #14
0
def create_weighted_cosponsorship_graph(chamber, session):
    """Build and save the weighted cosponsorship graph for a chamber/session.

    Every non-bill node of the bipartite graph returned by ``read_bcg`` is
    linked to the nodes two hops away from it. For each linked pair the
    directed weight is the number of commonly cosponsored bills divided by
    the number of bills of the edge's first member. The graph, the edge
    weights and the per-node hop-1 neighbour counts are written under
    ``govtrack_data/``.
    """
    print("Creating weighted cosponsorship graph (wcg)...")
    # m and b are loaded but not used below; to_bills maps member id -> bills.
    m = np.load('raw_data/govtrack_cosponsor_temp/m_%s_%s.npy' %
                (chamber, session))
    b = np.load('raw_data/govtrack_cosponsor_temp/b_%s_%s.npy' %
                (chamber, session)).item()
    to_bills = np.load('raw_data/govtrack_cosponsor_temp/to_bills_%s_%s.npy' %
                       (chamber, session)).item()
    g, node_info, id_to_nid = read_bcg(chamber, session)
    edge_weights = {}
    sponsored_bills = {}
    wcg = snap.TUNGraph.New()

    def record_bill_count(nid):
        # Cache the number of hop-1 neighbours of nid the first time it is seen.
        if nid in sponsored_bills:
            return
        neighbours = snap.TIntV()
        snap.GetNodesAtHop(g, nid, 1, neighbours, False)
        sponsored_bills[nid] = len(neighbours)

    for node in tqdm(node_info, total=len(node_info), position=0):
        if node_info[node]['type'] == 'bill':
            continue
        if not wcg.IsNode(node):
            wcg.AddNode(node)
        if not g.IsNode(node):
            print("F**K WHY IS %s NOT A NODE" % (node, ))
            continue

        two_hops = snap.TIntV()
        snap.GetNodesAtHop(g, node, 2, two_hops, False)
        record_bill_count(node)

        for other in two_hops:
            if other == node:
                continue
            if not wcg.IsNode(other):
                wcg.AddNode(other)
            record_bill_count(other)

            member_id = node_info[node]['info']['id']
            other_id = node_info[other]['info']['id']
            common_bills = len(get_cosponsorship(member_id, other_id, to_bills))
            edge_weights[(node, other)] = common_bills / len(to_bills[member_id])
            edge_weights[(other, node)] = common_bills / len(to_bills[other_id])
            wcg.AddEdge(node, other)

    snap.SaveEdgeList(wcg,
                      'govtrack_data/wcg_%s_%s.graph' % (chamber, session))
    np.save('govtrack_data/wcg_edge_weights_%s_%s.npy' % (chamber, session),
            edge_weights)
    np.save('govtrack_data/wcg_sponsored_bills_%s_%s.npy' % (chamber, session),
            sponsored_bills)
    print("Completed weighted cosponsorship graph!")
예제 #15
0
def gen_ba(args):
    """Generate ``args.num_graphs`` BA graphs and save each as an edge list.

    Each graph has ``args.num_vertices`` nodes and an out-degree drawn
    uniformly from [2, 6) and truncated to an integer. Files are written to
    ``<args.data_loc>/BA/BA_<i>.edges``.
    """
    for index in range(args.num_graphs):
        degree = int(np.random.uniform(2, 6))
        # NOTE(review): a fresh snap.TRnd() is built with its default
        # arguments on every iteration -- confirm this is intended.
        generator = snap.TRnd()
        ba_graph = snap.GenPrefAttach(args.num_vertices, degree, generator)
        snap.SaveEdgeList(ba_graph, f'{args.data_loc}/BA/BA_{index}.edges')

        print(f"BA Graph {index} Generated and Saved")
예제 #16
0
 def __init__(self, num_nodes, subgraph_path = "../data/subgraphs"):
     """Load the motifs for ``num_nodes`` from disk, creating them if absent.

     Motif edge lists live in ``<subgraph_path>/<num_nodes>/<i>.txt``. When
     that directory holds no ``.txt`` file the motifs are built with
     ``create_motifs`` and saved; otherwise they are loaded from the files.
     """
     self.node_motifs = {2:2, 3:13, 4:199}
     self.subgraph_path = os.path.join(subgraph_path, str(num_nodes))
     if not os.path.exists(self.subgraph_path):
         os.makedirs(self.subgraph_path)
     self.num_nodes = num_nodes

     def motif_file(index):
         # Path of the edge-list file holding motif number `index`.
         return os.path.join(self.subgraph_path, "{}.txt".format(index))

     cached_files = glob.glob(os.path.join(self.subgraph_path, '*.txt'))
     if cached_files:
         motif_count = self.node_motifs[num_nodes]
         self.motifs = [snap.LoadEdgeList(snap.PNGraph, motif_file(index), 0, 1)
                        for index in range(motif_count)]
         return

     self.motifs = self.create_motifs()
     for index, graph in enumerate(self.motifs):
         snap.SaveEdgeList(graph, motif_file(index))
예제 #17
0
def gen_sw(args):
    """Generate ``args.num_graphs`` SW graphs and save each as an edge list.

    Each graph has ``args.num_vertices`` nodes, out-degree 3 and a rewiring
    probability drawn uniformly from [0, 0.5). Files are written to
    ``<args.data_loc>/SW/SW_{i}.edges``.
    """
    for index in range(args.num_graphs):
        rewire_prob = np.random.uniform(0, 0.5)
        generator = snap.TRnd()
        sw_graph = snap.GenSmallWorld(args.num_vertices, 3, rewire_prob,
                                      generator)
        snap.SaveEdgeList(sw_graph, f'{args.data_loc}/SW/SW_{index}.edges')

        print(f"SW Graph {index} Generated and Saved")
예제 #18
0
def gen_er(args):
    """Generate ``args.num_graphs`` ER graphs and save each as an edge list.

    The edge count of each graph is drawn uniformly between half and twice
    ``args.num_vertices`` and truncated to an integer. Files are written to
    ``<args.data_loc>/ER/ER_{i}.edges``.
    """
    for index in range(args.num_graphs):
        fewest = args.num_vertices / 2
        most = args.num_vertices * 2
        edge_count = int(np.random.uniform(fewest, most))

        er_graph = snap.GenRndGnm(snap.PNGraph, args.num_vertices, edge_count)
        snap.SaveEdgeList(er_graph, f'{args.data_loc}/ER/ER_{index}.edges')

        print(f"ER Graph {index} Generated and Saved")
예제 #19
0
def main(args):
    """Save the largest weakly connected component of the review graph.

    Reads the undirected edge list at ``args.review`` and writes its largest
    weakly connected component to ``args.review_maxwcc``.
    """
    source_path, target_path = args.review, args.review_maxwcc

    review_graph = snap.LoadEdgeList(snap.PUNGraph, source_path, 0, 1)
    largest_component = snap.GetMxWcc(review_graph)
    snap.SaveEdgeList(largest_component, target_path)
예제 #20
0
def gen_ff(args):
    """Generate ``args.num_graphs`` FF graphs and save each as an edge list.

    Forward and backward burning probabilities are each drawn uniformly from
    [0, 0.5). Files are written to ``<args.data_loc>/FF/FF_{i}.edges``.
    """
    for index in range(args.num_graphs):
        forward_prob = np.random.uniform(0, 0.5)
        backward_prob = np.random.uniform(0, 0.5)

        ff_graph = snap.GenForestFire(args.num_vertices, forward_prob,
                                      backward_prob)
        snap.SaveEdgeList(ff_graph, f'{args.data_loc}/FF/FF_{index}.edges')

        print(f"FF Graph {index} Generated and Saved")
예제 #21
0
    def genGraph(self, nodes):
        """Save a complete graph thinned to every sixth node.

        Builds a full undirected graph on ``nodes`` nodes, deletes every node
        whose id is not a multiple of 6, saves the remainder to
        'random5000by6.txt' and returns that file name.
        """
        print("Generating random graph...")
        genFileName = 'random5000by6.txt'

        # Collect the ids to remove: everything that is not a multiple of 6.
        to_delete = snap.TIntV()
        for node_id in range(nodes):
            if node_id % 6:
                to_delete.Add(node_id)

        full_graph = snap.GenFull(snap.PUNGraph, nodes)
        snap.DelNodes(full_graph, to_delete)

        snap.SaveEdgeList(full_graph, genFileName)
        return genFileName
예제 #22
0
def gen_rm(args):
    """Generate ``args.num_graphs`` RM graphs and save each as an edge list.

    The three R-MAT parameters are drawn uniformly (first from [0, 0.3), the
    other two from [0, 0.1)) and the edge count uniformly between one and two
    times ``args.num_vertices``. Files are written to
    ``<args.data_loc>/RM/RM_{i}.edges``.
    """
    for index in range(args.num_graphs):
        # Draw order matters for reproducibility under a fixed numpy seed.
        prob_a = np.random.uniform(0, 0.3)
        prob_b = np.random.uniform(0, 0.1)
        prob_c = np.random.uniform(0, 0.1)
        edge_count = int(
            np.random.uniform((args.num_vertices), (args.num_vertices * 2)))

        rm_graph = snap.GenRMat(args.num_vertices, edge_count, prob_a, prob_b,
                                prob_c)
        snap.SaveEdgeList(rm_graph, f'{args.data_loc}/RM/RM_{index}.edges')

        print(f"RM Graph {index} Generated and Saved")
예제 #23
0
파일: aprsusc.py 프로젝트: AzurNova/aprsusc
def create_weighted_vote_graph(chamber, session):
    """Build and save the weighted vote graph for a chamber/session.

    Every member returned by ``get_congress_members`` becomes a node. Each
    pair of members with covote data is linked, and the edge weight in both
    directions is the pair's ``agree_percent`` divided by 100. The graph,
    node info, id mapping, edge weights and raw covote figures are written
    under ``data2/``.
    """
    print("Creating weighted vote graph (wcg)...")
    g = snap.TUNGraph.New()
    node_info = {}
    id_to_nid = {}
    covote_data = {}
    edge_weights = defaultdict(dict)
    created_nodes = set()

    def ensure_member_node(member):
        # Create the graph node for `member` the first time it is seen.
        member_id = member['id']
        if member_id in created_nodes:
            return
        nid = g.GetMxNId()
        node_info[nid] = {'type': 'member', 'info': member}
        id_to_nid[member_id] = nid
        created_nodes.add(member_id)
        g.AddNode(nid)

    m = get_congress_members(chamber, session)
    for m1, m2 in tqdm(combinations(m, 2),
                       desc='member pairs',
                       total=comb(len(m), 2)):
        ensure_member_node(m1)
        ensure_member_node(m2)

        d = get_covote_data(m1['id'], m2['id'], chamber, session)
        if d is None:
            continue
        data = d['results'][0]

        # Order-independent key for the pair.
        pair_key = tuple(sorted([data['first_member_id'],
                                 data['second_member_id']]))
        covote_data[pair_key] = {
            'common_votes': data['common_votes'],
            'disagree_votes': data['disagree_votes'],
            'agree_percent': data['agree_percent'],
            'disagree_percent': data['disagree_percent']
        }

        nid1 = id_to_nid[m1['id']]
        nid2 = id_to_nid[m2['id']]
        g.AddEdge(nid1, nid2)
        agreement = float(data['agree_percent']) / 100
        edge_weights[nid1][nid2] = agreement
        edge_weights[nid2][nid1] = agreement

    snap.SaveEdgeList(g, 'data2/wvg_%s_%s.graph' % (chamber, session))
    np.save('data2/wvg_node_info_%s_%s.npy' % (chamber, session), node_info)
    np.save('data2/wvg_id_to_nid_%s_%s.npy' % (chamber, session), id_to_nid)
    np.save('data2/wvg_edge_weights_%s_%s.npy' % (chamber, session),
            edge_weights)
    np.save('data2/wvg_covote_data_%s_%s.npy' % (chamber, session),
            covote_data)
    print("Completed weighted vote graph!")
예제 #24
0
def TriClosure(G, name):
    """Save the edges that would close the open two-step paths of G.

    The output graph has the same nodes as ``G``. For every node, each
    in-neighbour ``v`` is linked to each out-neighbour ``w`` when the two are
    distinct and ``G`` has no edge between them in either direction. The
    result is written to ``name`` as an edge list.
    """
    closure = snap.PNGraph.New()
    for node in G.Nodes():
        closure.AddNode(node.GetId())

    for middle in G.Nodes():
        for v in middle.GetInEdges():
            for w in middle.GetOutEdges():
                # Skip pairs that are identical or already connected in G.
                if v == w or G.IsEdge(v, w) or G.IsEdge(w, v):
                    continue
                closure.AddEdge(v, w)

    snap.SaveEdgeList(closure, name)
    def getSubgraph(self,subgraphNodeIdHV):
        """Save one induced subgraph per entry of the given hash.

        For every (graph id, node id vector) entry, the subgraph of
        ``self.G`` induced by the vector is printed in summary and written
        to ``<targetDir>/<graphName>_<graph id>.txt``.

        Returns:
            The list of written file paths, in iteration order.
        """
        lblFiles = []
        cursor = subgraphNodeIdHV.BegI()

        while not cursor.IsEnd():
            graphId = cursor.GetKey()
            induced = snap.GetSubGraph(self.G, cursor.GetDat())
            print("Network %s: (%d,%d)" % ("induced subgraph " + str(graphId), induced.GetNodes(), induced.GetEdges()))

            subgraph_name = self.graphName + "_" + str(graphId)
            out_file = self.targetDir + "/" + subgraph_name + ".txt"
            snap.SaveEdgeList(induced, out_file)
            lblFiles.append(out_file)

            cursor.Next()
        return lblFiles
예제 #26
0
def create_CCG(products, C_P_graph, cust_num, customers_int_to_string,
               products_int_to_string, customer_product_weights):
    """Create the Customer-Category graph and persist it.

    Delegates construction to ``customer_category_graph``, saves the graph
    as the edge list ``C_C_graph`` and pickles the three accompanying
    mappings, in order, into ``customer_category_graph.pkl``.

    Returns:
        ``(C_C_graph, categories_int_to_tuple, customer_category_weights)``.
    """
    C_C_graph, categories_int_to_tuple, categories_to_products, customer_category_weights = customer_category_graph(
        C_P_graph, products, cust_num, customers_int_to_string,
        products_int_to_string, customer_product_weights)
    snap.SaveEdgeList(C_C_graph, 'C_C_graph',
                      'Customer-Category graph edgeslist')

    # The context manager closes the file even if one of the dumps fails.
    # The three objects must be read back with three load() calls, in order.
    with open("customer_category_graph.pkl", "wb") as p:
        pickle.dump(categories_int_to_tuple, p)
        pickle.dump(categories_to_products, p)
        pickle.dump(customer_category_weights, p)

    return C_C_graph, categories_int_to_tuple, customer_category_weights
예제 #27
0
def create_bipartite_consponsorship_graph(chamber, session):
    """Build and save the bipartite member-bill cosponsorship graph.

    Members and the bills they cosponsor together become nodes; each bill
    shared by a pair of members is linked to both of them. The graph, node
    info and id mapping are written under ``govtrack_data/`` and the raw
    processed data under ``raw_data/govtrack_cosponsor_temp/``.
    """
    print("Creating bipartite cosponsorship graph (bcg)...")
    m, b, to_bills = process_govtrack_data(chamber, session)
    g = snap.TUNGraph.New()
    node_info = {}
    id_to_nid = {}
    created_nodes = set()

    def ensure_node(key, kind, info):
        # Create the graph node for `key` the first time it is seen.
        if key in created_nodes:
            return
        nid = g.GetMxNId()
        node_info[nid] = {'type': kind, 'info': info}
        id_to_nid[key] = nid
        created_nodes.add(key)
        g.AddNode(nid)

    for m1, m2 in tqdm(combinations(m, 2),
                       desc='member pairs',
                       total=comb(len(m), 2)):
        ensure_node(m1['id'], 'member', m1)
        ensure_node(m2['id'], 'member', m2)

        for bill in get_cosponsorship(m1['id'], m2['id'], to_bills):
            bill_key = b[bill]
            ensure_node(bill_key, 'bill', bill)
            bill_nid = id_to_nid[bill_key]
            g.AddEdge(id_to_nid[m1['id']], bill_nid)
            g.AddEdge(id_to_nid[m2['id']], bill_nid)

    snap.SaveEdgeList(g, 'govtrack_data/bcg_%s_%s.graph' % (chamber, session))
    np.save('govtrack_data/bcg_node_info_%s_%s.npy' % (chamber, session),
            node_info)
    np.save('govtrack_data/bcg_id_to_nid_%s_%s.npy' % (chamber, session),
            id_to_nid)
    np.save(
        'raw_data/govtrack_cosponsor_temp/m_%s_%s.npy' % (chamber, session), m)
    np.save(
        'raw_data/govtrack_cosponsor_temp/b_%s_%s.npy' % (chamber, session), b)
    np.save(
        'raw_data/govtrack_cosponsor_temp/to_bills_%s_%s.npy' %
        (chamber, session), to_bills)
    print("Completed bipartite cosponsorship graph!")
예제 #28
0
def setCategorys():
    """Rewrite the NCategory attribute of comment nodes and save the graph.

    Loads the graph from ../files/G.graph and a list of posts from
    ../files/list_comment_category_nbsvm_1.txt. For every node labelled
    'photo', each in-neighbour labelled 'comment' gets its NCategory set to
    the 'category' of the matching entry in that post's "comments" list.
    The graph is then saved both as an edge list and in binary form.

    NOTE(review): photos are matched to posts, and comments to list entries,
    purely by iteration order (counters i and c) -- confirm the input file
    is ordered the same way as the graph.
    """
    G = getGraph("../files/G.graph")

    list_post = getListFromFile("../files/list_comment_category_nbsvm_1.txt")
    post_comments = list_post["post"]

    print "post_comments:", len(post_comments)
    i = 0  # index into post_comments of the current photo
    m = 0  # not read anywhere in this function

    for NI in G.Nodes():
        nid = NI.GetId()
        NLabel = G.GetStrAttrDatN(nid, "NLabel")

        #-------------PHOTO
        if NLabel == 'photo':
            c = 0  # index into `comments` of the next comment in-neighbour
            comments = post_comments[i]["comments"]

            #------------------------------------------------IN EDGES------------------------------------------------
            for nid1 in NI.GetInEdges():
                NLabel1 = G.GetStrAttrDatN(nid1, "NLabel")
                NName1 = G.GetStrAttrDatN(nid1, "NName")
                NCategory = G.GetStrAttrDatN(nid1, "NCategory")
                eid = G.GetEId(nid1, nid)
                ETime = G.GetStrAttrDatE(eid, "ETime")
                #------------COMMENT
                if NLabel1 == "comment":
                    # This "other" value is overwritten two statements later.
                    if NCategory == "text":
                        G.AddStrAttrDatN(nid1, "other", 'NCategory')
                    newCategory = comments[c]['category']
                    G.AddStrAttrDatN(nid1, newCategory, 'NCategory')
                    # Read the attribute back so the print shows what was stored.
                    NCategory_1 = G.GetStrAttrDatN(nid1, "NCategory")
                    print c, NCategory_1, "--", newCategory
                    c += 1

            # Per-photo summary: listed comments vs. comment nodes updated.
            print i, "-->", len(comments), "=", c
            i += 1
    #---------------save Graph as an output file
    snap.SaveEdgeList(G, "../files/new_G.txt",
                      "Save as tab-separated list of edges")

    #---------------save binary
    FOut = snap.TFOut("../files/new_G.graph")
    G.Save(FOut)
예제 #29
0
def process_cite_2(Lfile, Cfile):
    """Save a subgraph of the citation graph stored in ``Lfile``.

    Loads ``Lfile`` as a directed graph, collects the ids of nodes with
    in-degree above 200, then saves the subgraph induced by the single node
    id 5193110 to ``<config.path><config.subpath>Sub_cite.txt``.

    NOTE(review): the collected set ``LL`` is never used and the subgraph is
    built from one hard-coded id; presumably ``LL`` was meant to select the
    nodes -- confirm intent. ``Cfile`` is not used either.
    """
    path = config.path + config.subpath
    # The string below is disabled code that saved the largest connected
    # component instead; it is not executed.
    '''
    # 保存最大连通图
    
    # graph loading
    Graph = snap.LoadEdgeList(snap.PUNGraph, Lfile, 0, 1)
    MxScc = snap.GetMxScc(Graph)
    snap.SaveEdgeList(MxScc, path+"processed_2_cite.txt", "Save as tab-separated list of edges")
    '''

    LL = set()
    # Collect the nodes whose in-degree exceeds a threshold.
    Graph = snap.LoadEdgeList(snap.PNGraph, Lfile, 0, 1)
    for node in Graph.Nodes():
        if node.GetInDeg() > 200:
            LL.add(node.GetId())
    SubG = snap.GetSubGraph(Graph, snap.TIntV.GetV(5193110))
    snap.SaveEdgeList(SubG, path + "Sub_cite.txt")
예제 #30
0
def create_edge_list(filename):
    """Build an undirected recommendation graph from a '|'-separated file.

    The first field of each line is an anime id, which becomes a node. The
    sixth field is a comma-separated list of recommended ids; an edge is
    added for each recommended id that is itself a node. Reading a line's
    list stops at the first empty or non-numeric entry. The graph is saved
    as the edge list 'mal-rec-graph'.
    """
    graph = snap.TUNGraph.New()
    with open(filename) as f:
        rows = [line.split("|") for line in f]

        # First pass: every listed anime becomes a node.
        for fields in rows:
            graph.AddNode(int(fields[0]))

        # Second pass: add edges to recommendations that are known nodes.
        for fields in rows:
            anime_id = int(fields[0])
            for rec in fields[5].split(","):
                if not (rec and rec.isdigit()):
                    break
                rec_id = int(rec)
                if graph.IsNode(rec_id):
                    graph.AddEdge(anime_id, rec_id)
    snap.SaveEdgeList(graph, "mal-rec-graph")