Esempio n. 1
0
    def test_directed_projection(self):
        """Check projections of two small directed bipartite graphs onto 'A'/'B'.

        In the first graph only A -> 1 -> B is a directed two-step path
        (the edge through node 2 runs B -> 2); in the second graph both
        intermediate nodes lie on a directed path from A to B.
        """
        # Case 1: a single directed path from A to B.
        G = nx.DiGraph()
        G.add_edges_from([('A', 1), (1, 'B'), ('A', 2), ('B', 2)])

        simple = bipartite.projected_graph(G, 'AB')
        assert_edges_equal(list(simple.edges()), [('A', 'B')])

        weighted = bipartite.weighted_projected_graph(G, 'AB')
        assert_edges_equal(list(weighted.edges()), [('A', 'B')])
        assert_equal(weighted['A']['B']['weight'], 1)

        multi = bipartite.projected_graph(G, 'AB', multigraph=True)
        assert_edges_equal(list(multi.edges()), [('A', 'B')])

        # Case 2: two directed paths from A to B.
        G = nx.DiGraph()
        G.add_edges_from([('A', 1), (1, 'B'), ('A', 2), (2, 'B')])

        simple = bipartite.projected_graph(G, 'AB')
        assert_edges_equal(list(simple.edges()), [('A', 'B')])

        weighted = bipartite.weighted_projected_graph(G, 'AB')
        assert_edges_equal(list(weighted.edges()), [('A', 'B')])
        assert_equal(weighted['A']['B']['weight'], 2)

        multi = bipartite.projected_graph(G, 'AB', multigraph=True)
        assert_edges_equal(list(multi.edges()), [('A', 'B'), ('A', 'B')])
Esempio n. 2
0
def collaborativeness(B):
    """Score every connected bottom node of the bipartite graph ``B``.

    Nodes whose ``bipartite`` attribute equals 0 are treated as top nodes;
    all remaining nodes are bottom nodes.  Bottom nodes are projected twice
    (simple graph and multigraph), and every bottom node that has at least
    one neighbour in the simple projection receives a pair of scores.

    Parameters
    ----------
    B : graph
        Graph whose nodes all carry a ``bipartite`` attribute.

    Returns
    -------
    dict
        ``{node: (collaboration, diversity)}`` where

        * ``collaboration`` is ``(shared / deg) * gamma``: ``deg`` is the
          node's degree in ``B``, ``shared`` counts its neighbours of
          degree > 1 and ``gamma`` sums ``log(degree)`` over its neighbours;
        * ``diversity`` is the node's degree in the simple projection
          divided by its degree in the multigraph projection.

    Raises
    ------
    KeyError
        If a node of ``B`` has no ``bipartite`` attribute.
    """
    top_nodes = {node for node, d in B.nodes(data=True)
                 if d['bipartite'] == 0}
    bottom_nodes = set(B) - top_nodes

    # Simple and multigraph projections onto the bottom nodes.
    G = bipartite.projected_graph(B, bottom_nodes)
    Gm = bipartite.projected_graph(B, bottom_nodes, multigraph=True)

    col_dict = {}
    for node in bottom_nodes:
        simple_degree = G.degree(node)
        if not simple_degree > 0:
            # Isolated in the projection: no collaborators, no score.
            continue

        gamma = 0
        shared = 0
        for nbr in B[node]:
            nbr_degree = B.degree(nbr)  # looked up once per neighbour
            gamma += math.log(nbr_degree)
            if nbr_degree > 1:
                shared += 1

        col_dict[node] = ((float(shared) / B.degree(node)) * gamma,
                          float(simple_degree) / Gm.degree(node))

    return col_dict
    def test_directed_projection(self):
        """Directed projection onto 'A'/'B' with one, then two, A-to-B paths."""
        # First graph: the edge through node 2 is B -> 2, so only node 1
        # connects A to B in the projection.
        G = nx.DiGraph()
        for tail, head in (("A", 1), (1, "B"), ("A", 2), ("B", 2)):
            G.add_edge(tail, head)

        projection = bipartite.projected_graph(G, "AB")
        assert_equal(sorted(projection.edges()), [("A", "B")])
        projection = bipartite.weighted_projected_graph(G, "AB")
        assert_equal(sorted(projection.edges()), [("A", "B")])
        assert_equal(projection["A"]["B"]["weight"], 1)

        projection = bipartite.projected_graph(G, "AB", multigraph=True)
        assert_equal(sorted(projection.edges()), [("A", "B")])

        # Second graph: both node 1 and node 2 connect A to B.
        G = nx.DiGraph()
        for tail, head in (("A", 1), (1, "B"), ("A", 2), (2, "B")):
            G.add_edge(tail, head)

        projection = bipartite.projected_graph(G, "AB")
        assert_equal(sorted(projection.edges()), [("A", "B")])
        projection = bipartite.weighted_projected_graph(G, "AB")
        assert_equal(sorted(projection.edges()), [("A", "B")])
        assert_equal(projection["A"]["B"]["weight"], 2)

        projection = bipartite.projected_graph(G, "AB", multigraph=True)
        assert_equal(sorted(projection.edges()), [("A", "B"), ("A", "B")])
Esempio n. 4
0
    def test_directed_projection(self):
        """Simple, weighted and multigraph projections of directed graphs."""
        # One directed A -> B path (via node 1); 'B' -> 2 does not count.
        one_path = nx.DiGraph()
        one_path.add_edges_from([('A', 1), (1, 'B'), ('A', 2), ('B', 2)])

        P = bipartite.projected_graph(one_path, 'AB')
        assert_edges_equal(list(P.edges()), [('A', 'B')])
        P = bipartite.weighted_projected_graph(one_path, 'AB')
        assert_edges_equal(list(P.edges()), [('A', 'B')])
        assert_equal(P['A']['B']['weight'], 1)

        P = bipartite.projected_graph(one_path, 'AB', multigraph=True)
        assert_edges_equal(list(P.edges()), [('A', 'B')])

        # Two directed A -> B paths (via nodes 1 and 2).
        two_paths = nx.DiGraph()
        two_paths.add_edges_from([('A', 1), (1, 'B'), ('A', 2), (2, 'B')])

        P = bipartite.projected_graph(two_paths, 'AB')
        assert_edges_equal(list(P.edges()), [('A', 'B')])
        P = bipartite.weighted_projected_graph(two_paths, 'AB')
        assert_edges_equal(list(P.edges()), [('A', 'B')])
        assert_equal(P['A']['B']['weight'], 2)

        P = bipartite.projected_graph(two_paths, 'AB', multigraph=True)
        assert_edges_equal(list(P.edges()), [('A', 'B'), ('A', 'B')])
Esempio n. 5
0
    def test_directed_projection(self):
        """Project directed bipartite graphs onto 'A'/'B' and check the edges."""
        # Graph with a single directed two-step path from A to B.
        G = nx.DiGraph()
        G.add_edges_from([("A", 1), (1, "B"), ("A", 2), ("B", 2)])

        plain = bipartite.projected_graph(G, "AB")
        assert_edges_equal(list(plain.edges()), [("A", "B")])

        weighted = bipartite.weighted_projected_graph(G, "AB")
        assert_edges_equal(list(weighted.edges()), [("A", "B")])
        assert weighted["A"]["B"]["weight"] == 1

        multi = bipartite.projected_graph(G, "AB", multigraph=True)
        assert_edges_equal(list(multi.edges()), [("A", "B")])

        # Graph with two directed two-step paths from A to B.
        G = nx.DiGraph()
        G.add_edges_from([("A", 1), (1, "B"), ("A", 2), (2, "B")])

        plain = bipartite.projected_graph(G, "AB")
        assert_edges_equal(list(plain.edges()), [("A", "B")])

        weighted = bipartite.weighted_projected_graph(G, "AB")
        assert_edges_equal(list(weighted.edges()), [("A", "B")])
        assert weighted["A"]["B"]["weight"] == 2

        multi = bipartite.projected_graph(G, "AB", multigraph=True)
        assert_edges_equal(list(multi.edges()), [("A", "B"), ("A", "B")])
Esempio n. 6
0
 def test_path_projected_graph(self):
     """Each side of the 4-node path graph projects onto a single edge."""
     path = nx.path_graph(4)

     odd_side = bipartite.projected_graph(path, [1, 3])
     assert_nodes_equal(list(odd_side), [1, 3])
     assert_edges_equal(list(odd_side.edges()), [(1, 3)])

     even_side = bipartite.projected_graph(path, [0, 2])
     assert_nodes_equal(list(even_side), [0, 2])
     assert_edges_equal(list(even_side.edges()), [(0, 2)])
 def test_path_projected_graph(self):
     """Project the 4-node path onto its odd and its even nodes."""
     G = nx.path_graph(4)

     # Nodes 1 and 3 share neighbour 2.
     odd = bipartite.projected_graph(G, [1, 3])
     assert_equal(sorted(odd.nodes()), [1, 3])
     assert_equal(sorted(odd.edges()), [(1, 3)])

     # Nodes 0 and 2 share neighbour 1.
     even = bipartite.projected_graph(G, [0, 2])
     assert_equal(sorted(even.nodes()), [0, 2])
     assert_equal(sorted(even.edges()), [(0, 2)])
 def test_project_multigraph(self):
     """'a' and 'b' share two neighbours; only the multigraph keeps both."""
     G = nx.Graph()
     G.add_edges_from([("a", 1), ("b", 1), ("a", 2), ("b", 2)])

     simple = bipartite.projected_graph(G, "ab")
     assert_edges_equal(simple.edges(), [("a", "b")])

     weighted = bipartite.weighted_projected_graph(G, "ab")
     assert_edges_equal(weighted.edges(), [("a", "b")])

     multi = bipartite.projected_graph(G, "ab", multigraph=True)
     assert_edges_equal(multi.edges(), [("a", "b"), ("a", "b")])
Esempio n. 9
0
 def test_project_multigraph(self):
     """Compare simple, weighted and multigraph projections onto 'a'/'b'."""
     G = nx.Graph()
     # Connect both letters to each of the two shared neighbours.
     for shared in (1, 2):
         for letter in ("a", "b"):
             G.add_edge(letter, shared)

     P = bipartite.projected_graph(G, "ab")
     assert_edges_equal(list(P.edges()), [("a", "b")])

     P = bipartite.weighted_projected_graph(G, "ab")
     assert_edges_equal(list(P.edges()), [("a", "b")])

     # One parallel edge per shared neighbour.
     P = bipartite.projected_graph(G, "ab", multigraph=True)
     assert_edges_equal(list(P.edges()), [("a", "b"), ("a", "b")])
Esempio n. 10
0
 def test_path_projected_properties_graph(self):
     """Projected nodes keep the 'name' attribute of the source graph."""
     G = nx.path_graph(4)
     G.add_node(1, name='one')
     G.add_node(2, name='two')

     odd = bipartite.projected_graph(G, [1, 3])
     assert_nodes_equal(list(odd), [1, 3])
     assert_edges_equal(list(odd.edges()), [(1, 3)])
     assert_equal(odd.node[1]['name'], G.node[1]['name'])

     even = bipartite.projected_graph(G, [0, 2])
     assert_nodes_equal(list(even), [0, 2])
     assert_edges_equal(list(even.edges()), [(0, 2)])
     assert_equal(even.node[2]['name'], G.node[2]['name'])
Esempio n. 11
0
 def test_path_projected_properties_graph(self):
     """Check nodes, edges and the copied 'name' attribute per projection."""
     G = nx.path_graph(4)
     G.add_node(1, name='one')
     G.add_node(2, name='two')

     # (nodes to project onto, expected edges, node whose name is checked)
     cases = (
         ([1, 3], [(1, 3)], 1),
         ([0, 2], [(0, 2)], 2),
     )
     for nodes, expected_edges, named in cases:
         P = bipartite.projected_graph(G, nodes)
         assert_nodes_equal(list(P), nodes)
         assert_edges_equal(list(P.edges()), expected_edges)
         assert_equal(P.node[named]['name'], G.node[named]['name'])
Esempio n. 12
0
 def test_project_multigraph(self):
     """Two shared neighbours give one simple edge but two multigraph edges."""
     G = nx.Graph()
     G.add_edges_from([('a', 1), ('b', 1), ('a', 2), ('b', 2)])

     single_edge = [('a', 'b')]
     double_edge = [('a', 'b'), ('a', 'b')]

     P = bipartite.projected_graph(G, 'ab')
     assert_edges_equal(list(P.edges()), single_edge)
     P = bipartite.weighted_projected_graph(G, 'ab')
     assert_edges_equal(list(P.edges()), single_edge)
     P = bipartite.projected_graph(G, 'ab', multigraph=True)
     assert_edges_equal(list(P.edges()), double_edge)
Esempio n. 13
0
 def test_path_projected_properties_graph(self):
     """Node attributes survive projection of the 4-node path graph."""
     G = nx.path_graph(4)
     G.add_node(1, name="one")
     G.add_node(2, name="two")

     # Odd side: node 1 carries a name.
     odd = bipartite.projected_graph(G, [1, 3])
     assert_nodes_equal(list(odd), [1, 3])
     assert_edges_equal(list(odd.edges()), [(1, 3)])
     assert odd.nodes[1]["name"] == G.nodes[1]["name"]

     # Even side: node 2 carries a name.
     even = bipartite.projected_graph(G, [0, 2])
     assert_nodes_equal(list(even), [0, 2])
     assert_edges_equal(list(even.edges()), [(0, 2)])
     assert even.nodes[2]["name"] == G.nodes[2]["name"]
Esempio n. 14
0
    def test_star_projected_graph(self):
        """Leaves of a star project to a triangle; the hub projects alone."""
        G = nx.star_graph(3)
        leaves = [1, 2, 3]
        triangle = [(1, 2), (1, 3), (2, 3)]

        # Plain and weighted projections must agree on nodes and edges.
        for project in (bipartite.projected_graph,
                        bipartite.weighted_projected_graph):
            P = project(G, leaves)
            assert_nodes_equal(list(P), [1, 2, 3])
            assert_edges_equal(list(P.edges()), triangle)

        hub_only = bipartite.projected_graph(G, [0])
        assert_nodes_equal(list(hub_only), [0])
        assert_edges_equal(list(hub_only.edges()), [])
Esempio n. 15
0
 def test_project_multigraph(self):
     """Projection onto 'a'/'b' when they share neighbours 1 and 2."""
     bipartite_edges = [('a', 1), ('b', 1), ('a', 2), ('b', 2)]
     G = nx.Graph()
     G.add_edges_from(bipartite_edges)

     simple = bipartite.projected_graph(G, 'ab')
     assert_edges_equal(list(simple.edges()), [('a', 'b')])

     weighted = bipartite.weighted_projected_graph(G, 'ab')
     assert_edges_equal(list(weighted.edges()), [('a', 'b')])

     multi = bipartite.projected_graph(G, 'ab', multigraph=True)
     assert_edges_equal(list(multi.edges()), [('a', 'b'), ('a', 'b')])
 def test_path_projected_properties_graph(self):
     """Projection of the path graph keeps each node's 'name' attribute."""
     G = nx.path_graph(4)
     G.add_node(1, name="one")
     G.add_node(2, name="two")

     odd = bipartite.projected_graph(G, [1, 3])
     assert_equal(sorted(odd.nodes()), [1, 3])
     assert_equal(sorted(odd.edges()), [(1, 3)])
     assert_equal(odd.node[1]["name"], G.node[1]["name"])

     even = bipartite.projected_graph(G, [0, 2])
     assert_equal(sorted(even.nodes()), [0, 2])
     assert_equal(sorted(even.edges()), [(0, 2)])
     assert_equal(even.node[2]["name"], G.node[2]["name"])
    def test_star_projected_graph(self):
        """Project a 3-leaf star onto its leaves and onto its hub."""
        star = nx.star_graph(3)

        # All leaves share the hub, so they are pairwise connected.
        leaf_projection = bipartite.projected_graph(star, [1, 2, 3])
        assert_equal(sorted(leaf_projection.nodes()), [1, 2, 3])
        assert_equal(sorted(leaf_projection.edges()), [(1, 2), (1, 3), (2, 3)])

        leaf_projection = bipartite.weighted_projected_graph(star, [1, 2, 3])
        assert_equal(sorted(leaf_projection.nodes()), [1, 2, 3])
        assert_equal(sorted(leaf_projection.edges()), [(1, 2), (1, 3), (2, 3)])

        # The hub alone has nobody to be connected to.
        hub_projection = bipartite.projected_graph(star, [0])
        assert_equal(sorted(hub_projection.nodes()), [0])
        assert_equal(sorted(hub_projection.edges()), [])
Esempio n. 18
0
def create_projection(B):
    """Add a fixed letter/number edge list to ``B`` and project both sides.

    Prints ``nx.info`` for the two unweighted projections and the edge data
    of the weighted projection onto the number nodes.

    Returns the tuple ``(P, P2, P3)``: projection onto the letters,
    projection onto the numbers, weighted projection onto the numbers.
    """
    letter_number_edges = [
        ('D', 1), ('H', 1),
        ('B', 2), ('C', 2), ('D', 2), ('E', 2), ('G', 2),
        ('E', 3), ('F', 3), ('H', 3), ('J', 3),
        ('E', 4), ('I', 4), ('J', 4),
    ]
    B.add_edges_from(letter_number_edges)

    letters = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'}
    numbers = {1, 2, 3, 4}

    letter_projection = bipartite.projected_graph(B, letters)
    print(nx.info(letter_projection))

    number_projection = bipartite.projected_graph(B, numbers)
    print(nx.info(number_projection))

    # Same projection as above, but with edge weights.
    weighted_number_projection = bipartite.weighted_projected_graph(B, numbers)
    print(weighted_number_projection.edges(data=True))

    return letter_projection, number_projection, weighted_number_projection
Esempio n. 19
0
def block_cutpoint_tree(G, projected=False, verbose=False):
    """Build a graph linking biconnected components to articulation points.

    Each biconnected component of ``G`` is represented by the JSON dump of
    its sorted node list; it is connected to every articulation point it
    contains.

    Parameters:
        G: input graph; it is copied into a ``Graph`` before processing.
        projected: when true, return the projection of the tree onto the
            component nodes instead of the tree itself.
        verbose: when true, print the intermediate collections.

    Returns:
        A ``Graph``: either the component/articulation-point graph or, if
        ``projected`` is set, its projection onto the component nodes.
    """
    input_graph = Graph(G)
    # Parallel lists: top_nodes[i] is joined to bottom_nodes[i] by an edge.
    top_nodes = []
    bottom_nodes = []
    articulation_points = set(nx.articulation_points(input_graph))
    if verbose:
        print "Articulation points:", articulation_points
    for biconnected_component in nx.biconnected_components(input_graph):
        # Articulation points that belong to this component.
        inter = biconnected_component.intersection(articulation_points)
        if verbose:
            print "Inter:", inter
        # Repeat the component label once per articulation point so the two
        # lists stay aligned for the zip() below.
        top_nodes.extend(
            [json.dumps(sorted(biconnected_component)) for _ in inter]
            )
        #top_nodes.extend([G.subgraph(bcc) for _ in inter])
        bottom_nodes.extend([x for x in inter])
        #bottom_nodes.extend([G.subgraph(x) for x in inter])
    if verbose:
        print "Top nodes:", top_nodes
        print "Bottom nodes:", bottom_nodes
    edges = zip(top_nodes, bottom_nodes)
    if verbose:
        print "Edges:", edges
    bc_tree = Graph()
    bc_tree.add_edges_from(edges)
    if projected:
        return Graph(bipartite.projected_graph(bc_tree, top_nodes))
    else:
        return bc_tree
def read_movies():
    """Load the movies network and return the giant component of its projection.

    The Pajek file is parsed by hand: the edge list occupies lines 107
    through 298 (1-based, inclusive) and the first two whitespace-separated
    fields of each of those lines are the integer endpoints of an edge.

    Returns:
        Whatever ``get_giant_component`` returns for the projection of the
        bipartite graph onto nodes 63..102.
    """
    # Was unable to figure out how to use nx.read_pajek for this file,
    # ended up looking at the data and doing it manually.
    first_edge_line, last_edge_line = 107, 298
    path = NETWORKS_DIR + "movies/Movies.paj"

    elist = []
    with open(path, "r") as f:
        for lineno, line in enumerate(f, start=1):
            if lineno > last_edge_line:
                break
            if lineno < first_edge_line:
                continue
            fields = line.split()[:2]
            if not fields:
                # Tolerate blank lines inside the edge section.
                continue
            # Tuples (not sets) keep both endpoints even when they are equal.
            elist.append(tuple(int(x) for x in fields))

    B = nx.Graph()
    B.add_edges_from(elist)
    B.add_node(78)  # node 78 doesn't appear in any edges

    # Project onto composers. Two composers are linked if they worked with the
    # same producer.
    G = bipartite.projected_graph(B, list(range(63, 103)))

    return get_giant_component(G)
Esempio n. 21
0
def Dulmage_Mendelsohn(g, eqs):
    """Return the diagonal blocks of ``g`` as a list of (equations, variables).

    The input graph ``g`` is assumed to be a bipartite graph with no
    isolated nodes, and ``eqs`` is the non-empty set of its equation nodes;
    both assumptions are checked with assertions.

    Steps: compute a maximum-cardinality matching, orient the edges
    according to it, condense the projection onto the equations into its
    strongly connected components, and walk those components in
    topological order.
    """
    assert_all_in_graph(g, eqs)
    assert_no_isolates(g)
    assert eqs, 'At least one equation is expected'
    assert is_bipartite_node_set(g, eqs)
    # Maximum matching
    mate = nx.max_weight_matching(g, maxcardinality=True)
    # Keep each matched pair once, keyed by its equation endpoint.
    # NOTE(review): indexing `mate[k]` assumes the matching is returned as a
    # dict -- confirm against the networkx version in use.
    matches = sorted((k, mate[k]) for k in mate if k in eqs)
    log('Matches:')
    for eq, var in matches:
        log(eq, var)
    # Direct the edges of g according to the matching
    bipart = to_digraph(g, matches)
    plot(bipart)
    # Find the strongly connected components (SCCs) of the equations
    eq_sccs = nx.condensation(projected_graph(bipart, eqs))
    plot(eq_sccs)
    # Q: With proper implementation, shouldn't the SCCs be already top. sorted?
    precedence_order = nx.topological_sort(eq_sccs)
    # Collect the diagonal blocks as a list of (equations, variables)
    diagonal_blocks = []
    # Variables already assigned to an earlier block.
    seen = set()
    for scc in precedence_order:
        equations = eq_sccs.node[scc]['members']
        # Neighbours of this block's equations that no earlier block claimed.
        variables = {
            n
            for eq in equations for n in g.edge[eq] if n not in seen
        }
        seen.update(variables)
        diagonal_blocks.append((equations, list(variables)))
    return diagonal_blocks
Esempio n. 22
0
def experiment1(db, extra_round):
    """Count shared posts for unlabeled authors and write them to a CSV file.

    Builds the author/post bipartite graph from the timelines returned by
    ``db``, projects it onto the authors and counts shared posts per
    unlabeled author.  With ``extra_round`` set, every author that received
    a non-``None`` count is relabeled ``'bad_actor'`` and the count is run a
    second time.

    Parameters:
        db: database wrapper providing ``get_authors_timelines_temp``.
        extra_round: truthy to run the second labeling round.

    Output:
        ``timeline_analysis_predictions_round1.csv`` (or ``..._round2.csv``
        when ``extra_round`` is set) with one ``author, count`` row each.
    """
    output_file_name = 'timeline_analysis_predictions_round1.csv'
    cursor = db.get_authors_timelines_temp()

    print('retrieving authors and posts')
    author_post_tuples, author_type_dict = create_author_post_and_author_type_tuples(db, cursor)

    print('creating bi-partite graph')
    bi_graph = create_bi_graph(author_post_tuples)
    authors, _posts = zip(*author_post_tuples)
    del author_post_tuples  # free the raw tuples before projecting
    authors = list(set(authors))

    print('creating authors graph')
    authors_projection_graph = bipartite.projected_graph(bi_graph, authors)
    del bi_graph

    print('counting shared posts')
    unlabeled_author_shared_posts_dict = count_shared_posts(author_type_dict, authors_projection_graph)

    # Add an extra iteration to the experiment
    if extra_round:
        output_file_name = 'timeline_analysis_predictions_round2.csv'
        for author, num_shared_posts_with_bad in unlabeled_author_shared_posts_dict.items():
            if num_shared_posts_with_bad is not None:
                author_type_dict[author] = 'bad_actor'
        unlabeled_author_shared_posts_dict = count_shared_posts(author_type_dict, authors_projection_graph)

    print('writing results to file')
    # csv.writer emits text: the file must be opened in text mode, and
    # newline='' lets the csv module control line endings itself.
    with open(output_file_name, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        for key, value in unlabeled_author_shared_posts_dict.items():
            writer.writerow([key, value])
Esempio n. 23
0
def nearest_neighbors_weighted_projection(TG_graph, gen,
                                          tumor_gene_relation_bi_adj_mat):
    """Score entries by multiplying the input matrix with a projection.

    Projects ``TG_graph`` onto the ``gen`` nodes, computes preferential
    attachment scores for the projected edges, and returns
    ``np.dot(tumor_gene_relation_bi_adj_mat, A)`` where ``A`` is the sparse
    adjacency matrix of the (unweighted) projection.
    """
    #for nearest neighbors#

    #G = bipartite.weighted_projected_graph(TG_graph, gen)
    #for jaccard similarity nearest neighbors#

    #G = bipartite.overlap_weighted_projected_graph(TG_graph, gen)

    #for other baseline link prediction algorithm#
    G = bipartite.projected_graph(TG_graph, gen)
    print "calculating the link prediction scores", G.number_of_edges()
    preds = nx.preferential_attachment(G, ebunch=G.edges())

    # Graph holding the prediction score of each projected edge as weight.
    pred_scores_graph = nx.Graph()
    pred_scores_graph.add_nodes_from(gen)
    # NOTE(review): cnt is never incremented, so the print below always
    # shows 0.
    cnt = 0
    for u, v, p in preds:
        pred_scores_graph.add_edge(u, v, weight=p)

    print cnt

    print "sorted !"
    # NOTE(review): the assignment below is disabled, so pred_scores_graph
    # does not feed into the returned scores; A comes from G.
    #G = pred_scores_graph
    A = nx.to_scipy_sparse_matrix(G)
    scores = np.dot(tumor_gene_relation_bi_adj_mat, A)
    return scores
def create_global_graph(posts_df, fb_group_df, NODE_COLOR, GRAPH_DIRECTORY, DATE):
    """Build the account graph from shared URLs and write it as GEXF.

    A bipartite graph is assembled from account nodes (one per row of
    ``fb_group_df``) and URL nodes (``posts_df["url"]``), with one edge per
    ``(account_id, url)`` row of ``posts_df``.  It is then projected onto
    the unique account ids and written to
    ``./<GRAPH_DIRECTORY>/global_<DATE>.gexf``.

    ``NODE_COLOR`` is accepted but not used by this function.

    Returns the projected (account-only) graph.
    """
    account_url_graph = nx.Graph()

    # One node per account, carrying the attributes used for rendering
    # (main_topic drives the three-colour scheme).
    for _, account in fb_group_df.iterrows():
        account_id = int(account['account_id'])
        account_url_graph.add_node(
            account_id,
            label=account['account_name'],
            nb_fake_news_shared=account['nb_fake_news'],
            nb_followers=account['account_subscriber_count'],
            main_topic=account['main_topic'],
        )

    urls = posts_df["url"].tolist()
    account_url_graph.add_nodes_from(urls)

    account_url_pairs = list(
        posts_df[['account_id', 'url']].itertuples(index=False, name=None)
    )
    account_url_graph.add_edges_from(account_url_pairs)

    account_ids = fb_group_df['account_id'].unique().tolist()
    monopartite_graph = bipartite.projected_graph(account_url_graph, account_ids)

    file_name = "global_{}.gexf".format(DATE)
    monopartite_graph_path = os.path.join(".", GRAPH_DIRECTORY, file_name)
    nx.write_gexf(monopartite_graph, monopartite_graph_path, encoding="utf-8")

    return monopartite_graph
Esempio n. 25
0
def nets(df):
    """Build the author/post two-mode network and both one-mode projections.

    Parameters:
        df: table with ``author`` and ``post_id`` columns; every row is an
            edge between an author and a post.

    Returns:
        ``(B, authors, posts)``: the bipartite graph, the unweighted
        projection onto authors and the weighted projection onto posts.

    Node and edge counts are sanity-checked with assertions and a short
    summary of each network is printed.
    """
    edgelist = df[['author', 'post_id']]
    author_nodes = set(edgelist['author'])
    post_nodes = set(edgelist['post_id'])
    n_unique_authors = len(df['author'].unique())
    n_unique_posts = len(df['post_id'].unique())

    B = nx.Graph()
    B.add_nodes_from(author_nodes, bipartite=0)
    B.add_nodes_from(post_nodes, bipartite=1)
    B.add_edges_from(list(zip(edgelist['author'], edgelist['post_id'])))

    assert len(B.nodes()) == n_unique_authors + n_unique_posts, \
        'number of nodes is off'
    assert len(B.edges()) == len(edgelist.drop_duplicates()), \
        'number of edges is off'
    print(f'the two-mode network has {len(B.nodes())} nodes'
          f' and density of {nx.density(B)}')

    ## one mode projections
    authors = bipartite.projected_graph(B, author_nodes)
    assert len(authors.nodes()) == n_unique_authors, \
        'number of authors is off'
    print(f'the author network has {len(authors.nodes())} nodes'
          f' and density of {nx.density(authors)}')

    posts = bipartite.weighted_projected_graph(B, post_nodes)
    assert len(posts.nodes()) == n_unique_posts, \
        'number of posts is off'
    print(f'the post network has {len(posts.nodes())} nodes'
          f' and density of {nx.density(posts)}')

    return B, authors, posts
Esempio n. 26
0
def bi_graph():
    """Demo: build a tiny bipartite graph, project it and print the results.

    Prints the nodes, edges and bipartiteness of the source graph, its
    edges as lists, then the nodes and edges of the projection onto
    'a'/'b'/'c', the weight of each projected edge (or 'no weight'), and
    finally the projected edges as endpoint lists.
    """
    B = nx.Graph()
    B.add_edges_from([('a', 1), ('b', 1), ('a', 2), ('c', 2)])
    B.add_edges_from([])
    print(list(B.nodes()))
    print(list(B.edges()))
    print(bipartite.is_bipartite(B))

    # Same edges, each converted from a tuple to a list.
    print([list(edge) for edge in B.edges()])

    P = bipartite.projected_graph(B, ['a', 'b', 'c'])
    print(list(P.nodes()))
    print(list(P.edges(data=True)))

    endpoint_pairs = []
    for u, v, attrs in P.edges(data=True):
        endpoint_pairs.append([u, v])
        if 'weight' in attrs:
            print(attrs['weight'])
        else:
            print('no weight')

    print(endpoint_pairs)
Esempio n. 27
0
def get_projections(single_bipartite_graph, sites, pollutants, verbose=False):
    """Project the bipartite graph onto sites and onto pollutants.

    Each side is first projected as a multigraph and then passed through
    ``multi_to_single_graph_projection``.

    Parameters:
        single_bipartite_graph: graph to project.
        sites: nodes of the first projection.
        pollutants: nodes of the second projection.
        verbose: forwarded to the conversion helper; when true the elapsed
            time is printed as well.

    Returns:
        ``(sites_graph, pollutants_graph)``.
    """
    started_at = time.time()

    sites_multigraph = bipartite.projected_graph(
        single_bipartite_graph, nodes=sites, multigraph=True)
    sites_graph = multi_to_single_graph_projection(sites_multigraph, verbose)

    pollutants_multigraph = bipartite.projected_graph(
        single_bipartite_graph, nodes=pollutants, multigraph=True)
    pollutants_graph = multi_to_single_graph_projection(
        pollutants_multigraph, verbose)

    if verbose:
        elapsed = time.time() - started_at
        print("Getting projections of bipartite graph:", elapsed)

    return sites_graph, pollutants_graph
Esempio n. 28
0
def experiment2(db):
    """Count links between manually labeled authors and bought authors.

    Builds the author/post bipartite graph for the labeled bad-actor
    timelines, projects it onto the authors and inspects every
    author-author edge.  An edge is "interesting" when one endpoint has a
    manually labeled sub type and the other a bought sub type; the manually
    labeled endpoint's counter is incremented for each such edge.

    Parameters:
        db: database wrapper providing
            ``get_labeled_bad_actors_timelines_temp``.

    Output:
        ``timeline_analysis.experiment2.csv`` with rows
        ``author, count, author_type`` and ``sharedposts.experiment2.csv``
        with one interesting edge per row.
    """
    output_file_name = 'timeline_analysis.experiment2.csv'
    output_file_name2 = 'sharedposts.experiment2.csv'

    print('retrieving bad actors timelines')
    cursor = db.get_labeled_bad_actors_timelines_temp()

    author_post_tuples, author_type_dict = create_author_post_and_author_type_tuples(db, cursor)
    authors, _posts = zip(*author_post_tuples)
    print('creating bi-partite graph')
    bi_graph = create_bi_graph(author_post_tuples)
    del author_post_tuples

    authors = list(set(authors))
    print('creating projection into authors graph')
    authors_projection_graph = bipartite.projected_graph(bi_graph, authors)

    edges_list = authors_projection_graph.edges()
    del authors_projection_graph
    del authors

    manually_labeled_authors_shared_posts_dict = {}

    bought_sub_types = ['crowdturfer', 'acquired']
    manually_labeled_authors_sub_types = ['spammer', 'bot', 'news_feed', 'company', 'private']
    interesting_edges = []

    # Count how many bought actors we find sharing posts with manually
    # labeled ones.
    print('iterating over author-author edges')
    for edge in edges_list:
        source_author_name = edge[0]
        destination_author_name = edge[1]
        source_type = author_type_dict[source_author_name]
        destination_type = author_type_dict[destination_author_name]

        # Work out which endpoint (if any) is the manually labeled one.
        if source_type in manually_labeled_authors_sub_types and destination_type in bought_sub_types:
            labeled_author_name = source_author_name
        elif destination_type in manually_labeled_authors_sub_types and source_type in bought_sub_types:
            labeled_author_name = destination_author_name
        else:
            continue

        interesting_edges.append(edge)
        manually_labeled_authors_shared_posts_dict[labeled_author_name] = (
            manually_labeled_authors_shared_posts_dict.get(labeled_author_name, 0) + 1
        )

    print('writing results into file')
    # csv.writer emits text: open in text mode with newline='' so the csv
    # module controls line endings itself.
    with open(output_file_name, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        for key, value in manually_labeled_authors_shared_posts_dict.items():
            writer.writerow([key, value, author_type_dict[key]])

    with open(output_file_name2, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        for item in interesting_edges:
            # Each edge tuple is written as a single cell.
            writer.writerow([item])
Esempio n. 29
0
def gen_image_network(metadata_file, output_file, malware_projection, resource_projection):
    """Link malware samples to their embedded images and write three DOT files.

    Reads the ``FILE`` column of the CSV at ``metadata_file``; every path
    that parses as a PE file becomes a ``malware`` node connected to one
    ``image`` node per image extracted from it.

    Parameters:
        metadata_file: CSV file with a ``FILE`` column of sample paths.
        output_file: destination of the bipartite network.
        malware_projection: destination of the projection onto samples.
        resource_projection: destination of the projection onto images.
    """
    malware_data = pandas.read_csv(metadata_file, dtype=str, keep_default_na=False, skipinitialspace=True)

    network = networkx.Graph()

    # Extractors are kept referenced for the whole run.
    # NOTE(review): presumably so the extracted image files outlive the
    # write_dot calls below -- confirm against ExtractImages.
    image_objects = []
    total = malware_data['FILE'].size
    for i, malware_path in enumerate(malware_data['FILE'], start=1):
        print("\t i: {}, j: {}, so: {}".format(i, total, i / total))
        # try to parse the path to see if it's a valid PE file
        try:
            pefile.PE(malware_path)
        except pefile.PEFormatError:
            continue

        images = ExtractImages(malware_path)
        images.work()
        image_objects.append(images)

        # create the network by linking malware samples to their images;
        # the context manager closes the sample file after hashing it
        with open(malware_path, 'rb') as sample:
            filehash = hashlib.sha256(sample.read()).hexdigest()
        node_name = malware_path.split("/")[-1] + '_' + str(filehash)
        if node_name not in network:
            network.add_node(node_name, type="malware")

        for path, image_hash in images.images:
            # set the image attribute on the image nodes to tell GraphViz to
            # render images within these nodes
            if image_hash not in network:
                network.add_node(image_hash, image=path, label='', type='image')
            print(node_name, image_hash)
            network.add_edge(node_name, image_hash)

    # write the bipartite network, then do the two projections and write them
    print(network)
    write_dot(network, output_file)
    malware = set(n for n, d in network.nodes(data=True) if d['type'] == 'malware')
    resource = set(network) - malware
    malware_network = bipartite.projected_graph(network, malware)
    resource_network = bipartite.projected_graph(network, resource)

    write_dot(malware_network, malware_projection)
    write_dot(resource_network, resource_projection)
Esempio n. 30
0
def connected_reconstructions(reconstruction_shots):
    """Return the pairs of reconstructions that share at least one shot.

    ``reconstruction_shots`` maps each reconstruction to an iterable of its
    shot ids.  The result is the edge view of the projection of the
    reconstruction/shot bipartite graph onto the reconstructions.
    """
    shot_graph = nx.Graph()
    for reconstruction in reconstruction_shots:
        shot_graph.add_node(reconstruction, bipartite=0)
        for shot in reconstruction_shots[reconstruction]:
            shot_graph.add_node(shot, bipartite=1)
            shot_graph.add_edge(reconstruction, shot)

    projection = bipartite.projected_graph(
        shot_graph, reconstruction_shots.keys())
    return projection.edges()
Esempio n. 31
0
def connected_reconstructions(reconstruction_shots):
    """Find reconstructions linked through a common shot.

    Builds a bipartite graph with reconstructions on one side
    (``bipartite=0``) and shot ids on the other (``bipartite=1``), projects
    it onto the reconstructions and returns the projection's edges.
    """
    graph = nx.Graph()
    for rec_id in reconstruction_shots:
        shot_ids = reconstruction_shots[rec_id]
        graph.add_node(rec_id, bipartite=0)
        for shot_id in shot_ids:
            graph.add_node(shot_id, bipartite=1)
            graph.add_edge(rec_id, shot_id)

    rec_ids = reconstruction_shots.keys()
    return bipartite.projected_graph(graph, rec_ids).edges()
Esempio n. 32
0
def main():
    """Build the user/business graph and print the size of its user projection."""
    G = initialize()
    user, business = node_initialize(G)
    # Keep only the users and businesses that are actually nodes of G.
    user = list(set(user) & set(G.nodes()))
    business = list(set(business) & set(G.nodes()))
    G = make_bipartite(G, user, business)
    print nx.is_bipartite(G)
    # Continue with the first connected component only.
    G = list(nx.connected_component_subgraphs(G))[0]
    # Recompute the two node sets for that component.
    user, business = bipartite.sets(G)
    print "nodes separated"
    Gu = bipartite.projected_graph(G, user)
    print Gu.number_of_nodes()
 def projected_graph(self):
     """Plot and print the simple and multigraph projections of ``self.B``.

     Both projections are onto the first node set returned by
     ``bipartite.sets``.
     """
     first_set = bipartite.sets(self.B)[0]

     # Simple projection: plot it, then dump edges, nodes and one adjacency.
     simple = bipartite.projected_graph(self.B, first_set, multigraph=False)
     self.plot_graph_2(simple, 'projected_gragh')
     print('projected_graph:number of edges:', simple.number_of_edges())
     print(simple.edges())
     print(list(simple.edges(data=True)))
     print('kkkkkkkkkkkkkkkk')
     print(list(simple.nodes()))
     print(simple['Goeff'])

     # Multigraph projection: plot it, then dump nodes and edges.
     multi = bipartite.projected_graph(self.B, first_set, multigraph=True)
     self.plot_graph_2(multi, 'projected_gragh_multigraph')
     print('MMMMMMMMMMMMMMMMMMMMM')
     print(multi.nodes())
     print(multi.edges())
     print('NNNNNNNNNNNNNNNNNNNNN')
Esempio n. 34
0
def main():
    """Build the essay/person graph from the CSV and write both projections.

    Reads ``clean_essays.csv``, links every person named in column 3 to the
    essay title in column 0, then writes the projection onto people to
    ``people.gml`` and the projection onto essays to ``essays.gml``.
    """
    G = nx.Graph()

    with open('clean_essays.csv', 'rb') as csvfile:
        readerz = csv.reader(csvfile, delimiter=',')
        for row in readerz:
            title = row[0]
            # Normalise " and" and ", " separators to plain commas, then split.
            thnx = row[3].replace(' and', ',').replace(', ', ',').split(',')
            # Drop empty and single-space fragments, trim the rest.
            people = [x.strip() for x in thnx if (x != ' ' and x != '')]
            G.add_node(title, klass='essay')
            for p in people:
                # The literal string 'None' is skipped, not added as a person.
                if p != 'None':
                    G.add_node(p, klass='person')
                    G.add_edge(p, title)

    p_nodes = [n for n, d in G.nodes_iter(data=True) if d['klass'] == 'person']
    e_nodes = [n for n, d in G.nodes_iter(data=True) if d['klass'] == 'essay']

    # BP: projection onto people; BE: projection onto essays.
    BP = bipartite.projected_graph(G, p_nodes)
    BE = bipartite.projected_graph(G, e_nodes)
    nx.write_gml(BP, 'people.gml')
    nx.write_gml(BE, 'essays.gml')
    print "Done"
Esempio n. 35
0
def main():
	"""Build the essay/person graph from the CSV and write both projections.

	Reads ``clean_essays.csv``, links every person named in column 3 to the
	essay title in column 0, then writes the projection onto people to
	``people.gml`` and the projection onto essays to ``essays.gml``.
	"""
	G=nx.Graph()

	with open('clean_essays.csv', 'rb') as csvfile:
		readerz = csv.reader(csvfile, delimiter=',')
		for row in readerz:
			title = row[0]
			# Normalise " and" and ", " separators to plain commas, then split.
			thnx = row[3].replace(' and', ',').replace(', ', ',').split(',')
			# Drop empty and single-space fragments, trim the rest.
			people = [x.strip() for x in thnx if (x!=' ' and x!='')]
			G.add_node(title, klass='essay')
			for p in people:
				# The literal string 'None' is skipped, not added as a person.
				if p != 'None':
					G.add_node(p, klass='person')
					G.add_edge(p, title)

	p_nodes = [n for n,d in G.nodes_iter(data=True) if d['klass']=='person']
	e_nodes = [n for n,d in G.nodes_iter(data=True) if d['klass']=='essay']
	
	# BP: projection onto people; BE: projection onto essays.
	BP = bipartite.projected_graph(G,p_nodes)
	BE = bipartite.projected_graph(G,e_nodes)
	nx.write_gml(BP, 'people.gml')
	nx.write_gml(BE, 'essays.gml')
	print "Done"
def resource_allocation_index(TG_graph, gen, tumor_gene_relation_bi_adj_mat):
    """Score entries by multiplying the input matrix with a reweighted projection.

    ``TG_graph`` is projected onto the ``gen`` nodes; with ``A`` the sparse
    adjacency matrix of that projection and ``degrees`` its column sums,
    the function returns
    ``np.dot(csr(tumor_gene_relation_bi_adj_mat), A.multiply(1 / degrees) * A.T)``.
    """
    projection = bipartite.projected_graph(TG_graph, gen)
    adjacency = nx.to_scipy_sparse_matrix(projection)
    column_degrees = adjacency.sum(axis=0)

    # Zero degrees produce inf here; silence the warning instead of failing.
    with np.errstate(divide='ignore'):
        inverse_degrees = sparse.csr_matrix(1. / column_degrees)

    reweighted = adjacency.multiply(inverse_degrees)
    neighbour_scores = reweighted * adjacency.T

    relation_matrix = sparse.csr_matrix(tumor_gene_relation_bi_adj_mat)
    return np.dot(relation_matrix, neighbour_scores)
def read_club_membership():
    """Load the club-membership edge list and return its projection onto people.

    The edge file lives under ``NETWORKS_DIR``.  Its first two lines are
    skipped; every following line contributes one edge built from its first
    two whitespace-separated integer fields.  The second field is shifted by
    25 so that it cannot collide with the ids 1..25 that are used as the
    "people" side of the projection below.

    Returns the graph obtained by projecting the bipartite graph onto
    nodes 1..25.
    """
    path = NETWORKS_DIR + "brunson_club-membership/out.brunson_club-membership_club-membership"
    elist = []
    with open(path, "r") as f:
        # Skip the two leading lines of the file.
        for _ in range(2):
            next(f)
        for line in f:
            fields = line.split()[:2]
            if len(fields) < 2:
                # Blank or truncated line (e.g. a trailing newline): nothing to add.
                continue
            elist.append((int(fields[0]), int(fields[1]) + 25))

    B = nx.Graph()
    B.add_edges_from(elist)

    # Project onto people. Two people are linked if they belong to the
    # same club/board.
    G = bipartite.projected_graph(B, list(range(1, 26)))

    ##    # Project onto clubs.
    ##    G = bipartite.projected_graph(B, list(range(26,41)))

    return G
Esempio n. 38
0
    def execute(self, window_start):
        """Build the author projection graph and record shared-post counts.

        Steps, as performed by this method:

        1. Fetch author timelines for ``self._domain`` with at least
           ``self._min_number_of_posts_per_author`` posts.
        2. If ``self._num_of_random_authors_for_graph`` is set, keep only the
           timelines whose first element (the author guid) is a key of the
           random-author dictionary returned by the DB layer.
        3. Build the author/post bipartite graph and project it onto the
           authors as a multigraph.
        4. Count shared posts on the projection and hand the resulting
           dictionary to ``self._fill_common_posts_connections``.

        ``window_start`` is accepted but not referenced in this body.
        """
        author_timelines_tuples = self._db.get_author_timelines_by_min_num_of_posts(
            self._domain, self._min_number_of_posts_per_author)
        print('retrieving authors and posts')
        if self._num_of_random_authors_for_graph is not None:
            random_author_guid_post_id_dict = self._db.get_random_author_guid_post_id_dictionary(
            )
            author_timelines_tuples = [
                author_timeline for author_timeline in author_timelines_tuples
                if author_timeline[0] in random_author_guid_post_id_dict
            ]

        author_post_tuples, author_type_dict = self._create_author_post_and_author_type_tuples(
            author_timelines_tuples)

        print('creating bi-partite graph')
        bi_graph = self._create_bi_graph(author_post_tuples)
        # Collect the distinct author guids directly.  Unpacking
        # ``zip(*author_post_tuples)`` raises when there are no tuples at all
        # and materialised a post sequence that was never used.
        author_guids = list({author_guid for author_guid, _post in author_post_tuples})
        del author_post_tuples

        print('creating authors graph')
        authors_projection_graph = bipartite.projected_graph(bi_graph,
                                                             author_guids,
                                                             multigraph=True)
        # The bipartite graph is no longer needed once projected.
        del bi_graph

        print('counting shared posts')

        source_author_guid_dest_author_guid_shared_posts_dict = self._count_shared_posts(
            authors_projection_graph)

        self._fill_common_posts_connections(
            source_author_guid_dest_author_guid_shared_posts_dict)
Esempio n. 39
0
		tmp_node = line.split("||")
		try:
			B.add_node(tmp_node[0],bipartite=0)
			B.add_node(tmp_node[1],bipartite=1)
			edgelist.append((tmp_node[0],tmp_node[1]))
		except Exception,Err:
			pass
		# we found 2 range exception error in 1000000

		# proccessing element limit 1M
		if i >= parse_limit:
			break
		else:
			i += 1	

	file.close()

	#add edge list
	B.add_edges_from(edgelist)

	authors = set(n for n,d in B.nodes(data=True) if d['bipartite'] == 0)
	G = bipartite.projected_graph(B, authors)

	get_k_highest_degree(G, 10)
	export_degree_statistic(G)

	nx.draw(G)
	plt.figure(num=None, figsize=(20, 20), dpi=80)
	plt.savefig("degree_histogram.png")
	plt.show()
# Collect each distinct (source, destination) pair once, remembering which
# sources actually appear in the data.
for line in readFile:

    fields = line.split(",")
    length = len(fields)
    srcIp = fields[1]
    destIp = fields[length - 2]

    keyVal = srcIp + "-" + destIp
    if keyVal in edgeMap:
        continue
    edgeMap[keyVal] = True
    edgeArr.append((srcIp, destIp))
    validNodes.append(srcIp)


# Keep only the source nodes that were seen in the data.
srcNodes = list(set(srcNodes) - (set(srcNodes) - set(validNodes)))
G.add_edges_from(edgeArr)
PG = bipartite.projected_graph(G, srcNodes)

listVal = nx.connected_components(PG)

# Report the size of every component with at least five nodes.
for el in listVal:
    length = len(el)
    if length >= 5:
        print(length)


# print(nx.number_connected_components(G))
# print(nx.number_connected_components(PG))


        DG.add_node(directors.index(row['name']),label=row['name'],name=row['name'])
    if row['cname'] not in companies:
        companies.append(row['cname'])
        DG.add_node(row['ocid'],label=row['cname'],name=row['cname'])   
    DG.add_edge(directors.index(row['name']),row['ocid'])


if reduce!=-1:
    from networkx.algorithms import bipartite
    officers,companies=bipartite.sets(DG)
    #print list(officers)
    #print list(companies)

    if reduce=="officers":
        #Collapse the bipartite graph to a graph of officers connected via a common company
        OG= bipartite.projected_graph(DG, officers)
    elif reduce=='companies':
        #Collapse the bipartite graph to a graph of companies connected via a common officer
        OG= bipartite.projected_graph(DG, companies)
    else: OG=DG
else: OG=DG

'''
modularity=1
if modularity!=-1:
    import community
    partition = community.best_partition(G)
    size = float(len(set(partition.values())))
    pos = nx.spring_layout(G)
    count = 0.
    for com in set(partition.values()) :
Esempio n. 42
0
# Drop rows where either side of the relationship is missing.
df = df[(df[CREATE_GRAPH_OF].notnull()) & (df[LINKED_BY].notnull())]
print("[+] Removed null values...")

# Dedup: one row per distinct combination of `columns`, with its count in 'w'.
dd = df.groupby(columns).size().reset_index().rename(columns={0: 'w'})
print("[+] Created deduplicated dataset...")


# Creating the bipartite graph
G = nx.Graph()
G.add_nodes_from(dd[CREATE_GRAPH_OF].unique(), bipartite=0)
G.add_nodes_from(dd[LINKED_BY].unique(), bipartite=1)
G.add_edges_from(zip(dd[CREATE_GRAPH_OF], dd[LINKED_BY]))
print("[+] Created bipartite graph...")


# Projecting the main projected graph
graph = bipartite.projected_graph(G, dd[CREATE_GRAPH_OF].unique(), multigraph=False)
print("[+] Created projected graph...")


# Outputting the corresponding data frame.
# The column names are passed to the constructor so that a projection with
# no edges still yields a two-column (empty) frame instead of failing when
# the names are assigned afterwards; list() materialises the edge container.
d = pd.DataFrame(list(graph.edges()),
                 columns=[CREATE_GRAPH_OF + '__1', CREATE_GRAPH_OF + '__2'])


# Recipe outputs
print("[+] Writing output dataset...")
# `graph` is rebound from the projected graph to the output dataset here.
graph = dataiku.Dataset(output_name)
graph.write_with_schema(d)
        count += 1
    return Sum / count


# Sanity check: report whether each node collection is a valid bipartite
# node set of G.
print "is bottom_nodes a bipartite set?",bipartite.is_bipartite_node_set(G, bottom_nodes)
print "is top_nodesa  bipartite set?",bipartite.is_bipartite_node_set(G, top_nodes)

# Size of each side of the bipartite graph.
print len(bottom_nodes)," bottom nodes",len(top_nodes)," top nodes"

# Mean degree per side.  Per the labels printed here, bottom nodes are
# moderators and top nodes are subreddits.
# NOTE(review): degree_iter() is NetworkX 1.x API; mymean is presumably the
# averaging helper whose tail appears just above -- confirm.
print "Average subreddits moderated per moderator: ",mymean(G.degree_iter(bottom_nodes))
print "Average moderators per subreddit: ",mymean(G.degree_iter(top_nodes))
if export:
    # Export the untouched bipartite graph.
    nx.write_gexf(G,"C:\\Users\\Theseus\\Documents\\moderatorproject\\untouched.gexf")
    print "gexf exported"

# Unweighted projection onto the bottom nodes (moderators).
pg1 = bipartite.projected_graph(G, bottom_nodes)
print "Unweighted moderator to moderator projection made"
print "Average unweighted degree: ",mymean(pg1.degree_iter())
if export:
    nx.write_gexf(pg1,"C:\\Users\\Theseus\\Documents\\moderatorproject\\bottoms.gexf")
    print "gexf exported"

# Unweighted projection onto the top nodes (subreddits).
pg2 = bipartite.projected_graph(G, top_nodes)
print "Unweighted subreddit to subreddit projection made"
print "Average unweighted degree: ",mymean(pg2.degree_iter())
if export:
    nx.write_gexf(pg2,"C:\\Users\\Theseus\\Documents\\moderatorproject\\tops.gexf")
    print "gexf exported"

# Weighted projection onto the bottom nodes.
wpg1 = bipartite.weighted_projected_graph(G, bottom_nodes)
print "Weighted bottom node projection made"
Esempio n. 44
0
    def describe(self, extra=False):
        """
        Provides a summary of graph statistics. Includes basic statistics like the number of nodes
        of each mode, the number of edges, and the density. Prints a string that contains each of
        the items that make up the summary.
        Density is calculated using one of the modes of the original bipartite network graph.

        **Parameters** :

        > *extra* : `bool`

        >> Runs the low efficiency algorithms, which can be resource-intensive on large networks.
        >> Recommended maximum network size for the low efficiency algorithms is around 100 nodes.
        >> When set, transitivity of the projection and mean degree/betweenness centrality are
        >> appended to the summary.

        **Returns** : `string`

        > Returns the descriptive string that contains information about the `MultiGraphPlus` object.

        """
        mode1 = self.mode1
        mode2 = self.mode2
        # The two-set split is needed several times below; compute it once
        # instead of re-running it for every statistic.
        mode_sets = bipartite.sets(self)
        density = bipartite.density(self, mode_sets[0])
        edges = self.number_of_edges()
        nodes_mode1 = 0
        nodes_mode2 = 0
        # nodes(data=True) works on both NetworkX 1.x and 2.x, unlike the
        # ``self.node`` attribute, which newer releases no longer provide.
        for _, attrs in self.nodes(data=True):
            if attrs['type'] == mode1:
                nodes_mode1 += 1
            elif attrs['type'] == mode2:
                nodes_mode2 += 1

        descriptives_nodes = ("This is a bipartite network of types '{}' and '{}'.\n "
                              "{} nodes are of the type '{}'.\n "
                              "{} nodes are of the type '{}'.\n").format(str(mode1), str(mode2), str(nodes_mode1),
                                                                         str(mode1), str(nodes_mode2), str(mode2))
        descriptives_edges = "There are {} edges.\n".format(str(edges))
        descriptives_density = "Density: {}.\n".format(str(density))
        descriptives = descriptives_nodes + descriptives_edges + descriptives_density

        if extra:
            # Note: for each mode of the bipartite graph, degree and betweenness centrality are the same.
            # Keeping them both makes it easy to compare them and make sure they are the same.
            degree_mode1 = np.mean(list(bipartite.degree_centrality(self, mode_sets[0]).values()))
            degree_mode2 = np.mean(list(bipartite.degree_centrality(self, mode_sets[1]).values()))
            betweenness_mode1 = np.mean(list(bipartite.betweenness_centrality(self, mode_sets[0]).values()))
            betweenness_mode2 = np.mean(list(bipartite.betweenness_centrality(self, mode_sets[1]).values()))
            # Transitivity is measured on the projection of a plain-Graph
            # copy of this network onto its first node set.
            g = nx.Graph(self)
            projection = bipartite.projected_graph(g, bipartite.sets(g)[0])
            transitivity = nx.transitivity(projection)
            descriptives_transitivity = "Transitivity: {}.\n".format(str(transitivity))
            descriptives_degree_centrality = ("Mean Degree Centrality for '{}': {}.\n"
                                              "Mean Degree Centrality for '{}': {}.\n").format(str(mode1),
                                                                                               str(degree_mode1),
                                                                                               str(mode2),
                                                                                               str(degree_mode2))
            descriptives_btwn_centrality = ("Mean Betweenness Centrality for '{}': {}.\n"
                                            "Mean Betweenness Centrality for '{}': {}.\n").format(str(mode1),
                                                                                                  str(betweenness_mode1),
                                                                                                  str(mode2),
                                                                                                  str(betweenness_mode2))
            descriptives = descriptives + descriptives_transitivity +\
                descriptives_degree_centrality + descriptives_btwn_centrality
        print(descriptives)
        return descriptives
Esempio n. 45
0
They represent observed attendance at 14 social events by 18 Southern women.
The graph is bipartite (clubs, women).
"""
import matplotlib.pyplot as plt
import networkx as nx
import networkx.algorithms.bipartite as bipartite

G = nx.davis_southern_women_graph()
women, clubs = G.graph['top'], G.graph['bottom']

print("Biadjacency tar_matrix")
print(bipartite.biadjacency_matrix(G, women, clubs))

# Unweighted projection of the bipartite graph onto the women nodes.
W = bipartite.projected_graph(G, women)
print('')
print("#Friends, Member")
for member in women:
    friend_count = W.degree(member)
    print('%d %s' % (friend_count, member))

# Weighted projection onto the women nodes, keeping the number of
# co-occurrences; the weighted degree counts the total number of shared
# contacts.
W = bipartite.weighted_projected_graph(G, women)
print('')
print("#Friend meetings, Member")
for member in women:
    meeting_count = W.degree(member, weight='weight')
    print('%d %s' % (meeting_count, member))

# Draw the original bipartite graph.
nx.draw(G)
plt.show()
    nodes_1 = list(dict.fromkeys(nodes_1))

    print("Build a bipartite graph ...")
    # Build a bipartite graph:
    G = nx.Graph()
    G.add_nodes_from(nodes_0, bipartite=0)  # disease
    G.add_nodes_from(nodes_1, bipartite=1)  # active substance

    for m in vdmdata_reduce.iterrows():
        enfermedad = m[1][0]
        sustancia = m[1][1]
        G.add_edge(enfermedad, sustancia)

    if type_nx == 'projected' and type_proj == 'icd':
        # Build Projected Graph Diseases
        GP = bipartite.projected_graph(G, nodes_0)
        print('Calculate Global properties for projected graph ' + type_proj)
        print("\n")
    elif type_nx == 'projected' and type_proj == 'atc':
        # Build Projected Graph Active Ingredients
        GP = bipartite.projected_graph(G, nodes_1)
        print('Calculate Global properties for projected graph ' + type_proj)
        print("\n")
    else:
        print('Calculate Global properties for bipartite network')
        print("\n")

    if type_nx == 'bipartite':
        print("Nodes Number : " + str(G.number_of_nodes()))
        print("\n")
        print("Edges Number : " + str(G.number_of_edges()))
Esempio n. 47
0
    return hostnames


# Walk the target directory, and for every file that parses as a PE binary
# link it to each hostname found in its printable strings.
for root, dirs, files in os.walk(args.target):
    for fname in files:
        f_path = os.path.join(root, fname)
        try:
            # Only used as a filter: files that are not valid PE are skipped.
            pefile.PE(f_path)
        except pefile.PEFormatError:
            continue
        # NOTE(review): the path is interpolated into a shell command; a file
        # name containing a single quote breaks the quoting.  Consider an
        # argument-list based subprocess call.
        contents = os.popen("strings '{0}'".format(f_path)).read()
        hostnames = get_hostnames(contents)
        if not hostnames:
            continue
        network.add_node(fname, label=fname, color='blue', penwidth=3, bipartite=0)
        for hostname in hostnames:
            network.add_node(hostname, label=hostname, color='purple', penwidth=10, bipartite=1)
            network.add_edge(hostname, fname, penwidth=2)
        print("Extracted hostname from: %s" % fname)
        # Show every extracted hostname, not just the last loop variable.
        pprint.pprint(hostnames)

write_dot(network, args.filename)

# Split the nodes by their 'bipartite' attribute: 0 marks the files,
# everything else is a hostname.
code_nodes = set(n for n, d in network.nodes(data=True) if d['bipartite'] == 0)
hostname_nodes = set(network) - code_nodes

codes = bipartite.projected_graph(network, code_nodes)
hostname = bipartite.projected_graph(network, hostname_nodes)

write_dot(codes, args.malware_pro)
write_dot(hostname, args.hostname_pro)
Esempio n. 48
0
# Check whether a set of nodes is a bipartite node set of B.
X = {1, 2, 3, 4}
bipartite.is_bipartite_node_set(B, X)

X = {"A", "B", "C", "D", "E"}
bipartite.is_bipartite_node_set(B, X)

bipartite.sets(B)

# Projected graphs: rebuild B from a group -> members listing (same edges,
# same insertion order as an explicit edge list).
B = nx.Graph()
members_by_group = [(1, "ABCDH"), (2, "BCDEG"), (3, "EFHJ"), (4, "EIJ")]
B.add_edges_from((member, group)
                 for group, members in members_by_group
                 for member in members)

# Projection onto the lettered nodes.
X = set("ABCDEFGHIJ")
P = bipartite.projected_graph(B, X)
nx.draw(P)

# Projection onto the numbered nodes.
X = {1, 2, 3, 4}
P = bipartite.projected_graph(B, X)
nx.draw(P, with_labels=1)

# Weighted projection onto the numbered nodes.
X = {1, 2, 3, 4}
P = bipartite.weighted_projected_graph(B, X)
nx.draw(P, with_labels=1)

# generate network data
import pandas as pd
import numpy as np
import random
Esempio n. 49
0
These data were collected by Davis et al. in the 1930s.  
They represent observed attendance at 14 social events by 18 Southern women.  
The graph is bipartite (clubs, women).
"""
import networkx as nx
import networkx.algorithms.bipartite as bipartite

G = nx.davis_southern_women_graph()
women, clubs = G.graph['top'], G.graph['bottom']

print("Biadjacency matrix")
print(bipartite.biadjacency_matrix(G, women, clubs))

# Unweighted projection of the bipartite graph onto the women nodes.
W = bipartite.projected_graph(G, women)
print('')
print("#Friends, Member")
for member in women:
    friend_count = W.degree(member)
    print('%d %s' % (friend_count, member))

# Weighted projection onto the women nodes, keeping the number of
# co-occurrences; the weighted degree counts the total number of shared
# contacts.
W = bipartite.weighted_projected_graph(G, women)
print('')
print("#Friend meetings, Member")
for member in women:
    meeting_count = W.degree(member, weight='weight')
    print('%d %s' % (meeting_count, member))


def load_stock(stock):
    """Unpickle and return the object stored for ``stock``.

    The file path is ``PROCESSED_PATH + stock`` (plain string concatenation;
    no separator is inserted).
    """
    # Pickle data is binary: text mode fails on Python 3 and can corrupt the
    # stream on platforms that translate newlines.
    # NOTE(review): pickle.load can execute arbitrary code; only use it on
    # files this project produced itself.
    with open(PROCESSED_PATH + stock, 'rb') as f:
        return pickle.load(f)
def load_user(user):
    """Unpickle and return the object stored for ``user``.

    The file path is ``USER_PATH + user`` (plain string concatenation; no
    separator is inserted).
    """
    # Pickle data is binary: text mode fails on Python 3 and can corrupt the
    # stream on platforms that translate newlines.
    # NOTE(review): pickle.load can execute arbitrary code; only use it on
    # files this project produced itself.
    with open(USER_PATH + user, 'rb') as f:
        return pickle.load(f)

# Load every regular file in the two data directories, keyed by file name.
stocks = {f: load_stock(f) for f in listdir(PROCESSED_PATH) if isfile(join(PROCESSED_PATH, f))}
users = {f: load_user(f) for f in listdir(USER_PATH) if isfile(join(USER_PATH, f))}

# Bipartite graphs in networkx are implemented using a normal graph where nodes have an attribute
# value of either 0 or 1 depending on which graph it belongs to

G = nx.Graph()
# Add all stocks
G.add_nodes_from(stocks, bipartite=0)
# Add all users
G.add_nodes_from(users, bipartite=1)
# items() works on both Python 2 and 3 (iteritems() is Python 2 only).
for symbol, stock_tweets in stocks.items():
    # Find the users that tweeted about this stock
    user_ids = {tweet.author.id_str for tweet in stock_tweets}
    # Membership is tested against the dict itself: constant-time, whereas
    # `in users.keys()` scans a list on Python 2.
    G.add_edges_from((symbol, user) for user in user_ids if user in users)

# Create the user network from the bipartite network
stock_nodes = set(n for n, d in G.nodes(data=True) if d['bipartite'] == 0)
user_nodes = set(G) - stock_nodes
U = bipartite.projected_graph(G, user_nodes)

# Bare expression: its value is only shown as the cell output.
tx.size()
# Distinct values of the SC_l column.
disciplines = df.SC_l.unique()
# Set of every item found in the non-nan entries of df.topics.
techniques = {det  for de in df.topics if de is not nan for det in de}

# <codecell>

df.SC_l

# <codecell>

len(techniques)

# <codecell>

# Project tx onto the nodes of g2.
# NOTE(review): assumes g2's nodes form one side of the bipartite graph tx -- confirm.
disc = bi.projected_graph(tx, g2.nodes())

# <codecell>

# (node, degree) pairs of the projection, ordered by ascending degree.
sorted(disc.degree().iteritems(), key= operator.itemgetter(1))

# <markdowncell>

# ## Retrieving further literature if needed from WoS

# <codecell>

## generate searches that can be run back against WoS - it says it will take up to 5000 terms!

# Builds one search string: each selected term is double-quoted and the terms
# are joined with " or "; only entries whose count is strictly between 90 and
# 200 are included.
'"'+'" or "'.join([de for de,val in de_counts_sorted if val > 90 and val < 200]) + '"'
Esempio n. 52
0
            nx.set_node_attributes(g,'co',co)
                       
            d=nx.degree(g1)
            nx.set_node_attributes(g1,'d',d)
            d=nx.degree(g2)
            nx.set_node_attributes(g2,'d',d)
            d=nx.degree(g3)
            nx.set_node_attributes(g3,'d',d)
            d=nx.degree(g4)
            nx.set_node_attributes(g4,'d',d)
            d=nx.degree(g5)
            nx.set_node_attributes(g5,'d',d)
            
            #projected eigenvector centrality
            bio_nodes = set(n for n in g.nodes() if n < 1000 and n > 0)
            eg = bipartite.projected_graph(g, bio_nodes)
            ec_pro=nx.eigenvector_centrality(eg, 1000)
            nx.set_node_attributes(eg,'ec',ec_pro) 
#            ho=sh.structural_holes(g)
            
            for n in g.nodes():
                if(n < 1000 and n != -9):
                    
                    
                    #calculate number of partner types
                    n_bio = 0
                    n_npr = 0
                    n_gov = 0
                    n_fin = 0
                    n_pha = 0
                    n_oth = 0
Esempio n. 53
0
def folded_graph(B):
    """Return the projection of ``B`` onto the second of its two node sets.

    The node sets are obtained from ``bipartite.sets(B)``; the second
    element of that pair is used as the projection target.
    """
    node_sets = bipartite.sets(B)
    return bipartite.projected_graph(B, node_sets[1])