def test_from_numpy_array_parallel_edges(self):
        """Tests that the :func:`networkx.from_numpy_array` function
        interprets integer weights as the number of parallel edges when
        creating a multigraph.

        """
        A = np.array([[1, 1], [1, 2]])
        # First, with a simple graph, each integer entry in the adjacency
        # matrix is interpreted as the weight of a single edge in the graph.
        expected = nx.DiGraph()
        edges = [(0, 0), (0, 1), (1, 0)]
        expected.add_weighted_edges_from([(u, v, 1) for (u, v) in edges])
        expected.add_edge(1, 1, weight=2)
        actual = nx.from_numpy_array(A, parallel_edges=True,
                                      create_using=nx.DiGraph())
        assert_graphs_equal(actual, expected)
        actual = nx.from_numpy_array(A, parallel_edges=False,
                                      create_using=nx.DiGraph())
        assert_graphs_equal(actual, expected)
        # Now each integer entry in the adjacency matrix is interpreted as the
        # number of parallel edges in the graph if the appropriate keyword
        # argument is specified.
        edges = [(0, 0), (0, 1), (1, 0), (1, 1), (1, 1)]
        expected = nx.MultiDiGraph()
        expected.add_weighted_edges_from([(u, v, 1) for (u, v) in edges])
        actual = nx.from_numpy_array(A, parallel_edges=True,
                                      create_using=nx.MultiDiGraph())
        assert_graphs_equal(actual, expected)
        expected = nx.MultiDiGraph()
        expected.add_edges_from(set(edges), weight=1)
        # The sole self-loop (edge 0) on vertex 1 should have weight 2.
        expected[1][1][0]['weight'] = 2
        actual = nx.from_numpy_array(A, parallel_edges=False,
                                      create_using=nx.MultiDiGraph())
        assert_graphs_equal(actual, expected)
    def test_from_numpy_matrix_type(self):
        A = np.matrix([[1]])
        G = nx.from_numpy_matrix(A)
        assert_equal(type(G[0][0]['weight']), int)

        A = np.matrix([[1]]).astype(float)
        G = nx.from_numpy_matrix(A)
        assert_equal(type(G[0][0]['weight']), float)

        A = np.matrix([[1]]).astype(str)
        G = nx.from_numpy_matrix(A)
        assert_equal(type(G[0][0]['weight']), str)

        A = np.matrix([[1]]).astype(bool)
        G = nx.from_numpy_matrix(A)
        assert_equal(type(G[0][0]['weight']), bool)

        A = np.matrix([[1]]).astype(complex)
        G = nx.from_numpy_matrix(A)
        assert_equal(type(G[0][0]['weight']), complex)

        A = np.matrix([[1]]).astype(object)
        assert_raises(TypeError, nx.from_numpy_matrix, A)

        G = nx.cycle_graph(3)
        A = nx.adj_matrix(G).todense()
        H = nx.from_numpy_matrix(A)
        assert_true(all(type(m) == int and type(n) == int for m, n in H.edges()))
        H = nx.from_numpy_array(A)
        assert_true(all(type(m) == int and type(n) == int for m, n in H.edges()))
    def identity_conversion(self, G, A, create_using):
        assert A.sum() > 0
        GG = nx.from_numpy_array(A, create_using=create_using)
        self.assert_equal(G, GG)
        GW = nx.to_networkx_graph(A, create_using=create_using)
        self.assert_equal(G, GW)
        GI = create_using.__class__(A)
        self.assert_equal(G, GI)

    def test_from_numpy_array_dtype(self):
        dt = [('weight', float), ('cost', int)]
        A = np.array([[(1.0, 2)]], dtype=dt)
        G = nx.from_numpy_array(A)
        assert_equal(type(G[0][0]['weight']), float)
        assert_equal(type(G[0][0]['cost']), int)
        assert_equal(G[0][0]['cost'], 2)
        assert_equal(G[0][0]['weight'], 1.0)
    def test_symmetric(self):
        """Tests that a symmetric array has edges added only once to an
        undirected multigraph when using :func:`networkx.from_numpy_array`.

        """
        A = np.array([[0, 1], [1, 0]])
        G = nx.from_numpy_array(A, create_using=nx.MultiGraph())
        expected = nx.MultiGraph()
        expected.add_edge(0, 1, weight=1)
        assert_graphs_equal(G, expected)
    def test_from_numpy_array_type(self):
        A = np.array([[1]])
        G = nx.from_numpy_array(A)
        assert_equal(type(G[0][0]['weight']), int)

        A = np.array([[1]]).astype(float)
        G = nx.from_numpy_array(A)
        assert_equal(type(G[0][0]['weight']), float)

        A = np.array([[1]]).astype(str)
        G = nx.from_numpy_array(A)
        assert_equal(type(G[0][0]['weight']), str)

        A = np.array([[1]]).astype(bool)
        G = nx.from_numpy_array(A)
        assert_equal(type(G[0][0]['weight']), bool)

        A = np.array([[1]]).astype(complex)
        G = nx.from_numpy_array(A)
        assert_equal(type(G[0][0]['weight']), complex)

        A = np.array([[1]]).astype(object)
        assert_raises(TypeError, nx.from_numpy_array, A)
Example #7
def get_graph_from_prob_matrix(p_mat: np.array,
                               thresh: float = None) -> nx.Graph:
    """
    Generates a NetworkX graph from a matrix of edge probabilities
    :param p_mat: matrix of edge probabilities
    :param thresh: optional threshold; if given, every edge with probability >= thresh is kept
                   instead of sampling edges at random
    :return: the sampled nx.Graph
    """
    n = p_mat.shape[0]  # number of rows / nodes

    if thresh is not None:
        rand_mat = np.ones((n, n)) * thresh
    else:
        rand_mat = np.random.rand(n, n)

    sampled_mat = rand_mat <= p_mat
    # sampled_mat = sampled_mat * sampled_mat.T  # to make sure it is symmetric

    sampled_mat = sampled_mat.astype(int)
    np.fill_diagonal(sampled_mat, 0)  # zero out the diagonals
    g = nx.from_numpy_array(sampled_mat, create_using=nx.Graph())
    return g
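
# A quick usage sketch for get_graph_from_prob_matrix (illustrative, made-up
# probability matrix; assumes numpy and networkx are imported as np and nx):
p = np.array([[0.0, 0.9, 0.1, 0.5],
              [0.9, 0.0, 0.8, 0.2],
              [0.1, 0.8, 0.0, 0.7],
              [0.5, 0.2, 0.7, 0.0]])
g_random = get_graph_from_prob_matrix(p)              # edges sampled at random
g_thresh = get_graph_from_prob_matrix(p, thresh=0.5)  # keep edges with p >= 0.5
print(sorted(g_thresh.edges()))  # [(0, 1), (0, 3), (1, 2), (2, 3)]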
Example #8
def calc_fluidC(adj_matrix, nr_communities_range=(5, 40)):
    nx_G = nx.from_numpy_array(adj_matrix)
    for nr in range(nr_communities_range[0], nr_communities_range[1] + 1):
        communities = nx.algorithms.community.asyn_fluid.asyn_fluidc(nx_G,
                                                                     nr,
                                                                     seed=0)
        # search for optimal communities

    # asyn_fluidc returns an iterable of node sets; flatten it into a
    # node -> community-index mapping (only the last nr tried is kept here)
    communities = {node: idx for idx, comm in enumerate(communities) for node in comm}

    number_communities = max(communities.values()) + 1

    community_list = []
    for i in range(number_communities):
        grp_list = [node for node, comm_id in communities.items() if comm_id == i]
        if grp_list:
            community_list.append(grp_list)

    return community_list
Example #9
def generate_summary(file_name, top_n=5):
    stop_words = stopwords.words('english')
    summarize_text = []

    sentences = read_article(file_name)

    sentence_similarity_martix = build_similarity_matrix(sentences, stop_words)

    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix)
    scores = nx.pagerank(sentence_similarity_graph)
    ranked_sentence = sorted(((scores[i], s) for i, s in enumerate(sentences)),
                             reverse=True)
    print("Indexes of top ranked_sentence order are ", ranked_sentence)

    for i in range(top_n):
        summarize_text.append(" ".join(ranked_sentence[i][1]))

    print("Summarize Text: \n", ". ".join(summarize_text))


#generate_summary( "msft.txt", 2)
Example #10
def fuzzy_geom_graph(size, radius, deg, ret_coords=True, force_connected=True):
    for _ in range(10000):
        # sample coordinates
        x, y = coords = np.random.rand(2, size) / radius

        # build the adjacency matrix
        adj = np.zeros((size, size)).astype(bool)
        for i, (xi, yi, di) in enumerate(zip(x, y, deg)):
            # sample neighbors based on Euclidean distance
            p = np.exp(-np.sqrt((xi - x) ** 2 + (yi - y) ** 2))
            other_nodes = [k for k in range(size) if k != i]
            p = p[other_nodes]
            p /= p.sum()
            neighbors = np.random.choice(other_nodes, size=di, replace=False, p=p)
            adj[i, neighbors] = True
        adj |= adj.T

        G = nx.from_numpy_array(adj)
        if not force_connected or nx.is_connected(G):
            return G
    print('failed graph generation fuzzy_geom_graph')
Example #11
    def get_bridge_bonds_matrix(self):
        """
        Return a boolean matrix of size (n_defined_atoms, n_defined_atoms) indicating, for each
        bond of the molecular graph, whether it is a bridge.
        """

        # Converting the molecular graph to a NetworkX object
        nx_mol_graph = nx.from_numpy_array(self.get_adjacency_matrix())

        # Initialization of the output matrix of bridge bonds
        output_bridges_matrix = np.full(
            (self.get_n_atoms(), self.get_n_atoms()), False)

        # Extracting the list of bridges in the molecular simple graph
        bridges_list = list(nx.bridges(nx_mol_graph))

        for bridge in bridges_list:
            output_bridges_matrix[bridge[0], bridge[1]] = True
            output_bridges_matrix[bridge[1], bridge[0]] = True

        return output_bridges_matrix
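
# The same bridge test also works on a plain adjacency matrix without the molecule
# wrapper above; a standalone sketch (illustrative 5-node graph: a triangle
# 0-1-2 attached to the path 2-3-4, so only 2-3 and 3-4 are bridges):
A = np.array([[0, 1, 1, 0, 0],
              [1, 0, 1, 0, 0],
              [1, 1, 0, 1, 0],
              [0, 0, 1, 0, 1],
              [0, 0, 0, 1, 0]])
G = nx.from_numpy_array(A)
print(list(nx.bridges(G)))  # the bridges (2, 3) and (3, 4)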
Example #12
def draw_graph(adj=None, G=None, marginals=None,
               draw_edge_color=False, title=None,
               node_size=300, node_labels=None):

    node_color = marginals
    if G is None:
        assert adj is not None, "you have to provide either the adjacency matrix or the graph"
        G = nx.from_numpy_array(adj)
    edge_color = G.number_of_edges() * [1]
    n = G.number_of_nodes()
    if adj is not None:
        edges = adj[np.triu_indices(n, 1)]  # strict upper triangle inds
        if draw_edge_color:
            edge_color = edges[edges != 0].ravel().astype(float).tolist()
    if node_labels is not None:
        node_dict = {i: str(node_labels[i]) for i in range(n)}
    else:
        node_dict = None
    nx.draw(G, node_color=marginals, edge_color=edge_color,
            label=title, node_size=node_size,
            labels=node_dict)
    plt.show()
Example #13
def generate_summary(file_name, top_n):
    stop_words = stopwords.words('english')
    summarize_text = []

    sentences = ra.read_article(file_name)

    sentence_similarity_martix = sm.build_similarity_matrix(sentences, stop_words)

    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix)
    scores = nx.pagerank(sentence_similarity_graph)

    ranked_sentence = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
    if top_n > len(ranked_sentence):
        print("Requested number of sentences exceeds the number of sentences in the article")
        return

    for i in range(top_n):
        summarize_text.append(" ".join(ranked_sentence[i][1]))

    print("Summarize Text: \n", ". ".join(summarize_text))
Example #14
    def generate_summary(self):
        stop_words = stopwords.words('english')
        summarize_text = []

        sentences = self.read_article()

        sentence_similarity_martix = self.build_similarity_matrix(
            sentences, stop_words)

        sentence_similarity_graph = nx.from_numpy_array(
            sentence_similarity_martix)
        scores = nx.pagerank(sentence_similarity_graph)

        ranked_sentence = sorted(
            ((scores[i], s) for i, s in enumerate(sentences)), reverse=True)

        for i in range(self.top_n):
            summarize_text.append(" ".join(ranked_sentence[i][1]))

        summary = "".join(summarize_text)
        return summary
def generate_summary(file_name):
    """
    The main function to generate summary by finding similarity among sentences and ranking them
    :param file_name: filename and path
    :return: Summarized text
    """
    summarize_text = []
    sentences = read_article(file_name)
    if len(sentences) > 4:
        num_sentences = int(len(sentences) / 3)
    else:
        num_sentences = len(sentences)
    sentence_similarity_martix = build_similarity_matrix(sentences)
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix)
    scores = nx.pagerank(sentence_similarity_graph)
    ranked_sentence = sorted(((scores[i], s) for i, s in enumerate(sentences)),
                             reverse=True)
    for i in range(num_sentences):
        summarize_text.append(" ".join(ranked_sentence[i][1]))
    final_text = '. '.join(summarize_text)
    return final_text
def generate_summary(text, n=5):
    summary = []

    # Sentence-tokenize text
    sentences = process_input(text)

    # Generate similarity matrix
    sim_mx = make_similiarity_matrix(sentences)

    # Rank sentences in the matrix
    sim_graph = nx.from_numpy_array(sim_mx)
    scores = nx.pagerank(sim_graph)

    # Sort rank + pick top ranked sentences
    ranked = sorted(((scores[i], s) for i, s in enumerate(sentences)),
                    reverse=True)

    for i in range(n):
        summary += [ranked[i][1]]

    return '. '.join(summary)
def extract_links(n, connections, link_cap):
    A = np.zeros((n, n))

    for a, c in zip(A, connections):
        a[c] = 1

    G = nx.from_numpy_array(A, create_using=nx.DiGraph())
    edges = list(G.edges)
    capacities_links = []
    # Links may exist in both directions (e.g. 0->2 and 2->0); both directions
    # must have the same capacity, so the capacity is looked up either way.
    for e in edges:
        if str(e[0]) + ':' + str(e[1]) in link_cap:
            capacity = link_cap[str(e[0]) + ':' + str(e[1])]
            capacities_links.append(capacity)
        elif str(e[1]) + ':' + str(e[0]) in link_cap:
            capacity = link_cap[str(e[1]) + ':' + str(e[0])]
            capacities_links.append(capacity)
        else:
            print("ERROR IN THE DATASET!")
            exit()
    return edges, capacities_links
Example #18
def erdos_renyi_graph(n, k_avg):
    r"""
    Generates an Erdos-Renyi random graph by randomly connecting two nodes, $i$
    and $j$, with a probability $p$, corresponding to the specified average
    degree, $\langle k \rangle$.

    Parameters
    ----------
    n (int): number of nodes
    k_avg (float): desired average degree of the resulting network

    Returns
    -------
    g (nx.Graph): a networkx graph

    """

    a = np.triu(np.random.rand(n, n) < k_avg / (n - 1), 1)
    g = nx.from_numpy_array(np.array(a + a.T, dtype=int))

    return g
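
# Usage sketch: the realized mean degree of the generated graph should
# fluctuate around the requested k_avg (illustrative numbers).
g = erdos_renyi_graph(1000, 6.0)
mean_degree = 2 * g.number_of_edges() / g.number_of_nodes()
print(mean_degree)  # close to 6.0 on average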
Example #19
    def get_articulation_points_vector(self):
        """
        Returning a boolean vector indicating whether each atom of the molecular graph is an articulation point
        (a vertex whose removal increases the number of connected components).
        :return:
        """

        # Articulation points vector initialization
        art_points_vector = np.zeros((self.get_n_atoms(), ))

        # Converting the molecular graph to a NetworkX object
        nx_mol_graph = nx.from_numpy_array(self.get_adjacency_matrix())

        # Computing articulation points
        art_points_ids = nx.articulation_points(nx_mol_graph)

        # Setting output vector
        for art_points_id in art_points_ids:
            art_points_vector[art_points_id] = 1

        return art_points_vector
    def summarize(self, paragraph, mode="clustering", keep_sentences=5):
        origin_sentence = sent_tokenize(paragraph)
        sentences = self.clearner.preprocessing(paragraph)
        sent_vectors = self.vectorizer.vectorize(sentences)  # row vector

        if mode == "clustering":
            kmeans = KMeans(n_clusters=keep_sentences)
            kmeans = kmeans.fit(sent_vectors)
            avg = []
            for j in range(keep_sentences):
                idx = np.where(kmeans.labels_ == j)[0]
                avg.append(np.mean(idx))
            closest, _ = pairwise_distances_argmin_min(kmeans.cluster_centers_,
                                                       sent_vectors)
            # top_sentences = sorted(range(n_clusters), key=lambda k: avg[k])
            top_sentences = sorted(closest)

        elif mode == "lsa":
            # input: column vector
            sent_vectors_t = sent_vectors.T
            U, S, VT = np.linalg.svd(sent_vectors_t)
            saliency_vec = np.dot(np.square(S), np.square(VT))
            top_sentences = saliency_vec.argsort()[-keep_sentences:][::-1]
            top_sentences.sort()

        else:
            sim_mat = np.zeros([len(sentences), len(sentences)])
            for i in range(len(sentences)):
                for j in range(len(sentences)):
                    if i != j:
                        sim_mat[i][j] = cosine_similarity(
                            sent_vectors[i].reshape(1, -1),
                            sent_vectors[j].reshape(1, -1))[0][0]

            nx_graph = nx.from_numpy_array(sim_mat)
            scores = list(nx.pagerank(nx_graph).values())
            top_sentences = np.argsort(scores)[-keep_sentences:][::-1]
            top_sentences.sort()
        summary = " ".join([origin_sentence[i] for i in top_sentences])
        return summary, top_sentences
def generate_summary(file_or_url_string, out_file_string, top_n=5):
    stop_words = stopwords.words('english')
    summarize_text = []

    # Step 1 - Read text and split it
    # article = read_article(file_or_url_string)

    article = read_file_or_web_contents(file_or_url_string)
    # article = read_article_single_or_multi_lines(file_or_url_string)

    sentences = process_sentences(article)

    # Step 2 - Generate similarity matrix across sentences
    sentence_similarity_martix = build_similarity_matrix(sentences, stop_words)

    # Step 3 - Rank sentences in similarity matrix
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix)
    scores = nx.pagerank(sentence_similarity_graph)

    # Step 4 - Sort the rank and pick top sentences
    sentences_list = enumerate(sentences)

    ranked_sentence = sorted(((scores[i], s) for i, s in enumerate(sentences)),
                             reverse=True)
    print("Indexes of top ranked_sentence order are ", ranked_sentence)

    for i in range(top_n):
        summarize_text.append(" ".join(ranked_sentence[i][1]))

    # Step 5 - Of course, output the summarized text
    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Summarize Text: ")
    print(summarize_text)

    # Step 6 - Output to a file
    with open(out_file_string, 'w') as outfile:
        #for item in summarize_text:
        #    outfile.write("%s\n" % item)
        outfile.write("\n".join(str(item) for item in summarize_text))

    return summarize_text
Example #22
def find_minimum_spanning_tree(graph: nx.Graph) -> nx.Graph:
    """
        find_minimum_spanning_tree is used to find the minimum spanning tree
        of the given graph
        :param graph: nx.Graph in which we are looking for the minimum spanning tree

        :return: minimum spanning tree as a graph
        :rtype: nx.Graph
    """
    adj_matrix = nx.to_numpy_array(graph)
    no_nodes = adj_matrix.shape[0]
    selected = np.zeros(no_nodes)
    minimum_spanning_tree = np.zeros((no_nodes, no_nodes))
    no_edge = 0
    selected[0] = True

    while no_edge < no_nodes - 1:
        minimum = sys.maxsize
        vertex_begin = 0
        vertex_end = 0
        for first_dim_iter in range(no_nodes):
            if selected[first_dim_iter]:
                for second_dim_iter in range(no_nodes):
                    if not selected[second_dim_iter] and adj_matrix[
                            first_dim_iter][second_dim_iter]:
                        if minimum > adj_matrix[first_dim_iter][
                                second_dim_iter]:
                            minimum = adj_matrix[first_dim_iter][
                                second_dim_iter]
                            vertex_begin = first_dim_iter
                            vertex_end = second_dim_iter

        minimum_spanning_tree[vertex_begin][vertex_end] = adj_matrix[
            vertex_begin][vertex_end]
        minimum_spanning_tree[vertex_end][vertex_begin] = adj_matrix[
            vertex_begin][vertex_end]
        selected[vertex_end] = True
        no_edge += 1

    return nx.from_numpy_array(minimum_spanning_tree)
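
# Usage sketch for find_minimum_spanning_tree (toy weighted graph; the total
# weight should match networkx's own minimum_spanning_tree):
G = nx.Graph()
G.add_weighted_edges_from([(0, 1, 4), (0, 2, 1), (1, 2, 2), (1, 3, 5), (2, 3, 8)])
mst = find_minimum_spanning_tree(G)
print(mst.size(weight='weight'))                          # 8.0  (edges 0-2, 1-2, 1-3)
print(nx.minimum_spanning_tree(G).size(weight='weight'))  # 8.0 as well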
def test_held_karp_ascent_asymmetric_3():
    """
    Tests the ascent method using a truly asymmetric graph with a fractional
    solution for which the solution has been brute forced.

    In this graph there are two different optimal, integral solutions (which
    are also the overall ATSP solutions) to the Held-Karp relaxation. However,
    this particular graph has two different tours of optimal value and the
    possible solutions in the held_karp_ascent function are not stored in an
    ordered data structure.
    """
    import networkx.algorithms.approximation.traveling_salesman as tsp

    np = pytest.importorskip("numpy")

    G_array = np.array([
        [0, 1, 5, 2, 7, 4],
        [7, 0, 7, 7, 1, 4],
        [4, 7, 0, 9, 2, 1],
        [7, 2, 7, 0, 4, 4],
        [5, 5, 4, 4, 0, 3],
        [3, 9, 1, 3, 4, 0],
    ])

    solution1_edges = [(0, 3), (1, 4), (2, 5), (3, 1), (4, 2), (5, 0)]

    solution2_edges = [(0, 3), (3, 1), (1, 4), (4, 5), (2, 0), (5, 2)]

    G = nx.from_numpy_array(G_array, create_using=nx.DiGraph)
    opt_hk, z_star = tsp.held_karp_ascent(G)

    assert round(opt_hk, 2) == 13.00
    # Check that the z_stars are the same
    solution1 = nx.DiGraph()
    solution1.add_edges_from(solution1_edges)
    solution2 = nx.DiGraph()
    solution2.add_edges_from(solution2_edges)
    assert nx.utils.edges_equal(z_star.edges,
                                solution1.edges) or nx.utils.edges_equal(
                                    z_star.edges, solution2.edges)
def extract_links(num_nodes, connections, link_capacity_dict):
    """

    :param num_nodes: number of nodes in the topology
    :param connections: per-node lists of the indices of the nodes each node links to
    :param link_capacity_dict: dict mapping 'n:m' keys to link capacities
    :return: list of directed links and the list of their capacities
    """
    # An adjacency matrix representation of a graph
    grph_adjcny_mtrx = np.zeros((num_nodes, num_nodes))

    for adjacencies, connection_set in zip(grph_adjcny_mtrx, connections):
        # For adjacencies row n of the matrix, corresponding to node n, set a value of 1 for each
        # position in the row corresponding to the other nodes that node n has a link to.
        adjacencies[connection_set] = 1

    # Given the adjacency matrix, construct a directed graph.
    graph = nx.from_numpy_array(grph_adjcny_mtrx, create_using=nx.DiGraph())
    # From the graph, edges are represented as links between nodes. The links list contains
    # tuples for the connections from node n to node m and from node m to node n.
    links = list(graph.edges)
    # link_capacities is a list of the capacities of the links, in the same order as the
    # entries in links.
    link_capacities = []
    # The links are duplicated from n to m and m to n, so they must have the same capacity in both
    # directions. The link_capacity_dict keys are of the form n:m and m:n.
    for link in links:
        if str(link[0]) + ':' + str(link[1]) in link_capacity_dict:
            capacity = link_capacity_dict[str(link[0]) + ':' + str(link[1])]
            link_capacities.append(capacity)
        elif str(link[1]) + ':' + str(link[0]) in link_capacity_dict:
            capacity = link_capacity_dict[str(link[1]) + ':' + str(link[0])]
            link_capacities.append(capacity)
        else:
            raise Exception(
                'Error in dataset - link not found in link capacities - ',
                link)

    return links, link_capacities
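
# Illustrative usage of extract_links with a made-up 3-node topology:
# node 0 links to nodes 1 and 2, and nodes 1 and 2 link back to node 0.
connections = [[1, 2], [0], [0]]
link_capacity_dict = {'0:1': 10, '0:2': 40}  # looked up in either direction
links, link_capacities = extract_links(3, connections, link_capacity_dict)
print(links)            # [(0, 1), (0, 2), (1, 0), (2, 0)]
print(link_capacities)  # [10, 40, 10, 40]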
Example #25
    def cooc_2d(self):
        """
        Calculates the number of co-occurrences of the index phrases in the
        text (output as the matrix ``cooc_mat``), as well as records the first
        time a co-occurrence occurred in the text (``dist_mat``).
        """
        dim = len(self.index_labels)
        cooc_mat = np.zeros((dim, dim))
        first_cooc = np.zeros((dim, dim))
        sentences = self.windows
        timeline = {}
        for sent, num in tqdm_notebook(
                list(zip(sentences, range(1, len(sentences) + 1)))):
            joined_sent = ' '.join(sent)
            for i in range(dim):
                if self.index_labels[i] in joined_sent:
                    cooc_mat[i, i] += 1
                    if first_cooc[i, i] == 0:
                        first_cooc[i, i] = num
                    for j in range(i + 1, dim):
                        if self.index_labels[j] in joined_sent:
                            cooc_mat[(np.array([i, j]), np.array([j, i]))] += 1
                            if first_cooc[i, j] == 0:
                                first_cooc[(np.array([i, j]),
                                            np.array([j, i]))] = num
                                timeline[tuple(self.index_labels[np.array(
                                    [i, j])])] = num
        first_cooc[first_cooc == 0] = np.inf
        self.cutoff = len(sentences)
        self.dist_mat = first_cooc
        self.cooc_mat = cooc_mat
        self.timeline = timeline
        # make graph
        G = nx.from_numpy_array(cooc_mat)
        G.remove_edges_from(nx.selfloop_edges(G))
        name_mapping = {i: label for i, label in enumerate(self.index_labels)}
        nx.relabel_nodes(G, name_mapping, copy=False)
        self.graph = G
        return cooc_mat, first_cooc, timeline
Example #26
def get_smoothnes_kNN(embeddings, energies, K):
    """ kNN based graph for smoothness calc

    Args:
        embeddings ([type]): [description]
        energies ([type]): [description]
        K ([type]): [description]

    Returns:
        [type]: [description]
    """

    N = embeddings.shape[0]

    energies = energies.reshape(N, 1)

    # get kNN graph
    print("getting kNN graph")
    A_mat = kneighbors_graph(embeddings, n_neighbors=K,
                             mode='connectivity').todense()

    # make symmetric
    A_mat = A_mat + A_mat.T
    A_mat = A_mat.clip(0, 1)

    nn_graph = nx.from_numpy_array(A_mat)

    print("computing combinatorial graph laplacian")
    L_mat = nx.laplacian_matrix(nn_graph).todense()

    # compute smoothness index
    print("computing smoothness value")
    lap_smooth = np.matmul(L_mat, energies)
    lap_smooth = np.matmul(energies.T, lap_smooth)
    signal_dot = np.matmul(energies.T, energies)
    lap_smooth = lap_smooth / signal_dot

    print("smoothness for K={}: {}".format(K, lap_smooth.item()))

    return lap_smooth.item()
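
# Usage sketch with synthetic data (assumes numpy and sklearn's kneighbors_graph
# are imported as in the snippet above). A signal that varies slowly over the
# embedding space should typically yield a smaller smoothness value than noise.
rng = np.random.RandomState(0)
embeddings = rng.rand(200, 8)
smooth_signal = embeddings.sum(axis=1)  # correlated with position in the space
noisy_signal = rng.rand(200)
get_smoothnes_kNN(embeddings, smooth_signal, K=10)
get_smoothnes_kNN(embeddings, noisy_signal, K=10)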
Example #27
def generate_summary(file_name, top_n=5):
    stop_words = stopwords.words('english')
    summarize_text = []
    path = 'Add/path'
    for filename in os.listdir(path):
        if fnmatch.fnmatch(filename, '*.story'):
            print("Filename", filename)
            file_name = os.path.join(path, filename)
            # Step 1 - Read text and split it
            sentences = read_article(file_name)

            # Step 2 - Generate similarity matrix across sentences
            sentence_similarity_martix = build_similarity_matrix(
                sentences, stop_words)

            # Step 3 - Rank sentences in similarity matrix
            sentence_similarity_graph = nx.from_numpy_array(
                sentence_similarity_martix)
            scores = nx.pagerank(sentence_similarity_graph,
                                 alpha=0.85,
                                 personalization=None,
                                 max_iter=10000,
                                 tol=1e-06,
                                 nstart=None,
                                 weight='weight',
                                 dangling=None)

            # Step 4 - Sort the rank and pick top sentences
            ranked_sentence = sorted(
                ((scores[i], s) for i, s in enumerate(sentences)),
                reverse=True)
            print("Indexes of top ranked_sentence order are ", ranked_sentence)

            for i in range(top_n):
                summarize_text.append(" ".join(ranked_sentence[i][1]))

            # Step 5 - Output summarized text
            print('\n')
            print("Summarize Text: \n", ". ".join(summarize_text))
            print('\n\n')
def DocumentSimalarity(Filepath, CSVFILE, Topic):
    df = pd.read_csv(CSVFILE, low_memory=False)
    df = df[df['topic'] == Topic]
    df = df[df['pdf_json_files'].notnull()]
    PubPaths = df['pdf_json_files'].tolist()
    df.loc[df['pmc_json_files'].notnull(), 'Full PMC'] = True
    df.loc[df['pdf_json_files'].notnull(), 'Full PDF'] = True
    df.loc[df['pmc_json_files'].notnull(), 'Full PDF'] = False  # prefer PMC if both PMC and PDF are available
    # df.loc[df['pmc_json_files'].isnull(), 'Full PMC'] = False
    Titles = df[df['Full PMC'] == True]['title'].tolist()
    Titles.extend(df[df['Full PDF'] == True]['title'].tolist())

    PubPaths = df[df['Full PMC'] == True]['pmc_json_files'].tolist()
    PDFPapers = df[df['Full PDF'] == True]['pdf_json_files'].tolist()
    PubPaths.extend(PDFPapers)
    # del df
    Corpus = []
    nlpsci = InitSciSpacy()
    nlpsci.disable_pipes("parser", "ner")

    for pub in PubPaths:
        BodyText = []
        pub = pub.split("; ")
        for p in pub:
            # p = p.replace(" ", "")
            print(p)
            SkimmedText = SkimallText(Filepath + p, nlpsci)
            if len(SkimmedText) == 0:
                continue
            BodyText.extend(SkimmedText)  # full text, not just the abstract
        Doc = ".".join(BodyText)
        Corpus.append(Doc)
    tfidf_vectorizer = TfidfVectorizer()
    tfidf = tfidf_vectorizer.fit_transform(Corpus)
    tfidf_feature_names = tfidf_vectorizer.get_feature_names()

    SimilarityArray = cosine_similarity(tfidf, tfidf)
    print(len(PubPaths), SimilarityArray.shape)
    nx_graph = nx.from_numpy_array(SimilarityArray)  # convert the similarity matrix into a graph
    scores = nx.pagerank(nx_graph)
    return scores, Titles
Example #29
def value_graph_laplacians():
    n_states = 8
    n_actions = 2

    det_pis = utils.get_deterministic_policies(n_states, n_actions)
    N = len(det_pis)
    print('n pis: {}'.format(N))
    for i in range(1):
        mdp = utils.build_random_mdp(n_states, n_actions, 0.5)

        values = [utils.value_functional(mdp.P, mdp.r, pi, mdp.discount).squeeze() for pi in det_pis]
        Vs = np.stack(values).reshape((N, n_states))
        A = graph.mdp_topology(det_pis)

        W = np.exp(-np.linalg.norm(Vs[None, :, :] - Vs[:, None, :], ord=np.inf, axis=-1)+1e-8)

        # mVs = np.mean(Vs, axis=0)  # n_states
        # W = np.dot((Vs - mVs) , (Vs - mVs).T)
        adj = W * A

        G = nx.from_numpy_array(adj)
        pos = nx.spectral_layout(G) #, iterations=500)
        plt.figure(figsize=(16,16))
        nx.draw(G, pos, node_color=[np.sum(v) for v in values], node_size=150)
        plt.savefig('figs/value_graphs/{}-value_graph-{}-{}.png'.format(i, n_states, n_actions))
        plt.close()

        u, v = graph_laplacian_spectra(adj)
        plt.figure(figsize=(8,8))
        plt.bar(range(len(u)), u)
        plt.savefig('figs/value_graphs/{}-lap.png'.format(i))
        plt.close()

        plt.figure(figsize=(16,16))
        n = 5
        for j in range(n*n):
            plt.subplot(n,n,j+1)
            nx.draw(G, pos, node_color=u[10*j] * v[10*j], node_size=150)
        plt.savefig('figs/value_graphs/{}-spectra.png'.format(i, n_states, n_actions))
        plt.close()
Example #30
def sample(prediction_step, n_balls, _delta_T=0.001, sample_freq=100):
    """
    This function generate training data for IVP(init value problem) prediction.
    Sampling time intervel is (_delta_T * sample_freq), i.e. regular intervel.
    Input:
        --prediction_step: int
        --n_balls
        --_delta_T: minimum time intervel of simulation
        --sample_freq: 
    Notices:
    (_delta_T*sample_freq) is the time intervel of output data
    """
    # 开始采样的时间点随机设置,增加样本的多样性
    sample_t0 = np.random.choice(range(10, 500))

    T = (sample_t0 + prediction_step) * sample_freq
    model = SpringSim(n_balls=n_balls, _delta_T=_delta_T)
    pos, vel, adj = model.sample_trajectory(T, sample_freq)
    pos = pos[sample_t0:]
    vel = vel[sample_t0:]

    G = nx.from_numpy_array(adj)
    edge_index = torch.LongTensor(np.array(G.edges()).T)
    edge_index = tg.utils.to_undirected(edge_index)

    pos_0 = torch.Tensor(pos[0])
    vel_0 = torch.Tensor(vel[0])

    pos_res = torch.Tensor(pos[1:])
    vel_res = torch.Tensor(vel[1:])

    delta_t = torch.arange(prediction_step) * (_delta_T * sample_freq)
    data = Data(num_nodes=n_balls,
                edge_index=edge_index,
                pos_0=pos_0.transpose(0, 1),
                pos_res=pos_res.transpose(1, 2),
                vel_0=vel_0.transpose(0, 1),
                vel_res=vel_res.transpose(1, 2),
                delta_t=delta_t)
    return data
Example #31
def summarize(content, isFile):
    sentences = []
    if isFile:
        sentences = read_data(content)
    else:
        sentences.append(sent_tokenize(content))

    word_embeddings = get_word_embeddings()
    output = []
    for item in sentences:
        cleaned_sentences = clean_sentences(item)
        sentence_vectors = get_sentence_vectors(cleaned_sentences,
                                                word_embeddings)

        # similarity matrix
        sim_mat = np.zeros([len(item), len(item)])
        for i in range(len(item)):
            for j in range(len(item)):
                if i != j:
                    sim_mat[i][j] = \
                    cosine_similarity(sentence_vectors[i].reshape(1, 100), sentence_vectors[j].reshape(1, 100))[
                        0, 0]

        nx_graph = nx.from_numpy_array(sim_mat)
        scores = nx.pagerank(nx_graph)
        ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(item)),
                                  reverse=True)

        # Specify number of sentences to form the summary
        sn = 3

        # Generate summary
        result = []
        for i in range(sn):
            # print(ranked_sentences[i][1])
            if len(ranked_sentences) > i:
                result.append(ranked_sentences[i][1])
        output.append(result)

    return output
def text_rank(sentences: list, word_embeddings: dict) -> dict:
    """
		Input: List, Dict
		Output: Dict
		Takes a list of sentences and Glove word embeddings as input and returns a dictionary containing sentences index as key and rank as value.
		The ranking is done based on the PageRank algorithm
	"""
    # Clean sentences for PageRank algorithm.
    clean_sentences = pd.Series(sentences).str.replace("[^a-zA-Z]", " ", regex=True)
    clean_sentences = [s.lower() for s in clean_sentences]
    clean_sentences = [remove_stopwords(r) for r in clean_sentences]

    # Replace each word with Glove embeddings. The Sentence vector is the average of the sum of embeddings of all words in that
    # sentence.
    sentence_vectors = []
    for i in clean_sentences:
        if len(i) != 0:
            v = sum(
                [word_embeddings.get(w, np.zeros((100, )))
                 for w in i.split()]) / (len(i.split()) + 0.001)
        else:
            v = np.zeros((100, ))
        sentence_vectors.append(v)

    # Initialize a similarity matrix for pair of sentences
    sim_mat = np.zeros([len(sentences), len(sentences)])

    # Calculate cosine similarity for each pair of sentences
    for i in range(len(sentences)):
        for j in range(len(sentences)):
            if i != j:
                sim_mat[i][j] = cosine_similarity(
                    sentence_vectors[i].reshape(1, 100),
                    sentence_vectors[j].reshape(1, 100))[0, 0]

    # Create a PageRank graph using similarity matrix
    nx_graph = nx.from_numpy_array(sim_mat)
    scores = nx.pagerank(nx_graph)

    return scores
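
# Usage sketch for text_rank (assumes the imports and helpers used above plus
# networkx; the tiny vocabulary of random vectors only stands in for real
# 100-dim GloVe embeddings):
sentences = ["Graph methods rank sentences by importance.",
             "PageRank runs on the sentence similarity graph.",
             "Completely unrelated filler text about cooking pasta."]
rng = np.random.RandomState(0)
vocab = {w for s in sentences for w in s.lower().replace(".", "").split()}
word_embeddings = {w: rng.rand(100) for w in vocab}
scores = text_rank(sentences, word_embeddings)
# scores maps sentence index -> PageRank score; keep the top 2 in original order
top_idx = sorted(sorted(scores, key=scores.get, reverse=True)[:2])
summary = " ".join(sentences[i] for i in top_idx)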
Example #33
def generate_summary(text, top_n):
    stop_words = stopwords.words('english')
    summarize_text = []

    # Step 1 - Read text and split it
    sentences = format_text(text)

    # Step 2 - Generate similarity matrix across sentences
    sentence_similarity_martix = build_similarity_matrix(sentences, stop_words)

    # Step 3 - Rank sentences in similarity matrix
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix)
    scores = nx.pagerank(sentence_similarity_graph)

    # Step 4 - Sort the rank and pick top sentences
    ranked_sentence = sorted(((scores[i], s) for i, s in enumerate(sentences)),
                             reverse=True)
    for i in range(top_n):
        summarize_text.append(" ".join(ranked_sentence[i][1]))

    # Step 5 - Of course, output the summarized text
    print("Summarize Text: ", ".\n\nSAD ".join(summarize_text))
Example #34
def ExponentialTwoGraph(size: int) -> nx.DiGraph:
    """Generate graph topology such that each points only
    connected to a point such that the index difference is the power of 2.

    Example: A ExponentialTwoGraph with 12 nodes:

    .. plot::
        :context: close-figs

        >>> import networkx as nx
        >>> from bluefog.common import topology_util
        >>> G = topology_util.ExponentialTwoGraph(12)
        >>> nx.draw_circular(G)
    """
    assert size > 0
    x = np.array([1.0 if i & (i - 1) == 0 else 0 for i in range(size)])
    x /= x.sum()
    topo = np.empty((size, size))
    for i in range(size):
        topo[i] = np.roll(x, i)
    G = nx.from_numpy_array(topo, create_using=nx.DiGraph)
    return G
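
# Usage sketch: for 8 nodes, node 0's out-neighbours sit at power-of-two offsets
# (the self-loop comes from the nonzero diagonal weight).
G = ExponentialTwoGraph(8)
print(sorted(G.successors(0)))  # [0, 1, 2, 4]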
Example #35
    def as_graph(self, directed=True):

        if self.normalized_difference.ndim > 2:
            raise MarkovError("You can only graph one-step chains.")

        try:
            import networkx as nx
        except ImportError:
            nx = None

        if nx is None:
            print("Please install networkx with `pip install networkx`.")
            return

        if directed:
            alg = nx.DiGraph
        else:
            alg = nx.Graph

        G = nx.from_numpy_array(self.normalized_difference, create_using=alg)
        nx.set_node_attributes(G, self._state_dict, 'state')
        return G
Example #36
def test_get_metrics(metric):
    """
    Test various wrappers for getting nx graph metrics
    """
    base_dir = str(Path(__file__).parent/"examples")
    est_path = f"{base_dir}/miscellaneous/sub-0021001_rsn-Default_nodetype-parc_model-sps_template-MNI152_T1_thrtype-DENS_thr-0.19.npy"

    in_mat = np.load(est_path)
    G = nx.from_numpy_array(in_mat)
    ci = np.ones(in_mat.shape[0])
    metric_list_names = []
    net_met_val_list_final = []


    if metric == 'participation':
        metric_list_names, net_met_val_list_final = \
            netstats.get_participation(in_mat, ci, metric_list_names, net_met_val_list_final)
        assert len(metric_list_names) == len(netstats.participation_coef(in_mat, ci))+1
        assert len(net_met_val_list_final) == len(netstats.participation_coef(in_mat, ci))+1
    elif metric == 'diversity':
        metric_list_names, net_met_val_list_final = \
            netstats.get_diversity(in_mat, ci, metric_list_names, net_met_val_list_final)
        assert len(metric_list_names) == np.shape(netstats.diversity_coef_sign(in_mat, ci))[1]+1
        assert len(net_met_val_list_final) == np.shape(netstats.diversity_coef_sign(in_mat, ci))[1]+1
    elif metric == 'local_efficiency':
        metric_list_names, net_met_val_list_final = \
            netstats.get_local_efficiency(G, metric_list_names, net_met_val_list_final)
        assert len(metric_list_names) == len(netstats.local_efficiency(G))+1
        assert len(net_met_val_list_final) == len(netstats.local_efficiency(G))+1
    elif metric == 'comm_centrality':
        metric_list_names, net_met_val_list_final = \
            netstats.get_comm_centrality(G, metric_list_names, net_met_val_list_final)
        assert len(metric_list_names) == len(nx.algorithms.communicability_betweenness_centrality(G))+1
        assert len(net_met_val_list_final) == len(nx.algorithms.communicability_betweenness_centrality(G))+1
    elif metric == 'rich_club_coeff':
        metric_list_names, net_met_val_list_final = \
            netstats.get_rich_club_coeff(G, metric_list_names, net_met_val_list_final)
        assert len(metric_list_names) == len(nx.algorithms.rich_club_coefficient(G))+1
        assert len(net_met_val_list_final) == len(nx.algorithms.rich_club_coefficient(G))+1