def test_weight_keyword(self):
    WP4 = nx.Graph()
    WP4.add_edges_from((n, n + 1, dict(weight=0.5, other=0.3)) for n in range(3))
    P4 = path_graph(4)
    A = nx.to_numpy_array(P4)
    np_assert_equal(A, nx.to_numpy_array(WP4, weight=None))
    np_assert_equal(0.5 * A, nx.to_numpy_array(WP4))
    np_assert_equal(0.3 * A, nx.to_numpy_array(WP4, weight='other'))

def test_numpy_multigraph(self):
    G = nx.MultiGraph()
    G.add_edge(1, 2, weight=7)
    G.add_edge(1, 2, weight=70)
    A = nx.to_numpy_array(G)
    assert_equal(A[1, 0], 77)
    A = nx.to_numpy_array(G, multigraph_weight=min)
    assert_equal(A[1, 0], 7)
    A = nx.to_numpy_array(G, multigraph_weight=max)
    assert_equal(A[1, 0], 70)
Example #3
def test_tnet_to_nx():
    df = pd.DataFrame({'i': [0, 0], 'j': [1, 2], 't': [0, 1]})
    dfnx = teneto.utils.tnet_to_nx(df, t=0)
    G = nx.to_numpy_array(dfnx)
    if not G.shape == (2, 2):
        raise AssertionError()
    if not G[0, 1] == 1:
        raise AssertionError()
    def test_dtype_int_multigraph(self):
        """Test that setting dtype int actually gives an integer array.

        For more information, see GitHub pull request #1363.

        """
        G = nx.MultiGraph(nx.complete_graph(3))
        A = nx.to_numpy_array(G, dtype=int)
        assert_equal(A.dtype, int)
    def test_nodelist(self):
        """Conversion from graph to array to graph with nodelist."""
        P4 = path_graph(4)
        P3 = path_graph(3)
        nodelist = list(P3)
        A = nx.to_numpy_array(P4, nodelist=nodelist)
        GA = nx.Graph(A)
        self.assert_equal(GA, P3)

        # Make nodelist ambiguous by containing duplicates.
        nodelist += [nodelist[0]]
        assert_raises(nx.NetworkXError, nx.to_numpy_array, P3, nodelist=nodelist)
Example #6
def spectral_layout(G, weight='weight', scale=1, center=None, dim=2):
    """Position nodes using the eigenvectors of the graph Laplacian.

    Parameters
    ----------
    G : NetworkX graph or list of nodes
        A position will be assigned to every node in G.

    weight : string or None, optional (default='weight')
        The edge attribute that holds the numerical value used for
        the edge weight.  If None, then all edge weights are 1.

    scale : number (default: 1)
        Scale factor for positions.

    center : array-like or None
        Coordinate pair around which to center the layout.

    dim : int
        Dimension of layout.

    Returns
    -------
    pos : dict
        A dictionary of positions keyed by node

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> pos = nx.spectral_layout(G)

    Notes
    -----
    Directed graphs will be considered as undirected graphs when
    positioning the nodes.

    For larger graphs (>500 nodes) this will use the SciPy sparse
    eigenvalue solver (ARPACK).
    """
    # handle some special cases that break the eigensolvers
    import numpy as np

    G, center = _process_params(G, center, dim)

    if len(G) <= 2:
        if len(G) == 0:
            pos = np.array([])
        elif len(G) == 1:
            pos = np.array([center])
        else:
            pos = np.array([np.zeros(dim), np.array(center) * 2.0])
        return dict(zip(G, pos))
    try:
        # Sparse matrix
        if len(G) < 500:  # dense solver is faster for small graphs
            raise ValueError
        A = nx.to_scipy_sparse_matrix(G, weight=weight, dtype='d')
        # Symmetrize directed graphs
        if G.is_directed():
            A = A + np.transpose(A)
        pos = _sparse_spectral(A, dim)
    except (ImportError, ValueError):
        # Dense matrix
        A = nx.to_numpy_array(G, weight=weight)
        # Symmetrize directed graphs
        if G.is_directed():
            A += A.T
        pos = _spectral(A, dim)

    pos = rescale_layout(pos, scale) + center
    pos = dict(zip(G, pos))
    return pos
def test_identity_digraph_array(self):
    """Conversion from digraph to array to digraph."""
    A = nx.to_numpy_array(self.G2)
    self.identity_conversion(self.G2, A, nx.DiGraph())
Example #8
def google_matrix(
    G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None
):
    """Returns the Google matrix of the graph.

    Parameters
    ----------
    G : graph
      A NetworkX graph.  Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float
      The damping factor.

    personalization: dict, optional
      The "personalization vector" consisting of a dictionary with a
      key for some subset of graph nodes and a personalization value for
      each of those. At least one personalization value must be non-zero.
      If not specified, a node's personalization value will be zero.
      By default, a uniform distribution is used.

    nodelist : list, optional
      The rows and columns are ordered according to the nodes in nodelist.
      If nodelist is None, then the ordering is produced by G.nodes().

    weight : key, optional
      Edge data key to use as weight.  If None, weights are set to 1.

    dangling: dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified) This must be selected to result in an irreducible transition
      matrix (see notes below). It may be common to have the dangling dict to
      be the same as the personalization dict.

    Returns
    -------
    A : NumPy matrix
       Google matrix of the graph

    Notes
    -----
    The matrix returned represents the transition matrix that describes the
    Markov chain used in PageRank. For PageRank to converge to a unique
    solution (i.e., a unique stationary distribution in a Markov chain), the
    transition matrix must be irreducible. In other words, it must be that
    there exists a path between every pair of nodes in the graph, or else there
    is the potential of "rank sinks."

    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank, pagerank_numpy, pagerank_scipy
    """
    import numpy as np

    # TODO: Remove this warning in version 3.0
    import warnings

    warnings.warn(
        "google_matrix will return an np.ndarray instead of a np.matrix in\n"
        "NetworkX version 3.0.",
        FutureWarning,
        stacklevel=2,
    )

    if nodelist is None:
        nodelist = list(G)

    A = nx.to_numpy_array(G, nodelist=nodelist, weight=weight)
    N = len(G)
    if N == 0:
        # TODO: Remove np.asmatrix wrapper in version 3.0
        return np.asmatrix(A)

    # Personalization vector
    if personalization is None:
        p = np.repeat(1.0 / N, N)
    else:
        p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float)
        if p.sum() == 0:
            raise ZeroDivisionError
        p /= p.sum()

    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float)
        dangling_weights /= dangling_weights.sum()
    dangling_nodes = np.where(A.sum(axis=1) == 0)[0]

    # Assign dangling_weights to any dangling nodes (nodes with no out links)
    A[dangling_nodes] = dangling_weights

    A /= A.sum(axis=1)[:, np.newaxis]  # Normalize rows to sum to 1

    # TODO: Remove np.asmatrix wrapper in version 3.0
    return np.asmatrix(alpha * A + (1 - alpha) * p)
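
The Notes above describe the returned matrix as the PageRank transition matrix. A minimal sketch of that relationship (my own illustration, not part of the snippet): PageRank is the dominant left eigenvector of the Google matrix.

import networkx as nx
import numpy as np

G = nx.DiGraph([(1, 2), (2, 3), (3, 1), (3, 2)])
M = np.asarray(nx.google_matrix(G, alpha=0.85))

# The stationary distribution of the Markov chain M is the left eigenvector
# for eigenvalue 1; normalize it to sum to 1.
eigenvalues, eigenvectors = np.linalg.eig(M.T)
principal = np.abs(eigenvectors[:, np.argmax(eigenvalues.real)].real)
pagerank = principal / principal.sum()
print(dict(zip(G, pagerank)))  # should closely match nx.pagerank(G, alpha=0.85)
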
Example #9
def spectral_layout(G, weight="weight", scale=1, center=None, dim=2):
    """Position nodes using the eigenvectors of the graph Laplacian.

    Using the unnormalized Laplacian, the layout shows possible clusters of
    nodes which are an approximation of the ratio cut. If dim is the number of
    dimensions then the positions are the entries of the dim eigenvectors
    corresponding to the ascending eigenvalues starting from the second one.

    Parameters
    ----------
    G : NetworkX graph or list of nodes
        A position will be assigned to every node in G.

    weight : string or None, optional (default='weight')
        The edge attribute that holds the numerical value used for
        the edge weight.  If None, then all edge weights are 1.

    scale : number (default: 1)
        Scale factor for positions.

    center : array-like or None
        Coordinate pair around which to center the layout.

    dim : int
        Dimension of layout.

    Returns
    -------
    pos : dict
        A dictionary of positions keyed by node

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> pos = nx.spectral_layout(G)

    Notes
    -----
    Directed graphs will be considered as undirected graphs when
    positioning the nodes.

    For larger graphs (>500 nodes) this will use the SciPy sparse
    eigenvalue solver (ARPACK).
    """
    # handle some special cases that break the eigensolvers
    import numpy as np

    G, center = _process_params(G, center, dim)

    if len(G) <= 2:
        if len(G) == 0:
            pos = np.array([])
        elif len(G) == 1:
            pos = np.array([center])
        else:
            pos = np.array([np.zeros(dim), np.array(center) * 2.0])
        return dict(zip(G, pos))
    try:
        # Sparse matrix
        if len(G) < 500:  # dense solver is faster for small graphs
            raise ValueError
        A = nx.to_scipy_sparse_matrix(G, weight=weight, dtype="d")
        # Symmetrize directed graphs
        if G.is_directed():
            A = A + np.transpose(A)
        pos = _sparse_spectral(A, dim)
    except (ImportError, ValueError):
        # Dense matrix
        A = nx.to_numpy_array(G, weight=weight)
        # Symmetrize directed graphs
        if G.is_directed():
            A += A.T
        pos = _spectral(A, dim)

    pos = rescale_layout(pos, scale=scale) + center
    pos = dict(zip(G, pos))
    return pos
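
To make the eigenvector description above concrete, here is a small self-contained check (a sketch assuming the unnormalized Laplacian and a 2-D layout): up to rescaling and sign, the layout coordinates are the eigenvectors for the second and third smallest Laplacian eigenvalues.

import networkx as nx
import numpy as np

G = nx.path_graph(5)
L = nx.laplacian_matrix(G).toarray().astype(float)
eigenvalues, eigenvectors = np.linalg.eigh(L)

manual = eigenvectors[:, 1:3]    # skip the trivial constant eigenvector
pos = nx.spectral_layout(G)      # library result, rescaled to [-1, 1]

print(manual)
print(np.array([pos[n] for n in G]))
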
Example #10
def save_graph(path_output, graph):
    format = '%1.4f'
    matrix = nx.to_numpy_array(graph)
    np.savetxt(path_output, matrix, delimiter=',', fmt=format)

    return matrix
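
A hypothetical round trip for the helper above ('cycle4.csv' is a made-up path):

import networkx as nx
import numpy as np

matrix = save_graph('cycle4.csv', nx.cycle_graph(4))
loaded = np.loadtxt('cycle4.csv', delimiter=',')
assert np.allclose(matrix, loaded)
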
Example #11
    #diff = np.array(diff.tolist()).reshape(-1, max_nodes, max_nodes)
    #feat = np.array(feat.tolist()).reshape(-1, max_nodes, feat_dim)
    #import pdb; pdb.set_trace()
    return adj, diff, feat, labels, num_nodes


if __name__ == '__main__':
    # MUTAG, PTC_MR, IMDB-BINARY, IMDB-MULTI, REDDIT-BINARY, REDDIT-MULTI-5K,
    # adj, diff, feat, labels = load('REDDIT-BINARY')
    dataset = 'REDDIT-BINARY'
    basedir = os.path.dirname(os.path.abspath(__file__))
    datadir = os.path.join(basedir, 'data', dataset)
    graphs, diff = process(dataset)
    feat, adj, labels = [], [], []

    for idx, graph in enumerate(graphs):
        adj.append(nx.to_numpy_array(graph))
        labels.append(graph.graph['label'])
        feat.append(
            np.array(list(nx.get_node_attributes(graph, 'feat').values())))

    adj, diff, feat, labels = np.array(adj), np.array(diff), np.array(
        feat), np.array(labels)

    np.save(f'{datadir}/adj.npy', adj)
    #np.save(f'{datadir}/diff.npy', diff)
    np.save(f'{datadir}/feat.npy', feat)
    np.save(f'{datadir}/labels.npy', labels)
    # import pdb; pdb.set_trace()
    print('done')
Example #12
def test_identity_weighted_graph_array(self):
    """Conversion from weighted graph to array to weighted graph."""
    A = nx.to_numpy_array(self.G3)
    self.identity_conversion(self.G3, A, nx.Graph())
Example #13
def hub_matrix(G, nodelist=None):
    """Returns the HITS hub matrix."""
    M = nx.to_numpy_array(G, nodelist=nodelist)
    return M @ M.T
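
A short sketch of why this matrix matters (my illustration, not from the snippet): the HITS hub scores are, up to normalization, the principal eigenvector of M @ M.T.

import networkx as nx
import numpy as np

G = nx.DiGraph([(1, 2), (1, 3), (2, 3), (3, 1)])
M = nx.to_numpy_array(G)
H = M @ M.T                       # same quantity as hub_matrix(G)

w, v = np.linalg.eigh(H)          # H is symmetric
hubs = np.abs(v[:, np.argmax(w)])
hubs /= hubs.sum()
print(dict(zip(G, hubs)))         # compare with nx.hits(G)[0]
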
Example #14
def test_identity_graph_array(self):
    """Conversion from graph to array to graph."""
    A = nx.to_numpy_array(self.G1)
    self.identity_conversion(self.G1, A, nx.Graph())
Example #15
def test_identity_digraph_array(self):
    """Conversion from digraph to array to digraph."""
    A = nx.to_numpy_array(self.G2)
    self.identity_conversion(self.G2, A, nx.DiGraph())
Example #16
def get_edge_index(self):
    adj = torch.Tensor(nx.to_numpy_array(self))
    edge_index, _ = dense_to_sparse(adj)
    return edge_index

def test_adjacency_interface_numpy(self):
    A = nx.to_numpy_array(self.Gs)
    pos = nx.drawing.layout._fruchterman_reingold(A)
    assert_equal(pos.shape, (6, 2))
    pos = nx.drawing.layout._fruchterman_reingold(A, dim=3)
    assert_equal(pos.shape, (6, 3))
Example #18
def plot_connectogram(conn_matrix, conn_model, atlas_select, dir_path, ID,
                      network, label_names):
    import json
    from networkx.readwrite import json_graph
    from pathlib import Path
    from pynets.thresholding import normalize
    from pynets.netstats import most_important
    from scipy.cluster.hierarchy import linkage, fcluster
    from nipype.utils.filemanip import save_json

    ##Advanced Settings
    comm = 'nodes'
    pruned = False
    #color_scheme = 'interpolateCool'
    #color_scheme = 'interpolateGnBu'
    #color_scheme = 'interpolateOrRd'
    #color_scheme = 'interpolatePuRd'
    #color_scheme = 'interpolateYlOrRd'
    #color_scheme = 'interpolateReds'
    #color_scheme = 'interpolateGreens'
    color_scheme = 'interpolateBlues'
    ##Advanced Settings

    conn_matrix = normalize(conn_matrix)
    G = nx.from_numpy_matrix(conn_matrix)
    if pruned == True:
        [G, pruned_nodes, pruned_edges] = most_important(G)
        conn_matrix = nx.to_numpy_array(G)

        pruned_nodes.sort(reverse=True)
        for j in pruned_nodes:
            del label_names[label_names.index(label_names[j])]

        pruned_edges.sort(reverse=True)
        for j in pruned_edges:
            del label_names[label_names.index(label_names[j])]

    def doClust(X, clust_levels):
        ##get the linkage diagram
        Z = linkage(
            X,
            'ward',
        )
        ##choose # cluster levels
        cluster_levels = range(1, int(clust_levels))
        ##init array to store labels for each level
        clust_levels_tmp = int(clust_levels) - 1
        label_arr = np.zeros((int(clust_levels_tmp), int(X.shape[0])))
        ##iterate thru levels
        for c in cluster_levels:
            fl = fcluster(Z, c, criterion='maxclust')
            #print(fl)
            label_arr[c - 1, :] = fl
        return label_arr, clust_levels_tmp

    if comm == 'nodes' and len(conn_matrix) > 40:
        from pynets.netstats import modularity_louvain_dir
        if len(conn_matrix) < 50:
            gamma = 0.00001
        elif len(conn_matrix) < 100:
            gamma = 0.0001
        elif len(conn_matrix) < 200:
            gamma = 0.001
        elif len(conn_matrix) < 500:
            gamma = 0.01
        elif len(conn_matrix) < 1000:
            gamma = 0.5
        else:
            gamma = 1

        [node_comm_aff_mat, q] = modularity_louvain_dir(conn_matrix,
                                                        hierarchy=True,
                                                        gamma=gamma)
        print('Found ' + str(len(np.unique(node_comm_aff_mat))) +
              ' communities with gamma=' + str(gamma) + '...')
        clust_levels = len(node_comm_aff_mat)
        clust_levels_tmp = int(clust_levels) - 1
        mask_mat = np.squeeze(np.array([node_comm_aff_mat == 0]).astype('int'))
        label_arr = node_comm_aff_mat * np.expand_dims(
            np.arange(1, clust_levels + 1), axis=1) + mask_mat
    elif comm == 'links' and len(conn_matrix) > 40:
        from pynets.netstats import link_communities
        ##Plot link communities
        link_comm_aff_mat = link_communities(conn_matrix,
                                             type_clustering='single')
        print('Found ' + str(len(link_comm_aff_mat)) + ' communities...')
        clust_levels = len(link_comm_aff_mat)
        clust_levels_tmp = int(clust_levels) - 1
        mask_mat = np.squeeze(np.array([link_comm_aff_mat == 0]).astype('int'))
        label_arr = link_comm_aff_mat * np.expand_dims(
            np.arange(1, clust_levels + 1), axis=1) + mask_mat
    elif len(conn_matrix) > 20:
        print(
            'Graph too small for reliable plotting of communities. Plotting by fcluster instead...'
        )
        if len(conn_matrix) >= 250:
            clust_levels = 7
        elif len(conn_matrix) >= 200:
            clust_levels = 6
        elif len(conn_matrix) >= 150:
            clust_levels = 5
        elif len(conn_matrix) >= 100:
            clust_levels = 4
        elif len(conn_matrix) >= 50:
            clust_levels = 3
        else:
            clust_levels = 2
        [label_arr, clust_levels_tmp] = doClust(conn_matrix, clust_levels)

    def get_node_label(node_idx, labels, clust_levels_tmp):
        from collections import OrderedDict

        def write_roman(num):
            roman = OrderedDict()
            roman[1000] = "M"
            roman[900] = "CM"
            roman[500] = "D"
            roman[400] = "CD"
            roman[100] = "C"
            roman[90] = "XC"
            roman[50] = "L"
            roman[40] = "XL"
            roman[10] = "X"
            roman[9] = "IX"
            roman[5] = "V"
            roman[4] = "IV"
            roman[1] = "I"

            def roman_num(num):
                for r in roman.keys():
                    x, y = divmod(num, r)
                    yield roman[r] * x
                    num -= (r * x)
                    if num > 0:
                        roman_num(num)
                    else:
                        break

            return "".join([a for a in roman_num(num)])

        rn_list = []
        node_idx = node_idx - 1
        node_labels = labels[:, node_idx]
        for i in [int(l) for i, l in enumerate(node_labels)]:
            rn_list.append(json.dumps(write_roman(i)))
        abet = rn_list
        return ".".join([
            "{}{}".format(abet[i], int(l)) for i, l in enumerate(node_labels)
        ]) + ".{}".format(label_names[node_idx])

    output = []

    adj_dict = {}
    for i in list(G.adjacency()):
        source = list(i)[0]
        target = list(list(i)[1])
        adj_dict[source] = target

    for node_idx, connections in adj_dict.items():
        weight_vec = []
        for i in connections:
            wei = G.get_edge_data(node_idx, int(i))['weight']
            weight_vec.append(wei)
        entry = {}
        nodes_label = get_node_label(node_idx, label_arr, clust_levels_tmp)
        entry["name"] = nodes_label
        entry["size"] = len(connections)
        entry["imports"] = [
            get_node_label(int(d) - 1, label_arr, clust_levels_tmp)
            for d in connections
        ]
        entry["weights"] = weight_vec
        output.append(entry)

    if network:
        json_file_name = str(
            ID
        ) + '_' + network + '_connectogram_' + conn_model + '_network.json'
        json_fdg_file_name = str(
            ID) + '_' + network + '_fdg_' + conn_model + '_network.json'
        connectogram_plot = dir_path + '/' + json_file_name
        fdg_js_sub = dir_path + '/' + str(
            ID) + '_' + network + '_fdg_' + conn_model + '_network.js'
        fdg_js_sub_name = str(
            ID) + '_' + network + '_fdg_' + conn_model + '_network.js'
        connectogram_js_sub = dir_path + '/' + str(
            ID) + '_' + network + '_connectogram_' + conn_model + '_network.js'
        connectogram_js_name = str(
            ID) + '_' + network + '_connectogram_' + conn_model + '_network.js'
    else:
        json_file_name = str(ID) + '_connectogram_' + conn_model + '.json'
        json_fdg_file_name = str(ID) + '_fdg_' + conn_model + '.json'
        connectogram_plot = dir_path + '/' + json_file_name
        connectogram_js_sub = dir_path + '/' + str(
            ID) + '_connectogram_' + conn_model + '.js'
        fdg_js_sub = dir_path + '/' + str(ID) + '_fdg_' + conn_model + '.js'
        fdg_js_sub_name = str(ID) + '_fdg_' + conn_model + '.js'
        connectogram_js_name = str(ID) + '_connectogram_' + conn_model + '.js'
    save_json(connectogram_plot, output)

    ##Force-directed graphing
    G = nx.from_numpy_matrix(np.round(conn_matrix.astype('float64'), 6))
    data = json_graph.node_link_data(G)
    data.pop('directed', None)
    data.pop('graph', None)
    data.pop('multigraph', None)
    for k in range(len(data['links'])):
        data['links'][k]['value'] = data['links'][k].pop('weight')
    for k in range(len(data['nodes'])):
        data['nodes'][k]['id'] = str(data['nodes'][k]['id'])
    for k in range(len(data['links'])):
        data['links'][k]['source'] = str(data['links'][k]['source'])
        data['links'][k]['target'] = str(data['links'][k]['target'])

    ##Add community structure
    for k in range(len(data['nodes'])):
        data['nodes'][k]['group'] = str(label_arr[0][k])

    ##Add node labels
    for k in range(len(data['nodes'])):
        data['nodes'][k]['name'] = str(label_names[k])

    out_file = str(dir_path + '/' + json_fdg_file_name)
    save_json(out_file, data)

    ##Copy index.html and json to dir_path
    #conn_js_path = '/Users/PSYC-dap3463/Applications/PyNets/pynets/connectogram.js'
    #index_html_path = '/Users/PSYC-dap3463/Applications/PyNets/pynets/index.html'
    conn_js_path = str(Path(__file__).parent / "connectogram.js")
    index_html_path = str(Path(__file__).parent / "index.html")
    fdg_replacements_js = {"FD_graph.json": str(json_fdg_file_name)}
    replacements_html = {
        'connectogram.js': str(connectogram_js_name),
        'fdg.js': str(fdg_js_sub_name)
    }
    fdg_js_path = str(Path(__file__).parent / "fdg.js")
    with open(index_html_path) as infile, open(str(dir_path + '/index.html'),
                                               'w') as outfile:
        for line in infile:
            for src, target in replacements_html.items():
                line = line.replace(src, target)
            outfile.write(line)

    replacements_js = {
        'template.json': str(json_file_name),
        'interpolateCool': str(color_scheme)
    }
    with open(conn_js_path) as infile, open(connectogram_js_sub,
                                            'w') as outfile:
        for line in infile:
            for src, target in replacements_js.items():
                line = line.replace(src, target)
            outfile.write(line)

    with open(fdg_js_path) as infile, open(fdg_js_sub, 'w') as outfile:
        for line in infile:
            for src, target in fdg_replacements_js.items():
                line = line.replace(src, target)
            outfile.write(line)
Example #20
def test_identity_weighted_digraph_array(self):
    """Conversion from weighted digraph to array to weighted digraph."""
    A = nx.to_numpy_array(self.G4)
    self.identity_conversion(self.G4, A, nx.DiGraph())
Example #21
def plot_all(conn_matrix, conn_model, atlas_select, dir_path, ID, network,
             label_names, mask, coords, thr, node_size, edge_threshold):
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib import pyplot as plt
    from nilearn import plotting as niplot
    pruning = True
    dpi_resolution = 1000
    import pkg_resources
    import networkx as nx
    from pynets import plotting
    from pynets.netstats import most_important
    G_pre = nx.from_numpy_matrix(conn_matrix)
    if pruning == True:
        [G, pruned_nodes, pruned_edges] = most_important(G_pre)
    else:
        G = G_pre
    conn_matrix = nx.to_numpy_array(G)

    pruned_nodes.sort(reverse=True)
    for j in pruned_nodes:
        del label_names[label_names.index(label_names[j])]
        del coords[coords.index(coords[j])]

    pruned_edges.sort(reverse=True)
    for j in pruned_edges:
        del label_names[label_names.index(label_names[j])]
        del coords[coords.index(coords[j])]

    ##Plot connectogram
    if len(conn_matrix) > 20:
        try:
            plotting.plot_connectogram(conn_matrix, conn_model, atlas_select,
                                       dir_path, ID, network, label_names)
        except RuntimeError:
            print('\n\n\nError: Connectogram plotting failed!')
    else:
        print(
            'Error: Cannot plot connectogram for graphs smaller than 20 x 20!')

    ##Plot adj. matrix based on determined inputs
    plotting.plot_conn_mat(conn_matrix, conn_model, atlas_select, dir_path, ID,
                           network, label_names, mask, thr, node_size)

    ##Plot connectome
    if mask:
        if network:
            out_path_fig = dir_path + '/' + ID + '_' + str(
                atlas_select) + '_' + str(conn_model) + '_' + str(
                    os.path.basename(mask).split('.')[0]) + '_' + str(
                        network) + '_' + str(thr) + '_' + str(
                            node_size) + '_connectome_viz.png'
        else:
            out_path_fig = dir_path + '/' + ID + '_' + str(
                atlas_select) + '_' + str(conn_model) + '_' + str(
                    os.path.basename(mask).split('.')[0]) + '_' + str(
                        thr) + '_' + str(node_size) + '_connectome_viz.png'
    else:
        if network:
            out_path_fig = dir_path + '/' + ID + '_' + str(
                atlas_select) + '_' + str(conn_model) + '_' + str(
                    network) + '_' + str(thr) + '_' + str(
                        node_size) + '_connectome_viz.png'
        else:
            out_path_fig = dir_path + '/' + ID + '_' + str(
                atlas_select) + '_' + str(conn_model) + '_' + str(
                    thr) + '_' + str(node_size) + '_connectome_viz.png'
    #niplot.plot_connectome(conn_matrix, coords, edge_threshold=edge_threshold, node_size=20, colorbar=True, output_file=out_path_fig)
    ch2better_loc = pkg_resources.resource_filename(
        "pynets", "templates/ch2better.nii.gz")
    connectome = niplot.plot_connectome(np.zeros(shape=(1, 1)), [(0, 0, 0)],
                                        black_bg=True,
                                        node_size=0.0001)
    connectome.add_overlay(ch2better_loc, alpha=0.4, cmap=plt.cm.gray)
    [z_min, z_max] = -np.abs(conn_matrix).max(), np.abs(conn_matrix).max()
    connectome.add_graph(conn_matrix,
                         coords,
                         edge_threshold=edge_threshold,
                         edge_cmap='Greens',
                         edge_vmax=z_max,
                         edge_vmin=z_min,
                         node_size=4)
    connectome.savefig(out_path_fig, dpi=dpi_resolution)
    return
def subgraph_centrality(G):
    r"""Returns subgraph centrality for each node in G.

    Subgraph centrality  of a node `n` is the sum of weighted closed
    walks of all lengths starting and ending at node `n`. The weights
    decrease with path length. Each closed walk is associated with a
    connected subgraph ([1]_).

    Parameters
    ----------
    G: graph

    Returns
    -------
    nodes : dictionary
       Dictionary of nodes with subgraph centrality as the value.

    Raises
    ------
    NetworkXError
       If the graph is not undirected and simple.

    See Also
    --------
    subgraph_centrality_exp:
        Alternative algorithm of the subgraph centrality for each node of G.

    Notes
    -----
    This version of the algorithm computes eigenvalues and eigenvectors
    of the adjacency matrix.

    Subgraph centrality of a node `u` in G can be found using
    a spectral decomposition of the adjacency matrix [1]_,

    .. math::

       SC(u)=\sum_{j=1}^{N}(v_{j}^{u})^2 e^{\lambda_{j}},

    where `v_j` is an eigenvector of the adjacency matrix `A` of G
    corresponding to the eigenvalue `\lambda_j`.

    Examples
    --------
    (Example from [1]_)
    >>> G = nx.Graph(
    ...     [
    ...         (1, 2),
    ...         (1, 5),
    ...         (1, 8),
    ...         (2, 3),
    ...         (2, 8),
    ...         (3, 4),
    ...         (3, 6),
    ...         (4, 5),
    ...         (4, 7),
    ...         (5, 6),
    ...         (6, 7),
    ...         (7, 8),
    ...     ]
    ... )
    >>> sc = nx.subgraph_centrality(G)
    >>> print([f"{node} {sc[node]:0.2f}" for node in sorted(sc)])
    ['1 3.90', '2 3.90', '3 3.64', '4 3.71', '5 3.64', '6 3.71', '7 3.64', '8 3.90']

    References
    ----------
    .. [1] Ernesto Estrada, Juan A. Rodriguez-Velazquez,
       "Subgraph centrality in complex networks",
       Physical Review E 71, 056103 (2005).
       https://arxiv.org/abs/cond-mat/0504730

    """
    import numpy as np

    nodelist = list(G)  # ordering of nodes in matrix
    A = nx.to_numpy_array(G, nodelist)
    # convert to 0-1 matrix
    A[np.nonzero(A)] = 1
    w, v = np.linalg.eigh(A)
    vsquare = np.array(v)**2
    expw = np.exp(w)
    xg = vsquare @ expw
    # convert vector to dictionary keyed by node
    sc = dict(zip(nodelist, map(float, xg)))
    return sc
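
A quick cross-check of the spectral formula in the Notes (a sketch, assuming SciPy is available): the eigendecomposition route used above agrees with the diagonal of expm(A) used by subgraph_centrality_exp.

import networkx as nx
import numpy as np
import scipy.linalg

G = nx.cycle_graph(5)
A = nx.to_numpy_array(G)
w, v = np.linalg.eigh(A)

sc_spectral = (v ** 2) @ np.exp(w)         # SC(u) = sum_j (v_j^u)^2 * e^{lambda_j}
sc_expm = np.diag(scipy.linalg.expm(A))
print(np.allclose(sc_spectral, sc_expm))   # True
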
Example #23
print(
    f"Preprocessed graph {graph_type} with threshold={threshold}, weight={weight}"
)

out_classes = [
    "O_dVNC",
    "O_dSEZ",
    "O_IPC",
    "O_ITP",
    "O_dSEZ;FFN",
    "O_CA-LP",
    "O_dSEZ;FB2N",
]
sens_classes = ["sens"]

adj = nx.to_numpy_array(mg.g, weight=weight, nodelist=mg.meta.index.values)
prob_mat = adj.copy()
row_sums = prob_mat.sum(axis=1)
dead_inds = np.where(row_sums == 0)[0]
row_sums[row_sums == 0] = 1
prob_mat = prob_mat / row_sums[:, np.newaxis]

n_verts = len(prob_mat)
meta = mg.meta.copy()
g = mg.g.copy()
meta["idx"] = range(len(meta))
from_inds = meta[meta["Class 1"].isin(sens_classes)]["idx"].values
out_inds = meta[meta["Class 1"].isin(out_classes)]["idx"].values

ind_map = dict(zip(meta.index, meta["idx"]))
g = nx.relabel_nodes(g, ind_map, copy=True)
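
A self-contained sketch of the row normalization above on a toy digraph (my example, not the project's data): dead-end rows are detected first and left all-zero instead of being divided by zero.

import networkx as nx
import numpy as np

G = nx.DiGraph([(0, 1), (0, 2), (1, 2)])   # node 2 has no out-edges
adj = nx.to_numpy_array(G)
row_sums = adj.sum(axis=1)
dead_inds = np.where(row_sums == 0)[0]
row_sums[row_sums == 0] = 1                # avoid division by zero
prob_mat = adj / row_sums[:, np.newaxis]

print(dead_inds)   # [2]
print(prob_mat)    # rows 0 and 1 sum to 1, row 2 stays all zeros
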
def subgraph_centrality_exp(G):
    r"""Returns the subgraph centrality for each node of G.

    Subgraph centrality  of a node `n` is the sum of weighted closed
    walks of all lengths starting and ending at node `n`. The weights
    decrease with path length. Each closed walk is associated with a
    connected subgraph ([1]_).

    Parameters
    ----------
    G: graph

    Returns
    -------
    nodes:dictionary
        Dictionary of nodes with subgraph centrality as the value.

    Raises
    ------
    NetworkXError
        If the graph is not undirected and simple.

    See Also
    --------
    subgraph_centrality:
        Alternative algorithm of the subgraph centrality for each node of G.

    Notes
    -----
    This version of the algorithm exponentiates the adjacency matrix.

    The subgraph centrality of a node `u` in G can be found using
    the matrix exponential of the adjacency matrix of G [1]_,

    .. math::

        SC(u)=(e^A)_{uu} .

    References
    ----------
    .. [1] Ernesto Estrada, Juan A. Rodriguez-Velazquez,
       "Subgraph centrality in complex networks",
       Physical Review E 71, 056103 (2005).
       https://arxiv.org/abs/cond-mat/0504730

    Examples
    --------
    (Example from [1]_)
    >>> G = nx.Graph(
    ...     [
    ...         (1, 2),
    ...         (1, 5),
    ...         (1, 8),
    ...         (2, 3),
    ...         (2, 8),
    ...         (3, 4),
    ...         (3, 6),
    ...         (4, 5),
    ...         (4, 7),
    ...         (5, 6),
    ...         (6, 7),
    ...         (7, 8),
    ...     ]
    ... )
    >>> sc = nx.subgraph_centrality_exp(G)
    >>> print([f"{node} {sc[node]:0.2f}" for node in sorted(sc)])
    ['1 3.90', '2 3.90', '3 3.64', '4 3.71', '5 3.64', '6 3.71', '7 3.64', '8 3.90']
    """
    # alternative implementation that calculates the matrix exponential
    import scipy as sp
    import scipy.linalg  # call as sp.linalg

    nodelist = list(G)  # ordering of nodes in matrix
    A = nx.to_numpy_array(G, nodelist)
    # convert to 0-1 matrix
    A[A != 0.0] = 1
    expA = sp.linalg.expm(A)
    # convert diagonal to dictionary keyed by node
    sc = dict(zip(nodelist, map(float, expA.diagonal())))
    return sc
Example #25
def notears_live(G: nx.DiGraph,
                 X: np.ndarray,
                 lambda1: float,
                 max_iter: int = 100,
                 h_tol: float = 1e-8,
                 w_threshold: float = 0.3) -> np.ndarray:
    """Monitor the optimization progress live in notebook.

    Args:
        G: ground truth graph
        X: [n,d] sample matrix
        lambda1: l1 regularization parameter
        max_iter: max number of dual ascent steps
        h_tol: exit if |h(w)| <= h_tol
        w_threshold: fixed threshold for edge weights

    Returns:
        W_est: [d,d] estimate
    """
    # initialization
    n, d = X.shape
    w_est, w_new = np.zeros(d * d), np.zeros(d * d)
    rho, alpha, h, h_new = 1.0, 0.0, np.inf, np.inf

    # ground truth
    w_true = nx.to_numpy_array(G).flatten()

    # progress, stream
    progress_data = {
        key: []
        for key in ['step', 'F', 'h', 'rho', 'alpha', 'l2_dist']
    }
    progress_source = ColumnDataSource(data=progress_data)

    # heatmap, patch
    ids = [str(i) for i in range(d)]
    all_ids = np.tile(ids, [d, 1])
    row = all_ids.T.flatten()
    col = all_ids.flatten()
    heatmap_data = {
        'row': row,
        'col': col,
        'w_true': w_true,
        'w_est': w_est,
        'w_diff': w_true - w_est
    }
    heatmap_source = ColumnDataSource(data=heatmap_data)
    mapper = LinearColorMapper(palette=Palette, low=-2, high=2)

    # common tools
    tools = 'crosshair,save,reset'

    # F(w_est) vs step
    F_true = cppext.F_func(w_true, X, lambda1)
    fig0 = figure(plot_width=270,
                  plot_height=240,
                  y_axis_type='log',
                  tools=tools)
    fig0.ray(0,
             F_true,
             length=0,
             angle=0,
             color='green',
             line_dash='dashed',
             line_width=2,
             legend='F(w_true)')
    fig0.line('step',
              'F',
              source=progress_source,
              color='red',
              line_width=2,
              legend='F(w_est)')
    fig0.title.text = "Objective"
    fig0.xaxis.axis_label = "step"
    fig0.legend.location = "bottom_left"
    fig0.legend.background_fill_alpha = 0.5
    fig0.add_tools(
        HoverTool(tooltips=[("step", "@step"), ("F", "@F"),
                            ("F_true", '%.6g' % F_true)],
                  mode='vline'))

    # h(w_est) vs step
    fig1 = figure(plot_width=280,
                  plot_height=240,
                  y_axis_type='log',
                  tools=tools)
    fig1.line('step',
              'h',
              source=progress_source,
              color='magenta',
              line_width=2,
              legend='h(w_est)')
    fig1.title.text = "Constraint"
    fig1.xaxis.axis_label = "step"
    fig1.legend.location = "bottom_left"
    fig1.legend.background_fill_alpha = 0.5
    fig1.add_tools(
        HoverTool(tooltips=[("step", "@step"), ("h", "@h"), ("rho", "@rho"),
                            ("alpha", "@alpha")],
                  mode='vline'))

    # ||w_true - w_est|| vs step
    fig2 = figure(plot_width=270,
                  plot_height=240,
                  y_axis_type='log',
                  tools=tools)
    fig2.line('step',
              'l2_dist',
              source=progress_source,
              color='blue',
              line_width=2)
    fig2.title.text = "L2 distance to W_true"
    fig2.xaxis.axis_label = "step"
    fig2.add_tools(
        HoverTool(tooltips=[("step", "@step"), ("w_est", "@l2_dist")],
                  mode='vline'))

    # heatmap of w_true
    fig3 = figure(plot_width=270,
                  plot_height=240,
                  x_range=ids,
                  y_range=list(reversed(ids)),
                  tools=tools)
    fig3.rect(x='col',
              y='row',
              width=1,
              height=1,
              source=heatmap_source,
              line_color=None,
              fill_color=transform('w_true', mapper))
    fig3.title.text = 'W_true'
    fig3.axis.visible = False
    fig3.add_tools(
        HoverTool(tooltips=[("row, col", "@row, @col"), ("w_true",
                                                         "@w_true")]))

    # heatmap of w_est
    fig4 = figure(plot_width=280,
                  plot_height=240,
                  x_range=ids,
                  y_range=list(reversed(ids)),
                  tools=tools)
    fig4.rect(x='col',
              y='row',
              width=1,
              height=1,
              source=heatmap_source,
              line_color=None,
              fill_color=transform('w_est', mapper))
    fig4.title.text = 'W_est'
    fig4.axis.visible = False
    fig4.add_tools(
        HoverTool(tooltips=[("row, col", "@row, @col"), ("w_est", "@w_est")]))

    # heatmap of w_true - w_est
    fig5 = figure(plot_width=270,
                  plot_height=240,
                  x_range=ids,
                  y_range=list(reversed(ids)),
                  tools=tools)
    fig5.rect(x='col',
              y='row',
              width=1,
              height=1,
              source=heatmap_source,
              line_color=None,
              fill_color=transform('w_diff', mapper))
    fig5.title.text = 'W_true - W_est'
    fig5.axis.visible = False
    fig5.add_tools(
        HoverTool(tooltips=[("row, col", "@row, @col"), ("w_diff",
                                                         "@w_diff")]))

    # display figures as grid
    grid = gridplot([[fig0, fig1, fig2], [fig3, fig4, fig5]],
                    merge_tools=False)
    handle = show(grid, notebook_handle=True)

    # enter main loop
    for it in range(max_iter):
        while rho < 1e+20:
            w_new = cppext.minimize_subproblem(w_est, X, rho, alpha, lambda1)
            h_new = cppext.h_func(w_new)
            if h_new > 0.25 * h:
                rho *= 10
            else:
                break
        w_est, h = w_new, h_new
        alpha += rho * h
        # update figures
        progress_source.stream({
            'step': [it],
            'F': [cppext.F_func(w_est, X, lambda1)],
            'h': [h],
            'rho': [rho],
            'alpha': [alpha],
            'l2_dist': [np.linalg.norm(w_est - w_true)],
        })
        heatmap_source.patch({
            'w_est': [(slice(d * d), w_est)],
            'w_diff': [(slice(d * d), w_true - w_est)]
        })
        push_notebook(handle=handle)
        # check termination of main loop
        if h <= h_tol:
            break

    # final threshold
    w_est[np.abs(w_est) < w_threshold] = 0
    return w_est.reshape([d, d])
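
A hypothetical invocation of the monitor above (an assumption-heavy sketch: it requires the cppext extension, a Jupyter notebook with bokeh output enabled, and data actually sampled from the SEM implied by G; the random X below is only a placeholder):

import networkx as nx
import numpy as np

d, n = 10, 1000
G_true = nx.gnp_random_graph(d, 0.3, directed=True, seed=0)
X = np.random.randn(n, d)  # placeholder; real use needs samples generated from G_true
W_est = notears_live(G_true, X, lambda1=0.1)
print((W_est != 0).sum(), "edges kept after thresholding")
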
def communicability_betweenness_centrality(G):
    r"""Returns subgraph communicability for all pairs of nodes in G.

    Communicability betweenness measure makes use of the number of walks
    connecting every pair of nodes as the basis of a betweenness centrality
    measure.

    Parameters
    ----------
    G: graph

    Returns
    -------
    nodes : dictionary
        Dictionary of nodes with communicability betweenness as the value.

    Raises
    ------
    NetworkXError
        If the graph is not undirected and simple.

    Notes
    -----
    Let `G=(V,E)` be a simple undirected graph with `n` nodes and `m` edges,
    and `A` denote the adjacency matrix of `G`.

    Let `G(r)=(V,E(r))` be the graph resulting from
    removing all edges connected to node `r` but not the node itself.

    The adjacency matrix for `G(r)` is `A+E(r)`,  where `E(r)` has nonzeros
    only in row and column `r`.

    The subgraph betweenness of a node `r` is [1]_

    .. math::

         \omega_{r} = \frac{1}{C}\sum_{p}\sum_{q}\frac{G_{prq}}{G_{pq}},
         p\neq q, q\neq r,

    where
    `G_{prq}=(e^{A})_{pq} - (e^{A+E(r)})_{pq}` is the number of walks
    involving node r,
    `G_{pq}=(e^{A})_{pq}` is the number of closed walks starting
    at node `p` and ending at node `q`,
    and `C=(n-1)^{2}-(n-1)` is a normalization factor equal to the
    number of terms in the sum.

    The resulting `\omega_{r}` takes values between zero and one.
    The lower bound cannot be attained for a connected
    graph, and the upper bound is attained in the star graph.

    References
    ----------
    .. [1] Ernesto Estrada, Desmond J. Higham, Naomichi Hatano,
       "Communicability Betweenness in Complex Networks"
       Physica A 388 (2009) 764-774.
       https://arxiv.org/abs/0905.4102

    Examples
    --------
    >>> G = nx.Graph([(0, 1), (1, 2), (1, 5), (5, 4), (2, 4), (2, 3), (4, 3), (3, 6)])
    >>> cbc = nx.communicability_betweenness_centrality(G)
    >>> print([f"{node} {cbc[node]:0.2f}" for node in sorted(cbc)])
    ['0 0.03', '1 0.45', '2 0.51', '3 0.45', '4 0.40', '5 0.19', '6 0.03']
    """
    import numpy as np
    import scipy as sp
    import scipy.linalg  # call as sp.linalg

    nodelist = list(G)  # ordering of nodes in matrix
    n = len(nodelist)
    A = nx.to_numpy_array(G, nodelist)
    # convert to 0-1 matrix
    A[np.nonzero(A)] = 1
    expA = sp.linalg.expm(A)
    mapping = dict(zip(nodelist, range(n)))
    cbc = {}
    for v in G:
        # remove row and col of node v
        i = mapping[v]
        row = A[i, :].copy()
        col = A[:, i].copy()
        A[i, :] = 0
        A[:, i] = 0
        B = (expA - sp.linalg.expm(A)) / expA
        # sum with row/col of node v and diag set to zero
        B[i, :] = 0
        B[:, i] = 0
        B -= np.diag(np.diag(B))
        cbc[v] = float(B.sum())
        # put row and col back
        A[i, :] = row
        A[:, i] = col
    # rescale when more than two nodes
    order = len(cbc)
    if order > 2:
        scale = 1.0 / ((order - 1.0)**2 - (order - 1.0))
        for v in cbc:
            cbc[v] *= scale
    return cbc
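
To make the per-node quantity computed in the loop above concrete, here is a didactic re-derivation for the hub of a star graph (a sketch, not an alternative API; the Notes say the upper bound is attained on the star graph):

import networkx as nx
import numpy as np
import scipy.linalg

G = nx.star_graph(4)
nodes = list(G)
A = nx.to_numpy_array(G, nodelist=nodes)
i = nodes.index(0)                          # the hub

A_i = A.copy()
A_i[i, :] = 0
A_i[:, i] = 0
B = (scipy.linalg.expm(A) - scipy.linalg.expm(A_i)) / scipy.linalg.expm(A)
B[i, :] = 0
B[:, i] = 0
B -= np.diag(np.diag(B))

n = len(nodes)
print(B.sum() / ((n - 1) ** 2 - (n - 1)))                # hub's communicability betweenness
print(nx.communicability_betweenness_centrality(G)[0])   # same value
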
Example #27
def load(dataset):
    basedir = os.path.dirname(os.path.abspath(__file__))
    datadir = os.path.join(basedir, 'data', dataset)
    print(datadir)

    if not os.path.exists(datadir):
        download(dataset)
        graphs, diff = process(dataset)
        feat, adj, labels = [], [], []

        for idx, graph in enumerate(graphs):
            adj.append(nx.to_numpy_array(graph))
            labels.append(graph.graph['label'])
            feat.append(
                np.array(list(nx.get_node_attributes(graph, 'feat').values())))

        adj, diff, feat, labels = np.array(adj), np.array(diff), np.array(
            feat), np.array(labels)

        np.save(f'{datadir}/adj.npy', adj)
        #np.save(f'{datadir}/diff.npy', diff)
        np.save(f'{datadir}/feat.npy', feat)
        np.save(f'{datadir}/labels.npy', labels)

    else:
        adj = np.load(f'{datadir}/adj.npy', allow_pickle=True)
        #diff = np.load(f'{datadir}/diff.npy', allow_pickle=True)
        feat = np.load(f'{datadir}/feat.npy', allow_pickle=True)
        labels = np.load(f'{datadir}/labels.npy', allow_pickle=True)

    max_nodes = max([a.shape[0] for a in adj])
    feat_dim = feat[0].shape[-1]

    num_nodes = []
    #import pdb;pdb.set_trace()
    for idx in range(adj.shape[0]):

        num_nodes.append(adj[idx].shape[-1])

        #adj[idx] = normalize_adj(adj[idx]).todense()

        #diff[idx] = sparse.csr_matrix(np.hstack(
        #    (np.vstack((diff[idx], np.zeros((max_nodes - diff[idx].shape[0], diff[idx].shape[0])))),
        #     np.zeros((max_nodes, max_nodes - diff[idx].shape[1])))))
        diff = []
        adj[idx] = sparse.csr_matrix(
            np.hstack((np.vstack(
                (adj[idx],
                 np.zeros(
                     (max_nodes - adj[idx].shape[0], adj[idx].shape[0])))),
                       np.zeros((max_nodes, max_nodes - adj[idx].shape[1])))))

        feat[idx] = sparse.csr_matrix(
            np.vstack(
                (feat[idx], np.zeros(
                    (max_nodes - feat[idx].shape[0], feat_dim)))))

    #adj = np.array(adj.tolist()).reshape(-1, max_nodes, max_nodes)
    #diff = np.array(diff.tolist()).reshape(-1, max_nodes, max_nodes)
    #feat = np.array(feat.tolist()).reshape(-1, max_nodes, feat_dim)
    #import pdb; pdb.set_trace()
    return adj, diff, feat, labels, num_nodes
Example #28
def load_pubmed(feature_dim, initializer):
    #hardcoded for simplicity...
    num_nodes = 19717
    num_feats = feature_dim if initializer != 'None' else 500
    num_classes = 3
    train_size = num_classes * 20
    if initializer == "1hot":
        num_feats = num_nodes
    feat_data = np.zeros((num_nodes, num_feats))
    labels = np.empty((num_nodes, 1), dtype=np.int64)

    node_map = {}
    label_map = {}
    label_node_list_map = {}
    train_idx = []
    test_idx = []
    val_idx = []

    if initializer == "None":
        with open("pubmed-data/Pubmed-Diabetes.NODE.paper.tab") as fp:
            fp.readline()
            feat_map = {
                entry.split(":")[1]: i - 1
                for i, entry in enumerate(fp.readline().split("\t"))
            }
            for i, line in enumerate(fp):
                info = line.split("\t")
                node_map[info[0]] = i
                labels[i] = int(info[1].split("=")[1]) - 1
                if labels[i][0] not in label_node_list_map:
                    label_node_list_map[labels[i][0]] = []
                label_node_list_map[labels[i][0]].append(i)
                for word_info in info[2:-1]:
                    word_info = word_info.split("=")
                    feat_data[i][feat_map[word_info[0]]] = float(word_info[1])
    else:
        with open("pubmed-data/Pubmed-Diabetes.NODE.paper.tab") as fp:
            fp.readline()
            fp.readline()
            for i, line in enumerate(fp):
                info = line.split("\t")
                node_map[info[0]] = i
                labels[i] = int(info[1].split("=")[1]) - 1
                if labels[i][0] not in label_node_list_map:
                    label_node_list_map[labels[i][0]] = []
                label_node_list_map[labels[i][0]].append(i)

        # set initializer method
        if initializer == "1hot":
            feat_data = np.eye(num_nodes)
        elif initializer == "random_normal":
            feat_data = np.random.normal(0, 1, (num_nodes, feature_dim))
        elif initializer == "shared":
            feat_data = np.ones((num_nodes, feature_dim))
        elif initializer == "node_degree":
            feat_data = np.zeros((num_nodes, 1))
        elif initializer == "3371" or "eigen" in initializer or initializer == "pagerank":
            G = nx.Graph()
            G.add_nodes_from(node_map.values())
        elif initializer == "deepwalk":
            feat_data = extract_deepwalk_embeddings(
                "pubmed-data/pubmed_{dim}.embeddings".format(dim=feature_dim),
                node_map)

    adj_lists = defaultdict(set)
    with open("pubmed-data/Pubmed-Diabetes.DIRECTED.cites.tab") as fp:
        fp.readline()
        fp.readline()
        for line in fp:
            info = line.strip().split("\t")
            paper1 = node_map[info[1].split(":")[1]]
            paper2 = node_map[info[-1].split(":")[1]]
            adj_lists[paper1].add(paper2)
            adj_lists[paper2].add(paper1)
            if initializer == "pagerank" or initializer == "eigen":
                G.add_edge(paper1, paper2)
                G.add_edge(paper2, paper1)

    if initializer == "node_degree":
        # convert to 1hot representation
        node_degrees = [len(v) for v in adj_lists.values()]
        max_degree = max(node_degrees)
        feat_data = np.zeros((num_nodes, max_degree + 1))
        for k, v in adj_lists.items():
            feat_data[k, len(v)] = 1
    elif initializer == "pagerank":
        feat_data = np.zeros((num_nodes, feature_dim))
        pagerank = nx.pagerank(G)
        for k, v in pagerank.items():
            feat_data[k, :] = v
    elif "eigen" in initializer:
        try:
            if initializer == "eigen":
                v = np.load("pubmed-data/pubmed_eigenvector.npy")
            else:
                v = np.load(
                    "pubmed-data/pubmed_eigenvector_degree_normalized.npy")
            print(v.shape)
        except:
            adj_matrix = nx.to_numpy_array(G)
            # normalize adjacency matrix with degree
            if initializer == "eigen_norm":
                sum_of_rows = adj_matrix.sum(axis=1)
                adj_matrix = adj_matrix / sum_of_rows[:, None]
            print("start computing eigen vectors")
            w, v = LA.eig(adj_matrix)
            indices = np.argsort(w)[::-1]
            v = v.transpose()[indices]
            # only save top 1000 eigenvectors
            if initializer == "eigen":
                np.save("pubmed-data/pubmed_eigenvector", v[:1000])
            else:
                np.save("pubmed-data/pubmed_eigenvector_degree_normalized",
                        v[:1000])
        # print(v)
        feat_data = np.zeros((num_nodes, feature_dim))
        # assert(feature_dim <= 1000)
        for i in range(num_nodes):
            for j in range(feature_dim):
                feat_data[i, j] = v[j, i]

    for label, node_list in label_node_list_map.items():
        random.shuffle(node_list)
        train_idx.extend(node_list[:20])
        anchor = 20 + int(500 / num_classes)
        val_idx.extend(node_list[20:anchor])
        test_idx.extend(node_list[anchor:])

    return feat_data, labels, train_idx, test_idx, val_idx, adj_lists
Example #29
 def test_graphin(self):
     G = nx.from_numpy_array(self.A)
     np.testing.assert_array_equal(nx.to_numpy_array(G), gus.import_graph(G))
Example #30
def load_citeseer(feature_dim, initializer="None"):
    '''
    hard coded for simplicity
    '''

    num_nodes = 3312
    num_feats = feature_dim if initializer != 'None' else 3703
    num_classes = 6
    train_size = num_classes * 20
    if initializer == "1hot":
        num_feats = num_nodes
    feat_data = np.zeros((num_nodes, num_feats))
    labels = np.empty((num_nodes, 1), dtype=np.int64)

    node_map = {}
    label_map = {}
    label_node_list_map = {}
    train_idx = []
    test_idx = []
    val_idx = []

    if initializer == "None":
        with open("citeseer/citeseer.content") as fp:
            for i, line in enumerate(fp):
                info = line.strip().split()
                # print(len(list(map(float, info[1:-1]))))
                feat_data[i, :] = list(map(float, info[1:-1]))
                print(feat_data[i, :].shape)
                node_map[info[0]] = i
                if not info[-1] in label_map:
                    label_map[info[-1]] = len(label_map)
                    label_node_list_map[len(label_map) - 1] = []
                labels[i] = label_map[info[-1]]
                label_node_list_map[labels[i][0]].append(i)
    else:
        print("Initializing with", initializer)
        with open("citeseer/citeseer.content") as fp:
            for i, line in enumerate(fp):
                info = line.strip().split()
                node_map[info[0]] = i
                if not info[-1] in label_map:
                    label_map[info[-1]] = len(label_map)
                    label_node_list_map[len(label_map) - 1] = []
                labels[i] = label_map[info[-1]]
                label_node_list_map[labels[i][0]].append(i)
        # set initializer method
        if initializer == "1hot":
            feat_data = np.eye(num_nodes)
        elif initializer == "random_normal":
            feat_data = np.random.normal(0, 1, (num_nodes, feature_dim))
        elif initializer == "shared":
            feat_data = np.ones((num_nodes, feature_dim))
        elif initializer == "node_degree":
            feat_data = np.zeros((num_nodes, 1))
        elif initializer == "pagerank" or "eigen" in initializer:
            G = nx.Graph()
            G.add_nodes_from(node_map.values())
        elif initializer == "deepwalk":
            feat_data = extract_deepwalk_embeddings(
                "citeseer/citeseer_{dim}.embeddings".format(dim=feature_dim),
                node_map, "citeseer")

    adj_lists = defaultdict(set)
    with open("citeseer/citeseer.cites") as fp:
        for i, line in enumerate(fp):
            info = line.strip().split()
            try:
                paper1 = node_map[info[0]]
                paper2 = node_map[info[1]]
            except:
                # print(info[0], info[1])
                continue
            adj_lists[paper1].add(paper2)
            adj_lists[paper2].add(paper1)
            if initializer == "pagerank" or initializer == "eigen":
                G.add_edge(paper1, paper2)
                G.add_edge(paper2, paper1)

    if initializer == "node_degree":
        # convert to 1hot representation
        node_degrees = [len(v) for v in adj_lists.values()]
        max_degree = max(node_degrees)
        feat_data = np.zeros((num_nodes, max_degree + 1))
        for k, v in adj_lists.items():
            feat_data[k, len(v)] = 1
    elif initializer == "pagerank":
        feat_data = np.zeros((num_nodes, feature_dim))
        pagerank = nx.pagerank(G)
        for k, v in pagerank.items():
            feat_data[k, :] = v
    elif "eigen" in initializer:
        try:
            if initializer == "eigen":
                v = np.load("citeseer/citeseer_eigenvector.npy")
            else:
                v = np.load(
                    "citeseer/citeseer_eigenvector_degree_normalized.npy")
            print(v.shape)
        except:
            adj_matrix = nx.to_numpy_array(G)
            # normalize adjacency matrix with degree
            if initializer == "eigen_norm":
                sum_of_rows = adj_matrix.sum(axis=1)
                adj_matrix = adj_matrix / sum_of_rows[:, None]
            print("start computing eigen vectors")
            w, v = LA.eig(adj_matrix)
            indices = np.argsort(w)[::-1]
            v = v.transpose()[indices]
            # only save top 1000 eigenvectors
            if initializer == "eigen":
                np.save("citeseer/citeseer_eigenvector", v[:1000])
            else:
                np.save("citeseer/citeseer_eigenvector_degree_normalized",
                        v[:1000])

        # for j in range(0, 5):
        #     count = 0
        #     for i in range(v.shape[1]):
        #         if v[j, i].real < 1e-200 and v[j, i].real > 0:
        #             print("real part smaller than 1e-200", j, i, v[j, i])
        #             count += 1
        #     print(j, count)

        adj_matrix = nx.to_numpy_array(G)
        v = v.real
        # zeros = 3312-np.count_nonzero(v, axis=1)
        # np.savetxt("citeseer_eigenvector_nonzeros.txt", zeros, fmt="%d")
        # plt.bar(range(len(zeros)), zeros)
        # plt.xlabel("node index")
        # plt.ylabel("number of zeros in eigenvectors")
        # plt.savefig('plot.png', dpi=300, bbox_inches='tight')

        # plt.show()
        feat_data = np.zeros((num_nodes, feature_dim))
        # assert(feature_dim <= 1000)
        for i in range(num_nodes):
            for j in range(feature_dim):
                feat_data[i, j] = v[j, i]

    for label, node_list in label_node_list_map.items():
        random.shuffle(node_list)
        train_idx.extend(node_list[:20])
        anchor = 20 + int(500 / num_classes)
        val_idx.extend(node_list[20:anchor])
        test_idx.extend(node_list[anchor:])

    return feat_data, labels, train_idx, test_idx, val_idx, adj_lists
Example #31
def fruchterman_reingold_layout(G,
                                k=None,
                                pos=None,
                                fixed=None,
                                iterations=50,
                                threshold=1e-4,
                                weight='weight',
                                scale=1,
                                center=None,
                                dim=2,
                                seed=None):
    """Position nodes using Fruchterman-Reingold force-directed algorithm.

    Parameters
    ----------
    G : NetworkX graph or list of nodes
        A position will be assigned to every node in G.

    k : float (default=None)
        Optimal distance between nodes.  If None the distance is set to
        1/sqrt(n) where n is the number of nodes.  Increase this value
        to move nodes farther apart.

    pos : dict or None  optional (default=None)
        Initial positions for nodes as a dictionary with node as keys
        and values as a coordinate list or tuple.  If None, then use
        random initial positions.

    fixed : list or None  optional (default=None)
        Nodes to keep fixed at initial position.

    iterations : int  optional (default=50)
        Maximum number of iterations taken

    threshold: float optional (default = 1e-4)
        Threshold for relative error in node position changes.
        The iteration stops if the error is below this threshold.

    weight : string or None   optional (default='weight')
        The edge attribute that holds the numerical value used for
        the edge weight.  If None, then all edge weights are 1.

    scale : number (default: 1)
        Scale factor for positions. Not used unless `fixed is None`.

    center : array-like or None
        Coordinate pair around which to center the layout.
        Not used unless `fixed is None`.

    dim : int
        Dimension of layout.

    seed : int, RandomState instance or None  optional (default=None)
        Set the random state for deterministic node layouts.
        If int, `seed` is the seed used by the random number generator,
        if numpy.random.RandomState instance, `seed` is the random
        number generator,
        if None, the random number generator is the RandomState instance used
        by numpy.random.

    Returns
    -------
    pos : dict
        A dictionary of positions keyed by node

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> pos = nx.spring_layout(G)

    # The same using longer but equivalent function name
    >>> pos = nx.fruchterman_reingold_layout(G)
    """
    import numpy as np

    G, center = _process_params(G, center, dim)

    if fixed is not None:
        nfixed = dict(zip(G, range(len(G))))
        fixed = np.asarray([nfixed[v] for v in fixed])

    if pos is not None:
        # Determine size of existing domain to adjust initial positions
        dom_size = max(coord for pos_tup in pos.values() for coord in pos_tup)
        if dom_size == 0:
            dom_size = 1
        pos_arr = seed.rand(len(G), dim) * dom_size + center

        for i, n in enumerate(G):
            if n in pos:
                pos_arr[i] = np.asarray(pos[n])
    else:
        pos_arr = None
        dom_size = 1

    if len(G) == 0:
        return {}
    if len(G) == 1:
        return {nx.utils.arbitrary_element(G.nodes()): center}

    try:
        # Sparse matrix
        if len(G) < 500:  # dense solver below is faster for small graphs
            raise ValueError
        A = nx.to_scipy_sparse_matrix(G, weight=weight, dtype='f')
        if k is None and fixed is not None:
            # We must adjust k by domain size for layouts not near 1x1
            nnodes, _ = A.shape
            k = dom_size / np.sqrt(nnodes)
        pos = _sparse_fruchterman_reingold(A, k, pos_arr, fixed,
                                           iterations, threshold,
                                           dim, seed)
    except ValueError:
        A = nx.to_numpy_array(G, weight=weight)
        if k is None and fixed is not None:
            # We must adjust k by domain size for layouts not near 1x1
            nnodes, _ = A.shape
            k = dom_size / np.sqrt(nnodes)
        pos = _fruchterman_reingold(A, k, pos_arr, fixed, iterations,
                                    threshold, dim, seed)
    if fixed is None:
        pos = rescale_layout(pos, scale=scale) + center
    pos = dict(zip(G, pos))
    return pos
Example #32
 def graph_to_array(g):
     if type(g) == nx.Graph:
         g = nx.to_numpy_array(g)
     elif type(g) == sp.sparse.csr_matrix:
         g = g.toarray()
     return g
 def test_identity_graph_array(self):
     "Conversion from graph to array to graph."
     A = nx.to_numpy_array(self.G1)
     self.identity_conversion(self.G1, A, nx.Graph())
Example #34
def count_noiseless_subsystems(g: nx.Graph):
    n = g.number_of_nodes()
    m = nx.to_numpy_array(g) if g.number_of_edges() > 0 else np.zeros([n, n])
    return count_noiseless_eigenvectors(m)
 def test_identity_weighted_graph_array(self):
     """Conversion from weighted graph to array to weighted graph."""
     A = nx.to_numpy_array(self.G3)
     self.identity_conversion(self.G3, A, nx.Graph())
Example #36
def fruchterman_reingold_layout(
    G,
    k=None,
    pos=None,
    fixed=None,
    iterations=50,
    threshold=1e-4,
    weight="weight",
    scale=1,
    center=None,
    dim=2,
    seed=None,
):
    """Position nodes using Fruchterman-Reingold force-directed algorithm.

    The algorithm simulates a force-directed representation of the network
    treating edges as springs holding nodes close, while treating nodes
    as repelling objects, sometimes called an anti-gravity force.
    Simulation continues until the positions are close to an equilibrium.

    There are some hard-coded values: minimal distance between
    nodes (0.01) and "temperature" of 0.1 to ensure nodes don't fly away.
    During the simulation, `k` helps determine the distance between nodes,
    though `scale` and `center` determine the size and place after
    rescaling occurs at the end of the simulation.

    Fixing some nodes doesn't allow them to move in the simulation.
    It also turns off the rescaling feature at the simulation's end.
    In addition, setting `scale` to `None` turns off rescaling.

    Parameters
    ----------
    G : NetworkX graph or list of nodes
        A position will be assigned to every node in G.

    k : float (default=None)
        Optimal distance between nodes.  If None the distance is set to
        1/sqrt(n) where n is the number of nodes.  Increase this value
        to move nodes farther apart.

    pos : dict or None  optional (default=None)
        Initial positions for nodes as a dictionary with node as keys
        and values as a coordinate list or tuple.  If None, then use
        random initial positions.

    fixed : list or None  optional (default=None)
        Nodes to keep fixed at initial position.
        ValueError raised if `fixed` specified and `pos` not.

    iterations : int  optional (default=50)
        Maximum number of iterations taken

    threshold: float optional (default = 1e-4)
        Threshold for relative error in node position changes.
        The iteration stops if the error is below this threshold.

    weight : string or None   optional (default='weight')
        The edge attribute that holds the numerical value used for
        the edge weight.  If None, then all edge weights are 1.

    scale : number or None (default: 1)
        Scale factor for positions. Not used unless `fixed is None`.
        If scale is None, no rescaling is performed.

    center : array-like or None
        Coordinate pair around which to center the layout.
        Not used unless `fixed is None`.

    dim : int
        Dimension of layout.

    seed : int, RandomState instance or None  optional (default=None)
        Set the random state for deterministic node layouts.
        If int, `seed` is the seed used by the random number generator,
        if numpy.random.RandomState instance, `seed` is the random
        number generator,
        if None, the random number generator is the RandomState instance used
        by numpy.random.

    Returns
    -------
    pos : dict
        A dictionary of positions keyed by node

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> pos = nx.spring_layout(G)

    # The same using longer but equivalent function name
    >>> pos = nx.fruchterman_reingold_layout(G)
    """
    import numpy as np

    G, center = _process_params(G, center, dim)

    if fixed is not None:
        if pos is None:
            raise ValueError("nodes are fixed without positions given")
        for node in fixed:
            if node not in pos:
                raise ValueError("nodes are fixed without positions given")
        nfixed = {node: i for i, node in enumerate(G)}
        fixed = np.asarray([nfixed[node] for node in fixed])

    if pos is not None:
        # Determine size of existing domain to adjust initial positions
        dom_size = max(coord for pos_tup in pos.values() for coord in pos_tup)
        if dom_size == 0:
            dom_size = 1
        pos_arr = seed.rand(len(G), dim) * dom_size + center

        for i, n in enumerate(G):
            if n in pos:
                pos_arr[i] = np.asarray(pos[n])
    else:
        pos_arr = None
        dom_size = 1

    if len(G) == 0:
        return {}
    if len(G) == 1:
        return {nx.utils.arbitrary_element(G.nodes()): center}

    try:
        # Sparse matrix
        if len(G) < 500:  # dense solver below is faster for small graphs
            raise ValueError
        A = nx.to_scipy_sparse_matrix(G, weight=weight, dtype="f")
        if k is None and fixed is not None:
            # We must adjust k by domain size for layouts not near 1x1
            nnodes, _ = A.shape
            k = dom_size / np.sqrt(nnodes)
        pos = _sparse_fruchterman_reingold(A, k, pos_arr, fixed, iterations,
                                           threshold, dim, seed)
    except ValueError:
        A = nx.to_numpy_array(G, weight=weight)
        if k is None and fixed is not None:
            # We must adjust k by domain size for layouts not near 1x1
            nnodes, _ = A.shape
            k = dom_size / np.sqrt(nnodes)
        pos = _fruchterman_reingold(A, k, pos_arr, fixed, iterations,
                                    threshold, dim, seed)
    if fixed is None and scale is not None:
        pos = rescale_layout(pos, scale=scale) + center
    pos = dict(zip(G, pos))
    return pos
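
# A hedged usage sketch for the layout above: pin two nodes of a small path
# graph and let the rest settle around them (node ids, coordinates, and the
# seed are illustrative assumptions).
import networkx as nx

G_demo = nx.path_graph(5)
init_pos = {0: (0.0, 0.0), 4: (1.0, 0.0)}    # positions for the nodes to pin
pos = nx.spring_layout(G_demo, pos=init_pos, fixed=[0, 4], seed=42)
print(pos[0], pos[4])                        # pinned nodes keep their coordinates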
Example #37
def load_brazil_airport(feature_dim, initializer="None"):
    '''
    hardcoded for simplicity
    '''
    num_nodes = 131
    num_feats = feature_dim if initializer != 'None' else 1433
    train_size = int(num_nodes * 0.8)
    test_size = num_nodes - train_size
    if initializer == "1hot":
        num_feats = num_nodes
    feat_data = np.zeros((num_nodes, num_feats))
    labels = np.empty((num_nodes, 1), dtype=np.int64)

    node_map = {}
    label_map = {}
    label_node_list_map = {}
    train_idx = []
    test_idx = []

    if initializer == "None":
        with open("brazil-airports/labels-brazil-airports.txt") as fp:
            for i, line in enumerate(fp):
                info = line.strip().split()
                feat_data[i, :] = list(map(float, info[1:-1]))
                node_map[info[0]] = i
                if not info[-1] in label_map:
                    label_map[info[-1]] = len(label_map)
                    label_node_list_map[len(label_map) - 1] = []
                labels[i] = label_map[info[-1]]
                label_node_list_map[labels[i][0]].append(i)
    else:
        print("Initializing with", initializer)
        with open("brazil-airports/labels-brazil-airports.txt") as fp:
            fp.readline()
            for i, line in enumerate(fp):
                info = line.strip().split()
                node_map[info[0]] = i
                if not info[-1] in label_map:
                    label_map[info[-1]] = len(label_map)
                    label_node_list_map[len(label_map) - 1] = []
                labels[i] = label_map[info[-1]]
                label_node_list_map[labels[i][0]].append(i)
        # set initializer method
        if initializer == "1hot":
            feat_data = np.eye(num_nodes)
        elif initializer == "random_normal":
            feat_data = np.random.normal(0, 1, (num_nodes, feature_dim))
        elif initializer == "shared":
            feat_data = np.ones((num_nodes, feature_dim))
        elif initializer == "node_degree":
            feat_data = np.zeros((num_nodes, 1))
        elif initializer == "pagerank" or "eigen" in initializer:
            G = nx.Graph()
            G.add_nodes_from(node_map.values())
        elif initializer == "deepwalk":
            feat_data = extract_deepwalk_embeddings(
                "brazil-airports/brazil-airports_{dim}.embeddings".format(
                    dim=feature_dim), node_map)

    adj_lists = defaultdict(set)
    with open("brazil-airports/brazil-airports.edgelist") as fp:
        for i, line in enumerate(fp):
            info = line.strip().split()
            paper1 = node_map[info[0]]
            paper2 = node_map[info[1]]
            adj_lists[paper1].add(paper2)
            adj_lists[paper2].add(paper1)
            if initializer == "pagerank" or "eigen" in initializer:
                G.add_edge(paper1, paper2)
                G.add_edge(paper2, paper1)

    if "degree" in initializer:
        # for k, v in adj_lists.items():
        #     feat_data[k] = len(v)

        # convert to 1hot representation
        node_degrees = [len(v) for v in adj_lists.values()]
        if "degree_bucket" in initializer:
            # simple hack for degree bucketization
            boundaries = []
            if initializer == "degree_bucket_range":
                bucket_num = 10
                bucket_size = int(80 / bucket_num)
                boundaries = np.arange(bucket_size, 81, bucket_size)
            elif initializer == "degree_bucket_distribution":
                bucket_num = 10
                bucket_size = int(100 / bucket_num)
                for i in range(bucket_size, 101, bucket_size):
                    boundaries.append(
                        np.percentile(node_degrees, i, interpolation="higher"))
            print("boundaries:", boundaries)
            feat_data = np.zeros((num_nodes, bucket_num))
            for k, v in adj_lists.items():
                idx = torch.bucketize(len(v), torch.Tensor(boundaries))
                feat_data[k, idx] = 1
                assert np.count_nonzero(feat_data[k]) == 1
            print(feat_data)
        elif initializer == "node_degree":
            max_degree = max(node_degrees)
            feat_data = np.zeros((num_nodes, max_degree + 1))
            for k, v in adj_lists.items():
                feat_data[k, len(v)] = 1

    elif initializer == "pagerank":
        feat_data = np.zeros((num_nodes, feature_dim))
        pagerank = nx.pagerank(G)
        for k, v in pagerank.items():
            feat_data[k, :] = v
    elif "eigen" in initializer:
        try:
            if initializer == "eigen":
                v = np.load("brazil-airports/brazil-airports_eigenvector.npy")
            else:
                v = np.load(
                    "brazil-airports/brazil-airports_eigenvector_degree_normalized.npy"
                )
            print(v.shape)
        except OSError:  # no cached eigenvectors; compute them below
            adj_matrix = nx.to_numpy_array(G)
            # normalize adjacency matrix with degree
            if initializer == "eigen_norm":
                sum_of_rows = adj_matrix.sum(axis=1)
                adj_matrix = adj_matrix / sum_of_rows[:, None]
            print("start computing eigen vectors")
            w, v = LA.eig(adj_matrix)
            indices = np.argsort(w)[::-1]
            v = v.transpose()[indices]
            # only save top 1000 eigenvectors
            if initializer == "eigen":
                np.save("brazil-airports/brazil-airports_eigenvector",
                        v[:1000])
            else:
                np.save(
                    "brazil-airports/brazil-airports_eigenvector_degree_normalized",
                    v[:1000])
        # print(v)
        feat_data = np.zeros((num_nodes, feature_dim))
        # assert(feature_dim <= 1000)
        for i in range(num_nodes):
            for j in range(feature_dim):
                feat_data[i, j] = v[j, i]

    test_idx = []
    for label, node_list in label_node_list_map.items():
        node_list_size = len(node_list)
        anchor = int(node_list_size * 0.8)
        random.shuffle(node_list)
        train_idx.extend(node_list[:anchor])
        test_idx.extend(node_list[anchor:])

    return feat_data, labels, train_idx, test_idx, adj_lists
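
# A small sketch of the "degree_bucket_distribution" initializer above: bucket
# node degrees by their empirical percentiles and one-hot encode the bucket
# index (the toy graph and bucket count are illustrative assumptions; `method`
# is the modern spelling of the `interpolation` keyword used in the loader).
import numpy as np
import torch
import networkx as nx

G_toy = nx.barbell_graph(5, 2)
num_nodes = G_toy.number_of_nodes()
degrees = [d for _, d in G_toy.degree()]

bucket_num = 4
step = 100 // bucket_num
boundaries = [np.percentile(degrees, p, method="higher")
              for p in range(step, 101, step)]

feat_data = np.zeros((num_nodes, bucket_num))
for node, deg in G_toy.degree():
    idx = torch.bucketize(deg, torch.Tensor(boundaries))
    feat_data[node, idx] = 1                 # exactly one active bucket per node
    assert np.count_nonzero(feat_data[node]) == 1
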
    def dist(
        self,
        G1,
        G2,
        normed=True,
        kernel='normal',
        hwhm=0.011775,
        measure='jensen-shannon',
        k=None,
        which='LM',
    ):
        """Graph distances using different measure between the Laplacian
        spectra of the two graphs

        The spectra of both Laplacian matrices (normalized or not) is
        computed. Then, the discrete spectra are convolved with a kernel to
        produce continuous ones. Finally, these distribution are compared
        using a metric.

        The results dictionary also stores a 2-tuple of the underlying
        adjacency matrices in the key `'adjacency_matrices'`, the Laplacian
        matrices in `'laplacian_matrices'`, the eigenvalues of the
        Laplacians in `'eigenvalues'`. If the networks being compared are
        directed, the augmented adjacency matrices are calculated and
        stored in `'augmented_adjacency_matrices'`.

        Parameters
        ----------

        G1, G2 (nx.Graph)
            two networkx graphs to be compared.

        normed (bool)
            If True, uses the normalized laplacian matrix, otherwise the
            raw laplacian matrix is used.

        kernel (str)
            kernel to obtain a continuous spectrum. Choices available are
            'normal', 'lorentzian', or None. If None is chosen, the
            discrete spectrum is used instead, and the measure is simply
            the euclidean distance between the vector of eigenvalues for
            each graph.

        hwhm (float)
            half-width at half-maximum for the kernel. The default value is
            chosen such that the standard deviation for the normal
            distribution is :math:`0.01`, as in reference [1]_. This option
            is relevant only if kernel is not None.

        measure (str)
            metric between the two continuous spectra. Choices available
            are 'jensen-shannon' or 'euclidean'. This option is relevant
            only if kernel is not None.

        k (int)
            number of eigenvalues kept for the (discrete) spectrum, also
            used to create the continuous spectrum. If None, all the
            eigenvalues are used. k must be smaller (strictly) than the
            size of both graphs.

        which (str)
            if k is not None, this option specifies the eigenvalues that
            are kept. See the choices offered by
            `scipy.sparse.linalg.eigsh`.  The largest eigenvalues in
            magnitude are kept by default.

        Returns
        -------

        dist (float)
            the distance between G1 and G2.

        Notes
        -----
        The methods are usually applied to undirected (unweighted)
        networks. We however relax this assumption using the same method
        proposed for the Hamming-Ipsen-Mikhailov. See [2]_.

        References
        ----------

        .. [1] https://www.sciencedirect.com/science/article/pii/S0303264711001869.

        .. [2] https://ieeexplore.ieee.org/abstract/document/7344816.

        """
        adj1 = nx.to_numpy_array(G1)
        adj2 = nx.to_numpy_array(G2)
        self.results['adjacency_matrices'] = adj1, adj2
        directed = nx.is_directed(G1) or nx.is_directed(G2)

        if directed:
            # create augmented adjacency matrices
            N1 = len(G1)
            N2 = len(G2)
            null_mat1 = np.zeros((N1, N1))
            null_mat2 = np.zeros((N2, N2))
            adj1 = np.block([[null_mat1, adj1.T], [adj1, null_mat1]])
            adj2 = np.block([[null_mat2, adj2.T], [adj2, null_mat2]])
            self.results['augmented_adjacency_matrices'] = adj1, adj2

        # get the laplacian matrices
        lap1 = laplacian(adj1, normed=normed)
        lap2 = laplacian(adj2, normed=normed)
        self.results['laplacian_matrices'] = lap1, lap2

        # get the eigenvalues of the laplacian matrices
        if k is None:
            ev1 = np.abs(eigvalsh(lap1))
            ev2 = np.abs(eigvalsh(lap2))
        else:
            # transform the dense laplacian matrices to sparse representations
            lap1 = csgraph_from_dense(lap1)
            lap2 = csgraph_from_dense(lap2)
            ev1 = np.abs(eigsh(lap1, k=k, which=which)[0])
            ev2 = np.abs(eigsh(lap2, k=k, which=which)[0])
        self.results['eigenvalues'] = ev1, ev2

        if kernel is not None:
            # define the proper support
            a = 0
            if normed:
                b = 2
            else:
                b = np.inf

            # create continuous spectra
            density1 = _create_continuous_spectrum(ev1, kernel, hwhm, a, b)
            density2 = _create_continuous_spectrum(ev2, kernel, hwhm, a, b)

            # compare the spectra
            dist = _spectra_comparison(density1, density2, a, b, measure)
            self.results['dist'] = dist
        else:
            # euclidean distance between the two discrete spectra
            dist = np.linalg.norm(ev1 - ev2)
            self.results['dist'] = dist

        return dist
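
# A hedged sketch of the kernel=None branch described above: compare two
# graphs of the same size by the Euclidean distance between the eigenvalues
# of their normalized Laplacians (the random toy graphs are illustrative).
import numpy as np
import networkx as nx
from scipy.linalg import eigvalsh
from scipy.sparse.csgraph import laplacian

G1 = nx.gnp_random_graph(20, 0.3, seed=1)
G2 = nx.gnp_random_graph(20, 0.3, seed=2)

lap1 = laplacian(nx.to_numpy_array(G1), normed=True)
lap2 = laplacian(nx.to_numpy_array(G2), normed=True)

ev1 = np.abs(eigvalsh(lap1))       # eigvalsh returns the spectrum in ascending order
ev2 = np.abs(eigvalsh(lap2))

dist = np.linalg.norm(ev1 - ev2)   # discrete-spectrum distance, as with kernel=None
print(dist)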
Example #39
def load_cora(feature_dim, initializer="None"):
    num_nodes = 2708
    num_feats = feature_dim if initializer != 'None' else 1433
    num_classes = 7
    train_size = num_classes * 20
    if initializer == "1hot":
        num_feats = num_nodes
    feat_data = np.zeros((num_nodes, num_feats))
    labels = np.empty((num_nodes, 1), dtype=np.int64)

    node_map = {}
    label_map = {}
    label_node_list_map = {}
    train_idx = []
    test_idx = []
    val_idx = []

    if initializer == "None":
        with open("cora/cora.content") as fp:
            for i, line in enumerate(fp):
                info = line.strip().split()
                feat_data[i, :] = list(map(float, info[1:-1]))
                node_map[info[0]] = i
                if not info[-1] in label_map:
                    label_map[info[-1]] = len(label_map)
                    label_node_list_map[len(label_map) - 1] = []
                labels[i] = label_map[info[-1]]
                label_node_list_map[labels[i][0]].append(i)
    else:
        print("Initializing with", initializer)
        with open("cora/cora.content") as fp:
            for i, line in enumerate(fp):
                info = line.strip().split()
                node_map[info[0]] = i
                if not info[-1] in label_map:
                    label_map[info[-1]] = len(label_map)
                    label_node_list_map[len(label_map) - 1] = []
                labels[i] = label_map[info[-1]]
                label_node_list_map[labels[i][0]].append(i)
        # set initializer method
        if initializer == "1hot":
            feat_data = np.eye(num_nodes)
        elif initializer == "random_normal":
            feat_data = np.random.normal(0, 1, (num_nodes, feature_dim))
        elif initializer == "shared":
            feat_data = np.ones((num_nodes, feature_dim))
        elif initializer == "node_degree":
            feat_data = np.zeros((num_nodes, 1))
        elif initializer == "pagerank" or initializer == "eigen":
            G = nx.Graph()
            G.add_nodes_from(node_map.values())
        elif initializer == "deepwalk":
            feat_data = extract_deepwalk_embeddings(
                "cora/cora_{dim}.embeddings".format(dim=feature_dim), node_map)

    adj_lists = defaultdict(set)
    with open("cora/cora.cites") as fp:
        for i, line in enumerate(fp):
            info = line.strip().split()
            paper1 = node_map[info[0]]
            paper2 = node_map[info[1]]
            adj_lists[paper1].add(paper2)
            adj_lists[paper2].add(paper1)
            if initializer == "pagerank" or initializer == "eigen":
                G.add_edge(paper1, paper2)
                G.add_edge(paper2, paper1)

    if initializer == "node_degree":
        # convert to 1hot representation
        node_degrees = [len(v) for v in adj_lists.values()]
        max_degree = max(node_degrees)
        feat_data = np.zeros((num_nodes, max_degree + 1))
        for k, v in adj_lists.items():
            feat_data[k, len(v)] = 1
    elif initializer == "pagerank":
        feat_data = np.zeros((num_nodes, feature_dim))
        pagerank = nx.pagerank(G)
        for k, v in pagerank.items():
            feat_data[k, :] = v
    elif "eigen" in initializer:
        try:
            if initializer == "eigen":
                v = np.load("cora/cora_eigenvector.npy")
            else:
                v = np.load("cora/cora_eigenvector_degree_normalized.npy")
            print(v.shape)
        except OSError:  # no cached eigenvectors; compute them below
            adj_matrix = nx.to_numpy_array(G)
            # normalize adjacency matrix with degree
            if initializer == "eigen_norm":
                sum_of_rows = adj_matrix.sum(axis=1)
                adj_matrix = adj_matrix / sum_of_rows[:, None]
            print("start computing eigen vectors")
            w, v = LA.eig(adj_matrix)
            indices = np.argsort(w)[::-1]
            v = v.transpose()[indices]
            # only save top 1000 eigenvectors
            np.save("cora/cora_eigenvector", v[:2000])
            if initializer == "eigen":
                np.save("cora/cora_eigenvector", v[:1000])
            else:
                np.save("cora/cora_eigenvector_degree_normalized", v[:1000])
        # print(v)
        feat_data = np.zeros((num_nodes, feature_dim))
        for i in range(num_nodes):
            for j in range(feature_dim):
                feat_data[i, j] = v[j, i]

    for label, node_list in label_node_list_map.items():
        random.shuffle(node_list)
        train_idx.extend(node_list[:20])
        anchor = 20 + int(500 / num_classes)
        val_idx.extend(node_list[20:anchor])
        test_idx.extend(node_list[anchor:])

    return feat_data, labels, train_idx, test_idx, val_idx, adj_lists
Example #40
def authority_matrix(G, nodelist=None):
    """Returns the HITS authority matrix."""
    M = nx.to_numpy_array(G, nodelist=nodelist)
    return M.T @ M
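
# A hedged illustration of how the authority matrix is used: HITS authority
# scores are, up to normalization, the principal eigenvector of M.T @ M
# (the toy digraph below is an illustrative assumption).
import numpy as np
import networkx as nx

G_demo = nx.gnp_random_graph(10, 0.4, directed=True, seed=7)
M = nx.to_numpy_array(G_demo)
A = M.T @ M                                   # authority matrix

eigvals, eigvecs = np.linalg.eigh(A)          # A is symmetric positive semidefinite
principal = np.abs(eigvecs[:, np.argmax(eigvals)])
principal /= principal.sum()                  # L1-normalize, as nx.hits does

authorities = nx.hits(G_demo)[1]              # nx.hits returns (hubs, authorities)
print(principal)
print([round(authorities[n], 4) for n in G_demo])   # should closely agree for this graph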
Example #41
def simrank_similarity_numpy(G, source=None, target=None, importance_factor=0.9,
                             max_iterations=100, tolerance=1e-4):
    """Calculate SimRank of nodes in ``G`` using matrices with ``numpy``.

    The SimRank algorithm for determining node similarity is defined in
    [1]_.

    Parameters
    ----------
    G : NetworkX graph
        A NetworkX graph

    source : node
        If this is specified, the returned dictionary maps each node
        ``v`` in the graph to the similarity between ``source`` and
        ``v``.

    target : node
        If both ``source`` and ``target`` are specified, the similarity
        value between ``source`` and ``target`` is returned. If
        ``target`` is specified but ``source`` is not, this argument is
        ignored.

    importance_factor : float
        The relative importance of indirect neighbors with respect to
        direct neighbors.

    max_iterations : integer
        Maximum number of iterations.

    tolerance : float
        Error tolerance used to check convergence. When an iteration of
        the algorithm finds that no similarity value changes more than
        this amount, the algorithm halts.

    Returns
    -------
    similarity : dictionary or float
        If ``source`` and ``target`` are both ``None``, this returns a
        dictionary of dictionaries, where keys are node pairs and value
        are similarity of the pair of nodes.

        If ``source`` is not ``None`` but ``target`` is, this returns a
        dictionary mapping node to the similarity of ``source`` and that
        node.

        If neither ``source`` nor ``target`` is ``None``, this returns
        the similarity value for the given pair of nodes.

    Examples
    --------
        >>> import networkx as nx
        >>> from numpy import array
        >>> G = nx.cycle_graph(4)
        >>> sim = nx.simrank_similarity_numpy(G)

    References
    ----------
    .. [1] G. Jeh and J. Widom.
           "SimRank: a measure of structural-context similarity",
           In KDD'02: Proceedings of the Eighth ACM SIGKDD
           International Conference on Knowledge Discovery and Data Mining,
           pp. 538--543. ACM Press, 2002.
    """
    # This algorithm follows roughly
    #
    #     S = max{C * (A.T * S * A), I}
    #
    # where C is the importance factor, A is the column normalized
    # adjacency matrix, and I is the identity matrix.
    import numpy as np
    adjacency_matrix = nx.to_numpy_array(G)

    # column-normalize the ``adjacency_matrix``
    adjacency_matrix /= adjacency_matrix.sum(axis=0)

    newsim = np.eye(adjacency_matrix.shape[0], dtype=np.float64)
    for _ in range(max_iterations):
        prevsim = np.copy(newsim)
        newsim = importance_factor * np.matmul(
            np.matmul(adjacency_matrix.T, prevsim), adjacency_matrix)
        np.fill_diagonal(newsim, 1.0)

        if np.allclose(prevsim, newsim, atol=tolerance):
            break

    if source is not None and target is not None:
        return newsim[source, target]
    if source is not None:
        return newsim[source]
    return newsim
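
# A short usage sketch for the function above, checking two properties the
# SimRank fixed point should satisfy on an undirected graph: unit
# self-similarity and symmetry (the 4-cycle is an illustrative choice).
import numpy as np
import networkx as nx

G_demo = nx.cycle_graph(4)
sim = simrank_similarity_numpy(G_demo)    # full node-by-node similarity matrix

print(np.allclose(np.diag(sim), 1.0))     # every node is maximally similar to itself
print(np.allclose(sim, sim.T))            # the similarity matrix is symmetric here
print(sim[0, 2])                          # similarity of the two opposite nodes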
Example #42
    def recalc_layout_force(self):
        """calculate new change_array"""
        logging.info('force recalculating starting')
        
        # get node array
        # self.base_pos_ar = np.array([(self.g.nodes[i]['x'],self.g.nodes[i]['y']) for i in self.g.nodes])
        # base_pos_ar = np.array([(g.nodes[i]['x'],g.nodes[i]['y']) for i in g.nodes])
        
        pos_nds = np.copy(self.base_pos_ar)
        # pos_nds = pos_nds.astype('float64')
        pos_nds = pos_nds.astype('float32')

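        # Symmetrize the adjacency matrix and clip entries into [0, 1] so the
        # force computation treats each pair of connected nodes as a single
        # undirected link.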
        A = nx.to_numpy_array(self.g)
        At = A.T
        A = A + At
        np.clip(A, 0, 1, out = A)
        # A = A.astype('float')
        
        # get corner points pos
        
        sqs = []
        for n in self.g.nodes():
            logging.info(['node', n])
            sqx = rect_points([self.g.nodes[n]['x'], self.g.nodes[n]['y'], 
                                    self.g.nodes[n]['width'], self.g.nodes[n]['height']])
            
            sqs.append(sqx)

        pos = np.concatenate(sqs)


        pos = pos.astype('float32')

        row_order = get_reorder_order_sliced(len(self.g.nodes))

        nbr_nds = A.shape[0]
        nbr_pts = pos.shape[0]

        # self.dim_ar = np.array([[self.g.nodes[i]['width'], self.g.nodes[i]['height']] for i in self.g.nodes])
        dim_ar2 = self.dim_ar.astype('float32')


        # pythran_res = pythran_itrtr_cbn(pos, pos_nds, A, row_order, dim_ar2, self.t, self.def_itr,
        #                         self.rep_nd_brd_start, self.k, self.height*1.0, self.width*1.0, grav_multiplier)

        # pos_nds = pythran_res[0]
        # ctr = pythran_res[2]

        
        # construct objects for seeing which nodes are edge label nodes
        elbl_pos_list = []
        elbl_cnct_nds = []
        c = 0
        for v in self.g.nodes:
            if self.g.nodes[v]['nd_tp'] == 'lbl':
                # g.nodes[v]['e_lbl'] = 1
                logging.info(["v: ", v, ", c: ", c])
                elbl_pos_list.append(c)
                cnct_nodes = list(self.g.predecessors(v)) + list(self.g.successors(v))
                logging.info(["connected nodes: ", cnct_nodes])
                elbl_cnct_nds.append([self.vd[cnct_nodes[0]], self.vd[cnct_nodes[1]]])
            c +=1
            
        elbl_pos_list = np.array(elbl_pos_list)
        elbl_cnct_nds = np.array(elbl_cnct_nds)
        
        logging.info(["elbl_pos_list:\n", elbl_pos_list])
        logging.info(["elbl_cnct_nds:\n", elbl_cnct_nds])

        t1 = time()
        ctr = 0

        grav_multiplier = 5.0

        logging.info(['pos_nds:\n', pos_nds])
        pos_nds = frucht(pos_nds, dim_ar2, self.k*1.0, A, self.width*1.0, self.height*1.0, self.t,
                         500, self.def_itr, self.rep_nd_brd_start,
                         elbl_pos_list, elbl_cnct_nds, 1.0
                         )
        logging.info(['pos_nds:\n', pos_nds])
        ctr = 0

        t2 = time()
        logging.info('calculated layout in ' + str(round(t2-t1,4)) + ' seconds with ' + str(ctr) + ' iterations')

        # base_pos_ar = np.array([(g.nodes[i]['x'],g.nodes[i]['y']) for i in g.nodes])

        # self.goal_vp = sfdp_layout(self.g, K=0.5, pos=self.pos_vp, **set_dict)
        # self.goal_vp = fruchterman_reingold_layout(self.g, pos = self.pos_vp)

        self.chng_ar = (pos_nds - self.base_pos_ar)/self.step

        # re-assign back to graph, just do once at end
        for i in zip(self.g.nodes, pos_nds):
            self.g.nodes[i[0]]['x'] = i[1][0]
            self.g.nodes[i[0]]['y'] = i[1][1]

        # print("base_pos_ar: ", self.base_pos_ar)
        logging.info('force recalculating done')