def mask_test_edges_old(adj, is_directed=True, test_frac=.1, val_frac=.05,
                    prevent_disconnect=True, only_largest_wcc=False, seed=0, verbose=False):
    """
    Perform train-test split of the adjacency matrix and return the train-set and test-set edgelist (indices
    instead of node label). Node sampling of the testing set is after excluding bridges edges to prevent disconnect
    (implemented for undirected graph).

    :param adj: adjacency matrix in sparse format
    :param is_directed:
    :param test_frac:
    :param val_frac:
    :param prevent_disconnect:
    :param only_largest_wcc:
    :param seed:
    :param verbose:
    :return:
    """
    # Remove diagonal elements
    adj = adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()

    # Convert to networkx graph to calc num. weakly connected components
    g = nx.from_scipy_sparse_matrix(adj, create_using=nx.DiGraph() if is_directed else nx.Graph())
    orig_num_wcc = nx.number_weakly_connected_components(g) if is_directed else nx.number_connected_components(g)
    adj_tuple = sparse_to_tuple(adj)  # (coords, values, shape)
    edges = adj_tuple[0]  # List of ALL edges (either direction)
    edge_pairs = [(edge[0], edge[1]) for edge in edges]  # store edges as list of tuples (from_node, to_node)
    edge_values = adj_tuple[1]

    num_test = int(np.floor(edges.shape[0] * test_frac))  # controls how large the test set should be
    num_val = int(np.floor(edges.shape[0] * val_frac))  # controls how large the validation set should be
    num_train = len(edge_pairs) - num_test - num_val  # num train edges

    ### ---------- TRUE EDGES ---------- ###
    # Shuffle and iterate over all edges
    # Add MST edges to train_edges, to exclude bridge edges from the test and validation set
    mst_edges = set(nx.minimum_spanning_tree(g.to_undirected() if is_directed else g).edges())
    train_edges = set([pair for pair in edge_pairs if
                       (pair[0], pair[1]) in mst_edges or (pair[0], pair[1])[::-1] in mst_edges])
    if verbose: print("edges in MST:", len(train_edges))

    non_mst_edges = [pair for pair in edge_pairs if pair not in train_edges]  # candidate test/val edges
    np.random.seed(seed)
    np.random.shuffle(non_mst_edges)
    train_edges = list(train_edges)

    test_edges = non_mst_edges[0 : num_test]
    val_edges = non_mst_edges[num_test : num_test+num_val]
    train_edges.extend(non_mst_edges[num_test+num_val:])

    # Remove edges from g to test connected-ness
    if prevent_disconnect:
        g.remove_edges_from(test_edges)
        g.remove_edges_from(val_edges)

    # Check that enough test/val edges were found
    if (len(val_edges) < num_val or len(test_edges) < num_test):
        print("WARNING: not enough removable edges to perform full train-test split!")
        print("Num. (test, val) edges requested: (", num_test, ", ", num_val, ")")
        print("Num. (test, val) edges returned: (", len(test_edges), ", ", len(val_edges), ")")

    if prevent_disconnect:
        assert (nx.number_weakly_connected_components(g) if is_directed else nx.number_connected_components(g)) == orig_num_wcc

    # Identify the largest remaining WCC (also used by frac_edges_in_wcc and only_largest_wcc below)
    largest_wcc_set = max(nx.weakly_connected_components(g) if is_directed else nx.connected_components(g), key=len)

    # Print stats for largest remaining WCC
    if verbose:
        print('Num WCC: ', nx.number_weakly_connected_components(g) if is_directed else nx.number_connected_components(g))
        largest_wcc = g.subgraph(largest_wcc_set)
        print('Largest WCC num nodes: ', largest_wcc.number_of_nodes())
        print('Largest WCC num edges: ', largest_wcc.number_of_edges())


    # Fraction of edges with both endpoints in largest WCC
    def frac_edges_in_wcc(edge_set):
        if len(edge_set) == 0:
            return "N/A"
        num_wcc_contained_edges = 0.0
        num_total_edges = 0.0
        for edge in edge_set:
            num_total_edges += 1
            if edge[0] in largest_wcc_set and edge[1] in largest_wcc_set:
                num_wcc_contained_edges += 1
        frac_in_wcc = num_wcc_contained_edges / num_total_edges
        return frac_in_wcc

    # Ignore edges with endpoint not in largest WCC
    if only_largest_wcc:
        print('Removing edges with either endpoint not in L-WCC from train-test split...')
        train_edges = {edge for edge in train_edges if edge[0] in largest_wcc_set and edge[1] in largest_wcc_set}
        test_edges = {edge for edge in test_edges if edge[0] in largest_wcc_set and edge[1] in largest_wcc_set}
        val_edges = {edge for edge in val_edges if edge[0] in largest_wcc_set and edge[1] in largest_wcc_set}

    # assert: test, val, train positive edges disjoint
    assert set(val_edges).isdisjoint(set(train_edges))
    assert set(test_edges).isdisjoint(set(train_edges))
    assert set(val_edges).isdisjoint(set(test_edges))

    # Re-build adj matrix using remaining graph
    adj_train = nx.adjacency_matrix(g)

    # Convert edge-lists to numpy arrays
    train_edges = np.array([list(edge_tuple) for edge_tuple in train_edges])
    val_edges = np.array([list(edge_tuple) for edge_tuple in val_edges])
    test_edges = np.array([list(edge_tuple) for edge_tuple in test_edges])

    # Return final edge lists (edges can go either direction!)
    return adj_train, train_edges, \
           val_edges, test_edges
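A minimal usage sketch (assumed, not from the original repository): it relies on the module-level imports (numpy as np, scipy.sparse as sp, networkx as nx), the sparse_to_tuple helper from the same module, and a networkx version that still provides from_scipy_sparse_matrix.

import networkx as nx

# Small undirected demo graph and its sparse adjacency matrix.
g_demo = nx.karate_club_graph()
adj_demo = nx.adjacency_matrix(g_demo)

# 10% test / 5% validation split; MST (bridge) edges stay in the train set.
adj_train, train_edges, val_edges, test_edges = mask_test_edges_old(
    adj_demo, is_directed=False, test_frac=0.1, val_frac=0.05,
    prevent_disconnect=True, seed=0, verbose=True)
print(train_edges.shape, val_edges.shape, test_edges.shape)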
Example #2
def cluster_graph(
    corpus,
    titles = None,
    colors = None,
    threshold = 0.3,
    stemming = True,
    max_df = 0.95,
    min_df = 2,
    ngram = (1, 3),
    cleaning = simple_textcleaning,
    vectorizer = 'bow',
    stop_words = None,
    num_clusters = 5,
    clustering = KMeans,
    figsize = (17, 9),
    with_labels = True,
    **kwargs
):
    """
    Plot an undirected graph connecting similar texts.

    Parameters
    ----------
    corpus: list
        list of strings.
    titles: list
        list of titles; its length must match corpus.
    colors: list
        list of colors; its length must match num_clusters.
    threshold: float, (default=0.3)
        similarity threshold for adding an edge to the covariance-matrix graph.
    num_clusters: int, (default=5)
        number of unsupervised clusters.
    stemming: bool, (default=True)
        if True, the sastrawi stemmer is applied.
    max_df: float, (default=0.95)
        maximum document frequency for a word to be kept.
    min_df: int, (default=2)
        minimum document frequency for a word to be kept.
    ngram: tuple, (default=(1,3))
        n-gram range used to train on the corpus.
    cleaning: function, (default=simple_textcleaning)
        function used to clean the corpus.
    stop_words: list, (default=None)
        list of stop words to remove. If None, default is malaya.texts._text_functions.STOPWORDS.
    vectorizer: str, (default='bow')
        vectorizer technique. Allowed values:

        * ``'bow'`` - Bag of Words.
        * ``'tfidf'`` - Term Frequency-Inverse Document Frequency.
        * ``'skip-gram'`` - Bag of Words with skipping certain n-grams.

    Returns
    -------
    dictionary: {
        'G': G,
        'pos': pos,
        'node_colors': node_colors,
        'node_labels': node_labels,
    }
    """
    if not isinstance(corpus, list):
        raise ValueError('corpus must be a list')
    if not isinstance(corpus[0], str):
        raise ValueError('corpus must be list of strings')
    if not isinstance(titles, list) and titles is not None:
        raise ValueError('titles must be a list or None')
    if not isinstance(colors, list) and colors is not None:
        raise ValueError('colors must be a list or None')
    if titles:
        if len(titles) != len(corpus):
            raise ValueError('length of titles must be same with corpus')
    if colors:
        if len(colors) != num_clusters:
            raise ValueError(
                'size of colors must be same with number of clusters'
            )
    if not isinstance(vectorizer, str):
        raise ValueError('vectorizer must be a string')
    if not isinstance(stemming, bool):
        raise ValueError('stemming must be a boolean')
    vectorizer = vectorizer.lower()
    if not vectorizer in ['tfidf', 'bow', 'skip-gram']:
        raise ValueError("vectorizer must be in  ['tfidf', 'bow', 'skip-gram']")
    if not isinstance(ngram, tuple):
        raise ValueError('ngram must be a tuple')
    if not len(ngram) == 2:
        raise ValueError('ngram size must equal to 2')
    if not isinstance(min_df, int):
        raise ValueError('min_df must be an integer')
    if not isinstance(max_df, float):
        raise ValueError('max_df must be a float')
    if min_df < 1:
        raise ValueError('min_df must be bigger than 0')
    if not (max_df <= 1 and max_df > 0):
        raise ValueError(
            'max_df must be bigger than 0, less than or equal to 1'
        )
    if not isinstance(threshold, float):
        raise ValueError('threshold must be a float')
    if not (threshold <= 1 and threshold > 0):
        raise ValueError(
            'threshold must be bigger than 0, less than or equal to 1'
        )

    if vectorizer == 'tfidf':
        Vectorizer = TfidfVectorizer
    elif vectorizer == 'bow':
        Vectorizer = CountVectorizer
    elif vectorizer == 'skip-gram':
        Vectorizer = SkipGramVectorizer
    else:
        raise ValueError("vectorizer must be in  ['tfidf', 'bow', 'skip-gram']")

    try:
        import matplotlib.pyplot as plt
        import seaborn as sns
        import networkx as nx
        import networkx.drawing.layout as nxlayout

        sns.set()
    except:
        raise Exception(
            'matplotlib, seaborn, networkx not installed. Please install it and try again.'
        )
    if stop_words is None:
        stop_words = STOPWORDS

    tf_vectorizer = Vectorizer(
        ngram_range = ngram,
        min_df = min_df,
        max_df = max_df,
        stop_words = stop_words,
        **kwargs
    )
    if cleaning is not None:
        for i in range(len(corpus)):
            corpus[i] = cleaning(corpus[i])
    if stemming:
        for i in range(len(corpus)):
            corpus[i] = sastrawi(corpus[i])
    text_clean = []
    for text in corpus:
        text_clean.append(
            ' '.join([word for word in text.split() if word not in stop_words])
        )
    tf_vectorizer.fit(text_clean)
    DxT = tf_vectorizer.transform(text_clean)
    DxD = np.dot(DxT, DxT.T)
    km = clustering(n_clusters = num_clusters)
    km.fit(DxT)
    clusters = km.labels_.tolist()
    features = tf_vectorizer.get_feature_names()
    if not titles:
        titles = []
        for i in range(DxT.shape[0]):
            indices = np.argsort(np.array(DxT[i].todense())[0])[::-1]
            titles.append(' '.join([features[i] for i in indices[: ngram[1]]]))
    if not colors:
        colors = sns.color_palette(n_colors = num_clusters)
    G = nx.Graph()
    for i in range(DxT.shape[0]):
        G.add_node(i, text = titles[i], label = clusters[i])
    dense_DxD = DxD.toarray()
    len_dense = len(dense_DxD)
    for i in range(len_dense):
        for j in range(len_dense):
            if j == i:
                continue
            if dense_DxD[i, j] >= threshold:
                weight = dense_DxD[i, j]
                G.add_edge(i, j, weight = weight)
    node_colors, node_labels = [], {}
    for node in G:
        node_colors.append(colors[G.node[node]['label']])
        node_labels[node] = G.node[node]['text']
    pos = nxlayout.fruchterman_reingold_layout(
        G, k = 1.5 / np.sqrt(len(G.nodes()))
    )
    plt.figure(figsize = figsize)
    if with_labels:
        nx.draw(G, node_color = node_colors, pos = pos, labels = node_labels)
    else:
        nx.draw(G, node_color = node_colors, pos = pos)
    return {
        'G': G,
        'pos': pos,
        'node_colors': node_colors,
        'node_labels': node_labels,
    }
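A hypothetical call, assuming the malaya package (which provides cluster_graph, simple_textcleaning, the sastrawi stemmer and STOPWORDS) is installed; the five-sentence corpus below is made up for illustration.

corpus = [
    'saya suka makan nasi goreng',
    'nasi goreng memang sedap',
    'kereta itu sangat laju',
    'dia memandu kereta ke pejabat',
    'harga minyak naik setiap minggu',
]
result = cluster_graph(corpus, vectorizer='tfidf', threshold=0.1,
                       num_clusters=2, stemming=False, min_df=1)
print(len(result['G'].nodes()), 'nodes,', len(result['G'].edges()), 'edges')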
Example #3
def exportPOV(path='/mnt/htw20/Documents/data/retrack/go/1/',
              head='J1_thr0_radMin3.1_radMax0_6min',
              tail='_t000',
              out='/home/mathieu/Documents/Thesis/data/go/1/mrco_ico.pov',
              ico_thr=-0.027,
              zmin=100,
              zmax=175,
              header='go1.inc',
              polydisperse=False):
    if polydisperse:
        positions = np.load(path + head + tail + '.npy')
        radii = positions[:, -2] * np.sqrt(2)
        positions = positions[:, :-2]
    else:
        positions = np.loadtxt(path + head + tail + '.dat', skiprows=2)
    Q6 = np.loadtxt(path + head + '_space' + tail + '.cloud', usecols=[1])
    bonds = np.loadtxt(path + head + tail + '.bonds', dtype=int)
    q6, w6 = np.loadtxt(path + head + tail + '.cloud',
                        usecols=[1, 5],
                        unpack=True)
    u6 = ((2 * 6 + 1) / (4.0 * np.pi))**1.5 * w6 * q6**3
    ico_bonds = np.bitwise_and(
        u6[bonds].min(axis=-1) < ico_thr,
        np.bitwise_and(positions[:, -1][bonds].min(axis=-1) < zmax,
                       positions[:, -1][bonds].max(axis=-1) > zmin))
    ico = np.unique(bonds[ico_bonds])
    mrco = np.unique(bonds[np.bitwise_and(
        Q6[bonds].max(axis=-1) > 0.25,
        np.bitwise_and(positions[:, -1][bonds].min(axis=-1) < zmax,
                       positions[:, -1][bonds].max(axis=-1) > zmin))])
    gr = nx.Graph()
    gr.add_nodes_from(ico)
    for a, b in bonds[ico_bonds]:
        gr.add_edge(a, b)

    try:
        # Map each particle to a 1-based connected-component (cluster) id,
        # i.e. the {particle: cluster} dictionary expected below.
        cc = {p: cl + 1
              for cl, comp in enumerate(nx.connected_components(gr))
              for p in comp}
    except RuntimeError:
        print("Graph is too large for ico_thr=%g, lower the threshold." %
              ico_thr)
        return
    #remove clusters than contain less than 10 particles
##        sizes = np.zeros(max(cc.values()), int)
##        for p,cl in cc.iteritems():
##          sizes[cl-1] +=1
##        cc2 = dict()
##        for p,cl in cc.iteritems():
##          if sizes[cl-1]>9:
##            cc2[p] = cl
##        cc =cc2
    if polydisperse:
        pov_mrco = [
            Sphere((x, y, z), r)
            for x, y, z, r in np.column_stack((positions,
                                               radii))[np.setdiff1d(mrco, ico)]
        ]
    else:
        pov_mrco = [
            Sphere((x, y, z), 6)
            for x, y, z in positions[np.setdiff1d(mrco, ico)]
        ]
    pov_mrco = Union(*pov_mrco + [Texture(Pigment(color="Green"))])
    if polydisperse:
        pov_ico = [
            Sphere(
                tuple(positions[p].tolist()), radii[p],
                Texture(
                    Pigment(color="COLORSCALE(%f)" %
                            (cl * 120.0 / max(cc.values())))))
            for p, cl in cc.items()
        ]
    else:
        pov_ico = [
            Sphere(
                tuple(positions[p].tolist()), 6,
                Texture(
                    Pigment(color="COLORSCALE(%f)" %
                            (cl * 120.0 / max(cc.values())))))
            for p, cl in cc.items()
        ]
    pov_ico = Union(*pov_ico)
    f = File(out, "colors.inc", header)
    f.write(pov_mrco, pov_ico)
    f.file.flush()
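For reference, the particle-to-cluster mapping used above can be sketched standalone (names here are illustrative, not from the original script):

import networkx as nx

demo = nx.Graph([(0, 1), (1, 2), (5, 6)])
cluster_of = {node: idx + 1
              for idx, comp in enumerate(nx.connected_components(demo))
              for node in comp}
print(cluster_of)  # e.g. {0: 1, 1: 1, 2: 1, 5: 2, 6: 2}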
Example #4
    def infer_trajectory(self,
                         init_node: int,
                         labels=None,
                         cutoff: Optional[float] = None,
                         is_plot: bool = True,
                         path: Optional[str] = None):
        '''Infer the trajectory.        

        Parameters
        ----------
        init_node : int
            The initial node for the inferred trajectory.
        cutoff : float, optional
            The threshold for filtering out edges with scores less than the cutoff.
        is_plot : boolean, optional
            Whether to plot or not.
        path : string, optional  
            The path to save figure, or don't save if it is None.

        Returns
        ----------
        G : nx.Graph 
            The modified graph that indicates the inferred trajectory.
        w : np.array
            \([N,k]\) The modified \(\\tilde{w}\).
        pseudotime : np.array
            \([N,]\) The pseudotime based on projected trajectory.      
        '''
        # select edges
        if len(self.edges) == 0:
            milestone_net = select_edges = []
            G = nx.Graph()
            G.add_nodes_from(self.G.nodes)
        else:
            if self.no_loop:
                G = nx.maximum_spanning_tree(self.G)
            else:
                G = self.G
            if cutoff is None:
                cutoff = 0.01
            graph = nx.to_numpy_matrix(G)
            graph[graph <= cutoff] = 0
            G = nx.from_numpy_array(graph)
            connected_comps = nx.node_connected_component(G, init_node)
            subG = G.subgraph(connected_comps)
            if len(subG.edges) > 0:
                milestone_net = self.build_milestone_net(subG, init_node)
                if self.no_loop is False and milestone_net.shape[0] < len(
                        G.edges):
                    warnings.warn(
                        "The directed graph shown is a minimum spanning tree of the estimated trajectory backbone to avoid arbitrary assignment of the directions."
                    )
                select_edges = milestone_net[:, :2]
                select_edges_score = graph[select_edges[:, 0], select_edges[:,
                                                                            1]]
                if select_edges_score.max() - select_edges_score.min() == 0:
                    select_edges_score = select_edges_score / select_edges_score.max(
                    )
                else:
                    select_edges_score = (select_edges_score -
                                          select_edges_score.min()) / (
                                              select_edges_score.max() -
                                              select_edges_score.min()) * 3
            else:
                milestone_net = select_edges = []

        # modify w_tilde
        w = self.modify_wtilde(self.w_tilde, select_edges)

        # compute pseudotime
        pseudotime = self.comp_pseudotime(milestone_net, init_node, w)

        if is_plot:
            fig, ax = plt.subplots(1, 1, figsize=(20, 10))

            cmap = matplotlib.cm.get_cmap('viridis')
            colors = [
                plt.cm.jet(float(i) / self.NUM_CLUSTER)
                for i in range(self.NUM_CLUSTER)
            ]
            if np.sum(pseudotime > -1) > 0:
                norm = matplotlib.colors.Normalize(vmin=np.min(
                    pseudotime[pseudotime > -1]),
                                                   vmax=np.max(pseudotime))
                sc = ax.scatter(*self.embed_z[pseudotime > -1, :].T,
                                norm=norm,
                                c=pseudotime[pseudotime > -1],
                                s=16,
                                alpha=0.5)
                plt.colorbar(sc, ax=[ax], location='right')
            else:
                norm = None

            if np.sum(pseudotime == -1) > 0:
                ax.scatter(*self.embed_z[pseudotime == -1, :].T,
                           c='gray',
                           s=16,
                           alpha=0.4)

            for i in range(len(select_edges)):
                points = self.embed_z[
                    np.sum(w[:, select_edges[i, :]] > 0, axis=-1) == 2, :]
                points = points[points[:, 0].argsort()]
                try:
                    x_smooth, y_smooth = _get_smooth_curve(
                        points, self.embed_mu[select_edges[i, :], :])
                except:
                    x_smooth, y_smooth = self.embed_mu[select_edges[
                        i, :], 0], self.embed_mu[select_edges[i, :], 1]
                ax.plot(x_smooth,
                        y_smooth,
                        '-',
                        linewidth=1 + select_edges_score[0, i],
                        color="black",
                        alpha=0.8,
                        path_effects=[
                            pe.Stroke(linewidth=1 + select_edges_score[0, i] +
                                      1.5,
                                      foreground='white'),
                            pe.Normal()
                        ],
                        zorder=1)

                delta_x = self.embed_mu[select_edges[i, 1], 0] - x_smooth[-2]
                delta_y = self.embed_mu[select_edges[i, 1], 1] - y_smooth[-2]
                length = np.sqrt(delta_x**2 + delta_y**2) * 50
                ax.arrow(
                    self.embed_mu[select_edges[i, 1], 0] - delta_x / length,
                    self.embed_mu[select_edges[i, 1], 1] - delta_y / length,
                    delta_x / length,
                    delta_y / length,
                    color='black',
                    alpha=1.0,
                    shape='full',
                    lw=0,
                    length_includes_head=True,
                    head_width=0.02,
                    zorder=2)

            for i in range(len(self.CLUSTER_CENTER)):
                ax.scatter(
                    *self.embed_mu[i:i + 1, :].T,
                    c=[colors[i]],
                    edgecolors='white',  # linewidths=10,
                    norm=norm,
                    s=250,
                    marker='*',
                    label=str(i))
                ax.text(self.embed_mu[i, 0],
                        self.embed_mu[i, 1],
                        '%02d' % i,
                        fontsize=16)

            plt.setp(ax, xticks=[], yticks=[])
            box = ax.get_position()
            ax.set_position([
                box.x0, box.y0 + box.height * 0.1, box.width, box.height * 0.9
            ])
            ax.legend(loc='upper center',
                      bbox_to_anchor=(0.5, -0.05),
                      fancybox=True,
                      shadow=True,
                      ncol=5)

            ax.set_title('Trajectory')
            if path is not None:
                plt.savefig(path, dpi=300)
            plt.show()
        return G, w, pseudotime
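The edge-selection step above can be illustrated in isolation; this is a sketch with made-up weights and a hard-coded cutoff, not the class's actual data:

import networkx as nx
import numpy as np

G = nx.Graph()
G.add_weighted_edges_from([(0, 1, 0.9), (1, 2, 0.4), (2, 0, 0.05), (2, 3, 0.8)])
T = nx.maximum_spanning_tree(G)            # drop loops, keep the strongest backbone
graph = nx.to_numpy_array(T, nodelist=sorted(T.nodes))
graph[graph <= 0.01] = 0                   # zero out low-score edges
T = nx.from_numpy_array(graph)
print(sorted(T.edges(data='weight')))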
Example #5
 def test_empty_scipy(self):
     G = networkx.Graph()
     assert_equal(networkx.pagerank_scipy(G), {})
def max_weight(Gph, maxcardinality=True):
    return nx.Graph(max_weight_matching(Gph, maxcardinality))
    def getFileFileMatrix(self):
        self.getFileFileDictionary()
        with open("depends/outputDep.json") as f:
            data = json.load(f)

        # Get class names of the entire project
        name_of_classes = list()
        for key in data['variables']:
            filename = pathlib.PureWindowsPath(key)

            # Convert path to the right format for the current operating system
            path = pathlib.PurePath(filename)
            name_of_classes.append(path.name)

        self._classNames = name_of_classes

        dependencies = list()
        dependenciesRow = list()

        # Iterating all the pairs of classes that have dependencies: index goes from 0 to n (#number of classes)
        for i in range(0, len(data["variables"])):
            # Iterating all classes (from 0 to n)
            for j in range(0, len(data["variables"])):
                # Boolean flag that tells us whether any dependency is found
                noDependencies = True
                # Iterating the dependencies found by "Depends":
                for index in range(0, len(data["cells"])):
                    # If there are dependencies from the class indexed as 'i'...
                    if (data["cells"][index]["src"] == i):
                        # ...to the class indexed as 'j'
                        if (data["cells"][index]["dest"] == j):
                            # DEPENDENCY FOUND! Put the boolean = False and compute the sum of the dependencies!
                            noDependencies = False
                            dependenciesRow.append(sum(data["cells"][index]["values"].values()))
                # No dependencies between the class 'i' and the class 'j': put 0 in the list
                if (noDependencies):
                    dependenciesRow.append(0)

            # Moving to the next row ('i' is about to change, i.e. another class will be analyzed):
            # copy 'dependenciesRow' into a support list, store it in the 'dependencies' matrix,
            # and re-use 'dependenciesRow' in the next iteration.
            supportList = deepcopy(dependenciesRow)  # copy
            del dependenciesRow[:]  # empty the list
            dependencies.extend([supportList])  # dependencies matrix filling

        k = 0
        dict_to_return = dict()
        for class_name in name_of_classes:
            j = 0
            dict_to_return[class_name] = dict()
            for class_name_2 in name_of_classes:
                if dependencies[k][j] > 0:
                    dict_to_return[class_name][class_name_2] = dependencies[k][j]
                j = j + 1
            k = k + 1

        # Create the graph
        y = nx.Graph()
        for file, file_dep in dict_to_return.items():
            for file2, val in file_dep.items():
                y.add_edge(file, file2, weight=val)

        # Compute a layout and draw the graph
        pos = nx.spring_layout(y)
        nx.draw_networkx_nodes(y, pos, node_size=70)
        nx.draw_networkx_edges(y, pos, edgelist=y.edges, edge_color="b", style="solid")
        nx.draw_networkx_labels(y, pos, font_size=5, font_family="sans-serif")

        # Display the graph
        plt.axis("off")
        plt.show()

        return dependencies, name_of_classes
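For orientation, a toy outputDep.json in the shape this method expects (inferred from the accesses above; purely illustrative data):

example_dep = {
    "variables": ["src\\A.java", "src\\B.java"],
    "cells": [
        {"src": 0, "dest": 1, "values": {"Call": 2, "Import": 1}},
    ],
}
# getFileFileMatrix would turn this into the 2x2 dependency matrix
# [[0, 3], [0, 0]] and a graph with a single A.java-B.java edge of weight 3.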
Example #8
def load_mat(filename,
             order):  # @todo: need to be updated (auto order) or deprecated.
    """Load graph data from a MATLAB (up to version 7.1) .mat file.

	Notes
	------
	A MAT file contains a struct array containing graphs, and a column vector lx containing a class label for each graph.
	Check README in `downloadable file <http://mlcb.is.tuebingen.mpg.de/Mitarbeiter/Nino/WL/>`__ for detailed structure.
	"""
    from scipy.io import loadmat
    import numpy as np
    import networkx as nx
    data = []
    content = loadmat(filename)
    # print(content)
    # print('----')
    for key, value in content.items():
        if key[0] == 'l':  # class label
            y = np.transpose(value)[0].tolist()
            # print(y)
        elif key[0] != '_':
            # print(value[0][0][0])
            # print()
            # print(value[0][0][1])
            # print()
            # print(value[0][0][2])
            # print()
            # if len(value[0][0]) > 3:
            #	 print(value[0][0][3])
            # print('----')
            # if adjacency matrix is not compressed / edge label exists
            if order[1] == 0:
                for i, item in enumerate(value[0]):
                    # print(item)
                    # print('------')
                    g = nx.Graph(name=i)  # set name of the graph
                    nl = np.transpose(item[order[3]][0][0][0])  # node label
                    # print(item[order[3]])
                    # print()
                    for index, label in enumerate(nl[0]):
                        g.add_node(index, label_1=str(label))
                    el = item[order[4]][0][0][0]  # edge label
                    for edge in el:
                        g.add_edge(edge[0] - 1,
                                   edge[1] - 1,
                                   label_1=str(edge[2]))
                    data.append(g)
            else:
                # 				from scipy.sparse import csc_matrix
                for i, item in enumerate(value[0]):
                    # print(item)
                    # print('------')
                    g = nx.Graph(name=i)  # set name of the graph
                    nl = np.transpose(item[order[3]][0][0][0])  # node label
                    # print(nl)
                    # print()
                    for index, label in enumerate(nl[0]):
                        g.add_node(index, label_1=str(label))
                    sam = item[order[0]]  # sparse adjacency matrix
                    index_no0 = sam.nonzero()
                    for col, row in zip(index_no0[0], index_no0[1]):
                        # print(col)
                        # print(row)
                        g.add_edge(col, row)
                    data.append(g)
                    # print(g.edges(data=True))

    label_names = {
        'node_labels': ['label_1'],
        'edge_labels': [],
        'node_attrs': [],
        'edge_attrs': []
    }
    if order[1] == 0:
        label_names['edge_labels'].append('label_1')

    return data, y, label_names
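A hedged usage sketch: the .mat path and the order vector below are placeholders. Per the accesses above, order[0] indexes the (sparse) adjacency matrix inside each struct entry, order[1] == 0 signals that edge labels are present, and order[3] / order[4] index the node and edge labels.

# Placeholder path and order; adjust both to the dataset's struct layout.
data, y, label_names = load_mat('dataset.mat', order=[0, 0, 2, 1, 3])
print(len(data), 'graphs; first graph has', data[0].number_of_nodes(), 'nodes')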
Example #9
def load_tud(filename):
    """Load graph data from TUD dataset files.

	Notes
	------
	The graph data is loaded from separate files.
	Check README in `downloadable file <http://tiny.cc/PK_MLJ_data>`__, 2018 for detailed structure.
	"""
    import networkx as nx
    from os import listdir
    from os.path import dirname, basename

    def get_infos_from_readme(
            frm):  # @todo: add README (cuniform), maybe node/edge label maps.
        """Get information from DS_label_readme.txt file.
		"""
        def get_label_names_from_line(line):
            """Get names of labels/attributes from a line.
			"""
            str_names = line.split('[')[1].split(']')[0]
            names = str_names.split(',')
            names = [attr.strip() for attr in names]
            return names

        def get_class_label_map(label_map_strings):
            label_map = {}
            for string in label_map_strings:
                integer, label = string.split('\t')
                label_map[int(integer.strip())] = label.strip()
            return label_map

        label_names = {
            'node_labels': [],
            'node_attrs': [],
            'edge_labels': [],
            'edge_attrs': []
        }
        class_label_map = None
        class_label_map_strings = []
        with open(frm) as rm:
            content_rm = rm.read().splitlines()
        i = 0
        while i < len(content_rm):
            line = content_rm[i].strip()
            # get node/edge labels and attributes.
            if line.startswith('Node labels:'):
                label_names['node_labels'] = get_label_names_from_line(line)
            elif line.startswith('Node attributes:'):
                label_names['node_attrs'] = get_label_names_from_line(line)
            elif line.startswith('Edge labels:'):
                label_names['edge_labels'] = get_label_names_from_line(line)
            elif line.startswith('Edge attributes:'):
                label_names['edge_attrs'] = get_label_names_from_line(line)
            # get class label map.
            elif line.startswith(
                    'Class labels were converted to integer values using this map:'
            ):
                i += 2
                line = content_rm[i].strip()
                while line != '' and i < len(content_rm):
                    class_label_map_strings.append(line)
                    i += 1
                    line = content_rm[i].strip()
                class_label_map = get_class_label_map(class_label_map_strings)
            i += 1

        return label_names, class_label_map

    # get dataset name.
    dirname_dataset = dirname(filename)
    filename = basename(filename)
    fn_split = filename.split('_A')
    ds_name = fn_split[0].strip()

    # load data file names
    for name in listdir(dirname_dataset):
        if ds_name + '_A' in name:
            fam = dirname_dataset + '/' + name
        elif ds_name + '_graph_indicator' in name:
            fgi = dirname_dataset + '/' + name
        elif ds_name + '_graph_labels' in name:
            fgl = dirname_dataset + '/' + name
        elif ds_name + '_node_labels' in name:
            fnl = dirname_dataset + '/' + name
        elif ds_name + '_edge_labels' in name:
            fel = dirname_dataset + '/' + name
        elif ds_name + '_edge_attributes' in name:
            fea = dirname_dataset + '/' + name
        elif ds_name + '_node_attributes' in name:
            fna = dirname_dataset + '/' + name
        elif ds_name + '_graph_attributes' in name:
            fga = dirname_dataset + '/' + name
        elif ds_name + '_label_readme' in name:
            frm = dirname_dataset + '/' + name
        # this is supposed to be the node attrs, make sure to put this as the last 'elif'
        elif ds_name + '_attributes' in name:
            fna = dirname_dataset + '/' + name

    # get labels and attributes names.
    if 'frm' in locals():
        label_names, class_label_map = get_infos_from_readme(frm)
    else:
        label_names = {
            'node_labels': [],
            'node_attrs': [],
            'edge_labels': [],
            'edge_attrs': []
        }
        class_label_map = None

    with open(fgi) as gi:
        content_gi = gi.read().splitlines()  # graph indicator
    with open(fam) as am:
        content_am = am.read().splitlines()  # adjacency matrix

    # load targets.
    if 'fgl' in locals():
        with open(fgl) as gl:
            content_targets = gl.read().splitlines(
            )  # targets (classification)
        targets = [int(i) for i in content_targets]  # class labels are integers
    elif 'fga' in locals():
        with open(fga) as ga:
            content_targets = ga.read().splitlines()  # targets (regression)
        targets = [float(i) for i in content_targets]
    else:
        raise Exception(
            'Cannot find targets file. Please make sure there is a "' + ds_name +
            '_graph_labels.txt" or "' + ds_name + '_graph_attributes.txt" file '
            'in your dataset folder.')
    if class_label_map is not None:
        targets = [class_label_map[t] for t in targets]

    # create graphs and add nodes
    data = [nx.Graph(name=str(i)) for i in range(0, len(content_targets))]
    if 'fnl' in locals():
        with open(fnl) as nl:
            content_nl = nl.read().splitlines()  # node labels
        for idx, line in enumerate(content_gi):
            # transfer to int first in case of unexpected blanks
            data[int(line) - 1].add_node(idx)
            labels = [l.strip() for l in content_nl[idx].split(',')]
            if label_names['node_labels'] == []:  # @todo: need fix bug.
                for i, label in enumerate(labels):
                    l_name = 'label_' + str(i)
                    data[int(line) - 1].nodes[idx][l_name] = label
                    label_names['node_labels'].append(l_name)
            else:
                for i, l_name in enumerate(label_names['node_labels']):
                    data[int(line) - 1].nodes[idx][l_name] = labels[i]
    else:
        for i, line in enumerate(content_gi):
            data[int(line) - 1].add_node(i)

    # add edges
    for line in content_am:
        tmp = line.split(',')
        n1 = int(tmp[0]) - 1
        n2 = int(tmp[1]) - 1
        # ignore edge weight here.
        g = int(content_gi[n1]) - 1
        data[g].add_edge(n1, n2)

    # add edge labels
    if 'fel' in locals():
        with open(fel) as el:
            content_el = el.read().splitlines()
        for idx, line in enumerate(content_el):
            labels = [l.strip() for l in line.split(',')]
            n = [int(i) - 1 for i in content_am[idx].split(',')]
            g = int(content_gi[n[0]]) - 1
            if label_names['edge_labels'] == []:
                for i, label in enumerate(labels):
                    l_name = 'label_' + str(i)
                    data[g].edges[n[0], n[1]][l_name] = label
                    label_names['edge_labels'].append(l_name)
            else:
                for i, l_name in enumerate(label_names['edge_labels']):
                    data[g].edges[n[0], n[1]][l_name] = labels[i]

    # add node attributes
    if 'fna' in locals():
        with open(fna) as na:
            content_na = na.read().splitlines()
        for idx, line in enumerate(content_na):
            attrs = [a.strip() for a in line.split(',')]
            g = int(content_gi[idx]) - 1
            if label_names['node_attrs'] == []:
                for i, attr in enumerate(attrs):
                    a_name = 'attr_' + str(i)
                    data[g].nodes[idx][a_name] = attr
                    label_names['node_attrs'].append(a_name)
            else:
                for i, a_name in enumerate(label_names['node_attrs']):
                    data[g].nodes[idx][a_name] = attrs[i]

    # add edge attributes
    if 'fea' in locals():
        with open(fea) as ea:
            content_ea = ea.read().splitlines()
        for idx, line in enumerate(content_ea):
            attrs = [a.strip() for a in line.split(',')]
            n = [int(i) - 1 for i in content_am[idx].split(',')]
            g = int(content_gi[n[0]]) - 1
            if label_names['edge_attrs'] == []:
                for i, attr in enumerate(attrs):
                    a_name = 'attr_' + str(i)
                    data[g].edges[n[0], n[1]][a_name] = attr
                    label_names['edge_attrs'].append(a_name)
            else:
                for i, a_name in enumerate(label_names['edge_attrs']):
                    data[g].edges[n[0], n[1]][a_name] = attrs[i]

    return data, targets, label_names
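Typical call pattern (the path is a placeholder; the loader expects the standard TUD layout, i.e. <DS>_A.txt, <DS>_graph_indicator.txt, <DS>_graph_labels.txt and friends sitting in the same folder):

graphs, targets, label_names = load_tud('datasets/MUTAG/MUTAG_A.txt')
print(len(graphs), 'graphs; first target:', targets[0])
print('node label names:', label_names['node_labels'])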
Example #10
 def test_density_selfloop(self):
     G = nx.Graph()
     G.add_edge(1,1)
     assert_equal(networkx.density(G), 0.0)
     G.add_edge(1,2)
     assert_equal(networkx.density(G), 2.0)
Example #11
    def _encode_to_numeric(self):

        # Unique_layers = set(n[1] for n in self.core_network.nodes())
        # individual_adj = defaultdict(list)

        # for layer in unique_layers:

        if self.network_type != "multiplex":
            new_edges = []
            nmap = {}
            n_count = 0
            n1 = []
            n2 = []
            w = []

            if self.directed:
                simple_graph = nx.DiGraph()
            else:
                simple_graph = nx.Graph()

            for edge in self.core_network.edges(data=True):
                node_first = edge[0]
                node_second = edge[1]
                if node_first not in nmap:
                    nmap[node_first] = n_count
                    n_count += 1
                if node_second not in nmap:
                    nmap[node_second] = n_count
                    n_count += 1
                try:
                    weight = float(edge[2]['weight'])
                except:
                    weight = 1

                simple_graph.add_edge(nmap[node_first],
                                      nmap[node_second],
                                      weight=weight)
            vectors = nx.to_scipy_sparse_matrix(simple_graph)
            self.numeric_core_network = vectors
            self.node_order_in_matrix = simple_graph.nodes()

        else:
            unique_layers = set(n[1] for n in self.core_network.nodes())
            individual_adj = []
            all_nodes = []
            for layer in unique_layers:
                layer_nodes = [
                    n for n in self.core_network.nodes() if n[1] == layer
                ]
                H = self.core_network.subgraph(layer_nodes)
                adj = nx.to_numpy_matrix(H)
                all_nodes += list(H.nodes())
                individual_adj.append(adj)

            whole_mat = []
            for en, adj_mat in enumerate(individual_adj):
                cross = np.identity(adj_mat.shape[0])
                one_row = []
                for j in range(len(individual_adj)):
                    if j < en or j > en:
                        one_row.append(cross)
                    if j == en:
                        one_row.append(adj_mat)

                whole_mat.append(np.hstack(one_row))

            vectors = np.vstack(whole_mat)
            self.numeric_core_network = vectors
            self.node_order_in_matrix = all_nodes
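The multiplex branch assembles a supra-adjacency matrix: per-layer adjacency blocks on the diagonal and identity couplings elsewhere. A standalone sketch of that block assembly (illustrative layer matrices, not the class's data):

import numpy as np

layer_adjs = [np.array([[0, 1], [1, 0]]), np.array([[0, 0], [1, 0]])]
rows = []
for i, A in enumerate(layer_adjs):
    blocks = [A if j == i else np.identity(A.shape[0])
              for j in range(len(layer_adjs))]
    rows.append(np.hstack(blocks))
supra = np.vstack(rows)
print(supra.shape)  # (4, 4)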
Example #12
 def test_density(self):
     assert_equal(networkx.density(self.G), 0.5)
     assert_equal(networkx.density(self.DG), 0.3)
     G=networkx.Graph()
     G.add_node(1)
     assert_equal(networkx.density(G), 0.0)
Example #13
 def __init__(self):
     self.network = networkx.Graph()
     self.channel_identifier_to_participants = {}
Example #14
# game matrix payoffs
a = float(sys.argv[1])
b = float(sys.argv[2])

# need for coordination
c = float(sys.argv[6])
if c < 0 or c > 1:
    sys.exit("the condition 0 <= c <= 1 is violated")

# two player game matrix
game_matrix_1 = [[(a, a), ((1-c)*a, (1-c)*b)], [((1-c)*b, (1-c)*a), (b, b)]]
game_matrix_2 = [[(b, b), ((1-c)*b, (1-c)*a)], [((1-c)*a, (1-c)*b), (a, a)]]
game_matrix = game_matrix_1

# initializing the network structure
network = nx.Graph()
network_type = str(sys.argv[3])              # 'watts' for watts strogatz model, 'grid' for grid network
network_parameters_str = str(sys.argv[4])
network_parameters_list = network_parameters_str.split(",")
network_model = []
if network_type == 'grid':
    network_model = [int(i) for i in network_parameters_list]
elif network_type == 'watts':
    network_model = list()
    network_model.append(int(network_parameters_list[0]))
    network_model.append(int(network_parameters_list[1]))
    network_model.append(float(network_parameters_list[2]))
elif network_type == 'clusters':
    network_model = list()
    network_model.append(int(network_parameters_list[0]))
    network_model.append(int(network_parameters_list[1]))
	for cluster in nodes_of_clusters.keys():
		print cluster
		new = []
		for i in nodes_of_clusters[cluster]:
			new.append(int(i)-1)
		sub_pos = A_emb[new]
		plt.scatter(sub_pos[:,0],sub_pos[:,1],color = plt.cm.Spectral(cluster / 5.),linewidths = 0.1,alpha=0.1)
	print "end!"
	plt.show()


if __name__ == "__main__":
	filename = sys.argv[1]
	num_clusters = int(sys.argv[2])
	G = read_graph(filename)
	graph = nx.Graph(G)

	# largest connected component: 
	# adj_largest_mat, nodes_largest_graph = largest_subgraph_adj(graph)
	# nodes_largest_map, nodes_largest_invmap = map_nodes(nodes_largest_graph)

	# original graph:
	adj_largest_mat = nx.adjacency_matrix(graph)
	nodes_largest_graph = list(graph.nodes())
	print "adj_largest_mat shape = ", adj_largest_mat.shape
	# map nodes 
	nodes_largest_map, nodes_largest_invmap = map_nodes(nodes_largest_graph)

	# 2-D embedding
	A_d, A_emb = embedding_adj(adj_largest_mat)
	print "A_d shape = ", A_d.shape
Example #16
def load_ct(
    filename
):  # @todo: this function is only tested on CTFile V2000; header not considered; only simple cases (atoms and bonds are considered.)
    """load data from a Chemical Table (.ct) file.

	Notes
	------
	a typical example of data in .ct is like this:

	3 2  <- number of nodes and edges

	0.0000	0.0000	0.0000 C <- each line describes a node (x,y,z + label)

	0.0000	0.0000	0.0000 C

	0.0000	0.0000	0.0000 O

	1  3  1  1 <- each line describes an edge : to, from, bond type, bond stereo

	2  3  1  1
	  
	Check `CTFile Formats file <https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=10&ved=2ahUKEwivhaSdjsTlAhVhx4UKHczHA8gQFjAJegQIARAC&url=https%3A%2F%2Fwww.daylight.com%2Fmeetings%2Fmug05%2FKappler%2Fctfile.pdf&usg=AOvVaw1cDNrrmMClkFPqodlF2inS>`__
	for detailed format description.
	"""
    import networkx as nx
    from os.path import basename
    g = nx.Graph()
    with open(filename) as f:
        content = f.read().splitlines()
        g = nx.Graph(name=str(content[0]),
                     filename=basename(filename))  # set name of the graph

        # read the counts line.
        tmp = content[1].split(' ')
        tmp = [x for x in tmp if x != '']
        nb_atoms = int(tmp[0].strip())  # number of atoms
        nb_bonds = int(tmp[1].strip())  # number of bonds
        count_line_tags = [
            'number_of_atoms', 'number_of_bonds', 'number_of_atom_lists', '',
            'chiral_flag', 'number_of_stext_entries', '', '', '', '',
            'number_of_properties', 'CT_version'
        ]
        i = 0
        while i < len(tmp):
            if count_line_tags[i] != '':  # if not obsoleted
                g.graph[count_line_tags[i]] = tmp[i].strip()
            i += 1

        # read the atom block.
        atom_tags = [
            'x', 'y', 'z', 'atom_symbol', 'mass_difference', 'charge',
            'atom_stereo_parity', 'hydrogen_count_plus_1', 'stereo_care_box',
            'valence', 'h0_designator', '', '', 'atom_atom_mapping_number',
            'inversion_retention_flag', 'exact_change_flag'
        ]
        for i in range(0, nb_atoms):
            tmp = content[i + 2].split(' ')
            tmp = [x for x in tmp if x != '']
            g.add_node(i)
            j = 0
            while j < len(tmp):
                if atom_tags[j] != '':
                    g.nodes[i][atom_tags[j]] = tmp[j].strip()
                j += 1

        # read the bond block.
        bond_tags = [
            'first_atom_number', 'second_atom_number', 'bond_type',
            'bond_stereo', '', 'bond_topology', 'reacting_center_status'
        ]
        for i in range(0, nb_bonds):
            tmp = content[i + g.number_of_nodes() + 2].split(' ')
            tmp = [x for x in tmp if x != '']
            n1, n2 = int(tmp[0].strip()) - 1, int(tmp[1].strip()) - 1
            g.add_edge(n1, n2)
            j = 2
            while j < len(tmp):
                if bond_tags[j] != '':
                    g.edges[(n1, n2)][bond_tags[j]] = tmp[j].strip()
                j += 1

    # get label names.
    label_names = {
        'node_labels': [],
        'edge_labels': [],
        'node_attrs': [],
        'edge_attrs': []
    }
    atom_symbolic = [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, None, None, 1, 1, 1]
    for nd in g.nodes():
        for key in g.nodes[nd]:
            if atom_symbolic[atom_tags.index(key)] == 1:
                label_names['node_labels'].append(key)
            else:
                label_names['node_attrs'].append(key)
        break
    bond_symbolic = [None, None, 1, 1, None, 1, 1]
    for ed in g.edges():
        for key in g.edges[ed]:
            if bond_symbolic[bond_tags.index(key)] == 1:
                label_names['edge_labels'].append(key)
            else:
                label_names['edge_attrs'].append(key)
        break

    return g, label_names
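A runnable sketch that writes the three-atom example from the docstring to a temporary .ct file and loads it; note the loader reads the counts from the second line, so a name line comes first (file name and molecule name are arbitrary):

sample_ct = """mol-1
 3 2
0.0000 0.0000 0.0000 C
0.0000 0.0000 0.0000 C
0.0000 0.0000 0.0000 O
1 3 1 1
2 3 1 1
"""
with open('sample.ct', 'w') as fh:
    fh.write(sample_ct)

g, label_names = load_ct('sample.ct')
print(g.number_of_nodes(), 'atoms,', g.number_of_edges(), 'bonds')
print(label_names['node_labels'], label_names['edge_labels'])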
    """
    rankval_lp = pulp.LpProblem("rank-value", pulp.LpMaximize)
    agents = [(agent, data) for agent, data in G.nodes.items() if data['bipartite'] == 1]
    items = [(item, data) for item, data in G.nodes.items() if data['bipartite'] == 0]
    assig = [[]]
    for i in range(len(agents)):
        for item in items:
            assig[i].append(pulp.LpVariable('x' + str(agents[i]) + str(item), lowBound=0, upBound=1, cat='Continuous'))
        assig.append([])
    for i in range(len(agents)):
        for j in range(len(items)):
            rank = agents[i]['ranking'][items[j]]
            rankValue = rank_vector[rank-1]
            rankval_lp += rankValue * assig[i][j]
    rankval_lp.solve()
    matchG = nx.Graph()
    matchG.add_nodes_from(Gph)
    for i in range(len(agents)):
        for j in range(len(items)):
            if assig[i][j].varValue == 1:
                matchG.add_edge(agents[i], items[j])
    return matchG

#
# def psm(G):
#


# def galeShapleyG(G):
#     matchG = nx.Graph()
#     matchG.add_nodes_from(G)
Example #18
def text_rank(dateJson, source, model, wiki_model):

    data = []

    for item in source:
        if item['create_time'][0:8] == dateJson:
            data.append(item)
    if len(data) == 0:
        return

    print '# of records:' + str(len(data))

    print data[0]['create_time']
    dict_day = collections.OrderedDict()

    Graph = networkx.Graph()

    for item in data:
        for word in item['cut_text']:
            if word not in dict_day:
                dict_day[word] = 1
            else:
                dict_day[word] += 1

    dict_day = sorted(dict_day.iteritems(), key=lambda d: d[1], reverse=True)
    print dict_day[0]

    fre_files = open(path.hotwords_path_fre.format(dateJson), 'w+')
    try:
        for i in range(len(dict_day)):
            line = {
                'keyword': dict_day[i][0].encode('utf-8'),
                'num': str(dict_day[i][1])
            }
            fre_files.write('{}\n'.format(json.dumps(line,
                                                     ensure_ascii=False)))
    except IndexError:
        print 'done'
    fre_files.close()

    dict_day = [item for item in dict_day if item[1] > 100]

    print 'adding edges...'

    scale = len(dict_day)
    for x in range(scale):
        for y in range(x, scale):
            if x == y:
                continue
            sim = 0.1 * cosine.get_cosine(
                collections.Counter(dict_day[x][0].encode('utf-8')),
                collections.Counter(dict_day[y][0].encode('utf-8')))
            if dict_day[x][0] in model and dict_day[y][0] in model:
                sim += 0.2 * model.similarity(dict_day[x][0], dict_day[y][0])
            if dict_day[x][0] in wiki_model and dict_day[y][0] in wiki_model:
                sim += 0.7 * wiki_model.similarity(dict_day[x][0],
                                                   dict_day[y][0])
            if sim > 0:
                Graph.add_edge(x, y, weight=sim)

    print '# of nodes: ' + str(Graph.number_of_nodes())
    print '# of edges: ' + str(Graph.number_of_edges())
    print 'pageranking...'

    pr = networkx.pagerank(Graph)

    result = [[
        dict_day[pr.keys()[j]][0], dict_day[pr.keys()[j]][1],
        pr.values()[j]
    ] for j in range(len(pr.values()))]

    sum_of_fre = sum([item[1] for item in result])

    result = sorted(result, key=lambda d: d[2], reverse=True)
    pr_file = open(path.hotwords_path_pr.format(dateJson), 'w+')
    try:
        for i in range(len(result)):
            line = {
                'keyword': result[i][0].encode('utf-8'),
                'num': str(result[i][1]),
                'factor': str(result[i][2])
            }
            pr_file.write('{}\n'.format(json.dumps(line, ensure_ascii=False)))
    except IndexError:
        print 'done'
    pr_file.close()

    result = sorted(result, key=lambda d: d[2] * d[1], reverse=True)
    hot_words_file = open(path.hotwords_path_frepr.format(dateJson), 'w+')
    try:
        for i in range(len(result)):
            line = {
                'keyword': result[i][0].encode('utf-8'),
                'num': str(result[i][1]),
                'factor': str(result[i][2])
            }
            hot_words_file.write('{}\n'.format(
                json.dumps(line, ensure_ascii=False)))
    except IndexError:
        print 'done'
    hot_words_file.close()
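The core idea above (add an edge whenever the combined similarity is positive, then rank words with PageRank) in a minimal Python 3 form, with made-up similarity scores:

import networkx as nx

sims = {(0, 1): 0.6, (1, 2): 0.3, (0, 3): 0.1}
Graph = nx.Graph()
for (x, y), sim in sims.items():
    if sim > 0:
        Graph.add_edge(x, y, weight=sim)
pr = nx.pagerank(Graph)
print(sorted(pr.items(), key=lambda kv: kv[1], reverse=True))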
def createGraph(dummy, switch, crossing):
    G = nx.Graph()
    dummy_counter = dummy
    switch_counter = switch
    crossing_counter = crossing

    G.add_node(0, type="LimitOfNetwork")
    node_list = [0]
    node_number = 1
    # print("Adding base nodes")
    for counter in range(dummy_counter+switch_counter+crossing_counter):
        choice_list = ["dummy", "switch", "crossing"]        
        if dummy_counter == 0:
            choice_list.remove("dummy")
        if switch_counter == 0:
            choice_list.remove("switch")
        if crossing_counter == 0:
            choice_list.remove("crossing")
        if choice_list:
            random_type = random.choice(choice_list)
        if crossing_counter == 0 and switch_counter == 0 and dummy_counter == 0:
            # nothing left to add
            pass
        elif crossing_counter == 0 and switch_counter == 0:
            # print("if crossing_counter == 0 and switch_counter == 0")
            G.add_node(node_number, type="DummyNode")
            # edge_choice = random.choice(node_list)
            # G.add_edge(edge_choice, node_number)
            # node_list.remove(edge_choice)
            G.add_edge(random.choice(node_list), node_number)
            if 0 in node_list:
                node_list.remove(0)
            dummy_counter -= 1
            node_list.append(node_number)
        elif switch_counter == 0 and dummy_counter == 0:
            # print("switch_counter == 0 and dummy_counter == 0")
            G.add_node(node_number, type="FlatCrossing")
            G.add_edge(random.choice(node_list), node_number)
            G.add_edge(random.choice(node_list), node_number)
            G.add_edge(random.choice(node_list), node_number)
            if 0 in node_list:
                node_list.remove(0)
            crossing_counter -= 1
            node_list.append(node_number)
        elif crossing_counter == 0 and dummy_counter == 0:
            # print("crossing_counter == 0 and dummy_counter == 0")
            G.add_node(node_number, type="Switch")
            G.add_edge(random.choice(node_list), node_number)
            G.add_edge(random.choice(node_list), node_number)
            if 0 in node_list:
                node_list.remove(0)
            switch_counter -= 1
            node_list.append(node_number)
        elif random_type == "dummy" and dummy_counter > 0:
            # print("random_type == 'dummy' and dummy_counter > 0")
            G.add_node(node_number, type="DummyNode")
            G.add_edge(random.choice(node_list), node_number)
            if 0 in node_list:
                node_list.remove(0)
            dummy_counter -= 1
            node_list.append(node_number)
        elif random_type == "switch" and switch_counter > 0:
            # print("random_type == 'switch' and switch_counter > 0")
            G.add_node(node_number, type="Switch")
            G.add_edge(random.choice(node_list), node_number)
            G.add_edge(random.choice(node_list), node_number)
            if 0 in node_list:
                G.add_edge(0, node_number)
                node_list.remove(0)
            switch_counter -= 1
            node_list.append(node_number)
        elif random_type == "crossing" and crossing_counter > 0:
            # print("random_type == 'crossing' and crossing_counter > 0")
            G.add_node(node_number, type="FlatCrossing")
            G.add_edge(random.choice(node_list), node_number)
            G.add_edge(random.choice(node_list), node_number)
            G.add_edge(random.choice(node_list), node_number)
            if 0 in node_list:
                node_list.remove(0)
            crossing_counter -= 1
            node_list.append(node_number)
        
        node_number += 1

    
        # print("Remove nodes which satisfy type and degree")
        for node in G:
            if G.nodes[node]['type'] == 'Switch' and G.degree[node] == 3:
                # print("Remove: G.nodes[node]['type'] == 'Switch' and G.degree[node] == 3")
                if node in node_list:
                    node_list.remove(node)
            elif G.nodes[node]['type'] == 'FlatCrossing' and G.degree[node] == 4:
                # print("Remove: G.nodes[node]['type'] == 'FlatCrossing' and G.degree[node] == 4")
                if node in node_list:
                    node_list.remove(node)
            elif G.nodes[node]['type'] == 'DummyNode' and G.degree[node] == 2:
                # print("Remove: G.nodes[node]['type'] == 'DummyNode' and G.degree[node] == 2")
                if node in node_list:
                    node_list.remove(node)
    

    add_limit_node_number = len(G.nodes())
    # print("Adding LimitOFNetwork to nodes whcih dont have the correct degree for their type")
    for node in node_list:
        node
        if G.nodes[node]['type'] == 'DummyNode' and G.degree[node] == 1:
            # print("G.nodes[node]['type'] == 'DummyNode' and G.degree[node] == 1")
            G.add_node(add_limit_node_number, type="LimitOfNetwork")
            G.add_edge(node, add_limit_node_number)
            add_limit_node_number += 1
        elif G.nodes[node]['type'] == 'Switch' and G.degree[node] == 1:
            # print("G.nodes[node]['type'] == 'Switch' and G.degree[node] == 1")
            G.add_node(add_limit_node_number, type="LimitOfNetwork")
            G.add_edge(node, add_limit_node_number)
            add_limit_node_number += 1
            G.add_node(add_limit_node_number, type="LimitOfNetwork")
            G.add_edge(node, add_limit_node_number)
            add_limit_node_number += 1
        elif G.nodes[node]['type'] == 'Switch' and G.degree[node] == 2:
            # print("G.nodes[node]['type'] == 'Switch' and G.degree[node] == 2")
            G.add_node(add_limit_node_number, type="LimitOfNetwork")
            G.add_edge(node, add_limit_node_number)
            add_limit_node_number += 1
        elif G.nodes[node]['type'] == 'FlatCrossing' and G.degree[node] == 1:
            # print("G.nodes[node]['type'] == 'FlatCrossing' and G.degree[node] == 1")
            G.add_node(add_limit_node_number, type="LimitOfNetwork")
            G.add_edge(node, add_limit_node_number)
            add_limit_node_number += 1
            G.add_node(add_limit_node_number, type="LimitOfNetwork")
            G.add_edge(node, add_limit_node_number)
            add_limit_node_number += 1
            G.add_node(add_limit_node_number, type="LimitOfNetwork")
            G.add_edge(node, add_limit_node_number)
            add_limit_node_number += 1
        elif G.nodes[node]['type'] == 'FlatCrossing' and G.degree[node] == 2:
            # print("G.nodes[node]['type'] == 'FlatCrossing' and G.degree[node] == 2")
            G.add_node(add_limit_node_number, type="LimitOfNetwork")
            G.add_edge(node, add_limit_node_number)
            add_limit_node_number += 1
            G.add_node(add_limit_node_number, type="LimitOfNetwork")
            G.add_edge(node, add_limit_node_number)
            add_limit_node_number += 1
        elif G.nodes[node]['type'] == 'FlatCrossing' and G.degree[node] == 3:
            # print("G.nodes[node]['type'] == 'FlatCrossing' and G.degree[node] == 3")
            G.add_node(add_limit_node_number, type="LimitOfNetwork")
            G.add_edge(node, add_limit_node_number)
            add_limit_node_number += 1
    """Graph data for originally made graph"""
    """
    print("Data and graph for G")
    print(type(nx.number_connected_components(G)))
    print(nx.info(G)) 
    print("\n")
    """
    return G
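
# A minimal sanity check (a sketch, not part of the original generator; the helper name
# check_degree_conventions is hypothetical): verify that every typed node in a returned
# graph G matches the degree conventions used above.
def check_degree_conventions(G):
    expected = {"Switch": 3, "FlatCrossing": 4, "DummyNode": 2, "LimitOfNetwork": 1}
    bad = []
    for node, data in G.nodes(data=True):
        want = expected.get(data.get("type"))
        if want is not None and G.degree[node] != want:
            bad.append((node, data.get("type"), G.degree[node]))
    return bad  # an empty list means every node has the expected degree for its type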
Example #20
def dump_actors_to_gexf(id):
    films = []
    actormap = {}
    edgemap = {}

    query[0]['!pd:/film/film_series/films_in_series'][0]['id'] = id
    r = requests.get('https://www.googleapis.com/freebase/v1/mqlread', params={'query': json.dumps(query)})
    response = json.loads(r.text)
    results = response['result']

    if not results:
        return

    seriesname = results[0]['!pd:/film/film_series/films_in_series'][0]['name']

    for r in results:
        if r['id'] in blacklist: continue

        actors = []
        for s in r['starring']:
            if s['actor'] is None: continue

            aid = s['actor']['id']
            alabel = s['actor']['name']
            actors.append({
                'id': aid,
                'label': alabel
            })
            if aid not in actormap:
                actormap[aid] = {'label': alabel, 'size': 0, 'films': []}
            actormap[aid]['films'].append(r['name'])

        films.append({
            'id': r['id'],
            'label': r['name'],
            'actors': actors
        })

    actorids = list(actormap.keys())

    for f in films:
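        # Each unordered pair of actors appearing together in a film adds 1 to that
        # pair's co-star edge weight.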
        comb = itertools.combinations(f['actors'], 2)
        for c in comb:
            e = tuple(sorted([c[0]['id'], c[1]['id']]))
            edgemap[e] = edgemap.get(e, 0) + 1

    G = nx.Graph()
    for a in actormap:
        attr = {'label': actormap[a]['label'],
                    'films': '|'.join(actormap[a]['films'])
                    }
        attr.update(dict((f,True) for f in actormap[a]['films']))

        G.add_node(a, **attr)
        G.nodes[a]['viz'] = {'size': len(actormap[a]['films'])}

    for e in edgemap:
        G.add_edge(e[0], e[1], weight=edgemap[e])

    outputfile = seriesname.replace(' ','')+'.gexf'
    nx.write_gexf(G, outputfile, version='1.2draft')
    return outputfile
Example #21
def generate_solution_graphs(prediction, folder_name, file_names,
                             sol_file_names):
    test_graph_edges, test_solution_edges, test_terminals = [], [], []
    connected_graphs = 0
    disconnected_graphs = 0
    disconnected_opt_solutions = 0
    rat_arr = []
    node_id_additive = 0

    for j, (file_name,
            sol_file_name) in enumerate(zip(file_names, sol_file_names)):
        edge_list, tree_ver = take_input(folder_name + file_name)
        sol_edge_list, sol_tree_ver = take_input(folder_name + sol_file_name)
        max_id = max([max(u, v) for u, v, _ in edge_list])
        min_id = min([min(u, v) for u, v, _ in edge_list])
        pred = prediction[node_id_additive:node_id_additive + max_id + 1]
        sorted_ind = np.argsort(pred)
        #print(pred)
        #print(sorted_ind)
        pred = np.round(pred)
        opt = len(sol_edge_list)
        G = nx.Graph()
        for e in edge_list:
            u, v, w = e
            G.add_edge(u, v)
        V = []
        '''
  for i, val in enumerate(pred):
   if val==1:
    V.append(i)
    #G.add_edge(*id_to_edge[i])
  '''
        for terminal in tree_ver[0]:
            if terminal not in V:
                V.append(terminal)
        S = G.subgraph(V)
        #'''
        #is_disconnected = False
        #if not nx.is_connected(S):
        # is_disconnected = True
        is_disconnected = not nx.is_connected(S)
        k = len(sorted_ind) - 1
        #print("k", k)
        while not nx.is_connected(S):
            u = sorted_ind[k]
            if u not in V:
                V.append(u)
                S = G.subgraph(V)
            k -= 1
        # S is always connected after the repair loop above, so classify each instance by
        # whether the original predicted subgraph was connected, and track how many of the
        # repaired instances reach the optimal solution size.
        if is_disconnected:
            disconnected_graphs += 1
            mst = nx.minimum_spanning_edges(S)
            apprx = len(list(mst))
            if opt == apprx:
                disconnected_opt_solutions += 1
        else:
            connected_graphs += 1
            mst = nx.minimum_spanning_edges(S)
            apprx = len(list(mst))
            rat_arr.append(apprx / opt)

        node_id_additive += (max_id + 1)

    print("No. of connected graphs:", connected_graphs)
    total_rat = 0
    max_rat = 0
    min_rat = 1000
    for rat in rat_arr:
        max_rat = max(max_rat, rat)
        min_rat = min(min_rat, rat)
        total_rat += rat
    avg_rat = total_rat / connected_graphs if connected_graphs > 0 else 0.0
    print("maximum ratio:", max_rat)
    print("average ratio:", avg_rat)
    print("minimum ratio:", min_rat)
    print("No. of disconnected graphs:", disconnected_graphs)
    print("No. of disconnected graphs become optimal after connecting:",
          disconnected_opt_solutions)
Example #22
def homophilic_barabasi_albert_graph(N, m , minority_fraction, similitude, p_clustering = 0.3 ):
    """Return homophilic random graph using BA preferential attachment model.

    A graph of N nodes is grown by attaching new nodes, each with m
    edges that are preferentially attached to existing nodes with high
    degree. Connections are established with a linking probability that
    depends on the degree of the existing nodes and on the similitude
    (similarity) between nodes; similitude ranges from 0 to 1.

    Parameters
    ----------
    N : int
        Number of nodes
    m : int
        Number of edges to attach from a new node to existing nodes
    p_clustering : float, optional (default=0.3)
        Probability of closing a triangle instead of performing a plain
        preferential-attachment step for each additional edge.

    minority_fraction : float
        fraction of minorities in the network

    similitude: float
        value between 0 and 1 giving the similarity between nodes. Nodes with the
        same attribute are at distance 1 - similitude, so higher similitude means
        a smaller distance.

    Returns
    -------
    G : Graph

    Notes
    -----
    The initialization is a graph with m nodes and no edges.

    References
    ----------
    .. [1] A. L. Barabasi and R. Albert "Emergence of scaling in
       random networks", Science 286, pp 509-512, 1999.
    """
    if m < 1 or N < m:
        raise ("Network must have m>1 and m<n, m=%d,n=%d"%(m,n))


    G = nx.Graph()

    minority = int(minority_fraction * N)

    minority_nodes = random.sample(range(N),minority)
    node_attribute = {}
    for n in range(N):
        if n in minority_nodes:
            G.add_node(n , color = 'red')
            node_attribute[n] = 'minority'
        else:
            G.add_node(n , color = 'blue')
            node_attribute[n] = 'majority'



    #create homophilic distance ### faster to do it outside loop ###
    dist = defaultdict(int) #distance between nodes

    for n1 in range(N):
        n1_attr = node_attribute[n1]
        for n2 in range(N):
            n2_attr = node_attribute[n2]
            if n1_attr == n2_attr:
                dist[(n1,n2)] = 1 - similitude # higher similarity, lower distance
            else:
                dist[(n1,n2)] = similitude



    target_list=list(range(m))
    source = m #start with m nodes

    while source < N:

        targets = _pick_targets(G,source,target_list,dist,m)
        if targets == set([]): #if target list is empty
            continue
        # do one homophilic pref. attachment for new node
        target = targets.pop()
        #if targets != set(): #if the node does  find the neighbor
        G.add_edge(source, target)

        if targets == set([]): #if target list is empty
            continue
        count = 1
        while count < m:
            if random.random() < p_clustering:
                neighborhood=[nbr for nbr in G.neighbors(target) \
                               if not G.has_edge(source,nbr) \
                               and not nbr==source]
                if neighborhood: # if there is a neighbor without a link
                    nbr=random.choice(neighborhood)
                    G.add_edge(source,nbr) # add triangle
                    count=count+1
                    continue # go to top of while loop
            # else do preferential attachment step if above fails
            target=targets.pop()
            G.add_edge(source,target)
            count=count+1
        target_list.append(source)
        source += 1

    return G
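
# A minimal usage sketch (illustrative values; assumes the module's helper _pick_targets
# and its imports, e.g. random and networkx, are available as in the original source):
# grow a homophilic BA network and count how many nodes ended up in the minority group.
G_demo = homophilic_barabasi_albert_graph(N=200, m=2, minority_fraction=0.2, similitude=0.8)
n_minority = sum(1 for _, c in G_demo.nodes(data='color') if c == 'red')
print('nodes:', G_demo.number_of_nodes(), 'minority nodes:', n_minority)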
Example #23
def test_empty(self):
    G = networkx.Graph()
    assert_equal(networkx.pagerank(G), {})
    assert_equal(networkx.pagerank_numpy(G), {})
    assert_equal(networkx.google_matrix(G).shape, (0, 0))
def authorGroup(articleList, anchorArticle, anchorAuthor,
                absMatchLimit=0.5, titleMatchLimit=0.6, unconnLimit=5):
    """
    Given a list of articles, go through and find the ones that are connected to anchorArticle.

    Return the list of papers and the paper network graph that was constructed
    """
    # so I can do article.references to get the list of ads.Article references.
    # Looking at http://adsabs.github.io/help/actions/visualize/#paper-network
    # Maybe I should just resort to a graph network. Make a node for each paper. Make an edge if two papers share more than N references in common, or 3 or more authors in common, or an affiliation and year+/-2?

    # to find common elements of two lists:
    # common = set(b1) & set(b2)

    if anchorArticle not in articleList:
        articleList.append(anchorArticle)

    # Use these as the nodes in the graph
    articleBibcodes = [article.bibcode for article in articleList]
    # Make a handy dict for later:
    bibcodeDict = {}
    for article in articleList:
        bibcodeDict[article.bibcode] = article

    # For each article, add an attribute that is a list of
    # bibcodes for that article's references
    # and an attribute that is the set of authors.
    # I should really make a dict, but it's sooo compact to just add attributes
    for article in articleList:
    #    refs = article.references
    #    article.refbibcodes = [ref.bibcode for ref in refs]
        article.authorset = set([authSimple(author)  for author in article.author])

    # Create graph
    paperGraph = nx.Graph()
    # Add the bibcodes as nodes
    paperGraph.add_nodes_from(articleBibcodes)
    nArticles = len(articleList)

    # Do the abstract analysis here to try and speed it up
    abstracts = []
    titles = []
    for paper in articleList:
        if hasattr(paper,'abstract'):
            if paper.abstract is not None:
                abstracts.append(paper.abstract)
            else:
                abstracts.append('')
        else:
            abstracts.append('')
        if hasattr(paper,'title'):
            titles.append(paper.title[0])
        else:
            titles.append('')
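
    # TfidfVectorizer L2-normalizes each row by default, so tfidf * tfidf.T below is the
    # matrix of pairwise cosine similarities between abstracts (and, further down, titles).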
    vect = TfidfVectorizer(min_df=1)
    tfidf = vect.fit_transform(abstracts)
    abstractArray = (tfidf * tfidf.T).A

    vect = TfidfVectorizer(min_df=1)
    tfidf = vect.fit_transform(titles)
    titleArray = (tfidf * tfidf.T).A


    for i in range(nArticles-1):
        for j in range(i+1, nArticles):
            match = False
            if abstractArray[i][j] > absMatchLimit:
                match = True
            elif titleArray[i][j] > titleMatchLimit:
                match = True
            if not match:
                match = checkAuthorMatch(articleList[i], articleList[j],
                                         authorName=anchorAuthor)
            if match:
                paperGraph.add_edge(articleList[i].bibcode,
                                    articleList[j].bibcode)

    # Find all the papers that are connected to the articleNode
    connectedBibcodes = nx.node_connected_component(paperGraph, anchorArticle.bibcode)
    # Convert to a list of ads article objects to pass back
    connectedPapers = [bibcodeDict[paperbibcode] for paperbibcode in connectedBibcodes]
    # Now I have the bibcodes for all the papers that are connected
    # to the anchorArticle

    return connectedPapers, paperGraph
 def island(self, threshold=50):
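     # Keep only the edges whose weight is below the threshold.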
     g2 = nx.Graph()
     for germ1, germ2, edata in self.geneNet.edges(data=True):
         if edata['weight'] < threshold:
             g2.add_edge(germ1, germ2, **edata)
     return g2
    # for (x,y) in G_init.edges(): make_link(G_R,x,y)
    # #print G_R
    # for v in G_R.keys():
    #     clustering_coefficient(G_R,v), " "+v
    #     G_Robustness[v]=clustering_coefficient(G_R,v)
    newCI={}
    for nid in G_CI_value.keys():
        #newCI[nid]=round(float((0.5*G_Balance[nid]+0.5*G_Strength[nid])*G_CI_value[nid])/float(1+G_Robustness[nid]),3)
        #newCI[nid]=round(float(0.5*G_Balance[nid]/float(1+G_c[nid])*G_CI_value[nid]+0.5*G_Strength[nid]*G_CI_value[nid]),3)
        newCI[nid]=round(float(float(1+G_c[nid])*G_CI_value[nid]),3)
        #newCI[nid]=round(float((0.6*G_Strength[nid]+0.4*G_Balance[nid])/float(1+G_c[nid])*G_CI_value[nid]),3)
    return newCI
    #G_NewCI=dict(0.5*G_Balance)
    #list_G_CI_value
G=createGraph(r"C:\Python27\sjxwork\LNewCI\political blogs.csv")
G1 = nx.Graph()
with open(r"C:\Python27\sjxwork\LNewCI\Political blogs.txt") as f:
    n, m = f.readline().split()
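    # Repeated (u, v) lines in the edge list are collapsed into a single edge whose
    # 'weight' counts the number of occurrences.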
    for line in f:
        u, v = map(int, line.split())
        try:
            G1[u][v]['weight'] += 1
        except KeyError:
            G1.add_edge(u,v, weight=1)
LCI={}
CI={}
LCIequalto0=[]
K_SORT={}
k=60 ### size of the influential node set to find
lamida=1
c=0
Example #27
def cluster_entity_linking(
    corpus,
    entity_model,
    topic_modeling_model,
    topic_decomposition = 2,
    topic_length = 10,
    threshold = 0.3,
    fuzzy_ratio = 70,
    accepted_entities = ['law', 'location', 'organization', 'person', 'event'],
    colors = None,
    max_df = 1.0,
    min_df = 1,
    ngram = (2, 3),
    stemming = True,
    cleaning = simple_textcleaning,
    vectorizer = 'bow',
    stop_words = None,
    figsize = (17, 9),
    **kwargs
):
    """
    Plot an undirected graph of the relationships between entities and topics.

    Parameters
    ----------
    corpus: list or str
    colors: list, (default=None)
        list of colors, length must be the same as len(accepted_entities) + 1.
    threshold: float, (default=0.3)
        similarity threshold at or above which an edge is added between two nodes.
    topic_decomposition: int, (default=2)
        size of decomposition.
    topic_length: int, (default=10)
        size of topic models.
    fuzzy_ratio: int, (default=70)
        size of ratio for fuzzywuzzy.
    stemming: bool, (default=True)
        If True, the Sastrawi stemmer will be applied.
    max_df: float, (default=1.0)
        maximum document frequency for a word to be selected.
    min_df: int, (default=1)
        minimum document frequency for a word to be selected.
    ngram: tuple, (default=(2, 3))
        n-gram sizes used to train on the corpus.
    cleaning: function, (default=simple_textcleaning)
        function to clean the corpus.
    stop_words: list, (default=None)
        list of stop words to remove. If None, default is malaya.texts._text_functions.STOPWORDS
    vectorizer: str, (default='bow')
        vectorizer technique. Allowed values:

        * ``'bow'`` - Bag of Word.
        * ``'tfidf'`` - Term frequency inverse Document Frequency.
        * ``'skip-gram'`` - Bag of Word with skipping certain n-grams.

    Returns
    -------
    dictionary: {
        'G': G,
        'pos': pos,
        'node_colors': node_colors,
        'node_labels': node_labels,
    }
    """
    if not isinstance(corpus, list) and not isinstance(corpus, str):
        raise ValueError('corpus must be a list or a string')
    if isinstance(corpus, list):
        if not isinstance(corpus[0], str):
            raise ValueError('corpus must be list of strings')
    if not isinstance(colors, list) and colors is not None:
        raise ValueError('colors must be a list or None')
    if not isinstance(vectorizer, str):
        raise ValueError('vectorizer must be a string')
    if not isinstance(stemming, bool):
        raise ValueError('stemming must be a boolean')
    vectorizer = vectorizer.lower()
    if not vectorizer in ['tfidf', 'bow', 'skip-gram']:
        raise ValueError("vectorizer must be in  ['tfidf', 'bow', 'skip-gram']")
    if not isinstance(ngram, tuple):
        raise ValueError('ngram must be a tuple')
    if not len(ngram) == 2:
        raise ValueError('ngram size must equal to 2')
    if not isinstance(min_df, int):
        raise ValueError('min_df must be an integer')
    if not isinstance(topic_decomposition, int):
        raise ValueError('topic_decomposition must be an integer')
    if not isinstance(topic_length, int):
        raise ValueError('topic_length must be an integer')
    if not isinstance(fuzzy_ratio, int):
        raise ValueError('fuzzy_ratio must be an integer')
    if not isinstance(max_df, float):
        raise ValueError('max_df must be a float')
    if min_df < 1:
        raise ValueError('min_df must be bigger than 0')
    if not (max_df <= 1 and max_df > 0):
        raise ValueError(
            'max_df must be bigger than 0, less than or equal to 1'
        )
    if not (fuzzy_ratio > 0 and fuzzy_ratio <= 100):
        raise ValueError(
            'fuzzy_ratio must be bigger than 0, less than or equal to 100'
        )
    if not isinstance(threshold, float):
        raise ValueError('threshold must be a float')
    if not (threshold <= 1 and threshold > 0):
        raise ValueError(
            'threshold must be bigger than 0, less than or equal to 1'
        )
    if stop_words is None:
        stop_words = STOPWORDS

    try:
        import matplotlib.pyplot as plt
        import seaborn as sns
        import networkx as nx
        import networkx.drawing.layout as nxlayout

        sns.set()
    except ImportError:
        raise Exception(
            'matplotlib, seaborn, networkx not installed. Please install it and try again.'
        )

    if vectorizer == 'tfidf':
        Vectorizer = TfidfVectorizer
    elif vectorizer == 'bow':
        Vectorizer = CountVectorizer
    elif vectorizer == 'skip-gram':
        Vectorizer = SkipGramVectorizer
    else:
        raise ValueError("vectorizer must be in  ['tfidf', 'bow', 'skip-gram']")

    if isinstance(corpus, str):
        corpus = corpus.replace('\n', '.')
        corpus = split_by_dot(corpus)
    else:
        corpus = [c + '.' for c in corpus]
        corpus = ' '.join(corpus)
        corpus = re.findall(r'(?=\S)[^.\n]+(?<=\S)', corpus)
    corpus = [string for string in corpus if len(string) > 5]

    if not colors:
        colors = sns.color_palette(n_colors = len(accepted_entities) + 1)
    else:
        if len(colors) != (len(accepted_entities) + 1):
            raise ValueError(
                'len of colors must same as %d' % (len(accepted_entities) + 1)
            )

    topic_model = topic_modeling_model(
        corpus,
        topic_decomposition,
        stemming = stemming,
        vectorizer = vectorizer,
        ngram = ngram,
        max_df = max_df,
        min_df = min_df,
    )
    topics = []
    for no, topic in enumerate(topic_model.comp.components_):
        for i in topic.argsort()[: -topic_length - 1 : -1]:
            topics.append(topic_model.features[i])

    entities_cluster = {entity: [] for entity in accepted_entities}
    for string in corpus:
        entities_clustered = cluster_entities(entity_model.predict(string))
        for entity in accepted_entities:
            entities_cluster[entity].extend(entities_clustered[entity])
    for entity in accepted_entities:
        entities_cluster[entity] = cluster_words(
            list(set(entities_cluster[entity]))
        )

    topics = cluster_words(list(set(topics)))
    color_dict = {topic: colors[-1] for topic in topics}
    for no, entity in enumerate(accepted_entities):
        for e in entities_cluster[entity]:
            topics.append(e)
            color_dict[e] = colors[no]

    topics_corpus = []
    for topic in topics:
        nested_corpus = []
        for string in corpus:
            if (
                topic in string
                or fuzz.token_set_ratio(topic, string) >= fuzzy_ratio
            ):
                nested_corpus.append(string)
        topics_corpus.append(' '.join(nested_corpus))

    tf_vectorizer = Vectorizer(
        ngram_range = ngram,
        min_df = min_df,
        max_df = max_df,
        stop_words = stop_words,
        **kwargs
    )
    if cleaning is not None:
        for i in range(len(topics_corpus)):
            topics_corpus[i] = cleaning(topics_corpus[i])
    if stemming:
        for i in range(len(topics_corpus)):
            topics_corpus[i] = sastrawi(topics_corpus[i])

    tf_vectorizer.fit(topics_corpus)
    DxT = tf_vectorizer.transform(topics_corpus)
    DxD = np.dot(DxT, DxT.T)
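    # DxD[i, j] is the dot product between the vectors of topic-documents i and j;
    # pairs scoring at or above `threshold` become the edges of the graph below.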

    G = nx.Graph()
    for i in range(DxT.shape[0]):
        G.add_node(i, text = topics[i], label = topics[i])

    dense_DxD = DxD.toarray()
    len_dense = len(dense_DxD)
    for i in range(len_dense):
        for j in range(len_dense):
            if j == i:
                continue
            if dense_DxD[i, j] >= threshold:
                weight = dense_DxD[i, j]
                G.add_edge(i, j, weight = weight)
    node_colors, node_labels = [], {}
    for node in G:
        node_colors.append(color_dict[G.nodes[node]['label']])
        node_labels[node] = G.nodes[node]['text']
    pos = nxlayout.fruchterman_reingold_layout(
        G, k = 1.5 / np.sqrt(len(G.nodes()))
    )
    f = plt.figure(figsize = figsize)
    ax = f.add_subplot(1, 1, 1)
    for no, entity in enumerate(accepted_entities):
        ax.plot([0], [0], color = colors[no], label = entity)
    ax.plot([0], [0], color = colors[-1], label = 'topics')
    nx.draw(
        G, node_color = node_colors, pos = pos, labels = node_labels, ax = ax
    )
    plt.legend()
    plt.tight_layout()
    plt.show()
    return {
        'G': G,
        'pos': pos,
        'node_colors': node_colors,
        'node_labels': node_labels,
    }
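
# Note (not in the original source): the dictionary returned above is enough to re-draw
# the same figure later without recomputing anything, e.g.
#   nx.draw(result['G'], pos=result['pos'], node_color=result['node_colors'],
#           labels=result['node_labels'])
# where `result` is a hypothetical variable holding the return value of
# cluster_entity_linking.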
def l_Connection_strength(G):
    l_Connection_strength_Dic={}
    node_set=G.nodes()
    Connection_num=0

    # number of l-order connected components
      
    #print nid,i_2_nei   
    for nid in node_set:
       
        degree=G.degree(nid)
        Neighbor_Set=list(G.neighbors(nid))
        #print nid,Neighbor_Set
        #print len(Neighbor_Set)
   
        
        # i__nei=set(G.neighbors(i))
       
        ###current_1_neighbor=G.neighbors(nid)
        #print nid,current_1_neighbor
        ###current_2_neighbor=[]
        ###for nnid in current_1_neighbor:
            ###current_2_neighbor = list(set(current_2_neighbor).union(set(G.neighbors(nnid))))
        #print '2_hop:', nid,current_2_neighbor
        ###current_2_neighbor= list(  set(current_2_neighbor).difference( set(current_1_neighbor).union(set([nid]))  ) ) 
        #print nid ,current_2_neighbor 
        #print nid,Neighbor_Set
        
        if len(Neighbor_Set)==1:
            Connection_num=1
            #print nid 
            l_Connection_strength_Dic[nid]=1.0
            #print nid,l_Connection_strength_Dic[nid]

        elif len(Neighbor_Set)>1:
            G_conn=nx.Graph()
            #print nid, Neighbor_Set
            ## combinations of neighbor pairs (vi, vj)
            Cluster_head_connection_set=[]
            for i in range(0,len(Neighbor_Set)):
                # vi: a neighbor of the target node
                vi=Neighbor_Set[i]
                #print nid,Neighbor_Set[i]
                n_vi_2=[]
                ## n_vi is a neighbor of vi
                for n_vi in G.neighbors(vi):
                    n_vi_2= list(set(n_vi_2).union(set(G.neighbors(n_vi))))
                n_vi_2=list(set(n_vi_2).difference(set(G.neighbors(vi)).union(set([nid]))))
                for j in range(i+1,len(Neighbor_Set)):
                    vj=Neighbor_Set[j]
                    #print vi,vj
                    fai_ij=list(set(n_vi_2).intersection(set(G.neighbors(vj))))
                    #print vi,vj,fai_ij
                    if fai_ij:
                        Cluster_head_connection_set.append(list([vi,vj]))
                        #
            #print nid,Cluster_head_connection_set
            for k in Cluster_head_connection_set:
                G_conn.add_edge(k[0],k[1])
            H=len(list(nx.connected_components(G_conn)))
            #print nid,H
            G_conn_nodenums=int(nx.number_of_nodes(G_conn))
            ## number of independent clusters
            independent_cluster_num=int(len(Neighbor_Set))-int(G_conn_nodenums )
            ## number of l-order connected components
            Connection_num=int(H)+int(independent_cluster_num)
            l_Connection_strength_Dic[nid]=round(float(Connection_num)/float(len(Neighbor_Set)),3)
            #print nid,l_Connection_strength_Dic[nid]
    return l_Connection_strength_Dic
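
# A minimal smoke test (sketch; assumes `import networkx as nx` as in the rest of this
# snippet): run the routine on a small standard graph and peek at a few scores.
demo_scores = l_Connection_strength(nx.karate_club_graph())
print(sorted(demo_scores.items())[:5])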
Example #29
    prog.rx(2 * beta, range(len(V)))

    prog.swap(input_qubit[1], input_qubit[0])  # number=8
    prog.cx(input_qubit[1], input_qubit[0])  # number=10
    prog.cx(input_qubit[1], input_qubit[0])  # number=11
    # circuit end

    return prog


if __name__ == '__main__':
    n = 4
    V = np.arange(0, n, 1)
    E = [(0, 1, 1.0), (0, 2, 1.0), (1, 2, 1.0), (3, 2, 1.0), (3, 1, 1.0)]

    G = nx.Graph()
    G.add_nodes_from(V)
    G.add_weighted_edges_from(E)

    step_size = 0.1

    a_gamma = np.arange(0, np.pi, step_size)
    a_beta = np.arange(0, np.pi, step_size)
    a_gamma, a_beta = np.meshgrid(a_gamma, a_beta)

    F1 = 3 - (np.sin(2 * a_beta)**2 * np.sin(2 * a_gamma)**2 - 0.5 * np.sin(
        4 * a_beta) * np.sin(4 * a_gamma)) * (1 + np.cos(4 * a_gamma)**2)
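    # Grid search: np.where below locates the (beta, gamma) grid indices at which F1
    # attains its maximum.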

    result = np.where(F1 == np.amax(F1))
    a = list(zip(result[0], result[1]))[0]
Example #30
def find_max_cut(graph, positions):
    # Build a NetworkX graph from the sparse adjacency matrix.
    G = nx.convert_matrix.from_scipy_sparse_matrix(graph)
    pos = {i: positions[i] for i in range(len(positions))}

    # Generate edge capacities.
    c = {}
    for e in sorted(G.edges(data=True)):
        capacity = 1
        e[2]['capacity'] = capacity
        c[(e[0], e[1])] = capacity
        c[(e[1], e[0])] = capacity

    # Convert the capacities to a PICOS expression.
    cc = pic.new_param('c', c)

    # Set source and sink nodes for flow computation.
    s = 16
    t = 10

    # Set node colors.
    N = len(positions)
    node_colors = ['lightgrey'] * N
    node_colors[s] = 'lightgreen'  # Source is green.
    node_colors[t] = 'lightblue'  # Sink is blue.

    # Define a plotting helper that closes the old and opens a new figure.
    def new_figure():
        try:
            global fig
            pylab.close(fig)
        except NameError:
            pass
        fig = pylab.figure(figsize=(11, 8))
        fig.gca().axes.get_xaxis().set_ticks([])
        fig.gca().axes.get_yaxis().set_ticks([])

    # Plot the graph with the edge capacities.
    new_figure()
    nx.draw_networkx(G, pos, node_color=node_colors)
    labels = {
        e: '{} | {}'.format(c[(e[0], e[1])], c[(e[1], e[0])])
        for e in G.edges if e[0] < e[1]
    }
    nx.draw_networkx_edge_labels(G, pos, edge_labels=labels)
    pylab.show()

    # Make G undirected.
    G = nx.Graph(G)

    # Allocate weights to the edges.
    for (i, j) in G.edges():
        G[i][j]['weight'] = 1

    maxcut = pic.Problem()

    # Add the symmetric matrix variable.
    X = maxcut.add_variable('X', (N, N), 'symmetric')

    # Retrieve the Laplacian of the graph.
    LL = 1 / 4. * nx.laplacian_matrix(G).todense()
    L = pic.new_param('L', LL)

    # Constrain X to have ones on the diagonal.
    maxcut.add_constraint(pic.diag_vect(X) == 1)

    # Constrain X to be positive semidefinite.
    maxcut.add_constraint(X >> 0)

    # Set the objective.
    maxcut.set_objective('max', L | X)

    # print(maxcut)

    # Solve the problem.
    maxcut.solve(solver='cvxopt')

    # print('bound from the SDP relaxation: {0}'.format(maxcut.obj_value()))

    # Use a fixed RNG seed so the result is reproducible.
    cvx.setseed(1)

    # Perform a Cholesky factorization.
    V = X.value
    cvxopt.lapack.potrf(V)
    for i in range(N):
        for j in range(i + 1, N):
            V[i, j] = 0

    # Do at least 100 random projections, and keep going until the rounded cut is within
    # a factor 0.878 of the SDP optimal value.
    count = 0
    obj_sdp = maxcut.obj_value()
    obj = 0
    while (count < 100 or obj < 0.878 * obj_sdp):
        r = cvx.normal(N, 1)
        x = cvx.matrix(np.sign(V * r))
        o = (x.T * L * x).value
        if o > obj:
            x_cut = x
            obj = o
        count += 1
    x = x_cut

    # Extract the cut and the separated node sets.
    S1 = [n for n in range(N) if x[n] < 0]
    S2 = [n for n in range(N) if x[n] > 0]
    cut = [(i, j) for (i, j) in G.edges() if x[i] * x[j] < 0]
    leave = [e for e in G.edges if e not in cut]

    # Close the old figure and open a new one.
    new_figure()

    # Assign colors based on set membership.
    node_colors = [('lightgreen' if n in S1 else 'lightblue')
                   for n in range(N)]

    # Draw the nodes and the edges that are not in the cut.
    nx.draw_networkx(G, pos, node_color=node_colors, edgelist=leave)
    labels = {e: '{}'.format(G[e[0]][e[1]]['weight']) for e in leave}
    nx.draw_networkx_edge_labels(G, pos, edge_labels=labels)

    # Draw the edges that are in the cut.
    nx.draw_networkx_edges(G, pos, edgelist=cut, edge_color='r')
    labels = {e: '{}'.format(G[e[0]][e[1]]['weight']) for e in cut}
    nx.draw_networkx_edge_labels(G, pos, edge_labels=labels, font_color='r')

    # Show the relaxation optimum value and the cut capacity.
    rval = maxcut.obj_value()
    sval = sum(G[e[0]][e[1]]['weight'] for e in cut)
    fig.suptitle(
        'SDP relaxation value: {0:.1f}\nCut value: {1:.1f} = {2:.3f}×{0:.1f}'.
        format(rval, sval, sval / rval),
        fontsize=16,
        y=0.97)

    # Show the figure.
    pylab.show()

    return S1, S2
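
# A minimal driver sketch (illustrative only; assumes the module-level imports that
# find_max_cut itself relies on, networkx as nx, numpy as np, picos as pic, cvxopt and
# pylab, are available, and note that the figures are shown interactively). The
# hard-coded source/sink indices above mean the graph needs at least 17 nodes.
import networkx as nx

demo_graph = nx.random_geometric_graph(20, 0.5, seed=1)
demo_pos = [demo_graph.nodes[i]['pos'] for i in range(20)]
demo_adj = nx.to_scipy_sparse_matrix(demo_graph)
S1, S2 = find_max_cut(demo_adj, demo_pos)
print('cut sizes:', len(S1), len(S2))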