Exemple #1
0
def SimulateSbm(sbm_data,
                num_vertices,
                num_edges,
                pi,
                prop_mat,
                out_degs=None):
    """Generates a stochastic block model, storing data in sbm_data.graph.

    This function uses graph_tool.generate_sbm. Refer to that
    documentation for more information on the model and parameters.
    After generation, self-loops and parallel edges are removed from the
    graph and its edges are re-indexed.

    Args:
      sbm_data: StochasticBlockModel dataclass to store result data. Its
        `graph_memberships` and `graph` attributes are overwritten.
      num_vertices: (int) number of nodes in the graph.
      num_edges: (int) expected number of edges in the graph.
      pi: iterable of non-zero community size proportions. Must sum to 1.0
        (up to floating-point rounding).
      prop_mat: square, symmetric matrix of community edge count rates.
      out_degs: Out-degree propensity for each node. If not provided, a
        constant value will be used. Note that the values will be normalized
        inside each group, if they are not already so.

    Returns: (none)

    Raises:
      ValueError: if the entries of `pi` do not sum to 1.0, or if `prop_mat`
        is not k x k with k = len(pi).
    """
    # Compare with a tolerance: proportions that are mathematically valid
    # frequently miss 1.0 by a rounding error once summed as floats.
    if not np.isclose(np.sum(pi), 1.0):
        raise ValueError("entries of pi must sum to 1.0")
    num_groups = len(pi)
    if prop_mat.shape[0] != num_groups or prop_mat.shape[1] != num_groups:
        raise ValueError("prop_mat must be k x k where k = len(pi)")
    sbm_data.graph_memberships = _GenerateNodeMemberships(num_vertices, pi)
    edge_counts = _ComputeExpectedEdgeCounts(num_edges, num_vertices, pi,
                                             prop_mat)
    sbm_data.graph = generation.generate_sbm(sbm_data.graph_memberships,
                                             edge_counts, out_degs)
    # Reduce the sampled multigraph to a simple graph, then compact edge ids.
    stats.remove_self_loops(sbm_data.graph)
    stats.remove_parallel_edges(sbm_data.graph)
    sbm_data.graph.reindex_edges()
Exemple #2
0
    def command_draw(self):
        """
		Draws the current resulting paths as a directed graph.

		Usage:
		draw
		"""
        if self.last_res is None:
            return "No saved paths. Do a search before you try to draw them."

        edge_list = [[
            t.replace('_', ' ')
            for t in self.db.get_titles_of_ids((val, path[ind + 1]))
        ] for path in self.last_res for ind, val in enumerate(path[:-1])]

        graph = gt.Graph()
        strings = graph.add_edge_list(edge_list, string_vals=True, hashed=True)

        stats.remove_parallel_edges(graph)

        fill_color = graph.new_vertex_property('vector<float>',
                                               val=[0, 0, 0.640625, 0.9])
        fill_color[graph.vertex(0)] = [0, 0.640625, 0, 0.9]
        fill_color[graph.vertex(len(self.last_res[0]) -
                                1)] = [0.640625, 0, 0, 0.9]

        draw.interactive_window(
            graph,
            vertex_fill_color=fill_color,
            vertex_text=strings,
            vertex_text_position=graph.new_vertex_property('float', val=0),
            vertex_anchor=graph.new_vertex_property('int', val=0),
            geometry=(1600, 1200),
            vertex_font_size=graph.new_vertex_property('float', val=20))
Exemple #3
0
def load_GT_graph(graphExample, gcc=False, removeSL=False, removePE=False):
    '''
    Load a graph as an undirected graph_tool graph, optionally simplified.

    Input:  - graphExample,  graph example from Graph-tool collections  (e.g.,  'cond-mat-2003', 'adjnoun' 'karate' 'netscience') or a graphfile in .gml format
            - gcc = True if only the giant connected component should be returned
            - removeSL = True if any self-loops must be removed
            - removePE = True if any parallel-edge must be removed
    Output: the corresponding graph_tool graph object. The loaded graph is
            modified in place (direction, edge removal, vertex purge).
    '''
    if graphExample.endswith(".gml"):
        g = load_graph(graphExample)
    else:
        g = collection.data[graphExample]

    # is_directed is a method; testing the bare attribute is always truthy,
    # so it must be called to actually inspect the graph.
    if g.is_directed():
        g.set_directed(False)

    if removePE:
        gtStats.remove_parallel_edges(g)
    if removeSL:
        gtStats.remove_self_loops(g)
    if gcc:
        # Keep only the largest connected component.
        largest = topology.label_largest_component(g)
        g.set_vertex_filter(largest)
    # Permanently drop whatever the vertex filter (if any) hides.
    g.purge_vertices()

    return g
def generuj(n, rand_edges, num_colors):
    """Generate a random undirected graph with a random colour count per vertex.

    Args:
        n: number of vertices passed to ``random_graph``.
        rand_edges: upper bound handed to ``randint`` when sampling vertex
            degrees; values below 3 are raised to 3.
        num_colors: ``randint(1, num_colors + 1)`` is drawn for every vertex.

    Returns:
        The generated graph, carrying the integer vertex property
        ``'liczba_kolorow'``.
    """
    rand_edges = max(rand_edges, 3)

    def sample_degrees():
        # Degree pair requested by random_graph for each vertex.
        return randint(0, rand_edges), randint(1, rand_edges)

    g = random_graph(n, sample_degrees, parallel_edges=False)
    g.set_directed(False)
    remove_parallel_edges(g)

    color_count = g.new_vertex_property('int')
    g.vertex_properties['liczba_kolorow'] = color_count
    for vertex in g.vertices():
        color_count[vertex] = randint(1, num_colors + 1)

    return g
Exemple #5
0
def generuj(n, rand_edges, num_colors):
    """Build a random undirected graph and tag each vertex with a colour count.

    ``rand_edges`` bounds the random degrees requested from ``random_graph``
    (it is clamped to at least 3); every vertex then receives
    ``randint(1, num_colors + 1)`` in the ``'liczba_kolorow'`` integer vertex
    property. Returns the graph.
    """
    degree_bound = rand_edges if rand_edges >= 3 else 3

    def degree_pair():
        first = randint(0, degree_bound)
        second = randint(1, degree_bound)
        return (first, second)

    g = random_graph(n, degree_pair, parallel_edges=False)
    g.set_directed(False)
    remove_parallel_edges(g)

    g.vertex_properties['liczba_kolorow'] = g.new_vertex_property('int')
    colours = g.vertex_properties['liczba_kolorow']
    for node in g.vertices():
        colours[node] = randint(1, num_colors + 1)

    return g
Exemple #6
0
 def get(self, args):
   """Return the JSON form of the subgraph found by a depth-limited DFS.

   ``args["root"]`` is the index of the start vertex and ``args["limit"]``
   the limit handed to ``dfs_search_with_limit``; both are converted with
   ``int``. The search runs on the module-level ``graph``.
   """
   from depth_first_searcher import dfs_search_with_limit
   from graph_tool.stats import remove_parallel_edges
   from graph_json_builder import create_json_graph

   root_index = int(args["root"])
   depth_limit = int(args["limit"])
   reached = dfs_search_with_limit(graph, graph.vertex(root_index), depth_limit)

   # Boolean mask selecting exactly the vertices the search returned.
   keep = graph.new_vertex_property('bool')
   for vertex in reached:
     keep[vertex] = True

   view = GraphView(graph, keep)
   # NOTE(review): the view shares edges with `graph`, so this presumably
   # removes the parallel edges from the underlying graph as well -- confirm.
   remove_parallel_edges(view)
   return create_json_graph(self.set_properties(view))
def load_graph(infile):
    """Load an undirected weighted graph from a space-delimited edge list.

    Every row of ``infile`` is read as unsigned integers
    ``source target weight``; vertex numbers in the file are 1-based. The
    weight is stored in the edge property ``"weights"`` (type double).
    Parallel edges are removed before returning.

    Args:
        infile: file name or file object accepted by ``np.loadtxt``.

    Returns:
        The constructed graph, with as many vertices as the largest vertex
        number found in the file.

    Raises:
        ValueError: if a vertex number in the file is 0 (ids must be 1-based).
    """
    # ndmin=2 keeps a file with a single edge as a 1 x 3 matrix instead of a
    # flat vector, so the column slicing below works for every file.
    inmatrix = np.loadtxt(infile, dtype=np.dtype('uint32'), delimiter=" ",
                          ndmin=2)
    endpoints = inmatrix[:, 0:2]
    if np.any(endpoints == 0):
        raise ValueError("vertex ids in the edge list must be 1-based")
    numv = int(np.amax(endpoints))

    g = Graph(directed=False)
    edge_weights = g.new_edge_property("double")
    g.edge_properties["weights"] = edge_weights
    # Address vertices by integer index rather than by the return value of
    # add_vertex, which is a single vertex (not an iterable) when numv == 1.
    g.add_vertex(numv)

    for row in inmatrix:
        # Convert from the 1-based index in the file to 0-based.
        edge = g.add_edge(int(row[0]) - 1, int(row[1]) - 1)
        edge_weights[edge] = row[2]

    remove_parallel_edges(g)
    return g
Exemple #8
0
def make_simple_graph(g, undirected=True, gcc=True):
    '''
    Returns input graph -g- in a version without parallel edges or self-loops.
    If undirected = True, returned graph is also undirected.
    If gcc        = True, returned graph is giant connected component of g.

    The graph is modified in place and the same object is returned. When
    gcc is True, the size of the largest component is printed.
    '''
    # is_directed is a method; the bare attribute is always truthy, so call it.
    if undirected and g.is_directed():
        g.set_directed(False)

    gtStats.remove_self_loops(g)
    gtStats.remove_parallel_edges(g)

    if gcc:
        # Keep Largest Connected Component.
        largest = topology.label_largest_component(g)
        # Single-argument call form: prints the same text on Python 2 and 3.
        print("Nodes in largest connected component: " +
              str(np.sum(largest.a)))
        g.set_vertex_filter(largest)
    # Permanently drop whatever the vertex filter (if any) hides.
    g.purge_vertices()
    return g
Exemple #9
0
def loadGraphWithAnnotations(graphFile):
    '''
    Used to read the graphs provided by RoleSim people, regarding scientific collaborations
    and the g/h index.

    File layout as parsed here:
      - first line: the node count is its second whitespace-separated token;
      - one tab-separated line per node: id, name, g-index, h-index, where
        the id must equal the 0-based position of the line;
      - a line "*Edges";
      - one line per edge: two whitespace-separated vertex indices.

    Returns the largest connected component of the undirected graph, without
    parallel edges or self-loops, with vertex properties "names", "h-Index"
    and "g-Index".
    '''
    g = Graph(directed=False)
    with open(graphFile, "r") as inF:
        num_nodes = int(inF.readline().split()[1])
        g.add_vertex(num_nodes)
        g_names = g.new_vertex_property("string")
        g_H_ind = g.new_vertex_property("int")
        g_G_ind = g.new_vertex_property("int")

        for i, line in enumerate(inF):  # Read Meta-Data of Nodes
            line = line.rstrip()
            if line == "*Edges":
                break
            contents = line.split("\t")
            gID, name = contents[0], contents[1]
            gIndex, hIndex = int(contents[2]), int(contents[3])
            assert (gID == str(i))
            g_names[g.vertex(i)] = name
            # Store each index in the map of the same name; previously the
            # two were crossed.
            # NOTE(review): assumes column 3 is the g-index and column 4 the
            # h-index, as the variable names say -- confirm against the data.
            g_G_ind.a[i] = gIndex
            g_H_ind.a[i] = hIndex

        # The same file iterator continues right after the "*Edges" line.
        for line in inF:  # Read Edges
            tokens = line.split()
            if not tokens:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            g.add_edge(int(tokens[0]), int(tokens[1]))

        g.vp["names"] = g_names
        g.vp["h-Index"] = g_H_ind
        g.vp["g-Index"] = g_G_ind

    gtStats.remove_parallel_edges(g)
    gtStats.remove_self_loops(g)
    # Keep Largest Connected Component
    largest = topology.label_largest_component(g)
    g.set_vertex_filter(largest)
    g.purge_vertices()

    return g
Exemple #10
0
def f_centralization(D, stats, options=None):
    """Store the degree centralization of graph ``D`` in ``stats``.

    Nothing is computed unless ``'centralization'`` is listed in
    ``options['features']``. The measure is
    ``sum(max_degree - degree_i) / ((n - 1) * (n - 2))`` over total degrees,
    taken after parallel edges are removed from a copy of ``D``.

    Args:
        D: graph to analyse; it is copied, so the caller's graph is not
            modified.
        stats: dict that receives the key ``'centralization_degree'``.
        options: dict with a ``'features'`` collection. Defaults to no
            features, i.e. the function does nothing.

    Returns:
        None.
    """
    if options is None:
        # Built per call rather than as a shared mutable default argument.
        options = {'features': []}

    if 'centralization' not in options['features']:
        return

    # Work on a copy: removing parallel edges must not alter the caller's graph.
    D_copied = D.copy()
    remove_parallel_edges(D_copied)

    degree_list = D_copied.degree_property_map('total').a
    num_vertices = degree_list.size

    if num_vertices < 3:
        # The normaliser (n - 1) * (n - 2) is zero here. After parallel-edge
        # removal all degrees are equal in such graphs, so report no
        # centralization instead of dividing by zero.
        stats['centralization_degree'] = 0.0
    else:
        max_degree = degree_list.max()
        stats['centralization_degree'] = float(
            (max_degree - degree_list).sum()) / ((num_vertices - 1) *
                                                 (num_vertices - 2))

    log.debug('done centrality measures')
Exemple #11
0
    def __init__(self,
                 nodes=0,
                 copy_graph=None,
                 weighted=True,
                 directed=True,
                 **kwargs):
        '''
        Create the graph wrapper, either empty or as a copy of `copy_graph`.

        See :class:`gt.Graph`'s constructor.

        Parameters
        ----------
        nodes : int, optional (default: 0)
            Number of vertices added to a newly created graph. Only used
            when `copy_graph` is None.
        copy_graph : object, optional (default: None)
            Existing wrapper to copy. This method reads its ``graph``
            attribute, its ``edge_nb()`` method and its ``_edges_deleted``
            flag.
        weighted : bool, optional (default: True)
            Not used in this method's body.
        directed : bool, optional (default: True)
            Directedness of the resulting graph. A copied graph is converted
            when it differs; parallel edges are removed after converting a
            directed graph to undirected.
        **kwargs
            Not used in this method's body.
        '''
        self._nattr = _GtNProperty(self)
        self._eattr = _GtEProperty(self)

        self._edges_deleted = False

        g = copy_graph.graph if copy_graph is not None else None

        if g is not None:
            from graph_tool import Graph as GtGraph
            from graph_tool.stats import remove_parallel_edges

            num_edges = copy_graph.edge_nb()

            if copy_graph._edges_deleted:
                # set edge filter for non-deleted edges
                eprop = g.new_edge_property("bool",
                                            vals=np.ones(num_edges,
                                                         dtype=bool))

                g.set_edge_filter(eprop)
                # rebuild the graph with prune=True so that only the
                # filtered-in edges are kept
                g = GtGraph(g, directed=g.is_directed(), prune=True)

            # copy before changing directedness so the source wrapper's graph
            # keeps its own orientation
            if not directed and g.is_directed():
                g = g.copy()
                g.set_directed(False)
                remove_parallel_edges(g)
            elif directed and not g.is_directed():
                g = g.copy()
                g.set_directed(True)

            self._from_library_graph(g, copy=True)

            # make edge id property map
            if "eid" in g.edge_properties:
                g.edge_properties["eid"].a = list(range(num_edges))
            else:
                # NOTE(review): `self._max_eid` is read here but only
                # assigned further down in this method; unless it is set
                # elsewhere (class attribute or `_from_library_graph`) this
                # raises AttributeError -- `num_edges` may be intended.
                # NOTE(review): the map is created from `self._graph` but
                # stored on `g` -- confirm this is intentional.
                eids = self._graph.new_edge_property("int",
                                                     vals=list(
                                                         range(self._max_eid)))

                g.edge_properties["eid"] = eids

            self._max_eid = num_edges
        else:
            # no graph to copy: create an empty graph from the configured
            # backend class
            self._graph = nngt._config["graph"](directed=directed)

            if nodes:
                self._graph.add_vertex(nodes)

            # make edge id property map
            self._max_eid = 0

            eids = self._graph.new_edge_property("int")

            self._graph.edge_properties["eid"] = eids
def motif_operation(mid, cnt_mid):
    #
    ''' Compute, for one cascade, the exposure nodes of every retweeting node.

    Args:
        mid: cascade identifier; rows of the module-level ``dataDf`` whose
            'mid' column equals it form the cascade.
        cnt_mid: counter that is only used in the progress message.

    Returns:
        The 'cascade' rows of the cascade with an added 'exposureNodes'
        column (one list per row, empty when no exposure node was found for
        the row's target).

    Relies on module-level names: dataDf, motif_patterns_dict, np, pd, gt,
    gts, gtt.

    The steps of computing the exposure nodes are as follows:
        1. For each pair of intervals of a cascade [I-1, I] for I in [1, I_C], form the cascade + historical network N_I.
        2. Compute the network motifs of size 3 exhibited in N_I.
        3. For each node $u$ activated in interval I, see whether $u$ was a part of any of
           the instances belonging to the 6 motif patterns mentioned in the paper such that $u$ had participated in
           those instances after the other two nodes, one of which must be the source
        4. Add the third node in those selected instances barring the parent and $u$ (which together form the 3-motif instance)
           in the exposure set of $u$ for the cascade C.
        5. This step imposes the AND gating constraint to remove nodes that violate the boolean TRUE threshold constraint.
    '''
    # 1.
    cascade_set = dataDf[dataDf['mid'] == mid]

    print("Cascade : ", cnt_mid, " and reshare size: ", len(cascade_set))

    # NOTE(review): the column key here is 'interval_2:' (with a trailing
    # colon) while the loop below filters on 'interval_1' -- verify the key.
    numIntervals = np.max(np.array(list(cascade_set['interval_2:'])))
    last_time = 0
    inf_nodes = {
    }  # stores the influential nodes apart from parent for each node retweet
    inf_edges = {
    }  # stores the influential edges between nodes apart from parent for each node retweet
    # One list of motif pattern graphs per interval, indexed by int_idx - 1;
    # the fixed size caps the number of intervals at 500.
    motif_graph_patterns = [[] for _ in range(500)]
    for int_idx in range(1, numIntervals):
        # print("Interval: ", int_idx)
        ''' Operation 1. '''
        cascade_intervalDf_prev = cascade_set[cascade_set['interval_1'] ==
                                              int_idx - 1]
        cascade_intervalDf_curr = cascade_set[cascade_set['interval_1'] ==
                                              int_idx]
        cascade_intervalDf = pd.concat(
            [cascade_intervalDf_prev, cascade_intervalDf_curr])

        cascade_Df = cascade_intervalDf[cascade_intervalDf['edge_type'] ==
                                        'cascade']
        historical_Df = cascade_intervalDf[cascade_intervalDf['edge_type'] ==
                                           'historical']

        # Create the vertex time dictionary
        # (user ID -> retweet time; a source seen for the first time gets the
        # retweet time of the previously processed cascade row, carried in
        # last_time across intervals)
        vertex_rtTime_dict = {}
        for i, r in cascade_intervalDf.iterrows():
            if r['edge_type'] == 'historical':
                continue

            src = r['source']
            tgt = r['target']
            vertex_rtTime_dict[tgt] = r['retweet_time']
            if src not in vertex_rtTime_dict:
                vertex_rtTime_dict[src] = last_time

            last_time = r['retweet_time']

        # Store the cascade edges
        # (edges_cascade and edges_historical are filled here but not read
        # again in this function)
        edges_cascade = []
        edges_historical = []
        for i, r in cascade_intervalDf.iterrows():
            src = r['source']
            tgt = r['target']

            if r['edge_type'] == 'cascade':
                edges_cascade.append((src, tgt))
            else:
                edges_historical.append((src, tgt))
        ''' Operation 2. '''
        cascade_graph = gt.Graph(directed=True)
        node_cascades_diff_map = {}
        cnt_nodes = 0
        cascade_vertices = cascade_graph.new_vertex_property("string")
        cascade_edge_prop = cascade_graph.new_edge_property("int")
        cascade_map_write_file = {}

        # Add the cascade edges
        # 0 - Cascade edges
        # 1 - Diffusion edges

        for i, r in cascade_Df.iterrows():
            src = r['source']
            tgt = r['target']
            if src not in node_cascades_diff_map:
                node_cascades_diff_map[
                    src] = cnt_nodes  # map from user ID to graphnode ID
                v1 = cascade_graph.add_vertex()
                cascade_vertices[v1] = src  # map from graphnode ID to user ID
                cascade_map_write_file[cnt_nodes] = src
                cnt_nodes += 1
            else:
                v1 = cascade_graph.vertex(node_cascades_diff_map[src])

            if tgt not in node_cascades_diff_map:
                node_cascades_diff_map[
                    tgt] = cnt_nodes  # map from user ID to graphnode ID
                v2 = cascade_graph.add_vertex()
                cascade_vertices[v2] = tgt  # map from graphnode ID to user ID
                cascade_map_write_file[cnt_nodes] = tgt
                cnt_nodes += 1
            else:
                v2 = cascade_graph.vertex(node_cascades_diff_map[tgt])

            # add each cascade edge only once
            if cascade_graph.edge(v1, v2):
                continue
            else:
                e = cascade_graph.add_edge(v1, v2)
                cascade_edge_prop[e] = 0

        gts.remove_parallel_edges(cascade_graph)

        # Add the historical diffusion edges (even if there already exists a cascade edge, but only once)
        # NOTE(review): the lookups below raise KeyError for a historical
        # endpoint that never appeared in a cascade edge of this interval
        # pair -- confirm the data guarantees this cannot happen.
        edges_seen = []
        for i, r in historical_Df.iterrows():
            src = r['source']
            tgt = r['target']
            v1 = node_cascades_diff_map[src]
            v2 = node_cascades_diff_map[tgt]

            if (v1, v2) in edges_seen:
                continue

            edges_seen.append((v1, v2))
            e = cascade_graph.add_edge(v1, v2)
            cascade_edge_prop[e] = 1

        gts.remove_self_loops(cascade_graph)
        # gts.remove_parallel_edges(cascade_graph)
        '''' Operation 3. '''
        # FINDING THE MOTIFS IN THE CASCADE GRAPH + DIFFUSION NETWORK - SIZE 3
        motifs_graph, motifs_count, vertex_maps = \
            gt.clustering.motifs(cascade_graph, 3, return_maps=True)

        # Store the motif patterns interval wise for retrieval later
        for idx_pat in range(len(motifs_graph)):
            motif_graph_patterns[int_idx - 1].append(motifs_graph[idx_pat])
        '''' Operation 4. '''
        # Find the influential nodes for each node in the retweet cascade for the current interval only - NOT the previous interval
        for i, r in cascade_intervalDf_curr.iterrows():
            src = r['source']
            tgt = r['target']
            # a target that already has exposure nodes is not processed again
            if tgt in inf_nodes:
                continue

            # extract the graph node IDs
            src_vert = node_cascades_diff_map[src]
            tgt_vert = node_cascades_diff_map[tgt]
            # extract the vertex timestamps
            src_rtTime = vertex_rtTime_dict[src]
            tgt_rtTime = vertex_rtTime_dict[tgt]

            # only consider the cascade retweets for (src, tgt) pair
            edge_type = r['edge_type']
            if edge_type == 'historical':
                continue

            # find the motifs of particular patterns attached to that pair of src and tgt
            # Patterns - [M4, M7, M16, M17, M23, M25]
            # NOTE(review): the list above names M17, but the code below uses
            # M31 and no M17 -- confirm which set is intended.

            # Patterns handcoded - can be automated into lists or arrays for more efficiency
            graph_pat_act_M4 = motif_patterns_dict['M4']
            graph_pat_act_M7 = motif_patterns_dict['M7']
            graph_pat_act_M16 = motif_patterns_dict['M16']
            graph_pat_act_M23 = motif_patterns_dict['M23']
            graph_pat_act_M25 = motif_patterns_dict['M25']
            graph_pat_act_M31 = motif_patterns_dict['M31']

            # Extract the motif instances belonging to this pattern
            for idx_map in range(len(motifs_graph)):
                graph_pat_curr = motifs_graph[idx_map]
                # check if the instance belongs to any of these patterns
                # (skip the pattern when it is isomorphic to none of the six)
                if (not gtt.isomorphism(graph_pat_act_M4, graph_pat_curr) ) and (not gtt.isomorphism(graph_pat_act_M7, graph_pat_curr) ) \
                    and (not gtt.isomorphism(graph_pat_act_M16, graph_pat_curr) ) and (not gtt.isomorphism(graph_pat_act_M23, graph_pat_curr) ) \
                        and (not gtt.isomorphism(graph_pat_act_M25, graph_pat_curr)) and (not gtt.isomorphism(graph_pat_act_M31, graph_pat_curr) ):
                    continue

                # for M in motif_patterns_dict:
                #     if gtt.isomorphism(motif_patterns_dict[M], graph_pat_curr):
                #         print(M, motifs_count[idx_map])

                # return

                # 1st constraint: Traverse through all the motif instances of this pattern that only contain the (src, tgt) cascade edge
                vMaps = vertex_maps[idx_map]
                # print(len(vMaps))
                cnt_maps = 0
                for vertices in vMaps:
                    # print('hello....')
                    # Cond. 1: the source and target should be in the motif instance
                    vertex_list = list(vertices.a)
                    if src_vert not in vertex_list or tgt_vert not in vertex_list:
                        continue
                    # print('hello1')

                    # Find the non-source and non-target vertex
                    for v in vertex_list:
                        if v != src_vert and v != tgt_vert:
                            third_vertex = cascade_vertices[
                                v]  # this is the potential non-parent exposure node to target node
                            break

                    # print('hello2')
                    # Cond. 2: the target vertex should have retweeted last among all the motif vertices
                    # NOTE(review): third_vertex is read from a "string"
                    # property map, while vertex_rtTime_dict is keyed by the
                    # raw 'source'/'target' values -- if those are not
                    # strings this lookup presumably fails; verify.
                    third_rtTime = vertex_rtTime_dict[third_vertex]
                    max_time = max([tgt_rtTime, src_rtTime, third_rtTime])
                    if max_time != tgt_rtTime:
                        continue

                    # For different motif patterns, need to check different types of motif edges - this is difficult !!

                    # print(tgt, third_vertex)
                    if tgt not in inf_nodes:
                        inf_nodes[tgt] = []
                        inf_edges[tgt] = []

                    # append, then de-duplicate via a set (order not kept)
                    inf_nodes[tgt].append(third_vertex)
                    inf_nodes[tgt] = list(set(inf_nodes[tgt]))
                    # if (third_vertex, tgt) in edges_cas_curr:
                    #     inf_edges[tgt].append((third_vertex, tgt, 'cascade'))
                    # elif (third_vertex, tgt) in edges_hist_curr:
                    #     inf_edges[tgt].append((third_vertex, tgt, 'historical'))
        ''' Operation 5. '''
        # NOTE(review): this step does not look runnable as written and needs
        # a decision on the intended gating rule:
        #   - sorted(...) returns a list of (key, value) tuples, so the
        #     .values() calls below raise AttributeError as soon as
        #     inf_nodes is non-empty;
        #   - `product` is never recomputed inside the while loop, so the
        #     loop cannot terminate once entered;
        #   - `np.find` does not appear to be a NumPy function -- verify.
        thresh = 5 * 0.00001
        for node in inf_nodes:
            time_dict = {}
            exposure_nodes = inf_nodes[node]
            for e in exposure_nodes:
                time_dict[e] = vertex_rtTime_dict[e]

            sorted_dict = sorted(time_dict.items())

            product = np.prod(np.array(list(sorted_dict.values())))
            val = list(sorted_dict.values())
            while product < thresh:
                val = val[1:]

            inf_nodes[node] = np.find(sorted_dict, val)

    #  Create a dataframe with the rows as original
    cascade_Df = cascade_set[cascade_set['edge_type'] == 'cascade']
    inf_nodes_df = []
    count_row = 0

    # one entry per cascade row, in row order: the exposure nodes recorded
    # for the row's target, or an empty list
    for idx, row in cascade_Df.iterrows():
        tgt = row['target']

        if tgt not in inf_nodes:
            inf_nodes_df.append([])
        else:
            inf_nodes_df.append(inf_nodes[tgt])

        count_row += 1

    # NOTE(review): cascade_Df is a filtered selection of cascade_set, so
    # pandas may emit SettingWithCopyWarning on this assignment.
    cascade_Df['exposureNodes'] = inf_nodes_df

    return cascade_Df