Example #1
def simulate_cascade(g, p, source=None, return_tree=False):
    """
    graph_tool version of cascade simulation.
    Returns an np.ndarray over the vertices giving each vertex's infection
    time in the cascade; uninfected vertices have time -1.
    """
    gv = sample_graph_by_p(g, p)

    if source is None:
        # pick a random source from the largest connected component
        candidates = np.nonzero(label_largest_component(gv).a)[0]
        source = np.random.choice(candidates)

    times = get_infection_time(gv, source)

    if return_tree:
        # vertices actually reached by the cascade
        infected_nodes = np.nonzero(times != -1)[0]
        # the tree edges come from the shortest-path predecessor map
        _, pred_map = shortest_distance(gv, source=source, pred_map=True)
        edges = [(pred_map[i], i) for i in infected_nodes if i != source]

        # create the tree and hide vertices that are not part of it
        tree = Graph(directed=True)
        tree.add_vertex(g.num_vertices())
        for u, v in edges:
            tree.add_edge(int(u), int(v))
        vfilt = tree.new_vertex_property('bool')
        vfilt.a = False
        for v in set(itertools.chain(*edges)):
            vfilt[v] = True
        tree.set_vertex_filter(vfilt)

        return source, times, tree
    return source, times
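
The helpers sample_graph_by_p and get_infection_time are not shown in this example. A minimal sketch of the sampler, assuming it performs simple bond percolation (each edge kept independently with probability p):

import numpy as np
from graph_tool import GraphView

def sample_graph_by_p(g, p):
    # hypothetical helper: keep each edge independently with probability p
    efilt = g.new_edge_property('bool')
    efilt.a = np.random.random(g.num_edges()) < p
    return GraphView(g, efilt=efilt)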
Example #2
    def __init__(self, graphml=None, graph=None):
        if graphml is not None:
            self.g = gt.load_graph(graphml)
            self.g.set_directed(False)
            print("Created graph from graphml {}".format(graphml))

        elif graph is not None:
            self.g = graph
            giant = gt.label_largest_component(self.g)
            origin_size = self.g.num_vertices()
            # remove non-giant vertices in descending index order; with
            # fast=True the last vertex is swapped into the freed slot, and
            # in descending order that vertex has already been checked
            for v in range(origin_size - 1, -1, -1):
                if not giant[v]:
                    self.g.remove_vertex(v, fast=True)
            self.g.set_directed(False)
            print("Created graph from graph.")

        else:
            raise ValueError("No graphml or graph provided!")

        print("Number of vertices: {}\nNumber of edges: {}"\
              .format(self.g.num_vertices(), self.g.num_edges()))
        print("\n-----------------------------------------")

        self.v_residents = self.g.new_vertex_property("object")
        self.e_weights = self.g.new_edge_property("float")
        self.e_filter = self.g.new_edge_property("bool")
        self.walkers = [{} for v in self.g.vertices()]

        self.boost_size = 100
        self.random_boost = [[100, [0 for i in range(self.boost_size)]]
                             for v in self.g.vertices()]
        self.exetime = [0., 0., 0., 0.]
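
Note: an equivalent and arguably clearer way to keep only the giant component is a pruned GraphView instead of the in-place removal loop above; a sketch of the same operation:

import graph_tool.all as gt

# prune=True copies only the vertices that survive the filter
g = gt.Graph(gt.GraphView(g, vfilt=gt.label_largest_component(g)), prune=True)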
Example #3
    def __init__(self, graphml=None, graph=None):
        if graphml is not None:
            self.g = gt.load_graph(graphml)
            #self.v_name = self.g.new_edge_property("str")
            self.g.list_properties()  # prints the property list itself

            print("Created graph from graphml {}".format(graphml))
        elif graph is not None:
            self.g = graph
            giant = gt.label_largest_component(self.g)
            origin_size = self.g.num_vertices()
            # remove non-giant vertices in descending index order (safe with
            # fast=True, which swaps the last, already-checked vertex in)
            for v in range(origin_size - 1, -1, -1):
                if not giant[v]:
                    self.g.remove_vertex(v, fast=True)
            self.g.set_directed(False)
            print("Created graph from graph.")
        else:
            raise ValueError("No graphml or graph provided!")

        print("Number of vertices: {}\nNumber of edges: {}"\
              .format(self.g.num_vertices(), self.g.num_edges()))
        print("\n-----------------------------------------")

        self.v_infected = self.g.new_vertex_property("bool")
        self.v_reinfected = self.g.new_vertex_property("int")
        self.e_reinfected = self.g.new_edge_property("int")
        self.e_spread_beta = self.g.new_edge_property("double")
        self.e_extinct_beta = self.g.new_edge_property("double")
Example #4
def main():
    input_fasta = sys.argv[3]
    K = int(sys.argv[1])
    x = float(sys.argv[2])

    ht = khmer.Nodegraph(K, x, 4)

    sparse_graph = gt.Graph()
    hashes = sparse_graph.new_vertex_property("long long")

    for n, record in enumerate(screed.open(input_fasta)):
        if n % 1000 == 0:
            print('...loaded and tagged {} sequences'.format(n),
                  file=sys.stderr)
        name = record.name
        sequence = record.sequence

        ht.consume_sequence_and_tag_with_labels(sequence, n)
        tags = ht.sweep_tag_neighborhood(sequence, 0)
        for i in range(len(tags) - 1):
            src = tags[i]
            dst = tags[i + 1]

            new = False

            srcv = gt.find_vertex(sparse_graph, hashes, src)
            if not srcv:
                srcv = sparse_graph.add_vertex()
                hashes[srcv] = src
                new = True
            else:
                srcv = srcv[0]

            dstv = gt.find_vertex(sparse_graph, hashes, dst)
            if not dstv:
                dstv = sparse_graph.add_vertex()
                hashes[dstv] = dst
                new = True
            else:
                dstv = dstv[0]

            if new:
                e = sparse_graph.add_edge(srcv, dstv)

    print('Sparse graph has {} nodes, {} edges'.format(
        sparse_graph.num_vertices(), sparse_graph.num_edges()))
    comp = gt.label_largest_component(sparse_graph, directed=False)
    #pos = gt.radial_tree_layout(sparse_graph, sparse_graph.vertex(0))
    gt.graph_draw(sparse_graph,
                  output_size=(5000, 5000),
                  output=input_fasta + '_sparse.png')
    sparse_graph.set_vertex_filter(comp)
    gt.graph_draw(sparse_graph,
                  output_size=(5000, 5000),
                  output=input_fasta + '_sparse_comp.png')
Example #5
def main():
    input_fasta = sys.argv[3]
    K = int(sys.argv[1])
    x = float(sys.argv[2])
    
    ht = khmer.new_hashbits(K, x, 4)  # legacy khmer API; newer releases use khmer.Nodegraph

    sparse_graph = gt.Graph()
    hashes = sparse_graph.new_vertex_property("long long")


    for n, record in enumerate(screed.open(input_fasta)):
        if n % 1000 == 0:
            print('...loaded and tagged {} sequences'.format(n),
                  file=sys.stderr)
        name = record.name
        sequence = record.sequence

        ht.consume_sequence_and_tag_with_labels(sequence, n)
        tags = ht.sweep_tag_neighborhood(sequence, 0)
        for i in range(len(tags) - 1):
            src = tags[i]
            dst = tags[i + 1]

            new = False

            srcv = gt.find_vertex(sparse_graph, hashes, src)
            if not srcv:
                srcv = sparse_graph.add_vertex()
                hashes[srcv] = src
                new = True
            else:
                srcv = srcv[0]

            dstv = gt.find_vertex(sparse_graph, hashes, dst)
            if not dstv:
                dstv = sparse_graph.add_vertex()
                hashes[dstv] = dst
                new = True
            else:
                dstv = dstv[0]

            if new:
                e = sparse_graph.add_edge(srcv, dstv)

    print('Sparse graph has {} nodes, {} edges'.format(
        sparse_graph.num_vertices(), sparse_graph.num_edges()))
    comp = gt.label_largest_component(sparse_graph, directed=False)
    #pos = gt.radial_tree_layout(sparse_graph, sparse_graph.vertex(0))
    gt.graph_draw(sparse_graph, output_size=(
        5000, 5000), output=input_fasta + '_sparse.png')
    sparse_graph.set_vertex_filter(comp)
    gt.graph_draw(sparse_graph, output_size=(
        5000, 5000), output=input_fasta + '_sparse_comp.png')
Example #6
def main():
    # The description about the data is available at
    # <https://graph-tool.skewed.de/static/doc/collection.html>
    for name in ['karate', 'lesmis', 'football', 'dolphins', 'netscience']:
        g = gt.collection.data[name]
        g = gt.GraphView(g, directed=False)

        if name == 'netscience':
            # Use only the largest component in the netscience data
            l = gt.label_largest_component(g)
            g = gt.Graph(gt.GraphView(g, vfilt=l), prune=True)

        process(name, g)
Example #7
def load_graph(path, algorithms, format='graphml', component=False):
    sys.stdout.write('Loading network ...')
    sys.stdout.flush()
    t0 = time.time()
    g = gt.load_graph(path, fmt=format)
    if 'kores' in algorithms:
        gt.remove_parallel_edges(g)
    gt.remove_self_loops(g)
    if component:
        largest_component = gt.label_largest_component(g, directed=False)
        g.set_vertex_filter(largest_component)
        g.purge_vertices()
    t = time.time()
    sys.stdout.write('Ok! ({0} s.)\n'.format(t - t0))

    return g
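
A hypothetical call (the path is a placeholder; passing 'kores' triggers the parallel-edge removal above):

g = load_graph('network.graphml', algorithms=['kores'], component=True)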
Example #8
def save_largest_component():
    global Graph

    l = gt.label_largest_component(Graph)

    print(l.a)
    remove = []
    for x in range(len(l.a)):
        if l.a[x] == 0:
            remove.append(x)

    Graph.remove_vertex(remove)
    #u = gt.GraphView(Graph, vfilt=l)

    gt.remove_parallel_edges(Graph)

    Graph.save(base_path + graph_tool_file)
Example #10
def paint_kcore(path, graph, name):
    if path:
        sys.stdout.write('Drawing kcore graph ... ')
        sys.stdout.flush()
        network = gt.Graph(graph, directed=False)
        folder = os.path.abspath(path)

        network = gt.GraphView(network,
                               vfilt=gt.label_largest_component(network))
        kcore = gt.kcore_decomposition(network)

        pos = gt.sfdp_layout(network)
        gt.graph_draw(network,
                      pos=pos,
                      vertex_fill_color=kcore,
                      vertex_text=kcore,
                      output=os.path.join(folder,
                                          str(name) + '-graph-kcore.svg'))
        sys.stdout.write('Ok!\n')
        sys.stdout.flush()
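
A hypothetical call, assuming g is an existing graph_tool graph (the path and name are placeholders, and the folder must already exist):

paint_kcore('./plots', g, 'mynet')  # writes ./plots/mynet-graph-kcore.svg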
Example #11
    def largest_strongly_connected_component(self, graph):
        from graph_tool import Graph
        import graph_tool.all as gt

        largest_connected_component = Graph(directed=True)
        if not self.is_relationship:
            edge_prop_time = largest_connected_component.new_edge_property(
                "int")
            edge_prop_type = largest_connected_component.new_edge_property(
                "string")

        for edge in tqdm(graph.edges(data=True)):
            u, v = edge[:2]
            # keep the edge descriptor so the property maps can be indexed by it
            e = largest_connected_component.add_edge(u, v)
            if not self.is_relationship:
                edge_prop_time[e] = edge[-1]["time"]
                edge_prop_type[e] = edge[-1]["type"]

        largest_connected_component_view = gt.label_largest_component(
            largest_connected_component)
        largest_connected_component = gt.GraphView(
            largest_connected_component,
            vfilt=largest_connected_component_view)

        print(
            "Total nodes {0} in largest strongly connected component.".format(
                largest_connected_component.num_vertices()))
        print(
            "Total edges {0} in largest strongly connected component.".format(
                largest_connected_component.num_edges()))

        with open(self.output, "w+") as output_file:
            for edge in tqdm(largest_connected_component.edges()):
                if not self.is_relationship:
                    output_file.write("{0} {1} {2} {3}\n".format(
                        edge.source(), edge.target(), edge_prop_time[edge],
                        edge_prop_type[edge]))
                else:
                    output_file.write("{0} {1}\n".format(
                        edge.source(), edge.target()))
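
A minimal check of the semantics this method relies on (a sketch): on a directed graph, label_largest_component labels the largest strongly connected component by default:

import graph_tool.all as gt

g = gt.Graph(directed=True)
g.add_edge_list([(0, 1), (1, 0), (1, 2)])  # 0 and 1 form an SCC; 2 dangles
print(gt.label_largest_component(g).a)     # -> [1 1 0]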
Example #12
def kcore_growing_daily_rewiring(fn,
                                 ofn=None,
                                 freq='D',
                                 model='constrained-configuration'):
    """The growing of kcore by rewiring daily."""
    if ofn is None:
        ofn = 'kcore.growing.daily-rewiring.{}.csv'.format(model)
    # load only necessary columns
    df = pd.read_csv(fn, parse_dates=['tweet_created_at'], usecols=[2, 3, 4])
    df = df.set_index('tweet_created_at')
    # remove self-loop
    df = df.loc[df.from_raw_id != df.to_raw_id]
    df['row_id'] = np.arange(len(df))
    df['gpf'] = False
    gpf_rows = df.row_id.groupby(pd.Grouper(freq=freq)).last()
    gpf_rows = gpf_rows.loc[gpf_rows.notnull()].astype('int')
    df.loc[df.row_id.isin(gpf_rows.values), 'gpf'] = True

    v_map = dict()
    e_set = set()
    v_counter = -1
    g = gt.Graph()
    mcore_k = []
    mcore_s = []
    mcore_idx = []
    vnum = []
    enum = []
    largest_component_vnum = []
    ts = []
    for created_at, from_raw_id, to_raw_id, gpf in df[[
            'from_raw_id', 'to_raw_id', 'gpf'
    ]].itertuples():
        e = (from_raw_id, to_raw_id)
        if e not in e_set:
            if from_raw_id not in v_map:
                v_counter += 1
                v_map[from_raw_id] = v_counter
            if to_raw_id not in v_map:
                v_counter += 1
                v_map[to_raw_id] = v_counter
            source = v_map.get(from_raw_id)
            target = v_map.get(to_raw_id)
            g.add_edge(source, target, add_missing=True)
            e_set.add(e)
        if gpf:
            g1 = g.copy()
            rejected = gt.random_rewire(g1, model=model, edge_sweep=True)
            logger.info('Number of rejected when rewiring: %s', rejected)
            ts.append(created_at)
            kcore = pd.Series(gt.kcore_decomposition(g1).a.copy())
            mcore = kcore.value_counts().sort_index(ascending=False)
            mk = mcore.index[0]
            ms = mcore.iloc[0]
            mcore_k.append(mk)
            mcore_s.append(ms)
            mcore_idx.append(kcore.loc[kcore == mk].index.tolist())
            lcv = gt.label_largest_component(g1, directed=False)
            vnum.append(g1.num_vertices())
            enum.append(g1.num_edges())
            largest_component_vnum.append(lcv.a.sum())
            logger.info(g1)
            logger.info('Main core at %s: k=%s, num=%s', created_at, mk, ms)
    cdf = pd.DataFrame(
        dict(timeline=ts,
             mcore_k=mcore_k,
             mcore_s=mcore_s,
             mcore_idx=mcore_idx,
             vnum=vnum,
             enum=enum,
             largest_component_vnum=largest_component_vnum))
    cdf.to_csv(ofn, index=False)
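
The main-core measurement repeated in this and several of the following examples can be factored into a helper; a sketch built from the same calls used above:

import pandas as pd
import graph_tool.all as gt

def main_core_stats(g):
    # k of the innermost (main) core and the number of vertices in it
    kcore = pd.Series(gt.kcore_decomposition(g).a.copy())
    counts = kcore.value_counts().sort_index(ascending=False)
    return counts.index[0], counts.iloc[0]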
Example #13
# If True, the frames will be dumped to disk as images.
offscreen = "offscreen" if args.offscreen else False
out_dir = './frames_dynamic-graph_with-comm-2+'
if offscreen and not os.path.exists(out_dir):
    os.mkdir(out_dir)

# load the graph
g = gt.load_graph(args.file)
filter_comm = g.new_vertex_property("bool")

comm_infomap = np.array(list(g.vp['comm_infomap']))
filter_comm.a = (comm_infomap > 1)

g = gt.GraphView(g, vfilt=filter_comm, directed=False)
g = gt.GraphView(g, vfilt=gt.label_largest_component(g), directed=False)
g = gt.Graph(g, prune=True)
pos = g.vp["pos_sfdp_infomap"]  # layout positions

# find the initial and final date
dd, mm, yy = map(int, g.gp['initial-date'].split('-'))
fd, fm, fy = map(int, g.gp['final-date'].split('-'))
initial_date = date(yy, mm, dd)
final_date = date(fy, fm, fd)

# set the possible states of each vertex
future = sns.xkcd_rgb['grey']
present = sns.xkcd_rgb['yellow']
past = sns.xkcd_rgb['brick red']

# Initialize all vertices to the _future_ state
Example #14
        new = False

        srcv = gt.find_vertex(sparse_graph, hashes, src)
        if not srcv:
            srcv = sparse_graph.add_vertex()
            hashes[srcv] = src
            new = True
        else:
            srcv = srcv[0]

        dstv = gt.find_vertex(sparse_graph, hashes, dst)
        if not dstv:
            dstv = sparse_graph.add_vertex()
            hashes[dstv] = dst
            new = True
        else:
            dstv = dstv[0]

        if new:
            e = sparse_graph.add_edge(srcv, dstv)

print('Sparse graph has {} nodes, {} edges'.format(
    sparse_graph.num_vertices(), sparse_graph.num_edges()))
comp = gt.label_largest_component(sparse_graph, directed=False)
#pos = gt.radial_tree_layout(sparse_graph, sparse_graph.vertex(0))
gt.graph_draw(sparse_graph, output_size=(
    5000, 5000), output=input_fasta + '_sparse.png')
sparse_graph.set_vertex_filter(comp)
gt.graph_draw(sparse_graph, output_size=(
    5000, 5000), output=input_fasta + '_sparse_comp.png')
Example #15
def main():
    import sys
    import os.path
    import glob
    import itertools
    from argparse import ArgumentParser

    parser = ArgumentParser(
        description='Read a graph, and produce a layout with t-SNE.')

    # Input
    parser.add_argument(
        'graphs',
        nargs='+',
        help='(List of) input graph(s). Or a folder with graphs.')

    # Output
    parser.add_argument('-o',
                        default='./output',
                        help='Folder to write output to. Default: ./output')
    parser.add_argument('--save_every',
                        type=int,
                        help='Save a jpg snapshot every x epochs.')
    parser.add_argument(
        '--render_video',
        action='store_true',
        help=
        'Render a video of the layout evolution. Needs ImageMagick and ffmpeg.'
    )
    parser.add_argument(
        '--retain_snaps',
        action='store_true',
        help=
        'Retain the snapshots. This argument is ignored if no video is rendered.'
    )
    parser.add_argument(
        '--save_layout_data',
        action='store_true',
        help='Save all layout coordinates in a .pickle file and a .txt file.')
    parser.add_argument('--opacity',
                        type=float,
                        default=0.3,
                        help='Edge opacity.')

    # Manipulations to graph
    parser.add_argument(
        '--strip_graph',
        action='store_true',
        help='Retain only the largest connected component in the graph.')
    parser.add_argument('--rnd_seed',
                        '-r',
                        type=int,
                        nargs='+',
                        default=[None],
                        help='Seed for random state. (Default: Random seed)')
    parser.add_argument(
        '--pre_sfdp',
        action='store_true',
        help=
        'If this flag is given, the vertices will be pre-initialized with SFDP.'
    )
    parser.add_argument('--only_sfdp',
                        action='store_true',
                        help='If this flag is given, only SFDP will be done.')
    parser.add_argument(
        '--accept_all_sfdp',
        action='store_true',
        help=
        'If this flag is given, no confirmation is asked for the SFDP layouts.'
    )
    parser.add_argument(
        '--remove_rnd_edges',
        nargs='+',
        type=float,
        default=[0],
        help=
        'Mutate the graph by removing random edges. If this is used without a random seed, a random random seed will be generated. The value given to this argument is the fraction of edges that will be removed.'
    )

    # Hyperparameters
    parser.add_argument('--n_epochs',
                        '-e',
                        nargs='+',
                        type=int,
                        default=[1000],
                        help='One or more numbers of t-SNE epochs.')
    parser.add_argument('--lr_init',
                        nargs='+',
                        type=float,
                        default=[80],
                        help='One or more initial learning rates.')
    parser.add_argument(
        '--lr_final',
        nargs='+',
        type=float,
        default=[None],
        help='One or more final learning rates. Default: Same as lr_init.')
    parser.add_argument('--lr_switch',
                        nargs='+',
                        type=int,
                        default=[None],
                        help='One or more learning rate switch-points.')
    parser.add_argument('--momentum_init',
                        nargs='+',
                        type=float,
                        default=[0.5],
                        help='One or more initial momenta.')
    parser.add_argument('--momentum_final',
                        nargs='+',
                        type=float,
                        default=[0.5],
                        help='One or more final momenta.')
    parser.add_argument('--momentum_switch',
                        nargs='+',
                        type=int,
                        default=[None],
                        help='One or more momentum switch-points.')

    # Distance metric parameters
    parser.add_argument(
        '--distance_metric',
        '-d',
        choices=['shortest_path', 'spdm', 'modified_adjacency', 'mam'],
        default='spdm',
        help='The distance metric that is used for the pairwise distances.')
    parser.add_argument('-k',
                        nargs='+',
                        type=float,
                        default=[1],
                        help='Exponent for transfer function.')

    # Cost function parameters
    #   Kullback-Leibler
    parser.add_argument('--perplexity',
                        '-p',
                        nargs='+',
                        type=float,
                        default=[80],
                        help='One or more perplexities.')
    parser.add_argument('--l_kl_init',
                        nargs='+',
                        type=float,
                        default=[1],
                        help='One or more KL factors.')
    parser.add_argument('--l_kl_final',
                        nargs='+',
                        type=float,
                        default=[1],
                        help='One or more KL factors.')
    parser.add_argument('--l_kl_switch',
                        nargs='+',
                        type=int,
                        default=[None],
                        help='One or more KL switch-points')
    #   Edge contraction
    parser.add_argument('--l_e_init',
                        nargs='+',
                        type=float,
                        default=[0],
                        help='One or more edge contraction factors.')
    parser.add_argument('--l_e_final',
                        nargs='+',
                        type=float,
                        default=[0],
                        help='One or more edge contraction factors.')
    parser.add_argument('--l_e_switch',
                        nargs='+',
                        type=int,
                        default=[None],
                        help='One or more edge contraction switch-points')
    #   Compression
    parser.add_argument('--l_c_init',
                        nargs='+',
                        type=float,
                        default=[1.2],
                        help='One or more compression factors.')
    parser.add_argument('--l_c_final',
                        nargs='+',
                        type=float,
                        default=[0],
                        help='One or more compression factors.')
    parser.add_argument('--l_c_switch',
                        nargs='+',
                        type=int,
                        default=[None],
                        help='One or more compression switch-points')
    #   Repulsion
    parser.add_argument('--l_r_init',
                        nargs='+',
                        type=float,
                        default=[0],
                        help='One or more repulsion factors.')
    parser.add_argument('--l_r_final',
                        nargs='+',
                        type=float,
                        default=[0.5],
                        help='One or more repulsion factors.')
    parser.add_argument('--l_r_switch',
                        nargs='+',
                        type=int,
                        default=[None],
                        help='One or more repulsion switch-points')
    parser.add_argument(
        '--r_eps',
        nargs='+',
        type=float,
        default=[0.2],
        help='Additional term in denominator to prevent near-singularities.')

    args = parser.parse_args()

    # Retrieve a list of all files in the directory, if args.graphs[0] is a directory.
    if len(args.graphs) == 1 and os.path.isdir(args.graphs[0]):
        args.graphs = glob.glob(args.graphs[0] + '/*')

    # Check graph input
    for g_file in args.graphs:
        if not os.path.isfile(g_file):
            raise FileNotFoundError(g_file + ' is not a file.')

    # Generate random random seed if none is given.
    if args.rnd_seed == [None]:
        args.rnd_seed = [np.random.randint(1e8)]

    # Ignore retain_snaps argument if no video is rendered.
    if not args.render_video:
        args.retain_snaps = True

    # Get names of the graphs (by splitting of path and extension)
    names = [
        os.path.split(os.path.splitext(file)[0])[1] for file in args.graphs
    ]

    # Determine output folders. One is created in the specified output folder
    # for every graph that is supplied.
    output_folders = [args.o + '/' + name for name in names]

    # Check (and possibly create) output folders
    for folder in [args.o] + output_folders:
        if not os.path.exists(folder):
            os.makedirs(folder)

    # At least everything is fine for now.
    there_were_exceptions = False

    # Loop over all graphs (and their respective output folders)
    for g_file, g_name, output_folder in zip(args.graphs, names,
                                             output_folders):
        # Load the graph
        g = graph_io.load_graph(g_file)
        print(
            '[tsnetwork] Loaded graph {0} (|V| = {1}, |E| = {2}) into memory.'.
            format(g_name, g.num_vertices(), g.num_edges()))

        # Add graph name as property in the internal representation
        g.graph_properties['name'] = g.new_graph_property('string', g_name)

        # Usually this loop has just one iteration, with only 0 as the value
        # for rmv_edge_frac (that is, no edges are removed).
        for rmv_edge_frac in args.remove_rnd_edges:
            print(
                '[tsnetwork] Original graph: (|V|, |E|) = ({0}, {1}).'.format(
                    g.num_vertices(), g.num_edges()))

            # Create a temporary copy of the graph that will be manipulated.
            gv = gt.GraphView(g)

            # Remove rmv_edge_frac of the graph's edges from gv.
            gv.clear_filters()
            gv.reindex_edges()
            edge_list = list(gv.edges())
            not_here_ep = gv.new_edge_property('bool', val=True)
            n_remove_edges = int(rmv_edge_frac * gv.num_edges())
            for e in np.random.randint(0, gv.num_edges(), n_remove_edges):
                not_here_ep[edge_list[e]] = False
            gv.set_edge_filter(not_here_ep)

            if n_remove_edges > 0:
                print(
                    '[tsnetwork] Removed {2} random edges: (|V|, |E|) = ({0}, {1}).'
                    .format(gv.num_vertices(), gv.num_edges(), n_remove_edges))

            # Filter the graph s.t. only the largest connected component
            # remains.
            if args.strip_graph:
                largest_connected_component = gt.label_largest_component(gv)
                gv.set_vertex_filter(largest_connected_component)
                gv.purge_vertices()
                print(
                    '[tsnetwork] Filtered largest component: (|V|, |E|) = ({0}, {1}).'
                    .format(gv.num_vertices(), gv.num_edges()))

            if args.pre_sfdp or args.only_sfdp:
                # Perform a SFDP layout (either as the only layout or as a
                # starting point for t-SNE.)
                Y_init, _ = sfdp_placement(
                    gv,
                    output_folder,
                    ask_for_acceptance=not args.accept_all_sfdp,
                    opacity=args.opacity)
                if args.only_sfdp:
                    continue
            else:
                # Random positions will be generated
                Y_init = None

            # Compute distance matrix of this graph with the specified metric
            X = distance_matrix.get_distance_matrix(gv, args.distance_metric)

            # Retrieve the adjacency matrix of the graph
            Adj_sparse = gt.adjacency(gv)
            Adj = np.zeros(Adj_sparse.shape, dtype='float32')
            for i, j in zip(*Adj_sparse.nonzero()):
                Adj[i, j] = Adj_sparse[i, j]

            # Make list of tsnetwork configuration objects. These are objects
            # that represent a configuration for a t-SNE layout.
            tsn_configs = []
            for perplexity, n_epochs, initial_lr, final_lr, lr_switch, initial_momentum,\
                final_momentum, momentum_switch,\
                initial_l_kl, final_l_kl, l_kl_switch,\
                initial_l_e, final_l_e, l_e_switch,\
                initial_l_c, final_l_c, l_c_switch,\
                initial_l_r, final_l_r, l_r_switch,\
                r_eps, k, rnd_seed in itertools.product(
                    args.perplexity, args.n_epochs, args.lr_init, args.lr_final,
                    args.lr_switch, args.momentum_init, args.momentum_final,
                    args.momentum_switch,
                    args.l_kl_init, args.l_kl_final, args.l_kl_switch,
                    args.l_e_init, args.l_e_final, args.l_e_switch,
                    args.l_c_init, args.l_c_final, args.l_c_switch,
                    args.l_r_init, args.l_r_final, args.l_r_switch,
                    args.r_eps, args.k, args.rnd_seed):

                # Use 50% for the switching points if no argument is given
                if lr_switch is None:
                    lr_switch = int(n_epochs * 0.5)
                if momentum_switch is None:
                    momentum_switch = int(n_epochs * 0.5)
                if l_kl_switch is None:
                    l_kl_switch = int(n_epochs * 0.5)
                if l_e_switch is None:
                    l_e_switch = int(n_epochs * 0.5)
                if l_c_switch is None:
                    l_c_switch = int(n_epochs * 0.5)
                if l_r_switch is None:
                    l_r_switch = int(n_epochs * 0.5)

                if final_lr is None:
                    final_lr = initial_lr

                cfg = TsnConfig(perplexity=perplexity,
                                n_epochs=n_epochs,
                                initial_lr=initial_lr,
                                final_lr=final_lr,
                                lr_switch=lr_switch,
                                initial_momentum=initial_momentum,
                                final_momentum=final_momentum,
                                momentum_switch=momentum_switch,
                                initial_l_kl=initial_l_kl,
                                final_l_kl=final_l_kl,
                                l_kl_switch=l_kl_switch,
                                initial_l_e=initial_l_e,
                                final_l_e=final_l_e,
                                l_e_switch=l_e_switch,
                                initial_l_c=initial_l_c,
                                final_l_c=final_l_c,
                                l_c_switch=l_c_switch,
                                initial_l_r=initial_l_r,
                                final_l_r=final_l_r,
                                l_r_switch=l_r_switch,
                                r_eps=r_eps,
                                k=k,
                                pre_sfdp=args.pre_sfdp,
                                rmv_edge_frac=rmv_edge_frac,
                                rnd_seed=rnd_seed,
                                distance_matrix=args.distance_metric)

                # Do not add configurations that already have files matching
                # the description, unless the user confirms overwriting.
                if any([
                        file.startswith(cfg.get_description() + '.')
                        for file in os.listdir(output_folder)
                ]):
                    if not usr_input.confirm('[tsnetwork] ' +
                                             cfg.get_description() +
                                             ' files exists! Overwrite?'):
                        continue
                tsn_configs.append(cfg)

            # Loop over the t-SNE configurations for a single graph
            for cfg in tsn_configs:
                print('[tsnetwork] Processing: ' + cfg.get_description())

                # Path to the directory where the snapshots will be written
                # (if --save_every is given).
                snaps_dir = output_folder + '/snaps_' + cfg.get_description()

                # Clean out existing snaps directory if it exists.
                if args.save_every is not None and os.path.exists(snaps_dir):
                    if usr_input.confirm('[tsnetwork] ' + snaps_dir +
                                         ' exists. Delete contents?'):
                        for file in os.listdir(snaps_dir):
                            file_path = os.path.join(snaps_dir, file)
                            try:
                                if os.path.isfile(file_path):
                                    os.unlink(file_path)
                                elif os.path.isdir(file_path):
                                    shutil.rmtree(file_path)
                            except Exception as e:
                                print(e)
                elif args.save_every is not None and not os.path.exists(
                        snaps_dir):
                    # Make folder for snaps, if it is necessary and it doesn't
                    # exist yet.
                    os.makedirs(snaps_dir)

                # Apply the transfer function
                X_transferred = X**cfg.k

                # Try to do the tsne layout.
                try:
                    Y, costs = thesne.tsne(
                        X_transferred,
                        random_state=cfg.rnd_seed,
                        perplexity=cfg.perplexity,
                        n_epochs=cfg.n_epochs,
                        Y=Y_init,
                        initial_lr=cfg.initial_lr,
                        final_lr=cfg.final_lr,
                        lr_switch=cfg.lr_switch,
                        initial_momentum=cfg.initial_momentum,
                        final_momentum=cfg.final_momentum,
                        momentum_switch=cfg.momentum_switch,
                        initial_l_kl=cfg.initial_l_kl,
                        final_l_kl=cfg.final_l_kl,
                        l_kl_switch=cfg.l_kl_switch,
                        initial_l_e=cfg.initial_l_e,
                        final_l_e=cfg.final_l_e,
                        l_e_switch=cfg.l_e_switch,
                        initial_l_c=cfg.initial_l_c,
                        final_l_c=cfg.final_l_c,
                        l_c_switch=cfg.l_c_switch,
                        initial_l_r=cfg.initial_l_r,
                        final_l_r=cfg.final_l_r,
                        l_r_switch=cfg.l_r_switch,
                        r_eps=cfg.r_eps,
                        Adj=Adj,
                        g=gv,
                        snaps_output_folder=snaps_dir,
                        save_every=args.save_every)
                except (thesne.NaNException, thesne.SigmaTooLowException) as e:
                    there_were_exceptions = True
                    print('[exception] {0}'.format(e))

                    # Also write exception to a file.
                    with open(
                            output_folder + '/exception_' +
                            cfg.get_description() + '.out', 'w') as f:
                        print('{0}'.format(e), file=f)
                    print('[tsnetwork] Continuing with next TsnConfig.')
                    continue

                # Render an animation of the snapshots
                if args.render_video:
                    animations.save_animation(snaps_dir, cfg.get_description())

                # Remove the directory with snapshots.
                if args.save_every is not None and not args.retain_snaps and os.path.exists(
                        snaps_dir):
                    print('[tsnetwork] Cleaning up snaps directory.')
                    shutil.rmtree(snaps_dir)

                # Save the data (graph, vertex coordinates)
                if args.save_layout_data:
                    layout_io.save_vna_layout(
                        output_folder + '/layout_' + cfg.get_description() +
                        '.vna', gv, Y)
                    layout_io.save_layout_txt(
                        output_folder + '/layout_edges_' +
                        cfg.get_description() + '.txt', gv, Y)

                # Save final drawing of the layout
                layout_io.save_drawing(output_folder,
                                       gv,
                                       Y.T,
                                       cfg.get_description(),
                                       formats=['jpg', 'pdf'],
                                       edge_colors="rgb",
                                       draw_vertices=False,
                                       opacity=args.opacity)

    if there_were_exceptions:
        print('[tsnetwork] Done! However, be wary. There were exceptions.')
    else:
        print('[tsnetwork] Done!')
Example #16
        ver_names[v2] = w2
    else:
        v2 = pairs_graph.vertex(word_dict[w2])

    if cur_weight == 0:
        continue
    e = pairs_graph.add_edge(v1, v2)
    edge_weights[e] = cur_weight
# adding properties
pairs_graph.vertex_properties["name"] = ver_names
pairs_graph.vertex_properties["id"] = ver_id
pairs_graph.edge_properties["weight"] = edge_weights
print("graph builded")
print(str(len(word_dict)))

largest_label = label_largest_component(pairs_graph)

# reading negative and positive parts
pos_file = open('../results/pos' + ftag + '.txt', 'r', encoding="utf-8")
neg_file = open('../results/neg' + ftag + '.txt', 'r', encoding="utf-8")
positive = []
negative = []
neutral = []

for s in pos_file:
    s = s.strip(' \n\ufeff')
    if len(s) != 0:
        positive.append(s)
for s in neg_file:
    s = s.strip(' \n\ufeff')
    if len(s) != 0:
Example #17
def kcore_growing_shuffle(fn1='retweet.201710.claim.raw.csv',
                          fn2='graph.daily.csv',
                          ofn=None,
                          rewiring=None):
    """The growing of kcore by shuffling the edge list."""
    if ofn is None:
        ofn = 'kcore.growing.shuffle'
        if rewiring:
            ofn += '.' + rewiring
        ofn += '.csv'
    g = prepare_network_from_raw(fn1)
    if rewiring is not None:
        gt.random_rewire(g, model=rewiring)
    evmap = pd.read_csv(fn2)
    enum_list = evmap['enum'].tolist()
    emap = pd.DataFrame(g.get_edges().copy(),
                        columns=['source', 'target', 'idx'])
    emap = emap[['source', 'target']]
    emap = emap.reindex(np.random.permutation(
        emap.index)).reset_index(drop=True)
    v_map = dict()
    v_counter = -1
    gp_counter = 0
    g = gt.Graph()
    mcore_k = []
    mcore_s = []
    mcore_idx = []
    vnum = []
    enum = []
    largest_component_vnum = []
    for i, s, t in emap.itertuples():
        if s not in v_map:
            v_counter += 1
            v_map[s] = v_counter
        if t not in v_map:
            v_counter += 1
            v_map[t] = v_counter
        source = v_map.get(s)
        target = v_map.get(t)
        g.add_edge(source, target, add_missing=True)
        # take a snapshot whenever the edge count reaches the next daily total
        if gp_counter < len(enum_list) and g.num_edges() >= enum_list[gp_counter]:
            kcore = pd.Series(gt.kcore_decomposition(g).a.copy())
            mcore = kcore.value_counts().sort_index(ascending=False)
            mk = mcore.index[0]
            ms = mcore.iloc[0]
            mcore_k.append(mk)
            mcore_s.append(ms)
            mcore_idx.append(kcore.loc[kcore == mk].index.tolist())
            lcv = gt.label_largest_component(g, directed=False)
            vnum.append(g.num_vertices())
            enum.append(g.num_edges())
            largest_component_vnum.append(lcv.a.sum())
            logger.info(g)
            logger.info('gp counter: %s', gp_counter)
            logger.info('Main core at enum=%s: k=%s, num=%s', g.num_edges(),
                        mk, ms)
            gp_counter += 1
    cdf = pd.DataFrame(
        dict(mcore_k=mcore_k,
             mcore_s=mcore_s,
             mcore_idx=mcore_idx,
             vnum=vnum,
             enum=enum,
             largest_component_vnum=largest_component_vnum))
    cdf.to_csv(ofn, index=False)
Example #18
def kcore_growing_ba(
    fn1='ba.gml',
    fn2='graph.daily.csv',
    ofn=None,
):
    """The growing of kcore for a BA model."""
    if ofn is None:
        ofn = 'kcore.growing.ba.csv'
    g = gt.load_graph(fn1)
    evmap = pd.read_csv(fn2)
    vnum_list = evmap['vnum'].tolist()
    emap = pd.DataFrame(g.get_edges().copy(),
                        columns=['source', 'target', 'idx'])
    emap = emap[['source', 'target']]
    v_map = dict()
    v_counter = -1
    gp_counter = 0
    g = gt.Graph()
    mcore_k = []
    mcore_s = []
    mcore_idx = []
    vnum = []
    enum = []
    largest_component_vnum = []
    for i, s, t in emap.itertuples():
        if s not in v_map:
            v_counter += 1
            v_map[s] = v_counter
        if t not in v_map:
            v_counter += 1
            v_map[t] = v_counter
        source = v_map.get(s)
        target = v_map.get(t)
        g.add_edge(source, target, add_missing=True)
        if g.num_vertices() >= vnum_list[gp_counter]:
            kcore = pd.Series(gt.kcore_decomposition(g).a.copy())
            mcore = kcore.value_counts().sort_index(ascending=False)
            mk = mcore.index[0]
            ms = mcore.iloc[0]
            mcore_k.append(mk)
            mcore_s.append(ms)
            mcore_idx.append(kcore.loc[kcore == mk].index.tolist())
            lcv = gt.label_largest_component(g, directed=False)
            vnum.append(g.num_vertices())
            enum.append(g.num_edges())
            largest_component_vnum.append(lcv.a.sum())
            logger.info(g)
            logger.info('gp counter: %s', gp_counter)
            logger.info('Main core at vnum=%s: k=%s, num=%s', g.num_vertices(),
                        mk, ms)
            gp_counter += 1
            try:
                vnum_list[gp_counter]
            except IndexError:
                break
    cdf = pd.DataFrame(
        dict(mcore_k=mcore_k,
             mcore_s=mcore_s,
             mcore_idx=mcore_idx,
             vnum=vnum,
             enum=enum,
             largest_component_vnum=largest_component_vnum))
    cdf.to_csv(ofn, index=False)
Example #19
def load_ply_layout(file):
    g = gt.Graph(directed=False)

    with open(file) as f:
        all_lines = f.read().splitlines()
        it = iter(all_lines)

        line = next(it)
        assert (line == 'ply')

        line = next(it)
        assert (line.startswith('format ascii'))

        line = next(it)
        while not line.startswith('element'):
            line = next(it)

        words = line.split(' ')
        assert (words[0] == 'element')
        assert (words[1] == 'vertex')
        assert (words[2].isdigit())
        n_vertices = int(words[2])
        g.add_vertex(n_vertices)
        assert (g.num_vertices() == n_vertices)

        line = next(it)
        v_props = OrderedDict()
        while line.startswith('property'):
            words = line.split(' ')
            the_type = words[1]
            if the_type == 'list':
                name = words[4]
                v_props[name] = dict()
                count_type = words[2]
                entry_type = words[3]
                v_props[name]['count_type'] = count_type
                v_props[name]['entry_type'] = entry_type
            else:
                name = words[2]
                v_props[name] = dict()
            v_props[name]['type'] = the_type
            line = next(it)
        print(v_props)

        vps = dict()
        for i, v_prop in enumerate(v_props):
            name = list(v_props.keys())[i]
            the_type = v_props[name]['type']
            if the_type == 'float':
                vp = g.new_vp(the_type)
                vps[name] = vp
            else:
                raise NotImplementedError()

        print(vps)
        assert ('x' in vps.keys())
        assert ('y' in vps.keys())
        assert ('z' in vps.keys())

        # Scan to next element
        while not line.startswith('element'):
            line = next(it)

        words = line.split(' ')
        assert (words[0] == 'element')
        assert (words[1] == 'face')
        assert (words[2].isdigit())
        n_faces = int(words[2])
        print(n_faces)

        line = next(it)
        f_props = OrderedDict()
        while line.startswith('property'):
            words = line.split(' ')
            the_type = words[1]
            if the_type == 'list':
                name = words[4]
                f_props[name] = dict()
                count_type = words[2]
                entry_type = words[3]
                f_props[name]['count_type'] = count_type
                f_props[name]['entry_type'] = entry_type
            else:
                name = words[2]
                f_props[name] = dict()
            f_props[name]['type'] = the_type
            line = next(it)
        print(f_props)

        while not line.startswith('end_header'):
            line = next(it)

        for i in range(n_vertices):
            line = next(it)
            words = line.split(' ')
            words = [word for word in words if word != '']
            assert (len(words) == len(v_props.keys()))
            for j, word in enumerate(words):
                name = list(v_props.keys())[j]
                the_type = v_props[name]['type']
                if the_type == 'float':
                    vps[name][i] = float(word)
                else:
                    raise NotImplementedError

        for _ in range(n_faces):
            line = next(it)
            words = line.split(' ')
            words = [word for word in words if word != '']
            i = 0
            for name in f_props.keys():
                the_type = f_props[name]['type']
                if the_type == 'list':
                    if f_props[name]['count_type'] == 'uchar':
                        n_items = int(words[i])
                    else:
                        raise NotImplementedError
                    the_list = [
                        int(word) for word in words[i + 1:i + 1 + n_items]
                    ]
                    i += 1 + n_items

                    if name == 'vertex_indices':
                        for j, idx1 in enumerate(the_list):
                            idx2 = the_list[(j + 1) % len(the_list)]
                            g.add_edge(idx1, idx2)
            assert (i == len(words))

    gt.remove_parallel_edges(g)

    largest_connected_component = gt.label_largest_component(g)
    unreferenced = sum([1 for i in largest_connected_component.a if i == 0])
    if unreferenced > 0:
        g.set_vertex_filter(largest_connected_component)
        g.purge_vertices()
        print('Filtered {0} unreferenced vertices.'.format(unreferenced))

    if 'x' in vps.keys() and 'y' in vps.keys():
        if 'z' in vps.keys():
            Y = np.zeros((g.num_vertices(), 3))
            for v in g.vertices():
                Y[v, 0] = vps['x'][v]
                Y[v, 1] = vps['y'][v]
                Y[v, 2] = vps['z'][v]
        else:
            Y = np.zeros((g.num_vertices(), 2))
            for v in g.vertices():
                Y[v, 0] = vps['x'][v]
                Y[v, 1] = vps['y'][v]

    return g, Y
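
A hypothetical call (the file name is a placeholder); the function returns the pruned mesh graph and a per-vertex coordinate array:

g, Y = load_ply_layout('mesh.ply')
print(g.num_vertices(), Y.shape)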
Example #20
def kcore_growing_weighted_shuffle(fn1,
                                   fn2='graph.daily.csv',
                                   ofn=None,
                                   freq='D'):
    """The growing of kcore by shuffling the retweet list."""
    if ofn is None:
        ofn = 'kcore.growing.weighted-shuffle.csv'
    # load only necessary columns
    df = pd.read_csv(fn1, usecols=[3, 4])
    # remove self-loop
    df = df.loc[df.from_raw_id != df.to_raw_id]
    df = df.reindex(np.random.permutation(df.index))
    evmap = pd.read_csv(fn2)
    enum_list = evmap['enum'].tolist()
    v_map = dict()
    v_counter = -1
    e_set = set()
    gp_counter = 0
    g = gt.Graph()
    mcore_k = []
    mcore_s = []
    mcore_idx = []
    vnum = []
    enum = []
    largest_component_vnum = []
    for from_raw_id, to_raw_id in df[['from_raw_id',
                                      'to_raw_id']].itertuples(index=False):
        e = (from_raw_id, to_raw_id)
        if e not in e_set:
            if from_raw_id not in v_map:
                v_counter += 1
                v_map[from_raw_id] = v_counter
            if to_raw_id not in v_map:
                v_counter += 1
                v_map[to_raw_id] = v_counter
            source = v_map.get(from_raw_id)
            target = v_map.get(to_raw_id)
            g.add_edge(source, target, add_missing=True)
            e_set.add(e)
        # take a snapshot whenever the edge count reaches the next daily total
        if gp_counter < len(enum_list) and g.num_edges() >= enum_list[gp_counter]:
            kcore = pd.Series(gt.kcore_decomposition(g).a.copy())
            mcore = kcore.value_counts().sort_index(ascending=False)
            mk = mcore.index[0]
            ms = mcore.iloc[0]
            mcore_k.append(mk)
            mcore_s.append(ms)
            mcore_idx.append(kcore.loc[kcore == mk].index.tolist())
            lcv = gt.label_largest_component(g, directed=False)
            vnum.append(g.num_vertices())
            enum.append(g.num_edges())
            largest_component_vnum.append(lcv.a.sum())
            logger.info(g)
            logger.info('gp counter: %s', gp_counter)
            logger.info('Main core at enum=%s: k=%s, num=%s', g.num_edges(),
                        mk, ms)
            gp_counter += 1
            if gp_counter >= len(enum_list):
                break
    cdf = pd.DataFrame(
        dict(mcore_k=mcore_k,
             mcore_s=mcore_s,
             mcore_idx=mcore_idx,
             vnum=vnum,
             enum=enum,
             largest_component_vnum=largest_component_vnum))
    cdf.to_csv(ofn, index=False)
Example #21
# Merging data
ctrlity_frame = [df_eigen_centrality, df_harmnic_centrality, df_betweenness_centrality, df_degree_centrality]
ctrlity_merged = reduce(lambda left,right: pd.merge(left, right, on=['ctry', 'year'],
                                            how='inner'), ctrlity_frame).fillna('0')

ctrlity_merged.to_csv("/content/drive/MyDrive/G11-MEA-Diffusion/dataMEA_Ctrlity/ctrlity_output.csv")

"""### visualization"""

#eigenvector centrality

ee, x = gt.eigenvector(gt_2018_univ)
x.a /= (x.a*10 - 0.7)/0.04 # follow the formula in the book 
gt.graph_draw(gt_2018_univ, vertex_fill_color=x, vcmap=matplotlib.cm.gist_earth, vorder=x) #

gc = gt.GraphView(gt_2018_univ, vfilt=gt.label_largest_component(gt_2018_univ))
c = gt.closeness(gc)
c.a /= c.a / 232
gt.graph_draw(gc, vertex_fill_color=c, vcmap=matplotlib.cm.Oranges, vorder=c)

#betweenness centrality 

bv, be = gt.betweenness(gt_2018_univ)
gt.graph_draw(gt_2018_univ, pos=None, vertex_fill_color=bv, vcmap=matplotlib.cm.summer)

deg = gt_2018_univ.degree_property_map("total")
gt.graph_draw(gt_2018_univ, vertex_fill_color=deg, vorder=deg)

# https://colab.research.google.com/github/count0/colab-gt/blob/master/colab-gt.ipynb#scrollTo=6km1lWMF2kAm

Example #22
import powerlaw
import sys
import scipy
import math
import numpy as np
import graph_tool.all as gt
import matplotlib.pyplot as plt

import vis

G = gt.collection.data[sys.argv[1]]
giant = gt.label_largest_component(G)
origin_size = G.num_vertices()
# remove non-giant vertices in descending index order (safe with fast=True)
for v in range(origin_size - 1, -1, -1):
    if not giant[v]:
        G.remove_vertex(v, fast=True)
G.set_directed(False)
data = sorted(G.get_out_degrees(G.get_vertices()))  # data can be a list or numpy array
print(len(data))
results = powerlaw.Fit(data)
print(results.power_law.alpha)
print(results.power_law.xmin)
R, p = results.distribution_compare('power_law', 'lognormal')

y = []
x = []
Example #24
f_g.edge_properties['cofield'] = e_w

# <codecell>

print(f_g.num_edges())
print(f_g.num_vertices())

# <codecell>

v_comm = gt.community_structure(f_g, 1000, 5)
#v_comm = gt.betweenness(f_g)

# <codecell>

import numpy
u = gt.GraphView(f_g, vfilt=gt.label_largest_component(f_g))
deg = u.degree_property_map('total', weight=f_g.edge_properties['cofield'])
deg.fa = 2 * (numpy.sqrt(deg.fa) * 0.5 + 0.4)
edg = f_g.edge_properties['cofield']
edg.fa = numpy.sqrt(edg.fa) * 0.6 + 1
ebet = gt.betweenness(f_g)[1]

# <codecell>


# <codecell>

pos, selected = gt.interactive_window(u, pos=gt.radial_tree_layout(f_g, f_g.vertex(1)),
                         vertex_size = deg, 
                         vertex_fill_color = v_comm, 
                         vertex_text = f_g.vertex_properties['field'],
Example #25
import matplotlib.cm
import graph_tool.all as gtool

gr = gtool.collection.data["polblogs"]
gr = gtool.GraphView(gr, vfilt=gtool.label_largest_component(gr))

cness = gtool.closeness(gr)

gtool.graph_draw(gr,
                 pos=gr.vp["pos"],
                 vertex_fill_color=cness,
                 vertex_size=gtool.prop_to_size(cness, mi=5, ma=15),
                 vorder=cness,
                 vcmap=matplotlib.cm.gist_heat,
                 output="political_closeness.pdf")
Example #26
def load_net(infile, core=False, filter=False):
    '''
    Load a `graphml` file.
    :param infile: The `graphml` file to load.
    :param core: Does the net contain a core vertex property map?
    :param filter: Apply a filter?
    :return: the graph_tool `Graph`, a prefix for output files, and
        (if core is True) the property map for core vertices
    '''
    # Output filename
    #  Prefix only, not extension: 
    #  `split('.')` splits `infile` at the periods and returns a list 
    #  `[:-1]` grabs everything except the extension
    #  `'.'.join` recombines everything with periods
    outfile_pre = '.'.join(infile.split('.')[:-1])
    
    if path.exists('output/' + outfile_pre + '.out.gt'):
        print('Found pre-processed graph')
        infile = 'output/' + outfile_pre + '.out.gt'
    
    print('Loading ' + infile)
    net = gt.load_graph(infile)
    
    # If `core` is true, extract the core set
    if core:
        core_pmap = net.vertex_properties['core']
        core_vertices = [vertex for vertex in net.vertices() if core_pmap[vertex]]
    
    # Print basic network statistics
    print('Loaded ' + infile)
    print('Vertices: ' + str(net.num_vertices()))
    print('Edges: ' + str(net.num_edges()))
    if core:
        print('Core vertices: ' + str(len(core_vertices)))
    
    if core and filter:
        # Add a filter
        print('Adding filter')
        # Recent papers filter for the citation net
        if 'citenet0' in infile:
            year = net.vp['year']
            recent_list = [year[vertex] > 2005 for vertex in net.vertices()]
            recent_pmap = net.new_vertex_property('boolean')
            recent_pmap.a = np.array(recent_list)
            net.set_vertex_filter(recent_pmap)
        # Distance from core set for the author nets
        else:
            net.set_directed(False)
            extended_set_pmap = core_pmap.copy()
            gt.infect_vertex_property(net, extended_set_pmap, vals=[True])
            gt.infect_vertex_property(net, extended_set_pmap, vals=[True])
            net.set_vertex_filter(extended_set_pmap)
        # Remove everything caught in the filter
        net.purge_vertices()
        # Extract the largest component
        net.set_vertex_filter(gt.label_largest_component(net, directed=False))
        net.purge_vertices()
        # Rebuild core
        core_pmap = net.vertex_properties['core']
        core_vertices = [vertex for vertex in net.vertices() if core_pmap[vertex]]

        print('Filtered vertices: ' + str(net.num_vertices()))
        print('Filtered edges: ' + str(net.num_edges()))
        print('Filtered core: ' + str(len(core_vertices)))
    elif filter and not core:
        print('Warning: filter=True has no effect when core=False')
    
    if core:
        return net, outfile_pre, core_pmap, core_vertices
    else:
        return net, outfile_pre
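
Hypothetical calls (the file names are placeholders), showing the two return shapes:

net, prefix, core_pmap, core_vertices = load_net('authors.graphml', core=True, filter=True)
net, prefix = load_net('citenet0.graphml')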
Example #27
        helper = lambda: int(random.random() * num_nodes)
    elif deg_sample_type == 'exp':
        helper = lambda: np.random.exponential(scale=0.06) * num_nodes
    deg_sample = lambda: (helper(), helper())
    # print deg_sample()
    while True:
        g = gt.random_graph(num_nodes, deg_sampler=deg_sample, directed=directed)
        use_weights = random.random() > 0.5
        if use_weights:
            weights = g.new_vertex_property('float')
            weights.a = np.array(
                [(1. + random.random() * 9) if i else 1. for i in (np.random.random(size=g.num_vertices()) > 0.5)])
        else:
            weights = None

        lcc = gt.label_largest_component(g)
        g.set_vertex_filter(lcc)
        g.purge_vertices()
        g.purge_edges()
        g.clear_filters()
        if g.num_vertices() > 5 and check_aperiodic(g):
            break
    print(str(str(g.num_vertices()).ljust(5) + ' | ' + str(iteration).ljust(4) + ' | ' + deg_sample_type.ljust(5) + ' | ' + str(
            use_weights).ljust(6)).ljust(20), end=' ')

    # eigenvector stat dist
    A = gt.adjacency(g)
    if weights is not None:
        bias = diags(weights.a, 0)
        A = bias.dot(A)
    Q = normalize(A, norm='l1', axis=0, copy=False)