Exemple #1
0
def get_square_bounds():
    """Compute square axis bounds that enclose the laid-out network.

    The module-level ``NETWORK`` is loaded through ``reload_json`` when it is
    falsy, then positioned with ``graphviz_layout(..., prog="sfdp")``.  The
    longer of the two coordinate extents fixes the side of the square: that
    axis is padded by 10% of its extent on each side, and the shorter axis is
    expanded symmetrically around its midpoint to the same padded length.

    Returns:
        ``((min_x, max_x), (min_y, max_y))``.

    Raises:
        ValueError: if the layout contains no nodes (``min``/``max`` of an
            empty list).
    """
    global NETWORK
    if not NETWORK:
        NETWORK = reload_json(USER_GRAPH_FNAME,
                              nx.DiGraph,
                              transform=nx.node_link_graph)
    layout = graphviz_layout(NETWORK, prog="sfdp")

    points = [layout[node] for node in layout]
    xs = [x for x, _ in points]
    ys = [y for _, y in points]

    lo_x, hi_x = min(xs), max(xs)
    lo_y, hi_y = min(ys), max(ys)
    width = hi_x - lo_x
    height = hi_y - lo_y

    if width > height:
        # x is the long axis: pad it, and grow y around its midpoint.
        margin = width * 0.1
        half_side = width / 2
        mid_y = (hi_y + lo_y) / 2
        return ((lo_x - margin, hi_x + margin),
                (mid_y - half_side - margin, mid_y + half_side + margin))

    # y is the long axis (or both extents are equal): mirror of the above.
    margin = height * 0.1
    half_side = height / 2
    mid_x = (hi_x + lo_x) / 2
    return ((mid_x - half_side - margin, mid_x + half_side + margin),
            (lo_y - margin, hi_y + margin))
Exemple #2
0
def make_plottable(colors=None, sizes=None, tweets=None):
    """Build a styled graph renderer for the module-level ``NETWORK``.

    ``NETWORK`` is loaded through ``reload_json`` when it is falsy and then
    converted with ``from_networkx`` using ``graphviz_layout`` (``sfdp``).
    (The glyph/policy names used here look like Bokeh's graph API --
    confirm against the file's imports.)

    Args:
        colors: optional per-node values stored in the node data source
            under ``"color"`` and used as fill/line colour.  When falsy,
            every node uses ``Spectral4[0]``.
        sizes: optional per-node values stored under ``"size"`` and used as
            the node size.  When falsy, every node has size ``8``.
        tweets: optional per-node values stored under ``"desc"``.

    Returns:
        The object returned by ``from_networkx`` with node/edge glyphs and
        selection/inspection policies assigned.
    """
    global NETWORK
    if not NETWORK:
        NETWORK = reload_json(USER_GRAPH_FNAME,
                              nx.DiGraph,
                              transform=nx.node_link_graph)
    renderer = from_networkx(NETWORK, graphviz_layout, prog="sfdp")
    nodes = renderer.node_renderer
    edges = renderer.edge_renderer

    # Per-node columns are attached only when the caller supplied them;
    # otherwise a constant colour / size is used for every node.
    if colors:
        nodes.data_source.add(colors, "color")
        fill = "color"
    else:
        fill = Spectral4[0]

    if sizes:
        nodes.data_source.add(sizes, "size")
        dot_size = "size"
    else:
        dot_size = 8

    if tweets:
        nodes.data_source.add(tweets, "desc")

    picked = Spectral4[2]
    hovered = Spectral4[1]

    nodes.glyph = Circle(size=dot_size,
                         fill_color=fill,
                         fill_alpha=0.95,
                         line_color=fill,
                         line_alpha=0.95)
    nodes.selection_glyph = Circle(size=dot_size,
                                   fill_color=picked,
                                   line_color=picked)
    nodes.hover_glyph = Circle(size=dot_size,
                               fill_color=hovered,
                               line_color=hovered)

    edges.glyph = MultiLine(line_color="#666666",
                            line_alpha=0.3,
                            line_width=0.3)
    edges.selection_glyph = MultiLine(line_color=picked, line_width=2)
    edges.hover_glyph = MultiLine(line_color=hovered, line_width=2)

    # Selecting or hovering a node also highlights its attached edges.
    renderer.selection_policy = NodesAndLinkedEdges()
    renderer.inspection_policy = NodesAndLinkedEdges()

    return renderer
Exemple #3
0
def trim_graph(graph, reduce_sample=True, pickle=True, from_scratch=True):
    """Reduce *graph* to its degree outliers plus (a sample of) their neighbours.

    For each direction in ``(Direct.IN, Direct.OUT)`` the degree of every
    node is read from ``direct.deg_view(graph)``; nodes whose degree differs
    from the mean by more than ``STDEV_MOD`` standard deviations are treated
    as significant.  Each significant node that still has neighbours after
    optional sub-sampling is kept together with those neighbours, and the
    induced subgraph is returned.

    The RNG state comes from ``reload_object(RNG_FNAME, random.getstate)``,
    is installed with ``random.setstate`` before sampling, and that same
    (pre-sampling) state is written back with ``pickle_it`` afterwards --
    presumably so repeated runs draw the same sample; confirm against
    ``reload_object``.

    Args:
        graph: graph to trim; must support iteration over nodes,
            ``neighbors`` and ``subgraph``.
        reduce_sample: when true, each significant node's neighbour set is
            randomly reduced to ``int(len(neighbours) * OTHERS_MOD)`` items.
        pickle: when true, the result is stored with
            ``json_it(user_graph, USER_GRAPH_FNAME, nx.node_link_data)``.
        from_scratch: when false and *graph* is falsy, the stored graph is
            reloaded with ``reload_json`` and returned without trimming.

    Returns:
        The reloaded graph (early-return path) or ``graph.subgraph(...)`` of
        the retained nodes.

    Raises:
        statistics.StatisticsError: if *graph* has fewer than two nodes
            (``stdev`` needs at least two data points).
    """
    if not graph and not from_scratch:
        return reload_json(USER_GRAPH_FNAME, transform=nx.node_link_graph)

    rng_state = reload_object(RNG_FNAME, random.getstate)
    random.setstate(rng_state)
    print("Trimming graph...")

    significant_id_set = set()
    for direct in (Direct.IN, Direct.OUT):
        # Fetch the degree lookup once per direction instead of once per
        # node (assumes deg_view only reads the graph -- TODO confirm).
        degree_of = direct.deg_view(graph)
        node_degrees = [(user_id, degree_of[user_id]) for user_id in graph]
        sample = [degree for _, degree in node_degrees]
        sample_mean = mean(sample)
        threshold = STDEV_MOD * stdev(sample)
        significant_id_set.update(
            (user_id, degree)
            for user_id, degree in node_degrees
            if abs(degree - sample_mean) > threshold)

    # A node that is an outlier in both directions appears once per distinct
    # (id, degree) pair and is therefore processed once per pair below.
    by_asc_degree = sorted(significant_id_set, key=lambda pair: pair[1])

    to_subgraph = set()
    for user_id, _ in by_asc_degree:
        try:
            others = set(graph.neighbors(user_id))
        except KeyError:
            continue
        if reduce_sample and others:
            # random.sample() requires a sequence: passing a set was
            # deprecated in Python 3.9 and raises TypeError from 3.11 on.
            others = random.sample(list(others),
                                   int(len(others) * OTHERS_MOD))

        if not others:
            continue

        to_subgraph.add(user_id)
        to_subgraph.update(others)

    pickle_it(rng_state, RNG_FNAME)

    user_graph = graph.subgraph(to_subgraph)

    if pickle:
        json_it(user_graph, USER_GRAPH_FNAME, nx.node_link_data)

    return user_graph
Exemple #4
0
def construct_graph_data():
    """Fill the module-level ``GRAPH_DATA``, computing it when not stored.

    ``GRAPH_DATA`` is first set from ``reload_json("graph_data", ...)``.  If
    that result is falsy, a new dict is built with:

    * ``"raw_tweets"`` -- the result of ``run_tweets()``;
    * ``str(member)`` for every ``DataSource`` member -- the
      ``(sizes, colors)`` pair from ``run_data(member)``;
    * ``"range"`` -- the ``(x_range, y_range)`` pair from
      ``get_square_bounds()``;

    and the dict is then written out with ``json_it(GRAPH_DATA, "graph_data")``.
    Nothing is returned.
    """
    global GRAPH_DATA
    GRAPH_DATA = reload_json("graph_data", lambda: None)

    if not GRAPH_DATA:
        GRAPH_DATA = {}
        GRAPH_DATA["raw_tweets"] = run_tweets()
        for source in DataSource.__members__.values():
            node_sizes, node_colors = run_data(source)
            GRAPH_DATA[str(source)] = (node_sizes, node_colors)
        bounds_x, bounds_y = get_square_bounds()
        GRAPH_DATA["range"] = (bounds_x, bounds_y)

        json_it(GRAPH_DATA, "graph_data")
Exemple #5
0
def run_data(d_source):
    """Map a per-node metric onto node sizes and palette colours.

    The module-level ``USER_DICT``, ``TWEET_DICT`` and ``NETWORK`` are loaded
    on demand when falsy.  One value is computed per node of ``NETWORK``:

    * ``DataSource.FRIENDS`` / ``DataSource.FOLLOWERS``: length of the
      node's ``str(d_source)`` entry in ``USER_DICT`` (empty lists are used
      for nodes missing from ``USER_DICT``);
    * ``DataSource.TWEETS``: number of entries for the node in
      ``TWEET_DICT`` (0 when missing);
    * anything else: the constant ``1``.

    Each value is scaled as ``(value - min) / max`` and converted to an
    index into ``PALETTE``.

    Args:
        d_source: the ``DataSource`` member selecting the metric.

    Returns:
        ``(sizes, colors)`` -- two lists in node-iteration order, where each
        size is ``2 ** (palette_index + 1)`` and each colour is
        ``PALETTE[palette_index]``.  Both lists are empty when the network
        has no nodes.
    """
    global USER_DICT
    global TWEET_DICT
    global NETWORK
    if not USER_DICT:
        USER_DICT = reload_object(USER_DICT_FNAME, dict)
    if not TWEET_DICT:
        TWEET_DICT = reload_object(TWEETS_FNAME, dict)
    if not NETWORK:
        NETWORK = reload_json(USER_GRAPH_FNAME,
                              nx.DiGraph,
                              transform=nx.node_link_graph)

    # The metric kind does not change per node, so decide it once.
    counts_links = d_source in (DataSource.FRIENDS, DataSource.FOLLOWERS)
    counts_tweets = d_source == DataSource.TWEETS

    values = []
    for node in NETWORK:
        node_key = str(node)
        if counts_links:
            try:
                node_info = USER_DICT[node_key]
            except KeyError:
                # some nodes may not show up in the user dictionary!
                node_info = {"followers": [], "friends": []}
            values.append(len(node_info[str(d_source)]))
        elif counts_tweets:
            try:
                values.append(len(TWEET_DICT[node_key]))
            except KeyError:
                values.append(0)
        else:
            values.append(1)

    # An empty network would make max()/min() raise ValueError.
    if not values:
        return [], []

    maximum = max(values)
    minimum = min(values)
    top_index = len(PALETTE) - 1

    colors = []
    sizes = []
    for value in values:
        # When every value is 0 (e.g. no node has tweets) maximum is 0 and
        # the division would raise ZeroDivisionError; use the lowest bucket.
        # NOTE(review): dividing by maximum rather than (maximum - minimum)
        # means the top palette entry is only reached when minimum == 0 --
        # confirm this is intended.
        index_ratio = (value - minimum) / maximum if maximum else 0
        color_index = int(index_ratio * top_index)
        sizes.append(2**(color_index + 1))
        colors.append(PALETTE[color_index])
    return sizes, colors
Exemple #6
0
def run_tweets():
    """Return one tweet text per node of the module-level ``NETWORK``.

    ``TWEET_DICT`` and ``NETWORK`` are loaded on demand when falsy.  For each
    node, in iteration order, the ``"text"`` field of the first entry stored
    under ``str(node)`` in ``TWEET_DICT`` is used; nodes with no entry, or an
    empty one, contribute an empty string.

    Returns:
        A list of strings, one per node.
    """
    global TWEET_DICT
    global NETWORK
    if not TWEET_DICT:
        TWEET_DICT = reload_object(TWEETS_FNAME, dict)
    if not NETWORK:
        NETWORK = reload_json(USER_GRAPH_FNAME,
                              nx.DiGraph,
                              transform=nx.node_link_graph)

    first_texts = []
    for node in NETWORK:
        try:
            stored = TWEET_DICT[str(node)]
        except KeyError:
            stored = None
        # Only the lookup is guarded; a malformed first entry still raises.
        first_texts.append(stored[0]["text"] if stored else "")
    return first_texts
Exemple #7
0
import sys
from enum import Enum, unique
from statistics import mean, stdev

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import ujson
from networkx.drawing.nx_agraph import to_agraph

from global_vars import (NO_DATA_EXIT_CODE, RNG_FNAME, TWEETS_FNAME,
                         USER_DICT_FNAME, USER_FRAME_FNAME, USER_GRAPH_FNAME)
from utils import json_it, pickle_it, reload_json, reload_object

# Loaded at import time: the stored user dictionary as a DataFrame,
# transposed so that the dictionary's outer keys become the row index.
USER_FRAME = pd.DataFrame(reload_json(USER_DICT_FNAME, dict)).T

friends = USER_FRAME["friends"]
followers = USER_FRAME["followers"]
# Keep only rows whose "friends" cell, and then whose "followers" cell,
# is truthy.
with_friends = USER_FRAME.loc[friends.astype(bool)]
VALID_USER_FRAME = with_friends.loc[with_friends["followers"].astype(bool)]
# Row labels of the rows that passed both filters.
USER_LIST = VALID_USER_FRAME.index

# Multiplier on the standard deviation in trim_graph's outlier test.
STDEV_MOD = 1
# Fraction of a significant node's neighbours kept when trim_graph samples.
OTHERS_MOD = 0.001


@unique
class Direct(Enum):
    IN = True
    OUT = False