def get_square_bounds():
    """Return square plot bounds that enclose the sfdp layout of NETWORK.

    The module-level NETWORK graph is loaded from USER_GRAPH_FNAME first if
    it is falsy. The longer of the two coordinate extents fixes the side of
    the square: the shorter axis is re-centred on its midpoint and stretched
    to that length, and both axes are padded on each side by 10% of the
    longer extent.

    Returns:
        A pair ``((min_x, max_x), (min_y, max_y))``.
    """
    global NETWORK
    if not NETWORK:
        NETWORK = reload_json(
            USER_GRAPH_FNAME, nx.DiGraph, transform=nx.node_link_graph
        )

    layout = graphviz_layout(NETWORK, prog="sfdp")
    x_coords = [x for x, _ in layout.values()]
    y_coords = [y for _, y in layout.values()]

    left, right = min(x_coords), max(x_coords)
    bottom, top = min(y_coords), max(y_coords)
    width = right - left
    height = top - bottom

    if width > height:
        # x is the dominant axis: pad it, and grow y around its centre.
        pad = width * 0.1
        half_side = width / 2
        mid_y = (top + bottom) / 2
        return (
            (left - pad, right + pad),
            (mid_y - half_side - pad, mid_y + half_side + pad),
        )

    # y is the dominant axis (or both extents are equal).
    pad = height * 0.1
    half_side = height / 2
    mid_x = (right + left) / 2
    return (
        (mid_x - half_side - pad, mid_x + half_side + pad),
        (bottom - pad, top + pad),
    )
def make_plottable(colors=None, sizes=None, tweets=None):
    """Build a styled graph renderer for the module-level NETWORK.

    NETWORK is loaded from USER_GRAPH_FNAME first if it is falsy, then
    converted with ``from_networkx`` using ``graphviz_layout`` (prog "sfdp").

    Args:
        colors: Optional per-node values stored in the node data source as
            the "color" column and used for the node fill/line colour. When
            falsy, every node uses ``Spectral4[0]``.
        sizes: Optional per-node values stored as the "size" column and used
            as the node size. When falsy, every node has size 8.
        tweets: Optional per-node values stored as the "desc" column.

    Returns:
        The configured graph renderer, with selection and inspection
        policies both set to ``NodesAndLinkedEdges()``.
    """
    global NETWORK
    if not NETWORK:
        NETWORK = reload_json(
            USER_GRAPH_FNAME, nx.DiGraph, transform=nx.node_link_graph
        )

    graph = from_networkx(NETWORK, graphviz_layout, prog="sfdp")
    node_source = graph.node_renderer.data_source

    # Either reference a data column by name or fall back to a constant.
    if colors:
        node_source.add(colors, "color")
        node_color = "color"
    else:
        node_color = Spectral4[0]

    if sizes:
        node_source.add(sizes, "size")
        node_size = "size"
    else:
        node_size = 8

    if tweets:
        node_source.add(tweets, "desc")

    select_color = Spectral4[2]
    hover_color = Spectral4[1]
    highlight_width = 2

    def highlighted_node(color):
        # Node glyph used while a node is selected or hovered.
        return Circle(size=node_size, fill_color=color, line_color=color)

    def highlighted_edge(color):
        # Edge glyph used while an edge is selected or hovered.
        return MultiLine(line_color=color, line_width=highlight_width)

    nodes = graph.node_renderer
    nodes.glyph = Circle(
        size=node_size,
        fill_color=node_color,
        fill_alpha=0.95,
        line_color=node_color,
        line_alpha=0.95,
    )
    nodes.selection_glyph = highlighted_node(select_color)
    nodes.hover_glyph = highlighted_node(hover_color)

    edges = graph.edge_renderer
    edges.glyph = MultiLine(line_color="#666666", line_alpha=0.3, line_width=0.3)
    edges.selection_glyph = highlighted_edge(select_color)
    edges.hover_glyph = highlighted_edge(hover_color)

    graph.selection_policy = NodesAndLinkedEdges()
    graph.inspection_policy = NodesAndLinkedEdges()
    return graph
def trim_graph(graph, reduce_sample=True, pickle=True, from_scratch=True):
    """Reduce *graph* to its degree outliers plus (a sample of) their neighbours.

    For each direction in ``(Direct.IN, Direct.OUT)`` the degree of every
    node is collected, and nodes whose degree differs from the mean by more
    than ``STDEV_MOD`` sample standard deviations are marked significant.
    Significant nodes are then visited in ascending degree order; each one
    that still has neighbours after optional down-sampling is kept together
    with those neighbours.

    The RNG state is loaded from ``RNG_FNAME`` before sampling and the same
    state is written back afterwards.

    Args:
        graph: The graph to trim. May be falsy only when *from_scratch* is
            False, in which case the stored trimmed graph is returned.
        reduce_sample: When true, keep only ``int(len(neighbours) *
            OTHERS_MOD)`` randomly chosen neighbours of each significant node.
        pickle: When true, write the trimmed graph to ``USER_GRAPH_FNAME``
            as node-link JSON.
        from_scratch: When false and *graph* is falsy, skip trimming and
            return the graph stored in ``USER_GRAPH_FNAME``.

    Returns:
        The subgraph view of *graph* induced by the retained nodes, or the
        reloaded graph on the early-return path.

    Raises:
        statistics.StatisticsError: If *graph* has fewer than two nodes
            (raised by ``stdev``).
    """
    # Local import: ``random`` is not among the file-level imports visible
    # here, and re-importing an already loaded module is harmless.
    import random

    if not graph and not from_scratch:
        return reload_json(USER_GRAPH_FNAME, transform=nx.node_link_graph)

    rng_state = reload_object(RNG_FNAME, random.getstate)
    random.setstate(rng_state)
    print("Trimming graph...")

    significant = set()
    for direct in (Direct.IN, Direct.OUT):
        # Looked up once per direction instead of once per node.
        # NOTE(review): assumes deg_view() is a side-effect-free accessor.
        degree_view = direct.deg_view(graph)
        ids = list(graph)
        degrees = [degree_view[user_id] for user_id in ids]
        sample_mean = mean(degrees)
        threshold = STDEV_MOD * stdev(degrees)
        for user_id, degree in zip(ids, degrees):
            if abs(degree - sample_mean) > threshold:
                # A node can be recorded twice if it is an outlier in both
                # directions with different degrees.
                significant.add((user_id, degree))

    to_subgraph = set()
    for user_id, _degree in sorted(significant, key=lambda pair: pair[1]):
        try:
            others = set(graph.neighbors(user_id))
        except (KeyError, nx.NetworkXError):
            # networkx reports a missing node as NetworkXError, not KeyError.
            continue
        if reduce_sample and others:
            # random.sample() rejects sets on Python 3.11+ (deprecated since
            # 3.9). Older versions converted the set to a tuple internally,
            # so sampling from list(others) draws the same elements.
            others = random.sample(list(others), int(len(others) * OTHERS_MOD))
        if not others:
            continue
        to_subgraph.add(user_id)
        to_subgraph.update(others)

    # Store the state captured *before* sampling.
    pickle_it(rng_state, RNG_FNAME)
    user_graph = graph.subgraph(to_subgraph)
    if pickle:
        json_it(user_graph, USER_GRAPH_FNAME, nx.node_link_data)
    return user_graph
def construct_graph_data():
    """Populate the module-level GRAPH_DATA, building it only when needed.

    GRAPH_DATA is first reloaded via ``reload_json("graph_data", ...)``. If
    that yields a truthy value nothing else happens. Otherwise a fresh dict
    is filled with:

    * ``"raw_tweets"``: the result of ``run_tweets()``;
    * ``str(member)`` for every member of ``DataSource``: the
      ``(sizes, colors)`` pair from ``run_data(member)``;
    * ``"range"``: the ``(x_range, y_range)`` pair from
      ``get_square_bounds()``;

    and is then written out with ``json_it`` under the name "graph_data".
    """
    global GRAPH_DATA
    GRAPH_DATA = reload_json("graph_data", lambda: None)
    if GRAPH_DATA:
        # Already available; no need to recompute.
        return

    GRAPH_DATA = {}
    GRAPH_DATA.update(raw_tweets=run_tweets())

    for d_source in DataSource.__members__.values():
        node_sizes, node_colors = run_data(d_source)
        GRAPH_DATA[str(d_source)] = (node_sizes, node_colors)

    x_bounds, y_bounds = get_square_bounds()
    GRAPH_DATA["range"] = (x_bounds, y_bounds)

    json_it(GRAPH_DATA, "graph_data")
def run_data(d_source):
    """Compute a glyph size and a palette colour for every node in NETWORK.

    USER_DICT, TWEET_DICT and NETWORK are module-level values that are
    loaded from disk first if they are falsy. One value is derived per node,
    in NETWORK iteration order:

    * ``DataSource.FRIENDS`` / ``DataSource.FOLLOWERS``: the length of the
      node's ``str(d_source)`` entry in USER_DICT (0 for unknown nodes);
    * ``DataSource.TWEETS``: the number of entries for the node in
      TWEET_DICT (0 for unknown nodes);
    * anything else: the constant 1.

    Each value is scaled as ``(value - min) / max``, mapped to an index into
    PALETTE, and turned into a colour ``PALETTE[index]`` and a size
    ``2 ** (index + 1)``.

    Args:
        d_source: The ``DataSource`` member selecting which value to compute.

    Returns:
        A ``(sizes, colors)`` pair of lists with one entry per node; both
        lists are empty when NETWORK has no nodes.
    """
    global USER_DICT
    global TWEET_DICT
    global NETWORK
    if not USER_DICT:
        USER_DICT = reload_object(USER_DICT_FNAME, dict)
    if not TWEET_DICT:
        TWEET_DICT = reload_object(TWEETS_FNAME, dict)
    if not NETWORK:
        NETWORK = reload_json(
            USER_GRAPH_FNAME, nx.DiGraph, transform=nx.node_link_graph
        )

    values = []
    for node in NETWORK:
        key = str(node)
        if d_source in (DataSource.FRIENDS, DataSource.FOLLOWERS):
            # Some nodes may not show up in the user dictionary!
            # NOTE(review): presumably str(d_source) is "friends" or
            # "followers" here - confirm against DataSource.__str__.
            node_info = USER_DICT.get(key, {"followers": [], "friends": []})
            values.append(len(node_info[str(d_source)]))
        elif d_source == DataSource.TWEETS:
            values.append(len(TWEET_DICT.get(key, ())))
        else:
            values.append(1)

    if not values:
        # Empty graph: max()/min() below would raise ValueError.
        return [], []

    maximum = max(values)
    minimum = min(values)
    top_index = len(PALETTE) - 1

    sizes = []
    colors = []
    for value in values:
        # A maximum of 0 means every count is 0; use the lowest palette
        # entry instead of dividing by zero.
        index_ratio = (value - minimum) / maximum if maximum else 0
        color_index = int(index_ratio * top_index)
        sizes.append(2 ** (color_index + 1))
        colors.append(PALETTE[color_index])
    return sizes, colors
def run_tweets():
    """Return one tweet text per node of NETWORK, in iteration order.

    TWEET_DICT and NETWORK are module-level values that are loaded from disk
    first if they are falsy. For each node the "text" field of the first
    entry stored under ``str(node)`` in TWEET_DICT is used; nodes with no
    entry, or with an empty one, contribute an empty string.
    """
    global TWEET_DICT
    global NETWORK
    if not TWEET_DICT:
        TWEET_DICT = reload_object(TWEETS_FNAME, dict)
    if not NETWORK:
        NETWORK = reload_json(
            USER_GRAPH_FNAME, nx.DiGraph, transform=nx.node_link_graph
        )

    def first_text(node):
        # Text of the node's first stored tweet, or "" when there is none.
        try:
            stored = TWEET_DICT[str(node)]
        except KeyError:
            return ""
        return stored[0]["text"] if stored else ""

    return [first_text(node) for node in NETWORK]
import sys from enum import Enum, unique from statistics import mean, stdev import matplotlib.pyplot as plt import networkx as nx import numpy as np import pandas as pd import ujson from networkx.drawing.nx_agraph import to_agraph from global_vars import (NO_DATA_EXIT_CODE, RNG_FNAME, TWEETS_FNAME, USER_DICT_FNAME, USER_FRAME_FNAME, USER_GRAPH_FNAME) from utils import json_it, pickle_it, reload_json, reload_object USER_FRAME = pd.DataFrame(reload_json(USER_DICT_FNAME, dict)).transpose() friends = USER_FRAME["friends"] followers = USER_FRAME["followers"] with_friends = USER_FRAME.loc[USER_FRAME["friends"].astype(bool)] VALID_USER_FRAME = with_friends.loc[with_friends["followers"].astype(bool)] USER_LIST = VALID_USER_FRAME.axes[0] STDEV_MOD = 1 OTHERS_MOD = 0.001 @unique class Direct(Enum): IN = True OUT = False