def test_closeness_centrality():
    """Test the closeness centrality of a network."""
    # NOTE: the original docstring said "betweenness centrality", but the
    # function under test is closeness_centrality.
    net = pp.Network(directed=False)
    net.add_edge('a', 'x')
    net.add_edge('x', 'b')
    c = pp.algorithms.centralities.closeness_centrality(net)
    # 'a' reaches 'x' at distance 1 and 'b' at distance 2 -> 1 / (1 + 2).
    assert c['a'] == 1/3
def test_degree_assortativity():
    """Test the degree assortativity of a network."""
    net = pp.Network(directed=False)
    for v, w, wt in [('a', 'b', 2.1), ('a', 'c', 1.0)]:
        net.add_edge(v, w, weight=wt)
    s = pp.statistics.degrees.degree_assortativity(net)
def test_network_from_pathpyobjects():
    """Build a network directly from pathpy Node and Edge objects."""
    trolls = pp.Network(multiedges=True, name='Trolls', chapter='Roast Mutton')
    tom = pp.Node(uid='t', name='Tom', age=156)
    bert = pp.Node(uid='b', name='Bert', age=96)
    friendship = pp.Edge(tom, bert, type='like', strength=2.0)
    trolls.add_edge(friendship)
def get_coauthorship_network(sqlite_db_file, time_from=None, time_to=None):
    """
    Returns coauthorship network containing links between authors who
    coedited at least one code file within a given time window.

    Node and edge infos set up to be expanded with future releases.

    Args:
        sqlite_db_file: path to sqlite database
        time_from: start time of time window filter (inclusive),
            datetime object; defaults to the earliest edit time
        time_to: end time of time window filter (inclusive),
            datetime object; defaults to the latest edit time

    Returns:
        n: pathpy network
        node_info: info on node characteristics (currently empty)
        edge_info: info on edge characteristics (currently empty)
    """
    con = sqlite3.connect(sqlite_db_file)
    edits = pd.read_sql(
        """SELECT original_commit_deletion AS pre_commit,
                  commit_hash AS post_commit,
                  filename FROM edits""", con)
    commits = pd.read_sql(
        """SELECT hash, author_name, author_date AS time FROM commits""", con)
    # Close the connection once both tables are loaded (was leaked before).
    con.close()

    # Attach author info of both the originating and the editing commit,
    # so coauthorship covers edits on both sides of a change.
    data_pre = pd.merge(edits, commits, how='left',
                        left_on='pre_commit', right_on='hash') \
        .drop(['pre_commit', 'post_commit', 'hash'], axis=1)
    data_post = pd.merge(edits, commits, how='left',
                         left_on='post_commit', right_on='hash') \
        .drop(['pre_commit', 'post_commit', 'hash'], axis=1)
    data = pd.concat([data_pre, data_post])

    all_times = [
        datetime.datetime.strptime(dt, '%Y-%m-%d %H:%M:%S')
        for dt in data.time if not pd.isnull(dt)
    ]
    # Identity comparison with None ('is None'), not '== None'.
    if time_from is None:
        time_from = min(all_times)
    if time_to is None:
        time_to = max(all_times)
    data = data.loc[pd.to_datetime(data['time']) >= time_from, :]
    data = data.loc[pd.to_datetime(data['time']) <= time_to, :]

    node_info = {}
    edge_info = {}

    n = pp.Network()
    for file in data.filename.unique():
        n.add_clique(set(data.loc[data.filename == file, 'author_name']))

    # Remove self loops.  Collect them first: removing edges while
    # iterating over n.edges mutates the container being iterated.
    self_loops = [edge for edge in n.edges if edge[0] == edge[1]]
    for edge in self_loops:
        n.remove_edge(edge[0], edge[1])

    return n, node_info, edge_info
def plot_hon(network):
    """Plot a higher-order network with a custom force-directed layout.

    Builds an auxiliary undirected 'forces' network whose edges aggregate
    the frequencies of the higher-order edges, then overlays the observed
    first-order edges at low opacity and plots the result.
    """
    # NOTE(review): _net is computed but never used below — presumably a
    # leftover from an earlier version; confirm before removing.
    _net = pp.Network.from_paths(network._subpaths(), frequencies=True)
    forces = pp.Network(directed=False)
    # Project every higher-order edge onto its first/last first-order nodes
    # and accumulate the edge frequency as an attractive 'force'.
    for edge in network.edges:
        v = edge.v.nodes[0]
        w = edge.w.nodes[-1]
        force = edge['frequency']
        if (v, w) not in forces.edges:
            forces.add_edge(v, w, force=force, opacity=0)
        else:
            forces.edges[v, w]['force'] += force
    # Normalise each accumulated force by the smaller endpoint degree.
    deg = forces.degrees(weight='force')
    for edge in forces.edges:
        s = min(deg[edge.v.uid], deg[edge.w.uid])
        edge['weight'] = edge['force'] / s
    # Colour map keyed by uid '0'..'29': single digit -> red,
    # uid starting with '1' -> green, everything else -> blue.
    clusters = {
        str(v): 'red' if len(str(v)) < 2 else
        ('green' if str(v).startswith('1') else 'blue')
        for v in range(30)
    }
    style = {
        'width': 900,
        'height': 600,
        'forceCharge': -4000,
        'forceRepel': -800,
        'node_color': clusters,
        'edge_size': 1,
        'edge_color': 'gray',
        'curved': True,
        'restartAlpha': 1,
        'targetAlpha': .0,
        'forceAlpha': .3,
        'repelDistance': 200,
    }
    # Overlay the observed first-order edges at low opacity and zero weight,
    # so they are visible but do not pull the layout.
    for edge in network.nodes.edges:
        if (edge.v.uid, edge.w.uid) in forces.edges:
            forces.edges[edge.v.uid, edge.w.uid].update(opacity=.3, weight=0)
        else:
            forces.add_edge(edge.v.uid, edge.w.uid, opacity=.3, weight=0)
    # layout_style = {}
    # layout_style["node_size"] = 2
    # layout_style['layout'] = 'Fruchterman-Reingold'
    # layout_style['force'] = 0.2
    # layout_style['iterations'] = 500
    # layout = pp.layout(forces, **layout_style)
    # print(layout)
    forces.plot(**style)
def test_degree_raw_moment():
    """Test the degree raw moment of a network."""
    net = pp.Network(directed=False)
    net.add_edge('a', 'b', weight=2.1)
    net.add_edge('a', 'c', weight=1.0)
    unweighted = pp.statistics.degrees.degree_raw_moment(net)
    # Degrees are (2, 1, 1): first raw moment is their mean.
    assert unweighted == 4 / 3
    weighted = pp.statistics.degrees.degree_raw_moment(net, weight=True)
def test_degree_central_moment():
    """Test the degree central moment of a network."""
    net = pp.Network(directed=False)
    net.add_edge('a', 'b', weight=2.1)
    net.add_edge('a', 'c', weight=1.0)
    unweighted = pp.statistics.degrees.degree_central_moment(net)
    weighted = pp.statistics.degrees.degree_central_moment(net, weight=True)
def test_degree_centrality():
    """Test the degree centrality of a network."""
    # NOTE: the original docstring said "betweenness centrality", but the
    # function under test is degree_centrality.
    net = pp.Network(directed=True)
    net.add_edge('a', 'x')
    net.add_edge('x', 'b')
    c = pp.algorithms.centralities.degree_centrality(net)
    assert c['a'] == 1
    # 'a' has no incoming edges.
    c = pp.algorithms.centralities.degree_centrality(net, mode='indegree')
    assert c['a'] == 0
def test_local_clustering_coefficient():
    """Test the local clustering coefficient of a network."""
    # NOTE: the original docstring said "degree assortativity", but the
    # function under test is local_clustering_coefficient.
    net = pp.Network(directed=False)
    net.add_edge('a', 'b', weight=2.1)
    net.add_edge('b', 'c', weight=1.0)
    net.add_edge('c', 'a', weight=1.0)
    net.add_edge('b', 'd', weight=1.0)
    net.add_edge('d', 'e', weight=1.0)
    net.add_edge('e', 'b', weight=1.0)
    s = pp.statistics.clustering.local_clustering_coefficient(net, 'b')
def test_degree_distribution():
    """Test the degree distribution of a network."""
    net = pp.Network(directed=False)
    net.add_edge('a', 'b', weight=2.1)
    net.add_edge('a', 'c', weight=1.0)
    # Unweighted: one node of degree 2 ('a'), two nodes of degree 1.
    assert pp.statistics.degrees.degree_distribution(net) == {2: 1 / 3, 1: 2 / 3}
    # Weighted: all three weighted degrees are distinct.
    weighted = pp.statistics.degrees.degree_distribution(net, weight=True)
    assert weighted == {3.1: 1 / 3, 2.1: 1 / 3, 1.: 1 / 3}
def test_degree_sequence():
    """Test the degree sequence of a network."""
    net = pp.Network(directed=False)
    net.add_edge('a', 'b', weight=2.1)
    net.add_edge('a', 'c', weight=1.0)
    sequence = pp.statistics.degrees.degree_sequence(net)
    assert np.array_equal(sequence, np.array([2., 1., 1.]))
    weighted_sequence = pp.statistics.degrees.degree_sequence(net, weight=True)
    assert np.array_equal(weighted_sequence, np.array([3.1, 2.1, 1.]))
def test_diameter():
    """Test the diameter of the network."""
    net = pp.Network(directed=False)
    net.add_edge('a', 'x')
    net.add_edge('x', 'c')
    # Path graph a-x-c: the longest shortest path has length 2.
    assert pp.algorithms.shortest_paths.diameter(net) == 2
    assert net.diameter() == 2
    # Closing the triangle makes every pair adjacent.
    net.add_edge('a', 'c')
    assert pp.algorithms.shortest_paths.diameter(net) == 1
    assert net.diameter() == 1
def test_all_shortest_paths():
    """Test all shortest paths in a network."""
    net = pp.Network()
    net.add_edges(('a', 'x'), ('x', 'c'))
    paths, _ = pp.algorithms.shortest_paths.all_shortest_paths(net)
    assert paths['a']['c'] == {('a', 'x', 'c')}
    # A second, equally short route via 'y' must also be reported.
    net.add_edges(('a', 'y'), ('y', 'c'))
    paths, _ = pp.algorithms.shortest_paths.all_shortest_paths(net)
    assert paths['a']['c'] == {('a', 'x', 'c'), ('a', 'y', 'c')}
def generate_random_network(n=10, m=20, directed=True, weighted=True, seed=0):
    """Generate a random network with n nodes and m (possibly weighted) edges."""
    random.seed(seed)
    net = pp.Network(directed)
    for node_id in range(n):
        net.add_node(str(node_id))
    for _ in range(m):
        v, w = random.sample(list(net.nodes), 2)
        if weighted:
            net.add_edge(v, w, weight=random.randint(0, 10))
        else:
            net.add_edge(v, w)
    return net
def test_distance_matrix():
    """Test the distance matrix of a network."""
    net = pp.Network()
    net.add_edges(('a', 'x'), ('x', 'y'), ('y', 'c'))
    # Chain a-x-y-c: nodes at matrix positions 0 and 3 are three hops apart.
    assert pp.algorithms.shortest_paths.distance_matrix(net)[0, 3] == 3
    assert net.distance_matrix()[0, 3] == 3
    # A shortcut x-c reduces the distance to two hops.
    net.add_edges(('x', 'c'))
    assert pp.algorithms.shortest_paths.distance_matrix(net)[0, 3] == 2
    assert net.distance_matrix()[0, 3] == 2
def plot_hon_walk(network, walk):
    """Animate a walk over a higher-order network as a temporal network.

    Each step of `walk` becomes one animation frame: the current node is
    highlighted, the previously visited node is recoloured, and all edges
    are kept alive for the frame's time interval.
    """
    wal = pp.TemporalNetwork(directed=False)
    net = pp.Network()
    for edge in network.nodes.edges():
        net.add_edge(edge)
    # Colour map keyed by uid '0'..'29': single digit -> red,
    # uid starting with '1' -> green, everything else -> blue.
    clusters = {
        str(v): 'red' if len(str(v)) < 2 else
        ('green' if str(v).startswith('1') else 'blue')
        for v in range(30)
    }
    for node in net.nodes.values():
        #wal.add_node(node, color=clusters[node.uid], size=16, t=0)
        wal.add_node(node, size=16, t=0)
    for edge in network.edges.values():
        wal.add_edge(edge.v, edge.w, t=0)
    nodes = []
    for t, w in enumerate(walk):
        # Restore the previously visited node, then highlight the current one.
        if nodes:
            n = nodes.pop(0)
            wal.nodes[n].update(color=clusters[n], size=16, t=t)
        wal.nodes[w].update(color='gray', size=20, t=t)
        begin = t
        end = t + 1
        # NOTE(review): writes directly into TemporalNetwork's private
        # interval structures to keep every edge present during [t, t+1);
        # there appears to be no public API for this — confirm.
        for edge in wal.edges.values():
            wal._edges._intervals.addi(begin, end, edge)
            wal._edges._interval_map[edge].add((begin, end))
        nodes.append(w)
        # Cap the animation at 111 walk steps.
        if t == 110:
            break
    style = {
        'width': 900,
        'height': 600,
        'forceCharge': -10,
        'forceRepel': -200,
        'defaultEdgeWeight': 0.01,
        'edge_size': 1,
        'edge_opacity': .3,
        'edge_color': 'gray',
        'animation_end': 100,
        'animation_steps': 101,
        'curved': True,
    }
    wal.plot(**style)
def net(request):
    """Fixture: undirected test network of seven nodes in three loops."""
    net = pp.Network(directed=False)
    edges = [
        ('a', 'b'), ('b', 'c'), ('c', 'a'),
        ('b', 'd'), ('d', 'e'), ('e', 'f'),
        ('f', 'd'), ('f', 'g'), ('g', 'd'),
    ]
    for v, w in edges:
        net.add_edge(v, w)
    return net
def test_distance_matrix():
    """Test the distance matrix of a network."""
    net = pp.Network()
    net.add_edges(('a', 'x'), ('x', 'y'), ('y', 'c'))
    idx = net.nodes.index
    # Chain a-x-y-c: 'a' and 'c' are three hops apart.
    m = pp.algorithms.shortest_paths.distance_matrix(net)
    assert m[idx['a'], idx['c']] == 3
    assert net.distance_matrix()[idx['a'], idx['c']] == 3
    # A shortcut x-c reduces the distance to two hops.
    net.add_edges(('x', 'c'))
    m = pp.algorithms.shortest_paths.distance_matrix(net)
    assert m[idx['a'], idx['c']] == 2
    assert net.distance_matrix()[idx['a'], idx['c']] == 2
def test_avg_clustering_coefficient():
    """Test the avg clustering coefficient of a network."""
    n = pp.Network(directed=False)
    edges = [
        ('a', 'b'), ('b', 'c'), ('c', 'a'),          # triangle
        ('d', 'e'), ('e', 'f'), ('f', 'g'),          # square ...
        ('g', 'd'), ('d', 'f'),                      # ... with a chord
        ('b', 'd'),                                  # bridge between the two
    ]
    for v, w in edges:
        n.add_edge(v, w)
    s = pp.statistics.clustering.avg_clustering_coefficient(n)
    assert pytest.approx(s, 0.001) == 0.761904
def test_local_clustering_coefficient():
    """Test the local clustering coefficient of a network."""
    n = pp.Network(directed=False)
    edges = [
        ('a', 'b'), ('b', 'c'), ('c', 'a'),
        ('d', 'e'), ('e', 'f'), ('f', 'g'),
        ('g', 'd'), ('d', 'f'), ('b', 'd'),
    ]
    for v, w in edges:
        n.add_edge(v, w)
    # 'f' has neighbours {e, g, d}; two of the three possible pairs are linked.
    assert pp.statistics.clustering.local_clustering_coefficient(n, 'f') == 2 / 3
    # 'a' sits in a triangle; its two neighbours are connected.
    assert pp.statistics.clustering.local_clustering_coefficient(n, 'a') == 1
def create_user_interaction_graph(conn: sqlite3.Connection,
                                  subreddit: str,
                                  temporal: bool = False):
    """Build a (temporal) user-interaction graph for one subreddit.

    An edge points from the author of a comment to the author of the
    parent comment or submission it replies to.

    :param conn: open SQLite connection with `comments` and `submissions`
    :param subreddit: subreddit name to filter on
    :param temporal: if True, build a TemporalNetwork with edge timestamps
    :return: pp.TemporalNetwork or directed pp.Network
    """
    if temporal:
        g = pp.TemporalNetwork()
    else:
        g = pp.Network(directed=True)
    # Parameterised queries: never interpolate external input into SQL
    # (the original used str.format, which is injectable).
    comment_df = pd.read_sql_query(
        "SELECT id, author, parent_id, created_utc FROM comments "
        "WHERE subreddit = ? AND author != '[deleted]'",
        conn, params=(subreddit,))
    submission_df = pd.read_sql_query(
        "SELECT id, author, created_utc FROM submissions WHERE subreddit = ?",
        conn, params=(subreddit,))
    for _, row in comment_df.iterrows():
        source_author = row['author']
        # parent_id has the form '<kind>_<id>', e.g. 't3_abc123'.
        t_value, target_id = row['parent_id'].split('_')
        if t_value == 't3':
            # Parent is a link / submission.
            target_author = submission_df[submission_df['id'] == target_id]['author']
        elif t_value == 't1':
            # Parent is another comment.
            target_author = comment_df[comment_df['id'] == target_id]['author']
        else:
            # Unknown parent kind; skip.
            print('?')
            continue
        # The parent may not be present in the crawled data.
        if len(target_author) > 0:
            target_author = target_author.iloc[0]
            if temporal:
                ts = int(row['created_utc'])
                g.add_edge(source_author, target_author, ts)
            else:
                g.add_edge(source_author, target_author)
    return g
def net():
    """Fixture: directed social network with node and edge attributes."""
    net = pp.Network(directed=True)
    people = [
        ('a', 'Alice', 25, 'f'),
        ('b', 'Bob', 31, 'm'),
        ('c', 'Claire', 18, 'f'),
        ('d', 'Dennis', 47, 'm'),
        ('e', 'Esther', 22, 'f'),
        ('f', 'Frank', 23, 'm'),
        ('g', 'George', 50, 'm'),
    ]
    for uid, name, age, gender in people:
        net.add_node(uid, name=name, age=age, gender=gender)
    relations = [
        ('a', 'b', False), ('a', 'c', False), ('c', 'd', True),
        ('d', 'e', True), ('e', 'c', True), ('c', 'f', False),
        ('f', 'a', True), ('f', 'g', False), ('g', 'g', False),
        ('g', 'd', False),
    ]
    for v, w, formal in relations:
        net.add_edge(v, w, is_formal=formal)
    return net
def create_comment_structure_graph(conn: sqlite3.Connection,
                                   subreddit: str,
                                   temporal: bool = False):
    """Build a (temporal) comment-tree graph for one subreddit.

    An edge points from each comment to its parent comment or submission;
    submissions additionally link to the subreddit node itself.

    :param conn: open SQLite connection with `comments` and `submissions`
    :param subreddit: subreddit name to filter on
    :param temporal: if True, build a TemporalNetwork with edge timestamps
    :return: pp.TemporalNetwork or directed pp.Network
    """
    # Parameterised queries: never interpolate external input into SQL
    # (the original used str.format, which is injectable).
    comment_df = pd.read_sql_query(
        "SELECT id, parent_id, created_utc FROM comments WHERE subreddit = ?",
        conn, params=(subreddit,))
    submission_df = pd.read_sql_query(
        "SELECT id, subreddit, created_utc FROM submissions WHERE subreddit = ?",
        conn, params=(subreddit,))
    if temporal:
        g = pp.TemporalNetwork()
    else:
        g = pp.Network(directed=True)
    for _, record in comment_df.iterrows():
        source_id = record['id']
        # parent_id has the form '<kind>_<id>', e.g. 't3_abc123'.
        t_value, target_id = record.parent_id.split('_')
        if temporal:
            ts = record.created_utc
            g.add_edge(source_id, target_id, ts)
        else:
            g.add_edge(source_id, target_id)
        # If the comment points at a submission also add edge to the subreddit.
        if t_value == 't3':
            source_id = target_id
            target_id = subreddit
            if temporal:
                ts = int(submission_df[submission_df['id'] ==
                                       source_id].created_utc.iloc[0])
                g.add_edge(source_id, target_id, ts)
            else:
                g.add_edge(source_id, target_id)
    return g
Considering that in reality we often do not have ground-truth that allows us to test which order performs best, this highlights the problem that we must decide which order to use for a given data set. We will solve this riddle in session 2, when we introduce a method to learn the optimal order for a given data set. ### Path statistics from origin-destination data In the example above, the data provide us with full knowledge about the exact itinerary taken by each passenger. However, we are often confronted with situations where we do not have such detailed information about paths. Nevertheless, we often have aggregate information that allows us to generate path statistics: Consider a setting where we know (1) the topology of a transportation network, and (2) the origin and destination stations of individual passengers, i.e. where passengers start and finish their journey. Under the assumption that passengers travel along shortest paths, we can now use this information to extract the path statistics that we need. `pathpy` provides a number of path extraction methods that help you to deal with such situations. For the situation described above, we can use the `pp.path_extraction.paths_from_origin_destination` method to generate path statistics based on tuples capturing origin/destination statistics and an instance of the class `Network`. Let us try this in a toy example. <span style="color:red">**TODO:** Generate a directed network with six nodes and five edges $(a,c), (b,c), (c,d), (d,f), (d,g)$. Plot the network. 
Based on a list of tuples $(a, f, 5), (b, g, 10)$ capturing origin destination statistics, use the method `pp.path_extraction.paths_from_origin_destination` to generate a `Paths` object and print the result.</span> """) #%% In [16] n = pp.Network(directed=True) n.add_edge('a', 'c') n.add_edge('b', 'c') n.add_edge('c', 'd') n.add_edge('d', 'f') n.add_edge('d', 'g') pp.visualisation.plot(n) od_stats = [('a', 'f', 5), ('b', 'g', 10)] paths = pp.path_extraction.paths_from_origin_destination(od_stats, n) print(paths) #%% md("""
def main():
    """Build a small social network and export a styled LaTeX/TikZ plot."""
    # Network and attributes
    # ----------------------
    net = pp.Network(directed=True)
    net.add_node('a', name='Alice', age=25, gender='f')
    net.add_node('b', name='Bob', age=31, gender='m')
    net.add_node('c', name='Claire', age=18, gender='f')
    net.add_node('d', name='Dennis', age=47, gender='m')
    net.add_node('e', name='Esther', age=22, gender='f')
    net.add_node('f', name='Frank', age=23, gender='m')
    net.add_node('g', name='George', age=50, gender='m')
    net.add_edge('a', 'b', is_formal=False)
    net.add_edge('a', 'c', is_formal=False)
    net.add_edge('c', 'd', is_formal=True)
    net.add_edge('d', 'e', is_formal=True)
    net.add_edge('e', 'c', is_formal=True)
    net.add_edge('c', 'f', is_formal=False)
    net.add_edge('f', 'a', is_formal=True)
    net.add_edge('f', 'g', is_formal=False)
    net.add_edge('g', 'g', is_formal=False)
    net.add_edge('g', 'd', is_formal=False)

    # Network dicts
    # -------------
    # Per-gender style lookups used to derive the node styles below.
    color_dict = {"m": "blue", "f": "red"}
    shape_dict = {"m": "circle", "f": "rectangle"}
    style_dict = {"m": "{shading=ball}", "f": None}
    # Fixed (x, y) position per node uid.
    layout = {
        'a': (4.3191, -3.5352),
        'b': (0.5292, -0.5292),
        'c': (8.6559, -3.8008),
        'd': (12.4117, -7.5239),
        'e': (12.7, -1.7069),
        'f': (6.0022, -9.0323),
        'g': (9.7608, -12.7)
    }

    # Visual style dict
    # -----------------
    visual_style = {}

    # node styles
    # -----------
    visual_style['node_size'] = 5
    visual_style['node_color'] = {
        n: color_dict[a['gender']] for n, a in net.nodes.items()
    }
    visual_style['node_opacity'] = .7
    visual_style['node_label'] = {n: a['name'] for n, a in net.nodes.items()}
    visual_style['node_label_position'] = 'below'
    visual_style['node_label_distance'] = 15
    visual_style['node_label_color'] = 'gray'
    visual_style['node_label_size'] = 3
    visual_style['node_shape'] = {
        n: shape_dict[a['gender']] for n, a in net.nodes.items()
    }
    visual_style['node_style'] = {
        n: style_dict[a['gender']] for n, a in net.nodes.items()
    }
    visual_style['node_label_off'] = {'e': True}
    visual_style['node_math_mode'] = {'a': True}
    visual_style['node_label_as_id'] = {'f': True}
    visual_style['node_pseudo'] = {'d': True}

    # edge styles
    # -----------
    # Formal edges are drawn twice as wide as informal ones.
    visual_style['edge_width'] = {
        e: .3 + .3 * int(a['is_formal']) for e, a in net.edges.items()
    }
    visual_style['edge_color'] = 'black'
    visual_style['edge_opacity'] = .8
    visual_style['edge_curved'] = 0.1
    visual_style['edge_label'] = {e: e[0] + e[1] for e in net.edges}
    visual_style['edge_label_position'] = 'above'
    visual_style['edge_label_distance'] = .6
    visual_style['edge_label_color'] = 'gray'
    visual_style['edge_label_size'] = {('a', 'c'): 5}
    visual_style['edge_style'] = 'dashed'
    visual_style['edge_arrow_size'] = .2
    visual_style['edge_arrow_width'] = .2
    visual_style['edge_loop_size'] = 15
    visual_style['edge_loop_position'] = 90
    visual_style['edge_loop_shape'] = 45
    visual_style['edge_directed'] = {
        ('a', 'b'): True,
        ('a', 'c'): True,
        ('c', 'd'): False,
        ('d', 'e'): True,
        ('e', 'c'): True,
        ('c', 'f'): False,
        ('f', 'a'): True,
        ('f', 'g'): True,
        ('g', 'g'): True
    }
    # LaTeX label for edge (a, c), rendered in math mode.
    visual_style['edge_label'][('a', 'c')] = '\\frac{\\alpha}{\\beta}'
    visual_style['edge_math_mode'] = {('a', 'c'): True}
    visual_style['edge_not_in_bg'] = {('f', 'a'): True}

    # general options
    # ---------------
    visual_style['unit'] = 'mm'
    visual_style['layout'] = layout
    visual_style["margin"] = {'top': 5, 'bottom': 8, 'left': 5, 'right': 5}
    visual_style["canvas"] = (100, 60)
    visual_style['keep_aspect_ratio'] = False

    # Create a latex file
    plot(net, 'network.tex', **visual_style)
def test_simple():
    """Smoke-test plotting of a trivial two-node network."""
    g = pp.Network()
    for uid in ('a', 'b'):
        g.add_node(uid)
    plot(g)
def test_avg_path_length():
    """Test the average path length of the network."""
    net = pp.Network(directed=False)
    net.add_edge('a', 'x')
    net.add_edge('x', 'c')
    # Six ordered node pairs with total distance 8 -> mean 8/6.
    assert pp.algorithms.shortest_paths.avg_path_length(net) == 8 / 6
#%% import pathpy as pp # %% n = pp.Network() n.add_edge('a', 'b') n.plot() # %%
def test_to_networkx():
    """Smoke-test conversion of a pathpy network to networkx."""
    net = pp.Network()
    net.add_edge("a", "b")
    n = pp.converters.to_networkx(net)
def get_coauthorship_network(sqlite_db_file,
                             author_identifier='author_id',
                             time_from=None,
                             time_to=None):
    """
    Returns coauthorship network containing links between authors who
    coedited at least one code file within a given time window.

    :param str sqlite_db_file: path to SQLite database
    :param str author_identifier: author column to use, one of
        {'author_id', 'author_name', 'author_email'}
    :param datetime.datetime time_from: start time of time window filter
    :param datetime.datetime time_to: end time of time window filter

    :return:
        - *pathpy.Network* – coauthorship network
        - *dict* – info on node characteristics
        - *dict* – info on edge characteristics
    """
    # Fail fast before touching the database (the original validated only
    # after connecting and reading the edits table).
    if author_identifier not in ('author_id', 'author_name', 'author_email'):
        raise Exception(
            "author_identifier must be from {'author_id', 'author_name', 'author_email'}."
        )

    if author_identifier == 'author_id':
        _ensure_author_id_exists(sqlite_db_file)

    con = sqlite3.connect(sqlite_db_file)
    edits = pd.read_sql(
        """SELECT original_commit_deletion AS pre_commit,
                  commit_hash AS post_commit,
                  filename FROM edits""", con)
    # Safe to interpolate: author_identifier was validated against the
    # whitelist above.
    commits = pd.read_sql(
        """SELECT hash, {} as author_identifier, author_date AS time,
                  author_timezone AS timezone FROM commits""".format(author_identifier),
        con)
    # Close the connection once both tables are loaded (was leaked before).
    con.close()

    # Attach author info of both the originating and the editing commit.
    data_pre = pd.merge(edits, commits, how='left',
                        left_on='pre_commit', right_on='hash') \
        .drop(['pre_commit', 'post_commit', 'hash'], axis=1)
    data_post = pd.merge(edits, commits, how='left',
                         left_on='post_commit', right_on='hash') \
        .drop(['pre_commit', 'post_commit', 'hash'], axis=1)
    data = pd.concat([data_pre, data_post])

    # Convert author dates (local time + timezone offset) to UTC timestamps.
    data['time'] = [
        int(calendar.timegm(
            datetime.datetime.strptime(t, '%Y-%m-%d %H:%M:%S').timetuple()) - tz)
        if not pd.isnull(t) else np.nan
        for t, tz in zip(data.time, data.timezone)
    ]
    data = data.drop(['timezone'], axis=1)

    all_times = [dt for dt in data.time if not pd.isnull(dt)]
    # Identity comparison with None ('is None'), not '== None'.
    if time_from is None:
        time_from = min(all_times)
    else:
        time_from = int(calendar.timegm(time_from.timetuple()))
    if time_to is None:
        time_to = max(all_times)
    else:
        time_to = int(calendar.timegm(time_to.timetuple()))

    data = data.loc[data['time'] >= time_from, :]
    data = data.loc[data['time'] <= time_to, :]

    node_info = {}
    edge_info = {}

    n = pp.Network()
    for file in data.filename.unique():
        n.add_clique(set(data.loc[data.filename == file, 'author_identifier']))

    # Remove self loops.  Collect them first: removing edges while
    # iterating over n.edges mutates the container being iterated.
    self_loops = [edge for edge in n.edges if edge[0] == edge[1]]
    for edge in self_loops:
        n.remove_edge(edge[0], edge[1])

    return n, node_info, edge_info