def tempnet():
    """Build a small temporal network fixture with four time-stamped edges.

    The edges are a->b (t=1), b->c (t=2), b->d (t=5) and c->d (t=5); the
    network itself is created with the default constructor arguments.
    """
    timestamped_edges = (
        ('a', 'b', 1),
        ('b', 'c', 2),
        ('b', 'd', 5),
        ('c', 'd', 5),
    )
    network = pp.TemporalNetwork()
    for source, target, time in timestamped_edges:
        network.add_edge(source, target, timestamp=time)
    return network
def tempnet2():
    """Build an undirected temporal network fixture with four edges.

    Two of the edges (a-c and b-a) share the time stamp 3.
    """
    timestamped_edges = (
        ('a', 'b', 1),
        ('b', 'c', 2),
        ('a', 'c', 3),
        ('b', 'a', 3),
    )
    network = pp.TemporalNetwork(directed=False)
    for source, target, time in timestamped_edges:
        network.add_edge(source, target, timestamp=time)
    return network
def generate_random_temporal_network(n=10, m=20, min_t=0, max_t=100, seed=0):
    """Create a temporal network from randomly drawn time-stamped edges.

    Note that this seeds the module-level ``random`` generator.

    Parameters
    ----------
    n: int
        number of nodes (labelled ``'0'`` ... ``str(n - 1)``)
    m: int
        number of edges
    min_t: int
        starting time (inclusive lower bound for drawn time stamps)
    max_t: int
        end time (inclusive upper bound for drawn time stamps)
    seed: int
        seed for random number generator

    Returns
    -------
    The ``pp.TemporalNetwork`` built from the ``m`` drawn
    ``(source, target, time)`` tuples.
    """
    random.seed(seed)
    labels = [str(index) for index in range(n)]

    def draw_nodes():
        # One single-element sample per edge; all sources are drawn before
        # any target so the sequence of random draws stays reproducible.
        return [random.sample(labels, 1)[0] for _ in range(m)]

    sources = draw_nodes()
    targets = draw_nodes()
    timestamps = [random.randint(min_t, max_t) for _ in range(m)]
    return pp.TemporalNetwork(list(zip(sources, targets, timestamps)))
def test_dag_from_temporal_network_basic():
    """Check the routes leading to ``c_3`` in the DAG built with ``delta=1``."""
    network = pp.TemporalNetwork()
    for source, target, time in [('a', 'b', 1), ('b', 'c', 2), ('a', 'c', 2)]:
        network.add_edge(source, target, time)

    dag, _mapping = pp.DAG.from_temporal_network(network, delta=1)

    expected_routes = [['a_2', 'c_3'], ['a_1', 'b_2', 'c_3']]
    # Route order is not part of the contract, so compare sorted lists.
    assert sorted(dag.routes_to_node('c_3')) == sorted(expected_routes)
def temp_net(request):
    """Return a directed temporal network whose edges carry a ``color``.

    ``request`` is accepted but not used inside this function.
    """
    colored_edges = [
        ('a', 'b', 'red', 1),
        ('b', 'c', 'green', 2),
        ('c', 'd', 'yellow', 3),
        ('b', 'c', 'blue', 4),
    ]
    network = pp.TemporalNetwork(directed=True)
    for source, target, color, time in colored_edges:
        network.add_edge(source, target, color=color, timestamp=time)
    return network
def test_betweenness_preference_empty():
    """Paths extracted from an empty temporal network have no nodes, and the
    betweenness preference of node ``'e'`` is (approximately) zero."""
    empty_network = pp.TemporalNetwork()
    paths = pp.Paths.fromTemporalNetwork(empty_network, delta=3)
    assert len(paths.getNodes()) == 0

    result = paths.BetweennessPreference('e', method='MLE')
    assert result == pytest.approx(0.0)
def get_coediting_network(db_location, time_from=None, time_to=None):
    """
    Returns coediting network containing links between authors who coedited at
    least one line of code within a given time window.

    Node and edge infos set up to be expanded with future releases.

    Rows are skipped when the commit time is missing (the left join found no
    matching commit), when either author is missing, or when both authors are
    the same.

    Args:
        db_location: path to sqlite database
        time_from: start time of time window filter, datetime object;
            defaults to the earliest commit time found in the data
        time_to: end time of time window filter, datetime object;
            defaults to the latest commit time found in the data

    Returns:
        t: pathpy temporal network
        node_info: info on node characteristics (currently an empty dict)
        edge_info: info on edge characteristics (currently an empty dict)
    """
    time_format = '%Y-%m-%d %H:%M:%S'

    con = sqlite3.connect(db_location)
    try:
        edits = pd.read_sql(
            """SELECT original_commit_deletion AS pre_commit,
                      commit_hash AS post_commit,
                      levenshtein_dist
               FROM edits""", con).drop_duplicates()
        commits = pd.read_sql(
            """SELECT hash, author_name, author_date FROM commits""", con)
    finally:
        # The connection is only needed for the two queries above.
        con.close()

    # First join: author of the commit whose line was replaced.
    data = pd.merge(edits, commits, how='left', left_on='pre_commit', right_on='hash') \
        .drop(['pre_commit', 'hash', 'author_date'], axis=1)
    data.columns = ['post_commit', 'levenshtein_dist', 'pre_author']
    # Second join: author and time of the commit that made the edit.
    data = pd.merge(data, commits, how='left', left_on='post_commit', right_on='hash') \
        .drop(['post_commit', 'hash'], axis=1)
    data.columns = ['levenshtein_dist', 'pre_author', 'post_author', 'time']
    data = data[['pre_author', 'post_author', 'time', 'levenshtein_dist']]

    # Missing times cannot be compared with time strings, so leave them out.
    known_times = data.time.dropna()
    if time_from is None:
        time_from = datetime.datetime.strptime(min(known_times), time_format)
    if time_to is None:
        time_to = datetime.datetime.strptime(max(known_times), time_format)

    node_info = {}
    edge_info = {}

    t = pp.TemporalNetwork()
    for row in data.itertuples(index=False):
        if pd.isnull(row.time):
            continue
        edit_time = datetime.datetime.strptime(row.time, time_format)
        if not time_from <= edit_time <= time_to:
            continue
        if pd.isnull(row.post_author) or pd.isnull(row.pre_author):
            continue
        if row.post_author == row.pre_author:
            continue
        t.add_edge(row.post_author, row.pre_author, row.time,
                   directed=True, timestamp_format=time_format)

    return t, node_info, edge_info
def test_betweenness_preference_empty():
    """Paths extracted from an empty temporal network have no nodes, and the
    betweenness preference of node ``'e'`` is (approximately) zero."""
    empty_network = pp.TemporalNetwork()
    paths = pp.path_extraction.paths_from_temporal_network(empty_network, delta=3)
    assert len(paths.nodes) == 0

    result = pp.algorithms.path_measures.betweenness_preference(
        paths, 'e', method='MLE')
    assert result == pytest.approx(0.0)
def get_bipartite_network(sqlite_db_file, time_from=None, time_to=None):
    """
    Returns temporal bipartite network containing time-stamped file-author
    relationships for given time window.

    Node and edge infos set up to be expanded with future releases.

    Rows without a commit time (the left join found no matching commit) are
    ignored, both for the default time window and for the edges.

    Args:
        sqlite_db_file: path to sqlite database
        time_from: start time of time window filter, datetime object;
            defaults to the earliest commit time found in the data
        time_to: end time of time window filter, datetime object;
            defaults to the latest commit time found in the data

    Returns:
        t: pathpy temporal network
        node_info: info on node characteristics, e.g. membership in
            bipartite class (``node_info['class']`` maps node -> 'author'
            or 'file')
        edge_info: info on edge characteristics (currently an empty dict)
    """
    time_format = '%Y-%m-%d %H:%M:%S'

    con = sqlite3.connect(sqlite_db_file)
    try:
        edits = pd.read_sql(
            """SELECT commit_hash AS post_commit, filename FROM edits""",
            con).drop_duplicates()
        commits = pd.read_sql(
            """SELECT hash, author_name, author_date AS time FROM commits""",
            con)
    finally:
        # The connection is only needed for the two queries above.
        con.close()

    data = pd.merge(edits, commits, how='left', left_on='post_commit', right_on='hash') \
        .drop(['post_commit', 'hash'], axis=1)

    all_times = [
        datetime.datetime.strptime(dt, time_format)
        for dt in data.time
        if not pd.isnull(dt)
    ]
    if time_from is None:
        time_from = min(all_times)
    if time_to is None:
        time_to = max(all_times)

    node_info = {}
    edge_info = {}
    node_info['class'] = {}

    t = pp.TemporalNetwork()
    for row in data.itertuples(index=False):
        # A missing time cannot be parsed; skip it just like `all_times` does.
        if pd.isnull(row.time):
            continue
        edit_time = datetime.datetime.strptime(row.time, time_format)
        if not time_from <= edit_time <= time_to:
            continue
        t.add_edge(row.author_name, row.filename, row.time,
                   directed=True, timestamp_format=time_format)
        node_info['class'][row.author_name] = 'author'
        node_info['class'][row.filename] = 'file'

    return t, node_info, edge_info
def test_dag_from_temporal_network():
    r"""Check DAG routes for several ``delta`` values on two small networks.

    The pattern of the first network is (one row per time step; the three
    columns presumably stand for the nodes a, b, c -- layout reconstructed):

        1. o x x
            \
        2. x o o
              /
        3. x o x
              \
        4. x o o
            / /
        5. o o x
    """
    def build(edges):
        network = pp.TemporalNetwork()
        for source, target, time in edges:
            network.add_edge(source, target, time)
        return network

    tn = build([
        ('a', 'b', 1),
        ('c', 'b', 2),
        ('b', 'c', 3),
        ('c', 'b', 4),
        ('b', 'a', 4),
    ])

    dag, _mapping = pp.DAG.from_temporal_network(tn, delta=1)
    expected_c4 = [['c_2', 'b_3', 'c_4']]
    assert sorted(dag.routes_to_node('c_4')) == sorted(expected_c4)
    assert dag.routes_to_node('a_5') == [['b_4', 'a_5']]

    dag, _mapping = pp.DAG.from_temporal_network(tn, delta=2)
    expected_c4 = [['c_2', 'b_3', 'c_4'], ['a_1', 'b_3', 'c_4']]
    expected_a5 = [['c_2', 'b_4', 'a_5']]
    assert sorted(dag.routes_to_node('c_4')) == sorted(expected_c4)
    assert sorted(dag.routes_to_node('a_5')) == sorted(expected_a5)

    # network as before but with the node at 3 moved from b to a
    tn = build([
        ('a', 'b', 1),
        ('c', 'a', 2),
        ('a', 'c', 3),
        ('c', 'b', 4),
        ('b', 'a', 4),
    ])

    dag, _mapping = pp.DAG.from_temporal_network(tn, delta=3)
    expected_c4 = [['c_2', 'a_3', 'c_4']]
    expected_a5 = [['c_2', 'a_5'], ['a_1', 'b_4', 'a_5']]
    assert sorted(dag.routes_to_node('c_4')) == sorted(expected_c4)
    assert sorted(dag.routes_to_node('a_5')) == sorted(expected_a5)
def plot_hon_walk(network, walk):
    """Plot an animation of ``walk`` moving over ``network``.

    The node currently visited is drawn gray and enlarged; the node visited
    in the previous step gets its cluster color back. Cluster colors are
    defined for the labels ``'0'`` ... ``'29'``: single-character labels are
    red, labels starting with ``'1'`` green, all others blue. At most the
    steps 0..110 of the walk are processed.
    """
    def cluster_color(label):
        if len(label) < 2:
            return 'red'
        return 'green' if label.startswith('1') else 'blue'

    animation = pp.TemporalNetwork(directed=False)

    # Intermediate static network; its nodes seed the animated network.
    static_copy = pp.Network()
    for edge in network.nodes.edges():
        static_copy.add_edge(edge)

    clusters = {str(v): cluster_color(str(v)) for v in range(30)}

    for node in static_copy.nodes.values():
        animation.add_node(node, size=16, t=0)
    for edge in network.edges.values():
        animation.add_edge(edge.v, edge.w, t=0)

    visited = []
    for step, current in enumerate(walk):
        if visited:
            previous = visited.pop(0)
            animation.nodes[previous].update(
                color=clusters[previous], size=16, t=step)
        animation.nodes[current].update(color='gray', size=20, t=step)

        # Register every edge as active during [step, step + 1) by writing
        # to the network's internal interval structures directly.
        interval = (step, step + 1)
        for edge in animation.edges.values():
            animation._edges._intervals.addi(interval[0], interval[1], edge)
            animation._edges._interval_map[edge].add(interval)

        visited.append(current)
        if step == 110:
            break

    animation.plot(
        width=900,
        height=600,
        forceCharge=-10,
        forceRepel=-200,
        defaultEdgeWeight=0.01,
        edge_size=1,
        edge_opacity=.3,
        edge_color='gray',
        animation_end=100,
        animation_steps=101,
        curved=True,
    )
def temporal_network_object():
    """Build the temporal network fixture with 21 edges at times 1..21."""
    timestamped_edges = [
        # Path of length two
        ("c", "e", 1), ("e", "f", 2),
        # Path of length two
        ("a", "e", 3), ("e", "g", 4),
        # Path of length two
        ("c", "e", 5), ("e", "f", 6),
        # Path of length two
        ("a", "e", 7), ("e", "g", 8),
        # Path of length two
        ("c", "e", 9), ("e", "f", 10),
        # The next two edges continue the previous path to ( c-> e-> f-> e -> b )
        ("f", "e", 11), ("e", "b", 12),
        # This is an isolated edge (i.e. path of length one)
        ("e", "b", 13),
        # Path of length two
        ("c", "e", 14), ("e", "f", 15),
        # Path of length two
        ("b", "e", 16), ("e", "g", 17),
        # Path of length two
        ("c", "e", 18), ("e", "f", 19),
        # Path of length two
        ("c", "e", 20), ("e", "f", 21),
    ]

    network = pp.TemporalNetwork()
    for source, target, time in timestamped_edges:
        network.addEdge(source, target, time)
    return network
def _first_value_by_id(frame, column):
    """Map each ``id`` in ``frame`` to the ``column`` value of its first row."""
    lookup = {}
    for item_id, value in zip(frame['id'], frame[column]):
        lookup.setdefault(item_id, value)
    return lookup


def create_user_interaction_graph(conn: sqlite3.Connection,
                                  subreddit: str,
                                  temporal: bool = False):
    """Build a graph linking each comment author to the author replied to.

    For every comment of ``subreddit`` whose author is not ``'[deleted]'``,
    an edge is added from the comment's author to the author of its parent:
    a submission for ``t3_...`` parent ids, another comment for ``t1_...``.
    Comments whose parent is not found in the queried data are skipped;
    any other parent prefix prints ``'?'`` and is skipped as well.

    :param conn: open SQLite connection providing the ``comments`` and
        ``submissions`` tables
    :param subreddit: subreddit name; passed to the queries as a bound
        parameter, so it may contain quotes
    :param temporal: if true, build a ``pp.TemporalNetwork`` with
        ``int(created_utc)`` of the comment as time stamp; otherwise a
        directed ``pp.Network``
    :return: the populated network
    :raises ValueError: if a ``parent_id`` does not split into exactly two
        parts at ``'_'``
    """
    if temporal:
        g = pp.TemporalNetwork()
    else:
        g = pp.Network(directed=True)

    comment_df = pd.read_sql_query(
        "SELECT id, author, parent_id, created_utc FROM comments "
        "WHERE subreddit = ? AND author != '[deleted]'",
        conn, params=(subreddit,))
    submission_df = pd.read_sql_query(
        "SELECT id, author, created_utc FROM submissions WHERE subreddit = ?",
        conn, params=(subreddit,))

    # Build the id -> author tables once instead of filtering a whole
    # DataFrame for every comment.
    authors_by_prefix = {
        't3': _first_value_by_id(submission_df, 'author'),  # link / submission
        't1': _first_value_by_id(comment_df, 'author'),     # comment
    }

    for row in comment_df.itertuples(index=False):
        source_author = row.author
        t_value, target_id = row.parent_id.split('_')

        authors = authors_by_prefix.get(t_value)
        if authors is None:
            # Everything else
            print('?')
            continue

        # Only add an edge if the parent was found.
        if target_id not in authors:
            continue
        target_author = authors[target_id]

        if temporal:
            g.add_edge(source_author, target_author, int(row.created_utc))
        else:
            g.add_edge(source_author, target_author)
    return g
def test_paths_from_temporal_network_dag():
    """Check path statistics extracted via the DAG method with ``delta=2``."""
    network = pp.TemporalNetwork()
    for source, target, time in [
            ('a', 'b', 1),
            ('b', 'a', 3),
            ('b', 'c', 3),
            ('d', 'c', 4),
            ('c', 'd', 5),
            ('c', 'b', 6),
    ]:
        network.add_edge(source, target, time)

    paths = pp.path_extraction.paths_from_temporal_network_dag(network, delta=2)

    assert paths.observation_count == 4.0
    assert len(paths.nodes) == 4
    for length, expected_unique in [(0, 4.0), (1, 4.0), (2, 4.0), (3, 1.0)]:
        assert paths.unique_paths(length) == expected_unique

    # Each entry pairs a path with the two-element count vector expected in
    # paths.paths[<number of edges>][path].
    expected_counts = [
        # 4 longest paths
        (('a', 'b', 'a'), [0.0, 1.0]),
        (('d', 'c', 'd'), [0.0, 1.0]),
        (('d', 'c', 'b'), [0.0, 1.0]),
        (('a', 'b', 'c', 'd'), [0.0, 1.0]),
        # 4 subpaths of length 0
        (('a',), [3.0, 0.0]),
        (('b',), [3.0, 0.0]),
        (('c',), [3.0, 0.0]),
        (('d',), [4.0, 0.0]),
        # 6 subpaths of length 1
        (('a', 'b'), [2.0, 0.0]),
        (('b', 'a'), [1.0, 0.0]),
        (('b', 'c'), [1.0, 0.0]),
        (('c', 'd'), [2.0, 0.0]),
        (('d', 'c'), [2.0, 0.0]),
        (('c', 'b'), [1.0, 0.0]),
        # 2 subpaths of length 2
        (('a', 'b', 'c'), [1.0, 0.0]),
        (('b', 'c', 'd'), [1.0, 0.0]),
    ]
    for path, counts in expected_counts:
        assert (paths.paths[len(path) - 1][path] == counts).all()
def plot_walk(network, walk):
    """Plot an animation of ``walk`` moving over ``network``.

    The node currently visited is drawn red and enlarged; the node visited
    in the previous step is reset to gray. At most the steps 0..110 of the
    walk are processed.
    """
    animation = pp.TemporalNetwork(directed=False)
    for node in network.nodes.values():
        animation.add_node(node, color='gray', size=16, t=0)
    for edge in network.edges.values():
        animation.add_edge(edge.v, edge.w, t=0)

    visited = []
    for step, current in enumerate(walk):
        if visited:
            previous = visited.pop(0)
            animation.nodes[previous].update(color='gray', size=16, t=step)
        animation.nodes[current].update(color='red', size=20, t=step)

        # Register every edge as active during [step, step + 1) by writing
        # to the network's internal interval structures directly.
        interval = (step, step + 1)
        for edge in animation.edges.values():
            animation._edges._intervals.addi(interval[0], interval[1], edge)
            animation._edges._interval_map[edge].add(interval)

        visited.append(current)
        if step == 110:
            break

    animation.plot(
        width=900,
        height=600,
        forceCharge=-10,
        forceRepel=-200,
        defaultEdgeWeight=0.01,
        edge_size=1,
        edge_opacity=.3,
        edge_color='gray',
        animation_end=100,
        animation_steps=101,
        curved=True,
    )
def returnAll():
    """Return node and link data for the activity between two dates as JSON.

    Reads the ``start`` and ``end`` request arguments, builds a temporal
    network from the filtered activity (entries with an empty ``from`` or
    ``to`` are ignored), converts it to an undirected static network and,
    for every node, reports the ``diff_dict`` of the network statistics
    computed without and with that node, merged with the node's properties.
    """
    raw_start = request.args.get('start')
    raw_end = request.args.get('end')
    print(raw_start)
    print(raw_end)
    start_date = parser.parse(raw_start).date()
    end_date = parser.parse(raw_end).date()

    temporal = pp.TemporalNetwork()
    for activity in filter_activity(data, start_date, end_date):
        if activity['from'] != '' and activity['to'] != '':
            temporal.add_edge(activity["from"], activity["to"],
                              activity["timestamp"])

    graph = pp.Network.from_temporal_network(temporal).to_undirected()

    dump = {'nodes': [], 'links': []}
    for node, prop in graph.nodes.items():
        # Statistics of the network with this node removed ...
        reduced = deepcopy(graph)
        reduced.remove_node(node)
        without_node = stats(reduced)
        # ... compared with the statistics of the full network.
        with_node = stats(graph)

        entry = {'id': node}
        entry.update(diff_dict(without_node, with_node))
        entry.update(prop)
        dump['nodes'].append(entry)

    dump['links'].extend(
        {'source': key[0], 'target': key[1]} for key in graph.edges.keys())

    print(jsonify(dump))
    return jsonify(dump)
def create_comment_structure_graph(conn: sqlite3.Connection,
                                   subreddit: str,
                                   temporal: bool = False):
    """Build a graph of the reply structure of a subreddit.

    Every comment gets an edge to its parent (the part of ``parent_id``
    after the ``'_'``). If the parent is a submission (prefix ``t3``), an
    additional edge from that submission to the subreddit name is added.

    :param conn: open SQLite connection providing the ``comments`` and
        ``submissions`` tables
    :param subreddit: subreddit name; passed to the queries as a bound
        parameter, so it may contain quotes
    :param temporal: if true, build a ``pp.TemporalNetwork`` using the
        ``created_utc`` of the comment (respectively of the submission for
        the submission -> subreddit edge) as time stamp; otherwise a
        directed ``pp.Network``
    :return: the populated network
    :raises ValueError: if a ``parent_id`` does not split into exactly two
        parts at ``'_'``
    :raises IndexError: in temporal mode, if a referenced submission is not
        among the queried submissions
    """
    comment_df = pd.read_sql_query(
        "SELECT id, parent_id, created_utc FROM comments WHERE subreddit = ?",
        conn, params=(subreddit,))
    submission_df = pd.read_sql_query(
        "SELECT id, subreddit, created_utc FROM submissions WHERE subreddit = ?",
        conn, params=(subreddit,))

    if temporal:
        g = pp.TemporalNetwork()
    else:
        g = pp.Network(directed=True)

    for ind, record in comment_df.iterrows():
        source_id = record['id']
        t_value, target_id = record.parent_id.split('_')

        if temporal:
            ts = record.created_utc
            g.add_edge(source_id, target_id, ts)
        else:
            g.add_edge(source_id, target_id)

        # If the comment points at a submission also add edge to the subreddit.
        if t_value == 't3':
            source_id = target_id
            target_id = subreddit
            if temporal:
                ts = int(submission_df[submission_df['id'] == source_id].created_utc.iloc[0])
                g.add_edge(source_id, target_id, ts)
            else:
                g.add_edge(source_id, target_id)
    return g
#%% In [1] import pathpy as pp t = pp.TemporalNetwork() print(t) #%% In [2] t.add_edge('a', 'b', 1) t.add_edge('b', 'a', 3) t.add_edge('b', 'c', 3) t.add_edge('d', 'c', 4) t.add_edge('c', 'd', 5) t.add_edge('c', 'b', 6) print(t) #%% In [3] t_realtime = pp.TemporalNetwork() t_realtime.add_edge('a', 'b', '2018-08-22 09:30:22') t_realtime.add_edge('b', 'c', '2018-08-22 09:30:25') t_realtime.add_edge('c', 'a', '2018-08-22 10:30:25') print(t_realtime) for e in t_realtime.tedges: print(e) #%% In [4] t #%% In [5] style = { 'ts_per_frame': 1,
#load temporal networks, whose format is (timestamps * agents * agents)
# NOTE(review): `load` is imported outside this excerpt; if it unpickles
# data, only use it on trusted files.
# The `with` block closes the file even if loading fails.
with open('./result/oCEP_output', 'rb') as f:
    raw_TN = load(f)

#normalize the temporal networks by their largest entry
raw_TN = raw_TN / np.amax(raw_TN)

#define the number of agents (individuals) in the network
#agents = 10
agents = raw_TN.shape[1]

#construct the temporal network TN based on the raw data
#the individuals are identified as p0, p1, ...
#The threshold is set to 0.1: any entry with weight >= 0.1 is regarded as connected
TN = pathpy.TemporalNetwork()
for i in range(raw_TN.shape[0]):
    for j in range(agents):
        for k in range(agents):
            if raw_TN[i][j][k] >= 0.1:
                # edge p<j> -> p<k> at time step i
                TN.add_edge('p' + str(j), 'p' + str(k), i)

#show information of the temporal network
print('basic information of the temporal networks:')
print(TN)

#extract raw pathset from temporal networks
print('\nextracting pathset from temporal network...\n')
S = pathpy.path_extraction.paths_from_temporal_network_dag(TN, delta=1)

#show basic information of the pathset
mog = pp.MultiOrderModel(paths, 3) # Color nodes according to known ground-truth clusters clusters = { v: 'red' if len(v)<2 else ('green' if v.startswith('1') else 'blue') for v in paths.nodes} pp.visualisation.plot(mog.layers[mog.estimate_order()], plot_higher_order_nodes=False, node_color=clusters) #%% In [2] from random import shuffle edges = [(v,w) for (v,w,t) in t.tedges] times = [t for (v,w,t) in t.tedges] shuffle(times) t_shuffled = pp.TemporalNetwork() for i in range(len(edges)): t_shuffled.add_edge(edges[i][0], edges[i][1], times[i]) paths = pp.path_extraction.paths_from_temporal_network_dag(t_shuffled) mog = pp.MultiOrderModel(paths, 3) clusters = { v: 'red' if len(v)<2 else ('green' if v.startswith('1') else 'blue') for v in paths.nodes} pp.visualisation.plot(mog.layers[mog.estimate_order()], plot_higher_order_nodes=False, node_color=clusters) #%% In [3] import scipy.stats def kendalltau(a, b):
def get_coediting_network(sqlite_db_file, author_identifier='author_id', time_from=None, time_to=None):
    """
    Returns coediting network containing links between authors who coedited at
    least one line of code within a given time window.

    Edges point from the author of the editing commit to the author of the
    commit whose line was replaced. Rows with a missing author or with
    identical authors are skipped. Time stamps are integers computed as the
    commit's ``author_date`` in seconds minus its ``author_timezone`` value.

    :param str sqlite_db_file: path to SQLite database
    :param str author_identifier: column of the ``commits`` table used to
        identify authors; one of ``'author_id'``, ``'author_name'``,
        ``'author_email'``
    :param datetime.datetime time_from: start time of time window filter;
        defaults to the earliest time stamp in the data
    :param datetime.datetime time_to: end time of time window filter;
        defaults to the latest time stamp in the data
    :raises ValueError: if ``author_identifier`` is not one of the allowed
        values

    :return:
        - *pathpy.TemporalNetwork* – coediting network
        - *dict* – info on node characteristics (currently empty)
        - *dict* – info on edge characteristics (currently empty)
    """
    if author_identifier == 'author_id':
        _ensure_author_id_exists(sqlite_db_file)

    con = sqlite3.connect(sqlite_db_file)
    try:
        edits = pd.read_sql(
            """SELECT original_commit_deletion AS pre_commit,
                      commit_hash AS post_commit,
                      levenshtein_dist
               FROM edits""", con).drop_duplicates()

        if author_identifier not in ('author_id', 'author_name', 'author_email'):
            raise ValueError(
                "author_identifier must be from {'author_id', 'author_name', 'author_email'}."
            )
        # Interpolating the column name is safe here: it was just checked
        # against the fixed list of allowed column names.
        commits = pd.read_sql(
            """SELECT hash,
                      {column} as author_identifier,
                      author_date,
                      author_timezone
               FROM commits""".format(column=author_identifier), con)
    finally:
        # The connection is only needed for the queries above.
        con.close()

    # First join: author of the commit whose line was replaced.
    data = pd.merge(edits, commits, how='left', left_on='pre_commit', right_on='hash') \
        .drop(['pre_commit', 'hash', 'author_date', 'author_timezone'], axis=1)
    data.columns = ['post_commit', 'levenshtein_dist', 'pre_author']
    # Second join: author, time and timezone of the commit that made the edit.
    data = pd.merge(data, commits, how='left', left_on='post_commit', right_on='hash') \
        .drop(['post_commit', 'hash'], axis=1)
    data.columns = [
        'levenshtein_dist', 'pre_author', 'post_author', 'time', 'timezone'
    ]

    # Nanoseconds since the epoch -> seconds, shifted by the timezone value.
    data['time'] = [
        int(t / (10**9) - tz)
        for t, tz in zip(
            pd.to_datetime(data.time, format='%Y-%m-%d %H:%M:%S').view('int64'),
            data.timezone)
    ]
    data = data[['pre_author', 'post_author', 'time', 'levenshtein_dist']]

    if time_from is None:
        time_from = min(data.time)
    else:
        time_from = int(calendar.timegm(time_from.timetuple()))
    if time_to is None:
        time_to = max(data.time)
    else:
        time_to = int(calendar.timegm(time_to.timetuple()))

    node_info = {}
    edge_info = {}

    t = pp.TemporalNetwork()
    for row in data.itertuples():
        if not time_from <= row.time <= time_to:
            continue
        if row.post_author == row.pre_author:
            continue
        if pd.isnull(row.post_author) or pd.isnull(row.pre_author):
            continue
        t.add_edge(row.post_author, row.pre_author, row.time, directed=True)

    return t, node_info, edge_info
def get_bipartite_network(sqlite_db_file, author_identifier='author_id', time_from=None, time_to=None):
    """
    Returns temporal bipartite network containing time-stamped file-author
    relationships for given time window.

    Edges point from the author of a commit to each file edited in it. Time
    stamps are computed as the commit's time in seconds minus its timezone
    value; rows without a commit time never fall inside the time window and
    are therefore skipped.

    :param str sqlite_db_file: path to SQLite database
    :param str author_identifier: column of the ``commits`` table used to
        identify authors; one of ``'author_id'``, ``'author_name'``,
        ``'author_email'``
    :param datetime.datetime time_from: start time of time window filter,
        datetime object; defaults to the earliest time stamp in the data
    :param datetime.datetime time_to: end time of time window filter,
        datetime object; defaults to the latest time stamp in the data
    :raises ValueError: if ``author_identifier`` is not one of the allowed
        values

    :return:
        - *pathpy.TemporalNetwork* – bipartite network
        - *dict* – info on node characteristics, e.g. membership in bipartite
          class (``['class']`` maps node -> ``'author'`` or ``'file'``)
        - *dict* – info on edge characteristics (currently empty)
    """
    if author_identifier == 'author_id':
        _ensure_author_id_exists(sqlite_db_file)

    con = sqlite3.connect(sqlite_db_file)
    try:
        edits = pd.read_sql(
            """SELECT commit_hash AS post_commit, filename FROM edits""",
            con).drop_duplicates()

        if author_identifier not in ('author_id', 'author_name', 'author_email'):
            raise ValueError(
                "author_identifier must be from {'author_id', 'author_name', 'author_email'}."
            )
        # Interpolating the column name is safe here: it was just checked
        # against the fixed list of allowed column names.
        commits = pd.read_sql(
            """SELECT hash,
                      {column} as author_identifier,
                      author_date AS time,
                      author_timezone AS timezone
               FROM commits""".format(column=author_identifier), con)
    finally:
        # The connection is only needed for the queries above.
        con.close()

    data = pd.merge(edits, commits, how='left', left_on='post_commit', right_on='hash') \
        .drop(['post_commit', 'hash'], axis=1)

    # Seconds since the epoch shifted by the timezone value; NaN where the
    # commit time is missing.
    data['time'] = [
        int(
            calendar.timegm(
                datetime.datetime.strptime(t, '%Y-%m-%d %H:%M:%S').timetuple())
            - tz) if not pd.isnull(t) else np.nan
        for t, tz in zip(data.time, data.timezone)
    ]
    data = data.drop(['timezone'], axis=1)

    all_times = [dt for dt in data.time if not pd.isnull(dt)]
    if time_from is None:
        time_from = min(all_times)
    else:
        time_from = int(calendar.timegm(time_from.timetuple()))
    if time_to is None:
        time_to = max(all_times)
    else:
        time_to = int(calendar.timegm(time_to.timetuple()))

    node_info = {}
    edge_info = {}
    node_info['class'] = {}

    t = pp.TemporalNetwork()
    for idx, row in data.iterrows():
        # NaN times compare false on both sides and are skipped here.
        if (row.time >= time_from) and (row.time <= time_to):
            t.add_edge(row['author_identifier'],
                       row['filename'],
                       row['time'],
                       directed=True)
            node_info['class'][row['author_identifier']] = 'author'
            node_info['class'][row['filename']] = 'file'

    return t, node_info, edge_info