Esempio n. 1
0
def test_closeness_centrality():
    """Check the closeness centrality of an end node on the path a - x - b."""
    network = pp.Network(directed=False)
    for source, target in (('a', 'x'), ('x', 'b')):
        network.add_edge(source, target)

    closeness = pp.algorithms.centralities.closeness_centrality(network)
    assert closeness['a'] == 1/3
Esempio n. 2
0
def test_degree_assortativity():
    """Compute the degree assortativity of a small weighted star network."""
    star = pp.Network(directed=False)
    for leaf, weight in (('b', 2.1), ('c', 1.0)):
        star.add_edge('a', leaf, weight=weight)

    assortativity = pp.statistics.degrees.degree_assortativity(star)
Esempio n. 3
0
def test_network_from_pathpyobjects():
    """Build a multi-edge network from explicit ``Node`` and ``Edge`` objects."""
    network = pp.Network(
        multiedges=True,
        name='Trolls',
        chapter='Roast Mutton',
    )
    source = pp.Node(uid='t', name='Tom', age=156)
    target = pp.Node(uid='b', name='Bert', age=96)
    likes = pp.Edge(source, target, type='like', strength=2.0)

    # The edge object carries both of its end nodes into the network.
    network.add_edge(likes)
Esempio n. 4
0
def get_coauthorship_network(sqlite_db_file, time_from=None, time_to=None):
    """Return the co-authorship network of a repository database.

    Two authors are linked when both of them touched the same file (as author
    of either the pre- or the post-edit commit) within the given time window.

    Node and edge infos are set up to be expanded with future releases; they
    are currently returned as empty dictionaries.

    Args:
        sqlite_db_file: path to sqlite database
        time_from: start time of time window filter, datetime object.
            Defaults to the earliest commit time found in the data.
        time_to: end time of time window filter, datetime object.
            Defaults to the latest commit time found in the data.

    Returns:
        n: pathpy network
        node_info: info on node characteristics
        edge_info: info on edge characteristics
    """
    con = sqlite3.connect(sqlite_db_file)
    try:
        edits = pd.read_sql(
            """SELECT original_commit_deletion AS pre_commit,
                                  commit_hash AS post_commit,
                                  filename
                           FROM edits""", con)
        commits = pd.read_sql(
            """SELECT hash, author_name, author_date AS time FROM commits""",
            con)
    finally:
        # The connection is only needed for the two reads above.
        con.close()

    # Attribute every edit to the author of the commit that is edited
    # (pre_commit) as well as to the author of the editing commit (post_commit).
    data_pre = pd.merge(edits, commits, how='left', left_on='pre_commit', right_on='hash') \
                    .drop(['pre_commit', 'post_commit', 'hash'], axis=1)
    data_post = pd.merge(edits, commits, how='left', left_on='post_commit', right_on='hash') \
                    .drop(['pre_commit', 'post_commit', 'hash'], axis=1)
    data = pd.concat([data_pre, data_post])

    all_times = [
        datetime.datetime.strptime(dt, '%Y-%m-%d %H:%M:%S') for dt in data.time
        if not pd.isnull(dt)
    ]
    # Without any valid timestamp there is nothing to derive a default from;
    # the bounds then stay None and the filter below yields an empty frame.
    if time_from is None and all_times:
        time_from = min(all_times)
    if time_to is None and all_times:
        time_to = max(all_times)

    # Rows without a timestamp never satisfy a time comparison, so they are
    # excluded up front.
    times = pd.to_datetime(data['time'])
    in_window = times.notna()
    if time_from is not None:
        in_window = in_window & (times >= time_from)
    if time_to is not None:
        in_window = in_window & (times <= time_to)
    data = data.loc[in_window, :]

    node_info = {}
    edge_info = {}

    n = pp.Network()
    for file in data.filename.unique():
        n.add_clique(set(data.loc[data.filename == file, 'author_name']))

    # remove self loops; iterate over a snapshot because edges are removed
    # from the network while looping
    for edge in list(n.edges):
        if edge[0] == edge[1]:
            n.remove_edge(edge[0], edge[1])

    return n, node_info, edge_info
Esempio n. 5
0
def plot_hon(network):
    """Plot a force network derived from the edges of a higher-order network.

    The first node of each higher-order edge's source and the last node of
    its target are connected in an undirected network, accumulating the edge
    'frequency' as 'force'. Forces are normalised into weights by the smaller
    weighted degree of the two end points. Edges of ``network.nodes.edges``
    are then overlaid with opacity 0.3 and weight 0 before plotting.
    """
    # Result is unused here; the call is kept as in the original code.
    _first_order = pp.Network.from_paths(network._subpaths(), frequencies=True)

    forces = pp.Network(directed=False)
    for hon_edge in network.edges:
        source = hon_edge.v.nodes[0]
        target = hon_edge.w.nodes[-1]
        frequency = hon_edge['frequency']
        if (source, target) in forces.edges:
            forces.edges[source, target]['force'] += frequency
        else:
            forces.add_edge(source, target, force=frequency, opacity=0)

    weighted_degree = forces.degrees(weight='force')

    for force_edge in forces.edges:
        normaliser = min(weighted_degree[force_edge.v.uid],
                         weighted_degree[force_edge.w.uid])
        force_edge['weight'] = force_edge['force'] / normaliser

    # Colour by label: single-character labels red, labels starting with '1'
    # green, everything else blue.
    clusters = {}
    for number in range(30):
        label = str(number)
        if len(label) < 2:
            colour = 'red'
        elif label.startswith('1'):
            colour = 'green'
        else:
            colour = 'blue'
        clusters[label] = colour

    plot_options = dict(
        width=900,
        height=600,
        forceCharge=-4000,
        forceRepel=-800,
        node_color=clusters,
        edge_size=1,
        edge_color='gray',
        curved=True,
        restartAlpha=1,
        targetAlpha=0.0,
        forceAlpha=0.3,
        repelDistance=200,
    )

    for base_edge in network.nodes.edges:
        pair = (base_edge.v.uid, base_edge.w.uid)
        if pair not in forces.edges:
            forces.add_edge(pair[0], pair[1], opacity=.3, weight=0)
        else:
            forces.edges[pair[0], pair[1]].update(opacity=.3, weight=0)

    forces.plot(**plot_options)
Esempio n. 6
0
def test_degree_raw_moment():
    """Check the raw degree moment of a weighted star, plain and weighted."""
    star = pp.Network(directed=False)
    for leaf, weight in (('b', 2.1), ('c', 1.0)):
        star.add_edge('a', leaf, weight=weight)

    unweighted = pp.statistics.degrees.degree_raw_moment(star)
    assert unweighted == 4 / 3

    weighted = pp.statistics.degrees.degree_raw_moment(star, weight=True)
Esempio n. 7
0
def test_degree_central_moment():
    """Compute the central degree moment of a weighted star, plain and weighted."""
    star = pp.Network(directed=False)
    for leaf, weight in (('b', 2.1), ('c', 1.0)):
        star.add_edge('a', leaf, weight=weight)

    unweighted = pp.statistics.degrees.degree_central_moment(star)

    weighted = pp.statistics.degrees.degree_central_moment(star, weight=True)
def test_degree_centrality():
    """Check default and in-degree centrality on the directed path a -> x -> b."""
    chain = pp.Network(directed=True)
    for source, target in (('a', 'x'), ('x', 'b')):
        chain.add_edge(source, target)

    default_mode = pp.algorithms.centralities.degree_centrality(chain)
    assert default_mode['a'] == 1

    indegree = pp.algorithms.centralities.degree_centrality(
        chain, mode='indegree')
    assert indegree['a'] == 0
Esempio n. 9
0
def test_local_clustering_coefficient():
    """Compute the local clustering coefficient of the node shared by two triangles."""
    weighted_edges = (
        ('a', 'b', 2.1),
        ('b', 'c', 1.0),
        ('c', 'a', 1.0),
        ('b', 'd', 1.0),
        ('d', 'e', 1.0),
        ('e', 'b', 1.0),
    )
    bowtie = pp.Network(directed=False)
    for source, target, weight in weighted_edges:
        bowtie.add_edge(source, target, weight=weight)

    coefficient = pp.statistics.clustering.local_clustering_coefficient(
        bowtie, 'b')
Esempio n. 10
0
def test_degree_distribution():
    """Check the plain and the weighted degree distribution of a star."""
    star = pp.Network(directed=False)
    for leaf, weight in (('b', 2.1), ('c', 1.0)):
        star.add_edge('a', leaf, weight=weight)

    plain = pp.statistics.degrees.degree_distribution(star)
    expected_plain = {2: 1 / 3, 1: 2 / 3}
    assert plain == expected_plain

    weighted = pp.statistics.degrees.degree_distribution(star, weight=True)
    expected_weighted = {3.1: 1 / 3, 2.1: 1 / 3, 1.: 1 / 3}
    assert weighted == expected_weighted
Esempio n. 11
0
def test_degree_sequence():
    """Check the plain and the weighted degree sequence of a star."""
    star = pp.Network(directed=False)
    for leaf, weight in (('b', 2.1), ('c', 1.0)):
        star.add_edge('a', leaf, weight=weight)

    plain = pp.statistics.degrees.degree_sequence(star)
    expected_plain = np.array([2., 1., 1.])
    assert np.array_equal(plain, expected_plain)

    weighted = pp.statistics.degrees.degree_sequence(star, weight=True)
    expected_weighted = np.array([3.1, 2.1, 1.])
    assert np.array_equal(weighted, expected_weighted)
Esempio n. 12
0
def test_diameter():
    """Check the diameter before and after closing the path a - x - c into a triangle."""
    shortest_paths = pp.algorithms.shortest_paths

    graph = pp.Network(directed=False)
    for source, target in (('a', 'x'), ('x', 'c')):
        graph.add_edge(source, target)
    assert shortest_paths.diameter(graph) == 2
    assert graph.diameter() == 2

    # The shortcut a - c makes every pair of nodes adjacent.
    graph.add_edge('a', 'c')
    assert shortest_paths.diameter(graph) == 1
    assert graph.diameter() == 1
Esempio n. 13
0
def test_all_shortest_paths():
    """Check the shortest paths from a to c with one and with two routes."""
    find_paths = pp.algorithms.shortest_paths.all_shortest_paths

    graph = pp.Network()
    graph.add_edges(('a', 'x'), ('x', 'c'))
    routes, _ = find_paths(graph)
    assert routes['a']['c'] == {('a', 'x', 'c')}

    # A second route of equal length must be reported alongside the first.
    graph.add_edges(('a', 'y'), ('y', 'c'))
    routes, _ = find_paths(graph)
    assert routes['a']['c'] == {('a', 'x', 'c'), ('a', 'y', 'c')}
Esempio n. 14
0
def generate_random_network(n=10, m=20, directed=True, weighted=True, seed=0):
    """Generate a reproducible random network.

    Args:
        n: number of nodes; nodes are named '0' ... str(n - 1).
        m: number of ``add_edge`` calls, each between two distinct randomly
            sampled nodes (the same pair may be drawn more than once).
        directed: whether the network is directed.
        weighted: if true, every edge gets a random integer weight in [0, 10].
        seed: seed for the global ``random`` generator.

    Returns:
        The generated ``pp.Network``.

    Raises:
        ValueError: from ``random.sample`` if ``m > 0`` and fewer than two
            nodes exist.
    """
    random.seed(seed)
    # Pass the flag by keyword, as everywhere else in this code base, so it
    # can never be bound to a different leading constructor parameter.
    net = pp.Network(directed=directed)
    for i in range(n):
        net.add_node(str(i))
    for _ in range(m):
        v, w = random.sample(list(net.nodes), 2)
        if weighted:
            net.add_edge(v, w, weight=random.randint(0, 10))
        else:
            net.add_edge(v, w)
    return net
Esempio n. 15
0
def test_distance_matrix():
    """Test the distance matrix of a network."""
    net = pp.Network()
    net.add_edges(('a', 'x'), ('x', 'y'), ('y', 'c'))
    m = pp.algorithms.shortest_paths.distance_matrix(net)
    # Look up matrix positions by node uid instead of hard-coding 0 and 3,
    # so the test does not depend on the internal node ordering.
    n = net.nodes.index

    assert m[n['a'], n['c']] == 3
    assert net.distance_matrix()[n['a'], n['c']] == 3

    # The shortcut x -> c shortens the a -> c distance by one step.
    net.add_edges(('x', 'c'))
    m = pp.algorithms.shortest_paths.distance_matrix(net)

    assert m[n['a'], n['c']] == 2
    assert net.distance_matrix()[n['a'], n['c']] == 2
Esempio n. 16
0
def plot_hon_walk(network, walk):
    """Plot an animation of a walk as a temporal network.

    A temporal network is populated with the nodes reachable through
    ``network.nodes.edges()`` and the edges of ``network``. For every step
    ``t`` of ``walk`` the visited node is highlighted (gray, size 20) and the
    node visited in the previous step is reset to its cluster colour.

    Args:
        network: network object providing ``nodes.edges()`` and ``edges``.
        walk: iterable of node identifiers; only steps 0 to 110 are processed.
            Identifiers are used as keys of ``wal.nodes`` and of the colour
            table, which covers the labels '0' to '29' only.
    """
    wal = pp.TemporalNetwork(directed=False)
    net = pp.Network()
    for edge in network.nodes.edges():
        net.add_edge(edge)

    # Colour by label: single-character labels red, labels starting with '1'
    # green, everything else blue.
    clusters = {
        str(v): 'red' if len(str(v)) < 2 else
        ('green' if str(v).startswith('1') else 'blue')
        for v in range(30)
    }

    for node in net.nodes.values():
        #wal.add_node(node, color=clusters[node.uid], size=16, t=0)
        wal.add_node(node, size=16, t=0)

    for edge in network.edges.values():
        wal.add_edge(edge.v, edge.w, t=0)

    # Holds at most one entry: the node visited in the previous step.
    nodes = []
    for t, w in enumerate(walk):
        if nodes:
            # Reset the previously highlighted node to its cluster colour.
            n = nodes.pop(0)
            wal.nodes[n].update(color=clusters[n], size=16, t=t)
        # Highlight the node visited in this step.
        wal.nodes[w].update(color='gray', size=20, t=t)

        begin = t
        end = t + 1
        # NOTE(review): writes directly into private interval structures of
        # the temporal edge collection, registering every edge for the
        # interval (t, t + 1) - confirm this is still supported by the
        # installed pathpy version.
        for edge in wal.edges.values():
            wal._edges._intervals.addi(begin, end, edge)
            wal._edges._interval_map[edge].add((begin, end))
        nodes.append(w)
        # Hard stop after step index 110.
        if t == 110:
            break

    style = {
        'width': 900,
        'height': 600,
        'forceCharge': -10,
        'forceRepel': -200,
        'defaultEdgeWeight': 0.01,
        'edge_size': 1,
        'edge_opacity': .3,
        'edge_color': 'gray',
        'animation_end': 100,
        'animation_steps': 101,
        'curved': True,
    }

    wal.plot(**style)
Esempio n. 17
0
def net(request):
    """Return a small undirected example network with nodes a to g.

    ``request`` is accepted but not used (presumably the pytest fixture
    request object; the decorator is not visible here).
    """
    links = (
        ('a', 'b'),
        ('b', 'c'),
        ('c', 'a'),
        ('b', 'd'),
        ('d', 'e'),
        ('e', 'f'),
        ('f', 'd'),
        ('f', 'g'),
        ('g', 'd'),
    )
    network = pp.Network(directed=False)
    for source, target in links:
        network.add_edge(source, target)

    return network
Esempio n. 18
0
def test_distance_matrix():
    """Check the a -> c entry of the distance matrix before and after a shortcut."""
    compute = pp.algorithms.shortest_paths.distance_matrix

    graph = pp.Network()
    graph.add_edges(('a', 'x'), ('x', 'y'), ('y', 'c'))
    matrix = compute(graph)
    # Matrix positions are resolved through the node index, not hard-coded.
    position = graph.nodes.index

    assert matrix[position['a'], position['c']] == 3
    assert graph.distance_matrix()[position['a'], position['c']] == 3

    graph.add_edges(('x', 'c'))
    matrix = compute(graph)

    assert matrix[position['a'], position['c']] == 2
    assert graph.distance_matrix()[position['a'], position['c']] == 2
Esempio n. 19
0
def test_avg_clustering_coefficient():
    """Check the average clustering coefficient of a nine-edge example network."""
    links = (
        ('a', 'b'),
        ('b', 'c'),
        ('c', 'a'),
        ('d', 'e'),
        ('e', 'f'),
        ('f', 'g'),
        ('g', 'd'),
        ('d', 'f'),
        ('b', 'd'),
    )
    graph = pp.Network(directed=False)
    for source, target in links:
        graph.add_edge(source, target)

    average = pp.statistics.clustering.avg_clustering_coefficient(graph)
    assert pytest.approx(average, 0.001) == 0.761904
Esempio n. 20
0
def test_local_clustering_coefficient():
    """Check the local clustering coefficients of nodes 'f' and 'a'."""
    links = (
        ('a', 'b'),
        ('b', 'c'),
        ('c', 'a'),
        ('d', 'e'),
        ('e', 'f'),
        ('f', 'g'),
        ('g', 'd'),
        ('d', 'f'),
        ('b', 'd'),
    )
    graph = pp.Network(directed=False)
    for source, target in links:
        graph.add_edge(source, target)

    local_cc = pp.statistics.clustering.local_clustering_coefficient

    coefficient = local_cc(graph, 'f')
    assert coefficient == 2 / 3
    coefficient = local_cc(graph, 'a')
    assert coefficient == 1
Esempio n. 21
0
def create_user_interaction_graph(conn: sqlite3.Connection,
                                  subreddit: str,
                                  temporal: bool = False):
    """Build the reply network between the users of one subreddit.

    Every comment whose author is not '[deleted]' yields an edge from the
    comment's author to the author of its parent: a submission when the
    parent id is prefixed with 't3', another comment when it is prefixed
    with 't1'. Comments whose parent cannot be found in the query results
    are skipped; for any other prefix a '?' is printed and the comment is
    skipped.

    Args:
        conn: open SQLite connection providing the tables ``comments`` and
            ``submissions``.
        subreddit: name of the subreddit to extract.
        temporal: if true, build a ``pp.TemporalNetwork`` whose edges carry
            the comment's ``created_utc`` (cast to int) as time stamp;
            otherwise build a directed ``pp.Network``.

    Returns:
        The populated network.
    """
    if temporal:
        g = pp.TemporalNetwork()
    else:
        g = pp.Network(directed=True)

    # The subreddit name is bound as a query parameter instead of being
    # formatted into the SQL text, so it cannot alter the statement.
    comment_df = pd.read_sql_query(
        "SELECT id, author, parent_id, created_utc FROM comments "
        "WHERE subreddit==? AND author!='[deleted]'",
        conn, params=(subreddit,))
    submission_df = pd.read_sql_query(
        "SELECT id, author, created_utc FROM submissions WHERE subreddit==?",
        conn, params=(subreddit,))

    def first_author_by_id(frame):
        # Map each id to the author of its first row (the row the former
        # per-comment DataFrame scan would have picked).
        return frame.drop_duplicates('id').set_index('id')['author'].to_dict()

    # One dictionary lookup per comment replaces a full DataFrame scan.
    authors_by_prefix = {
        't3': first_author_by_id(submission_df),  # parent is a submission
        't1': first_author_by_id(comment_df),     # parent is a comment
    }

    rows = zip(comment_df['author'], comment_df['parent_id'],
               comment_df['created_utc'])
    for source_author, parent_id, created_utc in rows:
        t_value, target_id = parent_id.split('_')

        authors = authors_by_prefix.get(t_value)
        if authors is None:
            # Neither a submission nor a comment.
            print('?')
            continue

        # Skip comments whose parent is not part of the query results.
        if target_id not in authors:
            continue

        target_author = authors[target_id]
        if temporal:
            g.add_edge(source_author, target_author, int(created_utc))
        else:
            g.add_edge(source_author, target_author)

    return g
Esempio n. 22
0
def net():
    """Return a directed example network of seven attributed people.

    Nodes carry ``name``, ``age`` and ``gender``; edges carry ``is_formal``.
    The network contains one self-loop on node 'g'.
    """
    people = (
        ('a', 'Alice', 25, 'f'),
        ('b', 'Bob', 31, 'm'),
        ('c', 'Claire', 18, 'f'),
        ('d', 'Dennis', 47, 'm'),
        ('e', 'Esther', 22, 'f'),
        ('f', 'Frank', 23, 'm'),
        ('g', 'George', 50, 'm'),
    )
    relations = (
        ('a', 'b', False),
        ('a', 'c', False),
        ('c', 'd', True),
        ('d', 'e', True),
        ('e', 'c', True),
        ('c', 'f', False),
        ('f', 'a', True),
        ('f', 'g', False),
        ('g', 'g', False),
        ('g', 'd', False),
    )

    network = pp.Network(directed=True)
    for uid, name, age, gender in people:
        network.add_node(uid, name=name, age=age, gender=gender)
    for source, target, formal in relations:
        network.add_edge(source, target, is_formal=formal)
    return network
Esempio n. 23
0
def create_comment_structure_graph(conn: sqlite3.Connection,
                                   subreddit: str,
                                   temporal: bool = False):
    """Build the comment tree structure of one subreddit as a network.

    Every comment yields an edge from the comment id to its parent id. When
    the parent is a submission (prefix 't3'), an additional edge from that
    submission to the subreddit name is added.

    Args:
        conn: open SQLite connection providing the tables ``comments`` and
            ``submissions``.
        subreddit: name of the subreddit to extract; also used as the uid of
            the root node.
        temporal: if true, build a ``pp.TemporalNetwork``. Comment edges are
            stamped with the comment's ``created_utc`` and submission edges
            with the submission's ``created_utc``, both cast to int. A
            submission edge is skipped when the submission is not part of
            the query result, because no time stamp is available for it.
            If false, build a directed ``pp.Network``.

    Returns:
        The populated network.
    """
    # The subreddit name is bound as a query parameter instead of being
    # formatted into the SQL text, so it cannot alter the statement.
    comment_df = pd.read_sql_query(
        "SELECT id, parent_id, created_utc FROM comments WHERE subreddit==?",
        conn, params=(subreddit,))
    submission_df = pd.read_sql_query(
        "SELECT id, subreddit, created_utc FROM submissions WHERE subreddit==?",
        conn, params=(subreddit,))

    if temporal:
        g = pp.TemporalNetwork()
    else:
        g = pp.Network(directed=True)

    # Creation time of the first row per submission id; one dictionary lookup
    # per comment replaces a full DataFrame scan.
    submission_times = submission_df.drop_duplicates('id') \
                                    .set_index('id')['created_utc'].to_dict()

    rows = zip(comment_df['id'], comment_df['parent_id'],
               comment_df['created_utc'])
    for source_id, parent_id, created_utc in rows:
        t_value, target_id = parent_id.split('_')

        if temporal:
            g.add_edge(source_id, target_id, int(created_utc))
        else:
            g.add_edge(source_id, target_id)

        # If the comment points at a submission also add edge to the subreddit.
        if t_value != 't3':
            continue
        if not temporal:
            g.add_edge(target_id, subreddit)
        elif target_id in submission_times:
            g.add_edge(target_id, subreddit, int(submission_times[target_id]))

    return g

Considering that in reality we often do not have ground-truth that allows us to test which order performs best, this highlights the problem that we must decide which order to use for a given data set. We will solve this riddle in session 2, when we introduce a method to learn the optimal order for a given data set. 


### Path statistics from origin-destination data

In the example above, the data provide us with full knowledge about the exact itinerary taken by each passenger. However, we are often confronted with situations where we do not have such detailed information about paths. Nevertheless, we often have aggregate information that allows us to generate path statistics: Consider a setting where we know (1) the topology of a transportation network, and (2) the origin and destination stations of individual passengers, i.e. where passengers start and finish their journey. Under the assumption that passengers travel along shortest paths, we can now use this information to extract the path statistics that we need. 

`pathpy` provides a number of path extraction methods that help you to deal with such situations. For the situation described above, we can use the `pp.path_extraction.paths_from_origin_destination` method to generate path statistics based on tuples capturing origin/destination statistics and an instance of the class `Network`. Let us try this in a toy example.

<span style="color:red">**TODO:** Generate a directed network with six nodes and five edges $(a,c), (b,c), (c,d), (d,f), (d,g)$. Plot the network. Based on a list of tuples $(a, f, 5), (b, g, 10)$ capturing origin destination statistics, use the method `pp.path_extraction.paths_from_origin_destination` to generate a `Paths` object and print the result.</span>
""")

#%% In [16]
# Toy transportation network: a and b feed into c, c leads to d, and d
# branches out to f and g.
n = pp.Network(directed=True)
n.add_edge('a', 'c')
n.add_edge('b', 'c')
n.add_edge('c', 'd')
n.add_edge('d', 'f')
n.add_edge('d', 'g')

pp.visualisation.plot(n)

# Origin/destination statistics as (origin, destination, count) tuples.
od_stats = [('a', 'f', 5), ('b', 'g', 10)]

# Derive path statistics from the origin/destination tuples and the network.
paths = pp.path_extraction.paths_from_origin_destination(od_stats, n)
print(paths)

#%%
md("""
Esempio n. 25
0
def main():
    """Write a styled drawing of a small social network to 'network.tex'.

    Builds a directed network of seven people with ``name``, ``age`` and
    ``gender`` attributes and ten ``is_formal`` relations, assembles the node,
    edge and general plot options, and passes everything to ``plot``.
    """
    # Network and attributes
    # ----------------------
    people = (
        ('a', 'Alice', 25, 'f'),
        ('b', 'Bob', 31, 'm'),
        ('c', 'Claire', 18, 'f'),
        ('d', 'Dennis', 47, 'm'),
        ('e', 'Esther', 22, 'f'),
        ('f', 'Frank', 23, 'm'),
        ('g', 'George', 50, 'm'),
    )
    relations = (
        ('a', 'b', False),
        ('a', 'c', False),
        ('c', 'd', True),
        ('d', 'e', True),
        ('e', 'c', True),
        ('c', 'f', False),
        ('f', 'a', True),
        ('f', 'g', False),
        ('g', 'g', False),
        ('g', 'd', False),
    )

    net = pp.Network(directed=True)
    for uid, name, age, gender in people:
        net.add_node(uid, name=name, age=age, gender=gender)
    for source, target, formal in relations:
        net.add_edge(source, target, is_formal=formal)

    # Gender-dependent lookup tables and fixed node positions
    # -------------------------------------------------------
    color_dict = {"m": "blue", "f": "red"}
    shape_dict = {"m": "circle", "f": "rectangle"}
    style_dict = {"m": "{shading=ball}", "f": None}
    layout = {
        'a': (4.3191, -3.5352),
        'b': (0.5292, -0.5292),
        'c': (8.6559, -3.8008),
        'd': (12.4117, -7.5239),
        'e': (12.7, -1.7069),
        'f': (6.0022, -9.0323),
        'g': (9.7608, -12.7)
    }

    def by_gender(table):
        """Map every node to the entry of ``table`` matching its gender."""
        return {n: table[a['gender']] for n, a in net.nodes.items()}

    # node styles
    # -----------
    node_options = {
        'node_size': 5,
        'node_color': by_gender(color_dict),
        'node_opacity': .7,
        'node_label': {n: a['name'] for n, a in net.nodes.items()},
        'node_label_position': 'below',
        'node_label_distance': 15,
        'node_label_color': 'gray',
        'node_label_size': 3,
        'node_shape': by_gender(shape_dict),
        'node_style': by_gender(style_dict),
        'node_label_off': {'e': True},
        'node_math_mode': {'a': True},
        'node_label_as_id': {'f': True},
        'node_pseudo': {'d': True},
    }

    # edge styles
    # -----------
    edge_width = {
        e: .3 + .3 * int(a['is_formal'])
        for e, a in net.edges.items()
    }
    # Default label is the concatenation of both end points; the edge
    # ('a', 'c') gets a LaTeX fraction instead (rendered in math mode below).
    edge_label = {e: e[0] + e[1] for e in net.edges}
    edge_label[('a', 'c')] = '\\frac{\\alpha}{\\beta}'

    edge_options = {
        'edge_width': edge_width,
        'edge_color': 'black',
        'edge_opacity': .8,
        'edge_curved': 0.1,
        'edge_label': edge_label,
        'edge_label_position': 'above',
        'edge_label_distance': .6,
        'edge_label_color': 'gray',
        'edge_label_size': {('a', 'c'): 5},
        'edge_style': 'dashed',
        'edge_arrow_size': .2,
        'edge_arrow_width': .2,
        'edge_loop_size': 15,
        'edge_loop_position': 90,
        'edge_loop_shape': 45,
        'edge_directed': {
            ('a', 'b'): True,
            ('a', 'c'): True,
            ('c', 'd'): False,
            ('d', 'e'): True,
            ('e', 'c'): True,
            ('c', 'f'): False,
            ('f', 'a'): True,
            ('f', 'g'): True,
            ('g', 'g'): True
        },
        'edge_math_mode': {('a', 'c'): True},
        'edge_not_in_bg': {('f', 'a'): True},
    }

    # general options
    # ---------------
    general_options = {
        'unit': 'mm',
        'layout': layout,
        "margin": {'top': 5, 'bottom': 8, 'left': 5, 'right': 5},
        "canvas": (100, 60),
        'keep_aspect_ratio': False,
    }

    # Visual style dict, assembled in node / edge / general order
    # -----------------------------------------------------------
    visual_style = {}
    visual_style.update(node_options)
    visual_style.update(edge_options)
    visual_style.update(general_options)

    # Create a latex file
    plot(net, 'network.tex', **visual_style)
Esempio n. 26
0
def test_simple():
    """Plot a network that consists of two isolated nodes."""
    graph = pp.Network()
    for uid in ('a', 'b'):
        graph.add_node(uid)
    plot(graph)
Esempio n. 27
0
def test_avg_path_length():
    """Check the average path length of the undirected path a - x - c."""
    chain = pp.Network(directed=False)
    for source, target in (('a', 'x'), ('x', 'c')):
        chain.add_edge(source, target)

    average = pp.algorithms.shortest_paths.avg_path_length(chain)
    assert average == 8/6
Esempio n. 28
0
#%%
import pathpy as pp
# %%
# Minimal example: a network with the single edge a - b, plotted with the
# network's own plot method.
n = pp.Network()
n.add_edge('a', 'b')
n.plot()
# %%
Esempio n. 29
0
def test_to_networkx():
    """Convert a single-edge network with the networkx converter."""
    source_network = pp.Network()
    source_network.add_edge("a", "b")
    converted = pp.converters.to_networkx(source_network)
Esempio n. 30
0
def get_coauthorship_network(sqlite_db_file,
                             author_identifier='author_id',
                             time_from=None,
                             time_to=None):
    """
    Returns coauthorship network containing links between authors who coedited at least one code
    file within a given time window.

    Commit times are converted to epoch seconds (the stored time interpreted
    via ``calendar.timegm`` minus the stored timezone value) before they are
    compared with the time window.

    :param str sqlite_db_file: path to SQLite database
    :param str author_identifier: column of the ``commits`` table that
        identifies an author; one of ``'author_id'``, ``'author_name'`` or
        ``'author_email'``
    :param datetime.datetime time_from: start time of time window filter;
        defaults to the earliest commit time found in the data
    :param datetime.datetime time_to: end time of time window filter;
        defaults to the latest commit time found in the data

    :raises ValueError: if ``author_identifier`` is not one of the supported
        column names

    :return:
        - *pathpy.Network* –  coauthorship network
        - *dict* – info on node characteristics (currently empty)
        - *dict* – info on edge characteristics (currently empty)
    """
    # Validate first: the value is interpolated into the SQL text below as a
    # column name, so only the known columns may pass.
    if author_identifier not in ('author_id', 'author_name', 'author_email'):
        raise ValueError(
            "author_identifier must be from {'author_id', 'author_name', 'author_email'}."
        )

    if author_identifier == 'author_id':
        _ensure_author_id_exists(sqlite_db_file)

    con = sqlite3.connect(sqlite_db_file)
    try:
        edits = pd.read_sql(
            """SELECT original_commit_deletion AS pre_commit,
                                  commit_hash AS post_commit,
                                  filename
                           FROM edits""", con)
        commits = pd.read_sql(
            """SELECT hash,
                                    {column} as author_identifier,
                                    author_date AS time,
                                    author_timezone AS timezone
                             FROM commits""".format(column=author_identifier),
            con)
    finally:
        # The connection is only needed for the two reads above.
        con.close()

    # Attribute every edit to the author of the commit that is edited
    # (pre_commit) as well as to the author of the editing commit (post_commit).
    data_pre = pd.merge(edits, commits, how='left', left_on='pre_commit', right_on='hash') \
                    .drop(['pre_commit', 'post_commit', 'hash'], axis=1)
    data_post = pd.merge(edits, commits, how='left', left_on='post_commit', right_on='hash') \
                    .drop(['pre_commit', 'post_commit', 'hash'], axis=1)
    data = pd.concat([data_pre, data_post])

    # Convert the commit times to epoch seconds; rows without a time stay NaN.
    data['time'] = [
        int(
            calendar.timegm(
                datetime.datetime.strptime(t, '%Y-%m-%d %H:%M:%S').timetuple())
            - tz) if not pd.isnull(t) else np.nan
        for t, tz in zip(data.time, data.timezone)
    ]
    data = data.drop(['timezone'], axis=1)

    all_times = [dt for dt in data.time if not pd.isnull(dt)]
    # Without any valid timestamp there is nothing to derive a default from;
    # the bound then stays None and the filter below yields an empty frame.
    if time_from is not None:
        time_from = int(calendar.timegm(time_from.timetuple()))
    elif all_times:
        time_from = min(all_times)
    if time_to is not None:
        time_to = int(calendar.timegm(time_to.timetuple()))
    elif all_times:
        time_to = max(all_times)

    # Rows without a timestamp never satisfy a time comparison, so they are
    # excluded up front.
    in_window = data['time'].notna()
    if time_from is not None:
        in_window = in_window & (data['time'] >= time_from)
    if time_to is not None:
        in_window = in_window & (data['time'] <= time_to)
    data = data.loc[in_window, :]

    node_info = {}
    edge_info = {}

    n = pp.Network()
    for file in data.filename.unique():
        n.add_clique(set(data.loc[data.filename == file, 'author_identifier']))

    # remove self loops; iterate over a snapshot because edges are removed
    # from the network while looping
    for edge in list(n.edges):
        if edge[0] == edge[1]:
            n.remove_edge(edge[0], edge[1])

    return n, node_info, edge_info