import logging

from networkx.algorithms import bipartite


def analysis(graph, seed, calc_nrd=True, calc_ncc=True, calc_depth=True):
    """
    Compute per-node statistics on a bipartite graph and store them as node
    attributes: BFS depth from the seed node ('depth'), node redundancy
    ('nrd') and bipartite clustering ('ncc').
    """

    logging.info('Computing Statistics')

    depth, nrd, ncc = {}, {}, {}
    if calc_depth:
        # _bfs_depth is a module-level helper (not shown here) that maps
        # every node to its BFS distance from the seed node.
        depth = _bfs_depth(graph, seed)
    if calc_nrd:
        nrd = bipartite.node_redundancy(graph)
    if calc_ncc:
        ncc = bipartite.clustering(graph, mode='min')

    for node_id in graph.nodes():
        # graph.node is the pre-2.4 NetworkX attribute view; the returned
        # dict is mutated in place, so no reassignment is needed.
        node = graph.node[node_id]
        if calc_depth:
            node['depth'] = depth[node_id]
        if calc_nrd:
            node['nrd'] = nrd[node_id]
        if calc_ncc:
            node['ncc'] = ncc[node_id]

    return graph
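A minimal usage sketch for the function above, assuming it lives in the same module and that NetworkX older than 2.4 is used (for the G.node accessor). The _bfs_depth helper is not part of the original snippet; the stand-in below simply uses shortest-path hop counts:

import networkx as nx

def _bfs_depth(graph, seed):
    # hypothetical stand-in: BFS distance of every node from the seed
    return dict(nx.single_source_shortest_path_length(graph, seed))

G = nx.complete_bipartite_graph(2, 3)  # every node has degree >= 2,
                                       # as node_redundancy requires
G = analysis(G, seed=0)
print(G.node[0])  # e.g. {'depth': 0, 'nrd': 1.0, 'ncc': ...}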
import networkx as nx
from networkx.algorithms import bipartite
import community  # python-louvain


def analyze_graph(G):
    # centralities and node metrics
    out_degrees = G.out_degree()
    in_degrees = G.in_degree()
    betweenness = nx.betweenness_centrality(G)
    eigenvector = nx.eigenvector_centrality_numpy(G)
    closeness = nx.closeness_centrality(G)
    pagerank = nx.pagerank(G)
    avg_neighbour_degree = nx.average_neighbor_degree(G)
    redundancy = bipartite.node_redundancy(G)
    load = nx.load_centrality(G)
    hubs, authorities = nx.hits(G)  # nx.hits returns two dicts, not one
    vitality = nx.closeness_vitality(G)

    for name in G.nodes():
        G.node[name]['out_degree'] = out_degrees[name]
        G.node[name]['in_degree'] = in_degrees[name]
        G.node[name]['betweenness'] = betweenness[name]
        G.node[name]['eigenvector'] = eigenvector[name]
        G.node[name]['closeness'] = closeness[name]
        G.node[name]['pagerank'] = pagerank[name]
        G.node[name]['avg-neigh-degree'] = avg_neighbour_degree[name]
        G.node[name]['redundancy'] = redundancy[name]
        G.node[name]['load'] = load[name]
        G.node[name]['hits'] = hubs[name]  # hub score; authorities[name] is also available
        G.node[name]['vitality'] = vitality[name]

    # communities (python-louvain's best_partition expects an undirected graph)
    partitions = community.best_partition(G)
    for member, c in partitions.items():
        G.node[member]['community'] = c

    return G
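The per-node assignment loop above can also be written with nx.set_node_attributes, which attaches a whole metric dict in one call. A minimal sketch (NetworkX 2.x signature; the toy graph is only for illustration):

import networkx as nx

G = nx.DiGraph([(1, 2), (2, 3), (3, 1)])
pagerank = nx.pagerank(G)
nx.set_node_attributes(G, pagerank, name='pagerank')  # one call instead of a per-node loop
print(G.nodes(data=True))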
Example #4
from networkx import cycle_graph
from networkx.algorithms.bipartite import node_redundancy


def test_redundant_nodes():
    # a 6-cycle with the chord {0, 3} added: the chord's endpoints lose some
    # redundancy, every other node stays fully redundant
    G = cycle_graph(6)
    edge = {0, 3}
    G.add_edge(*edge)
    redundancy = node_redundancy(G)
    for v in edge:
        assert redundancy[v] == 2 / 3
    for v in set(G) - edge:
        assert redundancy[v] == 1
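Why 2/3 for the chord endpoints: after adding {0, 3}, node 0's neighbours are {1, 3, 5}. Of the three neighbour pairs, {1, 3} share node 2 and {3, 5} share node 4 as a second common neighbour, while {1, 5} share only node 0 itself, so 2 of the 3 pairs count and the redundancy coefficient is 2/3. The same argument applies to node 3 by symmetry; every other node has a single neighbour pair, which does share a second common neighbour, giving redundancy 1.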
Example #6
import pytest

from networkx import NetworkXError, complete_bipartite_graph
from networkx.algorithms.bipartite import node_redundancy


def test_not_enough_neighbors():
    # node_redundancy is undefined for nodes with fewer than two neighbours;
    # the two degree-1 nodes of K(1, 2) trigger a NetworkXError
    with pytest.raises(NetworkXError):
        G = complete_bipartite_graph(1, 2)
        node_redundancy(G)
Example #7
from networkx import complete_bipartite_graph
from networkx.algorithms.bipartite import node_redundancy


def test_no_redundant_nodes():
    # in K(2, 2) every pair of a node's neighbours also shares the opposite
    # node on the other side, so every redundancy coefficient is 1
    G = complete_bipartite_graph(2, 2)
    rc = node_redundancy(G)
    assert all(redundancy == 1 for redundancy in rc.values())
Example #10
def write_developer_contrib_df(fname='data/developer_contributions_df.csv'):
    ids = utils.UniqueIdGenerator()
    peps = [pep for pep in get_peps() if pep.created is not None]
    connectivity = utils.load_result_pkl(connectivity_file)
    centrality = utils.load_result_pkl(centrality_file)
    networks_gen = networks_by_year()
    skip = next(networks_gen)
    networks = list(networks_gen)
    years = range(1992, 2015)
    devs_by_year = get_developers_by_years(networks=networks)
    with open(fname, 'wb') as f:
        out = csv.writer(f)
        out.writerow([
            'id', 'year', 'dev', 'has_written_peps', 'has_written_acc_peps',
            'is_delegate', 'peps_this_year', 'total_peps',
            'accepted_peps_year', 'total_accepted_peps',
            'degree', 'contributions_sc', 'contributions_edits',
            'contributions_added', 'contributions_deleted',
            'collaborators', 'knum', 'aknum', 'top', 'top2',
            'tenure', 'betweenness', 'closeness', 'degree_cent',
            'file_mean_degree', 'clus_sq', 'clus_dot', 'clus_red',
        ])
        for year, G in zip(years, networks):
            print("Analyzing {}".format(G.name))
            bdfl_delegates = get_delegates_by_year(year, peps=peps)
            peps_this_year = peps_by_developer_that_year(year, peps=peps)
            peps_until_year = peps_by_developer_until_year(year, peps=peps)
            acc_peps_this_year = accepted_peps_by_developer_that_year(year, peps=peps)
            acc_peps_until_year = accepted_peps_by_developer_until_year(year, peps=peps)
            top = get_developers_top_connectivity_by_year(G, year,
                                                          connectivity=connectivity)
            top2 = get_developers_top_connectivity_by_year_new(G, year,
                                                               connectivity=connectivity)
            devs = devs_by_year[year]
            tenure = compute_tenure_by_year(year, networks=networks)
            k_num = connectivity[year]['k_num']
            bet = normalize(centrality[year]['bet'])
            clos = normalize(centrality[year]['clos'])
            deg = normalize(centrality[year]['deg'])
            clus_sq = nx.square_clustering(G)
            clus_dot = bp.clustering(G)
            clus_red = bp.node_redundancy(G)
            for dev in devs:
                out.writerow([
                    ids[dev],
                    year,
                    dev.encode('utf8'),
                    1 if dev in peps_until_year else 0, # developer has written at least one PEP
                    1 if dev in acc_peps_until_year else 0, # developer has written at least one accepted PEP
                    1 if dev in bdfl_delegates else 0, # developer has been a BDFL delegate
                    peps_this_year[dev] if dev in peps_this_year else 0, # peps written this year
                    peps_until_year[dev] if dev in peps_until_year else 0, # peps written until this year
                    acc_peps_this_year[dev] if dev in acc_peps_this_year else 0, # peps acc. this year
                    acc_peps_until_year[dev] if dev in acc_peps_until_year else 0, # total peps acc.
                    len(G[dev]), #G.degree(dev, weight=None),
                    G.degree(dev, weight='weight'), # lines of code added plus deleted
                    G.degree(dev, weight='edits'), # number of files edited
                    G.degree(dev, weight='added'), # lines of code added
                    G.degree(dev, weight='deleted'), # lines of code removed
                    second_order_nbrs(G, dev), # second order neighbors
                    k_num[dev][0], # k-component number
                    k_num[dev][1], # Average k-component number
                    1 if dev in top else 0, # top connectivity level
                    1 if dev in top2 else 0, # top 2 connectivity level
                    tenure[dev],
                    bet[dev],
                    clos[dev],
                    deg[dev],
                    sum(len(G[n]) for n in G[dev]) / float(len(G[dev])),
                    clus_sq[dev],
                    clus_dot[dev],
                    clus_red[dev],
                ])
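Both this example and the next call a second_order_nbrs helper that is not shown. Going by its "second order neighbors" and "collaborators" comments, a plausible reconstruction might look like the sketch below; the definition, and the bipartite developer-file reading, are assumptions rather than the original implementation:

def second_order_nbrs(G, node):
    # nodes at distance two from `node`; in a bipartite developer-file graph
    # these are the other developers who share at least one file with `node`
    two_hop = set(n for nbr in G[node] for n in G[nbr])
    return len(two_hop - set(G[node]) - {node})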
Example #11
def build_survival_data_frame(fname=survival_file):
    nan = float('nan')
    ids = utils.UniqueIdGenerator()
    connectivity = utils.load_result_pkl(connectivity_file)
    centrality = utils.load_result_pkl(centrality_file)
    peps = [pep for pep in get_peps() if pep.created is not None]
    networks = list(networks_by_year())
    devs = get_developers_by_years(networks=networks)
    skip = networks.pop(0) # skip 1991
    G_start = networks.pop(0) # start with 1992
    devs_start = set(n for n, d in G_start.nodes(data=True) if d['bipartite']==1)
    years = range(1993, 2015)
    with open(fname, 'wb') as f:
        out = csv.writer(f)
        out.writerow([
            'id', 'dev', 'period', 'rstart', 'rstop', 'status',
            'has_written_peps', 'has_written_acc_peps',
            'peps_this_year', 'total_peps',
            'accepted_peps_year', 'total_accepted_peps',
            'biconnected', 'top', 'tenure', 'collaborators',
            'knum', 'aknum', 'clus_sq', 'clus_dot', 'clus_red',
            'degree', 'contributions', 'dcentrality',
            'betweenness', 'closeness',
        ])
        previous_devs = devs_start
        previous_year = 1992
        previous_G = G_start
        for i, (year, G) in enumerate(zip(years, networks)):
            print("processing year {}".format(previous_year))
            these_devs = devs[year]
            remaining_devs = get_all_remaining_devs(devs, years[i:])
            top_devs = get_developers_top_connectivity(
                connectivity[previous_year]['k_components'], 
                previous_devs)
            tenure = compute_tenure_by_year(previous_year)
            bet = normalize(centrality[previous_year]['bet'])
            clos = normalize(centrality[previous_year]['clos'])
            deg = normalize(centrality[previous_year]['deg'])
            clus_sq = nx.square_clustering(previous_G)
            clus_dot = bp.clustering(previous_G)
            clus_red = bp.node_redundancy(previous_G)
            peps_this_year = peps_by_developer_that_year(previous_year, peps=peps)
            peps_until_year = peps_by_developer_until_year(previous_year, peps=peps)
            acc_peps_this_year = accepted_peps_by_developer_that_year(previous_year, peps=peps)
            acc_peps_until_year = accepted_peps_by_developer_until_year(previous_year, peps=peps)
            for dev in previous_devs:
                out.writerow([
                    ids[dev], # developer numerical ID
                    dev.encode('utf8'), # developer name
                    i + 1, # period
                    i, # start
                    i + 1, # stop
                    0 if dev in remaining_devs else 1, # status (censored)
                    1 if dev in peps_until_year else 0, # developer has written at least one PEP
                    1 if dev in acc_peps_until_year else 0, # developer has written at least one accepted PEP
                    peps_this_year[dev] if dev in peps_this_year else 0, # peps written this year
                    peps_until_year[dev] if dev in peps_until_year else 0, # peps written until this year
                    acc_peps_this_year[dev] if dev in acc_peps_this_year else 0, # peps acc. this year
                    acc_peps_until_year[dev] if dev in acc_peps_until_year else 0, # total peps acc.
                    0 if connectivity[previous_year]['k_num'][dev][0] < 2 else 1,#biconnected
                    0 if dev not in top_devs else 1, # member of the top connectivity level
                    tenure[dev], # tenure in years
                    second_order_nbrs(previous_G, dev), # collaborators
                    connectivity[previous_year]['k_num'].get(dev, (nan,nan))[0], # knum
                    connectivity[previous_year]['k_num'].get(dev, (nan,nan))[1], # aknum
                    clus_sq.get(dev, nan),
                    clus_dot.get(dev, nan),
                    clus_red.get(dev, nan),
                    previous_G.degree(dev), # degree
                    previous_G.degree(dev, weight='weight'), # contributions
                    deg.get(dev, nan),
                    bet.get(dev, nan),
                    clos.get(dev, nan),
                ])
            previous_devs = these_devs
            previous_year = year
            previous_G = G
Example #12
# This snippet is a fragment of a larger script: the enclosing function, the
# input file handle and the two output file handles are not shown. The wrapper
# below (its name and signature are reconstructions, not part of the original
# fragment) exists only so the logic is runnable.
import networkx as nx
from networkx.algorithms import bipartite


def split_by_redundancy(readFile, writeFile, writeFile2, dayVal):
    lineArr = []
    edgeArr = []
    G = nx.Graph()

    for line in readFile:
        lineArr.append(line)
        length = len(line.split(","))
        if length != 8:
            continue
        srcIp = line.split(",")[1]
        destIp = line.split(",")[length - 2]
        edgeArr.append((srcIp, destIp))

    print('File Read , Now Creating Graph for Day val = ', dayVal)

    G.add_edges_from(edgeArr)
    print('Edges created')

    redundancyMap = bipartite.node_redundancy(G)

    # keep only the IPs whose redundancy coefficient is non-zero
    validIps = {}
    for el in redundancyMap:
        if redundancyMap[el] != 0.0:
            validIps[el] = True

    print('Redundancy done for Day = ', dayVal)
    print('Now writing into the file for day = ', dayVal)

    # split the input: lines that touch a valid IP go to writeFile,
    # everything else to writeFile2
    for line in lineArr:
        fields = line.split(",")
        srcIp = fields[1]
        destIp = fields[len(fields) - 2]  # field count recomputed per line
        if srcIp in validIps or destIp in validIps:
            writeFile.write(line)
        else:
            writeFile2.write(line)
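A hypothetical invocation of the wrapper above (file names and the day value are placeholders):

with open('day1.csv') as readFile, \
     open('day1_valid.csv', 'w') as writeFile, \
     open('day1_invalid.csv', 'w') as writeFile2:
    split_by_redundancy(readFile, writeFile, writeFile2, dayVal=1)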