Ejemplo n.º 1
0
def accepted_peps_by_developer_until_year(year, peps=None):
    """Count per author the accepted PEPs created in ``year`` or earlier.

    A PEP is treated as accepted when its ``status`` is one of
    ``Accepted``, ``Final``, ``Active`` or ``Superseded``.

    Parameters
    ----------
    year : int
        Upper bound (inclusive), compared against ``pep.created.year``.
    peps : iterable of PEP objects, optional
        PEPs to consider. When ``None`` they are obtained from
        ``get_peps()``. Every PEP must have a non-``None`` ``created``
        attribute.

    Returns
    -------
    Whatever ``count_peps_by_author`` returns for the selected PEPs.
    """
    source = get_peps() if peps is None else peps
    accepted_like = {u'Accepted', u'Final', u'Active', u'Superseded'}
    # Date filter first, status second: the status is only inspected for
    # PEPs that already passed the year test.
    selected = [
        candidate for candidate in source
        if candidate.created.year <= year and candidate.status in accepted_like
    ]
    return count_peps_by_author(selected)
Ejemplo n.º 2
0
def write_developer_contrib_df(fname='data/developer_contributions_df.csv'):
    """Write one CSV row per (year, developer) with contribution statistics.

    For every year in 1992..2014 the matching yearly network is combined
    with PEP authorship counts, the precomputed connectivity and centrality
    results (loaded from ``connectivity_file`` and ``centrality_file``) and
    bipartite clustering measures. One row is written for each developer
    listed for that year by ``get_developers_by_years``.

    Parameters
    ----------
    fname : str
        Path of the CSV file to create (overwritten if it exists).

    Notes
    -----
    ``file_mean_degree`` is the mean degree of the developer's neighbors
    in the yearly network. It is written as NaN for a developer without
    neighbors instead of aborting the export with ``ZeroDivisionError``.
    """
    nan = float('nan')
    ids = utils.UniqueIdGenerator()
    # PEPs without a creation date cannot be assigned to a year.
    peps = [pep for pep in get_peps() if pep.created is not None]
    connectivity = utils.load_result_pkl(connectivity_file)
    centrality = utils.load_result_pkl(centrality_file)
    networks_gen = networks_by_year()
    # Discard the first yielded network so that the remaining ones pair up
    # with ``years`` starting at 1992 (presumably the dropped one is 1991).
    next(networks_gen)
    networks = list(networks_gen)
    years = range(1992, 2015)
    devs_by_year = get_developers_by_years(networks=networks)
    # Binary mode is what the Python 2 csv module expects.
    with open(fname, 'wb') as f:
        out = csv.writer(f)
        out.writerow([
            'id', 'year', 'dev', 'has_written_peps', 'has_written_acc_peps',
            'is_delegate', 'peps_this_year', 'total_peps',
            'accepted_peps_year', 'total_accepted_peps',
            'degree', 'contributions_sc', 'contributions_edits',
            'contributions_added', 'contributions_deleted',
            'collaborators', 'knum', 'aknum', 'top', 'top2',
            'tenure', 'betweenness', 'closeness', 'degree_cent',
            'file_mean_degree', 'clus_sq', 'clus_dot', 'clus_red',
        ])
        for year, G in zip(years, networks):
            print("Analyzing {}".format(G.name))
            # Per-year lookups, computed once and reused for every developer.
            bdfl_delegates = get_delegates_by_year(year, peps=peps)
            peps_this_year = peps_by_developer_that_year(year, peps=peps)
            peps_until_year = peps_by_developer_until_year(year, peps=peps)
            acc_peps_this_year = accepted_peps_by_developer_that_year(year, peps=peps)
            acc_peps_until_year = accepted_peps_by_developer_until_year(year, peps=peps)
            top = get_developers_top_connectivity_by_year(G, year,
                                                          connectivity=connectivity)
            top2 = get_developers_top_connectivity_by_year_new(G, year,
                                                               connectivity=connectivity)
            devs = devs_by_year[year]
            tenure = compute_tenure_by_year(year, networks=networks)
            k_num = connectivity[year]['k_num']
            bet = normalize(centrality[year]['bet'])
            clos = normalize(centrality[year]['clos'])
            deg = normalize(centrality[year]['deg'])
            clus_sq = nx.square_clustering(G)
            clus_dot = bp.clustering(G)
            clus_red = bp.node_redundancy(G)
            for dev in devs:
                # Look the neighborhood up once; it feeds both the plain
                # degree and the mean neighbor degree.
                nbrs = G[dev]
                degree = len(nbrs)
                if degree:
                    file_mean_degree = sum(len(G[n]) for n in nbrs) / float(degree)
                else:
                    # No neighbors: the mean is undefined.
                    file_mean_degree = nan
                out.writerow([
                    ids[dev],
                    year,
                    dev.encode('utf8'),
                    1 if dev in peps_until_year else 0, # developer has written at least a pep
                    1 if dev in acc_peps_until_year else 0, # developer has written at least an acc. pep
                    1 if dev in bdfl_delegates else 0, # developer has been BDFL delegate
                    peps_this_year[dev] if dev in peps_this_year else 0, # peps written this year
                    peps_until_year[dev] if dev in peps_until_year else 0, # peps written until this year
                    acc_peps_this_year[dev] if dev in acc_peps_this_year else 0, # peps acc. this year
                    acc_peps_until_year[dev] if dev in acc_peps_until_year else 0, # total peps acc.
                    degree, # unweighted degree (number of neighbors)
                    G.degree(dev, weight='weight'), # lines of code added plus deleted
                    G.degree(dev, weight='edits'), # number files edit
                    G.degree(dev, weight='added'), # lines of code added
                    G.degree(dev, weight='deleted'), # lines of code removed
                    second_order_nbrs(G, dev), # second order neighbors
                    k_num[dev][0], # k-component number
                    k_num[dev][1], # Average k-component number
                    1 if dev in top else 0, # top connectivity level
                    1 if dev in top2 else 0, # top 2 connectivity level
                    tenure[dev],
                    bet[dev],
                    clos[dev],
                    deg[dev],
                    file_mean_degree, # mean degree of the developer's neighbors
                    clus_sq[dev],
                    clus_dot[dev],
                    clus_red[dev],
                ])
Ejemplo n.º 3
0
def get_delegates_by_year(year, peps=None):
    """Return the ``first_last`` values of delegates of PEPs created in ``year``.

    Parameters
    ----------
    year : int
        Only PEPs whose ``created.year`` equals this value are considered.
    peps : iterable of PEP objects, optional
        PEPs to inspect. When ``None`` they are obtained from ``get_peps()``.

    Returns
    -------
    set
        The ``first_last`` attribute of every distinct delegate found.
    """
    source = get_peps() if peps is None else peps
    # PEPs with a falsy ``delegates`` value are skipped before the creation
    # date is looked at.
    delegate_groups = (
        pep.delegates for pep in source
        if pep.delegates and pep.created.year == year
    )
    unique_delegates = set(flatten(delegate_groups))
    return {delegate.first_last for delegate in unique_delegates}
Ejemplo n.º 4
0
def peps_by_developer_until_year(year, peps=None):
    """Count per author the PEPs created in ``year`` or earlier.

    Parameters
    ----------
    year : int
        Upper bound (inclusive), compared against ``pep.created.year``.
    peps : iterable of PEP objects, optional
        PEPs to consider. When ``None`` they are obtained from
        ``get_peps()``. Every PEP must have a non-``None`` ``created``
        attribute.

    Returns
    -------
    Whatever ``count_peps_by_author`` returns for the selected PEPs.
    """
    source = get_peps() if peps is None else peps
    created_in_time = [
        candidate for candidate in source
        if candidate.created.year <= year
    ]
    return count_peps_by_author(created_in_time)
Ejemplo n.º 5
0
def build_survival_data_frame(fname=survival_file):
    """Write the developer survival table to ``fname`` as CSV.

    The yearly networks are walked pairwise. In each iteration one row is
    written for every developer of the *previous* year, describing that
    developer in the previous year's network (PEP counts, connectivity,
    clustering, centrality). ``period``/``rstart``/``rstop`` are derived
    from the iteration index ``i`` as ``i + 1``, ``i`` and ``i + 1``.
    ``status`` is 0 when the developer is in the set returned by
    ``get_all_remaining_devs(devs, years[i:])`` and 1 otherwise.

    Parameters
    ----------
    fname : str
        Path of the CSV file to create (overwritten if it exists).

    Notes
    -----
    Measures looked up with ``.get`` are written as NaN for developers
    missing from the corresponding result. The ``biconnected`` column and
    ``tenure`` index directly and raise ``KeyError`` in that case.
    """
    nan = float('nan')
    ids = utils.UniqueIdGenerator()
    connectivity = utils.load_result_pkl(connectivity_file)
    centrality = utils.load_result_pkl(centrality_file)
    # PEPs without a creation date cannot be assigned to a year.
    peps = [pep for pep in get_peps() if pep.created is not None]
    networks = list(networks_by_year())
    # Developers are collected from the full list, before any year is dropped.
    devs = get_developers_by_years(networks=networks)
    networks.pop(0) # skip 1991
    G_start = networks.pop(0) # start with 1992
    devs_start = set(n for n, d in G_start.nodes(data=True) if d['bipartite']==1)
    years = range(1993, 2015)
    # Binary mode is what the Python 2 csv module expects.
    with open(fname, 'wb') as f:
        out = csv.writer(f)
        out.writerow([
            'id', 'dev', 'period', 'rstart', 'rstop', 'status',
            'has_written_peps', 'has_written_acc_peps',
            'peps_this_year', 'total_peps',
            'accepted_peps_year', 'total_accepted_peps',
            'biconnected', 'top', 'tenure', 'colaborators',
            'knum', 'aknum', 'clus_sq', 'clus_dot', 'clus_red',
            'degree', 'contributions', 'dcentrality',
            'betweenness', 'closeness',
        ])
        previous_devs = devs_start
        previous_year = 1992
        previous_G = G_start
        for i, (year, G) in enumerate(zip(years, networks)):
            print("processing year {}".format(previous_year))
            these_devs = devs[year]
            remaining_devs = get_all_remaining_devs(devs, years[i:])
            top_devs = get_developers_top_connectivity(
                connectivity[previous_year]['k_components'],
                previous_devs)
            k_num = connectivity[previous_year]['k_num']
            tenure = compute_tenure_by_year(previous_year)
            bet = normalize(centrality[previous_year]['bet'])
            # Closeness must come from the 'clos' entry; reading 'bet' here
            # would make the closeness column a copy of betweenness.
            clos = normalize(centrality[previous_year]['clos'])
            deg = normalize(centrality[previous_year]['deg'])
            # Clustering measures are computed once per year on the
            # previous year's network.
            clus_sq = nx.square_clustering(previous_G)
            clus_dot = bp.clustering(previous_G)
            clus_red = bp.node_redundancy(previous_G)
            peps_this_year = peps_by_developer_that_year(previous_year, peps=peps)
            peps_until_year = peps_by_developer_until_year(previous_year, peps=peps)
            acc_peps_this_year = accepted_peps_by_developer_that_year(previous_year, peps=peps)
            acc_peps_until_year = accepted_peps_by_developer_until_year(previous_year, peps=peps)
            for dev in previous_devs:
                out.writerow([
                    ids[dev], # developer numerical ID
                    dev.encode('utf8'), # developer name
                    i + 1, # period
                    i, # start
                    i + 1, # stop
                    0 if dev in remaining_devs else 1, # status (censored)
                    1 if dev in peps_until_year else 0, # developer has written at least a pep
                    1 if dev in acc_peps_until_year else 0, # developer has written at least an acc. pep
                    peps_this_year[dev] if dev in peps_this_year else 0, # peps written this year
                    peps_until_year[dev] if dev in peps_until_year else 0, # peps written until this year
                    acc_peps_this_year[dev] if dev in acc_peps_this_year else 0, # peps acc. this year
                    acc_peps_until_year[dev] if dev in acc_peps_until_year else 0, # total peps acc.
                    0 if k_num[dev][0] < 2 else 1, # biconnected
                    0 if dev not in top_devs else 1, # member of the top connectivity level
                    tenure[dev], # tenure in years
                    second_order_nbrs(previous_G, dev), # collaborators
                    k_num.get(dev, (nan, nan))[0], # knum
                    k_num.get(dev, (nan, nan))[1], # aknum
                    clus_sq.get(dev, nan),
                    clus_dot.get(dev, nan),
                    clus_red.get(dev, nan),
                    previous_G.degree(dev), # degree
                    previous_G.degree(dev, weight='weight'), # contributions
                    deg.get(dev, nan), # degree centrality
                    bet.get(dev, nan), # betweenness centrality
                    clos.get(dev, nan), # closeness centrality
                ])
            # Slide the window: the current year becomes the "previous" one.
            previous_devs = these_devs
            previous_year = year
            previous_G = G