def accepted_peps_by_developer_until_year(year, peps=None):
    """Count, per author, the accepted PEPs created up to and including `year`.

    A PEP is treated as accepted when its ``status`` is one of ``Accepted``,
    ``Final``, ``Active`` or ``Superseded``.  Note that the cut-off is applied
    to the PEP's creation year, not to the year its status changed.

    Parameters
    ----------
    year : int
        Last creation year (inclusive) to take into account.
    peps : iterable, optional
        PEP objects exposing ``created`` and ``status``.  When ``None`` the
        PEPs are loaded with ``get_peps()``.

    Returns
    -------
    The result of ``count_peps_by_author`` applied to the selected PEPs.
    """
    if peps is None:
        peps = get_peps()
    valid_status = {u'Accepted', u'Final', u'Active', u'Superseded'}
    # PEPs without a creation date cannot be placed in time; skip them rather
    # than fail on ``None.year`` (callers that pass ``peps`` already filter
    # these out, but the ``get_peps()`` default path does not).
    accepted_peps = [
        pep for pep in peps
        if pep.created is not None
        and pep.created.year <= year
        and pep.status in valid_status
    ]
    return count_peps_by_author(accepted_peps)
def write_developer_contrib_df(fname='data/developer_contributions_df.csv'):
    """Write a CSV with one row per (year, developer) of contribution metrics.

    For every yearly network (the first one yielded by ``networks_by_year()``
    is discarded and the rest are paired with the years 1992..2014) the
    function gathers PEP authorship counts, BDFL-delegate membership,
    connectivity and centrality results loaded from the pickled result files,
    clustering measures, and weighted degrees, then emits one row for each
    developer listed for that year.

    Parameters
    ----------
    fname : str
        Path of the CSV file to (over)write.
    """
    def count_of(counts, key):
        # Developers absent from a count table contribute zero.
        return counts[key] if key in counts else 0

    header = [
        'id', 'year', 'dev',
        'has_written_peps', 'has_written_acc_peps', 'is_delegate',
        'peps_this_year', 'total_peps',
        'accepted_peps_year', 'total_accepted_peps',
        'degree',
        'contributions_sc', 'contributions_edits',
        'contributions_added', 'contributions_deleted',
        'collaborators', 'knum', 'aknum', 'top', 'top2', 'tenure',
        'betweenness', 'closeness', 'degree_cent',
        'file_mean_degree', 'clus_sq', 'clus_dot', 'clus_red',
    ]

    ids = utils.UniqueIdGenerator()
    peps = [pep for pep in get_peps() if pep.created is not None]
    connectivity = utils.load_result_pkl(connectivity_file)
    centrality = utils.load_result_pkl(centrality_file)
    yearly_networks = networks_by_year()
    next(yearly_networks)  # the first yearly network is not analyzed
    networks = list(yearly_networks)
    years = range(1992, 2015)
    devs_by_year = get_developers_by_years(networks=networks)

    with open(fname, 'wb') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(header)
        for year, G in zip(years, networks):
            print("Analyzing {}".format(G.name))
            # PEP related tables for this year.
            bdfl_delegates = get_delegates_by_year(year, peps=peps)
            written_year = peps_by_developer_that_year(year, peps=peps)
            written_total = peps_by_developer_until_year(year, peps=peps)
            accepted_year = accepted_peps_by_developer_that_year(year, peps=peps)
            accepted_total = accepted_peps_by_developer_until_year(year, peps=peps)
            # Connectivity related tables.
            top = get_developers_top_connectivity_by_year(
                G, year, connectivity=connectivity)
            top2 = get_developers_top_connectivity_by_year_new(
                G, year, connectivity=connectivity)
            devs = devs_by_year[year]
            tenure = compute_tenure_by_year(year, networks=networks)
            k_num = connectivity[year]['k_num']
            # Normalized centralities.
            year_centrality = centrality[year]
            betweenness = normalize(year_centrality['bet'])
            closeness = normalize(year_centrality['clos'])
            degree_cent = normalize(year_centrality['deg'])
            # Clustering measures.
            clus_sq = nx.square_clustering(G)
            clus_dot = bp.clustering(G)
            clus_red = bp.node_redundancy(G)

            for dev in devs:
                dev_id = ids[dev]
                neighbors = G[dev]
                degree = len(neighbors)
                k_component, avg_k_component = k_num[dev][0], k_num[dev][1]
                # Mean (unweighted) degree of the nodes adjacent to the developer.
                mean_neighbor_degree = (
                    sum(len(G[nbr]) for nbr in neighbors) / float(degree)
                )
                row = [
                    dev_id,
                    year,
                    dev.encode('utf8'),
                    int(dev in written_total),   # has written at least a pep
                    int(dev in accepted_total),  # has written at least an acc. pep
                    int(dev in bdfl_delegates),  # has been BDFL delegate
                    count_of(written_year, dev),    # peps written this year
                    count_of(written_total, dev),   # peps written until this year
                    count_of(accepted_year, dev),   # peps acc. this year
                    count_of(accepted_total, dev),  # total peps acc.
                    degree,                           # unweighted degree
                    G.degree(dev, weight='weight'),   # lines of code added plus deleted
                    G.degree(dev, weight='edits'),    # number files edit
                    G.degree(dev, weight='added'),    # lines of code added
                    G.degree(dev, weight='deleted'),  # lines of code removed
                    second_order_nbrs(G, dev),        # second order neighbors
                    k_component,                      # k-component number
                    avg_k_component,                  # average k-component number
                    int(dev in top),                  # top connectivity level
                    int(dev in top2),                 # top 2 connectivity level
                    tenure[dev],
                    betweenness[dev],
                    closeness[dev],
                    degree_cent[dev],
                    mean_neighbor_degree,
                    clus_sq[dev],
                    clus_dot[dev],
                    clus_red[dev],
                ]
                writer.writerow(row)
def get_delegates_by_year(year, peps=None):
    """Return the names of the delegates of PEPs created in `year`.

    Parameters
    ----------
    year : int
        Creation year a PEP must have for its delegates to be included.
    peps : iterable, optional
        PEP objects exposing ``created`` and ``delegates``.  When ``None``
        the PEPs are loaded with ``get_peps()``.

    Returns
    -------
    set
        The ``first_last`` attribute of every distinct delegate found.
    """
    if peps is None:
        peps = get_peps()
    # A PEP with delegates but no creation date cannot be assigned to a year;
    # skip it instead of failing on ``None.year`` (the ``get_peps()`` default
    # path is not pre-filtered the way the callers' lists are).
    delegates = set(flatten(
        p.delegates for p in peps
        if p.delegates and p.created is not None and p.created.year == year
    ))
    return {d.first_last for d in delegates}
def peps_by_developer_until_year(year, peps=None):
    """Count, per author, the PEPs created up to and including `year`.

    Parameters
    ----------
    year : int
        Last creation year (inclusive) to take into account.
    peps : iterable, optional
        PEP objects exposing ``created``.  When ``None`` the PEPs are loaded
        with ``get_peps()``.

    Returns
    -------
    The result of ``count_peps_by_author`` applied to the selected PEPs.
    """
    if peps is None:
        peps = get_peps()
    # Undated PEPs cannot be compared against ``year``; skip them rather than
    # fail on ``None.year`` when the unfiltered ``get_peps()`` default is used.
    peps_until_year = [
        pep for pep in peps
        if pep.created is not None and pep.created.year <= year
    ]
    return count_peps_by_author(peps_until_year)
def build_survival_data_frame(fname=survival_file):
    """Write the per-period developer CSV used for survival analysis.

    Starting from the 1992 network, each iteration describes the developers
    of ``previous_year`` using that year's network, connectivity, centrality
    and PEP data, and records in ``status`` whether the developer shows up
    again in any of the following years.  One row is written per developer
    per period; the developers of the last network are therefore never
    written as a period of their own.

    Parameters
    ----------
    fname : str
        Path of the CSV file to (over)write.
    """
    nan = float('nan')
    ids = utils.UniqueIdGenerator()
    connectivity = utils.load_result_pkl(connectivity_file)
    centrality = utils.load_result_pkl(centrality_file)
    peps = [pep for pep in get_peps() if pep.created is not None]
    networks = list(networks_by_year())
    # Developers per year are computed from the full list, before trimming it.
    devs = get_developers_by_years(networks=networks)
    networks.pop(0)  # skip 1991
    G_start = networks.pop(0)  # start with 1992
    # Nodes flagged ``bipartite == 1`` are taken as the developers.
    devs_start = set(
        n for n, d in G_start.nodes(data=True) if d['bipartite'] == 1
    )
    years = range(1993, 2015)
    with open(fname, 'wb') as f:
        out = csv.writer(f)
        out.writerow([
            'id', 'dev', 'period', 'rstart', 'rstop', 'status',
            'has_written_peps', 'has_written_acc_peps',
            'peps_this_year', 'total_peps',
            'accepted_peps_year', 'total_accepted_peps',
            'biconnected', 'top', 'tenure', 'colaborators',
            'knum', 'aknum', 'clus_sq', 'clus_dot', 'clus_red',
            'degree', 'contributions',
            'dcentrality', 'betweenness', 'closeness',
        ])
        previous_devs = devs_start
        previous_year = 1992
        previous_G = G_start
        for i, (year, G) in enumerate(zip(years, networks)):
            print("processing year {}".format(previous_year))
            these_devs = devs[year]
            # Developers present in the current or any later year.
            remaining_devs = get_all_remaining_devs(devs, years[i:])
            k_num = connectivity[previous_year]['k_num']
            top_devs = get_developers_top_connectivity(
                connectivity[previous_year]['k_components'], previous_devs)
            tenure = compute_tenure_by_year(previous_year)
            bet = normalize(centrality[previous_year]['bet'])
            # Closeness must come from the 'clos' entry; reading 'bet' here
            # would duplicate betweenness in the closeness column.
            clos = normalize(centrality[previous_year]['clos'])
            deg = normalize(centrality[previous_year]['deg'])
            # Clustering measures are computed once per period.
            clus_sq = nx.square_clustering(previous_G)
            clus_dot = bp.clustering(previous_G)
            clus_red = bp.node_redundancy(previous_G)
            peps_this_year = peps_by_developer_that_year(previous_year, peps=peps)
            peps_until_year = peps_by_developer_until_year(previous_year, peps=peps)
            acc_peps_this_year = accepted_peps_by_developer_that_year(previous_year, peps=peps)
            acc_peps_until_year = accepted_peps_by_developer_until_year(previous_year, peps=peps)
            for dev in previous_devs:
                # NOTE(review): the 'biconnected' column indexes k_num
                # directly, so a developer missing from k_num raises KeyError
                # before the nan defaults below can apply -- confirm intent.
                k_entry = k_num.get(dev, (nan, nan))
                out.writerow([
                    ids[dev],  # developer numerical ID
                    dev.encode('utf8'),  # developer name
                    i + 1,  # period
                    i,  # start
                    i + 1,  # stop
                    0 if dev in remaining_devs else 1,  # status (censored)
                    1 if dev in peps_until_year else 0,  # has written at least a pep
                    1 if dev in acc_peps_until_year else 0,  # has written at least an acc. pep
                    peps_this_year[dev] if dev in peps_this_year else 0,  # peps written this year
                    peps_until_year[dev] if dev in peps_until_year else 0,  # peps written until this year
                    acc_peps_this_year[dev] if dev in acc_peps_this_year else 0,  # peps acc. this year
                    acc_peps_until_year[dev] if dev in acc_peps_until_year else 0,  # total peps acc.
                    0 if k_num[dev][0] < 2 else 1,  # biconnected
                    0 if dev not in top_devs else 1,  # member of the top connectivity level
                    tenure[dev],  # tenure in years
                    second_order_nbrs(previous_G, dev),  # collaborators
                    k_entry[0],  # knum
                    k_entry[1],  # aknum
                    clus_sq.get(dev, nan),
                    clus_dot.get(dev, nan),
                    clus_red.get(dev, nan),
                    previous_G.degree(dev),  # degree
                    previous_G.degree(dev, weight='weight'),  # contributions
                    deg.get(dev, nan),
                    bet.get(dev, nan),
                    clos.get(dev, nan),
                ])
            previous_devs = these_devs
            previous_year = year
            previous_G = G