def test_star_graph():
    """Bipartite clustering of a star graph is identical in every mode."""
    G = nx.star_graph(3)
    expected = {0: 0, 1: 1, 2: 1, 3: 1}
    # The hub has no second-order neighbours (coefficient 0); each leaf's
    # only pair of neighbours-of-neighbours is the other leaves (coefficient 1).
    for mode in ("dot", "min", "max"):
        assert bipartite.clustering(G, mode=mode) == expected
def test_path_graph():
    """P4: 'dot' and 'max' give 0.5 everywhere; 'min' gives 1 everywhere."""
    G = nx.path_graph(4)
    answer = {0: 0.5, 1: 0.5, 2: 0.5, 3: 0.5}
    # Plain asserts replace the deprecated nose-style assert_equal helper.
    assert bipartite.clustering(G, mode='dot') == answer
    assert bipartite.clustering(G, mode='max') == answer
    answer = {0: 1, 1: 1, 2: 1, 3: 1}
    assert bipartite.clustering(G, mode='min') == answer
def test_star_graph():
    """Bipartite clustering of a star graph is the same for all modes."""
    G = nx.star_graph(3)
    # all modes are the same
    answer = {0: 0, 1: 1, 2: 1, 3: 1}
    # Plain asserts replace the deprecated nose-style assert_equal helper.
    assert bipartite.clustering(G, mode='dot') == answer
    assert bipartite.clustering(G, mode='min') == answer
    assert bipartite.clustering(G, mode='max') == answer
def test_path_graph():
    """On P4, 'dot' and 'max' agree (0.5 per node) while 'min' is 1."""
    G = nx.path_graph(4)
    half = dict.fromkeys(range(4), 0.5)
    assert bipartite.clustering(G, mode="dot") == half
    assert bipartite.clustering(G, mode="max") == half
    ones = dict.fromkeys(range(4), 1)
    assert bipartite.clustering(G, mode="min") == ones
def analysis(graph, seed, calc_nrd=True, calc_ncc=True, calc_depth=True):
    """Compute per-node statistics and store them as node attributes.

    Parameters
    ----------
    graph : networkx graph
        Bipartite graph to analyse; its node attribute dicts are updated
        in place (uses the nx 1.x ``graph.node`` attribute API).
    seed : node
        Start node for the BFS depth computation.
    calc_nrd, calc_ncc, calc_depth : bool
        Toggle node redundancy ('nrd'), bipartite 'min' clustering ('ncc'),
        and BFS depth ('depth') respectively.

    Returns
    -------
    The same ``graph`` object, annotated.
    """
    logging.info('Computing Statistics')
    depth, nrd, ncc = {}, {}, {}
    if calc_depth:
        depth = _bfs_depth(graph, seed)
    if calc_nrd:
        nrd = bipartite.node_redundancy(graph)
    if calc_ncc:
        ncc = bipartite.clustering(graph, mode='min')
    # 'node_id' instead of 'id' avoids shadowing the builtin id().
    for node_id in graph.nodes():
        attrs = graph.node[node_id]
        if calc_depth:
            attrs['depth'] = depth[node_id]
        if calc_nrd:
            attrs['nrd'] = nrd[node_id]
        if calc_ncc:
            attrs['ncc'] = ncc[node_id]
        # The original re-assigned graph.node[node_id] = attrs, which is a
        # no-op (same dict object) and has been dropped.
    return graph
def calculate_centrality(fp, centrality_type, perm_maps):
    """Build an app-permission bipartite graph from ``%s.txt`` and compute
    the requested centrality ('degree', 'closeness', 'betweenness' or
    'clustering').

    Returns ``(result, is_connect)`` on success; when the centrality
    computation raises, the error is printed and the function falls
    through (implicitly returning None).
    """
    print '%s : start to read %s.txt '%(centrality_type, fp)
    g = nx.Graph()
    i_t = 100000  # progress-report interval passed to p_percent
    i_i = 0
    p = 0
    # NOTE(review): f is never closed; consider a with-block / try-finally.
    f = codecs.open('./txt_critical_perms/apps_file/%s.txt'%(fp), 'r', encoding='utf-8')
    l = f.readline()
    l = f.readline()  # second readline drops the header row
    while l:
        p, i_i = p_percent(p, i_i, i_t, 10)
        ls = l.split('\t')
        app_id = ls[0].strip().lower()
        perm_id = ls[1].strip().lower()
        g.add_node(app_id, bipartite=0)  # top node set (apps)
        g.add_node(perm_id, bipartite=1)  # bottom node set (permissions)
        g.add_edge(app_id, perm_id)
        l = f.readline()
    is_connect = nx.is_connected(g)
    print u'end read: %s'%(fp), is_connect
    # Split partitions by the 'bipartite' node attribute rather than
    # bipartite.sets(), which requires a connected graph.
    #node_data, node_app = bipartite.sets(g)
    node_data = set(n for n, d in g.nodes(data=True) if d['bipartite'] == 1)
    node_app = set(g) - node_data
    ## centrality degree
    if centrality_type == 'degree':
        try:
            centrality = bipartite.degree_centrality(g, node_data)
            result = get_centrality_out(fp, node_data, node_app, centrality, centrality_type, perm_maps)
            return result, is_connect
        except Exception as e:
            print '** error in centrality: %s : %s'%(centrality_type, fp), e
    ## centrality closeness
    if centrality_type == 'closeness':
        try:
            centrality = bipartite.closeness_centrality(g, node_app, normalized=False)
            result = get_centrality_out(fp, node_data, node_app, centrality, centrality_type, perm_maps)
            return result, is_connect
        except Exception as e:
            print '**** error in centrality : %s : %s'%(centrality_type, fp), e
    ## centrality betweenness
    if centrality_type == 'betweenness':
        try:
            centrality = bipartite.betweenness_centrality(g, node_app)
            result = get_centrality_out(fp, node_data, node_app, centrality, centrality_type, perm_maps)
            return result, is_connect
        except Exception as e:
            print '**** error in centrality : %s : %s'%(centrality_type, fp), e
    ## centrality clustering ('dot' mode over the permission partition)
    if centrality_type == 'clustering':
        try:
            centrality = bipartite.clustering(g, node_data, mode='dot')
            result = get_centrality_out(fp, node_data, node_app, centrality, centrality_type, perm_maps)
            return result, is_connect
        except Exception as e:
            print '**** error in centrality : %s : %s'%(centrality_type, fp), e
def test_bad_mode():
    # Call clustering with an unknown mode; the expected-exception check
    # is presumably applied by a decorator outside this excerpt.
    path = nx.path_graph(4)
    bipartite.clustering(path, mode='foo')
def test_not_bipartite():
    # Feed a non-bipartite graph (K4, which contains triangles) to
    # clustering; the expected-exception check is presumably applied
    # by a decorator outside this excerpt.
    complete = nx.complete_graph(4)
    bipartite.clustering(complete)
def test_bad_mode():
    """An unrecognised ``mode`` argument raises NetworkXError."""
    bad_mode_graph = nx.path_graph(4)
    with pytest.raises(nx.NetworkXError):
        bipartite.clustering(bad_mode_graph, mode="foo")
def write_developer_contrib_df(fname='data/developer_contributions_df.csv'):
    """Write one CSV row per (year, developer) with contribution,
    connectivity, centrality and clustering measures for that year's
    collaboration network (years 1992-2014).

    NOTE(review): the file is opened 'wb' and fed to csv.writer — the
    Python 2 csv convention; confirm before running under Python 3.
    """
    ids = utils.UniqueIdGenerator()
    # Only PEPs with a creation date can be attributed to a year.
    peps = [pep for pep in get_peps() if pep.created is not None]
    connectivity = utils.load_result_pkl(connectivity_file)
    centrality = utils.load_result_pkl(centrality_file)
    networks_gen = networks_by_year()
    skip = next(networks_gen)  # discard the first yielded network
    networks = list(networks_gen)
    years = range(1992, 2015)
    devs_by_year = get_developers_by_years(networks=networks)
    with open(fname, 'wb') as f:
        out = csv.writer(f)
        out.writerow([
            'id',
            'year',
            'dev',
            'has_written_peps',
            'has_written_acc_peps',
            'is_delegate',
            'peps_this_year',
            'total_peps',
            'accepted_peps_year',
            'total_accepted_peps',
            'degree',
            'contributions_sc',
            'contributions_edits',
            'contributions_added',
            'contributions_deleted',
            'collaborators',
            'knum',
            'aknum',
            'top',
            'top2',
            'tenure',
            'betweenness',
            'closeness',
            'degree_cent',
            'file_mean_degree',
            'clus_sq',
            'clus_dot',
            'clus_red',
        ])
        for year, G in zip(years, networks):
            print("Analyzing {}".format(G.name))
            bdfl_delegates = get_delegates_by_year(year, peps=peps)
            peps_this_year = peps_by_developer_that_year(year, peps=peps)
            peps_until_year = peps_by_developer_until_year(year, peps=peps)
            acc_peps_this_year = accepted_peps_by_developer_that_year(year, peps=peps)
            acc_peps_until_year = accepted_peps_by_developer_until_year(year, peps=peps)
            top = get_developers_top_connectivity_by_year(G, year, connectivity=connectivity)
            top2 = get_developers_top_connectivity_by_year_new(G, year, connectivity=connectivity)
            devs = devs_by_year[year]
            tenure = compute_tenure_by_year(year, networks=networks)
            k_num = connectivity[year]['k_num']
            # Normalized centrality vectors for this year's network.
            bet = normalize(centrality[year]['bet'])
            clos = normalize(centrality[year]['clos'])
            deg = normalize(centrality[year]['deg'])
            # Per-node clustering measures (square, bipartite dot, redundancy).
            clus_sq = nx.square_clustering(G)
            clus_dot = bp.clustering(G)
            clus_red = bp.node_redundancy(G)
            for dev in devs:
                out.writerow([
                    ids[dev],
                    year,
                    dev.encode('utf8'),
                    1 if dev in peps_until_year else 0,  # developer has written at least a pep
                    1 if dev in acc_peps_until_year else 0,  # developer has written at least an acc. pep
                    1 if dev in bdfl_delegates else 0,  # developer has been BDFL delegate
                    peps_this_year[dev] if dev in peps_this_year else 0,  # peps written this year
                    peps_until_year[dev] if dev in peps_until_year else 0,  # peps written until this year
                    acc_peps_this_year[dev] if dev in acc_peps_this_year else 0,  # peps acc. this year
                    acc_peps_until_year[dev] if dev in acc_peps_until_year else 0,  # total peps acc.
                    len(G[dev]),
                    #G.degree(dev, weight=None),
                    G.degree(dev, weight='weight'),  # lines of code added plus deleted
                    G.degree(dev, weight='edits'),  # number files edit
                    G.degree(dev, weight='added'),  # lines of code added
                    G.degree(dev, weight='deleted'),  # lines of code removed
                    second_order_nbrs(G, dev),  # second order neighbors
                    k_num[dev][0],  # k-component number
                    k_num[dev][1],  # Average k-component number
                    1 if dev in top else 0,  # top connectivity level
                    1 if dev in top2 else 0,  # top 2 connectivity level
                    tenure[dev],
                    bet[dev],
                    clos[dev],
                    deg[dev],
                    sum(len(G[n]) for n in G[dev]) / float(len(G[dev])),  # mean degree of dev's files
                    clus_sq[dev],
                    clus_dot[dev],
                    clus_red[dev],
                ])
# Build src->dest IP edges from the day's CSV, compute bipartite
# clustering, and write a histogram of coefficient values per day.
for line in dataFile:
    fields = line.split(",")  # split once instead of three times per line
    # Expect exactly 8 comma-separated fields; skip malformed records.
    if len(fields) != 8:
        continue
    srcIp = fields[1]
    destIp = fields[-2]  # same as fields[len(fields) - 2]
    edgeArr.append((srcIp, destIp))
print('File Read , Now Creating Graph for Day val = ', dayVal)
G.add_edges_from(edgeArr)
print('Edges created')
# redundancyMap = bipartite.node_redundancy(G)
redundancyMap = bipartite.clustering(G)
print('Redundancy done for Day = ', dayVal)
# Histogram: coefficient value -> number of nodes with that value.
valueMap = {}
for coeff in redundancyMap.values():
    valueMap[coeff] = valueMap.get(coeff, 0) + 1
for coeff, count in valueMap.items():
    # NOTE(review): no trailing "\n" is written, so consecutive rows run
    # together on one line — confirm whether that is intended.
    writeFile.write("{},{},{}".format(dayVal, coeff, count))
# Finish the current axes, then compute connectivity, clustering and
# centrality statistics for the (directed, per the SCC/WCC calls) network.
ax.xaxis.set_ticks_position('bottom')
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles, labels)
#plt.show()
# ADD PLOTS OF DEGREE DISTRIBUTION CONSIDERING EACH EDGE TYPE (3 PLOTS)
# Connectedness
numSCC = nx.number_strongly_connected_components(network)
numWCC = nx.number_weakly_connected_components(network)
# Clustering
# No C3 clustering by definition of bipartite, elaborate and explain C4 during talk
cluster1 = nx.square_clustering(network)  # No clustering because edges only go from users to designs
cluster2 = bipartite.clustering(network)  # No clustering because edges only go from users to designs
# Centrality Measures
# NOTE(review): open question from the original author — do these
# centrality measures factor in directedness? Verify before trusting them.
closeness_centrality = bipartite.closeness_centrality(network, users)
total_closeness_centrality = 0
for key, value in closeness_centrality.items():
    total_closeness_centrality += value
avg_closeness_centrality = total_closeness_centrality / len(closeness_centrality)
degree_centrality = bipartite.degree_centrality(network, users)
total_degree_centrality = 0
for key, value in degree_centrality.items():
    total_degree_centrality += value
avg_degree_centrality = total_degree_centrality / len(degree_centrality)
# sourceIpSet.add(srcIp) # destIpSet.add(destIp) # # ipMap[srcIp].add(destIp) edgeArr.append((srcIp, destIp)) # for key in ipMap.keys(): # if(len(ipMap[key]) > 1): # print("{},{}".format(key,len(ipMap[key]))) G.add_edges_from(edgeArr) print('xxx') arr = bipartite.clustering(G) for node in arr.keys(): coefficient = arr[node] if coefficient not in coefficientMap: coefficientMap[coefficient] = [] coefficientMap[coefficient].append(node) print('Clustering done for day = ' + str(dayVal)) for el in coefficientMap: if el not in glocalCoeffChangeMap: glocalCoeffChangeMap[el] = [] glocalCoeffChangeMap[el].append(len(coefficientMap[el]))
# For each of 15 days, read the sipscan capture, build an IP graph, and
# write one CSV row per node with its bipartite 'dot' clustering value.
writeFile = open("../dataFiles/bipartiteClusteringDayWise.csv", "w")
for dayVal in range(1, 16):
    print("Parsing Day " + str(dayVal) + " data")
    dayMap[dayVal] = {}
    edgeArr = []
    graph = nx.Graph()
    # Context manager closes each day's input promptly; the original
    # leaked one open file handle per day.
    with open("../dataFiles/sipscan-" + str(dayVal)) as dataFile:
        for line in dataFile:
            fields = line.split(",")  # split once instead of three times
            srcIp = fields[1]
            destIp = fields[-2]  # same as fields[len(fields) - 2]
            edgeArr.append((srcIp, destIp))
    graph.add_edges_from(edgeArr)
    clusterVal = bipartite.clustering(graph, mode="dot")
    print("Clustering Done for day = ", dayVal)
    for el in clusterVal:
        writeFile.write("{},{},{}".format(str(dayVal), str(el), clusterVal[el]))
        writeFile.write("\n")
    writeFile.flush()
    print("Writing Done for day = ", dayVal)
# for el in ipMap:
def bipartite_analysis(members, prods, graph):
    # Print the bipartite density of the graph as seen from each node set,
    # then return the clustering coefficients for the ``members`` nodes.
    print bipartite.density(graph, members)
    print bipartite.density(graph, prods)
    return bipartite.clustering(graph, members)
def build_survival_data_frame(fname=survival_file):
    """Write a survival-analysis data frame: one CSV row per developer per
    period, describing the *previous* year's network state. ``status`` is 1
    when the developer never appears in a later year (left the project).

    NOTE(review): opened 'wb' with csv.writer — the Python 2 csv
    convention; confirm before running under Python 3.
    """
    nan = float('nan')
    ids = utils.UniqueIdGenerator()
    connectivity = utils.load_result_pkl(connectivity_file)
    centrality = utils.load_result_pkl(centrality_file)
    peps = [pep for pep in get_peps() if pep.created is not None]
    networks = list(networks_by_year())
    devs = get_developers_by_years(networks=networks)
    skip = networks.pop(0)  # skip 1991
    G_start = networks.pop(0)  # start with 1992
    devs_start = set(n for n, d in G_start.nodes(data=True) if d['bipartite'] == 1)
    years = range(1993, 2015)
    with open(fname, 'wb') as f:
        out = csv.writer(f)
        out.writerow([
            'id', 'dev', 'period', 'rstart', 'rstop', 'status',
            'has_written_peps', 'has_written_acc_peps', 'peps_this_year',
            'total_peps', 'accepted_peps_year', 'total_accepted_peps',
            'biconnected', 'top', 'tenure', 'colaborators', 'knum', 'aknum',
            'clus_sq', 'clus_dot', 'clus_red', 'degree', 'contributions',
            'dcentrality', 'betweenness', 'closeness',
        ])
        previous_devs = devs_start
        previous_year = 1992
        previous_G = G_start
        for i, (year, G) in enumerate(zip(years, networks)):
            print("processing year {}".format(previous_year))
            these_devs = devs[year]
            remaining_devs = get_all_remaining_devs(devs, years[i:])
            top_devs = get_developers_top_connectivity(
                connectivity[previous_year]['k_components'], previous_devs)
            tenure = compute_tenure_by_year(previous_year)
            bet = normalize(centrality[previous_year]['bet'])
            # BUG FIX: 'closeness' was normalized from the 'bet' vector, so
            # the closeness column silently duplicated betweenness (compare
            # the sibling write_developer_contrib_df, which uses 'clos').
            clos = normalize(centrality[previous_year]['clos'])
            deg = normalize(centrality[previous_year]['deg'])
            # clus_sq was computed twice per iteration; once is enough.
            clus_sq = nx.square_clustering(previous_G)
            clus_dot = bp.clustering(previous_G)
            clus_red = bp.node_redundancy(previous_G)
            peps_this_year = peps_by_developer_that_year(previous_year, peps=peps)
            peps_until_year = peps_by_developer_until_year(previous_year, peps=peps)
            acc_peps_this_year = accepted_peps_by_developer_that_year(previous_year, peps=peps)
            acc_peps_until_year = accepted_peps_by_developer_until_year(previous_year, peps=peps)
            for dev in previous_devs:
                out.writerow([
                    ids[dev],  # developer numerical ID
                    dev.encode('utf8'),  # developer name
                    i + 1,  # period
                    i,  # start
                    i + 1,  # stop
                    0 if dev in remaining_devs else 1,  # status (censored)
                    1 if dev in peps_until_year else 0,  # developer has written at least a pep
                    1 if dev in acc_peps_until_year else 0,  # developer has written at least an acc. pep
                    peps_this_year[dev] if dev in peps_this_year else 0,  # peps written this year
                    peps_until_year[dev] if dev in peps_until_year else 0,  # peps written until this year
                    acc_peps_this_year[dev] if dev in acc_peps_this_year else 0,  # peps acc. this year
                    acc_peps_until_year[dev] if dev in acc_peps_until_year else 0,  # total peps acc.
                    0 if connectivity[previous_year]['k_num'][dev][0] < 2 else 1,  # biconnected
                    0 if dev not in top_devs else 1,  # member of the top connectivity level
                    tenure[dev],  # tenure in years
                    second_order_nbrs(previous_G, dev),  # collaborators
                    connectivity[previous_year]['k_num'].get(dev, (nan, nan))[0],  # knum
                    connectivity[previous_year]['k_num'].get(dev, (nan, nan))[1],  # aknum
                    clus_sq.get(dev, nan),
                    clus_dot.get(dev, nan),
                    clus_red.get(dev, nan),
                    previous_G.degree(dev),  # degree
                    previous_G.degree(dev, weight='weight'),  # contributions
                    deg.get(dev, nan),
                    bet.get(dev, nan),
                    clos.get(dev, nan),
                ])
            previous_devs = these_devs
            previous_year = year
            previous_G = G
def clustering(self):
    # Cache the bipartite clustering coefficient of every node of self.G
    # on the instance for later reuse (``bi`` is presumably the
    # networkx.algorithms.bipartite module alias — confirm at import site).
    self.clustering_dict = bi.clustering(self.G)
# For each of 15 days, read the sipscan capture, build an IP graph, and
# write one CSV row per node with its bipartite 'dot' clustering value.
writeFile = open("../dataFiles/bipartiteClusteringDayWise.csv", "w")
for dayVal in range(1, 16):
    print("Parsing Day " + str(dayVal) + " data")
    dayMap[dayVal] = {}
    edgeArr = []
    graph = nx.Graph()
    # Context manager closes each day's input promptly; the original
    # leaked one open file handle per day.
    with open("../dataFiles/sipscan-" + str(dayVal)) as dataFile:
        for line in dataFile:
            fields = line.split(",")  # split once instead of three times
            srcIp = fields[1]
            destIp = fields[-2]  # same as fields[len(fields) - 2]
            edgeArr.append((srcIp, destIp))
    graph.add_edges_from(edgeArr)
    clusterVal = bipartite.clustering(graph, mode="dot")
    print("Clustering Done for day = ", dayVal)
    for el in clusterVal:
        writeFile.write("{},{},{}".format(str(dayVal), str(el), clusterVal[el]))
        writeFile.write("\n")
    writeFile.flush()
    print("Writing Done for day = ", dayVal)
# for el in ipMap:
#     writeFile.write("{},{}".format(str(el),"##".join([str(x) for x in ipMap[el]])))
# Build src->dest IP edges from the day's CSV, compute bipartite
# clustering, and write a histogram of coefficient values per day.
for line in dataFile:
    length = len(line.split(","))
    # Expect exactly 8 comma-separated fields; skip malformed records.
    if length != 8:
        continue
    srcIp = line.split(",")[1]
    destIp = line.split(",")[length - 2]
    edgeArr.append((srcIp, destIp))
print('File Read , Now Creating Graph for Day val = ', dayVal)
G.add_edges_from(edgeArr)
print('Edges created')
# redundancyMap = bipartite.node_redundancy(G)
redundancyMap = bipartite.clustering(G)
# print(redundancyMap)
print('Redundancy done for Day = ', dayVal)
# Histogram: coefficient value -> number of nodes with that value.
valueMap = {}
for el in redundancyMap:
    value = redundancyMap[el]
    if value not in valueMap:
        valueMap[value] = 0
    valueMap[value] += 1
for el in valueMap:
    # NOTE(review): no trailing "\n" is written, so consecutive rows run
    # together on one line — confirm whether that is intended.
    writeFile.write("{},{},{}".format(dayVal, el, valueMap[el]))
def test_not_bipartite():
    """clustering() raises NetworkXError on a graph that is not bipartite."""
    non_bipartite = nx.complete_graph(4)
    with pytest.raises(nx.NetworkXError):
        bipartite.clustering(non_bipartite)
# NOTE(review): this excerpt continues an ``if`` branch that starts above
# it; the ``else:`` below pairs with that unseen ``if``.
    # Report density per partition, mean degree and mean bipartite
    # clustering for the ICD/ATC graph G.
    print("Density ICD Nodes (Diseases): " + str(bipartite.density(G, nodes_0)))
    print("\n")
    print("Density ATC Nodes (Active Substances): " + str(bipartite.density(G, nodes_1)))
    print("\n")
    print('Calculating mean degree ...')
    print("\n")
    G_deg = nx.degree_histogram(G)
    G_deg_sum = [a * b for a, b in zip(G_deg, range(0, len(G_deg)))]
    print('average degree: {}'.format(sum(G_deg_sum) / G.number_of_nodes()))
    print("\n")
    print('Calculating mean clustering ...')
    print("\n")
    cluster_g = bipartite.clustering(G)
    scg = 0
    # NOTE(review): list(cluster_g.items()) is rebuilt on every iteration,
    # making this loop O(n^2); sum(cluster_g.values()) is the equivalent
    # O(n) form.
    for i in range(len(cluster_g)):
        scg = scg + list(cluster_g.items())[i][1]
    print("Average clustering %s" % str(scg / len(cluster_g)))
    print("\n")
else:
    # Alternative branch: report size/density of the projected graph GP.
    print("Nodes Number : " + str(GP.number_of_nodes()))
    print("\n")
    print("Edges Number : " + str(GP.number_of_edges()))
    print("\n")
    print('Calculating density ...')
    print("\n")
    components = sorted(nx.connected_components(GP), key=len, reverse=True)
    largest_component = components[0]
# sourceIpSet.add(srcIp) # destIpSet.add(destIp) # # ipMap[srcIp].add(destIp) edgeArr.append((srcIp,destIp)) # for key in ipMap.keys(): # if(len(ipMap[key]) > 1): # print("{},{}".format(key,len(ipMap[key]))) G.add_edges_from(edgeArr) print('xxx') arr = bipartite.clustering(G) for node in arr.keys(): coefficient = arr[node] if coefficient not in coefficientMap: coefficientMap[coefficient] = [] coefficientMap[coefficient].append(node) print('Clustering done for day = '+str(dayVal)) for el in coefficientMap: if el not in glocalCoeffChangeMap: glocalCoeffChangeMap[el] = [] glocalCoeffChangeMap[el].append(len(coefficientMap[el]))