import csv
import getopt
import sys
import time

import networkx as nx

# hp (project helper functions) and sx (structural-holes measures) are project-local
# modules; their import statements are not part of this excerpt.


def main(argv):
    # Standard values
    partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
    project = "584"
    to_pajek = False
    try:
        opts, args = getopt.getopt(argv, "p:s:o")
    except getopt.GetoptError:
        print 'group_bridging.py -p <project_name> -s <partitionfile> -o [if you want pajek output]'
        sys.exit(2)
    for opt, arg in opts:
        if opt == "-p":
            project = arg
        elif opt == "-s":
            partitionfile = arg
        elif opt == "-o":
            to_pajek = True
        else:
            print 'group_bridging.py -p <project_name> -s <partitionfile> -o [if you want pajek output]'

    print "##################### GROUP BRIDGING ########################"
    print "Project %s " % project
    print "Partition %s" % partitionfile

    ff_edges_writer = csv.writer(open("results/%s_ff_bridging_edges.csv" % project, "wb"))
    at_edges_writer = csv.writer(open("results/%s_at_bridging_edges.csv" % project, "wb"))
    rt_edges_writer = csv.writer(open("results/%s_rt_bridging_edges.csv" % project, "wb"))

    csv_bridging_writer = csv.writer(open('results/spss/group bridging/%s_group_bridging.csv' % project, 'wb'))
    csv_bridging_writer.writerow(["Project", "Name", "Member_count", "Competing_Lists",
                                  "FF_bin_degree", "FF_bin_in_degree", "FF_bin_out_degree",
                                  "FF_volume_in", "FF_volume_out",
                                  "FF_bin_betweeness", "FF_bin_closeness", "FF_bin_pagerank",
                                  # "FF_bin_eigenvector",
                                  "FF_bin_c_size", "FF_bin_c_density", "FF_bin_c_hierarchy", "FF_bin_c_index",
                                  "AT_bin_degree", "AT_bin_in_degree", "AT_bin_out_degree",
                                  "AT_bin_betweeness", "AT_bin_closeness", "AT_bin_pagerank",
                                  # "AT_bin_eigenvector",
                                  "AT_bin_c_size", "AT_bin_c_density", "AT_bin_c_hierarchy", "AT_bin_c_index",
                                  "AT_volume_in", "AT_volume_out",
                                  "RT_volume_in", "RT_volume_out",
                                  "FF_rec", "AT_rec", "AT_avg", "FF_avg"])

    # Get the overall networks from disk
    FF = nx.read_edgelist('data/networks/%s_FF.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())
    AT = nx.read_edgelist('data/networks/%s_solr_AT.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())
    RT = nx.read_edgelist('data/networks/%s_solr_RT.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())

    # Read in the partition
    tmp = hp.get_partition(partitionfile)
    partitions = tmp[0]
    groups = tmp[1]

    # Read in the member count for each project
    reader = csv.reader(open("results/stats/%s_lists_stats.csv" % project, "rb"), delimiter=",")
    temp = {}
    reader.next()  # Skip the header row
    for row in reader:
        temp[row[0]] = {"name": row[0], "member_count": int(row[3])}

    # Read in the list-listings for individuals
    listings = {}
    indiv_reader = csv.reader(open(partitionfile))
    for row in indiv_reader:
        if listings.has_key(row[1]):
            listings[row[1]]["competing_lists"] += int(row[3])
        else:
            listings[row[1]] = {"competing_lists": int(row[3])}

    # Add dummy nodes if they are missing in the networks
    for partition in partitions:
        for node in partition:
            FF.add_node(node)
            AT.add_node(node)
            RT.add_node(node)

    # Blockmodel the networks into groups according to the partition
    P_FF = nx.blockmodel(FF, partitions)
    P_AT = nx.blockmodel(AT, partitions)
    P_RT = nx.blockmodel(RT, partitions)

    # Name the nodes in the network
    # TODO check: How do I know that the names really match?
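    # Note on the TODO above: in networkx 1.x, nx.blockmodel(G, partitions) returns a graph whose
    # nodes are the integers 0 .. len(partitions)-1, in the same order as the supplied partition
    # list. So as long as hp.get_partition() returns `partitions` and `groups` in matching order
    # (assumed here), mapping block i to groups[i] below assigns each block the right group name.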
    mapping = {}
    mapping_pajek = {}
    i = 0
    for group in groups:
        mapping_pajek[i] = "\"%s\"" % group  # mapping for pajek
        mapping[i] = "%s" % group
        i += 1

    H_FF = nx.relabel_nodes(P_FF, mapping)
    H_AT = nx.relabel_nodes(P_AT, mapping)
    H_RT = nx.relabel_nodes(P_RT, mapping)

    # Output the networks to pajek if needed
    if to_pajek:
        OUT_FF = nx.relabel_nodes(P_FF, mapping_pajek)
        OUT_AT = nx.relabel_nodes(P_AT, mapping_pajek)
        OUT_RT = nx.relabel_nodes(P_RT, mapping_pajek)

        # Write the blocked networks out to disk
        nx.write_pajek(OUT_FF, "results/networks/%s_grouped_FF.net" % project)
        nx.write_pajek(OUT_AT, "results/networks/%s_grouped_AT.net" % project)
        nx.write_pajek(OUT_RT, "results/networks/%s_grouped_RT.net" % project)

    ########## Output the edges between groups to csv ##############
    # Needed for the computation of individual bridging
    # Edges in both directions between the groups are added up
    processed_edges = []
    for (u, v, attrib) in H_FF.edges(data=True):
        if "%s%s" % (u, v) not in processed_edges:
            processed_edges.append("%s%s" % (u, v))
            if H_FF.has_edge(v, u):
                processed_edges.append("%s%s" % (v, u))
                ff_edges_writer.writerow([u, v, attrib["weight"] + H_FF[v][u]["weight"]])
            else:
                ff_edges_writer.writerow([u, v, attrib["weight"]])

    processed_edges = []
    for (u, v, attrib) in H_AT.edges(data=True):
        if "%s%s" % (u, v) not in processed_edges:
            processed_edges.append("%s%s" % (u, v))
            if H_AT.has_edge(v, u):
                processed_edges.append("%s%s" % (v, u))
                at_edges_writer.writerow([u, v, attrib["weight"] + H_AT[v][u]["weight"]])
            else:
                at_edges_writer.writerow([u, v, attrib["weight"]])

    processed_edges = []
    for (u, v, attrib) in H_RT.edges(data=True):
        if "%s%s" % (u, v) not in processed_edges:
            processed_edges.append("%s%s" % (u, v))
            if H_RT.has_edge(v, u):
                processed_edges.append("%s%s" % (v, u))
                rt_edges_writer.writerow([u, v, attrib["weight"] + H_RT[v][u]["weight"]])
            else:
                rt_edges_writer.writerow([u, v, attrib["weight"]])

    ########## TRIM EDGES ################
    # For meaningful results we have to trim edges in the AT and FF networks so the whole
    # network doesn't just look like a blob.
    # The threshold is chosen this way so that the network remains one component.
    THRESHOLD = min([hp.min_threshold(H_AT), hp.min_threshold(H_FF)]) - 1
    H_FF = hp.trim_edges(H_FF, THRESHOLD)
    H_AT = hp.trim_edges(H_AT, THRESHOLD)

    ########## MEASURES ##############
    # Get the number of nodes in the aggregated networks
    # FF_nodes = {}
    # for node in H_FF.nodes(data=True):
    #     FF_nodes[node[0]] = node[1]["nnodes"]

    # FF network measures of the nodes
    # Works fine on binarized data
    FF_bin_degree = nx.degree_centrality(H_FF)
    FF_bin_in_degree = nx.in_degree_centrality(H_FF)    # The attention paid towards this group
    FF_bin_out_degree = nx.out_degree_centrality(H_FF)  # The attention that this group pays towards other people
    FF_bin_betweenness = nx.betweenness_centrality(H_FF, weight="weight")  # How often is the group between other groups
    FF_bin_closeness = nx.closeness_centrality(H_FF)
    # FF_bin_eigenvector = nx.eigenvector_centrality(H_FF)
    FF_bin_pagerank = nx.pagerank(H_FF)
    FF_bin_struc = sx.structural_holes(H_FF)

    # AT network measures of the nodes
    AT_bin_degree = nx.degree_centrality(H_AT)
    AT_bin_in_degree = nx.in_degree_centrality(H_AT)
    AT_bin_out_degree = nx.out_degree_centrality(H_AT)
    AT_bin_betweenness = nx.betweenness_centrality(H_AT, weight="weight")
    AT_bin_closeness = nx.closeness_centrality(H_AT)
    # AT_bin_eigenvector = nx.eigenvector_centrality(H_AT)
    AT_bin_pagerank = nx.pagerank(H_AT)
    AT_bin_struc = sx.structural_holes(H_AT)

    # Tie strengths
    dAT_avg_tie = hp.individual_average_tie_strength(H_AT)
    dFF_avg_tie = hp.individual_average_tie_strength(H_FF)
    dAT_rec = hp.individual_reciprocity(H_AT)
    dFF_rec = hp.individual_reciprocity(H_FF)

    # Dependent variable: see csv
    # TODO A measure that calculates how often tweets travel through this group: eventually betweenness in the RT graph

    # Arrange it in a list and output
    for node in FF_bin_degree.keys():
        csv_bridging_writer.writerow([project, node,
                                      int(temp[node]["member_count"]),
                                      listings[node]["competing_lists"],
                                      FF_bin_degree[node], FF_bin_in_degree[node], FF_bin_out_degree[node],
                                      H_FF.in_degree(node, weight="weight"), H_FF.out_degree(node, weight="weight"),
                                      FF_bin_betweenness[node], FF_bin_closeness[node], FF_bin_pagerank[node],
                                      # FF_bin_eigenvector[node],
                                      FF_bin_struc[node]['C-Size'], FF_bin_struc[node]['C-Density'],
                                      FF_bin_struc[node]['C-Hierarchy'], FF_bin_struc[node]['C-Index'],
                                      AT_bin_degree[node], AT_bin_in_degree[node], AT_bin_out_degree[node],
                                      AT_bin_betweenness[node], AT_bin_closeness[node], AT_bin_pagerank[node],
                                      # AT_bin_eigenvector[node],
                                      AT_bin_struc[node]['C-Size'], AT_bin_struc[node]['C-Density'],
                                      AT_bin_struc[node]['C-Hierarchy'], AT_bin_struc[node]['C-Index'],
                                      H_AT.in_degree(node, weight="weight"), H_AT.out_degree(node, weight="weight"),
                                      H_RT.in_degree(node, weight="weight"), H_RT.out_degree(node, weight="weight"),
                                      dFF_rec[node], dAT_rec[node], dAT_avg_tie[node], dFF_avg_tie[node]
                                      ])
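
# A minimal sketch of what the edge-trimming helper used in the TRIM EDGES step above might look
# like. hp.trim_edges() and hp.min_threshold() live in the project's helper module and are not
# shown in this excerpt; this illustrative version simply keeps edges whose weight exceeds the
# threshold and may differ from the real implementation (e.g. >= vs >, graph attributes).
def trim_edges_sketch(G, threshold):
    H = nx.DiGraph()
    H.add_nodes_from(G.nodes(data=True))
    for u, v, data in G.edges(data=True):
        if data.get("weight", 1.0) > threshold:
            H.add_edge(u, v, **data)
    return H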
if hp.reciprocity(FF) == hp.reciprocity(FF_bin):
    print "NOTICE: Group Reciprocity is BINARY"
if nx.average_clustering(FF.to_undirected()) == nx.average_clustering(FF_bin.to_undirected()):
    print "NOTICE: Average Clustering is BINARY and UNDIRECTED"
if nx.average_shortest_path_length(FF) == nx.average_shortest_path_length(FF_bin):
    print "NOTICE: Group Average path length is BINARY"
if hp.average_tie_strength(FF) == hp.average_tie_strength(FF_bin):
    print "NOTICE: Group Average tie strength is BINARY"

################# Individual Bonding measures ################

# dFF_degree = FF.degree("a1")
dFF = nx.degree_centrality(FF)               # Binarized, undirected
dFF_in = nx.in_degree_centrality(FF)         # People that follow me in the network, binarized
dFF_out = nx.out_degree_centrality(FF)       # People that I follow in the network, binarized
dFF_closeness = nx.closeness_centrality(FF)  # Non-directed and binarized
# dFF_pagerank = nx.pagerank(FF)
dFF_eigenvector = nx.eigenvector_centrality(FF.to_undirected())  # Undirected and binarized
dFF_rec = hp.individual_reciprocity(FF)                # Individual reciprocity
dFF_avg_tie = hp.individual_average_tie_strength(FF)   # Individual average tie strength
dFF_in_volume = hp.individual_in_volume(FF)            # Compute the volume of all incoming ties
dFF_out_volume = hp.individual_out_volume(FF)          # Compute the volume of all outgoing ties

# Test the output of NetworkX against UCINET
if names[i] == "a":
    print "######################## INDIVIDUAL MEASURES TEST of BINARY #####################"
    a2 = "a2"
    if nx.degree_centrality(FF) == nx.degree_centrality(FF_bin):
        print "NOTICE: Degree centrality is BINARY"
    if nx.in_degree_centrality(FF) == nx.in_degree_centrality(FF_bin):
        print "NOTICE: in_degree_centrality is BINARY"
    if nx.out_degree_centrality(FF) == nx.out_degree_centrality(FF_bin):
        print "NOTICE: out_degree_centrality is BINARY"
    if nx.closeness_centrality(FF) == nx.closeness_centrality(FF_bin):
        print "NOTICE: closeness_centrality is BINARY"
    # Eigenvector centrality makes problems upon converging
    # if nx.eigenvector_centrality(FF) == nx.eigenvector_centrality(FF_bin):
    #     print "NOTICE: eigenvector_centrality is BINARY"
    if hp.individual_reciprocity(FF) == hp.individual_reciprocity(FF_bin):
        print "NOTICE: individual_reciprocity is BINARY"
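
# A minimal sketch of a per-node reciprocity helper in the spirit of hp.individual_reciprocity(),
# assuming it counts, for every node, how many of its outgoing ties are reciprocated. The real
# helper in hp may use a different definition (e.g. a ratio, or a single-node variant that takes
# the node as a second argument, as in the bridging scripts below).
def individual_reciprocity_sketch(G):
    reciprocated = {}
    for node in G.nodes():
        reciprocated[node] = sum(1 for neighbour in G.successors(node) if G.has_edge(neighbour, node))
    return reciprocated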

def main(argv):
    # Standard values
    partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
    project = "584"
    to_pajek = False
    try:
        opts, args = getopt.getopt(argv, "p:s:o")
    except getopt.GetoptError:
        print 'individual_bridging_2.py -p <project_name> -s <partitionfile>'
        sys.exit(2)
    for opt, arg in opts:
        if opt == "-p":
            project = arg
        elif opt == "-s":
            partitionfile = arg
        else:
            print 'individual_bridging_2.py -p <project_name> -s <partitionfile>'

    print "##################### INDIVIDUAL BRIDGING 2 (Working on whole network) ########################"
    print "Project %s " % project
    print "Partition %s" % partitionfile

    csv_bridging_writer = csv.writer(open('results/spss/individual bridging/%s_individual_bridging_3.csv' % project, 'wb'))
    csv_bridging_writer.writerow(["Project", "Community", "Person_ID", "Competing_lists",
                                  "FF_bin_degree", "FF_bin_in_degree", "FF_bin_out_degree",
                                  "FF_vol_in", "FF_vol_out",
                                  "FF_groups_in", "FF_groups_out", "FF_rec",
                                  "FF_bin_betweeness",  # "FF_bin_closeness", "FF_bin_pagerank",
                                  # "FF_c_size", "FF_c_density", "FF_c_hierarchy", "FF_c_index",
                                  "AT_bin_degree", "AT_bin_in_degree", "AT_bin_out_degree",
                                  "AT_vol_in", "AT_vol_out",
                                  "AT_groups_in", "AT_groups_out", "AT_rec",
                                  "AT_bin_betweeness",  # "AT_bin_closeness", "AT_bin_pagerank",
                                  # FF_c_size, FF_c_density, FF_c_hierarchy, FF_c_index,
                                  "AT_avg_tie_strength", "AT_strength_centrality_in",
                                  "RT_bin_in_degree", "RT_bin_out_degree",
                                  "RT_vol_in", "RT_vol_out"])

    # Read in the list-listings for individuals
    listings = {}
    indiv_reader = csv.reader(open(partitionfile))
    for row in indiv_reader:
        listings[row[0]] = {"group": row[1], "place": int(row[2]), "competing_lists": int(row[3])}

    # Read in the centralities of nodes in their corresponding community
    centralities = {}
    centrality_reader = csv.reader(open('results/spss/individual bonding/%s_individual_bonding.csv' % project))
    for row in centrality_reader:
        centralities[row[2]] = {"ff_in_degree": row[5]}

    # Read in the partition
    tmp = hp.get_partition(partitionfile)
    partitions = tmp[0]
    groups = tmp[1]

    # Read in the networks
    FF_all = nx.read_edgelist('data/networks/%s_FF.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())
    AT_all = nx.read_edgelist('data/networks/%s_solr_AT.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())
    RT_all = nx.read_edgelist('data/networks/%s_solr_RT.edgelist' % project, nodetype=str, data=(('weight', float),), create_using=nx.DiGraph())
    print "Done reading in Networks"

    # Determine the maximum subset of nodes present in all networks
    maximum_subset = []
    for node in FF_all.nodes():
        if AT_all.has_node(node) and RT_all.has_node(node):
            maximum_subset.append(node)

    i = 0
    for partition in partitions:
        for node in partition:
            FF_all.add_node(node, group=groups[i])  # Add nodes
            AT_all.add_node(node, group=groups[i])
            RT_all.add_node(node, group=groups[i])
        i += 1

    i = 0
    # These measures are computed only once on the whole graph (we are making an error here,
    # since the internal group structure also loads up those values)
    if len(maximum_subset) < 1000:
        scaling_k = len(maximum_subset)
    else:
        scaling_k = len(maximum_subset) / 100
    dFF_bin_betweeness = nx.betweenness_centrality(FF_all, k=scaling_k)
    dAT_bin_betweeness = nx.betweenness_centrality(AT_all, k=scaling_k)
    # dFF_struc = sx.structural_holes(FF_all)

    for partition in partitions:
        project_name = groups[i]

        # Determine the groups that are not in the partition
        all_other_groups = groups[:]
        group = groups[i]
        all_other_groups.remove(group)

        # Get all the partitions without the current partition
        partitions_without_partition = partitions[:]
        partitions_without_partition.remove(partition)

        # Remove the nodes that are in this partition
        remaining_nodes = [item for sublist in partitions for item in sublist]  # flat list of all nodes
        for nodes_to_be_deleted in partition:
            remaining_nodes.remove(nodes_to_be_deleted)

        # Create subgraphs that contain all nodes except the ones that are in the partition
        S_FF = FF_all.subgraph(remaining_nodes)
        S_AT = AT_all.subgraph(remaining_nodes)
        S_RT = RT_all.subgraph(remaining_nodes)

        i += 1

        for node in partition:
            if node in maximum_subset:
                t0 = time.time()

                # Add FF nodes and edges
                S_FF.add_node(node, group=group)
                S_FF.add_edges_from(FF_all.in_edges(node, data=True))   # in edges
                S_FF.add_edges_from(FF_all.out_edges(node, data=True))  # out edges
                # Delete the nodes that we accidentally added again by importing all of the node's edges
                for tmp_node in partition:
                    if tmp_node != node and tmp_node in S_FF:
                        S_FF.remove_node(tmp_node)

                # Add AT nodes and edges
                S_AT.add_node(node, group=group)
                S_AT.add_edges_from(AT_all.in_edges(node, data=True))   # in edges
                S_AT.add_edges_from(AT_all.out_edges(node, data=True))  # out edges
                # Delete the nodes that we accidentally added again by importing all of the node's edges
                for tmp_node in partition:
                    if tmp_node != node and tmp_node in S_AT:
                        S_AT.remove_node(tmp_node)

                # Add RT nodes and edges
                S_RT.add_node(node, group=group)
                S_RT.add_edges_from(RT_all.in_edges(node, data=True))   # in edges
                S_RT.add_edges_from(RT_all.out_edges(node, data=True))  # out edges
                # Delete the nodes that we accidentally added again by importing all of the node's edges
                for tmp_node in partition:
                    if tmp_node != node and tmp_node in S_RT:
                        S_RT.remove_node(tmp_node)

                print "Done creating Subgraphs"

                ## FF measures
                dFF_bin = nx.degree_centrality(S_FF)
                dFF_bin_in = nx.in_degree_centrality(S_FF)
                dFF_bin_out = nx.out_degree_centrality(S_FF)
                # nx.load_centrality(S_FF, v=node, weight="weight")
                # dFF_bin_closeness = nx.closeness_centrality(S_FF, v=node)
                # dFF_bin_pagerank = nx.pagerank(S_FF, weight="weight")
                dFF_total_in_groups = hp.filtered_group_volume(hp.incoming_group_volume(S_FF, node, all_other_groups), 0)
                dFF_total_out_groups = hp.filtered_group_volume(hp.outgoing_group_volume(S_FF, node, all_other_groups), 0)
                dFF_rec = hp.individual_reciprocity(S_FF, node)  # number of reciprocated ties

                ## AT measures
                dAT_bin = nx.degree_centrality(S_AT)
                dAT_bin_in = nx.in_degree_centrality(S_AT)
                dAT_bin_out = nx.out_degree_centrality(S_AT)
                # dAT_bin_betweeness = nx.betweenness_centrality(S_AT, k=100)
                # nx.load_centrality(S_AT, v=node, weight="weight")
                # dAT_bin_closeness = nx.closeness_centrality(S_AT, v=node)
                # dAT_bin_pagerank = nx.pagerank(S_AT, weight="weight")
                dAT_total_in_groups = hp.filtered_group_volume(hp.incoming_group_volume(S_AT, node, all_other_groups), 0)
                dAT_total_out_groups = hp.filtered_group_volume(hp.outgoing_group_volume(S_AT, node, all_other_groups), 0)
                dAT_rec = hp.individual_reciprocity(S_AT, node)  # number of reciprocated @ties
                dAT_avg_tie = hp.individual_average_tie_strength(S_AT, node)

                # Compute a combined measure which multiplies the strength of incoming ties
                # by the centrality of the person the tie comes from
                dAT_strength_centrality = 0
                for edge in S_AT.in_edges(node, data=True):
                    if edge[0] in maximum_subset:
                        # centrality of the node that the tie is incoming from
                        dAT_strength_centrality += edge[2]["weight"] * float(centralities[edge[0]]["ff_in_degree"])

                ############### DEPENDENT VARIABLES ###########
                dRT_in = nx.in_degree_centrality(S_RT)    # At least one retweet that a person has received
                dRT_out = nx.out_degree_centrality(S_RT)  # At least one retweet that a person has made
                print "Done computing Measures"

                try:
                    c_size = dFF_struc[node]['C-Size']
                    c_dens = dFF_struc[node]['C-Density']
                    c_hierarch = dFF_struc[node]['C-Hierarchy']
                    c_index = dFF_struc[node]['C-Index']
                except:
                    c_size = "NaN"
                    c_dens = "NaN"
                    c_hierarch = "NaN"
                    c_index = "NaN"

                csv_bridging_writer.writerow([project, project_name, node,
                                              listings[node]["competing_lists"],
                                              dFF_bin[node], dFF_bin_in[node], dFF_bin_out[node],
                                              S_FF.in_degree(node, weight="weight"), S_FF.out_degree(node, weight="weight"),
                                              dFF_total_in_groups, dFF_total_out_groups, dFF_rec[node],
                                              dFF_bin_betweeness[node],  # dFF_bin_closeness[node], dFF_bin_pagerank[node],
                                              # c_size, c_dens, c_hierarch, c_index,
                                              dAT_bin[node], dAT_bin_in[node], dAT_bin_out[node],
                                              S_AT.in_degree(node, weight="weight"), S_AT.out_degree(node, weight="weight"),
                                              dAT_total_in_groups, dAT_total_out_groups, dAT_rec[node],
                                              dAT_bin_betweeness[node],  # dAT_bin_closeness[node], dAT_bin_pagerank[node],
                                              # dAT_struc[node]['C-Size'], dAT_struc[node]['C-Density'], dAT_struc[node]['C-Hierarchy'], dAT_struc[node]['C-Index'],
                                              dAT_avg_tie[node], dAT_strength_centrality,
                                              dRT_in[node], dRT_out[node],
                                              S_RT.in_degree(node, weight="weight"), S_RT.out_degree(node, weight="weight")
                                              ])

                t_delta = (time.time() - t0)
                print "Count: %s Node: %s Time: %s" % (i, node, t_delta)

                # Remove the node again
                S_FF.remove_node(node)
                S_AT.remove_node(node)
                S_RT.remove_node(node)
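
# Note on dFF_bin_betweeness / dAT_bin_betweeness above: betweenness_centrality(G, k=...) in
# networkx estimates betweenness from k randomly sampled source nodes instead of all nodes, so
# the scaling_k logic trades exactness for runtime on large networks. A toy comparison
# (illustrative only, these names are not part of the original scripts):
#
#   G = nx.gnp_random_graph(200, 0.05, directed=True)
#   approx = nx.betweenness_centrality(G, k=50)  # sampled estimate
#   exact = nx.betweenness_centrality(G)         # exhaustive computation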

def main(argv):
    # Standard values
    partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
    project = "584"
    try:
        opts, args = getopt.getopt(argv, "p:s:")
    except getopt.GetoptError:
        print "individual_bonding.py -p <project_name> -s <partitionfile>"
        sys.exit(2)
    for opt, arg in opts:
        if opt == "-p":
            project = arg
        elif opt == "-s":
            partitionfile = arg
        else:
            print "individual_bonding.py -p <project_name> -s <partitionfile>"

    print "##################### INDIVIDUAL BONDING ########################"
    print "Project %s " % project
    print "Partition %s" % partitionfile

    csv_writer = csv.writer(open("results/spss/individual bonding/%s_individual_bonding.csv" % project, "wb"))
    csv_writer.writerow(
        [
            "Project",
            "Community",
            "Person_ID",
            "Place_on_list",
            "FF_bin_deg",
            "FF_bin_in_deg",
            "FF_bin_out_deg",
            "FF_vol_in",
            "FF_vol_out",
            "FF_bin_close",
            "FF_bin_page",
            "FF_rec",
            "AT_bin_deg",
            "AT_bin_in_deg",
            "AT_bin_out_deg",
            "AT_bin_close",
            "AT_bin_page",
            "AT_rec",
            "AT_avg",
            "AT_vol_in",
            "AT_vol_out",
            "RT_bin_deg_in",
            "RT_bin_deg_out",
            "RT_vol_in",
            "RT_vol_out",
            "RT_global_vol_in",
            "RT_global_vol_out",
        ]
    )

    # Read in the list-listings for individuals
    listings = {}
    indiv_reader = csv.reader(open(partitionfile))
    i = 0
    for row in indiv_reader:
        if i > int(row[2]):  # in case there are fewer than 101 entries for a group for some reason
            i = 0
        i += 1
        listings[row[0]] = {"group": row[1], "place": i, "competing_lists": int(row[3]), "original_place": int(row[2])}
        if i == 101:  # Some of the original places have shifted because of the regrouping
            i = 0

    # Read in networks
    FF_all = nx.read_edgelist(
        "data/networks/%s_FF.edgelist" % project, nodetype=str, data=(("weight", float),), create_using=nx.DiGraph()
    )
    AT_all = nx.read_edgelist(
        "data/networks/%s_solr_AT.edgelist" % project,
        nodetype=str,
        data=(("weight", float),),
        create_using=nx.DiGraph(),
    )
    RT_all = nx.read_edgelist(
        "data/networks/%s_solr_RT.edgelist" % project,
        nodetype=str,
        data=(("weight", float),),
        create_using=nx.DiGraph(),
    )

    # Read in the partitions
    tmp = hp.get_partition(partitionfile)
    partitions = tmp[0]
    groups = tmp[1]

    # Add missing nodes
    # We are limiting the analysis to only the maximal subset of nodes that are present in all networks
    maximum_subset = []
    for node in FF_all.nodes():
        if AT_all.has_node(node) and RT_all.has_node(node):
            maximum_subset.append(node)
        else:
            print node
    print "Maximum Subset of nodes %s" % len(maximum_subset)

    # In this case we are not adding missing nodes to the networks; we use the subset method
    # instead, to produce a smaller error in the final regressions.
    # i = 0
    # for partition in partitions:
    #     for node in partition:
    #         FF_all.add_node(node, group=groups[i])
    #         AT_all.add_node(node, group=groups[i])
    #         RT_all.add_node(node, group=groups[i])
    #     i += 1

    i = 0
    for partition in partitions:
        project_name = groups[i]
        print "############ Calculating Project %s ############### " % project_name

        # Generate a subgraph according to the partition
        FF = FF_all.subgraph(partition)
        AT = AT_all.subgraph(partition)
        RT = RT_all.subgraph(partition)

        # Additional info for each project
        FF.name = "FF_%s " % project_name
        AT.name = "AT_%s " % project_name
        RT.name = "RT_%s " % project_name

        # hp.draw_graph(FF)
        # hp.draw_graph(AT)
        # hp.draw_graph(RT)

        ############### Compute Individual measures ################

        # Compute FF centralities
        # Works fine on binary data
        dFF_bin = nx.degree_centrality(FF)
        dFF_bin_in = nx.in_degree_centrality(FF)    # People that follow me in the network
        dFF_bin_out = nx.out_degree_centrality(FF)  # People that I follow in the network
        dFF_bin_closeness = nx.closeness_centrality(FF)
        dFF_bin_pagerank = nx.pagerank(FF)
        try:
            dFF_bin_eigenvector = nx.eigenvector_centrality(FF, 10000)
        except:
            print "Failed to compute for FF %s " % FF.name
            # if len(nx.weakly_connected_components(FF)) > 1:
            #     FF_comp = FF.subgraph(nx.weakly_connected_components(FF)[0])
            #     dFF_bin_eigenvector = nx.eigenvector_centrality(FF_comp)
            # else:

        # Compute AT centralities
        # Centralities are problematic on weighted data, since we are losing all the information
        dAT_bin = nx.degree_centrality(AT)               # binary
        dAT_bin_in = nx.in_degree_centrality(AT)         # binary
        dAT_bin_out = nx.out_degree_centrality(AT)       # binary
        dAT_bin_closeness = nx.closeness_centrality(AT)  # binary
        dAT_bin_pagerank = nx.pagerank(AT)
        try:
            dAT_bin_eigenvector = nx.eigenvector_centrality(AT, 10000)
        except:
            print "Failed to compute for AT %s " % AT.name
            # if len(nx.weakly_connected_components(AT)) > 1:
            #     AT_comp = AT.subgraph(nx.weakly_connected_components(AT)[0])
            #     dFF_bin_eigenvector = nx.eigenvector_centrality(AT_comp)
            # else:

        # Tie strengths
        dAT_avg_tie = hp.individual_average_tie_strength(AT)
        dAT_rec = hp.individual_reciprocity(AT)
        dFF_rec = hp.individual_reciprocity(FF)

        # Dependent variable: see csv below
        # Deprecated since in networkx centrality works only on binary edges
        dRT_in = nx.in_degree_centrality(RT)    # At least one retweet that a person has received
        dRT_out = nx.out_degree_centrality(RT)  # At least one retweet that a person has made

        ############### Output ################
        for node in dFF_bin.keys():
            if node in maximum_subset:
                csv_writer.writerow(
                    [
                        project,
                        project_name,
                        node,
                        listings[node]["place"],
                        dFF_bin[node],
                        dFF_bin_in[node],
                        dFF_bin_out[node],
                        FF.in_degree(node, weight="weight"),
                        FF.out_degree(node, weight="weight"),
                        dFF_bin_closeness[node],
                        dFF_bin_pagerank[node],
                        dFF_rec[node],
                        dAT_bin[node],
                        dAT_bin_in[node],
                        dAT_bin_out[node],
                        dAT_bin_closeness[node],
                        dAT_bin_pagerank[node],
                        dAT_rec[node],
                        dAT_avg_tie[node],
                        AT.in_degree(node, weight="weight"),
                        AT.out_degree(node, weight="weight"),
                        dRT_in[node],
                        dRT_out[node],
                        RT.in_degree(node, weight="weight"),
                        RT.out_degree(node, weight="weight"),
                        RT_all.in_degree(node, weight="weight"),
                        RT_all.out_degree(node, weight="weight"),
                    ]
                )
        i += 1
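
# Each of the scripts collected here defines main(argv) and parses getopt flags, so a standard
# entry point like the following is assumed at the bottom of every file (it is not part of the
# excerpt above):
if __name__ == "__main__":
    main(sys.argv[1:])

# Example invocation, using the default partition file hard-coded in main():
#   python individual_bonding.py -p 584 -s data/partitions/final_partitions_p100_200_0.2.csv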