# Density is computed on binarized matrix FF_bin_density = nx.density(FF) # Transitivity FF_bin_transitivity = nx.transitivity(FF) # Reciprocity FF_reciprocity = hp.reciprocity(FF) # Weakly connected components # FF_partition = nx.weakly_connected_components(FF)[0] # FF_comp = FF.subgraph(FF_partition) # Clustering FF_clustering = nx.average_clustering(FF.to_undirected(reciprocal=True)) # To undirected only keeps edges that are reciprocal # Average path length FF_avg_path_length = nx.average_shortest_path_length(FF) # Avg Tie strength FF_avg_volume = hp.average_tie_strength(FF) # TODO: # What about network centralization ? # Test the output of NetworkX against UCINET if names[i] == "a": print "######################## GROUP MEASURES TEST #####################" # Test for BINARY if nx.density(FF) == nx.density(FF_bin): print "NOTICE: Group Density is BINARY" if nx.transitivity(FF) == nx.transitivity(FF_bin): print "NOTICE: Group Transitivity is BINARY" if hp.reciprocity(FF) == hp.reciprocity(FF_bin): print "NOTICE: Group Reciprocity is BINARY" if nx.average_clustering(FF.to_undirected()) == nx.average_clustering(FF_bin.to_undirected()): print "NOTICE: Average Clustering is BINARY and UNDIRECTED" if nx.average_shortest_path_length(FF) == nx.average_shortest_path_length(FF_bin): print "NOTICE: Group Average path length is is BINARY" if hp.average_tie_strength(FF) == hp.average_tie_strength(FF_bin): print "NOTICE: Group Average tie strength path is is BINARY"
def main(argv): #Standardvalues partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv" project = "584" to_pajek = True try: opts, args = getopt.getopt(argv,"p:s:o") except getopt.GetoptError: print 'group_bonding.py -p <project_name> -s <partitionfile> -o [if you want pajek output]' sys.exit(2) for opt, arg in opts: if opt in ("-p"): project = arg elif opt in ("-s"): partitionfile = arg elif opt in ("-o"): to_pajek = True else: print 'group_bonding.py -p <project_name> -s <partitionfile> -o [if you want pajek output]' print "##################### GROUP BONDING ########################" print "Project %s " % project print "Partition %s" % partitionfile csv_writer = csv.writer(open('results/spss/group bonding/%s_group_bonding.csv' % project, 'wb')) #Attributes for Gephi csv_attributes = csv.writer(open('results/networks/%s_at_node_attributes.csv' % project, 'wb')) csv_writer.writerow(["Project", "Name", "Member_count", "Competing_Lists", "FF_Nodes", "AT_Nodes", "RT_Nodes", "FF_Edges","AT_Edges", "RT_Edges", "FF_bin_density", "AT_density", "FF_bin_avg_path_length", "AT_bin_avg_path_length", "FF_bin_clustering", "AT_bin_clustering", "FF_reciprocity", "AT_reciprocity", "FF_bin_transitivity", "AT_bin_transitivity", "RT_density", "RT_total_volume" ]) # Read in the networks FF_all = nx.read_edgelist('data/networks/%s_FF.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph()) AT_all = nx.read_edgelist('data/networks/%s_solr_AT.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph()) RT_all = nx.read_edgelist('data/networks/%s_solr_RT.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph()) # Read in the partition tmp = hp.get_partition(partitionfile) partitions = tmp[0] groups = tmp[1] #Read in members count for each project reader = csv.reader(open("results/stats/%s_lists_stats.csv" % project, "rb"), delimiter=",") temp = {} reader.next() # Skip first row for row in reader: temp[row[0]] = {"name":row[0],"member_count":int(row[3])} #Read in the list-listings for individuals listings = {} indiv_reader = csv.reader(open(partitionfile)) for row in indiv_reader: if listings.has_key(row[1]): listings[row[1]]["competing_lists"] += int(row[3]) else: listings[row[1]] = {"competing_lists": int(row[3])} i = 0 for partition in partitions: for node in partition: FF_all.add_node(node, group = groups[i]) AT_all.add_node(node, group = groups[i]) RT_all.add_node(node, group = groups[i]) i += 1 #Write out to pajek for gephi visualization if to_pajek: #Write the attributes file i= 0 csv_attributes.writerow(["id", "name", "type"]) for node in AT_all.nodes(): i+= 1 csv_attributes.writerow([i, node, AT_all.node[node]["group"]]) nx.write_pajek(FF_all,"results/networks/%s_FF.net" % project) nx.write_pajek(AT_all,"results/networks/%s_AT.net" % project) nx.write_pajek(RT_all,"results/networks/%s_RT.net" % project) i = 0 for partition in partitions: project_name = groups[i] # Add up total members member_count = 0 member_count = int(temp[project_name]["member_count"]) print "############ Calculating Project %s ############### " % project_name # Generate a subgraph according to the partition FF = FF_all.subgraph(partition) AT = AT_all.subgraph(partition) RT = RT_all.subgraph(partition) #Additional Info for each project FF.name = "FF_%s " % project_name AT.name = "AT_%s " % project_name RT.name = "RT_%s " % project_name ############### Compute Group measures ################ #Measures FF FF_bin_density = nx.density(FF) FF_bin_transitivity = nx.transitivity(FF) FF_reciprocity = hp.reciprocity(FF) # Calculate the number of reciprocated ties of all ties # Measures that need a connected graph # In case the graph is split into multiple graphs get the biggest connected component FF_partition = nx.weakly_connected_components(FF)[0] FF_comp = FF.subgraph(FF_partition) FF_bin_avg_path_length = nx.average_shortest_path_length(FF_comp) FF_bin_clustering = nx.average_clustering(FF_comp.to_undirected(),count_zeros=False) # Networks with a lot of mutual trust have a high clustering coefficient. # Star networks with a single broadcast node and passive listeners have a low clustering coefficient. # Measures AT #AT_density = nx.density(AT) # deprecated since it treats the network as binarized and we lose all the interaction information AT_density = hp.average_tie_strength(AT) AT_bin_transitivity = nx.transitivity(AT) AT_reciprocity = hp.reciprocity(AT) #AT_avg_volume = hp.average_tie_strength(AT) AT_partition = nx.weakly_connected_components(AT)[0] AT_comp = AT.subgraph(AT_partition) AT_bin_avg_path_length = nx.average_shortest_path_length(AT_comp) AT_bin_clustering = nx.average_clustering(AT_comp.to_undirected()) # Dependent Variable #RT_density = nx.density(RT) # Danger this works on the binarized graph! # TODO I need a weighted density for RT RT_density = hp.average_tie_strength(RT) RT_total_volume = hp.total_edge_weight(RT) ############### Output ################ csv_writer.writerow([project, project_name, member_count, listings[project_name]["competing_lists"], len(FF.nodes()), len(AT.nodes()), len(RT.nodes()), len(FF.edges()), len(AT.edges()), len(RT.edges()), FF_bin_density, AT_density, FF_bin_avg_path_length, AT_bin_avg_path_length, FF_bin_clustering, AT_bin_clustering, FF_reciprocity, AT_reciprocity, FF_bin_transitivity, AT_bin_transitivity, RT_density, RT_total_volume]) i += 1