Example #1
def main(argv):
    # Default values
    partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
    project = "584"
    to_pajek = False
    try:
      opts, args = getopt.getopt(argv,"p:s:o")
    except getopt.GetoptError:
      print 'group_bridging.py -p <project_name> -s <partitionfile> -o [if you want pajek output]'
      sys.exit(2)
    for opt, arg in opts:
        if opt in ("-p"):
            project = arg
        elif opt in ("-s"):
            partitionfile = arg
        elif opt in ("-o"):
             to_pajek = True
        else:
            print 'group_bridging.py -p <project_name> -s <partitionfile> -o [if you want pajek output]'
    
    print "##################### GROUP BRIDGING ########################"
    print "Project %s " % project
    print "Partition %s" % partitionfile
    
    ff_edges_writer = csv.writer(open("results/%s_ff_bridging_edges.csv" % project, "wb"))
    at_edges_writer = csv.writer(open("results/%s_at_bridging_edges.csv" % project, "wb"))
    rt_edges_writer = csv.writer(open("results/%s_rt_bridging_edges.csv" % project, "wb"))
    
    csv_bridging_writer = csv.writer(open('results/spss/group bridging/%s_group_bridging.csv' % project , 'wb'))
    
    csv_bridging_writer.writerow(["Project", "Name", "Member_count", "Competing_Lists",
                                "FF_bin_degree", "FF_bin_in_degree", "FF_bin_out_degree",
                                "FF_volume_in","FF_volume_out",
                                "FF_bin_betweeness","FF_bin_closeness", "FF_bin_pagerank", #"FF_bin_eigenvector",
                                "FF_bin_c_size","FF_bin_c_density","FF_bin_c_hierarchy","FF_bin_c_index",
                                "AT_bin_degree", "AT_bin_in_degree", "AT_bin_out_degree",
                                "AT_bin_betweeness", "AT_bin_closeness", "AT_bin_pagerank", #"AT_bin_eigenvector",                            
                                "AT_bin_c_size","AT_bin_c_density","AT_bin_c_hierarchy","AT_bin_c_index",
                                "AT_volume_in", "AT_volume_out",
                                "RT_volume_in", "RT_volume_out",
                                "FF_rec", "AT_rec", "AT_avg", "FF_avg"])    
    
    # Get the overall network from disk    
    FF = nx.read_edgelist('data/networks/%s_FF.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph()) 
    AT = nx.read_edgelist('data/networks/%s_solr_AT.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph()) 
    RT = nx.read_edgelist('data/networks/%s_solr_RT.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph())
        
    # Read in the partition
    tmp = hp.get_partition(partitionfile)
    partitions = tmp[0]
    groups = tmp[1]
    
    #Read in the member count for each project
    reader = csv.reader(open("results/stats/%s_lists_stats.csv" % project, "rb"), delimiter=",")
    temp  = {}
    reader.next() # Skip first row
    for row in reader:        
            temp[row[0]] = {"name":row[0],"member_count":int(row[3])}
    
    #Read in the list-listings for individuals
    listings = {}
    indiv_reader = csv.reader(open(partitionfile))
    for row in indiv_reader:                
            if listings.has_key(row[1]):
                listings[row[1]]["competing_lists"] += int(row[3])
            else:
                listings[row[1]] = {"competing_lists": int(row[3])}                            
           
    # Add dummy nodes if they are missing in the networks
    for partition in partitions:
            for node in partition:
                FF.add_node(node)
                AT.add_node(node)
                RT.add_node(node)
            
    #Blockmodel the networks into groups according to the partition
    P_FF = nx.blockmodel(FF,partitions)
    P_AT = nx.blockmodel(AT,partitions)
    P_RT = nx.blockmodel(RT,partitions)
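    # nx.blockmodel (NetworkX 1.x) collapses each partition into a single block
    # node; the block nodes carry an "nnodes" attribute (number of members) and
    # the block-to-block edges carry a "weight" attribute summing the original
    # edge weights, which is what the code below relies on.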
    
    #Name the nodes in the network
    #TODO check: How do I know that the names really match?
    mapping = {}
    mapping_pajek = {}
    i = 0
    for group in groups:
        mapping_pajek[i] = "\"%s\"" % group # mapping for pajek
        mapping[i] = "%s" % group 
        i += 1
    
    H_FF = nx.relabel_nodes(P_FF,mapping)
    H_AT = nx.relabel_nodes(P_AT,mapping)
    H_RT = nx.relabel_nodes(P_RT,mapping)
    
    #Output the networks to Pajek if needed
    if to_pajek:
        OUT_FF = nx.relabel_nodes(P_FF,mapping_pajek)
        OUT_AT = nx.relabel_nodes(P_AT,mapping_pajek)
        OUT_RT = nx.relabel_nodes(P_RT,mapping_pajek)
        
        #Write the blocked network out to disk
        nx.write_pajek(OUT_FF,"results/networks/%s_grouped_FF.net" % project)
        nx.write_pajek(OUT_AT,"results/networks/%s_grouped_AT.net" % project)
        nx.write_pajek(OUT_RT,"results/networks/%s_grouped_RT.net" % project)
    
    ########## Output the Edges between groups to csv ##############
    # Needed for the computation of individual bridging
    # Edges in both directions between the groups are added up
    
    processed_edges = set()
    for (u,v,attrib) in H_FF.edges(data=True):
        if (u,v) not in processed_edges:
            processed_edges.add((u,v))
            if H_FF.has_edge(v,u):
                processed_edges.add((v,u))
                ff_edges_writer.writerow([u,v,attrib["weight"]+H_FF[v][u]["weight"]])
            else:
                ff_edges_writer.writerow([u,v,attrib["weight"]])
                
    processed_edges = set()
    for (u,v,attrib) in H_AT.edges(data=True):
        if (u,v) not in processed_edges:
            processed_edges.add((u,v))
            if H_AT.has_edge(v,u):
                processed_edges.add((v,u))
                at_edges_writer.writerow([u,v,attrib["weight"]+H_AT[v][u]["weight"]])
            else:
                at_edges_writer.writerow([u,v,attrib["weight"]])
                    
    processed_edges = set()
    for (u,v,attrib) in H_RT.edges(data=True):
        if (u,v) not in processed_edges:
            processed_edges.add((u,v))
            if H_RT.has_edge(v,u):
                processed_edges.add((v,u))
                rt_edges_writer.writerow([u,v,attrib["weight"]+H_RT[v][u]["weight"]])
            else:
                rt_edges_writer.writerow([u,v,attrib["weight"]])

    
    ########## TRIM EDGES ################
    # For meaningful results we have to trim edges in the AT and FF networks so the whole network doesn't just look like a blob
    # The threshold is chosen so that each network remains a single component
    
    THRESHOLD = min([hp.min_threshold(H_AT),hp.min_threshold(H_FF)])-1    
    H_FF = hp.trim_edges(H_FF, THRESHOLD)
    H_AT = hp.trim_edges(H_AT, THRESHOLD)    

    ########## MEASURES ##############
    
    #Get the number of nodes in the aggregated networks
    #FF_nodes = {}
    #for node in H_FF.nodes(data=True):
    #        FF_nodes[node[0]] = node[1]["nnodes"]
    
    
    #Get the FF network measures of the nodes
    # Works fine on binarized Data
    FF_bin_degree = nx.degree_centrality(H_FF) 
    FF_bin_in_degree = nx.in_degree_centrality(H_FF) # The attention paid towards this group
    FF_bin_out_degree = nx.out_degree_centrality(H_FF) # The attention that this group pays towards other groups
    FF_bin_betweenness = nx.betweenness_centrality(H_FF,weight="weight") # How often is the group between other groups
    FF_bin_closeness = nx.closeness_centrality(H_FF)
    #FF_bin_eigenvector = nx.eigenvector_centrality(H_FF)
    FF_bin_pagerank = nx.pagerank(H_FF)        
    FF_bin_struc = sx.structural_holes(H_FF)
    
    # AT network measures of the nodes
    AT_bin_degree = nx.degree_centrality(H_AT)
    AT_bin_in_degree = nx.in_degree_centrality(H_AT)
    AT_bin_out_degree = nx.out_degree_centrality(H_AT)
    AT_bin_betweenness = nx.betweenness_centrality(H_AT,weight="weight") 
    AT_bin_closeness = nx.closeness_centrality(H_AT)
    #AT_bin_eigenvector = nx.eigenvector_centrality(H_AT)
    AT_bin_pagerank = nx.pagerank(H_AT)        
    AT_bin_struc = sx.structural_holes(H_AT)
    
    # Tie strengths
    dAT_avg_tie = hp.individual_average_tie_strength(H_AT)
    dFF_avg_tie = hp.individual_average_tie_strength(H_FF)
    dAT_rec = hp.individual_reciprocity(H_AT)    
    dFF_rec = hp.individual_reciprocity(H_FF)
    
    # Dependent Variable see csv
    # TODO A measure that calculates how often tweets travel through this group: possibly betweenness in the RT graph
    
    #Arrange it in a list and output
    for node in FF_bin_degree.keys():                
                csv_bridging_writer.writerow([project, node, int(temp[node]["member_count"]), listings[node]["competing_lists"],
                                                FF_bin_degree[node], FF_bin_in_degree[node], FF_bin_out_degree[node],
                                                H_FF.in_degree(node,weight="weight"), H_FF.out_degree(node,weight="weight"),
                                                FF_bin_betweenness[node],FF_bin_closeness[node],FF_bin_pagerank[node], #FF_bin_eigenvector[node],
                                                FF_bin_struc[node]['C-Size'],FF_bin_struc[node]['C-Density'],FF_bin_struc[node]['C-Hierarchy'],FF_bin_struc[node]['C-Index'],
                                                AT_bin_degree[node], AT_bin_in_degree[node], AT_bin_out_degree[node],
                                                AT_bin_betweenness[node], AT_bin_closeness[node], AT_bin_pagerank[node], #AT_bin_eigenvector[node],
                                                AT_bin_struc[node]['C-Size'],AT_bin_struc[node]['C-Density'],AT_bin_struc[node]['C-Hierarchy'],AT_bin_struc[node]['C-Index'],
                                                H_AT.in_degree(node,weight="weight"), H_AT.out_degree(node,weight="weight"),
                                                H_RT.in_degree(node,weight="weight"), H_RT.out_degree(node,weight="weight"),
                                                dFF_rec[node],dAT_rec[node],dAT_avg_tie[node],dFF_avg_tie[node]
                                            ])        
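The examples on this page rely on helper functions from the helper module (imported as hp) that are not shown here. Below is a minimal sketch of what hp.min_threshold and hp.trim_edges might look like, assuming trim_edges keeps only edges above a weight threshold and min_threshold finds the first threshold at which the blocked network stops being a single weakly connected component (hypothetical implementations, not the actual helper code):

import networkx as nx

def trim_edges(G, weight_threshold=1):
    # Keep only edges whose weight exceeds the threshold.
    G2 = nx.DiGraph()
    for u, v, data in G.edges(data=True):
        if data.get("weight", 1) > weight_threshold:
            G2.add_edge(u, v, weight=data["weight"])
    return G2

def min_threshold(G):
    # Smallest threshold at which the trimmed network either runs out of edges
    # or falls apart into more than one weakly connected component.
    threshold = 1
    while True:
        trimmed = trim_edges(G, threshold)
        if trimmed.number_of_edges() == 0 or nx.number_weakly_connected_components(trimmed) != 1:
            return threshold
        threshold += 1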
Example #2
def main(argv):
    # Partitionfile
    partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
    project = "584"
    reverse = False

    # Read in Networks
    FF_all = nx.read_edgelist(
        "data/networks/%s_FF.edgelist" % project, nodetype=str, data=(("weight", float),), create_using=nx.DiGraph()
    )
    AT_all = nx.read_edgelist(
        "data/networks/%s_solr_AT.edgelist" % project,
        nodetype=str,
        data=(("weight", float),),
        create_using=nx.DiGraph(),
    )
    RT_all = nx.read_edgelist(
        "data/networks/%s_solr_RT.edgelist" % project,
        nodetype=str,
        data=(("weight", float),),
        create_using=nx.DiGraph(),
    )

    try:
        opts, args = getopt.getopt(argv, "r")
    except getopt.GetoptError:
        print "edges.py -r [if you want to reverse the AT<-->RT tie direction ]"
    for opt, arg in opts:
        if opt in ("-r"):
            print "Calculating the influence of outgoing AT ties on incoming RT ties"
            reverse = True

    # Output
    summary_csv_writer = csv.writer(open("results/spss/edges/%s_edges_summary.csv" % project, "wb"))
    summary_csv_writer.writerow(["Community", "Retweets Inside Community", "Retweets between Communities"])

    if reverse:
        bridging_csv_writer = csv.writer(
            open("results/spss/edges/%s_reverse_bridging_edges.csv" % project, "wb")
        )  # reverse
        bonding_csv_writer = csv.writer(
            open("results/spss/edges/%s_reverse_bonding_edges.csv" % project, "wb")
        )  # reverse
    else:
        bridging_csv_writer = csv.writer(open("results/spss/edges/%s_bridging_edges.csv" % project, "wb"))
        bonding_csv_writer = csv.writer(open("results/spss/edges/%s_bonding_edges.csv" % project, "wb"))

    # Read in the partitions
    tmp = hp.get_partition(partitionfile)
    partitions = tmp[0]
    groups = tmp[1]

    ff_bridging_edges = defaultdict(dict)
    ff_bonding_edges = defaultdict(dict)
    at_bridging_edges = defaultdict(dict)
    at_bonding_edges = defaultdict(dict)
    rt_bridging_edges = defaultdict(list)
    rt_bonding_edges = defaultdict(list)
    total_bridging_edges = 0
    total_bonding_edges = 0

    i = 0
    for partition in partitions:

        ################ FF Edges ######################

        # Collect the FF edges between groups
        for edge in nx.edge_boundary(FF_all, partition):
            if FF_all.has_edge(edge[1], edge[0]):
                ff_bridging_edges[edge[0]][edge[1]] = "ff_recip"
            else:
                ff_bridging_edges[edge[0]][edge[1]] = "ff_non_recip"

        # Collect the FF edges inside the group
        for edge in FF_all.subgraph(partition).edges():
            if FF_all.has_edge(edge[1], edge[0]):
                ff_bonding_edges[edge[0]][edge[1]] = "ff_recip"
            else:
                ff_bonding_edges[edge[0]][edge[1]] = "ff_non_recip"

        ################ AT Edges ######################
        # TODO: reciprocated edges with a weight > 1 are currently missing
        # Idea 1: We might simply add up the incoming and outgoing edges to a total weight

        # Collect the AT edges that are between groups
        for edge in nx.edge_boundary(AT_all, partition):
            if AT_all.has_edge(edge[1], edge[0]):
                if AT_all.get_edge_data(*edge)["weight"] == 1:
                    at_bridging_edges[edge[0]][edge[1]] = "at_recip"
            else:
                if AT_all.get_edge_data(*edge)["weight"] == 1:
                    at_bridging_edges[edge[0]][edge[1]] = "at_non_recip_w1"
                else:
                    at_bridging_edges[edge[0]][edge[1]] = AT_all.get_edge_data(*edge)["weight"]

        # Collect the AT edges that are inside the group
        for edge in AT_all.subgraph(partition).edges():
            if AT_all.has_edge(edge[1], edge[0]):
                if AT_all.get_edge_data(*edge)["weight"] == 1:
                    at_bonding_edges[edge[0]][edge[1]] = "at_recip"
            else:
                if AT_all.get_edge_data(*edge)["weight"] == 1:
                    at_bonding_edges[edge[0]][edge[1]] = "at_non_recip_w1"
                else:
                    at_bonding_edges[edge[0]][edge[1]] = AT_all.get_edge_data(*edge)["weight"]

        ################ RT Edges ######################

        # Collect the RT edges between groups:
        tmp_rt_bridging_edges = 0
        for edge in nx.edge_boundary(RT_all, partition):
            tmp_rt_bridging_edges += RT_all.get_edge_data(*edge)["weight"]
            rt_bridging_edges[RT_all.get_edge_data(*edge)["weight"]].append((edge[0], edge[1]))
        total_bridging_edges += tmp_rt_bridging_edges

        # Collect the RT edges inside group
        tmp_rt_bonding_edges = 0
        for edge in RT_all.subgraph(partition).edges():
            tmp_rt_bonding_edges += RT_all.get_edge_data(*edge)["weight"]
            rt_bonding_edges[RT_all.get_edge_data(*edge)["weight"]].append((edge[0], edge[1]))
        total_bonding_edges += tmp_rt_bonding_edges

        summary_csv_writer.writerow([groups[i], tmp_rt_bonding_edges, tmp_rt_bridging_edges])
        print "Community %s, Total Retweets inside: %s, Total Retweets between %s" % (
            groups[i],
            tmp_rt_bonding_edges,
            tmp_rt_bridging_edges,
        )
        i += 1

    print "Total Bonding Edges %s" % total_bonding_edges
    print "Total Bridging Edges %s" % total_bridging_edges

    ##################BONDING: Influence of AT strengths on bonding retweets ##############################
    bonding_flow = defaultdict(list)
    for rt_strength, retweets in rt_bonding_edges.iteritems():
        for retweet in retweets:
            value = None
            try:
                if reverse:
                    value = at_bonding_edges[retweet[1]][retweet[0]]  # Reverse
                    del at_bonding_edges[retweet[1]][retweet[0]]  # delete that entry reverse
                else:
                    value = at_bonding_edges[retweet[0]][retweet[1]]  # Same direction
                    del at_bonding_edges[retweet[0]][retweet[1]]  # delete that entry same direction
            except:
                pass  # no AT tie for this retweet
            if value is None:  # Only if the AT network led to no diffusion do we check the FF network
                try:
                    if reverse:
                        value = ff_bonding_edges[retweet[1]][retweet[0]]  # Reverse
                        del ff_bonding_edges[retweet[1]][retweet[0]]  # delete that entry reverse
                    else:
                        value = ff_bonding_edges[retweet[0]][retweet[1]]  # Same direction
                        del ff_bonding_edges[retweet[0]][retweet[1]]  # delete that entry same direction
                except:
                    pass  # no FF tie either
            if value is None:  # A retweet happened despite there being no ties at all
                value = "no_tie"
            bonding_flow[value].append(rt_strength)

    bonding_no_flow = {}

    # Count the AT ties that led to no diffusion
    for k, v1 in at_bonding_edges.iteritems():
        for k, value in v1.iteritems():
            if bonding_no_flow.has_key(value):
                bonding_no_flow[value] += 1
            else:
                bonding_no_flow[value] = 0

    # Count the FF ties that led to no diffusion
    for k, v1 in ff_bonding_edges.iteritems():
        for k, value in v1.iteritems():
            if bonding_no_flow.has_key(value):
                bonding_no_flow[value] += 1
            else:
                bonding_no_flow[value] = 0

    ##################BRIDGING: Influence of AT strengths on bridging retweets ##############################

    bridging_flow = defaultdict(list)
    for rt_strength, retweets in rt_bridging_edges.iteritems():
        for retweet in retweets:
            value = None
            try:
                if reverse:
                    value = at_bridging_edges[retweet[1]][retweet[0]]  # reverse
                    del at_bridging_edges[retweet[1]][retweet[0]]  # delete that entry reverse
                else:
                    value = at_bridging_edges[retweet[0]][retweet[1]]  # Same direction
                    del at_bridging_edges[retweet[0]][retweet[1]]  # delete that entry same direction
            except:
                pass  # no AT tie for this retweet
            if value is None:  # Only if the AT network led to no diffusion do we check the FF network
                try:
                    if reverse:
                        value = ff_bridging_edges[retweet[1]][retweet[0]]  # Reverse
                        del ff_bridging_edges[retweet[1]][retweet[0]]  # delete that entry reverse
                    else:
                        value = ff_bridging_edges[retweet[0]][retweet[1]]  # Same direction
                        del ff_bridging_edges[retweet[0]][retweet[1]]  # delete that entry same direction
                except:
                    pass  # no FF tie either
            if value is None:  # A retweet happened despite there being no ties at all
                value = "no_tie"
            bridging_flow[value].append(rt_strength)

    bridging_no_flow = {}

    # Count the AT ties that led to no diffusion
    for k, v1 in at_bridging_edges.iteritems():
        for k, value in v1.iteritems():
            if bridging_no_flow.has_key(value):
                bridging_no_flow[value] += 1
            else:
                bridging_no_flow[value] = 0

    # Count the FF ties that led to no diffusion
    for k, v1 in ff_bridging_edges.iteritems():
        for k, value in v1.iteritems():
            if bridging_no_flow.has_key(value):
                bridging_no_flow[value] += 1
            else:
                bridging_no_flow[value] = 0

    ###########################  Output ###########################

    bridging_csv_writer.writerow(
        [
            "bridging_tie_type",
            "#_ties_w_retweets",
            "#_ties_w_o_retweets",
            "#_retweets",
            "%_of_total",
            "retweets/#_ties_w_o_retweets",
            "retweets/#_ties_w_retweets",
            "std",
        ]
    )
    bonding_csv_writer.writerow(
        [
            "bonding_tie_type",
            "#_ties_w_retweets",
            "#_ties_w_o_retweets",
            "#_retweets",
            "%_of_total",
            "retweets/#_ties_w_o_retweets",
            "retweets/#_ties_w_retweets",
            "std",
        ]
    )

    # BRIDGING TIES
    bridging_total = [val for subl in bridging_flow.values() for val in subl]
    bridging_noflow_total = sum(bridging_no_flow.values())
    for k, v in bridging_flow.iteritems():
        if bridging_no_flow.has_key(k) and bridging_no_flow[k] != 0 and len(bridging_flow[k]) > 5:
            ratio = sum(bridging_flow[k]) / bridging_no_flow[k]
            of_total = sum(bridging_flow[k]) / float(sum(bridging_total))
            std = np.std(bridging_flow[k])
            average = np.average(bridging_flow[k])
            bridging_csv_writer.writerow(
                [k, len(bridging_flow[k]), bridging_no_flow[k], sum(bridging_flow[k]), of_total, ratio, average, std]
            )
        if k == "no_tie":
            std = np.std(bridging_flow[k])
            average = np.average(bridging_flow[k])
            bridging_csv_writer.writerow([k, len(bridging_flow[k]), 0, sum(bridging_flow[k]), 0, 0, average, std])

    std = np.std(bridging_total)
    average = np.average(bridging_total)
    bridging_csv_writer.writerow(
        [
            "total",
            len(bridging_total),
            bridging_noflow_total,
            sum(bridging_total),
            1,
            sum(bridging_total) / float(bridging_noflow_total),
            average,
            std,
        ]
    )

    # BONDING TIES
    bonding_total = [val for subl in bonding_flow.values() for val in subl]
    bonding_noflow_total = sum(bonding_no_flow.values())
    for k, v in bonding_flow.iteritems():
        if bonding_no_flow.has_key(k) and bonding_no_flow[k] != 0 and len(bonding_flow[k]) > 5:
            ratio = sum(bonding_flow[k]) / bonding_no_flow[k]
            of_total = sum(bonding_flow[k]) / float(sum(bonding_total))
            std = np.std(bonding_flow[k])
            average = np.average(bonding_flow[k])
            bonding_csv_writer.writerow(
                [k, len(bonding_flow[k]), bonding_no_flow[k], sum(bonding_flow[k]), of_total, ratio, average, std]
            )
        if k == "no_tie":
            std = np.std(bonding_flow[k])
            average = np.average(bonding_flow[k])
            bonding_csv_writer.writerow([k, len(bonding_flow[k]), 0, sum(bonding_flow[k]), 0, 0, average, std])

    std = np.std(bonding_total)
    average = np.average(bonding_total)
    bonding_csv_writer.writerow(
        [
            "total",
            len(bonding_total),
            bonding_noflow_total,
            sum(bonding_total),
            1,
            sum(bonding_total) / float(bonding_noflow_total),
            average,
            std,
        ]
    )
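Another helper used throughout these examples is hp.get_partition. A plausible sketch, assuming the partition file has one row per user of the form user_id, group_name, place_on_list, competing_lists (which is how the other examples read it) and that the returned node lists and group names are index-aligned (hypothetical implementation, not the actual helper code):

import csv
from collections import OrderedDict

def get_partition(partitionfile="data/partitions/final_partitions_p100_200_0.2.csv"):
    # Group the user ids by the group column, preserving the order in the file.
    by_group = OrderedDict()
    for row in csv.reader(open(partitionfile)):
        by_group.setdefault(row[1], []).append(row[0])
    partitions = list(by_group.values())
    groups = list(by_group.keys())
    return partitions, groups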
import networkx as nx
import csv
import helper as hp
import sys
import getopt

partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
project = "584"
tmp = hp.get_partition(partitionfile)
partitions = tmp[0]
groups = tmp[1]
FF_all = nx.read_edgelist('data/networks/%s_FF.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph())
AT_all = nx.read_edgelist('data/networks/%s_solr_AT.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph()) 
RT_all = nx.read_edgelist('data/networks/%s_solr_RT.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph())

#Read in the network as a dict
def nonull(stream):
    for line in stream:
        yield line.replace('\x00', '')

def read_in_net(edges_file):
    net_hash = {}
    for row in edges_file:
        if not net_hash.has_key(row[0]):
            net_hash[row[0]] = {row[1]: []}
        if not net_hash[row[0]].has_key(row[1]):
            net_hash[row[0]][row[1]] = []
        net_hash[row[0]][row[1]].append(row[3])
    return net_hash

f1 = open("data/solr_584_at_connections.csv", "rb")
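# Presumably nonull() and read_in_net() are meant to be chained like this
# (hypothetical usage; the actual call does not appear in this snippet):
#   at_connections = read_in_net(csv.reader(nonull(f1)))
# nonull() strips NUL bytes that would otherwise break the csv module, and
# read_in_net() nests the rows into a {source: {target: [values]}} dict.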
def main(argv):
   # Default values
   partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
   project = "584"
   to_pajek = False
   try:
     opts, args = getopt.getopt(argv,"p:s:o")
   except getopt.GetoptError:
     print 'individual_bridging.py -p <project_name> -s <partitionfile> -o [if you want pajek output]'
     sys.exit(2)
   for opt, arg in opts:
       if opt in ("-p"):
           project = arg
       elif opt in ("-s"):
           partitionfile = arg
       elif opt in ("-o"):
            to_pajek = True
       else:
         print 'individual_bridging.py -p <project_name> -s <partitionfile> -o [if you want pajek output]'
   
   print "##################### INDIVIDUAL BRIDGING ########################"
   print "Project %s " % project
   print "Partition %s" % partitionfile
   
   csv_bridging_writer = csv.writer(open('results/spss/individual bridging/%s_individual_bridging.csv' % project, 'wb'))   
   csv_bridging_writer.writerow(["Name", "Group1", "Group2", "Number_between_ties",
                                 "Competing_lists",
                                 "FF_bin_degree", "FF_bin_in_degree", "FF_bin_out_degree",
                                 "FF_bin_betweeness",
                                 #"FF_c_size","FF_c_density","FF_c_hierarchy","FF_c_index",
                                 "FF_own_group_in_volume", "FF_other_group_in_volume",
                                 "FF_own_group_out_volume", "FF_other_group_out_volume",
                                 "AT_bin_degree", "AT_bin_in_degree", "AT_bin_out_degree",
                                 "AT_bin_betweeness",
                                 "AT_volume_in", "AT_volume_out",
                                 #"AT_c_size","AT_c_density","AT_c_hierarchy","AT_c_index",
                                 "AT_own_group_in_volume", "AT_other_group_in_volume",
                                 "AT_own_group_out_volume", "AT_other_group_out_volume",
                                 "RT_total_volume_in", "RT_total_volume_out",
                                 "RT_own_group_in_volume", "RT_other_group_in_volume",
                                 "RT_own_group_out_volume", "RT_other_group_out_volume"])
   
   #Read in the list-listings for individuals
   listings = {}
   indiv_reader = csv.reader(open(partitionfile))
   for row in indiv_reader:        
           listings[row[0]] = {"group":row[1],"place":int(row[2]), "competing_lists": int(row[3])}
   
   #Read in the edges between the groups and sort them
   GROUPS = 80 # 80x200 ~ 16000 individuals for analysis 
   reader = csv.reader(open("results/%s_bridging_edges.csv" % project, "rb"), delimiter=",")
   edges  = []
   for row in reader:
           edges.append({"group1":row[0],"group2":row[1], "count":float(row[2])})
   edges_sorted = sorted(edges, key=lambda k: k["count"])
   distance_between_samples = int(float(len(edges_sorted)) / GROUPS)
   if distance_between_samples == 0: distance_between_samples = 1 #Minimal Distance
   iterator = 0
   
   # Read in the partition
   tmp = hp.get_partition(partitionfile)
   partitions = tmp[0]
   groups = tmp[1]
   
   # Read in the networks   
   FF_all = nx.read_edgelist('data/networks/%s_FF.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph()) 
   AT_all = nx.read_edgelist('data/networks/%s_solr_AT.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph()) 
   RT_all = nx.read_edgelist('data/networks/%s_solr_RT.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph())
   
   i = 0
   for partition in partitions:
       for node in partition:
           FF_all.add_node(node, group =  groups[i]) # Add nodes 
           AT_all.add_node(node, group =  groups[i])
           RT_all.add_node(node, group =  groups[i])
       i += 1
   
   while iterator < len(edges_sorted):
      
      # Generate a subgraph consisting of two partitions
      # Problem: with k ~ 200 groups there are roughly 200*199/2 ~ 20,000 possible pairs. How do we pick them?
      # Solution 1: at random
      # Solution 2: based on the ordered tie strength between groups from the group-bridging step,
      # e.g. from [10,9,8,7,6,5,0] take every xth element to get a sample of the desired size, such as [10,8,6,0]
      # TODO Bin edges with the same weight into the same category and then select a group at random
      selected_edge = edges_sorted[iterator]
      group1 = selected_edge["group1"]
      group2 = selected_edge["group2"]
      index1 = groups.index(group1)
      index2 = groups.index(group2)   
      print "%s : %s with %s of strength %s" % (iterator, group1, group2, selected_edge["count"])
         
      # Create Subgraphs
      S_FF = FF_all.subgraph(partitions[index1]+partitions[index2])
      S_FF.name = "%s_%s" % (group1, group2)
      S_AT = AT_all.subgraph(partitions[index1]+partitions[index2])
      S_AT.name = "%s_%s" % (group1, group2)
      S_RT = RT_all.subgraph(partitions[index1]+partitions[index2])
      S_RT.name = "%s_%s" % (group1, group2)   
      iterator += distance_between_samples # Advance the iterator in equidistant steps
   
      #Optional Output to pajek   
      if to_pajek:
         print "Generating pajek output for %s %s" % (groups[index1], groups[index2])
         #Relabel for pajek
         def mapping(x):
                 return "\"%s\"" % x   
         H_FF = nx.relabel_nodes(S_FF,mapping)
         H_AT = nx.relabel_nodes(S_AT,mapping)
         H_RT = nx.relabel_nodes(S_RT,mapping)   
         #Write it to disk
         nx.write_pajek(H_FF,"results/networks/pairs/%s_%s_%s_pair_FF.net" % (project, groups[index1], groups[index2]))
         nx.write_pajek(H_AT,"results/networks/pairs/%s_%s_%s_pair_AT.net" % (project, groups[index1], groups[index2]))
         nx.write_pajek(H_RT,"results/networks/pairs/%s_%s_%s_pair_RT.net" % (project, groups[index1], groups[index2]))
      
      ################ MEASURES ################
      
      ## FF measures
      dFF_bin = nx.degree_centrality(S_FF)
      dFF_bin_in = nx.in_degree_centrality(S_FF)
      dFF_bin_out = nx.out_degree_centrality(S_FF)
      dFF_bin_betweeness = nx.betweenness_centrality(S_FF)
      # Structural holes has problems, probably with disconnected networks (possibly compute the biggest component first)
      # dFF_struc = sx.structural_holes(S_FF)
      # Which one is the own group and which is the other?
      dFF_group1_vol_in = hp.individual_in_volume(S_FF,group1)
      dFF_group2_vol_in = hp.individual_in_volume(S_FF,group2)
      dFF_group1_vol_out = hp.individual_out_volume(S_FF,group1)
      dFF_group2_vol_out = hp.individual_out_volume(S_FF,group2)   
      
      ## AT Measures
      dAT_bin = nx.degree_centrality(S_AT)
      dAT_bin_in = nx.in_degree_centrality(S_AT)
      dAT_bin_out = nx.out_degree_centrality(S_AT)
      dAT_bin_betweeness = nx.betweenness_centrality(S_AT)
      # Why can the structural holes not be computed here?
      #dAT_struc = sx.structural_holes(S_AT)
      dAT_group1_vol_in = hp.individual_in_volume(S_AT,group1)
      dAT_group2_vol_in = hp.individual_in_volume(S_AT,group2)
      dAT_group1_vol_out = hp.individual_out_volume(S_AT,group1)
      dAT_group2_vol_out = hp.individual_out_volume(S_AT,group2)        
      
      ############### DEPENDENT VARIABLES ###########
      
      dRT_group1_vol_in = hp.individual_in_volume(S_RT,group1)
      dRT_group2_vol_in = hp.individual_in_volume(S_RT,group2)
      dRT_group1_vol_out = hp.individual_out_volume(S_RT,group1)
      dRT_group2_vol_out = hp.individual_out_volume(S_RT,group2)
      
      ############ OUTPUT ###########################
      #Arrange it in a list and output
      for node in dFF_bin.keys():
         # Depending on whether the node is in partition 1 or 2, the definition of "own" and "other" changes.
         if node in partitions[index1]:
            #FF
            FF_own_group_in_volume = dFF_group1_vol_in[node]
            FF_own_group_out_volume = dFF_group1_vol_out[node]
            FF_other_group_in_volume = dFF_group2_vol_in[node]         
            FF_other_group_out_volume = dFF_group2_vol_out[node]
            #AT
            AT_own_group_in_volume = dAT_group1_vol_in[node]
            AT_own_group_out_volume = dAT_group1_vol_out[node]
            AT_other_group_in_volume = dAT_group2_vol_in[node]         
            AT_other_group_out_volume = dAT_group2_vol_out[node]
            #RT
            RT_own_group_in_volume = dRT_group1_vol_in[node]
            RT_own_group_out_volume = dRT_group1_vol_out[node]
            RT_other_group_in_volume = dRT_group2_vol_in[node]         
            RT_other_group_out_volume = dRT_group2_vol_out[node]
         else:
            FF_own_group_in_volume = dFF_group2_vol_in[node]
            FF_own_group_out_volume = dFF_group2_vol_out[node]
            FF_other_group_in_volume = dFF_group1_vol_in[node]         
            FF_other_group_out_volume = dFF_group1_vol_out[node]
            #AT
            AT_own_group_in_volume = dAT_group2_vol_in[node]
            AT_own_group_out_volume = dAT_group2_vol_out[node]
            AT_other_group_in_volume = dAT_group1_vol_in[node]         
            AT_other_group_out_volume = dAT_group1_vol_out[node]
            #RT
            RT_own_group_in_volume = dRT_group2_vol_in[node]
            RT_own_group_out_volume = dRT_group2_vol_out[node]
            RT_other_group_in_volume = dRT_group1_vol_in[node]         
            RT_other_group_out_volume = dRT_group1_vol_out[node]
            
         csv_bridging_writer.writerow([node, group1, group2,selected_edge["count"],
                                       listings[node]["competing_lists"],
                                       dFF_bin[node], dFF_bin_in[node], dFF_bin_out[node],
                                       dFF_bin_betweeness[node],
                                       #dFF_struc[node]['C-Size'],dFF_struc[node]['C-Density'],dFF_struc[node]['C-Hierarchy'],dFF_struc[node]['C-Index'],
                                       FF_own_group_in_volume, FF_other_group_in_volume,
                                       FF_own_group_out_volume, FF_other_group_out_volume,
                                       dAT_bin[node], dAT_bin_in[node], dAT_bin_out[node],
                                       dAT_bin_betweeness[node],
                                       S_AT.in_degree(node,weight="weight"), S_AT.out_degree(node,weight="weight"),
                                       #dAT_struc[node]['C-Size'],dAT_struc[node]['C-Density'],dAT_struc[node]['C-Hierarchy'],dAT_struc[node]['C-Index'],
                                       AT_own_group_in_volume, AT_other_group_in_volume,
                                       AT_own_group_out_volume, AT_other_group_out_volume,
                                       S_RT.in_degree(node,weight="weight"), S_RT.out_degree(node,weight="weight"),
                                       RT_own_group_in_volume, RT_other_group_in_volume,
                                       RT_own_group_out_volume, RT_other_group_out_volume,
                                      ])
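A minimal sketch of hp.individual_in_volume / hp.individual_out_volume as they are used above, assuming they return, for every node in the subgraph, the summed weight of ties coming from (respectively going to) neighbours whose "group" node attribute equals the given group (hypothetical implementations, not the actual helper code):

def individual_in_volume(G, group):
    # Summed weight of incoming ties from members of the given group, per node.
    volume = {}
    for node in G.nodes():
        total = 0.0
        for source, _, data in G.in_edges(node, data=True):
            if G.node[source].get("group") == group:
                total += data.get("weight", 1.0)
        volume[node] = total
    return volume

def individual_out_volume(G, group):
    # Summed weight of outgoing ties towards members of the given group, per node.
    volume = {}
    for node in G.nodes():
        total = 0.0
        for _, target, data in G.out_edges(node, data=True):
            if G.node[target].get("group") == group:
                total += data.get("weight", 1.0)
        volume[node] = total
    return volume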
def main(argv):
   # Default values
   partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
   project = "584"
   to_pajek = False
   try:
     opts, args = getopt.getopt(argv,"p:s:o")
   except getopt.GetoptError:
     print 'individual_bridging_2.py -p <project_name> -s <partitionfile> '
     sys.exit(2)
   for opt, arg in opts:
       if opt in ("-p"):
           project = arg
       elif opt in ("-s"):
           partitionfile = arg
       else:
         print 'individual_bridging_2.py -p <project_name> -s <partitionfile> '
   
   print "##################### INDIVIDUAL BRIDGING 2 (Working on whole network) ########################"
   print "Project %s " % project
   print "Partition %s" % partitionfile
   
   csv_bridging_writer = csv.writer(open('results/spss/individual bridging/%s_individual_bridging_3.csv' % project, 'wb'))
   csv_bridging_writer.writerow(["Project", "Community", "Person_ID",
                                 "Competing_lists",
                                 "FF_bin_degree", "FF_bin_in_degree", "FF_bin_out_degree",
                                 "FF_vol_in", "FF_vol_out",
                                 "FF_groups_in", "FF_groups_out",
                                 "FF_rec",
                                 "FF_bin_betweeness", #"FF_bin_closeness", "FF_bin_pagerank",
                                  #"FF_c_size", "FF_c_density", "FF_c_hierarchy", "FF_c_index",
                                 "AT_bin_degree", "AT_bin_in_degree", "AT_bin_out_degree",
                                 "AT_vol_in", "AT_vol_out",
                                 "AT_groups_in", "AT_groups_out",
                                 "AT_rec",
                                 "AT_bin_betweeness",#, "AT_bin_closeness", "AT_bin_pagerank",
                                 # FF_c_size, FF_c_density, FF_c_hierarchy, FF_c_index,
                                 "AT_avg_tie_strength","AT_strength_centrality_in",
                                 "RT_bin_in_degree", "RT_bin_out_degree",
                                 "RT_vol_in", "RT_vol_out"])
   
   #Read in the list-listings for individuals
   listings = {}
   indiv_reader = csv.reader(open(partitionfile))
   for row in indiv_reader:        
           listings[row[0]] = {"group":row[1],"place":int(row[2]), "competing_lists": int(row[3])}
   
   # Read in the centralities of nodes in their corresponding community
   centralities = {}
   centrality_reader = csv.reader(open('results/spss/individual bonding/%s_individual_bonding.csv' % project))
   for row in centrality_reader:
      centralities[row[2]] = {"ff_in_degree":row[5]}
   
   # Read in the partition
   tmp = hp.get_partition(partitionfile)
   partitions = tmp[0]
   groups = tmp[1]
   
   # Read in the networks   
   FF_all = nx.read_edgelist('data/networks/%s_FF.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph()) 
   AT_all = nx.read_edgelist('data/networks/%s_solr_AT.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph()) 
   RT_all = nx.read_edgelist('data/networks/%s_solr_RT.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph())
   print "Done reading in Networks"
   
   #Determine the Maximum subset of nodes present in all Networks   
   maximum_subset = []
   for node in FF_all.nodes():
      if AT_all.has_node(node) and RT_all.has_node(node):
         maximum_subset.append(node)
   
   i = 0
   for partition in partitions:
       for node in partition:
           FF_all.add_node(node, group =  groups[i]) # Add nodes 
           AT_all.add_node(node, group =  groups[i])
           RT_all.add_node(node, group =  groups[i])
       i += 1

   i = 0
   
   # These measures are computed only once on the whole graph (this introduces some error, since the internal group structure also contributes to these values)
   if len(maximum_subset) < 1000:
      scaling_k = len(maximum_subset)
   else:
      scaling_k = len(maximum_subset)/100
   dFF_bin_betweeness = nx.betweenness_centrality(FF_all,k=scaling_k)
   dAT_bin_betweeness = nx.betweenness_centrality(AT_all,k=scaling_k)
   #dFF_struc = sx.structural_holes(FF_all)
   
   for partition in partitions:      
      project_name = groups[i]
      
      #Determine the groups that are not in the partition
      all_other_groups = groups[:]
      group = groups[i]
      all_other_groups.remove(group)
      
      # Get all the partitions without the current partition
      partitions_without_partition = partitions[:]
      partitions_without_partition.remove(partition)
      
      #Remove the nodes that are in this partition
      remaining_nodes = [item for sublist in partitions for item in sublist] #flatlist of all nodes
      for nodes_to_be_deleted in partition:
         remaining_nodes.remove(nodes_to_be_deleted)
      
      #Create Subgraphs that contain all nodes except the ones that are in the partition
      S_FF = FF_all.subgraph(remaining_nodes)
      S_AT = AT_all.subgraph(remaining_nodes)
      S_RT = RT_all.subgraph(remaining_nodes)
      
      i += 1
      for node in partition:
         if node in maximum_subset:            
            t0 = time.time() 
            
            #Add FF nodes and edges
            S_FF.add_node(node, group = group)            
            S_FF.add_edges_from(FF_all.in_edges(node,data=True)) # in edges 
            S_FF.add_edges_from(FF_all.out_edges(node,data=True)) #out edges               
             # Remove nodes from this partition that were accidentally re-added by importing all of the node's edges
            for tmp_node in partition:
               if tmp_node != node and tmp_node in S_FF:
                  S_FF.remove_node(tmp_node)
                        
            # Add AT nodes and edges
            S_AT.add_node(node, group = group)
            S_AT.add_edges_from(AT_all.in_edges(node,data=True)) # in edges 
            S_AT.add_edges_from(AT_all.out_edges(node,data=True)) #out edges
             # Remove nodes from this partition that were accidentally re-added by importing all of the node's edges
            for tmp_node in partition:
               if tmp_node != node and tmp_node in S_AT:
                  S_AT.remove_node(tmp_node)
                  
            S_RT.add_node(node, group = group)
            S_RT.add_edges_from(RT_all.in_edges(node,data=True)) # in edges 
            S_RT.add_edges_from(RT_all.out_edges(node,data=True)) #out edges   
             # Remove nodes from this partition that were accidentally re-added by importing all of the node's edges
            for tmp_node in partition:
               if tmp_node != node and tmp_node in S_RT:
                  S_RT.remove_node(tmp_node)
                  
            print "Done creating Subgraphs"
            
            ## FF measures
            dFF_bin = nx.degree_centrality(S_FF)
            dFF_bin_in = nx.in_degree_centrality(S_FF)
            dFF_bin_out = nx.out_degree_centrality(S_FF)            
            #nx.load_centrality(S_FF,v=node, weight="weight")
            #dFF_bin_closeness = nx.closeness_centrality(S_FF,v=node)
            #dFF_bin_pagerank = nx.pagerank(S_FF, weight="weight")            
            dFF_total_in_groups = hp.filtered_group_volume(hp.incoming_group_volume(S_FF,node,all_other_groups),0)
            dFF_total_out_groups = hp.filtered_group_volume(hp.outgoing_group_volume(S_FF,node,all_other_groups),0)            
            dFF_rec = hp.individual_reciprocity(S_FF,node)   #number of reciprocated ties            
            
            ## AT Measures
            dAT_bin = nx.degree_centrality(S_AT)
            dAT_bin_in = nx.in_degree_centrality(S_AT)
            dAT_bin_out = nx.out_degree_centrality(S_AT)
            #dAT_bin_betweeness = nx.betweenness_centrality(S_AT, k=100) #nx.load_centrality(S_AT,v=node,weight="weight")
            #dAT_bin_closeness = nx.closeness_centrality(S_AT,v=node) 
            #dAT_bin_pagerank = nx.pagerank(S_AT,weight="weight")
            dAT_total_in_groups = hp.filtered_group_volume(hp.incoming_group_volume(S_AT,node,all_other_groups),0)
            dAT_total_out_groups = hp.filtered_group_volume(hp.outgoing_group_volume(S_AT,node,all_other_groups),0)
            dAT_rec = hp.individual_reciprocity(S_AT,node)   #number of @reciprocated ties
            dAT_avg_tie = hp.individual_average_tie_strength(S_AT,node)
            
             #Compute a combined measure which multiplies the strength of incoming ties by the centrality of the person they come from
            dAT_strength_centrality = 0
            for edge in S_AT.in_edges(node,data=True):
               if edge[0] in maximum_subset:
                  dAT_strength_centrality += edge[2]["weight"]*float(centralities[edge[0]]["ff_in_degree"]) #get the centrality of the node that the tie is incoming from
            
            ############### DEPENDENT VARIABLES ###########
            
            dRT_in = nx.in_degree_centrality(S_RT) # At least once a retweets that a person has received 
            dRT_out = nx.out_degree_centrality(S_RT) # At least one retweets that a person has made            
            print "Done computing Measures"
            
            try:
               c_size = dFF_struc[node]['C-Size']
               c_dens = dFF_struc[node]['C-Density']
               c_hierarch = dFF_struc[node]['C-Hierarchy']
               c_index = dFF_struc[node]['C-Index']
            except:
               c_size = "NaN"
               c_dens = "NaN"
               c_hierarch = "NaN"
               c_index = "NaN"
               
            csv_bridging_writer.writerow([project, project_name, node, 
                                          listings[node]["competing_lists"],
                                          dFF_bin[node], dFF_bin_in[node], dFF_bin_out[node],
                                          S_FF.in_degree(node,weight="weight"), S_FF.out_degree(node,weight="weight"),
                                          dFF_total_in_groups, dFF_total_out_groups,
                                          dFF_rec[node],
                                          dFF_bin_betweeness[node],#dFF_bin_closeness[node],dFF_bin_pagerank[node],                                                                                    
                                          #c_size,c_dens,c_hierarch,c_index,                                                                                    
                                          dAT_bin[node], dAT_bin_in[node], dAT_bin_out[node],
                                          S_AT.in_degree(node,weight="weight"), S_AT.out_degree(node, weight="weight"),
                                          dAT_total_in_groups, dAT_total_out_groups,
                                          dAT_rec[node],
                                          dAT_bin_betweeness[node],#dAT_bin_closeness[node], dAT_bin_pagerank[node],                                       
                                          #dAT_struc[node]['C-Size'],dAT_struc[node]['C-Density'],dAT_struc[node]['C-Hierarchy'],dAT_struc[node]['C-Index'],                                          
                                          dAT_avg_tie[node],dAT_strength_centrality,
                                          dRT_in[node],dRT_out[node],   
                                          S_RT.in_degree(node,weight="weight"), S_RT.out_degree(node,weight="weight")
                                         ])
            t_delta = (time.time() - t0)
            print "Count: %s Node: %s Time: %s" % (i,node,t_delta)
            
            #Remove the nodes again
            S_FF.remove_node(node)
            S_AT.remove_node(node)
            S_RT.remove_node(node)
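The second individual-bridging variant also uses hp.incoming_group_volume, hp.outgoing_group_volume and hp.filtered_group_volume. A rough guess at their shape, based on how the results end up in the "FF_groups_in"/"FF_groups_out" columns: the first two return a per-group volume dict for a single node, and the filter counts how many groups exceed a threshold (hypothetical implementations, not the actual helper code):

def incoming_group_volume(G, node, groups):
    # Summed weight of incoming ties, broken down by the sender's group.
    volume = dict((g, 0.0) for g in groups)
    for source, _, data in G.in_edges(node, data=True):
        g = G.node[source].get("group")
        if g in volume:
            volume[g] += data.get("weight", 1.0)
    return volume

def outgoing_group_volume(G, node, groups):
    # Summed weight of outgoing ties, broken down by the receiver's group.
    volume = dict((g, 0.0) for g in groups)
    for _, target, data in G.out_edges(node, data=True):
        g = G.node[target].get("group")
        if g in volume:
            volume[g] += data.get("weight", 1.0)
    return volume

def filtered_group_volume(volume, threshold):
    # Number of groups whose volume exceeds the threshold.
    return len([v for v in volume.values() if v > threshold])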
Example #6
def main(argv):
    # Default values
    partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
    project = "584"
    to_pajek = True
    
    try:
      opts, args = getopt.getopt(argv,"p:s:o")
    except getopt.GetoptError:
      print 'group_bonding.py -p <project_name> -s <partitionfile> -o [if you want pajek output]'
      sys.exit(2)
    for opt, arg in opts:
        if opt in ("-p"):
            project = arg
        elif opt in ("-s"):
            partitionfile = arg
        elif opt in ("-o"):
             to_pajek = True
        else:
            print 'group_bonding.py -p <project_name> -s <partitionfile> -o [if you want pajek output]'
    
    print "##################### GROUP BONDING ########################"
    print "Project %s " % project
    print "Partition %s" % partitionfile
    
    csv_writer = csv.writer(open('results/spss/group bonding/%s_group_bonding.csv' % project, 'wb'))
    
    #Attributes for Gephi
    csv_attributes = csv.writer(open('results/networks/%s_at_node_attributes.csv' % project, 'wb'))
    
    csv_writer.writerow(["Project", "Name", "Member_count", "Competing_Lists",
                        "FF_Nodes", "AT_Nodes", "RT_Nodes",
                        "FF_Edges","AT_Edges", "RT_Edges",
                        "FF_bin_density", "AT_density",
                        "FF_bin_avg_path_length", "AT_bin_avg_path_length", 
                        "FF_bin_clustering", "AT_bin_clustering",
                        "FF_reciprocity", "AT_reciprocity",
                        "FF_bin_transitivity", "AT_bin_transitivity",                    
                        "RT_density", "RT_total_volume"
                        ])    
        
    
    # Read in the networks    
    FF_all = nx.read_edgelist('data/networks/%s_FF.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph()) 
    AT_all = nx.read_edgelist('data/networks/%s_solr_AT.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph()) 
    RT_all = nx.read_edgelist('data/networks/%s_solr_RT.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph())
    
    # Read in the partition
    tmp = hp.get_partition(partitionfile)
    partitions = tmp[0]
    groups = tmp[1]
    
    #Read in the member count for each project
    reader = csv.reader(open("results/stats/%s_lists_stats.csv" % project, "rb"), delimiter=",")
    temp  = {}
    reader.next() # Skip first row
    for row in reader:        
            temp[row[0]] = {"name":row[0],"member_count":int(row[3])}
    
    #Read in the list-listings for individuals
    listings = {}
    indiv_reader = csv.reader(open(partitionfile))
    for row in indiv_reader:                
            if listings.has_key(row[1]):
                listings[row[1]]["competing_lists"] += int(row[3])
            else:
                listings[row[1]] = {"competing_lists": int(row[3])}

               
    i = 0
    for partition in partitions:
        for node in partition:
            FF_all.add_node(node, group =  groups[i])
            AT_all.add_node(node, group =  groups[i])
            RT_all.add_node(node, group =  groups[i])
        i += 1
        
    #Write out to pajek for gephi visualization
    if to_pajek:
        #Write the attributes file
        i= 0
        csv_attributes.writerow(["id", "name", "type"])
        for node in AT_all.nodes():
            i+= 1
            csv_attributes.writerow([i, node, AT_all.node[node]["group"]])
            
        nx.write_pajek(FF_all,"results/networks/%s_FF.net" % project)
        nx.write_pajek(AT_all,"results/networks/%s_AT.net" % project)
        nx.write_pajek(RT_all,"results/networks/%s_RT.net" % project)
        

        
    
    i = 0    
    for partition in partitions:
    
        project_name = groups[i]    
        # Add up total members
        member_count = int(temp[project_name]["member_count"])
        
        print "############ Calculating Project %s ############### " % project_name
    
        # Generate a subgraph according to the partition
        FF = FF_all.subgraph(partition)
        AT = AT_all.subgraph(partition)
        RT = RT_all.subgraph(partition)
        
        #Additional Info for each project    
        FF.name = "FF_%s " % project_name
        AT.name = "AT_%s " % project_name
        RT.name = "RT_%s " % project_name
    
        ############### Compute Group measures ################
    
        #Measures FF
        FF_bin_density = nx.density(FF)    
        FF_bin_transitivity = nx.transitivity(FF)            
        FF_reciprocity = hp.reciprocity(FF) # Calculate the share of reciprocated ties among all ties
        
        # Measures that need a connected graph
        # In case the graph splits into multiple components, take the biggest weakly connected component
        FF_partition = nx.weakly_connected_components(FF)[0]    
        FF_comp = FF.subgraph(FF_partition)    
        FF_bin_avg_path_length = nx.average_shortest_path_length(FF_comp)
        # Networks with a lot of mutual trust have a high clustering coefficient;
        # star networks with a single broadcast node and passive listeners have a low one.
        FF_bin_clustering = nx.average_clustering(FF_comp.to_undirected(),count_zeros=False)
        
        # Measures AT
        #AT_density = nx.density(AT) # deprecated since it treats the network as binarized and we lose all the interaction information
        AT_density = hp.average_tie_strength(AT)
        AT_bin_transitivity = nx.transitivity(AT)
        AT_reciprocity = hp.reciprocity(AT)
        #AT_avg_volume = hp.average_tie_strength(AT)
        
        AT_partition = nx.weakly_connected_components(AT)[0]
        AT_comp = AT.subgraph(AT_partition)
        AT_bin_avg_path_length = nx.average_shortest_path_length(AT_comp)
        AT_bin_clustering = nx.average_clustering(AT_comp.to_undirected())
            
        # Dependent Variable
        #RT_density = nx.density(RT) # Danger this works on the binarized graph! # TODO I need a weighted density for RT
        RT_density = hp.average_tie_strength(RT) 
        RT_total_volume = hp.total_edge_weight(RT)
    
        ############### Output ################        
        csv_writer.writerow([project, project_name, member_count, listings[project_name]["competing_lists"],
                             len(FF.nodes()), len(AT.nodes()), len(RT.nodes()),
                             len(FF.edges()), len(AT.edges()), len(RT.edges()),
                            FF_bin_density, AT_density,
                            FF_bin_avg_path_length, AT_bin_avg_path_length,
                            FF_bin_clustering, AT_bin_clustering,
                            FF_reciprocity, AT_reciprocity,
                            FF_bin_transitivity, AT_bin_transitivity,                        
                            RT_density, RT_total_volume])
        i += 1
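The group-bonding measures lean on three more helpers. A rough sketch, assuming hp.reciprocity returns the share of directed ties that are reciprocated, hp.average_tie_strength the mean edge weight (used above in place of the binarized density), and hp.total_edge_weight the summed weight of all edges (hypothetical implementations, not the actual helper code):

def reciprocity(G):
    # Share of directed ties whose reverse tie also exists.
    if G.number_of_edges() == 0:
        return 0.0
    reciprocated = sum(1 for u, v in G.edges() if G.has_edge(v, u))
    return reciprocated / float(G.number_of_edges())

def total_edge_weight(G):
    # Summed weight over all edges.
    return sum(data.get("weight", 1.0) for _, _, data in G.edges(data=True))

def average_tie_strength(G):
    # Mean edge weight, a weighted stand-in for density on these graphs.
    if G.number_of_edges() == 0:
        return 0.0
    return total_edge_weight(G) / float(G.number_of_edges())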
Example #7
import networkx as nx
import csv
import helper as hp

csv_writer = csv.writer(open('results/spss/whole network/whole_network.csv', 'wb'))
csv_writer.writerow(["FF_assortativity","AT_assortativity","RT_assortativity"])

# Read in the partition
tmp = hp.get_partition()
partitions = tmp[0]
groups = tmp[1]

# Read in the networks
project = "584"
FF_all = nx.read_edgelist('data/networks/%s_FF.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph()) 
AT_all = nx.read_edgelist('data/networks/%s_AT.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph()) 
RT_all = nx.read_edgelist('data/networks/%s_RT.edgelist' % project, nodetype=str, data=(('weight',float),),create_using=nx.DiGraph())

# Add dummy nodes if they are missing in the networks
i = 0
for partition in partitions:
    for node in partition:
        FF_all.add_node(node, group =  groups[i])
        AT_all.add_node(node, group =  groups[i])
        RT_all.add_node(node, group =  groups[i])
    i += 1


# Compute Assortativity in Friendships
aFF = nx.attribute_assortativity_coefficient(FF_all,'group')
aAT = nx.attribute_assortativity_coefficient(AT_all,'group')
aRT = nx.attribute_assortativity_coefficient(RT_all,'group')
csv_writer.writerow([aFF, aAT, aRT])
def main(argv):
    # Default values
    partitionfile = "data/partitions/final_partitions_p100_200_0.2.csv"
    project = "584"
    try:
        opts, args = getopt.getopt(argv, "p:s:")
    except getopt.GetoptError:
        print "individual_bonding.py -p <project_name> -s <partitionfile>"
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-p"):
            project = arg
        elif opt in ("-s"):
            partitionfile = arg
        else:
            print "individual_bonding.py -p <project_name> -s <partitionfile>"

    print "##################### INDIVIDUAL BONDING ########################"
    print "Project %s " % project
    print "Partition %s" % partitionfile

    csv_writer = csv.writer(open("results/spss/individual bonding/%s_individual_bonding.csv" % project, "wb"))
    csv_writer.writerow(
        [
            "Project",
            "Community",
            "Person_ID",
            "Place_on_list",
            "FF_bin_deg",
            "FF_bin_in_deg",
            "FF_bin_out_deg",
            "FF_vol_in",
            "FF_vol_out",
            "FF_bin_close",
            "FF_bin_page",
            "FF_rec",
            "AT_bin_deg",
            "AT_bin_in_deg",
            "AT_bin_out_deg",
            "AT_bin_close",
            "AT_bin_page",
            "AT_rec",
            "AT_avg",
            "AT_vol_in",
            "AT_vol_out",
            "RT_bin_deg_in",
            "RT_bin_deg_out",
            "RT_vol_in",
            "RT_vol_out",
            "RT_global_vol_in",
            "RT_global_vol_out",
        ]
    )

    # Read in the list-listings for individuals
    listings = {}
    indiv_reader = csv.reader(open(partitionfile))
    i = 0
    for row in indiv_reader:
        if i > int(row[2]):  # in case there are fewer than 101 entries for a group for some reason
            i = 0
        i += 1
        listings[row[0]] = {"group": row[1], "place": i, "competing_lists": int(row[3]), "original_place": int(row[2])}
        if i == 101:  # Some of the original places have shifted because of the regrouping
            i = 0

    # Read in Networks
    FF_all = nx.read_edgelist(
        "data/networks/%s_FF.edgelist" % project, nodetype=str, data=(("weight", float),), create_using=nx.DiGraph()
    )
    AT_all = nx.read_edgelist(
        "data/networks/%s_solr_AT.edgelist" % project,
        nodetype=str,
        data=(("weight", float),),
        create_using=nx.DiGraph(),
    )
    RT_all = nx.read_edgelist(
        "data/networks/%s_solr_RT.edgelist" % project,
        nodetype=str,
        data=(("weight", float),),
        create_using=nx.DiGraph(),
    )

    # Read in the partitions
    tmp = hp.get_partition(partitionfile)
    partitions = tmp[0]
    groups = tmp[1]

    # We limit the analysis to the maximal subset of nodes that are present in all networks
    maximum_subset = []
    for node in FF_all.nodes():
        if AT_all.has_node(node) and RT_all.has_node(node):
            maximum_subset.append(node)
        else:
            print node
    print "Maximum Subset of nodes %s" % len(maximum_subset)

    # In this case we do not add missing nodes to the networks; using the subset method instead keeps the error in the final regressions smaller.
    # i = 0
    # for partition in partitions:
    #    for node in partition:
    #        FF_all.add_node(node, group =  groups[i])
    #        AT_all.add_node(node, group =  groups[i])
    #        RT_all.add_node(node, group =  groups[i])
    #    i += 1

    i = 0

    for partition in partitions:

        project_name = groups[i]
        print "############ Calculating Project %s ############### " % project_name
        # Generate a subgraph according to the partition
        FF = FF_all.subgraph(partition)
        AT = AT_all.subgraph(partition)
        RT = RT_all.subgraph(partition)

        # Additional Info for each project
        FF.name = "FF_%s " % project_name
        AT.name = "AT_%s " % project_name
        RT.name = "RT_%s " % project_name

        # hp.draw_graph(FF)
        # hp.draw_graph(AT)
        # hp.draw_graph(RT)

        ############### Compute Individual measures ################

        # Compute FF Centralities
        # Works fine on binary data
        dFF_bin = nx.degree_centrality(FF)
        dFF_bin_in = nx.in_degree_centrality(FF)  # People that follow me in the network
        dFF_bin_out = nx.out_degree_centrality(FF)  # People that I follow in the network
        dFF_bin_closeness = nx.closeness_centrality(FF)
        dFF_bin_pagerank = nx.pagerank(FF)
        try:
            dFF_bin_eigenvector = nx.eigenvector_centrality(FF, 10000)
        except:
            print "Failed to compute for FF %s " % FF.name

        # if len(nx.weakly_connected_components(FF)) > 1:
        #    FF_comp = FF.subgraph(nx.weakly_connected_components(FF)[0])
        #    dFF_bin_eigenvector = nx.eigenvector_centrality(FF_comp)
        # else:

        # Compute AT Centralities
        # Centralities are problematic on weighted data, since the binarized measures lose all the weight information
        dAT_bin = nx.degree_centrality(AT)  # binary
        dAT_bin_in = nx.in_degree_centrality(AT)  # binary
        dAT_bin_out = nx.out_degree_centrality(AT)  # binary
        dAT_bin_closeness = nx.closeness_centrality(AT)  # binary
        dAT_bin_pagerank = nx.pagerank(AT)
        try:
            dAT_bin_eigenvector = nx.eigenvector_centrality(AT, 10000)
        except:
            print "Failed to compute for AT %s " % AT.name
        # if len(nx.weakly_connected_components(AT)) > 1:
        #    AT_comp = AT.subgraph(nx.weakly_connected_components(AT)[0])
        #    dFF_bin_eigenvector = nx.eigenvector_centrality(AT_comp)
        # else:
        #

        # Tie strengths
        dAT_avg_tie = hp.individual_average_tie_strength(AT)
        dAT_rec = hp.individual_reciprocity(AT)
        dFF_rec = hp.individual_reciprocity(FF)

        # Dependent Variable see csv below
        # Deprecated since in networkx centrality works only on binary edges
        dRT_in = nx.in_degree_centrality(RT)  # At least one retweet that a person has received
        dRT_out = nx.out_degree_centrality(RT)  # At least one retweet that a person has made

        ############### Output ################
        for node in dFF_bin.keys():
            if node in maximum_subset:
                csv_writer.writerow(
                    [
                        project,
                        project_name,
                        node,
                        listings[node]["place"],
                        dFF_bin[node],
                        dFF_bin_in[node],
                        dFF_bin_out[node],
                        FF.in_degree(node, weight="weight"),
                        FF.out_degree(node, weight="weight"),
                        dFF_bin_closeness[node],
                        dFF_bin_pagerank[node],
                        dFF_rec[node],
                        dAT_bin[node],
                        dAT_bin_in[node],
                        dAT_bin_out[node],
                        dAT_bin_closeness[node],
                        dAT_bin_pagerank[node],
                        dAT_rec[node],
                        dAT_avg_tie[node],
                        AT.in_degree(node, weight="weight"),
                        AT.out_degree(node, weight="weight"),
                        dRT_in[node],
                        dRT_out[node],
                        RT.in_degree(node, weight="weight"),
                        RT.out_degree(node, weight="weight"),
                        RT_all.in_degree(node, weight="weight"),
                        RT_all.out_degree(node, weight="weight"),
                    ]
                )

        i += 1
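Finally, a minimal sketch of the per-node tie-strength helpers used in the bridging and bonding examples, assuming hp.individual_reciprocity counts each node's reciprocated ties and hp.individual_average_tie_strength averages the weight of a node's incident ties; the optional node argument mirrors the single-node calls in the second individual-bridging example (hypothetical implementations, not the actual helper code):

def individual_reciprocity(G, node=None):
    # Number of reciprocated ties per node (optionally restricted to one node).
    nodes = [node] if node is not None else G.nodes()
    result = {}
    for n in nodes:
        result[n] = sum(1 for _, v in G.out_edges(n) if G.has_edge(v, n))
    return result

def individual_average_tie_strength(G, node=None):
    # Average weight of a node's incident (incoming and outgoing) ties.
    nodes = [node] if node is not None else G.nodes()
    result = {}
    for n in nodes:
        edges = list(G.in_edges(n, data=True)) + list(G.out_edges(n, data=True))
        weights = [data.get("weight", 1.0) for _, _, data in edges]
        result[n] = sum(weights) / len(weights) if weights else 0.0
    return result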