Ejemplo n.º 1
0
def test_default():
    """Check invert_dict on a single-branch nested dict.

    The input is the chain hired -> be -> to -> deserve -> I; the expected
    result is the same chain read in the opposite direction.
    """
    nested = {'hired': {'be': {'to': {'deserve': 'I'}}}}
    expected = {'I': {'deserve': {'to': {'be': 'hired'}}}}

    result = invert_dict(nested)

    assert result == expected
Ejemplo n.º 2
0
def all_centrality_betweenness(D, N=None):
    """ALL-CENTRALITY-BETWEENNESS(D)
		centrality_dictionary = {} // an empty dictionary
		for i = 0 to D.size - 1
			centrality_dictionary[i] = CENTRALITY-BETWEENNESS(i, D)
		return centrality_dictionary"""
  
    global SP # beware use of global variables
    
    SP = {}
    centrality_dictionary = {}
    
    for i in range(len(D)):
        
        centrality_dictionary[i] = centrality_betweenness(i, D)   

    if N is not None:
           
        for i in SP:
            
            if len(SP[i]) != 1:
                
                print SP[i],
     
                Ninv = invert_dict.invert_dict(N)
                
    #            http://stackoverflow.com/questions/11264684/flatten-list-of-lists
                
                airports = list(itertools.chain.from_iterable(SP[i]))
                airports = list(set(airports))
                airports.sort()
                
                for j in airports:
                    
                    print j, Ninv[j],
    
                print 
                
    #            raw_input()
        
        print
    
    return centrality_dictionary
Ejemplo n.º 3
0
def route_level_g(network):
    """Build the route-level network (one node per route) from a network.

    network -- pair (N, g). N is a dict whose inverse (invert_dict(N)) is
        indexed by row/column number and yields a node name; g is the
        adjacency matrix and must support g[row][col] and g.dot(g).
        The route count is taken as sum(g) / 2, so g is presumably a
        symmetric 0/1 numpy array with a zero diagonal -- TODO confirm.

    Every entry g[row][col] == 1 with row < col becomes one route named
    '<name of row>_<name of col>'. Two different routes are linked in the
    route-level matrix when they have an endpoint in common; a route is
    never linked to itself.

    Example input (from the original test case):
        N = {'one': 0, 'two': 1, 'three': 2, 'four': 3, 'five': 4}
        g = numpy.array([[0,0,1,0,0], [0,0,1,0,1], [1,1,0,1,1],
                         [0,0,1,0,1], [0,1,1,1,0]])

    Returns (invert_dict(route_dict), gstar), where route_dict maps route
    number -> route name and gstar is the integer route adjacency matrix.

    Raises ArithmeticError when the number of links in gstar differs from
    the number of walks of length 2 between distinct nodes of g.
    """

    N = network[0]
    g = network[1]

    inv_d = invert_dict(N)

    number_of_nodes = len(N)
    number_of_routes = int(sum(sum(g)) / 2)

    route_list = []
    endpoint_list = []

    gstar = numpy.zeros((number_of_routes, number_of_routes), dtype=int)

    # Identify all routes from the upper triangle of g. The endpoint indices
    # are stored next to the name so that links can be detected without
    # parsing the name again.
    for row in range(number_of_nodes):
        for col in range(row + 1, number_of_nodes):

            if g[row][col] == 1:

                route_list.append(inv_d[row] + '_' + inv_d[col])
                endpoint_list.append((row, col))

    route_dict = dict(zip(range(number_of_routes), route_list))
    endpoint_dict = dict(zip(range(number_of_routes), endpoint_list))

    # node = route
    # link = two routes (nodes) have one endpoint in common
    # no own-links
    #
    # Endpoints are compared by node index. Testing whether an endpoint name
    # occurs inside the joined route name would also match names that are
    # substrings of other names, and splitting on '_' would mis-parse names
    # that contain '_'.
    for route1 in range(number_of_routes):

        endpoint1, endpoint2 = endpoint_dict[route1]

        for route2 in range(route1 + 1, number_of_routes):

            other_endpoints = endpoint_dict[route2]

            if endpoint1 in other_endpoints or endpoint2 in other_endpoints:

                gstar[route1][route2] = 1
                gstar[route2][route1] = 1

    # walks of length 2
    g2 = g.dot(g)

    # by construction, number of walks of length 2 (i>j)
    # must equal number of links in gstar (i>j)
    checksum = (sum(sum(numpy.triu(g2, 1))) != sum(sum(gstar)) / 2)

    if checksum:
        raise ArithmeticError('gstar checksum condition not satisfied')

    network_star = (invert_dict(route_dict), gstar)

    return network_star
Ejemplo n.º 4
0
            route_options['test'] = False
            route_options['constant_weight'] = False        
            route_options['erdos_renyi'] = False
            route_options['all_airports'] = all_airports
            route_options['highlight_nodes'] = True
            
            assert not (route_options['test'] and route_options['erdos_renyi'])

            density, Nbar, gbar = compute_density.density(year, quarter, carrier)
            
            if route_options['erdos_renyi']:
                                                
                print 'Erdos-Renyi',
                print 'density for carrier', carrier, 'is %.3f'%density
    
                inv_d = invert_dict.invert_dict(Nbar)
                
                horizontal()
            
            if route_options['erdos_renyi']:
                
                g = random_network.random_network(gbar, density)
            
            else:
                
                g = None
    
            route_options['Nbar'] = Nbar
            route_options['g'] = g
            
            if route_options['erdos_renyi']:
def add_network(year, quarter):
    """Add per-carrier network measures to every record of one quarter.

    Loads '../input/data_<year>_<quarter>.bin' with cPickle, computes network
    measures for each carrier, stores them on every record of the loaded
    data and pickles the result to '../temp/data_<year>_<quarter>.bin'.

    year, quarter -- only used, via str(), to build the two file names.

    Record keys are split on '_' and read as origin, destination and carrier
    (fields 0, 1 and 2). Each record receives:

        mindegree, maxdegree, routedegree
        mincloseness, maxcloseness, routecloseness
        minbetweenness, maxbetweenness
        mineigenvector, maxeigenvector, routeeigenvector
        density

    routecloseness and the betweenness fields are set to 'NA' when no value
    was computed for the carrier or node (KeyError on lookup).

    Returns None.
    """

    src = '../input/data_' + str(year) + '_' + str(quarter) + '.bin'

    # NOTE(review): unpickling can execute arbitrary code; only load data
    # files that come from a trusted source.
    with open(src, 'rb') as f:
        data = cPickle.load(f)

    all_airlines = list_of_airlines(data)
    all_airports = list_of_airports(data)
    N = map_airports_code(all_airports)

    # carrier -> {node name: centrality}
    DC_dict = {}
    CC_dict = {}
    BC_dict = {}
    EC_dict = {}

    # carrier -> density
    density_dict = {}

    # carrier -> {route name: centrality}
    DCroute_dict = {}
    CCroute_dict = {}
    ECroute_dict = {}

    for position, carrier in enumerate(all_airlines, 1):

        print('\t' + carrier + ' (' + str(position) + ' of ' + str(len(all_airlines)) + ')')

        DC_dict[carrier] = {}
        CC_dict[carrier] = {}
        BC_dict[carrier] = {}
        EC_dict[carrier] = {}

        DCroute_dict[carrier] = {}
        CCroute_dict[carrier] = {}
        ECroute_dict[carrier] = {}

        g = adjacency_matrix(data, N, carrier)
        Nbar, gbar = remove_zeros(N, g)
        network_bar = (Nbar, gbar)
        inv_d = invert_dict(Nbar)

        network_star = route_level_g(network_bar)
        Nstar = network_star[0]
        gstar = network_star[1]
        inv_d_star = invert_dict(Nstar)

        D, average_path_length = distance_matrix(gbar)

        if len(Nstar) > 1:
            # The results are not used below; the call is kept so that the
            # function behaves as before, including any error it raises.
            Dstar, average_path_length_star = distance_matrix(gstar)

        density, _ = density_degree_distribution(network_bar)

        density_dict[carrier] = density

        DC = degree_centrality(network_bar)
        DCroute = degree_centrality(network_star)

        CC = closeness_centrality(gbar)

        # Route-level closeness is only computed for more than one route;
        # otherwise nothing is stored and the records get 'NA'.
        if len(Nstar) > 1:
            CCroute = closeness_centrality(gstar)
        else:
            CCroute = {}

        eigenvector_map = centrality_eigenvector(gbar)
        eigenvector_map_route = centrality_eigenvector(gstar)

        # Betweenness is only computed for more than two nodes and a finite
        # average path length; otherwise the records get 'NA'.
        if len(Nbar) > 2 and not numpy.isinf(average_path_length):
            BC = all_centrality_betweenness(D)
        else:
            BC = {}

        # Re-key every measure from matrix index to node / route name.
        for key in DC:
            DC_dict[carrier][inv_d[key]] = DC[key]

        for key in DCroute:
            DCroute_dict[carrier][inv_d_star[key]] = DCroute[key]

        for key in CC:
            CC_dict[carrier][inv_d[key]] = CC[key]

        for key in CCroute:
            CCroute_dict[carrier][inv_d_star[key]] = CCroute[key]

        for key in BC:
            BC_dict[carrier][inv_d[key]] = BC[key]

        for key in eigenvector_map:
            EC_dict[carrier][inv_d[key]] = eigenvector_map[key]

        for key in eigenvector_map_route:
            ECroute_dict[carrier][inv_d_star[key]] = eigenvector_map_route[key]

    for i in data:

        fields = i.split('_')
        origin = fields[0]
        dest = fields[1]
        route = origin + '_' + dest
        carrier = fields[2]

        record = data[i]

        # add minimum, maximum degree centrality variable

        record['mindegree'] = min(DC_dict[carrier][origin], DC_dict[carrier][dest])
        record['maxdegree'] = max(DC_dict[carrier][origin], DC_dict[carrier][dest])

        # add route-level degree centrality variable

        record['routedegree'] = DCroute_dict[carrier][route]

        # add minimum, maximum closeness centrality variable

        record['mincloseness'] = min(CC_dict[carrier][origin], CC_dict[carrier][dest])
        record['maxcloseness'] = max(CC_dict[carrier][origin], CC_dict[carrier][dest])

        # add route-level closeness centrality variable

        try:
            record['routecloseness'] = CCroute_dict[carrier][route]
        except KeyError:
            record['routecloseness'] = 'NA'

        # add minimum, maximum betweenness centrality variable

        try:
            record['minbetweenness'] = min(BC_dict[carrier][origin], BC_dict[carrier][dest])
            record['maxbetweenness'] = max(BC_dict[carrier][origin], BC_dict[carrier][dest])
        except KeyError:
            record['minbetweenness'] = 'NA'
            record['maxbetweenness'] = 'NA'

        # add minimum, maximum eigenvector centrality variable

        record['mineigenvector'] = min(EC_dict[carrier][origin], EC_dict[carrier][dest])
        record['maxeigenvector'] = max(EC_dict[carrier][origin], EC_dict[carrier][dest])

        # add route-level eigenvector centrality variable

        record['routeeigenvector'] = ECroute_dict[carrier][route]

        # add density

        record['density'] = density_dict[carrier]

    # save bin datafile to the temp directory (same filename as the input datafile)

    filename = '../temp/data_' + str(year) + '_' + str(quarter) + '.bin'

    with open(filename, 'wb') as f:
        cPickle.dump(data, f)

    return None
Ejemplo n.º 6
0
def add_network(year, quarter):

    test_output = True

    print '\nadd network measures to data_year_quarter.bin, save to \\temp'

    src = '..\\input\\data_' + str(year) + '_' + str(quarter) + '.bin'
    
    print '\nloading', src, '\n'
    
    f = open(src, 'rb')
    data = cPickle.load(f)
    f.close()
    
    all_airlines = list_of_airlines.list_of_airlines(data)    
    all_airports = list_of_airports.list_of_airports(data)
    
    N = map_airports_code.map_airports_code(all_airports)
    
    DC_dict = {}
    CC_dict = {}
    BC_dict = {}
    EC_dict = {}
    
    density_dict = {}
    diameter_dict = {}
    nodes_dict = {}
    edges_dict = {}
    
    DCroute_dict = {}
    CCroute_dict = {}
    BCroute_dict = {}
    ECroute_dict = {}
    
    count = 0
    
    for carrier in all_airlines:
        
#        test_condition = (carrier == 'AA' and year == 2013 and quarter == 3)         
        test_condition = True
        
        print '\t' + carrier + ' (' + str(count + 1) + ' of ' + str(len(all_airlines)) + ')'
        
        DC_dict[carrier] = {}
        CC_dict[carrier] = {}
        BC_dict[carrier] = {}
        EC_dict[carrier] = {}
        
        DCroute_dict[carrier] = {}
        CCroute_dict[carrier] = {}
        BCroute_dict[carrier] = {}
        ECroute_dict[carrier] = {}
        
        g = adjacency_matrix.adjacency_matrix(data, N, carrier)
        
        Nbar, gbar = remove_zeros.remove_zeros(N, g)
        
        number_nodes = len(gbar)
        number_edges = sum(sum(gbar)) / 2
        
        nodes_dict[carrier] = number_nodes
        edges_dict[carrier] = number_edges
        
        network = (N, g)
        network_bar = (Nbar, gbar)
        inv_d = invert_dict.invert_dict(Nbar)
        
        network_star = route_level_g.route_level_g(network_bar)        
        Nstar = network_star[0]
        gstar = network_star[1]
        inv_d_star = invert_dict.invert_dict(Nstar)
            
        try:
            
            diameter_g = connected.connected(gbar)
            
        except:
            
            diameter_g = 'NA'
            
#        diameter_gstar = connected.connected(gstar)
#        
#        print 'diameter g = ', diameter_g
#        print 'diameter gstar = ', diameter_gstar
        
        D, average_path_length = distance_matrix.distance_matrix(gbar)
        
        if len(Nstar) > 1:
            
            Dstar, average_path_length_star = distance_matrix.distance_matrix(gstar)
            
        density, Pd = density_degree_distribution.density_degree_distribution(network_bar)
        
#        try:
#            
#            density_star, Pd_star = density_degree_distribution.density_degree_distribution(network_star)
#            print density, density_star
#            
#        except ZeroDivisionError:
#            
#            pass

        diameter_dict[carrier] = diameter_g
            
        density_dict[carrier] = density
            
        DC = degree_centrality.degree_centrality(network_bar)
        DCroute = degree_centrality.degree_centrality(network_star)
            
        CC = closeness_centrality.closeness_centrality(gbar)
        
        if len(Nstar) > 1:
            CCroute = closeness_centrality.closeness_centrality(gstar)
        
        eigenvector_map = centrality_eigenvector.centrality_eigenvector(gbar)
        eigenvector_map_route = centrality_eigenvector.centrality_eigenvector(gstar)
            
        if len(Nbar) > 2 and not numpy.isinf(average_path_length):
            
            BC = centrality_betweenness.all_centrality_betweenness(D)
            
#        if len(Nstar) > 1 and not numpy.isinf(average_path_length_star):
#            
#            BCroute = centrality_betweenness.all_centrality_betweenness(Dstar)
        
        if test_output and test_condition:

            print '\nTEST OUTPUT'
            print 'carrier', carrier, 'year', year, 'quarter', quarter            
            print
#            print 'Nbar', Nbar
#            print 'gbar[2]', gbar[2] # Austin-Bergstrom International Airport            
            print 'number_nodes', number_nodes
            print 'number_edges', number_edges
#            print 'inv_d_star', inv_d_star
            number_nodes_star = len(gstar)
            number_edges_star = sum(sum(gstar)) / 2
#            print 'number_nodes_star', number_nodes_star
#            print 'number_edges_star', number_edges_star
            print 'diameter_g', diameter_g
#            print 'distance matrix D', D
            print 'average_path_length', average_path_length
            print 'density', density
#            print 'degree distribution Pd', Pd
#            print 'degree centrality DC', DC
                        
            print 'overall clustering', clustering_A.cl(gbar)
            print 'average clustering', clustering_average.cl_avg(gbar)
            
##            http://stackoverflow.com/questions/5927180/removing-data-from-a-numpy-array
#            iu = numpy.triu_indices(len(gbar), 1)
#            gbar_upper_triangle = gbar[iu]
#            X = numpy.ma.masked_equal(gbar_upper_triangle, 0)
#            gbar_upper_triangle_no_zeros = X.compressed()
            
            degree_by_node = numpy.sum(gbar ,axis=1)
            print 'mean degree', numpy.mean(degree_by_node)
            print 'median degree', numpy.median(degree_by_node)
            print 'degree correlation', degree_correlation.calculate(gbar)
            print
            
            max_DC = 0
            max_DC_i = None
            
            for i in DC:
                
                if DC[i] > max_DC:
                    
                    max_DC = DC[i]
                    max_DC_i = i
                    
#            print 'maximum degree centrality', max_DC, 'index', max_DC_i, 'node', inv_d[max_DC_i]
            
#            print 'closeness centrality CC', CC
            
            max_CC = 0
            max_CC_i = None
            
            for i in CC:
                
                if CC[i] > max_CC:
                    
                    max_CC = CC[i]
                    max_CC_i = i
                    
#            print 'maximum closeness centrality', max_CC, 'index', max_CC_i, 'node', inv_d[max_CC_i]

#            print 'eigenvector_map', eigenvector_map
            
            max_EC = 0
            max_EC_i = None
            
            for i in eigenvector_map:
                
                if eigenvector_map[i] > max_EC:
                    
                    max_EC = eigenvector_map[i]
                    max_EC_i = i
                    
#            print 'maximum eigenvector centrality', max_EC, 'index', max_EC_i, 'node', inv_d[max_EC_i]

            max_BC = 0
            max_BC_i = None
            
            for i in BC:
                
                if BC[i] > max_BC:
                    
                    max_BC = BC[i]
                    max_BC_i = i
                    
#            print 'maximum betweenness centrality', max_BC, 'index', max_BC_i, 'node', inv_d[max_BC_i]
           
#            raw_input()
            
        for key in DC:
            DC_dict[carrier][inv_d[key]] = DC[key]
        
        for key in DCroute:
            DCroute_dict[carrier][inv_d_star[key]] = DCroute[key]
            
        for key in CC:
            CC_dict[carrier][inv_d[key]] = CC[key]
        
        if len(Nstar) > 1:
            for key in CCroute:
                CCroute_dict[carrier][inv_d_star[key]] = CCroute[key]
        
        if len(Nbar) > 2 and not numpy.isinf(average_path_length):
            for key in BC:
                BC_dict[carrier][inv_d[key]] = BC[key]
        
        for key in eigenvector_map:
            EC_dict[carrier][inv_d[key]] = eigenvector_map[key]
            
        for key in eigenvector_map_route:
            ECroute_dict[carrier][inv_d_star[key]] = eigenvector_map_route[key]
        
        count += 1
    
    centrality_dicts = ({'betweenness': BC_dict, 'closeness': 
        CC_dict, 'degree': DC_dict, 'eigenvector': EC_dict})
    
    other_centrality = other_carrier_centrality.centrality(centrality_dicts)
    
#    print
#    print 'betweenness', BC_dict['AA']['DFW'], other_centrality['betweenness']['AA']['DFW']
#    print 'closeness', CC_dict['AA']['DFW'], other_centrality['closeness']['AA']['DFW']
#    print 'degree', DC_dict['AA']['DFW'], other_centrality['degree']['AA']['DFW']
#    print 'eigenvector', EC_dict['AA']['DFW'], other_centrality['eigenvector']['AA']['DFW']
    
    for i in data:
        
        origin = i.split('_')[0]
        dest = i.split('_')[1]
        route = origin + '_' + dest
        carrier = i.split('_')[2]
        
        # add minimum, maximum degree centrality variable    
        
        data[i]['mindegree'] = min(DC_dict[carrier][origin], DC_dict[carrier][dest])
        data[i]['maxdegree'] = max(DC_dict[carrier][origin], DC_dict[carrier][dest])
    
        # add origin, destination degree centrality variable
    
        data[i]['origindegree'] = DC_dict[carrier][origin]
        data[i]['destinationdegree'] = DC_dict[carrier][dest]
        
        # add route-level degree centrality variable    
        
        data[i]['routedegree'] = DCroute_dict[carrier][route]
    
        # add minimum, maximum closeness centrality variable    
        
        data[i]['mincloseness'] = min(CC_dict[carrier][origin], CC_dict[carrier][dest])
        data[i]['maxcloseness'] = max(CC_dict[carrier][origin], CC_dict[carrier][dest])

        # add origin, destination closeness centrality variable
    
        data[i]['origincloseness'] = CC_dict[carrier][origin]
        data[i]['destinationcloseness'] = CC_dict[carrier][dest]

        # add route-level closeness centrality variable    
        
        try:
            
            data[i]['routecloseness'] = CCroute_dict[carrier][route]
            
        except KeyError:
            
            data[i]['routecloseness'] = 'NA'
        
        # add minimum, maximum betweenness centrality variable    
        
        try:
            
            data[i]['minbetweenness'] = min(BC_dict[carrier][origin], BC_dict[carrier][dest])
            data[i]['maxbetweenness'] = max(BC_dict[carrier][origin], BC_dict[carrier][dest])
            
        except KeyError:
            
            data[i]['minbetweenness'] = 'NA'
            data[i]['maxbetweenness'] = 'NA'
    
        # add origin, destination betweenness centrality variable    
        
        try:
            
            data[i]['originbetweenness'] = BC_dict[carrier][origin]
            data[i]['destinationbetweenness'] = BC_dict[carrier][dest]
            
        except KeyError:
            
            data[i]['originbetweenness'] = 'NA'
            data[i]['destinationbetweenness'] = 'NA'
            
        # add minimum, maximum eigenvector centrality variable    
        
        data[i]['mineigenvector'] = min(EC_dict[carrier][origin], EC_dict[carrier][dest])
        data[i]['maxeigenvector'] = max(EC_dict[carrier][origin], EC_dict[carrier][dest])
    
        # add origin, destination eigenvector centrality variable    
        
        data[i]['origineigenvector'] = EC_dict[carrier][origin]
        data[i]['destinationeigenvector'] = EC_dict[carrier][dest]
        
        # add route-level eigenvector centrality variable    
        
        data[i]['routeeigenvector'] = ECroute_dict[carrier][route]
    
        # add density
    
        data[i]['density'] = density_dict[carrier]
        
        # add diameter
    
        data[i]['diameter'] = diameter_dict[carrier]
        
        # add number of nodes
    
        data[i]['nodes'] = nodes_dict[carrier]
        
        # add number of edges
    
        data[i]['edges'] = edges_dict[carrier]
    
    # save bin datafile to \temp (same filename as \input datafile)
        
    filename = '..\\temp\\data_' + str(year) + '_' + str(quarter) + '.bin'
    
    f = open(filename, 'wb')
    cPickle.dump(data, f)
    f.close()
    
    return None