Exemple #1
0
def remove_zeros(Nodes_dict,g):
    """FUNCTION	remove zero rows and columns from adjacency matrix g
        <(N, g) to (Nbar, gbar)>
		INPUT	adjacency matrix g
		INPUT	dictionary of nodes (output from Task 5)
		DETERMINE	integer n (the dimension of g)
		SET	an empty list of unused airports <unused = []>
		SET	airport_list equal to keys of nodes dictionary
		SORT  airport_list alphabetically
		SET	new_airport_list equal to an empty list
		FOR	row number i in 0,1,...,n-1 <range(n)>
			IF	row g[i] only contains zeros
			<numpy.array_equal(g[i], numpy.zeros(n))>
			THEN
				APPEND	row number i to unused list
			ELSE <the two IF / ELSE alternatives are exclusive>
				APPEND	airport_list[i] to new_airport_list
			ENDIF
		ENDFOR
		IF	unused list not empty
		THEN
			DELETE	all zero rows from g
					<g = numpy.delete(g, tuple(unused), axis=0)>
			DELETE	all zero columns from g
					<g = numpy.delete(g, tuple(unused), axis=1)>
		ENDIF
		COMPUTE	dictionary that maps airport codes in new_airport_list to numbers
				<call Task 5 function with new_airport_list, output newnodes>
		RETURN	g <this has been modified to gbar>
		RETURN	newnodes dictionary
	ENDFUNCTION"""
 
    n = len(Nodes_dict)
    airports_list = []
    unused_airports = []
    new_airports_list=[]
    
    for keys in Nodes_dict.keys():
        
        airports_list.append(keys)

    airports_list.sort()

    for i in range(n):
        
        if numpy.array_equal(g[i], numpy.zeros(n)):
            unused_airports.append(i)
        else:
            new_airports_list.append(airports_list[i])
    
    if unused_airports != []:
        
        gbar = numpy.delete(g, tuple(unused_airports), axis=0)
        gbar = numpy.delete(gbar, tuple(unused_airports), axis=1)
    
    Nbar = map_airports_code.map_airports_code(new_airports_list)
    
    return Nbar, gbar
    
def density(year, quarter, carrier):

    src = '..\\input\\data_' + str(year) + '_' + str(quarter) + '.bin'
    
    f = open(src, 'rb')
    data = cPickle.load(f)
    f.close()
    
    all_airlines = list_of_airlines.list_of_airlines(data)
    
    assert (carrier in all_airlines), 'carrier not found'
    
    all_airports = list_of_airports.list_of_airports(data)
    
    N = map_airports_code.map_airports_code(all_airports)
    
    g = adjacency_matrix.adjacency_matrix(data, N, carrier)
    
    Nbar, gbar = remove_zeros.remove_zeros(N, g)
    network_bar = (Nbar, gbar)
        
    density, Pd = density_degree_distribution.density_degree_distribution(network_bar)      
    
    return density, Nbar, gbar
def add_network(year, quarter):

    src = '../input/data_' + str(year) + '_' + str(quarter) + '.bin'
    
    f = open(src, 'rb')
    data = cPickle.load(f)
    f.close()
    
    all_airlines = list_of_airlines(data)
    all_airports = list_of_airports(data)
    N = map_airports_code(all_airports)  
    
    DC_dict = {}
    CC_dict = {}
    BC_dict = {}
    EC_dict = {}
    
    density_dict = {}
    
    DCroute_dict = {}
    CCroute_dict = {}
    BCroute_dict = {}
    ECroute_dict = {}
    
    count = 0
    
    for carrier in all_airlines:
        
        print '\t' + carrier + ' (' + str(count + 1) + ' of ' + str(len(all_airlines)) + ')'
        
        DC_dict[carrier] = {}
        CC_dict[carrier] = {}
        BC_dict[carrier] = {}
        EC_dict[carrier] = {}
        
        DCroute_dict[carrier] = {}
        CCroute_dict[carrier] = {}
        BCroute_dict[carrier] = {}
        ECroute_dict[carrier] = {}
        
        g = adjacency_matrix(data, N, carrier)
        Nbar, gbar = remove_zeros(N, g)
        network = (N, g)
        network_bar = (Nbar, gbar)
        inv_d = invert_dict(Nbar)
        
        network_star = route_level_g(network_bar)        
        Nstar = network_star[0]
        gstar = network_star[1]
        inv_d_star = invert_dict(Nstar)
        
#        diameter_g = connected(gbar)
#        diameter_gstar = connected(gstar)
#        
#        print 'diameter g = ', diameter_g
#        print 'diameter gstar = ', diameter_gstar
        
        D, average_path_length = distance_matrix(gbar)
        
        if len(Nstar) > 1:
            Dstar, average_path_length_star = distance_matrix(gstar)
            
        density, Pd = density_degree_distribution(network_bar)
        
#        try:
#            
#            density_star, Pd_star = density_degree_distribution(network_star)
#            print density, density_star
#            
#        except ZeroDivisionError:
#            
#            pass
        
        density_dict[carrier] = density
            
        DC = degree_centrality(network_bar)
        DCroute = degree_centrality(network_star)
        
        CC = closeness_centrality(gbar)
        
        if len(Nstar) > 1:
            CCroute = closeness_centrality(gstar)
        
        eigenvector_map = centrality_eigenvector(gbar)
        eigenvector_map_route = centrality_eigenvector(gstar)
        
        if len(Nbar) > 2 and not numpy.isinf(average_path_length):
            BC = all_centrality_betweenness(D)
            
#        if len(Nstar) > 1 and not numpy.isinf(average_path_length_star):
#            BCroute = all_centrality_betweenness(Dstar)
    
        for key in DC:
            DC_dict[carrier][inv_d[key]] = DC[key]
        
        for key in DCroute:
            DCroute_dict[carrier][inv_d_star[key]] = DCroute[key]
            
        for key in CC:
            CC_dict[carrier][inv_d[key]] = CC[key]
        
        if len(Nstar) > 1:
            for key in CCroute:
                CCroute_dict[carrier][inv_d_star[key]] = CCroute[key]
        
        if len(Nbar) > 2 and not numpy.isinf(average_path_length):
            for key in BC:
                BC_dict[carrier][inv_d[key]] = BC[key]
        
        for key in eigenvector_map:
            EC_dict[carrier][inv_d[key]] = eigenvector_map[key]
            
        for key in eigenvector_map_route:
            ECroute_dict[carrier][inv_d_star[key]] = eigenvector_map_route[key]
        
        count += 1
    
    for i in data:
        origin = i.split('_')[0]
        dest = i.split('_')[1]
        route = origin + '_' + dest
        carrier = i.split('_')[2]
        
        # add minimum, maximum degree centrality variable    
        
        data[i]['mindegree'] = min(DC_dict[carrier][origin], DC_dict[carrier][dest])
        data[i]['maxdegree'] = max(DC_dict[carrier][origin], DC_dict[carrier][dest])
    
        # add route-level degree centrality variable    
        
        data[i]['routedegree'] = DCroute_dict[carrier][route]
    
        # add minimum, maximum closeness centrality variable    
        
        data[i]['mincloseness'] = min(CC_dict[carrier][origin], CC_dict[carrier][dest])
        data[i]['maxcloseness'] = max(CC_dict[carrier][origin], CC_dict[carrier][dest])

        # add route-level closeness centrality variable    
        
        try:
            
            data[i]['routecloseness'] = CCroute_dict[carrier][route]
            
        except KeyError:
            
            data[i]['routecloseness'] = 'NA'
        
        # add minimum, maximum betweenness centrality variable    
        
        try:
            
            data[i]['minbetweenness'] = min(BC_dict[carrier][origin], BC_dict[carrier][dest])
            data[i]['maxbetweenness'] = max(BC_dict[carrier][origin], BC_dict[carrier][dest])
            
        except KeyError:
            
            data[i]['minbetweenness'] = 'NA'
            data[i]['maxbetweenness'] = 'NA'
    
        # add minimum, maximum eigenvector centrality variable    
        
        data[i]['mineigenvector'] = min(EC_dict[carrier][origin], EC_dict[carrier][dest])
        data[i]['maxeigenvector'] = max(EC_dict[carrier][origin], EC_dict[carrier][dest])
    
        # add route-level eigenvector centrality variable    
        
        data[i]['routeeigenvector'] = ECroute_dict[carrier][route]
    
        # add density
    
        data[i]['density'] = density_dict[carrier]
    
    # save bin datafile to \temp (same filename as \input datafile)
        
    filename = '../temp/data_' + str(year) + '_' + str(quarter) + '.bin'
    
    f = open(filename, 'wb')
    cPickle.dump(data, f)
    f.close()
    
    return None
def add_network(year, quarter):

    test_output = True

    print '\nadd network measures to data_year_quarter.bin, save to \\temp'

    src = '..\\input\\data_' + str(year) + '_' + str(quarter) + '.bin'
    
    print '\nloading', src, '\n'
    
    f = open(src, 'rb')
    data = cPickle.load(f)
    f.close()
    
    all_airlines = list_of_airlines.list_of_airlines(data)    
    all_airports = list_of_airports.list_of_airports(data)
    
    N = map_airports_code.map_airports_code(all_airports)
    
    DC_dict = {}
    CC_dict = {}
    BC_dict = {}
    EC_dict = {}
    
    density_dict = {}
    diameter_dict = {}
    nodes_dict = {}
    edges_dict = {}
    
    DCroute_dict = {}
    CCroute_dict = {}
    BCroute_dict = {}
    ECroute_dict = {}
    
    count = 0
    
    for carrier in all_airlines:
        
#        test_condition = (carrier == 'AA' and year == 2013 and quarter == 3)         
        test_condition = True
        
        print '\t' + carrier + ' (' + str(count + 1) + ' of ' + str(len(all_airlines)) + ')'
        
        DC_dict[carrier] = {}
        CC_dict[carrier] = {}
        BC_dict[carrier] = {}
        EC_dict[carrier] = {}
        
        DCroute_dict[carrier] = {}
        CCroute_dict[carrier] = {}
        BCroute_dict[carrier] = {}
        ECroute_dict[carrier] = {}
        
        g = adjacency_matrix.adjacency_matrix(data, N, carrier)
        
        Nbar, gbar = remove_zeros.remove_zeros(N, g)
        
        number_nodes = len(gbar)
        number_edges = sum(sum(gbar)) / 2
        
        nodes_dict[carrier] = number_nodes
        edges_dict[carrier] = number_edges
        
        network = (N, g)
        network_bar = (Nbar, gbar)
        inv_d = invert_dict.invert_dict(Nbar)
        
        network_star = route_level_g.route_level_g(network_bar)        
        Nstar = network_star[0]
        gstar = network_star[1]
        inv_d_star = invert_dict.invert_dict(Nstar)
            
        try:
            
            diameter_g = connected.connected(gbar)
            
        except:
            
            diameter_g = 'NA'
            
#        diameter_gstar = connected.connected(gstar)
#        
#        print 'diameter g = ', diameter_g
#        print 'diameter gstar = ', diameter_gstar
        
        D, average_path_length = distance_matrix.distance_matrix(gbar)
        
        if len(Nstar) > 1:
            
            Dstar, average_path_length_star = distance_matrix.distance_matrix(gstar)
            
        density, Pd = density_degree_distribution.density_degree_distribution(network_bar)
        
#        try:
#            
#            density_star, Pd_star = density_degree_distribution.density_degree_distribution(network_star)
#            print density, density_star
#            
#        except ZeroDivisionError:
#            
#            pass

        diameter_dict[carrier] = diameter_g
            
        density_dict[carrier] = density
            
        DC = degree_centrality.degree_centrality(network_bar)
        DCroute = degree_centrality.degree_centrality(network_star)
            
        CC = closeness_centrality.closeness_centrality(gbar)
        
        if len(Nstar) > 1:
            CCroute = closeness_centrality.closeness_centrality(gstar)
        
        eigenvector_map = centrality_eigenvector.centrality_eigenvector(gbar)
        eigenvector_map_route = centrality_eigenvector.centrality_eigenvector(gstar)
            
        if len(Nbar) > 2 and not numpy.isinf(average_path_length):
            
            BC = centrality_betweenness.all_centrality_betweenness(D)
            
#        if len(Nstar) > 1 and not numpy.isinf(average_path_length_star):
#            
#            BCroute = centrality_betweenness.all_centrality_betweenness(Dstar)
        
        if test_output and test_condition:

            print '\nTEST OUTPUT'
            print 'carrier', carrier, 'year', year, 'quarter', quarter            
            print
#            print 'Nbar', Nbar
#            print 'gbar[2]', gbar[2] # Austin-Bergstrom International Airport            
            print 'number_nodes', number_nodes
            print 'number_edges', number_edges
#            print 'inv_d_star', inv_d_star
            number_nodes_star = len(gstar)
            number_edges_star = sum(sum(gstar)) / 2
#            print 'number_nodes_star', number_nodes_star
#            print 'number_edges_star', number_edges_star
            print 'diameter_g', diameter_g
#            print 'distance matrix D', D
            print 'average_path_length', average_path_length
            print 'density', density
#            print 'degree distribution Pd', Pd
#            print 'degree centrality DC', DC
                        
            print 'overall clustering', clustering_A.cl(gbar)
            print 'average clustering', clustering_average.cl_avg(gbar)
            
##            http://stackoverflow.com/questions/5927180/removing-data-from-a-numpy-array
#            iu = numpy.triu_indices(len(gbar), 1)
#            gbar_upper_triangle = gbar[iu]
#            X = numpy.ma.masked_equal(gbar_upper_triangle, 0)
#            gbar_upper_triangle_no_zeros = X.compressed()
            
            degree_by_node = numpy.sum(gbar ,axis=1)
            print 'mean degree', numpy.mean(degree_by_node)
            print 'median degree', numpy.median(degree_by_node)
            print 'degree correlation', degree_correlation.calculate(gbar)
            print
            
            max_DC = 0
            max_DC_i = None
            
            for i in DC:
                
                if DC[i] > max_DC:
                    
                    max_DC = DC[i]
                    max_DC_i = i
                    
#            print 'maximum degree centrality', max_DC, 'index', max_DC_i, 'node', inv_d[max_DC_i]
            
#            print 'closeness centrality CC', CC
            
            max_CC = 0
            max_CC_i = None
            
            for i in CC:
                
                if CC[i] > max_CC:
                    
                    max_CC = CC[i]
                    max_CC_i = i
                    
#            print 'maximum closeness centrality', max_CC, 'index', max_CC_i, 'node', inv_d[max_CC_i]

#            print 'eigenvector_map', eigenvector_map
            
            max_EC = 0
            max_EC_i = None
            
            for i in eigenvector_map:
                
                if eigenvector_map[i] > max_EC:
                    
                    max_EC = eigenvector_map[i]
                    max_EC_i = i
                    
#            print 'maximum eigenvector centrality', max_EC, 'index', max_EC_i, 'node', inv_d[max_EC_i]

            max_BC = 0
            max_BC_i = None
            
            for i in BC:
                
                if BC[i] > max_BC:
                    
                    max_BC = BC[i]
                    max_BC_i = i
                    
#            print 'maximum betweenness centrality', max_BC, 'index', max_BC_i, 'node', inv_d[max_BC_i]
           
#            raw_input()
            
        for key in DC:
            DC_dict[carrier][inv_d[key]] = DC[key]
        
        for key in DCroute:
            DCroute_dict[carrier][inv_d_star[key]] = DCroute[key]
            
        for key in CC:
            CC_dict[carrier][inv_d[key]] = CC[key]
        
        if len(Nstar) > 1:
            for key in CCroute:
                CCroute_dict[carrier][inv_d_star[key]] = CCroute[key]
        
        if len(Nbar) > 2 and not numpy.isinf(average_path_length):
            for key in BC:
                BC_dict[carrier][inv_d[key]] = BC[key]
        
        for key in eigenvector_map:
            EC_dict[carrier][inv_d[key]] = eigenvector_map[key]
            
        for key in eigenvector_map_route:
            ECroute_dict[carrier][inv_d_star[key]] = eigenvector_map_route[key]
        
        count += 1
    
    centrality_dicts = ({'betweenness': BC_dict, 'closeness': 
        CC_dict, 'degree': DC_dict, 'eigenvector': EC_dict})
    
    other_centrality = other_carrier_centrality.centrality(centrality_dicts)
    
#    print
#    print 'betweenness', BC_dict['AA']['DFW'], other_centrality['betweenness']['AA']['DFW']
#    print 'closeness', CC_dict['AA']['DFW'], other_centrality['closeness']['AA']['DFW']
#    print 'degree', DC_dict['AA']['DFW'], other_centrality['degree']['AA']['DFW']
#    print 'eigenvector', EC_dict['AA']['DFW'], other_centrality['eigenvector']['AA']['DFW']
    
    for i in data:
        
        origin = i.split('_')[0]
        dest = i.split('_')[1]
        route = origin + '_' + dest
        carrier = i.split('_')[2]
        
        # add minimum, maximum degree centrality variable    
        
        data[i]['mindegree'] = min(DC_dict[carrier][origin], DC_dict[carrier][dest])
        data[i]['maxdegree'] = max(DC_dict[carrier][origin], DC_dict[carrier][dest])
    
        # add origin, destination degree centrality variable
    
        data[i]['origindegree'] = DC_dict[carrier][origin]
        data[i]['destinationdegree'] = DC_dict[carrier][dest]
        
        # add route-level degree centrality variable    
        
        data[i]['routedegree'] = DCroute_dict[carrier][route]
    
        # add minimum, maximum closeness centrality variable    
        
        data[i]['mincloseness'] = min(CC_dict[carrier][origin], CC_dict[carrier][dest])
        data[i]['maxcloseness'] = max(CC_dict[carrier][origin], CC_dict[carrier][dest])

        # add origin, destination closeness centrality variable
    
        data[i]['origincloseness'] = CC_dict[carrier][origin]
        data[i]['destinationcloseness'] = CC_dict[carrier][dest]

        # add route-level closeness centrality variable    
        
        try:
            
            data[i]['routecloseness'] = CCroute_dict[carrier][route]
            
        except KeyError:
            
            data[i]['routecloseness'] = 'NA'
        
        # add minimum, maximum betweenness centrality variable    
        
        try:
            
            data[i]['minbetweenness'] = min(BC_dict[carrier][origin], BC_dict[carrier][dest])
            data[i]['maxbetweenness'] = max(BC_dict[carrier][origin], BC_dict[carrier][dest])
            
        except KeyError:
            
            data[i]['minbetweenness'] = 'NA'
            data[i]['maxbetweenness'] = 'NA'
    
        # add origin, destination betweenness centrality variable    
        
        try:
            
            data[i]['originbetweenness'] = BC_dict[carrier][origin]
            data[i]['destinationbetweenness'] = BC_dict[carrier][dest]
            
        except KeyError:
            
            data[i]['originbetweenness'] = 'NA'
            data[i]['destinationbetweenness'] = 'NA'
            
        # add minimum, maximum eigenvector centrality variable    
        
        data[i]['mineigenvector'] = min(EC_dict[carrier][origin], EC_dict[carrier][dest])
        data[i]['maxeigenvector'] = max(EC_dict[carrier][origin], EC_dict[carrier][dest])
    
        # add origin, destination eigenvector centrality variable    
        
        data[i]['origineigenvector'] = EC_dict[carrier][origin]
        data[i]['destinationeigenvector'] = EC_dict[carrier][dest]
        
        # add route-level eigenvector centrality variable    
        
        data[i]['routeeigenvector'] = ECroute_dict[carrier][route]
    
        # add density
    
        data[i]['density'] = density_dict[carrier]
        
        # add diameter
    
        data[i]['diameter'] = diameter_dict[carrier]
        
        # add number of nodes
    
        data[i]['nodes'] = nodes_dict[carrier]
        
        # add number of edges
    
        data[i]['edges'] = edges_dict[carrier]
    
    # save bin datafile to \temp (same filename as \input datafile)
        
    filename = '..\\temp\\data_' + str(year) + '_' + str(quarter) + '.bin'
    
    f = open(filename, 'wb')
    cPickle.dump(data, f)
    f.close()
    
    return None