def Construct_WT_ntwrkX_Modularity(year):
    """Build, save and return the NetworkX modularity matrix for one year.

    The adjacency data for ``year`` is loaded through
    ``dm.load_adjacency_npz_year``, turned into a ``nx.DiGraph`` and handed to
    ``nx.directed_modularity_matrix``. The result is written with ``np.savez``
    (under the key ``netwrk``) into the ``modularity_ntwrkX_npz_files/``
    directory below the root returned by ``dm.set_dir_tree()``.

    Args:
        year: Year identifier; forwarded to the adjacency loader and embedded
            in the output file name.

    Returns:
        The matrix returned by ``nx.directed_modularity_matrix``.
    """
    root_dir = dm.set_dir_tree()

    # The number of rows in the country table fixes the network size.
    country_table = dm.load_country_lat_lon_csv(root_dir)
    n_countries = country_table.shape[0]

    # Input (adjacency) and output (modularity) directories under the root.
    adjacency_dir = str(root_dir + 'adjacency_ntwrk_npz_files/')
    modularity_dir = str(root_dir + 'modularity_ntwrkX_npz_files/')

    # Only the first element returned by the loader (the adjacency matrix)
    # is used here.
    loaded = dm.load_adjacency_npz_year(adjacency_dir, year, n_countries)
    adjacency = loaded[0]

    # NOTE(review): nx.from_numpy_matrix was removed in NetworkX 3.0; this
    # call requires an older NetworkX release.
    digraph = nx.from_numpy_matrix(adjacency, create_using=nx.DiGraph())
    modularity = nx.directed_modularity_matrix(digraph)

    out_name = ('modularity_ntwrkX_' + str(year) + '_' +
                str(n_countries) + 'countries.npz')
    np.savez(str(modularity_dir + out_name), netwrk=modularity)

    return modularity
def __init__(self, year, method, flg_sym, norm="norm", is_gcc=False):
    """Load one year's trade network and build the requested matrix form.

    Args:
        year: Year identifier forwarded to the graph builder and to the
            adjacency loader.
        method: Name of the network representation. "Laplacian" routes
            through ``nm.networkX_laplacian``; any other value is forwarded
            to ``nm.construct_ntwrk_method``.
        flg_sym: Truthy to request the symmetric network. Stored on the
            instance as the string tag 'sym' (or '' otherwise).
        norm: Forwarded to ``nm.networkX_laplacian``; not used for other
            methods.
        is_gcc: When truthy, ``self.trade_ntwrk`` is passed through
            ``nm.convert_adjacency_to_giant_component``.

    NOTE(review): ``self.dirPre``, ``self.dirIn`` and ``self.num_countries``
    are read here but never assigned in this method -- presumably class
    attributes; confirm.
    """
    self.year = year
    # The helpers take the symmetry setting as a string tag, not a bool.
    self.flg_sym = 'sym' if flg_sym else ''

    self.G = nm.construct_ntwrkX_Graph(self.dirPre, self.year, self.flg_sym)

    # Largest connected component (as a set of nodes) and its size.
    # NOTE(review): nx.connected_components is documented for undirected
    # graphs only; if the builder returns a DiGraph when flg_sym is falsy
    # this line would raise -- confirm.
    self.gcc = max(nx.connected_components(self.G), key=len)
    self.num_gcc = len(self.gcc)

    self.trade_ntwrk_graph, self.imports, self.exports = \
        dm.load_adjacency_npz_year(self.dirIn, year, self.num_countries,
                                   self.flg_sym)

    # Sanity checks: column sums vs. imports, row sums vs. exports.
    # NOTE(review): np.any passes as soon as a single entry matches; np.all
    # or np.allclose may have been intended. Left unchanged so that data
    # which passes today keeps passing.
    assert np.any(np.sum(self.trade_ntwrk_graph, axis=0) == self.imports), 'Imports are Weird'
    assert np.any(np.sum(self.trade_ntwrk_graph, axis=1) == self.exports), 'Exports are Weird'

    # Compare by value: `is` tests object identity and only works for
    # strings that happen to be interned.
    if method == "Laplacian":
        self.trade_ntwrk = nm.networkX_laplacian(self.G, self.flg_sym, norm)
    else:
        self.trade_ntwrk = nm.construct_ntwrk_method(
            self.trade_ntwrk_graph, method)

    if is_gcc:
        self.trade_ntwrk = nm.convert_adjacency_to_giant_component(
            self.G, self.trade_ntwrk)

    self.labels = None
countriesLL = dm.load_country_lat_lon_csv(dirPre) num_countries = countriesLL.shape[0] ## (2) Load in names and codes for types of goods traded #goods = dm.load_products(dirPre) # (4). Loop through, load and plot all previously saved adjacency matrix files. years = range( 1962, 2015) # np.array([1962]) # years for which we have world trade data. for y in years: print(y) dirIn = str(dirPre + 'adjacency_ntwrk_npz_files/') try: trade_ntwrk, imports, exports = dm.load_adjacency_npz_year( dirIn, y, num_countries) except: print('Can not find adjacency file') continue # (4). Check that Imports are just the column sums and Exports are row sums assert np.any(np.sum(trade_ntwrk, axis=0) == imports), 'Imports are Weird' assert np.any(np.sum(trade_ntwrk, axis=1) == exports), 'Exports are Weird' #print('Everything checks out with sums, imports and exports.') # (5c). Compute Normalized Laplacian (For Asymmetric, use_out_degree can use imports or exports) trade_ntwrk = nm.construct_ntwrk_method(trade_ntwrk, method) # (5b). Find Cuthill-McKee reordering of Adjacency Matrix (requires sparse matrices and scipy's sparse library). # Q: Does it make sense to do this on Laplacian & Modularity? How do things change? if True:
continent = np.append(continent, reg[0:2]) conts = set(continent) # this is a 'set object' (all the different countries) conts = list(conts) # convert 'set object' to a list that I can iterate over. conts = np.sort(conts) node_colors_by_continent = np.zeros(len(continent)) for i in range(0, len(conts)): node_colors_by_continent[np.array(continent == conts[i])] = i # (-) Loop through different years and compute modularity using NetworkX for year in range(1962, 1963): ## (-) First the adjacency matrix is loaded from the adj_npz. # Then the adj_matrix is converted into a NetworkX DiGraph object. # Finally the DiGraph is used to create a laplacian matrix, using the NetworkX # laplacian_matrix functions, selected in our network manipulation suit. adj_npz, _, _ = dm.load_adjacency_npz_year(dirIn, year, num_countries, sym) adj_graph = nm.construct_ntwrkX_Graph(dirPre, year, sym) lap_ntwkX = nm.networkX_laplacian(adj_graph, sym, norm, weight) # (-) Uncomment these lines to check if Our implemented modularity matches results of NetworkX version. # dirLap = str(dirPre + 'laplacian_ntwrk_npz_files/') # lap_npz = dm.load_laplacian_npz_year(dirLap, year, num_countries, sym) # diff = lap_ntwkX - lap_npz # claim = (diff).any() # print('Claim differences in 2 laplacian calcs.', claim ) # if claim: # plt.imshow(diff) # plt.colorbar() # plt.show() if False: np.savez(str(dirOut + file_name + str(year) + '_' +
def construct_ntwrkX_Graph(dirPre, year, sym):
    """Build a NetworkX graph from one year's adjacency matrix and pickle it.

    Nodes are the integers ``0 .. num_countries - 1`` and carry the attributes
    ``LatLon``, ``countryId3``, ``countryName``, ``continent``, ``imports``
    and ``exports``. Every strictly positive entry ``A[a, b]`` becomes an edge
    ``a -> b`` whose ``weight`` is that entry. The graph is written as a
    gpickle file below ``dirPre`` before being returned.

    Args:
        dirPre: Root data directory (concatenated with sub-directory names).
        year: Year identifier; forwarded to the adjacency loader and embedded
            in the gpickle file name.
        sym: 'sym' builds an undirected ``nx.Graph``; any other value builds
            an ``nx.DiGraph``. The value is also forwarded to the loader and
            used as a prefix of the output file name.

    Returns:
        The constructed graph, or ``None`` if loading the adjacency data
        raised (a message is printed in that case).

    Note: To access data for each node or edge, do:
        G.nodes.data('LatLon')[0]
        G.nodes.data('countryId3')[0]
        G.nodes.data('countryName')[0]
        G.nodes.data('continent')[0]
        G.nodes.data('imports')[0]
        G.nodes.data('exports')[0]
        G.edges.data('weight')
    """
    # Weighted undirected graph for the symmetric case, directed otherwise.
    G = nx.Graph() if sym == 'sym' else nx.DiGraph()

    # ------------------------------------------------------------------
    # (A). Per-country metadata used as node attributes.
    countriesLL = dm.load_country_lat_lon_csv(dirPre)
    num_countries = countriesLL.shape[0]
    rows = range(num_countries)

    # The continent tag is the first two characters of the 'id' column.
    continent = [countriesLL['id'][row][:2] for row in rows]
    country_id3 = [countriesLL['id_3char'][row] for row in rows]
    country_name = [countriesLL['name'][row] for row in rows]
    # Stored under the attribute name 'LatLon', but the tuple order is
    # (longitude, latitude).
    lon_lat = [(countriesLL['longitude'][row], countriesLL['latitude'][row])
               for row in rows]

    # (B). Load the adjacency matrix plus import / export totals.
    dirAdj = str(dirPre + 'adjacency_ntwrk_npz_files/')
    try:
        adjacency, imports, exports = dm.load_adjacency_npz_year(
            dirAdj, year, num_countries, sym)
    except Exception:
        # Best effort: report and hand back None, as callers may expect.
        print('Adjacency File not found.')
        return

    # Add every node before any edge. Adding an edge implicitly creates its
    # end points, so interleaving nodes and edges would make the graph's
    # node order depend on which trade entries are non-zero rather than on
    # the country index.
    for idx in rows:
        G.add_nodes_from([idx],
                         LatLon=lon_lat[idx],
                         countryId3=country_id3[idx],
                         countryName=country_name[idx],
                         continent=continent[idx],
                         imports=imports[idx],
                         exports=exports[idx])

    # One weighted edge per strictly positive adjacency entry.
    weighted_edges = [(a, b, adjacency[a, b])
                      for a in rows
                      for b in rows
                      if adjacency[a, b] > 0]
    G.add_weighted_edges_from(weighted_edges)

    # (C). Save a gpickle file containing the constructed graph.
    # NOTE(review): nx.write_gpickle was removed in NetworkX 3.0; this call
    # requires an older NetworkX release.
    nx.write_gpickle(G, str(dirPre + 'adjacency_ntwrkX_pickle_files/' + sym +
                            'trade_ntwrkX_' + str(year) + '.gpickle'))

    # # (D). Save a gexf file containing the networkAdj to use with Gephi toolbox
    # nx.write_gexf( G, str(dirPre + 'adjacency_gexf_network_files/' + sym + 'trade_ntwrkX_' + str(year) + '.gexf'), encoding='utf-8', prettyprint=True, version='1.2draft')
    # #
    # # Note: Graph Data File Used for Gephi not working currently. Come back.

    return G
# Translate the boolean flag into the string tag passed to the loader.
sym = 'sym' if flg_symmetric_network else ''

# (4). Loop through, load and plot all previously saved adjacency matrix files.
years = range(1962, 1963)  # years for which we have world trade data. range(1962,2015)
dirIn = str(dirPre + 'adjacency_ntwrk_npz_files/')  # same for every year
for y in years:
    print(y)
    try:
        # Pass the string tag `sym`, not the raw boolean flag: `sym` was
        # computed above for exactly this purpose and was otherwise unused.
        trade_ntwrk, imports, exports = dm.load_adjacency_npz_year(
            dirIn, y, num_countries, sym)
    except Exception:
        # Best effort: skip years whose adjacency data cannot be loaded.
        print('Can not find adjacency file')
        continue

    # # (3). Plot adjacency matrix and total imports / exports from each country.
    # if False:
    #     fig = plt.figure(figsize=(15,15))
    #     plt.subplot(2,1,1)
    #     plt.imshow( np.log(trade_ntwrk), interpolation='none' )
    #     plt.title( str('Global Trade Network in ' + str(y)) )
    #     plt.colorbar(fraction=0.046, pad=0.04)
    #     plt.xticks( range(0,num_countries), countriesLL.id_3char[0:num_countries] )
    #     plt.yticks( range(0,num_countries), countriesLL.id_3char[0:num_countries] )
    #     plt.subplot(2,2,3)
import numpy as np import pandas as pd import sys import networkx as nx import utils.data_manipulation as dm # utils is a package I am putting together of useful functions from utils.network_manipulation import modularity dirPre = dm.set_dir_tree() dirIn = str(dirPre + 'adjacency_ntwrk_npz_files/') adj = dm.load_adjacency_npz_year(dirIn, 1969, 263) adj_mtrx = adj[0] adj_graph = nx.from_numpy_matrix(adj_mtrx, create_using=nx.DiGraph()) nodelist = list(adj_graph) A = nx.to_scipy_sparse_matrix(adj_graph, nodelist=nodelist, weight='weight', format='csr') A_prime = nx.to_numpy_matrix(adj_graph) if (A.todense() == adj_mtrx).all(): print('Scipy_Sparse is equal to our npz') else: print('nx.to_scipy_sparce_matrix is different than npz adj') """ Key note for nx.directed_modularity_matrix use. This uses the nx.to_scipy_sparce_matrix call, which creates a csr matrix. To preserve the adjacency matrix weights, we need to set nodelist and weight parameters as seen below. nodelist is just list(G) and weight is the string 'weight.'
#plt.legend(trade_ntwrkG.nodes()) figG.savefig(str( '../out_figures/trade_maps/' + str(y) + '_trade_map.png' ), bbox_inches='tight') #plt.show() plt.close(figG) #------------------------------------------------------------------------------------------------------------ # (6). Imshow and save figure of Adjacency Matrix - another way to visualize changes in network # # if flg_imshow_adjacency: trade_ntwrkA, imports, exports = dm.load_adjacency_npz_year(str( dirPre + 'adjacency_ntwrk_npz_files/'), y, num_countries) #trade_ntwrkA = out[0] figA = plt.figure(figsize=(15,15)) plt.imshow(np.log10(trade_ntwrkA)) plt.colorbar() pf.plot_labels(figA, str( 'Global Trade Adjacency : ' + str(y) ), 'Country', 'Country', 20) # title, xlabel, ylabel and plot ticks formatting figA.savefig(str( dirPre + 'out_figures/adjacency_mats/' + str(y) + '_adj_mat.png' )) #plt.show() plt.close(figA)