def Construct_WT_ntwrkX_Modularity(year):
    """Build and save the NetworkX directed modularity matrix for one year.

    The adjacency matrix for ``year`` is loaded from the adjacency ``.npz``
    directory, wrapped in a ``networkx.DiGraph`` and passed to
    ``nx.directed_modularity_matrix``. The result is written with
    ``np.savez`` under the key ``netwrk`` and also returned.

    Args:
        year: Year to process. Forwarded to ``dm.load_adjacency_npz_year``
            and embedded (via ``str``) in the output file name.

    Returns:
        The modularity matrix produced by ``nx.directed_modularity_matrix``.
    """
    dirPre = dm.set_dir_tree()

    # (1) Load country names that align with the 3-letter acronyms used in
    #     the origin/destination file. Only the row count is needed here.
    countriesLL = dm.load_country_lat_lon_csv(dirPre)
    num_countries = countriesLL.shape[0]

    # (2) Directory locations for the input and output files.
    dirIn = str(dirPre + 'adjacency_ntwrk_npz_files/')
    dirOut = str(dirPre + 'modularity_ntwrkX_npz_files/')

    # (3) Load the adjacency matrix, convert it into a NetworkX DiGraph and
    #     build the modularity matrix with the built-in NetworkX function.
    adj_npz = dm.load_adjacency_npz_year(dirIn, year, num_countries)

    # nx.from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is
    # its replacement. Fall back to the legacy name only when the newer one
    # is not available in the installed version.
    graph_from_array = (getattr(nx, 'from_numpy_array', None)
                        or nx.from_numpy_matrix)
    adj_graph = graph_from_array(adj_npz[0], create_using=nx.DiGraph())
    mod_mtrx = nx.directed_modularity_matrix(adj_graph)

    out_file = str(dirOut + 'modularity_ntwrkX_' + str(year) + '_'
                   + str(num_countries) + 'countries.npz')
    np.savez(out_file, netwrk=mod_mtrx)

    return mod_mtrx
def Construct_WTnet_Adjacency(year):
    """Construct the directed trade adjacency network for one year.

    Resolves the project directory tree, loads the country table and hands
    the per-year origin/destination CSV location to
    ``dm.construct_adjacency_from_year_origin_destination_csv``. The
    one-time step that splits the full TSV into per-year files is kept in
    the body but disabled with ``if False``.

    Args:
        year: Year to process; forwarded unchanged to the ``dm`` helper.

    Returns:
        None. Nothing is returned explicitly.
    """
    # print( sys.argv )
    # print( sys.argv[0] )
    # print( sys.argv[1] )

    dirPre = dm.set_dir_tree()

    ## (1) Load country names that align with 3 letter acronyms used in origin destination file
    countriesLL = dm.load_country_lat_lon_csv(dirPre)

    ## (2) Load in names and codes for types of goods traded
    # goods = dm.load_products(dirPre)

    ## (3) Load data file with quantities of goods traded between pairs of countries and chop the
    # big tsv file (~4GB and 700M rows) into smaller ones for each year so they can be handled
    # more efficiently. Python goes into big memory swap when using the whole thing.
    #
    # Don't need to run this every time. Only once in fact, hence the disabled branch.
    if False:
        dirIn = str(dirPre + 'MIT_WT_datafiles/')
        dirOut = str(dirPre + 'origin_destination_csvs_byYear/')
        file = 'year_origin_destination_sitc_rev2.tsv'
        #
        dm.extract_year_from_origin_destination_csv(dirIn, dirOut, file)

    ## (4) Construct directed network (in an Adjacency matrix form) that shows goods shipped to
    # and from each pair of countries. There are two possible networks we can build in the data.
    # This section convinces me they are equivalent.
    #   (a). Exports from Origin to Destination. (trade_ntwrkExp)
    #   (b). Imports from Origin to Destination. (trade_ntwrkImp)
    #
    # While this technically works, it is very slow. How to speed it up?
    # Don't need to run this every time. Only once in fact.
    if True:
        dirIn = str(dirPre + 'origin_destination_csvs_byYear/')
        dirOut = str(dirPre + 'adjacency_ntwrk_npz_files/')
        fileTag = '_origin_destination_sitc_rev2.csv'
        #year = {This Variable Passed into Construct_WTnet_Adjacency function}
        #year = range(1962,2014) # this is input into function now as sys.argv[0] !
        #
        # try:
        #     num_countries = np.size(countries,0)
        # except:
        #     num_countries = 261 # hard coded if countries vector has not been loaded in.
        # print(countries)
        #
        # NOTE(review): reconstructed as a live call (the variables above are
        # otherwise unused) — confirm it was not commented out in the original.
        dm.construct_adjacency_from_year_origin_destination_csv(dirIn, dirOut, fileTag, year, countriesLL)
# Optional imports, currently disabled:
# import matplotlib.cm as cm
# import pandas as pd
# from mpl_toolkits.mplot3d import Axes3D
# import time
# import os
# import csv
# import sys
# import scipy as sp  # library to deal with sparse graphs for Cuthill-Mckee and Laplacian

# ----------------------------------------------------------------------------
# Network selection: which year to load and whether to use the symmetric form.
flg_sym = True
year = np.array(1962)

# Build the NetworkX graph for that year from the project directory tree.
dirPre = dm.set_dir_tree()
G = nm.construct_ntwrkX_Graph(dirPre=dirPre, year=year, flg_sym=flg_sym)

# ----------------------------------------------------------------------------
# Exploration of the 'community' module: best partition and dendrogram via
# the Louvain algorithm, evaluated at several resolution values.
res = [
    0.1, 0.5, 1, 3, 5, 7, 10,
]  # different resolution values for partitioning algorithms

# Result holders, one slot per resolution value (q_dend has three rows).
q_bp = np.zeros_like(res)
coverage = np.zeros_like(res)
q_dend = np.zeros((3, len(res)))
class Clustering:
    """Cluster one year's trade network and score the resulting partitions.

    Class attributes (evaluated once, when the class is defined):
        dirPre: Directory prefix returned by ``dm.set_dir_tree()``.
        dirIn: Directory holding the adjacency ``.npz`` files.
        countriesLL: Country names & ids and Lat,Lon information.
        num_countries: Number of rows in ``countriesLL``.

    Instance attributes set by ``__init__``:
        year: Year passed in by the caller.
        flg_sym: ``'sym'`` when the caller's ``flg_sym`` is truthy, else ``''``.
        G: Graph returned by ``nm.construct_ntwrkX_Graph``.
        gcc: Largest connected component of ``G`` (a set of nodes).
        num_gcc: Number of nodes in ``gcc``.
        trade_ntwrk_graph, imports, exports: Values returned by
            ``dm.load_adjacency_npz_year``.
        trade_ntwrk: Matrix derived from the graph according to ``method``.
        labels: Most recent cluster labels, or ``None`` before any
            clustering method has been run.
    """

    dirPre = dm.set_dir_tree()
    dirIn = str(dirPre + 'adjacency_ntwrk_npz_files/')
    # country names & ids and Lat,Lon information.
    countriesLL = dm.load_country_lat_lon_csv(dirPre)
    num_countries = countriesLL.shape[0]

    def __init__(self, year, method, flg_sym, norm="norm", is_gcc=False):
        """Load the network for ``year`` and build the matrix to cluster.

        Args:
            year: Year to load.
            method: ``"Laplacian"`` selects ``nm.networkX_laplacian``; any
                other value is forwarded to ``nm.construct_ntwrk_method``.
            flg_sym: Truthy to request the symmetric network.
            norm: Forwarded to ``nm.networkX_laplacian`` (Laplacian only).
            is_gcc: When true, restrict ``trade_ntwrk`` with
                ``nm.convert_adjacency_to_giant_component``.
        """
        self.year = year
        self.flg_sym = 'sym' if flg_sym else ''

        self.G = nm.construct_ntwrkX_Graph(self.dirPre, self.year,
                                           self.flg_sym)
        # NOTE(review): nx.connected_components is only defined for
        # undirected graphs — confirm G is undirected when flg_sym is false.
        self.gcc = max(nx.connected_components(self.G), key=len)
        self.num_gcc = len(self.gcc)

        self.trade_ntwrk_graph, self.imports, self.exports = \
            dm.load_adjacency_npz_year(self.dirIn, year, self.num_countries,
                                       self.flg_sym)

        # Sanity checks: column sums vs. imports, row sums vs. exports.
        # NOTE(review): np.any passes as soon as one entry matches;
        # np.all / np.allclose may have been intended — confirm before
        # tightening, since that could start rejecting existing data.
        assert np.any(np.sum(self.trade_ntwrk_graph, axis=0)
                      == self.imports), 'Imports are Weird'
        assert np.any(np.sum(self.trade_ntwrk_graph, axis=1)
                      == self.exports), 'Exports are Weird'

        # Compare by value: `is` on a string literal tests object identity
        # and can be False for an equal string built at runtime.
        if method == "Laplacian":
            self.trade_ntwrk = nm.networkX_laplacian(self.G, self.flg_sym,
                                                     norm)
        else:
            self.trade_ntwrk = nm.construct_ntwrk_method(
                self.trade_ntwrk_graph, method)

        if is_gcc:
            self.trade_ntwrk = nm.convert_adjacency_to_giant_component(
                self.G, self.trade_ntwrk)

        self.labels = None

    def svd(self):
        """Compute the Singular Value Decomposition of ``trade_ntwrk``.

        Prints the shape of ``trade_ntwrk`` as a side effect.

        Returns:
            tuple: ``(Ui, Si, Vi)`` exactly as returned by
            ``np.linalg.svd(..., full_matrices=True, compute_uv=True)``.
        """
        Ui, Si, Vi = np.linalg.svd(self.trade_ntwrk, full_matrices=True,
                                   compute_uv=True)
        print(self.trade_ntwrk.shape)
        return Ui, Si, Vi

    def kmeans(self, numClust, nDims, Vi):
        """Run k-means on the first ``nDims`` rows of ``Vi`` (transposed).

        Stores the resulting labels in ``self.labels``.

        Args:
            numClust: Number of clusters.
            nDims: Number of leading rows of ``Vi`` used as features.
            Vi: Matrix whose rows are used as embedding dimensions,
                e.g. the third value returned by ``svd``.

        Returns:
            tuple: ``(labels, cluster_centers, fitted KMeans estimator)``.
        """
        km = skc.KMeans(n_clusters=numClust, n_init=10, max_iter=300,
                        tol=0.001, verbose=False).fit(Vi[0:nDims].T)
        kmLabels = km.labels_
        kmCenters = km.cluster_centers_
        kmParams = km
        self.labels = kmLabels
        return kmLabels, kmCenters, kmParams

    def best_partition(self):
        """Partition ``G`` with ``c.best_partition`` and store the labels.

        The node -> community mapping is flattened into a list indexed by
        node, so node keys are used directly as positions.
        NOTE(review): this presumes nodes are the integers 0..n-1 — confirm.

        Returns:
            list: Community id for each node index.
        """
        best_partitions = c.best_partition(self.G)
        formatted_label = np.empty(len(best_partitions), dtype=np.int32)
        for country_index, community_id in best_partitions.items():
            formatted_label[country_index] = community_id
        self.labels = list(formatted_label)
        return list(formatted_label)

    def reformat_kmLabels_nx(self, numClust):
        """Reformat ``self.labels`` for the nx quality functions.

        Args:
            numClust: Number of clusters; every label must be a valid
                index into a list of this length.

        Returns:
            list: list of sets. Each set contains the index of countries
            assigned to that cluster; clusters that receive no node stay
            as empty sets.
        """
        assert self.labels is not None, "Run kmeans method first " \
                                        "to get the labels."
        community = [set() for _ in range(numClust)]
        for node, label in enumerate(self.labels):
            community[label].add(node)
        return community

    def reformat_kmLabels_c(self):
        """Reformat ``self.labels`` as a node -> cluster mapping.

        Returns:
            dict: a dictionary where keys are the node indices and values
            the clusters.
        """
        assert self.labels is not None, "Run kmeans method first to get " \
                                        "the labels."
        return dict(enumerate(self.labels))

    def cluster_quality_measure(self, quality_measure, labels):
        """Score a clustering with the requested quality measure.

        Args:
            quality_measure: One of ``'louvain_modularity'``,
                ``'modularity'``, ``'coverage'``, ``'performance'``,
                ``'density'`` or ``'conductance'``.
            labels: Clustering in the format the chosen measure expects:
                the dict form for ``'louvain_modularity'`` (see
                ``reformat_kmLabels_c``), otherwise the list-of-sets form
                (see ``reformat_kmLabels_nx``).

        Returns:
            float: the quality measure of the clustering.

        Raises:
            ValueError: If ``quality_measure`` is not a recognised name.
        """
        assert self.labels is not None, "Run kmeans method before running " \
                                        "quality measures in order to set " \
                                        "the labels"

        # String names are compared by value (==), never by identity (is).
        if quality_measure == 'louvain_modularity':
            assert self.flg_sym == 'sym', "louvain modularity does not " \
                                          "accept asymmetrical graphs"
            # labels = self.reformat_kmLabels_c()
            return c.modularity(labels, self.G)

        # labels = self.reformat_kmLabels_nx()
        if quality_measure == "modularity":
            return nx.algorithms.community.quality.modularity(
                self.G, labels, 'weight')
        if quality_measure == "coverage":
            return nx.algorithms.community.quality.coverage(self.G, labels)
        if quality_measure == "performance":
            return nx.algorithms.community.quality.performance(
                self.G, labels)
        if quality_measure == "density":
            return cq.density(self.G, labels)
        if quality_measure == "conductance":
            return cq.conductance(self.G, labels)
        raise ValueError("Quality measure is not found.")