Python set_dir_tree Examples, utils.data_manipulation.set_dir_tree Python Examples

Example #1

0

Show file

def Construct_WT_ntwrkX_Modularity(year):
    """Build, save and return the NetworkX modularity matrix for one year.

    The adjacency matrix stored for ``year`` is loaded, wrapped in a directed
    NetworkX graph, and turned into a modularity matrix with
    ``nx.directed_modularity_matrix``. The result is written to an ``.npz``
    file (array key ``netwrk``) inside ``modularity_ntwrkX_npz_files/``.

    Args:
        year: Year whose adjacency file is loaded; it is also embedded in
            the output file name.

    Returns:
        The matrix produced by ``nx.directed_modularity_matrix``.
    """
    root_dir = dm.set_dir_tree()

    # The country table (names aligned with the 3 letter acronyms used in
    # the origin/destination file) fixes the number of countries.
    n_countries = dm.load_country_lat_lon_csv(root_dir).shape[0]

    # Input and output locations, both relative to the project data root.
    adjacency_dir = root_dir + 'adjacency_ntwrk_npz_files/'
    modularity_dir = root_dir + 'modularity_ntwrkX_npz_files/'

    # Adjacency matrix -> DiGraph -> modularity matrix. Only the first item
    # returned by the loader (the adjacency matrix) is needed here.
    loaded = dm.load_adjacency_npz_year(adjacency_dir, year, n_countries)
    digraph = nx.from_numpy_matrix(loaded[0], create_using=nx.DiGraph())
    modularity = nx.directed_modularity_matrix(digraph)

    out_file = 'modularity_ntwrkX_%s_%scountries.npz' % (year, n_countries)
    np.savez(modularity_dir + out_file, netwrk=modularity)

    return modularity

Example #2

0

Show file

def Construct_WTnet_Adjacency(year, split_by_year=False, build_adjacency=True):
	"""Build the directed trade adjacency network for ``year``.

	Two independent, slow steps can be run. Each only needs to run once per
	data set, so both are switchable by keyword instead of by editing the
	source:

	* ``split_by_year``: chop the big origin/destination tsv file (~4GB and
	  700M rows) into one smaller file per year, so later steps do not push
	  Python into memory swap by loading the whole thing.
	* ``build_adjacency``: construct the directed network, in adjacency
	  matrix form, of goods shipped to and from each pair of countries for
	  ``year``.

	Args:
		year: Year passed on to
			``dm.construct_adjacency_from_year_origin_destination_csv``.
		split_by_year: When true, run the one-off per-year split first.
			Defaults to False (previously a hard-coded ``if False``).
		build_adjacency: When true, build the adjacency network for
			``year``. Defaults to True (previously a hard-coded ``if True``).

	Returns:
		None. Results are produced by the ``dm`` helpers, which are given
		the output directories below.
	"""
	dirPre = dm.set_dir_tree()

	## (1) Load country names that align with 3 letter acronyms used in origin destination file
	countriesLL = dm.load_country_lat_lon_csv(dirPre)

	# NOTE: names and codes for the types of goods traded are not loaded;
	# nothing below uses them.

	## (2) Optional one-off step: split the full tsv into per-year files.
	if split_by_year:
		dirIn = dirPre + 'MIT_WT_datafiles/'
		dirOut = dirPre + 'origin_destination_csvs_byYear/'
		tsvName = 'year_origin_destination_sitc_rev2.tsv'
		dm.extract_year_from_origin_destination_csv(dirIn, dirOut, tsvName)

	## (3) Construct directed network (in an Adjacency matrix form) that shows goods shipped to and from each pair of
	# countries. There are two possible networks we can build in the data; the original author considered them equivalent:
	# (a). Exports from Origin to Destination. (trade_ntwrkExp)
	# (b). Imports from Origin to Destination. (trade_ntwrkImp)
	#
	# While this technically works, it is very slow.
	if build_adjacency:
		dirIn = dirPre + 'origin_destination_csvs_byYear/'
		dirOut = dirPre + 'adjacency_ntwrk_npz_files/'
		fileTag = '_origin_destination_sitc_rev2.csv'
		dm.construct_adjacency_from_year_origin_destination_csv(dirIn, dirOut, fileTag, year, countriesLL)

Example #3

0

Show file

File: clustering_methods.py Project: ParhamP/Global-Trade-Network

# import matplotlib.cm as cm
# import pandas as pd
# from mpl_toolkits.mplot3d import Axes3D
# import time
# import os
# import csv

# import sys

# import scipy as sp       # library to deal with sparse graphs for Cuthill-Mckee and Laplacian

# ----------------------------------------------------------------------------
# Build the NetworkX graph of the trade network for a single year.

flg_sym = True
year = np.array(1962)
dirPre = dm.set_dir_tree()
G = nm.construct_ntwrkX_Graph(dirPre=dirPre, year=year, flg_sym=flg_sym)

# ----------------------------------------------------------------------------
# Explore the 'community' module: best partition and dendrogram computed with
# the Louvain algorithm.

# Resolution values to try with the partitioning algorithms.
res = [0.1, 0.5, 1, 3, 5, 7, 10]

# Result buffers with one slot per resolution value (three rows for q_dend).
# NOTE(review): the names suggest best-partition quality, dendrogram quality
# and coverage; the code that fills them is not visible here - confirm.
q_bp = np.zeros(len(res))
coverage = np.zeros(len(res))
q_dend = np.zeros((3, len(res)))

Example #4

0

Show file

class Clustering:
    """Cluster one year's trade network and score the resulting partitions.

    Class attributes (evaluated once, when the class body runs, and shared
    by every instance):
        dirPre: Directory prefix returned by ``dm.set_dir_tree()``.
        dirIn: ``dirPre`` followed by ``'adjacency_ntwrk_npz_files/'``.
        countriesLL: Country names & ids and Lat,Lon information.
        num_countries: Number of rows in ``countriesLL``.
    """
    dirPre = dm.set_dir_tree()
    dirIn = str(dirPre + 'adjacency_ntwrk_npz_files/')
    countriesLL = dm.load_country_lat_lon_csv(
        dirPre)  # country names & ids and Lat,Lon information.
    num_countries = countriesLL.shape[0]

    def __init__(self, year, method, flg_sym, norm="norm", is_gcc=False):
        """Load the network for ``year`` and build the matrix to cluster.

        Args:
            year: Year of the trade network to load.
            method: ``"Laplacian"`` builds the matrix with
                ``nm.networkX_laplacian``; any other value is forwarded to
                ``nm.construct_ntwrk_method``.
            flg_sym: Stored on the instance as ``'sym'`` when truthy and
                ``''`` otherwise; that string is what the ``nm`` / ``dm``
                helpers receive.
            norm: Forwarded to ``nm.networkX_laplacian`` (Laplacian only).
            is_gcc: When truthy, the matrix is passed through
                ``nm.convert_adjacency_to_giant_component``.
        """
        self.year = year
        self.flg_sym = 'sym' if flg_sym else ''
        self.G = nm.construct_ntwrkX_Graph(self.dirPre, self.year,
                                           self.flg_sym)
        # Largest connected component (a set of nodes) and its size.
        # NOTE(review): nx.connected_components is only implemented for
        # undirected graphs - confirm self.G is undirected when flg_sym is
        # falsy.
        self.gcc = max(nx.connected_components(self.G), key=len)
        self.num_gcc = len(self.gcc)
        self.trade_ntwrk_graph, self.imports, self.exports =\
            dm.load_adjacency_npz_year(self.dirIn, year, self.num_countries,
                                       self.flg_sym)
        # Sanity checks: column sums vs. imports, row sums vs. exports.
        # NOTE(review): np.any only requires ONE matching entry; np.all or
        # np.allclose may have been intended - confirm before tightening.
        assert np.any(np.sum(self.trade_ntwrk_graph, axis=0) ==
                      self.imports), 'Imports are Weird'
        assert np.any(np.sum(self.trade_ntwrk_graph, axis=1) ==
                      self.exports), 'Exports are Weird'
        # Compare by value: `is` on a string literal tests object identity
        # and only works by accident of interning.
        if method == "Laplacian":
            self.trade_ntwrk = nm.networkX_laplacian(self.G, self.flg_sym,
                                                     norm)
        else:
            self.trade_ntwrk = nm.construct_ntwrk_method(
                self.trade_ntwrk_graph, method)
        if is_gcc:
            self.trade_ntwrk = nm.convert_adjacency_to_giant_component(
                self.G, self.trade_ntwrk)
        # Set by kmeans() or best_partition().
        self.labels = None

    def svd(self):
        """Compute the Singular Value Decomposition of ``self.trade_ntwrk``.

        The shape of the decomposed matrix is printed as a side effect.

        Returns:
            tuple: The three values returned by ``np.linalg.svd`` called
            with ``full_matrices=True`` and ``compute_uv=True``.
        """
        Ui, Si, Vi = np.linalg.svd(self.trade_ntwrk,
                                   full_matrices=True,
                                   compute_uv=True)
        print(self.trade_ntwrk.shape)
        return Ui, Si, Vi

    def kmeans(self, numClust, nDims, Vi):
        """Run k-means on the first ``nDims`` rows of ``Vi``.

        The fitted labels are also stored on ``self.labels``.

        Args:
            numClust: Number of clusters.
            nDims: Number of leading rows of ``Vi`` used as features.
            Vi: 2-D array whose columns are the samples to cluster (for
                example the third value returned by ``svd``).

        Returns:
            tuple: ``(labels, cluster_centers, fitted_estimator)``.
        """
        km = skc.KMeans(n_clusters=numClust,
                        n_init=10,
                        max_iter=300,
                        tol=0.001,
                        verbose=False).fit(Vi[0:nDims].T)
        self.labels = km.labels_
        return km.labels_, km.cluster_centers_, km

    def best_partition(self):
        """Partition ``self.G`` with ``c.best_partition``.

        The result is stored on ``self.labels`` and returned as a list in
        which position ``i`` holds the cluster of node ``i``.

        NOTE(review): nodes are used directly as array indices, so this
        assumes they are the integers ``0 .. n-1`` - confirm.

        Returns:
            list: Cluster id (``np.int32``) per node index.
        """
        partition = c.best_partition(self.G)
        formatted_label = np.empty(len(partition), dtype=np.int32)
        for country_index, cluster in partition.items():
            formatted_label[country_index] = cluster
        self.labels = list(formatted_label)
        # Return a separate list so callers cannot mutate self.labels.
        return list(formatted_label)

    def reformat_kmLabels_nx(self, numClust):
        """Reformat ``self.labels`` for the nx quality functions.

        Args:
            numClust: Number of clusters; every label must be smaller.

        Returns:
            list: ``numClust`` sets, the k-th holding the indices of the
            countries labelled ``k``. Clusters without any node are kept
            as empty sets.
        """
        assert self.labels is not None, "Run kmeans method first " \
                                        "to get the labels."
        community = [set() for _ in range(numClust)]
        for node, cluster in enumerate(self.labels):
            community[cluster].add(node)
        return community

    def reformat_kmLabels_c(self):
        """Reformat ``self.labels`` as a node -> cluster dictionary.

        Returns:
            dict: Keys are the node indices and values their clusters.
        """
        assert self.labels is not None, "Run kmeans method first to get " \
                                        "the labels."
        return dict(enumerate(self.labels))

    def cluster_quality_measure(self, quality_measure, labels):
        """Score a clustering of ``self.G`` with the requested measure.

        Args:
            quality_measure: One of ``'louvain_modularity'``,
                ``'modularity'``, ``'coverage'``, ``'performance'``,
                ``'density'`` or ``'conductance'``.
            labels: The partition, passed unchanged to the scoring
                function. Presumably the dict from ``reformat_kmLabels_c``
                for ``'louvain_modularity'`` and the list of sets from
                ``reformat_kmLabels_nx`` otherwise - confirm with callers.

        Returns:
            float: The quality measure of the clustering.

        Raises:
            ValueError: If ``quality_measure`` is not a known name.
            AssertionError: If no labels have been computed yet, or if
                ``'louvain_modularity'`` is requested for a network that
                was not built as symmetric.
        """
        assert self.labels is not None, "Run kmeans method before running " \
                                        "quality measures in order to set " \
                                        "the labels"
        # All names are compared with `==`; identity (`is`) comparison with
        # string literals is unreliable.
        if quality_measure == 'louvain_modularity':
            assert self.flg_sym == 'sym', "louvain modularity does not " \
                                          "accept asymmetrical graphs"
            return c.modularity(labels, self.G)
        if quality_measure == "modularity":
            return nx.algorithms.community.quality.modularity(
                self.G, labels, 'weight')
        if quality_measure == "coverage":
            return nx.algorithms.community.quality.coverage(self.G, labels)
        if quality_measure == "performance":
            return nx.algorithms.community.quality.performance(
                self.G, labels)
        if quality_measure == "density":
            return cq.density(self.G, labels)
        if quality_measure == "conductance":
            return cq.conductance(self.G, labels)
        raise ValueError("Quality measure is not found.")