Example #1
def Construct_WT_ntwrkX_Modularity(year):

    dirPre = dm.set_dir_tree()

    ## (1) Load country names that align with 3 letter acronyms used in origin destination file
    countriesLL = dm.load_country_lat_lon_csv(dirPre)
    num_countries = countriesLL.shape[0]

    # (2) Obtain accurate directory locations for both input and output files.

    dirIn = str(dirPre + 'adjacency_ntwrk_npz_files/')
    dirOut = str(dirPre + 'modularity_ntwrkX_npz_files/')

    ## (4) First the adjacency matrix is loaded from the adj_npz.
    # Then the adj_matrix is converted into a NetworkX DiGraph object.
    # Finally the DiGraph is used to create a modularity matrix, using the built in NetworkX
    # modularity_matrix function.
    adj_npz = dm.load_adjacency_npz_year(dirIn, year, num_countries)
    adj_graph = nx.from_numpy_matrix(adj_npz[0], create_using=nx.DiGraph())
    mod_mtrx = nx.directed_modularity_matrix(adj_graph)

    np.savez(str(dirOut + 'modularity_ntwrkX_' + str(year) + '_' +
                 str(num_countries) + 'countries.npz'),
             netwrk=mod_mtrx)

    return mod_mtrx
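A brief usage sketch (not part of the original example): call the function for one year and reload the saved modularity matrix. It assumes `dm`, `np`, and this function are importable and that the output directory already exists.

# Hypothetical usage of Construct_WT_ntwrkX_Modularity -- year and paths are illustrative.
year = 1962
mod_mtrx = Construct_WT_ntwrkX_Modularity(year)

# Reload the matrix the function saved under the key 'netwrk'.
dirPre = dm.set_dir_tree()
num_countries = dm.load_country_lat_lon_csv(dirPre).shape[0]
saved = np.load(str(dirPre + 'modularity_ntwrkX_npz_files/' +
                    'modularity_ntwrkX_' + str(year) + '_' +
                    str(num_countries) + 'countries.npz'))
print(saved['netwrk'].shape)  # square: one row/column per country in the network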
Example #2
def Construct_WTnet_Adjacency(year):	

	# print( sys.argv )
	# print( sys.argv[0] )
	# print( sys.argv[1] )

	dirPre = dm.set_dir_tree()

	## (1) Load country names that align with 3 letter acronyms used in origin destination file
	countriesLL = dm.load_country_lat_lon_csv(dirPre)
	
	## (2) Load in names and codes for types of goods traded
	# goods = dm.load_products(dirPre)


	## (3) Load the data file with quantities of goods traded between pairs of countries, and chop the big tsv file (~4GB and 700M rows) into
	# smaller ones for each year so I can handle them more efficiently. Python goes into heavy memory swap when using the whole thing.
	#
	# Don't need to run this every time; only once, in fact.
	if False:
		dirIn = str(dirPre + 'MIT_WT_datafiles/')
		dirOut = str(dirPre + 'origin_destination_csvs_byYear/')
		file = 'year_origin_destination_sitc_rev2.tsv'
		#
		dm.extract_year_from_origin_destination_csv(dirIn, dirOut, file)


	## (4) Construct directed network (in an Adjacency matrix form) that shows goods shipped to and from each pair of
	# countries. There are two possible networks we can build from the data. This section convinces me they are equivalent:
	# (a). Exports from Origin to Destination. (trade_ntwrkExp)
	# (b). Imports from Origin to Destination. (trade_ntwrkImp)
	#
	# While this technically works, it is very slow. How to speed it up?
	# Don't need to run this every time; only once, in fact.
	if True:
		dirIn = str(dirPre + 'origin_destination_csvs_byYear/')		
		dirOut = str(dirPre + 'adjacency_ntwrk_npz_files/')
		fileTag = '_origin_destination_sitc_rev2.csv'
		#year = {this variable is passed into the Construct_WTnet_Adjacency function}
		#year = range(1962,2014) # the year is now passed in as a function argument (originally read from the command line).
		#
		# try:
		# 	num_countries = np.size(countries,0)
		# except:
		# 	num_countries = 261 # hard coded if countries vector has not been loaded in.

		# print(countries)	
		#
		dm.construct_adjacency_from_year_origin_destination_csv(dirIn, dirOut, fileTag, year, countriesLL)
#
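A hedged sketch of a command-line driver for this function, following the commented-out sys.argv hints above; the script name and argument position are assumptions, not part of the original.

# Hypothetical driver, e.g.:  python Construct_WTnet_Adjacency.py 1963
# Assumes the year arrives as the first command-line argument.
import sys
year = int(sys.argv[1])
Construct_WTnet_Adjacency(year)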

method = 'Adjacency'
#method = 'Normalized Laplacian'
#method = 'Modularity'
#method = 'Topographic Modularity'

print(method)

#------------------------------------------------------------------------------------------------------------
## (0). Check what Operating System you are on (either my machine or Cortex cluster) and adjust directory structure accordingly.
dirPre = dm.set_dir_tree()

#------------------------------------------------------------------------------------------------------------
## (1). Load country names that align with 3 letter acronyms used in origin destination file
countriesLL = dm.load_country_lat_lon_csv(dirPre)
num_countries = countriesLL.shape[0]

## (2) Load in names and codes for types of goods traded
#goods = dm.load_products(dirPre)

# (4). Loop through, load and plot all previously saved adjacency matrix files.
years = range(1962, 2015)  # np.array([1962]) # years for which we have world trade data.

for y in years:
    print(y)

    dirIn = str(dirPre + 'adjacency_ntwrk_npz_files/')
    try:
        trade_ntwrk, imports, exports = dm.load_adjacency_npz_year(
            dirIn, y, num_countries)
    except:
        print('Adjacency file not found for year ' + str(y))
        continue
def construct_ntwrkX_Graph(dirPre, year, sym):
    ## (1). Make a NetworkX Graph Object from an Adjacency matrix. It is a directed network with edge weights
    # equal to the amount of goods sent from country i to country j (or vice versa). Nodes are tagged with
    # information about country name, Lat & Lon, continent, total imports & total exports. The resulting
    # Graph object will be saved as a gpickle file.

    if (sym=='sym'):
        G = nx.Graph()  # create the weighted undirected (ie. symmetric) graph.
    else:
        G = nx.DiGraph()  # create the weighted directed graph.


    # ----------------------------------------------------------------------------------------------------------
    # (A). Get latitude and longitude information from a pickled UTF-8 file created from a csv, and use it to
    # construct nodes with name, location, and continent attributes.
    countriesLL = dm.load_country_lat_lon_csv(dirPre)
    num_countries = countriesLL.shape[0]
    country_indx = list(range(num_countries))

    continent =  [countriesLL['id'][row][:2] for row in range(num_countries)]
    countryId3 = [countriesLL['id_3char'][row] for row in range(num_countries)]
    countryName = [countriesLL['name'][row] for row in range(num_countries)]
    LonLat = [ (countriesLL['longitude'][row], countriesLL['latitude'][row]) for row in range(num_countries)]


    dirAdj = str( dirPre + 'adjacency_ntwrk_npz_files/' )
    try:
        # load in adjacency for a given year.
        A, I, E = dm.load_adjacency_npz_year(dirAdj, year, num_countries, sym)
    except:
        print('Adjacency File not found.')
        return


    for a in range(num_countries):
        G.add_nodes_from( [country_indx[a]], LatLon=LonLat[a], countryId3=countryId3[a], countryName=countryName[a],
                          continent=continent[a], imports=I[a], exports=E[a] )

        for b in range(num_countries):
            if A[a, b] > 0:
                G.add_weighted_edges_from( [(a, b, A[a, b])] )  # add a weighted edge from country a to country b.

    # # Note: To access data for each node or edge, do:
    # G.nodes.data('LatLon')[0]
    # G.nodes.data('countryId3')[0]
    # G.nodes.data('countryName')[0]
    # G.nodes.data('continent')[0]
    # G.nodes.data('imports')[0]
    # G.nodes.data('exports')[0]
    # G.edges.data('weight')


    # (C). Save a gpickle file containing the network (G) constructed above.
    nx.write_gpickle(G, str(dirPre + 'adjacency_ntwrkX_pickle_files/' + sym + 'trade_ntwrkX_' + str(year) + '.gpickle'))

    # # (D). Save a gexf file containing the networkAdj to use with Gephi toolbox
    # nx.write_gexf( G, str(dirPre + 'adjacency_gexf_network_files/' + sym + 'trade_ntwrkX_' + str(year) + '.gexf'), encoding='utf-8', prettyprint=True, version='1.2draft')
    #
    # # Note: Graph Data File Used for Gephi not working currently. Come back.

    return G
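A short read-back sketch (assumptions: the gpickle for this year and sym flag was written by the function above, and a networkx version providing read_gpickle is in use).

# Hypothetical read-back of the saved graph; the year and 'sym' flag are illustrative.
dirPre = dm.set_dir_tree()
G = nx.read_gpickle(str(dirPre + 'adjacency_ntwrkX_pickle_files/' +
                        'sym' + 'trade_ntwrkX_' + str(1962) + '.gpickle'))
print(G.number_of_nodes(), G.number_of_edges())
print(G.nodes.data('countryName')[0])  # name attribute stored on node 0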
Example #5
#

#method = 'Adjacency'
#method = 'NormLaplacian'
method = 'Modularity'
#method = 'TopoModularity'

#------------------------------------------------------------------------------------------------------------
## (0). Check what Operating System you are on (either my machine or Cortex cluster) and adjust directory structure accordingly.
dirPre = dm.set_dir_tree()

#------------------------------------------------------------------------------------------------------------
## (1). Load country names that align with 3 letter acronyms used in origin destination file
#countries = dm.load_countries(dirPre)

countriesLL = dm.load_country_lat_lon_csv(
    dirPre)  # country names & ids and Lat,Lon information.
num_countries = countriesLL.shape[0]

## (2) Load in names and codes for types of goods traded
#goods = dm.load_products(dirPre)

# (3). Get array indicating continents for each country to colorcode nodes in graph.
continent = []
colors = ['r', 'g', 'b', 'y', 'c', 'm', 'k']
for o in range(0, num_countries):
    reg = countriesLL.id[o]
    continent = np.append(continent, reg[0:2])
conts = set(continent)  # this is a 'set object' (all the different continent codes)
conts = list(conts)  # convert 'set object' to a list that I can iterate over.
conts = np.sort(conts)
node_colors_by_continent = np.zeros(len(continent))
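The snippet stops before `node_colors_by_continent` is filled in; below is a minimal sketch of one way the continent-based color coding could be completed. The mapping is an assumption, not part of the original.

# Hypothetical continuation -- map each country's continent code to an index into
# the sorted continent list, then look up a plotting color per node.
for i, cont in enumerate(continent):
    node_colors_by_continent[i] = np.where(conts == cont)[0][0]
node_colors = [colors[int(c) % len(colors)] for c in node_colors_by_continent]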
Example #6
class Clustering:
    dirPre = dm.set_dir_tree()
    dirIn = str(dirPre + 'adjacency_ntwrk_npz_files/')
    countriesLL = dm.load_country_lat_lon_csv(
        dirPre)  # country names & ids and Lat,Lon information.
    num_countries = countriesLL.shape[0]

    def __init__(self, year, method, flg_sym, norm="norm", is_gcc=False):
        self.year = year
        if flg_sym:
            self.flg_sym = 'sym'
        else:
            self.flg_sym = ''
        self.G = nm.construct_ntwrkX_Graph(self.dirPre, self.year,
                                           self.flg_sym)
        self.gcc = max(nx.connected_components(self.G), key=len)
        self.num_gcc = len(self.gcc)
        self.trade_ntwrk_graph, self.imports, self.exports =\
            dm.load_adjacency_npz_year(self.dirIn, year, self.num_countries,
                                       self.flg_sym)
        assert np.all(np.sum(self.trade_ntwrk_graph, axis=0) ==
                      self.imports), 'Imports are Weird'
        assert np.all(np.sum(self.trade_ntwrk_graph, axis=1) ==
                      self.exports), 'Exports are Weird'
        if method == "Laplacian":
            self.trade_ntwrk = nm.networkX_laplacian(self.G, self.flg_sym,
                                                     norm)
        else:
            self.trade_ntwrk = nm.construct_ntwrk_method(
                self.trade_ntwrk_graph, method)
        if is_gcc:
            self.trade_ntwrk = nm.convert_adjacency_to_giant_component(
                self.G, self.trade_ntwrk)
        self.labels = None

    def svd(self):
        """Compute Singular Value Decomposition on trade_ntwrkA
        (without anything on the diagonal)

        Returns:
            tuple

        """
        Ui, Si, Vi = np.linalg.svd(self.trade_ntwrk,
                                   full_matrices=True,
                                   compute_uv=True)
        print(self.trade_ntwrk.shape)
        return Ui, Si, Vi

    def kmeans(self, numClust, nDims, Vi):
        """Computes kmeans clustering based on svd

        Returns:
            tuple
        """
        km = skc.KMeans(n_clusters=numClust,
                        n_init=10,
                        max_iter=300,
                        tol=0.001,
                        verbose=False).fit(Vi[0:nDims].T)
        kmLabels = km.labels_
        kmCenters = km.cluster_centers_
        kmParams = km
        self.labels = kmLabels
        return kmLabels, kmCenters, kmParams

    def best_partition(self):
        best_partitions = c.best_partition(self.G)
        formatted_label = np.empty((len(best_partitions)), dtype=np.int32)
        for country_index in best_partitions:
            formatted_label[country_index] = best_partitions[country_index]
        self.labels = list(formatted_label)
        return list(formatted_label)

    def reformat_kmLabels_nx(self, numClust):
        """reformat kmLabels to be used in nx quality function

        Returns:
            list: list of sets; each set contains the indices of the countries in that cluster.
        """
        assert self.labels is not None, "Run kmeans method first " \
                                        "to get the labels."
        community = [set() for _ in range(numClust)]
        for i in range(len(self.labels)):
            # Assign country i to the set for its cluster label.
            community[self.labels[i]].add(i)
        return community

    def reformat_kmLabels_c(self):
        """

        Returns:
            dict: a dictionary where keys are their nodes
            and values the clusters

        """
        assert self.labels is not None, "Run kmeans method first to get " \
                                        "the labels."
        partition = {}
        for i in range(len(self.labels)):
            partition[i] = self.labels[i]
        return partition

    def cluster_quality_measure(self, quality_measure, labels):
        """

        Returns:
            float: the quality measure of the clustering

        """
        assert self.labels is not None, "Run kmeans method before running " \
                                        "quality measures in order to set " \
                                        "the labels"
        if quality_measure == 'louvain_modularity':
            assert self.flg_sym == 'sym', "louvain modularity does not accept " \
                                          "asymmetrical graphs"
            # labels = self.reformat_kmLabels_c()
            return c.modularity(labels, self.G)
        else:
            # labels = self.reformat_kmLabels_nx()
            if quality_measure == "modularity":
                return nx.algorithms.community.quality.modularity(
                    self.G, labels, 'weight')
            if quality_measure == "coverage":
                return nx.algorithms.community.quality.coverage(self.G, labels)
            if quality_measure == "performance":
                return nx.algorithms.community.quality.performance(
                    self.G, labels)
            if quality_measure == "density":
                return cq.density(self.G, labels)
            if quality_measure == "conductance":
                return cq.conductance(self.G, labels)
            else:
                raise ValueError("Quality measure is not found.")
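A hedged end-to-end sketch of how this class might be used. The year, method name, cluster count, and dimensionality are illustrative assumptions; it also assumes the symmetric adjacency/graph files for that year exist and that "Adjacency" is a method name accepted by nm.construct_ntwrk_method.

# Hypothetical usage of the Clustering class.
clust = Clustering(year=1962, method="Adjacency", flg_sym=True)

# SVD + k-means clustering of the trade network.
Ui, Si, Vi = clust.svd()
kmLabels, kmCenters, kmParams = clust.kmeans(numClust=5, nDims=5, Vi=Vi)

# Reformat labels into communities and score the partition.
communities = clust.reformat_kmLabels_nx(numClust=5)
print(clust.cluster_quality_measure("modularity", communities))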