Esempio n. 1
0
1. HS02 to SITCR2

Notes
-----
1. Any Special Concordances or Alterations are located in dataset/<dataset_name>/meta
"""

import os
import copy
import pandas as pd

from pyeconlab.util import check_directory

# - Locate the "data" folder that sits next to this module - #
# this_dir / this_filename are the directory and file-name parts of this
# module's path; DATA_PATH is whatever check_directory returns for
# <this_dir>/data (presumably the validated directory path -- confirm in
# pyeconlab.util).
this_dir = os.path.dirname(__file__)
this_filename = os.path.basename(__file__)
DATA_PATH = check_directory(os.path.join(this_dir, "data"))

#---------------#
#-Product Codes-#
#---------------#


class HS_To_SITC(object):
    """ 
    Concordance from HS to SITC

    Parameters
    ----------
    hs          :   str
                    Specify HS Code ('HS92', 'HS96', 'HS02')
    sitc        :   str 
Esempio n. 2
0
Future Work
-----------
1. Add Metadata to the SITC objects (e.g. applicable_years, data_available_years etc.)
2. Add source_institution option to Revision Functions

"""

import os
import copy
import pandas as pd

from pyeconlab.util import check_directory

# - Locate the "data" folder that sits next to this module - #
# this_dir / this_filename are the directory and file-name parts of this
# module's path; DATA_PATH is whatever check_directory returns for
# <this_dir>/data (presumably the validated directory path -- confirm in
# pyeconlab.util).
this_dir = os.path.dirname(__file__)
this_filename = os.path.basename(__file__)
DATA_PATH = check_directory(os.path.join(this_dir, "data"))

class SITC(object):
	"""
	SITC Classification Object

	Provides an interface to the SITC Trade Classification System

	Parameters
	----------
	revision 	: 	int
					Specify SITC Revision Number [1,2,3,4]

	source_institution 	: 	str, optional(default="un")
							Provide source institution string (i.e. "un"). 
							See data/README.md for more information
Esempio n. 3
0
"""
Testing DTA and HDF Data Structures using NBERFeenstraWTFConstructor

Running Time: 2303.473s [38 minutes]
"""

from nose import with_setup
import pandas as pd
from pandas.util.testing import assert_series_equal, assert_frame_equal
from numpy.testing import assert_allclose

from pyeconlab.util import package_folder, expand_homepath, check_directory
from ..constructor import NBERFeenstraWTFConstructor

#-DATA Paths-#
# NOTE(review): SOURCE_DATA_DIR is a hard-coded absolute path on the author's
# Windows 7 machine, so these tests can only run where that directory exists.
# TEST_DATA_DIR is resolved via package_folder from this file and "data"
# (presumably a "data" folder beside this test module -- confirm in pyeconlab.util).
SOURCE_DATA_DIR = check_directory("E:\\work-data\\x_datasets\\36a376e5a01385782112519bddfac85e\\") 			#Win7!
TEST_DATA_DIR = package_folder(__file__, "data")


class TestConstructorDTAvsHDFYearIndex():
	""" 
	Test HDF Year Indexed File
	Test the Constructor Conversion to HDF Year Indexed DataFormat

	Files
	-----	
	STATA 	.dta files: wt??.dta 
	HDF 	.h5 file: 	wtf00-62_yearindex.h5

	Notes
	-----
Esempio n. 4
0
    def __init__(self,
                 source_dir,
                 trade_classification,
                 dtype,
                 years=[],
                 ftype='hdf',
                 reduce_memory=False,
                 standardize_dataset=False,
                 reset_cache=False,
                 verbose=True):
        """
        Constructor for the CID Atlas of Economic Complexity Data

        ..  Inheritance
            -----------
            1. AtlasOfComplexity -> Provides Meta Data on AtlasOfComplexity Dataset

        Parameters
        ----------
        source_dir              :   str
                                    Specify source directory containing raw tsv files
        trade_classification    :   str
                                    Type of Source Files to Load ["SITCR2", "HS92"]
                                    Must be a member of self.source_classifications
        dtype                   :   str
                                    Specify Data Type to work with ["trade", "export", "import"]
                                    Lower-cased before it is checked against self.source_dtypes
        years                   :   list, optional(default=[])
                                    Apply a Year Filter [Default: All Years Available in the Data]
        ftype                   :   str, optional(default='hdf')
                                    'tsv' always loads from the raw tsv files. Any other value loads
                                    from the hdf cache when the cache folder exists under source_dir,
                                    and falls back to the raw tsv files when it does not.
        reduce_memory           :   bool, optional(default=False)
                                    This will hand the raw data over to self.dataset and set self.__raw_data to None
                                    [Warning: This will render properties that depend on self.__raw_data inoperable]
                                    Useful when building datasets to be more memory efficient as the operations don't require a record of the original raw_data
        standardize_dataset     :   bool, optional(default=False)
                                    Standardize dataset into Trade, Export, Import Values Only from RAW Files.
        reset_cache             :   bool, optional(default=False)
                                    Force a load from the raw tsv files (regardless of ftype) and pass
                                    reset_cache through to load_raw_from_tsv
        verbose                 :   bool, optional(default=True)
                                    Print an [INFO] message naming the source directory

        Raises
        ------
        ValueError
            If dtype or trade_classification is not one of the supported values

        """
        #-Setup Attributes-#
        self.name = "Atlas Of Complexity (CID) Dataset"
        self.dtype = dtype.lower()
        if self.dtype not in self.source_dtypes:
            raise ValueError("%s is not a valid data type [Valid: %s]" %
                             (self.dtype, self.source_dtypes))
        self.classification = trade_classification
        if self.classification not in self.source_classifications:
            # Report the same attribute that was just checked; the previous
            # "source_source_classifications" name was a typo.
            raise ValueError(
                "%s is not a valid classification [Valid: %s]" %
                (self.classification, self.source_classifications))
        #-Revision is the last two characters of the classification string-#
        self.revision = trade_classification[-2:]
        self.level = self.source_level
        self.notes = ""
        self.operations = ""
        self.complete_dataset = False

        #-Parse Years-#
        if verbose:
            # Single-argument parenthesised form prints identically on Python 2 and 3
            print("[INFO] Fetching CIDAtlas Data from %s" % source_dir)
        # The shared default list is only compared, never mutated or stored:
        # an empty filter is replaced by the classification's full year list.
        if years == []:
            self.complete_dataset = True
            years = self.source_years[self.classification]
        #-Assign to Attribute-#
        self.years = years
        #-Files-#
        self.__source_dir = check_directory(source_dir)
        self.__cache_dir = "cache/"
        #-Load Data-#
        if ftype == "tsv" or reset_cache:
            self.load_raw_from_tsv(reset_cache=reset_cache)
        else:
            # NOTE(review): plain string concatenation assumes check_directory
            # returns a path ending in a separator -- confirm in pyeconlab.util
            if not os.path.exists(self.__source_dir + self.__cache_dir):
                self.load_raw_from_tsv()
            else:
                self.load_raw_from_hdf()
        #-Reduce Memory-#
        if reduce_memory:
            # Share the raw frame instead of copying it, then drop the raw reference
            self.dataset = self.__raw_data
            self.__raw_data = None
        else:
            self.dataset = self.__raw_data.copy(deep=True)

        #-Standardize-#
        if standardize_dataset:
            self.construct_standardized_dataset()
Esempio n. 5
0
    def __init__(self, source_dir, trade_classification, dtype, years=[], ftype='hdf', reduce_memory=False, standardize_dataset=False, reset_cache=False, verbose=True):
        """
        Constructor for the CID Atlas of Economic Complexity Data

        ..  Inheritance
            -----------
            1. AtlasOfComplexity -> Provides Meta Data on AtlasOfComplexity Dataset

        Parameters
        ----------
        source_dir              :   str
                                    Specify source directory containing raw tsv files
        trade_classification    :   str
                                    Type of Source Files to Load ["SITCR2", "HS92"]
                                    Must be a member of self.source_classifications
        dtype                   :   str
                                    Specify Data Type to work with ["trade", "export", "import"]
                                    Lower-cased before it is checked against self.source_dtypes
        years                   :   list, optional(default=[])
                                    Apply a Year Filter [Default: All Years Available in the Data]
        ftype                   :   str, optional(default='hdf')
                                    'tsv' always loads from the raw tsv files. Any other value loads
                                    from the hdf cache when the cache folder exists under source_dir,
                                    and falls back to the raw tsv files when it does not.
        reduce_memory           :   bool, optional(default=False)
                                    This will hand the raw data over to self.dataset and set self.__raw_data to None
                                    [Warning: This will render properties that depend on self.__raw_data inoperable]
                                    Useful when building datasets to be more memory efficient as the operations don't require a record of the original raw_data
        standardize_dataset     :   bool, optional(default=False)
                                    Standardize dataset into Trade, Export, Import Values Only from RAW Files.
        reset_cache             :   bool, optional(default=False)
                                    Force a load from the raw tsv files (regardless of ftype) and pass
                                    reset_cache through to load_raw_from_tsv
        verbose                 :   bool, optional(default=True)
                                    Print an [INFO] message naming the source directory

        Raises
        ------
        ValueError
            If dtype or trade_classification is not one of the supported values

        """
        #-Setup Attributes-#
        self.name = "Atlas Of Complexity (CID) Dataset"
        self.dtype = dtype.lower()
        if self.dtype not in self.source_dtypes:
            raise ValueError("%s is not a valid data type [Valid: %s]" % (self.dtype, self.source_dtypes))
        self.classification = trade_classification
        if self.classification not in self.source_classifications:
            # Report the same attribute that was just checked; the previous
            # "source_source_classifications" name was a typo.
            raise ValueError("%s is not a valid classification [Valid: %s]" % (self.classification, self.source_classifications))
        #-Revision is the last two characters of the classification string-#
        self.revision = trade_classification[-2:]
        self.level = self.source_level
        self.notes = ""
        self.operations = ""
        self.complete_dataset = False

        #-Parse Years-#
        if verbose:
            # Single-argument parenthesised form prints identically on Python 2 and 3
            print("[INFO] Fetching CIDAtlas Data from %s" % source_dir)
        # The shared default list is only compared, never mutated or stored:
        # an empty filter is replaced by the classification's full year list.
        if years == []:
            self.complete_dataset = True
            years = self.source_years[self.classification]
        #-Assign to Attribute-#
        self.years = years
        #-Files-#
        self.__source_dir = check_directory(source_dir)
        self.__cache_dir = "cache/"
        #-Load Data-#
        if ftype=="tsv" or reset_cache:
            self.load_raw_from_tsv(reset_cache=reset_cache)
        else:
            # NOTE(review): plain string concatenation assumes check_directory
            # returns a path ending in a separator -- confirm in pyeconlab.util
            if not os.path.exists(self.__source_dir + self.__cache_dir):
                self.load_raw_from_tsv()
            else:
                self.load_raw_from_hdf()
        #-Reduce Memory-#
        if reduce_memory:
            # Share the raw frame instead of copying it, then drop the raw reference
            self.dataset = self.__raw_data
            self.__raw_data = None
        else:
            self.dataset = self.__raw_data.copy(deep=True)

        #-Standardize-#
        if standardize_dataset:
            self.construct_standardized_dataset()