1. HS02 to SITCR2 Notes ----- 1. Any Special Concordances or Alterations are located in dataset/<dataset_name>/meta """ import os import copy import pandas as pd from pyeconlab.util import check_directory # - Data in data/ - # this_dir, this_filename = os.path.split(__file__) DATA_PATH = check_directory(os.path.join(this_dir, "data")) #---------------# #-Product Codes-# #---------------# class HS_To_SITC(object): """ Concordance from HS to SITC Parameters ---------- hs : str Specify HS Code ('HS92', 'HS96', 'HS02') sitc : str
Future Work ----------- 1. Add Metadata to the SITC objects (i.e. applicable_years, data_available_years etc.) 2. Add source_institution option to Revision Functions """ import os import copy import pandas as pd from pyeconlab.util import check_directory # - Data in data/ - # this_dir, this_filename = os.path.split(__file__) DATA_PATH = check_directory(os.path.join(this_dir, "data")) class SITC(object): """ SITC Classification Object Provides an interface to the SITC Trade Classification System Parameters ---------- revision : int Specify SITC Revision Number [1,2,3,4] source_institution : str, optional(default="un") Provide source institution string (i.e. "un"). See data/README.md for more information
""" Testing DTA and HDF Data Structures using NBERFeenstraWTFConstructor Running Time: 2303.473s [38 minutes] """ from nose import with_setup import pandas as pd from pandas.util.testing import assert_series_equal, assert_frame_equal from numpy.testing import assert_allclose from pyeconlab.util import package_folder, expand_homepath, check_directory from ..constructor import NBERFeenstraWTFConstructor #-DATA Paths-# SOURCE_DATA_DIR = check_directory("E:\\work-data\\x_datasets\\36a376e5a01385782112519bddfac85e\\") #Win7! TEST_DATA_DIR = package_folder(__file__, "data") class TestConstructorDTAvsHDFYearIndex(): """ Test HDF Year Indexed File Test the Constructor Conversion to HDF Year Indexed DataFormat Files ----- STATA .dta files: wt??.dta HDF .h5 file: wtf00-62_yearindex.h5 Notes -----
def __init__(self, source_dir, trade_classification, dtype, years=[], ftype='hdf', reduce_memory=False, standardize_dataset=False, reset_cache=False, verbose=True):
    """
    Constructor for the CID Atlas of Economic Complexity Data

    Inheritance
    -----------
    1. AtlasOfComplexity -> Provides Meta Data on AtlasOfComplexity Dataset

    Parameters
    ----------
    source_dir : str
        Specify source directory containing raw tsv files
    trade_classification : str
        Type of Source Files to Load ["SITCR2", "HS92"]
    dtype : str
        Specify Data Type to work with ["trade", "export", "import"]
        (compared case-insensitively)
    years : list, optional(default=[])
        Apply a Year Filter [Default: All Years Available in the Data]
        An empty list (or None) selects every year listed in
        self.source_years for the chosen classification and flags the
        object as a complete dataset.
    ftype : str, optional(default='hdf')
        Source file type. "tsv" always loads from the raw tsv files; any
        other value loads from the hdf cache when the cache directory
        exists and falls back to the raw tsv files when it does not.
    reduce_memory : bool, optional(default=False)
        This will delete self.__raw_data after initializing self.dataset
        with the raw_data
        [Warning: This will render properties that depend on
        self.__raw_data inoperable]
        Useful when building datasets to be more memory efficient as the
        operations don't require a record of the original raw_data
    standardize_dataset : bool, optional(default=False)
        Standardize dataset into Trade, Export, Import Values Only from
        RAW Files.
    reset_cache : bool, optional(default=False)
        Force a load from the raw tsv files (regardless of ftype) and
        forward the flag to load_raw_from_tsv().
    verbose : bool, optional(default=True)
        Print an [INFO] message naming the source directory.

    Raises
    ------
    ValueError
        If dtype is not in self.source_dtypes or trade_classification is
        not in self.source_classifications.
    """
    #-Setup Attributes-#
    self.name = "Atlas Of Complexity (CID) Dataset"
    self.dtype = dtype.lower()
    if self.dtype not in self.source_dtypes:
        raise ValueError(
            "%s is not a valid data type [Valid: %s]"
            % (self.dtype, self.source_dtypes))
    self.classification = trade_classification
    if self.classification not in self.source_classifications:
        # The message is built from the same attribute that was just
        # tested, so an invalid classification surfaces as ValueError.
        raise ValueError(
            "%s is not a valid classification [Valid: %s]"
            % (self.classification, self.source_classifications))
    # Last two characters of the classification name (e.g. "R2", "92")
    self.revision = trade_classification[-2:]
    self.level = self.source_level
    self.notes = ""
    self.operations = ""
    self.complete_dataset = False
    #-Parse Years-#
    if verbose:
        # Single pre-formatted argument: same output on Python 2 and 3
        print("[INFO] Fetching CIDAtlas Data from %s" % source_dir)
    if years is None or len(years) == 0:
        # No filter requested -> use every year known for this classification
        self.complete_dataset = True
        years = self.source_years[self.classification]
    #-Assign to Attribute-#
    self.years = years
    #-Files-#
    self.__source_dir = check_directory(source_dir)
    self.__cache_dir = "cache/"
    #-Load Data-#
    if ftype == "tsv" or reset_cache:
        self.load_raw_from_tsv(reset_cache=reset_cache)
    # NOTE(review): plain concatenation assumes check_directory() returns a
    # path ending in a separator -- TODO confirm
    elif not os.path.exists(self.__source_dir + self.__cache_dir):
        # No cache directory yet, so read the raw tsv files instead
        self.load_raw_from_tsv()
    else:
        self.load_raw_from_hdf()
    #-Reduce Memory-#
    if reduce_memory:
        # Hand the loaded frame over to self.dataset and drop the raw reference
        self.dataset = self.__raw_data
        self.__raw_data = None
    else:
        # Keep the raw data untouched; work on an independent deep copy
        self.dataset = self.__raw_data.copy(deep=True)
    #-Standardize-#
    if standardize_dataset:
        self.construct_standardized_dataset()
def __init__(self, source_dir, trade_classification, dtype, years=[], ftype='hdf', reduce_memory=False, standardize_dataset=False, reset_cache=False, verbose=True):
    """
    Constructor for the CID Atlas of Economic Complexity Data

    Inheritance
    -----------
    1. AtlasOfComplexity -> Provides Meta Data on AtlasOfComplexity Dataset

    Parameters
    ----------
    source_dir : str
        Specify source directory containing raw tsv files
    trade_classification : str
        Type of Source Files to Load ["SITCR2", "HS92"]
    dtype : str
        Specify Data Type to work with ["trade", "export", "import"]
        (compared case-insensitively)
    years : list, optional(default=[])
        Apply a Year Filter [Default: All Years Available in the Data]
        An empty list (or None) selects every year listed in
        self.source_years for the chosen classification and flags the
        object as a complete dataset.
    ftype : str, optional(default='hdf')
        Source file type. "tsv" always loads from the raw tsv files; any
        other value loads from the hdf cache when the cache directory
        exists and falls back to the raw tsv files when it does not.
    reduce_memory : bool, optional(default=False)
        This will delete self.__raw_data after initializing self.dataset
        with the raw_data
        [Warning: This will render properties that depend on
        self.__raw_data inoperable]
        Useful when building datasets to be more memory efficient as the
        operations don't require a record of the original raw_data
    standardize_dataset : bool, optional(default=False)
        Standardize dataset into Trade, Export, Import Values Only from
        RAW Files.
    reset_cache : bool, optional(default=False)
        Force a load from the raw tsv files (regardless of ftype) and
        forward the flag to load_raw_from_tsv().
    verbose : bool, optional(default=True)
        Print an [INFO] message naming the source directory.

    Raises
    ------
    ValueError
        If dtype is not in self.source_dtypes or trade_classification is
        not in self.source_classifications.
    """
    #-Setup Attributes-#
    self.name = "Atlas Of Complexity (CID) Dataset"
    self.dtype = dtype.lower()
    if self.dtype not in self.source_dtypes:
        raise ValueError(
            "%s is not a valid data type [Valid: %s]"
            % (self.dtype, self.source_dtypes))
    self.classification = trade_classification
    if self.classification not in self.source_classifications:
        # The message is built from the same attribute that was just
        # tested, so an invalid classification surfaces as ValueError.
        raise ValueError(
            "%s is not a valid classification [Valid: %s]"
            % (self.classification, self.source_classifications))
    # Last two characters of the classification name (e.g. "R2", "92")
    self.revision = trade_classification[-2:]
    self.level = self.source_level
    self.notes = ""
    self.operations = ""
    self.complete_dataset = False
    #-Parse Years-#
    if verbose:
        # Single pre-formatted argument: same output on Python 2 and 3
        print("[INFO] Fetching CIDAtlas Data from %s" % source_dir)
    if years is None or len(years) == 0:
        # No filter requested -> use every year known for this classification
        self.complete_dataset = True
        years = self.source_years[self.classification]
    #-Assign to Attribute-#
    self.years = years
    #-Files-#
    self.__source_dir = check_directory(source_dir)
    self.__cache_dir = "cache/"
    #-Load Data-#
    if ftype == "tsv" or reset_cache:
        self.load_raw_from_tsv(reset_cache=reset_cache)
    # NOTE(review): plain concatenation assumes check_directory() returns a
    # path ending in a separator -- TODO confirm
    elif not os.path.exists(self.__source_dir + self.__cache_dir):
        # No cache directory yet, so read the raw tsv files instead
        self.load_raw_from_tsv()
    else:
        self.load_raw_from_hdf()
    #-Reduce Memory-#
    if reduce_memory:
        # Hand the loaded frame over to self.dataset and drop the raw reference
        self.dataset = self.__raw_data
        self.__raw_data = None
    else:
        # Keep the raw data untouched; work on an independent deep copy
        self.dataset = self.__raw_data.copy(deep=True)
    #-Standardize-#
    if standardize_dataset:
        self.construct_standardized_dataset()