# Beispiel #1 (score: 0)
class TestGenericSeriesData_RateGroup_TS(unittest.TestCase):
    """Generic SDMX-ML data message: rate-group time-series sample."""

    def setUp(self):
        # Parse the generic ECB exchange-rate sample shipped with the
        # test data; no DSD is needed for generic messages.
        self.estat = Request()
        filepath = os.path.join(
            pkg_path, 'data/exr/ecb_exr_rg/generic/ecb_exr_rg_ts.xml')
        self.resp = self.estat.get(fromfile=filepath)

    def test_groups(self):
        data = self.resp.data
        self.assertEqual(len(list(data.groups)), 5)
        self.assertEqual(len(list(data.series)), 4)
        g2 = list(data.groups)[2]
        self.assertEqual(g2.key.CURRENCY, 'GBP')
        self.assertEqual(
            g2.attrib.TITLE, 'ECB reference exchange rate, U.K. Pound sterling /Euro')
        # Check group attributes of a series.  (A duplicated length
        # assertion was removed here.)
        s = list(data.series)[0]
        g_attrib = s.group_attrib
        self.assertIsInstance(g_attrib, tuple)
        self.assertEqual(len(g_attrib), 5)

    def test_footer(self):
        filepath = os.path.join(
            pkg_path, 'data/estat/footer.xml')
        resp = self.estat.get(
            fromfile=filepath, get_footer_url=None)
        f = resp.footer
        assert f.code == 413
        # NOTE(review): 'Infomation' mirrors the (misspelled) severity text
        # in the footer.xml fixture -- do not "correct" the expected value.
        assert f.severity == 'Infomation'
        assert f.text[1].startswith('http')
class TestStructSpecSeriesData_RateGroup_TS(unittest.TestCase):
    """Structure-specific data message: rate-group time-series sample."""

    def setUp(self):
        # The structure-specific (compact) reader needs the DSD parsed
        # first so dimension and attribute IDs can be resolved.
        self.estat = Request()
        filepath = os.path.join(
            pkg_path, 'data/exr/ecb_exr_rg/structured/ecb_exr_rg_ts.xml')
        dsd_resp = self.estat.datastructure(
            fromfile=os.path.join(pkg_path, 'data/exr/ecb_exr_rg/ecb_exr_rg.xml'))
        dsd = dsd_resp.datastructure.DataStructure
        self.resp = self.estat.get(fromfile=filepath, dsd=dsd)

    def test_groups(self):
        data = self.resp.data
        self.assertEqual(len(list(data.groups)), 5)
        self.assertEqual(len(list(data.series)), 4)
        g2 = list(data.groups)[2]
        self.assertEqual(g2.key.CURRENCY, 'GBP')
        self.assertEqual(
            g2.attrib.TITLE, 'ECB reference exchange rate, U.K. Pound sterling /Euro')
        # Check group attributes of a series.  (A duplicated length
        # assertion was removed here.)
        s = list(data.series)[0]
        g_attrib = s.group_attrib
        self.assertIsInstance(g_attrib, tuple)
        self.assertEqual(len(g_attrib), 5)
# Beispiel #3 (score: 0)
    def test_samples_request(self):
        """Test the samples from the SDMXJSON spec."""
        req = Request()
        for name, expected in sample_data.items():
            response = req.get(fromfile=self._filepath(name))

            frame = response.write()
            # On mismatch, show the sample name plus both index/column sets.
            detail = [name, frame.index, expected.index,
                      getattr(frame, 'columns', ''),
                      getattr(expected, 'columns', '')]
            assert frame.equals(expected), '\n'.join(str(item) for item in detail)
# Beispiel #4 (score: 0)
class Test_ESTAT_dsd_apro_mk_cola(unittest.TestCase):
    """Eurostat DSD sample: codelist parsing and pandas export."""

    def setUp(self):
        self.estat = Request('ESTAT')
        filepath = os.path.join(test_path, 'data/estat/apro_dsd.xml')
        self.resp = self.estat.get(fromfile=filepath)

    def test_codelists_keys(self):
        self.assertEqual(len(self.resp.codelist), 6)
        self.assertIsInstance(self.resp.codelist.CL_GEO, model.Codelist)

    def test_codelist_name(self):
        self.assertEqual(
            self.resp.msg.codelist.CL_GEO.UK.name.en, 'United Kingdom')
        assert self.resp.codelist.CL_FREQ.name.en == 'FREQ'

    # BUG FIX: this method was accidentally nested inside
    # test_codelist_name, so the unittest runner never discovered or
    # executed it.  It now lives at class level.
    def test_code_cls(self):
        self.assertIsInstance(
            self.resp.codelist.CL_FREQ.D, model.Code)

    def test_writer(self):
        df = self.resp.write(rows='codelist')
        self.assertEqual(df.shape, (79, 2))
# Beispiel #5 (score: 0)
class test_dsd_common(unittest.TestCase):
    """Checks against the shared common.xml structure sample."""

    def setUp(self):
        self.estat = Request('ESTAT')
        sample = os.path.join(test_path, 'data/common/common.xml')
        self.resp = self.estat.get(fromfile=sample)

    def test_codelists_keys(self):
        codelists = self.resp.msg.codelist
        self.assertEqual(len(codelists), 5)
        self.assertIsInstance(codelists.CL_FREQ, model.Codelist)

    def test_codelist_name(self):
        daily = self.resp.msg.codelist.CL_FREQ.D
        self.assertEqual(daily.name.en, 'Daily')

    def test_code_cls(self):
        daily = self.resp.msg.codelist.CL_FREQ.D
        self.assertIsInstance(daily, model.Code)

    def test_annotations(self):
        # The 'A' (annual) code carries exactly one NOTE annotation.
        annotations = list(self.resp.codelist.CL_FREQ.A.annotations)
        self.assertEqual(len(annotations), 1)
        note = annotations[0]
        self.assertIsInstance(note, model.Annotation)
        self.assertTrue(note.text.en.startswith('It is'))
        self.assertEqual(note.annotationtype, 'NOTE')
 def setUp(self):
     # Parse the DSD first so the structure-specific (compact) data file
     # can be decoded; the reader needs it to resolve dimension positions.
     self.estat = Request()
     filepath = os.path.join(
         pkg_path, 'data/exr/ecb_exr_rg/structured/ecb_exr_rg_ts.xml')
     dsd_resp = self.estat.datastructure(
         fromfile=os.path.join(pkg_path, 'data/exr/ecb_exr_rg/ecb_exr_rg.xml'))
     dsd = dsd_resp.datastructure.DataStructure
     self.resp = self.estat.get(fromfile=filepath, dsd=dsd)
 def setUp(self):
     # Structure-specific sample with group attributes ('ts_gf' variant);
     # the full DSD is parsed first and passed to the data reader.
     self.estat = Request('ESTAT')
     filepath = os.path.join(
         pkg_path, 'data/exr/ecb_exr_ng/structured/ecb_exr_ng_ts_gf.xml')
     dsd_resp = self.estat.datastructure(
         fromfile=os.path.join(pkg_path, 'data/exr/ecb_exr_ng/ecb_exr_ng_full.xml'))
     dsd = dsd_resp.datastructure.DataStructure
     self.resp = self.estat.data(fromfile=filepath, dsd=dsd)
class TestStructSpecSeriesDataSet2(unittest.TestCase):
    """Structure-specific series data set, TIME_PERIOD at observation."""

    def setUp(self):
        self.estat = Request('ESTAT')
        data_path = os.path.join(
            pkg_path, 'data/exr/ecb_exr_ng/structured/ecb_exr_ng_ts.xml')
        dsd_resp = self.estat.datastructure(
            fromfile=os.path.join(pkg_path, 'data/exr/ecb_exr_ng/ecb_exr_ng_full.xml'))
        dsd = dsd_resp.datastructure.DataStructure
        self.resp = self.estat.data(fromfile=data_path, dsd=dsd)

    def test_header_attributes(self):
        header = self.resp.header
        self.assertEqual(header.structured_by, 'STR1')
        self.assertEqual(header.dim_at_obs, 'TIME_PERIOD')

    def test_dataset_cls(self):
        self.assertIsInstance(self.resp.data, model.DataSet)

    def test_structured_obs(self):
        data = self.resp.data
        # The flat obs iterator is empty for a series-oriented message.
        self.assertEqual(len(list(data.obs())), 0)
        all_series = list(data.series)
        self.assertEqual(len(all_series), 4)
        last = all_series[3]
        self.assertIsInstance(last, model.Series)
        self.assertIsInstance(last.key, tuple)
        self.assertEqual(len(last.key), 4)
        self.assertEqual(last.key.CURRENCY, 'USD')
        self.assertEqual(last.attrib.DECIMALS, '4')
        observations = list(last.obs(reverse_obs=True))
        self.assertEqual(len(observations), 3)
        earliest = observations[2]
        self.assertEqual(len(earliest), 3)
        self.assertEqual(earliest.dim, '2010-08')
        self.assertEqual(earliest.value, '1.2894')
        self.assertIsInstance(earliest.attrib, tuple)
        self.assertEqual(earliest.attrib.OBS_STATUS, 'A')

    def test_dataframe(self):
        frame = self.resp.write(
            self.resp.data, attributes='', asframe=True, reverse_obs=True)
        self.assertIsInstance(frame, pandas.core.frame.DataFrame)
        self.assertEqual(frame.shape, (3, 4))
# Beispiel #9 (score: 0)
    def test_write_source(self):
        """Round-trip each sample through write_source() and compare JSON."""
        req = Request()
        for name in sample_data:
            source_path = self._filepath(name)
            copy_path = self._filepath(name + '-write-source')

            # Parse the message, then dump its raw source to a temp file.
            req.get(fromfile=source_path).write_source(copy_path)

            # Semantic JSON comparison -- key ordering does not matter.
            with open(source_path) as orig, open(copy_path) as copy:
                assert json.load(orig) == json.load(copy)

            # Clean up the temporary file.
            os.remove(copy_path)
class TestStructSpecFlatDataSet(unittest.TestCase):
    """Structure-specific flat data set, AllDimensions at observation."""

    def setUp(self):
        self.estat = Request('ESTAT')
        data_path = os.path.join(
            pkg_path, 'data/exr/ecb_exr_ng/structured/ecb_exr_ng_flat.xml')
        dsd_resp = self.estat.datastructure(
            fromfile=os.path.join(pkg_path, 'data/exr/ecb_exr_ng/ecb_exr_ng_full.xml'))
        dsd = dsd_resp.datastructure.DataStructure
        self.resp = self.estat.get(fromfile=data_path, dsd=dsd)

    def test_msg_type(self):
        self.assertIsInstance(self.resp.msg, model.DataMessage)

    def test_header_attributes(self):
        header = self.resp.header
        self.assertEqual(header.structured_by, 'STR1')
        self.assertEqual(header.dim_at_obs, 'AllDimensions')

    def test_dataset_cls(self):
        self.assertIsInstance(self.resp.data, model.DataSet)
        self.assertEqual(self.resp.msg.data.dim_at_obs, 'AllDimensions')

    def test_generic_obs(self):
        data = self.resp.data
        # A flat data set carries observations only -- no series.
        self.assertEqual(len(list(data.series)), 0)
        observations = list(data.obs())
        self.assertEqual(len(observations), 12)
        first = observations[0]
        self.assertEqual(len(first), 3)
        self.assertIsInstance(first.key, tuple)  # obs_key
        self.assertEqual(first.key.FREQ, 'M')
        self.assertEqual(first.key.CURRENCY, 'CHF')
        self.assertEqual(first.value, '1.3413')
        self.assertIsInstance(first.attrib, tuple)
        self.assertEqual(first.attrib.OBS_STATUS, 'A')
        self.assertEqual(first.attrib.DECIMALS, '4')

    def test_write2pandas(self):
        result = self.resp.write(attributes='',
                                 asframe=False, reverse_obs=False)
        self.assertIsInstance(result, pandas.Series)
# Beispiel #11 (score: 0)
class TestGenericSeriesData_SiblingGroup_TS(unittest.TestCase):
    """Generic data message with sibling-group time series."""

    def setUp(self):
        self.estat = Request()
        filepath = os.path.join(
            pkg_path, 'data/exr/ecb_exr_sg/generic/ecb_exr_sg_ts.xml')
        self.resp = self.estat.get(fromfile=filepath)

    def test_groups(self):
        data = self.resp.data
        self.assertEqual(len(list(data.groups)), 4)
        self.assertEqual(len(list(data.series)), 4)
        g2 = list(data.groups)[2]
        self.assertEqual(g2.key.CURRENCY, 'JPY')
        self.assertEqual(
            g2.attrib.TITLE, 'ECB reference exchange rate, Japanese yen/Euro')
        # Check group attributes of a series.  (A duplicated length
        # assertion was removed here.)
        s = list(data.series)[0]
        g_attrib = s.group_attrib
        self.assertIsInstance(g_attrib, tuple)
        self.assertEqual(len(g_attrib), 1)
# Beispiel #12 (score: 0)
class test_exr_constraints(unittest.TestCase):
    """Content-constraint handling for the ECB EXR dataflow message."""

    def setUp(self):
        self.ecb = Request('ecb')
        filepath = os.path.join(test_path, 'data/exr_flow.xml')
        self.resp = self.ecb.get(fromfile=filepath)

    def test_constrained_codes(self):
        # (Several assertions in the original were duplicated verbatim;
        # the duplicates have been removed.)
        m = self.resp.msg
        self.assertEqual(m._dim_ids[0], 'FREQ')
        self.assertEqual(len(m._dim_ids), 5)
        self.assertEqual(len(m._dim_codes), 5)
        self.assertEqual(len(m._attr_ids), 9)
        self.assertEqual(len(m._attr_codes), 9)
        self.assertEqual(m._attr_ids[-1], 'UNIT_MULT')
        self.assertIn('5', m._attr_codes.UNIT_MULT)
        self.assertIn('W', m._dim_codes.FREQ)
        self.assertEqual(len(m._constrained_codes), 14)
        # 'W' is a valid FREQ code but is excluded by the content constraint.
        self.assertNotIn('W', m._constrained_codes.FREQ)
        key = {'FREQ': ['W']}
        self.assertTrue(m.in_codes(key))
        self.assertFalse(m.in_constraints(key, raise_error=False))
        self.assertRaises(ValueError, m.in_constraints, key)
        self.assertTrue(m.in_constraints({'CURRENCY': ['CHF']}))
        # test with invalid key
        # NOTE(review): the private ``_in_constraints`` is called here while
        # the public ``in_constraints`` is used above -- confirm both names
        # exist on the message class before normalizing.
        self.assertRaises(TypeError, m._in_constraints, {'FREQ': 'A'})
        # structure writer with constraints
        out = self.resp.write()
        cl = out.codelist
        self.assertEqual(cl.shape, (3555, 2))
        # unconstrained codelists
        out = self.resp.write(constraint=False)
        cl = out.codelist
        self.assertEqual(cl.shape, (4177, 2))
# Beispiel #13 (score: 0)
 def setUp(self):
     # Parse the ECB EXR dataflow message (includes content constraints).
     self.ecb = Request('ecb')
     filepath = os.path.join(test_path, 'data/exr_flow.xml')
     self.resp = self.ecb.get(fromfile=filepath)
# Beispiel #14 (score: 0)
class InseeTestCase(unittest.TestCase):
    """Integration tests against locally stored INSEE SDMX sample files."""

    # nosetests -s -v pandasdmx.tests.test_insee:InseeTestCase

    def setUp(self):
        unittest.TestCase.setUp(self)
        self.sdmx = Request('INSEE')

    def test_load_dataset(self):
        """Load dataflows, DSD and data for IPI-2010-A21; spot-check values."""
        dataset_code = 'IPI-2010-A21'

        '''load all dataflows'''
        dataflows_response = self.sdmx.get(
            resource_type='dataflow', agency='FR1', fromfile=DATAFLOW_FP)
        dataflows = dataflows_response.msg.dataflow

        self.assertEqual(len(dataflows.keys()), 663)
        self.assertTrue(dataset_code in dataflows)

        '''load datastructure for current dataset_code'''
        fp_datastructure = DATASETS[dataset_code]['datastructure-fp']
        datastructure_response = self.sdmx.get(
            resource_type='datastructure', agency='FR1', fromfile=fp_datastructure)
        self.assertTrue(
            dataset_code in datastructure_response.msg.datastructure)
        dsd = datastructure_response.msg.datastructure[dataset_code]

        '''Verify dimensions list'''
        # TIME/TIME_PERIOD are excluded: only the key dimensions matter here.
        dimensions = OrderedDict([dim.id, dim] for dim in dsd.dimensions.aslist(
        ) if dim.id not in ['TIME', 'TIME_PERIOD'])
        dim_keys = list(dimensions.keys())
        self.assertEqual(dim_keys, ['FREQ', 'PRODUIT', 'NATURE'])

        '''load datas for the current dataset'''
        fp_data = DATASETS[dataset_code]['data-fp']
        data = self.sdmx.get(
            resource_type='data', agency='FR1', fromfile=fp_data)

        '''Verify series count and values'''
        series = list(data.msg.data.series)
        series_count = len(series)
        self.assertEqual(series_count, DATASETS[dataset_code]['series_count'])

        first_series = series[0]
        observations = list(first_series.obs())

        first_obs = observations[0]
        last_obs = observations[-1]

        # Observations appear newest-first in the sample file.
        self.assertEqual(first_obs.dim, '2015-10')
        self.assertEqual(first_obs.value, '105.61')

        self.assertEqual(last_obs.dim, '1990-01')
        self.assertEqual(last_obs.value, '139.22')

    def test_fixe_key_names(self):
        """Verify that key and attribute names containing '-' are preserved."""

        dataset_code = 'CNA-2010-CONSO-SI-A17'

        fp_datastructure = DATASETS[dataset_code]['datastructure-fp']
        datastructure_response = self.sdmx.get(
            resource_type='datastructure', agency='FR1', fromfile=fp_datastructure)
        self.assertTrue(
            dataset_code in datastructure_response.msg.datastructure)
        dsd = datastructure_response.msg.datastructure[dataset_code]

        dimensions = OrderedDict([dim.id, dim] for dim in dsd.dimensions.aslist(
        ) if dim.id not in ['TIME', 'TIME_PERIOD'])
        dim_keys = list(dimensions.keys())
        self.assertEqual(
            dim_keys, ['SECT-INST', 'OPERATION', 'PRODUIT', 'PRIX'])

        fp_data = DATASETS[dataset_code]['data-fp']
        data = self.sdmx.get(
            resource_type='data', agency='FR1', fromfile=fp_data)
        series = list(data.msg.data.series)

        series = series[0]

        # '-' in SDMX IDs must survive the conversion to key/attrib tuples.
        self.assertEqual(list(series.key._asdict().keys()),
                         ['SECT-INST', 'OPERATION', 'PRODUIT', 'PRIX'])

        self.assertEqual(list(series.attrib._asdict().keys()),
                         ['FREQ', 'IDBANK', 'TITLE', 'LAST_UPDATE', 'UNIT_MEASURE', 'UNIT_MULT', 'REF_AREA', 'DECIMALS', 'BASE_PER', 'TIME_PER_COLLECT'])

    def test_freq_in_series_attribute(self):
        # Test that we don't have regression on Issues #39 and #41
        # INSEE time series provide the FREQ value as attribute on the series instead of a dimension. This caused
        # a runtime error when writing as pandas dataframe.
        data_response = self.sdmx.data(
            fromfile=SERIES['UNEMPLOYMENT_CAT_A_B_C']['data-fp'])
        data_response.write()
# Beispiel #15 (score: 0)
class InseeTestCase(unittest.TestCase):
    """Integration tests against locally stored INSEE SDMX sample files."""

    # nosetests -s -v pandasdmx.tests.test_insee:InseeTestCase

    def setUp(self):
        unittest.TestCase.setUp(self)
        self.sdmx = Request('INSEE')

    def test_load_dataset(self):
        """Load dataflows, DSD and data for IPI-2010-A21; spot-check values."""
        dataset_code = 'IPI-2010-A21'

        '''load all dataflows'''
        dataflows_response = self.sdmx.get(
            resource_type='dataflow', agency='FR1', fromfile=DATAFLOW_FP)
        dataflows = dataflows_response.msg.dataflow

        self.assertEqual(len(dataflows.keys()), 663)
        self.assertTrue(dataset_code in dataflows)

        '''load datastructure for current dataset_code'''
        fp_datastructure = DATASETS[dataset_code]['datastructure-fp']
        datastructure_response = self.sdmx.get(
            resource_type='datastructure', agency='FR1', fromfile=fp_datastructure)
        self.assertTrue(
            dataset_code in datastructure_response.msg.datastructure)
        dsd = datastructure_response.msg.datastructure[dataset_code]

        '''Verify dimensions list'''
        # TIME/TIME_PERIOD are excluded: only the key dimensions matter here.
        dimensions = OrderedDict([dim.id, dim] for dim in dsd.dimensions.aslist(
        ) if dim.id not in ['TIME', 'TIME_PERIOD'])
        dim_keys = list(dimensions.keys())
        self.assertEqual(dim_keys, ['FREQ', 'PRODUIT', 'NATURE'])

        '''load datas for the current dataset'''
        fp_data = DATASETS[dataset_code]['data-fp']
        data = self.sdmx.get(
            resource_type='data', agency='FR1', fromfile=fp_data)

        '''Verify series count and values'''
        series = list(data.msg.data.series)
        series_count = len(series)
        self.assertEqual(series_count, DATASETS[dataset_code]['series_count'])

        first_series = series[0]
        observations = list(first_series.obs())

        first_obs = observations[0]
        last_obs = observations[-1]

        # Observations appear newest-first in the sample file.
        self.assertEqual(first_obs.dim, '2015-10')
        self.assertEqual(first_obs.value, '105.61')

        self.assertEqual(last_obs.dim, '1990-01')
        self.assertEqual(last_obs.value, '139.22')

    def test_fixe_key_names(self):
        """Verify that key and attribute names containing '-' are preserved."""

        dataset_code = 'CNA-2010-CONSO-SI-A17'

        fp_datastructure = DATASETS[dataset_code]['datastructure-fp']
        datastructure_response = self.sdmx.get(
            resource_type='datastructure', agency='FR1', fromfile=fp_datastructure)
        self.assertTrue(
            dataset_code in datastructure_response.msg.datastructure)
        dsd = datastructure_response.msg.datastructure[dataset_code]

        dimensions = OrderedDict([dim.id, dim] for dim in dsd.dimensions.aslist(
        ) if dim.id not in ['TIME', 'TIME_PERIOD'])
        dim_keys = list(dimensions.keys())
        self.assertEqual(
            dim_keys, ['SECT-INST', 'OPERATION', 'PRODUIT', 'PRIX'])

        fp_data = DATASETS[dataset_code]['data-fp']
        data = self.sdmx.get(
            resource_type='data', agency='FR1', fromfile=fp_data)
        series = list(data.msg.data.series)

        series = series[0]

        # '-' in SDMX IDs must survive the conversion to key/attrib tuples.
        self.assertEqual(list(series.key._asdict().keys()),
                         ['SECT-INST', 'OPERATION', 'PRODUIT', 'PRIX'])

        self.assertEqual(list(series.attrib._asdict().keys()),
                         ['FREQ', 'IDBANK', 'TITLE', 'LAST_UPDATE', 'UNIT_MEASURE', 'UNIT_MULT', 'REF_AREA', 'DECIMALS', 'BASE_PER', 'TIME_PER_COLLECT'])
# Beispiel #16 (score: 0)
 def setUp(self):
     # Delegate to the base implementation, then create an INSEE client.
     unittest.TestCase.setUp(self)
     self.sdmx = Request('INSEE')
# Beispiel #17 (score: 0)
 def req(self):
     # Return a fresh Request client configured for the INSEE web service.
     return Request("INSEE")
# Beispiel #18 (score: 0)
 def setUp(self):
     # The message is read from a local sample file; 'ESTAT' only selects
     # the agency configuration of the Request client.
     self.estat = Request('ESTAT')
     filepath = os.path.join(test_path, 'data/common/common.xml')
     self.resp = self.estat.get(fromfile=filepath)
# Beispiel #19 (score: 0)
 def setUp(self):
     # Generic (not structure-specific) data sample: no DSD required.
     self.estat = Request('ESTAT')
     filepath = os.path.join(
         pkg_path, 'data/exr/ecb_exr_ng/generic/ecb_exr_ng_ts_gf.xml')
     self.resp = self.estat.data(fromfile=filepath)
# Beispiel #20 (score: 0)
"""
Created on Fri Oct 19 10:02:37 2018

@author: dpsugasa
"""

from pandasdmx import Request
import plotly
import plotly.plotly as py  #for plotting
import plotly.graph_objs as go
import plotly.dashboard_objs as dashboard
import plotly.tools as tls
import plotly.figure_factory as ff
import credentials  #plotly API details

# Query Eurostat for the annual unemployment-rate dataflow ('une_rt_a').
estat = Request('ESTAT')
flow_response = estat.dataflow('une_rt_a')
# request=True fetches the referenced data structure as well.
structure_response = flow_response.dataflow.une_rt_a.structure(
    request=True, target_only=False)
structure_response.write().codelist.loc['GEO'].head()

# Unemployment rates for Greece, Spain and Italy from 1950 onwards.
resp = estat.data('une_rt_a',
                  key={'GEO': 'EL+ES+IT'},
                  params={'startPeriod': '1950'})

# Keep only the all-ages series and convert to a pandas object.
data = resp.write(s for s in resp.data.series if s.key.AGE == 'TOTAL')
z = data.loc[:, ('PC_ACT', 'TOTAL', 'T')]
z['Dates'] = z.index.to_series().astype(str)
# Standard score (z-score) of the Italian series.
z['IT_scr'] = (z['IT'] - z['IT'].mean()) / z['IT'].std()
print(z)
class EurostatFetcher(SDMXFetcher):
    """Fetch Eurostat datasets via SDMX and mirror them into a git repo.

    The public Eurostat "navtree" table of contents (TOC) is compared with
    the previously stored copy to decide which datasets changed; each
    changed dataset is then downloaded through the SDMX web service and
    written to ``<repo>/<dataset_code>.csv``.
    """

    _sdmx_loader = Request('ESTAT')
    _nsmap = dict(nt='urn:eu.europa.ec.eurostat.navtree')

    # Geographic codes of interest (includes EU/EA aggregates).
    _GEO = ['AT', 'BE', 'BG', 'CY', 'CZ', 'DE', 'DK', 'EE', 'EL', 'ES',
            'EU28', 'EU27', 'EA19', 'FI', 'FR', 'HR', 'HU', 'IE', 'IT', 'LT', 'LU', 'LV', 'ME',
            'MK', 'MT', 'NL', 'PL', 'PT', 'RO', 'RS', 'SE', 'SI', 'SK', 'TR', 'UK']

    # Seasonal-adjustment variants.
    _S_ADJ = ['NSA', 'SA']

    # Frequency codes (annual ... weekly).
    _FREQ = ['A', 'H', 'Q', 'M', 'S', 'W']

    def __init__(self, name=NAME, parent_location=GIT_PARENT_LOCATION):
        """Prepare working directories and open (or create) the git repo."""
        self._name = name
        self._base_url = URL

        self._parent_location = parent_location
        self._repo_location = parent_location + "/" + name
        self._temp_location = TEMP_PARENT_LOCATION + "/" + name
        self._xml_toc_file = XML_TOC_FILE
        self._xml_toc_file_location = self._repo_location + "/" + self._xml_toc_file
        Path(self._temp_location).mkdir(exist_ok=True)
        Path(self._repo_location).mkdir(exist_ok=True)
        try:
            self.load_existing_repo()
        except ValueError:
            self.initialize_repo()

    def _download_file(self, params, stream):
        """Stream one bulk-download file into the repo and stage it in git.

        Returns the ``requests`` response so callers can inspect ``ok``.
        """
        rsp = requests.get(self._base_url, params, stream=stream)
        if rsp.ok:
            file_path = self._repo_location + '/' + params['file'].split('/')[-1]
            # The 'with' block closes the file; the explicit close() the
            # original carried was redundant.
            with open(file_path, 'wb') as f:
                for chunk in rsp.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
            self.repo.stage([file_path])
        return rsp

    def _dictionary_file(self, params):
        """Download a TSV dictionary file and return it as ``{code: label}``.

        Returns ``None`` when the download fails (original best-effort
        behaviour preserved).
        """
        rsp = self._download_file(params, False)
        if rsp.ok:
            reader = csv.reader(StringIO(rsp.text), delimiter='\t')
            return {row[0]: row[1] for row in reader}

    def _dimensions_list(self):
        """Refresh the cached Eurostat dimension-code dictionary."""
        log.info("Downloading and updating the dimensions list...")
        params = {"sort": "1", "file": "dic/en/dimlst.dic"}
        self._dimlst = self._dictionary_file(params)

    def _load_existing_toc(self):
        """Load the previously stored TOC XML into ``xml_toc_old`` (or None)."""
        if Path(self._xml_toc_file_location).is_file():
            log.info("Loading up existing Table of Contents XML file...")
            parser = etree.XMLParser(remove_blank_text=True)
            self.xml_toc_old = etree.parse(self._xml_toc_file_location, parser=parser)
        else:
            log.info("No existing Table of Contents XML file. Setting to None.")
            self.xml_toc_old = None

    def _update_toc(self):
        """Download the current TOC, persist it, and stage it in git."""
        self._load_existing_toc()

        log.info("Starting file Table of Contents XML file download...")
        response = requests.get(XML_TOC_URL)
        log.info(">> download finished.")
        parser = etree.XMLParser(remove_blank_text=True)
        xml_toc = etree.fromstring(response.content, parser=parser)
        self.xml_toc = etree.ElementTree(xml_toc)
        with open(self._xml_toc_file_location, 'wb') as xml_file:
            self.xml_toc.write(xml_file, encoding='utf-8', pretty_print=True, xml_declaration=True)
        self.repo.stage([self._xml_toc_file])

    def _datasets_to_update(self, forceUpdateAll=False):
        """Yield TOC ``<nt:leaf type="dataset">`` elements needing a refresh.

        A dataset is yielded when there is no previous TOC, when it did not
        exist in the previous TOC, or when its ``lastUpdate`` text changed.
        With ``forceUpdateAll`` every dataset is yielded.
        """
        if forceUpdateAll:
            for ds in self.xml_toc.iterfind('.//nt:leaf[@type="dataset"]', namespaces=self._nsmap):
                yield ds
            return

        # Index the previous TOC by dataset code -> lastUpdate timestamp.
        last_update_by_ds = {}
        if self.xml_toc_old is not None:
            for ds in self.xml_toc_old.iterfind('.//nt:leaf[@type="dataset"]', namespaces=self._nsmap):
                ds_code = ds.findtext("nt:code", namespaces=self._nsmap)
                last_update_by_ds[ds_code] = ds.findtext("nt:lastUpdate", namespaces=self._nsmap)

        for ds in self.xml_toc.iterfind('.//nt:leaf[@type="dataset"]', namespaces=self._nsmap):
            if self.xml_toc_old is None:
                yield ds
                continue
            ds_code = ds.findtext("nt:code", namespaces=self._nsmap)
            ds_earlier_update = last_update_by_ds.get(ds_code)
            if ds_earlier_update is None:
                # Dataset did not exist in the previous TOC.
                yield ds
            elif ds.findtext("nt:lastUpdate", namespaces=self._nsmap) != ds_earlier_update:
                yield ds

    def update_content(self, forceUpdateAll=False):
        """Download every changed dataset via SDMX and write it to CSV."""
        for ds in self._datasets_to_update(forceUpdateAll):
            ds_code = ds.findtext("nt:code", namespaces=self._nsmap)

            log.info('Dowloading dataset {}'.format(ds_code))
            # Data is fetched through the SDMX web service.  (Bulk TSV/zip
            # download would be faster but carries only the series values.)
            log.info('...sdmx information...')
            dsd_code = 'DSD_' + ds_code
            cs_code = 'CS_' + dsd_code
            dsd_response = self._sdmx_loader.datastructure(resource_id=dsd_code)
            dsd = dsd_response.msg.datastructure[dsd_code]
            dsd_df = dsd_response.write()
            cs = dsd_response.msg.conceptscheme[cs_code]
            cs_id_list = [c.id for c in cs.aslist()]
            cl = dsd_response.msg.codelist
            data_response = self._sdmx_loader.data(resource_id=ds_code, dsd=dsd)
            data = data_response.data
            series_l = list(data.series)
            series_df = data_response.write(series_l)
            # The timeseries arrive in reverse order; sort chronologically.
            series_df.sort_index(inplace=True)
            idx_multi = series_df.columns
            # Drop column-index levels that carry only a single value.
            lvl_to_drop = [i for i in range(len(idx_multi.levels)) if len(idx_multi.levels[i]) == 1]
            idx_multi = idx_multi.droplevel(lvl_to_drop)
            # BUG FIX: Index.swaplevel returns a *new* index; the original
            # discarded the result, so the reordering never took effect.
            if 'INDIC' in idx_multi.names:
                idx_multi = idx_multi.swaplevel('INDIC', 0)  # place INDIC first
            if 'INDICATOR' in idx_multi.names:
                idx_multi = idx_multi.swaplevel('INDICATOR', 0)  # place INDICATOR first
            series_df.columns = ['_'.join(col).strip() for col in idx_multi.values]
            # BUG FIX: a '/' separator was missing here, which produced
            # '<parent>/<repo><DS_CODE>.csv' outside the repo directory
            # (every other path in this class joins with '/').
            series_df.to_csv(self._repo_location + '/' + ds_code + '.csv', na_rep="na")
# Beispiel #22 (score: 0)
 def setUp(self):
     # Delegate to the base implementation, then create an INSEE client.
     unittest.TestCase.setUp(self)
     self.sdmx = Request('INSEE')
# Beispiel #23 (score: 0)
 def setUp(self):
     # Generic flat data sample (see the 'flat' file name); no DSD needed.
     self.estat = Request('ESTAT')
     filepath = os.path.join(
         pkg_path, 'data/exr/ecb_exr_ng/generic/ecb_exr_ng_flat.xml')
     self.resp = self.estat.get(fromfile=filepath)
# Beispiel #24 (score: 0)
def test_doc_usage_structure():
    """Code examples in walkthrough.rst.

    NOTE(review): this test performs live requests against the ECB SDMX
    web service; it requires network access.
    """
    ecb = Request("ECB")

    # Proxy, stream and timeout settings are stored on the requests session.
    ecb_via_proxy = Request("ECB", proxies={"http": "http://1.2.3.4:5678"})
    assert all(
        getattr(ecb_via_proxy.session, k) == v
        for k, v in (
            ("proxies", {"http": "http://1.2.3.4:5678"}),
            ("stream", False),
            ("timeout", 30.1),
        )
    )

    msg1 = ecb.categoryscheme()

    assert msg1.response.url == (
        "https://sdw-wsrest.ecb.europa.eu/service/categoryscheme/ECB/latest"
        "?references=parentsandsiblings"
    )

    # Check specific headers
    headers = msg1.response.headers
    assert headers["Content-Type"] == (
        "application/vnd.sdmx.structure+xml; " "version=2.1"
    )
    assert all(k in headers for k in ["Connection", "Date", "Server"])

    # Removed: in pandaSDMX 0.x this was a convenience method that (for this
    # structure message) returned two DataStructureDefinitions. Contra the
    # spec, that assumes:
    # - There is 1 Categorization using the CategoryScheme; there could be
    #   many.
    # - The Categorization maps DataStructureDefinitions to Categories, when
    #   there could be many.
    # list(cat_response.category_scheme['MOBILE_NAVI']['07'])

    dfs = pandasdmx.to_pandas(msg1.dataflow).head()
    assert len(dfs) == 2

    flows = ecb.dataflow()  # noqa: F841
    dsd_id = flows.dataflow.EXR.structure.id
    assert dsd_id == "ECB_EXR1"

    # references='all' pulls in the codelists/concepts the DSD references.
    refs = dict(references="all")
    msg2 = ecb.datastructure(resource_id=dsd_id, params=refs)
    dsd = msg2.structure[dsd_id]

    assert pandasdmx.to_pandas(dsd.dimensions) == [
        "FREQ",
        "CURRENCY",
        "CURRENCY_DENOM",
        "EXR_TYPE",
        "EXR_SUFFIX",
        "TIME_PERIOD",
    ]

    # Spot-check the first entries of the currency codelist.
    cl = pandasdmx.to_pandas(msg2.codelist["CL_CURRENCY"]).sort_index()
    expected = pd.Series(
        {
            "ADF": "Andorran Franc (1-1 peg to the French franc)",
            "ADP": "Andorran Peseta (1-1 peg to the Spanish peseta)",
            "AED": "United Arab Emirates dirham",
            "AFA": "Afghanistan afghani (old)",
            "AFN": "Afghanistan, Afghanis",
        },
        name="Currency code list",
    ).rename_axis("CL_CURRENCY")
    assert_pd_equal(cl.head(), expected)
class TestStructSpecSeriesDataSet(unittest.TestCase):
    """Structure-specific time-series data set parsed from a local ECB EXR fixture."""

    def setUp(self):
        """Parse the DSD first, then read the data file against it."""
        self.estat = Request('ESTAT')
        dsd_resp = self.estat.datastructure(
            fromfile=os.path.join(pkg_path, 'data/exr/ecb_exr_ng/ecb_exr_ng_full.xml'))
        dsd = dsd_resp.datastructure.DataStructure
        data_path = os.path.join(
            pkg_path, 'data/exr/ecb_exr_ng/structured/ecb_exr_ng_ts_gf.xml')
        self.resp = self.estat.data(fromfile=data_path, dsd=dsd)

    def test_header_attributes(self):
        """Header names the DSD and the dimension at observation level."""
        header = self.resp.header
        self.assertEqual(header.structured_by, 'STR1')
        self.assertEqual(header.dim_at_obs, 'TIME_PERIOD')

    def test_dataset_cls(self):
        """The message payload is a model.DataSet instance."""
        self.assertIsInstance(self.resp.msg.data, model.DataSet)

    def test_obs(self):
        """Series keys, series attributes and individual observations."""
        data = self.resp.data
        # Dataset-level obs iterator is empty: all obs belong to series.
        self.assertEqual(len(list(data.obs())), 0)
        all_series = list(data.series)
        self.assertEqual(len(all_series), 4)
        last_series = all_series[-1]
        self.assertIsInstance(last_series, model.Series)
        self.assertIsInstance(last_series.key, tuple)
        self.assertEqual(len(last_series.key), 4)
        self.assertEqual(last_series.key.CURRENCY, 'USD')
        self.assertEqual(last_series.attrib.DECIMALS, '4')
        observations = list(last_series.obs(reverse_obs=True))
        self.assertEqual(len(observations), 3)
        oldest = observations[-1]
        self.assertEqual(len(oldest), 3)
        self.assertEqual(oldest.dim, '2010-08')
        self.assertEqual(oldest.value, '1.2894')
        self.assertIsInstance(oldest.attrib, tuple)
        self.assertEqual(oldest.attrib.OBS_STATUS, 'A')

    def test_pandas(self):
        """write() yields one pandas Series per SDMX series."""
        resp = self.resp
        data = resp.data
        plain = list(resp.write(
            data, attributes='', reverse_obs=True, asframe=False))
        self.assertEqual(len(plain), 4)
        last = plain[-1]
        self.assertIsInstance(last, pandas.core.series.Series)
        self.assertEqual(last[2], 1.2894)
        self.assertIsInstance(last.name, tuple)
        self.assertEqual(len(last.name), 4)
        # now with attributes
        with_attrs = list(resp.write(
            data, attributes='osgd', reverse_obs=True, asframe=False))
        self.assertEqual(len(with_attrs), 4)
        self.assertIsInstance(with_attrs[0], tuple)  # contains 2 series
        self.assertEqual(len(with_attrs[0]), 2)
        values, attrs = with_attrs[-1]
        self.assertIsInstance(values, pandas.core.series.Series)
        self.assertIsInstance(attrs, pandas.core.series.Series)
        self.assertEqual(values[2], 1.2894)
        self.assertIsInstance(values.name, tuple)
        self.assertEqual(len(values.name), 4)
        self.assertEqual(len(attrs), 3)
        # access an attribute of the first value
        self.assertEqual(attrs[0].OBS_STATUS, 'A')

    def test_write2pandas(self):
        """Default write() returns a DataFrame; 'osgd' adds a metadata frame."""
        frame = self.resp.write(attributes='',
                                reverse_obs=False)
        self.assertIsInstance(frame, pandas.DataFrame)
        assert frame.shape == (3, 4)
        # with metadata
        frame, meta = self.resp.write(attributes='osgd',
                                      reverse_obs=False)
        assert meta.shape == (3, 4)
        assert meta.iloc[1, 1].OBS_STATUS == 'A'
Beispiel #26
0
def istat():
    """Return a pandasdmx Request bound to the ISTAT (Italy) SDMX provider."""
    provider = Request("ISTAT")
    return provider
#!/usr/bin/env python
# coding: utf-8
import math, re, sys, calendar, os, copy, time
import pandas as pd
import numpy as np
from datetime import datetime, date
# In[80]:

import pandasdmx
from pandasdmx import Request

# Time the whole retrieval so the slow steps are visible.
tStart = time.time()

oecd = Request('OECD')
print('Time: ', int(time.time() - tStart), 's'+'\n')

# Business Tendency Surveys for OECD countries (MEI_BTS_COS),
# all series from 1995 onwards.
data_response = oecd.data(resource_id='MEI_BTS_COS', key='all?startTime=1995')
print('Time: ', int(time.time() - tStart), 's'+'\n')

df = data_response.to_pandas()
print('Time: ', int(time.time() - tStart), 's'+'\n')

path = 'C:/Users/lawre/Desktop'

# BUG FIX: the original wrote to path + 'test_lei.csv', which concatenates
# without a separator and produces 'C:/Users/lawre/Desktoptest_lei.csv'.
# Join the directory and file name properly instead.
df.to_csv(os.path.join(path, 'test_lei.csv'))
Beispiel #28
0
 def setUp(self):
     """Load the generic rate-group time-series fixture from the package data."""
     self.estat = Request()
     self.resp = self.estat.get(fromfile=os.path.join(
         pkg_path, 'data/exr/ecb_exr_rg/generic/ecb_exr_rg_ts.xml'))
Beispiel #29
0
def test_doc_usage_structure():
    """Code examples in walkthrough.rst."""
    ecb = Request('ECB')

    # A client configured to route through an HTTP proxy; only the session
    # settings are checked — no request is actually sent through the proxy.
    ecb_via_proxy = Request('ECB', proxies={'http': 'http://1.2.3.4:5678'})

    assert all(getattr(ecb_via_proxy.session, k) == v for k, v in (
        ('proxies', {'http': 'http://1.2.3.4:5678'}),
        ('stream', True),
        ('timeout', 30.1),
        ))

    msg1 = ecb.categoryscheme()

    assert msg1.response.url == (
        'http://sdw-wsrest.ecb.int/service/categoryscheme/ECB/latest'
        '?references=parentsandsiblings')

    # Check specific headers
    headers = msg1.response.headers
    assert headers['Content-Type'] == ('application/vnd.sdmx.structure+xml; '
                                       'version=2.1')
    assert all(k in headers for k in ['Connection', 'Date', 'Server'])

    # Removed: in pandaSDMX 0.x this was a convenience method that (for this
    # structure message) returned two DataStructureDefinitions. Contra the
    # spec, that assumes:
    # - There is 1 Categorization using the CategoryScheme; there could be many.
    # - The Categorization maps DataStructureDefinitions to Categories, when
    #   there could be many.
    # list(cat_response.category_scheme['MOBILE_NAVI']['07'])

    dfs = sdmx.to_pandas(msg1.dataflow).head()
    expected = pd.Series({
        'AME': 'AMECO',
        'BKN': 'Banknotes statistics',
        'BLS': 'Bank Lending Survey Statistics',
        'BOP': ('Euro Area Balance of Payments and International Investment '
                'Position Statistics'),
        'BSI': 'Balance Sheet Items',
        })
    assert_pd_equal(dfs, expected)

    flows = ecb.dataflow()
    # BUG FIX: the original read the DSD id from ``msg1`` (the categoryscheme
    # message) and left the freshly fetched ``flows`` message unused; read it
    # from ``flows``, matching the documented walkthrough flow.
    dsd_id = flows.dataflow.EXR.structure.id
    assert dsd_id == 'ECB_EXR1'

    refs = dict(references='all')
    msg2 = ecb.datastructure(resource_id=dsd_id, params=refs)
    dsd = msg2.structure[dsd_id]

    assert sdmx.to_pandas(dsd.dimensions) == ['FREQ', 'CURRENCY',
        'CURRENCY_DENOM', 'EXR_TYPE', 'EXR_SUFFIX', 'TIME_PERIOD']

    cl = sdmx.to_pandas(msg2.codelist['CL_CURRENCY']).sort_index()
    expected = pd.Series({
        'ADF': 'Andorran Franc (1-1 peg to the French franc)',
        'ADP': 'Andorran Peseta (1-1 peg to the Spanish peseta)',
        'AED': 'United Arab Emirates dirham',
        'AFA': 'Afghanistan afghani (old)',
        'AFN': 'Afghanistan, Afghanis',
        }, name='Currency code list') \
        .rename_axis('CL_CURRENCY')
    assert_pd_equal(cl.head(), expected)
Beispiel #30
0
 def setUp(self):
     """Load the generic series-group time-series fixture from the package data."""
     self.estat = Request()
     self.resp = self.estat.get(fromfile=os.path.join(
         pkg_path, 'data/exr/ecb_exr_sg/generic/ecb_exr_sg_ts.xml'))
Beispiel #31
0
from pandasdmx import Request

# Connecting to the Eurostat web service
# Connecting to the Eurostat web service
estat = Request('ESTAT')

# Downloading the dataflow definitions
flows = estat.dataflow()

# Getting information about the dataflow
flows.url
flows.http_headers

# Exporting the dataflow definitions to a pandas DataFrame
dflows = flows.write().dataflow

# Listing tables from the high tech database and the description of a single
# table.  IDIOM FIX: use the boolean mask directly instead of comparing it
# with ``== True``.
ht_tabs = dflows[dflows.index.str.startswith('htec')]
kia_emp = dflows.loc['htec_kia_emp2'][0]

# Dataflow definition
df_def = flows.dataflow.htec_kia_emp2

# Database's datastructure id
dsd_id = df_def.structure.id

# Creating a support dict
refs = dict(references='all')

# Calling the table
dsd_response = estat.get(
    url='http://ec.europa.eu/eurostat/SDMX/diss-web/rest/datastructure/ESTAT/'
        + dsd_id)

# Getting information about the datastructure