class TestGenericSeriesData_RateGroup_TS(unittest.TestCase):

    def setUp(self):
        self.estat = Request()
        filepath = os.path.join(
            pkg_path, 'data/exr/ecb_exr_rg/generic/ecb_exr_rg_ts.xml')
        self.resp = self.estat.get(fromfile=filepath)

    def test_groups(self):
        data = self.resp.data
        self.assertEqual(len(list(data.groups)), 5)
        self.assertEqual(len(list(data.series)), 4)
        g2 = list(data.groups)[2]
        self.assertEqual(g2.key.CURRENCY, 'GBP')
        self.assertEqual(
            g2.attrib.TITLE,
            'ECB reference exchange rate, U.K. Pound sterling /Euro')
        # Check group attributes of a series
        s = list(data.series)[0]
        g_attrib = s.group_attrib
        self.assertIsInstance(g_attrib, tuple)
        self.assertEqual(len(g_attrib), 5)

    def test_footer(self):
        filepath = os.path.join(pkg_path, 'data/estat/footer.xml')
        resp = self.estat.get(fromfile=filepath, get_footer_url=None)
        f = resp.footer
        assert f.code == 413
        # 'Infomation' (sic) -- spelling as it appears in the source file
        assert f.severity == 'Infomation'
        assert f.text[1].startswith('http')
class TestStructSpecSeriesData_RateGroup_TS(unittest.TestCase):

    def setUp(self):
        self.estat = Request()
        filepath = os.path.join(
            pkg_path, 'data/exr/ecb_exr_rg/structured/ecb_exr_rg_ts.xml')
        dsd_resp = self.estat.datastructure(
            fromfile=os.path.join(pkg_path,
                                  'data/exr/ecb_exr_rg/ecb_exr_rg.xml'))
        dsd = dsd_resp.datastructure.DataStructure
        self.resp = self.estat.get(fromfile=filepath, dsd=dsd)

    def test_groups(self):
        data = self.resp.data
        self.assertEqual(len(list(data.groups)), 5)
        self.assertEqual(len(list(data.series)), 4)
        g2 = list(data.groups)[2]
        self.assertEqual(g2.key.CURRENCY, 'GBP')
        self.assertEqual(
            g2.attrib.TITLE,
            'ECB reference exchange rate, U.K. Pound sterling /Euro')
        # Check group attributes of a series
        s = list(data.series)[0]
        g_attrib = s.group_attrib
        self.assertIsInstance(g_attrib, tuple)
        self.assertEqual(len(g_attrib), 5)
def test_samples_request(self):
    """Test the samples from the SDMXJSON spec."""
    req = Request()
    for name, data in sample_data.items():
        resp = req.get(fromfile=self._filepath(name))
        df = resp.write()
        assert df.equals(data), \
            '\n'.join(map(str, [name, df.index, data.index,
                                getattr(df, 'columns', ''),
                                getattr(data, 'columns', '')]))
class Test_ESTAT_dsd_apro_mk_cola(unittest.TestCase):

    def setUp(self):
        self.estat = Request('ESTAT')
        filepath = os.path.join(test_path, 'data/estat/apro_dsd.xml')
        self.resp = self.estat.get(fromfile=filepath)

    def test_codelists_keys(self):
        self.assertEqual(len(self.resp.codelist), 6)
        self.assertIsInstance(self.resp.codelist.CL_GEO, model.Codelist)

    def test_codelist_name(self):
        self.assertEqual(
            self.resp.msg.codelist.CL_GEO.UK.name.en, 'United Kingdom')
        assert self.resp.codelist.CL_FREQ.name.en == 'FREQ'

    def test_code_cls(self):
        self.assertIsInstance(self.resp.codelist.CL_FREQ.D, model.Code)

    def test_writer(self):
        df = self.resp.write(rows='codelist')
        self.assertEqual(df.shape, (79, 2))
class test_dsd_common(unittest.TestCase):

    def setUp(self):
        self.estat = Request('ESTAT')
        filepath = os.path.join(test_path, 'data/common/common.xml')
        self.resp = self.estat.get(fromfile=filepath)

    def test_codelists_keys(self):
        self.assertEqual(len(self.resp.msg.codelist), 5)
        self.assertIsInstance(self.resp.msg.codelist.CL_FREQ, model.Codelist)

    def test_codelist_name(self):
        self.assertEqual(self.resp.msg.codelist.CL_FREQ.D.name.en, 'Daily')

    def test_code_cls(self):
        self.assertIsInstance(self.resp.msg.codelist.CL_FREQ.D, model.Code)

    def test_annotations(self):
        code = self.resp.codelist.CL_FREQ.A
        anno_list = list(code.annotations)
        self.assertEqual(len(anno_list), 1)
        a = anno_list[0]
        self.assertIsInstance(a, model.Annotation)
        self.assertTrue(a.text.en.startswith('It is'))
        self.assertEqual(a.annotationtype, 'NOTE')
class TestStructSpecSeriesDataSet2(unittest.TestCase):

    def setUp(self):
        self.estat = Request('ESTAT')
        filepath = os.path.join(
            pkg_path, 'data/exr/ecb_exr_ng/structured/ecb_exr_ng_ts.xml')
        dsd_resp = self.estat.datastructure(
            fromfile=os.path.join(pkg_path,
                                  'data/exr/ecb_exr_ng/ecb_exr_ng_full.xml'))
        dsd = dsd_resp.datastructure.DataStructure
        self.resp = self.estat.data(fromfile=filepath, dsd=dsd)

    def test_header_attributes(self):
        self.assertEqual(self.resp.header.structured_by, 'STR1')
        self.assertEqual(self.resp.header.dim_at_obs, 'TIME_PERIOD')

    def test_dataset_cls(self):
        self.assertIsInstance(self.resp.data, model.DataSet)

    def test_structured_obs(self):
        data = self.resp.data
        # empty obs iterator
        self.assertEqual(len(list(data.obs())), 0)
        series_list = list(data.series)
        self.assertEqual(len(series_list), 4)
        s3 = series_list[3]
        self.assertIsInstance(s3, model.Series)
        self.assertIsInstance(s3.key, tuple)
        self.assertEqual(len(s3.key), 4)
        self.assertEqual(s3.key.CURRENCY, 'USD')
        self.assertEqual(s3.attrib.DECIMALS, '4')
        obs_list = list(s3.obs(reverse_obs=True))
        self.assertEqual(len(obs_list), 3)
        o0 = obs_list[2]
        self.assertEqual(len(o0), 3)
        self.assertEqual(o0.dim, '2010-08')
        self.assertEqual(o0.value, '1.2894')
        self.assertIsInstance(o0.attrib, tuple)
        self.assertEqual(o0.attrib.OBS_STATUS, 'A')

    def test_dataframe(self):
        data = self.resp.data
        df = self.resp.write(
            data, attributes='', asframe=True, reverse_obs=True)
        self.assertIsInstance(df, pandas.core.frame.DataFrame)
        self.assertEqual(df.shape, (3, 4))
def test_write_source(self):
    """Test the write_source() method."""
    req = Request()
    for name in sample_data.keys():
        orig_fn = self._filepath(name)
        temp_fn = self._filepath(name + '-write-source')
        # Read the message
        resp = req.get(fromfile=orig_fn)
        # Write to a temporary JSON file
        resp.write_source(temp_fn)
        # Read the two files and compare JSON (ignores ordering)
        with open(orig_fn) as orig, open(temp_fn) as temp:
            assert json.load(orig) == json.load(temp)
        # Delete the temporary file
        os.remove(temp_fn)
class TestStructSpecFlatDataSet(unittest.TestCase):

    def setUp(self):
        self.estat = Request('ESTAT')
        filepath = os.path.join(
            pkg_path, 'data/exr/ecb_exr_ng/structured/ecb_exr_ng_flat.xml')
        dsd_resp = self.estat.datastructure(
            fromfile=os.path.join(pkg_path,
                                  'data/exr/ecb_exr_ng/ecb_exr_ng_full.xml'))
        dsd = dsd_resp.datastructure.DataStructure
        self.resp = self.estat.get(fromfile=filepath, dsd=dsd)

    def test_msg_type(self):
        self.assertIsInstance(self.resp.msg, model.DataMessage)

    def test_header_attributes(self):
        self.assertEqual(self.resp.header.structured_by, 'STR1')
        self.assertEqual(self.resp.header.dim_at_obs, 'AllDimensions')

    def test_dataset_cls(self):
        self.assertIsInstance(self.resp.data, model.DataSet)
        self.assertEqual(self.resp.msg.data.dim_at_obs, 'AllDimensions')

    def test_generic_obs(self):
        data = self.resp.data
        # empty series list
        self.assertEqual(len(list(data.series)), 0)
        obs_list = list(data.obs())
        self.assertEqual(len(obs_list), 12)
        o0 = obs_list[0]
        self.assertEqual(len(o0), 3)
        self.assertIsInstance(o0.key, tuple)  # obs_key
        self.assertEqual(o0.key.FREQ, 'M')
        self.assertEqual(o0.key.CURRENCY, 'CHF')
        self.assertEqual(o0.value, '1.3413')
        self.assertIsInstance(o0.attrib, tuple)
        self.assertEqual(o0.attrib.OBS_STATUS, 'A')
        self.assertEqual(o0.attrib.DECIMALS, '4')

    def test_write2pandas(self):
        data_series = self.resp.write(
            attributes='', asframe=False, reverse_obs=False)
        self.assertIsInstance(data_series, pandas.Series)
class TestGenericSeriesData_SiblingGroup_TS(unittest.TestCase):

    def setUp(self):
        self.estat = Request()
        filepath = os.path.join(
            pkg_path, 'data/exr/ecb_exr_sg/generic/ecb_exr_sg_ts.xml')
        self.resp = self.estat.get(fromfile=filepath)

    def test_groups(self):
        data = self.resp.data
        self.assertEqual(len(list(data.groups)), 4)
        self.assertEqual(len(list(data.series)), 4)
        g2 = list(data.groups)[2]
        self.assertEqual(g2.key.CURRENCY, 'JPY')
        self.assertEqual(
            g2.attrib.TITLE,
            'ECB reference exchange rate, Japanese yen/Euro')
        # Check group attributes of a series
        s = list(data.series)[0]
        g_attrib = s.group_attrib
        self.assertIsInstance(g_attrib, tuple)
        self.assertEqual(len(g_attrib), 1)
class test_exr_constraints(unittest.TestCase):

    def setUp(self):
        self.ecb = Request('ecb')
        filepath = os.path.join(test_path, 'data/exr_flow.xml')
        self.resp = self.ecb.get(fromfile=filepath)

    def test_constrained_codes(self):
        m = self.resp.msg
        self.assertEqual(m._dim_ids[0], 'FREQ')
        self.assertEqual(len(m._dim_ids), 5)
        self.assertEqual(len(m._dim_codes), 5)
        self.assertEqual(len(m._attr_ids), 9)
        self.assertEqual(len(m._attr_codes), 9)
        self.assertEqual(m._attr_ids[-1], 'UNIT_MULT')
        self.assertIn('5', m._attr_codes.UNIT_MULT)
        self.assertIn('W', m._dim_codes.FREQ)
        self.assertEqual(len(m._constrained_codes), 14)
        self.assertNotIn('W', m._constrained_codes.FREQ)
        key = {'FREQ': ['W']}
        self.assertTrue(m.in_codes(key))
        self.assertFalse(m.in_constraints(key, raise_error=False))
        self.assertRaises(ValueError, m.in_constraints, key)
        self.assertTrue(m.in_constraints({'CURRENCY': ['CHF']}))
        # test with invalid key
        self.assertRaises(TypeError, m._in_constraints, {'FREQ': 'A'})
        # structure writer with constraints
        out = self.resp.write()
        cl = out.codelist
        self.assertEqual(cl.shape, (3555, 2))
        # unconstrained codelists
        out = self.resp.write(constraint=False)
        cl = out.codelist
        self.assertEqual(cl.shape, (4177, 2))
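# Usage sketch (not part of the test suite above): the constraint API the
# test exercises could be combined to validate a partial key before issuing
# a data request. `msg` is assumed to be a dataflow message obtained as in
# setUp(); `key_is_valid` is a hypothetical helper.
def key_is_valid(msg, key):
    """True if every code in `key` is defined and not constrained away."""
    return msg.in_codes(key) and msg.in_constraints(key, raise_error=False)

# Per the assertions above: key_is_valid(m, {'CURRENCY': ['CHF']}) is True,
# while key_is_valid(m, {'FREQ': ['W']}) is False ('W' is constrained away).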
class InseeTestCase(unittest.TestCase):
    # nosetests -s -v pandasdmx.tests.test_insee:InseeTestCase

    def setUp(self):
        unittest.TestCase.setUp(self)
        self.sdmx = Request('INSEE')

    def test_load_dataset(self):
        dataset_code = 'IPI-2010-A21'

        # load all dataflows
        dataflows_response = self.sdmx.get(
            resource_type='dataflow', agency='FR1', fromfile=DATAFLOW_FP)
        dataflows = dataflows_response.msg.dataflow
        self.assertEqual(len(dataflows.keys()), 663)
        self.assertTrue(dataset_code in dataflows)

        # load the datastructure for the current dataset_code
        fp_datastructure = DATASETS[dataset_code]['datastructure-fp']
        datastructure_response = self.sdmx.get(
            resource_type='datastructure', agency='FR1',
            fromfile=fp_datastructure)
        self.assertTrue(
            dataset_code in datastructure_response.msg.datastructure)
        dsd = datastructure_response.msg.datastructure[dataset_code]

        # verify the dimensions list
        dimensions = OrderedDict(
            [dim.id, dim] for dim in dsd.dimensions.aslist()
            if dim.id not in ['TIME', 'TIME_PERIOD'])
        dim_keys = list(dimensions.keys())
        self.assertEqual(dim_keys, ['FREQ', 'PRODUIT', 'NATURE'])

        # load data for the current dataset
        fp_data = DATASETS[dataset_code]['data-fp']
        data = self.sdmx.get(
            resource_type='data', agency='FR1', fromfile=fp_data)

        # verify series count and values
        series = list(data.msg.data.series)
        series_count = len(series)
        self.assertEqual(series_count, DATASETS[dataset_code]['series_count'])
        first_series = series[0]
        observations = list(first_series.obs())
        first_obs = observations[0]
        last_obs = observations[-1]
        self.assertEqual(first_obs.dim, '2015-10')
        self.assertEqual(first_obs.value, '105.61')
        self.assertEqual(last_obs.dim, '1990-01')
        self.assertEqual(last_obs.value, '139.22')

    def test_fixe_key_names(self):
        """Verify that keys and attributes may contain '-' in their names."""
        dataset_code = 'CNA-2010-CONSO-SI-A17'

        fp_datastructure = DATASETS[dataset_code]['datastructure-fp']
        datastructure_response = self.sdmx.get(
            resource_type='datastructure', agency='FR1',
            fromfile=fp_datastructure)
        self.assertTrue(
            dataset_code in datastructure_response.msg.datastructure)
        dsd = datastructure_response.msg.datastructure[dataset_code]

        dimensions = OrderedDict(
            [dim.id, dim] for dim in dsd.dimensions.aslist()
            if dim.id not in ['TIME', 'TIME_PERIOD'])
        dim_keys = list(dimensions.keys())
        self.assertEqual(
            dim_keys, ['SECT-INST', 'OPERATION', 'PRODUIT', 'PRIX'])

        fp_data = DATASETS[dataset_code]['data-fp']
        data = self.sdmx.get(
            resource_type='data', agency='FR1', fromfile=fp_data)
        series = list(data.msg.data.series)[0]
        self.assertEqual(list(series.key._asdict().keys()),
                         ['SECT-INST', 'OPERATION', 'PRODUIT', 'PRIX'])
        self.assertEqual(list(series.attrib._asdict().keys()),
                         ['FREQ', 'IDBANK', 'TITLE', 'LAST_UPDATE',
                          'UNIT_MEASURE', 'UNIT_MULT', 'REF_AREA',
                          'DECIMALS', 'BASE_PER', 'TIME_PER_COLLECT'])

    def test_freq_in_series_attribute(self):
        # Regression test for issues #39 and #41: INSEE time series provide
        # the FREQ value as a series attribute instead of a dimension, which
        # caused a runtime error when writing a pandas DataFrame.
        data_response = self.sdmx.data(
            fromfile=SERIES['UNEMPLOYMENT_CAT_A_B_C']['data-fp'])
        data_response.write()
def req(self): return Request("INSEE")
def setUp(self):
    self.estat = Request('ESTAT')
    filepath = os.path.join(
        pkg_path, 'data/exr/ecb_exr_ng/generic/ecb_exr_ng_ts_gf.xml')
    self.resp = self.estat.data(fromfile=filepath)
""" Created on Fri Oct 19 10:02:37 2018 @author: dpsugasa """ from pandasdmx import Request import plotly import plotly.plotly as py #for plotting import plotly.graph_objs as go import plotly.dashboard_objs as dashboard import plotly.tools as tls import plotly.figure_factory as ff import credentials #plotly API details estat = Request('ESTAT') flow_response = estat.dataflow('une_rt_a') structure_response = flow_response.dataflow.une_rt_a.structure( request=True, target_only=False) structure_response.write().codelist.loc['GEO'].head() resp = estat.data('une_rt_a', key={'GEO': 'EL+ES+IT'}, params={'startPeriod': '1950'}) data = resp.write(s for s in resp.data.series if s.key.AGE == 'TOTAL') z = data.loc[:, ('PC_ACT', 'TOTAL', 'T')] z['Dates'] = z.index.to_series().astype(str) z['IT_scr'] = (z['IT'] - z['IT'].mean()) / z['IT'].std() print(z)
class EurostatFetcher(SDMXFetcher):

    _sdmx_loader = Request('ESTAT')
    _nsmap = dict(nt='urn:eu.europa.ec.eurostat.navtree')
    _GEO = ['AT', 'BE', 'BG', 'CY', 'CZ', 'DE', 'DK', 'EE', 'EL', 'ES',
            'EU28', 'EU27', 'EA19', 'FI', 'FR', 'HR', 'HU', 'IE', 'IT',
            'LT', 'LU', 'LV', 'ME', 'MK', 'MT', 'NL', 'PL', 'PT', 'RO',
            'RS', 'SE', 'SI', 'SK', 'TR', 'UK']
    _S_ADJ = ['NSA', 'SA']
    _FREQ = ['A', 'H', 'Q', 'M', 'S', 'W']

    def __init__(self, name=NAME, parent_location=GIT_PARENT_LOCATION):
        self._name = name
        self._base_url = URL
        self._parent_location = parent_location
        self._repo_location = parent_location + "/" + name
        self._temp_location = TEMP_PARENT_LOCATION + "/" + name
        self._xml_toc_file = XML_TOC_FILE
        self._xml_toc_file_location = (
            self._repo_location + "/" + self._xml_toc_file)
        Path(self._temp_location).mkdir(exist_ok=True)
        Path(self._repo_location).mkdir(exist_ok=True)
        try:
            self.load_existing_repo()
        except ValueError:
            self.initialize_repo()

    def _download_file(self, params, stream):
        rsp = requests.get(self._base_url, params, stream=stream)
        if rsp.ok:
            file_path = (self._repo_location + '/'
                         + params['file'].split('/')[-1])
            with open(file_path, 'wb') as f:
                for chunk in rsp.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
            self.repo.stage([file_path])
        return rsp

    def _dictionary_file(self, params):
        rsp = self._download_file(params, False)
        if rsp.ok:
            tsv_file_io = StringIO(rsp.text)
            reader = csv.reader(tsv_file_io, delimiter='\t')
            return dict([(r[0], r[1]) for r in reader])

    def _dimensions_list(self):
        log.info("Downloading and updating the dimensions list...")
        params = {"sort": "1", "file": "dic/en/dimlst.dic"}
        self._dimlst = self._dictionary_file(params)

    def _load_existing_toc(self):
        if Path(self._xml_toc_file_location).is_file():
            log.info("Loading up existing Table of Contents XML file...")
            parser = etree.XMLParser(remove_blank_text=True)
            self.xml_toc_old = etree.parse(self._xml_toc_file_location,
                                           parser=parser)
        else:
            log.info("No existing Table of Contents XML file. "
                     "Setting to None.")
            self.xml_toc_old = None

    def _update_toc(self):
        self._load_existing_toc()
        log.info("Starting Table of Contents XML file download...")
        response = requests.get(XML_TOC_URL)
        log.info(">> download finished.")
        parser = etree.XMLParser(remove_blank_text=True)
        xml_toc = etree.fromstring(response.content, parser=parser)
        self.xml_toc = etree.ElementTree(xml_toc)
        with open(self._xml_toc_file_location, 'wb') as xml_file:
            self.xml_toc.write(xml_file, encoding='utf-8',
                               pretty_print=True, xml_declaration=True)
        self.repo.stage([self._xml_toc_file])

    def _datasets_to_update(self, forceUpdateAll=False):
        if forceUpdateAll:
            for ds in self.xml_toc.iterfind('.//nt:leaf[@type="dataset"]',
                                            namespaces=self._nsmap):
                yield ds
        else:
            if self.xml_toc_old is not None:
                last_update_by_ds = {}
                for ds in self.xml_toc_old.iterfind(
                        './/nt:leaf[@type="dataset"]',
                        namespaces=self._nsmap):
                    ds_code = ds.findtext("nt:code", namespaces=self._nsmap)
                    ds_last_update = ds.findtext("nt:lastUpdate",
                                                 namespaces=self._nsmap)
                    last_update_by_ds[ds_code] = ds_last_update
            for ds in self.xml_toc.iterfind('.//nt:leaf[@type="dataset"]',
                                            namespaces=self._nsmap):
                ds_code = ds.findtext("nt:code", namespaces=self._nsmap)
                if self.xml_toc_old is None:
                    yield ds
                else:
                    ds_earlier_update = last_update_by_ds.get(ds_code)
                    if ds_earlier_update is None:
                        yield ds
                    else:
                        ds_last_update = ds.findtext(
                            "nt:lastUpdate", namespaces=self._nsmap)
                        if ds_last_update != ds_earlier_update:
                            yield ds

    def update_content(self, forceUpdateAll=False):
        for ds in self._datasets_to_update(forceUpdateAll):
            ds_code = ds.findtext("nt:code", namespaces=self._nsmap)
            log.info('Downloading dataset {}'.format(ds_code))
            # Short approach: downloading the data as TSV only yields series.
            # Long approach: downloading as SDMX seems very inefficient; it
            # may be better to download a zip, unzip, and read from files.
            log.info('...sdmx information...')
            dsd_code = 'DSD_' + ds_code
            cs_code = 'CS_' + dsd_code
            dsd_response = self._sdmx_loader.datastructure(
                resource_id=dsd_code)
            dsd = dsd_response.msg.datastructure[dsd_code]
            dsd_df = dsd_response.write()
            cs = dsd_response.msg.conceptscheme[cs_code]
            cs_id_list = [c.id for c in cs.aslist()]
            cl = dsd_response.msg.codelist
            data_response = self._sdmx_loader.data(resource_id=ds_code,
                                                   dsd=dsd)
            data = data_response.data
            series_l = list(data.series)
            series_df = data_response.write(series_l)
            # the time series are otherwise in reverse order
            series_df.sort_index(inplace=True)
            idx_multi = series_df.columns
            # drop levels that are unique in the multi-index
            lvl_to_drop = [i for i in range(len(idx_multi.levels))
                           if len(idx_multi.levels[i]) == 1]
            idx_multi = idx_multi.droplevel(lvl_to_drop)
            if 'INDIC' in idx_multi.names:
                idx_multi.swaplevel('INDIC', 0)  # place INDIC first
            if 'INDICATOR' in idx_multi.names:
                idx_multi.swaplevel('INDICATOR', 0)  # place INDICATOR first
            series_df.columns = idx_multi
            idx_multi_joined = ['_'.join(col).strip()
                                for col in idx_multi.values]
            series_df.columns = idx_multi_joined
            series_df.to_csv(self._repo_location + '/' + ds_code + '.csv',
                             na_rep="na")
def setUp(self):
    self.estat = Request('ESTAT')
    filepath = os.path.join(
        pkg_path, 'data/exr/ecb_exr_ng/generic/ecb_exr_ng_flat.xml')
    self.resp = self.estat.get(fromfile=filepath)
def test_doc_usage_structure():
    """Code examples in walkthrough.rst."""
    ecb = Request("ECB")
    ecb_via_proxy = Request("ECB", proxies={"http": "http://1.2.3.4:5678"})
    assert all(
        getattr(ecb_via_proxy.session, k) == v
        for k, v in (
            ("proxies", {"http": "http://1.2.3.4:5678"}),
            ("stream", False),
            ("timeout", 30.1),
        )
    )

    msg1 = ecb.categoryscheme()
    assert msg1.response.url == (
        "https://sdw-wsrest.ecb.europa.eu/service/categoryscheme/ECB/latest"
        "?references=parentsandsiblings"
    )

    # Check specific headers
    headers = msg1.response.headers
    assert headers["Content-Type"] == (
        "application/vnd.sdmx.structure+xml; version=2.1"
    )
    assert all(k in headers for k in ["Connection", "Date", "Server"])

    # Removed: in pandaSDMX 0.x this was a convenience method that (for this
    # structure message) returned two DataStructureDefinitions. Contra the
    # spec, that assumes:
    # - There is 1 Categorization using the CategoryScheme; there could be
    #   many.
    # - The Categorization maps DataStructureDefinitions to Categories, when
    #   there could be many.
    # list(cat_response.category_scheme['MOBILE_NAVI']['07'])

    dfs = pandasdmx.to_pandas(msg1.dataflow).head()
    assert len(dfs) == 2

    flows = ecb.dataflow()  # noqa: F841
    dsd_id = flows.dataflow.EXR.structure.id
    assert dsd_id == "ECB_EXR1"

    refs = dict(references="all")
    msg2 = ecb.datastructure(resource_id=dsd_id, params=refs)
    dsd = msg2.structure[dsd_id]

    assert pandasdmx.to_pandas(dsd.dimensions) == [
        "FREQ",
        "CURRENCY",
        "CURRENCY_DENOM",
        "EXR_TYPE",
        "EXR_SUFFIX",
        "TIME_PERIOD",
    ]

    cl = pandasdmx.to_pandas(msg2.codelist["CL_CURRENCY"]).sort_index()
    expected = pd.Series(
        {
            "ADF": "Andorran Franc (1-1 peg to the French franc)",
            "ADP": "Andorran Peseta (1-1 peg to the Spanish peseta)",
            "AED": "United Arab Emirates dirham",
            "AFA": "Afghanistan afghani (old)",
            "AFN": "Afghanistan, Afghanis",
        },
        name="Currency code list",
    ).rename_axis("CL_CURRENCY")
    assert_pd_equal(cl.head(), expected)
class TestStructSpecSeriesDataSet(unittest.TestCase):

    def setUp(self):
        self.estat = Request('ESTAT')
        filepath = os.path.join(
            pkg_path, 'data/exr/ecb_exr_ng/structured/ecb_exr_ng_ts_gf.xml')
        dsd_resp = self.estat.datastructure(
            fromfile=os.path.join(pkg_path,
                                  'data/exr/ecb_exr_ng/ecb_exr_ng_full.xml'))
        dsd = dsd_resp.datastructure.DataStructure
        self.resp = self.estat.data(fromfile=filepath, dsd=dsd)

    def test_header_attributes(self):
        self.assertEqual(self.resp.header.structured_by, 'STR1')
        self.assertEqual(self.resp.header.dim_at_obs, 'TIME_PERIOD')

    def test_dataset_cls(self):
        self.assertIsInstance(self.resp.msg.data, model.DataSet)

    def test_obs(self):
        data = self.resp.data
        # empty obs iterator
        self.assertEqual(len(list(data.obs())), 0)
        series_list = list(data.series)
        self.assertEqual(len(series_list), 4)
        s3 = series_list[3]
        self.assertIsInstance(s3, model.Series)
        self.assertIsInstance(s3.key, tuple)
        self.assertEqual(len(s3.key), 4)
        self.assertEqual(s3.key.CURRENCY, 'USD')
        self.assertEqual(s3.attrib.DECIMALS, '4')
        obs_list = list(s3.obs(reverse_obs=True))
        self.assertEqual(len(obs_list), 3)
        o0 = obs_list[2]
        self.assertEqual(len(o0), 3)
        self.assertEqual(o0.dim, '2010-08')
        self.assertEqual(o0.value, '1.2894')
        self.assertIsInstance(o0.attrib, tuple)
        self.assertEqual(o0.attrib.OBS_STATUS, 'A')

    def test_pandas(self):
        resp = self.resp
        data = resp.data
        pd_series = [s for s in resp.write(
            data, attributes='', reverse_obs=True, asframe=False)]
        self.assertEqual(len(pd_series), 4)
        s3 = pd_series[3]
        self.assertIsInstance(s3, pandas.core.series.Series)
        self.assertEqual(s3[2], 1.2894)
        self.assertIsInstance(s3.name, tuple)
        self.assertEqual(len(s3.name), 4)
        # now with attributes
        pd_series = [s for s in resp.write(
            data, attributes='osgd', reverse_obs=True, asframe=False)]
        self.assertEqual(len(pd_series), 4)
        self.assertIsInstance(pd_series[0], tuple)  # contains 2 series
        self.assertEqual(len(pd_series[0]), 2)
        s3, a3 = pd_series[3]
        self.assertIsInstance(s3, pandas.core.series.Series)
        self.assertIsInstance(a3, pandas.core.series.Series)
        self.assertEqual(s3[2], 1.2894)
        self.assertIsInstance(s3.name, tuple)
        self.assertEqual(len(s3.name), 4)
        self.assertEqual(len(a3), 3)
        # access an attribute of the first value
        self.assertEqual(a3[0].OBS_STATUS, 'A')

    def test_write2pandas(self):
        df = self.resp.write(attributes='', reverse_obs=False)
        self.assertIsInstance(df, pandas.DataFrame)
        assert df.shape == (3, 4)
        # with metadata
        df, mdf = self.resp.write(attributes='osgd', reverse_obs=False)
        assert mdf.shape == (3, 4)
        assert mdf.iloc[1, 1].OBS_STATUS == 'A'
def istat():
    return Request("ISTAT")
#!/usr/bin/env python
# coding: utf-8

import math, re, sys, calendar, os, copy, time
import pandas as pd
import numpy as np
from datetime import datetime, date

import pandasdmx
from pandasdmx import Request

tStart = time.time()

oecd = Request('OECD')
print('Time: ', int(time.time() - tStart), 's' + '\n')

data_response = oecd.data(resource_id='MEI_BTS_COS',
                          key='all?startTime=1995')
print('Time: ', int(time.time() - tStart), 's' + '\n')

df = data_response.to_pandas()
print('Time: ', int(time.time() - tStart), 's' + '\n')

# df.to_csv('c:\\Temp\\test_lei.txt', sep='\t')
path = 'C:/Users/lawre/Desktop'

# df = data_response.to_pandas()
# type(data_response.data)
df.to_csv(path + '/test_lei.csv')
def test_doc_usage_structure():
    """Code examples in walkthrough.rst."""
    ecb = Request('ECB')
    ecb_via_proxy = Request('ECB', proxies={'http': 'http://1.2.3.4:5678'})
    assert all(getattr(ecb_via_proxy.session, k) == v for k, v in (
        ('proxies', {'http': 'http://1.2.3.4:5678'}),
        ('stream', True),
        ('timeout', 30.1),
    ))

    msg1 = ecb.categoryscheme()
    assert msg1.response.url == (
        'http://sdw-wsrest.ecb.int/service/categoryscheme/ECB/latest'
        '?references=parentsandsiblings')

    # Check specific headers
    headers = msg1.response.headers
    assert headers['Content-Type'] == ('application/vnd.sdmx.structure+xml; '
                                       'version=2.1')
    assert all(k in headers for k in ['Connection', 'Date', 'Server'])

    # Removed: in pandaSDMX 0.x this was a convenience method that (for this
    # structure message) returned two DataStructureDefinitions. Contra the
    # spec, that assumes:
    # - There is 1 Categorization using the CategoryScheme; there could be
    #   many.
    # - The Categorization maps DataStructureDefinitions to Categories, when
    #   there could be many.
    # list(cat_response.category_scheme['MOBILE_NAVI']['07'])

    dfs = sdmx.to_pandas(msg1.dataflow).head()
    expected = pd.Series({
        'AME': 'AMECO',
        'BKN': 'Banknotes statistics',
        'BLS': 'Bank Lending Survey Statistics',
        'BOP': ('Euro Area Balance of Payments and International Investment '
                'Position Statistics'),
        'BSI': 'Balance Sheet Items',
    })
    assert_pd_equal(dfs, expected)

    flows = ecb.dataflow()
    dsd_id = msg1.dataflow.EXR.structure.id
    assert dsd_id == 'ECB_EXR1'

    refs = dict(references='all')
    msg2 = ecb.datastructure(resource_id=dsd_id, params=refs)
    dsd = msg2.structure[dsd_id]

    assert sdmx.to_pandas(dsd.dimensions) == [
        'FREQ', 'CURRENCY', 'CURRENCY_DENOM', 'EXR_TYPE', 'EXR_SUFFIX',
        'TIME_PERIOD']

    cl = sdmx.to_pandas(msg2.codelist['CL_CURRENCY']).sort_index()
    expected = pd.Series({
        'ADF': 'Andorran Franc (1-1 peg to the French franc)',
        'ADP': 'Andorran Peseta (1-1 peg to the Spanish peseta)',
        'AED': 'United Arab Emirates dirham',
        'AFA': 'Afghanistan afghani (old)',
        'AFN': 'Afghanistan, Afghanis',
    }, name='Currency code list').rename_axis('CL_CURRENCY')
    assert_pd_equal(cl.head(), expected)
from pandasdmx import Request

# Connect to the Eurostat web service
estat = Request('ESTAT')

# Download the dataflow definitions
flows = estat.dataflow()

# Information about the dataflow request
flows.url
flows.http_headers

# Export the dataflow definitions to a pandas DataFrame
dflows = flows.write().dataflow

# List tables from the high-tech database and the description of one table
ht_tabs = dflows[dflows.index.str.startswith('htec')]
kia_emp = dflows.loc['htec_kia_emp2'][0]

# Dataflow definition
df_def = flows.dataflow.htec_kia_emp2

# The dataflow's datastructure id
dsd_id = df_def.structure.id

# Create a support dict
refs = dict(references='all')

# Request the datastructure
dsd_response = estat.get(
    url='http://ec.europa.eu/eurostat/SDMX/diss-web/rest/datastructure/ESTAT/'
        + dsd_id)

# Getting information about the datastructure
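# A possible continuation (a sketch, not from the original script): assuming
# the Response delegates message attributes as in the examples above (e.g.
# `dsd_resp.datastructure.DataStructure`, `msg.datastructure[code]`), the
# returned datastructure can be looked up by id and its codelists written to
# pandas for inspection.
dsd = dsd_response.datastructure[dsd_id]
codelists = dsd_response.write().codelist
print(codelists.head())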