def test_doc_index1():
    """Exercise the first code example from index.rst against ESTAT."""
    estat = Request('ESTAT')
    flow_response = estat.dataflow('une_rt_a')

    # Calling the structure object as if it could issue a network request
    # for its own content is expected to fail with TypeError.
    with pytest.raises(TypeError):
        flow_response.dataflow.une_rt_a.structure(
            request=True, target_only=False)

    # The working route: request the data structure by ID.
    dsd_id = flow_response.dataflow.une_rt_a.structure.id
    structure_response = estat.get('datastructure', dsd_id)

    # Even better: Request.get(…) should examine the class and ID of the
    # object
    # structure = estat.get(flow_response.dataflow.une_rt_a.structure)

    # Convert the structure message and look at some code lists
    converted = sdmx.to_pandas(structure_response)

    first_geo_codes = {
        'AT': 'Austria',
        'BE': 'Belgium',
        'BG': 'Bulgaria',
        'CH': 'Switzerland',
        'CY': 'Cyprus',
    }
    expected = pd.Series(first_geo_codes, name='GEO').rename_axis('CL_GEO')

    # The converted code lists are exposed through a DictLike
    assert isinstance(converted.codelist, DictLike)

    # Compare the first sorted entries of CL_GEO with the expectation
    geo_head = converted.codelist['CL_GEO'].sort_index().head()
    assert_pd_equal(geo_head, expected)
def dump_structure(data_source: sdmx.Request, dataset_name: str, debug=False):
    """Return the data structure definition attached to a dataflow.

    Requests the dataflow called *dataset_name* from *data_source*, looks it
    up by attribute on the response's ``dataflow`` collection and returns its
    ``structure``. With ``debug`` set, the dataflow and its structure are
    printed as they are obtained.

    Raises ``AssertionError`` if the structure is not a
    ``sdmx.model.DataStructureDefinition``.
    """

    def _trace(obj):
        # Only echo intermediate objects when the caller asked for it.
        if debug:
            print(obj)

    print(f"Exploring {dataset_name}...")

    response = data_source.dataflow(dataset_name)
    flow = getattr(response.dataflow, dataset_name)
    _trace(flow)

    structure = flow.structure
    _trace(structure)

    assert isinstance(structure, sdmx.model.DataStructureDefinition)
    return structure
def test_doc_index1():
    """Check the first code example shown in index.rst (ESTAT source)."""
    estat = Request("ESTAT")
    flow_response = estat.dataflow("une_rt_a")

    # The structure object is not able to perform a network request for its
    # own content, so calling it this way must raise TypeError.
    with pytest.raises(TypeError):
        flow_response.dataflow.une_rt_a.structure(request=True, target_only=False)

    # Same effect, via the Request object
    structure_id = flow_response.dataflow.une_rt_a.structure.id
    structure_response = estat.get("datastructure", structure_id)

    # Even better: Request.get(…) should examine the class and ID of the object
    # structure = estat.get(flow_response.dataflow.une_rt_a.structure)

    # Show some codelists
    as_pandas = pandasdmx.to_pandas(structure_response)

    geo_labels = {
        "AT": "Austria",
        "BE": "Belgium",
        "BG": "Bulgaria",
        "CH": "Switzerland",
        "CY": "Cyprus",
    }
    expected = pd.Series(geo_labels, name="GEO")
    expected = expected.rename_axis("CL_GEO")

    # Codelists are converted to a DictLike
    codelists = as_pandas.codelist
    assert isinstance(codelists, DictLike)

    # First five entries of CL_GEO, sorted by code
    assert_pd_equal(codelists["CL_GEO"].sort_index().head(), expected)
def test_doc_usage_structure():
    """Run the structure-related code examples from walkthrough.rst (ECB)."""
    ecb = Request("ECB")

    # Extra keyword arguments are expected to show up on the session
    ecb_via_proxy = Request("ECB", proxies={"http": "http://1.2.3.4:5678"})
    expected_session_attrs = (
        ("proxies", {"http": "http://1.2.3.4:5678"}),
        ("stream", False),
        ("timeout", 30.1),
    )
    for attr_name, attr_value in expected_session_attrs:
        assert getattr(ecb_via_proxy.session, attr_name) == attr_value

    msg1 = ecb.categoryscheme()

    expected_url = (
        "http://sdw-wsrest.ecb.int/service/categoryscheme/ECB/latest"
        "?references=parentsandsiblings"
    )
    assert msg1.response.url == expected_url

    # Check specific headers
    headers = msg1.response.headers
    assert headers["Content-Type"] == "application/vnd.sdmx.structure+xml; version=2.1"
    for header_name in ("Connection", "Date", "Server"):
        assert header_name in headers

    # Removed: in pandaSDMX 0.x this was a convenience method that (for this
    # structure message) returned two DataStructureDefinitions. Contra the
    # spec, that assumes:
    # - There is 1 Categorization using the CategoryScheme; there could be
    #   many.
    # - The Categorization maps DataStructureDefinitions to Categories, when
    #   there could be many.
    # list(cat_response.category_scheme['MOBILE_NAVI']['07'])

    dfs = pandasdmx.to_pandas(msg1.dataflow).head()
    assert len(dfs) == 2

    # Look up the structure ID of the EXR dataflow
    flows = ecb.dataflow()
    dsd_id = flows.dataflow.EXR.structure.id
    assert dsd_id == "ECB_EXR1"

    # Fetch the data structure together with everything it references
    refs = {"references": "all"}
    msg2 = ecb.datastructure(resource_id=dsd_id, params=refs)
    dsd = msg2.structure[dsd_id]

    expected_dimensions = [
        "FREQ",
        "CURRENCY",
        "CURRENCY_DENOM",
        "EXR_TYPE",
        "EXR_SUFFIX",
        "TIME_PERIOD",
    ]
    assert pandasdmx.to_pandas(dsd.dimensions) == expected_dimensions

    # First five currencies of CL_CURRENCY, sorted by code
    cl = pandasdmx.to_pandas(msg2.codelist["CL_CURRENCY"]).sort_index()
    first_currencies = {
        "ADF": "Andorran Franc (1-1 peg to the French franc)",
        "ADP": "Andorran Peseta (1-1 peg to the Spanish peseta)",
        "AED": "United Arab Emirates dirham",
        "AFA": "Afghanistan afghani (old)",
        "AFN": "Afghanistan, Afghanis",
    }
    expected = pd.Series(first_currencies, name="Currency code list")
    expected = expected.rename_axis("CL_CURRENCY")
    assert_pd_equal(cl.head(), expected)
from pandasdmx import Request
from pprint import pprint

estat = Request('ESTAT')
table_code = 'teilm020'

# Fetch the dataflow for the table, then the structure it refers to
flow_response = estat.dataflow(table_code)
table_flow = flow_response.dataflow[table_code]
structure_response = table_flow.structure(request=True, target_only=False)
metadata = structure_response.write()

# List of available keys
metadata.codelist.index.levels[0]

# List of names for potential key values
metadata.codelist.loc['GEO']['name'].to_dict()

# A filtered request would look like this:
# resp = estat.data(table_code, key={'GEO': 'EL+ES+IE'},
#                   params={'startPeriod': '2007'})
resp = estat.data(table_code)
data = resp.write()

## Accessing data by key
# Keys order
data.keys().names

# Available key values
[level.name + "= " + str(list(level)) for level in data.keys().levels]

## Accessing data by date
# List of available dates:
def test_doc_usage_structure():
    """Run the structure-related code examples from walkthrough.rst (ECB)."""
    ecb = Request('ECB')

    # Extra keyword arguments are expected to show up on the session
    ecb_via_proxy = Request('ECB', proxies={'http': 'http://1.2.3.4:5678'})
    expected_session_attrs = (
        ('proxies', {'http': 'http://1.2.3.4:5678'}),
        ('stream', False),
        ('timeout', 30.1),
    )
    for attr_name, attr_value in expected_session_attrs:
        assert getattr(ecb_via_proxy.session, attr_name) == attr_value

    msg1 = ecb.categoryscheme()

    expected_url = (
        'http://sdw-wsrest.ecb.int/service/categoryscheme/ECB/latest'
        '?references=parentsandsiblings')
    assert msg1.response.url == expected_url

    # Check specific headers
    headers = msg1.response.headers
    expected_content_type = ('application/vnd.sdmx.structure+xml; '
                             'version=2.1')
    assert headers['Content-Type'] == expected_content_type
    for header_name in ('Connection', 'Date', 'Server'):
        assert header_name in headers

    # Removed: in pandaSDMX 0.x this was a convenience method that (for this
    # structure message) returned two DataStructureDefinitions. Contra the
    # spec, that assumes:
    # - There is 1 Categorization using the CategoryScheme; there could be
    #   many.
    # - The Categorization maps DataStructureDefinitions to Categories, when
    #   there could be many.
    # list(cat_response.category_scheme['MOBILE_NAVI']['07'])

    # First five dataflows referenced by the category scheme message
    dfs = sdmx.to_pandas(msg1.dataflow).head()
    expected_flows = pd.Series({
        'AME': 'AMECO',
        'BKN': 'Banknotes statistics',
        'BLS': 'Bank Lending Survey Statistics',
        'BOP': ('Euro Area Balance of Payments and International Investment '
                'Position Statistics'),
        'BSI': 'Balance Sheet Items',
    })
    assert_pd_equal(dfs, expected_flows)

    # The request is still issued even though its result is not used below
    flows = ecb.dataflow()  # noqa: F841
    dsd_id = msg1.dataflow.EXR.structure.id
    assert dsd_id == 'ECB_EXR1'

    # Fetch the data structure together with everything it references
    refs = {'references': 'all'}
    msg2 = ecb.datastructure(resource_id=dsd_id, params=refs)
    dsd = msg2.structure[dsd_id]

    expected_dimensions = [
        'FREQ',
        'CURRENCY',
        'CURRENCY_DENOM',
        'EXR_TYPE',
        'EXR_SUFFIX',
        'TIME_PERIOD',
    ]
    assert sdmx.to_pandas(dsd.dimensions) == expected_dimensions

    # First five currencies of CL_CURRENCY, sorted by code
    cl = sdmx.to_pandas(msg2.codelist['CL_CURRENCY']).sort_index()
    first_currencies = {
        'ADF': 'Andorran Franc (1-1 peg to the French franc)',
        'ADP': 'Andorran Peseta (1-1 peg to the Spanish peseta)',
        'AED': 'United Arab Emirates dirham',
        'AFA': 'Afghanistan afghani (old)',
        'AFN': 'Afghanistan, Afghanis',
    }
    expected = pd.Series(first_currencies, name='Currency code list')
    expected = expected.rename_axis('CL_CURRENCY')
    assert_pd_equal(cl.head(), expected)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 15 18:47:27 2020

@author: Peter Jakobsen
"""

from pandasdmx import Request

estat = Request('ESTAT')

# Download the metadata; write() exposes it as a dict mapping resource names
# to pandas DataFrames
flow_response = estat.dataflow('une_rt_a')
une_rt_a_flow = flow_response.dataflow.une_rt_a
structure_response = une_rt_a_flow.structure(request=True, target_only=False)

# Show some code lists.
structure_response.write().codelist.loc['GEO'].head()

# Use codes from the code list 'GEO' to obtain data on Greece, Ireland and
# Spain only.
resp = estat.data(
    'une_rt_a',
    key={'GEO': 'EL+ES+IE'},
    params={'startPeriod': '2007'},
)

# Select only the series whose AGE key is 'TOTAL' with a generator
# expression and write them to a pandas DataFrame
total_age_series = (
    series for series in resp.data.series if series.key.AGE == 'TOTAL'
)
data = resp.write(total_age_series)
def dump_flows(data_source: sdmx.Request):
    """Print the dataflows offered by *data_source* after pandas conversion.

    Requests all dataflows from the source, converts the response's
    ``dataflow`` collection with ``sdmx.to_pandas`` and prints the result.
    """
    response = data_source.dataflow()
    print(sdmx.to_pandas(response.dataflow))
from pandasdmx import Request

# Connect to the Eurostat web service
estat = Request('ESTAT')

# Download the dataflow definitions
flows = estat.dataflow()

# Information about the dataflow response
flows.url
flows.http_headers

# Export the dataflow definitions to a pandas DataFrame
dflows = flows.write().dataflow

# List the tables of the high-tech database. The boolean mask is used
# directly instead of being compared with True; na=False keeps the old
# "missing means no match" behaviour.
ht_tabs = dflows[dflows.index.str.startswith('htec', na=False)]

# Description of a single table: first value of its row. .iloc makes the
# positional access explicit; a bare [0] on a label-indexed Series is
# deprecated in recent pandas.
kia_emp = dflows.loc['htec_kia_emp2'].iloc[0]

# Dataflow definition
df_def = flows.dataflow.htec_kia_emp2

# ID of the data structure used by the table
dsd_id = df_def.structure.id

# Support dict of request parameters (not used by the URL request below)
refs = dict(references='all')

# Request the data structure through its full URL
dsd_response = estat.get(
    url='http://ec.europa.eu/eurostat/SDMX/diss-web/rest/datastructure/ESTAT/'
    + dsd_id)

# Getting information about the datastructure