def _read_zipped_sdmx(path_or_buf):
    """
    Open the single member of a zipped SDMX-XML payload

    Parameters
    ----------
    path_or_buf : anything accepted by ``_read_content``
        Source of the zipped data.

    Returns
    -------
    file-like
        Handle returned by ``ZipFile.open`` for the only archive member.
    """
    payload = compat.str_to_bytes(_read_content(path_or_buf))

    # ZipFile reads from a file-like object, so stage the bytes in memory.
    buf = compat.BytesIO()
    buf.write(payload)

    archive = zipfile.ZipFile(buf)
    members = archive.namelist()
    # The archive is expected to hold exactly one file.
    assert len(members) == 1
    return archive.open(members[0])
def read_sdmx(path_or_buf, dtype='float64', dsd=None):
    """
    Convert a SDMX-XML string to pandas object

    Parameters
    ----------
    path_or_buf : a valid SDMX-XML string or file-like
        https://webgate.ec.europa.eu/fpfis/mwikis/sdmx/index.php/Main_Page
    dtype : str
        dtype to coerce values
    dsd : dict
        parsed DSD dict corresponding to the SDMX-XML data

    Returns
    -------
    results : DataFrame
        One column per series found in the data set; the column labels are
        built from the series keys by ``_construct_index``.
    """
    content = _read_content(path_or_buf)

    from xml.etree import ElementTree
    root = ElementTree.fromstring(content)

    # The header's Structure element names the observation-level dimension.
    header = _get_child(root, _MESSAGE + 'Structure')
    idx_name = header.get('dimensionAtObservation')

    dataset = _get_child(root, _DATASET)

    # Parse every series once, keeping its key and observations together.
    parsed = [
        (_parse_series_key(node),
         _parse_observations(node.iter(_OBSERVATION)))
        for node in dataset.iter(_SERIES)
    ]
    keys = [key for key, _ in parsed]
    obss = [obs for _, obs in parsed]

    mcols = _construct_index(keys, dsd=dsd)
    mseries = _construct_series(obss, name=idx_name, dsd=dsd)

    # Series come out as rows; transpose so that each one becomes a column.
    frame = pd.DataFrame(mseries, dtype=dtype).T
    frame.columns = mcols
    return frame
def _read_sdmx_dsd(path_or_buf):
    """
    Convert a SDMX-XML DSD string to mapping dictionary

    Parameters
    ----------
    path_or_buf : a valid SDMX-XML DSD string or file-like
        https://webgate.ec.europa.eu/fpfis/mwikis/sdmx/index.php/Main_Page

    Returns
    -------
    results : namedtuple (SDMXCode)
        ``codes`` maps each codelist name to a ``{code id: code name}``
        dict; ``ts`` lists the ``id`` of every time dimension found under
        the data structures.
    """
    xdata = _read_content(path_or_buf)

    # ``xml.etree.cElementTree`` was removed in Python 3.9; the plain
    # ``ElementTree`` module is available everywhere and is what
    # ``read_sdmx`` uses as well.
    import xml.etree.ElementTree as ET

    root = ET.fromstring(xdata)

    structure = _get_child(root, _MESSAGE + "Structures")
    codes = _get_child(structure, _STRUCTURE + "Codelists")
    datastructures = _get_child(structure, _STRUCTURE + "DataStructures")

    code_results = {}
    for codelist in codes:
        # NOTE: codelists are keyed by the name returned from
        # ``_get_english_name``, not by their "id" attribute.
        codelist_name = _get_english_name(codelist)
        mapper = {}
        for code in codelist.iter(_CODE):
            code_id = code.get("id")
            mapper[code_id] = _get_english_name(code)
        code_results[codelist_name] = mapper

    times = [t.get("id") for t in datastructures.iter(_TIMEDIMENSION)]
    return SDMXCode(codes=code_results, ts=times)
def _read_sdmx_dsd(path_or_buf):
    """
    Convert a SDMX-XML DSD string to mapping dictionary

    Parameters
    ----------
    path_or_buf : a valid SDMX-XML DSD string or file-like
        https://webgate.ec.europa.eu/fpfis/mwikis/sdmx/index.php/Main_Page

    Returns
    -------
    results : namedtuple (SDMXCode)
        ``codes`` maps each codelist name to a ``{code id: code name}``
        dict; ``ts`` lists the ``id`` of every time dimension found under
        the data structures.
    """
    xdata = _read_content(path_or_buf)

    # ``xml.etree.cElementTree`` was removed in Python 3.9; the plain
    # ``ElementTree`` module is available everywhere and is what
    # ``read_sdmx`` uses as well.
    import xml.etree.ElementTree as ET

    root = ET.fromstring(xdata)

    structure = _get_child(root, _MESSAGE + 'Structures')
    codes = _get_child(structure, _STRUCTURE + 'Codelists')
    datastructures = _get_child(structure, _STRUCTURE + 'DataStructures')

    code_results = {}
    for codelist in codes:
        # NOTE: codelists are keyed by the name returned from
        # ``_get_english_name``, not by their 'id' attribute.
        codelist_name = _get_english_name(codelist)
        mapper = {}
        for code in codelist.iter(_CODE):
            code_id = code.get('id')
            mapper[code_id] = _get_english_name(code)
        code_results[codelist_name] = mapper

    times = [t.get('id') for t in datastructures.iter(_TIMEDIMENSION)]
    return SDMXCode(codes=code_results, ts=times)
def read_jsdmx(path_or_buf):
    """
    Convert a SDMX-JSON string to pandas object

    Parameters
    ----------
    path_or_buf : a valid SDMX-JSON string or file-like
        http://sdmx.org/wp-content/uploads/2014/07/sdmx-json-data-message.pdf

    Returns
    -------
    results : DataFrame
        Indexed by the parsed observation dimensions, with the parsed
        series dimensions as columns.

    Raises
    ------
    ValueError
        If the message does not contain exactly one data set.
    """
    jdata = _read_content(path_or_buf)

    # Prefer simplejson when installed; otherwise use the standard library.
    try:
        import simplejson as json
    except ImportError:
        if sys.version_info[:2] < (2, 7):
            raise ImportError('simplejson is required in python 2.6')
        import json

    # Content that is already a dict is used as-is; anything else is decoded,
    # keeping the key order of the document.
    data = (jdata if isinstance(jdata, dict)
            else json.loads(jdata, object_pairs_hook=OrderedDict))

    dimensions = data['structure']['dimensions']
    index = _parse_dimensions(dimensions['observation'])
    columns = _parse_dimensions(dimensions['series'])

    datasets = data['dataSets']
    if len(datasets) != 1:
        raise ValueError("length of 'dataSets' must be 1")

    values = _parse_values(datasets[0], index=index, columns=columns)
    return pd.DataFrame(values, columns=columns, index=index)
def read_jsdmx(path_or_buf):
    """
    Convert a SDMX-JSON string to pandas object

    Parameters
    ----------
    path_or_buf : a valid SDMX-JSON string or file-like
        http://sdmx.org/wp-content/uploads/2014/07/sdmx-json-data-message.pdf

    Returns
    -------
    results : DataFrame
        Indexed by the parsed observation dimensions, with the parsed
        series dimensions as columns.

    Raises
    ------
    ValueError
        If the message does not contain exactly one data set.
    """
    jdata = _read_content(path_or_buf)

    # Prefer simplejson when installed; otherwise use the standard library.
    try:
        import simplejson as json
    except ImportError:
        if sys.version_info[:2] < (2, 7):
            raise ImportError('simplejson is required in python 2.6')
        import json

    if not isinstance(jdata, dict):
        # Decode text while keeping the key order of the document.
        jdata = json.loads(jdata, object_pairs_hook=compat.OrderedDict)
    data = jdata

    dimensions = data['structure']['dimensions']
    index = _parse_dimensions(dimensions['observation'])
    columns = _parse_dimensions(dimensions['series'])

    datasets = data['dataSets']
    if len(datasets) != 1:
        raise ValueError("length of 'dataSets' must be 1")
    only_dataset = datasets[0]

    values = _parse_values(only_dataset, index=index, columns=columns)
    return pd.DataFrame(values, columns=columns, index=index)
def read_jsdmx(path_or_buf):
    """
    Convert a SDMX-JSON string to pandas object

    Parameters
    ----------
    path_or_buf : a valid SDMX-JSON string or file-like
        https://github.com/sdmx-twg/sdmx-json

    Returns
    -------
    results : DataFrame
        Indexed by the parsed observation dimensions, with the parsed
        series dimensions as columns.

    Raises
    ------
    ValueError
        If the message does not contain exactly one data set.
    """
    jdata = _read_content(path_or_buf)

    # Prefer simplejson when installed; otherwise use the standard library.
    try:
        import simplejson as json
    except ImportError:
        if sys.version_info[:2] < (2, 7):
            raise ImportError("simplejson is required in python 2.6")
        import json

    # Content that is already a dict is used as-is; anything else is decoded,
    # keeping the key order of the document.
    data = (
        jdata
        if isinstance(jdata, dict)
        else json.loads(jdata, object_pairs_hook=OrderedDict)
    )

    dimensions = data["structure"]["dimensions"]
    index = _parse_dimensions(dimensions["observation"])
    columns = _parse_dimensions(dimensions["series"])

    datasets = data["dataSets"]
    if len(datasets) != 1:
        raise ValueError("length of 'dataSets' must be 1")

    values = _parse_values(datasets[0], index=index, columns=columns)
    return pd.DataFrame(values, columns=columns, index=index)
def read_sdmx(path_or_buf, dtype='float64', dsd=None):
    """
    Convert a SDMX-XML string to pandas object

    Parameters
    ----------
    path_or_buf : a valid SDMX-XML string or file-like
        https://webgate.ec.europa.eu/fpfis/mwikis/sdmx/index.php/Main_Page
    dtype : str
        dtype to coerce values
    dsd : dict
        parsed DSD dict corresponding to the SDMX-XML data

    Returns
    -------
    results : DataFrame
        One column per series found in the data set; the column labels are
        built from the series keys by ``_construct_index``.

    Raises
    ------
    ValueError
        If the message has no ``Structure`` element and its second ``Text``
        element is not an http(s) URL (the text itself is the error
        message), or if the zipped data at that URL could not be downloaded
        after 60 attempts.
    """
    xdata = _read_content(path_or_buf)

    import xml.etree.ElementTree as ET
    root = ET.fromstring(xdata)

    try:
        structure = _get_child(root, _MESSAGE + 'Structure')
    except ValueError:
        # No Structure element: the response points at a zipped result
        # instead. The second Text element carries its location.
        result = list(root.iter(_COMMON + 'Text'))[1].text
        if not result.startswith('http'):
            raise ValueError(result)

        for _ in range(60):
            # The zipped data may not be ready yet; retry once per second.
            try:
                data = _read_zipped_sdmx(result)
                return read_sdmx(data, dtype=dtype, dsd=dsd)
            except HTTPError:
                # Sleep here rather than after a ``continue``: the latter
                # made the pause unreachable and turned the loop into 60
                # back-to-back requests.
                time.sleep(1)

        msg = ('Unable to download zipped data within 60 secs, '
               'please download it manually from: {0}')
        raise ValueError(msg.format(result))

    idx_name = structure.get('dimensionAtObservation')
    dataset = _get_child(root, _DATASET)

    keys = []
    obss = []
    for series in dataset.iter(_SERIES):
        keys.append(_parse_series_key(series))
        obss.append(_parse_observations(series.iter(_OBSERVATION)))

    mcols = _construct_index(keys, dsd=dsd)
    mseries = _construct_series(obss, name=idx_name, dsd=dsd)

    # Series come out as rows; transpose so that each one becomes a column.
    df = pd.DataFrame(mseries, dtype=dtype)
    df = df.T
    df.columns = mcols
    return df