def read_jsdmx(path_or_buf):
    """
    Build a pandas DataFrame from a SDMX-JSON data message

    Parameters
    ----------
    path_or_buf : a valid SDMX-JSON string or file-like
        The input is first handed to ``_read_content``. If that yields a
        dict it is used as the decoded message as-is; otherwise it is
        decoded with ``json.loads``.
        http://sdmx.org/wp-content/uploads/2014/07/sdmx-json-data-message.pdf

    Returns
    -------
    results : DataFrame
        Index is built from the observation-level dimensions and columns
        from the series-level dimensions of the message structure.

    Raises
    ------
    ValueError
        If 'dataSets' in the message does not hold exactly one entry.
    """
    import json

    content = _read_content(path_or_buf)
    if isinstance(content, dict):
        message = content
    else:
        message = json.loads(content, object_pairs_hook=compat.OrderedDict)

    # Both axes are described under structure -> dimensions.
    dimensions = message['structure']['dimensions']
    index = _parse_dimensions(dimensions['observation'])
    columns = _parse_dimensions(dimensions['series'])

    datasets = message['dataSets']
    if len(datasets) != 1:
        raise ValueError("length of 'dataSets' must be 1")

    values = _parse_values(datasets[0], index=index, columns=columns)
    return pd.DataFrame(values, columns=columns, index=index)
def read_jstat(path_or_buf, typ='frame', squeeze=True):
    """
    Convert a JSON-Stat string to pandas object

    Parameters
    ----------
    path_or_buf : a valid JSON-Stat string or file-like
        http://json-stat.org/
    typ : {'frame', 'series'}
        Type of object to recover (series or frame), default 'frame'
    squeeze : bool, default True
        If True, return DataFrame or Series when the input has only one
        dataset. When the input has multiple datasets, returns a dictionary
        of results keyed by dataset name.
        If False, always return a dictionary.

    Returns
    -------
    results : Series, DataFrame, or dictionary of Series or DataFrame.

    Raises
    ------
    ValueError
        If ``typ`` is neither 'frame' nor 'series'.
    """
    # Validate the argument up front so a bad ``typ`` is reported before any
    # reading/parsing happens, and also when the input holds no datasets.
    if typ not in ('frame', 'series'):
        raise ValueError("'typ' must be either 'frame' or 'series'")

    import json

    jdata = _read_content(path_or_buf)
    if isinstance(jdata, dict):
        # already-decoded content is used as-is
        datasets = jdata
    else:
        datasets = json.loads(jdata, object_pairs_hook=compat.OrderedDict)

    results = {}
    for dataname, dataset in compat.iteritems(datasets):
        values = dataset['value']           # mandatory
        dimensions = dataset['dimension']   # mandatory
        # The optional 'status' member is not supported, as there is no
        # specific meaning in the current format specification.

        midx = _parse_dimensions(dimensions)
        values = _parse_values(values, size=len(midx))
        result = pd.Series(values, index=midx)

        if typ == 'frame':
            # With several index levels the innermost one becomes the
            # columns; a single-level Series turns into a one-column frame.
            if result.index.nlevels > 1:
                result = result.unstack()
            else:
                result = result.to_frame()

        if len(datasets) == 1 and squeeze:
            return result
        results[dataname] = result
    return results
def read_sdmx(path_or_buf, dtype='float64', dsd=None):
    """
    Build a pandas DataFrame from a SDMX-XML data message

    Parameters
    ----------
    path_or_buf : a valid SDMX-XML string or file-like
        https://webgate.ec.europa.eu/fpfis/mwikis/sdmx/index.php/Main_Page
    dtype : str
        dtype to coerce values
    dsd : dict
        parsed DSD dict corresponding to the SDMX-XML data

    Returns
    -------
    results : DataFrame
        One column per series found in the data set; the columns are
        built from the series keys.
    """
    import xml.etree.ElementTree as ET

    root = ET.fromstring(_read_content(path_or_buf))

    structure = _get_child(root, _MESSAGE + 'Structure')
    idx_name = structure.get('dimensionAtObservation')
    dataset = _get_child(root, _DATASET)

    # Parse the key and the observations of each series in document order.
    parsed = [(_parse_series_key(series),
               _parse_observations(series.iter(_OBSERVATION)))
              for series in dataset.iter(_SERIES)]
    keys = [key for key, _ in parsed]
    obss = [obs for _, obs in parsed]

    mcols = _construct_index(keys, dsd=dsd)
    mseries = _construct_series(obss, name=idx_name, dsd=dsd)

    # The frame is built with one row per series, then flipped so that
    # each series ends up as a column.
    frame = pd.DataFrame(mseries, dtype=dtype).T
    frame.columns = mcols
    return frame
def _read_sdmx_dsd(path_or_buf):
    """
    Convert a SDMX-XML DSD string to mapping dictionary

    Parameters
    ----------
    path_or_buf : a valid SDMX-XML DSD string or file-like
        https://webgate.ec.europa.eu/fpfis/mwikis/sdmx/index.php/Main_Page

    Returns
    -------
    results : namedtuple (SDMXCode)
        ``codes`` maps each codelist's English name to a dict of
        {code id: English code name}; ``ts`` lists the ids of the time
        dimensions found under the data structures.
    """
    xdata = _read_content(path_or_buf)

    # ``xml.etree.cElementTree`` was removed in Python 3.9; the plain
    # ``ElementTree`` module is the portable spelling and matches read_sdmx.
    import xml.etree.ElementTree as ET

    root = ET.fromstring(xdata)

    structure = _get_child(root, _MESSAGE + 'Structures')
    codes = _get_child(structure, _STRUCTURE + 'Codelists')
    datastructures = _get_child(structure, _STRUCTURE + 'DataStructures')

    code_results = {}
    for codelist in codes:
        # NOTE: results are keyed by the codelist's English name, not its id
        codelist_name = _get_english_name(codelist)
        mapper = {}
        for code in codelist.iter(_CODE):
            mapper[code.get('id')] = _get_english_name(code)
        code_results[codelist_name] = mapper

    times = [t.get('id') for t in datastructures.iter(_TIMEDIMENSION)]

    return SDMXCode(codes=code_results, ts=times)