コード例 #1
0
ファイル: jsdmx.py プロジェクト: movermeyer/pyopendata
def read_jsdmx(path_or_buf):
    """
    Parse a SDMX-JSON data message into a pandas DataFrame.

    Parameters
    ----------
    path_or_buf : a valid SDMX-JSON string or file-like
        Handed to ``_read_content``. Content that comes back as a dict is
        used as-is; anything else is decoded with ``json.loads``.
        http://sdmx.org/wp-content/uploads/2014/07/sdmx-json-data-message.pdf

    Returns
    -------
    df : DataFrame
        Index is built from the 'observation' dimensions and columns from
        the 'series' dimensions of the message structure.

    Raises
    ------
    ValueError
        If 'dataSets' does not hold exactly one entry.
    """
    content = _read_content(path_or_buf)

    import json
    if not isinstance(content, dict):
        # keep the key order of the message while decoding
        content = json.loads(content, object_pairs_hook=compat.OrderedDict)

    dimensions = content['structure']['dimensions']
    obs_index = _parse_dimensions(dimensions['observation'])
    series_columns = _parse_dimensions(dimensions['series'])

    datasets = content['dataSets']
    if len(datasets) != 1:
        raise ValueError("length of 'dataSets' must be 1")

    values = _parse_values(datasets[0], index=obs_index,
                           columns=series_columns)
    return pd.DataFrame(values, columns=series_columns, index=obs_index)
コード例 #2
0
ファイル: jsdmx.py プロジェクト: racheljoyforshaw/pyopendata
def read_jsdmx(path_or_buf):
    """
    Build a pandas DataFrame from a SDMX-JSON data message.

    Parameters
    ----------
    path_or_buf : a valid SDMX-JSON string or file-like
        Read through ``_read_content``; a dict result is taken as already
        decoded, any other result is decoded as JSON.
        http://sdmx.org/wp-content/uploads/2014/07/sdmx-json-data-message.pdf

    Returns
    -------
    df : DataFrame
        Rows follow the parsed 'observation' dimensions, columns follow
        the parsed 'series' dimensions.

    Raises
    ------
    ValueError
        If the length of 'dataSets' is not 1.
    """
    raw = _read_content(path_or_buf)

    import json
    if isinstance(raw, dict):
        message = raw
    else:
        message = json.loads(raw, object_pairs_hook=compat.OrderedDict)

    dims = message['structure']['dimensions']
    row_labels = _parse_dimensions(dims['observation'])
    col_labels = _parse_dimensions(dims['series'])

    all_sets = message['dataSets']
    if len(all_sets) != 1:
        raise ValueError("length of 'dataSets' must be 1")
    only_set = all_sets[0]

    cells = _parse_values(only_set, index=row_labels, columns=col_labels)
    frame = pd.DataFrame(cells, columns=col_labels, index=row_labels)
    return frame
コード例 #3
0
ファイル: jstat.py プロジェクト: racheljoyforshaw/pyopendata
def read_jstat(path_or_buf, typ='frame', squeeze=True):
    """
    Convert a JSON-Stat string to pandas object

    Parameters
    ----------
    path_or_buf : a valid JSON-Stat string or file-like
        http://json-stat.org/

    typ : {'frame', 'series'}
        Type of object to recover (series or frame), default 'frame'

    squeeze : bool, default True
        If True, return DataFrame or Series when the input has only one
        dataset. When the input has multiple datasets, returns a dictionary
        of results. If False, always return a dictionary.

    Returns
    -------
    results : Series, DataFrame, or dictionary of Series or DataFrame.

    Raises
    ------
    ValueError
        If ``typ`` is neither 'frame' nor 'series'. This is checked before
        the input is read, so it is raised even for empty input.
    """
    # Fail fast: previously an invalid ``typ`` was only detected inside the
    # dataset loop, after reading/parsing, and never for empty input.
    if typ not in ('frame', 'series'):
        raise ValueError("'typ' must be either 'frame' or 'series'")

    jdata = _read_content(path_or_buf)

    if isinstance(jdata, dict):
        datasets = jdata
    else:
        import json
        # OrderedDict keeps the datasets in the order they appear
        datasets = json.loads(jdata, object_pairs_hook=compat.OrderedDict)

    results = {}
    for dataname, dataset in compat.iteritems(datasets):
        values = dataset['value']               # mandatory
        dimensions = dataset['dimension']       # mandatory
        # The optional 'status' member is not supported, as there is no
        # specific meaning for it in the current format specification.
        midx = _parse_dimensions(dimensions)
        values = _parse_values(values, size=len(midx))

        result = pd.Series(values, index=midx)
        if typ == 'frame':
            if result.index.nlevels > 1:
                # move the innermost index level into the columns
                result = result.unstack()
            else:
                result = result.to_frame()

        if squeeze and len(datasets) == 1:
            return result

        results[dataname] = result
    return results
コード例 #4
0
ファイル: jstat.py プロジェクト: movermeyer/pyopendata
def read_jstat(path_or_buf, typ='frame', squeeze=True):
    """
    Convert a JSON-Stat string to pandas object

    Parameters
    ----------
    path_or_buf : a valid JSON-Stat string or file-like
        http://json-stat.org/

    typ : {'frame', 'series'}
        Type of object to recover (series or frame), default 'frame'

    squeeze : bool, default True
        If True, return DataFrame or Series when the input has only one
        dataset. When the input has multiple datasets, returns a dictionary
        of results. If False, always return a dictionary.

    Returns
    -------
    results : Series, DataFrame, or dictionary of Series or DataFrame.

    Raises
    ------
    ValueError
        If ``typ`` is neither 'frame' nor 'series'. This is checked before
        the input is read, so it is raised even for empty input.
    """
    # Fail fast: previously an invalid ``typ`` was only detected inside the
    # dataset loop, after reading/parsing, and never for empty input.
    if typ not in ('frame', 'series'):
        raise ValueError("'typ' must be either 'frame' or 'series'")

    jdata = _read_content(path_or_buf)

    if isinstance(jdata, dict):
        datasets = jdata
    else:
        import json
        # OrderedDict keeps the datasets in the order they appear
        datasets = json.loads(jdata, object_pairs_hook=compat.OrderedDict)

    results = {}
    for dataname, dataset in compat.iteritems(datasets):
        values = dataset['value']  # mandatory
        dimensions = dataset['dimension']  # mandatory
        # The optional 'status' member is not supported, as there is no
        # specific meaning for it in the current format specification.
        midx = _parse_dimensions(dimensions)
        values = _parse_values(values, size=len(midx))

        result = pd.Series(values, index=midx)
        if typ == 'frame':
            if result.index.nlevels > 1:
                # move the innermost index level into the columns
                result = result.unstack()
            else:
                result = result.to_frame()

        if squeeze and len(datasets) == 1:
            return result

        results[dataname] = result
    return results
コード例 #5
0
ファイル: sdmx.py プロジェクト: racheljoyforshaw/pyopendata
def read_sdmx(path_or_buf, dtype='float64', dsd=None):
    """
    Parse a SDMX-XML data message into a pandas DataFrame.

    Parameters
    ----------
    path_or_buf : a valid SDMX-XML string or file-like
        https://webgate.ec.europa.eu/fpfis/mwikis/sdmx/index.php/Main_Page
    dtype : str
        dtype to coerce values
    dsd : dict
        parsed DSD dict corresponding to the SDMX-XML data; forwarded to
        the index and series construction helpers

    Returns
    -------
    df : DataFrame
        Transposed frame of the per-series observations, with columns set
        to the index constructed from the series keys.
    """
    xdata = _read_content(path_or_buf)

    import xml.etree.ElementTree as ET
    root = ET.fromstring(xdata)

    # the 'dimensionAtObservation' attribute names the constructed series
    header = _get_child(root, _MESSAGE + 'Structure')
    idx_name = header.get('dimensionAtObservation')
    dataset = _get_child(root, _DATASET)

    # one (key, observations) pair per series element, in document order
    parsed = [(_parse_series_key(node),
               _parse_observations(node.iter(_OBSERVATION)))
              for node in dataset.iter(_SERIES)]
    keys = [key for key, _ in parsed]
    obss = [obs for _, obs in parsed]

    mcols = _construct_index(keys, dsd=dsd)
    mseries = _construct_series(obss, name=idx_name, dsd=dsd)

    frame = pd.DataFrame(mseries, dtype=dtype).T
    frame.columns = mcols
    return frame
コード例 #6
0
def read_sdmx(path_or_buf, dtype='float64', dsd=None):
    """
    Build a pandas DataFrame from a SDMX-XML data message.

    Parameters
    ----------
    path_or_buf : a valid SDMX-XML string or file-like
        https://webgate.ec.europa.eu/fpfis/mwikis/sdmx/index.php/Main_Page
    dtype : str
        dtype to coerce values
    dsd : dict
        parsed DSD dict corresponding to the SDMX-XML data; passed on when
        constructing the column index and the series

    Returns
    -------
    df : DataFrame
        The constructed series laid out as a frame and transposed, with
        its columns replaced by the index built from the series keys.
    """
    content = _read_content(path_or_buf)

    import xml.etree.ElementTree as ET
    tree_root = ET.fromstring(content)

    observation_dim = _get_child(
        tree_root, _MESSAGE + 'Structure').get('dimensionAtObservation')
    data_node = _get_child(tree_root, _DATASET)

    series_keys, series_obs = [], []
    for element in data_node.iter(_SERIES):
        series_keys.append(_parse_series_key(element))
        series_obs.append(_parse_observations(element.iter(_OBSERVATION)))

    column_index = _construct_index(series_keys, dsd=dsd)
    built = _construct_series(series_obs, name=observation_dim, dsd=dsd)

    # one row per constructed series before the transpose
    result = pd.DataFrame(built, dtype=dtype)
    result = result.T
    result.columns = column_index
    return result
コード例 #7
0
ファイル: sdmx.py プロジェクト: racheljoyforshaw/pyopendata
def _read_sdmx_dsd(path_or_buf):
    """
    Convert a SDMX-XML DSD string to mapping dictionary

    Parameters
    ----------
    path_or_buf : a valid SDMX-XML DSD string or file-like
        https://webgate.ec.europa.eu/fpfis/mwikis/sdmx/index.php/Main_Page

    Returns
    -------
    results : namedtuple (SDMXCode)
        ``codes`` maps each codelist's name (as returned by
        ``_get_english_name``) to a dict of {code id: code name};
        ``ts`` is the list of ids of the time-dimension elements found
        under 'DataStructures'.
    """

    xdata = _read_content(path_or_buf)

    # xml.etree.cElementTree was removed in Python 3.9; ElementTree picks up
    # the C accelerator automatically, and read_sdmx already relies on it.
    import xml.etree.ElementTree as ET
    root = ET.fromstring(xdata)

    structure = _get_child(root, _MESSAGE + 'Structures')
    codes = _get_child(structure, _STRUCTURE + 'Codelists')
    datastructures = _get_child(structure, _STRUCTURE + 'DataStructures')

    code_results = {}
    for codelist in codes:
        # NOTE: codelists are keyed by their name, not by their 'id'
        codelist_name = _get_english_name(codelist)
        code_results[codelist_name] = {
            code.get('id'): _get_english_name(code)
            for code in codelist.iter(_CODE)
        }

    times = [t.get('id') for t in datastructures.iter(_TIMEDIMENSION)]

    return SDMXCode(codes=code_results, ts=times)
コード例 #8
0
def _read_sdmx_dsd(path_or_buf):
    """
    Convert a SDMX-XML DSD string to mapping dictionary

    Parameters
    ----------
    path_or_buf : a valid SDMX-XML DSD string or file-like
        https://webgate.ec.europa.eu/fpfis/mwikis/sdmx/index.php/Main_Page

    Returns
    -------
    results : namedtuple (SDMXCode)
        ``codes`` maps each codelist's name (as returned by
        ``_get_english_name``) to a dict of {code id: code name};
        ``ts`` is the list of ids of the time-dimension elements found
        under 'DataStructures'.
    """

    xdata = _read_content(path_or_buf)

    # xml.etree.cElementTree was removed in Python 3.9; ElementTree picks up
    # the C accelerator automatically, and read_sdmx already relies on it.
    import xml.etree.ElementTree as ET
    root = ET.fromstring(xdata)

    structure = _get_child(root, _MESSAGE + 'Structures')
    codes = _get_child(structure, _STRUCTURE + 'Codelists')
    datastructures = _get_child(structure, _STRUCTURE + 'DataStructures')

    code_results = {}
    for codelist in codes:
        # NOTE: codelists are keyed by their name, not by their 'id'
        codelist_name = _get_english_name(codelist)
        code_results[codelist_name] = {
            code.get('id'): _get_english_name(code)
            for code in codelist.iter(_CODE)
        }

    times = [t.get('id') for t in datastructures.iter(_TIMEDIMENSION)]

    return SDMXCode(codes=code_results, ts=times)