Exemple #1
0
def _read_zipped_sdmx(path_or_buf):
    """
    Open the single member of a zipped SDMX-XML payload

    The content obtained from ``path_or_buf`` is converted to bytes, loaded
    into an in-memory buffer and opened as a ZIP archive.

    Parameters
    ----------
    path_or_buf : passed unchanged to ``_read_content``

    Returns
    -------
    results : file-like
        Handle returned by ``ZipFile.open`` for the only archive member.
    """
    payload = compat.str_to_bytes(_read_content(path_or_buf))

    buf = compat.BytesIO()
    buf.write(payload)

    archive = zipfile.ZipFile(buf)
    members = archive.namelist()
    # The archive is expected to hold exactly one file.
    assert len(members) == 1
    return archive.open(members[0])
Exemple #2
0
def _read_zipped_sdmx(path_or_buf):
    """
    Return a readable handle for the one file inside a zipped SDMX-XML blob

    Parameters
    ----------
    path_or_buf : forwarded as-is to ``_read_content``

    Returns
    -------
    results : file-like
        The object produced by ``ZipFile.open`` for the sole member of the
        archive built from the content that was read.
    """
    content = _read_content(path_or_buf)

    stream = compat.BytesIO()
    stream.write(compat.str_to_bytes(content))

    zipped = zipfile.ZipFile(stream)
    names = zipped.namelist()
    # Exactly one member is expected in the archive.
    assert len(names) == 1
    only_name = names[0]
    return zipped.open(only_name)
def read_sdmx(path_or_buf, dtype='float64', dsd=None):
    """
    Parse a SDMX-XML message into a pandas DataFrame

    Parameters
    ----------
    path_or_buf : a valid SDMX-XML string or file-like
        https://webgate.ec.europa.eu/fpfis/mwikis/sdmx/index.php/Main_Page
    dtype : str
        dtype handed to the DataFrame constructor to coerce values
    dsd : dict
        parsed DSD dict corresponding to the SDMX-XML data; forwarded to
        the index and series construction helpers

    Returns
    -------
    results : DataFrame
        Transpose of the frame built from the constructed series, with the
        constructed index assigned as its columns.
    """
    import xml.etree.ElementTree as ET

    root = ET.fromstring(_read_content(path_or_buf))

    structure = _get_child(root, _MESSAGE + 'Structure')
    idx_name = structure.get('dimensionAtObservation')
    dataset = _get_child(root, _DATASET)

    # One (key, observations) pair per series element, in iteration order.
    parsed = [(_parse_series_key(node),
               _parse_observations(node.iter(_OBSERVATION)))
              for node in dataset.iter(_SERIES)]
    keys = [pair[0] for pair in parsed]
    obss = [pair[1] for pair in parsed]

    mcols = _construct_index(keys, dsd=dsd)
    mseries = _construct_series(obss, name=idx_name, dsd=dsd)

    frame = pd.DataFrame(mseries, dtype=dtype).T
    frame.columns = mcols
    return frame
Exemple #4
0
def read_sdmx(path_or_buf, dtype='float64', dsd=None):
    """
    Convert a SDMX-XML string to a pandas DataFrame

    Parameters
    ----------
    path_or_buf : a valid SDMX-XML string or file-like
        https://webgate.ec.europa.eu/fpfis/mwikis/sdmx/index.php/Main_Page
    dtype : str
        dtype to coerce values (passed to the DataFrame constructor)
    dsd : dict
        parsed DSD dict corresponding to the SDMX-XML data

    Returns
    -------
    results : DataFrame
        Built from the parsed series, transposed, with columns taken from
        the index constructed out of the series keys.
    """
    import xml.etree.ElementTree as ET

    document = ET.fromstring(_read_content(path_or_buf))

    header = _get_child(document, _MESSAGE + 'Structure')
    idx_name = header.get('dimensionAtObservation')
    dataset = _get_child(document, _DATASET)

    keys, obss = [], []
    for element in dataset.iter(_SERIES):
        keys.append(_parse_series_key(element))
        obss.append(_parse_observations(element.iter(_OBSERVATION)))

    mcols = _construct_index(keys, dsd=dsd)
    mseries = _construct_series(obss, name=idx_name, dsd=dsd)

    table = pd.DataFrame(mseries, dtype=dtype).T
    table.columns = mcols
    return table
Exemple #5
0
def _read_sdmx_dsd(path_or_buf):
    """
    Convert a SDMX-XML DSD string to mapping dictionary

    Parameters
    ----------
    path_or_buf : a valid SDMX-XML DSD string or file-like
        https://webgate.ec.europa.eu/fpfis/mwikis/sdmx/index.php/Main_Page

    Returns
    -------
    results : SDMXCode
        ``codes`` maps each codelist's English name to a dict of
        ``{code id: English code name}``; ``ts`` is the list of ``id``
        attributes of the time-dimension elements found under the
        data structures.
    """

    xdata = _read_content(path_or_buf)

    # xml.etree.cElementTree was deprecated in Python 3.3 and removed in
    # Python 3.9; ElementTree picks the C accelerator on its own.
    import xml.etree.ElementTree as ET

    root = ET.fromstring(xdata)

    structure = _get_child(root, _MESSAGE + "Structures")
    codes = _get_child(structure, _STRUCTURE + "Codelists")
    datastructures = _get_child(structure, _STRUCTURE + "DataStructures")

    code_results = {}
    for codelist in codes:
        # Results are keyed by the codelist's English name, not its id.
        codelist_name = _get_english_name(codelist)
        mapper = {}
        for code in codelist.iter(_CODE):
            mapper[code.get("id")] = _get_english_name(code)
        code_results[codelist_name] = mapper

    times = [t.get("id") for t in datastructures.iter(_TIMEDIMENSION)]

    return SDMXCode(codes=code_results, ts=times)
Exemple #6
0
def _read_sdmx_dsd(path_or_buf):
    """
    Convert a SDMX-XML DSD string to mapping dictionary

    Parameters
    ----------
    path_or_buf : a valid SDMX-XML DSD string or file-like
        https://webgate.ec.europa.eu/fpfis/mwikis/sdmx/index.php/Main_Page

    Returns
    -------
    results : SDMXCode
        ``codes`` maps each codelist's English name to a dict of
        ``{code id: English code name}``; ``ts`` is the list of ``id``
        attributes of the time-dimension elements found under the
        data structures.
    """

    xdata = _read_content(path_or_buf)

    # xml.etree.cElementTree was deprecated in Python 3.3 and removed in
    # Python 3.9; ElementTree picks the C accelerator on its own.
    import xml.etree.ElementTree as ET
    root = ET.fromstring(xdata)

    structure = _get_child(root, _MESSAGE + 'Structures')
    codes = _get_child(structure, _STRUCTURE + 'Codelists')
    datastructures = _get_child(structure, _STRUCTURE + 'DataStructures')

    code_results = {}
    for codelist in codes:
        # Results are keyed by the codelist's English name, not its id.
        codelist_name = _get_english_name(codelist)
        mapper = {}
        for code in codelist.iter(_CODE):
            mapper[code.get('id')] = _get_english_name(code)
        code_results[codelist_name] = mapper

    times = [t.get('id') for t in datastructures.iter(_TIMEDIMENSION)]

    return SDMXCode(codes=code_results, ts=times)
Exemple #7
0
def read_jsdmx(path_or_buf):
    """
    Build a pandas DataFrame from a SDMX-JSON message

    Parameters
    ----------
    path_or_buf : a valid SDMX-JSON string or file-like
        http://sdmx.org/wp-content/uploads/2014/07/sdmx-json-data-message.pdf

    Returns
    -------
    results : DataFrame
        Indexed by the parsed observation dimensions, with the parsed
        series dimensions as columns.

    Raises
    ------
    ValueError
        If ``dataSets`` does not contain exactly one entry.
    """

    jdata = _read_content(path_or_buf)

    # Prefer simplejson when installed, otherwise use the stdlib module.
    try:
        import simplejson as json
    except ImportError:
        if sys.version_info[:2] < (2, 7):
            raise ImportError('simplejson is required in python 2.6')
        import json

    # Content that is already a dict is used as-is; anything else is
    # decoded with key order preserved.
    data = (jdata if isinstance(jdata, dict)
            else json.loads(jdata, object_pairs_hook=OrderedDict))

    dimensions = data['structure']['dimensions']
    index = _parse_dimensions(dimensions['observation'])
    columns = _parse_dimensions(dimensions['series'])

    datasets = data['dataSets']
    if len(datasets) != 1:
        raise ValueError("length of 'dataSets' must be 1")
    values = _parse_values(datasets[0], index=index, columns=columns)

    return pd.DataFrame(values, columns=columns, index=index)
def read_jsdmx(path_or_buf):
    """
    Build a pandas DataFrame from a SDMX-JSON message

    Parameters
    ----------
    path_or_buf : a valid SDMX-JSON string or file-like
        http://sdmx.org/wp-content/uploads/2014/07/sdmx-json-data-message.pdf

    Returns
    -------
    results : DataFrame
        Indexed by the parsed observation dimensions, with the parsed
        series dimensions as columns.

    Raises
    ------
    ValueError
        If ``dataSets`` does not contain exactly one entry.
    """

    jdata = _read_content(path_or_buf)

    # Prefer simplejson when installed, otherwise use the stdlib module.
    try:
        import simplejson as json
    except ImportError:
        if sys.version_info[:2] < (2, 7):
            raise ImportError('simplejson is required in python 2.6')
        import json

    # Content that is already a dict is used as-is; anything else is
    # decoded with key order preserved.
    data = (jdata if isinstance(jdata, dict)
            else json.loads(jdata, object_pairs_hook=compat.OrderedDict))

    dimensions = data['structure']['dimensions']
    index = _parse_dimensions(dimensions['observation'])
    columns = _parse_dimensions(dimensions['series'])

    datasets = data['dataSets']
    if len(datasets) != 1:
        raise ValueError("length of 'dataSets' must be 1")
    values = _parse_values(datasets[0], index=index, columns=columns)

    return pd.DataFrame(values, columns=columns, index=index)
Exemple #9
0
def read_jsdmx(path_or_buf):
    """
    Build a pandas DataFrame from a SDMX-JSON message

    Parameters
    ----------
    path_or_buf : a valid SDMX-JSON string or file-like
        https://github.com/sdmx-twg/sdmx-json

    Returns
    -------
    results : DataFrame
        Indexed by the parsed observation dimensions, with the parsed
        series dimensions as columns.

    Raises
    ------
    ValueError
        If ``dataSets`` does not contain exactly one entry.
    """

    jdata = _read_content(path_or_buf)

    # Prefer simplejson when installed, otherwise use the stdlib module.
    try:
        import simplejson as json
    except ImportError:
        if sys.version_info[:2] < (2, 7):
            raise ImportError("simplejson is required in python 2.6")
        import json

    # Content that is already a dict is used as-is; anything else is
    # decoded with key order preserved.
    data = (jdata if isinstance(jdata, dict)
            else json.loads(jdata, object_pairs_hook=OrderedDict))

    dimensions = data["structure"]["dimensions"]
    index = _parse_dimensions(dimensions["observation"])
    columns = _parse_dimensions(dimensions["series"])

    datasets = data["dataSets"]
    if len(datasets) != 1:
        raise ValueError("length of 'dataSets' must be 1")
    values = _parse_values(datasets[0], index=index, columns=columns)

    return pd.DataFrame(values, columns=columns, index=index)
Exemple #10
0
def read_sdmx(path_or_buf, dtype='float64', dsd=None):
    """
    Convert a SDMX-XML string to pandas object

    Parameters
    ----------
    path_or_buf : a valid SDMX-XML string or file-like
        https://webgate.ec.europa.eu/fpfis/mwikis/sdmx/index.php/Main_Page
    dtype : str
        dtype to coerce values
    dsd : dict
        parsed DSD dict corresponding to the SDMX-XML data

    Returns
    -------
    results : DataFrame
        Transpose of the frame built from the constructed series, with the
        constructed index assigned as its columns.

    Raises
    ------
    ValueError
        If the message has no ``Structure`` element and the text of its
        second ``Text`` element does not start with ``http`` (that text is
        the error message), or if the zipped data behind that link still
        raises ``HTTPError`` after 60 attempts.
    """

    xdata = _read_content(path_or_buf)

    import xml.etree.ElementTree as ET
    root = ET.fromstring(xdata)

    try:
        structure = _get_child(root, _MESSAGE + 'Structure')
    except ValueError:
        # No Structure element: the second Text element is read as the
        # location of a zipped copy of the data.
        # NOTE(review): presumably the server answers large requests this
        # way -- confirm against the data provider.
        result = list(root.iter(_COMMON + 'Text'))[1].text
        if not result.startswith('http'):
            raise ValueError(result)

        for _ in range(60):
            # wait zipped data is prepared
            try:
                data = _read_zipped_sdmx(result)
                return read_sdmx(data, dtype=dtype, dsd=dsd)
            except HTTPError:
                # Pause before the next attempt. The sleep must live in
                # the handler: placed after the try statement it was
                # skipped by ``continue`` and the retries ran back to back.
                time.sleep(1)

        msg = ('Unable to download zipped data within 60 secs, '
               'please download it manually from: {0}')
        raise ValueError(msg.format(result))

    idx_name = structure.get('dimensionAtObservation')
    dataset = _get_child(root, _DATASET)

    keys = []
    obss = []

    for series in dataset.iter(_SERIES):
        keys.append(_parse_series_key(series))
        obss.append(_parse_observations(series.iter(_OBSERVATION)))

    mcols = _construct_index(keys, dsd=dsd)
    mseries = _construct_series(obss, name=idx_name, dsd=dsd)

    df = pd.DataFrame(mseries, dtype=dtype)
    df = df.T
    df.columns = mcols

    return df
Exemple #11
0
def read_sdmx(path_or_buf, dtype='float64', dsd=None):
    """
    Convert a SDMX-XML string to pandas object

    Parameters
    ----------
    path_or_buf : a valid SDMX-XML string or file-like
        https://webgate.ec.europa.eu/fpfis/mwikis/sdmx/index.php/Main_Page
    dtype : str
        dtype to coerce values
    dsd : dict
        parsed DSD dict corresponding to the SDMX-XML data

    Returns
    -------
    results : DataFrame
        Transpose of the frame built from the constructed series, with the
        constructed index assigned as its columns.

    Raises
    ------
    ValueError
        If the message has no ``Structure`` element and the text of its
        second ``Text`` element does not start with ``http`` (that text is
        the error message), or if the zipped data behind that link still
        raises ``HTTPError`` after 60 attempts.
    """

    xdata = _read_content(path_or_buf)

    import xml.etree.ElementTree as ET
    root = ET.fromstring(xdata)

    try:
        structure = _get_child(root, _MESSAGE + 'Structure')
    except ValueError:
        # No Structure element: the second Text element is read as the
        # location of a zipped copy of the data.
        # NOTE(review): presumably the server answers large requests this
        # way -- confirm against the data provider.
        result = list(root.iter(_COMMON + 'Text'))[1].text
        if not result.startswith('http'):
            raise ValueError(result)

        for _ in range(60):
            # wait zipped data is prepared
            try:
                data = _read_zipped_sdmx(result)
                return read_sdmx(data, dtype=dtype, dsd=dsd)
            except HTTPError:
                # Pause before the next attempt. The sleep must live in
                # the handler: placed after the try statement it was
                # skipped by ``continue`` and the retries ran back to back.
                time.sleep(1)

        msg = ('Unable to download zipped data within 60 secs, '
               'please download it manually from: {0}')
        raise ValueError(msg.format(result))

    idx_name = structure.get('dimensionAtObservation')
    dataset = _get_child(root, _DATASET)

    keys = []
    obss = []

    for series in dataset.iter(_SERIES):
        keys.append(_parse_series_key(series))
        obss.append(_parse_observations(series.iter(_OBSERVATION)))

    mcols = _construct_index(keys, dsd=dsd)
    mseries = _construct_series(obss, name=idx_name, dsd=dsd)

    df = pd.DataFrame(mseries, dtype=dtype)
    df = df.T
    df.columns = mcols

    return df