Example #1
def test_request():
    # Constructor
    r = pandasdmx.Request(log_level=logging.ERROR)

    # An invalid source name raises an exception
    with pytest.raises(ValueError):
        pandasdmx.Request("noagency")

    # Regular methods
    r.clear_cache()

    r.timeout = 300
    assert r.timeout == 300

    # dir() includes convenience methods for resource endpoints
    expected = {
        "cache",
        "clear_cache",
        "get",
        "preview_data",
        "series_keys",
        "session",
        "source",
        "timeout",
        "view_doc",
    }
    expected |= set(ep.name for ep in pandasdmx.Resource)
    assert set(filter(lambda s: not s.startswith("_"), dir(r))) == expected
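A minimal sketch of how the Request object exercised above is typically used against a live source (assuming the 'ECB' source is registered and network access is available; cf. Example #28):

import pandasdmx

ecb = pandasdmx.Request("ECB")
flows = ecb.dataflow()  # fetch the list of available dataflows
print(pandasdmx.to_pandas(flows.dataflow))  # id -> name, as a pandas Series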
Example #2
def fetch_dataset(db_collection, flow_name: str, parameters):
    # 1. Try a CSV fetch; if that doesn't work, fall back to pandasdmx to get the dataframe
    data_response = requests.get(url,
                                 params=parameters,
                                 headers={"Accept": "text/csv"})
    assert data_response.status_code == 200
    with get_tempfile() as fp:
        fp.write(data_response.text.encode())
        fp.seek(0)
        kwargs = dict(extra_csv_parms.get(flow_name, {}))
        try:
            df = pd.read_csv(fp, **kwargs)
            save_dataframe(db_collection, {}, df, url, "ECB")
            return
        except pd.errors.EmptyDataError:  # no data is ignored as far as --fail-fast is concerned
            print(f"No CSV data to save; now trying {flow_name} using pandasdmx")
            # FALLTHRU...

    # 2. Fall back to pandasdmx if the CSV fetch fails
    ecb = sdmx.Request("ECB", backend="memory")
    data_msg = ecb.data(flow_name, params=parameters)
    df = sdmx.to_pandas(data_msg)
    assert isinstance(df, pd.DataFrame)
    save_dataframe(db_collection, {}, df, url, "ECB")
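Example #2 leans on several helpers defined elsewhere in that project (url, extra_csv_parms, get_tempfile, save_dataframe). A hypothetical stub for get_tempfile, just to make the CSV branch self-contained:

import tempfile

def get_tempfile():
    # Binary temporary file, removed automatically when the context exits
    return tempfile.NamedTemporaryFile(suffix=".csv")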
Example #3
def downloadCSV():
    try:
        script_dir = os.path.abspath(os.path.dirname(sys.argv[0]) or '.')
        csv_path = os.path.join(script_dir, 'DATA_CSV')
        oecd = pandasdmx.Request('OECD')
        while True:
            lock.acquire()
            if len(datasetcode) > 0:
                code = datasetcode.pop(0)
                title = filename.pop(0)
                lock.release()
                print('in datasetcode=', code, "length of datasetcode ",
                      len(datasetcode))
                try:
                    data_response = oecd.data(resource_id=code, key='all')
                    df = data_response.write(data_response.data.series,
                                             parse_time=False)
                    s = str(title).rstrip('.')  # drop trailing dots; safe for empty names
                    df.to_csv(os.path.join(csv_path, s + '.csv'), sep=',')
                    print('in download, completed to_csv', "datacode=", code)
                except Exception:
                    pass  # skip datasets that fail to download or convert
            else:
                lock.release()
                time.sleep(0.01)
    except Exception:
        pass  # thread exits silently on unexpected errors
Example #4
def test_request_get_args():
    req = sdmx.Request('ESTAT')

    # Request._make_key accepts '+'-separated values
    args = dict(resource_id='une_rt_a',
                key={'GEO': 'EL+ES+IE'},
                params={'startPeriod': '2007'},
                dry_run=True,
                use_cache=True)
    # Store the URL
    url = req.data(**args).url

    # Using an iterable of key values gives the same URL
    args['key'] = {'GEO': ['EL', 'ES', 'IE']}
    assert req.data(**args).url == url

    # Using a direct string for a key gives the same URL
    args['key'] = '....EL+ES+IE'  # No specified values for first 4 dimensions
    assert req.data(**args).url == url

    # Giving 'provider' is redundant for a data request and causes a warning
    with pytest.warns(UserWarning, match="'provider' argument is redundant"):
        req.data('une_rt_a',
                 key={'GEO': 'EL+ES+IE'},
                 params={'startPeriod': '2007'},
                 provider='ESTAT')

    # Using an unknown endpoint raises an exception
    with pytest.raises(ValueError):
        req.get('badendpoint', 'id')
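The dry_run=True flag used above makes Request.data() return the prepared request instead of contacting the web service, which is why only the generated .url is inspected. A minimal sketch:

req = sdmx.Request('ESTAT')
prepared = req.data('une_rt_a', key={'GEO': 'EL'}, dry_run=True)
print(prepared.url)  # the URL that would have been requested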
Example #5
def test_write_constraint():
    """'constraint' argument to writer.write_dataset."""
    with specimen('ng-ts.xml') as f:
        msg = sdmx.read_sdmx(f)

    # Fetch the message's DSD
    assert msg.structure.is_external_reference
    # NB the specimen included in tests/data has 'ECB_EXR_NG' as the
    #    data structure ID; but a query against the web service gives
    #    'ECB_EXR1' for the same data structure.
    id = 'ECB_EXR1'
    dsd = sdmx.Request(msg.structure.maintainer.id) \
              .get('datastructure', id) \
              .structure[id]

    # Create a ContentConstraint
    cc = dsd.make_constraint({'CURRENCY': 'JPY+USD'})

    # Write the message without constraint
    s1 = sdmx.to_pandas(msg)
    assert len(s1) == 12
    assert set(s1.index.to_frame()['CURRENCY']) == {'CHF', 'GBP', 'JPY', 'USD'}

    # Writing with the constraint produces fewer items: only those matching
    # the constraint
    s2 = sdmx.to_pandas(msg, constraint=cc)
    assert len(s2) == 6
    assert set(s2.index.to_frame()['CURRENCY']) == {'JPY', 'USD'}
Example #6
def test_write_constraint():
    """'constraint' argument to writer.write_dataset."""
    with specimen("ng-ts.xml") as f:
        msg = pandasdmx.read_sdmx(f)

    # Fetch the message's DSD
    assert msg.structure.is_external_reference
    # NB the specimen included in tests/data has 'ECB_EXR_NG' as the
    #    data structure ID; but a query against the web service gives
    #    'ECB_EXR1' for the same data structure.
    id = "ECB_EXR1"
    dsd = (
        pandasdmx.Request(msg.structure.maintainer.id)
        .get("datastructure", id)
        .structure[id]
    )

    # Create a ContentConstraint
    cc = dsd.make_constraint({"CURRENCY": "JPY+USD"})

    # Write the message without constraint
    s1 = pandasdmx.to_pandas(msg)
    assert len(s1) == 12
    assert set(s1.index.to_frame()["CURRENCY"]) == {"CHF", "GBP", "JPY", "USD"}

    # Writing with the constraint produces fewer items: only those matching
    # the constraint
    s2 = pandasdmx.to_pandas(msg, constraint=cc)
    assert len(s2) == 6
    assert set(s2.index.to_frame()["CURRENCY"]) == {"JPY", "USD"}
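In Examples #5 and #6, make_constraint() builds a ContentConstraint from a dict of dimension ID to '+'-separated codes. The same shorthand should work for any dimension in the DSD; a hypothetical variant:

cc2 = dsd.make_constraint({"CURRENCY": "USD", "FREQ": "M"})
s3 = pandasdmx.to_pandas(msg, constraint=cc2)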
Example #7
def test_request_get_args():
    req = pandasdmx.Request("ESTAT")

    # Request._make_key accepts '+'-separated values
    args = dict(
        resource_id="une_rt_a",
        key={"GEO": "EL+ES+IE"},
        params={"startPeriod": "2007"},
        dry_run=True,
        use_cache=True,
    )
    # Store the URL
    url = req.data(**args).url

    # Using an iterable of key values gives the same URL
    args["key"] = {"GEO": ["EL", "ES", "IE"]}
    assert req.data(**args).url == url

    # Using a direct string for a key gives the same URL
    args["key"] = "....EL+ES+IE"  # No specified values for first 4 dimensions
    assert req.data(**args).url == url

    # Giving 'provider' is redundant for a data request and causes a warning
    with pytest.warns(UserWarning, match="'provider' argument is redundant"):
        req.data(
            "une_rt_a",
            key={"GEO": "EL+ES+IE"},
            params={"startPeriod": "2007"},
            provider="ESTAT",
        )

    # Using an unknown endpoint raises an exception
    with pytest.raises(ValueError):
        req.get("badendpoint", "id")
Example #8
def _download_sdmx(table: str, dst_dir: PathLike) -> None:
    path = dst_dir / _SDMX_FILENAME
    service = conf["sdmx_service_name"]
    name = conf["sdmx_datastructure_template"].format(table=table)

    r = pandasdmx.Request(service)
    r.datastructure(name, tofile=path)
Example #9
def create_estat_request():
    # EuroStat datasets
    if 'CACHE_FILE_LOCATION' in app.config:
        cache_name = app.config['CACHE_FILE_LOCATION']
    else:
        cache_name = "/tmp/sdmx_datasets_cache"
    r = pandasdmx.Request("ESTAT", cache={"backend": "sqlite", "include_get_headers": True,
                                          "cache_name": cache_name})
    r.timeout = 180
    return r
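A hypothetical usage of the cached ESTAT client built by create_estat_request(); repeated identical queries are then answered from the sqlite cache:

estat = create_estat_request()
flows = estat.dataflow()  # served from cache after the first call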
Example #10
def test_request_get_exceptions():
    """Tests of Request.get() that don't require remote data."""
    req = pandasdmx.Request("ESTAT")

    # Exception is raised on unrecognized arguments
    exc = "unrecognized arguments: {'foo': 'bar'}"
    with pytest.raises(ValueError, match=exc):
        req.get("datastructure", foo="bar")

    with pytest.raises(ValueError, match=exc):
        pandasdmx.read_url("https://example.com", foo="bar")
Example #11
def test_request_get_exceptions():
    """Tests of Request.get() that don't require remote data."""
    req = sdmx.Request('ESTAT')

    # Exception is raised on unrecognized arguments
    exc = "unrecognized arguments: {'foo': 'bar'}"
    with pytest.raises(ValueError, match=exc):
        req.get('datastructure', foo='bar')

    with pytest.raises(ValueError, match=exc):
        sdmx.read_url('https://example.com', foo='bar')
Example #12
def test_request():
    # Constructor
    r = sdmx.Request(log_level=logging.ERROR)

    # An invalid source name raises an exception
    with pytest.raises(ValueError):
        sdmx.Request('noagency')

    # Regular methods
    r.clear_cache()

    r.timeout = 300
    assert r.timeout == 300

    # dir() includes convenience methods for resource endpoints
    expected = {
        'cache', 'clear_cache', 'get', 'preview_data', 'series_keys',
        'session', 'source', 'timeout'
    }
    expected |= set(ep.name for ep in sdmx.Resource)
    assert set(filter(lambda s: not s.startswith('_'), dir(r))) == expected
Example #13
def _read_structure_message(path):
    # Need to support pandasdmx==0.9 because 1.0 is not available for Python3.6
    if pandasdmx.__version__.startswith("0.9"):
        req = pandasdmx.Request()
        structure = req.get(fromfile=str(path),
                            writer="pandasdmx.writer.structure2pd")
        return structure.write()

    if pandasdmx.__version__.startswith("1."):
        return pandasdmx.read_sdmx(path)

    raise Exception(f"Unsupported pandasdmx version: {pandasdmx.__version__}")
Example #14
def fx_rates(cur_from, cur_to, freq="A", start_year="2015"):
    if cur_from == "EUR":
        request_ecb = sdmx.Request("ECB")
        fx_rates = (
            request_ecb.data(
                resource_id="EXR",
                key={"CURRENCY": cur_to, "FREQ": freq, "EXR_SUFFIX": "A"},
                params={"startPeriod": start_year},
            )
            .to_pandas()
            .reset_index()
            .loc[:, ["CURRENCY", "CURRENCY_DENOM", "TIME_PERIOD", "value"]]
            .set_axis(["to", "from", "period", "value"], axis=1)
        )

        if "EUR" not in set(fx_rates["to"]) and "EUR" in cur_to:
            dummy_fx_eur = (
                fx_rates[["period", "from"]]
                .assign(to=lambda x: "EUR")
                .assign(value=lambda x: 1)
            )
            # DataFrame.append was removed in pandas 2.0; concat is equivalent here
            fx_rates = pd.concat([fx_rates, dummy_fx_eur], ignore_index=True)

    if cur_from == "USD":
        request_bis = sdmx.Request("BIS")
        fx_rates = (
            request_bis.data(
                resource_id="WEBSTATS_XRU_CURRENT_DATAFLOW",
                key={"FREQ": freq, "CURRENCY": cur_to, "COLLECTION": "A"},
                params={"startPeriod": start_year},
            )
            .to_pandas()
            .reset_index()
            .loc[:, ["CURRENCY", "TIME_PERIOD", "value"]]
            .assign(from_name=lambda x: "USD")
            .set_axis(["to", "period", "value" ,"from"], axis=1)
        )

    fx_rates = (
        fx_rates
        .groupby(["period", "from", "to"])
        .mean()
        .reset_index()
    )

    return fx_rates
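A hypothetical call to fx_rates() as defined above: annual euro reference rates for USD and GBP since 2015, averaged per period:

rates = fx_rates("EUR", "USD+GBP")
print(rates.head())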
Example #15
def get_uk_regional_gdp():
    # most recent complete year (note: not used in the query below)
    latest_year = datetime.now().year - 1
    # Tell pdmx we want OECD data
    oecd = pdmx.Request("OECD")
    # Set out everything about the request in the format specified by the OECD API
    data = oecd.data(
        resource_id="REGION_ECONOM",
        key="1+2.UKC.SNA_2008.GDP.REG+CURR_PR.ALL.2017+2018+2019+2020/all?",
    ).to_pandas()
    # Example query that works against the OECD API:
    # "https://stats.oecd.org/restsdmx/sdmx.ashx/GetData/REGION_ECONOM/1+2.GBR+UKC+UKC11+UKC12.SNA_2008.GDP.REG+CURR_PR+USD_PPP+REAL_PR+REAL_PPP+PC+PC_CURR_PR+PC_USD_PPP+PC_REAL_PR+PC_REAL_PPP.ALL.2001+2002+2003+2004+2005+2006+2007+2008+2009+2010+2011+2012+2013+2014+2015+2016+2017+2018+2019+2020/all?"
    df = pd.DataFrame(data).reset_index()
    return df
Example #16
def create_estat_request():
    # from magic_box import app  # APP is this now. Adapt to the current app object!
    app = import_names("backend.restful_service", "app")
    if not app:
        app = import_names("magic_box", "app")
    # EuroStat datasets
    if 'CACHE_FILE_LOCATION' in app.config:
        cache_name = app.config['CACHE_FILE_LOCATION']
        print("USER: "******"/sdmx_datasets_cache"
    r = pandasdmx.Request("ESTAT", cache={"backend": "sqlite", "include_get_headers": True, "cache_name": cache_name})
    r.timeout = 180
    return r
Example #17
def _read(the_dir: PathLike) -> Mapping[str, pd.DataFrame]:
    path = the_dir / _SDMX_FILENAME
    req = pandasdmx.Request()
    structure = req.get(
        fromfile=str(path),
        writer='pandasdmx.writer.structure2pd'
    )

    codelist = structure.write()['codelist']

    result = {
        'dimensions': _get_dimensions(codelist),
        'attributes': _get_attributes(codelist),
    }

    return result
Example #18
def create_estat_request():
    # EuroStat datasets
    if get_global_configuration_variable('CACHE_FILE_LOCATION'):
        cache_name = get_global_configuration_variable('CACHE_FILE_LOCATION')
        print("USER: "******"/sdmx_datasets_cache"
    r = pandasdmx.Request("ESTAT",
                          cache={
                              "backend": "sqlite",
                              "include_get_headers": True,
                              "cache_name": cache_name
                          })
    r.timeout = 180
    return r
Example #19
def test_request_preview_data():
    req = sdmx.Request('ECB')

    # List of keys can be retrieved
    keys = req.preview_data('EXR')
    assert isinstance(keys, list)

    # Count of keys can be determined
    assert len(keys) == 4291

    # A filter can be provided, resulting in fewer keys
    keys = req.preview_data('EXR', {'CURRENCY': 'CAD+CHF+CNY'})
    assert len(keys) == 24

    # Result can be converted to pandas object
    keys_pd = sdmx.to_pandas(keys)
    assert isinstance(keys_pd, pd.DataFrame)
    assert len(keys_pd) == 24
Example #20
def test_request_preview_data():
    req = pandasdmx.Request("ECB")

    # List of keys can be retrieved
    keys = req.preview_data("EXR")
    assert isinstance(keys, list)

    # Count of keys can be determined
    assert len(keys) > 1000

    # A filter can be provided, resulting in fewer keys
    keys = req.preview_data("EXR", {"CURRENCY": "CAD+CHF+CNY"})
    assert len(keys) == 24

    # Result can be converted to pandas object
    keys_pd = pandasdmx.to_pandas(keys)
    assert isinstance(keys_pd, pd.DataFrame)
    assert len(keys_pd) == 24
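Request.preview_data() relies on the web service supporting a series-keys-only query (ECB does); it returns SeriesKey objects without observations, which to_pandas() turns into one row per key. A hypothetical narrower filter:

keys = req.preview_data("EXR", {"CURRENCY": "CAD", "FREQ": "M"})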
Example #21
def ecb():
    ecb = sdmx.Request('ECB',
                       backend='sqlite',
                       fast_save=True,
                       expire_after=6000)
    dump_flows(ecb)
    dsd = dump_structure(ecb, 'YC')
    for dim_name in ['FREQ', 'REF_AREA', 'DATA_TYPE_FM']:
        results = get_available_of_dimension(dsd, dim_name)
        print(results)

    # external trade for australia since 2000
    dsd = dump_structure(ecb, 'TRD')
    print("*** External Trade")
    dump_dimension(dsd, 'REF_AREA', all=True)
    dump_dimension(dsd, 'TRD_PRODUCT')
    dump_dimension(dsd, 'TRD_FLOW')
    resp = ecb.data('TRD', key={'REF_AREA': 'I8'})
    df = resp.to_pandas()
    print(df)
Example #22
def test_doc_example():
    """Code from example.rst."""
    import pandasdmx

    estat = pandasdmx.Request("ESTAT")

    metadata = estat.datastructure("DSD_une_rt_a")

    for cl in "CL_AGE", "CL_UNIT":
        print(pandasdmx.to_pandas(metadata.codelist[cl]))

    resp = estat.data("une_rt_a",
                      key={"GEO": "EL+ES+IE"},
                      params={"startPeriod": "2007"})

    data = pandasdmx.to_pandas(
        resp, datetime=dict(dim="TIME_PERIOD", freq="FREQ")
    ).xs("Y15-74", axis=1, level="AGE", drop_level=False)

    data.loc[:, ("Y15-74", "PC_ACT", "T")]

    # Further checks per https://github.com/dr-leo/pandaSDMX/issues/157

    # DimensionDescriptor for the structure message
    dd1 = metadata.structure.DSD_une_rt_a.dimensions

    # DimensionDescriptor retrieved whilst validating the data message
    dd2 = resp.data[0].structured_by.dimensions

    # DimensionDescriptors have same ID, components and order
    assert dd1 == dd2

    # One SeriesKey from the data message
    sk = list(resp.data[0].series.keys())[0]

    # Key values have same order as in the DSD
    assert dd1.order_key(sk) == sk
Example #23
def download_source():
    """
    Downloads the following datastructure from the UN SDMX API
    (1) United Nations Energy Balance Data
    (2) United Nations Greenhouse gas Emission Data
    Data is stored at corresponding CSV files
    """

    #if os.path.exists(UNFCC_FILE):
    #    os.rename(UNFCC_FILE,'old_'+UNFCC_FILE)
    #if os.path.exists(EBAL_FILE):
    #    os.rename(EBAL_FILE,'old_'+EBAL_FILE)

    try:
        unsd = sdmx.Request('UNSD')
        sdmx.logger.setLevel(logging.INFO)

        logger.info('Loading UNFCC Data')
        resp_unfcc = unsd.data('DF_UNData_UNFCC')

        logger.info('Loading UN Energy Balance Data')
        resp_ebal = unsd.data('DF_UNData_EnergyBalance')
    except Exception as e:
        logger.error('Error!! Please look at SDMX logs to troubleshoot: ' + str(e))
        traceback.print_exc(file=sys.stdout)
        return  # without responses there is nothing to convert below

    try:
        df_ebal = resp_ebal.to_pandas()
        df_unfcc = resp_unfcc.to_pandas()

        df_unfcc.reset_index().to_csv(UNFCC_FILE, index=False)
        logger.info('UNFCC Greenhouse Data stored as {}'.format(UNFCC_FILE))

        df_ebal.reset_index().to_csv(EBAL_FILE, index=False)
        logger.info('UN Energy Balance Data stored as {}'.format(EBAL_FILE))
    except Exception as e:
        logger.error('Error!! While saving data from SDMX to CSV ' + str(e))
        traceback.print_exc(file=sys.stdout)
Example #24
def test_doc_example():
    """Code from example.rst."""
    import pandasdmx as sdmx
    estat = sdmx.Request('ESTAT')

    metadata = estat.datastructure('DSD_une_rt_a')

    for cl in 'CL_AGE', 'CL_UNIT':
        print(sdmx.to_pandas(metadata.codelist[cl]))

    resp = estat.data(
        'une_rt_a',
        key={'GEO': 'EL+ES+IE'},
        params={'startPeriod': '2007'},
    )

    data = sdmx.to_pandas(resp) \
               .xs('Y15-74', level='AGE', drop_level=False)

    data.loc[('A', 'Y15-74', 'PC_ACT', 'T')]

    # Further checks per https://github.com/dr-leo/pandaSDMX/issues/157

    # DimensionDescriptor for the structure message
    dd1 = metadata.structure.DSD_une_rt_a.dimensions

    # DimensionDescriptor retrieved whilst validating the data message
    dd2 = resp.data[0].structured_by.dimensions

    # DimensionDescriptors have same ID, components and order
    assert dd1 == dd2

    # One SeriesKey from the data message
    sk = list(resp.data[0].series.keys())[0]

    # Key values have same order as in the DSD
    assert dd1.order_key(sk) == sk
Example #25
import pandasdmx as sdmx

# TODO: Move all of these to env/setting vars from production
sdmx_url = "https://sdmx.data.unicef.org/ws/public/sdmxapi/rest/data/ECARO,TRANSMONEE,1.0/.{}....?format=csv&startPeriod={}&endPeriod={}"

geo_json_file = (pathlib.Path(__file__).parent.parent.absolute() /
                 "assets/countries.geo.json")
with open(geo_json_file) as shapes_file:
    geo_json_countries = json.load(shapes_file)

with open(
        pathlib.Path(__file__).parent.parent.absolute() /
        "assets/indicator_config.json") as config_file:
    indicators_config = json.load(config_file)

unicef = sdmx.Request("UNICEF")

metadata = unicef.dataflow("TRANSMONEE", provider="ECARO", version="1.0")
dsd = metadata.structure["DSD_ECARO_TRANSMONEE"]

indicator_names = {
    code.id: code.name.en
    for code in dsd.dimensions.get("INDICATOR").local_representation.enumerated
}
# lbassil: get the age groups code list as it is not in the DSD
cl_age = unicef.codelist("CL_AGE", version="1.0")
age_groups = sdmx.to_pandas(cl_age)
dict_age_groups = age_groups["codelist"]["CL_AGE"].reset_index()
age_groups_names = {
    age["CL_AGE"]: age["name"]
    for index, age in dict_age_groups.iterrows() if age["CL_AGE"] != "_T"
}
Example #26
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 26 15:47:39 2020

@author: rohan
"""
import pandasdmx as sdmx

Agency_Code = 'ABS'
Dataset_Id = 'HF'  #'ATSI_BIRTHS_SUMM'
ABS = sdmx.Request(Agency_Code)
data_response = ABS.data(resource_id=Dataset_Id,
                         params={
                             'startPeriod': '2018-11',
                             'endPeriod': '2018-11'
                         })

data = data_response.data
msg = data_response.msg

measure = msg._elem['structure']['dimensions']['series'][0]['values']
region = msg._elem['structure']['dimensions']['series'][1]['values']
lender = msg._elem['structure']['dimensions']['series'][2]['values']
adjustment = msg._elem['structure']['dimensions']['series'][3]['values']
item = msg._elem['structure']['dimensions']['series'][4]['values']
frequency = msg._elem['structure']['dimensions']['series'][5]['values']


def flatten(l):
    # Recursively flatten an arbitrarily nested list into a flat list
    if not isinstance(l, list):
        return [l]
    return flatten(l[0]) + (flatten(l[1:]) if len(l) > 1 else [])
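A quick sanity check of the flatten() helper defined above:

assert flatten([1, [2, [3]], 4]) == [1, 2, 3, 4]
assert flatten(5) == [5]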
Example #27
}, {
    'id': 'TTL',
    'name': 'Total'
}, {
    'id': 'VAL',
    'name': 'Value added'
}, {
    'id': 'DOMIMP',
    'name': 'Domestic output and imports'
}, {
    'id': 'ICESHR',
    'name': 'Imports content of exports, as % of exports'
}]

# Obtain some country data relevant for ecological footprint calculations
unstats = pandasdmx.Request("UNSD")
for name, description in [
    ("AG_LND_AGRI_ZS", "Agricultural land (% of land area)"),
    ("AG_LND_FRST_K2", "Forest area (sq. km)"),
    ("EN_ATM_CO2E_PC", "CO2 emissions (metric tons per capita)"),
    ("NY_GDP_MKTP_CD", "GDP (current US$)"),
    ("NY_GNP_PCAP_CD", "GNI per capita, Atlas method (current US$)"),
    ("NY_GNP_PCAP_PP_CD", "GNI per capita, PPP (current international $)"),
    ("NY_GNP_ATLS_CD", "GNI, Atlas method (current US$)"),
    ("NY_GNP_MKTP_PP_CD", "GNI, PPP (current international $)"),
    ("AG_SRF_TOTL_K2", "Surface area (sq. km)")
]:
    data = unstats.get(resource_type='data',
                       resource_id="DF_UNDATA_WDI",
                       key={
                           "FREQ": "A",
Example #28
        "--dataset",
        help="Download only the specified flowRef [None]",
        type=str,
        default=None,
    )

    a = args.parse_args()
    config, password = read_config(a.config)
    m = config.get("mongo")
    mongo = pymongo.MongoClient(m.get("host"),
                                m.get("port"),
                                username=m.get("user"),
                                password=password)
    db = mongo[m.get("db")]

    ecb = sdmx.Request("ECB")
    all_flows = ecb.dataflow()
    df = sdmx.to_pandas(all_flows.dataflow).to_frame()

    print(f"Found {len(df)} ECB datasets to process")

    if a.dataset:
        for s in a.dataset.split(","):
            do_download.add(s)
        recent_tags = set()  # forcibly re-download all stated datasets
    else:
        month_ago = now() - timedelta(days=30)
        recent_tags = set(
            db.ecb_data_cache.distinct("tag",
                                       {"last_updated": {
                                           "$gte": month_ago
Example #29
def OECD_dataset(dataset_ids, Logger):
    Logger.warning('Trying to get data from OECD.stat')
    oecd = pandasdmx.Request('OECD')
    params = dict(startPeriod='1990-Q1',
                  endPeriod='2021-Q1',
                  dimensionAtObservation='TimeDimension',
                  detail='Full')
    dfs = {}
    # st.text(f'Getting Datasets names..')
    # st.text(f'dataset ids: {dataset_ids}')
    try:
        for dataset in dataset_ids:
            if 'GBARD' in dataset:
                # st.text(f'Getting Dataset: {dataset}')
                try:
                    Logger.warning(f'Loading {dataset} from OECD database')
                    data_response = oecd.data(resource_id=dataset,
                                              params=params)
                    df = get_df(data_response, Logger)
                    Logger.debug(f'flattening {dataset} df')
                    df.reset_index(level=['COUNTRY', 'SEO', 'MEASURE'],
                                   inplace=True)
                    sort_df: object = df.melt(
                        id_vars=['COUNTRY', 'SEO', 'MEASURE'],
                        var_name="Date",
                        value_name="Value")

                    dfs['gbard'] = sort_df

                except req.exceptions.ConnectionError as err:
                    Logger.warning(f'Dataset {dataset} got this error: {err}')
                    print(f'This dataset: {dataset} has failed to respond')
                    continue

            elif 'HIGH_AGLINK' in dataset:
                # st.text(f'Getting Dataset: {dataset}')
                try:
                    Logger.warning(f'Loading {dataset} from OECD database')
                    data_response = oecd.data(resource_id=dataset,
                                              params=params)
                    df = get_df(data_response, Logger)
                    Logger.debug(f'flattening {dataset} df')
                    df.reset_index(level=['LOCATION', 'COMMODITY', 'VARIABLE'],
                                   inplace=True)
                    sort_df: object = df.melt(
                        id_vars=['LOCATION', 'COMMODITY', 'VARIABLE'],
                        var_name="Date",
                        value_name="Value")

                    dfs['agricultural'] = sort_df

                except req.exceptions.ConnectionError as err:
                    Logger.warning(f'Dataset {dataset} got this error: {err}')
                    print(f'This dataset: {dataset} has failed to respond')
                    continue

            elif 'SNA_TABLE4' in dataset:
                # st.text(f'Getting Dataset: {dataset}')
                try:
                    Logger.warning(f'Loading {dataset} from OECD database')
                    data_response = oecd.data(resource_id=dataset,
                                              params=params)

                    df = get_df(data_response, Logger)
                    Logger.debug(f'flattening {dataset} df')
                    df.reset_index(level=['LOCATION', 'TRANSACT', 'MEASURE'],
                                   inplace=True)
                    sort_df: object = df.melt(
                        id_vars=['LOCATION', 'TRANSACT', 'MEASURE'],
                        var_name="Date",
                        value_name="Value")

                    dfs['currncy'] = sort_df

                except req.exceptions.ConnectionError as err:
                    Logger.warning(f'Dataset {dataset} got this error: {err}')
                    print(f'This dataset: {dataset} has failed to respond')
                    continue

        if len(dfs) == len(dataset_ids):
            # st.text(f'Success of Getting Datasets')
            Logger.debug('Success')
            print('Success')
        else:
            # st.text(f'Only {len(dfs)} datasets were found: {list(dfs.keys())}')
            Logger.info(f'Only {len(dfs)} datasets were found')
            print(f'Only {len(dfs)} datasets were found')
        return dfs

    except TypeError:
        pass
Example #30
def _download_sdmx(table: str, dst_dir: PathLike) -> None:
    path = dst_dir / _SDMX_FILENAME
    service = conf['sdmx_service_name']
    name = conf['sdmx_datastructure_template'].format(table=table)
    r = pandasdmx.Request(service)
    r.datastructure(name).write_source(str(path))
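Example #30 uses the pandasdmx 0.9 API, where the raw SDMX-ML is saved via Response.write_source(). Under pandasdmx >= 1.0 the same download is expressed with the tofile argument, as Example #8 shows:

r = pandasdmx.Request(service)
r.datastructure(name, tofile=path)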