    def fetch(self, series_meta):
        """
        Initial stab at querying. Will refactor code into a subclass...

        Can only support single series queries...
        :param series_meta: econ_platform_core.SeriesMetadata
        :return: list
        """
        query_ticker = str(series_meta.ticker_query)
        df = dbnomics.fetch_series(query_ticker)
        tickers = set(df.series_code)
        if len(tickers) > 1:
            raise NotImplementedError(
                'Multiple series queries not yet supported')
        ser = pandas.Series(df.value)
        ser.index = df.period
        ser.name = '{0}@{1}/{2}/{3}'.format(self.ProviderCode,
                                            df['provider_code'][0],
                                            df['dataset_code'][0],
                                            df['series_code'][0])
        # Convert 'NA' to NaN
        ser = ser.replace('NA', numpy.nan)
        # Always return a list of series. Only the user interface will convert list to a single pandas.Series
        return [
            ser,
        ]
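
For readers without the surrounding provider class, a standalone sketch of the same DataFrame-to-Series conversion (the series ID is borrowed from test_fetch_series_by_id below):

# Minimal sketch of the conversion done inside fetch() above.
import dbnomics
import numpy
import pandas

df = dbnomics.fetch_series('AMECO/ZUTN/EA19.1.0.0.0.ZUTN')
ser = pandas.Series(df.value)
ser.index = df.period
ser = ser.replace('NA', numpy.nan)  # provider 'NA' strings become NaN
print(ser.head())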
Example #2
def OECD_data(code, indicators, dset, periods):
    # Assumes a module-level `codes` dict keyed by country code, and
    # `from dbnomics import fetch_series`.
    global codes
    print('Fetching data ' + str(indicators) + '...')
    for i in indicators:
        codes[code][i] = fetch_series('OECD/{0}/{1}.{2}.{3}'.format(
            dset, code, i, periods))
    print("Done")
Example #3
def imf_WEOdata(code, indicators):
    # Assumes the same module-level `codes` dict as OECD_data above.
    global codes
    print('Fetching data ' + str(indicators) + '...')
    for i in indicators:
        codes[code][i] = fetch_series('IMF/WEO/{0}.{1}'.format(code, i))
    print("Done")
Example #4
def imf_CPIdata(code, indicators, periods):
    # Assumes the same module-level `codes` dict as OECD_data above.
    global codes
    print('Fetching data ' + str(indicators) + '...')
    for i in indicators:
        codes[code][i] = fetch_series('IMF/CPI/{0}.{1}.{2}'.format(
            periods, code, i))
    print("Done")
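
A usage sketch for the helper above: with periods='M', code='GB' and indicator 'PCPIHA_PC_CP_A_PT' it composes 'IMF/CPI/M.GB.PCPIHA_PC_CP_A_PT', the same series getCPI() fetches in the next example (the shape of `codes` is an assumption):

codes = {'GB': {}}  # assumed shape: country code -> dict of fetched frames
imf_CPIdata('GB', ['PCPIHA_PC_CP_A_PT'], 'M')
print(codes['GB']['PCPIHA_PC_CP_A_PT'].head())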
Example #5
def getCPI():
    ukCPI = fetch_series('IMF/CPI/M.GB.PCPIHA_PC_CP_A_PT')
    euCPI = fetch_series('IMF/CPI/M.U2.PCPIHA_PC_CP_A_PT')

    dbnomicsQuery = "period >= '" + START_DATE + "'"

    ukCPI = ukCPI.query(dbnomicsQuery)
    euCPI = euCPI.query(dbnomicsQuery)

    ukCPIDict = {
        ukCPI.period.iloc[i]: ukCPI.value.iloc[i]
        for i in range(len(ukCPI))
    }
    euCPIDict = {
        euCPI.period.iloc[i]: euCPI.value.iloc[i]
        for i in range(len(euCPI))
    }

    dates = []
    ukCPIarr = []
    euCPIarr = []

    # Match the dates; UK dates absent from the EU dict default to 0,
    # which turns the ratio below into inf for those entries
    for k, v in ukCPIDict.items():
        match = euCPIDict.get(k, 0)

        ukCPIarr.append(v)
        euCPIarr.append(match)
        dates.append(k)

    ukCPIarr = np.array(ukCPIarr, dtype=float)
    euCPIarr = np.array(euCPIarr, dtype=float)

    ukEuCPIRatio = ukCPIarr / euCPIarr

    # The mean and std of the inflation data that was used in training
    # This keeps the normalisation consistent
    cpi_mean = 0.3957793542714463
    cpi_std = 0.9813408912397396
    ukEuCPIRatio = (ukEuCPIRatio - cpi_mean) / cpi_std

    #  Return a dictionary of all inflation data
    cpiDict = {dates[i]: ukEuCPIRatio[i] for i in range(len(dates))}

    return cpiDict
Example #6
    def get_dbnomics_data(self,
                          country: str,
                          indicator_list: list,
                          start_date=None,
                          end_date=None,
                          freq='A',
                          counterparty=None,
                          flow=None) -> pd.DataFrame:
        """
        get api data from dbnomics website
        :param country: str
        :param indicator_list: list of str
        :param freq: 'Q', 'A', or 'M'
        :param start_date: 'yyyy-mm-dd'
        :param end_date: 'yyyy-mm-dd'
        :param counterparty: str
        :param flow: 'M' for import, 'X' for export
        :return: Data Frame
        """
        result = []
        for indicator in indicator_list:
            dbnomics_code = self.get_dbnomics_code(country, indicator, freq,
                                                   counterparty, flow)

            # fetch data from dbnomics, update fetched data
            try:
                df = dbnomics.fetch_series(dbnomics_code)
                df = self.__update_fetched_data(df,
                                                indicator,
                                                start_date,
                                                end_date,
                                                freq=freq)

            except Exception:
                if dbnomics_code in [
                        'IMF/BOP/A.CA.BSM_BP6_USD',
                        'IMF/BOP/A.ID.BSTV_BP6_USD',
                        'IMF/BOP/A.SA.BSR_BP6_USD', 'IMF/BOP/A.SA.BSM_BP6_USD',
                        'IMF/BOP/A.SA.BSOPCR_BP6_USD',
                        'IMF/BOP/Q.CA.BSM_BP6_USD',
                        'IMF/BOP/Q.ID.BSTV_BP6_USD',
                        'IMF/BOP/Q.SA.BSR_BP6_USD', 'IMF/BOP/Q.SA.BSM_BP6_USD',
                        'IMF/BOP/Q.SA.BSOPCR_BP6_USD'
                ]:
                    # Known-unavailable series: skip this indicator entirely.
                    continue
                else:
                    # TODO: use continue when dbnomics connection is stable!
                    # Retry this single indicator and keep its formatted result.
                    result.append(
                        self.get_dbnomics_data(country, [indicator],
                                               start_date, end_date, freq,
                                               counterparty, flow))
                    continue
            df = self.__format_dataframe(df, country, indicator, dbnomics_code,
                                         freq, counterparty)
            result.append(df)
        return pd.concat(result, axis=0)
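
The TODO above points at an unstable connection; a bounded retry wrapper is one alternative to the unbounded recursion (a sketch only; the names and retry counts are assumptions, not part of the source):

import time
import dbnomics

def fetch_with_retry(series_id, max_tries=3, delay=5.0):
    # Try the fetch a few times, sleeping between attempts, then re-raise.
    for attempt in range(max_tries):
        try:
            return dbnomics.fetch_series(series_id)
        except Exception:
            if attempt == max_tries - 1:
                raise
            time.sleep(delay)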
Example #7
def test_fetch_series_of_dataset():
    df = fetch_series('AMECO', 'ZUTN')

    provider_codes = df["provider_code"].unique()
    assert len(provider_codes) == 1, df
    assert provider_codes[0] == "AMECO", df

    dataset_codes = df["dataset_code"].unique()
    assert len(dataset_codes) == 1, df
    assert dataset_codes[0] == "ZUTN", df

    series_codes = df["series_code"].unique()
    assert len(series_codes) > 1, df
Example #8
def test_fetch_series_by_code_mask_with_plus_in_dimension_code():
    df = fetch_series('SCB', 'AKIAM', '"J+K"+"G+H".AM0301C1')

    provider_codes = df["provider_code"].unique()
    assert len(provider_codes) == 1, df
    assert provider_codes[0] == "SCB", df

    dataset_codes = df["dataset_code"].unique()
    assert len(dataset_codes) == 1, df
    assert dataset_codes[0] == "AKIAM", df

    series_codes = df["series_code"].unique()
    assert set(series_codes) == {'J+K.AM0301C1', 'G+H.AM0301C1'}, series_codes
Example #9
def test_fetch_series_by_code_mask():
    df = fetch_series("IMF", "CPI", "M.FR+DE.PCPIEC_IX+PCPIA_IX")

    provider_codes = df["provider_code"].unique()
    assert len(provider_codes) == 1, df
    assert provider_codes[0] == "IMF", df

    dataset_codes = df["dataset_code"].unique()
    assert len(dataset_codes) == 1, df
    assert dataset_codes[0] == "CPI", df

    series_codes = df["series_code"].unique()
    assert len(series_codes) == 4, df
Example #10
def test_fetch_series_by_id():
    df = fetch_series('AMECO/ZUTN/EA19.1.0.0.0.ZUTN')

    provider_codes = df["provider_code"].unique()
    assert len(provider_codes) == 1, df
    assert provider_codes[0] == "AMECO", df

    dataset_codes = df["dataset_code"].unique()
    assert len(dataset_codes) == 1, df
    assert dataset_codes[0] == "ZUTN", df

    series_codes = df["series_code"].unique()
    assert len(series_codes) == 1, df
    assert series_codes[0] == "EA19.1.0.0.0.ZUTN", df
Example #11
def test_fetch_series_by_dimension():
    df = fetch_series("WB",
                      "DB",
                      dimensions={
                          "country": ["FR", "IT", "ES"],
                          "indicator": ["IC.REG.COST.PC.FE.ZS.DRFN"],
                      })

    provider_codes = df["provider_code"].unique()
    assert len(provider_codes) == 1, df
    assert provider_codes[0] == "WB", df

    dataset_codes = df["dataset_code"].unique()
    assert len(dataset_codes) == 1, df
    assert dataset_codes[0] == "DB", df

    series_codes = df["series_code"].unique()
    assert len(series_codes), df
Example #12
def test_fetch_series_by_ids_in_same_dataset():
    df = fetch_series([
        'AMECO/ZUTN/EA19.1.0.0.0.ZUTN',
        'AMECO/ZUTN/DNK.1.0.0.0.ZUTN',
    ])

    provider_codes = df["provider_code"].unique()
    assert len(provider_codes) == 1, df
    assert provider_codes[0] == "AMECO", df

    dataset_codes = df["dataset_code"].unique()
    assert len(dataset_codes) == 1, df
    assert dataset_codes[0] == "ZUTN", df

    series_codes = df["series_code"].unique()
    assert len(series_codes) == 2, df
    assert series_codes[0] == "EA19.1.0.0.0.ZUTN", df
    assert series_codes[1] == "DNK.1.0.0.0.ZUTN", df
Example #13
def test_fetch_series_by_ids_in_different_datasets():
    df = fetch_series([
        'AMECO/ZUTN/EA19.1.0.0.0.ZUTN',
        'BIS/PP-SS/Q.AU.N.628',
    ])

    provider_codes = df["provider_code"].unique()
    assert len(provider_codes) == 2, df
    assert provider_codes[0] == "AMECO", df
    assert provider_codes[1] == "BIS", df

    dataset_codes = df["dataset_code"].unique()
    assert len(dataset_codes) == 2, df
    assert dataset_codes[0] == "ZUTN", df
    assert dataset_codes[1] == "PP-SS", df

    series_codes = df["series_code"].unique()
    assert len(series_codes) == 2, df
    assert series_codes[0] == "EA19.1.0.0.0.ZUTN", df
    assert series_codes[1] == "Q.AU.N.628", df
Example #14
    def fetch(self, series_meta):
        """"
        Initial stab at querying. Will refactor code into a subclass...

        Can only support single series queries...
        :param series_meta: econ_platform_core.SeriesMetadata
        :return: list
        """
        query_ticker = str(series_meta.ticker_query)
        df = dbnomics.fetch_series(query_ticker)
        tickers = set(df.series_code)
        if len(tickers) > 1:
            raise NotImplementedError(
                'Multiple series queries not yet supported')
        ser = pandas.Series(df.value)
        ser.index = df.period
        ser.name = '{0}@{1}/{2}/{3}'.format(self.ProviderCode,
                                            df['provider_code'][0],
                                            df['dataset_code'][0],
                                            df['series_code'][0])
        # Convert 'NA' to NaN
        ser = ser.replace('NA', numpy.nan)
        # Return the series directly; metadata is attached to series_meta below.
        # Get metadata
        colz = df.columns
        if 'FREQUENCY' in colz:
            series_meta.frequency = df['FREQUENCY'][df.index[0]]
        if 'series_name' in colz:
            series_meta.series_name = df['series_name'][df.index[0]]
            series_meta.series_description = '{0} : DB.nomics series {1}'.format(
                series_meta.series_name, query_ticker)
        excluded = ('value', 'period', 'original_period', 'indexed_at')
        for c in colz:
            if c in excluded:
                continue
            series_meta.ProviderMetadata[c] = df[c][df.index[0]]

        return ser
Example #15
def getEuroFundamentals():
    print()
    ukGDP = fetch_series('Eurostat/namq_10_gdp/Q.CLV05_MEUR.NSA.B1GQ.UK')
    ukExports = fetch_series('Eurostat/namq_10_exi/Q.CLV05_MEUR.NSA.P6.UK')
    franceGDP = fetch_series('Eurostat/namq_10_gdp/Q.CLV05_MEUR.NSA.B1GQ.FR')
    franceExports = fetch_series('Eurostat/namq_10_exi/Q.CLV05_MEUR.NSA.P6.FR')
    swedenGDP = fetch_series('Eurostat/namq_10_gdp/Q.CLV05_MEUR.NSA.B1GQ.SE')
    swedenExports = fetch_series('Eurostat/namq_10_exi/Q.CLV05_MEUR.NSA.P6.SE')
    greeceGDP = fetch_series('Eurostat/namq_10_gdp/Q.CLV05_MEUR.NSA.B1GQ.EL')
    greeceExports = fetch_series('Eurostat/namq_10_exi/Q.CLV05_MEUR.NSA.P6.EL')
    luxGDP = fetch_series('Eurostat/namq_10_gdp/Q.CLV05_MEUR.NSA.B1GQ.LU')
    luxExports = fetch_series('Eurostat/namq_10_exi/Q.CLV05_MEUR.NSA.P6.LU')
    spainGDP = fetch_series('Eurostat/namq_10_gdp/Q.CLV05_MEUR.NSA.B1GQ.ES')
    spainExports = fetch_series('Eurostat/namq_10_exi/Q.CLV05_MEUR.NSA.P6.ES')
    netherlandsGDP = fetch_series(
        'Eurostat/namq_10_gdp/Q.CLV05_MEUR.NSA.B1GQ.NL')
    netherlandsExports = fetch_series(
        'Eurostat/namq_10_exi/Q.CLV05_MEUR.NSA.P6.NL')

    toSave = (ukGDP, ukExports, franceGDP, franceExports, swedenGDP,
              swedenExports, greeceGDP, greeceExports, luxGDP, luxExports,
              spainGDP, spainExports, netherlandsGDP, netherlandsExports)

    for df in toSave:
        df.query("period >= '2010'", inplace=True)
        print("running")
        # Drop the metadata columns, keeping only period and value.
        df.drop([
            'National accounts indicator (ESA 2010)',
            'Geopolitical entity (reporting)', 'Seasonal adjustment',
            'Unit of measure', '@frequency', 'provider_code', 'dataset_code',
            'dataset_name', 'series_code', 'series_name', 'original_period',
            'Frequency', 'FREQ', 'unit', 's_adj', 'na_item', 'geo',
            'original_value'
        ], axis=1, inplace=True)
    pandas.set_option('display.max_rows', None)
    pandas.set_option('display.max_columns', None)
    print(toSave)
    pickle.dump(toSave, open('euroFundamentalData.pkl', 'wb'))
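
Loading the pickled tuple back is the mirror image; a minimal sketch:

# Round-trip check: reload the tuple of cleaned DataFrames saved above.
import pickle

with open('euroFundamentalData.pkl', 'rb') as f:
    frames = pickle.load(f)
ukGDP = frames[0]
print(ukGDP.head())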
Example #16
try:
    import pandas
    import dbnomics
except ImportError:
    print('Need to install pandas and dbnomics: pip install pandas dbnomics')
    raise



print("""Fetch a single series: Greek nominal GDP.
Got the code from the series webpage.
Code: 'Eurostat/namq_10_gdp/Q.CP_MEUR.SCA.B1GQ.EL'
URL: https://db.nomics.world/Eurostat/namq_10_gdp/Q.CP_MEUR.SCA.B1GQ.EL
""")

df1 = dbnomics.fetch_series('Eurostat/namq_10_gdp/Q.CP_MEUR.SCA.B1GQ.EL')
print('Object returned by fetch_series:')
print('Type:', type(df1))
print('Some of the data')
print(df1.head())
input('Hit Return to continue')
print('Columns of the dataframe:')
print(df1.columns)

print('This is a lot of columns. Useful, but often extraneous for analysis.')
print('The values of the time series:')
print(df1['value'])
print('This is indexed by the dataframe row number. We also need the time axis [period]')
print(df1['period'])
input('Hit any key; attempting to plot')
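
The listing breaks off before the plot; a minimal sketch of the step the script announces, assuming matplotlib (it is not imported above):

# Sketch: plot the Greek GDP series fetched above with matplotlib.
import matplotlib.pyplot as plt

plt.plot(df1['period'], df1['value'])
plt.title('Greek nominal GDP (Eurostat/namq_10_gdp/Q.CP_MEUR.SCA.B1GQ.EL)')
plt.xlabel('period')
plt.ylabel('value')
plt.show()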
Example #17
def getSeries(provider, dataset, df_variable, start_date, end_date, flat_list):
    '''
    Fetch the series from DB.nomics and compute the correlation between each
    series and the variable of interest.
    :param provider: provider of the dataset, e.g. AMECO, WTO, NBB, ...
    :param dataset: name of the dataset
    :param df_variable: variable of interest in the format date - value
    :param start_date: start date of the variable of interest
    :param end_date: end date of the variable of interest
    :param flat_list: country list
    :return: a DataFrame with the correlation between the variable of interest
             and all external variables in the dataset
    '''
    try:
        df_series = fetch_series(provider_code=provider,
                                 dataset_code=dataset,
                                 max_nb_series=10000)
        df_series.columns = map(str.lower, df_series.columns)

        # Skip datasets that only contain annual series.
        if set(df_series['@frequency']) == {'annual'}:
            return pd.DataFrame()

        if 'country' in df_series.columns:
            df_series['country'] = df_series['country'].str.lower()
            df_series = df_series[df_series['country'].isin(flat_list)]

        df_mapping = df_series[[
            'provider_code', 'series_code', 'dataset_name', 'series_name'
        ]].drop_duplicates()
        df_series = df_series[[
            'provider_code', 'dataset_code', 'series_code', 'period', 'value',
            '@frequency'
        ]]
        df_series = df_series[(df_series['period'] >= start_date)
                              & (df_series['period'] <= end_date)]

        if not df_series.empty:
            df_variable = df_variable.reset_index(drop=True)
            df_monthly = make_monthly(df_series, start_date, end_date)
            df_all = pd.merge(df_variable, df_monthly, how='outer', on='date')
            df_all = df_all.sort_values('date').reset_index(drop=True)
            df_all = df_all[df_all.count(axis=1) > 1]

            cols = df_all.columns.difference(
                ['date', 'provider_code', 'dataset_code'])
            cols2 = list(set(df_series['series_code']))
            df_all[cols] = df_all[cols].pct_change().fillna(0).replace(
                [np.inf, -np.inf], 0)
            # Series-major loop order so corr lines up with np.repeat(cols2, 13)
            # and the cycling Time column built below. crosscorr and var_name
            # come from the enclosing module.
            corr = [
                crosscorr(df_all[var_name], df_all[var], i)
                for var in cols2 for i in range(0, 13)
            ]

            df_corr = pd.DataFrame(
                data={
                    'series_code': list(np.repeat(cols2, 13)),
                    'Time': list(range(0, 13)) * len(cols2),
                    'Correlation': corr
                })

            df_corr = df_corr[df_corr['Correlation'] > corr_threshold]
            df_all = pd.merge(df_corr,
                              df_mapping,
                              how='left',
                              on='series_code')
            df_indicators_corr = df_all[[
                'provider_code', 'series_code', 'dataset_name', 'series_name',
                'Time', 'Correlation'
            ]]

            df_vals = df_monthly.T
            df_vals.columns = df_vals.iloc[0]

            df_return = pd.merge(df_indicators_corr,
                                 df_vals,
                                 how='left',
                                 left_on='series_code',
                                 right_index=True)

            return df_return
    except Exception:
        logging.info("Dataset {} not parsed in getSeries.".format(dataset))
        return pd.DataFrame()
Example #18
tmp_list = []
codes = []

# temp_list (a list of lists of counterpart codes) is defined earlier in the
# original script; flatten it and build the DOT series codes.
for sublist in temp_list:
    for item in sublist:
        tmp_list.append(item)

for i in tmp_list:
    codes.append("IMF/DOT/A.AM.TMG_CIF_USD." + str(i))

dt = pd.DataFrame()
data = pd.DataFrame()

if not os.path.isfile('Homework 4 & 5/Import_Data.csv'):
    for i in codes:
        dt = fetch_series(str(i))
        # Accumulate fetched rows (DataFrame.append is gone in pandas >= 2.0).
        data = pd.concat([data, dt])
        data.to_csv("Homework 4 & 5/Import_Data.csv")

if os.path.isfile('Homework 4 & 5/Import_Data.csv'):
    dtt = pd.read_csv("Homework 4 & 5/Import_Data.csv")

import_data = dtt[[
    "original_value", "original_period", "Counterpart Reference Area"
]]
grouped_data = import_data.groupby(['original_period']).sum()
sorted_data = grouped_data.sort_values(by="original_value", ascending=False)

grouped_country = import_data.groupby(['Counterpart Reference Area']).sum()
sorted_country = grouped_country.sort_values(by='original_value',
                                             ascending=False)
Example #19
def test_fetch_series_with_na_values():
    df = fetch_series('AMECO', 'ZUTN', 'DEU.1.0.0.0.ZUTN')
    assert "NA" in list(df.original_value)
    assert not any(df.original_value.isna())
    assert "NA" not in list(df.value)
    assert any(df.value.isna())
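
As a companion to this test, a short sketch showing the same NA convention interactively:

# dbnomics keeps the provider's literal 'NA' in original_value and parses
# value to NaN, which is what the assertions above check.
from dbnomics import fetch_series

df = fetch_series('AMECO', 'ZUTN', 'DEU.1.0.0.0.ZUTN')
print(df[['original_value', 'value']].tail())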