Ejemplo n.º 1
0
    def test_wdi_download_error_handling(self):
        """Check how ``download`` reports bad country codes and indicators.

        Every scenario is exercised in two modes: with ``errors='raise'`` a
        ``ValueError`` matching the expected pattern must be raised; with
        ``errors='warn'`` a warning must be produced while a two-row
        ``DataFrame`` is still returned.
        """
        scenarios = [
            # (country codes, indicator(s), expected ValueError pattern)
            (['USA', 'XX'],
             'NY.GDP.PCAP.CD',
             "Invalid Country Code\\(s\\): XX"),
            (['USA'],
             ['NY.GDP.PCAP.CD', 'BAD_INDICATOR'],
             "The provided parameter value is not valid\\. Indicator: BAD_INDICATOR"),
        ]

        for countries, indicators, pattern in scenarios:
            with tm.assertRaisesRegexp(ValueError, pattern):
                download(country=countries, indicator=indicators,
                         start=2003, end=2004, errors='raise')

            if not PANDAS_0160:
                # assert_produces_warning does not exist in prior versions,
                # so the warning mode cannot be checked there.
                continue

            with self.assert_produces_warning():
                frame = download(country=countries, indicator=indicators,
                                 start=2003, end=2004, errors='warn')
                self.assertTrue(isinstance(frame, pd.DataFrame))
                self.assertEqual(len(frame), 2)
Ejemplo n.º 2
0
    def test_wdi_download(self):
        """Download GDP per capita for a mixed bag of country codes and
        compare it with stored reference values.

        Both sides are rounded to the nearest thousand so that later
        revisions of the published data do not break the comparison.
        """
        # The request mixes a bad indicator with two-letter (US),
        # three-letter (USA), standard (CA, MX), non-standard (KSV),
        # duplicated (US, US, USA) and unknown (BLA) country codes.
        #
        # Deliberately NOT included:
        #   * a crash-inducing country code (the World Bank raises its own
        #     exceptions and does not clean up legacy country codes);
        #   * a retired indicator (the user should want that to error).
        countries = ['CA', 'MX', 'USA', 'US', 'US', 'KSV', 'BLA']
        indicators = ['NY.GDP.PCAP.CD', 'BAD.INDICATOR']

        def by_index(frame):
            # sort_index() is used when PANDAS_0170 is set; otherwise fall
            # back to the older DataFrame.sort().
            return frame.sort_index() if PANDAS_0170 else frame.sort()

        reference = {
            ('Canada', '2003'): 28026.006013044702,
            ('Canada', '2004'): 31829.522562759001,
            ('Kosovo', '2003'): 1969.56271307405,
            ('Kosovo', '2004'): 2135.3328465238301,
            ('Mexico', '2003'): 6601.0420648056606,
            ('Mexico', '2004'): 7042.0247834044303,
            ('United States', '2003'): 39682.472247320402,
            ('United States', '2004'): 41928.886136479705,
        }
        expected = pd.DataFrame({'NY.GDP.PCAP.CD': reference})
        # Round, to ignore revisions to data.
        expected = by_index(np.round(expected, decimals=-3))

        result = download(country=countries, indicator=indicators,
                          start=2003, end=2004, errors='ignore')
        # Round, to ignore revisions to data.
        result = np.round(by_index(result), decimals=-3)

        if PANDAS_0140:
            expected.index.names = ['country', 'year']
        else:
            # Prior versions do not allow setting multiple names on a
            # MultiIndex, so take the index over from the result instead.
            expected.index = result.index
        tm.assert_frame_equal(result, expected)

        # Same query again, passing start and end as strings.
        result = download(country=countries, indicator=indicators,
                          start='2003', end='2004', errors='ignore')
        # Round, to ignore revisions to data.
        result = np.round(by_index(result), decimals=-3)
        tm.assert_frame_equal(result, expected)
Ejemplo n.º 3
0
    def test_wdi_download(self):
        """Compare downloaded GDP per capita with stored reference values,
        both rounded to the nearest thousand.
        """
        # The request mixes a bad indicator with two-letter (US),
        # three-letter (USA), standard (CA, MX), non-standard (KSV),
        # duplicated (US, US, USA) and unknown (BLA) country codes.
        #
        # Deliberately NOT included:
        #   * a crash-inducing country code (the World Bank raises its own
        #     exceptions and does not clean up legacy country codes);
        #   * a retired indicator (the user should want that to error).
        countries = ['CA', 'MX', 'USA', 'US', 'US', 'KSV', 'BLA']
        indicators = ['NY.GDP.PCAP.CD', 'BAD.INDICATOR']

        reference = {
            ('Canada', '2003'): 28026.006013044702,
            ('Canada', '2004'): 31829.522562759001,
            ('Kosovo', '2003'): 1969.56271307405,
            ('Kosovo', '2004'): 2135.3328465238301,
            ('Mexico', '2003'): 6601.0420648056606,
            ('Mexico', '2004'): 7042.0247834044303,
            ('United States', '2003'): 39682.472247320402,
            ('United States', '2004'): 41928.886136479705,
        }
        # Round, to ignore revisions to data.
        expected = pandas.np.round(
            pandas.DataFrame({'NY.GDP.PCAP.CD': reference}), decimals=-3)
        expected.sort(inplace=True)

        result = download(country=countries, indicator=indicators,
                          start=2003, end=2004, errors='ignore')
        result.sort(inplace=True)
        # Round, to ignore revisions to data.
        result = pandas.np.round(result, decimals=-3)

        # The expected frame takes over the index of the result, so only the
        # values (in sorted order) are effectively compared.
        expected.index = result.index
        assert_frame_equal(result, pandas.DataFrame(expected))
Ejemplo n.º 4
0
def get_wb_df(wb_name,colname):
    """Download one World Bank indicator for all countries.

    The query spans the module-level ``start_year`` .. ``today_year`` range
    and nothing is dropped, so the result may contain many NaN values.

    Parameters:
    * wb_name: indicator code passed to ``wb.download``.
    * colname: name given to the first column of the downloaded frame.

    Returns a new dataframe with that first column renamed to ``colname``.
    """
    raw_frame = wb.download(
        indicator=wb_name,
        start=start_year,
        end=today_year,
        country="all",
    )
    # Give the value column a sensible name; rename() returns a copy, the
    # downloaded frame itself is left untouched.
    first_column = raw_frame.columns[0]
    return raw_frame.rename(columns={first_column: colname})
Ejemplo n.º 5
0
    def test_wdi_download_w_retired_indicator(self):
        """Query the retired ``GDPPCKD`` indicator and skip on any outcome
        that shows the World Bank API changed its behaviour.
        """
        countries = ['CA', 'MX', 'US']

        # GDPPCKD used to be a common symbol.  It still shows up in the
        # search feature and is listed online, but API calls for it work
        # neither in the World Bank's own query builder nor in this module.
        # The test exists so that the error message given to users keeps
        # making sense (instead of some missing-key error because the JSON
        # message format changed).  If the World Bank ever finishes
        # deprecating the symbol, this test should still pass.
        indicators = ['GDPPCKD']

        try:
            result = download(country=countries, indicator=indicators,
                              start=2003, end=2004, errors='ignore')
        except ValueError as e:
            raise nose.SkipTest("No indicators returned data: {0}".format(e))
        else:
            # Reaching this point means the World Bank un-retired the
            # indicator (had they dropped it completely it would have been
            # caught above) or the API changed in an unexpected way.  If the
            # result actually has data, another bad ticker must be found.
            if len(result) > 0:
                raise nose.SkipTest("Invalid results")
def datasets(dataset):
    """Return one of the sample datasets as a dataframe.

    * ``'WB'``   -- indicator ``NY.GDP.PCAP.KD`` for US, CA and MX,
      2000-2015, fetched with ``wb.download``.
    * ``'Iris'`` -- the UCI iris data file, read without a header row.

    Any other value yields ``None``.
    """
    if dataset == 'WB':
        return wb.download(
            indicator='NY.GDP.PCAP.KD',
            country=['US', 'CA', 'MX'],
            start=2000,
            end=2015,
        )

    if dataset == 'Iris':
        iris_url = ('https://archive.ics.uci.edu/ml/'
                    'machine-learning-databases/iris/iris.data')
        return pd.read_csv(iris_url, header=None)

    return None
Ejemplo n.º 7
0
    def test_wdi_download_monthly(self):
        """Compare monthly ``COPPER`` data (2011-2012, ``freq='M'``) with
        stored reference values, via both ``download`` and
        ``WorldBankReader``.  All frames are rounded to the nearest thousand.
        """
        countries = 'ALL'
        indicator = 'COPPER'

        def by_index(frame):
            # sort_index() is used when PANDAS_0170 is set; otherwise fall
            # back to the older DataFrame.sort().
            return frame.sort_index() if PANDAS_0170 else frame.sort()

        reference = {
            ('World', '2011M01'): 9555.70,
            ('World', '2011M02'): 9867.60,
            ('World', '2011M03'): 9503.36,
            ('World', '2011M04'): 9492.79,
            ('World', '2011M05'): 8959.90,
            ('World', '2011M06'): 9066.85,
            ('World', '2011M07'): 9650.46,
            ('World', '2011M08'): 9000.76,
            ('World', '2011M09'): 8300.14,
            ('World', '2011M10'): 7394.19,
            ('World', '2011M11'): 7581.02,
            ('World', '2011M12'): 7565.48,
            ('World', '2012M01'): 8040.47,
        }
        expected = pd.DataFrame({'COPPER': reference})
        # Round, to ignore revisions to data.
        expected = by_index(np.round(expected, decimals=-3))

        result = download(country=countries, indicator=indicator,
                          start=2011, end=2012, freq='M', errors='ignore')
        result = np.round(by_index(result), decimals=-3)

        if PANDAS_0140:
            expected.index.names = ['country', 'year']
        else:
            # Prior versions do not allow setting multiple names on a
            # MultiIndex, so take the index over from the result instead.
            expected.index = result.index

        tm.assert_frame_equal(result, expected)

        # The reader class must give the same answer as the function.
        reader = WorldBankReader(indicator, countries=countries,
                                 start=2011, end=2012, freq='M',
                                 errors='ignore')
        result = np.round(by_index(reader.read()), decimals=-3)
        tm.assert_frame_equal(result, expected)
Ejemplo n.º 8
0
    def test_wdi_download_w_crash_inducing_countrycode(self):
        """Query with the bogus country code ``XXX`` and skip on any outcome
        that shows the World Bank API changed its behaviour.
        """
        countries = ['CA', 'MX', 'US', 'XXX']
        indicators = ['NY.GDP.PCAP.CD']

        try:
            result = download(country=countries, indicator=indicators,
                              start=2003, end=2004, errors='ignore')
        except ValueError as e:
            raise nose.SkipTest("No indicators returned data: {0}".format(e))
        else:
            # Reaching this point means the country code XXX got used by the
            # World Bank, or the API changed in a really unexpected way.
            if len(result) > 0:
                raise nose.SkipTest("Invalid results")
Ejemplo n.º 9
0
def api_wb(params):
    """Download World Bank data and store it in a Mongo collection.

    ``params`` is a mapping that must provide the keys ``indicator``,
    ``country``, ``start``, ``end`` (forwarded to ``wb.download``),
    ``col_rename`` (column rename mapping) and ``collection_name`` (target
    collection).  The downloaded frame is joined with the ``country_code``
    utility collection on the country name, reduced to the columns
    ``GDP_cst_dollars``, ``ISO3``, ``population`` and ``year``, and written
    with ``erase=True``.
    """
    from data_params import DATABASE
    country_codes = mongo_to_dataframe('utilities', 'country_code')

    downloaded = wb.download(indicator=params['indicator'],
                             country=params['country'],
                             start=params['start'],
                             end=params['end'])
    # Turn the index levels into ordinary columns before renaming/merging.
    flat = downloaded.reset_index()
    flat = flat.rename(columns=params['col_rename'])

    merged = pd.merge(flat, country_codes,
                      left_on='country', right_on='country_name')
    wanted_columns = ['GDP_cst_dollars', 'ISO3', 'population', 'year']
    selected = merged[wanted_columns]

    logger.info('inserting df with shape: ' + str(selected.shape))
    dataframe_to_mongo(selected, DATABASE, params['collection_name'], erase=True)
    logger.info('insertion sucessful in db' + DATABASE + ' of collection: ' + params['collection_name'])
Ejemplo n.º 10
0
    def test_wdi_download_str(self):
        """Pass the country and the indicator as plain strings and compare
        Japan's GDP per capita for 2000-2004 with stored reference values,
        via both ``download`` and ``WorldBankReader``.
        """
        countries = 'JP'
        indicator = 'NY.GDP.PCAP.CD'

        def by_index(frame):
            # sort_index() is used when PANDAS_0170 is set; otherwise fall
            # back to the older DataFrame.sort().
            return frame.sort_index() if PANDAS_0170 else frame.sort()

        reference = {
            ('Japan', '2000'): 37299.64412913,
            ('Japan', '2001'): 32716.41867489,
            ('Japan', '2002'): 31235.58818439,
            ('Japan', '2003'): 33690.93772972,
            ('Japan', '2004'): 36441.50449394,
        }
        expected = pd.DataFrame({'NY.GDP.PCAP.CD': reference})
        # Round, to ignore revisions to data.
        expected = by_index(np.round(expected, decimals=-3))

        result = download(country=countries, indicator=indicator,
                          start=2000, end=2004, errors='ignore')
        result = np.round(by_index(result), decimals=-3)

        if PANDAS_0140:
            expected.index.names = ['country', 'year']
        else:
            # Prior versions do not allow setting multiple names on a
            # MultiIndex, so take the index over from the result instead.
            expected.index = result.index

        tm.assert_frame_equal(result, expected)

        # The reader class must give the same answer as the function.
        reader = WorldBankReader(indicator, countries=countries,
                                 start=2000, end=2004, errors='ignore')
        result = np.round(by_index(reader.read()), decimals=-3)
        tm.assert_frame_equal(result, expected)
Ejemplo n.º 11
0
    def test_wdi_download_quarterly(self):
        """Compare quarterly ``DT.DOD.PUBS.CD.US`` data for Albania
        (2011-2012, ``freq='Q'``) with stored reference values, via both
        ``download`` and ``WorldBankReader``.

        Every frame is rounded to the nearest thousand (``decimals=-3``) so
        that small revisions of the published data do not break the test.
        """
        expected = {'DT.DOD.PUBS.CD.US': {('Albania', '2012Q1'): 3240539817.18,
                                          ('Albania', '2011Q4'): 3213979715.15,
                                          ('Albania', '2011Q3'): 3187681048.95,
                                          ('Albania', '2011Q2'): 3248041513.86,
                                          ('Albania', '2011Q1'): 3137210567.92}}
        expected = pd.DataFrame(expected)
        # Round, to ignore revisions to data.
        expected = np.round(expected, decimals=-3)
        if PANDAS_0170:
            expected = expected.sort_index()
        else:
            expected = expected.sort()

        cntry_codes = 'ALB'
        inds = 'DT.DOD.PUBS.CD.US'
        result = download(country=cntry_codes, indicator=inds,
                          start=2011, end=2012, freq='Q', errors='ignore')
        if PANDAS_0170:
            result = result.sort_index()
        else:
            result = result.sort()
        result = np.round(result, decimals=-3)

        if PANDAS_0140:
            expected.index.names = ['country', 'year']
        else:
            # Prior versions do not allow setting multiple names on a
            # MultiIndex, so take the index over from the result instead.
            expected.index = result.index

        tm.assert_frame_equal(result, expected)

        # The reader class must give the same answer as the function.
        result = WorldBankReader(inds, countries=cntry_codes,
                                 start=2011, end=2012, freq='Q', errors='ignore').read()
        if PANDAS_0170:
            result = result.sort_index()
        else:
            result = result.sort()
        # Use the same precision as ``expected``.  Rounding to tens here
        # (decimals=-1) left the values unequal to the thousand-rounded
        # reference and only passed through assert_frame_equal's tolerance.
        result = np.round(result, decimals=-3)
        tm.assert_frame_equal(result, expected)
Ejemplo n.º 12
0
    def __init__(self, indicator):
        """Load the data for ``indicator``, using a local SQLite cache.

        If ``cache.db`` already holds a table named after the indicator it is
        read back with ``country``/``year`` as index; otherwise the indicator
        is downloaded for all countries with ``start=1960`` and ``end=2030``,
        rows with missing values are dropped, and the result is written to
        the cache.

        Sets ``self.indicator``, ``self.data``, ``self.start_date`` and
        ``self.end_date``.
        """
        self.indicator = indicator
        # Build cache if it does not already exist
        engine = create_engine('sqlite:///cache.db')
        conn = engine.connect()
        try:
            if engine.dialect.has_table(engine, self.indicator):
                self.data = pandas.read_sql(self.indicator,
                                            conn,
                                            index_col=['country', 'year'])
            else:
                self.data = wb.download(indicator=self.indicator,
                                        country='all',
                                        start=1960,
                                        end=2030).dropna()

                self.data.to_sql(self.indicator, conn, if_exists='replace')
        finally:
            # Release the connection even when reading, downloading or
            # writing fails.
            conn.close()

        # unstack() moves the innermost index level into the columns; the
        # second element of the first/last column label is taken as the date
        # range.  Assumes the index is (country, year) in both branches --
        # TODO confirm for the freshly downloaded frame.
        by_year = self.data.unstack()
        self.start_date = by_year.columns[0][1]
        self.end_date = by_year.columns[-1][1]
Ejemplo n.º 13
0
    def test_wdi_download_quarterly(self):
        """Compare quarterly ``DT.DOD.PUBS.CD.US`` data for Albania
        (2011-2012, ``freq="Q"``) with stored reference values, via both
        ``download`` and ``WorldBankReader``.

        Every frame is rounded to the nearest thousand (``decimals=-3``) so
        that small revisions of the published data do not break the test.
        """
        code = "DT.DOD.PUBS.CD.US"
        expected = {
            code: {
                ("Albania", "2012Q1"): 3240539817.18,
                ("Albania", "2011Q4"): 3213979715.15,
                ("Albania", "2011Q3"): 3187681048.95,
                ("Albania", "2011Q2"): 3248041513.86,
                ("Albania", "2011Q1"): 3137210567.92,
            }
        }
        expected = pd.DataFrame(expected)
        # Round, to ignore revisions to data.
        expected = np.round(expected, decimals=-3)
        expected = expected.sort_index()
        cntry_codes = "ALB"
        # Reuse the indicator code instead of repeating the literal.
        inds = code
        result = download(
            country=cntry_codes,
            indicator=inds,
            start=2011,
            end=2012,
            freq="Q",
            errors="ignore",
        )
        result = result.sort_index()
        result = np.round(result, decimals=-3)

        expected.index.names = ["country", "year"]
        tm.assert_frame_equal(result, expected)

        # The reader class must give the same answer as the function.
        result = WorldBankReader(inds,
                                 countries=cntry_codes,
                                 start=2011,
                                 end=2012,
                                 freq="Q",
                                 errors="ignore").read()
        result = result.sort_index()
        # Use the same precision as ``expected``.  Rounding to tens here
        # (decimals=-1) left the values unequal to the thousand-rounded
        # reference and only passed through assert_frame_equal's tolerance.
        result = np.round(result, decimals=-3)
        tm.assert_frame_equal(result, expected)
Ejemplo n.º 14
0
    def test_wdi_download(self):
        """Compare downloaded GDP per capita with stored reference values,
        both rounded to the nearest thousand.
        """
        # Country codes under test: two-letter (US), three-letter (USA),
        # standard (CA, MX), non-standard (KSV), duplicated (US, US, USA)
        # and unknown (BLA) -- combined with one bad indicator.
        #
        # Left out on purpose:
        #   * a crash-inducing country code (the World Bank raises its own
        #     exceptions and does not clean up legacy country codes);
        #   * a retired indicator (the user should want that to error).
        requested_countries = ['CA', 'MX', 'USA', 'US', 'US', 'KSV', 'BLA']
        requested_indicators = ['NY.GDP.PCAP.CD', 'BAD.INDICATOR']

        gdp_per_capita = {
            ('Canada', '2003'): 28026.006013044702,
            ('Canada', '2004'): 31829.522562759001,
            ('Kosovo', '2003'): 1969.56271307405,
            ('Kosovo', '2004'): 2135.3328465238301,
            ('Mexico', '2003'): 6601.0420648056606,
            ('Mexico', '2004'): 7042.0247834044303,
            ('United States', '2003'): 39682.472247320402,
            ('United States', '2004'): 41928.886136479705,
        }
        expected = pandas.DataFrame({'NY.GDP.PCAP.CD': gdp_per_capita})
        # Round, to ignore revisions to data.
        expected = pandas.np.round(expected, decimals=-3)
        expected.sort(inplace=True)

        result = download(country=requested_countries,
                          indicator=requested_indicators,
                          start=2003, end=2004, errors='ignore')
        result.sort(inplace=True)
        # Round, to ignore revisions to data.
        result = pandas.np.round(result, decimals=-3)

        # The expected frame takes over the index of the result, so only the
        # values (in sorted order) are effectively compared.
        expected.index = result.index
        assert_frame_equal(result, pandas.DataFrame(expected))
Ejemplo n.º 15
0
def featureappnd(ind,nm,ft):
    """Download one World Bank indicator, fill its gaps and merge it into
    the module-level ``maindata`` frame.

    Parameters:
    * ind: indicator code passed to ``wb.download``.
    * nm: name given to the indicator column; also appended to the
      module-level list ``clmns``.
    * ft: gap-filling strategy -- ``"reg"`` (values from ``lnreg``),
      ``"mean"`` (per-group mean), ``"sdp"`` / ``"sdn"`` (per-group mean
      plus / minus ``(Year - 2010) * 0.5 * std``).  Any other value leaves
      the missing values in place.

    Returns nothing; the result is stored by rebinding the global
    ``maindata``.
    """
    # Record the new feature name in the shared column list.
    clmns.append(nm)
    # All countries, from the module-level ``strtyear`` up to ``now.year``.
    tmpdt=wb.download(country=u"all", indicator=ind,start=strtyear, end=now.year)
    tmpdt.columns=[nm]
    # Copy the two index components into ordinary columns.
    # assumes each index entry is a (country, year) pair -- TODO confirm
    tmpdt['Country']=[i[0] for i in tmpdt.index]
    tmpdt['Year']=[i[1] for i in tmpdt.index]
    tmpdt['Year']=tmpdt['Year'].apply(int)
    # Reverse the row order inside every country group.
    # NOTE(review): presumably to get ascending years -- confirm.
    tmpdt =tmpdt.groupby("Country").transform(lambda x: x.iloc[::-1])
    # 'Country' is rebuilt from the index right after the transform.
    # NOTE(review): presumably because transform() does not return the
    # grouping column -- confirm.
    tmpdt['Country']=[i[0] for i in tmpdt.index]
    tmpdt=tmpdt[["Country","Year",nm]]
    # Drop the first 658 rows.
    # NOTE(review): magic number; presumably skips leading rows that are not
    # individual countries -- confirm against the downloaded data.
    tmpdt=tmpdt[658:]
    # Zeros are treated as missing values.
    tmpdt[nm]=tmpdt[nm].replace(0, np.nan)
    # NOTE(review): the groupby key below is lowercase "country" while the
    # column created above is "Country"; presumably it resolves to the index
    # level name -- confirm.
    if ft=="reg":
        # Fill gaps with the output of ``lnreg`` (defined elsewhere).
        tmpdt[nm] = tmpdt.groupby("country")[nm].transform(lambda x: x.fillna(lnreg(x,tmpdt['Year']-strtyear))) 
    elif ft=="mean":
        # Fill gaps with the group mean.
        tmpdt[nm] = tmpdt.groupby("country")[nm].transform(lambda x: x.fillna(x.mean()))
    elif ft=="sdp":
        # Fill gaps with the group mean plus half a standard deviation per
        # year counted from 2010.
        tmpdt[nm] = tmpdt.groupby("country")[nm].transform(lambda x: x.fillna(x.mean()+(tmpdt["Year"]-2010)*0.5*np.std(x)))
    elif ft=="sdn":
        # Same as "sdp" but subtracting the year-scaled term.
        tmpdt[nm] = tmpdt.groupby("country")[nm].transform(lambda x: x.fillna(x.mean()-(tmpdt["Year"]-2010)*0.5*np.std(x)))
    global maindata
    # pd.merge is called without explicit keys, so it joins on the columns
    # the two frames have in common.
    maindata=pd.merge(maindata,tmpdt)
Ejemplo n.º 16
0
    def test_wdi_download_quarterly(self):
        """Compare quarterly ``DT.DOD.PUBS.CD.US`` data for Albania
        (2011-2012, ``freq='Q'``) with stored reference values, via both
        ``download`` and ``WorldBankReader``.

        Every frame is rounded to the nearest thousand (``decimals=-3``) so
        that small revisions of the published data do not break the test.
        """
        code = 'DT.DOD.PUBS.CD.US'
        expected = {
            code: {
                ('Albania', '2012Q1'): 3240539817.18,
                ('Albania', '2011Q4'): 3213979715.15,
                ('Albania', '2011Q3'): 3187681048.95,
                ('Albania', '2011Q2'): 3248041513.86,
                ('Albania', '2011Q1'): 3137210567.92
            }
        }
        expected = pd.DataFrame(expected)
        # Round, to ignore revisions to data.
        expected = np.round(expected, decimals=-3)
        expected = expected.sort_index()
        cntry_codes = 'ALB'
        # Reuse the indicator code instead of repeating the literal.
        inds = code
        result = download(country=cntry_codes,
                          indicator=inds,
                          start=2011,
                          end=2012,
                          freq='Q',
                          errors='ignore')
        result = result.sort_index()
        result = np.round(result, decimals=-3)

        expected.index.names = ['country', 'year']
        tm.assert_frame_equal(result, expected)

        # The reader class must give the same answer as the function.
        result = WorldBankReader(inds,
                                 countries=cntry_codes,
                                 start=2011,
                                 end=2012,
                                 freq='Q',
                                 errors='ignore').read()
        result = result.sort_index()
        # Use the same precision as ``expected``.  Rounding to tens here
        # (decimals=-1) left the values unequal to the thousand-rounded
        # reference and only passed through assert_frame_equal's tolerance.
        result = np.round(result, decimals=-3)
        tm.assert_frame_equal(result, expected)
Ejemplo n.º 17
0
    def test_wdi_download_str(self):
        """Pass the country and the indicator as plain strings and compare
        Japan's GDP per capita for 2000-2004 with pre-rounded reference
        values, via both ``download`` and ``WorldBankReader``.
        """
        country = "JP"
        indicator = "NY.GDP.PCAP.CD"

        # The reference values are stored already rounded to the nearest
        # thousand, which keeps the test robust against future data
        # revisions.
        reference = {
            ("Japan", "2000"): 39000.0,
            ("Japan", "2001"): 34000.0,
            ("Japan", "2002"): 32000.0,
            ("Japan", "2003"): 35000.0,
            ("Japan", "2004"): 38000.0,
        }
        expected = pd.DataFrame({"NY.GDP.PCAP.CD": reference}).sort_index()
        expected.index.names = ["country", "year"]

        def normalise(frame):
            # Bring a downloaded frame into the shape of the reference:
            # sorted by index and rounded to the nearest thousand.
            return np.round(frame.sort_index(), decimals=-3)

        from_function = download(country=country, indicator=indicator,
                                 start=2000, end=2004, errors="ignore")
        tm.assert_frame_equal(normalise(from_function), expected)

        # The reader class must give the same answer as the function.
        reader = WorldBankReader(indicator, countries=country,
                                 start=2000, end=2004, errors="ignore")
        tm.assert_frame_equal(normalise(reader.read()), expected)
Ejemplo n.º 18
0
def to_flourish(indicator, start_yr, end_yr, country='all', save_csv=True):
    """
    Downloads data from the World Bank and converts it to the format for making bar chart races in Flourish.
    Parameters:
    * indicator: the world bank code available on the World Bank Page.
    * country: a string if single or a list if multiple of the ISO3 codes of the locations.
    * start_yr: the first year of data that you want to get.
    * end_yr: the final year of data that you want to collect.
    * save_csv: saves the result as a timestamped csv inside the
      ``flourish_data`` directory (created if it does not exist yet).
    Returns the merged dataframe: one row per country (aggregate regions
    excluded) with name, region, flag image URL and one column per year.
    """
    import os

    df = wb.download(indicator=indicator,
                     country=country,
                     start=start_yr,
                     end=end_yr)
    # Flatten the index, then pivot to one row per country and one column
    # per year.
    df = df.reset_index()
    df = pd.pivot_table(df, values=indicator, columns='year',
                        index='country').reset_index()
    print("Processed the Indicator Data")

    # Keep real countries only; rows whose region is 'Aggregates' are
    # dropped.
    country_info = wb.get_countries()
    country_info = country_info[country_info.region != 'Aggregates']

    df_merged = pd.merge(country_info[['iso2c', 'name', 'region']],
                         df,
                         left_on='name',
                         right_on='country')
    # The flag image URL is derived from the two-letter ISO code.
    df_merged.insert(
        3, 'Image URL', df_merged['iso2c'].apply(
            lambda i: f"https://www.countryflags.io/{i}/flat/64.png"))
    df_merged = df_merged.drop(columns=['iso2c', 'country'])

    if save_csv:
        # to_csv does not create missing directories; without this the save
        # fails on a fresh checkout.
        os.makedirs("flourish_data", exist_ok=True)
        df_merged.to_csv(
            f"flourish_data/flourish_{indicator}_{datetime.now().strftime('%d-%m-%Y %H-%M')}.csv",
            index=False)

    return df_merged
Ejemplo n.º 19
0
    def test_wdi_download_str(self):
        """Pass the country and the indicator as plain strings and compare
        Japan's GDP per capita for 2000-2004 with pre-rounded reference
        values, via both ``download`` and ``WorldBankReader``.
        """
        country = 'JP'
        indicator = 'NY.GDP.PCAP.CD'

        # The reference values are stored already rounded to the nearest
        # thousand, which keeps the test robust against future data
        # revisions.
        reference = {
            ('Japan', '2000'): 39000.0,
            ('Japan', '2001'): 34000.0,
            ('Japan', '2002'): 32000.0,
            ('Japan', '2003'): 35000.0,
            ('Japan', '2004'): 38000.0,
        }
        expected = pd.DataFrame({'NY.GDP.PCAP.CD': reference}).sort_index()
        expected.index.names = ['country', 'year']

        def normalise(frame):
            # Bring a downloaded frame into the shape of the reference:
            # sorted by index and rounded to the nearest thousand.
            return np.round(frame.sort_index(), decimals=-3)

        from_function = download(country=country, indicator=indicator,
                                 start=2000, end=2004, errors='ignore')
        tm.assert_frame_equal(normalise(from_function), expected)

        # The reader class must give the same answer as the function.
        reader = WorldBankReader(indicator, countries=country,
                                 start=2000, end=2004, errors='ignore')
        tm.assert_frame_equal(normalise(reader.read()), expected)
Ejemplo n.º 20
0
    def test_wdi_download_error_handling(self):
        """Check how ``download`` reports bad country codes and indicators.

        For an invalid country code and for an invalid indicator:
        ``errors="raise"`` must raise a ``ValueError`` with the expected
        message, and ``errors="warn"`` must emit a warning while still
        returning a two-row ``DataFrame``.
        """
        cntry_codes = ["USA", "XX"]
        inds = "NY.GDP.PCAP.CD"

        msg = "Invalid Country Code\\(s\\): XX"
        with pytest.raises(ValueError, match=msg):
            download(
                country=cntry_codes,
                indicator=inds,
                start=2003,
                end=2004,
                errors="raise",
            )

        with pytest.warns(Warning):
            result = download(country=cntry_codes,
                              indicator=inds,
                              start=2003,
                              end=2004,
                              errors="warn")
            assert isinstance(result, pd.DataFrame)
            # Was ``assert len(result), 2``, which only checks that the
            # frame is non-empty (the 2 was the assertion message).
            assert len(result) == 2

        cntry_codes = ["USA"]
        inds = ["NY.GDP.PCAP.CD", "BAD_INDICATOR"]

        msg = "The provided parameter value is not valid\\. " "Indicator: BAD_INDICATOR"
        with pytest.raises(ValueError, match=msg):
            download(
                country=cntry_codes,
                indicator=inds,
                start=2003,
                end=2004,
                errors="raise",
            )

        with pytest.warns(Warning):
            result = download(country=cntry_codes,
                              indicator=inds,
                              start=2003,
                              end=2004,
                              errors="warn")
            assert isinstance(result, pd.DataFrame)
            assert len(result) == 2
Ejemplo n.º 21
0
def chart1(indicator1, countryCode, startY, endY):
    """Render ``home.html`` with a bar chart of a World Bank indicator.

    Parameters:
    * indicator1: indicator code(s) passed to ``wb.download``.
    * countryCode: country code(s) passed to ``wb.download``.
    * startY, endY: first and last year of the requested range.

    Returns the result of ``render_template("home.html", picture=...)``
    where ``picture`` is the chart as a base64 PNG data URI.
    """
    import matplotlib
    from io import BytesIO
    import base64
    import pandas as pd
    # Compatibility shim applied before pandas_datareader is imported.
    # NOTE(review): presumably needed by pandas_datareader releases that
    # still look up pd.core.common.is_list_like -- confirm.
    pd.core.common.is_list_like = pd.api.types.is_list_like
    from pandas_datareader import wb
    # The backend has to be selected before pyplot is imported.
    matplotlib.use("agg")
    import matplotlib.pyplot as plt

    dat = wb.download(indicator=indicator1,
                      country=countryCode,
                      start=startY,
                      end=endY)

    data = dat.unstack()
    print(data)

    sio = BytesIO()
    try:
        data.plot(kind='bar')
        plt.savefig(sio, format='png')
    finally:
        # Always release the current figure, even when plotting or saving
        # fails; otherwise figures pile up across calls.
        plt.close()

    encoded = base64.encodebytes(sio.getvalue()).decode()
    picture = "data:image/png;base64," + encoded
    return render_template("home.html", picture=picture)
Ejemplo n.º 22
0
def get_wb_data(indicator, start_yr, end_yr, country='all', save_csv=False):
    """
    Download World Bank data and return it indexed by year.

    Parameters:
    * indicator: the world bank code available on the World Bank Page.
    * start_yr: the first year of data that you want to get.
    * end_yr: the final year of data that you want to collect.
    * country: a string if single or a list if multiple of the ISO3 codes of the locations.
    * save_csv: when true, also writes a timestamped csv into the working directory.

    Side effect: the module-level ``indicator_name`` is set to ``indicator``.

    NOTE(review): the csv is written with ``index=False``, so the Year index
    is not part of the saved file -- confirm that this is intended.
    """
    global indicator_name
    indicator_name = indicator

    downloaded = wb.download(indicator=indicator,
                             country=country,
                             start=start_yr,
                             end=end_yr)
    cleaned = downloaded.dropna()
    cleaned.index.names = ['Region', 'Year']

    # Move 'Region' (index level 0) into a column, leaving 'Year' as the
    # index, and order the rows by region.
    by_year = cleaned.reset_index(level=0)
    by_year = by_year.sort_values(by='Region')

    if save_csv:
        csv_name = f"{indicator}_{datetime.now().strftime('%d-%m-%Y %H-%M')}.csv"
        by_year.to_csv(csv_name, index=False)
    return by_year
Ejemplo n.º 23
0
    def test_wdi_download_error_handling(self):
        """Check how ``download`` reports bad country codes and indicators.

        For an invalid country code and for an invalid indicator:
        ``errors='raise'`` must raise a ``ValueError`` with the expected
        message, and ``errors='warn'`` must produce a warning while still
        returning a two-row ``DataFrame``.
        """
        cntry_codes = ['USA', 'XX']
        inds = 'NY.GDP.PCAP.CD'

        msg = "Invalid Country Code\\(s\\): XX"
        with assert_raises_regex(ValueError, msg):
            download(country=cntry_codes,
                     indicator=inds,
                     start=2003,
                     end=2004,
                     errors='raise')

        with tm.assert_produces_warning():
            result = download(country=cntry_codes,
                              indicator=inds,
                              start=2003,
                              end=2004,
                              errors='warn')
            assert isinstance(result, pd.DataFrame)
            # Was ``assert len(result), 2``, which only checks that the
            # frame is non-empty (the 2 was the assertion message).
            assert len(result) == 2

        cntry_codes = ['USA']
        inds = ['NY.GDP.PCAP.CD', 'BAD_INDICATOR']

        msg = ("The provided parameter value is not valid\\. "
               "Indicator: BAD_INDICATOR")
        with assert_raises_regex(ValueError, msg):
            download(country=cntry_codes,
                     indicator=inds,
                     start=2003,
                     end=2004,
                     errors='raise')

        with tm.assert_produces_warning():
            result = download(country=cntry_codes,
                              indicator=inds,
                              start=2003,
                              end=2004,
                              errors='warn')
            assert isinstance(result, pd.DataFrame)
            assert len(result) == 2
Ejemplo n.º 24
0
import matplotlib.pyplot as plt
import numpy as np
# pandas is required below for read_stata / read_excel; the import was
# missing, so the script stopped with a NameError on ``pd``.
import pandas as pd
from pandas_datareader import wb

# Remote locations of the input files.
path = "https://github.com/omercadopopular/cgoes/blob/master/tutorial/python/statatopython/PPI_DB_082316.dta?raw=true"
cpisauce = "https://github.com/omercadopopular/cgoes/blob/master/tutorial/python/statatopython/CPIAUCSL.xls?raw=true"
gdpsauce = "https://github.com/omercadopopular/cgoes/blob/master/tutorial/python/statatopython/gdp.xlsx?raw=true"

#####################################
# 1. Retrieve Databases #############
#####################################

## 1.1 Import GDP data from the World Bank
##     (all countries, 1994-2015; rows with missing values are dropped and
##     the indicator column is renamed to 'gdp')

wbdata = (wb.download(indicator='NY.GDP.MKTP.CD', country='all', start=1994, end=2015)
            .dropna()
            .rename(columns={'NY.GDP.MKTP.CD': 'gdp'})
            )

## 1.2 Read file from STATA dta

ppidf = pd.read_stata(path)

## 1.3 Import CPI data from excel file
    ## Note you have to skip 9 rows

cpi = pd.read_excel(cpisauce, skiprows=9, header=1)

#####################################
# 2. Adjust Databases ###############
#####################################
Ejemplo n.º 25
0
import datetime

import pandas as pd
from pandas_datareader import data, wb

# Fetch three World Bank indicators for the USA, Turkey and the UK and store
# the resulting (country, year)-indexed table as CSV.
indicators = ['SL.UEM.TOTL.ZS', 'NY.GDP.DEFL.KD.ZG', 'NE.RSB.GNFS.ZS']
countries = ['USA', 'TUR', 'GBR']
dat = wb.download(
    indicator=indicators,
    country=countries,
    start=1970,
    end=2016,
)
dat.to_csv('data.csv')

# start=datetime.datetime(1970, 1, 1)
# end=datetime.datetime(2016, 1, 1)
# df = data.DataReader(['BPBLTT01TRA188S','LRUN64TTTRQ156S','FPCPITOTLZGTUR'], "fred", start, end)
# df.columns = ['tbal','unemploy','inf']
# df.to_csv('tr.csv')

# start=datetime.datetime(1970, 1, 1)
# end=datetime.datetime(2016, 1, 1)
# df = data.DataReader(['BPBLTT01GBQ188S','LMUNRRTTGBM156S','FPCPITOTLZGGBR'], "fred", start, end)
# df.columns = ['tbal','unemploy','inf']
# df.to_csv('uk.csv')

Ejemplo n.º 26
0
def download_data(year):
    """Download two World Bank indicators for every country for one year.

    Args:
        year: Year passed as both ``start`` and ``end`` of the query.

    Returns:
        DataFrame without rows containing missing values, with its two
        columns relabelled ``'sanitation'`` and ``'completion'``.
    """
    indicator_codes = ['SH.STA.ACSN', 'SE.PRM.CMPT.FE.ZS']
    frame = wb.download(
        indicator=indicator_codes,
        country='all',
        start=year,
        end=year,
    )
    frame = frame.dropna()
    # Positional relabelling: relies on the columns coming back in the same
    # order as ``indicator_codes``.
    frame.columns = ['sanitation', 'completion']
    return frame
Ejemplo n.º 27
0

from pandas_datareader import data, wb
#import wbdata
import pandas
import matplotlib.pyplot as plt

# #set up the countries I want
# countries = ["CL","UY","HU"]
#
# #set up the indicator I want (just build up the dict if you want more than one)
# indicators = {'SP.DYN.LE00.IN':'Life expectancy at birth, total (years)'}
#
# #grab indicators above for countries above and load into data frame
# df = wbdata.get_dataframe(indicators, convert_date=False)
# #wbdata.get_dataframe
# #df is "pivoted", pandas' unstack function helps reshape it into something plottable
# dfu = df.unstack(level=0)
#
# # a simple matplotlib plot with legend, labels and a title
# dfu.plot();
# plt.legend(loc='best');
# plt.title("GNI Per Capita ($USD, Atlas Method)");
# plt.xlabel('Date'); plt.ylabel('GNI Per Capita ($USD, Atlas Method');


# SP.DYN.LE00.IN is "Life expectancy at birth, total (years)".
ind = ['SP.DYN.LE00.IN']
#countries = ['iso2c']
# Download the 2013 values for all countries and drop those without data.
dat = wb.download(indicator=ind, country='all', start=2013, end=2013).dropna()
# Label the column after what the indicator actually measures (it was
# previously mislabelled 'cellphone').
dat.columns = ['life_expectancy']
print(dat)
Ejemplo n.º 28
0
# Load the local CSV twice: once indexed by its fourth column, once with the
# default integer index.
life = pd.read_csv('subsaharan_africa.csv', index_col=3, na_values=None)
jk = pd.read_csv('subsaharan_africa.csv')

# <codecell>

life.columns


# Get the external dataset from worldbank
#  We have selected indicator, "SP.POP.TOTL"
df = wb.download(
                    # Specify indicator to retrieve
                    indicator='SP.POP.TOTL',
                    country=['all'],
                    # Start Year
                    start='2008',
                    # End Year
                    end=2016
                )
# <codecell>

# Create a list with all the years 1960..1999 as strings
all_year = [str(x) for x in range(1960, 2000)]
# Check what's inside all_year.  The old `print all_year.count` was a
# Python 2 statement and printed the bound method, not the contents.
print(all_year)

# <codecell>

#drop all columns with no values
#life = life.dropna(axis=0)
Ejemplo n.º 29
0
def p148(steps):
    """Fit a small LSTM to GDP-per-capita series and print its predictions.

    Downloads World Bank indicator ``NY.GDP.PCAP.KD`` for a fixed list of
    countries (1970-2016), divides every country's series by its own value
    in the year 2000, and trains the network to predict each year's value
    from the preceding years.  Losses are printed every 500 iterations and
    the rescaled predictions from 2001 onwards are printed at the end.

    Args:
        steps: Number of loop iterations to run (used in ``range(steps)``).

    Returns:
        None; all results are printed.
    """
    countries = [
        'BR',
        'CA',
        'CN',
        'FR',
        'DE',
        'IN',
        'IL',
        'JP',
        'SA',
        'GB',
        'US',
    ]
    dat = wb.download(indicator='NY.GDP.PCAP.KD',
                      country=countries,
                      start=1970,
                      end=2016)
    # Reshape to one row per year and one column per country; the indicator
    # level is dropped and the year labels are converted to int.
    df = dat.unstack().T
    df.index = df.index.droplevel(0).astype(int)

    class Net(torch.nn.Module):
        """LSTM followed by a linear layer that emits one value per step."""

        def __init__(self, input_size, hidden_size):
            super(Net, self).__init__()
            self.rnn = torch.nn.LSTM(input_size, hidden_size)
            self.fc = torch.nn.Linear(hidden_size, 1)

        def forward(self, x):
            # Add a trailing feature axis for the LSTM ...
            x = x[:, :, None]
            x, _ = self.rnn(x)
            x = self.fc(x)
            # ... and remove the size-1 output axis again.
            x = x[:, :, 0]
            return x

    net = Net(input_size=1, hidden_size=5)

    # Normalize the data (each column divided by its year-2000 value)
    df_scaled = df / df.loc[2000]

    # Determine the training and test sets
    years = df.index
    train_seq_len = sum((years >= 1971) & (years <= 2000))
    test_seq_len = sum(years > 2000)
    print('训练集长度 = {}, 测试集长度 = {}'.format(train_seq_len, test_seq_len))

    # Build the features and labels used for training: the labels are the
    # inputs shifted forward by one year.
    inputs = torch.tensor(df_scaled.iloc[:-1].values, dtype=torch.float32)
    labels = torch.tensor(df_scaled.iloc[1:].values, dtype=torch.float32)

    # Train the network
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters())
    for step in range(steps):
        # From the second iteration on, first back-propagate the training
        # loss computed at the end of the previous iteration; the loss of
        # the final iteration is therefore never applied.
        if step:
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()

        preds = net(inputs)
        train_preds = preds[:train_seq_len]
        train_labels = labels[:train_seq_len]
        train_loss = criterion(train_preds, train_labels)

        # The test loss is only reported, never back-propagated.
        test_preds = preds[train_seq_len:]
        test_labels = labels[train_seq_len:]
        test_loss = criterion(test_preds, test_labels)

        if step % 500 == 0:
            print('第{}次迭代: loss (训练集) = {}, loss (测试集) = {}'.format(
                step, train_loss, test_loss))

    # Final forward pass; undo the normalization before printing.
    preds = net(inputs)
    df_pred_scaled = pd.DataFrame(preds.detach().numpy(),
                                  index=years[1:],
                                  columns=df.columns)
    df_pred = df_pred_scaled * df.loc[2000]
    print(df_pred.loc[2001:])
Ejemplo n.º 30
0
from pandas_datareader import wb
import matplotlib.pyplot as plt

# Look up candidate indicators by regex; the result is kept only for
# interactive inspection and is not used below.
matches = wb.search('gni.*capita.*const')

# Grab the indicator, countries and period of interest into a data frame
df = wb.download(indicator='NY.GNP.PCAP.CD', country=['CL', 'UY', 'HU'], start=1990, end=2010)

# df is "pivoted"; pandas' unstack function reshapes it into something
# plottable (one column per country)
dfu = df.unstack(level=0)

# A simple matplotlib plot with legend, labels and a title
dfu.plot()
plt.legend(loc='best')
plt.title("GNI Per Capita ($USD, Atlas Method)")
plt.xlabel('Date')
# The label was previously missing its closing parenthesis.
plt.ylabel('GNI Per Capita ($USD, Atlas Method)')
plt.show()
Ejemplo n.º 31
0
#Import packages:
from pandas_datareader import wb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib_venn import venn2

# GDP per capita for the US from the World Bank, flattened to plain columns
# with an integer 'year'.
gdps_wb = (
    wb.download(indicator='NY.GDP.PCAP.KD', country=['US'], start=1990, end=2018)
    .rename(columns={'NY.GDP.PCAP.KD': 'gdp'})
    .reset_index()
)
gdps_wb['year'] = gdps_wb['year'].astype(int)
gdps_wb.head(10)

# Annual GDP growth for the US, prepared the same way.
gdpgrowth_wb = (
    wb.download(indicator='NY.GDP.MKTP.KD.ZG', country=['US'], start=1990, end=2018)
    .rename(columns={'NY.GDP.MKTP.KD.ZG': 'gdp_growth'})
    .reset_index()
)
gdpgrowth_wb['year'] = gdpgrowth_wb['year'].astype(int)
gdpgrowth_wb.head(10)

# Unemployment data comes from a local Excel file.
unempl = pd.read_excel('Data.xlsx')
print(unempl)

#Change type:
Ejemplo n.º 32
0
        "St. Vincent and the Grenadines":"Saint Vincent and the Grenadines",
        "Congo, Rep.":"Republic of the Congo",
        "Bahamas, The":"The Bahamas",
        "Gambia, The":"The Gambia"
    }
    for t in trans :
        s["Country/Region"] = s["Country/Region"].replace(t, trans[t])
    return(s)


if __name__ == "__main__":
    # Build the combined covid data set, then attach one column per World
    # Bank indicator listed in INDICES_USED (cached on disk in WDI_FILE).
    dsets = datasets.load()
    covid = datasets.combine(dsets)
    if os.path.isfile(WDI_FILE):
        warnings.warn("Reading cached WDI data from disk, delete file to download updated")
        # NOTE: unpickling executes arbitrary code; only load a cache file
        # that this script wrote itself.
        wdi = pd.read_pickle(WDI_FILE)
    else:
        # One row per country: strip the per-date / per-province columns.
        wdi = covid.drop(columns=["Date","Province/State","Lat","Long", datasets.CONFIRMED,"deaths","recoveries"]).drop_duplicates()
        # `indicator_id` rather than `id`, which would shadow the builtin.
        for indicator_id in INDICES_USED:
            s = wb.download(indicator=indicator_id, country="all", start=2005, end=2019).reset_index()

            # use most recent non missing value
            s = s.dropna().groupby("country").last()
            s = s.drop(columns="year").reset_index()

            # match country names to covid data
            s = s.rename(columns={"country":"Country/Region"})
            s = fixcountrynames(s)
            wdi = pd.merge(wdi, s, how='left', on='Country/Region', validate="one_to_one")
        wdi.to_pickle(WDI_FILE)
Ejemplo n.º 33
0
import pandas as pd
from pandas_datareader import wb

import torch
import torch.nn
import torch.optim

# Download GDP per capita for the selected countries, then reshape so that
# rows are years (as int) and columns are countries.
countries = ['BR', 'CA', 'CN', 'FR', 'DE', 'IN', 'IL', 'JP', 'SA', 'GB', 'US']
dat = wb.download(
    indicator='NY.GDP.PCAP.KD',
    country=countries,
    start=1970,
    end=2016,
)
df = dat.unstack().T
year_index = df.index.droplevel(0)  # drop the indicator level
df.index = year_index.astype(int)
print(df)

class Net(torch.nn.Module):
    """LSTM followed by a linear read-out producing one value per step."""

    def __init__(self, input_size, hidden_size):
        """Create the LSTM (``rnn``) and the 1-unit linear layer (``fc``)."""
        super().__init__()
        self.rnn = torch.nn.LSTM(input_size, hidden_size)
        self.fc = torch.nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Run ``x`` through the LSTM and read-out.

        A new axis is inserted at position 2 before the LSTM, and index 0
        of that axis is selected from the linear layer's output, so a 2-D
        input yields a 2-D output of the same shape.
        """
        features = x.unsqueeze(2)
        hidden_states, _ = self.rnn(features)
        projected = self.fc(hidden_states)
        return projected.select(2, 0)


# Instantiate the model with one input feature and five hidden units, then
# print its layer summary.
net = Net(input_size=1, hidden_size=5)
print(net)
####DATA PROJECT####

###Data Cleaning and Structuring###
## Set up
import pandas as pd
import numpy as np
import pandas_datareader
import datetime
from pandas_datareader import wb

## Download data from the World Bank
countries = ["CN","JP","BR","US","DK","ES","TM","IN","NG"]
# Indicator code -> human-readable label.
# NOTE(review): the dict itself is passed as `indicator`; presumably only its
# keys (the codes) are used by wb.download — confirm.
indicators = {"NY.GDP.PCAP.KD":"GDP per capita", "NY.GDP.MKTP.CD":"GDP(current US $)", "SP.POP.TOTL":"Population total", 
              "SP.URB.TOTL.IN.ZS":"Urban Population in %", "SP.DYN.TFRT.IN":"Fertility Rate", "SE.ADT.LITR.ZS": "Literacy rate, adult total in %" }
data_wb = wb.download(indicator= indicators, country= countries, start=1990, end=2017)
# Replace the indicator codes with short column names.
data_wb = data_wb.rename(columns = {"NY.GDP.PCAP.KD":"gdp_pC","NY.GDP.MKTP.CD":"gdp", "SP.POP.TOTL":"pop", "SP.URB.TOTL.IN.ZS":"urban_pop%", 
                                    "SP.DYN.TFRT.IN":"frt", "SE.ADT.LITR.ZS":"litr"})
# Turn the index levels into ordinary columns.
data_wb = data_wb.reset_index()
data_wb.head(-5)             

# NOTE(review): `writer` is created but not used or closed in the visible
# code; the export below writes to a different file — confirm it is needed.
writer = pd.ExcelWriter('pandas_simple.xlsx', engine='xlsxwriter')
data_wb.to_excel(r"./data_wb1.xlsx")

## Overview of the data
data_wb.dtypes

# Display floats with thousands separators.
pd.options.display.float_format = '{:,}'.format

round(data_wb.head(),2)

# GDP expressed in billions.
data_wb['gdp_in_bil'] = data_wb['gdp']/1000000000
Ejemplo n.º 35
0
# In[23]:

# Mean of the Price column (df1 is defined in an earlier notebook cell).
df1.Price.mean()


# In[27]:

# Upgrade pandas_datareader from inside the notebook before importing it.
get_ipython().system("pip3 install --upgrade pandas_datareader")
from pandas_datareader import data, wb

# Total population (SP.POP.TOTL) for all countries.
# NOTE(review): `start` is a string while `end` is an int — presumably both
# are accepted as years; confirm.
df_wb = wb.download(
    # Specify indicator to retrieve
    indicator="SP.POP.TOTL",
    country=["all"],
    # Start Year
    start="2008",
    # End Year
    end=2016,
)


# In[28]:

# (rows, columns) of the downloaded table
df_wb.shape


# In[29]:

# First rows of the downloaded table
df_wb.head()
Ejemplo n.º 36
0
def get_wb(wb_name):
    """Return World Bank data for ``wb_name`` as a country-by-year table.

    The indicator is downloaded for all countries between the module-level
    ``start_year`` and ``today_year``; countries and years that contain no
    data at all are removed.
    """
    downloaded = wb.download(
        indicator=wb_name,
        start=start_year,
        end=today_year,
        country="all",
    )
    by_year = downloaded.unstack("year")[wb_name]
    non_empty_rows = by_year.dropna(how="all")
    return non_empty_rows.dropna(how="all", axis=1)
raw_unstacked_data = raw_data.unstack(level=0)

# printing our data object
# print(raw_data)
# print(raw_unstacked_data)

# =============================================================================
# API method 2: use `from pandas_datareader import wb` and work with the
# result as a DataFrame
# =============================================================================

# Limit how much of a frame is shown when printed
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, 15)

df1 = wb.download(indicator=indicators, country=countries, start=2008, end=2018)
date_period = list(range(2008, 2019))
print(df1)

# Work on a renamed copy (df2) so that later calculations leave the original
# values in df1 untouched.
readable_names = {
    'SI.DST.05TH.20': 'Income share held by highest 20%',
    'SI.DST.FRST.20': 'Income share held by lowest 20%',
    'SL.EMP.TOTL.SP.FE.NE.ZS': 'Employment to population ratio, 15+, female (%) (national estimate)',
    'SL.EMP.TOTL.SP.MA.NE.ZS': 'Employment to population ratio, 15+, male (%) (national estimate)',
}
df2 = df1.rename(columns=readable_names)

# Data manipulation: fill missing values with the column mean
df2.mean()
column_means = df2.mean()
df2.fillna(column_means, inplace=True)
print(df2)
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 24 14:07:46 2016

@author: anh
"""

import pandas as pd
from pandas_datareader import wb
from ggplot import *

# Foreign direct investment inflows for China, in current US$ and as a share
# of GDP.
dat = wb.download(
    indicator=['BX.KLT.DINV.CD.WD', 'BX.KLT.DINV.WD.GD.ZS'],
    country='CN', start=2005, end=2011)
dat.reset_index(inplace=True)
dat['year'] = pd.to_datetime(dat['year']) # key: the date axis needs datetimes

# print(...) with a parenthesised expression is valid in Python 2 and 3,
# unlike the former `print ggplot(...)` statement.
print(ggplot(aes(x='year', y='BX.KLT.DINV.CD.WD'),
             data=dat) +
      geom_line() + theme_bw() +
      scale_x_date(labels=date_format("%m - %Y")))

# NOTE(review): `cdat` is not defined anywhere in the visible code and will
# raise NameError unless it comes from an interactive session — confirm.
cdat

# After reset_index() the country is an ordinary column, so select the rows
# by value; the former dat.loc["China"] looked "China" up in the integer
# index.
ggplot(dat[dat['country'] == 'China'], aes(x='year', y='BX.KLT.DINV.CD.WD')) + \
    geom_point()

#pip install linearmodels #Run in terminal
from linearmodels import PanelOLS
import numpy as np
import matplotlib.pyplot as plt
import pandas_datareader
from pandas_datareader import wb
import seaborn as sns

###### a. Downloading inflation and unemployment data from World Bank ######
# Subset of countries affected by ECB's QE
cntr_eu = ['DK', 'SE', 'FR', 'NL', 'DE', 'GB', 'BE', 'LU', 'AT', 'FI']
# Subset of countries not affected by ECB's QE
cntr_other = ['CA', 'CH', 'AU', 'NZ', 'SG', 'NO', 'US', 'JP', 'KR']

# All four downloads cover the same period.
_period = {'start': 1991, 'end': 2017}

# Inflation (FP.CPI.TOTL.ZG) for both country groups
infl_eu = wb.download(indicator='FP.CPI.TOTL.ZG', country=cntr_eu, **_period)
infl_other = wb.download(indicator='FP.CPI.TOTL.ZG', country=cntr_other, **_period)

# Unemployment (SL.UEM.TOTL.ZS) for both country groups
unem_eu = wb.download(indicator='SL.UEM.TOTL.ZS', country=cntr_eu, **_period)
unem_other = wb.download(indicator='SL.UEM.TOTL.ZS', country=cntr_other, **_period)
Ejemplo n.º 40
0
# Show the Price column (df1 is defined in an earlier notebook cell).
df1.Price

# In[23]:

# Mean of the Price column
df1.Price.mean()

# In[27]:

# Upgrade pandas_datareader from inside the notebook before importing it.
get_ipython().system('pip3 install --upgrade pandas_datareader')
from pandas_datareader import data, wb

# Total population (SP.POP.TOTL) for all countries.
# NOTE(review): `start` is a string while `end` is an int — presumably both
# are accepted as years; confirm.
df_wb = wb.download(
    # Specify indicator to retrieve
    indicator='SP.POP.TOTL',
    country=['all'],
    # Start Year
    start='2008',
    # End Year
    end=2016)

# In[28]:

# (rows, columns) of the downloaded table
df_wb.shape

# In[29]:

# First rows of the downloaded table
df_wb.head()

# In[ ]:

# In[36]:
Ejemplo n.º 41
0
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 23 12:13:00 2020

@author: howard

Per-capita GDP of the United States, China and Japan over the last twenty
years, plotted for comparison.
"""

from pandas_datareader import wb
import matplotlib.pyplot as plt

dat = wb.download(
    indicator='NY.GDP.PCAP.KD',
    country=['US', 'CN', 'JP'],
    start=2001,
    end=2021,
)
# One column per country, one row per year.
dat2draw = dat.unstack(level=0)

plt.figure(figsize=(10, 4))
# Line format and legend label for each column, addressed by position.
series_styles = [('r-', "China"), ('b-*', "Japan"), ('g--', "USA")]
for position, (line_format, legend_label) in enumerate(series_styles):
    plt.plot(dat2draw.iloc[:, position], line_format, label=legend_label)
plt.title("PER CAPITA GDP ($)", fontsize=20)
plt.legend()
plt.pause(0)
Ejemplo n.º 42
0
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd  # required below by pd.read_stata / pd.read_excel
from pandas_datareader import wb

# Remote locations of the tutorial's input files.
path = "https://github.com/omercadopopular/cgoes/blob/master/tutorial/python/statatopython/PPI_DB_082316.dta?raw=true"
cpisauce = "https://github.com/omercadopopular/cgoes/blob/master/tutorial/python/statatopython/CPIAUCSL.xls?raw=true"
gdpsauce = "https://github.com/omercadopopular/cgoes/blob/master/tutorial/python/statatopython/gdp.xlsx?raw=true"

#####################################
# 1. Retrieve Databases #############
#####################################

## 1.1 Import GDP data from the World Bank
# Rows with a missing value are dropped and the indicator column is renamed
# to the shorter 'gdp'.

wbdata = (wb.download(
    indicator='NY.GDP.MKTP.CD', country='all', start=1994,
    end=2015).dropna().rename(columns={'NY.GDP.MKTP.CD': 'gdp'}))

## 1.2 Read file from STATA dta

ppidf = pd.read_stata(path)

## 1.3 Import CPI data from excel file
## Note you have to skip 9 rows

cpi = pd.read_excel(cpisauce, skiprows=9, header=1)

#####################################
# 2. Adjust Databases ###############
#####################################
Ejemplo n.º 43
0
import warnings

# Silence FutureWarning before the imports below are executed.
warnings.simplefilter('ignore', FutureWarning)

from pandas_datareader import wb
import matplotlib.pyplot as plt

# Total population for Japan and the US, 1960-2014.
df = wb.download(indicator='SP.POP.TOTL', country=['JP', 'US'],
                 start=1960, end=2014)
print(df)
#                     SP.POP.TOTL
# country       year
# Japan         2014    127276000
#               2013    127445000
#               2012    127629000
#               2011    127833000
#               2010    128070000
# ...                         ...
# United States 1964    191889000
#               1963    189242000
#               1962    186538000
#               1961    183691000
#               1960    180671000
#
# [110 rows x 1 columns]

# Move the country level into the columns: one row per year.
df2 = df.unstack(level=0)
print(df2.head())
#         SP.POP.TOTL              
# country       Japan United States
# year                             
Ejemplo n.º 44
0
#---- ch05/pandas-fred-ma
# Centered 3- and 9-period moving averages of the inflation series
# (`price` is created earlier, outside this excerpt).
price['ma3'] = price.inflation.rolling(3, center=True).mean()
price['ma9'] = price.inflation.rolling(9, center=True).mean()
price


#---- ch05/pandas-fred-ma-plot/plot
# Raw series drawn faintly, with both moving averages on the same axes.
ax = price.inflation.plot(alpha=0.2)
price[['ma3', 'ma9']].plot(ax = ax)
plt.show()


#---- ch05/wb-gdp
from pandas_datareader import wb
gdp = wb.download(indicator='NY.GDP.PCAP.CD', country='all',
                  start=1960, end=2010)
gdp


#---- ch05/wb-gdp-pivot
# Take the natural log of the indicator, then pivot to a year-by-country
# table whose index is cast to an unsigned integer type.
gdp_pivot = gdp.reset_index()
gdp_pivot['NY.GDP.PCAP.CD'] = np.log(gdp_pivot['NY.GDP.PCAP.CD'])
gdp_pivot = gdp_pivot.pivot(index='year', columns='country', 
                            values='NY.GDP.PCAP.CD')
gdp_pivot.index = gdp_pivot.index.astype('uint64')


#---- ch05/wb-gdp-pivot/dnr
gdp_pivot

# FRED
# NOTE(review): `dt` and `data` are used here but imported outside this
# excerpt (`data` is imported again further down) — confirm.
start = dt.datetime(2010, 1, 1)  # start date
codes = ['GDPC1', 'PCECC96']     # real GDP, real consumption
fred  = data.DataReader(codes, 'fred', start)
fred = fred/1000                # convert billions to trillions

fred.plot()

#%%
# World Bank
from pandas_datareader import wb   # World Bank api

var = ['NY.GDP.PCAP.PP.KD']         # GDP per capita
iso = ['USA', 'FRA', 'JPN', 'CHN', 'IND', 'BRA', 'MEX']  # country codes
year = 2013
# A single year: start and end are the same.
wbdf = wb.download(indicator=var, country=iso, start=year, end=year)

#%%
# Drop the 'year' index level so the index holds only the country, then draw
# a horizontal bar chart.
wbdf = wbdf.reset_index(level='year', drop=True)
wbdf.plot(kind='barh')

#%%
# Fama-French equity returns
from pandas_datareader import data  # Package to access FF

# The reader returns several tables; element 0 is used here.
ff = data.DataReader('F-F_Research_Data_factors', 'famafrench')[0]
ff.columns = ['xsm', 'smb', 'hml', 'rf']      # rename variables

#%%
"""
Review
Ejemplo n.º 46
0
import pandas as pd
from pandas_datareader import wb
import matplotlib.pyplot as plt

# Search the indicator catalogue; kept only for interactive inspection.
matches = wb.search('gdp.*capita.*const')

# GDP per capita for China, 2010-2017.
dat = wb.download(
    indicator='NY.GDP.PCAP.KD',
    country='CN',
    start=2010,
    end=2017,
)

# Stack the single indicator column into the index, print and plot it.
data = dat.stack()
print(data)
data.plot(kind='line')
plt.show()
Ejemplo n.º 47
0
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets

#continents = ['DK','ZA','US','GB','CN','IN','BR','CA','RU','TR','KR','VN','SE','DE','AL','FR','BG','IT','PK','ID','MX','PL']

# NOTE(review): presumably World Bank aggregate/region codes rather than
# single countries — confirm.
continents = ['WLD', 'TSA', 'TMN', 'ECS', 'SSF', 'NAC', 'LCN']

from pandas_datareader import wb
# Total population per code, 1970-2015.
pop = wb.download(indicator='SP.POP.TOTL', country=continents, start=1970, end=2015)
pop.head(3)

# GDP per code for the same period.
gdp = wb.download(indicator='NY.GDP.MKTP.KD', country=continents, start=1970, end=2015)
gdp.head(3)

# Merging data:

# Keep only (country, year) pairs present in both tables, then turn the keys
# into ordinary columns and give everything short names.
merged = pd.merge(gdp,pop, how='inner', on=['country','year'])
merged = merged.reset_index()
merged = merged.rename(columns = {'country' : 'continent', 'NY.GDP.MKTP.KD' : 'gdp', 'SP.POP.TOTL' : 'pop'})

# GDP per capita
merged['gdp_cap'] = merged['gdp'] / merged['pop']

# Sorting data:
merged.sort_values(by=['continent','year'], inplace=True)
merged = merged.reset_index(drop = True)
merged.head()

# Indexing:
Ejemplo n.º 48
0
            x11.append(j)
    x1=np.array(x11)        
    y1=np.array(y11)
    if len(y1)>4:
        m=((x1.mean()*y1.mean())-(x1*y1).mean())/((x1.mean()*x1.mean())-(x1*x1).mean())
        b=y1.mean()-(m*x1.mean())
        return (m*p)+b
    else:
        return y1.mean()
    

clmns=[] #Stores names of all the columns used
'''Initiating Dataset with ease of doing business'''
indic="Ease of Doing Business"
clmns.append(indic)
# Download the indicator for all countries from `strtyear` to the current
# year (`strtyear` and `now` are defined outside this excerpt).
maindata=wb.download(country=u"all", indicator="IC.BUS.EASE.XQ",start=strtyear, end=now.year) 
maindata.columns=[indic]
# Copy both index levels into ordinary columns; the year becomes an int.
maindata['Country']=[i[0] for i in maindata.index]
maindata['Year']=[i[1] for i in maindata.index]
maindata['Year']=maindata['Year'].apply(int)
# NOTE(review): presumably meant to reverse the row order inside each country
# group — confirm that transform() does not re-align the result.
maindata = maindata.groupby("Country").transform(lambda x: x.iloc[::-1])
# transform() drops the grouping column, so it is rebuilt from the index.
maindata['Country']=[i[0] for i in maindata.index]
maindata=maindata[["Country","Year",indic]]
# NOTE(review): magic offset; presumably skips leading aggregate/region rows
# — confirm.
maindata=maindata[658:]
# Treat zeros as missing, then fill gaps with the per-group mean.
# NOTE(review): grouping key is lowercase "country" while the column is
# "Country"; presumably this resolves to the index level — confirm.
maindata[indic]=maindata[indic].replace(0, np.nan)
maindata[indic] = maindata.groupby("country")[indic].transform(lambda x: x.fillna(x.mean()))

'''Function to append Features'''
def featureappnd(ind,nm,ft):
    """Register feature name ``nm`` and download World Bank indicator ``ind``.

    NOTE(review): the visible body ends after the download; ``ft`` and the
    downloaded frame are not used in what is shown here, so the function
    appears to be truncated — confirm against the full source.
    """
    clmns.append(nm)
    tmpdt=wb.download(country=u"all", indicator=ind,start=strtyear, end=now.year)
def country_DataFrame_to_list(country, target_data):
    """Return one indicator's 2008-2018 values as a rounded, reversed list.

    Missing values are replaced by the column mean, the first column of the
    downloaded frame is rounded to two decimals, and the order of the rows
    is reversed.
    """
    frame = wb.download(
        indicator=target_data,
        country=country,
        start=2008,
        end=2018,
    )
    frame.fillna(frame.mean(), inplace=True)
    first_column = frame.columns[0]
    rounded = [round(value, 2) for value in frame[first_column].tolist()]
    rounded.reverse()
    return rounded
Ejemplo n.º 50
0
#import matplotlib.pyplot as plt
from pandas_datareader import wb
import quandl

##GETTING DATA FROM WEB#####
# Ticker BRJ8 from the 'moex' source, saved to Excel.
# NOTE(review): `web` and `dt` are not imported in the visible code —
# presumably imported above this excerpt; confirm.
tickers = "BRJ8"
startDate = '2018-1-1'
endDate = dt.datetime.today()
mgWeb = web.DataReader(tickers, 'moex', startDate, endDate)
mgWeb.to_excel('recData/BRENT_FUTURE_from_web.xlsx')

####GETTING ^SPX DATA FROM STOOQ####
tickers = ['^SPX']
dataSource = 'stooq'
startDate = '2018-1-1'
endDate = dt.datetime.today()
gWeb = web.DataReader(tickers, dataSource, startDate, endDate)
gWeb.to_excel('recData/SP500_from_web.xlsx')

# World Bank GDP (NY.GDP.MKTP.CD) for Russia, 2005-2008.
worldBank = wb.download(indicator='NY.GDP.MKTP.CD', country=[
                        'RU'], start=2005, end=2008)
worldBank.to_excel('recData/WB_from_web.xlsx')

# Quandl data set BANKRUSSIA/KEYECIND.
keyIndicies = quandl.get('BANKRUSSIA/KEYECIND')
keyIndicies.to_excel('recData/CB_from_web.xlsx')

# Keep three columns and derive two more.
mgWeb = mgWeb[['VALUE', 'OPEN', 'LOW']]
# NOTE(review): 57 is presumably a fixed RUB/USD exchange rate — confirm.
mgWeb = mgWeb.assign(LowInUSD=mgWeb['LOW']/57)
mgWeb = mgWeb.assign(Numbers=mgWeb['VALUE']/mgWeb['LOW'])
print(mgWeb)