Beispiel #1
0
    def perform_computation(self, df, ameco_df):
        """Append the UVGDH and KNP rows to ``self.result`` and finalise it.

        UVGDH: the ``UVGDH.1.0.0.0`` series read from ``ameco_df`` is
        ratio-spliced forward with the same variable read from ``df``. If
        either lookup raises ``KeyError`` the plain ``UVGDH`` series from
        ``df`` is used instead.

        KNP: the ``KNP.1.0.212.0`` series is copied from ``ameco_df``.

        Afterwards the result is indexed by country and variable code, scaled
        through ``self.apply_scale()`` and exported with ``export_to_excel``
        (step 6).

        Args:
            df: frame the forecast series are read from via ``self.get_data``.
            ameco_df: frame the AMECO series are read from.

        Returns:
            ``self.result`` after indexing and scaling.

        Raises:
            KeyError: propagated from ``self.get_data`` when the fallback
                ``UVGDH`` series or the KNP series is missing.
        """
        uvgdh, uvgdh_1, knp = 'UVGDH', 'UVGDH.1.0.0.0', 'KNP.1.0.212.0'
        series_meta = self.get_meta(uvgdh)
        splicer = Splicer()
        try:
            series_data = self.get_data(ameco_df, uvgdh_1)
            # The splice direction is passed as ``kind``, the keyword used by
            # the other ``ratio_splice`` callers; this call used ``type``.
            series_data = splicer.ratio_splice(series_data,
                                               self.get_data(df, uvgdh_1),
                                               kind='forward')
        except KeyError:
            # No AMECO history (or no '.1.0.0.0' forecast): use the raw series.
            series_data = self.get_data(df, uvgdh)
        series = pd.Series(series_meta)
        series = series.append(series_data)
        self.result = self.result.append(series, ignore_index=True, sort=True)

        series_meta = self.get_meta(knp)
        series_data = self.get_data(ameco_df, knp)
        series = pd.Series(series_meta)
        series = series.append(series_data)
        self.result = self.result.append(series, ignore_index=True, sort=True)

        self.result.set_index(['Country Ameco', 'Variable Code'],
                              drop=True,
                              inplace=True)
        self.apply_scale()
        export_to_excel(self.result, step=6, country=self.country)

        return self.result
Beispiel #2
0
Datei: mixins.py Projekt: e/FDMS
    def _sum_and_splice(self, addends, df, ameco_h_df, splice=True):
        """Sum the sources of each target variable and append one row per target.

        Args:
            addends: mapping of target variable code to a list of source
                variable codes. A source prefixed with ``-`` is subtracted.
            df: frame the source series are read from. A source missing from
                it (``KeyError``) is read from ``self.result`` instead.
            ameco_h_df: frame the base series of each target is read from. A
                target missing from it is stored without splicing.
            splice: when ``False`` the summed series is stored as is;
                otherwise it is butt-spliced forward onto the base series.

        Side effects:
            Appends one row (metadata followed by data) per target to
            ``self.result``.
        """
        splicer = Splicer()
        for variable, sources in addends.items():
            series_meta = self.get_meta(variable)
            try:
                base_series = self.get_data(ameco_h_df, variable)
            except KeyError:
                base_series = None

            # The target scale does not depend on the source, so look it up
            # once per target instead of once per source.
            expected_scale = self.get_scale(variable)
            # Explicit dtype: a bare ``pd.Series()`` is deprecated and its
            # default dtype differs between pandas versions.
            splice_series = pd.Series(dtype='float64')
            for source in sources:
                factor = 1
                if source.startswith('-'):
                    source = source[1:]
                    factor = -1
                src_scale = self.get_scale(source, dataframe=df)
                if src_scale != expected_scale:
                    # Convert the source to the target scale; ``self.codes``
                    # presumably maps a scale name to its power of 1000 -
                    # TODO confirm.
                    factor = factor * pow(
                        1000,
                        self.codes[src_scale] - self.codes[expected_scale])
                try:
                    source_data = factor * self.get_data(df, source)
                except KeyError:
                    source_data = factor * self.get_data(self.result, source)
                # Missing observations on either side count as zero.
                splice_series = splice_series.add(source_data, fill_value=0)

            if base_series is None or splice is False:
                series_data = splice_series
            else:
                series_data = splicer.butt_splice(base_series, splice_series, kind='forward')
            # Japan: these two variables are always ratio-spliced, and URTG is
            # rebuilt from the UUTG and UBLG rows already in ``self.result``.
            if self.country == 'JP' and variable in ['UUTG.1.0.0.0', 'URTG.1.0.0.0']:
                if variable == 'URTG.1.0.0.0':
                    new_sources = ['UUTG.1.0.0.0', 'UBLG.1.0.0.0']
                    splice_series = self.get_data(
                        self.result, new_sources[0]) + self.get_data(
                        self.result, new_sources[1]
                    )
                series_data = splicer.ratio_splice(base_series, splice_series, kind='forward')
            series = pd.Series(series_meta)
            series = series.append(series_data)
            self.result = self.result.append(series, ignore_index=True, sort=True)
Beispiel #3
0
    def perform_computation(self, df, ameco_df):
        """Compute the labour-market rows and return the finalised result.

        Rows are appended to ``self.result`` in this order:

        1. ``FETD9.1.0.0.0`` / ``FWTD9.1.0.0.0``: taken from ``df`` for
           countries in ``FCRIF`` and for ``US``, otherwise the AMECO history
           ratio-spliced forward with ``NETD`` / ``NWTD`` from ``df``.
        2. For UWCD, UWWD and UWSC: the series divided by FWTD9 (``H...W``
           codes) and a rebased variant (``R...C.3.1.0.0`` codes).
        3. Ratios of a numerator over a denominator (RVGDE, RVGEW, ZATN9,
           ZETN9, ZUTN9); the three ``Z`` variables are multiplied by 100.
        4. ``FETD9.6.0.0.0``: percent change of FETD9.
        5. ``PLCD.3.1.0.0`` and ``QLCD.3.1.0.0``, rebased to ``BASE_PERIOD``.
        6. Percent-change (``.6.0.0.0``) variants of eight rows computed above.

        The result is then indexed by country and variable code, scaled with
        ``self.apply_scale()`` and exported with ``export_to_excel`` (step 11).

        Args:
            df: frame the forecast series are read from via ``self.get_data``.
            ameco_df: frame the AMECO series are read from.

        Returns:
            ``self.result`` after indexing and scaling.

        Raises:
            KeyError: propagated from ``self.get_data`` for missing series,
                except where a fallback is coded explicitly below.
        """
        operators = Operators()
        splicer = Splicer()

        def store(variable, data):
            # Append one row to the result: the metadata of ``variable``
            # followed by its data.
            row = pd.Series(self.get_meta(variable)).append(data)
            self.result = self.result.append(row, ignore_index=True, sort=True)

        # --- Employment (FETD9) and employees (FWTD9) -----------------------
        fetd9_code, fwtd9_code = 'FETD9.1.0.0.0', 'FWTD9.1.0.0.0'
        if self.country in FCRIF:
            try:
                fetd9 = self.get_data(df, 'FETD.1.0.0.0')
                fwtd9 = self.get_data(df, 'FWTD.1.0.0.0')
            except KeyError:
                fetd9 = self.get_data(df, 'NETD.1.0.0.0')
                fwtd9 = self.get_data(df, 'NWTD.1.0.0.0')
        elif self.country == 'US':
            fetd9 = self.get_data(df, 'NETD.1.0.0.0')
            fwtd9 = self.get_data(df, 'NWTD.1.0.0.0')
        else:
            fetd9 = splicer.ratio_splice(self.get_data(ameco_df, fetd9_code),
                                         self.get_data(df, 'NETD'),
                                         kind='forward')
            # FWTD9 is spliced onto its own AMECO history. The previous code
            # reused the FETD9 history here, an apparent copy-paste of the
            # call above.
            fwtd9 = splicer.ratio_splice(self.get_data(ameco_df, fwtd9_code),
                                         self.get_data(df, 'NWTD'),
                                         kind='forward')
        store(fetd9_code, fetd9)
        store(fwtd9_code, fwtd9)

        # --- Compensation per head, nominal and rebased ---------------------
        private_consumption_u = self.get_data(df, 'UCPH.1.0.0.0')
        private_consumption_o = self.get_data(df, 'OCPH.1.0.0.0')
        for base in ('UWCD', 'UWWD', 'UWSC'):
            per_head = self.get_data(df, base + '.1.0.0.0') / fwtd9
            store(re.sub('^U', 'H', base) + 'W.1.0.0.0', per_head)
            # NOTE(review): this divides by UCPH and then by OCPH. If the
            # intent is to deflate by the UCPH/OCPH price ratio, the divisor
            # needs parentheses - confirm against the FDMS+ formula.
            store(re.sub('^U', 'R', base) + 'C.3.1.0.0',
                  operators.rebase(
                      per_head / private_consumption_u / private_consumption_o,
                      base_period=BASE_PERIOD))

        # --- Simple ratios: (target, numerator, denominator) ----------------
        # NOTE(review): 'RVGEW.1.0.0.0' appears twice, so two rows with that
        # code are appended - confirm the intended third target.
        ratios = [
            ('RVGDE.1.0.0.0', 'OVGD.1.0.0.0', 'FETD9.1.0.0.0'),
            ('RVGEW.1.0.0.0', 'OVGE.1.0.0.0', 'FETD9.1.0.0.0'),
            ('RVGEW.1.0.0.0', 'OVGD.1.0.0.0', 'NETD.1.0.0.0'),
            ('ZATN9.1.0.0.0', 'NLTN.1.0.0.0', 'NPAN1.1.0.0.0'),
            ('ZETN9.1.0.0.0', 'NETN.1.0.0.0', 'NPAN1.1.0.0.0'),
            ('ZUTN9.1.0.0.0', 'NUTN.1.0.0.0', 'NLTN.1.0.0.0'),
        ]
        percentages = ('ZATN9.1.0.0.0', 'ZETN9.1.0.0.0', 'ZUTN9.1.0.0.0')
        for variable, numerator, denominator in ratios:
            if denominator == fetd9_code:
                # FETD9 was computed above and is not read back from ``df``.
                denominator_series = fetd9
            else:
                denominator_series = self.get_data(df, denominator)
            series_data = self.get_data(df, numerator) / denominator_series
            if variable in percentages:
                series_data = series_data * 100
            store(variable, series_data)

        store('FETD9.6.0.0.0', fetd9.pct_change() * 100)

        # --- ZUTN ------------------------------------------------------------
        # NOTE(review): as in the previous version, the series built here is
        # not appended to ``self.result`` - confirm whether it should be.
        zutn = 'ZUTN.1.0.0.0'
        if self.country in EU:
            # ZUTN based on NUTN.1.0.0.0 and NETN.1.0.0.0 (18/01/2017) is
            # commented out in FDMS+. A formula evaluated here before was
            # unconditionally overwritten by this splice and has been removed.
            zutn_data = splicer.butt_splice(
                self.get_data(ameco_df, zutn),
                self.get_data(ameco_df, zutn),
                kind='forward')
        else:
            try:
                netn1 = self.get_data(df, 'NETN.1.0.0.0')
            except KeyError:
                netn1 = self.get_data(df, 'NETN')
            ameco_zutn = self.get_data(ameco_df, zutn)
            nutn1 = self.get_data(df, 'NUTN.1.0.0.0')
            # ``netn1`` already holds the series; it was previously passed
            # back into ``get_data`` as if it were a variable code.
            zutn_data = splicer.level_splice(ameco_zutn,
                                             nutn1 / (nutn1 + netn1) * 100)

        # NUTN ratiospliced (18/01/2017) is commented out in FDMS+

        # --- PLCD / QLCD: (target, numerator, denominator) ------------------
        rebased_ratios = [
            ('PLCD.3.1.0.0', 'HWCDW.1.0.0.0', 'RVGDE.1.0.0.0'),
            ('QLCD.3.1.0.0', 'PLCD.3.1.0.0', 'PVGD.3.1.0.0'),
        ]
        for variable, numerator, denominator in rebased_ratios:
            # PVGD comes from the input frame; every other operand is a row
            # appended to the result earlier in this method.
            if denominator == 'PVGD.3.1.0.0':
                denominator_series = self.get_data(df, denominator)
            else:
                denominator_series = self.get_data(self.result, denominator)
            store(variable,
                  operators.rebase(
                      self.get_data(self.result, numerator) /
                      denominator_series,
                      base_period=BASE_PERIOD))

        # --- Percent-change variants ----------------------------------------
        level_variables = [
            'RWCDC.3.1.0.0', 'PLCD.3.1.0.0', 'QLCD.3.1.0.0', 'HWCDW.1.0.0.0',
            'HWSCW.1.0.0.0', 'HWWDW.1.0.0.0', 'RVGDE.1.0.0.0', 'RVGEW.1.0.0.0'
        ]
        for variable in level_variables:
            # Swap the trailing '.x.y.0.0' suffix for '.6.0.0.0'.
            variable_6 = re.sub('.....0.0$', '.6.0.0.0', variable)
            store(variable_6,
                  self.get_data(self.result, variable).pct_change() * 100)

        self.result.set_index(['Country Ameco', 'Variable Code'],
                              drop=True,
                              inplace=True)
        self.apply_scale()
        export_to_excel(self.result, step=11, country=self.country)
        return self.result
Beispiel #4
0
    def perform_computation(self, result_1, result_7, ameco_h_df):
        """Compute the household-account rows and return the finalised result.

        UYOH, UVGH, UVGHA, USGH and UBLH are built with
        ``self._sum_and_splice`` (always with ``splice=False``). OVGHA and
        ASGH are then derived from those rows and appended. Finally the result
        is indexed by country and variable code, scaled with
        ``self.apply_scale()`` and exported with ``export_to_excel`` (step 14).

        Args:
            result_1: frame of earlier results used as the source of inputs.
            result_7: frame the ``PCPH.3.1.0.0`` series is read from.
            ameco_h_df: frame handed to ``_sum_and_splice`` and used for the
                ``ASGH.1.0.0.0`` history.

        Returns:
            ``self.result`` after indexing and scaling.
        """
        # TODO: Check the scales of the output variables
        splicer = Splicer()
        operators = Operators()
        index_columns = ['Country Ameco', 'Variable Code']

        def combined_inputs():
            # Rows computed so far (indexed like ``result_1``) stacked on top
            # of ``result_1``, so later sums can use earlier outputs.
            indexed = self.result.set_index(index_columns, drop=True)
            return pd.concat([indexed, result_1], sort=True)

        def append_row(variable, data):
            # Append the metadata of ``variable`` followed by ``data``.
            row = pd.Series(self.get_meta(variable)).append(data)
            self.result = self.result.append(row, ignore_index=True, sort=True)

        # The sums come first; OVGHA.3.0.0.0 and ASGH.1.0.0.0 are derived
        # from them further down.
        self._sum_and_splice(
            {'UYOH.1.0.0.0': ['UOGH.1.0.0.0', 'UYNH.1.0.0.0']},
            result_1, ameco_h_df, splice=False)

        inputs = combined_inputs()
        self._sum_and_splice(
            {
                'UVGH.1.0.0.0': [
                    'UWCH.1.0.0.0', 'UYOH.1.0.0.0', 'UCTRH.1.0.0.0',
                    '-UTYH.1.0.0.0', '-UCTPH.1.0.0.0'
                ]
            },
            inputs, ameco_h_df, splice=False)

        # UVGHA and USGH share the same input snapshot.
        inputs = combined_inputs()
        self._sum_and_splice(
            {'UVGHA.1.0.0.0': ['UVGH.1.0.0.0', 'UEHH.1.0.0.0']},
            inputs, ameco_h_df, splice=False)
        self._sum_and_splice(
            {
                'USGH.1.0.0.0': [
                    'UWCH.1.0.0.0', 'UOGH.1.0.0.0', 'UYNH.1.0.0.0',
                    'UCTRH.1.0.0.0', '-UTYH.1.0.0.0', '-UCTPH.1.0.0.0',
                    'UEHH.1.0.0.0', '-UCPH0.1.0.0.0'
                ]
            },
            inputs, ameco_h_df, splice=False)

        inputs = combined_inputs()
        # This formula uses *ignoremissingsubtract* rather than
        # *ignoremissingsum*, hence the minus sign on every source but the
        # first.
        self._sum_and_splice(
            {
                'UBLH.1.0.0.0': [
                    'USGH.1.0.0.0', '-UITH.1.0.0.0', '-UKOH.1.0.0.0'
                ]
            },
            inputs, ameco_h_df, splice=False)

        # OVGHA: UVGHA over PCPH, rebased, then multiplied by the UVGHA value
        # at the base period divided by 100.
        uvgha = self.get_data(inputs, 'UVGHA.1.0.0.0')
        pcph = self.get_data(result_7, 'PCPH.3.1.0.0')
        base_level = uvgha.loc[BASE_PERIOD]
        ovgha = operators.rebase(uvgha / pcph, BASE_PERIOD) / 100 * base_level
        append_row('OVGHA.3.0.0.0', ovgha)

        # ASGH: USGH as a percentage of UVGHA, butt-spliced onto the history.
        usgh = self.get_data(inputs, 'USGH.1.0.0.0')
        uvgha_for_ratio = self.get_data(inputs, 'UVGHA.1.0.0.0')
        asgh_history = self.get_data(ameco_h_df, 'ASGH.1.0.0.0')
        asgh = splicer.butt_splice(asgh_history,
                                   usgh / uvgha_for_ratio * 100)
        append_row('ASGH.1.0.0.0', asgh)

        self.result.set_index(index_columns, drop=True, inplace=True)
        self.apply_scale()
        export_to_excel(self.result, step=14, country=self.country)
        return self.result
Beispiel #5
0
 def rebase(self, series, base_period):
     """Ratio-splice ``series`` onto a one-point anchor series.

     The anchor holds the single observation ``{base_period: 100}`` and
     ``series`` is spliced onto it with ``kind='both'``. Presumably this
     rescales ``series`` so that it equals 100 at ``base_period`` - confirm
     against ``Splicer.ratio_splice``.

     Args:
         series: the series to rebase.
         base_period: index label of the anchor observation.

     Returns:
         Whatever ``Splicer.ratio_splice`` returns for the anchor and
         ``series``.
     """
     anchor = pd.Series({base_period: 100})
     return Splicer().ratio_splice(anchor, series, kind='both')
Beispiel #6
0
    def perform_computation(self, df, ameco_df):
        for variable in NA_VO:
            new_variable = variable + '.1.0.0.0'
            u_variable = re.sub('^.', 'U', variable)
            variable11 = variable + '.1.1.0.0'
            if self.country in FCWVACP:
                try:
                    new_data = self.splicer.ratio_splice(
                        self.get_data(ameco_df, u_variable),
                        self.get_data(df, variable),
                        kind='forward')
                except KeyError:
                    logger.error(
                        'Failed to calculate {} (national accounts volume).'.
                        format(variable))
                    continue
                new_meta = pd.Series(self.get_meta(new_variable))
                new_series = new_meta.append(new_data)
                self.result = self.result.append(new_series, ignore_index=True)
            else:
                try:
                    series = self.get_data(df, variable)
                    u_series = self.get_data(df, u_variable)
                except KeyError:
                    logger.error(
                        'Failed to calculate {} (national accounts volume).'.
                        format(variable))
                    continue
                try:
                    series11 = self.get_data(ameco_df, variable11)
                    series11[2019] = pd.np.nan
                except KeyError:
                    logger.warning(
                        'Missing Ameco data for variable {} (national accounts volume). Using data '
                        'from country desk forecast'.format(variable11))
                splice_series = (series / u_series.shift(1) - 1) * 100
                # RatioSplice(base, level(series)) = base * (1 + 0,01 * series)
                new_data = self.splicer.splice_and_level_forward(
                    series11, splice_series)
                new_meta = pd.Series(self.get_meta(new_variable))
                new_series = new_meta.append(new_data)
                self.result = self.result.append(new_series, ignore_index=True)

        # Imports / exports of goods and services
        omgs, oxgs, obgn, obsn, oigp = 'OMGS.1.0.0.0', 'OXGS.1.0.0.0', 'OBGN.1.0.0.0', 'OBSN.1.0.0.0', 'OIGP.1.0.0.0'
        variables = {
            omgs: {
                'ameco': 'OMGS.1.1.0.0',
                'goods': 'OMGN',
                'services': 'OMSN',
                'u_goods': 'UMGN',
                'u_services': 'UMSN'
            }
        }
        variables[oxgs] = {
            'ameco': 'OXGS.1.1.0.0',
            'goods': 'OXGN',
            'services': 'OXSN',
            'u_goods': 'UXGN',
            'u_services': 'UXSN'
        }
        variables[obgn] = {
            'exports': 'OXGN.1.1.0.0',
            'imports': 'OMGN.1.1.0.0',
            'new_exports': 'OXGN',
            'u_exports': 'UXGN',
            'new_imports': 'OMGN',
            'u_imports': 'UMGN'
        }
        variables[obsn] = {
            'exports': 'OXSN.1.1.0.0',
            'imports': 'OMSN.1.1.0.0',
            'new_exports': 'OXSN',
            'u_exports': 'UXSN',
            'new_imports': 'OMGN',
            'u_imports': 'UMGN'
        }
        variables[oigp] = {
            'exports': 'OIGT.1.1.0.0',
            'imports': 'OIGG.1.1.0.0',
            'new_exports': 'OIGG',
            'u_exports': 'UIGG',
            'new_imports': 'OIGG',
            'u_imports': 'UIGG'
        }

        for variable in variables:
            base_series = None
            try:
                base_series, splice_series_1, splice_series_2 = self._get_data(
                    variable, variables[variable], df, ameco_df)
            except TypeError:
                logger.error(
                    'Missing data for variable {} in national accounts volume'.
                    format(variable))
            # if variable == obsn:
            #     import code;code.interact(local=locals())
            self._update_result(variable, base_series, splice_series_1,
                                splice_series_2)

        # Net exports goods and services
        var = 'OBGS.1.0.0.0'
        ameco_exports = 'OXGS.1.1.0.0'
        ameco_imports = 'OMGS.1.1.0.0'
        goods_exports = 'OXGN'
        services_exports = 'OXSN'
        goods_imports = 'OMGN'
        services_imports = 'OMSN'
        u_goods_exports = 'UXGN'
        u_services_exports = 'UXSN'
        u_goods_imports = 'UMGN'
        u_services_imports = 'UMSN'
        export_series = self.get_data(df, goods_exports) + self.get_data(
            df, services_exports)
        import_series = self.get_data(df, goods_imports) + self.get_data(
            df, services_imports)
        u_exports = self.get_data(df, u_goods_exports) + self.get_data(
            df, u_services_exports)
        u_imports = self.get_data(df, u_goods_imports) + self.get_data(
            df, u_services_imports)
        base_series = self.get_data(ameco_df, ameco_exports) - self.get_data(
            ameco_df, ameco_imports)
        splice_series_1 = export_series - import_series
        splice_series_2 = ((export_series - import_series) /
                           (u_exports - u_imports).shift(1) - 1) * 100
        self._update_result(var, base_series, splice_series_1, splice_series_2)

        # Investments
        var = 'OIGNR.1.0.0.0'
        ameco_1 = 'OIGCO.1.1.0.0'
        ameco_2 = 'OIGDW.1.1.0.0'
        investments_1 = 'OIGCO'
        investments_2 = 'OIGDW'
        u_investments_1 = 'UIGCO'
        u_investments_2 = 'UIGDW'
        net_series = self.get_data(df, investments_1) - self.get_data(
            df, investments_2)
        u_net_series = self.get_data(df, u_investments_1) - self.get_data(
            df, u_investments_2)
        base_series = self.get_data(ameco_df, ameco_1) - self.get_data(
            ameco_df, ameco_2)
        splice_series_1 = net_series.copy()
        splice_series_2 = (net_series / u_net_series.shift(1) - 1) * 100
        self._update_result(var, base_series, splice_series_1, splice_series_2)

        # Domestic demand
        var = 'OUNF.1.0.0.0'
        private_consumption = 'OCPH.1.1.0.0'
        government_consumption = 'OCTG.1.1.0.0'
        use_ameco = 'OIGT.1.1.0.0'
        new_private_consumption = 'OCPH'
        new_government_consumption = 'OCTG'
        new_use = 'OIGT'
        u_new_private_consumption = 'UCPH'
        u_new_government_consumption = 'UCTG'
        u_new_use = 'UIGT'
        u_series = self.get_data(
            df, u_new_private_consumption) + self.get_data(
                df, u_new_government_consumption) + self.get_data(
                    df, u_new_use)
        base_series = self.get_data(
            ameco_df, private_consumption) + self.get_data(
                ameco_df, government_consumption) + self.get_data(
                    ameco_df, use_ameco)
        splice_series_1 = self.get_data(
            df, new_private_consumption) + self.get_data(
                df, new_government_consumption) + self.get_data(df, new_use)
        splice_series_2 = (splice_series_1 / u_series.shift(1) - 1) * 100
        self._update_result(var, base_series, splice_series_1, splice_series_2)

        # Domestic demand
        variables = {
            'OUNT.1.0.0.0': ['OUNT.1.1.0.0', 'OCPH', 'OCTG', 'OIGT', 'OIST'],
            'OUTT.1.0.0.0':
            ['OUTT.1.1.0.0', 'OCPH', 'OCTG', 'OIGT', 'OIST', 'OXGN', 'OXSN'],
            'OITT.1.0.0.0': ['OITT.1.0.0.0', 'OIGT', 'OIST']
        }
        for var, new_vars in variables.items():
            base_series = None
            splice_series_1 = sum([self.get_data(df, v) for v in new_vars[1:]])
            try:
                base_series = self.get_data(df, new_vars[0])
            except KeyError:
                logger.warning(
                    'No historical data for {} to level_splice, country {}, using country forecast '
                    'data.'.format(new_vars[0], self.country))
            splice_series_2 = None
            if self.country not in FCWVACP:
                u_new_vars = [re.sub('^.', 'U', v) for v in new_vars[1:]]
                try:
                    sum_u_series = sum(
                        self.get_data(df, v) for v in new_vars[1:])
                    splice_series_2 = splice_series_1.copy(
                    ) / sum_u_series.shift(1) - 1 * 100
                    self._update_result(var, base_series, splice_series_1,
                                        splice_series_2)
                except KeyError:
                    logger.error(
                        'Missing data for variable {} in national accounts volume (172)'
                        .format(new_variable))
            else:
                self._update_result(var, base_series, splice_series_1, None)

        # Volume, rebase to baseperiod, percent change, contribution to percent change in GDP
        for var in NA_VO:
            new_variable = var + '.1.0.0.0'
            u1_variable = re.sub('^.', 'U', var) + '.1.0.0.0'

            # TODO: Review this
            new_vars = ['OXGS.1.0.0.0', 'OVGE.1.0.0.0']
            if new_variable in self.result['Variable Code'].values.tolist(
            ) + new_vars:
                if new_variable not in new_vars:
                    result_series_index = self.get_index(new_variable)
                    series_orig = self.result.loc[result_series_index]
                    data_orig = pd.to_numeric(
                        series_orig.filter(regex=r'[0-9]{4}'), errors='coerce')
                else:
                    logger.error(
                        'Missing data for variable {} in national accounts volume'
                        .format(u1_variable))

                # Rebase to baseperiod
                if u1_variable in df.index.get_level_values('Variable Code'):
                    series_meta = self.get_meta(new_variable)
                    u1_series = self.get_data(df, u1_variable)
                    value_to_rebase = data_orig[BASE_PERIOD] / u1_series[
                        BASE_PERIOD]
                    series_data = data_orig * value_to_rebase
                    series = pd.Series(series_meta)
                    series = series.append(series_data)
                    self.result.iloc[result_series_index] = series
                else:
                    logger.error(
                        'Missing data for variable {} in national accounts volume'
                        .format(u1_variable))

                # Percent change
                variable_6 = var + '.6.0.0.0'
                series_meta = self.get_meta(variable_6)
                series_data = data_orig.pct_change() * 100
                series = pd.Series(series_meta)
                series = series.append(series_data)
                self.result = self.result.append(series,
                                                 ignore_index=True,
                                                 sort=True)

                # Contribution to percent change in GDP
                variable_c1 = re.sub('^.', 'C', var) + '.1.0.0.0'
                variable_x = new_variable if self.country in [
                    'MT', 'TR'
                ] else u1_variable
                series_6_index = self.get_index(variable_6)
                data_6 = self.get_data(self.result, variable_6)
                # series_6 = self.result.loc[result_series_index]
                # data_6 = pd.to_numeric(series_6.filter(regex=r'[0-9]{4}'), errors='coerce')
                xvgd = 'OVGD.1.0.0.0' if self.country in ['MT', 'TR'
                                                          ] else 'UVGD.1.0.0.0'
                series_meta = self.get_meta(variable_c1)
                data_x = self.get_data(df, variable_x).shift(1)
                data_xvgd = self.get_data(df, xvgd).shift(1)
                if variable_c1 not in ['CBGN.1.0.0.0']:
                    try:
                        data_x[1996] = self.get_data(ameco_df,
                                                     variable_x)[1996]
                    except KeyError:
                        pass
                    try:
                        data_x[1996] = self.get_data(ameco_df, xvgd)[1996]
                    except KeyError:
                        pass
                try:
                    series_data = data_6 * data_x / data_xvgd
                except KeyError:
                    logger.error(
                        'Missing data for variable {} in national accounts volume'
                        .format(new_variable))
                    continue
                series = pd.Series(series_meta)
                series = series.append(series_data)
                # if variable_c1 == 'CMGS.1.0.0.0':
                #     import code;code.interact(local=locals())
                self.result = self.result.append(series,
                                                 ignore_index=True,
                                                 sort=True)

            else:
                logger.error(
                    'Missing data for variable {} in national accounts volume'.
                    format(new_variable))
            r = self.result.copy()
            if new_variable == 'OVGD.1.0.0.0':
                ovgd1 = self.get_data(self.result, 'OVGD.1.0.0.0')
            # if variable_c1 == 'CMGS.1.0.0.0':
            #     import code;code.interact(local=locals())

        # Contribution to percent change in GDP (calculation for additional variables)
        var = 'CMGS.1.0.0.0'
        series_meta = self.get_meta(var)
        series_data = -self.get_data(self.result, var)
        index = self.get_index(var)
        series = pd.Series(series_meta)
        series = series.append(series_data)
        self.result.iloc[index] = series
        var = 'CBGS.1.0.0.0'
        exports = 'CXGS.1.0.0.0'
        imports = 'CMGS.1.0.0.0'
        series_meta = self.get_meta(var)
        series_meta['Variable Code'] = var
        series_data = self.get_data(self.result, exports) + self.get_data(
            self.result, imports)
        index = self.get_index(var)
        series = pd.Series(series_meta)
        series = series.append(series_data)
        self.result = self.result.append(series, ignore_index=True, sort=True)
        # TODO: If Country in group 'Forecast: Countries with volumes at constant prices' line 202 country calc

        # Per-capita GDP
        # TODO: fix scale, frequency and country everywhere
        # TODO: fix this
        new_variable = 'RVGDP.1.0.0.0'
        ameco_variable = 'RVGDP.1.1.0.0'
        variable_6 = re.sub('.1.0.0.0', '.6.0.0.0', new_variable)
        total_population = 'NPTD.1.0.0.0'
        potential_gdp = 'OVGD.1.0.0.0'
        series_meta = self.get_meta(new_variable)
        series_6_meta = self.get_meta(variable_6)
        ameco_series = self.get_data(ameco_df, ameco_variable)
        splice_series = ovgd1 / self.get_data(df, total_population)
        splicer = Splicer()
        series_data = splicer.ratio_splice(ameco_series,
                                           splice_series,
                                           kind='forward')
        series_6_data = series_data.pct_change() * 100
        series = pd.Series(series_meta)
        series = series.append(series_data)
        self.result = self.result.append(series, ignore_index=True, sort=True)
        series_6 = pd.Series(series_6_meta)
        series_6 = series_6.append(series_6_data)
        self.result = self.result.append(series_6,
                                         ignore_index=True,
                                         sort=True)
        # TODO: Do not add series if they're alreade there, i.e. df.loc['BE','UMGS'] is repeated

        # Terms of trade
        variables = ['APGN.3.0.0.0', 'APSN.3.0.0.0', 'APGS.3.0.0.0']
        exports_1 = ['UXGN.1.0.0.0', 'UXSN.1.0.0.0', 'UXGS.1.0.0.0']
        exports_2 = ['OXGN.1.0.0.0', 'OXSN.1.0.0.0', 'OXGS.1.0.0.0']
        imports_1 = ['UMGN.1.0.0.0', 'UMSN.1.0.0.0', 'UMGS.1.0.0.0']
        imports_2 = ['OMGN.1.0.0.0', 'OMSN.1.0.0.0', 'OMGS.1.0.0.0']
        for index, variable in enumerate(variables):
            series_meta = self.get_meta(variable)
            series_data = (
                self.get_data(df, exports_1[index]) /
                self.get_data(self.result, exports_2[index]) /
                (self.get_data(df, imports_1[index]) /
                 self.get_data(self.result, imports_2[index]))) * 100
            series = pd.Series(series_meta)
            series = series.append(series_data)
            self.result = self.result.append(series,
                                             ignore_index=True,
                                             sort=True)
            variable_6 = re.sub('3', '6', variable)
            series_meta = self.get_meta(variable_6)
            series_data = series_data.pct_change() * 100
            series = pd.Series(series_meta)
            series = series.append(series_data)
            self.result = self.result.append(series,
                                             ignore_index=True,
                                             sort=True)

        # Set up OVGD.6.1.212.0 for World GDP volume table
        variable = 'OVGD.6.1.212.0'
        series_meta = self.get_meta(variable)
        series_data = self.get_data(self.result, 'OVGD.6.0.0.0')
        series = pd.Series(series_meta)
        series = series.append(series_data)
        self.result = self.result.append(series, ignore_index=True, sort=True)

        # Convert percent change of trade variables (volume) from national currency to USD
        for variable in T_VO:
            new_variable = variable + '.6.0.30.0'
            variable_6 = variable + '.6.0.0.0'
            series_meta = self.get_meta(new_variable)
            series_data = self.get_data(self.result, variable_6)
            series = pd.Series(series_meta)
            series = series.append(series_data)
            self.result = self.result.append(series,
                                             ignore_index=True,
                                             sort=True)

        series_meta = self.get_meta('OVGD.1.0.0.0')
        series = pd.Series(series_meta)
        # TODO: This shouldn't be needed... Check what's going on
        series = series.append(ovgd1)
        self.result = self.result.append(series, ignore_index=True, sort=True)
        self.result.set_index(['Country Ameco', 'Variable Code'],
                              drop=True,
                              inplace=True)
        self.apply_scale()
        export_to_excel(self.result, step=4, country=self.country)
        return self.result, ovgd1
Beispiel #7
0
    def perform_computation(self, ameco_db_df, xr_df, ameco_xne_us_df):
        """Assemble the step-10 series in ``self.result`` and return it.

        Rows are appended to ``self.result`` in this order:

        1. ``XNE.1.0.99.0``: the ``ameco_db_df`` series, forward ratio-spliced
           with the ``xr_df`` series when ``xr_df`` has one.
        2. ``ILN.1.0.0.0`` / ``ISN.1.0.0.0``: forward butt-splice of the
           ``*.1.1.0.0`` series from ``ameco_db_df`` and ``xr_df``.
        3. ``XNEF.1.0.99.0`` (only when ``self.country`` is in ``EA``) and
           ``XNEB.1.0.99.0``.
        4. ``XNU.1.0.30.0``: forward ratio-splice of the US ``XNE.1.0.99.0``
           series from ``ameco_xne_us_df`` and ``xr_df``.
        5. The effective-exchange-rate / unit-labour-cost series copied from
           ``ameco_db_df`` and the percent changes derived from them.

        Variables that could not be produced in step 5 are written, one per
        line, to ``errors_step_10.txt`` in the current working directory.
        Finally the frame is indexed by ``['Country Ameco', 'Variable Code']``,
        scaled with ``self.apply_scale()`` and exported with
        ``export_to_excel(..., step=10)``.

        Args:
            ameco_db_df: source frame read through ``self.get_data``.
            xr_df: frame providing the series spliced onto the AMECO data.
            ameco_xne_us_df: frame providing the US ``XNE.1.0.99.0`` series.

        Returns:
            ``self.result`` after indexing and scaling.
        """
        # Plain float NaN; replaces the deprecated ``pd.np.nan`` alias.
        nan = float('nan')
        splicer = Splicer()
        xne = 'XNE.1.0.99.0'
        xnef = 'XNEF.1.0.99.0'
        xneb = 'XNEB.1.0.99.0'

        def append_row(meta, data):
            # Metadata fields first, then the yearly data, stored as one row.
            row = pd.Series(meta).append(data)
            self.result = self.result.append(row, ignore_index=True, sort=True)

        series_data = self.get_data(ameco_db_df, xne)
        try:
            xr_data = self.get_data(xr_df, xne)
        except KeyError:
            # No series in xr_df for this variable: keep the AMECO data as is.
            pass
        else:
            # Blank every year after the first valid xr_df year, then let the
            # forward ratio splice fill those years from xr_data.
            splice_start = xr_data.first_valid_index()
            for year in range(splice_start + 1, LAST_YEAR + 1):
                series_data[year] = nan
            series_data = splicer.ratio_splice(series_data.copy(),
                                               xr_data,
                                               kind='forward')
        append_row(self.get_meta(xne), series_data)

        null_dates = list(range(datetime.datetime.now().year - 1, LAST_YEAR))
        rate_sources = [('ILN.1.0.0.0', 'ILN.1.1.0.0'),
                        ('ISN.1.0.0.0', 'ISN.1.1.0.0')]
        for variable, source in rate_sources:
            series_meta = self.get_meta(variable)
            series_data = self.get_data(ameco_db_df,
                                        source,
                                        null_dates=null_dates)
            series_data = splicer.butt_splice(series_data,
                                              self.get_data(xr_df, source),
                                              kind='forward')
            append_row(series_meta, series_data)

        if self.country in EA:
            membership_date = get_membership_date(self.country)
            # From the membership year onwards XNE is fixed at 1.
            for year in range(membership_date, LAST_YEAR + 1):
                self.result.loc[self.result['Variable Code'] == xne, year] = 1

            series_meta = self.get_meta(xnef)
            series_data = self.get_data(ameco_db_df, xne)
            last_valid = series_data.last_valid_index()
            if last_valid < LAST_YEAR:
                # Carry the last valid value forward up to LAST_YEAR.
                for year in range(last_valid + 1, LAST_YEAR + 1):
                    series_data[year] = series_data[last_valid]
            append_row(series_meta, series_data)

            series_meta = self.get_meta(xneb)
            series_data = self.get_data(self.result, xne) * self.get_data(
                self.result, xnef)
            # XNEB is computed first; only afterwards is XNEF blanked from the
            # membership year onwards.
            for year in range(membership_date, LAST_YEAR + 1):
                self.result.loc[self.result['Variable Code'] == xnef,
                                year] = nan
            append_row(series_meta, series_data)
        else:
            series_meta = self.get_meta(xneb)
            series_data = self.get_data(self.result, xne).copy()
            append_row(series_meta, series_data)

        xne_us = self.get_data(xr_df, xne, country='US')
        last_observation = xne_us.first_valid_index()
        new_xne_us = self.get_data(ameco_xne_us_df, xne, country='US')
        for year in range(last_observation + 1, LAST_YEAR + 1):
            new_xne_us[year] = nan
        series_meta = self.get_meta('XNU.1.0.30.0')
        series_data = splicer.ratio_splice(new_xne_us, xne_us, kind='forward')
        append_row(series_meta, series_data)

        # Effective exchange rates and relative unit labour costs, currently not calculated in FDMS+
        variables = [
            'PLCDQ.3.0.0.437', 'PLCDQ.3.0.30.437', 'XUNNQ.3.0.30.437',
            'XUNRQ.3.0.30.437', 'PLCDQ.3.0.0.414', 'PLCDQ.3.0.0.415',
            'PLCDQ.3.0.0.417', 'PLCDQ.3.0.0.424', 'PLCDQ.3.0.0.427',
            'PLCDQ.3.0.0.435', 'PLCDQ.3.0.0.436', 'PLCDQ.3.0.30.414',
            'PLCDQ.3.0.30.415', 'PLCDQ.3.0.30.417', 'PLCDQ.3.0.30.424',
            'PLCDQ.3.0.30.427', 'PLCDQ.3.0.30.435', 'PLCDQ.3.0.30.436',
            'XUNNQ.3.0.30.414', 'XUNNQ.3.0.30.415', 'XUNNQ.3.0.30.417',
            'XUNNQ.3.0.30.423', 'XUNNQ.3.0.30.424', 'XUNNQ.3.0.30.427',
            'XUNNQ.3.0.30.435', 'XUNNQ.3.0.30.436', 'XUNNQ.3.0.30.441',
            'XUNRQ.3.0.30.414', 'XUNRQ.3.0.30.415', 'XUNRQ.3.0.30.417',
            'XUNRQ.3.0.30.424', 'XUNRQ.3.0.30.427', 'XUNRQ.3.0.30.435',
            'XUNRQ.3.0.30.436'
        ]
        missing_vars = []
        for variable in variables:
            series_meta = self.get_meta(variable)
            try:
                series_data = self.get_data(ameco_db_df, variable)
            except KeyError:
                missing_vars.append(variable)
            else:
                append_row(series_meta, series_data)

        # Percent change (``.6.``) of index series (``.3.``) already stored in
        # self.result; pairs are (target, source), processed in this order.
        pct_change_sources = [
            ('PLCDQ.6.0.0.437', 'PLCDQ.3.0.0.437'),
            ('PLCDQ.6.0.0.435', 'PLCDQ.3.0.0.435'),
            ('PLCDQ.6.0.0.436', 'PLCDQ.3.0.0.436'),
            ('XUNNQ.6.0.30.437', 'XUNNQ.3.0.30.437'),
            ('XUNRQ.6.0.30.437', 'XUNRQ.3.0.30.437'),
            ('XUNNQ.6.0.30.435', 'XUNNQ.3.0.30.435'),
            ('XUNNQ.6.0.30.436', 'XUNNQ.3.0.30.436'),
            ('XUNRQ.6.0.30.435', 'XUNRQ.3.0.30.435'),
            ('XUNRQ.6.0.30.436', 'XUNRQ.3.0.30.436'),
        ]
        for variable, source in pct_change_sources:
            series_meta = self.get_meta(variable)
            try:
                series_data = self.get_data(
                    self.result, source).copy().pct_change() * 100
            except (KeyError, IndexError):
                missing_vars.append(variable)
            else:
                append_row(series_meta, series_data)

        # TODO: is it OK? these are missing in ameco_db: PLCDQ.3.0.0.414 PLCDQ.3.0.0.435 PLCDQ.3.0.0.436
        # PLCDQ.3.0.30.414 PLCDQ.3.0.30.435 PLCDQ.3.0.30.436 XUNNQ.3.0.30.414 XUNNQ.3.0.30.423 XUNNQ.3.0.30.435
        # XUNNQ.3.0.30.436 XUNNQ.3.0.30.441 XUNRQ.3.0.30.414 XUNRQ.3.0.30.435 XUNRQ.3.0.30.436 PLCDQ.6.0.0.435
        # PLCDQ.6.0.0.436
        with open('errors_step_10.txt', 'w') as f:
            f.write('\n'.join(missing_vars))

        self.result.set_index(['Country Ameco', 'Variable Code'],
                              drop=True,
                              inplace=True)
        self.apply_scale()
        export_to_excel(self.result, step=10, country=self.country)
        return self.result
Beispiel #8
0
    def perform_computation(self, df, ameco_h_df):
        """Build the step-12 aggregates in ``self.result`` and return it.

        Two batches of sums are delegated to ``self._sum_and_splice``; between
        and after them a few series are copied under a new variable code.
        The frame is then indexed by ``['Country Ameco', 'Variable Code']``,
        scaled with ``self.apply_scale()`` and exported with
        ``export_to_excel(..., step=12)``.

        Args:
            df: frame holding the source series for the sums and copies.
            ameco_h_df: frame forwarded unchanged to ``_sum_and_splice``.

        Returns:
            ``self.result`` after indexing and scaling.
        """
        def append_row(meta, data):
            # Metadata fields first, then the yearly data, stored as one row.
            row = pd.Series(meta).append(data)
            self.result = self.result.append(row, ignore_index=True, sort=True)

        # Target variable -> list of source variables handed to
        # _sum_and_splice; note the '-' prefix on the second UBLG source.
        addends = {
            'UTOG.1.0.0.0': ['UROG.1.0.0.0', 'UPOMN.1.0.0.0'],
            'UUCG.1.0.0.0': [
                'UWCG.1.0.0.0', 'UYTGH.1.0.0.0', 'UYIG.1.0.0.0',
                'UYVG.1.0.0.0', 'UUOG.1.0.0.0', 'UCTGI.1.0.0.0',
                'UYTGM.1.0.0.0'
            ],
            'URCG.1.0.0.0':
            ['UTVG.1.0.0.0', 'UTYG.1.0.0.0', 'UTSG.1.0.0.0', 'UTOG.1.0.0.0'],
            'UUTG.1.0.0.0': ['UUCG.1.0.0.0', 'UIGG0.1.0.0.0', 'UKOG.1.0.0.0'],
            'URTG.1.0.0.0': ['URCG.1.0.0.0', 'UKTTG.1.0.0.0'],
            'UBLG.1.0.0.0': ['URTG.1.0.0.0', '-UUTG.1.0.0.0'],
        }

        if self.country == 'JP':
            # Japan: UUCG starts from UCTG instead of UWCG, and UTOG is not
            # computed at all.
            addends['UUCG.1.0.0.0'][0] = 'UCTG.1.0.0.0'
            del addends['UTOG.1.0.0.0']

        self._sum_and_splice(addends, df, ameco_h_df)

        if self.country not in EU and self.country != 'MK':
            # Copy UBLG (from the result) and UYIG (from df) under their
            # 'E'-suffixed codes.
            series_meta = self.get_meta('UBLGE.1.0.0.0')
            append_row(series_meta,
                       self.get_data(self.result, 'UBLG.1.0.0.0'))

            series_meta = self.get_meta('UYIGE.1.0.0.0')
            append_row(series_meta, self.get_data(df, 'UYIG.1.0.0.0'))

        addends = {
            'UBLGI.1.0.0.0': ['UBLG.1.0.0.0', 'UYIG.1.0.0.0'],
            'UBLGIE.1.0.0.0': ['UBLGE.1.0.0.0', 'UYIGE.1.0.0.0'],
            'UTAT.1.0.0.0': [
                'UTVG.1.0.0.0', 'UTYG.1.0.0.0', 'UTAG.1.0.0.0', 'UTKG.1.0.0.0',
                'UTEU.1.0.0.0'
            ],
            'UOOMS.1.0.0.0': ['UOOMSR.1.0.0.0', 'UOOMSE.1.0.0.0'],
            'UTTG.1.0.0.0': ['UTVG.1.0.0.0', 'UTEU.1.0.0.0'],
            'UDGGL.1.0.0.0': [
                'UDGG.1.0.0.0',
            ]
        }
        self._sum_and_splice(addends, df, ameco_h_df)

        # UDGG is stored as a copy of the UDGGL series just computed.
        series_meta = self.get_meta('UDGG.1.0.0.0')
        append_row(series_meta, self.get_data(self.result, 'UDGGL.1.0.0.0'))

        # TODO: copy EATTG, EATYG and EATSG from cyclical adjustment database

        self.result.set_index(['Country Ameco', 'Variable Code'],
                              drop=True,
                              inplace=True)
        self.apply_scale()
        export_to_excel(self.result, step=12, country=self.country)
        return self.result
Beispiel #9
0
    def perform_computation(self, df, ameco_df):
        """Splice transfer-matrix variables onto AMECO and return the result.

        For every index entry of ``df`` whose second element is a variable in
        ``TM`` but not in ``NA_VO`` (National Account volume variables are
        left out), two rows are appended to ``self.result``:

        * ``<variable>.1.0.0.0``: the AMECO base series combined with the
          ``df`` series. Variables in ``TM_TBBO`` use a forward butt splice,
          variables in ``TM_TBM`` use ``Operators.merge``, all others use a
          forward ratio splice followed by a forward butt splice.
        * ``<variable>``: an unmodified copy of the ``df`` series.

        The frame is then indexed by ``['Country Ameco', 'Variable Code']``,
        scaled with ``self.apply_scale()`` and exported with
        ``export_to_excel(..., step=1)``.

        Args:
            df: frame whose index entries carry the variable code at
                position 1.
            ameco_df: frame providing the ``<variable>.1.0.0.0`` base series.

        Returns:
            ``self.result`` after indexing and scaling.
        """
        # Built once instead of once per row.
        # NOTE(review): assumes neither helper keeps per-call state - confirm.
        splicer = Splicer()
        operators = Operators()

        # Only the index labels are needed, so iterate the index directly
        # rather than materialising a Series per row with iterrows().
        for key in df.index:
            variable = key[1]
            if variable not in TM or variable in NA_VO:
                continue

            new_variable = variable + '.1.0.0.0'
            meta = self.get_meta(variable)
            meta1000 = self.get_meta(new_variable)
            meta['Variable Code'] = variable
            meta1000['Variable Code'] = new_variable

            splice_series = self.get_data(df, variable)
            try:
                base_series = self.get_data(ameco_df, new_variable)
            except KeyError:
                # The splice still runs, with ``None`` as the base series.
                base_series = None
                logger.warning(
                    'Missing Ameco data for variable {} (transfer matrix)'
                    .format(new_variable))

            # Snapshot the unspliced series before any splicing takes place.
            orig_series = splice_series.copy()
            orig_series.name = None
            orig_series = pd.Series(meta).append(orig_series)

            if variable in TM_TBBO:
                new_series = splicer.butt_splice(base_series,
                                                 splice_series,
                                                 kind='forward')
            elif variable in TM_TBM:
                new_series = operators.merge(
                    pd.DataFrame([splice_series, base_series]))
            else:
                ratio_spliced = splicer.ratio_splice(base_series,
                                                     splice_series,
                                                     kind='forward')
                new_series = splicer.butt_splice(ratio_spliced,
                                                 splice_series,
                                                 kind='forward')

            new_series.name = None
            new_series = pd.Series(meta1000).append(new_series)
            # Spliced row first, original row second.
            self.result = self.result.append(new_series, ignore_index=True)
            self.result = self.result.append(orig_series, ignore_index=True)

        self.result.set_index(['Country Ameco', 'Variable Code'],
                              drop=True,
                              inplace=True)
        self.apply_scale()
        export_to_excel(self.result, step=1, country=self.country)
        return self.result
Beispiel #10
0
    def perform_computation(self, df, ameco_df, ameco_db_df):
        '''Capital Stock and Total Factor Productivity.

        Appends these rows to ``self.result``, in order: OIGT.1.0.0.0,
        OVGD.1.0.0.0 and UIGT.1.0.0.0 (each only if present in ``df``),
        UKCT.1.0.0.0, OKCT.1.0.0.0, OINT.1.0.0.0, OKND.1.0.0.0 and
        ZVGDFA3.3.0.0.0. The UKCT, OKCT and OINT rows are additionally
        overwritten in place for the years after OKCT's last valid value.

        The frame is then indexed by ['Country Ameco', 'Variable Code'],
        scaled with ``self.apply_scale()`` and exported with
        ``export_to_excel(..., step=8)``.

        Returns ``self.result``.
        '''
        # ameco_db_df is expected to carry history back to 1960 (see the note
        # before the final loop about reading it with all_data=True)
        variables = ['OIGT.1.0.0.0', 'OVGD.1.0.0.0', 'UIGT.1.0.0.0']
        splicer = Splicer()
        # Backward ratio-splice each df series with its ameco_db_df counterpart
        # and keep only the YEARS columns; variables missing from df are skipped.
        for variable in variables:
            try:
                series_data = self.get_data(df, variable)
            except KeyError:
                logger.warning(
                    'Missing data for variable {} (Capital Stock)'.format(
                        variable))
                continue
            # NOTE(review): presumably get_data can return None - confirm
            if series_data is not None:
                series_data = splicer.ratio_splice(series_data,
                                                   self.get_data(
                                                       ameco_db_df, variable),
                                                   kind='backward',
                                                   variable=variable)[YEARS]
                series_meta = self.get_meta(variable)
                series = pd.Series(series_meta)
                series = series.append(series_data)
                self.result = self.result.append(series,
                                                 ignore_index=True,
                                                 sort=True)

        # TODO: The AMECO_H.TXT only has data till 2017, we might need to update it
        # UKCT: backward ratio-splice of the ameco_df series with ameco_db_df;
        # when ameco_df has no UKCT the ameco_db_df series is used on its own.
        variable = 'UKCT.1.0.0.0'
        try:
            ameco_data = self.get_data(ameco_df, variable)
        except KeyError:
            series_data = self.get_data(ameco_db_df, variable)[YEARS]
        else:
            series_data = splicer.ratio_splice(ameco_data,
                                               self.get_data(
                                                   ameco_db_df,
                                                   variable)[YEARS],
                                               kind='backward')
        series_meta = self.get_meta(variable)
        series = pd.Series(series_meta)
        series = series.append(series_data)
        self.result = self.result.append(series, ignore_index=True, sort=True)

        # OKCT = UKCT / (UIGT / OIGT), with UIGT and OIGT read from df
        variable = 'OKCT.1.0.0.0'
        series_meta = self.get_meta(variable)
        series_data = self.get_data(
            self.result, 'UKCT.1.0.0.0') / (self.get_data(df, 'UIGT.1.0.0.0') /
                                            self.get_data(df, 'OIGT.1.0.0.0'))
        series = pd.Series(series_meta)
        series = series.append(series_data)
        self.result = self.result.append(series, ignore_index=True, sort=True)
        # OINT = OIGT (from df) - OKCT (from the result)
        variable = 'OINT.1.0.0.0'
        series_meta = self.get_meta(variable)
        series_data = self.get_data(df, 'OIGT.1.0.0.0') - self.get_data(
            self.result, 'OKCT.1.0.0.0')
        series = pd.Series(series_meta)
        series = series.append(series_data)
        self.result = self.result.append(series, ignore_index=True, sort=True)

        # OKND is accumulated year by year in new_data below.
        variable = 'OKND.1.0.0.0'
        series_meta = self.get_meta(variable)

        series_1 = self.get_data(ameco_db_df, 'OVGD.1.0.0.0')
        series_2 = self.get_data(ameco_db_df, 'OIGT.1.0.0.0')

        # Starting year: the year before OIGT's first valid value when OVGD
        # starts more than one year earlier, otherwise OVGD's first valid year.
        if series_1.first_valid_index() + 1 < series_2.first_valid_index():
            last_observation = series_2.first_valid_index() - 1
        else:
            last_observation = series_1.first_valid_index()

        new_series = pd.Series(series_meta)
        oint_1 = self.get_data(ameco_db_df, 'OINT.1.0.0.0').copy()
        oigt_1 = self.get_data(self.result, 'OIGT.1.0.0.0').copy()
        # One NaN slot per year from the starting year through LAST_YEAR.
        new_data = pd.Series({
            year: pd.np.nan
            for year in range(last_observation, LAST_YEAR + 1)
        })
        # Seed value: three times OVGD in the starting year.
        # NOTE(review): presumably an assumed initial ratio of 3 - confirm
        new_data[last_observation] = 3 * series_1[last_observation]
        # Accumulate OINT from ameco_db_df; the range stops before LAST_YEAR.
        for year in range(last_observation + 1, LAST_YEAR):
            new_data[year] = new_data[year - 1] + oint_1[year]
        # Re-bind last_observation to the last valid label of the last OKCT row
        # in the result; anything that is not a plain int falls back to 1993.
        last_observation = self.result[
            self.result['Variable Code'] ==
            'OKCT.1.0.0.0'].iloc[-1].last_valid_index()
        if type(last_observation) != int:
            last_observation = 1993

        # Up until now we were discarding data before 1993, however here we need it if we want the same results
        # We need to pass all_data=True to read_ameco_db_xls and get the right ameco_db_df

        # Extend the series past OKCT's last valid year. Each statement reads
        # values written by the previous one, so their order matters.
        # ``.iloc[0, 0]`` takes the value from the first matching row.
        for year in range(last_observation + 1, LAST_YEAR + 1):
            # OKCT[year] = new_data[year - 1] * OKCT[year - 1] / new_data[year - 2]
            self.result.loc[self.result['Variable Code'] == 'OKCT.1.0.0.0',
                            [year]] = (new_data[year - 1] * self.result.loc[
                                self.result['Variable Code'] == 'OKCT.1.0.0.0',
                                [year - 1]] / new_data[year - 2]).iloc[0, 0]

            # new_data[year] = new_data[year - 1] + OIGT[year] - OKCT[year]
            new_data[year] = (
                new_data[year - 1] + oigt_1[year] -
                self.result.loc[self.result['Variable Code'] == 'OKCT.1.0.0.0',
                                [year]]).iloc[0, 0]

            # OINT[year] = OIGT[year] - OKCT[year]
            self.result.loc[self.result['Variable Code'] == 'OINT.1.0.0.0',
                            [year]] = (oigt_1[year] - self.result.loc[
                                self.result['Variable Code'] == 'OKCT.1.0.0.0',
                                [year]]).iloc[0, 0]

            # UKCT[year] = OKCT[year] * UIGT[year] / OIGT[year]
            self.result.loc[
                self.result['Variable Code'] == 'UKCT.1.0.0.0',
                [year]] = (self.result.loc[self.result['Variable Code'] ==
                                           'OKCT.1.0.0.0', [year]] *
                           self.get_data(self.result, 'UIGT.1.0.0.0')[year] /
                           oigt_1[year]).iloc[0, 0]

        # Store OKND: its metadata followed by the YEARS slice of new_data.
        new_series = new_series.append(new_data[YEARS].copy())
        self.result = self.result.append(new_series,
                                         ignore_index=True,
                                         sort=True)

        # TODO: Fix this one, we get -6.897824 instead of -2.41 but it's because NLHT9.1.0.0.0 scale is wrong
        # ZVGDFA3 = log(OVGD / ((NLHT9 * 1000) ** 0.65 * new_data ** 0.35))
        variable = 'ZVGDFA3.3.0.0.0'
        series_meta = self.get_meta(variable)
        series_3 = self.get_data(df, 'NLHT9.1.0.0.0')
        ovgd_1 = self.get_data(self.result, 'OVGD.1.0.0.0')
        series_data = pd.np.log(
            ovgd_1 / (pow(series_3 * 1000, 0.65) * pow(new_data, 0.35)))
        series = pd.Series(series_meta)
        series = series.append(series_data[YEARS].copy())
        self.result = self.result.append(series, ignore_index=True, sort=True)

        self.result.set_index(['Country Ameco', 'Variable Code'],
                              drop=True,
                              inplace=True)
        self.apply_scale()
        export_to_excel(self.result, step=8, country=self.country)
        return self.result
Beispiel #11
0
    def perform_computation(self, df, ameco_h_df):
        """Derive the step-3 aggregates from ``df`` and return the result.

        Every output row is either a left-to-right sum or a difference of
        series read from ``df`` with ``self.get_data``; each is stored in
        ``self.result`` together with the metadata from ``self.get_meta``.
        Rows are appended in the order of the tables below. The frame is then
        indexed by ``['Country Ameco', 'Variable Code']``, scaled with
        ``self.apply_scale()`` and exported with
        ``export_to_excel(..., step=3)``.

        Args:
            df: frame holding all source series.
            ameco_h_df: accepted for interface compatibility; not read here.

        Returns:
            ``self.result`` after indexing and scaling.
        """
        def append_row(meta, data):
            # Metadata fields first, then the yearly data, stored as one row.
            row = pd.Series(meta).append(data)
            self.result = self.result.append(row, ignore_index=True, sort=True)

        def add_sum(variable, codes):
            # Store ``variable`` as codes[0] + codes[1] + ..., summed left to
            # right. A fresh Series is built at each step (no in-place ``+=``)
            # so the source data in ``df`` is never modified.
            meta = self.get_meta(variable)
            total = self.get_data(df, codes[0])
            for code in codes[1:]:
                total = total + self.get_data(df, code)
            append_row(meta, total)

        first_sums = [
            # Imports and exports of goods and services at current prices (National accounts)
            ('UMGS', ['UMGN', 'UMSN']),
            ('UXGS', ['UXGN', 'UXSN']),
            ('UMGS.1.0.0.0', ['UMGN.1.0.0.0', 'UMSN.1.0.0.0']),
            ('UXGS.1.0.0.0', ['UXGN.1.0.0.0', 'UXSN.1.0.0.0']),
            # Gross fixed capital formation at current prices: general government
            ('UIGG', ['UIGG0']),
            ('UIGG.1.0.0.0', ['UIGG0.1.0.0.0']),
        ]
        for variable, codes in first_sums:
            add_sum(variable, codes)

        # Net exports of goods, services, and goods & services at current prices (National accounts),
        # followed by the UIGP / UIGNR differences. Entries are (target, minuend, subtrahend).
        differences = [
            ('UBGN', 'UXGN', 'UMGN'),
            ('UBSN', 'UXSN', 'UMSN'),
            ('UBGS', 'UXGS', 'UMGS'),
            # TODO: Check that these operands are correct: unlike its siblings,
            # UBGN.1.0.0.0 is computed from the un-suffixed UXGN / UMGN.
            ('UBGN.1.0.0.0', 'UXGN', 'UMGN'),
            ('UBSN.1.0.0.0', 'UXSN.1.0.0.0', 'UMSN.1.0.0.0'),
            ('UBGS.1.0.0.0', 'UXGS.1.0.0.0', 'UMGS.1.0.0.0'),
            ('UIGP', 'UIGT', 'UIGG'),
            ('UIGNR', 'UIGCO', 'UIGDW'),
            ('UIGP.1.0.0.0', 'UIGT.1.0.0.0', 'UIGG.1.0.0.0'),
            ('UIGNR.1.0.0.0', 'UIGCO.1.0.0.0', 'UIGDW.1.0.0.0'),
        ]
        for variable, minuend, subtrahend in differences:
            meta = self.get_meta(variable)
            minuend_data = self.get_data(df, minuend)
            subtrahend_data = self.get_data(df, subtrahend)
            # Drop the subtrahend's name when the two names differ in type.
            # NOTE(review): presumably avoids a mixed-type name on the
            # result - confirm.
            if not isinstance(minuend_data.name, type(subtrahend_data.name)):
                subtrahend_data.name = None
            append_row(meta, minuend_data - subtrahend_data)

        # Each of these is computed twice: once from the bare codes and once
        # from the '.1.0.0.0' codes.
        demand_sums = [
            # Domestic demand excluding stocks at current prices
            ('UUNF', ['UCPH', 'UIGT', 'UCTG']),
            # Domestic demand including stocks at current prices
            ('UUNT', ['UCPH', 'UIGT', 'UCTG', 'UIST']),
            # Final demand at current prices
            ('UUTT', ['UCPH', 'UIGT', 'UCTG', 'UIST', 'UXGN', 'UXSN']),
            # Gross capital formation at current prices: total economy
            ('UITT', ['UIGT', 'UIST']),
        ]
        for target, codes in demand_sums:
            for suffix in ('', '.1.0.0.0'):
                add_sum(target + suffix, [code + suffix for code in codes])

        self.result.set_index(['Country Ameco', 'Variable Code'],
                              drop=True,
                              inplace=True)
        self.apply_scale()
        export_to_excel(self.result, step=3, country=self.country)
        return self.result
Beispiel #12
0
    def perform_computation(self, df, ameco_df):
        """Compute the labour-market series and append them to ``self.result``.

        For each target variable the metadata returned by ``self.get_meta``
        and the computed data are concatenated into one row and appended to
        ``self.result``. Most variables are built by calling
        ``Splicer.ratio_splice(..., kind='forward')`` with a series read from
        ``ameco_df`` as base and a series derived from ``df`` as splice
        series. Afterwards the result is indexed by
        ``['Country Ameco', 'Variable Code']``, ``self.apply_scale()`` is
        called and the frame is exported with ``export_to_excel(step=2)``.

        Args:
            df: Data source read through ``self.get_data(df, code)``; the
                log messages refer to it as the country desk forecast.
            ameco_df: Data source holding the Ameco series used as splice
                base.

        Returns:
            ``self.result`` after indexing, scaling and export.

        Raises:
            KeyError: Presumably propagated from ``self.get_data`` when a
                mandatory series is missing (only the NSTD and NLCN Ameco
                lookups catch it here) -- confirm against ``get_data``.
        """
        # NOTE(review): Series.append / DataFrame.append were removed in
        # pandas 2.0, so this method needs an older pandas release.
        splicer = Splicer()

        def optional_ameco(code):
            """Return the Ameco series for ``code`` or None when missing."""
            try:
                return self.get_data(ameco_df, code)
            except KeyError:
                logger.warning(
                    'Missing Ameco data for variable {} (population). Using data '
                    'from country desk forecast'.format(code))
                return None

        def store(meta, data):
            """Append one row (metadata followed by data) to the result."""
            row = pd.Series(meta).append(data)
            self.result = self.result.append(row, ignore_index=True)

        # Total labour force (unemployed + employed)
        variable = 'NLTN.1.0.0.0'
        base_series = self.get_data(ameco_df, variable)
        splice_series = (self.get_data(df, 'NUTN.1.0.0.0') +
                         self.get_data(df, 'NETN.1.0.0.0'))
        meta = self.get_meta(variable)
        store(meta, splicer.ratio_splice(base_series,
                                         splice_series,
                                         kind='forward'))

        # Self employed (employed - wage and salary earners)
        variable = 'NSTD.1.0.0.0'
        base_series = optional_ameco(variable)
        splice_series = (self.get_data(df, 'NETN.1.0.0.0') -
                         self.get_data(df, 'NWTD.1.0.0.0'))
        meta = self.get_meta(variable)
        # base_series may be None here; presumably ratio_splice then falls
        # back to the splice series, as the warning text says -- confirm.
        store(meta, splicer.ratio_splice(base_series,
                                         splice_series,
                                         kind='forward',
                                         variable=variable))

        # Percentage employed:
        # total employed / population of working age (15-64)
        variable = 'NETD.1.0.414.0'
        meta = self.get_meta(variable)
        store(meta,
              self.get_data(df, 'NETD.1.0.0.0') /
              self.get_data(df, 'NPAN1.1.0.0.0') * 100)

        # Civilian employment
        # NOTE(review): the splice series is looked up as bare 'NETN' while
        # the other sections use fully qualified codes -- confirm intended.
        variable = 'NECN.1.0.0.0'
        meta = self.get_meta(variable)
        civilian_employment = splicer.ratio_splice(
            self.get_data(ameco_df, variable),
            self.get_data(df, 'NETN'),
            kind='forward')
        store(meta, civilian_employment)

        # Total annual hours worked, and the same series for the total
        # economy (internal use only). Both are employment * average hours.
        for variable in ('NLHT.1.0.0.0', 'NLHT9.1.0.0.0'):
            total_hours = (self.get_data(df, 'NETD.1.0.0.0') *
                           self.get_data(df, 'NLHA.1.0.0.0'))
            meta = self.get_meta(variable)
            store(meta, splicer.ratio_splice(self.get_data(ameco_df, variable),
                                             total_hours,
                                             kind='forward'))

        # Civilian labour force (civilian employment + unemployed)
        variable = 'NLCN.1.0.0.0'
        meta = self.get_meta(variable)
        # Fix: a missing Ameco series must yield None here instead of
        # silently reusing the base series left over from the NSTD section.
        base_series = optional_ameco(variable)
        store(meta, splicer.ratio_splice(base_series,
                                         civilian_employment +
                                         self.get_data(df, 'NUTN.1.0.0.0'),
                                         kind='forward',
                                         variable=variable))

        self.result.set_index(['Country Ameco', 'Variable Code'],
                              drop=True,
                              inplace=True)
        self.apply_scale()
        export_to_excel(self.result, step=2, country=self.country)
        return self.result