예제 #1
0
 def write_doc_file(self):
     """
     Write reform documentation to text file.
     """
     if len(self.policy_dicts) <= 1:
         doc = Calculator.reform_documentation(self.param_dict)
     else:
         doc = Calculator.reform_documentation(self.param_dict,
                                               self.policy_dicts[1:])
     doc_fname = self._output_filename.replace('.csv', '-doc.text')
     with open(doc_fname, 'w') as dfile:
         dfile.write(doc)
예제 #2
0
 def write_doc_file(self):
     """
     Write reform documentation to text file.
     """
     if len(self.policy_dicts) <= 1:
         doc = Calculator.reform_documentation(self.param_dict)
     else:
         doc = Calculator.reform_documentation(self.param_dict,
                                               self.policy_dicts[1:])
     doc_fname = self._output_filename.replace('.csv', '-doc.text')
     with open(doc_fname, 'w') as dfile:
         dfile.write(doc)
예제 #3
0
 def __init__(self,
              input_filename,
              reform,
              exact_calculations,
              emulate_taxsim_2441_logic,
              output_records):
     """
     SimpleTaxIO class constructor.
     """
     # pylint: disable=too-many-arguments
     # check that input_filename is a string
     if not isinstance(input_filename, str):
         msg = 'SimpleTaxIO.ctor input_filename is not a string'
         raise ValueError(msg)
     # construct output_filename and delete old output file if it exists
     # ... construct reform extension to output_filename
     if reform is None:
         ref = ''
         self._using_reform_file = True
     else:  # if reform is not None
         if isinstance(reform, str):
             if reform.endswith('.json'):
                 ref = '-{}'.format(reform[:-5])
             else:
                 ref = '-{}'.format(reform)
             self._using_reform_file = True
         elif isinstance(reform, dict):
             ref = ''
             self._using_reform_file = False
         else:
             msg = 'SimpleTaxIO.ctor reform is neither None, str, nor dict'
             raise ValueError(msg)
     # ... construct whole output_filename
     self._using_input_file = True
     self._output_filename = '{}.out-simtax{}'.format(input_filename, ref)
     if os.path.isfile(self._output_filename):
         os.remove(self._output_filename)  # pragma: no cover
     # check for existence of file named input_filename
     if not os.path.isfile(input_filename):
         msg = 'INPUT file named {} could not be found'
         raise ValueError(msg.format(input_filename))
     # read input file contents into self._input dictionary
     self._read_input(input_filename)
     self.policy = Policy()
     # implement reform if reform is specified
     if reform:
         if self._using_reform_file:
             param_dict = Calculator.read_json_param_objects(reform, None)
             r_pol = param_dict['policy']
         else:
             r_pol = reform
         self.policy.implement_reform(r_pol)
     # validate input variable values
     self._validate_input()
     self.calc = self._calc_object(exact_calculations,
                                   emulate_taxsim_2441_logic,
                                   output_records)
예제 #4
0
    def _calc_object(self, exact_calcs,
                     emulate_taxsim_2441_logic, output_records):
        """
        Create and return Calculator object to conduct the tax calculations.

        Parameters
        ----------
        exact_calcs: boolean

        emulate_taxsim_2441_logic: boolean

        output_records: boolean

        Returns
        -------
        calc: Calculator
        """
        # create all-zeros dictionary and then list of all-zero dictionaries
        Records.read_var_info()
        zero_dict = {}
        for varname in Records.USABLE_READ_VARS:
            zero_dict[varname] = 0
        dict_list = [zero_dict for _ in range(0, len(self._input))]
        # use dict_list to create a Pandas DataFrame and Records object
        recsdf = pd.DataFrame(dict_list, dtype='int64')
        recsdf['MARS'] = recsdf['MARS'].add(1)  # because MARS==0 is illegal
        recs = Records(data=recsdf, exact_calculations=exact_calcs,
                       gfactors=None, weights=None,
                       start_year=self.policy.start_year)
        assert recs.array_length == len(self._input)
        # specify input for each tax filing unit in Records object
        lnum = 0
        for idx in range(0, recs.array_length):
            lnum += 1
            SimpleTaxIO._specify_input(recs, idx, self._input[lnum],
                                       emulate_taxsim_2441_logic)
        # optionally write Records.USABLE_READ_VARS content to file
        if output_records:
            recdf = pd.DataFrame()  # pragma: no cover
            for varname in Records.USABLE_READ_VARS:  # pragma: no cover
                vardata = getattr(recs, varname)  # pragma: no cover
                recdf[varname] = vardata  # pragma: no cover
            recdf.to_csv(re.sub('out-simtax', 'records',  # pragma: no cover
                                self._output_filename),
                         float_format='%.2f', index=False)
        # create Calculator object containing all tax filing units
        return Calculator(policy=self.policy, records=recs, sync_years=False)
예제 #5
0
class TaxCalcIO():
    """
    Constructor for the Tax-Calculator Input-Output class.

    TaxCalcIO class constructor call must be followed by init() call.

    Parameters
    ----------
    input_data: string or Pandas DataFrame
        string is name of INPUT file that is CSV formatted containing
        variable names in the Records USABLE_READ_VARS set, or
        Pandas DataFrame is INPUT data containing variable names in
        the Records USABLE_READ_VARS set.  INPUT vsrisbles not in the
        Records USABLE_READ_VARS set can be present but are ignored.

    tax_year: integer
        calendar year for which taxes will be computed for INPUT.

    baseline: None or string
        None implies baseline policy is current-law policy, or
        string is name of optional BASELINE file that is a JSON
        reform file.

    reform: None or string
        None implies no policy reform (current-law policy), or
        string is name of optional REFORM file(s).

    assump: None or string
        None implies economic assumptions are standard assumptions,
        or string is name of optional ASSUMP file.

    outdir: None or string
        None implies output files written to current directory,
        or string is name of optional output directory

    Returns
    -------
    class instance: TaxCalcIO
    """

    # pylint: disable=too-many-instance-attributes

    def __init__(self,
                 input_data,
                 tax_year,
                 baseline,
                 reform,
                 assump,
                 outdir=None):
        # pylint: disable=too-many-arguments,too-many-locals
        # pylint: disable=too-many-branches,too-many-statements
        self.errmsg = ''
        # check name and existence of INPUT file
        inp = 'x'
        self.puf_input_data = False
        self.cps_input_data = False
        if isinstance(input_data, str):
            # remove any leading directory path from INPUT filename
            fname = os.path.basename(input_data)
            # check if fname ends with ".csv"
            if fname.endswith('.csv'):
                inp = '{}-{}'.format(fname[:-4], str(tax_year)[2:])
            else:
                msg = 'INPUT file name does not end in .csv'
                self.errmsg += 'ERROR: {}\n'.format(msg)
            # check existence of INPUT file
            self.puf_input_data = input_data.endswith('puf.csv')
            self.cps_input_data = input_data.endswith('cps.csv')
            if not self.cps_input_data and not os.path.isfile(input_data):
                msg = 'INPUT file could not be found'
                self.errmsg += 'ERROR: {}\n'.format(msg)
        elif isinstance(input_data, pd.DataFrame):
            inp = 'df-{}'.format(str(tax_year)[2:])
        else:
            msg = 'INPUT is neither string nor Pandas DataFrame'
            self.errmsg += 'ERROR: {}\n'.format(msg)
        # check name and existence of BASELINE file
        bas = '-x'
        if baseline is None:
            bas = '-#'
        elif isinstance(baseline, str):
            # remove any leading directory path from BASELINE filename
            fname = os.path.basename(baseline)
            # check if fname ends with ".json"
            if fname.endswith('.json'):
                bas = '-{}'.format(fname[:-5])
            else:
                msg = 'BASELINE file name does not end in .json'
                self.errmsg += 'ERROR: {}\n'.format(msg)
            # check existence of BASELINE file
            if not os.path.isfile(baseline):
                msg = 'BASELINE file could not be found'
                self.errmsg += 'ERROR: {}\n'.format(msg)
        else:
            msg = 'TaxCalcIO.ctor: baseline is neither None nor str'
            self.errmsg += 'ERROR: {}\n'.format(msg)
        # check name(s) and existence of REFORM file(s)
        ref = '-x'
        if reform is None:
            self.specified_reform = False
            ref = '-#'
        elif isinstance(reform, str):
            self.specified_reform = True
            # split any compound reform into list of simple reforms
            refnames = list()
            reforms = reform.split('+')
            for rfm in reforms:
                # remove any leading directory path from rfm filename
                fname = os.path.basename(rfm)
                # check if fname ends with ".json"
                if not fname.endswith('.json'):
                    msg = '{} does not end in .json'.format(fname)
                    self.errmsg += 'ERROR: REFORM file name {}\n'.format(msg)
                # check existence of REFORM file
                if not os.path.isfile(rfm):
                    msg = '{} could not be found'.format(rfm)
                    self.errmsg += 'ERROR: REFORM file {}\n'.format(msg)
                # add fname to list of refnames used in output file names
                refnames.append(fname)
            # create (possibly compound) reform name for output file names
            ref = '-'
            num_refnames = 0
            for refname in refnames:
                num_refnames += 1
                if num_refnames > 1:
                    ref += '+'
                ref += '{}'.format(refname[:-5])
        else:
            msg = 'TaxCalcIO.ctor: reform is neither None nor str'
            self.errmsg += 'ERROR: {}\n'.format(msg)
        # check name and existence of ASSUMP file
        asm = '-x'
        if assump is None:
            asm = '-#'
        elif isinstance(assump, str):
            # remove any leading directory path from ASSUMP filename
            fname = os.path.basename(assump)
            # check if fname ends with ".json"
            if fname.endswith('.json'):
                asm = '-{}'.format(fname[:-5])
            else:
                msg = 'ASSUMP file name does not end in .json'
                self.errmsg += 'ERROR: {}\n'.format(msg)
            # check existence of ASSUMP file
            if not os.path.isfile(assump):
                msg = 'ASSUMP file could not be found'
                self.errmsg += 'ERROR: {}\n'.format(msg)
        else:
            msg = 'TaxCalcIO.ctor: assump is neither None nor str'
            self.errmsg += 'ERROR: {}\n'.format(msg)
        # check name and existence of OUTDIR
        if outdir is None:
            valid_outdir = True
        elif isinstance(outdir, str):
            # check existence of OUTDIR
            if os.path.isdir(outdir):
                valid_outdir = True
            else:
                valid_outdir = False
                msg = 'OUTDIR could not be found'
                self.errmsg += 'ERROR: {}\n'.format(msg)
        else:
            valid_outdir = False
            msg = 'TaxCalcIO.ctor: outdir is neither None nor str'
            self.errmsg += 'ERROR: {}\n'.format(msg)
        # create OUTPUT file name and delete any existing output files
        output_filename = '{}{}{}{}.csv'.format(inp, bas, ref, asm)
        if outdir is None:
            self._output_filename = output_filename
            delete_old_files = True
        elif valid_outdir:
            self._output_filename = os.path.join(outdir, output_filename)
            delete_old_files = True
        else:
            delete_old_files = False
        if delete_old_files:
            delete_file(self._output_filename)
            delete_file(self._output_filename.replace('.csv', '.db'))
            delete_file(self._output_filename.replace('.csv', '-doc.text'))
            delete_file(self._output_filename.replace('.csv', '-tab.text'))
            delete_file(self._output_filename.replace('.csv', '-atr.html'))
            delete_file(self._output_filename.replace('.csv', '-mtr.html'))
            delete_file(self._output_filename.replace('.csv', '-pch.html'))
        # initialize variables whose values are set in init method
        self.calc = None
        self.calc_base = None
        self.param_dict = None
        self.policy_dicts = list()

    def init(self, input_data, tax_year, baseline, reform, assump,
             aging_input_data, exact_calculations):
        """
        TaxCalcIO class post-constructor method that completes initialization.

        Parameters
        ----------
        First five are same as the first five of the TaxCalcIO constructor:
            input_data, tax_year, baseline, reform, assump.

        aging_input_data: boolean
            whether or not to extrapolate Records data from data year to
            tax_year.

        exact_calculations: boolean
            specifies whether or not exact tax calculations are done without
            any smoothing of "stair-step" provisions in the tax law.
        """
        # pylint: disable=too-many-arguments,too-many-locals
        # pylint: disable=too-many-statements,too-many-branches
        self.errmsg = ''
        # get policy parameter dictionary from --baseline file
        basedict = Calculator.read_json_param_objects(baseline, None)
        # get assumption sub-dictionaries
        paramdict = Calculator.read_json_param_objects(None, assump)
        # get policy parameter dictionaries from --reform file(s)
        policydicts = list()
        if self.specified_reform:
            reforms = reform.split('+')
            for ref in reforms:
                pdict = Calculator.read_json_param_objects(ref, None)
                policydicts.append(pdict['policy'])
            paramdict['policy'] = policydicts[0]
        # remember parameters for reform documentation
        self.param_dict = paramdict
        self.policy_dicts = policydicts
        # create gdiff_baseline object
        gdiff_baseline = GrowDiff()
        try:
            gdiff_baseline.update_growdiff(paramdict['growdiff_baseline'])
        except paramtools.ValidationError as valerr_msg:
            self.errmsg += valerr_msg.__str__()
        # create GrowFactors base object that incorporates gdiff_baseline
        gfactors_base = GrowFactors()
        gdiff_baseline.apply_to(gfactors_base)
        # specify gdiff_response object
        gdiff_response = GrowDiff()
        try:
            gdiff_response.update_growdiff(paramdict['growdiff_response'])
        except paramtools.ValidationError as valerr_msg:
            self.errmsg += valerr_msg.__str__()
        # create GrowFactors ref object that has all gdiff objects applied
        gfactors_ref = GrowFactors()
        gdiff_baseline.apply_to(gfactors_ref)
        gdiff_response.apply_to(gfactors_ref)
        # create Policy objects:
        # ... the baseline Policy object
        base = Policy(gfactors=gfactors_base)
        try:
            base.implement_reform(basedict['policy'],
                                  print_warnings=True,
                                  raise_errors=False)
            self.errmsg += base.parameter_errors
        except paramtools.ValidationError as valerr_msg:
            self.errmsg += valerr_msg.__str__()
        # ... the reform Policy object
        if self.specified_reform:
            pol = Policy(gfactors=gfactors_ref)
            for poldict in policydicts:
                try:
                    pol.implement_reform(poldict,
                                         print_warnings=True,
                                         raise_errors=False)
                    self.errmsg += pol.parameter_errors
                except paramtools.ValidationError as valerr_msg:
                    self.errmsg += valerr_msg.__str__()
        else:
            pol = Policy(gfactors=gfactors_base)
        # create Consumption object
        con = Consumption()
        try:
            con.update_consumption(paramdict['consumption'])
        except paramtools.ValidationError as valerr_msg:
            self.errmsg += valerr_msg.__str__()
        # check for valid tax_year value
        if tax_year < pol.start_year:
            msg = 'tax_year {} less than policy.start_year {}'
            msg = msg.format(tax_year, pol.start_year)
            self.errmsg += 'ERROR: {}\n'.format(msg)
        if tax_year > pol.end_year:
            msg = 'tax_year {} greater than policy.end_year {}'
            msg = msg.format(tax_year, pol.end_year)
            self.errmsg += 'ERROR: {}\n'.format(msg)
        # any errors imply cannot proceed with calculations
        if self.errmsg:
            return
        # set policy to tax_year
        pol.set_year(tax_year)
        base.set_year(tax_year)
        # read input file contents into Records objects
        if aging_input_data:
            if self.cps_input_data:
                recs = Records.cps_constructor(
                    gfactors=gfactors_ref,
                    exact_calculations=exact_calculations)
                recs_base = Records.cps_constructor(
                    gfactors=gfactors_base,
                    exact_calculations=exact_calculations)
            else:  # if not cps_input_data but aging_input_data
                recs = Records(data=input_data,
                               gfactors=gfactors_ref,
                               exact_calculations=exact_calculations)
                recs_base = Records(data=input_data,
                                    gfactors=gfactors_base,
                                    exact_calculations=exact_calculations)
        else:  # input_data are raw data that are not being aged
            recs = Records(data=input_data,
                           start_year=tax_year,
                           gfactors=None,
                           weights=None,
                           adjust_ratios=None,
                           exact_calculations=exact_calculations)
            recs_base = copy.deepcopy(recs)
        if tax_year < recs.data_year:
            msg = 'tax_year {} less than records.data_year {}'
            msg = msg.format(tax_year, recs.data_year)
            self.errmsg += 'ERROR: {}\n'.format(msg)
        # create Calculator objects
        self.calc = Calculator(policy=pol,
                               records=recs,
                               verbose=True,
                               consumption=con,
                               sync_years=aging_input_data)
        self.calc_base = Calculator(policy=base,
                                    records=recs_base,
                                    verbose=False,
                                    consumption=con,
                                    sync_years=aging_input_data)

    def custom_dump_variables(self, tcdumpvars_str):
        """
        Return set of variable names extracted from tcdumpvars_str, which
        contains the contents of the tcdumpvars file in the current directory.
        Also, builds self.errmsg if any custom variables are not valid.
        """
        assert isinstance(tcdumpvars_str, str)
        self.errmsg = ''
        # change some common delimiter characters into spaces
        dump_vars_str = tcdumpvars_str.replace(',', ' ')
        dump_vars_str = dump_vars_str.replace(';', ' ')
        dump_vars_str = dump_vars_str.replace('|', ' ')
        # split dump_vars_str into a list of dump variables
        dump_vars_list = dump_vars_str.split()
        # check that all dump_vars_list items are valid
        recs_vinfo = Records(data=None)  # contains records VARINFO only
        valid_set = recs_vinfo.USABLE_READ_VARS | recs_vinfo.CALCULATED_VARS
        for var in dump_vars_list:
            if var not in valid_set:
                msg = 'invalid variable name in tcdumpvars file: {}'
                msg = msg.format(var)
                self.errmsg += 'ERROR: {}\n'.format(msg)
        # add essential variables even if not on custom list
        if 'RECID' not in dump_vars_list:
            dump_vars_list.append('RECID')
        if 'FLPDYR' not in dump_vars_list:
            dump_vars_list.append('FLPDYR')
        # convert list into a set and return
        return set(dump_vars_list)

    def tax_year(self):
        """
        Return calendar year for which TaxCalcIO calculations are being done.
        """
        return self.calc.current_year

    def output_filepath(self):
        """
        Return full path to output file named in TaxCalcIO constructor.
        """
        dirpath = os.path.abspath(os.path.dirname(__file__))
        return os.path.join(dirpath, self._output_filename)

    def analyze(self,
                writing_output_file=False,
                output_tables=False,
                output_graphs=False,
                dump_varset=None,
                output_dump=False,
                output_sqldb=False):
        """
        Conduct tax analysis.

        Parameters
        ----------
        writing_output_file: boolean
           whether or not to generate and write output file

        output_tables: boolean
           whether or not to generate and write distributional tables
           to a text file

        output_graphs: boolean
           whether or not to generate and write HTML graphs of average
           and marginal tax rates by income percentile

        dump_varset: set
           custom set of variables to include in dump and sqldb output;
           None implies include all variables in dump and sqldb output

        output_dump: boolean
           whether or not to replace standard output with all input and
           calculated variables using their Tax-Calculator names

        output_sqldb: boolean
           whether or not to write SQLite3 database with dump table
           containing same output as written by output_dump to a csv file

        Returns
        -------
        Nothing
        """
        # pylint: disable=too-many-arguments,too-many-branches,too-many-locals
        if self.puf_input_data and self.calc.reform_warnings:
            warn = 'PARAMETER VALUE WARNING(S):  {}\n{}{}'  # pragma: no cover
            print(  # pragma: no cover
                warn.format('(read documentation for each parameter)',
                            self.calc.reform_warnings,
                            'CONTINUING WITH CALCULATIONS...'))
        calc_base_calculated = False
        self.calc.calc_all()
        if output_dump or output_sqldb:
            # might need marginal tax rates
            (mtr_paytax, mtr_inctax,
             _) = self.calc.mtr(wrt_full_compensation=False,
                                calc_all_already_called=True)
        else:
            # definitely do not need marginal tax rates
            mtr_paytax = None
            mtr_inctax = None
        # extract output if writing_output_file
        if writing_output_file:
            self.write_output_file(output_dump, dump_varset, mtr_paytax,
                                   mtr_inctax)
            self.write_doc_file()
        # optionally write --sqldb output to SQLite3 database
        if output_sqldb:
            self.write_sqldb_file(dump_varset, mtr_paytax, mtr_inctax)
        # optionally write --tables output to text file
        if output_tables:
            if not calc_base_calculated:
                self.calc_base.calc_all()
                calc_base_calculated = True
            self.write_tables_file()
        # optionally write --graphs output to HTML files
        if output_graphs:
            if not calc_base_calculated:
                self.calc_base.calc_all()
                calc_base_calculated = True
            self.write_graph_files()

    def write_output_file(self, output_dump, dump_varset, mtr_paytax,
                          mtr_inctax):
        """
        Write output to CSV-formatted file.
        """
        if output_dump:
            outdf = self.dump_output(dump_varset, mtr_inctax, mtr_paytax)
            column_order = sorted(outdf.columns)
        else:
            outdf = self.minimal_output()
            column_order = outdf.columns
        assert len(outdf.index) == self.calc.array_len
        outdf.to_csv(self._output_filename,
                     columns=column_order,
                     index=False,
                     float_format='%.2f')
        del outdf
        gc.collect()

    def write_doc_file(self):
        """
        Write reform documentation to text file.
        """
        if len(self.policy_dicts) <= 1:
            doc = Calculator.reform_documentation(self.param_dict)
        else:
            doc = Calculator.reform_documentation(self.param_dict,
                                                  self.policy_dicts[1:])
        doc_fname = self._output_filename.replace('.csv', '-doc.text')
        with open(doc_fname, 'w') as dfile:
            dfile.write(doc)

    def write_sqldb_file(self, dump_varset, mtr_paytax, mtr_inctax):
        """
        Write dump output to SQLite3 database table dump.
        """
        outdf = self.dump_output(dump_varset, mtr_inctax, mtr_paytax)
        assert len(outdf.index) == self.calc.array_len
        db_fname = self._output_filename.replace('.csv', '.db')
        dbcon = sqlite3.connect(db_fname)
        outdf.to_sql('dump', dbcon, if_exists='replace', index=False)
        dbcon.close()
        del outdf
        gc.collect()

    def write_tables_file(self):
        """
        Write tables to text file.
        """
        # pylint: disable=too-many-locals
        tab_fname = self._output_filename.replace('.csv', '-tab.text')
        # skip tables if there are not some positive weights
        if self.calc_base.total_weight() <= 0.:
            with open(tab_fname, 'w') as tfile:
                msg = 'No tables because sum of weights is not positive\n'
                tfile.write(msg)
            return
        # create list of results for nontax variables
        # - weights don't change with reform
        # - expanded_income may change, so always use baseline expanded income
        nontax_vars = ['s006', 'expanded_income']
        nontax = [self.calc_base.array(var) for var in nontax_vars]
        # create list of results for tax variables from reform Calculator
        tax_vars = ['iitax', 'payrolltax', 'lumpsum_tax', 'combined']
        reform = [self.calc.array(var) for var in tax_vars]
        # create DataFrame with tax distribution under reform
        dist = nontax + reform  # using expanded_income under baseline policy
        all_vars = nontax_vars + tax_vars
        distdf = pd.DataFrame(data=np.column_stack(dist), columns=all_vars)
        # create DataFrame with tax differences (reform - baseline)
        base = [self.calc_base.array(var) for var in tax_vars]
        change = [(reform[idx] - base[idx]) for idx in range(0, len(tax_vars))]
        diff = nontax + change  # using expanded_income under baseline policy
        diffdf = pd.DataFrame(data=np.column_stack(diff), columns=all_vars)
        # write each kind of distributional table
        with open(tab_fname, 'w') as tfile:
            TaxCalcIO.write_decile_table(distdf, tfile, tkind='Reform Totals')
            tfile.write('\n')
            TaxCalcIO.write_decile_table(diffdf, tfile, tkind='Differences')
        # delete intermediate DataFrame objects
        del distdf
        del diffdf
        gc.collect()

    @staticmethod
    def write_decile_table(dfx, tfile, tkind='Totals'):
        """
        Write to tfile the tkind decile table using dfx DataFrame.
        """
        dfx = add_quantile_table_row_variable(dfx,
                                              'expanded_income',
                                              10,
                                              decile_details=False,
                                              pop_quantiles=False,
                                              weight_by_income_measure=False)
        gdfx = dfx.groupby('table_row', as_index=False)
        rtns_series = gdfx.apply(unweighted_sum, 's006')
        xinc_series = gdfx.apply(weighted_sum, 'expanded_income')
        itax_series = gdfx.apply(weighted_sum, 'iitax')
        ptax_series = gdfx.apply(weighted_sum, 'payrolltax')
        htax_series = gdfx.apply(weighted_sum, 'lumpsum_tax')
        ctax_series = gdfx.apply(weighted_sum, 'combined')
        # write decile table to text file
        row = 'Weighted Tax {} by Baseline Expanded-Income Decile\n'
        tfile.write(row.format(tkind))
        rowfmt = '{}{}{}{}{}{}\n'
        row = rowfmt.format('    Returns', '    ExpInc', '    IncTax',
                            '    PayTax', '     LSTax', '    AllTax')
        tfile.write(row)
        row = rowfmt.format('       (#m)', '      ($b)', '      ($b)',
                            '      ($b)', '      ($b)', '      ($b)')
        tfile.write(row)
        rowfmt = '{:9.2f}{:10.1f}{:10.1f}{:10.1f}{:10.1f}{:10.1f}\n'
        for decile in range(0, 10):
            row = '{:2d}'.format(decile)
            row += rowfmt.format(rtns_series[decile] * 1e-6,
                                 xinc_series[decile] * 1e-9,
                                 itax_series[decile] * 1e-9,
                                 ptax_series[decile] * 1e-9,
                                 htax_series[decile] * 1e-9,
                                 ctax_series[decile] * 1e-9)
            tfile.write(row)
        row = ' A'
        row += rowfmt.format(rtns_series.sum() * 1e-6,
                             xinc_series.sum() * 1e-9,
                             itax_series.sum() * 1e-9,
                             ptax_series.sum() * 1e-9,
                             htax_series.sum() * 1e-9,
                             ctax_series.sum() * 1e-9)
        tfile.write(row)
        del gdfx
        del rtns_series
        del xinc_series
        del itax_series
        del ptax_series
        del htax_series
        del ctax_series
        gc.collect()

    def write_graph_files(self):
        """
        Write graphs to HTML files.
        All graphs contain same number of filing units in each quantile.
        """
        pos_wght_sum = self.calc.total_weight() > 0.0
        fig = None
        # average-tax-rate graph
        atr_fname = self._output_filename.replace('.csv', '-atr.html')
        atr_title = 'ATR by Income Percentile'
        if pos_wght_sum:
            fig = self.calc_base.atr_graph(self.calc, pop_quantiles=False)
            write_graph_file(fig, atr_fname, atr_title)
        else:
            reason = 'No graph because sum of weights is not positive'
            TaxCalcIO.write_empty_graph_file(atr_fname, atr_title, reason)
        # marginal-tax-rate graph
        mtr_fname = self._output_filename.replace('.csv', '-mtr.html')
        mtr_title = 'MTR by Income Percentile'
        if pos_wght_sum:
            fig = self.calc_base.mtr_graph(
                self.calc,
                alt_e00200p_text='Taxpayer Earnings',
                pop_quantiles=False)
            write_graph_file(fig, mtr_fname, mtr_title)
        else:
            reason = 'No graph because sum of weights is not positive'
            TaxCalcIO.write_empty_graph_file(mtr_fname, mtr_title, reason)
        # percentage-aftertax-income-change graph
        pch_fname = self._output_filename.replace('.csv', '-pch.html')
        pch_title = 'PCH by Income Percentile'
        if pos_wght_sum:
            fig = self.calc_base.pch_graph(self.calc, pop_quantiles=False)
            write_graph_file(fig, pch_fname, pch_title)
        else:
            reason = 'No graph because sum of weights is not positive'
            TaxCalcIO.write_empty_graph_file(pch_fname, pch_title, reason)
        if fig:
            del fig
            gc.collect()

    @staticmethod
    def write_empty_graph_file(fname, title, reason):
        """
        Write HTML graph file with title but no graph for specified reason.
        """
        txt = ('<html>\n'
               '<head><title>{}</title></head>\n'
               '<body><center<h1>{}</h1></center></body>\n'
               '</html>\n').format(title, reason)
        with open(fname, 'w') as gfile:
            gfile.write(txt)

    def minimal_output(self):
        """
        Extract minimal output and return it as Pandas DataFrame.
        """
        varlist = ['RECID', 'YEAR', 'WEIGHT', 'INCTAX', 'LSTAX', 'PAYTAX']
        odict = dict()
        scalc = self.calc
        odict['RECID'] = scalc.array('RECID')  # id for tax filing unit
        odict['YEAR'] = self.tax_year()  # tax calculation year
        odict['WEIGHT'] = scalc.array('s006')  # sample weight
        odict['INCTAX'] = scalc.array('iitax')  # federal income taxes
        odict['LSTAX'] = scalc.array('lumpsum_tax')  # lump-sum tax
        odict['PAYTAX'] = scalc.array('payrolltax')  # payroll taxes (ee+er)
        odf = pd.DataFrame(data=odict, columns=varlist)
        return odf

    def dump_output(self, dump_varset, mtr_inctax, mtr_paytax):
        """
        Extract dump output and return it as Pandas DataFrame.
        """
        recs_vinfo = Records(data=None)  # contains only Records VARINFO
        if dump_varset is None:
            varset = recs_vinfo.USABLE_READ_VARS | recs_vinfo.CALCULATED_VARS
        else:
            varset = dump_varset
        # create and return dump output DataFrame
        odf = pd.DataFrame()
        for varname in varset:
            vardata = self.calc.array(varname)
            if varname in recs_vinfo.INTEGER_VARS:
                odf[varname] = vardata
            else:
                odf[varname] = vardata.round(2)  # rounded to nearest cent
        # specify mtr values in percentage terms
        if 'mtr_inctax' in varset:
            odf['mtr_inctax'] = (mtr_inctax * 100).round(2)
        if 'mtr_paytax' in varset:
            odf['mtr_paytax'] = (mtr_paytax * 100).round(2)
        # specify tax calculation year
        odf['FLPDYR'] = self.tax_year()
        return odf
예제 #6
0
    def init(self, input_data, tax_year, baseline, reform, assump,
             aging_input_data, exact_calculations):
        """
        TaxCalcIO class post-constructor method that completes initialization.

        Parameters
        ----------
        First five are same as the first five of the TaxCalcIO constructor:
            input_data, tax_year, baseline, reform, assump.

        aging_input_data: boolean
            whether or not to extrapolate Records data from data year to
            tax_year.

        exact_calculations: boolean
            specifies whether or not exact tax calculations are done without
            any smoothing of "stair-step" provisions in the tax law.
        """
        # pylint: disable=too-many-arguments,too-many-locals
        # pylint: disable=too-many-statements,too-many-branches
        self.errmsg = ''
        # get policy parameter dictionary from --baseline file
        basedict = Calculator.read_json_param_objects(baseline, None)
        # get assumption sub-dictionaries
        paramdict = Calculator.read_json_param_objects(None, assump)
        # get policy parameter dictionaries from --reform file(s)
        policydicts = list()
        if self.specified_reform:
            reforms = reform.split('+')
            for ref in reforms:
                pdict = Calculator.read_json_param_objects(ref, None)
                policydicts.append(pdict['policy'])
            paramdict['policy'] = policydicts[0]
        # remember parameters for reform documentation
        self.param_dict = paramdict
        self.policy_dicts = policydicts
        # create gdiff_baseline object
        gdiff_baseline = GrowDiff()
        try:
            gdiff_baseline.update_growdiff(paramdict['growdiff_baseline'])
        except paramtools.ValidationError as valerr_msg:
            self.errmsg += valerr_msg.__str__()
        # create GrowFactors base object that incorporates gdiff_baseline
        gfactors_base = GrowFactors()
        gdiff_baseline.apply_to(gfactors_base)
        # specify gdiff_response object
        gdiff_response = GrowDiff()
        try:
            gdiff_response.update_growdiff(paramdict['growdiff_response'])
        except paramtools.ValidationError as valerr_msg:
            self.errmsg += valerr_msg.__str__()
        # create GrowFactors ref object that has all gdiff objects applied
        gfactors_ref = GrowFactors()
        gdiff_baseline.apply_to(gfactors_ref)
        gdiff_response.apply_to(gfactors_ref)
        # create Policy objects:
        # ... the baseline Policy object
        base = Policy(gfactors=gfactors_base)
        try:
            base.implement_reform(basedict['policy'],
                                  print_warnings=True,
                                  raise_errors=False)
            self.errmsg += base.parameter_errors
        except paramtools.ValidationError as valerr_msg:
            self.errmsg += valerr_msg.__str__()
        # ... the reform Policy object
        if self.specified_reform:
            pol = Policy(gfactors=gfactors_ref)
            for poldict in policydicts:
                try:
                    pol.implement_reform(poldict,
                                         print_warnings=True,
                                         raise_errors=False)
                    self.errmsg += pol.parameter_errors
                except paramtools.ValidationError as valerr_msg:
                    self.errmsg += valerr_msg.__str__()
        else:
            pol = Policy(gfactors=gfactors_base)
        # create Consumption object
        con = Consumption()
        try:
            con.update_consumption(paramdict['consumption'])
        except paramtools.ValidationError as valerr_msg:
            self.errmsg += valerr_msg.__str__()
        # check for valid tax_year value
        if tax_year < pol.start_year:
            msg = 'tax_year {} less than policy.start_year {}'
            msg = msg.format(tax_year, pol.start_year)
            self.errmsg += 'ERROR: {}\n'.format(msg)
        if tax_year > pol.end_year:
            msg = 'tax_year {} greater than policy.end_year {}'
            msg = msg.format(tax_year, pol.end_year)
            self.errmsg += 'ERROR: {}\n'.format(msg)
        # any errors imply cannot proceed with calculations
        if self.errmsg:
            return
        # set policy to tax_year
        pol.set_year(tax_year)
        base.set_year(tax_year)
        # read input file contents into Records objects
        if aging_input_data:
            if self.cps_input_data:
                recs = Records.cps_constructor(
                    gfactors=gfactors_ref,
                    exact_calculations=exact_calculations)
                recs_base = Records.cps_constructor(
                    gfactors=gfactors_base,
                    exact_calculations=exact_calculations)
            else:  # if not cps_input_data but aging_input_data
                recs = Records(data=input_data,
                               gfactors=gfactors_ref,
                               exact_calculations=exact_calculations)
                recs_base = Records(data=input_data,
                                    gfactors=gfactors_base,
                                    exact_calculations=exact_calculations)
        else:  # input_data are raw data that are not being aged
            recs = Records(data=input_data,
                           start_year=tax_year,
                           gfactors=None,
                           weights=None,
                           adjust_ratios=None,
                           exact_calculations=exact_calculations)
            recs_base = copy.deepcopy(recs)
        if tax_year < recs.data_year:
            msg = 'tax_year {} less than records.data_year {}'
            msg = msg.format(tax_year, recs.data_year)
            self.errmsg += 'ERROR: {}\n'.format(msg)
        # create Calculator objects
        self.calc = Calculator(policy=pol,
                               records=recs,
                               verbose=True,
                               consumption=con,
                               sync_years=aging_input_data)
        self.calc_base = Calculator(policy=base,
                                    records=recs_base,
                                    verbose=False,
                                    consumption=con,
                                    sync_years=aging_input_data)
예제 #7
0
class TaxCalcIO():
    """
    Constructor for the Tax-Calculator Input-Output class.

    TaxCalcIO class constructor call must be followed by init() call.

    Parameters
    ----------
    input_data: string or Pandas DataFrame
        string is name of INPUT file that is CSV formatted containing
        variable names in the Records.USABLE_READ_VARS set, or
        Pandas DataFrame is INPUT data containing variable names in
        the Records.USABLE_READ_VARS set.  INPUT vsrisbles not in the
        Records.USABLE_READ_VARS set can be present but are ignored.

    tax_year: integer
        calendar year for which taxes will be computed for INPUT.

    baseline: None or string
        None implies baseline policy is current-law policy, or
        string is name of optional BASELINE file that is a JSON
        reform file.

    reform: None or string
        None implies no policy reform (current-law policy), or
        string is name of optional REFORM file(s).

    assump: None or string
        None implies economic assumptions are standard assumptions,
        or string is name of optional ASSUMP file.

    outdir: None or string
        None implies output files written to current directory,
        or string is name of optional output directory

    Returns
    -------
    class instance: TaxCalcIO
    """
    # pylint: disable=too-many-instance-attributes

    def __init__(self, input_data, tax_year, baseline, reform, assump,
                 outdir=None):
        # pylint: disable=too-many-arguments,too-many-locals
        # pylint: disable=too-many-branches,too-many-statements
        self.errmsg = ''
        # check name and existence of INPUT file
        inp = 'x'
        self.puf_input_data = False
        self.cps_input_data = False
        if isinstance(input_data, str):
            # remove any leading directory path from INPUT filename
            fname = os.path.basename(input_data)
            # check if fname ends with ".csv"
            if fname.endswith('.csv'):
                inp = '{}-{}'.format(fname[:-4], str(tax_year)[2:])
            else:
                msg = 'INPUT file name does not end in .csv'
                self.errmsg += 'ERROR: {}\n'.format(msg)
            # check existence of INPUT file
            self.puf_input_data = input_data.endswith('puf.csv')
            self.cps_input_data = input_data.endswith('cps.csv')
            if not self.cps_input_data and not os.path.isfile(input_data):
                msg = 'INPUT file could not be found'
                self.errmsg += 'ERROR: {}\n'.format(msg)
        elif isinstance(input_data, pd.DataFrame):
            inp = 'df-{}'.format(str(tax_year)[2:])
        else:
            msg = 'INPUT is neither string nor Pandas DataFrame'
            self.errmsg += 'ERROR: {}\n'.format(msg)
        # check name and existence of BASELINE file
        bas = '-x'
        if baseline is None:
            bas = '-#'
        elif isinstance(baseline, str):
            # remove any leading directory path from BASELINE filename
            fname = os.path.basename(baseline)
            # check if fname ends with ".json"
            if fname.endswith('.json'):
                bas = '-{}'.format(fname[:-5])
            else:
                msg = 'BASELINE file name does not end in .json'
                self.errmsg += 'ERROR: {}\n'.format(msg)
            # check existence of BASELINE file
            if not os.path.isfile(baseline):
                msg = 'BASELINE file could not be found'
                self.errmsg += 'ERROR: {}\n'.format(msg)
        else:
            msg = 'TaxCalcIO.ctor: baseline is neither None nor str'
            self.errmsg += 'ERROR: {}\n'.format(msg)
        # check name(s) and existence of REFORM file(s)
        ref = '-x'
        if reform is None:
            self.specified_reform = False
            ref = '-#'
        elif isinstance(reform, str):
            self.specified_reform = True
            # split any compound reform into list of simple reforms
            refnames = list()
            reforms = reform.split('+')
            for rfm in reforms:
                # remove any leading directory path from rfm filename
                fname = os.path.basename(rfm)
                # check if fname ends with ".json"
                if not fname.endswith('.json'):
                    msg = '{} does not end in .json'.format(fname)
                    self.errmsg += 'ERROR: REFORM file name {}\n'.format(msg)
                # check existence of REFORM file
                if not os.path.isfile(rfm):
                    msg = '{} could not be found'.format(rfm)
                    self.errmsg += 'ERROR: REFORM file {}\n'.format(msg)
                # add fname to list of refnames used in output file names
                refnames.append(fname)
            # create (possibly compound) reform name for output file names
            ref = '-'
            num_refnames = 0
            for refname in refnames:
                num_refnames += 1
                if num_refnames > 1:
                    ref += '+'
                ref += '{}'.format(refname[:-5])
        else:
            msg = 'TaxCalcIO.ctor: reform is neither None nor str'
            self.errmsg += 'ERROR: {}\n'.format(msg)
        # check name and existence of ASSUMP file
        asm = '-x'
        if assump is None:
            asm = '-#'
        elif isinstance(assump, str):
            # remove any leading directory path from ASSUMP filename
            fname = os.path.basename(assump)
            # check if fname ends with ".json"
            if fname.endswith('.json'):
                asm = '-{}'.format(fname[:-5])
            else:
                msg = 'ASSUMP file name does not end in .json'
                self.errmsg += 'ERROR: {}\n'.format(msg)
            # check existence of ASSUMP file
            if not os.path.isfile(assump):
                msg = 'ASSUMP file could not be found'
                self.errmsg += 'ERROR: {}\n'.format(msg)
        else:
            msg = 'TaxCalcIO.ctor: assump is neither None nor str'
            self.errmsg += 'ERROR: {}\n'.format(msg)
        # check name and existence of OUTDIR
        if outdir is None:
            valid_outdir = True
        elif isinstance(outdir, str):
            # check existence of OUTDIR
            if os.path.isdir(outdir):
                valid_outdir = True
            else:
                valid_outdir = False
                msg = 'OUTDIR could not be found'
                self.errmsg += 'ERROR: {}\n'.format(msg)
        else:
            valid_outdir = False
            msg = 'TaxCalcIO.ctor: outdir is neither None nor str'
            self.errmsg += 'ERROR: {}\n'.format(msg)
        # create OUTPUT file name and delete any existing output files
        output_filename = '{}{}{}{}.csv'.format(inp, bas, ref, asm)
        if outdir is None:
            self._output_filename = output_filename
            delete_old_files = True
        elif valid_outdir:
            self._output_filename = os.path.join(outdir, output_filename)
            delete_old_files = True
        else:
            delete_old_files = False
        if delete_old_files:
            delete_file(self._output_filename)
            delete_file(self._output_filename.replace('.csv', '.db'))
            delete_file(self._output_filename.replace('.csv', '-doc.text'))
            delete_file(self._output_filename.replace('.csv', '-tab.text'))
            delete_file(self._output_filename.replace('.csv', '-atr.html'))
            delete_file(self._output_filename.replace('.csv', '-mtr.html'))
            delete_file(self._output_filename.replace('.csv', '-pch.html'))
        # initialize variables whose values are set in init method
        self.calc = None
        self.calc_base = None
        self.param_dict = None
        self.policy_dicts = list()

    def init(self, input_data, tax_year, baseline, reform, assump,
             aging_input_data, exact_calculations):
        """
        TaxCalcIO class post-constructor method that completes initialization.

        Parameters
        ----------
        First five are same as the first five of the TaxCalcIO constructor:
            input_data, tax_year, baseline, reform, assump.

        aging_input_data: boolean
            whether or not to extrapolate Records data from data year to
            tax_year.

        exact_calculations: boolean
            specifies whether or not exact tax calculations are done without
            any smoothing of "stair-step" provisions in the tax law.
        """
        # pylint: disable=too-many-arguments,too-many-locals
        # pylint: disable=too-many-statements,too-many-branches
        self.errmsg = ''
        # get policy parameter dictionary from --baseline file
        basedict = Calculator.read_json_param_objects(baseline, None)
        # get assumption sub-dictionaries
        paramdict = Calculator.read_json_param_objects(None, assump)
        # get policy parameter dictionaries from --reform file(s)
        policydicts = list()
        if self.specified_reform:
            reforms = reform.split('+')
            for ref in reforms:
                pdict = Calculator.read_json_param_objects(ref, None)
                policydicts.append(pdict['policy'])
            paramdict['policy'] = policydicts[0]
        # remember parameters for reform documentation
        self.param_dict = paramdict
        self.policy_dicts = policydicts
        # create gdiff_baseline object
        gdiff_baseline = GrowDiff()
        try:
            gdiff_baseline.update_growdiff(paramdict['growdiff_baseline'])
        except ValueError as valerr_msg:
            self.errmsg += valerr_msg.__str__()
        # create GrowFactors base object that incorporates gdiff_baseline
        gfactors_base = GrowFactors()
        gdiff_baseline.apply_to(gfactors_base)
        # specify gdiff_response object
        gdiff_response = GrowDiff()
        try:
            gdiff_response.update_growdiff(paramdict['growdiff_response'])
        except ValueError as valerr_msg:
            self.errmsg += valerr_msg.__str__()
        # create GrowFactors ref object that has all gdiff objects applied
        gfactors_ref = GrowFactors()
        gdiff_baseline.apply_to(gfactors_ref)
        gdiff_response.apply_to(gfactors_ref)
        # create Policy objects:
        # ... the baseline Policy object
        base = Policy(gfactors=gfactors_base)
        try:
            base.implement_reform(basedict['policy'],
                                  print_warnings=False,
                                  raise_errors=False)
            self.errmsg += base.parameter_errors
        except ValueError as valerr_msg:
            self.errmsg += valerr_msg.__str__()
        # ... the reform Policy object
        if self.specified_reform:
            pol = Policy(gfactors=gfactors_ref)
            for poldict in policydicts:
                try:
                    pol.implement_reform(poldict,
                                         print_warnings=False,
                                         raise_errors=False)
                    self.errmsg += pol.parameter_errors
                except ValueError as valerr_msg:
                    self.errmsg += valerr_msg.__str__()
        else:
            pol = Policy(gfactors=gfactors_base)
        # create Consumption object
        con = Consumption()
        try:
            con.update_consumption(paramdict['consumption'])
        except ValueError as valerr_msg:
            self.errmsg += valerr_msg.__str__()
        # check for valid tax_year value
        if tax_year < pol.start_year:
            msg = 'tax_year {} less than policy.start_year {}'
            msg = msg.format(tax_year, pol.start_year)
            self.errmsg += 'ERROR: {}\n'.format(msg)
        if tax_year > pol.end_year:
            msg = 'tax_year {} greater than policy.end_year {}'
            msg = msg.format(tax_year, pol.end_year)
            self.errmsg += 'ERROR: {}\n'.format(msg)
        # any errors imply cannot proceed with calculations
        if self.errmsg:
            return
        # set policy to tax_year
        pol.set_year(tax_year)
        base.set_year(tax_year)
        # read input file contents into Records objects
        if aging_input_data:
            if self.cps_input_data:
                recs = Records.cps_constructor(
                    gfactors=gfactors_ref,
                    exact_calculations=exact_calculations
                )
                recs_base = Records.cps_constructor(
                    gfactors=gfactors_base,
                    exact_calculations=exact_calculations
                )
            else:  # if not cps_input_data but aging_input_data
                recs = Records(
                    data=input_data,
                    gfactors=gfactors_ref,
                    exact_calculations=exact_calculations
                )
                recs_base = Records(
                    data=input_data,
                    gfactors=gfactors_base,
                    exact_calculations=exact_calculations
                )
        else:  # input_data are raw data that are not being aged
            recs = Records(data=input_data,
                           gfactors=None,
                           exact_calculations=exact_calculations,
                           weights=None,
                           adjust_ratios=None,
                           start_year=tax_year)
            recs_base = copy.deepcopy(recs)
        if tax_year < recs.data_year:
            msg = 'tax_year {} less than records.data_year {}'
            msg = msg.format(tax_year, recs.data_year)
            self.errmsg += 'ERROR: {}\n'.format(msg)
        # create Calculator objects
        self.calc = Calculator(policy=pol, records=recs,
                               verbose=True,
                               consumption=con,
                               sync_years=aging_input_data)
        self.calc_base = Calculator(policy=base, records=recs_base,
                                    verbose=False,
                                    consumption=con,
                                    sync_years=aging_input_data)

    def custom_dump_variables(self, tcdumpvars_str):
        """
        Return set of variable names extracted from tcdumpvars_str, which
        contains the contents of the tcdumpvars file in the current directory.
        Also, builds self.errmsg if any custom variables are not valid.
        """
        assert isinstance(tcdumpvars_str, str)
        self.errmsg = ''
        # change some common delimiter characters into spaces
        dump_vars_str = tcdumpvars_str.replace(',', ' ')
        dump_vars_str = dump_vars_str.replace(';', ' ')
        dump_vars_str = dump_vars_str.replace('|', ' ')
        # split dump_vars_str into a list of dump variables
        dump_vars_list = dump_vars_str.split()
        # check that all dump_vars_list items are valid
        valid_set = Records.USABLE_READ_VARS | Records.CALCULATED_VARS
        for var in dump_vars_list:
            if var not in valid_set:
                msg = 'invalid variable name in tcdumpvars file: {}'
                msg = msg.format(var)
                self.errmsg += 'ERROR: {}\n'.format(msg)
        # add essential variables even if not on custom list
        if 'RECID' not in dump_vars_list:
            dump_vars_list.append('RECID')
        if 'FLPDYR' not in dump_vars_list:
            dump_vars_list.append('FLPDYR')
        # convert list into a set and return
        return set(dump_vars_list)

    def tax_year(self):
        """
        Return calendar year for which TaxCalcIO calculations are being done.
        """
        return self.calc.current_year

    def output_filepath(self):
        """
        Return full path to output file named in TaxCalcIO constructor.
        """
        dirpath = os.path.abspath(os.path.dirname(__file__))
        return os.path.join(dirpath, self._output_filename)

    def analyze(self, writing_output_file=False,
                output_tables=False,
                output_graphs=False,
                dump_varset=None,
                output_dump=False,
                output_sqldb=False):
        """
        Conduct tax analysis.

        Parameters
        ----------
        writing_output_file: boolean
           whether or not to generate and write output file

        output_tables: boolean
           whether or not to generate and write distributional tables
           to a text file

        output_graphs: boolean
           whether or not to generate and write HTML graphs of average
           and marginal tax rates by income percentile

        dump_varset: set
           custom set of variables to include in dump and sqldb output;
           None implies include all variables in dump and sqldb output

        output_dump: boolean
           whether or not to replace standard output with all input and
           calculated variables using their Tax-Calculator names

        output_sqldb: boolean
           whether or not to write SQLite3 database with dump table
           containing same output as written by output_dump to a csv file

        Returns
        -------
        Nothing
        """
        # pylint: disable=too-many-arguments,too-many-branches,too-many-locals
        if self.puf_input_data and self.calc.reform_warnings:
            warn = 'PARAMETER VALUE WARNING(S):  {}\n{}{}'  # pragma: no cover
            print(  # pragma: no cover
                warn.format('(read documentation for each parameter)',
                            self.calc.reform_warnings,
                            'CONTINUING WITH CALCULATIONS...')
            )
        calc_base_calculated = False
        self.calc.calc_all()
        if output_dump or output_sqldb:
            # might need marginal tax rates
            (mtr_paytax, mtr_inctax,
             _) = self.calc.mtr(wrt_full_compensation=False,
                                calc_all_already_called=True)
        else:
            # definitely do not need marginal tax rates
            mtr_paytax = None
            mtr_inctax = None
        # extract output if writing_output_file
        if writing_output_file:
            self.write_output_file(output_dump, dump_varset,
                                   mtr_paytax, mtr_inctax)
            self.write_doc_file()
        # optionally write --sqldb output to SQLite3 database
        if output_sqldb:
            self.write_sqldb_file(dump_varset, mtr_paytax, mtr_inctax)
        # optionally write --tables output to text file
        if output_tables:
            if not calc_base_calculated:
                self.calc_base.calc_all()
                calc_base_calculated = True
            self.write_tables_file()
        # optionally write --graphs output to HTML files
        if output_graphs:
            if not calc_base_calculated:
                self.calc_base.calc_all()
                calc_base_calculated = True
            self.write_graph_files()

    def write_output_file(self, output_dump, dump_varset,
                          mtr_paytax, mtr_inctax):
        """
        Write output to CSV-formatted file.
        """
        if output_dump:
            outdf = self.dump_output(dump_varset, mtr_inctax, mtr_paytax)
            column_order = sorted(outdf.columns)
        else:
            outdf = self.minimal_output()
            column_order = outdf.columns
        assert len(outdf.index) == self.calc.array_len
        outdf.to_csv(self._output_filename, columns=column_order,
                     index=False, float_format='%.2f')
        del outdf
        gc.collect()

    def write_doc_file(self):
        """
        Write reform documentation to text file.
        """
        if len(self.policy_dicts) <= 1:
            doc = Calculator.reform_documentation(self.param_dict)
        else:
            doc = Calculator.reform_documentation(self.param_dict,
                                                  self.policy_dicts[1:])
        doc_fname = self._output_filename.replace('.csv', '-doc.text')
        with open(doc_fname, 'w') as dfile:
            dfile.write(doc)

    def write_sqldb_file(self, dump_varset, mtr_paytax, mtr_inctax):
        """
        Write dump output to SQLite3 database table dump.
        """
        outdf = self.dump_output(dump_varset, mtr_inctax, mtr_paytax)
        assert len(outdf.index) == self.calc.array_len
        db_fname = self._output_filename.replace('.csv', '.db')
        dbcon = sqlite3.connect(db_fname)
        outdf.to_sql('dump', dbcon, if_exists='replace', index=False)
        dbcon.close()
        del outdf
        gc.collect()

    def write_tables_file(self):
        """
        Write tables to text file.
        """
        # pylint: disable=too-many-locals
        tab_fname = self._output_filename.replace('.csv', '-tab.text')
        # skip tables if there are not some positive weights
        if self.calc_base.total_weight() <= 0.:
            with open(tab_fname, 'w') as tfile:
                msg = 'No tables because sum of weights is not positive\n'
                tfile.write(msg)
            return
        # create list of results for nontax variables
        # - weights don't change with reform
        # - expanded_income may change, so always use baseline expanded income
        nontax_vars = ['s006', 'expanded_income']
        nontax = [self.calc_base.array(var) for var in nontax_vars]
        # create list of results for tax variables from reform Calculator
        tax_vars = ['iitax', 'payrolltax', 'lumpsum_tax', 'combined']
        reform = [self.calc.array(var) for var in tax_vars]
        # create DataFrame with tax distribution under reform
        dist = nontax + reform  # using expanded_income under baseline policy
        all_vars = nontax_vars + tax_vars
        distdf = pd.DataFrame(data=np.column_stack(dist), columns=all_vars)
        # create DataFrame with tax differences (reform - baseline)
        base = [self.calc_base.array(var) for var in tax_vars]
        change = [(reform[idx] - base[idx]) for idx in range(0, len(tax_vars))]
        diff = nontax + change  # using expanded_income under baseline policy
        diffdf = pd.DataFrame(data=np.column_stack(diff), columns=all_vars)
        # write each kind of distributional table
        with open(tab_fname, 'w') as tfile:
            TaxCalcIO.write_decile_table(distdf, tfile, tkind='Reform Totals')
            tfile.write('\n')
            TaxCalcIO.write_decile_table(diffdf, tfile, tkind='Differences')
        # delete intermediate DataFrame objects
        del distdf
        del diffdf
        gc.collect()

    @staticmethod
    def write_decile_table(dfx, tfile, tkind='Totals'):
        """
        Write to tfile the tkind decile table using dfx DataFrame.
        """
        dfx = add_quantile_table_row_variable(dfx, 'expanded_income', 10,
                                              decile_details=False,
                                              weight_by_income_measure=False)
        gdfx = dfx.groupby('table_row', as_index=False)
        rtns_series = gdfx.apply(unweighted_sum, 's006')
        xinc_series = gdfx.apply(weighted_sum, 'expanded_income')
        itax_series = gdfx.apply(weighted_sum, 'iitax')
        ptax_series = gdfx.apply(weighted_sum, 'payrolltax')
        htax_series = gdfx.apply(weighted_sum, 'lumpsum_tax')
        ctax_series = gdfx.apply(weighted_sum, 'combined')
        # write decile table to text file
        row = 'Weighted Tax {} by Baseline Expanded-Income Decile\n'
        tfile.write(row.format(tkind))
        rowfmt = '{}{}{}{}{}{}\n'
        row = rowfmt.format('    Returns',
                            '    ExpInc',
                            '    IncTax',
                            '    PayTax',
                            '     LSTax',
                            '    AllTax')
        tfile.write(row)
        row = rowfmt.format('       (#m)',
                            '      ($b)',
                            '      ($b)',
                            '      ($b)',
                            '      ($b)',
                            '      ($b)')
        tfile.write(row)
        rowfmt = '{:9.2f}{:10.1f}{:10.1f}{:10.1f}{:10.1f}{:10.1f}\n'
        for decile in range(0, 10):
            row = '{:2d}'.format(decile)
            row += rowfmt.format(rtns_series[decile] * 1e-6,
                                 xinc_series[decile] * 1e-9,
                                 itax_series[decile] * 1e-9,
                                 ptax_series[decile] * 1e-9,
                                 htax_series[decile] * 1e-9,
                                 ctax_series[decile] * 1e-9)
            tfile.write(row)
        row = ' A'
        row += rowfmt.format(rtns_series.sum() * 1e-6,
                             xinc_series.sum() * 1e-9,
                             itax_series.sum() * 1e-9,
                             ptax_series.sum() * 1e-9,
                             htax_series.sum() * 1e-9,
                             ctax_series.sum() * 1e-9)
        tfile.write(row)
        del gdfx
        del rtns_series
        del xinc_series
        del itax_series
        del ptax_series
        del htax_series
        del ctax_series
        gc.collect()

    def write_graph_files(self):
        """
        Write graphs to HTML files.
        """
        pos_wght_sum = self.calc.total_weight() > 0.0
        fig = None
        # average-tax-rate graph
        atr_fname = self._output_filename.replace('.csv', '-atr.html')
        atr_title = 'ATR by Income Percentile'
        if pos_wght_sum:
            fig = self.calc_base.atr_graph(self.calc)
            write_graph_file(fig, atr_fname, atr_title)
        else:
            reason = 'No graph because sum of weights is not positive'
            TaxCalcIO.write_empty_graph_file(atr_fname, atr_title, reason)
        # marginal-tax-rate graph
        mtr_fname = self._output_filename.replace('.csv', '-mtr.html')
        mtr_title = 'MTR by Income Percentile'
        if pos_wght_sum:
            fig = self.calc_base.mtr_graph(
                self.calc, alt_e00200p_text='Taxpayer Earnings')
            write_graph_file(fig, mtr_fname, mtr_title)
        else:
            reason = 'No graph because sum of weights is not positive'
            TaxCalcIO.write_empty_graph_file(mtr_fname, mtr_title, reason)
        # percentage-aftertax-income-change graph
        pch_fname = self._output_filename.replace('.csv', '-pch.html')
        pch_title = 'PCH by Income Percentile'
        if pos_wght_sum:
            fig = self.calc_base.pch_graph(self.calc)
            write_graph_file(fig, pch_fname, pch_title)
        else:
            reason = 'No graph because sum of weights is not positive'
            TaxCalcIO.write_empty_graph_file(pch_fname, pch_title, reason)
        if fig:
            del fig
            gc.collect()

    @staticmethod
    def write_empty_graph_file(fname, title, reason):
        """
        Write HTML graph file with title but no graph for specified reason.
        """
        txt = ('<html>\n'
               '<head><title>{}</title></head>\n'
               '<body><center<h1>{}</h1></center></body>\n'
               '</html>\n').format(title, reason)
        with open(fname, 'w') as gfile:
            gfile.write(txt)

    def minimal_output(self):
        """
        Extract minimal output and return it as Pandas DataFrame.
        """
        varlist = ['RECID', 'YEAR', 'WEIGHT', 'INCTAX', 'LSTAX', 'PAYTAX']
        odict = dict()
        scalc = self.calc
        odict['RECID'] = scalc.array('RECID')  # id for tax filing unit
        odict['YEAR'] = self.tax_year()  # tax calculation year
        odict['WEIGHT'] = scalc.array('s006')  # sample weight
        odict['INCTAX'] = scalc.array('iitax')  # federal income taxes
        odict['LSTAX'] = scalc.array('lumpsum_tax')  # lump-sum tax
        odict['PAYTAX'] = scalc.array('payrolltax')  # payroll taxes (ee+er)
        odf = pd.DataFrame(data=odict, columns=varlist)
        return odf

    def dump_output(self, dump_varset, mtr_inctax, mtr_paytax):
        """
        Extract dump output and return it as Pandas DataFrame.
        """
        if dump_varset is None:
            varset = Records.USABLE_READ_VARS | Records.CALCULATED_VARS
        else:
            varset = dump_varset
        # create and return dump output DataFrame
        odf = pd.DataFrame()
        for varname in varset:
            vardata = self.calc.array(varname)
            if varname in Records.INTEGER_VARS:
                odf[varname] = vardata
            else:
                odf[varname] = vardata.round(2)  # rounded to nearest cent
        # specify mtr values in percentage terms
        if 'mtr_inctax' in varset:
            odf['mtr_inctax'] = (mtr_inctax * 100).round(2)
        if 'mtr_paytax' in varset:
            odf['mtr_paytax'] = (mtr_paytax * 100).round(2)
        # specify tax calculation year
        odf['FLPDYR'] = self.tax_year()
        return odf
예제 #8
0
    def init(self, input_data, tax_year, baseline, reform, assump,
             aging_input_data, exact_calculations):
        """
        TaxCalcIO class post-constructor method that completes initialization.

        Parameters
        ----------
        First five are same as the first five of the TaxCalcIO constructor:
            input_data, tax_year, baseline, reform, assump.

        aging_input_data: boolean
            whether or not to extrapolate Records data from data year to
            tax_year.

        exact_calculations: boolean
            specifies whether or not exact tax calculations are done without
            any smoothing of "stair-step" provisions in the tax law.
        """
        # pylint: disable=too-many-arguments,too-many-locals
        # pylint: disable=too-many-statements,too-many-branches
        self.errmsg = ''
        # get policy parameter dictionary from --baseline file
        basedict = Calculator.read_json_param_objects(baseline, None)
        # get assumption sub-dictionaries
        paramdict = Calculator.read_json_param_objects(None, assump)
        # get policy parameter dictionaries from --reform file(s)
        policydicts = list()
        if self.specified_reform:
            reforms = reform.split('+')
            for ref in reforms:
                pdict = Calculator.read_json_param_objects(ref, None)
                policydicts.append(pdict['policy'])
            paramdict['policy'] = policydicts[0]
        # remember parameters for reform documentation
        self.param_dict = paramdict
        self.policy_dicts = policydicts
        # create gdiff_baseline object
        gdiff_baseline = GrowDiff()
        try:
            gdiff_baseline.update_growdiff(paramdict['growdiff_baseline'])
        except ValueError as valerr_msg:
            self.errmsg += valerr_msg.__str__()
        # create GrowFactors base object that incorporates gdiff_baseline
        gfactors_base = GrowFactors()
        gdiff_baseline.apply_to(gfactors_base)
        # specify gdiff_response object
        gdiff_response = GrowDiff()
        try:
            gdiff_response.update_growdiff(paramdict['growdiff_response'])
        except ValueError as valerr_msg:
            self.errmsg += valerr_msg.__str__()
        # create GrowFactors ref object that has all gdiff objects applied
        gfactors_ref = GrowFactors()
        gdiff_baseline.apply_to(gfactors_ref)
        gdiff_response.apply_to(gfactors_ref)
        # create Policy objects:
        # ... the baseline Policy object
        base = Policy(gfactors=gfactors_base)
        try:
            base.implement_reform(basedict['policy'],
                                  print_warnings=False,
                                  raise_errors=False)
            self.errmsg += base.parameter_errors
        except ValueError as valerr_msg:
            self.errmsg += valerr_msg.__str__()
        # ... the reform Policy object
        if self.specified_reform:
            pol = Policy(gfactors=gfactors_ref)
            for poldict in policydicts:
                try:
                    pol.implement_reform(poldict,
                                         print_warnings=False,
                                         raise_errors=False)
                    self.errmsg += pol.parameter_errors
                except ValueError as valerr_msg:
                    self.errmsg += valerr_msg.__str__()
        else:
            pol = Policy(gfactors=gfactors_base)
        # create Consumption object
        con = Consumption()
        try:
            con.update_consumption(paramdict['consumption'])
        except ValueError as valerr_msg:
            self.errmsg += valerr_msg.__str__()
        # check for valid tax_year value
        if tax_year < pol.start_year:
            msg = 'tax_year {} less than policy.start_year {}'
            msg = msg.format(tax_year, pol.start_year)
            self.errmsg += 'ERROR: {}\n'.format(msg)
        if tax_year > pol.end_year:
            msg = 'tax_year {} greater than policy.end_year {}'
            msg = msg.format(tax_year, pol.end_year)
            self.errmsg += 'ERROR: {}\n'.format(msg)
        # any errors imply cannot proceed with calculations
        if self.errmsg:
            return
        # set policy to tax_year
        pol.set_year(tax_year)
        base.set_year(tax_year)
        # read input file contents into Records objects
        if aging_input_data:
            if self.cps_input_data:
                recs = Records.cps_constructor(
                    gfactors=gfactors_ref,
                    exact_calculations=exact_calculations
                )
                recs_base = Records.cps_constructor(
                    gfactors=gfactors_base,
                    exact_calculations=exact_calculations
                )
            else:  # if not cps_input_data but aging_input_data
                recs = Records(
                    data=input_data,
                    gfactors=gfactors_ref,
                    exact_calculations=exact_calculations
                )
                recs_base = Records(
                    data=input_data,
                    gfactors=gfactors_base,
                    exact_calculations=exact_calculations
                )
        else:  # input_data are raw data that are not being aged
            recs = Records(data=input_data,
                           gfactors=None,
                           exact_calculations=exact_calculations,
                           weights=None,
                           adjust_ratios=None,
                           start_year=tax_year)
            recs_base = copy.deepcopy(recs)
        if tax_year < recs.data_year:
            msg = 'tax_year {} less than records.data_year {}'
            msg = msg.format(tax_year, recs.data_year)
            self.errmsg += 'ERROR: {}\n'.format(msg)
        # create Calculator objects
        self.calc = Calculator(policy=pol, records=recs,
                               verbose=True,
                               consumption=con,
                               sync_years=aging_input_data)
        self.calc_base = Calculator(policy=base, records=recs_base,
                                    verbose=False,
                                    consumption=con,
                                    sync_years=aging_input_data)
def generate_policy_revenues():
    from taxcalc.growfactors import GrowFactors
    from taxcalc.policy import Policy
    from taxcalc.records import Records
    from taxcalc.gstrecords import GSTRecords
    from taxcalc.corprecords import CorpRecords
    from taxcalc.parameters import ParametersBase
    from taxcalc.calculator import Calculator
    

    """
    for num in range(1, num_reforms):
        block_selected_dict[num]['selected_item']= block_widget_dict[num][1].get()
        block_selected_dict[num]['selected_value']= block_widget_dict[num][3].get()
        block_selected_dict[num]['selected_year']= block_widget_dict[num][2].get()
    print(block_selected_dict)
    """
    f = open('reform.json')
    block_selected_dict = json.load(f)
    print("block_selected_dict from json",block_selected_dict)
    #print(block_selected_dict)
    # create Records object containing pit.csv and pit_weights.csv input data
    #print("growfactors filename ", growfactors_filename)
    #recs = Records(data=data_filename, weights=weights_filename, gfactors=GrowFactors(growfactors_filename=growfactors_filename))
    #recs = Records(data=data_filename, weights=weights_filename, gfactors=GrowFactors(growfactors_filename=growfactors_filename))

    #recs.increment_year1(3.0)
    
    #grecs = GSTRecords()
    f = open('global_vars.json')
    vars = json.load(f)
        
    print("data_filename: ", vars['cit_data_filename'])
    print("weights_filename: ", vars['cit_weights_filename'])
    print("growfactors_filename: ", vars['GROWFACTORS_FILENAME'])
    print("policy_filename: ", vars['DEFAULTS_FILENAME'])
    # create CorpRecords object using cross-section data
    #crecs1 = CorpRecords(data='cit_cross.csv', weights='cit_cross_wgts1.csv')
    crecs1 = CorpRecords(data=vars['cit_data_filename'], weights=vars['cit_weights_filename'], gfactors=GrowFactors(growfactors_filename=vars['GROWFACTORS_FILENAME']))
    #crecs1 = CorpRecords(data=vars['cit_weights_filename'], weights=vars['cit_weights_filename'])

    # Note: weights argument is optional
    assert isinstance(crecs1, CorpRecords)
    assert crecs1.current_year == 2017
    
    # create Policy object containing current-law policy
    pol = Policy(DEFAULTS_FILENAME=vars['DEFAULTS_FILENAME'])
    
    # specify Calculator objects for current-law policy
    #calc1 = Calculator(policy=pol, records=recs, corprecords=crecs1,
    #                   gstrecords=grecs, verbose=False)
    calc1 = Calculator(policy=pol, corprecords=crecs1, verbose=False)    
    #calc1.increment_year1(3.8)
    assert isinstance(calc1, Calculator)
    assert calc1.current_year == 2017

    np.seterr(divide='ignore', invalid='ignore')
    
    pol2 = Policy(DEFAULTS_FILENAME=vars['DEFAULTS_FILENAME'])
    
    years, reform=read_reform_dict(block_selected_dict)
    print("reform dictionary: ",reform) 
    #reform = Calculator.read_json_param_objects('app01_reform.json', None)
    pol2.implement_reform(reform['policy'])
    
    #calc2 = Calculator(policy=pol2, records=recs, corprecords=crecs1,
    #                   gstrecords=grecs, verbose=False)
    calc2 = Calculator(policy=pol2, corprecords=crecs1, verbose=False)
    pit_adjustment_factor={}
    revenue_dict_cit={}
    revenue_amount_dict = {}

    calc1.calc_all()
        
           
    for year in range(2019, 2024):
        cols = []
        calc1.advance_to_year(year)       
        calc2.advance_to_year(year)
        # NOTE: calc1 now contains a PRIVATE COPY of pol and a PRIVATE COPY of recs,
        #       so we can continue to use pol and recs in this script without any
        #       concern about side effects from Calculator method calls on calc1.

        # Produce DataFrame of results using the calculator
        
        # First run the calculator for the corporate income tax
        calc1.calc_all()
        
        print("***** Year ", year)
        weighted_citax1 = calc1.weighted_total_cit('citax')                
        citax_collection_billions1 = weighted_citax1/10**9       
        citax_collection_str1 = '{0:.2f}'.format(citax_collection_billions1)
              
        print("The CIT Collection in billions is: ", citax_collection_billions1)
 
        # Produce DataFrame of results using cross-section
        calc2.calc_all()
       
        weighted_citax2 = calc2.weighted_total_cit('citax')                
        citax_collection_billions2 = weighted_citax2/10**9    
        citax_collection_str2 = '{0:.2f}'.format(citax_collection_billions2)
        # This is the difference in the collection due to the reform
        # This amount will now be allocated to dividends of PIT
        citax_diff_collection_billions2 = (citax_collection_billions2-citax_collection_billions1)
        citax_diff_collection_str2 = '{0:.2f}'.format(citax_diff_collection_billions2)
              
        print("The CIT Collection after reform billions is: ", citax_collection_billions2)

        print("The difference in CIT Collection in billions is: ", citax_diff_collection_billions2)

        # Process of allocation of difference in CIT profits to PIT 
        # in the form of Dividends
        # Dividends in this case is reported as Income from Other Sources
        # TOTAL_INCOME_OS in the PIT form
        
        # First get the unadjusted amounts
        
        # Now calculate the adjusted amounts
        # contribution to PIT Dividends
        proportion_change_dividend = (weighted_citax1 - weighted_citax2)/weighted_citax1       
        new_dividend_proportion_of_old = (1 + proportion_change_dividend)
        pit_adjustment_factor[year]=new_dividend_proportion_of_old
        # Store Results
        revenue_dict_cit[year]={}
        revenue_dict_cit[year]['current_law']=citax_collection_str1
        revenue_dict_cit[year]['reform']=citax_collection_str2      
        revenue_dict_cit[year]['difference']=citax_diff_collection_str2
        
    print(revenue_dict_cit)
    
    print("new_dividend_proportion_of_old ", pit_adjustment_factor)     

    # now update pit.csv with this proportion
    # start a new round of simulation for pit
    recs = Records(data=vars['pit_data_filename'], weights=vars['pit_weights_filename'], gfactors=GrowFactors(growfactors_filename=vars['GROWFACTORS_FILENAME']))
    
    # create Policy object containing current-law policy
    pol = Policy(DEFAULTS_FILENAME=vars['DEFAULTS_FILENAME'])
    
    # specify Calculator objects for current-law policy
    #calc1 = Calculator(policy=pol, records=recs, corprecords=crecs1,
    #                   gstrecords=grecs, verbose=False)
    calc1 = Calculator(policy=pol, records=recs, verbose=False)    
    #calc1.increment_year1(3.8)
    assert isinstance(calc1, Calculator)
    assert calc1.current_year == 2017

    np.seterr(divide='ignore', invalid='ignore')
    
    pol2 = Policy(DEFAULTS_FILENAME=vars['DEFAULTS_FILENAME'])
    
    #years, reform=read_reform_dict(block_selected_dict)
    #print("reform dictionary: ", reform) 
    #reform = Calculator.read_json_param_objects('app01_reform.json', None)
    pol2.implement_reform(reform['policy'])
    
    #calc2 = Calculator(policy=pol2, records=recs, corprecords=crecs1,
    #                   gstrecords=grecs, verbose=False)

    calc2 = Calculator(policy=pol2, records=recs, verbose=False)
        
    total_revenue_text={}
    reform_revenue_text={}
    revenue_dict_pit={}
    revenue_amount_dict = {}
    num = 1
    first_time = True
    i=1
    j=0
    #rows = []
    
    window = tk.Toplevel()
    window.geometry("800x400+140+140")
    display_table(window, revenue_dict_cit, revenue_dict_pit, header=True)

    #for year in range(years[0], years[-1]+1):            
    for year in range(2019, 2024):
        cols = []
        calc1.advance_to_year(year)       
        calc2.advance_to_year(year)
        # NOTE: calc1 now contains a PRIVATE COPY of pol and a PRIVATE COPY of recs,
        #       so we can continue to use pol and recs in this script without any
        #       concern about side effects from Calculator method calls on calc1.

        # Produce DataFrame of results using the calculator
        
        # First run the calculator for the corporate income tax
        calc1.calc_all()
        
        weighted_pitax1 = calc1.weighted_total_pit('pitax')                
        pitax_collection_billions1 = weighted_pitax1/10**9        
        pitax_collection_str1 = '{0:.2f}'.format(pitax_collection_billions1)
        
        print('\n\n\n')
        print(f'TAX COLLECTION FOR THE YEAR - {year} \n')   
        print("The PIT Collection in billions is: ", pitax_collection_billions1)       
        #total_revenue_text[year] = "PIT COLLECTION UNDER CURRENT LAW FOR THE YEAR - " + str(year)+" : "+str(pitax_collection_str1)+" bill"

        # Produce DataFrame of results using cross-section
        calc2.calc_all()
       
        
        weighted_pitax2 = calc2.weighted_total_pit('pitax')
        pitax_collection_billions2 = weighted_pitax2/10**9        
        pitax_collection_str2 = '{0:.2f}'.format(pitax_collection_billions2)
        pitax_diff_collection_billions2 = (pitax_collection_billions2-pitax_collection_billions1)        
        pitax_diff_collection_str2 = '{0:.2f}'.format(pitax_diff_collection_billions2)
        
        # Now calculate the adjusted amounts
        # contribution to PIT Dividends

        print("Total Income from Other Sources (bill) no adjustment is ", calc2.weighted_total_pit('TOTAL_INCOME_OS')/10**9 )       
        calc2.adjust_pit(pit_adjustment_factor[year])
        print("Total Income from Other Sources (bill) after adjustment is ", calc2.weighted_total_pit('TOTAL_INCOME_OS')/10**9 )
        
        calc2.calc_all()
        
        weighted_pitax3 = calc2.weighted_total_pit('pitax')     
        pitax_collection_billions3 = weighted_pitax3/10**9        
        pitax_collection_str3 = '{0:.2f}'.format(pitax_collection_billions3)

        pitax_diff_collection_billions3 = (pitax_collection_billions3-pitax_collection_billions1)        
        pitax_diff_collection_str3 = '{0:.2f}'.format(pitax_diff_collection_billions3)

        pitax_diff_collection_billions4 = (pitax_collection_billions3-pitax_collection_billions2)        
        pitax_diff_collection_str4 = '{0:.2f}'.format(pitax_diff_collection_billions4)
        
        #save the results
        revenue_dict_pit[year]={}
        revenue_dict_pit[year]['current_law']=pitax_collection_str1
        revenue_dict_pit[year]['reform']={}
        revenue_dict_pit[year]['reform']['unadjusted']=pitax_collection_str2
        revenue_dict_pit[year]['reform']['adjusted']=pitax_collection_str3
        revenue_dict_pit[year]['difference']=pitax_diff_collection_str3
        
        print('\n\n\n')       
        print(f'TAX COLLECTION FOR THE YEAR UNDER REFORM - {year} \n')       
        print("The PIT Collection in billions is: ", pitax_collection_billions2)
        print("The difference in PIT Collection in billions is: ", pitax_diff_collection_billions2)
        print('****AFTER ADJUSTMENT \n\n\n')
        print('TAX COLLECTION FOR THE YEAR UNDER REFORM WITH ADJUSTMENT \n')       
        print("The PIT Collection in billions after adjusting for the impact of CIT is: ", pitax_collection_billions3)        
        print("The difference in PIT Collection in billions after adjusting for the impact of CIT is: ", pitax_diff_collection_billions3)

        print("The impact of adjustment is: ", pitax_diff_collection_billions4)
        
        display_table(window, revenue_dict_cit, revenue_dict_pit, year=year, row=i)
        i=i+1
        #reverse the adjustment to obtain baseline
        calc2.adjust_pit(1/pit_adjustment_factor[year])
   
    display_table(window, revenue_dict_cit, revenue_dict_pit, footer=i)

    
    
    """
def generate_revenues():
    from taxcalc.growfactors import GrowFactors
    from taxcalc.policy import Policy
    from taxcalc.records import Records
    from taxcalc.gstrecords import GSTRecords
    from taxcalc.corprecords import CorpRecords
    from taxcalc.parameters import ParametersBase
    from taxcalc.calculator import Calculator
    """
    for num in range(1, num_reforms):
        block_selected_dict[num]['selected_item']= block_widget_dict[num][1].get()
        block_selected_dict[num]['selected_value']= block_widget_dict[num][3].get()
        block_selected_dict[num]['selected_year']= block_widget_dict[num][2].get()
    print(block_selected_dict)
    """
    f = open('reform.json')
    block_selected_dict = json.load(f)
    print("block_selected_dict from json", block_selected_dict)
    #print(block_selected_dict)
    # create Records object containing pit.csv and pit_weights.csv input data
    #print("growfactors filename ", growfactors_filename)
    #recs = Records(data=data_filename, weights=weights_filename, gfactors=GrowFactors(growfactors_filename=growfactors_filename))
    #recs = Records(data=data_filename, weights=weights_filename, gfactors=GrowFactors(growfactors_filename=growfactors_filename))

    #recs.increment_year1(3.0)

    #grecs = GSTRecords()
    f = open('global_vars.json')
    vars = json.load(f)

    print("data_filename: ", vars['cit_data_filename'])
    print("weights_filename: ", vars['cit_weights_filename'])
    print("growfactors_filename: ", vars['GROWFACTORS_FILENAME'])
    print("policy_filename: ", vars['DEFAULTS_FILENAME'])
    # create CorpRecords object using cross-section data
    #crecs1 = CorpRecords(data='cit_cross.csv', weights='cit_cross_wgts1.csv')
    crecs1 = CorpRecords(
        data=vars['cit_data_filename'],
        weights=vars['cit_weights_filename'],
        gfactors=GrowFactors(
            growfactors_filename=vars['GROWFACTORS_FILENAME']))
    #crecs1 = CorpRecords(data=vars['cit_weights_filename'], weights=vars['cit_weights_filename'])

    # Note: weights argument is optional
    assert isinstance(crecs1, CorpRecords)
    assert crecs1.current_year == 2017

    # create Policy object containing current-law policy
    pol = Policy(DEFAULTS_FILENAME=vars['DEFAULTS_FILENAME'])

    # specify Calculator objects for current-law policy
    #calc1 = Calculator(policy=pol, records=recs, corprecords=crecs1,
    #                   gstrecords=grecs, verbose=False)
    calc1 = Calculator(policy=pol, corprecords=crecs1, verbose=False)
    #calc1.increment_year1(3.8)
    assert isinstance(calc1, Calculator)
    assert calc1.current_year == 2017

    np.seterr(divide='ignore', invalid='ignore')

    calc1.calc_all()
    revenue_dict_cit = {}

    for year in range(2019, 2024):
        cols = []
        calc1.advance_to_year(year)

        # NOTE: calc1 now contains a PRIVATE COPY of pol and a PRIVATE COPY of recs,
        #       so we can continue to use pol and recs in this script without any
        #       concern about side effects from Calculator method calls on calc1.

        # Produce DataFrame of results using the calculator

        # First run the calculator for the corporate income tax
        calc1.calc_all()

        print("***** Year ", year)
        weighted_citax1 = calc1.weighted_total_cit('citax')
        citax_collection_billions1 = weighted_citax1 / 10**9
        citax_collection_str1 = '{0:.2f}'.format(citax_collection_billions1)

        print("The CIT Collection in billions is: ",
              citax_collection_billions1)

        # Store Results
        revenue_dict_cit[year] = {}
        revenue_dict_cit[year]['current_law'] = citax_collection_str1

    # start a new round of simulation for pit
    recs = Records(data=vars['pit_data_filename'],
                   weights=vars['pit_weights_filename'],
                   gfactors=GrowFactors(
                       growfactors_filename=vars['GROWFACTORS_FILENAME']))

    # create Policy object containing current-law policy
    pol = Policy(DEFAULTS_FILENAME=vars['DEFAULTS_FILENAME'])

    # specify Calculator objects for current-law policy
    #calc1 = Calculator(policy=pol, records=recs, corprecords=crecs1,
    #                   gstrecords=grecs, verbose=False)
    calc1 = Calculator(policy=pol, records=recs, verbose=False)
    #calc1.increment_year1(3.8)
    assert isinstance(calc1, Calculator)
    assert calc1.current_year == 2017

    np.seterr(divide='ignore', invalid='ignore')

    total_revenue_text = {}
    reform_revenue_text = {}
    revenue_dict_pit = {}
    revenue_amount_dict = {}
    num = 1
    first_time = True
    i = 1
    j = 0
    #rows = []

    window = tk.Toplevel()
    window.geometry("800x400+140+140")
    display_table(window, revenue_dict_cit, revenue_dict_pit, header=True)

    #for year in range(years[0], years[-1]+1):
    for year in range(2019, 2024):
        cols = []
        calc1.advance_to_year(year)

        # NOTE: calc1 now contains a PRIVATE COPY of pol and a PRIVATE COPY of recs,
        #       so we can continue to use pol and recs in this script without any
        #       concern about side effects from Calculator method calls on calc1.

        # Produce DataFrame of results using the calculator

        # First run the calculator for the corporate income tax
        calc1.calc_all()

        weighted_pitax1 = calc1.weighted_total_pit('pitax')
        pitax_collection_billions1 = weighted_pitax1 / 10**9
        pitax_collection_str1 = '{0:.2f}'.format(pitax_collection_billions1)

        print('\n\n\n')
        print(f'TAX COLLECTION FOR THE YEAR - {year} \n')
        print("The PIT Collection in billions is: ",
              pitax_collection_billions1)
        #total_revenue_text[year] = "PIT COLLECTION UNDER CURRENT LAW FOR THE YEAR - " + str(year)+" : "+str(pitax_collection_str1)+" bill"

        #save the results
        revenue_dict_pit[year] = {}
        revenue_dict_pit[year]['current_law'] = pitax_collection_str1

        display_table(window,
                      revenue_dict_cit,
                      revenue_dict_pit,
                      year=year,
                      row=i)
        i = i + 1

    display_table(window, revenue_dict_cit, revenue_dict_pit, footer=i)
    """