class TaxCalcIO(object): """ Constructor for the Tax-Calculator Input-Output class. TaxCalcIO class constructor call must be followed by init() call. Parameters ---------- input_data: string or Pandas DataFrame string is name of INPUT file that is CSV formatted containing variable names in the Records.USABLE_READ_VARS set, or Pandas DataFrame is INPUT data containing variable names in the Records.USABLE_READ_VARS set. INPUT vsrisbles not in the Records.USABLE_READ_VARS set can be present but are ignored. tax_year: integer calendar year for which taxes will be computed for INPUT. baseline: None or string None implies baseline policy is current-law policy, or string is name of optional BASELINE file that is a JSON reform file. reform: None or string None implies no policy reform (current-law policy), or string is name of optional REFORM file(s). assump: None or string None implies economic assumptions are standard assumptions, or string is name of optional ASSUMP file. outdir: None or string None implies output files written to current directory, or string is name of optional output directory Returns ------- class instance: TaxCalcIO """ # pylint: disable=too-many-instance-attributes def __init__(self, input_data, tax_year, baseline, reform, assump, outdir=None): # pylint: disable=too-many-arguments,too-many-locals # pylint: disable=too-many-branches,too-many-statements self.errmsg = '' # check name and existence of INPUT file inp = 'x' self.puf_input_data = False self.cps_input_data = False if isinstance(input_data, six.string_types): # remove any leading directory path from INPUT filename fname = os.path.basename(input_data) # check if fname ends with ".csv" if fname.endswith('.csv'): inp = '{}-{}'.format(fname[:-4], str(tax_year)[2:]) else: msg = 'INPUT file name does not end in .csv' self.errmsg += 'ERROR: {}\n'.format(msg) # check existence of INPUT file self.puf_input_data = input_data.endswith('puf.csv') self.cps_input_data = input_data.endswith('cps.csv') if not self.cps_input_data and not os.path.isfile(input_data): msg = 'INPUT file could not be found' self.errmsg += 'ERROR: {}\n'.format(msg) elif isinstance(input_data, pd.DataFrame): inp = 'df-{}'.format(str(tax_year)[2:]) else: msg = 'INPUT is neither string nor Pandas DataFrame' self.errmsg += 'ERROR: {}\n'.format(msg) # check name and existence of BASELINE file bas = '-x' if baseline is None: bas = '-#' elif isinstance(baseline, six.string_types): # remove any leading directory path from BASELINE filename fname = os.path.basename(baseline) # check if fname ends with ".json" if fname.endswith('.json'): bas = '-{}'.format(fname[:-5]) else: msg = 'BASELINE file name does not end in .json' self.errmsg += 'ERROR: {}\n'.format(msg) # check existence of BASELINE file if not os.path.isfile(baseline): msg = 'BASELINE file could not be found' self.errmsg += 'ERROR: {}\n'.format(msg) else: msg = 'TaxCalcIO.ctor: baseline is neither None nor str' self.errmsg += 'ERROR: {}\n'.format(msg) # check name(s) and existence of REFORM file(s) ref = '-x' if reform is None: self.specified_reform = False ref = '-#' elif isinstance(reform, six.string_types): self.specified_reform = True # split any compound reform into list of simple reforms refnames = list() reforms = reform.split('+') for rfm in reforms: # remove any leading directory path from rfm filename fname = os.path.basename(rfm) # check if fname ends with ".json" if not fname.endswith('.json'): msg = '{} does not end in .json'.format(fname) self.errmsg += 'ERROR: REFORM file name {}\n'.format(msg) # check existence of REFORM file if not os.path.isfile(rfm): msg = '{} could not be found'.format(rfm) self.errmsg += 'ERROR: REFORM file {}\n'.format(msg) # add fname to list of refnames used in output file names refnames.append(fname) # create (possibly compound) reform name for output file names ref = '-' num_refnames = 0 for refname in refnames: num_refnames += 1 if num_refnames > 1: ref += '+' ref += '{}'.format(refname[:-5]) else: msg = 'TaxCalcIO.ctor: reform is neither None nor str' self.errmsg += 'ERROR: {}\n'.format(msg) # check name and existence of ASSUMP file asm = '-x' if assump is None: asm = '-#' elif isinstance(assump, six.string_types): # remove any leading directory path from ASSUMP filename fname = os.path.basename(assump) # check if fname ends with ".json" if fname.endswith('.json'): asm = '-{}'.format(fname[:-5]) else: msg = 'ASSUMP file name does not end in .json' self.errmsg += 'ERROR: {}\n'.format(msg) # check existence of ASSUMP file if not os.path.isfile(assump): msg = 'ASSUMP file could not be found' self.errmsg += 'ERROR: {}\n'.format(msg) else: msg = 'TaxCalcIO.ctor: assump is neither None nor str' self.errmsg += 'ERROR: {}\n'.format(msg) # check name and existence of OUTDIR if outdir is None: valid_outdir = True elif isinstance(outdir, six.string_types): # check existence of OUTDIR if os.path.isdir(outdir): valid_outdir = True else: valid_outdir = False msg = 'OUTDIR could not be found' self.errmsg += 'ERROR: {}\n'.format(msg) else: valid_outdir = False msg = 'TaxCalcIO.ctor: outdir is neither None nor str' self.errmsg += 'ERROR: {}\n'.format(msg) # create OUTPUT file name and delete any existing output files output_filename = '{}{}{}{}.csv'.format(inp, bas, ref, asm) if outdir is None: self._output_filename = output_filename delete_old_files = True elif valid_outdir: self._output_filename = os.path.join(outdir, output_filename) delete_old_files = True else: delete_old_files = False if delete_old_files: delete_file(self._output_filename) delete_file(self._output_filename.replace('.csv', '.db')) delete_file(self._output_filename.replace('.csv', '-doc.text')) delete_file(self._output_filename.replace('.csv', '-tab.text')) delete_file(self._output_filename.replace('.csv', '-atr.html')) delete_file(self._output_filename.replace('.csv', '-mtr.html')) delete_file(self._output_filename.replace('.csv', '-pch.html')) # initialize variables whose values are set in init method self.behavior_has_any_response = False self.calc = None self.calc_base = None self.growmodel = None self.param_dict = None self.policy_dicts = list() def init(self, input_data, tax_year, baseline, reform, assump, growdiff_growmodel, aging_input_data, exact_calculations): """ TaxCalcIO class post-constructor method that completes initialization. Parameters ---------- First five are same as the first five of the TaxCalcIO constructor: input_data, tax_year, baseline, reform, assump. growdiff_growmodel: GrowDiff object or None growdiff_growmodel GrowDiff object is used only in the TaxCalcIO.growmodel_analysis method. aging_input_data: boolean whether or not to extrapolate Records data from data year to tax_year. exact_calculations: boolean specifies whether or not exact tax calculations are done without any smoothing of "stair-step" provisions in the tax law. """ # pylint: disable=too-many-arguments,too-many-locals # pylint: disable=too-many-statements,too-many-branches self.errmsg = '' # get policy parameter dictionary from --baseline file basedict = Calculator.read_json_param_objects(baseline, None) # get assumption sub-dictionaries paramdict = Calculator.read_json_param_objects(None, assump) # get policy parameter dictionaries from --reform file(s) policydicts = list() if self.specified_reform: reforms = reform.split('+') for ref in reforms: pdict = Calculator.read_json_param_objects(ref, None) policydicts.append(pdict['policy']) paramdict['policy'] = policydicts[0] # remember parameters for reform documentation self.param_dict = paramdict self.policy_dicts = policydicts # create Behavior object beh = Behavior() try: beh.update_behavior(paramdict['behavior']) except ValueError as valerr_msg: self.errmsg += valerr_msg.__str__() self.behavior_has_any_response = beh.has_any_response() # create gdiff_baseline object gdiff_baseline = GrowDiff() try: gdiff_baseline.update_growdiff(paramdict['growdiff_baseline']) except ValueError as valerr_msg: self.errmsg += valerr_msg.__str__() # create GrowFactors base object that incorporates gdiff_baseline gfactors_base = GrowFactors() gdiff_baseline.apply_to(gfactors_base) # specify gdiff_response object gdiff_response = GrowDiff() try: gdiff_response.update_growdiff(paramdict['growdiff_response']) except ValueError as valerr_msg: self.errmsg += valerr_msg.__str__() # create GrowFactors ref object that has all gdiff objects applied gfactors_ref = GrowFactors() gdiff_baseline.apply_to(gfactors_ref) gdiff_response.apply_to(gfactors_ref) if growdiff_growmodel: growdiff_growmodel.apply_to(gfactors_ref) # create Policy objects: # ... the baseline Policy object base = Policy(gfactors=gfactors_base) try: base.implement_reform(basedict['policy'], print_warnings=False, raise_errors=False) self.errmsg += base.parameter_errors except ValueError as valerr_msg: self.errmsg += valerr_msg.__str__() # ... the reform Policy object if self.specified_reform: pol = Policy(gfactors=gfactors_ref) for poldict in policydicts: try: pol.implement_reform(poldict, print_warnings=False, raise_errors=False) self.errmsg += pol.parameter_errors except ValueError as valerr_msg: self.errmsg += valerr_msg.__str__() else: pol = Policy(gfactors=gfactors_base) # create Consumption object con = Consumption() try: con.update_consumption(paramdict['consumption']) except ValueError as valerr_msg: self.errmsg += valerr_msg.__str__() # create GrowModel object self.growmodel = GrowModel() try: self.growmodel.update_growmodel(paramdict['growmodel']) except ValueError as valerr_msg: self.errmsg += valerr_msg.__str__() # check for valid tax_year value if tax_year < pol.start_year: msg = 'tax_year {} less than policy.start_year {}' msg = msg.format(tax_year, pol.start_year) self.errmsg += 'ERROR: {}\n'.format(msg) if tax_year > pol.end_year: msg = 'tax_year {} greater than policy.end_year {}' msg = msg.format(tax_year, pol.end_year) self.errmsg += 'ERROR: {}\n'.format(msg) # any errors imply cannot proceed with calculations if self.errmsg: return # set policy to tax_year pol.set_year(tax_year) base.set_year(tax_year) # read input file contents into Records objects if aging_input_data: if self.cps_input_data: recs = Records.cps_constructor( gfactors=gfactors_ref, exact_calculations=exact_calculations) recs_base = Records.cps_constructor( gfactors=gfactors_base, exact_calculations=exact_calculations) else: # if not cps_input_data but aging_input_data recs = Records(data=input_data, gfactors=gfactors_ref, exact_calculations=exact_calculations) recs_base = Records(data=input_data, gfactors=gfactors_base, exact_calculations=exact_calculations) else: # input_data are raw data that are not being aged recs = Records(data=input_data, gfactors=None, exact_calculations=exact_calculations, weights=None, adjust_ratios=None, start_year=tax_year) recs_base = copy.deepcopy(recs) if tax_year < recs.data_year: msg = 'tax_year {} less than records.data_year {}' msg = msg.format(tax_year, recs.data_year) self.errmsg += 'ERROR: {}\n'.format(msg) # create Calculator objects self.calc = Calculator(policy=pol, records=recs, verbose=True, consumption=con, behavior=beh, sync_years=aging_input_data) self.calc_base = Calculator(policy=base, records=recs_base, verbose=False, consumption=con, sync_years=aging_input_data) def custom_dump_variables(self, tcdumpvars_str): """ Return set of variable names extracted from tcdumpvars_str, which contains the contents of the tcdumpvars file in the current directory. Also, builds self.errmsg if any custom variables are not valid. """ assert isinstance(tcdumpvars_str, six.string_types) self.errmsg = '' # change some common delimiter characters into spaces dump_vars_str = tcdumpvars_str.replace(',', ' ') dump_vars_str = dump_vars_str.replace(';', ' ') dump_vars_str = dump_vars_str.replace('|', ' ') # split dump_vars_str into a list of dump variables dump_vars_list = dump_vars_str.split() # check that all dump_vars_list items are valid valid_set = Records.USABLE_READ_VARS | Records.CALCULATED_VARS for var in dump_vars_list: if var not in valid_set: msg = 'invalid variable name in tcdumpvars file: {}' msg = msg.format(var) self.errmsg += 'ERROR: {}\n'.format(msg) # add essential variables even if not on custom list if 'RECID' not in dump_vars_list: dump_vars_list.append('RECID') if 'FLPDYR' not in dump_vars_list: dump_vars_list.append('FLPDYR') # convert list into a set and return return set(dump_vars_list) def tax_year(self): """ Return calendar year for which TaxCalcIO calculations are being done. """ return self.calc.policy_current_year() def output_filepath(self): """ Return full path to output file named in TaxCalcIO constructor. """ dirpath = os.path.abspath(os.path.dirname(__file__)) return os.path.join(dirpath, self._output_filename) def analyze(self, writing_output_file=False, output_tables=False, output_graphs=False, output_ceeu=False, dump_varset=None, output_dump=False, output_sqldb=False): """ Conduct tax analysis. Parameters ---------- writing_output_file: boolean whether or not to generate and write output file output_tables: boolean whether or not to generate and write distributional tables to a text file output_graphs: boolean whether or not to generate and write HTML graphs of average and marginal tax rates by income percentile output_ceeu: boolean whether or not to calculate and write to stdout standard certainty-equivalent expected-utility statistics dump_varset: set custom set of variables to include in dump and sqldb output; None implies include all variables in dump and sqldb output output_dump: boolean whether or not to replace standard output with all input and calculated variables using their Tax-Calculator names output_sqldb: boolean whether or not to write SQLite3 database with dump table containing same output as written by output_dump to a csv file Returns ------- Nothing """ # pylint: disable=too-many-arguments,too-many-branches if self.puf_input_data and self.calc.reform_warnings: warn = 'PARAMETER VALUE WARNING(S): {}\n{}{}' # pragma: no cover print( # pragma: no cover warn.format('(read documentation for each parameter)', self.calc.reform_warnings, 'CONTINUING WITH CALCULATIONS...')) calc_base_calculated = False if self.behavior_has_any_response: self.calc = Behavior.response(self.calc_base, self.calc) calc_base_calculated = True else: self.calc.calc_all() if output_dump or output_sqldb: # might need marginal tax rates (mtr_paytax, mtr_inctax, _) = self.calc.mtr(wrt_full_compensation=False, calc_all_already_called=True) else: # definitely do not need marginal tax rates mtr_paytax = None mtr_inctax = None # optionally conduct normative welfare analysis if output_ceeu: if self.behavior_has_any_response: ceeu_results = 'SKIP --ceeu output because baseline and ' ceeu_results += 'reform cannot be sensibly compared\n ' ceeu_results += ' ' ceeu_results += 'when specifying "behavior" with --assump ' ceeu_results += 'option' elif self.calc.total_weight() <= 0.: ceeu_results = 'SKIP --ceeu output because ' ceeu_results += 'sum of weights is not positive' else: self.calc_base.calc_all() calc_base_calculated = True cedict = self.calc_base.ce_aftertax_income( self.calc, custom_params=None, require_no_agg_tax_change=False) ceeu_results = TaxCalcIO.ceeu_output(cedict) else: ceeu_results = None # extract output if writing_output_file if writing_output_file: self.write_output_file(output_dump, dump_varset, mtr_paytax, mtr_inctax) self.write_doc_file() # optionally write --sqldb output to SQLite3 database if output_sqldb: self.write_sqldb_file(dump_varset, mtr_paytax, mtr_inctax) # optionally write --tables output to text file if output_tables: if not calc_base_calculated: self.calc_base.calc_all() calc_base_calculated = True self.write_tables_file() # optionally write --graphs output to HTML files if output_graphs: if not calc_base_calculated: self.calc_base.calc_all() calc_base_calculated = True self.write_graph_files() # optionally write --ceeu output to stdout if ceeu_results: print(ceeu_results) def write_output_file(self, output_dump, dump_varset, mtr_paytax, mtr_inctax): """ Write output to CSV-formatted file. """ if output_dump: outdf = self.dump_output(dump_varset, mtr_inctax, mtr_paytax) column_order = sorted(outdf.columns) else: outdf = self.minimal_output() column_order = outdf.columns assert len(outdf.index) == self.calc.array_len outdf.to_csv(self._output_filename, columns=column_order, index=False, float_format='%.2f') del outdf gc.collect() def write_doc_file(self): """ Write reform documentation to text file. """ if len(self.policy_dicts) <= 1: doc = Calculator.reform_documentation(self.param_dict) else: doc = Calculator.reform_documentation(self.param_dict, self.policy_dicts[1:]) doc_fname = self._output_filename.replace('.csv', '-doc.text') with open(doc_fname, 'w') as dfile: dfile.write(doc) def write_sqldb_file(self, dump_varset, mtr_paytax, mtr_inctax): """ Write dump output to SQLite3 database table dump. """ outdf = self.dump_output(dump_varset, mtr_inctax, mtr_paytax) assert len(outdf.index) == self.calc.array_len db_fname = self._output_filename.replace('.csv', '.db') dbcon = sqlite3.connect(db_fname) outdf.to_sql('dump', dbcon, if_exists='replace', index=False) dbcon.close() del outdf gc.collect() def write_tables_file(self): """ Write tables to text file. """ # pylint: disable=too-many-locals tab_fname = self._output_filename.replace('.csv', '-tab.text') # skip tables if there are not some positive weights if self.calc_base.total_weight() <= 0.: with open(tab_fname, 'w') as tfile: msg = 'No tables because sum of weights is not positive\n' tfile.write(msg) return # create list of results for nontax variables # - weights don't change with reform # - expanded_income may change, so always use baseline expanded income nontax_vars = ['s006', 'expanded_income'] nontax = [self.calc_base.array(var) for var in nontax_vars] # create list of results for tax variables from reform Calculator tax_vars = ['iitax', 'payrolltax', 'lumpsum_tax', 'combined'] reform = [self.calc.array(var) for var in tax_vars] # create DataFrame with tax distribution under reform dist = nontax + reform # using expanded_income under baseline policy all_vars = nontax_vars + tax_vars distdf = pd.DataFrame(data=np.column_stack(dist), columns=all_vars) # create DataFrame with tax differences (reform - baseline) base = [self.calc_base.array(var) for var in tax_vars] change = [(reform[idx] - base[idx]) for idx in range(0, len(tax_vars))] diff = nontax + change # using expanded_income under baseline policy diffdf = pd.DataFrame(data=np.column_stack(diff), columns=all_vars) # write each kind of distributional table with open(tab_fname, 'w') as tfile: TaxCalcIO.write_decile_table(distdf, tfile, tkind='Reform Totals') tfile.write('\n') TaxCalcIO.write_decile_table(diffdf, tfile, tkind='Differences') # delete intermediate DataFrame objects del distdf del diffdf gc.collect() @staticmethod def write_decile_table(dfx, tfile, tkind='Totals'): """ Write to tfile the tkind decile table using dfx DataFrame. """ dfx = add_quantile_table_row_variable(dfx, 'expanded_income', 10, decile_details=False, weight_by_income_measure=False) gdfx = dfx.groupby('table_row', as_index=False) rtns_series = gdfx.apply(unweighted_sum, 's006') xinc_series = gdfx.apply(weighted_sum, 'expanded_income') itax_series = gdfx.apply(weighted_sum, 'iitax') ptax_series = gdfx.apply(weighted_sum, 'payrolltax') htax_series = gdfx.apply(weighted_sum, 'lumpsum_tax') ctax_series = gdfx.apply(weighted_sum, 'combined') # write decile table to text file row = 'Weighted Tax {} by Baseline Expanded-Income Decile\n' tfile.write(row.format(tkind)) rowfmt = '{}{}{}{}{}{}\n' row = rowfmt.format(' Returns', ' ExpInc', ' IncTax', ' PayTax', ' LSTax', ' AllTax') tfile.write(row) row = rowfmt.format(' (#m)', ' ($b)', ' ($b)', ' ($b)', ' ($b)', ' ($b)') tfile.write(row) rowfmt = '{:9.2f}{:10.1f}{:10.1f}{:10.1f}{:10.1f}{:10.1f}\n' for decile in range(0, 10): row = '{:2d}'.format(decile) row += rowfmt.format(rtns_series[decile] * 1e-6, xinc_series[decile] * 1e-9, itax_series[decile] * 1e-9, ptax_series[decile] * 1e-9, htax_series[decile] * 1e-9, ctax_series[decile] * 1e-9) tfile.write(row) row = ' A' row += rowfmt.format(rtns_series.sum() * 1e-6, xinc_series.sum() * 1e-9, itax_series.sum() * 1e-9, ptax_series.sum() * 1e-9, htax_series.sum() * 1e-9, ctax_series.sum() * 1e-9) tfile.write(row) del gdfx del rtns_series del xinc_series del itax_series del ptax_series del htax_series del ctax_series gc.collect() def write_graph_files(self): """ Write graphs to HTML files. """ pos_wght_sum = self.calc.total_weight() > 0.0 fig = None # average-tax-rate graph atr_fname = self._output_filename.replace('.csv', '-atr.html') atr_title = 'ATR by Income Percentile' if pos_wght_sum: fig = self.calc_base.atr_graph(self.calc) write_graph_file(fig, atr_fname, atr_title) else: reason = 'No graph because sum of weights is not positive' TaxCalcIO.write_empty_graph_file(atr_fname, atr_title, reason) # marginal-tax-rate graph mtr_fname = self._output_filename.replace('.csv', '-mtr.html') mtr_title = 'MTR by Income Percentile' if pos_wght_sum: fig = self.calc_base.mtr_graph( self.calc, alt_e00200p_text='Taxpayer Earnings') write_graph_file(fig, mtr_fname, mtr_title) else: reason = 'No graph because sum of weights is not positive' TaxCalcIO.write_empty_graph_file(mtr_fname, mtr_title, reason) # percentage-aftertax-income-change graph pch_fname = self._output_filename.replace('.csv', '-pch.html') pch_title = 'PCH by Income Percentile' if pos_wght_sum: fig = self.calc_base.pch_graph(self.calc) write_graph_file(fig, pch_fname, pch_title) else: reason = 'No graph because sum of weights is not positive' TaxCalcIO.write_empty_graph_file(pch_fname, pch_title, reason) if fig: del fig gc.collect() @staticmethod def write_empty_graph_file(fname, title, reason): """ Write HTML graph file with title but no graph for specified reason. """ txt = ('<html>\n' '<head><title>{}</title></head>\n' '<body><center<h1>{}</h1></center></body>\n' '</html>\n').format(title, reason) with open(fname, 'w') as gfile: gfile.write(txt) def minimal_output(self): """ Extract minimal output and return it as Pandas DataFrame. """ varlist = ['RECID', 'YEAR', 'WEIGHT', 'INCTAX', 'LSTAX', 'PAYTAX'] odict = dict() scalc = self.calc odict['RECID'] = scalc.array('RECID') # id for tax filing unit odict['YEAR'] = self.tax_year() # tax calculation year odict['WEIGHT'] = scalc.array('s006') # sample weight odict['INCTAX'] = scalc.array('iitax') # federal income taxes odict['LSTAX'] = scalc.array('lumpsum_tax') # lump-sum tax odict['PAYTAX'] = scalc.array('payrolltax') # payroll taxes (ee+er) odf = pd.DataFrame(data=odict, columns=varlist) return odf @staticmethod def ceeu_output(cedict): """ Extract --ceeu output and return as text string. """ text = ('Aggregate {} Pre-Tax Expanded Income and ' 'Tax Revenue ($billion)\n') txt = text.format(cedict['year']) txt += ' baseline reform difference\n' fmt = '{} {:12.3f} {:10.3f} {:12.3f}\n' txt += fmt.format('income', cedict['inc1'], cedict['inc2'], cedict['inc2'] - cedict['inc1']) alltaxdiff = cedict['tax2'] - cedict['tax1'] txt += fmt.format('alltax', cedict['tax1'], cedict['tax2'], alltaxdiff) txt += ('Certainty Equivalent of Expected Utility of ' 'After-Tax Expanded Income ($)\n') txt += ('(assuming consumption equals ' 'after-tax expanded income)\n') txt += 'crra baseline reform pctdiff\n' fmt = '{} {:17.2f} {:10.2f} {:11.2f}\n' for crra, ceeu1, ceeu2 in zip(cedict['crra'], cedict['ceeu1'], cedict['ceeu2']): txt += fmt.format(crra, ceeu1, ceeu2, 100.0 * (ceeu2 - ceeu1) / ceeu1) if abs(alltaxdiff) >= 0.0005: txt += ('WARN: baseline and reform cannot be ' 'sensibly compared\n') text = (' because "alltax difference" is ' '{:.3f} which is not zero\n') txt += text.format(alltaxdiff) txt += ('FIX: adjust _LST or another reform policy parameter ' 'to bracket\n') txt += (' "alltax difference" equals zero and ' 'then interpolate') else: txt += 'NOTE: baseline and reform can be sensibly compared\n' txt += ' because "alltax difference" is essentially zero' return txt def dump_output(self, dump_varset, mtr_inctax, mtr_paytax): """ Extract dump output and return it as Pandas DataFrame. """ if dump_varset is None: varset = Records.USABLE_READ_VARS | Records.CALCULATED_VARS else: varset = dump_varset # create and return dump output DataFrame odf = pd.DataFrame() for varname in varset: vardata = self.calc.array(varname) if varname in Records.INTEGER_VARS: odf[varname] = vardata else: odf[varname] = vardata.round(2) # rounded to nearest cent # specify mtr values in percentage terms if 'mtr_inctax' in varset: odf['mtr_inctax'] = (mtr_inctax * 100).round(2) if 'mtr_paytax' in varset: odf['mtr_paytax'] = (mtr_paytax * 100).round(2) # specify tax calculation year odf['FLPDYR'] = self.tax_year() return odf @staticmethod def growmodel_analysis(input_data, tax_year, baseline, reform, assump, aging_input_data, exact_calculations, writing_output_file=False, output_tables=False, output_graphs=False, output_ceeu=False, dump_varset=None, output_dump=False, output_sqldb=False): """ High-level logic for dynamic analysis using GrowModel class. Parameters ---------- First five parameters are same as the first five parameters of the TaxCalcIO.init method. Last seven parameters are same as the first seven parameters of the TaxCalcIO.analyze method. Returns ------- Nothing """ # pylint: disable=too-many-arguments,too-many-locals progress = 'STARTING ANALYSIS FOR YEAR {}' gdiff_dict = {Policy.JSON_START_YEAR: {}} for year in range(Policy.JSON_START_YEAR, tax_year + 1): print(progress.format(year)) # specify growdiff_growmodel using gdiff_dict growdiff_growmodel = GrowDiff() growdiff_growmodel.update_growdiff(gdiff_dict) gd_dict = TaxCalcIO.annual_analysis( input_data, tax_year, baseline, reform, assump, aging_input_data, exact_calculations, growdiff_growmodel, year, writing_output_file, output_tables, output_graphs, output_ceeu, dump_varset, output_dump, output_sqldb) gdiff_dict[year + 1] = gd_dict @staticmethod def annual_analysis(input_data, tax_year, baseline, reform, assump, aging_input_data, exact_calculations, growdiff_growmodel, year, writing_output_file, output_tables, output_graphs, output_ceeu, dump_varset, output_dump, output_sqldb): """ Conduct static analysis for specifed growdiff_growmodel and year. Parameters ---------- First five parameters are same as the first five parameters of the TaxCalcIO.init method. Last seven parameters are same as the first seven parameters of the TaxCalcIO.analyze method. Returns ------- gd_dict: GrowDiff sub-dictionary for year+1 """ # pylint: disable=too-many-arguments,too-many-locals # instantiate TaxCalcIO object for specified growdiff_growmodel & year tcio = TaxCalcIO(input_data=input_data, tax_year=year, baseline=baseline, reform=reform, assump=assump) tcio.init(input_data=input_data, tax_year=year, baseline=baseline, reform=reform, assump=assump, growdiff_growmodel=growdiff_growmodel, aging_input_data=aging_input_data, exact_calculations=exact_calculations) if year == tax_year: # conduct final tax analysis for year equal to tax_year tcio.analyze(writing_output_file=writing_output_file, output_tables=output_tables, output_graphs=output_graphs, output_ceeu=output_ceeu, dump_varset=dump_varset, output_dump=output_dump, output_sqldb=output_sqldb) gd_dict = {} else: # conduct intermediate tax analysis for year less than tax_year tcio.analyze() # build dict in gdiff_dict key:dict pair for key equal to next year # ... extract tcio results for year needed by GrowModel class # >>>>> add logic here <<<<< # ... use extracted results to advance GrowModel to next year # >>>>> add logic here <<<<< # ... extract next year GrowModel results for next year gdiff_dict # >>>>> add logic here <<<<< gd_dict = {} # remove temporary code after GrowModel working return gd_dict
def init(self, input_data, tax_year, baseline, reform, assump, growdiff_growmodel, aging_input_data, exact_calculations): """ TaxCalcIO class post-constructor method that completes initialization. Parameters ---------- First five are same as the first five of the TaxCalcIO constructor: input_data, tax_year, baseline, reform, assump. growdiff_growmodel: GrowDiff object or None growdiff_growmodel GrowDiff object is used only in the TaxCalcIO.growmodel_analysis method. aging_input_data: boolean whether or not to extrapolate Records data from data year to tax_year. exact_calculations: boolean specifies whether or not exact tax calculations are done without any smoothing of "stair-step" provisions in the tax law. """ # pylint: disable=too-many-arguments,too-many-locals # pylint: disable=too-many-statements,too-many-branches self.errmsg = '' # get policy parameter dictionary from --baseline file basedict = Calculator.read_json_param_objects(baseline, None) # get assumption sub-dictionaries paramdict = Calculator.read_json_param_objects(None, assump) # get policy parameter dictionaries from --reform file(s) policydicts = list() if self.specified_reform: reforms = reform.split('+') for ref in reforms: pdict = Calculator.read_json_param_objects(ref, None) policydicts.append(pdict['policy']) paramdict['policy'] = policydicts[0] # remember parameters for reform documentation self.param_dict = paramdict self.policy_dicts = policydicts # create Behavior object beh = Behavior() try: beh.update_behavior(paramdict['behavior']) except ValueError as valerr_msg: self.errmsg += valerr_msg.__str__() self.behavior_has_any_response = beh.has_any_response() # create gdiff_baseline object gdiff_baseline = GrowDiff() try: gdiff_baseline.update_growdiff(paramdict['growdiff_baseline']) except ValueError as valerr_msg: self.errmsg += valerr_msg.__str__() # create GrowFactors base object that incorporates gdiff_baseline gfactors_base = GrowFactors() gdiff_baseline.apply_to(gfactors_base) # specify gdiff_response object gdiff_response = GrowDiff() try: gdiff_response.update_growdiff(paramdict['growdiff_response']) except ValueError as valerr_msg: self.errmsg += valerr_msg.__str__() # create GrowFactors ref object that has all gdiff objects applied gfactors_ref = GrowFactors() gdiff_baseline.apply_to(gfactors_ref) gdiff_response.apply_to(gfactors_ref) if growdiff_growmodel: growdiff_growmodel.apply_to(gfactors_ref) # create Policy objects: # ... the baseline Policy object base = Policy(gfactors=gfactors_base) try: base.implement_reform(basedict['policy'], print_warnings=False, raise_errors=False) self.errmsg += base.parameter_errors except ValueError as valerr_msg: self.errmsg += valerr_msg.__str__() # ... the reform Policy object if self.specified_reform: pol = Policy(gfactors=gfactors_ref) for poldict in policydicts: try: pol.implement_reform(poldict, print_warnings=False, raise_errors=False) self.errmsg += pol.parameter_errors except ValueError as valerr_msg: self.errmsg += valerr_msg.__str__() else: pol = Policy(gfactors=gfactors_base) # create Consumption object con = Consumption() try: con.update_consumption(paramdict['consumption']) except ValueError as valerr_msg: self.errmsg += valerr_msg.__str__() # create GrowModel object self.growmodel = GrowModel() try: self.growmodel.update_growmodel(paramdict['growmodel']) except ValueError as valerr_msg: self.errmsg += valerr_msg.__str__() # check for valid tax_year value if tax_year < pol.start_year: msg = 'tax_year {} less than policy.start_year {}' msg = msg.format(tax_year, pol.start_year) self.errmsg += 'ERROR: {}\n'.format(msg) if tax_year > pol.end_year: msg = 'tax_year {} greater than policy.end_year {}' msg = msg.format(tax_year, pol.end_year) self.errmsg += 'ERROR: {}\n'.format(msg) # any errors imply cannot proceed with calculations if self.errmsg: return # set policy to tax_year pol.set_year(tax_year) base.set_year(tax_year) # read input file contents into Records objects if aging_input_data: if self.cps_input_data: recs = Records.cps_constructor( gfactors=gfactors_ref, exact_calculations=exact_calculations) recs_base = Records.cps_constructor( gfactors=gfactors_base, exact_calculations=exact_calculations) else: # if not cps_input_data but aging_input_data recs = Records(data=input_data, gfactors=gfactors_ref, exact_calculations=exact_calculations) recs_base = Records(data=input_data, gfactors=gfactors_base, exact_calculations=exact_calculations) else: # input_data are raw data that are not being aged recs = Records(data=input_data, gfactors=None, exact_calculations=exact_calculations, weights=None, adjust_ratios=None, start_year=tax_year) recs_base = copy.deepcopy(recs) if tax_year < recs.data_year: msg = 'tax_year {} less than records.data_year {}' msg = msg.format(tax_year, recs.data_year) self.errmsg += 'ERROR: {}\n'.format(msg) # create Calculator objects self.calc = Calculator(policy=pol, records=recs, verbose=True, consumption=con, behavior=beh, sync_years=aging_input_data) self.calc_base = Calculator(policy=base, records=recs_base, verbose=False, consumption=con, sync_years=aging_input_data)