def run_nth_year_tax_calc_model(year_n, start_year,
                                use_puf_not_cps,
                                use_full_sample,
                                user_mods,
                                return_dict=True):
    """
    Build the TaxBrain summary tables for year_n from the calc1 and calc2
    objects (and mask) returned by the calculate function.

    The user_mods argument is assumed to be a dictionary returned by the
    Calculator.read_json_param_objects() function.
    Setting use_puf_not_cps=True implies use puf.csv input file;
    otherwise, use cps.csv input file.
    Setting use_full_sample=False implies use sub-sample of input file;
    otherwise, use the complete sample.

    When return_dict is false, the returned dictionary maps each summary
    table name to that summary table after its column names have been
    suffixed with '_<year_n>'.  Otherwise, each summary table is converted
    by create_dict_table using year-suffixed row names; for table names
    containing 'aggr' only element zero of each converted value is kept.

    The random number generator is seeded with a value derived from
    user_mods before the summary tables are constructed, and the seed and
    the elapsed run time are printed.
    """
    # pylint: disable=too-many-arguments,too-many-locals
    clock_start = time.time()

    def report_elapsed():
        """ print number of seconds used since this run began """
        seconds = time.time() - clock_start
        print('elapsed time for this run: {:.1f}'.format(seconds))

    def append_year(pdf):
        """ append_year embedded function revises all column names in pdf """
        suffix = '_{}'.format(year_n)
        pdf.columns = [str(col) + suffix for col in pdf.columns]
        return pdf

    # calculate calc1 and calc2 for year_n along with the mask
    check_years_return_first_year(year_n, start_year, use_puf_not_cps)
    calc1, calc2, mask = calculate(year_n, start_year,
                                   use_puf_not_cps, use_full_sample,
                                   user_mods,
                                   behavior_allowed=True)

    # pull raw results out of the records of each calculator
    raw_one = results(calc1.records)
    raw_two = results(calc2.records)

    # seed the random number generator using a value based on user_mods
    rng_seed = random_seed(user_mods)
    print('seed={}'.format(rng_seed))
    np.random.seed(rng_seed)  # pylint: disable=no-member

    # turn raw results into TaxBrain summary tables
    tables = summary(raw_one, raw_two, mask)

    # non-JSON-like results: just the tables with year-suffixed columns
    if not return_dict:
        yearly = {name: append_year(tables[name]) for name in tables}
        report_elapsed()
        return yearly

    # JSON-like results: prepare year-suffixed row names and column types
    year_tag = '_' + str(year_n)
    decile_rows = [label + year_tag for label in DECILE_ROW_NAMES]
    webbin_rows = [label + year_tag for label in WEBBIN_ROW_NAMES]
    aggregate_rows = [label + year_tag for label in AGG_ROW_NAMES]
    dist_types = [float] * len(DIST_TABLE_LABELS)
    diff_types = [float] * len(DIFF_TABLE_LABELS)

    def row_names_for(name):
        """ choose row names using the kind of table implied by name """
        if 'dec' in name:
            return decile_rows
        if 'bin' in name:
            return webbin_rows
        return aggregate_rows

    def column_types_for(name):
        """ choose column types; empty list if neither dist nor diff """
        if 'dist' in name:
            return dist_types
        if 'diff' in name:
            return diff_types
        return []

    output = {}
    for name in tables:
        rows = row_names_for(name)
        if 'aggr' in name:
            # aggregate tables keep only element zero of each value
            converted = create_dict_table(tables[name], row_names=rows)
            output[name] = {key: val[0] for key, val in converted.items()}
        else:
            output[name] = create_dict_table(
                tables[name],
                row_names=rows,
                column_types=column_types_for(name))
    report_elapsed()
    return output
def run_nth_year_taxcalc_model(year_n, start_year,
                               use_puf_not_cps,
                               use_full_sample,
                               user_mods,
                               return_dict=True):
    """
    Build the TaxBrain summary tables for year_n from the distribution
    table dataframes of the calc1 and calc2 objects returned by the
    calculate function.

    The user_mods argument is assumed to be a dictionary returned by the
    Calculator.read_json_param_objects() function.
    Setting use_puf_not_cps=True implies use puf.csv input file;
    otherwise, use cps.csv input file.
    Setting use_full_sample=False implies use sub-sample of input file;
    otherwise, use the complete sample.

    Results are fuzzed only when use_puf_not_cps is true.  In that case
    the random number generator is seeded with a value derived from
    user_mods, filing units whose 'combined' values differ between the
    two dataframes by more than 0.01 are marked as affected by the reform,
    and each group of summary tables ('aggr', 'xbin', 'xdec') is built
    from its own pair of fuzzed dataframes.

    When return_dict is false, the returned dictionary maps each summary
    table name to that summary table after its column names have been
    suffixed with '_<year_n>'.  Otherwise, each summary table is converted
    by create_dict_table using year-suffixed row names; for table names
    containing 'aggr' only element zero of each converted value is kept.
    """
    # pylint: disable=too-many-arguments,too-many-locals,too-many-branches
    clock_start = time.time()

    def report_elapsed():
        """ print number of seconds used since this run began """
        seconds = time.time() - clock_start
        print('elapsed time for this run: {:.1f}'.format(seconds))

    def append_year(pdf):
        """ append_year embedded function revises all column names in pdf """
        suffix = '_{}'.format(year_n)
        pdf.columns = [str(col) + suffix for col in pdf.columns]
        return pdf

    # calculate calc1 and calc2 for year_n
    check_years_return_first_year(year_n, start_year, use_puf_not_cps)
    calc1, calc2 = calculate(year_n, start_year,
                             use_puf_not_cps, use_full_sample,
                             user_mods,
                             behavior_allowed=True)

    # pull unfuzzed raw results out of each calculator and then drop the
    # calculators, which are not needed once the raw results are in hand
    dv1 = calc1.distribution_table_dataframe()
    dv2 = calc2.distribution_table_dataframe()
    del calc1, calc2

    # each stage names the kind of fuzzing and the summary functions that
    # consume the dataframes produced for that kind (applied in order)
    stages = (
        ('aggr', (summary_aggregate,)),
        ('xbin', (summary_dist_xbin, summary_diff_xbin)),
        ('xdec', (summary_dist_xdec, summary_diff_xdec)),
    )

    # turn raw results into TaxBrain summary tables
    tables = {}
    if use_puf_not_cps:
        # seed random number generator with a seed value based on user_mods
        # (reform-specific seed is used to choose whose results are fuzzed)
        rng_seed = random_seed(user_mods)
        print('fuzzing_seed={}'.format(rng_seed))
        np.random.seed(rng_seed)  # pylint: disable=no-member
        # bool array marking which filing units are affected by the reform
        reform_affected = np.logical_not(  # pylint: disable=no-member
            np.isclose(dv1['combined'], dv2['combined'],
                       atol=0.01, rtol=0.0))
        for kind, summarizers in stages:
            fuzzed1, fuzzed2 = fuzzed(dv1, dv2, reform_affected, kind)
            for summarize in summarizers:
                tables = summarize(tables, fuzzed1, fuzzed2)
            # release this stage's dataframes before building the next pair
            del fuzzed1, fuzzed2
        del reform_affected
    else:
        for _, summarizers in stages:
            for summarize in summarizers:
                tables = summarize(tables, dv1, dv2)

    # non-JSON-like results: just the tables with year-suffixed columns
    if not return_dict:
        yearly = {name: append_year(tables[name]) for name in tables}
        report_elapsed()
        return yearly

    # JSON-like results: prepare year-suffixed row names and column types
    year_tag = '_' + str(year_n)
    decile_labels = list(tables['diff_comb_xdec'].index.values)
    decile_rows = [label + year_tag for label in decile_labels]
    bin_labels = list(tables['diff_comb_xbin'].index.values)
    bin_rows = [label + year_tag for label in bin_labels]
    aggregate_rows = [label + year_tag for label in AGG_ROW_NAMES]
    dist_types = [float] * len(DIST_TABLE_LABELS)
    diff_types = [float] * len(DIFF_TABLE_LABELS)

    def row_names_for(name):
        """ choose row names using the kind of table implied by name """
        if 'dec' in name:
            return decile_rows
        if 'bin' in name:
            return bin_rows
        return aggregate_rows

    def column_types_for(name):
        """ choose column types; empty list if neither dist nor diff """
        if 'dist' in name:
            return dist_types
        if 'diff' in name:
            return diff_types
        return []

    output = {}
    for name in tables:
        rows = row_names_for(name)
        if 'aggr' in name:
            # aggregate tables keep only element zero of each value
            converted = create_dict_table(tables[name], row_names=rows)
            output[name] = {key: val[0] for key, val in converted.items()}
        else:
            output[name] = create_dict_table(
                tables[name],
                row_names=rows,
                column_types=column_types_for(name))
    report_elapsed()
    return output