コード例 #1
0
 def __init__(self, growfactors_filename=FILENAME):
     """
     Load growth factors from a CSV file that is indexed by YEAR.

     The file is looked for relative to GrowFactors.FILE_PATH; when it is
     not found there, the base name of growfactors_filename is passed to
     read_egg_csv instead.
     Raises ValueError when growfactors_filename is not a string or when
     the column names of the file differ from GrowFactors.VALID_NAMES.
     """
     # anything other than a file name is rejected up front
     if not isinstance(growfactors_filename, str):
         raise ValueError('growfactors_filename is not a string')
     csv_path = os.path.join(GrowFactors.FILE_PATH, growfactors_filename)
     if os.path.isfile(csv_path):
         factors = pd.read_csv(csv_path, index_col='YEAR')
     else:  # find file in conda package
         factors = read_egg_csv(os.path.basename(growfactors_filename),
                                index_col='YEAR')  # pragma: no cover
     assert isinstance(factors, pd.DataFrame)
     # column names have to match the valid names exactly
     found_names = set(factors.columns)
     if found_names != GrowFactors.VALID_NAMES:
         missing = GrowFactors.VALID_NAMES - found_names
         invalid = found_names - GrowFactors.VALID_NAMES
         raise ValueError(
             'missing names are: {} and invalid names are: {}'.format(
                 missing, invalid))
     # first and last years are the extremes of the YEAR index
     self._first_year = min(factors.index)
     self._last_year = max(factors.index)
     # keep the factors on the object as float64 values
     self.gfdf = factors.astype(np.float64)  # pylint: disable=no-member
     # no factor has been accessed yet
     self.used = False
コード例 #2
0
ファイル: records.py プロジェクト: codykallen/Tax-Calculator
 def _read_ratios(self, ratios):
     """
     Store Records adjustment ratios in self.ADJ as a float32 DataFrame
     whose index is named 'agi_bin'.

     The ratios argument can be:
       None: self.ADJ becomes an empty DataFrame with a 'nothing' column;
       DataFrame: used as the ratios; it must be transposed (contain an
         'INT2013' column) and have an unnamed index (both are asserted);
       string: file name that is joined to Records.CUR_PATH and read with
         its first column as index or, when no such file exists, whose
         base name is passed to read_egg_csv; the result is transposed.
     Raises ValueError for any other type of ratios.
     A DataFrame passed in by the caller is never modified, so the same
     object can be passed to this method more than once.
     """
     if ratios is None:
         setattr(self, 'ADJ', pd.DataFrame({'nothing': []}))
         return
     if isinstance(ratios, pd.DataFrame):
         assert 'INT2013' in ratios.columns  # check for transposed
         assert ratios.index.name is None  # check for no-index
         ADJ = ratios
     elif isinstance(ratios, str):
         ratios_path = os.path.join(Records.CUR_PATH, ratios)
         if os.path.isfile(ratios_path):
             ADJ = pd.read_csv(ratios_path,
                               index_col=0)
         else:
             # cannot call read_egg_ function in unit tests
             ADJ = read_egg_csv(os.path.basename(ratios_path),
                                index_col=0)  # pragma: no cover
         ADJ = ADJ.transpose()
     else:
         msg = 'ratios is neither None nor a Pandas DataFrame nor a string'
         raise ValueError(msg)
     assert isinstance(ADJ, pd.DataFrame)
     # convert first and then give only the converted object a renamed
     # index; setting ADJ.index.name in place would also rename the index
     # of the caller's DataFrame, which makes the no-index assert above
     # fail when the same DataFrame is passed in again
     converted = ADJ.astype(np.float32)
     converted.index = converted.index.rename('agi_bin')
     setattr(self, 'ADJ', converted)
コード例 #3
0
ファイル: records.py プロジェクト: codykallen/Tax-Calculator
 def _read_weights(self, weights):
     """
     Set self.WT to the Records weights stored as int32 values.

     The weights argument can be None (self.WT becomes an empty DataFrame
     with a 'nothing' column), a DataFrame (used as the weights), or a
     string naming a CSV file relative to Records.CUR_PATH (when no such
     file exists, its base name is passed to read_egg_csv).
     Any other type of weights raises ValueError.
     Assumes weights are integers equal to 100 times the real weight.
     """
     if weights is None:
         self.WT = pd.DataFrame({'nothing': []})
         return
     if not isinstance(weights, (pd.DataFrame, str)):
         raise ValueError(
             'weights is not None or a string or a Pandas DataFrame')
     if isinstance(weights, pd.DataFrame):
         wghts = weights
     else:
         csv_path = os.path.join(Records.CUR_PATH, weights)
         if os.path.isfile(csv_path):
             wghts = pd.read_csv(csv_path)
         else:
             # cannot call read_egg_ function in unit tests
             wghts = read_egg_csv(
                 os.path.basename(csv_path))  # pragma: no cover
     assert isinstance(wghts, pd.DataFrame)
     self.WT = wghts.astype(np.int32)
コード例 #4
0
 def read_cps_data():
     """
     Return data in cps.csv.gz as a Pandas DataFrame.

     The file is read from Records.CODE_PATH when it exists there;
     otherwise the bare file name is passed to read_egg_csv.
     """
     cps_name = 'cps.csv.gz'
     fname = os.path.join(Records.CODE_PATH, cps_name)
     if os.path.isfile(fname):
         return pd.read_csv(fname)
     # find file in conda package: fname is a local path that was just
     # found not to exist, so only the file name is passed on
     return read_egg_csv(cps_name)  # pragma: no cover
コード例 #5
0
ファイル: records.py プロジェクト: codykallen/Tax-Calculator
 def read_cps_data():
     """
     Return data in cps.csv.gz as a Pandas DataFrame.

     The file is read from Records.CUR_PATH when it exists there;
     otherwise its base name is passed to read_egg_csv.
     """
     fname = os.path.join(Records.CUR_PATH, 'cps.csv.gz')
     if os.path.isfile(fname):
         cpsdf = pd.read_csv(fname)
     else:
         # cannot call read_egg_ function in unit tests;
         # fname is a local path that was just found not to exist, so
         # only its base name is passed on
         cpsdf = read_egg_csv(os.path.basename(fname))  # pragma: no cover
     return cpsdf
コード例 #6
0
 def _read_weights(self, weights):
     """
     Set self.WT to the Records weights.

     The weights argument can be None (self.WT becomes an empty DataFrame
     with a 'nothing' column), a DataFrame (stored as is), or a string:
     when the string names an existing file that file is read with
     pd.read_csv, otherwise Records.WEIGHTS_FILENAME is read with
     read_egg_csv.
     Any other type of weights raises ValueError.
     """
     if weights is None:
         self.WT = pd.DataFrame({'nothing': []})
         return
     if isinstance(weights, pd.DataFrame):
         wghts = weights
     elif not isinstance(weights, six.string_types):
         raise ValueError(
             'weights is not None or a string or a Pandas DataFrame')
     elif os.path.isfile(weights):
         wghts = pd.read_csv(weights)
     else:
         wghts = read_egg_csv(Records.WEIGHTS_FILENAME)
     assert isinstance(wghts, pd.DataFrame)
     self.WT = wghts
コード例 #7
0
ファイル: records.py プロジェクト: ClarePan/Tax-Calculator
 def _read_benefits(self, benefits):
     """
     Set self.BEN to extrapolated benefits with one row per self.RECID.

     The benefits argument can be None (self.BEN becomes an empty
     DataFrame with a 'Nothing' column), a DataFrame, or a string naming
     a CSV file relative to Records.CUR_PATH (when no such file exists,
     its base name is passed to read_egg_csv).
     Any other type of benefits raises ValueError.
     Should only be used with the cps.csv file.
     """
     if benefits is None:
         self.BEN = pd.DataFrame({'Nothing': []})
         return
     if isinstance(benefits, pd.DataFrame):
         partial = benefits
     elif isinstance(benefits, six.string_types):
         csv_path = os.path.join(Records.CUR_PATH, benefits)
         if os.path.isfile(csv_path):
             partial = pd.read_csv(csv_path)
         else:
             # cannot call read_egg_ function in unit tests
             egg_name = os.path.basename(csv_path)  # pragma: no cover
             partial = read_egg_csv(egg_name)  # pragma: no cover
     else:
         raise ValueError(
             'benefits is not None or a string or a Pandas DataFrame')
     assert isinstance(partial, pd.DataFrame)
     # left-merge onto every RECID so that units without benefits also
     # get a row, and give those rows zero benefits
     all_ids = pd.DataFrame({'RECID': self.RECID})
     expanded = all_ids.merge(partial, on='RECID', how='left').fillna(0)
     assert len(all_ids.index) == len(expanded.index)
     self.BEN = expanded.astype(np.float32)
コード例 #8
0
def test_read_egg_csv():
    """
    Check that read_egg_csv raises ValueError for an unreadable file name.
    """
    bogus_name = 'bad_filename'
    with pytest.raises(ValueError):
        read_egg_csv(bogus_name)
コード例 #9
0
 def _read_data(self, data, exact_calcs):
     """
     Read Records data from file or use specified DataFrame as data.

     Each data column named in Records.USABLE_READ_VARS becomes an array
     attribute (int32 for names in Records.INTEGER_READ_VARS, float64
     otherwise); other column names are collected in self.IGNORED_VARS.
     Calculated variables and usable variables that were not read are
     created as arrays of zeros.
     Sets the exact array to one when exact_calcs is True, else to zero.
     Raises ValueError when data is neither a string nor a DataFrame,
     when a variable in Records.MUST_READ_VARS is missing, or when MARS
     or EIC contain values outside their valid ranges.
     """
     # pylint: disable=too-many-statements,too-many-branches
     if Records.INTEGER_VARS == set():
         Records.read_var_info()
     # obtain the raw data as a DataFrame
     if isinstance(data, pd.DataFrame):
         raw = data
     elif not isinstance(data, str):
         raise ValueError('data is neither a string nor a Pandas DataFrame')
     elif os.path.isfile(data):
         raw = pd.read_csv(data)
     else:
         # cannot call read_egg_ function in unit tests
         raw = read_egg_csv(data)  # pragma: no cover
     self.__dim = len(raw.index)
     self.__index = raw.index
     # turn each usable column into an array attribute and remember the
     # names of the columns that are not usable
     vars_read = set()
     self.IGNORED_VARS = set()
     for varname in raw.columns.values:
         if varname not in Records.USABLE_READ_VARS:
             self.IGNORED_VARS.add(varname)
             continue
         vars_read.add(varname)
         if varname in Records.INTEGER_READ_VARS:
             column_type = np.int32
         else:
             column_type = np.float64
         setattr(self, varname, raw[varname].astype(column_type).values)
     # every one of the MUST_READ_VARS has to be among the variables read
     if not Records.MUST_READ_VARS.issubset(vars_read):
         raise ValueError('Records data missing one or more MUST_READ_VARS')
     # calculated variables and usable variables that were not read
     # start out as arrays of zeros
     vars_unread = Records.USABLE_READ_VARS - vars_read
     for varname in Records.CALCULATED_VARS | vars_unread:
         if varname in Records.INTEGER_VARS:
             zeros_type = np.int32
         else:
             zeros_type = np.float64
         setattr(self, varname,
                 np.zeros(self.array_length, dtype=zeros_type))
     # all MARS values have to be in the [1,5] range
     if not np.all((self.MARS >= 1) & (self.MARS <= 5)):
         raise ValueError('not all MARS values in [1,5] range')
     # create variables derived from MARS, which is in MUST_READ_VARS
     self.num[:] = np.where(self.MARS == 2, 2, 1)
     self.sep[:] = np.where(self.MARS == 3, 2, 1)
     # all EIC values have to be in the [0,3] range
     if not np.all((self.EIC >= 0) & (self.EIC <= 3)):
         raise ValueError('not all EIC values in [0,3] range')
     # specify value of exact array
     self.exact[:] = 1 if exact_calcs is True else 0
コード例 #10
0
def test_read_egg_csv():
    """
    Check that read_egg_csv raises ValueError when given a bad file name.
    """
    missing_file = 'bad_filename'
    with pytest.raises(ValueError):
        read_egg_csv(missing_file)
コード例 #11
0
def calculate(year_n, start_year, use_puf_not_cps, use_full_sample, user_mods,
              behavior_allowed):
    """
    The calculate function assumes the specified user_mods is a dictionary
      returned by the Calculator.read_json_param_objects() function.
    The function returns (calc1, calc2) where
      calc1 is pre-reform Calculator object calculated for year_n, and
      calc2 is post-reform Calculator object calculated for year_n.
    Set behavior_allowed to False when generating static results or
      set behavior_allowed to True when generating dynamic results.

    Arguments:
      year_n: number of times both Calculator objects are incremented
        after they have reached start_year.
      start_year: year to which both Calculator objects are first advanced.
      use_puf_not_cps: when True, input is read from a puf.csv file and
        Records objects are created with Records(...); when False, input
        is read from cps.csv.gz and Records objects are created with
        Records.cps_constructor(...).
      use_full_sample: when True, all input rows are used; when False, a
        random sample of the rows is used (fraction 0.05 for puf input
        and 0.03 for cps input, both with random seed 180).
      user_mods: dictionary from which the 'consumption',
        'growdiff_baseline', 'growdiff_response', 'behavior' and 'policy'
        entries are read.
      behavior_allowed: when False, a behavior entry that has any
        response causes a ValueError.

    Raises ValueError when both behavior and growdiff_response have a
      response, or when behavior has a response that is not allowed.
    """
    # pylint: disable=too-many-arguments,too-many-locals
    # pylint: disable=too-many-branches,too-many-statements

    check_user_mods(user_mods)

    # specify Consumption instance
    consump = Consumption()
    consump_assumptions = user_mods['consumption']
    consump.update_consumption(consump_assumptions)

    # specify growdiff_baseline and growdiff_response
    growdiff_baseline = GrowDiff()
    growdiff_response = GrowDiff()
    growdiff_base_assumps = user_mods['growdiff_baseline']
    growdiff_resp_assumps = user_mods['growdiff_response']
    growdiff_baseline.update_growdiff(growdiff_base_assumps)
    growdiff_response.update_growdiff(growdiff_resp_assumps)

    # create pre-reform and post-reform GrowFactors instances;
    # the baseline growdiff is applied to both, the response growdiff
    # only to the post-reform instance
    growfactors_pre = GrowFactors()
    growdiff_baseline.apply_to(growfactors_pre)
    growfactors_post = GrowFactors()
    growdiff_baseline.apply_to(growfactors_post)
    growdiff_response.apply_to(growfactors_post)

    # create sample pd.DataFrame from specified input file and sampling scheme
    stime = time.time()
    tbi_path = os.path.abspath(os.path.dirname(__file__))
    if use_puf_not_cps:
        # first try TaxBrain deployment path
        input_path = 'puf.csv.gz'
        if not os.path.isfile(input_path):
            # otherwise try local Tax-Calculator deployment path
            input_path = os.path.join(tbi_path, '..', '..', 'puf.csv')
        sampling_frac = 0.05
        sampling_seed = 180
    else:  # if using cps input not puf input
        # first try Tax-Calculator code path
        input_path = os.path.join(tbi_path, '..', 'cps.csv.gz')
        if not os.path.isfile(input_path):
            # otherwise read from taxcalc package "egg"
            input_path = None  # pragma: no cover
            full_sample = read_egg_csv('cps.csv.gz')  # pragma: no cover
        sampling_frac = 0.03
        sampling_seed = 180
    # input_path is None only when full_sample was already read from the egg
    if input_path:
        full_sample = pd.read_csv(input_path)
    if use_full_sample:
        sample = full_sample
    else:
        sample = full_sample.sample(  # pylint: disable=no-member
            frac=sampling_frac,
            random_state=sampling_seed)
    if use_puf_not_cps:
        print('puf-read-time= {:.1f}'.format(time.time() - stime))
    else:
        print('cps-read-time= {:.1f}'.format(time.time() - stime))

    # create pre-reform Calculator instance
    if use_puf_not_cps:
        recs1 = Records(data=sample, gfactors=growfactors_pre)
    else:
        recs1 = Records.cps_constructor(data=sample, gfactors=growfactors_pre)
    policy1 = Policy(gfactors=growfactors_pre)
    calc1 = Calculator(policy=policy1, records=recs1, consumption=consump)
    while calc1.current_year < start_year:
        calc1.increment_year()
    calc1.calc_all()
    assert calc1.current_year == start_year

    # specify Behavior instance
    behv = Behavior()
    behavior_assumps = user_mods['behavior']
    behv.update_behavior(behavior_assumps)

    # always prevent both behavioral response and growdiff response
    if behv.has_any_response() and growdiff_response.has_any_response():
        msg = 'BOTH behavior AND growdiff_response HAVE RESPONSE'
        raise ValueError(msg)

    # optionally prevent behavioral response
    if behv.has_any_response() and not behavior_allowed:
        msg = 'A behavior RESPONSE IS NOT ALLOWED'
        raise ValueError(msg)

    # create post-reform Calculator instance
    if use_puf_not_cps:
        recs2 = Records(data=sample, gfactors=growfactors_post)
    else:
        recs2 = Records.cps_constructor(data=sample, gfactors=growfactors_post)
    policy2 = Policy(gfactors=growfactors_post)
    policy_reform = user_mods['policy']
    policy2.implement_reform(policy_reform)
    calc2 = Calculator(policy=policy2,
                       records=recs2,
                       consumption=consump,
                       behavior=behv)
    while calc2.current_year < start_year:
        calc2.increment_year()
    assert calc2.current_year == start_year

    # delete objects now embedded in calc1 and calc2
    del sample
    del full_sample
    del consump
    del growdiff_baseline
    del growdiff_response
    del growfactors_pre
    del growfactors_post
    del behv
    del recs1
    del recs2
    del policy1
    del policy2

    # increment Calculator objects for year_n years and calculate
    for _ in range(0, year_n):
        calc1.increment_year()
        calc2.increment_year()
    calc1.calc_all()
    # with a behavioral response calc2 is replaced by the object returned
    # by Behavior.response; otherwise calc2 is calculated directly
    if calc2.behavior_has_response():
        calc2 = Behavior.response(calc1, calc2)
    else:
        calc2.calc_all()

    # return calculated Calculator objects
    return (calc1, calc2)
コード例 #12
0
def calculate(year_n, start_year, use_puf_not_cps, use_full_sample, user_mods,
              behavior_allowed):
    """
    The calculate function assumes the specified user_mods is a dictionary
      returned by the Calculator.read_json_param_objects() function.
    The function returns (calc1, calc2, mask) where
      calc1 is pre-reform Calculator object calculated for year_n,
      calc2 is post-reform Calculator object calculated for year_n, and
      mask is an array with one element per record: for puf input it is a
        boolean array that is True where the start_year iitax changes
        after one dollar is added to e00200 and e00200p; for cps input
        it is an all-zero int8 array.
    Set behavior_allowed to False when generating static results or
      set behavior_allowed to True when generating dynamic results.

    Arguments:
      year_n: number of times both Calculator objects are incremented
        after they have reached start_year.
      start_year: year to which the Calculator objects are first advanced.
      use_puf_not_cps: when True, input is read from a puf.csv file and
        Records objects are created with Records(...); when False, input
        is read from cps.csv.gz and Records objects are created with
        Records.cps_constructor(...).
      use_full_sample: when True, all input rows are used; when False, a
        random sample of the rows is used (fraction 0.05 for puf input
        and 0.03 for cps input, both with random seed 180).
      user_mods: dictionary from which the 'consumption',
        'growdiff_baseline', 'growdiff_response', 'behavior' and 'policy'
        entries are read.
      behavior_allowed: when False, a behavior entry that has any
        response causes a ValueError.

    Raises ValueError when both behavior and growdiff_response have a
      response, or when behavior has a response that is not allowed.
    """
    # pylint: disable=too-many-arguments,too-many-locals
    # pylint: disable=too-many-branches,too-many-statements

    check_user_mods(user_mods)

    # specify Consumption instance
    consump = Consumption()
    consump_assumptions = user_mods['consumption']
    consump.update_consumption(consump_assumptions)

    # specify growdiff_baseline and growdiff_response
    growdiff_baseline = Growdiff()
    growdiff_response = Growdiff()
    growdiff_base_assumps = user_mods['growdiff_baseline']
    growdiff_resp_assumps = user_mods['growdiff_response']
    growdiff_baseline.update_growdiff(growdiff_base_assumps)
    growdiff_response.update_growdiff(growdiff_resp_assumps)

    # create pre-reform and post-reform Growfactors instances;
    # the baseline growdiff is applied to both, the response growdiff
    # only to the post-reform instance
    growfactors_pre = Growfactors()
    growdiff_baseline.apply_to(growfactors_pre)
    growfactors_post = Growfactors()
    growdiff_baseline.apply_to(growfactors_post)
    growdiff_response.apply_to(growfactors_post)

    # create sample pd.DataFrame from specified input file and sampling scheme
    stime = time.time()
    tbi_path = os.path.abspath(os.path.dirname(__file__))
    if use_puf_not_cps:
        # first try TaxBrain deployment path
        input_path = 'puf.csv.gz'
        if not os.path.isfile(input_path):
            # otherwise try local Tax-Calculator deployment path
            input_path = os.path.join(tbi_path, '..', '..', 'puf.csv')
        sampling_frac = 0.05
        sampling_seed = 180
    else:  # if using cps input not puf input
        # first try Tax-Calculator code path
        input_path = os.path.join(tbi_path, '..', 'cps.csv.gz')
        if not os.path.isfile(input_path):
            # otherwise read from taxcalc package "egg"
            input_path = None  # pragma: no cover
            full_sample = read_egg_csv('cps.csv.gz')  # pragma: no cover
        sampling_frac = 0.03
        sampling_seed = 180
    # input_path is None only when full_sample was already read from the egg
    if input_path:
        full_sample = pd.read_csv(input_path)
    if use_full_sample:
        sample = full_sample
    else:
        sample = full_sample.sample(  # pylint: disable=no-member
            frac=sampling_frac,
            random_state=sampling_seed)
    if use_puf_not_cps:
        print('puf-read-time= {:.1f}'.format(time.time() - stime))
    else:
        print('cps-read-time= {:.1f}'.format(time.time() - stime))

    # create pre-reform Calculator instance;
    # each Records object is given its own deep copy of sample
    if use_puf_not_cps:
        recs1 = Records(data=copy.deepcopy(sample), gfactors=growfactors_pre)
    else:
        recs1 = Records.cps_constructor(data=copy.deepcopy(sample),
                                        gfactors=growfactors_pre)
    policy1 = Policy(gfactors=growfactors_pre)
    calc1 = Calculator(policy=policy1, records=recs1, consumption=consump)
    while calc1.current_year < start_year:
        calc1.increment_year()
    calc1.calc_all()
    assert calc1.current_year == start_year

    # compute mask array
    res1 = calc1.dataframe(DIST_VARIABLES)
    if use_puf_not_cps:
        # create pre-reform Calculator instance with extra income
        recs1p = Records(data=copy.deepcopy(sample), gfactors=growfactors_pre)
        # add one dollar to the income of each filing unit to determine
        # which filing units undergo a resulting change in tax liability
        recs1p.e00200 += 1.0  # pylint: disable=no-member
        recs1p.e00200p += 1.0  # pylint: disable=no-member
        policy1p = Policy(gfactors=growfactors_pre)
        # create Calculator with recs1p and calculate for start_year
        calc1p = Calculator(policy=policy1p,
                            records=recs1p,
                            consumption=consump)
        while calc1p.current_year < start_year:
            calc1p.increment_year()
        calc1p.calc_all()
        assert calc1p.current_year == start_year
        # compute mask showing which of the calc1 and calc1p results differ;
        # mask is true if a filing unit's income tax liability changed after
        # a dollar was added to the filing unit's wage and salary income
        res1p = calc1p.dataframe(DIST_VARIABLES)
        mask = np.logical_not(  # pylint: disable=no-member
            np.isclose(res1.iitax, res1p.iitax, atol=0.001, rtol=0.0))
        assert np.any(mask)
    else:  # if use_puf_not_cps is False
        # indicate that no fuzzing of reform results is required;
        # note that this mask is an int8 array of zeros, not a boolean array
        mask = np.zeros(res1.shape[0], dtype=np.int8)

    # specify Behavior instance
    behv = Behavior()
    behavior_assumps = user_mods['behavior']
    behv.update_behavior(behavior_assumps)

    # always prevent both behavioral response and growdiff response
    if behv.has_any_response() and growdiff_response.has_any_response():
        msg = 'BOTH behavior AND growdiff_response HAVE RESPONSE'
        raise ValueError(msg)

    # optionally prevent behavioral response
    if behv.has_any_response() and not behavior_allowed:
        msg = 'A behavior RESPONSE IS NOT ALLOWED'
        raise ValueError(msg)

    # create post-reform Calculator instance
    if use_puf_not_cps:
        recs2 = Records(data=copy.deepcopy(sample), gfactors=growfactors_post)
    else:
        recs2 = Records.cps_constructor(data=copy.deepcopy(sample),
                                        gfactors=growfactors_post)
    policy2 = Policy(gfactors=growfactors_post)
    policy_reform = user_mods['policy']
    policy2.implement_reform(policy_reform)
    calc2 = Calculator(policy=policy2,
                       records=recs2,
                       consumption=consump,
                       behavior=behv)
    while calc2.current_year < start_year:
        calc2.increment_year()
    calc2.calc_all()
    assert calc2.current_year == start_year

    # increment Calculator objects for year_n years and calculate
    for _ in range(0, year_n):
        calc1.increment_year()
        calc2.increment_year()
    calc1.calc_all()
    # with a behavioral response calc2 is replaced by the object returned
    # by Behavior.response; otherwise calc2 is calculated directly
    if calc2.behavior_has_response():
        calc2 = Behavior.response(calc1, calc2)
    else:
        calc2.calc_all()

    # return calculated Calculator objects and mask
    return (calc1, calc2, mask)
コード例 #13
0
ファイル: records.py プロジェクト: codykallen/Tax-Calculator
 def _read_data(self, data, exact_calcs):
     """
     Read Records data from file or use specified DataFrame as data.

     Every data column whose name is in Records.USABLE_READ_VARS is
     stored as an array attribute (int32 when the name is in
     Records.INTEGER_READ_VARS, float64 otherwise) and all other column
     names are gathered in self.IGNORED_VARS.
     Calculated variables and usable variables missing from the data are
     created as arrays of zeros.
     The exact array is set to one if exact_calcs is True, else to zero.
     Raises ValueError if data is neither a string nor a DataFrame, if
     any of the Records.MUST_READ_VARS is missing, or if MARS or EIC
     hold values outside their valid ranges.
     """
     # pylint: disable=too-many-statements,too-many-branches
     if Records.INTEGER_VARS == set():
         Records.read_var_info()
     # get hold of the input data as a DataFrame
     if isinstance(data, pd.DataFrame):
         frame = data
     elif not isinstance(data, str):
         raise ValueError('data is neither a string nor a Pandas DataFrame')
     elif os.path.isfile(data):
         frame = pd.read_csv(data)
     else:
         # cannot call read_egg_ function in unit tests
         frame = read_egg_csv(data)  # pragma: no cover
     self.__dim = len(frame.index)
     self.__index = frame.index
     # store usable columns as array attributes and collect the names
     # of the columns that are ignored
     names_read = set()
     self.IGNORED_VARS = set()
     for varname in frame.columns.values:
         if varname not in Records.USABLE_READ_VARS:
             self.IGNORED_VARS.add(varname)
             continue
         names_read.add(varname)
         as_integer = varname in Records.INTEGER_READ_VARS
         values = frame[varname].astype(
             np.int32 if as_integer else np.float64).values
         setattr(self, varname, values)
     # the data must provide all of the MUST_READ_VARS
     if not Records.MUST_READ_VARS.issubset(names_read):
         raise ValueError('Records data missing one or more MUST_READ_VARS')
     # calculated variables and usable variables missing from the data
     # begin as arrays of zeros
     names_unread = Records.USABLE_READ_VARS - names_read
     for varname in Records.CALCULATED_VARS | names_unread:
         as_integer = varname in Records.INTEGER_VARS
         setattr(self, varname,
                 np.zeros(self.array_length,
                          dtype=np.int32 if as_integer else np.float64))
     # MARS values are valid only in the [1,5] range
     mars_ok = (self.MARS >= 1) & (self.MARS <= 5)
     if not np.all(mars_ok):
         raise ValueError('not all MARS values in [1,5] range')
     # create variables derived from MARS, which is in MUST_READ_VARS
     self.num[:] = np.where(self.MARS == 2, 2, 1)
     self.sep[:] = np.where(self.MARS == 3, 2, 1)
     # EIC values are valid only in the [0,3] range
     eic_ok = (self.EIC >= 0) & (self.EIC <= 3)
     if not np.all(eic_ok):
         raise ValueError('not all EIC values in [0,3] range')
     # specify value of exact array
     self.exact[:] = 1 if exact_calcs is True else 0