コード例 #1
0
def test_validity_of_name_lists():
    """
    Check that the distribution-table name lists agree with each other
    and with the calculated variables known to the Records class.
    """
    assert len(DIST_TABLE_COLUMNS) == len(DIST_TABLE_LABELS)
    Records.read_var_info()
    # every distribution variable is a calculated variable or 's006'
    allowed_vars = Records.CALCULATED_VARS | {'s006'}
    assert set(DIST_VARIABLES) <= allowed_vars
    # table columns that are not distribution variables are exactly these
    non_variable_columns = set(DIST_TABLE_COLUMNS).difference(DIST_VARIABLES)
    assert non_variable_columns == {'num_returns_StandardDed',
                                    'num_returns_ItemDed',
                                    'num_returns_AMT'}
コード例 #2
0
def test_calc_and_used_vars(tests_path):
    """
    Runs two kinds of tests on variables used in the calcfunctions.py file:

    (1) Checks that each var in Records.CALCULATED_VARS is actually calculated

    If test (1) fails, a variable in Records.CALCULATED_VARS was not
    calculated in any function in the calcfunctions.py file.  With the
    exception of a few variables listed in this test, all
    Records.CALCULATED_VARS must be calculated in the calcfunctions.py file.

    (2) Check that each variable that is calculated in a function and
    returned by that function is an argument of that function.

    Parameters
    ----------
    tests_path: string
        directory path; calcfunctions.py is read from its parent directory

    Raises
    ------
    ValueError
        if either test fails; the message lists each offending variable
    """
    # pylint: disable=too-many-locals
    funcpath = os.path.join(tests_path, '..', 'calcfunctions.py')
    gfd = GetFuncDefs()
    # read the source inside a with-block so the file handle is closed
    with open(funcpath) as srcfile:
        source_code = srcfile.read()
    fnames, fargs, cvars, rvars = gfd.visit(ast.parse(source_code))
    # Test (1):
    # .. create set of vars that are actually calculated in calcfunctions.py
    all_cvars = set()
    for fname in fnames:
        if fname == 'BenefitSurtax':
            continue  # because BenefitSurtax is not really a function
        all_cvars.update(cvars[fname])
    # .. add to all_cvars set variables calculated in Records class
    all_cvars.update(['num', 'sep', 'exact'])
    # .. add to all_cvars set variables calculated elsewhere
    all_cvars.update(['mtr_paytax', 'mtr_inctax'])
    all_cvars.update(['benefit_cost_total', 'benefit_value_total'])
    # .. check that each var in Records.CALCULATED_VARS is in the all_cvars set
    Records.read_var_info()
    uncalculated = Records.CALCULATED_VARS - all_cvars
    found_error1 = bool(uncalculated)
    msg1 = ('all Records.CALCULATED_VARS not calculated '
            'in calcfunctions.py\n')
    # sorted so that the report is the same from run to run
    for var in sorted(uncalculated):
        msg1 += 'VAR NOT CALCULATED: {}\n'.format(var)
    # Test (2):
    faux_functions = ['EITCamount', 'ComputeBenefit', 'BenefitPrograms',
                      'BenefitSurtax', 'BenefitLimitation']
    found_error2 = False
    msg2 = 'calculated & returned variables are not function arguments\n'
    for fname in fnames:
        if fname in faux_functions:
            continue  # because fname is not a genuine function
        crvars_set = set(cvars[fname]) & set(rvars[fname])
        not_arguments = crvars_set - set(fargs[fname])
        if not_arguments:
            found_error2 = True
            for var in sorted(not_arguments):
                msg2 += 'FUNCTION,VARIABLE: {} {}\n'.format(fname, var)
    # Report errors for the two tests:
    if found_error1 and found_error2:
        raise ValueError('{}\n{}'.format(msg1, msg2))
    if found_error1:
        raise ValueError(msg1)
    if found_error2:
        raise ValueError(msg2)
コード例 #3
0
def test_validity_of_name_lists():
    """
    Verify that the distribution-table columns, labels and variables
    are mutually consistent.
    """
    num_columns = len(DIST_TABLE_COLUMNS)
    num_labels = len(DIST_TABLE_LABELS)
    assert num_columns == num_labels
    Records.read_var_info()
    # distribution variables must be calculated variables (or 's006')
    permitted = Records.CALCULATED_VARS | {'s006'}
    assert set(DIST_VARIABLES) <= permitted
    # the only columns that are not distribution variables are these counts
    expected_extras = {
        'num_returns_StandardDed',
        'num_returns_ItemDed',
        'num_returns_AMT',
    }
    actual_extras = set(DIST_TABLE_COLUMNS).difference(DIST_VARIABLES)
    assert actual_extras == expected_extras
コード例 #4
0
def main(filename, recid, input_vars_only, transpose):
    """
    Contains high-level logic of the script.

    Reads the CSV file named filename, extracts the row(s) whose RECID
    equals recid, removes zero-valued variables, and writes the result
    to a new CSV file whose name is derived from filename.

    Parameters
    ----------
    filename: string
        name of CSV-formatted input file; its last four characters are
        stripped when constructing the output file name

    recid: value compared for equality with the RECID column

    input_vars_only: boolean
        if true, columns not in Records.USABLE_READ_VARS are removed

    transpose: boolean
        if true, the string returned by transposed(edf) is written to
        '<name>-<recid>T.csv'; otherwise edf is written with sorted
        columns to '<name>-<recid>.csv'

    Returns
    -------
    0 on success, or 1 after writing an error message to stderr
    """
    # read all file content into Pandas DataFrame
    adf = pd.read_csv(filename)
    adf_vars = set(adf.columns)  # pylint: disable=no-member

    # check that the file contains required tax variables
    required_input_vars = set(['RECID', 'MARS'])
    required_input_vars_str = 'RECID, MARS'
    if not required_input_vars.issubset(adf_vars):
        msg = 'ERROR: FILE does not include required input variables: {}\n'
        sys.stderr.write(msg.format(required_input_vars_str))
        return 1

    # check that RECID actually identifies a filing unit in FILE
    if recid not in adf['RECID'].values:
        msg = 'ERROR: RECID={} not in FILE\n'
        sys.stderr.write(msg.format(recid))
        return 1

    # extract the adf row with specified recid; take an explicit copy so
    # that later column removal never operates on a view of adf
    edf = adf[adf['RECID'] == recid].copy()

    # optionally remove all but Tax-Calculator usable input variables from edf
    if input_vars_only:
        Records.read_var_info()
        unusable = [colname for colname in edf.columns
                    if colname not in Records.USABLE_READ_VARS]
        edf = edf.drop(unusable, axis=1)

    # remove all zero-valued variables from edf (judged by the first row)
    zero_valued = [colname for colname in edf.columns
                   if edf[colname].iloc[0] == 0]
    edf = edf.drop(zero_valued, axis=1)

    # write edf to CSV-formatted output file
    if transpose:
        ofilename = '{}-{}T.csv'.format(filename[:-4], recid)
        tstr = transposed(edf)
        with open(ofilename, 'w') as ofile:
            ofile.write(tstr)
    else:
        ofilename = '{}-{}.csv'.format(filename[:-4], recid)
        edf.to_csv(path_or_buf=ofilename,
                   columns=sorted(edf.columns),
                   index=False,
                   float_format='%.2f')
    sys.stdout.write('EXTRACT IN {}\n'.format(ofilename))

    # normal return code
    return 0
コード例 #5
0
    def _calc_object(self, exact_calcs, emulate_taxsim_2441_logic,
                     output_records):
        """
        Build the Calculator object that conducts the tax calculations.

        Parameters
        ----------
        exact_calcs: boolean
            passed to Records as its exact_calculations argument

        emulate_taxsim_2441_logic: boolean
            passed through to SimpleTaxIO._specify_input

        output_records: boolean
            if true, the Records.USABLE_READ_VARS values are written
            to a CSV file

        Returns
        -------
        calc: Calculator
        """
        Records.read_var_info()
        num_units = len(self._input)
        # one all-zeros row repeated once for each filing unit
        zero_row = dict.fromkeys(Records.USABLE_READ_VARS, 0)
        recsdf = pd.DataFrame([zero_row] * num_units, dtype='int64')
        recsdf['MARS'] = recsdf['MARS'].add(1)  # because MARS==0 is illegal
        recs = Records(data=recsdf,
                       exact_calculations=exact_calcs,
                       gfactors=None,
                       weights=None,
                       start_year=self.policy.start_year)
        assert recs.array_length == num_units
        # fill in each filing unit; self._input is looked up with keys
        # that start at one while Records indexes start at zero
        for idx in range(recs.array_length):
            SimpleTaxIO._specify_input(recs, idx, self._input[idx + 1],
                                       emulate_taxsim_2441_logic)
        # optionally dump the Records.USABLE_READ_VARS values to a CSV file
        if output_records:
            recdf = pd.DataFrame()
            for varname in Records.USABLE_READ_VARS:
                recdf[varname] = getattr(recs, varname)
            records_filename = re.sub('out-simtax', 'records',
                                      self._output_filename)
            recdf.to_csv(records_filename,
                         float_format='%.2f',
                         index=False)
        # the Calculator object holds all the tax filing units
        calc = Calculator(policy=self.policy, records=recs, sync_years=False)
        return calc
コード例 #6
0
ファイル: puf_fuzz.py プロジェクト: codykallen/Tax-Calculator
MAX_SIZE = 100000  # upper limit on size of sample drawn from puf.csv

DEBUG = False  # if True, no variables are randomized and no records sampled

TRACE = False  # if True, tracing output is written to stdout

# variables that are left out of the xYY.csv file
if not DEBUG:
    DROP_VARS = {
        'filer', 's006', 'cmbtp',
        'nu05', 'nu13', 'elderly_dependents',
        'e09700', 'e09800', 'e09900', 'e11200',
    }
else:
    DROP_VARS = {'filer'}

# variables whose values are never randomized
Records.read_var_info()
if not DEBUG:
    SKIP_VARS = {
        'RECID', 'MARS', 'DSI', 'MIDR', 'FLPDYR',
        'age_head', 'age_spouse',
        'nu18', 'n1820', 'n21',
        'XTOT', 'EIC', 'n24', 'f2441',
        'f6251',
    }
else:
    SKIP_VARS = Records.USABLE_READ_VARS

ANNUAL_DRIFT = 0.03
NORM_STD_DEV = 0.25


def randomize_data(xdf, taxyear, rnseed):
    """
コード例 #7
0
def test_validity_of_name_lists():
    """
    Check that the distribution-table columns, labels and variables
    are consistent with one another.
    """
    assert len(DIST_TABLE_COLUMNS) == len(DIST_TABLE_LABELS)
    Records.read_var_info()
    # each distribution variable is a calculated variable or 'weight'
    permitted = Records.CALCULATED_VARS | {'weight'}
    assert set(DIST_VARIABLES) <= permitted
    # every table column must also be a distribution variable
    leftover_columns = set(DIST_TABLE_COLUMNS).difference(DIST_VARIABLES)
    assert leftover_columns == set()
コード例 #8
0
ファイル: puf_fuzz.py プロジェクト: willgrimme/Tax-Calculator
DEBUG = False  # if True, no variables are randomized and no records sampled

TRACE = False  # if True, tracing output is written to stdout

# variables that are left out of the xYY.csv file
DROP_VARS = {'filer'} if DEBUG else {
    'filer', 's006', 'cmbtp', 'nu05', 'nu13', 'elderly_dependent',
    'e09700', 'e09800', 'e09900', 'e11200',
}

# variables whose values are never randomized
Records.read_var_info()
SKIP_VARS = Records.USABLE_READ_VARS if DEBUG else {
    'RECID', 'MARS', 'DSI', 'MIDR', 'FLPDYR', 'age_head', 'age_spouse',
    'nu18', 'n1820', 'n21', 'XTOT', 'EIC', 'n24', 'f2441', 'f6251',
}

ANNUAL_DRIFT = 0.03
NORM_STD_DEV = 0.25


def randomize_data(xdf, taxyear, rnseed):
    """
    Randomizes data variables.
コード例 #9
0
def test_calc_and_used_vars(tests_path):
    """
    Runs two kinds of tests on variables used in the calcfunctions.py file:

    (1) Checks that each var in Records.CALCULATED_VARS is actually calculated

    If test (1) fails, a variable in Records.CALCULATED_VARS was not
    calculated in any function in the calcfunctions.py file.  With the
    exception of a few variables listed in this test, all
    Records.CALCULATED_VARS must be calculated in the calcfunctions.py file.

    (2) Check that each variable that is calculated in a function and
    returned by that function is an argument of that function.

    Parameters
    ----------
    tests_path: string
        directory path; calcfunctions.py is read from its parent directory

    Raises
    ------
    ValueError
        if either test fails; the message lists each offending variable
    """
    # pylint: disable=too-many-locals
    funcpath = os.path.join(tests_path, '..', 'calcfunctions.py')
    gfd = GetFuncDefs()
    # use a context manager so the source file is closed after reading
    with open(funcpath) as srcfile:
        syntax_tree = ast.parse(srcfile.read())
    fnames, fargs, cvars, rvars = gfd.visit(syntax_tree)
    # Test (1):
    # .. create set of vars that are actually calculated in calcfunctions.py
    all_cvars = set()
    for fname in fnames:
        if fname == 'BenefitSurtax':
            continue  # because BenefitSurtax is not really a function
        all_cvars.update(cvars[fname])
    # .. add to all_cvars set variables calculated in Records class
    all_cvars.update(['num', 'sep', 'exact'])
    # .. add to all_cvars set variables calculated elsewhere
    all_cvars.update(['mtr_paytax', 'mtr_inctax'])
    all_cvars.update(['benefit_cost_total', 'benefit_value_total'])
    # .. check that each var in Records.CALCULATED_VARS is in the all_cvars set
    Records.read_var_info()
    missing_vars = Records.CALCULATED_VARS - all_cvars
    found_error1 = bool(missing_vars)
    msg1 = ('all Records.CALCULATED_VARS not calculated '
            'in calcfunctions.py\n')
    # sorted so that the report does not depend on set iteration order
    for var in sorted(missing_vars):
        msg1 += 'VAR NOT CALCULATED: {}\n'.format(var)
    # Test (2):
    faux_functions = [
        'EITCamount', 'ComputeBenefit', 'BenefitPrograms', 'BenefitSurtax',
        'BenefitLimitation'
    ]
    found_error2 = False
    msg2 = 'calculated & returned variables are not function arguments\n'
    for fname in fnames:
        if fname in faux_functions:
            continue  # because fname is not a genuine function
        crvars_set = set(cvars[fname]) & set(rvars[fname])
        non_argument_vars = crvars_set - set(fargs[fname])
        if non_argument_vars:
            found_error2 = True
            for var in sorted(non_argument_vars):
                msg2 += 'FUNCTION,VARIABLE: {} {}\n'.format(fname, var)
    # Report errors for the two tests:
    if found_error1 and found_error2:
        raise ValueError('{}\n{}'.format(msg1, msg2))
    elif found_error1:
        raise ValueError(msg1)
    elif found_error2:
        raise ValueError(msg2)