Example #1
            # because it varies within household.
        ("recently bought this house", cla.MeanBounds(0, 0.01)),
        ("female head", cla.MeanBounds(0.25, 0.55)),
        ("seguro de riesgos laborales", cla.MeanBounds(0.3, 0.6))
    ]:
        assert test.test(hh[c])


if True:  # IO
    log = "starting\n"
    #
    hh = oio.readStage(com.subsample,
                       "households_1_agg_plus." + com.strategy_year_suffix)
    ppl = oio.readStage(com.subsample,
                        "people_3_income_taxish." + com.strategy_year_suffix)
    hh["edu-max"] = util.interpretCategorical(hh["edu-max"], edu_key.values())
    ppl["edu"] = util.interpretCategorical(ppl["edu"], edu_key.values())

    test_const_within_group(
        # TODO ? move this test to the tests of person data
        gs=["household"],
        cs=defs.cols_const_within_hh,
        d=hh)
    test_indices(hh=hh, ppl=ppl)
    test_income_ranks(hh=hh, ppl=ppl)
    test_sums(hh=hh, ppl=ppl)
    test_bools(hh=hh, ppl=ppl)
    com_tests.test_quantiles(df=hh)

    oio.test_write(com.subsample, "households_1_agg_plus", log)
Example #2
File: test.py Project: ofiscal/tax.co
    assert not go([True, True])


def test_next_request():
    cols = [
        "user email", "user", "subsample", "completed", "time completed",
        "time requested"
    ]
    df = pd.DataFrame([
        ["1", "1", 100, False, 99, 99],
        ["2", "2", 100, False, 6, 6],
        ["3", "3", 100, True, 7, 7],
        ["4", "4", 100, True, 8, 8],
    ],
                      columns=cols)
    assert r.next_request(df)["user"] == "2"
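    # A minimal sketch of what r.next_request might do, inferred only from the
    # assertion above: it returns the uncompleted request with the earliest
    # "time requested". The real implementation may differ.
def next_request_sketch(df: pd.DataFrame) -> pd.Series:
    pending = df[~df["completed"]]  # ignore requests already completed
    return pending.sort_values("time requested").iloc[0]  # oldest pending one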


if True:
    test_memory_permits_another_run()
    test_delete_oldest_request()
    test_at_least_one_result_is_old()
    test_uniquify_requests()
    test_unexecuted_requests_exist()
    test_next_request()
    #
    oio.test_write(
        1,  # PITFALL: Uses no data, so always writes to recip-1/
        "requests",
        "")
Example #3
File: util_test.py Project: ofiscal/tax.co
  assert     util.near( 0,   1,      tol_frac = 0,   tol_abs = 2 )
  assert not util.near( 0,   1,      tol_frac = 0,   tol_abs = 1/2 )
  assert     util.near( 20, 21,      tol_frac = 0.1, tol_abs = 0 )
  assert not util.near( 20, 23,      tol_frac = 0.1, tol_abs = 0 )
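
  # A hypothetical sketch of util.near consistent with the assertions above:
  # two numbers are "near" if they differ by at most tol_abs, or by at most
  # tol_frac of the larger magnitude. The real util.near may be written differently.
def near_sketch( a, b, tol_frac, tol_abs ):
  diff = abs( a - b )
  return ( diff <= tol_abs
           or diff <= tol_frac * max( abs(a), abs(b) ) )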

def test_tuple_by_threshold():
    sched = [(0,"a","b")]
    for income in [-1,0,1,1e11]:
      assert util.tuple_by_threshold( income, sched ) == sched[0]
    sched = [ (0,1,2),
              (10,"whatever","something") ]
    for income in [-1,0,1,9]:
      assert util.tuple_by_threshold( income, sched ) == sched[0]
    for income in [10,11,1e11]:
      assert util.tuple_by_threshold( income, sched ) == sched[1]
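
  # A hypothetical sketch of util.tuple_by_threshold consistent with the tests
  # above: each tuple's first element is an income threshold, and the function
  # returns the last tuple whose threshold the income reaches, defaulting to
  # the first tuple. The real implementation may differ.
def tuple_by_threshold_sketch( income, sched ):
  chosen = sched[0]
  for row in sched:
    if income >= row[0]: chosen = row
  return chosen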

def test_util_pad_column_as_int():
  c = pd.Series( [2, "2","2.0",np.nan] )
  assert pd.Series.equals(
    util.pad_column_as_int ( 4, c )
    , pd.Series( ["0002","0002","0002",np.nan] ) )
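
  # A hypothetical sketch of util.pad_column_as_int consistent with the test
  # above: non-null entries are coerced to int and zero-padded to the given
  # width; nulls are left as-is. The real implementation may differ.
def pad_column_as_int_sketch( width, col ):
  return col.map( lambda x: x if pd.isnull(x)
                  else str( int( float(x) ) ).zfill( width ) )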


if True: # run tests
  log = "starting\n"
  test_near()
  test_util_pad_column_as_int()
  oio.test_write( cl.subsample
                , "common_util"
                , log )
Example #4
  assert set( df.columns ) == set(
    [code_column_name, 'vat', 'vat frac'] )

  for c in df.columns:
    if c == code_column_name:
          assert df[c].dtype == "int64"
    else: assert df[c].dtype == "float64"

  # The "vat" and "vat frac" columns might have a few missing values.
  # The others should have none.
  assert ( ( len( non_null_part( df["vat"] ) )
           / len( df ) )
         > ( fraction_that_should_be_non_null - tolerance ) )
  assert (  len( non_null_part( df["vat"] ) )
         == len( non_null_part( df["vat frac"] ) ) )
  df = df.drop( columns = ["vat", "vat frac"] )
  for c in df.columns:
    assert len( df[ ~ pd.isnull( df[c] ) ] ) == len( df )


if True: # run tests
  log = "starting\n"

  test_vat_file( "vat_coicop_brief", "coicop", 1031 / 1051 )
  test_vat_file( "vat_cap_c_brief", "25-broad-categs", 20 / 25 )

  oio.test_write( cl.subsample
                , "vat_rates"
                , log )
Example #5
    for (c, t) in [("recently bought this house", cla.InSet({True, False})),
                   ("recently bought this house", cla.CoversRange(0, 1)),
                   ("recently bought this house", cla.MeanBounds(0, 0.01)),
                   ("recently bought this house", cla.MissingAtMost(0)),
                   ("estrato", cla.InRange(0, 6)),
                   ("estrato", cla.CoversRange(0, 3)),
                   ("estrato", cla.MeanBounds(1.5, 2.5)),
                   ("estrato", cla.MissingAtMost(0.02))]:
        assert t.test(bs[c])
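
    # Hypothetical sketches of the cla property checks used throughout these
    # tests, inferred only from how they are called here; the real classes in
    # python.build.classes may be implemented quite differently.
class InSet_sketch:
    def __init__(self, allowed): self.allowed = allowed
    def test(self, col): return col.dropna().isin(self.allowed).all()

class InRange_sketch:  # every non-null value lies in [lo, hi]
    def __init__(self, lo, hi): self.lo, self.hi = lo, hi
    def test(self, col): return col.dropna().between(self.lo, self.hi).all()

class CoversRange_sketch:  # the values span at least [lo, hi]
    def __init__(self, lo, hi): self.lo, self.hi = lo, hi
    def test(self, col): return col.min() <= self.lo and col.max() >= self.hi

class MeanBounds_sketch:
    def __init__(self, lo, hi): self.lo, self.hi = lo, hi
    def test(self, col): return self.lo <= col.mean() <= self.hi

class MissingAtMost_sketch:
    def __init__(self, frac): self.frac = frac
    def test(self, col): return col.isnull().mean() <= self.frac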


if True:  # run tests
    log = "starting\n"
    bs = oio.readStage(
        1  # PITFALL: For buildings, we always use the full sample.
        ,
        'buildings',
        dtype={"estrato": 'float64'}
        # If subsample is so small that there are no missing values,
        # "estrato" will by default be read as "int64".
    )
    test_types(bs)
    test_nullity(bs)
    test_ranges(bs)
    assert (unique(bs.columns))
    for ss in com.valid_subsamples:
        # PITFALL: Looping over subsample sizes because this program
        # always uses the full sample.
        # If it works, it works for all subsamples.
        oio.test_write(ss, "build_buildings", log)
Example #6
    # But it's surprising, because for subsample = 10,
    # the reality is much less than the expectation.

    assert (set(df.columns) == set(Purchase_2_Columns_missing.all_columns()))

    # coicop and 25-broad-categs are each individually missing substantially,
    # but exactly one of them is always present
    assert len(df[(~pd.isnull(df["coicop"]))
                  & (~pd.isnull(df["25-broad-categs"]))]) == 0
    assert len(df[(pd.isnull(df["coicop"])) |
                  (pd.isnull(df["25-broad-categs"]))]) == len(df)

    for c in Purchase_2_Columns_missing.never:
        assert (len(df[pd.isnull(df[c])]) == 0)

    for c in Purchase_2_Columns_missing.slightly:
        assert ((len(df[pd.isnull(df[c])]) / len(df)) < 0.03)

    for c in Purchase_2_Columns_missing.very:
        assert ((len(df[pd.isnull(df[c])]) / len(df)) < 0.25)

    return log


if True:  # IO
    log = "starting\n"
    ps = oio.readStage(com.subsample, "purchases_2_vat." + com.strategy_suffix)
    log += test_ranges(ps)
    log += test_output(ps)
    oio.test_write(com.subsample, "build_purchases_2_vat", log)
Example #7
     ( "value, non-purchase",
       [ cla.MeanBounds    (1e6,1e7),
         cla.CoversRange   (0 ,1e6),
         cla.InRange       (0 ,3.3e9),
         cla.MissingAtMost (0) ] ),

    ( "value, purchase",
       [ cla.MeanBounds    (1e6 ,5e6),
         cla.CoversRange   (1e2 ,4e7), # TODO ? This minimum is nuts.
         cla.InRange       (0   ,2e8),
         cla.MissingAtMost (0) ] ),

    ( "value, spending",
       [ cla.MeanBounds    (1e6 ,5e6),
         cla.CoversRange   (1e2 ,4e7), # TODO ? This minimum is nuts.
         cla.InRange       (0   ,2e8),
         cla.MissingAtMost(0) ] ) ]:
      for t in ts:
          assert t.test( sums[c] )

assert sums["household"].is_unique

assert util.near( len(sums),
                  num_households / com.subsample,
                  tol_frac = 1/5 )

oio.test_write( com.subsample,
                "build_purchase_sums",
                "It worked." )
Example #8
File: main_test.py Project: ofiscal/tax.co

# TODO : extend to all the old variables
def test_means(ppl: pd.DataFrame) -> None:
    for (col, theMin, theMax) in [
        ("used savings", 0.005, 0.05),
        ("empleado", 0.20, 0.6),
        ("desempleado", 0.03, 0.12),
        ("in labor force", 0.25, 0.6),
    ]:
        x = ppl[col].mean()
        assert (x >= theMin) & (x <= theMax)


if True:  # run tests
    log = "starting\n"

    # unit tests
    test_count_num_matches_in_space_separated_list()

    # integration tests
    ppl = oio.readStage(com.subsample, 'people_1')
    ppl["edu"] = util.interpretCategorical(ppl["edu"], files.edu_key.values())
    test_ranges(ppl)
    test_upper_bound_on_fraction_missing(ppl)
    test_means(ppl)

    assert util.near(len(ppl), num_people / com.subsample, tol_frac=1 / 5)

    oio.test_write(com.subsample, "people_main", log)
Example #9
if True:
  for k,v in {
      "vat / purchase value"      : cl.InRange( 0, 0.3 ),
        # The special motorcycle tax, abusively lumped into the VAT table,
        # means the max "vat" is 0.27 rather than 0.19.
      "vat / income"              : cl.InRange( 0, np.inf ),
      "purchase value / income"   : cl.InRange( 0, np.inf )
      }.items():
    assert v.test( merge[k] )
  for k,v in {
      # These bounds could be tighter,
      # but the 1/1000 subsample has a small range.
      "vat / purchase value"       : cl.CoversRange( 0,      0.1    ),
      "vat / income"               : cl.CoversRange( 0,      np.inf ),
      "purchase value / income"    : cl.CoversRange( 0.2,    np.inf )
      }.items():
    assert v.test( merge[k] )
  for k,v in {
      "vat / purchase value"       : cl.MeanBounds( 2.5e-2, 6e-2 ),
      "vat / income"               : cl.MeanBounds( np.inf, np.inf ),
      "purchase value / income"    : cl.MeanBounds( np.inf, np.inf )
      }.items():
    assert v.test( merge[k] )
  for c in new_cols:
    assert cl.MissingAtMost( 0.01 ) . test( merge[c] )

oio.test_write(
    com.subsample,
    "households_2_purchases",
    "It worked." )
Example #10
    log += "Very few missing quantity values."
    assert ((1e-5) > (len(df[pd.isnull(df["quantity"])]) / len(df)))

    log += "Very few negative quantity values."
    assert ((1e-5) > (len(df[df["quantity"] <= 0]) / len(df)))

    log += "Negative quantity purchases are for very little money."
    assert (df[df["quantity"] < 0]["value"] < 1e4).all()

    log += "Very few purchases with a frequency of \"never\"."
    assert ((1e-5) > (len(df[df["per month"] > 10]) / len(df)))

    log += "Those few frequency=\"never\" purchases are for very little money."
    assert (df[df["per month"] > 10]["value"] < 1e4).all()

    return log


if True:  # run the tests
    log = "starting\n"

    # unit tests
    log += test_drop_if_coicop_or_value_invalid()
    log += test_drop_absurdly_big_expenditures()

    # integration test
    df = oio.readStage(com.subsample, 'purchases_1')
    log += test_output(df)

    oio.test_write(com.subsample, "purchases_correct", log)
Example #11
    assert near( reg.taxable(r),
                 r[reg.gravable_pre] * (1 - 0.325) )
    # The dependent exemption knocks off 10% from what's left.
    r["claims dependent (labor income tax)"] = True
    assert near( reg.taxable(r),
                 r[reg.gravable_pre] * (1 - 0.325) * 0.9 )

  if True: # For high earners (20000 muvt is around 50 million / month)
    # who have no dependents, 5040 muvt is exempted.
    r = pd.Series( { "claims dependent (labor income tax)" : False,
                     reg.gravable_pre : 20000 * muvt } )
    assert near( reg.taxable(r),
                 r[reg.gravable_pre] - 5040 * muvt )
    # The dependent exemption knocks off another 32 muvt.
    r["claims dependent (labor income tax)"] = True
    assert near( reg.taxable(r),
                 r[reg.gravable_pre] - 5072 * muvt )
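
  # A hypothetical reconstruction of reg.taxable consistent with the four
  # assertions above: 32.5% of gravable income is exempt, capped at 5040 muvt,
  # and claiming a dependent exempts a further 10% of the remainder, capped at
  # 32 muvt. The real regime code may compute this differently.
def taxable_sketch( r ):
  base   = r[ reg.gravable_pre ]
  exempt = min( 0.325 * base, 5040 * muvt )
  if r["claims dependent (labor income tax)"]:
    exempt += min( 0.1 * (base - exempt), 32 * muvt )
  return base - exempt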
  
if True:
  test_most_income_tax()
  test_taxable()
  log = str( datetime.datetime.now() )
  for ss in common.valid_subsamples:
    # PITFALL: Looping over subsample sizes because this program
    # uses no data.
    # If it works, it works for all subsamples.
    oio.test_write( ss
                  , "regime_r2018"
                  , log )

Example #12
        })))


def test_File():
    f = cla.File("sassafrass", "sassafrass.csv",
                 [("ugly input.csv", "dirt", "beautiful output.csv", "gold")])
    assert (cla.name_map(f.col_specs) == {
        "ugly input.csv": "beautiful output.csv"
    })
    assert (cla.input_map(f.col_specs) == {"ugly input.csv": "dirt"})
    assert (cla.output_map(f.col_specs) == {"beautiful output.csv": "gold"})
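
    # Hypothetical sketches of the cla helpers exercised above, assuming each
    # col_spec is an (input name, input property, output name, output property)
    # tuple; the real helpers in python.build.classes may differ.
def name_map_sketch(col_specs):
    return {inp: out for (inp, _, out, _) in col_specs}

def input_map_sketch(col_specs):
    return {inp: prop for (inp, prop, _, _) in col_specs}

def output_map_sketch(col_specs):
    return {out: prop for (_, _, out, prop) in col_specs}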


if True:  # run the tests
    log = "starting\n"

    test_Correction()
    test_File()
    test_MeanBounds()
    test_Property_subclasses()
    test_re_c()
    test_re_digits()
    test_re_gt1c()
    test_re_gt1p()
    test_re_nonNumeric()
    test_re_p()
    test_re_white()
    test_stringProperties()

    oio.test_write(com.subsample, "build_classes", log)
Example #13
    #
    import python.build.classes as cla
    import python.build.output_io as oio
    import python.common.common as common
    import python.common.misc as misc


def test_coicop_data(df: pd.DataFrame):
    for t in [cla.InRange(1e6, 2e7), cla.CoversRange(2e6, 1e7)]:
        assert t.test(df["coicop"])
    for col in ["vat", "vat, min", "vat, max"]:
        assert (cla.InRange(0, 1).test(df[col]))


def test_capitulo_c_data(df: pd.DataFrame):
    for t in [cla.InRange(1, 25), cla.CoversRange(1, 25)]:
        assert t.test(df["CODE"])
    for col in ["vat", "vat, min", "vat, max"]:
        assert (cla.InRange(0, 1).test(df[col]))


if True:
    test_coicop_data(misc.read_csv_or_xlsx("config/vat/vat_by_coicop.csv"))
    test_capitulo_c_data(
        misc.read_csv_or_xlsx("config/vat/vat_by_capitulo_c.csv"))
    oio.test_write(
        1,  # PITFALL: Uses no sample-size-dependent data,
        # so always writes to recip-1/
        "rate_input",
        "")
Example #14
assert util.unique(out.columns)
assert util.unique(new_cols)

assert set.intersection(cols1, new_cols) == set()
assert set.union(cols1, new_cols) == cols2
assert set.difference(cols2, cols1) == new_cols

assert len(in_rows) == len(out)
assert util.near(len(out), misc.num_people / com.subsample, tol_frac=1 / 5)

per_cell_spec = {
    "age-decile": cl.InRange(0, 9),
    "income-decile": cl.InRange(0, 9),
    "female head": cl.InRange(0, 1)
}

per_column_spec = {
    "age-decile": cl.CoversRange(0, 9),
    "income-decile": cl.CoversRange(0, 9),
    "female head": cl.CoversRange(0, 1)
}

for k, v in per_cell_spec.items():
    assert v.test(out[k])

for k, v in per_column_spec.items():
    assert v.test(out[k])

oio.test_write(com.subsample, "people_2_buildings", "It worked.")
Example #15
        assert near(t(employee, 100 * min_wage), 0.04 * 25 * min_wage)


def test_mk_cesantias_y_primas_employer():
    t = sf.mk_cesantias_y_primas_employer
    if True:  # for contractors, always 0
        assert near(t(contractor, 0), 0)
        assert near(t(contractor, 1000 * min_wage), 0)
    if True:  # for employees
        assert near(t(employee, 0.5 * min_wage), 0)
        assert near(t(employee, 1.1 * min_wage), (2.12 / 12) * 1.1 * min_wage)
        assert near(t(employee, 12 * min_wage), (2.12 / 12) * 12 * min_wage)
        assert near(t(employee, 14 * min_wage), 0)
        assert near(t(employee, 100 * min_wage), 0)
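
    # A hypothetical sketch of sf.mk_cesantias_y_primas_employer matching only
    # the points asserted above. The lower and upper cutoffs (1 and 13 minimum
    # wages) are assumptions; the real schedule may place them elsewhere.
def mk_cesantias_y_primas_employer_sketch(kind, wage):
    if kind == contractor:                 # contractors never accrue this
        return 0
    if wage < min_wage or wage > 13 * min_wage:
        return 0                           # outside the covered wage range
    return (2.12 / 12) * wage              # monthly accrual for employees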


if True:
    log = str(datetime.datetime.now())
    test_mk_pension()
    test_mk_pension_employer()
    test_mk_salud()
    test_mk_salud_employer()
    test_mk_solidaridad()
    test_mk_parafiscales_employer()
    test_mk_cajas_de_compensacion_employer()
    test_mk_cesantias_y_primas_employer()
    for ss in common.valid_subsamples:
        # PITFALL: Looping over subsample sizes because this program
        # doesn't use any data. If it works, it works for all subsamples.
        oio.test_write(ss, "build_ss_functions", log)
Example #16
    import python.build.output_io as oio
    import python.build.purchases.articulos as articulos
    import python.build.purchases.capitulo_c as capitulo_c
    # import python.build.purchases.medios as medios
    import python.build.purchases.nice_purchases as nice_purchases
    import python.common.util as util
    import python.common.common as common


def test_purchase_inputs():
    for f in (articulos.files
              # + medios.files
              + capitulo_c.files + nice_purchases.files):
        df = common.retrieve_file(
            f, subsample=1)  # PITFALL: Always the full sample.
        assert util.unique(df.columns)
        acc = {}
        for c in df.columns:
            acc.update([(c, cla.stringProperties(df[c]))])
            assert acc[c] == cla.input_map(f.col_specs)[c]


if True:  # run tests
    log = "starting\n"
    test_purchase_inputs()
    for ss in common.valid_subsamples:
        # PITFALL: Looping over subsample sizes because this program
        # always uses the full sample.
        # If it works, it works for all subsamples.
        oio.test_write(subsample=ss, filename="purchase_inputs", content=log)
htemp = (hh2[["household"] + same_ratio].rename(columns=same_ratio_hh_dict))
ptemp = ps4.merge(htemp[["household"] + same_ratio_hh_list], on="household")
ptemp = (  # To make sure none of the ratios is infinite.
    ptemp[(ptemp["income"] > 0) & (ptemp["value, purchase"] > 0)])
for c in same_ratio:
    assert (ptemp[c + "-hh"] > 0.99 * ptemp[c] - 1).all()
    assert (ptemp[c + "-hh"] < 1.01 * ptemp[c] + 1).all()
    log = log + "\n" + c + " has the same values in ps4 as in hh2."
del (htemp, ptemp)

for (c, m) in [
    ("in labor force", 0),
    ("share", 0),
    ("one", 0),
    ("income-decile", 0),
    ("income-percentile", 0),
    ("vat / purchase value", 0.1),
    ("vat / income", 0.1),
    ("purchase value / income", 0.1),
]:
    # PITFALL: Income and purchase value are zero for far more rows of people_4 than for households_2.
    # The reason for the first is that many households have positive income even though some earners in the household don't.
    # The reason for the second is that "value, purchase" is zero for anyone with zero income in a household with positive income.
    # (If everyone in the household has zero income then purchase value is instead divided equally among earners.)
    assert ps4[c].isnull().mean() <= m
    log = log + "\n" + c + " is missing no more than " + str(100 * m) + "%."

com_tests.test_quantiles(df=ps4)

oio.test_write(com.subsample, "people_4_post_households", log)