Example #1
0
def test_ranges(bs: pd.DataFrame) -> None:
    """Assert that selected columns of ``bs`` satisfy their value properties.

    Each (column, property) pair below is checked in order by calling the
    property's ``test`` method on the column; the first property that does
    not hold raises ``AssertionError``.
    """
    bought = "recently bought this house"
    stratum = "estrato"
    checks = [
        (bought, cla.InSet({True, False})),
        (bought, cla.CoversRange(0, 1)),
        (bought, cla.MeanBounds(0, 0.01)),
        (bought, cla.MissingAtMost(0)),
        (stratum, cla.InRange(0, 6)),
        (stratum, cla.CoversRange(0, 3)),
        (stratum, cla.MeanBounds(1.5, 2.5)),
        (stratum, cla.MissingAtMost(0.02)),
    ]
    for column, prop in checks:
        assert prop.test(bs[column])
Example #2
0
def test_Property_subclasses():
    """Exercise the ``cla`` property classes on small hand-built Series.

    Every expectation is its own ``assert`` so that a failure points at the
    exact check that broke.
    """
    # One missing value out of three is within the 0.5 allowance;
    # two out of three is not.
    assert cla.MissingAtMost(0.5).test(pd.Series([1, np.nan, 3]))
    assert not cla.MissingAtMost(0.5).test(pd.Series([1, np.nan, np.nan]))

    # InRange(0, 1) and InSet({0, 0.5, 1}) are expected to agree on each of
    # these single-value Series. Note that NaN is expected to pass both.
    expectations = [(-1, False), (0, True), (0.5, True), (1, True),
                    (2, False), (np.nan, True)]
    for val, result in expectations:
        single = pd.Series([val])
        assert cla.InRange(0, 1).test(single) == result
        assert cla.InSet({0, 0.5, 1}).test(single) == result

    # A Series reaching both endpoints covers the range; one that stays
    # strictly inside it does not. (The original repeated the first check
    # twice and joined everything with bitwise `&` in a single assert.)
    assert cla.CoversRange(0, 10).test(pd.Series([0, 10]))
    assert not cla.CoversRange(0, 10).test(pd.Series([1, 9]))
Example #3
0
def test_ranges(df):
    """Assert per-column range properties of ``df`` and return a log string.

    Two groups of properties are checked, in order: ``cl.InRange`` specs,
    then ``cl.CoversRange`` specs. The first property that does not hold
    raises ``AssertionError``. The returned log is only ever the header
    line; nothing else is appended to it here.
    """
    log = "test_ranges()\n"

    def _assert_all(spec):
        # Test each property against the column it is keyed by.
        for column, prop in spec.items():
            assert prop.test(df[column])

    within = {
        "25-broad-categs": cl.InRange(1, 25),
        "big-hog": cl.InRange(0, 1),
        "coicop": cl.InRange(1e6, 2e7),

        # PITFALL: "freq-code"=11 <=> the purchase is never made.
        # This corresponds to a "per month" value of np.nan.
        "freq-code": cl.InRange(0, 10),
        "household": cl.InRange(0, 1e6),
        "is-purchase": cl.InRange(0, 1),
        "per month": cl.InRange(1 / 36 - 0.001, 31),
        "quantity": cl.InRange(0, 1e8),
        "value": cl.InRange(0, 3e9),

        # The special motorcycle tax, abusively lumped into the VAT table,
        # means the max "vat" is 0.27 rather than 0.19.
        "vat": cl.InRange(0, 0.3),
        "vat frac": cl.InRange(0, 0.3 / 1.3),
    }
    _assert_all(within)

    spans = {
        "household": cl.CoversRange(2e5, 6e5),
        "per month": cl.CoversRange(0.05, 30),
        "quantity": cl.CoversRange(1, 100),
        "value": cl.CoversRange(3, 1e6),
        "weight": cl.CoversRange(10, 1000),
        "where-got": cl.CoversRange(1, 25),

        # The special motorcycle tax, abusively lumped into the VAT table,
        # means the max "vat" is 0.27 rather than 0.19.
        # *However*, in the smaller samples we can't be sure that the whole
        # range is covered: there might be no motorcycle purchases.
        # That at least some purchase incurs a VAT of 0.19, though,
        # is a safe bet.
        "vat frac": cl.CoversRange(0, 0.19 / 1.19),
        "vat paid": cl.CoversRange(0, 1e5),
        "vat": cl.CoversRange(0, 0.19),
    }
    _assert_all(spans)

    return log
Example #4
0
# Column names must be unique, both in the output and among the additions.
assert util.unique(out.columns)
assert util.unique(new_cols)

# new_cols must be exactly what cols2 adds to cols1: disjoint from cols1,
# and together with cols1 making up cols2.
assert cols1.intersection(new_cols) == set()
assert cols1.union(new_cols) == cols2
assert cols2.difference(cols1) == new_cols

# No rows gained or lost, and the row count is within 20% of the
# population size scaled down by the subsample factor.
assert len(in_rows) == len(out)
assert util.near(len(out), misc.num_people / com.subsample, tol_frac=1 / 5)

# Bounds each individual value must respect.
per_cell_spec = {
    "age-decile": cl.InRange(0, 9),
    "income-decile": cl.InRange(0, 9),
    "female head": cl.InRange(0, 1),
}

# Extent each column as a whole must span.
per_column_spec = {
    "age-decile": cl.CoversRange(0, 9),
    "income-decile": cl.CoversRange(0, 9),
    "female head": cl.CoversRange(0, 1),
}

# Per-cell properties are checked first, then per-column ones.
for spec in (per_cell_spec, per_column_spec):
    for k, v in spec.items():
        assert v.test(out[k])

oio.test_write(com.subsample, "people_2_buildings", "It worked.")
Example #5
0
           "value, tax, purchaselike non-VAT",
           "value, tax, predial",
           "value, tax, purchaselike non-predial non-VAT",
           "transactions",
           "value, non-purchase",
           "value, purchase",
           "value, spending",
           "value, consumption",
           "vat paid" } )

if com.subsample < 11: # The data is too sparse to test
                       # the smaller samples this way
  for (c,ts) in [
    ( "transactions",
      [ cla.MeanBounds    ( 50 , 120 ),
        cla.CoversRange   ( 2  , 200 ),
        cla.InRange       ( 1  , 400 ),
        cla.MissingAtMost ( 0 ) ] ),

    ( "value, tax, purchaselike non-VAT",
      [ cla.MeanBounds    (1e4 , 1e5),
        cla.CoversRange   (0   , 2e6),
        cla.InRange       (0   , 1.1e8), # someone pays a huge predial
        cla.MissingAtMost (0) ] ),

    ( "value, tax, predial",
      [ cla.MeanBounds    (1e4 ,1e5),
        cla.CoversRange   (0   ,1e3),
        cla.InRange       (0   ,1.1e8),
        cla.MissingAtMost (0) ] ),
    assert ( merge[ merge["region-1"] == "SAN ANDRÉS" ]
             ["vat paid"].max() == 0 )

if True:
  # Bounds each individual value of the ratio columns must respect.
  in_range_spec = {
      # The special motorcycle tax, abusively lumped into the VAT table,
      # means the max "vat" is 0.27 rather than 0.19.
      "vat / purchase value"      : cl.InRange( 0, 0.3 ),
      "vat / income"              : cl.InRange( 0, np.inf ),
      "purchase value / income"   : cl.InRange( 0, np.inf ),
  }

  # Extent each ratio column must span.
  # These bounds could be tighter,
  # but the 1/1000 subsample has a small range.
  covers_range_spec = {
      "vat / purchase value"       : cl.CoversRange( 0,      0.1    ),
      "vat / income"               : cl.CoversRange( 0,      np.inf ),
      "purchase value / income"    : cl.CoversRange( 0.2,    np.inf ),
  }

  # NOTE(review): the two income ratios use (inf, inf) as mean bounds;
  # presumably their mean is expected to be infinite -- confirm.
  mean_bounds_spec = {
      "vat / purchase value"       : cl.MeanBounds( 2.5e-2, 6e-2 ),
      "vat / income"               : cl.MeanBounds( np.inf, np.inf ),
      "purchase value / income"    : cl.MeanBounds( np.inf, np.inf ),
  }

  # Same order as before: InRange, then CoversRange, then MeanBounds.
  for spec in ( in_range_spec, covers_range_spec, mean_bounds_spec ):
    for k,v in spec.items():
      assert v.test( merge[k] )

  # Every newly added column may be missing in at most 1% of rows.
  for c in new_cols:
    assert cl.MissingAtMost( 0.01 ) . test( merge[c] )

oio.test_write(
    com.subsample,
Example #7
0
def test_capitulo_c_data(df: pd.DataFrame):
    """Assert range properties of the "CODE" and VAT columns of ``df``.

    "CODE" must pass both ``InRange(1, 25)`` and ``CoversRange(1, 25)``;
    each of the three VAT columns must pass ``InRange(0, 1)``.
    """
    code_checks = (cla.InRange(1, 25), cla.CoversRange(1, 25))
    for check in code_checks:
        assert check.test(df["CODE"])

    vat_columns = ("vat", "vat, min", "vat, max")
    for name in vat_columns:
        rate_check = cla.InRange(0, 1)
        assert rate_check.test(df[name])
Example #8
0
def test_coicop_data(df: pd.DataFrame):
    """Assert range properties of the "coicop" and VAT columns of ``df``.

    "coicop" must pass ``InRange(1e6, 2e7)`` and ``CoversRange(2e6, 1e7)``;
    each of the three VAT columns must pass ``InRange(0, 1)``.
    """
    coicop_checks = (cla.InRange(1e6, 2e7), cla.CoversRange(2e6, 1e7))
    for check in coicop_checks:
        assert check.test(df["coicop"])

    vat_columns = ("vat", "vat, min", "vat, max")
    for name in vat_columns:
        rate_check = cla.InRange(0, 1)
        assert rate_check.test(df[name])