def test_ranges(bs: pd.DataFrame) -> None:
    """Assert that every (column, property) pair below holds for `bs`.

    Each property object comes from the `cla` module; its `test` method is
    called on the matching column of `bs` and the result must be truthy.
    All property objects are built before the first check runs.

    Args:
        bs: Frame that must contain the columns
            "recently bought this house" and "estrato".
    """
    house = "recently bought this house"
    stratum = "estrato"
    checks = (
        (house, cla.InSet({True, False})),
        (house, cla.CoversRange(0, 1)),
        (house, cla.MeanBounds(0, 0.01)),
        (house, cla.MissingAtMost(0)),
        (stratum, cla.InRange(0, 6)),
        (stratum, cla.CoversRange(0, 3)),
        (stratum, cla.MeanBounds(1.5, 2.5)),
        (stratum, cla.MissingAtMost(0.02)),
    )
    for column, prop in checks:
        assert prop.test(bs[column])
def test_Property_subclasses():
    """Exercise the `cla` property classes on small hand-built Series.

    Covers `MissingAtMost`, `InRange`, `InSet` and `CoversRange`. Every
    expectation is its own `assert`, so a failure points at the exact
    check that broke instead of at one compound expression.
    """
    # One missing value out of three is accepted at a 0.5 threshold;
    # two missing out of three is not.
    assert cla.MissingAtMost(0.5).test(pd.Series([1, np.nan, 3]))
    assert not cla.MissingAtMost(0.5).test(pd.Series([1, np.nan, np.nan]))

    # InRange(0, 1) and InSet({0, 0.5, 1}) are expected to give the same
    # verdict for each probe value. The NaN probe is expected to pass both.
    probes = [
        (-1, False),
        (0, True),
        (0.5, True),
        (1, True),
        (2, False),
        (np.nan, True),
    ]
    for val, result in probes:
        assert cla.InRange(0, 1).test(pd.Series([val])) == result
        assert cla.InSet({0, 0.5, 1}).test(pd.Series([val])) == result

    # The original joined these checks with bitwise `&` and repeated the
    # first operand verbatim; the duplicate added no coverage, so it is
    # dropped and the remaining two checks are asserted separately.
    assert cla.CoversRange(0, 10).test(pd.Series([0, 10]))
    assert not cla.CoversRange(0, 10).test(pd.Series([1, 9]))
def test_ranges(df):
    """Check per-value bounds and per-column coverage of `df`.

    First every column named in the `InRange` table is tested, then every
    column named in the `CoversRange` table. The second table is only
    built once the first set of checks has passed.

    Args:
        df: Frame holding all the columns named in the two tables below.

    Returns:
        The fixed log string "test_ranges()\n".
    """
    value_bounds = {
        "25-broad-categs": cl.InRange(1, 25),
        "big-hog": cl.InRange(0, 1),
        "coicop": cl.InRange(1e6, 2e7),
        # PITFALL: "freq-code"=11 <=> the purchase is never made.
        # This corresponds to a "per month" value of np.nan.
        "freq-code": cl.InRange(0, 10),
        "household": cl.InRange(0, 1e6),
        "is-purchase": cl.InRange(0, 1),
        "per month": cl.InRange(1 / 36 - 0.001, 31),
        "quantity": cl.InRange(0, 1e8),
        "value": cl.InRange(0, 3e9),
        # The special motorcycle tax, abusively lumped into the VAT table,
        # means the max "vat" is 0.27 rather than 0.19.
        "vat": cl.InRange(0, 0.3),
        "vat frac": cl.InRange(0, 0.3 / 1.3),
    }
    for column, check in value_bounds.items():
        assert check.test(df[column])

    column_coverage = {
        "household": cl.CoversRange(2e5, 6e5),
        "per month": cl.CoversRange(0.05, 30),
        "quantity": cl.CoversRange(1, 100),
        "value": cl.CoversRange(3, 1e6),
        "weight": cl.CoversRange(10, 1000),
        "where-got": cl.CoversRange(1, 25),
        # The special motorcycle tax, abusively lumped into the VAT table,
        # means the max "vat" is 0.27 rather than 0.19.
        # *However*, in the smaller samples,
        # we can't be sure that whole range is covered:
        # there might be no motorcycle purchases.
        # That at least some purchase incurs a VAT of 0.19, though,
        # is a safe bet.
        "vat frac": cl.CoversRange(0, 0.19 / 1.19),
        "vat paid": cl.CoversRange(0, 1e5),
        "vat": cl.CoversRange(0, 0.19),
    }
    for column, check in column_coverage.items():
        assert check.test(df[column])

    return "test_ranges()\n"
# Column bookkeeping: names are unique, and `new_cols` is exactly what
# separates `cols2` from `cols1`.
assert util.unique(out.columns)
assert util.unique(new_cols)
assert set.intersection(cols1, new_cols) == set()
assert set.union(cols1, new_cols) == cols2
assert set.difference(cols2, cols1) == new_cols

# Row bookkeeping: `out` has as many rows as `in_rows`, and that count is
# checked against the expected subsample size with tol_frac = 1/5.
assert len(in_rows) == len(out)
assert util.near(
    len(out),
    misc.num_people / com.subsample,
    tol_frac=1 / 5,
)

# Each listed column is checked twice over the span [0, upper bound]:
# once with InRange and once with CoversRange.
upper_bounds = {
    "age-decile": 9,
    "income-decile": 9,
    "female head": 1,
}
per_cell_spec = {
    column: cl.InRange(0, top) for column, top in upper_bounds.items()
}
per_column_spec = {
    column: cl.CoversRange(0, top) for column, top in upper_bounds.items()
}
for column, check in per_cell_spec.items():
    assert check.test(out[column])
for column, check in per_column_spec.items():
    assert check.test(out[column])

# Record success only after every check above has passed.
oio.test_write(com.subsample, "people_2_buildings", "It worked.")
"value, tax, purchaselike non-VAT", "value, tax, predial", "value, tax, purchaselike non-predial non-VAT", "transactions", "value, non-purchase", "value, purchase", "value, spending", "value, consumption", "vat paid" } ) if com.subsample < 11: # The data is too sparse to test # the smaller samples this way for (c,ts) in [ ( "transactions", [ cla.MeanBounds ( 50 , 120 ), cla.CoversRange ( 2 , 200 ), cla.InRange ( 1 , 400 ), cla.MissingAtMost ( 0 ) ] ), ( "value, tax, purchaselike non-VAT", [ cla.MeanBounds (1e4 , 1e5), cla.CoversRange (0 , 2e6), cla.InRange (0 , 1.1e8), # someone pays a huge predial cla.MissingAtMost (0) ] ), ( "value, tax, predial", [ cla.MeanBounds (1e4 ,1e5), cla.CoversRange (0 ,1e3), cla.InRange (0 ,1.1e8), cla.MissingAtMost (0) ] ),
assert ( merge[ merge["region-1"] == "SAN ANDRÉS" ] ["vat paid"].max() == 0 ) if True: for k,v in { "vat / purchase value" : cl.InRange( 0, 0.3 ), # The special motorcycle tax, abusivelyed lump into the VAT table, # means the max "vat" is 0.27 rather than 0.19. "vat / income" : cl.InRange( 0, np.inf ), "purchase value / income" : cl.InRange( 0, np.inf ) }.items(): assert v.test( merge[k] ) for k,v in { # These bounds could be tighter, # but the 1/1000 subsample has a small range. "vat / purchase value" : cl.CoversRange( 0, 0.1 ), "vat / income" : cl.CoversRange( 0, np.inf ), "purchase value / income" : cl.CoversRange( 0.2, np.inf ) }.items(): assert v.test( merge[k] ) for k,v in { "vat / purchase value" : cl.MeanBounds( 2.5e-2, 6e-2 ), "vat / income" : cl.MeanBounds( np.inf, np.inf ), "purchase value / income" : cl.MeanBounds( np.inf, np.inf ) }.items(): assert v.test( merge[k] ) for c in new_cols: assert cl.MissingAtMost( 0.01 ) . test( merge[c] ) oio.test_write( com.subsample,
def test_capitulo_c_data(df: pd.DataFrame):
    """Check the "CODE" column and the three VAT columns of `df`.

    "CODE" must pass both `InRange(1, 25)` and `CoversRange(1, 25)`.
    Each of "vat", "vat, min" and "vat, max" must pass `InRange(0, 1)`.

    Args:
        df: Frame holding the columns "CODE", "vat", "vat, min"
            and "vat, max".
    """
    code_checks = (cla.InRange(1, 25), cla.CoversRange(1, 25))
    for check in code_checks:
        assert check.test(df["CODE"])

    vat_columns = ("vat", "vat, min", "vat, max")
    for name in vat_columns:
        assert cla.InRange(0, 1).test(df[name])
def test_coicop_data(df: pd.DataFrame):
    """Check the "coicop" column and the three VAT columns of `df`.

    "coicop" must pass both `InRange(1e6, 2e7)` and
    `CoversRange(2e6, 1e7)`. Each of "vat", "vat, min" and "vat, max"
    must pass `InRange(0, 1)`.

    Args:
        df: Frame holding the columns "coicop", "vat", "vat, min"
            and "vat, max".
    """
    coicop_checks = (cla.InRange(1e6, 2e7), cla.CoversRange(2e6, 1e7))
    for check in coicop_checks:
        assert check.test(df["coicop"])

    vat_columns = ("vat", "vat, min", "vat, max")
    for name in vat_columns:
        assert cla.InRange(0, 1).test(df[name])