def test_p_continue(average_size, max_size):
    """Check the continue-probability computed for a desired average size.

    The probability returned by ``cu._calc_p_continue`` must lie in [0, 1],
    must be strictly positive unless ``average_size`` is below 1e-5, and must
    map back (via ``cu._p_continue_to_avg``) to within 0.01 of
    ``average_size``.  Each checked quantity is also reported to ``target``.
    """
    assume(average_size <= max_size)
    p_continue = cu._calc_p_continue(average_size, max_size)

    reported_p = target(p_continue, label="p")
    assert 0 <= reported_p <= 1

    # NOTE(review): the "-p" label receives the same value as "p", not its
    # negation -- confirm this is intended.
    reported_again = target(p_continue, label="-p")
    assert 0 < reported_again or average_size < 1e-5

    round_tripped = cu._p_continue_to_avg(p_continue, max_size)
    reported_err = target(abs(average_size - round_tripped), label="abs_err")
    assert reported_err < 0.01
def test_abs_jd2_always_less_than_half_on_construction(jds):
    """Check the jd1/jd2 split of a ``Time`` built from a pair of values.

    After construction with ``format="jd"``, ``jd1`` must be whole-valued and
    ``|jd2|`` must not exceed 0.5; where ``|jd2|`` equals 0.5 exactly,
    ``jd1`` must be even.  The largest ``|jd2|`` is reported to ``target``.
    """
    first, second = jds
    t = Time(first, second, format="jd")

    whole_part = t.jd1
    frac_part = t.jd2
    target(np.amax(np.abs(frac_part)))

    frac_magnitude = abs(frac_part)
    assert np.all(whole_part % 1 == 0)
    assert np.all(frac_magnitude <= 0.5)
    assert np.all((frac_magnitude < 0.5) | (whole_part % 2 == 0))
def test_all_segment_intersections_transform(segments, transform):
    """Verify that segment intersections are preserved under a transform.

    The intersections of the transformed segments (plus any distractor
    segments) are compared, after normalisation, with the transformed
    intersections of the original segments.
    """
    # No distractor segments are currently supplied, so the "distracted" set
    # below is just the transformed segments.
    distractors = []

    def transform_segment(segment):
        start, end = segment[0], segment[1]
        return (affine_transform(transform, start), affine_transform(transform, end))

    tr_segments = {transform_segment(segment) for segment in segments}
    distracted_tr_segments = list(tr_segments | set(distractors))

    intersections = asi(segments)
    distracted_tr_intersections = asi(distracted_tr_segments)

    # Report the total number of segments taking part in intersections.
    hyp.target(
        float(sum(len(segs) for _pt, segs in distracted_tr_intersections))
    )

    expected = []
    for pt, inter_segments in intersections:
        moved_segments = [transform_segment(segment) for segment in inter_segments]
        expected.append((affine_transform(transform, pt), moved_segments))
    manually_tr_intersections = _normalize_result(expected)

    # Keep only points where at least two of the transformed (non-distractor)
    # segments meet.
    surviving = []
    for pt, inter_segments in distracted_tr_intersections:
        shared = tr_segments & set(inter_segments)
        if len(shared) >= 2:
            surviving.append((pt, shared))
    tr_intersections = _normalize_result(surviving)

    assert tr_intersections == manually_tr_intersections
def test_paths_with_target(path):
    """Generate paths while targeting a large ``x - y`` at the endpoint.

    The endpoint and path length are printed, the bucket returned by
    ``to_range(x - y, 100)`` is recorded as an event, and ``x - y`` is
    reported as the score to maximise.
    """
    x, y = path_endpoint(path)
    print("With target: x={}, y={}, len={}".format(x, y, len(path)))

    bucket = to_range(x - y, 100)
    event(str(bucket))

    score = float(x - y)
    target(score)
def assert_almost_equal(a, b, *, rtol=None, atol=None, label=''):
    """Assert that ``a`` and ``b`` differ by less than a tolerance.

    The signed difference is also reported to ``target`` in both directions,
    so the search can push towards the largest discrepancy in either sense.

    Parameters
    ----------
    a, b
        Values to compare; ``a - b`` must be defined.
    rtol
        Relative tolerance, scaled by the mean of ``abs(a)`` and ``abs(b)``.
        Ignored when ``None`` or equal to 0.
    atol
        Absolute tolerance, added to the relative part when both are given.
    label
        Prefix for the labels passed to ``target``.
    """
    __tracebackhide__ = True

    if rtol is None or rtol == 0:
        thresh = atol
    else:
        relative_part = rtol * (abs(a) + abs(b)) / 2
        thresh = relative_part if atol is None else atol + relative_part

    amb = a - b
    if isinstance(amb, TimeDelta):
        seconds = amb.to_value(u.s)
        target(seconds, label=label + " (a-b).to_value(u.s), from TimeDelta")
        target(-seconds, label=label + " (b-a).to_value(u.s), from TimeDelta")
        if isinstance(thresh, u.Quantity):
            # Express the difference in the threshold's unit before comparing.
            amb = amb.to(thresh.unit)
    else:
        # Differences that cannot be converted to float are simply not
        # reported to the targeting engine.
        try:
            as_float = float(amb)
        except TypeError:
            as_float = None
        if as_float is not None:
            target(as_float, label=label + " float(a-b)")
            target(-as_float, label=label + " float(b-a)")

    assert abs(amb) < thresh
def _record_targets(code: str, prefix: str = "") -> str:
    """Report size metrics of ``code`` to Hypothesis and return it unchanged.

    Two scores are reported, each labelled with ``prefix`` prepended: the
    number of import statements in the parsed AST, and the number of distinct
    AST node types.  Hypothesis uses these scores to steer generation towards
    larger values.
    """
    tree_nodes = list(ast.walk(ast.parse(code)))
    import_count = sum(
        isinstance(node, (ast.Import, ast.ImportFrom)) for node in tree_nodes
    )
    node_type_count = len({type(node) for node in tree_nodes})

    hypothesis.target(
        float(import_count), label=prefix + "total number of import nodes"
    )
    hypothesis.target(
        float(node_type_count), label=prefix + "number of unique ast node types"
    )
    return code
def test_targeting_square_loss(d):
    """Contrived targeting example: fail when ``d`` is within 0.5 of 42.

    Otherwise the negated squared distance to 42 is reported, so maximising
    the score moves ``d`` towards the failing region.
    """
    # Pretend that values close to this one trigger a bug.
    target_value = 42

    if abs(d - target_value) < 0.5:
        print("Failing with value {}".format(d))
        raise Exception("Critically close to {}, got {}".format(target_value, d))

    squared_distance = math.pow(d - target_value, 2.0)
    target(-squared_distance)
def record_targets(code: str) -> str:
    """Report size metrics of ``code`` to ``target`` and return it unchanged.

    The reported scores are the number of bytecode instructions after
    compiling ``code`` in ``exec`` mode, the total number of AST nodes, and
    the number of distinct AST node types.
    """
    tree_nodes = list(ast.walk(ast.parse(code)))
    node_types = {type(node) for node in tree_nodes}
    bytecode = list(dis.Bytecode(compile(code, "<string>", "exec")))

    # Dicts keep insertion order, so the scores are reported in this order.
    scores = {
        "(hypothesmith from_node) instructions in bytecode": len(bytecode),
        "(hypothesmith from_node) total number of ast nodes": len(tree_nodes),
        "(hypothesmith from_node) number of unique ast node types": len(node_types),
    }
    for label, value in scores.items():
        target(float(value), label=label)
    return code
def assertColorsValid(self, **colors):
    """Assert that each colour component is in range and round-tripped.

    ``colors`` maps a component name to its values.  Every value must be at
    most 1 and at least 0 (at least -1 for the names "i" and "q"), and the
    values of one component must be almost equal.  The absolute difference
    between the first two values is reported to ``target`` so the largest
    error is maximised and shown.
    """
    assert len(colors) == 3  # sanity-check

    for name, values in colors.items():
        # Names matched by the "iq" test may go down to -1.
        lower_bound = 0 if name not in "iq" else -1
        message = f"color={name!r}"

        for component in values:
            self.assertGreaterEqual(component, lower_bound, msg=message)
            self.assertLessEqual(component, 1, msg=message)

        difference = abs(values[0] - values[1])
        target(
            difference,
            label=f"absolute difference in {name.upper()} values",
        )
        self.assertAlmostEqual(*values, msg=message)
def test_compare_geth_hevm(b):
    """Run the same code through geth's ``evm`` and ``hevm`` and compare traces.

    Both tools are invoked through the shell with their JSON traces
    redirected to ``gethout`` and ``hevmout`` in the current directory.  The
    exit statuses must match, each compared trace step must agree on ``pc``,
    ``stack`` and ``gas``, and the final result records must agree on
    ``output`` and ``gasUsed``.  The number of geth trace lines is reported to
    ``target``.
    """
    code = b.hex()
    note("code that caused failure: ")
    note(code)

    # ``code`` comes from ``.hex()`` (presumably on a bytes object), so it is
    # interpolated into the shell command as-is.
    x = os.system(
        'evm --code ' + code +
        ' --gas 0xfffffffff --json --receiver 0xacab --nomemory run > gethout'
    )
    y = os.system(
        'hevm exec --code ' + code +
        ' --gas 0xfffffffff --chainid 0x539 --gaslimit 0xfffffffff --jsontrace --origin 0x73656e646572 --caller 0x73656e646572 > hevmout'
    )
    assert x == y

    # Read the traces under context managers so the file handles are closed
    # deterministically instead of being left to the garbage collector.
    with open('gethout') as geth_file:
        gethlines = geth_file.read().split('\n')
    with open('hevmout') as hevm_file:
        hevmlines = hevm_file.read().split('\n')

    target(float(len(gethlines)))

    # The last three entries of the hevm output are not compared step by step.
    for i in range(len(hevmlines) - 3):
        gethline = gethlines[i]
        hevmline = hevmlines[i]
        hjson = json.loads(hevmline)
        gjson = json.loads(gethline)

        # These notes are only shown when the example fails (diverges).
        note('')
        note('--- STEP ----')
        note('geth thinks that')
        note(gethline)
        note('while hevm believes')
        note(hevmline)
        note('')

        assert hjson['pc'] == gjson['pc']
        assert hjson['stack'] == gjson['stack']
        # memSize is not compared for now because geth measures memory and
        # memsize after the instruction, as opposed to all other fields.
        assert hjson['gas'] == gjson['gas']

    # The second-to-last entry of each output holds the final result record.
    gethres = json.loads(gethlines[len(gethlines) - 2])
    hevmres = json.loads(hevmlines[len(hevmlines) - 2])

    note('--- OUTPUT ----')
    note('geth thinks that')
    note(gethres)
    note('while hevm believes')
    note(hevmres)

    assert gethres['output'] == hevmres['output']
    assert gethres['gasUsed'] == hevmres['gasUsed']
def test_toeplitz_only_col(toep_cls, first_col, test):
    """Compare a Toeplitz operator's product with the dense matrix product.

    ``toep_cls(first_col).dot(test)`` must match
    ``toeplitz(first_col).dot(test)`` within a dtype-dependent tolerance.
    The counts of finite entries in both results are reported to ``target``.
    """
    dense_matrix = toeplitz(first_col)
    toeplitz_op = toep_cls(first_col)

    # Tolerance fraction per floating-point precision.  No value is assigned
    # for any other dtype.
    col_dtype = first_col.dtype
    if col_dtype == np.float16:
        atol_frac = 1e-2
    elif col_dtype == np.float32:
        atol_frac = 1e-5
    elif col_dtype == np.float64:
        atol_frac = 1e-14
    elif col_dtype == np.float128:
        atol_frac = 1.1e-14

    # Scale of the largest possible product term (left at zero if the column
    # is all zeros).
    max_el = np.max(np.abs(first_col))
    if max_el != 0:
        max_el *= np.max(np.abs(test))

    mat_result = dense_matrix.dot(test)
    mat_finite = np.isfinite(mat_result)
    target(float(np.sum(mat_finite)), label="mat_result_finite")
    if col_dtype == np.float32:
        # Apparently `np.dot` uses an extended-precision accumulator
        assume(np.all(mat_finite))

    op_result = toeplitz_op.dot(test)
    op_finite = np.isfinite(op_result)
    target(float(np.sum(op_finite)), label="op_result_finite")
    op_mag_finite = np.isfinite(np.abs(op_result))
    target(float(np.sum(op_mag_finite)), label="op_result_mag_finite")
    if toep_cls == FFTToeplitz:
        assume(np.all(op_finite))
        assume(np.all(op_mag_finite))

    atol = atol_frac * max_el + ATOL_MIN * (len(test) + toeplitz_op.shape[0])
    assume(atol < np.inf)
    np_tst.assert_allclose(op_result, mat_result, atol=atol, rtol=atol_frac)
def threshold(error):
    """Report ``error`` as a target and fail when it exceeds 10.

    A second target is recorded only after the assertion has passed, so it
    is never reached in a failing example.
    """
    limit = 10
    target(error, label="error")
    assert error <= limit
    target(0.0, label="never in failing example")
def test(value):
    """Record ``value`` as an event, then report it as a labelled target."""
    event(value)
    score = float(value)
    target(score, label="a target")
def test_cannot_target_same_label_twice(_):
    """A second ``target`` call with an already-used label must be rejected."""
    target(0.0, label="label")
    expect_invalid = pytest.raises(InvalidArgument)
    with expect_invalid:
        target(1.0, label="label")
def test_cannot_target_outside_test():
    """Expect ``InvalidArgument`` from a ``target`` call made in this plain function."""
    expect_invalid = pytest.raises(InvalidArgument)
    with expect_invalid:
        target(1.0, label="example label")
def run_targets(targets: Iterable[Callable], context: TargetContext) -> None:
    """Evaluate each target callable and report its score to Hypothesis.

    Every callable is invoked with ``context``; its return value is passed to
    ``hypothesis.target`` under a label equal to the callable's ``__name__``.
    """
    for score_fn in targets:
        score = score_fn(context)
        hypothesis.target(score, label=score_fn.__name__)
def test(ls):
    """Target the sum of ``ls`` and remember the best score seen so far.

    The running maximum is kept in ``result[0]``; ``result`` is defined
    outside this function.
    """
    score = float(sum(ls))
    # Only a strictly larger score replaces the stored best.
    if score > result[0]:
        result[0] = score
    target(score)
def test_with_targeting(ls):
    """Target the length of ``ls``; the test fails once it exceeds 80."""
    size = len(ls)
    target(float(size))
    assert size <= 80
def test_allowed_inputs_to_target_fewer_labels(observation, label):
    """Call ``target`` with the given observation and keyword label.

    The test passes as long as the call does not raise.
    """
    target(observation, label=label)
def test_targeting_with_many_empty(_):
    """Report a constant score; the generated argument is ignored."""
    # This exercises some logic in the optimiser that prevents it from trying
    # to mutate empty examples in the middle of the test case.
    target(1.0)
def test_targeting_with_following_empty(ls, n):
    """Target the length of ``ls``; ``n`` is generated but unused.

    Per the original author's note, this exercises optimiser logic that
    avoids mutating empty examples at the end of the test case.
    """
    size = len(ls)
    target(float(size))
def test_cannot_target_default_label_twice(_):
    """A second unlabelled ``target`` call must raise ``InvalidArgument``."""
    target(0.0)
    expect_invalid = pytest.raises(InvalidArgument)
    with expect_invalid:
        target(1.0)
def test_target_without_label(observation):
    """Call ``target`` with only an observation, leaving the label unset.

    The test passes as long as the call does not raise.
    """
    target(observation)
def test_allowed_inputs_to_target(observation, label):
    """Call ``target`` with the given observation and label.

    The test passes as long as the call does not raise.
    """
    # Pass the label by keyword: current Hypothesis releases make ``label``
    # keyword-only, and the keyword form is also accepted by older releases.
    target(observation, label=label)
def test_target_returns_value(a, b):
    """``target`` must hand back the observation it was given, as an int."""
    expected = abs(a - b)
    returned = target(expected)
    assert returned == expected
    assert isinstance(returned, int)
def test_multiple_target_calls(args):
    """Report every ``(observation, label)`` pair in ``args`` to ``target``."""
    for score, name in args:
        target(score, label=name)
def test_targeting_square_loss(d):
    """Target the negated squared distance between ``d`` and 42.5.

    The score is largest (zero) when ``d`` equals 42.5.
    """
    offset = d - 42.5
    squared_loss = offset**2.0
    target(-squared_loss)
def test_respects_max_pool_size(observations):
    """Report each observation under its own index-derived label.

    Per the original author's note, using many examples of several labels
    like this stresses the pool-size logic and internal assertions in
    TargetSelector.
    """
    for position, observation in enumerate(observations):
        target(observation, label=str(position))
def run_targets(targets: Iterable[Target], elapsed: float) -> None:
    """Report the requested metrics to Hypothesis.

    Only ``Target.response_time`` is handled: for each such entry in
    ``targets``, ``elapsed`` is reported under the label "response_time".
    Any other entry is ignored.
    """
    for requested in targets:
        if not (requested == Target.response_time):
            continue
        hypothesis.target(elapsed, label="response_time")
def test_disallowed_inputs_to_target(observation, label):
    """``target`` must raise ``InvalidArgument`` for the given inputs."""
    expect_invalid = pytest.raises(InvalidArgument)
    with expect_invalid:
        target(observation, label=label)