def test_sampling():
    """Check weighted and unweighted sampling on a small mixed domain.

    The support requires exactly one of ``a``/``b`` to hold and ``x <= y``.
    With the weight ``Ite(a, 1, 2)`` the test expects roughly half as many
    samples satisfying ``a`` as ``b``; without a weight it expects the two
    counts to be roughly equal. In both cases every returned sample must
    satisfy the support and the reported positive ratio must be close to 0.25.
    """
    domain = Domain.make(["a", "b"], ["x", "y"], real_bounds=(0, 1))
    a, b, x, y = domain.get_symbols()
    support = (a | b) & (~a | ~b) & (x <= y)
    weight = smt.Ite(a, smt.Real(1), smt.Real(2))

    required_sample_count = 10000

    # Weighted sampling: samples where ``a`` holds carry half the weight.
    samples_weighted, pos_ratio = positive(required_sample_count, domain, support, weight)
    assert samples_weighted.shape[0] == required_sample_count
    assert sum(evaluate(domain, support, samples_weighted)) == len(samples_weighted)
    samples_a = sum(evaluate(domain, a, samples_weighted))
    samples_b = sum(evaluate(domain, b, samples_weighted))
    assert samples_a == pytest.approx(samples_b / 2, rel=0.2)
    assert pos_ratio == pytest.approx(0.25, rel=0.1)

    # Unweighted sampling: ``a`` and ``b`` should be equally likely.
    samples_unweighted, pos_ratio = positive(required_sample_count, domain, support)
    assert samples_unweighted.shape[0] == required_sample_count
    # Compare against the unweighted batch itself (the original compared
    # against the weighted batch, which only worked because sizes coincide).
    assert sum(evaluate(domain, support, samples_unweighted)) == len(samples_unweighted)
    samples_a = sum(evaluate(domain, a, samples_unweighted))
    samples_b = sum(evaluate(domain, b, samples_unweighted))
    assert samples_a == pytest.approx(samples_b, rel=0.1)
    assert pos_ratio == pytest.approx(0.25, rel=0.1)
def test_sampling_stacking():
    """Requesting 20 samples with ``sample_count=10`` must not raise ValueError.

    NOTE(review): presumably this forces ``positive`` to combine several
    batches to reach the requested total -- confirm against ``positive``.
    """
    real_names = ["x", "y"]
    domain = Domain.make([], real_names, real_bounds=(0, 1))
    x, y = domain.get_symbols()
    region = x <= y

    try:
        positive(20, domain, region, sample_count=10, max_samples=10000)
    except ValueError:
        completed = False
    else:
        completed = True

    assert completed
def test_sampling_max_samples():
    """Sampling from an unsatisfiable support must raise ``SamplingError``.

    The support is the constant ``FALSE``, so the test expects ``positive``
    to give up (``max_samples=100000``) instead of returning samples.
    """
    domain = Domain.make([], ["x", "y"], real_bounds=(0, 1))
    x, y = domain.get_symbols()
    unsatisfiable = smt.FALSE()

    try:
        positive(10, domain, unsatisfiable, max_samples=100000)
    except SamplingError:
        # Expected outcome: nothing can be sampled from an empty support.
        return

    # Reaching this point means sampling unexpectedly succeeded.
    assert False
def sample_dataset(model, n_samples):
    """Draw samples from ``model`` and wrap them in a ``Dataset``.

    Args:
        model: object providing ``get_vars()``, ``domain``, ``support`` and
            ``weightfun``.
        n_samples: number of samples requested from ``positive``.

    Returns:
        ``Dataset(features, data, None)`` where ``features`` is a list of
        ``(variable, 'categorical' | 'continuous')`` pairs in ``get_vars()``
        order and ``data`` holds one row per sample, with columns reordered
        to match ``features``. Boolean variables are stored as ``bool``,
        all others as ``float``.

    Raises:
        KeyError: if a variable has a type other than BOOL/REAL, or a domain
            variable name is not among ``model.get_vars()``.
    """
    str_type = {BOOL: 'categorical', REAL: 'continuous'}

    features = []
    name_to_var = {}
    column_of = {}
    for var in model.get_vars():
        name_to_var[var.symbol_name()] = var
        # setdefault keeps the first position if a variable is listed twice,
        # which is what a list ``.index`` lookup would return.
        column_of.setdefault(var, len(features))
        features.append((var, str_type[var.symbol_type()]))

    samples, _ = positive(n_samples, model.domain, model.support, model.weightfun)

    data = []
    for sample in samples:
        row = [None] * len(sample)
        # Samples are ordered by ``model.domain.variables``; rows are
        # ordered by ``features``.
        for index, varname in enumerate(model.domain.variables):
            var = name_to_var[varname]
            cast = bool if var.symbol_type() == BOOL else float
            row[column_of[var]] = cast(sample[index])

        # Every column must have been filled by exactly this mapping.
        assert None not in row
        data.append(row)

    return Dataset(features, data, None)
def approx_IAE(model1, model2, seed, sample_count):
    """Estimate the integrated absolute error between two models by sampling.

    Samples are drawn (unweighted) from the union of the two supports and
    split into three regions: only ``model1``'s support, only ``model2``'s
    support, and their intersection. The absolute weight difference is
    ``w1`` in the first region, ``w2`` in the second and ``|w1 - w2|`` in
    the third. The summed differences are averaged over ``sample_count``
    and scaled by an estimate of the volume of the sampled region.

    Args:
        model1, model2: models exposing ``get_vars()``, ``support`` and
            ``weightfun``; both must range over the same set of variables.
        seed: not used by this function; kept for interface compatibility.
        sample_count: number of samples requested from ``positive`` and the
            divisor of the average (must be non-zero).

    Returns:
        The estimated integrated absolute error.

    Raises:
        AssertionError: if the two models do not share the same variables.
    """
    assert (set(model1.get_vars()) == set(model2.get_vars())), \
        "M1 vars: {}\n M2 vars: {}".format(model1.get_vars(), model2.get_vars())

    domain, bounds = merged_domain(model1, model2)
    samples, pos_ratio = positive(sample_count, domain,
                                  Or(model1.support, model2.support),
                                  weight=None)

    # Partition the samples by which support(s) they fall into.
    samples_m1 = samples[evaluate(domain,
                                  And(model1.support, Not(model2.support)),
                                  samples)]
    samples_m2 = samples[evaluate(domain,
                                  And(Not(model1.support), model2.support),
                                  samples)]
    samples_inter = samples[evaluate(domain,
                                     And(model1.support, model2.support),
                                     samples)]

    # Outside the other model's support its weight counts as zero, so the
    # absolute difference is just the model's own weight there.
    weights_m1 = sum(evaluate(domain, model1.weightfun, samples_m1))
    weights_m2 = sum(evaluate(domain, model2.weightfun, samples_m2))
    weights_inter = sum(abs(evaluate(domain, model1.weightfun, samples_inter) -
                            evaluate(domain, model2.weightfun, samples_inter)))

    n_m1 = len(samples_m1)
    n_m2 = len(samples_m2)
    n_inter = len(samples_inter)

    norm_m1 = weights_m1 / sample_count
    norm_m2 = weights_m2 / sample_count
    norm_inter = weights_inter / sample_count

    # Each message is labelled with the region it actually reports on.
    logger.debug(f"[ S1 ~S2] len: {n_m1}, sum: {weights_m1}, norm: {norm_m1}")
    logger.debug(f"[~S1  S2] len: {n_m2}, sum: {weights_m2}, norm: {norm_m2}")
    logger.debug(f"[ S1  S2] len: {n_inter}, sum: {weights_inter}, norm: {norm_inter}")

    # Volume estimate: pos_ratio times the size of the full domain, i.e.
    # 2^(#boolean variables) times the product of the real intervals.
    approx_vol = pos_ratio * 2**len(domain.bool_vars)
    for lb, ub in bounds.values():
        approx_vol *= (ub - lb)

    return approx_vol * (weights_m1 + weights_m2 + weights_inter) / sample_count