def test_sampling():
    """Check weighted and unweighted sampling on a small mixed domain.

    The support is ``(a XOR b) & (x <= y)`` over two Booleans and two reals
    bounded to [0, 1].  The weight is 1 where ``a`` holds and 2 otherwise.
    Given the XOR, "otherwise" means ``b`` holds, so weighted sampling is
    expected to yield about half as many ``a`` samples as ``b`` samples.
    Unweighted sampling is expected to yield about equal counts.

    The expected positive ratio of 0.25 matches 2 of the 4 Boolean
    assignments times the half of the unit square where ``x <= y``.
    """
    domain = Domain.make(["a", "b"], ["x", "y"], real_bounds=(0, 1))
    a, b, x, y = domain.get_symbols()
    support = (a | b) & (~a | ~b) & (x <= y)
    weight = smt.Ite(a, smt.Real(1), smt.Real(2))

    required_sample_count = 10000

    # --- weighted sampling -------------------------------------------------
    samples_weighted, pos_ratio = positive(required_sample_count, domain,
                                           support, weight)
    assert samples_weighted.shape[0] == required_sample_count
    # Every returned sample must satisfy the support.
    assert sum(evaluate(domain, support,
                        samples_weighted)) == len(samples_weighted)
    samples_a = sum(evaluate(domain, a, samples_weighted))
    samples_b = sum(evaluate(domain, b, samples_weighted))
    assert samples_a == pytest.approx(samples_b / 2, rel=0.2)
    assert pos_ratio == pytest.approx(0.25, rel=0.1)

    # --- unweighted sampling -----------------------------------------------
    samples_unweighted, pos_ratio = positive(required_sample_count, domain,
                                             support)
    assert samples_unweighted.shape[0] == required_sample_count
    # Compare against the unweighted batch itself, not the weighted one.
    assert sum(evaluate(domain, support,
                        samples_unweighted)) == len(samples_unweighted)
    samples_a = sum(evaluate(domain, a, samples_unweighted))
    samples_b = sum(evaluate(domain, b, samples_unweighted))
    assert samples_a == pytest.approx(samples_b, rel=0.1)
    assert pos_ratio == pytest.approx(0.25, rel=0.1)
def test_sampling_stacking():
    """Sampling 20 points with ``sample_count=10`` must not raise ValueError.

    NOTE(review): presumably this forces the sampler to combine several
    batches to reach the requested count -- confirm against ``positive``.
    """
    domain = Domain.make([], ["x", "y"], real_bounds=(0, 1))
    x, y = domain.get_symbols()
    support = (x <= y)

    raised_value_error = False
    try:
        positive(20, domain, support, sample_count=10, max_samples=10000)
    except ValueError:
        raised_value_error = True

    assert not raised_value_error
def test_sampling_max_samples():
    """Sampling from an unsatisfiable support must raise SamplingError.

    The support is the constant FALSE, so no sample can ever be accepted.
    The call is made with ``max_samples=100000`` and is expected to fail
    with ``SamplingError`` instead of returning.
    """
    domain = Domain.make([], ["x", "y"], real_bounds=(0, 1))
    x, y = domain.get_symbols()
    support = smt.FALSE()

    sampling_failed = False
    try:
        positive(10, domain, support, max_samples=100000)
    except SamplingError:
        sampling_failed = True

    assert sampling_failed
def sample_dataset(model, n_samples):
    """Draw ``n_samples`` points from ``model`` and wrap them in a Dataset.

    Samples come from ``positive`` using the model's domain, support and
    weight function.  Their columns follow ``model.domain.variables``.  Each
    row is reordered to follow ``model.get_vars()`` instead.  Boolean
    variables are converted with ``bool`` and all others with ``float``.

    Args:
        model: object exposing ``get_vars()``, ``domain``, ``support`` and
            ``weightfun``.
        n_samples: number of samples requested from ``positive``.

    Returns:
        ``Dataset(features, data, None)``.  ``features`` is a list of
        ``(variable, 'categorical' | 'continuous')`` pairs and ``data`` is
        the list of reordered rows.

    Raises:
        KeyError: if a name in ``model.domain.variables`` does not belong to
            any variable returned by ``model.get_vars()``, or a variable has
            a type other than BOOL/REAL.
    """
    str_type = {BOOL: 'categorical', REAL: 'continuous'}

    model_vars = list(model.get_vars())
    features = [(var, str_type[var.symbol_type()]) for var in model_vars]
    name_to_var = {var.symbol_name(): var for var in model_vars}

    samples, _ = positive(n_samples, model.domain, model.support,
                          model.weightfun)

    # The column mapping does not depend on the sample, so compute it once
    # instead of rebuilding it for every variable of every row.
    feature_vars = [var for var, _ in features]
    column_plan = []
    for source_index, varname in enumerate(model.domain.variables):
        var = name_to_var[varname]
        convert = bool if var.symbol_type() == BOOL else float
        column_plan.append((source_index, feature_vars.index(var), convert))

    data = []
    for sample in samples:
        row = [None] * len(sample)
        for source_index, target_index, convert in column_plan:
            row[target_index] = convert(sample[source_index])

        # Every column of the sample must have been assigned a value.
        assert None not in row
        data.append(row)

    return Dataset(features, data, None)
def approx_IAE(model1, model2, seed, sample_count):
    """Sampling-based estimate of the weight difference between two models.

    NOTE(review): "IAE" presumably stands for integrated absolute error --
    confirm against the project documentation.

    ``sample_count`` points are drawn with ``weight=None`` from the union of
    both supports.  They are split into three regions, each contributing:

    * S1 and not S2: the sum of ``model1.weightfun``
    * S2 and not S1: the sum of ``model2.weightfun``
    * S1 and S2: the sum of ``|model1.weightfun - model2.weightfun|``

    The total is scaled by an approximate volume of the union and divided by
    ``sample_count``.  The volume is ``pos_ratio`` times the number of
    Boolean assignments times the widths of the real bounds.

    Args:
        model1: first model; must expose ``get_vars()``, ``support`` and
            ``weightfun``.
        model2: second model, over the same variables as ``model1``.
        seed: currently unused by this function.
        sample_count: number of samples requested from ``positive``.

    Returns:
        The approximate value described above.

    Raises:
        AssertionError: if the two models do not share the same variables.
    """
    assert(set(model1.get_vars()) == set(model2.get_vars())),\
        "M1 vars: {}\n M2 vars: {}".format(model1.get_vars(),model2.get_vars())

    domain, bounds = merged_domain(model1, model2)

    samples, pos_ratio = positive(sample_count, domain,
                                  Or(model1.support, model2.support),
                                  weight=None)

    # Partition the samples by which supports they satisfy.
    only_m1 = And(model1.support, Not(model2.support))
    only_m2 = And(Not(model1.support), model2.support)
    both = And(model1.support, model2.support)

    samples_m1 = samples[evaluate(domain, only_m1, samples)]
    samples_m2 = samples[evaluate(domain, only_m2, samples)]
    samples_inter = samples[evaluate(domain, both, samples)]

    weights_m1 = sum(evaluate(domain, model1.weightfun, samples_m1))
    weights_m2 = sum(evaluate(domain, model2.weightfun, samples_m2))
    weights_inter = sum(abs(evaluate(domain, model1.weightfun, samples_inter) -
                            evaluate(domain, model2.weightfun, samples_inter)))

    n_m1 = len(samples_m1)
    n_m2 = len(samples_m2)
    n_inter = len(samples_inter)

    # The normalized values are only reported in the debug output.
    norm_m1 = weights_m1 / sample_count
    norm_m2 = weights_m2 / sample_count
    norm_inter = weights_inter / sample_count

    # Each message is labelled with the region it actually describes.
    logger.debug(f"[ S1 ~S2] len: {n_m1}, sum: {weights_m1}, norm: {norm_m1}")
    logger.debug(f"[~S1  S2] len: {n_m2}, sum: {weights_m2}, norm: {norm_m2}")
    logger.debug(f"[ S1  S2] len: {n_inter}, sum: {weights_inter}, norm: {norm_inter}")

    # Approximate volume of the union of supports.
    approx_vol = pos_ratio * 2**len(domain.bool_vars)
    for lb, ub in bounds.values():
        approx_vol *= (ub - lb)

    return approx_vol*(weights_m1 + weights_m2 + weights_inter) / sample_count