    def compute_probabilities(self,
                              queries,
                              sample_count=None,
                              add_bounds=False):
        sample_count = sample_count if sample_count is not None else self.sample_count
        samples = uniform(self.domain, sample_count, rand_gen=self.rand_gen)
        labels = evaluate(self.domain, self.support, samples)
        positive_samples = samples[labels]

        results = []
        if self.weight is not None:
            # Weighted estimate: total weight of positive samples satisfying
            # the query, divided by the total weight of all positive samples.
            sample_weights = evaluate(self.domain, self.weight,
                                      positive_samples)
            total = sum(sample_weights)
            for query in queries:
                if total > 0:
                    query_labels = evaluate(self.domain, query,
                                            positive_samples)
                    results.append(sum(sample_weights[query_labels]) / total)
                else:
                    results.append(None)
        else:
            # Unweighted estimate: fraction of positive samples that also
            # satisfy the query.
            total = positive_samples.shape[0]
            for query in queries:
                if total > 0:
                    query_labels = evaluate(self.domain, query,
                                            positive_samples)
                    results.append(sum(query_labels) / total)
                else:
                    results.append(None)

        return results
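For reference, the estimator above is plain rejection sampling: draw uniform samples, keep those that satisfy the support, and return the (optionally weighted) fraction that also satisfies each query. A minimal self-contained sketch of the same idea; the in_support, in_query and weight callables are hypothetical stand-ins for the evaluate() calls above.

import numpy as np

def mc_probability(in_support, in_query, weight=None, n=100000, seed=0):
    # Uniform samples on the unit square; the callables map an (n, 2)
    # array to a boolean mask (or, for weight, to per-sample weights).
    rng = np.random.default_rng(seed)
    samples = rng.random((n, 2))
    positive = samples[in_support(samples)]
    if positive.shape[0] == 0:
        return None  # no positive samples, as in the None branches above
    w = weight(positive) if weight is not None else np.ones(positive.shape[0])
    return w[in_query(positive)].sum() / w.sum()

# P(x <= 0.5 | x <= y) on the unit square is 0.75
print(mc_probability(lambda s: s[:, 0] <= s[:, 1], lambda s: s[:, 0] <= 0.5))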
Example #2
def test_example1():
    domain, formula, name = ice_cream_problem()
    c, b, w = domain.get_symbols(["chocolate", "banana", "weekend"])

    c_val = 0.41358769878652346
    b_val = 0.04881279380000003
    assignment = {"chocolate": c_val, "banana": b_val, "weekend": 1.0}
    instance = np.array([assignment[v] for v in domain.variables])

    h1 = -0.9094061613514598 < (-2.11558444119424 * c +
                                -0.7052753601938021 * b)
    print(-0.9094061613514598,
          (-2.11558444119424 * c_val + -0.7052753601938021 * b_val))
    h2 = -43.62318633585081 < (-56.41097694745345 * c + -50.5657977670196 * b)
    print(-43.62318633585081,
          (-56.41097694745345 * c_val + -50.5657977670196 * b_val))
    h3 = -0.9094061613514598 < (-2.11558444119424 * c +
                                -0.7052753601938021 * b)
    print(-0.9094061613514598,
          (-2.11558444119424 * c_val + -0.7052753601938021 * b_val))
    h4 = 7.792607696237757 < (18.128225098004087 * c + 6.043431893671825 * b)
    print(7.792607696237757,
          (18.128225098004087 * c_val + 6.043431893671825 * b_val))
    h5 = -0.9094061613514598 < -(2.11558444119424 * c +
                                 -0.7052753601938021 * b)
    print(-0.9094061613514598,
          -(2.11558444119424 * c_val + -0.7052753601938021 * b_val))
    # h1: True, h2: True, h3: True, h4: False, h5: True

    learned = ((h1 | h2) & (h3 | ~w) & (h4 | h5))

    print(evaluate(domain, formula, instance))
    print(evaluate(domain, learned, instance))
Example #3
def negative_samples_example(background_knowledge):
    domain = Domain.make(["a", "b"], ["x", "y"], [(0, 1), (0, 1)])
    a, b, x, y = domain.get_symbols(domain.variables)
    formula = (a | b) & (~a | ~b) & (x <= y) & domain.get_bounds()
    if background_knowledge:
        background_knowledge = (a | b) & (~a | ~b)
    else:
        background_knowledge = None
    thresholds = {"x": 0.1, "y": 0.2}
    data = uniform(domain, 10000)
    labels = evaluate(domain, formula, data)
    data = data[labels == 1]
    labels = labels[labels == 1]
    original_sample_count = len(labels)

    start_time = time.time()

    data, labels = OneClassStrategy.add_negatives(domain, data, labels,
                                                  thresholds, 100,
                                                  background_knowledge)
    print("Created {} negative examples".format(
        len(labels) - original_sample_count))

    directory = "test_output{}bg_sampled{}{}".format(
        os.path.sep, os.path.sep, time.strftime("%Y-%m-%d %Hh%Mm%Ss"))

    def learn_inc(_data, _labels, _i, _k, _h):
        strategy = OneClassStrategy(RandomViolationsStrategy(10),
                                    thresholds,
                                    background_knowledge=background_knowledge)
        learner = KCnfSmtLearner(_k, _h, strategy, "mvn")
        initial_indices = LearnOptions.initial_random(20)(list(
            range(len(_data))))
        learner.add_observer(
            PlottingObserver(domain, directory,
                             "run_{}_{}_{}".format(_i, _k,
                                                   _h), domain.real_vars[0],
                             domain.real_vars[1], None, False))
        return learner.learn(domain, _data, _labels, initial_indices)

    (new_data, new_labels,
     learned_formula), k, h = learn_bottom_up(data, labels, learn_inc, 1, 1, 1,
                                              1, None, None)
    if background_knowledge:
        learned_formula = learned_formula & background_knowledge

    duration = time.time() - start_time

    print("{}".format(smt_to_nested(learned_formula)))
    print("Learned CNF(k={}, h={}) formula {}".format(
        k, h, pretty_print(learned_formula)))
    print("Data-set grew from {} to {} entries".format(len(labels),
                                                       len(new_labels)))
    print("Learning took {:.2f}s".format(duration))

    test_data, labels = OneClassStrategy.add_negatives(domain, data, labels,
                                                       thresholds, 1000,
                                                       background_knowledge)
    assert all(evaluate(domain, learned_formula, test_data) == labels)
Example #4
    def test_order(self):
        domain = Domain(["s1", "s2"], {"s1": REAL, "s2": BOOL}, {"s1": (0, 1)})
        data1 = np.array([1, 0])
        data2 = np.array([0, 1])

        a, b = domain.get_symbols(["s1", "s2"])
        f = (a >= 1) & ~b
        assert evaluate(domain, f, data1) == np.array([1])
        assert evaluate(domain, f, data2) == np.array([0])
Example #5
    def eval(self, data):
        # TODO: fix this
        raise NotImplementedError()
        # Unreachable until the TODO is resolved; the intent appears to be a
        # log-density: LOG_ZERO outside the support, log(weight) inside it.
        data = np.array(data)
        result = np.zeros(data.shape)
        result[:] = LOG_ZERO
        inside = evaluate(self.domain, self.support, data)
        result[inside] = np.log(
            evaluate(self.domain, self.weightfun, data[inside]))
        return result
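The body below the raise is unreachable, but it sketches the intended behaviour: a per-sample log-density that is LOG_ZERO outside the support and log(weight) inside it. A hypothetical standalone version of that intent (returning one value per row, where the disabled code allocates a full data-shaped array):

import numpy as np

LOG_ZERO = -np.inf  # assumed value of the LOG_ZERO constant used above

def log_density(in_support, weight, data):
    # in_support: (n, d) -> boolean mask; weight: rows -> density values
    data = np.asarray(data, dtype=float)
    result = np.full(data.shape[0], LOG_ZERO)
    inside = in_support(data)
    result[inside] = np.log(weight(data[inside]))
    return result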
Example #6
def positive(required_sample_count,
             domain,
             support,
             weight=None,
             sample_pool_size=None,
             sample_count=None,
             max_samples=None,
             rand_gen=DEF_RNG):
    sample_pool_size = sample_pool_size or (
        required_sample_count if weight is None else required_sample_count * 10)
    sample_count = sample_count or sample_pool_size * 2
    max_samples = max_samples or sample_count * 10
    samples = uniform(domain, sample_count, rand_gen=rand_gen)
    labels = evaluate(domain, support, samples)
    pos_samples = samples[labels]

    while pos_samples.shape[0] < sample_pool_size:
        if sample_count >= max_samples:
            raise SamplingError(
                "Max sample count {} exceeded (could not find pool of size {})"
                .format(max_samples, sample_pool_size))

        pos_ratio = pos_samples.shape[0] / sample_count
        estimated_count = (sample_pool_size - pos_samples.shape[0]) / max(
            pos_ratio, 0.001)
        new_sample_count = min(int(estimated_count * 1.1),
                               max_samples - sample_count)
        new_samples = uniform(domain, new_sample_count, rand_gen=rand_gen)
        new_labels = evaluate(domain, support, new_samples)
        new_pos_samples = new_samples[new_labels]
        if pos_samples.shape[0] > 0:
            pos_samples = np.concatenate((pos_samples, new_pos_samples),
                                         axis=0)
        else:
            pos_samples = new_pos_samples
        sample_count = sample_count + new_sample_count

    pos_ratio = pos_samples.shape[0] / sample_count

    if pos_samples.shape[0] > sample_pool_size:
        pos_samples = pos_samples[:sample_pool_size]

    if weight is not None:
        sample_weights = evaluate(domain, weight, pos_samples)
        return np.array(
            list(
                weighted_sample(sample_weights,
                                pos_samples,
                                required_sample_count,
                                rand_gen=rand_gen))), pos_ratio
    else:
        return pos_samples, pos_ratio
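Note that weighted_sample is assumed here to perform importance resampling: drawing rows with probability proportional to their weight, with replacement. A hypothetical numpy equivalent of that contract (weighted_sample_np is not part of the code base):

import numpy as np

def weighted_sample_np(weights, samples, count, rng=None):
    # Draw `count` rows of `samples`, with replacement, with probability
    # proportional to `weights` -- the behaviour assumed of weighted_sample.
    rng = rng or np.random.default_rng()
    p = np.asarray(weights, dtype=float)
    idx = rng.choice(len(samples), size=count, replace=True, p=p / p.sum())
    return samples[idx]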
Example #7
def main():
    domain, formula, name = checker_problem()
    thresholds = {v: 0.1 for v in domain.real_vars}
    data = uniform(domain, 1000)
    labels = evaluate(domain, formula, data)
    data = data[labels == 1]
    labels = labels[labels == 1]

    def learn_inc(_data, _labels, _i, _k, _h):
        strategy = OneClassStrategy(RandomViolationsStrategy(10), thresholds)
        learner = KCnfSmtLearner(_k, _h, strategy, "mvn")
        initial_indices = LearnOptions.initial_random(20)(list(
            range(len(_data))))
        # learner.add_observer(LoggingObserver(None, _k, _h, None, True))
        learner.add_observer(
            PlottingObserver(domain, "test_output/checker",
                             "run_{}_{}_{}".format(_i, _k,
                                                   _h), domain.real_vars[0],
                             domain.real_vars[1], None, False))
        return learner.learn(domain, _data, _labels, initial_indices)

    (new_data, new_labels,
     formula), k, h = learn_bottom_up(data, labels, learn_inc, 1, 1, 1, 1,
                                      None, None)
    print("Learned CNF(k={}, h={}) formula {}".format(k, h,
                                                      pretty_print(formula)))
    print("Data-set grew from {} to {} entries".format(len(labels),
                                                       len(new_labels)))
Example #8
def background_knowledge_example():
    domain = Domain.make(["a", "b"], ["x", "y"], [(0, 1), (0, 1)])
    a, b, x, y = domain.get_symbols(domain.variables)
    formula = (a | b) & (~a | ~b) & (x >= 0) & (x <= y) & (y <= 1)
    thresholds = {v: 0.1 for v in domain.real_vars}
    data = uniform(domain, 10000)
    labels = evaluate(domain, formula, data)
    data = data[labels == 1]
    labels = labels[labels == 1]

    def learn_inc(_data, _labels, _i, _k, _h):
        strategy = OneClassStrategy(
            RandomViolationsStrategy(10),
            thresholds)  #, background_knowledge=(a | b) & (~a | ~b))
        learner = KCnfSmtLearner(_k, _h, strategy, "mvn")
        initial_indices = LearnOptions.initial_random(20)(list(
            range(len(_data))))
        # learner.add_observer(LoggingObserver(None, _k, _h, None, True))
        learner.add_observer(
            PlottingObserver(domain, "test_output/bg",
                             "run_{}_{}_{}".format(_i, _k,
                                                   _h), domain.real_vars[0],
                             domain.real_vars[1], None, False))
        return learner.learn(domain, _data, _labels, initial_indices)

    (new_data, new_labels,
     formula), k, h = learn_bottom_up(data, labels, learn_inc, 1, 1, 1, 1,
                                      None, None)
    print("Learned CNF(k={}, h={}) formula {}".format(k, h,
                                                      pretty_print(formula)))
    print("Data-set grew from {} to {} entries".format(len(labels),
                                                       len(new_labels)))
Example #9
    def test_order(self):
        domain = Domain(["s1", "s2"], {"s1": REAL, "s2": BOOL}, {"s1": (0, 1)})
        data = np.array([[1, 0], [0, 1]])

        a, b = domain.get_symbols(["s1", "s2"])
        f = (a >= 1) & ~b
        assert all(evaluate(domain, f, data) == np.array([1, 0]))
Example #10
    def get_weighted_volume(self, weight_function, query=None):
        if self.is_leaf:
            if not self.empty:
                labels = self.labels
                if query:
                    labels = np.logical_and(
                        evaluate(self.builder.domain, query, self.samples),
                        labels)
                weighted_count = evaluate(self.builder.domain, weight_function,
                                          self.samples[labels])
                return (sum(weighted_count) / len(self.samples) *
                        (self.volume / self.builder.volume))
            return 0
        else:
            return sum(
                node.get_weighted_volume(weight_function, query)
                for node in self.children)
Example #11
    def integrate(self, domain, convex_bounds: List[LinearInequality],
                  polynomial: Polynomial):
        formula = smt.And(*[i.to_smt() for i in convex_bounds])

        if self.bounding_box > 0:
            if self.bounding_box == 1:
                a_matrix = numpy.zeros(
                    (len(convex_bounds), len(domain.real_vars)))
                b_matrix = numpy.zeros((len(convex_bounds), ))
                for i, bound in enumerate(convex_bounds):
                    for j in range(len(domain.real_vars)):
                        a_matrix[i, j] = bound.a(domain.real_vars[j])
                    b_matrix[i] = bound.b()

                lb_ub_bounds = {}
                c = numpy.zeros((len(domain.real_vars), ))
                for j in range(len(domain.real_vars)):
                    c[j] = 1
                    # noinspection PyTypeChecker
                    lb = scipy.optimize.linprog(c, a_matrix, b_matrix).x[j]
                    # noinspection PyTypeChecker
                    ub = scipy.optimize.linprog(-c, a_matrix, b_matrix).x[j]
                    c[j] = 0
                    lb_ub_bounds[domain.real_vars[j]] = (lb, ub)
            elif self.bounding_box == 2:
                samples = uniform(domain,
                                  self.sample_count,
                                  rand_gen=self.rand_gen)
                labels = evaluate(domain, formula, samples)
                samples = samples[labels == 1]

                try:
                    samples.sort(axis=0)
                    std = abs(samples[0:-1, :] - samples[1:, :]).std(axis=0)
                    lbs = samples[0, :] - std
                    ubs = samples[-1, :] + std
                except ValueError:
                    return 0

                lb_ub_bounds = {
                    domain.variables[j]: (lbs[j], ubs[j])
                    for j in range(len(domain.variables))
                }
            else:
                raise ValueError("Illegal bounding box value {}".format(
                    self.bounding_box))
            domain = Domain(domain.variables, domain.var_types, lb_ub_bounds)

        engine = RejectionEngine(domain,
                                 formula,
                                 polynomial.to_smt(),
                                 self.sample_count,
                                 seed=self.seed)
        return engine.compute_volume()
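As a toy illustration of the bounding_box == 1 branch above: with the constraints written as A x <= b, minimizing and maximizing each coordinate with scipy.optimize.linprog yields per-variable bounds (linprog minimizes, hence -c for the upper bound). A minimal sketch; bounds=(None, None) is passed so that only the constraints restrict the variables.

import numpy
import scipy.optimize

# The triangle x >= 0, y >= 0, x + y <= 1, written as A x <= b.
a_matrix = numpy.array([[-1.0, 0.0], [0.0, -1.0], [1.0, 1.0]])
b_matrix = numpy.array([0.0, 0.0, 1.0])

for j in range(2):
    c = numpy.zeros(2)
    c[j] = 1
    lb = scipy.optimize.linprog(c, a_matrix, b_matrix, bounds=(None, None)).x[j]
    ub = scipy.optimize.linprog(-c, a_matrix, b_matrix, bounds=(None, None)).x[j]
    print(j, lb, ub)  # both variables range over [0, 1]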
Example #12
def get_problem_samples(domain, support, sample_count, max_ratio):
    minimal_count = sample_count * min(max_ratio, 1 - max_ratio)
    samples = uniform(domain, sample_count)
    labels = evaluate(domain, support, samples)
    positive_count = sum(labels)
    if positive_count < minimal_count or (sample_count -
                                          positive_count) < minimal_count:
        raise InsufficientBalanceError()

    return samples, labels
Example #13
    def compute_volume(self,
                       sample_count=None,
                       add_bounds=False,
                       ohe_variables=None):
        sample_count = sample_count if sample_count is not None else self.sample_count
        samples = uniform(
            self.domain,
            sample_count,
            rand_gen=self.rand_gen,
            ohe_variables=ohe_variables,
        )
        labels = evaluate(self.domain, self.support, samples)

        if ohe_variables is None:
            bound_volume = (self.domain.get_volume()
                            if len(self.domain.real_vars) > 0
                            else 2**len(self.domain.bool_vars))
        else:
            ohevars = {x for ohe in ohe_variables for x in ohe}
            bound_volume = 2**len(
                [v for v in self.domain.bool_vars if v not in ohevars])
            for ohe in ohe_variables:
                bound_volume *= len(ohe)

            real_volume = self.domain.get_bounding_box_volume()
            if real_volume != 0:
                bound_volume *= real_volume

        approx_volume = bound_volume * sum(labels) / len(labels)

        if self.weight is not None:
            pos_samples = samples[labels]
            sample_weights = evaluate(self.domain, self.weight, pos_samples)
            try:
                return (sum(sample_weights) / pos_samples.shape[0] *
                        approx_volume)
            except ZeroDivisionError:
                return 0.0
        else:
            return approx_volume
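The unweighted estimate above is simply the bounding-box volume times the accepted fraction. A quick sanity check of that formula in plain numpy:

import numpy as np

rng = np.random.default_rng(0)
samples = rng.random((100000, 2))        # uniform on [0, 1]^2, box volume 1
labels = samples[:, 0] <= samples[:, 1]  # support: x <= y
print(1.0 * labels.sum() / len(labels))  # close to the exact volume 0.5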
Example #14
def test_sampling():
    domain = Domain.make(["a", "b"], ["x", "y"], real_bounds=(0, 1))
    a, b, x, y = domain.get_symbols()
    support = (a | b) & (~a | ~b) & (x <= y)
    weight = smt.Ite(a, smt.Real(1), smt.Real(2))

    required_sample_count = 10000
    samples_weighted, pos_ratio = positive(required_sample_count, domain,
                                           support, weight)
    assert samples_weighted.shape[0] == required_sample_count
    assert sum(evaluate(domain, support,
                        samples_weighted)) == len(samples_weighted)
    samples_a = sum(evaluate(domain, a, samples_weighted))
    samples_b = sum(evaluate(domain, b, samples_weighted))
    assert samples_a == pytest.approx(samples_b / 2, rel=0.2)
    assert pos_ratio == pytest.approx(0.25, rel=0.1)

    samples_unweighted, pos_ratio = positive(required_sample_count, domain,
                                             support)
    assert samples_unweighted.shape[0] == required_sample_count
    assert sum(evaluate(domain, support,
                        samples_unweighted)) == len(samples_unweighted)
    samples_a = sum(evaluate(domain, a, samples_unweighted))
    samples_b = sum(evaluate(domain, b, samples_unweighted))
    assert samples_a == pytest.approx(samples_b, rel=0.1)
    assert pos_ratio == pytest.approx(0.25, rel=0.1)
Example #15
    def get_half_spaces(self, samples):
        half_spaces = []
        print("Generating half spaces: ", end="")
        if self.real_count > 0:
            while len(half_spaces) < self.h:
                half_space = generate_half_space_sample(
                    self.domain, self.real_count)
                labels = evaluate(self.domain, half_space, samples)
                half_spaces.append((half_space, labels))
                print("y", end="")

        print()
        return half_spaces
Example #16
def test_adaptive_threshold():
    random.seed(888)
    np.random.seed(888)

    domain = Domain.make([], ["x", "y"], [(0, 1), (0, 1)])
    x, y = domain.get_symbols(domain.variables)
    formula = (x <= y) & (x <= 0.5) & (y <= 0.5) & domain.get_bounds()
    thresholds = {"x": 0.1, "y": 0.1}
    data, _ = RejectionEngine(domain, formula, x * x, 100000).get_samples(50)
    k = 4
    nearest_neighbors = []
    for i in range(len(data)):
        nearest_neighbors.append([])
        for j in range(len(data)):
            if i != j:
                distance = (
                    1 if any(data[i, b] != data[j, b]
                             for b, v in enumerate(domain.variables)
                             if domain.is_bool(v))
                    else max(
                        abs(data[i, r] - data[j, r]) /
                        (domain.var_domains[v][1] - domain.var_domains[v][0])
                        for r, v in enumerate(domain.variables)
                        if domain.is_real(v)))
                if len(nearest_neighbors[i]) < k:
                    nearest_neighbors[i].append((j, distance))
                else:
                    index_of_furthest = None
                    for fi, f in enumerate(nearest_neighbors[i]):
                        if (index_of_furthest is None or f[1] >
                                nearest_neighbors[i][index_of_furthest][1]):
                            index_of_furthest = fi
                    if distance < nearest_neighbors[i][index_of_furthest][1]:
                        nearest_neighbors[i][index_of_furthest] = (j, distance)
    print(nearest_neighbors)
    t = [[
        sum(n[1] for n in nearest_neighbors[i]) / len(nearest_neighbors[i]) *
        (domain.var_domains[v][1] - domain.var_domains[v][0])
        for v in domain.real_vars
    ] for i in range(len(nearest_neighbors))]
    t = np.array(t)
    print(t)
    print(data)
    # data = uniform(domain, 400)
    labels = evaluate(domain, formula, data)
    data = data[labels == 1]
    labels = labels[labels == 1]
    data, labels = OneClassStrategy.add_negatives(domain, data, labels, t,
                                                  1000)

    directory = "test_output{}adaptive{}{}".format(
        os.path.sep, os.path.sep, time.strftime("%Y-%m-%d %Hh%Mm%Ss"))
    os.makedirs(directory)

    name = os.path.join(directory, "combined.png")
    plot.plot_combined("x", "y", domain, formula, (data, labels), None, name,
                       set(), set())
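For the special case where every variable is real with known bounds, the hand-rolled nearest-neighbour loops above can be expressed with numpy broadcasting. A sketch under that assumption (knn_thresholds is a hypothetical helper, not part of the code base):

import numpy as np

def knn_thresholds(data, bounds, k=4):
    # data: (n, d) points; bounds: (d, 2) array of per-variable (lb, ub)
    widths = bounds[:, 1] - bounds[:, 0]
    normalized = data / widths  # the shifts cancel in the differences below
    # Chebyshev (max-norm) distance between every pair of points
    dist = np.abs(normalized[:, None, :] - normalized[None, :, :]).max(axis=2)
    np.fill_diagonal(dist, np.inf)  # exclude each point's distance to itself
    mean_knn = np.sort(dist, axis=1)[:, :k].mean(axis=1)
    # One threshold per point and per variable, matching `t` above
    return mean_knn[:, None] * widths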
Example #17
    def observe_iteration(self, data, labels, formula, new_active_indices,
                          solving_time, selection_time):
        self.iteration += 1
        learned_labels = evaluate(self.domain, formula, data)
        name = "{}{}{}_{}".format(self.directory, os.path.sep, self.name,
                                  self.iteration)
        plot_combined(self.feat_x,
                      self.feat_y,
                      self.domain,
                      formula, (data, labels),
                      learned_labels,
                      name,
                      self.all_active,
                      new_active_indices,
                      condition=self.condition)
        self.all_active = self.all_active.union(new_active_indices)
Example #18
def prepare_ratios():
    sample_count = 1000
    bounds_pool = [(-1, 1), (-10, 10), (-100, 100), (-1000, 1000)]
    ratios = dict()
    for name, entry, density_filename in select_benchmark_files(
            lambda e: "bounds" not in e and benchmark_filter(e)):
        print("Finding ratios for {}".format(name))
        pysmt.environment.push_env()
        pysmt.environment.get_env().enable_infix_notation = True

        density = Density.import_from(density_filename)
        domain = density.domain

        result_bounds = []
        result_ratios = []
        for bounds in itertools.product(
                *[bounds_pool for _ in range(len(domain.real_vars))]):
            var_bounds = dict(zip(domain.real_vars, bounds))
            restricted_domain = Domain(domain.variables, domain.var_types,
                                       var_bounds)
            samples = uniform(restricted_domain, sample_count)
            labels = evaluate(restricted_domain, density.support, samples)
            positive_count = sum(labels)
            if 0 < positive_count < sample_count:
                ratio = positive_count / sample_count
                result_bounds.append(var_bounds)
                result_ratios.append(ratio)

        ratios[name] = list(zip(result_bounds, result_ratios))
        print(name, result_ratios)

        pysmt.environment.pop_env()

    with open(get_summary_file(), "rb") as summary_file_reference:
        summary = pickle.load(summary_file_reference)

    for name, bounds in ratios.items():
        summary[name]["bounds"] = bounds

    with open(get_summary_file(), "wb") as summary_file_reference:
        pickle.dump(summary, summary_file_reference)
Example #19
    def add_negatives(domain,
                      data,
                      labels,
                      thresholds,
                      sample_count,
                      background_knowledge=None,
                      distance_measure=None):
        # type: (Domain, np.ndarray, np.ndarray, Dict, int, FNode, Any) -> Tuple[np.ndarray, np.ndarray]

        new_data = uniform(domain, sample_count)
        background_knowledge = background_knowledge or TRUE()
        supported_indices = evaluate(domain, background_knowledge, new_data)
        boolean_indices = [
            i for i, v in enumerate(domain.variables) if domain.is_bool(v)
        ]
        real_indices = [
            i for i, v in enumerate(domain.variables) if domain.is_real(v)
        ]
        for j in range(new_data.shape[0]):
            valid_negative = True
            for i in range(data.shape[0]):
                # noinspection PyTypeChecker
                if labels[i] and all(data[i, boolean_indices] ==
                                     new_data[j, boolean_indices]):
                    in_range = True
                    for ri, v in zip(real_indices, domain.real_vars):
                        t = (thresholds[v] if isinstance(thresholds, dict)
                             else thresholds[i, ri])
                        if abs(data[i, ri] - new_data[j, ri]) > t:
                            in_range = False
                            break
                    valid_negative = valid_negative and (not in_range)
                    if not valid_negative:
                        break
            supported_indices[j] = supported_indices[j] and valid_negative
        new_data = new_data[supported_indices == 1, :]
        return np.concatenate([data, new_data], axis=0), np.concatenate(
            [labels, np.zeros(new_data.shape[0])])
Example #20
def approx_IAE(model1, model2, seed, sample_count):
    assert(set(model1.get_vars()) == set(model2.get_vars())),\
        "M1 vars: {}\n M2 vars: {}".format(model1.get_vars(),model2.get_vars())

    domain, bounds = merged_domain(model1, model2)

    samples, pos_ratio = positive(sample_count, domain,
                                  Or(model1.support, model2.support),
                                  weight=None)
    samples_m1 = samples[evaluate(domain,
                                  And(model1.support, Not(model2.support)),
                                  samples)]
    samples_m2 = samples[evaluate(domain,
                                  And(Not(model1.support), model2.support),
                                  samples)]
    samples_inter = samples[evaluate(domain,
                                     And(model1.support, model2.support),
                                     samples)]

    weights_m1 = sum(evaluate(domain, model1.weightfun, samples_m1))
    weights_m2 = sum(evaluate(domain, model2.weightfun, samples_m2))
    weights_inter = sum(abs(evaluate(domain, model1.weightfun, samples_inter) -
                        evaluate(domain, model2.weightfun, samples_inter)))

    n_m1 = len(samples_m1)
    n_m2 = len(samples_m2)
    n_inter = len(samples_inter)

    norm_m1 = weights_m1 / sample_count
    norm_m2 = weights_m2 / sample_count
    norm_inter = weights_inter / sample_count
    
    logger.debug(f"[ S1 ~S2] len: {n_m1}, sum: {weights_m1}, norm: {norm_m1}")
    logger.debug(f"[ S1 ~S2] len: {n_m2}, sum: {weights_m2}, norm: {norm_m2}")
    logger.debug(f"[ S1 ~S2] len: {n_inter}, sum: {weights_inter}, norm: {norm_inter}")

    approx_vol = pos_ratio * 2**len(domain.bool_vars)
    for lb, ub in bounds.values():
        approx_vol *= (ub - lb)

    return approx_vol*(weights_m1 + weights_m2 + weights_inter) / sample_count
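approx_IAE estimates the integral of |p1 - p2| by splitting the union of the two supports into the one-sided regions, where the integrand is the remaining model's weight, and the intersection, where it is |w1 - w2|, all scaled by the estimated volume. A toy check of the same Monte Carlo estimator for two densities sharing the support [0, 1]:

import numpy as np

rng = np.random.default_rng(0)
x = rng.random(100000)         # uniform samples on [0, 1]; volume 1
w1 = np.ones_like(x)           # density 1: uniform
w2 = 2 * x                     # density 2: triangular
print(np.abs(w1 - w2).mean())  # IAE estimate; exact value is 0.5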
Example #21
def run_problem(problem,
                learner,
                seed,
                n_samples,
                timeout,
                global_norm,
                use_lariat=True):

    ground_truth = problem.model
    evaluation = dict()

    train = problem.datasets['train']
    valid = problem.datasets['valid']

    train_valid = Dataset(train.features, train.data + valid.data,
                          train.constraints)

    if problem.learned_supports is not None:
        prior_supports = {
            problem.metadata['supports_metadata'][i]['support_threshold_mult']:
            chi
            for i, chi in enumerate(problem.learned_supports)
        }
    else:
        logger.warning("Couldn't find any learned support.")
        prior_supports = dict()

    prior_supports['None'] = None
    prior_supports['gt-renorm'] = ground_truth.support

    t_0 = time()
    learner.estimate_density(train, validation_data=valid)
    t_f = time() - t_0
    logger.info("training time: {}".format(t_f))
    evaluation['training_time'] = t_f

    learned_models = []
    cached_models = dict()
    max_ll = None
    best = None

    logger.info("Evaluating:\n {}".format("\n".join(
        map(str, prior_supports.keys()))))

    for t_mult, prior_support in prior_supports.items():

        if t_mult != 'None' and not use_lariat:
            continue

        evaluation[t_mult] = dict()
        ps_str = (serialize(prior_support)
                  if not isinstance(t_mult, str) else t_mult)

        if ps_str in cached_models:
            learned_model, evaluation[t_mult] = cached_models[ps_str]
        else:
            try:
                logger.info(
                    "--------------------------------------------------")
                logger.info("Support: {}".format(t_mult))

                mode = RENORM_FULL if prior_support is not None else RENORM_OFF
                t_0 = time()
                learned_model, renormd = learner.renormalize(
                    train,
                    seed,
                    mode=mode,
                    support=prior_support,
                    timeout=timeout,
                    global_norm=global_norm)
                t_f = time() - t_0
                if not renormd and prior_support is not None:
                    continue

                evaluation[t_mult]['renorm_time'] = t_f

            except CalledProcessError as e:
                logger.warning("XADD error: {}".format(e))
                continue

            except ModelException as e:
                logger.warning("Model error: {}".format(e))
                continue

            logger.debug("Computing approx-IAE")
            iae = approx_IAE(learned_model, ground_truth, seed, n_samples)
            evaluation[t_mult]['approx-iae'] = iae

            logger.debug("Computing train-LL")
            train_ll, train_out = learned_model.log_likelihood(train)
            evaluation[t_mult]['train-ll'] = train_ll
            evaluation[t_mult]['train-out'] = train_out
            logger.debug("Computing valid-LL")
            valid_ll, valid_out = learned_model.log_likelihood(valid)
            evaluation[t_mult]['valid-ll'] = valid_ll
            evaluation[t_mult]['valid-out'] = valid_out
            train_valid_ll, train_valid_out = learned_model.log_likelihood(
                train_valid)
            evaluation[t_mult]['train-valid-ll'] = train_valid_ll
            evaluation[t_mult]['train-valid-out'] = train_valid_out

            if t_mult not in ['None','gt-renorm'] \
               and (max_ll is None or valid_ll > max_ll):
                max_ll = valid_ll
                best = t_mult

            logger.debug("Computing volume difference")
            poly1 = Model(learned_model.support, None, ground_truth.get_vars(),
                          ground_truth.bounds)
            poly2 = Model(ground_truth.support, None, ground_truth.get_vars(),
                          ground_truth.bounds)
            vol_diff = ISE(poly1, poly2, seed, n_samples, engine='rej')

            evaluation[t_mult]['vol-diff'] = vol_diff

            cached_models[ps_str] = (learned_model, evaluation[t_mult])

            domain = Domain.make(
                map(lambda v: v.symbol_name(), ground_truth.boolean_vars),
                learned_model.bounds)
            eval_falses = evaluate(domain, learned_model.support,
                                   np.asarray(train.data))

        learned_models.append((t_mult, learned_model))

    evaluation['best'] = best

    tmuls = sorted([
        key for key in evaluation
        if key not in ['None', 'gt-renorm', 'training_time', 'best']
    ])

    eval_msg = """RESULTS:
Training time: {}
No renorm: {}
GT renorm: {}
Best chi : {}

All chis:
{}
""".format(evaluation['training_time'], evaluation['None'],
           evaluation['gt-renorm'], (best, evaluation.get(best)),
           "\n".join([str((tmul, evaluation[tmul])) for tmul in tmuls]))

    logger.info(eval_msg)

    return learned_models, evaluation
Example #22
    def evaluate(self, formula):
        return list(evaluate(self.domain, formula, self.values))
Example #23
def plot_density(density: Density,
                 feat_x: Optional[str] = None,
                 feat_y: Optional[str] = None,
                 filename: Optional[str] = None,
                 d3=False,
                 cmap=None):
    cmap = cmap or "plasma"
    from matplotlib import cm
    from mpl_toolkits.mplot3d import axes3d, Axes3D

    domain = density.domain
    row_vars = domain.bool_vars[:int(len(domain.bool_vars) / 2)]
    col_vars = domain.bool_vars[int(len(domain.bool_vars) / 2):]
    sf_size = 2

    fig = plt.figure(num=None,
                     figsize=(2**len(col_vars) * sf_size,
                              2**len(row_vars) * sf_size),
                     dpi=300)
    feat_x = feat_x if feat_x else domain.real_vars[0]
    feat_y = feat_y if feat_y else domain.real_vars[1]

    if d3:
        ax = fig.add_subplot(1, 1, 1, projection='3d')
    else:
        ax = fig.add_subplot(1, 1, 1)

    # Otherwise the max and min would have to be calculated globally
    assert len(domain.bool_vars) == 0

    support = smt.simplify(density.support)
    weight = smt.simplify(density.weight)

    if d3:
        n = 1000
    else:
        n = 100
    x_arr = np.linspace(domain.var_domains[feat_x][0],
                        domain.var_domains[feat_x][1], n)
    y_arr = np.linspace(domain.var_domains[feat_y][0],
                        domain.var_domains[feat_y][1], n)

    x, y = np.meshgrid(x_arr, y_arr)
    z = np.zeros(x.shape)
    for i in range(x.shape[1]):
        data = np.concatenate((x[:, i][:, np.newaxis], y[:, i][:, np.newaxis]),
                              axis=1)
        labels = evaluate(domain, support, data)
        z[:, i] = evaluate(domain, weight, data) * labels

    if d3:
        ax.plot_surface(x, y, z, cmap=cmap)
        ax.view_init(30, 70)
    else:
        ax.scatter(x, y, c=z, cmap=cmap, s=1)

    plt.tick_params(axis='both', which='major', labelsize=6)
    ax.set_xlim(domain.var_domains[feat_x])
    ax.set_ylim(domain.var_domains[feat_y])

    if filename is not None:
        plt.savefig(filename if filename.endswith(".png") else "{}.png".
                    format(filename))
    else:
        plt.show()
    plt.close(fig)
Example #24
def main():
    smt_lib_name = "smt-lib-benchmark"
    synthetic_name = "synthetic"
    parser = argparse.ArgumentParser(
        description="Interface with benchmark or synthetic data for experiments"
    )

    parser.add_argument("source")
    parser.add_argument("--sample_size", type=int, default=None)
    parser.add_argument("--runs", type=int, default=None)
    parser.add_argument("--input_dir", type=str, default=None)
    parser.add_argument("--output_dir", type=str, default=None)
    parser.add_argument("--processes", type=int, default=None)
    parser.add_argument("--time_out", type=int, default=None)

    task_parsers = parser.add_subparsers(dest="task")
    prepare_parser = task_parsers.add_parser("prepare")
    prepare_parser.add_argument("--reset_samples", type=bool, default=False)
    learn_parser = task_parsers.add_parser("learn")
    analyze_parser = task_parsers.add_parser("analyze")
    analyze_parser.add_argument("--dirs", nargs="+", type=str)
    analyze_parser.add_argument("--res_path", type=str, default=None)

    show_parsers = analyze_parser.add_subparsers()
    show_parser = show_parsers.add_parser("show")
    show.add_arguments(show_parser)

    learn_options = LearnOptions()
    learn_options.add_arguments(learn_parser)

    args = parser.parse_args()
    if args.task == "prepare":
        if args.source == smt_lib_name:
            prepare_smt_lib_benchmark()
            prepare_ratios()
            prepare_samples(args.runs, args.sample_size, args.reset_samples)
        elif args.source == synthetic_name:
            prepare_synthetic(args.input_dir, args.output_dir, args.runs,
                              args.sample_size)
    elif args.task == "learn":
        learn_options.parse_arguments(args)
        if args.source == smt_lib_name:
            learn_benchmark(args.runs, args.sample_size, args.processes,
                            args.time_out, learn_options)
        elif args.source == synthetic_name:
            learn_synthetic(args.input_dir, args.output_dir, args.runs,
                            args.sample_size, args.processes, args.time_out,
                            learn_options)
        elif args.source.startswith("ex"):
            example_name = args.source.split(":", 1)[1]
            domain, formula = examples.get_by_name(example_name)
            np.random.seed(1)
            from pywmi.sample import uniform
            samples = uniform(domain, args.sample_size)
            from pywmi import evaluate
            labels = evaluate(domain, formula, samples)
            learn_options.set_value("domain", domain, False)
            learn_options.set_value("data", samples, False)
            learn_options.set_value("labels", labels, False)
            (formula, k, h), duration = learn_options.call(True)
            print("[{:.2f}s] Learned formula (k={}, h={}): {}".format(
                duration, k, h, pretty_print(formula)))
    elif args.task == "analyze":
        analyze(args.dirs, args.res_path, show.parse_args(args))
Example #25
def prepare_samples(n, sample_size, reset):
    samples_dir = get_benchmark_samples_dir()

    seeds = [random.randint(0, 2**32 - 1) for _ in range(n)]
    samples_dict = dict()

    def sample_filter(_entry):
        if "bounds" in _entry and benchmark_filter(_entry):
            if "samples" not in _entry["samples"]:
                return True
            else:
                return reset or any(
                    len([
                        s for s in _entry["samples"] if s["sample_size"] ==
                        sample_size and s["bounds"] == _bounds[0]
                    ]) < n for _bounds in _entry["bounds"]
                    if 0.2 <= _bounds[1] <= 0.8)
        return False

    for name, entry, filename in select_benchmark_files(sample_filter):
        print("Creating samples for {}".format(name))
        pysmt.environment.push_env()
        pysmt.environment.get_env().enable_infix_notation = True

        density = Density.import_from(filename)
        samples_dict[name] = [] if reset else entry.get("samples", [])

        for i, (bounds, ratio) in enumerate(entry["bounds"]):
            if not (0.2 <= ratio <= 0.8):
                continue

            print(i, bounds, ratio)
            previous_samples = [] if reset else ([
                s for s in entry.get("samples", [])
                if s["sample_size"] == sample_size and s["bounds"] == bounds
            ])
            bounded_domain = Domain(density.domain.variables,
                                    density.domain.var_types, bounds)

            for j in range(n - len(previous_samples)):
                seed = seeds[j]
                samples_filename = "{}{}{}.{}.{}.{}.sample.npy".format(
                    samples_dir, os.path.sep, name, sample_size, seed, i)
                labels_filename = "{}{}{}.{}.{}.{}.labels.npy".format(
                    samples_dir, os.path.sep, name, sample_size, seed, i)

                if not os.path.exists(os.path.dirname(samples_filename)):
                    os.makedirs(os.path.dirname(samples_filename))

                random.seed(seed)
                np.random.seed(seed)
                samples = uniform(bounded_domain, sample_size)
                labels = evaluate(bounded_domain, density.support, samples)
                np.save(samples_filename, samples)
                np.save(labels_filename, labels)

                samples_dict[name].append({
                    "bounds": bounds,
                    "seed": seed,
                    "samples_filename": samples_filename,
                    "labels_filename": labels_filename,
                    "sample_size": sample_size
                })

        pysmt.environment.pop_env()

    def edit(summary):
        for _n, _s in samples_dict.items():
            summary[_n]["samples"] = _s

    edit_summary(edit)
Example #26
    def check(self, samples):
        return evaluate(self.domain, self.formula, samples)
Example #27
def prepare_synthetic(input_directory, output_directory, runs, sample_size):
    seeds = [random.randint(0, 2**32 - 1) for _ in range(runs)]

    db = get_synthetic_db(output_directory, True)
    os.makedirs(output_directory)
    for filename in glob.glob("{}/**/synthetics*.txt".format(input_directory),
                              recursive=True):
        pysmt.environment.push_env()
        pysmt.environment.get_env().enable_infix_notation = True
        with open(filename) as file_reference:
            flat = json.load(file_reference)

        name = flat["synthetic_problem"]["problem"]["name"]
        print(name)

        if not db.exists(name):
            domain = import_domain(
                flat["synthetic_problem"]["problem"]["domain"])
            formula = nested_to_smt(
                flat["synthetic_problem"]["problem"]["theory"])
            Density(domain, formula, smt.Real(1.0)).export_to(
                os.path.join(output_directory, "{}.density".format(name)))
            entry = {
                "domain": export_domain(domain),
                "generation": {
                    "h": flat["synthetic_problem"]["half_space_count"],
                    "k": flat["synthetic_problem"]["formula_count"],
                    "l": flat["synthetic_problem"]["terms_per_formula"],
                    "structure": flat["synthetic_problem"]["cnf_or_dnf"],
                },
                "formula": smt_to_nested(formula),
                "samples": []
            }
        else:
            entry = dict(db.get(name))
            domain = import_domain(entry["domain"])
            formula = import_domain(entry["domain"])

        samples = entry.get("samples", [])
        matching_samples = []
        for sample in samples:
            if sample["sample_size"] == sample_size:
                matching_samples.append(sample)

        for i in range(runs - len(matching_samples)):
            seed = seeds[len(matching_samples) + i]
            samples_file = "{}.{}.{}.samples.npy".format(
                name, sample_size, seed)
            labels_file = "{}.{}.{}.labels.npy".format(name, sample_size, seed)
            np.random.seed(seed)
            data = uniform(domain, sample_size)
            np.save(os.path.join(output_directory, samples_file), data)
            labels = evaluate(domain, formula, data)
            np.save(os.path.join(output_directory, labels_file), labels)
            samples.append({
                "sample_size": sample_size,
                "seed": seed,
                "samples_file": samples_file,
                "labels_file": labels_file
            })

        entry["samples"] = samples
        db.set(name, entry)

        pysmt.environment.pop_env()