def background_knowledge_example():
    """Run the one-class learning demo on a small Boolean/real domain.

    A domain with Booleans ``a``, ``b`` and reals ``x``, ``y`` (both bounded
    to ``(0, 1)``) is created, 10000 points are requested from ``uniform``
    and only the points that ``evaluate`` labels ``1`` for the target formula
    are kept.  Those points are handed to ``learn_bottom_up``; the learned
    formula and the change in data-set size are printed.  Nothing is
    returned.
    """
    domain = Domain.make(["a", "b"], ["x", "y"], [(0, 1), (0, 1)])
    a, b, x, y = domain.get_symbols(domain.variables)
    formula = (a | b) & (~a | ~b) & (x >= 0) & (x <= y) & (y <= 1)

    # Same threshold (0.1) for every real variable of the domain.
    thresholds = dict.fromkeys(domain.real_vars, 0.1)

    samples = uniform(domain, 10000)
    sample_labels = evaluate(domain, formula, samples)
    is_positive = sample_labels == 1
    data = samples[is_positive]
    labels = sample_labels[is_positive]

    def learn_inc(_data, _labels, _i, _k, _h):
        """Learn once with the given (k, h) and plot every step to test_output/bg."""
        # Background knowledge is not passed to the strategy in this variant.
        strategy = OneClassStrategy(RandomViolationsStrategy(10), thresholds)
        learner = KCnfSmtLearner(_k, _h, strategy, "mvn")

        pick_initial = LearnOptions.initial_random(20)
        initial_indices = pick_initial(list(range(len(_data))))

        run_name = "run_{}_{}_{}".format(_i, _k, _h)
        observer = PlottingObserver(
            domain,
            "test_output/bg",
            run_name,
            domain.real_vars[0],
            domain.real_vars[1],
            None,
            False,
        )
        learner.add_observer(observer)
        return learner.learn(domain, _data, _labels, initial_indices)

    outcome, k, h = learn_bottom_up(
        data, labels, learn_inc, 1, 1, 1, 1, None, None
    )
    _new_data, new_labels, learned = outcome

    print("Learned CNF(k={}, h={}) formula {}".format(k, h, pretty_print(learned)))
    print("Data-set grew from {} to {} entries".format(len(labels), len(new_labels)))
def main():
    """Run the one-class learning demo on the problem from ``checker_problem``.

    1000 points are requested from ``uniform``; only those that ``evaluate``
    labels ``1`` for the problem's formula are kept and passed to
    ``learn_bottom_up``.  The learned formula and the change in data-set size
    are printed.  Nothing is returned.
    """
    # The third element (the problem name) is unpacked but not used here.
    domain, formula, _name = checker_problem()

    # Same threshold (0.1) for every real variable of the domain.
    thresholds = dict.fromkeys(domain.real_vars, 0.1)

    samples = uniform(domain, 1000)
    sample_labels = evaluate(domain, formula, samples)
    is_positive = sample_labels == 1
    data = samples[is_positive]
    labels = sample_labels[is_positive]

    def learn_inc(_data, _labels, _i, _k, _h):
        """Learn once with the given (k, h) and plot every step to test_output/checker."""
        strategy = OneClassStrategy(RandomViolationsStrategy(10), thresholds)
        learner = KCnfSmtLearner(_k, _h, strategy, "mvn")

        pick_initial = LearnOptions.initial_random(20)
        initial_indices = pick_initial(list(range(len(_data))))

        run_name = "run_{}_{}_{}".format(_i, _k, _h)
        observer = PlottingObserver(
            domain,
            "test_output/checker",
            run_name,
            domain.real_vars[0],
            domain.real_vars[1],
            None,
            False,
        )
        learner.add_observer(observer)
        return learner.learn(domain, _data, _labels, initial_indices)

    outcome, k, h = learn_bottom_up(
        data, labels, learn_inc, 1, 1, 1, 1, None, None
    )
    _new_data, new_labels, learned = outcome

    print("Learned CNF(k={}, h={}) formula {}".format(k, h, pretty_print(learned)))
    print("Data-set grew from {} to {} entries".format(len(labels), len(new_labels)))
def negative_samples_example(background_knowledge):
    """Learn from positives plus generated negatives, then check the result.

    Args:
        background_knowledge: Truthy to use the constraint
            ``(a | b) & (~a | ~b)`` as background knowledge (for negative
            generation, for the selection strategy, and conjoined onto the
            learned formula); falsy to use none.

    The elapsed time covers negative generation and learning.  The final
    ``assert`` compares the learned formula's evaluation on a freshly
    extended data set against the labels returned with that data set.
    """
    domain = Domain.make(["a", "b"], ["x", "y"], [(0, 1), (0, 1)])
    a, b, x, y = domain.get_symbols(domain.variables)
    formula = (a | b) & (~a | ~b) & (x <= y) & domain.get_bounds()

    # Turn the caller's flag into the actual background formula (or None).
    if background_knowledge:
        background_knowledge = (a | b) & (~a | ~b)
    else:
        background_knowledge = None

    thresholds = {"x": 0.1, "y": 0.2}

    samples = uniform(domain, 10000)
    sample_labels = evaluate(domain, formula, samples)
    is_positive = sample_labels == 1
    data = samples[is_positive]
    labels = sample_labels[is_positive]
    original_sample_count = len(labels)

    start_time = time.time()

    data, labels = OneClassStrategy.add_negatives(
        domain, data, labels, thresholds, 100, background_knowledge
    )
    created = len(labels) - original_sample_count
    print("Created {} negative examples".format(created))

    sep = os.path.sep
    directory = "test_output{}bg_sampled{}{}".format(
        sep, sep, time.strftime("%Y-%m-%d %Hh%Mm%Ss")
    )

    def learn_inc(_data, _labels, _i, _k, _h):
        """Learn once with the given (k, h), plotting into the timestamped directory."""
        strategy = OneClassStrategy(
            RandomViolationsStrategy(10),
            thresholds,
            background_knowledge=background_knowledge,
        )
        learner = KCnfSmtLearner(_k, _h, strategy, "mvn")

        pick_initial = LearnOptions.initial_random(20)
        initial_indices = pick_initial(list(range(len(_data))))

        run_name = "run_{}_{}_{}".format(_i, _k, _h)
        observer = PlottingObserver(
            domain,
            directory,
            run_name,
            domain.real_vars[0],
            domain.real_vars[1],
            None,
            False,
        )
        learner.add_observer(observer)
        return learner.learn(domain, _data, _labels, initial_indices)

    outcome, k, h = learn_bottom_up(
        data, labels, learn_inc, 1, 1, 1, 1, None, None
    )
    _new_data, new_labels, learned_formula = outcome

    if background_knowledge:
        learned_formula = learned_formula & background_knowledge

    duration = time.time() - start_time

    print("{}".format(smt_to_nested(learned_formula)))
    print("Learned CNF(k={}, h={}) formula {}".format(
        k, h, pretty_print(learned_formula)))
    print("Data-set grew from {} to {} entries".format(len(labels), len(new_labels)))
    print("Learning took {:.2f}s".format(duration))

    # Extend the training data with further negatives and verify that the
    # learned formula agrees with every label of the extended set.
    test_data, test_labels = OneClassStrategy.add_negatives(
        domain, data, labels, thresholds, 1000, background_knowledge
    )
    predictions = evaluate(domain, learned_formula, test_data)
    assert all(predictions == test_labels)
def learn_wrap(data, labels, learn_inc, queue):
    """Run ``learn_bottom_up`` and publish its result on ``queue``.

    Args:
        data: Samples forwarded unchanged to ``learn_bottom_up``.
        labels: Labels forwarded unchanged to ``learn_bottom_up``.
        learn_inc: Learning callback forwarded to ``learn_bottom_up``.
        queue: Object with a ``put`` method; receives ``(formula, k, h)``.

    The learned parameters and the data-set growth are logged at debug
    level.  Nothing is returned.
    """
    (_new_data, new_labels, formula), k, h = learn_bottom_up(
        data, labels, learn_inc, 1, 1, 1, 1, None, None
    )

    logger.debug("Learned CNF(k={}, h={})".format(k, h))
    logger.debug(
        "Data-set grew from {} to {} entries".format(len(labels), len(new_labels))
    )

    queue.put((formula, k, h))
# Default training set used by ``experiment`` when no path is supplied.
_DEFAULT_TRAIN_DATASET = "/Users/samuelkolb/Downloads/input-ijcai-rh/ijcai-rh_2_3_2_100_50_4_3.problem_0.train_dataset.data"


def _nearest_neighbors(domain, data, neighbor_count):
    """Return, per row of ``data``, up to ``neighbor_count`` closest other rows.

    The distance between two rows is ``1`` when they differ in any Boolean
    column; otherwise it is the largest absolute difference over the real
    columns, each divided by the width of that variable's domain.  Columns
    are matched to variables by position in ``domain.variables``.

    Each entry of the result is a list of ``(row_index, distance)`` tuples.
    """
    if neighbor_count < 1:
        raise ValueError("neighbor_count must be at least 1")

    # Column classification and domain widths do not depend on the rows, so
    # they are computed once instead of once per pair of rows.
    bool_columns = [
        column for column, var in enumerate(domain.variables)
        if domain.is_bool(var)
    ]
    real_columns = [
        (column, domain.var_domains[var][1] - domain.var_domains[var][0])
        for column, var in enumerate(domain.variables)
        if domain.is_real(var)
    ]

    def distance(i, j):
        if any(data[i, column] != data[j, column] for column in bool_columns):
            return 1
        return max(
            abs(data[i, column] - data[j, column]) / width
            for column, width in real_columns
        )

    neighbors_per_row = []
    for i in range(len(data)):
        closest = []
        for j in range(len(data)):
            if i == j:
                continue
            current = distance(i, j)
            if len(closest) < neighbor_count:
                closest.append((j, current))
                continue
            # Replace the first furthest neighbour, but only when the new
            # candidate is strictly closer.
            furthest = max(range(len(closest)), key=lambda idx: closest[idx][1])
            if current < closest[furthest][1]:
                closest[furthest] = (j, current)
        neighbors_per_row.append(closest)
    return neighbors_per_row


def _neighbor_thresholds(domain, neighbors_per_row):
    """Return one threshold row per sample: mean neighbour distance per real variable.

    For real variables the mean distance is scaled back by the width of the
    variable's domain; every other variable gets ``0``.
    """
    rows = []
    for neighbors in neighbors_per_row:
        row = []
        for var in domain.variables:
            if domain.is_real(var):
                width = domain.var_domains[var][1] - domain.var_domains[var][0]
                mean_distance = sum(d for _, d in neighbors) / len(neighbors)
                row.append(mean_distance * width)
            else:
                row.append(0)
        rows.append(row)
    return rows


def experiment(filename=_DEFAULT_TRAIN_DATASET, neighbor_count=4):
    """Learn a formula from a positive-only data file and store the result.

    Steps, in order:

    1. Seed ``random`` and ``numpy.random`` with 888.
    2. Load a comma-separated numeric table from ``filename``; every row is
       given the label ``1``.
    3. Compute per-sample thresholds from the mean distance to the
       ``neighbor_count`` nearest neighbours, multiplied by 1.5.
    4. Extend the data with ``OneClassStrategy.add_negatives`` and plot the
       combined data set into a freshly created, timestamped directory.
    5. Run ``learn_bottom_up`` with a ``DecisionTreeSelection`` strategy and
       write the learned formula to ``result_<k>_<h>_<seconds>.json`` in
       that directory.

    Args:
        filename: Path of the training data file.  Defaults to the path
            that used to be hard-coded in this function.
        neighbor_count: Number of nearest neighbours used for the
            thresholds (previously fixed at 4).

    Raises:
        ValueError: If ``neighbor_count`` is smaller than 1.

    The neighbour lists and the threshold matrix are printed.  Nothing is
    returned.
    """
    random.seed(888)
    np.random.seed(888)
    start = time.time()

    domain = Domain.make(["b0", "b1", "b2"], ["x0", "x1"], [(0, 1), (0, 1)])
    data = np.loadtxt(filename, delimiter=",", skiprows=0)

    nearest_neighbors = _nearest_neighbors(domain, data, neighbor_count)
    print(nearest_neighbors)

    t = np.array(_neighbor_thresholds(domain, nearest_neighbors)) * 1.5
    print(t)

    labels = np.ones(len(data))
    data, labels = OneClassStrategy.add_negatives(domain, data, labels, t, 1000)

    directory = "output{}lariat-synthetic{}{}".format(
        os.path.sep, os.path.sep, time.strftime("%Y-%m-%d %Hh%Mm%Ss"))
    os.makedirs(directory)

    name = os.path.join(directory, "combined.png")
    plot.plot_combined("x0", "x1", domain, None, (data, labels), None, name,
                       set(), set())

    def learn_inc(_data, _labels, _i, _k, _h):
        """Learn once with the given (k, h), plotting into ``directory``."""
        strategy = DecisionTreeSelection()
        learner = KCnfSmtLearner(_k, _h, strategy, "mvn")
        initial_indices = LearnOptions.initial_random(20)(
            list(range(len(_data))))
        learner.add_observer(
            PlottingObserver(domain, directory,
                             "run_{}_{}_{}".format(_i, _k, _h),
                             domain.real_vars[0], domain.real_vars[1],
                             None, False))
        return learner.learn(domain, _data, _labels, initial_indices)

    # learned_k / learned_h are the CNF parameters found by the search; they
    # are unrelated to neighbor_count.
    (_new_data, _new_labels, learned_formula), learned_k, learned_h = \
        learn_bottom_up(data, labels, learn_inc, 1, 1, 2, 4, None, None)

    duration = time.time() - start

    result_name = "result_{}_{}_{}.json".format(
        learned_k, learned_h, int(duration))
    Formula(domain, learned_formula).to_file(
        os.path.join(directory, result_name))