def evaluate(self, examples: Task, clause: Clause,
             hypothesis_space: HypothesisSpace) -> typing.Union[int, float]:
    """
    Evaluates a clause by calling the Learner's eval_fn.
    Returns a number (the higher, the better).
    """
    # Cache interface:
    #   add_to_cache(node, key, val)
    #   retrieve_from_cache(node, key) -> val or None
    #   remove_from_cache(node, key) -> None
    # The cache holds the sets of examples that were covered before.
    covered = hypothesis_space.retrieve_from_cache(clause, "covered")

    if covered is not None:
        # We have executed this clause before. Note that
        # _eval_fn.evaluate() will ignore atoms in `covered` that are
        # not in the current Task.
        return self._eval_fn.evaluate(clause, examples, covered)
    else:
        covered = self._execute_program(clause)
        # If None, i.e. the trivial hypothesis, all examples are covered
        if covered is None:
            pos, neg = examples.get_examples()
            covered = pos.union(neg)
        result = self._eval_fn.evaluate(clause, examples, covered)
        hypothesis_space.add_to_cache(clause, "covered", covered)
        return result
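# A minimal sketch of the cache contract assumed above, with a plain
# dict standing in for the real HypothesisSpace bookkeeping (the class
# name and internal attribute here are hypothetical):
class _DictNodeCache:
    def __init__(self):
        self._store = {}  # node -> {key: value}

    def add_to_cache(self, node, key, val):
        self._store.setdefault(node, {})[key] = val

    def retrieve_from_cache(self, node, key):
        # None signals "never evaluated", so callers can distinguish a
        # cache miss from an empty covered set.
        return self._store.get(node, {}).get(key)

    def remove_from_cache(self, node, key):
        self._store.get(node, {}).pop(key, None)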
def get_initial_weights(self, examples: Task) -> dict:
    """Gives every example an initial weight of 1."""
    example_weights = {}
    for example_set in examples.get_examples():
        for example in example_set:
            example_weights[example] = 1
    return example_weights
def learn(self, examples: Task, background_location: str,
          hypothesis_space: TopDownHypothesisSpace):
    """
    General learning loop
    """
    t1 = time.time()
    self._solver.consult(background_location)
    self._solver.asserta(
        c_pred("test_task", 1)(
            Structure(c_functor("s", 2), [List([]), List([])])))

    final_program = []
    examples_to_use = examples
    pos, _ = examples_to_use.get_examples()
    print(pos)

    while len(final_program) == 0 or len(pos) > 0:
        # Learn a single clause
        cl = self._learn_one_clause(examples_to_use, hypothesis_space)
        if self.stopped_early:
            break
        final_program.append(cl)
        self.rules += 1

        # Update covered positive examples
        covered = self._execute_program(examples, cl)
        pos, neg = examples_to_use.get_examples()
        pos = pos.difference(covered)
        examples_to_use = Task(pos, neg)

        # Reset example weights
        self.example_weights = {}

        print("\n FOUND A RULE, CURRENT PROGRAM AND COVERED: ")
        print("\t", final_program)
        print("\t", covered)

    self.ex_time = time.time() - t1
    for rule in final_program:
        self._solver.retract(rule)

    if self.stopped_early:
        print("STOPPED EARLY...")
    print("\n EXECUTION TIME:")
    print("\t", self.ex_time, " seconds")
    print("\n EXTENSION LENGTH:")
    print("\t", self.exp_len)
    print("\n EXTENSION AMOUNT:")
    print("\t", self.exp_count)
    print("\n MAX POOL SIZE:")
    print("\t", self.max_queue_len)
    print("\n LEARNED RULES:")
    print("\t", self.rules)

    ss = SearchStats(self.stopped_early, self.ex_time, self.exp_len,
                     self.exp_count, self.max_queue_len, self.rules)
    return final_program, ss
def _execute_program(self, examples: Task, clause: Clause) -> typing.Sequence[Atom]:
    """
    Evaluates a clause using the Prolog engine and background knowledge.
    Returns the example atoms that the clause covers.
    """
    if len(clause.get_body().get_literals()) == 0:
        # A clause with an empty body covers nothing
        return []
    else:
        self._solver.asserta(clause)
        covered_examples = []
        pos, neg = examples.get_examples()
        for example in pos.union(neg):
            if self._solver.has_solution(example):
                covered_examples.append(example)
        self._solver.retract(clause)
        return covered_examples
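# Usage sketch for the assert -> query -> retract pattern above: the
# candidate clause is only visible to the solver while its coverage is
# being measured, so the background knowledge stays untouched between
# calls (`task` and `candidate` are hypothetical placeholders):
#
#   covered = learner._execute_program(task, candidate)
#   pos, neg = task.get_examples()
#   true_pos = pos.intersection(covered)   # correctly covered positives
#   false_pos = neg.intersection(covered)  # wrongly covered negatives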
def learn(self, examples: Task, knowledge: Knowledge,
          hypothesis_space: TopDownHypothesisSpace):
    """
    General learning loop
    """
    self._assert_knowledge(knowledge)
    final_program = []
    examples_to_use = examples
    pos, _ = examples_to_use.get_examples()
    self._result = []
    self._expansion = 0

    while len(final_program) == 0 or len(pos) > 0:
        # Learn a single clause
        cl = self._learn_one_clause(examples_to_use, hypothesis_space)
        final_program.append(cl)

        # Update covered positive examples
        covered = self._execute_program(examples_to_use, cl)
        print('covered', covered)
        pos, neg = examples_to_use.get_examples()
        pos = pos.difference(covered)
        examples_to_use = Task(pos, neg)

    with open('Search3.csv', 'w') as f:
        writer = csv.writer(f, delimiter=';', lineterminator='\n')
        writer.writerows(self._result)

    return final_program
def learn(self, examples: Task, knowledge: Knowledge,
          hypothesis_space: TopDownHypothesisSpace):
    """
    General learning loop
    """
    # self._assert_knowledge(knowledge)
    # TODO: check whether this is sufficient for our background knowledge
    self._solver.consult(
        "../inputfiles/StringTransformations_BackgroundKnowledge.pl")

    final_program = []
    examples_to_use = examples
    pos, _ = examples_to_use.get_examples()

    while len(final_program) == 0 or len(pos) > 0:
        # Learn a single clause
        cl = self._learn_one_clause(examples_to_use, hypothesis_space)
        final_program.append(cl)

        # Update covered positive examples
        covered = self._execute_program(examples, cl)
        pos, neg = examples_to_use.get_examples()
        pos = pos.difference(covered)
        examples_to_use = Task(pos, neg)

    return final_program
def learn(self, examples: Task, knowledge: Knowledge,
          hypothesis_space: TopDownHypothesisSpace):
    """
    General learning loop
    """
    self._assert_knowledge(knowledge)
    final_program = []
    examples_to_use = examples
    pos, _ = examples_to_use.get_examples()

    while len(final_program) == 0 or len(pos) > 0:
        # Learn a single clause
        cl = self._learn_one_clause(examples_to_use, hypothesis_space)
        final_program.append(cl)

        # Update covered positive examples
        covered = self._execute_program(cl)
        pos, neg = examples_to_use.get_examples()
        pos = pos.difference(covered)
        examples_to_use = Task(pos, neg)

    return final_program
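# A minimal sketch of the sequential-covering invariant shared by the
# learn() variants above, using plain sets instead of the Task/solver
# machinery (`learn_one` and `coverage_of` are hypothetical callables):
def covering_loop(pos, neg, learn_one, coverage_of):
    program = []
    while pos:
        clause = learn_one(pos, neg)      # best clause on the remaining examples
        program.append(clause)
        pos = pos - coverage_of(clause)   # drop the newly covered positives
    return program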
def learn_with_constants():
    """
    Consider a row of blocks [ block1 block2 ... block8 ].
    The order of this row is expressed using follows(X,Y).
    The color of a block is expressed using color(X,Color).

    Goal: learn a predicate f that holds for a block exactly when it is
    followed by a red block:
        f(X) :- follows(X,Y), color(Y,red)
    """
    block = c_type("block")
    col = c_type("col")

    block1 = c_const("block1", domain=block)  # blue -> positive
    block2 = c_const("block2", domain=block)  # red
    block3 = c_const("block3", domain=block)  # green -> positive
    block4 = c_const("block4", domain=block)  # red -> positive
    block5 = c_const("block5", domain=block)  # red
    block6 = c_const("block6", domain=block)  # green
    block7 = c_const("block7", domain=block)  # blue
    block8 = c_const("block8", domain=block)  # blue

    red = c_const("red", domain=col)
    green = c_const("green", domain=col)
    blue = c_const("blue", domain=col)

    follows = c_pred("follows", 2, domains=[block, block])
    color = c_pred("color", 2, domains=[block, col])

    # Predicate to learn:
    f = c_pred("f", 1, domains=[block])

    bk = Knowledge(follows(block1, block2), follows(block2, block3),
                   follows(block3, block4), follows(block4, block5),
                   follows(block5, block6), follows(block6, block7),
                   follows(block7, block8), color(block1, blue),
                   color(block2, red), color(block3, green),
                   color(block4, red), color(block5, red),
                   color(block6, green), color(block7, blue),
                   color(block8, blue))

    pos = {f(x) for x in [block1, block3, block4]}
    neg = {f(x) for x in [block2, block5, block6, block7, block8]}
    task = Task(positive_examples=pos, negative_examples=neg)

    solver = SWIProlog()

    # The eval_fn must return an upper bound on quality to prune the search space.
    eval_fn1 = Coverage(return_upperbound=True)
    eval_fn2 = Compression(return_upperbound=True)
    eval_fn3 = Accuracy(return_upperbound=True)

    learners = [
        Aleph(solver, eval_fn, max_body_literals=4, do_print=False)
        for eval_fn in [eval_fn1, eval_fn3]
    ]

    for learner in learners:
        res = learner.learn(task, bk, None, minimum_freq=1)
        print(res)
def learn(self, examples: Task, knowledge: Knowledge,
          hypothesis_space: TopDownHypothesisSpace):
    """
    General learning loop
    """
    self._assert_knowledge(knowledge)
    final_program = []
    examples_to_use = examples
    pos, _ = examples_to_use.get_examples()
    i = 0
    start = datetime.datetime.now()

    while len(final_program) == 0 or len(pos) > 0:
        # Learn a single clause
        if self._print:
            print(f"Iteration {i}")
            print("- Current program:")
            for program_clause in final_program:
                print("\t" + str(program_clause))
        cl = self._learn_one_clause(examples_to_use, hypothesis_space)
        final_program.append(cl)

        # Update covered positive examples
        covered = self._execute_program(cl)

        # Record the intermediate quality of the program at this point
        # (these runs are not counted as Prolog queries)
        c = set()
        for program_clause in final_program:
            c = c.union(self._execute_program(program_clause,
                                              count_as_query=False))
        pos_all, neg_all = examples.get_examples()
        pos_covered = len(c.intersection(pos_all))
        neg_covered = len(c.intersection(neg_all))
        self._intermediate_coverage.append((pos_covered, neg_covered))

        # Remove covered examples and start the next iteration
        pos, neg = examples_to_use.get_examples()
        pos = pos.difference(covered)
        examples_to_use = Task(pos, neg)
        i += 1

    total_time = (datetime.datetime.now() - start).total_seconds()
    if self._print:
        print("Done! Search took {:.5f} seconds.".format(total_time))

    # Wrap the results into the learn result and return it
    self._learnresult["final_program"] = final_program
    self._learnresult["total_time"] = total_time
    self._learnresult["num_iterations"] = i
    self._learnresult["evalfn_evaluations"] = self._eval_fn._clauses_evaluated
    self._learnresult["prolog_queries"] = self._prolog_queries
    self._learnresult["intermediate_coverage"] = self._intermediate_coverage
    return self._learnresult
def learn_simpsons():
    # define the predicates
    father = c_pred("father", 2)
    mother = c_pred("mother", 2)
    grandparent = c_pred("grandparent", 2)

    # specify the background knowledge
    background = Knowledge(father("homer", "bart"), father("homer", "lisa"),
                           father("homer", "maggie"), mother("marge", "bart"),
                           mother("marge", "lisa"), mother("marge", "maggie"),
                           mother("mona", "homer"), father("abe", "homer"),
                           mother("jacqueline", "marge"),
                           father("clancy", "marge"))

    # positive examples
    pos = {
        grandparent("abe", "bart"),
        grandparent("abe", "lisa"),
        grandparent("abe", "maggie"),
        grandparent("mona", "bart"),
        grandparent("mona", "lisa"),
        grandparent("mona", "maggie"),
        grandparent("jacqueline", "bart"),
        grandparent("jacqueline", "lisa"),
        grandparent("jacqueline", "maggie"),
        grandparent("clancy", "bart"),
        grandparent("clancy", "lisa"),
        grandparent("clancy", "maggie"),
    }

    # negative examples
    neg = {
        grandparent("abe", "marge"),
        grandparent("abe", "homer"),
        grandparent("abe", "clancy"),
        grandparent("abe", "jacqueline"),
        grandparent("homer", "marge"),
        grandparent("homer", "jacqueline"),
        grandparent("jacqueline", "marge"),
        grandparent("clancy", "homer"),
        grandparent("clancy", "abe")
    }

    task = Task(positive_examples=pos, negative_examples=neg)
    solver = SWIProlog()

    # The eval_fn must return an upper bound on quality to prune the search space.
    eval_fn = Coverage(return_upperbound=True)
    eval_fn2 = Compression(return_upperbound=True)
    eval_fn3 = Accuracy(return_upperbound=True)

    learner = Aleph(solver, eval_fn, max_body_literals=4, do_print=False)
    learner2 = Aleph(solver, eval_fn2, max_body_literals=4, do_print=False)
    learner3 = Aleph(solver, eval_fn3, max_body_literals=4, do_print=False)

    result = learner.learn(task, background, None)
    print(result)
def evaluate_distinct(self, examples: Task, clause: Clause) -> typing.Tuple[int, int]:
    covered = self._execute_program(examples, clause)
    pos, neg = examples.get_examples()
    covered_pos = pos.intersection(covered)
    covered_neg = neg.intersection(covered)
    return len(covered_pos), len(covered_neg)
def evaluate(self, clause: Clause, examples: Task, covered: Sequence[Atom]):
    self._clauses_evaluated += 1
    pos, neg = examples.get_examples()
    covered_pos = len(pos.intersection(covered))
    covered_neg = len(neg.intersection(covered))
    if self._return_upperbound:
        # The covered positives upper-bound the coverage any refinement can reach
        return (covered_pos - covered_neg), covered_pos
    return covered_pos - covered_neg
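# Worked example for the coverage score above, with hypothetical sets:
# the clause covers 3 positives and 1 negative, so the score is
# 3 - 1 = 2, and the upper bound is the 3 covered positives.
pos = {"p1", "p2", "p3", "p4"}
neg = {"n1", "n2"}
covered = {"p1", "p2", "p3", "n1"}
assert len(pos & covered) - len(neg & covered) == 2
assert len(pos & covered) == 3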
def evaluate(self, examples: Task, clause: Clause) -> typing.Union[int, float]:
    covered = self._execute_program(clause)
    pos, neg = examples.get_examples()
    covered_pos = pos.intersection(covered)
    covered_neg = neg.intersection(covered)
    if len(covered_neg) > 0:
        # Reject any clause that covers a negative example
        return 0
    return len(covered_pos)
def _execute_program(self, examples: Task, clause: Clause) -> typing.Sequence[Atom]:
    """
    Evaluates a clause using the Prolog engine and background knowledge.
    Returns the positive example atoms that the clause covers.
    """
    pos, _ = examples.get_examples()
    self._solver.assertz(clause)
    coverage = [example for example in pos if self._solver.has_solution(example)]
    self._solver.retract(clause)
    return coverage
def evaluate(self, clause: Clause, examples: Task, covered: Sequence[Atom]):
    self._clauses_evaluated += 1
    pos, neg = examples.get_examples()
    covered_pos = len(pos.intersection(covered))
    covered_neg = len(neg.intersection(covered))
    if covered_pos + covered_neg == 0:
        # Note: without parentheses, `0 if ... else 0, 0` always builds
        # the tuple (0, 0); branch explicitly instead.
        return (0, 0) if self._return_upperbound else 0
    precision = covered_pos / (covered_pos + covered_neg)
    if self._return_upperbound:
        # The upper bound is 1: a refinement could shed every covered negative
        return precision, 1
    return precision
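# Worked example for the precision score above (hypothetical counts):
# covering 3 positives and 1 negative gives 3 / (3 + 1) = 0.75, and
# the upper bound is 1, reached if a refinement sheds the negative.
covered_pos, covered_neg = 3, 1
assert covered_pos / (covered_pos + covered_neg) == 0.75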
def evaluate(self, clause: Clause, examples: Task, covered: Sequence[Atom]):
    self._clauses_evaluated += 1
    pos, neg = examples.get_examples()
    covered_pos = len(pos.intersection(covered))
    covered_neg = len(neg.intersection(covered))
    if covered_pos + covered_neg == 0:
        return 0
    p = covered_pos / (covered_pos + covered_neg)
    # Pure coverage (all covered examples positive, or all negative): zero entropy
    if p == 1 or p == 0:
        return 0
    return -(p * math.log10(p) + (1 - p) * math.log10(1 - p))
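# Worked example for the entropy score above: p = 0.75 gives
# -(0.75*log10(0.75) + 0.25*log10(0.25)) ≈ 0.2442. Note the base-10
# logarithm; the classic ID3 entropy uses log2, which only rescales
# the score by a constant factor.
import math
p = 0.75
assert abs(-(p * math.log10(p) + (1 - p) * math.log10(1 - p)) - 0.2442) < 1e-3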
def get_best_primitives(
    self, examples: Task,
    current_cand: typing.Union[Clause, Recursion, Body]
) -> typing.Sequence[typing.Union[Clause, Body, Procedure]]:
    scores = [0] * 22  # one score slot per candidate primitive

    # Filter examples (e.g. only use positive/negative examples)
    # examples = self.filter_examples(examples)
    pos, neg = examples.get_examples()

    # Accumulate the network's output for each example
    for example in pos:
        nn_output = self.model.predict(
            get_nn_input_data(current_cand, example,
                              self.current_primitives.tolist()))[0]
        nn_output = self.process_output(nn_output)
        # Update the score vector
        scores = self.update_score(current_cand, example, scores, nn_output)
    for example in neg:
        nn_output = self.model.predict(
            get_nn_input_data(current_cand, example,
                              self.current_primitives.tolist()))[0]
        nn_output = self.process_output(nn_output)
        # Negative examples contribute with a small negative weight
        scores = self.update_score(current_cand, example, scores,
                                   -0.2 * nn_output)

    # Return the best-scoring primitives
    indices = numpy.argpartition(
        scores, -self.amount_chosen_from_nn)[-self.amount_chosen_from_nn:]
    # TODO: indices are in arbitrary order; they could be sorted by score
    print("PRIMITIVES CHOSEN BY NN: ")
    print(self.current_primitives[indices], "\n")
    return self.current_primitives[indices]
def evaluate(self, examples: Task, clause: Clause) -> typing.Union[int, float]:
    covered = self._execute_program(examples, clause)
    pos, neg = examples.get_examples()
    covered_pos = pos.intersection(covered)
    covered_neg = neg.intersection(covered)
    print("\t covered neg: ", len(covered_neg))
    print("\t covered pos: ", len(covered_pos))
    if (len(covered_pos) + len(covered_neg)) == 0:
        print("\t score: ", 0)
    else:
        print("\t score: ",
              len(covered_pos) / (len(covered_pos) + len(covered_neg)))
    if len(covered_neg) > 0:
        return 0
    return len(covered_pos)
def evaluate(self, examples: Task, clause: Clause):
    pos, neg = examples.get_examples()
    self._solver.assertz(clause)
    positive_coverage = 0
    for example in pos:
        if self._solver.has_solution(example):
            positive_coverage += 1
    negative_coverage = 0
    for example in neg:
        if self._solver.has_solution(example):
            negative_coverage += 1
    self._solver.retract(clause)

    if negative_coverage + positive_coverage == 0:
        return [0, 0]
    # Precision weighted by the fraction of all positives covered,
    # paired with the raw negative coverage
    return [
        positive_coverage / (positive_coverage + negative_coverage)
        * positive_coverage / len(pos),
        negative_coverage,
    ]
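# Worked example for the score pair above (hypothetical counts): with
# 10 positives in total, 4 covered positives and 1 covered negative,
# the first component is 4/(4+1) * 4/10 = 0.32 (precision weighted by
# positive recall) and the second is the raw negative coverage, 1.
pos_cov, neg_cov, total_pos = 4, 1, 10
assert abs(pos_cov / (pos_cov + neg_cov) * pos_cov / total_pos - 0.32) < 1e-9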
def train_task(task_id: str, pos_multiplier: int, neg_example_offset: int,
               nn_amount: int, pos=None, neg=None):
    # Load the needed files
    bk, predicates = createKnowledge(
        "../inputfiles/StringTransformations_BackgroundKnowledge.pl", task_id)
    if pos is None and neg is None:
        pos, neg = generate_examples(task_id, pos_multiplier,
                                     neg_example_offset)
    task = Task(positive_examples=pos, negative_examples=neg)

    # Collect the candidate predicates
    total_predicates = []
    filtered_predicates = []
    for predicate in predicates:
        if predicate.name not in ["s", task_id] and predicate not in filtered_predicates:
            total_predicates.append(
                lambda x, pred=predicate: plain_extension(
                    x, pred, connected_clauses=True))
            filtered_predicates.append(predicate)

    # Create the hypothesis space
    hs = TopDownHypothesisSpace(
        primitives=total_predicates,
        head_constructor=c_pred("test_task", 1),
        recursive_procedures=True,
        expansion_hooks_keep=[
            lambda x, y: connected_clause(x, y),
            lambda x, y: only_1_pred_for_1_var(x, y),
            lambda x, y: head_first(x, y)
        ],
        expansion_hooks_reject=[
            # lambda x, y: has_singleton_vars(x, y),
            # The singleton-vars constraint is subsumed by this constraint:
            lambda x, y: has_not_previous_output_as_input(x, y),  # strict
            # lambda x, y: has_new_input(x, y),  # less strict
            # lambda x, y: has_unexplained_last_var(x, y),  # for the 'write' predicate
            lambda x, y: has_unexplained_last_var_strict(x, y),  # strict version of the above
            lambda x, y: has_duplicated_literal(x, y),
            lambda x, y: has_g1_same_vars_in_literal(x, y),
            lambda x, y: has_duplicated_var_set(x, y),
            lambda x, y: has_double_recursion(x, y),
            lambda x, y: has_endless_recursion(x, y)
        ])

    learner = NeuralSearcher1(solver_instance=prolog,
                              primitives=filtered_predicates,
                              model_location="../utility/Saved_model_covered",
                              max_body_literals=10,
                              amount_chosen_from_nn=nn_amount,
                              filter_amount=30,
                              threshold=0.1)

    # Try to learn the program
    program, ss = learner.learn(
        task, "../inputfiles/StringTransformations_BackgroundKnowledge.pl", hs)
    print(program)
    return ss
def learn_text():
    """
    We describe a piece of text spanning multiple lines:
    "node A red <newline> node B green <newline> node C blue <newline>"
    using the next/2, linestart/2, lineend/2 and tokenlength/2 predicates.
    """
    token = c_type("token")
    num = c_type("num")

    next = c_pred("next", 2, ("token", "token"))
    linestart = c_pred("linestart", 2, ("token", "token"))
    lineend = c_pred("lineend", 2, ("token", "token"))
    tokenlength = c_pred("tokenlength", 2, ("token", "num"))

    n1 = c_const("n1", num)
    n3 = c_const("n3", num)
    n4 = c_const("n4", num)
    n5 = c_const("n5", num)
    node1 = c_const("node1", token)
    node2 = c_const("node2", token)
    node3 = c_const("node3", token)
    red = c_const("red", token)
    green = c_const("green", token)
    blue = c_const("blue", token)
    a_c = c_const("a_c", token)
    b_c = c_const("b_c", token)
    c_c = c_const("c_c", token)
    start = c_const("c_START", token)
    end = c_const("c_END", token)

    bk = Knowledge(
        next(start, node1), next(node1, a_c), next(a_c, red),
        next(red, node2), next(node2, green), next(green, b_c),
        next(b_c, node3), next(node3, c_c), next(c_c, blue),
        next(blue, end), tokenlength(node1, n4), tokenlength(node2, n4),
        tokenlength(node3, n4), tokenlength(a_c, n1), tokenlength(b_c, n1),
        tokenlength(c_c, n1), tokenlength(red, n3), tokenlength(green, n5),
        tokenlength(blue, n4), linestart(node1, node1), linestart(a_c, node1),
        linestart(red, node1), linestart(node2, node2), linestart(b_c, node2),
        linestart(green, node2), linestart(node3, node3), linestart(c_c, node3),
        linestart(blue, node3), lineend(node1, a_c), lineend(a_c, red),
        lineend(node2, red), lineend(b_c, green), lineend(node3, blue),
        lineend(c_c, blue), lineend(red, red), lineend(green, green),
        lineend(blue, blue))

    solver = SWIProlog()
    eval_fn1 = Coverage(return_upperbound=True)
    learner = Aleph(solver, eval_fn1, max_body_literals=3, do_print=False)

    # 1. Consider the hypothesis: f1(word) :- word is the second word on a line
    if True:
        f1 = c_pred("f1", 1, [token])
        neg = {f1(x) for x in [node1, node2, node3, blue, green, red]}
        pos = {f1(x) for x in [a_c, b_c, c_c]}
        task = Task(positive_examples=pos, negative_examples=neg)

        res = learner.learn(task, bk, None)
        print(res)

    # 2. Consider the hypothesis: f2(word) :- word is the first word on a line
    if True:
        f2 = c_pred("f2", 1, [token])
        neg = {f2(x) for x in [a_c, b_c, c_c, blue, green, red]}
        pos = {f2(x) for x in [node1, node2, node3]}
        task2 = Task(positive_examples=pos, negative_examples=neg)

        res = learner.learn(task2, bk, None)
        print(res)

    # 3. Assume we have learned the predicate node(X) before (A, B and C are nodes).
    # We want to learn nodecolor(Node,X) :- X is the next token after Node.
    if True:
        node = c_pred("node", 1, [token])
        color = c_pred("color", 1, [token])
        nodecolor = c_pred("nodecolor", 2, [token, token])

        a = c_var("A", token)
        b = c_var("B", token)

        bk_old = bk.get_all()
        bk = Knowledge(*bk_old, node(a_c), node(b_c), node(c_c),
                       color(red), color(green), color(blue))

        pos = {
            nodecolor(a_c, red),
            nodecolor(b_c, green),
            nodecolor(c_c, blue)
        }
        neg = {
            nodecolor(node1, red),
            nodecolor(node2, red),
            nodecolor(node3, red),
            nodecolor(node1, blue),
            nodecolor(node2, blue),
            nodecolor(node3, blue),
            nodecolor(node1, green),
            nodecolor(node2, green),
            nodecolor(node3, green),
            nodecolor(a_c, green),
            nodecolor(a_c, blue),
            nodecolor(b_c, blue),
            nodecolor(b_c, red),
            nodecolor(c_c, red),
            nodecolor(c_c, green)
        }

        task3 = Task(positive_examples=pos, negative_examples=neg)

        # prog = learner.learn(task3, bk, None, initial_clause=Body(node(a), color(b)))
        result = learner.learn(task3, bk, None,
                               initial_clause=Body(node(a), color(b)),
                               minimum_freq=3)
        print(result)
def filter_examples(self, examples: Task) -> typing.Set[Atom]:
    """Keeps only the positive examples."""
    pos, _ = examples.get_examples()
    return pos
father = c_pred("father", 2)
mother = c_pred("mother", 2)
grandparent = c_pred("grandparent", 2)

# specify the background knowledge
background = Knowledge(father("a", "b"), mother("a", "b"), mother("b", "c"),
                       father("e", "f"), father("f", "g"),
                       mother("h", "i"), mother("i", "j"))

# positive examples
pos = {grandparent("a", "c"), grandparent("e", "g"), grandparent("h", "j")}

# negative examples
neg = {grandparent("a", "b"), grandparent("a", "g"), grandparent("i", "j")}

task = Task(positive_examples=pos, negative_examples=neg)

# create Prolog instance
prolog = SWIProlog()

learner = SimpleBreadthFirstLearner(prolog, max_body_literals=3)

# create the hypothesis space
hs = TopDownHypothesisSpace(
    primitives=[
        lambda x: plain_extension(x, father, connected_clauses=True),
        lambda x: plain_extension(x, mother, connected_clauses=True)
    ],
    head_constructor=grandparent,
    expansion_hooks_reject=[
        lambda x, y: has_singleton_vars(x, y),
        lambda x, y: has_duplicated_literal(x, y)
    ])