def max_var_occurrences(head: Atom, body: Body, max_occurrence: int) -> bool:
    """
    Returns True if the most frequent entry of body.get_variables() occurs
    at least max_occurrence times; returns False if the body has no variables

    :param head: head of the clause (not inspected)
    :param body: body whose variables are counted
    :param max_occurrence: number of occurrences at which the check fires
    """
    variables = body.get_variables()
    if len(variables) == 0:
        return False
    # The highest count is all that matters, so there is no need to rank
    # every variable with most_common().
    return max(Counter(variables).values()) >= max_occurrence
def _check_if_recursive(self, body: Body):
    """
    Tells whether the body makes the clause recursive:
        - if the head constructor is a Predicate, it must be one of the
          predicates of the body
        - otherwise, some predicate of the body must have been created by
          the head constructor (is_created_by)
    """
    constructor = self._head_constructor
    body_predicates = body.get_predicates()

    if isinstance(constructor, Predicate):
        return constructor in body_predicates

    created_flags = [constructor.is_created_by(p) for p in body_predicates]
    return any(created_flags)
def has_g1_same_vars_in_literal(head: Atom, body: Body) -> bool:
    """
    Returns True if there exists a literal with all same vars

    Negated literals are replaced by the atom they wrap before the check is
    delegated to literal_exist_g1_same_variables; head is not inspected.
    """
    atoms = []
    for literal in body.get_literals():
        atoms.append(literal.get_atom() if isinstance(literal, Not) else literal)
    return literal_exist_g1_same_variables(atoms)
def has_duplicated_var_set(head: Atom, body: Body) -> bool:
    """
    Returns True if there exists a variable set that is also used in other
    literals in the body

    Negated literals are replaced by the atom they wrap before the check is
    delegated to duplicated_var_set_exists; head is not inspected.
    """
    atoms = []
    for literal in body.get_literals():
        if isinstance(literal, Not):
            atoms.append(literal.get_atom())
        else:
            atoms.append(literal)
    return duplicated_var_set_exists(atoms)
def has_not_previous_output_as_input(head: Atom, body: Body) -> bool:
    """
    Returns the result of not_previous_output_as_input_exists applied to the
    head followed by the atoms of the body (negated literals are replaced by
    the atom they wrap)
    """
    atoms = [head]
    atoms.extend(
        literal.get_atom() if isinstance(literal, Not) else literal
        for literal in body.get_literals()
    )
    return not_previous_output_as_input_exists(atoms)
def _plain_extend_clause(
    clause: typing.Union[Clause, Body],
    predicate: Predicate,
    connected_clause: bool = True
) -> typing.Sequence[typing.Union[Clause, Body]]:
    """
    Extends the clause with the predicate in every possible way (no bias)

    Arguments:
        clause: a clause to be extended
        predicate: a predicate to add to the clause
        connected_clause: if True, skip extensions in which every argument
                          of the new literal is a freshly created variable
    """
    if isinstance(clause, Body) and len(clause) == 0:
        # Empty body: the new literal is the whole body, built over letter names
        arity = predicate.get_arity()
        letters = [chr(code) for code in range(ord("A"), ord("Z"))][:arity]
        return [
            Body(predicate(*names))
            for names in combinations_with_replacement(letters, arity)
        ]

    existing_vars: typing.Sequence[Variable] = clause.get_variables()
    taken = set(existing_vars)
    arg_types: typing.Sequence[Type] = predicate.get_arg_types()

    # one fresh variable per argument position; it is always the first option
    fresh_vars = set()
    options_per_arg = []
    for arg_type in arg_types:
        fresh = new_variable(taken, arg_type)
        taken.add(fresh)
        fresh_vars.add(fresh)
        options_per_arg.append([fresh])

    # every clause variable is an option for each position of matching type
    for var in existing_vars:
        for slot, arg_type in enumerate(arg_types):
            if var.get_type() == arg_type:
                options_per_arg[slot].append(var)

    # cross product of the per-position options
    candidates: typing.List[typing.Union[Clause, Body]] = []
    for arg_combo in product(*options_per_arg):
        if connected_clause and all([x in fresh_vars for x in arg_combo]):
            # the new literal would be disconnected from the rest of the clause
            continue
        candidates.append(clause + predicate(*arg_combo))

    return candidates
def initialise(self, initial_clause: typing.Union[Clause, Body]) -> None:
    """
    Sets up the search space with a single root node.

    The root is the body of initial_clause (or initial_clause itself when it
    is a Body); a falsy initial_clause gives an empty Body. Every possible
    head is attached to the root with its ignored/blocked/visited flags
    cleared, and the "main" pointer is placed on the root.

    Raises Exception for an unsupported head constructor and AssertionError
    when a non-empty initial body has fewer variables than the head arity.
    """
    constructor = self._head_constructor
    if not isinstance(constructor, (Predicate, FillerPredicate)):
        raise Exception(
            f"Unknown head constructor ({self._head_constructor}")

    if isinstance(constructor, Predicate):
        # create possible heads over letter-named arguments
        arity = constructor.get_arity()
        letters = [chr(code) for code in range(ord("A"), ord("Z"))][:arity]
        possible_heads = [
            constructor(*names)
            for names in combinations_with_replacement(letters, arity)
        ]
    else:
        possible_heads = constructor.all_possible_atoms()

    # create empty clause or use initial clause
    if not initial_clause:
        clause = Body()
    elif isinstance(initial_clause, Body):
        clause = initial_clause
    else:
        clause = initial_clause.get_body()

    has_literals = len(clause.get_literals()) > 0
    if has_literals and len(clause.get_variables()) < constructor.get_arity():
        raise AssertionError(
            "Cannot provide an initial clause with fewer distinct variables than the head predicate!"
        )

    head_flags = {"ignored": False, "blocked": False, "visited": False}

    self._hypothesis_space.add_node(clause)
    # each head gets its own copy of the flags
    self._hypothesis_space.nodes[clause]["heads"] = {
        candidate: head_flags.copy() for candidate in possible_heads
    }
    self._hypothesis_space.nodes[clause]["visited"] = False

    self._pointers["main"] = clause
    self._root_node = clause
def max_pred_occurrences(head: Atom, body: Body, pred: Predicate,
                         max_occurrence: int) -> bool:
    """
    Returns True if the predicate pred does not appear more than
    max_occurrence times among the literals of the body (head is not
    inspected)
    """
    occurrences = sum(
        1 for literal in body.get_literals() if literal.get_predicate() == pred
    )
    return occurrences <= max_occurrence
def endless_recursion_exists(head: Atom, body: Body) -> bool:
    """
    Looks at the first body literal that has the same predicate as the head
    and returns True if its first variable equals the first variable of the
    head; returns False otherwise, or if no body literal uses the head
    predicate
    """
    for literal in body.get_literals():
        if not (head.get_predicate() == literal.get_predicate()):
            continue
        # the first literal sharing the head predicate decides the outcome
        same_first_var = literal.get_variables()[0] == head.get_variables()[0]
        return True if same_first_var else False
    return False
def max_var(head: Atom, body: Body, max_count: int) -> bool:
    """
    Return True if there are no more than max_count variables in the clause

    The count covers the variables of the body plus the head variables that
    do not already occur in the body.

    :param head: head of the clause
    :param body: body of the clause
    :param max_count: largest number of variables that is still accepted
    """
    # Work on a copy: extending the list returned by body.get_variables()
    # in place would alter the body's own list if the body hands out its
    # internal storage.
    clause_vars = list(body.get_variables())
    for var in head.get_variables():
        if var not in clause_vars:
            clause_vars.append(var)
    return len(clause_vars) <= max_count
def connected_body(head: Atom, body: Body) -> bool:
    """
    Returns True if variables in the body cannot be partitioned in two
    non-overlapping sets

    An empty body counts as connected. Otherwise negated literals are
    replaced by the atom they wrap and the decision is delegated to
    are_variables_connected; head is not inspected.
    """
    if len(body) != 0:
        atoms = [
            literal.get_atom() if isinstance(literal, Not) else literal
            for literal in body.get_literals()
        ]
        return are_variables_connected(atoms)
    return True
def _from_body_fixed_arity(
    self,
    body: Body,
    arity: int = None,
    arg_types: Sequence[Type] = None,
    use_as_head_predicate: Predicate = None,
) -> Sequence[Atom]:
    """
    Creates head atoms given the body of the clause

    Exactly one of arity and arg_types must be given (enforced with an
    assert). Without argument types every body variable may fill every
    position; with argument types a position only accepts body variables of
    the matching type. One atom is produced per combination.

    :param body: body whose variables become the head arguments
    :param arity: (optional) desired arity if specified with min/max when
                  constructing the FillerPredicate
    :param arg_types: (optional) argument types to use
    :param use_as_head_predicate: (optional) predicate to use for every
                  head instead of creating a new one; when arg_types is not
                  given, its argument types are used
    :return: list of candidate head atoms
    """
    assert bool(arity) != bool(arg_types)

    body_vars = body.get_variables()

    if use_as_head_predicate and arg_types is None:
        arg_types = use_as_head_predicate.get_arg_types()

    if arg_types is None:
        base = [body_vars] * arity
    else:
        # One candidate list per argument type. The number of positions
        # comes from arg_types itself: arity is None whenever the caller
        # supplied arg_types, so range(arity) cannot be used here.
        base = [
            [var for var in body_vars if var.get_type() == arg_type]
            for arg_type in arg_types
        ]

    heads = []
    for comb in product(*base):
        # the counter advances for every head, also when no predicate is created
        self._instance_counter += 1
        if use_as_head_predicate is not None:
            pred = use_as_head_predicate
        elif arg_types is None:
            pred = c_pred(f"{self._prefix_name}_{self._instance_counter}",
                          arity)
        else:
            pred = c_pred(
                f"{self._prefix_name}_{self._instance_counter}",
                len(arg_types),
                arg_types,
            )

        heads.append(Atom(pred, list(comb)))

    return heads
def has_singleton_vars(head: Atom, body: Body) -> bool:
    """
    Returns True if the clause has a singleton variable (appears only once)

    Occurrences are counted over the head arguments and the arguments of
    all body literals. A clause with an empty body never reports a
    singleton.

    :param head: head of the clause
    :param body: body of the clause
    """
    from collections import Counter

    if len(body) == 0:
        return False

    occurrences = Counter(head.get_variables())
    in_literals = Counter(
        var for literal in body.get_literals()
        for var in literal.get_variables()
    )
    for var in body.get_variables():
        occurrences[var] += in_literals[var]

    # Test the counts themselves; testing the truthiness of the variable
    # objects would miss a singleton whose object happens to be falsy.
    return any(count == 1 for count in occurrences.values())
def negation_at_the_end(head: Atom, body: Body) -> bool:
    """
    Returns True if negations appear after all positive literals

    A literal counts as positive when it is an Atom; anything else counts as
    a negation. The check fails exactly when the first negation sits before
    the last positive literal.
    """
    last_positive = -1
    first_negative = -1

    for position, literal in enumerate(body.get_literals()):
        if isinstance(literal, Atom):
            last_positive = position
        elif first_negative < 0:
            first_negative = position

    return not (-1 < first_negative < last_positive)
def _add_to_body_fixed_arity(self, body: Body, arity: int) -> Sequence[Body]:
    """
    Extends the body with a newly created predicate of the given arity

    One extended body is produced per combination of `arity` body variables.
    Combinations with the same tuple of argument types share one predicate;
    a new predicate is created (and the instance counter advanced) only for
    a type tuple that has not been seen during this call.

    :param body: body to extend
    :param arity: arity of the literal to add
    :return: list of extended bodies
    """
    new_pred_stash = {}  # arg_types tuple -> pred

    bodies = []
    for args in combinations(body.get_variables(), arity):
        # Must be a real tuple: a generator expression hashes by identity,
        # so it would never be found in the stash again.
        arg_types = tuple(var.get_type() for var in args)

        if arg_types not in new_pred_stash:
            self._instance_counter += 1
            new_pred_stash[arg_types] = c_pred(
                f"{self._prefix_name}{self._instance_counter}", arity,
                arg_types)
        pred = new_pred_stash[arg_types]

        bodies.append(body + pred(*args))

    return bodies
def _create_possible_heads(
        self,
        body: Body,
        use_as_head_predicate: Predicate = None) -> typing.Sequence[Atom]:
    """
    Creates possible heads for a given body

    If the _head_constructor is a Predicate, a head is made for every
    combination of body variables (as produced by itertools.combinations,
    with as many variables as the predicate's arity) whose types match the
    argument types of the predicate. If it is a FillerPredicate, the heads
    come from its new_from_body.

    :param body: body to create heads for
    :param use_as_head_predicate: (optional) forwarded to
                  FillerPredicate.new_from_body
    :return: list of candidate head atoms
    :raises Exception: if the head constructor is of neither kind
    """
    constructor = self._head_constructor

    if isinstance(constructor, Predicate):
        arg_types = constructor.get_arg_types()
        # Compare list with list: a list never equals a tuple, so a tuple of
        # argument types would otherwise reject every combination.
        expected_types = None if arg_types is None else list(arg_types)

        return [
            Atom(constructor, list(comb))
            for comb in combinations(body.get_variables(),
                                     constructor.get_arity())
            if [var.get_type() for var in comb] == expected_types
        ]

    if isinstance(constructor, FillerPredicate):
        return constructor.new_from_body(
            body, use_as_head_predicate=use_as_head_predicate)

    raise Exception(
        f"Unknown head constructor {self._head_constructor}")
def has_duplicated_literal(head: Atom, body: Body) -> bool:
    """
    Returns True if there are duplicated literals in the body, i.e. the
    body is longer than the set of its literals (head is not inspected)
    """
    body_length = len(body)
    distinct_literals = set(body.get_literals())
    return body_length != len(distinct_literals)
is_uppercase = c_pred("is_uppercase", 1) # define the Variables H = c_var("H") Ta = c_var("Ta") Tb = c_var("Tb") A = c_var("A") B = c_var("B") H1 = c_var("H1") H2 = c_var("H2") Z = c_var("Z") O = c_var("O") N = c_var("N") # create clauses head = Atom(not_space, [A]) body = Atom(is_space, [A]) clause1 = Clause(head, Body(Not(body))) head = Atom(is_uppercase, [Structure(s, [Pair(H, Z), O])]) body = Atom(is_uppercase_aux, [H]) clause2 = Clause(head, Body(body)) head = Atom(not_uppercase, [A]) body = Atom(is_uppercase, [A]) clause3 = Clause(head, Body(Not(body))) head = Atom(is_lowercase, [Structure(s, [Pair(H, Z), O])]) body = Atom(is_lowercase_aux, [H]) clause4 = Clause(head, Body(body)) head = Atom(not_lowercase, [A]) body = Atom(is_lowercase, [A]) clause5 = Clause(head, Body(Not(body))) head = Atom(is_letter, [Structure(s, [Pair(H, Z), O])]) body1 = Atom(is_lowercase_aux, [H]) body2 = Atom(is_uppercase, [H])
def learn_text():
    """
    Runs three small Aleph learning tasks on a toy text and prints each result

    We describe piece of text spanning multiple lines:
    "node A red <newline> node B green <newline> node C blue <newline>"
    using the next/2, linestart/2, lineend/2, tokenlength/2 predicates
    """
    token = c_type("token")
    num = c_type("num")

    # Bound to next_ so the builtin next() is not shadowed; the predicate
    # itself is still named "next".
    next_ = c_pred("next", 2, ("token", "token"))
    linestart = c_pred("linestart", 2, ("token", "token"))
    lineend = c_pred("lineend", 2, ("token", "token"))
    tokenlength = c_pred("tokenlength", 2, ("token", "num"))

    n1 = c_const("n1", num)
    n3 = c_const("n3", num)
    n4 = c_const("n4", num)
    n5 = c_const("n5", num)
    node1 = c_const("node1", token)
    node2 = c_const("node2", token)
    node3 = c_const("node3", token)
    red = c_const("red", token)
    green = c_const("green", token)
    blue = c_const("blue", token)
    a_c = c_const("a_c", token)
    b_c = c_const("b_c", token)
    c_c = c_const("c_c", token)
    start = c_const("c_START", token)
    end = c_const("c_END", token)

    bk = Knowledge(
        next_(start, node1), next_(node1, a_c), next_(a_c, red),
        next_(red, node2), next_(node2, green), next_(green, b_c),
        next_(b_c, node3), next_(node3, c_c), next_(c_c, blue),
        next_(blue, end),
        tokenlength(node1, n4), tokenlength(node2, n4),
        tokenlength(node3, n4), tokenlength(a_c, n1), tokenlength(b_c, n1),
        tokenlength(c_c, n1), tokenlength(red, n3), tokenlength(green, n5),
        tokenlength(blue, n4),
        linestart(node1, node1), linestart(a_c, node1),
        linestart(red, node1), linestart(node2, node2),
        linestart(b_c, node2), linestart(green, node2),
        linestart(node3, node3), linestart(c_c, node3),
        linestart(blue, node3),
        lineend(node1, a_c), lineend(a_c, red), lineend(node2, red),
        lineend(b_c, green), lineend(node3, blue), lineend(c_c, blue),
        lineend(red, red), lineend(green, green), lineend(blue, blue))

    solver = SWIProlog()
    eval_fn1 = Coverage(return_upperbound=True)
    learner = Aleph(solver, eval_fn1, max_body_literals=3, do_print=False)

    # 1. Consider the hypothesis: f1(word) :- word is the second word on a line
    if True:
        f1 = c_pred("f1", 1, [token])
        neg = {f1(x) for x in [node1, node2, node3, blue, green, red]}
        pos = {f1(x) for x in [a_c, b_c, c_c]}
        task = Task(positive_examples=pos, negative_examples=neg)

        res = learner.learn(task, bk, None)
        print(res)

    # 2. Consider the hypothesis: f2(word) :- word is the first word on a line
    if True:
        f2 = c_pred("f2", 1, [token])
        # the examples are stated over f2, the predicate this task declares
        neg = {f2(x) for x in [a_c, b_c, c_c, blue, green, red]}
        pos = {f2(x) for x in [node1, node2, node3]}
        task2 = Task(positive_examples=pos, negative_examples=neg)

        res = learner.learn(task2, bk, None)
        print(res)

    # 3. Assume we have learned the predicate node(X) before (A, B and C are nodes).
    # We want to learn nodecolor(Node,X) :- X is the next token after Node
    if True:
        node = c_pred("node", 1, [token])
        color = c_pred("color", 1, [token])
        nodecolor = c_pred("nodecolor", 2, [token, token])
        a = c_var("A", token)
        b = c_var("B", token)
        bk_old = bk.get_all()
        # NOTE(review): the node facts are listed twice, as in the original -
        # confirm whether the repetition is intended
        bk = Knowledge(*bk_old, node(a_c), node(b_c), node(c_c), node(a_c),
                       node(b_c), node(c_c), color(red), color(green),
                       color(blue))
        pos = {
            nodecolor(a_c, red),
            nodecolor(b_c, green),
            nodecolor(c_c, blue)
        }
        neg = {
            nodecolor(node1, red),
            nodecolor(node2, red),
            nodecolor(node3, red),
            nodecolor(node1, blue),
            nodecolor(node2, blue),
            # node3 completes the node x colour grid (was a repeated node2)
            nodecolor(node3, blue),
            nodecolor(node1, green),
            nodecolor(node2, green),
            nodecolor(node3, green),
            nodecolor(a_c, green),
            nodecolor(a_c, blue),
            nodecolor(b_c, blue),
            nodecolor(b_c, red),
            nodecolor(c_c, red),
            nodecolor(c_c, green)
        }
        task3 = Task(positive_examples=pos, negative_examples=neg)

        result = learner.learn(task3,
                               bk,
                               None,
                               initial_clause=Body(node(a), color(b)),
                               minimum_freq=3)
        print(result)
def head_first(head: Atom, body: Body) -> bool:
    """
    Returns True if the first literal of the body shares at least one
    variable with the head
    """
    first_literal_vars = set(body.get_literals()[0].get_variables())
    head_vars = set(head.get_variables())
    shared = first_literal_vars & head_vars
    return len(shared) != 0
# Fragment: declares logic variables, then starts building clauses. Names such
# as H, not_space, is_space, s, is_uppercase, is_uppercase_aux, ... are not
# defined in this fragment and must already be in scope.
Ta = c_var("Ta")
Tb = c_var("Tb")
A = c_var("A")
B = c_var("B")
C = c_var("C")
D = c_var("D")
E = c_var("E")
H1 = c_var("H1")
H2 = c_var("H2")
Z = c_var("Z")
O = c_var("O")
N = c_var("N")

# create clauses
# NOTE: head and body are rebound for every clause, so statement order matters.

# clause1: head not_space(A), body is the negation of is_space(A)
head = Atom(not_space, [A])
body = Atom(is_space, [A])
clause1 = Clause(head, Body(Not(body)))

# clause2: head is_uppercase over the structure s(Pair(H, Z), O),
# body is_uppercase_aux(H)
head = Atom(is_uppercase, [Structure(s, [Pair(H, Z), O])])
body = Atom(is_uppercase_aux, [H])
clause2 = Clause(head, Body(body))

# clause3: head not_uppercase(A), body is the negation of is_uppercase(A)
head = Atom(not_uppercase, [A])
body = Atom(is_uppercase, [A])
clause3 = Clause(head, Body(Not(body)))

# clause4: head is_lowercase over the structure s(Pair(H, Z), O),
# body is_lowercase_aux(H)
head = Atom(is_lowercase, [Structure(s, [Pair(H, Z), O])])
body = Atom(is_lowercase_aux, [H])
clause4 = Clause(head, Body(body))

# clause5: head not_lowercase(A), body is the negation of is_lowercase(A)
head = Atom(not_lowercase, [A])
body = Atom(is_lowercase, [A])
clause5 = Clause(head, Body(Not(body)))

# head and two body atoms for is_letter; the clause built from them is not
# part of this fragment
head = Atom(is_letter, [Structure(s, [Pair(H, Z), O])])
body1 = Atom(is_lowercase_aux, [H])
# NOTE(review): body1 uses is_lowercase_aux on H, but body2 uses is_uppercase
# (not is_uppercase_aux) on H - confirm this asymmetry is intended
body2 = Atom(is_uppercase, [H])
def has_unexplained_last_var_strict(head: Atom, body: Body) -> bool:
    """
    Returns the result of strict_unexplained_last_var_exists applied to the
    atoms of the body (negated literals are replaced by the atom they wrap);
    head is not inspected
    """
    atoms = []
    for literal in body.get_literals():
        atoms.append(literal.get_atom() if isinstance(literal, Not) else literal)
    return strict_unexplained_last_var_exists(atoms)
def only_1_pred_for_1_var(head: Atom, body: Body) -> bool:
    """
    Returns the result of only_1_pred_exists_for_1_var applied to the atoms
    of the body (negated literals are replaced by the atom they wrap); head
    is not inspected
    """
    atoms = []
    for literal in body.get_literals():
        if isinstance(literal, Not):
            atoms.append(literal.get_atom())
        else:
            atoms.append(literal)
    return only_1_pred_exists_for_1_var(atoms)
def _get_body_predicates_list(body: Body):
    """
    Returns the predicate of every literal in the body, in body order
    (one entry per literal, so repeated predicates are kept)
    """
    predicates = []
    for literal in body.get_literals():
        predicates.append(literal.get_predicate())
    return predicates
def has_duplicated_variable(head: Atom, body: Body) -> bool:
    """
    Returns True if the last literal of the body uses the same variable
    more than once (head is not inspected)
    """
    last_literal_vars = body.get_literals()[-1].get_variables()
    distinct_vars = set(last_literal_vars)
    return len(last_literal_vars) != len(distinct_vars)