# Variant 1: build the knowledge base with OntoBuilder, resolve a query through
# GraphWalker, then (currently unreachable, see the exit() below) compose a new
# algorithm. Imports for the project classes used here are not shown in the source.
def main():
    builder = OntoBuilder()
    builder.build_knowledge_base('data/knowledge_base.txt')
    builder.build_facts('data/fact_base.txt')
    # builder.store('data/knowledge_base.json')

    onto_container = OntoContainer()
    onto_container.load("data/knowledge_base.json")
    onto_container.build_secondary_connections()

    algo1 = Algorithm(onto_container=onto_container,
                      filename='algo/patterns/simple_connection.json')
    algo_container = AlgoContainer()
    algo_container.add_algorithm(algo1)

    brain = Brain(onto_container=onto_container, algo_container=algo_container)
    estimator = Estimator(brain)
    algo_composer = AlgoComposer(brain=brain, estimator=estimator)

    query = 'do people in Russia speak english?'
    # query = 'does USA have people?'

    graph_walker = GraphWalker(brain=brain)
    graph_walker.train_mode = True
    result = graph_walker.resolve(query)
    print(result)
    exit()

    # Unreachable while the exit() above is in place: compose a new algorithm
    # from the query and save it as a pattern.
    algorithm = algo_composer.compose(query, 'right')
    if algorithm:
        algorithm.save('algo/patterns/composed.json')
# Variant 2: switch to OntoBuilder2 and build algorithm patterns from
# data/algo_base.txt before resolving the query with GraphWalker.
def main():
    builder = OntoBuilder2()
    # builder.build_knowledge_base('data/knowledge_base.txt')
    builder.build_facts('data/fact_base.txt')
    builder.store('data/knowledge_base.json')

    onto_container = OntoContainer()
    onto_container.load("data/knowledge_base.json")
    onto_container.build_secondary_connections()

    algo1 = Algorithm(onto_container=onto_container,
                      filename='algo/patterns/simple_connection.json')
    algo_container = AlgoContainer()
    algo_container.add_algorithm(algo1)

    brain = Brain(onto_container=onto_container, algo_container=algo_container)
    estimator = Estimator(brain)

    algo_builder = AlgoBuilder(brain)
    algo_builder.build_from('data/algo_base.txt', './algo/patterns')

    # query = 'do people in a slavic speaking country speak english?'
    query = 'do people in a USA speak english?'
    # query = 'does USA have people?'

    graph_walker = GraphWalker(brain=brain)
    graph_walker.train_mode = True
    result = graph_walker.resolve(query)
    print(result)
    exit()
# Variant 3: load the generated algorithm patterns into an AlgoContainer and
# answer the query through AlgoRunner instead of walking the graph directly.
def main():
    builder = OntoBuilder2()
    builder.build_facts('data/fact_base.txt')
    builder.store('data/knowledge_base.json')

    onto_container = OntoContainer()
    onto_container.load("data/knowledge_base.json")
    onto_container.build_secondary_connections()

    algo_container = AlgoContainer()
    brain = Brain(onto_container=onto_container, algo_container=algo_container)
    estimator = Estimator(brain)

    algo_builder = AlgoBuilder(brain)
    algo_builder.build_from('data/algo_base.txt', './algo/patterns')

    algo_container.add_algorithm(
        Algorithm(onto_container=onto_container,
                  filename='algo/patterns/closed_q_reply.json'))
    algo_container.add_algorithm(
        Algorithm(onto_container=onto_container,
                  filename='algo/patterns/what_question_reply.json'))
    algo_container.add_algorithm(
        Algorithm(onto_container=onto_container,
                  filename='algo/patterns/switch_context.json'))
    algo_container.add_algorithm(
        Algorithm(onto_container=onto_container,
                  filename='algo/patterns/get_closest.json'))
    algo_container.attach_to_brain(brain)

    # query = 'do people in a slavic speaking country speak english?'
    query = 'do people in a USA speak english?'
    # query = 'does USA have people?'

    algo_runner = AlgoRunner(brain=brain)
    result = algo_runner.run(query)
    print(result)
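# The three main() variants above trace the pipeline's evolution: variant 1
# resolves a query through GraphWalker and can compose a new algorithm;
# variant 2 switches to OntoBuilder2 and generates algorithm patterns from
# data/algo_base.txt; variant 3 loads the generated patterns into an
# AlgoContainer and answers via AlgoRunner. A minimal entry point, assuming
# each variant lives in its own script (they share a name, so only one can
# exist per module):
if __name__ == '__main__':
    main()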
from nlp import Clause
from onto.onto_container import OntoContainer
from onto_resolver import OntoResolver

container = OntoContainer()
container.load("onto/moneycare.json")

clause = Clause()
clause.load("test/sample_query3.json")

resolver = OntoResolver(container)
reply = resolver.get_reply(clause)
print(reply)
import json
import re
import string

from onto.onto_container import OntoContainer
# Node, Connection and OntoEncoder are project-internal classes; their import
# paths are not shown in the source snippets.


class OntoBuilder:
    def __init__(self):
        self.nodes = set()
        self.id_counter = 0
        self.fact_counter = 0
        self.direction_counter = 0
        self.container = OntoContainer()

    @staticmethod
    def load_list_from_file(filename):
        lines = []
        with open(filename, 'r', encoding='utf-8') as file:
            for line in file:
                lines.append(line.strip())
        return lines

    def build_knowledge_base(self, filename):
        lines = OntoBuilder.load_list_from_file(filename)
        for line in lines:
            self._build_kb_item(line)
        self.container.nodes = list(self.nodes)
        self.container.sort_nodes_by_id()

    def build_facts(self, filename):
        lines = OntoBuilder.load_list_from_file(filename)
        direction_nodes = []
        fact_nodes = []
        for line in lines:
            if line.startswith('#'):
                continue
            node = self._build_fact(line)
            if node:
                if line[:2].lower() == 'to':
                    self.direction_counter += 1
                    node.pattern = 'direction {}'.format(self.direction_counter)
                    direction_nodes.append(node)
                else:
                    self.fact_counter += 1
                    node.pattern = 'fact {}'.format(self.fact_counter)
                    fact_nodes.append(node)
        if len(direction_nodes) > 1:
            self.id_counter += 1
            pattern = 'direction'
            abstract_direction_node = Node(id=str(self.id_counter),
                                           pattern=pattern,
                                           container=self.container,
                                           abstract=True)
            self.container.nodes.append(abstract_direction_node)
            for node in direction_nodes:
                self._add_bidirect_connections(abstract_direction_node, node)

    def store(self, filename):
        out_val = {'nodes': self.container.nodes,
                   'connections': self.container.connections}
        with open(filename, mode='wt', encoding='utf-8') as output_file:
            print(self._serialize(out_val), file=output_file)

    @staticmethod
    def _serialize(value):
        return json.dumps(value, cls=OntoEncoder)

    def _find_node_by_pattern(self, pattern):
        nodes = [node for node in self.nodes if node.pattern == pattern]
        if nodes:
            return nodes[0]
        return None

    def _build_kb_item(self, line):
        matches = re.findall(r"(\[[\w\d\s-]+\])", line)
        make_abstract_node = '+' in line
        make_connection = '*' in line
        nodes = []
        for m in matches:
            pattern = m.strip('[').strip(']')
            node = self._find_node_by_pattern(pattern)
            if not node:
                self.id_counter += 1
                node = Node(id=str(self.id_counter), pattern=pattern,
                            container=self.container, abstract=False)
            nodes.append(node)
        if make_connection:
            self._add_bidirect_connections(nodes[0], nodes[1])
        if make_abstract_node:
            self.id_counter += 1
            pattern = ' '.join([node.pattern for node in nodes])
            abstract_node = Node(id=str(self.id_counter), pattern=pattern,
                                 container=self.container, abstract=True)
            for node in nodes:
                self._add_bidirect_connections(node, abstract_node)
            nodes.append(abstract_node)
        self.nodes.update(nodes)

    def _add_bidirect_connections(self, node1, node2):
        if node1 == node2:
            raise Exception('cannot connect node to itself')
        connection = Connection(source=node1, target=node2, container=self.container)
        self.container.connections.append(connection)
        connection = Connection(source=node2, target=node1, container=self.container)
        self.container.connections.append(connection)

    def _build_fact(self, line):
        # Strip punctuation (keeping hyphens) and split the line into terms.
        translator = str.maketrans('', '', string.punctuation.replace('-', '') + '«»')
        terms = line.translate(translator).split()
        nodes_to_connect = set()
        nodes = []
        # collect nodes for patterns
        for term in terms:
            node = self.container.get_node_by_pattern(term)
            if node:
                nodes.append(node)
        # find the most abstract node for each
        eliminated = []
        for node in nodes:
            most_abstract_node = self._get_most_abstract_node(node, nodes, eliminated)
            if most_abstract_node:
                nodes_to_connect.add(most_abstract_node)
        nodes_to_connect = [node for node in nodes_to_connect if node not in eliminated]
        # connect them all
        if len(nodes_to_connect) < 2:
            return
        if len(nodes_to_connect) == 2:
            self._add_bidirect_connections(nodes_to_connect[0], nodes_to_connect[1])
        else:
            self.id_counter += 1
            # self.fact_counter += 1
            # pattern = 'direction {}'.format(self.fact_counter)
            fact_node = Node(id=str(self.id_counter), pattern='',
                             container=self.container, abstract=True)
            fact_node.knowledge_center = True
            self.container.nodes.append(fact_node)
            for node in nodes_to_connect:
                self._add_bidirect_connections(fact_node, node)
            return fact_node

    def _get_upper_abstract_nodes(self, node):
        return [conn.target for conn in self.container.connections
                if conn.source == node and conn.target.abstract]

    def _get_most_abstract_node(self, src_node, nodes, eliminated):
        current_node = src_node
        while True:
            abstract_node = self._get_most_abstract_node_step(current_node, nodes, eliminated)
            if abstract_node == current_node or abstract_node in eliminated:
                return abstract_node
            eliminated.append(current_node)
            current_node = abstract_node

    def _get_most_abstract_node_step(self, src_node, nodes, eliminated):
        upper_abstract = self._get_upper_abstract_nodes(src_node)
        upper_abstract = [node for node in upper_abstract if node not in eliminated]
        if len(upper_abstract) == 0:
            return src_node
        candidate_abstracts = set()
        for abstract_node in upper_abstract:
            for node in nodes:
                if node == src_node:
                    continue
                if self.container.are_nodes_connected(node, abstract_node):
                    candidate_abstracts.add(abstract_node)
                    eliminated.append(node)
        if len(candidate_abstracts) > 1:
            raise Exception("don't know what to do with two parallel abstracts")
        if len(candidate_abstracts) == 1:
            return candidate_abstracts.pop()
        return src_node
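# Input format sketch for OntoBuilder, inferred from _build_kb_item() and
# build_facts() above; the concrete lines are illustrative, not taken from
# the source data files:
#
#   data/knowledge_base.txt  (one item per line)
#     [russia] [country] *      -> bidirectional connection russia <-> country
#     [slavic] [language] * +   -> same connection, plus an abstract node
#                                  'slavic language' linked to both patterns
#
#   data/fact_base.txt  (one fact per line)
#     # lines starting with '#' are skipped
#     to ...                    -> becomes a 'direction N' node; all direction
#                                  nodes are linked to one abstract 'direction'
#                                  node when there is more than one
#     <any other line>          -> becomes a 'fact N' knowledge-center node
#                                  connecting the most abstract matched nodes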
import json
import string
from collections import Counter

from onto.onto_container import OntoContainer
# Node, Connection, OntoEncoder and the minimal_weight constant are
# project-internal; their import paths are not shown in the source snippets.


class OntoBuilder2:
    def __init__(self):
        self.nodes = []
        self.stop_words = ['a', 'is', 'in']
        self.container = OntoContainer()
        self.line_nodes = {}

    @staticmethod
    def load_list_from_file(filename):
        lines = []
        with open(filename, 'r', encoding='utf-8') as file:
            for line in file:
                lines.append(line.strip())
        return lines

    def tokenize_line(self, line):
        # Strip punctuation (keeping hyphens) and split the line into terms.
        translator = str.maketrans('', '', string.punctuation.replace('-', '') + '«»')
        terms = line.translate(translator).split()
        return terms

    def build_facts(self, filename):
        lines = OntoBuilder2.load_list_from_file(filename)
        self._make_connections(lines)
        direction_nodes = []
        fact_nodes = []
        fact_counter = 0
        direction_counter = 0
        for line in self.line_nodes:
            nodes = self.line_nodes[line]
            if not self._is_fact(nodes):
                continue
            fact_node = self._build_fact(nodes)
            if fact_node:
                if line[:2].lower() == 'to':
                    direction_counter += 1
                    fact_node.pattern = 'direction {}'.format(direction_counter)
                    direction_nodes.append(fact_node)
                else:
                    fact_counter += 1
                    fact_node.pattern = 'fact {}'.format(fact_counter)
                    fact_nodes.append(fact_node)

    def store(self, filename):
        out_val = {'nodes': self.container.nodes,
                   'connections': self.container.connections}
        with open(filename, mode='wt', encoding='utf-8') as output_file:
            print(self._serialize(out_val), file=output_file)

    @staticmethod
    def _serialize(value):
        return json.dumps(value, cls=OntoEncoder)

    def _make_connections(self, lines):
        for line in lines:
            if not line.startswith('#'):
                self.line_nodes[line] = self._build_nodes(line)
        self._make_simple_connections(lines)
        self._make_knowledge_connections(lines)

    def _make_knowledge_connections(self, lines):
        # Keep merging the most frequent bigram until no bigram repeats.
        while True:
            if not self._make_knowledge_connections_1pass(lines):
                break

    def _make_knowledge_connections_1pass(self, lines):
        fact_nodes = []
        for line in lines:
            if line not in self.line_nodes:
                continue
            nodes = self.line_nodes[line]
            if len(nodes) > 2:
                fact_nodes.append(nodes)
        bigrams = []
        for f_nodes in fact_nodes:
            bigrams.extend(self._get_possible_bigrams(f_nodes))
        if not bigrams:
            return False
        counter = Counter(bigrams)
        top_frequent = counter.most_common(1)[0]
        num_times = top_frequent[1]
        if num_times < 2:
            return False
        bigram_repr = top_frequent[0]
        nodes_repr = bigram_repr.split('-')
        node1 = self.container.get_node_by_id(nodes_repr[0])
        node2 = self.container.get_node_by_id(nodes_repr[1])
        # create combined node
        combined_node = Node(self.container.next_node_id(),
                             node1.pattern + ' ' + node2.pattern,
                             self.container)
        combined_node.abstract = True
        self.container.nodes.append(combined_node)
        # replace the two nodes in every fact with the newly created abstract node
        for line in self.line_nodes:
            nodes = self.line_nodes[line]
            if node1 in nodes and node2 in nodes:
                pos1 = nodes.index(node1)
                nodes[pos1] = combined_node
                nodes.remove(node2)
        # connect the source nodes to the newly created abstract node
        weight = min(1.0, num_times * minimal_weight)
        self._add_bidirect_connections(node1, combined_node, weight)
        self._add_bidirect_connections(node2, combined_node, weight)
        return True

    def _make_simple_connections(self, lines):
        for line in lines:
            if line not in self.line_nodes:
                continue
            terms = self.tokenize_line(line)
            if len(terms) < 6 and 'is' in terms:
                self._build_simple_connection(line)

    def _get_possible_bigrams(self, nodes):
        # Collect adjacent and skip-one node pairs as normalized 'id-id' strings.
        bigrams = []
        for i in range(len(nodes) - 1):
            node1 = nodes[i]
            node2 = nodes[i + 1]
            bigram = self._get_bigram_repr(node1, node2)
            if bigram not in bigrams:
                bigrams.append(bigram)
            if i < len(nodes) - 2:
                node2 = nodes[i + 2]
                bigram = self._get_bigram_repr(node1, node2)
                if bigram not in bigrams:
                    bigrams.append(bigram)
        return bigrams

    def _get_bigram_repr(self, node1, node2):
        id1 = int(node1.node_id)
        id2 = int(node2.node_id)
        min_id = min(id1, id2)
        max_id = max(id1, id2)
        return '{}-{}'.format(min_id, max_id)

    def _build_nodes(self, line):
        terms = self.tokenize_line(line)
        nodes = []
        for term in terms:
            if term in self.stop_words:
                continue
            node = self.container.get_node_by_pattern(term)
            if node is None:
                node = Node(self.container.next_node_id(), term, self.container)
                self.container.nodes.append(node)
            nodes.append(node)
        return nodes

    def _is_fact(self, nodes):
        has_abstract_nodes = any(node.abstract for node in nodes)
        return has_abstract_nodes or len(nodes) > 2

    def _build_simple_connection(self, line):
        nodes = self.line_nodes[line]
        if len(nodes) > 2:
            raise Exception('cannot handle more than 2 nodes in _build_simple_connection()')
        self._add_bidirect_connections(nodes[0], nodes[1], minimal_weight)

    def _build_fact(self, nodes):
        fact_node = Node(self.container.next_node_id(), pattern='',
                         container=self.container, abstract=True)
        fact_node.knowledge_center = True
        self.container.nodes.append(fact_node)
        for node in nodes:
            self._add_bidirect_connections(fact_node, node, minimal_weight)
        return fact_node

    def _add_bidirect_connections(self, node1, node2, weight):
        if node1 == node2:
            raise Exception('cannot connect node to itself')
        connection = Connection(source=node1, target=node2, container=self.container)
        connection.weight = weight
        self.container.connections.append(connection)
        connection = Connection(source=node2, target=node1, container=self.container)
        connection.weight = weight
        self.container.connections.append(connection)
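# Worked example of _make_knowledge_connections_1pass(), with hypothetical
# fact lines (node ids are illustrative, not from the source data): given
#   'people in Russia speak russian'  -> [people, Russia, speak, russian]
#   'people in USA speak english'     -> [people, USA, speak, english]
# ('in' is a stop word, so it produces no node), both sequences yield the
# skip-one bigram (people, speak), so its 'id-id' key counts twice. The pass
# then creates an abstract node 'people speak', replaces the pair inside both
# fact sequences with it, and links the two source nodes to it with weight
# min(1.0, 2 * minimal_weight). Repeated passes keep collapsing the most
# frequent pair until no bigram occurs at least twice.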
from onto.onto_container import OntoContainer

container = OntoContainer()
container.load("light_match.json")