def general_get_root_sub(tree):
    """Pick the root sub-tree of ``tree`` and bundle it with its processed form.

    The direct children of ``tree`` that ``toolbox.is_tree`` accepts are
    collected; the last of them becomes the root. When there are none,
    ``tree`` itself is used as the root.

    Returns a dict with three keys:
        'np'       -- ``toolbox.np_processor`` applied to the root,
        'tree'     -- the root itself,
        'new_tree' -- the remaining sub-trees (empty when ``tree`` was used).
    """
    remaining = [child for child in tree if toolbox.is_tree(child)]
    # pop() takes the last sub-tree out of ``remaining``, so 'new_tree'
    # only holds the siblings that were not chosen as the root.
    root = remaining.pop() if remaining else tree
    return {
        'np': toolbox.np_processor(root),
        'tree': root,
        'new_tree': remaining,
    }
def get_root_sub(tree):
    """Return the last sub-tree found under the only sub-tree of ``tree``.

    The children of ``toolbox.get_the_only_sub_tree(tree)`` are filtered
    with ``toolbox.is_tree``. If any remain, the last one is returned
    as-is; otherwise the result of ``toolbox.np_processor(tree)`` is
    returned instead.

    NOTE(review): the two branches return different kinds of value (a raw
    sub-tree vs. the np_processor result) -- confirm callers expect that.
    """
    candidates = [
        child
        for child in toolbox.get_the_only_sub_tree(tree)
        if toolbox.is_tree(child)
    ]
    if not candidates:
        return toolbox.np_processor(tree)
    return candidates[-1]
def general_filter(self, main_tree):
    """Remove direct 'PRE' sub-trees whose leading word is an English stop word.

    ``main_tree`` is modified in place and also returned. Only direct
    children labelled 'PRE' are examined; the word tested is ``pre[0][0]``.
    """
    pre_nodes = [
        child
        for child in main_tree
        if toolbox.is_tree(child) and child.label() == 'PRE'
    ]
    stop_words = set(stopwords.words('english'))
    # Iterate over the separately collected list so that removing from
    # main_tree does not disturb the loop.
    for pre_node in pre_nodes:
        if pre_node[0][0] in stop_words:
            main_tree.remove(pre_node)
    return main_tree
def internal_bigram_rectifier(self, bigram):
    """Re-parse each sub-tree of ``bigram`` with an internal grammar and splice the result back.

    Every child of ``bigram`` accepted by ``toolbox.is_tree`` is passed to
    ``toolbox.parse`` together with the grammar below. When the parse result
    differs from the child, the labels of the result's sub-trees are printed
    and, if a 'FUNC_TARGET' label is present and the result has a leaf whose
    tag starts with 'NN', the noun is separated via
    ``toolbox.separate_noun_in_tree`` and the result is relabelled 'NP'.
    The child is then replaced through ``toolbox.update_element_within_tree``.

    Returns the (possibly replaced) ``bigram`` object.
    """
    # Two rules: FUNC_JJS (determiner + superlative adjective) and
    # FUNC_TARGET (WP/WDT followed by one or more NP/NN).
    # NOTE(review): chunk grammars are usually written one rule per line --
    # confirm the whitespace inside this literal is what toolbox.parse expects.
    np_internal_grammar = '''FUNC_JJS: {<DT><JJS>} FUNC_TARGET: {<WP|WDT><NP|NN>+} '''
    # The sub-trees are collected up front because ``bigram`` is rebound
    # inside the loop.
    for np in [np for np in bigram if toolbox.is_tree(np)]:
        result = toolbox.parse(np_internal_grammar, np)
        if result != np:
            # The grammar changed the sub-tree (original note: this
            # indicates FUNC_TARGET is spotted).
            labels = [
                sub_tree.label() for sub_tree in result if toolbox.is_tree(sub_tree)
            ]
            print('labels', labels)
            if 'FUNC_TARGET' in labels:
                # Only FUNC_TARGET needs its noun separated from the FUNC part.
                if [
                    tag_name[1]
                    for tag_name in result.leaves()
                    if tag_name[1].startswith('NN')
                ]:
                    # There is a noun within the tree: separate it and make
                    # the result an NP.
                    result = toolbox.separate_noun_in_tree(result)
                    result.set_label('NP')
            # Swap the original sub-tree for the rectified one.
            bigram = toolbox.update_element_within_tree(np, result, bigram)
    return bigram
def NINCHAIN(self, tree):
    """Process a NIN-chain tree and print the component built for its last sub-tree.

    Steps, in order:
      1. strip the tree with ``toolbox.remove_qi``;
      2. resolve the root via
         ``PatternProcessor.get_root_sub_and_search_for_uri`` and copy the
         result into a fresh dict;
      3. attach a 'component' entry built by
         ``PatternProcessor.construct_components`` from that dict and its
         'variable' entry;
      4. run ``self.nin_processor`` on the last sub-tree of the root's 'tree'.

    The visible body only prints the resulting component and returns None.
    Raises IndexError when the root tree has no sub-trees.
    """
    print('Entering NINCHAIN')
    stripped = toolbox.remove_qi(tree)
    # Copy into a new dict so the lookup result itself is not modified.
    root_np = dict(PatternProcessor.get_root_sub_and_search_for_uri(stripped))
    root_np['component'] = PatternProcessor.construct_components(
        root_np, root_np['variable']
    )
    sub_trees = [child for child in root_np['tree'] if toolbox.is_tree(child)]
    last_nin = sub_trees.pop()
    nin_np = self.nin_processor(last_nin, root_np)
    print('nin np ', nin_np['component'])
def main_tree_navigator(self, main_tree):
    """Entry point of the interpreter: classify the tree by its top-level labels.

    Stores ``main_tree`` on ``self.main_tree``, gathers the labels of its
    direct sub-trees, drops the first 'QI' label if present, and hands the
    first remaining label to ``self.simple_level_1_classifier``.

    Returns whatever the classifier returns, or None when no label is left.

    NOTE(review): the original comment describes the "only one label" case,
    but dispatch happens whenever at least one label remains -- confirm
    whether multi-label trees are meant to be handled separately.
    """
    self.main_tree = main_tree

    # 1. Recognise the labels present at the first level of the tree.
    level_1_labels = []
    for sub_tree in main_tree:
        if toolbox.is_tree(sub_tree):
            level_1_labels.append(sub_tree.label())
    print('Level 1 labels', level_1_labels)

    # The question indicator is not a pattern label; leave it out.
    if 'QI' in level_1_labels:
        level_1_labels.remove('QI')

    # 2. Dispatch on the first remaining label, if any.
    if not level_1_labels:
        return None
    return self.simple_level_1_classifier(level_1_labels[0])
def NINVSNIN(self, tree):
    """Build and fire the standard NIN-vs-NIN query for ``tree``.

    The tree is stripped with ``toolbox.remove_qi`` and its root resolved via
    ``PatternProcessor.get_root_sub_and_search_for_uri``. Each sub-tree of
    the root's 'tree' is then processed: sub-trees labelled 'NIN' go through
    ``self.nin_processor``, all others through ``self.singlenp_processor``.
    The first two processed components are combined with the root by
    ``QueryConstructor.construct_standard_ninvsnin_query`` and the query is
    executed with ``SPARQLEngine.fire_mix_query``.

    Returns the result of ``fire_mix_query``.
    Raises IndexError when fewer than two sub-trees are available.
    """
    print('Entering NINVSNIN')
    stripped = toolbox.remove_qi(tree)
    # Copy into a new dict so the lookup result itself is not modified.
    root_np = dict(PatternProcessor.get_root_sub_and_search_for_uri(stripped))
    root_np['component'] = PatternProcessor.construct_components(
        root_np, root_np['variable']
    )

    children = [child for child in root_np['tree'] if toolbox.is_tree(child)]
    components = []
    for child in children:
        if child.label() != 'NIN':
            print(' === Entering SINGLENP processor ===')
            processed = self.singlenp_processor(child, root_np)
        else:
            print(' === Entering NIN processor ===')
            processed = self.nin_processor(child, root_np)
        components.append(processed)

    engine = SPARQLEngine()
    query = QueryConstructor.construct_standard_ninvsnin_query(
        root_np, components[0], components[1]
    )
    return engine.fire_mix_query(query)
def question_filter(self, main_tree):
    """Replace question sub-trees with a leading 'QI' node and drop non-tree children.

    For every direct child of ``main_tree``:
      * a child that ``toolbox.is_tree`` rejects is removed;
      * a sub-tree labelled 'SINGLENP' is classified through its only
        sub-tree, any other sub-tree is classified directly, using
        ``toolbox.question_identifier``;
      * when the classifier flags a question, the child is removed and
        ``Tree('QI', (expectation, 'EXP'))`` is inserted at the front.

    ``main_tree`` is modified in place and also returned.
    """
    # Walk a snapshot: the loop removes from and inserts into main_tree, and
    # mutating a list while iterating over it makes the iterator skip the
    # element that follows each plain removal.
    for tree in list(main_tree):
        if not toolbox.is_tree(tree):
            main_tree.remove(tree)
            continue

        # SINGLENP wrappers are judged by the sub-tree they wrap.
        if tree.label() == 'SINGLENP':
            candidate = toolbox.get_the_only_sub_tree(tree)
        else:
            candidate = tree

        question_result = toolbox.question_identifier(candidate)
        is_question = question_result[0]
        expectation = question_result[1]
        if is_question:
            main_tree.remove(tree)
            main_tree.insert(0, Tree('QI', (expectation, 'EXP')))
    return main_tree