예제 #1
0
 def general_get_root_sub(tree):
     """Choose the root sub-tree of ``tree`` and run the NP processor on it.

     The direct children of ``tree`` accepted by ``toolbox.is_tree`` are
     gathered; the last of them is taken as the root. When no child
     qualifies, ``tree`` itself plays the role of the root.

     Returns a dict with three keys:
         'np': the result of ``toolbox.np_processor`` on the chosen root,
         'tree': the chosen root (last qualifying child, or ``tree``),
         'new_tree': the qualifying children that remain once the root has
             been taken out (an empty list when nothing qualified).
     """
     siblings = [child for child in tree if toolbox.is_tree(child)]
     # Taking the last qualifying child also removes it from ``siblings``,
     # so 'new_tree' only carries the leftovers.
     root = siblings.pop() if siblings else tree
     return {'np': toolbox.np_processor(root), 'tree': root, 'new_tree': siblings}
예제 #2
0
 def get_root_sub(tree):
     """Return the root sub-tree found under the only sub-tree of ``tree``.

     The elements of ``toolbox.get_the_only_sub_tree(tree)`` that
     ``toolbox.is_tree`` accepts are collected and the last one is returned
     as-is. When none qualifies, the result of ``toolbox.np_processor(tree)``
     is returned instead.

     NOTE(review): the two outcomes are of different kinds (a raw sub-tree
     versus the NP processor's output) -- confirm callers expect that.
     """
     candidates = [
         child
         for child in toolbox.get_the_only_sub_tree(tree)
         if toolbox.is_tree(child)
     ]
     if not candidates:
         return toolbox.np_processor(tree)
     return candidates[-1]
예제 #3
0
 def general_filter(self, main_tree):
     """Drop 'PRE' sub-trees whose leading word is an English stop word.

     Only direct children of ``main_tree`` that ``toolbox.is_tree`` accepts
     and whose label is 'PRE' are considered. For each of them ``pre[0][0]``
     (presumably the word of its first tagged leaf) is looked up in the
     English stop-word list, and the sub-tree is removed from ``main_tree``
     on a match.

     ``main_tree`` is modified in place and also returned.
     """
     # Collect the candidates up front so that removing from main_tree below
     # never happens while main_tree itself is being iterated.
     pre_trees = [
         child for child in main_tree
         if toolbox.is_tree(child) and child.label() == 'PRE'
     ]
     stop_words = set(stopwords.words('english'))
     for pre_tree in pre_trees:
         if pre_tree[0][0] in stop_words:
             main_tree.remove(pre_tree)
     return main_tree
예제 #4
0
 def internal_bigram_rectifier(self, bigram):
     """Re-parse each sub-tree of ``bigram`` with an NP-internal grammar.

     Every element of ``bigram`` accepted by ``toolbox.is_tree`` is parsed
     with a small chunk grammar (rules FUNC_JJS and FUNC_TARGET). When the
     parse differs from the input sub-tree, the sub-tree is replaced in
     ``bigram`` through ``toolbox.update_element_within_tree``. If the parse
     contains a FUNC_TARGET chunk and at least one leaf whose tag starts
     with 'NN', the parse is first passed through
     ``toolbox.separate_noun_in_tree`` and relabelled 'NP'.

     Returns the (possibly replaced) ``bigram``.
     """
     np_internal_grammar = '''FUNC_JJS: {<DT><JJS>}
                              FUNC_TARGET: {<WP|WDT><NP|NN>+}
                              '''
     # Snapshot the sub-trees first: ``bigram`` is rebound inside the loop.
     sub_trees = [element for element in bigram if toolbox.is_tree(element)]
     for original in sub_trees:
         parsed = toolbox.parse(np_internal_grammar, original)
         if parsed != original:  # the grammar changed something in this sub-tree
             chunk_labels = [
                 chunk.label() for chunk in parsed if toolbox.is_tree(chunk)
             ]
             print('labels', chunk_labels)
             # Only a FUNC_TARGET chunk needs its noun split off from the
             # function word, and only when a noun is actually present.
             if 'FUNC_TARGET' in chunk_labels and any(
                     leaf[1].startswith('NN') for leaf in parsed.leaves()):
                 parsed = toolbox.separate_noun_in_tree(parsed)
                 parsed.set_label('NP')
             bigram = toolbox.update_element_within_tree(original, parsed, bigram)
     return bigram
예제 #5
0
    def NINCHAIN(self, tree):
        """Process a NIN-chain tree: resolve the root, then its last sub-tree.

        After ``toolbox.remove_qi`` is applied to ``tree``, the root entry is
        obtained from ``PatternProcessor.get_root_sub_and_search_for_uri`` and
        copied into a fresh dict, which gains a 'component' key built by
        ``PatternProcessor.construct_components``. The last element of the
        root's 'tree' that ``toolbox.is_tree`` accepts is then handed to
        ``self.nin_processor`` together with the root dict.

        Raises IndexError when the root's 'tree' holds no sub-tree.
        """
        print('Entering NINCHAIN')
        stripped = toolbox.remove_qi(tree)
        # Work on a copy so the 'component' key is not written into the
        # mapping returned by the pattern processor.
        root_np = dict(PatternProcessor.get_root_sub_and_search_for_uri(stripped))
        root_np['component'] = PatternProcessor.construct_components(
            root_np, root_np['variable'])

        sub_trees = [child for child in root_np['tree'] if toolbox.is_tree(child)]
        last_sub_tree = sub_trees.pop()
        nin_np = self.nin_processor(last_sub_tree, root_np)

        print('nin np ', nin_np['component'])
예제 #6
0
    def main_tree_navigator(self, main_tree):
        """Entry point of the interpreter: inspect the tree's top-level labels.

        Stores ``main_tree`` on ``self.main_tree``, collects the labels of its
        direct children accepted by ``toolbox.is_tree``, discards the first
        'QI' label if present, and passes the first remaining label to
        ``self.simple_level_1_classifier``.

        Returns that classifier's result, or None when no label remains.
        """
        self.main_tree = main_tree

        # Step 1: find out which labels sit directly under the root.
        labels = [
            child.label() for child in main_tree if toolbox.is_tree(child)
        ]
        print('Level 1 labels', labels)
        if 'QI' in labels:
            labels.remove('QI')

        if not labels:
            return None
        # NOTE(review): the original remark here speaks of "only one label",
        # yet any non-empty label list reaches this point and only the first
        # label is classified -- confirm whether a length check was intended.
        return self.simple_level_1_classifier(labels[0])
예제 #7
0
    def NINVSNIN(self, tree):
        """Process a NIN-versus-NIN tree and run the resulting query.

        After ``toolbox.remove_qi`` is applied to ``tree``, the root entry is
        obtained from ``PatternProcessor.get_root_sub_and_search_for_uri`` and
        copied into a fresh dict, which gains a 'component' key. Each element
        of the root's 'tree' accepted by ``toolbox.is_tree`` is turned into a
        component: 'NIN'-labelled ones through ``self.nin_processor``, all
        others through ``self.singlenp_processor``. The first two components
        are combined into a query and fired through ``SPARQLEngine``.

        Returns whatever ``fire_mix_query`` returns.
        Raises IndexError when fewer than two components were produced.
        """
        print('Entering NINVSNIN')
        stripped = toolbox.remove_qi(tree)
        # Work on a copy so the 'component' key is not written into the
        # mapping returned by the pattern processor.
        root_np = dict(PatternProcessor.get_root_sub_and_search_for_uri(stripped))
        root_np['component'] = PatternProcessor.construct_components(
            root_np, root_np['variable'])

        components = []
        sub_trees = [child for child in root_np['tree'] if toolbox.is_tree(child)]
        for sub_tree in sub_trees:
            if sub_tree.label() == 'NIN':
                print(' === Entering NIN processor ===')
                processor = self.nin_processor
            else:
                print(' === Entering SINGLENP processor ===')
                processor = self.singlenp_processor
            components.append(processor(sub_tree, root_np))

        engine = SPARQLEngine()
        # Only the first two components take part in the comparison query.
        query = QueryConstructor.construct_standard_ninvsnin_query(
            root_np, components[0], components[1])
        return engine.fire_mix_query(query)
예제 #8
0
    def question_filter(self, main_tree):
        """Replace question elements of ``main_tree`` with a leading 'QI' node.

        Each direct child of ``main_tree`` is examined:

        * a sub-tree (per ``toolbox.is_tree``) labelled 'SINGLENP' has its
          only sub-tree passed to ``toolbox.question_identifier``; when that
          reports a question, the sub-tree is removed and a
          ``Tree('QI', (expectation, 'EXP'))`` node is inserted at position 0;
        * sub-trees with any other label are left untouched;
        * a non-tree element is passed to ``toolbox.question_identifier``
          directly and is always removed; when it is a question, a 'QI' node
          is inserted at position 0 in its place.

        ``toolbox.question_identifier`` is indexed as ``[0]`` (is-question
        flag) and ``[1]`` (expectation).

        ``main_tree`` is modified in place and also returned.
        """
        # Iterate over a snapshot: the body removes from and inserts into
        # main_tree, and mutating the sequence being iterated made the
        # original loop skip the element following every plain removal.
        for node in list(main_tree):
            if toolbox.is_tree(node):
                if node.label() != 'SINGLENP':
                    continue
                question_result = toolbox.question_identifier(
                    toolbox.get_the_only_sub_tree(node))
                if question_result[0]:
                    main_tree.remove(node)
                    main_tree.insert(0, Tree('QI', (question_result[1], 'EXP')))
            else:
                question_result = toolbox.question_identifier(node)
                # Loose (non-tree) elements never survive the filter; a
                # question additionally leaves a QI marker at the front.
                main_tree.remove(node)
                if question_result[0]:
                    main_tree.insert(0, Tree('QI', (question_result[1], 'EXP')))

        return main_tree