def check_dependency_rules(self, sentence, verbose=True):
    """Return True if the sentence has a complex dependency structure.

    rule1 must match the whole sentence, and rule2 must match every
    comma/space-separated clause; otherwise the sentence is rejected.
    """
    ## simple normalization: collapse whitespace, then strip initial stop words
    sentence = re.sub(r'\s+', ' ', sentence).strip()
    sentence = self.ini_processor.check_and_remove_ini(sentence,
                                                       self.analyzer,
                                                       verbose=False)

    ## check the complex-structure rule on the full sentence
    ob = base_structure(sentence, self.analyzer)
    if verbose:
        ob.print_dep_tree()

    r1_res = ob.loop_nodes(ob.dep_tree, self.rule1)
    if len(r1_res) == 0:
        return False

    ## split into clauses on full-width/half-width commas and spaces,
    ## then require rule2 to hold for every clause
    eles = re.split(r'，|,| ', sentence)
    for e in eles:
        ob_e = base_structure(e, self.analyzer)
        if verbose:
            ob_e.print_dep_tree()
        r2_res = ob_e.loop_nodes(ob_e.dep_tree, self.rule2)
        if len(r2_res) == 0:
            return False

    return True
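
## Worked example (illustrative only, not from the source): how the clause split used in
## check_dependency_rules behaves; full-width commas, half-width commas, and spaces all
## act as separators.
import re
print(re.split(r'，|,| ', "你觉得呢，我觉得可以 好吗"))
## -> ['你觉得呢', '我觉得可以', '好吗']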
Example #2
def check_and_remove_ini(self, input_sentance, analyzer, verbose=True):
    """One-call entry point: run all rules on the sentence and, if any
    rule matches, remove the initial stop words; otherwise return the
    sentence unchanged."""
    check = self.check_all_rules(input_sentance, analyzer)
    if verbose:
        print(check)
        print(self.rule2name[check])
    if check > 0:
        res = base_structure(input_sentance, analyzer)
        out_sent = self.remove_init_stop_words(input_sentance, verbose)
        if verbose:
            res.print_dep_tree()
            ## show the sentence after the matched rule removed the stop words
            print(self.remove_init_stop_words(input_sentance))
        return out_sent
    else:
        return input_sentance
def check_all_rules(sentance, analyzer, rule_map):
    """Return the index of the first matching rule, or 0 if none matches.

    Each rule_map entry is (rule_fn, positive): when positive is True the
    rule matches if any dependency node satisfies rule_fn; when it is
    False the logic is negated and the rule matches only when no node does.
    """
    ## only sentences that start with a candidate stop word are checked at all
    check = processor._check_candidate(sentance)
    if not check:
        return 0
    res = base_structure(sentance, analyzer)
    for i in range(1, len(rule_map) + 1):
        f_l = res.loop_nodes(res.dep_tree, rule_map[i][0])
        if rule_map[i][1]:
            ## positive logic: at least one node matched the rule
            if len(f_l) > 0:
                return i
        else:
            ## negation logic (flagged in rule_map): no node matched the rule
            if len(f_l) == 0:
                return i
    return 0
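
## Illustrative sketch only (not from the source): the rule_1..rule_4 predicates passed in
## rule_map are not shown in these snippets. This assumes loop_nodes calls the predicate on
## each dependency node and collects the nodes for which it returns True; the dict-style
## node fields ('postag', 'deprel') below are hypothetical, not the actual base_structure API.
def example_rule(node):
    ## hypothetical predicate: a verb node whose relation is subject-predicate (主谓关系)
    return node.get('postag', '').startswith('v') and node.get('deprel') == '主谓关系'

## the second element of each entry flags positive (True) vs. negated (False) matching,
## mirroring how check_all_rules interprets rule_map
example_rule_map = {1: (example_rule, True)}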
results_path = './data/dep_tree_out_1.xlsx'
keep_columns = ['ask']
df = pd.read_excel(data_path, sheet_name='a_b_1')
df = df[keep_columns]
df.dropna(inplace=True)
df.reset_index(inplace=True)
#df = df.head(1000)
input_column_name = 'ask'
#intent_column_name = '意图'  ## '意图' = intent
#%%
## use the HanLP analyzer
print('parsing using han analyzer....')
analyzer = han_analyzer()
processor = Processor('../libs/init_stop_words.txt')
input_data = df[input_column_name].values
#%%
test_data = [processor.remove_init_stop_words(i) for i in input_data]
assert len(test_data) == len(input_data)
df['filtered_input'] = np.array(test_data)
#%%
msg_list = [
    base_structure(s, analyzer).print_dep_tree(print_out=False)
    for s in test_data
]
msg_list = ['\n'.join(m) for m in msg_list]
#%%
df['han_dep'] = df[input_column_name].apply(get_dep_output_han,
                                            args=(analyzer, ))
df['han_dep_tree'] = np.array(msg_list)
#%%
df.to_excel(results_path)
    rule_map = {
        1: (rule_1, True),
        2: (rule_2, True),
        3: (rule_3, True),
        4: (rule_4, False)  ## negated rule: matches only when no node satisfies rule_4
    }

    rule2name = {
        0: "其他",  ## other / no rule matched
        1: "动词+主谓关系",  ## verb + subject-predicate relation
        2: "动词+并列动词 没有主语",  ## verb + coordinated verb, no subject
        3: "动词+名词",  ## verb + noun
        4: "层数=2"  ## dependency-tree depth = 2
    }
    #%%
    test = "你相信世界上有鬼吗"  ## "Do you believe there are ghosts in the world?"
    r = base_structure(test, analyzer).print_dep_tree(print_out=True)
    label = check_all_rules(test, analyzer, rule_map)
    print(label, rule2name[label])

    #%%
    overall_results = []
    for t in test_data:
        try:
            msg = base_structure(t, analyzer).print_dep_tree(print_out=False)
            msg = '\n'.join(msg)
            label = check_all_rules(t, analyzer, rule_map)
            name = rule2name[label]
            overall_results.append((t, msg, label, name))
        except Exception as err:
            print('Something went wrong while processing:', t)
            raise Exception(t) from err
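
    #%%
    ## Sketch (not in the original snippet): collect the loop output into a DataFrame and
    ## export it, mirroring the Excel export above; the column names and output path are
    ## illustrative choices, not from the source.
    results_df = pd.DataFrame(overall_results,
                              columns=['ask', 'han_dep_tree', 'rule_label', 'rule_name'])
    results_df.to_excel('./data/rule_check_out.xlsx')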
Example #6


#    def check_filter_rules(self,input_sentance):
#        stop_words = self.init_stop_words

#%%
if __name__ == "__main__":
    processor = Processor(init_stop_words_path='init_stop_words.txt')
    analyzer = han_analyzer()

    ## test sentence: "Which places do you think are must-visit when travelling?"
    test = "你觉得旅游必去的地方有哪些?"
    ## check whether the sentence starts with one of the initial stop words
    #check = processor._check_candidate(test)
    check = processor.check_all_rules(test, analyzer)
    print(check)
    print(processor.rule2name[check])
    if check > 0:
        res = base_structure(test, analyzer)
        res.print_dep_tree()
        ## a rule matched: show the sentence with the initial stop words removed
        print(processor.remove_init_stop_words(test))

    # single convenience call that runs all the checks and removes the initial stop words
    res = processor.check_and_remove_ini(test, analyzer, verbose=False)
    print(res)
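
    ## Sketch (not in the original): the same one-call entry point applied to a small batch;
    ## both sentences are taken from the test strings used earlier in these snippets.
    sentences = ["你觉得旅游必去的地方有哪些?", "你相信世界上有鬼吗"]
    cleaned = [processor.check_and_remove_ini(s, analyzer, verbose=False) for s in sentences]
    print(cleaned)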