Example #1
0
def gen_zhengzhou_tree(dirname=myconfig.ZZ_STD_ADD,
                       sav_file=myconfig.zhengzhou_std_word,
                       sav_file_2=myconfig.zhengzhou_std_tree):
    """Build two tries from the comma-separated files found under ``dirname``.

    Every file yielded by ``os.walk(dirname)`` is read and its lines are
    shuffled.  For each line that contains a comma, the second
    comma-separated field is taken as the raw value and passed through
    ``utils.pre_trans`` to get the transformed value.  The raw value is
    inserted into one trie and the transformed value into another, and the
    pair is appended to ``./addr_match.txt`` as ``transformed<TAB>raw``.

    Args:
        dirname: Root directory that is walked recursively.
        sav_file: Target handed to ``utils.save_var`` for the trie of raw
            values.
        sav_file_2: Target handed to ``utils.save_var`` for the trie of
            transformed values.

    Returns:
        None.  The results are the two saved tries and ``./addr_match.txt``.
    """
    my_tree = trie_tree.Trie()  # holds the pre_trans()-transformed values
    my_word = trie_tree.Trie()  # holds the raw second field of each line
    cnt = 0
    # Context managers guarantee the record file and every input file are
    # closed even when reading or inserting raises.
    with open("./addr_match.txt", 'w+') as addr_kv_rec:
        print('\n>gen_zhengzhou_tree start')
        for root, _, fs in os.walk(dirname):
            for f in fs:
                # Join with the directory currently being visited, not with
                # the top-level dirname; otherwise files located in
                # sub-directories resolve to a path that does not exist.
                pth = os.path.join(root, str(f))
                with open(pth, 'r') as src:
                    lines = src.readlines()
                # Shuffling only affects processing order, i.e. the order of
                # rows in addr_match.txt and of the trie insertions.
                np.random.shuffle(lines)
                for line in lines:
                    if ',' not in line:
                        continue
                    _line = line.split(',')[1]
                    line = utils.pre_trans(_line)
                    addr_kv_rec.write('%s\t%s\n' % (str(line), str(_line)))
                    cnt += 1
                    # Progress indicator: prints at 1, 10001, 20001, ...
                    if cnt % 10000 == 1:
                        print(cnt)
                    my_tree.insert(line)
                    my_word.insert(_line)
    utils.save_var(my_word, sav_file)
    utils.save_var(my_tree, sav_file_2)
    print('\n>my address tree save ok')
Example #2
0
def gen_address_tree(filename=myconfig.STDTXTPATH, sav_file=myconfig.MY_TREE):
    """Build a trie with one entry per line of ``filename`` and save it.

    Each line is inserted exactly as read, so a trailing newline (if the
    line has one) is part of the inserted value.

    Args:
        filename: Path of the text file to read.
        sav_file: Target handed to ``utils.save_var`` for the finished trie.

    Returns:
        The populated ``trie_tree.Trie`` instance.
    """
    print('\n>gen_address_tree start')
    my_tree = trie_tree.Trie()
    # The with-block closes the file handle, which the previous bare open()
    # never did.
    with open(filename, 'r') as df:
        for sent in df:
            my_tree.insert(sent)
    utils.save_var(my_tree, sav_file)
    print('\n>my address tree save ok')
    return my_tree
Example #3
0
 def search(self,words_lst):
     """Look up a tagged query string and return the matches as one string.

     ``words_lst`` is a space-separated string whose tokens each have the
     form ``<text>/<label>``.  Texts sharing a label are merged with ``&``,
     the merged tokens are re-ordered to follow ``myconfig.COLUMNS``, and a
     temporary trie built from them is expanded into candidate word
     sequences.  Every candidate goes through ``self.search_one``; the
     de-duplicated results are matched against ``self.full_my_tree`` and all
     words below the matched nodes are collected.

     Args:
         words_lst: Space-separated ``<text>/<label>`` tokens.  Empty tokens
             produced by repeated spaces are ignored.

     Returns:
         The collected results joined with ``","``.

     Raises:
         ValueError: If a non-empty token does not contain exactly one ``/``.
     """
     # Drop *every* empty token.  list.remove("") only removed the first
     # one, so several consecutive spaces left a "" behind that then failed
     # to unpack in the split("/") below.
     words_lst = [token for token in words_lst.split(" ") if token]
     words_dct = {}
     print(words_lst)
     for word in words_lst:
         key, value = word.split("/") 
         # Group by the part after the slash; texts that share it are
         # concatenated with "&".
         if value in words_dct:
             words_dct[value]+="&"
             words_dct[value]+=key
         else:
             words_dct[value]=key
     words = []
     for key in myconfig.COLUMNS:
         # 'nan' doubles as the "label not present" sentinel, so a label
         # whose merged text is literally 'nan' is skipped as well.
         value = words_dct.get(key,'nan')
         if value == 'nan':
             continue
         words.append("%s/%s"%(value,key))
     # Temporary trie holding only the tokens of this query.
     tree = trie_tree.Trie()
     tree.part_insert(tree.root,words)
     result = []
     tree.scan_child_word(tree.root,result)
     _result = []
     for node in result:
         if node.is_word:
             _result.append(node)
     parents = tree.get_all_parent_tree(_result)
     result = []
     for words in parents:
         words = words.split(' ')
         result.append(self.search_one(words))
     print(result)
     # De-duplicate; note that set() does not preserve the original order.
     result = list(set(result))
     formula_result = []
     final_result = []
     for word in result:
         word = word.split(' ')
         # NOTE(review): this removes only the first empty token; how
         # scan_nodes treats a remaining '' is not visible here, so the
         # behaviour is left unchanged.
         if '' in word:
             word.remove('')
         # NOTE(review): the scan starts from the temporary tree's root but
         # is invoked on self.full_my_tree - confirm this is intended.
         formula_result = self.full_my_tree.scan_nodes([tree.root], word, formula_result)
         print(formula_result)
     result_child = []
     for node in formula_result:
         self.full_my_tree.scan_child_word(node,result_child)
     final_result.extend(self.full_my_tree.get_all_parent_tree(result_child))
     print(final_result)
     return ",".join(final_result)
Example #4
0
def gen_std_tree_from_dataframe(data_src, sav_file=myconfig.MY_TREE):
    """Build the standard address tree from a dataframe and save it.

    Each item obtained by iterating ``data_src`` is cleaned with
    ``remove_nan`` and inserted below the trie root via ``part_insert``.

    Args:
        data_src: Iterable of records to insert.
            NOTE(review): presumably rows of a dataframe, each a sequence of
            address parts - confirm against the caller.
        sav_file: Target handed to ``utils.save_var`` for the finished trie.

    Returns:
        ``myconfig.SUCCESS``.
    """
    print('\n>gen_std_tree_from_dataframe start')
    my_tree = trie_tree.Trie()
    for item in data_src:
        clritem = remove_nan(item)
        print(clritem)
        # The leftover pdb.set_trace() breakpoint was removed: it stopped
        # the build on every single item and blocks non-interactive runs.
        my_tree.part_insert(my_tree.root, clritem)
    utils.save_var(my_tree, sav_file)
    print('\n>gen_std_tree_from_dataframe ready and save finish')
    return myconfig.SUCCESS
Example #5
0
def gen_word_tree(filename=myconfig.STDTXTPATH, sav_file=myconfig.MY_WORD):
    """Build a trie of the individual ``/``-separated words in ``filename``.

    Every line is split on ``/`` and each resulting piece is inserted as its
    own entry.  Lines are not stripped, so the last piece of a line keeps its
    trailing newline.

    Args:
        filename: Path of the text file to read.
        sav_file: Target handed to ``utils.save_var`` for the finished trie.

    Returns:
        The populated ``trie_tree.Trie`` instance.
    """
    # The start message previously named gen_address_tree (copy-paste slip).
    print('\n>gen_word_tree start')
    my_tree = trie_tree.Trie()
    # The with-block closes the file handle, which the previous bare open()
    # never did.
    with open(filename, 'r') as df:
        lines = df.readlines()
    print(len(lines))
    for sent in lines:
        for word in sent.split('/'):
            my_tree.insert(word)
    utils.save_var(my_tree, sav_file)
    print('\n>my address tree save ok')
    return my_tree
Example #6
0
def gen_std_tree(filename=myconfig.STDTXTPATH,
                 sav_file=myconfig.MY_TREE,
                 delimeter='/'):
    """Build a trie whose entries are the delimiter-split lines of a file.

    Each line is split on ``delimeter`` and the resulting list is inserted
    as a single entry.  Lines are not stripped, so the last element of each
    list keeps its trailing newline.

    Args:
        filename: Path of the text file to read.
        sav_file: Target handed to ``utils.save_var`` for the finished trie.
        delimeter: Separator between the parts of a line (the parameter
            name's spelling is kept for backward compatibility).

    Returns:
        The populated ``trie_tree.Trie`` instance.
    """
    print('\n>gen_std_tree start')
    my_tree = trie_tree.Trie()
    # The with-block closes the file handle, which the previous bare open()
    # never did.
    with open(filename, 'r') as df:
        for sent in df:
            words = sent.split(delimeter)
            my_tree.insert(words)
    utils.save_var(my_tree, sav_file)
    print('\n>my std tree save ok')
    return my_tree