Beispiel #1
0
 def ConvertRawWordsToOrder(self, rawwords, nrange, ordertype="abs"):
     """Build per-length word orders and convert them with convert_order_to_raw.

     The raw words are grouped by length with
     ``word_convert().splitwords_bylen(rawwords, nrange)`` and every group is
     ranked with ``ranker().rank_words(..., reverse=True)``. The words of
     length ``nrange`` (the "prime" words) are numbered by their position in
     that ranking; every shorter length ``1 .. nrange - 1`` gets its order
     from the strategy selected by ``ordertype``.

     Args:
         rawwords: Raw word collection handed to ``splitwords_bylen`` and, in
             ``'abs'`` mode, to ``ConvertWordToNumOrder``.
         nrange: Word length of the prime group; it must be a key of the
             mapping returned by ``splitwords_bylen``, as must every length
             in ``1 .. nrange - 1``.
         ordertype: ``'abs'`` uses ``self.ConvertWordToNumOrder``, ``'Set'``
             uses ``self.ConvertwordToCntOrder``; any other value falls back
             to ``word_convert().convert_word_order``.

     Returns:
         The result of ``self.convert_order_to_raw`` applied to a dict that
         maps each word length to its order structure.
     """
     word_ranker = ranker()
     converter = word_convert()

     num_words = converter.splitwords_bylen(rawwords, nrange)
     # Only values are replaced (no keys added or removed), so rebinding
     # entries while iterating over the dict is safe.
     for len_word in num_words:
         num_words[len_word] = word_ranker.rank_words(num_words[len_word],
                                                      reverse=True)

     # Each ranked entry is indexable and carries the word at index 0.
     prime_words = [entry[0] for entry in num_words[nrange]]
     # If a word occurs more than once, the last position wins.
     prime_orders = {word: idx for idx, word in enumerate(prime_words)}

     order_words = {nrange: prime_orders}
     for length in range(1, nrange):
         if ordertype == 'abs':
             order_words[length] = self.ConvertWordToNumOrder(
                 [entry[0] for entry in num_words[length]], prime_words,
                 rawwords)
         elif ordertype == 'Set':
             order_words[length] = self.ConvertwordToCntOrder(
                 num_words[length], num_words[nrange])
         else:
             order_words[length] = converter.convert_word_order(
                 [entry[0] for entry in num_words[length]], prime_words)
     return self.convert_order_to_raw(order_words)
 def infer_words_by_ve(self, data_path, r_way, h, combine, model, v_way, T,
                       r):
     """Split messages with ``splitter.split_by_ve`` and rank the word counts.

     Pipeline: read the data, reduce it with ``get_puredatas``, keep the
     leading ``ABtest().ratio`` fraction, split the messages, log the split
     messages and the per-location words, then count and rank the words.

     Args:
         data_path: A single path given as ``str`` (read with ``read_datas``)
             or any other value, which is passed to ``read_multity_dirs``.
         r_way: Read mode forwarded unchanged to the reader function.
         h, combine, model, v_way, T, r: Forwarded unchanged to
             ``splitter().split_by_ve``.

     Returns:
         The result of ``ranker().rank_dic(words_count, True)``.
     """
     # isinstance() is required here: comparing type(data_path) with the
     # string 'str' is always False and would make read_datas unreachable.
     if isinstance(data_path, str):
         datas = read_datas(data_path, r_way)
     else:
         datas = read_multity_dirs(data_path, r_way)
     datas = get_puredatas(datas)

     # Keep only the leading fraction of the data given by ABtest().ratio.
     ab_test = ABtest()
     # The add_tail(datas, h) step is currently disabled; the sliced data is
     # split as-is.
     messages = datas[0:int(len(datas) * ab_test.ratio)]

     message_split = splitter().split_by_ve(messages, h, combine, model,
                                            v_way, T, r)

     # %M / %S are minute / second; lowercase %m is the month and %s is a
     # non-portable platform extension. Without an explicit time tuple,
     # strftime formats the current local time.
     t_now = time.strftime("%Y-%m-%d %H:%M:%S")

     m_logger = get_logger(log_path + '/messge_splited_log' + t_now,
                           'msg_split')
     for message in message_split:
         m_logger.error(message)

     converter = word_convert()
     words_prim = converter.convert_words_byloc(message_split)
     p_logger = get_logger(log_path + '/p_log' + t_now, 'word_count')
     for key in words_prim:
         p_logger.error(key + str(words_prim[key].content))

     words_count = converter.get_words_count(words_prim)
     return ranker().rank_dic(words_count, True)
 def GetCorresOrder(self, WordChilds, WordVoc):
     """Count how many ranked children are needed to reach half the total.

     The values of ``WordChilds`` are looked up in ``WordVoc`` and ranked with
     ``ranker().rank_dic``. Walking the ranked entries in the order returned,
     their values (index 1 of each entry) are accumulated until the running
     sum is no longer below half of the overall sum.

     Args:
         WordChilds: Iterable of keys; each must be present in ``WordVoc``.
         WordVoc: Mapping from key to its numeric value.

     Returns:
         The number of ranked entries consumed (0 when the total is 0).
     """
     child_values = {child: WordVoc[child] for child in WordChilds}
     ranked = ranker().rank_dic(child_values)

     half_total = sum(entry[1] for entry in ranked) / 2
     consumed = 0
     running = 0
     while running < half_total:
         running = running + ranked[consumed][1]
         consumed += 1
     return consumed
 def __init__(self):
     """Run the parent initialiser and create the helper objects."""
     # The call parentheses matter: a bare ``super().__init__`` only looks the
     # method up and never runs the parent's initialisation.
     super().__init__()
     self.rank = ranker()
     self.analysist = base_analyzer()
Beispiel #5
0
 def __init__(self):
     """Create the helper objects this instance keeps as attributes."""
     # NOTE(review): `analyer` looks like a misspelling of "analyzer"; the
     # attribute name is left unchanged because code outside this view may
     # already refer to it.
     self.analyer = base_analyzer()
     self.convert = Converter()
     self.ranker = ranker()