def ConvertRawWordsToOrder(self, rawwords, nrange, ordertype="abs"):
    """Build an order table for every word bucket from 1 up to ``nrange``.

    ``rawwords`` is split into buckets with
    ``word_convert.splitwords_bylen(rawwords, nrange)`` (presumably keyed by
    word length -- confirm against that helper). Every bucket is ranked with
    ``ranker.rank_words(..., reverse=True)``. The words of bucket ``nrange``
    (the "prime" words) are numbered by their rank position, and each bucket
    ``1 .. nrange - 1`` is converted with the strategy chosen by
    ``ordertype``.

    Args:
        rawwords: Raw word data passed to ``splitwords_bylen`` and, for the
            ``'abs'`` strategy, to ``self.ConvertWordToNumOrder``.
        nrange: Key of the bucket that supplies the prime words; buckets
            ``1 .. nrange - 1`` are converted relative to it.
        ordertype: ``'abs'`` selects ``self.ConvertWordToNumOrder``,
            ``'Set'`` selects ``self.ConvertwordToCntOrder``; any other
            value falls back to ``word_convert.convert_word_order``.

    Returns:
        The result of ``self.convert_order_to_raw`` applied to the mapping
        ``{bucket key: order table}``.
    """
    word_ranker = ranker()
    converter = word_convert()

    num_words = converter.splitwords_bylen(rawwords, nrange)
    for len_word in num_words:
        num_words[len_word] = word_ranker.rank_words(
            num_words[len_word], reverse=True)

    # Ranked entries are indexable; element 0 is the word itself.
    prime_words = [word[0] for word in num_words[nrange]]
    # Position in the ranked list is the order number of a prime word.
    prime_orders = {word: position
                    for position, word in enumerate(prime_words)}

    order_words = {nrange: prime_orders}
    for length in range(1, nrange):
        if ordertype == 'abs':
            order_words[length] = self.ConvertWordToNumOrder(
                [word[0] for word in num_words[length]],
                prime_words, rawwords)
        elif ordertype == 'Set':
            # This strategy receives the full ranked entries, not only the
            # bare words.
            order_words[length] = self.ConvertwordToCntOrder(
                num_words[length], num_words[nrange])
        else:
            order_words[length] = converter.convert_word_order(
                [word[0] for word in num_words[length]], prime_words)

    return self.convert_order_to_raw(order_words)
def infer_words_by_ve(self, data_path, r_way, h, combine, model, v_way, T, r):
    """Split messages with ``splitter.split_by_ve`` and rank the found words.

    Steps: load the data, reduce it with ``get_puredatas``, keep the leading
    ``ABtest().ratio`` fraction, split the messages, log the split result and
    the per-key word contents, count the words and rank the counts.

    Args:
        data_path: A single path string (loaded with ``read_datas``) or any
            other value (loaded with ``read_multity_dirs``).
        r_way: Read mode forwarded to the loader.
        h, combine, model, v_way, T, r: Forwarded unchanged to
            ``splitter.split_by_ve``.

    Returns:
        The result of ``ranker.rank_dic(words_count, True)``.
    """
    # A type object never equals the string 'str', so the comparison
    # ``type(data_path) == 'str'`` could never select the single-path loader;
    # isinstance makes that branch reachable.
    if isinstance(data_path, str):
        datas = read_datas(data_path, r_way)
    else:
        datas = read_multity_dirs(data_path, r_way)
    datas = get_puredatas(datas)

    # Keep only the leading fraction of the data configured by ABtest.
    ABtest_now = ABtest()
    datas = datas[0:int(len(datas) * ABtest_now.ratio)]

    # NOTE: tail padding (add_tail(datas, h)) is currently disabled; the
    # messages are passed to the splitter as loaded.
    messages = datas
    message_splitter = splitter()
    message_split = message_splitter.split_by_ve(
        messages, h, combine, model, v_way, T, r)

    # %M / %S are minutes / seconds; %m is the month and %s is not a
    # standard directive.
    t_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

    m_logger = get_logger(log_path + '/messge_splited_log' + t_now, 'msg_split')
    for message in message_split:
        m_logger.error(message)

    T_word_convert = word_convert()
    words_prim = T_word_convert.convert_words_byloc(message_split)

    p_logger = get_logger(log_path + '/p_log' + t_now, 'word_count')
    for key in words_prim:
        p_logger.error(key + str(words_prim[key].content))

    words_count = T_word_convert.get_words_count(words_prim)
    t_ranker = ranker()
    words_rank = t_ranker.rank_dic(words_count, True)
    return words_rank
def GetCorresOrder(self, WordChilds, WordVoc):
    """Count the ranked children needed to reach half of their total value.

    Each entry of ``WordChilds`` is looked up in ``WordVoc``; the resulting
    mapping is ranked with ``ranker().rank_dic``. Walking the ranked entries
    in order, the value at index 1 of each entry is accumulated until the
    running sum is no longer below half of the overall sum.

    Args:
        WordChilds: Iterable of keys to look up in ``WordVoc``.
        WordVoc: Mapping from key to a numeric value; a missing key
            propagates the lookup error.

    Returns:
        The number of leading ranked entries that were accumulated (0 when
        there is nothing to accumulate).
    """
    child_values = {child: WordVoc[child] for child in WordChilds}
    ranked = ranker().rank_dic(child_values)

    half_total = sum([entry[1] for entry in ranked]) / 2

    running = 0
    consumed = 0
    for entry in ranked:
        # Stop as soon as the running sum is no longer below the half mark.
        if not running < half_total:
            break
        running = running + entry[1]
        consumed = consumed + 1
    return consumed
def __init__(self):
    """Run the parent initialiser and create the ranking/analysis helpers."""
    # The initialiser has to be *called*: a bare ``super().__init__`` only
    # looks the bound method up and discards it.
    super().__init__()
    self.rank = ranker()
    self.analysist = base_analyzer()
def __init__(self):
    """Create the helper objects this instance works with.

    Sets three attributes, in this order: ``analyer`` (a
    ``base_analyzer``), ``convert`` (a ``Converter``) and ``ranker`` (a
    ``ranker``).
    """
    # Attribute names are part of the object's interface and are kept
    # exactly as spelled.
    self.analyer = base_analyzer()
    self.convert = Converter()
    self.ranker = ranker()