def raw_to_log(file_path, r_way, protocol):
    """Write every message read from ``file_path`` to the raw-message log.

    The messages are loaded with ``read_datas(file_path, r_way)``, passed
    through ``get_puredatas`` and converted to text with
    ``Converter.convert_raw_to_text``. Each one is emitted as
    ``"<index>:<text>"`` on the logger named ``'raw_message_logger'`` that
    ``get_logger`` returns for ``log_path + '/' + protocol``.

    Args:
        file_path: Source location handed to ``read_datas``.
        r_way: Read mode handed to ``read_datas``.
        protocol: Appended to ``log_path`` to pick the log destination.

    Returns:
        None. The only effect is the log output.
    """
    datas = get_puredatas(read_datas(file_path, r_way))
    converter = Converter()
    logger_raw = get_logger(log_path + '/' + protocol, 'raw_message_logger')
    # enumerate supplies the running message number; the previous counter was
    # never advanced, so every line was labelled "0:".
    for index, data in enumerate(datas):
        # NOTE(review): messages are logged at error level, presumably so they
        # pass the logger's configured threshold - confirm before changing.
        logger_raw.error(str(index) + ':' + converter.convert_raw_to_text(data))
def raw_to_redis(file_path, r_way):
    """Store the text form of every message in ``file_path`` in redis.

    Messages are loaded with ``read_datas(file_path, r_way)``, passed through
    ``get_puredatas`` and converted with ``Converter.convert_raw_to_text``.
    The resulting list is written through ``redis_deal().insert_to_redis``
    using ``file_path`` itself as the key.

    Args:
        file_path: Source location; also used as the redis key.
        r_way: Read mode handed to ``read_datas``.

    Returns:
        None.
    """
    pure_messages = get_puredatas(read_datas(file_path, r_way))
    to_text = Converter().convert_raw_to_text
    texts = [to_text(message) for message in pure_messages]
    redis_deal().insert_to_redis(file_path, texts)
def getDelimiter(datas):
    """Build a delimiter string from a collection of raw messages.

    Steps, in order:
      1. Convert every item of ``datas`` with
         ``Converter.convert_raw_to_text``.
      2. Collect ``get_ngram_words([text], (1, 2), 10)`` for each text into
         one flat list.
      3. Keep the first ten entries of ``analyzer.get_topk`` for that list
         and pass them through ``filterWords``.
      4. Split the ``filterWords`` result on single spaces, read each piece
         as an integer code point and join the characters.

    Args:
        datas: Iterable of raw messages accepted by
            ``Converter.convert_raw_to_text``.

    Returns:
        str: The characters decoded from the filtered words.

    Raises:
        ValueError: If a piece of the ``filterWords`` result is not an
            integer literal or is outside the range ``chr`` accepts.
    """
    to_text = Converter().convert_raw_to_text
    texts = [to_text(raw) for raw in datas]
    ngram_words = [
        word
        for text in texts
        for word in get_ngram_words([text], (1, 2), 10)
    ]
    top_words = analyzer.get_topk(ngram_words)[0:10]
    codes = filterWords(top_words).split(' ')
    return ''.join(chr(int(code)) for code in codes)
class FieldHunter:
    """Find delimiter candidates among the n-grams of a set of messages.

    Words are handled as strings of space-separated decimal codes
    (for example ``"13 10"``), as shown by the ``split(' ')`` / ``int``
    handling in ``isNumOrAlpha`` and ``itemJudge``.
    """

    def __init__(self):
        # Attribute name (including its spelling) is kept for compatibility
        # with any code that reads it.
        self.analyer = base_analyzer()
        self.convert = Converter()
        self.ranker = ranker()

    def itemJudge(self, item):
        """Return True when ``item`` is the code of an ASCII digit or letter.

        Args:
            item: Anything ``int()`` accepts, e.g. ``"65"``.

        Returns:
            bool: True for 48-57 (digits), 65-90 (upper case) and
            97-122 (lower case); False otherwise.

        Raises:
            ValueError: If ``item`` cannot be parsed by ``int()``.
        """
        code = int(item)  # parse once instead of once per comparison
        return 48 <= code <= 57 or 65 <= code <= 90 or 97 <= code <= 122

    def isNumOrAlpha(self, sequence):
        """Return True when any code in ``sequence`` is a digit or letter.

        Args:
            sequence: Codes separated by single spaces, e.g. ``"13 65"``.

        Returns:
            bool: True as soon as one code satisfies ``itemJudge``.

        Raises:
            ValueError: If a piece of ``sequence`` is not an integer
                literal (this includes the empty string).
        """
        return any(self.itemJudge(token) for token in sequence.split(' '))

    def findDelimiter(self, messages):
        """Return up to 100 ranked words containing no digit or letter code.

        The messages are converted with ``self.convert.convert_raw_to_text``,
        turned into a word dictionary by
        ``self.convert.ConvertRawToSimDic(..., (1, 2))`` and ordered by
        ``self.ranker.rank_dic(..., reverse=True)``. The first element of
        each ranked entry is kept when ``isNumOrAlpha`` rejects it.

        Args:
            messages: Iterable of raw messages.

        Returns:
            list: At most the first 100 qualifying words, in ranked order.
        """
        texts = [self.convert.convert_raw_to_text(data) for data in messages]
        ranked = self.ranker.rank_dic(
            self.convert.ConvertRawToSimDic(texts, (1, 2)), reverse=True)
        # A separate search for the single best entry used to run here, but
        # its result was never read; only the candidate list is needed.
        candidates = [
            entry[0] for entry in ranked if not self.isNumOrAlpha(entry[0])
        ]
        top_candidates = candidates[0:100]
        # NOTE(review): the two prints look like leftover debugging output;
        # kept so the console output of existing runs does not change.
        print(top_candidates)
        print('cccd')
        return top_candidates
def single_message_voter(self, messages, h, voters="both", diff_measure="abs", v_way="normal", T=0, r=0):
    """Vote on boundary locations for each message individually.

    NOTE: the values passed for ``h``, ``voters``, ``diff_measure``,
    ``v_way``, ``T`` and ``r`` are not used; each one is replaced by the
    corresponding entry of ``ve_parameter`` before any work is done.

    The keyword table and the frequency table are read from redis when
    their keys exist, otherwise they are computed and stored. The tables
    are also kept on the instance as ``words_table``, ``words_fre`` and
    ``words_entry``.

    Args:
        messages: Sequence of messages to vote on.
        h: Ignored; overwritten by ``ve_parameter['height']``.
        voters: Ignored; overwritten by ``ve_parameter['voters']``.
        diff_measure: Ignored; overwritten by
            ``ve_parameter['diff_measure']``.
        v_way: Ignored; overwritten by ``ve_parameter['decision_type']``.
        T: Ignored; overwritten by ``ve_parameter['Threshold_T']``.
        r: Ignored; overwritten by ``ve_parameter['Threshod_R']``.

    Returns:
        list: One ``vote_for_single_message`` result per message, in
        message order.
    """
    # Configuration always wins over the call arguments.
    h = ve_parameter['height']
    voters = ve_parameter['voters']
    diff_measure = ve_parameter['diff_measure']
    v_way = ve_parameter['decision_type']
    T = ve_parameter['Threshold_T']
    r = ve_parameter['Threshod_R']
    depth = h + 1

    # Keyword table: reuse the cached copy or build and cache it.
    raw_words_key = redis_prefix + 'correct_raw_words'
    if not redis_writer.is_exist_key(raw_words_key):
        word_table = self.get_keywords(messages, depth)
        redis_writer.insert_to_redis(raw_words_key, word_table)
    else:
        word_table = redis_writer.read_from_redis(raw_words_key)

    # Frequency table: same cache-or-compute scheme.
    normal_words_key = redis_prefix + 'normal_correct_words'
    if not redis_writer.is_exist_key(normal_words_key):
        frequencies = self.get_frequent(word_table, depth)
        # Extra "300" entry is added only to freshly computed tables;
        # its meaning is not evident from this method.
        frequencies["300"] = 0
        redis_writer.insert_to_redis(normal_words_key, frequencies)
    else:
        frequencies = redis_writer.read_from_redis(normal_words_key)

    self.words_fre = frequencies
    back_entries = self.get_backentry(word_table, depth)
    self.words_entry = back_entries
    self.words_table = word_table

    boundaries = []
    # The voter setting is read again here, exactly as before.
    voters = ve_parameter['voters']
    raw_conv = Converter()
    trace_frequent = voters == 'frequent_voter'

    for index, message in enumerate(messages):
        fre_votes, entry_votes = self.vote_sequence(
            message, h, frequencies, back_entries)

        # Choose which votes feed the decision for this message.
        if voters == 'both':
            ballots = self.get_gvotes([fre_votes, entry_votes])
        elif trace_frequent:
            voter_logger.error(
                'raw: ' + str(raw_conv.convert_raw_to_text(message)))
            voter_logger.error('raw + frequent: ' + str(fre_votes))
            ballots = fre_votes
        else:
            ballots = entry_votes

        location = self.vote_for_single_message(
            ballots, diff_measure, v_way, T, r)
        if trace_frequent:
            voter_logger.error("voted: " + str(index) + " " + str(location))
        boundaries.append(location)

    return boundaries