예제 #1
0
def raw_to_log(file_path, r_way, protocol):
    """Write every record read from ``file_path`` to the raw-message log.

    The records come from ``read_datas(file_path, r_way)`` and are passed
    through ``get_puredatas``. Each one is converted with
    ``Converter.convert_raw_to_text`` and emitted as ``"<index>:<text>"``,
    where ``<index>`` is the zero-based position of the record.

    Args:
        file_path: Forwarded to ``read_datas``.
        r_way: Forwarded to ``read_datas``.
        protocol: Appended to ``log_path`` (joined with ``'/'``) to form the
            first argument of ``get_logger``.
    """
    datas = get_puredatas(read_datas(file_path, r_way))
    converter = Converter()
    logger_raw = get_logger(log_path + '/' + protocol, 'raw_message_logger')
    # enumerate() supplies the running index; the previous hand-kept counter
    # was never incremented, so every line was logged with the prefix "0:".
    for index, data in enumerate(datas):
        # The logger's error level is kept as-is: the logger configuration is
        # not visible here and may filter on it.
        logger_raw.error(str(index) + ':' + converter.convert_raw_to_text(data))
예제 #2
0
def raw_to_redis(file_path, r_way):
    """Convert the records of ``file_path`` to text and hand them to redis_deal.

    The records come from ``read_datas(file_path, r_way)`` and are passed
    through ``get_puredatas``. Each one is converted with
    ``Converter.convert_raw_to_text``, and the resulting list is passed to
    ``insert_to_redis`` with ``file_path`` itself as the key.

    Args:
        file_path: Forwarded to ``read_datas``; also used as the storage key.
        r_way: Forwarded to ``read_datas``.
    """
    pure_records = get_puredatas(read_datas(file_path, r_way))
    text_converter = Converter()
    texts = [text_converter.convert_raw_to_text(record)
             for record in pure_records]
    # The storage object is created only after all records are converted.
    store = redis_deal()
    store.insert_to_redis(file_path, texts)
def getDelimiter(datas):
    """Build a delimiter string from the top-ranked n-gram words of ``datas``.

    Every item is converted with ``Converter.convert_raw_to_text``. The
    n-gram words of each message are collected with
    ``get_ngram_words([message], (1, 2), 10)``, and the first ten entries of
    ``analyzer.get_topk`` are passed through ``filterWords``. The filtered
    result is split on single spaces; each piece is parsed as an integer
    and mapped to its character with ``chr``.

    Args:
        datas: Iterable of raw messages accepted by ``convert_raw_to_text``.

    Returns:
        The characters decoded from the filtered words, joined into one string.
    """
    text_converter = Converter()
    # Convert everything first, then gather n-grams message by message.
    texts = [text_converter.convert_raw_to_text(item) for item in datas]
    ngram_pool = [
        word
        for text in texts
        for word in get_ngram_words([text], (1, 2), 10)
    ]
    top_words = analyzer.get_topk(ngram_pool)[:10]
    filtered = filterWords(top_words)
    # presumably filterWords returns space-separated decimal character codes
    # -- verify against its definition.
    return ''.join(chr(int(code)) for code in filtered.split(' '))
예제 #4
0
class FieldHunter:
    """Collects delimiter candidates from the n-gram words of raw messages.

    A "word" here is a string of space-separated decimal character codes
    (for example ``"13 10"``); words made only of non-alphanumeric codes
    are treated as delimiter candidates.
    """

    def __init__(self):
        # NOTE: the attribute name ``analyer`` (sic) is kept unchanged because
        # code outside this class may access it under that name.
        self.analyer = base_analyzer()
        self.convert = Converter()
        self.ranker = ranker()

    def itemJudge(self, item):
        """Return True if ``item`` is the code of an ASCII digit or letter.

        Args:
            item: Anything ``int()`` accepts, typically a decimal string.

        Returns:
            True for 48-57 ('0'-'9'), 65-90 ('A'-'Z') or 97-122 ('a'-'z'),
            otherwise False.

        Raises:
            ValueError: If ``item`` cannot be parsed by ``int()``.
        """
        code = int(item)  # parse once instead of once per comparison
        return 48 <= code <= 57 or 65 <= code <= 90 or 97 <= code <= 122

    def isNumOrAlpha(self, sequence):
        """Return True if any space-separated code in ``sequence`` is alphanumeric.

        Args:
            sequence: String of decimal character codes separated by single
                spaces.

        Returns:
            True as soon as one code satisfies ``itemJudge``, else False.
        """
        return any(self.itemJudge(token) for token in sequence.split(' '))

    def findDelimiter(self, messages):
        """Return up to 100 ranked words that contain no alphanumeric code.

        Each message is converted with ``convert_raw_to_text``. The texts are
        turned into a word dictionary by ``ConvertRawToSimDic(texts, (1, 2))``
        and ordered by ``rank_dic(..., reverse=True)``. The first element of
        every ranked entry whose codes are all non-alphanumeric becomes a
        candidate, in ranking order.

        Args:
            messages: Iterable of raw messages accepted by
                ``convert_raw_to_text``.

        Returns:
            A list of at most 100 candidate words.
        """
        texts = [self.convert.convert_raw_to_text(data) for data in messages]
        ranked_words = self.convert.ConvertRawToSimDic(texts, (1, 2))
        ranked_words = self.ranker.rank_dic(ranked_words, reverse=True)
        # A former first pass that stored the first match in an unused
        # ``delimiter`` local was dead code; one filtering pass is enough.
        candidates = [
            word[0] for word in ranked_words
            if not self.isNumOrAlpha(word[0])
        ]
        # NOTE(review): the two prints below look like leftover debug output;
        # they are kept so console output stays unchanged.
        print(candidates[0:100])
        print('cccd')
        return candidates[0:100]
예제 #5
0
 def single_message_voter(self,
                          messages,
                          h,
                          voters="both",
                          diff_measure="abs",
                          v_way="normal",
                          T=0,
                          r=0):
     """Vote on a candidate location for every message and return them all.

     NOTE: ``h``, ``voters``, ``diff_measure``, ``v_way``, ``T`` and ``r``
     are accepted for interface compatibility but are immediately replaced
     by the matching ``ve_parameter`` entries, so values passed by the caller
     have no effect.

     The keyword table and the frequency table are read through
     ``redis_writer`` when their keys exist; otherwise they are computed
     from ``messages`` and written back under the same keys.

     Side effects: sets ``self.words_fre``, ``self.words_entry`` and
     ``self.words_table``. In ``'frequent_voter'`` mode it also logs through
     ``voter_logger``.

     Args:
         messages: Sequence of raw messages to vote on.

     Returns:
         A list holding one ``vote_for_single_message`` result per message,
         in message order.
     """
     # Configuration always wins over the call arguments (see NOTE above).
     h, voters, diff_measure, v_way, T, r = (
         ve_parameter[name]
         for name in ('height', 'voters', 'diff_measure', 'decision_type',
                      'Threshold_T', 'Threshod_R'))
     depth = h + 1

     raw_key = redis_prefix + 'correct_raw_words'
     if not redis_writer.is_exist_key(raw_key):
         keyword_table = self.get_keywords(messages, depth)
         redis_writer.insert_to_redis(raw_key, keyword_table)
     else:
         keyword_table = redis_writer.read_from_redis(raw_key)

     normal_key = redis_prefix + 'normal_correct_words'
     if not redis_writer.is_exist_key(normal_key):
         frequency_table = self.get_frequent(keyword_table, depth)
         # Extra "300" entry is added before caching, as in the original flow.
         frequency_table["300"] = 0
         redis_writer.insert_to_redis(normal_key, frequency_table)
     else:
         frequency_table = redis_writer.read_from_redis(normal_key)

     self.words_fre = frequency_table
     entry_table = self.get_backentry(keyword_table, depth)
     self.words_entry = entry_table
     self.words_table = keyword_table

     boundaries = []
     raw_conv = Converter()
     for index, message in enumerate(messages):
         fre_votes, entry_votes = self.vote_sequence(
             message, h, frequency_table, entry_table)
         log_details = False
         if voters == 'both':
             ballots = self.get_gvotes([fre_votes, entry_votes])
         elif voters == 'frequent_voter':
             log_details = True
             voter_logger.error(
                 'raw: ' + str(raw_conv.convert_raw_to_text(message)))
             voter_logger.error('raw + frequent: ' + str(fre_votes))
             ballots = fre_votes
         else:
             # Any other setting falls back to the entry votes alone.
             ballots = entry_votes
         location = self.vote_for_single_message(
             ballots, diff_measure, v_way, T, r)
         if log_details:
             voter_logger.error("voted: " + str(index) + " " +
                                str(location))
         boundaries.append(location)
     return boundaries