def __init__(self, idf_path, stop_words_path="", r_hd=0): self.tf_idf_hd = tf_idf(idf_path, stop_words_path) self.repeat = 0 self.not_repeat = 1 if not r_hd: self.r_hd = redis.Redis() self.word_key_pre = "pub_word:" self.title_id_pre = "pub_title_id:" self.time_limit = 259200 self.pub_title_id_key = "incr:pub_title_id" self.r_hd.flushdb()
def __init__(self, idf_path, stop_words_path = "", r_hd = 0): self.tf_idf_hd = tf_idf(idf_path, stop_words_path) self.repeat = 0 self.not_repeat = 1 if not r_hd: self.r_hd = redis.Redis() self.word_key_pre = "pub_word:" self.title_id_pre = "pub_title_id:" self.time_limit = 259200 self.pub_title_id_key = "incr:pub_title_id" self.r_hd.flushdb()
def __init__(self, idf_path, stop_words_path="", uid_overtime_path=""): self.uid_overtime_dic = self.gen_uid_overtime_dic(uid_overtime_path) self.tf_idf_hd = tf_idf(idf_path, stop_words_path) self.repeat = 0 self.not_repeat = 1 #如果title中topN小于此值则将结果判断相似度 self.sim_judge_limit = 3 self.r_hd = redis.Redis() self.word_key_pre = "main_word:" self.title_id_pre = "main_title_id:" self.uid_pre = "main_tid_uid:" self.time_stamp_pre = "main_time_stamp:" self.time_limit = 259200 self.uid_overtime_default = self.time_limit self.main_title_id_key = "incr:main_title_id" self.r_hd.flushdb()
def __init__(self, idf_path, stop_words_path = "", uid_overtime_path = ""): self.uid_overtime_dic = self.gen_uid_overtime_dic(uid_overtime_path) self.tf_idf_hd = tf_idf(idf_path, stop_words_path) self.repeat = 0 self.not_repeat = 1 #如果title中topN小于此值则将结果判断相似度 self.sim_judge_limit = 3 self.r_hd = redis.Redis() self.word_key_pre = "main_word:" self.title_id_pre = "main_title_id:" self.uid_pre = "main_tid_uid:" self.time_stamp_pre = "main_time_stamp:" self.time_limit = 259200 self.uid_overtime_default = self.time_limit self.main_title_id_key = "incr:main_title_id" self.r_hd.flushdb()
    ret = flter.filter(s, id_set)
    print ret
    time.sleep(2)
    #raw_input(">>")
    s = "a, b, c, d, f"
    id_set = set([2, 3, 4, 5])
    ret = flter.filter(s, id_set)
    print ret
    cov.stop()      # stop collecting coverage data
    cov.save()      # save the coverage results to the data file

if 0:
    tf_idf_hd = tf_idf("idf.txt", "stopwords.txt")
    s = "a, b, c, d, e, f, 1, 2, 3"
    print tf_idf_hd.get_top_n_tf_idf(s)

if 0:
    main_flter = main_repeat_filter("idf.txt", "stopwords.txt")
    main_flter.insert_s_to_redis('abc', ['main_word:a', 'main_word:b', 'main_word:c'], set([10, 20, 30]))

if 0:
    pub_flter = pub_repeat_filter("idf.txt", "stopwords.txt")
    # Sample news headline used as test input.
    title = "广西一官员获刑十年未坐一天牢 法院称系监外执行"
    ret = pub_flter.filter(title)
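# --- Illustrative usage sketch ----------------------------------------------
# End-to-end use of pub_repeat_filter, following the driver above. It assumes
# filter() records each title it sees and returns not_repeat (1) for a new
# title and repeat (0) for one already stored; "idf.txt" and "stopwords.txt"
# are the same data files the driver uses.
if 0:
    pub_flter = pub_repeat_filter("idf.txt", "stopwords.txt")
    title = "广西一官员获刑十年未坐一天牢 法院称系监外执行"
    print pub_flter.filter(title)   # first sighting: expected not_repeat (1)
    print pub_flter.filter(title)   # same title again: expected repeat (0)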