Beispiel #1
0
    def __init__(self, idf_path, stop_words_path="", r_hd=0):
        """Set up the filter state and the Redis handle it works on.

        Args:
            idf_path: Forwarded to ``tf_idf`` as its first argument.
            stop_words_path: Forwarded to ``tf_idf`` as its second argument.
            r_hd: Optional, already constructed Redis handle. Any falsy
                value (the default ``0``) makes the constructor create its
                own ``redis.Redis()`` client with default settings.

        Note:
            ``flushdb()`` is called on the handle at the end of
            construction, whether the handle was supplied or created here.
        """
        self.tf_idf_hd = tf_idf(idf_path, stop_words_path)
        self.repeat = 0
        self.not_repeat = 1

        # Bug fix: the handle used to be stored only when ``r_hd`` was falsy,
        # so passing a handle left ``self.r_hd`` unset and the ``flushdb()``
        # call below failed with AttributeError.
        self.r_hd = r_hd if r_hd else redis.Redis()

        # Presumably prefixes/names of the Redis keys used by this class;
        # verify against the methods that build the keys.
        self.word_key_pre = "pub_word:"
        self.title_id_pre = "pub_title_id:"
        self.pub_title_id_key = "incr:pub_title_id"

        # 259200 == 3 * 24 * 60 * 60; presumably a lifetime in seconds.
        self.time_limit = 259200

        self.r_hd.flushdb()
Beispiel #2
0
    def __init__(self, idf_path, stop_words_path="", r_hd=0):
        """Initialise the filter and bind the Redis handle it uses.

        Args:
            idf_path: Forwarded to ``tf_idf`` as its first argument.
            stop_words_path: Forwarded to ``tf_idf`` as its second argument.
            r_hd: Optional, already constructed Redis handle. When it is
                falsy (the default ``0``) a new ``redis.Redis()`` client
                with default settings is created instead.

        Note:
            The handle's ``flushdb()`` is invoked at the end of
            construction for both supplied and newly created handles.
        """
        self.tf_idf_hd = tf_idf(idf_path, stop_words_path)
        self.repeat = 0
        self.not_repeat = 1

        # Bug fix: previously ``self.r_hd`` was assigned only in the
        # "no handle given" branch, so a caller-supplied handle was dropped
        # and ``self.r_hd.flushdb()`` raised AttributeError.
        if r_hd:
            self.r_hd = r_hd
        else:
            self.r_hd = redis.Redis()

        # Presumably prefixes/names of the Redis keys used by this class;
        # verify against the methods that build the keys.
        self.word_key_pre = "pub_word:"
        self.title_id_pre = "pub_title_id:"
        self.pub_title_id_key = "incr:pub_title_id"

        # 259200 == 3 * 24 * 60 * 60; presumably a lifetime in seconds.
        self.time_limit = 259200

        self.r_hd.flushdb()
Beispiel #3
0
    def __init__(self, idf_path, stop_words_path="", uid_overtime_path=""):
        """Build the helper objects and constants used by the filter.

        Args:
            idf_path: Forwarded to ``tf_idf`` as its first argument.
            stop_words_path: Forwarded to ``tf_idf`` as its second argument.
            uid_overtime_path: Forwarded to ``self.gen_uid_overtime_dic``.

        Note:
            A ``redis.Redis()`` client with default settings is created and
            ``flushdb()`` is called on it before the constructor returns.
        """
        # External helpers first, in the same order as before.
        self.uid_overtime_dic = self.gen_uid_overtime_dic(uid_overtime_path)
        self.tf_idf_hd = tf_idf(idf_path, stop_words_path)

        self.repeat, self.not_repeat = 0, 1

        # Translated from the original note: if a title's top-N is smaller
        # than this value, the result is judged by similarity.
        self.sim_judge_limit = 3

        self.r_hd = redis.Redis()

        # Presumably prefixes for the Redis keys built elsewhere in the class.
        self.word_key_pre, self.title_id_pre = "main_word:", "main_title_id:"
        self.uid_pre, self.time_stamp_pre = "main_tid_uid:", "main_time_stamp:"

        # 259200 == 3 * 24 * 60 * 60; the uid default mirrors the time limit.
        self.time_limit = 259200
        self.uid_overtime_default = self.time_limit

        self.main_title_id_key = "incr:main_title_id"

        self.r_hd.flushdb()
Beispiel #4
0
 def __init__(self, idf_path, stop_words_path = "", uid_overtime_path = ""):
     """Create the helper objects and constants the filter relies on.

     Args:
         idf_path: Forwarded to ``tf_idf`` as its first argument.
         stop_words_path: Forwarded to ``tf_idf`` as its second argument.
         uid_overtime_path: Forwarded to ``self.gen_uid_overtime_dic``.

     Note:
         A default ``redis.Redis()`` client is created here and its
         ``flushdb()`` is called as the last construction step.
     """
     # Helper objects are built first, in the original order.
     self.uid_overtime_dic = self.gen_uid_overtime_dic(uid_overtime_path)
     self.tf_idf_hd = tf_idf(idf_path, stop_words_path)

     self.repeat, self.not_repeat = 0, 1

     # Translated from the original note: if a title's top-N is smaller
     # than this value, the result is judged by similarity.
     self.sim_judge_limit = 3

     self.r_hd = redis.Redis()

     # Presumably prefixes for the Redis keys built elsewhere in the class.
     self.word_key_pre, self.title_id_pre = "main_word:", "main_title_id:"
     self.uid_pre, self.time_stamp_pre = "main_tid_uid:", "main_time_stamp:"

     # 259200 == 3 * 24 * 60 * 60; the uid default mirrors the time limit.
     self.time_limit = 259200
     self.uid_overtime_default = self.time_limit

     self.main_title_id_key = "incr:main_title_id"

     self.r_hd.flushdb()
Beispiel #5
0
    # Run the filter on the current sample string / id set and show the result.
    ret = flter.filter(s, id_set)

    print ret
    # Wait two seconds before issuing the second call.
    time.sleep(2)
    #raw_input(">>")

    # Second sample: a different string and id set.
    s = "a, b, c, d, f"
    id_set = set([2, 3, 4, 5])

    ret = flter.filter(s, id_set)
    print ret
    cov.stop()  # analysis finished
    cov.save()  # save the coverage results to the data file

# Disabled manual check (``if 0`` never executes): print what
# get_top_n_tf_idf returns for a sample comma-separated string.
if 0:
    tf_idf_hd = tf_idf("idf.txt", "stopwords.txt")

    s = "a, b, c, d, e, f, 1, 2, 3"
    print tf_idf_hd.get_top_n_tf_idf(s)

# Disabled manual check: call insert_s_to_redis directly with a sample
# string, a list of "main_word:"-prefixed keys and a set of integers
# (presumably ids -- confirm against insert_s_to_redis).
if 0:
    main_flter = main_repeat_filter("idf.txt", "stopwords.txt")
    main_flter.insert_s_to_redis('abc',
                                 ['main_word:a', 'main_word:b', 'main_word:c'],
                                 set([10, 20, 30]))

# Disabled manual check: build a pub_repeat_filter and define a sample
# Chinese title string.
if 0:
    pub_flter = pub_repeat_filter("idf.txt", "stopwords.txt")

    title = "广西一官员获刑十年未坐一天牢 法院称系监外执行"
Beispiel #6
0
    # Run the filter on the current sample string / id set and show the result.
    ret = flter.filter(s, id_set)

    print ret
    # Wait two seconds before issuing the second call.
    time.sleep(2)
    #raw_input(">>")

    # Second sample: a different string and id set.
    s = "a, b, c, d, f"
    id_set = set([2, 3, 4, 5])

    ret = flter.filter(s, id_set)
    print ret
    cov.stop()            # analysis finished
    cov.save()            # save the coverage results to the data file

# Disabled manual check (``if 0`` never executes): print what
# get_top_n_tf_idf returns for a sample comma-separated string.
if 0:
    tf_idf_hd = tf_idf("idf.txt", "stopwords.txt")
    
    s = "a, b, c, d, e, f, 1, 2, 3"
    print tf_idf_hd.get_top_n_tf_idf(s)

# Disabled manual check: call insert_s_to_redis directly with a sample
# string, a list of "main_word:"-prefixed keys and a set of integers
# (presumably ids -- confirm against insert_s_to_redis).
if 0:
    main_flter = main_repeat_filter("idf.txt", "stopwords.txt")
    main_flter.insert_s_to_redis('abc', ['main_word:a', 'main_word:b', 'main_word:c'], set([10, 20, 30]))

# Disabled manual check: build a pub_repeat_filter and run its filter on a
# sample Chinese title string.
if 0:
    pub_flter = pub_repeat_filter("idf.txt", "stopwords.txt")

    title = "广西一官员获刑十年未坐一天牢 法院称系监外执行"
    
    ret = pub_flter.filter(title)