예제 #1
0
 def __init__(self, path, debug):
     """Initialise the base dupe filter, then attach database-backed state.

     Args:
         path: Forwarded unchanged to ``RFPDupeFilter.__init__``.
         debug: Forwarded unchanged to ``RFPDupeFilter.__init__``.
     """
     RFPDupeFilter.__init__(self, path, debug)

     # Keep both the connection and a cursor opened on it.
     connection = dbconnection.getConnection()
     self.db = connection
     self.cur = connection.cursor()

     # NOTE(review): `task` is not a parameter of this method; presumably it
     # is a module-level name defined elsewhere -- confirm.
     self.task = task

     # Run last, after db/cur/task have been assigned.
     self.urls = self.loadFromDB()
     self.filter = getPersistFilter(self.task)
예제 #2
0
 def __init__(self, path, debug):
     """Run the base-class initialiser and set up the DB-backed attributes.

     Args:
         path: Passed straight through to ``RFPDupeFilter.__init__``.
         debug: Passed straight through to ``RFPDupeFilter.__init__``.
     """
     RFPDupeFilter.__init__(self, path, debug)

     # One connection object, stored on the instance along with its cursor.
     conn = dbconnection.getConnection()
     self.db = conn
     self.cur = conn.cursor()

     # NOTE(review): `task` is not defined in this method's signature;
     # presumably a module-level global -- verify before relying on it.
     self.task = task

     # Both calls happen only after db, cur and task are in place.
     self.urls = self.loadFromDB()
     self.filter = getPersistFilter(self.task)
예제 #3
0
    def __init__(self, path=None, debug=False):
        """Reserve a Redis Bloom filter, then initialise the base dupe filter.

        Connects to Redis on 127.0.0.1:6379 and issues ``bf.reserve`` for the
        key ``"url"``. A ``ResponseError`` from that command is logged and
        otherwise ignored, so construction continues.

        Args:
            path: Forwarded to ``RFPDupeFilter.__init__``.
            debug: Forwarded to ``RFPDupeFilter.__init__`` so the caller's
                value takes effect instead of being silently dropped.
        """
        logging.info("init redis bloomFilter")
        self.key = "url"
        self.redis_client = redis.Redis(host='127.0.0.1', port=6379)
        error_rate = 0.001
        initial_size = 1000
        try:
            # bf.reserve takes three arguments: key, error_rate and
            # initial_size. The lower the error rate, the more space is
            # needed. initial_size is the expected number of elements to be
            # put into the Bloom filter; once the real count exceeds it, the
            # false-positive rate goes up. The default parameters are
            # error_rate=0.01, initial_size=100.
            self.redis_client.execute_command("bf.reserve", self.key,
                                              error_rate, initial_size)
        except ResponseError as e:
            # Best effort: presumably raised when the key was already
            # reserved by an earlier run -- log and carry on.
            logging.info(e)

        RFPDupeFilter.__init__(self, path, debug)
예제 #4
0
 def __init__(self, path=None):
     """Create the scalable Bloom filter, then initialise the base filter.

     Args:
         path: Forwarded to ``RFPDupeFilter.__init__``.
     """
     growth_mode = ScalableBloomFilter.SMALL_SET_GROWTH
     self.urls_sbf = ScalableBloomFilter(mode=growth_mode)
     RFPDupeFilter.__init__(self, path)
예제 #5
0
 def __init__(self, path=None):
     """Delegate construction entirely to ``RFPDupeFilter``.

     Args:
         path: Forwarded to ``RFPDupeFilter.__init__``.
     """
     RFPDupeFilter.__init__(self, path=path)
예제 #6
0
파일: filter.py 프로젝트: topclay/LuLunZi
 def __init__(self, path=None, debug=False):
     """Create the in-memory URL set, then initialise the base dupe filter.

     Args:
         path: Forwarded to ``RFPDupeFilter.__init__``.
         debug: Forwarded to ``RFPDupeFilter.__init__`` so the caller's
             value is honoured rather than discarded.
     """
     self.urls_seen = set()
     RFPDupeFilter.__init__(self, path, debug)
예제 #7
0
파일: MyDupeFilter.py 프로젝트: jiady/htdb
 def __init__(self, path=None, debug=False):
     """Initialise the base dupe filter and open a Redis client.

     Args:
         path: Forwarded to ``RFPDupeFilter.__init__``.
         debug: Forwarded to ``RFPDupeFilter.__init__``.
     """
     # Pass the caller's arguments through; hard-coding ``None``/``False``
     # here would make both parameters of this constructor dead.
     RFPDupeFilter.__init__(self, path=path, debug=debug)
     self.rclient = redis.StrictRedis(host="localhost", port=6379, db=0)
예제 #8
0
 def __init__(self, path=None, debug=False):
     """Create the set of seen URLs, then initialise the base dupe filter.

     Args:
         path: Forwarded to ``RFPDupeFilter.__init__``.
         debug: Forwarded to ``RFPDupeFilter.__init__`` so that the value
             supplied by the caller actually reaches the base class.
     """
     self.urls_seen = set()
     RFPDupeFilter.__init__(self, path, debug)
예제 #9
0
 def __init__(self, path=None, other=None):
     """Preload already-seen URLs from MongoDB, then initialise the base filter.

     Reads the ``url`` field of every document in ``nbbs.dsl`` matching
     ``{'out': 1}`` and stores the values in ``self.already_seen``.

     Args:
         path: Forwarded to ``RFPDupeFilter.__init__``.
         other: Forwarded as the second positional argument of
             ``RFPDupeFilter.__init__``.
     """
     client = MongoClient(settings['DBINFO'])
     try:
         # The comprehension drains the cursor completely, so the client can
         # be closed right afterwards without losing any documents.
         self.already_seen = {
             doc['url'] for doc in client.nbbs.dsl.find({'out': 1})
         }
     finally:
         # Release the connection even if the query raises.
         client.close()
     RFPDupeFilter.__init__(self, path, other)
 def __init__(self, path=None, debug=False):
     """Initialise the base dupe filter and attach a ``UrlFilterAndAdd``.

     Args:
         path: Forwarded to ``RFPDupeFilter.__init__``.
         debug: Forwarded to ``RFPDupeFilter.__init__`` so the caller's
             value is not silently ignored.
     """
     RFPDupeFilter.__init__(self, path, debug)
     self.dupefilter = UrlFilterAndAdd()
예제 #11
0
 def __init__(self, path=None, debug=True):
     """Attach the shared Redis helper, then initialise the base dupe filter.

     Args:
         path: Forwarded to ``RFPDupeFilter.__init__``.
         debug: Forwarded to ``RFPDupeFilter.__init__``; defaults to ``True``.
     """
     helper = RedisHelper.get_instance()
     self.redis_client = helper
     RFPDupeFilter.__init__(self, path=path, debug=debug)
예제 #12
0
 def __init__(self, path=None):
     """Start with an empty set of seen URLs, then initialise the base filter.

     Args:
         path: Forwarded to ``RFPDupeFilter.__init__``.
     """
     seen = set()
     self.url_seen = seen
     RFPDupeFilter.__init__(self, path=path)
예제 #13
0
파일: filter.py 프로젝트: v1cker/findSQL
 def __init__(self, path=None, debug=None):
     """Initialise the base dupe filter and an empty fingerprint mapping.

     Args:
         path: Forwarded to ``RFPDupeFilter.__init__``.
         debug: Forwarded to ``RFPDupeFilter.__init__``.
     """
     RFPDupeFilter.__init__(self, path, debug)
     self.fingerprints = {}
     # Parenthesised single-argument form: prints the same text under the
     # Python 2 print statement and is valid syntax on Python 3.
     print("[***]  filter running!")
예제 #14
0
 def __init__(self, path=None, debug=False):
     """Delegate construction to ``RFPDupeFilter`` with the same arguments.

     Args:
         path: Forwarded to ``RFPDupeFilter.__init__``.
         debug: Forwarded to ``RFPDupeFilter.__init__``.
     """
     RFPDupeFilter.__init__(self, path, debug)