Esempio n. 1
0
 def __init__(self, path, debug):
     """Initialise the base dupe filter, then the database-backed state.

     :param path: forwarded unchanged to ``RFPDupeFilter.__init__``.
     :param debug: forwarded unchanged to ``RFPDupeFilter.__init__``.
     """
     RFPDupeFilter.__init__(self, path, debug)

     # Keep both the connection and a cursor created from it on the instance.
     connection = dbconnection.getConnection()
     self.db = connection
     self.cur = connection.cursor()

     # NOTE(review): ``task`` is not a parameter of this method, so it has to
     # resolve from a scope that is not visible in this snippet -- confirm.
     self.task = task

     # The attributes above are assigned before loadFromDB() is called,
     # matching the original statement order.
     self.urls = self.loadFromDB()
     self.filter = getPersistFilter(self.task)
Esempio n. 2
0
 def __init__(self, path, debug):
     """Run the base initialiser and then build the DB-backed attributes.

     :param path: handed straight to ``RFPDupeFilter.__init__``.
     :param debug: handed straight to ``RFPDupeFilter.__init__``.
     """
     RFPDupeFilter.__init__(self, path, debug)

     # One connection object backs both ``self.db`` and ``self.cur``.
     conn = dbconnection.getConnection()
     self.db = conn
     self.cur = conn.cursor()

     # NOTE(review): ``task`` is not defined in this method; it presumably
     # comes from module scope outside this view -- verify before relying on it.
     self.task = task

     # loadFromDB() runs only after db/cur/task are set, as in the original.
     self.urls = self.loadFromDB()
     self.filter = getPersistFilter(self.task)
Esempio n. 3
0
    def __init__(self, path=None, debug=False):
        """Reserve the Redis Bloom filter key, then initialise the base filter.

        :param path: forwarded to ``RFPDupeFilter.__init__``.
        :param debug: forwarded to ``RFPDupeFilter.__init__``. It was
            previously accepted here but silently dropped, so the caller's
            debug flag never reached the base class.
        """
        logging.info("init redis bloomFilter")
        self.key = "url"
        self.redis_client = redis.Redis(host='127.0.0.1', port=6379)
        error_rate = 0.001
        initial_size = 1000
        try:
            # bf.reserve takes three arguments: key, error_rate and
            # initial_size. The lower the error rate, the more space is
            # needed. initial_size is the expected number of elements to be
            # put into the Bloom filter; once the real count exceeds it, the
            # false-positive rate goes up. The defaults are error_rate=0.01
            # and initial_size=100.
            self.redis_client.execute_command("bf.reserve", self.key,
                                              error_rate, initial_size)
        except ResponseError as e:
            # A failed reservation is only logged, never raised
            # (presumably the key already exists on a re-run -- confirm).
            logging.info(e)

        RFPDupeFilter.__init__(self, path, debug)
Esempio n. 4
0
 def __init__(self, path=None):
     """Attach a scalable Bloom filter, then run the base initialiser.

     :param path: forwarded unchanged to ``RFPDupeFilter.__init__``.
     """
     # The filter is created with the SMALL_SET_GROWTH mode constant.
     growth_mode = ScalableBloomFilter.SMALL_SET_GROWTH
     self.urls_sbf = ScalableBloomFilter(mode=growth_mode)
     RFPDupeFilter.__init__(self, path)
Esempio n. 5
0
 def __init__(self, path=None):
     """Initialise the filter by delegating straight to ``RFPDupeFilter``.

     :param path: passed through unchanged as the base initialiser's
         first positional argument.
     """
     RFPDupeFilter.__init__(self, path)
Esempio n. 6
0
 def __init__(self, path=None, debug=False):
     """Create the in-memory URL set, then initialise the base filter.

     :param path: forwarded to ``RFPDupeFilter.__init__``.
     :param debug: forwarded to ``RFPDupeFilter.__init__``. It used to be
         accepted but ignored, so the caller's debug flag had no effect.
     """
     # Starts empty; nothing in this method populates it.
     self.urls_seen = set()
     RFPDupeFilter.__init__(self, path, debug)
Esempio n. 7
0
 def __init__(self, path=None, debug=False):
     """Initialise the base filter and open a Redis client.

     :param path: forwarded to ``RFPDupeFilter.__init__``.
     :param debug: forwarded to ``RFPDupeFilter.__init__``.

     The base call previously hard-coded ``path=None, debug=False``, which
     discarded whatever the caller passed in.
     NOTE(review): if pinning ``path`` to ``None`` was intentional, restore
     that explicitly and document why.
     """
     RFPDupeFilter.__init__(self, path=path, debug=debug)
     self.rclient = redis.StrictRedis(host="localhost", port=6379, db=0)
Esempio n. 8
0
 def __init__(self, path=None, debug=False):
     """Set up an empty URL set and initialise the base filter.

     :param path: forwarded to ``RFPDupeFilter.__init__``.
     :param debug: forwarded to ``RFPDupeFilter.__init__``. Previously this
         argument was accepted and then dropped.
     """
     # Starts empty; this method does not add anything to it.
     self.urls_seen = set()
     RFPDupeFilter.__init__(self, path, debug)
Esempio n. 9
0
 def __init__(self, path=None, other=None):
     """Preload URLs from MongoDB, then initialise the base filter.

     Collects the ``'url'`` field of every document in ``nbbs.dsl`` matching
     ``{'out': 1}`` into ``self.already_seen``.

     :param path: forwarded to ``RFPDupeFilter.__init__``.
     :param other: forwarded as the base initialiser's second positional
         argument (presumably its debug flag -- confirm against the base).
     """
     client = MongoClient(settings['DBINFO'])
     try:
         # Consume the whole query here so the client can be closed right
         # after; the original left the client open for the process lifetime.
         self.already_seen = set(
             doc['url'] for doc in client.nbbs.dsl.find({'out': 1})
         )
     finally:
         client.close()
     RFPDupeFilter.__init__(self, path, other)
 def __init__(self, path=None, debug=False):
     """Initialise the base filter and attach a ``UrlFilterAndAdd`` helper.

     :param path: forwarded to ``RFPDupeFilter.__init__``.
     :param debug: forwarded to ``RFPDupeFilter.__init__``. It was previously
         accepted but never passed on.
     """
     RFPDupeFilter.__init__(self, path, debug)
     self.dupefilter = UrlFilterAndAdd()
Esempio n. 11
0
 def __init__(self, path=None, debug=True):
     """Fetch the Redis helper instance, then initialise the base filter.

     :param path: forwarded unchanged to ``RFPDupeFilter.__init__``.
     :param debug: forwarded unchanged to ``RFPDupeFilter.__init__``; note
         that it defaults to ``True`` in this signature.
     """
     # The client is obtained before the base initialiser runs.
     self.redis_client = RedisHelper.get_instance()
     RFPDupeFilter.__init__(self, path, debug)
Esempio n. 12
0
 def __init__(self, path=None):
     """Create an empty URL set, then initialise the base filter.

     :param path: forwarded unchanged to ``RFPDupeFilter.__init__``.
     """
     # Starts empty; nothing in this method populates it.
     self.url_seen = set()
     RFPDupeFilter.__init__(self, path)
Esempio n. 13
0
 def __init__(self, path=None, debug=None):
     """Initialise the base filter, reset fingerprints and announce start-up.

     :param path: forwarded to ``RFPDupeFilter.__init__``.
     :param debug: forwarded to ``RFPDupeFilter.__init__``; defaults to
         ``None`` in this signature.
     """
     RFPDupeFilter.__init__(self, path, debug)
     # Assigned after the base initialiser, so this empty dict is what the
     # instance ends up with regardless of what the base call set.
     self.fingerprints = {}
     # Parenthesised form prints the same text on Python 2 and is valid
     # syntax on Python 3 (the bare print statement was a SyntaxError there).
     print("[***]  filter running!")
Esempio n. 14
0
 def __init__(self, path=None, debug=False):
     """Initialise the filter by delegating to ``RFPDupeFilter``.

     :param path: passed to the base initialiser as keyword ``path``.
     :param debug: passed to the base initialiser as keyword ``debug``.
     """
     RFPDupeFilter.__init__(self, path=path, debug=debug)