Exemple #1
0
    def fetch(self, fingerprints):
        """Pull the states of not-yet-cached fingerprints into ``self._cache``.

        Fingerprints already present in the cache are left alone. The missing
        ones are split with ``chunks(..., 128)`` and each piece is looked up
        with an ``IN`` filter on ``self.model.fingerprint``; every row found
        is cached under ``str(row.fingerprint)``.

        :param fingerprints: sized collection of fingerprints to look up.
        """
        cache = self._cache
        missing = [fprint for fprint in fingerprints if fprint not in cache]
        self.logger.debug("cache size %s", len(cache))
        self.logger.debug("to fetch %d from %d", len(missing), len(fingerprints))

        for batch in chunks(missing, 128):
            model = self.model
            rows = self.session.query(model).filter(model.fingerprint.in_(batch))
            for row in rows:
                cache[str(row.fingerprint)] = row.state
Exemple #2
0
    def fetch(self, fingerprints):
        """Load the states of fingerprints that are missing from the cache.

        Fingerprints already present in ``self._cache`` are skipped. The rest
        are split with ``chunks(..., 128)`` and each piece is looked up through
        ``self.model.objects`` for the crawl ``self.crawl_id``; every returned
        row's ``state`` is stored in ``self._cache`` under its ``fingerprint``.

        :param fingerprints: sized collection of fingerprints to look up.
        """
        to_fetch = [f for f in fingerprints if f not in self._cache]
        self.logger.debug("cache size %s", len(self._cache))
        # The counts must be separate arguments: wrapping them in one tuple
        # hands the two-placeholder format a single value and the record
        # fails to format.
        self.logger.debug("to fetch %d from %d", len(to_fetch), len(fingerprints))

        for chunk in chunks(to_fetch, 128):
            for state in self.model.objects.filter(crawl=self.crawl_id, fingerprint__in=chunk):
                self._cache[state.fingerprint] = state.state
Exemple #3
0
 def fetch(self, fingerprints):
     """Load the states of fingerprints that are missing from the state cache.

     Fingerprints already in ``self._state_cache`` are skipped. The rest are
     split with ``chunks(..., 65536)``, converted to row keys with
     ``unhexlify`` and requested from the table ``self._table_name``. For
     every returned row that has an ``s:state`` cell, the cell is unpacked
     with format ``'>B'`` and cached under ``hexlify(key)``.

     :param fingerprints: sized collection of hex-encoded fingerprints.
     """
     to_fetch = [f for f in fingerprints if f not in self._state_cache]
     # Hand the values to the logger instead of pre-formatting the message,
     # so no string is built when DEBUG output is disabled.
     self.logger.debug("cache size %s", len(self._state_cache))
     self.logger.debug("to fetch %d from %d", len(to_fetch), len(fingerprints))
     for chunk in chunks(to_fetch, 65536):
         keys = [unhexlify(fprint) for fprint in chunk]
         table = self.connection.table(self._table_name)
         records = table.rows(keys, columns=[b's:state'])
         for key, cells in records:
             # Rows without a state cell are left out of the cache.
             if b's:state' in cells:
                 state = unpack('>B', cells[b's:state'])[0]
                 self._state_cache[hexlify(key)] = state
Exemple #4
0
 def flush(self, force_clear):
     """Write every cached state to the table, then optionally clear the cache.

     All ``(fingerprint, state)`` pairs in ``self._state_cache`` are written
     to the table ``self._table_name``, using one transactional batch per
     piece produced by ``chunks(..., 32768)``. The cache is cleared afterwards
     when ``force_clear`` is true or when it holds more than
     ``self._cache_size_limit`` entries.

     :param force_clear: clear the cache after writing even if it is within
         the size limit.
     """
     if len(self._state_cache) > self._cache_size_limit:
         force_clear = True
     table = self.connection.table(self._table_name)
     for chunk in chunks(list(self._state_cache.items()), 32768):
         with table.batch(transaction=True) as b:
             for fprint, state in chunk:
                 hb_obj = prepare_hbase_object(state=state)
                 b.put(unhexlify(fprint), hb_obj)
     if force_clear:
         # Pass the count as a logger argument so the message is only
         # formatted when DEBUG output is enabled.
         self.logger.debug("Cache has %d requests, clearing", len(self._state_cache))
         self._state_cache.clear()
Exemple #5
0
 def fetch(self, fingerprints):
     """Load the states of fingerprints that are missing from the state cache.

     Fingerprints already in ``self._state_cache`` are skipped. The rest are
     split with ``chunks(..., 65536)``, converted to row keys with
     ``unhexlify`` and requested from the table ``self._table_name``. For
     every returned row that has an ``s:state`` cell, the cell is unpacked
     with format ``'>B'`` and cached under ``hexlify(key)``.

     :param fingerprints: sized collection of hex-encoded fingerprints.
     """
     to_fetch = [f for f in fingerprints if f not in self._state_cache]
     # Hand the values to the logger instead of pre-formatting the message,
     # so no string is built when DEBUG output is disabled.
     self.logger.debug("cache size %s", len(self._state_cache))
     self.logger.debug("to fetch %d from %d", len(to_fetch), len(fingerprints))
     for chunk in chunks(to_fetch, 65536):
         keys = [unhexlify(fprint) for fprint in chunk]
         table = self.connection.table(self._table_name)
         records = table.rows(keys, columns=[b's:state'])
         for key, cells in records:
             # Rows without a state cell are left out of the cache.
             if b's:state' in cells:
                 state = unpack('>B', cells[b's:state'])[0]
                 self._state_cache[hexlify(key)] = state
Exemple #6
0
 def flush(self, force_clear):
     """Write every cached state to the table, then optionally clear the cache.

     All ``(fingerprint, state)`` pairs in ``self._state_cache`` are written
     to the table ``self._table_name``, using one transactional batch per
     piece produced by ``chunks(..., 32768)``. The cache is cleared afterwards
     when ``force_clear`` is true or when it holds more than
     ``self._cache_size_limit`` entries.

     :param force_clear: clear the cache after writing even if it is within
         the size limit.
     """
     if len(self._state_cache) > self._cache_size_limit:
         force_clear = True
     table = self.connection.table(self._table_name)
     for chunk in chunks(list(self._state_cache.items()), 32768):
         with table.batch(transaction=True) as b:
             for fprint, state in chunk:
                 hb_obj = prepare_hbase_object(state=state)
                 b.put(unhexlify(fprint), hb_obj)
     if force_clear:
         # Pass the count as a logger argument so the message is only
         # formatted when DEBUG output is enabled.
         self.logger.debug("Cache has %d requests, clearing", len(self._state_cache))
         self._state_cache.clear()
Exemple #7
0
 def fetch(self, fingerprints):
     """Pull the states of not-yet-cached fingerprints into the state cache.

     Returns immediately when every requested fingerprint is already in
     ``self._state_cache``. Otherwise the missing ones are split with
     ``chunks(..., 65536)``, turned into row keys with ``unhexlify`` and
     requested from the table ``self._table_name``; each returned row that
     has an ``s:state`` cell is unpacked with format ``'>B'`` and cached
     under ``hexlify(key)``.

     :param fingerprints: sized collection of hex-encoded fingerprints.
     """
     cache = self._state_cache
     missing = [fprint for fprint in fingerprints if fprint not in cache]
     if len(missing) == 0:
         # Nothing to look up, so skip logging and the table access.
         return

     self.logger.debug(
         'Fetching %d/%d elements from HBase (cache size %d)',
         len(missing), len(fingerprints), len(cache),
     )
     state_column = b's:state'
     for batch in chunks(missing, 65536):
         row_keys = list(map(unhexlify, batch))
         table = self.connection.table(self._table_name)
         for row_key, cells in table.rows(row_keys, columns=[state_column]):
             if state_column not in cells:
                 continue
             (value,) = unpack('>B', cells[state_column])
             cache[hexlify(row_key)] = value
Exemple #8
0
 def test_non_multiple_length(self):
     """Eight items chunked by three end with a shorter final chunk."""
     expected = [[1, 2, 3], [4, 5, 6], [7, 8]]
     produced = list(chunks([1, 2, 3, 4, 5, 6, 7, 8], 3))
     assert produced == expected
Exemple #9
0
 def test_multiple_length(self):
     """Six items chunked by two give three full pairs."""
     pairs = list(chunks([1, 2, 3, 4, 5, 6], 2))
     assert pairs == [[1, 2], [3, 4], [5, 6]]
Exemple #10
0
 def test_empty_list(self):
     """Chunking an empty list produces no chunks."""
     produced = list(chunks([], 1))
     assert produced == []
 def test_non_multiple_length(self):
     """Eight items chunked by three end with a shorter final chunk."""
     source = [1, 2, 3, 4, 5, 6, 7, 8]
     expected = [
         [1, 2, 3],
         [4, 5, 6],
         [7, 8],
     ]
     assert list(chunks(source, 3)) == expected
 def test_multiple_length(self):
     """Six items chunked by two give three full pairs."""
     source = [1, 2, 3, 4, 5, 6]
     expected = [[1, 2], [3, 4], [5, 6]]
     assert list(chunks(source, 2)) == expected
 def test_empty_list(self):
     """Chunking an empty list produces no chunks."""
     nothing = []
     assert list(chunks(nothing, 1)) == []