def iter_page_info(self):
    """Iterate over the (hash, info) entries of the page database.

    Opens a ``HashInfoStream`` on ``self._page_db[0]`` and yields one
    ``PageInfo(page_hash, info)`` for every element, for as long as
    ``hashinfo_stream_next`` reports ``stream_state_next``.

    The stream is released with ``hashinfo_stream_delete`` when iteration
    finishes, when an exception propagates out of the loop, or when the
    generator is closed before being exhausted.

    Raises:
        AduanaException: if ``hashinfo_stream_new`` returns a non-zero code.
    """
    lib = self._c_aduana

    st = ffi.new('HashInfoStream **')
    ret = lib.hashinfo_stream_new(st, self._page_db[0])
    if ret != 0:
        raise AduanaException.from_error(self._page_db[0].error)

    page_hash = ffi.new('uint64_t *')
    pi = ffi.new('PageInfo **')
    try:
        while True:
            ss = lib.hashinfo_stream_next(st[0], page_hash, pi)
            if ss != lib.stream_state_next:
                break
            yield PageInfo(page_hash[0], pi[0])
    finally:
        # Without the finally clause the stream leaked whenever the caller
        # stopped iterating early or an exception escaped the loop.
        lib.hashinfo_stream_delete(st[0])
def iter_page_info(self):
    """Iterate over the (hash, info) entries of the page database.

    Opens a ``HashInfoStream`` on ``self._page_db[0]`` and yields one
    ``PageInfo(page_hash, info)`` for every element, for as long as
    ``hashinfo_stream_next`` reports ``stream_state_next``.

    The stream is released with ``hashinfo_stream_delete`` when iteration
    finishes, when an exception propagates out of the loop, or when the
    generator is closed before being exhausted.

    Raises:
        PageDBException: if ``hashinfo_stream_new`` returns a non-zero code.
    """
    lib = self._c_aduana

    st = ffi.new('HashInfoStream **')
    ret = lib.hashinfo_stream_new(st, self._page_db[0])
    if ret != 0:
        raise PageDBException.from_error(self._page_db[0].error)

    page_hash = ffi.new('uint64_t *')
    pi = ffi.new('PageInfo **')
    try:
        while True:
            ss = lib.hashinfo_stream_next(st[0], page_hash, pi)
            if ss != lib.stream_state_next:
                break
            yield PageInfo(page_hash[0], pi[0])
    finally:
        # Without the finally clause the stream leaked whenever the caller
        # stopped iterating early or an exception escaped the loop.
        lib.hashinfo_stream_delete(st[0])
def __init__(self, page_db, persist=0, path=None):
    """Create a frequency scheduler on top of *page_db*.

    Args:
        page_db: wrapper object whose ``_page_db[0]`` pointer is handed to
            ``freq_scheduler_new`` and whose ``persist`` attribute is set
            from *persist*.
        persist: value copied to ``page_db.persist`` and to the ``persist``
            field of the newly created scheduler.
        path: optional path passed to ``freq_scheduler_new``; ``ffi.NULL``
            is passed instead when it is falsy.

    Raises:
        AduanaException: if ``freq_scheduler_new`` returns a non-zero code.
    """
    # save to make sure lib is available at destruction time
    self._c_aduana = C_ADUANA
    self._closed = False

    self._page_db = page_db
    self._page_db.persist = persist

    self._sch = ffi.new('FreqScheduler **')
    self._core = SchedulerCore(
        self._sch,
        self._c_aduana.freq_scheduler_add,
        self._c_aduana.freq_scheduler_request
    )

    ret = self._c_aduana.freq_scheduler_new(
        self._sch, self._page_db._page_db[0], path or ffi.NULL
    )
    if ret != 0:
        # ``self._sch`` is the slot allocated by ffi.new() and is never NULL;
        # what matters is whether the C call stored a scheduler in it.
        # Testing the slot itself made the fallback branch unreachable and
        # dereferenced a NULL scheduler to read its error.
        if self._sch[0] != ffi.NULL:
            raise AduanaException.from_error(self._sch[0].error)
        else:
            raise AduanaException("Error inside freq_scheduler_new", ret)

    self._sch[0].persist = persist
def __init__(self, page_db, persist=0, scorer=None, path=None):
    """Create a best-first scheduler on top of *page_db*.

    Args:
        page_db: wrapper object whose ``_page_db[0]`` pointer is handed to
            ``bf_scheduler_new`` and whose ``persist`` attribute is set
            from *persist*.
        persist: value copied to ``page_db.persist`` and passed to
            ``bf_scheduler_set_persist``.
        scorer: optional object with a ``setup`` method; when truthy it is
            kept in ``self._scorer`` and ``setup`` is called with the
            scheduler's ``scorer`` field.
        path: optional path passed to ``bf_scheduler_new``; ``ffi.NULL`` is
            passed instead when it is falsy.

    Raises:
        PageDBException: if ``bf_scheduler_new`` or
            ``bf_scheduler_update_start`` returns a non-zero code.
    """
    # save to make sure lib is available at destruction time
    self._c_aduana = C_ADUANA

    self._page_db = page_db
    self._page_db.persist = persist

    self._sch = ffi.new('BFScheduler **')
    self._core = SchedulerCore(
        self._sch,
        self._c_aduana.bf_scheduler_add,
        self._c_aduana.bf_scheduler_request
    )

    ret = self._c_aduana.bf_scheduler_new(
        self._sch, self._page_db._page_db[0], path or ffi.NULL
    )
    if ret != 0:
        # ``self._sch`` is the slot allocated by ffi.new() and is never NULL;
        # what matters is whether the C call stored a scheduler in it.
        # Testing the slot itself made the fallback branch unreachable and
        # dereferenced a NULL scheduler to read its error.
        if self._sch[0] != ffi.NULL:
            raise PageDBException.from_error(self._sch[0].error)
        else:
            raise PageDBException("Error inside bf_scheduler_new", ret)

    self._c_aduana.bf_scheduler_set_persist(self._sch[0], persist)

    if scorer:
        self._scorer = scorer
        self._scorer.setup(self._sch[0].scorer)
        # NOTE(review): the update is started only when a scorer is given;
        # the original layout of this block was flattened, so confirm this
        # nesting against the upstream source.
        ret = self._c_aduana.bf_scheduler_update_start(self._sch[0])
        if ret != 0:
            raise PageDBException.from_error(self._sch[0].error)
def __init__(self, page_db, persist=0, scorer=None, path=None):
    """Create a best-first scheduler on top of *page_db*.

    Args:
        page_db: wrapper object whose ``_page_db[0]`` pointer is handed to
            ``bf_scheduler_new`` and whose ``persist`` attribute is set
            from *persist*.
        persist: value copied to ``page_db.persist`` and passed to
            ``bf_scheduler_set_persist``.
        scorer: optional object with a ``setup`` method; when truthy it is
            kept in ``self._scorer`` and ``setup`` is called with the
            scheduler's ``scorer`` field.
        path: optional path passed to ``bf_scheduler_new``; ``ffi.NULL`` is
            passed instead when it is falsy.

    Raises:
        AduanaException: if ``bf_scheduler_new`` or
            ``bf_scheduler_update_start`` returns a non-zero code.
    """
    # save to make sure lib is available at destruction time
    self._c_aduana = C_ADUANA
    self._closed = False

    self._page_db = page_db
    self._page_db.persist = persist

    self._sch = ffi.new('BFScheduler **')
    self._core = SchedulerCore(
        self._sch,
        self._c_aduana.bf_scheduler_add,
        self._c_aduana.bf_scheduler_request
    )

    ret = self._c_aduana.bf_scheduler_new(
        self._sch, self._page_db._page_db[0], path or ffi.NULL
    )
    if ret != 0:
        # ``self._sch`` is the slot allocated by ffi.new() and is never NULL;
        # what matters is whether the C call stored a scheduler in it.
        # Testing the slot itself made the fallback branch unreachable and
        # dereferenced a NULL scheduler to read its error.
        if self._sch[0] != ffi.NULL:
            raise AduanaException.from_error(self._sch[0].error)
        else:
            raise AduanaException("Error inside bf_scheduler_new", ret)

    self._c_aduana.bf_scheduler_set_persist(self._sch[0], persist)

    if scorer:
        self._scorer = scorer
        self._scorer.setup(self._sch[0].scorer)
        # NOTE(review): the update is started only when a scorer is given;
        # the original layout of this block was flattened, so confirm this
        # nesting against the upstream source.
        ret = self._c_aduana.bf_scheduler_update_start(self._sch[0])
        if ret != 0:
            raise AduanaException.from_error(self._sch[0].error)
def __init__(self, page_db, persist=0, path=None):
    """Create a frequency scheduler on top of *page_db*.

    Args:
        page_db: wrapper object whose ``_page_db[0]`` pointer is handed to
            ``freq_scheduler_new`` and whose ``persist`` attribute is set
            from *persist*.
        persist: value copied to ``page_db.persist`` and to the ``persist``
            field of the newly created scheduler.
        path: optional path passed to ``freq_scheduler_new``; ``ffi.NULL``
            is passed instead when it is falsy.

    Raises:
        PageDBException: if ``freq_scheduler_new`` returns a non-zero code.
    """
    # save to make sure lib is available at destruction time
    self._c_aduana = C_ADUANA

    self._page_db = page_db
    self._page_db.persist = persist

    self._sch = ffi.new('FreqScheduler **')
    self._core = SchedulerCore(
        self._sch,
        self._c_aduana.freq_scheduler_add,
        self._c_aduana.freq_scheduler_request
    )

    ret = self._c_aduana.freq_scheduler_new(
        self._sch, self._page_db._page_db[0], path or ffi.NULL
    )
    if ret != 0:
        # ``self._sch`` is the slot allocated by ffi.new() and is never NULL;
        # what matters is whether the C call stored a scheduler in it.
        # Testing the slot itself made the fallback branch unreachable and
        # dereferenced a NULL scheduler to read its error.
        if self._sch[0] != ffi.NULL:
            raise PageDBException.from_error(self._sch[0].error)
        else:
            raise PageDBException("Error inside freq_scheduler_new", ret)

    self._sch[0].persist = persist
def page_info(self, page_hash):
    """Fetch the info record stored in the page database for *page_hash*.

    Args:
        page_hash: page identifier; it is cast to ``uint64_t`` before being
            passed to ``page_db_get_info``.

    Returns:
        ``PageInfo`` built from *page_hash* and the pointer filled in by
        ``page_db_get_info``.

    Raises:
        AduanaException: if ``page_db_get_info`` returns a non-zero code.
    """
    info_ref = ffi.new('PageInfo **')
    get_info = self._c_aduana.page_db_get_info

    status = get_info(
        self._page_db[0],
        ffi.cast('uint64_t', page_hash),
        info_ref
    )
    if status == 0:
        return PageInfo(page_hash, info_ref[0])

    raise AduanaException.from_error(self._page_db[0].error)
def page_info(self, page_hash):
    """Fetch the info record stored in the page database for *page_hash*.

    Args:
        page_hash: page identifier; it is cast to ``uint64_t`` before being
            passed to ``page_db_get_info``.

    Returns:
        ``PageInfo`` built from *page_hash* and the pointer filled in by
        ``page_db_get_info``.

    Raises:
        PageDBException: if ``page_db_get_info`` returns a non-zero code.
    """
    info_ref = ffi.new('PageInfo **')
    get_info = self._c_aduana.page_db_get_info

    status = get_info(
        self._page_db[0],
        ffi.cast('uint64_t', page_hash),
        info_ref
    )
    if status == 0:
        return PageInfo(page_hash, info_ref[0])

    raise PageDBException.from_error(self._page_db[0].error)
def requests(self, n_pages):
    """Ask the scheduler for a batch of URLs.

    Args:
        n_pages: value forwarded to the scheduler request function.

    Returns:
        List with one ``ffi.string`` result per entry of the ``urls`` array
        of the returned ``PageRequest`` (``n_urls`` entries).

    Raises:
        PageDBException: if the scheduler request function returns a
            non-zero code.
    """
    pReq = ffi.new('PageRequest **')
    ret = self._scheduler_request(self._sch[0], n_pages, pReq)
    if ret != 0:
        raise PageDBException.from_error(self._sch[0].error)

    request = pReq[0]
    try:
        return [ffi.string(request.urls[i]) for i in xrange(request.n_urls)]
    finally:
        # Release the C-side request even if converting a URL raises;
        # previously such an exception skipped the delete and leaked it.
        self._c_aduana.page_request_delete(request)
def requests(self, n_pages):
    """Ask the scheduler for a batch of URLs.

    Args:
        n_pages: value forwarded to the scheduler request function.

    Returns:
        List with one ``ffi.string`` result per entry of the ``urls`` array
        of the returned ``PageRequest`` (``n_urls`` entries).

    Raises:
        AduanaException: if the scheduler request function returns a
            non-zero code.
    """
    pReq = ffi.new('PageRequest **')
    ret = self._scheduler_request(self._sch[0], n_pages, pReq)
    if ret != 0:
        raise AduanaException.from_error(self._sch[0].error)

    request = pReq[0]
    try:
        return [ffi.string(request.urls[i]) for i in xrange(request.n_urls)]
    finally:
        # Release the C-side request even if converting a URL raises;
        # previously such an exception skipped the delete and leaked it.
        self._c_aduana.page_request_delete(request)
def __init__(self, path, persist=0):
    """Open (or create) a page database through ``page_db_new``.

    Args:
        path: location passed unchanged to ``page_db_new``.
        persist: value assigned to ``self.persist`` once the database has
            been created successfully.

    Raises:
        PageDBException: if ``page_db_new`` returns a non-zero code.
    """
    # save to make sure lib is available at destruction time
    self._c_aduana = C_ADUANA

    self._page_db = ffi.new('PageDB **')
    ret = self._c_aduana.page_db_new(self._page_db, path)
    if ret != 0:
        # ``self._page_db`` is the slot allocated by ffi.new() and is never
        # NULL; what matters is whether the C call stored a database in it.
        # Testing the slot itself made the fallback branch unreachable and
        # dereferenced a NULL database to read its error.
        if self._page_db[0] != ffi.NULL:
            raise PageDBException.from_error(self._page_db[0].error)
        else:
            raise PageDBException("Error inside page_db_new", ret)

    self.persist = persist
def __init__(self, path, persist=0):
    """Open (or create) a page database through ``page_db_new``.

    Args:
        path: location passed unchanged to ``page_db_new``.
        persist: value assigned to ``self.persist`` once the database has
            been created successfully.

    Raises:
        AduanaException: if ``page_db_new`` returns a non-zero code.
    """
    # save to make sure lib is available at destruction time
    self._c_aduana = C_ADUANA
    self._closed = False

    self._page_db = ffi.new('PageDB **')
    ret = self._c_aduana.page_db_new(self._page_db, path)
    if ret != 0:
        # ``self._page_db`` is the slot allocated by ffi.new() and is never
        # NULL; what matters is whether the C call stored a database in it.
        # Testing the slot itself made the fallback branch unreachable and
        # dereferenced a NULL database to read its error.
        if self._page_db[0] != ffi.NULL:
            raise AduanaException.from_error(self._page_db[0].error)
        else:
            raise AduanaException("Error inside page_db_new", ret)

    self.persist = persist
def load(self, freq_iter):
    """Write ``(page_hash, page_freq)`` pairs into the scheduler via a cursor.

    A cursor is opened with ``freq_scheduler_cursor_open``, every pair of
    *freq_iter* is written with ``freq_scheduler_cursor_write`` (the hash is
    cast to ``uint64_t`` first) and the cursor is finally committed with
    ``freq_scheduler_cursor_commit``.

    Args:
        freq_iter: iterable of two-element items ``(page_hash, page_freq)``.

    Raises:
        PageDBException: if opening or committing the cursor returns a
            non-zero code.
    """
    lib = self._c_aduana
    cursor_ref = ffi.new('void **')

    status = lib.freq_scheduler_cursor_open(self._sch[0], cursor_ref)
    if status != 0:
        raise PageDBException.from_error(self._sch[0].error)

    for entry in freq_iter:
        page_hash, page_freq = entry
        # NOTE(review): the result of the write call is not inspected.
        lib.freq_scheduler_cursor_write(
            self._sch[0],
            cursor_ref[0],
            ffi.cast('uint64_t', page_hash),
            page_freq
        )

    status = lib.freq_scheduler_cursor_commit(self._sch[0], cursor_ref[0])
    if status == 0:
        return
    raise PageDBException.from_error(self._sch[0].error)
def load(self, freq_iter):
    """Write ``(page_hash, page_freq)`` pairs into the scheduler via a cursor.

    A cursor is opened with ``freq_scheduler_cursor_open``, every pair of
    *freq_iter* is written with ``freq_scheduler_cursor_write`` (the hash is
    cast to ``uint64_t`` first) and the cursor is finally committed with
    ``freq_scheduler_cursor_commit``.

    Args:
        freq_iter: iterable of two-element items ``(page_hash, page_freq)``.

    Raises:
        AduanaException: if opening or committing the cursor returns a
            non-zero code.
    """
    lib = self._c_aduana
    cursor_ref = ffi.new('void **')

    status = lib.freq_scheduler_cursor_open(self._sch[0], cursor_ref)
    if status != 0:
        raise AduanaException.from_error(self._sch[0].error)

    for entry in freq_iter:
        page_hash, page_freq = entry
        # NOTE(review): the result of the write call is not inspected.
        lib.freq_scheduler_cursor_write(
            self._sch[0],
            cursor_ref[0],
            ffi.cast('uint64_t', page_hash),
            page_freq
        )

    status = lib.freq_scheduler_cursor_commit(self._sch[0], cursor_ref[0])
    if status == 0:
        return
    raise AduanaException.from_error(self._sch[0].error)
def __init__(self, page_db):
    """Create a PageRank scorer bound to *page_db*.

    Args:
        page_db: wrapper object whose ``_page_db[0]`` pointer is handed to
            ``page_rank_scorer_new``.
    """
    lib = C_ADUANA
    self._c_aduana = lib
    self._closed = False

    scorer_ref = ffi.new('PageRankScorer **')
    self._scorer = scorer_ref

    # NOTE(review): the value returned by the constructor is not checked.
    lib.page_rank_scorer_new(scorer_ref, page_db._page_db[0])
def __init__(self, page_db):
    """Create a HITS scorer bound to *page_db*.

    Args:
        page_db: wrapper object whose ``_page_db[0]`` pointer is handed to
            ``hits_scorer_new``.
    """
    lib = C_ADUANA
    self._c_aduana = lib
    self._closed = False

    scorer_ref = ffi.new('HitsScorer **')
    self._scorer = scorer_ref

    # NOTE(review): the value returned by the constructor is not checked.
    lib.hits_scorer_new(scorer_ref, page_db._page_db[0])
def __init__(self, page_db):
    """Create a HITS scorer bound to *page_db*.

    Args:
        page_db: wrapper object whose ``_page_db[0]`` pointer is handed to
            ``hits_scorer_new``.
    """
    lib = C_ADUANA
    self._c_aduana = lib

    scorer_ref = ffi.new('HitsScorer **')
    self._scorer = scorer_ref

    # NOTE(review): the value returned by the constructor is not checked.
    lib.hits_scorer_new(scorer_ref, page_db._page_db[0])
def __init__(self, page_db):
    """Create a PageRank scorer bound to *page_db*.

    Args:
        page_db: wrapper object whose ``_page_db[0]`` pointer is handed to
            ``page_rank_scorer_new``.
    """
    lib = C_ADUANA
    self._c_aduana = lib

    scorer_ref = ffi.new('PageRankScorer **')
    self._scorer = scorer_ref

    # NOTE(review): the value returned by the constructor is not checked.
    lib.page_rank_scorer_new(scorer_ref, page_db._page_db[0])