def do_open():
    """Yield every item ``iter_range`` produces for the query's key range.

    ``self`` and ``query`` come from the enclosing scope. The file named by
    ``self.filename`` is opened on first iteration and closed when the
    generator is exhausted, closed early, or fails.
    """
    # ``with`` replaces the earlier try/finally form: there, a failing
    # open() left ``source`` unbound, so the ``finally`` clause raised an
    # UnboundLocalError that masked the real I/O error.
    with open(self.filename) as source:
        for line in iter_range(source, query.key, query.end_key):
            yield line
def compute_page_range(self, reader, query):
    """Yield the index lines of one page of the query range, or a page summary.

    Generator. The last and first index lines for the range are located
    with ``search`` / ``iter_range``; the difference of their ``'lineno'``
    fields, together with the page size, gives the number of pages.

    * If ``query.page_count`` is truthy, a single JSON string with the keys
      ``pages``, ``pageSize`` and ``blocks`` is yielded and iteration stops.
    * Otherwise the index lines belonging to page ``query.page`` are yielded.
    * If nothing falls inside the range, nothing is yielded.

    :param reader: open index source; it is always closed when this
        generator finishes, fails, or is closed early.
    :param query: object providing ``key``, ``end_key``, ``page``,
        ``page_size`` and ``page_count``.
    :raises CDXException: if ``query.page`` is negative or past the last page.
    """
    try:
        # Get End
        end_iter = search(reader, query.end_key, prev_size=1)
        try:
            end_line = next(end_iter)
        except StopIteration:
            end_line = read_last_line(reader)

        # Get Start
        first_iter = iter_range(reader, query.key, query.end_key, prev_size=1)
        try:
            first_line = next(first_iter)
        except StopIteration:
            # Empty range. Returning (instead of re-raising StopIteration)
            # ends the generator cleanly; a StopIteration escaping a
            # generator body becomes a RuntimeError under PEP 479.
            return

        first = IDXObject(first_line)
        end = IDXObject(end_line)
        diff = end['lineno'] - first['lineno']

        pagesize = query.page_size
        if not pagesize:
            pagesize = self.max_blocks

        # Floor division keeps the page count an integer on Python 3 as well.
        total_pages = diff // pagesize + 1

        if query.page_count:
            info = dict(pages=total_pages, pageSize=pagesize, blocks=diff + 1)
            yield json.dumps(info)
            return

        curr_page = query.page
        if curr_page >= total_pages or curr_page < 0:
            msg = 'Page {0} invalid: First Page is 0, Last Page is {1}'
            raise CDXException(msg.format(curr_page, total_pages - 1))

        startline = curr_page * pagesize
        endline = min(startline + pagesize - 1, diff)

        if curr_page == 0:
            yield first_line
        else:
            # first_iter has already been advanced past first_line, so its
            # positions are shifted by one relative to absolute line offsets.
            startline -= 1

        # endline is an inclusive absolute offset; because first_iter is one
        # line ahead and islice's stop is exclusive, it is used unchanged.
        for idx in itertools.islice(first_iter, startline, endline):
            yield idx
    finally:
        # Single exit point for the reader: covers normal completion, the
        # CDXException above, unexpected errors and early generator close.
        reader.close()
def load_cdx(self, query):
    """Return an iterator over the index (or cdx) entries matching ``query``.

    Calls ``self.load_loc()``, opens ``self.summary`` and builds an index
    iterator with ``iter_range``. If ``query.secondary_index_only`` is
    truthy that index iterator is returned; otherwise it is passed to
    ``self.idx_to_cdx`` and the resulting blocks are flattened into a single
    stream of cdx entries.

    The opened summary file is closed once the index iterator is exhausted,
    closed early, or garbage collected (previously it was never closed
    explicitly).
    """
    self.load_loc()

    reader = open(self.summary)
    try:
        idx_lines = iter_range(reader, query.key, query.end_key, prev_size=1)
    except Exception:
        # Do not leak the handle if the range lookup itself fails.
        reader.close()
        raise

    def close_when_done():
        # Pass the index lines through unchanged and release the file
        # as soon as the consumer is finished with them.
        try:
            for idx in idx_lines:
                yield idx
        finally:
            reader.close()

    idx_iter = close_when_done()

    if query.secondary_index_only:
        return idx_iter

    blocks = self.idx_to_cdx(idx_iter, query)

    def gen_cdx():
        # Flatten the per-block iterators into one stream of cdx entries.
        for blk in blocks:
            for cdx in blk:
                yield cdx

    return gen_cdx()
def _do_iter(self, fh, params):
    """Lazily wrap each item ``iter_range`` yields for ``fh`` in a CDXObject.

    The range is bounded by ``params['key']`` and ``params['end_key']``.
    """
    matching = iter_range(fh, params['key'], params['end_key'])
    for raw_line in matching:
        yield CDXObject(raw_line)
def compute_page_range(self, reader, query):
    """Yield the index lines of one page of the query range, or a page summary.

    Generator. The last and first index lines for the range are located
    with ``search`` / ``iter_range``; the difference of their ``'lineno'``
    fields, together with the page size, gives the number of pages.

    * If ``query.page_count`` is truthy, a single value built by
      ``self._page_info`` is yielded and iteration stops.
    * Otherwise the index lines belonging to page ``query.page`` are yielded.
    * If nothing falls inside the range, nothing is yielded (apart from an
      all-zero page info when ``query.page_count`` is set).

    :param reader: open index source; it is always closed when this
        generator finishes, fails, or is closed early.
    :param query: object providing ``key``, ``end_key``, ``page``,
        ``page_size`` and ``page_count``.
    :raises CDXException: if ``query.page`` is negative or past the last page.
    """
    try:
        pagesize = query.page_size
        if not pagesize:
            pagesize = self.max_blocks
        else:
            pagesize = int(pagesize)

        last_line = None

        # Get End
        end_iter = search(reader, query.end_key, prev_size=1)
        try:
            end_line = six.next(end_iter)
        except StopIteration:
            last_line = read_last_line(reader)
            end_line = last_line

        # Get Start
        first_iter = iter_range(reader, query.key, query.end_key, prev_size=1)
        try:
            first_line = six.next(first_iter)
        except StopIteration:
            if end_line == last_line and query.key >= last_line:
                # The end search fell back to the last line of the index and
                # the start key is at or past it: use that line as the start.
                first_line = last_line
            else:
                # Empty range: report zero pages if asked, then stop.
                if query.page_count:
                    yield self._page_info(0, pagesize, 0)
                return

        first = IDXObject(first_line)
        end = IDXObject(end_line)

        try:
            blocks = end['lineno'] - first['lineno']
            total_pages = int(blocks / pagesize) + 1
        except Exception:
            # Best effort: if the block count cannot be computed, fall back
            # to a single page with an unknown (-1) block count. Narrowed
            # from a bare ``except`` so KeyboardInterrupt/SystemExit pass.
            blocks = -1
            total_pages = 1

        if query.page_count:
            # same line, so actually need to look at cdx
            # to determine if it exists
            if blocks == 0:
                try:
                    block_cdx_iter = self.idx_to_cdx([first_line], query)
                    block = six.next(block_cdx_iter)
                    # Only the existence of a first cdx entry matters here.
                    six.next(block)
                except StopIteration:
                    total_pages = 0
                    blocks = -1

            yield self._page_info(total_pages, pagesize, blocks + 1)
            return

        curr_page = query.page
        if curr_page >= total_pages or curr_page < 0:
            msg = 'Page {0} invalid: First Page is 0, Last Page is {1}'
            raise CDXException(msg.format(curr_page, total_pages - 1))

        startline = curr_page * pagesize
        endline = startline + pagesize - 1
        if blocks >= 0:
            endline = min(endline, blocks)

        if curr_page == 0:
            yield first_line
        else:
            # first_iter has already been advanced by one line, so its
            # positions are shifted by one relative to absolute line offsets.
            startline -= 1

        for idx in itertools.islice(first_iter, startline, endline):
            yield idx
    finally:
        # Single exit point for the reader: covers normal completion, the
        # CDXException above, unexpected errors and early generator close.
        reader.close()
def do_open():
    """Stream the items ``iter_range`` yields for the query's key range.

    ``self`` and ``query`` come from the enclosing scope. The file named by
    ``self.filename`` is opened in binary mode and stays open only while
    the generator is being consumed.
    """
    with open(self.filename, 'rb') as handle:
        for entry in iter_range(handle, query.key, query.end_key):
            yield entry
def _do_load_file(filename, query):
    """Open ``filename`` in binary mode and yield its entries in key range.

    The entries are whatever ``iter_range`` produces between ``query.key``
    and ``query.end_key``; the file is closed when the generator ends.
    """
    with open(filename, 'rb') as stream:
        in_range = iter_range(stream, query.key, query.end_key)
        for item in in_range:
            yield item
def do_load(fh):
    """Yield a CDXObject for each item ``iter_range`` produces from ``fh``.

    ``params`` comes from the enclosing scope and supplies the ``'key'`` and
    ``'end_key'`` bounds. ``fh`` is used as a context manager, so it is
    released when the generator finishes or is closed.
    """
    with fh:
        for raw_line in iter_range(fh, params['key'], params['end_key']):
            yield CDXObject(raw_line)
def compute_page_range(self, reader, query):
    """Yield the index lines of one page of the query range, or a page summary.

    Generator. The last and first index lines for the range are located
    with ``search`` / ``iter_range``; the difference of their ``'lineno'``
    fields, together with the page size, gives the number of pages.

    * If ``query.page_count`` is truthy, a single value built by
      ``self._page_info`` is yielded and iteration stops.
    * Otherwise the index lines belonging to page ``query.page`` are yielded.
    * If nothing falls inside the range, nothing is yielded (apart from an
      all-zero page info when ``query.page_count`` is set).

    :param reader: open index source; it is always closed when this
        generator finishes, fails, or is closed early.
    :param query: object providing ``key``, ``end_key``, ``page``,
        ``page_size`` and ``page_count``.
    :raises CDXException: if ``query.page`` is negative or past the last page.
    """
    try:
        pagesize = query.page_size
        if not pagesize:
            pagesize = self.max_blocks
        else:
            pagesize = int(pagesize)

        last_line = None

        # Get End
        end_iter = search(reader, query.end_key, prev_size=1)
        try:
            # Builtin next() works on both Python 2.6+ and Python 3,
            # unlike the iterator's ``.next()`` method.
            end_line = next(end_iter)
        except StopIteration:
            last_line = read_last_line(reader)
            end_line = last_line

        # Get Start
        first_iter = iter_range(reader, query.key, query.end_key, prev_size=1)
        try:
            first_line = next(first_iter)
        except StopIteration:
            if end_line == last_line and query.key >= last_line:
                # The end search fell back to the last line of the index and
                # the start key is at or past it: use that line as the start.
                first_line = last_line
            else:
                # Empty range: report zero pages if asked, then stop.
                # Returning (instead of re-raising StopIteration) ends the
                # generator cleanly; a StopIteration escaping a generator
                # body becomes a RuntimeError under PEP 479.
                if query.page_count:
                    yield self._page_info(0, pagesize, 0)
                return

        first = IDXObject(first_line)
        end = IDXObject(end_line)

        try:
            blocks = end['lineno'] - first['lineno']
            # Floor division keeps the page count an integer on Python 3.
            total_pages = blocks // pagesize + 1
        except Exception:
            # Best effort: if the block count cannot be computed, fall back
            # to a single page with an unknown (-1) block count. Narrowed
            # from a bare ``except`` so KeyboardInterrupt/SystemExit pass.
            blocks = -1
            total_pages = 1

        if query.page_count:
            # same line, so actually need to look at cdx
            # to determine if it exists
            if blocks == 0:
                try:
                    block_cdx_iter = self.idx_to_cdx([first_line], query)
                    block = next(block_cdx_iter)
                    # Only the existence of a first cdx entry matters here.
                    next(block)
                except StopIteration:
                    total_pages = 0
                    blocks = -1

            yield self._page_info(total_pages, pagesize, blocks + 1)
            return

        curr_page = query.page
        if curr_page >= total_pages or curr_page < 0:
            msg = 'Page {0} invalid: First Page is 0, Last Page is {1}'
            raise CDXException(msg.format(curr_page, total_pages - 1))

        startline = curr_page * pagesize
        endline = startline + pagesize - 1
        if blocks >= 0:
            endline = min(endline, blocks)

        if curr_page == 0:
            yield first_line
        else:
            # first_iter has already been advanced by one line, so its
            # positions are shifted by one relative to absolute line offsets.
            startline -= 1

        for idx in itertools.islice(first_iter, startline, endline):
            yield idx
    finally:
        # Single exit point for the reader: covers normal completion, the
        # CDXException above, unexpected errors and early generator close.
        reader.close()
def load_cdx(self, query):
    """Return an iterator over the entries of ``self.filename`` in key range.

    The file is opened immediately and ``iter_range`` is called with
    ``query.key`` and ``query.end_key``. The returned iterator passes the
    resulting items through unchanged and closes the file once it is
    exhausted, closed early, or garbage collected (previously the file was
    never closed explicitly).
    """
    source = open(self.filename)
    try:
        lines = iter_range(source, query.key, query.end_key)
    except Exception:
        # Do not leak the handle if the range lookup itself fails.
        source.close()
        raise

    def iter_and_close():
        try:
            for line in lines:
                yield line
        finally:
            source.close()

    return iter_and_close()