Ejemplo n.º 1
0
 def do_open():
     """Yield the lines ``iter_range`` selects from ``self.filename``.

     ``self`` and ``query`` come from the enclosing scope; the range passed
     to ``iter_range`` is ``query.key`` .. ``query.end_key``.  The file is
     opened on first iteration and closed when iteration finishes, fails,
     or the generator is closed.
     """
     # ``with`` replaces the former try/finally: if open() itself fails, its
     # own error propagates instead of an UnboundLocalError raised while
     # trying to close a name that was never bound.
     with open(self.filename) as source:
         for line in iter_range(source, query.key, query.end_key):
             yield line
Ejemplo n.º 2
0
    def compute_page_range(self, reader, query):
        """Yield page information, or the index lines of one page, for ``query``.

        The block range is bounded by the first line ``iter_range`` returns
        for ``query.key`` and the line ``search`` finds for ``query.end_key``
        (falling back to ``read_last_line(reader)`` when the search yields
        nothing).  The difference between their ``lineno`` values is split
        into pages of ``query.page_size`` blocks, or ``self.max_blocks`` when
        no page size is set.

        This is a generator:

        * if ``query.page_count`` is set, one JSON string with the keys
          ``pages``, ``pageSize`` and ``blocks`` is yielded;
        * otherwise the index lines of page ``query.page`` are yielded;
        * if ``iter_range`` returns no line at all, nothing is yielded.

        ``reader`` is closed whenever the started generator finishes, raises,
        or is closed by its consumer.

        :raises CDXException: if ``query.page`` is negative or not below the
            computed number of pages.
        """
        try:
            # Get End
            end_iter = search(reader, query.end_key, prev_size=1)

            try:
                end_line = next(end_iter)
            except StopIteration:
                end_line = read_last_line(reader)

            # Get Start
            first_iter = iter_range(reader, query.key, query.end_key,
                                    prev_size=1)

            try:
                first_line = next(first_iter)
            except StopIteration:
                # Re-raising StopIteration inside a generator is turned into
                # a RuntimeError by PEP 479; returning ends the generator the
                # same way the re-raise used to.
                return

            first = IDXObject(first_line)
            end = IDXObject(end_line)
            diff = end['lineno'] - first['lineno']

            pagesize = query.page_size
            if not pagesize:
                pagesize = self.max_blocks

            # Floor division keeps the page count integral even where "/"
            # means true division.
            total_pages = diff // pagesize + 1

            if query.page_count:
                info = dict(pages=total_pages, pageSize=pagesize,
                            blocks=diff + 1)
                yield json.dumps(info)
                return

            curr_page = query.page
            if curr_page >= total_pages or curr_page < 0:
                msg = 'Page {0} invalid: First Page is 0, Last Page is {1}'
                raise CDXException(msg.format(curr_page, total_pages - 1))

            startline = curr_page * pagesize
            endline = min(startline + pagesize - 1, diff)

            if curr_page == 0:
                yield first_line
            else:
                # first_line has already been consumed from first_iter, so
                # the slice offsets for later pages shift down by one.
                startline -= 1

            for idx in itertools.islice(first_iter, startline, endline):
                yield idx
        finally:
            # Single exit point: covers normal completion, errors and a
            # consumer that stops iterating early.
            reader.close()
Ejemplo n.º 3
0
    def load_cdx(self, query):
        """Return a generator over the index or CDX lines matching ``query``.

        Calls ``self.load_loc()``, opens ``self.summary`` and range-searches
        it with ``iter_range`` between ``query.key`` and ``query.end_key``.
        When ``query.secondary_index_only`` is set, the matching summary
        lines are produced as they are; otherwise they are handed to
        ``self.idx_to_cdx`` and the items of each block it produces are
        yielded in order.

        The summary file is opened immediately and closed once the returned
        generator is exhausted, raises, or is closed.  A generator that is
        never started leaves closing to garbage collection.
        """
        self.load_loc()

        reader = open(self.summary)

        try:
            idx_iter = iter_range(reader, query.key, query.end_key,
                                  prev_size=1)

            index_only = query.secondary_index_only
            blocks = None if index_only else self.idx_to_cdx(idx_iter, query)
        except Exception:
            # Setup failed before any generator could take ownership of
            # the file, so release it here.
            reader.close()
            raise

        def gen_and_close():
            try:
                if index_only:
                    for idx in idx_iter:
                        yield idx
                else:
                    for blk in blocks:
                        for cdx in blk:
                            yield cdx
            finally:
                reader.close()

        return gen_and_close()
Ejemplo n.º 4
0
    def load_cdx(self, query):
        """Return a generator over the index or CDX lines matching ``query``.

        After ``self.load_loc()``, ``self.summary`` is opened and searched
        with ``iter_range`` for the range ``query.key`` .. ``query.end_key``.
        With ``query.secondary_index_only`` set, the summary lines found are
        yielded unchanged; otherwise ``self.idx_to_cdx`` turns them into
        blocks and every item of every block is yielded in order.

        The summary file is opened eagerly.  It is closed when the returned
        generator runs to completion, fails, or is closed by the caller; if
        the generator is never started, closing is left to garbage
        collection.
        """
        self.load_loc()

        reader = open(self.summary)

        try:
            idx_iter = iter_range(reader,
                                  query.key,
                                  query.end_key,
                                  prev_size=1)

            if query.secondary_index_only:
                blocks = None
            else:
                blocks = self.idx_to_cdx(idx_iter, query)
        except Exception:
            # Nothing owns the file yet if setup fails; close it before
            # propagating the error.
            reader.close()
            raise

        def gen_cdx():
            try:
                if blocks is None:
                    # Secondary-index-only query: emit the summary lines.
                    for idx in idx_iter:
                        yield idx
                    return

                for blk in blocks:
                    for cdx in blk:
                        yield cdx
            finally:
                reader.close()

        return gen_cdx()
Ejemplo n.º 5
0
 def _do_iter(self, fh, params):
     """Yield a ``CDXObject`` for each line ``iter_range`` returns from ``fh``.

     The range handed to ``iter_range`` is ``params['key']`` ..
     ``params['end_key']``.
     """
     matching = iter_range(fh, params['key'], params['end_key'])
     for raw in matching:
         yield CDXObject(raw)
Ejemplo n.º 6
0
    def compute_page_range(self, reader, query):
        """Yield page information, or the index lines of one page, for ``query``.

        The page size is ``int(query.page_size)``, or ``self.max_blocks``
        when no page size is given.  The block range runs from the first
        line ``iter_range`` returns for ``query.key`` to the line ``search``
        finds for ``query.end_key`` (falling back to
        ``read_last_line(reader)`` when the search yields nothing); the
        number of blocks is the difference of their ``lineno`` values.

        This is a generator:

        * if ``query.page_count`` is set, the single value returned by
          ``self._page_info(pages, pagesize, blocks)`` is yielded;
        * otherwise the index lines of page ``query.page`` are yielded;
        * if nothing matches and no page count was requested, nothing is
          yielded.

        ``reader`` is closed whenever the started generator finishes, raises,
        or is closed by its consumer.

        :raises CDXException: if ``query.page`` is negative or not below the
            computed number of pages.
        """
        try:
            pagesize = query.page_size
            if not pagesize:
                pagesize = self.max_blocks
            else:
                pagesize = int(pagesize)

            last_line = None

            # Get End
            end_iter = search(reader, query.end_key, prev_size=1)

            try:
                end_line = six.next(end_iter)
            except StopIteration:
                last_line = read_last_line(reader)
                end_line = last_line

            # Get Start
            first_iter = iter_range(reader, query.key, query.end_key,
                                    prev_size=1)

            try:
                first_line = six.next(first_iter)
            except StopIteration:
                # The range is empty.  If the end fell back to the last line
                # and the key sorts at or after it, use that last line as
                # the start as well; otherwise there is nothing to page.
                if end_line == last_line and query.key >= last_line:
                    first_line = last_line
                else:
                    if query.page_count:
                        yield self._page_info(0, pagesize, 0)
                    return

            first = IDXObject(first_line)
            end = IDXObject(end_line)

            try:
                blocks = end['lineno'] - first['lineno']
                total_pages = int(blocks / pagesize) + 1
            except Exception:
                # Line numbers unavailable or unusable: fall back to a
                # single page with an unknown block count.  Catching
                # Exception (not everything) lets KeyboardInterrupt and
                # SystemExit through.
                blocks = -1
                total_pages = 1

            if query.page_count:
                # same line, so actually need to look at cdx
                # to determine if it exists
                if blocks == 0:
                    try:
                        block_cdx_iter = self.idx_to_cdx([first_line], query)
                        block = six.next(block_cdx_iter)
                        # Only whether a first cdx exists matters here.
                        six.next(block)
                    except StopIteration:
                        total_pages = 0
                        blocks = -1

                yield self._page_info(total_pages, pagesize, blocks + 1)
                return

            curr_page = query.page
            if curr_page >= total_pages or curr_page < 0:
                msg = 'Page {0} invalid: First Page is 0, Last Page is {1}'
                raise CDXException(msg.format(curr_page, total_pages - 1))

            startline = curr_page * pagesize
            endline = startline + pagesize - 1
            if blocks >= 0:
                endline = min(endline, blocks)

            if curr_page == 0:
                yield first_line
            else:
                # first_line has already been consumed from first_iter, so
                # the slice offsets for later pages shift down by one.
                startline -= 1

            for idx in itertools.islice(first_iter, startline, endline):
                yield idx
        finally:
            # Single exit point: covers normal completion, errors and a
            # consumer that stops iterating early.
            reader.close()
Ejemplo n.º 7
0
 def do_open():
     """Yield each line ``iter_range`` returns from ``self.filename``.

     ``self`` and ``query`` come from the enclosing scope.  The file is
     opened in binary mode on first iteration and closed by the ``with``
     block when the generator ends.
     """
     with open(self.filename, 'rb') as fh:
         for entry in iter_range(fh, query.key, query.end_key):
             yield entry
Ejemplo n.º 8
0
 def _do_load_file(filename, query):
     """Yield each line ``iter_range`` returns from the file ``filename``.

     The file is opened in binary mode and the range passed to
     ``iter_range`` is ``query.key`` .. ``query.end_key``.  The ``with``
     block closes the file when the generator ends.
     """
     with open(filename, 'rb') as fh:
         for entry in iter_range(fh, query.key, query.end_key):
             yield entry
Ejemplo n.º 9
0
 def do_load(fh):
     """Yield a ``CDXObject`` for each line ``iter_range`` returns from ``fh``.

     ``params`` comes from the enclosing scope and supplies the ``'key'``
     and ``'end_key'`` bounds.  ``fh`` is used as a context manager, so it
     is released when the generator ends.
     """
     with fh:
         for raw in iter_range(fh, params['key'], params['end_key']):
             yield CDXObject(raw)
Ejemplo n.º 10
0
    def compute_page_range(self, reader, query):
        """Yield page information, or the index lines of one page, for ``query``.

        The page size is ``int(query.page_size)``, or ``self.max_blocks``
        when no page size is given.  The block range runs from the first
        line ``iter_range`` returns for ``query.key`` to the line ``search``
        finds for ``query.end_key`` (falling back to
        ``read_last_line(reader)`` when the search yields nothing); the
        number of blocks is the difference of their ``lineno`` values.

        This is a generator:

        * if ``query.page_count`` is set, the single value returned by
          ``self._page_info(pages, pagesize, blocks)`` is yielded;
        * otherwise the index lines of page ``query.page`` are yielded;
        * if nothing matches and no page count was requested, nothing is
          yielded.

        ``reader`` is closed whenever the started generator finishes, raises,
        or is closed by its consumer.

        :raises CDXException: if ``query.page`` is negative or not below the
            computed number of pages.
        """
        try:
            pagesize = query.page_size
            if not pagesize:
                pagesize = self.max_blocks
            else:
                pagesize = int(pagesize)

            last_line = None

            # Get End
            end_iter = search(reader, query.end_key, prev_size=1)

            try:
                end_line = next(end_iter)
            except StopIteration:
                last_line = read_last_line(reader)
                end_line = last_line

            # Get Start
            first_iter = iter_range(reader,
                                    query.key,
                                    query.end_key,
                                    prev_size=1)

            try:
                first_line = next(first_iter)
            except StopIteration:
                # The range is empty.  If the end fell back to the last line
                # and the key sorts at or after it, use that last line as
                # the start as well; otherwise there is nothing to page.
                if end_line == last_line and query.key >= last_line:
                    first_line = last_line
                else:
                    if query.page_count:
                        yield self._page_info(0, pagesize, 0)
                    # Returning (instead of re-raising StopIteration) ends
                    # the generator without tripping PEP 479's RuntimeError.
                    return

            first = IDXObject(first_line)
            end = IDXObject(end_line)

            try:
                blocks = end['lineno'] - first['lineno']
                # Floor division keeps the page count integral even where
                # "/" means true division.
                total_pages = blocks // pagesize + 1
            except Exception:
                # Line numbers unavailable or unusable: fall back to a
                # single page with an unknown block count.  Catching
                # Exception (not everything) lets KeyboardInterrupt and
                # SystemExit through.
                blocks = -1
                total_pages = 1

            if query.page_count:
                # same line, so actually need to look at cdx
                # to determine if it exists
                if blocks == 0:
                    try:
                        block_cdx_iter = self.idx_to_cdx([first_line], query)
                        block = next(block_cdx_iter)
                        # Only whether a first cdx exists matters here.
                        next(block)
                    except StopIteration:
                        total_pages = 0
                        blocks = -1

                yield self._page_info(total_pages, pagesize, blocks + 1)
                return

            curr_page = query.page
            if curr_page >= total_pages or curr_page < 0:
                msg = 'Page {0} invalid: First Page is 0, Last Page is {1}'
                raise CDXException(msg.format(curr_page, total_pages - 1))

            startline = curr_page * pagesize
            endline = startline + pagesize - 1
            if blocks >= 0:
                endline = min(endline, blocks)

            if curr_page == 0:
                yield first_line
            else:
                # first_line has already been consumed from first_iter, so
                # the slice offsets for later pages shift down by one.
                startline -= 1

            for idx in itertools.islice(first_iter, startline, endline):
                yield idx
        finally:
            # Single exit point: covers normal completion, errors and a
            # consumer that stops iterating early.
            reader.close()
Ejemplo n.º 11
0
    def compute_page_range(self, reader, query):
        """Yield page information, or the index lines of one page, for ``query``.

        The block range is bounded by the first line ``iter_range`` returns
        for ``query.key`` and the line ``search`` finds for ``query.end_key``
        (falling back to ``read_last_line(reader)`` when the search yields
        nothing).  The difference between their ``lineno`` values is split
        into pages of ``query.page_size`` blocks, or ``self.max_blocks`` when
        no page size is set.

        This is a generator:

        * if ``query.page_count`` is set, one JSON string with the keys
          ``pages``, ``pageSize`` and ``blocks`` is yielded;
        * otherwise the index lines of page ``query.page`` are yielded;
        * if ``iter_range`` returns no line at all, nothing is yielded.

        ``reader`` is closed whenever the started generator finishes, raises,
        or is closed by its consumer.

        :raises CDXException: if ``query.page`` is negative or not below the
            computed number of pages.
        """
        try:
            # Get End
            end_iter = search(reader, query.end_key, prev_size=1)

            try:
                end_line = next(end_iter)
            except StopIteration:
                end_line = read_last_line(reader)

            # Get Start
            first_iter = iter_range(reader,
                                    query.key,
                                    query.end_key,
                                    prev_size=1)

            try:
                first_line = next(first_iter)
            except StopIteration:
                # Re-raising StopIteration inside a generator is turned into
                # a RuntimeError by PEP 479; returning ends the generator the
                # same way the re-raise used to.
                return

            first = IDXObject(first_line)
            end = IDXObject(end_line)
            diff = end['lineno'] - first['lineno']

            pagesize = query.page_size
            if not pagesize:
                pagesize = self.max_blocks

            # Floor division keeps the page count integral even where "/"
            # means true division.
            total_pages = diff // pagesize + 1

            if query.page_count:
                info = dict(pages=total_pages,
                            pageSize=pagesize,
                            blocks=diff + 1)
                yield json.dumps(info)
                return

            curr_page = query.page
            if curr_page >= total_pages or curr_page < 0:
                msg = 'Page {0} invalid: First Page is 0, Last Page is {1}'
                raise CDXException(msg.format(curr_page, total_pages - 1))

            startline = curr_page * pagesize
            endline = min(startline + pagesize - 1, diff)

            if curr_page == 0:
                yield first_line
            else:
                # first_line has already been consumed from first_iter, so
                # the slice offsets for later pages shift down by one.
                startline -= 1

            for idx in itertools.islice(first_iter, startline, endline):
                yield idx
        finally:
            # Single exit point: covers normal completion, errors and a
            # consumer that stops iterating early.
            reader.close()
Ejemplo n.º 12
0
 def _do_load_file(filename, query):
     """Generate the lines ``iter_range`` selects from ``filename``.

     Opens ``filename`` in binary mode, passes the handle to ``iter_range``
     with the bounds ``query.key`` and ``query.end_key``, and yields every
     line it returns.  The handle is closed when the ``with`` block exits.
     """
     with open(filename, 'rb') as handle:
         for matched in iter_range(handle, query.key, query.end_key):
             yield matched
Ejemplo n.º 13
0
 def do_open():
     """Generate the lines ``iter_range`` selects from ``self.filename``.

     Uses ``self`` and ``query`` from the enclosing scope.  The file is
     opened in binary mode when iteration starts, and the ``with`` block
     closes it when the generator ends.
     """
     with open(self.filename, 'rb') as handle:
         for matched in iter_range(handle, query.key, query.end_key):
             yield matched
Ejemplo n.º 14
0
 def load_cdx(self, query):
     """Return a generator over the lines ``iter_range`` selects from the file.

     ``self.filename`` is opened immediately and searched with
     ``iter_range`` between ``query.key`` and ``query.end_key``.  The file
     is closed once the returned generator is exhausted, raises, or is
     closed; previously nothing ever closed it.  A generator that is never
     started leaves closing to garbage collection.
     """
     source = open(self.filename)
     try:
         lines = iter_range(source, query.key, query.end_key)
     except Exception:
         # Nothing owns the file yet if setup fails; close it before
         # propagating the error.
         source.close()
         raise

     def iter_and_close():
         # The file object is its own context manager: leaving the block
         # closes it however iteration ends.
         with source:
             for line in lines:
                 yield line

     return iter_and_close()