def create(cls, file_like, get_next, allow_multiple=False):
    """Build the on-disk index for ``file_like``.

    The database is written to ``name + cls.ext``, where ``name`` is the
    name reported by ``cls._get_iterable``.  Each key returned by
    ``get_next`` is mapped to the file offset at which that record starts.

    :param file_like: whatever ``cls._get_iterable`` accepts; it must
        resolve to a seekable handle, because the handle is read once to
        count lines and then rewound.
    :param get_next: callable taking the open handle; it must consume one
        record and return its key, or a falsy value when no records remain.
    :param allow_multiple: when true, offsets are stored with ``db.putcat``
        instead of ``db.put`` (presumably so repeated keys accumulate a
        "|"-separated list of offsets -- confirm against the BDB binding).

    The file handle is closed on exit, whether or not indexing succeeded.
    """
    fh, name = cls._get_iterable(file_like)
    try:
        # First pass: count lines to derive the ``bnum`` sizing hint.
        lines = sum(1 for _ in fh)
        bnum = lines if lines > 2**24 else lines * 2
        fh.seek(0)

        db = BDB()
        db.open(name + cls.ext, bnum=bnum, lcnum=2**19,
                omode=tc.OWRITER | tc.OTRUNC | tc.OCREAT,
                apow=6, opts=tc.TLARGE, xmsiz=2**26)
        try:
            putter = db.putcat if allow_multiple else db.put
            pos = fh.tell()
            while True:
                key = get_next(fh)
                if not key:
                    break
                # Always append the "|"; it only matters for multiple.
                putter(key, str(pos) + "|")
                # get_next moved fh forward; this is the next record start.
                pos = fh.tell()
        finally:
            # Close the database even if get_next or the put call raised.
            db.close()
    finally:
        fh.close()
def get_db(dbname, mode):
    """Open the BDB database at ``dbname`` and return the open handle.

    :param dbname: path handed to ``BDB.open``.
    :param mode: ``'c'`` or ``'n'`` opens the database with
        ``OWRITER | OTRUNC | OCREAT`` (writable, truncated, created if
        missing); any other value opens it with ``OREADER``.
    :returns: the opened ``BDB`` instance; the caller must close it.

    The mode is matched exactly.  The earlier substring test
    (``mode in 'cn'``) was also true for the empty string, so ``mode=''``
    would truncate an existing database.  As a consequence the literal
    string ``'cn'`` is no longer treated as a create mode.
    """
    db = BDB()
    if mode in ("c", "n"):
        db.open(dbname, omode=tc.OWRITER | tc.OTRUNC | tc.OCREAT,
                bnum=int(1e7), lcnum=2**19, apow=6, opts=tc.TLARGE,
                xmsiz=2**26)
    else:
        db.open(dbname, omode=tc.OREADER)
    return db
def __init__(self, file_like, call_class, allow_multiple=False):
    """Attach to an existing index for ``file_like``.

    ``self._get_iterable`` resolves ``file_like`` to a handle and a name;
    the index database at ``name + self.ext`` is then opened with
    ``OREADER``.

    :param file_like: whatever ``self._get_iterable`` accepts.
    :param call_class: stored on the instance unchanged; it is not
        invoked here.
    :param allow_multiple: stored on the instance unchanged.
    """
    handle, label = self._get_iterable(file_like)

    self.fh = handle
    self.filename = label
    self.call_class = call_class
    self.allow_multiple = allow_multiple

    self.db = BDB()
    index_path = label + self.ext
    self.db.open(index_path, omode=tc.OREADER)
def create(cls, file_like, get_next, allow_multiple=False):
    """Build the on-disk index for ``file_like``.

    The database is written to ``name + cls.ext``, where ``name`` is the
    name reported by ``cls._get_iterable``.  Each key returned by
    ``get_next`` is mapped to the file offset at which that record starts.

    :param file_like: whatever ``cls._get_iterable`` accepts; it must
        resolve to a seekable handle, because the handle is read once to
        count lines and then rewound.
    :param get_next: callable taking the open handle; it must consume one
        record and return its key, or a falsy value when no records remain.
    :param allow_multiple: when true, offsets are stored with ``db.putcat``
        instead of ``db.put`` (presumably so repeated keys accumulate a
        "|"-separated list of offsets -- confirm against the BDB binding).

    The file handle is closed on exit, whether or not indexing succeeded.
    """
    fh, name = cls._get_iterable(file_like)
    try:
        # First pass: count lines to derive the ``bnum`` sizing hint.
        lines = sum(1 for _ in fh)
        bnum = lines if lines > 2**24 else lines * 2
        fh.seek(0)

        db = BDB()
        db.open(name + cls.ext, bnum=bnum, lcnum=2**19,
                omode=tc.OWRITER | tc.OTRUNC | tc.OCREAT,
                apow=6, opts=tc.TLARGE, xmsiz=2**26)
        try:
            putter = db.putcat if allow_multiple else db.put
            pos = fh.tell()
            while True:
                key = get_next(fh)
                if not key:
                    break
                # Always append the "|"; it only matters for multiple.
                putter(key, str(pos) + "|")
                # get_next moved fh forward; this is the next record start.
                pos = fh.tell()
        finally:
            # Close the database even if get_next or the put call raised.
            db.close()
    finally:
        fh.close()
class FileIndex(object):
    """Key -> file-offset index over a record-oriented file.

    ``create`` scans the file once and writes a BDB database next to it
    (``<name>.fidx``) mapping each record key to the offset where that
    record starts.  An instance opens that database read-only and, on
    lookup, seeks the file to the stored offset and hands the handle to
    ``call_class`` to parse the record.
    """

    # Suffix appended to the data file's name to get the index path.
    ext = ".fidx"

    @classmethod
    def _get_iterable(cls, f):
        """Return ``(handle, name)`` for a path string or file-like ``f``.

        A string is opened with ``nopen`` and named after the resulting
        handle; anything else is used as-is and named by its ``name``
        attribute, falling back to ``"fileindex"``.
        """
        if isinstance(f, basestring):
            fh = nopen(f)
            name = fh.name
        else:
            fh = f
            name = getattr(f, 'name', "fileindex")
        return fh, name

    @classmethod
    def create(cls, file_like, get_next, allow_multiple=False):
        """Build the on-disk index for ``file_like``.

        :param file_like: path or seekable file-like object; it is read
            once to count lines, rewound, then read again via ``get_next``.
        :param get_next: callable taking the open handle; it must consume
            one record and return its key, or a falsy value at the end.
        :param allow_multiple: when true, offsets are stored with
            ``db.putcat`` so that ``__getitem__`` can split the value on
            "|" into several offsets; otherwise ``db.put`` is used.

        The file handle is closed on exit, whether or not indexing
        succeeded.
        """
        fh, name = cls._get_iterable(file_like)
        try:
            # First pass: count lines to derive the ``bnum`` sizing hint.
            lines = sum(1 for _ in fh)
            bnum = lines if lines > 2**24 else lines * 2
            fh.seek(0)

            db = BDB()
            db.open(name + cls.ext, bnum=bnum, lcnum=2**19,
                    omode=tc.OWRITER | tc.OTRUNC | tc.OCREAT,
                    apow=6, opts=tc.TLARGE, xmsiz=2**26)
            try:
                putter = db.putcat if allow_multiple else db.put
                pos = fh.tell()
                while True:
                    key = get_next(fh)
                    if not key:
                        break
                    # Always append the "|"; it only matters for multiple.
                    putter(key, str(pos) + "|")
                    # get_next moved fh forward; this is the next start.
                    pos = fh.tell()
            finally:
                # Close the database even if indexing raised part-way.
                db.close()
        finally:
            fh.close()

    def __init__(self, file_like, call_class, allow_multiple=False):
        """Open the data file and its existing index read-only.

        :param file_like: path or file-like object that was indexed.
        :param call_class: callable invoked with the file handle, already
            positioned at a record's start; its result is what lookups
            return.
        :param allow_multiple: must match the value used in ``create``;
            when true, lookups return a list of results.
        """
        fh, name = self._get_iterable(file_like)
        self.filename = name
        self.allow_multiple = allow_multiple
        self.fh = fh
        self.call_class = call_class
        self.db = BDB()
        self.db.open(name + self.ext, omode=tc.OREADER)

    def close(self):
        """Close the index database and the data file handle.

        The file handle is closed even when it was supplied by the caller,
        matching what ``create`` does with its handle.
        """
        try:
            self.db.close()
        finally:
            self.fh.close()

    def _read_at(self, offset):
        """Seek to ``offset`` (a decimal string) and parse one record."""
        self.fh.seek(long(offset))
        return self.call_class(self.fh)

    def __getitem__(self, key):
        """Return the record(s) stored under ``key``.

        With ``allow_multiple`` a list is returned, one entry per stored
        offset; otherwise a single ``call_class`` result.
        """
        # Every stored value carries a trailing "|" written by create().
        pos = self.db.get(key).rstrip("|")
        if self.allow_multiple:
            return [self._read_at(p) for p in pos.split("|")]
        return self._read_at(pos)

    def __contains__(self, key):
        """Return whether ``key`` is present in the index database."""
        return key in self.db
class FileIndex(object):
    """Key -> file-offset index over a record-oriented file.

    ``create`` scans the file once and writes a BDB database next to it
    (``<name>.fidx``) mapping each record key to the offset where that
    record starts.  An instance opens that database read-only and, on
    lookup, seeks the file to the stored offset and hands the handle to
    ``call_class`` to parse the record.
    """

    # Suffix appended to the data file's name to get the index path.
    ext = ".fidx"

    @classmethod
    def _get_iterable(cls, f):
        """Return ``(handle, name)`` for a path string or file-like ``f``.

        A string is opened with ``nopen`` and named after the resulting
        handle; anything else is used as-is and named by its ``name``
        attribute, falling back to ``"fileindex"``.
        """
        if isinstance(f, basestring):
            fh = nopen(f)
            name = fh.name
        else:
            fh = f
            name = getattr(f, 'name', "fileindex")
        return fh, name

    @classmethod
    def create(cls, file_like, get_next, allow_multiple=False):
        """Build the on-disk index for ``file_like``.

        :param file_like: path or seekable file-like object; it is read
            once to count lines, rewound, then read again via ``get_next``.
        :param get_next: callable taking the open handle; it must consume
            one record and return its key, or a falsy value at the end.
        :param allow_multiple: when true, offsets are stored with
            ``db.putcat`` so that ``__getitem__`` can split the value on
            "|" into several offsets; otherwise ``db.put`` is used.

        The file handle is closed on exit, whether or not indexing
        succeeded.
        """
        fh, name = cls._get_iterable(file_like)
        try:
            # First pass: count lines to derive the ``bnum`` sizing hint.
            lines = sum(1 for _ in fh)
            bnum = lines if lines > 2**24 else lines * 2
            fh.seek(0)

            db = BDB()
            db.open(name + cls.ext, bnum=bnum, lcnum=2**19,
                    omode=tc.OWRITER | tc.OTRUNC | tc.OCREAT,
                    apow=6, opts=tc.TLARGE, xmsiz=2**26)
            try:
                putter = db.putcat if allow_multiple else db.put
                pos = fh.tell()
                while True:
                    key = get_next(fh)
                    if not key:
                        break
                    # Always append the "|"; it only matters for multiple.
                    putter(key, str(pos) + "|")
                    # get_next moved fh forward; this is the next start.
                    pos = fh.tell()
            finally:
                # Close the database even if indexing raised part-way.
                db.close()
        finally:
            fh.close()

    def __init__(self, file_like, call_class, allow_multiple=False):
        """Open the data file and its existing index read-only.

        :param file_like: path or file-like object that was indexed.
        :param call_class: callable invoked with the file handle, already
            positioned at a record's start; its result is what lookups
            return.
        :param allow_multiple: must match the value used in ``create``;
            when true, lookups return a list of results.
        """
        fh, name = self._get_iterable(file_like)
        self.filename = name
        self.allow_multiple = allow_multiple
        self.fh = fh
        self.call_class = call_class
        self.db = BDB()
        self.db.open(name + self.ext, omode=tc.OREADER)

    def close(self):
        """Close the index database and the data file handle.

        The file handle is closed even when it was supplied by the caller,
        matching what ``create`` does with its handle.
        """
        try:
            self.db.close()
        finally:
            self.fh.close()

    def _read_at(self, offset):
        """Seek to ``offset`` (a decimal string) and parse one record."""
        self.fh.seek(long(offset))
        return self.call_class(self.fh)

    def __getitem__(self, key):
        """Return the record(s) stored under ``key``.

        With ``allow_multiple`` a list is returned, one entry per stored
        offset; otherwise a single ``call_class`` result.
        """
        # Every stored value carries a trailing "|" written by create().
        pos = self.db.get(key).rstrip("|")
        if self.allow_multiple:
            return [self._read_at(p) for p in pos.split("|")]
        return self._read_at(pos)

    def __contains__(self, key):
        """Return whether ``key`` is present in the index database."""
        return key in self.db