def __loadIndex(self):
    """Load the complete index into memory.

    Opens the database with the decompression wrapper selected by
    ``self.mMethod`` (handle stored in ``self.mDatabaseFile``), then
    parses the tab-separated index file ``self.mNameIndex`` into
    ``self.mIndex``, keyed by the integer identifier in column 0:

    * rows with more than 4 columns map to
      ``(pos_id, block_size, lsequence, points)``, where ``points`` is
      the list of random-access offsets in columns 3..-2;
    * rows with at most 4 columns map to ``(pos_id, pos_seq, lsequence)``.

    Lines starting with ``#`` are skipped.  Sets ``self.mIsLoaded``.
    """
    assert self.mCreateMode == False, \
        "asked to read from database opened for writing"

    # NOTE(review): the "uncompressed"/"debug" branches open
    # self.mDbname while the compressed branches open self.mNameDb --
    # confirm both attributes are populated consistently by the caller.
    if self.mMethod == "uncompressed":
        self.mDatabaseFile = open(self.mDbname, "r")
    elif self.mMethod == "dictzip":
        import dictzip
        self.mDatabaseFile = dictzip.GzipFile(self.mNameDb)
    elif self.mMethod == "lzo":
        import lzo
        self.mDatabaseFile = Uncompressor(self.mNameDb, lzo.decompress)
    elif self.mMethod == "gzip":
        self.mDatabaseFile = Uncompressor(self.mNameDb, gzip_demangler)
    elif self.mMethod == "zlib":
        self.mDatabaseFile = Uncompressor(self.mNameDb, zlib.decompress)
    elif self.mMethod == "bz2":
        # BUG FIX: original tested ``eslf.mMethod`` -- a NameError
        # whenever this branch was reached.
        self.mDatabaseFile = bz2.BZ2File(self.mNameDb)
    elif self.mMethod == "debug":
        self.mDatabaseFile = Uncompressor(self.mDbname + ".debug",
                                          lambda x: x)

    self.mIndex = {}
    # ``with`` guarantees the index file is closed even on parse errors.
    with open(self.mNameIndex, "r") as index_file:
        for line in index_file:
            if line.startswith("#"):
                continue
            data = line[:-1].split("\t")
            if len(data) > 4:
                # Index row with random access points.
                # FIX: dropped the redundant ``bytes(...)`` wrapper
                # (a no-op under Python 2, a TypeError under Python 3)
                # and materialized ``map`` so the stored points are a
                # real list on both Python versions.
                identifier = data[0]
                pos_id, block_size, lsequence = \
                    int(data[1]), int(data[2]), int(data[-1])
                points = list(map(int, data[3:-1]))
                self.mIndex[int(identifier)] = (
                    pos_id, block_size, lsequence, points)
            else:
                identifier = data[0]
                pos_id, pos_seq, lsequence = \
                    int(data[1]), int(data[2]), int(data[-1])
                self.mIndex[int(identifier)] = (pos_id, pos_seq, lsequence)

    self.mIsLoaded = True
def _loadIndex(self, compress=False):
    """Load the complete index into memory.

    If *compress* is true, the index is not kept in memory; instead a
    compressed on-disk (dbm) index is created at
    ``self.mNameIndex + ".dbm"``.  If such a compressed index already
    exists it is opened read-only and parsing is skipped entirely.

    Otherwise the tab-separated index file ``self.mNameIndex`` is read
    into ``self.mIndex``; two-column lines register synonyms in
    ``self.mSynonyms``.  Sets ``self.mIsLoaded`` when done.
    """
    # Open the database with the decompression wrapper matching mMethod.
    method = self.mMethod
    if method == "uncompressed":
        self.mDatabaseFile = open(self.mDbname, "r")
    elif method == "dictzip":
        import dictzip
        self.mDatabaseFile = dictzip.GzipFile(self.mDbname)
    elif method == "lzo":
        import lzo
        self.mDatabaseFile = Uncompressor(self.mDbname, lzo.decompress)
    elif method == "gzip":
        self.mDatabaseFile = Uncompressor(self.mDbname, gzip_demangler)
    elif method == "zlib":
        self.mDatabaseFile = Uncompressor(self.mDbname, zlib.decompress)
    elif method == "bzip2":
        import bz2
        self.mDatabaseFile = Uncompressor(self.mDbname, bz2.decompress)
    elif method == "debug":
        self.mDatabaseFile = Uncompressor(self.mDbname + ".debug",
                                          lambda x: x)

    filename_index = self.mNameIndex + ".dbm"

    if compress:
        # Build a fresh on-disk index rather than an in-memory dict.
        self.mIndex = anydbm.open(filename_index, "n")
    elif os.path.exists(filename_index):
        # A previously built compressed index exists: reuse it as-is.
        self.mIndex = anydbm.open(filename_index, "r")
        self.mIsLoaded = True
        return
    else:
        self.mIndex = {}

    for raw in open(self.mNameIndex, "r"):
        fields = raw[:-1].split("\t")

        if len(fields) == 2:
            # Synonym line: <contig>\t<synonym>.  Synonyms of contigs
            # absent from the index are silently ignored.
            if fields[0] in self.mIndex:
                self.mSynonyms[fields[1]] = fields[0]
            continue

        identifier = fields[0]
        pos_id = int(fields[1])
        lsequence = int(fields[-1])

        if len(fields) > 4:
            # Entry with random access points (columns 3..-2).
            block_size = int(fields[2])
            points = map(int, fields[3:-1])
            self.mIndex[identifier] = (pos_id, block_size, lsequence, points)
        else:
            # Plain entry, packed into a fixed-size record.
            # NOTE(review): only this branch packs to a string; the
            # random-access branch stores a tuple, which a dbm index
            # cannot hold -- confirm compress=True is never combined
            # with random-access indices.
            pos_seq = int(fields[2])
            self.mIndex[identifier] = struct.pack(
                "QQi", pos_id, pos_seq, lsequence)

    self._addSynonyms()
    self.mIsLoaded = True