예제 #1
0
    def __loadIndex(self):
        """Load the complete index into memory.

        Opens the database file with the reader that matches
        ``self.mMethod`` and stores it in ``self.mDatabaseFile``, then
        parses the tab-separated index file ``self.mNameIndex`` into the
        dictionary ``self.mIndex`` (keyed by the integer value of the first
        column).  Finally sets ``self.mIsLoaded`` to True.

        Index lines starting with ``#`` are skipped.  A line with more than
        four fields is stored as ``(pos_id, block_size, lsequence, points)``
        where ``points`` is the list of integers between the third and the
        last column; any other line is stored as
        ``(pos_id, pos_seq, lsequence)``.

        If ``self.mMethod`` matches none of the known methods,
        ``self.mDatabaseFile`` is left untouched.

        Raises:
            AssertionError: if ``self.mCreateMode`` is not False.
        """

        assert self.mCreateMode == False, "asked to read from database opened for writing"

        # NOTE(review): the branches below mix self.mDbname and self.mNameDb;
        # kept exactly as found -- confirm against the constructor that both
        # attributes exist and that each branch uses the intended one.
        if self.mMethod == "uncompressed":
            self.mDatabaseFile = open(self.mDbname, "r")
        elif self.mMethod == "dictzip":
            import dictzip
            self.mDatabaseFile = dictzip.GzipFile(self.mNameDb)
        elif self.mMethod == "lzo":
            import lzo
            self.mDatabaseFile = Uncompressor(self.mNameDb, lzo.decompress)
        elif self.mMethod == "gzip":
            self.mDatabaseFile = Uncompressor(self.mNameDb, gzip_demangler)
        elif self.mMethod == "zlib":
            self.mDatabaseFile = Uncompressor(self.mNameDb, zlib.decompress)
        elif self.mMethod == "bz2":
            # was "eslf.mMethod": a NameError that made this branch and the
            # "debug" branch below unreachable.
            self.mDatabaseFile = bz2.BZ2File(self.mNameDb)
        elif self.mMethod == "debug":
            self.mDatabaseFile = Uncompressor(self.mDbname + ".debug",
                                              lambda x: x)

        self.mIndex = {}

        # The context manager closes the index file even if parsing fails.
        with open(self.mNameIndex, "r") as index_file:
            for line in index_file:

                if line.startswith("#"):
                    continue

                # Strip only the newline so that a final line without a
                # trailing newline does not lose its last character.
                data = line.rstrip("\n").split("\t")

                identifier = int(data[0])
                pos_id = int(data[1])
                lsequence = int(data[-1])

                if len(data) > 4:
                    # index with random access points
                    block_size = int(data[2])
                    # A real list (not a lazy map object) so the points can
                    # be indexed and re-read.
                    points = [int(point) for point in data[3:-1]]
                    self.mIndex[identifier] = (pos_id, block_size, lsequence,
                                               points)
                else:
                    pos_seq = int(data[2])
                    self.mIndex[identifier] = (pos_id, pos_seq, lsequence)

        self.mIsLoaded = True
예제 #2
0
    def _loadIndex(self, compress=False):
        """Load the complete index into memory.

        Opens the database file with the reader that matches
        ``self.mMethod`` and stores it in ``self.mDatabaseFile``.  If
        ``self.mMethod`` matches none of the known methods,
        ``self.mDatabaseFile`` is left untouched.

        The index itself is handled in one of three ways:

        * ``compress`` is true: a new dbm file ``<mNameIndex>.dbm`` is
          created and filled from the text index ``self.mNameIndex``.
        * ``compress`` is false and ``<mNameIndex>.dbm`` exists: the dbm
          file is opened read-only as ``self.mIndex``, ``self.mIsLoaded`` is
          set and the method returns immediately (the text index is not
          parsed and ``self._addSynonyms()`` is not called).
        * otherwise: the text index is parsed into an in-memory dict.

        Text index lines with exactly two fields are synonyms
        (``target<TAB>synonym``); they are recorded in ``self.mSynonyms``
        only if the target is already present in the index.  Lines with more
        than four fields are stored as
        ``(pos_id, block_size, lsequence, points)``; all remaining lines are
        stored as ``struct.pack("QQi", pos_id, pos_seq, lsequence)``.

        Args:
            compress: if true, build a dbm index on disk instead of loading
                an existing one.
        """

        if self.mMethod == "uncompressed":
            self.mDatabaseFile = open(self.mDbname, "r")
        elif self.mMethod == "dictzip":
            import dictzip
            self.mDatabaseFile = dictzip.GzipFile(self.mDbname)
        elif self.mMethod == "lzo":
            import lzo
            self.mDatabaseFile = Uncompressor(self.mDbname, lzo.decompress)
        elif self.mMethod == "gzip":
            self.mDatabaseFile = Uncompressor(self.mDbname, gzip_demangler)
        elif self.mMethod == "zlib":
            self.mDatabaseFile = Uncompressor(self.mDbname, zlib.decompress)
        elif self.mMethod == "bzip2":
            import bz2
            self.mDatabaseFile = Uncompressor(self.mDbname, bz2.decompress)
        elif self.mMethod == "debug":
            self.mDatabaseFile = Uncompressor(self.mDbname + ".debug",
                                              lambda x: x)

        filename_index = self.mNameIndex + ".dbm"

        if compress:
            # "n" always creates a new, empty database, replacing any
            # existing file of that name.
            self.mIndex = anydbm.open(filename_index, "n")
        elif os.path.exists(filename_index):
            self.mIndex = anydbm.open(filename_index, "r")
            self.mIsLoaded = True
            return
        else:
            self.mIndex = {}

        # The context manager closes the index file even if parsing fails.
        with open(self.mNameIndex, "r") as index_file:
            for line in index_file:

                # Strip only the newline so that a final line without a
                # trailing newline does not lose its last character.
                data = line.rstrip("\n").split("\t")

                if len(data) == 2:
                    # ignore synonyms of non-existent contigs
                    if data[0] not in self.mIndex:
                        continue
                    self.mSynonyms[data[1]] = data[0]
                    continue

                identifier = data[0]
                pos_id = int(data[1])
                lsequence = int(data[-1])

                if len(data) > 4:
                    # index with random access points
                    block_size = int(data[2])
                    # A real list (not a lazy map object) so the points can
                    # be indexed and re-read.
                    points = [int(point) for point in data[3:-1]]
                    # NOTE(review): this stores a tuple while the branch
                    # below stores a packed string; a dbm-backed index
                    # (compress=True) presumably cannot hold a tuple --
                    # confirm whether this combination is ever used.
                    self.mIndex[identifier] = (pos_id, block_size, lsequence,
                                               points)
                else:
                    pos_seq = int(data[2])
                    self.mIndex[identifier] = struct.pack(
                        "QQi", pos_id, pos_seq, lsequence)

        self._addSynonyms()
        self.mIsLoaded = True