예제 #1
0
파일: fasta.py 프로젝트: anykine/bx-python
    def __init__(self, file, revcomp=False, name="", gap=None, lookahead=None, contig=None):
        """
        Load one contig (sequence) from a fasta-style text stream.

        Lines that begin with ">" are treated as sequence headers; every other
        line is split on whitespace and its pieces are accumulated as sequence
        text.  Reading stops at end of file, or at the header that follows the
        requested contig -- that header line is saved in self.lookahead.

        Parameters:
            file, revcomp, name, gap: forwarded unchanged to SeqFile.__init__.
            lookahead: a line that was already consumed from the stream; it is
                processed before anything new is read.
            contig: 1-based index of the sequence to load (None means 1).

        Raises:
            AssertionError: if contig is less than 1, or the stream holds
                fewer than contig sequences.
        """
        SeqFile.__init__(self,file,revcomp,name,gap)
        self.lookahead = lookahead
        if (contig is None): contig = 1
        assert (contig >= 1), "contig %d is not legal" % contig

        # nota bene: certainly not the most efficient or elegant implementation

        currContig = 1
        while (True):
            if (self.lookahead is not None):
                (line,self.lookahead) = (self.lookahead,None)
            else:
                line = self.file.readline()
            # a stream opened in binary mode yields bytes; decode so that the
            # str operations below (startswith, split, join) work on it
            if not isinstance(line, str):
                line = line.decode()
            if not line: break # (end of file)
            if (line.startswith(">")):
                # presumably self.text is None until the first header or
                # sequence line has been seen -- confirm in SeqFile.__init__
                if (self.text is not None):
                    if (currContig == contig):
                        self.lookahead = line # (next sequence header)
                        break
                    currContig += 1
                self.name = self.extract_name(line[1:])
                self.text = []
                continue
            line = line.split() # (remove whitespace)
            if (self.text is None): self.text = line # (allows headerless fasta)
            else:                   self.text.extend(line)
        assert (currContig == contig), \
            "contig %d is not legal (file contains only %d)" % (contig,currContig)
        if (self.text is not None):
            self.text   = "".join(self.text)
            self.length = len(self.text)
예제 #2
0
    def __init__(self, file, revcomp=False, name="", gap=None, codebook=None):
        """
        Open a quantum-dna file and parse its header.

        The first 32-bit word is the magic number; it is always unpacked as
        big-endian, and seeing the byte-swapped magic switches every later
        header read to little-endian.  The header then supplies, in order:
        version, header length, sequence offset, name offset, sequence length
        and (when the header is at least 0x14 bytes long) a properties offset.
        If present, the name and the name/value property strings are read
        from their offsets.

        Parameters:
            file, revcomp, name, gap: forwarded unchanged to SeqFile.__init__.
                When gap is None, self.gap is set to chr(0) afterwards.
            codebook: stored as self.codebook; not otherwise used here.

        Raises:
            AssertionError: if revcomp is set.
            ValueError: on an unknown magic number, an unsupported version,
                or a header length that is not acceptable for the version.
        """
        SeqFile.__init__(self, file, revcomp, name, gap)
        if gap is None:
            self.gap = chr(0)
        assert (revcomp == False), \
            "reverse complement is not supported for qdna files"
        self.codebook = codebook

        # assume big-endian until the magic number says otherwise
        self.byte_order = ">"
        (magic,) = struct.unpack(">L", file.read(4))
        if magic != qdnaMagic:
            if magic != qdnaMagicSwap:
                raise ValueError("not a quantum-dna file (magic=%08X)" % magic)
            self.byte_order = "<"
        self.magic = magic

        def next_word():
            # one unsigned 32-bit header field, in the detected byte order
            fmt = "%sL" % self.byte_order
            return struct.unpack(fmt, self.file.read(4))[0]

        # process header

        self.version = next_word()
        if self.version != 0x100 and self.version != 0x200:
            raise ValueError(
                "unsupported quantum-dna (version=%08X)" % self.version)

        self.headerLength = next_word()
        if self.headerLength < 0x10:
            raise ValueError(
                "unsupported quantum-dna (header len=%08X)" % self.headerLength)
        if self.version == 0x100 and self.headerLength != 0x10:
            raise ValueError(
                "unsupported quantum-dna (version 1.0 header len=%08X)"
                % self.headerLength)

        self.seqOffset = next_word()
        self.nameOffset = next_word()
        self.length = next_word()

        # the properties offset only exists in headers of 0x14 bytes or more
        self.propOffset = 0
        if self.headerLength >= 0x14:
            self.propOffset = next_word()

        self.name = ""
        if self.nameOffset != 0:
            self.file.seek(self.nameOffset)
            self.name = self.read_string()

        if self.propOffset == 0:
            return

        # properties are name/value string pairs, ended by an empty name
        self.file.seek(self.propOffset)
        key = self.read_string()
        while len(key) != 0:
            self.set_property(key, self.read_string())
            key = self.read_string()
예제 #3
0
    def __init__(self, file, revcomp=False, name="", gap=None):
        """
        Open a NIB file: check its magic number and read the sequence length.

        The magic number is unpacked as big-endian; if it matches the
        byte-swapped magic instead, the length field is read as little-endian.

        Parameters:
            file, revcomp, name, gap: forwarded unchanged to SeqFile.__init__.
                The header itself is read directly from file.

        Raises:
            ValueError: if the magic number matches neither NIB_MAGIC_NUMBER
                nor NIB_MAGIC_NUMBER_SWAP.
        """
        SeqFile.__init__(self,file,revcomp,name,gap)

        self.byte_order = ">"
        magic = struct.unpack(">L", file.read(NIB_MAGIC_SIZE))[0]
        if (magic != NIB_MAGIC_NUMBER):
            if (magic != NIB_MAGIC_NUMBER_SWAP):
                # raising a bare string is not a valid exception and would
                # surface as an unrelated TypeError; raise a real one
                raise ValueError("Not a NIB file")
            self.byte_order = "<"
        self.magic = magic
        self.length = struct.unpack("%sL" % self.byte_order, file.read(NIB_LENGTH_SIZE))[0]
예제 #4
0
파일: qdna.py 프로젝트: anykine/bx-python
    def __init__(self, file, revcomp=False, name="", gap=None, codebook=None):
        """
        Parse the header of a quantum-dna file.

        Reads the magic number (which also determines the byte order of the
        remaining header words), then the version, header length, sequence
        offset, name offset and sequence length.  Headers of 0x14 bytes or
        more also carry a properties offset.  The sequence name and any
        name/value properties are then read from their offsets.

        Parameters:
            file, revcomp, name, gap: forwarded unchanged to SeqFile.__init__.
                A gap of None makes self.gap default to chr(0).
            codebook: stored as self.codebook.

        Raises:
            AssertionError: if revcomp is set.
            ValueError: if the magic number, version or header length is not
                one this reader accepts.
        """
        SeqFile.__init__(self, file, revcomp, name, gap)
        if gap is None:
            self.gap = chr(0)
        assert (revcomp == False), "reverse complement is not supported for qdna files"
        self.codebook = codebook

        # the magic number is always unpacked big-endian; a swapped magic
        # means the rest of the header is little-endian
        self.byte_order = ">"
        magic = struct.unpack(">L", file.read(4))[0]
        if magic != qdnaMagic:
            if magic != qdnaMagicSwap:
                raise ValueError("not a quantum-dna file (magic=%08X)" % magic)
            self.byte_order = "<"
        self.magic = magic

        # process header

        # every remaining header field is one unsigned 32-bit word
        word = struct.Struct(self.byte_order + "L")
        stream = self.file

        self.version = word.unpack(stream.read(4))[0]
        if self.version not in (0x100, 0x200):
            raise ValueError("unsupported quantum-dna (version=%08X)" % self.version)

        self.headerLength = word.unpack(stream.read(4))[0]
        if self.headerLength < 0x10:
            raise ValueError("unsupported quantum-dna (header len=%08X)" % self.headerLength)
        if self.version == 0x100 and self.headerLength != 0x10:
            raise ValueError("unsupported quantum-dna (version 1.0 header len=%08X)" % self.headerLength)

        self.seqOffset = word.unpack(stream.read(4))[0]
        self.nameOffset = word.unpack(stream.read(4))[0]
        self.length = word.unpack(stream.read(4))[0]

        # only headers of 0x14 bytes or more include a properties offset
        self.propOffset = 0
        if self.headerLength >= 0x14:
            self.propOffset = word.unpack(stream.read(4))[0]

        self.name = ""
        if self.nameOffset != 0:
            self.file.seek(self.nameOffset)
            self.name = self.read_string()

        if self.propOffset != 0:
            self.file.seek(self.propOffset)
            # name/value string pairs; an empty name ends the list
            prop_name = self.read_string()
            while len(prop_name) != 0:
                prop_value = self.read_string()
                self.set_property(prop_name, prop_value)
                prop_name = self.read_string()
예제 #5
0
    def __init__(self,
                 file,
                 revcomp=False,
                 name="",
                 gap=None,
                 lookahead=None,
                 contig=None):
        """
        Read a single contig (sequence) out of a fasta-style stream.

        A line starting with ">" is a sequence header; any other line is
        split on whitespace and appended to the sequence being collected.
        Lines obtained from readline() that are not str are decoded first.
        Reading ends at end of file, or at the header following the requested
        contig, which is then saved in self.lookahead.

        Parameters:
            file, revcomp, name, gap: forwarded unchanged to SeqFile.__init__.
            lookahead: a previously consumed line to process before reading
                anything new from the stream.
            contig: 1-based index of the sequence to load (None means 1).

        Raises:
            AssertionError: if contig is below 1, or the stream contains
                fewer than contig sequences.
        """
        SeqFile.__init__(self, file, revcomp, name, gap)
        self.lookahead = lookahead
        contig = 1 if contig is None else contig
        assert (contig >= 1), "contig %d is not legal" % contig

        # nota bene: certainly not the most efficient or elegant implementation

        contig_index = 1
        while True:
            pending = self.lookahead
            if pending is None:
                line = self.file.readline()
                if not isinstance(line, str):
                    line = line.decode()
            else:
                # use up the line handed over by the caller
                line = pending
                self.lookahead = None

            if not line:  # (end of input)
                break

            if not line.startswith(">"):
                pieces = line.split()  # (remove whitespace)
                if self.text is None:
                    self.text = pieces  # (allows headerless fasta)
                else:
                    self.text.extend(pieces)
                continue

            # header line: either finishes the wanted contig or starts a new one
            if self.text is not None:
                if contig_index == contig:
                    self.lookahead = line  # (next sequence header)
                    break
                contig_index += 1
            self.name = self.extract_name(line[1:])
            self.text = []

        assert (contig_index == contig), \
            "contig %d is not legal (file contains only %d)" % (contig, contig_index)

        if self.text is None:
            return
        self.text = "".join(self.text)
        self.length = len(self.text)