Beispiel #1
0
def splitFasta(inpFasta,
        maxChunkSize,
        outBase=None,
        sfxSep=None,
        openWriter=None,
        lineLen=80,
        verbose=True):
    """Split input multi-FASTA records into several outputs of bounded size.

    This is a generator: nothing is read or written until it is iterated,
    and the outputs (and the input, if it was opened here) are closed when
    the generator is exhausted or closed.

    @param inpFasta One of: an object with a records() method; a file-like
    object (has a read attribute) or a string, which is wrapped into a
    FastaReader that is closed at the end; or any other iterable, which is
    iterated directly as the sequence of records. Each record must provide
    header(), getId() and sequence().
    @param maxChunkSize Target total sequence length per chunk. A chunk is
    closed as soon as its accumulated length plus the length of the last
    written sequence reaches this value.
    @param outBase Prefix of the output names (default ""). Output names are
    outBase + sfxSep + zero-padded four-digit chunk index.
    @param sfxSep Separator between outBase and the chunk index (default '_').
    @param openWriter Optional callable that takes the chunk index and returns
    a writer object with record(hdr,seq) and close() methods. It cannot be
    combined with outBase or sfxSep.
    @param lineLen Passed as lineLen to FastaWriter when openWriter is None.
    @param verbose If true, print a message each time a chunk is opened.
    @return Yields a tuple (sequence id, sequence length) for every record,
    right after that record has been written.

    The first chunk is always opened, so an empty input still produces one
    (empty) output. Subsequent chunks are opened only when there is a record
    to put into them, so no empty trailing chunk is left behind when the last
    record happens to complete a chunk."""
    assert (outBase is None and sfxSep is None) or openWriter is None,\
            "Incompatible argument combination"
    if outBase is None:
        outBase = ""
    if sfxSep is None:
        sfxSep = '_'
    inpClose = False
    if hasattr(inpFasta,"records"):
        records = inpFasta.records()
    elif hasattr(inpFasta,"read") or is_string(inpFasta):
        # We create the reader here, so we are responsible for closing it
        inpFasta = FastaReader(inpFasta)
        inpClose = True
        records = inpFasta.records()
    else:
        records = inpFasta
    try:
        iChunk = 0
        chunkSize = 0
        out = None
        if openWriter is None:
            def _open_new_out(iChunk):
                return FastaWriter(outBase+'%s%04d'%(sfxSep,iChunk,),
                        lineLen=lineLen,mode="w")
        else:
            _open_new_out = openWriter
        out = _open_new_out(iChunk)
        try:
            if verbose:
                # single-argument parenthesised form prints the same text
                # under both the print statement and the print function
                print("Writing chunk %i of target size %i" % (iChunk,maxChunkSize))
            for rec in records:
                if out is None:
                    # previous chunk was completed; open the next one lazily,
                    # only now that we know there is a record to write
                    out = _open_new_out(iChunk)
                    if verbose:
                        print("Writing chunk %i of target size %i" % (iChunk,maxChunkSize))
                hdr = rec.header()
                idSeq = rec.getId()
                seq = rec.sequence()
                out.record(hdr,seq)
                lenSeq = len(seq)
                yield (idSeq,lenSeq)
                chunkSize += lenSeq
                # we approximate the next seq length by the last one
                if chunkSize + lenSeq >= maxChunkSize:
                    out.close()
                    out = None
                    iChunk += 1
                    chunkSize = 0
        finally:
            # also reached when the consumer abandons the generator early
            if out is not None:
                out.close()
    finally:
        if inpClose:
            inpFasta.close()
Beispiel #2
0
 def __init__(self,infile):
     """Constructor.
     @param infile Either a string with a file name, or an iterator that
     returns lines (each line should be terminated with a new line), e.g. a
     file object. The iterator may also be a filter that reads another
     FastaReader object, transforms its records and emits them as lines.

     A string argument is opened here with openCompressed() in 'r' mode and
     the object is flagged (ownInfile) as owning the resulting stream; any
     other argument is stored as given and flagged as not owned."""
     openedHere = False
     if is_string(infile):
         infile = openCompressed(infile,'r')
         openedHere = True
     self.ownInfile = openedHere
     self.infile = infile
     # reader state and running totals all start out cleared
     self.freshHdr = False
     self.maxLineLen = self.seqTotal = self.symbolsTotal = 0