Example #1
0
            if not out:
                # if file can't be opened use stdout
                out = sys.stdout

        n_sequence[fastafile] = 0
        n_match[fastafile] = 0
        n_notmatch[fastafile] = 0
        n_file += 1

        while fasta.next():
            n_sequence[fastafile] += 1
            n_total += 1

            if fasta.id in idlist or not idlist:
                # desired selected sequences
                fasta.trimDocByRegex(trim)
                seqlen = len(fasta.seq)
                if args.minlen and seqlen < args.minlen:
                    # skip sequences shorter than minimum length, if specified
                    continue

                out.write('{}\n'.format(fasta.format(linelen=args.linelen)))
                n_written += 1
                n_match[fastafile] += 1
                if fasta.id in n_found:
                    n_found[fasta.id] += 1
                else:
                    n_found[fasta.id] = 1

            else:
                # not selected sequence
Example #2
0
# Set up the sequence reader; its input handle comes from cl.fasta_file
# (presumably the parsed command-line options -- `cl` is defined outside this view).
fasta = Fasta()
fasta.fh = cl.fasta_file

# Compile the trimming pattern once, before the loop. `trim` is defined
# outside this view.
trimre = re.compile(trim)

# initialize counters
# NOTE(review): the updates to these counters are outside this view; the names
# suggest the *_current values track the output file being written while the
# others are running totals -- confirm against the rest of the script.
base_total = 0
base_current = 0
n_out = 0
n_seq = 0
n_current = 0

# Process the input for as long as fasta.next() returns a truthy value
# (presumably one sequence per call).
while fasta.next():

    # NOTE(review): a compiled pattern object is always truthy, so this guard
    # never skips the trim; it does not detect an empty `trim` string.
    if trimre: fasta.trimDocByRegex(trimre)
    # Roll over to a new output file when nothing has been counted yet
    # (n_seq == 0) or when this sequence would push the current file past
    # maxbases (defined outside this view).
    if not n_seq or base_current + fasta.length() > maxbases:
        # if number of bases would be greater than cutoff after adding the new sequence
        # close current output file, open new
        # report statistics for old file
        # reset current file counters

        try:
            # prevents error on first pass - file is not yet open
            # (`outfile` is unbound at that point, so close() raises NameError
            # before any of the report lines below are printed)
            outfile.close()
            print('   ', base_current, 'bases/amino acids', end=' ')
            print('in', n_current, 'sequences', end=' ')
            print('written to', outfilename)

        except NameError:
            # NOTE(review): this also swallows a NameError raised by any other
            # name in the try body (e.g. a misspelled variable in the prints),
            # which would silently suppress the report.
            pass