if not out:  # if file can't be opened use stdout
    out = sys.stdout

# Start the per-file counters for this fasta file at zero and count the file.
n_sequence[fastafile] = 0
n_match[fastafile] = 0
n_notmatch[fastafile] = 0
n_file += 1

while fasta.next():
    # Every sequence read is counted, both per file and overall, before any
    # selection or length filtering is applied.
    n_sequence[fastafile] += 1
    n_total += 1

    if fasta.id in idlist or not idlist:
        # desired selected sequences; an empty idlist selects every sequence
        fasta.trimDocByRegex(trim)
        seqlen = len(fasta.seq)
        if args.minlen and seqlen < args.minlen:
            # skip sequences shorter than minimum length, if specified
            # (a skipped sequence has already been added to n_sequence and
            # n_total above, but is not added to n_match)
            continue

        out.write('{}\n'.format(fasta.format(linelen=args.linelen)))
        n_written += 1
        n_match[fastafile] += 1

        # Tally how many times each sequence ID has been written.
        if fasta.id in n_found:
            n_found[fasta.id] += 1
        else:
            n_found[fasta.id] = 1

    else:
        # not selected sequence
        # NOTE(review): the body of this branch lies beyond this excerpt;
        # presumably it updates n_notmatch - confirm against the full file.
# Read sequences through a Fasta reader attached to cl.fasta_file.
fasta = Fasta()
fasta.fh = cl.fasta_file

# Compile the trimming pattern only when one was actually supplied. A compiled
# pattern object is always truthy, so the `if trimre` guard in the loop below
# is only meaningful when trimre is None for a missing/empty pattern (and
# re.compile(None) would raise TypeError).
trimre = re.compile(trim) if trim else None

# initialize counters
base_total = 0
base_current = 0
n_out = 0
n_seq = 0
n_current = 0

while fasta.next():
    if trimre:
        fasta.trimDocByRegex(trimre)

    if not n_seq or base_current + fasta.length() > maxbases:
        # This is the first sequence, or the number of bases would be greater
        # than the cutoff after adding the new sequence:
        #   close current output file, open new
        #   report statistics for old file
        #   reset current file counters
        try:
            # prevents error on first pass - file is not yet open, so the
            # name `outfile` is still undefined and raises NameError
            outfile.close()
            # Single print call; the output is identical to chaining three
            # prints joined with end=' '.
            print(' ', base_current, 'bases/amino acids',
                  'in', n_current, 'sequences',
                  'written to', outfilename)
        except NameError:
            pass