def __init__(self, theme):
    """Creates a new FASTAFormatter instance.

    Args:
        theme: Passed unchanged to ``SequenceFormatter``; it is not
            inspected here.
    """
    # Imported lazily, at construction time, as in the original code.
    # The previous function-local ``import os`` was never used and has
    # been dropped.
    from cats.styles.sequence import SequenceFormatter

    # Sequence formatter used to colorize sequence lines
    self.seq_formatter = SequenceFormatter(theme)
class FASTAFormatter(object):
    """Formatter for FASTA files.

    Copies a line-oriented FASTA stream to an output stream.  When color
    is requested, description lines (those starting with ``>``) are
    written in bold and every other line is passed through the
    ``SequenceFormatter`` built from the theme.
    """

    def __init__(self, theme):
        """Creates a new FASTAFormatter instance.

        Args:
            theme: Passed unchanged to ``SequenceFormatter``.
        """
        from cats.styles.sequence import SequenceFormatter

        # Load sequence formatter
        self.seq_formatter = SequenceFormatter(theme)

    @staticmethod
    def _as_text(line):
        """Return *line* as ``str``, decoding it only if it is bytes-like.

        Binary streams (e.g. gzip handles) yield ``bytes`` while text-mode
        streams yield ``str``; ``str`` has no ``decode`` method, so
        decoding unconditionally fails on text-mode input.
        """
        if isinstance(line, (bytes, bytearray)):
            return line.decode()
        return line

    def format(self, inbuffer, outbuffer=None, **kwargs):
        """Format sequence records.

        Args:
            inbuffer: Iterable of lines, either ``bytes`` or ``str``.
            outbuffer: Object with a ``write(str)`` method; defaults to
                ``sys.stdout``.
            **kwargs: Formatting options.  ``color`` is always required.
                When ``color`` is true, ``seq_type`` is read for every
                non-description line, and ``stop_codons`` / ``cpg`` are
                forwarded to ``format_nucleic_acid`` when ``seq_type`` is
                one of ``'dna'``, ``'rna'`` or ``'nucleic_acid'``.

        Raises:
            KeyError: If a required option is missing from ``kwargs``.
        """
        import sys

        # default/bold text
        RESET = '\033[0m'
        BOLD = '\033[1m'

        # default to STDOUT for output
        if outbuffer is None:
            outbuffer = sys.stdout

        # Plain mode: copy the input through unchanged
        if not kwargs['color']:
            for line in inbuffer:
                outbuffer.write(self._as_text(line))
            return

        # Color mode: style each line individually
        for line in inbuffer:
            line = self._as_text(line)

            # Reset formatting so styling never leaks across lines
            outbuffer.write(RESET)

            # Description line
            if line.startswith('>'):
                outbuffer.write(BOLD + line)
                continue

            if kwargs['seq_type'] in ('dna', 'rna', 'nucleic_acid'):
                # DNA/RNA
                styled = self.seq_formatter.format_nucleic_acid(
                    line, kwargs['stop_codons'], kwargs['cpg']
                )
            else:
                # Protein
                styled = self.seq_formatter.format_protein(line)
            outbuffer.write(styled)
class FASTQFormatter(object):
    """Formatter for FASTQ files.

    Treats the input as consecutive four-line records, keyed purely on
    the line index modulo 4.  When color is requested, the first line of
    each record is written in bold, the second is passed through
    ``SequenceFormatter.format_nucleic_acid`` and the remaining two are
    written unchanged.
    """

    def __init__(self, theme):
        """Creates a new FASTQFormatter instance.

        Args:
            theme: Passed unchanged to ``SequenceFormatter``.
        """
        from cats.styles.sequence import SequenceFormatter

        # Load sequence formatter
        self.seq_formatter = SequenceFormatter(theme)

    @staticmethod
    def _as_text(line):
        """Return *line* as ``str``, decoding it only if it is bytes-like.

        Binary streams (e.g. gzip handles) yield ``bytes`` while text-mode
        streams yield ``str``; ``str`` has no ``decode`` method, so
        decoding unconditionally fails on text-mode input.
        """
        if isinstance(line, (bytes, bytearray)):
            return line.decode()
        return line

    def format(self, inbuffer, outbuffer=None, **kwargs):
        """Format sequence records.

        Args:
            inbuffer: Iterable of lines, either ``bytes`` or ``str``.
            outbuffer: Object with a ``write(str)`` method; defaults to
                ``sys.stdout``.
            **kwargs: Formatting options.  ``color`` is always required;
                ``stop_codons`` and ``cpg`` are read (and forwarded to
                ``format_nucleic_acid``) for sequence lines when
                ``color`` is true.

        Raises:
            KeyError: If a required option is missing from ``kwargs``.
        """
        import sys

        # default/bold text
        RESET = '\033[0m'
        BOLD = '\033[1m'

        # default to STDOUT for output
        if outbuffer is None:
            outbuffer = sys.stdout

        # Position of each line type within a four-line FASTQ record.
        # Positions 2 (description) and 3 (quality) get no special styling.
        FASTQ_ID = 0
        FASTQ_SEQ = 1

        # Plain mode: copy the input through unchanged
        if not kwargs['color']:
            for line in inbuffer:
                outbuffer.write(self._as_text(line))
            return

        # Color mode: style each line according to its position
        for index, line in enumerate(inbuffer):
            # Reset formatting so styling never leaks across lines
            outbuffer.write(RESET)

            line = self._as_text(line)
            position = index % 4

            if position == FASTQ_ID:
                outbuffer.write(BOLD + line)
            elif position == FASTQ_SEQ:
                outbuffer.write(self.seq_formatter.format_nucleic_acid(
                    line, kwargs['stop_codons'], kwargs['cpg']
                ))
            else:
                outbuffer.write(line)
class SeqStringFormatter(object):
    """Formatter for sequence strings.

    Every input line is treated as nucleic-acid sequence text.  When
    color is requested each line is passed through
    ``SequenceFormatter.format_nucleic_acid``; otherwise lines are
    copied to the output unchanged.
    """

    def __init__(self, theme):
        """Creates a new SeqStringFormatter instance.

        Args:
            theme: Passed unchanged to ``SequenceFormatter``.
        """
        from cats.styles.sequence import SequenceFormatter

        # Load sequence formatter
        self.seq_formatter = SequenceFormatter(theme)

    @staticmethod
    def _as_text(line):
        """Return *line* as ``str``, decoding it only if it is bytes-like.

        Binary streams (e.g. gzip handles) yield ``bytes`` while text-mode
        streams yield ``str``; ``str`` has no ``decode`` method, so
        decoding unconditionally fails on text-mode input.
        """
        if isinstance(line, (bytes, bytearray)):
            return line.decode()
        return line

    def format(self, inbuffer, outbuffer=None, **kwargs):
        """Format sequence records.

        Args:
            inbuffer: Iterable of lines, either ``bytes`` or ``str``.
            outbuffer: Object with a ``write(str)`` method; defaults to
                ``sys.stdout``.
            **kwargs: Formatting options.  ``color`` is always required;
                ``stop_codons`` and ``cpg`` are read (and forwarded to
                ``format_nucleic_acid``) for each line when ``color`` is
                true.

        Raises:
            KeyError: If a required option is missing from ``kwargs``.
        """
        import sys

        # default text
        RESET = '\033[0m'

        # default to STDOUT for output
        if outbuffer is None:
            outbuffer = sys.stdout

        # Plain mode: copy the input through unchanged
        if not kwargs['color']:
            for line in inbuffer:
                outbuffer.write(self._as_text(line))
            return

        # Color mode: colorize every line as nucleic acid sequence
        for line in inbuffer:
            # Reset formatting so styling never leaks across lines
            outbuffer.write(RESET)

            # Convert from byte-string if coming from a binary stream
            text = self._as_text(line)

            outbuffer.write(self.seq_formatter.format_nucleic_acid(
                text, kwargs['stop_codons'], kwargs['cpg']
            ))
class SeqRecordFormatter(object):
    """Formatter for BioPython SeqRecord objects.

    Writes each record as a ``>description`` header followed by its
    sequence (or its translation), wrapped to a fixed line width and
    optionally colorized through ``SequenceFormatter``.
    """

    def __init__(self, theme):
        """Creates a new SeqRecordFormatter instance.

        Args:
            theme: Passed unchanged to ``SequenceFormatter``.
        """
        from cats.styles.sequence import SequenceFormatter

        # Load sequence formatter
        self.seq_formatter = SequenceFormatter(theme)

    def format(self, seqs, outbuffer=None, **kwargs):
        """Format sequence records.

        Args:
            seqs: Iterable of records exposing ``description`` and
                ``seq``.  When translating, ``seq`` must additionally
                support slicing, ``reverse_complement()`` and
                ``translate(table=...)``.
            outbuffer: Object with a ``write(str)`` method; defaults to
                ``sys.stdout``.
            **kwargs: Formatting options, read per record: ``color``,
                ``line_width`` and ``translate`` are always required;
                ``translation_frame`` and ``translation_table`` when
                ``translate`` is true; ``stop_codons`` and ``cpg`` when
                coloring untranslated sequence.

        Raises:
            KeyError: If a required option is missing from ``kwargs``.
        """
        # default to STDOUT for output
        if outbuffer is None:
            import sys
            outbuffer = sys.stdout

        # default/bold text
        RESET = '\033[0m'
        BOLD = '\033[1m'

        for seq in seqs:
            # Header. ANSI escapes are only emitted in color mode so that
            # plain output stays free of control sequences.
            if kwargs['color']:
                outbuffer.write(RESET)
                outbuffer.write(BOLD)
            outbuffer.write(">" + seq.description + "\n")

            # line width
            width = kwargs['line_width']

            if kwargs['translate']:
                # Protein: the sign of the frame selects the strand, its
                # magnitude (1-based) the offset into that strand.
                frame_arg = kwargs['translation_frame']
                offset = abs(frame_arg) - 1
                if frame_arg < 0:
                    template = seq.seq.reverse_complement()
                else:
                    template = seq.seq

                # translation table ids:
                # see ftp://ftp.ncbi.nlm.nih.gov/entrez/misc/data/gc.prt
                table = kwargs['translation_table']
                residues = str(template[offset:].translate(table=table))

                rendered = self._fill(residues, width)
                if kwargs['color']:
                    rendered = self.seq_formatter.format_protein(rendered)
            else:
                # DNA
                rendered = self._fill(str(seq.seq), width)
                if kwargs['color']:
                    rendered = self.seq_formatter.format_nucleic_acid(
                        rendered, kwargs['stop_codons'], kwargs['cpg']
                    )

            outbuffer.write(rendered)

            # Terminate the record so the next header starts on its own
            # line; the wrapped text itself carries no trailing newline.
            if rendered and not rendered.endswith("\n"):
                outbuffer.write("\n")

    def _fill(self, text, width=70):
        """Wrap *text* by inserting a newline every *width* characters.

        Faster text-wrapping for long strings
        source: http://stackoverflow.com/questions/2657693/insert-a-newline-character-every-64-characters-using-python/2657733#2657733

        Args:
            text: String to wrap.
            width: Maximum characters per line.  A missing or
                non-positive width disables wrapping and returns *text*
                unchanged (a negative step would otherwise silently
                produce an empty string).

        Returns:
            The wrapped string, without a trailing newline.
        """
        if not width or width < 1:
            return text
        return '\n'.join(
            text[start:start + width] for start in range(0, len(text), width)
        )