Example #1
0
    def __init__(self, theme):
        """Creates a new FASTAFormatter instance.

        Args:
            theme: Theme passed unchanged to ``SequenceFormatter``.
        """
        # The import is kept function-local, as in the original code.
        from cats.styles.sequence import SequenceFormatter

        # Load sequence formatter
        self.seq_formatter = SequenceFormatter(theme)
Example #2
0
class FASTAFormatter(object):
    """Formatter for FASTA files"""
    def __init__(self, theme):
        """Creates a new FASTAFormatter instance.

        Args:
            theme: Theme passed unchanged to ``SequenceFormatter``.
        """
        # The import is kept function-local, as in the original code.
        from cats.styles.sequence import SequenceFormatter

        # Load sequence formatter
        self.seq_formatter = SequenceFormatter(theme)

    @staticmethod
    def _to_text(line):
        """Return ``line`` as text, decoding it only if it is a byte string."""
        if isinstance(line, (bytes, bytearray)):
            return line.decode()
        return line

    def format(self, inbuffer, outbuffer=None, **kwargs):
        """Format FASTA lines and write them to ``outbuffer``.

        Args:
            inbuffer: Iterable of lines. Byte strings are decoded with the
                default codec; text lines are used unchanged.
            outbuffer: Object with a ``write`` method. Defaults to
                ``sys.stdout``.
            **kwargs: ``color`` is required. When it is truthy, ``seq_type``
                is read for every sequence line, and ``stop_codons`` and
                ``cpg`` are read (and handed to the sequence formatter) for
                lines whose ``seq_type`` is 'dna', 'rna' or 'nucleic_acid'.

        Raises:
            KeyError: If a required keyword option is missing.
        """
        # default/bold text
        RESET = '\033[0m'
        BOLD = '\033[1m'

        # default to STDOUT for output
        if outbuffer is None:
            import sys
            outbuffer = sys.stdout

        # Without color the input is passed through untouched
        if not kwargs['color']:
            for line in inbuffer:
                outbuffer.write(self._to_text(line))
            return

        for line in inbuffer:
            line = self._to_text(line)

            # Reset formatting left over from the previous line
            outbuffer.write(RESET)

            if line.startswith('>'):
                # Description line
                outbuffer.write(BOLD + line)
            elif kwargs['seq_type'] in ('dna', 'rna', 'nucleic_acid'):
                # DNA/RNA
                outbuffer.write(self.seq_formatter.format_nucleic_acid(
                    line, kwargs['stop_codons'], kwargs['cpg']
                ))
            else:
                # Protein
                outbuffer.write(self.seq_formatter.format_protein(line))
Example #3
0
class FASTQFormatter(object):
    """Formatter for FASTQ files"""
    def __init__(self, theme):
        """Creates a new FASTQFormatter instance.

        Args:
            theme: Theme passed unchanged to ``SequenceFormatter``.
        """
        # The import is kept function-local, as in the original code.
        from cats.styles.sequence import SequenceFormatter

        # Load sequence formatter
        self.seq_formatter = SequenceFormatter(theme)

    @staticmethod
    def _to_text(line):
        """Return ``line`` as text, decoding it only if it is a byte string."""
        if isinstance(line, (bytes, bytearray)):
            return line.decode()
        return line

    def format(self, inbuffer, outbuffer=None, **kwargs):
        """Format FASTQ lines and write them to ``outbuffer``.

        Lines are classified purely by their position within consecutive
        groups of four lines, so the input is expected to contain exactly
        four lines per record.

        Args:
            inbuffer: Iterable of lines. Byte strings are decoded with the
                default codec; text lines are used unchanged.
            outbuffer: Object with a ``write`` method. Defaults to
                ``sys.stdout``.
            **kwargs: ``color`` is required. When it is truthy,
                ``stop_codons`` and ``cpg`` are read for every sequence
                line and handed to the sequence formatter.

        Raises:
            KeyError: If a required keyword option is missing.
        """
        # default/bold text
        RESET = '\033[0m'
        BOLD = '\033[1m'

        # default to STDOUT for output
        if outbuffer is None:
            import sys
            outbuffer = sys.stdout

        # FASTQ line types, by position within a four-line record.
        # Positions 2 (description) and 3 (quality) are written unstyled.
        FASTQ_ID = 0
        FASTQ_SEQ = 1

        # Without color the input is passed through untouched
        if not kwargs['color']:
            for line in inbuffer:
                outbuffer.write(self._to_text(line))
            return

        for i, line in enumerate(inbuffer):
            # Reset formatting left over from the previous line
            outbuffer.write(RESET)

            line = self._to_text(line)
            position = i % 4

            if position == FASTQ_ID:
                outbuffer.write(BOLD + line)
            elif position == FASTQ_SEQ:
                outbuffer.write(self.seq_formatter.format_nucleic_acid(
                    line, kwargs['stop_codons'], kwargs['cpg']
                ))
            else:
                outbuffer.write(line)
Example #4
0
class SeqStringFormatter(object):
    """Formatter for sequence strings"""
    def __init__(self, theme):
        """Creates a new SeqStringFormatter instance.

        Args:
            theme: Theme passed unchanged to ``SequenceFormatter``.
        """
        # The import is kept function-local, as in the original code.
        from cats.styles.sequence import SequenceFormatter

        # Load sequence formatter
        self.seq_formatter = SequenceFormatter(theme)

    @staticmethod
    def _to_text(line):
        """Return ``line`` as text, decoding it only if it is a byte string."""
        if isinstance(line, (bytes, bytearray)):
            return line.decode()
        return line

    def format(self, inbuffer, outbuffer=None, **kwargs):
        """Format raw sequence lines and write them to ``outbuffer``.

        Every line is treated as nucleic acid sequence.

        Args:
            inbuffer: Iterable of lines. Byte strings (e.g. coming from
                gzip) are decoded with the default codec; text lines are
                used unchanged.
            outbuffer: Object with a ``write`` method. Defaults to
                ``sys.stdout``.
            **kwargs: ``color`` is required. When it is truthy,
                ``stop_codons`` and ``cpg`` are read for every line and
                handed to the sequence formatter.

        Raises:
            KeyError: If a required keyword option is missing.
        """
        # default text
        RESET = '\033[0m'

        # default to STDOUT for output
        if outbuffer is None:
            import sys
            outbuffer = sys.stdout

        # Without color the input is passed through untouched
        if not kwargs['color']:
            for line in inbuffer:
                outbuffer.write(self._to_text(line))
            return

        for line in inbuffer:
            # Reset formatting left over from the previous line
            outbuffer.write(RESET)

            # Highlight the sequence
            outbuffer.write(self.seq_formatter.format_nucleic_acid(
                self._to_text(line), kwargs['stop_codons'], kwargs['cpg']
            ))
Example #5
0
class SeqRecordFormatter(object):
    """Formatter for BioPython SeqRecord objects"""
    def __init__(self, theme):
        """Creates a new SeqRecordFormatter instance.

        Args:
            theme: Theme passed unchanged to ``SequenceFormatter``.
        """
        # The import is kept function-local, as in the original code.
        from cats.styles.sequence import SequenceFormatter

        # Load sequence formatter
        self.seq_formatter = SequenceFormatter(theme)

    def format(self, seqs, outbuffer=None, **kwargs):
        """Write each record as a ``>description`` header plus its sequence.

        Args:
            seqs: Iterable of records exposing ``description`` and ``seq``.
                When translating, ``seq`` must support slicing,
                ``reverse_complement()`` and ``translate(table=...)``.
            outbuffer: Object with a ``write`` method. Defaults to
                ``sys.stdout``.
            **kwargs: Required options, read once per record: ``color``,
                ``line_width`` and ``translate``. ``translation_frame``
                and ``translation_table`` are read when ``translate`` is
                truthy; ``stop_codons`` and ``cpg`` are read when coloring
                an untranslated sequence.

        Raises:
            KeyError: If a required keyword option is missing.
        """
        # default to STDOUT for output
        if outbuffer is None:
            import sys
            outbuffer = sys.stdout

        # default/bold text
        RESET = '\033[0m'
        BOLD = '\033[1m'

        # Iterate through and format each sequence record
        for seq in seqs:
            color = kwargs['color']
            width = kwargs['line_width']

            header = ">" + seq.description + "\n"
            if color:
                # Escape codes are emitted only in color mode, so plain
                # output stays free of control sequences. The bold header
                # is reset again so it does not bleed into the sequence.
                outbuffer.write(RESET)
                outbuffer.write(BOLD)
                outbuffer.write(header)
                outbuffer.write(RESET)
            else:
                outbuffer.write(header)

            if kwargs['translate']:
                # Protein: a negative frame reads the reverse complement
                frame = kwargs['translation_frame']
                offset = abs(frame) - 1
                if frame < 0:
                    strand = seq.seq.reverse_complement()
                else:
                    strand = seq.seq

                # select translation table
                # see: ftp://ftp.ncbi.nlm.nih.gov/entrez/misc/data/gc.prt
                table = kwargs['translation_table']
                text = str(strand[offset:].translate(table=table))
            else:
                # DNA
                text = str(seq.seq)

            wrapped = self._fill(text, width)
            if wrapped:
                # Terminate the record so the next header starts on its
                # own line instead of being glued to the sequence.
                wrapped += "\n"

            if not color:
                outbuffer.write(wrapped)
            elif kwargs['translate']:
                outbuffer.write(self.seq_formatter.format_protein(wrapped))
            else:
                outbuffer.write(self.seq_formatter.format_nucleic_acid(
                    wrapped, kwargs['stop_codons'], kwargs['cpg']
                ))

    def _fill(self, text, width=70):
        """
        Faster text-wrapping for long strings

        Inserts a newline after every ``width`` characters. A missing or
        non-positive ``width`` returns ``text`` unwrapped.

        source: http://stackoverflow.com/questions/2657693/insert-a-newline-character-every-64-characters-using-python/2657733#2657733
        """
        if not width or width < 1:
            return text
        return '\n'.join(text[i:i+width] for i in range(0, len(text), width))