Ejemplo n.º 1
0
 def __init__(
         self, name, sequence, qualities, primer=None, name2='',
         original_length=None, match=None, match_info=None, clipped=None,
         insert_overlap=False, merged=False, corrected=0, alphabet=None):
     """Create a colorspace read, splitting off the primer base if needed.

     Args:
         name: Read name.
         sequence: Colorspace sequence. When `primer` is None, the first
             character is taken as the primer base and removed from the
             sequence.
         qualities: Qualities, or None. When given, they must have the
             same length as the sequence (after primer removal).
         primer: Primer base. If None, it is taken from `sequence`.
         name2, original_length, match, match_info, clipped,
             insert_overlap, merged, corrected, alphabet: Passed on
             unchanged to the parent class initializer.

     Raises:
         FormatError: If the lengths of qualities and sequence differ, or
             if the primer base is not one of A, C, G, T.
     """
     # In colorspace, the first character is the last nucleotide of the
     # primer base and the second character encodes the transition from the
     # primer base to the first real base of the read.
     if primer is None:
         # Slicing (rather than indexing) keeps an empty sequence from
         # raising IndexError; the empty primer is rejected further down.
         self.primer = sequence[0:1]
         sequence = sequence[1:]
     else:
         self.primer = primer
     if qualities is not None and len(sequence) != len(qualities):
         rname = truncate_string(name)
         raise FormatError(
             "In read named {0!r}: length of colorspace quality "
             "sequence ({1}) and length of read ({2}) do not match (primer "
             "is: {3!r})".format(
                 rname, len(qualities), len(sequence), self.primer))
     super().__init__(
         name, sequence, qualities, name2, original_length, match,
         match_info, clipped, insert_overlap, merged, corrected,
         alphabet=alphabet)
     # TODO: use 'alphabet' here
     if self.primer not in ('A', 'C', 'G', 'T'):
         raise FormatError(
             "Primer base is {0!r} in read {1!r}, but it should be one of "
             "A, C, G, T.".format(self.primer, truncate_string(name)))
Ejemplo n.º 2
0
 def __repr__(self):
     """Return a ``<ColorspaceSequence(...)>`` string for debugging.

     Name, sequence and qualities are passed through ``truncate_string``;
     the qualities part is only included when qualities are set.
     """
     if self.qualities is None:
         quals_part = ''
     else:
         quals_part = ', qualities={0!r}'.format(
             truncate_string(self.qualities))
     template = (
         '<ColorspaceSequence(name={0!r}, primer={1!r}, sequence={2!r}{3})>')
     return template.format(
         truncate_string(self.name),
         self.primer,
         truncate_string(self.sequence),
         quals_part)
Ejemplo n.º 3
0
 def __repr__(self):
     """Build the ``<ColorspaceSequence(...)>`` debug representation.

     The name, the sequence and (when present) the qualities go through
     ``truncate_string`` before being formatted; the primer is shown as is.
     """
     quals = self.qualities
     suffix = (
         '' if quals is None
         else ', qualities={0!r}'.format(truncate_string(quals)))
     shown_name = truncate_string(self.name)
     shown_seq = truncate_string(self.sequence)
     return (
         '<ColorspaceSequence(name={0!r}, primer={1!r}, sequence={2!r}{3})>'
     ).format(shown_name, self.primer, shown_seq, suffix)
Ejemplo n.º 4
0
 def __iter__(self):
     """Yield one record for each FASTA entry in the underlying file.

     Blank lines and lines starting with '#' are skipped. A record is
     built with ``self.sequence_class`` from the header text (without the
     leading '>'), the sequence lines joined with ``self._delimiter``, and
     None for the qualities.

     Raises:
         FormatError: If a data line appears before the first '>' header.
     """
     header = None
     chunks = []
     for lineno, raw in enumerate(self._file, 1):
         # strip() also removes DOS line breaks
         text = raw.strip()
         if not text or text[0] == '#':
             # Nothing to parse on empty lines and comment lines.
             continue
         if text[0] == '>':
             # A new header closes the record collected so far, if any.
             if header is not None:
                 yield self.sequence_class(
                     header, self._delimiter.join(chunks), None,
                     alphabet=self.alphabet)
             header = text[1:]
             chunks = []
             continue
         if header is None:
             raise FormatError(
                 "At line {0}: Expected '>' at beginning of FASTA record, "
                 "but got {1!r}.".format(lineno, truncate_string(text)))
         chunks.append(text)

     # The last record is not followed by a header, so emit it here.
     if header is not None:
         yield self.sequence_class(
             header, self._delimiter.join(chunks), None,
             alphabet=self.alphabet)
Ejemplo n.º 5
0
    def __iter__(self):
        """Iterate over the FASTA records of the underlying file.

        Empty lines and lines beginning with '#' are ignored. Every record
        is created by calling ``self.sequence_class`` with the header text
        (minus the leading '>'), the sequence lines joined by
        ``self._delimiter``, None as qualities, and ``self.alphabet``.

        Raises:
            FormatError: If a data line is seen before any '>' header.
        """

        def build(title, parts):
            """Create the record object for one finished entry."""
            return self.sequence_class(
                title, self._delimiter.join(parts), None,
                alphabet=self.alphabet)

        title = None
        parts = []
        for lineno, raw_line in enumerate(self._file, 1):
            # strip() also removes DOS line breaks
            stripped = raw_line.strip()
            if not stripped:
                continue
            marker = stripped[0]
            if marker == '>':
                # Starting a new entry: flush the previous one first.
                if title is not None:
                    yield build(title, parts)
                title, parts = stripped[1:], []
            elif marker == '#':
                continue
            elif title is None:
                raise FormatError(
                    "At line {0}: Expected '>' at beginning of FASTA record, "
                    "but got {1!r}.".format(lineno, truncate_string(stripped)))
            else:
                parts.append(stripped)

        # Flush the final entry, which has no following header line.
        if title is not None:
            yield build(title, parts)
Ejemplo n.º 6
0
 def __init__(self,
              name,
              sequence,
              qualities,
              primer=None,
              name2='',
              original_length=None,
              match=None,
              match_info=None,
              clipped=None,
              insert_overlap=False,
              merged=False,
              corrected=0,
              alphabet=None):
     """Initialize a colorspace read and validate its primer base.

     Args:
         name: Read name.
         sequence: Colorspace sequence. If `primer` is None, its first
             character is used as the primer base and stripped off.
         qualities: Qualities, or None. When given, their length must
             equal the length of the sequence without the primer.
         primer: Primer base, or None to take it from `sequence`.
         name2, original_length, match, match_info, clipped,
             insert_overlap, merged, corrected, alphabet: Forwarded
             unchanged to the parent class initializer.

     Raises:
         FormatError: If qualities and sequence differ in length, or if
             the primer base is not one of A, C, G, T.
     """
     # In colorspace, the first character is the last nucleotide of the
     # primer base and the second character encodes the transition from the
     # primer base to the first real base of the read.
     if primer is None:
         # A slice is used so that an empty sequence gives an empty primer
         # (rejected below) instead of an IndexError.
         self.primer = sequence[0:1]
         sequence = sequence[1:]
     else:
         self.primer = primer
     if qualities is not None and len(sequence) != len(qualities):
         rname = truncate_string(name)
         raise FormatError(
             "In read named {0!r}: length of colorspace quality "
             "sequence ({1}) and length of read ({2}) do not match (primer "
             "is: {3!r})".format(rname, len(qualities), len(sequence),
                                 self.primer))
     super().__init__(name,
                      sequence,
                      qualities,
                      name2,
                      original_length,
                      match,
                      match_info,
                      clipped,
                      insert_overlap,
                      merged,
                      corrected,
                      alphabet=alphabet)
     # TODO: use 'alphabet' here
     if self.primer not in ('A', 'C', 'G', 'T'):
         raise FormatError(
             "Primer base is {0!r} in read {1!r}, but it should be one of "
             "A, C, G, T.".format(self.primer, truncate_string(name)))
Ejemplo n.º 7
0
    def __call__(self,
                 *args,
                 colwidths=None,
                 extra_width=None,
                 justification=None,
                 extra_justification=None,
                 indent=None,
                 extra_indent=None,
                 header=False,
                 underline='-',
                 pct=None,
                 default=None,
                 **kwargs):
        """Print a row.

        Calling with no positional arguments prints a newline instead.

        Args:
            args: Fields in the row.
            colwidths, justification, indent: Row-specific colwidths,
                justification, indent. Fall back to the instance settings
                when not given.
            extra_width, extra_justification, extra_indent: colwidth/
                justification/indent to use for extra fields.
            header: Whether this is a header row.
            underline: Underline to print after a header row. Either a bool
                (True uses '-', False prints no underline) or the character
                to repeat.
            pct: Whether floating point values should be formatted as
                percentages. Defaults to the instance setting.
            default: Value substituted for fields that are None.
            kwargs: Additional keyword arguments to pass to print.
        """
        ncols = len(args)
        if ncols == 0:
            self.newline()
            return

        if pct is None:
            pct = self.pct

        def adjust(arr, extra=None):
            """Adjust an array. If longer than the number of columns,
            truncate; if shorter, fill in with `extra` or, when that is
            falsy, by repeating the last element.
            """
            alen = len(arr)
            if alen == ncols:
                return arr
            elif alen > ncols:
                return arr[:ncols]
            else:
                return arr + ((extra or arr[-1], ) * (ncols - alen))

        colwidths, justification, indent = (
            adjust(arr, extra)
            for arr, extra in zip(
                (colwidths or self.colwidths,
                 justification or self.justification,
                 indent or self.indent),
                (extra_width, extra_justification, extra_indent)))

        # adjust colwidths if this is a header, so that no header field is
        # wider than its column
        if header:
            colwidths = tuple(
                max(w, len(str(a))) for w, a in zip(colwidths, args))

        fmt_str = []
        fmt_args = []
        for i, (value, width, just,
                ind) in enumerate(zip(args, colwidths, justification, indent)):
            if value is None:
                # NOTE: a falsy `default` (0, '') also falls back to
                # self.default here.
                value = default or self.default
            if isinstance(value, str):
                typ = 's'
                if len(value) > width:
                    value = truncate_string(value, width)
            elif isinstance(value, float):
                # One decimal place with thousands separator, as a
                # percentage or a plain float.
                typ = ',.1' + ('%' if pct else 'f')
            else:
                # Everything else is formatted as an integer with
                # thousands separator.
                typ = ',d'
            # The indent is emitted literally, so it is subtracted from
            # the width of the formatted field.
            fmt_str.append(ind + '{' + str(i) + ':' + just +
                           str(width - len(ind)) + typ + '}')
            fmt_args.append(value)

        fmt_str = ' '.join(fmt_str)
        self._print(fmt_str.format(*fmt_args), **kwargs)

        # `underline` is documented as a bool or a character. Multiplying
        # a bool by the width yields an int, which cannot be joined, so
        # booleans are resolved explicitly before building the line.
        if header and underline is not False and underline is not None:
            uchar = '-' if underline is True else underline
            sepline = ' '.join((uchar * width) for width in colwidths)
            self._print(sepline, **kwargs)
Ejemplo n.º 8
0
 def __call__(
         self, *args, colwidths=None, extra_width=None, justification=None,
         extra_justification=None, indent=None, extra_indent=None,
         header=False, underline='-', pct=None, default=None, **kwargs):
     """Print a row.

     Calling with no positional arguments prints a newline instead.

     Args:
         args: Fields in the row.
         colwidths, justification, indent: Row-specific colwidths,
             justification, indent. Fall back to the instance settings
             when not given.
         extra_width, extra_justification, extra_indent: colwidth/
             justification/indent to use for extra fields.
         header: Whether this is a header row.
         underline: Underline to print after a header row. Either a bool
             (True uses '-', False prints no underline) or the character
             to repeat.
         pct: Whether floating point values should be formatted as
             percentages. Defaults to the instance setting.
         default: Value substituted for fields that are None.
         kwargs: Additional keyword arguments to pass to print.
     """
     ncols = len(args)
     if ncols == 0:
         self.newline()
         return

     if pct is None:
         pct = self.pct

     def adjust(arr, extra=None):
         """Adjust an array. If longer than the number of columns,
         truncate; if shorter, fill in with `extra` or, when that is
         falsy, by repeating the last element.
         """
         alen = len(arr)
         if alen == ncols:
             return arr
         elif alen > ncols:
             return arr[:ncols]
         else:
             return arr + ((extra or arr[-1],) * (ncols - alen))

     colwidths, justification, indent = (
         adjust(arr, extra)
         for arr, extra in zip(
             (
                 colwidths or self.colwidths,
                 justification or self.justification,
                 indent or self.indent),
             (extra_width, extra_justification, extra_indent)))

     # adjust colwidths if this is a header, so that no header field is
     # wider than its column
     if header:
         colwidths = tuple(
             max(w, len(str(a)))
             for w, a in zip(colwidths, args))

     fmt_str = []
     fmt_args = []
     for i, (value, width, just, ind) in enumerate(
             zip(args, colwidths, justification, indent)):
         if value is None:
             # NOTE: a falsy `default` (0, '') also falls back to
             # self.default here.
             value = default or self.default
         if isinstance(value, str):
             typ = 's'
             if len(value) > width:
                 value = truncate_string(value, width)
         elif isinstance(value, float):
             # One decimal place with thousands separator, as a
             # percentage or a plain float.
             typ = ',.1' + ('%' if pct else 'f')
         else:
             # Everything else is formatted as an integer with thousands
             # separator.
             typ = ',d'
         # The indent is emitted literally, so it is subtracted from the
         # width of the formatted field.
         fmt_str.append(
             ind + '{' + str(i) + ':' + just + str(width-len(ind)) +
             typ + '}')
         fmt_args.append(value)

     fmt_str = ' '.join(fmt_str)
     self._print(fmt_str.format(*fmt_args), **kwargs)

     # `underline` is documented as a bool or a character. Multiplying a
     # bool by the width yields an int, which cannot be joined, so booleans
     # are resolved explicitly before building the line.
     if header and underline is not False and underline is not None:
         uchar = '-' if underline is True else underline
         sepline = ' '.join((uchar * width) for width in colwidths)
         self._print(sepline, **kwargs)