Exemple #1
0
def write(alignments, handle, format):
    """Write complete set of alignments to a file.

    Arguments:
      - alignments - A list (or iterator) of Alignment objects (ideally the
        new MultipleSeqAlignment objects), or (if using Biopython
        1.54 or later) a single alignment object.
      - handle    - File handle object to write to, or filename as string
        (note older versions of Biopython only took a handle).
      - format    - lower case string describing the file format to write.

    You should close the handle after calling this function.

    Returns the number of alignments written (as an integer).

    Raises:
      - TypeError  - if format is not a string, or (on the SeqIO fallback
        path) if an item of alignments is not an Alignment object.
      - ValueError - if format is empty, not lower case, only supported for
        reading, or unknown.

    The format is fully validated *before* the handle is opened for
    writing, so an unsupported format is rejected without first opening
    (and thereby possibly creating or truncating) a file named by handle.
    """
    from anarci.Bio import SeqIO

    # Try and give helpful error messages:
    if not isinstance(format, basestring):
        raise TypeError("Need a string for the file format (lower case)")
    if not format:
        raise ValueError("Format required (lower case string)")
    if format != format.lower():
        raise ValueError("Format string '%s' should be lower case" % format)

    # Decide how the format will be written before touching the handle, so
    # that a bad format name can never clobber an existing output file.
    use_alignment_writer = format in _FormatToWriter
    if not use_alignment_writer and format not in SeqIO._FormatToWriter:
        if format in _FormatToIterator or format in SeqIO._FormatToIterator:
            raise ValueError(
                "Reading format '%s' is supported, but not writing" % format)
        raise ValueError("Unknown format '%s'" % format)

    if isinstance(alignments, Alignment):
        # This raised an exception in older versions of Biopython
        alignments = [alignments]

    with as_handle(handle, 'w') as fp:
        if use_alignment_writer:
            # Map the file format to a writer class
            writer_class = _FormatToWriter[format]
            count = writer_class(fp).write_file(alignments)
        else:
            # Exploit the existing SeqIO parser to do the dirty work!
            # TODO - Can we make one call to SeqIO.write() and count the alignments?
            count = 0
            for alignment in alignments:
                if not isinstance(alignment, Alignment):
                    raise TypeError("Expect a list or iterator of Alignment "
                                    "objects, got: %r" % alignment)
                SeqIO.write(alignment, fp, format)
                count += 1

    # Internal sanity check on the writer implementation, not on user input.
    assert isinstance(count, int), "Internal error - the underlying %s " \
           "writer should have returned the alignment count, not %s" \
           % (format, repr(count))

    return count
Exemple #2
0
def _handle_convert(in_handle,
                    in_format,
                    out_handle,
                    out_format,
                    alphabet=None):
    """Convert between two sequence file formats (PRIVATE).

    A dedicated converter registered in ``_converter`` under the key
    ``(in_format, out_format)`` is used when one exists; it is called with
    ``(in_handle, out_handle, alphabet)`` and its result is returned.

    Otherwise the generic route is taken: the input is parsed with
    ``SeqIO.parse`` and the records are handed to ``SeqIO.write``, whose
    return value is returned.
    """
    try:
        fast_converter = _converter[(in_format, out_format)]
    except KeyError:
        # No specialised converter registered for this format pair.
        fast_converter = None

    if not fast_converter:
        # Generic fallback: parse everything, then write it back out.
        parsed_records = SeqIO.parse(in_handle, in_format, alphabet)
        return SeqIO.write(parsed_records, out_handle, out_format)

    return fast_converter(in_handle, out_handle, alphabet)
Exemple #3
0
def _embl_convert_fasta(in_handle, out_handle, alphabet=None):
    """Convert EMBL input to FASTA output, skipping features (PRIVATE).

    Records are read with ``EmblScanner.parse_records`` with feature
    parsing switched off, then written with ``SeqIO.write`` in "fasta"
    format. The ``alphabet`` argument is accepted but not used here.

    Returns whatever ``SeqIO.write`` returns.
    """
    from anarci.Bio.GenBank.Scanner import EmblScanner

    scanner = EmblScanner()
    # Feature tables are not needed for FASTA output, so do not parse them.
    embl_records = scanner.parse_records(in_handle, do_features=False)
    # The alphabet is irrelevant for FASTA output as well.
    return SeqIO.write(embl_records, out_handle, "fasta")
Exemple #4
0
def _SeqIO_to_alignment_iterator(handle,
                                 format,
                                 alphabet=None,
                                 seq_count=None):
    """Use Bio.SeqIO to build a MultipleSeqAlignment iterator (PRIVATE).

    Arguments:
      - handle    - handle to the file.
      - format    - string describing the file format.
      - alphabet  - optional Alphabet object, useful when the sequence type
        cannot be automatically inferred from the file itself
        (e.g. fasta, phylip, clustal)
      - seq_count - Optional integer, number of sequences expected in each
        alignment.  Recommended for fasta format files.

    If seq_count is omitted (or otherwise false, e.g. zero) then all the
    sequences in the file are combined into a single MultipleSeqAlignment;
    nothing is yielded if the file holds no records.

    With a seq_count, records are grouped into consecutive batches of that
    size, one alignment per batch. A ValueError is raised if records are
    left over that do not fill a complete batch.
    """
    from anarci.Bio import SeqIO

    if not seq_count:
        # No batch size given: every record belongs to one single alignment.
        everything = list(SeqIO.parse(handle, format, alphabet))
        if everything:
            yield MultipleSeqAlignment(everything, alphabet)
        return

    # Split the record stream into batches of seq_count records each.
    batch = []
    for seq_record in SeqIO.parse(handle, format, alphabet):
        batch.append(seq_record)
        if len(batch) == seq_count:
            yield MultipleSeqAlignment(batch, alphabet)
            batch = []

    if batch:
        # A trailing, incomplete batch means the count did not fit the file.
        raise ValueError("Check seq_count argument, not enough sequences?")
Exemple #5
0
    def __format__(self, format_spec):
        """Return the record as a string in the requested file format.

        Hook for the built-in format() function. The format_spec is passed
        to SeqIO.write as the output format name; an empty format_spec
        falls back to str(self), following the usual Python convention.

        For formats listed in SeqIO._BinaryFormats the output is collected
        in a BytesIO buffer (so bytes are returned on Python 3); for all
        other formats a StringIO buffer is used and text is returned.
        """
        if format_spec:
            from anarci.Bio import SeqIO

            if format_spec in SeqIO._BinaryFormats:
                # Binary output: collect bytes.
                from io import BytesIO
                buffer = BytesIO()
            else:
                # Text output: collect a (unicode) string.
                from anarci.Bio._py3k import StringIO
                buffer = StringIO()

            SeqIO.write(self, buffer, format_spec)
            return buffer.getvalue()

        # No format requested - default to the __str__ representation.
        return str(self)
Exemple #6
0
 def _parse(handle):
     """Return the first item produced by the wrapped iterator (PRIVATE).

     Dynamically generated parser function. The names ``i`` and
     ``alphabet`` are taken from the enclosing scope, which is not part of
     this snippet. ``i`` is first called with an ``alphabet`` keyword; if a
     TypeError is raised while doing so (presumably because ``i`` does not
     accept that keyword - confirm against the enclosing code), ``i`` is
     called with the handle only and the result is passed through
     ``SeqIO._force_alphabet`` instead.
     """
     try:
         with_alphabet = i(handle, alphabet=alphabet)
         first_item = next(with_alphabet)
     except TypeError:
         # Fallback: call without the keyword and impose the alphabet after.
         forced = SeqIO._force_alphabet(i(handle), alphabet)
         first_item = next(forced)
     return first_item