예제 #1
0
def convert(in_file, in_format, out_file, out_format, alphabet=None):
    """Convert between two sequence file formats, return number of records.

     - in_file - an input handle or filename
     - in_format - input file format, lower case string
     - out_file - an output handle or filename
     - out_format - output file format, lower case string
     - alphabet - optional alphabet to assume

    Any file this function opens itself (i.e. when given a filename) is
    closed before returning, even if the conversion raises an exception.
    Handles supplied by the caller are never closed here.

    NOTE - If you provide an output filename, it will be opened which will
    overwrite any existing file without warning. This may happen even if
    the conversion is aborted (e.g. an invalid out_format name is given).

    For example, going from a filename to a handle:

    >>> from Bio import SeqIO
    >>> from StringIO import StringIO
    >>> handle = StringIO("")
    >>> SeqIO.convert("Quality/example.fastq", "fastq", handle, "fasta")
    3
    >>> print handle.getvalue()
    >EAS54_6_R1_2_1_413_324
    CCCTTCTTGTCTTCAGCGTTTCTCC
    >EAS54_6_R1_2_1_540_792
    TTGGCAGGCCAAGGCCGATGGATCA
    >EAS54_6_R1_2_1_443_348
    GTTGCTTCTGGCGTGGGTGGGGGGG
    <BLANKLINE>
    """
    # The *_close flags are only set once the matching open() has succeeded,
    # so the cleanup below never touches a handle we do not own.
    in_handle = None
    out_handle = None
    in_close = False
    out_close = False
    try:
        if isinstance(in_file, basestring):
            # Hack for SFF, will need to make this more general in future
            if in_format in _BinaryFormats:
                in_handle = open(in_file, "rb")
            else:
                in_handle = open(in_file, "rU")
            in_close = True
        else:
            in_handle = in_file

        # Don't open the output file until we've checked the input is OK?
        if isinstance(out_file, basestring):
            if out_format in ["sff", "sff_trim"]:
                out_handle = open(out_file, "wb")
            else:
                out_handle = open(out_file, "w")
            out_close = True
        else:
            out_handle = out_file

        # This will check the arguments and issue error messages,
        # after we have opened the file which is a shame.
        from _convert import _handle_convert  # Lazy import
        return _handle_convert(in_handle, in_format, out_handle, out_format,
                               alphabet)
    finally:
        # Must now close any handles we opened, whether or not the
        # conversion succeeded; the nested try ensures the output handle
        # is still closed if closing the input handle fails.
        try:
            if in_close:
                in_handle.close()
        finally:
            if out_close:
                out_handle.close()
예제 #2
0
def convert(in_file, in_format, out_file, out_format, alphabet=None):
    """Convert between two sequence file formats, return number of records.

     - in_file - an input handle or filename
     - in_format - input file format, lower case string
     - out_file - an output handle or filename
     - out_format - output file format, lower case string
     - alphabet - optional alphabet to assume

    Any file this function opens itself (i.e. when given a filename) is
    closed before returning, even if the conversion raises an exception.
    Handles supplied by the caller are never closed here.

    NOTE - If you provide an output filename, it will be opened which will
    overwrite any existing file without warning. This may happen even if
    the conversion is aborted (e.g. an invalid out_format name is given).

    For example, going from a filename to a handle:

    >>> from Bio import SeqIO
    >>> from StringIO import StringIO
    >>> handle = StringIO("")
    >>> SeqIO.convert("Quality/example.fastq", "fastq", handle, "fasta")
    3
    >>> print handle.getvalue()
    >EAS54_6_R1_2_1_413_324
    CCCTTCTTGTCTTCAGCGTTTCTCC
    >EAS54_6_R1_2_1_540_792
    TTGGCAGGCCAAGGCCGATGGATCA
    >EAS54_6_R1_2_1_443_348
    GTTGCTTCTGGCGTGGGTGGGGGGG
    <BLANKLINE>
    """
    # The *_close flags are only set once the matching open() has succeeded,
    # so the cleanup below never touches a handle we do not own.
    in_handle = None
    out_handle = None
    in_close = False
    out_close = False
    try:
        if isinstance(in_file, basestring):
            # Hack for SFF, will need to make this more general in future
            if in_format in _BinaryFormats:
                in_handle = open(in_file, "rb")
            else:
                in_handle = open(in_file, "rU")
            in_close = True
        else:
            in_handle = in_file

        # Don't open the output file until we've checked the input is OK?
        if isinstance(out_file, basestring):
            if out_format in ["sff", "sff_trim"]:
                out_handle = open(out_file, "wb")
            else:
                out_handle = open(out_file, "w")
            out_close = True
        else:
            out_handle = out_file

        # This will check the arguments and issue error messages,
        # after we have opened the file which is a shame.
        from _convert import _handle_convert  # Lazy import

        return _handle_convert(in_handle, in_format, out_handle, out_format, alphabet)
    finally:
        # Must now close any handles we opened, whether or not the
        # conversion succeeded; the nested try ensures the output handle
        # is still closed if closing the input handle fails.
        try:
            if in_close:
                in_handle.close()
        finally:
            if out_close:
                out_handle.close()