Esempio n. 1
0
def seqio(in_fhands, out_fhands, out_format, copy_if_same_format=True):
    '''It converts sequence files between formats.

    The format of every input file is guessed with guess_format and one of
    the following strategies is applied:

    - one input that is already in out_format and has a name attribute:
      the content is copied into out_fhands[0] (copy_if_same_format=True)
      or a relative symlink is created from the input name to the output
      name (copy_if_same_format=False).
    - one input and one output: SeqIO.convert does the conversion.
    - one input, two outputs and out_format 'fasta': every record is
      written to out_fhands[0] as fasta and to out_fhands[1] as qual.

    Any other combination of inputs and outputs is silently ignored.

    A ValueError for which error_quality_disagree is true is re-raised as
    MalformedFile, any other ValueError is propagated untouched.
    '''

    in_formats = [guess_format(fhand) for fhand in in_fhands]

    if (len(in_formats) == 1 and in_formats[0] == out_format and
        hasattr(in_fhands[0], 'name')):
        # Nothing to convert, the input is already in the requested format
        if copy_if_same_format:
            copyfileobj(in_fhands[0], out_fhands[0])
        else:
            rel_symlink(in_fhands[0].name, out_fhands[0].name)

    elif len(in_fhands) == 1 and len(out_fhands) == 1:
        try:
            SeqIO.convert(in_fhands[0], in_formats[0], out_fhands[0],
                          out_format)
        except ValueError as error:
            if error_quality_disagree(error):
                raise MalformedFile(str(error))
            raise
    elif (len(in_fhands) == 1 and len(out_fhands) == 2 and
          out_format == 'fasta'):
        try:
            # The sequence goes to the first output and the qualities to the
            # second one, record by record
            for seq in read_seqrecords([in_fhands[0]]):
                SeqIO.write([seq], out_fhands[0], out_format)
                SeqIO.write([seq], out_fhands[1], 'qual')
        # "except X as name" is valid from python 2.6 on and in python 3,
        # it is also the form already used in the branch above
        except ValueError as error:
            if error_quality_disagree(error):
                raise MalformedFile(str(error))
            raise
Esempio n. 2
0
def fastaqual_to_fasta(seq_fhand, qual_fhand, out_fhand):
    '''It converts a fasta and a qual file into a fastq format file.

    Despite the function name, the records are written as fastq.

    seq_fhand and qual_fhand are paired with PairedFastaQualIterator and the
    resulting records are handed to write_seqrecs together with the name of
    out_fhand (not the file object itself).

    A ValueError for which error_quality_disagree is true is re-raised as
    MalformedFile, any other ValueError is propagated untouched.
    '''
    seqrecords = PairedFastaQualIterator(seq_fhand, qual_fhand)
    try:
        write_seqrecs(seqrecords, out_fhand.name, 'fastq')
    # "except X as name" is valid from python 2.6 on and in python 3
    except ValueError as error:
        if error_quality_disagree(error):
            raise MalformedFile(str(error))
        raise
Esempio n. 3
0
def fastaqual_to_fasta(seq_fhand, qual_fhand, out_fhand):
    '''It converts a fasta and a qual file into a fastq format file.

    Despite the function name, the records are written as fastq.

    seq_fhand and qual_fhand are paired with PairedFastaQualIterator and the
    resulting records are handed to write_seqrecs together with the name of
    out_fhand (not the file object itself).

    A ValueError for which error_quality_disagree is true is re-raised as
    MalformedFile, any other ValueError is propagated untouched.
    '''
    seqrecords = PairedFastaQualIterator(seq_fhand, qual_fhand)
    try:
        write_seqrecs(seqrecords, out_fhand.name, 'fastq')
    # "except X as name" is valid from python 2.6 on and in python 3
    except ValueError as error:
        if error_quality_disagree(error):
            raise MalformedFile(str(error))
        raise
Esempio n. 4
0
 def next(self):
     'It returns the next packet: a list with up to _packet_size items'
     # A list is returned, not a generator, because a generator could not
     # be used with multiprocessing.Pool
     limit = self._packet_size
     items = []
     try:
         for position, element in enumerate(self._iterable, 1):
             items.append(element)
             if position >= limit:
                 break
     except ValueError as error:
         if not error_quality_disagree(error):
             raise
         raise MalformedFile(str(error))
     if items:
         return items
     # Nothing left in the underlying iterable
     raise StopIteration
Esempio n. 5
0
def _read_seqitems(fhands):
    'it returns an iterator of seq items (tuples of name and chunk)'
    seq_iters = []
    for fhand in fhands:
        file_format = get_format(fhand)

        if file_format == 'fasta':
            seq_iter = _itemize_fasta(fhand)
        elif 'multiline' not in file_format and 'fastq' in file_format:
            try:
                seq_iter = _itemize_fastq(fhand)
            except ValueError as error:
                if error_quality_disagree(error):
                    raise MalformedFile(str(error))
                raise
        else:
            msg = 'Format not supported by the itemizers: ' + file_format
            raise NotImplementedError(msg)
        seq_iter = assing_kind_to_seqs(SEQITEM, seq_iter, file_format)
        seq_iters.append(seq_iter)
    return chain.from_iterable(seq_iters)
Esempio n. 6
0
def seqio(in_fhands, out_fhand, out_format, copy_if_same_format=True):
    '''It converts sequence files between formats.

    out_format has to be one of the formats listed in the
    SUPPORTED_OUTPUT_FORMATS setting, otherwise IncompatibleFormatError is
    raised.

    The input formats are guessed with guess_format and normalized with
    remove_multiline. When there is only one input and it is already in
    out_format its content is copied into out_fhand
    (copy_if_same_format=True) or a relative symlink is created from the
    input name to the output name (copy_if_same_format=False). In any other
    case the records are read with _read_seqrecords and written with
    write_seqrecs.

    A ValueError raised while writing is translated:
    - to MalformedFile if error_quality_disagree is true for it,
    - to IncompatibleFormatError if its message contains
      'No suitable quality scores',
    - otherwise it is propagated untouched.
    '''
    if out_format not in get_setting('SUPPORTED_OUTPUT_FORMATS'):
        raise IncompatibleFormatError("This output format is not supported")

    in_formats = [remove_multiline(guess_format(fhand)) for fhand in in_fhands]

    if len(in_fhands) == 1 and in_formats[0] == out_format:
        # Nothing to convert, the input is already in the requested format
        if copy_if_same_format:
            copyfileobj(in_fhands[0], out_fhand)
        else:
            rel_symlink(in_fhands[0].name, out_fhand.name)
    else:
        seqs = _read_seqrecords(in_fhands)
        try:
            write_seqrecs(seqs, out_fhand, out_format)
        # "except X as name" is valid from python 2.6 on and in python 3
        except ValueError as error:
            if error_quality_disagree(error):
                raise MalformedFile(str(error))
            if 'No suitable quality scores' in str(error):
                msg = 'No qualities available to write output file'
                raise IncompatibleFormatError(msg)
            raise
Esempio n. 7
0
def seqio(in_fhands, out_fhand, out_format, copy_if_same_format=True):
    '''It converts sequence files between formats.

    out_format has to be one of the formats listed in the
    SUPPORTED_OUTPUT_FORMATS setting, otherwise IncompatibleFormatError is
    raised.

    The input formats are obtained with get_format. When there is only one
    input and it is already in out_format its content is copied into
    out_fhand (copy_if_same_format=True) or a relative symlink is created
    from the input name to the output name (copy_if_same_format=False). In
    any other case the records are read with _read_seqrecords and written
    with write_seqrecs.

    A ValueError raised while writing is translated:
    - to MalformedFile if error_quality_disagree is true for it,
    - to IncompatibleFormatError if its message contains
      'No suitable quality scores',
    - otherwise it is propagated untouched.
    '''
    if out_format not in get_setting('SUPPORTED_OUTPUT_FORMATS'):
        raise IncompatibleFormatError("This output format is not supported")

    in_formats = [get_format(fhand) for fhand in in_fhands]

    if len(in_fhands) == 1 and in_formats[0] == out_format:
        # Nothing to convert, the input is already in the requested format
        if copy_if_same_format:
            copyfileobj(in_fhands[0], out_fhand)
        else:
            rel_symlink(in_fhands[0].name, out_fhand.name)
    else:
        seqs = _read_seqrecords(in_fhands)
        try:
            write_seqrecs(seqs, out_fhand, out_format)
        # "except X as name" is valid from python 2.6 on and in python 3
        except ValueError as error:
            if error_quality_disagree(error):
                raise MalformedFile(str(error))
            if 'No suitable quality scores' in str(error):
                msg = 'No qualities available to write output file'
                raise IncompatibleFormatError(msg)
            raise
Esempio n. 8
0
def _read_seqitems(fhands, file_format):
    'it returns an iterator of seq items (tuples of name and chunk)'
    seq_iters = []
    for fhand in fhands:
        if file_format == GUESS_FORMAT or file_format is None:
            file_format = guess_format(fhand)
        else:
            file_format = file_format

        if file_format == 'fasta':
            seq_iter = _itemize_fasta(fhand)
        elif 'multiline' not in file_format and 'fastq' in file_format:
            try:
                seq_iter = _itemize_fastq(fhand)
            except ValueError as error:
                if error_quality_disagree(error):
                    raise MalformedFile(str(error))
                raise
        else:
            msg = 'Format not supported by the itemizers: ' + file_format
            raise NotImplementedError(msg)
        seq_iter = assing_kind_to_seqs(SEQITEM, seq_iter, file_format)
        seq_iters.append(seq_iter)
    return chain.from_iterable(seq_iters)
Esempio n. 9
0
        try:
            for seq in read_seqrecords([in_fhands[0]]):
                SeqIO.write([seq], out_fhands[0], out_format)
                SeqIO.write([seq], out_fhands[1], 'qual')
        except ValueError, error:
            if error_quality_disagree(error):
                raise MalformedFile(str(error))
            raise
    elif (len(in_fhands) == 2 and len(out_fhands) == 1 and
          in_formats == ['fasta', 'qual']):
        seq_records = SeqIO.QualityIO.PairedFastaQualIterator(in_fhands[0],
                                                              in_fhands[1])
        try:
            SeqIO.write(seq_records, out_fhands[0].name, out_format)
        except ValueError, error:
            if error_quality_disagree(error):
                raise MalformedFile(str(error))
            raise
    elif (len(in_fhands) == 2 and len(out_fhands) == 2 and
          in_formats == ['fasta', 'qual'] and out_format == 'fasta'):
        if copy_if_same_format:
            copyfileobj(in_fhands[0], out_fhands[0])
            copyfileobj(in_fhands[1], out_fhands[1])
        else:
            rel_symlink(in_fhands[0].name, out_fhands[0].name)
            rel_symlink(in_fhands[1].name, out_fhands[1].name)
    else:
        raise RuntimeError('Please fixme, we should not be here')

    for out_fhand in out_fhands:
        out_fhand.flush()