def seqio(in_fhands, out_fhands, out_format, copy_if_same_format=True):
    '''It converts sequence files between formats.

    in_fhands -- list of input file handles; the format of each one is
        detected with guess_format.
    out_fhands -- list of output file handles.
    out_format -- name of the format to write.
    copy_if_same_format -- when the single input is already in out_format,
        copy its content (True) or create a relative symlink between the
        names of the first input and the first output handle (False).

    Handled combinations:
      * one input that has a ``name`` attribute and is already in
        out_format: copied or symlinked, no conversion is done;
      * one input and one output: converted with SeqIO.convert;
      * one input, two outputs and out_format 'fasta': every record is
        written to the first output in out_format and to the second one
        as 'qual'.
    Any other combination matches no branch, so nothing is written and
    None is returned.

    A ValueError for which error_quality_disagree is true is re-raised as
    MalformedFile; any other ValueError propagates unchanged.
    '''
    in_formats = [guess_format(fhand) for fhand in in_fhands]

    if (len(in_formats) == 1 and in_formats[0] == out_format and
            hasattr(in_fhands[0], 'name')):
        # Same format in and out: no parsing is required.
        if copy_if_same_format:
            copyfileobj(in_fhands[0], out_fhands[0])
        else:
            rel_symlink(in_fhands[0].name, out_fhands[0].name)
    elif len(in_fhands) == 1 and len(out_fhands) == 1:
        try:
            SeqIO.convert(in_fhands[0], in_formats[0], out_fhands[0],
                          out_format)
        except ValueError as error:
            if error_quality_disagree(error):
                raise MalformedFile(str(error))
            raise
    elif (len(in_fhands) == 1 and len(out_fhands) == 2 and
          out_format == 'fasta'):
        try:
            # Sequence goes to the first handle, qualities to the second.
            for seq in read_seqrecords([in_fhands[0]]):
                SeqIO.write([seq], out_fhands[0], out_format)
                SeqIO.write([seq], out_fhands[1], 'qual')
        # "except X as e" is valid on Python 2.6+ and on Python 3; the
        # comma form previously used here is a syntax error on Python 3.
        except ValueError as error:
            if error_quality_disagree(error):
                raise MalformedFile(str(error))
            raise
def fastaqual_to_fasta(seq_fhand, qual_fhand, out_fhand):
    '''It converts a fasta and a qual file into a fastq format file.

    Despite the function name, the records are written in 'fastq' format.

    seq_fhand -- handle of the fasta file with the sequences.
    qual_fhand -- handle of the matching qual file.
    out_fhand -- output handle; only its ``name`` attribute is used, it is
        what gets passed to write_seqrecs.

    A ValueError for which error_quality_disagree is true is re-raised as
    MalformedFile; any other ValueError propagates unchanged.
    '''
    seqrecords = PairedFastaQualIterator(seq_fhand, qual_fhand)
    try:
        write_seqrecs(seqrecords, out_fhand.name, 'fastq')
    # "except X as e" is valid on Python 2.6+ and on Python 3; the comma
    # form previously used here is a syntax error on Python 3.
    except ValueError as error:
        if error_quality_disagree(error):
            raise MalformedFile(str(error))
        raise
def next(self):
    '''It returns the next packet: a list of items taken from the iterable.

    Items are pulled from self._iterable until the packet holds
    self._packet_size of them or the iterable runs out, so the last packet
    may be shorter. StopIteration is raised when no item could be read.

    A ValueError raised while reading for which error_quality_disagree is
    true is re-raised as MalformedFile; any other ValueError propagates.
    '''
    # Packets are lists and not generators (as they used to be) because
    # otherwise we would not be able to use multiprocessing.Pool
    limit = self._packet_size
    items = []
    try:
        for element in self._iterable:
            items.append(element)
            if len(items) >= limit:
                break
    except ValueError as error:
        if not error_quality_disagree(error):
            raise
        raise MalformedFile(str(error))

    if items:
        return items
    raise StopIteration
def _read_seqitems(fhands): 'it returns an iterator of seq items (tuples of name and chunk)' seq_iters = [] for fhand in fhands: file_format = get_format(fhand) if file_format == 'fasta': seq_iter = _itemize_fasta(fhand) elif 'multiline' not in file_format and 'fastq' in file_format: try: seq_iter = _itemize_fastq(fhand) except ValueError as error: if error_quality_disagree(error): raise MalformedFile(str(error)) raise else: msg = 'Format not supported by the itemizers: ' + file_format raise NotImplementedError(msg) seq_iter = assing_kind_to_seqs(SEQITEM, seq_iter, file_format) seq_iters.append(seq_iter) return chain.from_iterable(seq_iters)
def seqio(in_fhands, out_fhand, out_format, copy_if_same_format=True):
    '''It converts sequence files between formats.

    in_fhands -- list of input file handles; the format of each one is
        detected with guess_format and then passed through
        remove_multiline.
    out_fhand -- output file handle.
    out_format -- name of the format to write; it has to be one of the
        'SUPPORTED_OUTPUT_FORMATS' setting.
    copy_if_same_format -- when there is a single input that is already in
        out_format, copy its content (True) or create a relative symlink
        between the input and output handle names (False).

    Raises IncompatibleFormatError when out_format is not supported or
    when the writer reports that there are no suitable quality scores.
    Raises MalformedFile when the writer fails with a ValueError for which
    error_quality_disagree is true. Any other ValueError propagates
    unchanged.
    '''
    if out_format not in get_setting('SUPPORTED_OUTPUT_FORMATS'):
        raise IncompatibleFormatError("This output format is not supported")

    # NOTE(review): presumably remove_multiline lets multiline variants
    # compare equal to the plain out_format -- confirm against the helper.
    in_formats = [remove_multiline(guess_format(fhand))
                  for fhand in in_fhands]

    if len(in_fhands) == 1 and in_formats[0] == out_format:
        # Same format in and out: no parsing is required.
        if copy_if_same_format:
            copyfileobj(in_fhands[0], out_fhand)
        else:
            rel_symlink(in_fhands[0].name, out_fhand.name)
    else:
        seqs = _read_seqrecords(in_fhands)
        try:
            write_seqrecs(seqs, out_fhand, out_format)
        # "except X as e" is valid on Python 2.6+ and on Python 3; the
        # comma form previously used here is a syntax error on Python 3.
        except ValueError as error:
            if error_quality_disagree(error):
                raise MalformedFile(str(error))
            if 'No suitable quality scores' in str(error):
                msg = 'No qualities available to write output file'
                raise IncompatibleFormatError(msg)
            raise
def seqio(in_fhands, out_fhand, out_format, copy_if_same_format=True):
    '''It converts sequence files between formats.

    in_fhands -- list of input file handles; the format of each one is
        obtained with get_format.
    out_fhand -- output file handle.
    out_format -- name of the format to write; it has to be one of the
        'SUPPORTED_OUTPUT_FORMATS' setting.
    copy_if_same_format -- when there is a single input that is already in
        out_format, copy its content (True) or create a relative symlink
        between the input and output handle names (False).

    Raises IncompatibleFormatError when out_format is not supported or
    when the writer reports that there are no suitable quality scores.
    Raises MalformedFile when the writer fails with a ValueError for which
    error_quality_disagree is true. Any other ValueError propagates
    unchanged.
    '''
    if out_format not in get_setting('SUPPORTED_OUTPUT_FORMATS'):
        raise IncompatibleFormatError("This output format is not supported")

    in_formats = [get_format(fhand) for fhand in in_fhands]

    if len(in_fhands) == 1 and in_formats[0] == out_format:
        # Same format in and out: no parsing is required.
        if copy_if_same_format:
            copyfileobj(in_fhands[0], out_fhand)
        else:
            rel_symlink(in_fhands[0].name, out_fhand.name)
    else:
        seqs = _read_seqrecords(in_fhands)
        try:
            write_seqrecs(seqs, out_fhand, out_format)
        # "except X as e" is valid on Python 2.6+ and on Python 3; the
        # comma form previously used here is a syntax error on Python 3.
        except ValueError as error:
            if error_quality_disagree(error):
                raise MalformedFile(str(error))
            if 'No suitable quality scores' in str(error):
                msg = 'No qualities available to write output file'
                raise IncompatibleFormatError(msg)
            raise
def _read_seqitems(fhands, file_format): 'it returns an iterator of seq items (tuples of name and chunk)' seq_iters = [] for fhand in fhands: if file_format == GUESS_FORMAT or file_format is None: file_format = guess_format(fhand) else: file_format = file_format if file_format == 'fasta': seq_iter = _itemize_fasta(fhand) elif 'multiline' not in file_format and 'fastq' in file_format: try: seq_iter = _itemize_fastq(fhand) except ValueError as error: if error_quality_disagree(error): raise MalformedFile(str(error)) raise else: msg = 'Format not supported by the itemizers: ' + file_format raise NotImplementedError(msg) seq_iter = assing_kind_to_seqs(SEQITEM, seq_iter, file_format) seq_iters.append(seq_iter) return chain.from_iterable(seq_iters)
try: for seq in read_seqrecords([in_fhands[0]]): SeqIO.write([seq], out_fhands[0], out_format) SeqIO.write([seq], out_fhands[1], 'qual') except ValueError, error: if error_quality_disagree(error): raise MalformedFile(str(error)) raise elif (len(in_fhands) == 2 and len(out_fhands) == 1 and in_formats == ['fasta', 'qual']): seq_records = SeqIO.QualityIO.PairedFastaQualIterator(in_fhands[0], in_fhands[1]) try: SeqIO.write(seq_records, out_fhands[0].name, out_format) except ValueError, error: if error_quality_disagree(error): raise MalformedFile(str(error)) raise elif (len(in_fhands) == 2 and len(out_fhands) == 2 and in_formats == ['fasta', 'qual'] and out_format == 'fasta'): if copy_if_same_format: copyfileobj(in_fhands[0], out_fhands[0]) copyfileobj(in_fhands[1], out_fhands[1]) else: rel_symlink(in_fhands[0].name, out_fhands[0].name) rel_symlink(in_fhands[1].name, out_fhands[1].name) else: raise RuntimeError('Please fixme, we should not be here') for out_fhand in out_fhands: out_fhand.flush()