def orientate(self, seq):
    """Return *seq* oriented according to the conserved sequence found in it.

    seq: the sequence text handed to ``self.wu.search_text``

    ``self.wu.search_text(seq)`` is expected to return ``None`` when nothing
    matches, otherwise an indexable result whose element ``[1]`` is a key
    into ``self.conserved_sequences``.  When the matched entry has a true
    ``rc`` attribute the reverse complement of *seq* is returned; in every
    other case *seq* is returned unchanged.  A failed search is logged as an
    error and is not raised.
    """
    ret = self.wu.search_text(seq)
    if ret is None:
        # Pass seq itself (not a 1-tuple) so %s renders the sequence rather
        # than a tuple repr, and keep a space between the joined literals.
        logging.error('cannot identify any 16S conserved sequences '
                      'in:\n%s\nReturning original sequence', seq)
        return seq

    if self.conserved_sequences[ret[1]].rc:
        return reverse_complement(seq)
    return seq
def _sam_to_fastx2(self, cluster, prefix, fasta=True, qual=True, fastq=False,
                   pairs=False, sep12=False):
    """Write the reads of a cluster to fasta, qual and/or fastq files.

    cluster: object exposing ``first``, ``second`` and ``singles`` read
             lists.  ``first`` and ``second`` are walked in lockstep as the
             two mates of each pair.  Every read must provide ``qname``,
             ``seq``, ``qual`` and ``is_reversed()``.
    prefix:  file path prefix; every output path is ``prefix + suffix``
    fasta:   write sequences in fasta format
    qual:    write decoded quality scores to ``*.fasta.qual`` files
    fastq:   write sequences and qualities in fastq format (cannot be
             combined with fasta)
    pairs:   give paired reads their own files; when false the paired reads
             are written to the same "_s" files as the singles
    sep12:   with pairs, write the mates to separate "_R1"/"_R2" files
             instead of a single interleaved "_R12" file

    Reads for which ``is_reversed()`` is true are reverse complemented and
    their quality string is reversed before writing.

    Returns a dict mapping an output key ('r1', 'r2', 'r12', 's' for
    sequence files; 'q1', 'q2', 'q12', 'qs' for quality files) to the
    suffix of the file that was created.

    Raises RuntimeError when both fasta and fastq are requested.

    NOTE(review): fasta, qual and fastq are handled as three independent
    switches, opened in that order -- confirm against callers that pass
    fasta=False together with qual=True.
    """
    if fasta and fastq:
        raise RuntimeError("cannot output both fasta and fastq at the same time, choose one or the other")

    written_files = {}
    # key -> open file object; everything in here is closed in ``finally``
    handles = {}

    def open_output(key, suffix):
        # Open each output only once: a later request for the same key
        # (singles following unpaired-mode pairs) reuses the handle so the
        # file is appended to rather than truncated.
        if key not in handles:
            handles[key] = open(prefix + suffix, 'w')
            written_files[key] = suffix
        return handles[key]

    def write_read(read, seq_fp, qual_fp):
        # Emit a single read to the sequence file and, if present, the
        # quality file.
        name = read.qname
        seq = read.seq
        quality = read.qual
        if read.is_reversed():
            seq = reverse_complement(seq)
            quality = quality[::-1]
        if fasta:
            seq_fp.write(">%s\n%s\n" % (name, seq))
        if fastq:
            seq_fp.write('@%s\n%s\n+\n%s\n' % (name, seq, quality))
        if qual_fp is not None:
            scores = amplishot.parse.fastx.decode_quality(quality)
            scores = ' '.join(str(x) for x in scores)
            qual_fp.write('>%s\n%s\n' % (name, scores))

    try:
        if len(cluster.first):
            # we have reads in pairs: pick (sequence key, quality key,
            # file stem) for mate 1 and mate 2
            if pairs and sep12:
                mates = (('r1', 'q1', "_R1"), ('r2', 'q2', "_R2"))
            elif pairs:
                mates = (('r12', 'q12', "_R12"),) * 2
            else:
                mates = (('s', 'qs', "_s"),) * 2

            seq_fps = None
            qual_fps = (None, None)
            if fasta:
                seq_fps = [open_output(key, stem + ".fasta")
                           for key, _, stem in mates]
            if qual:
                qual_fps = [open_output(qkey, stem + ".fasta.qual")
                            for _, qkey, stem in mates]
            if fastq:
                seq_fps = [open_output(key, stem + ".fastq")
                           for key, _, stem in mates]

            # Nothing is written for pairs unless a sequence file is open.
            if seq_fps is not None:
                for ar1, ar2 in zip(cluster.first, cluster.second):
                    write_read(ar1, seq_fps[0], qual_fps[0])
                    write_read(ar2, seq_fps[1], qual_fps[1])

        if len(cluster.singles):
            # only reads in singles
            single_fp = None
            single_qfp = None
            if fasta:
                single_fp = open_output('s', "_s.fasta")
            if qual:
                single_qfp = open_output('qs', "_s.fasta.qual")
            if fastq:
                single_fp = open_output('s', "_s.fastq")

            for ar in cluster.singles:
                write_read(ar, single_fp, single_qfp)
    finally:
        # Close every file that was opened, even when a write failed.
        for handle in handles.values():
            handle.close()

    return written_files