Example #1
0
 def get_cds(self, seq_dict):
     """
     Build the coding sequence (CDS) of this transcript as a string.

     The chromosome sequence is looked up in seq_dict under self.chromosome.
     Every exon in self.exon_intervals is trimmed to the part that lies
     between thick_start and thick_stop, and the trimmed pieces are joined
     in that order. The result is reverse complemented when self.strand is
     falsy, so it is returned in 5'-3' orientation. An empty string is
     returned when thick_start and thick_stop are both 0.
     """
     chrom_seq = seq_dict[self.chromosome]
     assert self.stop <= len(chrom_seq)
     cds_start, cds_stop = self.thick_start, self.thick_stop
     # both thick coordinates at 0 is how a non-coding gene is recognised here
     if cds_start == 0 and cds_stop == 0:
         return ""
     pieces = []
     for exon in self.exon_intervals:
         if cds_start < exon.start:
             # exon begins after the CDS start
             if exon.stop < cds_stop:
                 # exon lies entirely inside the CDS
                 begin, end = exon.start, exon.stop
             elif exon.start < cds_stop:
                 # the CDS ends inside this exon
                 begin, end = exon.start, cds_stop
             else:
                 # exon lies beyond the CDS end
                 continue
         elif cds_stop <= exon.stop:
             # a single exon holds the whole CDS
             begin, end = cds_start, cds_stop
         elif cds_start < exon.stop:
             # the CDS begins inside this exon
             begin, end = cds_start, exon.stop
         else:
             # exon lies before the CDS start
             continue
         pieces.append(chrom_seq[begin:end])
     joined = "".join(pieces)
     if self.strand:
         return joined
     return reverse_complement(joined)
Example #2
0
 def get_sequence(self, seq_dict, stranded=True):
     """
     Return the sequence covered by this interval.

     The slice [self.start:self.stop] is taken from seq_dict[self.chromosome].
     When stranded is true (the default) the result is in transcript
     orientation: returned as-is for a truthy self.strand and reverse
     complemented otherwise. When stranded is false the + strand slice is
     returned regardless of self.strand.
     """
     plus_strand_seq = seq_dict[self.chromosome][self.start:self.stop]
     # Decide on truthiness rather than identity with True/False, so that a
     # strand such as 1 or None can no longer fall through both checks and
     # make the method silently return None.
     if not stranded or self.strand:
         return plus_strand_seq
     return reverse_complement(plus_strand_seq)
Example #3
0
def bam_to_rec(read):
    """Format a pysam read as a four-line FASTQ record string.

    Uses read.qname, read.seq and read.qual. When read.is_reverse is set,
    the sequence is reverse complemented and the quality string is reversed
    before formatting.

    See https://www.biostars.org/p/6970/
    """
    bases, quals = read.seq, read.qual
    if read.is_reverse:
        # keep bases and qualities aligned after flipping the read
        bases, quals = reverse_complement(bases), quals[::-1]
    return "@{}\n{}\n+\n{}\n".format(read.qname, bases, quals)
Example #4
0
 def get_mrna(self, seq_dict):
     """
     Assemble the spliced (mRNA) sequence of this transcript.

     The chromosome sequence is taken from seq_dict[self.chromosome]; each
     interval in self.exon_intervals is sliced out of it and the slices are
     joined in that order. The joined string is returned unchanged when
     self.strand is True and reverse complemented otherwise, giving the
     sequence in 5'-3' transcript orientation.
     """
     chrom_seq = seq_dict[self.chromosome]
     assert self.stop <= len(chrom_seq)
     spliced = "".join(chrom_seq[exon.start:exon.stop] for exon in self.exon_intervals)
     if self.strand is not True:
         return reverse_complement(spliced)
     return spliced
Example #5
0
 def get_cds(self, seq_dict, in_frame=True):
     """
     Build the coding sequence (CDS) of this transcript as a string,
     optionally trimmed to be in frame.

     The chromosome sequence is looked up in seq_dict under self.chromosome.
     Every exon in self.exon_intervals is trimmed to the part that lies
     between thick_start and thick_stop, and the trimmed pieces are joined
     in that order. The result is reverse complemented when self.strand is
     falsy, so it is in 5'-3' orientation. An empty string is returned when
     thick_start and thick_stop are both 0.

     When in_frame is True, the first find_offset(self.exon_frames,
     self.strand) characters are dropped from the oriented sequence.

     Overrides get_cds in GenePredTranscript to provide frame functionality.
     TODO: now loses the store-and-reuse functionality to avoid slicing offset sizes each time.
     """
     chrom_seq = seq_dict[self.chromosome]
     assert self.stop <= len(chrom_seq)
     cds_start, cds_stop = self.thick_start, self.thick_stop
     # both thick coordinates at 0 is how a non-coding gene is recognised here
     if cds_start == 0 and cds_stop == 0:
         return ""
     pieces = []
     for exon in self.exon_intervals:
         if cds_start < exon.start:
             # exon begins after the CDS start
             if exon.stop < cds_stop:
                 # exon lies entirely inside the CDS
                 begin, end = exon.start, exon.stop
             elif exon.start < cds_stop:
                 # the CDS ends inside this exon
                 begin, end = exon.start, cds_stop
             else:
                 # exon lies beyond the CDS end
                 continue
         elif cds_stop <= exon.stop:
             # a single exon holds the whole CDS
             begin, end = cds_start, cds_stop
         elif cds_start < exon.stop:
             # the CDS begins inside this exon
             begin, end = cds_start, exon.stop
         else:
             # exon lies before the CDS start
             continue
         pieces.append(chrom_seq[begin:end])
     oriented = "".join(pieces)
     if not self.strand:
         oriented = reverse_complement(oriented)
     if in_frame is True:
         # drop the leading bases reported by find_offset
         return oriented[find_offset(self.exon_frames, self.strand):]
     return oriented