def run(self):
     """Flag stop codons found inside the CDS, ahead of its final codon.

     Loads the transcript, annotation, sequence, reference and alignment
     dictionaries, then walks every entry of ``self.transcriptDict``. Entries
     whose annotation CDS or own CDS is 75 bases or shorter are skipped and
     receive no classification at all.

     For the remaining entries every codon pair yielded by
     ``codonPairIterator`` except the last one is inspected. Each pair whose
     target codon translates to "*" adds one BED record to the details,
     coloured with ``self.colors["input"]`` when the query codon is identical
     to the target codon and with ``self.rgb`` otherwise. The entry is
     classified 1 if at least one record was added, 0 if none was.

     Both result dictionaries are passed to ``self.dumpValueDicts``.
     """
     self.getTranscriptDict()
     self.getAnnotationDict()
     self.getSeqDict()
     self.getRefDict()
     self.getAlignmentDict()
     details = defaultdict(list)
     classifications = {}
     for aln_id, transcript in self.transcriptDict.iteritems():
         annotation = self.annotationDict[psl_lib.remove_alignment_number(aln_id)]
         alignment = self.alignmentDict[aln_id]
         if not (annotation.getCdsLength() > 75 and transcript.getCdsLength() > 75):
             continue
         # TODO: because the final pair is dropped below, an in-frame stop sitting in the
         # last 3 bases that is not the annotated stop is missed. Use the logic from
         # EndStop to flag this.
         pairs = list(codonPairIterator(annotation, transcript, alignment, self.seqDict, self.refDict))
         saw_stop = False
         for pos, target_codon, query_codon in pairs[:-1]:
             if seq_lib.codon_to_amino_acid(target_codon) != "*":
                 continue
             # identical codons on both sides get the "input" colour
             colour = self.colors["input"] if target_codon == query_codon else self.rgb
             details[aln_id].append(seq_lib.cds_coordinate_to_bed(transcript, pos, pos + 3, colour, self.column))
             saw_stop = True
         classifications[aln_id] = 1 if saw_stop else 0
     self.dumpValueDicts(classifications, details)
 def run(self):
     """Flag transcripts whose CDS does not end in a stop codon.

     Loads the alignment, transcript, annotation, sequence and reference
     dictionaries, then walks ``self.transcriptDict``. Entries whose annotation
     CDS or own CDS is 75 bases or shorter are skipped without a
     classification.

     An entry is classified 1 when any of the three probed annotation CDS
     offsets comes back as ``None`` after being converted through the
     annotation, the alignment and the transcript, or when the last three
     bases of the transcript CDS are not one of TAA / TGA / TAG. The details
     record covers the last three CDS bases; it is coloured with
     ``self.colors["input"]`` when the annotation's own CDS (from
     ``self.refDict``) also lacks a terminal stop, and ``self.rgb`` otherwise.
     All other entries are classified 0.

     Results are passed to ``self.dumpValueDicts``.
     """
     stop_codons = ('TAA', 'TGA', 'TAG')
     self.getAlignmentDict()
     self.getTranscriptDict()
     self.getAnnotationDict()
     self.getSeqDict()
     self.getRefDict()
     details = {}
     classifications = {}
     for aln_id, transcript in self.transcriptDict.iteritems():
         annotation = self.annotationDict[psl_lib.remove_alignment_number(aln_id)]
         alignment = self.alignmentDict[aln_id]
         if not (annotation.getCdsLength() > 75 and transcript.getCdsLength() > 75):
             continue
         cds_len = transcript.getCdsLength()
         # NOTE(review): the offsets are derived from the *transcript* CDS length but are fed to
         # the *annotation's* coordinate conversion, and they cover cds_len-4 .. cds_len-2 rather
         # than the final three offsets -- confirm that both are intended.
         lifted = []
         for offset in xrange(cds_len - 4, cds_len - 1):
             tx_pos = annotation.cds_coordinate_to_transcript(offset)
             chrom_pos = alignment.query_coordinate_to_target(tx_pos)
             lifted.append(transcript.chromosome_coordinate_to_cds(chrom_pos))
         if None not in lifted and transcript.get_cds(self.seqDict)[-3:] in stop_codons:
             classifications[aln_id] = 0
             continue
         # does this problem exist in the reference?
         if annotation.get_cds(self.refDict)[-3:] in stop_codons:
             colour = self.rgb
         else:
             colour = self.colors["input"]
         details[aln_id] = seq_lib.cds_coordinate_to_bed(transcript, cds_len - 3, cds_len, colour, self.column)
         classifications[aln_id] = 1
     self.dumpValueDicts(classifications, details)
 def run(self):
     """Flag transcripts whose CDS does not begin with ATG.

     Loads the transcript, alignment, annotation, sequence and reference
     dictionaries, then walks ``self.transcriptDict``.

     An entry is classified 1 in two situations:

     * one of the first three annotation CDS offsets comes back as ``None``
       after conversion through the annotation, alignment and transcript
       (details record coloured ``self.rgb``);
     * the first three bases of the transcript CDS are not "ATG" (details
       record coloured ``self.colors["input"]`` if the annotation CDS from
       ``self.refDict`` does not start with "ATG" either, else ``self.rgb``).

     The details record always covers CDS offsets 0-3. Every other entry
     that passes the length filter is classified 0. Results are passed to
     ``self.dumpValueDicts``.
     """
     self.getTranscriptDict()
     self.getAlignmentDict()
     self.getAnnotationDict()
     self.getSeqDict()
     self.getRefDict()
     details = {}
     classifications = {}
     for aln_id, transcript in self.transcriptDict.iteritems():
         annotation = self.annotationDict[psl_lib.remove_alignment_number(aln_id)]
         alignment = self.alignmentDict[aln_id]
         # do not include noncoding transcripts or lift-overs that contain less than 25 codons
         if not (annotation.getCdsLength() > 75 and transcript.getCdsLength() > 75):
             continue
         lifted = []
         for offset in xrange(3):
             tx_pos = annotation.cds_coordinate_to_transcript(offset)
             chrom_pos = alignment.query_coordinate_to_target(tx_pos)
             lifted.append(transcript.chromosome_coordinate_to_cds(chrom_pos))
         if None in lifted:
             colour = self.rgb
         elif transcript.get_cds(self.seqDict)[:3] != "ATG":
             # the reference having the same problem is reported in the "input" colour
             if annotation.get_cds(self.refDict)[:3] != "ATG":
                 colour = self.colors["input"]
             else:
                 colour = self.rgb
         else:
             classifications[aln_id] = 0
             continue
         details[aln_id] = seq_lib.cds_coordinate_to_bed(transcript, 0, 3, colour, self.column)
         classifications[aln_id] = 1
     self.dumpValueDicts(classifications, details)
 def run(self):
     """Flag transcripts whose leading coding exon frame is not 0.

     Loads the transcript and annotation dictionaries, then walks
     ``self.transcriptDict``. Entries whose annotation CDS or own CDS is 75
     bases or shorter are skipped without a classification.

     The annotation is examined first: if its frame check fails, the entry is
     classified 1 with a details record coloured ``self.colors["input"]``.
     Otherwise the transcript itself is examined and, on failure, classified 1
     with a record coloured ``self.rgb``. Entries passing both checks are
     classified 0. The details record always covers CDS offsets 0-3 of the
     transcript. Results are passed to ``self.dumpValueDicts``.
     """
     def starts_out_of_frame(tx):
         # remove all -1 frames because those are UTR exons
         frames = [frame for frame in tx.exonFrames if frame != -1]
         if tx.strand is True:
             return frames[0] != 0
         if tx.strand is False:
             return frames[-1] != 0
         # a strand that is neither True nor False is never flagged
         return False

     self.getTranscriptDict()
     self.getAnnotationDict()
     details = {}
     classifications = {}
     for aln_id, transcript in self.transcriptDict.iteritems():
         annotation = self.annotationDict[psl_lib.remove_alignment_number(aln_id)]
         # do not include noncoding transcripts or lift-overs that contain less than 25 codons
         if not (annotation.getCdsLength() > 75 and transcript.getCdsLength() > 75):
             continue
         # is this a problem in the reference?
         if starts_out_of_frame(annotation):
             classifications[aln_id] = 1
             details[aln_id] = seq_lib.cds_coordinate_to_bed(transcript, 0, 3, self.colors["input"], self.column)
         elif starts_out_of_frame(transcript):
             classifications[aln_id] = 1
             details[aln_id] = seq_lib.cds_coordinate_to_bed(transcript, 0, 3, self.rgb, self.column)
         else:
             classifications[aln_id] = 0
     self.dumpValueDicts(classifications, details)
 def run(self):
     """Flag Augustus transcripts whose CDS bounds differ from the source transcript.

     Loads the Augustus transcript and transcript dictionaries, then walks
     ``self.augustusTranscriptDict``. An entry is skipped, without any
     classification, when its id (after
     ``psl_lib.remove_augustus_alignment_number``) is not a key of
     ``self.transcriptDict``, when strand or chromosome differ between the two
     transcripts, or when the source transcript has ``thickStart`` equal to
     ``thickStop``.

     Remaining entries are classified 1 if ``thickStart`` or ``thickStop``
     differs, else 0. For a classification of 1 the details are a two-element
     list (first three and last three CDS bases of the Augustus transcript)
     when its CDS is longer than 9 bases, or a single record spanning the whole
     CDS otherwise. Results are passed to ``self.dumpValueDicts``.
     """
     self.getAugustusTranscriptDict()
     self.getTranscriptDict()
     classifications = {}
     details = {}
     for aug_id, aug_tx in self.augustusTranscriptDict.iteritems():
         source_id = psl_lib.remove_augustus_alignment_number(aug_id)
         if source_id not in self.transcriptDict:
             continue
         source_tx = self.transcriptDict[source_id]
         if (aug_tx.strand != source_tx.strand or aug_tx.chromosome != source_tx.chromosome or
                 source_tx.thickStart == source_tx.thickStop):
             continue
         bounds_moved = (source_tx.thickStart != aug_tx.thickStart or
                         source_tx.thickStop != aug_tx.thickStop)
         if not bounds_moved:
             classifications[aug_id] = 0
             continue
         classifications[aug_id] = 1
         cds_len = aug_tx.getCdsLength()
         if cds_len <= 9:
             # short CDS: one record for the whole thing (note: not wrapped in a list)
             details[aug_id] = seq_lib.cds_coordinate_to_bed(aug_tx, 0, cds_len, self.rgb, self.column)
         else:
             head = seq_lib.cds_coordinate_to_bed(aug_tx, 0, 3, self.rgb, self.column)
             tail = seq_lib.cds_coordinate_to_bed(aug_tx, cds_len - 3, cds_len, self.rgb, self.column)
             details[aug_id] = [head, tail]
     self.dumpValueDicts(classifications, details)
 def run(self):
     """Report Augustus transcripts whose thickStart/thickStop moved.

     After loading the Augustus transcript and transcript dictionaries, each
     item of ``self.augustusTranscriptDict`` is matched to the source
     transcript stored under its id with the Augustus alignment number
     removed. Items with no such source transcript, with a different strand
     or chromosome, or whose source transcript has ``thickStart`` equal to
     ``thickStop`` are ignored and get no classification.

     Matching ``thickStart`` and ``thickStop`` gives a classification of 0. Any
     difference gives 1, together with details built from the Augustus
     transcript: records for CDS offsets 0-3 and (length - 3)-length in a list
     when the CDS is longer than 9 bases, otherwise one bare record for
     offsets 0-length. Both dictionaries go to ``self.dumpValueDicts``.
     """
     self.getAugustusTranscriptDict()
     self.getTranscriptDict()
     classifications = {}
     details = {}
     for aug_id, aug_tx in self.augustusTranscriptDict.iteritems():
         parent_id = psl_lib.remove_augustus_alignment_number(aug_id)
         if parent_id not in self.transcriptDict:
             continue
         parent = self.transcriptDict[parent_id]
         if aug_tx.strand != parent.strand or aug_tx.chromosome != parent.chromosome:
             continue
         if parent.thickStart == parent.thickStop:
             continue
         if parent.thickStart != aug_tx.thickStart or parent.thickStop != aug_tx.thickStop:
             classifications[aug_id] = 1
             cds_len = aug_tx.getCdsLength()
             # every record shares the same transcript, colour and column
             mark = lambda start, stop: seq_lib.cds_coordinate_to_bed(aug_tx, start, stop, self.rgb, self.column)
             if cds_len > 9:
                 details[aug_id] = [mark(0, 3), mark(cds_len - 3, cds_len)]
             else:
                 details[aug_id] = mark(0, cds_len)
         else:
             classifications[aug_id] = 0
     self.dumpValueDicts(classifications, details)
 def run(self, cds=False):
     """Flag transcripts containing runs of N flanked by called bases.

     Loads the transcript and sequence dictionaries, then scans one sequence
     per entry of ``self.transcriptDict``. Every maximal run of upper-case
     ``N`` that has an A/C/G/T (either case) immediately before and after it
     yields one BED record covering just the N run. Runs touching either end
     of the sequence are not reported.

     Args:
         cds: when exactly ``True``, scan ``t.get_cds(...)`` and emit records
             through ``seq_lib.cds_coordinate_to_bed``; for any other value
             scan ``t.get_mrna(...)`` and emit records through
             ``seq_lib.transcript_coordinate_to_bed``.

     An entry is classified 1 (with its list of records stored in the details)
     when at least one run is found, and 0 otherwise. Results are passed to
     ``self.dumpValueDicts``.
     """
     self.getTranscriptDict()
     self.getSeqDict()
     details = {}
     classifications = {}
     # The flanking bases are asserted with lookbehind/lookahead instead of being consumed.
     # finditer() only yields non-overlapping matches, so a pattern that swallowed the
     # flanks would miss the second gap in e.g. "ANNANNA", where both gaps share one base.
     gap_re = re.compile(r"(?<=[atgcATGC])N+(?=[atgcATGC])")
     for aln_id, transcript in self.transcriptDict.iteritems():
         if cds is True:
             seq = transcript.get_cds(self.seqDict)
             gaps = [seq_lib.cds_coordinate_to_bed(transcript, m.start(), m.end(), self.rgb, self.column)
                     for m in gap_re.finditer(seq)]
         else:
             seq = transcript.get_mrna(self.seqDict)
             gaps = [seq_lib.transcript_coordinate_to_bed(transcript, m.start(), m.end(), self.rgb, self.column)
                     for m in gap_re.finditer(seq)]
         if gaps:
             details[aln_id] = gaps
             classifications[aln_id] = 1
         else:
             classifications[aln_id] = 0
     self.dumpValueDicts(classifications, details)
 def run(self):
     """Flag codons that differ in sequence but translate to the same amino acid.

     Loads the transcript, annotation, sequence, reference and alignment
     dictionaries, then walks ``self.alignmentDict``. Alignments whose id is
     not a key of ``self.transcriptDict`` are skipped, as are entries whose
     annotation CDS or transcript CDS is 75 bases or shorter; neither gets a
     classification.

     For every codon pair yielded by ``codonPairIterator`` where target and
     query codon are different strings yet ``seq_lib.codon_to_amino_acid``
     returns the same value for both, a BED record coloured ``self.rgb`` is
     appended to the details. The entry is classified 1 if any record was
     appended and 0 otherwise. Results are passed to ``self.dumpValueDicts``.
     """
     self.getTranscriptDict()
     self.getAnnotationDict()
     self.getSeqDict()
     self.getRefDict()
     self.getAlignmentDict()
     details = defaultdict(list)
     classifications = {}
     for aln_id, alignment in self.alignmentDict.iteritems():
         if aln_id not in self.transcriptDict:
             continue
         transcript = self.transcriptDict[aln_id]
         annotation = self.annotationDict[psl_lib.remove_alignment_number(aln_id)]
         if not (annotation.getCdsLength() > 75 and transcript.getCdsLength() > 75):
             continue
         changed = False
         for pos, target_codon, query_codon in codonPairIterator(annotation, transcript, alignment,
                                                                 self.seqDict, self.refDict):
             if target_codon == query_codon:
                 continue
             if seq_lib.codon_to_amino_acid(target_codon) != seq_lib.codon_to_amino_acid(query_codon):
                 continue
             details[aln_id].append(seq_lib.cds_coordinate_to_bed(transcript, pos, pos + 3, self.rgb, self.column))
             changed = True
         classifications[aln_id] = 1 if changed else 0
     self.dumpValueDicts(classifications, details)