def run(self, shortIntronSize=30):
    """
    Classify every transcript by the splice sites of its introns that are
    not fully contained in the thick region.

    An intron is examined only when it is longer than shortIntronSize and
    is not entirely inside [thickStart, thickStop). Its first two and last
    two stranded bases are taken as donor and acceptor; the intron is
    flagged when the donor is not a key of self.non_canonical or maps to a
    different acceptor.

    A transcript with at least one flagged intron is classified 1 and gets
    one BED record per flagged intron; every other transcript is
    classified 0. Results are handed to self.dumpValueDicts.

    shortIntronSize: introns of this length or shorter are ignored.
    """
    # Loaders are called in a fixed order; they populate the dicts read below.
    self.getTranscriptDict()
    self.getSeqDict()
    self.getAlignmentDict()
    self.getAnnotationDict()
    self.getRefDict()

    classifyDict = {}
    detailsDict = defaultdict(list)

    for aId, t in self.transcriptDict.iteritems():
        # Annotations are keyed by the id with its alignment number removed.
        a = self.annotationDict[psl_lib.remove_alignment_number(aId)]
        aln = self.alignmentDict[aId]

        for intron in t.intronIntervals:
            is_short = len(intron) <= shortIntronSize
            if is_short or (intron.start >= t.thickStart and intron.stop < t.thickStop):
                continue

            seq = intron.get_sequence(self.seqDict, strand=True)
            donor = seq[:2]
            acceptor = seq[-2:]
            bad_splice = (donor not in self.non_canonical
                          or self.non_canonical[donor] != acceptor)
            if not bad_splice:
                continue

            classifyDict[aId] = 1
            # Is this an intron that exists in the reference and has the
            # same problem there? If so it is drawn in the "input" color.
            same_in_reference = compareIntronToReference(
                intron, a, aln, self.non_canonical, self.refDict) is True
            color = self.colors["input"] if same_in_reference else self.rgb
            detailsDict[aId].append(
                seq_lib.splice_intron_interval_to_bed(t, intron, color, self.column))

        # Transcripts with no flagged intron are recorded explicitly as 0.
        classifyDict.setdefault(aId, 0)

    self.dumpValueDicts(classifyDict, detailsDict)
def run(self, shortIntronSize=30):
    """
    Classify every aligned transcript by whether all intron intervals of
    its source annotation are still present after alignment.

    Each target intron's boundaries are passed through
    aln.target_coordinate_to_query and then
    a.transcript_coordinate_to_chromosome (presumably projecting them back
    onto the annotation's chromosome coordinates -- confirm against those
    helpers) and compared, as (start, stop) pairs, with the annotation's
    own intron intervals.

    A transcript is classified 1 when at least one annotation intron has
    no matching target intron, otherwise 0. For transcripts classified 1,
    a BED record is added for every target intron that matches no
    annotation intron and is at least shortIntronSize long. Results are
    handed to self.dumpValueDicts.

    shortIntronSize: minimum length for an unmatched intron to be reported.
    """
    self.getAnnotationDict()
    self.getTranscriptDict()
    self.getAlignmentDict()

    classifyDict = {}
    detailsDict = defaultdict(list)

    for aId, aln in self.alignmentDict.iteritems():
        # Alignments without a transcript entry are left unclassified.
        if aId not in self.transcriptDict:
            continue
        t = self.transcriptDict[aId]
        # Annotations are keyed by the id with its alignment number removed.
        a = self.annotationDict[psl_lib.remove_alignment_number(aId)]

        original_introns = set((x.start, x.stop) for x in a.intronIntervals)

        target_introns = set()
        target_intron_mapping = {}
        for intron in t.intronIntervals:
            # The start is mapped from the base before the intron and then
            # shifted forward by one; the stop is mapped directly.
            query_start = aln.target_coordinate_to_query(intron.start - 1)
            query_stop = aln.target_coordinate_to_query(intron.stop)
            key = (a.transcript_coordinate_to_chromosome(query_start) + 1,
                   a.transcript_coordinate_to_chromosome(query_stop))
            target_introns.add(key)
            target_intron_mapping[key] = intron

        missing_introns = original_introns - target_introns
        if not missing_introns:
            # NOTE(review): the 0 classification is paired with the
            # missing-introns check; confirm this is the intended nesting.
            classifyDict[aId] = 0
            continue

        classifyDict[aId] = 1
        for key in target_introns - original_introns:
            intron = target_intron_mapping[key]
            if len(intron) >= shortIntronSize:
                detailsDict[aId].append(
                    seq_lib.splice_intron_interval_to_bed(t, intron, self.rgb, self.column))

    self.dumpValueDicts(classifyDict, detailsDict)