def run(self):
    """
    Flag alignments whose target CDS contains a stop codon before its last codon.

    Loads the transcript, annotation, sequence, reference and alignment
    dictionaries, then walks the (position, target codon, query codon) tuples
    yielded by codonPairIterator for every entry of self.transcriptDict. The
    final tuple is dropped before scanning. Every target codon that
    codon_to_amino_acid translates to "*" adds one record built by
    seq_lib.cds_coordinate_to_bed to the details list: it is coloured with
    self.colors["input"] when the query codon is identical, and with self.rgb
    otherwise. Entries whose annotation or target CDS length is 75 or less
    are skipped and get no classification value. The two result dicts are
    passed to self.dumpValueDicts.
    """
    self.getTranscriptDict()
    self.getAnnotationDict()
    self.getSeqDict()
    self.getRefDict()
    self.getAlignmentDict()
    stop_records = defaultdict(list)
    flags = {}
    for aln_id, target_tx in self.transcriptDict.iteritems():
        ref_tx = self.annotationDict[psl_lib.remove_alignment_number(aln_id)]
        psl = self.alignmentDict[aln_id]
        if not (ref_tx.getCdsLength() > 75 and target_tx.getCdsLength() > 75):
            continue
        # TODO: this will miss an inframe stop if it is the last 3 bases that are not the annotated stop.
        # use the logic from EndStop to flag this
        all_pairs = list(codonPairIterator(ref_tx, target_tx, psl, self.seqDict, self.refDict))
        for pos, tgt_codon, qry_codon in all_pairs[:-1]:
            if seq_lib.codon_to_amino_acid(tgt_codon) != "*":
                continue
            # an identical codon in the query is drawn with the "input" colour
            if tgt_codon == qry_codon:
                colour = self.colors["input"]
            else:
                colour = self.rgb
            stop_records[aln_id].append(
                seq_lib.cds_coordinate_to_bed(target_tx, pos, pos + 3, colour, self.column))
            flags[aln_id] = 1
        # nothing found for this alignment: record an explicit 0
        flags.setdefault(aln_id, 0)
    self.dumpValueDicts(flags, stop_records)
def run(self):
    """
    Flag alignments whose target CDS does not end in a stop codon.

    An entry is classified 1 when any of the probed annotation CDS offsets
    fails to map into the target CDS (the mapping chain returns None), or
    when the last three characters of the target CDS are not one of TAA, TGA
    or TAG. The details record covers the last three CDS positions of the
    target; it is coloured with self.colors["input"] when the annotation CDS
    (read from self.refDict) also lacks a terminal stop codon, and with
    self.rgb otherwise. All other entries are classified 0. Entries whose
    annotation or target CDS length is 75 or less are skipped. The two result
    dicts are passed to self.dumpValueDicts.
    """
    stop_codons = ('TAA', 'TGA', 'TAG')
    self.getAlignmentDict()
    self.getTranscriptDict()
    self.getAnnotationDict()
    self.getSeqDict()
    self.getRefDict()
    bed_by_id = {}
    flag_by_id = {}
    for aln_id, target_tx in self.transcriptDict.iteritems():
        ref_tx = self.annotationDict[psl_lib.remove_alignment_number(aln_id)]
        psl = self.alignmentDict[aln_id]
        if not (ref_tx.getCdsLength() > 75 and target_tx.getCdsLength() > 75):
            continue
        cds_len = target_tx.getCdsLength()
        # NOTE(review): the probed offsets are cds_len-4 .. cds_len-2, while the BED record below
        # spans cds_len-3 .. cds_len, and cds_len is the *target* length although the offsets are fed
        # to the annotation's cds_coordinate_to_transcript -- confirm this window is intended.
        mapped = []
        for offset in xrange(cds_len - 4, cds_len - 1):
            tx_pos = ref_tx.cds_coordinate_to_transcript(offset)
            chrom_pos = psl.query_coordinate_to_target(tx_pos)
            mapped.append(target_tx.chromosome_coordinate_to_cds(chrom_pos))
        if None not in mapped and target_tx.get_cds(self.seqDict)[-3:] in stop_codons:
            flag_by_id[aln_id] = 0
            continue
        # does this problem exist in the reference?
        if ref_tx.get_cds(self.refDict)[-3:] in stop_codons:
            colour = self.rgb
        else:
            colour = self.colors["input"]
        bed_by_id[aln_id] = seq_lib.cds_coordinate_to_bed(target_tx, cds_len - 3, cds_len, colour, self.column)
        flag_by_id[aln_id] = 1
    self.dumpValueDicts(flag_by_id, bed_by_id)
def run(self):
    """
    Flag alignments whose target CDS does not begin with "ATG".

    An entry is classified 1 when any of the first three annotation CDS
    offsets fails to map into the target CDS (details coloured self.rgb), or
    when the first three characters of the target CDS differ from "ATG". In
    the latter case the details record is coloured self.colors["input"] if
    the annotation CDS (read from self.refDict) does not start with "ATG"
    either, and self.rgb otherwise. Remaining entries are classified 0. The
    details record always covers target CDS positions 0 to 3. The two result
    dicts are passed to self.dumpValueDicts.
    """
    self.getTranscriptDict()
    self.getAlignmentDict()
    self.getAnnotationDict()
    self.getSeqDict()
    self.getRefDict()
    bed_by_id = {}
    flag_by_id = {}
    for aln_id, target_tx in self.transcriptDict.iteritems():
        ref_tx = self.annotationDict[psl_lib.remove_alignment_number(aln_id)]
        psl = self.alignmentDict[aln_id]
        # do not include noncoding transcripts or lift-overs that contain less than 25 codons
        if not (ref_tx.getCdsLength() > 75 and target_tx.getCdsLength() > 75):
            continue
        mapped = []
        for offset in xrange(3):
            tx_pos = ref_tx.cds_coordinate_to_transcript(offset)
            chrom_pos = psl.query_coordinate_to_target(tx_pos)
            mapped.append(target_tx.chromosome_coordinate_to_cds(chrom_pos))
        if None in mapped:
            # at least one of the first three positions did not map into the target CDS
            colour = self.rgb
        elif target_tx.get_cds(self.seqDict)[:3] == "ATG":
            flag_by_id[aln_id] = 0
            continue
        elif ref_tx.get_cds(self.refDict)[:3] != "ATG":
            # the annotation itself does not start with ATG
            colour = self.colors["input"]
        else:
            colour = self.rgb
        bed_by_id[aln_id] = seq_lib.cds_coordinate_to_bed(target_tx, 0, 3, colour, self.column)
        flag_by_id[aln_id] = 1
    self.dumpValueDicts(flag_by_id, bed_by_id)
def run(self):
    """
    Flag alignments whose first coding exon does not have frame 0.

    For both the annotation and the target transcript, the exonFrames list is
    stripped of -1 entries and then the first remaining frame (strand is
    True) or the last remaining frame (strand is False) is compared with 0.
    The annotation is tested first: if it is off, the entry is classified 1
    with a details record coloured self.colors["input"]. Otherwise, if the
    target is off, the entry is classified 1 with a details record coloured
    self.rgb. All other entries are classified 0. Entries whose annotation or
    target CDS length is 75 or less are skipped. The two result dicts are
    passed to self.dumpValueDicts.
    """

    def starts_out_of_frame(tx):
        # remove all -1 frames because those are UTR exons
        coding_frames = [frame for frame in tx.exonFrames if frame != -1]
        if tx.strand is True:
            return coding_frames[0] != 0
        if tx.strand is False:
            return coding_frames[-1] != 0
        return False

    self.getTranscriptDict()
    self.getAnnotationDict()
    bed_by_id = {}
    flag_by_id = {}
    for aln_id, target_tx in self.transcriptDict.iteritems():
        ref_tx = self.annotationDict[psl_lib.remove_alignment_number(aln_id)]
        # do not include noncoding transcripts or lift-overs that contain less than 25 codons
        if not (ref_tx.getCdsLength() > 75 and target_tx.getCdsLength() > 75):
            continue
        # is this a problem in the reference?
        if starts_out_of_frame(ref_tx):
            colour = self.colors["input"]
        elif starts_out_of_frame(target_tx):
            colour = self.rgb
        else:
            flag_by_id[aln_id] = 0
            continue
        flag_by_id[aln_id] = 1
        bed_by_id[aln_id] = seq_lib.cds_coordinate_to_bed(target_tx, 0, 3, colour, self.column)
    self.dumpValueDicts(flag_by_id, bed_by_id)
def run(self):
    """
    Flag Augustus transcripts whose thickStart/thickStop differ from the source transcript.

    Each key of self.augustusTranscriptDict is reduced with
    psl_lib.remove_augustus_alignment_number and looked up in
    self.transcriptDict. Entries are skipped when no such transcript exists,
    when strand or chromosome differ, or when the source transcript has
    thickStart == thickStop. Otherwise the entry is classified 1 if either
    thickStart or thickStop differs and 0 if both agree. For a classified
    entry the details are a two-element list (first three and last three CDS
    positions of the Augustus transcript) when its CDS length exceeds 9, and
    a single record spanning the whole CDS otherwise. The two result dicts
    are passed to self.dumpValueDicts.
    """
    self.getAugustusTranscriptDict()
    self.getTranscriptDict()
    classify_dict = {}
    details_dict = {}
    for aug_aId, aug_t in self.augustusTranscriptDict.iteritems():
        # compute the lookup key once instead of once for the test and once for the fetch
        base_id = psl_lib.remove_augustus_alignment_number(aug_aId)
        if base_id not in self.transcriptDict:
            continue
        t = self.transcriptDict[base_id]
        if aug_t.strand != t.strand or aug_t.chromosome != t.chromosome or t.thickStart == t.thickStop:
            continue
        if t.thickStart == aug_t.thickStart and t.thickStop == aug_t.thickStop:
            classify_dict[aug_aId] = 0
            continue
        classify_dict[aug_aId] = 1
        s = aug_t.getCdsLength()
        if s > 9:
            # long enough to mark the two CDS ends separately
            details_dict[aug_aId] = [
                seq_lib.cds_coordinate_to_bed(aug_t, 0, 3, self.rgb, self.column),
                seq_lib.cds_coordinate_to_bed(aug_t, s - 3, s, self.rgb, self.column),
            ]
        else:
            details_dict[aug_aId] = seq_lib.cds_coordinate_to_bed(aug_t, 0, s, self.rgb, self.column)
    self.dumpValueDicts(classify_dict, details_dict)
def run(self):
    """
    Flag Augustus transcripts whose thickStart/thickStop differ from the source transcript.

    Each key of self.augustusTranscriptDict is reduced with
    psl_lib.remove_augustus_alignment_number and looked up in
    self.transcriptDict. Entries are skipped when no such transcript exists,
    when strand or chromosome differ, or when the source transcript has
    thickStart == thickStop. Otherwise the entry is classified 1 if either
    thickStart or thickStop differs and 0 if both agree. For a classified
    entry the details are a two-element list (first three and last three CDS
    positions of the Augustus transcript) when its CDS length exceeds 9, and
    a single record spanning the whole CDS otherwise. The two result dicts
    are passed to self.dumpValueDicts.
    """
    self.getAugustusTranscriptDict()
    self.getTranscriptDict()
    classify_dict = {}
    details_dict = {}
    for aug_aId, aug_t in self.augustusTranscriptDict.iteritems():
        # compute the lookup key once instead of once for the test and once for the fetch
        base_id = psl_lib.remove_augustus_alignment_number(aug_aId)
        if base_id not in self.transcriptDict:
            continue
        t = self.transcriptDict[base_id]
        if aug_t.strand != t.strand or aug_t.chromosome != t.chromosome or t.thickStart == t.thickStop:
            continue
        if t.thickStart == aug_t.thickStart and t.thickStop == aug_t.thickStop:
            classify_dict[aug_aId] = 0
            continue
        classify_dict[aug_aId] = 1
        s = aug_t.getCdsLength()
        if s > 9:
            # long enough to mark the two CDS ends separately
            details_dict[aug_aId] = [
                seq_lib.cds_coordinate_to_bed(aug_t, 0, 3, self.rgb, self.column),
                seq_lib.cds_coordinate_to_bed(aug_t, s - 3, s, self.rgb, self.column),
            ]
        else:
            details_dict[aug_aId] = seq_lib.cds_coordinate_to_bed(aug_t, 0, s, self.rgb, self.column)
    self.dumpValueDicts(classify_dict, details_dict)
def run(self, cds=False):
    """
    Flag transcripts that contain a run of "N" flanked on both sides by a base.

    :param cds: when exactly True, scan the sequence returned by t.get_cds and
        report runs with seq_lib.cds_coordinate_to_bed; for any other value
        scan t.get_mrna and report with seq_lib.transcript_coordinate_to_bed.

    A run is one or more upper-case "N" characters immediately preceded and
    followed by one of a/t/g/c in either case. A run at the very start or end
    of the sequence is therefore not reported. Each run yields one record
    spanning exactly the N characters. Transcripts with at least one run are
    classified 1 and the others 0. The two result dicts are passed to
    self.dumpValueDicts.
    """
    self.getTranscriptDict()
    self.getSeqDict()
    detailsDict = {}
    classifyDict = {}
    # The flanking bases are matched with zero-width lookbehind/lookahead so they are not consumed.
    # finditer only yields non-overlapping matches, so a pattern that consumed the flanks would
    # skip the second run in e.g. "ANNCNNA", where both runs share the single base between them.
    gap_re = re.compile(r"(?<=[atgcATGC])N+(?=[atgcATGC])")
    for aId, t in self.transcriptDict.iteritems():
        if cds is True:
            s = t.get_cds(self.seqDict)
            to_bed = seq_lib.cds_coordinate_to_bed
        else:
            s = t.get_mrna(self.seqDict)
            to_bed = seq_lib.transcript_coordinate_to_bed
        # m.start()/m.end() now delimit the N run itself, so no +1/-1 correction is needed
        tmp = [to_bed(t, m.start(), m.end(), self.rgb, self.column) for m in gap_re.finditer(s)]
        if tmp:
            detailsDict[aId] = tmp
            classifyDict[aId] = 1
        else:
            classifyDict[aId] = 0
    self.dumpValueDicts(classifyDict, detailsDict)
def run(self):
    """
    Flag alignments that contain codons which differ but encode the same amino acid.

    Iterates self.alignmentDict, ignoring ids that have no entry in
    self.transcriptDict and entries whose annotation or target CDS length is
    75 or less. For every (position, target codon, query codon) tuple yielded
    by codonPairIterator, a record built by seq_lib.cds_coordinate_to_bed is
    appended to the details list when the two codons are different strings
    but seq_lib.codon_to_amino_acid gives the same result for both. Entries
    with at least one such codon are classified 1; the other processed
    entries are classified 0. The two result dicts are passed to
    self.dumpValueDicts.
    """
    self.getTranscriptDict()
    self.getAnnotationDict()
    self.getSeqDict()
    self.getRefDict()
    self.getAlignmentDict()
    records = defaultdict(list)
    flags = {}
    for aln_id, psl in self.alignmentDict.iteritems():
        if aln_id not in self.transcriptDict:
            continue
        target_tx = self.transcriptDict[aln_id]
        ref_tx = self.annotationDict[psl_lib.remove_alignment_number(aln_id)]
        if not (ref_tx.getCdsLength() > 75 and target_tx.getCdsLength() > 75):
            continue
        for pos, tgt_codon, qry_codon in codonPairIterator(ref_tx, target_tx, psl, self.seqDict, self.refDict):
            if tgt_codon == qry_codon:
                continue
            if seq_lib.codon_to_amino_acid(tgt_codon) != seq_lib.codon_to_amino_acid(qry_codon):
                continue
            records[aln_id].append(
                seq_lib.cds_coordinate_to_bed(target_tx, pos, pos + 3, self.rgb, self.column))
            flags[aln_id] = 1
        # nothing found for this alignment: record an explicit 0
        flags.setdefault(aln_id, 0)
    self.dumpValueDicts(flags, records)