def run(self):
    """Flag annotations that short_cds() reports as short but that still have a CDS.

    For every (ens_id, annotation) pair from self.annotation_iterator():

    * if comp_ann_lib.short_cds(annotation) returns True and cds_size is
      non-zero, a BED record covering CDS coordinates 0..cds_size is
      appended to self.details_dict[ens_id] and self.classify_dict[ens_id]
      is set to 1;
    * otherwise self.classify_dict[ens_id] is set to 0.

    self.dump_results_to_disk() is called once after the loop.
    """
    for ens_id, a in self.annotation_iterator():
        # A cds_size of 0 is excluded here, so such records are classified 0.
        short_but_coding = comp_ann_lib.short_cds(a) is True and a.cds_size != 0
        if not short_but_coding:
            self.classify_dict[ens_id] = 0
            continue
        whole_cds_rec = seq_lib.cds_coordinate_to_bed(
            a, 0, a.cds_size, self.rgb, self.column)
        self.details_dict[ens_id].append(whole_cds_rec)
        self.classify_dict[ens_id] = 1
    self.dump_results_to_disk()
def run(self):
    """Flag Augustus transcripts whose thick_stop differs from the paired transcript.

    Iterates (aug_aln_id, aug_t, t) triples from
    self.augustus_transcript_transmap_iterator(). When t.thick_stop and
    aug_t.thick_stop differ, a BED record for the last three CDS positions
    of aug_t (cds_size - 3 .. cds_size) is appended to
    self.details_dict[aug_aln_id] and the classification is 1; otherwise
    the classification is 0.

    NOTE(review): `t` is presumably the transMap transcript, going by the
    iterator's name -- confirm against the iterator definition.

    self.dump_results_to_disk() is called once after the loop.
    """
    for aug_aln_id, aug_t, t in self.augustus_transcript_transmap_iterator():
        stops_differ = t.thick_stop != aug_t.thick_stop
        if not stops_differ:
            self.classify_dict[aug_aln_id] = 0
            continue
        cds_len = aug_t.cds_size
        # Report the final three CDS bases of the Augustus transcript.
        tail_rec = seq_lib.cds_coordinate_to_bed(
            aug_t, cds_len - 3, cds_len, self.rgb, self.column)
        self.details_dict[aug_aln_id].append(tail_rec)
        self.classify_dict[aug_aln_id] = 1
    self.dump_results_to_disk()
def BeginStart(transcript_dict, seq_dict):
    """Classify transcripts whose CDS does not begin with "ATG".

    Transcripts whose CDS sequence is 75 bases or shorter are skipped and
    get no entry in either returned dict.

    :param transcript_dict: passed to transcript_iterator() to obtain
        (ens_id, transcript) pairs.
    :param seq_dict: passed to each transcript's get_cds().
    :return: (classify_dict, details_dict). classify_dict maps ens_id to 1
        when the first three CDS bases are not "ATG", else 0. details_dict
        maps each flagged ens_id to one BED record for CDS positions 0..3,
        labelled with this function's name.
    """
    classify_dict, details_dict = {}, {}
    for ens_id, t in transcript_iterator(transcript_dict):
        cds_seq = t.get_cds(seq_dict)
        if len(cds_seq) <= 75:
            continue
        if cds_seq[:3] == "ATG":
            classify_dict[ens_id] = 0
            continue
        classify_dict[ens_id] = 1
        details_dict[ens_id] = seq_lib.cds_coordinate_to_bed(
            t, 0, 3, rgb, sys._getframe().f_code.co_name)
    return classify_dict, details_dict
def StartOutOfFrame(transcript_dict, seq_dict):
    """Classify transcripts whose first coding exon frame is not 0.

    Transcripts with getCdsLength() <= 75 are skipped and get no entry in
    either returned dict. Frames equal to -1 are dropped before the check
    (presumably exons without coding sequence -- confirm). For a transcript
    with strand True the first remaining frame is tested; for strand False
    the last remaining frame is tested.

    :param transcript_dict: passed to transcript_iterator() to obtain
        (ens_id, transcript) pairs.
    :param seq_dict: accepted but not used by this function.
    :return: (classify_dict, details_dict). classify_dict maps ens_id to
        1 (out of frame) or 0. details_dict maps each flagged ens_id to one
        BED record for CDS positions 0..3, labelled with this function's
        name.
    """
    classify_dict, details_dict = {}, {}
    for ens_id, t in transcript_iterator(transcript_dict):
        if t.getCdsLength() <= 75:
            continue
        coding_frames = [frame for frame in t.exon_frames if frame != -1]
        if t.strand is True:
            out_of_frame = coding_frames[0] != 0
        elif t.strand is False:
            out_of_frame = coding_frames[-1] != 0
        else:
            # Neither True nor False: the original expression is falsy here.
            out_of_frame = False
        if out_of_frame:
            classify_dict[ens_id] = 1
            details_dict[ens_id] = seq_lib.cds_coordinate_to_bed(
                t, 0, 3, rgb, sys._getframe().f_code.co_name)
        else:
            classify_dict[ens_id] = 0
    return classify_dict, details_dict
def run(self):
    """Flag annotations whose CDS does not begin with "ATG".

    Calls self.get_fasta() first, then walks self.annotation_iterator().
    Annotations for which comp_ann_lib.short_cds() is truthy are classified
    0 without looking at their sequence. For the rest, the first three
    bases of get_cds(self.ref_seq_dict) are compared with "ATG": on a
    mismatch a BED record for CDS positions 0..3 is appended to
    self.details_dict[ens_id] and the classification is 1, otherwise 0.

    self.dump_results_to_disk() is called once after the loop.
    """
    self.get_fasta()
    for ens_id, a in self.annotation_iterator():
        # do not include noncoding transcripts or lift-overs that contain
        # less than short_cds_size
        if comp_ann_lib.short_cds(a):
            self.classify_dict[ens_id] = 0
            continue
        first_codon = a.get_cds(self.ref_seq_dict)[:3]
        if first_codon == "ATG":
            self.classify_dict[ens_id] = 0
            continue
        start_rec = seq_lib.cds_coordinate_to_bed(
            a, 0, 3, self.rgb, self.column)
        self.details_dict[ens_id].append(start_rec)
        self.classify_dict[ens_id] = 1
    self.dump_results_to_disk()
def InFrameStop(transcript_dict, seq_dict):
    """Classify transcripts that contain a stop codon before the final codon.

    For each transcript the CDS is read codon by codon via
    seq_lib.read_codons_with_position(..., skip_last=True), starting from
    the offset returned by seq_lib.find_offset(). Every codon that
    seq_lib.codon_to_amino_acid() translates to "*" is reported.

    :param transcript_dict: passed to transcript_iterator() to obtain
        (ens_id, transcript) pairs.
    :param seq_dict: passed to each transcript's get_cds().
    :return: (classify_dict, details_dict). classify_dict maps ens_id to 1
        if at least one "*" codon was found, else 0. details_dict is a
        defaultdict(list) holding one BED record (positions i..i+3) per
        stop codon found, labelled with this function's name.
    """
    classify_dict = {}
    details_dict = defaultdict(list)
    for ens_id, t in transcript_iterator(transcript_dict):
        cds_seq = t.get_cds(seq_dict)
        frame_offset = seq_lib.find_offset(t.exonFrames, t.strand)
        codons = seq_lib.read_codons_with_position(
            cds_seq, frame_offset, skip_last=True)
        for pos, codon in codons:
            if seq_lib.codon_to_amino_acid(codon) != "*":
                continue
            classify_dict[ens_id] = 1
            stop_rec = seq_lib.cds_coordinate_to_bed(
                t, pos, pos + 3, rgb, sys._getframe().f_code.co_name)
            details_dict[ens_id].append(stop_rec)
        # No stop seen for this id: record it as passing.
        classify_dict.setdefault(ens_id, 0)
    return classify_dict, details_dict
def EndStop(transcript_dict, seq_dict):
    """Classify transcripts whose CDS does not end in a stop codon.

    The last three CDS bases are compared with TAA, TGA and TAG.
    Transcripts whose CDS sequence is 75 bases or shorter are skipped and
    get no entry in either returned dict.

    :param transcript_dict: passed to transcript_iterator() to obtain
        (ens_id, transcript) pairs.
    :param seq_dict: passed to each transcript's get_cds().
    :return: (classify_dict, details_dict). classify_dict maps ens_id to 1
        when the final three bases are not a stop codon, else 0.
        details_dict is a defaultdict(list), but each flagged ens_id is
        assigned a single BED record (positions len-3..len) rather than a
        list, labelled with this function's name.
    """
    valid_stops = ('TAA', 'TGA', 'TAG')
    classify_dict = {}
    details_dict = defaultdict(list)
    for ens_id, t in transcript_iterator(transcript_dict):
        cds_seq = t.get_cds(seq_dict)
        cds_len = len(cds_seq)
        if cds_len <= 75:
            continue
        if cds_seq[-3:] in valid_stops:
            classify_dict[ens_id] = 0
            continue
        classify_dict[ens_id] = 1
        details_dict[ens_id] = seq_lib.cds_coordinate_to_bed(
            t, cds_len - 3, cds_len, rgb, sys._getframe().f_code.co_name)
    return classify_dict, details_dict
def run(self):
    """Count stop codons that occur before the final codon of each annotation.

    Calls self.get_fasta() first. For every annotation from
    self.annotation_iterator(), the CDS from get_cds(self.ref_seq_dict) is
    read codon by codon (skip_last=True) starting at the offset from
    seq_lib.find_offset(). Each codon translating to "*" adds a BED record
    for positions i..i+3 to self.details_dict[ens_id].

    self.classify_dict[ens_id] is set to the number of records held in
    self.details_dict[ens_id] once the annotation has been scanned.

    self.dump_results_to_disk() is called once after the loop.
    """
    self.get_fasta()
    for ens_id, a in self.annotation_iterator():
        cds_seq = a.get_cds(self.ref_seq_dict)
        frame_offset = seq_lib.find_offset(a.exon_frames, a.strand)
        codons = seq_lib.read_codons_with_position(
            cds_seq, frame_offset, skip_last=True)
        for pos, codon in codons:
            if seq_lib.codon_to_amino_acid(codon) != "*":
                continue
            stop_rec = seq_lib.cds_coordinate_to_bed(
                a, pos, pos + 3, self.rgb, self.column)
            self.details_dict[ens_id].append(stop_rec)
        n_records = len(self.details_dict[ens_id])
        self.classify_dict[ens_id] = n_records
    self.dump_results_to_disk()
def run(self):
    """Flag annotations whose first coding exon frame is not 0.

    Annotations for which comp_ann_lib.short_cds() is truthy are classified
    0 and skipped. For the rest, frames equal to -1 are dropped; with
    strand True the first remaining frame is tested, with strand False the
    last remaining frame is tested. A non-zero frame yields classification
    1 plus a BED record for CDS positions 0..3 appended to
    self.details_dict[ens_id]; otherwise the classification is 0.

    self.dump_results_to_disk() is called once after the loop.
    """
    for ens_id, a in self.annotation_iterator():
        # do not include noncoding transcripts or lift-overs that contain
        # less than short_cds_size
        if comp_ann_lib.short_cds(a):
            self.classify_dict[ens_id] = 0
            continue
        # remove all -1 frames because those are UTR exons
        coding_frames = [frame for frame in a.exon_frames if frame != -1]
        if a.strand is True:
            out_of_frame = coding_frames[0] != 0
        elif a.strand is False:
            out_of_frame = coding_frames[-1] != 0
        else:
            # Neither True nor False: the original expression is falsy here.
            out_of_frame = False
        if not out_of_frame:
            self.classify_dict[ens_id] = 0
            continue
        self.classify_dict[ens_id] = 1
        start_rec = seq_lib.cds_coordinate_to_bed(
            a, 0, 3, self.rgb, self.column)
        self.details_dict[ens_id].append(start_rec)
    self.dump_results_to_disk()
def unknown_base(transcript_dict, seq_dict, r, cds):
    """Classify transcripts whose sequence contains a match for pattern `r`.

    :param transcript_dict: passed to transcript_iterator() to obtain
        (ens_id, transcript) pairs.
    :param seq_dict: passed to each transcript's get_cds() / get_mrna().
    :param r: regular expression handed to re.finditer(); every match is
        reported.
    :param cds: when exactly True, the CDS sequence is searched and matches
        are converted with seq_lib.cds_coordinate_to_bed(); for any other
        value the mRNA sequence is searched and
        seq_lib.transcript_coordinate_to_bed() is used.
    :return: (classify_dict, details_dict). classify_dict maps ens_id to 1
        if at least one match was found, else 0. details_dict maps each
        flagged ens_id to a list with one BED record per match
        (match.start()..match.end()), labelled with this function's name.
    """
    # Resolve the label once, in this function's own frame. Evaluated
    # inside a list comprehension, sys._getframe() returns the
    # comprehension's frame on Python 3.0-3.11, whose co_name is
    # "<listcomp>" rather than "unknown_base".
    label = sys._getframe().f_code.co_name
    classify_dict = {}
    details_dict = {}
    for ens_id, t in transcript_iterator(transcript_dict):
        if cds is True:
            seq = t.get_cds(seq_dict)
            to_bed = seq_lib.cds_coordinate_to_bed
        else:
            seq = t.get_mrna(seq_dict)
            to_bed = seq_lib.transcript_coordinate_to_bed
        records = [to_bed(t, m.start(), m.end(), rgb, label)
                   for m in re.finditer(r, seq)]
        if len(records) > 0:
            details_dict[ens_id] = records
            classify_dict[ens_id] = 1
        else:
            classify_dict[ens_id] = 0
    return classify_dict, details_dict
def run(self, equality_test=lambda target, query: target != query):
    """Count aligned codon pairs that differ and satisfy `equality_test`.

    Calls self.get_fasta() first. For each (aln_id, aln, t, a) from
    self.alignment_transcript_annotation_iterator(), pairs where
    comp_ann_lib.short_cds() is truthy for either t or a are classified 0
    and skipped. Otherwise every (i, target_codon, query_codon) from
    comp_ann_lib.codon_pair_iterator() is translated with
    seq_lib.codon_to_amino_acid(); when the codons differ and
    equality_test(target_aa, query_aa) returns exactly True, a BED record
    for positions i..i+3 of t is appended to self.details_dict[aln_id].

    self.classify_dict[aln_id] is set to the number of records held in
    self.details_dict[aln_id].

    :param equality_test: callable taking (target_aa, query_aa); the
        default reports pairs whose amino acids differ.

    self.dump_results_to_disk() is called once after the loop.
    """
    self.get_fasta()
    for aln_id, aln, t, a in self.alignment_transcript_annotation_iterator():
        # do not include noncoding transcripts or lift-overs that contain
        # less than short_cds_size
        if comp_ann_lib.short_cds(t) or comp_ann_lib.short_cds(a):
            self.classify_dict[aln_id] = 0
            continue
        codon_pairs = comp_ann_lib.codon_pair_iterator(
            a, t, aln, self.seq_dict, self.ref_seq_dict)
        for pos, target_codon, query_codon in codon_pairs:
            # Both codons are translated before any comparison is made.
            target_aa = seq_lib.codon_to_amino_acid(target_codon)
            query_aa = seq_lib.codon_to_amino_acid(query_codon)
            codons_differ = target_codon != query_codon
            if codons_differ and equality_test(target_aa, query_aa) is True:
                hit_rec = seq_lib.cds_coordinate_to_bed(
                    t, pos, pos + 3, self.rgb, self.column)
                self.details_dict[aln_id].append(hit_rec)
        n_records = len(self.details_dict[aln_id])
        self.classify_dict[aln_id] = n_records
    self.dump_results_to_disk()
def run(self):
    """Flag alignments in which any of the first three CDS bases fails to map.

    For each (aln_id, aln, t, a) from
    self.alignment_transcript_annotation_iterator(), pairs where
    comp_ann_lib.short_cds() is truthy for either t or a are classified 0
    and skipped. Otherwise CDS positions 0, 1 and 2 of `a` are converted
    with a.cds_coordinate_to_transcript(), passed through
    aln.query_coordinate_to_target(), and converted with
    t.chromosome_coordinate_to_cds(). If any of the three results is None,
    a BED record for CDS positions 0..3 of t is appended to
    self.details_dict[aln_id] and the classification is 1; otherwise 0.

    self.dump_results_to_disk() is called once after the loop.
    """
    for aln_id, aln, t, a in self.alignment_transcript_annotation_iterator():
        # do not include noncoding transcripts or lift-overs that contain
        # less than short_cds_size
        if comp_ann_lib.short_cds(t) or comp_ann_lib.short_cds(a):
            self.classify_dict[aln_id] = 0
            continue
        # range() rather than xrange(): same three indices on Python 2,
        # and the name also exists on Python 3.
        cds_positions = [
            t.chromosome_coordinate_to_cds(
                aln.query_coordinate_to_target(
                    a.cds_coordinate_to_transcript(i)))
            for i in range(3)
        ]
        if None in cds_positions:
            self.details_dict[aln_id].append(
                seq_lib.cds_coordinate_to_bed(t, 0, 3, self.rgb, self.column))
            self.classify_dict[aln_id] = 1
        else:
            self.classify_dict[aln_id] = 0
    self.dump_results_to_disk()