def run(self, mult3=False):
    """Record every deletion that lies inside the transcript's thick interval.

    Iterates the (aln_id, aln, t, a) tuples produced by
    self.alignment_transcript_annotation_iterator(). For each one, every
    (start, stop, size) yielded by comp_ann_lib.deletion_iterator(t, aln, mult3)
    that satisfies t.thick_start <= start and stop < t.thick_stop is converted
    to a BED record and appended to self.details_dict[aln_id].
    self.classify_dict[aln_id] is then set to the number of records held for
    that alignment. Results are written out via self.dump_results_to_disk().

    :param mult3: forwarded unchanged to comp_ann_lib.deletion_iterator; its
        meaning is defined by that helper.
    """
    for aln_id, aln, t, a in self.alignment_transcript_annotation_iterator():
        # Skip noncoding transcripts and lift-overs whose CDS is shorter than
        # short_cds_size; they are classified as 0.
        if comp_ann_lib.short_cds(t) or comp_ann_lib.short_cds(a):
            self.classify_dict[aln_id] = 0
            continue

        deletions = comp_ann_lib.deletion_iterator(t, aln, mult3)
        for del_start, del_stop, _size in deletions:
            within_thick = del_start >= t.thick_start and del_stop < t.thick_stop
            if not within_thick:
                continue
            self.details_dict[aln_id].append(
                seq_lib.chromosome_region_to_bed(
                    t, del_start, del_stop, self.rgb, self.column))

        # NOTE(review): relies on details_dict[aln_id] existing even when no
        # record was appended (presumably a defaultdict(list)) -- confirm.
        self.classify_dict[aln_id] = len(self.details_dict[aln_id])

    self.dump_results_to_disk()
def run(self, equality_test=lambda target, query: target != query):
    """Record codon positions whose translations satisfy ``equality_test``.

    After self.get_fasta(), walks the (aln_id, aln, t, a) tuples from
    self.alignment_transcript_annotation_iterator(). Each
    (i, target_codon, query_codon) from comp_ann_lib.codon_pair_iterator is
    translated with seq_lib.codon_to_amino_acid. A BED record covering CDS
    coordinates [i, i + 3) of ``t`` is stored when the two codons differ and
    ``equality_test(target_aa, query_aa)`` returns exactly ``True``.
    self.classify_dict[aln_id] receives the number of stored records.

    :param equality_test: two-argument callable applied to
        (target amino acid, query amino acid). The default reports pairs whose
        amino acids differ. Only a literal ``True`` result counts as a hit.
    """
    for aln_id, aln, t, a in self.alignment_transcript_annotation_iterator_with_fasta():
        pass
def run(self):
    """Record the windows returned for each alignment's frame shifts.

    For every (aln_id, aln, t, a) from
    self.alignment_transcript_annotation_iterator(), the frame shifts from
    comp_ann_lib.frame_shift_iterator(a, t, aln) are collected into a list.
    When there are none the alignment is classified as 0. Otherwise
    self.window_starts_stops(t, frame_shifts) supplies two sequences that are
    paired up into (start, stop) intervals. Each interval becomes a BED record
    in self.details_dict[aln_id], and self.classify_dict[aln_id] is set to the
    number of records.
    """
    for aln_id, aln, t, a in self.alignment_transcript_annotation_iterator():
        # Skip noncoding transcripts and lift-overs whose CDS is shorter than
        # short_cds_size; they are classified as 0.
        if comp_ann_lib.short_cds(t) or comp_ann_lib.short_cds(a):
            self.classify_dict[aln_id] = 0
            continue

        shifts = list(comp_ann_lib.frame_shift_iterator(a, t, aln))
        if not shifts:
            self.classify_dict[aln_id] = 0
            continue

        # NOTE(review): the FIRST returned sequence is used as the stops and
        # the SECOND as the starts -- confirm this matches the return order of
        # window_starts_stops.
        stops, starts = self.window_starts_stops(t, shifts)
        for window_start, window_stop in itertools.izip(starts, stops):
            self.details_dict[aln_id].append(
                seq_lib.chromosome_coordinate_to_bed(
                    t, window_start, window_stop, self.rgb, self.column))

        self.classify_dict[aln_id] = len(self.details_dict[aln_id])

    self.dump_results_to_disk()
def run(self):
    """Flag annotations whose CDS is short but non-empty.

    For each (ens_id, a) from self.annotation_iterator(): when
    comp_ann_lib.short_cds(a) returns exactly ``True`` and a.cds_size is not
    0, a BED record spanning CDS coordinates [0, a.cds_size) is appended to
    self.details_dict[ens_id] and the annotation is classified as 1. Every
    other annotation is classified as 0. Results are then dumped to disk.
    """
    for ens_id, a in self.annotation_iterator():
        flagged = comp_ann_lib.short_cds(a) is True and a.cds_size != 0
        if not flagged:
            self.classify_dict[ens_id] = 0
            continue

        whole_cds = seq_lib.cds_coordinate_to_bed(
            a, 0, a.cds_size, self.rgb, self.column)
        self.details_dict[ens_id].append(whole_cds)
        self.classify_dict[ens_id] = 1

    self.dump_results_to_disk()
def run(self):
    """Flag alignments where any of the first three CDS bases fails to map.

    For each (aln_id, aln, t, a) from
    self.alignment_transcript_annotation_iterator(), CDS positions 0, 1 and 2
    of ``a`` are converted in three steps: a.cds_coordinate_to_transcript,
    then aln.query_coordinate_to_target, then t.chromosome_coordinate_to_cds.
    If any of the three results is ``None`` the alignment is classified as 1
    and a BED record for CDS coordinates [0, 3) of ``t`` is stored. Otherwise
    it is classified as 0.
    """
    for aln_id, aln, t, a in self.alignment_transcript_annotation_iterator():
        # Skip noncoding transcripts and lift-overs whose CDS is shorter than
        # short_cds_size; they are classified as 0.
        if comp_ann_lib.short_cds(t) or comp_ann_lib.short_cds(a):
            self.classify_dict[aln_id] = 0
            continue

        # All three positions are computed before checking, with no early exit.
        mapped = []
        for cds_pos in xrange(3):
            transcript_pos = a.cds_coordinate_to_transcript(cds_pos)
            target_pos = aln.query_coordinate_to_target(transcript_pos)
            mapped.append(t.chromosome_coordinate_to_cds(target_pos))

        if None not in mapped:
            self.classify_dict[aln_id] = 0
            continue

        first_codon = seq_lib.cds_coordinate_to_bed(
            t, 0, 3, self.rgb, self.column)
        self.details_dict[aln_id].append(first_codon)
        self.classify_dict[aln_id] = 1

    self.dump_results_to_disk()
def run(self):
    """Flag annotations whose CDS does not begin with "ATG".

    After self.get_fasta(), each (ens_id, a) from self.annotation_iterator()
    is examined. Annotations for which comp_ann_lib.short_cds(a) is truthy are
    classified as 0. Otherwise the first three characters of
    a.get_cds(self.ref_seq_dict) are compared with "ATG". A mismatch stores a
    BED record for CDS coordinates [0, 3) and classifies the annotation as 1;
    a match classifies it as 0.
    """
    self.get_fasta()

    for ens_id, a in self.annotation_iterator():
        # Skip noncoding transcripts and those whose CDS is shorter than
        # short_cds_size; they are classified as 0.
        if comp_ann_lib.short_cds(a):
            self.classify_dict[ens_id] = 0
            continue

        leading_codon = a.get_cds(self.ref_seq_dict)[:3]
        if leading_codon != "ATG":
            start_rec = seq_lib.cds_coordinate_to_bed(
                a, 0, 3, self.rgb, self.column)
            self.details_dict[ens_id].append(start_rec)
            self.classify_dict[ens_id] = 1
            continue

        self.classify_dict[ens_id] = 0

    self.dump_results_to_disk()
def run(self):
    """Flag annotations whose CDS size is not a multiple of three.

    For each (ens_id, a) from self.annotation_iterator(): annotations for
    which comp_ann_lib.short_cds(a) is truthy are classified as 0. For the
    rest, when a.cds_size % 3 is non-zero a BED record spanning
    [a.thick_start, a.thick_stop) in chromosome coordinates is stored and the
    annotation is classified as 1. Otherwise it is classified as 0.
    """
    for ens_id, a in self.annotation_iterator():
        # Skip noncoding transcripts and those whose CDS is shorter than
        # short_cds_size; they are classified as 0.
        if comp_ann_lib.short_cds(a):
            self.classify_dict[ens_id] = 0
            continue

        remainder = a.cds_size % 3
        if remainder == 0:
            self.classify_dict[ens_id] = 0
            continue

        thick_rec = seq_lib.chromosome_coordinate_to_bed(
            a, a.thick_start, a.thick_stop, self.rgb, self.column)
        self.details_dict[ens_id].append(thick_rec)
        self.classify_dict[ens_id] = 1

    self.dump_results_to_disk()
def run(self):
    """Flag annotations whose first coding exon does not start in frame 0.

    For each (ens_id, a) from self.annotation_iterator(): annotations for
    which comp_ann_lib.short_cds(a) is truthy are classified as 0. Otherwise
    the -1 entries of a.exon_frames are dropped. The first remaining frame is
    inspected when a.strand is ``True`` and the last one when a.strand is
    ``False``. A non-zero frame classifies the annotation as 1 and stores a
    BED record for CDS coordinates [0, 3). Anything else is classified as 0.
    """
    for ens_id, a in self.annotation_iterator():
        # Skip noncoding transcripts and those whose CDS is shorter than
        # short_cds_size; they are classified as 0.
        if comp_ann_lib.short_cds(a):
            self.classify_dict[ens_id] = 0
            continue

        # Frames of -1 mark UTR exons, so drop them before looking at the ends.
        coding_frames = [frame for frame in a.exon_frames if frame != -1]

        # Index lazily: a frame is only looked up for the strand that needs
        # it, and a strand that is neither True nor False looks up nothing.
        if a.strand is True:
            out_of_frame = coding_frames[0] != 0
        elif a.strand is False:
            out_of_frame = coding_frames[-1] != 0
        else:
            out_of_frame = False

        if not out_of_frame:
            self.classify_dict[ens_id] = 0
            continue

        self.classify_dict[ens_id] = 1
        self.details_dict[ens_id].append(
            seq_lib.cds_coordinate_to_bed(a, 0, 3, self.rgb, self.column))

    self.dump_results_to_disk()