Ejemplo n.º 1
0
 def run(self):
     """Classify each annotation by whether its CDS is short yet non-empty.

     For every ``(id, annotation)`` pair from ``self.annotation_iterator()``
     the entry in ``self.classify_dict`` becomes 1 when
     ``comp_ann_lib.short_cds`` returns ``True`` and ``cds_size`` is non-zero,
     and 0 otherwise.  Flagged annotations also get the record built by
     ``seq_lib.cds_coordinate_to_bed`` for CDS positions ``0..cds_size``
     appended to ``self.details_dict``.  Once all annotations are processed
     ``self.dump_results_to_disk()`` is called.
     """
     for transcript_id, annotation in self.annotation_iterator():
         # cds_size is only inspected once short_cds has reported True
         flagged = (comp_ann_lib.short_cds(annotation) is True
                    and annotation.cds_size != 0)
         if not flagged:
             self.classify_dict[transcript_id] = 0
             continue
         whole_cds = seq_lib.cds_coordinate_to_bed(
             annotation, 0, annotation.cds_size, self.rgb, self.column)
         self.details_dict[transcript_id].append(whole_cds)
         self.classify_dict[transcript_id] = 1
     self.dump_results_to_disk()
 def run(self):
     """Flag alignments where the two transcripts disagree on ``thick_stop``.

     Iterates ``self.augustus_transcript_transmap_iterator()``, which yields
     ``(aug_aln_id, aug_t, t)`` triples.  When ``t.thick_stop`` differs from
     ``aug_t.thick_stop`` the id is classified as 1 and the record for the
     last three CDS positions of ``aug_t`` is appended to
     ``self.details_dict``; otherwise the id is classified as 0.  Finishes by
     calling ``self.dump_results_to_disk()``.
     """
     triples = self.augustus_transcript_transmap_iterator()
     for aug_aln_id, aug_t, t in triples:
         stops_differ = t.thick_stop != aug_t.thick_stop
         if not stops_differ:
             self.classify_dict[aug_aln_id] = 0
             continue
         cds_len = aug_t.cds_size
         # highlight the final codon of the aug_t CDS
         last_codon = seq_lib.cds_coordinate_to_bed(
             aug_t, cds_len - 3, cds_len, self.rgb, self.column)
         self.details_dict[aug_aln_id].append(last_codon)
         self.classify_dict[aug_aln_id] = 1
     self.dump_results_to_disk()
def BeginStart(transcript_dict, seq_dict):
    """Report transcripts whose CDS does not begin with ``ATG``.

    Transcripts whose CDS is 75 bases or shorter are skipped and appear in
    neither result dictionary.

    Returns a ``(classify_dict, details_dict)`` tuple.  ``classify_dict`` maps
    every examined id to 1 (first three CDS bases are not ``ATG``) or 0.
    ``details_dict`` maps each flagged id to the single record produced by
    ``seq_lib.cds_coordinate_to_bed`` for CDS positions 0-3.
    """
    classify_dict, details_dict = {}, {}
    for ens_id, t in transcript_iterator(transcript_dict):
        cds_seq = t.get_cds(seq_dict)
        if len(cds_seq) <= 75:
            continue
        missing_atg = cds_seq[:3] != "ATG"
        classify_dict[ens_id] = 1 if missing_atg else 0
        if missing_atg:
            # the record is labelled with this function's own name
            own_name = sys._getframe().f_code.co_name
            details_dict[ens_id] = seq_lib.cds_coordinate_to_bed(t, 0, 3, rgb, own_name)
    return classify_dict, details_dict
def StartOutOfFrame(transcript_dict, seq_dict):
    """Report transcripts whose first coding exon frame is not 0.

    Transcripts for which ``getCdsLength()`` is 75 or less are skipped and
    appear in neither result dictionary.  ``seq_dict`` is accepted but not
    used by this function.

    Returns a ``(classify_dict, details_dict)`` tuple.  ``classify_dict`` maps
    every examined id to 1 or 0; ``details_dict`` maps each flagged id to the
    record produced by ``seq_lib.cds_coordinate_to_bed`` for CDS positions 0-3.
    """
    classify_dict, details_dict = {}, {}
    for ens_id, t in transcript_iterator(transcript_dict):
        if t.getCdsLength() <= 75:
            continue
        # drop the -1 entries so only the remaining frames are inspected
        coding_frames = [frame for frame in t.exon_frames if frame != -1]
        if t.strand is True:
            # strand True: the first remaining frame must be 0
            out_of_frame = coding_frames[0] != 0
        elif t.strand is False:
            # strand False: the last remaining frame must be 0
            out_of_frame = coding_frames[-1] != 0
        else:
            # a strand that is neither True nor False is never flagged
            out_of_frame = False
        if out_of_frame:
            classify_dict[ens_id] = 1
            own_name = sys._getframe().f_code.co_name
            details_dict[ens_id] = seq_lib.cds_coordinate_to_bed(t, 0, 3, rgb, own_name)
        else:
            classify_dict[ens_id] = 0
    return classify_dict, details_dict
Ejemplo n.º 5
0
 def run(self):
     """Flag annotations whose reference CDS does not start with ``ATG``.

     Calls ``self.get_fasta()`` first, then walks
     ``self.annotation_iterator()``.  An annotation is classified as 1, and
     the record for CDS positions 0-3 appended to ``self.details_dict``, when
     ``comp_ann_lib.short_cds`` is falsy for it and the first three bases of
     its CDS (read from ``self.ref_seq_dict``) are not ``ATG``.  Everything
     else is classified as 0.  Ends with ``self.dump_results_to_disk()``.
     """
     self.get_fasta()
     for ens_id, a in self.annotation_iterator():
         # Noncoding transcripts or lift-overs that contain less than
         # short_cds_size are never flagged; the CDS is only fetched for the
         # remaining annotations.
         missing_atg = (not comp_ann_lib.short_cds(a)
                        and a.get_cds(self.ref_seq_dict)[:3] != "ATG")
         if missing_atg:
             start_codon_rec = seq_lib.cds_coordinate_to_bed(
                 a, 0, 3, self.rgb, self.column)
             self.details_dict[ens_id].append(start_codon_rec)
         self.classify_dict[ens_id] = 1 if missing_atg else 0
     self.dump_results_to_disk()
def InFrameStop(transcript_dict, seq_dict):
    """Report codons inside a transcript's CDS that translate to ``*``.

    The CDS of each transcript is read codon by codon through
    ``seq_lib.read_codons_with_position`` (with ``skip_last=True``), starting
    at the offset given by ``seq_lib.find_offset``.

    Returns a ``(classify_dict, details_dict)`` tuple.  ``classify_dict`` maps
    every id to 1 if at least one such codon was found and to 0 otherwise.
    ``details_dict`` is a ``defaultdict(list)`` holding one record per
    offending codon, covering CDS positions ``i`` to ``i + 3``.
    """
    # records are labelled with this function's own name
    own_name = sys._getframe().f_code.co_name
    classify_dict = {}
    details_dict = defaultdict(list)
    for ens_id, t in transcript_iterator(transcript_dict):
        cds_seq = t.get_cds(seq_dict)
        frame_offset = seq_lib.find_offset(t.exonFrames, t.strand)
        codons = seq_lib.read_codons_with_position(cds_seq, frame_offset, skip_last=True)
        for pos, codon in codons:
            is_stop = seq_lib.codon_to_amino_acid(codon) == "*"
            if not is_stop:
                continue
            classify_dict[ens_id] = 1
            details_dict[ens_id].append(
                seq_lib.cds_coordinate_to_bed(t, pos, pos + 3, rgb, own_name))
        # ids without any hit are recorded as 0; an existing entry is kept
        classify_dict.setdefault(ens_id, 0)
    return classify_dict, details_dict
def EndStop(transcript_dict, seq_dict):
    """Report transcripts whose CDS does not end in TAA, TGA or TAG.

    Transcripts whose CDS is 75 bases or shorter are skipped and appear in
    neither result.

    Returns a ``(classify_dict, details_dict)`` tuple.  ``classify_dict`` maps
    every examined id to 1 (last three bases are not one of the listed
    codons) or 0.  ``details_dict`` maps each flagged id to the record for
    the last three CDS positions.
    """
    valid_stops = ('TAA', 'TGA', 'TAG')
    classify_dict = {}
    details_dict = defaultdict(list)
    for ens_id, t in transcript_iterator(transcript_dict):
        cds_seq = t.get_cds(seq_dict)
        cds_len = len(cds_seq)
        if cds_len <= 75:
            continue
        lacks_stop = cds_seq[-3:] not in valid_stops
        classify_dict[ens_id] = 1 if lacks_stop else 0
        if lacks_stop:
            # NOTE(review): details_dict is a defaultdict(list), yet a single
            # record is assigned here rather than appended -- confirm which
            # shape callers expect before changing it.
            own_name = sys._getframe().f_code.co_name
            details_dict[ens_id] = seq_lib.cds_coordinate_to_bed(
                t, cds_len - 3, cds_len, rgb, own_name)
    return classify_dict, details_dict
Ejemplo n.º 8
0
 def run(self):
     """Count codons in each annotation's CDS that translate to ``*``.

     Calls ``self.get_fasta()`` first.  For every annotation the CDS read from
     ``self.ref_seq_dict`` is walked codon by codon (``skip_last=True``) and
     each codon translating to ``*`` adds a record for CDS positions
     ``i..i + 3`` to ``self.details_dict``.  ``self.classify_dict`` then
     receives the number of records stored for that id.  Ends with
     ``self.dump_results_to_disk()``.
     """
     self.get_fasta()
     for ens_id, a in self.annotation_iterator():
         cds_seq = a.get_cds(self.ref_seq_dict)
         frame_offset = seq_lib.find_offset(a.exon_frames, a.strand)
         codons = seq_lib.read_codons_with_position(
             cds_seq, frame_offset, skip_last=True)
         for pos, codon in codons:
             if seq_lib.codon_to_amino_acid(codon) == "*":
                 self.details_dict[ens_id].append(
                     seq_lib.cds_coordinate_to_bed(
                         a, pos, pos + 3, self.rgb, self.column))
         # the classification is the number of records held for this id
         self.classify_dict[ens_id] = len(self.details_dict[ens_id])
     self.dump_results_to_disk()
Ejemplo n.º 9
0
 def run(self):
     """Flag annotations whose first coding exon frame is not 0.

     Annotations for which ``comp_ann_lib.short_cds`` is truthy are
     classified as 0 without further checks.  For the rest, the ``-1``
     entries are removed from ``exon_frames`` and the annotation is flagged
     (classified as 1, with a record for CDS positions 0-3 appended to
     ``self.details_dict``) when the first remaining frame (strand ``True``)
     or the last remaining frame (strand ``False``) is non-zero.  Ends with
     ``self.dump_results_to_disk()``.
     """
     for ens_id, a in self.annotation_iterator():
         # do not include noncoding transcripts or lift-overs that contain
         # less than short_cds_size
         if comp_ann_lib.short_cds(a):
             self.classify_dict[ens_id] = 0
             continue
         # -1 frames belong to UTR exons, so they are dropped
         coding_frames = [frame for frame in a.exon_frames if frame != -1]
         if a.strand is True:
             out_of_frame = coding_frames[0] != 0
         elif a.strand is False:
             out_of_frame = coding_frames[-1] != 0
         else:
             # a strand that is neither True nor False is never flagged
             out_of_frame = False
         if not out_of_frame:
             self.classify_dict[ens_id] = 0
             continue
         self.classify_dict[ens_id] = 1
         start_rec = seq_lib.cds_coordinate_to_bed(
             a, 0, 3, self.rgb, self.column)
         self.details_dict[ens_id].append(start_rec)
     self.dump_results_to_disk()
def unknown_base(transcript_dict, seq_dict, r, cds):
    """Locate every match of pattern ``r`` in each transcript's sequence.

    Args:
        transcript_dict: handed to ``transcript_iterator`` to obtain
            ``(id, transcript)`` pairs.
        seq_dict: handed to the transcript's ``get_cds`` / ``get_mrna``.
        r: pattern given to ``re.finditer``.
        cds: when exactly ``True`` the CDS is searched and matches are
            converted with ``seq_lib.cds_coordinate_to_bed``; for any other
            value the mRNA is searched and
            ``seq_lib.transcript_coordinate_to_bed`` is used.

    Returns:
        A ``(classify_dict, details_dict)`` tuple.  ``classify_dict`` maps
        every id to 1 if at least one match was found and to 0 otherwise;
        ``details_dict`` maps each id with matches to the list of records,
        one per match.
    """
    # Resolve the record label once, in this function's own frame.  Evaluated
    # inside a list comprehension, sys._getframe() returns the comprehension's
    # frame on Python 3.0-3.11, so records would be labelled "<listcomp>"
    # instead of with this function's name.
    own_name = sys._getframe().f_code.co_name
    classify_dict = {}
    details_dict = {}
    for ens_id, t in transcript_iterator(transcript_dict):
        if cds is True:
            sequence = t.get_cds(seq_dict)
            to_bed = seq_lib.cds_coordinate_to_bed
        else:
            sequence = t.get_mrna(seq_dict)
            to_bed = seq_lib.transcript_coordinate_to_bed
        hits = [to_bed(t, m.start(), m.end(), rgb, own_name)
                for m in re.finditer(r, sequence)]
        if hits:
            details_dict[ens_id] = hits
            classify_dict[ens_id] = 1
        else:
            classify_dict[ens_id] = 0
    return classify_dict, details_dict
Ejemplo n.º 11
0
 def run(self, equality_test=lambda target, query: target != query):
     """Count aligned codon pairs that differ and satisfy ``equality_test``.

     ``equality_test`` receives the two translated amino acids (target first,
     query second) and must return exactly ``True`` for the pair to be
     recorded; by default it reports pairs whose amino acids differ.

     Calls ``self.get_fasta()`` first.  Alignments where either ``t`` or
     ``a`` is truthy under ``comp_ann_lib.short_cds`` are classified as 0.
     For the others, every codon pair from
     ``comp_ann_lib.codon_pair_iterator`` whose codons differ and which passes
     ``equality_test`` adds a record for positions ``i..i + 3`` of ``t`` to
     ``self.details_dict``; the classification is the number of records held
     for that id.  Ends with ``self.dump_results_to_disk()``.
     """
     self.get_fasta()
     for aln_id, aln, t, a in self.alignment_transcript_annotation_iterator():
         # do not include noncoding transcripts or lift-overs that contain
         # less than short_cds_size
         if comp_ann_lib.short_cds(t) or comp_ann_lib.short_cds(a):
             self.classify_dict[aln_id] = 0
             continue
         codon_pairs = comp_ann_lib.codon_pair_iterator(
             a, t, aln, self.seq_dict, self.ref_seq_dict)
         for pos, target_codon, query_codon in codon_pairs:
             # both codons are translated before any comparison is made
             target_aa = seq_lib.codon_to_amino_acid(target_codon)
             query_aa = seq_lib.codon_to_amino_acid(query_codon)
             codons_differ = target_codon != query_codon
             if not codons_differ:
                 continue
             if equality_test(target_aa, query_aa) is not True:
                 continue
             self.details_dict[aln_id].append(
                 seq_lib.cds_coordinate_to_bed(
                     t, pos, pos + 3, self.rgb, self.column))
         self.classify_dict[aln_id] = len(self.details_dict[aln_id])
     self.dump_results_to_disk()
Ejemplo n.º 12
0
 def run(self):
     """Flag alignments where one of the first three CDS bases fails to map.

     Alignments where either ``t`` or ``a`` is truthy under
     ``comp_ann_lib.short_cds`` are classified as 0.  For the others, CDS
     positions 0, 1 and 2 of ``a`` are converted with
     ``a.cds_coordinate_to_transcript``, then
     ``aln.query_coordinate_to_target``, then
     ``t.chromosome_coordinate_to_cds``.  If any result is ``None`` the
     alignment is classified as 1 and a record for CDS positions 0-3 of ``t``
     is appended to ``self.details_dict``; otherwise it is classified as 0.
     Ends with ``self.dump_results_to_disk()``.
     """
     for aln_id, aln, t, a in self.alignment_transcript_annotation_iterator():
         # do not include noncoding transcripts or lift-overs that contain
         # less than short_cds_size
         if comp_ann_lib.short_cds(t) or comp_ann_lib.short_cds(a):
             self.classify_dict[aln_id] = 0
             continue
         cds_positions = []
         for i in xrange(3):
             in_transcript = a.cds_coordinate_to_transcript(i)
             in_target = aln.query_coordinate_to_target(in_transcript)
             cds_positions.append(t.chromosome_coordinate_to_cds(in_target))
         if None not in cds_positions:
             self.classify_dict[aln_id] = 0
             continue
         start_rec = seq_lib.cds_coordinate_to_bed(
             t, 0, 3, self.rgb, self.column)
         self.details_dict[aln_id].append(start_rec)
         self.classify_dict[aln_id] = 1
     self.dump_results_to_disk()