def UnknownBases(transcript_dict, seq_dict):
    """Flag transcripts whose mRNA contains a run of N between two called bases.

    For every (ens_id, transcript) pair yielded by ``transcript_iterator``,
    the mRNA sequence returned by ``t.get_mrna(seq_dict)`` is scanned for
    runs of upper-case ``N`` that have an A/T/G/C (either case) immediately
    before and immediately after them. Runs touching either end of the
    sequence are therefore not reported.

    Returns a tuple ``(classify_dict, details_dict)``:
      * ``classify_dict`` maps every ens_id to 1 (at least one run found)
        or 0 (none found).
      * ``details_dict`` maps only the flagged ens_ids to a list with one
        entry per run, each produced by
        ``seq_lib.transcript_coordinate_to_bed`` (presumably a BED record;
        confirm against seq_lib).
    """
    # Lookbehind/lookahead keep the flanking bases out of the match itself.
    # With the bases consumed by the match, a base shared by two neighbouring
    # N-runs (e.g. "ANNANNA") could only belong to the first match, and the
    # second run was silently dropped. The match span is now exactly the
    # N-run, so no +1/-1 adjustment is needed.
    n_run = re.compile("(?<=[atgcATGC])N+(?=[atgcATGC])")
    # Resolve the classifier name once, in this function's own frame. Inside
    # a list comprehension the current frame is version-dependent: Python
    # 3.0-3.11 run comprehensions in a separate "<listcomp>" frame.
    classifier_name = sys._getframe().f_code.co_name
    classify_dict = {}
    details_dict = {}
    for ens_id, t in transcript_iterator(transcript_dict):
        mrna = t.get_mrna(seq_dict)
        records = [
            seq_lib.transcript_coordinate_to_bed(t, m.start(), m.end(), rgb, classifier_name)
            for m in n_run.finditer(mrna)
        ]
        if records:
            details_dict[ens_id] = records
            classify_dict[ens_id] = 1
        else:
            classify_dict[ens_id] = 0
    return classify_dict, details_dict
def unknown_base(transcript_dict, seq_dict, r, cds):
    """Classify transcripts by whether pattern ``r`` occurs in their sequence.

    Parameters:
      transcript_dict: passed to ``transcript_iterator``, which yields
        (ens_id, transcript) pairs.
      seq_dict: passed through to the transcript's ``get_cds`` / ``get_mrna``.
      r: regular expression handed to ``re.finditer`` (a pattern string or a
        compiled pattern both work there).
      cds: when this is exactly ``True`` the CDS sequence is searched and
        coordinates go through ``seq_lib.cds_coordinate_to_bed``; for any
        other value the mRNA sequence is searched and coordinates go through
        ``seq_lib.transcript_coordinate_to_bed``.

    Returns a tuple ``(classify_dict, details_dict)``:
      * ``classify_dict`` maps every ens_id to 1 (at least one match) or 0.
      * ``details_dict`` maps only the matching ens_ids to a list with one
        converted entry per match (presumably BED records; confirm against
        seq_lib), built from each match's ``start()`` and ``end()``.
    """
    # Resolve the name once, in this function's own frame. Inside a list
    # comprehension the current frame is version-dependent: Python 3.0-3.11
    # run comprehensions in a separate "<listcomp>" frame.
    classifier_name = sys._getframe().f_code.co_name
    classify_dict = {}
    details_dict = {}
    for ens_id, t in transcript_iterator(transcript_dict):
        # Strict identity check kept on purpose: truthy non-bool values such
        # as 1 have always selected the mRNA branch.
        if cds is True:
            sequence = t.get_cds(seq_dict)
            to_bed = seq_lib.cds_coordinate_to_bed
        else:
            sequence = t.get_mrna(seq_dict)
            to_bed = seq_lib.transcript_coordinate_to_bed
        records = [
            to_bed(t, m.start(), m.end(), rgb, classifier_name)
            for m in re.finditer(r, sequence)
        ]
        if records:
            details_dict[ens_id] = records
            classify_dict[ens_id] = 1
        else:
            classify_dict[ens_id] = 0
    return classify_dict, details_dict