def detect_rev_seed_match(mirna_id, targetrna_id):
    '''
    X1_seed_match_rev

    Fetch the miRNA and target-RNA sequences for the given ids, take the
    reverse complement of the target-RNA sequence, hand both to
    find_mirna_subtarget_candidates and return whatever run_result makes
    of the candidates found.
    '''
    sequences = get_sequence(mirna_id, targetrna_id)
    mirna_seq, targetrna_seq = sequences

    # The candidate search is given the reverse-complemented target
    # sequence, not the sequence as fetched.
    candidates = find_mirna_subtarget_candidates(
        mirna_id,
        mirna_seq,
        targetrna_id,
        utils.reverse_complement(targetrna_seq),
    )
    return run_result(candidates)
def detect_seed_match(mirna_id, targetrna_id):
    '''
    1_Find_mirna_target_candidates

    Start find_mirna_target_candidates module.

    Fetches the miRNA and target-RNA sequences for the given ids,
    reverse-complements the target-RNA sequence, reads the four seed
    parameters from seed_infor and passes everything on to
    find_mirna_target_candidates. The candidates are returned through
    run_result.

    If any of MIRNA_START_PAIRING, SEED_LENGTH, ALLOWED_GU_WOBBLES or
    ALLOWED_MISMATCHES is missing from seed_infor, the matching error
    message is written to stderr and the process exits with status 1
    (SystemExit).
    '''
    mirna_seq, targetrna_seq = get_sequence(mirna_id, targetrna_id)
    targetrna_seq_revcomp = utils.reverse_complement(targetrna_seq)

    # Required seed parameters, in the order find_mirna_target_candidates
    # takes them, each paired with the diagnostic reported when it is absent.
    required_parameters = (
        ('MIRNA_START_PAIRING',
         'ERROR: MIRNA_START_PAIRING parameter does not exist in module.analysis.mirna_seed.py file'),
        ('SEED_LENGTH',
         'ERROR: SEED_LENGTH parameters do not exist in module.analysis.mirna_seed.py file'),
        ('ALLOWED_GU_WOBBLES',
         'ERROR: ALLOWED_GU_WOBBLES parameters do not exist in module.analysis.mirna_seed.py file'),
        ('ALLOWED_MISMATCHES',
         'ERROR: ALLOWED_MISMATCHES parameters do not exist in module.analysis.mirna_seed.py file'),
    )

    seed_parameters = []
    for name, missing_message in required_parameters:
        if not hasattr(seed_infor, name):
            # Fatal configuration problem: report on stderr so the message
            # cannot get mixed into regular output, then stop with status 1.
            sys.stderr.write(missing_message + '\n')
            sys.exit(1)
        seed_parameters.append(getattr(seed_infor, name))

    (mirna_start_pairing, seed_length,
     allowed_gu_wobbles, allowed_mismatches) = seed_parameters

    candidates = find_mirna_target_candidates(
        mirna_id,
        mirna_seq,
        targetrna_id,
        targetrna_seq_revcomp,
        mirna_start_pairing,
        seed_length,
        allowed_gu_wobbles,
        allowed_mismatches,
    )
    return run_result(candidates)
# Seed group => slice of the target-RNA motif that belongs to that group.
_SEED_GROUP_SLICES = {
    'p1_p8_match': slice(0, 8),
    'p2_p8_match': slice(1, 8),
    'p1_p7_match': slice(0, 7),
    'p2_p7_match': slice(1, 7),
    'p3_p8_match': slice(2, 8),
}

# Motif type => seed groups a site of that type contributes a motif to.
# The FIRST group listed is the site's own group, i.e. the one whose
# statistics are appended to the site's record.
_SEED_GROUPS_BY_MOTIF_TYPE = {
    '8mer': ('p1_p8_match', 'p2_p8_match', 'p1_p7_match',
             'p2_p7_match', 'p3_p8_match'),
    '8mer-1A': ('p2_p8_match', 'p2_p7_match', 'p3_p8_match'),
    '7mer-m8': ('p2_p8_match', 'p2_p7_match', 'p3_p8_match'),
    '7mer-m1': ('p1_p7_match', 'p2_p7_match'),
    '7mer-1A': ('p2_p7_match',),
    '6mer-m7': ('p2_p7_match',),
    '6mer-m8': ('p3_p8_match',),
}


def motif_occurrence(mirna_seq, targetrna_seq, tmp_dict):
    '''
    Append motif-occurrence statistics to every seed-match record.

    mirna_seq     -- not used by this function; kept so existing callers
                     keep working.
    targetrna_seq -- target-RNA sequence; passed to Markov_Model and used
                     for the number of possible motif positions.
    tmp_dict      -- mapping whose values are lists. Index 3 is read as the
                     target-RNA motif and index 5 as the motif type (the
                     original author flagged both indices "NEED TO CHECK").

    Every record with a known motif type is extended in place with eight
    values:
        [motif_count, total_motifs, motif_prob, binom_prob]   this motif
        [motif_count, total_motifs, motif_prob, binom_prob]   whole group
    Records with an unknown motif type are reported with
    'ERROR: motif_type is wrong...' and left untouched.

    Returns the same tmp_dict object.
    '''
    two_nt_motif_prob_dict = Markov_Model(targetrna_seq)

    # Pass 1: collect the reverse-complemented motifs of every seed group
    # and remember which groups are the "own" group of at least one site.
    motif_type_need = []
    motif_type_dict = {}
    for group in _SEED_GROUP_SLICES:
        motif_type_dict[group] = []

    for record in tmp_dict.values():
        targetrna_motif = record[3]  # NEED TO CHECK (index, see docstring)
        motif_type = record[5]       # NEED TO CHECK (index, see docstring)
        groups = _SEED_GROUPS_BY_MOTIF_TYPE.get(motif_type)
        if groups is None:
            print('ERROR: motif_type is wrong...')
            continue
        motif_type_need.append(groups[0])
        for group in groups:
            motif_type_dict[group].append(utils.reverse_complement(
                targetrna_motif[_SEED_GROUP_SLICES[group]]))

    motif_type_need = utils.rm_duplicate_list(motif_type_need)

    # Pass 2: statistics per needed group and per distinct motif in it.
    motif_prob_dict = {}       # group => [count, total, prob, binom]
    motif_prob_dict_each = {}  # (group, motif) => [count, total, prob, binom]
    for group in motif_type_need:
        group_motifs = motif_type_dict[group]
        # Distinct motifs, e.g. ['UGCUUGAA', 'UACUUGAA', 'UAUUUGAG'].
        test_seed = utils.rm_duplicate_list(group_motifs)

        pos_motif_number = len(group_motifs)
        total_motif_number = len(targetrna_seq) - len(test_seed[0]) + 1
        motif_prob, each_motif_prob_dict = calc_motif_prob(
            test_seed, two_nt_motif_prob_dict)
        motif_binom = cumulative_binomial_distribution(
            pos_motif_number, total_motif_number, motif_prob)
        motif_prob_dict[group] = [
            pos_motif_number, total_motif_number, motif_prob, motif_binom
        ]

        for motif, pos_motif_number_each in Counter(group_motifs).items():
            motif_prob_each = each_motif_prob_dict[motif]
            motif_binom_each = cumulative_binomial_distribution(
                pos_motif_number_each, total_motif_number, motif_prob_each)
            # Keyed by (group, motif): motifs of p2_p8_match/p1_p7_match
            # (7 nt) and of p2_p7_match/p3_p8_match (6 nt) can be the same
            # string, and keying by the string alone would let one group
            # overwrite the other group's counts.
            motif_prob_dict_each[(group, motif)] = [
                pos_motif_number_each, total_motif_number,
                motif_prob_each, motif_binom_each
            ]

    # Pass 3: attach the statistics of each site's own group to its record.
    for record in tmp_dict.values():
        targetrna_motif = record[3]  # NEED TO CHECK (index, see docstring)
        motif_type = record[5]       # NEED TO CHECK (index, see docstring)
        groups = _SEED_GROUPS_BY_MOTIF_TYPE.get(motif_type)
        if groups is None:
            print('ERROR: motif_type is wrong...')
            continue
        seed_group = groups[0]
        all_existed_motif_result = motif_prob_dict[seed_group]
        site_motif = utils.reverse_complement(
            targetrna_motif[_SEED_GROUP_SLICES[seed_group]])
        each_existed_motif_result = motif_prob_dict_each[
            (seed_group, site_motif)]
        record.extend(each_existed_motif_result)
        record.extend(all_existed_motif_result)

    return tmp_dict  # [each_existed_motif_result], [all_existed_motif_result]