def seed_grouping(seq):
    """Map motif-type labels to seed-match group names.

    Each label in ``seq`` is translated to the seed group it belongs to:

    * ``'8mer'``                  -> ``'p1_p8_match'``
    * ``'8mer-1A'``, ``'7mer-m8'`` -> ``'p2_p8_match'``
    * ``'7mer-m1'``               -> ``'p1_p7_match'``
    * ``'7mer-1A'``, ``'6mer-m7'`` -> ``'p2_p7_match'``
    * ``'6mer-m8'``               -> ``'p3_p8_match'``

    Any other label is reported on stdout and contributes nothing.

    Args:
        seq: iterable of motif-type labels.

    Returns:
        Whatever ``utils.rm_duplicate_list`` returns for the collected
        group names (presumably the names with duplicates removed).
    """
    # (accepted labels, resulting seed group), checked in this order.
    grouping_rules = (
        (('8mer',), 'p1_p8_match'),
        (('8mer-1A', '7mer-m8'), 'p2_p8_match'),
        (('7mer-m1',), 'p1_p7_match'),
        (('7mer-1A', '6mer-m7'), 'p2_p7_match'),
        (('6mer-m8',), 'p3_p8_match'),
    )

    collected_groups = []
    for motif_label in seq:
        for accepted_labels, group_name in grouping_rules:
            if motif_label in accepted_labels:
                collected_groups.append(group_name)
                break
        else:
            # No rule matched this label.
            print('ERROR: motif_type is wrong...')

    return utils.rm_duplicate_list(collected_groups)
Beispiel #2
0
def seed_grouping(seq):
    """Convert motif-type labels into their seed-match group names.

    Recognised labels and the group each one yields:
    ``'8mer'`` -> ``'p1_p8_match'``; ``'8mer-1A'`` / ``'7mer-m8'`` ->
    ``'p2_p8_match'``; ``'7mer-m1'`` -> ``'p1_p7_match'``; ``'7mer-1A'`` /
    ``'6mer-m7'`` -> ``'p2_p7_match'``; ``'6mer-m8'`` -> ``'p3_p8_match'``.
    An unrecognised label prints an error line and is otherwise skipped.

    Args:
        seq: iterable of motif-type labels.

    Returns:
        The result of ``utils.rm_duplicate_list`` applied to the group names
        gathered in input order (presumably a duplicate-free list).
    """
    # Flat (label, seed group) pairs, in the order the labels are tested.
    label_group_pairs = (
        ('8mer', 'p1_p8_match'),
        ('8mer-1A', 'p2_p8_match'),
        ('7mer-m8', 'p2_p8_match'),
        ('7mer-m1', 'p1_p7_match'),
        ('7mer-1A', 'p2_p7_match'),
        ('6mer-m7', 'p2_p7_match'),
        ('6mer-m8', 'p3_p8_match'),
    )

    gathered = []
    for candidate in seq:
        resolved = next(
            (group for label, group in label_group_pairs
             if candidate == label),
            None,
        )
        if resolved is None:
            print('ERROR: motif_type is wrong...')
        else:
            gathered.append(resolved)

    gathered = utils.rm_duplicate_list(gathered)
    return gathered
def calc_motif_prob_old_version(seq, two_nt_motif_prob_dict):
    """Combine two-letter window probabilities into one motif probability.

    For every window start ``0 .. len(seq[0]) - 2`` the two-character slice
    of each entry of ``seq`` is taken, the slices are passed through
    ``utils.rm_duplicate_list``, and the probabilities of the remaining
    slices are added up.  The per-position sums are then multiplied together.

    Args:
        seq: indexable collection of strings; the number of window positions
            is derived from the first entry (presumably all entries share
            that length).
        two_nt_motif_prob_dict: mapping from a two-character slice to its
            probability; every slice encountered must be a key.

    Returns:
        The product of the per-position probability sums, starting from
        ``1.``.
    """
    position_count = len(seq[0]) - 1

    # One de-duplicated list of two-letter slices per window position.
    slices_by_position = [
        utils.rm_duplicate_list([entry[start:start + 2] for entry in seq])
        for start in range(position_count)
    ]

    motif_prob = 1.
    for position_slices in slices_by_position:
        # Accumulate explicitly, left to right, as plain float additions.
        position_total = 0.
        for dinucleotide in position_slices:
            position_total += two_nt_motif_prob_dict[dinucleotide]
        motif_prob *= position_total
    return motif_prob
Beispiel #4
0
def calc_motif_prob_old_version(seq, two_nt_motif_prob_dict):
    """Return the product of summed two-letter probabilities per position.

    The strings in ``seq`` are scanned with a sliding window of width two.
    At each window position the slices from all strings are collected and
    run through ``utils.rm_duplicate_list``; the probabilities of the
    resulting slices (looked up in ``two_nt_motif_prob_dict``) are summed,
    and the sums of all positions are multiplied.

    Args:
        seq: indexable collection of strings; ``seq[0]`` fixes how many
            window positions are scanned (presumably all strings are equally
            long).
        two_nt_motif_prob_dict: mapping of two-character slice to
            probability.

    Returns:
        The running product of the per-position sums, initialised to ``1.``.
    """
    window_starts = range(len(seq[0]) - 1)

    # Phase 1: gather the de-duplicated slices seen at each position.
    per_position_motifs = []
    for offset in window_starts:
        window_slices = [member[offset:offset + 2] for member in seq]
        per_position_motifs.append(utils.rm_duplicate_list(window_slices))

    # Phase 2: sum within a position, multiply across positions.
    result = 1.
    for motifs_here in per_position_motifs:
        subtotal = 0.
        for two_nt in motifs_here:
            subtotal += two_nt_motif_prob_dict[two_nt]
        result *= subtotal
    return result
# Slice bounds (start, end) of the target-RNA motif that define each seed
# group; the slice is reverse-complemented before it is stored.
_SEED_GROUP_SLICES = {
    'p1_p8_match': (0, 8),
    'p2_p8_match': (1, 8),
    'p1_p7_match': (0, 7),
    'p2_p7_match': (1, 7),
    'p3_p8_match': (2, 8),
}

# motif type -> seed groups the site contributes a motif to.  The FIRST
# entry is the site's own (primary) group, i.e. the one it is scored against.
_SEED_GROUPS_BY_MOTIF_TYPE = {
    '8mer': ('p1_p8_match', 'p2_p8_match', 'p1_p7_match',
             'p2_p7_match', 'p3_p8_match'),
    '8mer-1A': ('p2_p8_match', 'p2_p7_match', 'p3_p8_match'),
    '7mer-m8': ('p2_p8_match', 'p2_p7_match', 'p3_p8_match'),
    '7mer-m1': ('p1_p7_match', 'p2_p7_match'),
    '7mer-1A': ('p2_p7_match',),
    '6mer-m7': ('p2_p7_match',),
    '6mer-m8': ('p3_p8_match',),
}


def motif_occurrence(mirna_seq, targetrna_seq, tmp_dict):
    """Append seed-motif occurrence statistics to every record of ``tmp_dict``.

    Each record's motif type decides which seed groups it contributes a
    reverse-complemented motif slice to (see ``_SEED_GROUPS_BY_MOTIF_TYPE``)
    and which group is its primary one.  For every primary group in use, the
    function computes group-level statistics over all motifs collected for
    that group, and per-motif statistics for each distinct motif in it.  Each
    record is then extended in place with eight values::

        [motif_count, total_windows, motif_prob, motif_binom,      # its motif
         group_count, total_windows, group_prob, group_binom]      # its group

    where ``total_windows`` is
    ``len(targetrna_seq) - motif_length + 1``.

    Per-motif statistics are stored under ``(seed group, motif)``.  Keying
    them by the motif alone lets groups of equal motif length (p2_p8/p1_p7
    and p2_p7/p3_p8) overwrite each other's counts when they share a
    sequence.

    Args:
        mirna_seq: not used by this function; kept for interface
            compatibility.
        targetrna_seq: target RNA sequence; passed to ``Markov_Model`` and
            used for its length.
        tmp_dict: mapping of record id to a list.  Index 3 is read as the
            target-RNA motif string and index 5 as the motif type.
            NOTE: the original author flagged both indices "NEED TO CHECK".

    Returns:
        ``tmp_dict`` itself, with the recognised records extended in place.
        Records whose motif type is not recognised are reported on stdout
        (once) and left unchanged.
    """
    two_nt_motif_prob_dict = Markov_Model(targetrna_seq)

    motif_type_need = []
    motif_type_dict = {group: [] for group in _SEED_GROUP_SLICES}
    # record id -> (primary seed group, the record's motif in that group)
    primary_key_by_record = {}

    # Pass 1: collect the motifs every record contributes to each group.
    for record_id, record in tmp_dict.items():
        targetrna_motif = record[3]  # NEED TO CHECK!!
        motif_type = record[5]  # NEED TO CHECK!!
        groups = _SEED_GROUPS_BY_MOTIF_TYPE.get(motif_type)
        if groups is None:
            print('ERROR: motif_type is wrong...')
            continue

        primary_group = groups[0]
        motif_type_need.append(primary_group)
        for group in groups:
            start, end = _SEED_GROUP_SLICES[group]
            seed_motif = utils.reverse_complement(targetrna_motif[start:end])
            motif_type_dict[group].append(seed_motif)
            if group == primary_group:
                primary_key_by_record[record_id] = (group, seed_motif)

    motif_type_need = utils.rm_duplicate_list(motif_type_need)

    # Pass 2: statistics per needed group and per distinct motif within it.
    motif_prob_dict = {}       # group -> [count, total, prob, binom]
    motif_prob_dict_each = {}  # (group, motif) -> [count, total, prob, binom]

    for group in motif_type_need:
        group_motifs = motif_type_dict[group]
        test_seed = utils.rm_duplicate_list(group_motifs)

        # All motifs of the group taken together.
        pos_motif_number = len(group_motifs)
        total_motif_number = len(targetrna_seq) - len(test_seed[0]) + 1
        motif_prob, each_motif_prob_dict = calc_motif_prob(
            test_seed, two_nt_motif_prob_dict)
        motif_binom = cumulative_binomial_distribution(
            pos_motif_number, total_motif_number, motif_prob)
        motif_prob_dict[group] = [
            pos_motif_number, total_motif_number, motif_prob, motif_binom
        ]

        # Each distinct motif of the group on its own.
        for motif, motif_count in Counter(group_motifs).items():
            motif_prob_each = each_motif_prob_dict[motif]
            motif_binom_each = cumulative_binomial_distribution(
                motif_count, total_motif_number, motif_prob_each)
            motif_prob_dict_each[(group, motif)] = [
                motif_count, total_motif_number, motif_prob_each,
                motif_binom_each
            ]

    # Pass 3: attach per-motif results, then group results, to each record.
    for record_id, (group, motif) in primary_key_by_record.items():
        record = tmp_dict[record_id]
        record.extend(motif_prob_dict_each[(group, motif)])
        record.extend(motif_prob_dict[group])

    return tmp_dict  # [each_existed_motif_result], [all_existed_motif_result]
    '''
Beispiel #6
0
# Slice bounds (start, end) of the target-RNA motif that define each seed
# group; the slice is reverse-complemented before it is stored.
_SEED_GROUP_SLICES = {
    'p1_p8_match': (0, 8),
    'p2_p8_match': (1, 8),
    'p1_p7_match': (0, 7),
    'p2_p7_match': (1, 7),
    'p3_p8_match': (2, 8),
}

# motif type -> seed groups the site contributes a motif to.  The FIRST
# entry is the site's own (primary) group, i.e. the one it is scored against.
_SEED_GROUPS_BY_MOTIF_TYPE = {
    '8mer': ('p1_p8_match', 'p2_p8_match', 'p1_p7_match',
             'p2_p7_match', 'p3_p8_match'),
    '8mer-1A': ('p2_p8_match', 'p2_p7_match', 'p3_p8_match'),
    '7mer-m8': ('p2_p8_match', 'p2_p7_match', 'p3_p8_match'),
    '7mer-m1': ('p1_p7_match', 'p2_p7_match'),
    '7mer-1A': ('p2_p7_match',),
    '6mer-m7': ('p2_p7_match',),
    '6mer-m8': ('p3_p8_match',),
}


def motif_occurrence(mirna_seq, targetrna_seq, tmp_dict):
    """Append seed-motif occurrence statistics to every record of ``tmp_dict``.

    Each record's motif type decides which seed groups it contributes a
    reverse-complemented motif slice to (see ``_SEED_GROUPS_BY_MOTIF_TYPE``)
    and which group is its primary one.  For every primary group in use, the
    function computes group-level statistics over all motifs collected for
    that group, and per-motif statistics for each distinct motif in it.  Each
    record is then extended in place with eight values::

        [motif_count, total_windows, motif_prob, motif_binom,      # its motif
         group_count, total_windows, group_prob, group_binom]      # its group

    where ``total_windows`` is
    ``len(targetrna_seq) - motif_length + 1``.

    Per-motif statistics are stored under ``(seed group, motif)``.  Keying
    them by the motif alone lets groups of equal motif length (p2_p8/p1_p7
    and p2_p7/p3_p8) overwrite each other's counts when they share a
    sequence.

    Args:
        mirna_seq: not used by this function; kept for interface
            compatibility.
        targetrna_seq: target RNA sequence; passed to ``Markov_Model`` and
            used for its length.
        tmp_dict: mapping of record id to a list.  Index 3 is read as the
            target-RNA motif string and index 5 as the motif type.
            NOTE: the original author flagged both indices "NEED TO CHECK".

    Returns:
        ``tmp_dict`` itself, with the recognised records extended in place.
        Records whose motif type is not recognised are reported on stdout
        (once) and left unchanged.
    """
    two_nt_motif_prob_dict = Markov_Model(targetrna_seq)

    motif_type_need = []
    motif_type_dict = {group: [] for group in _SEED_GROUP_SLICES}
    # record id -> (primary seed group, the record's motif in that group)
    primary_key_by_record = {}

    # Pass 1: collect the motifs every record contributes to each group.
    for record_id, record in tmp_dict.items():
        targetrna_motif = record[3]  # NEED TO CHECK!!
        motif_type = record[5]  # NEED TO CHECK!!
        groups = _SEED_GROUPS_BY_MOTIF_TYPE.get(motif_type)
        if groups is None:
            print('ERROR: motif_type is wrong...')
            continue

        primary_group = groups[0]
        motif_type_need.append(primary_group)
        for group in groups:
            start, end = _SEED_GROUP_SLICES[group]
            seed_motif = utils.reverse_complement(targetrna_motif[start:end])
            motif_type_dict[group].append(seed_motif)
            if group == primary_group:
                primary_key_by_record[record_id] = (group, seed_motif)

    motif_type_need = utils.rm_duplicate_list(motif_type_need)

    # Pass 2: statistics per needed group and per distinct motif within it.
    motif_prob_dict = {}       # group -> [count, total, prob, binom]
    motif_prob_dict_each = {}  # (group, motif) -> [count, total, prob, binom]

    for group in motif_type_need:
        group_motifs = motif_type_dict[group]
        test_seed = utils.rm_duplicate_list(group_motifs)

        # All motifs of the group taken together.
        pos_motif_number = len(group_motifs)
        total_motif_number = len(targetrna_seq) - len(test_seed[0]) + 1
        motif_prob, each_motif_prob_dict = calc_motif_prob(
            test_seed, two_nt_motif_prob_dict)
        motif_binom = cumulative_binomial_distribution(
            pos_motif_number, total_motif_number, motif_prob)
        motif_prob_dict[group] = [
            pos_motif_number, total_motif_number, motif_prob, motif_binom
        ]

        # Each distinct motif of the group on its own.
        for motif, motif_count in Counter(group_motifs).items():
            motif_prob_each = each_motif_prob_dict[motif]
            motif_binom_each = cumulative_binomial_distribution(
                motif_count, total_motif_number, motif_prob_each)
            motif_prob_dict_each[(group, motif)] = [
                motif_count, total_motif_number, motif_prob_each,
                motif_binom_each
            ]

    # Pass 3: attach per-motif results, then group results, to each record.
    for record_id, (group, motif) in primary_key_by_record.items():
        record = tmp_dict[record_id]
        record.extend(motif_prob_dict_each[(group, motif)])
        record.extend(motif_prob_dict[group])

    return tmp_dict  # [each_existed_motif_result], [all_existed_motif_result]
    '''