Esempio n. 1
0
def mm_XG_iRNA_M6A(sequence, species, modification, M_threshold):
    M_threshold = Threshold.choiceThreshold(species, modification, M_threshold)
    mm_importance_dict_sorted_list_num_M6A = [
        1834, 1500, 1549, 1367, 1310, 1459, 5, 1854, 1877, 58, 1843, 498, 42,
        71, 1389, 700, 1775, 213, 906, 942, 81, 16, 1429, 888, 1373, 1817,
        1501, 218, 124, 1380, 1436, 60, 12, 1536
    ]
    XG_iRNA_mm_M6A = load('model/mm/mm_xgb_M6A_10fold')
    # --------------------------------------------------------------
    line = sequence  # bug
    if re.search(r'[^AUCG]', line.strip()):
        nota = "Please input the RNA sequence 'AUCG'."
        return nota
    length = len(line.strip())
    if length < 41:
        nota = "Please input the RNA sequence at least 51 bases! "
        return nota
    mm_n_base_41_M6A = 19
    mm_n_base_list_M6A_proba = []
    mm_M6A_list_num = []
    for base in line.strip()[20:-20]:
        mm_n_base_41_M6A += 1
        file_inter_M6A = open('inter_use/mm_inter_use_M6A.txt', 'w')
        if base == 'A':
            new_line = line.strip()[mm_n_base_41_M6A - 20:mm_n_base_41_M6A +
                                    21]
            # ---f nc dnc tnc tenc pnc nd dnd ncp dpcp onehot dbe-----------------
            print(*iMRMFeatureEtraction.nc(new_line, 41),
                  *iMRMFeatureEtraction.dnc(new_line, 41),
                  *iMRMFeatureEtraction.tnc(new_line, 41),
                  *iMRMFeatureEtraction.tenc(new_line, 41),
                  *iMRMFeatureEtraction.pnc(new_line, 41),
                  *iMRMFeatureEtraction.nd(new_line, 41),
                  *iMRMFeatureEtraction.dnd(new_line, 41),
                  *iMRMFeatureEtraction.ncp(new_line, 41),
                  *iMRMFeatureEtraction.dpcp(new_line, 41),
                  *iMRMFeatureEtraction.onehot(new_line, 41),
                  *iMRMFeatureEtraction.dbe(new_line, 41),
                  file=file_inter_M6A)
            file_inter_M6A.close()
            pddtest = pd.read_csv("inter_use/mm_inter_use_M6A.txt",
                                  sep=' ',
                                  header=None)
            X_test = pddtest.values[:, [
                x - 1 for x in mm_importance_dict_sorted_list_num_M6A
            ]]
            prediction = XG_iRNA_mm_M6A.predict_proba(X_test)
            if prediction[0][1] > M_threshold:
                mm_n_base_list_M6A_proba.append(prediction[0][1])
                mm_M6A_list_num.append(mm_n_base_41_M6A + 1)
    if len(mm_M6A_list_num) > 0:
        mm_M6A_list_num.reverse()
        mm_n_base_list_M6A_proba.reverse()
        n = 0
        for i in [i - 1 for i in mm_M6A_list_num]:
            line = list(line)
            line[i] = '<font color=#00FF00>A</font>(' + str(
                mm_n_base_list_M6A_proba[n]) + ')'
            n += 1
    line = str(line).replace(',', '')
    line = line.replace('[', '')
    line = line.replace(']', '')
    line = line.replace(' ', '')
    line = line.replace('\'', '')
    line = line.replace('\\r', '')
    line_pro = line.replace('fontcolor', 'font color')
    line = line_pro
    pro_list = re.findall('\(.*?\)', line_pro)
    for i in pro_list:
        line = line.replace(i, '')
    return line, line_pro, mm_M6A_list_num
Esempio n. 2
0
def human_XG_iRNA_AI(sequence, species, modification, M_threshold):
    hg_importance_dict_sorted_list_num_AI = [
        1899, 1895, 5, 1, 69, 79, 252, 1892, 19, 1540, 1390, 1441, 1391, 52,
        299, 1931, 1406, 25, 572, 1547, 83, 1983, 303, 1367, 72, 1414, 23, 250,
        61, 110, 1584, 24, 401, 1402, 1449, 1903, 1437, 341, 214, 1894, 1833,
        36, 1574, 739, 1382, 44, 1404, 1526
    ]
    XG_iRNA_hg_AI = load('model/hg/hg_xgb_AI_10fold')

    # --------------------------------------------------------------
    line = sequence
    hg_n_base_51 = 24
    hg_n_base_list_AI_proba = []
    hg_AI_list_num = []
    M_threshold = Threshold.choiceThreshold(species, modification, M_threshold)
    # -----------------------------AI ---------------------------------------------------
    for base in line.strip()[25:-25]:
        hg_n_base_51 += 1
        file_inter_AI = open('inter_use/hg_inter_use_AI.txt', 'w')
        if base == 'A':
            new_line = line.strip()[hg_n_base_51 - 25:hg_n_base_51 + 26]
            # ---f nc dnc tnc tenc pnc nd dnd ncp dpcp onehot dbe-----------------
            print(*iMRMFeatureEtraction.nc(new_line, 51),
                  *iMRMFeatureEtraction.dnc(new_line, 51),
                  *iMRMFeatureEtraction.tnc(new_line, 51),
                  *iMRMFeatureEtraction.tenc(new_line, 51),
                  *iMRMFeatureEtraction.pnc(new_line, 51),
                  *iMRMFeatureEtraction.nd(new_line, 51),
                  *iMRMFeatureEtraction.dnd(new_line, 51),
                  *iMRMFeatureEtraction.ncp(new_line, 51),
                  *iMRMFeatureEtraction.dpcp(new_line, 51),
                  *iMRMFeatureEtraction.onehot(new_line, 51),
                  *iMRMFeatureEtraction.dbe(new_line, 51),
                  file=file_inter_AI)
            file_inter_AI.close()
            pddtest = pd.read_csv("inter_use/hg_inter_use_AI.txt",
                                  sep=' ',
                                  header=None)
            X_test = pddtest.values[:, [
                x - 1 for x in hg_importance_dict_sorted_list_num_AI
            ]]
            prediction = XG_iRNA_hg_AI.predict_proba(X_test)
            if prediction[0][1] > M_threshold:
                hg_n_base_list_AI_proba.append(prediction[0][1])
                hg_AI_list_num.append(hg_n_base_51 + 1)
    if len(hg_AI_list_num) > 0:
        hg_AI_list_num.reverse()
        hg_n_base_list_AI_proba.reverse()
        n = 0
        for i in [i - 1 for i in hg_AI_list_num]:
            line = list(line)
            line[i] = '<font color=#9400D3>A</font>(' + str(
                hg_n_base_list_AI_proba[n]) + ')'
            n += 1
    line = str(line).replace(',', '')
    line = line.replace('[', '')
    line = line.replace(']', '')
    line = line.replace(' ', '')
    line = line.replace('\'', '')
    line = line.replace('\\r', '')
    line_pro = line.replace('fontcolor', 'font color')
    line = line_pro
    pro_list = re.findall('\(.*?\)', line_pro)
    for i in pro_list:
        line = line.replace(i, '')
    return line, line_pro, hg_AI_list_num
Esempio n. 3
0
def mm_XG_iRNA_M5C(sequence, species, modification, M_threshold):
    M_threshold = Threshold.choiceThreshold(species, modification, M_threshold)
    mm_importance_dict_sorted_list_num_M5C = [
        16, 48, 1458, 1197, 299, 859, 1359, 1879, 383, 65, 381, 82, 1519
    ]
    XG_iRNA_mm_M5C = load('model/mm/mm_xgb_M5C_10fold')
    # --------------------------------------------------------------
    line = sequence  # bug
    if re.search(r'[^AUCG]', line.strip()):
        nota = "Please input the RNA sequence 'AUCG'."
        return nota
    length = len(line.strip())
    if length < 41:
        nota = "Please input the RNA sequence at least 51 bases! "
        return nota
    mm_n_base_41_M5C = 19
    mm_n_base_list_M5C_proba = []
    mm_M5C_list_num = []
    for base in line.strip()[20:-20]:
        mm_n_base_41_M5C += 1
        file_inter_M5C = open('inter_use/mm_inter_use_M5C.txt', 'w')
        if base == 'C':
            new_line = line.strip()[mm_n_base_41_M5C - 20:mm_n_base_41_M5C +
                                    21]
            # ---f nc dnc tnc tenc pnc nd dnd ncp dpcp onehot dbe-----------------
            print(*iMRMFeatureEtraction.nc(new_line, 41),
                  *iMRMFeatureEtraction.dnc(new_line, 41),
                  *iMRMFeatureEtraction.tnc(new_line, 41),
                  *iMRMFeatureEtraction.tenc(new_line, 41),
                  *iMRMFeatureEtraction.pnc(new_line, 41),
                  *iMRMFeatureEtraction.nd(new_line, 41),
                  *iMRMFeatureEtraction.dnd(new_line, 41),
                  *iMRMFeatureEtraction.ncp(new_line, 41),
                  *iMRMFeatureEtraction.dpcp(new_line, 41),
                  *iMRMFeatureEtraction.onehot(new_line, 41),
                  *iMRMFeatureEtraction.dbe(new_line, 41),
                  file=file_inter_M5C)
            file_inter_M5C.close()
            pddtest = pd.read_csv("inter_use/mm_inter_use_M5C.txt",
                                  sep=' ',
                                  header=None)
            X_test = pddtest.values[:, [
                x - 1 for x in mm_importance_dict_sorted_list_num_M5C
            ]]
            prediction = XG_iRNA_mm_M5C.predict_proba(X_test)
            if prediction[0][1] > M_threshold:
                mm_n_base_list_M5C_proba.append(prediction[0][1])
                mm_M5C_list_num.append(mm_n_base_41_M5C + 1)
    if len(mm_M5C_list_num) > 0:
        mm_M5C_list_num.reverse()
        mm_n_base_list_M5C_proba.reverse()
        n = 0
        for i in [i - 1 for i in mm_M5C_list_num]:
            line = list(line)
            line[i] = '<font color=#FFA500>C</font>(' + str(
                mm_n_base_list_M5C_proba[n]) + ')'
            n += 1
    line = str(line).replace(',', '')
    line = line.replace('[', '')
    line = line.replace(']', '')
    line = line.replace(' ', '')
    line = line.replace('\'', '')
    line = line.replace('\\r', '')
    line_pro = line.replace('fontcolor', 'font color')
    line = line_pro
    pro_list = re.findall('\(.*?\)', line_pro)
    for i in pro_list:
        line = line.replace(i, '')
    return line, line_pro, mm_M5C_list_num
Esempio n. 4
0
def sc_XG_iRNA_M5C(sequence, species, modification, M_threshold):
    M_threshold = Threshold.choiceThreshold(species, modification, M_threshold)
    importance_dict_sorted_list_num_M5C = [
        299, 1009, 705, 1542, 1522, 1008, 1764, 4, 1862, 1200, 142, 1385, 1358,
        1214, 1842, 27, 693, 1086, 193, 1527, 1904, 1533, 1554, 920, 323, 1879,
        16
    ]
    XG_iRNA_sc_M5C = load('model/sc/sc_xgb_M5C_10fold')
    # --------------------------------------------------------------
    line = sequence  # bug
    if re.search(r'[^AUCG]', line.strip()):
        nota = "Please input the RNA sequence 'AUCG'."
        return nota
    length = len(line.strip())
    if length < 41:
        nota = "Please input the RNA sequence at least 51 bases! "
        return nota
    sc_n_base_41_M5C = 19
    sc_n_base_list_M5C_proba = []
    sc_M5C_list_num = []
    for base in line.strip()[20:-20]:
        sc_n_base_41_M5C += 1
        file_inter_M5C = open('inter_use/sc_inter_use_M5C.txt', 'w')
        if base == 'C':
            new_line = line.strip()[sc_n_base_41_M5C - 20:sc_n_base_41_M5C +
                                    21]
            # ---f nc dnc tnc tenc pnc nd dnd ncp dpcp onehot dbe-----------------
            print(*iMRMFeatureEtraction.nc(new_line, 41),
                  *iMRMFeatureEtraction.dnc(new_line, 41),
                  *iMRMFeatureEtraction.tnc(new_line, 41),
                  *iMRMFeatureEtraction.tenc(new_line, 41),
                  *iMRMFeatureEtraction.pnc(new_line, 41),
                  *iMRMFeatureEtraction.nd(new_line, 41),
                  *iMRMFeatureEtraction.dnd(new_line, 41),
                  *iMRMFeatureEtraction.ncp(new_line, 41),
                  *iMRMFeatureEtraction.dpcp(new_line, 41),
                  *iMRMFeatureEtraction.onehot(new_line, 41),
                  *iMRMFeatureEtraction.dbe(new_line, 41),
                  file=file_inter_M5C)
            file_inter_M5C.close()
            pddtest = pd.read_csv("inter_use/sc_inter_use_M5C.txt",
                                  sep=' ',
                                  header=None)
            X_test = pddtest.values[:, [
                x - 1 for x in importance_dict_sorted_list_num_M5C
            ]]
            prediction = XG_iRNA_sc_M5C.predict_proba(X_test)
            if prediction[0][1] > M_threshold:
                sc_n_base_list_M5C_proba.append(prediction[0][1])
                sc_M5C_list_num.append(sc_n_base_41_M5C + 1)
    if len(sc_M5C_list_num) > 0:
        sc_M5C_list_num.reverse()
        sc_n_base_list_M5C_proba.reverse()
        n = 0
        for i in [i - 1 for i in sc_M5C_list_num]:
            line = list(line)
            line[i] = '<font color=#FFA500>C</font>(' + str(
                sc_n_base_list_M5C_proba[n]) + ')'
            n += 1
    line = str(line).replace(',', '')
    line = line.replace('[', '')
    line = line.replace(']', '')
    line = line.replace(' ', '')
    line = line.replace('\'', '')
    line = line.replace('\\r', '')
    line_pro = line.replace('fontcolor', 'font color')
    line = line_pro
    pro_list = re.findall('\(.*?\)', line_pro)
    for i in pro_list:
        line = line.replace(i, '')
    return line, line_pro, sc_M5C_list_num
Esempio n. 5
0
def mm_XG_iRNA_pse(sequence, species, modification, M_threshold):
    M_threshold = Threshold.choiceThreshold(species, modification, M_threshold)
    # -------------------------------------------------------------
    mm_importance_dict_sorted_list_num_Pse = [
        1441, 1442, 85, 1692, 55, 190, 1678, 101, 25, 221, 122, 128, 60, 692,
        93, 16, 255, 173, 1696, 12, 35, 701, 1715, 67, 183, 150, 808, 147,
        1431, 43, 1463, 5, 1378, 71, 44, 1377, 1385, 1440, 1446, 1373, 278,
        1649, 1697, 1368, 49, 662, 1457, 327, 1384, 767
    ]
    XG_iRNA_mm_Pse = load('model/mm/mm_xgb_Pse_10fold')
    # --------------------------------------------------------------
    line = sequence  # bug
    if re.search(r'[^AUCG]', line.strip()):
        nota = "Please input the RNA sequence 'AUCG'."
        return nota
    length = len(line.strip())
    if length < 21:
        nota = "Please input the RNA sequence at least 51 bases! "
        return nota
    mm_n_base_21 = 9
    mm_n_base_list_pse_proba = []
    mm_pse_list_num = []
    # -----------------------------PSE ---------------------------------------------------
    for base in line.strip()[10:-10]:
        mm_n_base_21 += 1
        file_inter_pse = open('inter_use/mm_inter_use_pse.txt', 'w')
        if base == 'U':
            new_line = line.strip()[mm_n_base_21 - 10:mm_n_base_21 + 11]
            # ---f nc dnc tnc tenc pnc nd dnd ncp dpcp onehot dbe-----------------
            print(*iMRMFeatureEtraction.nc(new_line, 21),
                  *iMRMFeatureEtraction.dnc(new_line, 21),
                  *iMRMFeatureEtraction.tnc(new_line, 21),
                  *iMRMFeatureEtraction.tenc(new_line, 21),
                  *iMRMFeatureEtraction.pnc(new_line, 21),
                  *iMRMFeatureEtraction.nd(new_line, 21),
                  *iMRMFeatureEtraction.dnd(new_line, 21),
                  *iMRMFeatureEtraction.ncp(new_line, 21),
                  *iMRMFeatureEtraction.dpcp(new_line, 21),
                  *iMRMFeatureEtraction.onehot(new_line, 21),
                  *iMRMFeatureEtraction.dbe(new_line, 21),
                  file=file_inter_pse)
            file_inter_pse.close()
            pddtest = pd.read_csv("inter_use/mm_inter_use_pse.txt",
                                  sep=' ',
                                  header=None)
            X_test = pddtest.values[:, [
                x - 1 for x in mm_importance_dict_sorted_list_num_Pse
            ]]
            prediction = XG_iRNA_mm_Pse.predict_proba(X_test)
            if prediction[0][1] > M_threshold:
                mm_n_base_list_pse_proba.append(prediction[0][1])
                mm_pse_list_num.append(mm_n_base_21 + 1)
    if len(mm_pse_list_num) > 0:
        mm_pse_list_num.reverse()
        mm_n_base_list_pse_proba.reverse()
        n = 0
        for i in [i - 1 for i in mm_pse_list_num]:
            line = list(line)
            line[i] = '<font color=#FF3333>U</font>(' + str(
                mm_n_base_list_pse_proba[n]) + ')'
            n += 1
    line = str(line).replace(',', '')
    line = line.replace('[', '')
    line = line.replace(']', '')
    line = line.replace(' ', '')
    line = line.replace('\'', '')
    line = line.replace('\\r', '')
    line_pro = line.replace('fontcolor', 'font color')
    line = line_pro
    pro_list = re.findall('\(.*?\)', line_pro)
    for i in pro_list:
        line = line.replace(i, '')
    return line, line_pro, mm_pse_list_num
Esempio n. 6
0
def sc_XG_iRNA_M1A(sequence, species, modification, M_threshold):
    # -------------------------------------------------------------
    M_threshold = Threshold.choiceThreshold(species, modification, M_threshold)
    importance_dict_sorted_list_num_M1A = [1824, 1831, 1500]
    XG_iRNA_sc_M1A = load('model/sc/sc_xgb_M1A_10fold')
    # --------------------------------------------------------------
    line = sequence
    # bug
    if re.search(r'[^AUCG]', line.strip()):
        nota = "Please input the RNA sequence 'AUCG'."
        return nota
    length = len(line.strip())
    if length < 41:
        nota = "Please input the RNA sequence at least 51 bases! "
        return nota
    sc_n_base_41_M1A = 19
    sc_n_base_list_M1A_proba = []
    sc_M1A_list_num = []
    for base in line.strip()[20:-20]:
        sc_n_base_41_M1A += 1
        file_inter_M1A = open('inter_use/sc_inter_use_M1A.txt', 'w')
        if base == 'A':
            new_line = line.strip()[sc_n_base_41_M1A - 20:sc_n_base_41_M1A +
                                    21]
            # ---f nc dnc tnc tenc pnc nd dnd ncp dpcp onehot dbe-----------------
            print(*iMRMFeatureEtraction.nc(new_line, 41),
                  *iMRMFeatureEtraction.dnc(new_line, 41),
                  *iMRMFeatureEtraction.tnc(new_line, 41),
                  *iMRMFeatureEtraction.tenc(new_line, 41),
                  *iMRMFeatureEtraction.pnc(new_line, 41),
                  *iMRMFeatureEtraction.nd(new_line, 41),
                  *iMRMFeatureEtraction.dnd(new_line, 41),
                  *iMRMFeatureEtraction.ncp(new_line, 41),
                  *iMRMFeatureEtraction.dpcp(new_line, 41),
                  *iMRMFeatureEtraction.onehot(new_line, 41),
                  *iMRMFeatureEtraction.dbe(new_line, 41),
                  file=file_inter_M1A)
            file_inter_M1A.close()
            pddtest = pd.read_csv("inter_use/sc_inter_use_M1A.txt",
                                  sep=' ',
                                  header=None)
            X_test = pddtest.values[:, [
                x - 1 for x in importance_dict_sorted_list_num_M1A
            ]]
            prediction = XG_iRNA_sc_M1A.predict_proba(X_test)
            if prediction[0][1] > M_threshold:
                sc_n_base_list_M1A_proba.append(prediction[0][1])
                sc_M1A_list_num.append(sc_n_base_41_M1A + 1)
    if len(sc_M1A_list_num) > 0:
        sc_M1A_list_num.reverse()
        sc_n_base_list_M1A_proba.reverse()
        n = 0
        for i in [i - 1 for i in sc_M1A_list_num]:
            line = list(line)
            line[i] = '<font color=#1E90FF>A</font>(' + str(
                sc_n_base_list_M1A_proba[n]) + ')'
            n += 1
    line = str(line).replace(',', '')
    line = line.replace('[', '')
    line = line.replace(']', '')
    line = line.replace(' ', '')
    line = line.replace('\'', '')
    line = line.replace('\\r', '')
    line_pro = line.replace('fontcolor', 'font color')
    line = line_pro
    pro_list = re.findall('\(.*?\)', line_pro)
    for i in pro_list:
        line = line.replace(i, '')
    return line, line_pro, sc_M1A_list_num
Esempio n. 7
0
def sc_XG_iRNA_M6A(sequence, species, modification, M_threshold):
    M_threshold = Threshold.choiceThreshold(species, modification, M_threshold)
    importance_dict_sorted_list_num_M6A = [
        1912, 1879, 1905, 1310, 327, 11, 1, 1536, 1309, 53, 18, 1311, 762, 280,
        474, 4, 34, 71, 1908, 1555, 1549, 1142, 728, 186, 29, 7, 1554, 43, 46,
        1117, 75, 1943, 100, 1559, 872, 126, 135, 650, 58, 1411, 981, 542,
        1577, 119, 568, 216, 1441, 552
    ]
    XG_iRNA_sc_M6A = load('model/sc/sc_xgb_M6A_10fold')
    # -------------------------------------------------------------
    line = sequence
    if re.search(r'[^AUCG]', line.strip()):
        nota = "Please input the RNA sequence 'AUCG'."
        return nota
    length = len(line.strip())
    if length < 41:
        nota = "Please input the RNA sequence at least 51 bases! "
        return nota
    sc_n_base_41_M6A = 19
    sc_n_base_list_M6A_proba = []
    sc_M6A_list_num = []
    for base in line.strip()[20:-20]:
        sc_n_base_41_M6A += 1
        file_inter_M6A = open('inter_use/sc_inter_use_M6A.txt', 'w')
        if base == 'A':
            new_line = line.strip()[sc_n_base_41_M6A - 20:sc_n_base_41_M6A +
                                    21]
            # ---f nc dnc tnc tenc pnc nd dnd ncp dpcp onehot dbe-----------------
            print(*iMRMFeatureEtraction.nc(new_line, 41),
                  *iMRMFeatureEtraction.dnc(new_line, 41),
                  *iMRMFeatureEtraction.tnc(new_line, 41),
                  *iMRMFeatureEtraction.tenc(new_line, 41),
                  *iMRMFeatureEtraction.pnc(new_line, 41),
                  *iMRMFeatureEtraction.nd(new_line, 41),
                  *iMRMFeatureEtraction.dnd(new_line, 41),
                  *iMRMFeatureEtraction.ncp(new_line, 41),
                  *iMRMFeatureEtraction.dpcp(new_line, 41),
                  *iMRMFeatureEtraction.onehot(new_line, 41),
                  *iMRMFeatureEtraction.dbe(new_line, 41),
                  file=file_inter_M6A)
            file_inter_M6A.close()
            pddtest = pd.read_csv("inter_use/sc_inter_use_M6A.txt",
                                  sep=' ',
                                  header=None)
            X_test = pddtest.values[:, [
                x - 1 for x in importance_dict_sorted_list_num_M6A
            ]]
            prediction = XG_iRNA_sc_M6A.predict_proba(X_test)
            if prediction[0][1] > M_threshold:
                sc_n_base_list_M6A_proba.append(prediction[0][1])
                sc_M6A_list_num.append(sc_n_base_41_M6A + 1)
    if len(sc_M6A_list_num) > 0:
        sc_M6A_list_num.reverse()
        sc_n_base_list_M6A_proba.reverse()
        n = 0
        for i in [i - 1 for i in sc_M6A_list_num]:
            line = list(line)
            line[i] = '<font color=#00FF00>A</font>(' + str(
                sc_n_base_list_M6A_proba[n]) + ')'
            n += 1
    line = str(line).replace(',', '')
    line = line.replace('[', '')
    line = line.replace(']', '')
    line = line.replace(' ', '')
    line = line.replace('\'', '')
    line = line.replace('\\r', '')
    line_pro = line.replace('fontcolor', 'font color')
    line = line_pro
    pro_list = re.findall('\(.*?\)', line_pro)
    for i in pro_list:
        line = line.replace(i, '')
    return line, line_pro, sc_M6A_list_num
Esempio n. 8
0
def sc_XG_iRNA_pse(sequence, species, modification, M_threshold):
    M_threshold = Threshold.choiceThreshold(species, modification, M_threshold)
    # -------------------------------------------------------------
    sc_importance_dict_sorted_list_num_Pse = [
        1477, 201, 1748, 1759, 1380, 1784, 10, 1121, 693, 1492, 135, 1377,
        1792, 808, 1383, 251, 1800, 1766, 1746, 1, 302, 146, 78, 1488, 34, 242,
        76, 230, 12, 1747, 13, 6, 269, 1753, 714, 38, 182, 77, 324, 97, 165,
        1788, 664, 82, 893, 194
    ]
    XG_iRNA_sc_Pse = load('model/sc/sc_xgb_Pse_10fold')
    # --------------------------------------------------------------
    line = sequence
    line = line.upper()  # bug
    if re.search(r'[^AUCG]', line.strip()):
        nota = "Please input the RNA sequence 'AUCG'."
        return nota
    length = len(line.strip())
    if length < 31:
        nota = "Please input the RNA sequence at least 51 bases! "
        return nota
    sc_n_base_31 = 14
    sc_n_base_list_pse_proba = []
    sc_pse_list_num = []
    # -----------------------------PSE ---------------------------------------------------
    for base in line.strip()[15:-15]:
        sc_n_base_31 += 1
        file_inter_pse = open('inter_use/sc_inter_use_pse.txt', 'w')
        if base == 'U':
            new_line = line.strip()[sc_n_base_31 - 15:sc_n_base_31 + 16]
            # ---f nc dnc tnc tenc pnc nd dnd ncp dpcp onehot dbe-----------------
            print(*iMRMFeatureEtraction.nc(new_line, 31),
                  *iMRMFeatureEtraction.dnc(new_line, 31),
                  *iMRMFeatureEtraction.tnc(new_line, 31),
                  *iMRMFeatureEtraction.tenc(new_line, 31),
                  *iMRMFeatureEtraction.pnc(new_line, 31),
                  *iMRMFeatureEtraction.nd(new_line, 31),
                  *iMRMFeatureEtraction.dnd(new_line, 31),
                  *iMRMFeatureEtraction.ncp(new_line, 31),
                  *iMRMFeatureEtraction.dpcp(new_line, 31),
                  *iMRMFeatureEtraction.onehot(new_line, 31),
                  *iMRMFeatureEtraction.dbe(new_line, 31),
                  file=file_inter_pse)
            file_inter_pse.close()
            pddtest = pd.read_csv("inter_use/sc_inter_use_pse.txt",
                                  sep=' ',
                                  header=None)
            X_test = pddtest.values[:, [
                x - 1 for x in sc_importance_dict_sorted_list_num_Pse
            ]]
            prediction = XG_iRNA_sc_Pse.predict_proba(X_test)
            if prediction[0][1] > M_threshold:
                sc_n_base_list_pse_proba.append(prediction[0][1])
                sc_pse_list_num.append(sc_n_base_31 + 1)
    if len(sc_pse_list_num) > 0:
        sc_pse_list_num.reverse()
        sc_n_base_list_pse_proba.reverse()
        n = 0
        for i in [i - 1 for i in sc_pse_list_num]:
            line = list(line)
            line[i] = '<font color=#FF3333>U</font>(' + str(
                sc_n_base_list_pse_proba[n]) + ')'
            n += 1
    line = str(line).replace(',', '')
    line = line.replace('[', '')
    line = line.replace(']', '')
    line = line.replace(' ', '')
    line = line.replace('\'', '')
    line = line.replace('\\r', '')
    line_pro = line.replace('fontcolor', 'font color')
    line = line_pro
    pro_list = re.findall('\(.*?\)', line_pro)
    for i in pro_list:
        line = line.replace(i, '')
    return line, line_pro, sc_pse_list_num
Esempio n. 9
0
def human_XG_iRNA_M5C(sequence, species, modification, M_threshold):
    importance_dict_sorted_list_num_M5C = [
        16, 144, 1442, 82, 18, 1225, 210, 1401, 1430, 80, 57, 68, 1562, 74, 950
    ]
    XG_iRNA_hg_M5C = load('model/hg/hg_xgb_M5C_10fold')
    M_threshold = Threshold.choiceThreshold(species, modification, M_threshold)
    # --------------------------------------------------------------
    line = sequence
    line = line.upper()  # bug
    if re.search(r'[^AUCG]', line.strip()):
        nota = "Please input the RNA sequence 'AUCG'."
        return nota
    length = len(line.strip())
    if length < 41:
        nota = "Please input the RNA sequence at least 51 bases! "
        return nota
    hg_n_base_41_M5C = 19
    hg_n_base_list_M5C_proba = []
    hg_M5C_list_num = []
    for base in line.strip()[20:-20]:
        hg_n_base_41_M5C += 1
        file_inter_M5C = open('inter_use/hg_inter_use_M5C.txt', 'w')
        if base == 'C':
            new_line = line.strip()[hg_n_base_41_M5C - 20:hg_n_base_41_M5C +
                                    21]
            # ---f nc dnc tnc tenc pnc nd dnd ncp dpcp onehot dbe-----------------
            print(*iMRMFeatureEtraction.nc(new_line, 41),
                  *iMRMFeatureEtraction.dnc(new_line, 41),
                  *iMRMFeatureEtraction.tnc(new_line, 41),
                  *iMRMFeatureEtraction.tenc(new_line, 41),
                  *iMRMFeatureEtraction.pnc(new_line, 41),
                  *iMRMFeatureEtraction.nd(new_line, 41),
                  *iMRMFeatureEtraction.dnd(new_line, 41),
                  *iMRMFeatureEtraction.ncp(new_line, 41),
                  *iMRMFeatureEtraction.dpcp(new_line, 41),
                  *iMRMFeatureEtraction.onehot(new_line, 41),
                  *iMRMFeatureEtraction.dbe(new_line, 41),
                  file=file_inter_M5C)
            file_inter_M5C.close()
            pddtest = pd.read_csv("inter_use/hg_inter_use_M5C.txt",
                                  sep=' ',
                                  header=None)
            X_test = pddtest.values[:, [
                x - 1 for x in importance_dict_sorted_list_num_M5C
            ]]
            prediction = XG_iRNA_hg_M5C.predict_proba(X_test)
            if prediction[0][1] > M_threshold:
                hg_n_base_list_M5C_proba.append(prediction[0][1])
                hg_M5C_list_num.append(hg_n_base_41_M5C + 1)
    if len(hg_M5C_list_num) > 0:
        hg_M5C_list_num.reverse()
        hg_n_base_list_M5C_proba.reverse()
        n = 0
        for i in [i - 1 for i in hg_M5C_list_num]:
            line = list(line)
            line[i] = '<font color=#FFA500>C</font>(' + str(
                hg_n_base_list_M5C_proba[n]) + ')'
            n += 1
    line = str(line).replace(',', '')
    line = line.replace('[', '')
    line = line.replace(']', '')
    line = line.replace(' ', '')
    line = line.replace('\'', '')
    line = line.replace('\\r', '')
    line_pro = line.replace('fontcolor', 'font color')
    line = line_pro
    pro_list = re.findall('\(.*?\)', line_pro)
    for i in pro_list:
        line = line.replace(i, '')
    return line, line_pro, hg_M5C_list_num
Esempio n. 10
0
def human_XG_iRNA_M6A(sequence, species, modification, M_threshold):
    importance_dict_sorted_list_num_M6A = [
        1834, 1500, 1831, 1503, 16, 1446, 1489, 1882, 30, 113, 997, 1545, 1857,
        1892, 87, 4, 147, 1866, 209, 369, 1395, 1366, 1520, 1401, 1422, 1397,
        1398, 279, 1454, 40, 243, 1417, 1498, 42, 81, 377, 1441, 51, 12, 1557,
        17, 404, 96, 724
    ]
    XG_iRNA_hg_M6A = load('model/hg/hg_xgb_M6A_10fold')

    M_threshold = Threshold.choiceThreshold(species, modification, M_threshold)
    # -------------------------------------------------------------
    line = sequence
    line = line.upper()  # bug
    if re.search(r'[^AUCG]', line.strip()):
        nota = "Please input the RNA sequence 'AUCG'."
        return nota
    length = len(line.strip())
    if length < 41:
        nota = "Please input the RNA sequence at least 51 bases! "
        return nota
    hg_n_base_41_M6A = 19
    hg_n_base_list_M6A_proba = []
    hg_M6A_list_num = []
    for base in line.strip()[20:-20]:
        hg_n_base_41_M6A += 1
        file_inter_M6A = open('inter_use/hg_inter_use_M6A.txt', 'w')
        if base == 'A':
            new_line = line.strip()[hg_n_base_41_M6A - 20:hg_n_base_41_M6A +
                                    21]
            # ---f nc dnc tnc tenc pnc nd dnd ncp dpcp onehot dbe-----------------
            print(*iMRMFeatureEtraction.nc(new_line, 41),
                  *iMRMFeatureEtraction.dnc(new_line, 41),
                  *iMRMFeatureEtraction.tnc(new_line, 41),
                  *iMRMFeatureEtraction.tenc(new_line, 41),
                  *iMRMFeatureEtraction.pnc(new_line, 41),
                  *iMRMFeatureEtraction.nd(new_line, 41),
                  *iMRMFeatureEtraction.dnd(new_line, 41),
                  *iMRMFeatureEtraction.ncp(new_line, 41),
                  *iMRMFeatureEtraction.dpcp(new_line, 41),
                  *iMRMFeatureEtraction.onehot(new_line, 41),
                  *iMRMFeatureEtraction.dbe(new_line, 41),
                  file=file_inter_M6A)
            file_inter_M6A.close()
            pddtest = pd.read_csv("inter_use/hg_inter_use_M6A.txt",
                                  sep=' ',
                                  header=None)
            X_test = pddtest.values[:, [
                x - 1 for x in importance_dict_sorted_list_num_M6A
            ]]
            prediction = XG_iRNA_hg_M6A.predict_proba(X_test)
            if prediction[0][1] > M_threshold:
                hg_n_base_list_M6A_proba.append(prediction[0][1])
                hg_M6A_list_num.append(hg_n_base_41_M6A + 1)
    if len(hg_M6A_list_num) > 0:
        hg_M6A_list_num.reverse()
        hg_n_base_list_M6A_proba.reverse()
        n = 0
        for i in [i - 1 for i in hg_M6A_list_num]:
            line = list(line)
            line[i] = '<font color=#00FF00>A</font>(' + str(
                hg_n_base_list_M6A_proba[n]) + ')'
            n += 1
    line = str(line).replace(',', '')
    line = line.replace('[', '')
    line = line.replace(']', '')
    line = line.replace(' ', '')
    line = line.replace('\'', '')
    line = line.replace('\\r', '')
    line_pro = line.replace('fontcolor', 'font color')
    line = line_pro
    pro_list = re.findall('\(.*?\)', line_pro)
    for i in pro_list:
        line = line.replace(i, '')
    return line, line_pro, hg_M6A_list_num
Esempio n. 11
0
def human_XG_iRNA_M1A(sequence, species, modification, M_threshold):
    # -------------------------------------------------------------
    importance_dict_sorted_list_num_M1A = [
        1834, 1831, 16, 1500, 1503, 3, 9, 1, 19, 4, 20, 6, 2, 1378, 1386, 1872,
        30, 1520, 1529, 1484, 243, 1002, 1372, 259, 1873, 200, 1817, 312, 1400,
        584, 572, 1404, 943, 239, 41, 190, 1415, 34, 1455, 64, 1856
    ]
    XG_iRNA_hg_M1A = load('model/hg/hg_xgb_M1A_10fold')
    # --------------------------------------------------------------
    M_threshold = Threshold.choiceThreshold(species, modification, M_threshold)
    line = sequence  # bug
    hg_n_base_41_M1A = 19
    hg_n_base_list_M1A_proba = []
    hg_M1A_list_num = []
    for base in line.strip()[20:-20]:
        hg_n_base_41_M1A += 1
        file_inter_M1A = open('inter_use/hg_inter_use_M1A.txt', 'w')
        if base == 'A':
            new_line = line.strip()[hg_n_base_41_M1A - 20:hg_n_base_41_M1A +
                                    21]
            # ---f nc dnc tnc tenc pnc nd dnd ncp dpcp onehot dbe-----------------
            print(*iMRMFeatureEtraction.nc(new_line, 41),
                  *iMRMFeatureEtraction.dnc(new_line, 41),
                  *iMRMFeatureEtraction.tnc(new_line, 41),
                  *iMRMFeatureEtraction.tenc(new_line, 41),
                  *iMRMFeatureEtraction.pnc(new_line, 41),
                  *iMRMFeatureEtraction.nd(new_line, 41),
                  *iMRMFeatureEtraction.dnd(new_line, 41),
                  *iMRMFeatureEtraction.ncp(new_line, 41),
                  *iMRMFeatureEtraction.dpcp(new_line, 41),
                  *iMRMFeatureEtraction.onehot(new_line, 41),
                  *iMRMFeatureEtraction.dbe(new_line, 41),
                  file=file_inter_M1A)
            file_inter_M1A.close()
            pddtest = pd.read_csv("inter_use/hg_inter_use_M1A.txt",
                                  sep=' ',
                                  header=None)
            X_test = pddtest.values[:, [
                x - 1 for x in importance_dict_sorted_list_num_M1A
            ]]
            prediction = XG_iRNA_hg_M1A.predict_proba(X_test)
            if prediction[0][1] > M_threshold:
                hg_n_base_list_M1A_proba.append(prediction[0][1])
                hg_M1A_list_num.append(hg_n_base_41_M1A + 1)
    if len(hg_M1A_list_num) > 0:
        hg_M1A_list_num.reverse()
        hg_n_base_list_M1A_proba.reverse()
        n = 0
        for i in [i - 1 for i in hg_M1A_list_num]:
            line = list(line)
            line[i] = '<font color=#1E90FF>A</font>(' + str(
                hg_n_base_list_M1A_proba[n]) + ')'
            n += 1
    line = str(line).replace(',', '')
    line = line.replace('[', '')
    line = line.replace(']', '')
    line = line.replace(' ', '')
    line = line.replace('\'', '')
    line = line.replace('\\r', '')
    line_pro = line.replace('fontcolor', 'font color')
    line = line_pro
    pro_list = re.findall('\(.*?\)', line_pro)
    for i in pro_list:
        line = line.replace(i, '')
    return line, line_pro, hg_M1A_list_num
Esempio n. 12
0
def human_XG_iRNA_pse(sequence, species, modification, M_threshold):
    # -------------------------------------------------------------
    hg_importance_dict_sorted_list_num_Pse = [
        16, 22, 1441, 1389, 1692, 300, 661, 406, 1666, 1660, 1432, 1698, 1695,
        338, 738, 10, 1680, 808, 1414, 204, 77, 940, 69, 1408, 74, 980, 31,
        234, 8, 292, 178, 1382, 1458, 1, 1689, 13, 1373, 39, 237, 1369, 1444,
        245, 66, 54, 1434, 1417, 1396
    ]
    XG_iRNA_hg_Pse = load('model/hg/hg_xgb_Pse_10fold')
    # --------------------------------------------------------------
    line = sequence
    hg_n_base_21 = 9
    hg_n_base_list_pse_proba = []
    hg_pse_list_num = []
    M_threshold = Threshold.choiceThreshold(species, modification, M_threshold)
    # -----------------------------PSE ---------------------------------------------------
    for base in line.strip()[10:-10]:
        hg_n_base_21 += 1
        file_inter_pse = open('inter_use/hg_inter_use_pse.txt', 'w')
        if base == 'U':
            new_line = line.strip()[hg_n_base_21 - 10:hg_n_base_21 + 11]
            # ---f nc dnc tnc tenc pnc nd dnd ncp dpcp onehot dbe-----------------
            print(*iMRMFeatureEtraction.nc(new_line, 21),
                  *iMRMFeatureEtraction.dnc(new_line, 21),
                  *iMRMFeatureEtraction.tnc(new_line, 21),
                  *iMRMFeatureEtraction.tenc(new_line, 21),
                  *iMRMFeatureEtraction.pnc(new_line, 21),
                  *iMRMFeatureEtraction.nd(new_line, 21),
                  *iMRMFeatureEtraction.dnd(new_line, 21),
                  *iMRMFeatureEtraction.ncp(new_line, 21),
                  *iMRMFeatureEtraction.dpcp(new_line, 21),
                  *iMRMFeatureEtraction.onehot(new_line, 21),
                  *iMRMFeatureEtraction.dbe(new_line, 21),
                  file=file_inter_pse)
            file_inter_pse.close()
            pddtest = pd.read_csv("inter_use/hg_inter_use_pse.txt",
                                  sep=' ',
                                  header=None)
            X_test = pddtest.values[:, [
                x - 1 for x in hg_importance_dict_sorted_list_num_Pse
            ]]
            prediction = XG_iRNA_hg_Pse.predict_proba(X_test)
            if prediction[0][1] > M_threshold:
                hg_n_base_list_pse_proba.append(prediction[0][1])
                hg_pse_list_num.append(hg_n_base_21 + 1)
    if len(hg_pse_list_num) > 0:
        hg_pse_list_num.reverse()
        hg_n_base_list_pse_proba.reverse()
        n = 0
        for i in [i - 1 for i in hg_pse_list_num]:
            line = list(line)
            line[i] = '<font color=#FF3333>U</font>(' + str(
                hg_n_base_list_pse_proba[n]) + ')'
            n += 1
    line = str(line).replace(',', '')
    line = line.replace('[', '')
    line = line.replace(']', '')
    line = line.replace(' ', '')
    line = line.replace('\'', '')
    line = line.replace('\\r', '')
    line_pro = line.replace('fontcolor', 'font color')
    line = line_pro
    pro_list = re.findall('\(.*?\)', line_pro)
    for i in pro_list:
        line = line.replace(i, '')
    return line, line_pro, hg_pse_list_num