Beispiel #1
0
def cal_data(index1, index2):
    """
    Collect the tm of every trimmed variant of ``gene[index1:index2]``.

    The start is shifted right by 0..4 positions and the end is shifted left
    by 0..4 positions; ``cal_tm`` is evaluated on each resulting slice. The tm
    of the untrimmed slice is additionally printed.

    :param index1: start position of the untrimmed slice in ``gene``
    :param index2: end position (exclusive) of the untrimmed slice in ``gene``
    :return: list of 25 tm values, ordered by start offset, then end offset
    """
    trim = 5
    # Report the tm of the untrimmed slice before collecting all variants.
    print(cal_tm(gene[index1: index2]))
    return [
        cal_tm(gene[index1 + head: index2 - tail])
        for head in range(trim)
        for tail in range(trim)
    ]
Beispiel #2
0
def cal_first_tm(mean_tm=0.):
    """
    Compute the tm of the first segment (and, when no reference tm is given,
    of the second segment as well) together with their standard deviation.

    :param mean_tm: reference tm; the default ``0.`` means "no reference", in
        which case every first/second cut pair is enumerated
    :return: list of rows. Without a reference each row is
        ``[mid_cut, fir_tm, end_cut, sec_tm, std(fir_tm, sec_tm)]``; with a
        reference each row is ``[mid_cut, fir_tm, std(mean_tm, fir_tm)]``
    """
    spread = max_len - min_len
    rows = []
    for step in range(spread):
        mid_cut = min_len + step
        fir_tm = cal_tm(gene[:mid_cut])

        if mean_tm != 0.:
            # Use the empirical value as the starting point.
            rows.append([mid_cut, fir_tm, np.std([mean_tm, fir_tm])])
            continue

        # No reference: pair this first cut with every allowed second cut.
        for extra in range(spread):
            end_cut = mid_cut + min_len + extra
            sec_tm = cal_tm(gene[mid_cut:end_cut])
            pair_std = np.std([fir_tm, sec_tm])
            rows.append([mid_cut, fir_tm, end_cut, sec_tm, pair_std])

    return rows
Beispiel #3
0
def cal_next_tm(tm_mea=0.):
    """
    Extend the cut positions segment by segment until the end of ``gene``.

    Starts from the rows produced by ``cal_first_tm(tm_mea)`` and, in every
    round, tries each allowed length for the next segment of every surviving
    candidate. Candidates are rows of alternating ``cut, tm`` values followed
    by the standard deviation of all tm values in the row. After each round
    only the rows returned by ``choose(..., count)`` are kept.

    ``choose`` is defined elsewhere; its result is indexed here as a 2-D
    array whose last column is compared as a score -- presumably rows ranked
    by standard deviation (TODO confirm against ``choose``).

    :param tm_mea: reference tm forwarded to ``cal_first_tm``
    :return: ``(index111, tm111)`` -- arrays with the cut positions and the
        tm of each segment of the selected candidate
    :raises ValueError: if the search stalls without producing any complete
        candidate (previously this situation looped forever)
    """

    result = cal_first_tm(tm_mea)

    result = choose(result, count)
    result = np.delete(result, -1, axis=1)  # drop the last column

    last_index = len(gene) - 1

    fir_ans_tem = []  # candidates whose last cut lands exactly on the end
    answer1_tem = []  # candidates whose remaining tail is shorter than min_len
    # Keep extending until at least one candidate reaches the end.
    while len(fir_ans_tem) == 0:
        tem_res = []
        for i in range(len(result)):  # best candidates of the previous round
            fir_cut = int(result[i, -2])  # start of this segment
            for j in range(max_len - min_len):
                sec_cut = fir_cut + min_len + j  # end of this segment
                if sec_cut > last_index:
                    sec_cut = last_index
                tem_tm = cal_tm(gene[fir_cut: sec_cut])  # tm of this segment
                bef_tm = result[i, 1::2]  # all previous tm values
                bef_tm = np.append(bef_tm, tem_tm)
                tm_std = np.std(bef_tm)
                bef_arr = result[i, :].tolist()
                tem_list = bef_arr + [sec_cut, tem_tm, tm_std]

                if fir_cut + min_len > last_index:
                    # TODO: decide whether a short last segment should stand
                    # alone or be merged into the previous one.
                    answer1_tem.append(tem_list)
                    break
                elif sec_cut == last_index:
                    fir_ans_tem.append(tem_list)
                    break
                else:
                    tem_res.append(tem_list)
        # The round may have finished every candidate exactly.
        if len(tem_res) != 0:
            tem_res = choose(tem_res, count)
            result = np.delete(tem_res, -1, axis=1)  # drop the last column
        elif len(fir_ans_tem) == 0:
            # Nothing reached the end and nothing can be extended: `result`
            # would stay unchanged and the next round would be identical, so
            # stop instead of looping forever.
            break

    # Select the final candidate.
    if len(fir_ans_tem) == 0:
        # Stalled search: fall back to the short-tail candidates produced in
        # the most recent round (rows of the same length as the last one).
        latest_len = len(answer1_tem[-1]) if answer1_tem else 0
        fallback = [row for row in answer1_tem if len(row) == latest_len]
        if not fallback:
            raise ValueError("cal_next_tm: no candidate cut sequence reaches the end of gene")
        fir_ans_tem = choose(fallback)
    else:
        fir_ans_tem = choose(fir_ans_tem)
        # Only compare against the short-tail candidates when they all have
        # the same number of columns (rows from different rounds differ).
        if len(answer1_tem) > 0 and len(answer1_tem[0]) == len(answer1_tem[-1]):
            answer1_tem = choose(answer1_tem)
            if fir_ans_tem[0, -1] > answer1_tem[0, -1]:
                fir_ans_tem = answer1_tem
    index111 = np.array(fir_ans_tem[0][:-1:2])  # cut positions (even columns, last column excluded)
    tm111 = np.array(fir_ans_tem[0][1::2])  # tm values (odd columns)

    show_w(index111, tm111, "greedy")

    return index111, tm111
Beispiel #4
0
def cal_all_tm(arr):
    """
    Compute the tm of every segment defined by a set of cut positions and the
    standard deviation of those tm values.

    :param arr: cut positions for the whole gene (an array supporting
        ``astype``); consecutive entries delimit one segment
    :return: ``(np.std(tm_list), tm_list)`` -- the standard deviation and the
        list with the tm of each segment
    """
    cuts = arr.astype(int)
    tm_list = [
        cal_tm(gene[start: stop])
        for start, stop in zip(cuts[:-1], cuts[1:])
    ]
    return np.std(tm_list), tm_list
Beispiel #5
0
def overlap(index_list, tm_list):
    """
    Adjust segment boundaries to reduce the spread of the per-segment tm.

    Phase 1 (trimming) repeatedly tries to move each segment's left bound
    right and right bound left by 0..5 positions, keeping the candidate
    returned by ``choose(..., 1)``. Phase 2 (expansion) visits the inner
    segments in ascending tm order and tries to grow each one into the gaps
    towards its neighbours.

    ``tm_list`` is updated in place with the tm of the adjusted segments.

    :param index_list: cut positions (array supporting ``astype``); must hold
        one more entry than ``tm_list``
    :param tm_list: tm of each original segment; modified in place
    :return: list with one row per segment:
        ``[original left, adjusted left, original right, adjusted right,
        tm of the adjusted segment]``
    """
    index_list = index_list.astype(int)
    # One row per segment; the adjusted bounds start out equal to the originals.
    gene_list = [
        [index_list[i], index_list[i], index_list[i + 1], index_list[i + 1], tm_list[i]]
        for i in range(len(tm_list))
    ]

    over_size = 6
    temp_avg_tm = [0, 1]  # history of accepted std values; sentinel start so the loop is entered

    tem_max_op = 10  # maximum gap allowed between two neighbouring segments

    tem_min_len = min_len - 10  # minimum length of a segment after trimming

    # Phase 1: iterate until the last two accepted std values are equal.
    while temp_avg_tm[-1] != temp_avg_tm[-2]:
        updated = False
        for i in range(len(gene_list)):
            new_tm_list = tm_list.copy()
            tem_result = []
            for j in range(over_size):
                for k in range(over_size):
                    left = gene_list[i][1] + j
                    right = gene_list[i][2] - k

                    if right - left < tem_min_len:  # segment would become too short
                        continue
                    # Gap to the previous segment (or to the start of gene).
                    # NOTE(review): the edge cases test the current bound
                    # rather than the candidate `left` / `right` -- confirm
                    # whether that is intended.
                    if (i == 0 and gene_list[i][1] - 0 > tem_max_op) or (
                            i > 0 and left - gene_list[i - 1][2] > tem_max_op):
                        continue
                    # Gap to the next segment (or to the end of gene).
                    if (i + 1 == len(gene_list) and len(gene) - gene_list[i][2] - 1 > tem_max_op) or (
                            i + 1 < len(gene_list) and gene_list[i + 1][1] - right > tem_max_op):
                        continue

                    tem_tm = cal_tm(gene[left: right])
                    new_tm_list[i] = tem_tm
                    tm_std = np.std(new_tm_list)
                    tem_result.append([left, right, tem_tm, tm_std])
            if len(tem_result) == 0:
                continue
            tem_result = choose(tem_result, 1)
            gene_list[i][1] = int(tem_result[0, 0])
            gene_list[i][2] = int(tem_result[0, 1])
            # Keep the stored tm in sync with the adjusted bounds.
            gene_list[i][4] = tem_result[0, 2]
            tm_list[i] = tem_result[0, 2]
            temp_avg_tm.append(tem_result[0, 3])
            updated = True
        if not updated:
            # No segment admitted any candidate: the state cannot change any
            # more, so another pass would repeat forever.
            break

    show_w(index_list[1:], tm_list, "overlap")

    a = np.argsort(tm_list)

    # Phase 2: after trimming, do one more pass that expands segments.
    for idx in a:
        if idx < 1 or idx + 2 > len(a):  # leave the first and last segment alone
            continue
        test_tm_list = tm_list.copy()
        test_result = []
        left_gap = int(gene_list[idx][1] - gene_list[idx - 1][2])
        right_gap = int(gene_list[idx + 1][1] - gene_list[idx][2])
        for j in range(left_gap):
            for k in range(right_gap):
                new_left = gene_list[idx][1] - j
                new_right = gene_list[idx][2] + k
                test_tm = cal_tm(gene[new_left: new_right])
                test_tm_list[idx] = test_tm
                test_std = np.std(test_tm_list)
                test_result.append([new_left, new_right, test_tm, test_std])
        if len(test_result) == 0:
            continue
        # Indices start at 0.
        test_result = choose(test_result, 1)
        # Store the bounds as ints, as in phase 1, so they stay valid slice
        # indices for callers.
        gene_list[idx][1] = int(test_result[0, 0])
        gene_list[idx][2] = int(test_result[0, 1])
        gene_list[idx][4] = test_result[0, 2]
        tm_list[idx] = test_result[0, 2]
    show_w(index_list[1:], tm_list, "end")

    # Print the length of the gap between each pair of neighbouring segments.
    print("每个间隙的长度:", end=" ")
    for i in range(len(gene_list) - 1):
        print(gene_list[i + 1][1] - gene_list[i][2], end=" ")
    print()
    return gene_list