Exemple #1
0
def add_CN(chrlen, cn_num, del_rate, min_cn_size, exp_theta, amp_p):
    """Sample copy-number (CN) events for one branch and update chromosome lengths.

    The number of events is drawn from a Poisson distribution with mean
    ``cn_num``. Each event affects a single allele, is either a deletion or
    an amplification, and changes the length of one chromosome in a copy of
    ``chrlen``.

    Args:
        chrlen: Nested list indexed as ``chrlen[allele][chromosome]`` holding
            chromosome lengths. It is copied row by row and not modified.
        cn_num: Mean of the Poisson distribution for the number of events.
        del_rate: Probability that an event is a deletion.
        min_cn_size: Forwarded to ``get_range`` (presumably the minimum
            event size -- confirm against ``get_range``).
        exp_theta: Forwarded to ``get_range`` (presumably the parameter of
            an exponential size distribution -- confirm against ``get_range``).
        amp_p: Success probability of the geometric distribution used for
            the number of extra copies of an amplification.

    Returns:
        A tuple ``(CN_array, new_chrlen)``: the list of ``CN`` objects that
        were created, in order, and the updated copy of ``chrlen``.
    """
    CN_array = []
    new_chrlen = [row[:] for row in chrlen]
    # Draw a scalar. The previous size-1 array draw cannot be used by range()
    # on current NumPy (only 0-d integer arrays convert to an index).
    CN_Tot = int(np.random.poisson(cn_num))
    for _ in range(CN_Tot):
        # Allele hit by this event: 0 or 1 with equal probability.
        CN_Ale = np.random.binomial(1, 0.5)
        # 1 means deletion, 0 means amplification.
        CN_Del = np.random.binomial(1, del_rate)
        CN_chromosome = get_chr(new_chrlen[CN_Ale])
        CN_p1, CN_p2 = get_range(new_chrlen[CN_Ale][CN_chromosome],
                                 min_cn_size, exp_theta)
        segment_len = CN_p2 - CN_p1
        CN_amp_num = 0
        if CN_Del == 0:
            # Number of extra copies, shifted so that it starts from 0
            # (a geometric sample is always >= 1).
            CN_amp_num = int(np.random.geometric(amp_p)) - 1
            new_chrlen[CN_Ale][CN_chromosome] = (
                new_chrlen[CN_Ale][CN_chromosome] + CN_amp_num * segment_len)
        else:
            # A deletion removes the segment once.
            new_chrlen[CN_Ale][CN_chromosome] = (
                new_chrlen[CN_Ale][CN_chromosome] - segment_len)
        CN_array.append(
            CN(CN_Ale, CN_Del, CN_chromosome, CN_p1, CN_p2, CN_amp_num))
    return CN_array, new_chrlen
Exemple #2
0
def make_fa(ID, tree, ref, chr_name_array, fa_prefix):
    """Build and write the reference for node ``ID`` of ``tree``.

    Collects the copy-number events stored in ``.cn`` of every node on the
    path from the root (node 0) down to ``ID``, applies them to ``ref`` via
    ``gen_ref`` and hands the result to ``write_ref``.

    Args:
        ID: Index of the node in ``tree`` whose genome is produced.
        tree: Indexable collection of nodes; each node exposes ``parentID``
            and an iterable ``cn``. Node 0 is treated as the root.
        ref: Reference passed unchanged to ``gen_ref``.
        chr_name_array: Passed unchanged to ``write_ref``.
        fa_prefix: Output prefix; ``str(ID) + "_"`` is appended to it.
    """
    out_prefix = fa_prefix + str(ID) + "_"

    # Follow parent links from the node up to the root (leaf -> root order).
    path = [ID]
    while path[-1] != 0:
        path.append(tree[path[-1]].parentID)

    # Walk the path root-first so events are gathered in the order in which
    # they have to be applied.
    events = []
    for node_id in reversed(path):
        events.extend(tree[node_id].cn)

    write_ref(gen_ref(ref, events), chr_name_array, out_prefix)
Exemple #3
0
def add_whole_amp(chrlen, whole_amp_rate, whole_amp_num, corres,
                  amp_num_geo_par):
    """Randomly amplify whole chromosomes and record each amplification.

    Every chromosome of every allele is selected independently with
    probability ``whole_amp_rate``. A selected chromosome gets an
    amplification number of ``whole_amp_num`` times a geometric sample with
    parameter ``amp_num_geo_par``, redrawn until the product is at least 1.
    Its length is then multiplied by ``amp_num + 1``.

    Args:
        chrlen: Nested list ``chrlen[allele][chromosome]`` of lengths; it is
            copied row by row and not modified.
        whole_amp_rate: Per-chromosome probability of being amplified.
        whole_amp_num: Multiplier applied to the geometric sample.
        corres: Nested list copied row by row, then threaded through ``CN``
            and ``get_new_corres`` (presumably the reference/genome interval
            correspondence -- confirm against ``get_new_corres``).
        amp_num_geo_par: Success probability of the geometric distribution.

    Returns:
        A tuple ``(wholeamp, new_chrlen, new_corres)``: the created ``CN``
        objects, the updated lengths and the updated correspondence.
    """
    new_chrlen = [allele_row[:] for allele_row in chrlen]
    new_corres = [corres_row[:] for corres_row in corres]
    events = []
    for allele, allele_lengths in enumerate(chrlen):
        for chrom in range(len(allele_lengths)):
            draw = np.random.uniform(0.0, 1.0)
            if not (draw < whole_amp_rate):
                # This chromosome was not chosen for amplification.
                continue
            # Redraw until the scaled geometric sample is at least 1.
            while True:
                amp_num = whole_amp_num * np.random.geometric(
                    amp_num_geo_par)
                if not (amp_num < 1):
                    break
            # The event spans the whole chromosome: from 0 to its current
            # length, which is read before the length is updated.
            current_len = new_chrlen[allele][chrom]
            event = CN(allele, 0, chrom, 0, current_len, amp_num, new_corres)
            new_chrlen[allele][chrom] = current_len * (amp_num + 1)
            new_corres = get_new_corres(event, new_corres)
            events.append(event)
    return events, new_chrlen, new_corres
Exemple #4
0
def add_CN(chrlen, cn_num, del_rate, min_cn_size, exp_theta, amp_p, corres,
           CN_LIST_ID):
    """Sample copy-number (CN) events for one branch, tracking correspondence.

    Each event affects a single allele, is either a deletion or an
    amplification, and changes the length of one chromosome in a copy of
    ``chrlen``. After every event the correspondence structure is refreshed
    through ``get_new_corres``.

    This function reads module-level names that are not defined here:
    ``random`` (when it equals 0 a fixed, non-random test scenario of 9
    events is used) and, in that mode, ``allele_list``, ``if_del_list`` and
    ``amp_num_list``, which are indexed by the event number.

    Args:
        chrlen: Nested list indexed as ``chrlen[allele][chromosome]`` holding
            chromosome lengths. It is copied row by row and not modified.
        cn_num: Mean of the Poisson distribution for the number of events.
        del_rate: Probability that an event is a deletion.
        min_cn_size: Forwarded to ``get_range`` (presumably the minimum
            event size -- confirm against ``get_range``).
        exp_theta: Forwarded to ``get_range`` (presumably the parameter of
            an exponential size distribution -- confirm against ``get_range``).
        amp_p: Success probability of the geometric distribution used for
            the number of extra copies of an amplification.
        corres: Nested list copied row by row, then threaded through ``CN``
            and ``get_new_corres`` (described in the original code as the
            corresponding interval of ref and genome).
        CN_LIST_ID: Not used by this function; kept so existing callers
            keep working.

    Returns:
        A tuple ``(CN_array, new_chrlen, new_corres)``: the created ``CN``
        objects in order, the updated lengths and the updated correspondence.
    """
    CN_array = []
    new_chrlen = [row[:] for row in chrlen]
    new_corres = [row[:] for row in corres]

    # Fixed-scenario switch used only for testing; read once per call.
    fixed_mode = random == 0
    if fixed_mode:
        CN_Tot = 9
    else:
        # Draw a scalar instead of converting a size-1 array with int(),
        # which NumPy has deprecated.
        CN_Tot = int(np.random.poisson(cn_num))

    for i in range(CN_Tot):
        # Allele hit by this event.
        if fixed_mode:
            CN_Ale = allele_list[i]
        else:
            CN_Ale = np.random.binomial(1, 0.5)
        # 1 means deletion, 0 means amplification.
        if fixed_mode:
            CN_Del = if_del_list[i]
        else:
            CN_Del = np.random.binomial(1, del_rate)
        CN_chromosome = get_chr(new_chrlen[CN_Ale])
        CN_p1, CN_p2 = get_range(new_chrlen[CN_Ale][CN_chromosome],
                                 min_cn_size, exp_theta, i)
        # Keep both endpoints from being divisible by 10; per the original
        # author this protects a str <-> int round trip done elsewhere.
        # NOTE(review): for an interval of length <= 1 this nudge can leave
        # CN_p2 <= CN_p1 -- confirm get_range never returns one that short.
        if CN_p1 % 10 == 0:
            CN_p1 = CN_p1 + 1
        if CN_p2 % 10 == 0:
            CN_p2 = CN_p2 - 1

        # TODO (from the original author): think about how to get the actual
        # coordinates given the previous ones.
        segment_len = CN_p2 - CN_p1
        CN_amp_num = 0
        if CN_Del == 0:
            # A geometric sample is always >= 1, so no rejection loop is
            # needed to guarantee at least one extra copy. The draw is made
            # in fixed mode too, so the random stream is consumed as before.
            CN_amp_num = int(np.random.geometric(amp_p))
            if fixed_mode:
                CN_amp_num = amp_num_list[i]
            new_chrlen[CN_Ale][CN_chromosome] = (
                new_chrlen[CN_Ale][CN_chromosome] + CN_amp_num * segment_len)
        else:
            # A deletion removes the segment once.
            new_chrlen[CN_Ale][CN_chromosome] = (
                new_chrlen[CN_Ale][CN_chromosome] - segment_len)

        # The event is built against the correspondence as it was before the
        # event; the correspondence is then updated for the next one.
        new_CN = CN(CN_Ale, CN_Del, CN_chromosome, CN_p1, CN_p2, CN_amp_num,
                    new_corres)
        new_corres = get_new_corres(new_CN, new_corres)
        CN_array.append(new_CN)
    return CN_array, new_chrlen, new_corres