def add_CN(chrlen, cn_num, del_rate, min_cn_size, exp_theta, amp_p):
    """Sample copy-number (CN) events for one branch.

    Every event touches a single allele (index 0 or 1) and is either a
    deletion or an amplification of an interval returned by ``get_range``
    on a chromosome returned by ``get_chr``.

    NOTE(review): another ``add_CN`` with a longer signature is visible
    later in this source; if both live in the same module the later
    definition replaces this one -- confirm which is intended.

    Args:
        chrlen: Chromosome lengths indexed as ``chrlen[allele][chromosome]``.
            The rows are copied; the input is not modified.
        cn_num: Mean of the Poisson distribution for the number of events.
        del_rate: Probability that an event is a deletion.
        min_cn_size: Forwarded unchanged to ``get_range``.
        exp_theta: Forwarded unchanged to ``get_range``.
        amp_p: Success probability of the geometric distribution used for
            the number of extra copies of an amplification.

    Returns:
        A tuple ``(CN_array, new_chrlen)``: the list of ``CN`` objects in
        the order they were sampled, and the updated chromosome lengths.
    """
    CN_array = []
    new_chrlen = [row[:] for row in chrlen]

    # Sampling with size=1 yields a 1-element array; take the element
    # explicitly so range() receives a plain int.
    CN_Tot = int(np.random.poisson(cn_num, 1)[0])

    for _ in range(CN_Tot):
        # Which allele is hit, and whether the event is a deletion.
        CN_Ale = np.random.binomial(1, 0.5)
        CN_Del = np.random.binomial(1, del_rate)

        CN_chromosome = get_chr(new_chrlen[CN_Ale])
        CN_p1, CN_p2 = get_range(
            new_chrlen[CN_Ale][CN_chromosome], min_cn_size, exp_theta)
        segment_len = CN_p2 - CN_p1

        CN_amp_num = 0
        if CN_Del == 0:
            # The geometric draw starts at 1; shift it so the number of
            # extra copies starts from 0.
            CN_amp_num = int(np.random.geometric(amp_p, 1)[0]) - 1
            new_chrlen[CN_Ale][CN_chromosome] = (
                new_chrlen[CN_Ale][CN_chromosome] + CN_amp_num * segment_len)
        else:
            new_chrlen[CN_Ale][CN_chromosome] = (
                new_chrlen[CN_Ale][CN_chromosome] - segment_len)

        CN_array.append(
            CN(CN_Ale, CN_Del, CN_chromosome, CN_p1, CN_p2, CN_amp_num))

    return CN_array, new_chrlen
def make_fa(ID, tree, ref, chr_name_array, fa_prefix):
    """Build and write the sequence for tree node ``ID``.

    The copy-number events stored on every node along the path from the
    root (node 0) down to ``ID`` are gathered in that order, passed to
    ``gen_ref`` together with ``ref``, and the result is handed to
    ``write_ref``.

    Args:
        ID: Index of the target node in ``tree``.
        tree: Indexable collection of nodes exposing ``parentID`` and ``cn``.
        ref: Reference passed unchanged to ``gen_ref``.
        chr_name_array: Passed unchanged to ``write_ref``.
        fa_prefix: Output prefix; ``str(ID) + "_"`` is appended to it.
    """
    out_prefix = fa_prefix + str(ID) + "_"

    # Follow parent links from the node up to the root (ID 0).
    path = [ID]
    node = ID
    while node != 0:
        node = tree[node].parentID
        path.append(node)

    # Collect events root-first so they are applied in ancestral order.
    events = []
    for node_id in reversed(path):
        events.extend(tree[node_id].cn)

    write_ref(gen_ref(ref, events), chr_name_array, out_prefix)
def add_whole_amp(chrlen, whole_amp_rate, whole_amp_num, corres,
                  amp_num_geo_par):
    """Sample whole-chromosome amplifications.

    Every chromosome of every allele is selected independently with
    probability ``whole_amp_rate``. A selected chromosome is recorded as a
    ``CN`` amplification spanning ``0`` to its current length, and its
    length is multiplied by ``amp_num + 1``.

    Args:
        chrlen: Chromosome lengths indexed as ``chrlen[allele][chromosome]``.
            The rows are copied; the input is not modified.
        whole_amp_rate: Per-chromosome probability of being amplified.
        whole_amp_num: Multiplier applied to the geometric draw to obtain
            the number of extra copies. Must be positive whenever a
            chromosome is actually selected.
        corres: Correspondence table; its rows are copied and the copy is
            threaded through ``get_new_corres`` after each event.
        amp_num_geo_par: Success probability of the geometric distribution.

    Returns:
        A tuple ``(wholeamp, new_chrlen, new_corres)``.

    Raises:
        ValueError: If a chromosome is selected while ``whole_amp_num`` is
            not positive; the redraw loop below could never terminate.
    """
    new_chrlen = [row[:] for row in chrlen]
    new_corres = [row[:] for row in corres]
    wholeamp = []

    for allele, allele_lengths in enumerate(chrlen):
        for chromosome in range(len(allele_lengths)):
            if np.random.uniform(0.0, 1.0) < whole_amp_rate:
                # Checked lazily so that a disabled configuration
                # (rate 0) keeps working regardless of whole_amp_num.
                if whole_amp_num <= 0:
                    raise ValueError(
                        "whole_amp_num must be positive, got %r"
                        % (whole_amp_num,))

                # The geometric draw is >= 1 (mean 1/p); redraw while a
                # fractional multiplier pushes the product below 1.
                amp_num = whole_amp_num * np.random.geometric(amp_num_geo_par)
                while amp_num < 1:
                    amp_num = whole_amp_num * np.random.geometric(
                        amp_num_geo_par)

                current_len = new_chrlen[allele][chromosome]
                new_CN = CN(allele, 0, chromosome, 0, current_len, amp_num,
                            new_corres)
                new_chrlen[allele][chromosome] = current_len * (amp_num + 1)
                new_corres = get_new_corres(new_CN, new_corres)
                wholeamp.append(new_CN)

    return wholeamp, new_chrlen, new_corres
def add_CN(chrlen, cn_num, del_rate, min_cn_size, exp_theta, amp_p, corres,
           CN_LIST_ID):
    """Sample copy-number (CN) events for one branch, tracking ``corres``.

    Every event touches a single allele and is either a deletion or an
    amplification of an interval returned by ``get_range``.

    When the module-level name ``random`` equals 0, a fixed test scenario
    is used instead of sampling: nine events whose allele, deletion flag
    and amplification count are read from the module-level lists
    ``allele_list``, ``if_del_list`` and ``amp_num_list``.

    Args:
        chrlen: Chromosome lengths indexed as ``chrlen[allele][chromosome]``.
            The rows are copied; the input is not modified.
        cn_num: Mean of the Poisson distribution for the number of events.
        del_rate: Probability that an event is a deletion.
        min_cn_size: Forwarded unchanged to ``get_range``.
        exp_theta: Forwarded unchanged to ``get_range``.
        amp_p: Success probability of the geometric distribution used for
            the number of extra copies of an amplification.
        corres: Correspondence table between reference and genome
            intervals; its rows are copied and the copy is updated through
            ``get_new_corres`` after every event.
        CN_LIST_ID: Not used by this function.

    Returns:
        A tuple ``(CN_array, new_chrlen, new_corres)``.
    """
    CN_array = []
    new_chrlen = [row[:] for row in chrlen]
    new_corres = [row[:] for row in corres]

    # Fixed-scenario switch, used only for testing.
    use_fixed = random == 0

    if use_fixed:
        CN_Tot = 9
    else:
        # Sampling with size=1 yields a 1-element array; index it rather
        # than relying on implicit array-to-scalar conversion.
        CN_Tot = int(np.random.poisson(cn_num, 1)[0])

    for i in range(CN_Tot):
        # Which allele is hit, and whether the event is a deletion.
        if use_fixed:
            CN_Ale = allele_list[i]
            CN_Del = if_del_list[i]
        else:
            CN_Ale = np.random.binomial(1, 0.5)
            CN_Del = np.random.binomial(1, del_rate)

        CN_chromosome = get_chr(new_chrlen[CN_Ale])
        CN_p1, CN_p2 = get_range(
            new_chrlen[CN_Ale][CN_chromosome], min_cn_size, exp_theta, i)

        # Keep both breakpoints from being multiples of 10, to protect the
        # str <-> int round trips mentioned by the original author.
        if CN_p1 % 10 == 0:
            CN_p1 = CN_p1 + 1
        if CN_p2 % 10 == 0:
            CN_p2 = CN_p2 - 1
        segment_len = CN_p2 - CN_p1

        CN_amp_num = 0
        if CN_Del == 0:
            # A geometric draw is always >= 1, so a single draw suffices.
            # It is made in fixed mode too, so the random stream is
            # consumed exactly as before.
            CN_amp_num = int(np.random.geometric(amp_p, 1)[0])
            if use_fixed:
                CN_amp_num = amp_num_list[i]
            new_chrlen[CN_Ale][CN_chromosome] = (
                new_chrlen[CN_Ale][CN_chromosome] + CN_amp_num * segment_len)
        else:
            new_chrlen[CN_Ale][CN_chromosome] = (
                new_chrlen[CN_Ale][CN_chromosome] - segment_len)

        # The event is built against the table as it was before the event;
        # the table is then advanced.
        new_CN = CN(CN_Ale, CN_Del, CN_chromosome, CN_p1, CN_p2, CN_amp_num,
                    new_corres)
        new_corres = get_new_corres(new_CN, new_corres)
        CN_array.append(new_CN)

    return CN_array, new_chrlen, new_corres