def bin_dataset(data_file):
    """Bin the length spectrum read from ``data_file`` onto ``L_series``.

    ``get_spectrum_counts(data_file)`` supplies the observed lengths, the
    per-length counts and the total length.  The lengths are walked from the
    last entry to the first; whenever a length is below the current edge of
    the module-level ``L_series`` the edge index advances (opening a new,
    empty bin for each step) and the count is then added to the newest bin.
    Bin 0 therefore collects every length ``>= L_series[0]``.

    Afterwards each bin ``i >= 1`` is divided by
    ``total_length * (L_series[i - 1] - L_series[i])`` and bin 0 is divided
    by ``total_length`` alone.

    Returns:
        ``(binned, total_length)``; ``binned`` always has ``len(L_series)``
        entries.

    Raises:
        IndexError: if a length is smaller than the last entry of
            ``L_series`` (the edge search runs off the end of the list).

    NOTE(review): the edge index never moves backwards, so this presumably
    expects the lengths in ascending order and ``L_series`` in descending
    order -- confirm against ``get_spectrum_counts`` and the ``L_series``
    builder.
    """
    data_lengths, data_non_cumul, data_cumul, total_length = \
        get_spectrum_counts(data_file)

    edge_index = 0
    binned = [0]
    # Pair each length with its count, last entry first.
    for length, count in zip(reversed(data_lengths), reversed(data_non_cumul)):
        # Step to the first edge that this length reaches, opening one empty
        # bin per edge skipped on the way.
        while length < L_series[edge_index]:
            edge_index += 1
            binned.append(0)
        binned[-1] += count

    # The walk above only opens bins down to the smallest observed length.
    # Pad with empty bins so the normalisation below can index every edge
    # instead of raising IndexError when the data stop short.
    binned.extend([0] * (len(L_series) - len(binned)))

    for i in range(1, len(L_series)):
        binned[i] *= 1.0 / (total_length * (-L_series[i] + L_series[i - 1]))
    # Bin 0 has no upper edge, so it is scaled by the total length only.
    binned[0] *= 1.0 / total_length
    return binned, total_length
def bin_dataset(data_file):
    """Bin the length spectrum read from ``data_file`` onto ``L_series``.

    ``get_spectrum_counts(data_file)`` supplies the observed lengths, the
    per-length counts and the total length.  The lengths are walked from the
    last entry to the first; whenever a length is below the current edge of
    the module-level ``L_series`` the edge index advances (opening a new,
    empty bin for each step) and the count is then added to the newest bin.
    Bin 0 therefore collects every length ``>= L_series[0]``.

    Afterwards each bin ``i >= 1`` is divided by
    ``total_length * (L_series[i - 1] - L_series[i])`` and bin 0 is divided
    by ``total_length`` alone.

    Returns:
        ``(binned, total_length)``; ``binned`` always has ``len(L_series)``
        entries.

    Raises:
        IndexError: if a length is smaller than the last entry of
            ``L_series`` (the edge search runs off the end of the list).

    NOTE(review): the edge index never moves backwards, so this presumably
    expects the lengths in ascending order and ``L_series`` in descending
    order -- confirm against ``get_spectrum_counts`` and the ``L_series``
    builder.
    """
    data_lengths, data_non_cumul, data_cumul, total_length = \
        get_spectrum_counts(data_file)

    edge_index = 0
    binned = [0]
    # Pair each length with its count, last entry first.
    for length, count in zip(reversed(data_lengths), reversed(data_non_cumul)):
        # Step to the first edge that this length reaches, opening one empty
        # bin per edge skipped on the way.
        while length < L_series[edge_index]:
            edge_index += 1
            binned.append(0)
        binned[-1] += count

    # The walk above only opens bins down to the smallest observed length.
    # Pad with empty bins so the normalisation below can index every edge
    # instead of raising IndexError when the data stop short.
    binned.extend([0] * (len(L_series) - len(binned)))

    for i in range(1, len(L_series)):
        binned[i] *= 1.0 / (total_length * (-L_series[i] + L_series[i - 1]))
    # Bin 0 has no upper edge, so it is scaled by the total length only.
    binned[0] *= 1.0 / total_length
    return binned, total_length
return prob_L_from_mut_precise_varmu(L,ts,ts*2,1,N) from demographic_function_builder import * def tract_lengths(L,t_diff_vec, N_vec): # returns a history with len(t_diff_vec) population size changes uncoal = 1 prob_list = [prob_L_from_mut_precise(L,t_diff_vec[0],N_vec[0])] time = t_diff_vec[0] for i in range(1,num_size_changes): time += t_diff_vec[i] uncoal = time_lapse(uncoal, N_vec[i-1], t_diff_vec[i]) prob_list, uncoal = popsize_change(L, prob_list, uncoal, N_vec[i-1], N_vec[i],time) return math.fsum(prob_list) pop1_lengths, pop1_non_cumul, pop1_cumul, pop1_total_length = get_spectrum_counts(open(pop1_filename)) L_series=[1] for a in range(1,int(math.log(10**7)/math.log(2))): if int(1*2**a)>L_series[0]: L_series.insert(0,int(1*2**a)) min_tract_index=int(math.log(min_tract_length)/math.log(2)) print min_tract_index, len(L_series) L_series_index=0 pop1_lengths_binned=[0] for i in range(1, len(pop1_lengths)+1): if pop1_lengths[-i]>=L_series[L_series_index]:
def prob_L_from_mut_precise(L, ts, N):
    """Call ``prob_L_from_mut_precise_varmu`` with its 3rd argument fixed to
    ``2 * ts`` and its 4th argument fixed to ``1``.

    ``L``, ``ts`` and ``N`` are forwarded unchanged.
    """
    return prob_L_from_mut_precise_varmu(L, ts, ts * 2, 1, N)


def tract_lengths(L, t_diff_vec, N_vec):
    """Return the ``math.fsum`` of the probability terms built up over the
    epochs described by ``t_diff_vec`` and ``N_vec``.

    The list of terms starts with
    ``prob_L_from_mut_precise(L, t_diff_vec[0], N_vec[0])``.  For every later
    index ``i`` the running time is advanced by ``t_diff_vec[i]``, ``uncoal``
    is updated by ``time_lapse`` using ``N_vec[i - 1]``, and
    ``popsize_change`` returns the new term list and ``uncoal`` for the step
    from ``N_vec[i - 1]`` to ``N_vec[i]``.

    ``t_diff_vec`` must be non-empty and ``N_vec`` must have at least as many
    entries as ``t_diff_vec``.
    """
    uncoal = 1
    prob_list = [prob_L_from_mut_precise(L, t_diff_vec[0], N_vec[0])]
    elapsed = t_diff_vec[0]
    for i in range(1, len(t_diff_vec)):
        elapsed += t_diff_vec[i]
        uncoal = time_lapse(uncoal, N_vec[i - 1], t_diff_vec[i])
        prob_list, uncoal = popsize_change(
            L, prob_list, uncoal, N_vec[i - 1], N_vec[i], elapsed)
    return math.fsum(prob_list)


# Load the observed spectrum; the handle is closed as soon as it has been
# consumed instead of being left to the garbage collector.
with open(tractfile) as _tract_fh:
    YRI_lengths, YRI_non_cumul, YRI_cumul, YRI_total_length = \
        get_spectrum_counts(_tract_fh)

# Descending list of distinct integer bin edges int(1.25**a), ending in 1.
# New edges are pushed on the front, so L_series[0] is always the largest.
L_series = [1]
for a in range(1, int(log(5 * 20**6) / log(1.25))):
    if int(1.25**a) > L_series[0]:
        L_series.insert(0, int(1.25**a))

total_length = YRI_total_length

last_prob_0, last_prob_1, last_prob_2, last_prob_3, last_prob_4, last_prob_5 = \
    0, 0, 0, 0, 0, 0
y0, y1, y2, y3, y4, y5 = [], [], [], [], [], []

# Read the whole result file; ``results`` stays bound (closed) afterwards,
# exactly as it did after the explicit close().
with open(result_file) as results:
    lines = results.readlines()
import sys

# Positional command-line arguments, in order: three tract files, the result
# file to read, the output file, then two integers.
non_african_tractfile = sys.argv[1]
african_tractfile = sys.argv[2]
between_tractfile = sys.argv[3]
result_file = sys.argv[4]
output_file = sys.argv[5]
gen_time = int(sys.argv[6])
min_tract_length = int(sys.argv[7])

# NOTE(review): plt, log and get_spectrum_counts are not defined in this
# chunk -- presumably they arrive through this star import; confirm.
from demographic_inputfile import *

# 2x2 grid of axes sharing both x and y.
fig, axs = plt.subplots(nrows=2, ncols=2, sharex=True, sharey=True)
ax0, ax1, ax2, ax3 = axs[0, 0], axs[0, 1], axs[1, 0], axs[1, 1]

# NOTE(review): the YRI_* names are filled from non_african_tractfile --
# confirm that the naming is intentional.
# The handle is closed as soon as the spectrum has been read.
with open(non_african_tractfile) as _tract_fh:
    YRI_lengths, YRI_non_cumul, YRI_cumul, YRI_total_length = \
        get_spectrum_counts(_tract_fh)

# Descending list of distinct integer bin edges int(1.25**a), ending in 1.
# New edges are pushed on the front, so L_series[0] is always the largest.
L_series = [1]
for a in range(1, int(log(5 * 20**6) / log(1.25))):
    if int(1.25**a) > L_series[0]:
        L_series.insert(0, int(1.25**a))

total_length = YRI_total_length

last_prob_0, last_prob_1, last_prob_2, last_prob_3, last_prob_4, last_prob_5 = \
    0, 0, 0, 0, 0, 0
y0, y1, y2, y3, y4, y5 = [], [], [], [], [], []

# NOTE(review): no close() for ``results`` is visible in this chunk, so the
# handle is deliberately left open here -- confirm it is closed further down.
results = open(result_file)
lines = results.readlines()
def tract_lengths(L, t_diff_vec, N_vec):
    """Return the ``math.fsum`` of the probability terms built up over
    ``num_size_changes`` population-size changes.

    The term list is seeded by ``initialize_pop(L, [], 1, N_vec[0])``.  For
    each ``i`` in ``range(num_size_changes)`` the running time is advanced by
    ``t_diff_vec[i]``, ``uncoal`` is updated by ``time_lapse`` using
    ``N_vec[i]``, and ``popsize_change`` returns the new term list and
    ``uncoal`` for the step from ``N_vec[i]`` to ``N_vec[i + 1]``.

    ``t_diff_vec`` needs at least ``num_size_changes`` entries and ``N_vec``
    at least ``num_size_changes + 1``.

    NOTE(review): the loop bound is the module-level ``num_size_changes``,
    not ``len(t_diff_vec)`` -- confirm the two always agree.
    """
    uncoal = 1
    prob_list = initialize_pop(L, [], uncoal, N_vec[0])
    elapsed = 0
    for i in range(num_size_changes):
        elapsed += t_diff_vec[i]
        uncoal = time_lapse(uncoal, N_vec[i], t_diff_vec[i])
        prob_list, uncoal = popsize_change(
            L, prob_list, uncoal, N_vec[i], N_vec[i + 1], elapsed)
    return math.fsum(prob_list)


# Load the observed spectrum; the handle is closed as soon as it has been
# consumed instead of being left to the garbage collector.
with open(pop1_filename) as _pop1_fh:
    pop1_lengths, pop1_non_cumul, pop1_cumul, pop1_total_length = \
        get_spectrum_counts(_pop1_fh)

# Descending list of bin edges 2**a, ending in 1.  New edges are pushed on
# the front, so L_series[0] is always the largest.
L_series = [1]
for a in range(1, int(math.log(10**7) / math.log(2))):
    if int(2**a) > L_series[0]:
        L_series.insert(0, int(2**a))

min_tract_index = int(math.log(min_tract_length) / math.log(2))
# Same output as the Python 2 statement "print a, b", but this form also
# parses under Python 3.
print("%s %s" % (min_tract_index, len(L_series)))

L_series_index = 0
pop1_lengths_binned = [0]
# Walk the lengths from the last entry to the first, adding each count to
# the current bin while the length still reaches the current edge.
# NOTE(review): no branch for lengths below the current edge is visible in
# this chunk -- the loop may continue past the end of what is shown.
for i in range(1, len(pop1_lengths) + 1):
    if pop1_lengths[-i] >= L_series[L_series_index]:
        pop1_lengths_binned[-1] += pop1_non_cumul[-i]