def bin_dataset(data_file):
    """Bin the length spectrum read from data_file onto the L_series grid.

    get_spectrum_counts(data_file) is unpacked into four values: lengths,
    per-length counts, cumulative counts and total_length.  The cumulative
    counts are not used here.

    Entries are visited from the last one to the first.  Each count is added
    to the bin of the first L_series threshold, scanning forward from the
    current one, that the entry's length reaches.
    NOTE(review): this only bins sensibly if L_series is in descending order
    and the lengths are in ascending order -- confirm against the callers.

    Returns:
        (data_lengths_binned, total_length).  data_lengths_binned has one
        entry per L_series threshold.  Entry 0 is its count divided by
        total_length; entry i > 0 is additionally divided by the bin width
        L_series[i-1] - L_series[i].

    Raises:
        IndexError: if a length is smaller than every L_series threshold
            (the threshold scan runs off the end of L_series).
    """
    data_lengths, data_non_cumul, data_cumul, total_length = get_spectrum_counts(data_file)
    L_series_index = 0
    data_lengths_binned = [0]
    for i in range(1, len(data_lengths) + 1):
        # Open a new, empty bin for every threshold skipped on the way to the
        # first one this length reaches.
        while data_lengths[-i] < L_series[L_series_index]:
            L_series_index += 1
            data_lengths_binned.append(0)
        data_lengths_binned[-1] += data_non_cumul[-i]
    # Bins are only opened as the data reaches them.  If the last length
    # visited stops above the final thresholds, the list is shorter than
    # L_series and the normalisation below would index past its end, so pad
    # with empty bins first.
    missing_bins = len(L_series) - len(data_lengths_binned)
    if missing_bins > 0:
        data_lengths_binned.extend([0] * missing_bins)
    for i in range(1, len(L_series)):
        data_lengths_binned[i] *= 1.0 / (total_length * (-L_series[i] + L_series[i - 1]))
    # The first bin has no upper threshold, so it is not divided by a width.
    data_lengths_binned[0] *= 1.0 / total_length
    return data_lengths_binned, total_length
def bin_dataset(data_file):
    """Bin the length spectrum read from data_file onto the L_series grid.

    get_spectrum_counts(data_file) is unpacked into four values: lengths,
    per-length counts, cumulative counts and total_length.  The cumulative
    counts are not used here.

    Entries are visited from the last one to the first.  Each count is added
    to the bin of the first L_series threshold, scanning forward from the
    current one, that the entry's length reaches.
    NOTE(review): this only bins sensibly if L_series is in descending order
    and the lengths are in ascending order -- confirm against the callers.

    Returns:
        (data_lengths_binned, total_length).  data_lengths_binned has one
        entry per L_series threshold.  Entry 0 is its count divided by
        total_length; entry i > 0 is additionally divided by the bin width
        L_series[i-1] - L_series[i].

    Raises:
        IndexError: if a length is smaller than every L_series threshold
            (the threshold scan runs off the end of L_series).
    """
    data_lengths, data_non_cumul, data_cumul, total_length = get_spectrum_counts(data_file)
    L_series_index = 0
    data_lengths_binned = [0]
    for i in range(1, len(data_lengths) + 1):
        # Open a new, empty bin for every threshold skipped on the way to the
        # first one this length reaches.
        while data_lengths[-i] < L_series[L_series_index]:
            L_series_index += 1
            data_lengths_binned.append(0)
        data_lengths_binned[-1] += data_non_cumul[-i]
    # Bins are only opened as the data reaches them.  If the last length
    # visited stops above the final thresholds, the list is shorter than
    # L_series and the normalisation below would index past its end, so pad
    # with empty bins first.
    missing_bins = len(L_series) - len(data_lengths_binned)
    if missing_bins > 0:
        data_lengths_binned.extend([0] * missing_bins)
    for i in range(1, len(L_series)):
        data_lengths_binned[i] *= 1.0 / (total_length * (-L_series[i] + L_series[i - 1]))
    # The first bin has no upper threshold, so it is not divided by a width.
    data_lengths_binned[0] *= 1.0 / total_length
    return data_lengths_binned, total_length
    return prob_L_from_mut_precise_varmu(L,ts,ts*2,1,N)


from demographic_function_builder import *

def tract_lengths(L,t_diff_vec, N_vec):
    """Return math.fsum of the term list built up across the size changes.

    The list starts with prob_L_from_mut_precise(L, t_diff_vec[0], N_vec[0]).
    For each index from 1 up to (but excluding) num_size_changes, the elapsed
    time grows by t_diff_vec[index], the uncoalesced value is advanced with
    time_lapse, and the list is passed through popsize_change for the step
    from N_vec[index-1] to N_vec[index].

    num_size_changes is not a parameter; it is read from the enclosing scope.
    """
    first_interval = t_diff_vec[0]
    terms = [prob_L_from_mut_precise(L, first_interval, N_vec[0])]
    elapsed = first_interval
    uncoalesced = 1
    for epoch in range(1, num_size_changes):
        interval = t_diff_vec[epoch]
        size_before = N_vec[epoch - 1]
        elapsed += interval
        uncoalesced = time_lapse(uncoalesced, size_before, interval)
        terms, uncoalesced = popsize_change(
            L, terms, uncoalesced, size_before, N_vec[epoch], elapsed)
    return math.fsum(terms)

# Read the four spectrum arrays for population 1.  The file handle passed to
# get_spectrum_counts is never closed explicitly.
pop1_lengths, pop1_non_cumul, pop1_cumul, pop1_total_length = get_spectrum_counts(open(pop1_filename))

# Build the bin thresholds: powers of two, each inserted at the front, so
# L_series ends up in descending order and finishes with 1.
L_series=[1]
for a in range(1,int(math.log(10**7)/math.log(2))):
    if int(1*2**a)>L_series[0]:
        L_series.insert(0,int(1*2**a))

# Integer part of log2(min_tract_length).
min_tract_index=int(math.log(min_tract_length)/math.log(2))
print min_tract_index, len(L_series)

# Initial state for the binning loop that follows: current threshold index
# and a single empty bin.
L_series_index=0
pop1_lengths_binned=[0]


for i in range(1, len(pop1_lengths)+1):
    if pop1_lengths[-i]>=L_series[L_series_index]:
def prob_L_from_mut_precise(L,ts,N):
    """Call prob_L_from_mut_precise_varmu with third argument ts*2 and fourth argument 1."""
    doubled_ts = ts*2
    return prob_L_from_mut_precise_varmu(L, ts, doubled_ts, 1, N)

def tract_lengths(L,t_diff_vec, N_vec):
    """Return math.fsum of the term list built up across len(t_diff_vec) intervals.

    The list starts with prob_L_from_mut_precise(L, t_diff_vec[0], N_vec[0]).
    For every later index of t_diff_vec, the elapsed time grows by that
    interval, the uncoalesced value is advanced with time_lapse, and the list
    is passed through popsize_change for the step from N_vec[index-1] to
    N_vec[index].
    """
    opening_interval = t_diff_vec[0]
    contributions = [prob_L_from_mut_precise(L, opening_interval, N_vec[0])]
    clock = opening_interval
    still_uncoal = 1
    for k in range(1, len(t_diff_vec)):
        dt = t_diff_vec[k]
        n_old = N_vec[k - 1]
        clock += dt
        still_uncoal = time_lapse(still_uncoal, n_old, dt)
        contributions, still_uncoal = popsize_change(
            L, contributions, still_uncoal, n_old, N_vec[k], clock)
    return math.fsum(contributions)


# Read the four spectrum arrays from tractfile.  The file handle passed to
# get_spectrum_counts is never closed explicitly.
YRI_lengths, YRI_non_cumul, YRI_cumul, YRI_total_length = get_spectrum_counts(open(tractfile))

# Build the bin thresholds: int(1.25**a) for increasing a, inserted at the
# front only when strictly larger than the current front, so L_series is
# descending, has no duplicates and ends with 1.
# `log` is used unqualified -- presumably supplied by a star import; confirm.
# NOTE(review): the upper bound is 5*20**6 (3.2e8) -- confirm that is intended.
L_series=[1]
for a in range(1,int(log(5*20**6)/log(1.25))):
    if int(1*1.25**a)>L_series[0]:
        L_series.insert(0,int(1*1.25**a))
total_length=YRI_total_length


# Six scalars initialised to 0 and six empty lists; their use is outside this
# excerpt.
last_prob_0, last_prob_1, last_prob_2, last_prob_3, last_prob_4, last_prob_5 = 0,0,0,0,0,0
y0, y1, y2, y3,y4,y5 = [],[],[], [],[],[]

# Read every line of the result file.  The with-block closes the handle even
# if readlines() raises; `results` stays bound (to the closed file) afterwards,
# exactly as it was after the explicit close().
with open(result_file) as results:
    lines=results.readlines()
import sys
# Positional command-line arguments (seven are required; a shorter argv
# raises IndexError here):
#   1-3  paths of the non-African, African and between tract files
#   4    path of the result file
#   5    path of the output file
#   6    gen_time, parsed as int
#   7    min_tract_length, parsed as int
non_african_tractfile=sys.argv[1]
african_tractfile=sys.argv[2]
between_tractfile=sys.argv[3]
result_file=sys.argv[4]
output_file=sys.argv[5]
gen_time=int(sys.argv[6])
min_tract_length = int(sys.argv[7])

from demographic_inputfile import *
# 2x2 grid of subplots sharing both the x and the y axis.
# `plt` is presumably matplotlib.pyplot brought in by a star import -- confirm.
fig,axs = plt.subplots(nrows=2, ncols=2, sharex=True,sharey=True)
# Individual axes in row-major order: top-left, top-right, bottom-left,
# bottom-right.
ax0,ax1,ax2,ax3=axs[0,0],axs[0,1],axs[1,0],axs[1,1]


# Read the four spectrum arrays from the non-African tract file.  The file
# handle passed to get_spectrum_counts is never closed explicitly.
# NOTE(review): the variables are named YRI_* although the data comes from
# non_african_tractfile (and the disabled print below says CEU) -- confirm the
# naming is just a leftover.
YRI_lengths, YRI_non_cumul, YRI_cumul, YRI_total_length = get_spectrum_counts(open(non_african_tractfile))

# Build the bin thresholds: int(1.25**a) for increasing a, inserted at the
# front only when strictly larger than the current front, so L_series is
# descending, has no duplicates and ends with 1.
# `log` is used unqualified -- presumably supplied by a star import; confirm.
L_series=[1]
for a in range(1,int(log(5*20**6)/log(1.25))):
    if int(1*1.25**a)>L_series[0]:
        L_series.insert(0,int(1*1.25**a))
#print L_series
total_length=YRI_total_length

#print 'total CEU length: ',YRI_total_length

# Six scalars initialised to 0 and six empty lists; their use is outside this
# excerpt.
last_prob_0, last_prob_1, last_prob_2, last_prob_3, last_prob_4, last_prob_5 = 0,0,0,0,0,0
y0, y1, y2, y3,y4,y5 = [],[],[], [],[],[]

# Read every line of the result file.  No close() is visible in this excerpt.
results=open(result_file)
lines=results.readlines()
# Example #6
0
def tract_lengths(L, t_diff_vec, N_vec):
    """Return math.fsum of the term list built up across the size changes.

    The list is created by initialize_pop(L, [], 1, N_vec[0]).  For each index
    from 0 up to (but excluding) num_size_changes, the elapsed time grows by
    t_diff_vec[index], the uncoalesced value is advanced with time_lapse, and
    the list is passed through popsize_change for the step from N_vec[index]
    to N_vec[index + 1].

    num_size_changes is not a parameter; it is read from the enclosing scope.
    """
    uncoalesced = 1
    terms = initialize_pop(L, [], uncoalesced, N_vec[0])
    elapsed = 0
    for epoch in range(num_size_changes):
        interval = t_diff_vec[epoch]
        size_before = N_vec[epoch]
        elapsed += interval
        uncoalesced = time_lapse(uncoalesced, size_before, interval)
        terms, uncoalesced = popsize_change(
            L, terms, uncoalesced, size_before, N_vec[epoch + 1], elapsed)
    return math.fsum(terms)


# Read the four spectrum arrays for population 1.  The file handle passed to
# get_spectrum_counts is never closed explicitly.
pop1_lengths, pop1_non_cumul, pop1_cumul, pop1_total_length = get_spectrum_counts(
    open(pop1_filename))

# Build the bin thresholds: powers of two, each inserted at the front, so
# L_series ends up in descending order and finishes with 1.
L_series = [1]
for a in range(1, int(math.log(10**7) / math.log(2))):
    if int(1 * 2**a) > L_series[0]:
        L_series.insert(0, int(1 * 2**a))

# Integer part of log2(min_tract_length).
min_tract_index = int(math.log(min_tract_length) / math.log(2))
print min_tract_index, len(L_series)

# Initial state for the binning loop that follows: current threshold index
# and a single empty bin.
L_series_index = 0
pop1_lengths_binned = [0]

for i in range(1, len(pop1_lengths) + 1):
    if pop1_lengths[-i] >= L_series[L_series_index]:
        pop1_lengths_binned[-1] += pop1_non_cumul[-i]