# Input locations, built on top of `main_dir` (which must already be defined
# before this point; it is not set in this section).
muted_dir = f'{main_dir}mutation_counter/data/mutation_count/'
sims_dir = f'{main_dir}mutation_counter/data/sims_dem/'
diffs = False

# Run settings. Only min_size, sampling and sample_sim are forwarded to the
# call below; mutlog, bases and ksize are set here but not passed on.
mutlog = 'toMut.log'
min_size = 70
sampling = [5, 100, 10]
bases = 'ATCG'
ksize = 3
sample_sim = 0

# count_dir and dir_launch are likewise expected to exist already.
# The call's result is unpacked into two names: data and data_freqs.
data, data_freqs = MC_sample_matrix_v1(
    min_size=min_size,
    samp=sampling,
    count_dir=count_dir,
    dir_launch=dir_launch,
    main_dir=main_dir,
    sim_dir=sims_dir,
    muted_dir=muted_dir,
    diffs=diffs,
    sample_sim=sample_sim,
    exclude=False,
)


def md_reference_comp(data,
                      p_value=1e-5,
                      test_m='fisher',
                      individually=False,
                      Nbins=10,
                      exclude=False,
                      frequency_range=[0, 1],
                      data_freqs={},
                      extract='pval',
# Run settings for this extraction. mutlog, bases and ksize are assigned here
# but are not forwarded to the call below.
mutlog = 'toMut.log'
min_size = 40
sampling = [5, 100, 5]
bases = 'ATCG'
ksize = 3
sample_sim = 0
freq_extract = True
stepup = ''

# count_dir, dir_launch, main_dir, sims_dir, indfile, muted_dir and diffs are
# expected to be defined before this point.
# The call's result is unpacked into two names: data and data_freqs.
data, data_freqs = MC_sample_matrix_v1(
    min_size=min_size,
    samp=sampling,
    stepup=stepup,
    count_dir=count_dir,
    dir_launch=dir_launch,
    main_dir=main_dir,
    sim_dir=sims_dir,
    indfile=indfile,
    muted_dir=muted_dir,
    diffs=diffs,
    sample_sim=sample_sim,
    freq_extract=freq_extract,
    exclude=False,
)

###


def run_stats(ref_sim, ref_pair, data, data_freqs={}):
    '''
    co-factor function to md counter comparisons, deploy heatmap and calculate kmer proportion differences 
    between pairs of population.
    - ref pair: list of tuples. can't be dictionary because of repeated pops / reference tags. 
# Example #3
0
# Create the figure directory if it is missing, then append a trailing slash
# to the stored path.
os.makedirs(fig_dir, exist_ok=True)
fig_dir += '/'


# Run settings. mutlog, ksize and col are assigned here but are not forwarded
# to the call below.
mutlog = 'toMut.log'
min_size = 70
sampling = [5, 100, 10]
bases = 'ACGT'
ksize = 3
collapsed = False
row = 48
col = 4
sample_sim = 150

# count_dir, dir_launch, main_dir, sims_dir, muted_dir and diffs are expected
# to be defined before this point.
data, data_freqs = MC_sample_matrix_v1(
    min_size=min_size,
    samp=sampling,
    count_dir=count_dir,
    dir_launch=dir_launch,
    main_dir=main_dir,
    sim_dir=sims_dir,
    muted_dir=muted_dir,
    diffs=diffs,
    row=row,
    bases=bases,
    exclude=False,
    sample_sim=sample_sim,
    collapsed=collapsed,
)


# Report how many elements `data` holds.
print(f"data extracted: data= {len(data)} elements")

from tools.fasta_utilities import (
    get_mutations, get_by_path, kmer_comp_index, kmer_mut_index,
    fasta_get_freq,kmer_dict_init
    )


def fasta_get_freq(seq,start= 0,end= 0,step= 1,ksize=3,bases= 'ATCG'):
    '''return count of kmer across fasta region'''
    kmer_dict= kmer_dict_init(ksize= ksize,bases=bases)
## directories
# Every path is anchored at the current working directory.
main_dir = f'{os.getcwd()}/'
count_dir = f'{main_dir}mutation_counter/count/'
dir_launch = f'{main_dir}mutation_counter'
muted_dir = f'{main_dir}mutation_counter/data/mutation_count/'
sims_dir = f'{main_dir}data/'
indfile = 'integrated_call_samples.20101123.ALL.panel_regions.txt'
diffs = True

# Run settings. mutlog is assigned here but is not forwarded to the call below.
mutlog = 'toMut.log'
min_size = 40
sampling = [5, 100, 5]

# NOTE(review): other call sites in this file unpack the result into
# (data, data_freqs); here the whole return value is bound to `data`, so the
# len() printed below is the length of whatever the call returns. Confirm
# this matches the MC_sample_matrix_v1 version in use.
data = MC_sample_matrix_v1(
    min_size=min_size,
    samp=sampling,
    count_dir=count_dir,
    dir_launch=dir_launch,
    main_dir=main_dir,
    sim_dir=sims_dir,
    muted_dir=muted_dir,
    diffs=diffs,
    indfile=indfile,
    exclude=False,
)


print(len(data))

### New: pair reference simulations with subsetted populations.
### Extract k-mer comparisons (proportions or p-values) using heatmap_v2.
### TODO: wrap this into a function.

from tools.mcounter_tools import mcounter_deploy

# Comparison settings. The names and values match the defaults in the
# md_reference_comp signature that appears earlier in this file.
# NOTE(review): presumably consumed by mcounter_deploy further down - confirm.
p_value = 1e-5
test_m = 'fisher'
individually = False
exclude = False