muted_dir = main_dir + 'mutation_counter/data/mutation_count/' sims_dir = main_dir + 'mutation_counter/data/sims_dem/' diffs = False mutlog = 'toMut.log' min_size = 70 sampling = [5, 100, 10] bases = 'ATCG' ksize = 3 sample_sim = 0 data, data_freqs = MC_sample_matrix_v1(min_size=min_size, samp=sampling, count_dir=count_dir, dir_launch=dir_launch, main_dir=main_dir, sim_dir=sims_dir, muted_dir=muted_dir, diffs=diffs, sample_sim=sample_sim, exclude=False) def md_reference_comp(data, p_value=1e-5, test_m='fisher', individually=False, Nbins=10, exclude=False, frequency_range=[0, 1], data_freqs={}, extract='pval',
mutlog = 'toMut.log' min_size = 40 sampling = [5, 100, 5] bases = 'ATCG' ksize = 3 sample_sim = 0 freq_extract = True stepup = '' data, data_freqs = MC_sample_matrix_v1(min_size=min_size, samp=sampling, stepup=stepup, count_dir=count_dir, dir_launch=dir_launch, main_dir=main_dir, sim_dir=sims_dir, indfile=indfile, muted_dir=muted_dir, diffs=diffs, sample_sim=sample_sim, freq_extract=freq_extract, exclude=False) ### def run_stats(ref_sim, ref_pair, data, data_freqs={}): ''' co-factor function to md counter comparisons, deploy heatmap and calculate kmer proportion differences between pairs of population. - ref pair: list of tuples. can't be dictionary because of repeated pops / reference tags.
# Make sure the figure directory exists, then keep it with a trailing slash.
os.makedirs(fig_dir, exist_ok=True)
fig_dir += '/'

# Run configuration.
mutlog = 'toMut.log'
bases = 'ACGT'
collapsed = False
min_size, ksize, sample_sim = 70, 3, 150
row, col = 48, 4
sampling = [5, 100, 10]

# Load the sample matrix; the call returns a pair that is unpacked into
# data and data_freqs.
data, data_freqs = MC_sample_matrix_v1(
    min_size=min_size,
    samp=sampling,
    count_dir=count_dir,
    dir_launch=dir_launch,
    main_dir=main_dir,
    sim_dir=sims_dir,
    muted_dir=muted_dir,
    diffs=diffs,
    row=row,
    bases=bases,
    exclude=False,
    sample_sim=sample_sim,
    collapsed=collapsed,
)
print("data extracted: data= {} elements".format(len(data)))

from tools.fasta_utilities import (
    get_mutations,
    get_by_path,
    kmer_comp_index,
    kmer_mut_index,
    fasta_get_freq,
    kmer_dict_init,
)


# NOTE(review): this definition rebinds the fasta_get_freq name imported from
# tools.fasta_utilities just above - confirm the local override is intended.
def fasta_get_freq(seq, start=0, end=0, step=1, ksize=3, bases='ATCG'):
    '''return count of kmer across fasta region'''
    kmer_dict = kmer_dict_init(ksize=ksize, bases=bases)
## directories
# Every path below hangs off the current working directory.
main_dir = os.getcwd() + '/'
count_dir = main_dir + 'mutation_counter/count/'
dir_launch = main_dir + 'mutation_counter'
muted_dir = main_dir + 'mutation_counter/data/mutation_count/'
sims_dir = main_dir + 'data/'
indfile = 'integrated_call_samples.20101123.ALL.panel_regions.txt'

# Run configuration.
diffs = True
mutlog = 'toMut.log'
min_size = 40
sampling = [5, 100, 5]

# Load the sample matrix and report how many entries came back.
data = MC_sample_matrix_v1(
    min_size=min_size,
    samp=sampling,
    count_dir=count_dir,
    dir_launch=dir_launch,
    main_dir=main_dir,
    sim_dir=sims_dir,
    muted_dir=muted_dir,
    diffs=diffs,
    indfile=indfile,
    exclude=False,
)
print(len(data))

### new - pair reference sims and subsetted populations.
### extract kmer comparisons (proportions or pvals) using heatmap_v2.
### TODO: turn this section into a function.
from tools.mcounter_tools import mcounter_deploy

# Test parameters for the comparison step.
p_value = 1e-5
test_m = 'fisher'
individually = False
exclude = False