def NCPHIPzscores_RLM(self):
    """Compute per-sample z-scores against negative controls using RLM fits.

    Step 1 fits std ~ median on the frame returned by ``self.select_NC()``
    and takes the fitted ``pred_std`` column as the per-row divisor
    (non-positive predictions are replaced by 1). Step 2 fits, for every
    name in ``self.par['sample_names']``, that sample's column of
    ``self.indf`` against the negative-control median; the residual divided
    by the divisor is the z-score.

    The rounded table is exported through
    ``myDataframe.basic(...).export_df`` and also returned.

    Returns:
        DataFrame of z-scores rounded to one decimal, one column per sample.
    """
    ##1: regressed std of negative controls
    print('\tLinear regression of std~median of negative controls')
    #select df with control samples
    NC_df = self.select_NC()
    #fit robust linear model
    NC_df, _ = myRegression.linear(
        NC_df, ('median', 'std'), self.par['dir_stat']).RLM()
    #Build the divisor as a new Series instead of writing through a column
    #selection: the old chained assignment mutated (or did not mutate) NC_df
    #depending on the pandas version.
    pred_std = NC_df['pred_std']
    divisor = pred_std.mask(pred_std <= 0, 1)

    #2:regressed PHIP values against negative controls
    print('\tLinear regression of sample-specific medians of negative controls')
    sdf = self.indf[self.par['sample_names']].copy()
    med_df = pd.DataFrame(np.zeros(shape=sdf.shape),
                          columns=list(sdf), index=list(sdf.index))
    zscores_df = pd.DataFrame(np.zeros(shape=sdf.shape),
                              columns=list(sdf), index=list(sdf.index))
    for sample in self.par['sample_names']:
        sample_dir = self.par['dir_result'] + sample + '/'
        phip_df = pd.DataFrame({'median': NC_df['median'],
                                'phip': sdf[sample]})
        #fit robust linear model
        phip_df, _ = myRegression.linear(
            phip_df, ('median', 'phip'), sample_dir).RLM()
        #z-score: (observed - regressed) / regressed std of negative controls
        med_df[sample] = phip_df['pred_phip']
        zscores_df[sample] = (sdf[sample] - med_df[sample]) / divisor

    #3: round and export
    zscores_df = np.round(zscores_df, 1)
    myDataframe.basic(zscores_df).export_df(
        self.outfile, self.par['zscore_threshold'], self.index_label)
    return zscores_df
def sample_zscores(self):
    """Column-wise z-scores of ``self.indf`` centred on each column's own values.

    For every column the median and standard deviation are taken over the
    strictly positive entries only; every entry of the column is then
    centred on that median and, when the standard deviation is positive,
    scaled by it. The result is rounded to one decimal, exported through
    ``myDataframe.basic(...).export_df`` and returned.
    """
    def median_zscores(column):
        #statistics come from the positive entries only
        positive = column[column > 0]
        center = np.median(positive)
        spread = np.std(positive)
        if spread > 0:
            scale = float(spread)
            return [(value - center) / scale for value in column]
        #no usable spread: fall back to the plain difference from the median
        return [(value - center) for value in column]

    #calculate z-scores
    scored = self.indf.apply(median_zscores, axis=0)
    scored = np.round(scored, 1)
    #export
    exporter = myDataframe.basic(scored)
    exporter.export_df(self.outfile, self.par['zscore_threshold'],
                       self.index_label)
    return scored
##########
#end
def permute_taxon_blast(self, hits_num):
    """Return the mean permuted hit count per row for ``hits_num`` peptides.

    The per-permutation counts are cached in
    ``<self.par['dir_out']><hits_num>.txt``: if that file exists it is read
    back, otherwise the permutations are generated from the index of
    ``self.par['binary_aln_df']`` and the table is written there.

    Args:
        hits_num: number of peptides drawn per permutation (passed to
            ``myList.basic(...).permute_list``).

    Returns:
        Series with, for each row of the counts table, the rounded mean of
        the floored permutation counts.
    """
    print('permutation of viral blast:{}\t{}'.format(self.par['type'], hits_num))
    outfile = '{}{}.txt'.format(
        myIO.dir_os(self.par['dir_out']).create_dir(), hits_num)
    if os.path.isfile(outfile):
        print('Read file: ', outfile)
        counts_df = pd.read_csv(outfile, header=0, index_col=0, sep="\t",
                                low_memory=False)
    else:
        counts_df = pd.DataFrame()
        #1: permutated peptides
        pep_names = list(self.par['binary_aln_df'].index)
        pep_df = myList.basic(pep_names).permute_list(
            self.par['permutation_times'], hits_num)
        #2: permutation based on the non-overlapped hits num
        for col, perm_pep in pep_df.items():
            #label-based row selection; `.ix` was removed in pandas 1.0
            perm_zb = self.par['binary_aln_df'].loc[perm_pep]
            p_collapse_zb, p_sim_tag = myDataframe.basic(perm_zb).unispecie(
                self.par['sim_threshold'])
            counts_df[col] = p_collapse_zb.apply(sum, axis=0) + p_sim_tag
        #export
        counts_df.to_csv(outfile, sep='\t', header=True,
                         index_label=self.par['type'])
    #combine permuated counts
    perm_mean = counts_df.apply(
        lambda x: np.mean(np.floor(x)), axis=1).round()
    return perm_mean
def init_analysis(self):
    """Fill ``self.par`` with the annotation, sample and reference data.

    Steps, in order: load the annotation table (when ``file_annotation`` is
    configured) together with the lookups derived from it, build or check
    the bowtie index, replace ``self.par`` with the result of
    ``export_sample_info()``, split samples into negative controls and the
    rest when group1 defines ``NC``, and read the reference FASTA into
    ``self.par['ref_dict']``.
    """
    #1: annotation table and the lookups derived from it
    if 'file_annotation' in self.par:
        self.par['annot_df'] = myDataframe.basic().annot_df(
            self.par['file_annotation'])
        #associations of protein-peptides
        self.par['dict_pro_pep'] = myCommon.basic(self.par).protein_peptides()
        #virus libraries only: per the original note, peptides sharing at
        #least a 7-aa stretch are treated as dependent
        if 'VirScan' in self.par['file_annotation']:
            self.par['dependent_pep'] = myCommon.basic(
                self.par).taxon_dependent_peptides()

    #2: check bowtie or build bowtie index
    myAlign.alignment(self.par).build_bowtie_index()

    #3: sample info (self.par is replaced by the returned object)
    self.par = myParallel.samples(self.par).export_sample_info()

    #samples of negative controls
    controls = self.par['group1']
    if 'NC' in controls:
        nc_names = controls['NC'].split(',')
        self.par['NC_samples'] = nc_names
        self.par['phip_samples'] = list(
            set(self.par['sample_names']) - set(nc_names))
        print('\nNumber of negative Controls (Beads only): ', len(nc_names))
        #NOTE(review): this reports len(sample_names), i.e. all samples,
        #not len(phip_samples) - confirm which count is intended.
        print('Number of PhIP samples: ', len(self.par['sample_names']))

    #read reference sequence file (*.fa); the id list is not used here
    reference, _ref_ids = myGenome.genome(self.par['file_ref_fa']).read_fa()
    self.par['ref_dict'] = reference
def NCPHIPzscores_linear(self):
    """Compute per-sample z-scores against negative controls using linear fits.

    Step 1 fits std ~ median on the negative-control frame from
    ``self.select_NC()`` after dropping rows with a non-positive median or
    a median at/above the 99th percentile. Step 2 fits, per sample, the
    sample counts against the negative-control median (rows with NC <= 0 or
    counts at/above the sample's 99th percentile are excluded from the fit).
    Step 3 divides the residuals row-wise by ``NC_df['pred_std']`` (zeros
    replaced by 1), rounds to one decimal, exports and returns the table.

    Returns:
        DataFrame of z-scores, one column per name in
        ``self.par['sample_names']``.
    """
    ##1: regressed std of negative controls
    print('\tLinear regression of std ~ median of negative controls')
    sdf = self.indf[self.par['sample_names']].copy()
    #select df with control samples
    NC_df = self.select_NC()
    #remove 0 mean and highest outlier
    nc_median99 = np.percentile(NC_df['median'], 99)
    keep = (NC_df['median'] > 0) & (NC_df['median'] < nc_median99)
    NC_regress = NC_df.loc[keep, :].copy()
    #fit linear model
    #NOTE(review): the returned fit is not used below; `pred_std` is read
    #from NC_df itself - confirm select_NC()/linear() populate that column.
    myRegression.linear(NC_regress, ('median', 'std'),
                        self.par['dir_stat']).linear()

    #2:regressed PHIP values against negative controls
    print('\tLinear regression of medians of phip sample ~ medians of negative controls')
    reg_df = pd.DataFrame(np.zeros(shape=sdf.shape),
                          columns=list(sdf), index=list(sdf.index))
    for sample in self.par['sample_names']:
        subdf = pd.DataFrame({'NC': NC_df['median'], 'phip': sdf[sample]})
        #remove top 1% phipseq values
        phip99 = np.percentile(subdf['phip'], 99)
        sub_regress = subdf.loc[(subdf['NC'] > 0) & (subdf['phip'] < phip99), :]
        #linear regression
        sample_dir = self.par['dir_result'] + sample + '/'
        phip_lm = myRegression.linear(
            sub_regress, ('NC', 'phip'), sample_dir).linear()
        #predicted phipRC
        reg_df[sample] = phip_lm['df']['pred_phip']

    #3:
    print('\t calculate z scores against NC:')
    #RC minus regress RC
    residuals_df = sdf - reg_df
    #then divided by regressed std of negative control in rows.
    #Built with mask() so NC_df is not modified through an aliased Series.
    #NOTE(review): only exact zeros are replaced here, whereas the RLM
    #variant replaces all non-positive values - confirm which is intended.
    pred_std = NC_df['pred_std']
    divisor = pred_std.mask(pred_std == 0, 1)
    zscores_df = residuals_df.div(divisor, axis=0)
    zscores_df = np.round(zscores_df, 1)
    #export
    myDataframe.basic(zscores_df).export_df(
        self.outfile, self.par['zscore_threshold'], self.index_label)
    return zscores_df
def NC_zscores(self):
    """Compute z-scores of every row of ``self.indf`` against negative controls.

    A linear model std ~ median is fitted on the negative-control frame from
    ``self.select_NC()`` (rows with a non-positive median or a median
    at/above the 99th percentile are dropped first). The fitted ``median``
    and ``pred_std`` columns are then inserted as the first two columns of
    ``self.indf`` and each row is passed to ``sigAnalysis(...).Z_test``.

    Side effect: ``self.indf`` gains the columns ``median`` and
    ``predicted_std``; ``DataFrame.insert`` raises if they already exist,
    so this method cannot be run twice on the same frame.

    Returns:
        DataFrame of z-scores with the original columns of ``self.indf``.
    """
    #select df with control samples
    df0 = self.select_NC()
    #remove 0 mean and highest outlier.
    #The percentile was previously taken from an undefined name
    #(`NC_median`); it is the 99th percentile of the control medians.
    median_99 = np.percentile(df0['median'], 99)
    df1 = df0.loc[(df0['median'] > 0) & (df0['median'] < median_99), :].copy()
    #fit linear model
    lm = myRegression.linear(df1, ('median', 'std'),
                             self.par['dir_stat']).linear()

    print('calculate z scores:')
    #after both inserts the column order is: median, predicted_std, samples...
    self.indf.insert(0, 'predicted_std', lm['df']['pred_std'])
    self.indf.insert(0, 'median', lm['df']['median'])
    #loops of data frame: explicit positional access, since integer keys on
    #a label-indexed row are no longer treated as positions by pandas
    zscores_df = self.indf.apply(
        lambda row: sigAnalysis(row.iloc[2:]).Z_test(
            row.iloc[0], row.iloc[1])['zscores'],
        axis=1)
    zscores_df.columns = list(self.indf)[2:]
    #export
    myDataframe.basic(zscores_df).export_df(
        self.outfile, self.par['zscore_threshold'], self.index_label)
    return zscores_df
def NCPHIPzscores_PN(self):
    """Compute z-scores using a std model taken from ``self.NC_whole_std()``.

    ``self.NC_whole_std()`` supplies a frame and a fitted model; the model
    is queried with ``logmedian`` and its prediction is used as a log10
    standard deviation. For each sample, counts are regressed (RLM) on the
    mean of this dataset's negative controls; the predicted counts, where
    positive, are log10-transformed and fed to that model to obtain the
    per-row standard deviation used as the z-score divisor.

    Per sample, the working table is written to
    ``<dir_result><sample>/polynomial_median.csv``. NaN and infinite
    z-scores are set to 0 before rounding to one decimal, exporting and
    returning.

    Returns:
        DataFrame shaped like ``self.indf`` holding the z-scores (columns
        not listed in ``self.par['sample_names']`` stay 0).
    """
    ##1: regressed std~mean of negative controls
    #per the original note: regression of logstd~logmedian across a
    #beads-only reference set
    wNC, wNC_fit = self.NC_whole_std()
    #RLM of std~median of beads only of this dataset
    nc_counts = self.indf[self.par['NC_samples']]
    NC_df = pd.DataFrame({
        'wNC_median': wNC['median'],
        'wNC_std': wNC['std'],
        'mean': nc_counts.mean(axis=1),
        'median': nc_counts.median(axis=1),
        'std': nc_counts.std(axis=1),
    })
    #the returned values are not used below; the call is kept as it was
    myRegression.linear(NC_df, ('median', 'std'), self.par['dir_QC']).RLM()

    #2:regressed PHIP values against negative controls
    print('\tLinear regression of sample-specific medians of negative controls')
    zscores_df = self.indf.copy()
    zscores_df[:] = 0.0
    for sample in self.par['sample_names']:
        zdf = NC_df.copy()
        zdf['phip'] = self.indf[sample]
        #fit robust linear model
        sample_dir = '{}{}/'.format(self.par['dir_result'], sample)
        mNC, _ = myRegression.linear(zdf, ('mean', 'phip'), sample_dir).RLM()
        #Non-positive predictions cannot be log-transformed: blank them out
        #explicitly before storing, rather than writing through a column
        #selection (which only sometimes reached zdf, depending on pandas).
        predicted = mNC['pred_phip']
        zdf['pred_phip'] = predicted.mask(predicted <= 0)
        #works as x value of NCstd~median
        zdf['pred_logphip'] = np.log10(zdf['pred_phip'])
        pred_std = 10**wNC_fit.predict({'logmedian': zdf['pred_logphip']})
        zdf['pred_std'] = pred_std
        zscores_df[sample] = (zdf['phip'] - zdf['pred_phip']) / pred_std
        zdf['zscores'] = zscores_df[sample]
        #export zscore
        zdf.to_csv(sample_dir + 'polynomial_median.csv', header=True,
                   index_label=self.index_label)

    #3:export z scores into self.outfiles
    zscores_df.replace([np.nan, np.inf, -np.inf], 0, inplace=True)
    zscores_df = np.round(zscores_df, 1)
    myDataframe.basic(zscores_df).export_df(
        self.outfile, self.par['zscore_threshold'], self.index_label)
    return zscores_df
def taxon_blast2(self, file_aln, zscore_file):
    """Count taxon-specific hits per sample with binomial p-values.

    For every sample column of the z-score table, peptides at or above
    ``self.par['specieZ_threshold']`` are reduced to independent hits
    (``gen_ind_hits``) and passed with the alignment matrix to
    ``binom_unispecie``. Four tab-separated tables are written next to
    ``zscore_file`` with the suffixes ``counting.txt``, ``peptides.txt``,
    ``p-values.txt`` and ``padjusted.txt`` (Benjamini-Hochberg adjusted).

    Args:
        file_aln: alignment file read through ``myDataframe.basic().aln_df``.
        zscore_file: z-score table; its prefix also names the output files.
    """
    taxon_type = myIO.file_os(file_aln).name_prefix()
    print("\n{}:{}\n".format(taxon_type, zscore_file))
    #read zscore_df
    zdf = myDataframe.basic().standard_df(zscore_file)
    #read alignment file for specie alignment
    binary_b = myDataframe.basic().aln_df(file_aln, self.par['align_score'])
    #match order of align score and zscore, replace na
    binary_b = binary_b.reindex(zdf.index).fillna(0)

    #sample names in columns, and specie in rows
    sum_df = pd.DataFrame(0, index=list(binary_b), columns=list(zdf))
    pep_df = pd.DataFrame(np.nan, index=list(binary_b), columns=list(zdf))
    p_df = pd.DataFrame(index=list(binary_b), columns=list(zdf))
    #`iteritems` was removed in pandas 2.0; `items` is the equivalent
    for sample_name, column in zdf.items():
        #1: select peptides
        #first remove all non-hits
        hits = column[column >= self.par['specieZ_threshold']].copy()
        hits.sort_values(axis=0, ascending=False, inplace=True)
        #remove overlapped hits
        nonoverlap_hits = myList.basic(hits).gen_ind_hits(
            self.par['dependent_pep'])
        input_num = len(nonoverlap_hits)
        print("{}:\thits={}, nonoverlapped={}".format(
            sample_name, len(hits), input_num))
        #2: remove overlap hits between species
        if input_num > 0:
            zb_df = binary_b.loc[nonoverlap_hits.index]
            collapse_zb, sim_tag, p_series = myDataframe.basic(
                zb_df).binom_unispecie(self.par['dir_ref_seq'], input_num,
                                       self.par['p_threshold'],
                                       self.par['x_threshold'])
            #counts of hits
            sum_df[sample_name] = collapse_zb.apply(sum, axis=0) + sim_tag
            #all peptide_id list
            pep_df[sample_name] = collapse_zb.apply(
                lambda x: myList.basic(x).names_string(0.001), axis=0)
            p_df[sample_name] = p_series

    #export to file
    file_head = myIO.file_os(
        zscore_file).file_prefix() + '_' + taxon_type + '_'
    sum_df.to_csv(file_head + 'counting.txt', sep='\t', header=True,
                  index_label='Specie')
    pep_df.to_csv(file_head + 'peptides.txt', sep='\t', header=True,
                  index_label='Specie')
    p_df.to_csv(file_head + 'p-values.txt', sep='\t', header=True,
                index_label='Specie')

    #Adjusted p-values using B-H
    padjust_df = pd.DataFrame(index=list(binary_b), columns=list(zdf))
    for i in p_df.columns:
        #Columns of samples without hits are still object-dtype NaN, which
        #np.isnan/np.isfinite reject; cast to float first.
        pvals = np.array(p_df[i].values, dtype=float)
        finite = np.isfinite(pvals)
        if not finite.any():
            #nothing to adjust for this sample
            continue
        pval_corrected = np.empty(pvals.shape)
        pval_corrected.fill(np.nan)
        pval_corrected[finite] = multipletests(pvals[finite],
                                               method='fdr_bh')[1]
        padjust_df[i] = pval_corrected
    padjust_df.to_csv(file_head + 'padjusted.txt', sep='\t', header=True,
                      index_label='Specie')
#end
def taxon_blast(self, file_aln, zscore_file):
    """Count taxon-specific hits per sample after removing overlapped hits.

    For every sample column of the z-score table, peptides at or above
    ``self.par['specieZ_threshold']`` are reduced with ``remove_overlap``
    and then collapsed per taxon with ``unispecie``. Two tab-separated
    tables are written next to ``zscore_file`` (``..._counting.txt`` and
    ``..._peptides.txt``); per-sample overlap details go to
    ``<dir_result><sample>/<taxon_type>.csv``.

    Args:
        file_aln: alignment file read through ``myDataframe.basic().aln_df``.
        zscore_file: z-score table; its prefix also names the output files.
    """
    print(
        '###Signficant taxon by removing overlapped hits based on blast alignment.'
    )
    taxon_type = myIO.file_os(file_aln).name_prefix()
    print('{}: {}'.format(taxon_type, zscore_file))
    #read zscore_df
    zdf = myDataframe.basic().standard_df(zscore_file)
    #read alignment file for specie alignment
    binary_b = myDataframe.basic().aln_df(file_aln, self.par['align_score'])
    #match order of align score and zscore, replace na
    binary_b = binary_b.reindex(zdf.index).fillna(0)

    #sample names in columns, and specie in rows
    sum_df = pd.DataFrame(0, index=list(binary_b), columns=list(zdf))
    pep_df = pd.DataFrame(np.nan, index=list(binary_b), columns=list(zdf))
    for sample_name, column in zdf.items():
        #1: select peptides
        #first remove all non-hits
        hits = column[column >= self.par['specieZ_threshold']].copy()
        hits.sort_values(axis=0, ascending=False, inplace=True)
        #remove overlapped hits
        nonoverlap_hits, overlap_debug = myList.basic(hits).remove_overlap(
            self.par['dependent_pep'])
        input_num = len(nonoverlap_hits)
        print('{}: hits={}, nonoverlapped={}'.format(
            sample_name, len(hits), input_num))

        #2: remove overlap hits between species
        if input_num > 0:
            ###2-1: export peptides
            try:
                outfile = '{}{}/{}.csv'.format(self.par['dir_result'],
                                               sample_name, taxon_type)
                overlap_debug.to_csv(outfile, header=True,
                                     index_label='peptides')
            except FileNotFoundError:
                #a missing sample directory is recorded in the error file
                #and does not stop the analysis
                myIO.file_os(self.par['file_err'], "\t").line_replace(
                    {'taxon_blast': sample_name})

            ###2-2: specie-specific hits based on non-overlapped hits
            #label-based row selection; `.ix` was removed in pandas 1.0
            zb_df = binary_b.loc[nonoverlap_hits.index]
            collapse_zb, sim_tag = myDataframe.basic(zb_df).unispecie(
                self.par['sim_threshold'])
            #counts of hits
            sum_df[sample_name] = collapse_zb.apply(sum, axis=0) + sim_tag
            #all peptide_id list
            pep_df[sample_name] = collapse_zb.apply(
                lambda x: myList.basic(x).names_string(0.001), axis=0)

    #export to file
    file_head = '{}_{}_'.format(
        myIO.file_os(zscore_file).file_prefix(), taxon_type)
    sum_df.to_csv(file_head + 'counting.txt', sep='\t', header=True,
                  index_label='Specie')
    pep_df.to_csv(file_head + 'peptides.txt', sep='\t', header=True,
                  index_label='Specie')
def taxon_spec(self, count_file, taxon_rank, annot_index):
    """Count taxon-specific hits per sample at the given taxonomic rank.

    Rows of the combined annotation/count table are grouped by
    ``taxon_rank``. For each sample and each group the hits are filtered
    first by ``self.taxon_inter_searching`` and then by
    ``self.taxon_intra_searching``. Three tab-separated tables are written
    with the prefix ``<count_file prefix>_<taxon_rank>_``: ``counting.txt``
    (number of hits), ``peptides.txt`` (``(id,score)`` pairs) and
    ``debugging.txt`` (intermediate hit lists and counts).

    Args:
        count_file: count/z-score table handed to ``combine_df``.
        taxon_rank: one of ``taxon_specie``, ``taxon_genus``,
            ``taxon_family``, ``taxon_phip``; any other value raises
            ``KeyError``.
        annot_index: annotation index column handed to ``combine_df``.
    """
    #combine two data frame
    combined_df, phip_df = myCommon.basic(self.par).combine_df(
        count_file, annot_index)
    #taxonomy groups: group name -> row labels
    taxon_group = combined_df.groupby(taxon_rank).groups
    taxon_pairs = {'taxon_specie': 'InterSpecie',
                   'taxon_genus': 'InterGenus',
                   'taxon_family': 'InterFamily',
                   'taxon_phip': 'InterTaxon'}
    taxon_inter = taxon_pairs[taxon_rank]

    #inter-score dict
    #per the original note, taxon_inter holds pep_ids separated by comma
    pepid_taxoninter = pd.Series(combined_df[taxon_inter],
                                 index=list(phip_df.index))
    inter_df = myDataframe.basic(phip_df).interact_df(
        pepid_taxoninter, max, count_file + taxon_inter)

    sample_names = list(phip_df)
    #nested dicts: sample -> taxon -> value
    taxon_dict = {s: {} for s in sample_names}
    #number of peptides per taxon
    taxon_dict['peptides'] = {a: len(b) for a, b in taxon_group.items()}
    #pepid and zscores of hits
    taxon_pep_dict = {s: {} for s in sample_names}
    #for identifying bugs
    debugging_dict = {}
    for s in sample_names:
        for tag in (':all_hits', ':inter_hits', ':intra_hits', ':hits',
                    ':counts'):
            debugging_dict[s + tag] = {}

    #loop by sample_names
    for sample_name, col in phip_df.items():
        for taxon, indexs in taxon_group.items():
            #1: inter-taxon searching
            #label-based row selection; `.ix` was removed in pandas 1.0
            inter_list = inter_df.loc[indexs][sample_name]
            inter_dict = self.taxon_inter_searching(col[indexs], inter_list)
            debugging_dict[sample_name + ':all_hits'][taxon] = \
                inter_dict['all_hits']
            debugging_dict[sample_name + ':inter_hits'][taxon] = \
                inter_dict['inter_hits']

            #2: intra-taxon searching
            intra_dict = self.taxon_intra_searching(
                col[inter_dict['other_hits']])
            debugging_dict[sample_name + ':intra_hits'][taxon] = \
                intra_dict['intra_hits']
            debugging_dict[sample_name + ':hits'][taxon] = intra_dict['hits']

            all_hits = [
                '{}:{}'.format('all', len(inter_dict['all_hits'])),
                '{}:{}'.format('inter', len(inter_dict['inter_hits'])),
                '{}:{}'.format('intra', len(intra_dict['intra_hits'])),
                '{}:{}'.format('hits', len(intra_dict['hits']))
            ]
            debugging_dict[sample_name + ':counts'][taxon] = ','.join(all_hits)

            hit_list = [
                '({},{})'.format(a, b)
                for a, b in col[intra_dict['hits']].items()
            ]
            taxon_pep_dict[sample_name][taxon] = ','.join(hit_list)
            #counts matrix of taxonomy search
            taxon_dict[sample_name][taxon] = len(intra_dict['hits'])

    #export to file
    file_head = '{}_{}_'.format(
        myIO.file_os(count_file).file_prefix(), taxon_rank)
    taxon_dict = myDict.basic(taxon_dict).transform_dict2()
    myDict.basic(taxon_dict).dict2_to_file(file_head + 'counting.txt', "\t")
    taxon_pep_dict = myDict.basic(taxon_pep_dict).transform_dict2()
    myDict.basic(taxon_pep_dict).dict2_to_file(file_head + 'peptides.txt',
                                               "\t")
    debugging_dict = myDict.basic(debugging_dict).transform_dict2()
    myDict.basic(debugging_dict).dict2_to_file(file_head + 'debugging.txt',
                                               "\t")
def enrich_pro(self, infile, annot_A, annot_B, sep1, sep2):
    """Permutation-based enrichment of ``annot_B`` terms among significant hits.

    For each sample column of ``infile``, rows at or above
    ``self.par['zscore_threshold']`` are the observed hits. The frequency
    of every ``annot_B`` term among them is compared with the frequencies
    from ``self.par['permutation_times']`` random draws of the same size
    from the ``annot_A`` ids. Three tables are written to
    ``self.par['dir_enrichment']``: observed counts, z-scores and a
    debugging table.

    Args:
        infile: table of scores; read as CSV when the name ends in
            ``.csv``, otherwise as tab-separated.
        annot_A: annotation column of ids; defaults to ``transcript_id``.
        annot_B: annotation column of terms; defaults to ``pro_motifs``.
        sep1, sep2: separators handed to ``df_list`` / ``list_dict``.

    Raises:
        ValueError: from ``random.sample`` if a sample has more significant
            hits than there are ``annot_A`` ids.
    """
    if annot_A is None:
        annot_A = 'transcript_id'
    if annot_B is None:
        annot_B = 'pro_motifs'
    print("Enrichment analysis of {} => {} : {}".format(
        annot_A, annot_B, infile))
    #read data frame
    file_sep = ',' if infile.endswith('.csv') else '\t'
    counts_df = pd.read_csv(infile, index_col=0, sep=file_sep,
                            low_memory=False)
    #all ids connecting counts_df with annot_df
    A_ids = list(self.par['annot_df'][annot_A])
    #all terms of annot_B in list format
    B_ids = myDataframe.basic(self.par['annot_df']).df_list(
        annot_B, sep1, sep2)
    #A_ids vs list of B_ids in dict format
    AB_dict = myDataframe.basic(self.par['annot_df']).list_dict(
        annot_A, annot_B, sep1, sep2)

    #frequency of observed enriched terms
    hits_observed = myDict.basic().init_dict2(B_ids, list(counts_df), 0)
    #z-scores of observations based on the permutation model
    hits_zscores = myDict.basic().init_dict2(B_ids, list(counts_df), 0)
    #for detecting bugs
    debugging = myDict.basic().init_dict2(
        B_ids + ['hits_counts', 'interact_counts'], {}, 'NA')

    permutation_times = self.par['permutation_times']
    #loop of data frame by columns
    for sample_name, zscores in counts_df.items():
        zscores = pd.Series(zscores)
        zscores.index = list(counts_df.index)
        #1: get ids of significant hits
        sig_zscores = zscores[zscores >= self.par['zscore_threshold']]
        obs_ids = list(sig_zscores.index)
        sig_num = len(obs_ids)
        #2: count frequency of enriched annotations
        obs_freq, obs_details = myDict.basic(AB_dict).elements_frequency(
            obs_ids)
        debugging['hits_counts'][sample_name] = sig_num
        debugging['interact_counts'][sample_name] = sum(obs_freq.values())

        #3: permute samples
        perm_dict = {}
        for _ in range(permutation_times):
            perm_peps = random.sample(A_ids, sig_num)
            tmp_perm, _details = myDict.basic(AB_dict).elements_frequency(
                perm_peps)
            for key, value in tmp_perm.items():
                perm_dict.setdefault(key, []).append(value)

        #4: calculate z-scores of observed counts
        for enriched_id, obs_num in obs_freq.items():
            #frequency of observed enriched annotation
            hits_observed[enriched_id][sample_name] = obs_num
            debugging[enriched_id][sample_name] = '{}:{}'.format(
                obs_num, obs_details[enriched_id])
            if enriched_id in perm_dict:
                perm_pools = perm_dict[enriched_id]
                #Pad with zeros so every pool covers all permutations. The
                #pad length used to be a hard-coded 5, which left pools
                #short (and the null mean/sd biased) for larger runs.
                perm_pools = perm_pools + [0] * (
                    permutation_times - len(perm_pools))
                perm_mean = np.mean(perm_pools)
                perm_sd = np.std(perm_pools)
                #z-score of observed hits against the null model
                if perm_sd > 0:
                    zscore = (obs_num - perm_mean) / perm_sd
                else:
                    zscore = obs_num - perm_mean
                hits_zscores[enriched_id][sample_name] = round(zscore, 2)
            else:
                #term never drawn in any permutation: report the raw count
                hits_zscores[enriched_id][sample_name] = obs_num

    #export
    file_head = '{}{}_{}_'.format(self.par['dir_enrichment'],
                                  myIO.file_os(infile).name_prefix(), annot_B)
    myDict.basic(hits_observed).dict2_to_file(
        out_file=file_head + 'counting.txt', index_label=annot_B)
    myDict.basic(hits_zscores).dict2_to_file(
        out_file=file_head + 'zscores.txt', index_label=annot_B)
    myDict.basic(debugging).dict2_to_file(
        out_file=file_head + 'debugging.txt', index_label=annot_B, NA='NA')
'sim_threshold': 0.8, 'dir_bin': dir_bin + '/', 'dir_home': dir_home + '/', 'permutation_times': 100 } par['dir_permutation'] = myIO.dir_os(par['dir_home'] + 'permutation/').create_dir() print('###permutation procedure\n\n') pool = mpd.Pool(processes=par['threads_num']) #permuation of organism alignment if par['organism_permutation'] == 'yes': #read aln file file_aln = par['dir_home'] + 'ref_seq/organism_blast.txt' par['binary_aln_df'] = myDataframe.basic().aln_df( file_aln, par['align_score']) par['type'] = myIO.file_os(file_aln).name_prefix() par['dir_out'] = myIO.dir_os(par['dir_home'] + 'permutation/' + par['type']).create_dir() # for hits_num in range(par['start'], par['end']): pool.apply_async(myCommon.basic(par).permute_taxon_blast, args=(hits_num, )) time.sleep(1) #permuation of specie alignment if par['specie_permutation'] == 'yes': #read aln file file_aln = par['dir_home'] + 'ref_seq/specie_blast.txt' par['binary_aln_df'] = myDataframe.basic().aln_df( file_aln, par['align_score'])