# Per-organism CAI statistics, using ribosomal proteins as the reference set.
#
# For every (idx, ribo_count) row of `ribo_cai_info` whose ribo_count is at
# least RIBO_LIMIT, a codon index is built from that organism's ribosomal
# cDNAs, CAI is computed for each of its CDS rows, and the mean / median /
# standard deviation of the resulting CAI column are appended to the
# accumulator lists below (one entry per processed organism).
percentile = []  # not filled by the code visible in this fragment
median = []
mean = []
sigma = []
idx_for_ribo = []
ribo_count_for_df = []
# NOTE(review): the flattened original reads `# pid_cai_list = []`; the lone
# '#' is treated as a separator line (same pattern as the one preceding
# `local_ribo_indexes` below), so the list stays a live, empty accumulator.
pid_cai_list = []
for idx, ribo_count in ribo_cai_info.itertuples(index=False):
    if ribo_count >= RIBO_LIMIT:
        cds_dat = orgs.get_group(idx)
        # cDNA of ribosomal proteins ('ribosomal' is used as a boolean mask).
        ribo_cds = cds_dat[cds_dat['ribosomal']]['cDNA']
        codon_usage = cairi.count_codons(ribo_cds)
        # TODO (original author: "fix that"): the genetic table is taken from
        # the first row of the group only. next(iter(...)) reads that single
        # value without copying the whole column into a list.
        codon_index = cairi.generate_codon_index(
            codon_usage,
            genetic_table=next(iter(cds_dat['table'])))
        # Rows are keyed by their index label in the parent frame (the first
        # item yielded by itertuples()) rather than by pid; per the original
        # note there are duplications, so pid alone presumably is not unique.
        pid_cai = pd.DataFrame(
            ((dat_idx, pid, cairi.cai_for_gene(sequence, codon_index))
             for dat_idx, pid, sequence
             in cds_dat[['pid', 'cDNA']].itertuples()),
            columns=['dat_idx', 'pid', 'CAI'])
        pid_cai = pid_cai.set_index(keys='dat_idx')
        # Characterize the CAI distribution for this organism.
        local_mean = pid_cai['CAI'].mean()
        local_median = pid_cai['CAI'].median()
        local_sigma = pid_cai['CAI'].std()
        mean.append(local_mean)
        median.append(local_median)
        sigma.append(local_sigma)
        idx_for_ribo.append(idx)
        ribo_count_for_df.append(ribo_count)
        # Positional indexes of the ribosomal rows inside the group.
        # `Series.nonzero()` was deprecated in pandas 0.24 and removed in 1.0;
        # calling nonzero() on the underlying NumPy array returns the same
        # positions on every pandas version.
        local_ribo_indexes = cds_dat['ribosomal'].values.nonzero()[0]
        # pid_cai has one row per cds_dat row, in the same order, so these
        # positions select the ribosomal proteins' CAI rows.
        local_ribo = pid_cai.iloc[local_ribo_indexes].reset_index(drop=True)
        # let's also check our t.o. score
        qH_all = pid_cai['CAI'].quantile(q=0.75)
# Per-organism CAI statistics, using a RANDOM sample of proteins as the
# reference set (instead of a specific group of proteins).
#
# For every organism id in `genom_id`, a codon index is built from PROT_COUNT
# randomly sampled cDNAs of that organism, CAI is computed for each of its CDS
# rows, and the mean / median / standard deviation of the resulting CAI column
# are appended to the accumulator lists below (one entry per organism).
median = []
mean = []
sigma = []
idx_for_prot = []
# NOTE(review): the flattened original reads `# pid_cai_list = []`; the lone
# '#' is treated as a separator line, so the list stays a live, empty
# accumulator in case later code appends to it.
pid_cai_list = []
for idx in genom_id:
    cds_dat = orgs.get_group(idx)
    # Random sample of PROT_COUNT cDNAs. Series.sample() raises ValueError
    # when the group holds fewer than PROT_COUNT rows (sampling is without
    # replacement by default); that behavior is left unchanged.
    prot_cds = cds_dat['cDNA'].sample(PROT_COUNT)
    codon_usage = cairi.count_codons(prot_cds)
    # TODO (original author: "fix that"): the genetic table is taken from the
    # first row of the group only. next(iter(...)) reads that single value
    # without copying the whole column into a list.
    codon_index = cairi.generate_codon_index(
        codon_usage,
        genetic_table=next(iter(cds_dat['table'])))
    # Rows are keyed by their index label in the parent frame (the first item
    # yielded by itertuples()) rather than by pid; per the original note there
    # are duplications, so pid alone presumably is not unique.
    pid_cai = pd.DataFrame(
        ((dat_idx, pid, cairi.cai_for_gene(sequence, codon_index))
         for dat_idx, pid, sequence
         in cds_dat[['pid', 'cDNA']].itertuples()),
        columns=['dat_idx', 'pid', 'CAI'])
    pid_cai = pid_cai.set_index(keys='dat_idx')
    # Characterize the CAI distribution for this organism.
    local_mean = pid_cai['CAI'].mean()
    local_median = pid_cai['CAI'].median()
    local_sigma = pid_cai['CAI'].std()
    mean.append(local_mean)
    median.append(local_median)
    sigma.append(local_sigma)
    idx_for_prot.append(idx)
    # Positions of the sampled cDNAs inside 'cds_dat'. The original computed
    # `prot_cds.index - cds_dat.index[0]`, which is only a valid position when
    # the group's index labels are consecutive integers; a group whose rows
    # are not contiguous in the parent frame would make iloc pick the wrong
    # rows or raise IndexError. get_indexer() maps each sampled label to its
    # real position (and gives the same result for a contiguous index).
    local_prot_indexes = cds_dat.index.get_indexer(prot_cds.index)
    # pid_cai has one row per cds_dat row, in the same order, so these
    # positions select the sampled proteins' CAI rows.
    local_prot = pid_cai.iloc[local_prot_indexes].reset_index(drop=True)
    # let's also check our t.o. score