#############
# Codon indices per organism, keyed by the organism index from `ribo_cai_info`:
#   cix_prot -- index built from a random sample of PROT_COUNT coding sequences
#   cix_ribo -- index built from the coding sequences flagged as ribosomal
cix_prot = {}
cix_ribo = {}
#############
for idx, ribo_count in ribo_cai_info.itertuples(index=False):
    cds_dat = orgs.get_group(idx)
    # The genetic table is read from the first row of the organism's group.
    # NOTE(review): this assumes all rows of one organism share the same table
    # (the original code marked this lookup with "fix that ...") -- confirm.
    genetic_table = list(cds_dat["table"])[0]

    # Codon index from a random sample of the organism's coding sequences.
    # NOTE(review): Series.sample without replacement is documented to raise
    # ValueError if the group holds fewer than PROT_COUNT rows -- confirm that
    # the input is pre-filtered accordingly.
    prot_cds_rnd = cds_dat["cDNA"].sample(PROT_COUNT)
    codon_usage_rnd = cairi.count_codons(prot_cds_rnd)
    codon_index_rnd = cairi.generate_codon_index(
        codon_usage_rnd, genetic_table=genetic_table
    )
    cix_prot[idx] = codon_index_rnd

    # Build the ribosomal codon index only when the organism has at least
    # RIBO_LIMIT ribosomal proteins; organisms below the limit get no entry
    # in cix_ribo.
    if ribo_count >= RIBO_LIMIT:
        ribo_cds = cds_dat[cds_dat["ribosomal"]]["cDNA"]  # cDNA of ribosomal proteins
        codon_usage = cairi.count_codons(ribo_cds)
        codon_index = cairi.generate_codon_index(
            codon_usage, genetic_table=genetic_table
        )
        cix_ribo[idx] = codon_index

###########################################################################
# some testing
# Lists describing each organism's CAI distribution; the ones filled below get
# one entry per organism that passes the RIBO_LIMIT filter.
percentile = []
median = []
mean = []
sigma = []
idx_for_ribo = []
ribo_count_for_df = []
#
pid_cai_list = []
for idx, ribo_count in ribo_cai_info.itertuples(index=False):
    if ribo_count >= RIBO_LIMIT:
        cds_dat = orgs.get_group(idx)
        ribo_cds = cds_dat['ribosomal']
        ribo_cds = cds_dat[ribo_cds]['cDNA']  # cDNA of ribosomal proteins ...
        codon_usage = cairi.count_codons(ribo_cds)
        # Genetic table is read from the first row of the group ("fix that ..." in the original).
        codon_index = cairi.generate_codon_index(
            codon_usage, genetic_table=list(cds_dat['table'])[0]
        )
        # Keep the original row label of every record as 'dat_idx': the source
        # table contains duplicated entries, so 'pid' alone does not identify a row.
        pid_cai = pd.DataFrame(
            (
                (dat_idx, pid, cairi.cai_for_gene(sequence, codon_index))
                for dat_idx, pid, sequence in cds_dat[['pid', 'cDNA']].itertuples()
            ),
            columns=['dat_idx', 'pid', 'CAI'],
        )
        pid_cai = pid_cai.set_index(keys='dat_idx')
        # characterize CAI distribution for a given organism ...
        local_mean = pid_cai['CAI'].mean()
        local_median = pid_cai['CAI'].median()
        local_sigma = pid_cai['CAI'].std()
        mean.append(local_mean)
        median.append(local_median)
        sigma.append(local_sigma)
        idx_for_ribo.append(idx)
        ribo_count_for_df.append(ribo_count)
        #
        # Positional indexes of the ribosomal rows. Series.nonzero() was
        # deprecated in pandas 0.24 and removed in 1.0, so call nonzero() on
        # the underlying NumPy array instead. pid_cai was built row by row from
        # cds_dat, so these positions are valid for pid_cai.iloc as well.
        local_ribo_indexes = cds_dat['ribosomal'].values.nonzero()[0]
        local_ribo = pid_cai.iloc[local_ribo_indexes].reset_index(drop=True)