コード例 #1
0
#############
#
#
# Codon indexes keyed by the group key of `orgs`:
#   cix_prot - built from a random sample of PROT_COUNT cDNA sequences,
#   cix_ribo - built from the cDNA sequences flagged as ribosomal.
cix_prot = {}
cix_ribo = {}
#
#
#############
for idx, ribo_count in ribo_cai_info.itertuples(index=False):
    #
    cds_dat = orgs.get_group(idx)
    # Only the first row's genetic table is used for the whole group;
    # presumably every row of one group shares the same table -- TODO confirm.
    # next(iter(...)) reads that first element without copying the column.
    genetic_table = next(iter(cds_dat["table"]))
    #
    prot_cds_rnd = cds_dat["cDNA"].sample(PROT_COUNT)  # cDNA sample proteins ...
    codon_usage_rnd = cairi.count_codons(prot_cds_rnd)
    codon_index_rnd = cairi.generate_codon_index(
        codon_usage_rnd, genetic_table=genetic_table
    )
    cix_prot[idx] = codon_index_rnd
    #
    # The ribosomal index is built only when ribo_count reaches RIBO_LIMIT.
    if ribo_count >= RIBO_LIMIT:
        ribo_cds = cds_dat[cds_dat["ribosomal"]]["cDNA"]  # cDNA of ribosomal proteins ...
        codon_usage = cairi.count_codons(ribo_cds)
        codon_index = cairi.generate_codon_index(codon_usage, genetic_table=genetic_table)
        cix_ribo[idx] = codon_index

######################


###########################################################################################################
#  some testing
コード例 #2
0
# some lists to describe organism's CAI distribution features ...
# (one entry is appended per group that passes the RIBO_LIMIT check below)
percentile = []
median = []
mean = []
sigma = []
idx_for_ribo = []
ribo_count_for_df = []
#
pid_cai_list = []
for idx, ribo_count in ribo_cai_info.itertuples(index=False):
    if ribo_count >= RIBO_LIMIT:
        cds_dat = orgs.get_group(idx)
        ribo_cds = cds_dat[cds_dat['ribosomal']]['cDNA']  # cDNA of ribosomal proteins ...
        codon_usage = cairi.count_codons(ribo_cds)
        # Only the first row's genetic table is used for the whole group;
        # presumably every row of one group shares the same table -- TODO confirm.
        codon_index = cairi.generate_codon_index(
            codon_usage, genetic_table=next(iter(cds_dat['table']))
        )
        # we need to track the index from 'dat', as there are some duplicated entries ...
        # itertuples() yields (index, pid, cDNA); the index is kept as 'dat_idx'.
        pid_cai = pd.DataFrame(
            (
                (dat_idx, pid, cairi.cai_for_gene(sequence, codon_index))
                for dat_idx, pid, sequence in cds_dat[['pid', 'cDNA']].itertuples()
            ),
            columns=['dat_idx', 'pid', 'CAI'],
        )
        pid_cai = pid_cai.set_index(keys='dat_idx')
        # characterize CAI distribution for a given organism ...
        local_mean = pid_cai['CAI'].mean()
        local_median = pid_cai['CAI'].median()
        local_sigma = pid_cai['CAI'].std()
        mean.append(local_mean)
        median.append(local_median)
        sigma.append(local_sigma)
        idx_for_ribo.append(idx)
        ribo_count_for_df.append(ribo_count)
        #
        # Positional indices of the rows flagged as ribosomal.
        # Series.nonzero() was removed in pandas 1.0; calling nonzero() on the
        # underlying array gives the same positions on old and new pandas.
        local_ribo_indexes = cds_dat['ribosomal'].values.nonzero()[0]
        # pid_cai has one row per cds_dat row, in the same order, so the
        # positions above select the ribosomal rows of pid_cai.
        local_ribo = pid_cai.iloc[local_ribo_indexes].reset_index(drop=True)