def test_rpkm(self):
    """Check RPKM normalization against stored reference values.

    Loads the 'sc_exp' dataset through ``get_data``, indexes it by the
    'gene' column, runs ``norm().rpkm`` with 'length' as the gene-length
    column, and compares the first row of ``rpkm_norm`` (rounded to two
    decimals) with the expected values rounded the same way.
    """
    counts = get_data('sc_exp').data.set_index('gene')

    normalizer = norm()
    normalizer.rpkm(df=counts, gl='length')

    # Both sides are rounded to 2 decimals before the exact comparison.
    observed = normalizer.rpkm_norm.iloc[0].round(2).to_numpy()
    expected = np.asarray([
        50.804745, 51.327542, 37.699846,
        46.737552, 37.472610, 48.090169,
    ]).round(2)

    np.testing.assert_array_equal(observed, expected)
def test_tpm(self):
    """Check TPM normalization against stored reference values.

    Loads the 'sc_exp' dataset through ``get_data``, indexes it by the
    'gene' column, runs ``norm().tpm`` with 'length' as the gene-length
    column, and compares the first row of ``tpm_norm`` (rounded to two
    decimals) with the expected values rounded the same way.
    """
    counts = get_data('sc_exp').data.set_index('gene')

    normalizer = norm()
    normalizer.tpm(df=counts, gl='length')

    # Both sides are rounded to 2 decimals before the exact comparison.
    observed = normalizer.tpm_norm.iloc[0].round(2).to_numpy()
    expected = np.asarray([
        99.730156, 97.641941, 72.361658,
        89.606265, 69.447237, 90.643338,
    ]).round(2)

    np.testing.assert_array_equal(observed, expected)
def test_cpm(self):
    """Check CPM normalization against stored reference values.

    Loads the 'sc_exp' dataset through ``get_data``, drops the 'length'
    column, indexes the remainder by the 'gene' column, runs
    ``norm().cpm``, and compares the first row of ``cpm_norm`` (rounded to
    two decimals) with the expected values rounded the same way.
    """
    raw = get_data('sc_exp').data
    # The 'length' column is removed before cpm() is called.
    counts = raw.drop(columns=['length']).set_index('gene')

    normalizer = norm()
    normalizer.cpm(df=counts)

    # Both sides are rounded to 2 decimals before the exact comparison.
    observed = normalizer.cpm_norm.iloc[0].round(2).to_numpy()
    expected = np.asarray([
        100.695004, 101.731189, 74.721094,
        92.633828, 74.270713, 95.314714,
    ]).round(2)

    np.testing.assert_array_equal(observed, expected)
from io import StringIO

import dendropy
import pandas as pd
from natsort import natsorted
from skbio import read
from skbio.tree import TreeNode
from skbio.diversity import alpha_diversity
from skbio.diversity import beta_diversity
from skbio.stats.distance import mantel
from skbio.stats.ordination import pcoa
from skbio.stats.distance import anosim
from bioinfokit.analys import norm, get_data

# Load the raw count table. The file is read as tab-separated despite its
# .csv extension; header=0 takes column names from the first row and
# index_col=0 makes the first column the row index, so no separate
# set_index() call is needed.
# NOTE(review): the previous comment described this as the sugarcane RNA-seq
# expression dataset (Bedre et al., 2019), which does not obviously match
# this file name -- confirm the data source.
df = pd.read_csv("all_OM252_counts.csv", sep="\t", header=0, index_col=0)

# now, normalize raw counts using TPM method
# gene length must be in bp
nm = norm()
nm.tpm(df=df, gl='genome_length')

# get TPM normalized dataframe
tpm_df = nm.tpm_norm

# Transpose so the original rows become columns, then write the result as a
# tab-separated file.
tpm_df = tpm_df.T
tpm_df.to_csv("test_tpm.tsv", sep="\t")