Beispiel #1
0
 def test_rpkm(self):
     """Check RPKM normalization against reference values for the first row.

     Loads the 'sc_exp' example dataset, indexes it by the 'gene' column,
     runs ``norm.rpkm`` with 'length' as the gene-length column and compares
     the first row of ``rpkm_norm`` with the expected values.
     """
     df = get_data('sc_exp').data.set_index('gene')
     nm = norm()
     nm.rpkm(df=df, gl='length')
     expected = np.asarray([
         50.804745, 51.327542, 37.699846, 46.737552, 37.472610,
         48.090169
     ])
     # Compare with an explicit absolute tolerance instead of rounding both
     # sides and demanding exact equality: an exact comparison of rounded
     # floats fails spuriously when a value sits next to a rounding boundary.
     # atol=1e-2 still accepts every result the two-decimal check accepted.
     np.testing.assert_allclose(
         nm.rpkm_norm.iloc[0].to_numpy(), expected, rtol=0, atol=1e-2)
Beispiel #2
0
 def test_tpm(self):
     """Check TPM normalization against reference values for the first row.

     Loads the 'sc_exp' example dataset, indexes it by the 'gene' column,
     runs ``norm.tpm`` with 'length' as the gene-length column and compares
     the first row of ``tpm_norm`` with the expected values.
     """
     df = get_data('sc_exp').data.set_index('gene')
     nm = norm()
     nm.tpm(df=df, gl='length')
     expected = np.asarray([
         99.730156, 97.641941, 72.361658, 89.606265, 69.447237,
         90.643338
     ])
     # Compare with an explicit absolute tolerance instead of rounding both
     # sides and demanding exact equality: an exact comparison of rounded
     # floats fails spuriously when a value sits next to a rounding boundary.
     # atol=1e-2 still accepts every result the two-decimal check accepted.
     np.testing.assert_allclose(
         nm.tpm_norm.iloc[0].to_numpy(), expected, rtol=0, atol=1e-2)
Beispiel #3
0
 def test_cpm(self):
     """Check CPM normalization against reference values for the first row.

     Loads the 'sc_exp' example dataset, drops the 'length' column (CPM is
     called here without a gene-length argument), indexes the frame by the
     'gene' column, runs ``norm.cpm`` and compares the first row of
     ``cpm_norm`` with the expected values.
     """
     df = get_data('sc_exp').data
     df = df.drop(['length'], axis=1).set_index('gene')
     nm = norm()
     nm.cpm(df=df)
     expected = np.asarray([
         100.695004, 101.731189, 74.721094, 92.633828, 74.270713,
         95.314714
     ])
     # Compare with an explicit absolute tolerance instead of rounding both
     # sides and demanding exact equality: an exact comparison of rounded
     # floats fails spuriously when a value sits next to a rounding boundary.
     # atol=1e-2 still accepts every result the two-decimal check accepted.
     np.testing.assert_allclose(
         nm.cpm_norm.iloc[0].to_numpy(), expected, rtol=0, atol=1e-2)
Beispiel #4
0
from io import StringIO

import dendropy
import pandas as pd
from bioinfokit.analys import norm, get_data
from natsort import natsorted
from skbio import read
from skbio.diversity import alpha_diversity
from skbio.diversity import beta_diversity
from skbio.stats.distance import anosim
from skbio.stats.distance import mantel
from skbio.stats.ordination import pcoa
from skbio.tree import TreeNode
# Load the raw count table: a tab-separated file whose first line is the
# header and whose first column becomes the row index.
df = pd.read_csv("all_OM252_counts.csv", sep="\t", header=0, index_col=0)

# Normalize the raw counts with the TPM method. `gl` names the column of `df`
# that holds the feature length; lengths must be given in bp.
nm = norm()
nm.tpm(df=df, gl='genome_length')

# Take the TPM-normalized dataframe and transpose it, so the original columns
# become rows in the written table.
tpm_df = nm.tpm_norm.T

tpm_df.to_csv("test_tpm.tsv", sep="\t")