예제 #1
0
def test_weird_counts():
    """
    Compare DESeq and TMM normalization on the "weird counts" data.

    Reads ./data/weird_counts.txt as a tab-separated table indexed by
    its "gene_id" column, runs both normalizers on it and prints, for a
    single gene, the normalized values from each method, the
    TMM / DESeq ratio for sample C, and the A-vs-C fold change under
    each method.  Nothing is asserted; the output is meant to be
    inspected by eye.
    """
    # Keys and values are deliberately identical here.
    samples = {
        "counts_A": "counts_A",
        "counts_B": "counts_B",
        "counts_C": "counts_C",
        "counts_D": "counts_D",
        "counts_E": "counts_E",
    }
    counts = pandas.read_table("./data/weird_counts.txt", sep="\t")
    counts = counts.set_index("gene_id")
    exp_obj = experiment.Experiment(counts, samples)
    # DESeq normalization
    deseq_norm = normalizers.norm_deseq(exp_obj)
    # TMM normalization
    tmm_norm = normalizers.norm_tmm(exp_obj)
    # Compare results for one gene.  Rows are selected by label with
    # .loc; the older .ix indexer is deprecated and no longer available
    # in current pandas.
    g = "ENSMUSG00000020140"
    deseq_gene = deseq_norm.loc[g]
    tmm_gene = tmm_norm.loc[g]
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("DESeq results for %s" % (g))
    print(deseq_gene)
    print("=" * 10)
    print("TMM results for %s" % (g))
    print(tmm_gene)
    # fold difference in counts for sample C
    print("Fold difference in sample C (TMM / DESeq): %.4f"
          % (tmm_gene["counts_C"] / deseq_gene["counts_C"]))
    # fold relative difference in counts, comparing A to C
    tmm_A_vs_C = tmm_gene["counts_A"] / tmm_gene["counts_C"]
    deseq_A_vs_C = deseq_gene["counts_A"] / deseq_gene["counts_C"]
    print("TMM fold change A vs. C: %.2f" % (tmm_A_vs_C))
    print("DESeq fold change A vs. C: %.2f" % (deseq_A_vs_C))
예제 #2
0
def test_weird_counts():
    """
    Print DESeq- and TMM-normalized counts for one gene side by side.

    Loads ./data/weird_counts.txt (tab-separated, indexed by "gene_id"),
    normalizes the five count samples with both methods, then prints
    the per-gene results, the TMM / DESeq ratio for sample C and the
    A-vs-C fold change from each method.  There are no assertions; this
    is a manual comparison.
    """
    samples = {"counts_A": "counts_A",
               "counts_B": "counts_B",
               "counts_C": "counts_C",
               "counts_D": "counts_D",
               "counts_E": "counts_E"}
    counts = pandas.read_table("./data/weird_counts.txt", sep="\t")
    counts = counts.set_index("gene_id")
    exp_obj = experiment.Experiment(counts, samples)
    # DESeq normalization
    deseq_norm = normalizers.norm_deseq(exp_obj)
    # TMM normalization
    tmm_norm = normalizers.norm_tmm(exp_obj)
    # Compare results for one gene
    g = "ENSMUSG00000020140"
    # Label-based lookup via .loc (the .ix indexer is deprecated and
    # missing from current pandas).
    deseq_row = deseq_norm.loc[g]
    tmm_row = tmm_norm.loc[g]
    # print(...) with one argument works on both Python 2 and Python 3.
    print("DESeq results for %s" % (g))
    print(deseq_row)
    print("=" * 10)
    print("TMM results for %s" % (g))
    print(tmm_row)
    # fold difference in counts for sample C
    print("Fold difference in sample C (TMM / DESeq): %.4f"
          % (tmm_row["counts_C"] / deseq_row["counts_C"]))
    # fold relative difference in counts, comparing A to C
    tmm_A_vs_C = tmm_row["counts_A"] / tmm_row["counts_C"]
    deseq_A_vs_C = deseq_row["counts_A"] / deseq_row["counts_C"]
    print("TMM fold change A vs. C: %.2f" % (tmm_A_vs_C))
    print("DESeq fold change A vs. C: %.2f" % (deseq_A_vs_C))
예제 #3
0
파일: test.py 프로젝트: hjanime/normpy
def test_tmm():
    """
    Run TMM normalization on two samples of the "pasilla" test data.

    Prints the first rows of the raw counts followed by the first rows
    of the normalized counts.  Nothing is asserted; the output is meant
    for visual inspection.
    """
    counts_fname = utils.load_testdata("pasilla")
    # Consider only a subset of the samples
    samples = OrderedDict()
    samples["Untreated 1"] = "untreated1"
    samples["Untreated 2"] = "untreated2"
    exp_obj = experiment.Experiment(counts_fname, samples)
    norm_counts_df = normalizers.norm_tmm(exp_obj)
    # Single-argument print(...) gives identical output on Python 2 and
    # Python 3, whereas the bare print statement is a SyntaxError on 3.
    print("\nTMM Testing:")
    print("--------------")
    print("Pre-normalized counts: ")
    print(exp_obj.counts_df.head())
    print("Normalized counts: ")
    print(norm_counts_df.head())
예제 #4
0
파일: test.py 프로젝트: lukauskas/normpy
def test_tmm():
    """
    Exercise TMM normalization on the "pasilla" test data.

    Only two untreated samples are used.  The head of the raw counts
    table is printed first, then the head of the normalized table.
    """
    # Consider only a subset of the samples (insertion order preserved).
    chosen_samples = OrderedDict([
        ("Untreated 1", "untreated1"),
        ("Untreated 2", "untreated2"),
    ])
    pasilla_counts = utils.load_testdata("pasilla")
    exp = experiment.Experiment(pasilla_counts, chosen_samples)
    tmm_df = normalizers.norm_tmm(exp)

    # Banner
    print("\nTMM Testing:")
    print("--------------")

    # Before normalization
    print("Pre-normalized counts: ")
    raw_head = exp.counts_df.head()
    print(raw_head)

    # After normalization
    print("Normalized counts: ")
    normed_head = tmm_df.head()
    print(normed_head)
예제 #5
0
def test_quantile_vs_tmm():
    """
    Compare quantile normalization with TMM on the "pasilla" test data.

    Both normalizers are run on the same two untreated samples.  The
    results are joined on their index (outer join, columns suffixed
    "_q" and "_tmm"), log2-transformed with infinite values replaced by
    NaN, and drawn as a scatter matrix that is saved under the name
    "quantile_vs_tmm_corr" as a PNG.  Intermediate tables are printed;
    nothing is asserted.
    """
    counts_fname = utils.load_testdata("pasilla")
    # Consider only a subset of the samples
    samples = OrderedDict()
    samples["Untreated 1"] = "untreated1"
    samples["Untreated 2"] = "untreated2"
    exp_obj = experiment.Experiment(counts_fname, samples)
    quantile_counts_df = normalizers.norm_q(exp_obj)
    tmm_counts_df = normalizers.norm_tmm(exp_obj)
    print("\nQuantile versus TMM Testing:")
    print("--------------")
    print("Normalized quantile counts: ")
    print(quantile_counts_df.head())
    print("Normalized TMM counts: ")
    print(tmm_counts_df.head())
    print("Correlating the genes.")
    # Merge the dataframes together, indexing by gene
    combined_df = pandas.merge(quantile_counts_df,
                               tmm_counts_df,
                               left_index=True,
                               right_index=True,
                               suffixes=["_q", "_tmm"],
                               how="outer")
    # Get log of counts: get rid of infinite values
    log_counts_df = combined_df.apply(np.log2).replace([-np.inf, np.inf],
                                                       np.nan)
    print("Combined dataframe: ")
    print(combined_df.head())
    print("Combined log dataframe: ")
    print(log_counts_df.head())
    # Plot correlation.  scatter_matrix lives in pandas.plotting in
    # current pandas; the pandas.tools.plotting path is deprecated and
    # missing from recent releases, so it is kept only as a fallback
    # for old installations.
    try:
        from pandas.plotting import scatter_matrix
    except ImportError:
        from pandas.tools.plotting import scatter_matrix
    scatter_matrix(log_counts_df, alpha=0.2, figsize=(8, 7))
    plot_utils.save_fig("quantile_vs_tmm_corr", ext="png")
    sys.stderr.write("Test quantile vs tmm done!\n")
예제 #6
0
파일: test.py 프로젝트: hjanime/normpy
def test_quantile_vs_tmm():
    """
    Compare quantile normalization with TMM on the "pasilla" test data.

    Both normalizers are run on the same two untreated samples.  The
    results are joined on their index (outer join, columns suffixed
    "_q" and "_tmm"), log2-transformed with infinite values replaced by
    NaN, and drawn as a scatter matrix that is saved under the name
    "quantile_vs_tmm_corr" as a PNG.  Intermediate tables are printed;
    nothing is asserted.
    """
    counts_fname = utils.load_testdata("pasilla")
    # Consider only a subset of the samples
    samples = OrderedDict()
    samples["Untreated 1"] = "untreated1"
    samples["Untreated 2"] = "untreated2"
    exp_obj = experiment.Experiment(counts_fname, samples)
    quantile_counts_df = normalizers.norm_q(exp_obj)
    tmm_counts_df = normalizers.norm_tmm(exp_obj)
    # Single-argument print(...) gives identical output on Python 2 and
    # Python 3, whereas the bare print statement is a SyntaxError on 3.
    print("\nQuantile versus TMM Testing:")
    print("--------------")
    print("Normalized quantile counts: ")
    print(quantile_counts_df.head())
    print("Normalized TMM counts: ")
    print(tmm_counts_df.head())
    print("Correlating the genes.")
    # Merge the dataframes together, indexing by gene
    combined_df = pandas.merge(quantile_counts_df, tmm_counts_df,
                               left_index=True,
                               right_index=True,
                               suffixes=["_q", "_tmm"],
                               how="outer")
    # Get log of counts: get rid of infinite values
    log_counts_df = combined_df.apply(np.log2).replace([-np.inf, np.inf],
                                                       np.nan)
    print("Combined dataframe: ")
    print(combined_df.head())
    print("Combined log dataframe: ")
    print(log_counts_df.head())
    # Plot correlation.  scatter_matrix lives in pandas.plotting in
    # current pandas; the pandas.tools.plotting path is deprecated and
    # missing from recent releases, so it is kept only as a fallback
    # for old installations.
    try:
        from pandas.plotting import scatter_matrix
    except ImportError:
        from pandas.tools.plotting import scatter_matrix
    scatter_matrix(log_counts_df, alpha=0.2, figsize=(8, 7))
    plot_utils.save_fig("quantile_vs_tmm_corr", ext="png")