Ejemplo n.º 1
0
    def testSlidingZscore(self):
        """Check dfc.calc_sliding_zscore against pinned values.

        Builds a frame with 'mean_abundance', 'diffrac_normalized' and
        'annotated' columns from self.elution / self.elution2, then asserts
        the 'sliding_zscore' value for window=1 (with and without use_gmm),
        window=5, and window=5 with use_gmm and min_weight_threshold=0.2.
        """
        # Single-argument print(...) behaves the same as the Python 2 print
        # statement and is also valid on Python 3.
        print("sliding zscore")

        mean_abundance = dfc.calc_mean_abundance(self.elution, self.elution2)
        mean_abundance.name = 'mean_abundance'
        df = pd.DataFrame().join(mean_abundance, how='outer')

        feature_series = dfc.calc_diffrac(self.elution, self.elution2, normalize_totalCounts=True)
        feature_series.name = 'diffrac_normalized'
        df = df.join(feature_series, how='outer')

        annotated = pd.Series([True, True, False, False, False],
                              index=['a', 'b', 'c', 'd', 'e'],
                              name='annotated')
        df = df.join(annotated, how='outer')
        print(df)

        # The same score is expected for row 'e' in the first three runs.
        expected_e = 103.22222222222224

        sliding_zscores = dfc.calc_sliding_zscore(df, window=1)
        print(sliding_zscores)
        np.testing.assert_almost_equal(sliding_zscores.loc['e']['sliding_zscore'], expected_e)

        sliding_zscores = dfc.calc_sliding_zscore(df, window=1, use_gmm=True)
        print(sliding_zscores)
        np.testing.assert_almost_equal(sliding_zscores.loc['e']['sliding_zscore'], expected_e)

        print("testing window = 5")
        sliding_zscores = dfc.calc_sliding_zscore(df, window=5)
        print(sliding_zscores)
        np.testing.assert_almost_equal(sliding_zscores.loc['e']['sliding_zscore'], expected_e)

        print("testing gmm with artifically low min weight threshold 0.2")
        sliding_zscores = dfc.calc_sliding_zscore(df, window=5, use_gmm=True, min_weight_threshold=0.2)
        print(sliding_zscores)
        #kdrew: the check on row 'e' (expected 529.68150807816676) is disabled;
        # it had some instability because both components had equal weight
        np.testing.assert_almost_equal(sliding_zscores.loc['b']['sliding_zscore'], -1.2864445660676826)
Ejemplo n.º 2
0
    def testSlidingZscoreFDR(self):
        """Check dfc.calc_sliding_fdr_correct against pinned p-values.

        Builds a frame with 'mean_abundance', 'diffrac_normalized' and
        'annotated' columns from self.elution / self.elution2, joins the
        window=1 sliding z-scores onto it, and asserts the 'sliding_pvalues'
        and 'sliding_pvalues_fdrcor' values for rows 'a' and 'e'.
        """
        # Single-argument print(...) behaves the same as the Python 2 print
        # statement and is also valid on Python 3.
        print("sliding zscore FDR")

        mean_abundance = dfc.calc_mean_abundance(self.elution, self.elution2)
        mean_abundance.name = 'mean_abundance'
        df = pd.DataFrame().join(mean_abundance, how='outer')

        feature_series = dfc.calc_diffrac(self.elution,
                                          self.elution2,
                                          normalize_totalCounts=True)
        feature_series.name = 'diffrac_normalized'
        df = df.join(feature_series, how='outer')

        annotated = pd.Series([True, True, False, False, False],
                              index=['a', 'b', 'c', 'd', 'e'],
                              name='annotated')
        df = df.join(annotated, how='outer')

        # The FDR step reads the z-scores from the same frame, so join them on
        # before calling it.
        sliding_zscores = dfc.calc_sliding_zscore(df, window=1)
        df = df.join(sliding_zscores, how='outer')
        fdr_pvals = dfc.calc_sliding_fdr_correct(df)
        print(fdr_pvals)

        # (row label, column, expected value), checked in this order.
        expectations = [
            ('a', 'sliding_pvalues', 0.848460),
            ('e', 'sliding_pvalues', 0.0),
            ('a', 'sliding_pvalues_fdrcor', 0.850628),
            ('e', 'sliding_pvalues_fdrcor', 0.0),
        ]
        for row, column, expected in expectations:
            np.testing.assert_almost_equal(fdr_pvals.loc[row][column],
                                           expected)
Ejemplo n.º 3
0
    def testSlidingZscore_nan(self):
        """Check that dfc.calc_sliding_zscore yields NaN for row 'a'.

        Builds a frame with 'mean_abundance' and 'diffrac_normalized' columns
        from self.elution3 / self.elution4 (no 'annotated' column is joined in
        this test) and asserts the window=1 'sliding_zscore' of 'a' is NaN.
        """
        # Single-argument print(...) behaves the same as the Python 2 print
        # statement and is also valid on Python 3.
        print("sliding zscore2")

        mean_abundance = dfc.calc_mean_abundance(self.elution3, self.elution4)
        mean_abundance.name = 'mean_abundance'
        df = pd.DataFrame().join(mean_abundance, how='outer')

        feature_series = dfc.calc_diffrac(self.elution3,
                                          self.elution4,
                                          normalize_totalCounts=True)
        feature_series.name = 'diffrac_normalized'
        df = df.join(feature_series, how='outer')
        print(df)

        sliding_zscores = dfc.calc_sliding_zscore(df, window=1)
        print(sliding_zscores)

        #kdrew: 'a' is nan because there is no defined standard deviation
        # within the window (b and c are both 1.0)
        # assert_almost_equal passes when actual and desired are both NaN.
        zscore_a = sliding_zscores.loc['a']['sliding_zscore']
        np.testing.assert_almost_equal(zscore_a, np.nan)
Ejemplo n.º 4
0
 def testDifFrac(self):
     """Check dfc.calc_diffrac on self.elution / self.elution2 for rows 'a'..'e'."""
     #kdrew: subtracts one dataframe from another and sums the absolute values (L1 norm)
     diffrac_sum = dfc.calc_diffrac(self.elution, self.elution2)
     # Single-argument print(...) behaves the same as the Python 2 print
     # statement and is also valid on Python 3.
     print(diffrac_sum)

     # (row label, expected value), checked in this order with exact equality.
     expected_by_row = [
         ('a', 0.0),
         ('b', 1.0),
         ('c', 2.0),
         ('d', 4.0),
         ('e', 17.0),
     ]
     for row, expected in expected_by_row:
         actual = diffrac_sum.loc[row]
         assert actual == expected, (
             "diffrac for %r was %r, expected %r" % (row, actual, expected))
Ejemplo n.º 5
0
    def testSlidingZscore(self):
        """Check dfc.calc_sliding_zscore against pinned values.

        Builds a frame with 'mean_abundance', 'diffrac_normalized' and
        'annotated' columns from self.elution / self.elution2, then asserts
        the 'sliding_zscore' value for window=1 (with and without use_gmm),
        window=5, and window=5 with use_gmm and min_weight_threshold=0.2.
        """
        # Single-argument print(...) behaves the same as the Python 2 print
        # statement and is also valid on Python 3.
        print("sliding zscore")

        mean_abundance = dfc.calc_mean_abundance(self.elution, self.elution2)
        mean_abundance.name = 'mean_abundance'
        df = pd.DataFrame().join(mean_abundance, how='outer')

        feature_series = dfc.calc_diffrac(self.elution,
                                          self.elution2,
                                          normalize_totalCounts=True)
        feature_series.name = 'diffrac_normalized'
        df = df.join(feature_series, how='outer')

        annotated = pd.Series([True, True, False, False, False],
                              index=['a', 'b', 'c', 'd', 'e'],
                              name='annotated')
        df = df.join(annotated, how='outer')
        print(df)

        # The same score is expected for row 'e' in the first three runs.
        expected_e = 103.22222222222224

        sliding_zscores = dfc.calc_sliding_zscore(df, window=1)
        print("Sliding_zscores")
        print(sliding_zscores)
        #kdrew: these scores changed a lot after commit 17182d3, the crucial change was to the line that queries for gt_entries, mean_abundance went from > to >=
        #kdrew: updated diffrac script to remove current index, now matches again
        np.testing.assert_almost_equal(
            sliding_zscores.loc['e']['sliding_zscore'], expected_e)

        sliding_zscores = dfc.calc_sliding_zscore(df, window=1, use_gmm=True)
        print(sliding_zscores)
        np.testing.assert_almost_equal(
            sliding_zscores.loc['e']['sliding_zscore'], expected_e)

        print("testing window = 5")
        sliding_zscores = dfc.calc_sliding_zscore(df, window=5)
        print(sliding_zscores)
        np.testing.assert_almost_equal(
            sliding_zscores.loc['e']['sliding_zscore'], expected_e)

        print("testing gmm with artifically low min weight threshold 0.2")
        sliding_zscores = dfc.calc_sliding_zscore(df,
                                                  window=5,
                                                  use_gmm=True,
                                                  min_weight_threshold=0.2)
        print(sliding_zscores)
        #kdrew: the check on row 'e' (expected 529.68150807816676) is disabled;
        # it had some instability because both components had equal weight
        np.testing.assert_almost_equal(
            sliding_zscores.loc['b']['sliding_zscore'], -1.2864445660676826)
Ejemplo n.º 6
0
    def testZscore(self):
        """Check dfc.calc_zscore against a pinned value for row 'e'.

        Builds a frame with 'mean_abundance', 'diffrac_normalized' and
        'annotated' columns from self.elution / self.elution2 and asserts the
        value that dfc.calc_zscore returns at label 'e'.
        """
        # Single-argument print(...) behaves the same as the Python 2 print
        # statement and is also valid on Python 3.
        print("zscore")

        mean_abundance = dfc.calc_mean_abundance(self.elution, self.elution2)
        mean_abundance.name = 'mean_abundance'
        df = pd.DataFrame().join(mean_abundance, how='outer')

        feature_series = dfc.calc_diffrac(self.elution, self.elution2, normalize_totalCounts=True)
        feature_series.name = 'diffrac_normalized'
        df = df.join(feature_series, how='outer')

        annotated = pd.Series([True, True, False, False, False],
                              index=['a', 'b', 'c', 'd', 'e'],
                              name='annotated')
        df = df.join(annotated, how='outer')
        print(df)

        zscores = dfc.calc_zscore(df)
        zscore_e = zscores.loc['e']
        print(zscore_e)
        np.testing.assert_almost_equal(zscore_e, 1.4140145096382672)