def testSlidingZscore(self):
    """Check dfc.calc_sliding_zscore for several window and GMM settings.

    A frame with 'mean_abundance', 'diffrac_normalized' and 'annotated'
    columns is assembled from self.elution / self.elution2, and the
    'sliding_zscore' value of selected rows is compared against
    previously recorded values.
    """
    # NOTE(review): another method named testSlidingZscore appears later in
    # this file. If both live in the same class, the later definition
    # replaces this one and this body never runs -- confirm and rename or
    # drop one of them.

    # Single-argument print(...) gives the same output under Python 2 and
    # is also valid syntax under Python 3.
    print("sliding zscore")

    mean_abundance = dfc.calc_mean_abundance(self.elution, self.elution2)
    mean_abundance.name = 'mean_abundance'

    diffrac_normalized = dfc.calc_diffrac(
        self.elution, self.elution2, normalize_totalCounts=True)
    diffrac_normalized.name = 'diffrac_normalized'

    annotated = pd.Series([True, True, False, False, False],
                          index=['a', 'b', 'c', 'd', 'e'])
    annotated.name = 'annotated'

    # Outer-join the columns one at a time onto an initially empty frame,
    # in the same order as before.
    df = pd.DataFrame()
    for column in (mean_abundance, diffrac_normalized, annotated):
        df = df.join(column, how='outer')
    print(df)

    # Recorded z-score for row 'e'; asserted for the first three settings.
    expected_e = 103.22222222222224

    zscores = dfc.calc_sliding_zscore(df, window=1)
    print(zscores)
    np.testing.assert_almost_equal(
        zscores.loc['e']['sliding_zscore'], expected_e)

    zscores = dfc.calc_sliding_zscore(df, window=1, use_gmm=True)
    print(zscores)
    np.testing.assert_almost_equal(
        zscores.loc['e']['sliding_zscore'], expected_e)

    print("testing window = 5")
    zscores = dfc.calc_sliding_zscore(df, window=5)
    print(zscores)
    np.testing.assert_almost_equal(
        zscores.loc['e']['sliding_zscore'], expected_e)

    print("testing gmm with artifically low min weight threshold 0.2")
    zscores = dfc.calc_sliding_zscore(
        df, window=5, use_gmm=True, min_weight_threshold=0.2)
    print(zscores)
    # A check of row 'e' against 529.68150807816676 is disabled here.
    # kdrew: had some instability because both components had equal weight
    np.testing.assert_almost_equal(
        zscores.loc['b']['sliding_zscore'], -1.2864445660676826)
def testSlidingZscore_nan(self):
    """Check that dfc.calc_sliding_zscore yields NaN for row 'a'.

    The frame is assembled from self.elution3 / self.elution4 with only
    the 'mean_abundance' and 'diffrac_normalized' columns.
    """
    # Single-argument print(...) gives the same output under Python 2 and
    # is also valid syntax under Python 3.
    print("sliding zscore2")

    mean_abundance = dfc.calc_mean_abundance(self.elution3, self.elution4)
    mean_abundance.name = 'mean_abundance'

    diffrac_normalized = dfc.calc_diffrac(
        self.elution3, self.elution4, normalize_totalCounts=True)
    diffrac_normalized.name = 'diffrac_normalized'

    # NOTE: the join of an 'annotated' boolean column
    # ([True, True, False, False, False] indexed by 'a'..'e') is disabled
    # in this test.
    df = pd.DataFrame()
    for column in (mean_abundance, diffrac_normalized):
        df = df.join(column, how='outer')
    print(df)

    zscores = dfc.calc_sliding_zscore(df, window=1)
    print(zscores)

    # kdrew: a is nan because there is no defined standard deviation within
    # the window (b and c are both 1.0).
    # numpy's assert_almost_equal accepts the case where both the actual
    # and desired values are NaN.
    np.testing.assert_almost_equal(
        zscores.loc['a']['sliding_zscore'], np.nan)
def testSlidingZscoreFDR(self):
    """Check dfc.calc_sliding_fdr_correct on top of sliding z-scores.

    The frame built from self.elution / self.elution2 is extended with the
    output of dfc.calc_sliding_zscore (window=1), and the resulting
    'sliding_pvalues' and 'sliding_pvalues_fdrcor' values for rows 'a' and
    'e' are compared against previously recorded values.
    """
    # Single-argument print(...) gives the same output under Python 2 and
    # is also valid syntax under Python 3.
    print("sliding zscore FDR")

    mean_abundance = dfc.calc_mean_abundance(self.elution, self.elution2)
    mean_abundance.name = 'mean_abundance'

    diffrac_normalized = dfc.calc_diffrac(
        self.elution, self.elution2, normalize_totalCounts=True)
    diffrac_normalized.name = 'diffrac_normalized'

    annotated = pd.Series([True, True, False, False, False],
                          index=['a', 'b', 'c', 'd', 'e'])
    annotated.name = 'annotated'

    # Outer-join the columns one at a time onto an initially empty frame,
    # in the same order as before.
    df = pd.DataFrame()
    for column in (mean_abundance, diffrac_normalized, annotated):
        df = df.join(column, how='outer')

    # The FDR step is given the frame with the z-score output joined in.
    zscores = dfc.calc_sliding_zscore(df, window=1)
    df = df.join(zscores, how='outer')

    fdr_pvals = dfc.calc_sliding_fdr_correct(df)
    print(fdr_pvals)

    row_a = fdr_pvals.loc['a']
    row_e = fdr_pvals.loc['e']
    np.testing.assert_almost_equal(row_a['sliding_pvalues'], 0.848460)
    np.testing.assert_almost_equal(row_e['sliding_pvalues'], 0.0)
    np.testing.assert_almost_equal(row_a['sliding_pvalues_fdrcor'], 0.850628)
    np.testing.assert_almost_equal(row_e['sliding_pvalues_fdrcor'], 0.0)
def testSlidingZscore(self):
    """Check dfc.calc_sliding_zscore for several window and GMM settings.

    A frame with 'mean_abundance', 'diffrac_normalized' and 'annotated'
    columns is assembled from self.elution / self.elution2, and the
    'sliding_zscore' value of selected rows is compared against
    previously recorded values.
    """
    # NOTE(review): an earlier method in this file is also named
    # testSlidingZscore. If both live in the same class, this definition
    # replaces the earlier one -- confirm and rename or drop one of them.

    # Single-argument print(...) gives the same output under Python 2 and
    # is also valid syntax under Python 3.
    print("sliding zscore")

    mean_abundance = dfc.calc_mean_abundance(self.elution, self.elution2)
    mean_abundance.name = 'mean_abundance'

    diffrac_normalized = dfc.calc_diffrac(
        self.elution, self.elution2, normalize_totalCounts=True)
    diffrac_normalized.name = 'diffrac_normalized'

    annotated = pd.Series([True, True, False, False, False],
                          index=['a', 'b', 'c', 'd', 'e'])
    annotated.name = 'annotated'

    # Outer-join the columns one at a time onto an initially empty frame,
    # in the same order as before.
    df = pd.DataFrame()
    for column in (mean_abundance, diffrac_normalized, annotated):
        df = df.join(column, how='outer')
    print(df)

    # Recorded z-score for row 'e'; asserted for the first three settings.
    expected_e = 103.22222222222224

    zscores = dfc.calc_sliding_zscore(df, window=1)
    print("Sliding_zscores")
    print(zscores)
    # kdrew: these scores changed a lot after commit 17182d3, the crucial
    # change was to the line that queries for gt_entries, mean_abundance
    # went from > to >=
    # kdrew: updated diffrac script to remove current index, now matches
    # again
    np.testing.assert_almost_equal(
        zscores.loc['e']['sliding_zscore'], expected_e)

    zscores = dfc.calc_sliding_zscore(df, window=1, use_gmm=True)
    print(zscores)
    np.testing.assert_almost_equal(
        zscores.loc['e']['sliding_zscore'], expected_e)

    print("testing window = 5")
    zscores = dfc.calc_sliding_zscore(df, window=5)
    print(zscores)
    np.testing.assert_almost_equal(
        zscores.loc['e']['sliding_zscore'], expected_e)

    print("testing gmm with artifically low min weight threshold 0.2")
    zscores = dfc.calc_sliding_zscore(
        df, window=5, use_gmm=True, min_weight_threshold=0.2)
    print(zscores)
    # A check of row 'e' against 529.68150807816676 is disabled here.
    # kdrew: had some instability because both components had equal weight
    np.testing.assert_almost_equal(
        zscores.loc['b']['sliding_zscore'], -1.2864445660676826)