def test_smvConcatHist(self):
    # Builds a two-row frame with string columns "k" and "v", asks the
    # helper for _smvConcatHist("k", "v"), and compares the returned text
    # against the literal below (report titled "k_v", keys "a_1" / "b_2").
    # NOTE(review): the expected text is reproduced exactly as it appears in
    # this view (one line, single spaces). If line breaks or column padding
    # were lost in transit, restore them from the real helper output.
    frame = self.createDF("k:String;v:String", "a,1;b,2")
    expected = """Histogram of k_v: String sort by Key key count Pct cumCount cumPct a_1 1 50.00% 1 50.00% b_2 1 50.00% 2 100.00% -------------------------------------------------"""
    actual = dfhelper(frame)._smvConcatHist("k", "v")
    self.assertEqual(actual, expected)
def test_smvHistInt(self):
    # Requests _smvHist on the Integer column "v" of a two-row frame and
    # compares the returned text with the literal below, whose title labels
    # the column "Numeric" and whose keys are written as 1.0 and 2.0.
    # NOTE(review): the expected text is reproduced exactly as it appears in
    # this view (one line, single spaces). If line breaks or column padding
    # were lost in transit, restore them from the real helper output.
    frame = self.createDF("k:String;v:Integer", "a,1;b,2")
    expected = """Histogram of v: Numeric sort by Key key count Pct cumCount cumPct 1.0 1 50.00% 1 50.00% 2.0 1 50.00% 2 100.00% -------------------------------------------------"""
    actual = dfhelper(frame)._smvHist("v")
    self.assertEqual(actual, expected)
def test_smvHistDate(self):
    # Requests _smvHist on the Date column "k" of a two-row frame and
    # compares the returned text with the literal below, whose title labels
    # the column "String" and whose keys are the two ISO dates.
    # NOTE(review): a method with this same name is defined again later in
    # this file; if both live in the same class, the later one replaces this
    # one and only a single copy runs - confirm and deduplicate.
    # NOTE(review): the expected text is reproduced exactly as it appears in
    # this view (one line, single spaces). If line breaks or column padding
    # were lost in transit, restore them from the real helper output.
    frame = self.createDF("k:Date;v:Integer", "2010-01-01,1;2010-01-02,2")
    expected = """Histogram of k: String sort by Key key count Pct cumCount cumPct 2010-01-01 1 50.00% 1 50.00% 2010-01-02 1 50.00% 2 100.00% -------------------------------------------------"""
    actual = dfhelper(frame)._smvHist("k")
    self.assertEqual(actual, expected)
def test_smvHist(self):
    # Requests _smvHist on the String column "k" of a two-row frame and
    # compares the returned text with the literal below (keys "a" and "b",
    # one row each).
    # NOTE(review): the expected text is reproduced exactly as it appears in
    # this view (one line, single spaces). If line breaks or column padding
    # were lost in transit, restore them from the real helper output.
    frame = self.createDF("k:String;v:Integer", "a,1;b,2")
    expected = """Histogram of k: String sort by Key key count Pct cumCount cumPct a 1 50.00% 1 50.00% b 1 50.00% 2 100.00% -------------------------------------------------"""
    actual = dfhelper(frame)._smvHist("k")
    self.assertEqual(actual, expected)
def test_smvBinHist(self):
    # Requests _smvBinHist for column "v" with the pair ("v", 100) on a
    # three-row frame (values 10, 200, 30) and compares the returned text
    # with the literal below, which lists keys 0.0 (count 2) and 200.0
    # (count 1) under "BIN size 100.0".
    # NOTE(review): the expected text is reproduced exactly as it appears in
    # this view (one line, single spaces). If line breaks or column padding
    # were lost in transit, restore them from the real helper output.

    # The alias is not referenced below; the import is kept because dropping
    # it could change behavior if the module import has side effects.
    import smv.helpers as smv

    frame = self.createDF("k:String;v:Integer", "a,10;b,200;a,30")
    expected = """Histogram of v: with BIN size 100.0 key count Pct cumCount cumPct 0.0 2 66.67% 2 66.67% 200.0 1 33.33% 3 100.00% -------------------------------------------------"""
    actual = dfhelper(frame)._smvBinHist(("v", 100))
    self.assertEqual(actual, expected)
def test_smvCountHist(self):
    # Requests _smvCountHist(["k"], 1) on a three-row frame where "a" occurs
    # twice and "b" once, and compares the returned text with the literal
    # below (report titled "N_k", "BIN size 1.0", keys 1.0 and 2.0).
    # NOTE(review): the expected text is reproduced exactly as it appears in
    # this view (one line, single spaces). If line breaks or column padding
    # were lost in transit, restore them from the real helper output.

    # The alias is not referenced below; the import is kept because dropping
    # it could change behavior if the module import has side effects.
    import smv.helpers as smv

    frame = self.createDF("k:String;v:String", "a,1;b,2;a,3")
    expected = """Histogram of N_k: with BIN size 1.0 key count Pct cumCount cumPct 1.0 1 50.00% 1 50.00% 2.0 1 50.00% 2 100.00% -------------------------------------------------"""
    actual = dfhelper(frame)._smvCountHist(["k"], 1)
    self.assertEqual(actual, expected)
def test_smvFreqHistDate(self):
    # Requests _smvFreqHist on the Date column "k" of a three-row frame and
    # compares the returned text with the literal below, which lists
    # 2010-01-02 (count 2) ahead of 2010-01-01 (count 1).
    # NOTE(review): a method with this same name is defined again later in
    # this file; if both live in the same class, the later one replaces this
    # one and only a single copy runs - confirm and deduplicate.
    # NOTE(review): the expected text is reproduced exactly as it appears in
    # this view (one line, single spaces). If line breaks or column padding
    # were lost in transit, restore them from the real helper output.

    # The alias is not referenced below; the import is kept because dropping
    # it could change behavior if the module import has side effects.
    import smv.helpers as smv

    frame = self.createDF(
        "k:Date;v:String", "2010-01-01,1;2010-01-02,2;2010-01-02,3"
    )
    expected = """Histogram of k: String sorted by Frequency key count Pct cumCount cumPct 2010-01-02 2 66.67% 2 66.67% 2010-01-01 1 33.33% 3 100.00% -------------------------------------------------"""
    actual = dfhelper(frame)._smvFreqHist("k")
    self.assertEqual(actual, expected)
def test_smvFreqHist(self):
    # Requests _smvFreqHist on the String column "k" of a three-row frame
    # and compares the returned text with the literal below, which lists
    # "a" (count 2) ahead of "b" (count 1).
    # NOTE(review): the expected text is reproduced exactly as it appears in
    # this view (one line, single spaces). If line breaks or column padding
    # were lost in transit, restore them from the real helper output.

    # The alias is not referenced below; the import is kept because dropping
    # it could change behavior if the module import has side effects.
    import smv.helpers as smv

    frame = self.createDF("k:String;v:String", "a,1;b,2;a,3")
    expected = """Histogram of k: String sorted by Frequency key count Pct cumCount cumPct a 2 66.67% 2 66.67% b 1 33.33% 3 100.00% -------------------------------------------------"""
    actual = dfhelper(frame)._smvFreqHist("k")
    self.assertEqual(actual, expected)
def test_smvHistDate(self):
    # Requests _smvHist on the Date column "k" of a two-row frame and
    # compares the returned text with the literal below, whose title labels
    # the column "String" and whose keys are the two ISO dates.
    # NOTE(review): a method with this same name and the same checks is
    # defined earlier in this file; if both live in the same class, this
    # definition replaces the earlier one - confirm and deduplicate.
    # NOTE(review): the expected text is reproduced exactly as it appears in
    # this view (one line, single spaces). If line breaks or column padding
    # were lost in transit, restore them from the real helper output.
    frame = self.createDF("k:Date;v:Integer", "2010-01-01,1;2010-01-02,2")
    expected = """Histogram of k: String sort by Key key count Pct cumCount cumPct 2010-01-01 1 50.00% 1 50.00% 2010-01-02 1 50.00% 2 100.00% -------------------------------------------------"""
    actual = dfhelper(frame)._smvHist("k")
    self.assertEqual(actual, expected)
def test_smvFreqHistDate(self):
    # Requests _smvFreqHist on the Date column "k" of a three-row frame and
    # compares the returned text with the literal below, which lists
    # 2010-01-02 (count 2) ahead of 2010-01-01 (count 1).
    # NOTE(review): a method with this same name and the same checks is
    # defined earlier in this file; if both live in the same class, this
    # definition replaces the earlier one - confirm and deduplicate.
    # NOTE(review): the expected text is reproduced exactly as it appears in
    # this view (one line, single spaces). If line breaks or column padding
    # were lost in transit, restore them from the real helper output.

    # The alias is not referenced below; the import is kept because dropping
    # it could change behavior if the module import has side effects.
    import smv.helpers as smv

    frame = self.createDF(
        "k:Date;v:String", "2010-01-01,1;2010-01-02,2;2010-01-02,3"
    )
    expected = """Histogram of k: String sorted by Frequency key count Pct cumCount cumPct 2010-01-02 2 66.67% 2 66.67% 2010-01-01 1 33.33% 3 100.00% -------------------------------------------------"""
    actual = dfhelper(frame)._smvFreqHist("k")
    self.assertEqual(actual, expected)
def test_smvEdd(self):
    # Calls _smvEdd() with no arguments on a two-row frame (String "k",
    # Integer "v") and compares the returned text with the literal below,
    # which lists count/length/distinct entries for "k" and
    # count/average/standard-deviation/min/max entries for "v".
    # NOTE(review): the expected text is reproduced exactly as it appears in
    # this view (one line, single spaces). If line breaks or column padding
    # were lost in transit, restore them from the real helper output.
    frame = self.createDF("k:String;v:Integer", "a,1;b,2")
    expected = """k Non-Null Count 2 k Null Count 0 k Min Length 1 k Max Length 1 k Approx Distinct Count 2 v Non-Null Count 2 v Null Count 0 v Average 1.5 v Standard Deviation 0.7071067811865476 v Min 1.0 v Max 2.0"""
    actual = dfhelper(frame)._smvEdd()
    self.assertEqual(actual, expected)