Esempio n. 1
0
    def test_smvConcatHist(self):
        df = self.createDF("k:String;v:String", "a,1;b,2")
        res = dfhelper(df)._smvConcatHist("k", "v")
        self.assertEqual(res, """Histogram of k_v: String sort by Key
key                      count      Pct    cumCount   cumPct
a_1                          1   50.00%           1   50.00%
b_2                          1   50.00%           2  100.00%
-------------------------------------------------""")
Esempio n. 2
0
    def test_smvHistInt(self):
        df = self.createDF("k:String;v:Integer", "a,1;b,2")
        res = dfhelper(df)._smvHist("v")
        self.assertEqual(res, """Histogram of v: Numeric sort by Key
key                      count      Pct    cumCount   cumPct
1.0                          1   50.00%           1   50.00%
2.0                          1   50.00%           2  100.00%
-------------------------------------------------""")
Esempio n. 3
0
    def test_smvHistDate(self):
        df = self.createDF("k:Date;v:Integer", "2010-01-01,1;2010-01-02,2")
        res = dfhelper(df)._smvHist("k")
        self.assertEqual(res, """Histogram of k: String sort by Key
key                      count      Pct    cumCount   cumPct
2010-01-01                   1   50.00%           1   50.00%
2010-01-02                   1   50.00%           2  100.00%
-------------------------------------------------""")
Esempio n. 4
0
    def test_smvConcatHist(self):
        df = self.createDF("k:String;v:String", "a,1;b,2")
        res = dfhelper(df)._smvConcatHist("k", "v")
        self.assertEqual(res, """Histogram of k_v: String sort by Key
key                      count      Pct    cumCount   cumPct
a_1                          1   50.00%           1   50.00%
b_2                          1   50.00%           2  100.00%
-------------------------------------------------""")
Esempio n. 5
0
    def test_smvHist(self):
        df = self.createDF("k:String;v:Integer", "a,1;b,2")
        res = dfhelper(df)._smvHist("k")
        self.assertEqual(res, """Histogram of k: String sort by Key
key                      count      Pct    cumCount   cumPct
a                            1   50.00%           1   50.00%
b                            1   50.00%           2  100.00%
-------------------------------------------------""")
Esempio n. 6
0
    def test_smvBinHist(self):
        import smv.helpers as smv
        df = self.createDF("k:String;v:Integer", "a,10;b,200;a,30")
        res = dfhelper(df)._smvBinHist(("v", 100))
        self.assertEqual(res, """Histogram of v: with BIN size 100.0
key                      count      Pct    cumCount   cumPct
0.0                          2   66.67%           2   66.67%
200.0                        1   33.33%           3  100.00%
-------------------------------------------------""")
Esempio n. 7
0
    def test_smvCountHist(self):
        import smv.helpers as smv
        df = self.createDF("k:String;v:String", "a,1;b,2;a,3")
        res = dfhelper(df)._smvCountHist(["k"], 1)
        self.assertEqual(res, """Histogram of N_k: with BIN size 1.0
key                      count      Pct    cumCount   cumPct
1.0                          1   50.00%           1   50.00%
2.0                          1   50.00%           2  100.00%
-------------------------------------------------""")
Esempio n. 8
0
    def test_smvFreqHistDate(self):
        import smv.helpers as smv
        df = self.createDF("k:Date;v:String", "2010-01-01,1;2010-01-02,2;2010-01-02,3")
        res = dfhelper(df)._smvFreqHist("k")
        self.assertEqual(res, """Histogram of k: String sorted by Frequency
key                      count      Pct    cumCount   cumPct
2010-01-02                   2   66.67%           2   66.67%
2010-01-01                   1   33.33%           3  100.00%
-------------------------------------------------""")
Esempio n. 9
0
    def test_smvFreqHist(self):
        import smv.helpers as smv
        df = self.createDF("k:String;v:String", "a,1;b,2;a,3")
        res = dfhelper(df)._smvFreqHist("k")
        self.assertEqual(res, """Histogram of k: String sorted by Frequency
key                      count      Pct    cumCount   cumPct
a                            2   66.67%           2   66.67%
b                            1   33.33%           3  100.00%
-------------------------------------------------""")
Esempio n. 10
0
    def test_smvBinHist(self):
        import smv.helpers as smv
        df = self.createDF("k:String;v:Integer", "a,10;b,200;a,30")
        res = dfhelper(df)._smvBinHist(("v", 100))
        self.assertEqual(res, """Histogram of v: with BIN size 100.0
key                      count      Pct    cumCount   cumPct
0.0                          2   66.67%           2   66.67%
200.0                        1   33.33%           3  100.00%
-------------------------------------------------""")
Esempio n. 11
0
    def test_smvCountHist(self):
        import smv.helpers as smv
        df = self.createDF("k:String;v:String", "a,1;b,2;a,3")
        res = dfhelper(df)._smvCountHist(["k"], 1)
        self.assertEqual(res, """Histogram of N_k: with BIN size 1.0
key                      count      Pct    cumCount   cumPct
1.0                          1   50.00%           1   50.00%
2.0                          1   50.00%           2  100.00%
-------------------------------------------------""")
Esempio n. 12
0
    def test_smvFreqHist(self):
        import smv.helpers as smv
        df = self.createDF("k:String;v:String", "a,1;b,2;a,3")
        res = dfhelper(df)._smvFreqHist("k")
        self.assertEqual(res, """Histogram of k: String sorted by Frequency
key                      count      Pct    cumCount   cumPct
a                            2   66.67%           2   66.67%
b                            1   33.33%           3  100.00%
-------------------------------------------------""")
Esempio n. 13
0
    def test_smvHistDate(self):
        df = self.createDF("k:Date;v:Integer", "2010-01-01,1;2010-01-02,2")
        res = dfhelper(df)._smvHist("k")
        self.assertEqual(
            res, """Histogram of k: String sort by Key
key                      count      Pct    cumCount   cumPct
2010-01-01                   1   50.00%           1   50.00%
2010-01-02                   1   50.00%           2  100.00%
-------------------------------------------------""")
Esempio n. 14
0
    def test_smvFreqHistDate(self):
        import smv.helpers as smv
        df = self.createDF("k:Date;v:String",
                           "2010-01-01,1;2010-01-02,2;2010-01-02,3")
        res = dfhelper(df)._smvFreqHist("k")
        self.assertEqual(
            res, """Histogram of k: String sorted by Frequency
key                      count      Pct    cumCount   cumPct
2010-01-02                   2   66.67%           2   66.67%
2010-01-01                   1   33.33%           3  100.00%
-------------------------------------------------""")
Esempio n. 15
0
    def test_smvEdd(self):
        df = self.createDF("k:String;v:Integer", "a,1;b,2")
        res = dfhelper(df)._smvEdd()
        self.assertEqual(res, """k                    Non-Null Count         2
k                    Null Count             0
k                    Min Length             1
k                    Max Length             1
k                    Approx Distinct Count  2
v                    Non-Null Count         2
v                    Null Count             0
v                    Average                1.5
v                    Standard Deviation     0.7071067811865476
v                    Min                    1.0
v                    Max                    2.0""")
Esempio n. 16
0
    def test_smvEdd(self):
        df = self.createDF("k:String;v:Integer", "a,1;b,2")
        res = dfhelper(df)._smvEdd()
        self.assertEqual(res, """k                    Non-Null Count         2
k                    Null Count             0
k                    Min Length             1
k                    Max Length             1
k                    Approx Distinct Count  2
v                    Non-Null Count         2
v                    Null Count             0
v                    Average                1.5
v                    Standard Deviation     0.7071067811865476
v                    Min                    1.0
v                    Max                    2.0""")