Esempio n. 1
0
 def test_cols_same(self):
     """hasSameColumns compares column names, not just the column count.

     Two frames of equal width but with different column names must each
     match themselves and must not match one another, in either direction.
     """
     spark = self.spark
     ab = sparkle_df(spark.createDataFrame([(1, 2)], ["a", "b"]))
     bc = sparkle_df(spark.createDataFrame([(1, 2)], ["b", "c"]))

     # Equal width, so only the names can tell the two frames apart.
     self.assertEqual(len(ab.columns), len(bc.columns))

     for frame in (ab, bc):
         self.assertTrue(frame.hasSameColumns(frame))

     self.assertFalse(ab.hasSameColumns(bc))
     self.assertFalse(bc.hasSameColumns(ab))
Esempio n. 2
0
 def test_require_column(self):
     """requireColumn accepts the argument forms exercised below.

     There are no explicit assertions: the test passes as long as none of
     the calls raises.
     """
     frame = sparkle_df(self.spark.createDataFrame([(1, 2)], ["a", "b"]))

     # Plain column names.
     for name in ("a", "b"):
         frame.requireColumn(name)

     # A (name, type) pair with the type given as the class itself ...
     frame.requireColumn(("a", LongType))
     # ... and with the type given as an instance.
     # noinspection PyTypeChecker
     frame.requireColumn(("b", LongType()))
Esempio n. 3
0
 def test_all_any(self):
     """Exercise all() and any() with SQL-style predicates on column ``a``.

     The fixture has two rows, with ``a`` equal to 1 and 3 respectively.
     """
     rows = [(1, 2), (3, 4)]
     sdf = sparkle_df(self.spark.createDataFrame(rows, ["a", "b"]))
     self.assertIsNotNone(sdf)

     # Sanity check: exactly one row has a == 1.
     matching = sdf.filter("a == 1").count()
     self.assertEqual(1, matching)

     # Only one of the two rows satisfies the predicate, so all() is False.
     every_row_is_one = sdf.all('a == 1')
     self.assertFalse(every_row_is_one)

     # The widened predicate covers both rows.
     every_row_is_one_or_three = sdf.all('a == 1 OR a == 3')
     self.assertTrue(every_row_is_one_or_three)

     some_row_is_one = sdf.any('a == 1')
     self.assertTrue(some_row_is_one)
Esempio n. 4
0
    def test_stay_sparkle(self):
        """Derived frames and the schema keep their Sparkle wrapper types."""
        base = sparkle_df(self.spark.createDataFrame([(1, )], ["a"]))
        self.assertIsInstance(base, SparkleDataFrame)

        # Each transformation is expected to return a SparkleDataFrame.
        selected = base.select("a")
        self.assertIsInstance(selected, SparkleDataFrame)
        dropped = base.drop("a")
        self.assertIsInstance(dropped, SparkleDataFrame)
        ordered = base.sort('a')
        self.assertIsInstance(ordered, SparkleDataFrame)

        # The schema is expected to be wrapped as well.
        schema = base.schema
        self.assertIsInstance(schema, SparkleStructType)

        # Grouping followed by aggregation must also yield the wrapper type.
        aggregated = base.groupBy('a').agg(f.max('a'), f.max('a'))
        self.assertIsInstance(aggregated, SparkleDataFrame)
Esempio n. 5
0
 def setUp(self):
     """Prepare the per-test fixture.

     Drops the ``foo`` table if it exists, stores a one-row, two-column
     frame wrapped by ``sparkle_df`` on ``self.df``, and unsets the
     ``spark.app.env`` configuration key on ``self.spark``.
     """
     session = SparkSession.builder.getOrCreate()
     session.sql("DROP TABLE IF EXISTS foo")
     self.df = sparkle_df(session.createDataFrame([(1, 2)], ["a", "b"]))
     # NOTE(review): the frame comes from the session obtained above, while
     # the conf change goes through self.spark -- presumably the same
     # session; confirm against the base class.
     self.spark.conf.unset("spark.app.env")
Esempio n. 6
0
 def setUp(self):
     """Store a two-row, two-column frame wrapped by sparkle_df on self.sdf."""
     rows = [(1, 2), (3, 4)]
     self.sdf = sparkle_df(self.spark.createDataFrame(rows, ["a", "b"]))
Esempio n. 7
0
 def test_max_value(self):
     """maxValue("a") returns 2 for a column holding 1, 2 and 0."""
     values = [(1, ), (2, ), (0, )]
     sdf = sparkle_df(self.spark.createDataFrame(values, ["a"]))
     largest = sdf.maxValue("a")
     self.assertEqual(2, largest)
Esempio n. 8
0
 def test_drop(self):
     """dropOfType removes columns of the named type and keeps the others.

     In the fixture, column ``a`` holds integers and ``b`` holds strings.
     """
     rows = [(1, "y"), (3, "z")]
     sdf = sparkle_df(self.spark.createDataFrame(rows, ["a", "b"]))

     # (assertion, column name, type passed to dropOfType)
     expectations = (
         (self.assertNotIn, "a", 'bigint'),
         (self.assertIn, "b", 'bigint'),
         (self.assertNotIn, "b", 'string'),
     )
     for check, column, type_name in expectations:
         check(column, sdf.dropOfType(type_name).columns)