コード例 #1
0
ファイル: test_functions.py プロジェクト: Spencerzsp/spark-1
    def test_sort_with_nulls_order(self):
        """Verify NULL placement for the four ``*_nulls_*`` sort functions.

        Builds a three-row DataFrame whose ``name`` column contains one
        ``None`` value, then checks the collected ``name`` rows after
        ordering by ``asc_nulls_first``, ``asc_nulls_last``,
        ``desc_nulls_first`` and ``desc_nulls_last``.
        """
        from pyspark.sql import functions

        df = self.spark.createDataFrame([('Tom', 80), (None, 60),
                                         ('Alice', 50)], ["name", "height"])

        # NOTE: assertEqual is used instead of the assertEquals alias, which
        # is deprecated and was removed from unittest in Python 3.12.

        # Ascending order, NULL name placed before the non-NULL names.
        self.assertEqual(
            df.select(df.name).orderBy(
                functions.asc_nulls_first('name')).collect(),
            [Row(name=None),
             Row(name=u'Alice'),
             Row(name=u'Tom')])
        # Ascending order, NULL name placed after the non-NULL names.
        self.assertEqual(
            df.select(df.name).orderBy(
                functions.asc_nulls_last('name')).collect(),
            [Row(name=u'Alice'),
             Row(name=u'Tom'),
             Row(name=None)])
        # Descending order, NULL name placed before the non-NULL names.
        self.assertEqual(
            df.select(df.name).orderBy(
                functions.desc_nulls_first('name')).collect(),
            [Row(name=None),
             Row(name=u'Tom'),
             Row(name=u'Alice')])
        # Descending order, NULL name placed after the non-NULL names.
        self.assertEqual(
            df.select(df.name).orderBy(
                functions.desc_nulls_last('name')).collect(),
            [Row(name=u'Tom'),
             Row(name=u'Alice'),
             Row(name=None)])
コード例 #2
0
ファイル: test_functions.py プロジェクト: yliou/spark
    def test_sort_with_nulls_order(self):
        """Check where a NULL ``name`` lands under each null-aware ordering.

        A three-row DataFrame with one ``None`` name is projected to its
        ``name`` column and sorted with ``asc_nulls_first``,
        ``asc_nulls_last``, ``desc_nulls_first`` and ``desc_nulls_last``;
        each collected result is compared with the expected row sequence.
        """
        from pyspark.sql import functions

        people = self.spark.createDataFrame(
            [("Tom", 80), (None, 60), ("Alice", 50)],
            ["name", "height"],
        )
        # Every check sorts the same single-column projection.
        names = people.select(people.name)

        # Ascending, NULL first.
        asc_first = names.orderBy(functions.asc_nulls_first("name")).collect()
        self.assertEqual(
            asc_first, [Row(name=None), Row(name="Alice"), Row(name="Tom")]
        )

        # Ascending, NULL last.
        asc_last = names.orderBy(functions.asc_nulls_last("name")).collect()
        self.assertEqual(
            asc_last, [Row(name="Alice"), Row(name="Tom"), Row(name=None)]
        )

        # Descending, NULL first.
        desc_first = names.orderBy(functions.desc_nulls_first("name")).collect()
        self.assertEqual(
            desc_first, [Row(name=None), Row(name="Tom"), Row(name="Alice")]
        )

        # Descending, NULL last.
        desc_last = names.orderBy(functions.desc_nulls_last("name")).collect()
        self.assertEqual(
            desc_last, [Row(name="Tom"), Row(name="Alice"), Row(name=None)]
        )
コード例 #3
0
ファイル: test_functions.py プロジェクト: apache/spark
    def test_sort_with_nulls_order(self):
        """Verify row order produced by the null-aware sort functions.

        Creates a DataFrame with the names ``'Tom'``, ``None`` and
        ``'Alice'``, sorts its ``name`` column with each of
        ``asc_nulls_first``, ``asc_nulls_last``, ``desc_nulls_first`` and
        ``desc_nulls_last``, and compares the collected rows with the
        expected sequence.
        """
        from pyspark.sql import functions

        df = self.spark.createDataFrame(
            [('Tom', 80), (None, 60), ('Alice', 50)], ["name", "height"])
        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in unittest as of Python 3.12.
        self.assertEqual(
            df.select(df.name).orderBy(functions.asc_nulls_first('name')).collect(),
            [Row(name=None), Row(name=u'Alice'), Row(name=u'Tom')])
        self.assertEqual(
            df.select(df.name).orderBy(functions.asc_nulls_last('name')).collect(),
            [Row(name=u'Alice'), Row(name=u'Tom'), Row(name=None)])
        self.assertEqual(
            df.select(df.name).orderBy(functions.desc_nulls_first('name')).collect(),
            [Row(name=None), Row(name=u'Tom'), Row(name=u'Alice')])
        self.assertEqual(
            df.select(df.name).orderBy(functions.desc_nulls_last('name')).collect(),
            [Row(name=u'Tom'), Row(name=u'Alice'), Row(name=None)])
コード例 #4
0
"""
Script that reports the transacted amount (valor) per card brand and status
"""
if __name__ == '__main__':

    # Sanitized transactions: semicolon-separated CSV with a header row.
    # NOTE(review): `spark` and `functions` are not defined in this view;
    # presumably they are set up earlier in the file -- confirm.
    transactions = spark.read.csv(
        path="../output/sanitize_transactions/",
        sep=";",
        header=True,
        inferSchema=True,
    )

    # Card-brand lookup table (header row, reader's default separator).
    brands = spark.read.csv(
        path="../card_brand.csv",
        header=True,
        inferSchema=True,
    )

    # Grouping key: creation timestamp truncated to the day.
    transaction_day = functions.date_trunc(
        "day", transactions.created_at).alias("transaction_day")
    # Aggregate: summed `valor`, cast to decimal(15,2).
    total_valor = functions.sum(
        transactions.valor).cast("decimal(15,2)").alias("valor")

    daily_totals = transactions.groupBy(
        transaction_day,
        transactions.card_brand,
        transactions.status,
    ).agg(total_valor)

    # Left join: totals stay in the result even without a matching brand row.
    with_brand = daily_totals.join(
        brands,
        on=daily_totals.card_brand == brands.brand_code,
        how="left",
    )

    projected = with_brand.select(
        with_brand.transaction_day,
        with_brand.card_brand,
        with_brand.brand_name,
        with_brand.status,
        with_brand.valor,
    )
    # Sort by day, then brand code (NULL codes last), then status.
    report = projected.orderBy(
        with_brand.transaction_day,
        functions.asc_nulls_last(with_brand.card_brand),
        with_brand.status,
    )

    report.show(100, truncate=False)
    report.printSchema()