def test_sort_with_nulls_order(self):
    """Verify the four *_nulls_first/_nulls_last ordering functions place NULL names as requested."""
    from pyspark.sql import functions
    df = self.spark.createDataFrame(
        [('Tom', 80), (None, 60), ('Alice', 50)], ["name", "height"])
    # assertEqual replaces the deprecated assertEquals alias (removed in
    # Python 3.12); u'' prefixes dropped — redundant on Python 3.
    self.assertEqual(
        df.select(df.name).orderBy(
            functions.asc_nulls_first('name')).collect(),
        [Row(name=None), Row(name='Alice'), Row(name='Tom')])
    self.assertEqual(
        df.select(df.name).orderBy(
            functions.asc_nulls_last('name')).collect(),
        [Row(name='Alice'), Row(name='Tom'), Row(name=None)])
    self.assertEqual(
        df.select(df.name).orderBy(
            functions.desc_nulls_first('name')).collect(),
        [Row(name=None), Row(name='Tom'), Row(name='Alice')])
    self.assertEqual(
        df.select(df.name).orderBy(
            functions.desc_nulls_last('name')).collect(),
        [Row(name='Tom'), Row(name='Alice'), Row(name=None)])
def test_sort_with_nulls_order(self):
    """Check NULL placement for asc/desc nulls-first/nulls-last orderings."""
    from pyspark.sql import functions
    df = self.spark.createDataFrame(
        [("Tom", 80), (None, 60), ("Alice", 50)], ["name", "height"]
    )
    null_row = Row(name=None)
    alice, tom = Row(name="Alice"), Row(name="Tom")
    # Each ordering function paired with the row order it must produce.
    cases = [
        (functions.asc_nulls_first, [null_row, alice, tom]),
        (functions.asc_nulls_last, [alice, tom, null_row]),
        (functions.desc_nulls_first, [null_row, tom, alice]),
        (functions.desc_nulls_last, [tom, alice, null_row]),
    ]
    for order_fn, expected in cases:
        self.assertEqual(
            df.select(df.name).orderBy(order_fn("name")).collect(),
            expected,
        )
def test_sort_with_nulls_order(self):
    """Verify the four *_nulls_first/_nulls_last ordering functions place NULL names as requested."""
    from pyspark.sql import functions
    df = self.spark.createDataFrame(
        [('Tom', 80), (None, 60), ('Alice', 50)], ["name", "height"])
    # assertEqual replaces the deprecated assertEquals alias (removed in
    # Python 3.12); u'' prefixes dropped — redundant on Python 3.
    self.assertEqual(
        df.select(df.name).orderBy(functions.asc_nulls_first('name')).collect(),
        [Row(name=None), Row(name='Alice'), Row(name='Tom')])
    self.assertEqual(
        df.select(df.name).orderBy(functions.asc_nulls_last('name')).collect(),
        [Row(name='Alice'), Row(name='Tom'), Row(name=None)])
    self.assertEqual(
        df.select(df.name).orderBy(functions.desc_nulls_first('name')).collect(),
        [Row(name=None), Row(name='Tom'), Row(name='Alice')])
    self.assertEqual(
        df.select(df.name).orderBy(functions.desc_nulls_last('name')).collect(),
        [Row(name='Tom'), Row(name='Alice'), Row(name=None)])
""" Script para retornar o montante (valor) transacionado por bandeira e status """ if __name__ == '__main__': df1 = spark.read.csv(path="../output/sanitize_transactions/", header=True, inferSchema=True, sep=";") df_brand = spark.read.csv(path="../card_brand.csv", header=True, inferSchema=True) df2 = df1.groupBy( functions.date_trunc("day", df1.created_at).alias("transaction_day"), df1.card_brand, df1.status).agg( functions.sum(df1.valor).cast("decimal(15,2)").alias("valor")) df3 = df2.join(df_brand, on=df2.card_brand == df_brand.brand_code, how="left") df4 = df3.select(df3.transaction_day, df3.card_brand, df3.brand_name, df3.status, df3.valor).orderBy( df3.transaction_day, functions.asc_nulls_last(df3.card_brand), df3.status) df4.show(100, truncate=False) df4.printSchema()