Example no. 1
0
def add_increment(
    current_df: pyspark.sql.DataFrame,
    increment_df: pyspark.sql.DataFrame,
) -> pyspark.sql.DataFrame:
    """Merge *increment_df* into *current_df*, keeping one row per 'link'.

    Both frames are unioned (so they must share the same schema/column
    order), then deduplicated: within each 'link' partition the rows are
    ordered by 'scraped_at' ascending and only the first row is retained.

    NOTE(review): ascending order keeps the *earliest* scraped row per
    link — confirm this is intended rather than keeping the latest.
    """
    combined = current_df.union(increment_df)
    # Rank rows within each link by scrape time; rank 1 survives.
    dedup_window = Window.partitionBy(combined['link']).orderBy(['scraped_at'])
    ranked = combined.withColumn('_row_number', F.row_number().over(dedup_window))
    return ranked.where(F.col('_row_number') == 1).drop('_row_number')
Example no. 2
0
 def my_spark(df: pyspark.sql.DataFrame) -> my_schema:
     """Append one fixed ("Bob", 10) row to *df* and return the union.

     The Spark session is taken from the current Flyte task context; the
     appended row uses the column names declared by *my_schema*, so *df*
     is assumed to match that schema — TODO confirm against callers.
     """
     spark = flytekit.current_context().spark_session
     extra_row = spark.createDataFrame([("Bob", 10)], my_schema.column_names())
     return df.union(extra_row)