def ts_ft():
    """Return the Fourier transform of the ``x`` column of ``ts_bronze``.

    The ``ts_bronze`` dataset is wrapped in a ``TSDF`` that uses
    ``event_ts`` as its timestamp column and ``User`` as its partition
    column. ``fourier_transform`` is then applied to column ``x`` with a
    timestep of 1.

    Returns:
        The ``.df`` attribute of the transformed ``TSDF``.
    """
    bronze = dlt.read("ts_bronze")
    series = TSDF(bronze, ts_col="event_ts", partition_cols=["User"])
    transformed = series.fourier_transform(timestep=1, valueCol="x")
    return transformed.df
def silver_airport_data():
    """Build the silver airport dataset with a per-row null-position column.

    Steps, in order:

    1. Read ``bronze_airport_data``.
    2. Replace the two-character placeholder (a backslash followed by
       ``N``) with null.
    3. Add a ``nulls`` array column, initially empty.
    4. For every column, append that column's position to ``nulls`` on
       rows where the column is null.
    5. Drop ``ingest_timestamp`` and ``ingest_source``.

    The positions stored in ``nulls`` are indexes into the column list as
    it stands after step 3 and before step 5, so they count the two
    ingest columns that are dropped afterwards.

    Returns:
        The resulting DataFrame.
    """
    raw = dlt.read("bronze_airport_data")
    tracked = raw.replace("\\N", None).withColumn("nulls", array())

    # The column list is captured once, after "nulls" has been added, so
    # "nulls" itself is also visited by the loop.
    for position, column_name in enumerate(tracked.columns):
        is_missing = col(column_name).isNull()
        with_position = array_union(col("nulls"), array(lit(position)))
        tracked = tracked.withColumn(
            "nulls",
            when(is_missing, with_position).otherwise(col("nulls")),
        )

    return tracked.drop("ingest_timestamp", "ingest_source")
def turbine_gold():
    """Left-join the ``turbine_silver`` stream with ``device_status`` on ``id``.

    ``turbine_silver`` is read with ``dlt.read_stream`` and is the left
    side of the join; ``device_status`` is read with ``dlt.read``.

    Returns:
        The joined DataFrame.
    """
    turbine_stream = dlt.read_stream("turbine_silver")
    status = dlt.read("device_status")
    return turbine_stream.join(status, ['id'], 'left')
def read_silver_airport_data_clean():
    """Return the ``silver_airport_data_clean`` dataset via ``dlt.read``."""
    clean_airports = dlt.read("silver_airport_data_clean")
    return clean_airports
def read_silver_airport_data():
    """Return the ``silver_airport_data`` dataset via ``dlt.read``."""
    silver_airports = dlt.read("silver_airport_data")
    return silver_airports
def read_bronze_airport_data():
    """Return the ``bronze_airport_data`` dataset via ``dlt.read``."""
    bronze_airports = dlt.read("bronze_airport_data")
    return bronze_airports
def gold_airports_by_country():
    """Count rows of ``silver_airport_data_clean`` per ``country``.

    Returns:
        A DataFrame with one row per ``country`` value, its ``count``,
        and a ``processed_timestamp`` column set from
        ``current_timestamp()``.
    """
    clean_airports = dlt.read("silver_airport_data_clean")
    per_country = clean_airports.groupby("country").count()
    return per_country.withColumn("processed_timestamp", current_timestamp())
def silver_airport_data_nulls():
    """Select ``silver_airport_data`` rows whose ``nulls`` array is non-empty.

    Returns:
        The rows where ``size("nulls") > 0``, with a
        ``processed_timestamp`` column set from ``current_timestamp()``.
    """
    silver_airports = dlt.read("silver_airport_data")
    has_nulls = size("nulls") > 0
    flagged = silver_airports.filter(has_nulls)
    return flagged.withColumn("processed_timestamp", current_timestamp())