예제 #1
0
def ts_ft():
    """Return the Fourier transform of the ``x`` column of ``ts_bronze``.

    The ``ts_bronze`` dataset is read through ``dlt.read`` and wrapped in a
    ``TSDF`` that uses ``event_ts`` as its timestamp column and ``User`` as
    its only partition column.  ``fourier_transform`` is then invoked with
    ``timestep=1`` and ``valueCol="x"``.

    Returns:
        The ``df`` attribute of the object returned by ``fourier_transform``.
    """
    bronze = dlt.read("ts_bronze")
    series = TSDF(bronze, ts_col="event_ts", partition_cols=["User"])
    transformed = series.fourier_transform(timestep=1, valueCol="x")
    return transformed.df
예제 #2
0
def silver_airport_data():
    """Return ``bronze_airport_data`` annotated with the positions of null fields.

    A ``nulls`` array column is added.  For every row it holds the zero-based
    positions (within the bronze column list) of the columns whose value is
    null in that row.  The ``ingest_timestamp`` and ``ingest_source`` columns
    are dropped from the result, but positions are computed before the drop,
    so they still refer to the original bronze column order.

    Returns:
        The annotated DataFrame without ``ingest_timestamp`` and
        ``ingest_source``.
    """
    df = dlt.read("bronze_airport_data")
    # Turn the literal two-character string \N into real nulls so the
    # isNull() checks below can see it (presumably the source's null
    # marker -- confirm against the ingest format).
    df = df.replace("\\N", None)

    # Snapshot the column list BEFORE adding "nulls": the accumulator column
    # itself is never null, so checking it would only add a redundant
    # projection to the query plan.
    source_columns = df.columns
    df = df.withColumn("nulls", array())

    for i, c in enumerate(source_columns):
        # Append this column's position only on rows where it is null;
        # otherwise carry the accumulated array forward unchanged.
        df = df.withColumn(
            "nulls",
            when(col(c).isNull(),
                 array_union(col("nulls"),
                             array(lit(i)))).otherwise(col("nulls")))
    return df.drop("ingest_timestamp", "ingest_source")
def turbine_gold():
    """Left-join the ``turbine_silver`` stream with ``device_status`` on ``id``.

    ``turbine_silver`` is read with ``dlt.read_stream`` and ``device_status``
    with ``dlt.read``; the stream is the left side of the join.

    Returns:
        The joined DataFrame.
    """
    readings = dlt.read_stream("turbine_silver")
    statuses = dlt.read("device_status")
    return readings.join(statuses, ['id'], 'left')
예제 #4
0
def read_silver_airport_data_clean():
    """Return the ``silver_airport_data_clean`` dataset read via ``dlt.read``."""
    dataset = dlt.read("silver_airport_data_clean")
    return dataset
예제 #5
0
def read_silver_airport_data():
    """Return the ``silver_airport_data`` dataset read via ``dlt.read``."""
    dataset = dlt.read("silver_airport_data")
    return dataset
예제 #6
0
def read_bronze_airport_data():
    """Return the ``bronze_airport_data`` dataset read via ``dlt.read``."""
    dataset = dlt.read("bronze_airport_data")
    return dataset
예제 #7
0
def gold_airports_by_country():
    """Count ``silver_airport_data_clean`` rows per ``country``.

    Returns:
        A DataFrame grouped by ``country`` with the row count produced by
        ``count()`` and a ``processed_timestamp`` column set from
        ``current_timestamp()``.
    """
    clean = dlt.read("silver_airport_data_clean")
    per_country = clean.groupby("country").count()
    return per_country.withColumn("processed_timestamp", current_timestamp())
예제 #8
0
def silver_airport_data_nulls():
    """Return the ``silver_airport_data`` rows whose ``nulls`` array is non-empty.

    Returns:
        The filtered DataFrame with an added ``processed_timestamp`` column
        set from ``current_timestamp()``.
    """
    silver = dlt.read("silver_airport_data")
    has_null_fields = size("nulls") > 0
    flagged = silver.filter(has_null_fields)
    return flagged.withColumn("processed_timestamp", current_timestamp())