# Sample Elasticsearch result for the "distinct_agg_streaming" index, kept for
# reference (tail of a larger JSON example that starts earlier in the file):
# },
# {
#     "_index": "distinct_agg_streaming",
#     "_type": "pyflink",
#     "_id": "3TfsHWwBHRafi3KHm2Uf",
#     "_score": 1,
#     "_source": {
#         "a": "a",
#         "b": "1"
#     }
# },
# {
#     "_index": "distinct_agg_streaming",
#     "_type": "pyflink",
#     "_id": "3jfsHWwBHRafi3KHm2Uf",
#     "_score": 1,
#     "_source": {
#         "a": "b",
#         "b": "2"
#     }
# }
# ]
# }
# }


if __name__ == '__main__':
    # Local import keeps the environment-preparation dependency out of module
    # scope; prepare_env provisions the external systems the job reads/writes.
    from table.prepare_environment import prepare_env

    # This example needs both a streaming source and an upsert sink.
    prepare_env(need_stream_source=True, need_upsert_sink=True)
    distinct_agg_streaming()
# NOTE(review): this line is whitespace-collapsed — an entire multi-line tail of
# the file (the end of a table-descriptor builder chain, a sink registration, a
# tumbling row-count window query, expected-output comments, and the
# `if __name__ == '__main__':` guard) has been flattened onto one physical line,
# so it is not valid Python as written. The leading `.proctime()` chain belongs
# to a statement that starts before this view, so the enclosing function cannot
# be reconstructed from here — TODO: restore the original line breaks and
# indentation from version control rather than guessing at the missing header.
.proctime() .field("a", DataTypes.STRING()) .field("b", DataTypes.STRING()) .field("c", DataTypes.STRING()) ) \ .in_append_mode() \ .register_table_source("source") st_env.register_table_sink("result", CsvTableSink(["a", "b"], [DataTypes.STRING(), DataTypes.STRING()], result_file)) st_env.scan("source").window(Tumble.over("2.rows").on("proctime").alias("w")) \ .group_by("w, a") \ .select("a, max(b)").insert_into("result") st_env.execute("tumble row window streaming") # cat /tmp/tumble_row_window_streaming.csv # a,3 # b,4 # a 5 if __name__ == '__main__': from table.prepare_environment import prepare_env prepare_env(need_stream_source=True) # tumble_time_window_streaming() tumble_row_window_streaming()
# Sample Elasticsearch result for the "group_by_agg_streaming" index, kept for
# reference (tail of a larger JSON example that starts earlier in the file):
# "hits": [
# {
#     "_index": "group_by_agg_streaming",
#     "_type": "group_by_agg_streaming",
#     "_id": "b",
#     "_score": 1,
#     "_source": {
#         "a": "b",
#         "b": "6"
#     }
# },
# {
#     "_index": "group_by_agg_streaming",
#     "_type": "group_by_agg_streaming",
#     "_id": "a",
#     "_score": 1,
#     "_source": {
#         "a": "a",
#         "b": "13"
#     }
# }
# ]
# }
# }


if __name__ == '__main__':
    # Local import keeps the environment-preparation dependency out of module
    # scope; prepare_env provisions the external systems the job writes to.
    from table.prepare_environment import prepare_env

    # This example only needs the upsert sink (grouped aggregates are upserted).
    prepare_env(need_upsert_sink=True)
    group_by_agg_streaming()