def test_meltano_order_by_timeframe_periods(self, gitflix):
    """Check that ordering by timeframe periods shows up in the generated SQL.

    A payload on ``dynamic_dates`` requests the ``month`` and ``dom`` periods
    of the ``updated_at`` timeframe plus the ``count`` aggregate, ordered
    ascending by both periods. The SQL returned by ``MeltanoQuery.get_query``
    must contain the two ``EXTRACT`` expressions and the ``ORDER BY`` clause.
    """
    updated_at_timeframe = {
        "name": "updated_at",
        "periods": [{"name": "month"}, {"name": "dom"}],
    }

    builder = PayloadBuilder("dynamic_dates")
    builder = builder.timeframes(updated_at_timeframe)
    builder = builder.aggregates("count")
    builder = builder.order_by("dynamic_dates.updated_at.month", "asc")
    builder = builder.order_by("dynamic_dates.updated_at.dom", "asc")

    query = MeltanoQuery(
        definition=builder.payload,
        design_helper=gitflix.design("dynamic_dates"),
    )

    # get_query() yields three values; only the SQL text is inspected here.
    sql, _query_attributes, _aggregate_columns = query.get_query()

    expected_fragments = (
        'EXTRACT(\'MONTH\' FROM "dynamic_dates"."updated_at") "dynamic_dates.updated_at.month"',
        'EXTRACT(\'DAY\' FROM "dynamic_dates"."updated_at") "dynamic_dates.updated_at.dom"',
        'ORDER BY "dynamic_dates.updated_at.month" ASC,"dynamic_dates.updated_at.dom" ASC',
    )
    for fragment in expected_fragments:
        assert fragment in sql
def no_join_with_filters(self):
    """Build a ``users_design`` payload with filters and ordering, no joins.

    Calls are applied in this order: one column, four aggregates, column
    filters, aggregate filters, then ordering. The ``legacy_*`` calls pass the
    source and attribute names as separate arguments, while the other filter
    and ordering calls pass a single dotted ``source.attribute`` name.

    Returns:
        Whatever the final ``order_by`` call on the builder returns.
    """
    builder = PayloadBuilder("users_design")
    builder = builder.columns("name")
    builder = builder.aggregates("count", "avg_age", "sum_clv", "max_clv")

    # Column filters: one in the legacy call shape, then the dotted ones.
    builder = builder.legacy_column_filter(
        "users_design", "name", "is_not_null", ""
    )
    column_filters = (
        ("users_design.name", "like", "%yannis%"),
        ("users_design.gender", "is_null", ""),
    )
    for column_filter in column_filters:
        builder = builder.column_filter(*column_filter)

    # Aggregate filters: one in the legacy call shape, then the dotted ones.
    builder = builder.legacy_aggregate_filter(
        "users_design", "count", "equal_to", 10
    )
    aggregate_filters = (
        ("users_design.avg_age", "greater_than", 20),
        ("users_design.avg_age", "less_than", 40),
        ("users_design.sum_clv", "greater_or_equal_than", 100),
        ("users_design.sum_clv", "less_or_equal_than", 500),
        ("users_design.max_clv", "greater_than", 10),
    )
    for aggregate_filter in aggregate_filters:
        builder = builder.aggregate_filter(*aggregate_filter)

    # Ordering; note that sum_clv is ordered with an empty direction string.
    builder = builder.legacy_order_by("users_design", "name", "asc")
    orderings = (
        ("users_design.avg_age", "desc"),
        ("users_design.sum_clv", ""),
        ("users_design.max_clv", "desc"),
    )
    for attribute, direction in orderings:
        builder = builder.order_by(attribute, direction)

    return builder
def join_with_filters(self):
    """Build a ``users_design`` payload spanning two joins, with filters.

    Columns and aggregates are requested for the base design and for the
    ``streams_join`` and ``episodes_join`` joins, followed by column filters,
    aggregate filters and ordering. Every filter and ordering call here passes
    the source name and the attribute name as separate arguments.

    Returns:
        Whatever the final ``order_by`` call on the builder returns.
    """
    builder = PayloadBuilder("users_design")

    # Base design selection (no ``join`` keyword is passed for these).
    builder = builder.columns("gender")
    builder = builder.aggregates("count", "avg_age", "sum_clv")

    # Per-join selections: columns first, then aggregates, for each join.
    join_selections = (
        (
            "streams_join",
            ("day", "month", "year"),
            ("count", "sum_minutes", "count_days"),
        ),
        (
            "episodes_join",
            ("tv_series",),
            ("count", "avg_rating"),
        ),
    )
    for join_name, join_columns, join_aggregates in join_selections:
        builder = builder.columns(*join_columns, join=join_name)
        builder = builder.aggregates(*join_aggregates, join=join_name)

    column_filters = (
        ("users_design", "gender", "equal_to", "male"),
        ("streams_join", "year", "greater_or_equal_than", "2017"),
        ("episodes_join", "tv_series", "like", "Marvel"),
        ("episodes_join", "title", "like", "%Wolverine%"),
    )
    for column_filter in column_filters:
        builder = builder.column_filter(*column_filter)

    aggregate_filters = (
        ("users_design", "sum_clv", "less_than", 50),
        ("episodes_join", "avg_rating", "greater_than", 8),
    )
    for aggregate_filter in aggregate_filters:
        builder = builder.aggregate_filter(*aggregate_filter)

    # The two episodes_join orderings use an empty direction string.
    orderings = (
        ("users_design", "gender", "asc"),
        ("users_design", "avg_age", "asc"),
        ("streams_join", "year", "desc"),
        ("streams_join", "sum_minutes", "desc"),
        ("episodes_join", "tv_series", ""),
        ("episodes_join", "avg_rating", ""),
    )
    for source, attribute, direction in orderings:
        builder = builder.order_by(source, attribute, direction)

    return builder
def streams(self):
    """Build a ``streams_design`` payload with ``users_join`` and ``episodes_join``.

    Columns and aggregates are requested first for the base design and then
    for each of the two joins, in that order.

    Returns:
        Whatever the final ``aggregates`` call on the builder returns.
    """
    # Base design selection (no ``join`` keyword is passed for these).
    base = PayloadBuilder("streams_design")
    base = base.columns("day", "month", "year")
    base = base.aggregates("count", "sum_minutes", "count_days")

    # Selection from the users join.
    with_users = base.columns("gender", join="users_join")
    with_users = with_users.aggregates(
        "count", "avg_age", "sum_clv", join="users_join"
    )

    # Selection from the episodes join.
    with_episodes = with_users.columns("tv_series", join="episodes_join")
    with_episodes = with_episodes.aggregates(
        "count", "avg_rating", join="episodes_join"
    )

    return with_episodes
def streams(self):
    """Build a ``streams_design`` payload with a timeframe and two joins.

    The base design contributes three columns, the ``streamed_at`` timeframe
    and three aggregates; ``users_join`` and ``episodes_join`` each contribute
    one column and their own aggregates.

    Returns:
        Whatever the final ``aggregates`` call on the builder returns.
    """
    # Two periods are given by "name" and one by "label".
    streamed_at_timeframe = {
        "name": "streamed_at",
        "periods": [{"name": "year"}, {"label": "Month"}, {"name": "dom"}],
    }

    # Base design selection (no ``join`` keyword is passed for these).
    builder = PayloadBuilder("streams_design")
    builder = builder.columns("day", "month", "year")
    builder = builder.timeframes(streamed_at_timeframe)
    builder = builder.aggregates("count", "sum_minutes", "count_days")

    # Per-join selections: columns first, then aggregates, for each join.
    join_selections = (
        (
            "users_join",
            ("gender",),
            ("count", "avg_age", "sum_clv", "max_clv"),
        ),
        (
            "episodes_join",
            ("tv_series",),
            ("count", "avg_rating", "min_rating"),
        ),
    )
    for join_name, join_columns, join_aggregates in join_selections:
        builder = builder.columns(*join_columns, join=join_name)
        builder = builder.aggregates(*join_aggregates, join=join_name)

    return builder
def _factory():
    """Return a new ``PayloadBuilder`` for ``"region"`` on every call.

    The builder is always created with ``run=False`` and
    ``loader="target-mock"``.
    """
    builder_options = {"run": False, "loader": "target-mock"}
    return PayloadBuilder("region", **builder_options)
def test_meltano_date_filters(self, gitflix):
    """Verify literal and relative date/time filters in the generated SQL.

    Four payloads are built against the ``dynamic_dates`` design:

    1. literal date and timestamp bounds,
    2. relative date bounds (``-7d`` / ``+0d``),
    3. relative timestamp bounds (``-3m`` / ``-2d``),
    4. relative bounds with the builder's ``today`` set to ``2020-03-05``.

    For each one the SQL returned by ``MeltanoQuery.get_query`` must contain
    the expected comparison expressions.
    """

    def sql_for(builder):
        # Turn a payload builder into SQL text; the other two values returned
        # by get_query() are not inspected by this test.
        query = MeltanoQuery(
            definition=builder.payload,
            design_helper=gitflix.design("dynamic_dates"),
        )
        sql_text, _query_attributes, _aggregate_columns = query.get_query()
        return sql_text

    # --- 1. Literal date and timestamp bounds -----------------------------
    literal_bounds = PayloadBuilder("dynamic_dates").columns(
        "report_date", "updated_at"
    )
    literal_column_filters = (
        ("dynamic_dates.report_date", "greater_or_equal_than", "2020-03-01"),
        ("dynamic_dates.report_date", "less_or_equal_than", "2020-03-31"),
        (
            "dynamic_dates.updated_at",
            "greater_or_equal_than",
            "2020-03-01T00:00:00.000Z",
        ),
        (
            "dynamic_dates.updated_at",
            "less_or_equal_than",
            "2020-03-31T23:59:59.999Z",
        ),
    )
    for literal_column_filter in literal_column_filters:
        literal_bounds = literal_bounds.column_filter(*literal_column_filter)
    literal_bounds = literal_bounds.aggregates("count")
    literal_bounds = literal_bounds.aggregate_filter(
        "dynamic_dates.count", "greater_or_equal_than", 0
    )
    literal_bounds = literal_bounds.aggregate_filter(
        "dynamic_dates.count", "less_or_equal_than", 100
    )

    sql = sql_for(literal_bounds)

    # Every column filter must appear as a comparison in the SQL.
    assert '"dynamic_dates"."report_date">=\'2020-03-01\'' in sql
    assert '"dynamic_dates"."report_date"<=\'2020-03-31\'' in sql
    assert '"dynamic_dates"."updated_at">=\'2020-03-01T00:00:00.000Z\'' in sql
    assert '"dynamic_dates"."updated_at"<=\'2020-03-31T23:59:59.999Z\'' in sql

    # --- 2. Relative date bounds ------------------------------------------
    relative_dates = PayloadBuilder("dynamic_dates").columns("report_date")
    relative_dates = relative_dates.column_filter(
        "dynamic_dates.report_date", "greater_or_equal_than", "-7d"
    )
    relative_dates = relative_dates.column_filter(
        "dynamic_dates.report_date", "less_or_equal_than", "+0d"
    )
    relative_dates = relative_dates.aggregates("count")

    sql = sql_for(relative_dates)

    start_date = "DATE(DATE(NOW())-INTERVAL '7 DAY')"
    end_date = "DATE(NOW())"

    assert f'"dynamic_dates"."report_date">={start_date}' in sql
    assert f'"dynamic_dates"."report_date"<={end_date}' in sql

    # --- 3. Relative timestamp bounds -------------------------------------
    relative_times = PayloadBuilder("dynamic_dates").columns("updated_at")
    relative_times = relative_times.column_filter(
        "dynamic_dates.updated_at", "greater_or_equal_than", "-3m"
    )
    relative_times = relative_times.column_filter(
        "dynamic_dates.updated_at", "less_or_equal_than", "-2d"
    )
    relative_times = relative_times.aggregates("count")

    sql = sql_for(relative_times)

    start_date_time = "DATE(NOW())-INTERVAL '3 MONTH'"
    end_date_time = "DATE(NOW())-INTERVAL '2 DAY'+INTERVAL '23 HOUR'+INTERVAL '59 MINUTE'+INTERVAL '59 SECOND'+INTERVAL '999999 MICROSECOND'"

    assert f'"dynamic_dates"."updated_at">={start_date_time}' in sql
    assert f'"dynamic_dates"."updated_at"<={end_date_time}' in sql

    # --- 4. Relative bounds against a preset "today" ----------------------
    preset_today = PayloadBuilder("dynamic_dates", today="2020-03-05").columns(
        "report_date", "updated_at"
    )
    preset_column_filters = (
        ("dynamic_dates.report_date", "greater_or_equal_than", "-7d"),
        ("dynamic_dates.report_date", "less_or_equal_than", "+0d"),
        ("dynamic_dates.updated_at", "greater_or_equal_than", "-3m"),
        ("dynamic_dates.updated_at", "less_or_equal_than", "-1d"),
    )
    for preset_column_filter in preset_column_filters:
        preset_today = preset_today.column_filter(*preset_column_filter)
    preset_today = preset_today.aggregates("count")

    sql = sql_for(preset_today)

    start_date = "DATE(DATE('2020-03-05')-INTERVAL '7 DAY')"
    end_date = "DATE('2020-03-05')"

    assert f'"dynamic_dates"."report_date">={start_date}' in sql
    assert f'"dynamic_dates"."report_date"<={end_date}' in sql

    start_date_time = "DATE('2020-03-05')-INTERVAL '3 MONTH'"
    end_date_time = "DATE('2020-03-05')-INTERVAL '1 DAY'+INTERVAL '23 HOUR'+INTERVAL '59 MINUTE'+INTERVAL '59 SECOND'+INTERVAL '999999 MICROSECOND'"

    assert f'"dynamic_dates"."updated_at">={start_date_time}' in sql
    assert f'"dynamic_dates"."updated_at"<={end_date_time}' in sql
def test_meltano_invalid_filters(self, gitflix):
    """Verify that invalid filter definitions are rejected with clear errors.

    Each scenario adds one invalid filter to the same ``users_design``
    selection and expects constructing a ``MeltanoQuery`` from it to raise the
    given exception type with the given text in its message.
    """

    def users_payload():
        # Every scenario starts from the same column and aggregates.
        return (
            PayloadBuilder("users_design")
            .columns("gender")
            .aggregates("count", "avg_age", "sum_clv")
        )

    def assert_rejected(bad_payload, error_type, message):
        with pytest.raises(error_type) as e:
            assert MeltanoQuery(
                definition=bad_payload.payload,
                design_helper=gitflix.design("users_design"),
            )

        # Checked after the ``with`` block: a statement placed after the
        # raising call inside the block would never run.
        assert message in str(e.value)

    # Unknown filter expression.
    bad_payload = users_payload().column_filter(
        "users_design.gender", "WRONG_EXPRESSION_TYPE", "male"
    )
    assert_rejected(
        bad_payload,
        NotImplementedError,
        "Unknown filter expression: WRONG_EXPRESSION_TYPE",
    )

    # Missing value for an expression that needs one.
    bad_payload = users_payload().aggregate_filter(
        "users_design.sum_clv", "equal_to", None
    )
    assert_rejected(
        bad_payload,
        ParseError,
        "Filter expression: equal_to needs a non-empty value.",
    )

    # Source not defined in the design, legacy call shape.
    bad_payload = users_payload().legacy_column_filter(
        "UNAVAILABLE_SOURCE", "gender", "equal_to", "male"
    )
    assert_rejected(
        bad_payload,
        ParseError,
        "Table UNAVAILABLE_SOURCE not found in design users_design",
    )

    # Source not defined in the design, dotted attribute name.
    bad_payload = users_payload().column_filter(
        "UNAVAILABLE_SOURCE.gender", "equal_to", "male"
    )
    assert_rejected(
        bad_payload,
        ParseError,
        "Attribute UNAVAILABLE_SOURCE.gender not found in design users_design",
    )

    # Column not defined in the design.
    bad_payload = users_payload().column_filter(
        "users_design.UNAVAILABLE_COLUMN", "equal_to", "male"
    )
    assert_rejected(
        bad_payload,
        ParseError,
        "Attribute users_design.UNAVAILABLE_COLUMN not found in design users_design",
    )

    # Aggregate not defined in the design.
    bad_payload = users_payload().aggregate_filter(
        "users_design.UNAVAILABLE_AGGREGATE", "less_than", 50
    )
    assert_rejected(
        bad_payload,
        ParseError,
        "Attribute users_design.UNAVAILABLE_AGGREGATE not found in design users_design",
    )