def test_meltano_order_by_timeframe_periods(self, gitflix):
    # Test ordering by timeframe periods
    order_by_timeframe_periods = (
        PayloadBuilder("dynamic_dates")
        .timeframes(
            {
                "name": "updated_at",
                "periods": [{"name": "month"}, {"name": "dom"}],
            }
        )
        .aggregates("count")
        .order_by("dynamic_dates.updated_at.month", "asc")
        .order_by("dynamic_dates.updated_at.dom", "asc")
    )

    q = MeltanoQuery(
        definition=order_by_timeframe_periods.payload,
        design_helper=gitflix.design("dynamic_dates"),
    )

    # Generate the query
    (sql, query_attributes, aggregate_columns) = q.get_query()

    assert (
        'EXTRACT(\'MONTH\' FROM "dynamic_dates"."updated_at") "dynamic_dates.updated_at.month"'
        in sql
    )
    assert (
        'EXTRACT(\'DAY\' FROM "dynamic_dates"."updated_at") "dynamic_dates.updated_at.dom"'
        in sql
    )
    assert (
        'ORDER BY "dynamic_dates.updated_at.month" ASC,"dynamic_dates.updated_at.dom" ASC'
        in sql
    )

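# The assertions above pin down how timeframe period names map onto SQL EXTRACT
# parts ("month" -> MONTH, "dom", i.e. day-of-month -> DAY). A minimal sketch of
# that convention, reconstructed from the asserted SQL only; the name
# PERIOD_TO_EXTRACT_PART is hypothetical and not part of MeltanoQuery.
PERIOD_TO_EXTRACT_PART = {
    "month": "MONTH",  # EXTRACT('MONTH' FROM "dynamic_dates"."updated_at")
    "dom": "DAY",  # EXTRACT('DAY' FROM "dynamic_dates"."updated_at")
}
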
def test_meltano_hda_query_filters(self, join_with_filters, gitflix):
    # Test an HDA query with filters
    q = MeltanoQuery(
        definition=join_with_filters.payload,
        design_helper=gitflix.design("users_design"),
    )

    # Test generating an HDA query
    (sql, query_attributes, aggregate_columns) = q.get_query()

    # There should be 1 WHERE clause and 2 HAVING clauses
    assert sql.count("WHERE") == 1
    assert sql.count("HAVING") == 2

    # Check that all the WHERE filters were added correctly
    assert '"users_design"."gender"=\'male\'' in sql
    assert '"streams_join"."year">=\'2017\'' in sql
    assert '"episodes_join"."tv_series" LIKE \'Marvel\'' in sql
    assert '"episodes_join"."title" LIKE \'%Wolverine%\'' in sql

    # Check that all the HAVING filters were added correctly
    assert 'HAVING COALESCE(SUM("users_design.clv"),0)<50' in sql
    assert 'COALESCE(AVG("episodes_join.rating"),0)>8' in sql
    assert 'COALESCE(MIN("episodes_join.rating"),0)>6' in sql

    # Check that the correct ORDER BY clauses have been generated
    # and that they are in the correct order
    order_by_clause = (
        'ORDER BY "result"."users_design.gender" ASC,'
        '"result"."users_design.avg_age" ASC,'
        '"result"."streams_join.year" DESC,'
        '"result"."streams_join.sum_minutes" DESC,'
        '"result"."episodes_join.tv_series" ASC,'
        '"result"."episodes_join.avg_rating" ASC'
    )
    assert order_by_clause in sql

def test_meltano_no_join_query_filters(self, no_join_with_filters, gitflix):
    # Test a no-join query with filters
    q = MeltanoQuery(
        definition=no_join_with_filters.payload,
        design_helper=gitflix.design("users_design"),
    )

    # Test generating the query
    (sql, query_attributes, aggregate_columns) = q.get_query()

    # There should be 1 WHERE clause and 1 HAVING clause
    assert sql.count("WHERE") == 1
    assert sql.count("HAVING") == 1

    # Check that all the WHERE filters were added correctly
    assert 'NOT "users_design"."name" IS NULL' in sql
    assert '"users_design"."name" LIKE \'%yannis%\'' in sql
    assert '"users_design"."gender" IS NULL' in sql

    # Check that all the HAVING filters were added correctly
    assert 'COALESCE(SUM("users_design"."clv"),0)>=100' in sql
    assert 'COALESCE(SUM("users_design"."clv"),0)<=500' in sql
    assert 'COALESCE(COUNT("users_design"."id"),0)=10' in sql
    assert 'COALESCE(AVG("users_design"."age"),0)>20' in sql
    assert 'COALESCE(AVG("users_design"."age"),0)<40' in sql
    assert 'COALESCE(MAX("users_design"."clv"),0)>10' in sql

    # Check that the correct ORDER BY clauses have been generated
    assert (
        'ORDER BY "users_design.name" ASC,"users_design.avg_age" DESC,"users_design.sum_clv" ASC,"users_design.max_clv" DESC'
        in sql
    )

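# Both filter tests above rely on the same convention: column filters end up in the
# WHERE clause, while aggregate filters end up in the HAVING clause with the
# aggregate wrapped in COALESCE(<aggregate>, 0) so NULL groups compare as zero.
# A minimal illustrative sketch of that rendering, reconstructed from the asserted
# strings; _coalesced_having is a hypothetical helper, not Meltano's implementation.
def _coalesced_having(aggregate_sql, operator, value):
    """Render a HAVING predicate in the shape the asserts above expect."""
    return f"COALESCE({aggregate_sql},0){operator}{value}"


# e.g. _coalesced_having('SUM("users_design"."clv")', ">=", 100)
# == 'COALESCE(SUM("users_design"."clv"),0)>=100'
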
def test_meltano_disjointed_query(self, streams, gitflix):
    # Test parsing a JSON payload using a Design generated from an m5oc file
    # and generating a proper MeltanoQuery object
    q = MeltanoQuery(
        definition=streams.payload,
        design_helper=gitflix.design("streams_design"),
    )

    assert q.design.name == "streams_design"
    assert len(q.tables) == 3
    assert len(q.join_order) == 3
    assert q.join_order[2]["table"] == "episodes_table"

    # Test generating an HDA query
    (sql, query_attributes, aggregate_columns) = q.get_query()

    assert any(attr["attribute_label"] == "Average Age" for attr in query_attributes)
    assert any(attr["attribute_name"] == "sum_minutes" for attr in query_attributes)
    assert any(attr["id"] == "users.sum_clv" for attr in aggregate_columns)

    assert "WITH base_join AS (SELECT" in sql
    assert "base_streams_table AS (SELECT DISTINCT" in sql
    assert "users_table_stats AS (" in sql
    assert 'COALESCE(AVG("episodes.rating"),0)' in sql
    assert 'COALESCE(COUNT("users.id"),0)' in sql
    assert 'COALESCE(SUM("users.clv"),0)' in sql
    assert 'SELECT * FROM "result"' in sql
    assert 'JOIN "users"' in sql
    assert 'JOIN "episodes"' in sql

    # Check that the attribute used both as a column and as an aggregate:
    # 1. only appears once in the SELECT clause of the base query
    # 2. properly appears as an aggregate
    assert sql.count('"streams"."day" "streams.day"') == 1
    assert 'COALESCE(COUNT("streams.day"),0)' in sql

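# The structural asserts above imply an HDA query built from chained CTEs, roughly:
#
#   WITH base_join AS (SELECT ... FROM "streams" JOIN "users" ... JOIN "episodes" ...),
#        base_streams_table AS (SELECT DISTINCT ...),
#        users_table_stats AS (...),
#        ...
#        result AS (...)
#   SELECT * FROM "result"
#
# This skeleton is reconstructed only from the substrings checked above; the exact
# SQL is whatever MeltanoQuery.get_query() emits.
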
def test_meltano_date_filters(self, gitflix):
    # Test normal date and time filters
    normal_dates = (
        PayloadBuilder("dynamic_dates")
        .columns("report_date", "updated_at")
        .column_filter("dynamic_dates.report_date", "greater_or_equal_than", "2020-03-01")
        .column_filter("dynamic_dates.report_date", "less_or_equal_than", "2020-03-31")
        .column_filter(
            "dynamic_dates.updated_at",
            "greater_or_equal_than",
            "2020-03-01T00:00:00.000Z",
        )
        .column_filter(
            "dynamic_dates.updated_at",
            "less_or_equal_than",
            "2020-03-31T23:59:59.999Z",
        )
        .aggregates("count")
        .aggregate_filter("dynamic_dates.count", "greater_or_equal_than", 0)
        .aggregate_filter("dynamic_dates.count", "less_or_equal_than", 100)
    )

    q = MeltanoQuery(
        definition=normal_dates.payload,
        design_helper=gitflix.design("dynamic_dates"),
    )

    # Generate the query
    (sql, query_attributes, aggregate_columns) = q.get_query()

    # Check that all the WHERE filters were added correctly
    assert '"dynamic_dates"."report_date">=\'2020-03-01\'' in sql
    assert '"dynamic_dates"."report_date"<=\'2020-03-31\'' in sql
    assert '"dynamic_dates"."updated_at">=\'2020-03-01T00:00:00.000Z\'' in sql
    assert '"dynamic_dates"."updated_at"<=\'2020-03-31T23:59:59.999Z\'' in sql

    # Test dynamic date filters
    dynamic_date_range = (
        PayloadBuilder("dynamic_dates")
        .columns("report_date")
        .column_filter("dynamic_dates.report_date", "greater_or_equal_than", "-7d")
        .column_filter("dynamic_dates.report_date", "less_or_equal_than", "+0d")
        .aggregates("count")
    )

    q = MeltanoQuery(
        definition=dynamic_date_range.payload,
        design_helper=gitflix.design("dynamic_dates"),
    )

    # Generate the query
    (sql, query_attributes, aggregate_columns) = q.get_query()

    start_date = "DATE(DATE(NOW())-INTERVAL '7 DAY')"
    end_date = "DATE(NOW())"

    # Check that all the WHERE filters were added correctly
    assert f'"dynamic_dates"."report_date">={start_date}' in sql
    assert f'"dynamic_dates"."report_date"<={end_date}' in sql

    # Test dynamic time filters
    dynamic_time_range = (
        PayloadBuilder("dynamic_dates")
        .columns("updated_at")
        .column_filter("dynamic_dates.updated_at", "greater_or_equal_than", "-3m")
        .column_filter("dynamic_dates.updated_at", "less_or_equal_than", "-2d")
        .aggregates("count")
    )

    q = MeltanoQuery(
        definition=dynamic_time_range.payload,
        design_helper=gitflix.design("dynamic_dates"),
    )

    # Generate the query
    (sql, query_attributes, aggregate_columns) = q.get_query()

    start_date_time = "DATE(NOW())-INTERVAL '3 MONTH'"
    end_date_time = "DATE(NOW())-INTERVAL '2 DAY'+INTERVAL '23 HOUR'+INTERVAL '59 MINUTE'+INTERVAL '59 SECOND'+INTERVAL '999999 MICROSECOND'"

    # Check that all the WHERE filters were added correctly
    assert f'"dynamic_dates"."updated_at">={start_date_time}' in sql
    assert f'"dynamic_dates"."updated_at"<={end_date_time}' in sql

    # Test dynamic date/time filters against a preset date for "today"
    dynamic_date_range = (
        PayloadBuilder("dynamic_dates", today="2020-03-05")
        .columns("report_date", "updated_at")
        .column_filter("dynamic_dates.report_date", "greater_or_equal_than", "-7d")
        .column_filter("dynamic_dates.report_date", "less_or_equal_than", "+0d")
        .column_filter("dynamic_dates.updated_at", "greater_or_equal_than", "-3m")
        .column_filter("dynamic_dates.updated_at", "less_or_equal_than", "-1d")
        .aggregates("count")
    )

    q = MeltanoQuery(
        definition=dynamic_date_range.payload,
        design_helper=gitflix.design("dynamic_dates"),
    )

    # Generate the query
    (sql, query_attributes, aggregate_columns) = q.get_query()

    # Check that all the WHERE filters were added correctly
    start_date = "DATE(DATE('2020-03-05')-INTERVAL '7 DAY')"
    end_date = "DATE('2020-03-05')"

    assert f'"dynamic_dates"."report_date">={start_date}' in sql
    assert f'"dynamic_dates"."report_date"<={end_date}' in sql

    start_date_time = "DATE('2020-03-05')-INTERVAL '3 MONTH'"
    end_date_time = "DATE('2020-03-05')-INTERVAL '1 DAY'+INTERVAL '23 HOUR'+INTERVAL '59 MINUTE'+INTERVAL '59 SECOND'+INTERVAL '999999 MICROSECOND'"

    assert f'"dynamic_dates"."updated_at">={start_date_time}' in sql
    assert f'"dynamic_dates"."updated_at"<={end_date_time}' in sql

def test_meltano_invalid_filters(self, gitflix):
    # Test for wrong expression
    bad_payload = (
        PayloadBuilder("users_design")
        .columns("gender")
        .aggregates("count", "avg_age", "sum_clv")
        .column_filter("users_design.gender", "WRONG_EXPRESSION_TYPE", "male")
    )

    with pytest.raises(NotImplementedError) as e:
        assert MeltanoQuery(
            definition=bad_payload.payload,
            design_helper=gitflix.design("users_design"),
        )
    assert "Unknown filter expression: WRONG_EXPRESSION_TYPE" in str(e.value)

    # Test for wrong value
    bad_payload = (
        PayloadBuilder("users_design")
        .columns("gender")
        .aggregates("count", "avg_age", "sum_clv")
        .aggregate_filter("users_design.sum_clv", "equal_to", None)
    )

    with pytest.raises(ParseError) as e:
        assert MeltanoQuery(
            definition=bad_payload.payload,
            design_helper=gitflix.design("users_design"),
        )
    assert "Filter expression: equal_to needs a non-empty value." in str(e.value)

    # Test for table not defined in design using legacy format
    bad_payload = (
        PayloadBuilder("users_design")
        .columns("gender")
        .aggregates("count", "avg_age", "sum_clv")
        .legacy_column_filter("UNAVAILABLE_SOURCE", "gender", "equal_to", "male")
    )

    with pytest.raises(ParseError) as e:
        assert MeltanoQuery(
            definition=bad_payload.payload,
            design_helper=gitflix.design("users_design"),
        )
    assert "Table UNAVAILABLE_SOURCE not found in design users_design" in str(e.value)

    # Test for table not defined in design
    bad_payload = (
        PayloadBuilder("users_design")
        .columns("gender")
        .aggregates("count", "avg_age", "sum_clv")
        .column_filter("UNAVAILABLE_SOURCE.gender", "equal_to", "male")
    )

    with pytest.raises(ParseError) as e:
        assert MeltanoQuery(
            definition=bad_payload.payload,
            design_helper=gitflix.design("users_design"),
        )
    assert (
        "Attribute UNAVAILABLE_SOURCE.gender not found in design users_design"
        in str(e.value)
    )

    # Test for column not defined in design
    bad_payload = (
        PayloadBuilder("users_design")
        .columns("gender")
        .aggregates("count", "avg_age", "sum_clv")
        .column_filter("users_design.UNAVAILABLE_COLUMN", "equal_to", "male")
    )

    with pytest.raises(ParseError) as e:
        assert MeltanoQuery(
            definition=bad_payload.payload,
            design_helper=gitflix.design("users_design"),
        )
    assert (
        "Attribute users_design.UNAVAILABLE_COLUMN not found in design users_design"
        in str(e.value)
    )

    # Test for aggregate not defined in design
    bad_payload = (
        PayloadBuilder("users_design")
        .columns("gender")
        .aggregates("count", "avg_age", "sum_clv")
        .aggregate_filter("users_design.UNAVAILABLE_AGGREGATE", "less_than", 50)
    )

    with pytest.raises(ParseError) as e:
        assert MeltanoQuery(
            definition=bad_payload.payload,
            design_helper=gitflix.design("users_design"),
        )
    assert (
        "Attribute users_design.UNAVAILABLE_AGGREGATE not found in design users_design"
        in str(e.value)
    )

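# The six invalid-filter cases above all follow the same build-payload / expect-error
# shape, so they could also be driven by pytest.mark.parametrize. A sketch of that
# refactor, assuming only the fixtures, builder methods, and error messages already
# asserted in this file; test_meltano_invalid_filters_parametrized is illustrative.
@pytest.mark.parametrize(
    "apply_filter, error, message",
    [
        (
            lambda b: b.column_filter("users_design.gender", "WRONG_EXPRESSION_TYPE", "male"),
            NotImplementedError,
            "Unknown filter expression: WRONG_EXPRESSION_TYPE",
        ),
        (
            lambda b: b.aggregate_filter("users_design.sum_clv", "equal_to", None),
            ParseError,
            "Filter expression: equal_to needs a non-empty value.",
        ),
        (
            lambda b: b.column_filter("UNAVAILABLE_SOURCE.gender", "equal_to", "male"),
            ParseError,
            "Attribute UNAVAILABLE_SOURCE.gender not found in design users_design",
        ),
    ],
)
def test_meltano_invalid_filters_parametrized(self, gitflix, apply_filter, error, message):
    bad_payload = apply_filter(
        PayloadBuilder("users_design").columns("gender").aggregates("count", "avg_age", "sum_clv")
    )

    with pytest.raises(error) as e:
        MeltanoQuery(
            definition=bad_payload.payload,
            design_helper=gitflix.design("users_design"),
        )
    assert message in str(e.value)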