Example #1
0
    def test_meltano_order_by_timeframe_periods(self, gitflix):
        # Test normal date and time filters
        order_by_timeframe_periods = (
            PayloadBuilder("dynamic_dates").timeframes({
                "name":
                "updated_at",
                "periods": [{
                    "name": "month"
                }, {
                    "name": "dom"
                }]
            }).aggregates("count").order_by("dynamic_dates.updated_at.month",
                                            "asc").order_by(
                                                "dynamic_dates.updated_at.dom",
                                                "asc"))

        q = MeltanoQuery(
            definition=order_by_timeframe_periods.payload,
            design_helper=gitflix.design("dynamic_dates"),
        )

        # Generating the query
        (sql, query_attributes, aggregate_columns) = q.get_query()

        assert (
            'EXTRACT(\'MONTH\' FROM "dynamic_dates"."updated_at") "dynamic_dates.updated_at.month"'
            in sql)
        assert (
            'EXTRACT(\'DAY\' FROM "dynamic_dates"."updated_at") "dynamic_dates.updated_at.dom"'
            in sql)
        assert (
            'ORDER BY "dynamic_dates.updated_at.month" ASC,"dynamic_dates.updated_at.dom" ASC'
            in sql)
Example #2
0
    def test_meltano_hda_query_filters(self, join_with_filters, gitflix):
        # Test an HDA query with filters
        q = MeltanoQuery(
            definition=join_with_filters.payload,
            design_helper=gitflix.design("users_design"),
        )

        # Test generating an HDA query
        (sql, query_attributes, aggregate_columns) = q.get_query()

        # There should be one where clause and 2 having clauses
        assert sql.count("WHERE") == 1
        assert sql.count("HAVING") == 2

        # Check that all the WHERE filters were added correctly
        assert '"users_design"."gender"=\'male\'' in sql
        assert '"streams_join"."year">=\'2017\'' in sql
        assert '"episodes_join"."tv_series" LIKE \'Marvel\'' in sql
        assert '"episodes_join"."title" LIKE \'%Wolverine%\'' in sql

        # Check that all the HAVING filters were added correctly
        assert 'HAVING COALESCE(SUM("users_design.clv"),0)<50' in sql
        assert 'COALESCE(AVG("episodes_join.rating"),0)>8' in sql
        assert 'COALESCE(MIN("episodes_join.rating"),0)>6' in sql

        # Check that the correct order by clauses have been generated
        #  and that they are in the correct order
        order_by_clause = ('ORDER BY "result"."users_design.gender" ASC,'
                           '"result"."users_design.avg_age" ASC,'
                           '"result"."streams_join.year" DESC,'
                           '"result"."streams_join.sum_minutes" DESC,'
                           '"result"."episodes_join.tv_series" ASC,'
                           '"result"."episodes_join.avg_rating" ASC')
        assert order_by_clause in sql
Example #3
0
    def test_meltano_no_join_query_filters(self, no_join_with_filters,
                                           gitflix):
        # Test a no-join query with filters
        q = MeltanoQuery(
            definition=no_join_with_filters.payload,
            design_helper=gitflix.design("users_design"),
        )

        # Test generating an HDA query
        (sql, query_attributes, aggregate_columns) = q.get_query()

        # There should be one where clause and 1 having clause
        assert sql.count("WHERE") == 1
        assert sql.count("HAVING") == 1

        # Check that all the WHERE filters were added correctly
        assert 'NOT "users_design"."name" IS NULL' in sql
        assert '"users_design"."name" LIKE \'%yannis%\'' in sql
        assert '"users_design"."gender" IS NULL' in sql

        # Check that all the HAVING filters were added correctly
        assert 'COALESCE(SUM("users_design"."clv"),0)>=100' in sql
        assert 'COALESCE(SUM("users_design"."clv"),0)<=500' in sql
        assert 'COALESCE(COUNT("users_design"."id"),0)=10' in sql
        assert 'COALESCE(AVG("users_design"."age"),0)>20' in sql
        assert 'COALESCE(AVG("users_design"."age"),0)<40' in sql
        assert 'COALESCE(MAX("users_design"."clv"),0)>10' in sql

        # Check that the correct order by clauses have been generated
        assert (
            'ORDER BY "users_design.name" ASC,"users_design.avg_age" DESC,"users_design.sum_clv" ASC,"users_design.max_clv" DESC'
            in sql)
Example #4
0
    def test_meltano_disjointed_query(self, streams, gitflix):
        # Test parsing a json payload using a Design generated from a m5oc file
        #  and generating a proper MeltanoQuery Object
        q = MeltanoQuery(definition=streams.payload,
                         design_helper=gitflix.design("streams_design"))

        assert q.design.name == "streams_design"
        assert len(q.tables) == 3
        assert len(q.join_order) == 3
        assert q.join_order[2]["table"] == "episodes_table"

        # Test generating an HDA query
        (sql, query_attributes, aggregate_columns) = q.get_query()

        assert any(attr["attribute_label"] == "Average Age"
                   for attr in query_attributes)
        assert any(attr["attribute_name"] == "sum_minutes"
                   for attr in query_attributes)
        assert any(attr["id"] == "users.sum_clv" for attr in aggregate_columns)

        assert "WITH base_join AS (SELECT" in sql
        assert "base_streams_table AS (SELECT DISTINCT" in sql
        assert "users_table_stats AS (" in sql
        assert 'COALESCE(AVG("episodes.rating"),0)' in sql
        assert 'COALESCE(COUNT("users.id"),0)' in sql
        assert 'COALESCE(SUM("users.clv"),0)' in sql
        assert 'SELECT * FROM "result"' in sql
        assert 'JOIN "users"' in sql
        assert 'JOIN "episodes"' in sql

        # Check that the attribute used both as a column and as an aggregate
        # 1. Only appears once in the select clause of the base query
        # 2. Properly appears as an aggregate
        assert sql.count('"streams"."day" "streams.day"') == 1
        assert 'COALESCE(COUNT("streams.day"),0)' in sql
Example #5
0
    def test_meltano_date_filters(self, gitflix):
        # Test normal date and time filters
        normal_dates = (PayloadBuilder("dynamic_dates").columns(
            "report_date", "updated_at").column_filter(
                "dynamic_dates.report_date",
                "greater_or_equal_than", "2020-03-01").column_filter(
                    "dynamic_dates.report_date", "less_or_equal_than",
                    "2020-03-31").column_filter(
                        "dynamic_dates.updated_at",
                        "greater_or_equal_than",
                        "2020-03-01T00:00:00.000Z",
                    ).column_filter(
                        "dynamic_dates.updated_at",
                        "less_or_equal_than",
                        "2020-03-31T23:59:59.999Z",
                    ).aggregates("count").aggregate_filter(
                        "dynamic_dates.count", "greater_or_equal_than",
                        0).aggregate_filter("dynamic_dates.count",
                                            "less_or_equal_than", 100))

        q = MeltanoQuery(
            definition=normal_dates.payload,
            design_helper=gitflix.design("dynamic_dates"),
        )

        # Generating the query
        (sql, query_attributes, aggregate_columns) = q.get_query()

        # Check that all the WHERE filters were added correctly
        assert '"dynamic_dates"."report_date">=\'2020-03-01\'' in sql
        assert '"dynamic_dates"."report_date"<=\'2020-03-31\'' in sql
        assert '"dynamic_dates"."updated_at">=\'2020-03-01T00:00:00.000Z\'' in sql
        assert '"dynamic_dates"."updated_at"<=\'2020-03-31T23:59:59.999Z\'' in sql

        # Test dynamic date filters
        dynamic_date_range = (PayloadBuilder(
            "dynamic_dates").columns("report_date").column_filter(
                "dynamic_dates.report_date", "greater_or_equal_than",
                "-7d").column_filter("dynamic_dates.report_date",
                                     "less_or_equal_than",
                                     "+0d").aggregates("count"))

        q = MeltanoQuery(
            definition=dynamic_date_range.payload,
            design_helper=gitflix.design("dynamic_dates"),
        )

        # Generating the query
        (sql, query_attributes, aggregate_columns) = q.get_query()

        start_date = "DATE(DATE(NOW())-INTERVAL '7 DAY')"
        end_date = "DATE(NOW())"

        # Check that all the WHERE filters were added correctly
        assert f'"dynamic_dates"."report_date">={start_date}' in sql
        assert f'"dynamic_dates"."report_date"<={end_date}' in sql

        # Test dynamic time filters
        dynamic_time_range = (PayloadBuilder(
            "dynamic_dates").columns("updated_at").column_filter(
                "dynamic_dates.updated_at", "greater_or_equal_than",
                "-3m").column_filter("dynamic_dates.updated_at",
                                     "less_or_equal_than",
                                     "-2d").aggregates("count"))

        q = MeltanoQuery(
            definition=dynamic_time_range.payload,
            design_helper=gitflix.design("dynamic_dates"),
        )

        # Generating the query
        (sql, query_attributes, aggregate_columns) = q.get_query()

        start_date_time = "DATE(NOW())-INTERVAL '3 MONTH'"
        end_date_time = "DATE(NOW())-INTERVAL '2 DAY'+INTERVAL '23 HOUR'+INTERVAL '59 MINUTE'+INTERVAL '59 SECOND'+INTERVAL '999999 MICROSECOND'"

        # Check that all the WHERE filters were added correctly
        assert f'"dynamic_dates"."updated_at">={start_date_time}' in sql
        assert f'"dynamic_dates"."updated_at"<={end_date_time}' in sql

        # Test dynamic date/time filters against preset date for "today"
        dynamic_date_range = (PayloadBuilder(
            "dynamic_dates", today="2020-03-05").columns(
                "report_date", "updated_at").column_filter(
                    "dynamic_dates.report_date",
                    "greater_or_equal_than", "-7d").column_filter(
                        "dynamic_dates.report_date", "less_or_equal_than",
                        "+0d").column_filter("dynamic_dates.updated_at",
                                             "greater_or_equal_than",
                                             "-3m").column_filter(
                                                 "dynamic_dates.updated_at",
                                                 "less_or_equal_than",
                                                 "-1d").aggregates("count"))

        q = MeltanoQuery(
            definition=dynamic_date_range.payload,
            design_helper=gitflix.design("dynamic_dates"),
        )

        # Generating the query
        (sql, query_attributes, aggregate_columns) = q.get_query()

        # Check that all the WHERE filters were added correctly
        start_date = "DATE(DATE('2020-03-05')-INTERVAL '7 DAY')"
        end_date = "DATE('2020-03-05')"

        assert f'"dynamic_dates"."report_date">={start_date}' in sql
        assert f'"dynamic_dates"."report_date"<={end_date}' in sql

        start_date_time = "DATE('2020-03-05')-INTERVAL '3 MONTH'"
        end_date_time = "DATE('2020-03-05')-INTERVAL '1 DAY'+INTERVAL '23 HOUR'+INTERVAL '59 MINUTE'+INTERVAL '59 SECOND'+INTERVAL '999999 MICROSECOND'"

        assert f'"dynamic_dates"."updated_at">={start_date_time}' in sql
        assert f'"dynamic_dates"."updated_at"<={end_date_time}' in sql
Example #6
0
    def test_meltano_invalid_filters(self, gitflix):
        # Test for wrong expression
        bad_payload = (
            PayloadBuilder("users_design").columns("gender").aggregates(
                "count", "avg_age",
                "sum_clv").column_filter("users_design.gender",
                                         "WRONG_EXPRESSION_TYPE", "male"))

        with pytest.raises(NotImplementedError) as e:
            assert MeltanoQuery(
                definition=bad_payload.payload,
                design_helper=gitflix.design("users_design"),
            )

        assert "Unknown filter expression: WRONG_EXPRESSION_TYPE" in str(
            e.value)

        # Test for wrong value
        bad_payload = (
            PayloadBuilder("users_design").columns("gender").aggregates(
                "count", "avg_age",
                "sum_clv").aggregate_filter("users_design.sum_clv", "equal_to",
                                            None))

        with pytest.raises(ParseError) as e:
            assert MeltanoQuery(
                definition=bad_payload.payload,
                design_helper=gitflix.design("users_design"),
            )

        assert "Filter expression: equal_to needs a non-empty value." in str(
            e.value)

        # Test for table not defined in design using legacy format
        bad_payload = (
            PayloadBuilder("users_design").columns("gender").aggregates(
                "count", "avg_age",
                "sum_clv").legacy_column_filter("UNAVAILABLE_SOURCE", "gender",
                                                "equal_to", "male"))

        with pytest.raises(ParseError) as e:
            assert MeltanoQuery(
                definition=bad_payload.payload,
                design_helper=gitflix.design("users_design"),
            )

        assert "Table UNAVAILABLE_SOURCE not found in design users_design" in str(
            e.value)

        # Test for table not defined in design
        bad_payload = (
            PayloadBuilder("users_design").columns("gender").aggregates(
                "count", "avg_age",
                "sum_clv").column_filter("UNAVAILABLE_SOURCE.gender",
                                         "equal_to", "male"))

        with pytest.raises(ParseError) as e:
            assert MeltanoQuery(
                definition=bad_payload.payload,
                design_helper=gitflix.design("users_design"),
            )

        assert (
            "Attribute UNAVAILABLE_SOURCE.gender not found in design users_design"
            in str(e.value))

        # Test for column not defined in design
        bad_payload = (
            PayloadBuilder("users_design").columns("gender").aggregates(
                "count", "avg_age",
                "sum_clv").column_filter("users_design.UNAVAILABLE_COLUMN",
                                         "equal_to", "male"))

        with pytest.raises(ParseError) as e:
            assert MeltanoQuery(
                definition=bad_payload.payload,
                design_helper=gitflix.design("users_design"),
            )

        assert (
            "Attribute users_design.UNAVAILABLE_COLUMN not found in design users_design"
            in str(e.value))

        # Test for aggregate not defined in design
        bad_payload = (
            PayloadBuilder("users_design").columns("gender").aggregates(
                "count", "avg_age", "sum_clv").aggregate_filter(
                    "users_design.UNAVAILABLE_AGGREGATE", "less_than", 50))

        with pytest.raises(ParseError) as e:
            assert MeltanoQuery(
                definition=bad_payload.payload,
                design_helper=gitflix.design("users_design"),
            )

        assert (
            "Attribute users_design.UNAVAILABLE_AGGREGATE not found in design users_design"
            in str(e.value))