예제 #1
0
    def test_meltano_order_by_timeframe_periods(self, gitflix):
        """Check that timeframe periods can be selected and ordered by.

        Requests the ``month`` and ``dom`` periods of the ``updated_at``
        timeframe on the ``dynamic_dates`` design, orders by both in ascending
        direction, and asserts that the generated SQL contains the expected
        ``EXTRACT`` expressions and the combined ``ORDER BY`` clause.
        """
        updated_at_timeframe = {
            "name": "updated_at",
            "periods": [{"name": "month"}, {"name": "dom"}],
        }
        payload_builder = (
            PayloadBuilder("dynamic_dates")
            .timeframes(updated_at_timeframe)
            .aggregates("count")
            .order_by("dynamic_dates.updated_at.month", "asc")
            .order_by("dynamic_dates.updated_at.dom", "asc")
        )

        query = MeltanoQuery(
            definition=payload_builder.payload,
            design_helper=gitflix.design("dynamic_dates"),
        )

        # Only the SQL text is inspected; the other two items are unused here.
        generated_sql, _query_attributes, _aggregate_columns = query.get_query()

        expected_fragments = (
            # One EXTRACT expression per requested period ...
            'EXTRACT(\'MONTH\' FROM "dynamic_dates"."updated_at") "dynamic_dates.updated_at.month"',
            'EXTRACT(\'DAY\' FROM "dynamic_dates"."updated_at") "dynamic_dates.updated_at.dom"',
            # ... and both periods in the ORDER BY clause, in request order.
            'ORDER BY "dynamic_dates.updated_at.month" ASC,"dynamic_dates.updated_at.dom" ASC',
        )
        for fragment in expected_fragments:
            assert fragment in generated_sql
예제 #2
0
 def no_join_with_filters(self):
     """Return a ``users_design`` payload builder with filters and ordering.

     The builder selects the ``name`` column and four aggregates, then adds
     column filters, aggregate filters and order-by clauses, using both the
     ``legacy_*`` builder methods and the dotted-attribute ones.
     """
     return (
         PayloadBuilder("users_design")
         # Selected attributes.
         .columns("name")
         .aggregates("count", "avg_age", "sum_clv", "max_clv")
         # Column filters.
         .legacy_column_filter("users_design", "name", "is_not_null", "")
         .column_filter("users_design.name", "like", "%yannis%")
         .column_filter("users_design.gender", "is_null", "")
         # Aggregate filters.
         .legacy_aggregate_filter("users_design", "count", "equal_to", 10)
         .aggregate_filter("users_design.avg_age", "greater_than", 20)
         .aggregate_filter("users_design.avg_age", "less_than", 40)
         .aggregate_filter("users_design.sum_clv", "greater_or_equal_than", 100)
         .aggregate_filter("users_design.sum_clv", "less_or_equal_than", 500)
         .aggregate_filter("users_design.max_clv", "greater_than", 10)
         # Ordering; note that sum_clv is given an empty direction string.
         .legacy_order_by("users_design", "name", "asc")
         .order_by("users_design.avg_age", "desc")
         .order_by("users_design.sum_clv", "")
         .order_by("users_design.max_clv", "desc")
     )
예제 #3
0
 def join_with_filters(self):
     """Return a ``users_design`` payload builder spanning two joins.

     Columns and aggregates are selected from the base design and from the
     ``streams_join`` and ``episodes_join`` joins, followed by column
     filters, aggregate filters and order-by clauses on all three sources.
     """
     return (
         PayloadBuilder("users_design")
         # Base design attributes.
         .columns("gender")
         .aggregates("count", "avg_age", "sum_clv")
         # Attributes taken from the streams join.
         .columns("day", "month", "year", join="streams_join")
         .aggregates("count", "sum_minutes", "count_days", join="streams_join")
         # Attributes taken from the episodes join.
         .columns("tv_series", join="episodes_join")
         .aggregates("count", "avg_rating", join="episodes_join")
         # Column filters.
         .column_filter("users_design", "gender", "equal_to", "male")
         .column_filter("streams_join", "year", "greater_or_equal_than", "2017")
         .column_filter("episodes_join", "tv_series", "like", "Marvel")
         .column_filter("episodes_join", "title", "like", "%Wolverine%")
         # Aggregate filters.
         .aggregate_filter("users_design", "sum_clv", "less_than", 50)
         .aggregate_filter("episodes_join", "avg_rating", "greater_than", 8)
         # Ordering; the episodes_join entries use an empty direction string.
         .order_by("users_design", "gender", "asc")
         .order_by("users_design", "avg_age", "asc")
         .order_by("streams_join", "year", "desc")
         .order_by("streams_join", "sum_minutes", "desc")
         .order_by("episodes_join", "tv_series", "")
         .order_by("episodes_join", "avg_rating", "")
     )
예제 #4
0
 def streams(self):
     """Return a ``streams_design`` payload builder with two joins.

     Selects date columns and aggregates from the base design, plus columns
     and aggregates from the ``users_join`` and ``episodes_join`` joins.
     """
     return (
         PayloadBuilder("streams_design")
         # Base design attributes.
         .columns("day", "month", "year")
         .aggregates("count", "sum_minutes", "count_days")
         # Attributes taken from the users join.
         .columns("gender", join="users_join")
         .aggregates("count", "avg_age", "sum_clv", join="users_join")
         # Attributes taken from the episodes join.
         .columns("tv_series", join="episodes_join")
         .aggregates("count", "avg_rating", join="episodes_join")
     )
예제 #5
0
 def streams(self):
     """Return a ``streams_design`` payload builder with a timeframe.

     Selects date columns, the ``streamed_at`` timeframe with three period
     entries, and aggregates from the base design, plus columns and
     aggregates from the ``users_join`` and ``episodes_join`` joins.
     """
     # The second period entry is keyed by "label" rather than "name".
     streamed_at_timeframe = {
         "name": "streamed_at",
         "periods": [{"name": "year"}, {"label": "Month"}, {"name": "dom"}],
     }
     return (
         PayloadBuilder("streams_design")
         # Base design attributes.
         .columns("day", "month", "year")
         .timeframes(streamed_at_timeframe)
         .aggregates("count", "sum_minutes", "count_days")
         # Attributes taken from the users join.
         .columns("gender", join="users_join")
         .aggregates("count", "avg_age", "sum_clv", "max_clv", join="users_join")
         # Attributes taken from the episodes join.
         .columns("tv_series", join="episodes_join")
         .aggregates("count", "avg_rating", "min_rating", join="episodes_join")
     )
예제 #6
0
 def _factory():
     """Return a new ``PayloadBuilder`` for ``"region"``.

     The builder is created with ``run=False`` and the ``"target-mock"``
     loader.
     """
     builder_options = {"run": False, "loader": "target-mock"}
     return PayloadBuilder("region", **builder_options)
예제 #7
0
    def test_meltano_date_filters(self, gitflix):
        """Check the WHERE clauses generated for date and time filters.

        Four scenarios are run against the ``dynamic_dates`` design:

        1. literal date and timestamp bounds,
        2. relative date bounds (``-7d`` / ``+0d``),
        3. relative time bounds (``-3m`` / ``-2d``),
        4. relative bounds with a preset ``today`` value on the builder.

        Each scenario asserts that the expected comparison text appears in the
        generated SQL.
        """

        def render_sql(payload_builder):
            """Build a query for ``payload_builder`` and return its SQL text."""
            query = MeltanoQuery(
                definition=payload_builder.payload,
                design_helper=gitflix.design("dynamic_dates"),
            )
            # get_query() yields three items; only the SQL text is used here.
            generated_sql, _query_attributes, _aggregate_columns = query.get_query()
            return generated_sql

        # --- 1. Literal date and timestamp bounds -------------------------
        literal_bounds = (
            PayloadBuilder("dynamic_dates")
            .columns("report_date", "updated_at")
            .column_filter(
                "dynamic_dates.report_date", "greater_or_equal_than", "2020-03-01"
            )
            .column_filter(
                "dynamic_dates.report_date", "less_or_equal_than", "2020-03-31"
            )
            .column_filter(
                "dynamic_dates.updated_at",
                "greater_or_equal_than",
                "2020-03-01T00:00:00.000Z",
            )
            .column_filter(
                "dynamic_dates.updated_at",
                "less_or_equal_than",
                "2020-03-31T23:59:59.999Z",
            )
            .aggregates("count")
            .aggregate_filter("dynamic_dates.count", "greater_or_equal_than", 0)
            .aggregate_filter("dynamic_dates.count", "less_or_equal_than", 100)
        )
        sql = render_sql(literal_bounds)

        # Literal values are expected verbatim, quoted, in the comparisons.
        assert '"dynamic_dates"."report_date">=\'2020-03-01\'' in sql
        assert '"dynamic_dates"."report_date"<=\'2020-03-31\'' in sql
        assert '"dynamic_dates"."updated_at">=\'2020-03-01T00:00:00.000Z\'' in sql
        assert '"dynamic_dates"."updated_at"<=\'2020-03-31T23:59:59.999Z\'' in sql

        # --- 2. Relative date bounds --------------------------------------
        relative_dates = (
            PayloadBuilder("dynamic_dates")
            .columns("report_date")
            .column_filter(
                "dynamic_dates.report_date", "greater_or_equal_than", "-7d"
            )
            .column_filter("dynamic_dates.report_date", "less_or_equal_than", "+0d")
            .aggregates("count")
        )
        sql = render_sql(relative_dates)

        lower_date = "DATE(DATE(NOW())-INTERVAL '7 DAY')"
        upper_date = "DATE(NOW())"

        assert f'"dynamic_dates"."report_date">={lower_date}' in sql
        assert f'"dynamic_dates"."report_date"<={upper_date}' in sql

        # --- 3. Relative time bounds --------------------------------------
        relative_times = (
            PayloadBuilder("dynamic_dates")
            .columns("updated_at")
            .column_filter("dynamic_dates.updated_at", "greater_or_equal_than", "-3m")
            .column_filter("dynamic_dates.updated_at", "less_or_equal_than", "-2d")
            .aggregates("count")
        )
        sql = render_sql(relative_times)

        lower_time = "DATE(NOW())-INTERVAL '3 MONTH'"
        # The upper bound is expected to be pushed to the last microsecond of
        # the day.
        upper_time = "DATE(NOW())-INTERVAL '2 DAY'+INTERVAL '23 HOUR'+INTERVAL '59 MINUTE'+INTERVAL '59 SECOND'+INTERVAL '999999 MICROSECOND'"

        assert f'"dynamic_dates"."updated_at">={lower_time}' in sql
        assert f'"dynamic_dates"."updated_at"<={upper_time}' in sql

        # --- 4. Relative bounds against a preset "today" ------------------
        preset_today = (
            PayloadBuilder("dynamic_dates", today="2020-03-05")
            .columns("report_date", "updated_at")
            .column_filter(
                "dynamic_dates.report_date", "greater_or_equal_than", "-7d"
            )
            .column_filter("dynamic_dates.report_date", "less_or_equal_than", "+0d")
            .column_filter("dynamic_dates.updated_at", "greater_or_equal_than", "-3m")
            .column_filter("dynamic_dates.updated_at", "less_or_equal_than", "-1d")
            .aggregates("count")
        )
        sql = render_sql(preset_today)

        # With a preset "today", the literal date is expected in place of NOW().
        lower_date = "DATE(DATE('2020-03-05')-INTERVAL '7 DAY')"
        upper_date = "DATE('2020-03-05')"

        assert f'"dynamic_dates"."report_date">={lower_date}' in sql
        assert f'"dynamic_dates"."report_date"<={upper_date}' in sql

        lower_time = "DATE('2020-03-05')-INTERVAL '3 MONTH'"
        upper_time = "DATE('2020-03-05')-INTERVAL '1 DAY'+INTERVAL '23 HOUR'+INTERVAL '59 MINUTE'+INTERVAL '59 SECOND'+INTERVAL '999999 MICROSECOND'"

        assert f'"dynamic_dates"."updated_at">={lower_time}' in sql
        assert f'"dynamic_dates"."updated_at"<={upper_time}' in sql
예제 #8
0
    def test_meltano_invalid_filters(self, gitflix):
        """Check that invalid filter definitions are rejected with clear errors.

        Every case starts from the same ``users_design`` selection, appends one
        invalid filter, and expects ``MeltanoQuery`` construction to raise the
        given exception type with the given text in its message.
        """

        def base_payload():
            """Return a fresh builder with the selection shared by all cases."""
            return (
                PayloadBuilder("users_design")
                .columns("gender")
                .aggregates("count", "avg_age", "sum_clv")
            )

        def assert_rejected(bad_payload, expected_error, expected_message):
            """Assert that building a query from ``bad_payload`` fails as expected."""
            # The constructor call is the statement expected to raise, so it is
            # not wrapped in ``assert``: ``pytest.raises`` already fails the
            # test if nothing is raised.
            with pytest.raises(expected_error) as excinfo:
                MeltanoQuery(
                    definition=bad_payload.payload,
                    design_helper=gitflix.design("users_design"),
                )

            assert expected_message in str(excinfo.value)

        # Test for wrong expression
        assert_rejected(
            base_payload().column_filter(
                "users_design.gender", "WRONG_EXPRESSION_TYPE", "male"
            ),
            NotImplementedError,
            "Unknown filter expression: WRONG_EXPRESSION_TYPE",
        )

        # Test for wrong value
        assert_rejected(
            base_payload().aggregate_filter("users_design.sum_clv", "equal_to", None),
            ParseError,
            "Filter expression: equal_to needs a non-empty value.",
        )

        # Test for table not defined in design using legacy format
        assert_rejected(
            base_payload().legacy_column_filter(
                "UNAVAILABLE_SOURCE", "gender", "equal_to", "male"
            ),
            ParseError,
            "Table UNAVAILABLE_SOURCE not found in design users_design",
        )

        # Test for table not defined in design
        assert_rejected(
            base_payload().column_filter(
                "UNAVAILABLE_SOURCE.gender", "equal_to", "male"
            ),
            ParseError,
            "Attribute UNAVAILABLE_SOURCE.gender not found in design users_design",
        )

        # Test for column not defined in design
        assert_rejected(
            base_payload().column_filter(
                "users_design.UNAVAILABLE_COLUMN", "equal_to", "male"
            ),
            ParseError,
            "Attribute users_design.UNAVAILABLE_COLUMN not found in design users_design",
        )

        # Test for aggregate not defined in design
        assert_rejected(
            base_payload().aggregate_filter(
                "users_design.UNAVAILABLE_AGGREGATE", "less_than", 50
            ),
            ParseError,
            "Attribute users_design.UNAVAILABLE_AGGREGATE not found in design users_design",
        )