def test_scope_filters(self, mock__get_taxons, mock__load_models):
    """Scope pre-aggregation filters (a group clause) end up in the generated SQL."""
    mock__load_models.return_value = [
        get_mock_entity_model(),
        get_mock_metric_model(),
    ]
    request = InternalDataRequest({
        'scope': {
            'project_id': 'project',
            'company_id': 'company',
            "preaggregation_filters": {
                "type": "group",
                "logical_operator": "AND",
                "clauses": [
                    {
                        "type": "taxon_value",
                        "taxon": "account_id",
                        "operator": "=",
                        "value": "595126134331606"
                    },
                ],
                "negate": False,
            },
        },
        'properties': {
            'data_sources': ['mock_data_source']
        },
        'taxons': ['account_id', 'ad_name'],
    })
    df = QueryBuilder.build_query(
        SNOWFLAKE_HUSKY_CONTEXT,
        request,
        QueryInfo.create(request),
        preloaded_taxons=TAXON_MAP,
    )

    # Compare the compiled SQL with the stored expectation file.
    actual = compile_query(df.query)
    self.write_test_expectations('query.sql', actual)
    expected = self.read_test_expectations('query.sql')
    assert expected == actual
    # Only the entity model should be needed to satisfy this request.
    self.assertEqual({'mock_data_source.entity_model'}, df.used_model_names)
# Ejemplo n.º 2
# 0
    def _build_data_blend_query(
        cls,
        ctx: HuskyQueryContext,
        taxon_manager: BlendingTaxonManager,
        config_arg: BlendingDataRequest,
        query_info: BlendingQueryInfo,
    ) -> Dataframe:
        """
        Builds one subquery per subrequest and blends them all into a single dataframe.
        :param ctx: Husky query context
        """
        # Work on a clone so the caller's request object is never mutated.
        request = BlendingDataRequest(config_arg.to_native())

        subquery_dataframes = []
        for subrequest in request.data_subrequests:
            # Extend the subrequest taxons with the comparison taxons.
            subrequest.taxons = taxon_manager.get_subrequest_taxons(subrequest)

            sub_query_info = QueryInfo({'used_raw_taxons': subrequest.taxons})
            query_info.subrequests_info.append(sub_query_info)

            data_source = subrequest.properties.data_source
            plan = taxon_manager.plan
            allowed_sources = (
                set(request.physical_data_sources)
                if request.physical_data_sources
                else None
            )

            # Build the query for this subrequest and collect its dataframe.
            subquery_dataframes.append(
                MainQueryBuilder.build_query(
                    ctx,
                    subrequest.to_internal_model(),
                    sub_query_info,
                    taxon_manager.used_taxons,
                    plan.data_source_formula_templates[data_source],
                    filter_templates=plan.data_source_filter_templates[data_source],
                    allowed_physical_data_sources=allowed_sources,
                )
            )

        return blend_dataframes(
            ctx, subquery_dataframes, taxon_manager.plan.data_source_formula_templates
        )
 def test_simple_concat(self, mock__get_taxons, mock__load_models):
     """A concat() dimension template is selected alongside plain taxons in the query."""
     mock__load_models.return_value = [
         get_mock_entity_model(),
         get_mock_metric_model(),
     ]
     # Request with a single taxon_value pre-aggregation filter on account_id.
     request = InternalDataRequest({
         'scope': {
             'project_id': 'project-id',
             'company_id': 'company-id',
             "preaggregation_filters": {
                 "type": "taxon_value",
                 "taxon": "account_id",
                 "operator": "=",
                 "value": "abc",
             },
         },
         'properties': {
             'data_sources': ['mock_data_source']
         },
         'taxons': ['account_id', 'ad_name'],
     })
     # Extra computed dimension: concat(${ad_name},'xx') labeled '__1'.
     dimension_templates = [
         SqlFormulaTemplate(SqlTemplate('''concat(${ad_name},'xx')'''),
                            '''__1''', MOCK_DATA_SOURCE_NAME, {'ad_name'})
     ]
     df = QueryBuilder.build_query(
         SNOWFLAKE_HUSKY_CONTEXT,
         request,
         QueryInfo.create(request),
         preloaded_taxons=TAXON_MAP,
         dimension_templates=dimension_templates,
     )
     # Compare compiled SQL with the stored expectation file.
     actual = compile_query(df.query)
     self.write_test_expectations('query.sql', actual)
     expected = self.read_test_expectations('query.sql')
     assert expected == actual
     # Only the entity model should have been used for this request.
     self.assertEqual({'mock_data_source.entity_model'},
                      df.used_model_names)
# Ejemplo n.º 4
# 0
    def build_query(
        ctx: HuskyQueryContext,
        subrequest: InternalDataRequest,
        query_info: QueryInfo,
        preloaded_taxons: TaxonMap,
        dimension_templates: Optional[List[SqlFormulaTemplate]] = None,
        filter_templates: Optional[TaxonToTemplate] = None,
    ) -> Dataframe:
        """
        Builds the select + projection query for a single-data-source subrequest.

        :param ctx: Husky query context
        :param subrequest: internal data request to build the query for
        :param query_info: query info object; its definition is populated here
        :param preloaded_taxons: map of already-loaded taxons
        :param dimension_templates: SQL column templates to select
        :param filter_templates: filter templates keyed by taxon slug, referenced from scope or preagg filters
        :return: dataframe wrapping the final projection query and its taxons
        """
        if dimension_templates is None:
            dimension_templates = []
        if filter_templates is None:
            filter_templates = {}

        # Resolve the taxons this subrequest needs.
        taxon_mgr = SimpleTaxonManager.initialize(
            subrequest, dimension_templates, filter_templates, preloaded_taxons
        )

        data_sources = set(subrequest.properties.data_sources)
        if len(subrequest.properties.data_sources) != 1:
            # Joining across data sources is more complex and not implemented yet.
            raise MultipleDataSources(data_sources)
        data_source = subrequest.properties.data_sources[0]

        models = ModelRetriever.load_models(
            data_sources, subrequest.scope, subrequest.properties.model_name
        )

        # Build the model graph and the inner select query over it.
        graph = GraphBuilder.create_with_models(models)
        select_query, taxon_model_info_map, effectively_used_models = SelectBuilder(
            ctx,
            subrequest.scope,
            taxon_mgr.graph_select_taxons,
            taxon_mgr.projection_taxons,
            graph,
            data_source,
            subrequest.preaggregation_filters,
            dimension_templates,
            filter_templates,
        ).get_query()

        query_info.definition = QueryDefinition(
            {'effectively_used_models': effectively_used_models}
        )
        logger.debug('Select Query: %s', compile_query(select_query, ctx.dialect))

        # Wrap the select in the projection query (ordering, paging, labeling).
        final_dataframe = ProjectionBuilder.query(
            select_query,
            taxon_model_info_map,
            taxon_mgr.projection_taxons,
            subrequest.properties.data_source,
            subrequest.order_by,
            subrequest.limit,
            subrequest.offset,
            dimension_templates,
        )
        logger.debug('Projection Query: %s', compile_query(final_dataframe.query, ctx.dialect))
        return final_dataframe
    def _build_comparison_blend_query(
        cls,
        ctx: HuskyQueryContext,
        config_arg: BlendingDataRequest,
        taxon_manager: BlendingTaxonManager,
        query_info: BlendingQueryInfo,
        allowed_physical_data_sources: Optional[Set[str]] = None,
    ) -> Optional[Dataframe]:
        """
        Builds comparison query for each subrequest and then blends them all into one comparison dataframe.

        Returns None when no subrequest yields any comparison taxons.

        :param ctx: Husky query context
        :param config_arg: blending request; cloned before subrequests are modified
        :param taxon_manager: taxon manager holding the TEL plan and taxon map
        :param query_info: aggregate query info; comparison subrequest infos are appended to it
        :param allowed_physical_data_sources: optional restriction on physical data sources
        """
        dataframes = []
        config = BlendingDataRequest(config_arg.to_native(
        ))  # Clone, since we will be modifying subqueries
        assert config.comparison, 'Comparison must be defined when trying to build comparison query..'
        comparison: ComparisonConfig = config.comparison
        for _subrequest in config.data_subrequests:
            subrequest = cls._build_comparison_subrequest(
                _subrequest, comparison, taxon_manager)
            data_source = subrequest.properties.data_source

            # if no comparison taxons were found for this subrequest, skip creating comparison query for it as well
            if len(subrequest.taxons) == 0:
                continue

            bm_sub_query_info = QueryInfo.create(subrequest)
            query_info.comparison_subrequests_info.append(bm_sub_query_info)
            # Build comparison dataframe and add it to a list.
            # TODO pass down TelPlan for comparisons
            # ComparisonRequestBuilder might have added filters (typically for company id / project id),
            # so we create new filter templates for this comparison subrequest.
            filter_templates = TelPlanner.get_preaggregation_filter_templates(
                ctx,
                [
                    subrequest.preaggregation_filters,
                    subrequest.scope.preaggregation_filters
                ],
                taxon_manager.taxon_map,
                data_source,
            )

            dataframes.append(
                QueryBuilder.build_query(
                    ctx,
                    subrequest,
                    bm_sub_query_info,
                    taxon_manager.used_taxons,
                    dimension_templates=taxon_manager.plan.
                    comparison_data_source_formula_templates[data_source],
                    filter_templates=filter_templates,
                    allowed_physical_data_sources=allowed_physical_data_sources,
                ))

        # if no comparison subrequests were created, there is no need to blend data frames
        if len(dataframes) == 0:
            return None

        # Blend all comparison dataframes into one
        # TODO pass down TelPlan for comparisons
        data_source_formula_templates = taxon_manager.plan.comparison_data_source_formula_templates
        dataframe = blend_dataframes(ctx, dataframes,
                                     data_source_formula_templates)

        # Prefix all comparison metric columns with 'comparison@' and create comparison taxon for it.
        query = dataframe.query
        final_columns = []
        aliased_taxon_by_slug: Dict[TaxonExpressionStr,
                                    DataframeColumn] = dict()
        for slug, df_column in dataframe.slug_to_column.items():
            # Alias metrics with comparison@ prefix, and select dimensions..
            if df_column.taxon.is_dimension:
                # Dimensions keep their original slug; only the taxon object is copied.
                new_taxon = df_column.taxon.copy(deep=True)
                new_slug = TaxonExpressionStr(f'{slug}')
            else:
                new_slug, new_taxon = BlendingTaxonManager.create_comparison_taxon(
                    df_column.taxon)

            # Re-label the blended column under the (possibly prefixed) safe identifier.
            final_columns.append(query.c[safe_identifier(slug)].label(
                new_taxon.slug_safe_sql_identifier))
            aliased_taxon_by_slug[new_slug] = DataframeColumn(
                new_slug, new_taxon, df_column.quantity_type)
        for pre_formulas in data_source_formula_templates.values():
            # and also select the dim columns from dim templates.
            for pre_formula in pre_formulas:
                final_columns.append(
                    literal_column(
                        quote_identifier(pre_formula.label, ctx.dialect)))
        # Deterministic column order keeps the generated SQL stable across runs.
        renamed_cols_query = select(sort_columns(final_columns)).select_from(
            dataframe.query)
        return Dataframe(renamed_cols_query, aliased_taxon_by_slug,
                         dataframe.used_model_names,
                         dataframe.used_physical_data_sources)