コード例 #1
0
    def aggregation_definition(self, context: TelRootContext) -> Optional[AggregationDefinition]:
        """
        Derive the aggregation definition of this node from its two operands.

        The left operand is asked first, then the right one; both answers are handed to
        ``AggregationDefinition.common_defined_definition`` and its result is returned as is.

        :param context: TEL root context forwarded to both operands
        :return: The result of ``common_defined_definition`` for the operand pair (``None`` is allowed by the
            return type)
        """
        left_definition = self._left.aggregation_definition(context)
        right_definition = self._right.aggregation_definition(context)

        return AggregationDefinition.common_defined_definition([left_definition, right_definition])
コード例 #2
0
ファイル: mappers.py プロジェクト: panoramichq/panoramic-cli
    def to_husky(origin: PanoField) -> Taxon:
        """
        Map an external field definition to the internal taxon representation.

        - The slug is prefixed with ``<data_source><NAMESPACE_DELIMITER>`` when the field has a data source.
        - The aggregation, when present, is re-parsed into an ``AggregationDefinition`` from its dict form.
        - The data type must resolve to a ``ValidationType`` member; this is enforced with an ``assert``.

        The resulting taxon is always marked visible and bound to the company returned by ``get_company_id()``.
        """
        if origin.data_source is None:
            slug = origin.slug
        else:
            slug = f'{origin.data_source}{NAMESPACE_DELIMITER}{origin.slug}'

        aggregation = AggregationDefinition.parse_obj(origin.aggregation.to_dict()) if origin.aggregation else None

        validation = EnumHelper.from_value(ValidationType, origin.data_type)
        assert validation

        taxon_fields = dict(
            slug=slug,
            taxon_group=origin.group,
            display_name=origin.display_name,
            taxon_type=origin.field_type,
            validation_type=validation,
            taxon_description=origin.description,
            data_source=origin.data_source,
            calculation=origin.calculation,
            aggregation=aggregation,
            display_state=DisplayState.visible,
            company_id=get_company_id(),
        )
        return Taxon(**taxon_fields)
コード例 #3
0
def test_common_defined_definition(
    agg_definitions: List[AggregationDefinition],
    fallback: bool,
    taxon_type: Optional[TaxonTypeEnum],
    expected: Optional[AggregationDefinition],
):
    """
    Check ``AggregationDefinition.common_defined_definition`` against one expected outcome.

    When either ``taxon_type`` or ``fallback`` is ``None`` the method is called with the definitions only;
    otherwise ``fallback`` and ``taxon_type`` are passed along as additional positional arguments.
    """
    rely_on_defaults = taxon_type is None or fallback is None
    extra_args = () if rely_on_defaults else (fallback, taxon_type)

    actual = AggregationDefinition.common_defined_definition(agg_definitions, *extra_args)

    if expected is None:
        assert actual is None
        return

    assert actual
    assert actual.dict() == expected.dict()
コード例 #4
0
 def __init__(
     self,
     context: TelRootContext,
     value: TelExpression,
     aggregation: Optional[AggregationDefinition] = None,
     label: Optional[str] = None,
 ):
     """
     Initialise the node on top of the parent initialiser.

     :param context: TEL root context, forwarded to the parent initialiser
     :param value: Wrapped expression, forwarded to the parent initialiser
     :param aggregation: Aggregation to remember; a ``sum`` definition is used when this is falsy
     :param label: Optional label stored on the node
     """
     super().__init__(context, value)

     if aggregation:
         self._aggregation = aggregation
     else:
         # No aggregation supplied - default to SUM
         self._aggregation = AggregationDefinition(type=AggregationType.sum)

     self._label = label
コード例 #5
0
 def __init__(
     self,
     formula: ClauseElement,
     label: str,
     aggregation: Optional[AggregationDefinition] = None,
     data_source: Optional[str] = None,
 ):
     """
     Store a formula together with its label, aggregation and data source.

     :param formula: SQL clause kept as is
     :param label: Label of the formula; stored after passing through ``safe_identifier``
     :param aggregation: Aggregation of the formula; a ``sum`` definition is used when this is falsy
     :param data_source: Optional data source kept as is
     """
     self.formula = formula
     self.label = safe_identifier(label)

     if aggregation:
         self.aggregation = aggregation
     else:
         # No aggregation supplied - default to SUM
         self.aggregation = AggregationDefinition(type=AggregationType.sum)

     self.data_source = data_source
コード例 #6
0
    def result(self, context: TelRootContext) -> TelQueryResult:
        """
        Produce the query result of the wrapped value, moved into this node's phase.

        - If the wrapped value is already in this node's phase, its result is returned untouched.
        - If the wrapped value is in the ``dimension`` or ``any`` phase and uses some taxons, its SQL is
          registered as a dimension formula (without aggregation) and replaced by a reference to the
          formula's label. When no explicit label exists, a new one is taken from the context and cached
          on the node.
        - Any other phase of the wrapped value raises ``RuntimeError``.
        """
        inner = self._value.result(context)
        value_phase = self._value.phase(context)

        # Already in the target phase - nothing to move
        if value_phase == self._phase:
            return inner

        if value_phase not in (TelPhase.dimension, TelPhase.any):
            raise RuntimeError(f'Cannot move to {self._phase} phase from {value_phase}')

        dimension_formulas = []

        if self.used_taxons(context).has_some():
            if not (self._label or self._cached_label):
                self._cached_label = context.new_label

            formula_label = self._label or self._cached_label
            # no aggregations are performed in Dimension Builder
            no_aggregation = AggregationDefinition(type=AggregationType.not_set)
            dimension_formulas.append(PreFormula(inner.sql, cast(str, formula_label), no_aggregation))

            sql = literal_column(safe_quote_identifier(formula_label, context.husky_dialect))
            template = sql
        else:
            sql = inner.sql
            template = inner.template

        label = self._label or self._cached_label or inner.label

        if self._value.invalid_value(context):
            # Invalid value: only the formulas created here are passed on
            return TelQueryResult(
                sql, dialect=context.husky_dialect, dimension_formulas=dimension_formulas, label=label
            )

        return TelQueryResult(
            sql=sql,
            dialect=context.husky_dialect,
            aggregations=inner.aggregations,
            dimension_formulas=dimension_formulas + inner.dimension_formulas,
            data_source_formula_templates=inner.data_source_formula_templates,
            override_mappings=inner.override_mappings,
            label=label,
            exclude_slugs=inner.exclude_slugs,
            template=template,
        )
コード例 #7
0
    def plan_phase_transitions(self, context: TelRootContext) -> TelExpression:
        """
        Wrap the value in the phase nodes needed to get from its current phase to the root.

        - Invalid value: the node itself is returned unchanged.
        - Sub-request only: a data-source dimension is wrapped in a dimension phase; anything else is
          only re-rooted.
        - Dimension phases: dimension (for data-source dimensions) -> aggregation -> post-aggregation,
          labelled with the taxon slug and aggregated by the value's own aggregation definition.
        - ``any`` phase: aggregation -> post-aggregation with an unset aggregation.
        - ``metric_pre`` phase: post-aggregation only.
        - Every other phase: the value is only re-rooted.
        """
        if self._value.invalid_value(context):
            return self

        value = self._value.plan_phase_transitions(context)
        phase = value.phase(context)

        if context.subrequest_only:
            if TelPhase.dimension_data_source == phase:
                return TelRoot.copy(self, TelDimensionPhase.copy(self, value))
            return TelRoot.copy(self, value)

        if phase.is_dimension():
            if TelPhase.dimension_data_source == phase:
                aggregated = TelAggregationPhase.copy(
                    self, TelDimensionPhase.copy(self, value), label=context.taxon_slug
                )
            else:
                aggregated = TelAggregationPhase.copy(self, value, label=context.taxon_slug).plan_phase_transitions(
                    context
                )

            post_aggregated = TelPostAggregationPhase.copy(
                self,
                aggregated,
                aggregation=value.aggregation_definition(context),
                label=context.taxon_slug,
            )
            return TelRoot.copy(self, post_aggregated)

        if TelPhase.any == phase:
            aggregated = TelAggregationPhase.copy(self, value).plan_phase_transitions(context)
            post_aggregated = TelPostAggregationPhase.copy(
                self, aggregated, aggregation=AggregationDefinition(type=AggregationType.not_set)
            )
            return TelRoot.copy(self, post_aggregated)

        if TelPhase.metric_pre == phase:
            post_aggregated = TelPostAggregationPhase.copy(
                self, value, aggregation=value.aggregation_definition(context), label=context.taxon_slug
            )
            return TelRoot.copy(self, post_aggregated)

        return TelRoot.copy(self, value)
コード例 #8
0
    def aggregation_definition(self, context: TelRootContext) -> Optional[AggregationDefinition]:
        """
        Work out the aggregation definition for this node.

        Resolution order:

        1. the calculation expression, when there is one, decides;
        2. otherwise the aggregation defined on the taxon is enforced, when there is one;
        3. otherwise ``not_set`` is used for a missing taxon or a ``'metric'`` taxon, and ``group_by`` for
           every other taxon type.
        """
        # there was calculation expression so let's deduce the aggregation type from it
        if self._calculation_expr:
            return self._calculation_expr.aggregation_definition(context)

        taxon = self._taxon

        # taxon is defined and we also want to enforce its aggregation type
        if taxon and taxon.aggregation:
            return taxon.aggregation

        # otherwise, no aggregation type is set for metric & group_by aggregation is set for dimensions
        if taxon is None or taxon.taxon_type == 'metric':
            agg_type = AggregationType.not_set
        else:
            agg_type = AggregationType.group_by

        return AggregationDefinition(type=agg_type)
コード例 #9
0
 def test_fb_tw_merged_objective_and_generic_cpm(self, mock__get_taxons):
     """
     Plan a blending request and check every part of the resulting TEL plan:
     per-data-source templates, dimension formulas, pre-aggregation formulas and post-aggregation formulas.
     Formula lists are compared through their ``repr``.
     """
     preprocess_request(self._blending_request)
     taxon_manager = BlendingTaxonManager(self._blending_request)
     taxon_manager.load_all_used_taxons(SNOWFLAKE_HUSKY_CONTEXT)
     plan = taxon_manager.plan

     # One objective template per data source, each under its own generated label
     facebook_template = SqlFormulaTemplate(
         SqlTemplate('''${facebook_ads|objective}'''),
         '''__fb_tw_merged_objective1''',
         'facebook_ads',
         {'facebook_ads|objective'},
     )
     twitter_template = SqlFormulaTemplate(
         SqlTemplate('''${twitter|objective}'''),
         '''__fb_tw_merged_objective2''',
         'twitter',
         {'twitter|objective'},
     )
     assert plan.data_source_formula_templates == {
         'facebook_ads': [facebook_template],
         'twitter': [twitter_template],
     }

     # The merged objective is coalesced from both data source columns
     expected_dimension_formulas = [
         PreFormula(
             '''coalesce(__fb_tw_merged_objective1, __fb_tw_merged_objective2)''',
             '''fb_tw_merged_objective''',
             AggregationDefinition(type=AggregationType.not_set),
             None,
         ),
     ]
     assert [repr(formula) for formula in plan.dimension_formulas] == [
         repr(formula) for formula in expected_dimension_formulas
     ]

     # Objective is grouped by; both halves of the CPM ratio are summed
     expected_metric_pre = [
         PreFormula(
             '''fb_tw_merged_objective''',
             '''fb_tw_merged_objective''',
             AggregationDefinition(type=AggregationType.group_by),
             None,
         ),
         PreFormula(
             '''1000 * (coalesce(facebook_ads_spend_5811c78c7c741b5a, 0) + coalesce(twitter_spend_68657fbb141b10c8, 0))''',
             '''__generic_cpm1''',
             AggregationDefinition(type=AggregationType.sum),
             None,
         ),
         PreFormula(
             '''coalesce(facebook_ads_impressions_0bf2e36fb4e71190, 0) + coalesce(twitter_impressions_ef12a84724a0ad7d, 0)''',
             '''__generic_cpm2''',
             AggregationDefinition(type=AggregationType.sum),
             None,
         ),
     ]
     assert [repr(formula) for formula in plan.metric_pre] == [repr(formula) for formula in expected_metric_pre]

     expected_merge_taxon = get_mocked_taxons_by_slug(['fb_tw_merged_objective'])[0]
     expected_cpm_taxon = get_mocked_taxons_by_slug(['generic_cpm'])[0]

     # Post formulas come paired with the taxon they belong to
     expected_metric_post = [
         (PostFormula('fb_tw_merged_objective', 'fb_tw_merged_objective'), expected_merge_taxon),
         (
             PostFormula(
                 '__generic_cpm1 / nullif(__generic_cpm2, 0)',
                 '__generic_cpm1 / nullif(__generic_cpm2, 0)',
             ),
             expected_cpm_taxon,
         ),
     ]
     assert [repr(pair) for pair in plan.metric_post] == [repr(pair) for pair in expected_metric_post]
コード例 #10
0
    def plan(
        cls,
        ctx: HuskyQueryContext,
        request: BlendingDataRequest,
        projection_taxons: TaxonMap,
        all_taxons: TaxonMap,
        taxon_to_ds: Dict[str, Set[str]],
    ) -> TelPlan:
        """
        Prepares taxons plan

        Projection taxons:

        - A taxon with a calculation is parsed via ``cls._parse_taxon_expr``, using the data sources registered
          in ``taxon_to_ds`` under its original slug (``comparison_taxon_slug_origin`` when set, the slug
          otherwise). The parsed templates, dimension formulas, pre formulas, post formula and override
          mappings are added to the plan.
        - A taxon without a calculation is selected directly by its SQL-safe slug. Its own aggregation is used
          when defined; otherwise ``group_by`` for dimensions and ``sum`` for everything else.

        Comparison taxons (only when the request lists some):

        - A taxon with a calculation is parsed against all data sources of the request under the label
          ``comp_join_col_<slug>``. Its templates, dimension formulas and override mappings go to both the
          regular and the comparison-specific plan attributes; the dimension formula labels become
          comparison join columns.
        - A taxon without a calculation is added as a raw join column.

        Finally, filter templates are populated via ``cls._populate_filter_templates_to_plan``.

        NOTE(review): the template mappings on the plan are indexed by data source without initialising the
        key first, so they presumably behave like ``defaultdict(list)`` - confirm on ``TelPlan``.

        :param ctx: Husky query context passed to the expression parser
        :param request: Blending request being planned
        :param projection_taxons: Taxons to be projected
        :param all_taxons: All taxons known to the request, looked up by slug
        :param taxon_to_ds: Data sources per (original) taxon slug
        :return: The populated plan
        """
        plan = TelPlan()
        all_data_sources = {subreq.properties.data_source for subreq in request.data_subrequests}

        for taxon in projection_taxons.values():
            if taxon.calculation:
                original_slug = taxon.comparison_taxon_slug_origin or taxon.slug
                taxon_data_sources = taxon_to_ds[original_slug]
                result = cls._parse_taxon_expr(ctx, taxon, taxon.slug, taxon_data_sources, all_taxons)

                # Create dict for dim templates, key is data source
                for ds_formula in result.data_source_formula_templates:
                    plan.data_source_formula_templates[ds_formula.data_source].append(ds_formula)

                plan.dimension_formulas.extend(result.dimension_formulas)
                plan.metric_pre.extend(result.pre_formulas)
                plan.metric_post.append((result.post_formula, taxon))

                plan.override_mappings.update(result.override_mappings)
            else:
                sql_slug = column(taxon.slug_safe_sql_identifier)
                # Raw taxon: dimensions are grouped by, everything else is summed unless the taxon says otherwise
                default_type = AggregationType.group_by if taxon.is_dimension else AggregationType.sum
                aggregation = taxon.aggregation or AggregationDefinition(type=default_type)

                plan.metric_pre.append(PreFormula(sql_slug, taxon.slug, aggregation))
                plan.metric_post.append((PostFormula(sql_slug), taxon))

        if request.comparison and request.comparison.taxons:
            for taxon in [all_taxons[slug] for slug in request.comparison.taxons]:
                if taxon.calculation:
                    result = cls._parse_taxon_expr(
                        ctx, taxon, 'comp_join_col_' + taxon.slug, all_data_sources, all_taxons
                    )

                    # Create dict for dim templates, key is data source; the comparison-specific mapping
                    # receives the very same templates
                    for ds_formula in result.data_source_formula_templates:
                        plan.data_source_formula_templates[ds_formula.data_source].append(ds_formula)
                        plan.comparison_data_source_formula_templates[ds_formula.data_source].append(ds_formula)

                    if result.override_mappings:
                        plan.override_mappings.update(result.override_mappings)
                        plan.comparison_override_mappings.update(result.override_mappings)

                    plan.dimension_formulas.extend(result.dimension_formulas)
                    plan.comparison_dimension_formulas.extend(result.dimension_formulas)
                    plan.comparison_join_columns.extend(
                        dim_formula.label for dim_formula in result.dimension_formulas
                    )
                else:
                    # Raw comparison join taxon.. add it to join and also to select from dataframes
                    plan.comparison_join_columns.append(taxon.slug_safe_sql_identifier)
                    plan.comparison_raw_taxon_slugs.append(taxon.slug_safe_sql_identifier)

        cls._populate_filter_templates_to_plan(ctx, plan, request, all_taxons)

        return plan
コード例 #11
0
         data_source_formula_templates=[],
         dimension_formulas=[],
         pre_formulas=[],
         post_formula=PostFormula('690'),
     ),
 ),
 (
     ("123 + (twitter|spend / 567)", {'twitter'}, TaxonTypeEnum.metric),
     ExprResult(
         data_source_formula_templates=[],
         dimension_formulas=[],
         pre_formulas=[
             PreFormula(
                 '(twitter_spend_68657fbb141b10c8 / nullif(567, 0))',
                 '__1',
                 AggregationDefinition(type=AggregationType.sum),
                 None,
             )
         ],
         post_formula=PostFormula(
             'coalesce(123, 0) + coalesce(__1, 0)'),
     ),
 ),
 (
     ("444.3 / (computed_metric_avg - 1)", set(), TaxonTypeEnum.metric),
     ExprResult(
         data_source_formula_templates=[],
         dimension_formulas=[],
         pre_formulas=[
             PreFormula('1000 * avg_spend', '__1',
                        AggregationDefinition(type=AggregationType.avg),
コード例 #12
0
 def aggregation_definition(self, context: TelRootContext) -> Optional[AggregationDefinition]:
     """
     Report the aggregation definition for this node, which is always an unset (``not_set``) aggregation.

     :param context: TEL root context; not consulted by this implementation
     """
     unset_aggregation = AggregationDefinition(type=AggregationType.not_set)
     return unset_aggregation
コード例 #13
0
def get_case_id_common_defined_agg(val):
    """
    Build a readable identifier for one parametrized value.

    - a list is rendered as its items' ``type`` joined by ``', '``, with falsy items shown as ``'None'``
    - a single ``AggregationDefinition`` is rendered as the value of its type
    - anything else is rendered with ``str``
    """
    if isinstance(val, list):
        parts = []
        for agg in val:
            parts.append(agg.type if agg else 'None')
        return ', '.join(parts)

    if isinstance(val, AggregationDefinition):
        return str(val.type.value if val else None)

    return str(val)


@pytest.mark.parametrize(
    "agg_definitions, fallback, taxon_type, expected",
    [
        [
            [
                AggregationDefinition(type=AggregationType.sum),
                AggregationDefinition(type=AggregationType.sum)
            ],
            False,
            None,
            AggregationDefinition(type=AggregationType.sum),
        ],
        [
            [
                AggregationDefinition(type=AggregationType.not_set),
                AggregationDefinition(type=AggregationType.avg)
            ],
            False,
            None,
            AggregationDefinition(type=AggregationType.avg),
        ],