def aggregation_definition(self, context: TelRootContext) -> Optional[AggregationDefinition]:
    """
    Derive the aggregation definition of this node from its two operands.

    The left and the right operand (in that order) are asked for their own
    definitions, and both answers are handed over to
    ``AggregationDefinition.common_defined_definition``.

    :param context: TEL root context forwarded to both operands
    :return: the common definition, or None if it isn't possible to deduce one
    """
    operands = (self._left, self._right)
    agg_definitions = [operand.aggregation_definition(context) for operand in operands]
    return AggregationDefinition.common_defined_definition(agg_definitions)
def to_husky(origin: PanoField) -> Taxon:
    """
    Maps external field definitions to internal taxon representation.

    The taxon slug is the field slug, prefixed with the data source and
    ``NAMESPACE_DELIMITER`` when the field has a data source. The field
    aggregation (if any) is re-parsed into an ``AggregationDefinition``.

    :param origin: external field definition to convert
    :return: visible taxon owned by the company returned by ``get_company_id()``
    :raises AssertionError: when no validation type is resolved for ``origin.data_type``
    """
    if origin.data_source is None:
        slug = origin.slug
    else:
        slug = f'{origin.data_source}{NAMESPACE_DELIMITER}{origin.slug}'

    aggregation = (
        AggregationDefinition.parse_obj(origin.aggregation.to_dict()) if origin.aggregation else None
    )

    validation = EnumHelper.from_value(ValidationType, origin.data_type)
    # a taxon cannot be built without a validation type
    assert validation

    return Taxon(
        slug=slug,
        taxon_group=origin.group,
        display_name=origin.display_name,
        taxon_type=origin.field_type,
        validation_type=validation,
        taxon_description=origin.description,
        data_source=origin.data_source,
        calculation=origin.calculation,
        aggregation=aggregation,
        display_state=DisplayState.visible,
        company_id=get_company_id(),
    )
def test_common_defined_definition(
    agg_definitions: List[AggregationDefinition],
    fallback: bool,
    taxon_type: Optional[TaxonTypeEnum],
    expected: Optional[AggregationDefinition],
):
    """
    Check ``AggregationDefinition.common_defined_definition`` against one parametrized case.

    ``fallback`` and ``taxon_type`` are passed to the tested method only when
    both of them are set; otherwise the method is called with the definitions alone.
    """
    call_args = [agg_definitions]
    if taxon_type is not None and fallback is not None:
        call_args.extend((fallback, taxon_type))

    actual = AggregationDefinition.common_defined_definition(*call_args)

    if expected is None:
        assert actual is None
        return

    assert actual
    assert actual.dict() == expected.dict()
def __init__(
    self,
    context: TelRootContext,
    value: TelExpression,
    aggregation: Optional[AggregationDefinition] = None,
    label: Optional[str] = None,
):
    """
    Create the node around ``value``.

    :param context: TEL root context, passed to the parent initializer
    :param value: wrapped expression, passed to the parent initializer
    :param aggregation: aggregation definition to use; a ``sum`` definition is used when omitted
    :param label: optional label stored on the node
    """
    super().__init__(context, value)

    if not aggregation:
        aggregation = AggregationDefinition(type=AggregationType.sum)

    self._aggregation = aggregation
    self._label = label
def __init__(
    self,
    formula: ClauseElement,
    label: str,
    aggregation: Optional[AggregationDefinition] = None,
    data_source: Optional[str] = None,
):
    """
    Store a formula together with its label and aggregation.

    :param formula: SQL clause of the formula
    :param label: label of the formula; stored after passing through ``safe_identifier``
    :param aggregation: aggregation definition to use; a ``sum`` definition is used when omitted
    :param data_source: optional data source the formula belongs to
    """
    self.formula = formula
    self.label = safe_identifier(label)

    if not aggregation:
        aggregation = AggregationDefinition(type=AggregationType.sum)
    self.aggregation = aggregation

    self.data_source = data_source
def result(self, context: TelRootContext) -> TelQueryResult:
    """
    Produce the query result of this phase node.

    - When the wrapped value is already in this node's phase, its result is returned untouched.
    - When the wrapped value is in the ``dimension`` or ``any`` phase, a new result is built.
      If the node uses some taxons, the value SQL is registered as a dimension formula and
      the returned SQL only references that formula by its (quoted) label.
    - Any other phase of the wrapped value is rejected.

    :param context: TEL root context (supplies the dialect and new labels)
    :return: result of the wrapped value moved to this node's phase
    :raises RuntimeError: when the wrapped value is in a phase that cannot be moved to this phase
    """
    result = self._value.result(context)
    dimension_formulas = []
    value_phase = self._value.phase(context)

    # nothing to do, the value already is in the requested phase
    if value_phase == self._phase:
        return result

    if value_phase in [TelPhase.dimension, TelPhase.any]:
        if self.used_taxons(context).has_some():
            # take a label from the context only once, and only if no explicit label is set
            if not self._label and not self._cached_label:
                self._cached_label = context.new_label

            dimension_formulas.append(
                PreFormula(
                    result.sql,
                    cast(str, self._label or self._cached_label),
                    # no aggregations are performed in Dimension Builder
                    AggregationDefinition(type=AggregationType.not_set),
                )
            )

            # from now on, refer to the formula by its label instead of repeating its SQL
            sql = literal_column(safe_quote_identifier(self._label or self._cached_label, context.husky_dialect))
            template = sql
        else:
            # no taxons are used, so the SQL and the template of the value are passed through
            sql = result.sql
            template = result.template

        label = self._label or self._cached_label or result.label

        if not self._value.invalid_value(context):
            # new formulas of this node go before the formulas collected by the value
            return TelQueryResult(
                sql=sql,
                dialect=context.husky_dialect,
                aggregations=result.aggregations,
                dimension_formulas=dimension_formulas + result.dimension_formulas,
                data_source_formula_templates=result.data_source_formula_templates,
                override_mappings=result.override_mappings,
                label=label,
                exclude_slugs=result.exclude_slugs,
                template=template,
            )
        else:
            # invalid value: only the formulas created by this node are kept
            return TelQueryResult(
                sql, dialect=context.husky_dialect, dimension_formulas=dimension_formulas, label=label
            )
    else:
        raise RuntimeError(f'Cannot move to {self._phase} phase from {value_phase}')
def plan_phase_transitions(self, context: TelRootContext) -> TelExpression:
    """
    Wrap the root value into the phase nodes required by the phase of the value.

    :param context: TEL root context (``subrequest_only`` and ``taxon_slug`` are read from it)
    :return: this node when its value is invalid, otherwise a copy of the root
        around the (possibly wrapped) planned value
    """
    # invalid values are not planned at all
    if self._value.invalid_value(context):
        return self

    value = self._value.plan_phase_transitions(context)
    phase = value.phase(context)

    if context.subrequest_only:
        # only values in the dimension_data_source phase are wrapped here (into the dimension phase);
        # all the other phases fall through to the plain copy at the end
        if TelPhase.dimension_data_source == phase:
            return TelRoot.copy(self, TelDimensionPhase.copy(self, value))
    elif phase.is_dimension():
        if TelPhase.dimension_data_source == phase:
            # dimension -> aggregation -> post-aggregation chain around the value
            return TelRoot.copy(
                self,
                TelPostAggregationPhase.copy(
                    self,
                    TelAggregationPhase.copy(self, TelDimensionPhase.copy(self, value), label=context.taxon_slug),
                    aggregation=value.aggregation_definition(context),
                    label=context.taxon_slug,
                ),
            )
        else:
            # aggregation -> post-aggregation chain; the aggregation node plans its own transitions
            return TelRoot.copy(
                self,
                TelPostAggregationPhase.copy(
                    self,
                    TelAggregationPhase.copy(self, value, label=context.taxon_slug).plan_phase_transitions(context),
                    aggregation=value.aggregation_definition(context),
                    label=context.taxon_slug,
                ),
            )
    elif TelPhase.any == phase:
        # same chain as above, but without labels and with the not_set aggregation
        return TelRoot.copy(
            self,
            TelPostAggregationPhase.copy(
                self,
                TelAggregationPhase.copy(self, value).plan_phase_transitions(context),
                aggregation=AggregationDefinition(type=AggregationType.not_set),
            ),
        )
    elif TelPhase.metric_pre == phase:
        # only the post-aggregation node is added around the value
        aggregation = value.aggregation_definition(context)
        return TelRoot.copy(
            self, TelPostAggregationPhase.copy(self, value, aggregation=aggregation, label=context.taxon_slug)
        )

    # no wrapping was needed
    return TelRoot.copy(self, value)
def aggregation_definition(self, context: TelRootContext) -> Optional[AggregationDefinition]:
    """
    Calculates the aggregation definition for this node.

    Resolution order:

    1. the calculation expression, when there is one, decides
    2. otherwise the aggregation defined on the taxon is enforced
    3. otherwise ``not_set`` is used for metrics (and for a missing taxon)
       and ``group_by`` for everything else

    :param context: TEL root context forwarded to the calculation expression
    :return: the deduced definition; None only if the calculation expression cannot deduce one
    """
    calculation = self._calculation_expr
    if calculation:
        # there was calculation expression so let's deduce the aggregation type from it
        return calculation.aggregation_definition(context)

    taxon = self._taxon
    if taxon and taxon.aggregation:
        # taxon is defined and we also want to enforce its aggregation type
        return taxon.aggregation

    # no explicit aggregation anywhere, fall back to the default for the taxon type
    if taxon is None or taxon.taxon_type == 'metric':
        agg_type = AggregationType.not_set
    else:
        agg_type = AggregationType.group_by

    return AggregationDefinition(type=agg_type)
def test_fb_tw_merged_objective_and_generic_cpm(self, mock__get_taxons):
    """
    Plan the blending request of this test case and check every part of the plan:
    data source formula templates, dimension formulas, pre-aggregation formulas
    and post-aggregation formulas (the latter three are compared by ``repr``).
    """
    preprocess_request(self._blending_request)
    manager = BlendingTaxonManager(self._blending_request)
    manager.load_all_used_taxons(SNOWFLAKE_HUSKY_CONTEXT)
    plan = manager.plan

    # templates evaluated per data source
    facebook_template = SqlFormulaTemplate(
        SqlTemplate('${facebook_ads|objective}'),
        '__fb_tw_merged_objective1',
        'facebook_ads',
        {'facebook_ads|objective'},
    )
    twitter_template = SqlFormulaTemplate(
        SqlTemplate('${twitter|objective}'),
        '__fb_tw_merged_objective2',
        'twitter',
        {'twitter|objective'},
    )
    assert plan.data_source_formula_templates == {
        'facebook_ads': [facebook_template],
        'twitter': [twitter_template],
    }

    # dimension formulas
    actual_dimension_formulas = [repr(formula) for formula in plan.dimension_formulas]
    expected_dimension_formulas = [
        repr(
            PreFormula(
                'coalesce(__fb_tw_merged_objective1, __fb_tw_merged_objective2)',
                'fb_tw_merged_objective',
                AggregationDefinition(type=AggregationType.not_set),
                None,
            )
        )
    ]
    assert actual_dimension_formulas == expected_dimension_formulas

    # pre-aggregation formulas
    actual_metric_pre = [repr(formula) for formula in plan.metric_pre]
    expected_metric_pre = [
        repr(
            PreFormula(
                'fb_tw_merged_objective',
                'fb_tw_merged_objective',
                AggregationDefinition(type=AggregationType.group_by),
                None,
            )
        ),
        repr(
            PreFormula(
                '1000 * (coalesce(facebook_ads_spend_5811c78c7c741b5a, 0) + coalesce(twitter_spend_68657fbb141b10c8, 0))',
                '__generic_cpm1',
                AggregationDefinition(type=AggregationType.sum),
                None,
            )
        ),
        repr(
            PreFormula(
                'coalesce(facebook_ads_impressions_0bf2e36fb4e71190, 0) + coalesce(twitter_impressions_ef12a84724a0ad7d, 0)',
                '__generic_cpm2',
                AggregationDefinition(type=AggregationType.sum),
                None,
            )
        ),
    ]
    assert actual_metric_pre == expected_metric_pre

    # post-aggregation formulas, each paired with its taxon
    expected_merge_taxon = get_mocked_taxons_by_slug(['fb_tw_merged_objective'])[0]
    expected_cpm_taxon = get_mocked_taxons_by_slug(['generic_cpm'])[0]

    actual_metric_post = [repr(pair) for pair in plan.metric_post]
    expected_metric_post = [
        repr(pair)
        for pair in [
            (PostFormula('fb_tw_merged_objective', 'fb_tw_merged_objective'), expected_merge_taxon),
            (
                PostFormula(
                    '__generic_cpm1 / nullif(__generic_cpm2, 0)', '__generic_cpm1 / nullif(__generic_cpm2, 0)'
                ),
                expected_cpm_taxon,
            ),
        ]
    ]
    assert actual_metric_post == expected_metric_post
def plan(
    cls,
    ctx: HuskyQueryContext,
    request: BlendingDataRequest,
    projection_taxons: TaxonMap,
    all_taxons: TaxonMap,
    taxon_to_ds: Dict[str, Set[str]],
) -> TelPlan:
    """
    Prepares taxons plan

    Projection taxons with a calculation are parsed and their formulas are collected
    into the plan. Projection taxons without a calculation are selected as plain columns
    with their own aggregation (default is ``group_by`` for dimensions and ``sum`` otherwise).
    Comparison taxons of the request, if any, additionally fill the comparison parts of
    the plan. Finally, the filter templates are populated.

    :param ctx: Husky query context
    :param request: blending request being planned
    :param projection_taxons: taxons to be projected
    :param all_taxons: all taxons used by the request, comparison taxons are looked up here by slug
    :param taxon_to_ds: data sources of a taxon, keyed by the original taxon slug
    :return: the complete plan
    """
    plan = TelPlan()
    all_data_sources = {subreq.properties.data_source for subreq in request.data_subrequests}

    for taxon in projection_taxons.values():
        if taxon.calculation:
            # a comparison taxon is registered under the slug of the taxon it originates from
            original_slug = taxon.comparison_taxon_slug_origin or taxon.slug
            taxon_data_sources = taxon_to_ds[original_slug]
            result = cls._parse_taxon_expr(ctx, taxon, taxon.slug, taxon_data_sources, all_taxons)

            # Create dict for dim templates, key is data source
            for ds_formula in result.data_source_formula_templates:
                plan.data_source_formula_templates[ds_formula.data_source].append(ds_formula)

            plan.dimension_formulas.extend(result.dimension_formulas)
            plan.metric_pre.extend(result.pre_formulas)
            plan.metric_post.append((result.post_formula, taxon))
            plan.override_mappings.update(result.override_mappings)
        else:
            # raw taxon, select its column directly
            sql_slug = column(taxon.slug_safe_sql_identifier)
            if taxon.is_dimension:
                aggregation = taxon.aggregation or AggregationDefinition(type=AggregationType.group_by)
            else:
                aggregation = taxon.aggregation or AggregationDefinition(type=AggregationType.sum)

            plan.metric_pre.append(PreFormula(sql_slug, taxon.slug, aggregation))
            plan.metric_post.append((PostFormula(sql_slug), taxon))

    if request.comparison and request.comparison.taxons:
        for taxon in [all_taxons[slug] for slug in request.comparison.taxons]:
            if taxon.calculation:
                # comparison join taxons are parsed against all data sources of the request
                taxon_data_sources = all_data_sources
                result = cls._parse_taxon_expr(
                    ctx, taxon, 'comp_join_col_' + taxon.slug, taxon_data_sources, all_taxons
                )

                # Create dict for dim templates, key is data source
                for ds_formula in result.data_source_formula_templates:
                    plan.data_source_formula_templates[ds_formula.data_source].append(ds_formula)

                if result.override_mappings:
                    plan.override_mappings.update(result.override_mappings)
                    plan.comparison_override_mappings.update(result.override_mappings)

                plan.dimension_formulas.extend(result.dimension_formulas)
                for ds_formula in result.data_source_formula_templates:
                    plan.comparison_data_source_formula_templates[ds_formula.data_source].append(ds_formula)

                plan.comparison_dimension_formulas.extend(result.dimension_formulas)
                # every dimension formula of the taxon becomes a comparison join column
                for dim_formula in result.dimension_formulas:
                    plan.comparison_join_columns.append(dim_formula.label)
            else:
                # Raw comparison join taxon: add it to join and also to select from dataframes
                plan.comparison_join_columns.append(taxon.slug_safe_sql_identifier)
                plan.comparison_raw_taxon_slugs.append(taxon.slug_safe_sql_identifier)

    cls._populate_filter_templates_to_plan(ctx, plan, request, all_taxons)

    return plan
data_source_formula_templates=[], dimension_formulas=[], pre_formulas=[], post_formula=PostFormula('690'), ), ), ( ("123 + (twitter|spend / 567)", {'twitter'}, TaxonTypeEnum.metric), ExprResult( data_source_formula_templates=[], dimension_formulas=[], pre_formulas=[ PreFormula( '(twitter_spend_68657fbb141b10c8 / nullif(567, 0))', '__1', AggregationDefinition(type=AggregationType.sum), None, ) ], post_formula=PostFormula( 'coalesce(123, 0) + coalesce(__1, 0)'), ), ), ( ("444.3 / (computed_metric_avg - 1)", set(), TaxonTypeEnum.metric), ExprResult( data_source_formula_templates=[], dimension_formulas=[], pre_formulas=[ PreFormula('1000 * avg_spend', '__1', AggregationDefinition(type=AggregationType.avg),
def aggregation_definition(self, context: TelRootContext) -> Optional[AggregationDefinition]:
    """
    Calculates the aggregation definition for this node.

    This node always answers with a new ``not_set`` definition; ``context`` is not consulted.

    :param context: TEL root context (unused)
    :return: aggregation definition of the ``not_set`` type
    """
    not_set = AggregationType.not_set
    return AggregationDefinition(type=not_set)
def get_case_id_common_defined_agg(val):
    """
    Render one parametrized value as a readable string.

    NOTE(review): presumably used as the ``ids`` callback of the parametrization below - confirm.

    - a list is rendered as its comma separated aggregation types ('None' for falsy items)
    - an aggregation definition is rendered as the value of its type
    - anything else is rendered with ``str``
    """
    if isinstance(val, list):
        return ', '.join(agg.type if agg else 'None' for agg in val)
    elif isinstance(val, AggregationDefinition):
        return str(val.type.value if val else None)
    else:
        return str(val)


# each case is: [definitions to combine, fallback, taxon type, expected definition]
@pytest.mark.parametrize(
    "agg_definitions, fallback, taxon_type, expected",
    [
        [
            [
                AggregationDefinition(type=AggregationType.sum),
                AggregationDefinition(type=AggregationType.sum)
            ],
            False,
            None,
            AggregationDefinition(type=AggregationType.sum),
        ],
        [
            [
                AggregationDefinition(type=AggregationType.not_set),
                AggregationDefinition(type=AggregationType.avg)
            ],
            False,
            None,
            AggregationDefinition(type=AggregationType.avg),
        ],