def test_company_scoped_models(self):
    """The company-scoped model must be sorted ahead of the generic entity model."""
    generic_model = get_mock_entity_model()
    scoped_model = get_mock_metric_model(company_id='company-id')
    requested_slugs = {TaxonSlugExpression('ad_id'), TaxonSlugExpression('impressions')}

    result = sort_models_with_heuristic([generic_model, scoped_model], requested_slugs)

    self.assertEqual(2, len(result))
    # Company-scoped model wins the heuristic and comes first.
    self.assertEqual([scoped_model, generic_model], result)
def _build_selectors(
    self,
    get_column_name: Callable[[TaxonSlugExpression, str], str],
) -> Select:
    """
    Builds the SELECT part of the query.

    :param get_column_name: maps a taxon slug expression (and its plain slug) to the
        SQL column name to select from
    :return: SELECT statement over the projection taxons and rendered dimension
        templates, with columns in deterministic sorted order
    """
    # Plain projection taxons: select each column under its SQL-safe label.
    projected = [
        literal_column(get_column_name(expr, expr.slug)).label(taxon.slug_safe_sql_identifier)
        for expr, taxon in self.projection_taxons.items()
    ]

    for tpl in self.dimension_templates:
        # Dimension templates must be rendered with the concrete SQL column names
        # for every taxon they reference.
        rendered_formula = tpl.render_formula(
            **{slug: get_column_name(TaxonSlugExpression(slug), slug) for slug in tpl.used_taxons}
        )
        projected.append(literal_column(rendered_formula).label(tpl.label))

    return select(sort_columns(projected))
def move_top_level_to_subrequests(
    top_level_taxons: Iterable[TaxonExpressionStr], subrequests: Iterable[ApiDataRequest]
):
    """
    Moves top-level taxons down into the matching subrequests.

    Taxons that are dynamic TEL expressions, or that carry no explicit data source,
    are broadcast to every subrequest. Taxons with an explicit data source are
    appended only to the subrequest owning that data source.

    :param top_level_taxons: taxon slugs requested at the top level (may be None/empty)
    :param subrequests: subrequests, each keyed by its first (primary) data source
    :raises InvalidRequest: when a taxon names a data source no subrequest serves
    """
    # Index subrequests by their primary data source.
    ds_to_subrequest: Dict[str, ApiDataRequest] = {
        subrequest.properties.data_sources[0]: subrequest for subrequest in subrequests
    }

    def _broadcast(taxon_slug):
        # Taxon cannot be pinned to a single data source - append it to all subrequests.
        for subrequest in ds_to_subrequest.values():
            subrequest.taxons.append(taxon_slug)

    for slug in top_level_taxons or []:
        if slug[0] == TEL_EXPR_QUERY_PREFIX:
            # Dynamic TEL expression - move it to all subrequests.
            _broadcast(slug)
            continue

        data_source = TaxonSlugExpression(slug).data_source
        if data_source is None:
            # Taxon has no data source - move it to all subrequests.
            _broadcast(slug)
            continue

        target = ds_to_subrequest.get(data_source)
        if target:
            # Move it to the subrequest matching its data source.
            target.taxons.append(slug)
        else:
            # No subrequest serves this data source.
            raise InvalidRequest(slug, f'Request does not have proper data source for taxon {slug}.')
def _get_expr_taxon_map(cls, slugs: Iterable[str], preloaded_taxons: TaxonMap) -> SlugExprTaxonMap:
    """
    Resolves the given slugs against preloaded taxons and keys the results by
    slug expression. Taxons that have a calculation are excluded.
    """
    expr_taxon_map: SlugExprTaxonMap = {}
    for slug in slugs:
        taxon = preloaded_taxons[TaxonExpressionStr(slug)]
        # Keep only taxons without a calculation.
        if taxon.calculation is None:
            expr_taxon_map[TaxonSlugExpression(taxon.slug)] = taxon
    return expr_taxon_map
def get_specific_select_mocked_taxons(
    taxon_slugs: List[str]) -> Dict[TaxonSlugExpression, Taxon]:
    """
    Builds a map of mocked taxons keyed by their slug expressions,
    preserving the input slug order.
    """
    return OrderedDict(
        # Each slug is resolved individually through the mocked taxon map.
        (TaxonSlugExpression(slug), mock_get_taxons_map(None, [slug])[slug])
        for slug in taxon_slugs
    )
def _get_raw_taxons(self, company_id: str, taxon_slugs: Iterable[TaxonExpressionStr],
                    data_source: str) -> Set[TaxonExpressionStr]:
    """
    Returns raw taxons needed by the taxon slugs for the given data source.

    All available data sources are passed down so the helper can raise an
    exception if a required data source is missing.
    """
    all_raw_slugs = get_used_raw_taxon_slugs_all(
        company_id, taxon_slugs, self.data_sources, self.taxonless_map
    )
    raw_slugs_for_ds: Set[TaxonExpressionStr] = set()
    for raw_slug in all_raw_slugs:
        expr = TaxonSlugExpression(raw_slug)
        # Keep slugs only from the given data source, or without any data source.
        if expr.data_source is None or expr.data_source == data_source:
            raw_slugs_for_ds.add(expr.slug)
    return raw_slugs_for_ds
def _build_query_window_aggregations(
    self,
    taxon_to_model: Dict[TaxonSlugExpression, HuskyModel],
    ordered_query_joins: Sequence[QueryJoins],
) -> Select:
    """
    Generates query for taxons which need window functions for aggregation.

    Builds an inner SELECT where window-aggregated taxons are computed via
    OVER (PARTITION BY ... ORDER BY ...), applies scope row filters to it,
    then wraps it in an outer SELECT (built via _build_selectors) on which
    GROUP BY can safely be applied.

    :param taxon_to_model: Map of taxon slugs (key) and models they are coming from (value)
    :param ordered_query_joins: List of joins
    :return: SELECT over the filtered inner query
    """
    selectors = []
    # generate inner query with window aggregation functions;
    # iterate taxons in sorted slug order so generated SQL is deterministic
    for taxon_slug_expression, taxon in sorted(
            self.projection_taxons.items(), key=lambda x: str(x[0])):
        model = taxon_to_model[taxon_slug_expression]
        if (taxon.tel_metadata and taxon.tel_metadata.aggregation_definition
                and taxon.tel_metadata.aggregation_definition.params
                and taxon.tel_metadata_aggregation_type in self._AGGREGATION_WINDOW_FUNCTIONS):
            # find the order_by columns for the window function
            order_by = []
            window_params = cast(
                AggregationParamsSortDimension, taxon.tel_metadata.aggregation_definition.params)
            for field in window_params.sort_dimensions:
                # resolve each sort dimension to its SQL accessor on the owning model
                col = taxon_to_model[TaxonSlugExpression(field.taxon)].taxon_sql_accessor(
                    self.ctx, field.taxon)
                # default to ascending order when no direction is specified
                order_by_dir = field.order_by or TaxonOrderType.asc
                # NULLs are pushed to the end regardless of sort direction
                order_by.append(
                    nullslast(ORDER_BY_FUNCTIONS[order_by_dir](literal_column(col))))

            # apply window aggregation functions over the partition columns of the model
            column = self._AGGREGATION_WINDOW_FUNCTIONS[taxon.tel_metadata_aggregation_type](
                literal_column(model.taxon_sql_accessor(self.ctx, taxon.slug))).over(
                    partition_by=self.get_partition_by_columns(model), order_by=order_by)
        else:
            # otherwise, render the columns "as-is"
            column = literal_column(model.taxon_sql_accessor(self.ctx, taxon.slug))

        selectors.append(column.label(taxon.slug_safe_sql_identifier))

    # add joins to the inner query
    inner_query = select(selectors).select_from(self._build_from_joins(ordered_query_joins))

    # apply scope filters to the inner query
    inner_query = ScopeGuard.add_scope_row_filters(
        self.ctx, self.scope, inner_query, self.taxon_model_info_map)

    # update taxon model info map, because we're selecting from the outer query
    # and not the inner query
    self._rebuild_taxon_info_map_inner_query()

    # then, prepare the outer query on which we can safely apply GROUP BY
    return self._build_selectors(lambda _, taxon_slug: safe_identifier(taxon_slug)).select_from(
        inner_query)
def _get_taxons(cls, taxons: Dict[str, Taxon]) -> List[Tuple[TaxonSlugExpression, Taxon]]:
    """Pairs every taxon with a slug expression built from its slug."""
    pairs: List[Tuple[TaxonSlugExpression, Taxon]] = []
    for taxon in taxons.values():
        pairs.append((TaxonSlugExpression(taxon.slug), taxon))
    return pairs