def from_spec(cls, registry, unit_type, measures=None, segment_by=None, where=None, **opts):
    """
    Build an evaluation strategy for the requested measures and dimensions.

    Features that carry no `via` are evaluated against `unit_type` directly.
    The remaining features are grouped by the next unit type on their path,
    solved by a recursive call to `from_spec`, and joined back into the
    strategies constructed for `unit_type`.

    Args:
        registry: Object used to resolve identifiers, measures, dimensions and
            foreign keys, and to choose provisions.
        unit_type: Unit type to evaluate for; it is resolved through
            `registry._resolve_identifier` before use.
        measures: Optional iterable of measure specs (none when omitted).
        segment_by: Optional iterable of dimension specs (none when omitted).
        where: Constraint spec, parsed with `Constraint.from_spec`.
        **opts: Forwarded unchanged to the recursive `from_spec` calls; not
            otherwise read here.

    Returns:
        The strategy built from the first provision, with the strategies of
        every other provision and of every foreign unit type joined into it.

    Raises:
        RuntimeError: If the strategy for a foreign unit type cannot be joined
            onto any strategy built for `unit_type`.
        IndexError: If the registry returns no provisions (the first
            evaluation is taken unconditionally).
    """
    # Step 0: Turn every input spec into a resolved feature / constraint.
    unit_type = registry._resolve_identifier(unit_type)
    measures = [
        registry._resolve_measure(unit_type, spec)
        for spec in (() if measures is None else measures)
    ]
    segment_by = [
        registry._resolve_dimension(unit_type, spec)
        for spec in (() if segment_by is None else segment_by)
    ]
    where = Constraint.from_spec(where)
    # Dimensions needed only by the constraint are pulled in as implicit ones.
    where_dimensions = [
        registry._resolve_dimension(unit_type, name).as_implicit
        for name in where.scoped_for_unit_type(unit_type).dimensions
        if name not in segment_by
    ]

    # Step 1: Sort features into the bundle for this unit type, or into one
    # bundle per next unit type.
    local_bundle = DimensionBundle(unit_type=unit_type, dimensions=[], measures=[])
    foreign_bundles = {}

    def assign(features, kind='measures', for_constraint=False):
        for feature in features:
            if not feature.via:
                local_bundle._asdict()[kind].append(feature)
                continue

            is_reverse = (
                (for_constraint or kind == 'measures')
                and feature.next_unit_type
                in registry.reverse_foreign_keys_for_unit(unit_type)
            )
            if is_reverse:
                key = registry._resolve_reverse_foreign_key(
                    unit_type, feature.next_unit_type)
                # A reverse bundle keeps the *current* unit type; Step 3
                # detects reverse joins by that mismatch with its key.
                bundle_unit_type = unit_type
            else:
                key = registry._resolve_foreign_key(
                    unit_type, feature.next_unit_type)
                bundle_unit_type = key

            if key not in foreign_bundles:
                foreign_bundles[key] = DimensionBundle(
                    unit_type=bundle_unit_type, dimensions=[], measures=[])
            foreign_bundles[key]._asdict()[kind].append(feature.via_next)

    assign(measures, kind='measures')
    assign(segment_by, kind='dimensions')
    assign(where_dimensions, kind='dimensions', for_constraint=True)

    # Every foreign bundle needs its join key present (privately) locally.
    for bundle in foreign_bundles.values():
        join_key = registry._resolve_foreign_key(unit_type, bundle.unit_type)
        if join_key not in local_bundle.dimensions:
            local_bundle.dimensions.append(join_key.as_private)

    # Step 2: One strategy per provision chosen by the registry.
    provisions = registry._find_optimal_provision(
        unit_type=unit_type,
        measures=local_bundle.measures,
        dimensions=local_bundle.dimensions,
    )

    def strategy_for(provision):
        provider = provision.provider
        provider_where = where.generic_for_provider(provider)
        # Constraint dimensions the provision does not already supply are
        # added privately so the constraint can still be applied.
        constraint_only = [
            provider.resolve(name).as_private
            for name in provider_where.dimensions
            if not provision.dimensions or name not in provision.dimensions
        ]
        return cls(
            registry=registry,
            provider=provider,
            unit_type=unit_type,
            measures=provision.measures,
            segment_by=provision.dimensions + constraint_only,
            where=provider_where,
            join_prefix=provision.join_prefix,
        )

    evaluations = [strategy_for(provision) for provision in provisions]

    # Step 3: Solve each foreign bundle recursively and attach the result.
    for foreign_key, bundle in foreign_bundles.items():
        foreign_strategy = cls.from_spec(
            registry=registry,
            unit_type=foreign_key,
            measures=bundle.measures,
            segment_by=bundle.dimensions,
            where=where.via_next(foreign_key.name),
            **opts
        )

        if foreign_key != bundle.unit_type:
            # Reverse foreign key join: join on the bundle's unit type.
            foreign_key = bundle.unit_type
            foreign_strategy.unit_type = bundle.unit_type

        added = False
        for candidate in evaluations:
            exposes_key = any(
                isinstance(dim, _StatisticalUnitIdentifier)
                and dim.matches(foreign_key)
                for dim in candidate.segment_by
            )
            if exposes_key:
                candidate.add_join(foreign_key, foreign_strategy)
                added = True
        if not added:
            raise RuntimeError("Could not add foreign strategy: {}".format(
                foreign_strategy))

    # Fold all remaining evaluations into the first one.
    strategy = evaluations[0]
    for sibling in evaluations[1:]:
        strategy.add_join(unit_type, sibling)

    strategy.where = And.from_operands(strategy.where, where.scoped_applicable)

    # Step 4: Implicit dimensions that only served the constraint become
    # private, unless they were explicitly requested in `segment_by`.
    for dim in strategy.segment_by:
        if not dim.implicit or dim not in where.scoped_applicable.dimensions:
            continue
        position = strategy.segment_by.index(dim)
        strategy.segment_by[position] = strategy.segment_by[position].as_private

    # Step 5: Hand back the assembled strategy.
    return strategy
def _compat_fields_split(self, measures, segment_by, where, joins_post=None):
    """
    Split measures, dimensions and constraints into pre- and post-join sets.

    The "pre" sets are what is computed before the joins in `joins_post` are
    applied: private features that are still needed afterwards (as join keys,
    as the extra public keys, or by a post-join constraint) are made public
    there so they are not lost, and external fields supplied by `joins_post`
    are left out. The "post" sets then contain the fields supplied by
    `joins_post` plus every feature that is not private in the "pre" set.

    Constraints are split as well: an operand goes to the post-join constraint
    only if it refers to a field or right-hand key of `joins_post`; everything
    else is applied before the joins.

    Args:
        measures: Iterable of measure features.
        segment_by: Iterable of dimension features.
        where: Constraint to split; may be falsy.
        joins_post: Joins applied after the pre- computation. `None` or an
            empty collection means there is nothing to split.

    Returns:
        A 6-tuple `(measures_pre, segment_by_pre, where_pre, measures_post,
        segment_by_post, where_post)`. When `joins_post` is empty or `None`,
        the inputs are returned untouched as the first three items and the
        last three are `None`. Otherwise the measure / dimension items are
        dicts mapping each feature to itself.
    """
    # `joins_post` defaults to None, so test truthiness rather than len().
    if not joins_post:
        return measures, segment_by, where, None, None, None

    # TODO: Use dictionaries for performance
    join_post_fields = []
    for join in joins_post:
        join_post_fields.extend(
            m.as_via(join.join_prefix) for m in join.measures)
        join_post_fields.extend(
            d.as_via(join.join_prefix) for d in join.dimensions)
    join_left_post_keys = list(
        itertools.chain.from_iterable(join.left_on for join in joins_post))
    join_right_post_keys = list(
        itertools.chain.from_iterable(join.right_on for join in joins_post))

    # Process constraint clauses
    where_pre = []
    where_post = []
    if where:
        # Names that only exist once the post joins have been applied;
        # computed once instead of once per operand.
        post_names = {
            d if isinstance(d, str) else d.via_name
            for d in join_post_fields + join_right_post_keys
        }
        operands = where.operands if where.kind is CONSTRAINTS.AND else [where]
        for op in operands:
            if set(op.dimensions).intersection(post_names):
                where_post.append(op)
            else:
                where_pre.append(op)
    # NOTE(review): `from_operands` receives a single list here, while
    # `from_spec` passes operands positionally -- confirm it accepts both.
    where_pre = And.from_operands(where_pre)
    where_post = And.from_operands(where_post)

    # Process measures and dimensions
    post_constraint_dimensions = (
        list(where_post.dimensions) if where_post else [])

    def features_split(features, extra_public_keys=None):
        # Private features listed here must be public in the pre- phase so
        # they are still available to the joins / post constraints.
        keep_public = (
            join_left_post_keys
            + list(extra_public_keys or [])
            + post_constraint_dimensions
        )
        pre = {}
        post = {}
        for feature in features:
            if feature.external and feature in join_post_fields:
                # Provided by a post join; nothing to compute beforehand.
                post[feature] = feature
                continue
            if feature.private and feature in keep_public:
                public = feature.as_public
                pre[public] = public
            else:
                pre[feature] = feature
            # Looking up by `feature` presumably relies on public and private
            # variants hashing/comparing equal -- TODO confirm.
            if not pre[feature].private:
                post[feature] = feature
        return pre, post

    count_dimension = self.resolve(
        unit_type=None, features='count', role='dimension')
    measures_pre, measures_post = features_split(measures, [count_dimension])
    segment_by_pre, segment_by_post = features_split(segment_by)

    return (measures_pre, segment_by_pre, where_pre,
            measures_post, segment_by_post, where_post)