def _features_lookup(self, unit_type, kind, attr_filter=None): assert kind in ('foreign_key', 'reverse_foreign_key', 'dimension', 'measure') unit_type = self.identifier_for_unit(unit_type) feature_source = getattr(self._cache, kind + 's') features = SequenceMap() for avail_unit_type in feature_source: if avail_unit_type.matches(unit_type): for feature, instances in feature_source[ avail_unit_type].items(): if feature not in features and (not attr_filter or attr_filter(feature)): mask = None if kind in ('foreign_key', 'reverse_foreign_key' ) and avail_unit_type == feature.name: mask = unit_type.name features.append( _ResolvedFeature( feature.name, providers=[d.provider for d in instances], unit_type=unit_type, mask=mask, kind=kind)) return features
def measures_for_unit(self, unit_type): unit_type = self.identifier_for_unit(unit_type) if unit_type is None: return self.measures measures = SequenceMap() for measure in self.measures: if self._unit_has_measure(unit_type, measure): measures.append(measure) return measures
def dimensions_for_unit(self, unit_type, include_partitions=True): unit_type = self.identifier_for_unit(unit_type) if unit_type is None: return self.dimensions dimensions = SequenceMap() for dimension in self.dimensions: if (self._unit_has_dimension(unit_type, dimension) and (include_partitions or not dimension.partition)): dimensions.append(dimension) return dimensions
def foreign_keys_for_unit(self, unit_type): unit_type = self.identifier_for_unit(unit_type) if unit_type is None: return self.identifiers foreign_keys = SequenceMap() for foreign_key in self.identifiers: if self._unit_has_foreign_key(unit_type, foreign_key): if unit_type.name == foreign_key: foreign_key = foreign_key.with_mask(unit_type.mask) foreign_keys.append(foreign_key) return foreign_keys
class EvaluationStrategy(object): class Type(Enum): REGULAR = 1 UNIT_REBASE = 2 @classmethod def from_spec(cls, registry, unit_type, measures=None, segment_by=None, where=None, **opts): # Step 0: Resolve applicable measures and dimensions unit_type = registry.identifier_for_unit(unit_type) measures = [] if measures is None else measures segment_by = [] if segment_by is None else segment_by if not isinstance(measures, list): measures = [measures] if not isinstance(segment_by, list): segment_by = [segment_by] measures = [ registry.resolve(unit_type, measure, role='measure') for measure in measures ] segment_by = [ registry.resolve(unit_type, dimension, role='dimension') for dimension in segment_by ] where = Constraint.from_spec(where) where_dimensions = [ ( registry.resolve(unit_type, dimension, role='dimension').as_implicit ) for dimension in where.scoped_for_unit_type(unit_type).dimensions if dimension not in segment_by ] # Step 1: Collect measures and dimensions into groups based on current unit_type # and next unit_type current_evaluation = FeatureBundle(unit_type=unit_type, dimensions=[], measures=[]) next_evaluations = {} def collect_dimensions(dimensions, kind='measures', for_constraint=False): for dimension in dimensions: if not dimension.via: current_evaluation._asdict()[kind].append(dimension) elif ( # Handle reverse foreign key joins dimension.next_unit_type in registry.reverse_foreign_keys_for_unit(unit_type) ): next_unit_type = registry.resolve(unit_type, dimension.next_unit_type, role='reverse_foreign_key') if next_unit_type not in next_evaluations: next_evaluations[next_unit_type] = FeatureBundle(unit_type=unit_type, dimensions=[], measures=[]) next_evaluations[next_unit_type]._asdict()[kind].append(dimension.via_next) else: next_unit_type = registry.resolve(unit_type, dimension.next_unit_type, role='foreign_key') if next_unit_type not in next_evaluations: next_evaluations[next_unit_type] = FeatureBundle(unit_type=next_unit_type, dimensions=[], measures=[]) next_evaluations[next_unit_type]._asdict()[kind].append(dimension.via_next) collect_dimensions(measures, kind='measures') collect_dimensions(segment_by, kind='dimensions') collect_dimensions(where_dimensions, kind='dimensions', for_constraint=True) # Add required dimension for joining in next unit_types for dimension_bundle in next_evaluations.values(): fk = registry.resolve(unit_type, dimension_bundle.unit_type, role='foreign_key') if fk not in current_evaluation.dimensions: current_evaluation.dimensions.append(fk.as_private) # Step 2: Create optimal joins for current unit_type provisions = registry._find_optimal_provision( unit_type=unit_type, measures=current_evaluation.measures, dimensions=current_evaluation.dimensions ) evaluations = [] for provision in provisions: generic_constraints = where.generic_for_provider(provision.provider) generic_constraint_dimensions = [ provision.provider.resolve(unit_type, dimension).as_private for dimension in generic_constraints.dimensions if not provision.dimensions or dimension not in provision.dimensions ] evaluations.append( cls( registry=registry, provider=provision.provider, unit_type=unit_type, measures=provision.measures, segment_by=provision.dimensions + generic_constraint_dimensions, where=generic_constraints, join_prefix=provision.join_prefix ) ) # Step 3: For each next unit_type, recurse problem and join into above query for foreign_key, dim_bundle in next_evaluations.items(): foreign_strategy = cls.from_spec(registry=registry, unit_type=foreign_key, measures=dim_bundle.measures, segment_by=dim_bundle.dimensions, where=where.via_next(foreign_key.name), **opts) if foreign_key != dim_bundle.unit_type: # Reverse foreign key join foreign_key = dim_bundle.unit_type foreign_strategy.unit_type = dim_bundle.unit_type added = False for sub_strategy in evaluations: for dimension in sub_strategy.segment_by: if isinstance(dimension, _StatisticalUnitIdentifier) and dimension.matches(foreign_key): sub_strategy.add_join(foreign_key, foreign_strategy) added = True break if not added: raise RuntimeError("Could not add foreign strategy: {}".format(foreign_strategy)) strategy = evaluations[0] for sub_strategy in evaluations[1:]: strategy.add_join(unit_type, sub_strategy) strategy.where = And.from_operands(strategy.where, where.scoped_for_unit_type(unit_type).scoped_applicable) # Step 4: Mark any resolved where dependencies as private, unless otherwise # requested in `segment_by` for dimension in strategy.segment_by: if dimension.implicit and dimension in where.scoped_for_unit_type(unit_type).dimensions: strategy.segment_by[dimension] = strategy.segment_by[dimension].as_private # Step 5: Return EvaluationStrategy, and profit. return strategy def __init__(self, registry, provider, unit_type, measures, segment_by=None, where=None, join_on_left=None, join_on_right=None, join_prefix=None, joins=None): self.registry = registry self.provider = provider # Statistical unit used for evaluation self.unit_type = unit_type # Anticipated measures, segmentations and constraints self.measures = SequenceMap(measures or []) self.segment_by = SequenceMap(segment_by or []) self.where = where # Join parameters self.is_joined = False self.join_is_compatible = True self.join_on_left = join_on_left self.join_on_right = join_on_right or [self.matched_unit_type.name] self.joins = joins or [] self.join_prefix = join_prefix or self.unit_type.name def _check_constraints(self, prefix=None, raise_on_unconstrained=True): """ This method checks whether dimensions that require constraints have been constrained. """ unconstrained = [] constrained_dimensions = self.where.dimensions constrained_dimensions.extend(self.join_on_right) for dimension in self.provider.dimensions_for_unit(self.unit_type): if dimension.requires_constraint and dimension not in constrained_dimensions: unconstrained.append('{}/{}'.format(prefix, dimension.name) if prefix else dimension.name) for join in self.joins: unconstrained.extend(join._check_constraints(prefix='{}/{}'.format(prefix, join.unit_type.name) if prefix else join.unit_type.name, raise_on_unconstrained=False)) if raise_on_unconstrained and len(unconstrained) > 0: raise RuntimeError("The following dimensions require and lack constraints: {}.".format(unconstrained)) return unconstrained @property def matched_unit_type(self): return self.provider.identifier_for_unit(self.unit_type) @property def strategy_type(self): if not self.matched_unit_type.is_unique: return self.Type.UNIT_REBASE else: return self.Type.REGULAR @property def joins_all_compatible(self): for join in self.joins: if ( not self.provider.is_compatible_with(join.provider) or not join.joins_all_compatible ): return False return True def __repr__(self): class StrategyEncoder(json.JSONEncoder): def default(self, o): if isinstance(o, EvaluationStrategy): d = OrderedDict([ ('provider', o.provider), ('unit_type', o.unit_type) ]) d['strategy_type'] = o.strategy_type if o.measures: d['measures'] = o.measures if o.segment_by: d['segment_by'] = o.segment_by if o.where: d['where'] = o.where if o.is_joined: d['join_on_left'] = o.join_on_left d['join_on_right'] = o.join_on_right d['join_type'] = o.join_type if o.join_prefix != o.unit_type.name: d['join_prefix'] = o.join_prefix d['join_is_compatible'] = o.join_is_compatible if o.joins: d['joins'] = o.joins d['joins_all_compatible'] = o.joins_all_compatible return d return o.__repr__() return 'EvaluationStrategy(' + json.dumps(self, indent=4, cls=StrategyEncoder, ensure_ascii=False) + ')' def add_join(self, unit_type, strategy): # TODO: Make atomic assert isinstance(strategy, EvaluationStrategy) # Add primary join key if missing and set join self_unit_type = self.provider.identifier_for_unit(unit_type.name).with_mask(unit_type.name) join_unit_type = strategy.provider.identifier_for_unit(unit_type.name) if self_unit_type not in self.segment_by: self.segment_by.prepend(self_unit_type.as_private) if join_unit_type not in strategy.segment_by: strategy.segment_by.prepend(join_unit_type) else: strategy.segment_by[join_unit_type].private = False strategy.join_on_left = [self_unit_type.fieldname(role='dimension')] strategy.join_on_right = [join_unit_type.fieldname(role='dimension')] # Add common partitions to join keys common_partitions = list( set(self.provider.partitions_for_unit(self_unit_type.fieldname(role='dimension'))) .intersection(strategy.provider.partitions_for_unit(join_unit_type.fieldname(role='dimension'))) ) for partition in common_partitions: if partition not in self.segment_by: self.segment_by.append(self.provider.resolve(self.unit_type, partition, role='dimension').as_private) if partition not in strategy.segment_by: strategy.segment_by.append(strategy.provider.resolve(strategy.unit_type, partition, role='dimension')) else: strategy.segment_by[partition].private = False strategy.join_on_left.extend([p.fieldname(role='dimension') for p in common_partitions]) strategy.join_on_right.extend([p.fieldname(role='dimension') for p in common_partitions]) # Add measures and segmentations in parent from join self.measures.extend( ( measure.as_external.as_via(strategy.join_prefix) if strategy.join_prefix != self.unit_type else measure.as_external ) for measure in strategy.measures if not measure.private ) self.segment_by.extend( ( dimension.as_external.as_via(strategy.join_prefix) if strategy.join_prefix != self.unit_type else dimension.as_external ) for dimension in strategy.segment_by if ( not dimension.private and ( dimension not in strategy.join_on_right or dimension.implicit ) ) ) # Set join metadata on incoming strategy strategy.is_joined = True strategy.join_is_compatible = ( self.provider.is_compatible_with(strategy.provider) and strategy.joins_all_compatible ) if strategy.join_prefix == self.join_prefix: strategy.join_prefix = None self.joins.append(strategy) return self @property def join_type(self): if self.strategy_type == self.Type.UNIT_REBASE: return 'left' if len(self.where.dimensions) > 0: return 'inner' for join in self.joins: if join.join_type == 'inner': return 'inner' return 'left' def execute(self, stats=True, ir_only=False, as_join=False, compatible=False, **opts): self._check_constraints() # Step 1: Build joins stats = stats and not self.is_joined joins = [] for join in self.joins: joins.append(join.execute( as_join=True, compatible=self.provider.is_compatible_with(join.provider), **opts )) # Step 2: Evaluate provider if as_join and compatible: try: return Join( provider=self.provider, unit_type=self.unit_type, join_prefix=self.join_prefix, left_on=self.join_on_left, right_on=self.join_on_right, measures=self.measures, dimensions=self.segment_by, object=self.provider.get_ir( unit_type=self.unit_type, measures=self.measures, segment_by=self.segment_by, where=self.where, joins=joins, stats_registry=self.registry._stats_registry, stats=stats, **opts ), how=self.join_type, compatible=True ) except NotImplementedError: pass if ir_only: return self.provider.get_ir( unit_type=self.unit_type, measures=self.measures, segment_by=self.segment_by, where=self.where, joins=joins, stats_registry=self.registry._stats_registry, stats=stats, **opts ) else: evaluated = self.provider.evaluate( unit_type=self.unit_type, measures=self.measures, segment_by=self.segment_by, where=self.where, joins=joins, stats_registry=self.registry._stats_registry, stats=stats, **opts ) if as_join: if self.join_prefix: evaluated = evaluated.add_prefix('{}/'.format(self.join_prefix)) right_on = ['{}/{}'.format(self.join_prefix, j) for j in self.join_on_right] else: right_on = self.join_on_right return Join( provider=self.provider, unit_type=self.unit_type, join_prefix=self.join_prefix, left_on=self.join_on_left, right_on=right_on, measures=self.measures, dimensions=self.segment_by, how=self.join_type, object=evaluated, compatible=False ) return evaluated