def aggregate(self, cell, measures=None, drill_down=None):
    """See :meth:`cubes.browsers.cell.aggregate`.

    Build a MongoDB query condition for `cell` and return a cursor over the
    records in `self.collection` that match it. Only records whose
    aggregate flag field is true are selected.

    :Arguments:
        * `cell` - cell to be aggregated; every cut in `cell.cuts` has to be
          a `PointCut`
        * `measures` - accepted for interface compatibility, currently not
          used by this method
        * `drill_down` - optional dimension reference passed to
          `self.cube.dimension()`; a false value means no drill-down

    Returns the cursor produced by `self.collection.find()`.

    Raises `AttributeError` if the cell contains a cut that is not a point
    cut.
    """

    ###################################################
    # 1. Prepare cell selector

    # Resolve the drill-down dimension exactly once.
    drill_dimension = self.cube.dimension(drill_down) if drill_down else None

    condition = {
        self.cell_selector_name: self.selector_object(cell, drill_dimension),
        self.aggregate_flag_field: True,
    }

    ###################################################
    # 2. Prepare dimension filter conditions

    dim_conditions = {}
    for cut in cell.cuts:
        # isinstance() also accepts subclasses of PointCut.
        if not isinstance(cut, PointCut):
            raise AttributeError(
                "only point cuts are currently supported for mongo "
                "aggregation browsing"
            )

        dimension = self.cube.dimension(cut.dimension)
        dim_levels = dimension.default_hierarchy.levels

        # Get physical field names from field mappings specified in cube and
        # use them in selection condition. Indexing (rather than zip) keeps
        # the IndexError for a path deeper than the default hierarchy.
        for depth, value in enumerate(cut.path):
            level = dim_levels[depth]
            mapped = base.dimension_field_mapping(self.cube, dimension, level.key)
            dim_conditions[mapped[0]] = value

    if dim_conditions:
        # Expand dictionary: convert key1.key2 = value into
        # 'key1 : { key2 : value}'
        # NOTE(review): skipped for a cell without cuts, assuming the helper
        # maps an empty dictionary to an empty dictionary - confirm.
        condition.update(cubes.common.expand_dictionary(dim_conditions))

    ###################################################
    # 3. Perform selection - find records in collection

    return self.collection.find(spec=condition)
def aggregate(self, cell, measures=None, drill_down=None):
    """See :meth:`cubes.browsers.cell.aggregate`.

    Build a MongoDB query condition for `cell` and return a cursor over the
    records in `self.collection` that match it. Only records whose
    aggregate flag field is true are selected.

    :Arguments:
        * `cell` - cell to be aggregated; every cut in `cell.cuts` has to be
          a `PointCut`
        * `measures` - accepted for interface compatibility, currently not
          used by this method
        * `drill_down` - optional dimension reference passed to
          `self.cube.dimension()`; a false value means no drill-down

    Returns the cursor produced by `self.collection.find()`.

    Raises `AttributeError` if the cell contains a cut that is not a point
    cut.
    """

    ###################################################
    # 1. Prepare cell selector

    # Resolve the drill-down dimension exactly once.
    drill_dimension = self.cube.dimension(drill_down) if drill_down else None

    condition = {
        self.cell_selector_name: self.selector_object(cell, drill_dimension),
        self.aggregate_flag_field: True,
    }

    ###################################################
    # 2. Prepare dimension filter conditions

    dim_conditions = {}
    for cut in cell.cuts:
        # isinstance() also accepts subclasses of PointCut.
        if not isinstance(cut, PointCut):
            raise AttributeError(
                "only point cuts are currently supported for mongo "
                "aggregation browsing"
            )

        dimension = self.cube.dimension(cut.dimension)
        dim_levels = dimension.default_hierarchy.levels

        # Get physical field names from field mappings specified in cube and
        # use them in selection condition. Indexing (rather than zip) keeps
        # the IndexError for a path deeper than the default hierarchy.
        for depth, value in enumerate(cut.path):
            level = dim_levels[depth]
            mapped = base.dimension_field_mapping(self.cube, dimension, level.key)
            dim_conditions[mapped[0]] = value

    if dim_conditions:
        # Expand dictionary: convert key1.key2 = value into
        # 'key1 : { key2 : value}'
        # NOTE(review): skipped for a cell without cuts, assuming the helper
        # maps an empty dictionary to an empty dictionary - confirm.
        condition.update(cubes.util.expand_dictionary(dim_conditions))

    ###################################################
    # 3. Perform selection - find records in collection

    return self.collection.find(spec=condition)
def compute_cell(self, selector):
    """Compute aggregation for cell specified by selector.

    The cell is computed using the MongoDB aggregate_ (group) function.
    Computed records are inserted into `cube_collection` and they contain:

    * key fields used for grouping
    * aggregated measures suffixed with `_sum`, for example: `amount_sum`
    * record count in `record_count`
    * cell selector as `_selector` (configurable) with dimension names as
      keys and current dimension levels as values, for example:
      {"date": ["year", "month"] }
    * cell reference as `_cell` (configurable) with dimension names as keys
      and level keys forming dimension paths as values, for example:
      {"date": [2010, 10] }

    .. _aggregate: http://www.mongodb.org/display/DOCS/Aggregation#Aggregation-Group

    :Arguments:
        * `selector` is an iterable of tuples: (dimension, levels), where
          each level provides `name`, `key` and `attributes`. It is walked
          exactly once, so a one-shot iterator is accepted as well.

    .. note::

        Only 'sum' aggregation is being computed. Other aggregations might
        be implemented in the future, such as average, min, max, rank, ...
    """

    self.log.info("computing selector")

    key_maps = []
    attrib_maps = []
    selector_record = {}
    # (dimension name, [physical key field, ...]) pairs. Resolved here once
    # so the per-record loop below does not repeat the mapping lookups.
    cell_key_fields = []

    for dimsel in selector:
        dim = dimsel[0]
        levels = dimsel[1]
        self.log.info("-- dimension: %s levels: %s", dim.name, levels)

        level_names = []
        dim_key_fields = []
        for level in levels:
            level_names.append(level.name)
            mapped = base.dimension_field_mapping(self.cube, dim, level.key)
            key_maps.append(mapped)
            dim_key_fields.append(mapped[0])

            for field in level.attributes:
                mapped = base.dimension_field_mapping(self.cube, dim, field)
                attrib_maps.append((mapped[0], field))

        selector_record[dim.name] = level_names
        cell_key_fields.append((dim.name, dim_key_fields))

    ###########################################
    # Prepare group command parameters
    #
    # condition - filter condition for find() (check for existence of keys)
    # keys - list of keys to be used for grouping
    # fields - mapped attribute and measure fields (only logged below)

    keys = [mapping[0] for mapping in key_maps]
    condition = dict((key, {"$exists": True}) for key in keys)

    fields = [mapping[0] for mapping in attrib_maps]
    for measure in self.measures:
        fields.append(base.fact_field_mapping(self.cube, measure)[0])

    self.log.info("condition: %s", condition)
    self.log.info("keys: %s", keys)
    self.log.info("fields: %s", fields)

    # Exclude aggregates:
    condition[self.aggregate_flag_field] = {'$ne': True}

    ####################################################
    # Prepare group functions + reduce + finalize
    #

    initial = {"record_count": 0}
    aggregate_lines = []
    for measure in self.measures:
        measure_agg_name = measure + "_sum"
        # NOTE(review): the reduce function reads the measure by its logical
        # name, not by the mapped field collected in `fields` - confirm that
        # this is intended.
        aggregate_lines.append("out.%s += doc.%s;" % (measure_agg_name, measure))
        initial[measure_agg_name] = 0

    reduce_function = ''' function(doc, out) { out.record_count ++; %(aggregate_lines)s }\n''' % {
        "aggregate_lines": "\n".join(aggregate_lines)
    }

    cursor = self.fact_collection.group(key=keys,
                                        condition=condition,
                                        initial=initial,
                                        reduce=reduce_function,
                                        finalize=None)

    for record in cursor:
        # TODO: note kept from the original code -
        # use: cubes.commons.expand_dictionary(record)

        # Dimension paths are read from the raw grouped record, before
        # construct_record() replaces it.
        cell = {}
        for dim_name, key_fields in cell_key_fields:
            cell[dim_name] = [record[key_field] for key_field in key_fields]

        record = self.construct_record(record)
        record[self.aggregate_flag_field] = True
        record[self.cell_record_name] = selector_record
        record[self.cell_reference_record_name] = cell
        self.cube_collection.insert(record)
def compute_cell(self, selector):
    """Compute aggregation for cell specified by selector.

    The cell is computed using the MongoDB aggregate_ (group) function.
    Computed records are inserted into `cube_collection` and they contain:

    * key fields used for grouping
    * aggregated measures suffixed with `_sum`, for example: `amount_sum`
    * record count in `record_count`
    * cell selector as `_selector` (configurable) with dimension names as
      keys and current dimension levels as values, for example:
      {"date": ["year", "month"] }
    * cell reference as `_cell` (configurable) with dimension names as keys
      and level keys forming dimension paths as values, for example:
      {"date": [2010, 10] }

    .. _aggregate: http://www.mongodb.org/display/DOCS/Aggregation#Aggregation-Group

    :Arguments:
        * `selector` is an iterable of tuples: (dimension, levels), where
          each level provides `name`, `key` and `attributes`. It is walked
          exactly once, so a one-shot iterator is accepted as well.

    .. note::

        Only 'sum' aggregation is being computed. Other aggregations might
        be implemented in the future, such as average, min, max, rank, ...
    """

    self.log.info("computing selector")

    key_maps = []
    attrib_maps = []
    selector_record = {}
    # (dimension name, [physical key field, ...]) pairs. Resolved here once
    # so the per-record loop below does not repeat the mapping lookups.
    cell_key_fields = []

    for dimsel in selector:
        dim = dimsel[0]
        levels = dimsel[1]
        self.log.info("-- dimension: %s levels: %s", dim.name, levels)

        level_names = []
        dim_key_fields = []
        for level in levels:
            level_names.append(level.name)
            mapped = base.dimension_field_mapping(self.cube, dim, level.key)
            key_maps.append(mapped)
            dim_key_fields.append(mapped[0])

            for field in level.attributes:
                mapped = base.dimension_field_mapping(self.cube, dim, field)
                attrib_maps.append((mapped[0], field))

        selector_record[dim.name] = level_names
        cell_key_fields.append((dim.name, dim_key_fields))

    ###########################################
    # Prepare group command parameters
    #
    # condition - filter condition for find() (check for existence of keys)
    # keys - list of keys to be used for grouping
    # fields - mapped attribute and measure fields (only logged below)

    keys = [mapping[0] for mapping in key_maps]
    condition = dict((key, {"$exists": True}) for key in keys)

    fields = [mapping[0] for mapping in attrib_maps]
    for measure in self.measures:
        fields.append(base.fact_field_mapping(self.cube, measure)[0])

    self.log.info("condition: %s", condition)
    self.log.info("keys: %s", keys)
    self.log.info("fields: %s", fields)

    # Exclude aggregates:
    condition[self.aggregate_flag_field] = {'$ne': True}

    ####################################################
    # Prepare group functions + reduce + finalize
    #

    initial = {"record_count": 0}
    aggregate_lines = []
    for measure in self.measures:
        measure_agg_name = measure + "_sum"
        # NOTE(review): the reduce function reads the measure by its logical
        # name, not by the mapped field collected in `fields` - confirm that
        # this is intended.
        aggregate_lines.append("out.%s += doc.%s;" % (measure_agg_name, measure))
        initial[measure_agg_name] = 0

    reduce_function = ''' function(doc, out) { out.record_count ++; %(aggregate_lines)s }\n''' % {
        "aggregate_lines": "\n".join(aggregate_lines)
    }

    cursor = self.fact_collection.group(key=keys,
                                        condition=condition,
                                        initial=initial,
                                        reduce=reduce_function,
                                        finalize=None)

    for record in cursor:
        # TODO: note kept from the original code -
        # use: cubes.commons.expand_dictionary(record)

        # Dimension paths are read from the raw grouped record, before
        # construct_record() replaces it.
        cell = {}
        for dim_name, key_fields in cell_key_fields:
            cell[dim_name] = [record[key_field] for key_field in key_fields]

        record = self.construct_record(record)
        record[self.aggregate_flag_field] = True
        record[self.cell_record_name] = selector_record
        record[self.cell_reference_record_name] = cell
        self.cube_collection.insert(record)