Beispiel #1
0
    def aggregate(self, cell, measures=None, drill_down=None):
        """See :meth:`cubes.browsers.cell.aggregate`.

        Build a query condition for `cell` and return the matching records
        from ``self.collection``. Only records carrying a true aggregate flag
        (``self.aggregate_flag_field``) are selected.

        :Arguments:
            * `cell` - cell to browse; every cut in ``cell.cuts`` has to be a
              `PointCut`
            * `measures` - accepted for interface compatibility, not used by
              this implementation
            * `drill_down` - optional dimension reference resolved through
              ``self.cube.dimension()`` and handed to ``self.selector_object()``

        Returns the cursor returned by ``self.collection.find()``.

        Raises `AttributeError` if the cell contains a cut that is not a
        `PointCut`.
        """

        ###################################################
        # 1. Prepare cell selector

        # Resolved once; it is only needed to build the selector object.
        drill_dimension = self.cube.dimension(drill_down) if drill_down else None

        condition = {
            self.cell_selector_name: self.selector_object(cell, drill_dimension),
            self.aggregate_flag_field: True,
        }

        ###################################################
        # 2. Prepare dimension filter conditions

        dim_conditions = {}
        for cut in cell.cuts:
            # isinstance() also accepts subclasses of PointCut
            if not isinstance(cut, PointCut):
                raise AttributeError(
                    "only point cuts are currently supported for mongo aggregation browsing"
                )

            dimension = self.cube.dimension(cut.dimension)
            dim_levels = dimension.default_hierarchy.levels

            # Get physical field names from field mappings specified in cube
            # and use them in selection condition. The i-th path element is
            # matched with the i-th level of the default hierarchy.
            for i, value in enumerate(cut.path):
                mapped = base.dimension_field_mapping(self.cube, dimension,
                                                      dim_levels[i].key)
                dim_conditions[mapped[0]] = value

        # Expand dictionary: convert key1.key2 = value into 'key1 : { key2 : value}'
        condition.update(cubes.common.expand_dictionary(dim_conditions))

        ###################################################
        # 3. Perform selection - find records in collection

        # The filter is passed positionally: its keyword name is `spec` in
        # PyMongo 2.x but `filter` from PyMongo 3.0 on.
        return self.collection.find(condition)
Beispiel #2
0
    def aggregate(self, cell, measures=None, drill_down=None):
        """See :meth:`cubes.browsers.cell.aggregate`.

        Build a query condition for `cell` and return the matching records
        from ``self.collection``. Only records carrying a true aggregate flag
        (``self.aggregate_flag_field``) are selected.

        :Arguments:
            * `cell` - cell to browse; every cut in ``cell.cuts`` has to be a
              `PointCut`
            * `measures` - accepted for interface compatibility, not used by
              this implementation
            * `drill_down` - optional dimension reference resolved through
              ``self.cube.dimension()`` and handed to ``self.selector_object()``

        Returns the cursor returned by ``self.collection.find()``.

        Raises `AttributeError` if the cell contains a cut that is not a
        `PointCut`.
        """

        ###################################################
        # 1. Prepare cell selector

        # Resolved once; it is only needed to build the selector object.
        if drill_down:
            drill_dimension = self.cube.dimension(drill_down)
        else:
            drill_dimension = None

        selector = self.selector_object(cell, drill_dimension)
        condition = {
            self.cell_selector_name: selector,
            self.aggregate_flag_field: True,
        }

        ###################################################
        # 2. Prepare dimension filter conditions

        dim_conditions = {}
        for cut in cell.cuts:
            # isinstance() also accepts subclasses of PointCut
            if not isinstance(cut, PointCut):
                raise AttributeError("only point cuts are currently supported for mongo aggregation browsing")

            dimension = self.cube.dimension(cut.dimension)
            dim_levels = dimension.default_hierarchy.levels

            # Get physical field names from field mappings specified in cube
            # and use them in selection condition. The i-th path element is
            # matched with the i-th level of the default hierarchy.
            for i, value in enumerate(cut.path):
                level = dim_levels[i]
                mapped = base.dimension_field_mapping(self.cube, dimension, level.key)
                dim_conditions[mapped[0]] = value

        # Expand dictionary: convert key1.key2 = value into 'key1 : { key2 : value}'
        dim_conditions = cubes.util.expand_dictionary(dim_conditions)
        condition.update(dim_conditions)

        ###################################################
        # 3. Perform selection - find records in collection

        # The filter is passed positionally: its keyword name is `spec` in
        # PyMongo 2.x but `filter` from PyMongo 3.0 on.
        cursor = self.collection.find(condition)
        return cursor
Beispiel #3
0
    def compute_cell(self, selector):
        """
        Compute aggregation for cell specified by selector. Cell is computed using MongoDB
        aggregate_ function on `fact_collection`. Computed records are inserted into
        `cube_collection` and they contain:

        * key fields used for grouping
        * aggregated measures suffixed with `_sum`, for example: `amount_sum`
        * record count in `record_count`
        * aggregate flag (field named by `aggregate_flag_field`) set to ``True``
        * cell selector as `_selector` (configurable) with dimension names as keys and current
          dimension levels as values, for example: {"date": ["year", "month"] }
        * cell reference as `_cell` (configurable) with dimension names as keys and level
          keys forming dimension paths as values, for example: {"date": [2010, 10] }

        .. _aggregate: http://www.mongodb.org/display/DOCS/Aggregation#Aggregation-Group

        :Arguments:
            * `selector` is a list of tuples: (dimension, levels), where each level is an
              object providing `name`, `key` and `attributes`

        Returns nothing; the result is the set of records inserted into `cube_collection`.

        .. note::

            Only 'sum' aggregation is being computed. Other aggregations might be implemented in the
            future, such as average, min, max, rank, ...
        """
        self.log.info("computing selector")

        selector_record = {}
        # (dimension name, [physical key field per level]) in selector order.
        # Resolved once here and reused for every grouped record below.
        dimension_keys = []
        keys = []
        fields = []

        for dimsel in selector:
            dim = dimsel[0]
            levels = dimsel[1]
            self.log.info("-- dimension: %s levels: %s", dim.name, levels)

            level_names = []
            level_keys = []
            for level in levels:
                level_names.append(level.name)
                mapped = base.dimension_field_mapping(self.cube, dim,
                                                      level.key)
                level_keys.append(mapped[0])

                for field in level.attributes:
                    mapped = base.dimension_field_mapping(
                        self.cube, dim, field)
                    fields.append(mapped[0])

            selector_record[dim.name] = level_names
            dimension_keys.append((dim.name, level_keys))
            keys.extend(level_keys)

        ###########################################
        # Prepare group command parameters
        #
        # condition - filter condition for find() (check for existence of keys)
        # keys - list of keys to be used for grouping
        # fields - mapped attribute and measure fields (only logged here)

        condition = {}
        for mapped_key in keys:
            condition[mapped_key] = {"$exists": True}

        for measure in self.measures:
            mapping = base.fact_field_mapping(self.cube, measure)
            fields.append(mapping[0])

        self.log.info("condition: %s", condition)
        self.log.info("keys: %s", keys)
        self.log.info("fields: %s", fields)

        # Exclude aggregates:
        condition[self.aggregate_flag_field] = {'$ne': True}

        ####################################################
        # Prepare group functions + reduce + finalize
        #

        initial = {"record_count": 0}

        # NOTE(review): the reduce function reads `doc.<measure>` using the
        # logical measure name, not the physical field from fact_field_mapping()
        # - confirm that both are the same for the cubes in use.
        aggregate_lines = []
        for measure in self.measures:
            measure_agg_name = measure + "_sum"
            aggregate_lines.append(
                "out.%s += doc.%s;" % (measure_agg_name, measure))
            initial[measure_agg_name] = 0

        reduce_function = '''
        function(doc, out) {
                out.record_count ++;
                %(aggregate_lines)s
        }\n''' % {
            "aggregate_lines": "\n".join(aggregate_lines)
        }

        cursor = self.fact_collection.group(key=keys,
                                            condition=condition,
                                            initial=initial,
                                            reduce=reduce_function,
                                            finalize=None)

        for record in cursor:
            # use: cubes.commons.expand_dictionary(record)

            # Dimension paths are read from the grouped record before it is
            # handed over to construct_record().
            cell = {}
            for dim_name, level_keys in dimension_keys:
                cell[dim_name] = [record[key] for key in level_keys]

            record = self.construct_record(record)
            record[self.aggregate_flag_field] = True
            # NOTE(review): aggregate() filters on `self.cell_selector_name`;
            # confirm it names the same field as `self.cell_record_name`.
            record[self.cell_record_name] = selector_record
            record[self.cell_reference_record_name] = cell
            self.cube_collection.insert(record)
Beispiel #4
0
    def compute_cell(self, selector):
        """
        Compute aggregation for cell specified by selector. Cell is computed using MongoDB
        aggregate_ function on `fact_collection`. Computed records are inserted into
        `cube_collection` and they contain:

        * key fields used for grouping
        * aggregated measures suffixed with `_sum`, for example: `amount_sum`
        * record count in `record_count`
        * aggregate flag (field named by `aggregate_flag_field`) set to ``True``
        * cell selector as `_selector` (configurable) with dimension names as keys and current
          dimension levels as values, for example: {"date": ["year", "month"] }
        * cell reference as `_cell` (configurable) with dimension names as keys and level
          keys forming dimension paths as values, for example: {"date": [2010, 10] }

        .. _aggregate: http://www.mongodb.org/display/DOCS/Aggregation#Aggregation-Group

        :Arguments:
            * `selector` is a list of tuples: (dimension, levels), where each level is an
              object providing `name`, `key` and `attributes`

        Returns nothing; the result is the set of records inserted into `cube_collection`.

        .. note::

            Only 'sum' aggregation is being computed. Other aggregations might be implemented in the
            future, such as average, min, max, rank, ...
        """
        self.log.info("computing selector")

        selector_record = {}
        # (dimension name, [physical key field per level]) in selector order.
        # Resolved once here and reused for every grouped record below.
        dimension_keys = []
        keys = []
        fields = []

        for dimsel in selector:
            dim = dimsel[0]
            levels = dimsel[1]
            self.log.info("-- dimension: %s levels: %s", dim.name, levels)

            level_names = []
            level_keys = []
            for level in levels:
                level_names.append(level.name)
                mapped = base.dimension_field_mapping(self.cube, dim, level.key)
                level_keys.append(mapped[0])

                for field in level.attributes:
                    mapped = base.dimension_field_mapping(self.cube, dim, field)
                    fields.append(mapped[0])

            selector_record[dim.name] = level_names
            dimension_keys.append((dim.name, level_keys))
            keys.extend(level_keys)

        ###########################################
        # Prepare group command parameters
        #
        # condition - filter condition for find() (check for existence of keys)
        # keys - list of keys to be used for grouping
        # fields - mapped attribute and measure fields (only logged here)

        condition = {}
        for mapped_key in keys:
            condition[mapped_key] = {"$exists": True}

        for measure in self.measures:
            mapping = base.fact_field_mapping(self.cube, measure)
            fields.append(mapping[0])

        self.log.info("condition: %s", condition)
        self.log.info("keys: %s", keys)
        self.log.info("fields: %s", fields)

        # Exclude aggregates:
        condition[self.aggregate_flag_field] = {'$ne': True}

        ####################################################
        # Prepare group functions + reduce + finalize
        #

        initial = {"record_count": 0}

        # NOTE(review): the reduce function reads `doc.<measure>` using the
        # logical measure name, not the physical field from fact_field_mapping()
        # - confirm that both are the same for the cubes in use.
        aggregate_lines = []
        for measure in self.measures:
            measure_agg_name = measure + "_sum"
            line = "out.%s += doc.%s;" % (measure_agg_name, measure)
            aggregate_lines.append(line)
            initial[measure_agg_name] = 0

        reduce_function = '''
        function(doc, out) {
                out.record_count ++;
                %(aggregate_lines)s
        }\n''' % {"aggregate_lines": "\n".join(aggregate_lines)}

        cursor = self.fact_collection.group(key=keys, condition=condition,
                                            initial=initial, reduce=reduce_function,
                                            finalize=None)

        for record in cursor:
            # use: cubes.commons.expand_dictionary(record)

            # Dimension paths are read from the grouped record before it is
            # handed over to construct_record().
            cell = {}
            for dim_name, level_keys in dimension_keys:
                cell[dim_name] = [record[key] for key in level_keys]

            record = self.construct_record(record)
            record[self.aggregate_flag_field] = True
            # NOTE(review): aggregate() filters on `self.cell_selector_name`;
            # confirm it names the same field as `self.cell_record_name`.
            record[self.cell_record_name] = selector_record
            record[self.cell_reference_record_name] = cell
            self.cube_collection.insert(record)