Ejemplo n.º 1
0
    def save(self, dataset, formula, name, group_str=None):
        """Parse, save, and calculate a formula.

        Validate `formula` and `group_str` for the given `dataset`. If the
        formula and group are valid for the dataset, then save a new
        calculation for them under `name`. Finally, create a background task
        to compute the calculation.

        Calculations are initially saved in a **pending** state, after the
        calculation has finished processing it will be in a **ready** state.

        :param dataset: The DataSet to save.
        :param formula: The formula to save.
        :param name: The name of the formula.
        :param group_str: Columns to group on.
        :type group_str: String, list or strings, or None.

        :raises: `ParseError` if an invalid formula was supplied.
        """
        # ensure that the formula is parsable
        groups = self.split_groups(group_str) if group_str else []
        Parser.validate(dataset, formula, groups)
        aggregation = Parser.parse_aggregation(formula)

        if aggregation:
            # set group if aggregation and group unset
            group_str = group_str or ''

            # check that name is unique for aggregation
            aggregated_dataset = dataset.aggregated_dataset(groups)

            if aggregated_dataset:
                name = _check_name_and_make_unique(name, aggregated_dataset)

        else:
            # set group if aggregation and group unset
            name = _check_name_and_make_unique(name, dataset)

        record = {
            DATASET_ID: dataset.dataset_id,
            self.AGGREGATION: aggregation,
            self.FORMULA: formula,
            self.GROUP: group_str,
            self.NAME: name,
            self.STATE: self.STATE_PENDING,
        }
        super(self.__class__, self).save(record)

        return self
Ejemplo n.º 2
0
    def save(self, dataset, formula, name, group_str=None):
        """Parse, save, and calculate a formula.

        Validate `formula` and `group_str` for the given `dataset`. If the
        formula and group are valid for the dataset, then save a new
        calculation for them under `name`. Finally, create a background task
        to compute the calculation.

        Calculations are initially saved in a **pending** state, after the
        calculation has finished processing it will be in a **ready** state.

        :param dataset: The DataSet to save.
        :param formula: The formula to save.
        :param name: The name of the formula.
        :param group_str: Columns to group on.
        :type group_str: String, list or strings, or None.

        :raises: `ParseError` if an invalid formula was supplied.
        """
        # ensure that the formula is parsable
        groups = self.split_groups(group_str) if group_str else []
        Parser.validate(dataset, formula, groups)
        aggregation = Parser.parse_aggregation(formula)

        if aggregation:
            # set group if aggregation and group unset
            group_str = group_str or ''

            # check that name is unique for aggregation
            aggregated_dataset = dataset.aggregated_dataset(groups)

            if aggregated_dataset:
                name = _check_name_and_make_unique(name, aggregated_dataset)

        else:
            # set group if aggregation and group unset
            name = _check_name_and_make_unique(name, dataset)

        record = {
            DATASET_ID: dataset.dataset_id,
            self.AGGREGATION: aggregation,
            self.FORMULA: formula,
            self.GROUP: group_str,
            self.NAME: name,
            self.STATE: self.STATE_PENDING,
        }
        super(self.__class__, self).save(record)

        return self
Ejemplo n.º 3
0
def __create_aggregator(dataset, formula, name, groups, dframe=None):
    # TODO this should work with index eventually
    columns = parse_columns(dataset, formula, name, dframe, no_index=True)

    dependent_columns = Parser.dependent_columns(formula, dataset)
    aggregation = Parser.parse_aggregation(formula)

    # get dframe with only the necessary columns
    select = combine_dicts({group: 1 for group in groups},
                           {col: 1 for col in dependent_columns})

    # ensure at least one column (MONGO_ID) for the count aggregation
    query_args = QueryArgs(select=select or {MONGO_ID: 1})
    dframe = dataset.dframe(query_args=query_args, keep_mongo_keys=not select)

    return Aggregator(dframe, groups, aggregation, name, columns)