def save(self, dataset, formula, name, group_str=None):
    """Validate a formula and store it as a pending calculation.

    `formula` and the groups derived from `group_str` are validated
    against `dataset`. If validation passes, `name` is made unique and a
    record for the calculation is saved in the **pending** state.

    This method only persists the record; it does not compute the
    calculation itself. Presumably the computation (and the transition to
    the **ready** state) is triggered elsewhere -- confirm against callers.

    :param dataset: The DataSet the calculation is saved for.
    :param formula: The formula to save.
    :param name: The requested name of the formula; it may be altered to
        keep it unique.
    :param group_str: Columns to group on.
    :type group_str: String, list of strings, or None.

    :raises: `ParseError` if an invalid formula was supplied.

    :returns: This instance.
    """
    # Split the groups up front so validation covers them as well.
    groups = []
    if group_str:
        groups = self.split_groups(group_str)

    # Ensure that the formula is parsable before anything is stored.
    Parser.validate(dataset, formula, groups)
    aggregation = Parser.parse_aggregation(formula)

    if not aggregation:
        # Plain calculation: the name must be unique within the dataset.
        name = _check_name_and_make_unique(name, dataset)
    else:
        # Aggregations always store a group value, defaulting to ''.
        if not group_str:
            group_str = ''

        # The name only needs to be unique within the aggregated dataset
        # for these groups, and only when that dataset already exists.
        aggregated_dataset = dataset.aggregated_dataset(groups)
        if aggregated_dataset:
            name = _check_name_and_make_unique(name, aggregated_dataset)

    record = {
        DATASET_ID: dataset.dataset_id,
        self.AGGREGATION: aggregation,
        self.FORMULA: formula,
        self.GROUP: group_str,
        self.NAME: name,
        self.STATE: self.STATE_PENDING,
    }

    # NOTE(review): `super(self.__class__, self)` recurses forever if a
    # subclass inherits this method; naming the defining class explicitly
    # would avoid that. Left unchanged here to preserve behavior.
    super(self.__class__, self).save(record)

    return self
def __create_aggregator(dataset, formula, name, groups, dframe=None):
    """Build an `Aggregator` for `formula` over the columns it needs.

    Only the group columns and the columns the formula depends on are
    selected from `dataset`. When that selection is empty, `MONGO_ID` is
    selected instead and the mongo keys are kept, so the frame still has
    a column to aggregate over.

    :param dataset: The dataset the formula is evaluated against.
    :param formula: The aggregation formula.
    :param name: The name passed to column parsing and to the Aggregator.
    :param groups: Iterable of column names to group on.
    :param dframe: Optional frame forwarded to `parse_columns` only; the
        frame handed to the Aggregator is always read from `dataset`.

    :returns: An `Aggregator` for the selected frame.
    """
    # TODO this should work with index eventually
    columns = parse_columns(dataset, formula, name, dframe, no_index=True)

    dependent_columns = Parser.dependent_columns(formula, dataset)
    aggregation = Parser.parse_aggregation(formula)

    # Restrict the query to the group columns plus the formula's inputs.
    group_select = {group: 1 for group in groups}
    dependent_select = {col: 1 for col in dependent_columns}
    select = combine_dicts(group_select, dependent_select)

    # Ensure at least one column (MONGO_ID) for the count aggregation.
    if select:
        query_select = select
    else:
        query_select = {MONGO_ID: 1}

    query_args = QueryArgs(select=query_select)
    selected_dframe = dataset.dframe(
        query_args=query_args, keep_mongo_keys=not select)

    return Aggregator(selected_dframe, groups, aggregation, name, columns)