def updated_dframe(self, dataset, formula, dframe):
    """Create a new aggregation and return the updated dframe."""
    # build column arguments from original dframe
    columns = parse_columns(dataset, formula, self.name, self.dframe)
    new_dframe = self.aggregation.eval(columns)
    new_columns = [x for x in new_dframe.columns if x not in self.groups]

    dframe = dframe.drop(new_columns, axis=1)
    dframe = group_join(self.groups, new_dframe, dframe)

    return dframe

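# Illustrative sketch (not part of the original module): the drop-then-join
# update above, pictured with plain pandas. It assumes `group_join` behaves
# like a left merge on the group columns; the column names and values below
# are hypothetical.
def _example_drop_then_join():
    from pandas import DataFrame

    groups = ['city']
    dframe = DataFrame({'city': ['a', 'b'], 'amount': [1, 2],
                        'sum_amount': [0, 0]})
    new_dframe = DataFrame({'city': ['a', 'b'], 'sum_amount': [1, 2]})

    # drop the stale aggregate columns, then re-join the recomputed ones
    new_columns = [x for x in new_dframe.columns if x not in groups]
    dframe = dframe.drop(new_columns, axis=1)

    return dframe.merge(new_dframe, on=groups, how='left')
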
def update_calculations(record, dataset):
    """Update `record` with the dataset's non-aggregate calculations."""
    calculations = dataset.calculations(include_aggs=False)

    if len(calculations):
        dframe = DataFrame(data=record, index=[0])
        labels_to_slugs = dataset.schema.labels_to_slugs

        for c in calculations:
            columns = parse_columns(dataset, c.formula, c.name, dframe=dframe)
            record[labels_to_slugs[c.name]] = columns[0][0]

    return record

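# Illustrative sketch (not part of the original module): the single-record
# update above builds a one-row frame from the record, evaluates each formula
# against it, and writes the first value back under the column's slug. The
# record and formula below are hypothetical.
def _example_update_record():
    from pandas import DataFrame

    record = {'amount': 5}
    dframe = DataFrame(data=record, index=[0])

    # stands in for parse_columns(dataset, c.formula, c.name, dframe=dframe)
    column = dframe['amount'] * 2

    record['double_amount'] = column[0]

    return record  # {'amount': 5, 'double_amount': 10}
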
def __create_aggregator(dataset, formula, name, groups, dframe=None):
    # TODO this should work with index eventually
    columns = parse_columns(dataset, formula, name, dframe, no_index=True)

    dependent_columns = Parser.dependent_columns(formula, dataset)
    aggregation = Parser.parse_aggregation(formula)

    # get dframe with only the necessary columns
    select = combine_dicts({group: 1 for group in groups},
                           {col: 1 for col in dependent_columns})

    # ensure at least one column (MONGO_ID) for the count aggregation
    query_args = QueryArgs(select=select or {MONGO_ID: 1})
    dframe = dataset.dframe(query_args=query_args, keep_mongo_keys=not select)

    return Aggregator(dframe, groups, aggregation, name, columns)

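# Illustrative sketch (not part of the original module): the Mongo-style
# projection built above selects only the group and dependent columns, and
# falls back to the id column when there are none (e.g. a bare count). The
# groups, dependent columns, and '_id' key below are hypothetical stand-ins.
def _example_aggregator_select(groups=('city',), dependent_columns=('amount',)):
    select = dict(
        [(group, 1) for group in groups] +
        [(col, 1) for col in dependent_columns])

    # an empty projection would select no columns, so keep at least the id
    return select or {'_id': 1}
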
def calculate_columns(dataset, calculations):
    """Calculate and store new columns for `calculations`.

    The new columns are joined to the Calculation dframe and replace the
    dataset's observations.

    .. note::

        This can result in race-conditions when:

        - deleting ``controllers.Datasets.DELETE``
        - updating ``controllers.Datasets.POST([dataset_id])``

        Therefore, perform these actions asynchronously.

    :param dataset: The dataset to calculate for.
    :param calculations: A list of calculations.
    """
    new_cols = None

    for c in calculations:
        if c.aggregation:
            aggregator = __create_aggregator(
                dataset, c.formula, c.name, c.groups_as_list)
            aggregator.save(dataset)
        else:
            columns = parse_columns(dataset, c.formula, c.name)

            if new_cols is None:
                new_cols = DataFrame(columns[0])
            else:
                new_cols = new_cols.join(columns[0])

    if new_cols is not None:
        dataset.update_observations(new_cols)

    # propagate calculation to any merged child datasets
    [__propagate_column(x, dataset) for x in dataset.merged_datasets]

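# Illustrative sketch (not part of the original module): how the `new_cols`
# accumulator above grows, assuming each `parse_columns(...)[0]` is a named
# pandas Series. The column names and values are hypothetical.
def _example_accumulate_new_cols():
    from pandas import DataFrame, Series

    computed = [Series([1, 2], name='calc_a'), Series([3, 4], name='calc_b')]

    new_cols = None

    for column in computed:
        # the first column seeds the frame; later ones are joined on the index
        new_cols = DataFrame(column) if new_cols is None else new_cols.join(column)

    return new_cols  # columns: calc_a, calc_b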