コード例 #1
0
ファイル: calculator.py プロジェクト: astrorobotics/bamboo
    def _update_aggregate_dataset(self, formula, new_dframe, name, groups,
                                  agg_dataset):
        """Recompute an aggregation and push the result to merged datasets.

        The work is done in this order:

            - parse `formula` into an aggregation and its columns with
              `self.make_columns`
            - build an `Aggregator` over this calculator's dataset and dframe
              and ask it to update `agg_dataset`
            - convert the updated aggregated dataframe with `to_jsondict`
            - for every dataset merged from `agg_dataset`, remove the
              observations that came from `agg_dataset`, then hand that
              dataset's `calculate_updates` to `call_async` with the new data

        :param formula: The formula to execute.
        :param new_dframe: The DataFrame to aggregate on.
        :param name: The name of the aggregation.
        :param groups: A column or columns to group on.
        :type groups: String, list of strings, or None.
        :param agg_dataset: The DataSet to store the aggregation in.
        """
        # Parse the formula into the aggregation and the columns it needs.
        aggregation, new_columns = self.make_columns(formula, name, new_dframe)

        aggregator = Aggregator(
            self.dataset, self.dframe, groups, aggregation, name)

        # The updated aggregate is passed to children in jsondict form.
        updated_data = aggregator.update(
            agg_dataset, self, formula, new_columns).to_jsondict()

        for child_dataset in agg_dataset.merged_datasets:
            # Drop the rows the child previously received from agg_dataset.
            child_dataset.remove_parent_observations(agg_dataset.dataset_id)

            # Recalculate the child from the new aggregated data.
            child_calculator = Calculator(child_dataset)
            call_async(
                child_calculator.calculate_updates,
                child_calculator,
                updated_data,
                parent_dataset_id=agg_dataset.dataset_id)
コード例 #2
0
ファイル: calculator.py プロジェクト: astrorobotics/bamboo
    def calculate_column(self, formula, name, groups=None):
        """Calculate a new column from `formula` and store it as `name`.

        For a non-aggregate formula the new column is joined to `dframe` and
        the result replaces the observations in `self.dataset`.  For an
        aggregate formula an `Aggregator` is built and saved instead; `groups`
        only applies to that case.  Afterwards the calculation is propagated
        to every dataset merged from `self.dataset`.

        .. note::

            This can result in race-conditions when:

            - deleting ``controllers.Datasets.DELETE``
            - updating ``controllers.Datasets.POST([dataset_id])``

            Therefore, perform these actions asynchronously.

        :param formula: The formula parsed by `self.parser` and applied to
            `self.dframe`.
        :param name: The name of the new column or aggregate column.
        :param groups: A list of columns to group on for aggregate
            calculations.
        """
        self._ensure_dframe()

        aggregation, new_columns = self.make_columns(formula, name)

        if not aggregation:
            # Plain column: append it to the current dframe and store it.
            joined_dframe = self.dframe.join(new_columns[0])
            self.dataset.replace_observations(joined_dframe)
        else:
            aggregator = Aggregator(
                self.dataset, self.dataset.dframe(), groups, aggregation,
                name)
            aggregator.save(new_columns)

        # Push the calculation down to any merged child datasets.
        for child_dataset in self.dataset.merged_datasets:
            Calculator(child_dataset).propagate_column(self.dataset)
コード例 #3
0
def __create_aggregator(dataset, formula, name, groups, dframe=None):
    """Build an `Aggregator` for `formula` over only the columns it needs.

    :param dataset: The dataset the formula is parsed against and whose
        dframe is loaded.
    :param formula: The aggregation formula.
    :param name: The name passed to `parse_columns` and the `Aggregator`.
    :param groups: An iterable of column names to group on.
    :param dframe: Optional dframe forwarded to `parse_columns` only; the
        dframe given to the `Aggregator` is always loaded from `dataset`.

    :returns: An `Aggregator` built from the selected dframe.
    """
    # TODO this should work with index eventually
    columns = parse_columns(dataset, formula, name, dframe, no_index=True)

    dependent_columns = Parser.dependent_columns(formula, dataset)
    aggregation = Parser.parse_aggregation(formula)

    # Select only the group columns and the columns the formula depends on.
    group_select = dict.fromkeys(groups, 1)
    dependent_select = dict.fromkeys(dependent_columns, 1)
    select = combine_dicts(group_select, dependent_select)

    # A count aggregation may need no columns at all; fall back to MONGO_ID
    # so that at least one column is selected.
    if select:
        query_args = QueryArgs(select=select)
    else:
        query_args = QueryArgs(select={MONGO_ID: 1})

    # Mongo keys are kept only when the fallback selection is in use.
    dframe = dataset.dframe(query_args=query_args, keep_mongo_keys=not select)

    return Aggregator(dframe, groups, aggregation, name, columns)