예제 #1
0
    def _test_calculator(self):
        """Run every formula in `self.calculations` and check its results.

        Stores the dataset's dframe and column counters on the test case,
        derives the label and slug lists from the dataset schema, then for
        each formula: validates it against the first row, calculates it as a
        column named ``test-<index>`` and hands the column name and formula
        to `_test_calculation_results`.
        """
        self.dframe = self.dataset.dframe()
        # NOTE(review): `irow` was removed from later pandas releases
        # (`iloc[0]` is the replacement) -- confirm the pinned version.
        first_row = self.dframe.irow(0)

        self.start_num_cols = len(self.dframe.columns.tolist())
        self.added_num_cols = 0

        # map every column label to its key in the schema
        slugs_by_label = {}
        for slug, attrs in self.dataset.schema.items():
            slugs_by_label[attrs[Dataset.LABEL]] = slug

        # unzip the mapping into two parallel lists; the unpacking fails
        # when the schema is empty
        labels, slugs = zip(*slugs_by_label.items())
        self.label_list = list(labels)
        self.slugified_key_list = list(slugs)

        for position, formula in enumerate(self.calculations):
            column_name = 'test-%s' % position
            self.parser.validate_formula(formula, first_row)

            # a new Calculator and a new group split are made per formula
            Calculator(self.dataset).calculate_column(
                formula, column_name,
                self.dataset.split_groups(self.group))

            # re-read the label mapping from the schema on every pass
            self.column_labels_to_slugs = self.dataset.schema.labels_to_slugs

            self._test_calculation_results(column_name, formula)
예제 #2
0
    def save(self, dataset, formula, name, group_str=None):
        """Validate a formula for `dataset` and save it as a calculation.

        `formula` and the groups split out of `group_str` are validated by a
        `Calculator` built for `dataset`. The name is then passed through the
        uniqueness helper: against the aggregated dataset when the formula is
        an aggregation and such a dataset exists, against `dataset` itself
        when the formula is not an aggregation. The resulting record is saved
        in the **pending** state.

        NOTE(review): no background task is started in this method; the
        computation is presumably scheduled by the caller -- confirm.

        :param dataset: The dataset the calculation belongs to.
        :param formula: The formula to save.
        :param name: The requested name of the calculation.
        :param group_str: Columns to group on.
        :type group_str: String, list of strings, or None.

        :raises: `ParseError` if an invalid formula was supplied (per the
            original documentation; presumably raised during validation).
        :returns: This instance.
        """
        validator = Calculator(dataset)

        # no group string means no groups at all
        groups = []
        if group_str:
            groups = self.split_groups(group_str)

        # validating also ensures that the formula is parsable
        aggregation = validator.validate(formula, groups)

        if not aggregation:
            # plain calculation: name must be unique within the dataset
            name = self.__check_name_and_make_unique(name, dataset)
        else:
            # aggregations always store a group, defaulting to ''
            group_str = group_str or ''

            # name only needs checking when the aggregated dataset exists
            aggregated = dataset.aggregated_dataset(groups)

            if aggregated:
                name = self.__check_name_and_make_unique(name, aggregated)

        document = {
            DATASET_ID: dataset.dataset_id,
            self.AGGREGATION: aggregation,
            self.FORMULA: formula,
            self.GROUP: group_str,
            self.NAME: name,
            self.STATE: self.STATE_PENDING,
        }
        # NOTE(review): `super(self.__class__, self)` recurses forever when
        # this method runs on a subclass instance; naming the class
        # explicitly would be safer.
        super(self.__class__, self).save(document)

        return self
예제 #3
0
파일: dataset.py 프로젝트: helioid/bamboo
    def add_observations(self, json_data):
        """Update this dataset with the observations in `json_data`.

        Registers a new update id as pending, decodes `json_data`, builds a
        raw dframe from it with a `Calculator` for this dataset, checks that
        the update is valid and then passes `calculate_updates` to
        `call_async`.

        :param json_data: JSON-encoded string holding the new data.
        """
        update_id = uuid.uuid4().hex
        self.add_pending_update(update_id)

        new_data = json.loads(json_data)
        calculator = Calculator(self)

        # incoming keys are translated using the schema's label-to-slug map
        new_dframe_raw = calculator.dframe_from_update(
            new_data, self.schema.labels_to_slugs)
        calculator._check_update_is_valid(new_dframe_raw)

        call_async(calculator.calculate_updates, calculator, new_data,
                   new_dframe_raw=new_dframe_raw, update_id=update_id)
예제 #4
0
파일: dataset.py 프로젝트: sparkplug/bamboo
    def add_observations(self, new_data):
        """Update this dataset with `new_data`.

        A new update id is registered as pending, `new_data` is passed
        through `to_list`, a raw dframe is built and validated by a
        `Calculator` for this dataset, the dataset cache is cleared and
        `calculate_updates` is passed to `call_async`.

        :param new_data: The new data to add to this dataset.
        """
        pending_id = uuid.uuid4().hex
        self.add_pending_update(pending_id)

        rows = to_list(new_data)
        calc = Calculator(self)

        # incoming keys are translated using the schema's label-to-slug map
        raw_dframe = calc.dframe_from_update(
            rows, self.schema.labels_to_slugs)
        calc._check_update_is_valid(raw_dframe)
        calc.dataset.clear_cache()

        async_kwargs = {'new_dframe_raw': raw_dframe,
                        'update_id': pending_id}
        call_async(calc.calculate_updates, calc, rows, **async_kwargs)
예제 #5
0
파일: dataset.py 프로젝트: j/bamboo
    def add_observations(self, new_data):
        """Update this dataset with `new_data`.

        Registers a new update id as pending, wraps `new_data` in a list
        when it is not one already, builds a raw dframe from it with a
        `Calculator` for this dataset, checks that the update is valid and
        then passes `calculate_updates` to `call_async`.

        :param new_data: A single item or a list of items to add.
        """
        update_id = uuid.uuid4().hex
        self.add_pending_update(update_id)

        # a lone item is treated as a one-element list
        if not isinstance(new_data, list):
            new_data = [new_data]

        calculator = Calculator(self)

        # incoming keys are translated using the schema's label-to-slug map
        new_dframe_raw = calculator.dframe_from_update(
            new_data, self.schema.labels_to_slugs)
        calculator._check_update_is_valid(new_dframe_raw)

        call_async(calculator.calculate_updates, calculator, new_data,
                   new_dframe_raw=new_dframe_raw, update_id=update_id)
예제 #6
0
def calculate_task(calculations, dataset):
    """Background task to run a batch of calculations.

    The first calculation is asked to restart the batch if the dataset has
    pending calculations, all columns are then calculated with a single
    `Calculator`, and finally each calculation gets its dependencies (and,
    for aggregations, its aggregated dataset id) recorded and is marked
    ready.

    NOTE(review): earlier documentation said a failing calculation is set to
    failed and the exception re-raised; no such handling is visible in this
    function -- confirm where it happens.

    :param calculations: Calculations to run; an empty batch is a no-op.
    :param dataset: Dataset to run the calculations on.
    """
    if not calculations:
        # nothing to run; `calculations[0]` below would raise IndexError
        return

    # block until other calculations for this dataset are finished
    calculations[0].restart_if_has_pending(dataset, calculations[1:])

    calculator = Calculator(dataset)
    calculator.calculate_columns(calculations)

    for calculation in calculations:
        calculation.add_dependencies(dataset, calculator.dependent_columns)

        if calculation.aggregation is not None:
            # aggregated datasets are looked up by the calculation's group
            aggregated_id = dataset.aggregated_datasets_dict[calculation.group]
            calculation.set_aggregation_id(aggregated_id)

        calculation.ready()
예제 #7
0
def calculate_task(calculation, dataset):
    """Background task to run a single calculation on a dataset.

    NOTE(review): earlier documentation said a failing calculation is set to
    failed and the exception re-raised; no such handling is visible in this
    function -- confirm where it happens.

    :param calculation: Calculation to run.
    :param dataset: Dataset to run calculation on.
    """
    # block until other calculations for this dataset are finished
    calculation.restart_if_has_pending(dataset)
    dataset.clear_summary_stats()

    calc = Calculator(dataset)
    calc.calculate_column(
        calculation.formula,
        calculation.name,
        calculation.groups_as_list,
    )
    calculation.add_dependencies(dataset, calc.dependent_columns())

    is_aggregation = calculation.aggregation is not None
    if is_aggregation:
        # reload before looking up the aggregated dataset for this group
        dataset.reload()
        group_key = calculation.group
        calculation.set_aggregation_id(
            dataset.aggregated_datasets_dict[group_key])

    calculation.ready()