def _test_calculator(self):
    """Run every formula in `self.calculations` and check its results.

    Loads the dataset's dataframe, records the starting column count,
    derives the label and slug lists from the dataset schema, and then for
    each formula: validates it against the first row, calculates it as a
    new column named ``test-<index>``, refreshes the label-to-slug mapping
    and delegates verification to `_test_calculation_results`.
    """
    self.dframe = self.dataset.dframe()
    first_row = self.dframe.irow(0)

    # Column bookkeeping that is stored on the test instance.
    self.start_num_cols = len(self.dframe.columns.tolist())
    self.added_num_cols = 0

    # Map each schema column's label to its column name (slug).
    labels_to_slugs = {}
    for slug, attrs in self.dataset.schema.items():
        labels_to_slugs[attrs[Dataset.LABEL]] = slug

    # Unzip the mapping into two parallel lists: labels and slugs.
    labels, slugs = zip(*labels_to_slugs.items())
    self.label_list = list(labels)
    self.slugified_key_list = list(slugs)

    for index, formula in enumerate(self.calculations):
        column_name = 'test-%s' % index

        # The formula must parse against a real row before calculating.
        self.parser.validate_formula(formula, first_row)

        calculator = Calculator(self.dataset)
        group_columns = self.dataset.split_groups(self.group)
        calculator.calculate_column(formula, column_name, group_columns)

        # Re-read the mapping from the schema after the column was
        # calculated, before the results are checked.
        self.column_labels_to_slugs = self.dataset.schema.labels_to_slugs

        self._test_calculation_results(column_name, formula)
def save(self, dataset, formula, name, group_str=None):
    """Validate a formula for `dataset` and store it as a pending record.

    The formula and the groups parsed from `group_str` are validated by a
    `Calculator` built for `dataset`. The name is then made unique:

    - for a non-aggregation, against `dataset`;
    - for an aggregation, against the aggregated dataset for the groups,
      when one exists (otherwise the name is left as given).

    The record is saved with its state set to `STATE_PENDING`. This method
    does not run the calculation itself.

    :param dataset: The dataset the calculation belongs to.
    :param formula: The formula to validate and save.
    :param name: The requested name of the calculation.
    :param group_str: Columns to group on.
    :type group_str: String, list of strings, or None.

    :raises: `ParseError` if an invalid formula was supplied (presumably
        raised from `Calculator.validate` -- confirm).

    :returns: This instance.
    """
    calculator = Calculator(dataset)

    # Validation doubles as the parsability check for the formula.
    if group_str:
        groups = self.split_groups(group_str)
    else:
        groups = []
    aggregation = calculator.validate(formula, groups)

    if not aggregation:
        # Plain calculation: the name must be unique within the dataset.
        name = self.__check_name_and_make_unique(name, dataset)
    else:
        # Aggregations always carry a group, defaulting to the empty one.
        if not group_str:
            group_str = ''

        # The name only needs to be unique within the aggregated dataset,
        # and only when that dataset already exists.
        aggregated_dataset = dataset.aggregated_dataset(groups)
        if aggregated_dataset:
            name = self.__check_name_and_make_unique(
                name, aggregated_dataset)

    record = {
        DATASET_ID: dataset.dataset_id,
        self.AGGREGATION: aggregation,
        self.FORMULA: formula,
        self.GROUP: group_str,
        self.NAME: name,
        self.STATE: self.STATE_PENDING,
    }
    # NOTE(review): super(self.__class__, self) recurses forever if this
    # class is ever subclassed; naming the class explicitly would be safer.
    super(self.__class__, self).save(record)

    return self
def add_observations(self, json_data):
    """Update this dataset with the observations encoded in `json_data`.

    A fresh update id is registered as pending, the JSON is decoded, and a
    `Calculator` for this dataset turns the decoded data into a raw
    dataframe using the schema's label-to-slug mapping. Once that dataframe
    passes the calculator's validity check, `calculate_updates` is
    dispatched through `call_async`.

    :param json_data: JSON-encoded observations to add.

    :raises: `ValueError` if `json_data` is not valid JSON. Whatever
        `_check_update_is_valid` raises for an invalid update also
        propagates.
    """
    update_id = uuid.uuid4().hex
    # Registered before validation; the id is passed on to the update task.
    self.add_pending_update(update_id)

    new_data = json.loads(json_data)
    calculator = Calculator(self)

    new_dframe_raw = calculator.dframe_from_update(
        new_data, self.schema.labels_to_slugs)
    calculator._check_update_is_valid(new_dframe_raw)

    call_async(calculator.calculate_updates, calculator, new_data,
               new_dframe_raw=new_dframe_raw, update_id=update_id)
def add_observations(self, new_data):
    """Update this dataset with the observations in `new_data`.

    Registers a new pending update id, normalises `new_data` with
    `to_list`, and has a `Calculator` for this dataset build a raw
    dataframe from it using the schema's label-to-slug mapping. After the
    dataframe passes the calculator's validity check, the cache of the
    calculator's dataset is cleared and `calculate_updates` is dispatched
    through `call_async`.

    :param new_data: The observations to add; passed through `to_list`.
    """
    pending_id = uuid.uuid4().hex
    self.add_pending_update(pending_id)

    observations = to_list(new_data)
    calculator = Calculator(self)

    labels_to_slugs = self.schema.labels_to_slugs
    raw_dframe = calculator.dframe_from_update(observations, labels_to_slugs)
    calculator._check_update_is_valid(raw_dframe)

    # Cleared only after the update has been accepted as valid.
    calculator.dataset.clear_cache()

    call_async(
        calculator.calculate_updates,
        calculator,
        observations,
        new_dframe_raw=raw_dframe,
        update_id=pending_id,
    )
def add_observations(self, new_data):
    """Update this dataset with the observations in `new_data`.

    A fresh update id is registered as pending and `new_data` is wrapped in
    a list when it is not one already. A `Calculator` for this dataset then
    builds a raw dataframe from the data using the schema's label-to-slug
    mapping. Once that dataframe passes the calculator's validity check,
    `calculate_updates` is dispatched through `call_async`.

    :param new_data: A single observation or a list of observations.
    """
    update_id = uuid.uuid4().hex
    # Registered before validation; the id is passed on to the update task.
    self.add_pending_update(update_id)

    # Accept a single observation as well as a list of them.
    if not isinstance(new_data, list):
        new_data = [new_data]

    calculator = Calculator(self)

    new_dframe_raw = calculator.dframe_from_update(
        new_data, self.schema.labels_to_slugs)
    calculator._check_update_is_valid(new_dframe_raw)

    call_async(calculator.calculate_updates, calculator, new_data,
               new_dframe_raw=new_dframe_raw, update_id=update_id)
def calculate_task(calculations, dataset):
    """Background task to run a batch of calculations on one dataset.

    The first calculation is asked to restart the task if the dataset still
    has pending calculations, with the rest of the batch passed along. All
    columns are then calculated in one pass. Afterwards each calculation
    records its dependencies, is linked to its aggregated dataset when it
    is an aggregation, and is marked ready.

    An empty batch is a no-op.

    NOTE(review): nothing in this function marks a calculation as failed
    when an exception occurs; presumably a wrapper handles that -- confirm.

    :param calculations: Calculations to run.
    :param dataset: Dataset to run the calculations on.
    """
    # Nothing to calculate; avoids an IndexError on calculations[0].
    if not calculations:
        return

    # block until other calculations for this dataset are finished
    calculations[0].restart_if_has_pending(dataset, calculations[1:])

    calculator = Calculator(dataset)
    calculator.calculate_columns(calculations)

    for calculation in calculations:
        calculation.add_dependencies(dataset, calculator.dependent_columns)

        if calculation.aggregation is not None:
            # Link the calculation to the aggregated dataset of its group.
            aggregated_id = dataset.aggregated_datasets_dict[calculation.group]
            calculation.set_aggregation_id(aggregated_id)

        calculation.ready()
def calculate_task(calculation, dataset):
    """Background task to run a single calculation on a dataset.

    The calculation is first asked to restart the task if the dataset
    still has pending calculations. The dataset's summary stats are then
    cleared, the column is calculated, and the calculation records its
    dependencies. An aggregation is additionally linked to the aggregated
    dataset of its group. Finally the calculation is marked ready.

    NOTE(review): nothing in this function marks the calculation as failed
    when an exception occurs; presumably a wrapper handles that -- confirm.

    :param calculation: Calculation to run.
    :param dataset: Dataset to run calculation on.
    """
    # Wait for other calculations on this dataset before starting.
    calculation.restart_if_has_pending(dataset)

    dataset.clear_summary_stats()

    calculator = Calculator(dataset)
    calculator.calculate_column(
        calculation.formula,
        calculation.name,
        calculation.groups_as_list,
    )

    dependencies = calculator.dependent_columns()
    calculation.add_dependencies(dataset, dependencies)

    is_aggregation = calculation.aggregation is not None
    if is_aggregation:
        # Reload before looking up the aggregated dataset for the group.
        dataset.reload()
        group_key = calculation.group
        linked_id = dataset.aggregated_datasets_dict[group_key]
        calculation.set_aggregation_id(linked_id)

    calculation.ready()