def _test_calculator(self, delay=True):
    """
    Run each formula in *self.calculations* through calculate_column and
    check what was persisted for *self.dataset*.

    Every formula is calculated into a column labelled 'test-<index>'.
    After each calculation this verifies that:

    - the stored dataframe contains the new (slugified) column,
    - the column count grew by exactly one,
    - the dataset schema is a dict whose keys and labels match the
      expected slugs and labels,
    - the new column equals, row by row, the column the formula names.

    Each formula is looked up in the label-to-slug mapping, so it must be
    the label of an existing column.

    If *delay* is true the calculation is submitted with
    calculate_column.delay and the returned task must report ready and
    successful; otherwise calculate_column is called directly.
    """
    dframe = Observation.find(self.dataset, as_df=True)

    start_num_cols = len(dframe.columns.tolist())
    added_num_cols = 0

    column_labels_to_slugs = build_labels_to_slugs(self.dataset)
    # split the label -> slug mapping into two parallel lists
    label_list, slugified_key_list = [
        list(ary) for ary in zip(*column_labels_to_slugs.items())]

    for idx, formula in enumerate(self.calculations):
        name = 'test-%s' % idx

        if delay:
            task = calculate_column.delay(
                self.dataset, dframe, formula, name)
            # test that task has completed
            self.assertTrue(task.ready())
            self.assertTrue(task.successful())
        else:
            # return value is not needed when called directly
            calculate_column(self.dataset, dframe, formula, name)

        # rebuild the mapping so it includes the column just added
        column_labels_to_slugs = build_labels_to_slugs(self.dataset)

        unslug_name = name
        name = column_labels_to_slugs[unslug_name]

        # test that updated dataframe persisted
        dframe = Observation.find(self.dataset, as_df=True)
        self.assertTrue(name in dframe.columns)

        # test new number of columns
        added_num_cols += 1
        self.assertEqual(start_num_cols + added_num_cols,
                         len(dframe.columns.tolist()))

        # test that the schema is up to date
        dataset = Dataset.find_one(self.dataset[DATASET_ID])
        self.assertTrue(SCHEMA in dataset.keys())
        self.assertTrue(isinstance(dataset[SCHEMA], dict))
        schema = dataset[SCHEMA]

        # test slugified column names
        slugified_key_list.append(name)
        self.assertEqual(sorted(schema.keys()), sorted(slugified_key_list))

        # test column labels
        label_list.append(unslug_name)
        labels = [schema[col][LABEL] for col in schema.keys()]
        self.assertEqual(sorted(labels), sorted(label_list))

        # test result of calculation
        formula = column_labels_to_slugs[formula]

        # the row index is unused; do not clobber the outer loop's idx
        for _, row in dframe.iterrows():
            try:
                result = np.float64(row[name])
                stored = np.float64(row[formula])
                # np.nan != np.nan, continue if we have two nan values
                if np.isnan(result) and np.isnan(stored):
                    continue
                msg = self._equal_msg(result, stored, formula)
                self.assertAlmostEqual(result, stored, self.places, msg)
            except ValueError:
                # values that cannot be converted to float are compared
                # exactly instead
                msg = self._equal_msg(row[name], row[formula], formula)
                self.assertEqual(row[name], row[formula], msg)
except ParseError, err:
            # do not save record, return error
            return {ERROR: err}

        # persist the calculation: owning dataset id, formula and name
        record = {
            DATASET_ID: dataset[DATASET_ID],
            cls.FORMULA: formula,
            cls.NAME: name,
        }
        cls.collection.insert(record)

        # invalidate summary ALL since we have a new column
        stats = dataset.get(STATS)
        if stats:
            # NOTE(review): raises KeyError if stats has no ALL entry --
            # confirm ALL is always present whenever stats is non-empty
            del stats[ALL]
            # presumably STATS is dropped from the local dataset so the
            # update below writes the pruned stats back -- TODO confirm
            del dataset[STATS]
            Dataset.update(dataset, {STATS: stats})

        # hand the calculation off through calculate_column.delay; the
        # call receives the dataset, dframe, formula and name (no
        # calculation id is passed)
        calculate_column.delay(dataset, dframe, formula, name)

        return mongo_remove_reserved_keys(record)

    @classmethod
    def find(cls, dataset):
        """
        Return the calculations for given *dataset*.

        Queries *cls.collection* for every record whose DATASET_ID equals
        ``dataset[DATASET_ID]`` and returns them as a list, each record
        passed through mongo_remove_reserved_keys.
        """
        return [mongo_remove_reserved_keys(record) for record in
                cls.collection.find({
                    DATASET_ID: dataset[DATASET_ID],
                })]