def test_POST_remove_summary(self):
    """Posting a formula should invalidate any cached summary stats."""
    # prime the cache: a summary GET stores stats on the dataset record
    Datasets().GET(self.dataset_id, mode=MODE_SUMMARY)
    record = Dataset.find_one(self.dataset_id)
    self.assertTrue(isinstance(record[STATS], dict))
    self.assertTrue(isinstance(record[STATS][ALL], dict))
    self._post_formula()
    # adding a column must have cleared the [STATS][ALL] entry
    record = Dataset.find_one(self.dataset_id)
    self.assertEqual(record[STATS].get(ALL), None)
def test_build_schema(self):
    """Built schemas should have unique, slug-safe column names whose
    labels map one-to-one onto the original dataframe columns.
    """
    illegal_col_regex = re.compile(r'\W|[A-Z]')
    for name in self.TEST_DATASETS:
        created = Dataset.create(self.test_dataset_ids[name])
        Dataset.build_schema(created, self.test_data[name].dtypes)
        # re-fetch so we see the persisted schema
        stored = Dataset.find_one(self.test_dataset_ids[name])
        for key in [CREATED_AT, SCHEMA, UPDATED_AT]:
            self.assertTrue(key in stored.keys())
        unmatched_labels = self.test_data[name].columns.tolist()
        seen = set()
        for col_name, attrs in stored[SCHEMA].items():
            # column names must be unique within the schema
            self.assertFalse(col_name in seen)
            seen.add(col_name)
            # and contain only legal (lowercase word) characters
            self.assertFalse(illegal_col_regex.search(col_name))
            # every column carries the required attributes
            self.assertTrue(SIMPLETYPE in attrs)
            self.assertTrue(OLAP_TYPE in attrs)
            self.assertTrue(LABEL in attrs)
            # each label must be one of the original dataframe columns
            self.assertTrue(attrs[LABEL] in unmatched_labels)
            unmatched_labels.remove(attrs[LABEL])
        # every original column must have been matched exactly once
        self.assertTrue(len(unmatched_labels) == 0)
def test_update(self):
    """Dataset.update should persist new top-level fields."""
    for name in self.TEST_DATASETS:
        created = Dataset.create(self.test_dataset_ids[name])
        self.assertFalse('field' in created)
        Dataset.update(created, {'field': {'key': 'value'}})
        # re-fetch to confirm the update was written through
        fetched = Dataset.find_one(self.test_dataset_ids[name])
        self.assertTrue('field' in fetched)
        self.assertEqual(fetched['field'], {'key': 'value'})
def test_find(self):
    """find should yield a Cursor whose first row matches the saved
    record, and find_one should return that same record."""
    for name in self.TEST_DATASETS:
        saved = Dataset.save(self.test_dataset_ids[name])
        cursor = Dataset.find(self.test_dataset_ids[name])
        rows = list(cursor)
        self.assertTrue(isinstance(cursor, Cursor))
        self.assertEqual(saved, rows[0])
        found = Dataset.find_one(self.test_dataset_ids[name])
        self.assertEqual(saved, found)
def DELETE(self, dataset_id):
    """
    Delete observations (i.e. the dataset) with hash *dataset_id*
    from mongo
    """
    result = None
    dataset = Dataset.find_one(dataset_id)
    if dataset:
        # remove the dataset record and all of its observations
        Dataset.delete(dataset_id)
        Observation.delete(dataset)
        result = {SUCCESS: 'deleted dataset: %s' % dataset_id}
    # falls back to an error payload when the id was not found
    return dump_or_error(result, 'id not found')
def _test_summary_no_group(self, results):
    """Check an ungrouped summary: *results* must contain a SUMMARY
    entry for every non-reserved column of the source data.
    """
    result_keys = results.keys()
    # NOTE: removed two leftover debug ``print`` statements that were
    # polluting test output.
    self.assertEqual(len(result_keys), self.NUM_COLS)
    columns = [col for col in
               self.test_data[self._file_name].columns.tolist()
               if col not in MONGO_RESERVED_KEYS]
    dataset = Dataset.find_one(self.dataset_id)
    labels_to_slugs = build_labels_to_slugs(dataset)
    for col in columns:
        # results are keyed by slug, not by the original column label
        slug = labels_to_slugs[col]
        self.assertTrue(slug in result_keys,
                        'col (slug): %s in: %s' % (slug, result_keys))
        self.assertTrue(SUMMARY in results[slug].keys())
def GET(self, dataset_id, mode=False, query='{}', select=None, group=ALL): """ Return data set for hash *dataset_id*. Execute query *query* in mongo if passed. If summary is passed return summary statistics for data set. If group is passed group the summary, if summary is false group is ignored. """ dataset = Dataset.find_one(dataset_id) result = None try: if dataset: if mode == MODE_INFO: result = Dataset.schema(dataset) elif mode == MODE_SUMMARY: result = summarize(dataset, query, select, group) else: return mongo_to_json(Observation.find(dataset, query, select)) except JSONError, e: result = {ERROR: e.__str__()}
def _test_calculator(self, delay=True):
    """Run every formula in ``self.calculations`` through
    ``calculate_column`` (as a Celery task when *delay* is true,
    synchronously otherwise) and verify, after each one, that:

    * the new column is persisted in the stored dataframe,
    * the column count grows by exactly one per formula,
    * the dataset schema tracks the slugified names and labels,
    * each computed value matches the stored formula column
      (``assertAlmostEqual`` for floats, exact equality otherwise).
    """
    dframe = Observation.find(self.dataset, as_df=True)
    columns = dframe.columns.tolist()
    start_num_cols = len(columns)
    added_num_cols = 0
    # label -> slug mapping for the columns present before any calculation
    column_labels_to_slugs = build_labels_to_slugs(self.dataset)
    label_list, slugified_key_list = [list(ary) for ary in
                                      zip(*column_labels_to_slugs.items())]
    for idx, formula in enumerate(self.calculations):
        name = 'test-%s' % idx
        if delay:
            task = calculate_column.delay(self.dataset, dframe,
                                          formula, name)
            # test that task has completed
            self.assertTrue(task.ready())
            self.assertTrue(task.successful())
        else:
            task = calculate_column(self.dataset, dframe,
                                    formula, name)
        # refresh the mapping: it now includes the just-added column
        column_labels_to_slugs = build_labels_to_slugs(self.dataset)
        unslug_name = name
        # from here on ``name`` is the slugified column key
        name = column_labels_to_slugs[unslug_name]

        # test that updated dataframe persisted
        dframe = Observation.find(self.dataset, as_df=True)
        self.assertTrue(name in dframe.columns)

        # test new number of columns
        added_num_cols += 1
        self.assertEqual(start_num_cols + added_num_cols,
                         len(dframe.columns.tolist()))

        # test that the schema is up to date
        dataset = Dataset.find_one(self.dataset[DATASET_ID])
        self.assertTrue(SCHEMA in dataset.keys())
        self.assertTrue(isinstance(dataset[SCHEMA], dict))
        schema = dataset[SCHEMA]

        # test slugified column names
        slugified_key_list.append(name)
        self.assertEqual(sorted(schema.keys()),
                         sorted(slugified_key_list))

        # test column labels
        label_list.append(unslug_name)
        labels = [schema[col][LABEL] for col in schema.keys()]
        self.assertEqual(sorted(labels), sorted(label_list))

        # test result of calculation
        # ``formula`` doubles as a column label here; map it to its slug
        formula = column_labels_to_slugs[formula]
        for idx, row in dframe.iterrows():
            try:
                result = np.float64(row[name])
                stored = np.float64(row[formula])
                # np.nan != np.nan, continue if we have two nan values
                if np.isnan(result) and np.isnan(stored):
                    continue
                msg = self._equal_msg(result, stored, formula)
                self.assertAlmostEqual(result, stored, self.places, msg)
            except ValueError:
                # non-numeric values: fall back to exact comparison
                msg = self._equal_msg(row[name], row[formula], formula)
                self.assertEqual(row[name], row[formula], msg)