예제 #1
0
 def test_POST_remove_summary(self):
     """Posting a formula should drop the cached [STATS][ALL] summary."""
     # Request the summary view so stats get cached on the dataset record.
     Datasets().GET(self.dataset_id, mode=MODE_SUMMARY)
     stats = Dataset.find_one(self.dataset_id)[STATS]
     self.assertTrue(isinstance(stats, dict))
     self.assertTrue(isinstance(stats[ALL], dict))
     # Adding a calculated column must invalidate the overall summary.
     self._post_formula()
     stats = Dataset.find_one(self.dataset_id)[STATS]
     self.assertEqual(stats.get(ALL), None)
예제 #2
0
    def test_build_schema(self):
        """Build a schema for each test dataset and validate it.

        Verifies that schema keys are unique legal slugs, that every
        column carries the required attributes, and that labels map
        one-to-one onto the original dataframe columns.
        """
        # slugs must be lowercase word characters only
        illegal_col_regex = re.compile(r'\W|[A-Z]')

        for dataset_name in self.TEST_DATASETS:
            dataset = Dataset.create(self.test_dataset_ids[dataset_name])
            Dataset.build_schema(dataset,
                    self.test_data[dataset_name].dtypes)

            # re-fetch the dataset so we see the newly stored schema
            dataset = Dataset.find_one(self.test_dataset_ids[dataset_name])

            for key in [CREATED_AT, SCHEMA, UPDATED_AT]:
                # membership test directly on the dict, not .keys()
                self.assertTrue(key in dataset)

            df_columns = self.test_data[dataset_name].columns.tolist()
            # set gives O(1) duplicate detection instead of list scans
            seen_columns = set()

            for column_name, column_attributes in dataset[SCHEMA].items():
                # check column_name is unique
                self.assertFalse(column_name in seen_columns)
                seen_columns.add(column_name)

                # check column name is only legal chars
                self.assertFalse(illegal_col_regex.search(column_name))
                # check it has the required attributes
                self.assertTrue(SIMPLETYPE in column_attributes)
                self.assertTrue(OLAP_TYPE in column_attributes)
                self.assertTrue(LABEL in column_attributes)

                # check label is an original column; remove so a second
                # column cannot claim the same label
                self.assertTrue(column_attributes[LABEL] in df_columns)
                df_columns.remove(column_attributes[LABEL])

            # every original column must have been consumed by the schema
            self.assertEqual(df_columns, [])
예제 #3
0
 def test_update(self):
     """Dataset.update should persist new fields to the stored record."""
     for dataset_name in self.TEST_DATASETS:
         dataset_id = self.test_dataset_ids[dataset_name]
         dataset = Dataset.create(dataset_id)
         # new record starts without the field we are about to add
         self.assertFalse('field' in dataset)
         Dataset.update(dataset, {'field': {'key': 'value'}})
         # re-fetch to prove the update hit the database
         dataset = Dataset.find_one(dataset_id)
         self.assertTrue('field' in dataset)
         self.assertEqual(dataset['field'], {'key': 'value'})
예제 #4
0
 def test_find(self):
     """Dataset.find should return a Cursor whose first row is the saved
     record, and find_one should return that same record directly."""
     for dataset_name in self.TEST_DATASETS:
         record = Dataset.save(self.test_dataset_ids[dataset_name])
         cursor = Dataset.find(self.test_dataset_ids[dataset_name])
         # list() materializes the cursor; no manual copy loop needed
         rows = list(cursor)
         self.assertTrue(isinstance(cursor, Cursor))
         self.assertEqual(record, rows[0])
         self.assertEqual(record, Dataset.find_one(
                     self.test_dataset_ids[dataset_name]))
예제 #5
0
파일: datasets.py 프로젝트: asseym/bamboo
    def DELETE(self, dataset_id):
        """
        Delete observations (i.e. the dataset) with hash *dataset_id* from mongo
        """
        dataset = Dataset.find_one(dataset_id)

        # guard clause: unknown id -> error response, nothing to delete
        if not dataset:
            return dump_or_error(None, 'id not found')

        # remove the dataset record first, then its observations
        Dataset.delete(dataset_id)
        Observation.delete(dataset)
        success = {SUCCESS: 'deleted dataset: %s' % dataset_id}
        return dump_or_error(success, 'id not found')
예제 #6
0
 def _test_summary_no_group(self, results):
     """Assert an ungrouped summary covers every non-reserved column.

     *results* maps column slugs to per-column summary dicts; each
     non-reserved dataframe column must appear (via its slug) with a
     SUMMARY entry.
     """
     result_keys = results.keys()
     # one summary entry per column is expected
     self.assertEqual(len(result_keys), self.NUM_COLS)
     # removed leftover debug `print` statements; `col not in` is the
     # idiomatic negated membership test
     columns = [col for col in
             self.test_data[self._file_name].columns.tolist()
             if col not in MONGO_RESERVED_KEYS]
     dataset = Dataset.find_one(self.dataset_id)
     labels_to_slugs = build_labels_to_slugs(dataset)
     for col in columns:
         slug = labels_to_slugs[col]
         self.assertTrue(slug in result_keys,
                 'col (slug): %s in: %s' % (slug, result_keys))
         self.assertTrue(SUMMARY in results[slug].keys())
예제 #7
0
파일: datasets.py 프로젝트: asseym/bamboo
    def GET(self, dataset_id, mode=False, query='{}', select=None,
            group=ALL):
        """
        Return data set for hash *dataset_id*.
        Execute query *query* in mongo if passed.
        If summary is passed return summary statistics for data set.
        If group is passed group the summary, if summary is false group is
        ignored.
        """
        dataset = Dataset.find_one(dataset_id)
        result = None

        try:
            if dataset:
                if mode == MODE_INFO:
                    # schema/metadata view of the dataset
                    result = Dataset.schema(dataset)
                elif mode == MODE_SUMMARY:
                    # summary statistics, optionally grouped by *group*
                    result = summarize(dataset, query, select, group)
                else:
                    # default mode: raw observations serialized to JSON
                    return mongo_to_json(Observation.find(dataset, query,
                                select))
        # Python 2 except syntax; presumably JSONError is raised on a
        # malformed *query*/*select* -- TODO confirm against summarize()
        except JSONError, e:
            result = {ERROR: e.__str__()}
        # NOTE(review): as visible here, `result` is never returned on the
        # MODE_INFO/MODE_SUMMARY/error paths -- this excerpt looks truncated
        # (cf. DELETE ending in dump_or_error); confirm the trailing return
        # in the full source before changing anything.
예제 #8
0
    def _test_calculator(self, delay=True):
        """Run every formula in self.calculations through calculate_column
        and verify persistence, schema bookkeeping, and computed values.

        When *delay* is true the calculation goes through the task queue
        (calculate_column.delay); otherwise it is invoked synchronously.
        """
        dframe = Observation.find(self.dataset, as_df=True)

        columns = dframe.columns.tolist()
        start_num_cols = len(columns)
        added_num_cols = 0

        # label -> slug mapping before any calculated columns exist;
        # unzipped into parallel lists that grow as columns are added
        column_labels_to_slugs = build_labels_to_slugs(self.dataset)
        label_list, slugified_key_list = [list(ary) for ary in
                zip(*column_labels_to_slugs.items())]

        for idx, formula in enumerate(self.calculations):
            name = 'test-%s' % idx
            if delay:
                # asynchronous path via the task queue
                task = calculate_column.delay(self.dataset, dframe,
                        formula, name)
                # test that task has completed
                self.assertTrue(task.ready())
                self.assertTrue(task.successful())
            else:
                # synchronous path
                task = calculate_column(self.dataset, dframe,
                        formula, name)

            # refresh the mapping to pick up the newly added column
            column_labels_to_slugs = build_labels_to_slugs(self.dataset)

            # keep the label; rebind `name` to its slugified form
            unslug_name = name
            name = column_labels_to_slugs[unslug_name]

            # test that updated dataframe persisted
            dframe = Observation.find(self.dataset, as_df=True)
            self.assertTrue(name in dframe.columns)

            # test new number of columns
            added_num_cols += 1
            self.assertEqual(start_num_cols + added_num_cols,
                    len(dframe.columns.tolist()))

            # test that the schema is up to date
            dataset = Dataset.find_one(self.dataset[DATASET_ID])
            self.assertTrue(SCHEMA in dataset.keys())
            self.assertTrue(isinstance(dataset[SCHEMA], dict))
            schema = dataset[SCHEMA]

            # test slugified column names
            slugified_key_list.append(name)
            self.assertEqual(sorted(schema.keys()), sorted(slugified_key_list))

            # test column labels
            label_list.append(unslug_name)
            labels = [schema[col][LABEL] for col in schema.keys()]
            self.assertEqual(sorted(labels), sorted(label_list))

            # test result of calculation; rebind `formula` to the slug of
            # the column holding the expected values
            formula = column_labels_to_slugs[formula]

            for idx, row in dframe.iterrows():
                try:
                    result = np.float64(row[name])
                    stored = np.float64(row[formula])
                    # np.nan != np.nan, continue if we have two nan values
                    if np.isnan(result) and np.isnan(stored):
                        continue
                    msg = self._equal_msg(result, stored, formula)
                    self.assertAlmostEqual(result, stored, self.places, msg)
                except ValueError:
                    # non-numeric cell: fall back to exact equality
                    msg = self._equal_msg(row[name], row[formula], formula)
                    self.assertEqual(row[name], row[formula], msg)