    def test_find_one(self):
        for dataset_name in self.TEST_DATASETS:
            dataset = Dataset.create(self.test_dataset_ids[dataset_name])
            record = dataset.record
            row = Dataset.find_one(self.test_dataset_ids[dataset_name])

            self.assertEqual(record, row.record)
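The tests in these examples share a common harness: TEST_DATASETS holds the fixture file names, test_dataset_ids maps each file name to a dataset ID, and get_data loads a fixture into a DataFrame. The harness itself is not part of this listing; the following is a minimal sketch of what the tests imply, with a hypothetical fixture path and ID scheme.

import unittest
import uuid

import pandas as pd


class TestBase(unittest.TestCase):
    # fixture file names the tests iterate over
    TEST_DATASETS = ['good_eats.csv']

    def setUp(self):
        # a fresh, unique dataset ID per fixture file (hypothetical scheme)
        self.test_dataset_ids = dict(
            (name, uuid.uuid4().hex) for name in self.TEST_DATASETS)

    def get_data(self, dataset_name):
        # load the fixture CSV into a pandas DataFrame (assumed location)
        return pd.read_csv('tests/fixtures/%s' % dataset_name)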
Example #2
    def test_delete(self):
        for dataset_name in self.TEST_DATASETS:
            dataset = Dataset.create(self.test_dataset_ids[dataset_name])
            records = Dataset.find(self.test_dataset_ids[dataset_name])
            self.assertNotEqual(records, [])
            dataset.delete()
            records = Dataset.find(self.test_dataset_ids[dataset_name])
            self.assertEqual(records, [])
Example #3
    def test_update(self):
        for dataset_name in self.TEST_DATASETS:
            dataset = Dataset.create(self.test_dataset_ids[dataset_name])
            self.assertFalse('field' in dataset.record)
            dataset.update({'field': {'key': 'value'}})
            dataset = Dataset.find_one(self.test_dataset_ids[dataset_name])

            self.assertTrue('field' in dataset.record)
            self.assertEqual(dataset.record['field'], {'key': 'value'})
Example #4
    def test_delete(self):
        for dataset_name in self.TEST_DATASETS:
            dataset = Dataset.create(self.test_dataset_ids[dataset_name])
            records = Dataset.find(self.test_dataset_ids[dataset_name])
            self.assertNotEqual(records, [])
            dataset.delete()
            records = Dataset.find(self.test_dataset_ids[dataset_name])

            self.assertEqual(records, [])
            self.assertEqual(Observation.encoding(dataset), None)
Example #5
    def test_dframe(self):
        dataset = Dataset.create(self.test_dataset_ids['good_eats.csv'])
        dataset.save_observations(
            recognize_dates(self.get_data('good_eats.csv')))
        dframe = dataset.dframe()

        self.assertTrue(isinstance(dframe, DataFrame))
        self.assertTrue(all(self.get_data('good_eats.csv').reindex(
                        columns=dframe.columns).eq(dframe)))
        columns = dframe.columns

        # ensure no reserved keys
        self.assertFalse(MONGO_ID_ENCODED in columns)

        # ensure date is converted
        self.assertTrue(isinstance(dframe.submit_date[0], datetime))
Example #6
    def test_dframe(self):
        dataset = Dataset.create(self.test_dataset_ids['good_eats.csv'])
        dataset.save_observations(
            recognize_dates(self.get_data('good_eats.csv')))
        records = [x for x in Observation.find(dataset)]
        dframe = dataset.dframe()

        self.assertTrue(isinstance(dframe, DataFrame))
        self.assertTrue(all(self.get_data('good_eats.csv').reindex(
                        columns=dframe.columns).eq(dframe)))
        columns = dframe.columns
        # ensure no reserved keys
        for key in MONGO_RESERVED_KEY_STRS:
            self.assertFalse(key in columns)
        # ensure date is converted
        self.assertTrue(isinstance(dframe.submit_date[0], datetime))
Example #7
    def test_build_schema(self):
        for dataset_name in self.TEST_DATASETS:
            dataset = Dataset.create(self.test_dataset_ids[dataset_name])
            dataset.build_schema(self.get_data(dataset_name))

            # get dataset with new schema
            dataset = Dataset.find_one(self.test_dataset_ids[dataset_name])

            for key in [
                    Dataset.CREATED_AT, Dataset.SCHEMA, Dataset.UPDATED_AT]:
                self.assertTrue(key in dataset.record.keys())

            df_columns = self.get_data(dataset_name).columns.tolist()
            seen_columns = []

            for column_name, column_attributes in dataset.schema.items():
                # check column_name is unique
                self.assertFalse(column_name in seen_columns)
                seen_columns.append(column_name)

                # check column name contains only legal chars
                self.assertFalse(RE_ENCODED_COLUMN.search(column_name))

                # check it has the required attributes
                self.assertTrue(SIMPLETYPE in column_attributes)
                self.assertTrue(OLAP_TYPE in column_attributes)
                self.assertTrue(Dataset.LABEL in column_attributes)

                # check label is an original column
                original_col = column_attributes[Dataset.LABEL]
                error_msg = '%s not in %s' % (original_col, df_columns)
                self.assertTrue(original_col in df_columns, error_msg)
                df_columns.remove(column_attributes[Dataset.LABEL])

                # check not reserved key
                self.assertFalse(column_name == MONGO_ID_ENCODED)

            # ensure every original column is accounted for in the schema
            self.assertTrue(len(df_columns) == 0)
Example #8
    def test_build_schema(self):
        for dataset_name in self.TEST_DATASETS:
            dataset = Dataset.create(self.test_dataset_ids[dataset_name])
            dataset.build_schema(self.get_data(dataset_name))

            # get dataset with new schema
            dataset = Dataset.find_one(self.test_dataset_ids[dataset_name])

            for key in [
                    Dataset.CREATED_AT, Dataset.SCHEMA, Dataset.UPDATED_AT]:
                self.assertTrue(key in dataset.record.keys())

            df_columns = self.get_data(dataset_name).columns.tolist()
            seen_columns = []

            for column_name, column_attributes in dataset.schema.items():
                # check column_name is unique
                self.assertFalse(column_name in seen_columns)
                seen_columns.append(column_name)

                # check column name contains only legal chars
                self.assertFalse(RE_ENCODED_COLUMN.search(column_name))

                # check it has the required attributes
                self.assertTrue(SIMPLETYPE in column_attributes)
                self.assertTrue(OLAP_TYPE in column_attributes)
                self.assertTrue(Dataset.LABEL in column_attributes)

                # check label is an original column
                original_col = column_attributes[Dataset.LABEL]
                error_msg = '%s not in %s' % (original_col, df_columns)
                self.assertTrue(original_col in df_columns, error_msg)
                df_columns.remove(column_attributes[Dataset.LABEL])

                # check not reserved key
                self.assertFalse(column_name in MONGO_RESERVED_KEY_STRS)

            # ensure every original column is accounted for in the schema
            self.assertTrue(len(df_columns) == 0)
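Taken together, the assertions above pin down the shape of dataset.schema: each encoded, unique column name maps to an attribute dict carrying SIMPLETYPE, OLAP_TYPE, and Dataset.LABEL, where the label is the original DataFrame column name. A sketch of one plausible entry follows; the literal key and value strings are assumptions, not the library's actual constants.

# Illustrative schema entry only; key and value spellings are hypothetical.
schema = {
    'amount_': {                    # encoded column name, unique in schema
        'label': 'amount',          # Dataset.LABEL: the original column
        'simpletype': 'float',      # SIMPLETYPE: the column's primitive type
        'olap_type': 'measure',     # OLAP_TYPE: e.g. dimension or measure
    },
}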
Example #9
def merge_dataset_ids(dataset_ids, mapping):
    """Load a JSON array of dataset IDs and start a background merge task.

    :param dataset_ids: An array of dataset IDs to merge.

    :raises: `MergeError` if fewer than 2 datasets are found. If no dataset
        can be found for a given ID, that ID is ignored. Therefore, if 2
        dataset IDs are provided and one of them is bad, an error is raised,
        but if 3 dataset IDs are provided and one of them is bad, no error
        is raised.
    """
    datasets = [Dataset.find_one(dataset_id) for dataset_id in dataset_ids]
    datasets = [dataset for dataset in datasets if dataset.record]

    if len(datasets) < 2:
        raise MergeError(
            'merge requires 2 datasets (found %s)' % len(datasets))

    new_dataset = Dataset.create()

    call_async(__merge_datasets_task, new_dataset, datasets, mapping)

    return new_dataset
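A short sketch of the call pattern the docstring describes; the IDs and the mapping value here are placeholders, not values from the project.

mapping = None  # hypothetical: assume no explicit column mapping is needed

try:
    # an unknown ID among three is silently dropped; the two that resolve
    # are merged by the background task
    new_dataset = merge_dataset_ids(['id_a', 'id_b', 'bogus_id'], mapping)
except MergeError:
    # raised only when fewer than 2 of the given IDs resolve to datasets
    pass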
Example #10
    def _post_file(self, file_name='good_eats.csv'):
        dataset = Dataset.create()
        return dataset.import_from_csv(
            self._file_mock(self._fixture_path_prefix(file_name))).dataset_id
Example #11
    def _create_dataset_from_url(self, url):
        dataset = Dataset.create()
        return dataset.import_from_url(url, allow_local_file=True).dataset_id
Example #12
    def test_create(self):
        for dataset_name in self.TEST_DATASETS:
            dataset = Dataset.create(self.test_dataset_ids[dataset_name])

            self.assertTrue(isinstance(dataset, Dataset))
Example #13
    def test_count(self):
        dataset = Dataset.create(self.test_dataset_ids['good_eats.csv'])
        dataset.save_observations(
            recognize_dates(self.get_data('good_eats.csv')))

        self.assertEqual(len(dataset.dframe()), dataset.count())