def test_dataset_data_source_unit(self):
    """Check that a DatasetDataSource reference is resolved on commit.

    Two datasets are posted in one UnitOfWork. The second dataset's data
    source points at the first using the id returned by ``unit.post``
    (which the committed objects expose as ``correlationid``). After the
    commit, the data source's ``dataset_id`` must equal the ``id`` of the
    committed first dataset.
    """
    unit = UnitOfWork(None)

    schema1 = DataEntrySchema("base1")
    schema1.addAttr(FileDataType("file"))
    schema_id = unit.post(schema1)

    loc = Location(10.0, 11.0)
    loc.name = "Location"
    loc_id = unit.post(loc)

    dataset1 = Dataset()
    dataset1.schema = schema_id
    dataset1.location = loc_id
    dataset1_id = unit.post(dataset1)

    dataset2 = Dataset()
    dataset2.schema = schema_id
    dataset2.location = loc_id
    dataset2.data_source = DatasetDataSource(dataset1_id, "")
    dataset2_id = unit.post(dataset2)

    ret = self.service.commit(unit, None)

    # Resolve both datasets up front so the check does not depend on the
    # order in which the service returns them. Comparing inside a single
    # pass would silently compare against the pre-commit correlation id if
    # the dependent dataset happened to come back first.
    committed = [r for r in ret if isinstance(r, Dataset)]
    source = next(
        (r for r in committed if r.correlationid == dataset1_id), None)
    dependent = next(
        (r for r in committed if r.correlationid == dataset2_id), None)

    self.assertIsNotNone(
        dependent, "Didn't find the dataset with the dataset data source")
    self.assertIsNotNone(
        source, "Didn't find the dataset referenced by the data source")
    self.assertEqual(
        source.id, dependent.data_source.dataset_id,
        "Data source dataset_id was not updated")
def test_dataset_persist(self):
    """Check version handling when a dataset is persisted repeatedly.

    The first persist must yield version 1. Persisting with an older
    version number must raise StaleObjectError, and persisting again with
    the current version must bump it to 2.
    """
    schema = DataEntrySchema("base1")
    schema.addAttr(FileDataType("file"))
    schema = self.service.persist(schema)

    loc = Location(10.0, 11.0)
    loc.name = "Location"
    loc = self.service.persist(loc)

    dataset = Dataset()
    dataset.schema = schema.id
    dataset.location = loc.id

    dataset1 = self.service.persist(dataset)
    self.assertEqual(1, dataset1.version)

    # An out-of-date version number must be rejected.
    dataset1.version = 0
    self.assertRaises(StaleObjectError, self.service.persist, dataset1)

    # Restoring the current version makes the update succeed again.
    dataset1.version = 1
    dataset2 = self.service.persist(dataset1)
    self.assertEqual(2, dataset2.version)
def test_data_types(self):
    """Exercise persistence, retrieval and search across the model types.

    Covers: rejecting a dataset that uses a dataset-metadata schema,
    round-tripping a dataset through persist/get, attaching a pull data
    source, filtering active datasets by data source kind, searching each
    object type, and persisting a data entry.
    """
    schema1 = DatasetMetadataSchema("schema1")
    schema1.addAttr(FileDataType("file"))
    schema1a = self.service.persist(schema1)
    self.assertEqual(1, len(schema1a.attrs))

    schema2 = DataEntrySchema("schema2")
    schema2.addAttr(FileDataType("file"))
    schema2.addAttr(Double("x"))
    schema2a = self.service.persist(schema2)

    loc = Location(10.0, 11.0)
    loca = self.service.persist(loc)

    dataset = Dataset()
    dataset.schema = schema1a.id
    dataset.location = loca.id
    # We're trying to use a dataset_metadata schema, so this should fail
    self.assertRaises(ValueError, self.service.persist, dataset)

    dataset.schema = schema2a.id
    # Now we're using the correct type of schema
    dataset1a = self.service.persist(dataset)

    dataset1b = self.service.get_dataset(dataset1a.id)
    self.assertEqual(dataset1a.id, dataset1b.id)
    self.assertDictEqual(dataset1a.__dict__, dataset1b.__dict__)

    # Update and add a data source
    dataset1b.data_source = PullDataSource(
        "http://www.abc.net.au", None, recursive=False, field="file",
        processing_script="TEST", sampling=PeriodicSampling(10000))
    dataset1b.enabled = True
    dataset1c = self.service.persist(dataset1b)
    self.assertIsNotNone(dataset1c.data_source)
    self.assertEqual("TEST", dataset1c.data_source.processing_script)
    self.assertIsNotNone(dataset1c.data_source.sampling)

    datasets = self.service.get_active_datasets()
    self.assertEqual(1, len(datasets))
    self.assertIsNotNone(datasets[0].data_source)
    self.assertEqual("TEST", datasets[0].data_source.processing_script)
    self.assertIsNotNone(datasets[0].data_source.sampling)

    # Test with criteria
    datasets = self.service.get_active_datasets(kind="pull_data_source")
    self.assertEqual(1, len(datasets))

    datasets = self.service.get_active_datasets(kind="push_data_source")
    self.assertEqual(0, len(datasets))

    schema1b = self.service.get_schema(schema1a.id)
    self.assertEqual(schema1a.id, schema1b.id)

    # Exactly one object of each type has been persisted so far.
    datasets = self.service.search("dataset")
    self.assertEqual(1, len(datasets))

    schemas = self.service.search("data_entry_schema")
    self.assertEqual(1, len(schemas))

    schemas = self.service.search("dataset_metadata_schema")
    self.assertEqual(1, len(schemas))

    locs = self.service.search("location")
    self.assertEqual(1, len(locs))

    # Test ingest
    data_entry_1 = DataEntry(dataset1b.id, datetime.datetime.now())
    data_entry_1['x'] = 27.8
    data_entry_1 = self.service.persist(data_entry_1)
    self.assertIsNotNone(data_entry_1.id)