def test_makeGraphSelect(self):
    """Test for makeGraph() implementation with subset of data.
    """
    task_factory = TaskFactoryMock()
    registry = Registry.fromConfig(RegistryConfig(), SchemaConfig())
    builder = GraphBuilder(task_factory, registry)

    pipeline = self._makePipeline()
    collection = ""
    origin_info = DatasetOriginInfoDef([collection], collection)
    # Trivially-true user expression; exercises the query-selection path.
    graph = builder.makeGraph(pipeline, origin_info, "1 = 1")

    # The pipeline has exactly two tasks.
    self.assertEqual(len(graph), 2)

    first_task = graph[0].taskDef
    self.assertEqual(first_task.taskName, "TaskOne")
    self.assertEqual(first_task.taskClass, TaskOne)
    # TODO: temporary until we implement makeGraph()
    # quanta = graph[0].quanta
    # self.assertEqual(len(quanta), 3)
    # for quantum in quanta:
    #     self._checkQuantum(quantum.inputs, Dataset1, [1, 5, 9])
    #     self._checkQuantum(quantum.outputs, Dataset2, [1, 5, 9])

    second_task = graph[1].taskDef
    self.assertEqual(second_task.taskName, "TaskTwo")
    self.assertEqual(second_task.taskClass, TaskTwo)
def loadDimensionData() -> DataCoordinateSequence:
    """Load dimension data from an export file included in the code
    repository.

    Returns
    -------
    dataIds : `DataCoordinateSequence`
        A sequence containing all data IDs in the export file, fully
        expanded (with dimension records attached).
    """
    # Create an in-memory SQLite database and Registry just to import the
    # YAML data and retrieve it as a set of DataCoordinate objects.
    config = RegistryConfig()
    config["db"] = "sqlite://"
    registry = Registry.fromConfig(config, create=True)
    with open(DIMENSION_DATA_FILE, 'r') as stream:
        # The backend reads from the stream during load(), so keep the
        # file open for the full import.
        backend = YamlRepoImportBackend(stream, registry)
        backend.register()
        backend.load(datastore=None)
    dimensions = DimensionGraph(registry.dimensions, names=["visit", "detector", "tract", "patch"])
    return DataCoordinateSequence(
        dataIds=tuple(registry.queryDimensions(dimensions, expand=True)),
        graph=dimensions,
        hasFull=True,
        hasRecords=True,
    )
def test_makeGraph(self):
    """Test for makeGraph() implementation.

    Runs the builder with ``userQuery = None`` so no data-ID selection
    expression is applied.
    """
    taskFactory = TaskFactoryMock()
    reg = Registry.fromConfig(RegistryConfig(), SchemaConfig())
    gbuilder = GraphBuilder(taskFactory, reg)

    pipeline = self._makePipeline()
    collection = ""
    userQuery = None
    coll = DatasetOriginInfoDef([collection], collection)
    graph = gbuilder.makeGraph(pipeline, coll, userQuery)

    # The pipeline has exactly two tasks.
    self.assertEqual(len(graph), 2)
    taskDef = graph[0].taskDef
    self.assertEqual(taskDef.taskName, "TaskOne")
    self.assertEqual(taskDef.taskClass, TaskOne)
    # TODO: temporary until we add some content to registry
    # quanta = graph[0].quanta
    # self.assertEqual(len(quanta), 10)
    # for quantum in quanta:
    #     self._checkQuantum(quantum.inputs, Dataset1, range(10))
    #     self._checkQuantum(quantum.outputs, Dataset2, range(10))
    taskDef = graph[1].taskDef
    self.assertEqual(taskDef.taskName, "TaskTwo")
    self.assertEqual(taskDef.taskClass, TaskTwo)
def test_register(self):
    """Test that register() sets appropriate Dimensions.
    """
    configPath = os.path.join(getPackageDir("daf_butler"),
                              "tests/config/basic/butler.yaml")
    registry = Registry.fromConfig(ButlerConfig(configPath))
    # The registry must start out with no records for any of these
    # dimensions.
    for element in ("instrument", "detector", "physical_filter"):
        self.assertEqual(list(registry.queryDimensions([element])), [])
    # Registering the instrument should populate all three dimensions.
    self.instrument.register(registry)
    instrumentDataIds = list(registry.queryDimensions(["instrument"]))
    self.assertEqual(len(instrumentDataIds), 1)
    self.assertEqual({dataId["instrument"] for dataId in instrumentDataIds},
                     {self.data.name})
    detectorDataIds = list(registry.queryDimensions(["detector"]))
    self.assertEqual(len(detectorDataIds), self.data.nDetectors)
    detectorNames = {dataId.records["detector"].full_name
                     for dataId in detectorDataIds}
    self.assertIn(self.data.firstDetectorName, detectorNames)
    filterDataIds = list(registry.queryDimensions(["physical_filter"]))
    filterNames = {dataId['physical_filter'] for dataId in filterDataIds}
    self.assertGreaterEqual(filterNames, self.data.physical_filters)
def testTransfer(self):
    """Round-trip a metrics dataset through transfer() between two
    PosixDatastores and check the retrieved copy matches the original.
    """
    metrics = makeExampleMetrics()
    sc = self.storageClassFactory.getStorageClass("StructuredData")
    ref = self.makeDatasetRef("metric", frozenset(("visit", "filter")), sc,
                              {"visit": 2048, "filter": "Uprime"})
    # Source datastore rooted in its own directory under the test area.
    inputConfig = DatastoreConfig(self.configFile)
    inputConfig['datastore.root'] = os.path.join(self.testDir,
                                                 "./test_input_datastore")
    source = PosixDatastore(config=inputConfig, registry=self.registry)
    # Destination datastore: same config apart from an independent root,
    # and a freshly created Registry.
    outputConfig = inputConfig.copy()
    outputConfig['datastore.root'] = os.path.join(self.testDir,
                                                  "./test_output_datastore")
    destination = PosixDatastore(config=outputConfig,
                                 registry=Registry.fromConfig(self.configFile))
    source.put(metrics, ref)
    destination.transfer(source, ref)
    self.assertEqual(metrics, destination.get(ref))
def test_makeFullIODatasetTypes(self):
    """Test for _makeFullIODatasetTypes() implementation.
    """
    taskFactory = TaskFactoryMock()
    reg = Registry.fromConfig(RegistryConfig(), SchemaConfig())
    gbuilder = GraphBuilder(taskFactory, reg)

    def _datasetTypes(mapping):
        # Flatten a {name: descriptor} mapping (possibly None) into a
        # list of the descriptors' DatasetTypes.
        mapping = mapping or {}
        return [dsTypeDescr.datasetType for dsTypeDescr in mapping.values()]

    # collect inputs/outputs from each task of the pipeline
    taskDatasets = []
    for taskDef in self._makePipeline():
        taskClass = taskDef.taskClass
        taskDatasets.append(_TaskDatasetTypes(
            taskDef=taskDef,
            inputs=_datasetTypes(taskClass.getInputDatasetTypes(taskDef.config)),
            outputs=_datasetTypes(taskClass.getOutputDatasetTypes(taskDef.config)),
            initInputs=_datasetTypes(taskClass.getInitInputDatasetTypes(taskDef.config)),
            initOutputs=_datasetTypes(taskClass.getInitOutputDatasetTypes(taskDef.config)),
        ))

    # make inputs and outputs from per-task dataset types
    inputs, outputs, initInputs, initOutputs = \
        gbuilder._makeFullIODatasetTypes(taskDatasets)
    for result in (inputs, outputs, initInputs, initOutputs):
        self.assertIsInstance(result, set)
    # These are sets, so iteration order is arbitrary; compare sorted
    # names so the assertions are order-independent (the original mixed
    # list and set comparisons, relying on single-element ordering).
    self.assertEqual(sorted(x.name for x in inputs), ["input"])
    self.assertEqual(sorted(x.name for x in outputs), ["output", "output2"])
    self.assertEqual(sorted(x.name for x in initInputs), ["initInput"])
    self.assertEqual(sorted(x.name for x in initOutputs), ["initOutputs"])
def testTransfer(self):
    """Round-trip a source catalog through transfer() between two
    PosixDatastores and check the retrieved copy matches the original.
    """
    catalog = self.makeExampleCatalog()
    storageClass = self.storageClassFactory.getStorageClass("SourceCatalog")
    ref = self.makeDatasetRef("calexp", frozenset(("visit", "filter")),
                              storageClass, {"visit": 12345, "filter": "red"})
    # Source datastore rooted in its own directory under the test area.
    inputConfig = DatastoreConfig(self.configFile)
    inputConfig['datastore.root'] = os.path.join(self.testDir,
                                                 "./test_input_datastore")
    source = PosixDatastore(config=inputConfig, registry=self.registry)
    # Destination datastore: same config apart from an independent root,
    # and a freshly created Registry.
    outputConfig = inputConfig.copy()
    outputConfig['datastore.root'] = os.path.join(self.testDir,
                                                  "./test_output_datastore")
    destination = PosixDatastore(config=outputConfig,
                                 registry=Registry.fromConfig(self.configFile))
    source.put(catalog, ref)
    destination.transfer(source, ref)
    self.assertCatalogEqual(catalog, destination.get(ref))
def test_register(self):
    """Test that register() sets appropriate Dimensions.
    """
    configPath = os.path.join(getPackageDir("daf_butler"),
                              "tests/config/basic/butler.yaml")
    registry = Registry.fromConfig(ButlerConfig(configPath))
    # The registry must start out with no entries for any of these
    # dimensions.
    for dimension in ('instrument', 'detector', 'physical_filter'):
        self.assertEqual(registry.findDimensionEntries(dimension), [])
    # Registering the instrument should populate all three dimensions.
    self.instrument.register(registry)
    instrumentEntries = registry.findDimensionEntries('instrument')
    self.assertEqual(len(instrumentEntries), 1)
    self.assertEqual(instrumentEntries[0]['instrument'], self.data.name)
    self.assertEqual(len(registry.findDimensionEntries('detector')),
                     self.data.nDetectors)
    filterNames = {entry['physical_filter']
                   for entry in registry.findDimensionEntries('physical_filter')}
    self.assertGreaterEqual(filterNames, self.data.physical_filters)
def getRegistry(self):
    """Return a new Registry built from this test's butler configuration
    and rooted at the test repository directory.
    """
    config = self.butlerConfig
    return Registry.fromConfig(config, butlerRoot=self.root)
def makeRegistry(self):
    """Create a new limited Registry from the basic test butler config.
    """
    configFile = os.path.join(os.path.dirname(__file__),
                              "config/basic/butler.yaml")
    config = ButlerConfig(configFile)
    # Force the limited-registry code path for these tests.
    config["registry", "limited"] = True
    return Registry.fromConfig(config, create=True)
def setUp(self):
    """Create a Registry and seed the manual dataset ID counter.
    """
    self.registry = Registry.fromConfig(self.configFile)
    # There is no butler in these tests to allocate dataset IDs, so
    # keep a running counter for each DatasetRef we create.
    self.id = 1
def setUp(self):
    """Locate the basic butler config and build a Registry from it.
    """
    testDir = os.path.dirname(__file__)
    self.testDir = testDir
    self.configFile = os.path.join(testDir, "config/basic/butler.yaml")
    self.butlerConfig = ButlerConfig(self.configFile)
    self.registry = Registry.fromConfig(self.butlerConfig)