def testMatplotlibFormatter(self):
    """Round-trip a matplotlib figure through the butler and verify the
    stored artifact matches a directly-saved PNG."""
    butler = Butler(self.root, run="testrun")
    datasetType = DatasetType("test_plot", [], "Plot",
                              universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    # The image content is irrelevant; any deterministic-enough data works.
    rows = [self.rng.sample(range(50), 10) for _ in range(3)]
    pyplot.imshow(rows)
    ref = butler.put(pyplot.gcf(), datasetType)
    uri = butler.getURI(ref)
    # filecmp needs a local file, so materialize the artifact locally.
    with uri.as_local() as local, \
            tempfile.NamedTemporaryFile(suffix=".png") as file:
        pyplot.gcf().savefig(file.name)
        self.assertTrue(filecmp.cmp(local.ospath, file.name, shallow=True))
    self.assertTrue(butler.datasetExists(ref))
    # Reading the figure back is expected to fail.
    with self.assertRaises(ValueError):
        butler.get(ref)
    # After pruning, existence checks should raise rather than return False.
    butler.pruneDatasets([ref], unstore=True, purge=True)
    with self.assertRaises(LookupError):
        butler.datasetExists(ref)
def testHealSparseMapFormatter(self):
    """Exercise put/get of a HealSparseMap, including the coverage
    component and the ``pixels``/``degrade_nside`` read parameters."""
    butler = Butler(self.root, run="testrun")
    datasetType = DatasetType("map", [], "HealSparseMap",
                              universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    ref = butler.put(self.hspMap, datasetType)
    self.assertEqual(butler.getURI(ref).getExtension(), '.hsp')

    # The full map should round-trip unchanged.
    readMap = butler.get('map')
    self.assertTrue(np.all(readMap._sparse_map == self.hspMap._sparse_map))

    # The coverage component must agree with the stored map's coverage.
    cov = butler.get('map.coverage')
    self.assertTrue(np.all(cov.coverage_mask == self.hspMap.coverage_mask))

    # A partial read restricted to a pixel subset: coverage is exactly
    # those pixels, and the covered values match the full map.
    wantPixels = [0, 6]
    partial = butler.get('map', parameters={'pixels': wantPixels})
    coveredPixels = np.where(partial.coverage_mask)[0]
    self.assertTrue(np.all(coveredPixels == np.array(wantPixels)))
    self.assertTrue(np.all(partial[0:10000] == self.hspMap[0:10000]))
    self.assertTrue(np.all(partial[100000:110000] == self.hspMap[100000:110000]))

    # A degraded read should agree with degrading the map after the fact.
    degradedRead = butler.get('map', parameters={'degrade_nside': 512})
    expected = self.hspMap.degrade(512)
    self.assertTrue(np.all(degradedRead._sparse_map == expected._sparse_map))
def testMatplotlibFormatter(self):
    """Round-trip a matplotlib figure through the butler and verify the
    stored artifact matches a directly-saved PNG.

    Notes
    -----
    The file comparison only works for a local (``file``-scheme) URI, so
    that is asserted first.
    """
    butler = Butler(self.root, run="testrun")
    datasetType = DatasetType("test_plot", [], "Plot",
                              universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(datasetType)
    # Does not have to be a random image
    pyplot.imshow([
        self.rng.sample(range(50), 10),
        self.rng.sample(range(50), 10),
        self.rng.sample(range(50), 10),
    ])
    ref = butler.put(pyplot.gcf(), datasetType)
    uri = butler.getURI(ref)
    # The test after this will not work if we don't have local file.
    # Bug fix: the message was missing the f-string prefix, so the URI
    # was never interpolated into the failure message.
    self.assertEqual(uri.scheme, "file", f"Testing returned URI: {uri}")
    with tempfile.NamedTemporaryFile(suffix=".png") as file:
        pyplot.gcf().savefig(file.name)
        self.assertTrue(filecmp.cmp(uri.path, file.name, shallow=True))
    self.assertTrue(butler.datasetExists(ref))
    # Reading the figure back is expected to fail.
    with self.assertRaises(ValueError):
        butler.get(ref)
    # After pruning, existence checks should raise rather than return False.
    butler.pruneDatasets([ref], unstore=True, purge=True)
    with self.assertRaises(LookupError):
        butler.datasetExists(ref)
def verifyIngest(self, files=None, cli=False, fullCheck=False):
    """Test that RawIngestTask ingested the expected files.

    Parameters
    ----------
    files : `list` [`str`], or `None`
        List of files to be ingested, or `None` to use ``self.file``.
    cli : `bool`, optional
        Not used by this method itself; presumably consumed by
        subclass overrides or forwarded checks — TODO confirm.
        (Documented here because the previous docstring omitted it.)
    fullCheck : `bool`, optional
        If `True`, read the full raw dataset and check component
        consistency. If `False` check that a component can be read
        but do not read the entire raw exposure.

    Notes
    -----
    Reading all the ingested test data can be expensive. The code paths
    for reading the second raw are the same as reading the first so we
    do not gain anything by doing full checks of everything. Only read
    full pixel data for first dataset from file. Don't even do that if
    we are requested not to by the caller. This only really affects
    files that contain multiple datasets.
    """
    butler = Butler(self.root, run=self.outputRun)
    datasets = list(
        butler.registry.queryDatasets("raw", collections=self.outputRun))
    self.assertEqual(len(datasets), len(self.dataIds))

    # Get the URI to the first dataset and check it is inside the
    # datastore.
    datasetUri = butler.getURI(datasets[0])
    self.assertIsNotNone(datasetUri.relative_to(butler.datastore.root))

    for dataId in self.dataIds:
        # Check that we can read metadata from a raw.
        metadata = butler.get("raw.metadata", dataId)
        if not fullCheck:
            continue
        # Only ever do the expensive full-pixel check once.
        fullCheck = False
        exposure = butler.get("raw", dataId)
        self.assertEqual(metadata.toDict(), exposure.getMetadata().toDict())

        # Since components follow a different code path we check that
        # WCS match and also we check that at least the shape
        # of the image is the same (rather than doing per-pixel equality).
        wcs = butler.get("raw.wcs", dataId)
        self.assertEqual(wcs, exposure.getWcs())

        rawImage = butler.get("raw.image", dataId)
        self.assertEqual(rawImage.getBBox(), exposure.getBBox())

        # Check that the filter label got the correct band.
        filterLabel = butler.get("raw.filterLabel", dataId)
        self.assertEqual(filterLabel, self.filterLabel)

    self.checkRepo(files=files)
def testDirect(self):
    """Ingest with ``transfer="direct"`` and confirm the dataset URI
    refers to the original source file rather than a datastore copy."""
    self._ingestRaws(transfer="direct")

    # The datastore should report the original file's URI.
    expectedUri = ButlerURI(self.file)
    butler = Butler(self.root, run=self.outputRun)
    found = list(
        butler.registry.queryDatasets("raw", collections=self.outputRun))
    self.assertEqual(butler.getURI(found[0]), expectedUri)
def testAstropyTableFormatter(self):
    """Round-trip an astropy table through the butler and check the
    serialized file extension."""
    butler = Butler(self.root, run="testrun")
    tableType = DatasetType("table", [], "AstropyTable",
                            universe=butler.registry.dimensions)
    butler.registry.registerDatasetType(tableType)
    ref = butler.put(self.table, tableType)
    # Tables are expected to serialize as ECSV.
    self.assertEqual(butler.getURI(ref).getExtension(), '.ecsv')
    roundTripped = butler.get('table')
    self.assertTrue(numpy.all(roundTripped == self.table))
def testInPlace(self):
    """Test that files already in the directory can be added to the
    registry in-place.
    """
    # Symlink the test file into the repo root by hand so it is already
    # "inside" the datastore before ingest runs.
    butler = Butler(self.root, run=self.outputRun)
    storeName = "prefix-" + os.path.basename(self.file)
    linkPath = butler.datastore.root.join(storeName)
    os.symlink(os.path.abspath(self.file), linkPath.ospath)

    self._ingestRaws(transfer="auto", file=linkPath.ospath)
    self.verifyIngest()

    # A fresh butler is required: the pre-ingest one won't see the
    # newly-ingested files.
    butler = Butler(self.root, run=self.outputRun)
    # The URI for this dataset must resolve to the in-place path.
    uri = butler.getURI("raw", self.dataIds[0])
    self.assertEqual(uri.relative_to(butler.datastore.root), storeName)
except lsst.daf.butler.registry.MissingCollectionError: pass # Already removed; nothing to do logging.info("Preparing destination repository %s...", DEST_DIR) _remove_refcat_run(dest_repo, DEST_RUN) dest_repo.registry.registerCollection(DEST_RUN, CollectionType.RUN) for src_cat, dest_cat in REFCATS.items(): src_type = src_repo.registry.getDatasetType(src_cat) dest_type = _rename_dataset_type(src_type, dest_cat) dest_repo.registry.registerDatasetType(dest_type) dest_repo.registry.refresh() logging.info("Searching for refcats in %s:%s...", args.src_dir, args.src_collection) query = f"htm{HTM_LEVEL} in ({','.join(id_ranges)})" datasets = [] for src_ref in src_repo.registry.queryDatasets(REFCATS.keys(), where=query, findFirst=True): src_type = src_ref.datasetType dest_type = _rename_dataset_type(src_type, REFCATS[src_type.name]) dest_ref = DatasetRef(dest_type, src_ref.dataId) datasets.append(FileDataset(path=src_repo.getURI(src_ref), refs=dest_ref)) logging.info("Copying refcats...") dest_repo.ingest(*datasets, transfer="copy") logging.info("%d refcat shards copied to %s:%s", len(datasets), DEST_DIR, DEST_RUN)