def testValidateRaDecMag(self):
    """Validation should fail if the ra/dec/mag fields are missing."""
    config = makeConvertConfig()
    config.validate()

    for name in ("ra_name", "dec_name", "mag_column_list"):
        with self.subTest(name=name):
            config = makeConvertConfig()
            setattr(config, name, None)
            with self.assertRaises(ValueError):
                config.validate()
def setUp(self):
    np.random.seed(10)

    self.tempDir = tempfile.TemporaryDirectory()
    tempPath = self.tempDir.name
    self.log = lsst.log.Log.getLogger("lsst.TestIngestIndexManager")
    self.config = ingestIndexTestBase.makeConvertConfig(withRaDecErr=True)
    self.config.id_name = 'id'
    self.depth = 2  # very small depth, for as few pixels as possible.
    self.indexer = HtmIndexer(self.depth)
    self.htm = lsst.sphgeom.HtmPixelization(self.depth)
    ingester = ConvertReferenceCatalogTask(output_dir=tempPath, config=self.config)
    dtype = [('id', '<f8'), ('ra', '<f8'), ('dec', '<f8'), ('ra_err', '<f8'),
             ('dec_err', '<f8'), ('a', '<f8'), ('a_err', '<f8')]
    self.schema, self.key_map = ingester.makeSchema(dtype)
    self.fileReader = ReadTextCatalogTask()
    self.fakeInput = self.makeSkyCatalog(outPath=None, size=5, idStart=6543)
    self.matchedPixels = np.array([1, 1, 2, 2, 3])
    self.tempDir2 = tempfile.TemporaryDirectory()
    tempPath = self.tempDir2.name
    self.filenames = {x: os.path.join(tempPath, "%d.fits" % x) for x in set(self.matchedPixels)}

    self.worker = ConvertRefcatManager(self.filenames,
                                       self.config,
                                       self.fileReader,
                                       self.indexer,
                                       self.schema,
                                       self.key_map,
                                       self.htm.universe()[0],
                                       addRefCatMetadata,
                                       self.log)
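# A hedged usage sketch for the worker constructed in setUp (kept as a
# comment so it is not executed here): ConvertRefcatManager is typically
# driven over a list of raw input files and writes one FITS shard per HTM
# pixel listed in `self.filenames`. The call below assumes the manager
# exposes a `run(inputFiles)` entry point and is illustrative only:
#
#     result = self.worker.run(inputFiles)  # hypothetical invocation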
def testValidateParallax(self):
    """Validation should fail if any parallax-related fields are missing."""
    names = ["parallax_name", "epoch_name", "epoch_format", "epoch_scale", "parallax_err_name"]

    config = makeConvertConfig(withParallax=True)
    config.validate()
    del config

    for name in names:
        with self.subTest(name=name):
            config = makeConvertConfig(withParallax=True)
            setattr(config, name, None)
            with self.assertRaises(ValueError, msg=name):
                config.validate()
def testValidateRaDecErr(self):
    # check that a basic config validates
    config = makeConvertConfig(withRaDecErr=True)
    config.validate()

    # check that a config with any of these fields missing does not validate
    for name in ("ra_err_name", "dec_err_name", "coord_err_unit"):
        with self.subTest(name=name):
            config = makeConvertConfig(withRaDecErr=True)
            setattr(config, name, None)
            with self.assertRaises(ValueError):
                config.validate()

    # check that coord_err_unit must be an astropy unit
    config = makeConvertConfig(withRaDecErr=True)
    config.coord_err_unit = "nonsense unit"
    with self.assertRaisesRegex(ValueError, "is not a valid astropy unit string"):
        config.validate()
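# The "is not a valid astropy unit string" check above ultimately rests on
# astropy's own unit parsing. A minimal sketch of that underlying contract
# (this helper test is an illustrative addition, not part of the original
# suite):
def testAstropyUnitParseExample(self):
    import astropy.units as u

    # astropy raises ValueError for strings it cannot parse as units,
    # which is what config.validate() surfaces for coord_err_unit.
    with self.assertRaises(ValueError):
        u.Unit("nonsense unit")
    # a parseable unit string resolves to the expected unit
    self.assertEqual(u.Unit("milliarcsecond"), u.mas)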
def testValidateMagErr(self):
    config = makeConvertConfig(withMagErr=True)
    config.validate()

    # test for missing names
    for name in config.mag_column_list:
        with self.subTest(name=name):
            config = makeConvertConfig(withMagErr=True)
            del config.mag_err_column_map[name]
            with self.assertRaises(ValueError):
                config.validate()

    # test for incorrect names
    for name in config.mag_column_list:
        with self.subTest(name=name):
            config = makeConvertConfig(withMagErr=True)
            config.mag_err_column_map["badName"] = config.mag_err_column_map[name]
            del config.mag_err_column_map[name]
            with self.assertRaises(ValueError):
                config.validate()
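# For reference, a consistent configuration pairs every magnitude column
# with an error column under the same key, e.g. (illustrative values only,
# not taken from makeConvertConfig's defaults):
#
#     config.mag_column_list = ["a", "b"]
#     config.mag_err_column_map = {"a": "a_err", "b": "b_err"}
#
# validate() fails when a key is missing or does not match mag_column_list,
# which is exactly what the two loops above exercise.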
def testValidatePm(self):
    basicNames = ["pm_ra_name", "pm_dec_name", "epoch_name", "epoch_format", "epoch_scale"]

    for withPmErr in (False, True):
        config = makeConvertConfig(withPm=True, withPmErr=withPmErr)
        config.validate()
        del config

        if withPmErr:
            names = basicNames + ["pm_ra_err_name", "pm_dec_err_name"]
        else:
            names = basicNames
        for name in names:
            with self.subTest(name=name, withPmErr=withPmErr):
                config = makeConvertConfig(withPm=True, withPmErr=withPmErr)
                setattr(config, name, None)
                with self.assertRaises(ValueError):
                    config.validate()
@classmethod
def setUpClass(cls):
    super().setUpClass()

    # Generate a catalog, with arbitrary ids
    inTempDir = tempfile.TemporaryDirectory()
    inPath = inTempDir.name
    skyCatalogFile, _, skyCatalog = cls.makeSkyCatalog(inPath, idStart=25, seed=123)
    cls.skyCatalog = skyCatalog

    # override some field names.
    config = ingestIndexTestBase.makeConvertConfig(withRaDecErr=True, withMagErr=True,
                                                   withPm=True, withPmErr=True)
    # use a very small HTM pixelization depth
    depth = 2
    config.dataset_config.indexer.active.depth = depth
    # np.savetxt prepends '# ' to the header lines, so use a reader that understands that
    config.file_reader.format = 'ascii.commented_header'
    config.n_processes = 1
    config.id_name = 'id'  # Use the ids from the generated catalogs
    cls.repoTempDir = tempfile.TemporaryDirectory()
    repoPath = cls.repoTempDir.name

    # Convert the input data files to our HTM indexed format.
    dataTempDir = tempfile.TemporaryDirectory()
    dataPath = dataTempDir.name
    converter = ConvertReferenceCatalogTask(output_dir=dataPath, config=config)
    converter.run([skyCatalogFile])

    # Make a temporary butler to ingest them into.
    butler = cls.makeTemporaryRepo(repoPath, config.dataset_config.indexer.active.depth)
    dimensions = [f"htm{depth}"]
    datasetType = DatasetType(config.dataset_config.ref_dataset_name,
                              dimensions,
                              "SimpleCatalog",
                              universe=butler.registry.dimensions,
                              isCalibration=False)
    butler.registry.registerDatasetType(datasetType)

    # Ingest the files into the new butler.
    run = "testingRun"
    htmTableFile = os.path.join(dataPath, "filename_to_htm.ecsv")
    ingest_files(repoPath,
                 config.dataset_config.ref_dataset_name,
                 run,
                 htmTableFile,
                 transfer="auto")

    # Test if we can get back the catalogs, with a new butler.
    butler = lsst.daf.butler.Butler(repoPath)
    datasetRefs = list(butler.registry.queryDatasets(config.dataset_config.ref_dataset_name,
                                                     collections=[run]).expanded())
    handles = []
    for dataRef in datasetRefs:
        handles.append(DeferredDatasetHandle(butler=butler, ref=dataRef, parameters=None))
    cls.datasetRefs = datasetRefs
    cls.handles = handles

    inTempDir.cleanup()
    dataTempDir.cleanup()
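# A hedged sketch of how the class-level handles created above would be
# consumed, mirroring the loader construction in runTest() below; the `log`
# argument is omitted because this excerpt does not establish a class-level
# logger (illustrative only, kept as a comment):
#
#     loaderConfig = ReferenceObjectLoader.ConfigClass()
#     loader = ReferenceObjectLoader([ref.dataId for ref in cls.datasetRefs],
#                                    cls.handles,
#                                    config=loaderConfig)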
def runTest(withRaDecErr):
    # Generate a second catalog, with different ids
    inTempDir1 = tempfile.TemporaryDirectory()
    inPath1 = inTempDir1.name
    skyCatalogFile1, _, skyCatalog1 = self.makeSkyCatalog(inPath1, idStart=25, seed=123)
    inTempDir2 = tempfile.TemporaryDirectory()
    inPath2 = inTempDir2.name
    skyCatalogFile2, _, skyCatalog2 = self.makeSkyCatalog(inPath2, idStart=5432, seed=11)

    # override some field names, and use multiple cores
    config = ingestIndexTestBase.makeConvertConfig(withRaDecErr=withRaDecErr, withMagErr=True,
                                                   withPm=True, withPmErr=True)
    # use a very small HTM pixelization depth to ensure there will be collisions when
    # ingesting the files in parallel
    depth = 2
    config.dataset_config.indexer.active.depth = depth
    # np.savetxt prepends '# ' to the header lines, so use a reader that understands that
    config.file_reader.format = 'ascii.commented_header'
    config.n_processes = 2  # use multiple cores for this test only
    config.id_name = 'id'  # Use the ids from the generated catalogs
    repoPath = os.path.join(self.outPath, "output_multifile_parallel",
                            "_withRaDecErr" if withRaDecErr else "_noRaDecErr")

    # Convert the input data files to our HTM indexed format.
    dataTempDir = tempfile.TemporaryDirectory()
    dataPath = dataTempDir.name
    converter = ConvertReferenceCatalogTask(output_dir=dataPath, config=config)
    converter.run([skyCatalogFile1, skyCatalogFile2])

    # Make a temporary butler to ingest them into.
    butler = self.makeTemporaryRepo(repoPath, config.dataset_config.indexer.active.depth)
    dimensions = [f"htm{depth}"]
    datasetType = DatasetType(config.dataset_config.ref_dataset_name,
                              dimensions,
                              "SimpleCatalog",
                              universe=butler.registry.dimensions,
                              isCalibration=False)
    butler.registry.registerDatasetType(datasetType)

    # Ingest the files into the new butler.
    run = "testingRun"
    htmTableFile = os.path.join(dataPath, "filename_to_htm.ecsv")
    ingest_files(repoPath,
                 config.dataset_config.ref_dataset_name,
                 run,
                 htmTableFile,
                 transfer="auto")

    # Test if we can get back the catalogs, with a new butler.
    butler = lsst.daf.butler.Butler(repoPath)
    datasetRefs = list(butler.registry.queryDatasets(config.dataset_config.ref_dataset_name,
                                                     collections=[run]).expanded())
    handlers = []
    for dataRef in datasetRefs:
        handlers.append(DeferredDatasetHandle(butler=butler, ref=dataRef, parameters=None))
    loaderConfig = ReferenceObjectLoader.ConfigClass()
    loader = ReferenceObjectLoader([dataRef.dataId for dataRef in datasetRefs],
                                   handlers,
                                   config=loaderConfig,
                                   log=self.logger)
    self.checkAllRowsInRefcat(loader, skyCatalog1, config)
    self.checkAllRowsInRefcat(loader, skyCatalog2, config)

    inTempDir1.cleanup()
    inTempDir2.cleanup()
    dataTempDir.cleanup()
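# runTest is a closure over `self`, so the enclosing test method presumably
# exercises both branches; the call site is not shown in this excerpt, but
# it would look like (illustrative only):
#
#     runTest(withRaDecErr=True)
#     runTest(withRaDecErr=False)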