def test_storeSources(self):
    """Round-trip DiaSources through an in-memory database.

    DiaObjects are stored first, then the DiaSources derived from them.
    The sources are read back twice, once by region and once by object
    ID, and each result is checked against the size of the stored catalog.
    """
    cfg = PpdbConfig(
        db_url="sqlite:///",
        isolation_level="READ_UNCOMMITTED",
        read_sources_months=12,
        read_forced_sources_months=12,
    )
    db = Ppdb(cfg)
    db.makeSchema()

    ranges = _makePixelRanges()
    now = datetime.datetime.now()

    # Sources are generated from objects, so objects must exist first.
    if self.use_pandas:
        dia_objects = _makeObjectCatalogPandas(ranges)
        dia_sources, object_ids = _makeSourceCatalogPandas(dia_objects)
    else:
        dia_objects = _makeObjectCatalog(ranges)
        dia_sources, object_ids = _makeSourceCatalog(dia_objects)

    # Persist objects, then the sources attached to them.
    db.storeDiaObjects(dia_objects, now)
    db.storeDiaSources(dia_sources)

    # Region-based lookup must return everything that was stored.
    by_region = db.getDiaSourcesInRegion(ranges, now, self.use_pandas)
    self._assertCatalog(by_region, len(dia_sources), type=self.data_type)

    # Lookup by object ID must return the same number of records.
    by_object = db.getDiaSources(object_ids, now, self.use_pandas)
    self._assertCatalog(by_object, len(dia_sources), type=self.data_type)
def test_makeSchema(self):
    """Check that a schema can be created in an in-memory sqlite database.

    The test passes as long as no exception is raised.
    """
    # sqlite has no support for the default READ_COMMITTED level, and an
    # in-memory database has to go through a connection pool.
    settings = PpdbConfig(
        db_url="sqlite://",
        isolation_level="READ_UNCOMMITTED",
    )
    database = Ppdb(settings)

    # Nothing to assert: successful completion is the whole point.
    database.makeSchema()
def setUp(self):
    """Create a Ppdb backed by an in-memory sqlite database.

    The configuration is kept in ``self.ppdbCfg`` and the database
    interface in ``self.ppdb``; the schema is created before returning.
    """
    cfg = PpdbConfig()
    self.ppdbCfg = cfg
    # Create DB in memory.
    cfg.db_url = 'sqlite://'
    cfg.isolation_level = "READ_UNCOMMITTED"
    cfg.dia_object_index = "baseline"
    cfg.dia_object_columns = []

    # Both tables use the minimal afw source schema.
    schemas = {
        "DiaObject": afwTable.SourceTable.makeMinimalSchema(),
        "DiaSource": afwTable.SourceTable.makeMinimalSchema(),
    }
    self.ppdb = Ppdb(config=self.ppdbCfg, afw_schemas=schemas)
    self.ppdb._schema.makeSchema()
class TestApVerifyQueries(unittest.TestCase):
    """Tests for the query helpers, run against an in-memory sqlite Ppdb."""

    def setUp(self):
        """Create a fresh in-memory Ppdb with its schema for every test."""
        cfg = PpdbConfig()
        self.ppdbCfg = cfg
        # Create DB in memory.
        cfg.db_url = 'sqlite://'
        cfg.isolation_level = "READ_UNCOMMITTED"
        cfg.dia_object_index = "baseline"
        cfg.dia_object_columns = []

        schemas = {
            "DiaObject": afwTable.SourceTable.makeMinimalSchema(),
            "DiaSource": afwTable.SourceTable.makeMinimalSchema(),
        }
        self.ppdb = Ppdb(config=self.ppdbCfg, afw_schemas=schemas)
        self.ppdb._schema.makeSchema()

    def tearDown(self):
        """Drop the reference to the database interface."""
        del self.ppdb

    def test_count_zero_objects(self):
        """An empty database reports zero unassociated objects."""
        self.assertEqual(countUnassociatedObjects(self.ppdb), 0)

    def test_count_objects(self):
        """All objects but the one with ``nDiaSources`` set to 2 are counted."""
        total = 5
        catalog = createTestObjects(total, {'nDiaSources': 'I'})
        catalog[-1]['nDiaSources'] = 2

        # nsecs must be an integer, not 1.4e18
        timestamp = dafBase.DateTime(nsecs=1400000000 * 10**9)
        self.ppdb.storeDiaObjects(catalog, timestamp.toPython())

        counted = countUnassociatedObjects(self.ppdb)
        self.assertEqual(total - 1, counted)

    @staticmethod
    def _makeVisitInfo(exposureId):
        """Return a mock VisitInfo whose ``getExposureId`` gives ``exposureId``."""
        # Real VisitInfo hard to create
        mock_settings = {"getExposureId.return_value": exposureId}
        return unittest.mock.NonCallableMock(afwImage.VisitInfo, **mock_settings)

    def test_isExposureProcessed(self):
        """A visit is reported as processed only if its sources were stored."""
        stored_visit = 2381
        count = 5
        catalog = createTestObjects(count, {'ccdVisitId': 'I'})
        for record in catalog:
            record['ccdVisitId'] = stored_visit
        self.ppdb.storeDiaSources(catalog)

        known = TestApVerifyQueries._makeVisitInfo(stored_visit)
        self.assertTrue(isVisitProcessed(self.ppdb, known))

        unknown = TestApVerifyQueries._makeVisitInfo(42)
        self.assertFalse(isVisitProcessed(self.ppdb, unknown))
def test_emptyGetsBaseline0months(self):
    """Test for getting data from empty database.

    All get() methods should return empty results, only useful for
    checking that code is not broken. With ``read_sources_months`` and
    ``read_forced_sources_months`` set to 0 every source and forced
    source query is expected to return `None`.
    """
    # set read_sources_months to 0 so that Forced/Sources are None
    config = PpdbConfig(db_url="sqlite:///",
                        isolation_level="READ_UNCOMMITTED",
                        read_sources_months=0,
                        read_forced_sources_months=0)
    ppdb = Ppdb(config)
    ppdb.makeSchema()

    pixel_ranges = _makePixelRanges()
    visit_time = datetime.datetime.now()

    # get objects by region
    res = ppdb.getDiaObjects(pixel_ranges, return_pandas=self.use_pandas)
    self._assertCatalog(res, 0, type=self.data_type)

    # get sources by region
    res = ppdb.getDiaSourcesInRegion(pixel_ranges, visit_time, return_pandas=self.use_pandas)
    self.assertIs(res, None)

    # get sources by object ID, empty object list
    res = ppdb.getDiaSources([], visit_time, return_pandas=self.use_pandas)
    # This result used to be discarded without any check, leaving the
    # call covered only as a smoke test.
    self.assertIs(res, None)

    # get forced sources by object ID, empty object list
    res = ppdb.getDiaForcedSources([], visit_time, return_pandas=self.use_pandas)
    self.assertIs(res, None)
def test_storeObjectsLast(self):
    """Round-trip DiaObjects when the DiaObjectLast table is in use.

    Sources are irrelevant here; only the object catalog is stored and
    then read back by region.
    """
    cfg = PpdbConfig(
        db_url="sqlite:///",
        isolation_level="READ_UNCOMMITTED",
        dia_object_index="last_object_table",
        object_last_replace=True,
    )
    db = Ppdb(cfg)
    db.makeSchema()

    ranges = _makePixelRanges()
    now = datetime.datetime.now()

    # Build the object catalog in the flavour under test.
    make_objects = _makeObjectCatalogPandas if self.use_pandas else _makeObjectCatalog
    dia_objects = make_objects(ranges)

    db.storeDiaObjects(dia_objects, now)

    # Everything that was stored must come back from the region query.
    fetched = db.getDiaObjects(ranges, return_pandas=self.use_pandas)
    self._assertCatalog(fetched, len(dia_objects), type=self.data_type)
def test_emptyGetsObjectLast(self):
    """Query DiaObjects from an empty database via the DiaObjectLast table.

    The query should return an empty result; the test is only useful for
    checking that the code path is not broken.
    """
    # Sources play no role in this test.
    cfg = PpdbConfig(
        db_url="sqlite:///",
        isolation_level="READ_UNCOMMITTED",
        dia_object_index="last_object_table",
    )
    db = Ppdb(cfg)
    db.makeSchema()

    ranges = _makePixelRanges()

    # Region query on an empty database must yield zero records.
    fetched = db.getDiaObjects(ranges, return_pandas=self.use_pandas)
    self._assertCatalog(fetched, 0, type=self.data_type)
def main():
    """Parse the command line and create the Prompt Products Database schema.

    The command line controls logging verbosity, an optional config file,
    whether the existing schema is dropped first, Oracle-specific table
    options, and whether afw.table schemas from ap_association are used.
    """
    parser = ArgumentParser(description='Create schema for Prompt Products Database.')
    parser.add_argument(
        '-v', '--verbose', action='count', default=0,
        help='More verbose output, can use several times.',
    )
    parser.add_argument(
        '--drop', action='store_true', default=False,
        help='Drop existing schema first, this will delete '
             'all data in the tables, use with extreme caution',
    )
    parser.add_argument(
        '-c', '--config', default=None, metavar='PATH',
        help='Name of the database config file (pex.config)',
    )
    parser.add_argument(
        '-t', '--tablespace', default=None, metavar='TABLESPACE',
        help='Name of the Oracle tablespace for new tables.',
    )
    parser.add_argument(
        '-i', '--iot', default=False, action='store_true',
        help='Make index-organized DiaObjectLast table.',
    )
    parser.add_argument(
        '-a', '--association', default=False, action='store_true',
        help='Use afw.table schema from ap_association',
    )
    options = parser.parse_args()

    # Logging verbosity follows the number of -v flags.
    _configLogger(options.verbose)

    # Start from defaults and overlay the config file when one is given.
    db_config = L1dbprotoConfig()
    if options.config:
        db_config.load(options.config)

    # afw schemas are only supplied when explicitly requested.
    if options.association:
        schemas = {
            "DiaObject": make_minimal_dia_object_schema(),
            "DiaSource": make_minimal_dia_source_schema(),
        }
    else:
        schemas = None

    # Instantiate the database interface and create the schema.
    database = Ppdb(config=db_config, afw_schemas=schemas)
    database.makeSchema(
        drop=options.drop,
        oracle_tablespace=options.tablespace,
        oracle_iot=options.iot,
    )
def _getPpdb(self, config):
    """Search a task config for a Ppdb configuration and build a Ppdb.

    Parameters
    ----------
    config : `lsst.pex.config.Config` or `None`
        Config that may hold a `lsst.dax.ppdb.PpdbConfig`. If it holds
        more than one, the behavior is undefined.

    Returns
    -------
    ppdb : `lsst.dax.ppdb.Ppdb`-like or `None`
        A `lsst.dax.ppdb.Ppdb` object or a drop-in replacement, or
        `None` when ``config`` contains no `lsst.dax.ppdb.PpdbConfig`.
    """
    if config is None:
        return None
    if isinstance(config, PpdbConfig):
        return Ppdb(config)

    for member in config.values():
        if isinstance(member, ConfigurableInstance):
            found = self._getPpdbFromConfigurableField(member)
        elif isinstance(member, ConfigChoiceField.instanceDictClass):
            try:
                # hasattr is unusable here because of non-standard getattr
                member.names
            except FieldValidationError:
                found = self._getPpdb(member.active)
            else:
                found = self._getPpdbFromConfigIterable(member.active)
        elif isinstance(member, ConfigDictField.DictClass):
            found = self._getPpdbFromConfigIterable(member.values())
        elif isinstance(member, Config):
            # There is no more direct way to test for `ConfigField`
            found = self._getPpdb(member)
        else:
            # Not a kind of field that can contain a config.
            continue

        if found:
            return found

    return None
def run(self):
    """Run whole shebang.

    Optionally loads a config file, creates the database interface and
    then loops over ``self.args.num_visits`` simulated visits. For each
    visit a random pointing is generated, DIA sources are simulated, and
    the visit is processed either in-process (``divide == 1``) or split
    into ``divide x divide`` tiles handled by forked children
    (``mp_mode == "fork"``) or MPI ranks (``mp_mode == "mpi"``).

    Returns
    -------
    status : `int`
        0 after a normal run or after dumping the configuration, 1 if
        ``self.lastObjectId`` is lower than the number of variable
        sources. In "mpi" mode, ranks other than 0 return the result of
        ``self.run_mpi_tile_loop``.

    Raises
    ------
    ValueError
        Raised in "mpi" mode when the number of MPI processes differs
        from ``divide**2``.
    """
    if self.args.config:
        self.config.load(self.args.config)

    # With dump_config the configuration is printed and nothing else runs.
    if self.args.dump_config:
        self.config.saveToStream(sys.stdout)
        return 0

    # instantiate db interface
    db = Ppdb(self.config)

    if self.config.divide > 1:

        # check that we have reasonable MPI setup
        if self.config.mp_mode == "mpi":
            comm = MPI.COMM_WORLD
            num_proc = comm.Get_size()
            rank = comm.Get_rank()
            node = MPI.Get_processor_name()
            _LOG.info(COLOR_YELLOW + "MPI job rank=%d size=%d, node %s" + COLOR_RESET,
                      rank, num_proc, node)

            # one MPI process is required per tile
            num_tiles = self.config.divide**2
            if num_proc != num_tiles:
                raise ValueError(f"Number of MPI processes ({num_proc}) "
                                 f"does not match number of tiles ({num_tiles})")
            if rank != 0:
                # run simple loop for all non-master processes
                return self.run_mpi_tile_loop(db, comm)

    # Initialize starting values from database visits table; when there
    # is no previous visit the configured starting values are used.
    last_visit = db.lastVisit()
    if last_visit is not None:
        start_visit_id = last_visit.visitId + 1
        start_time = last_visit.visitTime + timedelta(seconds=self.config.interval)
    else:
        start_visit_id = self.config.start_visit_id
        start_time = self.config.start_time_dt

    if self.config.divide > 1:
        _LOG.info("Will divide FOV into %dx%d regions", self.config.divide, self.config.divide)
    _LOG.info("Max. number of ranges for pixelator: %d", self.config.htm_max_ranges)

    # read sources file
    _LOG.info("Start loading variable sources from %r", self.config.sources_file)
    var_sources = numpy.load(self.config.sources_file)
    _LOG.info("Finished loading variable sources, count = %s", len(var_sources))

    # diaObjectId for last new DIA object, for variable sources we use their
    # index as objectId, for transients we want to use ID outside that range
    if last_visit is not None and last_visit.lastObjectId is not None:
        self.lastObjectId = max(self.lastObjectId, last_visit.lastObjectId)
    if self.lastObjectId < len(var_sources):
        _LOG.error('next object id is too low: %s', self.lastObjectId)
        return 1
    _LOG.debug("lastObjectId: %s", self.lastObjectId)

    # diaSourceId for last DIA source stored in database
    if last_visit is not None and last_visit.lastSourceId is not None:
        self.lastSourceId = max(self.lastSourceId, last_visit.lastSourceId)
    _LOG.info("lastSourceId: %s", self.lastSourceId)

    # loop over visits, visit IDs are numbered from start_visit_id
    visitTimes = _visitTimes(start_time, self.config.interval, self.args.num_visits)
    for visit_id, dt in enumerate(visitTimes, start_visit_id):

        # "daily" maintenance is triggered on every 1000th visit ID
        if visit_id % 1000 == 0:
            _LOG.info(COLOR_YELLOW + "+++ Start daily activities" + COLOR_RESET)
            db.dailyJob()
            _LOG.info(COLOR_YELLOW + "+++ Done with daily activities" + COLOR_RESET)

        _LOG.info(COLOR_GREEN + "+++ Start processing visit %s at %s" + COLOR_RESET, visit_id, dt)
        loop_timer = timer.Timer().start()

        with timer.Timer("DIA"):
            # point telescope in random southern direction
            pointing_xyz = generators.rand_sphere_xyz(1, -1)[0]
            pointing_v = UnitVector3d(pointing_xyz[0], pointing_xyz[1], pointing_xyz[2])
            ra = LonLat.longitudeOf(pointing_v).asDegrees()
            decl = LonLat.latitudeOf(pointing_v).asDegrees()

            # sphgeom.Circle opening angle is actually a half of opening angle
            region = Circle(pointing_v, Angle(self.config.FOV_rad/2))

            _LOG.info("Pointing ra, decl = %s, %s; xyz = %s", ra, decl, pointing_xyz)

            # Simulating difference image analysis
            dia = DIA.DIA(pointing_xyz, self.config.FOV_rad, var_sources,
                          self.config.false_per_visit + self.config.transient_per_visit)
            sources, indices = dia.makeSources()
            _LOG.info("DIA generated %s sources", len(sources))

            # assign IDs to transients: entries with a negative index get
            # the next new object ID, updated in place
            for i in range(len(sources)):
                if indices[i] < 0:
                    self.lastObjectId += 1
                    indices[i] = self.lastObjectId

        # print current database row counts, this takes long time
        # so only do it once in a while
        modu = 200 if visit_id <= 10000 else 1000
        if visit_id % modu == 0:
            counts = db.tableRowCount()
            for tbl, count in sorted(counts.items()):
                _LOG.info('%s row count: %s', tbl, count)

        # numpy seems to do some multi-threaded stuff which "leaks" CPU cycles to the code below
        # and it gets counted as resource usage in timers, add a short delay here so that threads
        # finish and don't influence our timers below.
        time.sleep(0.1)

        if self.config.divide == 1:

            # do it in-process
            with timer.Timer("VisitProcessing"):
                self.visit(db, visit_id, dt, region, sources, indices)

        else:

            if self.config.mp_mode == "fork":

                tiles = geom.make_square_tiles(
                    self.config.FOV_rad, self.config.divide, self.config.divide, pointing_v)

                with timer.Timer("VisitProcessing"):
                    # spawn subprocesses to handle individual tiles
                    children = []
                    for ix, iy, region in tiles:

                        # make sure lastSourceId is unique in each process;
                        # it is advanced before forking, so every child
                        # starts from a different value
                        self.lastSourceId += len(sources)
                        tile = (ix, iy)

                        pid = os.fork()
                        if pid == 0:
                            # child

                            self.visit(db, visit_id, dt, region, sources, indices, tile)
                            # stop here
                            sys.exit(0)

                        else:
                            _LOG.debug("Forked process %d for tile %s", pid, tile)
                            children.append(pid)

                    # wait until all children finish; a non-zero status or
                    # a failed wait is only logged, the run continues
                    for pid in children:
                        try:
                            pid, status = os.waitpid(pid, 0)
                            if status != 0:
                                _LOG.warning(COLOR_RED + "Child process PID=%s failed: %s" +
                                             COLOR_RESET, pid, status)
                        except OSError as exc:
                            _LOG.warning(COLOR_RED + "wait failed for PID=%s: %s" +
                                         COLOR_RESET, pid, exc)

            elif self.config.mp_mode == "mpi":

                tiles = geom.make_square_tiles(
                    self.config.FOV_rad, self.config.divide, self.config.divide, pointing_v, False)
                _LOG.info("Split FOV into %d tiles for MPI", len(tiles))

                # spawn subprocesses to handle individual tiles, special
                # care needed for self.lastSourceId because it's
                # propagated back from (0, 0)
                lastSourceId = self.lastSourceId
                tile_data = []
                for ix, iy, region in tiles:
                    # make sure lastSourceId is unique in each process
                    lastSourceId += len(sources)
                    tile = (ix, iy)
                    tile_data += [(visit_id, dt, region, sources, indices, tile, lastSourceId)]

                with timer.Timer("VisitProcessing"):
                    _LOG.info("Scatter sources to %d tile processes", len(tile_data))
                    self.run_mpi_tile(db, MPI.COMM_WORLD, tile_data)
                # continue from the highest ID handed out to any tile
                self.lastSourceId = lastSourceId

        if not self.args.no_update:
            # store last visit info
            db.saveVisit(visit_id, dt)

        _LOG.info(COLOR_BLUE + "--- Finished processing visit %s, time: %s" +
                  COLOR_RESET, visit_id, loop_timer)

    # stop MPI slaves by sending one `None` item per tile
    # NOTE(review): presumably `None` is the stop signal for
    # run_mpi_tile_loop - confirm against that method.
    if self.config.divide > 1 and self.config.mp_mode == "mpi":
        _LOG.info("Stopping MPI tile processes")
        tile_data = [None] * self.config.divide**2
        self.run_mpi_tile(db, MPI.COMM_WORLD, tile_data)

    return 0