def test_storeSources(self):
    """Store and retrieve DiaSources."""
    config = ApdbConfig(db_url="sqlite:///",
                        isolation_level="READ_UNCOMMITTED",
                        read_sources_months=12,
                        read_forced_sources_months=12)
    apdb = Apdb(config)
    apdb.makeSchema()

    pixel_ranges = _makePixelRanges()
    visit_time = datetime.datetime.now()

    # have to store Objects first
    if self.use_pandas:
        objects = _makeObjectCatalogPandas(pixel_ranges)
        catalog, oids = _makeSourceCatalogPandas(objects)
    else:
        objects = _makeObjectCatalog(pixel_ranges)
        catalog, oids = _makeSourceCatalog(objects)

    # save the objects
    apdb.storeDiaObjects(objects, visit_time)

    # save the sources
    apdb.storeDiaSources(catalog)

    # read it back and check sizes
    res = apdb.getDiaSourcesInRegion(pixel_ranges, visit_time,
                                     return_pandas=self.use_pandas)
    self._assertCatalog(res, len(catalog), type=self.data_type)

    # read it back using different method
    res = apdb.getDiaSources(oids, visit_time,
                             return_pandas=self.use_pandas)
    self._assertCatalog(res, len(catalog), type=self.data_type)
def makeApdb(args=None):
    """Create an APDB according to a config.

    The command-line arguments should provide config values or a config file
    for `ApdbConfig`.

    Parameters
    ----------
    args : `list` [`str`], optional
        List of command-line arguments; if `None` use `sys.argv`.

    Returns
    -------
    apdb : `lsst.dax.apdb.Apdb`
        The newly configured APDB object.
    """
    parser = ConfigOnlyParser()
    parsedCmd = parser.parse_args(args=args)

    apdb = Apdb(config=parsedCmd.config,
                afw_schemas=dict(DiaObject=make_dia_object_schema(),
                                 DiaSource=make_dia_source_schema()))
    apdb.makeSchema()
    return apdb
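# Hedged usage sketch (not part of the original source): build an in-memory
# APDB from command-line-style config overrides. The "-c name=value" override
# syntax is an assumption borrowed from the lsst.pipe.base ArgumentParser
# convention; ConfigOnlyParser may expect a different form.
example_apdb = makeApdb(["-c", "db_url=sqlite://",
                         "-c", "isolation_level=READ_UNCOMMITTED"])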
def test_makeSchema(self):
    """Test for making an instance of Apdb using in-memory sqlite engine.
    """
    # sqlite does not support default READ_COMMITTED, for in-memory
    # database have to use connection pool
    config = ApdbConfig(db_url="sqlite://",
                        isolation_level="READ_UNCOMMITTED")
    apdb = Apdb(config)
    # the essence of a test here is that there are no exceptions.
    apdb.makeSchema()
def test_emptyGetsBaseline(self):
    """Test for getting data from empty database.

    All get() methods should return empty results, only useful for
    checking that code is not broken.
    """
    # use non-zero months for Forced/Source fetching
    config = ApdbConfig(db_url="sqlite:///",
                        isolation_level="READ_UNCOMMITTED",
                        read_sources_months=12,
                        read_forced_sources_months=12)
    apdb = Apdb(config)
    apdb.makeSchema()

    pixel_ranges = _makePixelRanges()
    visit_time = datetime.datetime.now()

    # get objects by region
    res = apdb.getDiaObjects(pixel_ranges, return_pandas=self.use_pandas)
    self._assertCatalog(res, 0, type=self.data_type)

    # get sources by region
    res = apdb.getDiaSourcesInRegion(pixel_ranges, visit_time,
                                     return_pandas=self.use_pandas)
    self._assertCatalog(res, 0, type=self.data_type)

    # get sources by object ID, empty object list, should return None
    res = apdb.getDiaSources([], visit_time,
                             return_pandas=self.use_pandas)
    self.assertIs(res, None)

    # get sources by object ID, non-empty object list
    res = apdb.getDiaSources([1, 2, 3], visit_time,
                             return_pandas=self.use_pandas)
    self._assertCatalog(res, 0, type=self.data_type)

    # get forced sources by object ID, empty object list, should return None
    res = apdb.getDiaForcedSources([], visit_time,
                                   return_pandas=self.use_pandas)
    self.assertIs(res, None)

    # get forced sources by object ID, non-empty object list
    res = apdb.getDiaForcedSources([1, 2, 3], visit_time,
                                   return_pandas=self.use_pandas)
    self._assertCatalog(res, 0, type=self.data_type)
def __init__(self, db_file):
    """Create configuration for tasks similar to the one used in ap_pipe.
    """
    self.log = Log.getLogger("RunAssociation")

    self.apdbConfig = ApdbConfig()
    self.apdbConfig.db_url = "sqlite:///" + db_file
    self.apdbConfig.isolation_level = "READ_UNCOMMITTED"
    self.apdbConfig.dia_object_index = "baseline"
    self.apdbConfig.dia_object_columns = []
    self.apdbConfig.connection_timeout = 240
    self.apdbConfig.schema_file = _data_file_name("apdb-schema.yaml", "dax_apdb")
    self.apdbConfig.column_map = _data_file_name(
        "apdb-ap-pipe-afw-map.yaml", "ap_association")
    self.apdbConfig.extra_schema_file = _data_file_name(
        "apdb-ap-pipe-schema-extra.yaml", "ap_association")

    self.apdb = Apdb(config=self.apdbConfig,
                     afw_schemas=dict(DiaObject=make_dia_object_schema(),
                                      DiaSource=make_dia_source_schema()))
    # apdb.makeSchema()

    self.differencerConfig = ImageDifferenceConfig()
    # Schema is different if we do decorrelation
    self.differencerConfig.doDecorrelation = True
    self.differencerSchema = ImageDifferenceTask(
        config=self.differencerConfig).schema
    self.diaSourceDpddifier = MapDiaSourceTask(
        inputSchema=self.differencerSchema)
    self.associator = AssociationTask()

    self.diffType = "deep"
def test_storeObjectsLast(self):
    """Store and retrieve DiaObjects using DiaObjectLast table."""
    # don't care about sources.
    config = ApdbConfig(db_url="sqlite:///",
                        isolation_level="READ_UNCOMMITTED",
                        dia_object_index="last_object_table",
                        object_last_replace=True)
    apdb = Apdb(config)
    apdb.makeSchema()

    pixel_ranges = _makePixelRanges()
    visit_time = datetime.datetime.now()

    # make catalog with Objects (afw or pandas, depending on the flag)
    if self.use_pandas:
        catalog = _makeObjectCatalogPandas(pixel_ranges)
    else:
        catalog = _makeObjectCatalog(pixel_ranges)

    # store catalog
    apdb.storeDiaObjects(catalog, visit_time)

    # read it back and check sizes
    res = apdb.getDiaObjects(pixel_ranges, return_pandas=self.use_pandas)
    self._assertCatalog(res, len(catalog), type=self.data_type)
def test_emptyGetsObjectLast(self):
    """Test for getting DiaObjects from empty database using DiaObjectLast
    table.

    All get() methods should return empty results, only useful for
    checking that code is not broken.
    """
    # don't care about sources.
    config = ApdbConfig(db_url="sqlite:///",
                        isolation_level="READ_UNCOMMITTED",
                        dia_object_index="last_object_table")
    apdb = Apdb(config)
    apdb.makeSchema()

    pixel_ranges = _makePixelRanges()

    # get objects by region
    res = apdb.getDiaObjects(pixel_ranges, return_pandas=self.use_pandas)
    self._assertCatalog(res, 0, type=self.data_type)
def _fillRandomData(self, catalog: pandas.DataFrame, table: ApdbTables, db: Apdb) -> None:
    """Add missing fields to a catalog and fill it with random numbers.

    Parameters
    ----------
    catalog : `pandas.DataFrame`
        Catalog to extend and fill.
    table : `ApdbTables`
        Table type.
    db : `Apdb`
        APDB interface.
    """
    rng = numpy.random.default_rng()
    table_def = db.tableDef(table)
    if table_def is None:
        return
    count = len(catalog)
    for colDef in table_def.columns:
        if table is ApdbTables.DiaObject and colDef.name in ("validityStart", "validityEnd"):
            continue
        if colDef.name == "pixelId":
            continue
        if colDef.name not in catalog.columns:
            # need to make a new column
            if colDef.type == "FLOAT":
                data = rng.random(count, dtype=numpy.float32)
            elif colDef.type == "DOUBLE":
                data = rng.random(count, dtype=numpy.float64)
            elif colDef.type == "INT":
                data = rng.integers(0, 1000, count, dtype=numpy.int32)
            elif colDef.type == "BIGINT":
                data = rng.integers(0, 1000, count, dtype=numpy.int64)
            elif colDef.type == "SMALLINT":
                data = rng.integers(0, 1000, count, dtype=numpy.int16)
            elif "INT" in colDef.type:
                data = rng.integers(0, 100, count)
            elif colDef.type == "BLOB":
                # random bytes
                data = pandas.Series([rng.bytes(100) for i in range(count)])
            elif colDef.type == "DATETIME":
                data = rng.integers(1500000000, 1600000000, count, dtype=numpy.int64)
                data = numpy.array(data, dtype="datetime64[s]")
            else:
                data = rng.random(count)
            catalog[colDef.name] = data
def run(self, config):
    """Create a database from a config.

    Parameters
    ----------
    config : `lsst.dax.apdb.ApdbConfig` or `None`
        A config for the database connection.

    Returns
    -------
    result : `lsst.pipe.base.Struct`
        Result struct with components:

        ``apdb``
            A database configured the same way as in ``config``.
    """
    return Struct(apdb=(Apdb(config) if config else None))
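# Minimal sketch of consuming the Struct returned by run() above; ``task``
# and ``apdbConfig`` are hypothetical names for an instance of the enclosing
# task and an lsst.dax.apdb.ApdbConfig.
result = task.run(apdbConfig)
apdb = result.apdb  # an Apdb instance, or None when config was None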
def _getApdb(self, config):
    """Extract an Apdb object from an arbitrary task config.

    Parameters
    ----------
    config : `lsst.pex.config.Config` or `None`
        A config that may contain a `lsst.dax.apdb.ApdbConfig`. Behavior is
        undefined if there is more than one such member.

    Returns
    -------
    apdb : `lsst.dax.apdb.Apdb`-like or `None`
        A `lsst.dax.apdb.Apdb` object or a drop-in replacement, or `None`
        if no `lsst.dax.apdb.ApdbConfig` is present in ``config``.
    """
    if config is None:
        return None
    if isinstance(config, ApdbConfig):
        return Apdb(config)

    for field in config.values():
        if isinstance(field, ConfigurableInstance):
            result = self._getApdbFromConfigurableField(field)
            if result:
                return result
        elif isinstance(field, ConfigChoiceField.instanceDictClass):
            try:
                # can't test with hasattr because of non-standard getattr
                field.names
            except FieldValidationError:
                result = self._getApdb(field.active)
            else:
                result = self._getApdbFromConfigIterable(field.active)
            if result:
                return result
        elif isinstance(field, ConfigDictField.DictClass):
            result = self._getApdbFromConfigIterable(field.values())
            if result:
                return result
        elif isinstance(field, Config):
            # Can't test for `ConfigField` more directly than this
            result = self._getApdb(field)
            if result:
                return result
    return None
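# Hypothetical illustration (assumed names, not from the original source) of
# the kind of nested config that _getApdb() above searches: a task config
# holding an ApdbConfig behind a ConfigField. Calling _getApdb() on an
# instance of such a config would recurse into ``apdb`` and construct an
# Apdb from it.
import lsst.pex.config as pexConfig
from lsst.dax.apdb import ApdbConfig


class _DemoConfig(pexConfig.Config):
    apdb = pexConfig.ConfigField(dtype=ApdbConfig, doc="APDB connection config")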
def _roundTripThroughApdb(objects, sources, forcedSources, dateTime):
    """Run object and source catalogs through the Apdb to get the correct
    table schemas.

    Parameters
    ----------
    objects : `pandas.DataFrame`
        Set of test DiaObjects to round trip.
    sources : `pandas.DataFrame`
        Set of test DiaSources to round trip.
    forcedSources : `pandas.DataFrame`
        Set of test DiaForcedSources to round trip.
    dateTime : `datetime.datetime`
        Time for the Apdb.

    Returns
    -------
    objects : `pandas.DataFrame`
        Round tripped objects.
    sources : `pandas.DataFrame`
        Round tripped sources.
    forcedSources : `pandas.DataFrame`
        Round tripped forced sources.
    """
    tmpFile = tempfile.NamedTemporaryFile()

    apdbConfig = ApdbConfig()
    apdbConfig.db_url = "sqlite:///" + tmpFile.name
    apdbConfig.isolation_level = "READ_UNCOMMITTED"
    apdbConfig.dia_object_index = "baseline"
    apdbConfig.dia_object_columns = []
    apdbConfig.schema_file = _data_file_name("apdb-schema.yaml", "dax_apdb")
    apdbConfig.column_map = _data_file_name("apdb-ap-pipe-afw-map.yaml",
                                            "ap_association")
    apdbConfig.extra_schema_file = _data_file_name(
        "apdb-ap-pipe-schema-extra.yaml", "ap_association")

    apdb = Apdb(config=apdbConfig,
                afw_schemas=dict(DiaObject=make_dia_object_schema(),
                                 DiaSource=make_dia_source_schema()))
    apdb.makeSchema()

    minId = objects["pixelId"].min()
    maxId = objects["pixelId"].max()
    diaObjects = apdb.getDiaObjects([[minId, maxId + 1]],
                                    return_pandas=True).append(objects)
    diaSources = apdb.getDiaSources(np.unique(objects["diaObjectId"]),
                                    dateTime,
                                    return_pandas=True).append(sources)
    diaForcedSources = apdb.getDiaForcedSources(
        np.unique(objects["diaObjectId"]),
        dateTime,
        return_pandas=True).append(forcedSources)

    apdb.storeDiaSources(diaSources)
    apdb.storeDiaForcedSources(diaForcedSources)
    apdb.storeDiaObjects(diaObjects, dateTime)

    diaObjects = apdb.getDiaObjects([[minId, maxId + 1]], return_pandas=True)
    diaSources = apdb.getDiaSources(np.unique(diaObjects["diaObjectId"]),
                                    dateTime, return_pandas=True)
    diaForcedSources = apdb.getDiaForcedSources(
        np.unique(diaObjects["diaObjectId"]), dateTime, return_pandas=True)

    diaObjects.set_index("diaObjectId", drop=False, inplace=True)
    diaSources.set_index(["diaObjectId", "filterName", "diaSourceId"],
                         drop=False, inplace=True)
    diaForcedSources.set_index(["diaObjectId"], drop=False, inplace=True)

    return (diaObjects, diaSources, diaForcedSources)
def visit(self, db: Apdb, visit_id: int, dt: DateTime, region: Region,
          sources: numpy.ndarray, indices: numpy.ndarray,
          tile: Optional[Tuple[int, int]] = None) -> None:
    """AP processing of a single visit (with known sources).

    Parameters
    ----------
    db : `Apdb`
        APDB interface.
    visit_id : `int`
        Visit ID.
    dt : `DateTime`
        Time of visit.
    region : `sphgeom.Region`
        Region, could be the whole FOV (Circle) or a small piece of it.
    sources : `numpy.ndarray`
        Array of xyz coordinates of sources; this has all visit sources,
        not only the current tile.
    indices : `numpy.ndarray`
        1-dim array of source indices; transient sources have negative
        indices.
    tile : `tuple`, optional
        Tile position (x, y).
    """
    name = ""
    if tile is not None:
        name = "tile={}x{} ".format(*tile)

    src_read_period = self.config.src_read_period
    src_read_visits = round(self.config.src_read_period * self.config.src_read_duty_cycle)
    do_read_src = visit_id % src_read_period < src_read_visits

    # make a mask
    for i in range(len(sources)):
        xyz = sources[i]
        if not region.contains(UnitVector3d(xyz[0], xyz[1], xyz[2])):
            indices[i] = _OUTSIDER

    with timer.Timer(name + "Objects-read"):
        # Retrieve DiaObjects (latest versions) from database for matching;
        # this will produce wider coverage so further filtering is needed.
        latest_objects = db.getDiaObjects(region)
        _LOG.info(name + 'database found %s objects', _nrows(latest_objects))

        # filter database objects to this region
        latest_objects = self._filterDiaObjects(latest_objects, region)
        _LOG.info(name + 'after filtering %s objects', _nrows(latest_objects))

    with timer.Timer(name + "S2O-matching"):
        # create all new DiaObjects
        objects = self._makeDiaObjects(sources, indices, dt)

        # make all sources
        srcs = self._makeDiaSources(sources, indices, dt, visit_id)

        # do forced photometry (can extend objects)
        fsrcs, objects = self._forcedPhotometry(objects, latest_objects, dt, visit_id)

        if self.config.fill_empty_fields:
            self._fillRandomData(objects, ApdbTables.DiaObject, db)
            self._fillRandomData(srcs, ApdbTables.DiaSource, db)
            self._fillRandomData(fsrcs, ApdbTables.DiaForcedSource, db)

    if do_read_src:
        with timer.Timer(name + "Source-read"):
            latest_objects_ids = list(latest_objects['diaObjectId'])

            read_srcs = db.getDiaSources(region, latest_objects_ids, dt)
            _LOG.info(name + 'database found %s sources', _nrows(read_srcs))

            read_srcs = db.getDiaForcedSources(region, latest_objects_ids, dt)
            _LOG.info(name + 'database found %s forced sources', _nrows(read_srcs))
    else:
        _LOG.info("skipping reading of sources for this visit")

    if not self.args.no_update:
        with timer.Timer(name + "L1-store"):
            # store new versions of objects
            _LOG.info(name + 'will store %d Objects', len(objects))
            _LOG.info(name + 'will store %d Sources', len(srcs))
            _LOG.info(name + 'will store %d ForcedSources', len(fsrcs))
            db.store(dt, objects, srcs, fsrcs)