Esempio n. 1
0
    def test_storeSources(self):
        """Write DiaSources to a fresh database and read them back.

        The stored sources are retrieved twice, first by region and then by
        DiaObject ID; each query is expected to return as many records as
        were stored.
        """
        apdb = Apdb(ApdbConfig(db_url="sqlite:///",
                               isolation_level="READ_UNCOMMITTED",
                               read_sources_months=12,
                               read_forced_sources_months=12))
        apdb.makeSchema()

        pixel_ranges = _makePixelRanges()
        visit_time = datetime.datetime.now()

        # Pick the catalog factories matching the format under test.
        if self.use_pandas:
            make_objects = _makeObjectCatalogPandas
            make_sources = _makeSourceCatalogPandas
        else:
            make_objects = _makeObjectCatalog
            make_sources = _makeSourceCatalog

        # The sources are derived from the objects, and the objects have to
        # be stored before the sources.
        objects = make_objects(pixel_ranges)
        sources, object_ids = make_sources(objects)
        apdb.storeDiaObjects(objects, visit_time)
        apdb.storeDiaSources(sources)

        # First retrieval path: by region.
        by_region = apdb.getDiaSourcesInRegion(pixel_ranges, visit_time,
                                               self.use_pandas)
        self._assertCatalog(by_region, len(sources), type=self.data_type)

        # Second retrieval path: by DiaObject ID.
        by_object = apdb.getDiaSources(object_ids, visit_time,
                                       self.use_pandas)
        self._assertCatalog(by_object, len(sources), type=self.data_type)
Esempio n. 2
0
def makeApdb(args=None):
    """Build an APDB, including its schema, from command-line configuration.

    The arguments are parsed with `ConfigOnlyParser`; they should supply
    config values or a config file for `ApdbConfig`.

    Parameters
    ----------
    args : `list` [`str`], optional
        Command-line arguments; when `None`, `sys.argv` is used.

    Returns
    -------
    apdb : `lsst.dax.apdb.Apdb`
        The newly configured APDB object, with ``makeSchema()`` already
        called on it.
    """
    parsed = ConfigOnlyParser().parse_args(args=args)
    config = parsed.config

    # afw schemas for the tables that have them.
    afw_schemas = {
        "DiaObject": make_dia_object_schema(),
        "DiaSource": make_dia_source_schema(),
    }

    apdb = Apdb(config=config, afw_schemas=afw_schemas)
    apdb.makeSchema()
    return apdb
Esempio n. 3
0
 def test_makeSchema(self):
     """Create the Apdb schema using an in-memory sqlite engine.

     There are no assertions; the test passes as long as nothing raises.
     """
     # sqlite does not support the default READ_COMMITTED isolation level,
     # and an in-memory database has to use a connection pool.
     settings = dict(db_url="sqlite://",
                     isolation_level="READ_UNCOMMITTED")
     apdb = Apdb(ApdbConfig(**settings))
     apdb.makeSchema()
Esempio n. 4
0
    def test_emptyGetsBaseline(self):
        """Run every getter against an empty database.

        Nothing has been stored, so region queries must produce empty
        catalogs, ID queries with an empty ID list must produce `None`, and
        ID queries with a non-empty ID list must produce empty catalogs.
        This is only useful for checking that the code is not broken.
        """
        # Non-zero month settings are used for source and forced source
        # fetching.
        apdb = Apdb(ApdbConfig(db_url="sqlite:///",
                               isolation_level="READ_UNCOMMITTED",
                               read_sources_months=12,
                               read_forced_sources_months=12))
        apdb.makeSchema()

        pixel_ranges = _makePixelRanges()
        visit_time = datetime.datetime.now()

        # DiaObjects by region.
        found = apdb.getDiaObjects(pixel_ranges,
                                   return_pandas=self.use_pandas)
        self._assertCatalog(found, 0, type=self.data_type)

        # DiaSources by region.
        found = apdb.getDiaSourcesInRegion(pixel_ranges, visit_time,
                                           return_pandas=self.use_pandas)
        self._assertCatalog(found, 0, type=self.data_type)

        # Sources first, then forced sources, both looked up by object ID.
        for fetch_by_ids in (apdb.getDiaSources, apdb.getDiaForcedSources):
            # empty object list, should return None
            found = fetch_by_ids([], visit_time,
                                 return_pandas=self.use_pandas)
            self.assertIs(found, None)

            # non-empty object list, should return an empty catalog
            found = fetch_by_ids([1, 2, 3], visit_time,
                                 return_pandas=self.use_pandas)
            self._assertCatalog(found, 0, type=self.data_type)
    def __init__(self, db_file):
        """Create configuration and tasks similar to those used in ap_pipe.

        Parameters
        ----------
        db_file : `str`
            Location of the sqlite database file; it is appended to
            ``"sqlite:///"`` to form the database URL.
        """
        self.log = Log.getLogger("RunAssociation")

        # APDB configuration.
        self.apdbConfig = apdb_config = ApdbConfig()
        apdb_config.db_url = "sqlite:///" + db_file
        apdb_config.isolation_level = "READ_UNCOMMITTED"
        apdb_config.dia_object_index = "baseline"
        apdb_config.dia_object_columns = []
        apdb_config.connection_timeout = 240
        apdb_config.schema_file = _data_file_name(
            "apdb-schema.yaml", "dax_apdb")
        apdb_config.column_map = _data_file_name(
            "apdb-ap-pipe-afw-map.yaml", "ap_association")
        apdb_config.extra_schema_file = _data_file_name(
            "apdb-ap-pipe-schema-extra.yaml", "ap_association")

        afw_schemas = {
            "DiaObject": make_dia_object_schema(),
            "DiaSource": make_dia_source_schema(),
        }
        # NOTE(review): makeSchema() is not called on this Apdb; presumably
        # the database file already contains the schema -- confirm.
        self.apdb = Apdb(config=apdb_config, afw_schemas=afw_schemas)

        # Image differencing; the schema is different if we do decorrelation.
        self.differencerConfig = diff_config = ImageDifferenceConfig()
        diff_config.doDecorrelation = True
        differencer = ImageDifferenceTask(config=diff_config)
        self.differencerSchema = differencer.schema

        self.diaSourceDpddifier = MapDiaSourceTask(
            inputSchema=self.differencerSchema)
        self.associator = AssociationTask()

        self.diffType = "deep"
Esempio n. 6
0
    def test_storeObjectsLast(self):
        """Write DiaObjects and read them back using DiaObjectLast table."""
        # Source-related options are left at their defaults; this test does
        # not care about sources.
        apdb = Apdb(ApdbConfig(db_url="sqlite:///",
                               isolation_level="READ_UNCOMMITTED",
                               dia_object_index="last_object_table",
                               object_last_replace=True))
        apdb.makeSchema()

        pixel_ranges = _makePixelRanges()
        visit_time = datetime.datetime.now()

        # Build the object catalog in the format under test and store it.
        make_objects = (_makeObjectCatalogPandas if self.use_pandas
                        else _makeObjectCatalog)
        objects = make_objects(pixel_ranges)
        apdb.storeDiaObjects(objects, visit_time)

        # Everything that was stored must come back.
        stored = apdb.getDiaObjects(pixel_ranges,
                                    return_pandas=self.use_pandas)
        self._assertCatalog(stored, len(objects), type=self.data_type)
Esempio n. 7
0
    def test_emptyGetsObjectLast(self):
        """Query DiaObjects from an empty database using DiaObjectLast table.

        Nothing has been stored, so the query must return an empty result;
        this is only useful for checking that the code is not broken.
        """
        # Source-related options are left at their defaults; this test does
        # not care about sources.
        settings = dict(db_url="sqlite:///",
                        isolation_level="READ_UNCOMMITTED",
                        dia_object_index="last_object_table")
        apdb = Apdb(ApdbConfig(**settings))
        apdb.makeSchema()

        # DiaObjects by region.
        region_ranges = _makePixelRanges()
        found = apdb.getDiaObjects(region_ranges,
                                   return_pandas=self.use_pandas)
        self._assertCatalog(found, 0, type=self.data_type)
Esempio n. 8
0
    def _fillRandomData(self, catalog: pandas.DataFrame, table: ApdbTables,
                        db: Apdb) -> None:
        """Populate schema columns absent from a catalog with random values.

        The catalog is modified in place. Columns it already has are left
        untouched. ``pixelId`` is never generated, and for the DiaObject
        table neither are ``validityStart`` and ``validityEnd``. Nothing is
        done when ``db`` has no definition for ``table``.

        Parameters
        ----------
        catalog : `pandas.DataFrame`
            Catalog to extend and fill.
        table : `ApdbTables`
            Table type.
        db : `Apdb`
            APDB interface, used to look up the table definition.
        """
        rng = numpy.random.default_rng()
        table_def = db.tableDef(table)
        if table_def is None:
            return

        n_rows = len(catalog)
        is_dia_object = table is ApdbTables.DiaObject
        for column in table_def.columns:
            col_name = column.name

            # Columns that are never generated here.
            if is_dia_object and col_name in ("validityStart", "validityEnd"):
                continue
            if col_name == "pixelId":
                continue

            # Only columns missing from the catalog need new data.
            if col_name in catalog.columns:
                continue

            col_type = column.type
            if col_type == "FLOAT":
                values = rng.random(n_rows, dtype=numpy.float32)
            elif col_type == "DOUBLE":
                values = rng.random(n_rows, dtype=numpy.float64)
            elif col_type == "INT":
                values = rng.integers(0, 1000, n_rows, dtype=numpy.int32)
            elif col_type == "BIGINT":
                values = rng.integers(0, 1000, n_rows, dtype=numpy.int64)
            elif col_type == "SMALLINT":
                values = rng.integers(0, 1000, n_rows, dtype=numpy.int16)
            elif "INT" in col_type:
                # any other integer-like type
                values = rng.integers(0, 100, n_rows)
            elif col_type == "BLOB":
                # 100 random bytes per row
                values = pandas.Series(
                    [rng.bytes(100) for _ in range(n_rows)])
            elif col_type == "DATETIME":
                # random integers reinterpreted as seconds-resolution
                # timestamps
                seconds = rng.integers(1500000000,
                                       1600000000,
                                       n_rows,
                                       dtype=numpy.int64)
                values = numpy.array(seconds, dtype="datetime64[s]")
            else:
                # unrecognized type, use floats
                values = rng.random(n_rows)
            catalog[col_name] = values
Esempio n. 9
0
    def run(self, config):
        """Build a database object from a config.

        Parameters
        ----------
        config : `lsst.dax.apdb.ApdbConfig` or `None`
            A config for the database connection.

        Returns
        -------
        result : `lsst.pipe.base.Struct`
            Result struct with components:

            ``apdb``
                A database configured the same way as in ``config``, or
                `None` when ``config`` is `None` (or otherwise falsy).
        """
        if config:
            apdb = Apdb(config)
        else:
            apdb = None
        return Struct(apdb=apdb)
Esempio n. 10
0
    def _getApdb(self, config):
        """Extract an Apdb object from an arbitrary task config.

        Parameters
        ----------
        config : `lsst.pex.config.Config` or `None`
            A config that may contain a `lsst.dax.apdb.ApdbConfig`.
            Behavior is undefined if there is more than one such member.

        Returns
        -------
        apdb : `lsst.dax.apdb.Apdb`-like or `None`
            A `lsst.dax.apdb.Apdb` object or a drop-in replacement, or `None`
            if no `lsst.dax.apdb.ApdbConfig` is present in ``config``.
        """
        if config is None:
            return None
        if isinstance(config, ApdbConfig):
            return Apdb(config)

        for field in config.values():
            if isinstance(field, ConfigurableInstance):
                result = self._getApdbFromConfigurableField(field)
                if result:
                    return result
            elif isinstance(field, ConfigChoiceField.instanceDictClass):
                try:
                    # can't test with hasattr because of non-standard getattr
                    field.names
                except FieldValidationError:
                    result = self._getApdb(field.active)
                else:
                    result = self._getApdbFromConfigIterable(field.active)
                if result:
                    return result
            elif isinstance(field, ConfigDictField.DictClass):
                result = self._getApdbFromConfigIterable(field.values())
                if result:
                    return result
            elif isinstance(field, Config):
                # Can't test for `ConfigField` more directly than this
                result = self._getApdb(field)
                if result:
                    return result
        return None
def _roundTripThroughApdb(objects, sources, forcedSources, dateTime):
    """Run object and source catalogs through the Apdb to get the correct
    table schemas.

    A temporary sqlite database is created, the input catalogs are stored
    in it and then read back.

    Parameters
    ----------
    objects : `pandas.DataFrame`
        Set of test DiaObjects to round trip.
    sources : `pandas.DataFrame`
        Set of test DiaSources to round trip.
    forcedSources : `pandas.DataFrame`
        Set of test DiaForcedSources to round trip.
    dateTime : `datetime.datetime`
        Time for the Apdb.

    Returns
    -------
    objects : `pandas.DataFrame`
        Round tripped objects, indexed by ``diaObjectId``.
    sources : `pandas.DataFrame`
        Round tripped sources, indexed by ``diaObjectId``, ``filterName``
        and ``diaSourceId``.
    forcedSources : `pandas.DataFrame`
        Round tripped forced sources, indexed by ``diaObjectId``.
    """
    # Imported locally so the function does not depend on how (or whether)
    # the enclosing module imports pandas.
    import pandas as pd

    # The context manager closes the temporary database file, which also
    # removes it, even if one of the database operations raises.
    with tempfile.NamedTemporaryFile() as tmpFile:
        apdbConfig = ApdbConfig()
        apdbConfig.db_url = "sqlite:///" + tmpFile.name
        apdbConfig.isolation_level = "READ_UNCOMMITTED"
        apdbConfig.dia_object_index = "baseline"
        apdbConfig.dia_object_columns = []
        apdbConfig.schema_file = _data_file_name("apdb-schema.yaml",
                                                 "dax_apdb")
        apdbConfig.column_map = _data_file_name("apdb-ap-pipe-afw-map.yaml",
                                                "ap_association")
        apdbConfig.extra_schema_file = _data_file_name(
            "apdb-ap-pipe-schema-extra.yaml", "ap_association")

        apdb = Apdb(config=apdbConfig,
                    afw_schemas=dict(DiaObject=make_dia_object_schema(),
                                     DiaSource=make_dia_source_schema()))
        apdb.makeSchema()

        minId = objects["pixelId"].min()
        maxId = objects["pixelId"].max()
        pixelRanges = [[minId, maxId + 1]]
        inputObjectIds = np.unique(objects["diaObjectId"])

        # Read from the (still empty) database and add the inputs to what
        # comes back. pandas.concat is used because DataFrame.append was
        # removed in pandas 2.0.
        diaObjects = pd.concat(
            [apdb.getDiaObjects(pixelRanges, return_pandas=True), objects])
        diaSources = pd.concat(
            [apdb.getDiaSources(inputObjectIds, dateTime,
                                return_pandas=True),
             sources])
        diaForcedSources = pd.concat(
            [apdb.getDiaForcedSources(inputObjectIds, dateTime,
                                      return_pandas=True),
             forcedSources])

        apdb.storeDiaSources(diaSources)
        apdb.storeDiaForcedSources(diaForcedSources)
        apdb.storeDiaObjects(diaObjects, dateTime)

        # Read everything back from the database.
        diaObjects = apdb.getDiaObjects(pixelRanges, return_pandas=True)
        storedObjectIds = np.unique(diaObjects["diaObjectId"])
        diaSources = apdb.getDiaSources(storedObjectIds,
                                        dateTime,
                                        return_pandas=True)
        diaForcedSources = apdb.getDiaForcedSources(storedObjectIds,
                                                    dateTime,
                                                    return_pandas=True)

    # The ID columns are kept as regular columns in addition to the index.
    diaObjects.set_index("diaObjectId", drop=False, inplace=True)
    diaSources.set_index(["diaObjectId", "filterName", "diaSourceId"],
                         drop=False,
                         inplace=True)
    diaForcedSources.set_index(["diaObjectId"], drop=False, inplace=True)

    return (diaObjects, diaSources, diaForcedSources)
Esempio n. 12
0
    def visit(self,
              db: Apdb,
              visit_id: int,
              dt: DateTime,
              region: Region,
              sources: numpy.ndarray,
              indices: numpy.ndarray,
              tile: Optional[Tuple[int, int]] = None) -> None:
        """AP processing of a single visit (with known sources).

        The steps are: read the latest DiaObjects for the region, build new
        DiaObjects, DiaSources and DiaForcedSources from the input sources,
        optionally read stored sources and forced sources back (the results
        are only logged), and finally store the new records unless updates
        are disabled via ``self.args.no_update``.

        Parameters
        ----------
        db : `Apdb`
            APDB interface
        visit_id : `int`
            Visit ID.
        dt : `DateTime`
            Time of visit
        region : `sphgeom.Region`
            Region, could be the whole FOV (Circle) or small piece of it
        sources : `numpy.array`
            Array of xyz coordinates of sources, this has all visit sources,
            not only current tile
        indices : `numpy.array`
            array of indices of sources, 1-dim ndarray, transient sources
            have negative indices. Modified in place: entries for sources
            outside of ``region`` are overwritten with ``_OUTSIDER``.
        tile : `tuple`, optional
            tile position (x, y); when given it is used as a prefix for
            timer names and log messages.
        """

        # prefix for timer names and log messages
        name = ""
        if tile is not None:
            name = "tile={}x{} ".format(*tile)

        # Sources are read back only for a part of each cycle of
        # src_read_period visits: those visits whose ID modulo the period is
        # below src_read_visits.
        src_read_period = self.config.src_read_period
        src_read_visits = round(self.config.src_read_period *
                                self.config.src_read_duty_cycle)
        do_read_src = visit_id % src_read_period < src_read_visits

        # make a mask: mark sources that are outside of the region
        for i in range(len(sources)):
            xyz = sources[i]
            if not region.contains(UnitVector3d(xyz[0], xyz[1], xyz[2])):
                indices[i] = _OUTSIDER

        with timer.Timer(name + "Objects-read"):

            # Retrieve DiaObjects (latest versions) from database for matching,
            # this will produce wider coverage so further filtering is needed
            latest_objects = db.getDiaObjects(region)
            _LOG.info(name + 'database found %s objects',
                      _nrows(latest_objects))

            # filter database objects to a mask
            latest_objects = self._filterDiaObjects(latest_objects, region)
            _LOG.info(name + 'after filtering %s objects',
                      _nrows(latest_objects))

        with timer.Timer(name + "S2O-matching"):

            # create all new DiaObjects
            objects = self._makeDiaObjects(sources, indices, dt)

            # make all sources
            srcs = self._makeDiaSources(sources, indices, dt, visit_id)

            # do forced photometry (can extend objects)
            fsrcs, objects = self._forcedPhotometry(objects, latest_objects,
                                                    dt, visit_id)

            # optionally add the remaining schema columns, filled with
            # random data
            if self.config.fill_empty_fields:
                self._fillRandomData(objects, ApdbTables.DiaObject, db)
                self._fillRandomData(srcs, ApdbTables.DiaSource, db)
                self._fillRandomData(fsrcs, ApdbTables.DiaForcedSource, db)

        if do_read_src:
            with timer.Timer(name + "Source-read"):

                latest_objects_ids = list(latest_objects['diaObjectId'])

                # The results of both reads are only used for logging their
                # row counts; ``read_srcs`` is reused for the second read.
                read_srcs = db.getDiaSources(region, latest_objects_ids, dt)
                _LOG.info(name + 'database found %s sources',
                          _nrows(read_srcs))

                read_srcs = db.getDiaForcedSources(region, latest_objects_ids,
                                                   dt)
                _LOG.info(name + 'database found %s forced sources',
                          _nrows(read_srcs))
        else:
            _LOG.info("skipping reading of sources for this visit")

        if not self.args.no_update:

            with timer.Timer(name + "L1-store"):

                # store new versions of objects together with the new
                # sources and forced sources
                _LOG.info(name + 'will store %d Objects', len(objects))
                _LOG.info(name + 'will store %d Sources', len(srcs))
                _LOG.info(name + 'will store %d ForcedSources', len(fsrcs))
                db.store(dt, objects, srcs, fsrcs)