예제 #1
0
 def test_getAllFilesRegex(self):
     """Check that a file-ending regex selects exactly the matching files.

     The expected set is built independently with a recursive glob for
     ``*.yaml`` below the ``config`` test directory.
     """
     searchDir = os.path.join(TESTDIR, "config")
     yamlPattern = os.path.join(searchDir, "**", "*.yaml")
     expected = Counter(glob(yamlPattern, recursive=True))
     # Guard against a vacuous pass: the glob must have found something.
     self.assertNotEqual(len(expected), 0)
     found = Counter(findFileResources([searchDir], r"\.yaml\b"))
     self.assertEqual(expected, found)
예제 #2
0
 def test_getAllFiles(self):
     """Check that omitting the regex returns every file in the directory.

     The expected set is every regular file (directories excluded) found
     by a recursive glob below the ``config`` test directory.
     """
     searchDir = os.path.join(TESTDIR, "config")
     entries = glob(os.path.join(searchDir, "**"), recursive=True)
     # The recursive glob also yields directories; keep only files.
     expected = Counter(filter(os.path.isfile, entries))
     # Guard against a vacuous pass: at least one file must exist.
     self.assertNotEqual(len(expected), 0)
     found = Counter(findFileResources([searchDir]))
     self.assertEqual(expected, found)
예제 #3
0
def ingestRaws(repo,
               locations,
               regex,
               output_run,
               config=None,
               config_file=None,
               transfer="auto",
               processes=1,
               ingest_task="lsst.obs.base.RawIngestTask"):
    """Ingest raw frames into the butler registry.

    Parameters
    ----------
    repo : `str`
        URI to the repository.
    locations : `list` [`str`]
        Files to ingest and directories to search for files that match
        ``regex`` to ingest.
    regex : `str`
        Regex string used to find files in directories listed in locations.
    output_run : `str`
        The path to the location, the run, where datasets should be put.
    config : `dict` [`str`, `str`] or `None`
        Key-value pairs to apply as overrides to the ingest config.
    config_file : `str` or `None`
        Path to a config file that contains overrides to the ingest config.
    transfer : `str` or None
        The external data transfer type, by default "auto".
    processes : `int`
        Number of processes to use for ingest.
    ingest_task : `str`
        The fully qualified class name of the ingest task to use by default
        lsst.obs.base.RawIngestTask.

    Raises
    ------
    Exception
        Raised if operations on configuration object fail.

    Notes
    -----
    ``transfer`` is set on the task's default config first; the file
    override (if any) and then the key-value overrides are applied on top
    of it, in that order.
    """
    butler = Butler(repo, writeable=True)
    taskClass = doImport(ingest_task)

    # Start from the task's default config and set the transfer mode
    # before any user-supplied overrides are applied.
    taskConfig = taskClass.ConfigClass()
    taskConfig.transfer = transfer

    overrides = ConfigOverrides()
    if config_file is not None:
        overrides.addFileOverride(config_file)
    for key, val in ({} if config is None else config).items():
        overrides.addValueOverride(key, val)
    overrides.applyTo(taskConfig)

    task = taskClass(config=taskConfig, butler=butler)
    task.run(findFileResources(locations, regex),
             run=output_run,
             processes=processes)
예제 #4
0
 def test_multipleInputs(self):
     """Check that files are found when several locations are given.

     The expected set is the union of recursive ``*.yaml`` globs over the
     ``basic`` and ``templates`` sub-directories of ``config``.
     """
     searchDirs = [
         os.path.join(TESTDIR, "config", sub)
         for sub in ("basic", "templates")
     ]
     expected = Counter()
     for directory in searchDirs:
         expected.update(
             glob(os.path.join(directory, "**", "*.yaml"), recursive=True))
     # Guard against a vacuous pass: the globs must have found something.
     self.assertNotEqual(len(expected), 0)
     found = Counter(findFileResources(searchDirs, r"\.yaml\b"))
     self.assertEqual(expected, found)
예제 #5
0
 def test_getSingleFile(self):
     """Check that a location naming a single file returns just that file."""
     target = os.path.join(TESTDIR, "config/basic/butler.yaml")
     found = findFileResources([target])
     self.assertEqual([target], found)
예제 #6
0
def ingestSimulated(repo,
                    locations,
                    regex,
                    output_run,
                    transfer="auto",
                    ingest_type="rawexp"):
    """Ingest simulator exposure files into the butler registry.

    Registers the ``simulator`` instrument with detectors 1-6, the
    ``ingest_type`` dataset type and the output run collection, then
    ingests every file found via ``locations``/``regex`` whose name matches
    the simulator naming pattern.

    Parameters
    ----------
    repo : `str`
        URI to the repository.
    locations : `list` [`str`]
        Files to ingest and directories to search for files that match
        ``regex`` to ingest.
    regex : `str`
        Regex string used to find files in directories listed in locations.
    output_run : `str` or `None`
        Name of the run collection where datasets should be put.  If
        `None`, ``f"{ingest_type}r"`` is used.
    transfer : `str` or None
        The external data transfer type, by default "auto".
    ingest_type : `str`
        Name of the dataset type to register and ingest the files as.

    Raises
    ------
    Exception
        Errors from creating the butler, from the instrument/detector,
        dataset type and collection registration, and from the final
        ingest are not caught here and propagate to the caller.

    Notes
    -----
    File names must contain ``sim_exposure_<exposure>_array_<detector>``
    followed by ``_``, ``,`` or ``.``; the detector is a single digit.
    Files that do not match, or whose exposure record cannot be synced,
    are logged as errors and skipped, and the number of skipped files is
    logged as a warning before ingest.

    All datasets are ingested inside one ``butler.transaction()`` block.
    The exposure dimension record of each file is synced with
    `Registry.syncDimensionData` beforehand, outside that block, so that
    different files of the same exposure can be ingested in different runs.
    """

    butler = Butler(repo, writeable=True)

    # make sure instrument and detector dimensions are populated
    with butler.registry.transaction():
        instrument_record = {
            "name": "simulator",
            "exposure_max": 600000,
            "detector_max": 6,
            "class_name": "spherex.instrument.SimulatorInstrument"
        }
        butler.registry.syncDimensionData("instrument", instrument_record)
        for idx in range(1, 7):
            detector_record = {
                "instrument": "simulator",
                "id": idx,
                "full_name": f"array{idx}"
            }
            butler.registry.syncDimensionData("detector", detector_record)

    dimension_universe = butler.registry.dimensions
    datasetType = DatasetType(ingest_type,
                              dimension_universe.extract(
                                  ("instrument", "detector", "exposure")),
                              "SPHERExImage",
                              universe=dimension_universe)
    # idempotent dataset type registration
    butler.registry.registerDatasetType(datasetType)

    # idempotent collection registration
    run = f"{ingest_type}r" if (output_run is None) else output_run
    butler.registry.registerCollection(run, type=CollectionType.RUN)

    n_failed = 0
    files = findFileResources(locations, regex)

    # example: sim_exposure_000000_array_1.fits or
    #   sim_exposure_000000_array_2_dark_current.fits
    pattern = re.compile(r"sim_exposure_(\d+)_array_(\d)[_,.]")

    # do we want to group observations?
    grp = datetime.date.today().strftime("%Y%m%d")

    datasets = []
    for file in files:
        # parse exposure and detector ids from file name
        m = pattern.search(file)
        if m is None:
            n_failed += 1
            logging.error(f"{file} does not match simulator file pattern")
            continue
        # The pattern has exactly two groups, both digits-only, so a match
        # always yields two values that convert to int.
        exposure_id, detector_id = (int(value) for value in m.groups())

        try:
            exposure_record = {
                "instrument": "simulator",
                "id": exposure_id,
                "name": f"{exposure_id:06d}",
                "group_name": grp,
                "timespan": Timespan(begin=None, end=None)
            }
            # idempotent insertion of individual dimension rows
            butler.registry.syncDimensionData("exposure", exposure_record)
        except Exception as e:
            # Best effort: one bad exposure record must not abort the
            # remaining files.
            n_failed += 1
            logging.error(
                f"Unable to insert exposure record for file {file}: {e}")
            continue

        dataId = DataCoordinate.standardize(instrument="simulator",
                                            detector=detector_id,
                                            exposure=exposure_id,
                                            universe=dimension_universe)
        ref = DatasetRef(datasetType, dataId=dataId)
        datasets.append(
            FileDataset(refs=ref, path=file, formatter=AstropyImageFormatter))

    # Report skipped files before ingest so the summary is not lost if the
    # ingest itself raises.
    if n_failed:
        logging.warning(f"{n_failed} file(s) skipped and not ingested")

    with butler.transaction():
        butler.ingest(*datasets, transfer=transfer, run=run)