Example #1
0
    def test_metric_package_reload(self):
        # Create a Job without Metric definitions
        meas = Measurement('validate_drp.PA1', 15 * u.mmag)
        measurement_set = MeasurementSet([meas])

        job = Job(measurements=measurement_set)
        job.reload_metrics_package('verify_metrics')

        # Should now have metrics and specs
        self.assertTrue(len(job.specs) > 0)
        self.assertTrue(len(job.metrics) > 0)
        self.assertIsInstance(job.measurements['validate_drp.PA1'].metric,
                              Metric)
Example #2
0
def ingest_data(filenames, metrics_package):
    """Load JSON files into a list of lsst.validate.base measurement Jobs.

    Parameters
    ----------
    filenames : list of str
        Filenames of JSON files to load.

    Returns
    -------
    job_list : list of lsst.validate.base.Job
        Each element is the Job representation of the JSON file.
    """
    jobs = {}
    # Read in JSON output from metrics run
    for filename in filenames:
        with open(filename) as fh:
            data = json.load(fh)
            job = Job.deserialize(**data)
        filter_name = job.meta['filter_name']
        metrics = MetricSet.load_metrics_package(metrics_package)
        job.metrics.update(metrics)
        specs = SpecificationSet.load_metrics_package(metrics_package)
        job.specs.update(specs)
        jobs[filter_name] = job

    return jobs
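
A minimal usage sketch for ingest_data; the JSON file names below are placeholders, and each file's Job metadata is assumed to contain a 'filter_name' key, as required by the code above.

# Hypothetical usage; 'output_r.json' and 'output_i.json' are placeholder paths.
jobs_by_filter = ingest_data(['output_r.json', 'output_i.json'],
                             'verify_metrics')
for filter_name, job in jobs_by_filter.items():
    print(filter_name, len(job.measurements))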
Example #3
0
    def run(self):
        jobs = {}
        for metric in self.metrics:
            data_ids = list(self.registry.queryDatasets((f'metricvalue_{metric.package}'
                                                         f'_{metric.metric}'),
                            collections=self.collection))
            for did in data_ids:
                m = self.butler.get(did, collections=self.collection)
                # Make the name the same as what SQuaSH expects.
                m.metric_name = metric
                # Grab the physical filter associated with the abstract
                # filter. In general there may be more than one; take the
                # shortest, assuming it is the most generic.
                pfilts = [el.name for el in self.butler.registry.queryDimensionRecords('physical_filter',
                                                                                       dataId=did.dataId)]
                pfilt = min(pfilts, key=len)

                tract = did.dataId['tract']
                afilt = did.dataId['band']
                key = f"{tract}_{afilt}"
                if key not in jobs:
                    job_metadata = {'instrument': did.dataId['instrument'],
                                    'filter': pfilt,
                                    'band': afilt,
                                    'tract': tract,
                                    'butler_generation': 'Gen3',
                                    'ci_dataset': self.dataset_name}
                    # Get dataset_repo_url from repository somehow?
                    jobs[key] = Job(meta=job_metadata, metrics=self.metrics)
                jobs[key].measurements.insert(m)
        return jobs
Example #4
0
    def runDataRefs(self, datarefs, customMetadata=None):
        """Call all registered metric tasks on each dataref.

        This method loads all datasets required to compute a particular
        metric, and persists the metrics as one or more `lsst.verify.Job`
        objects. Only metrics that successfully produce a
        `~lsst.verify.Measurement` will be included in a job.

        Parameters
        ----------
        datarefs : `list` of `lsst.daf.persistence.ButlerDataRef`
            The data to measure. Datarefs may be complete or partial; each
            generates a measurement at the same granularity (e.g., a
            dataref with only ``"visit"`` specified generates visit-level
            measurements).
        customMetadata : `dict`, optional
            Any metadata that are needed for a specific pipeline, but that are
            not needed by the ``lsst.verify`` framework or by general-purpose
            measurement analysis code (these cases are handled by the
            `~MetricsControllerConfig.metadataAdder` subtask). If omitted,
            only generic metadata are added. Both keys and values must be valid
            inputs to `~lsst.verify.Metadata`.

        Returns
        -------
        struct : `lsst.pipe.base.Struct`
            A `~lsst.pipe.base.Struct` containing the following component:

            - ``jobs`` : a list of collections of measurements (`list` of
              `lsst.verify.Job`). Each job in the list contains the
              measurement(s) for the corresponding dataref, and each job has
              at most one measurement for each element in `self.measurers`. A
              particular measurement is omitted if it could not be created.

        Notes
        -----
        Some objects may be persisted, or incorrectly persisted, in the event
        of an exception.
        """
        jobs = []
        index = 0
        for dataref in datarefs:
            job = Job.load_metrics_package()
            try:
                self.metadataAdder.run(job, dataref=dataref)
                if customMetadata:
                    job.meta.update(customMetadata)

                for task in self.measurers:
                    self._computeSingleMeasurement(job, task, dataref)
            finally:
                jobFile = self._getJobFilePath(index, dataref.dataId)
                self.log.info("Persisting metrics to %s...", jobFile)
                # This call order maximizes the chance that job gets
                # written, and to a unique file
                index += 1
                job.write(jobFile)
                jobs.append(job)

        return Struct(jobs=jobs)
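
A hedged sketch of consuming the Struct returned by runDataRefs; `controller` and `datarefs` are assumed to exist and are not defined in the original source.

# Hypothetical sketch: `controller` is an instance of the task defining
# runDataRefs, and `datarefs` is a list of ButlerDataRefs.
result = controller.runDataRefs(datarefs,
                                customMetadata={'dataset': 'placeholder'})
for job in result.jobs:
    print(len(job.measurements), 'measurements persisted for this dataref')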
Example #5
0
    def __init__(self, butler=None, profile_jointcal=False, **kwargs):
        """
        Instantiate a JointcalTask.

        Parameters
        ----------
        butler : `lsst.daf.persistence.Butler`
            The butler is passed to the refObjLoader constructor in case it is
            needed. Ignored if the refObjLoader argument provides a loader directly.
            Used to initialize the astrometry and photometry refObjLoaders.
        profile_jointcal : `bool`
            Set to True to profile different stages of this jointcal run.
        """
        pipeBase.CmdLineTask.__init__(self, **kwargs)
        self.profile_jointcal = profile_jointcal
        self.makeSubtask("sourceSelector")
        if self.config.doAstrometry:
            self.makeSubtask('astrometryRefObjLoader', butler=butler)
            self.makeSubtask("astrometryReferenceSelector")
        else:
            self.astrometryRefObjLoader = None
        if self.config.doPhotometry:
            self.makeSubtask('photometryRefObjLoader', butler=butler)
            self.makeSubtask("photometryReferenceSelector")
        else:
            self.photometryRefObjLoader = None

        # To hold various computed metrics for use by tests
        self.job = Job.load_metrics_package(subset='jointcal')
Example #6
0
def unpersistJob(fileName):
    """Unpersist a Job object from the filename of its serialized form.

    Returns
    -------
    The `lsst.verify.Job` object contained in `fileName`.
    """
    with open(fileName) as handle:
        return Job.deserialize(**json.load(handle))
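
A minimal usage sketch for unpersistJob, assuming the Job was previously serialized with Job.write(); the file name is a placeholder.

# Hypothetical usage; 'metrics.verify.json' is a placeholder path.
job = unpersistJob('metrics.verify.json')
print(job.meta)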
Example #7
0
    def test_metadataonly(self, mock_stdout):
        """Test that inspect_job can handle files with metadata but no metrics.
        """
        # Job and its components were not designed to support deletion, so
        # create a new Job from scratch to ensure it's a valid object.
        job = Job()
        job.metrics.insert(
            Metric("foo.boringmetric", "", u.percent, tags=["redundant"]))
        job.metrics.insert(
            Metric("foo.fancymetric", "", u.meter, tags=["vital"]))
        job.meta["bar"] = "high"
        job.meta["shape"] = "rotund"
        job.specs.insert(
            ThresholdSpecification("utterly_ridiculous", 1e10 * u.meter, ">"))

        inspect_job(job)
        output = mock_stdout.getvalue()
        for key, value in [("bar", "high"), ("shape", "rotund")]:
            self._check_metadata(key, value, output)
Example #8
0
def main():
    """Present all Job files.
    """
    args = build_argparser().parse_args()
    for filename in args.json_paths:
        if len(args.json_paths) > 1:
            print("\n%s:" % filename)
        with open(filename) as f:
            job = Job.deserialize(**json.load(f))
        inspect_job(job)
Example #9
0
    def test_job_iadd(self):
        job_1 = Job(metrics=self.metric_set,
                    specs=self.spec_set,
                    measurements=self.measurement_set)
        job_2 = Job(metrics=self.metric_set_2,
                    specs=self.spec_set_2,
                    measurements=self.measurement_set_2)

        job_1 += job_2

        self.assertIn(self.metric_photrms.name, job_1.metrics)
        self.assertIn(self.metric_test_2.name, job_1.metrics)
        self.assertIn('test.PhotRms.design', job_1.specs)
        self.assertIn('test2.SourceCount.design', job_1.specs)
        self.assertIn('test.PhotRms', job_1.measurements)
        self.assertIn('test2.SourceCount', job_1.measurements)
        self.assertIn('test.PhotRms', job_1.measurements['test.PhotRms'].blobs)
        self.assertIn('test2_blob',
                      job_1.measurements['test2.SourceCount'].blobs)
Example #10
0
    def run(self):
        """Collate job information.

        Returns
        -------
        jobs : `dict` [`str`, `lsst.verify.Job`]
            A mapping of `~lsst.verify.Job` objects, indexed by a string
            representation of their data ID.
        """
        jobs = {}
        for metric in self.metrics:
            dataset = f'metricvalue_{metric.package}_{metric.metric}'
            datasetRefs = set(
                self.registry.queryDatasets(dataset,
                                            collections=self.collection,
                                            findFirst=True))
            for ref in datasetRefs:
                # getDirect skips dataset resolution; ref is guaranteed to
                # be valid.
                m = self.butler.getDirect(ref)
                # Make the name the same as what SQuaSH expects.
                m.metric_name = metric

                # queryDatasets guarantees ref.dataId.hasFull()
                dataId = ref.dataId.full.byName()
                key = make_key(ref)

                # For backward-compatibility with Gen 2 SQuaSH uploads
                pfilt = dataId.get('physical_filter')
                if not pfilt:
                    # Grab the physical filter associated with the abstract
                    # filter. In general there may be more than one. Take the
                    # shortest assuming it is the most generic.
                    pfilts = [
                        el.name for el in self.registry.queryDimensionRecords(
                            'physical_filter', dataId=ref.dataId)
                    ]
                    pfilt = min(pfilts, key=len)

                if key not in jobs:
                    job_metadata = {
                        'filter': pfilt,
                        'butler_generation': 'Gen3',
                        'ci_dataset': self.dataset_name,
                    }
                    job_metadata.update(dataId)
                    # Get dataset_repo_url from repository somehow?
                    jobs[key] = Job(meta=job_metadata, metrics=self.metrics)
                jobs[key].measurements.insert(m)
        return jobs
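
A hedged sketch of consuming the mapping returned by run(); `gatherer` stands in for an instance of the (unnamed) class that defines this method.

# Hypothetical sketch; `gatherer` is an instance of the class defining run().
jobs = gatherer.run()
for data_id_key, job in jobs.items():
    # Persist each collated Job under its data-ID key.
    job.write(f'{data_id_key}.json')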
Example #11
0
def main(filenames):
    """Present all Job files.

    Parameters
    ----------
    filenames : `list` of `str`
        The Job files to open. Must be in JSON format.
    """
    for filename in filenames:
        if len(filenames) > 1:
            print("\n%s:" % filename)
        with open(filename) as f:
            job = Job.deserialize(**json.load(f))
        inspect_job(job)
Example #12
0
    def setUp(self):
        self.job = Job()
        self.job.metrics.insert(
            Metric("foo.boringmetric", "", u.percent, tags=["redundant"]))
        self.job.metrics.insert(
            Metric("foo.fancymetric", "", u.meter, tags=["vital"]))
        self.job.measurements.insert(
            Measurement("foo.fancymetric", 2.0 * u.meter))
        self.job.measurements.insert(
            Measurement("foo.fanciermetric", 3.5 * u.second))
        self.job.measurements["foo.fanciermetric"].notes["fanciness"] \
            = "moderate"
        self.job.measurements.insert(
            Measurement("foo.fanciestmetric", 3.1415927 * u.kilogram))
        self.job.meta["bar"] = "high"
        self.job.meta["shape"] = "rotund"
        self.job.specs.insert(
            ThresholdSpecification("utterly_ridiculous", 1e10 * u.meter, ">"))
Example #13
0
    def test_query_metadata(self):
        job = Job(meta={'filter_name': 'r', 'camera': 'MegaCam'})
        s1 = ThresholdSpecification(Name('validate_drp.AM1.design_r'),
                                    5. * u.marcsec,
                                    '<',
                                    metadata_query={'filter_name': 'r'})
        s2 = ThresholdSpecification(Name('validate_drp.AM1.design_i'),
                                    5. * u.marcsec,
                                    '<',
                                    metadata_query={'filter_name': 'i'})
        s3 = ThresholdSpecification(Name('validate_drp.AM1.design_HSC_r'),
                                    5. * u.marcsec,
                                    '<',
                                    metadata_query={
                                        'filter_name': 'r',
                                        'camera': 'HSC'
                                    })

        self.assertTrue(s1.query_metadata(job.meta))
        self.assertFalse(s2.query_metadata(job.meta))
        self.assertFalse(s3.query_metadata(job.meta))
Example #14
0
    def test_metricsonly(self, mock_stdout):
        """Test that inspect_job can handle files with metrics but no metadata.
        """
        # Job and its components were not designed to support deletion, so
        # create a new Job from scratch to ensure it's a valid object.
        job = Job()
        job.metrics.insert(
            Metric("foo.boringmetric", "", u.percent, tags=["redundant"]))
        job.metrics.insert(
            Metric("foo.fancymetric", "", u.meter, tags=["vital"]))
        job.measurements.insert(Measurement("foo.fancymetric", 2.0 * u.meter))
        job.measurements.insert(
            Measurement("foo.fanciermetric", 3.5 * u.second))
        job.measurements["foo.fanciermetric"].notes["fanciness"] = "moderate"
        job.measurements.insert(
            Measurement("foo.fanciestmetric", 3.1415927 * u.kilogram))

        inspect_job(job)
        output = mock_stdout.getvalue()
        # MeasurementSet.values does not exist
        for _, measurement in job.measurements.items():
            self._check_measurement(measurement, output)
Example #15
0
    def __init__(self, butler=None, profile_jointcal=False, **kwargs):
        """
        Instantiate a JointcalTask.

        Parameters
        ----------
        butler : lsst.daf.persistence.Butler
            The butler is passed to the refObjLoader constructor in case it is
            needed. Ignored if the refObjLoader argument provides a loader directly.
            Used to initialize the astrometry and photometry refObjLoaders.
        profile_jointcal : bool
            Set to True to profile different stages of this jointcal run.
        """
        pipeBase.CmdLineTask.__init__(self, **kwargs)
        self.profile_jointcal = profile_jointcal
        self.makeSubtask("sourceSelector")
        if self.config.doAstrometry:
            self.makeSubtask('astrometryRefObjLoader', butler=butler)
        if self.config.doPhotometry:
            self.makeSubtask('photometryRefObjLoader', butler=butler)

        # To hold various computed metrics for use by tests
        self.job = Job.load_metrics_package(subset='jointcal')
Example #16
0
def load_json_output(filepath, metrics_package='verify_metrics'):
    """Read JSON from a file into a job object.

    Currently just does a trivial de-serialization with no checking
    to make sure that one results with a valid validate.base.job object.

    Parameters
    ----------
    filepath : `str`
        Source file name for JSON output.

    Returns
    -------
    job : A `validate.base.job` object.
    """
    with open(filepath, 'r') as infile:
        json_data = json.load(infile)

    job = Job.deserialize(**json_data)
    metrics = MetricSet.load_metrics_package(metrics_package)
    job.metrics.update(metrics)
    specs = SpecificationSet.load_metrics_package(metrics_package)
    job.specs.update(specs)
    return job
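
A minimal usage sketch for load_json_output; the file name is a placeholder.

# Hypothetical usage; 'output_r.json' is a placeholder path.
job = load_json_output('output_r.json', metrics_package='verify_metrics')
print(len(job.metrics), 'metric definitions loaded')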
Example #17
0
    def setUp(self):
        self.testbed = SquashMetadataTask()
        self.job = Job()
Example #18
0
def main():
    """Entrypoint for the ``dispatch_verify.py`` command line executable.
    """
    log = lsst.log.Log.getLogger('verify.bin.dispatchverify.main')

    args = parse_args()
    config = Configuration(args)
    log.debug(str(config))

    # Parse all Job JSON
    jobs = []
    for json_path in config.json_paths:
        log.info('Loading {0}'.format(json_path))
        with open(json_path) as fp:
            json_data = json.load(fp)
        job = Job.deserialize(**json_data)
        jobs.append(job)

    # Merge all Jobs into one
    job = jobs.pop(0)
    if len(jobs) > 0:
        log.info('Merging verification Job JSON.')
    for other_job in jobs:
        job += other_job

    # Ensure all measurements have a metric so that units are normalized
    log.info('Refreshing metric definitions from verify_metrics')
    job.reload_metrics_package('verify_metrics')

    # Insert package metadata from lsstsw
    if not config.ignore_lsstsw:
        log.info('Inserting lsstsw package metadata from '
                 '{0}.'.format(config.lsstsw))
        job = insert_lsstsw_metadata(job, config)

    # Insert metadata from additional specified packages
    if config.extra_package_paths is not None:
        job = insert_extra_package_metadata(job, config)

    # Add environment variable metadata from the Jenkins CI environment
    if config.env_name == 'jenkins':
        log.info('Inserting Jenkins CI environment metadata.')
        jenkins_metadata = get_jenkins_env()
        job = insert_env_metadata(job, 'jenkins', jenkins_metadata)

    # Upload job
    if not config.test:
        log.info('Uploading Job JSON to {0}.'.format(config.api_url))
        job.dispatch(api_user=config.api_user,
                     api_password=config.api_password,
                     api_url=config.api_url)

    if config.show_json:
        print(
            json.dumps(job.json,
                       sort_keys=True,
                       indent=4,
                       separators=(',', ': ')))

    # Write a json file
    if config.output_filepath is not None:
        log.info('Writing Job JSON to {0}.'.format(config.output_filepath))
        job.write(config.output_filepath)
Example #19
0
def runOneFilter(repo,
                 visitDataIds,
                 brightSnrMin=None,
                 brightSnrMax=None,
                 makeJson=True,
                 filterName=None,
                 outputPrefix='',
                 doApplyExternalPhotoCalib=False,
                 externalPhotoCalibName=None,
                 doApplyExternalSkyWcs=False,
                 externalSkyWcsName=None,
                 skipTEx=False,
                 verbose=False,
                 metrics_package='verify_metrics',
                 instrument='Unknown',
                 dataset_repo_url='./',
                 skipNonSrd=False,
                 **kwargs):
    r"""Main executable for the case where there is just one filter.

    Plot files and JSON files are generated in the local directory
    prefixed with the repository name (where '_' replaces path separators),
    unless overridden by specifying `outputPrefix`.
    E.g., analyzing a repository ``CFHT/output``
    will result in filenames that start with ``CFHT_output_``.

    Parameters
    ----------
    repo : string or Butler
        A Butler or a repository URL that can be used to construct one.
    visitDataIds : list of dict
        List of `butler` data IDs of Image catalogs to compare to reference.
        The `calexp` pixel image is needed for the photometric calibration
        unless doApplyExternalPhotoCalib is True, in which case the
        appropriate `photoCalib` dataset is used instead. Note that these
        have data IDs that include the tract number.
    brightSnrMin : float, optional
        Minimum median SNR for a source to be considered bright; passed to
        `lsst.validate.drp.matchreduce.build_matched_dataset`.
    brightSnrMax : float, optional
        Maximum median SNR for a source to be considered bright; passed to
        `lsst.validate.drp.matchreduce.build_matched_dataset`.
    makeJson : bool, optional
        Create JSON output file for metrics.  Saved to current working directory.
    outputPrefix : str, optional
        Specify the beginning filename for output files.
    filterName : str, optional
        Name of the filter (bandpass).
    doApplyExternalPhotoCalib : bool, optional
        Apply external photoCalib to calibrate fluxes.
    externalPhotoCalibName : str, optional
        Type of external `PhotoCalib` to apply.  Currently supported are jointcal,
        fgcm, and fgcm_tract.  Must be set if doApplyExternalPhotoCalib is True.
    doApplyExternalSkyWcs : bool, optional
        Apply external wcs to calibrate positions.
    externalSkyWcsName : str, optional
        Type of external `wcs` to apply.  Currently supported is jointcal.
        Must be set if "doApplyExternalSkyWcs" is True.
    skipTEx : bool, optional
        Skip TEx calculations (useful for older catalogs that don't have
        PsfShape measurements).
    verbose : bool, optional
        Output additional information on the analysis steps.
    skipNonSrd : bool, optional
        Skip any metrics not defined in the LSST SRD.
    metrics_package : str, optional
        Name of the metrics package providing metric and specification
        definitions.
    instrument : str, optional
        Name of the instrument, recorded in the Job metadata.
    dataset_repo_url : str, optional
        URL of the input dataset repository, recorded in the Job metadata.

    Raises
    ------
    RuntimeError
        Raised if "doApplyExternalPhotoCalib" is True and "externalPhotoCalibName"
        is None, or if "doApplyExternalSkyWcs" is True and "externalSkyWcsName" is
        None.
    """

    if kwargs:
        log.warn(
            f"Extra kwargs - {kwargs}, will be ignored. Did you add extra things to your config file?"
        )

    if doApplyExternalPhotoCalib and externalPhotoCalibName is None:
        raise RuntimeError(
            "Must set externalPhotoCalibName if doApplyExternalPhotoCalib is True."
        )
    if doApplyExternalSkyWcs and externalSkyWcsName is None:
        raise RuntimeError(
            "Must set externalSkyWcsName if doApplyExternalSkyWcs is True.")

    # collect just the common key, value pairs to omit the keys that are aggregated over
    job_metadata = dict(
        set.intersection(*[set(vid.items()) for vid in visitDataIds]))

    # update with metadata passed into the method
    job_metadata.update({
        'instrument': instrument,
        'filter_name': filterName,
        'dataset_repo_url': dataset_repo_url
    })

    job = Job.load_metrics_package(meta=job_metadata,
                                   subset='validate_drp',
                                   package_name_or_path=metrics_package)

    matchedDataset = build_matched_dataset(
        repo,
        visitDataIds,
        doApplyExternalPhotoCalib=doApplyExternalPhotoCalib,
        externalPhotoCalibName=externalPhotoCalibName,
        doApplyExternalSkyWcs=doApplyExternalSkyWcs,
        externalSkyWcsName=externalSkyWcsName,
        skipTEx=skipTEx,
        skipNonSrd=skipNonSrd,
        brightSnrMin=brightSnrMin,
        brightSnrMax=brightSnrMax)

    snr = matchedDataset['snr'].quantity
    bright = (matchedDataset['brightSnrMin'].quantity <
              snr) & (snr < matchedDataset['brightSnrMax'].quantity)
    photomModel = build_photometric_error_model(matchedDataset, bright)
    astromModel = build_astrometric_error_model(matchedDataset, bright)

    linkedBlobs = [matchedDataset, photomModel, astromModel]

    metrics = job.metrics
    specs = job.specs

    def add_measurement(measurement):
        for blob in linkedBlobs:
            measurement.link_blob(blob)
        job.measurements.insert(measurement)

    for x, D in zip((1, 2, 3), (5., 20., 200.)):
        amxName = 'AM{0:d}'.format(x)
        afxName = 'AF{0:d}'.format(x)
        adxName = 'AD{0:d}'.format(x)

        amx = measureAMx(metrics['validate_drp.' + amxName],
                         matchedDataset,
                         D * u.arcmin,
                         verbose=verbose)
        add_measurement(amx)

        afx_spec_set = specs.subset(required_meta={'instrument': 'HSC'},
                                    spec_tags=[
                                        afxName,
                                    ])
        adx_spec_set = specs.subset(required_meta={'instrument': 'HSC'},
                                    spec_tags=[
                                        adxName,
                                    ])
        for afx_spec_key, adx_spec_key in zip(afx_spec_set, adx_spec_set):
            afx_spec = afx_spec_set[afx_spec_key]
            adx_spec = adx_spec_set[adx_spec_key]
            adx = measureADx(metrics[adx_spec.metric_name], amx, afx_spec)
            add_measurement(adx)
            afx = measureAFx(metrics[afx_spec.metric_name], amx, adx, adx_spec)
            add_measurement(afx)

    pa1 = measurePA1(metrics['validate_drp.PA1'], filterName,
                     matchedDataset.matchesBright, matchedDataset.magKey)
    add_measurement(pa1)

    pf1_spec_set = specs.subset(required_meta={
        'instrument': instrument,
        'filter_name': filterName
    },
                                spec_tags=[
                                    'PF1',
                                ])
    pa2_spec_set = specs.subset(required_meta={
        'instrument': instrument,
        'filter_name': filterName
    },
                                spec_tags=[
                                    'PA2',
                                ])
    # I worry these might not always be in the right order.  Sorting...
    pf1_spec_keys = list(pf1_spec_set.keys())
    pa2_spec_keys = list(pa2_spec_set.keys())
    pf1_spec_keys.sort()
    pa2_spec_keys.sort()
    for pf1_spec_key, pa2_spec_key in zip(pf1_spec_keys, pa2_spec_keys):
        pf1_spec = pf1_spec_set[pf1_spec_key]
        pa2_spec = pa2_spec_set[pa2_spec_key]

        pa2 = measurePA2(metrics[pa2_spec.metric_name], pa1,
                         pf1_spec.threshold)
        add_measurement(pa2)

        pf1 = measurePF1(metrics[pf1_spec.metric_name], pa1, pa2_spec)
        add_measurement(pf1)

    if not skipTEx:
        for x, D, bin_range_operator in zip((1, 2), (1.0, 5.0), ("<=", ">=")):
            texName = 'TE{0:d}'.format(x)
            tex = measureTEx(metrics['validate_drp.' + texName],
                             matchedDataset,
                             D * u.arcmin,
                             bin_range_operator,
                             verbose=verbose)
            add_measurement(tex)

    if not skipNonSrd:
        model_phot_reps = measure_model_phot_rep(metrics, filterName,
                                                 matchedDataset)
        for measurement in model_phot_reps:
            add_measurement(measurement)

    if makeJson:
        job.write(outputPrefix + '.json')

    return job
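
A hedged invocation sketch for runOneFilter; `butler_repo`, `visit_data_ids`, and the dataset URL below are placeholders rather than values from the original source.

# Hypothetical invocation; butler_repo and visit_data_ids must be defined
# elsewhere, and dataset_repo_url is a placeholder.
job = runOneFilter(butler_repo, visit_data_ids,
                   filterName='r',
                   outputPrefix='CFHT_output_r_',
                   instrument='CFHT',
                   dataset_repo_url='https://example.org/validation_data')
# With makeJson=True (the default) the Job is also written to
# 'CFHT_output_r_.json'.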
Example #20
0
def merge(jobs, lastJob):
    """Combine measurements from multiple chips or visits.

    Other job properties will be dictionary-merged (i.e., if multiple entries
    are assigned to the same key, only one will be preserved).

    Parameters
    ----------
    jobs : iterable of `lsst.verify.Job`
        The jobs containing data to combine.
    lastJob : `lsst.verify.Job`
        The job corresponding to the final run of ap_verify.

    Returns
    -------
    merged : `lsst.verify.Job`
        A single Job object containing merged measurements from ``jobs``.
    """
    merged = Job.load_metrics_package()
    # Visible Job state:
    #     job.measurements
    #     job.meta
    #     job.metrics (guaranteed by load_metrics_package)
    #     job.specs (guaranteed by load_metrics_package)

    measurementsPerMetric = defaultdict(list)
    for job in jobs:
        for metricName in job.measurements:
            measurementsPerMetric[str(metricName)].append(
                job.measurements[metricName])

    for metric in measurementsPerMetric:
        # Running times, object counts
        if metric.endswith("Time") or metric in {
                "ip_diffim.numSciSources", "association.numNewDiaObjects",
                "association.totalUnassociatedDiaObjects"
        }:
            addIfDefined(merged.measurements,
                         sumMeasurements(measurementsPerMetric[metric]))

    # Fractions require special handling
    addIfDefined(
        merged.measurements,
        # Due to time constraints, no metric for total DIAObjects was implemented,
        # so we have to work around its absence
        mergeFractionsPartial(
            measurementsPerMetric["association.fracUpdatedDiaObjects"],
            measurementsPerMetric["association.numUnassociatedDiaObjects"]))
    addIfDefined(
        merged.measurements,
        mergeFractions(
            measurementsPerMetric["ip_diffim.fracDiaSourcesToSciSources"],
            measurementsPerMetric["ip_diffim.numSciSources"]))

    # L1 database metrics are cumulative, not per-CCD, so just copy them over
    for metric in ["association.totalUnassociatedDiaObjects"]:
        if metric in lastJob.measurements:
            addIfDefined(merged.measurements, lastJob.measurements[metric])

    for job in jobs:
        merged.meta.update(job.meta)

    return merged
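
A minimal usage sketch for merge, assuming `per_ccd_jobs` is a non-empty list of lsst.verify.Job objects from individual ap_verify runs, with the last one corresponding to the final run.

# Hypothetical usage; per_ccd_jobs is assumed to exist.
combined = merge(per_ccd_jobs, per_ccd_jobs[-1])
combined.write('merged.verify.json')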
Example #21
0
    def test_job(self):
        """Create a Job from object sets."""
        job = Job(metrics=self.metric_set,
                  specs=self.spec_set,
                  measurements=self.measurement_set)

        # Test object access via properties
        self.assertIn('test.PhotRms.design', job.specs)
        self.assertIn('test.PhotRms', job.metrics)
        self.assertIn('test.PhotRms', job.measurements)

        # Test metadata access
        self.assertIn('test.PhotRms.note', job.meta)
        self.assertEqual(job.meta['test.PhotRms.note'], 'value')
        # measurement metadata is always prefixed
        self.assertNotIn('note', job.meta)

        job.meta['job-level-key'] = 'yes'
        self.assertEqual(job.meta['job-level-key'], 'yes')
        self.assertIn('job-level-key', job.meta)

        self.assertEqual(len(job.meta), 2)

        job.meta.update({'test.PhotRms.note2': 'foo', 'dataset': 'ci_hsc'})
        # note2 should be in measurement notes
        self.assertEqual(job.measurements['test.PhotRms'].notes['note2'],
                         'foo')
        self.assertEqual(job.meta['dataset'], 'ci_hsc')
        # Delete measurement and job-level metadata
        del job.meta['test.PhotRms.note2']
        self.assertNotIn('test.PhotRms.note2', job.meta)
        self.assertNotIn('note2', job.measurements['test.PhotRms'].notes)
        del job.meta['dataset']
        self.assertNotIn('dataset', job.meta)

        self.assertEqual(set(job.meta.keys()),
                         set(['job-level-key', 'test.PhotRms.note']))
        self.assertEqual(set([key for key in job.meta]),
                         set(['job-level-key', 'test.PhotRms.note']))
        keys = set()
        for key, value in job.meta.items():
            keys.add(key)
        self.assertEqual(keys, set(['job-level-key', 'test.PhotRms.note']))

        # Add a new measurement
        m = Measurement('test.PhotMedian',
                        28.5 * u.mag,
                        notes={'aperture_corr': True})
        job.measurements.insert(m)
        self.assertIn('test.PhotMedian', job.measurements)
        self.assertEqual(job.meta['test.PhotMedian.aperture_corr'], True)

        # Test serialization
        json_doc = job.json

        self.assertIn('measurements', json_doc)
        self.assertEqual(len(json_doc['measurements']), len(job.measurements))

        self.assertIn('blobs', json_doc)

        self.assertIn('metrics', json_doc)
        self.assertEqual(len(json_doc['metrics']), len(job.metrics))

        self.assertIn('specs', json_doc)
        self.assertEqual(len(json_doc['specs']), len(job.specs))

        self.assertIn('meta', json_doc)
        self.assertEqual(len(json_doc['meta']), len(job.meta))

        new_job = Job.deserialize(**json_doc)
        self.assertEqual(job, new_job)

        # check job-to-measurement metadata deserialization
        self.assertEqual(new_job.measurements['test.PhotRms'].notes['note'],
                         'value')
        self.assertEqual(new_job.meta['test.PhotRms.note'], 'value')
        self.assertEqual(new_job.meta['job-level-key'], 'yes')
Example #22
0
def runOneFilter(repo,
                 visitDataIds,
                 metrics,
                 brightSnr=100,
                 makeJson=True,
                 filterName=None,
                 outputPrefix='',
                 useJointCal=False,
                 skipTEx=False,
                 verbose=False,
                 metrics_package='verify_metrics',
                 **kwargs):
    """Main executable for the case where there is just one filter.

    Plot files and JSON files are generated in the local directory
    prefixed with the repository name (where '_' replaces path separators),
    unless overridden by specifying `outputPrefix`.
    E.g., analyzing a repository ``CFHT/output``
    will result in filenames that start with ``CFHT_output_``.

    Parameters
    ----------
    repo : string or Butler
        A Butler or a repository URL that can be used to construct one.
    visitDataIds : list of dict
        List of `butler` data IDs of Image catalogs to compare to reference.
        The `calexp` pixel image is needed for the photometric calibration
        unless useJointCal is True, in which case the `photoCalib` and `wcs`
        datasets are used instead.  Note that these have data IDs that include
        the tract number.
    metrics : `dict` or `collections.OrderedDict`
        Dictionary of `lsst.validate.base.Metric` instances. Typically this is
        data from ``validate_drp``\ 's ``metrics.yaml`` and loaded with
        `lsst.validate.base.load_metrics`.
    brightSnr : float, optional
        Minimum SNR for a star to be considered bright.
    makeJson : bool, optional
        Create JSON output file for metrics.  Saved to current working directory.
    outputPrefix : str, optional
        Specify the beginning filename for output files.
    filterName : str, optional
        Name of the filter (bandpass).
    useJointCal : bool, optional
        Use jointcal/meas_mosaic outputs to calibrate positions and fluxes.
    skipTEx : bool, optional
        Skip TEx calculations (useful for older catalogs that don't have
        PsfShape measurements).
    verbose : bool, optional
        Output additional information on the analysis steps.
    metrics_package : str, optional
        Name of the metrics package providing metric and specification
        definitions.
    """
    matchedDataset = build_matched_dataset(repo,
                                           visitDataIds,
                                           useJointCal=useJointCal,
                                           skipTEx=skipTEx)

    photomModel = build_photometric_error_model(matchedDataset)

    astromModel = build_astrometric_error_model(matchedDataset)

    linkedBlobs = [matchedDataset, photomModel, astromModel]

    try:
        instrument = kwargs['instrument']
        dataset_repo_url = kwargs['dataset_repo_url']
    except KeyError:
        raise ValueError(
            "Instrument name and input dataset URL must be set in config file")
    job = Job.load_metrics_package(meta={
        'instrument': instrument,
        'filter_name': filterName,
        'dataset_repo_url': dataset_repo_url
    },
                                   subset='validate_drp',
                                   package_name_or_path=metrics_package)
    metrics = job.metrics

    specs = job.specs

    def add_measurement(measurement):
        for blob in linkedBlobs:
            measurement.link_blob(blob)
        job.measurements.insert(measurement)

    for x, D in zip((1, 2, 3), (5., 20., 200.)):
        amxName = 'AM{0:d}'.format(x)
        afxName = 'AF{0:d}'.format(x)
        adxName = 'AD{0:d}'.format(x)

        amx = measureAMx(metrics['validate_drp.' + amxName], matchedDataset,
                         D * u.arcmin)
        add_measurement(amx)

        afx_spec_set = specs.subset(required_meta={'instrument': 'HSC'},
                                    spec_tags=[
                                        afxName,
                                    ])
        adx_spec_set = specs.subset(required_meta={'instrument': 'HSC'},
                                    spec_tags=[
                                        adxName,
                                    ])
        for afx_spec_key, adx_spec_key in zip(afx_spec_set, adx_spec_set):
            afx_spec = afx_spec_set[afx_spec_key]
            adx_spec = adx_spec_set[adx_spec_key]
            adx = measureADx(metrics[adx_spec.metric_name], amx, afx_spec)
            add_measurement(adx)
            afx = measureAFx(metrics[afx_spec.metric_name], amx, adx, adx_spec)
            add_measurement(afx)

    pa1 = measurePA1(metrics['validate_drp.PA1'], matchedDataset, filterName)
    add_measurement(pa1)

    pf1_spec_set = specs.subset(required_meta={
        'instrument': instrument,
        'filter_name': filterName
    },
                                spec_tags=[
                                    'PF1',
                                ])
    pa2_spec_set = specs.subset(required_meta={
        'instrument': instrument,
        'filter_name': filterName
    },
                                spec_tags=[
                                    'PA2',
                                ])
    # I worry these might not always be in the right order.  Sorting...
    pf1_spec_keys = list(pf1_spec_set.keys())
    pa2_spec_keys = list(pa2_spec_set.keys())
    pf1_spec_keys.sort()
    pa2_spec_keys.sort()
    for pf1_spec_key, pa2_spec_key in zip(pf1_spec_keys, pa2_spec_keys):
        pf1_spec = pf1_spec_set[pf1_spec_key]
        pa2_spec = pa2_spec_set[pa2_spec_key]

        pa2 = measurePA2(metrics[pa2_spec.metric_name], pa1,
                         pf1_spec.threshold)
        add_measurement(pa2)

        pf1 = measurePF1(metrics[pf1_spec.metric_name], pa1, pa2_spec)
        add_measurement(pf1)

    if not skipTEx:
        for x, D, bin_range_operator in zip((1, 2), (1.0, 5.0), ("<=", ">=")):
            texName = 'TE{0:d}'.format(x)
            tex = measureTEx(metrics['validate_drp.' + texName],
                             matchedDataset, D * u.arcmin, bin_range_operator)
            add_measurement(tex)

    if makeJson:
        job.write(outputPrefix + '.json')

    return job
Example #23
0
def main():
    """Entrypoint for the ``dispatch_verify.py`` command line executable.
    """
    logging.basicConfig(level=logging.INFO,
                        stream=sys.stdout,
                        format="{name} {levelname}: {message}",
                        style="{")
    log = _LOG.getChild('main')

    parser = build_argparser()
    args = parser.parse_args()
    config = Configuration(args)
    log.debug(str(config))

    # Parse all Job JSON
    jobs = []
    for json_path in config.json_paths:
        log.info('Loading {0}'.format(json_path))
        with open(json_path) as fp:
            json_data = json.load(fp)
            # Ignore blobs from the verification jobs
            if config.ignore_blobs:
                log.info('Ignoring blobs from Job JSON {0}'.format(json_path))
                json_data = delete_blobs(json_data)
        job = Job.deserialize(**json_data)
        jobs.append(job)

    # Merge all Jobs into one
    job = jobs.pop(0)
    if len(jobs) > 0:
        log.info('Merging verification Job JSON.')
    for other_job in jobs:
        job += other_job

    # Ensure all measurements have a metric so that units are normalized
    log.info('Refreshing metric definitions from verify_metrics')
    job.reload_metrics_package('verify_metrics')

    # Insert package metadata from lsstsw
    if not config.ignore_lsstsw:
        log.info('Inserting lsstsw package metadata from '
                 '{0}.'.format(config.lsstsw))
        job = insert_lsstsw_metadata(job, config)

    # Insert metadata from additional specified packages
    if config.extra_package_paths is not None:
        job = insert_extra_package_metadata(job, config)

    # Add environment variable metadata from the Jenkins CI environment
    if config.env_name == 'jenkins':
        log.info('Inserting Jenkins CI environment metadata.')
        jenkins_metadata = get_jenkins_env()
        job = insert_env_metadata(job, 'jenkins', jenkins_metadata,
                                  config.date_created)
    elif config.env_name == 'ldf':
        log.info('Inserting LSST Data Facility environment metadata.')
        ldf_metadata = get_ldf_env()
        job = insert_env_metadata(job, 'ldf', ldf_metadata,
                                  config.date_created)

    # Upload job
    if not config.test:
        log.info('Uploading Job JSON to {0}.'.format(config.api_url))
        response = job.dispatch(api_user=config.api_user,
                                api_password=config.api_password,
                                api_url=config.api_url)
        log.info(response.json()['message'])

    if config.show_json:
        print(
            json.dumps(job.json,
                       sort_keys=True,
                       indent=4,
                       separators=(',', ': ')))

    # Write a json file
    if config.output_filepath is not None:
        log.info('Writing Job JSON to {0}.'.format(config.output_filepath))
        job.write(config.output_filepath)
Example #24
0
    def test_empty(self, mock_stdout):
        """Test that inspect_job can handle files with neither metrics nor
        metadata.
        """
        inspect_job(Job())