Example 1
def main():
    """Entry point for the check_model script.

    Returns
    -------
    :class:`int`
        An integer suitable for passing to :func:`sys.exit`.
    """
    from sys import argv
    from argparse import ArgumentParser
    desc = """Check actual files against the data model for validity.
"""
    parser = ArgumentParser(description=desc, prog=os.path.basename(argv[0]))
    parser.add_argument('-d', '--datamodel-dir', dest='desidatamodel',
                        metavar='DIR',
                        help='Override the value of DESIDATAMODEL.')
    parser.add_argument('-F', '--compare-files', dest='files',
                        action='store_true',
                        help='Compare an individual data model to an individual file.')
    parser.add_argument('-W', '--warning-is-error', dest='error',
                        action='store_true',
                        help='Data model warnings raise exceptions.')
    parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
                        help='Set log level to DEBUG.')
    parser.add_argument('section', metavar='DIR or FILE',
                        help='Section of the data model or individual model file.')
    parser.add_argument('directory', metavar='DIR or FILE',
                        help='Check files in this top-level directory, or one individual file.')
    options = parser.parse_args()
    if options.verbose:
        log.setLevel(DEBUG)
    if options.desidatamodel is not None:
        data_model_root = options.desidatamodel
    elif 'DESIDATAMODEL' in os.environ:
        data_model_root = os.environ['DESIDATAMODEL']
    else:
        log.critical("DESIDATAMODEL is not defined. Cannot find data model files!")
        return 1
    log.debug("DESIDATAMODEL=%s", data_model_root)
    if options.files:
        filename = os.path.join(data_model_root, 'doc', options.section)
        section = os.path.join(data_model_root, 'doc', options.section.split('/')[0])
        log.info("Loading individual data model: %s.", filename)
        files = [DataModel(filename, section)]
        log.info("Skipping regular expression processing.")
        # files[0].get_regexp(options.directory, error=options.error)
        log.info("Setting prototype file for %s to %s.", filename, options.directory)
        files[0].prototype = options.directory
    else:
        section = os.path.join(data_model_root, 'doc', options.section)
        log.info("Loading data model file in %s.", section)
        files = scan_model(section)
        log.info("Searching for data files in %s.", options.directory)
        files_to_regexp(options.directory, files, error=options.error)
        log.info("Identifying prototype files in %s.", options.directory)
        collect_files(options.directory, files)
    validate_prototypes(files, error=options.error)
    return 0
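This snippet relies on module-level names from the surrounding script (os, log, DEBUG, DataModel, scan_model, files_to_regexp, collect_files, validate_prototypes). A minimal sketch, assuming main() lives in an importable module, of how an entry point like this is typically wired up:

import sys

# Hand the integer return value of main() to the shell as an exit status.
if __name__ == '__main__':
    sys.exit(main())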
Example 2
    def columns(self, hdu, error=False):
        """Describe the columns of a BINTABLE HDU.

        Parameters
        ----------
        hdu : :class:`int`
            The HDU number (zero-indexed).
        error : :class:`bool`, optional
            If ``True``, failure to extract certain required metadata raises an
            exception.

        Returns
        -------
        :class:`list`
            The rows of the table.

        Raises
        ------
        :exc:`~desidatamodel.DataModelError`
            If the BINTABLE is actually a compressed image.
        :exc:`ValueError`
            If `error` is set and a ``TUNIT`` value does not have FITS-standard
            units.
        """
        hdr = self.headers[hdu]
        if 'ZBITPIX' in hdr:
            raise DataModelError(
                "HDU{0:d} is actually a compressed image!".format(hdu))
        ncol = hdr['TFIELDS']
        c = list()
        c.append(self.columns_header)
        for j in range(ncol):
            jj = '{0:d}'.format(j + 1)
            name = hdr['TTYPE' + jj].strip()
            ttype = fits_column_format(hdr['TFORM' + jj].strip())
            tunit = 'TUNIT' + jj
            if tunit in hdr:
                units = hdr[tunit].strip()
                bad_unit = self.check_unit(units, error=error)
                if bad_unit:
                    log.debug(
                        "Non-standard (but acceptable) unit %s detected for column %s in HDU %d of %s.",
                        bad_unit, name, hdu, self.filename)
            else:
                units = ''
            # Check TCOMMnn keyword, otherwise use TTYPE comment
            # for description.
            commkey = 'TCOMM' + jj
            if commkey in hdr:
                description = escape(hdr[commkey].strip())
            else:
                description = escape(hdr.comments['TTYPE' + jj])
            c.append((name, ttype, units, description))
        return c
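For comparison, a self-contained sketch of the same TTYPEn/TFORMn/TUNITn header pattern read directly with astropy; describe_columns and its arguments are illustrative, not part of the class above:

from astropy.io import fits

def describe_columns(filename, hdu=1):
    """Return (name, format, unit) for each column of a BINTABLE HDU."""
    with fits.open(filename) as hdulist:
        hdr = hdulist[hdu].header
        # TFIELDS gives the column count; per-column keywords are 1-indexed.
        return [(hdr['TTYPE%d' % (j + 1)].strip(),
                 hdr['TFORM%d' % (j + 1)].strip(),
                 str(hdr.get('TUNIT%d' % (j + 1), '')).strip())
                for j in range(hdr['TFIELDS'])]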
Example 3
    def columns(self, hdu, error=False):
        """Describe the columns of a BINTABLE HDU.

        Parameters
        ----------
        hdu : :class:`int`
            The HDU number (zero-indexed).
        error : :class:`bool`, optional
            If ``True``, failure to extract certain required metadata raises an
            exception.

        Returns
        -------
        :class:`list`
            The rows of the table.

        Raises
        ------
        :exc:`~desidatamodel.DataModelError`
            If the BINTABLE is actually a compressed image.
        :exc:`ValueError`
            If `error` is set and a ``TUNIT`` value does not have FITS-standard
            units.
        """
        hdr = self.headers[hdu]
        if 'ZBITPIX' in hdr:
            raise DataModelError("HDU{0:d} is actually a compressed image!".format(hdu))
        ncol = hdr['TFIELDS']
        c = list()
        c.append(self.columns_header)
        for j in range(ncol):
            jj = '{0:d}'.format(j+1)
            name = hdr['TTYPE'+jj].strip()
            ttype = fits_column_format(hdr['TFORM'+jj].strip())
            tunit = 'TUNIT'+jj
            if tunit in hdr:
                units = hdr[tunit].strip()
                bad_unit = self.check_unit(units, error=error)
                if bad_unit:
                    log.debug("Non-standard (but acceptable) unit %s detected for column %s in HDU %d of %s.",
                              bad_unit, name, hdu, self.filename)
            else:
                units = ''
            # Check TCOMMnn keyword, otherwise use TTYPE comment
            # for description.
            commkey = 'TCOMM'+jj
            if commkey in hdr:
                description = escape(hdr[commkey].strip())
            else:
                description = escape(hdr.comments['TTYPE'+jj])
            c.append((name, ttype, units, description))
        return c
Example 4
    def image_format(self, hdr):
        """Obtain format of an image HDU.

        Parameters
        ----------
        hdr : :class:`~astropy.io.fits.Header`
            The header to parse.

        Returns
        -------
        :class:`str`
            A string describing the image format.

        Raises
        ------
        :exc:`~desidatamodel.DataModelError`
            If ``self.error`` is set and a ``BUNIT`` header with units that
            do not follow the FITS standard is detected.
        """
        n = hdr['NAXIS']
        if n == 0:
            return 'Empty HDU.'
        bitmap = {
            8: 'char',
            16: 'int16',
            32: 'int32',
            64: 'int64',
            -32: 'float32',
            -64: 'float64'
        }
        if 'ZBITPIX' in hdr:
            n = hdr['ZNAXIS']
            dims = [str(hdr['ZNAXIS{0:d}'.format(k + 1)]) for k in range(n)]
            try:
                datatype = bitmap[hdr['ZBITPIX']] + ' (compressed)'
            except KeyError:
                datatype = 'BITPIX={0} (compressed)'.format(hdr['ZBITPIX'])
        else:
            dims = [str(hdr['NAXIS{0:d}'.format(k + 1)]) for k in range(n)]
            try:
                datatype = bitmap[hdr['BITPIX']]
            except KeyError:
                datatype = 'BITPIX={}'.format(hdr['BITPIX'])
        if 'BUNIT' in hdr:
            log.debug("BUNIT   = '%s'", hdr['BUNIT'])
            bad_unit = self.check_unit(hdr['BUNIT'], error=self.error)
            if bad_unit:
                log.debug(
                    "Non-standard (but acceptable) unit %s detected in %s.",
                    bad_unit, self.filename)
        return 'Data: FITS image [{0}, {1}]'.format(datatype, 'x'.join(dims))
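The bitmap dictionary is the standard FITS BITPIX correspondence (positive codes for integers, negative for floats). A standalone check with astropy, independent of the class above; note NAXIS1 is the fastest-varying axis, so a (10, 20) numpy array reports as 20x10:

import numpy as np
from astropy.io import fits

hdu = fits.ImageHDU(data=np.zeros((10, 20), dtype=np.float32))
assert hdu.header['BITPIX'] == -32    # float32 maps to BITPIX = -32
dims = [str(hdu.header['NAXIS%d' % (k + 1)])
        for k in range(hdu.header['NAXIS'])]
print('Data: FITS image [float32, %s]' % 'x'.join(dims))
# -> Data: FITS image [float32, 20x10]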
Example 5
    def image_format(self, hdr):
        """Obtain format of an image HDU.

        Parameters
        ----------
        hdr : :class:`~astropy.io.fits.Header`
            The header to parse.

        Returns
        -------
        :class:`str`
            A string describing the image format.

        Raises
        ------
        :exc:`~desidatamodel.DataModelError`
            If ``self.error`` is set and a ``BUNIT`` header with units that
            do not follow the FITS standard is detected.
        """
        n = hdr['NAXIS']
        if n == 0:
            return 'Empty HDU.'
        bitmap = {8: 'char', 16: 'int16', 32: 'int32', 64: 'int64',
                  -32: 'float32', -64: 'float64'}
        if 'ZBITPIX' in hdr:
            n = hdr['ZNAXIS']
            dims = [str(hdr['ZNAXIS{0:d}'.format(k+1)]) for k in range(n)]
            try:
                datatype = bitmap[hdr['ZBITPIX']] + ' (compressed)'
            except KeyError:
                datatype = 'BITPIX={0} (compressed)'.format(hdr['ZBITPIX'])
        else:
            dims = [str(hdr['NAXIS{0:d}'.format(k+1)]) for k in range(n)]
            try:
                datatype = bitmap[hdr['BITPIX']]
            except KeyError:
                datatype = 'BITPIX={}'.format(hdr['BITPIX'])
        if 'BUNIT' in hdr:
            log.debug("BUNIT   = '%s'", hdr['BUNIT'])
            bad_unit = self.check_unit(hdr['BUNIT'], error=self.error)
            if bad_unit:
                log.debug("Non-standard (but acceptable) unit %s detected in %s.",
                          bad_unit, self.filename)
        return 'Data: FITS image [{0}, {1}]'.format(datatype, 'x'.join(dims))
Example 6
    def get_regexp(self, root, error=False):
        """Obtain the regular expression used to match files on disk.

        Parameters
        ----------
        root : :class:`str`
            Path to real files on disk.
        error : :class:`bool`, optional
            If ``True``, failure to find a regular expression raises an
            exception instead of just a warning.

        Returns
        -------
        regular expression
            The regular expression found, or ``None`` if not found.
            The regular expression is also stored internally.

        Raises
        ------
        :exc:`~desidatamodel.DataModelError`
            If `error` is set and problems with the data model file are
            detected.
        """
        with open(self.filename) as dm:
            for line in dm.readlines():
                if line.startswith('See :doc:'):
                    self.ref = self._cross_reference(line)
                    log.debug("Cross reference detected %s -> %s.",
                              self.filename, self.ref)
                    break
                if self._regexpline.match(line) is not None:
                    d = os.path.dirname(self.filename).replace(
                        self.section, root)
                    for k in self._d2r:
                        d = d.replace(k, self._d2r[k])
                    r = line.strip().split()[1].replace('``', '')
                    self.regexp = re.compile(os.path.join(d, r))
                    break
        if self.regexp is None and self.ref is not None:
            with open(self.ref) as dm:
                for line in dm.readlines():
                    #
                    # Hopefully cross-references are not nested.
                    #
                    # if line.startswith('See :doc:'):
                    #     self.ref = self._cross_reference(line)
                    #     break
                    if self._regexpline.match(line) is not None:
                        d = os.path.dirname(self.filename).replace(
                            self.section, root)
                        for k in self._d2r:
                            d = d.replace(k, self._d2r[k])
                        r = line.strip().split()[1].replace('``', '')
                        self.regexp = re.compile(os.path.join(d, r))
                        break
        if self.regexp is None:
            m = "%s has no file regexp!"
            if error:
                log.critical(m, self.filename)
                raise DataModelError(m % self.filename)
            else:
                log.warning(m, self.filename)
        return self.regexp
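The method scans the .rst model file for a line naming the file regexp; the exact pattern behind self._regexpline is internal to the class, but the parse itself (take the second whitespace token, strip the double backquotes) can be illustrated on an assumed line:

import re

line = r'Regexp: ``desi-[0-9]{8}\.fits``'   # assumed model-file line format
pattern = line.strip().split()[1].replace('``', '')
regexp = re.compile(pattern)
assert regexp.match('desi-20200101.fits') is not None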
Example 7
def main():
    """Entry point for the check_model script.

    Returns
    -------
    :class:`int`
        An integer suitable for passing to :func:`sys.exit`.
    """
    from sys import argv
    from argparse import ArgumentParser
    desc = """Check actual files against the data model for validity.
"""
    parser = ArgumentParser(description=desc, prog=os.path.basename(argv[0]))
    parser.add_argument('-d',
                        '--datamodel-dir',
                        dest='desidatamodel',
                        metavar='DIR',
                        help='Override the value of DESIDATAMODEL.')
    parser.add_argument(
        '-F',
        '--compare-files',
        dest='files',
        action='store_true',
        help='Compare an individual data model to an individual file.')
    parser.add_argument('-W',
                        '--warning-is-error',
                        dest='error',
                        action='store_true',
                        help='Data model warnings raise exceptions.')
    parser.add_argument('-v',
                        '--verbose',
                        dest='verbose',
                        action='store_true',
                        help='Set log level to DEBUG.')
    parser.add_argument(
        'section',
        metavar='DIR or FILE',
        help='Section of the data model or individual model file.')
    parser.add_argument(
        'directory',
        metavar='DIR or FILE',
        help='Check files in this top-level directory, or one individual file.'
    )
    options = parser.parse_args()
    if options.verbose:
        log.setLevel(DEBUG)
    if options.desidatamodel is not None:
        data_model_root = options.desidatamodel
    elif 'DESIDATAMODEL' in os.environ:
        data_model_root = os.environ['DESIDATAMODEL']
    else:
        log.critical("DESIDATAMODEL is not defined. Cannot find data model files!")
        return 1
    log.debug("DESIDATAMODEL=%s", data_model_root)
    if options.files:
        filename = os.path.join(data_model_root, 'doc', options.section)
        section = os.path.join(data_model_root, 'doc',
                               options.section.split('/')[0])
        log.info("Loading individual data model: %s.", filename)
        files = [DataModel(filename, section)]
        log.info("Skipping regular expression processing.")
        # files[0].get_regexp(options.directory, error=options.error)
        log.info("Setting prototype file for %s to %s.", filename,
                 options.directory)
        files[0].prototype = options.directory
    else:
        section = os.path.join(data_model_root, 'doc', options.section)
        log.info("Loading data model file in %s.", section)
        files = scan_model(section)
        log.info("Searching for data files in %s.", options.directory)
        files_to_regexp(options.directory, files, error=options.error)
        log.info("Identifying prototype files in %s.", options.directory)
        collect_files(options.directory, files)
    validate_prototypes(files, error=options.error)
    return 0
Example 8
    def extract_metadata(self, error=False):
        """Extract metadata from a data model file.

        Parameters
        ----------
        error : :class:`bool`, optional
            If ``True``, failure to extract certain required metadata raises an
            exception.

        Returns
        -------
        :class:`list`
            Metadata in a form similar to :class:`~desidatamodel.stub.Stub`
            metadata.

        Raises
        ------
        :exc:`~desidatamodel.DataModelError`
            If `error` is set and the HDU has no `EXTNAME` keyword.
        """
        metafile = self.filename
        if self.ref is not None:
            metafile = self.ref
        if self._metafile_data is None:
            with open(metafile) as f:
                self._metafile_data = f.read()
        lines = self._metafile_data.split('\n')
        hdu_sections = [
            i for i, l in enumerate(lines)
            if (self._hduline.match(l) is not None
                or self._hduspan.match(l) is not None)
        ]
        self.hdumeta = list()
        for k in range(len(hdu_sections)):
            try:
                section = lines[hdu_sections[k]:hdu_sections[k + 1]]
            except IndexError:
                section = lines[hdu_sections[k]:]
            m = self._hduspan.match(section[0])
            if m is not None:
                #
                # Detected HDU span.
                #
                g = m.groups()
                spanstart = int(g[0])
                spanend = int(g[1])
                log.debug('Detected range specification from HDU %d to HDU %d',
                          spanstart, spanend)
                spanref = [l for l in section if l.startswith('Data:')][0]
                spanext = spanref[spanref.lower().index('see') + 4:].replace(
                    '.', '')
                spanmeta = [
                    m for m in self.hdumeta if m['extname'] == spanext
                ][0]
                spanname = [
                    l.split('=')[1].strip() for l in section
                    if l.startswith('EXTNAME = ')
                ][0]
                extnames = [p.strip() for p in spanname.split(',')]
                if len(range(spanstart, spanend + 1)) == len(extnames):
                    for i, l in enumerate(range(spanstart, spanend + 1)):
                        meta = dict()
                        meta['title'] = 'HDU{0:d}'.format(l)
                        meta['extname'] = extnames[i]
                        meta['extension'] = spanmeta['extension']
                        meta['format'] = spanmeta['format']
                        meta['keywords'] = spanmeta['keywords']
                        self.hdumeta.append(meta)
                else:
                log.warning('Range specification from HDU %d to HDU %d '
                            'does not have a matching EXTNAME specification',
                            spanstart, spanend)
                continue
            meta = dict()
            meta['title'] = section[0]
            if 'Empty HDU.' in section:
                meta['extension'] = 'IMAGE'
                meta['format'] = 'Empty HDU.'
            image_data = [l for l in section if l.startswith('Data:')]
            if image_data:
                meta['extension'] = 'IMAGE'
                meta['format'] = image_data[0]
            try:
                rdtc = section.index('Required Data Table Columns')
            except ValueError:
                rdtc = None
            if rdtc is not None:
                meta['extension'] = 'BINTABLE'
                table = [
                    i for i, l in enumerate(section[rdtc:])
                    if self._tableboundary.match(l) is not None
                ][1:3]
                columns = list(map(len, section[rdtc:][table[0]].split()))
                table_lines = section[rdtc:][table[0] + 1:table[1]]
                meta['format'] = [
                    self._extract_columns(t, columns) for t in table_lines
                ]
                for mk in meta['format']:
                    if not mk[1]:
                        m = "Missing type for column %s in HDU %d of %s!"
                        if error:
                            log.critical(m, mk[0], k, metafile)
                            raise DataModelError(m % (mk[0], k, metafile))
                        else:
                            log.warning(m, mk[0], k, metafile)
                    if mk[2]:
                        bad_unit = self.check_unit(mk[2], error=error)
                        if bad_unit:
                            log.debug(
                                "Non-standard (but acceptable) unit %s detected for column %s in HDU %d of %s.",
                                bad_unit, mk[0], k, metafile)
            try:
                rhk = section.index('Required Header Keywords')
            except ValueError:
                meta['keywords'] = []
            else:
                table = [
                    i for i, l in enumerate(section[rhk:])
                    if self._tableboundary.match(l) is not None
                ][1:3]
                columns = list(map(len, section[rhk:][table[0]].split()))
                table_lines = section[rhk:][table[0] + 1:table[1]]
                meta['keywords'] = [
                    self._extract_columns(t, columns) for t in table_lines
                ]
                for mk in meta['keywords']:
                    if not mk[2]:
                        m = "Missing type for keyword %s in HDU %d of %s!"
                        if error:
                            log.critical(m, mk[0], k, metafile)
                            raise DataModelError(m % (mk[0], k, metafile))
                        else:
                            log.warning(m, mk[0], k, metafile)
                    if mk[0] == 'BUNIT':
                        bad_unit = self.check_unit(mk[1], error=error)
                        if bad_unit:
                            log.debug(
                                "Non-standard (but acceptable) unit %s detected for column %s in HDU %d of %s.",
                                bad_unit, mk[0], k, metafile)
            #
            # Need to know the format by this point!
            #
            if 'format' not in meta:
                m = "Unable to determine format for HDU %d in %s!"
                log.critical(m, k, metafile)
                raise DataModelError(m % (k, metafile))
            #
            # See https://github.com/desihub/desidatamodel/issues/69 for
            # the detailed policy on EXTNAME.
            #
            try:
                meta['extname'] = [
                    l.split()[2] for l in section if l.startswith('EXTNAME = ')
                ][0]
            except IndexError:
                meta['extname'] = ''
                if k > 0 or meta['format'] != 'Empty HDU.':
                    m = "HDU %d in %s has no EXTNAME!"
                    if error:
                        log.critical(m, k, metafile)
                        raise DataModelError(m % (k, metafile))
                    else:
                        log.warning(m, k, metafile)
                else:
                    if k == 0 and meta['format'] == 'Empty HDU.':
                        if len(meta['keywords']) > 0:
                            m = "HDU %d in %s should have EXTNAME = 'PRIMARY'."
                            log.warning(m, k, metafile)
            else:
                #
                # If we reach here, meta['extname'] *is* defined.
                #
                if k == 0:
                    if meta['format'] == 'Empty HDU.':
                        if len(meta['keywords']) > 0 and meta['extname'] != 'PRIMARY':
                            m = "HDU %d in %s has acceptable alternative EXTNAME = '%s'."
                            log.debug(m, k, metafile, meta['extname'])
                    else:
                        if meta['extname'] == 'PRIMARY':
                            m = "HDU %d in %s should have a more meaningful EXTNAME than 'PRIMARY'."
                            log.warning(m, k, metafile)
            self.hdumeta.append(meta)
        return self.hdumeta
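The HDU-span branch pairs a range title like HDU4-HDU7 with a comma-separated EXTNAME list and emits one metadata entry per HDU in the range. A worked sketch of that bookkeeping with made-up names:

spanstart, spanend = 4, 7                     # parsed from 'HDU4-HDU7'
extnames = [p.strip() for p in 'B1, B2, B3, B4'.split(',')]
assert len(range(spanstart, spanend + 1)) == len(extnames)
for i, n in enumerate(range(spanstart, spanend + 1)):
    print('HDU{0:d} -> EXTNAME = {1}'.format(n, extnames[i]))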
Example 9
    def get_regexp(self, root, error=False):
        """Obtain the regular expression used to match files on disk.

        Parameters
        ----------
        root : :class:`str`
            Path to real files on disk.
        error : :class:`bool`, optional
            If ``True``, failure to find a regular expression raises an
            exception instead of just a warning.

        Returns
        -------
        regular expression
            The regular expression found, or ``None`` if not found.
            The regular expression is also stored internally.

        Raises
        ------
        :exc:`~desidatamodel.DataModelError`
            If `error` is set and problems with the data model file are
            detected.
        """
        with open(self.filename) as dm:
            for line in dm.readlines():
                if line.startswith('See :doc:'):
                    self.ref = self._cross_reference(line)
                    log.debug("Cross reference detected %s -> %s.",
                              self.filename, self.ref)
                    break
                if self._regexpline.match(line) is not None:
                    d = os.path.dirname(self.filename).replace(self.section,
                                                               root)
                    for k in self._d2r:
                        d = d.replace(k, self._d2r[k])
                    r = line.strip().split()[1].replace('``', '')
                    self.regexp = re.compile(os.path.join(d, r))
                    break
        if self.regexp is None and self.ref is not None:
            with open(self.ref) as dm:
                for line in dm.readlines():
                    #
                    # Hopefully cross-references are not nested.
                    #
                    # if line.startswith('See :doc:'):
                    #     self.ref = self._cross_reference(line)
                    #     break
                    if self._regexpline.match(line) is not None:
                        d = os.path.dirname(self.filename).replace(self.section,
                                                                   root)
                        for k in self._d2r:
                            d = d.replace(k, self._d2r[k])
                        r = line.strip().split()[1].replace('``', '')
                        self.regexp = re.compile(os.path.join(d, r))
                        break
        if self.regexp is None:
            m = "%s has no file regexp!"
            if error:
                log.critical(m, self.filename)
                raise DataModelError(m % self.filename)
            else:
                log.warning(m, self.filename)
        return self.regexp
Example 10
    def extract_metadata(self, error=False):
        """Extract metadata from a data model file.

        Parameters
        ----------
        error : :class:`bool`, optional
            If ``True``, failure to extract certain required metadata raises an
            exception.

        Returns
        -------
        :class:`list`
            Metadata in a form similar to :class:`~desidatamodel.stub.Stub`
            metadata.

        Raises
        ------
        :exc:`~desidatamodel.DataModelError`
            If `error` is set and the HDU has no `EXTNAME` keyword.
        """
        metafile = self.filename
        if self.ref is not None:
            metafile = self.ref
        if self._metafile_data is None:
            with open(metafile) as f:
                self._metafile_data = f.read()
        lines = self._metafile_data.split('\n')
        hdu_sections = [i for i, l in enumerate(lines)
                        if (self._hduline.match(l) is not None or
                            self._hduspan.match(l) is not None)]
        self.hdumeta = list()
        for k in range(len(hdu_sections)):
            try:
                section = lines[hdu_sections[k]:hdu_sections[k+1]]
            except IndexError:
                section = lines[hdu_sections[k]:]
            m = self._hduspan.match(section[0])
            if m is not None:
                #
                # Detected HDU span.
                #
                g = m.groups()
                spanstart = int(g[0])
                spanend = int(g[1])
                log.debug('Detected range specification from HDU %d to HDU %d',
                          spanstart, spanend)
                spanref = [l for l in section if l.startswith('Data:')][0]
                spanext = spanref[spanref.lower().index('see') + 4:].replace('.', '')
                spanmeta = [m for m in self.hdumeta if m['extname'] == spanext][0]
                spanname = [l.split('=')[1].strip() for l in section
                            if l.startswith('EXTNAME = ')][0]
                extnames = [p.strip() for p in spanname.split(',')]
                if len(range(spanstart, spanend+1)) == len(extnames):
                    for i, l in enumerate(range(spanstart, spanend+1)):
                        meta = dict()
                        meta['title'] = 'HDU{0:d}'.format(l)
                        meta['extname'] = extnames[i]
                        meta['extension'] = spanmeta['extension']
                        meta['format'] = spanmeta['format']
                        meta['keywords'] = spanmeta['keywords']
                        self.hdumeta.append(meta)
                else:
                    log.warning('Range specification from HDU %d to HDU %d '
                                'does not have a matching EXTNAME specification',
                                spanstart, spanend)
                continue
            meta = dict()
            meta['title'] = section[0]
            if 'Empty HDU.' in section:
                meta['extension'] = 'IMAGE'
                meta['format'] = 'Empty HDU.'
            image_data = [l for l in section if l.startswith('Data:')]
            if image_data:
                meta['extension'] = 'IMAGE'
                meta['format'] = image_data[0]
            try:
                rdtc = section.index('Required Data Table Columns')
            except ValueError:
                rdtc = None
            if rdtc is not None:
                meta['extension'] = 'BINTABLE'
                table = [i for i, l in enumerate(section[rdtc:])
                         if self._tableboundary.match(l) is not None][1:3]
                columns = list(map(len, section[rdtc:][table[0]].split()))
                table_lines = section[rdtc:][table[0]+1:table[1]]
                meta['format'] = [self._extract_columns(t, columns)
                                  for t in table_lines]
                for mk in meta['format']:
                    if not mk[1]:
                        m = "Missing type for column %s in HDU %d of %s!"
                        if error:
                            log.critical(m, mk[0], k, metafile)
                            raise DataModelError(m % (mk[0], k, metafile))
                        else:
                            log.warning(m, mk[0], k, metafile)
                    if mk[2]:
                        bad_unit = self.check_unit(mk[2], error=error)
                        if bad_unit:
                            log.debug("Non-standard (but acceptable) unit %s detected for column %s in HDU %d of %s.",
                                      bad_unit, mk[0], k, metafile)
            try:
                rhk = section.index('Required Header Keywords')
            except ValueError:
                meta['keywords'] = []
            else:
                table = [i for i, l in enumerate(section[rhk:])
                         if self._tableboundary.match(l) is not None][1:3]
                columns = list(map(len, section[rhk:][table[0]].split()))
                table_lines = section[rhk:][table[0]+1:table[1]]
                meta['keywords'] = [self._extract_columns(t, columns)
                                    for t in table_lines]
                for mk in meta['keywords']:
                    if not mk[2]:
                        m = "Missing type for keyword %s in HDU %d of %s!"
                        if error:
                            log.critical(m, mk[0], k, metafile)
                            raise DataModelError(m % (mk[0], k, metafile))
                        else:
                            log.warning(m, mk[0], k, metafile)
                    if mk[0] == 'BUNIT':
                        bad_unit = self.check_unit(mk[1], error=error)
                        if bad_unit:
                            log.debug("Non-standard (but acceptable) unit %s detected for column %s in HDU %d of %s.",
                                      bad_unit, mk[0], k, metafile)
            #
            # Need to know the format by this point!
            #
            if 'format' not in meta:
                m = "Unable to determine format for HDU %d in %s!"
                log.critical(m, k, metafile)
                raise DataModelError(m % (k, metafile))
            #
            # See https://github.com/desihub/desidatamodel/issues/69 for
            # the detailed policy on EXTNAME.
            #
            try:
                meta['extname'] = [l.split()[2] for l in section
                                   if l.startswith('EXTNAME = ')][0]
            except IndexError:
                meta['extname'] = ''
                if k > 0 or meta['format'] != 'Empty HDU.':
                    m = "HDU %d in %s has no EXTNAME!"
                    if error:
                        log.critical(m, k, metafile)
                        raise DataModelError(m % (k, metafile))
                    else:
                        log.warning(m, k, metafile)
                else:
                    if k == 0 and meta['format'] == 'Empty HDU.':
                        if len(meta['keywords']) > 0:
                            m = "HDU %d in %s should have EXTNAME = 'PRIMARY'."
                            log.warning(m, k, metafile)
            else:
                if k == 0:
                    if meta['format'] == 'Empty HDU.':
                        if len(meta['keywords']) > 0:
                            m = "HDU %d in %s should have EXTNAME = 'PRIMARY'."
                            log.warning(m, k, metafile)
                    else:
                        if meta['extname'] == 'PRIMARY':
                            m = "HDU %d in %s should have a more meaningful EXTNAME than 'PRIMARY'."
                            log.warning(m, k, metafile)
            self.hdumeta.append(meta)
        return self.hdumeta
Example 11
def load_zbest(datapath=None, hdu='ZBEST', q3c=False):
    """Load zbest files into the zcat table.

    This function is deprecated since there should now be a single
    redshift catalog file.

    Parameters
    ----------
    datapath : :class:`str`
        Full path to the directory containing zbest files.
    hdu : :class:`int` or :class:`str`, optional
        Read a data table from this HDU (default 'ZBEST').
    q3c : :class:`bool`, optional
        If set, create q3c index on the table.
    """
    if datapath is None:
        datapath = specprod_root()
    zbestpath = os.path.join(datapath, 'spectra-64', '*', '*',
                             'zbest-64-*.fits')
    log.info("Using zbest file search path: %s.", zbestpath)
    zbest_files = glob.glob(zbestpath)
    if len(zbest_files) == 0:
        log.error("No zbest files found!")
        return
    log.info("Found %d zbest files.", len(zbest_files))
    #
    # Read the identified zbest files.
    #
    for f in zbest_files:
        brickname = os.path.basename(os.path.dirname(f))
        with fits.open(f) as hdulist:
            data = hdulist[hdu].data
        log.info("Read data from %s HDU %s.", f, hdu)
        good_targetids = ((data['TARGETID'] != 0) & (data['TARGETID'] != -1))
        #
        # If there are too many targetids, the in_ clause will blow up.
        # Disabling this test, and crossing fingers.
        #
        # q = dbSession.query(ZCat).filter(ZCat.targetid.in_(data['TARGETID'].tolist())).all()
        # if len(q) != 0:
        #     log.warning("Duplicate TARGETID found in %s.", f)
        #     for z in q:
        #         log.warning("Duplicate TARGETID = %d.", z.targetid)
        #         good_targetids = good_targetids & (data['TARGETID'] != z.targetid)
        data_list = [data[col][good_targetids].tolist() for col in data.names]
        data_names = [col.lower() for col in data.names]
        log.info("Initial column conversion complete on brick = %s.",
                 brickname)
        #
        # Expand COEFF
        #
        col = 'COEFF'
        expand = ('coeff_0', 'coeff_1', 'coeff_2', 'coeff_3', 'coeff_4',
                  'coeff_5', 'coeff_6', 'coeff_7', 'coeff_8', 'coeff_9')
        i = data_names.index(col.lower())
        del data_names[i]
        del data_list[i]
        for j, n in enumerate(expand):
            log.debug("Expanding column %d of %s (at index %d) to %s.", j, col,
                      i, n)
            data_names.insert(i + j, n)
            data_list.insert(i + j, data[col][:, j].tolist())
        log.debug(data_names)
        #
        # zbest files don't contain the same columns as zcatalog.
        #
        for col in ZCat.__table__.columns:
            if col.name not in data_names:
                data_names.append(col.name)
                data_list.append([0] * len(data_list[0]))
        data_rows = list(zip(*data_list))
        log.info("Converted columns into rows on brick = %s.", brickname)
        try:
            dbSession.bulk_insert_mappings(
                ZCat, [dict(zip(data_names, row)) for row in data_rows])
        except IntegrityError as e:
            log.error("Integrity Error detected!")
            log.error(e)
            dbSession.rollback()
        else:
            log.info("Inserted %d rows in %s for brick = %s.", len(data_rows),
                     ZCat.__tablename__, brickname)
            dbSession.commit()
    if q3c:
        q3c_index('zcat')
    return
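The COEFF block turns one array-valued FITS column into ten scalar database columns, then transposes columns into rows for the bulk insert. A self-contained sketch of that transformation with made-up values:

import numpy as np

coeff = np.arange(6).reshape(2, 3)            # two rows, three coefficients
data_names, data_list = ['targetid'], [[101, 102]]
for j in range(coeff.shape[1]):
    data_names.append('coeff_%d' % j)
    data_list.append(coeff[:, j].tolist())
data_rows = list(zip(*data_list))
# data_rows == [(101, 0, 1, 2), (102, 3, 4, 5)]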
Example 12
def load_file(filepath,
              tcls,
              hdu=1,
              expand=None,
              convert=None,
              index=None,
              rowfilter=None,
              q3c=False,
              chunksize=50000,
              maxrows=0):
    """Load a data file into the database, assuming that column names map
    to database column names with no surprises.

    Parameters
    ----------
    filepath : :class:`str`
        Full path to the data file.
    tcls : :class:`sqlalchemy.ext.declarative.api.DeclarativeMeta`
        The table to load, represented by its class.
    hdu : :class:`int` or :class:`str`, optional
        Read a data table from this HDU (default 1).
    expand : :class:`dict`, optional
        If set, map FITS column names to one or more alternative column names.
    convert : :class:`dict`, optional
        If set, convert the data for a named (database) column using the
        supplied function.
    index : :class:`str`, optional
        If set, add a column that just counts the number of rows.
    rowfilter : callable, optional
        If set, apply this filter to the rows to be loaded.  The function
        should return :class:`bool`, with ``True`` meaning a good row.
    q3c : :class:`bool`, optional
        If set, create q3c index on the table.
    chunksize : :class:`int`, optional
        If set, load database `chunksize` rows at a time (default 50000).
    maxrows : :class:`int`, optional
        If set, stop loading after `maxrows` are loaded.  Alternatively,
        set `maxrows` to zero (0) to load all rows.
    """
    tn = tcls.__tablename__
    if filepath.endswith('.fits'):
        with fits.open(filepath) as hdulist:
            data = hdulist[hdu].data
    elif filepath.endswith('.ecsv'):
        data = Table.read(filepath, format='ascii.ecsv')
    else:
        log.error("Unrecognized data file, %s!", filepath)
        return
    if maxrows == 0:
        maxrows = len(data)
    log.info("Read data from %s HDU %s", filepath, hdu)
    try:
        colnames = data.names
    except AttributeError:
        colnames = data.colnames
    for col in colnames:
        if data[col].dtype.kind == 'f':
            bad = np.isnan(data[col][0:maxrows])
            if np.any(bad):
                nbad = bad.sum()
                log.warning("%d rows of bad data detected in column %s of %s.",
                            nbad, col, filepath)
                #
                # Temporary workaround for bad flux values, see
                # https://github.com/desihub/desitarget/issues/397
                #
                if col in ('FLUX_R', 'FIBERFLUX_R', 'FIBERTOTFLUX_R'):
                    data[col][0:maxrows][bad] = -9999.0
    log.info("Integrity check complete on %s.", tn)
    if rowfilter is None:
        good_rows = np.ones((maxrows,), dtype=bool)
    else:
        good_rows = rowfilter(data[0:maxrows])
    data_list = [data[col][0:maxrows][good_rows].tolist() for col in colnames]
    data_names = [col.lower() for col in colnames]
    finalrows = len(data_list[0])
    log.info("Initial column conversion complete on %s.", tn)
    if expand is not None:
        for col in expand:
            i = data_names.index(col.lower())
            if isinstance(expand[col], str):
                #
                # Just rename a column.
                #
                log.debug("Renaming column %s (at index %d) to %s.",
                          data_names[i], i, expand[col])
                data_names[i] = expand[col]
            else:
                #
                # Assume this is an expansion of an array-valued column
                # into individual columns.
                #
                del data_names[i]
                del data_list[i]
                for j, n in enumerate(expand[col]):
                    log.debug("Expanding column %d of %s (at index %d) to %s.",
                              j, col, i, n)
                    data_names.insert(i + j, n)
                    data_list.insert(i + j, data[col][:, j].tolist())
                log.debug(data_names)
    log.info("Column expansion complete on %s.", tn)
    del data
    if convert is not None:
        for col in convert:
            i = data_names.index(col)
            data_list[i] = [convert[col](x) for x in data_list[i]]
    log.info("Column conversion complete on %s.", tn)
    if index is not None:
        data_list.insert(0, list(range(1, finalrows + 1)))
        data_names.insert(0, index)
        log.info("Added index column '%s'.", index)
    data_rows = list(zip(*data_list))
    del data_list
    log.info("Converted columns into rows on %s.", tn)
    for k in range(finalrows // chunksize + 1):
        data_chunk = [
            dict(zip(data_names, row))
            for row in data_rows[k * chunksize:(k + 1) * chunksize]
        ]
        if len(data_chunk) > 0:
            engine.execute(tcls.__table__.insert(), data_chunk)
            log.info("Inserted %d rows in %s.",
                     min((k + 1) * chunksize, finalrows), tn)
    # for k in range(finalrows//chunksize + 1):
    #     data_insert = [dict([(col, data_list[i].pop(0))
    #                          for i, col in enumerate(data_names)])
    #                    for j in range(chunksize)]
    #     session.bulk_insert_mappings(tcls, data_insert)
    #     log.info("Inserted %d rows in %s..",
    #              min((k+1)*chunksize, finalrows), tn)
    # session.commit()
    # dbSession.commit()
    if q3c:
        q3c_index(tn)
    return
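The insert loop walks the row list in blocks of chunksize; the length check skips the empty trailing block that range() produces when finalrows is an exact multiple of chunksize. A quick sketch of the arithmetic:

finalrows, chunksize = 120001, 50000
for k in range(finalrows // chunksize + 1):
    lo = k * chunksize
    hi = min((k + 1) * chunksize, finalrows)
    if hi > lo:    # same role as the len(data_chunk) > 0 guard above
        print('chunk %d: rows %d through %d' % (k, lo, hi - 1))
# -> chunks of 50000, 50000, and 20001 rows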
Example 13
def load_zbest(datapath=None, hdu='ZBEST', q3c=False):
    """Load zbest files into the zcat table.

    This function is deprecated since there should now be a single
    redshift catalog file.

    Parameters
    ----------
    datapath : :class:`str`
        Full path to the directory containing zbest files.
    hdu : :class:`int` or :class:`str`, optional
        Read a data table from this HDU (default 'ZBEST').
    q3c : :class:`bool`, optional
        If set, create q3c index on the table.
    """
    if datapath is None:
        datapath = specprod_root()
    zbestpath = os.path.join(datapath, 'spectra-64', '*', '*', 'zbest-64-*.fits')
    log.info("Using zbest file search path: %s.", zbestpath)
    zbest_files = glob.glob(zbestpath)
    if len(zbest_files) == 0:
        log.error("No zbest files found!")
        return
    log.info("Found %d zbest files.", len(zbest_files))
    #
    # Read the identified zbest files.
    #
    for f in zbest_files:
        brickname = os.path.basename(os.path.dirname(f))
        with fits.open(f) as hdulist:
            data = hdulist[hdu].data
        log.info("Read data from %s HDU %s.", f, hdu)
        good_targetids = ((data['TARGETID'] != 0) & (data['TARGETID'] != -1))
        #
        # If there are too many targetids, the in_ clause will blow up.
        # Disabling this test, and crossing fingers.
        #
        # q = dbSession.query(ZCat).filter(ZCat.targetid.in_(data['TARGETID'].tolist())).all()
        # if len(q) != 0:
        #     log.warning("Duplicate TARGETID found in %s.", f)
        #     for z in q:
        #         log.warning("Duplicate TARGETID = %d.", z.targetid)
        #         good_targetids = good_targetids & (data['TARGETID'] != z.targetid)
        data_list = [data[col][good_targetids].tolist()
                     for col in data.names]
        data_names = [col.lower() for col in data.names]
        log.info("Initial column conversion complete on brick = %s.", brickname)
        #
        # Expand COEFF
        #
        col = 'COEFF'
        expand = ('coeff_0', 'coeff_1', 'coeff_2', 'coeff_3', 'coeff_4',
                  'coeff_5', 'coeff_6', 'coeff_7', 'coeff_8', 'coeff_9',)
        i = data_names.index(col.lower())
        del data_names[i]
        del data_list[i]
        for j, n in enumerate(expand):
            log.debug("Expanding column %d of %s (at index %d) to %s.", j, col, i, n)
            data_names.insert(i + j, n)
            data_list.insert(i + j, data[col][:, j].tolist())
        log.debug(data_names)
        #
        # zbest files don't contain the same columns as zcatalog.
        #
        for col in ZCat.__table__.columns:
            if col.name not in data_names:
                data_names.append(col.name)
                data_list.append([0]*len(data_list[0]))
        data_rows = list(zip(*data_list))
        log.info("Converted columns into rows on brick = %s.", brickname)
        try:
            dbSession.bulk_insert_mappings(ZCat, [dict(zip(data_names, row))
                                                  for row in data_rows])
        except IntegrityError as e:
            log.error("Integrity Error detected!")
            log.error(e)
            dbSession.rollback()
        else:
            log.info("Inserted %d rows in %s for brick = %s.",
                     len(data_rows), ZCat.__tablename__, brickname)
            dbSession.commit()
    if q3c:
        q3c_index('zcat')
    return
Example 14
def load_file(filepath, tcls, hdu=1, expand=None, convert=None, index=None,
              rowfilter=None, q3c=False, chunksize=50000, maxrows=0):
    """Load a data file into the database, assuming that column names map
    to database column names with no surprises.

    Parameters
    ----------
    filepath : :class:`str`
        Full path to the data file.
    tcls : :class:`sqlalchemy.ext.declarative.api.DeclarativeMeta`
        The table to load, represented by its class.
    hdu : :class:`int` or :class:`str`, optional
        Read a data table from this HDU (default 1).
    expand : :class:`dict`, optional
        If set, map FITS column names to one or more alternative column names.
    convert : :class:`dict`, optional
        If set, convert the data for a named (database) column using the
        supplied function.
    index : :class:`str`, optional
        If set, add a column that just counts the number of rows.
    rowfilter : callable, optional
        If set, apply this filter to the rows to be loaded.  The function
        should return :class:`bool`, with ``True`` meaning a good row.
    q3c : :class:`bool`, optional
        If set, create q3c index on the table.
    chunksize : :class:`int`, optional
        If set, load database `chunksize` rows at a time (default 50000).
    maxrows : :class:`int`, optional
        If set, stop loading after `maxrows` are loaded.  Alternatively,
        set `maxrows` to zero (0) to load all rows.
    """
    tn = tcls.__tablename__
    if filepath.endswith('.fits'):
        with fits.open(filepath) as hdulist:
            data = hdulist[hdu].data
    elif filepath.endswith('.ecsv'):
        data = Table.read(filepath, format='ascii.ecsv')
    else:
        log.error("Unrecognized data file, %s!", filepath)
        return
    if maxrows == 0:
        maxrows = len(data)
    log.info("Read data from %s HDU %s", filepath, hdu)
    try:
        colnames = data.names
    except AttributeError:
        colnames = data.colnames
    for col in colnames:
        if data[col].dtype.kind == 'f':
            bad = np.isnan(data[col][0:maxrows])
            if np.any(bad):
                nbad = bad.sum()
                log.warning("%d rows of bad data detected in column " +
                            "%s of %s.", nbad, col, filepath)
                #
                # Temporary workaround for bad flux values, see
                # https://github.com/desihub/desitarget/issues/397
                #
                if col in ('FLUX_R', 'FIBERFLUX_R', 'FIBERTOTFLUX_R'):
                    data[col][0:maxrows][bad] = -9999.0
    log.info("Integrity check complete on %s.", tn)
    if rowfilter is None:
        good_rows = np.ones((maxrows,), dtype=bool)
    else:
        good_rows = rowfilter(data[0:maxrows])
    data_list = [data[col][0:maxrows][good_rows].tolist() for col in colnames]
    data_names = [col.lower() for col in colnames]
    finalrows = len(data_list[0])
    log.info("Initial column conversion complete on %s.", tn)
    if expand is not None:
        for col in expand:
            i = data_names.index(col.lower())
            if isinstance(expand[col], str):
                #
                # Just rename a column.
                #
                log.debug("Renaming column %s (at index %d) to %s.", data_names[i], i, expand[col])
                data_names[i] = expand[col]
            else:
                #
                # Assume this is an expansion of an array-valued column
                # into individual columns.
                #
                del data_names[i]
                del data_list[i]
                for j, n in enumerate(expand[col]):
                    log.debug("Expanding column %d of %s (at index %d) to %s.", j, col, i, n)
                    data_names.insert(i + j, n)
                    data_list.insert(i + j, data[col][:, j].tolist())
                log.debug(data_names)
    log.info("Column expansion complete on %s.", tn)
    del data
    if convert is not None:
        for col in convert:
            i = data_names.index(col)
            data_list[i] = [convert[col](x) for x in data_list[i]]
    log.info("Column conversion complete on %s.", tn)
    if index is not None:
        data_list.insert(0, list(range(1, finalrows+1)))
        data_names.insert(0, index)
        log.info("Added index column '%s'.", index)
    data_rows = list(zip(*data_list))
    del data_list
    log.info("Converted columns into rows on %s.", tn)
    for k in range(finalrows//chunksize + 1):
        data_chunk = [dict(zip(data_names, row))
                      for row in data_rows[k*chunksize:(k+1)*chunksize]]
        if len(data_chunk) > 0:
            engine.execute(tcls.__table__.insert(), data_chunk)
            log.info("Inserted %d rows in %s.",
                     min((k+1)*chunksize, finalrows), tn)
    # for k in range(finalrows//chunksize + 1):
    #     data_insert = [dict([(col, data_list[i].pop(0))
    #                          for i, col in enumerate(data_names)])
    #                    for j in range(chunksize)]
    #     session.bulk_insert_mappings(tcls, data_insert)
    #     log.info("Inserted %d rows in %s..",
    #              min((k+1)*chunksize, finalrows), tn)
    # session.commit()
    # dbSession.commit()
    if q3c:
        q3c_index(tn)
    return