def main(): """Entry point for the check_model script. Returns ------- :class:`int` An integer suitable for passing to :func:`sys.exit`. """ from sys import argv from argparse import ArgumentParser desc = """Check actual files against the data model for validity. """ parser = ArgumentParser(description=desc, prog=os.path.basename(argv[0])) parser.add_argument('-d', '--datamodel-dir', dest='desidatamodel', metavar='DIR', help='Override the value of DESIDATAMODEL.') parser.add_argument('-F', '--compare-files', dest='files', action='store_true', help='Compare an individual data model to an individual file.') parser.add_argument('-W', '--warning-is-error', dest='error', action='store_true', help='Data model warnings raise exceptions.') parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', help='Set log level to DEBUG.') parser.add_argument('section', metavar='DIR or FILE', help='Section of the data model or individual model file.') parser.add_argument('directory', metavar='DIR or FILE', help='Check files in this top-level directory, or one individual file.') options = parser.parse_args() if options.verbose: log.setLevel(DEBUG) if 'DESIDATAMODEL' in os.environ: data_model_root = os.environ['DESIDATAMODEL'] else: if options.desidatamodel is not None: data_model_root = options.desidatamodel else: log.critical(("DESIDATAMODEL is not defined. 
" + "Cannot find data model files!")) return 1 log.debug("DESIDATAMODEL=%s", data_model_root) if options.files: filename = os.path.join(data_model_root, 'doc', options.section) section = os.path.join(data_model_root, 'doc', options.section.split('/')[0]) log.info("Loading individual data model: %s.", filename) files = [DataModel(filename, section)] log.info("Skipping regular expression processing.") # files[0].get_regexp(options.directory, error=options.error) log.info("Setting prototype file for %s to %s.", filename, options.directory) files[0].prototype = options.directory else: section = os.path.join(data_model_root, 'doc', options.section) log.info("Loading data model file in %s.", section) files = scan_model(section) log.info("Searching for data files in %s.", options.directory) files_to_regexp(options.directory, files, error=options.error) log.info("Identifying prototype files in %s.", options.directory) collect_files(options.directory, files) validate_prototypes(files, error=options.error) return 0
def main(): """Entry point for the generate_model script. Returns ------- :class:`int` An integer suitable for passing to :func:`sys.exit`. """ from sys import argv from argparse import ArgumentParser try: from astropy.io import fits except ImportError: log.critical("This script requires astropy.io.fits, " + "available in your " + "favourite Python distribution.") return 1 desc = """Generate an DESI data model stub for a given FITS file. You will still need to hand edit the file to add descriptions, etc., but it gives you a good starting point in the correct format. """ parser = ArgumentParser(description=desc, prog=os.path.basename(argv[0])) parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', help='Set log level to DEBUG.') parser.add_argument('filename', help='A FITS file.', metavar='FILE', nargs='+') options = parser.parse_args() if options.verbose: log.setLevel(DEBUG) for f in options.filename: stub = Stub(f) data = str(stub) # # Write the file # with open("{0}.rst".format(stub.modelname), 'w') as m: m.write(data) return 0
def check_unit(self, unit, error=False):
    """Check units for consistency with FITS standard, while allowing
    some special exceptions.

    Parameters
    ----------
    unit : :class:`str`
        The unit to parse.
    error : :class:`bool`, optional
        If ``True``, failure to interpret the unit raises an
        exception.

    Returns
    -------
    :class:`str`
        If a special exception is detected, the name of the unit
        is returned.  Otherwise, ``None``.

    Raises
    ------
    :exc:`ValueError`
        If `error` is set and the unit can't be parsed.
    """
    try:
        Unit(unit, format='fits')
    except ValueError as e:
        message = str(e)
        # The parser reports the offending unit as the first word
        # of the error message.
        offender = message.split()[0]
        if any(acceptable in offender
               for acceptable in self._acceptable_units):
            # A known, tolerated deviation from the FITS standard.
            return offender
        if error:
            log.critical(message)
            raise
        log.warning(message)
    return None
def get_regexp(self, root, error=False):
    """Obtain the regular expression used to match files on disk.

    Parameters
    ----------
    root : :class:`str`
        Path to real files on disk.
    error : :class:`bool`, optional
        If ``True``, failure to find a regular expression raises an
        exception instead of just a warning.

    Returns
    -------
    regular expression
        The regular expression found, or ``None`` if not found.
        The regular expression is also stored internally.

    Raises
    ------
    :exc:`~desimodel.DataModelError`
        If `error` is set and problems with the data model file are
        detected.
    """
    def _compile_line(line):
        # Map the data model doc directory onto the real directory tree,
        # then join with the filename regexp given on this line.
        # (Previously this logic was duplicated verbatim in both scan
        # loops below.)
        d = os.path.dirname(self.filename).replace(self.section, root)
        for k in self._d2r:
            d = d.replace(k, self._d2r[k])
        r = line.strip().split()[1].replace('``', '')
        return re.compile(os.path.join(d, r))

    with open(self.filename) as dm:
        for line in dm.readlines():
            if line.startswith('See :doc:'):
                # Stop at a cross-reference; the regexp lives in the
                # referenced file instead.
                self.ref = self._cross_reference(line)
                log.debug("Cross reference detected %s -> %s.",
                          self.filename, self.ref)
                break
            if self._regexpline.match(line) is not None:
                self.regexp = _compile_line(line)
                break
    if self.regexp is None and self.ref is not None:
        #
        # Follow a single level of cross-reference; nested
        # cross-references are not supported.
        #
        with open(self.ref) as dm:
            for line in dm.readlines():
                if self._regexpline.match(line) is not None:
                    self.regexp = _compile_line(line)
                    break
    if self.regexp is None:
        m = "%s has no file regexp!"
        if error:
            log.critical(m, self.filename)
            raise DataModelError(m % self.filename)
        else:
            log.warning(m, self.filename)
    return self.regexp
def main(): """Entry point for the check_model script. Returns ------- :class:`int` An integer suitable for passing to :func:`sys.exit`. """ from sys import argv from argparse import ArgumentParser desc = """Check actual files against the data model for validity. """ parser = ArgumentParser(description=desc, prog=os.path.basename(argv[0])) parser.add_argument('-d', '--datamodel-dir', dest='desidatamodel', metavar='DIR', help='Override the value of DESIDATAMODEL.') parser.add_argument( '-F', '--compare-files', dest='files', action='store_true', help='Compare an individual data model to an individual file.') parser.add_argument('-W', '--warning-is-error', dest='error', action='store_true', help='Data model warnings raise exceptions.') parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', help='Set log level to DEBUG.') parser.add_argument( 'section', metavar='DIR or FILE', help='Section of the data model or individual model file.') parser.add_argument( 'directory', metavar='DIR or FILE', help='Check files in this top-level directory, or one individual file.' ) options = parser.parse_args() if options.verbose: log.setLevel(DEBUG) if 'DESIDATAMODEL' in os.environ: data_model_root = os.environ['DESIDATAMODEL'] else: if options.desidatamodel is not None: data_model_root = options.desidatamodel else: log.critical(("DESIDATAMODEL is not defined. 
" + "Cannot find data model files!")) return 1 log.debug("DESIDATAMODEL=%s", data_model_root) if options.files: filename = os.path.join(data_model_root, 'doc', options.section) section = os.path.join(data_model_root, 'doc', options.section.split('/')[0]) log.info("Loading individual data model: %s.", filename) files = [DataModel(filename, section)] log.info("Skipping regular expression processing.") # files[0].get_regexp(options.directory, error=options.error) log.info("Setting prototype file for %s to %s.", filename, options.directory) files[0].prototype = options.directory else: section = os.path.join(data_model_root, 'doc', options.section) log.info("Loading data model file in %s.", section) files = scan_model(section) log.info("Searching for data files in %s.", options.directory) files_to_regexp(options.directory, files, error=options.error) log.info("Identifying prototype files in %s.", options.directory) collect_files(options.directory, files) validate_prototypes(files, error=options.error) return 0
def extract_metadata(self, error=False):
    """Extract metadata from a data model file.

    Parameters
    ----------
    error : :class:`bool`, optional
        If ``True``, failure to extract certain required metadata raises
        an exception.

    Returns
    -------
    :class:`list`
        Metadata in a form similar to :class:`~desidatamodel.stub.Stub`
        metadata.

    Raises
    ------
    :exc:`~desidatamodel.DataModelError`
        If `error` is set and the HDU has no `EXTNAME` keyword.
    """
    # Read the model file (or its cross-referenced file) once and cache it.
    metafile = self.filename
    if self.ref is not None:
        metafile = self.ref
    if self._metafile_data is None:
        with open(metafile) as f:
            self._metafile_data = f.read()
    lines = self._metafile_data.split('\n')
    # Indexes of lines opening an HDU section: either a single HDU title
    # or an HDU range ("span") specification.
    hdu_sections = [i for i, l in enumerate(lines)
                    if (self._hduline.match(l) is not None or
                        self._hduspan.match(l) is not None)]
    self.hdumeta = []
    for k in range(len(hdu_sections)):
        # Slice out section k; the last section runs to end-of-file.
        try:
            section = lines[hdu_sections[k]:hdu_sections[k + 1]]
        except IndexError:
            section = lines[hdu_sections[k]:]
        m = self._hduspan.match(section[0])
        if m is not None:
            #
            # Detected HDU span.
            #
            g = m.groups()
            spanstart = int(g[0])
            spanend = int(g[1])
            log.debug('Detected range specification from HDU %d to HDU %d',
                      spanstart, spanend)
            # A span points at a previously-described HDU via
            # "Data: ... see EXTNAME ...".
            spanref = [l for l in section if l.startswith('Data:')][0]
            spanext = spanref[spanref.lower().index('see') + 4:].replace('.', '')
            spanmeta = [m for m in self.hdumeta
                        if m['extname'] == spanext][0]
            spanname = [l.split('=')[1].strip() for l in section
                        if l.startswith('EXTNAME = ')][0]
            extnames = [p.strip() for p in spanname.split(',')]
            if len(range(spanstart, spanend + 1)) == len(extnames):
                # One metadata entry per HDU in the span, all sharing the
                # referenced HDU's structure.
                for i, l in enumerate(range(spanstart, spanend + 1)):
                    meta = dict()
                    meta['title'] = 'HDU{0:d}'.format(l)
                    meta['extname'] = extnames[i]
                    meta['extension'] = spanmeta['extension']
                    meta['format'] = spanmeta['format']
                    meta['keywords'] = spanmeta['keywords']
                    self.hdumeta.append(meta)
            else:
                log.warning(('Range specification from HDU %d to HDU %d ' +
                             'does not have a matching EXTNAME specification'),
                            spanstart, spanend)
            continue
        meta = dict()
        meta['title'] = section[0]
        if 'Empty HDU.' in section:
            meta['extension'] = 'IMAGE'
            meta['format'] = 'Empty HDU.'
        # A "Data:" line, if present, overrides the empty-HDU format.
        image_data = [l for l in section if l.startswith('Data:')]
        if image_data:
            meta['extension'] = 'IMAGE'
            meta['format'] = image_data[0]
        try:
            rdtc = section.index('Required Data Table Columns')
        except ValueError:
            rdtc = None
        if rdtc is not None:
            # Parse the reST table of binary-table columns; the second and
            # third boundary lines delimit the data rows.
            meta['extension'] = 'BINTABLE'
            table = [i for i, l in enumerate(section[rdtc:])
                     if self._tableboundary.match(l) is not None][1:3]
            columns = list(map(len, section[rdtc:][table[0]].split()))
            table_lines = section[rdtc:][table[0] + 1:table[1]]
            meta['format'] = [self._extract_columns(t, columns)
                              for t in table_lines]
            for mk in meta['format']:
                if not mk[1]:
                    m = "Missing type for column %s in HDU %d of %s!"
                    if error:
                        log.critical(m, mk[0], k, metafile)
                        raise DataModelError(m % (mk[0], k, metafile))
                    else:
                        log.warning(m, mk[0], k, metafile)
                if mk[2]:
                    bad_unit = self.check_unit(mk[2], error=error)
                    if bad_unit:
                        log.debug("Non-standard (but acceptable) unit %s detected for column %s in HDU %d of %s.",
                                  bad_unit, mk[0], k, metafile)
        try:
            rhk = section.index('Required Header Keywords')
        except ValueError:
            meta['keywords'] = []
        else:
            # Same reST-table parsing as above, for header keywords.
            table = [i for i, l in enumerate(section[rhk:])
                     if self._tableboundary.match(l) is not None][1:3]
            columns = list(map(len, section[rhk:][table[0]].split()))
            table_lines = section[rhk:][table[0] + 1:table[1]]
            meta['keywords'] = [self._extract_columns(t, columns)
                                for t in table_lines]
            for mk in meta['keywords']:
                if not mk[2]:
                    m = "Missing type for keyword %s in HDU %d of %s!"
                    if error:
                        log.critical(m, mk[0], k, metafile)
                        raise DataModelError(m % (mk[0], k, metafile))
                    else:
                        log.warning(m, mk[0], k, metafile)
                if mk[0] == 'BUNIT':
                    bad_unit = self.check_unit(mk[1], error=error)
                    if bad_unit:
                        log.debug("Non-standard (but acceptable) unit %s detected for column %s in HDU %d of %s.",
                                  bad_unit, mk[0], k, metafile)
        #
        # Need to know the format by this point!
        # (Idiom: membership test instead of an unused try/except probe.)
        #
        if 'format' not in meta:
            m = "Unable to determine format for HDU %d in %s!"
            log.critical(m, k, metafile)
            raise DataModelError(m % (k, metafile))
        #
        # See https://github.com/desihub/desidatamodel/issues/69 for
        # the detailed policy on EXTNAME.
        #
        try:
            meta['extname'] = [l.split()[2] for l in section
                               if l.startswith('EXTNAME = ')][0]
        except IndexError:
            meta['extname'] = ''
            if (k > 0 or (k == 0 and meta['format'] != 'Empty HDU.')):
                m = "HDU %d in %s has no EXTNAME!"
                if error:
                    log.critical(m, k, metafile)
                    raise DataModelError(m % (k, metafile))
                else:
                    log.warning(m, k, metafile)
            else:
                if k == 0 and meta['format'] == 'Empty HDU.':
                    if len(meta['keywords']) > 0:
                        m = "HDU %d in %s should have EXTNAME = 'PRIMARY'."
                        log.warning(m, k, metafile)
        else:
            #
            # If we reach here, meta['extname'] *is* defined.
            #
            if k == 0:
                if meta['format'] == 'Empty HDU.':
                    if len(meta['keywords']) > 0 and meta['extname'] != 'PRIMARY':
                        # BUG FIX: the placeholder for the (string) EXTNAME
                        # was '%d', which makes logging raise a formatting
                        # error; it must be '%s'.
                        m = "HDU %d in %s has acceptable alternative EXTNAME = '%s'."
                        log.debug(m, k, metafile, meta['extname'])
                else:
                    if meta['extname'] == 'PRIMARY':
                        m = "HDU %d in %s should have a more meaningful EXTNAME than 'PRIMARY'."
                        log.warning(m, k, metafile)
        self.hdumeta.append(meta)
    return self.hdumeta
def get_regexp(self, root, error=False):
    """Obtain the regular expression used to match files on disk.

    Parameters
    ----------
    root : :class:`str`
        Path to real files on disk.
    error : :class:`bool`, optional
        If ``True``, failure to find a regular expression raises an
        exception instead of just a warning.

    Returns
    -------
    regular expression
        The regular expression found, or ``None`` if not found.
        The regular expression is also stored internally.

    Raises
    ------
    :exc:`~desimodel.DataModelError`
        If `error` is set and problems with the data model file are
        detected.
    """
    def _compile_line(line):
        # Map the data model doc directory onto the real directory tree,
        # then join with the filename regexp given on this line.
        # (Previously this logic was duplicated verbatim in both scan
        # loops below.)
        d = os.path.dirname(self.filename).replace(self.section, root)
        for k in self._d2r:
            d = d.replace(k, self._d2r[k])
        r = line.strip().split()[1].replace('``', '')
        return re.compile(os.path.join(d, r))

    with open(self.filename) as dm:
        for line in dm.readlines():
            if line.startswith('See :doc:'):
                # Stop at a cross-reference; the regexp lives in the
                # referenced file instead.
                self.ref = self._cross_reference(line)
                log.debug("Cross reference detected %s -> %s.",
                          self.filename, self.ref)
                break
            if self._regexpline.match(line) is not None:
                self.regexp = _compile_line(line)
                break
    if self.regexp is None and self.ref is not None:
        #
        # Follow a single level of cross-reference; nested
        # cross-references are not supported.
        #
        with open(self.ref) as dm:
            for line in dm.readlines():
                if self._regexpline.match(line) is not None:
                    self.regexp = _compile_line(line)
                    break
    if self.regexp is None:
        m = "%s has no file regexp!"
        if error:
            log.critical(m, self.filename)
            raise DataModelError(m % self.filename)
        else:
            log.warning(m, self.filename)
    return self.regexp
def extract_metadata(self, error=False):
    """Extract metadata from a data model file.

    Parameters
    ----------
    error : :class:`bool`, optional
        If ``True``, failure to extract certain required metadata raises
        an exception.

    Returns
    -------
    :class:`list`
        Metadata in a form similar to :class:`~desidatamodel.stub.Stub`
        metadata.

    Raises
    ------
    :exc:`~desidatamodel.DataModelError`
        If `error` is set and the HDU has no `EXTNAME` keyword.
    """
    # Read the model file (or its cross-referenced file) once and cache
    # the raw text on the instance.
    metafile = self.filename
    if self.ref is not None:
        metafile = self.ref
    if self._metafile_data is None:
        with open(metafile) as f:
            self._metafile_data = f.read()
    lines = self._metafile_data.split('\n')
    # Indexes of lines that open an HDU section: either a single HDU
    # title or an HDU range ("span") specification.
    hdu_sections = [i for i, l in enumerate(lines)
                    if (self._hduline.match(l) is not None or
                        self._hduspan.match(l) is not None)]
    self.hdumeta = list()
    for k in range(len(hdu_sections)):
        # Slice out section k; the final section runs to end-of-file.
        try:
            section = lines[hdu_sections[k]:hdu_sections[k+1]]
        except IndexError:
            section = lines[hdu_sections[k]:]
        m = self._hduspan.match(section[0])
        if m is not None:
            #
            # Detected HDU span.
            #
            g = m.groups()
            spanstart = int(g[0])
            spanend = int(g[1])
            log.debug('Detected range specification from HDU %d to HDU %d',
                      spanstart, spanend)
            # A span points at a previously-described HDU via
            # "Data: ... see EXTNAME ...".
            spanref = [l for l in section if l.startswith('Data:')][0]
            spanext = spanref[spanref.lower().index('see') + 4:].replace('.', '')
            spanmeta = [m for m in self.hdumeta if m['extname'] == spanext][0]
            spanname = [l.split('=')[1].strip() for l in section
                        if l.startswith('EXTNAME = ')][0]
            extnames = [p.strip() for p in spanname.split(',')]
            if len(range(spanstart, spanend+1)) == len(extnames):
                # One metadata entry per HDU in the span, all sharing the
                # referenced HDU's structure.
                for i, l in enumerate(range(spanstart, spanend+1)):
                    meta = dict()
                    meta['title'] = 'HDU{0:d}'.format(l)
                    meta['extname'] = extnames[i]
                    meta['extension'] = spanmeta['extension']
                    meta['format'] = spanmeta['format']
                    meta['keywords'] = spanmeta['keywords']
                    self.hdumeta.append(meta)
            else:
                log.warning(('Range specification from HDU %d to HDU %d ' +
                             'does not have a matching EXTNAME specification'),
                            spanstart, spanend)
            continue
        meta = dict()
        meta['title'] = section[0]
        if 'Empty HDU.' in section:
            meta['extension'] = 'IMAGE'
            meta['format'] = 'Empty HDU.'
        # A "Data:" line, if present, overrides the empty-HDU format.
        image_data = [l for l in section if l.startswith('Data:')]
        if image_data:
            meta['extension'] = 'IMAGE'
            meta['format'] = image_data[0]
        try:
            rdtc = section.index('Required Data Table Columns')
        except ValueError:
            rdtc = None
        if rdtc is not None:
            # Parse the reST table of binary-table columns; the second
            # and third table boundary lines delimit the data rows.
            meta['extension'] = 'BINTABLE'
            table = [i for i, l in enumerate(section[rdtc:])
                     if self._tableboundary.match(l) is not None][1:3]
            columns = list(map(len, section[rdtc:][table[0]].split()))
            table_lines = section[rdtc:][table[0]+1:table[1]]
            meta['format'] = [self._extract_columns(t, columns)
                              for t in table_lines]
            for mk in meta['format']:
                if not mk[1]:
                    m = "Missing type for column %s in HDU %d of %s!"
                    if error:
                        log.critical(m, mk[0], k, metafile)
                        raise DataModelError(m % (mk[0], k, metafile))
                    else:
                        log.warning(m, mk[0], k, metafile)
                if mk[2]:
                    bad_unit = self.check_unit(mk[2], error=error)
                    if bad_unit:
                        log.debug("Non-standard (but acceptable) unit %s detected for column %s in HDU %d of %s.",
                                  bad_unit, mk[0], k, metafile)
        try:
            rhk = section.index('Required Header Keywords')
        except ValueError:
            meta['keywords'] = []
        else:
            # Same reST-table parsing as above, for header keywords.
            table = [i for i, l in enumerate(section[rhk:])
                     if self._tableboundary.match(l) is not None][1:3]
            columns = list(map(len, section[rhk:][table[0]].split()))
            table_lines = section[rhk:][table[0]+1:table[1]]
            meta['keywords'] = [self._extract_columns(t, columns)
                                for t in table_lines]
            for mk in meta['keywords']:
                if not mk[2]:
                    m = "Missing type for keyword %s in HDU %d of %s!"
                    if error:
                        log.critical(m, mk[0], k, metafile)
                        raise DataModelError(m % (mk[0], k, metafile))
                    else:
                        log.warning(m, mk[0], k, metafile)
                if mk[0] == 'BUNIT':
                    bad_unit = self.check_unit(mk[1], error=error)
                    if bad_unit:
                        log.debug("Non-standard (but acceptable) unit %s detected for column %s in HDU %d of %s.",
                                  bad_unit, mk[0], k, metafile)
        #
        # Need to know the format by this point!
        #
        try:
            foo = meta['format']  # only probing for the key's existence
        except KeyError:
            m = "Unable to determine format for HDU %d in %s!"
            log.critical(m, k, metafile)
            raise DataModelError(m % (k, metafile))
        #
        # See https://github.com/desihub/desidatamodel/issues/69 for
        # the detailed policy on EXTNAME.
        #
        try:
            meta['extname'] = [l.split()[2] for l in section
                               if l.startswith('EXTNAME = ')][0]
        except IndexError:
            # No EXTNAME line found in this section.
            meta['extname'] = ''
            if (k > 0 or (k == 0 and meta['format'] != 'Empty HDU.')):
                m = "HDU %d in %s has no EXTNAME!"
                if error:
                    log.critical(m, k, metafile)
                    raise DataModelError(m % (k, metafile))
                else:
                    log.warning(m, k, metafile)
            else:
                if k == 0 and meta['format'] == 'Empty HDU.':
                    if len(meta['keywords']) > 0:
                        m = "HDU %d in %s should have EXTNAME = 'PRIMARY'."
                        log.warning(m, k, metafile)
        else:
            # An EXTNAME was found for this HDU.
            if k == 0:
                if meta['format'] == 'Empty HDU.':
                    if len(meta['keywords']) > 0:
                        # NOTE(review): this warns even when EXTNAME is
                        # already 'PRIMARY' — confirm that is intended.
                        m = "HDU %d in %s should have EXTNAME = 'PRIMARY'."
                        log.warning(m, k, metafile)
                else:
                    if meta['extname'] == 'PRIMARY':
                        m = "HDU %d in %s should have a more meaningful EXTNAME than 'PRIMARY'."
                        log.warning(m, k, metafile)
        self.hdumeta.append(meta)
    return self.hdumeta
def setup_db(options=None, **kwargs):
    """Initialize the database connection.

    Parameters
    ----------
    options : :class:`argparse.Namespace`
        Parsed command-line options.
    kwargs : keywords
        If present, use these instead of `options`.  This is more
        user-friendly than setting up a :class:`~argparse.Namespace`
        object in, *e.g.* a Jupyter Notebook.

    Returns
    -------
    :class:`bool`
        ``True`` if the configured database is a PostgreSQL database.

    Raises
    ------
    :exc:`ValueError`
        If neither `options` nor `kwargs` supply any configuration.
    """
    global engine, schemaname
    #
    # Schema creation
    #
    if options is None:
        if len(kwargs) > 0:
            # dict.get() is equivalent to the previous try/except
            # KeyError chains, but much more compact.
            schema = kwargs.get('schema', None)
            overwrite = kwargs.get('overwrite', False)
            hostname = kwargs.get('hostname', None)
            username = kwargs.get('username', '******')
            dbfile = kwargs.get('dbfile', 'redshift.db')
            datapath = kwargs.get('datapath', None)
            verbose = kwargs.get('verbose', False)
        else:
            raise ValueError("No options specified!")
    else:
        schema = options.schema
        overwrite = options.overwrite
        hostname = options.hostname
        username = options.username
        dbfile = options.dbfile
        datapath = options.datapath
        verbose = options.verbose
    if schema:
        schemaname = schema
        # event.listen(Base.metadata, 'before_create', CreateSchema(schemaname))
        if overwrite:
            event.listen(Base.metadata, 'before_create',
                         DDL('DROP SCHEMA IF EXISTS {0} CASCADE'.format(schemaname)))
        event.listen(Base.metadata, 'before_create',
                     DDL('CREATE SCHEMA IF NOT EXISTS {0}'.format(schemaname)))
    #
    # Create the file.
    #
    postgresql = False
    if hostname:
        postgresql = True
        db_connection = parse_pgpass(hostname=hostname, username=username)
        if db_connection is None:
            log.critical("Could not load database information!")
            # NOTE(review): returns the (truthy) int 1 on failure although
            # the documented return type is bool; kept for backward
            # compatibility with existing callers.
            return 1
    else:
        # A bare filename is placed under datapath; a path is used as-is.
        if os.path.basename(dbfile) == dbfile:
            db_file = os.path.join(datapath, dbfile)
        else:
            db_file = dbfile
        if overwrite and os.path.exists(db_file):
            log.info("Removing file: %s.", db_file)
            os.remove(db_file)
        db_connection = 'sqlite:///' + db_file
    #
    # SQLAlchemy stuff.
    #
    engine = create_engine(db_connection, echo=verbose)
    dbSession.remove()
    dbSession.configure(bind=engine, autoflush=False, expire_on_commit=False)
    log.info("Begin creating tables.")
    for tab in Base.metadata.tables.values():
        tab.schema = schemaname
    Base.metadata.create_all(engine)
    log.info("Finished creating tables.")
    return postgresql
def setup_db(options=None, **kwargs):
    """Initialize the database connection.

    Parameters
    ----------
    options : :class:`argparse.Namespace`
        Parsed command-line options.
    kwargs : keywords
        If present, use these instead of `options`.  This is more
        user-friendly than setting up a :class:`~argparse.Namespace`
        object in, *e.g.* a Jupyter Notebook.

    Returns
    -------
    :class:`bool`
        ``True`` if the configured database is a PostgreSQL database.

    Raises
    ------
    :exc:`ValueError`
        If neither `options` nor `kwargs` supply any configuration.
    """
    global engine, schemaname
    #
    # Schema creation
    #
    if options is not None:
        schema = options.schema
        overwrite = options.overwrite
        hostname = options.hostname
        username = options.username
        dbfile = options.dbfile
        datapath = options.datapath
        verbose = options.verbose
    elif kwargs:
        get = kwargs.get
        schema = get('schema', None)
        overwrite = get('overwrite', False)
        hostname = get('hostname', None)
        username = get('username', '******')
        dbfile = get('dbfile', 'redshift.db')
        datapath = get('datapath', None)
        verbose = get('verbose', False)
    else:
        raise ValueError("No options specified!")
    if schema:
        schemaname = schema
        # event.listen(Base.metadata, 'before_create', CreateSchema(schemaname))
        if overwrite:
            event.listen(Base.metadata, 'before_create',
                         DDL('DROP SCHEMA IF EXISTS {0} CASCADE'.format(schemaname)))
        event.listen(Base.metadata, 'before_create',
                     DDL('CREATE SCHEMA IF NOT EXISTS {0}'.format(schemaname)))
    #
    # Create the file.
    #
    postgresql = bool(hostname)
    if postgresql:
        db_connection = parse_pgpass(hostname=hostname, username=username)
        if db_connection is None:
            log.critical("Could not load database information!")
            return 1
    else:
        # A bare filename lives under datapath; an explicit path is
        # used unchanged.
        db_file = (os.path.join(datapath, dbfile)
                   if os.path.basename(dbfile) == dbfile else dbfile)
        if overwrite and os.path.exists(db_file):
            log.info("Removing file: %s.", db_file)
            os.remove(db_file)
        db_connection = 'sqlite:///' + db_file
    #
    # SQLAlchemy stuff.
    #
    engine = create_engine(db_connection, echo=verbose)
    dbSession.remove()
    dbSession.configure(bind=engine, autoflush=False, expire_on_commit=False)
    log.info("Begin creating tables.")
    for tab in Base.metadata.tables.values():
        tab.schema = schemaname
    Base.metadata.create_all(engine)
    log.info("Finished creating tables.")
    return postgresql