def update_truth(filepath, hdu=2, chunksize=50000, skip=('SLOPES', 'EMLINES')):
    """Add data from columns in other HDUs of the Truth table.

    Parameters
    ----------
    filepath : :class:`str`
        Full path to the data file.
    hdu : :class:`int` or :class:`str`, optional
        Read a data table from this HDU (default 2).
    chunksize : :class:`int`, optional
        If set, update database `chunksize` rows at a time (default 50000).
    skip : :class:`tuple`, optional
        Do not load columns with these names (default, ``('SLOPES', 'EMLINES')``)
    """
    tcls = Truth
    tn = tcls.__tablename__
    t = tcls.__table__
    # Accept either a FITS binary table or an ECSV file; anything else is
    # logged and skipped (the function returns None in every case).
    if filepath.endswith('.fits'):
        with fits.open(filepath) as hdulist:
            data = hdulist[hdu].data
    elif filepath.endswith('.ecsv'):
        data = Table.read(filepath, format='ascii.ecsv')
    else:
        log.error("Unrecognized data file, %s!", filepath)
        return
    log.info("Read data from %s HDU %s", filepath, hdu)
    # FITS tables expose .names; astropy Tables expose .colnames.
    try:
        colnames = data.names
    except AttributeError:
        colnames = data.colnames
    # Integrity check: warn about NaN values in float columns.  Note that
    # the bad values are only reported here, not repaired.
    for col in colnames:
        if data[col].dtype.kind == 'f':
            bad = np.isnan(data[col])
            if np.any(bad):
                nbad = bad.sum()
                log.warning("%d rows of bad data detected in column " +
                            "%s of %s.", nbad, col, filepath)
    log.info("Integrity check complete on %s.", tn)
    # if rowfilter is None:
    #     good_rows = np.ones((maxrows,), dtype=np.bool)
    # else:
    #     good_rows = rowfilter(data[0:maxrows])
    # data_list = [data[col][0:maxrows][good_rows].tolist() for col in colnames]
    # Convert column-oriented data to Python lists, dropping skipped columns.
    data_list = [data[col].tolist() for col in colnames if col not in skip]
    data_names = [col.lower() for col in colnames if col not in skip]
    # Rename the first column for use with bindparam() below.
    # NOTE(review): this assumes the first non-skipped column is TARGETID --
    # confirm against the input files.
    data_names[0] = 'b_targetid'
    finalrows = len(data_list[0])
    log.info("Initial column conversion complete on %s.", tn)
    # Release the (potentially large) raw table before building row dicts.
    del data
    data_rows = list(zip(*data_list))
    del data_list
    log.info("Converted columns into rows on %s.", tn)
    # Issue UPDATEs in chunks; the final loop iteration may be empty when
    # finalrows is an exact multiple of chunksize, hence the length guard.
    for k in range(finalrows//chunksize + 1):
        data_chunk = [dict(zip(data_names, row))
                      for row in data_rows[k*chunksize:(k+1)*chunksize]]
        # Match rows on targetid via the renamed bind parameter.
        q = t.update().where(t.c.targetid == bindparam('b_targetid'))
        if len(data_chunk) > 0:
            engine.execute(q, data_chunk)
            log.info("Updated %d rows in %s.",
                     min((k+1)*chunksize, finalrows), tn)
def q3c_index(table, ra='ra'):
    """Create a q3c index on a table.

    Parameters
    ----------
    table : :class:`str`
        Name of the table to index.
    ra : :class:`str`, optional
        If the RA, Dec columns are called something besides "ra" and "dec",
        set its name.  For example, ``ra='target_ra'``.
    """
    # The Dec column name is derived from the RA column name by replacing
    # 'ra' with 'dec', e.g. 'target_ra' -> 'target_dec'.  The table is also
    # clustered on the new index and re-analyzed so the query planner can
    # use it immediately.
    q3c_sql = """CREATE INDEX ix_{table}_q3c_ang2ipix ON {schema}.{table} (q3c_ang2ipix({ra}, {dec}));
    CLUSTER {schema}.{table} USING ix_{table}_q3c_ang2ipix;
    ANALYZE {schema}.{table};
    """.format(ra=ra, dec=ra.lower().replace('ra', 'dec'),
               schema=schemaname, table=table)
    log.info("Creating q3c index on %s.%s.", schemaname, table)
    dbSession.execute(q3c_sql)
    log.info("Finished q3c index on %s.%s.", schemaname, table)
    dbSession.commit()
    return
def main():
    """Entry point for the check_model script.

    Returns
    -------
    :class:`int`
        An integer suitable for passing to :func:`sys.exit`.
    """
    from sys import argv
    from argparse import ArgumentParser
    desc = """Check actual files against the data model for validity.
"""
    parser = ArgumentParser(description=desc,
                            prog=os.path.basename(argv[0]))
    parser.add_argument('-d', '--datamodel-dir', dest='desidatamodel',
                        metavar='DIR',
                        help='Override the value of DESIDATAMODEL.')
    parser.add_argument('-F', '--compare-files', dest='files',
                        action='store_true',
                        help='Compare an individual data model to an individual file.')
    parser.add_argument('-W', '--warning-is-error', dest='error',
                        action='store_true',
                        help='Data model warnings raise exceptions.')
    parser.add_argument('-v', '--verbose', dest='verbose',
                        action='store_true',
                        help='Set log level to DEBUG.')
    parser.add_argument('section', metavar='DIR or FILE',
                        help='Section of the data model or individual model file.')
    parser.add_argument('directory', metavar='DIR or FILE',
                        help='Check files in this top-level directory, or one individual file.')
    options = parser.parse_args()
    if options.verbose:
        log.setLevel(DEBUG)
    #
    # BUG FIX: -d/--datamodel-dir is documented to *override* DESIDATAMODEL,
    # so it must be checked before the environment variable; previously the
    # environment variable always won when both were present.
    #
    if options.desidatamodel is not None:
        data_model_root = options.desidatamodel
    elif 'DESIDATAMODEL' in os.environ:
        data_model_root = os.environ['DESIDATAMODEL']
    else:
        log.critical(("DESIDATAMODEL is not defined. " +
                      "Cannot find data model files!"))
        return 1
    log.debug("DESIDATAMODEL=%s", data_model_root)
    if options.files:
        # Compare one model file against one data file; no filename-pattern
        # scanning is needed, so the prototype is assigned directly.
        filename = os.path.join(data_model_root, 'doc', options.section)
        section = os.path.join(data_model_root, 'doc',
                               options.section.split('/')[0])
        log.info("Loading individual data model: %s.", filename)
        files = [DataModel(filename, section)]
        log.info("Skipping regular expression processing.")
        # files[0].get_regexp(options.directory, error=options.error)
        log.info("Setting prototype file for %s to %s.",
                 filename, options.directory)
        files[0].prototype = options.directory
    else:
        # Scan a whole section of the data model and match files on disk.
        section = os.path.join(data_model_root, 'doc', options.section)
        log.info("Loading data model file in %s.", section)
        files = scan_model(section)
        log.info("Searching for data files in %s.", options.directory)
        files_to_regexp(options.directory, files, error=options.error)
        log.info("Identifying prototype files in %s.", options.directory)
        collect_files(options.directory, files)
    validate_prototypes(files, error=options.error)
    return 0
def validate_prototype(self, error=False):
    """Compares a model's prototype data file to the data models.

    Parameters
    ----------
    error : :class:`bool`, optional
        If ``True``, failure to extract certain required metadata raises an
        exception.

    Notes
    -----
    * Use set theory to compare the data headers to model headers.  This should
      automatically find missing headers, extraneous headers, etc.
    """
    if self.prototype is None:
        #
        # A warning should have been issued already, so just skip silently.
        #
        return
    log.info("Comparing %s to %s.", self.prototype, self.filename)
    # Lazily construct the Stub describing the prototype file, and cache
    # its per-HDU metadata alongside the model's own metadata.
    if self._stub is None:
        self._stub = Stub(self.prototype, error=error)
    stub_meta = self._stub_meta = self._stub.hdumeta
    modelmeta = self.extract_metadata(error=error)
    #
    # Check number of headers.
    #
    if self._stub.nhdr != len(modelmeta):
        log.warning(
            "Prototype file %s has the wrong number of " +
            "sections (HDUs) according to %s.", self.prototype,
            self.filename)
        return
    for i in range(self._stub.nhdr):
        dkw = stub_meta[i]['keywords']
        mkw = modelmeta[i]['keywords']
        #
        # Check number of keywords.
        #
        # A count mismatch skips all further checks for this HDU.
        if len(dkw) != len(mkw):
            log.warning(
                "Prototype file %s has the wrong number of " +
                "HDU%d keywords according to %s.", self.prototype, i,
                self.filename)
            continue
        #
        # If number of keywords is correct, check them individually.
        #
        # Keywords are compared positionally, name only (index 0).
        for j in range(len(dkw)):
            if dkw[j][0] != mkw[j][0]:
                log.warning(
                    "Prototype file %s has a keyword " +
                    "mismatch (%s != %s) in HDU%d according to " +
                    "%s.", self.prototype, dkw[j][0], mkw[j][0], i,
                    self.filename)
        #
        # Check the extension type.
        #
        dex = stub_meta[i]['extension']
        try:
            mex = modelmeta[i]['extension']
        except KeyError:
            mex = "Extension type not found"
        if dex != mex:
            log.warning(
                "Prototype file %s has an extension type " +
                "mismatch in HDU%d (%s != %s) " +
                "according to %s.", self.prototype, i, dex, mex,
                self.filename)
            continue
        #
        # Check for EXTNAME
        #
        dexex = stub_meta[i]['extname']
        mexex = modelmeta[i]['extname']
        # HDU0 (the primary HDU) is allowed to have no EXTNAME.
        if dexex == '' and i > 0:
            log.warning("Prototype file %s has no EXTNAME in HDU%d.",
                        self.prototype, i)
        if (dexex != '' and mexex != '' and dexex != mexex):
            log.warning(
                "Prototype file %s has an EXTNAME mismatch " +
                "in HDU%d (%s != %s) " +
                "according to %s.", self.prototype, i, dexex, mexex,
                self.filename)
        #
        # If the extension type is correct, check the contents of the
        # extension.
        #
        dexf = stub_meta[i]['format']
        try:
            mexf = modelmeta[i]['format']
        except KeyError:
            mexf = "Extension format not found"
        if dex == 'IMAGE':
            # For images the format is a string; compare only up to the
            # first comma of the *data* format string.
            try:
                icomma = dexf.index(',')
            except ValueError:
                icomma = len(dexf)
            if dexf[:icomma] != mexf[:icomma]:
                log.warning(
                    "Prototype file %s has an extension " +
                    "format mismatch in HDU%d " +
                    "according to %s.", self.prototype, i,
                    self.filename)
        else:
            # For tables the format is a sequence of rows; the first row
            # is a header line, so drop it before comparing columns.
            dexf = dexf[1:]  # Get rid of header line.
            if len(dexf) != len(mexf):
                log.warning(
                    "Prototype file %s has the wrong " +
                    "number of HDU%d columns according to %s.",
                    self.prototype, i, self.filename)
            else:
                # Compare column names positionally (index 0 of each row).
                for j in range(len(dexf)):
                    if dexf[j][0] != mexf[j][0]:
                        log.warning(
                            "Prototype file %s has a " +
                            "column name mismatch (%s != %s) " +
                            "in HDU%d according to %s.",
                            self.prototype, dexf[j][0], mexf[j][0],
                            i, self.filename)
    return
def main():
    """Entry point for the check_model script.

    Returns
    -------
    :class:`int`
        An integer suitable for passing to :func:`sys.exit`.
    """
    from sys import argv
    from argparse import ArgumentParser
    desc = """Check actual files against the data model for validity.
"""
    parser = ArgumentParser(description=desc,
                            prog=os.path.basename(argv[0]))
    parser.add_argument('-d', '--datamodel-dir', dest='desidatamodel',
                        metavar='DIR',
                        help='Override the value of DESIDATAMODEL.')
    parser.add_argument('-F', '--compare-files', dest='files',
                        action='store_true',
                        help='Compare an individual data model to an individual file.')
    parser.add_argument('-W', '--warning-is-error', dest='error',
                        action='store_true',
                        help='Data model warnings raise exceptions.')
    parser.add_argument('-v', '--verbose', dest='verbose',
                        action='store_true',
                        help='Set log level to DEBUG.')
    parser.add_argument('section', metavar='DIR or FILE',
                        help='Section of the data model or individual model file.')
    parser.add_argument('directory', metavar='DIR or FILE',
                        help='Check files in this top-level directory, or one individual file.')
    options = parser.parse_args()
    if options.verbose:
        log.setLevel(DEBUG)
    #
    # BUG FIX: -d/--datamodel-dir is documented to *override* DESIDATAMODEL,
    # so it must be checked before the environment variable; previously the
    # environment variable always won when both were present.
    #
    if options.desidatamodel is not None:
        data_model_root = options.desidatamodel
    elif 'DESIDATAMODEL' in os.environ:
        data_model_root = os.environ['DESIDATAMODEL']
    else:
        log.critical(("DESIDATAMODEL is not defined. " +
                      "Cannot find data model files!"))
        return 1
    log.debug("DESIDATAMODEL=%s", data_model_root)
    if options.files:
        # Compare one model file against one data file; no filename-pattern
        # scanning is needed, so the prototype is assigned directly.
        filename = os.path.join(data_model_root, 'doc', options.section)
        section = os.path.join(data_model_root, 'doc',
                               options.section.split('/')[0])
        log.info("Loading individual data model: %s.", filename)
        files = [DataModel(filename, section)]
        log.info("Skipping regular expression processing.")
        # files[0].get_regexp(options.directory, error=options.error)
        log.info("Setting prototype file for %s to %s.",
                 filename, options.directory)
        files[0].prototype = options.directory
    else:
        # Scan a whole section of the data model and match files on disk.
        section = os.path.join(data_model_root, 'doc', options.section)
        log.info("Loading data model file in %s.", section)
        files = scan_model(section)
        log.info("Searching for data files in %s.", options.directory)
        files_to_regexp(options.directory, files, error=options.error)
        log.info("Identifying prototype files in %s.", options.directory)
        collect_files(options.directory, files)
    validate_prototypes(files, error=options.error)
    return 0
def validate_prototype(self, error=False):
    """Compares a model's prototype data file to the data models.

    Parameters
    ----------
    error : :class:`bool`, optional
        If ``True``, failure to extract certain required metadata raises an
        exception.

    Notes
    -----
    * Use set theory to compare the data headers to model headers.  This should
      automatically find missing headers, extraneous headers, etc.
    """
    if self.prototype is None:
        #
        # A warning should have been issued already, so just skip silently.
        #
        return
    log.info("Comparing %s to %s.", self.prototype, self.filename)
    # Lazily construct the Stub describing the prototype file, and cache
    # its per-HDU metadata alongside the model's own metadata.
    if self._stub is None:
        self._stub = Stub(self.prototype, error=error)
    stub_meta = self._stub_meta = self._stub.hdumeta
    modelmeta = self.extract_metadata(error=error)
    #
    # Check number of headers.
    #
    if self._stub.nhdr != len(modelmeta):
        log.warning("Prototype file %s has the wrong number of " +
                    "sections (HDUs) according to %s.",
                    self.prototype, self.filename)
        return
    for i in range(self._stub.nhdr):
        dkw = stub_meta[i]['keywords']
        mkw = modelmeta[i]['keywords']
        #
        # Check number of keywords.
        #
        # A count mismatch skips all further checks for this HDU.
        if len(dkw) != len(mkw):
            log.warning("Prototype file %s has the wrong number of " +
                        "HDU%d keywords according to %s.",
                        self.prototype, i, self.filename)
            continue
        #
        # If number of keywords is correct, check them individually.
        #
        # Keywords are compared positionally, name only (index 0).
        for j in range(len(dkw)):
            if dkw[j][0] != mkw[j][0]:
                log.warning("Prototype file %s has a keyword " +
                            "mismatch (%s != %s) in HDU%d according to " +
                            "%s.", self.prototype, dkw[j][0], mkw[j][0],
                            i, self.filename)
        #
        # Check the extension type.
        #
        dex = stub_meta[i]['extension']
        try:
            mex = modelmeta[i]['extension']
        except KeyError:
            mex = "Extension type not found"
        if dex != mex:
            log.warning("Prototype file %s has an extension type " +
                        "mismatch in HDU%d (%s != %s) " +
                        "according to %s.",
                        self.prototype, i, dex, mex, self.filename)
            continue
        #
        # Check for EXTNAME
        #
        dexex = stub_meta[i]['extname']
        mexex = modelmeta[i]['extname']
        # HDU0 (the primary HDU) is allowed to have no EXTNAME.
        if dexex == '' and i > 0:
            log.warning("Prototype file %s has no EXTNAME in HDU%d.",
                        self.prototype, i)
        if (dexex != '' and mexex != '' and dexex != mexex):
            log.warning("Prototype file %s has an EXTNAME mismatch " +
                        "in HDU%d (%s != %s) " +
                        "according to %s.",
                        self.prototype, i, dexex, mexex, self.filename)
        #
        # If the extension type is correct, check the contents of the
        # extension.
        #
        dexf = stub_meta[i]['format']
        try:
            mexf = modelmeta[i]['format']
        except KeyError:
            mexf = "Extension format not found"
        if dex == 'IMAGE':
            # For images the format is a string; compare only up to the
            # first comma of the *data* format string.
            try:
                icomma = dexf.index(',')
            except ValueError:
                icomma = len(dexf)
            if dexf[:icomma] != mexf[:icomma]:
                log.warning("Prototype file %s has an extension " +
                            "format mismatch in HDU%d " +
                            "according to %s.",
                            self.prototype, i, self.filename)
        else:
            # For tables the format is a sequence of rows; the first row
            # is a header line, so drop it before comparing columns.
            dexf = dexf[1:]  # Get rid of header line.
            if len(dexf) != len(mexf):
                log.warning("Prototype file %s has the wrong " +
                            "number of HDU%d columns according to %s.",
                            self.prototype, i, self.filename)
            else:
                # Compare column names positionally (index 0 of each row).
                for j in range(len(dexf)):
                    if dexf[j][0] != mexf[j][0]:
                        log.warning("Prototype file %s has a " +
                                    "column name mismatch (%s != %s) " +
                                    "in HDU%d according to %s.",
                                    self.prototype, dexf[j][0],
                                    mexf[j][0], i, self.filename)
    return
def main(): """Entry point for command-line script. Returns ------- :class:`int` An integer suitable for passing to :func:`sys.exit`. """ # from pkg_resources import resource_filename # # command-line arguments # options = get_options() # # Logging # if options.verbose: log = get_logger(DEBUG, timestamp=True) else: log = get_logger(INFO, timestamp=True) # # Initialize DB # postgresql = setup_db(options) # # Load configuration # loader = [ { 'filepath': os.path.join(options.datapath, 'targets', 'truth-dark.fits'), 'tcls': Truth, 'hdu': 'TRUTH', 'expand': None, 'convert': None, 'index': None, 'q3c': False, 'chunksize': options.chunksize, 'maxrows': options.maxrows }, { 'filepath': os.path.join(options.datapath, 'targets', 'targets-dark.fits'), 'tcls': Target, 'hdu': 'TARGETS', 'expand': { 'DCHISQ': ( 'dchisq_psf', 'dchisq_rex', 'dchisq_dev', 'dchisq_exp', 'dchisq_comp', ) }, 'convert': None, 'index': None, 'q3c': postgresql, 'chunksize': options.chunksize, 'maxrows': options.maxrows }, { 'filepath': os.path.join(options.datapath, 'survey', 'exposures.fits'), 'tcls': ObsList, 'hdu': 'EXPOSURES', 'expand': { 'PASS': '******' }, # 'convert': {'dateobs': lambda x: convert_dateobs(x, tzinfo=utc)}, 'convert': None, 'index': None, 'q3c': postgresql, 'chunksize': options.chunksize, 'maxrows': options.maxrows }, { 'filepath': os.path.join(options.datapath, 'spectro', 'redux', 'mini', 'zcatalog-mini.fits'), 'tcls': ZCat, 'hdu': 'ZCATALOG', 'expand': { 'COEFF': ( 'coeff_0', 'coeff_1', 'coeff_2', 'coeff_3', 'coeff_4', 'coeff_5', 'coeff_6', 'coeff_7', 'coeff_8', 'coeff_9', ) }, 'convert': None, 'rowfilter': lambda x: ((x['TARGETID'] != 0) & (x['TARGETID'] != -1)), 'q3c': postgresql, 'chunksize': options.chunksize, 'maxrows': options.maxrows } ] # # Load the tables that correspond to a single file. # for l in loader: tn = l['tcls'].__tablename__ # # Don't use .one(). It actually fetches *all* rows. 
# q = dbSession.query(l['tcls']).first() if q is None: if options.zbest and tn == 'zcat': log.info("Loading %s from zbest files in %s.", tn, options.datapath) load_zbest(datapath=options.datapath, q3c=postgresql) else: log.info("Loading %s from %s.", tn, l['filepath']) load_file(**l) log.info("Finished loading %s.", tn) else: log.info("%s table already loaded.", tn.title()) # # Update truth table. # for h in ('BGS', 'ELG', 'LRG', 'QSO', 'STAR', 'WD'): update_truth( os.path.join(options.datapath, 'targets', 'truth-dark.fits'), 'TRUTH_' + h) # # Load fiber assignment files. # q = dbSession.query(FiberAssign).first() if q is None: log.info("Loading FiberAssign from %s.", options.datapath) load_fiberassign(options.datapath, q3c=postgresql) log.info("Finished loading FiberAssign.") else: log.info("FiberAssign table already loaded.") return 0
def setup_db(options=None, **kwargs):
    """Initialize the database connection.

    Parameters
    ----------
    options : :class:`argparse.Namespace`
        Parsed command-line options.
    kwargs : keywords
        If present, use these instead of `options`.  This is more
        user-friendly than setting up a :class:`~argparse.Namespace` object in,
        *e.g.* a Jupyter Notebook.

    Returns
    -------
    :class:`bool`
        ``True`` if the configured database is a PostgreSQL database.

    Raises
    ------
    :exc:`ValueError`
        If neither `options` nor any keywords are supplied.
    """
    global engine, schemaname
    #
    # Schema creation
    #
    if options is None:
        if len(kwargs) > 0:
            # dict.get() replaces the previous chain of try/except KeyError
            # blocks; defaults are identical.
            schema = kwargs.get('schema', None)
            overwrite = kwargs.get('overwrite', False)
            hostname = kwargs.get('hostname', None)
            username = kwargs.get('username', '******')
            dbfile = kwargs.get('dbfile', 'redshift.db')
            datapath = kwargs.get('datapath', None)
            verbose = kwargs.get('verbose', False)
        else:
            raise ValueError("No options specified!")
    else:
        schema = options.schema
        overwrite = options.overwrite
        hostname = options.hostname
        username = options.username
        dbfile = options.dbfile
        datapath = options.datapath
        verbose = options.verbose
    if schema:
        schemaname = schema
        # event.listen(Base.metadata, 'before_create', CreateSchema(schemaname))
        # Register DDL to (re)create the schema just before table creation.
        if overwrite:
            event.listen(Base.metadata, 'before_create',
                         DDL('DROP SCHEMA IF EXISTS {0} CASCADE'.format(schemaname)))
        event.listen(Base.metadata, 'before_create',
                     DDL('CREATE SCHEMA IF NOT EXISTS {0}'.format(schemaname)))
    #
    # Create the file.
    #
    postgresql = False
    if hostname:
        postgresql = True
        # Look up connection credentials in ~/.pgpass.
        db_connection = parse_pgpass(hostname=hostname, username=username)
        if db_connection is None:
            log.critical("Could not load database information!")
            # NOTE(review): returns 1 (truthy) although the documented return
            # type is bool, so a caller using this as the "is PostgreSQL"
            # flag would be misled -- confirm intended behavior.
            return 1
    else:
        # SQLite: a bare filename is placed inside datapath.
        if os.path.basename(dbfile) == dbfile:
            db_file = os.path.join(datapath, dbfile)
        else:
            db_file = dbfile
        if overwrite and os.path.exists(db_file):
            log.info("Removing file: %s.", db_file)
            os.remove(db_file)
        db_connection = 'sqlite:///' + db_file
    #
    # SQLAlchemy stuff.
    #
    engine = create_engine(db_connection, echo=verbose)
    dbSession.remove()
    dbSession.configure(bind=engine, autoflush=False, expire_on_commit=False)
    log.info("Begin creating tables.")
    for tab in Base.metadata.tables.values():
        tab.schema = schemaname
    Base.metadata.create_all(engine)
    log.info("Finished creating tables.")
    return postgresql
def load_fiberassign(datapath, maxpass=4, hdu='FIBERASSIGN', q3c=False,
                     latest_epoch=False, last_column='NUMOBS_MORE'):
    """Load fiber assignment files into the fiberassign table.

    Tile files can appear in multiple epochs, so for a given tileid, load
    the tile file with the largest value of epoch.  In the "real world",
    a tile file appears in each epoch until it is observed, therefore
    the tile file corresponding to the actual observation is the one
    with the largest epoch.

    Parameters
    ----------
    datapath : :class:`str`
        Full path to the directory containing tile files.
    maxpass : :class:`int`, optional
        Search for pass numbers up to this value (default 4).
    hdu : :class:`int` or :class:`str`, optional
        Read a data table from this HDU (default 'FIBERASSIGN').
    q3c : :class:`bool`, optional
        If set, create q3c index on the table.
    latest_epoch : :class:`bool`, optional
        If set, search for the latest tile file among several epochs.
    last_column : :class:`str`, optional
        Do not load columns past this name (default 'NUMOBS_MORE').
    """
    # NOTE(review): `maxpass` is currently unused in this body -- confirm
    # whether it should constrain the file search.
    fiberpath = os.path.join(datapath, 'fiberassign*.fits')
    log.info("Using tile file search path: %s.", fiberpath)
    tile_files = glob.glob(fiberpath)
    if len(tile_files) == 0:
        log.error("No tile files found!")
        return
    log.info("Found %d tile files.", len(tile_files))
    #
    # Find the latest epoch for every tile file.
    #
    latest_tiles = dict()
    if latest_epoch:
        tileidre = re.compile(r'/(\d+)/fiberassign/fiberassign\-(\d+)\.fits$')
        for f in tile_files:
            m = tileidre.search(f)
            if m is None:
                log.error("Could not match %s!", f)
                continue
            epoch, tileid = map(int, m.groups())
            if tileid in latest_tiles:
                if latest_tiles[tileid][0] < epoch:
                    latest_tiles[tileid] = (epoch, f)
            else:
                latest_tiles[tileid] = (epoch, f)
    else:
        # BUG FIX: the pattern was a non-raw string ('fiberassign\-...'),
        # which relies on invalid escape sequences, and an unmatched
        # filename made re.match(...)[1] raise TypeError.  Use a raw,
        # precompiled pattern and skip non-matching files with an error,
        # mirroring the latest_epoch branch above.
        tileidre = re.compile(r'fiberassign\-(\d+)\.fits')
        for f in tile_files:
            # fiberassign-TILEID.fits
            m = tileidre.match(os.path.basename(f))
            if m is None:
                log.error("Could not match %s!", f)
                continue
            latest_tiles[int(m.group(1))] = (0, f)
    log.info("Identified %d tile files for loading.", len(latest_tiles))
    #
    # Read the identified tile files.
    #
    # data_index marks one past the position of last_column; it is None on
    # the first pass so that the first file's integrity check covers all
    # columns, then it is fixed from that file's column order.
    data_index = None
    for tileid in latest_tiles:
        epoch, f = latest_tiles[tileid]
        with fits.open(f) as hdulist:
            data = hdulist[hdu].data
        log.info("Read data from %s HDU %s", f, hdu)
        # Integrity check: warn about NaNs in float columns, and repair
        # the positional columns with a sentinel value.
        for col in data.names[:data_index]:
            if data[col].dtype.kind == 'f':
                bad = np.isnan(data[col])
                if np.any(bad):
                    nbad = bad.sum()
                    log.warning("%d rows of bad data detected in column " +
                                "%s of %s.", nbad, col, f)
                    #
                    # This replacement may be deprecated in the future.
                    #
                    if col in ('TARGET_RA', 'TARGET_DEC',
                               'FIBERASSIGN_X', 'FIBERASSIGN_Y'):
                        data[col][bad] = -9999.0
                        assert not np.any(np.isnan(data[col]))
                        assert np.all(np.isfinite(data[col]))
        n_rows = len(data)
        if data_index is None:
            data_index = data.names.index(last_column) + 1
        # Prepend the tileid as an explicit column, then convert to rows.
        data_list = ([[tileid] * n_rows] +
                     [data[col].tolist() for col in data.names[:data_index]])
        data_names = (['tileid'] +
                      [col.lower() for col in data.names[:data_index]])
        log.info("Initial column conversion complete on tileid = %d.", tileid)
        data_rows = list(zip(*data_list))
        log.info("Converted columns into rows on tileid = %d.", tileid)
        dbSession.bulk_insert_mappings(FiberAssign,
                                       [dict(zip(data_names, row))
                                        for row in data_rows])
        log.info("Inserted %d rows in %s for tileid = %d.",
                 n_rows, FiberAssign.__tablename__, tileid)
        dbSession.commit()
    if q3c:
        q3c_index('fiberassign', ra='target_ra')
    return
def load_zbest(datapath=None, hdu='ZBEST', q3c=False):
    """Load zbest files into the zcat table.

    This function is deprecated since there should now be a single
    redshift catalog file.

    Parameters
    ----------
    datapath : :class:`str`
        Full path to the directory containing zbest files.
    hdu : :class:`int` or :class:`str`, optional
        Read a data table from this HDU (default 'ZBEST').
    q3c : :class:`bool`, optional
        If set, create q3c index on the table.
    """
    if datapath is None:
        datapath = specprod_root()
    zbestpath = os.path.join(datapath, 'spectra-64', '*', '*',
                             'zbest-64-*.fits')
    log.info("Using zbest file search path: %s.", zbestpath)
    zbest_files = glob.glob(zbestpath)
    if len(zbest_files) == 0:
        log.error("No zbest files found!")
        return
    log.info("Found %d zbest files.", len(zbest_files))
    #
    # Read the identified zbest files.
    #
    for f in zbest_files:
        # The brick name is the name of the containing directory.
        brickname = os.path.basename(os.path.dirname(f))
        with fits.open(f) as hdulist:
            data = hdulist[hdu].data
        log.info("Read data from %s HDU %s.", f, hdu)
        # Drop rows with sentinel TARGETID values (0 and -1).
        good_targetids = ((data['TARGETID'] != 0) & (data['TARGETID'] != -1))
        #
        # If there are too many targetids, the in_ clause will blow up.
        # Disabling this test, and crossing fingers.
        #
        # q = dbSession.query(ZCat).filter(ZCat.targetid.in_(data['TARGETID'].tolist())).all()
        # if len(q) != 0:
        #     log.warning("Duplicate TARGETID found in %s.", f)
        #     for z in q:
        #         log.warning("Duplicate TARGETID = %d.", z.targetid)
        #         good_targetids = good_targetids & (data['TARGETID'] != z.targetid)
        data_list = [data[col][good_targetids].tolist() for col in data.names]
        data_names = [col.lower() for col in data.names]
        log.info("Initial column conversion complete on brick = %s.", brickname)
        #
        # Expand COEFF
        #
        col = 'COEFF'
        expand = ('coeff_0', 'coeff_1', 'coeff_2', 'coeff_3', 'coeff_4',
                  'coeff_5', 'coeff_6', 'coeff_7', 'coeff_8', 'coeff_9',)
        i = data_names.index(col.lower())
        del data_names[i]
        del data_list[i]
        for j, n in enumerate(expand):
            log.debug("Expanding column %d of %s (at index %d) to %s.",
                      j, col, i, n)
            data_names.insert(i + j, n)
            # BUG FIX: the expanded columns were taken from the *unfiltered*
            # array (data[col][:, j]) while every other column was filtered
            # by good_targetids, misaligning rows whenever any TARGETID was
            # rejected.  Apply the same row mask here.
            data_list.insert(i + j, data[col][good_targetids][:, j].tolist())
        log.debug(data_names)
        #
        # zbest files don't contain the same columns as zcatalog.
        #
        # Pad any missing zcat columns with zeros so every row dict is complete.
        for col in ZCat.__table__.columns:
            if col.name not in data_names:
                data_names.append(col.name)
                data_list.append([0] * len(data_list[0]))
        data_rows = list(zip(*data_list))
        log.info("Converted columns into rows on brick = %s.", brickname)
        try:
            dbSession.bulk_insert_mappings(ZCat, [dict(zip(data_names, row))
                                                  for row in data_rows])
        except IntegrityError as e:
            # Typically a duplicate TARGETID; skip this brick's insert.
            log.error("Integrity Error detected!")
            log.error(e)
            dbSession.rollback()
        else:
            log.info("Inserted %d rows in %s for brick = %s.",
                     len(data_rows), ZCat.__tablename__, brickname)
            dbSession.commit()
    if q3c:
        q3c_index('zcat')
    return
def load_file(filepath, tcls, hdu=1, expand=None, convert=None, index=None,
              rowfilter=None, q3c=False, chunksize=50000, maxrows=0):
    """Load a data file into the database, assuming that column names map
    to database column names with no surprises.

    Parameters
    ----------
    filepath : :class:`str`
        Full path to the data file.
    tcls : :class:`sqlalchemy.ext.declarative.api.DeclarativeMeta`
        The table to load, represented by its class.
    hdu : :class:`int` or :class:`str`, optional
        Read a data table from this HDU (default 1).
    expand : :class:`dict`, optional
        If set, map FITS column names to one or more alternative column names.
    convert : :class:`dict`, optional
        If set, convert the data for a named (database) column using the
        supplied function.
    index : :class:`str`, optional
        If set, add a column that just counts the number of rows.
    rowfilter : callable, optional
        If set, apply this filter to the rows to be loaded.  The function
        should return :class:`bool`, with ``True`` meaning a good row.
    q3c : :class:`bool`, optional
        If set, create q3c index on the table.
    chunksize : :class:`int`, optional
        If set, load database `chunksize` rows at a time (default 50000).
    maxrows : :class:`int`, optional
        If set, stop loading after `maxrows` are loaded.  Alternatively,
        set `maxrows` to zero (0) to load all rows.
    """
    tn = tcls.__tablename__
    # Accept either a FITS binary table or an ECSV file; anything else is
    # logged and skipped.
    if filepath.endswith('.fits'):
        with fits.open(filepath) as hdulist:
            data = hdulist[hdu].data
    elif filepath.endswith('.ecsv'):
        data = Table.read(filepath, format='ascii.ecsv')
    else:
        log.error("Unrecognized data file, %s!", filepath)
        return
    if maxrows == 0:
        maxrows = len(data)
    log.info("Read data from %s HDU %s", filepath, hdu)
    # FITS tables expose .names; astropy Tables expose .colnames.
    try:
        colnames = data.names
    except AttributeError:
        colnames = data.colnames
    # Integrity check: warn about NaNs in float columns, repairing a few
    # known-bad flux columns with a sentinel value.
    for col in colnames:
        if data[col].dtype.kind == 'f':
            bad = np.isnan(data[col][0:maxrows])
            if np.any(bad):
                nbad = bad.sum()
                log.warning("%d rows of bad data detected in column " +
                            "%s of %s.", nbad, col, filepath)
                #
                # Temporary workaround for bad flux values, see
                # https://github.com/desihub/desitarget/issues/397
                #
                if col in ('FLUX_R', 'FIBERFLUX_R', 'FIBERTOTFLUX_R'):
                    data[col][0:maxrows][bad] = -9999.0
    log.info("Integrity check complete on %s.", tn)
    # BUG FIX: np.bool was a deprecated alias of the builtin bool and has
    # been removed in NumPy >= 1.24; use the builtin directly.
    if rowfilter is None:
        good_rows = np.ones((maxrows,), dtype=bool)
    else:
        good_rows = rowfilter(data[0:maxrows])
    data_list = [data[col][0:maxrows][good_rows].tolist() for col in colnames]
    data_names = [col.lower() for col in colnames]
    finalrows = len(data_list[0])
    log.info("Initial column conversion complete on %s.", tn)
    if expand is not None:
        for col in expand:
            i = data_names.index(col.lower())
            if isinstance(expand[col], str):
                #
                # Just rename a column.
                #
                log.debug("Renaming column %s (at index %d) to %s.",
                          data_names[i], i, expand[col])
                data_names[i] = expand[col]
            else:
                #
                # Assume this is an expansion of an array-valued column
                # into individual columns.
                #
                del data_names[i]
                del data_list[i]
                for j, n in enumerate(expand[col]):
                    log.debug("Expanding column %d of %s (at index %d) to %s.",
                              j, col, i, n)
                    data_names.insert(i + j, n)
                    # BUG FIX: previously read data[col][:, j], i.e. the
                    # full unfiltered array, which misaligned the expanded
                    # columns with the maxrows/rowfilter-trimmed ones.
                    data_list.insert(i + j,
                                     data[col][0:maxrows][good_rows][:, j].tolist())
                log.debug(data_names)
        log.info("Column expansion complete on %s.", tn)
    # Release the (potentially large) raw table before row conversion.
    del data
    if convert is not None:
        for col in convert:
            i = data_names.index(col)
            data_list[i] = [convert[col](x) for x in data_list[i]]
        log.info("Column conversion complete on %s.", tn)
    if index is not None:
        # Prepend a simple 1-based row counter column.
        data_list.insert(0, list(range(1, finalrows + 1)))
        data_names.insert(0, index)
        log.info("Added index column '%s'.", index)
    data_rows = list(zip(*data_list))
    del data_list
    log.info("Converted columns into rows on %s.", tn)
    # Insert in chunks; the final iteration may be empty when finalrows is
    # an exact multiple of chunksize, hence the length guard.
    for k in range(finalrows // chunksize + 1):
        data_chunk = [dict(zip(data_names, row))
                      for row in data_rows[k * chunksize:(k + 1) * chunksize]]
        if len(data_chunk) > 0:
            engine.execute(tcls.__table__.insert(), data_chunk)
            log.info("Inserted %d rows in %s.",
                     min((k + 1) * chunksize, finalrows), tn)
    if q3c:
        q3c_index(tn)
    return
def main(): """Entry point for command-line script. Returns ------- :class:`int` An integer suitable for passing to :func:`sys.exit`. """ # from pkg_resources import resource_filename # # command-line arguments # options = get_options() # # Logging # if options.verbose: log = get_logger(DEBUG, timestamp=True) else: log = get_logger(INFO, timestamp=True) # # Initialize DB # postgresql = setup_db(options) # # Load configuration # loader = [{'filepath': os.path.join(options.datapath, 'targets', 'truth.fits'), 'tcls': Truth, 'hdu': 'TRUTH', 'expand': None, 'convert': None, 'index': None, 'q3c': False, 'chunksize': options.chunksize, 'maxrows': options.maxrows}, {'filepath': os.path.join(options.datapath, 'targets', 'targets.fits'), 'tcls': Target, 'hdu': 'TARGETS', 'expand': {'DCHISQ': ('dchisq_psf', 'dchisq_rex', 'dchisq_dev', 'dchisq_exp', 'dchisq_comp',)}, 'convert': None, 'index': None, 'q3c': postgresql, 'chunksize': options.chunksize, 'maxrows': options.maxrows}, {'filepath': os.path.join(options.datapath, 'survey', 'exposures.fits'), 'tcls': ObsList, 'hdu': 'EXPOSURES', 'expand': {'PASS': '******'}, # 'convert': {'dateobs': lambda x: convert_dateobs(x, tzinfo=utc)}, 'convert': None, 'index': None, 'q3c': postgresql, 'chunksize': options.chunksize, 'maxrows': options.maxrows}, {'filepath': os.path.join(options.datapath, 'spectro', 'redux', 'mini', 'zcatalog-mini.fits'), 'tcls': ZCat, 'hdu': 'ZCATALOG', 'expand': {'COEFF': ('coeff_0', 'coeff_1', 'coeff_2', 'coeff_3', 'coeff_4', 'coeff_5', 'coeff_6', 'coeff_7', 'coeff_8', 'coeff_9',)}, 'convert': None, 'rowfilter': lambda x: ((x['TARGETID'] != 0) & (x['TARGETID'] != -1)), 'q3c': postgresql, 'chunksize': options.chunksize, 'maxrows': options.maxrows}] # # Load the tables that correspond to a single file. # for l in loader: tn = l['tcls'].__tablename__ # # Don't use .one(). It actually fetches *all* rows. 
# q = dbSession.query(l['tcls']).first() if q is None: if options.zbest and tn == 'zcat': log.info("Loading %s from zbest files in %s.", tn, options.datapath) load_zbest(datapath=options.datapath, q3c=postgresql) else: log.info("Loading %s from %s.", tn, l['filepath']) load_file(**l) log.info("Finished loading %s.", tn) else: log.info("%s table already loaded.", tn.title()) # # Update truth table. # for h in ('BGS', 'ELG', 'LRG', 'QSO', 'STAR', 'WD'): update_truth(os.path.join(options.datapath, 'targets', 'truth.fits'), 'TRUTH_' + h) # # Load fiber assignment files. # q = dbSession.query(FiberAssign).first() if q is None: log.info("Loading FiberAssign from %s.", options.datapath) load_fiberassign(options.datapath, q3c=postgresql) log.info("Finished loading FiberAssign.") else: log.info("FiberAssign table already loaded.") return 0
def setup_db(options=None, **kwargs):
    """Initialize the database connection.

    Sets the module-level ``engine`` and ``schemaname`` globals as a
    side effect.

    Parameters
    ----------
    options : :class:`argparse.Namespace`
        Parsed command-line options.
    kwargs : keywords
        If present, use these instead of `options`.  This is more
        user-friendly than setting up a :class:`~argparse.Namespace` object in,
        *e.g.* a Jupyter Notebook.

    Returns
    -------
    :class:`bool`
        ``True`` if the configured database is a PostgreSQL database.

    Raises
    ------
    ValueError
        If neither `options` nor any keyword arguments are supplied.
    """
    global engine, schemaname
    #
    # Schema creation
    #
    if options is None:
        if len(kwargs) > 0:
            # dict.get() replaces the original chain of try/except
            # KeyError blocks; the defaults are unchanged.
            schema = kwargs.get('schema')
            overwrite = kwargs.get('overwrite', False)
            hostname = kwargs.get('hostname')
            username = kwargs.get('username', '******')
            dbfile = kwargs.get('dbfile', 'redshift.db')
            datapath = kwargs.get('datapath')
            verbose = kwargs.get('verbose', False)
        else:
            raise ValueError("No options specified!")
    else:
        schema = options.schema
        overwrite = options.overwrite
        hostname = options.hostname
        username = options.username
        dbfile = options.dbfile
        datapath = options.datapath
        verbose = options.verbose
    if schema:
        schemaname = schema
        # event.listen(Base.metadata, 'before_create', CreateSchema(schemaname))
        if overwrite:
            event.listen(Base.metadata, 'before_create',
                         DDL('DROP SCHEMA IF EXISTS {0} CASCADE'.format(schemaname)))
        event.listen(Base.metadata, 'before_create',
                     DDL('CREATE SCHEMA IF NOT EXISTS {0}'.format(schemaname)))
    #
    # Create the file.
    #
    postgresql = False
    if hostname:
        postgresql = True
        db_connection = parse_pgpass(hostname=hostname, username=username)
        if db_connection is None:
            log.critical("Could not load database information!")
            # NOTE(review): returns 1 (a truthy int) rather than the
            # documented bool, and leaves ``engine`` unconfigured, so a
            # caller testing the return value as "is PostgreSQL" proceeds
            # incorrectly.  Preserved for backward compatibility.
            return 1
    else:
        # A bare filename is placed inside datapath; an explicit path is
        # used as-is.
        if os.path.basename(dbfile) == dbfile:
            db_file = os.path.join(datapath, dbfile)
        else:
            db_file = dbfile
        if overwrite and os.path.exists(db_file):
            log.info("Removing file: %s.", db_file)
            os.remove(db_file)
        db_connection = 'sqlite:///'+db_file
    #
    # SQLAlchemy stuff.
    #
    engine = create_engine(db_connection, echo=verbose)
    dbSession.remove()
    dbSession.configure(bind=engine, autoflush=False, expire_on_commit=False)
    log.info("Begin creating tables.")
    for tab in Base.metadata.tables.values():
        tab.schema = schemaname
    Base.metadata.create_all(engine)
    log.info("Finished creating tables.")
    return postgresql
def load_fiberassign(datapath, maxpass=4, hdu='FIBERASSIGN', q3c=False,
                     latest_epoch=False, last_column='SUBPRIORITY'):
    """Load fiber assignment files into the fiberassign table.

    Tile files can appear in multiple epochs, so for a given tileid, load
    the tile file with the largest value of epoch.  In the "real world",
    a tile file appears in each epoch until it is observed, therefore
    the tile file corresponding to the actual observation is the one
    with the largest epoch.

    Parameters
    ----------
    datapath : :class:`str`
        Full path to the directory containing tile files.
    maxpass : :class:`int`, optional
        Search for pass numbers up to this value (default 4).
        NOTE(review): currently unused in the function body — confirm.
    hdu : :class:`int` or :class:`str`, optional
        Read a data table from this HDU (default 'FIBERASSIGN').
    q3c : :class:`bool`, optional
        If set, create q3c index on the table.
    latest_epoch : :class:`bool`, optional
        If set, search for the latest tile file among several epochs.
    last_column : :class:`str`, optional
        Do not load columns past this name (default 'SUBPRIORITY').
    """
    fiberpath = os.path.join(datapath, 'tile*.fits')
    log.info("Using tile file search path: %s.", fiberpath)
    tile_files = glob.glob(fiberpath)
    if len(tile_files) == 0:
        log.error("No tile files found!")
        return
    log.info("Found %d tile files.", len(tile_files))
    #
    # Find the latest epoch for every tile file.
    #
    latest_tiles = dict()
    if latest_epoch:
        tileidre = re.compile(r'/(\d+)/fiberassign/tile-(\d+)\.fits$')
        for f in tile_files:
            m = tileidre.search(f)
            if m is None:
                log.error("Could not match %s!", f)
                continue
            epoch, tileid = map(int, m.groups())
            if tileid in latest_tiles:
                if latest_tiles[tileid][0] < epoch:
                    latest_tiles[tileid] = (epoch, f)
            else:
                latest_tiles[tileid] = (epoch, f)
    else:
        # Compile once outside the loop; the raw string fixes the invalid
        # escape sequence '\-' that a plain string literal produced.
        tileidre = re.compile(r'tile[-_](\d+)\.fits')
        for f in tile_files:
            # tile_TILEID.fits or tile-TILEID.fits
            tileid = int(tileidre.match(os.path.basename(f))[1])
            latest_tiles[tileid] = (0, f)
    log.info("Identified %d tile files for loading.", len(latest_tiles))
    #
    # Read the identified tile files.
    #
    # data_index is None for the first file (slice over all columns) and
    # is then fixed at the position just past last_column.
    data_index = None
    for tileid in latest_tiles:
        epoch, f = latest_tiles[tileid]
        with fits.open(f) as hdulist:
            data = hdulist[hdu].data
        log.info("Read data from %s HDU %s", f, hdu)
        for col in data.names[:data_index]:
            if data[col].dtype.kind == 'f':
                bad = np.isnan(data[col])
                if np.any(bad):
                    nbad = bad.sum()
                    log.warning("%d rows of bad data detected in column " +
                                "%s of %s.", nbad, col, f)
                    #
                    # This replacement may be deprecated in the future.
                    #
                    if col in ('TARGET_RA', 'TARGET_DEC',
                               'DESIGN_X', 'DESIGN_Y'):
                        data[col][bad] = -9999.0
                        assert not np.any(np.isnan(data[col]))
                        assert np.all(np.isfinite(data[col]))
        n_rows = len(data)
        if data_index is None:
            data_index = data.names.index(last_column) + 1
        # Prepend a constant tileid column so every row records its tile.
        data_list = ([[tileid]*n_rows] +
                     [data[col].tolist() for col in data.names[:data_index]])
        data_names = (['tileid'] +
                      [col.lower() for col in data.names[:data_index]])
        log.info("Initial column conversion complete on tileid = %d.", tileid)
        data_rows = list(zip(*data_list))
        log.info("Converted columns into rows on tileid = %d.", tileid)
        dbSession.bulk_insert_mappings(FiberAssign,
                                       [dict(zip(data_names, row))
                                        for row in data_rows])
        log.info("Inserted %d rows in %s for tileid = %d.",
                 n_rows, FiberAssign.__tablename__, tileid)
        dbSession.commit()
    if q3c:
        q3c_index('fiberassign', ra='target_ra')
    return
def load_zbest(datapath=None, hdu='ZBEST', q3c=False):
    """Load zbest files into the zcat table.

    This function is deprecated since there should now be a single
    redshift catalog file.

    Parameters
    ----------
    datapath : :class:`str`
        Full path to the directory containing zbest files.
    hdu : :class:`int` or :class:`str`, optional
        Read a data table from this HDU (default 'ZBEST').
    q3c : :class:`bool`, optional
        If set, create q3c index on the table.
    """
    if datapath is None:
        datapath = specprod_root()
    zbestpath = os.path.join(datapath, 'spectra-64', '*', '*',
                             'zbest-64-*.fits')
    log.info("Using zbest file search path: %s.", zbestpath)
    zbest_files = glob.glob(zbestpath)
    if len(zbest_files) == 0:
        log.error("No zbest files found!")
        return
    log.info("Found %d zbest files.", len(zbest_files))
    #
    # Read the identified zbest files.
    #
    for f in zbest_files:
        brickname = os.path.basename(os.path.dirname(f))
        with fits.open(f) as hdulist:
            data = hdulist[hdu].data
        log.info("Read data from %s HDU %s.", f, hdu)
        # Reject sentinel TARGETID values (0 and -1).
        good_targetids = ((data['TARGETID'] != 0) & (data['TARGETID'] != -1))
        #
        # If there are too many targetids, the in_ clause will blow up.
        # Disabling this test, and crossing fingers.
        #
        # q = dbSession.query(ZCat).filter(ZCat.targetid.in_(data['TARGETID'].tolist())).all()
        # if len(q) != 0:
        #     log.warning("Duplicate TARGETID found in %s.", f)
        #     for z in q:
        #         log.warning("Duplicate TARGETID = %d.", z.targetid)
        #         good_targetids = good_targetids & (data['TARGETID'] != z.targetid)
        data_list = [data[col][good_targetids].tolist()
                     for col in data.names]
        data_names = [col.lower() for col in data.names]
        log.info("Initial column conversion complete on brick = %s.",
                 brickname)
        #
        # Expand COEFF
        #
        col = 'COEFF'
        expand = ('coeff_0', 'coeff_1', 'coeff_2', 'coeff_3', 'coeff_4',
                  'coeff_5', 'coeff_6', 'coeff_7', 'coeff_8', 'coeff_9',)
        i = data_names.index(col.lower())
        del data_names[i]
        del data_list[i]
        for j, n in enumerate(expand):
            log.debug("Expanding column %d of %s (at index %d) to %s.",
                      j, col, i, n)
            data_names.insert(i + j, n)
            # BUG FIX: apply the same good_targetids filter used for every
            # other column.  The original data[col][:, j] kept the rejected
            # rows, so the expanded coefficient columns were longer than the
            # filtered columns and zip() silently mis-aligned the rows.
            data_list.insert(i + j, data[col][:, j][good_targetids].tolist())
        log.debug(data_names)
        #
        # zbest files don't contain the same columns as zcatalog.
        #
        for col in ZCat.__table__.columns:
            if col.name not in data_names:
                data_names.append(col.name)
                data_list.append([0]*len(data_list[0]))
        data_rows = list(zip(*data_list))
        log.info("Converted columns into rows on brick = %s.", brickname)
        try:
            dbSession.bulk_insert_mappings(ZCat, [dict(zip(data_names, row))
                                                  for row in data_rows])
        except IntegrityError as e:
            log.error("Integrity Error detected!")
            log.error(e)
            dbSession.rollback()
        else:
            log.info("Inserted %d rows in %s for brick = %s.",
                     len(data_rows), ZCat.__tablename__, brickname)
            dbSession.commit()
    if q3c:
        q3c_index('zcat')
    return
def load_file(filepath, tcls, hdu=1, expand=None, convert=None, index=None,
              rowfilter=None, q3c=False, chunksize=50000, maxrows=0):
    """Load a data file into the database, assuming that column names map
    to database column names with no surprises.

    Parameters
    ----------
    filepath : :class:`str`
        Full path to the data file.
    tcls : :class:`sqlalchemy.ext.declarative.api.DeclarativeMeta`
        The table to load, represented by its class.
    hdu : :class:`int` or :class:`str`, optional
        Read a data table from this HDU (default 1).
    expand : :class:`dict`, optional
        If set, map FITS column names to one or more alternative column
        names.
    convert : :class:`dict`, optional
        If set, convert the data for a named (database) column using the
        supplied function.
    index : :class:`str`, optional
        If set, add a column that just counts the number of rows.
    rowfilter : callable, optional
        If set, apply this filter to the rows to be loaded.  The function
        should return :class:`bool`, with ``True`` meaning a good row.
    q3c : :class:`bool`, optional
        If set, create q3c index on the table.
    chunksize : :class:`int`, optional
        If set, load database `chunksize` rows at a time (default 50000).
    maxrows : :class:`int`, optional
        If set, stop loading after `maxrows` are loaded.  Alternatively,
        set `maxrows` to zero (0) to load all rows.
    """
    tn = tcls.__tablename__
    if filepath.endswith('.fits'):
        with fits.open(filepath) as hdulist:
            data = hdulist[hdu].data
    elif filepath.endswith('.ecsv'):
        data = Table.read(filepath, format='ascii.ecsv')
    else:
        log.error("Unrecognized data file, %s!", filepath)
        return
    if maxrows == 0:
        maxrows = len(data)
    log.info("Read data from %s HDU %s", filepath, hdu)
    # FITS tables expose .names; astropy Tables expose .colnames.
    try:
        colnames = data.names
    except AttributeError:
        colnames = data.colnames
    for col in colnames:
        if data[col].dtype.kind == 'f':
            bad = np.isnan(data[col][0:maxrows])
            if np.any(bad):
                nbad = bad.sum()
                log.warning("%d rows of bad data detected in column " +
                            "%s of %s.", nbad, col, filepath)
                #
                # Temporary workaround for bad flux values, see
                # https://github.com/desihub/desitarget/issues/397
                #
                if col in ('FLUX_R', 'FIBERFLUX_R', 'FIBERTOTFLUX_R'):
                    data[col][0:maxrows][bad] = -9999.0
    log.info("Integrity check complete on %s.", tn)
    if rowfilter is None:
        # np.bool was removed in NumPy 1.24; the builtin bool is the
        # documented equivalent.
        good_rows = np.ones((maxrows,), dtype=bool)
    else:
        good_rows = rowfilter(data[0:maxrows])
    data_list = [data[col][0:maxrows][good_rows].tolist() for col in colnames]
    data_names = [col.lower() for col in colnames]
    finalrows = len(data_list[0])
    log.info("Initial column conversion complete on %s.", tn)
    if expand is not None:
        for col in expand:
            i = data_names.index(col.lower())
            if isinstance(expand[col], str):
                #
                # Just rename a column.
                #
                log.debug("Renaming column %s (at index %d) to %s.",
                          data_names[i], i, expand[col])
                data_names[i] = expand[col]
            else:
                #
                # Assume this is an expansion of an array-valued column
                # into individual columns.
                #
                del data_names[i]
                del data_list[i]
                for j, n in enumerate(expand[col]):
                    log.debug("Expanding column %d of %s (at index %d) to %s.",
                              j, col, i, n)
                    data_names.insert(i + j, n)
                    # BUG FIX: apply the same maxrows/good_rows selection
                    # as the scalar columns.  The original data[col][:, j]
                    # ignored both filters, mis-aligning rows via zip()
                    # truncation whenever either filter removed anything.
                    data_list.insert(
                        i + j,
                        data[col][0:maxrows][good_rows][:, j].tolist())
                log.debug(data_names)
        log.info("Column expansion complete on %s.", tn)
    del data
    if convert is not None:
        for col in convert:
            i = data_names.index(col)
            data_list[i] = [convert[col](x) for x in data_list[i]]
    log.info("Column conversion complete on %s.", tn)
    if index is not None:
        # Simple 1-based row counter column.
        data_list.insert(0, list(range(1, finalrows+1)))
        data_names.insert(0, index)
        log.info("Added index column '%s'.", index)
    data_rows = list(zip(*data_list))
    del data_list
    log.info("Converted columns into rows on %s.", tn)
    for k in range(finalrows//chunksize + 1):
        data_chunk = [dict(zip(data_names, row))
                      for row in data_rows[k*chunksize:(k+1)*chunksize]]
        if len(data_chunk) > 0:
            engine.execute(tcls.__table__.insert(), data_chunk)
            log.info("Inserted %d rows in %s.",
                     min((k+1)*chunksize, finalrows), tn)
    if q3c:
        q3c_index(tn)
    return