def _connect_to_engine(self):
    # DbAuth will not look up hosts that are None, '' or 0
    if self._host:
        try:
            authDict = {'username': DbAuth.username(self._host, str(self._port)),
                        'password': DbAuth.password(self._host, str(self._port))}
        except Exception:
            if self._driver == 'mssql+pymssql':
                print("\nFor more information on database authentication using the db-auth.paf"
                      " policy file see: "
                      "https://confluence.lsstcorp.org/display/SIM/Accessing+the+UW+CATSIM+Database\n")
            raise

        dbUrl = url.URL(self._driver,
                        host=self._host,
                        port=self._port,
                        database=self._database,
                        **authDict)
    else:
        dbUrl = url.URL(self._driver, database=self._database)

    self._engine = create_engine(dbUrl, echo=self._verbose)

    if self._engine.dialect.name == 'sqlite':
        event.listen(self._engine, 'checkout', declareTrigFunctions)

    self._session = scoped_session(sessionmaker(autoflush=True, bind=self._engine))
    self._metadata = MetaData(bind=self._engine)
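For reference, a minimal sketch of the declareTrigFunctions pool listener hooked up above. It assumes, as in the LSST sims code this resembles, that the listener registers the trigonometric functions sqlite lacks natively; the exact function set shown here is illustrative, not taken from this codebase.

import math

def declareTrigFunctions(dbapiConnection, connectionRecord, connectionProxy):
    """Register trig helpers on each sqlite connection as it is checked out
    of the SQLAlchemy pool (signature matches the 'checkout' event)."""
    dbapiConnection.create_function("cos", 1, math.cos)
    dbapiConnection.create_function("sin", 1, math.sin)
    dbapiConnection.create_function("acos", 1, math.acos)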
def __init__(self, broker, host, port, runid, database):
    super(Logger, self).__init__(broker, runid)
    # set the high-water mark for the number of messages retrieved before attempting to drain them.
    self.highwatermark = 10000
    self.database = database

    #
    # get database authorization info
    #
    home = os.getenv("HOME")
    pol = Policy(home + "/.lsst/db-auth.paf")
    dbAuth = DbAuth()
    dbAuth.setPolicy(pol)
    user = dbAuth.username(host, port)
    password = dbAuth.password(host, port)

    #
    # create the logger for the database and connect to it
    #
    self.dbLogger = DatabaseLogger(host, int(port))
    self.dbLogger.connect(user, password, self.database)
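The constructor above reads credentials from the ~/.lsst/db-auth.paf policy file via Policy and DbAuth.setPolicy. A minimal sketch of that file, assuming the authInfo layout described in the LSST documentation; host, user, and password values are placeholders:

database: {
    authInfo: {
        host: lsst-db.example.edu
        port: 3306
        user: myuser
        password: mypassword
    }
}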
def main():
    # Setup command line options
    parser = argparse.ArgumentParser(description="Program which transposes a key-value table into a table "
                                                 "where each key is mapped to a column.")
    addDbOptions(parser)
    parser.add_argument(
        "-s", "--skip-keys", dest="skipKeys",
        help="Comma separated list of metadata keys to omit in the output table")
    parser.add_argument(
        "-c", "--compress", dest="compress", action="store_true",
        help="Lift keys with constant values into a view")
    parser.add_argument(
        "database", help="Name of database containing metadata table to transpose")
    parser.add_argument(
        "metadataTable", help="Name of metadata table to transpose")
    parser.add_argument(
        "idCol", help="Primary key column name for metadata table")
    parser.add_argument(
        "outputTable", help="Name of output table to create")
    ns = parser.parse_args()
    db, metadataTable, idCol, outputTable = (
        ns.database, ns.metadataTable, ns.idCol, ns.outputTable)
    if DbAuth.available(ns.host, str(ns.port)):
        ns.user = DbAuth.username(ns.host, str(ns.port))
        passwd = DbAuth.password(ns.host, str(ns.port))
    elif os.path.exists(os.path.join(os.environ["HOME"], ".mysql.cnf")):
        passwd = None
    else:
        passwd = getpass.getpass("%s's MySQL password: " % ns.user)
    skipCols = set()
    if ns.skipKeys is not None:
        skipCols = set([k.strip() for k in ns.skipKeys.split(",")])
    run(ns.host, ns.port, ns.user, passwd, db, metadataTable, idCol,
        outputTable, skipCols, ns.compress)
def run():
    basename = os.path.basename(sys.argv[0])

    parser = argparse.ArgumentParser(
        prog=basename,
        description='''A statistics reporting utility.  Use to print out information
about what happened during a run.  Takes as an argument run information previously
ingested into a named database by one of the ingest utilities.''',
        epilog='''example: report.py -H kaboom.ncsa.illinois.edu -p 3303 -d srp_2013_0601_140432 -S''')

    parser.add_argument("-H", "--host", action="store", default=None, dest="host",
                        help="mysql server host", type=str, required=True)
    parser.add_argument("-p", "--port", action="store", default=3306, dest="port",
                        help="mysql server port", type=int)
    parser.add_argument("-d", "--database", action="store", default=None, dest="database",
                        help="database name", type=str, required=True)
    parser.add_argument("-I", "--submits-per-interval", action="store_true", default=None,
                        dest="submits", help="number of submits to the condor queue per interval")
    parser.add_argument("-C", "--cores-used-each-second", action="store_true", default=None,
                        dest="cores", help="cores used each second")
    parser.add_argument("-N", "--cores-used-each-interval", type=int, default=-1,
                        dest="interval", help="cores used each interval")
    parser.add_argument("-S", "--summary", action="store_true", default=None,
                        dest="summary", help="summary of run")
    parser.add_argument("-v", "--verbose", action="store_true", dest="verbose",
                        help="verbose")

    args = parser.parse_args()

    host = args.host
    port = args.port
    database = args.database

    #
    # get database authorization info
    #
    dbAuth = DbAuth()
    user = dbAuth.username(host, str(port))
    password = dbAuth.password(host, str(port))

    # connect to the database
    dbm = DatabaseManager(host, port, user, password)
    dbm.execCommand0('use ' + database)

    # command line arguments
    values = None
    submitTimes = SubmissionTimes(dbm)
    entries = submitTimes.getEntries()
    if args.submits:
        submitsPerInterval = SubmitsPerInterval(dbm, 1)
        values = submitsPerInterval.getValues()
        writeDateValues(values)
    elif args.cores:
        coresPerSecond = CoresPerSecond(dbm, entries)
        values = coresPerSecond.getValues()
        writeDateValues(values)
    elif args.interval > -1:
        coresPerInterval = CoresPerInterval(dbm, entries, args.interval)
        values = coresPerInterval.getValues()
        writeDateValues(values)
    elif args.summary:
        printSummary(dbm, entries)
def connect(host, port, db, user=None):
    kwargs = dict(host=host, port=port, db=db)
    if user is not None:
        kwargs["user"] = user
    try:
        # See if we can connect without a password (e.g. via my.cnf)
        return MySQLdb.connect(**kwargs)
    except MySQLdb.Error:
        # Fall back to DbAuth
        kwargs["user"] = DbAuth.username(host, str(port))
        kwargs["passwd"] = DbAuth.password(host, str(port))
        return MySQLdb.connect(**kwargs)
def __init__(self, outDir=None, database=None, driver='sqlite',
             host=None, port=None, verbose=False):
    """
    Instantiate the results database, creating metrics, plots and summarystats tables.
    """
    # Connect to database
    # for sqlite, connecting to a non-existent database creates it automatically
    if database is None:
        # Using default value for database name, should specify directory.
        if outDir is None:
            outDir = '.'
        # Check for output directory, make if needed.
        if not os.path.isdir(outDir):
            try:
                os.makedirs(outDir)
            except OSError as msg:
                raise OSError(msg, '\n  (If this was the database file (not outDir), '
                              'remember to use kwarg "database")')
        self.database = os.path.join(outDir, 'resultsDb_sqlite.db')
        self.driver = 'sqlite'
    else:
        if driver == 'sqlite':
            # Using non-default database, but may also specify directory root.
            if outDir is not None:
                database = os.path.join(outDir, database)
            self.database = database
            self.driver = driver
        else:
            # If not sqlite, then 'outDir' doesn't make much sense.
            self.database = database
            self.driver = driver
            self.host = host
            self.port = port

    if self.driver == 'sqlite':
        dbAddress = url.URL(self.driver, database=self.database)
    else:
        dbAddress = url.URL(self.driver,
                            username=DbAuth.username(self.host, str(self.port)),
                            password=DbAuth.password(self.host, str(self.port)),
                            host=self.host,
                            port=self.port,
                            database=self.database)
    engine = create_engine(dbAddress, echo=verbose)
    self.Session = sessionmaker(bind=engine)
    self.session = self.Session()
    # Create the tables, if they don't already exist.
    try:
        Base.metadata.create_all(engine)
    except DatabaseError:
        raise ValueError("Cannot create a %s database at %s. Check directory exists."
                         % (self.driver, self.database))
    self.slen = 1024
def run(self, dataId):
    """Select fluxMag0's of LsstSim images for a particular visit

    @param[in] dataId: data ID; must contain the mandatory "visit" key
    @return a pipeBase Struct containing:
    - fluxMagInfoList: a list of FluxMagInfo objects
    """
    try:
        runArgDict = self.runArgDictFromDataId(dataId)
        visit = runArgDict["visit"]
    except Exception:
        self.log.fatal(
            "dataId does not contain mandatory visit key: dataId: %s", dataId)
        raise  # re-raise: the query below cannot be built without a visit

    if self._display:
        self.log.info(self.config.database)

    db = MySQLdb.connect(
        host=self.config.host,
        port=self.config.port,
        db=self.config.database,
        user=DbAuth.username(self.config.host, str(self.config.port)),
        passwd=DbAuth.password(self.config.host, str(self.config.port)),
    )
    cursor = db.cursor()

    columnNames = tuple(FluxMagInfo.getColumnNames())

    queryStr = "select %s from Science_Ccd_Exposure where " % (", ".join(columnNames))
    dataTuple = ()

    # compute where clauses as a list of (clause, data)
    whereDataList = [
        ("visit = %s", visit),
    ]

    queryStr += " and ".join(wd[0] for wd in whereDataList)
    dataTuple += tuple(wd[1] for wd in whereDataList)
    if self._display:
        self.log.info("queryStr=%r; dataTuple=%s", queryStr, dataTuple)

    cursor.execute(queryStr, dataTuple)
    result = cursor.fetchall()
    fluxMagInfoList = [FluxMagInfo(r) for r in result]
    if self._display:
        self.log.info("Found %d exposures", len(fluxMagInfoList))

    return pipeBase.Struct(fluxMagInfoList=fluxMagInfoList)
def run(self, dataId):
    """Select fluxMag0's of LsstSim images for a particular visit

    @param[in] dataId: data ID; must contain the mandatory "visit" key
    @return a pipeBase Struct containing:
    - fluxMagInfoList: a list of FluxMagInfo objects
    """
    try:
        runArgDict = self.runArgDictFromDataId(dataId)
        visit = runArgDict["visit"]
    except Exception:
        self.log.fatal("dataId does not contain mandatory visit key: dataId: %s" % dataId)
        raise  # re-raise: the query below cannot be built without a visit

    if self._display:
        self.log.info(self.config.database)

    db = MySQLdb.connect(
        host=self.config.host,
        port=self.config.port,
        db=self.config.database,
        user=DbAuth.username(self.config.host, str(self.config.port)),
        passwd=DbAuth.password(self.config.host, str(self.config.port)),
    )
    cursor = db.cursor()

    columnNames = tuple(FluxMagInfo.getColumnNames())

    queryStr = "select %s from Science_Ccd_Exposure where " % (", ".join(columnNames))
    dataTuple = ()

    # compute where clauses as a list of (clause, data)
    whereDataList = [
        ("visit = %s", visit),
    ]

    queryStr += " and ".join(wd[0] for wd in whereDataList)
    dataTuple += tuple(wd[1] for wd in whereDataList)
    if self._display:
        self.log.info("queryStr=%r; dataTuple=%s" % (queryStr, dataTuple))

    cursor.execute(queryStr, dataTuple)
    result = cursor.fetchall()
    fluxMagInfoList = [FluxMagInfo(r) for r in result]
    if self._display:
        self.log.info("Found %d exposures" % (len(fluxMagInfoList),))

    return pipeBase.Struct(
        fluxMagInfoList=fluxMagInfoList,
    )
def testSetPolicy(self):
    self.assertTrue(DbAuth.available("lsst10.ncsa.uiuc.edu", "3306"))
    self.assertEqual(DbAuth.authString("lsst10.ncsa.uiuc.edu", "3306"),
                     "test:globular.test")
    self.assertEqual(DbAuth.username("lsst10.ncsa.uiuc.edu", "3306"), "test")
    self.assertEqual(DbAuth.password("lsst10.ncsa.uiuc.edu", "3306"),
                     "globular.test")
    self.assertTrue(DbAuth.available("lsst10.ncsa.uiuc.edu", "3307"))
    self.assertEqual(DbAuth.authString("lsst10.ncsa.uiuc.edu", "3307"),
                     "boris:natasha")
    self.assertEqual(DbAuth.username("lsst10.ncsa.uiuc.edu", "3307"), "boris")
    self.assertEqual(DbAuth.password("lsst10.ncsa.uiuc.edu", "3307"), "natasha")
    self.assertTrue(DbAuth.available("lsst9.ncsa.uiuc.edu", "3306"))
    self.assertEqual(DbAuth.authString("lsst9.ncsa.uiuc.edu", "3306"),
                     "rocky:squirrel")
    self.assertEqual(DbAuth.username("lsst9.ncsa.uiuc.edu", "3306"), "rocky")
    self.assertEqual(DbAuth.password("lsst9.ncsa.uiuc.edu", "3306"), "squirrel")
def connect(host, port, db, user=None):
    """Connect to the specified MySQL database server."""
    kwargs = dict(host=host, port=port, db=db)
    if user is not None:
        kwargs["user"] = user
    try:
        # See if we can connect without a password (e.g. via my.cnf)
        return MySQLdb.connect(**kwargs)
    except MySQLdb.Error:
        # Fall back to DbAuth
        kwargs["user"] = DbAuth.username(host, str(port))
        kwargs["passwd"] = DbAuth.password(host, str(port))
        return MySQLdb.connect(**kwargs)
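A short usage sketch of the connect() helper above; the host, port, and database name are hypothetical placeholders:

conn = connect("lsst-db.example.edu", 3306, "myDatabase")
try:
    cursor = conn.cursor()
    cursor.execute("SELECT 1")   # trivial query to verify the connection
    print(cursor.fetchone())
finally:
    conn.close()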
def testSetPolicy(self):
    self.assertTrue(DbAuth.available("lsst-db.ncsa.illinois.edu", "3306"))
    self.assertEqual(DbAuth.authString("lsst-db.ncsa.illinois.edu", "3306"),
                     "test:globular.test")
    self.assertEqual(DbAuth.username("lsst-db.ncsa.illinois.edu", "3306"), "test")
    self.assertEqual(DbAuth.password("lsst-db.ncsa.illinois.edu", "3306"),
                     "globular.test")
    self.assertTrue(DbAuth.available("lsst-db.ncsa.illinois.edu", "3307"))
    self.assertEqual(DbAuth.authString("lsst-db.ncsa.illinois.edu", "3307"),
                     "boris:natasha")
    self.assertEqual(DbAuth.username("lsst-db.ncsa.illinois.edu", "3307"), "boris")
    self.assertEqual(DbAuth.password("lsst-db.ncsa.illinois.edu", "3307"), "natasha")
    self.assertTrue(DbAuth.available("lsst9.ncsa.illinois.edu", "3306"))
    self.assertEqual(DbAuth.authString("lsst9.ncsa.illinois.edu", "3306"),
                     "rocky:squirrel")
    self.assertEqual(DbAuth.username("lsst9.ncsa.illinois.edu", "3306"), "rocky")
    self.assertEqual(DbAuth.password("lsst9.ncsa.illinois.edu", "3306"), "squirrel")
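For these assertions to pass, the policy loaded by the test would need one authInfo entry per host/port pair. A plausible sketch of that test policy file, assuming repeated authInfo blocks are matched on host and port:

database: {
    authInfo: {
        host: lsst-db.ncsa.illinois.edu
        port: 3306
        user: test
        password: globular.test
    }
    authInfo: {
        host: lsst-db.ncsa.illinois.edu
        port: 3307
        user: boris
        password: natasha
    }
    authInfo: {
        host: lsst9.ncsa.illinois.edu
        port: 3306
        user: rocky
        password: squirrel
    }
}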
def run():
    basename = os.path.basename(sys.argv[0])

    parser = argparse.ArgumentParser(
        prog=basename,
        description='''Takes a list of log files and ingests them into a database''',
        epilog='''example: condorLogIngest.py -H lsst10 -d testing -f worker.log''')

    parser.add_argument("-H", "--host", action="store", default=None, dest="host",
                        help="mysql server host", type=str, required=True)
    parser.add_argument("-p", "--port", action="store", default=3306, dest="port",
                        help="mysql server port", type=int)
    parser.add_argument("-d", "--database", action="store", default=None, dest="database",
                        help="database name", type=str, required=True)
    parser.add_argument("-f", "--file", action="store", default=None, dest="filenames",
                        help="condor log files", nargs='+', type=str, required=True)
    parser.add_argument("-v", "--verbose", action="store_true", dest="verbose",
                        help="verbose")

    args = parser.parse_args()

    host = args.host
    port = args.port
    database = args.database

    #
    # get database authorization info
    #
    dbAuth = DbAuth()
    user = dbAuth.username(host, str(port))
    password = dbAuth.password(host, str(port))

    # connect to the database
    dbm = DatabaseManager(host, port, user, password)

    # create the database if it doesn't exist
    if not dbm.dbExists(database):
        dbm.createDb(database)

    # create the LogIngestor, which creates all the tables, and will
    # be used to consolidate file information
    logIngestor = LogIngestor(dbm, database)

    # go through the list of files and ingest them, ignoring any
    # that don't exist.
    for filename in args.filenames:
        if not os.path.exists(filename):
            if args.verbose:
                print("warning: %s does not exist." % filename)
            continue
        logIngestor.ingest(filename)
def __init__(self, database=None, driver='sqlite', host=None, port=None,
             trackingDbverbose=False):
    """
    Instantiate the tracking database, creating its tables if needed.
    """
    self.verbose = trackingDbverbose
    # Connect to database
    # for sqlite, connecting to a non-existent database creates it automatically
    if database is None:
        # Default is a file in the current directory.
        self.database = os.path.join(os.getcwd(), 'trackingDb_sqlite.db')
        self.driver = 'sqlite'
    else:
        self.database = database
        self.driver = driver
        self.host = host
        self.port = port

    if self.driver == 'sqlite':
        dbAddress = url.URL(drivername=self.driver, database=self.database)
    else:
        dbAddress = url.URL(self.driver,
                            username=DbAuth.username(self.host, str(self.port)),
                            password=DbAuth.password(self.host, str(self.port)),
                            host=self.host,
                            port=self.port,
                            database=self.database)
    engine = create_engine(dbAddress, echo=self.verbose)
    if self.verbose:
        print('Created or connected to MAF tracking %s database at %s'
              % (self.driver, self.database))
    self.Session = sessionmaker(bind=engine)
    self.session = self.Session()
    # Create the tables, if they don't already exist.
    try:
        Base.metadata.create_all(engine)
    except DatabaseError:
        raise DatabaseError("Cannot create a %s database at %s. Check directory exists."
                            % (self.driver, self.database))
def main():
    # Setup command line options
    parser = argparse.ArgumentParser(
        description="Program which transposes a key-value table into a table "
                    "where each key is mapped to a column.")
    addDbOptions(parser)
    parser.add_argument(
        "-s", "--skip-keys", dest="skipKeys",
        help="Comma separated list of metadata keys to omit in the output table")
    parser.add_argument("-c", "--compress", dest="compress", action="store_true",
                        help="Lift keys with constant values into a view")
    parser.add_argument(
        "database", help="Name of database containing metadata table to transpose")
    parser.add_argument("metadataTable",
                        help="Name of metadata table to transpose")
    parser.add_argument("idCol",
                        help="Primary key column name for metadata table")
    parser.add_argument("outputTable",
                        help="Name of output table to create")
    ns = parser.parse_args()
    db, metadataTable, idCol, outputTable = (
        ns.database, ns.metadataTable, ns.idCol, ns.outputTable)
    if DbAuth.available(ns.host, str(ns.port)):
        ns.user = DbAuth.username(ns.host, str(ns.port))
        passwd = DbAuth.password(ns.host, str(ns.port))
    elif os.path.exists(os.path.join(os.environ["HOME"], ".mysql.cnf")):
        passwd = None
    else:
        passwd = getpass.getpass("%s's MySQL password: " % ns.user)
    skipCols = set()
    if ns.skipKeys is not None:
        skipCols = set([k.strip() for k in ns.skipKeys.split(",")])
    run(ns.host, ns.port, ns.user, passwd, db, metadataTable, idCol,
        outputTable, skipCols, ns.compress)
def __init__(self, host, database, user, port=3306, password=None):
    self.host = host
    self.port = port
    self.user = user
    self.database = database
    if password is None:
        if self.host is not None and self.port is not None and \
                DbAuth.available(self.host, str(self.port)):
            self.user = DbAuth.username(self.host, str(self.port))
            password = DbAuth.password(self.host, str(self.port))
        elif not os.path.exists(os.path.join(os.environ['HOME'], ".my.cnf")):
            password = getpass.getpass("%s's MySQL password: " % user)
    self.password = password
    self.mysqlCmd = ['mysql']
    if host is not None:
        self.mysqlCmd += ['-h', self.host]
    if port is not None:
        self.mysqlCmd += ['-P', str(self.port)]
    if user is not None:
        self.mysqlCmd += ['-u', self.user]
    if password is not None:
        self.mysqlCmd += ['-p' + self.password]
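A hypothetical use of the command list assembled above, running a single statement through the mysql client with subprocess; the class name MySqlWrapper and the connection values are placeholders, not names from this codebase:

import subprocess

wrapper = MySqlWrapper("lsst-db.example.edu", "myDatabase", "myuser")
# append a statement and the database to the prebuilt mysql command line
subprocess.check_call(wrapper.mysqlCmd + ["-e", "SELECT 1", "myDatabase"])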
def __init__(self, database=None, driver='sqlite', host=None, port=None,
             trackingDbverbose=False):
    """
    Instantiate the tracking database, creating its tables if needed.
    """
    self.verbose = trackingDbverbose
    # Connect to database
    # for sqlite, connecting to a non-existent database creates it automatically
    if database is None:
        # Default is a file in the current directory.
        self.database = os.path.join(os.getcwd(), 'trackingDb_sqlite.db')
        self.driver = 'sqlite'
    else:
        self.database = database
        self.driver = driver
        self.host = host
        self.port = port

    if self.driver == 'sqlite':
        dbAddress = url.URL(drivername=self.driver, database=self.database)
    else:
        dbAddress = url.URL(self.driver,
                            username=DbAuth.username(self.host, str(self.port)),
                            password=DbAuth.password(self.host, str(self.port)),
                            host=self.host,
                            port=self.port,
                            database=self.database)
    engine = create_engine(dbAddress, echo=self.verbose)
    if self.verbose:
        print('Created or connected to MAF tracking %s database at %s'
              % (self.driver, self.database))
    self.Session = sessionmaker(bind=engine)
    self.session = self.Session()
    # Create the tables, if they don't already exist.
    try:
        Base.metadata.create_all(engine)
    except DatabaseError:
        raise DatabaseError("Cannot create a %s database at %s. Check directory exists."
                            % (self.driver, self.database))
def dbConnection(self):
    """
    The pymssql connection to the catsim database used to query reference objects
    """
    if self._dbConnection is None:
        config = bcm.BaseCatalogConfig()
        config.load(os.path.join(getPackageDir("sims_catUtils"), "config", "db.py"))

        username = DbAuth.username(config.host, config.port)
        password = DbAuth.password(config.host, config.port)
        hostname = config.host
        if self.dbHostName is not None:
            hostname = self.dbHostName
        DBConnection = pymssql.connect(user=username,
                                       password=password,
                                       host=hostname,
                                       database=config.database,
                                       port=config.port)
        return DBConnection
    else:
        return self._dbConnection
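A brief usage sketch of the property above, issuing a query through the standard DB-API cursor interface; obj stands in for the instance defining the property, and the table name is illustrative only:

conn = obj.dbConnection
cursor = conn.cursor()
cursor.execute("SELECT COUNT(*) FROM SomeReferenceTable")
print(cursor.fetchone()[0])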
            database = os.path.join(outDir, database)
        self.database = database
        self.driver = driver
    else:
        # If not sqlite, then 'outDir' doesn't make much sense.
        self.database = database
        self.driver = driver
        self.host = host
        self.port = port

if self.driver == 'sqlite':
    dbAddress = url.URL(self.driver, database=self.database)
else:
    dbAddress = url.URL(self.driver,
                        username=DbAuth.username(self.host, str(self.port)),
                        password=DbAuth.password(self.host, str(self.port)),
                        host=self.host,
                        port=self.port,
                        database=self.database)
engine = create_engine(dbAddress, echo=verbose)
self.Session = sessionmaker(bind=engine)
self.session = self.Session()
# Create the tables, if they don't already exist.
try:
    Base.metadata.create_all(engine)
except DatabaseError:
    raise ValueError("Cannot create a %s database at %s. Check directory exists."
                     % (self.driver, self.database))
self.slen = 1024
self.stype = 'S%d' % (self.slen)
def run(self, coordList, filter):
    """Select LSST images suitable for coaddition in a particular region

    @param[in] coordList: list of coordinates defining region of interest;
        if None then select all images
    @param[in] filter: filter (e.g. "g", "r", "i"...)

    @return a pipeBase Struct containing:
    - exposureInfoList: a list of ExposureInfo objects, which have the following fields:
        - dataId: data ID of exposure (a dict)
        - coordList: a list of corner coordinates of the exposure (list of afwCoord.IcrsCoord)
        - fwhm: fwhm column
    """
    db = MySQLdb.connect(
        host=self.config.host,
        port=self.config.port,
        user=DbAuth.username(self.config.host, str(self.config.port)),
        passwd=DbAuth.password(self.config.host, str(self.config.port)),
        db=self.config.database,
    )
    cursor = db.cursor()

    if coordList is not None:
        # look for exposures that overlap the specified region

        # create table scisql.Region containing patch region
        coordStrList = ["%s, %s" % (c.getLongitude().asDegrees(),
                                    c.getLatitude().asDegrees()) for c in coordList]
        coordStr = ", ".join(coordStrList)
        coordCmd = "call scisql.scisql_s2CPolyRegion(scisql_s2CPolyToBin(%s), 10)" % (coordStr,)
        cursor.execute(coordCmd)
        cursor.nextset()  # ignore one-line result of coordCmd

        # find exposures
        queryStr = ("""select %s
            from Science_Ccd_Exposure as ccdExp,
                (select distinct scienceCcdExposureId
                from Science_Ccd_Exposure_To_Htm10 as ccdHtm inner join scisql.Region
                on (ccdHtm.htmId10 between scisql.Region.htmMin and scisql.Region.htmMax)) as idList
            where ccdExp.scienceCcdExposureId = idList.scienceCcdExposureId
                and filterName = %%s
                and fwhm < %%s
            """ % ExposureInfo.getColumnNames())
    else:
        # no region specified; look over the whole sky
        queryStr = ("""select %s
            from Science_Ccd_Exposure
            where filterName = %%s
                and fwhm < %%s
            """ % ExposureInfo.getColumnNames())

    if self.config.maxExposures is not None:
        queryStr += " limit %s" % (self.config.maxExposures,)

    dataTuple = (filter, self.config.maxFwhm)

    self.log.info("queryStr=%r; dataTuple=%s" % (queryStr, dataTuple))

    cursor.execute(queryStr, dataTuple)
    exposureInfoList = [ExposureInfo(result) for result in cursor]

    return pipeBase.Struct(
        exposureInfoList=exposureInfoList,
    )
dest="verbose", help="verbose") args = parser.parse_args() host = args.host port = args.port database = args.database # # get database authorization info # dbAuth = DbAuth() user = dbAuth.username(host, str(port)) password = dbAuth.password(host, str(port)) # connect to the database dbm = DatabaseManager(host, port, user, password) # create the database if it doesn't exist if not dbm.dbExists(database): dbm.createDb(database) # create the LogIngestor, which creates all the tables, and will # be used to consolidate file information logIngestor = LogIngestor(dbm, database) # go through the list of files and ingest them, ignoring any # that don't exist. for filename in args.filenames:
def report():
    basename = os.path.basename(sys.argv[0])

    parser = argparse.ArgumentParser(
        prog=basename,
        description='''A statistics reporting utility.  Use to print out information
about what happened during a run.  Takes as an argument run information previously
ingested into a named database by one of the ingest utilities.''',
        epilog='''example: report.py -H kaboom.ncsa.illinois.edu -p 3303 -d srp_2013_0601_140432 -S''')

    parser.add_argument("-H", "--host", action="store", default=None, dest="host",
                        help="mysql server host", type=str, required=True)
    parser.add_argument("-p", "--port", action="store", default=3306, dest="port",
                        help="mysql server port", type=int)
    parser.add_argument("-d", "--database", action="store", default=None, dest="database",
                        help="database name", type=str, required=True)
    parser.add_argument("-I", "--submits-per-interval", action="store_true", default=None,
                        dest="submits", help="number of submits to queue per interval")
    parser.add_argument("-S", "--slots-used-each-second", action="store_true", default=None,
                        dest="slots", help="slots used each second")
    parser.add_argument("-N", "--slots-used-each-interval", type=int, default=-1,
                        dest="interval", help="slots used each interval")
    parser.add_argument("-L", "--local-time-zone", action="store_true", default=False,
                        dest="localTimeZone", help="output dates converted to local time zone")
    parser.add_argument("-v", "--verbose", action="store_true", dest="verbose",
                        help="verbose")

    args = parser.parse_args()

    host = args.host
    port = args.port
    database = args.database

    #
    # get database authorization info
    #
    dbAuth = DbAuth()
    user = dbAuth.username(host, str(port))
    password = dbAuth.password(host, str(port))

    # connect to the database
    dbm = DatabaseManager(host, port, user, password)
    dbm.execCommand0('use ' + database)

    # command line arguments
    values = None
    submitTimes = SubmissionTimes(dbm)
    entries = submitTimes.getEntries()
    r = Report(dbm, args.localTimeZone)
    if args.submits:
        submitsPerInterval = SubmitsPerInterval(dbm, 1)
        values = submitsPerInterval.getValues()
        r.writePerTimeIntervals(values)
    elif args.slots:
        slotsPerSecond = SlotsPerSecond(dbm, entries)
        values = slotsPerSecond.getValues()
        r.writePerTimeIntervals(values)
    elif args.interval > -1:
        slotsPerInterval = SlotsPerInterval(dbm, entries, args.interval)
        values = slotsPerInterval.getValues()
        r.writePerTimeIntervals(values)
    else:
        printSummary(r)
    dbm.close()
def run(self, coordList, filter):
    """Select LSST images suitable for coaddition in a particular region

    @param[in] coordList: list of coordinates defining region of interest;
        if None then select all images
    @param[in] filter: filter (e.g. "g", "r", "i"...)

    @return a pipeBase Struct containing:
    - exposureInfoList: a list of ExposureInfo objects, which have the following fields:
        - dataId: data ID of exposure (a dict)
        - coordList: a list of corner coordinates of the exposure (list of afwCoord.IcrsCoord)
        - fwhm: fwhm column
    """
    db = MySQLdb.connect(
        host=self.config.host,
        port=self.config.port,
        user=DbAuth.username(self.config.host, str(self.config.port)),
        passwd=DbAuth.password(self.config.host, str(self.config.port)),
        db=self.config.database,
    )
    cursor = db.cursor()

    if coordList is not None:
        # look for exposures that overlap the specified region

        # create table scisql.Region containing patch region
        coordStrList = ["%s, %s" % (c.getLongitude().asDegrees(),
                                    c.getLatitude().asDegrees()) for c in coordList]
        coordStr = ", ".join(coordStrList)
        coordCmd = "call scisql.scisql_s2CPolyRegion(scisql_s2CPolyToBin(%s), 10)" % (coordStr,)
        cursor.execute(coordCmd)
        cursor.nextset()  # ignore one-line result of coordCmd

        # find exposures
        queryStr = ("""select %s
            from Science_Ccd_Exposure as ccdExp,
                (select distinct scienceCcdExposureId
                from Science_Ccd_Exposure_To_Htm10 as ccdHtm inner join scisql.Region
                on (ccdHtm.htmId10 between scisql.Region.htmMin and scisql.Region.htmMax)) as idList
            where ccdExp.scienceCcdExposureId = idList.scienceCcdExposureId
                and filterName = %%s
                and fwhm < %%s
            """ % ExposureInfo.getColumnNames())
    else:
        # no region specified; look over the whole sky
        queryStr = ("""select %s
            from Science_Ccd_Exposure
            where filterName = %%s
                and fwhm < %%s
            """ % ExposureInfo.getColumnNames())

    if self.config.maxExposures is not None:
        queryStr += " limit %s" % (self.config.maxExposures,)

    dataTuple = (filter, self.config.maxFwhm)

    self.log.info("queryStr=%r; dataTuple=%s", queryStr, dataTuple)

    cursor.execute(queryStr, dataTuple)
    exposureInfoList = [ExposureInfo(result) for result in cursor]

    return pipeBase.Struct(exposureInfoList=exposureInfoList)
def analyzeLogs(self, runId, inProgress=False):
    import MySQLdb
    jobStartRegex = re.compile(
        r"Processing job:"
        r"(\s+filter=(?P<filter>\w)"
        r"|\s+field=(?P<field>\d+)"
        r"|\s+camcol=(?P<camcol>\d)"
        r"|\s+run=(?P<run>\d+)"
        r"|\s+type=calexp){5}"
    )

    host = RunConfiguration.dbHost
    port = RunConfiguration.dbPort
    with MySQLdb.connect(
            host=host,
            port=port,
            user=self.dbUser,
            passwd=DbAuth.password(host, str(port))) as conn:
        runpat = '%' + runId + '%'
        conn.execute("SHOW DATABASES LIKE %s", (runpat,))
        ret = conn.fetchall()
        if ret is None or len(ret) == 0:
            raise NoMatchError("No match for run %s" % (runId,))
        elif len(ret) > 1:
            raise RuntimeError("Multiple runs match:\n" +
                               str([r[0] for r in ret]))
        dbName = ret[0][0]

    result = ""
    try:
        conn = MySQLdb.connect(
            host=host,
            port=port,
            user=self.dbUser,
            passwd=DbAuth.password(host, str(port)),
            db=dbName)

        cursor = conn.cursor()
        cursor.execute("""SELECT TIMESTAMP, timereceived FROM Logs
            WHERE id = (SELECT MIN(id) FROM Logs)""")
        row = cursor.fetchone()
        if row is None:
            if inProgress:
                return "No log entries yet\n"
            else:
                return "*** No log entries written\n"
        startTime, start = row
        result += "First orca log entry: %s\n" % (start,)

        cursor = conn.cursor()
        cursor.execute("""SELECT TIMESTAMP, timereceived FROM Logs
            WHERE id = (SELECT MAX(id) FROM Logs)""")
        stopTime, stop = cursor.fetchone()
        result += "Last orca log entry: %s\n" % (stop,)

        # TIMESTAMP is in nanoseconds; break the difference into h:mm:ss
        elapsed = int(stopTime) - int(startTime)
        elapsedHr = elapsed // (3600 * 1000 * 1000 * 1000)
        elapsed -= elapsedHr * 3600 * 1000 * 1000 * 1000
        elapsedMin = elapsed // (60 * 1000 * 1000 * 1000)
        elapsed -= elapsedMin * 60 * 1000 * 1000 * 1000
        elapsedSec = elapsed / 1.0e9
        result += "Orca elapsed time: %d:%02d:%06.3f\n" % (elapsedHr, elapsedMin, elapsedSec)

        cursor = conn.cursor()
        cursor.execute("""
            SELECT COUNT(DISTINCT workerid) FROM
            (SELECT workerid FROM Logs LIMIT 10000) AS sample""")
        nPipelines = cursor.fetchone()[0]
        result += "%d pipelines used\n" % (nPipelines,)

        cursor = conn.cursor()
        cursor.execute("""
            SELECT CASE gid
                WHEN 1 THEN 'pipeline shutdowns seen'
                WHEN 2 THEN 'CCDs attempted'
                WHEN 3 THEN 'src writes'
                WHEN 4 THEN 'calexp writes'
            END AS descr, COUNT(*) FROM (
                SELECT CASE
                    WHEN COMMENT LIKE 'Processing job:% filter=0%' THEN 1
                    WHEN COMMENT LIKE 'Processing job:%'
                        AND COMMENT NOT LIKE '% filter=0%' THEN 2
                    WHEN COMMENT LIKE 'Ending write to BoostStorage%/src%' THEN 3
                    WHEN COMMENT LIKE 'Ending write to FitsStorage%/calexp%' THEN 4
                    ELSE 0
                END AS gid
                FROM Logs
            ) AS stats WHERE gid > 0 GROUP BY gid""")
        nShutdown = 0
        for d, n in cursor.fetchall():
            result += "%d %s\n" % (n, d)
            if d == 'pipeline shutdowns seen':
                nShutdown = n
        if nShutdown != nPipelines:
            if not inProgress:
                if nShutdown == 0:
                    result += "\n*** No pipelines were shut down properly\n"
                else:
                    result += "\n*** Shutdowns do not match pipelines\n"

        cursor = conn.cursor()
        cursor.execute("""
            SELECT workerid, COMMENT FROM Logs JOIN
            (SELECT MAX(id) AS last FROM Logs GROUP BY workerid) AS a
            ON (Logs.id = a.last)""")
        for worker, msg in cursor.fetchall():
            if inProgress:
                result += "Pipeline %s last status: %s\n" % (worker, msg)
            else:
                result += "Pipeline %s ended with: %s\n" % (worker, msg)

        cursor = conn.cursor()
        cursor.execute("""
            SELECT COUNT(*) FROM Logs
            WHERE (
                COMMENT LIKE '%rror%'
                OR COMMENT LIKE '%xception%'
                OR COMMENT LIKE '%arning%'
                OR COMMENT LIKE 'Fail'
                OR COMMENT LIKE 'fail'
            )
            AND COMMENT NOT LIKE '%failureStage%'
            AND COMMENT NOT LIKE '%failure stage%'
            AND COMMENT NOT LIKE 'failSerialName%'
            AND COMMENT NOT LIKE 'failParallelName%'
            AND COMMENT NOT LIKE 'Distortion fitter failed to improve%'
            AND COMMENT NOT LIKE '%magnitude error column%'
            AND COMMENT NOT LIKE '%errorFlagged%'
            AND COMMENT NOT LIKE 'Skipping process due to error'
            """)
        result += "%s failures seen\n" % cursor.fetchone()[0]

        cursor = conn.cursor(MySQLdb.cursors.DictCursor)
        cursor.execute("""
            SELECT * FROM Logs
            WHERE COMMENT LIKE 'Processing job:%'
            OR (
                (
                    COMMENT LIKE '%rror%'
                    OR COMMENT LIKE '%xception%'
                    OR COMMENT LIKE '%arning%'
                    OR COMMENT LIKE '%Fail%'
                    OR COMMENT LIKE '%fail%'
                )
                AND COMMENT NOT LIKE '%failureStage%'
                AND COMMENT NOT LIKE '%failure stage%'
                AND COMMENT NOT LIKE 'failSerialName%'
                AND COMMENT NOT LIKE 'failParallelName%'
                AND COMMENT NOT LIKE 'Distortion fitter failed to improve%'
                AND COMMENT NOT LIKE '%magnitude error column%'
                AND COMMENT NOT LIKE '%errorFlagged%'
                AND COMMENT NOT LIKE 'Skipping process due to error'
            )
            ORDER BY id;""")
        jobs = dict()
        for d in cursor.fetchall():
            match = jobStartRegex.search(d['COMMENT'])
            if match:
                jobs[d['workerid']] = "Band %s Run %s Camcol %s Frame %s" % (
                    match.group("filter"), match.group("run"),
                    match.group("camcol"), match.group("field"))
            elif not d['COMMENT'].startswith('Processing job:'):
                if d['workerid'] in jobs:
                    job = jobs[d['workerid']]
                else:
                    job = "unknown"
                result += "\n*** Error in %s in stage %s on %s:\n" % (
                    job, d['stagename'], d['workerid'])
                lines = d['COMMENT'].split('\n')
                i = len(lines) - 1
                message = lines[i].strip()
                # Skip blank lines at end
                while i > 0 and message == "":
                    i -= 1
                    message = lines[i].strip()
                # Go back until we find a traceback line with " in "
                while i > 0 and lines[i].find(" in ") == -1:
                    i -= 1
                    message = lines[i].strip() + "\n" + message
                result += message + "\n"
    finally:
        conn.close()

    outputDir = os.path.join(self.options.output, runId)
    logFile = os.path.join(outputDir, "run", "unifiedPipeline.log")
    with open(logFile, "r") as log:
        try:
            log.seek(-500, 2)
        except IOError:
            pass
        tail = log.read(500)
        if not tail.endswith("logger handled...and...done!\n"):
            result += "\n*** Unified pipeline log file\n"
            result += "(last 500 bytes)... " + tail + "\n"

    for logFile in glob.glob(os.path.join(outputDir, "work", "*", "launch.log")):
        with open(logFile, "r") as log:
            try:
                log.seek(-500, 2)
            except IOError:
                pass
            tail = log.read(500)
            if not re.search(r"harness.runPipeline: workerid \w+$", tail) \
                    and not re.search(r"Applying aperture", tail) \
                    and tail != "done. Now starting job office\n":
                result += "\n*** " + logFile + "\n"
                result += "(last 500 bytes)... " + tail + "\n"
    return result
def run(self, dataId, coordList):
    """Select fluxMag0's of SDSS images for a particular run

    @param[in] dataId: a dataId containing at least a run and filter
    @param[in] coordList: list of coordinates defining region of interest

    @return a pipeBase Struct containing:
    - fluxMagInfoList: a list of FluxMagInfo objects
    """
    argDict = self.runArgDictFromDataId(dataId)
    run = argDict["run"]
    filter = argDict["filter"]

    if filter not in set(("u", "g", "r", "i", "z")):
        raise RuntimeError("filter=%r is an invalid name" % (filter,))
    filterDict = {"u": 0, "g": 1, "r": 2, "i": 3, "z": 4}

    if self._display:
        self.log.info(self.config.database)
    db = MySQLdb.connect(
        host=self.config.host,
        port=self.config.port,
        db=self.config.database,
        user=DbAuth.username(self.config.host, str(self.config.port)),
        passwd=DbAuth.password(self.config.host, str(self.config.port)),
    )
    cursor = db.cursor()

    columnNames = tuple(FluxMagInfo.getColumnNames())

    queryStr = "select %s from Science_Ccd_Exposure where " % (", ".join(columnNames))
    dataTuple = ()

    if coordList is not None:
        # look for exposures that overlap the specified region
        for c in coordList:
            dataTuple += (c.getLongitude().asDegrees(), c.getLatitude().asDegrees())
        queryStr += " scisql_s2PtInCPoly(ra, decl"
        queryStr += ", %s, %s" * len(coordList)
        queryStr += ") = 1 and "

    # compute where clauses as a list of (clause, data)
    whereDataList = [
        ("filterId = %s", filterDict[filter]),
        ("run = %s", run),
    ]

    queryStr += " and ".join(wd[0] for wd in whereDataList)
    dataTuple += tuple(wd[1] for wd in whereDataList)

    queryStr += " order by field desc"
    if self._display:
        self.log.info("queryStr=%r; dataTuple=%s" % (queryStr, dataTuple))

    cursor.execute(queryStr, dataTuple)
    exposureInfoList = [FluxMagInfo(result) for result in cursor]
    if self._display:
        self.log.info("Found %d exposures" % (len(exposureInfoList),))

    return pipeBase.Struct(
        fluxMagInfoList=exposureInfoList,
    )
def analyzeLogs(self, runId, inProgress=False):
    import MySQLdb
    jobStartRegex = re.compile(
        r"Processing job:"
        r"(\s+raft=(?P<raft>\d,\d)"
        r"|\s+sensor=(?P<sensor>\d,\d)"
        r"|\s+type=calexp"
        r"|\s+visit=(?P<visit>\d+)){4}"
    )

    host = RunConfiguration.dbHost
    port = RunConfiguration.dbPort
    with MySQLdb.connect(
            host=host,
            port=port,
            user=self.dbUser,
            passwd=DbAuth.password(host, str(port))) as conn:
        runpat = '%' + runId + '%'
        conn.execute("SHOW DATABASES LIKE %s", (runpat,))
        ret = conn.fetchall()
        if ret is None or len(ret) == 0:
            raise NoMatchError("No match for run %s" % (runId,))
        elif len(ret) > 1:
            raise RuntimeError("Multiple runs match:\n" +
                               str([r[0] for r in ret]))
        dbName = ret[0][0]

    result = ""
    try:
        conn = MySQLdb.connect(
            host=host,
            port=port,
            user=self.dbUser,
            passwd=DbAuth.password(host, str(port)),
            db=dbName)

        cursor = conn.cursor()
        cursor.execute("""SELECT TIMESTAMP, timereceived FROM Logs
            WHERE id = (SELECT MIN(id) FROM Logs)""")
        row = cursor.fetchone()
        if row is None:
            if inProgress:
                return "No log entries yet\n"
            else:
                return "*** No log entries written\n"
        startTime, start = row
        result += "First orca log entry: %s\n" % (start,)

        cursor = conn.cursor()
        cursor.execute("""SELECT TIMESTAMP, timereceived FROM Logs
            WHERE id = (SELECT MAX(id) FROM Logs)""")
        stopTime, stop = cursor.fetchone()
        result += "Last orca log entry: %s\n" % (stop,)

        # TIMESTAMP is in nanoseconds; break the difference into h:mm:ss
        elapsed = int(stopTime) - int(startTime)
        elapsedHr = elapsed // (3600 * 1000 * 1000 * 1000)
        elapsed -= elapsedHr * 3600 * 1000 * 1000 * 1000
        elapsedMin = elapsed // (60 * 1000 * 1000 * 1000)
        elapsed -= elapsedMin * 60 * 1000 * 1000 * 1000
        elapsedSec = elapsed / 1.0e9
        result += "Orca elapsed time: %d:%02d:%06.3f\n" % (
            elapsedHr, elapsedMin, elapsedSec)

        cursor = conn.cursor()
        cursor.execute("""
            SELECT COUNT(DISTINCT workerid) FROM
            (SELECT workerid FROM Logs LIMIT 10000) AS sample""")
        nPipelines = cursor.fetchone()[0]
        result += "%d pipelines used\n" % (nPipelines,)

        cursor = conn.cursor()
        cursor.execute("""
            SELECT CASE gid
                WHEN 1 THEN 'pipeline shutdowns seen'
                WHEN 2 THEN 'CCDs attempted'
                WHEN 3 THEN 'src writes'
                WHEN 4 THEN 'calexp writes'
            END AS descr, COUNT(*) FROM (
                SELECT CASE
                    WHEN COMMENT LIKE 'Processing job:% visit=0' THEN 1
                    WHEN COMMENT LIKE 'Processing job:%'
                        AND COMMENT NOT LIKE '% visit=0' THEN 2
                    WHEN COMMENT LIKE 'Ending write to BoostStorage%/src%' THEN 3
                    WHEN COMMENT LIKE 'Ending write to FitsStorage%/calexp%' THEN 4
                    ELSE 0
                END AS gid
                FROM Logs
            ) AS stats WHERE gid > 0 GROUP BY gid""")
        nShutdown = 0
        for d, n in cursor.fetchall():
            result += "%d %s\n" % (n, d)
            if d == 'pipeline shutdowns seen':
                nShutdown = n
        if nShutdown != nPipelines:
            if not inProgress:
                if nShutdown == 0:
                    result += "\n*** No pipelines were shut down properly\n"
                else:
                    result += "\n*** Shutdowns do not match pipelines\n"

        cursor = conn.cursor()
        cursor.execute("""
            SELECT workerid, COMMENT FROM Logs JOIN
            (SELECT MAX(id) AS last FROM Logs GROUP BY workerid) AS a
            ON (Logs.id = a.last)""")
        for worker, msg in cursor.fetchall():
            if inProgress:
                result += "Pipeline %s last status: %s\n" % (worker, msg)
            else:
                result += "Pipeline %s ended with: %s\n" % (worker, msg)

        cursor = conn.cursor()
        cursor.execute("""
            SELECT COUNT(*) FROM Logs
            WHERE (
                COMMENT LIKE '%rror%'
                OR COMMENT LIKE '%xception%'
                OR COMMENT LIKE '%arning%'
                OR COMMENT LIKE 'Fail'
                OR COMMENT LIKE 'fail'
            )
            AND COMMENT NOT LIKE '%failureStage%'
            AND COMMENT NOT LIKE '%failure stage%'
            AND COMMENT NOT LIKE 'failSerialName%'
            AND COMMENT NOT LIKE 'failParallelName%'
            AND COMMENT NOT LIKE 'Distortion fitter failed to improve%'
            AND COMMENT NOT LIKE '%magnitude error column%'
            AND COMMENT NOT LIKE '%errorFlagged%'
            AND COMMENT NOT LIKE 'Skipping process due to error'
            """)
        result += "%s failures seen\n" % cursor.fetchone()[0]

        cursor = conn.cursor(MySQLdb.cursors.DictCursor)
        cursor.execute("""
            SELECT * FROM Logs
            WHERE COMMENT LIKE 'Processing job:%'
            OR (
                (
                    COMMENT LIKE '%rror%'
                    OR COMMENT LIKE '%xception%'
                    OR COMMENT LIKE '%arning%'
                    OR COMMENT LIKE '%Fail%'
                    OR COMMENT LIKE '%fail%'
                )
                AND COMMENT NOT LIKE '%failureStage%'
                AND COMMENT NOT LIKE '%failure stage%'
                AND COMMENT NOT LIKE 'failSerialName%'
                AND COMMENT NOT LIKE 'failParallelName%'
                AND COMMENT NOT LIKE 'Distortion fitter failed to improve%'
                AND COMMENT NOT LIKE '%magnitude error column%'
                AND COMMENT NOT LIKE '%errorFlagged%'
                AND COMMENT NOT LIKE 'Skipping process due to error'
            )
            ORDER BY id;""")
        jobs = dict()
        for d in cursor.fetchall():
            match = jobStartRegex.search(d['COMMENT'])
            if match:
                jobs[d['workerid']] = "Visit %s Raft %s Sensor %s" % (
                    match.group("visit"), match.group("raft"),
                    match.group("sensor"))
            elif not d['COMMENT'].startswith('Processing job:'):
                if d['workerid'] in jobs:
                    job = jobs[d['workerid']]
                else:
                    job = "unknown"
                result += "\n*** Error in %s in stage %s on %s:\n" % (
                    job, d['stagename'], d['workerid'])
                lines = d['COMMENT'].split('\n')
                i = len(lines) - 1
                message = lines[i].strip()
                # Skip blank lines at end
                while i > 0 and message == "":
                    i -= 1
                    message = lines[i].strip()
                # Go back until we find a traceback line with " in "
                while i > 0 and lines[i].find(" in ") == -1:
                    i -= 1
                    message = lines[i].strip() + "\n" + message
                result += message + "\n"
    finally:
        conn.close()

    outputDir = os.path.join(self.options.output, runId)
    logFile = os.path.join(outputDir, "run", "unifiedPipeline.log")
    with open(logFile, "r") as log:
        try:
            log.seek(-500, 2)
        except IOError:
            pass
        tail = log.read(500)
        if not tail.endswith("logger handled...and...done!\n"):
            result += "\n*** Unified pipeline log file\n"
            result += "(last 500 bytes)... " + tail + "\n"

    for logFile in glob.glob(os.path.join(outputDir, "work", "*", "launch.log")):
        with open(logFile, "r") as log:
            try:
                log.seek(-500, 2)
            except IOError:
                pass
            tail = log.read(500)
            if not re.search(r"harness.runPipeline: workerid \w+$", tail) \
                    and not re.search(r"Applying aperture", tail) \
                    and tail != "done. Now starting job office\n":
                result += "\n*** " + logFile + "\n"
                result += "(last 500 bytes)... " + tail + "\n"
    return result
def run(self, coordList, filter):
    """Select Decam images suitable for coaddition in a particular region

    @param[in] filter: filter for images (one of "g", "r", "i", "z" or "Y")
    @param[in] coordList: list of coordinates defining region of interest

    @return a pipeBase Struct containing:
    - exposureInfoList: a list of ExposureInfo objects
    """
    if filter not in set(("g", "r", "i", "z", "Y")):
        raise RuntimeError("filter=%r is an invalid name" % (filter,))

    read_default_file = os.path.expanduser("~/.my.cnf")

    try:
        open(read_default_file)
        kwargs = dict(
            read_default_file=read_default_file,
        )
    except IOError:
        kwargs = dict(
            user=DbAuth.username(self.config.host, str(self.config.port)),
            passwd=DbAuth.password(self.config.host, str(self.config.port)),
        )

    db = MySQLdb.connect(
        host=self.config.host,
        port=self.config.port,
        db=self.config.database,
        **kwargs
    )
    cursor = db.cursor()

    columnNames = tuple(ExposureInfo.getColumnNames())
    if not columnNames:
        raise RuntimeError("Bug: no column names")
    queryStr = "select %s " % (", ".join(columnNames),)
    dataTuple = ()  # tuple(columnNames)

    if coordList is not None:
        # look for exposures that overlap the specified region

        # create table scisql.Region containing patch region
        coordStrList = ["%s, %s" % (c.getLongitude().asDegrees(),
                                    c.getLatitude().asDegrees()) for c in coordList]
        coordStr = ", ".join(coordStrList)
        coordCmd = "call scisql.scisql_s2CPolyRegion(scisql_s2CPolyToBin(%s), 10)" % (coordStr,)
        cursor.execute(coordCmd)
        cursor.nextset()  # ignore one-line result of coordCmd

        queryStr += """
        from %s as ccdExp,
            (select distinct id
            from y1CcdQuality_To_Htm10 as ccdHtm inner join scisql.Region
            on (ccdHtm.htmId10 between scisql.Region.htmMin and scisql.Region.htmMax)
            where ccdHtm.filter = %%s) as idList
        where ccdExp.id = idList.id and """ % (self.config.table,)
        dataTuple += (filter,)
    else:
        # no region specified; look over the whole sky
        queryStr += " from %s as ccdExp where " % (self.config.table,)

    # compute where clauses as a list of (clause, data)
    whereDataList = [
        ("filter = %s", filter),
    ]

    queryStr += " and ".join(wd[0] for wd in whereDataList)
    dataTuple += tuple(wd[1] for wd in whereDataList)

    self.log.info("queryStr=%r; dataTuple=%s" % (queryStr, dataTuple))

    cursor.execute(queryStr, dataTuple)
    exposureInfoList = [ExposureInfo(result) for result in cursor]

    return pipeBase.Struct(
        exposureInfoList=exposureInfoList,
    )
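The read_default_file branch in this and the following selectors lets MySQLdb pick up credentials from a standard MySQL option file instead of DbAuth. A minimal ~/.my.cnf for that path, with placeholder values:

[client]
user = myuser
password = mypassword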
def run(self, coordList, filter, strip=None):
    """Select SDSS images suitable for coaddition in a particular region

    @param[in] filter: filter for images (one of "u", "g", "r", "i" or "z")
    @param[in] coordList: list of coordinates defining region of interest
    @param[in] strip: SDSS strip to select; None selects both strips

    @return a pipeBase Struct containing:
    - exposureInfoList: a list of ExposureInfo objects

    @raise RuntimeError if filter not one of "u", "g", "r", "i" or "z"
    """
    if filter not in set(("u", "g", "r", "i", "z")):
        raise RuntimeError("filter=%r is an invalid name" % (filter,))

    read_default_file = os.path.expanduser("~/.my.cnf")

    try:
        open(read_default_file)
        kwargs = dict(
            read_default_file=read_default_file,
        )
    except IOError:
        kwargs = dict(
            user=DbAuth.username(self.config.host, str(self.config.port)),
            passwd=DbAuth.password(self.config.host, str(self.config.port)),
        )

    db = MySQLdb.connect(
        host=self.config.host,
        port=self.config.port,
        db=self.config.database,
        **kwargs
    )
    cursor = db.cursor()

    columnNames = tuple(ExposureInfo.getColumnNames())
    if not columnNames:
        raise RuntimeError("Bug: no column names")
    queryStr = "select %s " % (", ".join(columnNames),)
    dataTuple = ()  # tuple(columnNames)

    if coordList is not None:
        # look for exposures that overlap the specified region

        # create table scisql.Region containing patch region
        coordStrList = ["%s, %s" % (c.getLongitude().asDegrees(),
                                    c.getLatitude().asDegrees()) for c in coordList]
        coordStr = ", ".join(coordStrList)
        coordCmd = "call scisql.scisql_s2CPolyRegion(scisql_s2CPolyToBin(%s), 10)" % (coordStr,)
        cursor.execute(coordCmd)
        cursor.nextset()  # ignore one-line result of coordCmd

        queryStr += """
        from %s as ccdExp,
            (select distinct fieldid
            from SeasonFieldQuality_To_Htm10 as ccdHtm inner join scisql.Region
            on (ccdHtm.htmId10 between scisql.Region.htmMin and scisql.Region.htmMax)
            where ccdHtm.filter = %%s) as idList
        where ccdExp.fieldid = idList.fieldid and """ % (self.config.table,)
        dataTuple += (filter,)
    else:
        # no region specified; look over the whole sky
        queryStr += """
        from %s as ccdExp where """ % (self.config.table,)

    # compute where clauses as a list of (clause, data)
    whereDataList = [
        ("filter = %s", filter),
    ]

    if self.config.camcols is not None:
        whereDataList.append(_whereDataFromList("camcol", self.config.camcols))

    if strip is not None:  # None corresponds to query for both strips: no constraint added
        whereDataList.append(("strip = %s", strip))

    queryStr += " and ".join(wd[0] for wd in whereDataList)
    dataTuple += tuple(wd[1] for wd in whereDataList)

    self.log.info("queryStr=%r; dataTuple=%s" % (queryStr, dataTuple))

    cursor.execute(queryStr, dataTuple)
    exposureInfoList = [ExposureInfo(result) for result in cursor]

    runExpInfoSetDict = dict()
    for expInfo in exposureInfoList:
        run = expInfo.dataId["run"]
        expInfoSet = runExpInfoSetDict.get(run)
        if expInfoSet:
            expInfoSet.add(expInfo)
        else:
            runExpInfoSetDict[run] = set([expInfo])

    self.log.info("Before quality cuts found %d exposures in %d runs"
                  % (len(exposureInfoList), len(runExpInfoSetDict)))

    goodRunSet = set()
    goodExposureInfoList = []
    if self.config.rejectWholeRuns:
        # reject runs for which any exposure does not meet our quality criteria
        # or the run begins or ends in the region
        regionRaRange = None
        regionCtrRa = None
        if coordList is not None:
            regionRaRange = _computeRaRange(coordList)
            regionCtrRa = (regionRaRange[0] + regionRaRange[1]) * 0.5

        numRangeCuts = 0
        for run, expInfoSet in runExpInfoSetDict.items():
            runRaRange = None
            for expInfo in expInfoSet:
                if self._isBadExposure(expInfo):
                    break

                if regionRaRange is not None:
                    expRaRange = _computeRaRange(expInfo.coordList, ctrRa=regionCtrRa)
                    if runRaRange is None:
                        runRaRange = expRaRange
                    else:
                        runRaRange = (min(runRaRange[0], expRaRange[0]),
                                      max(runRaRange[1], expRaRange[1]))
            else:
                # all exposures in this run are valid;
                # if appropriate, check that the run starts and ends outside the region
                if regionRaRange is not None:
                    if (runRaRange[0] > regionRaRange[0]) or (runRaRange[1] < regionRaRange[1]):
                        numRangeCuts += 1
                        continue

                goodExposureInfoList += list(expInfoSet)
                goodRunSet.add(run)
        self.log.info("Rejected %d whole runs, including %d for incomplete range"
                      % (len(runExpInfoSetDict) - len(goodRunSet), numRangeCuts))
    else:
        # reject individual exposures which do not meet our quality criteria
        for expInfo in exposureInfoList:
            if not self._isBadExposure(expInfo):
                goodExposureInfoList.append(expInfo)
                goodRunSet.add(expInfo.dataId["run"])
        self.log.info("Rejected %d individual exposures"
                      % (len(exposureInfoList) - len(goodExposureInfoList),))

    exposureInfoList = goodExposureInfoList
    self.log.info("After quality cuts, found %d exposures in %d runs"
                  % (len(exposureInfoList), len(goodRunSet)))

    if exposureInfoList:
        # compute qscore according to RHL's formula and sort by it
        qArr = np.array([expInfo.q for expInfo in exposureInfoList])
        qMax = np.percentile(qArr, 95.0)
        for expInfo in exposureInfoList:
            expInfo.qscore = (expInfo.q / qMax) - expInfo.quality
        exposureInfoList.sort(key=lambda expInfo: expInfo.qscore)

        if self.config.maxExposures is not None:
            # select config.maxExposures exposures with the highest qscore
            exposureInfoList = exposureInfoList[0:self.config.maxExposures]
            self.log.info("After maxExposures cut, found %d exposures"
                          % (len(exposureInfoList),))
        elif self.config.maxRuns is not None:
            # select config.maxRuns runs with the highest median qscore
            # (of those exposures that overlap the patch)
            runQualListDict = dict()
            for expInfo in exposureInfoList:
                run = expInfo.dataId["run"]
                qualList = runQualListDict.get(run)
                if qualList:
                    qualList.append(expInfo.qscore)
                else:
                    runQualListDict[run] = [expInfo.qscore]

            if len(runQualListDict) > self.config.maxRuns:
                qualRunList = []
                for run, qualList in runQualListDict.items():
                    runQscore = np.median(qualList)
                    qualRunList.append((runQscore, run))
                qualRunList.sort()
                qualRunList = qualRunList[0:self.config.maxRuns]

                goodRunSet = set(qr[1] for qr in qualRunList)
                exposureInfoList = [ei for ei in exposureInfoList
                                    if ei.dataId["run"] in goodRunSet]

    return pipeBase.Struct(
        exposureInfoList=exposureInfoList,
    )
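The SDSS selectors above and below call a _whereDataFromList helper that is not shown in this section. A plausible sketch, assuming it expands a column name and a list of values into an SQL IN clause plus its data tuple:

def _whereDataFromList(name, valueList):
    """Return (whereClause, dataTuple) for 'name in (value list)'.

    For example, name="camcol" and valueList=(1, 3) yields
    ("camcol in (%s, %s)", (1, 3)).
    """
    if not valueList:
        raise RuntimeError("valueList is empty")
    placeholders = ", ".join(["%s"] * len(valueList))
    return ("%s in (%s)" % (name, placeholders), tuple(valueList))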
def __init__(self, outDir=None, database=None, driver='sqlite',
             host=None, port=None, verbose=False):
    """
    Instantiate the results database, creating metrics, plots and summarystats tables.
    """
    # Connect to database
    # for sqlite, connecting to a non-existent database creates it automatically
    if database is None:
        # Using default value for database name, should specify directory.
        if outDir is None:
            outDir = '.'
        # Check for output directory, make if needed.
        if not os.path.isdir(outDir):
            try:
                os.makedirs(outDir)
            except OSError as msg:
                raise OSError(msg, '\n  (If this was the database file (not outDir), '
                              'remember to use kwarg "database")')
        self.database = os.path.join(outDir, 'resultsDb_sqlite.db')
        self.driver = 'sqlite'
    else:
        if driver == 'sqlite':
            # Using non-default database, but may also specify directory root.
            if outDir is not None:
                database = os.path.join(outDir, database)
            self.database = database
            self.driver = driver
        else:
            # If not sqlite, then 'outDir' doesn't make much sense.
            self.database = database
            self.driver = driver
            self.host = host
            self.port = port

    if self.driver == 'sqlite':
        dbAddress = url.URL(self.driver, database=self.database)
    else:
        dbAddress = url.URL(self.driver,
                            username=DbAuth.username(self.host, str(self.port)),
                            password=DbAuth.password(self.host, str(self.port)),
                            host=self.host,
                            port=self.port,
                            database=self.database)
    engine = create_engine(dbAddress, echo=verbose)
    self.Session = sessionmaker(bind=engine)
    self.session = self.Session()
    # Create the tables, if they don't already exist.
    try:
        Base.metadata.create_all(engine)
    except DatabaseError:
        raise ValueError("Cannot create a %s database at %s. Check directory exists."
                         % (self.driver, self.database))
    self.slen = 1024
def run(self, coordList, filter):
    """Select Decam images suitable for coaddition in a particular region

    @param[in] filter: filter for images (one of "g", "r", "i", "z" or "Y")
    @param[in] coordList: list of coordinates defining region of interest

    @return a pipeBase Struct containing:
    - exposureInfoList: a list of ExposureInfo objects
    """
    if filter not in set(("g", "r", "i", "z", "Y")):
        raise RuntimeError("filter=%r is an invalid name" % (filter,))

    read_default_file = os.path.expanduser("~/.my.cnf")

    try:
        open(read_default_file)
        kwargs = dict(read_default_file=read_default_file)
    except IOError:
        kwargs = dict(
            user=DbAuth.username(self.config.host, str(self.config.port)),
            passwd=DbAuth.password(self.config.host, str(self.config.port)),
        )

    db = MySQLdb.connect(host=self.config.host,
                         port=self.config.port,
                         db=self.config.database,
                         **kwargs)
    cursor = db.cursor()

    columnNames = tuple(ExposureInfo.getColumnNames())
    if not columnNames:
        raise RuntimeError("Bug: no column names")
    queryStr = "select %s " % (", ".join(columnNames),)
    dataTuple = ()  # tuple(columnNames)

    if coordList is not None:
        # look for exposures that overlap the specified region

        # create table scisql.Region containing patch region
        coordStrList = ["%s, %s" % (c.getLongitude().asDegrees(),
                                    c.getLatitude().asDegrees()) for c in coordList]
        coordStr = ", ".join(coordStrList)
        coordCmd = "call scisql.scisql_s2CPolyRegion(scisql_s2CPolyToBin(%s), 10)" % (coordStr,)
        cursor.execute(coordCmd)
        cursor.nextset()  # ignore one-line result of coordCmd

        queryStr += """
        from %s as ccdExp,
            (select distinct id
            from y1CcdQuality_To_Htm10 as ccdHtm inner join scisql.Region
            on (ccdHtm.htmId10 between scisql.Region.htmMin and scisql.Region.htmMax)
            where ccdHtm.filter = %%s) as idList
        where ccdExp.id = idList.id and """ % (self.config.table,)
        dataTuple += (filter,)
    else:
        # no region specified; look over the whole sky
        queryStr += " from %s as ccdExp where " % (self.config.table,)

    # compute where clauses as a list of (clause, data)
    whereDataList = [("filter = %s", filter)]

    queryStr += " and ".join(wd[0] for wd in whereDataList)
    dataTuple += tuple(wd[1] for wd in whereDataList)

    self.log.info("queryStr=%r; dataTuple=%s" % (queryStr, dataTuple))

    cursor.execute(queryStr, dataTuple)
    exposureInfoList = [ExposureInfo(result) for result in cursor]

    return pipeBase.Struct(exposureInfoList=exposureInfoList)
def run(self, dataId, coordList):
    """Select fluxMag0's of SDSS images for a particular run

    @param[in] dataId: a dataId containing at least a run and filter
    @param[in] coordList: list of coordinates defining region of interest

    @return a pipeBase Struct containing:
    - fluxMagInfoList: a list of FluxMagInfo objects
    """
    argDict = self.runArgDictFromDataId(dataId)
    run = argDict["run"]
    filter = argDict["filter"]

    if filter not in set(("u", "g", "r", "i", "z")):
        raise RuntimeError("filter=%r is an invalid name" % (filter,))
    filterDict = {"u": 0, "g": 1, "r": 2, "i": 3, "z": 4}

    if self._display:
        self.log.info(self.config.database)
    db = MySQLdb.connect(
        host=self.config.host,
        port=self.config.port,
        db=self.config.database,
        user=DbAuth.username(self.config.host, str(self.config.port)),
        passwd=DbAuth.password(self.config.host, str(self.config.port)),
    )
    cursor = db.cursor()

    columnNames = tuple(FluxMagInfo.getColumnNames())

    queryStr = "select %s from Science_Ccd_Exposure where " % (", ".join(columnNames))
    dataTuple = ()

    if coordList is not None:
        # look for exposures that overlap the specified region
        for c in coordList:
            dataTuple += (c.getLongitude().asDegrees(), c.getLatitude().asDegrees())
        queryStr += " scisql_s2PtInCPoly(ra, decl"
        queryStr += ", %s, %s" * len(coordList)
        queryStr += ") = 1 and "

    # compute where clauses as a list of (clause, data)
    whereDataList = [
        ("filterId = %s", filterDict[filter]),
        ("run = %s", run),
    ]

    queryStr += " and ".join(wd[0] for wd in whereDataList)
    dataTuple += tuple(wd[1] for wd in whereDataList)

    queryStr += " order by field desc"
    if self._display:
        self.log.info("queryStr=%r; dataTuple=%s" % (queryStr, dataTuple))

    cursor.execute(queryStr, dataTuple)
    exposureInfoList = [FluxMagInfo(result) for result in cursor]
    if self._display:
        self.log.info("Found %d exposures" % (len(exposureInfoList),))

    return pipeBase.Struct(
        fluxMagInfoList=exposureInfoList,
    )
parser.add_argument("-v", "--verbose", action="store_true", dest="verbose", help="verbose") args = parser.parse_args() host = args.host port = args.port database = args.database # # get database authorization info # dbAuth = DbAuth() user = dbAuth.username(host, str(port)) password = dbAuth.password(host, str(port)) # connect to the database dbm = DatabaseManager(host, port, user, password) # create the database if it doesn't exist if not dbm.dbExists(database): dbm.createDb(database) # create the LogIngestor, which creates all the tables, and will # be used to consolidate file information logIngestor = LogIngestor(dbm, database) # go through the list of files and ingest them, ignoring any # that don't exist. for filename in args.filenames:
def run(self, coordList, filter, strip=None):
    """Select SDSS images suitable for coaddition in a particular region

    @param[in] filter: filter for images (one of "u", "g", "r", "i" or "z")
    @param[in] coordList: list of coordinates defining region of interest
    @param[in] strip: SDSS strip to select; None selects both strips

    @return a pipeBase Struct containing:
    - exposureInfoList: a list of ExposureInfo objects

    @raise RuntimeError if filter not one of "u", "g", "r", "i" or "z"
    """
    if filter not in set(("u", "g", "r", "i", "z")):
        raise RuntimeError("filter=%r is an invalid name" % (filter,))

    read_default_file = os.path.expanduser("~/.my.cnf")

    try:
        open(read_default_file)
        kwargs = dict(
            read_default_file=read_default_file,
        )
    except IOError:
        kwargs = dict(
            user=DbAuth.username(self.config.host, str(self.config.port)),
            passwd=DbAuth.password(self.config.host, str(self.config.port)),
        )

    db = MySQLdb.connect(
        host=self.config.host,
        port=self.config.port,
        db=self.config.database,
        **kwargs
    )
    cursor = db.cursor()

    columnNames = tuple(ExposureInfo.getColumnNames())
    if not columnNames:
        raise RuntimeError("Bug: no column names")
    queryStr = "select %s " % (", ".join(columnNames),)
    dataTuple = ()  # tuple(columnNames)

    if coordList is not None:
        # look for exposures that overlap the specified region

        # create table scisql.Region containing patch region
        coordStrList = ["%s, %s" % (c.getLongitude().asDegrees(),
                                    c.getLatitude().asDegrees()) for c in coordList]
        coordStr = ", ".join(coordStrList)
        coordCmd = "call scisql.scisql_s2CPolyRegion(scisql_s2CPolyToBin(%s), 10)" % (coordStr,)
        cursor.execute(coordCmd)
        cursor.nextset()  # ignore one-line result of coordCmd

        queryStr += """
        from %s as ccdExp,
            (select distinct fieldid
            from SeasonFieldQuality_To_Htm10 as ccdHtm inner join scisql.Region
            on (ccdHtm.htmId10 between scisql.Region.htmMin and scisql.Region.htmMax)
            where ccdHtm.filter = %%s) as idList
        where ccdExp.fieldid = idList.fieldid and """ % (self.config.table,)
        dataTuple += (filter,)
    else:
        # no region specified; look over the whole sky
        queryStr += """
        from %s as ccdExp where """ % (self.config.table,)

    # compute where clauses as a list of (clause, data)
    whereDataList = [
        ("filter = %s", filter),
    ]

    if self.config.camcols is not None:
        whereDataList.append(_whereDataFromList("camcol", self.config.camcols))

    if strip is not None:  # None corresponds to query for both strips: no constraint added
        whereDataList.append(("strip = %s", strip))

    queryStr += " and ".join(wd[0] for wd in whereDataList)
    dataTuple += tuple(wd[1] for wd in whereDataList)

    self.log.info("queryStr=%r; dataTuple=%s" % (queryStr, dataTuple))

    cursor.execute(queryStr, dataTuple)
    exposureInfoList = [ExposureInfo(result) for result in cursor]

    runExpInfoSetDict = dict()
    for expInfo in exposureInfoList:
        run = expInfo.dataId["run"]
        expInfoSet = runExpInfoSetDict.get(run)
        if expInfoSet:
            expInfoSet.add(expInfo)
        else:
            runExpInfoSetDict[run] = set([expInfo])

    self.log.info("Before quality cuts found %d exposures in %d runs" %
                  (len(exposureInfoList), len(runExpInfoSetDict)))

    goodRunSet = set()
    goodExposureInfoList = []
    if self.config.rejectWholeRuns:
        # reject runs for which any exposure does not meet our quality criteria
        # or the run begins or ends in the region
        regionRaRange = None
        regionCtrRa = None
        if coordList is not None:
            regionRaRange = _computeRaRange(coordList)
            regionCtrRa = (regionRaRange[0] + regionRaRange[1]) * 0.5

        numRangeCuts = 0
        for run, expInfoSet in runExpInfoSetDict.iteritems():
            runRaRange = None
            for expInfo in expInfoSet:
                if self._isBadExposure(expInfo):
                    break

                if regionRaRange is not None:
                    expRaRange = _computeRaRange(expInfo.coordList, ctrRa=regionCtrRa)
                    if runRaRange is None:
                        runRaRange = expRaRange
                    else:
                        runRaRange = (min(runRaRange[0], expRaRange[0]),
                                      max(runRaRange[1], expRaRange[1]))
            else:
                # all exposures in this run are valid;
                # if appropriate, check that the run starts and ends outside the region
                if regionRaRange is not None:
                    if (runRaRange[0] > regionRaRange[0]) or (runRaRange[1] < regionRaRange[1]):
                        numRangeCuts += 1
                        continue

                goodExposureInfoList += list(expInfoSet)
                goodRunSet.add(run)
        self.log.info("Rejected %d whole runs, including %d for incomplete range" %
                      (len(runExpInfoSetDict) - len(goodRunSet), numRangeCuts))
    else:
        # reject individual exposures which do not meet our quality criteria
        for expInfo in exposureInfoList:
            if not self._isBadExposure(expInfo):
                goodExposureInfoList.append(expInfo)
                goodRunSet.add(expInfo.dataId["run"])
        self.log.info("Rejected %d individual exposures" %
                      (len(exposureInfoList) - len(goodExposureInfoList),))

    exposureInfoList = goodExposureInfoList
    self.log.info("After quality cuts, found %d exposures in %d runs" %
                  (len(exposureInfoList), len(goodRunSet)))

    if exposureInfoList:
        # compute qscore according to RHL's formula and sort by it
        qArr = numpy.array([expInfo.q for expInfo in exposureInfoList])
        qMax = numpy.percentile(qArr, 95.0)
        for expInfo in exposureInfoList:
            expInfo.qscore = (expInfo.q / qMax) - expInfo.quality
        exposureInfoList.sort(key=lambda expInfo: expInfo.qscore)

        if self.config.maxExposures is not None:
            # select config.maxExposures exposures with the highest qscore
            exposureInfoList = exposureInfoList[0:self.config.maxExposures]
            self.log.info("After maxExposures cut, found %d exposures" %
                          (len(exposureInfoList),))
        elif self.config.maxRuns is not None:
            # select config.maxRuns runs with the highest median qscore
            # (of those exposures that overlap the patch)
            runQualListDict = dict()
            for expInfo in exposureInfoList:
                run = expInfo.dataId["run"]
                qualList = runQualListDict.get(run)
                if qualList:
                    qualList.append(expInfo.qscore)
                else:
                    runQualListDict[run] = [expInfo.qscore]

            if len(runQualListDict) > self.config.maxRuns:
                qualRunList = []
                for run, qualList in runQualListDict.iteritems():
                    runQscore = numpy.median(qualList)
                    qualRunList.append((runQscore, run))
                qualRunList.sort()
                qualRunList = qualRunList[0:self.config.maxRuns]

                goodRunSet = set(qr[1] for qr in qualRunList)
                exposureInfoList = [ei for ei in exposureInfoList
                                    if ei.dataId["run"] in goodRunSet]

    return pipeBase.Struct(
        exposureInfoList=exposureInfoList,
    )