def __init__(self, broker, host, port, runid, database):
    super(Logger, self).__init__(broker, runid)
    # set the highwater mark for the number of messages retrieved before
    # attempting to drain them.
    self.highwatermark = 10000
    self.database = database
    #
    # get database authorization info
    #
    home = os.getenv("HOME")
    pol = Policy(home + "/.lsst/db-auth.paf")
    dbAuth = DbAuth()
    dbAuth.setPolicy(pol)
    user = dbAuth.username(host, port)
    password = dbAuth.password(host, port)
    #
    # create the logger for the database and connect to it
    #
    self.dbLogger = DatabaseLogger(host, int(port))
    self.dbLogger.connect(user, password, self.database)
def _connect_to_engine(self):
    # DbAuth will not look up hosts that are None, '' or 0
    if self._host:
        try:
            authDict = {'username': DbAuth.username(self._host, str(self._port)),
                        'password': DbAuth.password(self._host, str(self._port))}
        except:
            if self._driver == 'mssql+pymssql':
                print("\nFor more information on database authentication using the db-auth.paf"
                      " policy file see: "
                      "https://confluence.lsstcorp.org/display/SIM/Accessing+the+UW+CATSIM+Database\n")
            raise
        dbUrl = url.URL(self._driver,
                        host=self._host,
                        port=self._port,
                        database=self._database,
                        **authDict)
    else:
        dbUrl = url.URL(self._driver, database=self._database)
    self._engine = create_engine(dbUrl, echo=self._verbose)
    if self._engine.dialect.name == 'sqlite':
        event.listen(self._engine, 'checkout', declareTrigFunctions)
    self._session = scoped_session(sessionmaker(autoflush=True, bind=self._engine))
    self._metadata = MetaData(bind=self._engine)
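# Illustration of what the url.URL call above assembles, with placeholder
# credentials and host (assumed values, not the project's real configuration).
# This relies on the pre-1.4 SQLAlchemy URL constructor used throughout these
# snippets; newer SQLAlchemy would use url.URL.create instead.
from sqlalchemy.engine import url

dbUrl = url.URL('mysql', username='reader', password='secret',
                host='db.example.org', port=3306, database='catsim')
print(str(dbUrl))  # mysql://reader:secret@db.example.org:3306/catsim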
def run():
    basename = os.path.basename(sys.argv[0])
    parser = argparse.ArgumentParser(
        prog=basename,
        description='''A statistics reporting utility. Use to print out information
                    about what happened during a run. Takes as an argument run
                    information previously ingested into a named database by one
                    of the ingest utilities.''',
        epilog='''example: report.py -H kaboom.ncsa.illinois.edu -p 3303 -d srp_2013_0601_140432 -S''')
    parser.add_argument("-H", "--host", action="store", default=None, dest="host",
                        help="mysql server host", type=str, required=True)
    parser.add_argument("-p", "--port", action="store", default=3306, dest="port",
                        help="mysql server port", type=int)
    parser.add_argument("-d", "--database", action="store", default=None, dest="database",
                        help="database name", type=str, required=True)
    parser.add_argument("-I", "--submits-per-interval", action="store_true", default=None,
                        dest="submits", help="number of submits to the condor queue per interval")
    parser.add_argument("-C", "--cores-used-each-second", action="store_true", default=None,
                        dest="cores", help="cores used each second")
    parser.add_argument("-N", "--cores-used-each-interval", type=int, default=-1,
                        dest="interval", help="cores used each interval")
    parser.add_argument("-S", "--summary", action="store_true", default=None,
                        dest="summary", help="summary of run")
    parser.add_argument("-v", "--verbose", action="store_true", dest="verbose",
                        help="verbose")

    args = parser.parse_args()

    host = args.host
    port = args.port
    database = args.database

    #
    # get database authorization info
    #
    dbAuth = DbAuth()
    user = dbAuth.username(host, str(port))
    password = dbAuth.password(host, str(port))

    # connect to the database
    dbm = DatabaseManager(host, port, user, password)
    dbm.execCommand0('use ' + database)

    # command line arguments
    values = None
    submitTimes = SubmissionTimes(dbm)
    entries = submitTimes.getEntries()
    if args.submits:
        submitsPerInterval = SubmitsPerInterval(dbm, 1)
        values = submitsPerInterval.getValues()
        writeDateValues(values)
    elif args.cores:
        coresPerSecond = CoresPerSecond(dbm, entries)
        values = coresPerSecond.getValues()
        writeDateValues(values)
    elif args.interval > -1:
        coresPerInterval = CoresPerInterval(dbm, entries, args.interval)
        values = coresPerInterval.getValues()
        writeDateValues(values)
    elif args.summary:
        printSummary(dbm, entries)
def setUp(self): """Initialize the DB connection. Raise SkipTest if unable to access DB.""" config = SelectLsstImagesTask.ConfigClass() try: DbAuth.username(config.host, str(config.port)), except RuntimeError as e: reason = "Warning: did not find host=%s, port=%s in your db-auth file; or %s " \ "skipping unit tests" % \ (config.host, str(config.port), e) raise unittest.SkipTest(reason)
def setUp(self): """Initialize the DB connection. Raise SkipTest if unable to access DB.""" config = SpatialScaleZeroPointTask.ConfigClass() config.selectFluxMag0.retarget(SelectLsstSimFluxMag0Task) try: DbAuth.username(config.selectFluxMag0.host, str(config.selectFluxMag0.port)), except RuntimeError as e: reason = "Warning: did not find host=%s, port=%s in your db-auth file; or %s " \ "skipping unit tests" % \ (config.selectFluxMag0.host, str(config.selectFluxMag0.port), e) raise unittest.SkipTest(reason)
def __init__(self, outDir=None, database=None, driver='sqlite', host=None, port=None,
             verbose=False):
    """Instantiate the results database, creating metrics, plots and summarystats tables."""
    # Connect to database.
    # For sqlite, connecting to a non-existent database creates it automatically.
    if database is None:
        # Using default value for database name, should specify directory.
        if outDir is None:
            outDir = '.'
        # Check for output directory, make if needed.
        if not os.path.isdir(outDir):
            try:
                os.makedirs(outDir)
            except OSError as msg:
                raise OSError(msg, '\n  (If this was the database file (not outDir), '
                                   'remember to use kwarg "database")')
        self.database = os.path.join(outDir, 'resultsDb_sqlite.db')
        self.driver = 'sqlite'
    else:
        if driver == 'sqlite':
            # Using non-default database, but may also specify directory root.
            if outDir is not None:
                database = os.path.join(outDir, database)
            self.database = database
            self.driver = driver
        else:
            # If not sqlite, then 'outDir' doesn't make much sense.
            self.database = database
            self.driver = driver
            self.host = host
            self.port = port
    if self.driver == 'sqlite':
        dbAddress = url.URL(self.driver, database=self.database)
    else:
        dbAddress = url.URL(self.driver,
                            username=DbAuth.username(self.host, str(self.port)),
                            password=DbAuth.password(self.host, str(self.port)),
                            host=self.host,
                            port=self.port,
                            database=self.database)
    engine = create_engine(dbAddress, echo=verbose)
    self.Session = sessionmaker(bind=engine)
    self.session = self.Session()
    # Create the tables, if they don't already exist.
    try:
        Base.metadata.create_all(engine)
    except DatabaseError:
        raise ValueError("Cannot create a %s database at %s. Check directory exists."
                         % (self.driver, self.database))
    self.slen = 1024
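# Hypothetical usage sketch for the constructor above. The class name
# (ResultsDb) and directory are assumptions for illustration.
resultsDb = ResultsDb(outDir='maf_out')  # creates maf_out/resultsDb_sqlite.db if needed

# Non-sqlite backend: credentials come from the DbAuth lookup for host/port.
# resultsDb = ResultsDb(database='maf_results', driver='mysql',
#                       host='db.example.org', port=3306)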
def run(self, dataId):
    """Select fluxMag0's of LsstSim images for a particular visit

    @param[in] dataId: data ID containing the mandatory visit key
    @return a pipeBase Struct containing:
    - fluxMagInfoList: a list of FluxMagInfo objects
    """
    try:
        runArgDict = self.runArgDictFromDataId(dataId)
        visit = runArgDict["visit"]
    except Exception:
        self.log.fatal("dataId does not contain mandatory visit key: dataId: %s", dataId)
    if self._display:
        self.log.info(self.config.database)

    db = MySQLdb.connect(
        host=self.config.host,
        port=self.config.port,
        db=self.config.database,
        user=DbAuth.username(self.config.host, str(self.config.port)),
        passwd=DbAuth.password(self.config.host, str(self.config.port)),
    )
    cursor = db.cursor()

    columnNames = tuple(FluxMagInfo.getColumnNames())
    queryStr = "select %s from Science_Ccd_Exposure where " % (", ".join(columnNames))
    dataTuple = ()

    # compute where clauses as a list of (clause, data)
    whereDataList = [
        ("visit = %s", visit),
    ]
    queryStr += " and ".join(wd[0] for wd in whereDataList)
    dataTuple += tuple(wd[1] for wd in whereDataList)

    if self._display:
        self.log.info("queryStr=%r; dataTuple=%s", queryStr, dataTuple)

    cursor.execute(queryStr, dataTuple)
    result = cursor.fetchall()
    fluxMagInfoList = [FluxMagInfo(r) for r in result]
    if self._display:
        self.log.info("Found %d exposures", len(fluxMagInfoList))

    return pipeBase.Struct(fluxMagInfoList=fluxMagInfoList)
def connect(host, port, db, user=None):
    """Connect to the specified MySQL database server."""
    kwargs = dict(host=host, port=port, db=db)
    if user is not None:
        kwargs["user"] = user
    try:
        # See if we can connect without a password (e.g. via my.cnf)
        return MySQLdb.connect(**kwargs)
    except:
        # Fall back to DbAuth
        kwargs["user"] = DbAuth.username(host, str(port))
        kwargs["passwd"] = DbAuth.password(host, str(port))
        return MySQLdb.connect(**kwargs)
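# Usage sketch for the helper above (host and database names are placeholders).
# The first attempt relies on a passwordless route such as ~/.my.cnf; if that
# fails, credentials are looked up through DbAuth for the same host/port.
conn = connect("db.example.org", 3306, "test_db")
cursor = conn.cursor()
cursor.execute("SELECT 1")
conn.close()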
def run(shouldExit=False): """Run the tests""" config = SpatialScaleZeroPointTask.ConfigClass() config.selectFluxMag0.retarget(SelectLsstSimFluxMag0Task) print config try: DbAuth.username(config.selectFluxMag0.host, str(config.selectFluxMag0.port)), except Exception, e: print "Warning: did not find host=%s, port=%s in your db-auth file; or %s " \ "skipping unit tests" % \ (config.selectFluxMag0.host, str(config.selectFluxMag0.port), e) return
def run():
    basename = os.path.basename(sys.argv[0])
    parser = argparse.ArgumentParser(
        prog=basename,
        description='''Takes a list of log files and ingests them into a database''',
        epilog='''example: condorLogIngest.py -H lsst10 -d testing -f worker.log''')
    parser.add_argument("-H", "--host", action="store", default=None, dest="host",
                        help="mysql server host", type=str, required=True)
    parser.add_argument("-p", "--port", action="store", default=3306, dest="port",
                        help="mysql server port", type=int)
    parser.add_argument("-d", "--database", action="store", default=None, dest="database",
                        help="database name", type=str, required=True)
    parser.add_argument("-f", "--file", action="store", default=None, dest="filenames",
                        help="condor log files", nargs='+', type=str, required=True)
    parser.add_argument("-v", "--verbose", action="store_true", dest="verbose",
                        help="verbose")

    args = parser.parse_args()

    host = args.host
    port = args.port
    database = args.database

    #
    # get database authorization info
    #
    dbAuth = DbAuth()
    user = dbAuth.username(host, str(port))
    password = dbAuth.password(host, str(port))

    # connect to the database
    dbm = DatabaseManager(host, port, user, password)

    # create the database if it doesn't exist
    if not dbm.dbExists(database):
        dbm.createDb(database)

    # create the LogIngestor, which creates all the tables, and will
    # be used to consolidate file information
    logIngestor = LogIngestor(dbm, database)

    # go through the list of files and ingest them, ignoring any
    # that don't exist.
    for filename in args.filenames:
        if not os.path.exists(filename):
            if args.verbose:
                print "warning: %s does not exist." % filename
            continue
        logIngestor.ingest(filename)
def __init__(self, database=None, driver='sqlite', host=None, port=None,
             trackingDbverbose=False):
    """Instantiate the results database, creating metrics, plots and summarystats tables."""
    self.verbose = trackingDbverbose
    # Connect to database.
    # For sqlite, connecting to a non-existent database creates it automatically.
    if database is None:
        # Default is a file in the current directory.
        self.database = os.path.join(os.getcwd(), 'trackingDb_sqlite.db')
        self.driver = 'sqlite'
    else:
        self.database = database
        self.driver = driver
        self.host = host
        self.port = port
    if self.driver == 'sqlite':
        dbAddress = url.URL(drivername=self.driver, database=self.database)
    else:
        dbAddress = url.URL(self.driver,
                            username=DbAuth.username(self.host, str(self.port)),
                            password=DbAuth.password(self.host, str(self.port)),
                            host=self.host,
                            port=self.port,
                            database=self.database)
    engine = create_engine(dbAddress, echo=self.verbose)
    if self.verbose:
        print('Created or connected to MAF tracking %s database at %s'
              % (self.driver, self.database))
    self.Session = sessionmaker(bind=engine)
    self.session = self.Session()
    # Create the tables, if they don't already exist.
    try:
        Base.metadata.create_all(engine)
    except DatabaseError:
        raise DatabaseError("Cannot create a %s database at %s. Check directory exists."
                            % (self.driver, self.database))
def run(shouldExit=False):
    config = SelectSdssImagesTask.ConfigClass()
    try:
        user = DbAuth.username(config.host, str(config.port))
    except Exception:
        print "Warning: did not find host=%s, port=%s in your db-auth file; " \
              "skipping SelectSdssImagesTask unit tests" % \
              (config.host, str(config.port))
        return
    utilsTests.run(suite(), shouldExit)
def main():
    # Setup command line options
    parser = argparse.ArgumentParser(
        description="Program which transposes a key-value table into a table "
                    "where each key is mapped to a column.")
    addDbOptions(parser)
    parser.add_argument(
        "-s", "--skip-keys", dest="skipKeys",
        help="Comma separated list of metadata keys to omit in the output table")
    parser.add_argument(
        "-c", "--compress", dest="compress", action="store_true",
        help="Lift keys with constant values into a view")
    parser.add_argument(
        "database", help="Name of database containing metadata table to transpose")
    parser.add_argument("metadataTable", help="Name of metadata table to transpose")
    parser.add_argument("idCol", help="Primary key column name for metadata table")
    parser.add_argument("outputTable", help="Name of output table to create")

    ns = parser.parse_args()
    db, metadataTable, idCol, outputTable = (
        ns.database, ns.metadataTable, ns.idCol, ns.outputTable)
    if DbAuth.available(ns.host, str(ns.port)):
        ns.user = DbAuth.username(ns.host, str(ns.port))
        passwd = DbAuth.password(ns.host, str(ns.port))
    elif os.path.exists(os.path.join(os.environ["HOME"], ".mysql.cnf")):
        passwd = None
    else:
        # The password prompt and skip-key parsing were masked in the source;
        # this is a plausible reconstruction, not the verbatim original.
        passwd = getpass.getpass("%s's MySQL password: " % ns.user)
    skipCols = set()
    if ns.skipKeys:
        skipCols = set([k.strip() for k in ns.skipKeys.split(",")])
    run(ns.host, ns.port, ns.user, passwd, db, metadataTable, idCol, outputTable,
        skipCols, ns.compress)
def __init__(self, host, database, user, port=3306, password=None):
    self.host = host
    self.port = port
    self.user = user
    self.database = database
    if password is None:
        if self.host is not None and self.port is not None and \
                DbAuth.available(self.host, str(self.port)):
            self.user = DbAuth.username(self.host, str(self.port))
            password = DbAuth.password(self.host, str(self.port))
        elif not os.path.exists(os.path.join(os.environ['HOME'], ".my.cnf")):
            password = getpass.getpass("%s's MySQL password: " % user)
    self.password = password
    self.mysqlCmd = ['mysql']
    if host is not None:
        self.mysqlCmd += ['-h', self.host]
    if port is not None:
        self.mysqlCmd += ['-P', str(self.port)]
    if user is not None:
        self.mysqlCmd += ['-u', self.user]
    if password is not None:
        self.mysqlCmd += ['-p' + self.password]
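# Usage sketch for the constructor above. The class name (MysqlExecutor) and
# file names are assumptions for illustration; the point is that self.mysqlCmd
# is a ready-to-run argument list for the mysql command-line client.
import subprocess

executor = MysqlExecutor("db.example.org", "test_db", "reader")  # hypothetical
with open("schema.sql") as sqlScript:
    subprocess.check_call(executor.mysqlCmd, stdin=sqlScript)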
def dbConnection(self):
    """The pymssql connection to the catsim database used to query reference objects."""
    if self._dbConnection is None:
        config = bcm.BaseCatalogConfig()
        config.load(os.path.join(getPackageDir("sims_catUtils"), "config", "db.py"))

        username = DbAuth.username(config.host, config.port)
        password = DbAuth.password(config.host, config.port)
        hostname = config.host
        if self.dbHostName is not None:
            hostname = self.dbHostName
        DBConnection = pymssql.connect(user=username,
                                       password=password,
                                       host=hostname,
                                       database=config.database,
                                       port=config.port)
        return DBConnection
    else:
        return self._dbConnection
def setUp(self):
    pol = Policy(os.path.join(ROOT, "testDbAuth.paf"))
    DbAuth.setPolicy(pol)
def testSetPolicy(self):
    self.assert_(DbAuth.available("lsst10.ncsa.uiuc.edu", "3306"))
    self.assertEqual(DbAuth.authString("lsst10.ncsa.uiuc.edu", "3306"),
                     "test:globular.test")
    self.assertEqual(DbAuth.username("lsst10.ncsa.uiuc.edu", "3306"), "test")
    self.assertEqual(DbAuth.password("lsst10.ncsa.uiuc.edu", "3306"),
                     "globular.test")
    self.assert_(DbAuth.available("lsst10.ncsa.uiuc.edu", "3307"))
    self.assertEqual(DbAuth.authString("lsst10.ncsa.uiuc.edu", "3307"),
                     "boris:natasha")
    self.assertEqual(DbAuth.username("lsst10.ncsa.uiuc.edu", "3307"), "boris")
    self.assertEqual(DbAuth.password("lsst10.ncsa.uiuc.edu", "3307"), "natasha")
    self.assert_(DbAuth.available("lsst9.ncsa.uiuc.edu", "3306"))
    self.assertEqual(DbAuth.authString("lsst9.ncsa.uiuc.edu", "3306"),
                     "rocky:squirrel")
    self.assertEqual(DbAuth.username("lsst9.ncsa.uiuc.edu", "3306"), "rocky")
    self.assertEqual(DbAuth.password("lsst9.ncsa.uiuc.edu", "3306"), "squirrel")
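# For context, the testDbAuth.paf fixture these assertions imply would hold one
# authInfo entry per host/port pair, roughly as sketched below. The exact paf
# syntax is an assumption based on LSST policy-file conventions, not a verbatim
# copy of the fixture.
#
# database: {
#     authInfo: {
#         host: lsst10.ncsa.uiuc.edu
#         port: 3306
#         user: test
#         password: globular.test
#     }
#     authInfo: {
#         host: lsst10.ncsa.uiuc.edu
#         port: 3307
#         user: boris
#         password: natasha
#     }
#     authInfo: {
#         host: lsst9.ncsa.uiuc.edu
#         port: 3306
#         user: rocky
#         password: squirrel
#     }
# }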
def run(self, coordList, filter):
    """Select Decam images suitable for coaddition in a particular region

    @param[in] filter: filter for images (one of "g", "r", "i", "z", "Y")
    @param[in] coordList: list of coordinates defining region of interest

    @return a pipeBase Struct containing:
    - exposureInfoList: a list of ExposureInfo objects
    """
    if filter not in set(("g", "r", "i", "z", "Y")):
        raise RuntimeError("filter=%r is an invalid name" % (filter,))

    read_default_file = os.path.expanduser("~/.my.cnf")
    try:
        open(read_default_file)
        kwargs = dict(read_default_file=read_default_file)
    except IOError:
        kwargs = dict(
            user=DbAuth.username(self.config.host, str(self.config.port)),
            passwd=DbAuth.password(self.config.host, str(self.config.port)),
        )

    db = MySQLdb.connect(host=self.config.host,
                         port=self.config.port,
                         db=self.config.database,
                         **kwargs)
    cursor = db.cursor()

    columnNames = tuple(ExposureInfo.getColumnNames())
    if not columnNames:
        raise RuntimeError("Bug: no column names")
    queryStr = "select %s " % (", ".join(columnNames),)
    dataTuple = ()  # tuple(columnNames)

    if coordList is not None:
        # look for exposures that overlap the specified region;
        # create table scisql.Region containing patch region
        coordStrList = ["%s, %s" % (c.getLongitude().asDegrees(),
                                    c.getLatitude().asDegrees()) for c in coordList]
        coordStr = ", ".join(coordStrList)
        coordCmd = "call scisql.scisql_s2CPolyRegion(scisql_s2CPolyToBin(%s), 10)" % (coordStr,)
        cursor.execute(coordCmd)
        cursor.nextset()  # ignore one-line result of coordCmd

        queryStr += """
from %s as ccdExp,
    (select distinct id
    from y1CcdQuality_To_Htm10 as ccdHtm inner join scisql.Region
    on (ccdHtm.htmId10 between scisql.Region.htmMin and scisql.Region.htmMax)
    where ccdHtm.filter = %%s) as idList
where ccdExp.id = idList.id and """ % (self.config.table,)
        dataTuple += (filter,)
    else:
        # no region specified; look over the whole sky
        queryStr += " from %s as ccdExp where " % (self.config.table,)

    # compute where clauses as a list of (clause, data)
    whereDataList = [("filter = %s", filter)]
    queryStr += " and ".join(wd[0] for wd in whereDataList)
    dataTuple += tuple(wd[1] for wd in whereDataList)

    self.log.info("queryStr=%r; dataTuple=%s" % (queryStr, dataTuple))

    cursor.execute(queryStr, dataTuple)
    exposureInfoList = [ExposureInfo(result) for result in cursor]

    return pipeBase.Struct(exposureInfoList=exposureInfoList)
"--verbose", action="store_true", dest="verbose", help="verbose") args = parser.parse_args() host = args.host port = args.port database = args.database # # get database authorization info # dbAuth = DbAuth() user = dbAuth.username(host, str(port)) password = dbAuth.password(host, str(port)) # connect to the database dbm = DatabaseManager(host, port, user, password) # create the database if it doesn't exist if not dbm.dbExists(database): dbm.createDb(database) # create the LogIngestor, which creates all the tables, and will # be used to consolidate file information logIngestor = LogIngestor(dbm, database) # go through the list of files and ingest them, ignoring any
def run(self, coordList, filter):
    """Select LSST images suitable for coaddition in a particular region

    @param[in] coordList: list of coordinates defining region of interest;
        if None then select all images
    @param[in] filter: filter (e.g. "g", "r", "i"...)

    @return a pipeBase Struct containing:
    - exposureInfoList: a list of ExposureInfo objects, which have the following fields:
        - dataId: data ID of exposure (a dict)
        - coordList: a list of corner coordinates of the exposure (list of afwCoord.IcrsCoord)
        - fwhm: fwhm column
    """
    db = MySQLdb.connect(
        host=self.config.host,
        port=self.config.port,
        user=DbAuth.username(self.config.host, str(self.config.port)),
        passwd=DbAuth.password(self.config.host, str(self.config.port)),
        db=self.config.database,
    )
    cursor = db.cursor()

    if coordList is not None:
        # look for exposures that overlap the specified region;
        # create table scisql.Region containing patch region
        coordStrList = ["%s, %s" % (c.getLongitude().asDegrees(),
                                    c.getLatitude().asDegrees()) for c in coordList]
        coordStr = ", ".join(coordStrList)
        coordCmd = "call scisql.scisql_s2CPolyRegion(scisql_s2CPolyToBin(%s), 10)" % (coordStr,)
        cursor.execute(coordCmd)
        cursor.nextset()  # ignore one-line result of coordCmd

        # find exposures
        queryStr = ("""select %s
from Science_Ccd_Exposure as ccdExp,
    (select distinct scienceCcdExposureId
    from Science_Ccd_Exposure_To_Htm10 as ccdHtm inner join scisql.Region
    on (ccdHtm.htmId10 between scisql.Region.htmMin and scisql.Region.htmMax)) as idList
where ccdExp.scienceCcdExposureId = idList.scienceCcdExposureId
    and filterName = %%s
    and fwhm < %%s
""" % ExposureInfo.getColumnNames())
    else:
        # no region specified; look over the whole sky
        queryStr = ("""select %s
from Science_Ccd_Exposure
where filterName = %%s
    and fwhm < %%s
""" % ExposureInfo.getColumnNames())

    if self.config.maxExposures is not None:
        queryStr += " limit %s" % (self.config.maxExposures,)

    dataTuple = (filter, self.config.maxFwhm)

    self.log.info("queryStr=%r; dataTuple=%s", queryStr, dataTuple)

    cursor.execute(queryStr, dataTuple)
    exposureInfoList = [ExposureInfo(result) for result in cursor]

    return pipeBase.Struct(exposureInfoList=exposureInfoList)
def setUp(self):
    self.pol = Policy("tests/testDbAuth.paf")
    DbAuth.setPolicy(self.pol)
def tearDown(self):
    DbAuth.resetPolicy()
def tearDown(self):
    DbAuth.setPolicy(Policy())
    del self.pol
def __init__(self, args):
    self.datetime = time.strftime("%Y_%m%d_%H%M%S")
    self.user = pwd.getpwuid(os.getuid())[0]
    if self.user == 'buildbot':
        RunConfiguration.pipeQaBase = re.sub(r'dev', 'buildbot',
                                             RunConfiguration.pipeQaBase)
        RunConfiguration.pipeQaDir = re.sub(r'dev', 'buildbot',
                                            RunConfiguration.pipeQaDir)
    self.dbUser = DbAuth.username(RunConfiguration.dbHost,
                                  str(RunConfiguration.dbPort))
    self.hostname = socket.getfqdn()
    self.fromAddress = "%s@%s" % (self.user, self.hostname)

    self.options, self.args = self.parseOptions(args)

    # Handle immediate commands
    if self.options.printStatus:
        self.printStatus()
        sys.exit(0)
    if self.options.report is not None:
        self.report(os.path.join(self.options.output, self.options.report,
                                 "run", "run.log"))
        sys.exit(0)
    if self.options.listRuns:
        self.listRuns(self.options.listRuns)
        sys.exit(0)
    if self.options.listInputs:
        self.listInputs()
        sys.exit(0)
    if self.options.linkLatest is not None:
        self.linkLatest(self.options.linkLatest)
        sys.exit(0)
    if self.options.kill is not None:
        self.kill(self.options.kill)
        sys.exit(0)
    if self.options.hosts is not None:
        self.hosts()
        sys.exit(0)

    if self.arch is None:
        if self.options.arch is None:
            raise RuntimeError("Architecture is required")
        self.arch = self.options.arch

    if re.search(r'[^a-zA-Z0-9_]', self.options.runType):
        raise RuntimeError("Run type '%s' must be one word" % (self.options.runType,))

    self.collectionName = re.sub(r'\.', '_', RunConfiguration.collection)

    runIdProperties = dict(user=self.user,
                           dbUser=self.dbUser,
                           coll=self.collectionName,
                           runType=self.options.runType,
                           datetime=self.datetime)
    # When resuming a run, use the provided runId
    if self.options.resumeRunId is None:
        self.runId = RunConfiguration.runIdPattern % runIdProperties
    else:
        self.runId = self.options.resumeRunId
    runIdProperties['runid'] = self.runId
    dbNamePattern = "%(dbUser)s_%(coll)s_u_%(runid)s"
    self.dbName = dbNamePattern % runIdProperties

    self.inputBase = os.path.join(RunConfiguration.inputBase, self.options.input)
    self.inputDirectory = os.path.join(self.inputBase, RunConfiguration.collection)
    self.outputDirectory = os.path.join(self.options.output, self.runId)
    self.outputDirectory = os.path.abspath(self.outputDirectory)
    if self.options.resumeRunId is None:
        if os.path.exists(self.outputDirectory):
            raise RuntimeError("Output directory %s already exists"
                               % (self.outputDirectory,))
        os.mkdir(self.outputDirectory)
    elif not os.path.exists(self.outputDirectory):
        raise RuntimeError("Output directory %s does not exist for resumed run"
                           % (self.outputDirectory,))
    self.pipeQaUrl = RunConfiguration.pipeQaBase + self.dbName + "/"

    self.eupsPath = os.environ['EUPS_PATH']
    e = eups.Eups(readCache=False)
    self.setups = dict()
    for product in e.getSetupProducts():
        if product.name != "eups":
            self.setups[product.name] = \
                re.sub(r'^LOCAL:', "-r ", product.version)

    # TODO -- load policy and apply overrides
    self.options.override = None
def setUp(self):
    # Turn on tracing
    log.Trace.setVerbosity('', 10)
    log.ScreenLog.createDefaultLog(True, log.Log.INFO)

    # Eventually, these should be read from a policy somewhere
    self.dbServer = 'lsst10.ncsa.uiuc.edu'
    self.dbPort = '3306'
    self.dbType = 'mysql'
    if not DbAuth.available(self.dbServer, self.dbPort):
        self.fail("Cannot access database server %s:%s" % (self.dbServer, self.dbPort))

    # Construct test run database name
    self.runId = DbAuth.username(self.dbServer, self.dbPort) + \
        time.strftime("_test_ap_%y%m%d_%H%M%S", time.gmtime())

    # Tweak these to run on different input data, or with a different number of slices
    self.universeSize = 2
    self.visitId = 708125
    self.filter = 'u'
    self.ra = 333.880166667
    self.dec = -17.7374166667

    self.dbUrlPrefix = ''.join([self.dbType, '://', self.dbServer, ':',
                                self.dbPort, '/'])
    self.dbUrl = self.dbUrlPrefix + self.runId

    self.substitutions = {'visitId': self.visitId,
                          'filter': self.filter,
                          'runId': self.runId}

    # Create a database specifically for the test (copy relevant
    # tables from the test_ap database)
    mysqlStatements = [
        """CREATE DATABASE %(runId)s""",
        """USE %(runId)s""",
        """CREATE TABLE VarObject LIKE test_ap.Object""",
        """CREATE TABLE NonVarObject LIKE test_ap.Object""",
        """CREATE TABLE DIASource LIKE test_ap.DIASource""",
        """CREATE TABLE prv_Filter LIKE test_ap.prv_Filter""",
        """INSERT INTO prv_Filter SELECT * FROM test_ap.prv_Filter""",
        """CREATE TABLE _tmp_v%(visitId)d_DIASource LIKE test_ap._tmp_v%(visitId)d_DIASource""",
        """INSERT INTO _tmp_v%(visitId)d_DIASource SELECT * FROM test_ap._tmp_v%(visitId)d_DIASource""",
        """CREATE TABLE _tmp_v%(visitId)d_Preds LIKE test_ap._tmp_v%(visitId)d_Preds""",
        """INSERT INTO _tmp_v%(visitId)d_Preds SELECT * FROM test_ap._tmp_v%(visitId)d_Preds""",
        """CREATE TABLE _tmpl_MatchPair LIKE test_ap._tmpl_MatchPair""",
        """CREATE TABLE _tmpl_IdPair LIKE test_ap._tmpl_IdPair""",
        """CREATE TABLE _tmpl_InMemoryObject LIKE test_ap._tmpl_InMemoryObject""",
        """CREATE TABLE _tmpl_InMemoryMatchPair LIKE test_ap._tmpl_InMemoryMatchPair""",
        """CREATE TABLE _tmpl_InMemoryId LIKE test_ap._tmpl_InMemoryId""",
        """CREATE TABLE _ap_DIASourceToObjectMatches LIKE test_ap._ap_DIASourceToObjectMatches""",
        """CREATE TABLE _ap_PredToDIASourceMatches LIKE test_ap._ap_PredToDIASourceMatches""",
        """CREATE TABLE _ap_DIASourceToNewObject LIKE test_ap._ap_DIASourceToNewObject""",
        """CREATE TABLE _mops_Prediction LIKE test_ap._mops_Prediction"""
    ]
    db = DbStorage()
    db.setPersistLocation(LogicalLocation(self.dbUrlPrefix + 'test_ap'))
    try:
        for stmt in mysqlStatements:
            db.executeSql(stmt % self.substitutions)

        # Specify list of stages...
        self.stages = [ap.LoadStage,
                       InputStage,
                       ap.MatchDiaSourcesStage,
                       OutputStage,
                       InputStage,
                       ap.MatchMopsPredsStage,
                       OutputStage,
                       ap.StoreStage]

        # ...and read in stage policy for each stage
        policyDir = os.path.join(os.environ['AP_DIR'], 'pipeline', 'examples', 'policy')
        self.policies = [Policy(os.path.join(policyDir, 'LoadStage.paf')),
                         Policy(os.path.join(policyDir, 'MatchDiaSourcesStageInput.paf')),
                         None,
                         Policy(os.path.join(policyDir, 'MatchDiaSourcesStageOutput.paf')),
                         Policy(os.path.join(policyDir, 'MatchMopsPredsStageInput.paf')),
                         None,
                         Policy(os.path.join(policyDir, 'MatchMopsPredsStageOutput.paf')),
                         Policy(os.path.join(policyDir, 'StoreStage.paf'))]

        # construct PropertySet for string interpolation
        psSubs = PropertySet()
        psSubs.setInt('visitId', self.visitId)
        psSubs.setString('runId', self.runId)
        psSubs.setString('filter', self.filter)
        psSubs.setString('work', '.')
        psSubs.setString('input', '/tmp')
        psSubs.setString('output', '/tmp')
        psSubs.setString('update', '/tmp')
        psSubs.setString('dbUrl', self.dbUrl)
        LogicalLocation.setLocationMap(psSubs)
    except:
        # clean up database in case of error
        db.executeSql("DROP DATABASE %(runId)s" % self.substitutions)
        raise
def analyzeLogs(self, runId, inProgress=False):
    import MySQLdb
    jobStartRegex = re.compile(r"Processing job:"
                               r"(\s+raft=(?P<raft>\d,\d)"
                               r"|\s+sensor=(?P<sensor>\d,\d)"
                               r"|\s+type=calexp"
                               r"|\s+visit=(?P<visit>\d+)){4}")

    host = RunConfiguration.dbHost
    port = RunConfiguration.dbPort
    with MySQLdb.connect(host=host,
                         port=port,
                         user=self.dbUser,
                         passwd=DbAuth.password(host, str(port))) as conn:
        runpat = '%' + runId + '%'
        conn.execute("SHOW DATABASES LIKE %s", (runpat,))
        ret = conn.fetchall()
        if ret is None or len(ret) == 0:
            raise NoMatchError("No match for run %s" % (runId,))
        elif len(ret) > 1:
            raise RuntimeError("Multiple runs match:\n" + str([r[0] for r in ret]))
        dbName = ret[0][0]

    result = ""
    try:
        conn = MySQLdb.connect(host=host,
                               port=port,
                               user=self.dbUser,
                               passwd=DbAuth.password(host, str(port)),
                               db=dbName)

        cursor = conn.cursor()
        cursor.execute("""SELECT TIMESTAMP, timereceived FROM Logs
            WHERE id = (SELECT MIN(id) FROM Logs)""")
        row = cursor.fetchone()
        if row is None:
            if inProgress:
                return "No log entries yet\n"
            else:
                return "*** No log entries written\n"
        startTime, start = row
        result += "First orca log entry: %s\n" % (start,)

        cursor = conn.cursor()
        cursor.execute("""SELECT TIMESTAMP, timereceived FROM Logs
            WHERE id = (SELECT MAX(id) FROM Logs)""")
        stopTime, stop = cursor.fetchone()
        result += "Last orca log entry: %s\n" % (stop,)

        elapsed = long(stopTime) - long(startTime)
        elapsedHr = elapsed / 3600 / 1000 / 1000 / 1000
        elapsed -= elapsedHr * 3600 * 1000 * 1000 * 1000
        elapsedMin = elapsed / 60 / 1000 / 1000 / 1000
        elapsed -= elapsedMin * 60 * 1000 * 1000 * 1000
        elapsedSec = elapsed / 1.0e9
        result += "Orca elapsed time: %d:%02d:%06.3f\n" % (elapsedHr, elapsedMin, elapsedSec)

        cursor = conn.cursor()
        cursor.execute("""
            SELECT COUNT(DISTINCT workerid) FROM
            (SELECT workerid FROM Logs LIMIT 10000) AS sample""")
        nPipelines = cursor.fetchone()[0]
        result += "%d pipelines used\n" % (nPipelines,)

        cursor = conn.cursor()
        cursor.execute("""
            SELECT CASE gid
                WHEN 1 THEN 'pipeline shutdowns seen'
                WHEN 2 THEN 'CCDs attempted'
                WHEN 3 THEN 'src writes'
                WHEN 4 THEN 'calexp writes'
            END AS descr, COUNT(*) FROM (
                SELECT CASE
                    WHEN COMMENT LIKE 'Processing job:% visit=0' THEN 1
                    WHEN COMMENT LIKE 'Processing job:%'
                        AND COMMENT NOT LIKE '% visit=0' THEN 2
                    WHEN COMMENT LIKE 'Ending write to BoostStorage%/src%' THEN 3
                    WHEN COMMENT LIKE 'Ending write to FitsStorage%/calexp%' THEN 4
                    ELSE 0
                END AS gid
                FROM Logs
            ) AS stats WHERE gid > 0 GROUP BY gid""")
        nShutdown = 0
        for d, n in cursor.fetchall():
            result += "%d %s\n" % (n, d)
            if d == 'pipeline shutdowns seen':
                nShutdown = n
        if nShutdown != nPipelines:
            if not inProgress:
                if nShutdown == 0:
                    result += "\n*** No pipelines were shut down properly\n"
                else:
                    result += "\n*** Shutdowns do not match pipelines\n"

        cursor = conn.cursor()
        cursor.execute("""
            SELECT workerid, COMMENT FROM Logs
            JOIN (SELECT MAX(id) AS last FROM Logs GROUP BY workerid) AS a
            ON (Logs.id = a.last)""")
        for worker, msg in cursor.fetchall():
            if inProgress:
                result += "Pipeline %s last status: %s\n" % (worker, msg)
            else:
                result += "Pipeline %s ended with: %s\n" % (worker, msg)

        cursor = conn.cursor()
        cursor.execute("""
            SELECT COUNT(*) FROM Logs
            WHERE (
                COMMENT LIKE '%rror%'
                OR COMMENT LIKE '%xception%'
                OR COMMENT LIKE '%arning%'
                OR COMMENT LIKE 'Fail'
                OR COMMENT LIKE 'fail'
            )
            AND COMMENT NOT LIKE '%failureStage%'
            AND COMMENT NOT LIKE '%failure stage%'
            AND COMMENT NOT LIKE 'failSerialName%'
            AND COMMENT NOT LIKE 'failParallelName%'
            AND COMMENT NOT LIKE 'Distortion fitter failed to improve%'
            AND COMMENT NOT LIKE '%magnitude error column%'
            AND COMMENT NOT LIKE '%errorFlagged%'
            AND COMMENT NOT LIKE 'Skipping process due to error'
            """)
        result += "%s failures seen\n" % cursor.fetchone()

        cursor = conn.cursor(MySQLdb.cursors.DictCursor)
        cursor.execute("""
            SELECT * FROM Logs
            WHERE COMMENT LIKE 'Processing job:%'
            OR (
                (
                    COMMENT LIKE '%rror%'
                    OR COMMENT LIKE '%xception%'
                    OR COMMENT LIKE '%arning%'
                    OR COMMENT LIKE '%Fail%'
                    OR COMMENT LIKE '%fail%'
                )
                AND COMMENT NOT LIKE '%failureStage%'
                AND COMMENT NOT LIKE '%failure stage%'
                AND COMMENT NOT LIKE 'failSerialName%'
                AND COMMENT NOT LIKE 'failParallelName%'
                AND COMMENT NOT LIKE 'Distortion fitter failed to improve%'
                AND COMMENT NOT LIKE '%magnitude error column%'
                AND COMMENT NOT LIKE '%errorFlagged%'
                AND COMMENT NOT LIKE 'Skipping process due to error'
            )
            ORDER BY id;""")
        jobs = dict()
        for d in cursor.fetchall():
            match = jobStartRegex.search(d['COMMENT'])
            if match:
                jobs[d['workerid']] = "Visit %s Raft %s Sensor %s" % (
                    match.group("visit"), match.group("raft"), match.group("sensor"))
            elif not d['COMMENT'].startswith('Processing job:'):
                if jobs.has_key(d['workerid']):
                    job = jobs[d['workerid']]
                else:
                    job = "unknown"
                result += "\n*** Error in %s in stage %s on %s:\n" % (
                    job, d['stagename'], d['workerid'])
                lines = d['COMMENT'].split('\n')
                i = len(lines) - 1
                message = lines[i].strip()
                # Skip blank lines at end
                while i > 0 and message == "":
                    i -= 1
                    message = lines[i].strip()
                # Go back until we find a traceback line with " in "
                while i > 0 and lines[i].find(" in ") == -1:
                    i -= 1
                    message = lines[i].strip() + "\n" + message
                result += message + "\n"
    finally:
        conn.close()

    outputDir = os.path.join(self.options.output, runId)
    logFile = os.path.join(outputDir, "run", "unifiedPipeline.log")
    with open(logFile, "r") as log:
        try:
            log.seek(-500, 2)
        except:
            pass
        tail = log.read(500)
        if not tail.endswith("logger handled...and...done!\n"):
            result += "\n*** Unified pipeline log file\n"
            result += "(last 500 bytes)... " + tail + "\n"

    for logFile in glob.glob(os.path.join(outputDir, "work", "*", "launch.log")):
        with open(logFile, "r") as log:
            try:
                log.seek(-500, 2)
            except:
                pass
            tail = log.read(500)
            if not re.search(r"harness.runPipeline: workerid \w+$", tail) \
                    and not re.search(r"Applying aperture", tail) \
                    and tail != "done. Now starting job office\n":
                result += "\n*** " + logFile + "\n"
                result += "(last 500 bytes)... " + tail + "\n"

    return result
def run(self, dataId, coordList):
    """Select fluxMag0's of SDSS images for a particular run

    @param[in] dataId: a dataId containing at least a run and filter
    @param[in] coordList: list of coordinates defining region of interest

    @return a pipeBase Struct containing:
    - fluxMagInfoList: a list of FluxMagInfo objects
    """
    argDict = self.runArgDictFromDataId(dataId)
    run = argDict["run"]
    filter = argDict["filter"]

    if filter not in set(("u", "g", "r", "i", "z")):
        raise RuntimeError("filter=%r is an invalid name" % (filter,))
    filterDict = {"u": 0, "g": 1, "r": 2, "i": 3, "z": 4}

    if self._display:
        self.log.info(self.config.database)

    db = MySQLdb.connect(
        host=self.config.host,
        port=self.config.port,
        db=self.config.database,
        user=DbAuth.username(self.config.host, str(self.config.port)),
        passwd=DbAuth.password(self.config.host, str(self.config.port)),
    )
    cursor = db.cursor()

    columnNames = tuple(FluxMagInfo.getColumnNames())
    queryStr = "select %s from Science_Ccd_Exposure where " % (", ".join(columnNames))
    dataTuple = ()

    if coordList is not None:
        # look for exposures that overlap the specified region
        for c in coordList:
            dataTuple += (c.getLongitude().asDegrees(), c.getLatitude().asDegrees())
        queryStr += " scisql_s2PtInCPoly(ra, decl"
        queryStr += ", %s, %s" * len(coordList)
        queryStr += ") = 1 and "

    # compute where clauses as a list of (clause, data)
    whereDataList = [
        ("filterId = %s", filterDict[filter]),
        ("run = %s", run),
    ]
    queryStr += " and ".join(wd[0] for wd in whereDataList)
    dataTuple += tuple(wd[1] for wd in whereDataList)

    queryStr += " order by field desc"

    if self._display:
        self.log.info("queryStr=%r; dataTuple=%s" % (queryStr, dataTuple))

    cursor.execute(queryStr, dataTuple)
    exposureInfoList = [FluxMagInfo(result) for result in cursor]
    if self._display:
        self.log.info("Found %d exposures" % (len(exposureInfoList)))

    return pipeBase.Struct(fluxMagInfoList=exposureInfoList)
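# A minimal, self-contained sketch of the clause assembly used above, assuming
# no region of interest (coordList is None) and illustrative values run=4192,
# filter="r" (filterId 2):
whereDataList = [("filterId = %s", 2), ("run = %s", 4192)]
queryStr = "select <columns> from Science_Ccd_Exposure where "
queryStr += " and ".join(wd[0] for wd in whereDataList)
queryStr += " order by field desc"
dataTuple = tuple(wd[1] for wd in whereDataList)
# queryStr  -> "select <columns> from Science_Ccd_Exposure where filterId = %s and run = %s order by field desc"
# dataTuple -> (2, 4192); the %s placeholders are bound by cursor.execute(),
# so values are escaped by the MySQLdb driver rather than string-interpolated.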
def report():
    basename = os.path.basename(sys.argv[0])
    parser = argparse.ArgumentParser(
        prog=basename,
        description='''A statistics reporting utility. Use to print out information
                    about what happened during a run. Takes as an argument run
                    information previously ingested into a named database by one
                    of the ingest utilities.''',
        epilog='''example: report.py -H kaboom.ncsa.illinois.edu -p 3303 -d srp_2013_0601_140432 -S''')
    parser.add_argument("-H", "--host", action="store", default=None, dest="host",
                        help="mysql server host", type=str, required=True)
    parser.add_argument("-p", "--port", action="store", default=3306, dest="port",
                        help="mysql server port", type=int)
    parser.add_argument("-d", "--database", action="store", default=None, dest="database",
                        help="database name", type=str, required=True)
    parser.add_argument("-I", "--submits-per-interval", action="store_true", default=None,
                        dest="submits", help="number of submits to queue per interval")
    parser.add_argument("-S", "--slots-used-each-second", action="store_true", default=None,
                        dest="slots", help="slots used each second")
    parser.add_argument("-N", "--slots-used-each-interval", type=int, default=-1,
                        dest="interval", help="slots used each interval")
    parser.add_argument("-L", "--local-time-zone", action="store_true", default=False,
                        dest="localTimeZone", help="output dates converted to local time zone")
    parser.add_argument("-v", "--verbose", action="store_true", dest="verbose",
                        help="verbose")

    args = parser.parse_args()

    host = args.host
    port = args.port
    database = args.database

    #
    # get database authorization info
    #
    dbAuth = DbAuth()
    user = dbAuth.username(host, str(port))
    password = dbAuth.password(host, str(port))

    # connect to the database
    dbm = DatabaseManager(host, port, user, password)
    dbm.execCommand0('use ' + database)

    # command line arguments
    values = None
    submitTimes = SubmissionTimes(dbm)
    entries = submitTimes.getEntries()
    r = Report(dbm, args.localTimeZone)
    if args.submits:
        submitsPerInterval = SubmitsPerInterval(dbm, 1)
        values = submitsPerInterval.getValues()
        r.writePerTimeIntervals(values)
    elif args.slots:
        slotsPerSecond = SlotsPerSecond(dbm, entries)
        values = slotsPerSecond.getValues()
        r.writePerTimeIntervals(values)
    elif args.interval > -1:
        slotsPerInterval = SlotsPerInterval(dbm, entries, args.interval)
        values = slotsPerInterval.getValues()
        r.writePerTimeIntervals(values)
    else:
        printSummary(r)
    dbm.close()
def run(self, coordList, filter, strip=None):
    """Select SDSS images suitable for coaddition in a particular region

    @param[in] coordList: list of coordinates defining region of interest
    @param[in] filter: filter for images (one of "u", "g", "r", "i" or "z")

    @return a pipeBase Struct containing:
    - exposureInfoList: a list of ExposureInfo objects

    @raise RuntimeError if filter not one of "u", "g", "r", "i" or "z"
    """
    if filter not in set(("u", "g", "r", "i", "z")):
        raise RuntimeError("filter=%r is an invalid name" % (filter,))

    read_default_file = os.path.expanduser("~/.my.cnf")

    try:
        # check that the MySQL options file is readable; otherwise fall back to DbAuth
        open(read_default_file)
        kwargs = dict(read_default_file=read_default_file)
    except IOError:
        kwargs = dict(
            user=DbAuth.username(self.config.host, str(self.config.port)),
            passwd=DbAuth.password(self.config.host, str(self.config.port)),
        )

    db = MySQLdb.connect(host=self.config.host,
                         port=self.config.port,
                         db=self.config.database,
                         **kwargs)
    cursor = db.cursor()

    columnNames = tuple(ExposureInfo.getColumnNames())
    if not columnNames:
        raise RuntimeError("Bug: no column names")
    queryStr = "select %s " % (", ".join(columnNames),)
    dataTuple = ()

    if coordList is not None:
        # look for exposures that overlap the specified region;
        # create table scisql.Region containing patch region
        coordStrList = ["%s, %s" % (c.getLongitude().asDegrees(), c.getLatitude().asDegrees())
                        for c in coordList]
        coordStr = ", ".join(coordStrList)
        coordCmd = "call scisql.scisql_s2CPolyRegion(scisql_s2CPolyToBin(%s), 10)" % (coordStr,)
        cursor.execute(coordCmd)
        cursor.nextset()  # ignore one-line result of coordCmd

        queryStr += """
from %s as ccdExp,
    (select distinct fieldid
    from SeasonFieldQuality_To_Htm10 as ccdHtm inner join scisql.Region
    on (ccdHtm.htmId10 between scisql.Region.htmMin and scisql.Region.htmMax)
    where ccdHtm.filter = %%s) as idList
where ccdExp.fieldid = idList.fieldid and """ % (self.config.table,)
        dataTuple += (filter,)
    else:
        # no region specified; look over the whole sky
        queryStr += """
from %s as ccdExp where """ % (self.config.table,)

    # compute where clauses as a list of (clause, data)
    whereDataList = [
        ("filter = %s", filter),
    ]

    if self.config.camcols is not None:
        whereDataList.append(_whereDataFromList("camcol", self.config.camcols))

    if strip is not None:  # None corresponds to query for both strips: no constraint added
        whereDataList.append(("strip = %s", strip))

    queryStr += " and ".join(wd[0] for wd in whereDataList)
    dataTuple += tuple(wd[1] for wd in whereDataList)
    self.log.info("queryStr=%r; dataTuple=%s" % (queryStr, dataTuple))

    cursor.execute(queryStr, dataTuple)
    exposureInfoList = [ExposureInfo(result) for result in cursor]

    runExpInfoSetDict = dict()
    for expInfo in exposureInfoList:
        run = expInfo.dataId["run"]
        expInfoSet = runExpInfoSetDict.get(run)
        if expInfoSet:
            expInfoSet.add(expInfo)
        else:
            runExpInfoSetDict[run] = set([expInfo])

    self.log.info("Before quality cuts found %d exposures in %d runs" %
                  (len(exposureInfoList), len(runExpInfoSetDict)))

    goodRunSet = set()
    goodExposureInfoList = []
    if self.config.rejectWholeRuns:
        # reject runs for which any exposure does not meet our quality criteria
        # or the run begins or ends in the region
        regionRaRange = None
        regionCtrRa = None
        if coordList is not None:
            regionRaRange = _computeRaRange(coordList)
            regionCtrRa = (regionRaRange[0] + regionRaRange[1]) * 0.5

        numRangeCuts = 0
        for run, expInfoSet in runExpInfoSetDict.items():
            runRaRange = None
            for expInfo in expInfoSet:
                if self._isBadExposure(expInfo):
                    break

                if regionRaRange is not None:
                    expRaRange = _computeRaRange(expInfo.coordList, ctrRa=regionCtrRa)
                    if runRaRange is None:
                        runRaRange = expRaRange
                    else:
                        runRaRange = (min(runRaRange[0], expRaRange[0]),
                                      max(runRaRange[1], expRaRange[1]))
            else:
                # all exposures in this run are valid;
                # if appropriate, check that the run starts and ends outside the region
                if regionRaRange is not None:
                    if (runRaRange[0] > regionRaRange[0]) or (runRaRange[1] < regionRaRange[1]):
                        numRangeCuts += 1
                        continue

                goodExposureInfoList += list(expInfoSet)
                goodRunSet.add(run)
        self.log.info("Rejected %d whole runs, including %d for incomplete range" %
                      (len(runExpInfoSetDict) - len(goodRunSet), numRangeCuts))
    else:
        # reject individual exposures which do not meet our quality criteria
        for expInfo in exposureInfoList:
            if not self._isBadExposure(expInfo):
                goodExposureInfoList.append(expInfo)
                goodRunSet.add(expInfo.dataId["run"])
        self.log.info("Rejected %d individual exposures" %
                      (len(exposureInfoList) - len(goodExposureInfoList),))

    exposureInfoList = goodExposureInfoList
    self.log.info("After quality cuts, found %d exposures in %d runs" %
                  (len(exposureInfoList), len(goodRunSet)))

    if exposureInfoList:
        # compute qscore according to RHL's formula and sort by it
        qArr = np.array([expInfo.q for expInfo in exposureInfoList])
        qMax = np.percentile(qArr, 95.0)
        for expInfo in exposureInfoList:
            expInfo.qscore = (expInfo.q / qMax) - expInfo.quality
        exposureInfoList.sort(key=lambda expInfo: expInfo.qscore)

        if self.config.maxExposures is not None:
            # select config.maxExposures exposures with the highest qscore
            exposureInfoList = exposureInfoList[0:self.config.maxExposures]
            self.log.info("After maxExposures cut, found %d exposures" %
                          (len(exposureInfoList),))
        elif self.config.maxRuns is not None:
            # select config.maxRuns runs with the highest median qscore
            # (of those exposures that overlap the patch)
            runQualListDict = dict()
            for expInfo in exposureInfoList:
                run = expInfo.dataId["run"]
                qualList = runQualListDict.get(run)
                if qualList:
                    qualList.append(expInfo.qscore)
                else:
                    runQualListDict[run] = [expInfo.qscore]

            if len(runQualListDict) > self.config.maxRuns:
                qualRunList = []
                for run, qualList in runQualListDict.items():
                    runQscore = np.median(qualList)
                    qualRunList.append((runQscore, run))
                qualRunList.sort()
                qualRunList = qualRunList[0:self.config.maxRuns]

                goodRunSet = set(qr[1] for qr in qualRunList)
                exposureInfoList = [ei for ei in exposureInfoList
                                    if ei.dataId["run"] in goodRunSet]

    return pipeBase.Struct(exposureInfoList=exposureInfoList)
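# Usage sketch for run() above (not from the original source). The corner
# coordinates are hypothetical, and the IcrsCoord/Point2D construction is an
# assumption based on the era-appropriate afwCoord/afwGeom imports that appear
# elsewhere in this document.
def exampleSelectRBand(task):
    corners = ((333.8, -17.8), (334.0, -17.8), (334.0, -17.6), (333.8, -17.6))
    coordList = [afwCoord.IcrsCoord(afwGeom.Point2D(ra, dec), afwGeom.degrees)
                 for ra, dec in corners]
    struct = task.run(coordList=coordList, filter="r")
    for expInfo in struct.exposureInfoList:
        print(expInfo.dataId)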
def setUp(self):
    # Turn on tracing
    log.Trace.setVerbosity('', 10)
    log.ScreenLog.createDefaultLog(True, log.Log.INFO)

    # Eventually, these should be read from a policy somewhere
    self.dbServer = 'lsst10.ncsa.uiuc.edu'
    self.dbPort = '3306'
    self.dbType = 'mysql'
    if not DbAuth.available(self.dbServer, self.dbPort):
        self.fail("Cannot access database server %s:%s" % (self.dbServer, self.dbPort))

    # Construct test run database name
    self.runId = DbAuth.username(self.dbServer, self.dbPort) + \
        time.strftime("_test_ap_%y%m%d_%H%M%S", time.gmtime())

    # Tweak these to run on different input data, or with a different number of slices
    self.universeSize = 2
    self.visitId = 708125
    self.filter = 'u'
    self.ra = 333.880166667
    self.dec = -17.7374166667

    self.dbUrlPrefix = ''.join([self.dbType, '://', self.dbServer, ':', self.dbPort, '/'])
    self.dbUrl = self.dbUrlPrefix + self.runId

    self.substitutions = {'visitId': self.visitId,
                          'filter': self.filter,
                          'runId': self.runId}

    # Create a database specifically for the test (copy relevant
    # tables from the test_ap database)
    mysqlStatements = [
        """CREATE DATABASE %(runId)s""",
        """USE %(runId)s""",
        """CREATE TABLE VarObject LIKE test_ap.Object""",
        """CREATE TABLE NonVarObject LIKE test_ap.Object""",
        """CREATE TABLE DIASource LIKE test_ap.DIASource""",
        """CREATE TABLE prv_Filter LIKE test_ap.prv_Filter""",
        """INSERT INTO prv_Filter SELECT * FROM test_ap.prv_Filter""",
        """CREATE TABLE _tmp_v%(visitId)d_DIASource LIKE test_ap._tmp_v%(visitId)d_DIASource""",
        """INSERT INTO _tmp_v%(visitId)d_DIASource SELECT * FROM test_ap._tmp_v%(visitId)d_DIASource""",
        """CREATE TABLE _tmp_v%(visitId)d_Preds LIKE test_ap._tmp_v%(visitId)d_Preds""",
        """INSERT INTO _tmp_v%(visitId)d_Preds SELECT * FROM test_ap._tmp_v%(visitId)d_Preds""",
        """CREATE TABLE _tmpl_MatchPair LIKE test_ap._tmpl_MatchPair""",
        """CREATE TABLE _tmpl_IdPair LIKE test_ap._tmpl_IdPair""",
        """CREATE TABLE _tmpl_InMemoryObject LIKE test_ap._tmpl_InMemoryObject""",
        """CREATE TABLE _tmpl_InMemoryMatchPair LIKE test_ap._tmpl_InMemoryMatchPair""",
        """CREATE TABLE _tmpl_InMemoryId LIKE test_ap._tmpl_InMemoryId""",
        """CREATE TABLE _ap_DIASourceToObjectMatches LIKE test_ap._ap_DIASourceToObjectMatches""",
        """CREATE TABLE _ap_PredToDIASourceMatches LIKE test_ap._ap_PredToDIASourceMatches""",
        """CREATE TABLE _ap_DIASourceToNewObject LIKE test_ap._ap_DIASourceToNewObject""",
        """CREATE TABLE _mops_Prediction LIKE test_ap._mops_Prediction"""
    ]
    db = DbStorage()
    db.setPersistLocation(LogicalLocation(self.dbUrlPrefix + 'test_ap'))
    try:
        for stmt in mysqlStatements:
            db.executeSql(stmt % self.substitutions)

        # Specify list of stages ...
        self.stages = [ap.LoadStage,
                       InputStage,
                       ap.MatchDiaSourcesStage,
                       OutputStage,
                       InputStage,
                       ap.MatchMopsPredsStage,
                       OutputStage,
                       ap.StoreStage]

        # and read in stage policy for each stage
        policyDir = os.path.join(os.environ['AP_DIR'], 'pipeline', 'examples', 'policy')
        self.policies = [Policy(os.path.join(policyDir, 'LoadStage.paf')),
                         Policy(os.path.join(policyDir, 'MatchDiaSourcesStageInput.paf')),
                         None,
                         Policy(os.path.join(policyDir, 'MatchDiaSourcesStageOutput.paf')),
                         Policy(os.path.join(policyDir, 'MatchMopsPredsStageInput.paf')),
                         None,
                         Policy(os.path.join(policyDir, 'MatchMopsPredsStageOutput.paf')),
                         Policy(os.path.join(policyDir, 'StoreStage.paf'))]

        # construct PropertySet for string interpolation
        psSubs = PropertySet()
        psSubs.setInt('visitId', self.visitId)
        psSubs.setString('runId', self.runId)
        psSubs.setString('filter', self.filter)
        psSubs.setString('work', '.')
        psSubs.setString('input', '/tmp')
        psSubs.setString('output', '/tmp')
        psSubs.setString('update', '/tmp')
        psSubs.setString('dbUrl', self.dbUrl)
        LogicalLocation.setLocationMap(psSubs)
    except Exception:
        # clean up the database in case of error
        db.executeSql("DROP DATABASE %(runId)s" % self.substitutions)
        raise
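# A hypothetical companion tearDown (not in the original source): the per-run
# test database created in setUp() would presumably be dropped the same way
# the except branch above does it.
def tearDown(self):
    db = DbStorage()
    db.setPersistLocation(LogicalLocation(self.dbUrlPrefix + 'test_ap'))
    db.executeSql("DROP DATABASE %(runId)s" % self.substitutions)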
from lsst.daf.persistence import DbAuth
import lsst.afw.coord as afwCoord
import lsst.afw.geom as afwGeom
from lsst.obs.sdss.selectSdssImages import SelectSdssImagesTask

Database = "test_select_sdss_images"

config = SelectSdssImagesTask.ConfigClass()

# Some of the tests require loading SDSS images from "lsst-db.ncsa.illinois.edu" and
# require a login name and password. If the test is unable to connect to the external data,
# some of the tests are skipped.
noConnectionStr = ""
noConnection = False
try:
    DbAuth.username(config.host, str(config.port))
except Exception as e:
    noConnectionStr = ("No remote connection to SDSS image database\n"
                       "Did not find host={0}, port={1} in your db-auth file;\n"
                       "Warning generated: {2} ".format(config.host, str(config.port), e))
    noConnection = True


def getCoordList(minRa, minDec, maxRa, maxDec):
    degList = (
        (minRa, minDec),
        (maxRa, minDec),
        (maxRa, maxDec),
        (minRa, maxDec),
    )
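# Sketch of how the noConnection flag is typically consumed (assumed usage,
# not shown in this snippet): unittest.skipIf disables the database-dependent
# tests when no credentials are available. The test class name is hypothetical.
import unittest

class SelectSdssImagesTestCase(unittest.TestCase):
    @unittest.skipIf(noConnection, noConnectionStr)
    def testDatabaseQuery(self):
        pass  # would exercise SelectSdssImagesTask against the remote database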
def __init__(self, outDir=None, database=None, driver='sqlite',
             host=None, port=None, verbose=False):
    """Instantiate the results database, creating metrics, plots and summarystats tables."""
    # Connect to database.
    # For sqlite, connecting to a non-existent database creates it automatically.
    if database is None:
        # Using default value for database name; should specify directory.
        if outDir is None:
            outDir = '.'
        # Check for output directory, make if needed.
        if not os.path.isdir(outDir):
            try:
                os.makedirs(outDir)
            except OSError as msg:
                raise OSError(msg, '\n (If this was the database file (not outDir), '
                                   'remember to use kwarg "database")')
        self.database = os.path.join(outDir, 'resultsDb_sqlite.db')
        self.driver = 'sqlite'
    else:
        if driver == 'sqlite':
            # Using non-default database, but may also specify directory root.
            if outDir is not None:
                database = os.path.join(outDir, database)
            self.database = database
            self.driver = driver
        else:
            # If not sqlite, then 'outDir' doesn't make much sense.
            self.database = database
            self.driver = driver
            self.host = host
            self.port = port

    if self.driver == 'sqlite':
        dbAddress = url.URL(self.driver, database=self.database)
    else:
        dbAddress = url.URL(self.driver,
                            username=DbAuth.username(self.host, str(self.port)),
                            password=DbAuth.password(self.host, str(self.port)),
                            host=self.host,
                            port=self.port,
                            database=self.database)
    engine = create_engine(dbAddress, echo=verbose)
    self.Session = sessionmaker(bind=engine)
    self.session = self.Session()
    # Create the tables, if they don't already exist.
    try:
        Base.metadata.create_all(engine)
    except DatabaseError:
        raise ValueError("Cannot create a %s database at %s. Check directory exists."
                         % (self.driver, self.database))
    self.slen = 1024
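# Usage sketch for the constructor above (the enclosing class is not named in
# this snippet; "ResultsDb" is assumed here). With the sqlite default no DbAuth
# lookup is needed; for any other driver, credentials come from DbAuth.
resultsDb = ResultsDb(outDir='metrics_out')        # -> metrics_out/resultsDb_sqlite.db
remoteDb = ResultsDb(database='run42', driver='mysql',
                     host='localhost', port=3306)  # username/password via DbAuth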
from __future__ import print_function

import sys

from lsst.daf.persistence import DbAuth
import lsst.afw.geom as afwGeom
import lsst.afw.image as afwImage
import lsst.afw.math as afwMath
import lsst.afw.coord as afwCoord
from lsst.obs.sdss.scaleSdssZeroPoint import ScaleSdssZeroPointTask
from lsst.obs.sdss.selectFluxMag0 import SelectSdssFluxMag0Task

config = ScaleSdssZeroPointTask.ConfigClass()

# Some of the tests require loading SDSS images from "lsst-db.ncsa.illinois.edu" and
# require a login name and password. If the test is unable to connect to the external data,
# some of the tests are skipped.
noConnection = False
try:
    DbAuth.username(config.selectFluxMag0.host, str(config.selectFluxMag0.port))
except Exception as e:
    print("Did not find host={0}, port={1} in your db-auth file; \nWarning generated: {2} "
          .format(config.selectFluxMag0.host, str(config.selectFluxMag0.port), e),
          file=sys.stderr)
    noConnection = True


class WrapDataId(object):
    """A container for dataId that looks like dataRef to computeImageScaler()"""

    def __init__(self, dataId):
        self.dataId = dataId
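# Minimal usage sketch for WrapDataId (the dataId values are hypothetical):
# code that expects a dataRef only needs the .dataId attribute, so a plain
# dict can be wrapped and passed in its place.
dataId = {"run": 4192, "camcol": 4, "field": 300, "filter": "r"}
fakeDataRef = WrapDataId(dataId)
assert fakeDataRef.dataId["filter"] == "r"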
def analyzeLogs(self, runId, inProgress=False):
    import MySQLdb
    jobStartRegex = re.compile(
        r"Processing job:"
        r"(\s+filter=(?P<filter>\w)"
        r"|\s+field=(?P<field>\d+)"
        r"|\s+camcol=(?P<camcol>\d)"
        r"|\s+run=(?P<run>\d+)"
        r"|\s+type=calexp){5}"
    )

    host = RunConfiguration.dbHost
    port = RunConfiguration.dbPort
    # Note: with MySQLdb 1.2.x the connection's context manager yields a
    # cursor, which is why execute()/fetchall() are called on "conn" here.
    with MySQLdb.connect(
            host=host,
            port=port,
            user=self.dbUser,
            passwd=DbAuth.password(host, str(port))) as conn:
        runpat = '%' + runId + '%'
        conn.execute("SHOW DATABASES LIKE %s", (runpat,))
        ret = conn.fetchall()
        if ret is None or len(ret) == 0:
            raise NoMatchError("No match for run %s" % (runId,))
        elif len(ret) > 1:
            raise RuntimeError("Multiple runs match:\n" + str([r[0] for r in ret]))
        dbName = ret[0][0]

    result = ""
    try:
        conn = MySQLdb.connect(
            host=host,
            port=port,
            user=self.dbUser,
            passwd=DbAuth.password(host, str(port)),
            db=dbName)
        cursor = conn.cursor()
        cursor.execute("""SELECT TIMESTAMP, timereceived FROM Logs
            WHERE id = (SELECT MIN(id) FROM Logs)""")
        row = cursor.fetchone()
        if row is None:
            if inProgress:
                return "No log entries yet\n"
            else:
                return "*** No log entries written\n"
        startTime, start = row
        result += "First orca log entry: %s\n" % (start,)

        cursor = conn.cursor()
        cursor.execute("""SELECT TIMESTAMP, timereceived FROM Logs
            WHERE id = (SELECT MAX(id) FROM Logs)""")
        stopTime, stop = cursor.fetchone()
        result += "Last orca log entry: %s\n" % (stop,)

        # TIMESTAMP is in nanoseconds; use integer division to split out
        # whole hours and minutes, leaving fractional seconds
        elapsed = int(stopTime) - int(startTime)
        elapsedHr = elapsed // (3600 * 1000 * 1000 * 1000)
        elapsed -= elapsedHr * 3600 * 1000 * 1000 * 1000
        elapsedMin = elapsed // (60 * 1000 * 1000 * 1000)
        elapsed -= elapsedMin * 60 * 1000 * 1000 * 1000
        elapsedSec = elapsed / 1.0e9
        result += "Orca elapsed time: %d:%02d:%06.3f\n" % (elapsedHr, elapsedMin, elapsedSec)

        cursor = conn.cursor()
        cursor.execute("""
            SELECT COUNT(DISTINCT workerid) FROM
            (SELECT workerid FROM Logs LIMIT 10000) AS sample""")
        nPipelines = cursor.fetchone()[0]
        result += "%d pipelines used\n" % (nPipelines,)

        cursor = conn.cursor()
        cursor.execute("""
            SELECT CASE gid
                WHEN 1 THEN 'pipeline shutdowns seen'
                WHEN 2 THEN 'CCDs attempted'
                WHEN 3 THEN 'src writes'
                WHEN 4 THEN 'calexp writes'
            END AS descr, COUNT(*) FROM (
                SELECT CASE
                    WHEN COMMENT LIKE 'Processing job:% filter=0%' THEN 1
                    WHEN COMMENT LIKE 'Processing job:%' AND COMMENT NOT LIKE '% filter=0%' THEN 2
                    WHEN COMMENT LIKE 'Ending write to BoostStorage%/src%' THEN 3
                    WHEN COMMENT LIKE 'Ending write to FitsStorage%/calexp%' THEN 4
                    ELSE 0
                END AS gid
                FROM Logs
            ) AS stats WHERE gid > 0 GROUP BY gid""")
        nShutdown = 0
        for d, n in cursor.fetchall():
            result += "%d %s\n" % (n, d)
            if d == 'pipeline shutdowns seen':
                nShutdown = n
        if nShutdown != nPipelines:
            if not inProgress:
                if nShutdown == 0:
                    result += "\n*** No pipelines were shut down properly\n"
                else:
                    result += "\n*** Shutdowns do not match pipelines\n"

        cursor = conn.cursor()
        cursor.execute("""
            SELECT workerid, COMMENT FROM Logs
            JOIN (SELECT MAX(id) AS last FROM Logs GROUP BY workerid) AS a
            ON (Logs.id = a.last)""")
        for worker, msg in cursor.fetchall():
            if inProgress:
                result += "Pipeline %s last status: %s\n" % (worker, msg)
            else:
                result += "Pipeline %s ended with: %s\n" % (worker, msg)

        cursor = conn.cursor()
        cursor.execute("""
            SELECT COUNT(*) FROM Logs
            WHERE (
                COMMENT LIKE '%rror%'
                OR COMMENT LIKE '%xception%'
                OR COMMENT LIKE '%arning%'
                OR COMMENT LIKE 'Fail'
                OR COMMENT LIKE 'fail'
            )
            AND COMMENT NOT LIKE '%failureStage%'
            AND COMMENT NOT LIKE '%failure stage%'
            AND COMMENT NOT LIKE 'failSerialName%'
            AND COMMENT NOT LIKE 'failParallelName%'
            AND COMMENT NOT LIKE 'Distortion fitter failed to improve%'
            AND COMMENT NOT LIKE '%magnitude error column%'
            AND COMMENT NOT LIKE '%errorFlagged%'
            AND COMMENT NOT LIKE 'Skipping process due to error'
            """)
        result += "%s failures seen\n" % (cursor.fetchone()[0],)

        cursor = conn.cursor(MySQLdb.cursors.DictCursor)
        cursor.execute("""
            SELECT * FROM Logs WHERE
            COMMENT LIKE 'Processing job:%'
            OR (
                (
                    COMMENT LIKE '%rror%'
                    OR COMMENT LIKE '%xception%'
                    OR COMMENT LIKE '%arning%'
                    OR COMMENT LIKE '%Fail%'
                    OR COMMENT LIKE '%fail%'
                )
                AND COMMENT NOT LIKE '%failureStage%'
                AND COMMENT NOT LIKE '%failure stage%'
                AND COMMENT NOT LIKE 'failSerialName%'
                AND COMMENT NOT LIKE 'failParallelName%'
                AND COMMENT NOT LIKE 'Distortion fitter failed to improve%'
                AND COMMENT NOT LIKE '%magnitude error column%'
                AND COMMENT NOT LIKE '%errorFlagged%'
                AND COMMENT NOT LIKE 'Skipping process due to error'
            )
            ORDER BY id;""")
        jobs = dict()
        for d in cursor.fetchall():
            match = jobStartRegex.search(d['COMMENT'])
            if match:
                jobs[d['workerid']] = "Band %s Run %s Camcol %s Frame %s" % (
                    match.group("filter"), match.group("run"),
                    match.group("camcol"), match.group("field"))
            elif not d['COMMENT'].startswith('Processing job:'):
                if d['workerid'] in jobs:
                    job = jobs[d['workerid']]
                else:
                    job = "unknown"
                result += "\n*** Error in %s in stage %s on %s:\n" % (
                    job, d['stagename'], d['workerid'])
                lines = d['COMMENT'].split('\n')
                i = len(lines) - 1
                message = lines[i].strip()
                # Skip blank lines at end
                while i > 0 and message == "":
                    i -= 1
                    message = lines[i].strip()
                # Go back until we find a traceback line with " in "
                while i > 0 and lines[i].find(" in ") == -1:
                    i -= 1
                message = lines[i].strip() + "\n" + message
                result += message + "\n"
    finally:
        conn.close()

    outputDir = os.path.join(self.options.output, runId)
    logFile = os.path.join(outputDir, "run", "unifiedPipeline.log")
    with open(logFile, "r") as log:
        try:
            # seek to the last 500 bytes; fails harmlessly on shorter files
            log.seek(-500, 2)
        except (IOError, OSError):
            pass
        tail = log.read(500)
        if not tail.endswith("logger handled...and...done!\n"):
            result += "\n*** Unified pipeline log file\n"
            result += "(last 500 bytes)... " + tail + "\n"

    for logFile in glob.glob(os.path.join(outputDir, "work", "*", "launch.log")):
        with open(logFile, "r") as log:
            try:
                log.seek(-500, 2)
            except (IOError, OSError):
                pass
            tail = log.read(500)
            if not re.search(r"harness.runPipeline: workerid \w+$", tail) \
                    and not re.search(r"Applying aperture", tail) \
                    and tail != "done. Now starting job office\n":
                result += "\n*** " + logFile + "\n"
                result += "(last 500 bytes)... " + tail + "\n"
    return result
def __init__(self, args):
    self.datetime = time.strftime("%Y_%m%d_%H%M%S")
    self.user = pwd.getpwuid(os.getuid())[0]
    if self.user == 'buildbot':
        RunConfiguration.pipeQaBase = re.sub(r'dev', 'buildbot',
                                             RunConfiguration.pipeQaBase)
        RunConfiguration.pipeQaDir = re.sub(r'dev', 'buildbot',
                                            RunConfiguration.pipeQaDir)
    self.dbUser = DbAuth.username(RunConfiguration.dbHost,
                                  str(RunConfiguration.dbPort))
    self.hostname = socket.getfqdn()
    self.fromAddress = "%s@%s" % (self.user, self.hostname)

    self.options, self.args = self.parseOptions(args)

    # Handle immediate commands
    if self.options.printStatus:
        self.printStatus()
        sys.exit(0)
    if self.options.report is not None:
        self.report(os.path.join(self.options.output,
                                 self.options.report, "run", "run.log"))
        sys.exit(0)
    if self.options.listRuns:
        self.listRuns(self.options.listRuns)
        sys.exit(0)
    if self.options.listInputs:
        self.listInputs()
        sys.exit(0)
    if self.options.linkLatest is not None:
        self.linkLatest(self.options.linkLatest)
        sys.exit(0)
    if self.options.kill is not None:
        self.kill(self.options.kill)
        sys.exit(0)
    if self.options.hosts is not None:
        self.hosts()
        sys.exit(0)

    # self.arch is assumed to be a class-level attribute (default None)
    # set elsewhere in RunConfiguration
    if self.arch is None:
        if self.options.arch is None:
            raise RuntimeError("Architecture is required")
        self.arch = self.options.arch

    if re.search(r'[^a-zA-Z0-9_]', self.options.runType):
        raise RuntimeError("Run type '%s' must be one word" % (self.options.runType,))

    self.collectionName = re.sub(r'\.', '_', RunConfiguration.collection)
    runIdProperties = dict(user=self.user,
                           dbUser=self.dbUser,
                           coll=self.collectionName,
                           runType=self.options.runType,
                           datetime=self.datetime)
    # When resuming a run, use the provided runId
    if self.options.resumeRunId is None:
        self.runId = RunConfiguration.runIdPattern % runIdProperties
    else:
        self.runId = self.options.resumeRunId
    runIdProperties['runid'] = self.runId
    dbNamePattern = "%(dbUser)s_%(coll)s_u_%(runid)s"
    self.dbName = dbNamePattern % runIdProperties

    self.inputBase = os.path.join(RunConfiguration.inputBase, self.options.input)
    self.inputDirectory = os.path.join(self.inputBase, RunConfiguration.collection)
    self.outputDirectory = os.path.join(self.options.output, self.runId)
    self.outputDirectory = os.path.abspath(self.outputDirectory)
    if self.options.resumeRunId is None:
        if os.path.exists(self.outputDirectory):
            raise RuntimeError("Output directory %s already exists" % (self.outputDirectory,))
        os.mkdir(self.outputDirectory)
    elif not os.path.exists(self.outputDirectory):
        raise RuntimeError("Output directory %s does not exist for resumed run"
                           % (self.outputDirectory,))
    self.pipeQaUrl = RunConfiguration.pipeQaBase + self.dbName + "/"

    self.eupsPath = os.environ['EUPS_PATH']
    e = eups.Eups(readCache=False)
    self.setups = dict()
    for product in e.getSetupProducts():
        if product.name != "eups":
            self.setups[product.name] = re.sub(r'^LOCAL:', "-r ", product.version)

    # TODO -- load policy and apply overrides
    self.options.override = None
def testSetPolicy(self):
    self.assertTrue(DbAuth.available("lsst-db.ncsa.illinois.edu", "3306"))
    self.assertEqual(DbAuth.authString("lsst-db.ncsa.illinois.edu", "3306"),
                     "test:globular.test")
    self.assertEqual(DbAuth.username("lsst-db.ncsa.illinois.edu", "3306"), "test")
    self.assertEqual(DbAuth.password("lsst-db.ncsa.illinois.edu", "3306"),
                     "globular.test")
    self.assertTrue(DbAuth.available("lsst-db.ncsa.illinois.edu", "3307"))
    self.assertEqual(DbAuth.authString("lsst-db.ncsa.illinois.edu", "3307"),
                     "boris:natasha")
    self.assertEqual(DbAuth.username("lsst-db.ncsa.illinois.edu", "3307"), "boris")
    self.assertEqual(DbAuth.password("lsst-db.ncsa.illinois.edu", "3307"), "natasha")
    self.assertTrue(DbAuth.available("lsst9.ncsa.illinois.edu", "3306"))
    self.assertEqual(DbAuth.authString("lsst9.ncsa.illinois.edu", "3306"),
                     "rocky:squirrel")
    self.assertEqual(DbAuth.username("lsst9.ncsa.illinois.edu", "3306"), "rocky")
    self.assertEqual(DbAuth.password("lsst9.ncsa.illinois.edu", "3306"), "squirrel")
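# A db-auth.paf policy that would satisfy the assertions above might look like
# the sketch below. The exact Policy syntax is an assumption, inferred from the
# ~/.lsst/db-auth.paf usage elsewhere in this document.
#
# database: {
#     authInfo: {
#         host: lsst-db.ncsa.illinois.edu
#         port: 3306
#         user: test
#         password: globular.test
#     }
#     authInfo: {
#         host: lsst-db.ncsa.illinois.edu
#         port: 3307
#         user: boris
#         password: natasha
#     }
#     authInfo: {
#         host: lsst9.ncsa.illinois.edu
#         port: 3306
#         user: rocky
#         password: squirrel
#     }
# }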
dest="filenames", help="condor log files", nargs='+', type=str, required=True) parser.add_argument("-v", "--verbose", action="store_true", dest="verbose", help="verbose") args = parser.parse_args() host = args.host port = args.port database = args.database # # get database authorization info # dbAuth = DbAuth() user = dbAuth.username(host, str(port)) password = dbAuth.password(host, str(port)) # connect to the database dbm = DatabaseManager(host, port, user, password) # create the database if it doesn't exist if not dbm.dbExists(database): dbm.createDb(database) # create the LogIngestor, which creates all the tables, and will # be used to consolidate file information logIngestor = LogIngestor(dbm, database) # go through the list of files and ingest them, ignoring any