Example #1
def main():
    # Setup command line options
    parser = argparse.ArgumentParser(description="Program which transposes a key-value table into a table "
                                     "where each key is mapped to a column.")
    addDbOptions(parser)
    parser.add_argument(
        "-s", "--skip-keys", dest="skipKeys",
        help="Comma separated list of metadata keys to omit in the output table")
    parser.add_argument(
        "-c", "--compress", dest="compress", action="store_true",
        help="Lift keys with constant values into a view")
    parser.add_argument(
        "database", help="Name of database containing metadata table to transpose")
    parser.add_argument(
        "metadataTable", help="Name of metadata table to transpose")
    parser.add_argument(
        "idCol", help="Primary key column name for metadata table")
    parser.add_argument(
        "outputTable", help="Name of output table to create")
    ns = parser.parse_args()
    db, metadataTable, idCol, outputTable = (
        ns.database, ns.metadataTable, ns.idCol, ns.outputTable)
    if DbAuth.available(ns.host, str(ns.port)):
        ns.user = DbAuth.username(ns.host, str(ns.port))
        passwd = DbAuth.password(ns.host, str(ns.port))
    elif os.path.exists(os.path.join(os.environ["HOME"], ".mysql.cnf")):
        passwd = None
    else:
        passwd = getpass.getpass("%s's MySQL password: " % ns.user)
    skipCols = set()
    if ns.skipKeys is not None:
        skipCols = set([k.strip() for k in ns.skipKeys.split(",")])
    run(ns.host, ns.port, ns.user, passwd, db, metadataTable,
        idCol, outputTable, skipCols, ns.compress)
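Note: the addDbOptions() helper used above comes from the LSST stack and is not shown here; below is a minimal stand-in sketch, with option names inferred only from the ns.host/ns.port/ns.user references in this example (treat them as assumptions, not the real helper):

def addDbOptions(parser):
    # Hypothetical stand-in for the LSST helper; it must at least register
    # the host/port/user options that the example reads back from ns.
    parser.add_argument("--host", dest="host", help="MySQL server host")
    parser.add_argument("--port", dest="port", type=int, default=3306,
                        help="MySQL server port")
    parser.add_argument("--user", dest="user", help="MySQL user name")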
Example #2
    def __init__(self, broker, host, port, runid, database):
        super(Logger, self).__init__(broker, runid)
        # set the highwater mark for the number of messages retrieved before attempting to drain them.
        self.highwatermark = 10000

        self.database = database
        
        #
        # get database authorization info
        #
        home = os.getenv("HOME")
        pol = Policy(os.path.join(home, ".lsst", "db-auth.paf"))

        dbAuth = DbAuth()
        dbAuth.setPolicy(pol)

        user = dbAuth.username(host, port)
        password = dbAuth.password(host, port)
        
        #
        # create the logger for the database and connect to it
        #
        self.dbLogger = DatabaseLogger(host, int(port))
        
        self.dbLogger.connect(user, password, self.database)
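Note: this example reads credentials from ~/.lsst/db-auth.paf. A representative policy file, assuming the usual database.authInfo layout (the host, port, user, and password values below are placeholders borrowed from the test fixtures in Example #23):

database: {
    authInfo: {
        host: lsst10.ncsa.uiuc.edu
        port: 3306
        user: test
        password: globular.test
    }
}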
Example #3
    def _connect_to_engine(self):

        # DbAuth will not look up hosts that are None, '' or 0
        if self._host:
            try:
                authDict = {'username': DbAuth.username(self._host, str(self._port)),
                            'password': DbAuth.password(self._host, str(self._port))}
            except Exception:
                if self._driver == 'mssql+pymssql':
                    print("\nFor more information on database authentication using the db-auth.paf"
                          " policy file see: "
                          "https://confluence.lsstcorp.org/display/SIM/Accessing+the+UW+CATSIM+Database\n")
                raise

            dbUrl = url.URL(self._driver,
                            host=self._host,
                            port=self._port,
                            database=self._database,
                            **authDict)
        else:
            dbUrl = url.URL(self._driver,
                            database=self._database)


        self._engine = create_engine(dbUrl, echo=self._verbose)

        if self._engine.dialect.name == 'sqlite':
            event.listen(self._engine, 'checkout', declareTrigFunctions)

        self._session = scoped_session(sessionmaker(autoflush=True,
                                                    bind=self._engine))
        self._metadata = MetaData(bind=self._engine)
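Note: for reference, a small sketch of the kind of SQLAlchemy URL this method builds; the driver, credential, host, and database values are placeholders (and newer SQLAlchemy, 1.4+, replaces the direct url.URL(...) constructor with URL.create):

from sqlalchemy.engine import url

# Placeholder values mirroring the url.URL(...) call above.
dbUrl = url.URL('mysql', username='user', password='pass',
                host='localhost', port=3306, database='mydb')
print(str(dbUrl))  # mysql://user:pass@localhost:3306/mydb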
Example #4
def run():
    basename = os.path.basename(sys.argv[0])

    parser = argparse.ArgumentParser(prog=basename,
                description='''A statistics reporting utility. Use it to print
                            information about what happened during a run.
                            Takes as input run information previously ingested
                            into a named database by one of the ingest
                            utilities.''',
                epilog='''example:
report.py -H kaboom.ncsa.illinois.edu -p 3303 -d srp_2013_0601_140432 -S''')
    parser.add_argument("-H", "--host", action="store", default=None, dest="host", help="mysql server host", type=str, required=True)
    parser.add_argument("-p", "--port", action="store", default=3306, dest="port", help="mysql server port", type=int)
    parser.add_argument("-d", "--database", action="store", default=None, dest="database", help="database name", type=str, required=True)
    parser.add_argument("-I", "--submits-per-interval", action="store_true", default=None, dest="submits", help="number of submits to the condor queue per interval")
    parser.add_argument("-C", "--cores-used-each-second", action="store_true", default=None, dest="cores", help="cores used each second")
    parser.add_argument("-N", "--cores-used-each-interval", type=int, default=-1, dest="interval", help="cores used each interval")
    parser.add_argument("-S", "--summary", action="store_true", default=None, dest="summary", help="summary of run")
    parser.add_argument("-v", "--verbose", action="store_true", dest="verbose", help="verbose")

    args = parser.parse_args()

    host = args.host
    port = args.port
    database = args.database
    
    #
    # get database authorization info
    #
    dbAuth = DbAuth()
    user = dbAuth.username(host, str(port))
    password = dbAuth.password(host, str(port))

    # connect to the database
    dbm = DatabaseManager(host, port, user, password)


    dbm.execCommand0('use '+database)

    # command line arguments
    values = None
    submitTimes = SubmissionTimes(dbm)
    entries = submitTimes.getEntries()
    if args.submits:
        submitsPerInterval = SubmitsPerInterval(dbm, 1)
        values = submitsPerInterval.getValues()
        writeDateValues(values)
    elif args.cores:
        coresPerSecond = CoresPerSecond(dbm, entries)
        values = coresPerSecond.getValues()
        writeDateValues(values)
    elif args.interval > -1:
        coresPerInterval = CoresPerInterval(dbm, entries, args.interval)
        values = coresPerInterval.getValues()
        writeDateValues(values)
    elif args.summary:
        printSummary(dbm, entries)
Example #5
 def setUp(self):
     """Initialize the DB connection.  Raise SkipTest if unable to access DB."""
     config = SelectLsstImagesTask.ConfigClass()
     try:
         DbAuth.username(config.host, str(config.port))
     except RuntimeError as e:
         reason = ("Warning: did not find host=%s, port=%s in your "
                   "db-auth file (%s); skipping unit tests" %
                   (config.host, str(config.port), e))
         raise unittest.SkipTest(reason)
Example #6
 def setUp(self):
     """Initialize the DB connection.  Raise SkipTest if unable to access DB."""
     config = SpatialScaleZeroPointTask.ConfigClass()
     config.selectFluxMag0.retarget(SelectLsstSimFluxMag0Task)
     try:
         DbAuth.username(config.selectFluxMag0.host, str(config.selectFluxMag0.port))
     except RuntimeError as e:
         reason = ("Warning: did not find host=%s, port=%s in your "
                   "db-auth file (%s); skipping unit tests" %
                   (config.selectFluxMag0.host, str(config.selectFluxMag0.port), e))
         raise unittest.SkipTest(reason)
Example #7
File: resultsDb.py  Project: lsst/sims_maf
    def __init__(self, outDir=None, database=None, driver='sqlite',
                 host=None, port=None, verbose=False):
        """
        Instantiate the results database, creating metrics, plots and summarystats tables.
        """
        # Connect to database
        # for sqlite, connecting to non-existent database creates it automatically
        if database is None:
            # Using default value for database name, should specify directory.
            if outDir is None:
                outDir = '.'
            # Check for output directory, make if needed.
            if not os.path.isdir(outDir):
                try:
                    os.makedirs(outDir)
                except OSError as msg:
                    raise OSError(msg, '\n  (If this was the database file (not outDir), '
                                       'remember to use kwarg "database")')
            self.database = os.path.join(outDir, 'resultsDb_sqlite.db')
            self.driver = 'sqlite'
        else:
            if driver == 'sqlite':
                # Using non-default database, but may also specify directory root.
                if outDir is not None:
                    database = os.path.join(outDir, database)
                self.database = database
                self.driver = driver
            else:
                # If not sqlite, then 'outDir' doesn't make much sense.
                self.database = database
                self.driver = driver
                self.host = host
                self.port = port

        if self.driver == 'sqlite':
            dbAddress = url.URL(self.driver, database=self.database)
        else:
            dbAddress = url.URL(self.driver,
                                username=DbAuth.username(self.host, str(self.port)),
                                password=DbAuth.password(self.host, str(self.port)),
                                host=self.host,
                                port=self.port,
                                database=self.database)

        engine = create_engine(dbAddress, echo=verbose)
        self.Session = sessionmaker(bind=engine)
        self.session = self.Session()
        # Create the tables, if they don't already exist.
        try:
            Base.metadata.create_all(engine)
        except DatabaseError:
            raise ValueError("Cannot create a %s database at %s. Check directory exists."
                             % (self.driver, self.database))
        self.slen = 1024
Example #8
 def connect(host, port, db, user=None):
     kwargs = dict(host=host, port=port, db=db)
     if user is not None:
         kwargs["user"] = user
     try:
         # See if we can connect without a password (e.g. via my.cnf)
         return MySQLdb.connect(**kwargs)
     except Exception:
         # Fall back to DbAuth
         kwargs["user"] = DbAuth.username(host, str(port))
         kwargs["passwd"] = DbAuth.password(host, str(port))
         return MySQLdb.connect(**kwargs)
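Note: a usage sketch for the connect() helper above; the host, port, and database values are placeholders taken from the test fixtures elsewhere on this page, not a real endpoint:

# Try a passwordless login first (e.g. via ~/.my.cnf), falling back to
# DbAuth credentials inside connect().
conn = connect("lsst10.ncsa.uiuc.edu", 3306, "test_ap")
try:
    cursor = conn.cursor()
    cursor.execute("SELECT 1")  # trivial round-trip to verify the connection
    print(cursor.fetchone())
finally:
    conn.close()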
Example #9
    def run(self, dataId):
        """Select flugMag0's of LsstSim images for a particular visit

        @param[in] visit: visit id

        @return a pipeBase Struct containing:
        - fluxMagInfoList: a list of FluxMagInfo objects
        """
        try:
            runArgDict = self.runArgDictFromDataId(dataId)
            visit = runArgDict["visit"]
        except Exception:
            self.log.fatal(
                "dataId does not contain mandatory visit key: dataId: %s",
                dataId)
            raise  # visit is undefined past this point, so re-raise

        if self._display:
            self.log.info(self.config.database)

        db = MySQLdb.connect(
            host=self.config.host,
            port=self.config.port,
            db=self.config.database,
            user=DbAuth.username(self.config.host, str(self.config.port)),
            passwd=DbAuth.password(self.config.host, str(self.config.port)),
        )
        cursor = db.cursor()

        columnNames = tuple(FluxMagInfo.getColumnNames())

        queryStr = "select %s from Science_Ccd_Exposure where " % (
            ", ".join(columnNames))
        dataTuple = ()

        # compute where clauses as a list of (clause, data)
        whereDataList = [
            ("visit = %s", visit),
        ]

        queryStr += " and ".join(wd[0] for wd in whereDataList)
        dataTuple += tuple(wd[1] for wd in whereDataList)

        if self._display:
            self.log.info("queryStr=%r; dataTuple=%s", queryStr, dataTuple)

        cursor.execute(queryStr, dataTuple)
        result = cursor.fetchall()
        fluxMagInfoList = [FluxMagInfo(r) for r in result]
        if self._display:
            self.log.info("Found %d exposures", len(fluxMagInfoList))

        return pipeBase.Struct(fluxMagInfoList=fluxMagInfoList)
Example #10
    def run(self, dataId):
        """Select fluxMag0's of LsstSim images for a particular visit

        @param[in] dataId: data ID dict containing at least the visit id

        @return a pipeBase Struct containing:
        - fluxMagInfoList: a list of FluxMagInfo objects
        """
        try:
            runArgDict = self.runArgDictFromDataId(dataId)
            visit = runArgDict["visit"]
        except Exception:
            self.log.fatal("dataId does not contain mandatory visit key: dataId: %s" % dataId)
            raise  # visit is undefined past this point, so re-raise

        if self._display:
            self.log.info(self.config.database)

        db = MySQLdb.connect(
            host=self.config.host,
            port=self.config.port,
            db=self.config.database,
            user=DbAuth.username(self.config.host, str(self.config.port)),
            passwd=DbAuth.password(self.config.host, str(self.config.port)),
        )
        cursor = db.cursor()

        columnNames = tuple(FluxMagInfo.getColumnNames())

        queryStr = "select %s from Science_Ccd_Exposure where "%(", ".join(columnNames))
        dataTuple = ()

        # compute where clauses as a list of (clause, data)
        whereDataList = [
            ("visit = %s", visit),
        ]

        queryStr += " and ".join(wd[0] for wd in whereDataList)
        dataTuple += tuple(wd[1] for wd in whereDataList)

        if self._display:
            self.log.info("queryStr=%r; dataTuple=%s" % (queryStr, dataTuple))

        cursor.execute(queryStr, dataTuple)
        result = cursor.fetchall()
        fluxMagInfoList = [FluxMagInfo(r) for r in result]
        if self._display:
            self.log.info("Found %d exposures" % \
                      (len(fluxMagInfoList)))

        return pipeBase.Struct(fluxMagInfoList=fluxMagInfoList)
Example #11
 def connect(host, port, db, user=None):
     """Connect to the specified MySQL database server."""
     kwargs = dict(host=host, port=port, db=db)
     if user is not None:
         kwargs["user"] = user
     try:
         # See if we can connect without a password (e.g. via my.cnf)
         return MySQLdb.connect(**kwargs)
     except Exception:
         # Fall back to DbAuth
         kwargs["user"] = DbAuth.username(host, str(port))
         kwargs["passwd"] = DbAuth.password(host, str(port))
         return MySQLdb.connect(**kwargs)
Example #12
def run(shouldExit=False):
    """Run the tests"""

    config = SpatialScaleZeroPointTask.ConfigClass()
    config.selectFluxMag0.retarget(SelectLsstSimFluxMag0Task)
    print(config)
    try:
        DbAuth.username(config.selectFluxMag0.host, str(config.selectFluxMag0.port))
    except Exception as e:
        print("Warning: did not find host=%s, port=%s in your db-auth file (%s); "
              "skipping unit tests" %
              (config.selectFluxMag0.host, str(config.selectFluxMag0.port), e))
        return
Example #13
def run():
    basename = os.path.basename(sys.argv[0])

    parser = argparse.ArgumentParser(prog=basename,
                description='''Takes a list of log files and ingests them into a
database''',
                epilog='''example:
condorLogIngest.py -H lsst10 -d testing -f worker.log'''
)
    parser.add_argument("-H", "--host", action="store", default=None, dest="host", help="mysql server host", type=str, required=True)
    parser.add_argument("-p", "--port", action="store", default=3306, dest="port", help="mysql server port", type=int)
    parser.add_argument("-d", "--database", action="store", default=None, dest="database", help="database name", type=str, required=True)
    parser.add_argument("-f", "--file", action="store", default=None, dest="filenames", help="condor log files", nargs='+', type=str, required=True)
    parser.add_argument("-v", "--verbose", action="store_true", dest="verbose", help="verbose")

    args = parser.parse_args()

    host = args.host
    port = args.port
    database = args.database
    
    #
    # get database authorization info
    #
    
    dbAuth = DbAuth()
    user = dbAuth.username(host, str(port))
    password = dbAuth.password(host, str(port))

    # connect to the database
    dbm = DatabaseManager(host, port, user, password)

    # create the database if it doesn't exist
    if not dbm.dbExists(database):
        dbm.createDb(database) 

    # create the LogIngestor, which creates all the tables, and will
    # be used to consolidate file information
    logIngestor = LogIngestor(dbm, database)

    # go through the list of files and ingest them, ignoring any
    # that don't exist.
    for filename in args.filenames:
        if not os.path.exists(filename):
            if args.verbose:
                print "warning: %s does not exist." % filename
            continue
        logIngestor.ingest(filename)
Example #14
    def __init__(self,
                 database=None,
                 driver='sqlite',
                 host=None,
                 port=None,
                 trackingDbverbose=False):
        """
        Instantiate the results database, creating metrics, plots and summarystats tables.
        """
        self.verbose = trackingDbverbose
        # Connect to database
        # for sqlite, connecting to non-existent database creates it automatically
        if database is None:
            # Default is a file in the current directory.
            self.database = os.path.join(os.getcwd(), 'trackingDb_sqlite.db')
            self.driver = 'sqlite'
        else:
            self.database = database
            self.driver = driver
            self.host = host
            self.port = port

        if self.driver == 'sqlite':
            dbAddress = url.URL(drivername=self.driver, database=self.database)
        else:
            dbAddress = url.URL(
                self.driver,
                username=DbAuth.username(self.host, str(self.port)),
                password=DbAuth.password(self.host, str(self.port)),
                host=self.host,
                port=self.port,
                database=self.database)

        engine = create_engine(dbAddress, echo=self.verbose)
        if self.verbose:
            print('Created or connected to MAF tracking %s database at %s' %
                  (self.driver, self.database))
        self.Session = sessionmaker(bind=engine)
        self.session = self.Session()
        # Create the tables, if they don't already exist.
        try:
            Base.metadata.create_all(engine)
        except DatabaseError:
            raise DatabaseError(
                "Cannot create a %s database at %s. Check directory exists." %
                (self.driver, self.database))
Example #15
def run(shouldExit=False):
    config = SelectSdssImagesTask.ConfigClass()
    try:
        user = DbAuth.username(config.host, str(config.port))
    except Exception:
        print("Warning: did not find host=%s, port=%s in your db-auth file; "
              "skipping SelectSdssImagesTask unit tests" %
              (config.host, str(config.port)))
        return

    utilsTests.run(suite(), shouldExit)
Example #16
def main():
    # Setup command line options
    parser = argparse.ArgumentParser(
        description="Program which transposes a key-value table into a table "
        "where each key is mapped to a column.")
    addDbOptions(parser)
    parser.add_argument(
        "-s",
        "--skip-keys",
        dest="skipKeys",
        help="Comma separated list of metadata keys to omit in the output table"
    )
    parser.add_argument("-c",
                        "--compress",
                        dest="compress",
                        action="store_true",
                        help="Lift keys with constant values into a view")
    parser.add_argument(
        "database",
        help="Name of database containing metadata table to transpose")
    parser.add_argument("metadataTable",
                        help="Name of metadata table to transpose")
    parser.add_argument("idCol",
                        help="Primary key column name for metadata table")
    parser.add_argument("outputTable", help="Name of output table to create")
    ns = parser.parse_args()
    db, metadataTable, idCol, outputTable = (
        ns.database, ns.metadataTable, ns.idCol, ns.outputTable)
    if DbAuth.available(ns.host, str(ns.port)):
        ns.user = DbAuth.username(ns.host, str(ns.port))
        passwd = DbAuth.password(ns.host, str(ns.port))
    elif os.path.exists(os.path.join(os.environ["HOME"], ".mysql.cnf")):
        passwd = None
    else:
        passwd = getpass.getpass("%s's MySQL password: " % ns.user)
    skipCols = set()
    if ns.skipKeys is not None:
        skipCols = set([k.strip() for k in ns.skipKeys.split(",")])
    run(ns.host, ns.port, ns.user, passwd, db, metadataTable, idCol,
        outputTable, skipCols, ns.compress)
Example #17
 def __init__(self, host, database, user, port=3306, password=None):
     self.host = host
     self.port = port
     self.user = user
     self.database = database
     if password is None:
         if self.host is not None and self.port is not None and \
                 DbAuth.available(self.host, str(self.port)):
             self.user = DbAuth.username(self.host, str(self.port))
             password = DbAuth.password(self.host, str(self.port))
         elif not os.path.exists(os.path.join(os.environ['HOME'], ".my.cnf")):
             password = getpass.getpass("%s's MySQL password: " % user)
     self.password = password
     self.mysqlCmd = ['mysql']
     if host is not None:
         self.mysqlCmd += ['-h', self.host]
     if port is not None:
         self.mysqlCmd += ['-P', str(self.port)]
     if user is not None:
         self.mysqlCmd += ['-u', self.user]
     if password is not None:
         self.mysqlCmd += ['-p' + self.password]
Example #18
    def __init__(self, database=None, driver='sqlite', host=None, port=None,
                 trackingDbverbose=False):
        """
        Instantiate the results database, creating metrics, plots and summarystats tables.
        """
        self.verbose = trackingDbverbose
        # Connect to database
        # for sqlite, connecting to non-existent database creates it automatically
        if database is None:
            # Default is a file in the current directory.
            self.database = os.path.join(os.getcwd(), 'trackingDb_sqlite.db')
            self.driver = 'sqlite'
        else:
            self.database = database
            self.driver = driver
            self.host = host
            self.port = port

        if self.driver == 'sqlite':
            dbAddress = url.URL(drivername=self.driver, database=self.database)
        else:
            dbAddress = url.URL(self.driver,
                                username=DbAuth.username(self.host, str(self.port)),
                                password=DbAuth.password(self.host, str(self.port)),
                                host=self.host,
                                port=self.port,
                                database=self.database)

        engine = create_engine(dbAddress, echo=self.verbose)
        if self.verbose:
            print('Created or connected to MAF tracking %s database at %s'
                  % (self.driver, self.database))
        self.Session = sessionmaker(bind=engine)
        self.session = self.Session()
        # Create the tables, if they don't already exist.
        try:
            Base.metadata.create_all(engine)
        except DatabaseError:
            raise DatabaseError("Cannot create a %s database at %s. Check directory exists."
                                % (self.driver, self.database))
Example #19
 def __init__(self, host, database, user, port=3306, password=None):
     self.host = host
     self.port = port
     self.user = user
     self.database = database
     if password is None:
         if self.host is not None and self.port is not None and \
                 DbAuth.available(self.host, str(self.port)):
             self.user = DbAuth.username(self.host, str(self.port))
             password = DbAuth.password(self.host, str(self.port))
         elif not os.path.exists(os.path.join(os.environ['HOME'],
                                              ".my.cnf")):
             password = getpass.getpass("%s's MySQL password: " % user)
     self.password = password
     self.mysqlCmd = ['mysql']
     if host is not None:
         self.mysqlCmd += ['-h', self.host]
     if port is not None:
         self.mysqlCmd += ['-P', str(self.port)]
     if user is not None:
         self.mysqlCmd += ['-u', self.user]
     if password is not None:
         self.mysqlCmd += ['-p' + self.password]
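Note: Examples #17 and #19 only assemble the mysql CLI argument list (self.mysqlCmd); here is a hypothetical way such a list could be used, with placeholder credentials matching the test fixtures on this page (the subprocess invocation is an assumption, not part of the original class):

import subprocess

# Hypothetical: mirrors the list built in Example #19, i.e.
# ['mysql', '-h', host, '-P', port, '-u', user, '-ppassword'].
mysqlCmd = ['mysql', '-h', 'lsst10.ncsa.uiuc.edu', '-P', '3306',
            '-u', 'test', '-pglobular.test']
proc = subprocess.run(mysqlCmd + ['test_ap'], input='SELECT 1;',
                      capture_output=True, text=True)
print(proc.stdout)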
Example #20
File: truth.py  Project: heather999/Monitor
    def dbConnection(self):
        """
        The pymssql connection to the catsim database used to query reference
        objects
        """
        if self._dbConnection is None:
            config = bcm.BaseCatalogConfig()
            config.load(os.path.join(getPackageDir("sims_catUtils"), "config",
                                     "db.py"))

            username = DbAuth.username(config.host, config.port)
            password = DbAuth.password(config.host, config.port)
            hostname = config.host
            if self.dbHostName is not None:
                hostname = self.dbHostName
            DBConnection = pymssql.connect(user=username,
                                           password=password,
                                           host=hostname,
                                           database=config.database,
                                           port=config.port)
            return DBConnection
        else:
            return self._dbConnection
Example #21
 def setUp(self):
     pol = Policy(os.path.join(ROOT, "testDbAuth.paf"))
     DbAuth.setPolicy(pol)
Example #22
    def run(self, coordList, filter):
        """Select Decam images suitable for coaddition in a particular region

        @param[in] filter: filter for images (one of g", "r", "i", "z", Y")
        @param[in] coordList: list of coordinates defining region of interest

        @return a pipeBase Struct containing:
        - exposureInfoList: a list of ExposureInfo objects
        """
        if filter not in set(("g", "r", "i", "z", "Y")):
            raise RuntimeError("filter=%r is an invalid name" % (filter, ))

        read_default_file = os.path.expanduser("~/.my.cnf")

        try:
            open(read_default_file)
            kwargs = dict(read_default_file=read_default_file, )
        except IOError:
            kwargs = dict(
                user=DbAuth.username(self.config.host, str(self.config.port)),
                passwd=DbAuth.password(self.config.host,
                                       str(self.config.port)),
            )

        db = MySQLdb.connect(host=self.config.host,
                             port=self.config.port,
                             db=self.config.database,
                             **kwargs)
        cursor = db.cursor()

        columnNames = tuple(ExposureInfo.getColumnNames())
        if not columnNames:
            raise RuntimeError("Bug: no column names")
        queryStr = "select %s " % (", ".join(columnNames), )
        dataTuple = ()  # tuple(columnNames)

        if coordList is not None:
            # look for exposures that overlap the specified region

            # create table scisql.Region containing patch region
            coordStrList = [
                "%s, %s" %
                (c.getLongitude().asDegrees(), c.getLatitude().asDegrees())
                for c in coordList
            ]
            coordStr = ", ".join(coordStrList)
            coordCmd = "call scisql.scisql_s2CPolyRegion(scisql_s2CPolyToBin(%s), 10)" % (
                coordStr, )
            cursor.execute(coordCmd)
            cursor.nextset()  # ignore one-line result of coordCmd

            queryStr += """
    from %s as ccdExp,
    (select distinct id
    from y1CcdQuality_To_Htm10 as ccdHtm inner join scisql.Region
    on (ccdHtm.htmId10 between scisql.Region.htmMin and scisql.Region.htmMax)
    where ccdHtm.filter = %%s) as idList
    where ccdExp.id = idList.id and """ % (self.config.table, )
            dataTuple += (filter, )
        else:
            # no region specified; look over the whole sky
            queryStr += " from %s as ccdExp where " % (self.config.table, )

        # compute where clauses as a list of (clause, data)
        whereDataList = [
            ("filter = %s", filter),
        ]

        queryStr += " and ".join(wd[0] for wd in whereDataList)
        dataTuple += tuple(wd[1] for wd in whereDataList)
        self.log.info("queryStr=%r; dataTuple=%s" % (queryStr, dataTuple))

        cursor.execute(queryStr, dataTuple)
        exposureInfoList = [ExposureInfo(result) for result in cursor]

        return pipeBase.Struct(exposureInfoList=exposureInfoList, )
Example #23
 def testSetPolicy(self):
     self.assert_(DbAuth.available("lsst10.ncsa.uiuc.edu", "3306"))
     self.assertEqual(DbAuth.authString("lsst10.ncsa.uiuc.edu", "3306"),
             "test:globular.test")
     self.assertEqual(DbAuth.username("lsst10.ncsa.uiuc.edu", "3306"),
             "test")
     self.assertEqual(DbAuth.password("lsst10.ncsa.uiuc.edu", "3306"),
             "globular.test")
     self.assert_(DbAuth.available("lsst10.ncsa.uiuc.edu", "3307"))
     self.assertEqual(DbAuth.authString("lsst10.ncsa.uiuc.edu", "3307"),
             "boris:natasha")
     self.assertEqual(DbAuth.username("lsst10.ncsa.uiuc.edu", "3307"),
             "boris")
     self.assertEqual(DbAuth.password("lsst10.ncsa.uiuc.edu", "3307"),
             "natasha")
     self.assert_(DbAuth.available("lsst9.ncsa.uiuc.edu", "3306"))
     self.assertEqual(DbAuth.authString("lsst9.ncsa.uiuc.edu", "3306"),
             "rocky:squirrel")
     self.assertEqual(DbAuth.username("lsst9.ncsa.uiuc.edu", "3306"),
             "rocky")
     self.assertEqual(DbAuth.password("lsst9.ncsa.uiuc.edu", "3306"),
             "squirrel")
Example #24
    def run(self, coordList, filter):
        """Select Decam images suitable for coaddition in a particular region

        @param[in] filter: filter for images (one of g", "r", "i", "z", Y")
        @param[in] coordList: list of coordinates defining region of interest

        @return a pipeBase Struct containing:
        - exposureInfoList: a list of ExposureInfo objects
        """
        if filter not in set(("g", "r", "i", "z", "Y")):
            raise RuntimeError("filter=%r is an invalid name" % (filter,))

        read_default_file = os.path.expanduser("~/.my.cnf")

        try:
            open(read_default_file)
            kwargs = dict(read_default_file=read_default_file)
        except IOError:
            kwargs = dict(
                user=DbAuth.username(self.config.host, str(self.config.port)),
                passwd=DbAuth.password(self.config.host, str(self.config.port)),
            )

        db = MySQLdb.connect(host=self.config.host, port=self.config.port, db=self.config.database, **kwargs)
        cursor = db.cursor()

        columnNames = tuple(ExposureInfo.getColumnNames())
        if not columnNames:
            raise RuntimeError("Bug: no column names")
        queryStr = "select %s " % (", ".join(columnNames),)
        dataTuple = ()  # tuple(columnNames)

        if coordList is not None:
            # look for exposures that overlap the specified region

            # create table scisql.Region containing patch region
            coordStrList = ["%s, %s" % (c.getLongitude().asDegrees(), c.getLatitude().asDegrees()) for c in coordList]
            coordStr = ", ".join(coordStrList)
            coordCmd = "call scisql.scisql_s2CPolyRegion(scisql_s2CPolyToBin(%s), 10)" % (coordStr,)
            cursor.execute(coordCmd)
            cursor.nextset()  # ignore one-line result of coordCmd

            queryStr += """
    from %s as ccdExp,
    (select distinct id
    from y1CcdQuality_To_Htm10 as ccdHtm inner join scisql.Region
    on (ccdHtm.htmId10 between scisql.Region.htmMin and scisql.Region.htmMax)
    where ccdHtm.filter = %%s) as idList
    where ccdExp.id = idList.id and """ % (
                self.config.table,
            )
            dataTuple += (filter,)
        else:
            # no region specified; look over the whole sky
            queryStr += " from %s as ccdExp where " % (self.config.table,)

        # compute where clauses as a list of (clause, data)
        whereDataList = [("filter = %s", filter)]

        queryStr += " and ".join(wd[0] for wd in whereDataList)
        dataTuple += tuple(wd[1] for wd in whereDataList)
        self.log.info("queryStr=%r; dataTuple=%s" % (queryStr, dataTuple))

        cursor.execute(queryStr, dataTuple)
        exposureInfoList = [ExposureInfo(result) for result in cursor]

        return pipeBase.Struct(exposureInfoList=exposureInfoList)
Example #25
    def run(self, coordList, filter):
        """Select LSST images suitable for coaddition in a particular region
        
        @param[in] coordList: list of coordinates defining region of interest; if None then select all images
        @param[in] filter: filter (e.g. "g", "r", "i"...)
        
        @return a pipeBase Struct containing:
        - exposureInfoList: a list of ExposureInfo objects, which have the following fields:
            - dataId: data ID of exposure (a dict)
            - coordList: a list of corner coordinates of the exposure (list of afwCoord.IcrsCoord)
            - fwhm: fwhm column
        """
        db = MySQLdb.connect(
            host=self.config.host,
            port=self.config.port,
            user=DbAuth.username(self.config.host, str(self.config.port)),
            passwd=DbAuth.password(self.config.host, str(self.config.port)),
            db=self.config.database,
        )
        cursor = db.cursor()

        if coordList is not None:
            # look for exposures that overlap the specified region

            # create table scisql.Region containing patch region
            coordStrList = ["%s, %s" % (c.getLongitude().asDegrees(),
                                        c.getLatitude().asDegrees()) for c in coordList]
            coordStr = ", ".join(coordStrList)
            coordCmd = "call scisql.scisql_s2CPolyRegion(scisql_s2CPolyToBin(%s), 10)" % (coordStr,)
            cursor.execute(coordCmd)
            cursor.nextset() # ignore one-line result of coordCmd
        
            # find exposures
            queryStr = ("""select %s
                from Science_Ccd_Exposure as ccdExp,
                    (select distinct scienceCcdExposureId
                    from Science_Ccd_Exposure_To_Htm10 as ccdHtm inner join scisql.Region
                    on (ccdHtm.htmId10 between scisql.Region.htmMin and scisql.Region.htmMax)) as idList
                where ccdExp.scienceCcdExposureId = idList.scienceCcdExposureId
                    and filterName = %%s
                    and fwhm < %%s
                """ % ExposureInfo.getColumnNames())
        else:
            # no region specified; look over the whole sky
            queryStr = ("""select %s
                from Science_Ccd_Exposure
                where filterName = %%s
                    and fwhm < %%s
                """ % ExposureInfo.getColumnNames())
        
        if self.config.maxExposures is not None:
            queryStr += " limit %s" % (self.config.maxExposures,)

        dataTuple = (filter, self.config.maxFwhm)

        self.log.info("queryStr=%r; dataTuple=%s" % (queryStr, dataTuple))

        cursor.execute(queryStr, dataTuple)
        exposureInfoList = [ExposureInfo(result) for result in cursor]

        return pipeBase.Struct(exposureInfoList=exposureInfoList)
Example #26
                        "--verbose",
                        action="store_true",
                        dest="verbose",
                        help="verbose")

    args = parser.parse_args()

    host = args.host
    port = args.port
    database = args.database

    #
    # get database authorization info
    #

    dbAuth = DbAuth()
    user = dbAuth.username(host, str(port))
    password = dbAuth.password(host, str(port))

    # connect to the database
    dbm = DatabaseManager(host, port, user, password)

    # create the database if it doesn't exist
    if not dbm.dbExists(database):
        dbm.createDb(database)

    # create the LogIngestor, which creates all the tables, and will
    # be used to consolidate file information
    logIngestor = LogIngestor(dbm, database)

    # go through the list of files and ingest them, ignoring any that don't exist.
Example #27
    def run(self, coordList, filter):
        """Select LSST images suitable for coaddition in a particular region

        @param[in] coordList: list of coordinates defining region of interest; if None then select all images
        @param[in] filter: filter (e.g. "g", "r", "i"...)

        @return a pipeBase Struct containing:
        - exposureInfoList: a list of ExposureInfo objects, which have the following fields:
            - dataId: data ID of exposure (a dict)
            - coordList: a list of corner coordinates of the exposure (list of afwCoord.IcrsCoord)
            - fwhm: fwhm column
        """
        db = MySQLdb.connect(
            host=self.config.host,
            port=self.config.port,
            user=DbAuth.username(self.config.host, str(self.config.port)),
            passwd=DbAuth.password(self.config.host, str(self.config.port)),
            db=self.config.database,
        )
        cursor = db.cursor()

        if coordList is not None:
            # look for exposures that overlap the specified region

            # create table scisql.Region containing patch region
            coordStrList = [
                "%s, %s" %
                (c.getLongitude().asDegrees(), c.getLatitude().asDegrees())
                for c in coordList
            ]
            coordStr = ", ".join(coordStrList)
            coordCmd = "call scisql.scisql_s2CPolyRegion(scisql_s2CPolyToBin(%s), 10)" % (
                coordStr, )
            cursor.execute(coordCmd)
            cursor.nextset()  # ignore one-line result of coordCmd

            # find exposures
            queryStr = ("""select %s
                from Science_Ccd_Exposure as ccdExp,
                    (select distinct scienceCcdExposureId
                    from Science_Ccd_Exposure_To_Htm10 as ccdHtm inner join scisql.Region
                    on (ccdHtm.htmId10 between scisql.Region.htmMin and scisql.Region.htmMax)) as idList
                where ccdExp.scienceCcdExposureId = idList.scienceCcdExposureId
                    and filterName = %%s
                    and fwhm < %%s
                """ % ExposureInfo.getColumnNames())
        else:
            # no region specified; look over the whole sky
            queryStr = ("""select %s
                from Science_Ccd_Exposure
                where filterName = %%s
                    and fwhm < %%s
                """ % ExposureInfo.getColumnNames())

        if self.config.maxExposures is not None:
            queryStr += " limit %s" % (self.config.maxExposures, )

        dataTuple = (filter, self.config.maxFwhm)

        self.log.info("queryStr=%r; dataTuple=%s", queryStr, dataTuple)

        cursor.execute(queryStr, dataTuple)
        exposureInfoList = [ExposureInfo(result) for result in cursor]

        return pipeBase.Struct(exposureInfoList=exposureInfoList, )
Example #28
 def setUp(self):
     self.pol = Policy("tests/testDbAuth.paf")
     DbAuth.setPolicy(self.pol)
Example #29
    def run(self, coordList, filter, strip=None):
        """Select SDSS images suitable for coaddition in a particular region
        
        @param[in] filter: filter for images (one of "u", "g", "r", "i" or "z")
        @param[in] coordList: list of coordinates defining region of interest
        
        @return a pipeBase Struct containing:
        - exposureInfoList: a list of ExposureInfo objects
    
        @raise RuntimeError if filter not one of "u", "g", "r", "i" or "z"
        """
        if filter not in set(("u", "g", "r", "i", "z")):
            raise RuntimeError("filter=%r is an invalid name" % (filter,))

        read_default_file = os.path.expanduser("~/.my.cnf")

        try:
            open(read_default_file)
            kwargs = dict(
                read_default_file=read_default_file,
                )
        except IOError:
            kwargs = dict(
                user=DbAuth.username(self.config.host, str(self.config.port)),
                passwd=DbAuth.password(self.config.host, str(self.config.port)),
            )


        db = MySQLdb.connect(
            host=self.config.host,
            port=self.config.port,
            db=self.config.database,
            **kwargs
        )
        cursor = db.cursor()
        
        columnNames = tuple(ExposureInfo.getColumnNames())
        if not columnNames:
            raise RuntimeError("Bug: no column names")
        queryStr = "select %s " % (", ".join(columnNames),)
        dataTuple = () # tuple(columnNames)

        if coordList is not None:
            # look for exposures that overlap the specified region

            # create table scisql.Region containing patch region
            coordStrList = ["%s, %s" % (c.getLongitude().asDegrees(),
                                        c.getLatitude().asDegrees()) for c in coordList]
            coordStr = ", ".join(coordStrList)
            coordCmd = "call scisql.scisql_s2CPolyRegion(scisql_s2CPolyToBin(%s), 10)" % (coordStr,)
            cursor.execute(coordCmd)
            cursor.nextset() # ignore one-line result of coordCmd

            queryStr += """
from %s as ccdExp,
    (select distinct fieldid
    from SeasonFieldQuality_To_Htm10 as ccdHtm inner join scisql.Region
    on (ccdHtm.htmId10 between scisql.Region.htmMin and scisql.Region.htmMax)
    where ccdHtm.filter = %%s) as idList
where ccdExp.fieldid = idList.fieldid and """ % (self.config.table,)
            dataTuple += (filter,)
        else:
            # no region specified; look over the whole sky
            queryStr += """
from %s as ccdExp where """ % (self.config.table,)
        
        # compute where clauses as a list of (clause, data)
        whereDataList = [
            ("filter = %s", filter),
        ]

        if self.config.camcols is not None:
            whereDataList.append(_whereDataFromList("camcol", self.config.camcols))

        if strip is not None: # None corresponds to query for both strips: no constraint added
            whereDataList.append(("strip = %s", strip))
        
        queryStr += " and ".join(wd[0] for wd in whereDataList)
        dataTuple += tuple(wd[1] for wd in whereDataList)
        
        self.log.info("queryStr=%r; dataTuple=%s" % (queryStr, dataTuple))

        cursor.execute(queryStr, dataTuple)
        exposureInfoList = [ExposureInfo(result) for result in cursor]
        
        runExpInfoSetDict = dict()
        for expInfo in exposureInfoList:
            run = expInfo.dataId["run"]
            expInfoSet = runExpInfoSetDict.get(run)
            if expInfoSet:
                expInfoSet.add(expInfo)
            else:
                runExpInfoSetDict[run] = set([expInfo])
        
        self.log.info("Before quality cuts found %d exposures in %d runs" % \
            (len(exposureInfoList), len(runExpInfoSetDict)))
        
        goodRunSet = set()
        goodExposureInfoList = []
        if self.config.rejectWholeRuns:
            # reject runs for which any exposure does not meet our quality criteria
            # or the run begins or ends in the region
            regionRaRange = None
            regionCtrRa = None
            if coordList is not None:
                regionRaRange = _computeRaRange(coordList)
                regionCtrRa = (regionRaRange[0] + regionRaRange[1]) * 0.5

            numRangeCuts = 0
            for run, expInfoSet in runExpInfoSetDict.items():
                runRaRange = None
                for expInfo in expInfoSet:
                    if self._isBadExposure(expInfo):
                        break
                    
                    if regionRaRange is not None:
                        expRaRange = _computeRaRange(expInfo.coordList, ctrRa=regionCtrRa)
                        if runRaRange is None:
                            runRaRange = expRaRange
                        else:
                            runRaRange = (min(runRaRange[0], expRaRange[0]), max(runRaRange[1], expRaRange[1]))
                else:
                    # all exposures in this run are valid;
                    # if appropriate, check that the run starts and ends outside the region
                    if regionRaRange is not None:
                        if (runRaRange[0] > regionRaRange[0]) or (runRaRange[1] < regionRaRange[1]):
                            numRangeCuts += 1
                            continue

                    goodExposureInfoList += list(expInfoSet)
                    goodRunSet.add(run)
            self.log.info("Rejected %d whole runs, including %d for incomplete range" % \
                (len(runExpInfoSetDict) - len(goodRunSet), numRangeCuts))
        else:
            # reject individual exposures which do not meet our quality criteria
            for expInfo in exposureInfoList:
                if not self._isBadExposure(expInfo):
                    goodExposureInfoList.append(expInfo)
                    goodRunSet.add(expInfo.dataId["run"])
            self.log.info("Rejected %d individual exposures" % \
                (len(exposureInfoList) - len(goodExposureInfoList),))

        exposureInfoList = goodExposureInfoList
        
        self.log.info("After quality cuts, found %d exposures in %d runs" % \
            (len(exposureInfoList), len(goodRunSet)))
        
        if exposureInfoList:
            # compute qscore according to RHL's formula and sort by it
            qArr = numpy.array([expInfo.q for expInfo in exposureInfoList])
            qMax = numpy.percentile(qArr, 95.0)
            for expInfo in exposureInfoList:
                expInfo.qscore = (expInfo.q / qMax) - expInfo.quality
            exposureInfoList.sort(key=lambda expInfo: expInfo.qscore)
    
            if self.config.maxExposures is not None:
                # select config.maxExposures exposures with the highest qscore
                exposureInfoList = exposureInfoList[0:self.config.maxExposures]
                self.log.info("After maxExposures cut, found %d exposures" % \
                    (len(exposureInfoList),))
    
            elif self.config.maxRuns is not None:
                # select config.maxRuns runs with the highest median qscore
                # (of those exposures that overlap the patch)
                runQualListDict = dict()
                for expInfo in exposureInfoList:
                    run = expInfo.dataId["run"]
                    qualList = runQualListDict.get(run)
                    if qualList:
                        qualList.append(expInfo.qscore)
                    else:
                        runQualListDict[run] = [expInfo.qscore]
                
                if len(runQualListDict) > self.config.maxRuns:
                    qualRunList = []
                    for run, qualList in runQualListDict.items():
                        runQscore = numpy.median(qualList)
                        qualRunList.append((runQscore, run))
                    qualRunList.sort()
                    qualRunList = qualRunList[0:self.config.maxRuns]
                    
                    goodRunSet = set(qr[1] for qr in qualRunList)
                    exposureInfoList = [ei for ei in exposureInfoList if ei.dataId["run"] in goodRunSet]

        return pipeBase.Struct(exposureInfoList=exposureInfoList)
Example #30
 def tearDown(self):
     DbAuth.resetPolicy()
Example #31
 def tearDown(self):
     DbAuth.setPolicy(Policy())
     del self.pol
Example #32
    def run(self, dataId, coordList):
        """Select flugMag0's of SDSS images for a particular run

        @param[in] dataId: a dataId containing at least a run and filter
        @param[in] coordList: list of coordinates defining region of interest

        @return a pipeBase Struct containing:
        - fluxMagInfoList: a list of FluxMagInfo objects
        """
        argDict = self.runArgDictFromDataId(dataId)
        run = argDict["run"]
        filter = argDict["filter"]

        if filter not in set(("u", "g", "r", "i", "z")):
            raise RuntimeError("filter=%r is an invalid name" % (filter,))

        filterDict = {"u": 0,
                      "g": 1,
                      "r": 2,
                      "i": 3,
                      "z": 4}

        if self._display:
            self.log.info(self.config.database)

        db = MySQLdb.connect(
            host=self.config.host,
            port=self.config.port,
            db=self.config.database,
            user=DbAuth.username(self.config.host, str(self.config.port)),
            passwd=DbAuth.password(self.config.host, str(self.config.port)),
        )
        cursor = db.cursor()

        columnNames = tuple(FluxMagInfo.getColumnNames())

        queryStr = "select %s from Science_Ccd_Exposure where " % (", ".join(columnNames))
        dataTuple = ()

        if coordList is not None:
            # look for exposures that overlap the specified region
            for c in coordList:
                dataTuple += (c.getLongitude().asDegrees(), c.getLatitude().asDegrees())
            queryStr += " scisql_s2PtInCPoly(ra, decl"
            queryStr += ", %s, %s" * len(coordList)
            queryStr += ") = 1 and "

        # compute where clauses as a list of (clause, data)
        whereDataList = [
            ("filterId = %s", filterDict[filter]),
            ("run = %s", run),
        ]

        queryStr += " and ".join(wd[0] for wd in whereDataList)
        dataTuple += tuple(wd[1] for wd in whereDataList)

        queryStr += " order by field desc"
        if self._display:
            self.log.info("queryStr=%r; dataTuple=%s" % (queryStr, dataTuple))

        cursor.execute(queryStr, dataTuple)
        exposureInfoList = [FluxMagInfo(result) for result in cursor]
        if self._display:
            self.log.info("Found %d exposures" % \
                      (len(exposureInfoList)))

        return pipeBase.Struct(fluxMagInfoList=exposureInfoList)
Example #33
    def __init__(self, args):
        self.datetime = time.strftime("%Y_%m%d_%H%M%S")
        self.user = pwd.getpwuid(os.getuid())[0]
        if self.user == 'buildbot':
            RunConfiguration.pipeQaBase = re.sub(r'dev', 'buildbot',
                                                 RunConfiguration.pipeQaBase)
            RunConfiguration.pipeQaDir = re.sub(r'dev', 'buildbot',
                                                RunConfiguration.pipeQaDir)
        self.dbUser = DbAuth.username(RunConfiguration.dbHost,
                                      str(RunConfiguration.dbPort))
        self.hostname = socket.getfqdn()
        self.fromAddress = "%s@%s" % (self.user, self.hostname)

        self.options, self.args = self.parseOptions(args)

        # Handle immediate commands
        if self.options.printStatus:
            self.printStatus()
            sys.exit(0)
        if self.options.report is not None:
            self.report(
                os.path.join(self.options.output, self.options.report, "run",
                             "run.log"))
            sys.exit(0)
        if self.options.listRuns:
            self.listRuns(self.options.listRuns)
            sys.exit(0)
        if self.options.listInputs:
            self.listInputs()
            sys.exit(0)
        if self.options.linkLatest is not None:
            self.linkLatest(self.options.linkLatest)
            sys.exit(0)
        if self.options.kill is not None:
            self.kill(self.options.kill)
            sys.exit(0)
        if self.options.hosts is not None:
            self.hosts()
            sys.exit(0)

        if self.arch is None:
            if self.options.arch is None:
                raise RuntimeError("Architecture is required")
            self.arch = self.options.arch

        if re.search(r'[^a-zA-Z0-9_]', self.options.runType):
            raise RuntimeError("Run type '%s' must be one word" %
                               (self.options.runType, ))

        self.collectionName = re.sub(r'\.', '_', RunConfiguration.collection)
        runIdProperties = dict(user=self.user,
                               dbUser=self.dbUser,
                               coll=self.collectionName,
                               runType=self.options.runType,
                               datetime=self.datetime)
        # When resuming a run, use provided runID
        if self.options.resumeRunId is None:
            self.runId = RunConfiguration.runIdPattern % runIdProperties
        else:
            self.runId = self.options.resumeRunId
        runIdProperties['runid'] = self.runId
        dbNamePattern = "%(dbUser)s_%(coll)s_u_%(runid)s"
        self.dbName = dbNamePattern % runIdProperties

        self.inputBase = os.path.join(RunConfiguration.inputBase,
                                      self.options.input)
        self.inputDirectory = os.path.join(self.inputBase,
                                           RunConfiguration.collection)
        self.outputDirectory = os.path.join(self.options.output, self.runId)
        self.outputDirectory = os.path.abspath(self.outputDirectory)
        if self.options.resumeRunId is None:
            if os.path.exists(self.outputDirectory):
                raise RuntimeError("Output directory %s already exists" %
                                   (self.outputDirectory, ))
            os.mkdir(self.outputDirectory)
        elif not os.path.exists(self.outputDirectory):
            raise RuntimeError(
                "Output directory %s does not exist for resumed run" %
                (self.outputDirectory, ))

        self.pipeQaUrl = RunConfiguration.pipeQaBase + self.dbName + "/"

        self.eupsPath = os.environ['EUPS_PATH']
        e = eups.Eups(readCache=False)
        self.setups = dict()
        for product in e.getSetupProducts():
            if product.name != "eups":
                self.setups[product.name] = \
                        re.sub(r'^LOCAL:', "-r ", product.version)

        # TODO -- load policy and apply overrides
        self.options.override = None
Example #34
    def setUp(self):
        # Turn on tracing
        log.Trace.setVerbosity('', 10)
        log.ScreenLog.createDefaultLog(True, log.Log.INFO)

        # Eventually, these should be read from a policy somewhere
        self.dbServer = 'lsst10.ncsa.uiuc.edu'
        self.dbPort = '3306'
        self.dbType = 'mysql'
        if not DbAuth.available(self.dbServer, self.dbPort):
            self.fail("Cannot access database server %s:%s" %
                      (self.dbServer, self.dbPort))
        # Construct test run database name
        self.runId = DbAuth.username(self.dbServer, self.dbPort) +\
                     time.strftime("_test_ap_%y%m%d_%H%M%S", time.gmtime())

        # Tweak these to run on different input data, or with a different number of slices
        self.universeSize = 2
        self.visitId = 708125
        self.filter = 'u'
        self.ra = 333.880166667
        self.dec = -17.7374166667

        self.dbUrlPrefix = ''.join(
            [self.dbType, '://', self.dbServer, ':', self.dbPort, '/'])
        self.dbUrl = self.dbUrlPrefix + self.runId
        self.substitutions = {
            'visitId': self.visitId,
            'filter': self.filter,
            'runId': self.runId
        }
        # Create a database specifically for the test (copy relevant
        # tables from the test_ap database)
        mysqlStatements = [
            """CREATE DATABASE %(runId)s""", """USE %(runId)s""",
            """CREATE TABLE VarObject LIKE test_ap.Object""",
            """CREATE TABLE NonVarObject LIKE test_ap.Object""",
            """CREATE TABLE DIASource LIKE test_ap.DIASource""",
            """CREATE TABLE prv_Filter LIKE test_ap.prv_Filter""",
            """INSERT INTO prv_Filter SELECT * FROM test_ap.prv_Filter""",
            """CREATE TABLE _tmp_v%(visitId)d_DIASource
               LIKE test_ap._tmp_v%(visitId)d_DIASource""",
            """INSERT INTO _tmp_v%(visitId)d_DIASource
               SELECT * FROM test_ap._tmp_v%(visitId)d_DIASource""",
            """CREATE TABLE _tmp_v%(visitId)d_Preds
               LIKE test_ap._tmp_v%(visitId)d_Preds""",
            """INSERT INTO _tmp_v%(visitId)d_Preds
               SELECT * FROM test_ap._tmp_v%(visitId)d_Preds""",
            """CREATE TABLE _tmpl_MatchPair LIKE test_ap._tmpl_MatchPair""",
            """CREATE TABLE _tmpl_IdPair LIKE test_ap._tmpl_IdPair""",
            """CREATE TABLE _tmpl_InMemoryObject LIKE test_ap._tmpl_InMemoryObject""",
            """CREATE TABLE _tmpl_InMemoryMatchPair LIKE test_ap._tmpl_InMemoryMatchPair""",
            """CREATE TABLE _tmpl_InMemoryId LIKE test_ap._tmpl_InMemoryId""",
            """CREATE TABLE _ap_DIASourceToObjectMatches LIKE test_ap._ap_DIASourceToObjectMatches""",
            """CREATE TABLE _ap_PredToDIASourceMatches LIKE test_ap._ap_PredToDIASourceMatches""",
            """CREATE TABLE _ap_DIASourceToNewObject LIKE test_ap._ap_DIASourceToNewObject""",
            """CREATE TABLE _mops_Prediction LIKE test_ap._mops_Prediction"""
        ]
        db = DbStorage()
        db.setPersistLocation(LogicalLocation(self.dbUrlPrefix + 'test_ap'))
        try:
            for stmt in mysqlStatements:
                db.executeSql(stmt % self.substitutions)

            # Specify list of stages ...
            self.stages = [
                ap.LoadStage, InputStage, ap.MatchDiaSourcesStage, OutputStage,
                InputStage, ap.MatchMopsPredsStage, OutputStage, ap.StoreStage
            ]

            # and read in stage policy for each stage
            policyDir = os.path.join(os.environ['AP_DIR'], 'pipeline',
                                     'examples', 'policy')
            self.policies = [
                Policy(os.path.join(policyDir, 'LoadStage.paf')),
                Policy(os.path.join(policyDir,
                                    'MatchDiaSourcesStageInput.paf')), None,
                Policy(
                    os.path.join(policyDir, 'MatchDiaSourcesStageOutput.paf')),
                Policy(os.path.join(policyDir,
                                    'MatchMopsPredsStageInput.paf')), None,
                Policy(os.path.join(policyDir,
                                    'MatchMopsPredsStageOutput.paf')),
                Policy(os.path.join(policyDir, 'StoreStage.paf'))
            ]

            # construct PropertySet for string interpolation
            psSubs = PropertySet()
            psSubs.setInt('visitId', self.visitId)
            psSubs.setString('runId', self.runId)
            psSubs.setString('filter', self.filter)
            psSubs.setString('work', '.')
            psSubs.setString('input', '/tmp')
            psSubs.setString('output', '/tmp')
            psSubs.setString('update', '/tmp')
            psSubs.setString('dbUrl', self.dbUrl)
            LogicalLocation.setLocationMap(psSubs)
        except:
            # cleanup database in case of error
            db.executeSql("DROP DATABASE %(runId)s" % self.substitutions)
            raise
Example #35
    def analyzeLogs(self, runId, inProgress=False):
        import MySQLdb
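        # matches orca "Processing job:" log lines, capturing raft, sensor,
        # and visit from four key=value terms that may appear in any order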
        jobStartRegex = re.compile(r"Processing job:"
                                   r"(\s+raft=(?P<raft>\d,\d)"
                                   r"|\s+sensor=(?P<sensor>\d,\d)"
                                   r"|\s+type=calexp"
                                   r"|\s+visit=(?P<visit>\d+)){4}")

        host = RunConfiguration.dbHost
        port = RunConfiguration.dbPort
        with MySQLdb.connect(host=host,
                             port=port,
                             user=self.dbUser,
                             passwd=DbAuth.password(host, str(port))) as conn:
            runpat = '%' + runId + '%'
            conn.execute("SHOW DATABASES LIKE %s", (runpat, ))
            ret = conn.fetchall()
            if ret is None or len(ret) == 0:
                raise NoMatchError("No match for run %s" % (runId, ))
            elif len(ret) > 1:
                raise RuntimeError("Multiple runs match:\n" +
                                   str([r[0] for r in ret]))
            dbName = ret[0][0]

        result = ""
        try:
            conn = MySQLdb.connect(host=host,
                                   port=port,
                                   user=self.dbUser,
                                   passwd=DbAuth.password(host, str(port)),
                                   db=dbName)

            cursor = conn.cursor()
            cursor.execute("""SELECT TIMESTAMP, timereceived FROM Logs
                WHERE id = (SELECT MIN(id) FROM Logs)""")
            row = cursor.fetchone()
            if row is None:
                if inProgress:
                    return "No log entries yet\n"
                else:
                    return "*** No log entries written\n"
            startTime, start = row
            result += "First orca log entry: %s\n" % (start, )

            cursor = conn.cursor()
            cursor.execute("""SELECT TIMESTAMP, timereceived FROM Logs
                WHERE id = (SELECT MAX(id) FROM Logs)""")
            stopTime, stop = cursor.fetchone()
            result += "Last orca log entry: %s\n" % (stop, )
            elapsed = int(stopTime) - int(startTime)
            elapsedHr = elapsed // (3600 * 1000 * 1000 * 1000)
            elapsed -= elapsedHr * 3600 * 1000 * 1000 * 1000
            elapsedMin = elapsed // (60 * 1000 * 1000 * 1000)
            elapsed -= elapsedMin * 60 * 1000 * 1000 * 1000
            elapsedSec = elapsed / 1.0e9
            result += "Orca elapsed time: %d:%02d:%06.3f\n" % (
                elapsedHr, elapsedMin, elapsedSec)

            cursor = conn.cursor()
            cursor.execute("""
                SELECT COUNT(DISTINCT workerid) FROM
                    (SELECT workerid FROM Logs LIMIT 10000) AS sample""")
            nPipelines = cursor.fetchone()[0]
            result += "%d pipelines used\n" % (nPipelines, )

            cursor = conn.cursor()
            cursor.execute("""
                SELECT CASE gid
                    WHEN 1 THEN 'pipeline shutdowns seen'
                    WHEN 2 THEN 'CCDs attempted'
                    WHEN 3 THEN 'src writes'
                    WHEN 4 THEN 'calexp writes'
                END AS descr, COUNT(*) FROM (
                    SELECT CASE
                        WHEN COMMENT LIKE 'Processing job:% visit=0'
                        THEN 1
                        WHEN COMMENT LIKE 'Processing job:%'
                            AND COMMENT NOT LIKE '% visit=0'
                        THEN 2
                        WHEN COMMENT LIKE 'Ending write to BoostStorage%/src%'
                        THEN 3
                        WHEN COMMENT LIKE 'Ending write to FitsStorage%/calexp%'
                        THEN 4
                        ELSE 0
                    END AS gid
                    FROM Logs
                ) AS stats WHERE gid > 0 GROUP BY gid""")
            nShutdown = 0
            for d, n in cursor.fetchall():
                result += "%d %s\n" % (n, d)
                if d == 'pipeline shutdowns seen':
                    nShutdown = n
            if nShutdown != nPipelines:
                if not inProgress:
                    if nShutdown == 0:
                        result += "\n*** No pipelines were shut down properly\n"
                    else:
                        result += "\n*** Shutdowns do not match pipelines\n"
                cursor = conn.cursor()
                cursor.execute("""
                    SELECT workerid, COMMENT
                    FROM Logs JOIN
                    (SELECT MAX(id) AS last FROM Logs GROUP BY workerid) AS a
                    ON (Logs.id = a.last)""")
                for worker, msg in cursor.fetchall():
                    if inProgress:
                        result += "Pipeline %s last status: %s\n" % (worker,
                                                                     msg)
                    else:
                        result += "Pipeline %s ended with: %s\n" % (worker,
                                                                    msg)

            cursor = conn.cursor()
            cursor.execute("""
SELECT COUNT(*) FROM Logs
WHERE
(
    	COMMENT LIKE '%rror%'
	OR COMMENT LIKE '%xception%'
	OR COMMENT LIKE '%arning%'
	OR COMMENT LIKE 'Fail'
	OR COMMENT LIKE 'fail'
)
AND COMMENT NOT LIKE '%failureStage%'
AND COMMENT NOT LIKE '%failure stage%'
AND COMMENT NOT LIKE 'failSerialName%'
AND COMMENT NOT LIKE 'failParallelName%'
AND COMMENT NOT LIKE 'Distortion fitter failed to improve%'
AND COMMENT NOT LIKE '%magnitude error column%'
AND COMMENT NOT LIKE '%errorFlagged%'
AND COMMENT NOT LIKE 'Skipping process due to error'
            """)
            result += "%s failures seen\n" % cursor.fetchone()

            cursor = conn.cursor(MySQLdb.cursors.DictCursor)
            cursor.execute("""
                SELECT * FROM Logs
                WHERE COMMENT LIKE 'Processing job:%'
                    OR (
                        (
                            COMMENT LIKE '%rror%'
                            OR COMMENT LIKE '%xception%'
                            OR COMMENT LIKE '%arning%'
                            OR COMMENT LIKE '%Fail%'
                            OR COMMENT LIKE '%fail%'
                        )
                        AND COMMENT NOT LIKE '%failureStage%'
                        AND COMMENT NOT LIKE '%failure stage%'
                        AND COMMENT NOT LIKE 'failSerialName%'
                        AND COMMENT NOT LIKE 'failParallelName%'
                        AND COMMENT NOT LIKE 'Distortion fitter failed to improve%'
                        AND COMMENT NOT LIKE '%magnitude error column%'
                        AND COMMENT NOT LIKE '%errorFlagged%'
                        AND COMMENT NOT LIKE 'Skipping process due to error'
                    )
                ORDER BY id;""")
            jobs = dict()
            for d in cursor.fetchall():
                match = jobStartRegex.search(d['COMMENT'])
                if match:
                    jobs[d['workerid']] = "Visit %s Raft %s Sensor %s" % (
                        match.group("visit"), match.group("raft"),
                        match.group("sensor"))
                elif not d['COMMENT'].startswith('Processing job:'):
                    job = jobs.get(d['workerid'], "unknown")
                    result += "\n*** Error in %s in stage %s on %s:\n" % (
                        job, d['stagename'], d['workerid'])
                    lines = d['COMMENT'].split('\n')
                    i = len(lines) - 1
                    message = lines[i].strip()
                    # Skip blank lines at end
                    while i > 0 and message == "":
                        i -= 1
                        message = lines[i].strip()
                    # Go back until we find a traceback line with " in "
                    while i > 0 and lines[i].find(" in ") == -1:
                        i -= 1
                        message = lines[i].strip() + "\n" + message
                    result += message + "\n"

        finally:
            conn.close()

        outputDir = os.path.join(self.options.output, runId)
        logFile = os.path.join(outputDir, "run", "unifiedPipeline.log")
        with open(logFile, "r") as log:
            try:
                log.seek(-500, 2)
            except IOError:
                # file may be shorter than 500 bytes; read from the start
                pass
            tail = log.read(500)
            if not tail.endswith("logger handled...and...done!\n"):
                result += "\n*** Unified pipeline log file\n"
                result += "(last 500 bytes)... " + tail + "\n"

        for logFile in glob.glob(
                os.path.join(outputDir, "work", "*", "launch.log")):
            with open(logFile, "r") as log:
                try:
                    log.seek(-500, 2)
                except IOError:
                    # file may be shorter than 500 bytes; read from the start
                    pass
                tail = log.read(500)
                if not re.search(r"harness.runPipeline: workerid \w+$", tail) \
                        and not re.search(r"Applying aperture", tail) \
                        and tail != "done. Now starting job office\n":
                    result += "\n*** " + logFile + "\n"
                    result += "(last 500 bytes)... " + tail + "\n"

        return result
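The elapsed-time block above converts a nanosecond TIMESTAMP delta into hours, minutes, and seconds. A standalone sketch of the same arithmetic (function name hypothetical):

def formatElapsedNs(deltaNs):
    # split a nanosecond interval into hours, minutes, and fractional seconds
    hours, rest = divmod(int(deltaNs), 3600 * 10**9)
    minutes, rest = divmod(rest, 60 * 10**9)
    return "%d:%02d:%06.3f" % (hours, minutes, rest / 1.0e9)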
Example #36
    def run(self, dataId, coordList):
        """Select flugMag0's of SDSS images for a particular run

        @param[in] dataId: a dataId containing at least a run and filter
        @param[in] coordList: list of coordinates defining region of interest

        @return a pipeBase Struct containing:
        - fluxMagInfoList: a list of FluxMagInfo objects
        """
        argDict = self.runArgDictFromDataId(dataId)
        run = argDict["run"]
        filter = argDict["filter"]

        if filter not in set(("u", "g", "r", "i", "z")):
            raise RuntimeError("filter=%r is an invalid name" % (filter,))

        filterDict = {"u": 0,
                      "g": 1,
                      "r": 2,
                      "i": 3,
                      "z": 4}

        if self._display:
            self.log.info(self.config.database)

        db = MySQLdb.connect(
            host=self.config.host,
            port=self.config.port,
            db=self.config.database,
            user=DbAuth.username(self.config.host, str(self.config.port)),
            passwd=DbAuth.password(self.config.host, str(self.config.port)),
        )
        cursor = db.cursor()

        columnNames = tuple(FluxMagInfo.getColumnNames())

        queryStr = "select %s from Science_Ccd_Exposure where " % (", ".join(columnNames))
        dataTuple = ()

        if coordList is not None:
            # look for exposures that overlap the specified region
            for c in coordList:
                dataTuple += (c.getLongitude().asDegrees(), c.getLatitude().asDegrees())
            queryStr += " scisql_s2PtInCPoly(ra, decl"
            queryStr += ", %s, %s" * len(coordList)
            queryStr += ") = 1 and "

        # compute where clauses as a list of (clause, data)
        whereDataList = [
            ("filterId = %s", filterDict[filter]),
            ("run = %s", run),
        ]

        queryStr += " and ".join(wd[0] for wd in whereDataList)
        dataTuple += tuple(wd[1] for wd in whereDataList)

        queryStr += " order by field desc"
        if self._display:
            self.log.info("queryStr=%r; dataTuple=%s" % (queryStr, dataTuple))

        cursor.execute(queryStr, dataTuple)
        exposureInfoList = [FluxMagInfo(result) for result in cursor]
        if self._display:
            self.log.info("Found %d exposures" %
                          (len(exposureInfoList)))

        return pipeBase.Struct(
            fluxMagInfoList=exposureInfoList,
        )
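A hypothetical invocation of the method above (the task instance and the dataId values are assumptions, not from the source):

# hypothetical usage: fetch fluxMag0 info for one SDSS run in the r band
struct = task.run(dataId={"run": 4192, "filter": "r"}, coordList=None)
for fluxMagInfo in struct.fluxMagInfoList:
    print(fluxMagInfo)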
Example #37
def report():
    basename = os.path.basename(sys.argv[0])

    parser = argparse.ArgumentParser(
        prog=basename,
        description='''A statistics reporting utility.  Use to print
                            out information about what happened during a run.
                            Takes as an argument run information previously
                            ingested by one of the ingest utilities into
                            a named database.''',
        epilog='''example:
report.py -H kaboom.ncsa.illinois.edu -p 3303 -d srp_2013_0601_140432 -S''')
    parser.add_argument("-H",
                        "--host",
                        action="store",
                        default=None,
                        dest="host",
                        help="mysql server host",
                        type=str,
                        required=True)
    parser.add_argument("-p",
                        "--port",
                        action="store",
                        default=3306,
                        dest="port",
                        help="mysql server port",
                        type=int)
    parser.add_argument("-d",
                        "--database",
                        action="store",
                        default=None,
                        dest="database",
                        help="database name",
                        type=str,
                        required=True)
    parser.add_argument("-I",
                        "--submits-per-interval",
                        action="store_true",
                        default=None,
                        dest="submits",
                        help="number of submits to queue per interval")
    parser.add_argument("-S",
                        "--slots-used-each-second",
                        action="store_true",
                        default=None,
                        dest="slots",
                        help="slots used each second")

    parser.add_argument("-N",
                        "--slots-used-each-interval",
                        type=int,
                        default=-1,
                        dest="interval",
                        help="slots used each interval")

    parser.add_argument("-L",
                        "--local-time-zone",
                        action="store_true",
                        default=False,
                        dest="localTimeZone",
                        help="output dates converted to local time zone")

    parser.add_argument("-v",
                        "--verbose",
                        action="store_true",
                        dest="verbose",
                        help="verbose")

    args = parser.parse_args()

    host = args.host
    port = args.port
    database = args.database

    #
    # get database authorization info
    #
    dbAuth = DbAuth()
    user = dbAuth.username(host, str(port))
    password = dbAuth.password(host, str(port))

    # connect to the database
    dbm = DatabaseManager(host, port, user, password)

    dbm.execCommand0('use ' + database)

    # command line arguments
    values = None
    submitTimes = SubmissionTimes(dbm)
    entries = submitTimes.getEntries()
    r = Report(dbm, args.localTimeZone)
    if args.submits:
        submitsPerInterval = SubmitsPerInterval(dbm, 1)
        values = submitsPerInterval.getValues()
        r.writePerTimeIntervals(values)
    elif args.slots:
        slotsPerSecond = SlotsPerSecond(dbm, entries)
        values = slotsPerSecond.getValues()
        r.writePerTimeIntervals(values)
    elif args.interval > -1:
        slotsPerInterval = SlotsPerInterval(dbm, entries, args.interval)
        values = slotsPerInterval.getValues()
        r.writePerTimeIntervals(values)
    else:
        printSummary(r)
    dbm.close()
Example #38
    def run(self, coordList, filter, strip=None):
        """Select SDSS images suitable for coaddition in a particular region

        @param[in] filter: filter for images (one of "u", "g", "r", "i" or "z")
        @param[in] coordList: list of coordinates defining region of interest

        @return a pipeBase Struct containing:
        - exposureInfoList: a list of ExposureInfo objects

        @raise RuntimeError if filter not one of "u", "g", "r", "i" or "z"
        """
        if filter not in set(("u", "g", "r", "i", "z")):
            raise RuntimeError("filter=%r is an invalid name" % (filter, ))

        read_default_file = os.path.expanduser("~/.my.cnf")

        try:
            # probe for a readable ~/.my.cnf without leaking the file handle
            with open(read_default_file):
                pass
            kwargs = dict(read_default_file=read_default_file, )
        except IOError:
            kwargs = dict(
                user=DbAuth.username(self.config.host, str(self.config.port)),
                passwd=DbAuth.password(self.config.host,
                                       str(self.config.port)),
            )

        db = MySQLdb.connect(host=self.config.host,
                             port=self.config.port,
                             db=self.config.database,
                             **kwargs)
        cursor = db.cursor()

        columnNames = tuple(ExposureInfo.getColumnNames())
        if not columnNames:
            raise RuntimeError("Bug: no column names")
        queryStr = "select %s " % (", ".join(columnNames), )
        dataTuple = ()  # tuple(columnNames)

        if coordList is not None:
            # look for exposures that overlap the specified region

            # create table scisql.Region containing patch region
            coordStrList = [
                "%s, %s" %
                (c.getLongitude().asDegrees(), c.getLatitude().asDegrees())
                for c in coordList
            ]
            coordStr = ", ".join(coordStrList)
            coordCmd = "call scisql.scisql_s2CPolyRegion(scisql_s2CPolyToBin(%s), 10)" % (
                coordStr, )
            cursor.execute(coordCmd)
            cursor.nextset()  # ignore one-line result of coordCmd

            queryStr += """
from %s as ccdExp,
    (select distinct fieldid
    from SeasonFieldQuality_To_Htm10 as ccdHtm inner join scisql.Region
    on (ccdHtm.htmId10 between scisql.Region.htmMin and scisql.Region.htmMax)
    where ccdHtm.filter = %%s) as idList
where ccdExp.fieldid = idList.fieldid and """ % (self.config.table, )
            dataTuple += (filter, )
        else:
            # no region specified; look over the whole sky
            queryStr += """
from %s as ccdExp where """ % (self.config.table, )

        # compute where clauses as a list of (clause, data)
        whereDataList = [
            ("filter = %s", filter),
        ]

        if self.config.camcols is not None:
            whereDataList.append(
                _whereDataFromList("camcol", self.config.camcols))

        if strip is not None:  # None corresponds to query for both strips: no constraint added
            whereDataList.append(("strip = %s", strip))

        queryStr += " and ".join(wd[0] for wd in whereDataList)
        dataTuple += tuple(wd[1] for wd in whereDataList)

        self.log.info("queryStr=%r; dataTuple=%s" % (queryStr, dataTuple))

        cursor.execute(queryStr, dataTuple)
        exposureInfoList = [ExposureInfo(result) for result in cursor]

        runExpInfoSetDict = dict()

        for expInfo in exposureInfoList:
            run = expInfo.dataId["run"]
            expInfoSet = runExpInfoSetDict.get(run)
            if expInfoSet:
                expInfoSet.add(expInfo)
            else:
                runExpInfoSetDict[run] = set([expInfo])

        self.log.info("Before quality cuts found %d exposures in %d runs" %
                      (len(exposureInfoList), len(runExpInfoSetDict)))

        goodRunSet = set()
        goodExposureInfoList = []
        if self.config.rejectWholeRuns:
            # reject runs for which any exposure does not meet our quality criteria
            # or the run begins or ends in the region
            regionRaRange = None
            regionCtrRa = None
            if coordList is not None:
                regionRaRange = _computeRaRange(coordList)
                regionCtrRa = (regionRaRange[0] + regionRaRange[1]) * 0.5

            numRangeCuts = 0
            for run, expInfoSet in runExpInfoSetDict.items():
                runRaRange = None
                for expInfo in expInfoSet:
                    if self._isBadExposure(expInfo):
                        break

                    if regionRaRange is not None:
                        expRaRange = _computeRaRange(expInfo.coordList,
                                                     ctrRa=regionCtrRa)
                        if runRaRange is None:
                            runRaRange = expRaRange
                        else:
                            runRaRange = (min(runRaRange[0], expRaRange[0]),
                                          max(runRaRange[1], expRaRange[1]))
                else:
                    # all exposures in this run are valid;
                    # if appropriate, check that the run starts and ends outside the region
                    if regionRaRange is not None:
                        if (runRaRange[0] > regionRaRange[0]) or (
                                runRaRange[1] < regionRaRange[1]):
                            numRangeCuts += 1
                            continue

                    goodExposureInfoList += list(expInfoSet)
                    goodRunSet.add(run)
            self.log.info(
                "Rejected %d whole runs, including %d for incomplete range" %
                (len(runExpInfoSetDict) - len(goodRunSet), numRangeCuts))
        else:
            # reject individual exposures which do not meet our quality criteria
            for expInfo in exposureInfoList:
                if not self._isBadExposure(expInfo):
                    goodExposureInfoList.append(expInfo)
                    goodRunSet.add(expInfo.dataId["run"])
            self.log.info(
                "Rejected %d individual exposures" %
                (len(exposureInfoList) - len(goodExposureInfoList), ))

        exposureInfoList = goodExposureInfoList

        self.log.info("After quality cuts, found %d exposures in %d runs" %
                      (len(exposureInfoList), len(goodRunSet)))

        if exposureInfoList:
            # compute qscore according to RHL's formula and sort by it
            qArr = np.array([expInfo.q for expInfo in exposureInfoList])
            qMax = np.percentile(qArr, 95.0)
            for expInfo in exposureInfoList:
                expInfo.qscore = (expInfo.q / qMax) - expInfo.quality
            exposureInfoList.sort(key=lambda expInfo: expInfo.qscore)

            if self.config.maxExposures is not None:
                # select config.maxExposures exposures with the highest qscore
                exposureInfoList = exposureInfoList[0:self.config.maxExposures]
                self.log.info("After maxExposures cut, found %d exposures" %
                              (len(exposureInfoList), ))

            elif self.config.maxRuns is not None:
                # select config.maxRuns runs with the highest median qscore
                # (of those exposures that overlap the patch)
                runQualListDict = dict()
                for expInfo in exposureInfoList:
                    run = expInfo.dataId["run"]
                    qualList = runQualListDict.get(run)
                    if qualList:
                        qualList.append(expInfo.qscore)
                    else:
                        runQualListDict[run] = [expInfo.qscore]

                if len(runQualListDict) > self.config.maxRuns:
                    qualRunList = []
                    for run, qualList in runQualListDict.items():
                        runQscore = np.median(qualList)
                        qualRunList.append((runQscore, run))
                    qualRunList.sort()
                    qualRunList = qualRunList[0:self.config.maxRuns]

                    goodRunSet = set(qr[1] for qr in qualRunList)
                    exposureInfoList = [
                        ei for ei in exposureInfoList
                        if ei.dataId["run"] in goodRunSet
                    ]

        return pipeBase.Struct(exposureInfoList=exposureInfoList, )
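Both run methods assemble their WHERE clause from (clause, data) pairs so that every value is bound as a query parameter rather than interpolated into the SQL. A minimal standalone sketch of the pattern (table name and values hypothetical):

# each constraint is a (clause, data) pair; clauses are joined with AND
whereDataList = [
    ("filter = %s", "r"),
    ("strip = %s", "N"),
]
queryStr = "select fieldid from SeasonFieldQuality where "
queryStr += " and ".join(wd[0] for wd in whereDataList)
dataTuple = tuple(wd[1] for wd in whereDataList)
# cursor.execute(queryStr, dataTuple) binds the data safely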
Example #39
                    database = os.path.join(outDir, database)
                self.database = database
                self.driver = driver
            else:
                # If not sqlite, then 'outDir' doesn't make much sense.
                self.database = database
                self.driver = driver
                self.host = host
                self.port = port

        if self.driver == 'sqlite':
            dbAddress = url.URL(self.driver, database=self.database)
        else:
            dbAddress = url.URL(
                self.driver,
                username=DbAuth.username(self.host, str(self.port)),
                password=DbAuth.password(self.host, str(self.port)),
                host=self.host,
                port=self.port,
                database=self.database)

        engine = create_engine(dbAddress, echo=verbose)
        self.Session = sessionmaker(bind=engine)
        self.session = self.Session()
        # Create the tables, if they don't already exist.
        try:
            Base.metadata.create_all(engine)
        except DatabaseError:
            raise ValueError(
                "Cannot create a %s database at %s. Check directory exists." %
                (self.driver, self.database))
Example #40
    def setUp(self):
        # Turn on tracing
        log.Trace.setVerbosity('', 10)
        log.ScreenLog.createDefaultLog(True, log.Log.INFO)

        # Eventually, these should be read from a policy somewhere
        self.dbServer = 'lsst10.ncsa.uiuc.edu'
        self.dbPort = '3306'
        self.dbType = 'mysql'
        if not DbAuth.available(self.dbServer, self.dbPort):
            self.fail("Cannot access database server %s:%s" % (self.dbServer, self.dbPort))
        # Construct test run database name
        self.runId = DbAuth.username(self.dbServer, self.dbPort) +\
                     time.strftime("_test_ap_%y%m%d_%H%M%S", time.gmtime())

        # Tweak these to run on different input data, or with a different number of slices
        self.universeSize = 2
        self.visitId = 708125
        self.filter = 'u'
        self.ra = 333.880166667
        self.dec = -17.7374166667

        self.dbUrlPrefix = ''.join([self.dbType, '://', self.dbServer, ':', self.dbPort, '/'])
        self.dbUrl = self.dbUrlPrefix + self.runId
        self.substitutions = { 'visitId': self.visitId,
                               'filter': self.filter,
                               'runId': self.runId }
        # Create a database specifically for the test (copy relevant
        # tables from the test_ap database)
        mysqlStatements = [
            """CREATE DATABASE %(runId)s""",
            """USE %(runId)s""",
            """CREATE TABLE VarObject LIKE test_ap.Object""",
            """CREATE TABLE NonVarObject LIKE test_ap.Object""",
            """CREATE TABLE DIASource LIKE test_ap.DIASource""",
            """CREATE TABLE prv_Filter LIKE test_ap.prv_Filter""",
            """INSERT INTO prv_Filter SELECT * FROM test_ap.prv_Filter""",
            """CREATE TABLE _tmp_v%(visitId)d_DIASource
               LIKE test_ap._tmp_v%(visitId)d_DIASource""",
            """INSERT INTO _tmp_v%(visitId)d_DIASource
               SELECT * FROM test_ap._tmp_v%(visitId)d_DIASource""",
            """CREATE TABLE _tmp_v%(visitId)d_Preds
               LIKE test_ap._tmp_v%(visitId)d_Preds""",
            """INSERT INTO _tmp_v%(visitId)d_Preds
               SELECT * FROM test_ap._tmp_v%(visitId)d_Preds""",
            """CREATE TABLE _tmpl_MatchPair LIKE test_ap._tmpl_MatchPair""",
            """CREATE TABLE _tmpl_IdPair LIKE test_ap._tmpl_IdPair""",
            """CREATE TABLE _tmpl_InMemoryObject LIKE test_ap._tmpl_InMemoryObject""",
            """CREATE TABLE _tmpl_InMemoryMatchPair LIKE test_ap._tmpl_InMemoryMatchPair""",
            """CREATE TABLE _tmpl_InMemoryId LIKE test_ap._tmpl_InMemoryId""",
            """CREATE TABLE _ap_DIASourceToObjectMatches LIKE test_ap._ap_DIASourceToObjectMatches""",
            """CREATE TABLE _ap_PredToDIASourceMatches LIKE test_ap._ap_PredToDIASourceMatches""",
            """CREATE TABLE _ap_DIASourceToNewObject LIKE test_ap._ap_DIASourceToNewObject""",
            """CREATE TABLE _mops_Prediction LIKE test_ap._mops_Prediction"""
        ]
        db = DbStorage()
        db.setPersistLocation(LogicalLocation(self.dbUrlPrefix + 'test_ap'))
        try:
            for stmt in mysqlStatements:
                db.executeSql(stmt % self.substitutions)
            
            # Specify list of stages ...
            self.stages = [ ap.LoadStage,
                            InputStage,
                            ap.MatchDiaSourcesStage,
                            OutputStage,
                            InputStage,
                            ap.MatchMopsPredsStage,
                            OutputStage,
                            ap.StoreStage ]

            # and read in stage policy for each stage
            policyDir = os.path.join(os.environ['AP_DIR'], 'pipeline', 'examples', 'policy')
            self.policies = [ Policy(os.path.join(policyDir,'LoadStage.paf')),
                              Policy(os.path.join(policyDir,'MatchDiaSourcesStageInput.paf')),
                              None,
                              Policy(os.path.join(policyDir,'MatchDiaSourcesStageOutput.paf')),
                              Policy(os.path.join(policyDir,'MatchMopsPredsStageInput.paf')),
                              None,
                              Policy(os.path.join(policyDir,'MatchMopsPredsStageOutput.paf')),
                              Policy(os.path.join(policyDir,'StoreStage.paf')) ]

            # construct PropertySet for string interpolation
            psSubs = PropertySet()
            psSubs.setInt('visitId', self.visitId)
            psSubs.setString('runId', self.runId)
            psSubs.setString('filter', self.filter)
            psSubs.setString('work', '.')
            psSubs.setString('input', '/tmp')
            psSubs.setString('output', '/tmp')
            psSubs.setString('update', '/tmp')
            psSubs.setString('dbUrl', self.dbUrl)
            LogicalLocation.setLocationMap(psSubs)
        except:
            # cleanup database in case of error
            db.executeSql("DROP DATABASE %(runId)s" % self.substitutions)
            raise
from lsst.daf.persistence import DbAuth
import lsst.afw.coord as afwCoord
import lsst.afw.geom as afwGeom
from lsst.obs.sdss.selectSdssImages import SelectSdssImagesTask

Database = "test_select_sdss_images"

config = SelectSdssImagesTask.ConfigClass()

# Some of the tests require loading SDSS images from "lsst-db.ncsa.illinois.edu" and
# require a login name and password. If the test is unable to connect to the external data,
# some of the tests are skipped.
noConnectionStr = ""
noConnection = False
try:
    DbAuth.username(config.host, str(config.port))
except Exception as e:
    noConnectionStr = (
        "No remote connection to SDSS image database\n"
        "Did not find host={0}, port={1} in your db-auth file;\n"
        "Warning generated: {2} ".format(config.host, str(config.port), e))
    noConnection = True
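# assumed usage downstream (not shown in this snippet): tests guard on the
# flag, e.g. @unittest.skipIf(noConnection, noConnectionStr)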


def getCoordList(minRa, minDec, maxRa, maxDec):
    degList = (
        (minRa, minDec),
        (maxRa, minDec),
        (maxRa, maxDec),
        (minRa, maxDec),
    )
    # assumed completion (truncated in the original): convert the corner
    # (ra, dec) degree pairs into coords for the region of interest
    return tuple(
        afwCoord.IcrsCoord(afwGeom.Point2D(*deg), afwGeom.degrees)
        for deg in degList)
    def tearDown(self):
        DbAuth.resetPolicy()
Example #43
    def __init__(self,
                 outDir=None,
                 database=None,
                 driver='sqlite',
                 host=None,
                 port=None,
                 verbose=False):
        """
        Instantiate the results database, creating metrics, plots and summarystats tables.
        """
        # Connect to database
        # for sqlite, connecting to non-existent database creates it automatically
        if database is None:
            # Using default value for database name, should specify directory.
            if outDir is None:
                outDir = '.'
            # Check for output directory, make if needed.
            if not os.path.isdir(outDir):
                try:
                    os.makedirs(outDir)
                except OSError as msg:
                    raise OSError(
                        msg,
                        '\n  (If this was the database file (not outDir), '
                        'remember to use kwarg "database")')
            self.database = os.path.join(outDir, 'resultsDb_sqlite.db')
            self.driver = 'sqlite'
        else:
            if driver == 'sqlite':
                # Using non-default database, but may also specify directory root.
                if outDir is not None:
                    database = os.path.join(outDir, database)
                self.database = database
                self.driver = driver
            else:
                # If not sqlite, then 'outDir' doesn't make much sense.
                self.database = database
                self.driver = driver
                self.host = host
                self.port = port

        if self.driver == 'sqlite':
            dbAddress = url.URL(self.driver, database=self.database)
        else:
            dbAddress = url.URL(
                self.driver,
                username=DbAuth.username(self.host, str(self.port)),
                password=DbAuth.password(self.host, str(self.port)),
                host=self.host,
                port=self.port,
                database=self.database)

        engine = create_engine(dbAddress, echo=verbose)
        self.Session = sessionmaker(bind=engine)
        self.session = self.Session()
        # Create the tables, if they don't already exist.
        try:
            Base.metadata.create_all(engine)
        except DatabaseError:
            raise ValueError(
                "Cannot create a %s database at %s. Check directory exists." %
                (self.driver, self.database))
        self.slen = 1024
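A usage sketch for the constructor above; the class name ResultsDb is an assumption based on the default database file name:

# hypothetical: default sqlite results database created under ./output
resultsDb = ResultsDb(outDir="output")
# hypothetical: mysql backend, credentials resolved through DbAuth
resultsDb = ResultsDb(database="maf_results", driver="mysql",
                      host="localhost", port=3306)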
Example #44
import sys

from lsst.daf.persistence import DbAuth
import lsst.afw.geom as afwGeom
import lsst.afw.image as afwImage
import lsst.afw.math as afwMath
import lsst.afw.coord as afwCoord
from lsst.obs.sdss.scaleSdssZeroPoint import ScaleSdssZeroPointTask
from lsst.obs.sdss.selectFluxMag0 import SelectSdssFluxMag0Task

config = ScaleSdssZeroPointTask.ConfigClass()

# Some of the tests require loading SDSS images from "lsst-db.ncsa.illinois.edu" and
# require a login name and password. If the test is unable to connect to the external data,
# some of the tests are skipped.
noConnection = False
try:
    DbAuth.username(config.selectFluxMag0.host,
                    str(config.selectFluxMag0.port))
except Exception as e:
    print(
        "Did not find host={0}, port={1} in your db-auth file; \nWarning generated: {2} "
        .format(config.selectFluxMag0.host, str(config.selectFluxMag0.port),
                e),
        file=sys.stderr)
    noConnection = True


class WrapDataId(object):
    """A container for dataId that looks like dataRef to computeImageScaler()
    """
    def __init__(self, dataId):
        self.dataId = dataId
Example #45
                if outDir is not None:
                    database = os.path.join(outDir, database)
                self.database = database
                self.driver = driver
            else:
                # If not sqlite, then 'outDir' doesn't make much sense.
                self.database = database
                self.driver = driver
                self.host = host
                self.port = port

        if self.driver == 'sqlite':
            dbAddress = url.URL(self.driver, database=self.database)
        else:
            dbAddress = url.URL(self.driver,
                            username=DbAuth.username(self.host, str(self.port)),
                            password=DbAuth.password(self.host, str(self.port)),
                            host=self.host,
                            port=self.port,
                            database=self.database)

        engine = create_engine(dbAddress, echo=verbose)
        self.Session = sessionmaker(bind=engine)
        self.session = self.Session()
        # Create the tables, if they don't already exist.
        try:
            Base.metadata.create_all(engine)
        except DatabaseError:
            raise ValueError("Cannot create a %s database at %s. Check directory exists." %(self.driver, self.database))
        self.slen = 1024
        self.stype = 'S%d' %(self.slen)
    def analyzeLogs(self, runId, inProgress=False):
        import MySQLdb
        jobStartRegex = re.compile(
                r"Processing job:"
                r"(\s+filter=(?P<filter>\w)"
                r"|\s+field=(?P<field>\d+)"
                r"|\s+camcol=(?P<camcol>\d)"
                r"|\s+run=(?P<run>\d+)"
                r"|\s+type=calexp){5}"
        )

        host = RunConfiguration.dbHost
        port = RunConfiguration.dbPort
        with MySQLdb.connect(
                host=host,
                port=port,
                user=self.dbUser,
                passwd=DbAuth.password(host, str(port))) as conn:
            runpat = '%' + runId + '%'
            conn.execute("SHOW DATABASES LIKE %s", (runpat,))
            ret = conn.fetchall()
            if ret is None or len(ret) == 0:
                raise NoMatchError("No match for run %s" % (runId,))
            elif len(ret) > 1:
                raise RuntimeError("Multiple runs match:\n" +
                        str([r[0] for r in ret]))
            dbName = ret[0][0]

        result = ""
        try:
            conn = MySQLdb.connect(
                host=host,
                port=port,
                user=self.dbUser,
                passwd=DbAuth.password(host, str(port)),
                db=dbName)

            cursor = conn.cursor()
            cursor.execute("""SELECT TIMESTAMP, timereceived FROM Logs
                WHERE id = (SELECT MIN(id) FROM Logs)""")
            row = cursor.fetchone()
            if row is None:
                if inProgress:
                    return "No log entries yet\n"
                else:
                    return "*** No log entries written\n"
            startTime, start = row
            result += "First orca log entry: %s\n" % (start,)
    
            cursor = conn.cursor()
            cursor.execute("""SELECT TIMESTAMP, timereceived FROM Logs
                WHERE id = (SELECT MAX(id) FROM Logs)""")
            stopTime, stop = cursor.fetchone()
            result += "Last orca log entry: %s\n" % (stop,)
            elapsed = int(stopTime) - int(startTime)
            elapsedHr = elapsed // (3600 * 1000 * 1000 * 1000)
            elapsed -= elapsedHr * 3600 * 1000 * 1000 * 1000
            elapsedMin = elapsed // (60 * 1000 * 1000 * 1000)
            elapsed -= elapsedMin * 60 * 1000 * 1000 * 1000
            elapsedSec = elapsed / 1.0e9
            result += "Orca elapsed time: %d:%02d:%06.3f\n" % (elapsedHr,
                    elapsedMin, elapsedSec)
    
            cursor = conn.cursor()
            cursor.execute("""
                SELECT COUNT(DISTINCT workerid) FROM
                    (SELECT workerid FROM Logs LIMIT 10000) AS sample""")
            nPipelines = cursor.fetchone()[0]
            result += "%d pipelines used\n" % (nPipelines,)
    
            cursor = conn.cursor()
            cursor.execute("""
                SELECT CASE gid
                    WHEN 1 THEN 'pipeline shutdowns seen'
                    WHEN 2 THEN 'CCDs attempted'
                    WHEN 3 THEN 'src writes'
                    WHEN 4 THEN 'calexp writes'
                END AS descr, COUNT(*) FROM (
                    SELECT CASE
                        WHEN COMMENT LIKE 'Processing job:% filter=0%'
                        THEN 1
                        WHEN COMMENT LIKE 'Processing job:%'
                            AND COMMENT NOT LIKE '% filter=0%'
                        THEN 2
                        WHEN COMMENT LIKE 'Ending write to BoostStorage%/src%'
                        THEN 3
                        WHEN COMMENT LIKE 'Ending write to FitsStorage%/calexp%'
                        THEN 4
                        ELSE 0
                    END AS gid
                    FROM Logs
                ) AS stats WHERE gid > 0 GROUP BY gid""")
            nShutdown = 0
            for d, n in cursor.fetchall():
                result += "%d %s\n" % (n, d)
                if d == 'pipeline shutdowns seen':
                    nShutdown = n
            if nShutdown != nPipelines:
                if not inProgress:
                    if nShutdown == 0:
                        result += "\n*** No pipelines were shut down properly\n"
                    else:
                        result += "\n*** Shutdowns do not match pipelines\n"
                cursor = conn.cursor()
                cursor.execute("""
                    SELECT workerid, COMMENT
                    FROM Logs JOIN
                    (SELECT MAX(id) AS last FROM Logs GROUP BY workerid) AS a
                    ON (Logs.id = a.last)""")
                for worker, msg in cursor.fetchall():
                    if inProgress:
                        result += "Pipeline %s last status: %s\n" % (worker,
                                msg)
                    else:
                        result += "Pipeline %s ended with: %s\n" % (worker, msg)
    
            cursor = conn.cursor()
            cursor.execute("""
SELECT COUNT(*) FROM Logs
WHERE
(
    	COMMENT LIKE '%rror%'
	OR COMMENT LIKE '%xception%'
	OR COMMENT LIKE '%arning%'
	OR COMMENT LIKE 'Fail'
	OR COMMENT LIKE 'fail'
)
AND COMMENT NOT LIKE '%failureStage%'
AND COMMENT NOT LIKE '%failure stage%'
AND COMMENT NOT LIKE 'failSerialName%'
AND COMMENT NOT LIKE 'failParallelName%'
AND COMMENT NOT LIKE 'Distortion fitter failed to improve%'
AND COMMENT NOT LIKE '%magnitude error column%'
AND COMMENT NOT LIKE '%errorFlagged%'
AND COMMENT NOT LIKE 'Skipping process due to error'
            """)
            result += "%s failures seen\n" % cursor.fetchone()

            cursor = conn.cursor(MySQLdb.cursors.DictCursor)
            cursor.execute("""
                SELECT * FROM Logs
                WHERE COMMENT LIKE 'Processing job:%'
                    OR (
                        (
                            COMMENT LIKE '%rror%'
                            OR COMMENT LIKE '%xception%'
                            OR COMMENT LIKE '%arning%'
                            OR COMMENT LIKE '%Fail%'
                            OR COMMENT LIKE '%fail%'
                        )
                        AND COMMENT NOT LIKE '%failureStage%'
                        AND COMMENT NOT LIKE '%failure stage%'
                        AND COMMENT NOT LIKE 'failSerialName%'
                        AND COMMENT NOT LIKE 'failParallelName%'
                        AND COMMENT NOT LIKE 'Distortion fitter failed to improve%'
                        AND COMMENT NOT LIKE '%magnitude error column%'
                        AND COMMENT NOT LIKE '%errorFlagged%'
                        AND COMMENT NOT LIKE 'Skipping process due to error'
                    )
                ORDER BY id;""")
            jobs = dict()
            for d in cursor.fetchall():
                match = jobStartRegex.search(d['COMMENT'])
                if match:
                    jobs[d['workerid']] = "Band %s Run %s Camcol %s Frame %s" % (
                            match.group("filter"), match.group("run"),
                            match.group("camcol"), match.group("field"))
                elif not d['COMMENT'].startswith('Processing job:'):
                    job = jobs.get(d['workerid'], "unknown")
                    result += "\n*** Error in %s in stage %s on %s:\n" % (
                                job, d['stagename'], d['workerid'])
                    lines = d['COMMENT'].split('\n')
                    i = len(lines) - 1
                    message = lines[i].strip()
                    # Skip blank lines at end
                    while i > 0 and message == "":
                        i -= 1
                        message = lines[i].strip()
                    # Go back until we find a traceback line with " in "
                    while i > 0 and lines[i].find(" in ") == -1:
                        i -= 1
                        message = lines[i].strip() + "\n" + message
                    result += message + "\n"

        finally:
            conn.close()

        outputDir = os.path.join(self.options.output, runId)
        logFile = os.path.join(outputDir, "run", "unifiedPipeline.log")
        with open(logFile, "r") as log:
            try:
                log.seek(-500, 2)
            except IOError:
                # file may be shorter than 500 bytes; read from the start
                pass
            tail = log.read(500)
            if not tail.endswith("logger handled...and...done!\n"):
                result += "\n*** Unified pipeline log file\n"
                result += "(last 500 bytes)... " + tail + "\n"

        for logFile in glob.glob(
                os.path.join(outputDir, "work", "*", "launch.log")):
            with open(logFile, "r") as log:
                try:
                    log.seek(-500, 2)
                except IOError:
                    # file may be shorter than 500 bytes; read from the start
                    pass
                tail = log.read(500)
                if not re.search(r"harness.runPipeline: workerid \w+$", tail) \
                        and not re.search(r"Applying aperture", tail) \
                        and tail != "done. Now starting job office\n":
                    result += "\n*** " + logFile + "\n"
                    result += "(last 500 bytes)... " + tail + "\n"

        return result
    def setUp(self):
        pol = Policy(os.path.join(ROOT, "testDbAuth.paf"))
        DbAuth.setPolicy(pol)
    def __init__(self, args):
        self.datetime = time.strftime("%Y_%m%d_%H%M%S")
        self.user = pwd.getpwuid(os.getuid())[0]
        if self.user == 'buildbot':
            RunConfiguration.pipeQaBase = re.sub(r'dev', 'buildbot',
                    RunConfiguration.pipeQaBase)
            RunConfiguration.pipeQaDir = re.sub(r'dev', 'buildbot',
                    RunConfiguration.pipeQaDir)
        self.dbUser = DbAuth.username(RunConfiguration.dbHost,
                str(RunConfiguration.dbPort))
        self.hostname = socket.getfqdn()
        self.fromAddress = "%s@%s" % (self.user, self.hostname)

        self.options, self.args = self.parseOptions(args)

        # Handle immediate commands
        if self.options.printStatus:
            self.printStatus()
            sys.exit(0)
        if self.options.report is not None:
            self.report(os.path.join(self.options.output,
                self.options.report, "run", "run.log"))
            sys.exit(0)
        if self.options.listRuns:
            self.listRuns(self.options.listRuns)
            sys.exit(0)
        if self.options.listInputs:
            self.listInputs()
            sys.exit(0)
        if self.options.linkLatest is not None:
            self.linkLatest(self.options.linkLatest)
            sys.exit(0)
        if self.options.kill is not None:
            self.kill(self.options.kill)
            sys.exit(0)
        if self.options.hosts is not None:
            self.hosts()
            sys.exit(0)

        if self.arch is None:
            if self.options.arch is None:
                raise RuntimeError("Architecture is required")
            self.arch = self.options.arch

        if re.search(r'[^a-zA-Z0-9_]', self.options.runType):
            raise RuntimeError("Run type '%s' must be one word" %
                    (self.options.runType,))

        self.collectionName = re.sub(r'\.', '_', RunConfiguration.collection)
        runIdProperties = dict(
                user=self.user,
                dbUser=self.dbUser,
                coll=self.collectionName,
                runType=self.options.runType,
                datetime=self.datetime)
        # When resuming a run, use provided runID
        if self.options.resumeRunId is None:
            self.runId = RunConfiguration.runIdPattern % runIdProperties
        else:
            self.runId = self.options.resumeRunId
        runIdProperties['runid'] = self.runId
        dbNamePattern = "%(dbUser)s_%(coll)s_u_%(runid)s"
        self.dbName = dbNamePattern % runIdProperties

        self.inputBase = os.path.join(RunConfiguration.inputBase,
                self.options.input)
        self.inputDirectory = os.path.join(self.inputBase,
                RunConfiguration.collection)
        self.outputDirectory = os.path.join(self.options.output, self.runId)
        self.outputDirectory = os.path.abspath(self.outputDirectory)

        if self.options.resumeRunId is None :
            if os.path.exists(self.outputDirectory):
                raise RuntimeError("Output directory %s already exists" %
                    (self.outputDirectory,))
            os.mkdir(self.outputDirectory)
        elif not os.path.exists(self.outputDirectory):
            raise RuntimeError("Output directory %s does not exist for resumed run" %
                (self.outputDirectory,))

        self.pipeQaUrl = RunConfiguration.pipeQaBase + self.dbName + "/"

        self.eupsPath = os.environ['EUPS_PATH']
        e = eups.Eups(readCache=False)
        self.setups = dict()
        for product in e.getSetupProducts():
            if product.name != "eups":
                self.setups[product.name] = \
                        re.sub(r'^LOCAL:', "-r ", product.version)

        # TODO -- load policy and apply overrides
        self.options.override = None
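For illustration, the database-name pattern above expands as follows (all values hypothetical):

runIdProperties = dict(dbUser="jdoe", coll="S12_sdss",
                       runid="jdoe_S12_sdss_prod_2013_0601_140432")
dbName = "%(dbUser)s_%(coll)s_u_%(runid)s" % runIdProperties
# -> 'jdoe_S12_sdss_u_jdoe_S12_sdss_prod_2013_0601_140432'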
    def testSetPolicy(self):
        self.assertTrue(DbAuth.available("lsst-db.ncsa.illinois.edu", "3306"))
        self.assertEqual(
            DbAuth.authString("lsst-db.ncsa.illinois.edu", "3306"),
            "test:globular.test")
        self.assertEqual(DbAuth.username("lsst-db.ncsa.illinois.edu", "3306"),
                         "test")
        self.assertEqual(DbAuth.password("lsst-db.ncsa.illinois.edu", "3306"),
                         "globular.test")
        self.assertTrue(DbAuth.available("lsst-db.ncsa.illinois.edu", "3307"))
        self.assertEqual(
            DbAuth.authString("lsst-db.ncsa.illinois.edu", "3307"),
            "boris:natasha")
        self.assertEqual(DbAuth.username("lsst-db.ncsa.illinois.edu", "3307"),
                         "boris")
        self.assertEqual(DbAuth.password("lsst-db.ncsa.illinois.edu", "3307"),
                         "natasha")
        self.assertTrue(DbAuth.available("lsst9.ncsa.illinois.edu", "3306"))
        self.assertEqual(DbAuth.authString("lsst9.ncsa.illinois.edu", "3306"),
                         "rocky:squirrel")
        self.assertEqual(DbAuth.username("lsst9.ncsa.illinois.edu", "3306"),
                         "rocky")
        self.assertEqual(DbAuth.password("lsst9.ncsa.illinois.edu", "3306"),
                         "squirrel")
Example #50
                        dest="filenames", help="condor log files",
                        nargs='+', type=str, required=True)
    parser.add_argument("-v", "--verbose", action="store_true", dest="verbose",
                        help="verbose")

    args = parser.parse_args()

    host = args.host
    port = args.port
    database = args.database

    #
    # get database authorization info
    #

    dbAuth = DbAuth()
    user = dbAuth.username(host, str(port))
    password = dbAuth.password(host, str(port))

    # connect to the database
    dbm = DatabaseManager(host, port, user, password)

    # create the database if it doesn't exist
    if not dbm.dbExists(database):
        dbm.createDb(database)

    # create the LogIngestor, which creates all the tables, and will
    # be used to consolidate file information
    logIngestor = LogIngestor(dbm, database)

    # go through the list of files and ingest them, ignoring any