Example #1
0
def ESBuilder(args):
    """ESBuilder is a main injection tool. It supports two types of DBs:
    MySQL and SQLite. The injection can be done for variety of file formats:
    evio, hddm, idxa. For option information and usage please use '-help' option.
    For option description '--help'.
    For specific injection types please use '-examples' option.

    Please note, ESBuilder is a wrapper shell script around ESBuilder.py
    module which does the work.

    Args:
        args: command-line style argument list. Parsing starts at args[1],
            so args[0] is skipped.

    Returns:
        The value returned by es_init.ESOutput for this injection.

    The process exits with status 1 when an option cannot be parsed, a
    required parameter is missing, or an input/output location is not usable.
    """
    localOpt = [
        "[ -add <dir or file or pattern of files> ]",
        "[ -grade <grade> ] [ -time <timeStamp> ]",
        "[ -dataVersionName <name> ]  [ -view <skim> ]",
        "[ -listOfParents <dataVersionName's> ]",
        "[ -output <dir> ] [ -HSMDir <HSM directory> ]",
        "[ -dupRead <fileName> ] [ -skim ] [ -no-skim ]",
        "[ -masterDB <name@host:port:socket or fileName> ]",
    ]
    usage = es_init.helpMsg("ESBuilder", localOpt)
    usageDescription = """
Option description:
*   -grade:   specifies the grade, e.g. "physics", "p2-unchecked"
*   -add:     adds data file(s) to the EventStore
	      You may specify: directory, file name or a list of files
	      For patterns use '*', e.g MC*tau*.pds
*   -output:  output location for storing key/location files
*   -dataVersionName: specifies the data version name (aka svName)

    -time:    specifies the timeStamp, e.g. 20090227. If time is not provided 
              will try to append to existing grade/dataVersionName or use a 
	      one day in a future as new timeStamp if no grade/dataVersionName
	      combination is found.
    -view:    specifies the view, e.g. "tau"
    -listOfParents specifies list of parents for given injection,
	      e.g. while injecting p2-unchecked grade its parent is 'daq'.
    -newDB:   force the creation of a new EventStore
    -sqlite   use the SQLite version of EventStore
	      default sqlite.db, otherwise a fileName needs to be provided
    -mysql    use the MySQL version of EventStore. In order to access MySQL
	      you need either provide login/password through the -user/-password
	      options or create $HOME/.esdb.conf with user:password entry
    -masterDB specifies host and db name of the master you want to use
    -verbose: verbose mode, a lot of useful printout
    -idleMode when this flag is specified, no key/location file will be
	      generated (useful once you have them and want reproduce DB
	      content). But content of DB will be updated. USE WITH CAUTION.
    -delete   delete a grade from EventStore. USE WITH CAUTION.
	      You need to provide the grade and the timeStamp.
    -HSMDir   specifies output HSM directory.
    -logFile  specifies the log file name. You may either provide a full file name 
              (including path) or 'stdout' or 'stderr' to redirect your log to
	      appropriate I/O stream. During injection an intermidiate files
	      esdb.log.YYYYMMDD_HHMMSS_PID will be created.
	      Once a job successfully finishes, the esdb.log.YYYYMMDD_HHMMSS_PID 
	      is moved to your logFile, otherwise esdb.log.YYYYMMDD_HHMMSS_PID remains.
    -profile  perform internal profiling.
    -dupRead  in the case of duplicated records force to use this source
    -skim     force ESBuilder to use input files as a skim, i.e. find their parents
              and build combined location file for all of them
    -no-skim  force ESBuilder to use input files as is

Please note: required parameters are marked with (*). All options can be
specified in any order. By default: view='all', EventStoreTMP DB is used and key/location
files are generated.

    """

    # NOTE(review): the examples text is built but never handed to ESOptions
    # (ESFixPath passes its examples as a fifth argument) - confirm whether it
    # should be passed here as well.
    examples = es_init.ESExamples()
    userCommand = "ESBuilder.py"
    optList, dictOpt = es_init.ESOptions(userCommand, args, usage,
                                         usageDescription)
    dbName, dbHost, userName, userPass, dbPort, dbSocket = optList[0]
    historyFile, logFile, verbose, profile = optList[1]
    userCommand = optList[2]

    # default values
    grade = ""
    timeS = gen_util.dayAhead()
    oDir = ""
    view = "all"
    newDB = 0
    delete = 0
    genMode = 1
    minRun = 0
    maxRun = 1000000
    svName = ""
    fileList = []
    listOfParents = []
    oHSMDir = ""
    dupRead = ""
    skim = 0
    noskim = 0
    masterDB = ""
    # the master DB defaults to the DB we are connected to
    masterDBName = dbName
    masterDBHost = dbHost
    masterDBPort = dbPort
    masterDBSocket = dbSocket

    # parse the rest of the options; args[0] is skipped
    x = 1
    while x < len(args):
        opt = args[x]
        try:
            if opt == "-newDB":
                newDB = 1
                x += 1
            elif opt == "-HSMDir":
                oHSMDir = args[x + 1]
                checkArg([oHSMDir])
                x += 2
            elif opt == "-dupRead":
                dupRead = args[x + 1]
                checkArg([dupRead])
                x += 2
            elif opt == "-dataVersionName":
                svName = args[x + 1]
                checkArg([svName])
                x += 2
            elif opt == "-grade":
                grade = args[x + 1]
                checkArg([grade])
                x += 2
            elif opt == "-time":
                timeS = args[x + 1]
                checkArg([timeS])
                x += 2
            elif opt == "-output":
                oDir = args[x + 1] + "/"
                checkArg([oDir])
                x += 2
            elif opt == "-runRange":
                minRun = int(args[x + 1])
                maxRun = int(args[x + 2])
                checkArg([minRun, maxRun])
                x += 3
            elif opt == "-listOfParents":
                # consume every following argument up to the next option
                x += 1
                while args[x][0] != "-":
                    listOfParents.append(args[x])
                    x += 1
                    if len(args) == x:
                        break
                checkArg(listOfParents)
            elif opt == "-add":
                inputPath = os_path_util.formAbsolutePath(args[x + 1])
                if len(args) > x + 2 and args[x + 2][0] != "-":
                    # several names were given (e.g. a shell-expanded
                    # pattern): take all of them up to the next option and
                    # keep only those which are existing files
                    counter = 0
                    for idx in xrange(x + 1, len(args)):
                        newArg = args[idx]
                        if newArg[0] == "-":
                            break
                        counter += 1
                        if os.path.isfile(newArg):
                            fileList.append(
                                os_path_util.formAbsolutePath(newArg))
                    x += counter + 1
                elif os.path.isdir(inputPath):
                    # a directory: pick up its *.pds files
                    inputDir = inputPath + "/"
                    for f in os.listdir(inputDir):
                        if f.split(".")[-1] != "pds":
                            continue
                        fileList.append(
                            os_path_util.formAbsolutePath(inputDir + f))
                    x += 2
                elif os_path_util.isFile(inputPath):
                    if inputPath[-5:] == ".list":
                        # a list file: first token of every non-blank line
                        listFile = open(inputPath)
                        try:
                            for line in listFile.readlines():
                                fields = line.split()
                                if fields:
                                    fileList.append(fields[0])
                        finally:
                            listFile.close()
                    else:
                        fileList = [inputPath]
                    x += 2
                else:
                    print("ESBuilder: no such file %s" % inputPath)
                    sys.exit(1)
            elif opt == "-view":
                view = args[x + 1]
                checkArg([view])
                x += 2
            elif opt == "-idleMode":
                genMode = 0
                x += 1
            elif opt == "-skim":
                skim = 1
                x += 1
            elif opt == "-no-skim":
                noskim = 1
                x += 1
            elif opt == "-masterDB":
                masterDB = args[x + 1]
                checkArg([masterDB])
                x += 2
            elif opt in dictOpt:
                # common option already handled by es_init.ESOptions;
                # dictOpt tells how many arguments to skip
                x += dictOpt[opt]
            else:
                print("Option '%s' is not allowed" % opt)
                sys.exit(1)
        except SystemExit:
            # any exit requested while parsing ends with status 1
            sys.exit(1)
        except Exception:
            # e.g. a missing value (IndexError) or a bad number (ValueError)
            print("ESBuilder: cannot process option '%s'" % opt)
            sys.exit(1)

    ### AUTHENTICATION???
    # check that USER=pass2, otherwise exit
    authUsers = ['gluex', 'sdobbs']  ### CHECK
    # check if USER environment is set up, otherwise use LOGNAME
    if 'USER' not in os.environ:
        os.environ['USER'] = os.environ.get('LOGNAME', '')
    if (os.environ["USER"] not in authUsers and dbName == "EventStore"
            and dbHost.find('hallddb') != -1):
        print("ERROR: Injection to db='EventStore' should be done from official (gluex) account for %s DB\n" % dbName)
        print("For your own injection please use another db name")
        sys.exit(1)

    # form normalized absolute paths
    oDir = os_path_util.formAbsolutePath(oDir)

    # check required parameters
    if not grade:
        print("ESBuilder requires to specify a grade, see -grade option")
        sys.exit(1)
    if "unchecked" not in grade and view == "all":  ### CHECK
        print("ESBuilder only allow to inject 'unchecked' grades")
        print("  daq-unechecked, p2-unchecked, physics-unchecked")
        print("Either specify different view or inject as unchecked grade")
        print("Given grade='%s' view='%s'" % (grade, view))
        sys.exit(1)
    if not fileList:
        print("ESBuilder requires to specify input file(s) with -add option")
        sys.exit(1)

    # check permissions and access to output dir
    if not os.path.isdir(oDir):
        print("Output directory '%s' doesn't exists" % oDir)
        print("ESBuilder requires to specify output dir to store key/location files, see -output option")
        sys.exit(1)
    if oDir and not os_path_util.checkPermission(oDir):
        print("You don't have permission to write to output area '%s'" % oDir)
        sys.exit(1)

    # check existence of and permission to write to HSM
    if oHSMDir:
        if not os.path.isdir(oHSMDir):
            print("HSM directory '%s' does not exists" % oHSMDir)
            sys.exit(1)
        if not os_path_util.checkPermission(oHSMDir):
            print("You don't have permission to write to HSM location '%s'" % oHSMDir)
            sys.exit(1)

    # check that all inputs are in place
    for inputFile in fileList:
        if not os.path.isfile(inputFile):
            print("File '%s' does not exists" % inputFile)
            sys.exit(1)
    if dupRead and not os.path.isfile(dupRead):
        print("File '%s' does not exists" % dupRead)
        sys.exit(1)

    # connect to EventStoreDB
    outputLog, globalLog = es_init.ESOutputLog(logFile)
    db, dbType = es_init.ESDBConnector(dbHost, dbName, userName, userPass, '',
                                       dbPort, dbSocket)
    es_init.ESInput(userCommand, outputLog, dbType)

    # Be verbose
    if verbose:
        if newDB:
            print("Creating new tables DB:")
        else:
            print("Updating existing tables in DB:")
        print("\t grade\t'%s'\n\t timeStamp\t'%s'\n\t view\t\t'%s'\n" % (
            grade, timeS, view))
        if genMode == 0:
            print("\n\t ===> Process running in Idle mode")

    # create instance of ESManager class
    mydb = ESManager.ESManager(db, dbType, outputLog)
    # set-up all parameters
    mydb.setOutputDir(oDir)
    mydb.setGenerateDB(newDB)
    mydb.setSVName(svName)
    mydb.setParents(listOfParents)
    mydb.setGrade(grade)
    mydb.setTimeStamp(timeS)
    mydb.setView(view)
    mydb.setMinRun(minRun)
    mydb.setMaxRun(maxRun)
    mydb.setVerboseLevel(verbose)
    mydb.setReadDuplicatesSource(dupRead)
    mydb.setSkimFlag(skim)
    mydb.setNoSkimFlag(noskim)
    mydb.setDBHost(dbHost)
    mydb.setDBName(dbName)
    mydb.setDBPort(dbPort)
    mydb.setDBSocket(dbSocket)

    # interpret the master option: name@host[:port[:socket]] or a bare name
    if masterDB:
        dbComponents = masterDB.split("@")
        if len(dbComponents) == 2:
            masterDBName = dbComponents[0]
            hostParts = dbComponents[1].split(":")
            masterDBHost = hostParts[0]
            # host:port and host:port:socket both carry a port; empty
            # fields keep the defaults taken from the current connection
            if len(hostParts) in (2, 3) and hostParts[1]:
                masterDBPort = hostParts[1]
            if len(hostParts) == 3 and hostParts[2]:
                masterDBSocket = hostParts[2]
        else:
            masterDBHost = dbComponents[0]
    else:
        # fall back to the master settings from the user's configuration
        (login, adminInfo, cMasterName, cMasterHost, cMasterPort,
         cMasterSocket) = esdb_auth.readConfigFile()
        if cMasterHost:
            masterDBHost = cMasterHost
            masterDBName = cMasterName
            masterDBPort = cMasterPort
            masterDBSocket = cMasterSocket
    mydb.setMasterDB(masterDBName, masterDBHost, masterDBPort, masterDBSocket)

    # update DB using transaction
    if delete:
        # NOTE(review): unreachable today - 'delete' is never set by the
        # option parser, and delGrade/delTime are not defined in this
        # function. Wire up '-delete' before relying on this branch.
        status = mydb.deleteGrade(delGrade, delTime)
    else:  # for anything else
        try:
            status = mydb.updateDB(genMode, fileList, oHSMDir)
        except:
            # report which inputs were being processed, then let the
            # original exception (and its traceback) propagate
            print("ERROR: fail to process:")
            for item in fileList:
                print(item)
            print("--------------- See traceback ----------------")
            raise

    # close connection to db
    mydb.commit()
    mydb.close()

    return es_init.ESOutput(status, userCommand, historyFile, outputLog,
                            globalLog)
Example #2
0
def _relinkIndexFile(sql, prefix, relList, run, gid, srcFile, fileId, label):
    """Link one key/location file into the new layout and repoint its FileID row.

    Looks up the svName attached to graph id `gid`, forms the new path for
    `run`, creates <new path>/index if needed, symlinks `srcFile` there
    (unless the link target name already exists) and updates the fileName
    of row `fileId` in FileID. `label` is only used in the progress message.

    Returns the new base path (the parent of the 'index' directory).
    """
    query = ("SELECT svName FROM SpecificVersion,GraphPath \n"
             "\t\tWHERE GraphPath.svid=SpecificVersion.svid \n"
             "\t\tAND GraphPath.graphid='%s'" % gid)
    # FIXME, there're many svid's assigned to gid
    svName = sql.fetchOne(query)[0]
    fileName = os.path.split(srcFile)[1]
    # only the first element of the five returned is needed here
    dList, _idList, _parents, _parentIds, _graph = sql.getAllParents(svName)
    parentList = newParentList(sql, run, dList)
    if svName.lower() == 'daq':
        release = 'daq'
    else:
        release = getRelease(relList, svName)
    newPath = formNewPath(prefix, run, release, svName, parentList)
    newDir = os.path.join(newPath, 'index')
    if not os.path.isdir(newDir):
        os.makedirs(newDir)
    newFile = os.path.join(newDir, fileName)
    print("Link (%s) %s => %s" % (label, newFile, srcFile))
    sys.__stdout__.flush()
    if not os_path_util.isFile(newFile):
        os.symlink(srcFile, newFile)
    # change db entry
    sql.updateDBAndLog("UPDATE FileID SET fileName='%s' WHERE fileId='%s'" %
                       (newFile, fileId))
    return newPath


def ESFixPath(args):
    """Fix paths in EventStoreDB. The CLEOc data path specifications:
    /cleo/{detector,simulated}/{event,calibration}/{daq,pass2_version}/123400/123456/{specific_version_path}

    For every file registered in FileID that exists on disk, symbolic links
    are created under the new path layout (index/ for key and location
    files, data/ for the data files referenced by a location file) and the
    FileID rows are updated to point to the links. All DB changes are made
    in one transaction which is rolled back on any error.

    Args:
        args: command-line style argument list. Parsing starts at args[1],
            so args[0] is skipped. The local option is '-prefix <path>'.

    Returns:
        The value returned by es_init.ESOutput on success, None when the
        run failed and was rolled back.

    The process exits with status 1 on an invalid option or prefix.
    """
    localOpt = ["[ -prefix <Site CLEOc data prefix, e.g. /cdat> ]"]
    usage = es_init.helpMsg("ESFixPath", localOpt)
    usageDescription = ""

    examples = """
    """

    userCommand = "ESFixPath.py"
    optList, dictOpt = es_init.ESOptions(userCommand, args, usage,
                                         usageDescription, examples)
    dbName, dbHost, userName, userPass, dbPort, dbSocket = optList[0]
    historyFile, logFile, verbose, profile = optList[1]
    userCommand = optList[2]

    prefix = "/cdat/"  ### CHECK
    x = 1
    while x < len(args):
        opt = args[x]
        if opt.lower() == "-prefix":
            if x + 1 >= len(args):
                print("Option '-prefix' requires a value")
                sys.exit(1)
            prefix = args[x + 1]
            x += 2
        elif opt in dictOpt:
            # common option already handled by es_init.ESOptions;
            # dictOpt tells how many arguments to skip
            x += dictOpt[opt]
        else:
            print("Option '%s' is not allowed" % opt)
            sys.exit(1)

    # startswith() also rejects an empty prefix instead of raising
    if not prefix.startswith("/"):
        print("Prefix should start with /")
        print(usage)
        sys.exit(1)

    # initialize log
    outputLog, globalLog = es_init.ESOutputLog(logFile)

    # connect to EventStore
    # NOTE(review): dbPort/dbSocket are parsed above but not passed on here.
    db, dbType = es_init.ESDBConnector(dbHost, dbName, userName, userPass)
    sql = sql_util.SQLUtil(db, dbType, outputLog)
    # NOTE(review): the result is unused; the call is kept in case it
    # prepares state needed by lockTables() - confirm.
    sql.getTables()

    es_init.ESInput(userCommand, outputLog, dbType)

    # lock all tables for write operation
    sql.lockTables()

    # postpone all commits to DB, once job will finish we'll invoke commit.
    sql.setCommitFlag(0)

    sql.startTxn()
    try:
        # read all releases which would be used to find out pass2_version
        relList = os.listdir(RELPATH)

        # start processing
        print("Processing:")
        sys.__stdout__.flush()
        # walk the fileId range instead of retrieving all files at once
        tup = sql.fetchOne("SELECT MIN(fileId),MAX(fileId) FROM FileID")
        minId = long(tup[0])
        maxId = long(tup[1]) + 1
        for fileId in xrange(minId, maxId):
            tup = sql.fetchOne(
                "SELECT fileName FROM FileID WHERE fileId='%s'" % fileId)
            if not tup:
                continue
            srcFile = tup[0]
            if not os_path_util.isFile(srcFile):
                print("Found non existing file %s" % srcFile)
                continue

            # key files: one link per (run, graphid) row in KeyFile
            query = "SELECT run,graphid FROM KeyFile WHERE keyFileId='%s'" % fileId
            for row in sql.fetchAll(query):
                _relinkIndexFile(sql, prefix, relList, row[0], row[1],
                                 srcFile, fileId, "key")

            # location files: link the file itself, then the data files
            # listed in its header
            query = "SELECT run,graphid FROM Location WHERE locationFileId='%s'" % fileId
            for row in sql.fetchAll(query):
                newPath = _relinkIndexFile(sql, prefix, relList, row[0],
                                           row[1], srcFile, fileId, "loc")

                # open loc. file header and get data file id's
                idClauses = ["fileId='%s'" % dataId
                             for dataId in file_util.getFileIds(srcFile)]
                if not idClauses:
                    # nothing referenced; a bare "WHERE" would be invalid SQL
                    continue
                query = ("SELECT fileName,fileId FROM FileID WHERE " +
                         " OR ".join(idClauses))
                for datRow in sql.fetchAll(query):
                    datFile = '%s' % datRow[0]
                    if not os_path_util.isFile(datFile):
                        print("Found non existing file %s" % datFile)
                        continue
                    # keep the data file id separate from the outer fileId,
                    # which later Location rows of this file still need
                    datFileId = datRow[1]
                    datDir = os.path.join(newPath, 'data')
                    if not os.path.isdir(datDir):
                        os.makedirs(datDir)
                    datLink = os.path.join(datDir, os.path.split(datFile)[1])
                    print("Link (dat) %s => %s" % (datLink, datFile))
                    sys.__stdout__.flush()
                    if not os_path_util.isFile(datLink):
                        os.symlink(datFile, datLink)
                    # change db entry
                    sql.updateDBAndLog(
                        "UPDATE FileID SET fileName='%s' WHERE fileId='%s'" %
                        (datLink, datFileId))
    except:
        # deliberately broad: any failure (including an interrupt) must
        # undo the partial DB update
        print("Caught an error during merging step.")
        gen_util.printExcept()
        db.rollback()
        # release the table locks and the connection, as the success path does
        sql.unlockTables()
        sql.close()
        return

    # everything is ready for commit
    sql.setCommitFlag(1)
    sql.endTxn()
    sql.commit()
    sql.unlockTables()
    sql.close()
    return es_init.ESOutput(1, userCommand, historyFile, outputLog,
                            globalLog)
Example #3
0
def fileType(fileName):
    """A high-level method to determine file type.

    The checks are tried in this order:
      1. the first line contains "IDXA"            -> "idxa"
      2. the file starts with "<HDDM" (optionally after an "<?xml" line):
         class="r" -> "rest", class="s" -> "mc", anything else -> "hddm"
      3. EventStore key file signature KEYSIGNATURE=2718281, tested in both
         byte orders                                -> "ikey"
      4. otherwise the text after the last "." of fileName is returned
         (the whole name when it contains no ".").

    The location-file signature (LOCSIGNATURE=2951413) is not tested; such
    files are identified by their extension only.

    Args:
        fileName: path of the file to inspect.

    Returns:
        A string naming the file type as described above.

    Exits the process with status 1 if fileName is not found.
    """
    KEYSIGNATURE = 2718281  # magic number for Key file

    if not os_path_util.isFile(fileName):
        print("file_util: file %s not found" % fileName)
        sys.exit(1)

    fileDesc = open(fileName, 'rb')
    try:
        ## test if file is IDXA format first
        if fileDesc.readline().find("IDXA") != -1:
            return "idxa"

        ## check to see if file begins with "<HDDM"
        ## if so, use file "class" to determine type
        fileDesc.seek(0)
        snippet = fileDesc.read(5)
        if snippet == "<?xml":
            # skip the XML declaration line and look at what follows it
            fileDesc.seek(-5, 1)
            fileDesc.readline()
            snippet = fileDesc.read(5)
        if snippet == "<HDDM":
            fileDesc.seek(-5, 1)
            entries = fileDesc.readline().split()
            # the second token is expected to look like class="r"; a header
            # without it is still some kind of HDDM file
            if len(entries) > 1:
                keyValue = entries[1].split('=')
                if len(keyValue) == 2 and keyValue[0] == "class":
                    if keyValue[1] == "\"r\"":
                        return "rest"
                    elif keyValue[1] == "\"s\"":
                        return "mc"
            return "hddm"

        ## See if the file is some EventStore file
        fileDesc.seek(0)
        headerHeader = array.array('I')
        try:
            headerHeader.fromfile(fileDesc, 3)
        except EOFError:
            # shorter than three items: whatever was available is still in
            # the array, so fall through and use what we have
            pass

        what = ""
        if len(headerHeader):
            # the signature is the first item without its lowest byte;
            # test it as read and with swapped byte order
            if headerHeader[0] >> 8 == KEYSIGNATURE:
                what = "ikey"
            headerHeader.byteswap()
            if headerHeader[0] >> 8 == KEYSIGNATURE:
                what = "ikey"

        # if file still not determined, try its extension
        if not what:
            what = fileName.split(".")[-1]
        return what
    finally:
        fileDesc.close()
Example #4
0
def _relinkIndexFile(sql, prefix, relList, run, gid, srcFile, fileId, label):
    """Link one key/location file into the new layout and repoint its FileID row.

    Looks up the svName attached to graph id `gid`, forms the new path for
    `run`, creates <new path>/index if needed, symlinks `srcFile` there
    (unless the link target name already exists) and updates the fileName
    of row `fileId` in FileID. `label` is only used in the progress message.

    Returns the new base path (the parent of the 'index' directory).
    """
    query = ("SELECT svName FROM SpecificVersion,GraphPath \n"
             "\t\tWHERE GraphPath.svid=SpecificVersion.svid \n"
             "\t\tAND GraphPath.graphid='%s'" % gid)
    # FIXME, there're many svid's assigned to gid
    svName = sql.fetchOne(query)[0]
    fileName = os.path.split(srcFile)[1]
    # only the first element of the five returned is needed here
    dList, _idList, _parents, _parentIds, _graph = sql.getAllParents(svName)
    parentList = newParentList(sql, run, dList)
    if svName.lower() == 'daq':
        release = 'daq'
    else:
        release = getRelease(relList, svName)
    newPath = formNewPath(prefix, run, release, svName, parentList)
    newDir = os.path.join(newPath, 'index')
    if not os.path.isdir(newDir):
        os.makedirs(newDir)
    newFile = os.path.join(newDir, fileName)
    print("Link (%s) %s => %s" % (label, newFile, srcFile))
    sys.__stdout__.flush()
    if not os_path_util.isFile(newFile):
        os.symlink(srcFile, newFile)
    # change db entry
    sql.updateDBAndLog("UPDATE FileID SET fileName='%s' WHERE fileId='%s'" %
                       (newFile, fileId))
    return newPath


def ESFixPath(args):
    """Fix paths in EventStoreDB. The CLEOc data path specifications:
    /cleo/{detector,simulated}/{event,calibration}/{daq,pass2_version}/123400/123456/{specific_version_path}

    For every file registered in FileID that exists on disk, symbolic links
    are created under the new path layout (index/ for key and location
    files, data/ for the data files referenced by a location file) and the
    FileID rows are updated to point to the links. All DB changes are made
    in one transaction which is rolled back on any error.

    Args:
        args: command-line style argument list. Parsing starts at args[1],
            so args[0] is skipped. The local option is '-prefix <path>'.

    Returns:
        The value returned by es_init.ESOutput on success, None when the
        run failed and was rolled back.

    The process exits with status 1 on an invalid option or prefix.
    """
    localOpt = ["[ -prefix <Site CLEOc data prefix, e.g. /cdat> ]"]
    usage = es_init.helpMsg("ESFixPath", localOpt)
    usageDescription = ""

    examples = """
    """

    userCommand = "ESFixPath.py"
    optList, dictOpt = es_init.ESOptions(userCommand, args, usage,
                                         usageDescription, examples)
    dbName, dbHost, userName, userPass, dbPort, dbSocket = optList[0]
    historyFile, logFile, verbose, profile = optList[1]
    userCommand = optList[2]

    prefix = "/cdat/"  ### CHECK
    x = 1
    while x < len(args):
        opt = args[x]
        if opt.lower() == "-prefix":
            if x + 1 >= len(args):
                print("Option '-prefix' requires a value")
                sys.exit(1)
            prefix = args[x + 1]
            x += 2
        elif opt in dictOpt:
            # common option already handled by es_init.ESOptions;
            # dictOpt tells how many arguments to skip
            x += dictOpt[opt]
        else:
            print("Option '%s' is not allowed" % opt)
            sys.exit(1)

    # startswith() also rejects an empty prefix instead of raising
    if not prefix.startswith("/"):
        print("Prefix should start with /")
        print(usage)
        sys.exit(1)

    # initialize log
    outputLog, globalLog = es_init.ESOutputLog(logFile)

    # connect to EventStore
    # NOTE(review): dbPort/dbSocket are parsed above but not passed on here.
    db, dbType = es_init.ESDBConnector(dbHost, dbName, userName, userPass)
    sql = sql_util.SQLUtil(db, dbType, outputLog)
    # NOTE(review): the result is unused; the call is kept in case it
    # prepares state needed by lockTables() - confirm.
    sql.getTables()

    es_init.ESInput(userCommand, outputLog, dbType)

    # lock all tables for write operation
    sql.lockTables()

    # postpone all commits to DB, once job will finish we'll invoke commit.
    sql.setCommitFlag(0)

    sql.startTxn()
    try:
        # read all releases which would be used to find out pass2_version
        relList = os.listdir(RELPATH)

        # start processing
        print("Processing:")
        sys.__stdout__.flush()
        # walk the fileId range instead of retrieving all files at once
        tup = sql.fetchOne("SELECT MIN(fileId),MAX(fileId) FROM FileID")
        minId = long(tup[0])
        maxId = long(tup[1]) + 1
        for fileId in xrange(minId, maxId):
            tup = sql.fetchOne(
                "SELECT fileName FROM FileID WHERE fileId='%s'" % fileId)
            if not tup:
                continue
            srcFile = tup[0]
            if not os_path_util.isFile(srcFile):
                print("Found non existing file %s" % srcFile)
                continue

            # key files: one link per (run, graphid) row in KeyFile
            query = "SELECT run,graphid FROM KeyFile WHERE keyFileId='%s'" % fileId
            for row in sql.fetchAll(query):
                _relinkIndexFile(sql, prefix, relList, row[0], row[1],
                                 srcFile, fileId, "key")

            # location files: link the file itself, then the data files
            # listed in its header
            query = "SELECT run,graphid FROM Location WHERE locationFileId='%s'" % fileId
            for row in sql.fetchAll(query):
                newPath = _relinkIndexFile(sql, prefix, relList, row[0],
                                           row[1], srcFile, fileId, "loc")

                # open loc. file header and get data file id's
                idClauses = ["fileId='%s'" % dataId
                             for dataId in file_util.getFileIds(srcFile)]
                if not idClauses:
                    # nothing referenced; a bare "WHERE" would be invalid SQL
                    continue
                query = ("SELECT fileName,fileId FROM FileID WHERE " +
                         " OR ".join(idClauses))
                for datRow in sql.fetchAll(query):
                    datFile = '%s' % datRow[0]
                    if not os_path_util.isFile(datFile):
                        print("Found non existing file %s" % datFile)
                        continue
                    # keep the data file id separate from the outer fileId,
                    # which later Location rows of this file still need
                    datFileId = datRow[1]
                    datDir = os.path.join(newPath, 'data')
                    if not os.path.isdir(datDir):
                        os.makedirs(datDir)
                    datLink = os.path.join(datDir, os.path.split(datFile)[1])
                    print("Link (dat) %s => %s" % (datLink, datFile))
                    sys.__stdout__.flush()
                    if not os_path_util.isFile(datLink):
                        os.symlink(datFile, datLink)
                    # change db entry
                    sql.updateDBAndLog(
                        "UPDATE FileID SET fileName='%s' WHERE fileId='%s'" %
                        (datLink, datFileId))
    except:
        # deliberately broad: any failure (including an interrupt) must
        # undo the partial DB update
        print("Caught an error during merging step.")
        gen_util.printExcept()
        db.rollback()
        # release the table locks and the connection, as the success path does
        sql.unlockTables()
        sql.close()
        return

    # everything is ready for commit
    sql.setCommitFlag(1)
    sql.endTxn()
    sql.commit()
    sql.unlockTables()
    sql.close()
    return es_init.ESOutput(1, userCommand, historyFile, outputLog,
                            globalLog)
Example #5
0
def ESBuilder(args):
    """ESBuilder is a main injection tool. It supports two types of DBs:
    MySQL and SQLite. The injection can be done for variety of file formats:
    evio, hddm, idxa. For option information and usage please use '-help' option.
    For option description '--help'.
    For specific injection types please use '-examples' option.

    Please note, ESBuilder is a wrapper shell script around ESBuilder.py
    module which does the work.
    """
    localOpt = ["[ -add <dir or file or pattern of files> ]"]
    localOpt.append("[ -grade <grade> ] [ -time <timeStamp> ]")
    localOpt.append("[ -dataVersionName <name> ]  [ -view <skim> ]")
    localOpt.append("[ -listOfParents <dataVersionName's> ]")
    localOpt.append("[ -output <dir> ] [ -HSMDir <HSM directory> ]")
    localOpt.append("[ -dupRead <fileName> ] [ -skim ] [ -no-skim ]")
    localOpt.append("[ -masterDB <name@host:port:socket or fileName> ]")
    usage = es_init.helpMsg("ESBuilder", localOpt)
    usageDescription = """
Option description:
*   -grade:   specifies the grade, e.g. "physics", "p2-unchecked"
*   -add:     adds data file(s) to the EventStore
	      You may specify: directory, file name or a list of files
	      For patterns use '*', e.g MC*tau*.pds
*   -output:  output location for storing key/location files
*   -dataVersionName: specifies the data version name (aka svName)

    -time:    specifies the timeStamp, e.g. 20090227. If time is not provided 
              will try to append to existing grade/dataVersionName or use a 
	      one day in a future as new timeStamp if no grade/dataVersionName
	      combination is found.
    -view:    specifies the view, e.g. "tau"
    -listOfParents specifies list of parents for given injection,
	      e.g. while injecting p2-unchecked grade its parent is 'daq'.
    -newDB:   force the creation of a new EventStore
    -sqlite   use the SQLite version of EventStore
	      default sqlite.db, otherwise a fileName needs to be provided
    -mysql    use the MySQL version of EventStore. In order to access MySQL
	      you need either provide login/password through the -user/-password
	      options or create $HOME/.esdb.conf with user:password entry
    -masterDB specifies host and db name of the master you want to use
    -verbose: verbose mode, a lot of useful printout
    -idleMode when this flag is specified, no key/location file will be
	      generated (useful once you have them and want reproduce DB
	      content). But content of DB will be updated. USE WITH CAUTION.
    -delete   delete a grade from EventStore. USE WITH CAUTION.
	      You need to provide the grade and the timeStamp.
    -HSMDir   specifies output HSM directory.
    -logFile  specifies the log file name. You may either provide a full file name 
              (including path) or 'stdout' or 'stderr' to redirect your log to
	      appropriate I/O stream. During injection an intermidiate files
	      esdb.log.YYYYMMDD_HHMMSS_PID will be created.
	      Once a job successfully finishes, the esdb.log.YYYYMMDD_HHMMSS_PID 
	      is moved to your logFile, otherwise esdb.log.YYYYMMDD_HHMMSS_PID remains.
    -profile  perform internal profiling.
    -dupRead  in the case of duplicated records force to use this source
    -skim     force ESBuilder to use input files as a skim, i.e. find their parents
              and build combined location file for all of them
    -no-skim  force ESBuilder to use input files as is

Please note: required parameters are marked with (*). All options can be
specified in any order. By default: view='all', EventStoreTMP DB is used and key/location
files are generated.

    """

    examples = es_init.ESExamples()
    userCommand = "ESBuilder.py"
    optList, dictOpt = es_init.ESOptions(userCommand, args, usage, usageDescription)
    dbName, dbHost, userName, userPass, dbPort, dbSocket = optList[0]
    historyFile, logFile, verbose, profile = optList[1]
    userCommand = optList[2]

    # default values
    grade = ""
    timeS = gen_util.dayAhead()
    oDir = ""
    view = "all"
    run = 0
    file = ""
    newDB = 0
    delete = 0
    genMode = 1
    minRun = 0
    maxRun = 1000000
    localtime = time.strftime("%Y%m%d_%H%M%S", time.localtime())
    uname = os.uname()
    svName = ""
    tempLogFile = "esdb.log.%s_%s" % (localtime, os.getpid())
    fileList = []
    listOfParents = []
    oHSMDir = ""
    dupRead = ""
    skim = 0
    noskim = 0
    masterDBName = dbName
    masterDBHost = dbHost
    master = ""
    masterDB = ""
    masterDBPort = dbPort
    masterDBSocket = dbSocket
    # parse the rest of the options and form user's command
    x = 1
    doNotRead = 0
    while x < len(args):
        try:
            if args[x] == "-newDB":
                newDB = 1
                x += 1
                continue
            if args[x] == "-HSMDir":
                oHSMDir = args[x + 1]
                checkArg([oHSMDir])
                x += 2
                continue
            if args[x] == "-dupRead":
                dupRead = args[x + 1]
                checkArg([dupRead])
                x += 2
                continue
            if args[x] == "-dataVersionName":
                svName = args[x + 1]
                checkArg([svName])
                x += 2
                continue
            if args[x] == "-grade":
                grade = args[x + 1]
                checkArg([grade])
                x += 2
                continue
            if args[x] == "-time":
                timeS = args[x + 1]
                checkArg([timeS])
                x += 2
                continue
            if args[x] == "-output":
                oDir = args[x + 1] + "/"
                checkArg([oDir])
                x += 2
                continue
            if args[x] == "-runRange":
                minRun = int(args[x + 1])
                maxRun = int(args[x + 2])
                checkArg([minRun, maxRun])
                x += 3
                continue
            if args[x] == "-listOfParents":
                x += 1
                while args[x][0] != "-":
                    newArg = args[x]
                    listOfParents.append(args[x])
                    x += 1
                    if len(args) == x:
                        break
                checkArg(listOfParents)
                continue
            if args[x] == "-add":
                file = os_path_util.formAbsolutePath(args[x + 1])
                # first check if pattern is present
                if len(args) > x + 2 and args[x + 2][0] != "-":
                    counter = 0
                    for idx in xrange(x + 1, len(args)):
                        newArg = args[idx]
                        if newArg[0] == "-":
                            break
                        counter += 1
                        if os.path.isfile(newArg):
                            fileList.append(os_path_util.formAbsolutePath(newArg))
                    x += counter + 1
                    continue
                elif os.path.isdir(file):
                    dir = file + "/"
                    for f in os.listdir(dir):
                        if string.split(f, ".")[-1] != "pds":
                            continue
                        fileName = dir + f
                        fileList.append(os_path_util.formAbsolutePath(fileName))
                    x += 2
                    continue
                elif os_path_util.isFile(file):
                    if file[-5:] == ".list":
                        tmpList = open(file).readlines()
                        for item in tmpList:
                            fileList.append(string.split(item)[0])
                    else:
                        fileList = [file]
                    x += 2
                    continue
                    # check if this file exists
                else:
                    print "ESBuilder: no such file", file
                    raise
                checkArg(fileList)
            if args[x] == "-view":
                view = args[x + 1]
                checkArg([view])
                x += 2
                continue
            if args[x] == "-idleMode":
                genMode = 0
                x += 1
                continue
            if args[x] == "-skim":
                skim = 1
                x += 1
                continue
            if args[x] == "-no-skim":
                noskim = 1
                x += 1
                continue
            if args[x] == "-masterDB":
                masterDB = args[x + 1]
                master = 1
                checkArg([masterDB])
                x += 2
                continue
                # if we reach here, that means we found unkown option
            if dictOpt.has_key(args[x]):
                x += dictOpt[args[x]]
            else:
                print "Option '%s' is not allowed" % args[x]
                raise
        except:
            sys.exit(1)

            ### AUTHENTICATION???
            # check that USER=pass2, otherwise exit
    authUsers = ["gluex", "sdobbs"]  ### CHECK
    # check if USER environment is set up, otherwise use LOGNAME
    env = os.environ
    if not env.has_key("USER"):
        os.environ["USER"] = env["LOGNAME"]
    if not authUsers.count(os.environ["USER"]) and dbName == "EventStore" and string.find(dbHost, "hallddb") != -1:
        print "ERROR: Injection to db='EventStore' should be done from official (gluex) account for %s DB\n" % dbName
        print "For your own injection please use another db name"
        sys.exit(1)
    # check underlying OS, so far we only allow to inject from SunOS
    # if os.environ["USER"]=="pass2" and uname[0]!="SunOS":
    #   print "ERROR: for pass2 account the EventStore injection should be done from SunOS\n"
    #   sys.exit(1)
    #######################################

    # form normalized abosulte paths
    oDir = os_path_util.formAbsolutePath(oDir)

    # check required parameters
    if not len(grade):
        print "ESBuilder requires to specify a grade, see -grade option"
        sys.exit(1)
    if string.find(grade, "unchecked") == -1 and view == "all":  ### CHECK
        print "ESBuilder only allow to inject 'unchecked' grades"
        print "  daq-unechecked, p2-unchecked, physics-unchecked"
        print "Either specify different view or inject as unchecked grade"
        print "Given grade='%s' view='%s'" % (grade, view)
        sys.exit(1)
    if not len(fileList):
        print "ESBuilder requires to specify input file(s) with -add option"
        sys.exit(1)

    # check permissions and access to output dir
    if not os.path.isdir(oDir):
        print "Output directory '%s' doesn't exists" % oDir
        print "ESBuilder requires to specify output dir to store key/location files, see -output option"
        sys.exit(1)
    if oDir and not os_path_util.checkPermission(oDir):
        print "You don't have permission to write to output area '%s'" % oDir
        sys.exit(1)

    # check permission to write to HSM
    if oHSMDir and not os.path.isdir(oHSMDir):
        print "HSM directory '%s' does not exists" % oHSMDir
        sys.exit(1)
        if not os_path_util.checkPermission(oHSMDir):
            print "You don't have permission to write to HSM location '%s'" % oHSMDir
            sys.exit(1)

    # check that all inputs are in place
    for file in fileList:
        if not os.path.isfile(file):
            print "File '%s' does not exists" % file
            sys.exit(1)
    if dupRead and not os.path.isfile(dupRead):
        print "File '%s' does not exists" % dupRead
        sys.exit(1)

    # connect to MySQL EventStoreDB
    outputLog, globalLog = es_init.ESOutputLog(logFile)
    db, dbType = es_init.ESDBConnector(dbHost, dbName, userName, userPass, "", dbPort, dbSocket)
    es_init.ESInput(userCommand, outputLog, dbType)

    # Be verbose
    dbinfo = "\t grade\t'%s'\n\t timeStamp\t'%s'\n\t view\t\t'%s'\n" % (grade, timeS, view)
    if newDB:
        if verbose:
            print "Creating new tables DB:"
            print dbinfo
    else:
        if verbose:
            print "Updating existing tables in DB:"
            print dbinfo
    if genMode == 0 and verbose:
        print "\n\t ===> Process running in Idle mode"

    # create instance of ESManager class
    mydb = ESManager.ESManager(db, dbType, outputLog)
    # set-up all parameters
    mydb.setOutputDir(oDir)
    mydb.setGenerateDB(newDB)
    mydb.setSVName(svName)
    mydb.setParents(listOfParents)
    mydb.setGrade(grade)
    mydb.setTimeStamp(timeS)
    mydb.setView(view)
    mydb.setMinRun(minRun)
    mydb.setMaxRun(maxRun)
    mydb.setVerboseLevel(verbose)
    mydb.setReadDuplicatesSource(dupRead)
    mydb.setSkimFlag(skim)
    mydb.setNoSkimFlag(noskim)
    mydb.setDBHost(dbHost)
    mydb.setDBName(dbName)
    mydb.setDBPort(dbPort)
    mydb.setDBSocket(dbSocket)

    # interpret the master option
    if masterDB:
        dbComponents = string.split(masterDB, "@")
        if len(dbComponents) == 2:
            masterDBName = dbComponents[0]
            newComponents = string.split(dbComponents[1], ":")
            masterDBHost = newComponents[0]
            port = socket = ""
            if len(newComponents) == 2:
                port = newComponents[1]
            elif len(newComponents) == 3:
                socket = newComponents[2]
            #           masterDBHost,port,socket=string.split(dbComponents[1],":")
            if port:
                masterDBPort = port
            if socket:
                masterDBSocket = socket
        else:
            masterDBHost = dbComponents[0]
    else:
        login, adminInfo, cMasterName, cMasterHost, cMasterPort, cMasterSocket = esdb_auth.readConfigFile()
        if cMasterHost:
            masterDBHost = cMasterHost
            masterDBName = cMasterName
            masterDBPort = cMasterPort
            masterDBSocket = cMasterSocket
    mydb.setMasterDB(masterDBName, masterDBHost, masterDBPort, masterDBSocket)

    # update DB using transaction
    if delete:
        status = mydb.deleteGrade(delGrade, delTime)
    else:  # for anything else
        try:
            status = mydb.updateDB(genMode, fileList, oHSMDir)
        except:
            print "ERROR: fail to process:"
            for item in fileList:
                print item
            print "--------------- See traceback ----------------"
            raise

    # close connection to db
    mydb.commit()
    mydb.close()

    returnStatus = es_init.ESOutput(status, userCommand, historyFile, outputLog, globalLog)
    return returnStatus
Example #6
0
def fileType(fileName):
    """A high-level method to determine file type. For ES files, it uses file signature:
       - KEYSIGNATURE=2718281
       - LOCSIGNATURE=2951413,
    For HDDM files
    It is endian compliant."""
    KEYSIGNATURE = 2718281  # magic number for Key file
    LOCSIGNATURE = 2951413  # magic number for Location file
    SIGNATURE = 0  # signature we read from input file
    SWAPPEDSIGNATURE = 0  # signature we read from input file (w/ swapped bytes)

    #print "CHECKING FILE TYPE"

    if os_path_util.isFile(fileName):
        fileDesc = open(fileName, 'rb')
        fileDesc.seek(0)
    else:
        print "file_util: file %s not found" % fileName
        sys.exit(1)

    ## test if file is IDXA format first
    fileDesc.seek(0)
    if string.find(fileDesc.readline(), "IDXA") != -1:
        fileDesc.close()
        return "idxa"

    ## check to see if file begins with "<HDDM"
    ## if so, use file "class" to determine type
    fileDesc.seek(0)
    snippet = fileDesc.read(5)
    #print "first snippet check = %s"%snippet
    if (snippet == "<?xml"):
        fileDesc.seek(-5, 1)
        xmlspec = fileDesc.readline()
        #print "XML spec = %s"%xmlspec
        snippet = fileDesc.read(5)
    #print "first snippet check = %s"%snippet
    if (snippet == "<HDDM"):
        fileDesc.seek(-5, 1)
        hddmspec = fileDesc.readline()
        #print "HDDM spec = %s"%hddmspec
        entries = hddmspec.split()
        #print "entries = %s"%str(entries)
        (key, value) = entries[1].split('=')
        fileDesc.close()
        #print "key = %s  value = %s"%(key,value)
        if key == "class":
            #print "FOUND CLASS"
            #print "TYPE = %s"%value
            if value == "\"r\"":
                #print "REST FILE"
                return "rest"
            elif value == "\"s\"":
                return "mc"
        return "hddm"  ## if all else fails at least we know it's some type of HDDM file?

    ## See if the file is some EventStore file
    fileDesc.seek(0)
    headerHeader = array.array('I')
    headerHeader.fromfile(fileDesc, 3)

    # read file signature
    SIGNATURE = headerHeader[0] >> 8

    # look if we match any of signatures
    what = ""
    if SIGNATURE == KEYSIGNATURE: what = "ikey"
    #elif SIGNATURE == LOCSIGNATURE: what="lpds"

    # if nothing matches, swap bytes and check again
    headerHeader.byteswap()
    SWAPPEDSIGNATURE = headerHeader[0] >> 8

    if SWAPPEDSIGNATURE == KEYSIGNATURE: what = "ikey"
    #elif SWAPPEDSIGNATURE == LOCSIGNATURE: what="lpds"

    # if file still not determined, try its extension
    if not what:
        what = string.split(fileName, ".")[-1]
    fileDesc.close()

    return what