Example #1
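All four main() functions below come from the WRF-Hydro / National Water Model calibration workflow. Each snippet assumes module-level imports along these lines (a sketch inferred from the names used in the code; statusMod, dbMod, configMod, errMod, spinupMod, sensitivityMod, and calibMod are project-local modules, and topDir is a module-level path set elsewhere in the original source):

import argparse
import datetime
import os
import pwd
import sys
import time

import numpy as np
import pandas as pd

import calibMod
import configMod
import dbMod
import errMod
import sensitivityMod
import spinupMod
import statusMod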
def main(argv):
    # Parse arguments. User must input a job name.
    parser = argparse.ArgumentParser(description='Main program to start or restart ' + \
             'calibration spinup for the National Water Model')
    parser.add_argument('jobID',metavar='jobID',type=str,nargs='+',
                        help='Job ID specific to calibration spinup.')
    parser.add_argument('--optDbPath',type=str,nargs='?',
                        help='Optional alternative path to SQLite DB file.')
    
    args = parser.parse_args()
    
    # If the SQLite file does not exist, throw an error.
    if args.optDbPath is not None:
        if not os.path.isfile(args.optDbPath):
            print "ERROR: " + args.optDbPath + " Does Not Exist."
            sys.exit(1)
        else:
            dbPath = args.optDbPath
    else:
        dbPath = topDir + "wrfHydroCalib.db"
        if not os.path.isfile(dbPath):
            print "ERROR: SQLite3 DB file: " + dbPath + " Does Not Exist."
            sys.exit(1)
    
    # Establish the beginning timestamp for this program.
    begTimeStamp = datetime.datetime.now()
    
    # Get current user who is running this program.
    userTmp = pwd.getpwuid(os.getuid()).pw_name
    
    # Initialize object to hold status and job information
    jobData = statusMod.statusMeta()
    jobData.jobID = int(args.jobID[0])
    jobData.dbPath = dbPath
    
    # Establish database connection.
    db = dbMod.Database(jobData)
    try:
        db.connect(jobData)
    except:
        print jobData.errMsg
        sys.exit(1)
        
    # Extract job data from database
    try:
        db.jobStatus(jobData)
    except:
        print jobData.errMsg
        sys.exit(1)
    
    # Pull extensive meta-data describing the job from the config file.
    configPath = str(jobData.jobDir) + "/setup.config"
    if not os.path.isfile(configPath):
        print "ERROR: Configuration file: " + configPath + " not found."
        sys.exit(1)

    try:        
        staticData = configMod.readConfig(configPath)
    except:
        print "ERROR: Failure to read configuration file: " + configPath
        sys.exit(1)
        
    if staticData.coldStart == 1:
        print "ERROR: User has specified a cold-start option for calibration. Exiting...."
        sys.exit(0)
    if staticData.optSpinFlag == 1:
        print "ERROR: User has specified an optional spinup file. Exiting...."
        sys.exit(0)
    
    # Assign the SQL command from the config file into the jobData structure
    jobData.gSQL = staticData.gSQL
        
    # Check gages in directory to match what's in the database
    try:
        jobData.checkGages2(db)
    except:
        errMod.errOut(jobData)
    
    # Establish a LOCK file to secure this Python program and make sure
    # no other instances overstep here. This is mostly designed to deal
    # with nohup processes being kicked off on Yellowstone/Cheyenne crontabs
    # arbitrarily. Just another check/balance here.
    pyLockPath = str(jobData.jobDir) + "/PYTHON.LOCK"
    if os.path.isfile(pyLockPath):
        # Either a job is still running, or was running
        # and was killed.

        print 'LOCK FILE FOUND.'
        # Read in to get PID number
        pidObj = pd.read_csv(pyLockPath)
        pidCheck = int(pidObj.PID[0])
        if errMod.check_pid(pidCheck):
            print "JOB: " + str(pidCheck) + " is still running."
            sys.exit(0)
        else:
            print "JOB: " + str(pidCheck) + " has failed. Removing LOCK file."
            os.remove(pyLockPath)
            fileObj = open(pyLockPath,'w')
            fileObj.write('\"PID\"\n')
            fileObj.write(str(os.getpid()))
            fileObj.close()
        # TEMPORARY FOR CHEYENNE. Since all cron jobs are launched
        # from an administrative node, we cannot monitor the process at 
        # all, which is an inconvenience. So.... we will check the last
        # modified time. If it's more than 15 minutes old, we will assume
        # the process is no longer running and can continue on with calibration.
        #dtRunCheck = datetime.datetime.now() - datetime.datetime.fromtimestamp(os.path.getmtime(pyLockPath))
        #if dtRunCheck.seconds/60.0 < 15.0:
        #    # We are going to assume a previous process is still running on the system. 
        #    # exit gracefully.
        #    print 'ASSUMING PROCESS STILL RUNNING'
        #    sys.exit(0)
        #else:
        #    # We are assuming the process is no longer running on the system. Allow
        #    # the workflow to continue.
        #    print 'ALLOWING WORKFLOW TO CONTINUE. REMOVING LOCK FILE'
        #    os.remove(pyLockPath)
        #    fileObj = open(pyLockPath,'w')
        #    fileObj.write('\"PID\"\n')
        #    fileObj.write(str(os.getpid()))
        #    fileObj.close()
    else:
        # Write a LOCK file for this program.
        fileObj = open(pyLockPath,'w')
        fileObj.write('\"PID\"\n')
        fileObj.write(str(os.getpid()))
        fileObj.close()
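    # (Editor sketch, not part of the original module: errMod.check_pid above
    # presumably tests whether a process with the given PID is alive. A minimal
    # POSIX version of that idiom, ignoring EPERM corner cases, would be:
    #
    #     def check_pid(pid):
    #         try:
    #             os.kill(pid, 0)   # signal 0: existence check only, nothing sent
    #         except OSError:
    #             return False
    #         return True
    # )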
    
    # Some housekeeping here. If the spinup is already complete, throw an error.
    # Also, if this is a re-initiation under a different user, require the new
    # user to enter a new contact that will be updated in the database.
    if int(jobData.spinComplete) == 1:
        jobData.errMsg = "ERROR: Spinup for job ID: " + str(jobData.jobID) + \
                         " has already completed."
        errMod.errOut(jobData)
        
    if userTmp != jobData.owner:
        print "User: "******" is requesting to takeover jobID: " + \
              str(jobData.jobID) + " from owner: " + str(jobData.owner)
        strTmp = "Please enter new email address. Leave blank if no email " + \
                 "change is desired. NOTE if you leave both email and Slack " + \
                 "information blank, no change in contact will occur. Only " + \
                 "the owner will be modified:"
        newEmail = raw_input(strTmp)
        #strTmp = "Please enter Slack channel:"
        #newSlackChannel = raw_input(strTmp)
        #strTmp = "Please enter Slack token:"
        #newSlackToken = raw_input(strTmp)
        #strTmp = "Please enter Slack user name:"
        #newSlackUName = raw_input(strTmp)
        changeFlag = 1
        #if len(newSlackChannel) != 0 and len(newSlackToken) == 0:
        #    print "ERROR: You must specify an associated Slacker API token."
        #    sys.exit(1)
        #if len(newSlackChannel) != 0 and len(newSlackUName) == 0:
        #    print "ERROR: You must specify an associated Slacker user name."
        #    sys.exit(1)
        #if len(newSlackToken) != 0 and len(newSlackChannel) == 0:
        #    print "ERROR: You must specify an associated Slacker channel name."
        #    sys.exit(1)
        #if len(newSlackToken) != 0 and len(newSlackUName) == 0:
        #    print "ERROR: You must specify an associated Slacker user name."
        #    sys.exit(1)
        #if len(newSlackUName) != 0 and len(newSlackChannel) == 0:
        #    print "ERROR: You must specify an associated Slacker channel name."
        #    sys.exit(1)
        #if len(newSlackUName) != 0 and len(newSlackToken) == 0:
        #    print "ERROR: You must specify an associated Slacker API token."
        #    sys.exit(1)
        #if len(newSlackChannel) != 0 and len(newEmail) != 0:
        #    print "ERROR: You cannot specify both email and Slack for notifications."
        #    sys.exit(1)
        #if len(newSlackChannel) == 0 and len(newEmail) == 0:
        #    changeFlag = 0
            
        # PLACEHOLDER FOR CHECKING SLACK CREDENTIALS
        
        # TEMPORARY FOR VERSION 1.2 NWM CALIBRATION!!!!
        # If a new owner takes over, simply change the owner, but keep all 
        # other contact information the same.
        newEmail = jobData.email
        newSlackChannel = jobData.slChan
        newSlackToken = jobData.slToken
        newSlackUName = jobData.slUser
        if not newEmail:
            newEmail = ''
        if not newSlackChannel:
            newSlackChannel = ''
            newSlackToken = ''
            newSlackUName = ''
            
        try:
            db.updateJobOwner(jobData,userTmp,newEmail,newSlackChannel,newSlackToken,newSlackUName,changeFlag)
        except:
            errMod.errOut(jobData)
            
        jobData.genMsg = "MSG: User: "******" Is Taking Over JobID: " + str(jobData.jobID) + \
                         " From Owner: " + str(jobData.owner)
        errMod.sendMsg(jobData)
        
    # Begin an "infinite" do loop. This loop will continue to loop through all 
    # the basins until spinups are complete. Basins are allowed ONE failure. A restart
    # will be attempted. If the restart fails again, a LOCK file is placed into the
    # run directory and an error email is sent to the user.
    completeStatus = False
    
    # Create a "key" array. This array is of length [numBasins] and is initialized to 0.0.
    # Each array element can have the following values based on current model status:
    # 0.0 - Initial value
    # 0.5 - Model simulation in progress
    # 1.0 - Model simulation complete
    # -0.5 - Model simulation failed once and a restart has been attempted
    # -1.0 - Model has failed twice. A LOCK file has been created.
    # Once all array elements are 1.0, then completeStatus goes to True, an entry into
    # the database occurs, and the program will complete.
    keySlot = np.empty(len(jobData.gages))
    keySlot[:] = 0.0
    entryValue = float(len(jobData.gages))
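    # For example, with three gages, entryValue = 3.0 and the spinup is complete
    # once keySlot holds [1.0, 1.0, 1.0].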
    
    # Create an array to hold system job ID values. This will only be used for
    # PBS, as qstat has demonstrated slow behavior when doing a full qstat command.
    # We will track job ID values and do a qstat <jobID> and populate this array
    # to keep track of things.
    pbsJobId = np.empty([len(jobData.gages)],np.int64)
    pbsJobId[:] = -9999

    while not completeStatus:
        # Walk through spinup directory for each basin. Determine the status of
        # the model runs by the files available. If restarting, modify the 
        # namelist files appropriately. Then, restart the model. Once all
        # basins have been accounted for, fire off the monitoring program through
        # nohup to keep track of the models. If anything goes wrong, notifications
        # will either be emailed per the user's info, or piped to Slack for group
        # notification.
        # Loop through each basin. Perform the following steps:
        # 1.) If status is -0.5,0.0, or 0.5, check to see if the model is running
        #     for this basin.
        # 2.) If the model is not running, check for expected output and perform
        #     necessary logistics. Continue to the next basin.
        # If the status goes to -1.0, a LOCK file is created and must be manually
        # removed by the user. Once the program detects this, it will restart the
        # model and the status goes back to 0.5.
        # If the status is -0.5 and no job is running, output must be complete, or 
        # status goes to -1.0.
        # If output is not complete, the model is still running, status stays at 0.5.
        # If job is not running, and output has been completed, status goes to 1.0.
        # This continues indefinitely until statuses for ALL basins go to 1.0.
        for basin in range(0,len(jobData.gages)):
            try:
                spinupMod.runModel(jobData,staticData,db,jobData.gageIDs[basin],jobData.gages[basin],keySlot,basin,pbsJobId)
            except:
                errMod.errOut(jobData)
            
            # TEMPORARY FOR CHEYENNE
            # Check to make sure program hasn't passed a prescribed time limit. If it has,
            # exit gracefully.
            #timeCheckStamp = datetime.datetime.now()
            #programDtCheck = timeCheckStamp - begTimeStamp
            #if programDtCheck.seconds/60.0 > 90.0: 
            #    # 90-minutes)
            #    try:
            #        fileObj = open(pyLockPath,'a')
            #        fileObj.write('WORKFLOW HAS HIT TIME LIMIT - EXITING....\n')
            #        fileObj.close()
            #    except:
            #        jobData.errMsg = "ERROR: Unable to update workflow LOCK file: " + pyLockPath
            #        errMod.errOut(jobData)
        
        # Check to see if program requirements have been met.
        if keySlot.sum() == entryValue:
            jobData.spinComplete = 1
            try:
                db.updateSpinupStatus(jobData)
            except:
                errMod.errOut(jobData)
            jobData.genMsg = "SPINUP FOR JOB ID: " + str(jobData.jobID) + " COMPLETE."
            errMod.sendMsg(jobData)
            completeStatus = True
            
        # Open the Python LOCK file. Write a blank line to the file and close it.
        # This action will simply modify the file modification time while only adding
        # a blank line.
        try:
            fileObj = open(pyLockPath,'a')
            fileObj.write('\n')
            fileObj.close()
        except:
            jobData.errMsg = "ERROR: Unable to update workflow LOCK file: " + pyLockPath
            errMod.errOut(jobData)
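        # (Editor note: os.utime(pyLockPath, None) would refresh the modification
        # time without growing the file; the blank-line append above adds one byte
        # per pass through the loop.)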
            
    # Remove LOCK file
    os.remove(pyLockPath)
Example #2
def main(argv):
    # Parse arguments. User must input a job name and directory.
    parser = argparse.ArgumentParser(description='Utility program to report the position' + \
                                     ' of a calibration job.')
    parser.add_argument('jobID',
                        metavar='jobID',
                        type=str,
                        nargs='+',
                        help='Job ID specific to calibration spinup.')
    parser.add_argument('contactFlag',
                        metavar='ctFlag',
                        type=int,
                        nargs='+',
                        help='1 = send to job contact, 0 = print to screen.')
    parser.add_argument('inDB',
                        metavar='inDB',
                        type=str,
                        nargs='+',
                        help='Required path to SQLite3 DB file.')
    parser.add_argument('--email',
                        nargs='?',
                        help='Optional email to pipe output to.')
    #parser.add_argument('--hostname',type=str,nargs='?',
    #                    help='Optional hostname MySQL DB resides on. Will use localhost if not passed.')
    #parser.add_argument('--pwd',metavar='pwd',type=str,nargs='?',help='Password to the Database.')

    args = parser.parse_args()

    # If the SQLite DB file does not exist, throw an error to the user.
    if not os.path.isfile(args.inDB[0]):
        print "ERROR: Unable to locate DB file: " + args.inDB[0]
        sys.exit(1)

    # Create dictionary of specified status messages.
    msgDict = {
        '-1.0': 'MODEL RUN LOCKED.',
        '-0.75': 'MAIN CALIBRATION PROGRAM LOCKED',
        '-0.5': 'MODEL FAILED ONCE - RUNNING AGAIN',
        '-0.25': 'MODEL FAILED ONCE - WAITING',
        '-0.1': 'CALIBRATION PROGRAM FOR DEFAULT PARAMETERS LOCKED',
        '0.0': 'NOT STARTED',
        '0.25': 'CALIBRATION PROGRAM FOR DEFAULT PARAMETERS RUNNING',
        '0.5': 'MODEL CURRENTLY RUNNING',
        '0.75': 'MODEL COMPLETE READY FOR PARAMETER ESTIMATION',
        '0.9': 'PARAMETER ESTIMATION OCCURRING',
        '1.0': 'MODEL ITERATION COMPLETE'
    }
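    # Status values come back from the DB as floats; str() of each float must
    # match a key above exactly, e.g. msgDict[str(0.75)] yields
    # 'MODEL COMPLETE READY FOR PARAMETER ESTIMATION'.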

    # Initialize object to hold status and job information
    jobData = statusMod.statusMeta()
    jobData.jobID = int(args.jobID[0])
    jobData.dbPath = args.inDB[0]

    # Lookup database username/login credentials based on username
    # running program.
    #if not args.pwd:
    #    try:
    #        pwdTmp = getpass.getpass('Enter Database Password: ')
    #        jobData.dbPwd = str(pwdTmp)
    #    except:
    #        print "ERROR: Unable to authenticate credentials for database."
    #        sys.exit(1)
    #else:
    #    jobData.dbPwd = args.pwd
    #jobData.dbUName= 'WH_Calib_rw'
    #jobData.port = 5432

    #if not args.hostname:
    #    # We will assume localhost for Postgres DB
    #    hostTmp = 'localhost'
    #else:
    #    hostTmp = str(args.hostname)
    #jobData.host = hostTmp

    # Establish database connection.
    db = dbMod.Database(jobData)
    try:
        db.connect(jobData)
    except:
        print jobData.errMsg
        sys.exit(1)

    # Extract job data from database
    try:
        db.jobStatus(jobData)
    except:
        print jobData.errMsg
        sys.exit(1)

    # Pull extensive meta-data describing the job from the config file.
    configPath = str(jobData.jobDir) + "/setup.config"
    if not os.path.isfile(configPath):
        print "ERROR: Configuration file: " + configPath + " not found."
        sys.exit(1)
    try:
        staticData = configMod.readConfig(configPath)
    except:
        print "ERROR: Failure to read configuration file: " + configPath
        sys.exit(1)

    # Assign the SQL command from the config file into the jobData structure
    jobData.gSQL = staticData.gSQL

    # Check gages in directory to match what's in the database
    try:
        jobData.checkGages2(db)
    except:
        errMod.errOut(jobData)

    # If an optional email was passed to the program, update the job object to
    # reflect this for information dissemination.
    if args.email:
        jobData.slackObj = None
        jobData.email = str(args.email)

    # Loop through each basin. Determine which iteration we are on, then report the status
    # of the job for this basin.
    msgOut = ''
    iterArray = np.empty([int(jobData.nIter)], np.int)
    completeArray = np.empty([int(jobData.nIter)], np.float)

    meanSum = 0.0
    for basin in range(0, len(jobData.gages)):
        iterArray[:] = 0
        completeArray[:] = 0.0
        keyStatus = 0.0
        keyStatusPrev = 0.0
        domainID = jobData.gageIDs[basin]
        iterComplete = 1
        statusData = db.iterationStatus(jobData, domainID,
                                        str(jobData.gages[basin]))
        for iteration in range(0, int(jobData.nIter)):
            keyStatus = float(statusData[iteration][1])
            iterationTmp = int(statusData[iteration][0])
            iterArray[iteration] = iterationTmp
            completeArray[iteration] = keyStatus

        indComplete = np.where(completeArray == 1)
        indCheck1 = np.where(completeArray != 1.0)
        indCheck2 = np.where(completeArray == 0.0)
        meanSum = meanSum + len(indComplete[0])
        if len(indComplete[0]) == int(jobData.nIter):
            msgOut = msgOut + "BASIN: " + str(jobData.gages[basin]) + \
                            ": CALIBRATION COMPLETE.\n"
        else:
            if len(indCheck2[0]) == int(jobData.nIter):
                msgOut = msgOut + "BASIN: " + str(jobData.gages[basin]) + \
                         " - HAS NOT BEGUN CALIBRATION.\n"
            else:
                iterLastComplete = len(indComplete[0])
                iterCurrent = iterLastComplete + 1
                indCurrent = np.where(iterArray == iterCurrent)
                statusCurrent = completeArray[indCurrent[0][0]]
                # Combine both conditions with a logical AND; a bare Python "and"
                # between two np.where results would just yield the second one.
                ind2 = np.where((completeArray != 0.0) & (completeArray != 1.0))
                iterTmp = iterArray[ind2[0][0]]
                msgOut = msgOut + "BASIN: " + str(jobData.gages[basin]) + \
                         ": " + str(msgDict[str(statusCurrent)]) + \
                         " - ITERATION: " + str(iterCurrent) + "\n"

    print "MEAN COMPLETENESS = " + str(float(meanSum) / len(jobData.gages))
    jobData.genMsg = msgOut
    if int(args.contactFlag[0]) == 0:
        print jobData.genMsg
    else:
        errMod.sendMsg(jobData)
Example #3
def main(argv):
    # Parse arguments. User must input a job name.
    parser = argparse.ArgumentParser(description='Main program to start or restart ' + \
             'sensitivity analysis for WRF-Hydro')
    parser.add_argument('jobID',metavar='jobID',type=str,nargs='+',
                        help='Job ID specific to your sensitivity/calibration workflow job.')
    parser.add_argument('--optDbPath',type=str,nargs='?',
                        help='Optional alternative path to SQLite DB file.')
    
    args = parser.parse_args()
    
    # If the SQLite file does not exist, throw an error.
    if args.optDbPath is not None:
        if not os.path.isfile(args.optDbPath):
            print "ERROR: " + args.optDbPath + " Does Not Exist."
            sys.exit(1)
        else:
            dbPath = args.optDbPath
    else:
        dbPath = topDir + "wrfHydroCalib.db"
        if not os.path.isfile(dbPath):
            print "ERROR: SQLite3 DB file: " + dbPath + " Does Not Exist."
            sys.exit(1)
    
    # Establish the beginning timestamp for this program.
    begTimeStamp = datetime.datetime.now()
    
    # Get current user who is running this program.
    userTmp = pwd.getpwuid(os.getuid()).pw_name
    
    # Initialize object to hold status and job information
    jobData = statusMod.statusMeta()
    jobData.jobID = int(args.jobID[0])
    jobData.dbPath = dbPath
    
    # Establish database connection.
    db = dbMod.Database(jobData)
    try:
        db.connect(jobData)
    except:
        print jobData.errMsg
        sys.exit(1)
        
    # Extract job data from database
    try:
        db.jobStatus(jobData)
    except:
        print jobData.errMsg
        sys.exit(1)
        
    # If the sensitivity flag is 0, simply exit gracefully as the user specified
    # not to run a sensitivity analysis.
    if jobData.sensFlag != 1:
        print "ERROR: Sensitivity flag was set to 0 for this workflow."
        sys.exit(1)
        
    # Establish a LOCK file to secure this Python program and make sure
    # no other instances overstep here. This is mostly designed to deal
    # with nohup processes being kicked off on Yellowstone/Cheyenne crontabs
    # arbitrarily. Just another check/balance here.
    lockPath = str(jobData.jobDir) + "/PYTHON.LOCK"
    if os.path.isfile(lockPath):
        # Either a job is still running, or was running
        # and was killed.

        print 'LOCK FILE FOUND.'
        # Read in to get PID number
        pidObj = pd.read_csv(lockPath)
        pidCheck = int(pidObj.PID[0])
        if errMod.check_pid(pidCheck):
            print "JOB: " + str(pidCheck) + " is still running."
            sys.exit(0)
        else:
            print "JOB: " + str(pidCheck) + " has failed. Removing LOCK file."
            os.remove(lockPath)
            fileObj = open(lockPath,'w')
            fileObj.write('\"PID\"\n')
            fileObj.write(str(os.getpid()))
            fileObj.close()
        # TEMPORARY FOR CHEYENNE. Since all cron jobs are launched
        # from an administrative node, we cannot monitor the process at 
        # all, which is an inconvenience. So.... we will check the last
        # modified time. If it's more than 15 minutes old, we will assume
        # the process is no longer running and can continue on with calibration.
        #dtRunCheck = datetime.datetime.now() - datetime.datetime.fromtimestamp(os.path.getmtime(lockPath))
        #if dtRunCheck.seconds/60.0 < 15.0:
        #    # We are going to assume a previous process is still running on the system. 
        #    # exit gracefully.
        #    print 'ASSUMING PROCESS STILL RUNNING'
        #    sys.exit(0)
        #else:
        #    # We are assuming the process is no longer running on the system. Allow
        #    # the workflow to continue.
        #    print 'ALLOWING WORKFLOW TO CONTINUE. REMOVING LOCK FILE'
        #    os.remove(lockPath)
        #    fileObj = open(lockPath,'w')
        #    fileObj.write('\"PID\"\n')
        #    fileObj.write(str(os.getpid()))
        #    fileObj.close()
    else:
        # Write a LOCK file for this program.
        fileObj = open(lockPath,'w')
        fileObj.write('\"PID\"\n')
        fileObj.write(str(os.getpid()))
        fileObj.close()
        
    # Pull extensive meta-data describing the job from the config file.
    configPath = str(jobData.jobDir) + "/setup.config"
    if not os.path.isfile(configPath):
        print "ERROR: Configuration file: " + configPath + " not found."
        sys.exit(1)
    try:        
        staticData = configMod.readConfig(configPath)
    except:
        print "ERROR: Failure to read configuration file: " + configPath
        sys.exit(1)
        
    # Assign the SQL command from the config file into the jobData structure
    jobData.gSQL = staticData.gSQL
    
    # Check gages in directory to match what's in the database
    try:
        jobData.checkGages2(db)
    except:
        errMod.errOut(jobData)
        
    # Some housekeeping here. If the sensitivity is already complete, throw an error.
    # Also ensure the spinup has been entered as complete. This is necessary for the
    # sensitivity to run.
    # Also, if this is a re-initiation under a different user, require the new
    # user to enter a new contact that will be updated in the database.
    if int(jobData.spinComplete) != 1:
        # Check to see if optional spinup options were enabled. If so, update the spinup status.
        if staticData.coldStart == 1 or staticData.optSpinFlag != 0:
            print "Found optional spinup alternatives"
            jobData.spinComplete = 1
            try:
                db.updateSpinupStatus(jobData)
            except:
                errMod.errOut(jobData)
        else:
            jobData.errMsg = "ERROR: Spinup for job ID: " + str(jobData.jobID) + \
                             " is NOT complete. You must complete the spinup in order" + \
                             " to run calibration."
            errMod.errOut(jobData)
        
    if int(jobData.sensComplete) == 1:
        jobData.errMsg = "ERROR: Sensitivity for job ID: " + str(jobData.jobID) + \
                         " has already completed."
        errMod.errOut(jobData)
        
    if userTmp != jobData.owner:
        print "User: "******" is requesting to takeover jobID: " + \
              str(jobData.jobID) + " from owner: " + str(jobData.owner)
        strTmp = "Please enter new email address. Leave blank if no email " + \
                 "change is desired. NOTE if you leave both email and Slack " + \
                 "information blank, no change in contact will occur. Only " + \
                 "the owner will be modified:"
        newEmail = raw_input(strTmp)
        #strTmp = "Please enter Slack channel:"
        #newSlackChannel = raw_input(strTmp)
        #strTmp = "Please enter Slack token:"
        #newSlackToken = raw_input(strTmp)
        #strTmp = "Please enter Slack user name:"
        #newSlackUName = raw_input(strTmp)
        changeFlag = 1
        #if len(newSlackChannel) != 0 and len(newSlackToken) == 0:
        #    print "ERROR: You must specify an associated Slacker API token."
        #    sys.exit(1)
        #if len(newSlackChannel) != 0 and len(newSlackUName) == 0:
        #    print "ERROR: You must specify an associated Slacker user name."
        #    sys.exit(1)
        #if len(newSlackToken) != 0 and len(newSlackChannel) == 0:
        #    print "ERROR: You must specify an associated Slacker channel name."
        #    sys.exit(1)
        #if len(newSlackToken) != 0 and len(newSlackUName) == 0:
        #    print "ERROR: You must specify an associated Slacker user name."
        #    sys.exit(1)
        #if len(newSlackUName) != 0 and len(newSlackChannel) == 0:
        #    print "ERROR: You must specify an associated Slacker channel name."
        #    sys.exit(1)
        #if len(newSlackUName) != 0 and len(newSlackToken) == 0:
        #    print "ERROR: You must specify an associated Slacker API token."
        #    sys.exit(1)
        #if len(newSlackChannel) != 0 and len(newEmail) != 0:
        #    print "ERROR: You cannot specify both email and Slack for notifications."
        #    sys.exit(1)
        #if len(newSlackChannel) == 0 and len(newEmail) == 0:
        #    changeFlag = 0
            
        # PLACEHOLDER FOR CHECKING SLACK CREDENTIALS
            
        jobData.genMsg = "MSG: User: "******" Is Taking Over JobID: " + str(jobData.jobID) + \
                         " From Owner: " + str(jobData.owner)
        errMod.sendMsg(jobData)
        
        # TEMPORARY FOR VERSION 1.2 NWM CALIBRATION!!!!
        # If a new owner takes over, simply change the owner, but keep all 
        # other contact information the same.
        newEmail = jobData.email
        newSlackChannel = jobData.slChan
        newSlackToken = jobData.slToken
        newSlackUName = jobData.slUser
        if not newEmail:
            newEmail = ''
        if not newSlackChannel:
            newSlackChannel = ''
            newSlackToken = ''
            
        try:
            db.updateJobOwner(jobData,userTmp,newEmail,newSlackChannel,newSlackToken,newSlackUName,changeFlag)
        except:
            errMod.errOut(jobData)
            
    # Begin an "infinite" do loop. This loop will continue to loop through all 
    # the basins until sensitivity jobs are complete. Basins are allowed ONE failure. A restart
    # will be attempted. If the restart fails again, a LOCK file is placed into the
    # run directory and an error email is sent to the user.
    completeStatus = False
    
    # Create a "key" array. This array is of length [numBasins] and is initialized to 0.0.
    # Each array element can have the following values based on current model status:
    # 0.0 - Initial value
    # 0.10 - Job to generate parameter grids for each model job is being ran.
    # 0.25 - Job to generate parameter grids is complete. Ready to run models.....
    # 0.5 - Model simulationa are in progress
    # 0.75 - Job to read in model output and run sensitivity analysis is ready to be ran. 
    # 0.90 - Job to read in model output and run sensitivity analysis is running. 
    # 1.0 - Sensitivity analysis complete
    # -0.1 - Parameter generation failed. A LOCK file has been created. 
    # -0.5 - Model simulation failed once and a restart has been attempted
    # -0.90 - Sensitivity analysis job has failed. A LOCK file has been created. 
    # -1.0 - Model has failed twice. A LOCK file has been created.
    # Once all array elements are 1.0, then completeStatus goes to True, an entry into
    # the database occurs, and the program will complete.
    keySlot = np.empty([len(jobData.gages),int(jobData.nSensIter)])
    keySlot[:,:] = 0.0
    entryValue = float(len(jobData.gages)*int(jobData.nSensIter)*2.0)
    
    # Create an array to hold system job ID values. This will only be used for
    # PBS, as qstat has demonstrated slow behavior when doing a full qstat command.
    # We will track job ID values and do a qstat <jobID> and populate this array
    # to keep track of things.
    pbsJobId = np.empty([len(jobData.gages),int(jobData.nSensIter)],np.int64)
    pbsJobId[:,:] = -9999
    pbsCollectId = np.empty([len(jobData.gages),int(jobData.nSensIter)],np.int64)
    pbsCollectId[:,:] = -9999
    pbsPreId = np.empty([len(jobData.gages)],np.int64)
    pbsPreId[:] = -9999
    pbsPostId = np.empty([len(jobData.gages)],np.int64)
    pbsPostId[:] = -9999
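    # (pbsPreId, pbsJobId, pbsCollectId, and pbsPostId track PBS job IDs for the
    # parameter pre-processing job, each model iteration, each iteration's output
    # collection job, and the sensitivity post-processing job, respectively.)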
    
    # Pull all the status values into the keySlot array. 
    for basin in range(0,len(jobData.gages)):
        domainID = jobData.gageIDs[basin]
            
        if domainID == -9999:
            jobData.errMsg = "ERROR: Unable to locate domainID for gage: " + str(jobData.gages[basin])
            errMod.errOut(jobData)
            
        # We are going to pull all values for one basin, then place them into the array.
        # This is faster than looping over each iteration at a time.
        statusData = db.sensIterationStatus(jobData,domainID,str(jobData.gages[basin]))
        statusData = [list(item) for item in statusData]
        for iteration in range(0,int(jobData.nSensIter)):
            for iteration2 in range(0,int(jobData.nSensIter)):
                if statusData[iteration2][0] == iteration+1:
                    keySlot[basin,iteration] = float(statusData[iteration2][1])
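        # (Editor sketch: the nested scan above is O(nSensIter^2) per basin; a
        # dictionary keyed on iteration number gives the same result in O(n):
        #
        #     statusByIter = {int(row[0]): float(row[1]) for row in statusData}
        #     for iteration in range(int(jobData.nSensIter)):
        #         keySlot[basin, iteration] = statusByIter.get(iteration + 1, 0.0)
        # )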
            
    if len(np.where(keySlot != 0.0)[0]) == 0:
        # We need to either check to see if pre-processing has taken place, or
        # run it.
        preProcStatus = False
        
    while not completeStatus:
        # Walk through each basin undergoing sensitivity analysis. 
        for basin in range(0,len(jobData.gages)):
            print "GAGE: " + jobData.gages[basin]
            # Establish a status value for pre-processing the parameter values from R/Python code. 
            preProcStatus = False 
    
            # Establish a status value for post-processing the model output and running sensitivity
            # analysis.
            postProcStatus = False
            
            # Calculate the number of "batches" we are going to run
            nBatches = int(jobData.nSensIter/jobData.nSensBatch)
            entryValueBatch = float(jobData.nSensBatch)
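            # For example, with nSensIter = 12 and nSensBatch = 4, nBatches = 3 and
            # each batch of 4 iterations must sum to 4.0 in keySlot before the next
            # batch launches. (If nSensIter is not an exact multiple of nSensBatch,
            # the integer division above drops the remainder iterations.)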
            
            # If we have a pre-processing complete file, set our pre-proc status to True. 
            # Also, log parameter values generated if the log file hasn't been created. 
            preProcComplete = jobData.jobDir + "/" + jobData.gages[basin] + "/RUN.SENSITIVITY/preProc.COMPLETE"
            parmsLogged =  jobData.jobDir + "/" + jobData.gages[basin] + "/RUN.SENSITIVITY/PARAMS_LOGGED.COMPLETE"
            parmTxtFile = jobData.jobDir + "/" + jobData.gages[basin] + "/RUN.SENSITIVITY/params_new.txt"
            sensLogged = jobData.jobDir + "/" + jobData.gages[basin] + "/RUN.SENSITIVITY/SENS_LOGGED.COMPLETE"
            sensStats = jobData.jobDir + "/" + jobData.gages[basin] + "/RUN.SENSITIVITY/stat_sensitivity.txt"
            missingFlag = jobData.jobDir + "/" + jobData.gages[basin] + "/RUN.SENSITIVITY/CALC_STATS_MISSING"
            if os.path.isfile(preProcComplete):
                preProcStatus = True
                print "PRE PROCESSING COMPLETE!"
                if not os.path.isfile(parmsLogged):
                    # Log parameter values generated by pre-processing.
                    print "LOGGING PRE-PROC PARAM FILES."
                    try:
                        db.insertSensParms(jobData,parmsLogged,parmTxtFile,jobData.gageIDs[basin])
                    except:
                        jobData.errMsg = ("WARNING: Unable to log sensitivity parameters for basin: " + str(basin) + \
                                          " Job: " + str(jobData.jobID))
                        errMod.errOut(jobData)
            if not preProcStatus:
                try:
                    sensitivityMod.preProc(preProcStatus,jobData,staticData,db,jobData.gageIDs[basin],jobData.gages[basin],pbsPreId,basin)
                except:
                    errMod.errOut(jobData)
            else:
                # The goal here is to only operate on a fixed number of model runs at a time.
                # If you have a large parameter sample size, it's possible to have hundreds,
                # if not thousands, of model permutations. This workflow allows only batches
                # of model runs to be run at a time so as not to bog down the system.
                for batchIter in range(0,nBatches):
                    time.sleep(30)
                    batchCheck = keySlot[basin,(batchIter*jobData.nSensBatch):((batchIter+1)*jobData.nSensBatch)]
                    if batchIter == 0:
                        batchCheckPrev = entryValueBatch
                    else:
                        batchCheckPrev = keySlot[basin,((batchIter-1)*jobData.nSensBatch):(batchIter*jobData.nSensBatch)]
                        batchCheckPrev = batchCheckPrev.sum()
                    if batchCheck.sum() != entryValueBatch and batchCheckPrev == entryValueBatch:
                        for iterTmp in range(0,jobData.nSensBatch):
                            iteration = batchIter*jobData.nSensBatch + iterTmp
                            keyCheck1 = keySlot[basin,iteration]
                            if keyCheck1 < 1:
                                # This model iteration has not completed. 
                                try:
                                    sensitivityMod.runModel(jobData,staticData,db,jobData.gageIDs[basin],jobData.gages[basin],keySlot,basin,iteration,pbsJobId,pbsCollectId)
                                except:
                                    errMod.errOut(jobData)
                                
                                if keySlot[basin,iteration] == 0.0 and keyCheck1 == 0.5:
                                    # Put some spacing between launching model simulations
                                    # to keep the queue from getting overloaded.
                                    time.sleep(3)
                                    
                                # Update the temporary status array as it will be checked for this batch of model runs.
                                batchCheck[iterTmp] = keySlot[basin,iteration]
                                
            # Run post-processing ONLY when all model simulations are finished.
            if not postProcStatus and preProcStatus and len(np.where(batchCheck != 1.0)[0]) == 0:
                print "READY FOR POST PROCESSING"
                try:
                    sensitivityMod.postProc(postProcStatus,jobData,staticData,db,jobData.gageIDs[basin],jobData.gages[basin],pbsPostId,basin)
                except:
                    errMod.errOut(jobData)
                                
            postProcComplete = jobData.jobDir + "/" + jobData.gages[basin] + "/RUN.SENSITIVITY/postProc.COMPLETE"
            if os.path.isfile(postProcComplete):
                if not os.path.isfile(sensLogged) and not os.path.isfile(missingFlag):
                    # Log sensitivity statistics into the database.
                    if not os.path.isfile(sensStats):
                        jobData.errMsg = "ERROR: Expected to find: " + sensStats + " after post-processing. Not found."
                        errMod.errOut(jobData)
                    else:
                        try:
                            db.logSensStats(jobData,sensStats,jobData.gageIDs[basin],sensLogged)
                        except:
                            errMod.errOut(jobData)
                    # Check for complete flag on logging sensitivity statistics. 
                    if os.path.isfile(sensLogged):
                        postProcStatus = True
                        # Upgrade key status values as necessary
                        for iterTmp in range(0,jobData.nSensIter):
                            keySlot[basin,iterTmp] = 2.0
                elif os.path.isfile(sensLogged):
                    # Post-processing complete and statistics were successfully logged.
                    postProcStatus = True
                    # Upgrade key status values as necessary
                    for iterTmp in range(0,jobData.nSensIter):
                        keySlot[basin,iterTmp] = 2.0
                elif os.path.isfile(missingFlag):
                    # Missing obs were found. We will default to making this basin complete.
                    for iterTmp in range(0,jobData.nSensIter):
                        keySlot[basin,iterTmp] = 2.0
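            # (The 2.0 assigned per iteration above is what allows keySlot.sum()
            # to reach entryValue, which was computed with a factor of 2.0.)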
                        
            # TEMPORARY FOR CHEYENNE
            # Check to make sure program hasn't passed a prescribed time limit. If it has,
            # exit gracefully.
            #timeCheckStamp = datetime.datetime.now()
            #programDtCheck = timeCheckStamp - begTimeStamp
            #if programDtCheck.seconds/60.0 > 90.0: 
            #    # 90-minutes)
            #    try:
            #        fileObj = open(lockPath,'a')
            #        fileObj.write('WORKFLOW HAS HIT TIME LIMIT - EXITING....\n')
            #        fileObj.close()
            #    except:
            #        jobData.errMsg = "ERROR: Unable to update workflow LOCK file: " + lockPath
            #        errMod.errOut(jobData)
            
        # Check to see if program requirements have been met.
        if keySlot.sum() == entryValue and postProcStatus:
            jobData.sensComplete = 1
            try:
                db.updateSensStatus(jobData)
            except:
                errMod.errOut(jobData)
            jobData.genMsg = "SENSITIVITY FOR JOB ID: " + str(jobData.jobID) + " COMPLETE."
            errMod.sendMsg(jobData)
            completeStatus = True
            
        # Open the Python LOCK file. Write a blank line to the file and close it.
        # This action will simply modify the file modification time while only adding
        # a blank line.
        try:
            fileObj = open(lockPath,'a')
            fileObj.write('\n')
            fileObj.close()
        except:
            jobData.errMsg = "ERROR: Unable to update workflow LOCK file: " + lockPath
            errMod.errOut(jobData)
            
    # Remove LOCK file
    os.remove(lockPath)
Example #4
def main(argv):
    # Parse arguments. User must input a job name.
    parser = argparse.ArgumentParser(description='Main program to start or restart ' + \
             'calibration for WRF-Hydro')
    parser.add_argument('jobID',
                        metavar='jobID',
                        type=str,
                        nargs='+',
                        help='Job ID specific to calibration.')
    parser.add_argument('--optDbPath',
                        type=str,
                        nargs='?',
                        help='Optional alternative path to SQLite DB file.')

    args = parser.parse_args()

    # If the SQLite file does not exist, throw an error.
    if args.optDbPath is not None:
        if not os.path.isfile(args.optDbPath):
            print "ERROR: " + args.optDbPath + " Does Not Exist."
            sys.exit(1)
        else:
            dbPath = args.optDbPath
    else:
        dbPath = topDir + "wrfHydroCalib.db"
        if not os.path.isfile(dbPath):
            print "ERROR: SQLite3 DB file: " + dbPath + " Does Not Exist."
            sys.exit(1)

    # Establish the beginning timestamp for this program.
    begTimeStamp = datetime.datetime.now()

    # Get current user who is running this program.
    userTmp = pwd.getpwuid(os.getuid()).pw_name

    # Initialize object to hold status and job information
    jobData = statusMod.statusMeta()
    jobData.jobID = int(args.jobID[0])
    jobData.dbPath = dbPath

    # Establish database connection.
    db = dbMod.Database(jobData)
    try:
        db.connect(jobData)
    except:
        print jobData.errMsg
        sys.exit(1)

    # Extract job data from database
    try:
        db.jobStatus(jobData)
    except:
        print jobData.errMsg
        sys.exit(1)

    # If the calibration flag is 0, simply exit gracefully as the user specified
    # not to run calibration.
    if jobData.calibFlag != 1:
        print "ERROR: Calibration flag was set to 0 for this workflow."
        sys.exit(1)

    # Establish a LOCK file to secure this Python program and make sure
    # no other instances overstep here. This is mostly designed to deal
    # with nohup processes being kicked off on Yellowstone/Cheyenne crontabs
    # arbitrarily. Just another check/balance here.
    lockPath = str(jobData.jobDir) + "/PYTHON.LOCK"
    if os.path.isfile(lockPath):
        # Either a job is still running, or was running
        # and was killed.

        print 'LOCK FILE FOUND.'
        # Read in to get PID number
        pidObj = pd.read_csv(lockPath)
        pidCheck = int(pidObj.PID[0])
        if errMod.check_pid(pidCheck):
            print "JOB: " + str(pidCheck) + \
                  " Is still running."
            sys.exit(0)
        else:
            print "JOB: " + str(pidCheck) + \
                  " Has Failed. Removing LOCK " + \
                  " file."
            os.remove(lockPath)
            fileObj = open(lockPath, 'w')
            fileObj.write('\"PID\"\n')
            fileObj.write(str(os.getpid()))
            fileObj.close()
        # TEMPORARY FOR CHEYENNE. Since all cron jobs are launched
        # from an administrative node, we cannot monitor the process at
        # all, which is an inconvenience. So.... we will check the last
        # modified time. If it's more than 15 minutes old, we will assume
        # the process is no longer running and can continue on with calibration.
        #dtRunCheck = datetime.datetime.now() - datetime.datetime.fromtimestamp(os.path.getmtime(lockPath))
        #if dtRunCheck.seconds/60.0 < 15.0:
        #    # We are going to assume a previous process is still running on the system.
        #    # exit gracefully.
        #    print 'ASSUMING PROCESS STILL RUNNING'
        #    sys.exit(0)
        #else:
        #    # We are assuming the process is no longer running on the system. Allow
        #    # the workflow to continue.
        #    print 'ALLOWING WORKFLOW TO CONTINUE. REMOVING LOCK FILE'
        #    os.remove(lockPath)
        #    fileObj = open(lockPath,'w')
        #    fileObj.write('\"PID\"\n')
        #    fileObj.write(str(os.getpid()))
        #    fileObj.close()
    else:
        print 'LOCK FILE NOT FOUND.'
        # Write a LOCK file for this program.
        fileObj = open(lockPath, 'w')
        fileObj.write('\"PID\"\n')
        fileObj.write(str(os.getpid()))
        fileObj.close()

    # Pull extensive meta-data describing the job from the config file.
    configPath = str(jobData.jobDir) + "/setup.config"
    if not os.path.isfile(configPath):
        print "ERROR: Configuration file: " + configPath + " not found."
        sys.exit(1)
    try:
        staticData = configMod.readConfig(configPath)
    except:
        print "ERROR: Failure to read configuration file: " + configPath
        sys.exit(1)

    # Assign the SQL command from the config file into the jobData structure
    jobData.gSQL = staticData.gSQL

    # Check gages in directory to match what's in the database
    try:
        jobData.checkGages2(db)
    except:
        errMod.errOut(jobData)

    # Some housekeeping here. If the calibration is already complete, throw an error.
    # Also ensure the spinup has been entered as complete. This is necessary for the
    # calibration to run.
    # Also, if this is a re-initiation under a different user, require the new
    # user to enter a new contact that will be updated in the database.
    if int(jobData.spinComplete) != 1:
        # Check to see if optional spinup options were enabled. If so, update the spinup status.
        if staticData.coldStart == 1 or staticData.optSpinFlag != 0:
            print "Found optional spinup alternatives"
            jobData.spinComplete = 1
            try:
                db.updateSpinupStatus(jobData)
            except:
                errMod.errOut(jobData)
        else:
            jobData.errMsg = "ERROR: Spinup for job ID: " + str(jobData.jobID) + \
                             " is NOT complete. You must complete the spinup in order" + \
                             " to run calibration."
            errMod.errOut(jobData)

    if int(jobData.calibComplete) == 1:
        jobData.errMsg = "ERROR: Calibration for job ID: " + str(jobData.jobID) + \
                         " has already completed."
        errMod.errOut(jobData)

    if userTmp != jobData.owner:
        print "User: "******" is requesting to takeover jobID: " + \
              str(jobData.jobID) + " from owner: " + str(jobData.owner)
        strTmp = "Please enter new email address. Leave blank if no email " + \
                 "change is desired. NOTE if you leave both email and Slack " + \
                 "information blank, no change in contact will occur. Only " + \
                 "the owner will be modified:"
        newEmail = raw_input(strTmp)
        #strTmp = "Please enter Slack channel:"
        #newSlackChannel = raw_input(strTmp)
        #strTmp = "Please enter Slack token:"
        #newSlackToken = raw_input(strTmp)
        #strTmp = "Please enter Slack user name:"
        #newSlackUName = raw_input(strTmp)
        # V1.2 NOTE!!!!!
        # Given the automation of the workflow on Yellowstone, we are simply
        # keeping contact information the same, but only changing the ownership
        # of the workflow
        changeFlag = 1
        #if len(newSlackChannel) != 0 and len(newSlackToken) == 0:
        #    print "ERROR: You must specify an associated Slacker API token."
        #    sys.exit(1)
        #if len(newSlackChannel) != 0 and len(newSlackUName) == 0:
        #    print "ERROR: You must specify an associated Slacker user name."
        #    sys.exit(1)
        #if len(newSlackToken) != 0 and len(newSlackChannel) == 0:
        #    print "ERROR: You must specify an associated Slacker channel name."
        #    sys.exit(1)
        #if len(newSlackToken) != 0 and len(newSlackUName) == 0:
        #    print "ERROR: You must specify an associated Slacker user name."
        #    sys.exit(1)
        #if len(newSlackUName) != 0 and len(newSlackChannel) == 0:
        #    print "ERROR: You must specify an associated Slacker channel name."
        #    sys.exit(1)
        #if len(newSlackUName) != 0 and len(newSlackToken) == 0:
        #    print "ERROR: You must specify an associated Slacker API token."
        #    sys.exit(1)
        #if len(newSlackChannel) != 0 and len(newEmail) != 0:
        #    print "ERROR: You cannot specify both email and Slack for notifications."
        #    sys.exit(1)
        #if len(newSlackChannel) == 0 and len(newEmail) == 0:
        #    changeFlag = 0

        # PLACEHOLDER FOR CHECKING SLACK CREDENTIALS

        jobData.genMsg = "MSG: User: "******" Is Taking Over JobID: " + str(jobData.jobID) + \
                         " From Owner: " + str(jobData.owner)
        errMod.sendMsg(jobData)

        # If a new owner takes over, simply change the owner, but keep all
        # other contact information the same.
        newEmail = jobData.email
        newSlackChannel = jobData.slChan
        newSlackToken = jobData.slToken
        newSlackUName = jobData.slUser
        if not newEmail:
            newEmail = ''
        if not newSlackChannel:
            newSlackChannel = ''
            newSlackToken = ''

        try:
            db.updateJobOwner(jobData, userTmp, newEmail, newSlackChannel,
                              newSlackToken, newSlackUName, changeFlag)
        except:
            errMod.errOut(jobData)

    # Begin an "infinite" do loop. This loop will continue to loop through all
    # the basins until calibrations are complete. Basins are allowed ONE failure. A restart
    # will be attempted. If the restart fails again, a LOCK file is placed into the
    # run directory and an error email is sent to the user. Additionally, if the R calibration
    # code fails, a seperate LOCK file will be placed into the directory, and the user
    # will be notified about the failure.
    completeStatus = False

    # Create a "key" array. This array is of length [numBasins] and is initialized to 0.0.
    # Each array element can have the following values based on current model status:
    # 0.0 - Initial value
    # 0.25 - This is a special value for the first iteration. The initial default
    #        parameter values specified in the parameter table by the user are being
    #        applied and entered into the DB.
    # 0.5 - Model simulation in progress
    # 0.75 - The model simulation has completed. We are ready to run the R code to
    #        generate the next set of parameter values and enter evaluation statistics
    #        into the DB.
    # 0.90 - The R code is running to generate new parameter estimates. Python is
    #        also generating new files.
    # 1.0 - R/Python code is complete and params/stats have been entered into the DB. Ready to
    #       run the next model iteration.
    # -0.1 - The R code to generate the initial parameter values has failed. CALIB.LOCK
    #        is put into place.
    # -0.25 - The workflow has found the model simulation to have failed.
    # -0.5 - Model simulation failed once and a restart is being run.
    # -0.75 - The R/Python code to generate new parameters/stats has failed. CALIB.LOCK
    #         is put into place.
    # -1.0 - Model has failed twice. A RUN.LOCK file has been created.
    # Once all array elements are 1.0, then completeStatus goes to True, an entry into
    # the database occurs, and the program will complete.
    keySlot = np.empty([len(jobData.gages), int(jobData.nIter)])
    keySlot[:, :] = 0.0

    # Create an array to hold system job ID values. This will only be used for
    # PBS, as qstat has demonstrated slow behavior when doing a full qstat command.
    # We will track job ID values and do a qstat <jobID> and populate this array
    # to keep track of things.
    pbsJobId = np.empty([len(jobData.gages)], np.int64)
    pbsJobId[:] = -9999

    # NOTE this is different from the spinup. We have a 2D array of values to account
    # for all the iterations.
    entryValue = float(len(jobData.gages) * int(jobData.nIter))

    # Pull all the status values into the keySlot array.
    for basin in range(0, len(jobData.gages)):
        domainID = jobData.gageIDs[basin]

        if domainID == -9999:
            jobData.errMsg = "ERROR: Unable to locate domainID for gage: " + str(
                jobData.gages[basin])
            errMod.errOut(jobData)

        # We are going to pull all values for one basin, then place them into the array.
        # This is faster than looping over each iteration at a time.
        statusData = db.iterationStatus(jobData, domainID,
                                        str(jobData.gages[basin]))
        statusData = [list(item) for item in statusData]
        for iteration in range(0, int(jobData.nIter)):
            for iteration2 in range(0, int(jobData.nIter)):
                if statusData[iteration2][0] == iteration + 1:
                    keySlot[basin,
                            iteration] = float(statusData[iteration2][1])

    while not completeStatus:
        # Walk through calibration directories for each basin. Determine the status of
        # the model runs by the files available. If restarting, modify the
        # namelist files appropriately. Then, restart the model. If anything goes wrong, notifications
        # will either be emailed per the user's info, or piped to Slack for group
        # notification. A simulation is deemed complete when all expected RESTART
        # files are present and there are no jobs running for the basin. The parameter
        # estimation is deemed complete when CALIB_ITER.COMPLETE is present and
        # no calibration jobs for this basin are running.
        # Loop through each basin. Perform the following steps:
        # 1.) If status is -0.5,0.0, or 0.5, check to see if the model is running
        #     for this basin or if parameter estimation is occurring.
        # 2.) If the model is not running, check for expected output and perform
        #     necessary logistics. Continue to parameter estimation. Note that
        #     for the first iteration, R needs to be run before the model to get
        #     initial default parameters.
        # 3.) Once the model is complete, the status goes to 0.75.
        # 4.) Fire off a job to run R/Python code for parameter estimation, generation
        #     , plot generation, and generation of model eval statistics to be
        #     entered into the DB. Status goes to 0.90.
        # 5.) Once the calibration job is complete, the status goes to 1.0 and the
        #     workflow is ready for the next iteration.
        # If the status goes to -1.0, a LOCK file is created and must be manually
        # removed by the user. Once the program detects this, it will restart the
        # model and the status goes back to 0.5.
        # If the status goes to -0.75, a LOCK file is created and needs to be removed
        # manually by the user before the workflow can continue.

        for basin in range(0, len(jobData.gages)):
            for iteration in range(0, int(jobData.nIter)):
                # Holding onto the status value before the workflow iterates for checking below.
                keyStatusCheck1 = keySlot[basin, iteration]
                # If the status is already 1.0, then continue the loop as no work needs to be done.
                if keyStatusCheck1 == 1.0:
                    continue
                else:
                    try:
                        calibMod.runModel(jobData, staticData, db,
                                          jobData.gageIDs[basin],
                                          jobData.gages[basin], keySlot, basin,
                                          iteration, pbsJobId)
                    except:
                        errMod.errOut(jobData)
                # Temporary for Cheyenne to slow down the strain on PBS.
                keyStatusCheck2 = keySlot[basin, iteration]
                # Put some spacing between launching model simulations after any of
                # these status transitions, to keep the queue from getting overloaded.
                pausePairs = [(0.0, 0.25), (0.0, 0.5), (0.25, 0.5), (0.5, 0.5),
                              (0.5, 0.75), (0.5, 0.9), (0.75, 0.9), (0.9, 0.9),
                              (0.9, 1.0)]
                if (keyStatusCheck1, keyStatusCheck2) in pausePairs:
                    time.sleep(15)

                # TEMPORARY FOR CHEYENNE
                # Check to make sure program hasn't passed a prescribed time limit. If it has,
                # exit gracefully.
                #timeCheckStamp = datetime.datetime.now()
                #programDtCheck = timeCheckStamp - begTimeStamp
                #if programDtCheck.seconds/60.0 > 90.0:
                #    # 90-minutes)
                #    try:
                #        fileObj = open(lockPath,'a')
                #        fileObj.write('WORKFLOW HAS HIT TIME LIMIT - EXITING....\n')
                #        fileObj.close()
                #    except:
                #        jobData.errMsg = "ERROR: Unable to update workflow LOCK file: " + lockPath
                #        errMod.errOut(jobData)

        # Check to see if program requirements have been met.
        if keySlot.sum() == entryValue:
            jobData.calibComplete = 1
            try:
                db.updateCalibStatus(jobData)
            except:
                errMod.errOut(jobData)
            jobData.genMsg = "CALIBRATION FOR JOB ID: " + str(
                jobData.jobID) + " COMPLETE."
            errMod.sendMsg(jobData)
            completeStatus = True

        # Open the Python LOCK file. Write a blank line to the file and close it.
        # This action will simply modify the file modification time while only adding
        # a blank line.
        try:
            fileObj = open(lockPath, 'a')
            fileObj.write('\n')
            fileObj.close()
        except:
            jobData.errMsg = "ERROR: Unable to update workflow LOCK file: " + lockPath
            errMod.errOut(jobData)

    # Remove LOCK file
    os.remove(lockPath)
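Each example is launched with the job ID as a positional argument, e.g. (script names hypothetical):

    python runCalibration.py 3 --optDbPath /path/to/wrfHydroCalib.db
    python reportJobPosition.py 3 0 /path/to/wrfHydroCalib.db --email user@example.com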