Example #1
    def makeInputDat(self):
        # Adjust memory value in input.dat
        logging.info("Calculating memory value for input.dat...")
        adjustedMem = int(((self.mem * self.memAdjust) / self.cpus) / 1000000)
        newmem = "memory " + str(adjustedMem) + " MB"

        # Creates the input.dat file in the job folder
        from jobConfig import JobConfig
        self.jobConfig = JobConfig()
        intder = self.jobConfig.intderIn(self.displacements)
        if intder is not None:
            f = open('intder.in', 'w')
            f.write(intder)
            f.flush()
            f.close()

            # Run Intder2005 to produce the geometries
            logging.info("Running Intder2005...")
            myinput = open('intder.in')
            myoutput = open('intder.out', 'w')
            p = subprocess.Popen("Intder2005.x",
                                 stdin=myinput,
                                 stdout=myoutput)
            p.wait()
            myoutput.flush()
            myoutput.close()
            logging.info("Finished running Intder2005...")

            # Read the intder output and produce an input.dat file from the geometries
            logging.info("Reading file07...")
            f = open('file07')
            file07 = f.readlines()
            f.close()
        else:
            file07 = None

        if len(self.errors) > 0:
            inputdat = self.jobConfig.inputDat(newmem, self.jobCategory,
                                               file07, self.errors[-1])
        else:
            inputdat = self.jobConfig.inputDat(newmem, self.jobCategory,
                                               file07)

        # Write input.dat contents to file
        f = open('input.dat', 'w')
        f.write(inputdat)
        # Append print_variables() call as a preventive measure, since that is
        #    where we get the final energy value.
        f.write("\nprint_variables()\n")
        f.flush()
        f.close()
        logging.info("File input.dat written to disk.")
Example #2
	def makeInputDat(self):
		# Adjust memory value in input.dat
		logging.info("Calculating memory value for input.dat...")
		adjustedMem = int(((self.mem * self.memAdjust) / self.cpus) / 1000000)
		adjustedMem = max(adjustedMem, self.memMax)  # max() keeps the larger value, so self.memMax acts as a floor here
		newmem = "memory " + str(adjustedMem) + " MB"

		# Creates the input.dat file in the job folder
		from jobConfig import JobConfig
		self.jobConfig = JobConfig()
		intder = self.jobConfig.intderIn(self.displacements)
		if intder is not None:
			f = open('intder.in', 'w')
			f.write(intder)
			f.flush()
			f.close()

			# Run Intder2005 to produce the geometries
			logging.info("Running Intder2005...")
			myinput = open('intder.in')
			myoutput = open('intder.out', 'w')
			p = subprocess.Popen("Intder2005.x", stdin=myinput, stdout=myoutput)
			p.wait()
			myoutput.flush()
			myoutput.close()
			logging.info("Finished running Intder2005...")

			# Read the intder output and produce an input.dat file from the geometries
			logging.info("Reading file07...")
			f = open('file07')
			file07 = f.readlines()
			f.close()
		else:
			file07 = None

		if len(self.errors) > 0:
			inputdat = self.jobConfig.inputDat(newmem, self.jobCategory, file07, self.errors[-1])
			self.cpuOverride = self.jobConfig.checkThreads(self.errors[-1])
		else:
			inputdat = self.jobConfig.inputDat(newmem, self.jobCategory, file07)
			self.cpuOverride = None

		# Write input.dat contents to file
		f = open('input.dat', 'w')
		f.write(inputdat)
		# Append print_variables() call as a preventive measure, since that is
		#    where we get the final energy value.
		f.write("\nprint_variables()\n")
		f.flush()
		f.close()
		logging.info("File input.dat written to disk.")
Example #3
class Myriad:
    def __init__(self):
        self.config = []
        self.maestroAPIGateway = None
        self.myriadJobsFolderOnAWS = None
        self.cpus = 1
        self.mem = 1
        self.displacements = None
        self.jobID = None
        self.executionID = None
        self.jobGroup = None
        self.jobCategory = None
        self.jobFolder = None
        self.errors = []
        self.ip = None
        self.jobConfig = None
        self.parsedJob = None
        self.jobStarted = None
        self.jobName = None
        self.ami = None
        self.instanceID = None
        self.region = None
        self.cmdBacklog = []

        self.memAdjust = 0.75  # Only use 75% of the available memory

    def getInstanceID(self):
        # Load the configuration values from file
        f = open('instance-id.txt')
        lines = f.readlines()
        f.close()
        return lines[0].strip()

    def getAmi(self):
        # Load the configuration values from file
        f = open('ami-id.txt')
        lines = f.readlines()
        f.close()
        return lines[0].strip()

    def getRegion(self):
        # lazy load region value
        if self.region is None:
            r = requests.get(
                'http://169.254.169.254/latest/dynamic/instance-identity/document'
            )
            if r.status_code == 200:
                j = json.loads(r.text)
                self.region = str(j['region'])
        return self.region

    def loadEndpoints(self):
        # Load the configuration values from file
        f = open('config.txt')
        lines = f.readlines()
        f.close()
        for line in lines:
            if line.startswith('Maestro_api_gateway '):
                self.maestroAPIGateway = line.split(' ')[1].strip()
                logging.info('JobRunner GET endpoint set to ' +
                             self.maestroAPIGateway)
            elif line.startswith('Myriad_AWS '):
                self.myriadJobsFolderOnAWS = line.split(' ')[1].strip()
                logging.info('Myriad AWS endpoint set to ' +
                             self.myriadJobsFolderOnAWS)

    def getJob(self, jobGroup=None, jobCategory=None):
        logging.info("Requesting a new job from " +
                     str(self.maestroAPIGateway))
        if jobGroup is not None and jobCategory is not None:
            logging.info("Job group set to " + str(jobGroup))
            logging.info("Job category set to " + str(jobCategory))
            p = {"jobGroup": jobGroup, "jobCategory": jobCategory}
            r = requests.get(self.maestroAPIGateway, params=p)
        elif jobGroup is not None and jobCategory is None:
            logging.info("Job group set to " + str(jobGroup))
            p = {"jobGroup": jobGroup}
            r = requests.get(self.maestroAPIGateway, params=p)
        elif jobGroup is None and jobCategory is not None:
            logging.info("Job category set to " + str(jobCategory))
            p = {"jobCategory": jobCategory}
            r = requests.get(self.maestroAPIGateway, params=p)
        else:
            logging.info("No job group or sub group specified")
            r = requests.get(self.maestroAPIGateway)

        # Check for good HTTP response
        if r.status_code == 200:
            logging.info("*** Begin get job response ***")
            logging.info(r.text)
            logging.info("*** End get job response ***")

            # Check for logical error in response
            if "errorMessage" not in r.text:
                logging.info("Good response:\n" + str(r.text))
                return self.parseJob(r.text)
            else:
                # logic error
                logging.warn("Error from web service:\n" + str(r.text))
                return ResultCode.failure
        else:
            # HTTP error
            logging.warn("HTTP error: " + str(r.status_code))
            return ResultCode.failure

    def parseJob(self, job):
        # The response should look something like this...
        #	{
        #	  "JobID": "12345",
        #	  "JobGroup": "NS2",
        #	  "JobCategory": "5Z",
        #	  "JobName": "NS2-5Z-1",
        #	  "JobDefinition": {"Displacements":"-1,-1,-2"},
        #	  "Created": "2016-07-17 15:26:45"
        #	}
        logging.info("Parsing job")
        self.parsedJob = json.loads(job)
        self.jobID = self.parsedJob['JobID']
        self.executionID = self.parsedJob['ExecutionID']
        self.jobGroup = self.parsedJob['JobGroup']
        self.jobCategory = self.parsedJob['JobCategory']
        self.jobName = self.parsedJob['JobName']
        self.displacements = self.parsedJob['JobDefinition']['Displacements']
        return ResultCode.success

    def getJobSupportFiles(self):
        result = ResultCode.success
        # download job-specific script(s) to the parent folder
        url = self.myriadJobsFolderOnAWS + "/" + self.jobGroup + "/jobConfig.py"
        logging.info("Retrieving job config from " + url)
        r = requests.get(url)

        # Check for web errors (404, 500, etc.)
        if "<html>" in r.text:
            logging.warn("Bad jobConfig.py")
            result = ResultCode.failure
        # logging.info(r.text)

        f = open("jobConfig.py", "w")
        f.write(r.text)
        f.flush()
        f.close()
        return result

    def getSystemSpecs(self):
        self.cpus = psutil.cpu_count()
        cpus = self.readTag('cpus')
        logging.info('Number of cores set to ' + str(self.cpus))
        if cpus is not None:
            logging.info('Overriding number of cores to: ' + str(cpus))
            self.cpus = int(cpus)
        os.environ["OMP_NUM_THREADS"] = str(self.cpus)
        os.environ["MKL_NUM_THREADS"] = str(self.cpus)
        self.mem = psutil.virtual_memory().available
        logging.info('Bytes of available memory ' + str(self.mem))

    def recordDiskUsage(self):
        myoutput = open('diskspace.out', 'w')
        df = subprocess.Popen("df", stdout=myoutput)
        df.wait()  # wait for df to exit so its output is fully written before closing
        myoutput.flush()
        myoutput.close()

    def shutdownMyriad(self):
        if os.path.isfile('../shutdown.myriad'):
            logging.info(
                'shutdownMyriad() found shutdown file. Returning True')
            return True
        r = requests.get(
            'http://169.254.169.254/latest/meta-data/spot/termination-time')
        if r.status_code == 200:
            if re.search('.*T.*Z', r.text):
                logging.info(
                    'shutdownMyriad() determined that AWS is terminating this spot instance. Returning True'
                )
                f = open('../shutdown.myriad', 'w')
                f.write(' ')
                f.flush()
                f.close()
                return True
        return False
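    # Note on the spot-termination check above: when AWS schedules a spot
    # interruption, the instance metadata endpoint returns HTTP 200 with a
    # UTC timestamp such as "2016-07-17T15:26:45Z" (hence the '.*T.*Z' match);
    # otherwise it typically returns 404 and the method falls through to False.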

    def runPsi4(self):
        self.jobStarted = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        result = ResultCode.success
        myoutput = open('psi4.out', 'w')
        myerror = open('psi4.err', 'w')
        exitcode = 0
        try:
            self.postJobStatus(True, "Started")
            p = subprocess.Popen("psi4", stdout=myoutput, stderr=myerror)
            waiting = True
            waitCounter = 0
            shutdown = False
            while waiting:
                try:
                    exitcode = p.wait(5)
                    logging.info("Call to p.wait() completed")
                    waiting = False
                except subprocess.TimeoutExpired:
                    waiting = True
                    waitCounter = waitCounter + 1
                    if self.shutdownMyriad():
                        p.kill()
                        self.postJobStatus(True, "Terminated")
                        exitcode = 1
                        shutdown = True
                        waiting = False
                    else:
                        if waitCounter == 60:
                            waitCounter = 0
                            self.postJobStatus(True, "Running")

            logging.info("psi4 exited with exit code of " + str(exitcode))
            if exitcode == 0:
                result = ResultCode.success
            else:
                if shutdown:
                    logging.info('Setting result code to ResultCode.shutdown')
                    result = ResultCode.shutdown
                else:
                    result = ResultCode.failure

        except RuntimeError as e:
            self.postJobStatus(False, str(e))
            result = ResultCode.failure

        finally:
            myoutput.flush()
            myerror.flush()
            myoutput.close()
            myerror.close()
            self.recordDiskUsage()

        return result

    def uploadResults(self):
        logging.info("Extracting results from output.dat")
        f = open("output.dat", "r")
        lines = f.readlines()
        energy = None
        for line in reversed(lines):
            if "CURRENT ENERGY" in line:
                energy = line.split(">")
                energy = energy[1].strip()
                break
        f.close()
        logging.info("Energy = " + str(energy))
        if energy is None:
            logging.warn("No energy found")
            return ResultCode.failure

        logging.info("Posting results to the web service at " +
                     str(self.maestroAPIGateway))
        n = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        j = {
            "JobID": self.jobID,
            "Started": self.jobStarted,
            "Completed": n,
            "JobResults": energy,
            "job": self.parsedJob
        }
        logging.info("Job results encoded as: " + str(j))
        r = requests.post(self.maestroAPIGateway, json=j)
        # Check for good HTTP response
        if r.status_code == 200:
            # Check for logical error in response
            if "errorMessage" not in r.text:
                logging.info("Good response:\n" + str(r.text))
                return ResultCode.success
            else:
                # logic error
                logging.warn("Error from web service:\n" + str(r.text))
                return ResultCode.failure
        else:
            # HTTP error
            logging.warn("HTTP error: " + str(r.status_code))
            return ResultCode.failure

    def clearScratch(self):
        logging.info("Clearing the scratch folder. Some errors are normal.")
        folder = os.environ['PSI_SCRATCH']
        for the_file in os.listdir(folder):
            file_path = os.path.join(folder, the_file)
            try:
                if os.path.isfile(file_path):
                    os.unlink(file_path)
                elif os.path.isdir(file_path):
                    shutil.rmtree(file_path)
            except Exception as e:
                logging.warn(e)
        logging.info("Finished clearing the scratch folder.")

    def makeJobFolder(self):
        self.jobFolder = self.jobName + "_" + datetime.datetime.now().strftime(
            "%Y%m%d_%H%M%S_" + str(self.jobID))
        os.mkdir(self.jobFolder)
        os.chdir(self.jobFolder)

    def closeJobFolder(self):
        os.chdir("..")

    def makeInputDat(self):
        # Adjust memory value in input.dat
        logging.info("Calculating memory value for input.dat...")
        adjustedMem = int(((self.mem * self.memAdjust) / self.cpus) / 1000000)
        newmem = "memory " + str(adjustedMem) + " MB"

        # Creates the input.dat file in the job folder
        from jobConfig import JobConfig
        self.jobConfig = JobConfig()
        intder = self.jobConfig.intderIn(self.displacements)
        if intder is not None:
            f = open('intder.in', 'w')
            f.write(intder)
            f.flush()
            f.close()

            # Run Intder2005 to produce the geometries
            logging.info("Running Intder2005...")
            myinput = open('intder.in')
            myoutput = open('intder.out', 'w')
            p = subprocess.Popen("Intder2005.x",
                                 stdin=myinput,
                                 stdout=myoutput)
            p.wait()
            myoutput.flush()
            myoutput.close()
            logging.info("Finished running Intder2005...")

            # Read the intder output and produce an input.dat file from the geometries
            logging.info("Reading file07...")
            f = open('file07')
            file07 = f.readlines()
            f.close()
        else:
            file07 = None

        if len(self.errors) > 0:
            inputdat = self.jobConfig.inputDat(newmem, self.jobCategory,
                                               file07, self.errors[-1])
        else:
            inputdat = self.jobConfig.inputDat(newmem, self.jobCategory,
                                               file07)

        # Write input.dat contents to file
        f = open('input.dat', 'w')
        f.write(inputdat)
        # Append print_variables() call as a preventive measure, since that is
        #    where we get the final energy value.
        f.write("\nprint_variables()\n")
        f.flush()
        f.close()
        logging.info("File input.dat written to disk.")

    def postJobStatus(self, status, message=None):
        n = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        logging.info("Posting job status to " + str(self.maestroAPIGateway))
        if status:
            statusStr = "Success"
        else:
            statusStr = "Failure"
        if message is None:
            j = {
                "ExecutionID": self.executionID,
                "LastUpdate": n,
                "Status": statusStr,
                "job": self.parsedJob
            }
        else:
            j = {
                "JobID": self.jobID,
                "LastUpdate": n,
                "Status": statusStr,
                "Message": message,
                "job": self.parsedJob
            }
        logging.info("Job status encoded as: " + str(j))
        try:
            r = requests.put(self.maestroAPIGateway, json=j)
        except Exception:
            logging.warn("Error posting status. Ignoring.")

        # If there's a failed tagging command in the queue, pop it and run it
        if len(self.cmdBacklog) > 0:
            command = self.cmdBacklog.pop()
            self.runCommand(command)

    def zipJobFolder(self):
        # Get IP address
        f = open('ip.txt')
        self.ip = f.readline().strip()
        f.close()
        if not self.ip:  # readline().strip() never returns None; guard against an empty string
            self.ip = ""

        try:
            logging.info("Compressing job folder...")
            myZipFile = zipfile.ZipFile(
                "ip_" + self.ip + "_" + self.jobFolder + ".zip", "w")
            listing = os.listdir(self.jobFolder)
            for f in listing:
                myZipFile.write(self.jobFolder + "/" + f)
            myZipFile.close()
            logging.info("Job folder compressed. Removing original...")
            shutil.rmtree(self.jobFolder)
            logging.info("Done removing original job folder")
        except Exception as e:
            logging.warn("Error compressing job folder: " + str(e))

    def readTag(self, key):
        # aws ec2 describe-tags --filters "Name=resource-id,Values=i-1234567890abcdef8" "Name=key,Values=threads"
        # 'Key="ExecutionID",Value="3bd99202-5d7f-49c2-a350-f1fdf2235ad3"'
        command = 'aws ec2 describe-tags --region ' + self.region + ' --filters "Name=resource-id,Values=' + str(
            self.ami) + '" "Name=key,Values=' + str(key) + '"'
        proc = subprocess.Popen(command,
                                shell=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        stdout, stderr = proc.communicate()
        tag = json.loads(stdout)
        if 'Tags' in tag and len(
                tag['Tags']) > 0 and 'Value' in tag['Tags'][0]:
            return str(tag['Tags'][0]['Value'])
        else:
            return None

    def doModifyTag(self, action, key, value):
        # aws ec2 delete-tags --resources ami-78a54011 --region us-east-1 --tags Key=Stack
        # aws ec2 create-tags --resources ami-78a54011 --region us-east-1 --tags Key=Stack,Value=foo
        # 'Key="ExecutionID",Value="3bd99202-5d7f-49c2-a350-f1fdf2235ad3"'
        command = "aws ec2 " + action + " --resources " + str(
            self.instanceID) + " --region " + str(
                self.region) + " --tags 'Key=" + str(key)
        if value is not None:
            command += ',Value="' + str(value) + '"'
        command += "'"
        self.runCommand(command)

    def runCommand(self, command):
        logging.info("Invoking " + str(command))
        process = subprocess.Popen(command,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   shell=True)
        out, err = process.communicate()
        if out:
            logging.info("runCommand() subprocess.Popen stdout...")
            logging.info(out)
        if err:
            logging.warn("runCommand() subprocess.Popen stderr...")
            logging.warn(err)
        logging.info("runCommand() subprocess.Popen returncode...")
        logging.info(process.returncode)

        # If we get back a RequestLimitExceeded error make a note to try again later...
        if process.returncode == 255 and "RequestLimitExceeded" in str(err):
            self.cmdBacklog.append(command)

    def tagInstance(self):
        self.downloadCredentials()
        self.doModifyTag("create-tags", "Name", self.jobName)
        self.doModifyTag("create-tags", "ExecutionID", self.executionID)
        self.doModifyTag("create-tags", "JobID", self.jobID)
        self.doModifyTag("create-tags", "StartTime",
                         datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
        self.doModifyTag("create-tags", "Displacements", self.displacements)

    def untagInstance(self):
        self.downloadCredentials()
        self.doModifyTag("create-tags", "Name", "Waiting")
        self.doModifyTag("delete-tags", "ExecutionID", None)
        self.doModifyTag("delete-tags", "JobID", None)
        self.doModifyTag("delete-tags", "StartTime", None)
        self.doModifyTag("delete-tags", "Displacements", None)

    def downloadCredentials(self):
        logging.info("Retrieving credentials...")
        r = requests.get(
            "http://169.254.169.254/latest/meta-data/iam/security-credentials/S3FullAccess"
        )
        if r.status_code == 200:
            j = json.loads(r.text)
            os.environ["AWS_ACCESS_KEY_ID"] = str(j['AccessKeyId'])
            os.environ["AWS_SECRET_ACCESS_KEY"] = str(j['SecretAccessKey'])
            os.environ["AWS_SECURITY_TOKEN"] = str(j['Token'])
            logging.info("Credentials exported to environment variables")
        else:
            logging.warn("Failed to retrieve credentials")

    # Main
    def runOnce(self, jobGroup=None, jobCategory=None, error=None):
        logging.info("Myriad.runOnce invoked...")
        logging.info("Job group = " + str(jobGroup))
        logging.info("Job sub group = " + str(jobCategory))
        logging.info("Error = " + str(error))
        self.jobGroup = jobGroup
        self.jobCategory = jobCategory

        # if we have seen this error before, bail out.
        # We couldn't fix it the first time. Why should this time be any different?
        if error is not None and error in self.errors:
            self.postJobStatus(False, "Error repeated: " + str(error))
            return ResultCode.failure

        # add the error condition to the stack of prior errors if we've never seen it before
        if error is not None:
            self.errors.append(error)

        # load the endpoints for web service calls and get ami-id for this machine
        self.loadEndpoints()
        self.ami = self.getAmi()
        self.instanceID = self.getInstanceID()
        self.region = self.getRegion()

        # if no error, get a new job.
        # if there is an error code, we're going to re-run the job we have
        if error is None:
            result = self.getJob(self.jobGroup, self.jobCategory)
        else:
            logging.info("Running current job again to correct for errors")
            result = ResultCode.success

        if result == ResultCode.success:
            newerror = None
            result = self.getJobSupportFiles()
            if result == ResultCode.success:
                self.getSystemSpecs()
                self.clearScratch()
                self.makeJobFolder()
                self.makeInputDat()
                self.tagInstance()
                result = self.runPsi4()
                if result == ResultCode.success:
                    logging.info("runPsi4() returned success code")
                    while self.uploadResults() == ResultCode.failure:
                        logging.info(
                            "Failure uploading results. Retrying in 60 seconds..."
                        )
                        time.sleep(60)
                else:
                    if result != ResultCode.shutdown:
                        # Check for known error situations in output.dat
                        logging.warn(
                            "runPsi4() returned failure code. Checking for known errors"
                        )
                        newerror = self.jobConfig.checkError()
                        self.postJobStatus(False,
                                           "PSI4 error: " + str(newerror))
                        logging.info("CheckError() result: " + str(newerror))

                self.closeJobFolder()
                if result != ResultCode.shutdown:
                    self.zipJobFolder()
                    self.clearScratch()

                # if we encounter a known error, try the job again and compensate
                if newerror is not None:
                    logging.info("Re-executing job due to known error: " +
                                 str(newerror))
                    result = self.runOnce(self.jobGroup, self.jobCategory,
                                          newerror)
            else:
                logging.warn("Error retrieving support files")

        else:
            result = ResultCode.noaction

        self.untagInstance()
        return result
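
ResultCode is used throughout but never defined in this listing. A minimal sketch, assuming it is a plain enumeration with the four members referenced above, plus a hypothetical driver loop (the jobGroup value is taken from the sample JSON in parseJob):

from enum import Enum

class ResultCode(Enum):
    success = 0
    failure = 1
    shutdown = 2
    noaction = 3

if __name__ == '__main__':
    # Hypothetical driver: keep pulling jobs until the queue is empty
    # or the spot instance is being reclaimed.
    m = Myriad()
    while True:
        result = m.runOnce(jobGroup="NS2")
        if result in (ResultCode.shutdown, ResultCode.noaction):
            break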
Example #4
class Myriad:
	
	def __init__(self):
		self.config = []
		self.maestroAPIGateway = None
		self.myriadJobsFolderOnAWS = None
		self.cpus = 1
		self.mem = 1
		self.displacements = None
		self.jobID = None
		self.executionID = None
		self.jobGroup = None
		self.jobCategory = None
		self.jobFolder = None
		self.errors = []
		self.ip = None
		self.jobConfig = None
		self.parsedJob = None
		self.jobStarted = None
		self.jobName = None
		self.ami = None
		self.instanceID = None
		self.region = None
		self.cmdBacklog = []

		self.memAdjust = 0.75  # Only use 75% of the available memory

	def getInstanceID(self):
		# Load the configuration values from file
		f = open('instance-id.txt')
		lines = f.readlines()
		f.close()
		return lines[0].strip()

	def getAmi(self):
		# Load the configuration values from file
		f = open('ami-id.txt')
		lines = f.readlines()
		f.close()
		return lines[0].strip()

	def getRegion(self):
		# lazy load region value
		if self.region is None:
			r = requests.get('http://169.254.169.254/latest/dynamic/instance-identity/document')
			if r.status_code == 200:
				j = json.loads(r.text)
				self.region = str(j['region'])
		return self.region

	def loadEndpoints(self):
		# Load the configuration values from file
		f = open('config.txt')
		lines = f.readlines()
		f.close()
		for line in lines:
			if line.startswith('Maestro_api_gateway '):
				self.maestroAPIGateway = line.split(' ')[1].strip()
				logging.info('JobRunner GET endpoint set to ' + self.maestroAPIGateway)
			elif line.startswith('Myriad_AWS '):
				self.myriadJobsFolderOnAWS = line.split(' ')[1].strip()
				logging.info('Myriad AWS endpoint set to ' + self.myriadJobsFolderOnAWS)

	def getJob(self, jobGroup=None, jobCategory=None):
		logging.info("Requesting a new job from " + str(self.maestroAPIGateway))
		if jobGroup is not None and jobCategory is not None:
			logging.info("Job group set to " + str(jobGroup))
			logging.info("Job category set to " + str(jobCategory))
			p = {"jobGroup": jobGroup, "jobCategory": jobCategory}
			r = requests.get(self.maestroAPIGateway, params=p)
		elif jobGroup is not None and jobCategory is None:
			logging.info("Job group set to " + str(jobGroup))
			p = {"jobGroup": jobGroup}
			r = requests.get(self.maestroAPIGateway, params=p)
		elif jobGroup is None and jobCategory is not None:
			logging.info("Job category set to " + str(jobCategory))
			p = {"jobCategory": jobCategory}
			r = requests.get(self.maestroAPIGateway, params=p)
		else:
			logging.info("No job group or sub group specified")
			r = requests.get(self.maestroAPIGateway)

		# Check for good HTTP response
		if r.status_code == 200:
			logging.info("*** Begin get job response ***")
			logging.info(r.text)
			logging.info("*** End get job response ***")

			# Check for logical error in response
			if "errorMessage" not in r.text:
				logging.info("Good response:\n" + str(r.text))
				return self.parseJob(r.text)
			else:
				# logic error
				logging.warn("Error from web service:\n" + str(r.text))
				return ResultCode.failure
		else:
			# HTTP error
			logging.warn("HTTP error: " + str(r.status_code))
			return ResultCode.failure

	def parseJob(self, job):
		# The response should look something like this...
		#	{
		#	  "JobID": "12345",
		#	  "JobGroup": "NS2",
		#	  "JobCategory": "5Z",
		#	  "JobName": "NS2-5Z-1",
		#	  "JobDefinition": {"Displacements":"-1,-1,-2"},
		#	  "Created": "2016-07-17 15:26:45"
		#	}
		logging.info("Parsing job")
		self.parsedJob = json.loads(job)
		self.jobID = self.parsedJob['JobID']
		self.executionID = self.parsedJob['ExecutionID']
		self.jobGroup = self.parsedJob['JobGroup']
		self.jobCategory = self.parsedJob['JobCategory']
		self.jobName = self.parsedJob['JobName']
		self.displacements = self.parsedJob['JobDefinition']['Displacements']
		return ResultCode.success

	def getJobSupportFiles(self):
		result = ResultCode.success
		# download job-specific script(s) to the parent folder
		url = self.myriadJobsFolderOnAWS + "/" + self.jobGroup + "/jobConfig.py"
		logging.info("Retrieving job config from " + url)
		r = requests.get(url)

		# Check for web errors (404, 500, etc.)
		if "<html>" in r.text:
			logging.warn("Bad jobConfig.py")
			result = ResultCode.failure
		# logging.info(r.text)

		f = open("jobConfig.py", "w")
		f.write(r.text)
		f.flush()
		f.close()
		return result

	def getSystemSpecs(self):
		self.cpus = psutil.cpu_count()
		cpus = self.readTag('cpus')
		logging.info('Number of cores set to ' + str(self.cpus))
		if cpus is not None:
			logging.info('Overriding number of cores to: ' + str(cpus))
			self.cpus = int(cpus)
		os.environ["OMP_NUM_THREADS"] = str(self.cpus)
		os.environ["MKL_NUM_THREADS"] = str(self.cpus)
		self.mem = psutil.virtual_memory().available
		logging.info('Bytes of available memory ' + str(self.mem))

	def recordDiskUsage(self):
		myoutput = open('diskspace.out', 'w')
		df = subprocess.Popen("df", stdout=myoutput)
		df.wait()  # wait for df to exit so its output is fully written before closing
		myoutput.flush()
		myoutput.close()
		
	def shutdownMyriad(self):
		if os.path.isfile('../shutdown.myriad'):
			logging.info('shutdownMyriad() found shutdown file. Returning True')
			return True
		r = requests.get('http://169.254.169.254/latest/meta-data/spot/termination-time')
		if r.status_code == 200:
			if re.search('.*T.*Z', r.text):
				logging.info('shutdownMyriad() determined that AWS is terminating this spot instance. Returning True')
				f = open('../shutdown.myriad', 'w')
				f.write(' ')
				f.flush()
				f.close()
				return True
		return False

	def runPsi4(self):
		self.jobStarted = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
		result = ResultCode.success
		myoutput = open('psi4.out', 'w')
		myerror = open('psi4.err', 'w')
		exitcode = 0
		try:
			self.postJobStatus(True, "Started")
			p = subprocess.Popen("psi4", stdout=myoutput, stderr=myerror)
			waiting = True
			waitCounter = 0
			shutdown = False
			while waiting:
				try:
					exitcode = p.wait(5)
					logging.info("Call to p.wait() completed")
					waiting = False
				except subprocess.TimeoutExpired:
					waiting = True
					waitCounter = waitCounter + 1
					if self.shutdownMyriad():
						p.kill()
						self.postJobStatus(True, "Terminated")
						exitcode = 1
						shutdown = True
						waiting = False
					else:
						if waitCounter == 60:
							waitCounter = 0
							self.postJobStatus(True, "Running")

			logging.info("psi4 exited with exit code of " + str(exitcode))
			if exitcode == 0:
				result = ResultCode.success
			else:
				if shutdown:
					logging.info('Setting result code to ResultCode.shutdown')
					result = ResultCode.shutdown
				else:
					result = ResultCode.failure

		except RuntimeError as e:
			self.postJobStatus(False, str(e))
			result = ResultCode.failure

		finally:
			myoutput.flush()
			myerror.flush()
			myoutput.close()
			myerror.close()
			self.recordDiskUsage()
			
		return result

	def uploadResults(self):
		logging.info("Extracting results from output.dat")
		f = open("output.dat", "r")
		lines = f.readlines()
		energy = None
		for line in reversed(lines):
			if "CURRENT ENERGY" in line:
				energy = line.split(">")
				energy = energy[1].strip()
				break
		f.close()
		logging.info("Energy = " + str(energy))
		if energy is None:
			logging.warn("No energy found")
			return ResultCode.failure

		logging.info("Posting results to the web service at " + str(self.maestroAPIGateway))
		n = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
		j = { "JobID" : self.jobID, "Started" : self.jobStarted, "Completed" : n, "JobResults" : energy, "job" : self.parsedJob }
		logging.info("Job results encoded as: " + str(j))
		r = requests.post(self.maestroAPIGateway, json=j)
		# Check for good HTTP response
		if r.status_code == 200:
			# Check for logical error in response
			if "errorMessage" not in r.text:
				logging.info("Good response:\n" + str(r.text))
				return ResultCode.success
			else:
				# logic error
				logging.warn("Error from web service:\n" + str(r.text))
				return ResultCode.failure
		else:
			# HTTP error
			logging.warn("HTTP error: " + str(r.status_code))
			return ResultCode.failure

	def clearScratch(self):
		logging.info("Clearing the scratch folder. Some errors are normal.")
		folder = os.environ['PSI_SCRATCH']
		for the_file in os.listdir(folder):
			file_path = os.path.join(folder, the_file)
			try:
				if os.path.isfile(file_path):
					os.unlink(file_path)
				elif os.path.isdir(file_path):
					shutil.rmtree(file_path)
			except Exception as e:
				logging.warn(e)
		logging.info("Finished clearing the scratch folder.")

	def makeJobFolder(self):
		self.jobFolder = self.jobName + "_" + datetime.datetime.now().strftime("%Y%m%d_%H%M%S_"+str(self.jobID))
		os.mkdir(self.jobFolder)
		os.chdir(self.jobFolder)

	def closeJobFolder(self):
		os.chdir("..")

	def makeInputDat(self):
		# Adjust memory value in input.dat
		logging.info("Calculating memory value for input.dat...")
		adjustedMem = int(((self.mem * self.memAdjust) / self.cpus) / 1000000)
		newmem = "memory " + str(adjustedMem) + " MB"

		# Creates the input.dat file in the job folder
		from jobConfig import JobConfig
		self.jobConfig = JobConfig()
		intder = self.jobConfig.intderIn(self.displacements)
		if intder is not None:
			f = open('intder.in', 'w')
			f.write(intder)
			f.flush()
			f.close()

			# Run Intder2005 to produce the geometries
			logging.info("Running Intder2005...")
			myinput = open('intder.in')
			myoutput = open('intder.out', 'w')
			p = subprocess.Popen("Intder2005.x", stdin=myinput, stdout=myoutput)
			p.wait()
			myoutput.flush()
			myoutput.close()
			logging.info("Finished running Intder2005...")

			# Read the intder output and produce an input.dat file from the geometries
			logging.info("Reading file07...")
			f = open('file07')
			file07 = f.readlines()
			f.close()
		else:
			file07 = None

		if len(self.errors) > 0:
			inputdat = self.jobConfig.inputDat(newmem, self.jobCategory, file07, self.errors[-1])
		else:
			inputdat = self.jobConfig.inputDat(newmem, self.jobCategory, file07)

		# Write input.dat contents to file
		f = open('input.dat', 'w')
		f.write(inputdat)
		# Append print_variables() call as a preventive measure, since that is
		#    where we get the final energy value.
		f.write("\nprint_variables()\n")
		f.flush()
		f.close()
		logging.info("File input.dat written to disk.")

	def postJobStatus(self, status, message=None):
		n = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
		logging.info("Posting job status to " + str(self.maestroAPIGateway))
		if status:
			statusStr = "Success"
		else:
			statusStr = "Failure"
		if message is None:
			j = { "ExecutionID" : self.executionID, "LastUpdate":n, "Status":statusStr, "job" : self.parsedJob }
		else:
			j = { "JobID" : self.jobID, "LastUpdate":n, "Status":statusStr, "Message":message, "job" : self.parsedJob }
		logging.info("Job status encoded as: " + str(j))
		try:
			r = requests.put(self.maestroAPIGateway, json=j)
		except Exception:
			logging.warn("Error posting status. Ignoring.")

		# If there's a failed tagging command in the queue, pop it and run it
		if len(self.cmdBacklog) > 0:
			command = self.cmdBacklog.pop()
			self.runCommand(command)

	def zipJobFolder(self):
		# Get IP address
		f = open('ip.txt')
		self.ip = f.readline().strip()
		f.close()
		if not self.ip:  # readline().strip() never returns None; guard against an empty string
			self.ip = ""
		
		try:
			logging.info("Compressing job folder...")
			myZipFile = zipfile.ZipFile("ip_" + self.ip + "_" + self.jobFolder + ".zip", "w" )
			listing = os.listdir(self.jobFolder)
			for f in listing:
				myZipFile.write(self.jobFolder + "/" + f)
			myZipFile.close()
			logging.info("Job folder compressed. Removing original...")
			shutil.rmtree(self.jobFolder)
			logging.info("Done removing original job folder")
		except Exception as e:
			logging.warn("Error compressing job folder: " + str(e))
			
	def readTag(self, key):
		# aws ec2 describe-tags --filters "Name=resource-id,Values=i-1234567890abcdef8" "Name=key,Values=threads"
		# 'Key="ExecutionID",Value="3bd99202-5d7f-49c2-a350-f1fdf2235ad3"'
		command = 'aws ec2 describe-tags --region '+self.region+' --filters "Name=resource-id,Values=' + str(self.ami) + '" "Name=key,Values='+str(key)+'"'
		proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
		stdout, stderr = proc.communicate()
		tag = json.loads(stdout)
		if 'Tags' in tag and len(tag['Tags']) > 0 and 'Value' in tag['Tags'][0]:
			return str(tag['Tags'][0]['Value'])
		else:
			return None

	def doModifyTag(self, action, key, value):
		# aws ec2 delete-tags --resources ami-78a54011 --region us-east-1 --tags Key=Stack
		# aws ec2 create-tags --resources ami-78a54011 --region us-east-1 --tags Key=Stack,Value=foo
		# 'Key="ExecutionID",Value="3bd99202-5d7f-49c2-a350-f1fdf2235ad3"'
		command = "aws ec2 " + action + " --resources " + str(self.instanceID) + " --region " + str(self.region) + " --tags 'Key="+str(key)
		if value is not None:
			command += ',Value="' + str(value) + '"'
		command += "'"
		self.runCommand(command)
	
	def runCommand(self, command):
		logging.info("Invoking " + str(command))
		process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
		out, err = process.communicate()
		if out:
			logging.info("runCommand() subprocess.Popen stdout...")
			logging.info(out)
		if err:
			logging.warn("runCommand() subprocess.Popen stderr...")
			logging.warn(err)
		logging.info("runCommand() subprocess.Popen returncode...")
		logging.info(process.returncode)

		# If we get back a RequestLimitExceeded error make a note to try again later...
		if process.returncode == 255 and "RequestLimitExceeded" in str(err):
			self.cmdBacklog.append(command)

	def tagInstance(self):
		self.downloadCredentials()
		self.doModifyTag("create-tags", "Name", self.jobName)
		self.doModifyTag("create-tags", "ExecutionID", self.executionID)
		self.doModifyTag("create-tags", "JobID", self.jobID)
		self.doModifyTag("create-tags", "StartTime", datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
		self.doModifyTag("create-tags", "Displacements", self.displacements)
	
	def untagInstance(self):
		self.downloadCredentials()
		self.doModifyTag("create-tags", "Name", "Waiting")
		self.doModifyTag("delete-tags", "ExecutionID", None)
		self.doModifyTag("delete-tags", "JobID", None)
		self.doModifyTag("delete-tags", "StartTime", None)
		self.doModifyTag("delete-tags", "Displacements", None)
	
	def downloadCredentials(self):
		logging.info("Retrieving credentials...")
		r = requests.get("http://169.254.169.254/latest/meta-data/iam/security-credentials/S3FullAccess")
		if r.status_code == 200:
			j = json.loads(r.text)
			os.environ["AWS_ACCESS_KEY_ID"] = str(j['AccessKeyId'])
			os.environ["AWS_SECRET_ACCESS_KEY"] = str(j['SecretAccessKey'])
			os.environ["AWS_SECURITY_TOKEN"] = str(j['Token'])
			logging.info("Credentials exported to environment variables")
		else:
			logging.warn("Failed to retrieve credentials")

	# Main
	def runOnce(self, jobGroup=None, jobCategory=None, error=None):
		logging.info("Myriad.runOnce invoked...")
		logging.info("Job group = " + str(jobGroup))
		logging.info("Job sub group = " + str(jobCategory))
		logging.info("Error = " + str(error))
		self.jobGroup = jobGroup
		self.jobCategory = jobCategory

		# if we have seen this error before, bail out.
		# We couldn't fix it the first time. Why should this time be any different?
		if error is not None and error in self.errors:
			self.postJobStatus(False, "Error repeated: " + str(error))
			return ResultCode.failure
		
		# add the error condition to the stack of prior errors if we've never seen it before
		if error is not None:
			self.errors.append(error)

		# load the endpoints for web service calls and get ami-id for this machine
		self.loadEndpoints()
		self.ami = self.getAmi()
		self.instanceID = self.getInstanceID()
		self.region = self.getRegion()

		# if no error, get a new job.
		# if there is an error code, we're going to re-run the job we have
		if error is None:
			result = self.getJob(self.jobGroup, self.jobCategory)
		else:
			logging.info("Running current job again to correct for errors")
			result = ResultCode.success

		if result == ResultCode.success:
			newerror = None
			result = self.getJobSupportFiles()
			if result == ResultCode.success:
				self.getSystemSpecs()
				self.clearScratch()
				self.makeJobFolder()
				self.makeInputDat()
				self.tagInstance()
				result = self.runPsi4()
				if result == ResultCode.success:
					logging.info("runPsi4() returned success code")
					while self.uploadResults() == ResultCode.failure:
						logging.info("Failure uploading results. Retrying in 60 seconds...")
						time.sleep(60)
				else:
					if result != ResultCode.shutdown:
						# Check for known error situations in output.dat
						logging.warn("runPsi4() returned failure code. Checking for known errors")
						newerror = self.jobConfig.checkError()
						self.postJobStatus(False, "PSI4 error: " + str(newerror))
						logging.info("CheckError() result: " + str(newerror))

				self.closeJobFolder()
				if result != ResultCode.shutdown:
					self.zipJobFolder()
					self.clearScratch()

				# if we encounter a known error, try the job again and compensate
				if newerror is not None:
					logging.info("Re-executing job due to known error: " + str(newerror))
					result = self.runOnce(self.jobGroup, self.jobCategory, newerror)
			else:
				logging.warn("Error retrieving support files")

		else:
			result = ResultCode.noaction

		self.untagInstance()
		return result
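
Both class versions shell out to the AWS CLI for tagging and hand-roll retry bookkeeping for RequestLimitExceeded (see runCommand and cmdBacklog). For comparison, a sketch of the same calls made through boto3; this is an alternative, not what the examples above use, and boto3's built-in retry handling already covers the throttling case:

import boto3

def modify_tag(action, instance_id, region, key, value=None):
    # Equivalent of doModifyTag(): create or delete one tag on the instance.
    ec2 = boto3.client('ec2', region_name=region)
    if action == 'create-tags':
        ec2.create_tags(Resources=[instance_id],
                        Tags=[{'Key': key, 'Value': value or ''}])
    elif action == 'delete-tags':
        ec2.delete_tags(Resources=[instance_id], Tags=[{'Key': key}])

def read_tag(instance_id, region, key):
    # Equivalent of readTag(): return the tag's value, or None if unset.
    ec2 = boto3.client('ec2', region_name=region)
    resp = ec2.describe_tags(Filters=[
        {'Name': 'resource-id', 'Values': [instance_id]},
        {'Name': 'key', 'Values': [key]},
    ])
    tags = resp.get('Tags', [])
    return tags[0]['Value'] if tags else None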