def route_create_ensemble_workflow(ensemble):
    """Create a new workflow inside one of the current user's ensembles.

    The ensemble is looked up for ``g.user.username`` first, then the form
    fields ``name``, ``priority`` (defaults to 0), ``basedir`` and
    ``plan_command`` are read from the request. The workflow is stored
    through the Ensembles DAO and the session is committed.

    Returns the result of ``api.json_created`` for the URL of the new
    workflow.

    Raises EMError when ``name``, ``basedir`` or ``plan_command`` is
    missing from the form.
    """
    ensembles = Ensembles(g.session)
    parent = ensembles.get_ensemble(g.user.username, ensemble)

    form = request.form

    name = form.get("name", None)
    if name is None:
        raise EMError("Specify ensemble workflow 'name'")

    # NOTE(review): priority is passed on exactly as received from the form
    priority = form.get("priority", 0)

    basedir = form.get("basedir")
    if basedir is None:
        raise EMError("Specify 'basedir' where plan command should be executed")

    plan_command = form.get("plan_command")
    if plan_command is None:
        raise EMError(
            "Specify 'plan_command' that should be executed to plan workflow"
        )

    ensembles.create_ensemble_workflow(
        parent.id, name, basedir, priority, plan_command
    )
    g.session.commit()

    location = url_for(
        "route_get_ensemble_workflow", ensemble=ensemble, workflow=name
    )
    return api.json_created(location)
def get_dashboard_state_for_running_workflow(self):
    """Return the newest dashboard state recorded since the last update.

    Looks up the most recent MasterWorkflowstate row for this workflow's
    dashboard record whose timestamp is >= the ensemble workflow's
    ``updated`` time. Returns None (after logging) when no such row exists.

    Raises EMError if the ensemble workflow is not RUNNING or if it has no
    dashboard record.
    """
    # Only valid for RUNNING workflows: the lookup assumes the latest state
    # event was written after the ensemble workflow's `updated` timestamp,
    # which may not hold in any other state.
    if self.workflow.state != EnsembleWorkflowStates.RUNNING:
        raise EMError("This method should only be called for running workflows")

    dashboard = self.get_dashboard()
    if dashboard is None:
        raise EMError("Dashboard workflow not found")

    # Express `updated` as seconds elapsed since 1970-01-01
    since_epoch = self.workflow.updated - datetime.datetime(1970, 1, 1)
    updated_ts = since_epoch.total_seconds()

    # Newest state event at or after the ensemble workflow's last update
    query = self.dao.session.query(MasterWorkflowstate)
    query = query.filter_by(wf_id=dashboard.wf_id)
    query = query.filter(MasterWorkflowstate.timestamp >= updated_ts)
    query = query.order_by(text("timestamp desc"))
    latest = query.first()

    if latest is None:
        workflow_name = self.workflow.name
        log.info(
            "No recent workflow state records for workflow %s" % workflow_name
        )

    return latest
def run(self):
    """Run the workflow using pegasus-run.

    Invokes ``pegasus-run <submitdir>`` through runscript(), appending both
    stdout and stderr of the command to the workflow's log file.

    Raises EMError when the workflow has no submitdir or the directory
    does not exist.
    """
    workflow = self.workflow

    submitdir = workflow.submitdir
    if submitdir is None:
        raise EMError("Workflow submitdir not set")
    if not os.path.isdir(submitdir):
        raise EMError("Workflow submit dir does not exist: %s" % submitdir)

    logfile = workflow.get_logfile()

    # NOTE(review): both paths are interpolated into the shell command as-is
    command = "pegasus-run %s >>%s 2>&1" % (submitdir, logfile)
    runscript(command, env=get_script_env())
def runscript(script, cwd=None, env=None):
    """Run a shell script synchronously and fail on a non-zero exit code.

    script: command line handed to the shell (``shell=True``).
    cwd:    optional working directory; must exist when given.
    env:    optional environment mapping; a copy of ``os.environ`` is used
            when omitted.

    Raises EMError if ``cwd`` is not a directory or the script exits with a
    non-zero code.
    """
    # Reject a bad working directory up front
    if not (cwd is None or os.path.isdir(cwd)):
        raise EMError("Working directory does not exist: %s" % cwd)

    environment = dict(os.environ) if env is None else env

    process = subprocess.Popen(script, shell=True, env=environment, cwd=cwd)
    exitcode = process.wait()
    if exitcode != 0:
        raise EMError("Script failed with exitcode %d" % exitcode)
def get_wf_uuid(self):
    """Get the workflow UUID from the braindump file.

    Loads ``braindump.yml`` from the workflow's submit directory and
    returns its ``wf_uuid``.

    Raises EMError when the file is missing or carries no wf_uuid.
    """
    path = os.path.join(self.find_submitdir(), "braindump.yml")
    if not os.path.isfile(path):
        raise EMError("braindump.yml not found")

    with open(path) as stream:
        bd = braindump.load(stream)

    if bd.wf_uuid is None:
        raise EMError("wf_uuid not found in braindump.yml")
    return bd.wf_uuid
def plan(self):
    """Launch the pegasus planner.

    Removes the run, result and pid files left by a previous planning
    attempt, then starts the workflow's plan command in the background via
    forkscript(), running in the workflow's base directory.

    Raises EMError if a pidfile exists and the planner it names is still
    running.
    """
    workflow = self.workflow
    basedir = workflow.get_basedir()
    pidfile = workflow.get_pidfile()
    logfile = workflow.get_logfile()
    runfile = workflow.get_runfile()
    resultfile = workflow.get_resultfile()
    plan_command = workflow.get_plan_command()

    if os.path.isfile(pidfile) and self.planning():
        raise EMError("Planner already running")

    # Leftovers from an earlier plan would confuse the ensemble manager
    # when re-planning, so clear them out first.
    for stale in (runfile, resultfile, pidfile):
        if os.path.isfile(stale):
            os.remove(stale)

    # Planner output (stdout+stderr) is appended to the log file, lines
    # containing "pegasus-run" are captured in the run file, and `$?` is
    # written to the result file.
    # NOTE(review): in POSIX sh `$?` after a pipeline is the status of its
    # last command (grep here), so the result file presumably records
    # whether a pegasus-run line was printed - confirm that is intended.
    template = "({}) 2>&1 | tee -a {} | grep pegasus-run >{} ; /bin/echo $? >{}"
    script = template.format(plan_command, logfile, runfile, resultfile)

    forkscript(script, cwd=basedir, pidfile=pidfile, env=get_script_env())
def pathfind(exe):
    """Return the first ``<dir>/<exe>`` on PATH that is an existing file.

    PATH is read from the environment, falling back to
    ``/bin:/usr/bin:/usr/local/bin`` when unset, and split on ``:``.

    Raises EMError when no PATH entry contains the file.
    """
    search_path = os.getenv("PATH", "/bin:/usr/bin:/usr/local/bin")
    for directory in search_path.split(":"):
        candidate = os.path.join(directory, exe)
        if os.path.isfile(candidate):
            return candidate
    raise EMError("%s not found on PATH" % exe)
def find_submitdir(self):
    """Get the workflow submitdir from the workflow run file.

    Scans the run file for lines starting with ``pegasus-run`` and returns
    the second whitespace-separated token of the last such line.

    Raises EMError when the run file is missing or has no pegasus-run line.
    """
    runfile = self.workflow.get_runfile()
    if not os.path.isfile(runfile):
        raise EMError("Workflow run file not found: %s" % runfile)

    with open(runfile) as lines:
        candidates = [
            line.split()[1] for line in lines if line.startswith("pegasus-run")
        ]

    if not candidates:
        raise EMError("No pegasus-run found in the workflow run file: %s" % runfile)

    # When several pegasus-run lines exist, the last one wins
    return candidates[-1]
def forkscript(script, pidfile=None, cwd=None, env=None):
    """Launch a shell script in the background and return immediately.

    A double fork is used to detach the script from the python interpreter
    so that the caller never has to wait() on it: the intermediate child
    forks the real worker, records the worker's pid and exits right away;
    only the intermediate child is waited for here.

    script:  command line executed as ``/bin/sh -c script``.
    pidfile: optional path that receives the worker's pid.
    cwd:     optional working directory for the script; must exist.
    env:     optional environment mapping; a copy of ``os.environ`` is used
             when omitted.

    Raises EMError if ``cwd`` is not a directory, the pidfile cannot be
    written, or the intermediate child reports a non-zero wait status.
    """
    # Make sure the cwd is OK
    if cwd is not None and not os.path.isdir(cwd):
        raise EMError("Working directory does not exist: %s" % cwd)

    if env is None:
        env = dict(os.environ)

    # This is just to ensure we get an exception in the calling process if
    # there is something wrong with the pidfile
    if pidfile is not None:
        try:
            open(pidfile, "w").close()
        except Exception:
            raise EMError("Unable to write pidfile: %s" % pidfile)

    pid1 = os.fork()
    if pid1 == 0:
        # Intermediate child. It must never return into the caller's code:
        # whatever happens below, leave through os._exit(). Any failure is
        # reported to the parent through a non-zero exit status.
        status = 1
        try:
            if cwd is not None:
                os.chdir(cwd)

            pid2 = os.fork()
            if pid2 == 0:
                # Worker. execve() only comes back by raising, so the
                # finally clause is what guarantees a failed exec ends
                # this process instead of unwinding the copied stack.
                try:
                    os.execve("/bin/sh", ["/bin/sh", "-c", script], env)
                finally:
                    os._exit(255)

            if pidfile is not None:
                with open(pidfile, "w") as f:
                    f.write("%d\n" % pid2)

            status = 0
        finally:
            os._exit(status)

    # The value returned by waitpid is the raw wait status of the
    # intermediate child; zero means it exited cleanly.
    pid, exitcode = os.waitpid(pid1, 0)
    if exitcode != 0:
        raise EMError("Non-zero exitcode launching script: %d" % exitcode)
def route_create_ensemble():
    """Create a new ensemble for the current user.

    Reads ``name``, ``max_running`` (default 1) and ``max_planning``
    (default 1) from the request form, stores the ensemble through the
    Ensembles DAO and commits the session.

    Returns the result of ``api.json_created`` for the external URL of the
    new ensemble.

    Raises EMError when ``name`` is missing from the form.
    """
    form = request.form

    name = form.get("name", None)
    if name is None:
        raise EMError("Specify ensemble name")

    # NOTE(review): both limits are passed on exactly as received
    max_running = form.get("max_running", 1)
    max_planning = form.get("max_planning", 1)

    ensembles = Ensembles(g.session)
    ensembles.create_ensemble(g.user.username, name, max_running, max_planning)
    g.session.commit()

    location = url_for("route_get_ensemble", name=name, _external=True)
    return api.json_created(location)
def get_bin(name, exe):
    """Return the directory that contains the executable ``exe``.

    The home directory is taken from 1) the ``name`` environment variable
    or 2) the ``name`` entry of the application config; when one is set,
    ``exe`` is expected in its ``bin`` subdirectory. Otherwise 3) ``exe``
    is searched for on PATH.

    Raises EMError if the configured home or its ``bin`` is not a
    directory, or if the resulting executable path is not a file.
    """
    home = os.getenv(name, emapp.config.get(name, None))

    if home is None:
        # No home configured anywhere: fall back to a PATH search
        exepath = pathfind(exe)
    else:
        if not os.path.isdir(home):
            raise EMError("{} is not a directory: {}".format(name, home))

        bindir = os.path.join(home, "bin")
        if not os.path.isdir(bindir):
            raise EMError("{}/bin is not a directory: {}".format(name, bindir))

        exepath = os.path.join(bindir, exe)

    if not os.path.isfile(exepath):
        raise EMError("{} not found: {}".format(exe, exepath))

    return os.path.dirname(exepath)
def get_wf_uuid(self):
    """Get the workflow UUID from the braindump file.

    Reads ``braindump.txt`` in the workflow's submit directory and returns
    the second whitespace-separated token of the last line that starts
    with ``wf_uuid``.

    Raises EMError when the file is missing or holds no wf_uuid line.
    """
    path = os.path.join(self.find_submitdir(), "braindump.txt")
    if not os.path.isfile(path):
        raise EMError("braindump.txt not found")

    wf_uuid = None
    with open(path, "r") as lines:
        for line in lines:
            if line.startswith("wf_uuid"):
                wf_uuid = line.split()[1]

    if wf_uuid is None:
        raise EMError("wf_uuid not found in braindump.txt")
    return wf_uuid
def get_dashboard(self):
    """Get the dashboard record for the workflow.

    Queries DashboardWorkflow by the workflow's wf_uuid (as a string).
    Returns the record, or None (after a debug log) when the query raises
    NoResultFound.

    Raises EMError when the workflow has no wf_uuid.
    """
    wf_uuid = self.workflow.wf_uuid
    if wf_uuid is None:
        raise EMError("wf_uuid is none")

    try:
        query = self.dao.session.query(DashboardWorkflow)
        record = query.filter_by(wf_uuid=str(wf_uuid)).one()
    except NoResultFound:
        workflow_name = self.workflow.name
        log.debug("No dashboard record for workflow %s" % workflow_name)
        return None
    else:
        return record
def planning_successful(self):
    """Check to see if planning was successful.

    Planning counts as successful when the result file holds exit code 0
    and a submit directory can be found via find_submitdir().

    Returns True on success, False otherwise.

    Raises EMError when the result file does not exist.
    """
    resultfile = self.workflow.get_resultfile()

    if not os.path.exists(resultfile):
        raise EMError("Result file not found: %s" % resultfile)

    # Read through a context manager so the handle is closed promptly
    with open(resultfile, "r") as f:
        exitcode = int(f.read())

    if exitcode != 0:
        return False

    # A zero exit code alone is not enough: the submit dir must be
    # discoverable too. Any failure there is logged and reported as an
    # unsuccessful plan rather than propagated.
    try:
        self.find_submitdir()
    except Exception as e:
        log.exception(e)
        return False

    # Previously the function fell off the end here and returned None
    return True
def planning(self):
    """Check pidfile to see if the planner is still running.

    Returns True when the process whose pid is stored in the workflow's
    pidfile exists, False when there is no such process.

    Raises EMError when the pidfile is missing, and re-raises any OSError
    from the liveness probe other than "no such process".
    """
    import errno

    pidfile = self.workflow.get_pidfile()

    if not os.path.exists(pidfile):
        raise EMError("pidfile missing")

    # Read through a context manager so the handle is closed promptly
    with open(pidfile, "r") as f:
        pid = int(f.read())

    try:
        # Signal 0 delivers nothing; it only checks the pid can be signalled
        os.kill(pid, 0)
    except OSError as e:
        # ESRCH means the process is gone; anything else is a real error
        if e.errno != errno.ESRCH:
            raise
        return False

    # The probe succeeded, so the process is still running
    return True
def running_successful(self):
    """Assuming the workflow is done running, did it finish successfully?

    Returns True when the latest dashboard state has status 0, False
    otherwise.

    Raises EMError when there is no recent state record or the latest
    state is still WORKFLOW_STARTED.
    """
    latest = self.get_dashboard_state_for_running_workflow()

    still_running = latest is None or latest.state == "WORKFLOW_STARTED"
    if still_running:
        raise EMError("Workflow is running")

    return latest.status == 0