def process_event(self, event):
    """
    Record a single monitoring event in the task log.

    Events whose ``type`` starts with ``task-`` are translated through
    ``task_map`` and handed to ``mongodb``; events whose ``type`` starts
    with ``worker-`` are accepted but nothing is done with them.
    """
    kind = event['type']

    if kind.startswith('task-'):
        # The pipeline philosophy is to track every job by what we call its
        # 'jobid', a string of the form
        #   TE_XXXXXXXXXXXX_XXX-and-MO-XXXXXXXXXXXX_XXX-[epoch]
        # This is also the uuid of a non-error Test Result
        jobid = event['uuid']
        stamp = event['timestamp']
        status = task_map.get(kind.split('-')[1], '')
        retries = event.get('retries', 0)

        if status == '':
            logger.error('failed to translate %r' % kind)
            return

        # a fresh log entry is only written the first time a worker_run task
        # is queued (i.e. when it is not a retry)
        first_queueing = (
            status == 'queued'
            and not retries
            and event.get('name') == 'pipeline.tasks.worker_run'
        )
        if first_queueing:
            te, mo = database.jobid_to_kimcodes(jobid)
            te = str(kimobjects.kim_obj(te))
            mo = str(kimobjects.kim_obj(mo))

            mongodb.task_log(jobid, status=status, job=(te, mo), ctime=stamp)
            logger.info('Logging %s', jobid)

        updated = mongodb.task_update(jobid, status=status, mtime=stamp)
        if updated:
            logger.info('Updating %s -> %s', jobid, status)

    if kind.startswith('worker-'):
        pass
def _insert_match(self, obj0, obj1, check=False):
    """
    Insert the match data for a single pair of kim objects into `matches`.

    Returns the index of the match (the existing one if the pair is already
    stored), or None when `check` is set and the pair is not a valid match.
    """
    cur = self.conn.cursor()
    test = str(obj0)
    model = str(obj1)

    # pairs are stored in (runner, subject) order
    if database.get_leader(test).lower() != 'te':
        test, model = model, test

    # an item we already have simply reports its index
    existing = cur.execute(
        "SELECT * FROM matches WHERE test==? AND model==?", (test, model)
    ).fetchone()
    if existing is not None:
        return existing[0]

    # if it actually isn't a match, don't put it in!
    if check:
        is_match = kimapi.valid_match(
            kimobjects.kim_obj(test), kimobjects.kim_obj(model)
        )
        if not is_match:
            return None

    # not present yet, so insert it
    cur.execute("INSERT INTO matches VALUES (NULL, ?, ?)", (test, model))
    new_id = cur.lastrowid
    self.conn.commit()
    return new_id
def runpair(self, runner, subject, status='approved', priority='normal'):
    """
    Run a specific (runner, subject) pair.

    The priority is first passed through network.transform_priority, the
    local state is synchronized, and the pair is then handed to
    check_dependencies_and_push as kim objects.
    """
    priority = network.transform_priority(priority)

    message = "Director got update message for (%r, %r) (%r | %r)" % (
        runner, subject, status, priority
    )
    self.logger.info(message)

    self.synchronize()

    pair = (kimobjects.kim_obj(runner), kimobjects.kim_obj(subject))
    self.check_dependencies_and_push(pair, priority, status)
def insert(self, obj):
    """
    Insert KIM item into the object collection, compiling it and
    calculating all possible matches along the way.  Because of this,
    insertion can be quite slow.
    """
    name = str(obj)
    kobj = kimobjects.kim_obj(name)
    self._typ(name)  # return value is not used below

    # nothing to do when it is already in our cache and built
    if self.get(name):
        return

    logger.info("Inserting into database %r" % name)

    # a driver is inserted together with all of its children
    if isinstance(kobj, (kimobjects.TestDriver, kimobjects.ModelDriver)):
        kobj.make()
        self._insert_driver(kobj)
        for child in kobj.children:
            self.insert(child)
        return

    # tests and models are built and inserted; anything else is left alone
    if isinstance(kobj, (kimobjects.Test, kimobjects.Model)):
        kobj.make()
        self._insert_obj(kobj)
def result_pair(uuid):
    """
    Query the "obj" database (history on, limit 1) for the entry whose
    test-result-id is `uuid`, projecting the runner and subject kimcodes,
    and return a generator yielding a kim object for each item returned.
    """
    spec = {
        "database": "obj",
        "limit": 1,
        "history": "on",
        "query": {"test-result-id": uuid},
        "project": ["runner.kimcode", "subject.kimcode"],
    }
    found = kimquery.query(spec, decode=True)
    return (kimobjects.kim_obj(code) for code in found)
def _create_tempdir(self):
    """
    Create a temporary running directory and copy the runner's contents
    into it.  The kim object for the copy is stored on `self.runner_temp`.
    """
    # <name>_running<result_code>__<id>
    tempname = "".join((
        self.runner.kim_code_name,
        "_running",
        self.result_code,
        "__",
        self.runner.kim_code_id,
    ))
    self.runner_temp = kimobjects.kim_obj(
        self.runner.kim_code, search=False, subdir=tempname
    )
    shutil.copytree(self.runner.path, self.runner_temp.path)
def rsync_kimid(self, kimid, approved=True, precheck=0):
    """
    Read a single updated item, and any drivers it lists, via
    rsync_tools.gateway_read.

    kimid    -- the KIM ID of the item to read
    approved -- forwarded unchanged to rsync_tools.gateway_read
    precheck -- 0 builds the kim object normally, 1 or 2 builds it with
                precheck=True; also forwarded to rsync_tools.gateway_read

    Raises Exception when `precheck` is not 0, 1 or 2.
    """
    # read the single updated item
    rsync_tools.gateway_read(kimid, approved=approved, precheck=precheck)

    if precheck == 0:
        obj = kimobjects.kim_obj(kimid)
    elif precheck in (1, 2):
        obj = kimobjects.kim_obj(kimid, precheck=True)
    else:
        message = (
            "Function rsync_kimid on Gateway received invalid precheck code %r"
            % precheck
        )
        self.logger.error(message)
        raise Exception(message)

    # also download potential drivers.  The leader is taken from `obj` so
    # that the lookup uses the same precheck setting as above, rather than
    # building a second kim object without it.
    if obj.kim_code_leader in ('TE', 'MO'):
        for drv in obj.drivers:
            rsync_tools.gateway_read(
                str(drv), approved=approved, precheck=precheck
            )
def path(cand):
    """
    Return where the kim object for `cand` lives: its `executable`
    attribute when it has one, otherwise its `path`.
    """
    kobj = kimobjects.kim_obj(cand)
    try:
        location = kobj.executable
    except AttributeError:
        # no executable on this object, fall back to its path
        location = kobj.path

    logger.debug("thinks the path is %r", location)
    return location
def path(cand):
    """
    Resolve `cand` to a kim object and return its `executable`, or its
    `path` when the object has no `executable` attribute.
    """
    target = kimobjects.kim_obj(cand)
    try:
        resolved = target.executable
    except AttributeError:
        # objects without an executable are located by their path instead
        resolved = target.path

    logger.debug("thinks the path is %r", resolved)
    return resolved
def result_pair(uuid):
    """
    Query the "obj" database (limit 1) for the entry whose uuid is `uuid`,
    projecting the runner and subject kimcodes, and return a generator
    yielding a kim object for each item returned.
    """
    spec = {
        "database": "obj",
        "limit": 1,
        "query": {"uuid": uuid},
        "project": ["runner.kimcode", "subject.kimcode"],
    }
    found = kimquery.query(spec, decode=True)
    return (kimobjects.kim_obj(code) for code in found)
def build(self, obj):
    """
    Build a particular object.  Objects listed in the build table are
    taken to be built already, so that is checked first and nothing is
    done for them.
    """
    name = str(obj)
    if self.isbuilt(name):
        return

    kimobjects.kim_obj(name).make()

    # remember that this object has now been built
    cur = self.conn.cursor()
    cur.execute("INSERT INTO build VALUES(?)", (name,))
    self.conn.commit()
def _create_tempdir(self):
    """
    Create a temporary running directory under cf.WORKER_RUNNING_PATH and
    copy the runner's contents into it.  The kim object for the copy is
    stored on `self.runner_temp`.
    """
    running_root = cf.WORKER_RUNNING_PATH
    if not os.path.exists(running_root):
        os.makedirs(running_root)

    # <name>_running<result_code>__<id>
    dirname = "{0}_running{1}__{2}".format(
        self.runner.kim_code_name,
        self.result_code,
        self.runner.kim_code_id,
    )
    target = os.path.join(running_root, dirname)

    self.runner_temp = kimobjects.kim_obj(
        self.runner.kim_code, search=False, abspath=target
    )
    shutil.copytree(self.runner.path, self.runner_temp.path)
def delete(self, kimid):
    """
    Delete a KIM object, and any children it has, from the Director's
    internal database and local repository.  Nothing beyond the log line
    happens when `kimid` is not a kim code.
    """
    self.logger.info("Deleting KIMObject %r from director" % kimid)

    if not database.iskimcode(kimid):
        return

    obj = kimobjects.kim_obj(kimid)

    # If kimid is a Test Driver or Model Driver, remove all of its
    # associated Tests or Models first.  (Tests, Models, Test Verifications
    # and Model Verifications do not have any children.)
    for child in obj.children or ():
        # Remove from internal database of Director
        self.db.delete_objects([str(child)])
        # Delete from local repository of Director
        child.delete()

    # Finally, remove kimid itself from the Director's internal database
    # and delete it from its local repository
    self.db.delete_objects([kimid])
    obj.delete()
def _insert_obj(self, kobj):
    """
    Insert an object into the `obj` table, then record each of its matches
    and, for tests, the runtime dependencies of each match.
    """
    # the first driver, if there is one, is stored alongside the object
    driver = str(kobj.drivers[0]) if len(kobj.drivers) > 0 else None

    row = (str(kobj), driver, kobj.kim_code_leader.lower())
    cur = self.conn.cursor()
    cur.execute("INSERT OR IGNORE INTO obj VALUES (?,?,?)", row)
    self.conn.commit()

    for match in kobj.matches:
        pid = self._insert_match(kobj, match)

        # dependencies only apply to tests
        if isinstance(kobj, kimobjects.Test):
            for dep in kobj.runtime_dependencies(match):
                dep_objs = [kimobjects.kim_obj(code) for code in dep]
                did = self._insert_match(*dep_objs, check=True)
                self._insert_dep(pid, did)
def process_kimcode_approved(self, kimid, priority):
    """
    Push all of the jobs that need to be done given an update.

    kimid    -- KIM ID of the item that was updated
    priority -- priority handed on to check_dependencies_and_push

    An item whose leader is not one of TV, MV, TE, MO, TD or MD is logged
    as an error and no jobs are pushed for it.
    """
    # first, sync up the local repository and enter new items
    self.synchronize()

    # process the update, get the type, etc.
    obj = kimobjects.kim_obj(kimid)
    leader = obj.kim_code_leader

    # Case out each type of update we might receive and submit the
    # appropriate jobs to the queue.  For verification checks, submit all
    # complementary objects, otherwise use the DB to submit matches only
    if leader == "TV":
        subjects = list(kimobjects.Test.all())
        runners = [obj]*ll(subjects)

    elif leader == "MV":
        subjects = list(kimobjects.Model.all())
        runners = [obj]*ll(subjects)

    elif leader in ["TE", "MO", "TD", "MD"]:
        # get the valid matches from the database which is current up to
        # this update request.  handle the runner, subject pairs
        matches = self.db.matches(obj)

        runners, subjects = [], []
        for t, m in matches:
            runners.append(kimobjects.Test(t))
            subjects.append(kimobjects.Model(m))

    else:
        self.logger.error("Tried to update an invalid KIM ID!: %r", kimid)
        # nothing can be pushed for an unknown leader; stop here rather than
        # fall through to the loop below with runners/subjects unbound
        return

    for test, model in zip(runners, subjects):
        logger.debug(
            "About to push (%r, %r), checking dependencies" % (test, model)
        )
        self.check_dependencies_and_push((test, model), priority, 'approved')
def list2obj(l):
    """
    Turn the first two entries of `l` into a pair of kim objects, each
    looked up by its string form with search=True.
    """
    first = kimobjects.kim_obj(str(l[0]), search=True)
    second = kimobjects.kim_obj(str(l[1]), search=True)
    return (first, second)
def run(self, job, jobid, depends=None, status='approved'):
    """
    Sync, build and run a single (runner, subject) job on this worker.

    job     -- pair of kim codes, (runner, subject)
    jobid   -- identifier of the job; used as the computation's result code
    depends -- dependencies handed to the rsync read (default: none)
    status  -- 'pending' requests a pending read, any other value does not

    Raises cf.PipelineRuntimeError when a (TE, MO) pair is not a valid
    match and cf.RsyncRuntimeError when the results cannot be synced back.
    An exception raised by the computation itself is re-raised after the
    results have been synced.
    """
    # a fresh list per call instead of a shared mutable default
    if depends is None:
        depends = []

    self.logger.info("Worker got job request %r", jobid)

    self.check_kimid(job[0])
    self.check_kimid(job[1])

    # check to see if this is a verifier or an actual test
    pending = (status == 'pending')

    self.logger.info("Rsyncing %r %r", job, depends)
    locker.lock_rsync_read(job, depends, pending)

    runner_kcode, subject_kcode = job
    runner = kimobjects.kim_obj(runner_kcode)
    subject = kimobjects.kim_obj(subject_kcode)

    self.logger.info("Building %r %r", runner, subject)
    locker.lock_build(runner)
    locker.lock_build(subject)

    if runner.kim_code_leader == 'TE' and subject.kim_code_leader == 'MO':
        # First check that the thing we are trying to compute is a match!
        if not kimapi.valid_match(runner, subject):
            self.logger.error("Invalid match (%r, %r)", runner, subject)
            raise cf.PipelineRuntimeError(
                "Invalid match reached worker (%r, %r)" % (runner, subject)
            )

    self.logger.info("Running (%r,%r)", runner, subject)
    comp = compute.Computation(runner, subject, result_code=jobid)

    # The next bit of logic is complicated in order to get the job to
    # resend upon SystemExit or KeyboardInterrupt errors, but for no other
    # reason.
    error = None
    try:
        comp.run(extrainfo=self.boxinfo)
    except (KeyboardInterrupt, SystemExit):
        raise
    except Exception as exc:
        # Bind to a separate name: Python 3 unbinds the `as` target when the
        # except clause ends, and `error` is still needed further down.
        error = exc
        self.logger.error("Encountered errors when running %r" % jobid)
    else:
        self.logger.debug("Sending result message back")

    # Try to rsync the results back to the gateway.  If this fails, it is
    # not really the Test's fault.  Therefore, treat this as another type of
    # failure (RsyncRuntimeError) so that the task is sent back into the
    # queue for a retry
    if comp.result_path:
        try:
            locker.lock_rsync_write(jobid, comp.result_path)
            self.send_result(jobid, comp.uuid)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception as exc:
            self.logger.exception("Worker could not rsync results! %r", exc)
            raise cf.RsyncRuntimeError()

    # only now surface a failure of the computation itself
    if error is not None:
        raise error

    self.logger.info("Worker completed job %r", jobid)
def run(self):
    """
    Start to listen, tunnels should be open and ready.

    Connects, watches the jobs tube and then loops forever: each reserved
    job is buried, announced as running, synced, built and run, and a
    message is posted to the results tube or the errors tube before the job
    is deleted.  This method does not return.
    """
    self.connect()
    self.bean.watch(cf.TUBE_JOBS)

    # Endless loop that awaits jobs to run
    while True:
        self.logger.info("Waiting for jobs...")
        job = self.bean.reserve()
        self.job = job

        # if appears that there is a 120sec re-birth of jobs that have been
        # reserved and I do not want to put an artificial time limit, so
        # let's bury jobs when we get them
        job.bury()
        self.comm.send_msg("running", job.body)

        # update the repository, attempt to run the job and return the
        # results to the director
        try:
            jobmsg = network.Message(string=job.body)
            # NOTE(review): this compares the whole message object against
            # the string "pending" -- confirm that is what is intended
            pending = True if jobmsg == "pending" else False
        except simplejson.JSONDecodeError:
            # message is not JSON decodeable
            self.logger.error("Did not recieve valid JSON, {}".format(
                job.body))
            job.delete()
            continue
        except KeyError:
            # message does not have the right keys
            self.logger.error(
                "Did not recieve a valid message, missing key: {}".format(
                    job.body))
            job.delete()
            continue

        self.jobmsg = jobmsg

        try:
            # only the parse itself matters here; the pieces are not used
            name, leader, num, version = database.parse_kim_code(
                jobmsg.job[0])
        except InvalidKIMID as e:
            # we were not given a valid kimid
            self.logger.error("Could not parse {} as a valid KIMID".format(
                jobmsg.job[0]))
            self.job_message(jobmsg, errors=e, tube=cf.TUBE_ERRORS)
            job.delete()
            continue

        try:
            # check to see if this is a verifier or an actual test
            with self.rsynclock:
                self.logger.info("Rsyncing from repo %r",
                                 jobmsg.job + jobmsg.depends)
                rsync_tools.worker_read(*jobmsg.job, depends=jobmsg.depends,
                                        pending=pending)

            runner_kcode, subject_kcode = jobmsg.job
            runner = kimobjects.kim_obj(runner_kcode)
            subject = kimobjects.kim_obj(subject_kcode)

            self.builder.lock_build(runner)
            self.builder.lock_build(subject)

            self.logger.info("Running (%r,%r)", runner, subject)
            comp = compute.Computation(runner, subject,
                                       result_code=jobmsg.jobid)

            errormsg = None
            try:
                comp.run(extrainfo=self.boxinfo)
            except Exception as e:
                errormsg = e
                self.logger.exception("Errors occured, moving to er/")
            else:
                self.logger.debug("Sending result message back")
            finally:
                self.logger.info("Rsyncing results %r", jobmsg.jobid)
                with self.rsynclock:
                    rsync_tools.worker_write(comp.result_path)
                if errormsg is not None:
                    # report the saved exception: the `as e` name above is
                    # unbound once its except clause ends on Python 3
                    self.job_message(jobmsg, errors=errormsg,
                                     tube=cf.TUBE_ERRORS)
                else:
                    self.job_message(jobmsg, tube=cf.TUBE_RESULTS)
                job.delete()

        except Exception as e:
            self.logger.exception(
                "Failed to initalize run, deleting... %r" % e)
            self.job_message(jobmsg, errors=e, tube=cf.TUBE_ERRORS)
            job.delete()

        # forget the finished job (resets the same attributes set above)
        self.job = None
        self.jobmsg = None
def precheck(self, kimid, status='pending'):
    """
    Perform pre-checks on a new object.

    Returns the list of error messages gathered; the list is empty when no
    check failed.  A failure to sync, load, fetch drivers for, or build the
    object returns immediately with that single error.
    """
    errors = []

    def _bail(message):
        # record a fatal problem, log what we have and hand back the errors
        errors.append(message)
        self.logger.error('%r', errors)
        return errors

    if cf.DIRECTOR_NOSYNC:
        return _bail('Precheck failed: DIRECTOR_NOSYNC must be set to False')

    approved = (status == 'approved')
    self.logger.info("Running pre-checks for KIM item %r" % kimid)

    # try to actually get the kimobject
    if not cf.DIRECTOR_NOSYNC:
        if approved:
            # Read from Gateway's local 'precheck' repository to local
            # 'precheck' repository
            rsync_tools.director_approved_read(precheck=2)
        else:
            rsync_tools.director_pending_read(kimid, precheck=2)

    # Can we use our ORM?
    # NOTE: This is redundant with a check done on the Gateway, but it's
    # not expensive
    try:
        obj = kimobjects.kim_obj(kimid, precheck=True)
    except Exception as e:
        return _bail('Could not initialize KIMObjects ORM:\n%r' % e)

    # try to get the drivers so we can start to build, etc
    try:
        if not cf.DIRECTOR_NOSYNC:
            drivers = list(obj.drivers)
            self.logger.info(
                "Retrieving drivers for %r, %r" % (obj, drivers)
            )
            for driver in drivers:
                rsync_tools.director_pending_read(str(driver), precheck=2)
    except Exception as e:
        return _bail(
            'Could not find drivers associated with object:\n%r' % e
        )

    # can it be built?
    try:
        obj.make(precheck=True)
    except Exception as e:
        return _bail('KIMObject could not be built using `make`:\n%r' % e)

    def _assert(condition):
        if not condition:
            raise AssertionError()

    # each entry pairs a probe with the message recorded when it raises
    checks_orm = (
        (lambda o: o.drivers,
         'Searching for drivers failed'),
        (lambda o: _assert(o.kimspec),
         'Does not contain kimspec'),
        (lambda o: _assert(o.kim_api_version),
         'No KIM API version specified'),
        (lambda o: _assert(o.pipeline_api_version),
         'No Pipeline API version specified'),
    )

    checks_runner = (
        (lambda t: t.processed_infile(
            next(kimobjects.Test.all(precheck=True))),
         'Could not template pipeline.stdin file'),
        (lambda t: t.runtime_dependencies('blank'),
         'Could not template dependencies file'),
        (lambda t: list(t.matches),
         'Matches could not be generated, exceptions'),
        (lambda t: _assert(list(t.matches)),
         'No valid matches found in system'),
    )

    checks_subject = (
        (lambda t: list(t.matches),
         'Matches could not be generated, errors.'),
        (lambda t: _assert(list(t.matches)),
         'No valid matches found in system'),
    )

    def _run_checks(check_list, *args):
        for probe, message in check_list:
            try:
                probe(*args)
            except Exception as e:
                errors.append(message)
                self.logger.error('%s:\n%r' % (message, e))

    _run_checks(checks_orm, obj)

    if isinstance(obj, kimobjects.Runner):
        _run_checks(checks_runner, obj)

    if isinstance(obj, kimobjects.Subject):
        _run_checks(checks_subject, obj)

    if errors:
        self.logger.error(
            "Returning errors for precheck of %r:\n%r" % (kimid, errors)
        )
    else:
        self.logger.info("No errors found during precheck for %r" % kimid)

    return errors
def sync_pending(self, kimid):
    """
    Get an object and its drivers from pending area.

    kimid -- KIM ID of the object to read; each entry of the resulting kim
             object's `drivers` is read as well
    """
    rsync_tools.director_pending_read(kimid)

    for driver in kimobjects.kim_obj(kimid).drivers:
        # read the driver itself rather than the parent item a second time
        rsync_tools.director_pending_read(str(driver))
def list2obj(l):
    """
    Turn the first two entries of `l` into a pair of kim objects, each
    looked up with search=True.
    """
    first = kimobjects.kim_obj(l[0], search=True)
    second = kimobjects.kim_obj(l[1], search=True)
    return (first, second)