def get(self, timeout=0):  # @UnusedVariable
    if not self.told_you_ready:
        raise CompmakeBug("I didn't tell you it was ready.")
    if self.already_read:
        msg = 'Compmake BUG: should not call get() twice.'
        raise CompmakeBug(msg)
    self.already_read = True

    assert os.path.exists(self.retcode)
    with open(self.retcode, 'r') as f:
        ret_str = f.read()
    try:
        # The return code is parsed only to validate the file contents;
        # job failure itself is signaled through the pickled result dict.
        ret = int(ret_str)  # @UnusedVariable
    except ValueError:
        msg = 'Could not interpret file %r: %r.' % (self.retcode, ret_str)
        raise HostFailed(host='localhost', job_id=self.job_id,
                         reason=msg, bt='')

    try:
        with open(self.stderr, 'r') as f:
            stderr = 'Contents of %s:\n' % self.stderr + f.read()
        with open(self.stdout, 'r') as f:
            stdout = 'Contents of %s:\n' % self.stdout + f.read()

        if not os.path.exists(self.out_results):
            msg = 'Job succeeded but no %r found.' % self.out_results
            msg += '\n' + indent(stderr, 'stderr')
            msg += '\n' + indent(stdout, 'stdout')
            raise CompmakeBug(msg)

        res = safe_pickle_load(self.out_results)
        result_dict_raise_if_error(res)
        return res
    finally:
        # Clean up the communication files regardless of the outcome.
        fs = [self.stderr, self.stdout, self.out_results, self.retcode]
        for filename in fs:
            if os.path.exists(filename):
                os.unlink(filename)
def generate_job_id(base, context):
    """ Generates a unique job_id for the specified command.
        Takes into account job_prefix if that's defined. """
    stack = context.currently_executing
    job_prefix = context.get_comp_prefix()
    # If no prefix is set, use the innermost currently executing job.
    if job_prefix is None and len(stack) > 1:
        job_prefix = stack[-1]

    max_options = 1000 * 1000

    def get_options():
        counters = context.generate_job_id_counters
        if job_prefix not in counters:
            counters[job_prefix] = 2

        if job_prefix:
            yield '%s-%s' % (job_prefix, base)
            while counters[job_prefix] <= max_options:
                yield '%s-%s-%d' % (job_prefix, base, counters[job_prefix])
                counters[job_prefix] += 1
        else:
            yield base
            while counters[job_prefix] <= max_options:
                yield '%s-%d' % (base, counters[job_prefix])
                counters[job_prefix] += 1

    db = context.get_compmake_db()
    cq = CacheQueryDB(db)
    for x in get_options():
        # Never reuse an id already defined in this session.
        defined = context.was_job_defined_in_this_session(x)
        if defined:
            continue
        if not cq.job_exists(x):
            return x
        # The id exists in the DB; reuse it only if it was defined
        # by the same stack (i.e. it is the same job, redefined).
        defined_by = cq.get_job(x).defined_by
        if defined_by == stack:
            return x

    raise CompmakeBug('Could not generate a job id')
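# Illustration only (not part of compmake): a standalone generator that
# mirrors the candidate sequence produced by get_options() above, without
# the shared counters. With a prefix, the ids tried are
# '<prefix>-<base>', '<prefix>-<base>-2', '<prefix>-<base>-3', ...;
# without one, '<base>', '<base>-2', '<base>-3', ...
def _candidate_ids(base, job_prefix=None, start=2):
    stem = '%s-%s' % (job_prefix, base) if job_prefix else base
    yield stem
    n = start
    while True:
        yield '%s-%d' % (stem, n)
        n += 1

# e.g. list(itertools.islice(_candidate_ids('draw', 'report'), 3))
#      -> ['report-draw', 'report-draw-2', 'report-draw-3']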
def get_job_userobject_resolved(job_id, db):
    """ Gets the job's result, and recursively substitutes all
        dependencies. """
    ob = get_job_userobject(job_id, db)
    all_deps = collect_dependencies(ob)
    for dep in all_deps:
        if not job_userobject_exists(dep, db):
            msg = ('Cannot resolve %r: dependency %r was not done.'
                   % (job_id, dep))
            raise CompmakeBug(msg)
    return substitute_dependencies(ob, db)
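# Toy analogue of the substitution step above (illustration only, using
# plain strings and a dict instead of compmake's promises and storage):
# the stored user object may contain placeholders for other jobs, and
# resolution replaces each placeholder with that job's stored result.
def _toy_substitute(ob, results):
    if isinstance(ob, str) and ob.startswith('job:'):
        return results[ob[4:]]
    if isinstance(ob, list):
        return [_toy_substitute(x, results) for x in ob]
    if isinstance(ob, dict):
        return {k: _toy_substitute(v, results) for k, v in ob.items()}
    return ob

# _toy_substitute(['job:a', 2], {'a': 41}) -> [41, 2]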
def db_job_add_dynamic_children(job_id, children, returned_by, db):
    job = get_job(job_id, db)
    if returned_by not in job.children:
        msg = '%r does not know it has child %r' % (job_id, returned_by)
        raise CompmakeBug(msg)

    job.children.update(children)
    job.dynamic_children[returned_by] = children
    set_job(job_id, job, db)

    # Read back to make sure the write was not lost to a concurrent writer.
    job2 = get_job(job_id, db)
    assert job2.children == job.children, 'Race condition'
    assert job2.dynamic_children == job.dynamic_children, 'Race condition'
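# Sketch of the bookkeeping above: if job 'a' already has child 'gen',
# and 'gen' at runtime returns the new jobs {'b', 'c'}, then afterwards:
#
#   job.children         == {'gen', 'b', 'c'}
#   job.dynamic_children == {'gen': {'b', 'c'}}
#
# (Assumes job.children is a set and children is passed as a set, as
# implied by the update() call above.)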
def jobs_defined(job_id, db):
    """ Gets the jobs defined by the given job.
        The job must be DONE. """
    check_isinstance(job_id, six.string_types)
    with trace_bugs('jobs_defined(%r)' % job_id):
        cache = get_job_cache(job_id, db=db)
        if cache.state != Cache.DONE:
            msg = ('Cannot get jobs_defined for job not done '
                   '(status: %s)' % Cache.state2desc[cache.state])
            raise CompmakeBug(msg)
        return set(cache.jobs_defined)
def __getitem__(self, key):
    if trace_queries:
        logger.debug('R %s' % str(key))
    self.check_existence()
    filename = self.filename_for_key(key)
    if not os.path.exists(filename):
        msg = 'Could not find key %r.' % key
        msg += '\n file: %s' % filename
        raise CompmakeBug(msg)
    try:
        return safe_pickle_load(filename)
    except Exception as e:
        msg = ('Could not unpickle data for key %r.\n file: %s'
               % (key, filename))
        logger.error(msg)
        logger.exception(e)
        msg += '\n' + traceback.format_exc()
        raise CompmakeBug(msg)
def result_dict_raise_if_error(res):
    from compmake.exceptions import JobFailed
    from compmake.exceptions import HostFailed
    from compmake.exceptions import CompmakeBug
    from compmake.exceptions import JobInterrupted

    result_dict_check(res)

    if 'fail' in res:
        raise JobFailed.from_dict(res)
    if 'abort' in res:
        raise HostFailed.from_dict(res)
    if 'bug' in res:
        raise CompmakeBug.from_dict(res)
    if 'interrupted' in res:
        raise JobInterrupted.from_dict(res)
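# Hedged sketch (not part of compmake): the result dict is the picklable
# wire format for shipping job outcomes across process boundaries; each
# error key ('fail', 'abort', 'bug', 'interrupted') maps back to its
# exception type. HostFailed kwargs copied from ready() below.
def _roundtrip_example():
    from compmake.exceptions import HostFailed
    try:
        raise HostFailed(host='localhost', job_id='some-job',
                         reason='demo', bt='')
    except HostFailed as e:
        res = e.get_result_dict()    # a dict containing the 'abort' key
    result_dict_raise_if_error(res)  # raises HostFailed again, caller-side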
def ready(self):
    if self.told_you_ready:
        raise CompmakeBug('Should not call ready() twice.')

    # Only query SGE accounting once every 20 polls.
    if self.npolls % 20 == 1:
        try:
            qacct = get_qacct(self.sge_id)
            if 'failed' in qacct and qacct['failed'] != '0':
                reason = ('Job schedule failed: %s\n%s'
                          % (qacct['failed'], qacct))
                raise HostFailed(host="xxx",
                                 job_id=self.job_id,
                                 reason=reason, bt="")  # XXX
        except JobNotRunYet:
            qacct = None
    else:
        qacct = None

    self.npolls += 1

    if os.path.exists(self.retcode):
        self.told_you_ready = True
        return True

    if qacct is not None:
        msg = ('The file %r does not exist but it looks like the job '
               'is done' % self.retcode)
        msg += '\n %s ' % qacct
        # Most likely this is just NFS not having synced yet,
        # so do not treat it as an error.
        # raise CompmakeBug(msg)
    return False
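# Hedged helper (not part of compmake) showing the intended call
# protocol: poll ready() until it returns True, then consume the result
# exactly once via get(), which also removes the communication files.
def _wait_for(async_result, interval=1.0):
    import time
    while not async_result.ready():
        time.sleep(interval)
    return async_result.get()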
def mvac_job_rdb_worker(job_id, rdb_basepath, cwd, misc):
    from compmake.jobs.actions import make
    rdb = StorageFilesystem(rdb_basepath)
    context = Context(rdb)
    if not os.path.exists(cwd):
        print('cwd %r does not exist; creating it' % cwd)
        os.makedirs(cwd)
    os.chdir(cwd)
    try:
        res = make(job_id, context=context)
    except JobFailed as e:
        res = e.get_result_dict()
    except HostFailed as e:
        res = e.get_result_dict()
    except CompmakeBug as e:
        res = e.get_result_dict()
    except Exception as e:
        res = CompmakeBug(str(e)).get_result_dict()
    result_dict_check(res)
    print('res: %r' % res)
    return res
def check_consistency(args, context, cq, raise_if_error=False):  # @ReservedAssignment
    """ Checks in the DB that the relations between jobs are consistent. """
    db = context.get_compmake_db()

    # Note: do not use cq here; query the DB directly.
    if not args:
        job_list = all_jobs(db=db)
    else:
        job_list = parse_job_list(args, context=context)

    job_list = list(job_list)

    errors = {}
    for job_id in job_list:
        try:
            ok, reasons = check_job(job_id, context)
            if not ok:
                errors[job_id] = reasons
        except CompmakeBug as e:
            errors[job_id] = ['bug: %s' % e]

    if errors:
        msg = 'Inconsistency with %d jobs:' % len(errors)
        for job_id, es in errors.items():
            msg += '\n- job %r:\n%s' % (job_id, '\n'.join(es))
        if raise_if_error:
            raise CompmakeBug(msg)
        else:
            error(msg)

    return 0
def parmake_job2_new_process(args):
    """ Starts the job in a new compmake process. """
    (job_id, context) = args
    compmake_bin = which('compmake')

    db = context.get_compmake_db()
    storage = db.basepath  # XXX:
    where = os.path.join(storage, 'parmake_job2_new_process')
    # Create the output directory (and storage with it) if needed.
    if not os.path.exists(where):
        try:
            os.makedirs(where)
        except:
            pass

    out_result = os.path.join(where, '%s.results.pickle' % job_id)
    out_result = os.path.abspath(out_result)
    cmd = [compmake_bin, storage]

    if not all_disabled():
        cmd += ['--contracts']

    cmd += [
        '--status_line_enabled', '0',
        '--colorize', '0',
        '-c',
        'make_single out_result=%s %s' % (out_result, job_id),
    ]

    cwd = os.getcwd()
    cmd_res = system_cmd_result(cwd, cmd,
                                display_stdout=False,
                                display_stderr=False,
                                raise_on_error=False,
                                capture_keyboard_interrupt=False)
    ret = cmd_res.ret

    if ret == CompmakeConstants.RET_CODE_JOB_FAILED:  # XXX:
        msg = 'Job %r failed in external process' % job_id
        msg += indent(cmd_res.stdout, 'stdout| ')
        msg += indent(cmd_res.stderr, 'stderr| ')

        res = safe_pickle_load(out_result)
        os.unlink(out_result)
        result_dict_check(res)
        raise JobFailed.from_dict(res)
    elif ret != 0:
        msg = 'Host failed while doing %r' % job_id
        msg += '\n cmd: %s' % ' '.join(cmd)
        msg += '\n' + indent(cmd_res.stdout, 'stdout| ')
        msg += '\n' + indent(cmd_res.stderr, 'stderr| ')
        raise CompmakeBug(msg)  # XXX:

    res = safe_pickle_load(out_result)
    os.unlink(out_result)
    result_dict_check(res)
    return res
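# For reference, the spawned command line (sketch, paths elided) is:
#
#   compmake <storage> [--contracts] --status_line_enabled 0 --colorize 0 \
#       -c 'make_single out_result=<out_result> <job_id>'
#
# i.e. a fresh compmake instance runs exactly one job and pickles its
# result dict to out_result, which this parent process then loads;
# --contracts is passed only when contract checking is enabled.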