def mvac_job_rdb_instance(context, job_id, volumes, rdb_vol_name, rdb_db, cwd):
    """
    Submits the job ``job_id`` to Multyvac, to be executed by the
    shared-RDB worker ``mvac_job_rdb_worker``.

    Parameters:
        context: compmake context; used to reach the job DB.
        job_id: id of the job to run remotely (also used as the Multyvac name).
        volumes: list of Multyvac volume names to mount.
        rdb_vol_name: name of the volume holding the remote DB.
        rdb_db: remote DB object; only its ``basepath`` is shipped.
        cwd: working directory for the remote process.

    Returns the Multyvac job object for the submitted job.
    """
    import multyvac

    # An empty-string value in the config means "no layer".
    layer = get_compmake_config('multyvac_layer')
    if not layer:
        layer = None

    all_volumes = volumes + [rdb_vol_name]

    # Declare the job's command as a dependency so Multyvac
    # serializes it along with the call.
    command, _, _ = get_job_args(job_id, db=context.get_compmake_db())
    misc = dict(deps=[command])

    core = get_compmake_config('multyvac_core')
    multyvac_job_id = multyvac.submit(mvac_job_rdb_worker,
                                      job_id=job_id,
                                      rdb_basepath=rdb_db.basepath,
                                      misc=misc,
                                      cwd=cwd,
                                      _core=core,
                                      _name=job_id,
                                      _layer=layer,
                                      _vol=all_volumes)
    multyvac_job = multyvac.get(multyvac_job_id)
    return multyvac_job
def mvac_instance(db, job_id, volumes, cwd):
    """
    Submits the job ``job_id`` directly to Multyvac (command + args +
    kwargs fetched from the DB) and returns the Multyvac job object.

    Note: ``cwd`` is currently accepted but not forwarded to the
    submission — presumably kept for signature parity; TODO confirm.
    """
    import multyvac

    # An empty-string value in the config means "no layer".
    configured_layer = get_compmake_config('multyvac_layer')
    layer = configured_layer if configured_layer else None

    command, args, kwargs = get_cmd_args_kwargs(job_id=job_id, db=db)
    core = get_compmake_config('multyvac_core')

    submitted_id = multyvac.submit(command, *args,
                                   _layer=layer,
                                   _vol=volumes,
                                   _name=job_id,
                                   _core=core,
                                   **kwargs)
    return multyvac.get(submitted_id)
def job_failed(context, event):  # @UnusedVariable
    """
    Handler for the 'job-failed' event: reports the failure reason and,
    when the 'echo' config flag is set, the backtrace as well.
    """
    job_id = event.kwargs['job_id']
    reason = event.kwargs['reason']
    bt = event.kwargs['bt']

    msg = 'Job %r failed:' % job_id
    msg += '\n' + indent(reason.strip(), '| ')

    if get_compmake_config('echo'):
        msg += '\n' + indent(bt.strip(), '> ')
    else:
        msg += '\nUse "config echo 1" to have errors displayed.'

    msg += '\nWrite "details %s" to inspect the error.' % job_id
    error(my_prefix + msg)
def sync_data_up(context, skipsync=False):
    """ Synchronizes the data to the cloud.
        Returns the list of volume names. """
    syncdirs = get_compmake_config('multyvac_sync_up')
    if not syncdirs:
        return []
    # Config value is a colon-separated list; empty entries are skipped.
    dirs = [d for d in syncdirs.split(':') if d]
    volumes = {sync_data_up_dir(d, skipsync) for d in dirs}
    return sorted(volumes)
def job_failed(context, event):  # @UnusedVariable
    """
    Event handler for a failed job: prints the failure reason, plus the
    backtrace when the 'echo' config flag is on.
    """
    job_id = event.kwargs['job_id']
    reason = event.kwargs['reason']
    bt = event.kwargs['bt']

    msg = 'Job %r failed:' % job_id
    msg += '\n' + indent(reason.strip(), '| ')

    if get_compmake_config('echo'):
        s = bt.strip()
        msg += '\n' + indent(s, '> ')
    else:
        msg += '\nUse "config echo 1" to have errors displayed.'

    msg += '\nWrite "details %s" to inspect the error.' % job_id
    error(my_prefix + msg)
def loop_until_something_finishes(self):
    """
    Polls up to 10 times for any finished job, publishing a
    'manager-loop' event and sleeping between unsuccessful polls.
    """
    self.check_invariants()

    manager_wait = get_compmake_config('manager_wait')

    # TODO: this should be loop_a_bit_and_then_let's try to instantiate
    # jobs in the ready queue
    for _ in range(10):  # XXX
        if self.check_any_finished():
            break
        publish(self.context, 'manager-loop',
                processing=list(self.processing))
        time.sleep(manager_wait)  # TODO: make param

    # Process events
    self.event_check()
    self.check_invariants()
def get_sync_dirs_down():
    """ Yields each non-empty entry of the colon-separated
        'multyvac_sync_down' config value. """
    configured = get_compmake_config('multyvac_sync_down')
    for entry in configured.split(':'):
        if entry:
            yield entry
def get_resources_status(self):
    """
    Decides whether this manager may accept another job right now.

    Returns a dict mapping a resource name ('soft', 'nproc', 'cpu',
    'mem', 'swap', 'autobal') to a tuple ``(available, reason)`` where
    ``available`` is a bool and ``reason`` a short explanation string
    (empty when available).
    """
    resource_available = {}

    # Scale up softly: require a minimum interval since the last
    # accepted job before accepting another.
    time_from_last = time.time() - self.last_accepted
    min_interval = get_compmake_config('min_proc_interval')
    if time_from_last < min_interval:
        resource_available['soft'] = (False, 'interval: %.2f < %.1f' % (
            time_from_last, min_interval))
    else:
        resource_available['soft'] = (True, '')

    # only one job at a time
    process_limit_ok = len(self.processing) < self.max_num_processing
    if not process_limit_ok:
        resource_available['nproc'] = (False, 'max %d nproc' % (
            self.max_num_processing))
        # this is enough to continue
        # (early return: no point checking further resources)
        return resource_available
    else:
        resource_available['nproc'] = (True, '')

    # TODO: add disk
    stats = CompmakeGlobalState.system_stats
    if not stats.available():  # psutil not installed
        # Without system stats, optimistically report cpu/mem available.
        resource_available['cpu'] = (True, 'n/a')
        resource_available['mem'] = (True, 'n/a')
    else:
        # avg_cpu = stats.avg_cpu_percent()
        max_cpu = stats.max_cpu_percent()
        cur_mem = stats.cur_phymem_usage_percent()
        cur_swap = stats.cur_virtmem_usage_percent()

        num_processing = len(self.processing)
        if num_processing > 0:  # at least one
            # NOTE(review): `ncpus` appears to be a module-level CPU
            # count — confirm where it is defined.
            if ncpus > 2:
                # Do this only for big machines
                # XXX: assumes we are cpu-bound
                # Project the CPU load assuming one more job adds
                # roughly one core's worth of work.
                estimated_cpu_increase = 1.0 / ncpus
                estimated_cpu = max_cpu + estimated_cpu_increase
                max_cpu_load = get_compmake_config('max_cpu_load')
                if max_cpu_load < 100 and estimated_cpu > max_cpu_load:
                    reason = ('cpu %d%%, proj %d%% > %d%%' %
                              (max_cpu, estimated_cpu, max_cpu_load))
                    resource_available['cpu'] = (False, reason)
                else:
                    resource_available['cpu'] = (True, '')

        max_mem_load = get_compmake_config('max_mem_load')
        if cur_mem > max_mem_load:
            reason = 'mem %s > %s' % (cur_mem, max_mem_load)
            resource_available['mem'] = (False, reason)
            # print('Memory load too high: %s\n\n' % cpu_load)
        else:
            resource_available['mem'] = (True, '')

        max_swap = get_compmake_config('max_swap')
        if cur_swap > max_swap:
            reason = 'swap %s > %s' % (cur_swap, max_swap)
            resource_available['swap'] = (False, reason)
            # print('Memory load too high: %s\n\n' % cpu_load)
        else:
            resource_available['swap'] = (True, '')

    # cooperating between parmake instances:
    # to balance the jobs, accept with probability
    # 1 / (1+n), where n is the number of current processes
    if True:
        autobal_after = get_compmake_config('autobal_after')
        n = len(self.processing)
        q = max(0, n - autobal_after)
        probability = 1.0 / (1 + q)
        if random.random() > probability:
            # Unlucky, let's try next time
            reason = (
                'after %d, p=%.2f' % (autobal_after, probability))
            resource_available['autobal'] = (False, reason)
        else:
            resource_available['autobal'] = (True, '')

    return resource_available
def get_resources_status(self):
    """
    Decides whether this manager may accept another job right now.

    Returns a dict mapping a resource name ('soft', 'nproc', 'cpu',
    'mem', 'swap', 'autobal') to a tuple ``(available, reason)`` where
    ``available`` is a bool and ``reason`` a short explanation string
    (empty when available).
    """
    resource_available = {}

    # Scale up softly: require a minimum interval since the last
    # accepted job before accepting another.
    time_from_last = time.time() - self.last_accepted
    min_interval = get_compmake_config('min_proc_interval')
    if time_from_last < min_interval:
        resource_available['soft'] = (False, 'interval: %.2f < %.1f' %
                                      (time_from_last, min_interval))
    else:
        resource_available['soft'] = (True, '')

    # only one job at a time
    process_limit_ok = len(self.processing) < self.max_num_processing
    if not process_limit_ok:
        resource_available['nproc'] = (False, 'max %d nproc' %
                                       (self.max_num_processing))
        # this is enough to continue
        # (early return: no point checking further resources)
        return resource_available
    else:
        resource_available['nproc'] = (True, '')

    # TODO: add disk
    stats = CompmakeGlobalState.system_stats
    if not stats.available():  # psutil not installed
        # Without system stats, optimistically report cpu/mem available.
        resource_available['cpu'] = (True, 'n/a')
        resource_available['mem'] = (True, 'n/a')
    else:
        # avg_cpu = stats.avg_cpu_percent()
        max_cpu = stats.max_cpu_percent()
        cur_mem = stats.cur_phymem_usage_percent()
        cur_swap = stats.cur_virtmem_usage_percent()

        num_processing = len(self.processing)
        if num_processing > 0:  # at least one
            # NOTE(review): `ncpus` appears to be a module-level CPU
            # count — confirm where it is defined.
            if ncpus > 2:
                # Do this only for big machines
                # XXX: assumes we are cpu-bound
                # Project the CPU load assuming one more job adds
                # roughly one core's worth of work.
                estimated_cpu_increase = 1.0 / ncpus
                estimated_cpu = max_cpu + estimated_cpu_increase
                max_cpu_load = get_compmake_config('max_cpu_load')
                if max_cpu_load < 100 and estimated_cpu > max_cpu_load:
                    reason = ('cpu %d%%, proj %d%% > %d%%' %
                              (max_cpu, estimated_cpu, max_cpu_load))
                    resource_available['cpu'] = (False, reason)
                else:
                    resource_available['cpu'] = (True, '')

        max_mem_load = get_compmake_config('max_mem_load')
        if cur_mem > max_mem_load:
            reason = 'mem %s > %s' % (cur_mem, max_mem_load)
            resource_available['mem'] = (False, reason)
            # print('Memory load too high: %s\n\n' % cpu_load)
        else:
            resource_available['mem'] = (True, '')

        max_swap = get_compmake_config('max_swap')
        if cur_swap > max_swap:
            reason = 'swap %s > %s' % (cur_swap, max_swap)
            resource_available['swap'] = (False, reason)
            # print('Memory load too high: %s\n\n' % cpu_load)
        else:
            resource_available['swap'] = (True, '')

    # cooperating between parmake instances:
    # to balance the jobs, accept with probability
    # 1 / (1+n), where n is the number of current processes
    if True:
        autobal_after = get_compmake_config('autobal_after')
        n = len(self.processing)
        q = max(0, n - autobal_after)
        probability = 1.0 / (1 + q)
        if random.random() > probability:
            # Unlucky, let's try next time
            reason = ('after %d, p=%.2f' % (autobal_after, probability))
            resource_available['autobal'] = (False, reason)
        else:
            resource_available['autobal'] = (True, '')

    return resource_available
def disable_logging_if_config(context):
    """ Disables Multyvac's logging if specified in config. """
    import logging
    debug_enabled = get_compmake_config('multyvac_debug')
    if not debug_enabled:
        multyvac_logger = logging.getLogger("multyvac")
        multyvac_logger.setLevel(logging.WARNING)