def __init__(self, *args, **kwargs):
    """Initialise the component's simulation state from keyword options.

    All positional and keyword arguments are first forwarded unchanged to
    ``ClusterBaseSystem.__init__``.  The keyword options read here are:

    sleep_interval   -- stored as ``self.sleep_interval`` (default 0)
    cluster_fraction -- stored as ``self.fraction`` (default 1)
    c_trace_start    -- stored as ``self.sim_start`` (default 0)
    c_trace_end      -- stored as ``self.sim_end`` (default ``sys.maxint``)
    anchor           -- stored as ``self.anchor`` (default 0)
    cjob             -- stored as ``self.workload_file``
    outputlog        -- log name suffix; prefixed with MACHINE_NAME + "-"
    bgjob            -- stored as ``self.bgjob``; when it is false-y,
                        coscheduling is disabled
    coscheduling     -- pair ``(remote scheme, local scheme)``; coscheduling
                        is enabled only when both are "hold" or "yield"
    vicinity         -- stored as ``self.mate_vicinity`` (default 0)
    mate_ratio       -- stored as ``self.mate_ratio`` (default 0)

    Failures while talking to the remote queue manager are logged and
    turn coscheduling off instead of propagating.
    """
    ClusterBaseSystem.__init__(self, *args, **kwargs)

    self.sleep_interval = kwargs.get("sleep_interval", 0)

    self.fraction = kwargs.get("cluster_fraction", 1)
    self.sim_start = kwargs.get("c_trace_start", 0)
    self.sim_end = kwargs.get("c_trace_end", sys.maxint)
    self.anchor = kwargs.get("anchor", 0)

    self.workload_file = kwargs.get("cjob")
    self.output_log = MACHINE_NAME + "-" + kwargs.get("outputlog", "")
    self.bgjob = kwargs.get("bgjob")

    self.event_manager = ComponentProxy("event-manager")

    walltime_prediction = get_histm_config("walltime_prediction", False)  # *AdjEst*
    # Single-argument parenthesised print: same output as the Python 2
    # statement form ``print "walltime_prediction=", value``.
    print("walltime_prediction= %s" % (walltime_prediction,))
    # The configuration value is compared as text; only these two
    # spellings enable prediction.
    self.walltime_prediction = walltime_prediction in ["True", "true"]

    self.time_stamps = [('I', '0', 0, {})]
    self.cur_time_index = 0
    self.queues = SimQueueDict(policy=None)

    self.unsubmitted_job_spec_dict = {}  # {jobid: jobspec}

    self.num_running = 0
    self.num_waiting = 0
    self.num_busy = 0
    self.num_end = 0
    self.total_job = 0
    self.total_nodes = len(self.all_nodes)

    self.init_queues()

    # initialize PBS-style logger
    self.pbslog = PBSlogger(self.output_log)

    # initialize debug logger
    # NOTE(review): output_log always contains at least "-", so the
    # fallback branch looks unreachable; kept to preserve behaviour.
    if self.output_log:
        self.dbglog = PBSlogger(self.output_log + "-debug")
    else:
        self.dbglog = PBSlogger(".debug")

    # finish tag
    self.finished = False

    # register local alias "system" for this component
    local_components["cluster-system"] = self

    # initialize capacity loss
    self.capacity_loss = 0

    # starting job(id)s at current time stamp, used for calculating
    # capacity loss
    self.starting_jobs = []

    self.user_utility_functions = {}
    self.builtin_utility_functions = {}

    self.define_builtin_utility_functions()
    self.define_user_utility_functions()

    # The option is a pair: index 0 is the remote scheme, index 1 the
    # local one.
    self.cosched_scheme_tup = kwargs.get("coscheduling", (0, 0))
    self.cosched_scheme = self.cosched_scheme_tup[1]
    self.cosched_scheme_remote = self.cosched_scheme_tup[0]

    self.mate_vicinity = kwargs.get("vicinity", 0)
    self.mate_ratio = kwargs.get("mate_ratio", 0)

    valid_cosched_schemes = ["hold", "yield"]
    self.coscheduling = (
        self.cosched_scheme in valid_cosched_schemes
        and self.cosched_scheme_remote in valid_cosched_schemes
    )

    # Without a "bgjob" option there is nothing to coschedule with.
    if not kwargs.get("bgjob", None):
        self.coscheduling = False

    self.mate_job_dict = {}
    # Share of local jobs that have a mate; only reported when
    # coscheduling stays enabled.
    proportion = 0.0

    if self.coscheduling:
        self.jobid_qtime_pairs = self.init_jobid_qtime_pairs()
        try:
            self.remote_jobid_qtime_pairs = ComponentProxy(
                REMOTE_QUEUE_MANAGER).get_jobid_qtime_pairs()
        except Exception:
            # Was a bare ``except:``; narrowed so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            self.logger.error("fail to connect to remote queue-manager component!")
            self.coscheduling = False

    # Re-checked on purpose: if the remote lookup failed above,
    # remote_jobid_qtime_pairs was never set, so pairing is skipped.
    if self.coscheduling:
        if self.mate_vicinity:
            print("start init mate job dict, vicinity= %s" % (self.mate_vicinity,))
            self.init_mate_job_dict_by_vicinity()
        elif self.mate_ratio:
            print("start init mate job dict, mate_ratio= %s" % (self.mate_ratio,))
            self.init_mate_job_dict_by_ratio(self.mate_ratio)
        else:
            self.logger.error("fail to initialize mate job dict!")

        # Guard against an empty workload (total_job == 0), which would
        # otherwise raise ZeroDivisionError.
        if self.total_job:
            proportion = float(len(self.mate_job_dict)) / self.total_job

    # recording holding job id and held resource
    self.job_hold_dict = {}

    # record holding job's holding time   jobid: first hold (sec)
    self.first_hold_time_dict = {}

    # record yield job's first yielding time, for calculating the extra
    # waiting time
    self.first_yield_hold_time_dict = {}

    # record yield job ids, updated dynamically
    self.yielding_job_list = []

    if self.coscheduling:
        # The remote side gets the inverse mapping (value -> key).
        remote_mate_job_dict = dict(
            (v, k) for k, v in self.mate_job_dict.iteritems())
        try:
            ComponentProxy(REMOTE_QUEUE_MANAGER).set_mate_job_dict(remote_mate_job_dict)
        except Exception:
            self.logger.error("failed to connect to remote queue-manager component!")
            self.coscheduling = False
        print("number of mate job pairs: %s, proportion in cluster jobs: %s%%"
              % (len(self.mate_job_dict), round(proportion * 100, 1)))

    self.max_holding_sys_util = DEFAULT_MAX_HOLDING_SYS_UTIL
def __init__(self, *args, **kwargs):
    """Run the base initialiser, then set the process-group item class.

    Every positional and keyword argument is handed on unchanged to
    ``ClusterBaseSystem.__init__``.
    """
    ClusterBaseSystem.__init__(self, *args, **kwargs)

    # Items held in self.process_groups are to be ClusterProcessGroup.
    groups = self.process_groups
    groups.item_cls = ClusterProcessGroup
def __setstate__(self, state):
    """Restore state through the base class and re-apply the item class.

    ``state`` is passed unchanged to ``ClusterBaseSystem.__setstate__``;
    afterwards ``self.process_groups.item_cls`` is set to
    ``ClusterProcessGroup``, as in ``__init__``.
    """
    ClusterBaseSystem.__setstate__(self, state)

    restored_groups = self.process_groups
    restored_groups.item_cls = ClusterProcessGroup
def __getstate__(self):
    """Return a new dict with the state from ``ClusterBaseSystem.__getstate__``.

    Nothing is added on top of the base-class state; a
    "cluster_system_version" entry exists only as disabled
    (commented-out) code in the original.
    """
    base_state = ClusterBaseSystem.__getstate__(self)
    # Copy into a fresh dict so the caller never receives the object
    # returned by the base class itself.
    return dict(base_state)