def post(self, request):
    _post = request.POST
    my_sge_info = get_sge_info()
    my_sge_info.update()
    _salt_addons(request)
    rms_info = _fetch_rms_info(request)
    # jobinfo_jobsfrom is a unix timestamp; only jobs that ended after it count as finished
    latest_possible_end_time = cluster_timezone.localize(
        datetime.datetime.fromtimestamp(int(_post["jobinfo_jobsfrom"]))
    )
    done_jobs = rms_job_run.objects.all().filter(
        Q(end_time__gt=latest_possible_end_time)
    ).select_related("rms_job")

    def xml_to_jobid(jobxml):
        return [
            int(jobxml.findall("job_id")[0].text),
            jobxml.findall("task_id")[0].text
        ]

    json_resp = {
        "jobs_running": sorted(map(xml_to_jobid, rms_info.run_job_list)),
        "jobs_waiting": sorted(map(xml_to_jobid, rms_info.wait_job_list)),
        "jobs_finished": sorted(
            [
                job.rms_job.jobid,
                job.rms_job.taskid if job.rms_job.taskid else ""
            ] for job in done_jobs
        ),
    }
    return HttpResponse(json.dumps(json_resp), content_type="application/json")
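# Shape of the JSON reply built above (values illustrative): running and
# waiting entries take their task id from the XML as a string, finished
# entries straight from the database (empty string when unset), e.g.
#
#   {
#       "jobs_running": [[1234, "1"], [1235, ""]],
#       "jobs_waiting": [[1240, ""]],
#       "jobs_finished": [[1200, ""], [1201, 3]]
#   }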
def close_job_run(self):
    _latest_run = self.get_latest_job_run()
    if _latest_run:
        _latest_run.end_time_py = cluster_timezone.localize(
            datetime.datetime.now()
        )
        _latest_run.save(update_fields=["end_time_py"])
    return _latest_run
def _get_missing_dict(self):
    # clean old jobs without a valid accounting log
    invalid_runs = rms_job_run.objects.filter(
        Q(qacct_called=False) &
        Q(end_time=None) &
        Q(start_time=None) &
        Q(
            start_time_py__lt=cluster_timezone.localize(
                datetime.datetime.now()
            ) - datetime.timedelta(seconds=31 * 24 * 3600)
        )
    )
    self.log("invalid runs found: {:d}".format(invalid_runs.count()))
    _missing_ids = rms_job_run.objects.filter(
        Q(qacct_called=False)
    ).values_list(
        "idx", "rms_job__jobid", "rms_job__taskid"
    )
    _mis_dict = {}
    for _entry in _missing_ids:
        if _entry[2]:
            # array job run: keyed as <jobid>.<taskid>
            _id = "{:d}.{:d}".format(_entry[1], _entry[2])
        else:
            _id = "{:d}".format(_entry[1])
        _mis_dict.setdefault(_id, []).append(_entry[0])
    return _mis_dict
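# Shape of the dict returned above (values illustrative): each key is the
# job id as the accounting log reports it, each value the list of
# rms_job_run idx entries still waiting for accounting data, e.g.
#
#   {"1234": [17], "1235.3": [18, 19]}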
def add_job_run(self, _dev_name, _dev):
    new_run = rms_job_run(
        rms_job=self,
        device=_dev,
        hostname=_dev_name,
        start_time_py=cluster_timezone.localize(datetime.datetime.now()),
    )
    # note: the new run is returned unsaved
    return new_run
def retrieve(self, request, *args, **kwargs):
    timespans = _device_status_history_util.get_timespans_db_from_request(
        request
    )
    if len(timespans):
        data = {
            "status": "found",
            "start": timespans[0].start_date,
            "end": timespans[0].end_date,
            "db_ids": [timespans[0].idx],
            # full timespan found, no partial data
            "partial": False,
        }
    else:
        data = {"status": "not found"}
        start, end, duration_type = _device_status_history_util.get_timespan_tuple_from_request(
            request
        )
        # fall back to the most recent data if this duration type is not yet finished
        try:
            latest_timespan_db = mon_icinga_log_aggregated_timespan.objects.filter(
                duration_type=duration_type.ID
            ).latest("start_date")
        except mon_icinga_log_aggregated_timespan.DoesNotExist:
            # no data at all, can't do anything useful
            pass
        else:
            date = duration_utils.parse_date(request.GET["date"])
            # check whether the current datetime lies in the requested timespan
            _now = cluster_timezone.localize(datetime.datetime.now())
            _now_covered = start < _now < end
            if _now_covered:
                # timespan not finished yet, stitch it together from shorter timespans
                shorter_duration = duration_type.get_shorter_duration()
                _shorter = list(
                    mon_icinga_log_aggregated_timespan.objects.filter(
                        duration_type=shorter_duration.ID,
                        start_date__range=(start, end - datetime.timedelta(seconds=1))
                    ).order_by("start_date")
                )
                if len(_shorter):
                    data = {
                        "start": _shorter[0].start_date,
                        "end": _shorter[-1].end_date,
                        "status": "found",
                        "db_ids": [_db.idx for _db in _shorter],
                        "partial": True,
                    }
            else:
                # check for earlier data
                if latest_timespan_db.end_date < date:
                    data = {
                        "status": "found earlier",
                        "start": latest_timespan_db.start_date,
                        "db_ids": [latest_timespan_db.idx],
                        "end": latest_timespan_db.end_date,
                        "partial": False,
                    }
    return Response(data)
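# The payload variants Response(data) can carry (field values illustrative):
#
#   {"status": "not found"}
#   {"status": "found", "start": ..., "end": ..., "db_ids": [...], "partial": False}
#   {"status": "found", ..., "partial": True}    # stitched from shorter timespans
#   {"status": "found earlier", ...}             # most recent finished timespan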
def close(self):
    _run_time = cluster_timezone.localize(
        datetime.datetime.now()
    ) - cluster_timezone.normalize(self.date)
    # run time in milliseconds (the days component is ignored)
    _run_time = _run_time.microseconds / 1000 + 1000 * _run_time.seconds
    self.active = False
    self.run_time = _run_time
    self.save()
    # close current lock and return a list of (what, level) lines
    return [
        ("closed {}".format(unicode(self)), logging_tools.LOG_LEVEL_OK)
    ]
def set_value(self, value):
    if type(value) == datetime.datetime:
        # datetime value
        self.var_type = "d"
        self.val_date = cluster_timezone.localize(value)
    elif type(value) in [int, long] or (isinstance(value, basestring) and value.isdigit()):
        # integer value (digit-only strings are accepted as well)
        self.var_type = "i"
        self.val_int = int(value)
    else:
        # fallback: store as string
        self.var_type = "s"
        self.val_str = value
    self._clear()
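# A minimal usage sketch of the type dispatch above; my_var is a hypothetical
# instance of the owning model (the val_date / val_int / val_str fields are
# taken from the code above):
#
#   my_var.set_value(datetime.datetime(2015, 3, 5, 14, 0))  # var_type "d", val_date localized
#   my_var.set_value("42")   # digit-only string, var_type "i", val_int == 42
#   my_var.set_value("up")   # fallback, var_type "s", val_str == "up"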
def _interpret_qacct(self, cur_out, needed):
    _found, _matched = (0, 0)
    _dict_list = []
    _dict = {}
    for _line in cur_out.split("\n"):
        if _line.startswith("==="):
            # record separator: flush the previous record
            if "jobnumber" in _dict:
                _found += 1
                _matched += self._feed_qacct(_dict)
                _dict_list.append(_dict)
            _dict = {}
        else:
            if _line.strip():
                _parts = _line.strip().split(None, 1)
                if len(_parts) > 1:
                    # simple cleanup
                    _key, _value = _parts
                    if _value.isdigit():
                        _value = int(_value)
                    elif _value in ["NONE", "undefined", "-/-"]:
                        _value = None
                    elif _key.endswith("time") and len(_value.split()) > 4:
                        _value = cluster_timezone.localize(
                            datetime.datetime.strptime(_value, "%a %b %d %H:%M:%S %Y")
                        )
                    _dict[_key] = _value
        if self["exit_requested"]:
            self.log("exiting accounting loop due to exit request", logging_tools.LOG_LEVEL_WARN)
            break
    # flush the last record
    if "jobnumber" in _dict:
        _found += 1
        _matched += self._feed_qacct(_dict)
        _dict_list.append(_dict)
    if needed == _found and not _matched:
        _to_del = rms_job_run.objects.filter(
            Q(rms_job__jobid=_dict_list[0]["jobnumber"]) &
            Q(rms_job__taskid=_dict_list[0]["taskid"])
        )
        self.log(
            " all matches found, removing old rms_job_run entries ({:d})".format(
                _to_del.count()
            )
        )
        _to_del.delete()
        _matched = 0
        for _dict in _dict_list:
            _matched += self._feed_qacct(_dict, force=True)
    return _found, _matched
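# A minimal sketch of the raw "qacct -j" output _interpret_qacct expects
# (hypothetical record; real output carries many more key/value lines):
#
#   ==============================================================
#   qname        serial.q
#   hostname     node01
#   jobnumber    1234
#   taskid       undefined
#   start_time   Thu Mar  5 14:01:10 2015
#   end_time     Thu Mar  5 14:05:10 2015
#
# "===" lines separate records, digit-only values become ints, "NONE" /
# "undefined" / "-/-" map to None and *time keys are parsed into localized
# datetimes before each record is fed to _feed_qacct().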
def create_bg_job(server_pk, user_obj, cmd, cause, obj, **kwargs):
    # late import to break import loop
    from initat.cluster.backbone.models import background_job, device, BackgroundJobState
    srv_com = server_command.srv_command(command=cmd)
    timeout = kwargs.get("timeout", 60 * 5)
    _bld = srv_com.builder()
    if obj is None:
        obj_list = None
    elif isinstance(obj, list):
        obj_list = obj
        cause = "{} of {}".format(
            cause,
            logging_tools.get_plural("object", len(obj_list))
        )
    else:
        obj_list = [obj]
        cause = "{} of {}".format(cause, str(obj))
    if obj_list is not None:
        srv_com[None] = _bld.objects(
            *[
                _bld.object(
                    str(obj),
                    model=obj._meta.model_name,
                    app=obj._meta.app_label,
                    pk="{:d}".format(obj.pk),
                ) for obj in obj_list
            ]
        )
    _new_job = background_job(
        command=cmd,
        cause=cause[:255],
        options=kwargs.get("options", ""),
        initiator=device.objects.get(Q(pk=server_pk)),
        user=user_obj,
        command_xml=str(srv_com),
        num_objects=len(obj_list) if obj_list else 0,
        # valid until now plus timeout (defaults to five minutes)
        valid_until=cluster_timezone.localize(
            datetime.datetime.now() + datetime.timedelta(seconds=timeout)
        ),
    )
    _new_job.set_state(kwargs.get("state", BackgroundJobState.pre_init))
    return _new_job
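# A minimal usage sketch for create_bg_job; all argument values below are
# hypothetical (a real call site passes the pk of the cluster server device,
# the acting user and a model instance or list of instances):
#
#   _job = create_bg_job(
#       _server_pk,
#       request.user,
#       "change_bootsetting",
#       "device setting changed",
#       cur_dev,
#   )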
def consume(self, request):
    if request.is_lock:
        # is a lock, we don't consume anything
        request.weight = 0
        request.valid = False
    else:
        # consume from egg_consumer
        # if request.valid is False, try to consume the full target weight
        # if request.valid is True, only consume up to the target weight
        _target_weight = self.multiplier
        if not request.valid:
            _to_consume = _target_weight
        else:
            _to_consume = _target_weight - request.weight
        if _to_consume:
            # something to consume, resolve egg_cradle
            _avail = self.egg_cradle.available
        else:
            # nothing to consume: force the comparison below to succeed
            _avail = 1
        if _avail > _to_consume:
            if _to_consume:
                # only book when something is left to consume
                # (the request may already be fulfilled)
                self.egg_cradle.available -= _to_consume
                self.consumed += _to_consume
                self.save(update_fields=["consumed"])
                self.egg_cradle.save(update_fields=["available"])
            if self.timeframe_secs:
                request.valid_until = cluster_timezone.localize(
                    datetime.datetime.now() + datetime.timedelta(seconds=self.timeframe_secs)
                )
            else:
                request.valid_until = None
            request.valid = True
        else:
            request.valid = False
        request.weight = _target_weight
        request.save(update_fields=["weight", "valid", "valid_until"])
    return request.valid
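# Consumption semantics in short (egg_consumer / egg_cradle naming taken from
# the code above, the scenario is hypothetical):
# - lock requests never consume: their weight is zeroed and they stay invalid
# - an invalid request tries to book the full multiplier from egg_cradle.available
# - an already valid request only books the difference to the target weight
# - on success, valid_until is pushed out by timeframe_secs (None if unset)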
def create_bg_job(server_pk, user_obj, cmd, cause, obj, **kwargs):
    # late import to break import loop
    from initat.cluster.backbone.models import background_job, device
    srv_com = server_command.srv_command(
        command=cmd,
    )
    _bld = srv_com.builder()
    if type(obj) == list:
        obj_list = obj
    else:
        obj_list = [obj]
    srv_com[None] = _bld.objects(
        *[
            _bld.object(
                unicode(obj),
                model=obj._meta.model_name,
                app=obj._meta.app_label,
                pk="{:d}".format(obj.pk),
            ) for obj in obj_list
        ]
    )
    _new_job = background_job.objects.create(
        command=cmd,
        cause=u"{} of '{}'".format(cause, unicode(obj))[:255],
        state="pre-init",
        options=kwargs.get("options", ""),
        initiator=device.objects.get(Q(pk=server_pk)),
        user=user_obj,
        command_xml=unicode(srv_com),
        num_objects=len(obj_list),
        # valid for five minutes (the 4-hour value 3600 * 4 is disabled)
        valid_until=cluster_timezone.localize(
            datetime.datetime.now() + datetime.timedelta(seconds=60 * 5)
        ),
    )
    return _new_job
def ok(self):
    self.end = cluster_timezone.localize(datetime.datetime.now())
    self.success = True
    self.save(update_fields=["end", "success"])