Ejemplo n.º 1
0
	def _refresh_access_token(self):
		"""Run the token renewal commands and log how the lifetime changed.

		Starts ``self._kinit_exec`` with the ``-R`` argument and afterwards
		``self._aklog_exec``, calling ``finish(timeout=10)`` on each process.
		The remaining lifetime is queried without the cache before and after
		these calls and both values are logged at the ``INFO2`` level.
		"""
		def _query_timeleft_str():
			# bypass the cache so the effect of the renewal becomes visible
			return str_time_long(self._get_timeleft(cached=False))

		lifetime_old = _query_timeleft_str()
		# presumably "kinit -R" renews the ticket and aklog derives the token from it
		LocalProcess(self._kinit_exec, '-R').finish(timeout=10)
		LocalProcess(self._aklog_exec).finish(timeout=10)
		lifetime_new = _query_timeleft_str()
		self._log.log(
			logging.INFO2,
			'Time left for access token "%s" changed from %s to %s',
			self.get_object_name(), lifetime_old, lifetime_new)
Ejemplo n.º 2
0
 def can_submit(self, needed_time, can_currently_submit):
     """Decide whether jobs may be submitted with the current access token.

     Raises UserError when the cached lifetime is negative or when the
     lifetime check against ``self._min_life_time`` fails. Returns True
     when ``self._ignore_time`` is set or ``needed_time`` is negative.
     Otherwise the lifetime is checked against ``self._min_life_time +
     needed_time``: if that check fails while ``can_currently_submit`` is
     true, two warnings are logged and False is returned. In all other
     cases the result is True.
     """
     # Guard 1: the token has already expired
     if self._get_timeleft(cached=True) < 0:
         expired_msg = 'Your access token (%s) expired %s ago! (Required lifetime: %s)'
         expired_args = (
             self.get_object_name(),
             str_time_long(-self._get_timeleft(cached=True)),
             str_time_long(self._min_life_time),
         )
         raise UserError(expired_msg % expired_args)

     # Guard 2: the token does not even cover the minimal lifetime
     has_min_lifetime = self._check_time_left(self._min_life_time)
     if not has_min_lifetime:
         short_msg = 'Your access token (%s) only has %d seconds left! (Required are %s)'
         short_args = (
             self.get_object_name(),
             self._get_timeleft(cached=True),
             str_time_long(self._min_life_time),
         )
         raise UserError(short_msg % short_args)

     if self._ignore_time or (needed_time < 0):
         return True

     # The lifetime check is always performed, even if submission is
     # currently not possible anyway
     total_required = self._min_life_time + needed_time
     has_full_lifetime = self._check_time_left(total_required)
     if has_full_lifetime or not can_currently_submit:
         return True

     self._log.log_time(
         logging.WARNING,
         'Access token (%s) lifetime (%s) does not meet the access and walltime (%s) requirements!',
         self.get_object_name(),
         str_time_long(self._get_timeleft(cached=False)),
         str_time_long(total_required))
     self._log.log_time(logging.WARNING, 'Disabling job submission')
     return False
Ejemplo n.º 3
0
	def can_submit(self, needed_time, can_currently_submit):
		"""Return whether the access token lifetime permits job submission.

		Raises UserError if the lifetime check against ``self._min_life_time``
		fails. The result is True if ``self._ignore_time`` is set or
		``needed_time`` is negative. Otherwise the lifetime is checked against
		``self._min_life_time + needed_time``; a failed check combined with a
		true ``can_currently_submit`` logs two warnings and yields False.
		"""
		if not self._check_time_left(self._min_life_time):
			error_args = (self.get_object_name(), self._get_timeleft(cached=True),
				str_time_long(self._min_life_time))
			raise UserError(
				'Your access token (%s) only has %d seconds left! (Required are %s)' % error_args)

		submit_allowed = True
		if not (self._ignore_time or (needed_time < 0)):
			lifetime_needed = self._min_life_time + needed_time
			# the lifetime check runs regardless of can_currently_submit
			if not self._check_time_left(lifetime_needed) and can_currently_submit:
				self._log.log_time(logging.WARNING,
					'Access token (%s) lifetime (%s) does not meet the access and walltime (%s) requirements!',
					self.get_object_name(), str_time_long(self._get_timeleft(cached=False)),
					str_time_long(lifetime_needed))
				self._log.log_time(logging.WARNING, 'Disabling job submission')
				submit_allowed = False
		return submit_allowed
Ejemplo n.º 4
0
	def __init__(self, config, datasource_name, repository, keep_old=True):
		"""Set up provider, resync interval, splitter and partition reader.

		The refresh interval is read from the '<datasource_name> refresh'
		option (default -1); a non-negative value is raised to at least the
		query interval reported by the provider. The splitter class is taken
		from the '<datasource_name> splitter' option (default
		'FileBoundarySplitter') and passed through the provider's
		``check_splitter``.

		Raises UserError if no parameters are available and the refresh
		interval is negative; with a non-negative interval only a warning
		is logged.
		"""
		BaseDataParameterSource.__init__(self, config, datasource_name, repository)

		# hide provider property set by __new__
		self._provider = self.provider
		del self.provider

		if self._provider.need_init_query():
			self._provider.get_block_list_cached(show_stats=False)

		source_label = 'Dataset source %r' % datasource_name
		# Select dataset refresh rate
		refresh_interval = config.get_time('%s refresh' % datasource_name, -1, on_change=None)
		if refresh_interval >= 0:
			# never query more often than the provider's own interval
			refresh_interval = max(refresh_interval, self._provider.get_query_interval())
			self._log.info('%s will be queried every %s',
				source_label, str_time_long(refresh_interval))
		force_resync = config.get_state('resync', detail='datasets')
		self.setup_resync(interval=refresh_interval, force=force_resync)

		splitter_name = config.get('%s splitter' % datasource_name, 'FileBoundarySplitter')
		requested_cls = DataSplitter.get_class(splitter_name)
		checked_cls = self._provider.check_splitter(requested_cls)
		self._splitter = checked_cls(config, datasource_name)

		# Settings:
		self._dn = config.get_work_path()
		self._keep_old = keep_old
		ensure_dir_exists(self._dn, 'partition map directory', DatasetError)
		reader = self._init_reader()
		self._set_reader(reader)

		if self.get_parameter_len():
			return
		# nothing to process: fatal unless a later refresh may bring data
		if refresh_interval < 0:
			raise UserError('%s does not provide jobs to process' % source_label)
		self._log.warning('%s does not provide jobs to process', source_label)
Ejemplo n.º 5
0
	def _check_time_left(self, needed_time):
		"""Return True if the token lifetime is at least ``needed_time``.

		The lifetime is estimated as the cached value minus the time elapsed
		since ``self._last_update`` (never below zero). The token is queried
		again (uncached) and the result logged when the elapsed time exceeds
		``self._min_query_time``, or when the estimate is below ``needed_time``
		and the elapsed time exceeds ``self._max_query_time``.
		"""
		elapsed = time.time() - self._last_update
		remaining = max(0, self._get_timeleft(cached=True) - elapsed)
		# original note: recheck after > 30min have passed or when time is
		# running out (max every 5 minutes) - interval values are set elsewhere
		needs_refresh = elapsed > self._min_query_time
		if not needs_refresh:
			needs_refresh = (remaining < needed_time) and (elapsed > self._max_query_time)
		if needs_refresh:
			self._last_update = time.time()
			remaining = self._get_timeleft(cached=False)
			self._log.log_time(
				logging.INFO, 'Time left for access token "%s": %s',
				self.get_object_name(), str_time_long(remaining))
		return remaining >= needed_time
Ejemplo n.º 6
0
	def _check_time_left(self, needed_time):
		"""Check whether the access token is valid for at least ``needed_time``.

		Without a new query the decision is based on the cached lifetime
		reduced by the time since ``self._last_update`` (clamped at zero).
		A new uncached query is made - and its result logged and used - if
		the last update is older than ``self._min_query_time``, or if the
		estimate is insufficient and the last update is older than
		``self._max_query_time``.
		"""
		age = time.time() - self._last_update
		estimate = max(0, self._get_timeleft(cached=True) - age)
		# original note: recheck after > 30min have passed or when time is
		# running out (max every 5 minutes) - interval values are set elsewhere
		must_requery = (age > self._min_query_time) or (
			(estimate < needed_time) and (age > self._max_query_time))
		if not must_requery:
			return estimate >= needed_time

		self._last_update = time.time()
		fresh_timeleft = self._get_timeleft(cached=False)
		self._log.log_time(logging.INFO, 'Time left for access token "%s": %s',
			self.get_object_name(), str_time_long(fresh_timeleft))
		return fresh_timeleft >= needed_time
Ejemplo n.º 7
0
	def _explain_failure(self, task, job_obj):
		"""Build a ' - ' separated description of a failed job.

		The description can contain the non-zero 'retcode' of the job, the
		matching entry of ``task.map_error_code2msg`` (only if the status
		logger is enabled for DEBUG), the job location (if non-empty) and
		the runtime (if set and not negative). Returns an empty string when
		none of these apply.
		"""
		error_messages = dict(task.map_error_code2msg)
		parts = []

		retcode = job_obj.get('retcode')
		if retcode:
			parts.append('error code: %d' % retcode)
			# the explanatory text is only shown at debug verbosity
			show_details = self._log_status.isEnabledFor(logging.DEBUG)
			if show_details and (retcode in error_messages):
				parts.append(error_messages[retcode])

		location = job_obj.get_job_location()
		if location:
			parts.append(location)

		if job_obj.get('runtime') is not None:
			if (job_obj.get('runtime') or 0) >= 0:
				parts.append('runtime %s' % str_time_long(job_obj.get('runtime') or 0))

		return ' - '.join(parts)
Ejemplo n.º 8
0
    def __init__(self, config, datasource_name, repository, keep_old=True):
        """Initialise the data source: provider, resync, splitter and reader.

        Reads the '<datasource_name> refresh' option (default -1) and, for a
        non-negative value, uses the larger of that value and the provider's
        query interval. Reads the '<datasource_name> splitter' option
        (default 'FileBoundarySplitter') and instantiates the class returned
        by the provider's ``check_splitter``.

        Raises UserError when no parameters are available and the refresh
        interval is negative; otherwise an empty source only logs a warning.
        """
        BaseDataParameterSource.__init__(
            self, config, datasource_name, repository)

        # hide provider property set by __new__
        self._provider = self.provider
        del self.provider

        if self._provider.need_init_query():
            self._provider.get_block_list_cached(show_stats=False)

        label = 'Dataset source %r' % datasource_name

        # Select dataset refresh rate
        refresh_option = '%s refresh' % datasource_name
        refresh = config.get_time(refresh_option, -1, on_change=None)
        if refresh >= 0:
            # respect the minimal interval requested by the provider
            refresh = max(refresh, self._provider.get_query_interval())
            self._log.info('%s will be queried every %s', label,
                           str_time_long(refresh))
        self.setup_resync(
            interval=refresh,
            force=config.get_state('resync', detail='datasets'))

        splitter_option = '%s splitter' % datasource_name
        splitter_name = config.get(splitter_option, 'FileBoundarySplitter')
        splitter_cls = self._provider.check_splitter(
            DataSplitter.get_class(splitter_name))
        self._splitter = splitter_cls(config, datasource_name)

        # Settings:
        self._dn = config.get_work_path()
        self._keep_old = keep_old
        ensure_dir_exists(self._dn, 'partition map directory', DatasetError)
        self._set_reader(self._init_reader())

        empty_msg = '%s does not provide jobs to process'
        if not self.get_parameter_len():
            # an empty source is only tolerated if it is refreshed later on
            if refresh < 0:
                raise UserError(empty_msg % label)
            self._log.warning(empty_msg, label)
Ejemplo n.º 9
0
    def on_job_state_change(self, job_db_len, jobnum, job_obj,
                            old_state, new_state, reason=None):
        """Log a single line describing a job state transition.

        The line starts with the job number (left-justified to the number of
        digits of ``job_db_len``) and the old and new state names. It is
        followed by the optional ``reason``, the second dot-separated part
        of ``job_obj.gc_id`` (if ``self._show_wms`` is set) and one state
        specific annotation: retry count, job location, stored reason,
        runtime or the failure explanation.
        """
        digits = int(math.log10(max(1, job_db_len)) + 1)
        padded_jobnum = str(jobnum).ljust(digits)
        parts = ['Job %s state changed from %s to %s' % (
            padded_jobnum, Job.enum2str(old_state), Job.enum2str(new_state))]
        add_part = parts.append

        if reason:
            add_part('(%s)' % reason)
        if self._show_wms and job_obj.gc_id:
            add_part('(WMS:%s)' % job_obj.gc_id.split('.')[1])

        # at most one of the following state specific annotations is added
        if (new_state == Job.SUBMITTED) and (job_obj.attempt > 1):
            add_part('(retry #%s)' % (job_obj.attempt - 1))
        elif (new_state == Job.QUEUED) and (
                job_obj.get_job_location() != 'N/A'):
            add_part('(%s)' % job_obj.get_job_location())
        elif (new_state in [Job.WAITING, Job.ABORTED, Job.DISABLED]) and (
                job_obj.get('reason')):
            add_part('(%s)' % job_obj.get('reason'))
        elif (new_state == Job.SUCCESS) and (
                job_obj.get('runtime') is not None):
            # negative runtimes are not reported
            if (job_obj.get('runtime') or 0) >= 0:
                runtime_str = str_time_long(job_obj.get('runtime') or 0)
                add_part('(runtime %s)' % runtime_str)
        elif new_state == Job.FAILED:
            failure_text = self._explain_failure(job_obj)
            if failure_text:
                add_part('(%s)' % failure_text)

        self._log_status.log_time(logging.INFO, ' '.join(parts))
Ejemplo n.º 10
0
	def on_job_state_change(self, task, job_db_len,
			jobnum, job_obj, old_state, new_state, reason=None):
		"""Write a one-line log entry about a changed job state.

		The entry consists of the job number (padded to the digit count of
		``job_db_len``), the old and new state names, the optional ``reason``,
		the second dot-separated part of ``job_obj.gc_id`` (only if
		``self._show_wms`` is set) and at most one annotation that depends
		on the new state.
		"""
		def _get_state_detail():
			# returns the state specific annotation or None if there is none
			if (new_state == Job.SUBMITTED) and (job_obj.attempt > 1):
				return '(retry #%s)' % (job_obj.attempt - 1)
			if (new_state == Job.QUEUED) and (job_obj.get_job_location() != 'N/A'):
				return '(%s)' % job_obj.get_job_location()
			if (new_state in [Job.WAITING, Job.ABORTED, Job.DISABLED]) and job_obj.get('reason'):
				return '(%s)' % job_obj.get('reason')
			if (new_state == Job.SUCCESS) and (job_obj.get('runtime') is not None):
				# negative runtimes are not reported
				if (job_obj.get('runtime') or 0) >= 0:
					return '(runtime %s)' % str_time_long(job_obj.get('runtime') or 0)
				return None
			if new_state == Job.FAILED:
				fail_msg = self._explain_failure(task, job_obj)
				if fail_msg:
					return '(%s)' % fail_msg
			return None

		width = int(math.log10(max(1, job_db_len)) + 1)
		state_names = (Job.enum2str(old_state), Job.enum2str(new_state))
		entry_list = ['Job %s state changed from %s to %s' % (
			(str(jobnum).ljust(width),) + state_names)]

		if reason:
			entry_list.append('(%s)' % reason)
		if self._show_wms and job_obj.gc_id:
			entry_list.append('(WMS:%s)' % job_obj.gc_id.split('.')[1])
		state_detail = _get_state_detail()
		if state_detail is not None:
			entry_list.append(state_detail)
		self._log_status.log_time(logging.INFO, ' '.join(entry_list))
Ejemplo n.º 11
0
	def show_report(self, job_db, jobnum_list):
		"""Show the summed job runtime together with an estimated cost.

		For every job number in ``jobnum_list`` the 'runtime' entry of the
		transient job object is read from ``job_db``; only values greater
		than zero contribute to the total. The cost is the total divided by
		3600 (presumably seconds to hours) times ``self._dollar_per_hour``.
		The resulting line is passed to ``self._show_line``.
		"""
		def _get_runtime(jobnum):
			# The default of get() only covers a missing key; an entry stored
			# as None would make "rt > 0" raise a TypeError on Python 3, so
			# both cases are mapped to zero (and therefore filtered out below)
			return job_db.get_job_transient(jobnum).get('runtime', 0) or 0

		jr_iter = imap(_get_runtime, jobnum_list)
		cpu_time = sum(ifilter(lambda rt: rt > 0, jr_iter))
		msg1 = 'Consumed wall time: %-20s' % str_time_long(cpu_time)
		msg2 = 'Estimated cost: $%.2f' % ((cpu_time / 60. / 60.) * self._dollar_per_hour)
		# right-align the cost so that the complete line is 65 characters wide
		self._show_line(msg1 + msg2.rjust(65 - len(msg1)))