Example #1
class JobInfoProcessor(OutputProcessor):
    def __init__(self):
        OutputProcessor.__init__(self)
        self._df = DictFormat()

    def process(self, dn):
        fn = os.path.join(dn, 'job.info')
        if not os.path.exists(fn):
            raise JobResultError('Job result file %r does not exist' % fn)
        try:
            info_content = open(fn, 'r').read()
        except Exception:
            raise JobResultError('Unable to read job result file %r' % fn)
        if not info_content:
            raise JobResultError('Job result file %r is empty' % fn)
        try:
            data = self._df.parse(info_content, keyParser={None: str})
        except Exception:
            raise JobResultError('Unable to parse job result file %r' % fn)
        try:
            jobNum = data.pop('JOBID')
            exitCode = data.pop('EXITCODE')
            return {
                JobResult.JOBNUM: jobNum,
                JobResult.EXITCODE: exitCode,
                JobResult.RAW: data
            }
        except Exception:
            raise JobResultError('Job result file %r is incomplete' % fn)

class JobInfoProcessor(OutputProcessor):
	alias_list = ['jobinfo']

	def __init__(self):
		OutputProcessor.__init__(self)
		self._df = DictFormat()

	def process(self, dn):
		fn = os.path.join(dn, 'job.info')
		try:
			if not os.path.exists(fn):
				raise JobResultError('Job result file %r does not exist' % fn)
			try:
				info_content = SafeFile(fn).read_close()
			except Exception:
				raise JobResultError('Unable to read job result file %r' % fn)
			if not info_content:
				raise JobResultError('Job result file %r is empty' % fn)
			data = self._df.parse(info_content, key_parser={None: str})  # impossible to fail
			try:
				jobnum = data.pop('JOBID')
				exit_code = data.pop('EXITCODE')
				message = data.pop('MESSAGE', None)
				return {JobResult.JOBNUM: jobnum, JobResult.EXITCODE: exit_code,
					JobResult.MESSAGE: message, JobResult.RAW: data}
			except Exception:
				raise JobResultError('Job result file %r is incomplete' % fn)
		except Exception:
			raise JobResultError('Unable to process output directory %r' % dn)

class JobInfoProcessor(OutputProcessor):
	def __init__(self):
		OutputProcessor.__init__(self)
		self._df = DictFormat()

	def process(self, dn):
		fn = os.path.join(dn, 'job.info')
		try:
			if not os.path.exists(fn):
				raise JobResultError('Job result file %r does not exist' % fn)
			try:
				info_content = open(fn, 'r').read()
			except Exception:
				raise JobResultError('Unable to read job result file %r' % fn)
			if not info_content:
				raise JobResultError('Job result file %r is empty' % fn)
			data = self._df.parse(info_content, keyParser={None: str})  # impossible to fail
			try:
				jobNum = data.pop('JOBID')
				exitCode = data.pop('EXITCODE')
				return {JobResult.JOBNUM: jobNum, JobResult.EXITCODE: exitCode, JobResult.RAW: data}
			except Exception:
				raise JobResultError('Job result file %r is incomplete' % fn)
		except Exception:
			raise JobResultError('Unable to process output directory %r' % dn)
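
All variants above expose the same entry point: process(output_dir) returns a mapping keyed by the JobResult enum. A minimal usage sketch, assuming JobInfoProcessor, JobResult and their dependencies are imported from grid-control (module paths omitted here); the directory path is a made-up placeholder for a finished job's output directory:

processor = JobInfoProcessor()
result = processor.process('/path/to/workdir/output/job_0')  # hypothetical directory containing job.info
print(result[JobResult.JOBNUM], result[JobResult.EXITCODE])
print(result[JobResult.RAW])  # any further key/value pairs parsed from job.info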
Example #5
class TarPartitionReader(FilePartitionReader):
	def __init__(self, path):
		activity = Activity('Reading dataset partition file')
		self._fmt = DictFormat()
		try:
			self._tar = tarfile.open(path, 'r:')

			metadata = self._fmt.parse(self._tar.extractfile('Metadata').readlines(), key_parser={None: str})
			FilePartitionReader.__init__(self, path, metadata.pop('MaxJobs'))
			self._metadata = metadata
			activity.finish()
		except Exception:
			raise PartitionReaderError('No valid dataset splitting found in %s' % path)

		self._map_enum2parser = {
			None: str,
			DataSplitter.NEntries: int, DataSplitter.Skipped: int,
			DataSplitter.Invalid: parse_bool,
			DataSplitter.Locations: lambda x: parse_list(x, ','),
			DataSplitter.MetadataHeader: parse_json,
			DataSplitter.Metadata: lambda x: parse_json(x.strip("'"))
		}
		(self._cache_nested_fn, self._cache_nested_tar) = (None, None)

	def _combine_partition_parts(self, partition, url_list):
		if DataSplitter.CommonPrefix in partition:
			common_prefix = partition.pop(DataSplitter.CommonPrefix)
			url_list = imap(lambda x: '%s/%s' % (common_prefix, x), url_list)
		partition[DataSplitter.FileList] = lmap(str.strip, url_list)
		return partition

	def _get_nested_tar(self, nested_fn):
		if self._cache_nested_fn != nested_fn:  # caching gives 3-4x speedup for sequential access
			self._cache_nested_tar = self._open_nested_tar(nested_fn)
			self._cache_nested_fn = nested_fn
		return self._cache_nested_tar

	def _open_nested_tar(self, nested_fn):
		nested_tar_fp = self._tar.extractfile(nested_fn)
		nested_tar_fp = BytesBuffer(gzip.GzipFile(fileobj=nested_tar_fp).read())
		return tarfile.open(mode='r', fileobj=nested_tar_fp)
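
_get_nested_tar and _open_nested_tar implement a single-entry cache: as the inline comment notes, re-using the last opened nested tar gives a 3-4x speedup for sequential partition access. A self-contained sketch of the same caching pattern, with no grid-control dependencies (all names and values are illustrative):

class SingleEntryCache(object):
	# Re-runs the expensive open_func only when a different key is requested.
	def __init__(self, open_func):
		self._open_func = open_func
		(self._cached_key, self._cached_value) = (None, None)

	def get(self, key):
		if self._cached_key != key:  # cache miss: open and remember the new key
			self._cached_value = self._open_func(key)
			self._cached_key = key
		return self._cached_value

cache = SingleEntryCache(lambda name: 'opened:%s' % name)
print(cache.get('part_000.tgz'))  # performs the open
print(cache.get('part_000.tgz'))  # served from the cache
print(cache.get('part_001.tgz'))  # different key, opens again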
Example #7
class TextFileJobDB(JobDB):
    alias_list = ['textdb']

    def __init__(self, config, job_limit=-1, job_selector=None):
        JobDB.__init__(self, config, job_limit, job_selector)
        self._path_db = config.get_work_path('jobs')
        self._fmt = DictFormat(escape_strings=True)
        try:
            self._job_map = self._read_jobs(self._job_limit)
        except Exception:
            raise JobError('Unable to read stored job information!')
        if self._job_limit < 0 and len(self._job_map) > 0:
            self._job_limit = max(self._job_map) + 1

    def commit(self, jobnum, job_obj):
        with_file(
            SafeFile(os.path.join(self._path_db, 'job_%d.txt' % jobnum), 'w'),
            lambda fp: fp.writelines(
                self._fmt.format(self._serialize_job_obj(job_obj))))
        self._job_map[jobnum] = job_obj

    def get_job(self, jobnum):
        return self._job_map.get(jobnum)

    def get_job_persistent(self, jobnum):
        return self._job_map.get(jobnum, Job())

    def get_job_transient(self, jobnum):
        return self._job_map.get(jobnum, self._default_job_obj)

    def _create_job_obj(self, name, data):
        try:
            job = Job()
            job.state = Job.str2enum(data.pop('status'), Job.UNKNOWN)

            if 'id' in data:
                gc_id = data.pop('id')
                if not gc_id.startswith('WMSID'):  # Legacy support
                    data['legacy_gc_id'] = gc_id
                    if gc_id.startswith('https'):
                        gc_id = 'WMSID.GLITEWMS.%s' % gc_id
                    else:
                        wms_id, wms_name = tuple(gc_id.split('.', 1))
                        gc_id = 'WMSID.%s.%s' % (wms_name, wms_id)
                job.gc_id = gc_id

            for key in ['attempt', 'submitted', 'changed']:
                if key in data:
                    setattr(job, key, data[key])
            if 'runtime' not in data:
                if 'submitted' in data and (job.submitted > 0):
                    data['runtime'] = time.time() - float(job.submitted)
                else:
                    data['runtime'] = 0
            for key in irange(1, job.attempt + 1):
                if ('history_' + str(key)).strip() in data:
                    job.history[key] = data['history_' + str(key)]
            job.set_dict(data)
        except Exception:
            raise JobError('Unable to parse data in %s:\n%r' % (name, data))
        return job

    def _read_jobs(self, job_limit):
        ensure_dir_exists(self._path_db, 'job database directory', JobError)

        candidates = []
        for job_fn in fnmatch.filter(os.listdir(self._path_db), 'job_*.txt'):
            try:  # 2xsplit is faster than regex
                jobnum = int(job_fn.split(".")[0].split("_")[1])
            except Exception:
                clear_current_exception()
                continue
            candidates.append((jobnum, job_fn))

        (job_map, max_job_len) = ({}, len(candidates))
        activity = Activity('Reading job infos')
        idx = 0
        for (jobnum, job_fn) in sorted(candidates):
            idx += 1
            if jobnum >= job_limit >= 0:
                self._log.info(
                    'Stopped reading job infos at job #%d out of %d available job files, '
                    + 'since the limit of %d jobs is reached', jobnum,
                    len(candidates), job_limit)
                break
            try:
                job_fn_full = os.path.join(self._path_db, job_fn)
                data = self._fmt.parse(SafeFile(job_fn_full).iter_close())
                job_obj = self._create_job_obj(job_fn_full, data)
            except Exception:
                raise JobError('Unable to process job file %r' % job_fn_full)
            job_map[jobnum] = job_obj
            activity.update('Reading job infos %d [%d%%]' %
                            (idx, (100.0 * idx) / max_job_len))
        activity.finish()
        return job_map

    def _serialize_job_obj(self, job_obj):
        data = job_obj.get_dict_full()
        for key, value in job_obj.history.items():
            data['history_' + str(key)] = value
        if job_obj.gc_id is not None:
            data['id'] = job_obj.get(
                'legacy_gc_id') or job_obj.gc_id  # store legacy gc_id
        return data
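
The legacy-id branch in _create_job_obj rewrites identifiers that do not yet start with 'WMSID' into the 'WMSID.<backend>.<id>' scheme. A standalone sketch of that conversion, extracted from the code above (the helper name and the sample ids are made up for illustration):

def normalize_gc_id(gc_id):
    # Mirrors the legacy-id handling in _create_job_obj above.
    if gc_id.startswith('WMSID'):
        return gc_id  # already in the current format
    if gc_id.startswith('https'):
        return 'WMSID.GLITEWMS.%s' % gc_id  # the code above maps 'https...' ids to the GLITEWMS backend
    wms_id, wms_name = gc_id.split('.', 1)  # legacy format: '<wms id>.<backend name>'
    return 'WMSID.%s.%s' % (wms_name, wms_id)

print(normalize_gc_id('https://wms.example.org:9000/abc'))  # hypothetical gLite WMS id
print(normalize_gc_id('4711.CONDOR'))  # -> 'WMSID.CONDOR.4711'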