Esempio n. 1
0
 def getEntries(self, path, metadata, events, seList, objStore):
     """Yield a single dataset entry tuple for the job output in *path*.

     Reads 'cmssw.dbs.tar.gz' from the job output directory, collects the
     transferred file summary and CMSSW config information into *metadata*
     and *objStore*, then yields the (path, metadata, events, seList,
     objStore) tuple.  Raises DatasetError if the file summary is unreadable.
     """
     jobNum = metadata['GC_JOBNUM']
     tar = tarfile.open(os.path.join(path, 'cmssw.dbs.tar.gz'), 'r')
     try:
         # Collect infos about transferred files
         fileSummaryMap = {}
         try:
             for rawdata in imap(str.split,
                                 tar.extractfile('files').readlines()):
                 fileSummaryMap[rawdata[2]] = {
                     'SE_OUTPUT_HASH_CRC32': rawdata[0],
                     'SE_OUTPUT_SIZE': int(rawdata[1])
                 }
             objStore['CMSSW_FILES'] = fileSummaryMap
         except Exception:
             raise DatasetError('Could not read CMSSW file infos for job %d!' %
                                jobNum)
         # Collect infos about CMSSW processing steps
         cfgSummaryMap = {}
         self._processSteps(jobNum, tar, cfgSummaryMap, fileSummaryMap)
         for cfg in cfgSummaryMap:
             metadata.setdefault('CMSSW_CONFIG_JOBHASH', []).append(
                 cfgSummaryMap[cfg]['CMSSW_CONFIG_HASH'])
         objStore.update({
             'CMSSW_CONFIG': cfgSummaryMap,
             'CMSSW_FILES': fileSummaryMap
         })
     finally:
         # Previously the tar handle leaked whenever an exception was raised
         tar.close()
     yield (path, metadata, events, seList, objStore)
Esempio n. 2
0
def create_tarball(match_info_iter, **kwargs):
    """Generate a gzipped tarball from *match_info_iter*.

    Each item is either a (source, target) tuple or a plain source.  A
    source may be a path string, None (progress display update only) or a
    file-handle object providing get_tar_info().  Raises PathError for a
    missing path source.  *kwargs* are forwarded to tarfile.open.
    """
    tar = tarfile.open(mode='w:gz', **kwargs)
    activity = Activity('Generating tarball')
    try:
        for match_info in match_info_iter:
            if isinstance(match_info, tuple):
                (path_source, path_target) = match_info
            else:
                (path_source, path_target) = (match_info, None)
            if isinstance(path_source, str):
                if not os.path.exists(path_source):
                    raise PathError('File %s does not exist!' % path_source)
                tar.add(path_source,
                        path_target or os.path.basename(path_source),
                        recursive=False)
            elif path_source is None:  # Update activity
                activity.update('Generating tarball: %s' % path_target)
            else:  # File handle
                info, handle = path_source.get_tar_info()
                if path_target:
                    info.name = path_target
                info.mtime = time.time()
                # rw-r--r-- by default; scripts additionally get execute bits
                info.mode = stat.S_IRUSR + stat.S_IWUSR + stat.S_IRGRP + stat.S_IROTH
                if info.name.endswith('.sh') or info.name.endswith('.py'):
                    info.mode += stat.S_IXUSR + stat.S_IXGRP + stat.S_IXOTH
                tar.addfile(info, handle)
                handle.close()
    finally:
        # Previously neither the activity nor the tar was cleaned up on error
        activity.finish()
        tar.close()
Esempio n. 3
0
def create_tarball(match_info_iter, **kwargs):
	"""Generate a gzipped tarball from *match_info_iter*.

	Each item is either a (source, target) tuple or a plain source.  A
	source may be a path string, None (progress display update only) or a
	file-handle object providing get_tar_info().  Raises PathError for a
	missing path source.  *kwargs* are forwarded to tarfile.open.
	"""
	tar = tarfile.open(mode='w:gz', **kwargs)
	activity = Activity('Generating tarball')
	try:
		for match_info in match_info_iter:
			if isinstance(match_info, tuple):
				(path_source, path_target) = match_info
			else:
				(path_source, path_target) = (match_info, None)
			if isinstance(path_source, str):
				if not os.path.exists(path_source):
					raise PathError('File %s does not exist!' % path_source)
				tar.add(path_source, path_target or os.path.basename(path_source), recursive=False)
			elif path_source is None:  # Update activity
				activity.update('Generating tarball: %s' % path_target)
			else:  # File handle
				info, handle = path_source.get_tar_info()
				if path_target:
					info.name = path_target
				info.mtime = time.time()
				# rw-r--r-- by default; scripts additionally get execute bits
				info.mode = stat.S_IRUSR + stat.S_IWUSR + stat.S_IRGRP + stat.S_IROTH
				if info.name.endswith('.sh') or info.name.endswith('.py'):
					info.mode += stat.S_IXUSR + stat.S_IXGRP + stat.S_IXOTH
				tar.addfile(info, handle)
				handle.close()
	finally:
		# Previously neither the activity nor the tar was cleaned up on error
		activity.finish()
		tar.close()
Esempio n. 4
0
def genTarball(outFile, fileList):
    """Generate the gzipped tarball *outFile* from *fileList* entries.

    Each entry is (pathAbs, pathRel, pathStatus) where pathStatus is True
    (known existing file), False (file that must be checked for existence),
    None (directory marker, refreshes the activity log) or a file-handle
    object providing getTarInfo().  Raises UserError for a missing file.
    """
    tar = tarfile.open(outFile, 'w:gz')
    activity = None
    try:
        for (pathAbs, pathRel, pathStatus) in fileList:
            if pathStatus is True:  # Existing file
                tar.add(pathAbs, pathRel, recursive=False)
            elif pathStatus is False:  # File - existence check required
                if not os.path.exists(pathAbs):
                    raise UserError('File %s does not exist!' % pathRel)
                tar.add(pathAbs, pathRel, recursive=False)
            elif pathStatus is None:  # Directory
                # Rebinding drops the previous ActivityLog before the new
                # one is created (same effect as the former 'del activity',
                # but safe if the constructor raises)
                activity = None
                msg = QM(
                    len(pathRel) > 50, pathRel[:15] + '...' + pathRel[-32:],
                    pathRel)
                activity = ActivityLog('Generating tarball: %s' % msg)
            else:  # File handle
                info, handle = pathStatus.getTarInfo()
                info.mtime = time.time()
                # rw-r--r--; scripts additionally get execute bits
                info.mode = stat.S_IRUSR + stat.S_IWUSR + stat.S_IRGRP + stat.S_IROTH
                if info.name.endswith('.sh') or info.name.endswith('.py'):
                    info.mode += stat.S_IXUSR + stat.S_IXGRP + stat.S_IXOTH
                tar.addfile(info, handle)
                handle.close()
    finally:
        activity = None  # release the activity log before closing
        tar.close()  # previously leaked the tar handle on exception
Esempio n. 5
0
	def _iter_datasource_items(self, item, metadata_dict, entries, location_list, obj_dict):
		"""Yield one datasource entry for the job output directory *item*.

		If a 'cmssw.dbs.tar.gz' log exists, the file transfer summary and the
		CMSSW config summaries are merged into *metadata_dict* / *obj_dict*
		first.  Raises DatasetError if the file summary is unreadable.
		"""
		jobnum = metadata_dict['GC_JOBNUM']
		cms_log_fn = os.path.join(item, 'cmssw.dbs.tar.gz')
		if os.path.exists(cms_log_fn):
			tar = tarfile.open(cms_log_fn, 'r')
			try:
				# Collect infos about transferred files
				file_summary_map = {}
				try:
					file_info_str_list = tar.extractfile('files').readlines()
					for rawdata in imap(lambda value: bytes2str(value).split(), file_info_str_list):
						file_summary_map[rawdata[2]] = {
							'SE_OUTPUT_HASH_CRC32': rawdata[0],
							'SE_OUTPUT_SIZE': int(rawdata[1])
						}
					obj_dict['CMSSW_FILES'] = file_summary_map
				except Exception:
					raise DatasetError('Could not read CMSSW file infos for job %d!' % jobnum)
				# Collect infos about CMSSW processing steps
				config_summary_map = {}
				self._process_steps(jobnum, tar, config_summary_map, file_summary_map)
				for cfg in config_summary_map:
					job_hash_list = metadata_dict.setdefault('CMSSW_CONFIG_JOBHASH', [])
					job_hash_list.append(config_summary_map[cfg]['CMSSW_CONFIG_HASH'])
				obj_dict.update({'CMSSW_CONFIG': config_summary_map, 'CMSSW_FILES': file_summary_map})
			finally:
				# Previously the tar handle leaked whenever an exception was raised
				tar.close()
		yield (item, metadata_dict, entries, location_list, obj_dict)
Esempio n. 6
0
def get_cmssw_info(tar_fn):
	"""Yield a parsed xml.dom.minidom document for each 'report.xml' in *tar_fn*.

	The framework report files carry per-job processing information (e.g.
	event counts).  The tar file is closed when the generator is exhausted
	or closed.
	"""
	import xml.dom.minidom
	# Read framework report files to get number of events
	cmssw_tar = tarfile.open(tar_fn, 'r:gz')
	try:
		for member in cmssw_tar.getmembers():
			# Plain loop instead of ifilter/imap compat helpers
			if os.path.basename(member.name) != 'report.xml':
				continue
			yield xml.dom.minidom.parse(cmssw_tar.extractfile(member))
	finally:
		cmssw_tar.close()  # previously the tar handle was never closed
Esempio n. 7
0
	def loadSplitting(self, path):
		"""Load a splitting file, dispatching on the format version stored inside.

		A missing or unreadable 'Version' entry selects the legacy version 1
		format.
		"""
		try:
			tar = tarfile.open(path, 'r:')
			try:
				version = int(tar.extractfile('Version').read())
			finally:
				tar.close()  # previously the tar handle was never closed
		except Exception:
			version = 1  # best-effort fallback: treat as legacy format
		if version == 1:
			state = DataSplitterIO_V1().loadSplitting(path)
		else:
			state = DataSplitterIO_V2().loadSplitting(path)
		return state
Esempio n. 8
0
def getCMSSWInfo(tarPath):
	"""Yield a parsed xml.dom.minidom document for each 'report.xml' in *tarPath*.

	Parsing errors are logged with traceback and re-raised.  The tar file is
	closed when the generator is exhausted or closed.
	"""
	import xml.dom.minidom
	# Read framework report files to get number of events
	tarFile = tarfile.open(tarPath, 'r:gz')
	try:
		for member in tarFile.getmembers():
			# Plain loop instead of ifilter/imap compat helpers
			if os.path.basename(member.name) != 'report.xml':
				continue
			try:
				yield xml.dom.minidom.parse(tarFile.extractfile(member))
			except Exception:
				logging.exception('Error while parsing %s', tarPath)
				raise
	finally:
		tarFile.close()  # previously the tar handle was never closed
Esempio n. 9
0
def getCMSSWInfo(tarPath):
	"""Yield a parsed xml.dom.minidom document for each 'report.xml' in *tarPath*.

	Parsing errors are logged with traceback and re-raised.  The tar file is
	closed when the generator is exhausted or closed.
	"""
	import xml.dom.minidom
	# Read framework report files to get number of events
	tarFile = tarfile.open(tarPath, 'r:gz')
	try:
		for member in tarFile.getmembers():
			# Plain loop instead of ifilter/imap compat helpers
			if os.path.basename(member.name) != 'report.xml':
				continue
			try:
				yield xml.dom.minidom.parse(tarFile.extractfile(member))
			except Exception:
				logging.exception('Error while parsing %s', tarPath)
				raise
	finally:
		tarFile.close()  # previously the tar handle was never closed
Esempio n. 10
0
			def _getPartition(self, key):
				# Return the parsed partition info dict for partition number *key*.
				# Partitions are grouped into nested tar files of 100 entries each;
				# the most recently opened nested tar is cached on the instance.
				# NOTE(review): 'key / 100' relies on integer division (Python 2
				# era code, cf. imap/lmap helpers); under Python 3 this produces a
				# float — confirm the surrounding compat layer before porting.
				if not self._cacheKey == key / 100:
					self._cacheKey = key / 100
					subTarFileObj = self._tar.extractfile('%03dXX.tgz' % (key / 100))
					subTarFileObj = BytesBuffer(gzip.GzipFile(fileobj = subTarFileObj).read()) # 3-4x speedup for sequential access
					self._cacheTar = tarfile.open(mode = 'r', fileobj = subTarFileObj)
				# '%05d/info' holds the key/value metadata, '%05d/list' the file names
				data = self._fmt.parse(self._cacheTar.extractfile('%05d/info' % key).readlines(),
					keyParser = {None: int}, valueParser = self._parserMap)
				fileList = lmap(bytes2str, self._cacheTar.extractfile('%05d/list' % key).readlines())
				# Re-attach the shared path prefix if one was stored for the partition
				if DataSplitter.CommonPrefix in data:
					fileList = imap(lambda x: '%s/%s' % (data[DataSplitter.CommonPrefix], x), fileList)
				data[DataSplitter.FileList] = lmap(str.strip, fileList)
				return data
Esempio n. 11
0
	def getEntries(self, path, metadata, events, seList, objStore):
		"""Yield a single dataset entry tuple for the job output in *path*.

		Reads 'cmssw.dbs.tar.gz' from the job output directory, collects the
		transferred file summary and CMSSW config information into *metadata*
		and *objStore*, then yields the updated entry tuple.  Raises
		DatasetError if the file summary is unreadable.
		"""
		jobNum = metadata['GC_JOBNUM']
		tar = tarfile.open(os.path.join(path, 'cmssw.dbs.tar.gz'), 'r')
		try:
			# Collect infos about transferred files
			fileSummaryMap = {}
			try:
				for rawdata in imap(str.split, tar.extractfile('files').readlines()):
					fileSummaryMap[rawdata[2]] = {'SE_OUTPUT_HASH_CRC32': rawdata[0], 'SE_OUTPUT_SIZE': int(rawdata[1])}
				objStore['CMSSW_FILES'] = fileSummaryMap
			except Exception:
				raise DatasetError('Could not read CMSSW file infos for job %d!' % jobNum)
			# Collect infos about CMSSW processing steps
			cfgSummaryMap = {}
			self._processSteps(jobNum, tar, cfgSummaryMap, fileSummaryMap)
			for cfg in cfgSummaryMap:
				metadata.setdefault('CMSSW_CONFIG_JOBHASH', []).append(cfgSummaryMap[cfg]['CMSSW_CONFIG_HASH'])
			objStore.update({'CMSSW_CONFIG': cfgSummaryMap, 'CMSSW_FILES': fileSummaryMap})
		finally:
			# Previously the tar handle leaked whenever an exception was raised
			tar.close()
		yield (path, metadata, events, seList, objStore)
Esempio n. 12
0
	def __init__(self, path):
		"""Open the partition file at *path* and parse its global metadata."""
		activity = Activity('Reading dataset partition file')
		self._lock = GCLock()
		self._fmt = utils.DictFormat()
		self._tar = tarfile.open(path, 'r:')
		# Cache for the most recently opened nested tar file (see partition access)
		(self._cacheKey, self._cacheTar) = (None, None)

		metadata = self._fmt.parse(self._tar.extractfile('Metadata').readlines(), keyParser = {None: str})
		self.maxJobs = metadata.pop('MaxJobs')
		self.classname = metadata.pop('ClassName')
		# Keys of the form '[section] key' go into per-section dictionaries;
		# all remaining keys describe the dataset itself
		self.metadata = {'dataset': dict(ifilter(lambda k_v: not k_v[0].startswith('['), metadata.items()))}
		for (k, v) in ifilter(lambda k_v: k_v[0].startswith('['), metadata.items()):
			self.metadata.setdefault('dataset %s' % k.split(']')[0].lstrip('['), {})[k.split(']')[1].strip()] = v
		activity.finish()

		# Value parsers used to interpret the per-partition 'info' entries
		self._parserMap = { None: str, DataSplitter.NEntries: int, DataSplitter.Skipped: int,
			DataSplitter.DatasetID: int, DataSplitter.Invalid: parseBool,
			DataSplitter.Locations: lambda x: parseList(x, ','),
			DataSplitter.MetadataHeader: parseJSON,
			DataSplitter.Metadata: lambda x: parseJSON(x.strip("'")) }
Esempio n. 13
0
def genTarball(outFile, fileList):
	"""Generate the gzipped tarball *outFile* from *fileList* entries.

	Each entry is (pathAbs, pathRel, pathStatus) where pathStatus is True
	(known existing file), False (file that must be checked for existence),
	None (directory marker, updates the activity display) or a file-handle
	object providing getTarInfo().  Raises UserError for a missing file.
	"""
	tar = tarfile.open(outFile, 'w:gz')
	activity = Activity('Generating tarball')
	try:
		for (pathAbs, pathRel, pathStatus) in fileList:
			if pathStatus is True: # Existing file
				tar.add(pathAbs, pathRel, recursive = False)
			elif pathStatus is False: # File - existence check required
				if not os.path.exists(pathAbs):
					raise UserError('File %s does not exist!' % pathRel)
				tar.add(pathAbs, pathRel, recursive = False)
			elif pathStatus is None: # Directory
				activity.update('Generating tarball: %s' % pathRel)
			else: # File handle
				info, handle = pathStatus.getTarInfo()
				info.mtime = time.time()
				# rw-r--r--; scripts additionally get execute bits
				info.mode = stat.S_IRUSR + stat.S_IWUSR + stat.S_IRGRP + stat.S_IROTH
				if info.name.endswith('.sh') or info.name.endswith('.py'):
					info.mode += stat.S_IXUSR + stat.S_IXGRP + stat.S_IXOTH
				tar.addfile(info, handle)
				handle.close()
	finally:
		# Previously neither the activity nor the tar was cleaned up on error
		activity.finish()
		tar.close()
Esempio n. 14
0
	def __init__(self, path):
		"""Open the partition file at *path* and read its global metadata.

		Raises PartitionReaderError if the file cannot be opened or parsed.
		"""
		activity = Activity('Reading dataset partition file')
		self._fmt = DictFormat()
		try:
			self._tar = tarfile.open(path, 'r:')

			metadata = self._fmt.parse(self._tar.extractfile('Metadata').readlines(), key_parser={None: str})
			# 'MaxJobs' stores the partition count used by the base reader
			FilePartitionReader.__init__(self, path, metadata.pop('MaxJobs'))
			self._metadata = metadata
			activity.finish()
		except Exception:
			# Any failure here is reported uniformly as an invalid splitting file
			raise PartitionReaderError('No valid dataset splitting found in %s' % path)

		# Value parsers used to interpret the per-partition 'info' entries
		self._map_enum2parser = {
			None: str,
			DataSplitter.NEntries: int, DataSplitter.Skipped: int,
			DataSplitter.Invalid: parse_bool,
			DataSplitter.Locations: lambda x: parse_list(x, ','),
			DataSplitter.MetadataHeader: parse_json,
			DataSplitter.Metadata: lambda x: parse_json(x.strip("'"))
		}
		# Cache for the most recently opened nested tar file
		(self._cache_nested_fn, self._cache_nested_tar) = (None, None)
Esempio n. 15
0
	def _open_nested_tar(self, nested_fn):
		"""Open the nested tar file *nested_fn* stored inside the outer tar."""
		raw_fp = self._tar.extractfile(nested_fn)
		# Decompress fully into memory for fast random access afterwards
		decompressed = gzip.GzipFile(fileobj=raw_fp).read()
		return tarfile.open(mode='r', fileobj=BytesBuffer(decompressed))
Esempio n. 16
0
	def save_partitions(self, path, partition_iter, progress=None):
		"""Write *partition_iter* into a new uncompressed outer tar file at *path*."""
		outer_tar = tarfile.open(path, 'w:')
		try:
			self._save_partitions(outer_tar, partition_iter, progress)
		finally:
			# Previously the tar handle leaked if saving a partition failed
			outer_tar.close()
Esempio n. 17
0
	def _createSubTar(self, subTarFileName):
		"""Create an in-memory gzip-compressed tar file for nested storage."""
		buffer_obj = BytesBuffer()
		sub_tar = tarfile.open(mode = 'w:gz', fileobj = buffer_obj)
		# Return tar object, its backing buffer and the target file name together
		return (sub_tar, buffer_obj, subTarFileName)
Esempio n. 18
0
	def _create_nested_tar(self, fn):
		"""Create an in-memory gzip tar; buffer and target name ride along as attributes."""
		buffer_fp = BytesBuffer()
		nested = tarfile.open(mode='w:gz', fileobj=buffer_fp)
		# Remember the backing buffer and destination file name for later flushing
		nested.nested_tar_fp = buffer_fp
		nested.nested_fn = fn
		return nested
Esempio n. 19
0
	def saveSplitting(self, path, meta, source, sourceLen, message = 'Writing job mapping file'):
		"""Write the job mapping state into a new uncompressed tar file at *path*."""
		tar = tarfile.open(path, 'w:')
		try:
			self._saveStateToTar(tar, meta, source, sourceLen, message)
		finally:
			# Previously the tar handle leaked if saving the state failed
			tar.close()
Esempio n. 20
0
	def __new__(cls, path):
		"""Instantiate the partition reader matching the version stored in *path*.

		A missing or unreadable 'Version' entry falls back to version 1.
		"""
		def _read_version():
			# Read the 'Version' entry; close the tar even on failure
			# (previously the tar handle was never closed)
			tar = tarfile.open(path, 'r:')
			try:
				return int(tar.extractfile('Version').read())
			finally:
				tar.close()
		version = ignore_exception(Exception, 1, _read_version)
		return FilePartitionReader.create_instance('version_%s' % version, path)