예제 #1
0
    def deployTask(self, task, monitor):
        """Register the task's SE input files and pack the sandbox tarball.

        ``task`` is validated via ``validateVariables`` and provides the SE
        input files; ``monitor`` is handed on to ``_getSandboxFiles``.
        The SE transfer and the repacking of an already existing sandbox are
        each gated by an 'init' state query on ``self.config``.
        """
        # HACK: keep the third field of every output sandbox entry
        self.outputFiles = [
            sb_entry[2] for sb_entry in self._getSandboxFilesOut(task)
        ]
        task.validateVariables()

        # add task SE files to SM
        se_targets = [se_entry[2] for se_entry in task.getSEInFiles()]
        self.smSEIn.addFiles(se_targets)
        # Transfer common SE files
        if self.config.getState('init', detail='storage'):
            self.smSEIn.doTransfer(task.getSEInFiles())

        def convert(fnList):
            # Strings become (path, basename, False); every other item is
            # accessed through its ``name`` attribute and passed on itself
            # as the third field.
            for item in fnList:
                if not isinstance(item, str):
                    yield (None, os.path.basename(item.name), item)
                else:
                    yield (item, os.path.basename(item), False)

        # Package sandbox tar file
        self._log.log(logging.INFO1, 'Packing sandbox')
        sandbox = self._getSandboxName(task)
        utils.ensureDirExists(os.path.dirname(sandbox), 'sandbox directory')
        # The sandbox state is only queried if a sandbox file already exists
        if not os.path.exists(sandbox) or self.config.getState(
                'init', detail='sandbox'):
            sb_files = self._getSandboxFiles(task, monitor,
                                             [self.smSEIn, self.smSEOut])
            utils.genTarball(sandbox, convert(sb_files))
예제 #2
0
	def _readJobs(self, jobLimit):
		"""Load the job files of the job database directory into a dict.

		Files matching ``job_*.txt`` are processed in ascending job number
		order. A non-negative ``jobLimit`` stops the reading at the first job
		number that is >= the limit; a negative value disables the limit.
		Returns a dict mapping job number -> result of ``_load_job``.
		"""
		utils.ensureDirExists(self._dbPath, 'job database directory', JobError)

		found = []
		for fn in fnmatch.filter(os.listdir(self._dbPath), 'job_*.txt'):
			try: # 2xsplit is faster than regex
				num = int(fn.split(".")[0].split("_")[1])
			except Exception:
				continue # no usable job number in the file name
			found.append((num, fn))

		total = len(found)
		result = {}
		activity = Activity('Reading job infos')
		for (pos, (num, fn)) in enumerate(sorted(found), 1):
			limitReached = (jobLimit >= 0) and (num >= jobLimit)
			if limitReached:
				self._log.info('Stopped reading job infos at job #%d out of %d available job files, since the limit of %d jobs is reached',
					num, len(found), jobLimit)
				break
			result[num] = self._load_job(os.path.join(self._dbPath, fn))
			# refresh the progress display only every 100 files
			if pos % 100 == 0:
				activity.update('Reading job infos %d [%d%%]' % (pos, (100.0 * pos) / total))
		activity.finish()
		return result
예제 #3
0
	def __init__(self, config, wmsName):
		"""Set up work paths, the run library, access token and storage managers.

		``config`` provides the work directory paths and the plugins;
		``wmsName`` is passed on to ``WMS.__init__``. If ``gc-run.lib`` does
		not yet exist in the work directory, it is created from the shared
		template with ``__GC_VERSION__`` replaced by the package version.
		"""
		WMS.__init__(self, config, wmsName)
		if self.wmsName != self.__class__.__name__.upper():
			utils.vprint('Using batch system: %s (%s)' % (self.__class__.__name__, self.wmsName), -1)
		else:
			utils.vprint('Using batch system: %s' % self.wmsName, -1)

		self.errorLog = config.getWorkPath('error.tar')
		self._runlib = config.getWorkPath('gc-run.lib')
		if not os.path.exists(self._runlib):
			# Read the template BEFORE creating the target file: a failing read
			# must not leave an empty gc-run.lib behind, since the existence
			# check above would then skip the generation on every later run.
			content = SafeFile(utils.pathShare('gc-run.lib')).read()
			fp = SafeFile(self._runlib, 'w')
			try:
				fp.write(content.replace('__GC_VERSION__', __import__('grid_control').__version__))
			finally:
				fp.close() # release the handle even if the write fails
		self._outputPath = config.getWorkPath('output')
		utils.ensureDirExists(self._outputPath, 'output directory')
		self._failPath = config.getWorkPath('fail')

		# Initialise access token, broker and storage manager
		self._token = config.getCompositePlugin(['proxy', 'access token'], 'TrivialAccessToken',
			'MultiAccessToken', cls = AccessToken, inherit = True, tags = [self])

		# UI -> SE -> WN
		self.smSEIn = config.getPlugin('se input manager', 'SEStorageManager', cls = StorageManager,
			tags = [self], pargs = ('se', 'se input', 'SE_INPUT'))
		self.smSBIn = config.getPlugin('sb input manager', 'LocalSBStorageManager', cls = StorageManager,
			tags = [self], pargs = ('sandbox', 'sandbox', 'SB_INPUT'))
		# UI <- SE <- WN
		self.smSEOut = config.getPlugin('se output manager', 'SEStorageManager', cls = StorageManager,
			tags = [self], pargs = ('se', 'se output', 'SE_OUTPUT'))
		self.smSBOut = None

		self.fileNamesEnvironment = config.getBool("file names environment", True, onChange = None)
예제 #4
0
 def _initSockets(self, **kwargs):
     """Initialise the bookkeeping for the rotating SSH control sockets.

     Recognised keyword arguments and their defaults: ``needSocket`` (True),
     ``socketMinSec`` (300), ``socketCount`` (2, smaller values are raised
     to 2), ``socketMaxMiss`` (2) and ``socketDir`` (empty).
     """
     self._needSocket = kwargs.get("needSocket", True)
     self._socketMinSec = kwargs.get("socketMinSec", 300)
     requestedCount = kwargs.get("socketCount", 2)
     self._socketCount = max(2, requestedCount)
     self._socketIndex = 0
     self._socketMaxMiss = kwargs.get("socketMaxMiss", 2)
     self._socketMisses = 0
     # sockets should reside in secure, managed directory
     userDir = kwargs.get("socketDir", "")
     # NOTE(review): the 105 character bound presumably keeps the control
     # path below the length limit of unix socket paths - confirm
     if userDir and len(userDir) < 105:
         self._socketDir = userDir
         ensureDirExists(self._socketDir,
                         name="SSH connection socket container directory")
     else:
         # no (usable) directory given - fall back to a fresh temp directory
         self._socketDir = tempfile.mkdtemp()
     self._log(logging.DEBUG1, 'Using socket directoy %s' % self._socketDir)
     # create list of socket names and corresponding arguments to rotate through
     socketPaths = []
     socketArgs = []
     for socketIndex in irange(self._socketCount):
         socketPath = os.path.join(self._socketDir, str(socketIndex))
         socketPaths.append(socketPath)
         socketArgs.append(
             ["-oControlMaster=auto", "-oControlPath=%s" % socketPath])
     self._socketList = socketPaths
     self._socketArgList = socketArgs
     self._socketProcs = {}
예제 #5
0
 def freezeConfig(self, writeConfig=True):
     """Make the config read-only, report unused options and store it.

     The config files are written if ``writeConfig`` is true or if the
     stored config file (``self._oldCfgPath``) does not exist yet.
     """
     self._curContainer.setReadOnly()
     # Inform the user about unused options
     # (entries with '!' in the section name are never reported)
     unused = [
         entry for entry in self._view.iterContent()
         if not (('!' in entry.section) or entry.accessed)
     ]
     log = logging.getLogger('config.freeze')
     if unused:
         log.log(logging.INFO1, 'There are %s unused config options!',
                 len(unused))
         for entry in unused:
             log.log(logging.INFO1, '\t%s', entry.format(printSection=True))
     # Nothing to write if not requested and a stored config already exists
     if (not writeConfig) and os.path.exists(self._oldCfgPath):
         return
     ensureDirExists(os.path.dirname(self._oldCfgPath),
                     'config storage directory', ConfigError)
     # Write user friendly, flat config file ...
     self._write_file(self._flatCfgPath,
                      printDefault=False,
                      printUnused=False,
                      printMinimal=True,
                      printWorkdir=True)
     # ... and config file with saved settings
     header = '; ==> DO NOT EDIT THIS FILE! <==\n; This file is used to find config changes!\n'
     self._write_file(self._oldCfgPath,
                      printDefault=True,
                      printUnused=True,
                      printMinimal=True,
                      printSource=True,
                      message=header)
예제 #6
0
    def _readJobs(self, jobLimit):
        """Read the job files of the job database directory.

        Only files matching ``job_*.txt`` whose name yields an integer job
        number are considered; they are loaded in ascending job number order.
        Reading stops at the first job number >= ``jobLimit`` when
        ``jobLimit`` is non-negative. Returns a dict that maps each job
        number to the object returned by ``_load_job``.
        """
        utils.ensureDirExists(self._dbPath, 'job database directory', JobError)

        numbered_files = []
        for file_name in fnmatch.filter(os.listdir(self._dbPath),
                                        'job_*.txt'):
            try:  # 2xsplit is faster than regex
                job_num = int(file_name.split(".")[0].split("_")[1])
            except Exception:
                continue  # skip names without a parsable job number
            numbered_files.append((job_num, file_name))

        num_files = len(numbered_files)
        job_map = {}
        activity = Activity('Reading job infos')
        for (count, (job_num, file_name)) in enumerate(
                sorted(numbered_files), 1):
            if (jobLimit >= 0) and (job_num >= jobLimit):
                self._log.info(
                    'Stopped reading job infos at job #%d out of %d available job files, since the limit of %d jobs is reached',
                    job_num, len(numbered_files), jobLimit)
                break
            job_path = os.path.join(self._dbPath, file_name)
            job_map[job_num] = self._load_job(job_path)
            # update the progress display after every 100th file
            if count % 100 == 0:
                percent = (100.0 * count) / num_files
                activity.update('Reading job infos %d [%d%%]' %
                                (count, percent))
        activity.finish()
        return job_map
예제 #7
0
파일: wms.py 프로젝트: Fra-nk/grid-control
	def retrieveJobs(self, gcID_jobNum_List): # Process output sandboxes returned by getJobsOutput
		"""Process the output directories reported by ``_getJobsOutput``.

		Yields tuples ``(jobNum, exit code, raw job info, path)``. For jobs
		that could not be retrieved, parsed or moved into the output
		directory, ``(jobNum, -1, {}, None)`` is yielded instead.
		Raises ``BackendError`` if the job number stored in the job info does
		not match the job number reported for the directory.
		"""
		# Function to force moving a directory
		def forceMove(source, target):
			# shutil reports failures as OSError, which is distinct from
			# IOError on Python 2 - catch both so the error is logged and the
			# job is marked as failed instead of aborting the whole retrieval
			try:
				if os.path.exists(target):
					shutil.rmtree(target)
			except (IOError, OSError):
				self._log.exception('%r cannot be removed', target)
				return False
			try:
				shutil.move(source, target)
			except (IOError, OSError):
				self._log.exception('Error moving job output directory from %r to %r', source, target)
				return False
			return True

		retrievedJobs = []

		for inJobNum, pathName in self._getJobsOutput(gcID_jobNum_List):
			# inJobNum != None, pathName == None => Job could not be retrieved
			if pathName is None:
				if inJobNum not in retrievedJobs:
					yield (inJobNum, -1, {}, None)
				continue

			# inJobNum == None, pathName != None => Found leftovers of job retrieval
			if inJobNum is None:
				continue

			# inJobNum != None, pathName != None => Job retrieval from WMS was ok
			jobFile = os.path.join(pathName, 'job.info')
			try:
				job_info = self._job_parser.process(pathName)
			except Exception:
				self._log.exception('Unable to parse job.info')
				job_info = None
			if job_info:
				jobNum = job_info[JobResult.JOBNUM]
				if jobNum != inJobNum:
					raise BackendError('Invalid job id in job file %s' % jobFile)
				if forceMove(pathName, os.path.join(self._outputPath, 'job_%d' % jobNum)):
					retrievedJobs.append(inJobNum)
					yield (jobNum, job_info[JobResult.EXITCODE], job_info[JobResult.RAW], pathName)
				else:
					yield (jobNum, -1, {}, None)
				continue

			# Clean empty pathNames (bottom-up, non-empty directories are left alone)
			for subDir in imap(lambda x: x[0], os.walk(pathName, topdown=False)):
				try:
					os.rmdir(subDir)
				except Exception:
					clear_current_exception()

			if os.path.exists(pathName):
				# Preserve failed job
				utils.ensureDirExists(self._failPath, 'failed output directory')
				forceMove(pathName, os.path.join(self._failPath, os.path.basename(pathName)))

			yield (inJobNum, -1, {}, None)
예제 #8
0
	def main():
		"""Create the config, set up the workflow and run the requested action.

		Exits with ``os.EX_OK`` after printing the config help or after a
		job deletion / reset was requested; otherwise the workflow is run.
		"""
		configFactory = createConfigFactory(configFile = args[0], additional = [OptsConfigFiller(parser)])
		config = configFactory.getConfig()
		loggingConfig = config.changeView(setSections = ['logging'])
		logging_setup(loggingConfig)

		# Check work dir validity (default work directory is the config file name)
		workdirMissing = not os.path.exists(config.getWorkPath())
		if workdirMissing and not config.getState('init'):
			utils.vprint('Will force initialization of %s if continued!' % config.getWorkPath(), -1)
			config.setState(True, 'init')
		if workdirMissing:
			question = 'Do you want to create the working directory %s?' % config.getWorkPath()
			if config.getChoiceYesNo('workdir create', True, interactive = question):
				utils.ensureDirExists(config.getWorkPath(), 'work directory')

		# Create workflow and freeze config settings
		globalConfig = config.changeView(setSections = ['global'])
		workflowPlugin = globalConfig.getPlugin('workflow', 'Workflow:global', cls = Workflow)
		workflow = workflowPlugin.getInstance()
		configFactory.freezeConfig(writeConfig = config.getState('init', detail = 'config'))

		# Give config help
		if opts.help_cfg or opts.help_scfg:
			config.write(sys.stdout, printDefault = opts.help_cfg, printUnused = False,
				printMinimal = opts.help_scfg, printSource = opts.help_cfg)
			sys.exit(os.EX_OK)

		# Check if user requested deletion / reset of jobs
		if opts.delete:
			workflow.jobManager.delete(workflow.wms, opts.delete)
			sys.exit(os.EX_OK)
		if opts.reset:
			workflow.jobManager.reset(workflow.wms, opts.reset)
			sys.exit(os.EX_OK)

		# No one-shot action requested - run the configured workflow
		workflow.run()
예제 #9
0
def gc_create_workflow(config):
    """Create the workflow plugin described by ``config`` and return it.

    Installs the signal handlers, configures logging, offers to create a
    missing work directory and freezes the config. The process exits with
    ``os.EX_OK`` instead of returning when the config display or a job
    delete / reset action was requested.
    """
    # set up signal handler for interrupts and debug session requests
    signal.signal(signal.SIGURG, handle_debug_interrupt)
    signal.signal(signal.SIGINT, handle_abort_interrupt)

    # Configure logging settings
    logging_config = config.changeView(setSections=['logging'])
    logging_setup(logging_config)

    global_config = config.changeView(setSections=['global'])
    # Check work dir validity (default work directory is the config file name)
    workdir_missing = not os.path.exists(global_config.getWorkPath())
    if workdir_missing and not global_config.getState('init'):
        user_log = logging.getLogger('user')
        user_log.warning('Starting initialization of %s!',
                         global_config.getWorkPath())
        global_config.setState(True, 'init')
    if workdir_missing:
        question = ('Do you want to create the working directory %s?' %
                    global_config.getWorkPath())
        if global_config.getChoiceYesNo('workdir create', True,
                                        interactive_msg=question):
            utils.ensureDirExists(global_config.getWorkPath(),
                                  'work directory')
    for package_path in global_config.getPaths('package paths', []):
        init_hpf_plugins(package_path)

    # Query config settings before config is frozen
    help_cfg = global_config.getState('display', detail='config')
    help_scfg = global_config.getState('display', detail='minimal config')

    action_config = config.changeView(setSections=['action'])
    action_delete = action_config.get('delete', '', onChange=None)
    action_reset = action_config.get('reset', '', onChange=None)

    # Create workflow and freeze config settings
    workflow = global_config.getPlugin('workflow', 'Workflow:global',
                                       cls='Workflow')
    write_config = config.getState('init', detail='config')
    config.factory.freezeConfig(writeConfig=write_config)

    # Give config help
    if help_cfg or help_scfg:
        config.write(sys.stdout,
                     printDefault=help_cfg,
                     printUnused=False,
                     printMinimal=help_scfg,
                     printSource=help_cfg)
        sys.exit(os.EX_OK)

    # Check if user requested deletion / reset of jobs
    if action_delete:
        workflow.jobManager.delete(workflow.wms, action_delete)
        sys.exit(os.EX_OK)
    if action_reset:
        workflow.jobManager.reset(workflow.wms, action_reset)
        sys.exit(os.EX_OK)

    return workflow
예제 #10
0
	def _getJobsOutput(self, ids):
		if len(ids) == 0:
			raise StopIteration

		basePath = os.path.join(self._outputPath, 'tmp')
		try:
			if len(ids) == 1:
				# For single jobs create single subdir
				tmpPath = os.path.join(basePath, md5(ids[0][0]).hexdigest())
			else:
				tmpPath = basePath
			utils.ensureDirExists(tmpPath)
		except Exception:
			raise BackendError('Temporary path "%s" could not be created.' % tmpPath, BackendError)

		jobNumMap = dict(ids)
		jobs = self.writeWMSIds(ids)

		activity = Activity('retrieving %d job outputs' % len(ids))
		proc = LocalProcess(self._outputExec, '--noint', '--logfile', '/dev/stderr', '-i', jobs, '--dir', tmpPath)

		# yield output dirs
		todo = jobNumMap.values()
		currentJobNum = None
		for line in imap(str.strip, proc.stdout.iter(timeout = 60)):
			if line.startswith(tmpPath):
				todo.remove(currentJobNum)
				outputDir = line.strip()
				if os.path.exists(outputDir):
					if 'GC_WC.tar.gz' in os.listdir(outputDir):
						wildcardTar = os.path.join(outputDir, 'GC_WC.tar.gz')
						try:
							tarfile.TarFile.open(wildcardTar, 'r:gz').extractall(outputDir)
							os.unlink(wildcardTar)
						except Exception:
							self._log.error('Can\'t unpack output files contained in %s', wildcardTar)
				yield (currentJobNum, line.strip())
				currentJobNum = None
			else:
				currentJobNum = jobNumMap.get(self._createId(line), currentJobNum)
		retCode = proc.status(timeout = 0, terminate = True)
		activity.finish()

		if retCode != 0:
			if 'Keyboard interrupt raised by user' in proc.stderr.read(timeout = 0):
				utils.removeFiles([jobs, basePath])
				raise StopIteration
			else:
				self._log.log_process(proc, files = {'jobs': SafeFile(jobs).read()})
			self._log.error('Trying to recover from error ...')
			for dirName in os.listdir(basePath):
				yield (None, os.path.join(basePath, dirName))

		# return unretrievable jobs
		for jobNum in todo:
			yield (jobNum, None)

		utils.removeFiles([jobs, basePath])
예제 #11
0
	def saveToFile(path, dataBlocks, stripMetadata = False):
		"""Write ``dataBlocks`` to the file ``path`` via ``DataProvider.saveToStream``.

		The parent directory of ``path`` is created first if ``path``
		contains a directory part; the file is closed in any case.
		"""
		targetDir = os.path.dirname(path)
		if targetDir:
			utils.ensureDirExists(targetDir, 'dataset cache directory')
		with open(path, 'w') as fp:
			# the iterator is consumed only to drive the writing - its items are discarded
			for _ in DataProvider.saveToStream(fp, dataBlocks, stripMetadata):
				pass
예제 #12
0
 def saveToFile(path, dataBlocks, stripMetadata=False):
     """Store ``dataBlocks`` in the file ``path``.

     If ``path`` has a directory component, that directory is ensured to
     exist. The output is produced by iterating over
     ``DataProvider.saveToStream``; the file is closed even on errors.
     """
     parentDir = os.path.dirname(path)
     if parentDir:
         utils.ensureDirExists(parentDir, 'dataset cache directory')
     with open(path, 'w') as stream:
         writer = DataProvider.saveToStream(stream, dataBlocks, stripMetadata)
         # exhaust the iterator - the yielded items themselves are not needed
         for _ in writer:
             pass
예제 #13
0
    def __init__(self, config, name, checkExecutor, cancelExecutor):
        """Set up executors, work paths, run library, storage managers and token.

        ``config`` provides the work directory paths and the plugins;
        ``name`` is passed on to ``WMS.__init__``. ``checkExecutor`` and
        ``cancelExecutor`` get the logger of this instance via ``setup`` and
        are stored on the instance. If ``gc-run.lib`` does not yet exist in
        the work directory, it is created from the shared template with
        ``__GC_VERSION__`` replaced by the package version.
        """
        WMS.__init__(self, config, name)
        for executor in [checkExecutor, cancelExecutor]:
            executor.setup(self._log)
        (self._check_executor, self._cancel_executor) = (checkExecutor,
                                                         cancelExecutor)

        if self._name != self.__class__.__name__.upper():
            self._log.info('Using batch system: %s (%s)',
                           self.__class__.__name__, self._name)
        else:
            self._log.info('Using batch system: %s', self._name)

        self.errorLog = config.getWorkPath('error.tar')
        self._runlib = config.getWorkPath('gc-run.lib')
        if not os.path.exists(self._runlib):
            # Read the template BEFORE creating the target file: a failing
            # read must not leave an empty gc-run.lib behind, since the
            # existence check above would skip the generation on later runs.
            content = SafeFile(utils.pathShare('gc-run.lib')).read()
            fp = SafeFile(self._runlib, 'w')
            try:
                fp.write(
                    content.replace('__GC_VERSION__',
                                    __import__('grid_control').__version__))
            finally:
                fp.close()  # release the handle even if the write fails
        self._outputPath = config.getWorkPath('output')
        self._filecachePath = config.getWorkPath('files')
        utils.ensureDirExists(self._outputPath, 'output directory')
        self._failPath = config.getWorkPath('fail')

        # Initialise access token and storage managers

        # UI -> SE -> WN
        self.smSEIn = config.getPlugin('se input manager',
                                       'SEStorageManager',
                                       cls=StorageManager,
                                       tags=[self],
                                       pargs=('se', 'se input', 'SE_INPUT'))
        self.smSBIn = config.getPlugin('sb input manager',
                                       'LocalSBStorageManager',
                                       cls=StorageManager,
                                       tags=[self],
                                       pargs=('sandbox', 'sandbox',
                                              'SB_INPUT'))
        # UI <- SE <- WN
        self.smSEOut = config.getPlugin('se output manager',
                                        'SEStorageManager',
                                        cls=StorageManager,
                                        tags=[self],
                                        pargs=('se', 'se output', 'SE_OUTPUT'))
        self.smSBOut = None

        self._token = config.getCompositePlugin(['proxy', 'access token'],
                                                'TrivialAccessToken',
                                                'MultiAccessToken',
                                                cls=AccessToken,
                                                inherit=True,
                                                tags=[self])
예제 #14
0
파일: wms.py 프로젝트: whahmad/grid-control
    def __init__(self, config, wmsName):
        """Set up work paths, the run library, access token and storage managers.

        ``config`` provides the work directory paths and the plugins;
        ``wmsName`` is passed on to ``WMS.__init__``. If ``gc-run.lib`` does
        not yet exist in the work directory, it is created from the shared
        template with ``__GC_VERSION__`` replaced by the package version.
        """
        WMS.__init__(self, config, wmsName)
        if self.wmsName != self.__class__.__name__.upper():
            utils.vprint(
                'Using batch system: %s (%s)' %
                (self.__class__.__name__, self.wmsName), -1)
        else:
            utils.vprint('Using batch system: %s' % self.wmsName, -1)

        self.errorLog = config.getWorkPath('error.tar')
        self._runlib = config.getWorkPath('gc-run.lib')
        if not os.path.exists(self._runlib):
            # Read the template BEFORE creating the target file: a failing
            # read must not leave an empty gc-run.lib behind, since the
            # existence check above would skip the generation on later runs.
            content = SafeFile(utils.pathShare('gc-run.lib')).read()
            fp = SafeFile(self._runlib, 'w')
            try:
                fp.write(
                    content.replace('__GC_VERSION__',
                                    __import__('grid_control').__version__))
            finally:
                fp.close()  # release the handle even if the write fails
        self._outputPath = config.getWorkPath('output')
        utils.ensureDirExists(self._outputPath, 'output directory')
        self._failPath = config.getWorkPath('fail')

        # Initialise access token, broker and storage manager
        self._token = config.getCompositePlugin(['proxy', 'access token'],
                                                'TrivialAccessToken',
                                                'MultiAccessToken',
                                                cls=AccessToken,
                                                inherit=True,
                                                tags=[self])

        # UI -> SE -> WN
        self.smSEIn = config.getPlugin('se input manager',
                                       'SEStorageManager',
                                       cls=StorageManager,
                                       tags=[self],
                                       pargs=('se', 'se input', 'SE_INPUT'))
        self.smSBIn = config.getPlugin('sb input manager',
                                       'LocalSBStorageManager',
                                       cls=StorageManager,
                                       tags=[self],
                                       pargs=('sandbox', 'sandbox',
                                              'SB_INPUT'))
        # UI <- SE <- WN
        self.smSEOut = config.getPlugin('se output manager',
                                        'SEStorageManager',
                                        cls=StorageManager,
                                        tags=[self],
                                        pargs=('se', 'se output', 'SE_OUTPUT'))
        self.smSBOut = None

        self.fileNamesEnvironment = config.getBool("file names environment",
                                                   True,
                                                   onChange=None)
예제 #15
0
	def freezeConfig(self, writeConfig = True):
		"""Set the config read-only, log unused options and write the config files.

		The files are written when ``writeConfig`` is true or when the file
		at ``self._oldCfgPath`` does not exist.
		"""
		self._curContainer.setReadOnly()

		# Inform the user about unused options
		def isUnused(entry):
			# entries with '!' in their section name are never reported
			return ('!' not in entry.section) and not entry.accessed
		unused = lfilter(isUnused, self._view.iterContent())
		log = logging.getLogger('config.freeze')
		if unused:
			log.log(logging.INFO1, 'There are %s unused config options!', len(unused))
			for entry in unused:
				log.log(logging.INFO1, '\t%s', entry.format(printSection = True))

		# Skip writing if it was not requested and a stored config is present
		if (not writeConfig) and os.path.exists(self._oldCfgPath):
			return
		ensureDirExists(os.path.dirname(self._oldCfgPath), 'config storage directory', ConfigError)
		# Write user friendly, flat config file ...
		self._write_file(self._flatCfgPath, printDefault = False, printUnused = False,
			printMinimal = True, printWorkdir = True)
		# ... and config file with saved settings
		notice = '; ==> DO NOT EDIT THIS FILE! <==\n; This file is used to find config changes!\n'
		self._write_file(self._oldCfgPath, printDefault = True, printUnused = True,
			printMinimal = True, printSource = True, message = notice)
예제 #16
0
def gc_create_workflow(config):
	"""Set up the runtime environment and return the configured workflow plugin.

	Signal handlers and logging are set up first, then the user is asked
	whether a missing work directory should be created. After the config is
	frozen, a requested config display or job delete / reset action is
	performed, each followed by an exit with ``os.EX_OK``.
	"""
	# set up signal handler for interrupts and debug session requests
	for (signum, handler) in [(signal.SIGURG, handle_debug_interrupt), (signal.SIGINT, handle_abort_interrupt)]:
		signal.signal(signum, handler)

	# Configure logging settings
	logging_setup(config.changeView(setSections = ['logging']))

	global_config = config.changeView(setSections = ['global'])
	# Check work dir validity (default work directory is the config file name)
	if not os.path.exists(global_config.getWorkPath()):
		init_requested = global_config.getState('init')
		if not init_requested:
			logging.getLogger('user').warning('Starting initialization of %s!', global_config.getWorkPath())
			global_config.setState(True, 'init')
		create_msg = 'Do you want to create the working directory %s?' % global_config.getWorkPath()
		create_workdir = global_config.getChoiceYesNo('workdir create', True, interactive_msg = create_msg)
		if create_workdir:
			utils.ensureDirExists(global_config.getWorkPath(), 'work directory')
	for plugin_path in global_config.getPaths('package paths', []):
		init_hpf_plugins(plugin_path)

	# Query config settings before config is frozen
	help_cfg = global_config.getState('display', detail = 'config')
	help_scfg = global_config.getState('display', detail = 'minimal config')

	action_config = config.changeView(setSections = ['action'])
	action_delete = action_config.get('delete', '', onChange = None)
	action_reset = action_config.get('reset', '', onChange = None)

	# Create workflow and freeze config settings
	workflow = global_config.getPlugin('workflow', 'Workflow:global', cls = 'Workflow')
	config.factory.freezeConfig(writeConfig = config.getState('init', detail = 'config'))

	# Give config help
	show_help = help_cfg or help_scfg
	if show_help:
		config.write(sys.stdout, printDefault = help_cfg, printUnused = False,
			printMinimal = help_scfg, printSource = help_cfg)
		sys.exit(os.EX_OK)

	# Check if user requested deletion / reset of jobs
	if action_delete:
		workflow.jobManager.delete(workflow.wms, action_delete)
		sys.exit(os.EX_OK)
	if action_reset:
		workflow.jobManager.reset(workflow.wms, action_reset)
		sys.exit(os.EX_OK)

	return workflow
예제 #17
0
	def __init__(self, config, source):
		"""Initialise the adapter and decide between reuse, resync and init.

		Depending on the 'init' / 'resync' states of ``config``, the presence
		of the stored parameter files and a comparison of the stored
		parameter hash with the hash of ``source``, the job <-> parameter
		mapping is either read from disk, resynced or written from scratch.
		"""
		self._rawSource = source
		BasicParameterAdapter.__init__(self, config, source)
		self._mapJob2PID = {}
		utils.ensureDirExists(config.getWorkPath(), 'parameter storage directory', ParameterError)
		self._pathJob2PID = config.getWorkPath('params.map.gz')
		self._pathParams = config.getWorkPath('params.dat.gz')

		# Find out if init should be performed - overrides userResync!
		userInit = config.getState('init', detail = 'parameters')
		# Init needed if no parameter log exists
		needInit = not (os.path.exists(self._pathParams) and os.path.exists(self._pathJob2PID))
		if userInit and not needInit and (source.getMaxParameters() is not None):
			self._log.warning('Re-Initialization will overwrite the current mapping between jobs and parameter/dataset content! This can lead to invalid results!')
			# Let the user replace the requested init by a resync
			if utils.getUserBool('Do you want to perform a syncronization between the current mapping and the new one to avoid this?', True):
				userInit = False
		doInit = userInit or needInit

		# Find out if resync should be performed
		userResync = config.getState('resync', detail = 'parameters')
		config.setState(False, 'resync', detail = 'parameters')
		pHash = self._rawSource.getHash()
		self._storedHash = config.get('parameter hash', pHash, persistent = True)
		# Resync needed if parameters have changed
		needResync = (self._storedHash != pHash)
		if needResync:
			self._log.info('Parameter hash has changed')
			self._log.debug('\told hash: %s', self._storedHash)
			self._log.debug('\tnew hash: %s', pHash)
			config.setState(True, 'init', detail = 'config')
		doResync = (userResync or needResync) and not doInit

		if doResync: # Perform sync
			self._storedHash = None
			self._resync_state = self.resync(force = True)
		elif doInit: # Write current state
			self._writeJob2PID(self._pathJob2PID)
			dumpSource = ParameterSource.getClass('GCDumpParameterSource')
			dumpSource.write(self._pathParams, self)
		else: # Reuse old mapping - the stored hash is left untouched
			activity = Activity('Loading cached parameter information')
			self._readJob2PID()
			activity.finish()
			return
		config.set('parameter hash', self._rawSource.getHash())
예제 #18
0
	def _initSockets(self, **kwargs):
		"""Set up the state used to rotate through SSH control sockets.

		Keyword arguments and defaults: ``needSocket`` (True), ``socketMinSec``
		(300), ``socketCount`` (2, never less than 2), ``socketMaxMiss`` (2)
		and ``socketDir`` (empty).
		"""
		self._needSocket = kwargs.get("needSocket", True)
		self._socketMinSec = kwargs.get("socketMinSec", 300)
		self._socketCount = max(2, kwargs.get("socketCount", 2))
		self._socketIndex = 0
		self._socketMaxMiss = kwargs.get("socketMaxMiss", 2)
		self._socketMisses = 0
		# sockets should reside in secure, managed directory
		givenDir = kwargs.get("socketDir", "")
		# NOTE(review): the bound of 105 characters presumably relates to the
		# length limit of unix socket paths - confirm
		useGivenDir = bool(givenDir) and (len(givenDir) < 105)
		if useGivenDir:
			self._socketDir = givenDir
			ensureDirExists(self._socketDir, name = "SSH connection socket container directory")
		else:
			self._socketDir = tempfile.mkdtemp()
		self._log(logging.DEBUG1, 'Using socket directoy %s' % self._socketDir)
		# create list of socket names and corresponding arguments to rotate through
		self._socketList = []
		self._socketArgList = []
		for socketIndex in irange(self._socketCount):
			socketPath = os.path.join(self._socketDir, str(socketIndex))
			self._socketList.append(socketPath)
			self._socketArgList.append(["-oControlMaster=auto", "-oControlPath=%s" % socketPath])
		self._socketProcs = {}
예제 #19
0
	def __init__(self, config, wmsName):
		"""Set up work paths, the access token and the storage managers.

		``config`` provides the work directory paths and the plugins whose
		instances are stored on this object; ``wmsName`` is passed on to
		``WMS.__init__``.
		"""
		WMS.__init__(self, config, wmsName)
		clsName = self.__class__.__name__
		# Only mention the class name if it differs from the WMS name
		if self.wmsName == clsName.upper():
			infoMsg = 'Using batch system: %s' % self.wmsName
		else:
			infoMsg = 'Using batch system: %s (%s)' % (clsName, self.wmsName)
		utils.vprint(infoMsg, -1)

		self.errorLog = config.getWorkPath('error.tar')
		self._outputPath = config.getWorkPath('output')
		utils.ensureDirExists(self._outputPath, 'output directory')
		self._failPath = config.getWorkPath('fail')

		# Initialise access token, broker and storage manager
		tokenPlugin = config.getCompositePlugin(['access token', 'proxy'], 'TrivialAccessToken',
			'MultiAccessToken', cls = AccessToken, inherit = True, tags = [self])
		self._token = tokenPlugin.getInstance()

		# UI -> SE -> WN
		seInPlugin = config.getPlugin('se input manager', 'SEStorageManager', cls = StorageManager, tags = [self])
		self.smSEIn = seInPlugin.getInstance('se', 'se input', 'SE_INPUT')
		sbInPlugin = config.getPlugin('sb input manager', 'LocalSBStorageManager', cls = StorageManager, tags = [self])
		self.smSBIn = sbInPlugin.getInstance('sandbox', 'sandbox', 'SB_INPUT')
		# UI <- SE <- WN
		seOutPlugin = config.getPlugin('se output manager', 'SEStorageManager', cls = StorageManager, tags = [self])
		self.smSEOut = seOutPlugin.getInstance('se', 'se output', 'SE_OUTPUT')
		self.smSBOut = None
예제 #20
0
파일: wms.py 프로젝트: Fra-nk/grid-control
	def deployTask(self, task, monitor, transferSE, transferSB):
		"""Register the SE input files of ``task`` and pack the sandbox tarball.

		``transferSE`` triggers the transfer of the SE input files;
		``transferSB`` forces the sandbox to be packed even if the sandbox
		file already exists. ``monitor`` is handed on to ``_getSandboxFiles``.
		"""
		# HACK: keep the third field of every output sandbox entry
		self.outputFiles = [sbEntry[2] for sbEntry in self._getSandboxFilesOut(task)]
		task.validateVariables()

		# add task SE files to SM
		seTargets = [seEntry[2] for seEntry in task.getSEInFiles()]
		self.smSEIn.addFiles(seTargets)
		# Transfer common SE files
		if transferSE:
			self.smSEIn.doTransfer(task.getSEInFiles())

		def convert(fnList):
			# Strings become (path, basename, False); every other item is
			# accessed via its ``name`` attribute and passed on as third field
			for item in fnList:
				if not isinstance(item, str):
					yield (None, os.path.basename(item.name), item)
				else:
					yield (item, os.path.basename(item), False)

		# Package sandbox tar file
		self._log.log(logging.INFO1, 'Packing sandbox')
		sandbox = self._getSandboxName(task)
		utils.ensureDirExists(os.path.dirname(sandbox), 'sandbox directory')
		if not os.path.exists(sandbox) or transferSB:
			sbFiles = self._getSandboxFiles(task, monitor, [self.smSEIn, self.smSEOut])
			utils.genTarball(sandbox, convert(sbFiles))
예제 #21
0
	def __init__(self, config, wmsName, wmsClass):
		"""Set up work paths, the proxy and the storage managers.

		``config`` provides the work directory paths and the classes whose
		instances are stored on this object; ``wmsName`` and ``wmsClass``
		are passed on to ``WMS.__init__``.
		"""
		WMS.__init__(self, config, wmsName, wmsClass)
		clsName = self.__class__.__name__
		# Only mention the class name if it differs from the WMS name
		if self.wmsName == clsName.upper():
			infoMsg = 'Using batch system: %s' % self.wmsName
		else:
			infoMsg = 'Using batch system: %s (%s)' % (clsName, self.wmsName)
		utils.vprint(infoMsg, -1)

		self.errorLog = config.getWorkPath('error.tar')
		self._outputPath = config.getWorkPath('output')
		utils.ensureDirExists(self._outputPath, 'output directory')
		self._failPath = config.getWorkPath('fail')

		# Initialise proxy, broker and storage manager
		proxyFactory = ClassFactory(config, ('proxy', 'TrivialProxy'), ('proxy manager', 'MultiProxy'),
			cls = Proxy, tags = [self])
		self.proxy = proxyFactory.getInstance()

		# UI -> SE -> WN
		seInClass = config.getClass('se input manager', 'SEStorageManager', cls = StorageManager, tags = [self])
		self.smSEIn = seInClass.getInstance('se', 'se input', 'SE_INPUT')
		sbInClass = config.getClass('sb input manager', 'LocalSBStorageManager', cls = StorageManager, tags = [self])
		self.smSBIn = sbInClass.getInstance('sandbox', 'sandbox', 'SB_INPUT')
		# UI <- SE <- WN
		seOutClass = config.getClass('se output manager', 'SEStorageManager', cls = StorageManager, tags = [self])
		self.smSEOut = seOutClass.getInstance('se', 'se output', 'SE_OUTPUT')
		self.smSBOut = None
예제 #22
0
	def deployTask(self, module, monitor):
		"""Register the SE input files of ``module`` and pack the sandbox tarball.

		``module`` is validated via ``validateVariables`` and provides the SE
		input files; ``monitor`` is handed on to ``_getSandboxFiles``. The SE
		transfer and the repacking of an existing sandbox are each gated by
		a state query on ``self.config``.
		"""
		# Unpack the 3-tuples in a comprehension: tuple parameters in lambdas
		# are a syntax error on Python 3 (PEP 3113) and map() would only
		# return an iterator there - this form yields a list on 2 and 3.
		self.outputFiles = [t for (d, s, t) in self._getSandboxFilesOut(module)] # HACK
		module.validateVariables()

		self.smSEIn.addFiles([t for (d, s, t) in module.getSEInFiles()]) # add module SE files to SM
		# Transfer common SE files
		if self.config.getState(detail = 'storage'):
			self.smSEIn.doTransfer(module.getSEInFiles())

		def convert(fnList):
			# Strings become (path, basename, False); every other item is
			# accessed via its ``name`` attribute and passed on as third field
			for fn in fnList:
				if isinstance(fn, str):
					yield (fn, os.path.basename(fn), False)
				else:
					yield (None, os.path.basename(fn.name), fn)

		# Package sandbox tar file
		utils.vprint('Packing sandbox:')
		sandbox = self._getSandboxName(module)
		utils.ensureDirExists(os.path.dirname(sandbox), 'sandbox directory')
		# The sandbox state is only queried if a sandbox file already exists
		if not os.path.exists(sandbox) or self.config.getState(detail = 'sandbox'):
			utils.genTarball(sandbox, convert(self._getSandboxFiles(module, monitor, [self.smSEIn, self.smSEOut])))
예제 #23
0
파일: wms.py 프로젝트: Fra-nk/grid-control
	def __init__(self, config, name, checkExecutor, cancelExecutor):
		"""Set up executors, work paths, the runtime library copy, storage managers and the access token.

		config         -- configuration object; provides work paths and plugins
		name           -- forwarded to WMS.__init__
		checkExecutor  -- stored as self._check_executor after setup(self._log)
		cancelExecutor -- stored as self._cancel_executor after setup(self._log)
		"""
		WMS.__init__(self, config, name)

		# Hand this backend's logger to both executors before keeping references to them
		checkExecutor.setup(self._log)
		cancelExecutor.setup(self._log)
		self._check_executor = checkExecutor
		self._cancel_executor = cancelExecutor

		# The implementing class is only mentioned if its upper-cased name differs from self._name
		implName = self.__class__.__name__
		if self._name == implName.upper():
			self._log.info('Using batch system: %s', self._name)
		else:
			self._log.info('Using batch system: %s (%s)', implName, self._name)

		self.errorLog = config.getWorkPath('error.tar')

		# Create the work directory copy of gc-run.lib (with the version placeholder filled in) if missing
		self._runlib = config.getWorkPath('gc-run.lib')
		if not os.path.exists(self._runlib):
			runlibFile = SafeFile(self._runlib, 'w')
			template = SafeFile(utils.pathShare('gc-run.lib')).read()
			gcVersion = __import__('grid_control').__version__
			runlibFile.write(template.replace('__GC_VERSION__', gcVersion))
			runlibFile.close()

		self._outputPath = config.getWorkPath('output')
		self._filecachePath = config.getWorkPath('files')
		utils.ensureDirExists(self._outputPath, 'output directory')
		self._failPath = config.getWorkPath('fail')

		# Initialise access token and storage managers

		def newStorageManager(option, default, *pluginArgs):
			# All storage managers share the plugin base class and tags; only option, default and pargs differ
			return config.getPlugin(option, default, cls = StorageManager,
				tags = [self], pargs = pluginArgs)

		# UI -> SE -> WN
		self.smSEIn = newStorageManager('se input manager', 'SEStorageManager', 'se', 'se input', 'SE_INPUT')
		self.smSBIn = newStorageManager('sb input manager', 'LocalSBStorageManager', 'sandbox', 'sandbox', 'SB_INPUT')
		# UI <- SE <- WN
		self.smSEOut = newStorageManager('se output manager', 'SEStorageManager', 'se', 'se output', 'SE_OUTPUT')
		self.smSBOut = None

		self._token = config.getCompositePlugin(['proxy', 'access token'], 'TrivialAccessToken',
			'MultiAccessToken', cls = AccessToken, inherit = True, tags = [self])
예제 #24
0
	def _secureLinkDirectory(self, sshLink, enforce = True):
		"""Make sure the directory holding 'sshLink' exists and is accessible by the owner only.

		sshLink -- path of the SSH link; only its directory part is used
		enforce -- not evaluated here; failures are handled according to self.socketEnforce

		Returns True on success and False if a step failed while self.socketEnforce is not set.
		With self.socketEnforce set, failures are raised instead (BackendError for the chmod step).
		"""
		try:
			linkDir = ensureDirExists(os.path.dirname(sshLink), 'SSH link direcory', BackendError)
		except Exception:
			if self.socketEnforce:
				raise
			return False

		# The directory of the expanded "~/.ssh/" path keeps its current permissions
		userSSHDir = os.path.dirname(os.path.expanduser("~/.ssh/"))
		if linkDir == userSSHDir:
			return True

		try:
			os.chmod(linkDir, stat.S_IRWXU)
		except Exception:
			if not self.socketEnforce:
				return False
			raise BackendError("Could not secure directory for SSHLink:\n	%s" % linkDir)
		return True
예제 #25
0
    def retrieveJobs(self, ids):
        """Process the output sandboxes returned by self._getJobsOutput.

        Yields tuples (jobNum, exitCode, jobData, path):
          - (jobNum, exitCode, jobData, path) for a job whose job.info was parsed
            and whose output was moved below self._outputPath,
          - (jobNum, -1, {}, None) in all failure cases.
        Raises BackendError if the job number in job.info differs from the expected one.
        """
        wmsLog = logging.getLogger('wms')

        def forceMove(source, target):
            # Replace 'target' by 'source'; the result tells whether both steps worked
            try:
                if os.path.exists(target):
                    shutil.rmtree(target)
            except IOError:
                wmsLog.exception('%r cannot be removed', target)
                return False
            try:
                shutil.move(source, target)
            except IOError:
                wmsLog.exception('Error moving job output directory from %r to %r', source, target)
                moved = False
            else:
                moved = True
            return moved

        retrievedJobs = []

        for (wmsJobNum, outputDir) in self._getJobsOutput(ids):
            if outputDir is None:
                # No output directory => job could not be retrieved
                # (jobs that were already handled successfully are not reported again)
                if wmsJobNum not in retrievedJobs:
                    yield (wmsJobNum, -1, {}, None)
                continue

            if wmsJobNum is None:
                # Directory without job number => leftovers of a job retrieval
                continue

            # Job number and directory available => retrieval from the WMS was ok
            infoFile = os.path.join(outputDir, 'job.info')
            parsedInfo = WMS.parseJobInfo(infoFile)

            if not parsedInfo:
                # Remove empty directories bottom-up; non-empty ones are left alone
                for (subDir, _dirNames, _fileNames) in os.walk(outputDir, topdown=False):
                    try:
                        os.rmdir(subDir)
                    except Exception:
                        pass

                if os.path.exists(outputDir):
                    # Something is left => preserve the failed job
                    utils.ensureDirExists(self._failPath, 'failed output directory')
                    failTarget = os.path.join(self._failPath, os.path.basename(outputDir))
                    forceMove(outputDir, failTarget)

                yield (wmsJobNum, -1, {}, None)
                continue

            (jobNum, jobExitCode, jobData) = parsedInfo
            if jobNum != wmsJobNum:
                raise BackendError('Invalid job id in job file %s' % infoFile)

            jobTarget = os.path.join(self._outputPath, 'job_%d' % jobNum)
            if forceMove(outputDir, jobTarget):
                retrievedJobs.append(wmsJobNum)
                yield (jobNum, jobExitCode, jobData, outputDir)
            else:
                yield (jobNum, -1, {}, None)
예제 #26
0
	def getSandboxPath(self, jobNum=''):
		"""Return what utils.ensureDirExists gives for the sandbox directory of 'jobNum' below self.sandPath."""
		# The empty last component makes the joined path end with a path separator
		return utils.ensureDirExists(
			os.path.join(self.sandPath, str(jobNum), ''),
			'sandbox directory', BackendError)
예제 #27
0
	def _getJobsOutput(self, allIds):
		"""Fetch the output sandboxes of the given jobs in chunks of self._nJobsPerChunk.

		allIds -- sequence of pairs; dict(chunk) maps the first entry of each pair to its job number

		Yields tuples (jobNum, outputDir):
		  (jobNum, path) for every output directory reported by the output command,
		  (None, path)   for entries found in the temporary directory after a failed command,
		  (jobNum, None) for jobs whose output directory was not reported.
		Raises BackendError if the temporary directory cannot be created.
		"""
		if not allIds:
			# A plain return ends the generator; 'raise StopIteration' turns into a RuntimeError (PEP 479)
			return

		basePath = os.path.join(self._outputPath, 'tmp')
		try:
			if len(allIds) == 1:
				# For single jobs create single subdir
				basePath = os.path.join(basePath, md5(allIds[0][0]).hexdigest())
			utils.ensureDirExists(basePath)
		except Exception:
			raise BackendError('Temporary path "%s" could not be created.' % basePath, BackendError)

		activity = utils.ActivityLog('retrieving job outputs')
		for ids in imap(lambda x: allIds[x:x+self._nJobsPerChunk], irange(0, len(allIds), self._nJobsPerChunk)):
			jobNumMap = dict(ids)
			jobs = ' '.join(self._getRawIDs(ids))
			log = tempfile.mktemp('.log')

			proc = utils.LoggedProcess(self._outputExec,
				'--noint --logfile "%s" --dir "%s" %s' % (log, basePath, jobs))

			# yield output dirs
			# NOTE(review): todo, done and log are rebound for every chunk, so the reporting, purging
			# and cleanup after the loop only cover the last chunk - confirm whether this is intended
			todo = list(jobNumMap.values()) # real list: entries get removed below (dict views have no remove())
			done = []
			currentJobNum = None
			for line in imap(str.strip, proc.iter()):
				match = re.match(self._outputRegex, line)
				if match:
					found = match.groupdict()
					currentJobNum = jobNumMap.get(self._createId(found['rawId']))
					todo.remove(currentJobNum)
					done.append(found['rawId'])
					outputDir = found['outputDir']
					if os.path.exists(outputDir):
						if 'GC_WC.tar.gz' in os.listdir(outputDir):
							# Unpack the wildcard tarball in place and drop it afterwards
							wildcardTar = os.path.join(outputDir, 'GC_WC.tar.gz')
							try:
								tarfile.TarFile.open(wildcardTar, 'r:gz').extractall(outputDir)
								os.unlink(wildcardTar)
							except Exception:
								utils.eprint("Can't unpack output files contained in %s" % wildcardTar)
					yield (currentJobNum, outputDir)
					currentJobNum = None
			retCode = proc.wait()

			if retCode != 0:
				if 'Keyboard interrupt raised by user' in proc.getError():
					utils.removeFiles([log, basePath])
					return
				else:
					proc.logError(self.errorLog, log = log)
				utils.eprint('Trying to recover from error ...')
				for dirName in os.listdir(basePath):
					yield (None, os.path.join(basePath, dirName))
		del activity

		# return unretrievable jobs
		for jobNum in todo:
			yield (jobNum, None)

		purgeLog = tempfile.mktemp('.log')
		purgeProc = utils.LoggedProcess(self._purgeExec, '--noint --logfile "%s" %s' % (purgeLog, " ".join(done)))
		retCode = purgeProc.wait()
		if retCode != 0:
			if not self.explainError(purgeProc, retCode):
				# Log the failed purge process itself (not the output retrieval process)
				purgeProc.logError(self.errorLog, log = purgeLog, jobs = done)
		utils.removeFiles([log, purgeLog, basePath])
예제 #28
0
    def retrieveJobs(self, gcID_jobNum_List):
        """Process the output sandboxes returned by self._getJobsOutput.

        Yields tuples (jobNum, exitCode, rawData, path):
          - the values taken from the parsed job info together with the retrieval path
            for a job whose output was moved below self._outputPath,
          - (jobNum, -1, {}, None) in all failure cases.
        Raises BackendError if the parsed job number differs from the expected one.
        """
        def forceMove(source, target):
            # Replace 'target' by 'source'; the result tells whether both steps worked
            try:
                if os.path.exists(target):
                    shutil.rmtree(target)
            except IOError:
                self._log.exception('%r cannot be removed', target)
                return False
            try:
                shutil.move(source, target)
            except IOError:
                self._log.exception('Error moving job output directory from %r to %r', source, target)
                moved = False
            else:
                moved = True
            return moved

        retrievedJobs = []

        for (wmsJobNum, outputDir) in self._getJobsOutput(gcID_jobNum_List):
            if outputDir is None:
                # No output directory => job could not be retrieved
                # (jobs that were already handled successfully are not reported again)
                if wmsJobNum not in retrievedJobs:
                    yield (wmsJobNum, -1, {}, None)
                continue

            if wmsJobNum is None:
                # Directory without job number => leftovers of a job retrieval
                continue

            # Job number and directory available => retrieval from the WMS was ok
            infoFile = os.path.join(outputDir, 'job.info')  # only used for the error message below
            try:
                parsedInfo = self._job_parser.process(outputDir)
            except Exception:
                self._log.exception('Unable to parse job.info')
                parsedInfo = None

            if not parsedInfo:
                # Remove empty directories bottom-up; non-empty ones are left alone
                for (subDir, _dirNames, _fileNames) in os.walk(outputDir, topdown=False):
                    try:
                        os.rmdir(subDir)
                    except Exception:
                        clear_current_exception()

                if os.path.exists(outputDir):
                    # Something is left => preserve the failed job
                    utils.ensureDirExists(self._failPath, 'failed output directory')
                    failTarget = os.path.join(self._failPath, os.path.basename(outputDir))
                    forceMove(outputDir, failTarget)

                yield (wmsJobNum, -1, {}, None)
                continue

            jobNum = parsedInfo[JobResult.JOBNUM]
            if jobNum != wmsJobNum:
                raise BackendError('Invalid job id in job file %s' % infoFile)

            jobTarget = os.path.join(self._outputPath, 'job_%d' % jobNum)
            if forceMove(outputDir, jobTarget):
                retrievedJobs.append(wmsJobNum)
                yield (jobNum, parsedInfo[JobResult.EXITCODE], parsedInfo[JobResult.RAW], outputDir)
            else:
                yield (jobNum, -1, {}, None)
예제 #29
0
						yield (jobNum, -1, {})
					continue
				except:
					# Something went wrong
					utils.eprint('Warning: "%s" seems broken.' % info)

			# Clean empty dirs
			for subDir in map(lambda x: x[0], os.walk(dir, topdown=False)):
				try:
					os.rmdir(subDir)
				except:
					pass

			if os.path.exists(dir):
				# Preserve failed job
				utils.ensureDirExists(self._failPath, 'failed output directory')
				forceMove(dir, os.path.join(self._failPath, os.path.basename(dir)))

			yield (inJobNum, -1, {})


	def _getSandboxName(self, module):
		"""Return the work path of the sandbox tarball for the given task module and this backend."""
		# Path components: 'files' / task id / backend name / tarball name
		pathParts = ('files', module.taskID, self.wmsName, 'gc-sandbox.tar.gz')
		return self.config.getWorkPath(*pathParts)


	def _getSandboxFilesIn(self, module):
		"""Return the input sandbox entries as a list of three-element tuples.

		Each tuple holds a description, a path (from utils.pathShare or
		self._getSandboxName) and a file name.
		"""
		runScript = ('GC Runtime', utils.pathShare('gc-run.sh'), 'gc-run.sh')
		runLibrary = ('GC Runtime library', utils.pathShare('gc-run.lib'), 'gc-run.lib')
		taskSandbox = ('GC Sandbox', self._getSandboxName(module), 'gc-sandbox.tar.gz')
		return [runScript, runLibrary, taskSandbox]
예제 #30
0
	def _getJobsOutput(self, ids):
		"""Fetch the output sandboxes of the given jobs with a single call of the output command.

		ids -- sequence of pairs; dict(ids) maps the first entry of each pair to its job number

		Yields tuples (jobNum, outputDir):
		  (jobNum, path) for every output line starting with the temporary path,
		  (None, path)   for entries found in the temporary directory after a failed command,
		  (jobNum, None) for jobs whose output directory was not reported.
		Raises BackendError if the temporary directory cannot be created.
		"""
		if not ids:
			# A plain return ends the generator; 'raise StopIteration' turns into a RuntimeError (PEP 479)
			return

		basePath = os.path.join(self._outputPath, 'tmp')
		# Bound before the try block so that the error message below can always refer to it
		tmpPath = basePath
		try:
			if len(ids) == 1:
				# For single jobs create single subdir
				tmpPath = os.path.join(basePath, md5(ids[0][0]).hexdigest())
			utils.ensureDirExists(tmpPath)
		except Exception:
			raise BackendError('Temporary path "%s" could not be created.' % tmpPath, RuntimeError)

		jobNumMap = dict(ids)
		jobs = self.writeWMSIds(ids)
		log = tempfile.mktemp('.log')

		activity = utils.ActivityLog('retrieving job outputs')
		proc = utils.LoggedProcess(self._outputExec,
			'--noint --logfile "%s" -i "%s" --dir "%s"' % (log, jobs, tmpPath))

		# yield output dirs
		todo = list(jobNumMap.values()) # real list: entries get removed below (dict views have no remove())
		currentJobNum = None
		for line in map(str.strip, proc.iter()):
			if line.startswith(tmpPath):
				# Output directory line - belongs to the job id seen on a previous line
				todo.remove(currentJobNum)
				outputDir = line # already stripped by the map() above
				if os.path.exists(outputDir):
					if 'GC_WC.tar.gz' in os.listdir(outputDir):
						# Unpack the wildcard tarball in place and drop it afterwards
						wildcardTar = os.path.join(outputDir, 'GC_WC.tar.gz')
						try:
							tarfile.TarFile.open(wildcardTar, 'r:gz').extractall(outputDir)
							os.unlink(wildcardTar)
						except Exception:
							utils.eprint("Can't unpack output files contained in %s" % wildcardTar)
				yield (currentJobNum, outputDir)
				currentJobNum = None
			else:
				# Any other line may carry a job id; unknown ids keep the current job number
				currentJobNum = jobNumMap.get(self._createId(line), currentJobNum)
		retCode = proc.wait()
		del activity

		if retCode != 0:
			if 'Keyboard interrupt raised by user' in proc.getError():
				utils.removeFiles([log, jobs, basePath])
				return
			else:
				proc.logError(self.errorLog, log = log)
			utils.eprint('Trying to recover from error ...')
			for dirName in os.listdir(basePath):
				yield (None, os.path.join(basePath, dirName))

		# return unretrievable jobs
		for jobNum in todo:
			yield (jobNum, None)

		utils.removeFiles([log, jobs, basePath])
예제 #31
0
	def getSandboxPath(self, subdirToken=""):
		"""Return what utils.ensureDirExists gives for the 'subdirToken' directory below self._sandboxDir."""
		# The empty last component makes the joined path end with a path separator
		return utils.ensureDirExists(
			os.path.join(self._sandboxDir, str(subdirToken), ''),
			'sandbox directory', BackendError)
예제 #32
0
 def getSandboxPath(self, subdirToken=""):
     """Return what utils.ensureDirExists gives for the 'subdirToken' directory below self._sandboxDir."""
     # The empty last component makes the joined path end with a path separator
     tokenDir = os.path.join(self._sandboxDir, str(subdirToken), '')
     dirLabel = 'sandbox directory'
     return utils.ensureDirExists(tokenDir, dirLabel, BackendError)
예제 #33
0
    def _getJobsOutput(self, ids):
        """Fetch the output sandboxes of the given jobs with a single call of the output command.

        ids -- sequence of pairs; dict(ids) maps the first entry of each pair to its job number

        Yields tuples (jobNum, outputDir):
          (jobNum, path) for every output line starting with the temporary path,
          (None, path)   for entries found in the temporary directory after a failed command,
          (jobNum, None) for jobs whose output directory was not reported.
        Raises BackendError if the temporary directory cannot be created.
        """
        if not ids:
            # A plain return ends the generator; 'raise StopIteration' turns into a RuntimeError (PEP 479)
            return

        basePath = os.path.join(self._outputPath, 'tmp')
        # Bound before the try block so that the error message below can always refer to it
        tmpPath = basePath
        try:
            if len(ids) == 1:
                # For single jobs create single subdir
                tmpPath = os.path.join(basePath, md5(ids[0][0]).hexdigest())
            utils.ensureDirExists(tmpPath)
        except Exception:
            raise BackendError(
                'Temporary path "%s" could not be created.' % tmpPath,
                BackendError)

        jobNumMap = dict(ids)
        jobs = self.writeWMSIds(ids)

        activity = Activity('retrieving %d job outputs' % len(ids))
        proc = LocalProcess(self._outputExec, '--noint', '--logfile',
                            '/dev/stderr', '-i', jobs, '--dir', tmpPath)

        # yield output dirs
        # real list: entries get removed below (dict views have no remove())
        todo = list(jobNumMap.values())
        currentJobNum = None
        for line in imap(str.strip, proc.stdout.iter(timeout=60)):
            if line.startswith(tmpPath):
                # Output directory line - belongs to the job id seen on a previous line
                todo.remove(currentJobNum)
                outputDir = line  # already stripped by the imap() above
                if os.path.exists(outputDir):
                    if 'GC_WC.tar.gz' in os.listdir(outputDir):
                        # Unpack the wildcard tarball in place and drop it afterwards
                        wildcardTar = os.path.join(outputDir, 'GC_WC.tar.gz')
                        try:
                            tarfile.TarFile.open(wildcardTar,
                                                 'r:gz').extractall(outputDir)
                            os.unlink(wildcardTar)
                        except Exception:
                            self._log.error(
                                'Can\'t unpack output files contained in %s',
                                wildcardTar)
                yield (currentJobNum, outputDir)
                currentJobNum = None
            else:
                # Any other line may carry a job id; unknown ids keep the current job number
                currentJobNum = jobNumMap.get(self._createId(line),
                                              currentJobNum)
        retCode = proc.status(timeout=0, terminate=True)
        activity.finish()

        if retCode != 0:
            if 'Keyboard interrupt raised by user' in proc.stderr.read(
                    timeout=0):
                utils.removeFiles([jobs, basePath])
                return
            else:
                self._log.log_process(proc,
                                      files={'jobs': SafeFile(jobs).read()})
            self._log.error('Trying to recover from error ...')
            for dirName in os.listdir(basePath):
                yield (None, os.path.join(basePath, dirName))

        # return unretrievable jobs
        for jobNum in todo:
            yield (jobNum, None)

        utils.removeFiles([jobs, basePath])
예제 #34
0
 def getSandboxPath(self, jobNum=''):
     """Return what utils.ensureDirExists gives for the sandbox directory of 'jobNum' below self.sandPath."""
     # The empty last component makes the joined path end with a path separator
     jobDir = os.path.join(self.sandPath, str(jobNum), '')
     dirLabel = 'sandbox directory'
     return utils.ensureDirExists(jobDir, dirLabel, BackendError)
예제 #35
0
	def __init__(self, config):
		"""Initialise the cache list and the sandbox base path from the configuration.

		config -- configuration object providing getPath() and getWorkPath()
		"""
		# Starts out as an empty list
		self._cache = []
		# Option 'sandbox path'; the default is the 'sandbox' work path and the path need not exist yet
		self._path = config.getPath('sandbox path', config.getWorkPath('sandbox'), mustExist = False)
		# NOTE(review): presumably creates the directory if missing and reports failures as BackendError - confirm against utils.ensureDirExists
		utils.ensureDirExists(self._path, 'sandbox base', BackendError)