def testRandomize(self):
    """Check that List.randomize keeps the length and the elements of its input."""
    # An empty list must come back as an empty list.
    aList = []
    randList = List.randomize(aList)
    self.assertEqual(randList, [])

    # A non-empty list must keep its length and contain exactly the same
    # elements; membership is checked in both directions.
    aList = [1, 2, 3]
    randList = List.randomize(aList)
    self.assertEqual(len(aList), len(randList))
    for x in aList:
        # assertIn reports the missing element instead of "False != True"
        self.assertIn(x, randList)
    for x in randList:
        self.assertIn(x, aList)
def getSocket(self, hostAddress, **kwargs):
    """Connect to ``hostAddress`` and run the client handshake.

    The host name is resolved to a list of IPs which are tried in random
    order. The connect + handshake sequence is attempted at most three times.

    :param hostAddress: pair; item 0 is the host name, item 1 is paired with
                        every resolved IP to build the address to connect to
    :param kwargs: passed to ``generateClientInfo``; a true ``enableSessions``
                   entry makes the session of the connected socket be stored
                   in ``gSessionManager``
    :return: S_OK(socketInfo) on success, otherwise the failing result / S_ERROR
    """
    hostName = hostAddress[0]
    result = self.generateClientInfo(hostName, kwargs)
    if not result['OK']:
        return result
    socketInfo = result['Value']

    result = Network.getIPsForHostName(hostName)
    if not result['OK']:
        return S_ERROR("Could not resolve %s: %s" % (hostName, result['Message']))
    candidateIPs = List.randomize(result['Value'])

    for _attempt in range(3):
        failures = []
        for ip in candidateIPs:
            ipAddress = (ip, hostAddress[1])
            result = self.__connect(socketInfo, ipAddress)
            if result['OK']:
                sslSocket = result['Value']
                break
            failures.append("%s: %s" % (ipAddress, result['Message']))
        else:
            # None of the IPs accepted the connection during this attempt
            return S_ERROR("Could not connect to %s: %s" % (hostAddress, ",".join(failures)))

        result = socketInfo.doClientHandshake()
        if result['OK']:
            # Handshake went fine, no retry needed
            break

    # Every attempt connected but the last handshake still failed
    if not result['OK']:
        return result

    if kwargs.get('enableSessions'):
        gSessionManager.set(hash(hostAddress), sslSocket.get_session())
    return S_OK(socketInfo)
def __submitPilots(self, taskQueueDict, pilotsToSubmit):
    """Queue the submission on the first enabled SubmitPool that accepts it.

    SubmitPools are tried in random order. A SubmitPool whose thread pool
    refuses the job is disabled until the next iteration.

    :param dict taskQueueDict: task queue description; an optional
                               'SubmitPools' entry overrides the
                               'DefaultSubmitPools' option
    :param pilotsToSubmit: forwarded to the director and returned as the value
    :return: S_OK(pilotsToSubmit)
    """
    # A task queue may ask for specific SubmitPools, otherwise use the default ones
    if 'SubmitPools' in taskQueueDict:
        requestedPools = taskQueueDict['SubmitPools']
    else:
        requestedPools = self.am_getOption('DefaultSubmitPools')

    for submitPool in List.randomize(requestedPools):
        self.log.verbose('Trying SubmitPool:', submitPool)
        if submitPool not in self.directors or not self.directors[submitPool]['isEnabled']:
            self.log.verbose('Not Enabled')
            continue

        directorInfo = self.directors[submitPool]
        threadPool = self.pools[directorInfo['pool']]
        director = directorInfo['director']
        queued = threadPool.generateJobAndQueueIt(
            director.submitPilots,
            args=(taskQueueDict, pilotsToSubmit, self.workDir),
            oCallback=self.callBack,
            oExceptionCallback=director.exceptionCallBack,
            blocking=False,
        )
        if queued['OK']:
            time.sleep(self.am_getOption('ThreadStartDelay'))
            break
        # Could not queue: disable this SubmitPool until the next iteration
        directorInfo['isEnabled'] = False

    return S_OK(pilotsToSubmit)
def putAndRegisterPROD3(args):
    """Upload and register every local file matching a pattern.

    :param list args: positional arguments, in order:
                      [0] glob pattern of the local output files,
                      [1] directory part used to build each LFN,
                      [2] JSON-encoded list of Storage Element names
    :return: DIRAC.S_OK() when all files were handled, otherwise the first
             failing result
    """
    outputpattern, outputpath = args[0], args[1]
    SEList = [str(SE) for SE in json.loads(args[2])]

    # Data manager bound to the DIRAC file catalog
    prod3dm = Prod3DataManager(['DIRACFileCatalog'])

    res = prod3dm._checkemptydir(outputpattern)
    if not res['OK']:
        return res

    for localfile in glob.glob(outputpattern):
        lfn = os.path.join(outputpath, os.path.basename(localfile))
        # Re-shuffle the SE list for every file
        SEList = List.randomize(SEList)
        res = prod3dm._putAndRegisterToSEList(lfn, localfile, SEList)
        if not res['OK']:
            # Stop at the first failure
            return res

    return DIRAC.S_OK()
def _getSEList(self, SEType='ProductionOutputs', DataType='SimtelProd'):
    """Build the ordered list of Storage Elements to use for data upload.

    The SEs configured under ``<SEType>/<DataType>`` are shuffled; the SEs of
    the current site that appear in that list are moved to the front.

    :param str SEType: first component of the option name
    :param str DataType: second component of the option name
    :return: DIRAC.S_OK(list of SE names) / DIRAC.S_ERROR if the list is empty
    """
    candidateSEs = List.randomize(Operations().getValue(os.path.join(SEType, DataType), []))
    DIRAC.gLogger.notice('List of %s SE: %s ' % (SEType, candidateSEs))

    # SEs of the site we are running on; none if the lookup fails
    siteResult = getSEsForSite(DIRAC.siteName())
    siteSEs = siteResult['Value'] if siteResult['OK'] else []

    preferredSEs = []
    for localSE in siteSEs:
        if localSE not in candidateSEs:
            continue
        DIRAC.gLogger.notice('The local Storage Element is an available SE: ', localSE)
        preferredSEs.append(localSE)
        candidateSEs.remove(localSE)

    # Local SEs first, then the remaining ones in random order
    orderedSEs = preferredSEs + candidateSEs
    if not orderedSEs:
        return DIRAC.S_ERROR('Error in building SEList')
    return DIRAC.S_OK(orderedSEs)
def __findServiceURL(self):
    """Work out the URL to use for ``self._destinationSrv``.

    Unless ``KW_IGNORE_GATEWAYS`` is set to a true value in ``self.kwargs``,
    one gateway configured under /DIRAC/Gateways/<siteName> is picked at random
    and used in place of the server part of the URL.

    :return: S_OK(url) / S_ERROR, or ``self.__initStatus`` when it is not OK
    """
    if not self.__initStatus['OK']:
        return self.__initStatus

    gatewayURL = False
    if not self.kwargs.get(self.KW_IGNORE_GATEWAYS):
        dRetVal = gConfig.getOption("/DIRAC/Gateways/%s" % DIRAC.siteName())
        if dRetVal['OK']:
            rawGatewayURL = List.randomize(List.fromChar(dRetVal['Value'], ","))[0]
            # Keep only "<protocol>://<host:port>" of the gateway URL
            gatewayURL = "/".join(rawGatewayURL.split("/")[:3])

    # A destination that already starts with a known protocol is a full URL
    for protocol in gProtocolDict:
        if self._destinationSrv.startswith("%s://" % protocol):
            gLogger.debug("Already given a valid url", self._destinationSrv)
            if not gatewayURL:
                return S_OK(self._destinationSrv)
            gLogger.debug("Reconstructing given URL to pass through gateway")
            path = "/".join(self._destinationSrv.split("/")[3:])
            finalURL = "%s/%s" % (gatewayURL, path)
            gLogger.debug("Gateway URL conversion:\n %s -> %s" % (self._destinationSrv, finalURL))
            return S_OK(finalURL)

    if gatewayURL:
        gLogger.debug("Using gateway", gatewayURL)
        return S_OK("%s/%s" % (gatewayURL, self._destinationSrv))

    try:
        # NOTE(review): nothing visible here uses `urls` or returns after a
        # successful lookup - the rest of the method may be missing from this copy
        urls = getServiceURL(self._destinationSrv, setup=self.setup)
    except Exception as e:
        return S_ERROR("Cannot get URL for %s in setup %s: %s" % (self._destinationSrv, self.setup, str(e)))
def __refresh(self):
    """Refresh the configuration from the first server that answers.

    Gateways returned by ``getGatewayURLs`` take precedence over the servers
    listed in ``gConfigurationData``; the chosen list is tried in random order.

    :return: the first successful update result, or S_ERROR listing the
             distinct error messages
    """
    self.__lastUpdateTime = time.time()
    gLogger.debug("Refreshing configuration...")
    gateways = getGatewayURLs("Configuration/Server")
    errorMessages = []
    if gateways:
        servers = gateways
        gLogger.debug("Using configuration gateway", str(servers[0]))
    else:
        servers = gConfigurationData.getServers()
        gLogger.debug("Refreshing from list %s" % str(servers))

    shuffledServers = List.randomize(servers)
    gLogger.debug("Randomized server list is %s" % ", ".join(shuffledServers))

    for serverURL in shuffledServers:
        from DIRAC.Core.DISET.RPCClient import RPCClient
        client = RPCClient(
            serverURL,
            useCertificates=gConfigurationData.useServerCertificate(),
            skipCACheck=gConfigurationData.skipCACheck())
        result = _updateFromRemoteLocation(client)
        if result['OK']:
            return result
        # Remember the failure and move on to the next server
        errorMessages.append(result['Message'])
        gLogger.warn(
            "Can't update from server",
            "Error while updating from %s: %s" % (serverURL, result['Message']))

    return S_ERROR("Reason(s):\n\t%s" % "\n\t".join(List.uniqueElements(errorMessages)))
def __refresh(self):
    """Refresh the configuration from the first server that answers.

    Gateways returned by ``getGatewayURLs`` take precedence over the servers
    listed in ``gConfigurationData``; the chosen list is tried in random order.
    The loop stops early when a server reports an "Insane environment".

    :return: the first successful update result, or S_ERROR listing the
             distinct error messages
    """
    self.__lastUpdateTime = time.time()
    gLogger.debug("Refreshing configuration...")
    gateways = getGatewayURLs("Configuration/Server")
    errorMessages = []
    if gateways:
        servers = gateways
        gLogger.debug("Using configuration gateway", str(servers[0]))
    else:
        servers = gConfigurationData.getServers()
        gLogger.debug("Refreshing from list %s" % str(servers))

    shuffledServers = List.randomize(servers)
    gLogger.debug("Randomized server list is %s" % ", ".join(shuffledServers))

    for serverURL in shuffledServers:
        from DIRAC.Core.DISET.RPCClient import RPCClient
        client = RPCClient(
            serverURL,
            useCertificates=gConfigurationData.useServerCertificate(),
            skipCACheck=gConfigurationData.skipCACheck())
        result = _updateFromRemoteLocation(client)
        if result['OK']:
            return result
        message = result['Message']
        errorMessages.append(message)
        gLogger.warn(
            "Can't update from server",
            "Error while updating from %s: %s" % (serverURL, message))
        if "Insane environment" in message:
            # No point in trying the remaining servers
            break

    return S_ERROR("Reason(s):\n\t%s" % "\n\t".join(List.uniqueElements(errorMessages)))
def _prepareJDL(self, taskQueueDict, workingDirectory, pilotOptions, pilotsToSubmit, ceMask, submitPrivatePilot, privateTQ):
    """Write the JDL file used for pilot submission.

    :return: dict with keys 'JDL' (result of ``_writeJDL``), 'Requirements',
             'Pilots' (``pilotsToSubmit``) and 'RB' (the selected broker)
    """
    # Pick one resource broker at random; it serves as logging destination and LB address
    RB = List.randomize(self.resourceBrokers)[0]
    loggingDestination = '"%s:9002"' % RB
    lbAddresses = '"%s:9000"' % RB
    nsAddresses = ', '.join(['"%s:7772"' % server for server in self.loggingServers])

    vo = getVO()
    if privateTQ or vo != 'lhcb':
        extraReq = "True"
    elif submitPrivatePilot:
        extraReq = "! AllowsGenericPilot"
    else:
        extraReq = "AllowsGenericPilot"

    # NOTE(review): the template literal is reproduced exactly as it appears in
    # this copy of the file; its line breaks look collapsed - confirm upstream
    rbJDL = """ AllowsGenericPilot = Member( "VO-lhcb-pilot" , other.GlueHostApplicationSoftwareRunTimeEnvironment ); Requirements = pilotRequirements && other.GlueCEStateStatus == "Production" && %s; RetryCount = 0; ErrorStorage = "%s/pilotError"; OutputStorage = "%s/pilotOutput"; # ListenerPort = 44000; ListenerStorage = "%s/Storage"; VirtualOrganisation = "lhcb"; LoggingTimeout = 30; LoggingSyncTimeout = 30; LoggingDestination = { %s }; # Default NS logger level is set to 0 (null) # max value is 6 (very ugly) NSLoggerLevel = 0; DefaultLogInfoLevel = 0; DefaultStatusLevel = 0; NSAddresses = { %s }; LBAddresses = { %s }; MyProxyServer = "no-myproxy.cern.ch"; """ % (
        extraReq,
        workingDirectory,
        workingDirectory,
        workingDirectory,
        loggingDestination,
        nsAddresses,
        lbAddresses,
    )

    pilotJDL, pilotRequirements = self._JobJDL(taskQueueDict, pilotOptions, ceMask)

    jdlPath = os.path.join(workingDirectory, '%s.jdl' % taskQueueDict['TaskQueueID'])
    jdl = self._writeJDL(jdlPath, [pilotJDL, rbJDL])

    return {
        'JDL': jdl,
        'Requirements': pilotRequirements + " && " + extraReq,
        'Pilots': pilotsToSubmit,
        'RB': RB,
    }
def getGatewayURLs(serviceName=""):
    """Return the gateway URLs configured for the local site, in random order.

    :param str serviceName: when given, each gateway is reduced to its first
                            three "/"-separated parts and ``serviceName`` is
                            appended to it
    :return: list of URLs, or False when no site or no gateway is configured
    """
    siteName = gConfigurationData.extractOptionFromCFG("/LocalSite/Site")
    if not siteName:
        return False
    gatewayOption = gConfigurationData.extractOptionFromCFG("/DIRAC/Gateways/%s" % siteName)
    if not gatewayOption:
        return False

    # Always split the comma-separated option value first: without a
    # serviceName the raw string itself used to be handed to List.randomize
    gatewayList = List.fromChar(gatewayOption, ",")
    if serviceName:
        gatewayList = ["%s/%s" % ("/".join(gw.split("/")[:3]), serviceName) for gw in gatewayList]
    return List.randomize(gatewayList)
def getServiceURLs(system, service=None, setup=False, failover=False):
    """Build the list of URLs configured for a service.

    :param str system: system name or full name e.g.: Framework/ProxyManager
    :param str service: service name, like 'ProxyManager'.
    :param str setup: DIRAC setup name, can be defined in dirac.cfg
    :param bool failover: also append the failover URLs after the regular ones

    :return: list -- complete urls. e.g. [dips://some-domain:3424/Framework/Service]
    :raises Exception: if a URL uses $MAINSERVERS$ and no main server is defined
    """
    system, service = divideFullName(system, service)
    systemSection = getSystemSection(system, setup=setup)
    mainServers = None
    resList = []

    # Regular URLs first, failover URLs (if requested) at the end
    prefixes = ["", "Failover"] if failover else [""]
    for prefix in prefixes:
        optionPath = "%s/%sURLs/%s" % (systemSection, prefix, service)
        configured = List.fromChar(gConfigurationData.extractOptionFromCFG(optionPath))

        urlList = []
        # The option may be missing, in which case there is nothing to iterate on
        for url in configured or []:
            if "$MAINSERVERS$" in url:
                # Placeholder form, e.g. dips://$MAINSERVERS$:1234/System/Component:
                # expand it into one URL per main server
                if not mainServers:
                    # Operations cannot be imported at the beginning because of a bootstrap problem
                    from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
                    mainServers = Operations(setup=setup).getValue("MainServers", [])
                if not mainServers:
                    raise Exception("No Main servers defined")
                expanded = [url.replace("$MAINSERVERS$", srv) for srv in mainServers]
            else:
                expanded = [url]

            for candidate in expanded:
                checked = checkComponentURL(candidate, system, service, pathMandatory=True)
                if checked not in urlList:
                    urlList.append(checked)

        # Each group is shuffled on its own so failover URLs stay last
        resList.extend(List.randomize(urlList))

    return resList
def _refresh(self, fromMaster=False):
    """Refresh the configuration from the first server that answers.

    :param bool fromMaster: when no gateway is configured, contact only the
                            master server instead of the full server list
    :return: S_OK() when there is no server to contact, the first successful
             update result, or S_ERROR listing the distinct error messages
    """
    self._lastUpdateTime = time.time()
    gLogger.debug("Refreshing configuration...")
    gateways = getGatewayURLs("Configuration/Server")
    errorMessages = []
    if gateways:
        servers = gateways
        gLogger.debug("Using configuration gateway", str(servers[0]))
    elif fromMaster:
        masterServer = gConfigurationData.getMasterServer()
        servers = [masterServer]
        gLogger.debug("Refreshing from master %s" % masterServer)
    else:
        servers = gConfigurationData.getServers()
        gLogger.debug("Refreshing from list %s" % str(servers))

    # Without any server we are supposed to use the local configuration only
    if not servers:
        return S_OK()

    shuffledServers = List.randomize(servers)
    gLogger.debug("Randomized server list is %s" % ", ".join(shuffledServers))

    for serverURL in shuffledServers:
        from DIRAC.ConfigurationSystem.Client.ConfigurationClient import ConfigurationClient

        client = ConfigurationClient(
            url=serverURL,
            useCertificates=gConfigurationData.useServerCertificate(),
            skipCACheck=gConfigurationData.skipCACheck(),
        )
        result = _updateFromRemoteLocation(client)
        if result["OK"]:
            self._refreshTime = gConfigurationData.getRefreshTime()
            return result
        message = result["Message"]
        errorMessages.append(message)
        gLogger.warn(
            "Can't update from server",
            "Error while updating from %s: %s" % (serverURL, message))
        if "Insane environment" in message:
            # No point in trying the remaining servers
            break

    return S_ERROR("Reason(s):\n\t%s" % "\n\t".join(List.uniqueElements(errorMessages)))
def getGatewayURLs(system="", service=None):
    """Get the gateway URLs configured for the local site, in random order.

    :param str system: system name or full name, like 'Framework/Service'.
    :param str service: service name, like 'ProxyManager'.

    :return: list of gateways (turned into component URLs when both system and
             service are known), or False when no site or gateway is configured
    """
    if system:
        system, service = divideFullName(system, service)

    siteName = gConfigurationData.extractOptionFromCFG("/LocalSite/Site")
    if not siteName:
        return False
    gatewayOption = gConfigurationData.extractOptionFromCFG("/DIRAC/Gateways/%s" % siteName)
    if not gatewayOption:
        return False

    shuffledGateways = List.randomize(List.fromChar(gatewayOption, ","))
    if not (system and service):
        # No component to point at: hand back the bare gateways
        return shuffledGateways

    componentURLs = []
    for gateway in shuffledGateways:
        if gateway:
            componentURLs.append(checkComponentURL(gateway, system, service))
    return componentURLs
def __refresh(self):
    """Refresh the configuration from the first server that answers.

    Gateways returned by ``getGatewayURLs`` take precedence over the servers
    listed in ``gConfigurationData``; the chosen list is tried in random order.

    :return: S_OK() when there is no server to contact, the first successful
             update result, or S_ERROR listing the distinct error messages
    """
    self.__lastUpdateTime = time.time()
    gLogger.debug("Refreshing configuration...")
    gateways = getGatewayURLs("Configuration/Server")
    errorMessages = []
    if gateways:
        servers = gateways
        gLogger.debug("Using configuration gateway", str(servers[0]))
    else:
        servers = gConfigurationData.getServers()
        gLogger.debug("Refreshing from list %s" % str(servers))

    # Without any server we are supposed to use the local configuration only
    if not servers:
        return S_OK()

    shuffledServers = List.randomize(servers)
    gLogger.debug("Randomized server list is %s" % ", ".join(shuffledServers))

    for serverURL in shuffledServers:
        from DIRAC.Core.DISET.RPCClient import RPCClient
        client = RPCClient(
            serverURL,
            useCertificates=gConfigurationData.useServerCertificate(),
            skipCACheck=gConfigurationData.skipCACheck())
        result = _updateFromRemoteLocation(client)
        if result['OK']:
            return result
        message = result['Message']
        errorMessages.append(message)
        gLogger.warn(
            "Can't update from server",
            "Error while updating from %s: %s" % (serverURL, message))
        if "Insane environment" in message:
            # No point in trying the remaining servers
            break

    return S_ERROR("Reason(s):\n\t%s" % "\n\t".join(List.uniqueElements(errorMessages)))
def configure(self, csSection, submitPool):
    """Common configuration for all Grid PilotDirectors.

    Runs the base ``PilotDirector`` configuration, reloads the configuration,
    purges the expired entries of both WMS caches, drops the resource brokers
    that are keys of the failing-WMS cache and shuffles the remaining ones.

    :param csSection: forwarded to ``PilotDirector.configure`` and
                      ``reloadConfiguration``
    :param submitPool: forwarded to ``PilotDirector.configure`` and
                       ``reloadConfiguration``
    """
    PilotDirector.configure(self, csSection, submitPool)
    self.reloadConfiguration(csSection, submitPool)

    self.__failingWMSCache.purgeExpired()
    self.__ticketsWMSCache.purgeExpired()
    for rb in self.__failingWMSCache.getKeys():
        if rb in self.resourceBrokers:
            try:
                self.resourceBrokers.remove(rb)
            except ValueError:
                # Already gone from the list; only this error is ignored so
                # that unrelated failures are no longer silently swallowed
                pass

    self.resourceBrokers = List.randomize(self.resourceBrokers)

    if self.gridEnv:
        self.log.info(' GridEnv: ', self.gridEnv)
    if self.resourceBrokers:
        self.log.info(' ResourceBrokers:', ', '.join(self.resourceBrokers))
def __refresh(self):
    """Refresh the configuration from the first server that answers.

    Gateways returned by ``getGatewayURLs`` take precedence over the servers
    listed in ``gConfigurationData``; the chosen list is tried in random order.

    :return: S_OK() when there is no server to contact, the first successful
             update result, or S_ERROR listing the distinct error messages
    """
    self.__lastUpdateTime = time.time()
    gLogger.debug("Refreshing configuration...")
    gateways = getGatewayURLs("Configuration/Server")
    errorMessages = []
    if gateways:
        servers = gateways
        gLogger.debug("Using configuration gateway", str(servers[0]))
    else:
        servers = gConfigurationData.getServers()
        gLogger.debug("Refreshing from list %s" % str(servers))

    # Without any server we are supposed to use the local configuration only
    if not servers:
        return S_OK()

    shuffledServers = List.randomize(servers)
    gLogger.debug("Randomized server list is %s" % ", ".join(shuffledServers))

    for serverURL in shuffledServers:
        from DIRAC.Core.DISET.RPCClient import RPCClient

        client = RPCClient(
            serverURL,
            useCertificates=gConfigurationData.useServerCertificate(),
            skipCACheck=gConfigurationData.skipCACheck(),
        )
        result = _updateFromRemoteLocation(client)
        if result["OK"]:
            return result
        message = result["Message"]
        errorMessages.append(message)
        gLogger.warn(
            "Can't update from server", "Error while updating from %s: %s" % (serverURL, message)
        )
        if "Insane environment" in message:
            # No point in trying the remaining servers
            break

    return S_ERROR("Reason(s):\n\t%s" % "\n\t".join(List.uniqueElements(errorMessages)))
def __findServiceURL(self):
    """Discover the URL of a service, taking into account gateways, multiple URLs and banned URLs.

    If the site on which we run is configured to use gateways
    (/DIRAC/Gateways/<siteName>), one of these URLs is picked at random and used.
    Gateways are ignored when KW_IGNORE_GATEWAYS is set to a true value in kwargs.

    If self._destinationSrv (given as constructor attribute) starts with
    "<protocol>://" for a protocol of gProtocolDict, it is returned as is, or
    with its server part replaced by the gateway when a gateway is in use.

    Otherwise the list of URLs defined in the CS (<System>/URLs/<Component>)
    is randomized and the failover URLs are appended at the end.

    This method also sets some attributes:
      * self.__nbOfUrls = number of URLs
      * self.__nbOfRetry = 2 if we have more than 2 urls, otherwise 3
      * self.__bannedUrls is reinitialized if all the URLs are banned

    :return: S_OK(str)/S_ERROR() -- the selected URL
    """
    if not self.__initStatus['OK']:
        return self.__initStatus

    # Load the Gateways URLs for the current site Name
    gatewayURL = False
    if not self.kwargs.get(self.KW_IGNORE_GATEWAYS):
        dRetVal = gConfig.getOption("/DIRAC/Gateways/%s" % DIRAC.siteName())
        if dRetVal['OK']:
            rawGatewayURL = List.randomize(List.fromChar(dRetVal['Value'], ","))[0]
            # Keep only the first three "/"-separated parts of the gateway URL
            gatewayURL = "/".join(rawGatewayURL.split("/")[:3])

    # If what was given as constructor attribute is a properly formed URL,
    # we just return this one.
    # If we have to use a gateway, we just replace the server name in it
    for protocol in gProtocolDict:
        if self._destinationSrv.find("%s://" % protocol) == 0:
            gLogger.debug("Already given a valid url", self._destinationSrv)
            if not gatewayURL:
                return S_OK(self._destinationSrv)
            gLogger.debug("Reconstructing given URL to pass through gateway")
            path = "/".join(self._destinationSrv.split("/")[3:])
            finalURL = "%s/%s" % (gatewayURL, path)
            gLogger.debug("Gateway URL conversion:\n %s -> %s" % (self._destinationSrv, finalURL))
            return S_OK(finalURL)

    if gatewayURL:
        gLogger.debug("Using gateway", gatewayURL)
        return S_OK("%s/%s" % (gatewayURL, self._destinationSrv))

    # We extract the list of URLs from the CS (System/URLs/Component)
    try:
        urls = getServiceURL(self._destinationSrv, setup=self.setup)
    except Exception as e:
        return S_ERROR("Cannot get URL for %s in setup %s: %s" % (self._destinationSrv, self.setup, repr(e)))
    if not urls:
        return S_ERROR("URL for service %s not found" % self._destinationSrv)

    failoverUrls = []
    # Try if there are some failover URLs to use as last resort.
    # Any error here is deliberately ignored: we simply go on without failover URLs
    try:
        failoverUrlsStr = getServiceFailoverURL(self._destinationSrv, setup=self.setup)
        if failoverUrlsStr:
            failoverUrls = failoverUrlsStr.split(',')
    except Exception as e:
        pass

    # We randomize the list, and add at the end the failover URLs (System/FailoverURLs/Component)
    urlsList = List.randomize(List.fromChar(urls, ",")) + failoverUrls
    self.__nbOfUrls = len(urlsList)
    self.__nbOfRetry = 2 if self.__nbOfUrls > 2 else 3  # we retry 2 times all services, if we run more than 2 services
    if self.__nbOfUrls == len(self.__bannedUrls):
        self.__bannedUrls = []  # retry all urls
        gLogger.debug("Retrying again all URLs")

    if len(self.__bannedUrls) > 0 and len(urlsList) > 1:
        # we have host which is not accessible. We remove that host from the list.
        # We only remove if we have more than one instance
        # NOTE(review): list.remove raises ValueError if a banned URL is not in
        # urlsList - confirm banned URLs always come from this list
        for i in self.__bannedUrls:
            gLogger.debug("Removing banned URL", "%s" % i)
            urlsList.remove(i)

    # Take the first URL from the list
    sURL = urlsList[0]

    # If we have banned URLs, and several URLs at disposals, we make sure that the selected sURL
    # is not on a host which is banned. If it is, we take the next one in the list using __selectUrl
    if len(self.__bannedUrls) > 0 and self.__nbOfUrls > 2:
        # when we have multiple services then we can
        # have a situation when two services are running on the same machine with different ports...
        retVal = Network.splitURL(sURL)
        nexturl = None
        if retVal['OK']:
            nexturl = retVal['Value']
            found = False
            for i in self.__bannedUrls:
                retVal = Network.splitURL(i)
                if retVal['OK']:
                    bannedurl = retVal['Value']
                else:
                    # A banned URL that cannot be split stops the search
                    break
                # We found a banned URL on the same host as the one we are running on
                if nexturl[1] == bannedurl[1]:
                    found = True
                    break
            if found:
                nexturl = self.__selectUrl(nexturl, urlsList[1:])
                if nexturl:  # an url found which is in different host
                    sURL = nexturl

    gLogger.debug("Discovering URL for service", "%s -> %s" % (self._destinationSrv, sURL))
    return S_OK(sURL)
path = "/".join( self._destinationSrv.split( "/" )[3:] ) finalURL = "%s/%s" % ( gatewayURL, path ) gLogger.debug( "Gateway URL conversion:\n %s -> %s" % ( self._destinationSrv, finalURL ) ) return S_OK( finalURL ) if gatewayURL: gLogger.debug( "Using gateway", gatewayURL ) return S_OK( "%s/%s" % ( gatewayURL, self._destinationSrv ) ) try: urls = getServiceURL( self._destinationSrv, setup = self.setup ) except Exception, e: return S_ERROR( "Cannot get URL for %s in setup %s: %s" % ( self._destinationSrv, self.setup, str( e ) ) ) if not urls: return S_ERROR( "URL for service %s not found" % self._destinationSrv ) sURL = List.randomize( List.fromChar( urls, "," ) )[0] gLogger.debug( "Discovering URL for service", "%s -> %s" % ( self._destinationSrv, sURL ) ) return S_OK( sURL ) def __checkThreadID( self ): if not self.__initStatus[ 'OK' ]: return self.__initStatus cThID = thread.get_ident() if not self.__allowedThreadID: self.__allowedThreadID = cThID elif cThID != self.__allowedThreadID : msgTxt = """ =======DISET client thread safety error======================== Client %s can only run on thread %s and this is thread %s
def _prepareJDL(self, taskQueueDict, workingDirectory, pilotOptions, pilotsToSubmit, ceMask, submitPrivatePilot, privateTQ):
    """Write the JDL file used for pilot submission.

    :return: dict with keys 'JDL' (result of ``_writeJDL``), 'Requirements',
             'Pilots' (``pilotsToSubmit`` when above 1, else 1) and 'RB'
             (the selected broker)
    """
    # Pick one resource broker at random as the only WMProxy end point
    rb = List.randomize(self.resourceBrokers)[0]
    wmProxyEndPoints = '"https://%s:7443/glite_wms_wmproxy_server"' % rb

    # All logging servers are used, in random order
    lbEndPoints = List.randomize(['"https://%s:9000"' % lb for lb in self.loggingServers])

    vo = gConfig.getValue('/DIRAC/VirtualOrganization', '')
    if privateTQ or vo != 'lhcb':
        extraReq = "True"
    elif submitPrivatePilot:
        extraReq = "! AllowsGenericPilot"
    else:
        extraReq = "AllowsGenericPilot"

    myProxyServer = self.myProxyServer.strip()
    if not myProxyServer:
        # Random string to avoid caching
        myProxyServer = "%s.cern.ch" % md5(str(time.time())).hexdigest()[:10]

    # NOTE(review): both template literals are reproduced exactly as they appear
    # in this copy of the file; their line breaks look collapsed - confirm upstream
    wmsClientJDL = """ RetryCount = 0; ShallowRetryCount = 0; AllowsGenericPilot = Member( "VO-lhcb-pilot" , other.GlueHostApplicationSoftwareRunTimeEnvironment ); Requirements = pilotRequirements && %s; MyProxyServer = "%s"; WmsClient = [ ErrorStorage = "%s/pilotError"; OutputStorage = "%s/pilotOutput"; # ListenerPort = 44000; ListenerStorage = "%s/Storage"; RetryCount = 0; ShallowRetryCount = 0; WMProxyEndPoints = { %s }; LBEndPoints = { %s }; EnableServiceDiscovery = false; MyProxyServer = "%s"; JdlDefaultAttributes = [ requirements = ( other.GlueCEStateStatus == "Production" || other.GlueCEStateStatus == "Special" ); AllowZippedISB = true; SignificantAttributes = {"Requirements", "Rank", "FuzzyRank"}; PerusalFileEnable = false; ]; ]; """ % (
        extraReq,
        myProxyServer,
        workingDirectory,
        workingDirectory,
        workingDirectory,
        wmProxyEndPoints,
        ', '.join(lbEndPoints),
        myProxyServer,
    )

    nPilots = 1
    if pilotsToSubmit > 1:
        # Several pilots are requested through a single parametric job
        wmsClientJDL += """ JobType = "Parametric"; Parameters= %s; ParameterStep =1; ParameterStart = 0; """ % pilotsToSubmit
        nPilots = pilotsToSubmit

    pilotJDL, pilotRequirements = self._JobJDL(taskQueueDict, pilotOptions, ceMask)

    jdlPath = os.path.join(workingDirectory, '%s.jdl' % taskQueueDict['TaskQueueID'])
    jdl = self._writeJDL(jdlPath, [pilotJDL, wmsClientJDL])

    return {
        'JDL': jdl,
        'Requirements': pilotRequirements + " && " + extraReq,
        'Pilots': nPilots,
        'RB': rb,
    }
def execute(self):
    """ Main execution function.

    Resolves the candidate user output files and their metadata, builds the
    ordered list of destination SEs (local SE first, then the remaining default
    SEs in random order, with user-requested SEs prepended), uploads the files
    with failover, then tries to replicate the uploaded files.

    NOTE(review): the nesting of this method was reconstructed from a copy
    without indentation - confirm the early-return branches against upstream.

    :return: S_OK / S_ERROR
    """
    #Have to work out if the module is part of the last step i.e.
    #user jobs can have any number of steps and we only want
    #to run the finalization once. Not a problem if this is not the last step so return S_OK()
    resultLS = self.isLastStep()
    if not resultLS['OK']:
        return S_OK()

    self.logWorkingDirectory()

    resultIV = self.resolveInputVariables()
    if not resultIV['OK']:
        self.log.error("Failed to resolve input parameters:", resultIV['Message'])
        return resultIV

    self.log.info('Initializing %s' % self.version)
    # Nothing is uploaded when the workflow or the step already failed
    if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
        self.log.verbose('Workflow status = %s, step status = %s' % (self.workflowStatus['OK'], self.stepStatus['OK']))
        return S_OK('No output data upload attempted')

    if not self.userOutputData:
        self.log.info('No user output data is specified for this job, nothing to do')
        return S_OK('No output data to upload')

    #Determine the final list of possible output files for the
    #workflow and all the parameters needed to upload them.
    outputList = self.getOutputList()

    userOutputLFNs = []
    if self.userOutputData:
        resultOLfn = self.constructOutputLFNs()
        if not resultOLfn['OK']:
            self.log.error('Could not create user LFNs', resultOLfn['Message'])
            return resultOLfn
        userOutputLFNs = resultOLfn['Value']

    self.log.verbose('Calling getCandidateFiles( %s, %s, %s)' % (outputList, userOutputLFNs, self.outputDataFileMask))
    self.log.debug("IgnoreAppErrors? '%s' " % self.ignoreapperrors)
    resultCF = self.getCandidateFiles(outputList, userOutputLFNs, self.outputDataFileMask)
    if not resultCF['OK']:
        if not self.ignoreapperrors:
            self.log.error(resultCF['Message'])
            self.setApplicationStatus(resultCF['Message'])
            return S_OK()
    fileDict = resultCF['Value']

    resultFMD = self.getFileMetadata(fileDict)
    if not resultFMD['OK']:
        if not self.ignoreapperrors:
            self.log.error(resultFMD['Message'])
            self.setApplicationStatus(resultFMD['Message'])
            return S_OK()

    if not resultFMD['Value']:
        if not self.ignoreapperrors:
            self.log.info('No output data files were determined to be uploaded for this workflow')
            self.setApplicationStatus('No Output Data Files To Upload')
            return S_OK()
    fileMetadata = resultFMD['Value']

    #First get the local (or assigned) SE to try first for upload and others in random fashion
    resultSEL = getDestinationSEList('Tier1-USER', DIRAC.siteName(), outputmode='local')
    if not resultSEL['OK']:
        self.log.error('Could not resolve output data SE', resultSEL['Message'])
        self.setApplicationStatus('Failed To Resolve OutputSE')
        return resultSEL
    localSE = resultSEL['Value']

    # Default SEs that are neither local nor user-requested, shuffled after the local ones
    orderedSEs = [ se for se in self.defaultOutputSE if se not in localSE and se not in self.userOutputSE]
    orderedSEs = localSE + List.randomize(orderedSEs)
    if self.userOutputSE:
        # User-requested SEs not already in the list go first
        prependSEs = []
        for userSE in self.userOutputSE:
            if not userSE in orderedSEs:
                prependSEs.append(userSE)
        orderedSEs = prependSEs + orderedSEs

    self.log.info('Ordered list of output SEs is: %s' % (', '.join(orderedSEs)))
    final = {}
    # Every file gets the same ordered SE list attached to its metadata
    for fileName, metadata in fileMetadata.iteritems():
        final[fileName] = metadata
        final[fileName]['resolvedSE'] = orderedSEs

    #At this point can exit and see exactly what the module will upload
    self.printOutputInfo(final)
    if not self.enable:
        return S_OK('Module is disabled by control flag')

    #Instantiate the failover transfer client with the global request object
    failoverTransfer = FailoverTransfer(self._getRequestContainer())

    #One by one upload the files with failover if necessary
    filesToReplicate = {}
    filesToFailover = {}
    filesUploaded = []
    if not self.failoverTest:
        self.transferAndRegisterFiles(final, failoverTransfer, filesToFailover, filesUploaded, filesToReplicate)
    else:
        # Failover test: send every file through the failover path
        filesToFailover = final

    ##if there are files to be failovered, we do it now
    resultTRFF = self.transferRegisterAndFailoverFiles(failoverTransfer, filesToFailover, filesUploaded)
    # NOTE(review): resultTRFF['OK'] is not checked before reading 'Value' - confirm it cannot fail
    cleanUp = resultTRFF['Value']['cleanUp']

    #For files correctly uploaded must report LFNs to job parameters
    if filesUploaded:
        report = ', '.join( filesUploaded )
        self.jobReport.setJobParameter( 'UploadedOutputData', report )

    self.workflow_commons['Request'] = failoverTransfer.request

    #If some or all of the files failed to be saved to failover
    if cleanUp:
        #Leave any uploaded files just in case it is useful for the user
        #do not try to replicate any files.
        return S_ERROR('Failed To Upload Output Data')

    #If there is now at least one replica for uploaded files can trigger replication
    datMan = DataManager( catalogs = self.userFileCatalog )
    self.log.info('Sleeping for 10 seconds before attempting replication of recently uploaded files')
    time.sleep(10)
    for lfn, repSE in filesToReplicate.items():
        resultRAR = datMan.replicateAndRegister(lfn, repSE)
        if not resultRAR['OK']:
            # A failed replication is only logged: one replica already exists
            # NOTE(review): the "\ " inside this literal looks like a collapsed
            # line continuation - confirm the intended message upstream
            self.log.info('Replication failed with below error but file already exists in Grid storage with \ at least one replica:\n%s' % (resultRAR))

    self.generateFailoverFile()

    self.setApplicationStatus('Job Finished Successfully')
    return S_OK('Output data uploaded')
def main(): from DIRAC.Core.Base import Script Script.registerSwitch("p:", "run_number=", "Run Number", setRunNumber) Script.registerSwitch("T:", "template=", "Template", setCorsikaTemplate) Script.registerSwitch("E:", "executable=", "Executable", setExecutable) Script.registerSwitch("S:", "simtelConfig=", "SimtelConfig", setConfig) Script.registerSwitch("V:", "version=", "Version", setVersion) Script.registerSwitch("M:", "mode=", "Mode", setMode) Script.registerSwitch("C:", "savecorsika=", "Save Corsika", setSaveCorsika) from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient from DIRAC.Resources.Catalog.FileCatalog import FileCatalog Script.parseCommandLine() global fcc, fcL, storage_element from CTADIRAC.Core.Utilities.SoftwareInstallation import getSoftwareEnviron from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwareEnviron from CTADIRAC.Core.Utilities.SoftwareInstallation import workingArea from CTADIRAC.Core.Workflow.Modules.CorsikaApp import CorsikaApp from CTADIRAC.Core.Workflow.Modules.Read_CtaApp import Read_CtaApp from DIRAC.Core.Utilities.Subprocess import systemCall jobID = os.environ['JOBID'] jobID = int(jobID) global jobReport jobReport = JobReport(jobID) ########### ## Checking MD coherence fc = FileCatalog('LcgFileCatalog') res = fc._getCatalogConfigDetails('DIRACFileCatalog') print 'DFC CatalogConfigDetails:', res res = fc._getCatalogConfigDetails('LcgFileCatalog') print 'LCG CatalogConfigDetails:', res fcc = FileCatalogClient() fcL = FileCatalog('LcgFileCatalog') from DIRAC.Interfaces.API.Dirac import Dirac dirac = Dirac() ############# simtelConfigFilesPath = 'sim_telarray/multi' simtelConfigFile = simtelConfigFilesPath + '/multi_cta-ultra5.cfg' #simtelConfigFile = simtelConfigFilesPath + '/multi_cta-prod1s.cfg' createGlobalsFromConfigFiles('prodConfigFile', corsikaTemplate, version) ######################Building prod Directory Metadata ####################### resultCreateProdDirMD = createProdFileSystAndMD() 
if not resultCreateProdDirMD['OK']: DIRAC.gLogger.error('Failed to create prod Directory MD') jobReport.setApplicationStatus('Failed to create prod Directory MD') DIRAC.gLogger.error('Metadata coherence problem, no file produced') DIRAC.exit(-1) else: print 'prod Directory MD successfully created' ######################Building corsika Directory Metadata ####################### resultCreateCorsikaDirMD = createCorsikaFileSystAndMD() if not resultCreateCorsikaDirMD['OK']: DIRAC.gLogger.error('Failed to create corsika Directory MD') jobReport.setApplicationStatus('Failed to create corsika Directory MD') DIRAC.gLogger.error( 'Metadata coherence problem, no corsikaFile produced') DIRAC.exit(-1) else: print 'corsika Directory MD successfully created' ############ Producing Corsika File global CorsikaSimtelPack CorsikaSimtelPack = os.path.join('corsika_simhessarray', version, 'corsika_simhessarray') install_CorsikaSimtelPack(version, 'sim') cs = CorsikaApp() cs.setSoftwarePackage(CorsikaSimtelPack) cs.csExe = executable cs.csArguments = [ '--run-number', run_number, '--run', 'corsika', corsikaTemplate ] corsikaReturnCode = cs.execute() if corsikaReturnCode != 0: DIRAC.gLogger.error('Corsika Application: Failed') jobReport.setApplicationStatus('Corsika Application: Failed') DIRAC.exit(-1) ###################### rename of corsika output file ####################### rundir = 'run' + run_number filein = rundir + '/' + corsikaOutputFileName corsikaFileName = particle + '_' + thetaP + '_' + phiP + '_alt' + obslev + '_' + 'run' + run_number + '.corsika.gz' mv_cmd = 'mv ' + filein + ' ' + corsikaFileName if (os.system(mv_cmd)): DIRAC.exit(-1) ######################## ######################## ## files spread in 1000-runs subDirectories runNum = int(run_number) subRunNumber = '%03d' % runNum runNumModMille = runNum % 1000 runNumTrunc = (runNum - runNumModMille) / 1000 runNumSeriesDir = '%03dxxx' % runNumTrunc print 'runNumSeriesDir=', runNumSeriesDir ### create corsika tar luisa 
#################### corsikaTarName = particle + '_' + thetaP + '_' + phiP + '_alt' + obslev + '_' + 'run' + run_number + '.corsika.tar.gz' filetar1 = rundir + '/' + 'input' filetar2 = rundir + '/' + 'DAT' + run_number + '.dbase' filetar3 = rundir + '/run' + str(int(run_number)) + '.log' cmdTuple = [ '/bin/tar', 'zcf', corsikaTarName, filetar1, filetar2, filetar3 ] DIRAC.gLogger.notice('Executing command tuple:', cmdTuple) ret = systemCall(0, cmdTuple, sendOutput) if not ret['OK']: DIRAC.gLogger.error('Failed to execute tar') DIRAC.exit(-1) ###################################################### corsikaOutFileDir = os.path.join(corsikaDirPath, particle, 'Data', runNumSeriesDir) corsikaOutFileLFN = os.path.join(corsikaOutFileDir, corsikaFileName) corsikaRunNumberSeriesDirExist = fcc.isDirectory( corsikaOutFileDir)['Value']['Successful'][corsikaOutFileDir] newCorsikaRunNumberSeriesDir = ( corsikaRunNumberSeriesDirExist != True ) # if new runFileSeries, will need to add new MD #### create a file to DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK ################ f = open('DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK', 'w') f.close() if savecorsika == 'True': DIRAC.gLogger.notice('Put and register corsika File in LFC and DFC:', corsikaOutFileLFN) ret = dirac.addFile(corsikaOutFileLFN, corsikaFileName, storage_element) res = CheckCatalogCoherence(corsikaOutFileLFN) if res != DIRAC.S_OK: DIRAC.gLogger.error('Job failed: Catalog Coherence problem found') jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) if not ret['OK']: DIRAC.gLogger.error('Error during addFile call:', ret['Message']) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) # put and register corsikaTarFile: corsikaTarFileDir = os.path.join(corsikaDirPath, particle, 'Log', runNumSeriesDir) corsikaTarFileLFN = os.path.join(corsikaTarFileDir, corsikaTarName) ##### If storage element is IN2P3-tape save simtel file on disk ############### if storage_element == 'CC-IN2P3-Tape': 
storage_element = 'CC-IN2P3-Disk' DIRAC.gLogger.notice( 'Put and register corsikaTar File in LFC and DFC:', corsikaTarFileLFN) ret = dirac.addFile(corsikaTarFileLFN, corsikaTarName, storage_element) ####Checking and restablishing catalog coherence ##################### res = CheckCatalogCoherence(corsikaTarFileLFN) if res != DIRAC.S_OK: DIRAC.gLogger.error('Job failed: Catalog Coherence problem found') jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) if not ret['OK']: DIRAC.gLogger.error('Error during addFile call:', ret['Message']) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) ###################################################################### if newCorsikaRunNumberSeriesDir: insertRunFileSeriesMD(corsikaOutFileDir, runNumTrunc) insertRunFileSeriesMD(corsikaTarFileDir, runNumTrunc) ###### insert corsika File Level metadata ############################################ corsikaFileMD = {} corsikaFileMD['runNumber'] = int(run_number) corsikaFileMD['jobID'] = jobID corsikaFileMD['corsikaReturnCode'] = corsikaReturnCode corsikaFileMD['nbShowers'] = nbShowers result = fcc.setMetadata(corsikaOutFileLFN, corsikaFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] result = fcc.setMetadata(corsikaTarFileLFN, corsikaFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] ##### Exit now if only corsika simulation required if (mode == 'corsika_standalone'): DIRAC.exit() ############ Producing SimTel File ######################Building simtel Directory Metadata ####################### cfg_dict = { "4MSST": 'cta-prod2-4m-dc', "SCSST": 'cta-prod2-sc-sst', "STD": 'cta-prod2', "NSBX3": 'cta-prod2', "ASTRI": 'cta-prod2-astri', "SCMST": 'cta-prod2-sc3', "NORTH": 'cta-prod2n' } if simtelConfig == "6INROW": all_configs = ["4MSST", "SCSST", "ASTRI", "NSBX3", "STD", "SCMST"] elif simtelConfig == "5INROW": all_configs = 
["4MSST", "SCSST", "ASTRI", "NSBX3", "STD"] elif simtelConfig == "3INROW": all_configs = ["SCSST", "STD", "SCMST"] else: all_configs = [simtelConfig] ############################################ #for current_conf in all_configs: #DIRAC.gLogger.notice('current conf is',current_conf) #if current_conf == "SCMST": #current_version = version + '_sc3' #DIRAC.gLogger.notice('current version is', current_version) #if os.path.isdir('sim_telarray'): #DIRAC.gLogger.notice('Package found in the local area. Removing package...') #cmd = 'rm -R sim_telarray corsika-6990 hessioxxx corsika-run' #if(os.system(cmd)): #DIRAC.exit( -1 ) #install_CorsikaSimtelPack(current_version) #else: #current_version = version #DIRAC.gLogger.notice('current version is', current_version) ############################################################# for current_conf in all_configs: DIRAC.gLogger.notice('current conf is', current_conf) if current_conf == "SCMST": current_version = version + '_sc3' DIRAC.gLogger.notice('current version is', current_version) installSoftwareEnviron(CorsikaSimtelPack, workingArea(), 'sim-sc3') else: current_version = version DIRAC.gLogger.notice('current version is', current_version) ######################################################## global simtelDirPath global simtelProdVersion simtelProdVersion = current_version + '_simtel' simtelDirPath = os.path.join(corsikaParticleDirPath, simtelProdVersion) resultCreateSimtelDirMD = createSimtelFileSystAndMD(current_conf) if not resultCreateSimtelDirMD['OK']: DIRAC.gLogger.error('Failed to create simtelArray Directory MD') jobReport.setApplicationStatus( 'Failed to create simtelArray Directory MD') DIRAC.gLogger.error( 'Metadata coherence problem, no simtelArray File produced') DIRAC.exit(-1) else: DIRAC.gLogger.notice('simtel Directory MD successfully created') ############## check simtel data file LFN exists ######################## simtelFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str( obslev) + '_' 
+ 'run' + run_number + '.simtel.gz' simtelDirPath_conf = simtelDirPath + '_' + current_conf simtelOutFileDir = os.path.join(simtelDirPath_conf, 'Data', runNumSeriesDir) simtelOutFileLFN = os.path.join(simtelOutFileDir, simtelFileName) res = CheckCatalogCoherence(simtelOutFileLFN) if res == DIRAC.S_OK: DIRAC.gLogger.notice('Current conf already done', current_conf) continue #### execute simtelarray ################ fd = open('run_sim.sh', 'w') fd.write("""#! /bin/sh source ./Corsika_simhessarrayEnv.sh export SVNPROD2=$PWD export SVNTAG=SVN-PROD2_rev10503 export CORSIKA_IO_BUFFER=800MB cp ../grid_prod2-repro.sh . ln -s ../%s ln -s ../$SVNTAG ./grid_prod2-repro.sh %s %s""" % (corsikaFileName, corsikaFileName, current_conf)) fd.close() #################################### os.system('chmod u+x run_sim.sh') cmdTuple = ['./run_sim.sh'] ret = systemCall(0, cmdTuple, sendOutputSimTel) simtelReturnCode, stdout, stderr = ret['Value'] if (os.system('grep Broken simtel.log') == 0): DIRAC.gLogger.error('Broken string found in simtel.log') jobReport.setApplicationStatus('Broken pipe') DIRAC.exit(-1) if not ret['OK']: DIRAC.gLogger.error('Failed to execute run_sim.sh') DIRAC.gLogger.error('run_sim.sh status is:', simtelReturnCode) DIRAC.exit(-1) ## check simtel data/log/histo Output File exist cfg = cfg_dict[current_conf] #cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Data/*.simtel.gz ' + simtelFileName if current_conf == "SCMST": cmdprefix = 'mv sim-sc3/Data/sim_telarray/' + cfg + '/0.0deg/' else: cmdprefix = 'mv sim/Data/sim_telarray/' + cfg + '/0.0deg/' cmd = cmdprefix + 'Data/*' + cfg + '_*.simtel.gz ' + simtelFileName if (os.system(cmd)): DIRAC.exit(-1) ############################################ simtelRunNumberSeriesDirExist = fcc.isDirectory( simtelOutFileDir)['Value']['Successful'][simtelOutFileDir] newSimtelRunFileSeriesDir = ( simtelRunNumberSeriesDirExist != True ) # if new runFileSeries, will need to add new MD simtelLogFileName = particle + '_' + str(thetaP) + '_' + 
str( phiP) + '_alt' + str(obslev) + '_' + 'run' + run_number + '.log.gz' #cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Log/*.log.gz ' + simtelLogFileName cmd = cmdprefix + 'Log/*' + cfg + '_*.log.gz ' + simtelLogFileName if (os.system(cmd)): DIRAC.exit(-1) simtelOutLogFileDir = os.path.join(simtelDirPath_conf, 'Log', runNumSeriesDir) simtelOutLogFileLFN = os.path.join(simtelOutLogFileDir, simtelLogFileName) simtelHistFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str( obslev) + '_' + 'run' + run_number + '.hdata.gz' #cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Histograms/*.hdata.gz ' + simtelHistFileName cmd = cmdprefix + 'Histograms/*' + cfg + '_*.hdata.gz ' + simtelHistFileName if (os.system(cmd)): DIRAC.exit(-1) simtelOutHistFileDir = os.path.join(simtelDirPath_conf, 'Histograms', runNumSeriesDir) simtelOutHistFileLFN = os.path.join(simtelOutHistFileDir, simtelHistFileName) ########### quality check on Histo ############################################# fd = open('check_histo.sh', 'w') fd.write("""#! 
/bin/sh nsim=$(list_histograms %s|fgrep 'Histogram 6 '|sed 's/^.*contents: //'| sed 's:/.*$::') nevents=%d if [ $nsim -lt $(( $nevents - 20 )) ]; then echo 'nsim found:' $nsim echo 'nsim expected:' $nevents exit 1 else echo 'nsim found:' $nsim echo 'nsim expected:' $nevents fi """ % (simtelHistFileName, int(nbShowers) * int(cscat))) fd.close() ret = getSoftwareEnviron(CorsikaSimtelPack) if not ret['OK']: error = ret['Message'] DIRAC.gLogger.error(error, CorsikaSimtelPack) DIRAC.exit(-1) corsikaEnviron = ret['Value'] os.system('chmod u+x check_histo.sh') cmdTuple = ['./check_histo.sh'] DIRAC.gLogger.notice('Executing command tuple:', cmdTuple) ret = systemCall(0, cmdTuple, sendOutput, env=corsikaEnviron) checkHistoReturnCode, stdout, stderr = ret['Value'] if not ret['OK']: DIRAC.gLogger.error('Failed to execute check_histo.sh') DIRAC.gLogger.error('check_histo.sh status is:', checkHistoReturnCode) DIRAC.exit(-1) if (checkHistoReturnCode != 0): DIRAC.gLogger.error('Failure during check_histo.sh') DIRAC.gLogger.error('check_histo.sh status is:', checkHistoReturnCode) jobReport.setApplicationStatus('Histo check Failed') DIRAC.exit(-1) ########## quality check on Log ############################# cmd = 'zcat %s | grep Finished.' 
% simtelLogFileName DIRAC.gLogger.notice('Executing system call:', cmd) if (os.system(cmd)): jobReport.setApplicationStatus('Log check Failed') DIRAC.exit(-1) ################################################ from DIRAC.Core.Utilities import List from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations opsHelper = Operations() global seList seList = opsHelper.getValue('ProductionOutputs/SimtelProd', []) seList = List.randomize(seList) DIRAC.gLogger.notice('SeList is:', seList) ######### Upload simtel data/log/histo ############################################## res = upload_to_seList(simtelOutFileLFN, simtelFileName) if res != DIRAC.S_OK: DIRAC.gLogger.error('OutputData Upload Error', simtelOutFileLFN) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) res = CheckCatalogCoherence(simtelOutLogFileLFN) if res == DIRAC.S_OK: DIRAC.gLogger.notice('Log file already exists. Removing:', simtelOutLogFileLFN) ret = dirac.removeFile(simtelOutLogFileLFN) res = upload_to_seList(simtelOutLogFileLFN, simtelLogFileName) if res != DIRAC.S_OK: DIRAC.gLogger.error('Upload simtel Log Error', simtelOutLogFileLFN) DIRAC.gLogger.notice('Removing simtel data file:', simtelOutFileLFN) ret = dirac.removeFile(simtelOutFileLFN) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) res = CheckCatalogCoherence(simtelOutHistFileLFN) if res == DIRAC.S_OK: DIRAC.gLogger.notice('Histo file already exists. 
Removing:', simtelOutHistFileLFN) ret = dirac.removeFile(simtelOutHistFileLFN) res = upload_to_seList(simtelOutHistFileLFN, simtelHistFileName) if res != DIRAC.S_OK: DIRAC.gLogger.error('Upload simtel Histo Error', simtelOutHistFileLFN) DIRAC.gLogger.notice('Removing simtel data file:', simtelOutFileLFN) ret = dirac.removeFile(simtelOutFileLFN) DIRAC.gLogger.notice('Removing simtel log file:', simtelOutLogFileLFN) ret = dirac.removeFile(simtelOutLogFileLFN) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) # simtelRunNumberSeriesDirExist = fcc.isDirectory(simtelOutFileDir)['Value']['Successful'][simtelOutFileDir] # newSimtelRunFileSeriesDir = (simtelRunNumberSeriesDirExist != True) # if new runFileSeries, will need to add new MD if newSimtelRunFileSeriesDir: print 'insertRunFileSeriesMD' insertRunFileSeriesMD(simtelOutFileDir, runNumTrunc) insertRunFileSeriesMD(simtelOutLogFileDir, runNumTrunc) insertRunFileSeriesMD(simtelOutHistFileDir, runNumTrunc) else: print 'NotinsertRunFileSeriesMD' ###### simtel File level metadata ############################################ simtelFileMD = {} simtelFileMD['runNumber'] = int(run_number) simtelFileMD['jobID'] = jobID simtelFileMD['simtelReturnCode'] = simtelReturnCode result = fcc.setMetadata(simtelOutFileLFN, simtelFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] result = fcc.setMetadata(simtelOutLogFileLFN, simtelFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] result = fcc.setMetadata(simtelOutHistFileLFN, simtelFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] if savecorsika == 'True': result = fcc.addFileAncestors( {simtelOutFileLFN: { 'Ancestors': [corsikaOutFileLFN] }}) print 'result addFileAncestor:', result result = fcc.addFileAncestors( {simtelOutLogFileLFN: { 'Ancestors': [corsikaOutFileLFN] }}) print 'result 
addFileAncestor:', result result = fcc.addFileAncestors( {simtelOutHistFileLFN: { 'Ancestors': [corsikaOutFileLFN] }}) print 'result addFileAncestor:', result ##### Exit now if only corsika simulation required if (mode == 'corsika_simtel'): continue ######### run read_cta ####################################### rcta = Read_CtaApp() rcta.setSoftwarePackage(CorsikaSimtelPack) rcta.rctaExe = 'read_cta' powerlaw_dict = { 'gamma': '-2.57', 'gamma_ptsrc': '-2.57', 'proton': '-2.70', 'electron': '-3.21' } dstFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str( obslev) + '_' + 'run' + run_number + '.simtel-dst0.gz' dstHistoFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str( obslev) + '_' + 'run' + run_number + '.hdata-dst0.gz' ## added some options starting from Armazones_2K prod. rcta.rctaArguments = [ '-r', '4', '-u', '--integration-scheme', '4', '--integration-window', '7,3', '--tail-cuts', '6,8', '--min-pix', '2', '--min-amp', '20', '--type', '1,0,0,400', '--tail-cuts', '9,12', '--min-amp', '20', '--type', '2,0,0,100', '--tail-cuts', '8,11', '--min-amp', '19', '--type', '3,0,0,40', '--tail-cuts', '6,9', '--min-amp', '15', '--type', '4,0,0,15', '--tail-cuts', '3.7,5.5', '--min-amp', '8', '--type', '5,0,0,70,5.6', '--tail-cuts', '2.4,3.2', '--min-amp', '5.6', '--dst-level', '0', '--dst-file', dstFileName, '--histogram-file', dstHistoFileName, '--powerlaw', powerlaw_dict[particle], simtelFileName ] rctaReturnCode = rcta.execute() if rctaReturnCode != 0: DIRAC.gLogger.error('read_cta Application: Failed') jobReport.setApplicationStatus('read_cta Application: Failed') DIRAC.exit(-1) ######## run dst quality checks ###################################### fd = open('check_dst_histo.sh', 'w') fd.write("""#! 
/bin/sh dsthistfilename=%s dstfile=%s n6="$(list_histograms -h 6 ${dsthistfilename} | grep 'Histogram of type' | sed 's/.*bins, //' | sed 's/ entries.//')" n12001="$(list_histograms -h 12001 ${dsthistfilename} | grep 'Histogram of type' | sed 's/.*bins, //' | sed 's/ entries.//')" if [ $n6 -ne $n12001 ]; then echo 'n6 found:' $n6 echo 'n12001 found:' $n12001 exit 1 else echo 'n6 found:' $n6 echo 'n12001 found:' $n12001 fi n12002="$(list_histograms -h 12002 ${dsthistfilename} | grep 'Histogram of type' | sed 's/.*bins, //' | sed 's/ entries.//')" nev="$(statio ${dstfile} | egrep '^2010' | cut -f2)" if [ -z "$nev" ]; then nev="0"; fi if [ $nev -ne $n12002 ]; then echo 'nev found:' $nev echo 'n12002 found:' $n12002 exit 1 else echo 'nev found:' $nev echo 'n12002 found:' $n12002 fi """ % (dstHistoFileName, dstFileName)) fd.close() os.system('chmod u+x check_dst_histo.sh') cmdTuple = ['./check_dst_histo.sh'] DIRAC.gLogger.notice('Executing command tuple:', cmdTuple) ret = systemCall(0, cmdTuple, sendOutput, env=corsikaEnviron) checkHistoReturnCode, stdout, stderr = ret['Value'] if not ret['OK']: DIRAC.gLogger.error('Failed to execute check_dst_histo.sh') DIRAC.gLogger.error('check_dst_histo.sh status is:', checkHistoReturnCode) DIRAC.exit(-1) if (checkHistoReturnCode != 0): DIRAC.gLogger.error('Failure during check_dst_histo.sh') DIRAC.gLogger.error('check_dst_histo.sh status is:', checkHistoReturnCode) jobReport.setApplicationStatus('Histo check Failed') DIRAC.exit(-1) ############create MD and upload dst data/histo ########################################################## global dstDirPath global dstProdVersion dstProdVersion = current_version + '_dst' dstDirPath = os.path.join(simtelDirPath_conf, dstProdVersion) dstOutFileDir = os.path.join(dstDirPath, 'Data', runNumSeriesDir) dstOutFileLFN = os.path.join(dstOutFileDir, dstFileName) resultCreateDstDirMD = createDstFileSystAndMD() if not resultCreateDstDirMD['OK']: DIRAC.gLogger.error('Failed to create Dst Directory 
MD') jobReport.setApplicationStatus('Failed to create Dst Directory MD') DIRAC.gLogger.error( 'Metadata coherence problem, no Dst File produced') DIRAC.exit(-1) else: DIRAC.gLogger.notice('Dst Directory MD successfully created') ############################################################ res = CheckCatalogCoherence(dstOutFileLFN) if res == DIRAC.S_OK: DIRAC.gLogger.notice('dst file already exists. Removing:', dstOutFileLFN) ret = dirac.removeFile(dstOutFileLFN) res = upload_to_seList(dstOutFileLFN, dstFileName) if res != DIRAC.S_OK: DIRAC.gLogger.error('Upload dst Error', dstOutFileLFN) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) ############################################################## dstHistoFileDir = os.path.join(dstDirPath, 'Histograms', runNumSeriesDir) dstHistoFileLFN = os.path.join(dstHistoFileDir, dstHistoFileName) res = CheckCatalogCoherence(dstHistoFileLFN) if res == DIRAC.S_OK: DIRAC.gLogger.notice('dst histo file already exists. Removing:', dstHistoFileLFN) ret = dirac.removeFile(dstHistoFileLFN) res = upload_to_seList(dstHistoFileLFN, dstHistoFileName) if res != DIRAC.S_OK: DIRAC.gLogger.error('Upload dst Error', dstHistoFileName) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) ########### Insert RunNumSeries MD ########################## dstRunNumberSeriesDirExist = fcc.isDirectory( dstOutFileDir)['Value']['Successful'][dstOutFileDir] newDstRunFileSeriesDir = ( dstRunNumberSeriesDirExist != True ) # if new runFileSeries, will need to add new MD if newDstRunFileSeriesDir: insertRunFileSeriesMD(dstOutFileDir, runNumTrunc) insertRunFileSeriesMD(dstHistoFileDir, runNumTrunc) ####### dst File level metadata ############################################### dstFileMD = {} dstFileMD['runNumber'] = int(run_number) dstFileMD['jobID'] = jobID dstFileMD['rctaReturnCode'] = rctaReturnCode result = fcc.setMetadata(dstOutFileLFN, dstFileMD) print "result setMetadata=", result if not result['OK']: print 
'ResultSetMetadata:', result['Message'] result = fcc.setMetadata(dstHistoFileLFN, dstFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] ########## set the ancestors for dst ##################################### result = fcc.addFileAncestors( {dstOutFileLFN: { 'Ancestors': [simtelOutFileLFN] }}) print 'result addFileAncestor:', result result = fcc.addFileAncestors( {dstHistoFileLFN: { 'Ancestors': [simtelOutFileLFN] }}) print 'result addFileAncestor:', result ###################################################### DIRAC.exit()
def __findServiceURL(self):
    """Resolve the URL that should be contacted for ``self._destinationSrv``.

    Resolution order:

    1. If initialisation failed, return the stored failure unchanged.
    2. Unless gateways are disabled through ``KW_IGNORE_GATEWAYS``, look up
       the gateways configured for this site and pick one at random.
    3. If the destination is already a full URL (it starts with one of the
       known ``<protocol>://`` prefixes), return it as is, or re-rooted on
       the gateway when one is in use.
    4. If a gateway is in use, return ``<gateway>/<destination>``.
    5. Otherwise ask ``getServiceURL`` for the configured URLs, drop the
       banned ones and return a randomly chosen candidate, preferring a
       host that differs from the banned hosts when more than two URLs are
       configured.

    :return: S_OK(url) on success, S_ERROR(message) when no URL can be
             determined (or the stored init status when it is not OK).
    """
    if not self.__initStatus['OK']:
        return self.__initStatus

    # Load the Gateways URLs for the current site Name
    gatewayURL = False
    if self.KW_IGNORE_GATEWAYS not in self.kwargs or not self.kwargs[
            self.KW_IGNORE_GATEWAYS]:
        dRetVal = gConfig.getOption("/DIRAC/Gateways/%s" % DIRAC.siteName())
        if dRetVal['OK']:
            gateways = List.randomize(List.fromChar(dRetVal['Value'], ","))
            # An option that is defined but empty yields no gateway at all;
            # indexing [0] unconditionally raised IndexError in that case.
            if gateways:
                rawGatewayURL = gateways[0]
                # keep only "<protocol>://<host>:<port>"
                gatewayURL = "/".join(rawGatewayURL.split("/")[:3])

    # If what was given as destination is already a URL, use it
    for protocol in gProtocolDict:
        if self._destinationSrv.startswith("%s://" % protocol):
            gLogger.debug("Already given a valid url", self._destinationSrv)
            if not gatewayURL:
                return S_OK(self._destinationSrv)
            gLogger.debug("Reconstructing given URL to pass through gateway")
            path = "/".join(self._destinationSrv.split("/")[3:])
            finalURL = "%s/%s" % (gatewayURL, path)
            gLogger.debug("Gateway URL conversion:\n %s -> %s" %
                          (self._destinationSrv, finalURL))
            return S_OK(finalURL)

    if gatewayURL:
        gLogger.debug("Using gateway", gatewayURL)
        return S_OK("%s/%s" % (gatewayURL, self._destinationSrv))

    # Broad catch kept from the original: any lookup failure is turned into
    # an S_ERROR for the caller instead of propagating.
    try:
        urls = getServiceURL(self._destinationSrv, setup=self.setup)
    except Exception as e:
        return S_ERROR("Cannot get URL for %s in setup %s: %s" %
                       (self._destinationSrv, self.setup, repr(e)))
    if not urls:
        return S_ERROR("URL for service %s not found" % self._destinationSrv)

    urlsList = List.fromChar(urls, ",")
    self.__nbOfUrls = len(urlsList)
    # we retry 2 times all services, if we run more than 2 services
    self.__nbOfRetry = 2 if self.__nbOfUrls > 2 else 3
    if len(urlsList) == len(self.__bannedUrls):
        self.__bannedUrls = []  # retry all urls
        gLogger.debug("Retrying again all URLs")

    if len(self.__bannedUrls) > 0 and len(urlsList) > 1:
        # we have host which is not accessible. We remove that host from the
        # list. We only remove if we have more than one instance
        candidates = list(urlsList)
        for bannedUrl in self.__bannedUrls:
            # A banned URL may have vanished from the configuration since it
            # was banned; list.remove() would raise ValueError for it.
            if bannedUrl in candidates:
                gLogger.debug("Removing banned URL", "%s" % bannedUrl)
                candidates.remove(bannedUrl)
        if candidates:
            urlsList = candidates
        else:
            # Every configured URL is banned (the banned list also holds
            # stale entries, so the length comparison above did not fire):
            # start over with all URLs rather than indexing an empty list.
            self.__bannedUrls = []
            gLogger.debug("Retrying again all URLs")

    randUrls = List.randomize(urlsList)
    sURL = randUrls[0]

    if len(self.__bannedUrls) > 0 and self.__nbOfUrls > 2:
        # when we have multiple services then we can have a situation
        # when two service are running on the same machine with different
        # port...
        retVal = Network.splitURL(sURL)
        nexturl = None
        if retVal['OK']:
            nexturl = retVal['Value']

            found = False
            for bannedUrl in self.__bannedUrls:
                retVal = Network.splitURL(bannedUrl)
                if retVal['OK']:
                    bannedurl = retVal['Value']
                else:
                    # original behaviour: stop at the first banned URL that
                    # cannot be parsed
                    break
                # element [1] of the split URL is compared as the host part
                if nexturl[1] == bannedurl[1]:
                    found = True
                    break
            if found:
                nexturl = self.__selectUrl(nexturl, randUrls[1:])
                if nexturl:  # an url found which is in different host
                    sURL = nexturl

    gLogger.debug("Discovering URL for service",
                  "%s -> %s" % (self._destinationSrv, sURL))
    return S_OK(sURL)
def execute(self):
    """ Main execution function.

    Finalizes a user job: works out which output files have to be uploaded,
    builds the ordered list of destination storage elements (local SE
    first, remaining default SEs in random order, user-requested SEs
    prepended), uploads and registers the files with failover, then
    triggers replication of the uploaded files.

    :return: S_OK when there is nothing to do, when the module is disabled
             or when the upload succeeded; the failing result when input
             resolution, LFN construction or SE resolution fails;
             S_ERROR('Failed To Upload Output Data') when files could not
             be saved, even to failover.
    """
    #Have to work out if the module is part of the last step i.e.
    #user jobs can have any number of steps and we only want
    #to run the finalization once. Not a problem if this is not the last step so return S_OK()
    resultLS = self.isLastStep()
    if not resultLS['OK']:
        return S_OK()

    self.logWorkingDirectory()

    resultIV = self.resolveInputVariables()
    if not resultIV['OK']:
        self.log.error("Failed to resolve input parameters:", resultIV['Message'])
        return resultIV

    self.log.info('Initializing %s' % self.version)
    if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
        self.log.verbose('Workflow status = %s, step status = %s' % (self.workflowStatus['OK'],
                                                                      self.stepStatus['OK']))
        return S_OK('No output data upload attempted')

    if not self.userOutputData:
        self.log.info('No user output data is specified for this job, nothing to do')
        return S_OK('No output data to upload')

    #Determine the final list of possible output files for the
    #workflow and all the parameters needed to upload them.
    outputList = self.getOutputList()

    userOutputLFNs = []
    if self.userOutputData:
        resultOLfn = self.constructOutputLFNs()
        if not resultOLfn['OK']:
            self.log.error('Could not create user LFNs', resultOLfn['Message'])
            return resultOLfn
        userOutputLFNs = resultOLfn['Value']

    self.log.verbose('Calling getCandidateFiles( %s, %s, %s)' % (outputList, userOutputLFNs,
                                                                 self.outputDataFileMask))
    self.log.debug("IgnoreAppErrors? '%s' " % self.ignoreapperrors)
    resultCF = self.getCandidateFiles(outputList, userOutputLFNs, self.outputDataFileMask)
    if not resultCF['OK']:
        if not self.ignoreapperrors:
            self.log.error(resultCF['Message'])
            self.setApplicationStatus(resultCF['Message'])
        else:
            self.log.info('Ignoring application error: %s' % resultCF['Message'])
        # A failed result carries no 'Value': always stop here instead of
        # falling through to a KeyError when ignoreapperrors is set.
        return S_OK()
    fileDict = resultCF['Value']

    resultFMD = self.getFileMetadata(fileDict)
    if not resultFMD['OK']:
        if not self.ignoreapperrors:
            self.log.error(resultFMD['Message'])
            self.setApplicationStatus(resultFMD['Message'])
        else:
            self.log.info('Ignoring application error: %s' % resultFMD['Message'])
        # Same reasoning as above: no 'Value' to continue with.
        return S_OK()

    if not resultFMD['Value']:
        # NOTE(review): with ignoreapperrors set the module carries on with
        # an empty file list (kept as in the original) -- confirm intended.
        if not self.ignoreapperrors:
            self.log.info('No output data files were determined to be uploaded for this workflow')
            self.setApplicationStatus('No Output Data Files To Upload')
            return S_OK()
    fileMetadata = resultFMD['Value']

    #First get the local (or assigned) SE to try first for upload and others in random fashion
    resultSEL = getDestinationSEList('Tier1-USER', DIRAC.siteName(), outputmode='local')
    if not resultSEL['OK']:
        self.log.error('Could not resolve output data SE', resultSEL['Message'])
        self.setApplicationStatus('Failed To Resolve OutputSE')
        return resultSEL
    localSE = resultSEL['Value']

    orderedSEs = [se for se in self.defaultOutputSE
                  if se not in localSE and se not in self.userOutputSE]
    orderedSEs = localSE + List.randomize(orderedSEs)
    if self.userOutputSE:
        # user-requested SEs take precedence over local and default ones
        prependSEs = [userSE for userSE in self.userOutputSE if userSE not in orderedSEs]
        orderedSEs = prependSEs + orderedSEs

    self.log.info('Ordered list of output SEs is: %s' % (', '.join(orderedSEs)))
    final = {}
    # items() instead of the Python-2-only iteritems()
    for fileName, metadata in fileMetadata.items():
        final[fileName] = metadata
        final[fileName]['resolvedSE'] = orderedSEs

    #At this point can exit and see exactly what the module will upload
    self.printOutputInfo(final)
    if not self.enable:
        return S_OK('Module is disabled by control flag')

    self.injectJobIndex(final)

    #Instantiate the failover transfer client with the global request object
    failoverTransfer = FailoverTransfer(self._getRequestContainer())

    #One by one upload the files with failover if necessary
    filesToReplicate = {}
    filesToFailover = {}
    filesUploaded = []
    if not self.failoverTest:
        self.transferAndRegisterFiles(final, failoverTransfer, filesToFailover,
                                      filesUploaded, filesToReplicate)
    else:
        filesToFailover = final

    ##if there are files to be failovered, we do it now
    resultTRFF = self.transferRegisterAndFailoverFiles(failoverTransfer, filesToFailover,
                                                       filesUploaded)
    if resultTRFF['OK']:
        cleanUp = resultTRFF['Value']['cleanUp']
    else:
        # No 'Value' on a failed result: treat it as an upload failure
        # rather than crashing with a KeyError.
        self.log.error('Failover transfer failed', resultTRFF['Message'])
        cleanUp = True

    #For files correctly uploaded must report LFNs to job parameters
    if filesUploaded:
        report = ', '.join(filesUploaded)
        self.jobReport.setJobParameter('UploadedOutputData', report)

    self.workflow_commons['Request'] = failoverTransfer.request

    #If some or all of the files failed to be saved to failover
    if cleanUp:
        #Leave any uploaded files just in case it is useful for the user
        #do not try to replicate any files.
        return S_ERROR('Failed To Upload Output Data')

    #If there is now at least one replica for uploaded files can trigger replication
    datMan = DataManager(catalogs=self.userFileCatalog)
    self.log.info('Sleeping for 10 seconds before attempting replication of recently uploaded files')
    time.sleep(10)
    for lfn, repSE in filesToReplicate.items():
        resultRAR = datMan.replicateAndRegister(lfn, repSE)
        if not resultRAR['OK']:
            # Replication is best effort: one replica already exists.
            self.log.info('Replication failed with below error but file already exists in '
                          'Grid storage with at least one replica:\n%s' % (resultRAR,))

    self.generateFailoverFile()

    self.setApplicationStatus('Job Finished Successfully')
    return S_OK('Output data uploaded')
def main(): from DIRAC.Core.Base import Script Script.registerSwitch("S:", "simtelConfig=", "SimtelConfig", setConfig) Script.registerSwitch("V:", "version=", "Version", setVersion) from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient from DIRAC.Resources.Catalog.FileCatalog import FileCatalog Script.parseCommandLine() DIRAC.gLogger.setLevel('INFO') global fcc, fcL from CTADIRAC.Core.Utilities.SoftwareInstallation import checkSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwarePackage from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwareEnviron from CTADIRAC.Core.Utilities.SoftwareInstallation import localArea from CTADIRAC.Core.Utilities.SoftwareInstallation import sharedArea from CTADIRAC.Core.Utilities.SoftwareInstallation import workingArea from DIRAC.Core.Utilities.Subprocess import systemCall from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport global jobID jobID = os.environ['JOBID'] jobReport = JobReport(int(jobID)) ########### ## Checking MD coherence fc = FileCatalog('LcgFileCatalog') res = fc._getCatalogConfigDetails('DIRACFileCatalog') print 'DFC CatalogConfigDetails:', res res = fc._getCatalogConfigDetails('LcgFileCatalog') print 'LCG CatalogConfigDetails:', res fcc = FileCatalogClient() fcL = FileCatalog('LcgFileCatalog') from DIRAC.Interfaces.API.Dirac import Dirac dirac = Dirac() ############################ install_CorsikaSimtelPack(version) ############# # simtelConfigFile should be built from ??? 
#simtelConfigFilesPath = 'sim_telarray/multi' #simtelConfigFile = simtelConfigFilesPath + '/multi_cta-ultra5.cfg' #createGlobalsFromConfigFiles(simtelConfigFile) #createGlobalsFromConfigFiles(current_version) ####################### ## files spread in 1000-runs subDirectories global corsikaFileLFN corsikaFileLFN = dirac.getJobJDL(jobID)['Value']['InputData'] print 'corsikaFileLFN is ' + corsikaFileLFN corsikaFileName = os.path.basename(corsikaFileLFN) run_number = corsikaFileName.split('run')[1].split('.corsika.gz')[ 0] # run001412.corsika.gz runNum = int(run_number) subRunNumber = '%03d' % runNum runNumModMille = runNum % 1000 runNumTrunc = (runNum - runNumModMille) / 1000 runNumSeriesDir = '%03dxxx' % runNumTrunc print 'runNumSeriesDir=', runNumSeriesDir f = open('DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK', 'w') f.close() ############ Producing SimTel File ######################Building simtel Directory Metadata ####################### cfg_dict = { "4MSST": 'cta-prod2-4m-dc', "SCSST": 'cta-prod2-sc-sst', "STD": 'cta-prod2', "NSBX3": 'cta-prod2', "ASTRI": 'cta-prod2-astri', "SCMST": 'cta-prod2-sc3', "NORTH": 'cta-prod2n' } if simtelConfig == "6INROW": all_configs = ["4MSST", "SCSST", "ASTRI", "NSBX3", "STD", "SCMST"] elif simtelConfig == "5INROW": all_configs = ["4MSST", "SCSST", "ASTRI", "NSBX3", "STD"] elif simtelConfig == "3INROW": all_configs = ["SCSST", "STD", "SCMST"] else: all_configs = [simtelConfig] for current_conf in all_configs: DIRAC.gLogger.notice('current conf is', current_conf) if current_conf == "SCMST": current_version = version + '_sc3' DIRAC.gLogger.notice('current version is', current_version) if os.path.isdir('sim_telarray'): DIRAC.gLogger.notice( 'Package found in the local area. 
Removing package...') cmd = 'rm -R sim_telarray corsika-6990 hessioxxx corsika-run' if (os.system(cmd)): DIRAC.exit(-1) install_CorsikaSimtelPack(current_version) else: current_version = version DIRAC.gLogger.notice('current version is', current_version) ######################################################## createGlobalsFromConfigFiles(current_version) resultCreateSimtelDirMD = createSimtelFileSystAndMD( current_conf, current_version) if not resultCreateSimtelDirMD['OK']: DIRAC.gLogger.error('Failed to create simtelArray Directory MD') jobReport.setApplicationStatus( 'Failed to create simtelArray Directory MD') DIRAC.gLogger.error( 'Metadata coherence problem, no simtelArray File produced') DIRAC.exit(-1) else: print 'simtel Directory MD successfully created' ############## introduce file existence check here ######################## simtelFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str( obslev) + '_' + 'run' + run_number + '.simtel.gz' simtelDirPath_conf = simtelDirPath + '_' + current_conf simtelOutFileDir = os.path.join(simtelDirPath_conf, 'Data', runNumSeriesDir) simtelOutFileLFN = os.path.join(simtelOutFileDir, simtelFileName) res = CheckCatalogCoherence(simtelOutFileLFN) if res == DIRAC.S_OK: DIRAC.gLogger.notice('Current conf already done', current_conf) continue #### execute simtelarray ################ fd = open('run_sim.sh', 'w') fd.write("""#! 
/bin/sh export SVNPROD2=$PWD export SVNTAG=SVN-PROD2 export CORSIKA_IO_BUFFER=800MB ./grid_prod2-repro.sh %s %s""" % (corsikaFileName, current_conf)) fd.close() os.system('chmod u+x grid_prod2-repro.sh') os.system('chmod u+x run_sim.sh') cmdTuple = ['./run_sim.sh'] ret = systemCall(0, cmdTuple, sendOutputSimTel) simtelReturnCode, stdout, stderr = ret['Value'] if (os.system('grep Broken simtel.log')): DIRAC.gLogger.notice('not broken') else: DIRAC.gLogger.notice('broken') jobReport.setApplicationStatus('Broken pipe') DIRAC.exit(-1) if not ret['OK']: DIRAC.gLogger.error('Failed to execute run_sim.sh') DIRAC.gLogger.error('run_sim.sh status is:', simtelReturnCode) DIRAC.exit(-1) ## putAndRegister simtel data/log/histo Output File: cfg = cfg_dict[current_conf] cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Data/*.simtel.gz ' + simtelFileName if (os.system(cmd)): DIRAC.exit(-1) ############################################ simtelRunNumberSeriesDirExist = fcc.isDirectory( simtelOutFileDir)['Value']['Successful'][simtelOutFileDir] newSimtelRunFileSeriesDir = ( simtelRunNumberSeriesDirExist != True ) # if new runFileSeries, will need to add new MD simtelLogFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str(obslev) + '_' + 'run' + run_number + '.log.gz' cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Log/*.log.gz ' + simtelLogFileName if (os.system(cmd)): DIRAC.exit(-1) simtelOutLogFileDir = os.path.join(simtelDirPath_conf, 'Log', runNumSeriesDir) simtelOutLogFileLFN = os.path.join(simtelOutLogFileDir, simtelLogFileName) simtelHistFileName = particle + '_' + str(thetaP) + '_' + str( phiP) + '_alt' + str( obslev) + '_' + 'run' + run_number + '.hdata.gz' cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Histograms/*.hdata.gz ' + simtelHistFileName if (os.system(cmd)): DIRAC.exit(-1) simtelOutHistFileDir = os.path.join(simtelDirPath_conf, 'Histograms', runNumSeriesDir) simtelOutHistFileLFN = os.path.join(simtelOutHistFileDir, simtelHistFileName) ########### 
quality check on Histo Missing because it needs the NSHOW ############################################# ########## quality check on Log ############################# cmd = 'zcat %s | grep Finished.' % simtelLogFileName DIRAC.gLogger.notice('Executing system call:', cmd) if (os.system(cmd)): jobReport.setApplicationStatus('Log check Failed') DIRAC.exit(-1) ################################################ from DIRAC.Core.Utilities import List from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations opsHelper = Operations() global seList seList = opsHelper.getValue('ProductionOutputs/SimtelProd', []) seList = List.randomize(seList) DIRAC.gLogger.notice('SeList is:', seList) ######### Upload simtel data/log/histo ############################################## res = upload_to_seList(simtelOutFileLFN, simtelFileName) if res != DIRAC.S_OK: DIRAC.gLogger.error('OutputData Upload Error', simtelOutFileLFN) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) res = CheckCatalogCoherence(simtelOutLogFileLFN) if res == DIRAC.S_OK: DIRAC.gLogger.notice('Log file already exists. Removing:', simtelOutLogFileLFN) ret = dirac.removeFile(simtelOutLogFileLFN) res = upload_to_seList(simtelOutLogFileLFN, simtelLogFileName) if res != DIRAC.S_OK: DIRAC.gLogger.error('Upload simtel Log Error', simtelOutLogFileLFN) DIRAC.gLogger.notice('Removing simtel data file:', simtelOutFileLFN) ret = dirac.removeFile(simtelOutFileLFN) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) res = CheckCatalogCoherence(simtelOutHistFileLFN) if res == DIRAC.S_OK: DIRAC.gLogger.notice('Histo file already exists. 
Removing:', simtelOutHistFileLFN) ret = dirac.removeFile(simtelOutHistFileLFN) res = upload_to_seList(simtelOutHistFileLFN, simtelHistFileName) if res != DIRAC.S_OK: DIRAC.gLogger.error('Upload simtel Histo Error', simtelOutHistFileLFN) DIRAC.gLogger.notice('Removing simtel data file:', simtelOutFileLFN) ret = dirac.removeFile(simtelOutFileLFN) DIRAC.gLogger.notice('Removing simtel log file:', simtelOutLogFileLFN) ret = dirac.removeFile(simtelOutLogFileLFN) jobReport.setApplicationStatus('OutputData Upload Error') DIRAC.exit(-1) #################################################################### if newSimtelRunFileSeriesDir: insertRunFileSeriesMD(simtelOutFileDir, runNumTrunc) insertRunFileSeriesMD(simtelOutLogFileDir, runNumTrunc) insertRunFileSeriesMD(simtelOutHistFileDir, runNumTrunc) ###### simtel File level metadata ############################################ simtelFileMD = {} simtelFileMD['runNumber'] = int(run_number) simtelFileMD['jobID'] = jobID simtelFileMD['simtelReturnCode'] = simtelReturnCode result = fcc.setMetadata(simtelOutFileLFN, simtelFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] result = fcc.setMetadata(simtelOutLogFileLFN, simtelFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] result = fcc.setMetadata(simtelOutHistFileLFN, simtelFileMD) print "result setMetadata=", result if not result['OK']: print 'ResultSetMetadata:', result['Message'] result = fcc.addFileAncestors( {simtelOutFileLFN: { 'Ancestors': [corsikaFileLFN] }}) print 'result addFileAncestor:', result result = fcc.addFileAncestors( {simtelOutLogFileLFN: { 'Ancestors': [corsikaFileLFN] }}) print 'result addFileAncestor:', result result = fcc.addFileAncestors( {simtelOutHistFileLFN: { 'Ancestors': [corsikaFileLFN] }}) print 'result addFileAncestor:', result result = fcc.setMetadata(simtelOutFileLFN, simtelFileMD) if not result['OK']: print 
'ResultSetMetadata:', result['Message'] DIRAC.exit()
def __findServiceURL( self ):
    """Pick the URL used to contact ``self._destinationSrv``.

    Resolution order, as implemented below:
      1. return the stored init status if initialisation failed;
      2. unless gateways are disabled through ``KW_IGNORE_GATEWAYS``, pick
         one gateway at random from ``/DIRAC/Gateways/<site name>``;
      3. if the destination already starts with a known protocol, return it
         as is, or re-rooted on the gateway when one was selected;
      4. if a gateway was selected, return ``<gateway>/<destination>``;
      5. otherwise fetch the service URLs, drop banned ones, choose one at
         random and avoid a host that already has a banned URL.

    Also sets ``self.__nbOfUrls`` and ``self.__nbOfRetry`` and may reset
    ``self.__bannedUrls``.

    :return: S_OK( url ) or S_ERROR( message )
    """
    if not self.__initStatus[ 'OK' ]:
        return self.__initStatus

    # --- gateway selection -------------------------------------------------
    gatewayURL = False
    ignoreGateways = ( self.KW_IGNORE_GATEWAYS in self.kwargs
                       and self.kwargs[ self.KW_IGNORE_GATEWAYS ] )
    if not ignoreGateways:
        gwOption = gConfig.getOption( "/DIRAC/Gateways/%s" % DIRAC.siteName() )
        if gwOption[ 'OK' ]:
            gwCandidates = List.fromChar( gwOption[ 'Value' ], "," )
            rawGatewayURL = List.randomize( gwCandidates )[0]
            # Keep only "<protocol>://<host:port>"
            gatewayURL = "/".join( rawGatewayURL.split( "/" )[:3] )

    # --- destination already given as a full URL ---------------------------
    for protocol in gProtocolDict.keys():
        if self._destinationSrv.find( "%s://" % protocol ) != 0:
            continue
        gLogger.debug( "Already given a valid url", self._destinationSrv )
        if not gatewayURL:
            return S_OK( self._destinationSrv )
        gLogger.debug( "Reconstructing given URL to pass through gateway" )
        servicePath = "/".join( self._destinationSrv.split( "/" )[3:] )
        finalURL = "%s/%s" % ( gatewayURL, servicePath )
        gLogger.debug( "Gateway URL conversion:\n %s -> %s" % ( self._destinationSrv, finalURL ) )
        return S_OK( finalURL )

    if gatewayURL:
        gLogger.debug( "Using gateway", gatewayURL )
        return S_OK( "%s/%s" % ( gatewayURL, self._destinationSrv ) )

    # --- look the service up -----------------------------------------------
    try:
        urls = getServiceURL( self._destinationSrv, setup = self.setup )
    except Exception as e:
        return S_ERROR( "Cannot get URL for %s in setup %s: %s" % ( self._destinationSrv, self.setup, repr( e ) ) )
    if not urls:
        return S_ERROR( "URL for service %s not found" % self._destinationSrv )

    urlsList = List.fromChar( urls, "," )
    self.__nbOfUrls = len( urlsList )
    # All services are retried 2 times when more than 2 services run
    self.__nbOfRetry = 2 if self.__nbOfUrls > 2 else 3

    if len( urlsList ) == len( self.__bannedUrls ):
        # Everything is banned: start over with the full list
        self.__bannedUrls = []
        gLogger.debug( "Retrying again all URLs" )

    if len( self.__bannedUrls ) > 0 and len( urlsList ) > 1:
        # Some hosts are not accessible; they are only dropped when more
        # than one instance is available
        for bannedUrl in self.__bannedUrls:
            gLogger.debug( "Removing banned URL", "%s" % bannedUrl )
            urlsList.remove( bannedUrl )

    randUrls = List.randomize( urlsList )
    sURL = randUrls[0]

    if len( self.__bannedUrls ) > 0 and self.__nbOfUrls > 2:
        # With several services, two of them may run on the same machine
        # on different ports...
        chosenSplit = Network.splitURL( sURL )
        if chosenSplit[ 'OK' ]:
            chosen = chosenSplit[ 'Value' ]
            sameHostBanned = False
            for bannedUrl in self.__bannedUrls:
                bannedSplit = Network.splitURL( bannedUrl )
                if not bannedSplit[ 'OK' ]:
                    break
                if chosen[1] == bannedSplit[ 'Value' ][1]:
                    sameHostBanned = True
                    break
            if sameHostBanned:
                alternative = self.__selectUrl( chosen, randUrls[1:] )
                if alternative:
                    # A URL on a different host was found
                    sURL = alternative

    gLogger.debug( "Discovering URL for service", "%s -> %s" % ( self._destinationSrv, sURL ) )
    return S_OK( sURL )
def execute(self):
    """ Main execution function.

    Runs only on the last workflow step. Resolves the user output files,
    builds their LFNs, orders the candidate storage elements, uploads each
    file (falling back to the failover SEs), triggers a replication for the
    uploaded files and finally writes any pending request to an XML file.

    :return: S_OK / S_ERROR structure
    """
    # Have to work out if the module is part of the last step, i.e.
    # user jobs can have any number of steps and we only want
    # to run the finalization once.
    currentStep = int(self.step_commons['STEP_NUMBER'])
    totalSteps = int(self.workflow_commons['TotalSteps'])
    if currentStep == totalSteps:
        self.lastStep = True
    else:
        self.log.verbose('Current step = %s, total steps of workflow = %s, '
                         'UserJobFinalization will enable itself only '
                         'at the last workflow step.' % (currentStep, totalSteps))

    if not self.lastStep:
        return S_OK()

    result = self.resolveInputVariables()
    if not result['OK']:
        self.log.error(result['Message'])
        return result

    self.log.info('Initializing %s' % self.version)
    if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
        self.log.verbose('Workflow status = %s, step status = %s' % (self.workflowStatus['OK'],
                                                                      self.stepStatus['OK']))
        return S_OK('No output data upload attempted')

    if not self.userOutputData:
        self.log.info('No user output data is specified for this job, nothing to do')
        return S_OK('No output data to upload')

    # Determine the final list of possible output files for the
    # workflow and all the parameters needed to upload them.
    outputList = [{'outputPath': outputName.split('.')[-1].upper(),
                   'outputDataSE': self.userOutputSE,
                   'outputFile': os.path.basename(outputName)}
                  for outputName in self.userOutputData]

    # self.userOutputData is known to be non-empty at this point.
    self.log.info('Constructing user output LFN(s) for %s' % (', '.join(self.userOutputData)))
    if not self.jobID:
        self.jobID = 12345

    if 'Owner' in self.workflow_commons:
        owner = self.workflow_commons['Owner']
    else:
        res = self.getCurrentOwner()
        if not res['OK']:
            return S_ERROR('Could not obtain owner from proxy')
        owner = res['Value']

    if 'VO' in self.workflow_commons:
        vo = self.workflow_commons['VO']
    else:
        res = self.getCurrentVO()
        if not res['OK']:
            return S_ERROR('Could not obtain VO from proxy')
        vo = res['Value']

    result = constructUserLFNs(int(self.jobID), vo, owner, self.userOutputData, self.userOutputPath)
    if not result['OK']:
        self.log.error('Could not create user LFNs', result['Message'])
        return result
    userOutputLFNs = result['Value']

    self.log.verbose('Calling getCandidateFiles( %s, %s, %s)' % (outputList, userOutputLFNs,
                                                                 self.outputDataFileMask))
    # NOTE(review): in the three checks below the early return is placed
    # inside the "not self.ignoreapperrors" branch; the nesting was rebuilt
    # from a whitespace-collapsed source - confirm against the repository.
    result = self.getCandidateFiles(outputList, userOutputLFNs, self.outputDataFileMask)
    if not result['OK']:
        if not self.ignoreapperrors:
            self.setApplicationStatus(result['Message'])
            return S_OK()

    fileDict = result['Value']
    result = self.getFileMetadata(fileDict)
    if not result['OK']:
        if not self.ignoreapperrors:
            self.setApplicationStatus(result['Message'])
            return S_OK()

    if not result['Value']:
        if not self.ignoreapperrors:
            self.log.info('No output data files were determined to be uploaded for this workflow')
            self.setApplicationStatus('No Output Data Files To Upload')
            return S_OK()

    fileMetadata = result['Value']

    # First get the local (or assigned) SE to try first for upload and others in random fashion
    result = getDestinationSEList('Tier1-USER', DIRAC.siteName(), outputmode='local')
    if not result['OK']:
        self.log.error('Could not resolve output data SE', result['Message'])
        self.setApplicationStatus('Failed To Resolve OutputSE')
        return result

    localSE = result['Value']
    self.log.verbose('Site Local SE for user outputs is: %s' % (localSE))

    # Work on a copy: removing entries from the list held in
    # self.defaultOutputSE would silently shrink the instance attribute.
    orderedSEs = list(self.defaultOutputSE)
    for se in localSE:
        if se in orderedSEs:
            orderedSEs.remove(se)
    for se in self.userOutputSE:
        if se in orderedSEs:
            orderedSEs.remove(se)

    orderedSEs = localSE + List.randomize(orderedSEs)
    if self.userOutputSE:
        prependSEs = [userSE for userSE in self.userOutputSE if userSE not in orderedSEs]
        orderedSEs = prependSEs + orderedSEs

    self.log.info('Ordered list of output SEs is: %s' % (', '.join(orderedSEs)))

    final = {}
    for fileName, metadata in fileMetadata.items():
        final[fileName] = metadata
        final[fileName]['resolvedSE'] = orderedSEs

    # At this point can exit and see exactly what the module will upload
    if not self.enable:
        self.log.info('Module is disabled by control flag, would have attempted '
                      'to upload the following files %s' % ', '.join(final.keys()))
        for fileName, metadata in final.items():
            self.log.info('--------%s--------' % fileName)
            for n, v in metadata.items():
                self.log.info('%s = %s' % (n, v))
        return S_OK('Module is disabled by control flag')

    # Instantiate the failover transfer client with the global request object
    failoverTransfer = FailoverTransfer(self.request)

    # One by one upload the files with failover if necessary
    replication = {}
    failover = {}
    uploaded = []
    if not self.failoverTest:
        for fileName, metadata in final.items():
            self.log.info("Attempting to store file %s to the following SE(s):\n%s" % (fileName,
                          ', '.join(metadata['resolvedSE'])))
            result = failoverTransfer.transferAndRegisterFile(fileName, metadata['localpath'],
                                                              metadata['lfn'], metadata['resolvedSE'],
                                                              fileGUID = metadata['guid'],
                                                              fileCatalog = self.userFileCatalog)
            if not result['OK']:
                self.log.error('Could not transfer and register %s with metadata:\n %s' % (fileName, metadata))
                failover[fileName] = metadata
            else:
                # Only attempt replication after successful upload
                lfn = metadata['lfn']
                uploaded.append(lfn)
                seList = metadata['resolvedSE']
                replicateSE = ''
                if 'uploadedSE' in result['Value']:
                    uploadedSE = result['Value']['uploadedSE']
                    # First SE in the ordered list that is not the upload target
                    for se in seList:
                        if not se == uploadedSE:
                            replicateSE = se
                            break

                if replicateSE and lfn:
                    self.log.info('Will attempt to replicate %s to %s' % (lfn, replicateSE))
                    replication[lfn] = replicateSE
    else:
        failover = final

    cleanUp = False
    for fileName, metadata in failover.items():
        random.shuffle(self.failoverSEs)
        targetSE = metadata['resolvedSE'][0]
        metadata['resolvedSE'] = self.failoverSEs
        result = failoverTransfer.transferAndRegisterFileFailover(fileName, metadata['localpath'],
                                                                  metadata['lfn'], targetSE,
                                                                  metadata['resolvedSE'],
                                                                  fileGUID = metadata['guid'],
                                                                  fileCatalog = self.userFileCatalog)
        if not result['OK']:
            self.log.error('Could not transfer and register %s with metadata:\n %s' % (fileName, metadata))
            cleanUp = True
            continue  # for users can continue even if one completely fails
        else:
            lfn = metadata['lfn']
            uploaded.append(lfn)

    # For files correctly uploaded must report LFNs to job parameters
    if uploaded:
        report = ', '.join(uploaded)
        self.jobReport.setJobParameter('UploadedOutputData', report)

    # Now after all operations, retrieve potentially modified request object
    result = failoverTransfer.getRequestObject()
    if not result['OK']:
        self.log.error(result)
        return S_ERROR('Could Not Retrieve Modified Request')

    self.request = result['Value']

    # If some or all of the files failed to be saved to failover
    if cleanUp:
        self.workflow_commons['Request'] = self.request
        # Leave any uploaded files just in case it is useful for the user
        # do not try to replicate any files.
        return S_ERROR('Failed To Upload Output Data')

    # If there is now at least one replica for uploaded files can trigger replication
    rm = ReplicaManager()
    self.log.info('Sleeping for 10 seconds before attempting replication of recently uploaded files')
    time.sleep(10)
    for lfn, repSE in replication.items():
        result = rm.replicateAndRegister(lfn, repSE, catalog = self.userFileCatalog)
        if not result['OK']:
            self.log.info('Replication failed with below error but file already exists in Grid storage with '
                          'at least one replica:\n%s' % (result))

    self.workflow_commons['Request'] = self.request

    # Now must ensure if any pending requests are generated that these are propagated to the job wrapper
    reportRequest = None
    if self.jobReport:
        result = self.jobReport.generateRequest()
        if not result['OK']:
            self.log.warn('Could not generate request for job report with result:\n%s' % (result))
        else:
            reportRequest = result['Value']
    if reportRequest:
        self.log.info('Populating request with job report information')
        self.request.update(reportRequest)

    if not self.request.isEmpty()['Value']:
        request_string = self.request.toXML()['Value']
        # Write out the request string; the context manager closes the file
        # even if the write fails.
        fname = 'user_job_%s_request.xml' % (self.jobID)
        with open(fname, 'w') as xmlfile:
            xmlfile.write(request_string)
        self.log.info('Creating failover request for deferred operations for job %s:' % self.jobID)
        result = self.request.getDigest()
        if result['OK']:
            digest = result['Value']
            self.log.info(digest)

    self.setApplicationStatus('Job Finished Successfully')
    return S_OK('Output data uploaded')
urls = List.fromChar( urls, "," ) self.__nbOfUrls = len( urls ) self.__nbOfRetry = 2 if self.__nbOfUrls > 2 else 3 # we retry 2 times all services, if we run more than 2 services if len( urls ) == len( self.__bannedUrls ): self.__bannedUrls = [] # retry all urls gLogger.debug( "Retrying again all URLs" ) if len( self.__bannedUrls ) > 0 and len( urls ) > 1 : # we have host which is not accessible. We remove that host from the list. # We only remove if we have more than one instance for i in self.__bannedUrls: gLogger.debug( "Removing banned URL", "%s" % i ) urls.remove( i ) sURL = List.randomize( urls )[0] gLogger.debug( "Discovering URL for service", "%s -> %s" % ( self._destinationSrv, sURL ) ) return S_OK( sURL ) def __checkThreadID( self ): if not self.__initStatus[ 'OK' ]: return self.__initStatus cThID = thread.get_ident() if not self.__allowedThreadID: self.__allowedThreadID = cThID elif cThID != self.__allowedThreadID : msgTxt = """ =======DISET client thread safety error======================== Client %s can only run on thread %s
def __findServiceURL(self):
    """
    Discovers the URL of a service, taking into account gateways, multiple URLs, banned URLs

    If the site on which we run is configured to use gateways (/DIRAC/Gateways/<siteName>),
    these URLs will be used. To ignore the gateway, set KW_IGNORE_GATEWAYS to a true value
    in kwargs (gateways are used when the key is absent or false).

    If self._destinationSrv (given as constructor attribute) is a properly formed URL,
    we just return this one. If we have to use a gateway, we just replace the server name in the url.

    The list of URLs defined in the CS (<System>/URLs/<Component>) is randomized; the failover
    URLs, when some are defined, are appended after it.

    This method also sets some attributes:
      * self.__nbOfUrls = number of URLs
      * self.__nbOfRetry = 2 if we have more than 2 urls, otherwise 3
      * self.__bannedUrls is reinitialized if all the URLs are banned

    :return: S_OK( selected URL ) or S_ERROR( message )
    """
    if not self.__initStatus['OK']:
        return self.__initStatus

    # Load the Gateways URLs for the current site Name
    gatewayURL = False
    if self.KW_IGNORE_GATEWAYS not in self.kwargs or not self.kwargs[self.KW_IGNORE_GATEWAYS]:
        dRetVal = gConfig.getOption("/DIRAC/Gateways/%s" % DIRAC.siteName())
        if dRetVal['OK']:
            rawGatewayURL = List.randomize(List.fromChar(dRetVal['Value'], ","))[0]
            gatewayURL = "/".join(rawGatewayURL.split("/")[:3])

    # If what was given as constructor attribute is a properly formed URL,
    # we just return this one.
    # If we have to use a gateway, we just replace the server name in it
    for protocol in gProtocolDict:
        if self._destinationSrv.find("%s://" % protocol) == 0:
            gLogger.debug("Already given a valid url", self._destinationSrv)
            if not gatewayURL:
                return S_OK(self._destinationSrv)
            gLogger.debug("Reconstructing given URL to pass through gateway")
            path = "/".join(self._destinationSrv.split("/")[3:])
            finalURL = "%s/%s" % (gatewayURL, path)
            gLogger.debug("Gateway URL conversion:\n %s -> %s" % (self._destinationSrv, finalURL))
            return S_OK(finalURL)

    if gatewayURL:
        gLogger.debug("Using gateway", gatewayURL)
        return S_OK("%s/%s" % (gatewayURL, self._destinationSrv))

    # We extract the list of URLs from the CS (System/URLs/Component)
    try:
        urls = getServiceURL(self._destinationSrv, setup=self.setup)
    except Exception as e:
        return S_ERROR("Cannot get URL for %s in setup %s: %s" % (self._destinationSrv, self.setup, repr(e)))
    if not urls:
        return S_ERROR("URL for service %s not found" % self._destinationSrv)

    failoverUrls = []
    # Try if there are some failover URLs to use as last resort.
    # This stays best effort, but the failure is now traced instead of
    # being dropped silently.
    try:
        failoverUrlsStr = getServiceFailoverURL(self._destinationSrv, setup=self.setup)
        if failoverUrlsStr:
            failoverUrls = failoverUrlsStr.split(',')
    except Exception as e:
        gLogger.debug("Could not get failover URLs for %s" % self._destinationSrv, repr(e))

    # We randomize the list, and add at the end the failover URLs (System/FailoverURLs/Component)
    urlsList = List.randomize(List.fromChar(urls, ",")) + failoverUrls
    self.__nbOfUrls = len(urlsList)
    # we retry 2 times all services, if we run more than 2 services
    self.__nbOfRetry = 2 if self.__nbOfUrls > 2 else 3
    if self.__nbOfUrls == len(self.__bannedUrls):
        self.__bannedUrls = []  # retry all urls
        gLogger.debug("Retrying again all URLs")

    if len(self.__bannedUrls) > 0 and len(urlsList) > 1:
        # we have host which is not accessible. We remove that host from the list.
        # We only remove if we have more than one instance
        for i in self.__bannedUrls:
            gLogger.debug("Removing banned URL", "%s" % i)
            urlsList.remove(i)

    # Take the first URL from the list (already randomized above)
    sURL = urlsList[0]

    # If we have banned URLs, and several URLs at disposals, we make sure that the selected sURL
    # is not on a host which is banned. If it is, we take the next one in the list using __selectUrl
    if len(self.__bannedUrls) > 0 and self.__nbOfUrls > 2:
        # when we have multiple services then we can have a situation
        # when two services are running on the same machine with different ports...
        retVal = Network.splitURL(sURL)
        nexturl = None
        if retVal['OK']:
            nexturl = retVal['Value']

            found = False
            for i in self.__bannedUrls:
                retVal = Network.splitURL(i)
                if retVal['OK']:
                    bannedurl = retVal['Value']
                else:
                    break
                # We found a banned URL on the same host as the selected one
                if nexturl[1] == bannedurl[1]:
                    found = True
                    break
            if found:
                nexturl = self.__selectUrl(nexturl, urlsList[1:])
                if nexturl:  # an url found which is in different host
                    sURL = nexturl
    gLogger.debug("Discovering URL for service", "%s -> %s" % (self._destinationSrv, sURL))
    return S_OK(sURL)
def _prepareJDL(self, taskQueueDict, workingDirectory, pilotOptions,
                pilotsToSubmit, ceMask, submitPrivatePilot, privateTQ):
    """
    Write JDL for Pilot Submission

    One resource broker is drawn at random from self.resourceBrokers and used
    for both the logging destination and the LB address; the NS addresses are
    built from self.loggingServers.

    :return: dict with keys 'JDL', 'Requirements', 'Pilots' and 'RB'
    """
    # Select Randomly one RB from the list
    RB = List.randomize(self.resourceBrokers)[0]

    loggingDestinations = ['"%s:9002"' % RB]
    lbAddresses = ['"%s:9000"' % RB]
    nsAddresses = ['"%s:7772"' % server for server in self.loggingServers]

    LD = ', '.join(loggingDestinations)
    NS = ', '.join(nsAddresses)
    LB = ', '.join(lbAddresses)

    vo = getVO()
    if privateTQ or vo not in ['lhcb']:
        extraReq = "True"
    elif submitPrivatePilot:
        extraReq = "! AllowsGenericPilot"
    else:
        extraReq = "AllowsGenericPilot"

    jdlValues = (extraReq, workingDirectory, workingDirectory,
                 workingDirectory, LD, NS, LB)
    rbJDL = """
AllowsGenericPilot = Member( "VO-lhcb-pilot" , other.GlueHostApplicationSoftwareRunTimeEnvironment );
Requirements = pilotRequirements && other.GlueCEStateStatus == "Production" && %s;
RetryCount = 0;
ErrorStorage = "%s/pilotError";
OutputStorage = "%s/pilotOutput";
# ListenerPort = 44000;
ListenerStorage = "%s/Storage";
VirtualOrganisation = "lhcb";
LoggingTimeout = 30;
LoggingSyncTimeout = 30;
LoggingDestination = { %s };
# Default NS logger level is set to 0 (null)
# max value is 6 (very ugly)
NSLoggerLevel = 0;
DefaultLogInfoLevel = 0;
DefaultStatusLevel = 0;
NSAddresses = { %s };
LBAddresses = { %s };
MyProxyServer = "no-myproxy.cern.ch";
""" % jdlValues

    jobJDL = self._JobJDL(taskQueueDict, pilotOptions, ceMask)
    pilotJDL, pilotRequirements = jobJDL

    jdlPath = os.path.join(workingDirectory,
                           '%s.jdl' % taskQueueDict['TaskQueueID'])
    jdl = self._writeJDL(jdlPath, [pilotJDL, rbJDL])

    return {
        'JDL': jdl,
        'Requirements': pilotRequirements + " && " + extraReq,
        'Pilots': pilotsToSubmit,
        'RB': RB,
    }
def _prepareJDL( self, taskQueueDict, workingDirectory, pilotOptions, pilotsToSubmit, ceMask,
                 submitPrivatePilot, privateTQ ):
    """
    Write JDL for Pilot Submission

    One resource broker is drawn at random from self.resourceBrokers and used as
    the single WMProxy end point; the LB end points are built from
    self.loggingServers and shuffled. When more than one pilot is requested a
    parametric job section is appended to the JDL.

    :return: dict with keys 'JDL', 'Requirements', 'Pilots' and 'RB'
    """
    # Select Randomly one RB from the list
    rb = List.randomize( self.resourceBrokers )[0]
    wmProxyEndPoints = [ '"https://%s:7443/glite_wms_wmproxy_server"' % rb ]

    lbEndPoints = List.randomize( [ '"https://%s:9000"' % server for server in self.loggingServers ] )

    vo = gConfig.getValue( '/DIRAC/VirtualOrganization', '' )
    if privateTQ or vo not in ['lhcb']:
        extraReq = "True"
    elif submitPrivatePilot:
        extraReq = "! AllowsGenericPilot"
    else:
        extraReq = "AllowsGenericPilot"

    myProxyServer = self.myProxyServer.strip()
    if not myProxyServer:
        #Random string to avoid caching
        timeStamp = str( time.time() )
        myProxyServer = "%s.cern.ch" % md5( timeStamp ).hexdigest()[:10]

    jdlValues = ( extraReq, myProxyServer,
                  workingDirectory, workingDirectory, workingDirectory,
                  ', '.join( wmProxyEndPoints ), ', '.join( lbEndPoints ), myProxyServer )
    wmsClientJDL = """
RetryCount = 0;
ShallowRetryCount = 0;
AllowsGenericPilot = Member( "VO-lhcb-pilot" , other.GlueHostApplicationSoftwareRunTimeEnvironment );
Requirements = pilotRequirements && %s;
MyProxyServer = "%s";
WmsClient = [
ErrorStorage = "%s/pilotError";
OutputStorage = "%s/pilotOutput";
# ListenerPort = 44000;
ListenerStorage = "%s/Storage";
RetryCount = 0;
ShallowRetryCount = 0;
WMProxyEndPoints = { %s };
LBEndPoints = { %s };
EnableServiceDiscovery = false;
MyProxyServer = "%s";
JdlDefaultAttributes = [
requirements = ( other.GlueCEStateStatus == "Production" || other.GlueCEStateStatus == "Special" );
AllowZippedISB = true;
SignificantAttributes = {"Requirements", "Rank", "FuzzyRank"};
PerusalFileEnable = false;
];
];
""" % jdlValues

    # A single pilot is a plain job; several pilots become one parametric job
    nPilots = 1
    if pilotsToSubmit > 1:
        nPilots = pilotsToSubmit
        wmsClientJDL += """
JobType = "Parametric";
Parameters= %s;
ParameterStep =1;
ParameterStart = 0;
""" % pilotsToSubmit

    jobJDL = self._JobJDL( taskQueueDict, pilotOptions, ceMask )
    pilotJDL, pilotRequirements = jobJDL

    jdlPath = os.path.join( workingDirectory, '%s.jdl' % taskQueueDict['TaskQueueID'] )
    jdl = self._writeJDL( jdlPath, [pilotJDL, wmsClientJDL] )

    return { 'JDL': jdl,
             'Requirements': pilotRequirements + " && " + extraReq,
             'Pilots': nPilots,
             'RB': rb }