Ejemplo n.º 1
0
 def testRandomize(self):
     """ randomize tests """
     # empty list
     aList = []
     randList = List.randomize(aList)
     self.assertEqual(randList, [])
     # non empty
     aList = [1, 2, 3]
     randList = List.randomize(aList)
     self.assertEqual(len(aList), len(randList))
     for x in aList:
         self.assertEqual(x in randList, True)
     for x in randList:
         self.assertEqual(x in aList, True)
Ejemplo n.º 2
0
 def testRandomize(self):
     """ randomize tests """
     # empty list
     aList = []
     randList = List.randomize(aList)
     self.assertEqual(randList, [])
     # non empty
     aList = [1, 2, 3]
     randList = List.randomize(aList)
     self.assertEqual(len(aList), len(randList))
     for x in aList:
         self.assertEqual(x in randList, True)
     for x in randList:
         self.assertEqual(x in aList, True)
Ejemplo n.º 3
0
 def getSocket( self, hostAddress, **kwargs ):
   hostName = hostAddress[0]
   retVal = self.generateClientInfo( hostName, kwargs )
   if not retVal[ 'OK' ]:
     return retVal
   socketInfo = retVal[ 'Value' ]
   retVal = Network.getIPsForHostName( hostName )
   if not retVal[ 'OK' ]:
     return S_ERROR( "Could not resolve %s: %s" % ( hostName, retVal[ 'Message' ] ) )
   ipList = List.randomize( retVal[ 'Value' ] )
   for i in range( 3 ):
     connected = False
     errorsList = []
     for ip in ipList :
       ipAddress = ( ip, hostAddress[1] )
       retVal = self.__connect( socketInfo, ipAddress )
       if retVal[ 'OK' ]:
         sslSocket = retVal[ 'Value' ]
         connected = True
         break
       errorsList.append( "%s: %s" % ( ipAddress, retVal[ 'Message' ] ) )
     if not connected:
       return S_ERROR( "Could not connect to %s: %s" % ( hostAddress, "," .join( [ e for e in errorsList ] ) ) )
     retVal = socketInfo.doClientHandshake()
     if retVal[ 'OK' ]:
       #Everything went ok. Don't need to retry
       break
   #Did the auth or the connection fail?
   if not retVal['OK']:
     return retVal
   if 'enableSessions' in kwargs and kwargs[ 'enableSessions' ]:
     sessionId = hash( hostAddress )
     gSessionManager.set( sessionId, sslSocket.get_session() )
   return S_OK( socketInfo )
Ejemplo n.º 4
0
  def __submitPilots( self, taskQueueDict, pilotsToSubmit ):
    """
      Try to insert the submission in the corresponding Thread Pool, disable the Thread Pool
      until next itration once it becomes full
    """
    # Check if an specific MiddleWare is required
    if 'SubmitPools' in taskQueueDict:
      submitPools = taskQueueDict[ 'SubmitPools' ]
    else:
      submitPools = self.am_getOption( 'DefaultSubmitPools' )
    submitPools = List.randomize( submitPools )

    for submitPool in submitPools:
      self.log.verbose( 'Trying SubmitPool:', submitPool )

      if not submitPool in self.directors or not self.directors[submitPool]['isEnabled']:
        self.log.verbose( 'Not Enabled' )
        continue

      pool = self.pools[self.directors[submitPool]['pool']]
      director = self.directors[submitPool]['director']
      ret = pool.generateJobAndQueueIt( director.submitPilots,
                                        args = ( taskQueueDict, pilotsToSubmit, self.workDir ),
                                        oCallback = self.callBack,
                                        oExceptionCallback = director.exceptionCallBack,
                                        blocking = False )
      if not ret['OK']:
        # Disable submission until next iteration
        self.directors[submitPool]['isEnabled'] = False
      else:
        time.sleep( self.am_getOption( 'ThreadStartDelay' ) )
        break

    return S_OK( pilotsToSubmit )
def putAndRegisterPROD3(args):
    """ simple wrapper to put and register all analysis files

    Keyword arguments:
    args -- a list of arguments in order []
    """
    outputpattern = args[0]
    outputpath = args[1]
    SEListArg = json.loads(args[2])
    SEList = []
    for SE in SEListArg:
        SEList.append(str(SE))

    # # Init DataManager
    catalogs = ['DIRACFileCatalog']
    prod3dm = Prod3DataManager(catalogs)

    # # Upload data files
    res = prod3dm._checkemptydir(outputpattern)
    if not res['OK']:
        return res

    for localfile in glob.glob(outputpattern):
        filename = os.path.basename(localfile)
        lfn = os.path.join(outputpath, filename)
        SEList = List.randomize(SEList)
        res = prod3dm._putAndRegisterToSEList(lfn, localfile, SEList)
        # ##  check if failed
        if not res['OK']:
            return res

    return DIRAC.S_OK()
Ejemplo n.º 6
0
 def getSocket( self, hostAddress, **kwargs ):
   hostName = hostAddress[0]
   retVal = self.generateClientInfo( hostName, kwargs )
   if not retVal[ 'OK' ]:
     return retVal
   socketInfo = retVal[ 'Value' ]
   retVal = Network.getIPsForHostName( hostName )
   if not retVal[ 'OK' ]:
     return S_ERROR( "Could not resolve %s: %s" % ( hostName, retVal[ 'Message' ] ) )
   ipList = List.randomize( retVal[ 'Value' ] )
   for i in range( 3 ):
     connected = False
     errorsList = []
     for ip in ipList :
       ipAddress = ( ip, hostAddress[1] )
       retVal = self.__connect( socketInfo, ipAddress )
       if retVal[ 'OK' ]:
         sslSocket = retVal[ 'Value' ]
         connected = True
         break
       errorsList.append( "%s: %s" % ( ipAddress, retVal[ 'Message' ] ) )
     if not connected:
       return S_ERROR( "Could not connect to %s: %s" % ( hostAddress, "," .join( [ e for e in errorsList ] ) ) )
     retVal = socketInfo.doClientHandshake()
     if retVal[ 'OK' ]:
       #Everything went ok. Don't need to retry
       break
   #Did the auth or the connection fail?
   if not retVal['OK']:
     return retVal
   if 'enableSessions' in kwargs and kwargs[ 'enableSessions' ]:
     sessionId = hash( hostAddress )
     gSessionManager.set( sessionId, sslSocket.get_session() )
   return S_OK( socketInfo )
Ejemplo n.º 7
0
  def _getSEList( self, SEType = 'ProductionOutputs', DataType = 'SimtelProd' ):
    """ get from CS the list of available SE for data upload
    """
    opsHelper = Operations()
    optionName = os.path.join( SEType, DataType )
    SEList = opsHelper.getValue( optionName , [] )
    SEList = List.randomize( SEList )
    DIRAC.gLogger.notice( 'List of %s SE: %s ' % ( SEType, SEList ) )

    # # Check if the local SE is in the list. If yes try it first by reversing list order
    localSEList = []
    res = getSEsForSite( DIRAC.siteName() )
    if res['OK']:
      localSEList = res['Value']

    retainedlocalSEList = []
    for localSE in localSEList:
      if localSE in SEList:
        DIRAC.gLogger.notice( 'The local Storage Element is an available SE: ', localSE )
        retainedlocalSEList.append( localSE )
        SEList.remove( localSE )

    SEList = retainedlocalSEList + SEList
    if len( SEList ) == 0:
      return DIRAC.S_ERROR( 'Error in building SEList' )

    return DIRAC.S_OK( SEList )
Ejemplo n.º 8
0
  def __submitPilots( self, taskQueueDict, pilotsToSubmit ):
    """
      Try to insert the submission in the corresponding Thread Pool, disable the Thread Pool
      until next itration once it becomes full
    """
    # Check if an specific MiddleWare is required
    if 'SubmitPools' in taskQueueDict:
      submitPools = taskQueueDict[ 'SubmitPools' ]
    else:
      submitPools = self.am_getOption( 'DefaultSubmitPools' )
    submitPools = List.randomize( submitPools )

    for submitPool in submitPools:
      self.log.verbose( 'Trying SubmitPool:', submitPool )

      if not submitPool in self.directors or not self.directors[submitPool]['isEnabled']:
        self.log.verbose( 'Not Enabled' )
        continue

      pool = self.pools[self.directors[submitPool]['pool']]
      director = self.directors[submitPool]['director']
      ret = pool.generateJobAndQueueIt( director.submitPilots,
                                        args = ( taskQueueDict, pilotsToSubmit, self.workDir ),
                                        oCallback = self.callBack,
                                        oExceptionCallback = director.exceptionCallBack,
                                        blocking = False )
      if not ret['OK']:
        # Disable submission until next iteration
        self.directors[submitPool]['isEnabled'] = False
      else:
        time.sleep( self.am_getOption( 'ThreadStartDelay' ) )
        break

    return S_OK( pilotsToSubmit )
Ejemplo n.º 9
0
  def __findServiceURL( self ):
    if not self.__initStatus[ 'OK' ]:
      return self.__initStatus
    gatewayURL = False
    if self.KW_IGNORE_GATEWAYS not in self.kwargs or not self.kwargs[ self.KW_IGNORE_GATEWAYS ]:
      dRetVal = gConfig.getOption( "/DIRAC/Gateways/%s" % DIRAC.siteName() )
      if dRetVal[ 'OK' ]:
        rawGatewayURL = List.randomize( List.fromChar( dRetVal[ 'Value'], "," ) )[0]
        gatewayURL = "/".join( rawGatewayURL.split( "/" )[:3] )

    for protocol in gProtocolDict.keys():
      if self._destinationSrv.find( "%s://" % protocol ) == 0:
        gLogger.debug( "Already given a valid url", self._destinationSrv )
        if not gatewayURL:
          return S_OK( self._destinationSrv )
        gLogger.debug( "Reconstructing given URL to pass through gateway" )
        path = "/".join( self._destinationSrv.split( "/" )[3:] )
        finalURL = "%s/%s" % ( gatewayURL, path )
        gLogger.debug( "Gateway URL conversion:\n %s -> %s" % ( self._destinationSrv, finalURL ) )
        return S_OK( finalURL )

    if gatewayURL:
      gLogger.debug( "Using gateway", gatewayURL )
      return S_OK( "%s/%s" % ( gatewayURL, self._destinationSrv ) )

    try:
      urls = getServiceURL( self._destinationSrv, setup = self.setup )
    except Exception, e:
      return S_ERROR( "Cannot get URL for %s in setup %s: %s" % ( self._destinationSrv, self.setup, str( e ) ) )
Ejemplo n.º 10
0
    def __refresh(self):
        self.__lastUpdateTime = time.time()
        gLogger.debug("Refreshing configuration...")
        gatewayList = getGatewayURLs("Configuration/Server")
        updatingErrorsList = []
        if gatewayList:
            lInitialListOfServers = gatewayList
            gLogger.debug("Using configuration gateway",
                          str(lInitialListOfServers[0]))
        else:
            lInitialListOfServers = gConfigurationData.getServers()
            gLogger.debug("Refreshing from list %s" %
                          str(lInitialListOfServers))
        lRandomListOfServers = List.randomize(lInitialListOfServers)
        gLogger.debug("Randomized server list is %s" %
                      ", ".join(lRandomListOfServers))

        for sServer in lRandomListOfServers:
            from DIRAC.Core.DISET.RPCClient import RPCClient
            oClient = RPCClient(
                sServer,
                useCertificates=gConfigurationData.useServerCertificate(),
                skipCACheck=gConfigurationData.skipCACheck())
            dRetVal = _updateFromRemoteLocation(oClient)
            if dRetVal['OK']:
                return dRetVal
            else:
                updatingErrorsList.append(dRetVal['Message'])
                gLogger.warn(
                    "Can't update from server",
                    "Error while updating from %s: %s" %
                    (sServer, dRetVal['Message']))
        return S_ERROR("Reason(s):\n\t%s" %
                       "\n\t".join(List.uniqueElements(updatingErrorsList)))
Ejemplo n.º 11
0
  def __refresh( self ):
    self.__lastUpdateTime = time.time()
    gLogger.debug( "Refreshing configuration..." )
    gatewayList = getGatewayURLs( "Configuration/Server" )
    updatingErrorsList = []
    if gatewayList:
      lInitialListOfServers = gatewayList
      gLogger.debug( "Using configuration gateway", str( lInitialListOfServers[0] ) )
    else:
      lInitialListOfServers = gConfigurationData.getServers()
      gLogger.debug( "Refreshing from list %s" % str( lInitialListOfServers ) )
    lRandomListOfServers = List.randomize( lInitialListOfServers )
    gLogger.debug( "Randomized server list is %s" % ", ".join( lRandomListOfServers ) )

    for sServer in lRandomListOfServers:
      from DIRAC.Core.DISET.RPCClient import RPCClient
      oClient = RPCClient( sServer,
                         useCertificates = gConfigurationData.useServerCertificate(),
                         skipCACheck = gConfigurationData.skipCACheck() )
      dRetVal = _updateFromRemoteLocation( oClient )
      if dRetVal[ 'OK' ]:
        return dRetVal
      else:
        updatingErrorsList.append( dRetVal[ 'Message' ] )
        gLogger.warn( "Can't update from server", "Error while updating from %s: %s" % ( sServer, dRetVal[ 'Message' ] ) )
        if dRetVal[ 'Message' ].find( "Insane environment" ) > -1:
          break
    return S_ERROR( "Reason(s):\n\t%s" % "\n\t".join( List.uniqueElements( updatingErrorsList ) ) )
Ejemplo n.º 12
0
  def _prepareJDL( self, taskQueueDict, workingDirectory, pilotOptions, pilotsToSubmit, ceMask, submitPrivatePilot, privateTQ ):
    """
      Write JDL for Pilot Submission
    """
    # RB = List.randomize( self.resourceBrokers )[0]
    LDs = []
    NSs = []
    LBs = []
    # Select Randomly one RB from the list
    RB = List.randomize( self.resourceBrokers )[0]
    LDs.append( '"%s:9002"' % RB )
    LBs.append( '"%s:9000"' % RB )

    for LB in self.loggingServers:
      NSs.append( '"%s:7772"' % LB )

    LD = ', '.join( LDs )
    NS = ', '.join( NSs )
    LB = ', '.join( LBs )

    vo = getVO()
    if privateTQ or vo not in ['lhcb']:
      extraReq = "True"
    else:
      if submitPrivatePilot:
        extraReq = "! AllowsGenericPilot"
      else:
        extraReq = "AllowsGenericPilot"

    rbJDL = """
AllowsGenericPilot = Member( "VO-lhcb-pilot" , other.GlueHostApplicationSoftwareRunTimeEnvironment );
Requirements = pilotRequirements && other.GlueCEStateStatus == "Production" && %s;
RetryCount = 0;
ErrorStorage = "%s/pilotError";
OutputStorage = "%s/pilotOutput";
# ListenerPort = 44000;
ListenerStorage = "%s/Storage";
VirtualOrganisation = "lhcb";
LoggingTimeout = 30;
LoggingSyncTimeout = 30;
LoggingDestination = { %s };
# Default NS logger level is set to 0 (null)
# max value is 6 (very ugly)
NSLoggerLevel = 0;
DefaultLogInfoLevel = 0;
DefaultStatusLevel = 0;
NSAddresses = { %s };
LBAddresses = { %s };
MyProxyServer = "no-myproxy.cern.ch";
""" % ( extraReq, workingDirectory, workingDirectory, workingDirectory, LD, NS, LB )

    pilotJDL, pilotRequirements = self._JobJDL( taskQueueDict, pilotOptions, ceMask )

    jdl = os.path.join( workingDirectory, '%s.jdl' % taskQueueDict['TaskQueueID'] )
    jdl = self._writeJDL( jdl, [pilotJDL, rbJDL] )

    return {'JDL':jdl, 'Requirements':pilotRequirements + " && " + extraReq, 'Pilots': pilotsToSubmit, 'RB':RB }
Ejemplo n.º 13
0
def getGatewayURLs( serviceName = "" ):
  siteName = gConfigurationData.extractOptionFromCFG( "/LocalSite/Site" )
  if not siteName:
    return False
  gatewayList = gConfigurationData.extractOptionFromCFG( "/DIRAC/Gateways/%s" % siteName )
  if not gatewayList:
    return False
  if serviceName:
    gatewayList = [ "%s/%s" % ( "/".join( gw.split( "/" )[:3] ), serviceName ) for gw in List.fromChar( gatewayList, "," ) ]
  return List.randomize( gatewayList )
Ejemplo n.º 14
0
def getGatewayURLs( serviceName = "" ):
  siteName = gConfigurationData.extractOptionFromCFG( "/LocalSite/Site" )
  if not siteName:
    return False
  gatewayList = gConfigurationData.extractOptionFromCFG( "/DIRAC/Gateways/%s" % siteName )
  if not gatewayList:
    return False
  if serviceName:
    gatewayList = [ "%s/%s" % ( "/".join( gw.split( "/" )[:3] ), serviceName ) for gw in List.fromChar( gatewayList, "," ) ]
  return List.randomize( gatewayList )
Ejemplo n.º 15
0
def getServiceURLs(system, service=None, setup=False, failover=False):
    """Generate url.

    :param str system: system name or full name e.g.: Framework/ProxyManager
    :param str service: service name, like 'ProxyManager'.
    :param str setup: DIRAC setup name, can be defined in dirac.cfg
    :param bool failover: to add failover URLs to end of result list

    :return: list -- complete urls. e.g. [dips://some-domain:3424/Framework/Service]
    """
    system, service = divideFullName(system, service)
    resList = []
    mainServers = None
    systemSection = getSystemSection(system, setup=setup)

    # Add failover URLs at the end of the list
    failover = "Failover" if failover else ""
    for fURLs in ["", "Failover"] if failover else [""]:
        urlList = []
        urls = List.fromChar(
            gConfigurationData.extractOptionFromCFG(
                "%s/%sURLs/%s" % (systemSection, fURLs, service)))

        # Be sure that urls not None
        for url in urls or []:

            # Trying if we are refering to the list of main servers
            # which would be like dips://$MAINSERVERS$:1234/System/Component
            if "$MAINSERVERS$" in url:
                if not mainServers:
                    # Operations cannot be imported at the beginning because of a bootstrap problem
                    from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations

                    mainServers = Operations(setup=setup).getValue(
                        "MainServers", [])
                if not mainServers:
                    raise Exception("No Main servers defined")

                for srv in mainServers:
                    _url = checkComponentURL(url.replace("$MAINSERVERS$", srv),
                                             system,
                                             service,
                                             pathMandatory=True)
                    if _url not in urlList:
                        urlList.append(_url)
                continue

            _url = checkComponentURL(url, system, service, pathMandatory=True)
            if _url not in urlList:
                urlList.append(_url)

        # Randomize list if needed
        resList.extend(List.randomize(urlList))

    return resList
Ejemplo n.º 16
0
    def _refresh(self, fromMaster=False):
        """
        Refresh configuration
        """
        self._lastUpdateTime = time.time()
        gLogger.debug("Refreshing configuration...")
        gatewayList = getGatewayURLs("Configuration/Server")
        updatingErrorsList = []
        if gatewayList:
            initialServerList = gatewayList
            gLogger.debug("Using configuration gateway",
                          str(initialServerList[0]))
        elif fromMaster:
            masterServer = gConfigurationData.getMasterServer()
            initialServerList = [masterServer]
            gLogger.debug("Refreshing from master %s" % masterServer)
        else:
            initialServerList = gConfigurationData.getServers()
            gLogger.debug("Refreshing from list %s" % str(initialServerList))

        # If no servers in the initial list, we are supposed to use the local configuration only
        if not initialServerList:
            return S_OK()

        randomServerList = List.randomize(initialServerList)
        gLogger.debug("Randomized server list is %s" %
                      ", ".join(randomServerList))

        for sServer in randomServerList:
            from DIRAC.ConfigurationSystem.Client.ConfigurationClient import ConfigurationClient

            oClient = ConfigurationClient(
                url=sServer,
                useCertificates=gConfigurationData.useServerCertificate(),
                skipCACheck=gConfigurationData.skipCACheck(),
            )
            dRetVal = _updateFromRemoteLocation(oClient)
            if dRetVal["OK"]:
                self._refreshTime = gConfigurationData.getRefreshTime()
                return dRetVal
            else:
                updatingErrorsList.append(dRetVal["Message"])
                gLogger.warn(
                    "Can't update from server",
                    "Error while updating from %s: %s" %
                    (sServer, dRetVal["Message"]))
                if dRetVal["Message"].find("Insane environment") > -1:
                    break
        return S_ERROR("Reason(s):\n\t%s" %
                       "\n\t".join(List.uniqueElements(updatingErrorsList)))
Ejemplo n.º 17
0
def getGatewayURLs(system="", service=None):
    """Get gateway URLs for service

    :param str system: system name or full name, like 'Framework/Service'.
    :param str service: service name, like 'ProxyManager'.

    :return: list or False
    """
    if system:
        system, service = divideFullName(system, service)
    siteName = gConfigurationData.extractOptionFromCFG("/LocalSite/Site")
    if not siteName:
        return False
    gateways = gConfigurationData.extractOptionFromCFG("/DIRAC/Gateways/%s" % siteName)
    if not gateways:
        return False
    gateways = List.randomize(List.fromChar(gateways, ","))
    return [checkComponentURL(u, system, service) for u in gateways if u] if system and service else gateways
Ejemplo n.º 18
0
    def __refresh(self):
        self.__lastUpdateTime = time.time()
        gLogger.debug("Refreshing configuration...")
        gatewayList = getGatewayURLs("Configuration/Server")
        updatingErrorsList = []
        if gatewayList:
            initialServerList = gatewayList
            gLogger.debug("Using configuration gateway",
                          str(initialServerList[0]))
        else:
            initialServerList = gConfigurationData.getServers()
            gLogger.debug("Refreshing from list %s" % str(initialServerList))

        # If no servers in the initial list, we are supposed to use the local configuration only
        if not initialServerList:
            return S_OK()

        randomServerList = List.randomize(initialServerList)
        gLogger.debug("Randomized server list is %s" %
                      ", ".join(randomServerList))

        for sServer in randomServerList:
            from DIRAC.Core.DISET.RPCClient import RPCClient
            oClient = RPCClient(
                sServer,
                useCertificates=gConfigurationData.useServerCertificate(),
                skipCACheck=gConfigurationData.skipCACheck())
            dRetVal = _updateFromRemoteLocation(oClient)
            if dRetVal['OK']:
                return dRetVal
            else:
                updatingErrorsList.append(dRetVal['Message'])
                gLogger.warn(
                    "Can't update from server",
                    "Error while updating from %s: %s" %
                    (sServer, dRetVal['Message']))
                if dRetVal['Message'].find("Insane environment") > -1:
                    break
        return S_ERROR("Reason(s):\n\t%s" %
                       "\n\t".join(List.uniqueElements(updatingErrorsList)))
Ejemplo n.º 19
0
  def configure( self, csSection, submitPool ):
    """
     Here goes common configuration for all Grid PilotDirectors
    """
    PilotDirector.configure( self, csSection, submitPool )
    self.reloadConfiguration( csSection, submitPool )

    self.__failingWMSCache.purgeExpired()
    self.__ticketsWMSCache.purgeExpired()
    for rb in self.__failingWMSCache.getKeys():
      if rb in self.resourceBrokers:
        try:
          self.resourceBrokers.remove( rb )
        except:
          pass

    self.resourceBrokers = List.randomize( self.resourceBrokers )

    if self.gridEnv:
      self.log.info( ' GridEnv:        ', self.gridEnv )
    if self.resourceBrokers:
      self.log.info( ' ResourceBrokers:', ', '.join( self.resourceBrokers ) )
Ejemplo n.º 20
0
  def configure( self, csSection, submitPool ):
    """
     Here goes common configuration for all Grid PilotDirectors
    """
    PilotDirector.configure( self, csSection, submitPool )
    self.reloadConfiguration( csSection, submitPool )

    self.__failingWMSCache.purgeExpired()
    self.__ticketsWMSCache.purgeExpired()
    for rb in self.__failingWMSCache.getKeys():
      if rb in self.resourceBrokers:
        try:
          self.resourceBrokers.remove( rb )
        except:
          pass

    self.resourceBrokers = List.randomize( self.resourceBrokers )

    if self.gridEnv:
      self.log.info( ' GridEnv:        ', self.gridEnv )
    if self.resourceBrokers:
      self.log.info( ' ResourceBrokers:', ', '.join( self.resourceBrokers ) )
Ejemplo n.º 21
0
    def __refresh(self):
        self.__lastUpdateTime = time.time()
        gLogger.debug("Refreshing configuration...")
        gatewayList = getGatewayURLs("Configuration/Server")
        updatingErrorsList = []
        if gatewayList:
            initialServerList = gatewayList
            gLogger.debug("Using configuration gateway", str(initialServerList[0]))
        else:
            initialServerList = gConfigurationData.getServers()
            gLogger.debug("Refreshing from list %s" % str(initialServerList))

        # If no servers in the initial list, we are supposed to use the local configuration only
        if not initialServerList:
            return S_OK()

        randomServerList = List.randomize(initialServerList)
        gLogger.debug("Randomized server list is %s" % ", ".join(randomServerList))

        for sServer in randomServerList:
            from DIRAC.Core.DISET.RPCClient import RPCClient

            oClient = RPCClient(
                sServer,
                useCertificates=gConfigurationData.useServerCertificate(),
                skipCACheck=gConfigurationData.skipCACheck(),
            )
            dRetVal = _updateFromRemoteLocation(oClient)
            if dRetVal["OK"]:
                return dRetVal
            else:
                updatingErrorsList.append(dRetVal["Message"])
                gLogger.warn(
                    "Can't update from server", "Error while updating from %s: %s" % (sServer, dRetVal["Message"])
                )
                if dRetVal["Message"].find("Insane environment") > -1:
                    break
        return S_ERROR("Reason(s):\n\t%s" % "\n\t".join(List.uniqueElements(updatingErrorsList)))
Ejemplo n.º 22
0
  def __findServiceURL(self):
    """ Discovers the URL of a service, taking into account gateways, multiple URLs, banned URLs


        If the site on which we run is configured to use gateways (/DIRAC/Gateways/<siteName>),
        these URLs will be used. To ignore the gateway, it is possible to set KW_IGNORE_GATEWAYS
        to False in kwargs.

        If self._destinationSrv (given as constructor attribute) is a properly formed URL,
        we just return this one. If we have to use a gateway, we just replace the server name in the url.

        The list of URLs defined in the CS (<System>/URLs/<Component>) is randomized

        This method also sets some attributes:
          * self.__nbOfUrls = number of URLs
          * self.__nbOfRetry = 2 if we have more than 2 urls, otherwise 3
          * self.__bannedUrls is reinitialized if all the URLs are banned

        :return: S_OK(str)/S_ERROR() -- the selected URL
    """
    if not self.__initStatus['OK']:
      return self.__initStatus

    # Load the Gateways URLs for the current site Name
    gatewayURL = False
    if not self.kwargs.get(self.KW_IGNORE_GATEWAYS):
      dRetVal = gConfig.getOption("/DIRAC/Gateways/%s" % DIRAC.siteName())
      if dRetVal['OK']:
        rawGatewayURL = List.randomize(List.fromChar(dRetVal['Value'], ","))[0]
        gatewayURL = "/".join(rawGatewayURL.split("/")[:3])

    # If what was given as constructor attribute is a properly formed URL,
    # we just return this one.
    # If we have to use a gateway, we just replace the server name in it
    for protocol in gProtocolDict:
      if self._destinationSrv.find("%s://" % protocol) == 0:
        gLogger.debug("Already given a valid url", self._destinationSrv)
        if not gatewayURL:
          return S_OK(self._destinationSrv)
        gLogger.debug("Reconstructing given URL to pass through gateway")
        path = "/".join(self._destinationSrv.split("/")[3:])
        finalURL = "%s/%s" % (gatewayURL, path)
        gLogger.debug("Gateway URL conversion:\n %s -> %s" % (self._destinationSrv, finalURL))
        return S_OK(finalURL)

    if gatewayURL:
      gLogger.debug("Using gateway", gatewayURL)
      return S_OK("%s/%s" % (gatewayURL, self._destinationSrv))

    # We extract the list of URLs from the CS (System/URLs/Component)
    try:
      urls = getServiceURL(self._destinationSrv, setup=self.setup)
    except Exception as e:
      return S_ERROR("Cannot get URL for %s in setup %s: %s" % (self._destinationSrv, self.setup, repr(e)))
    if not urls:
      return S_ERROR("URL for service %s not found" % self._destinationSrv)

    failoverUrls = []
    # Try if there are some failover URLs to use as last resort
    try:
      failoverUrlsStr = getServiceFailoverURL(self._destinationSrv, setup=self.setup)
      if failoverUrlsStr:
        failoverUrls = failoverUrlsStr.split(',')
    except Exception as e:
      pass

    # We randomize the list, and add at the end the failover URLs (System/FailoverURLs/Component)
    urlsList = List.randomize(List.fromChar(urls, ",")) + failoverUrls
    self.__nbOfUrls = len(urlsList)
    self.__nbOfRetry = 2 if self.__nbOfUrls > 2 else 3  # we retry 2 times all services, if we run more than 2 services
    if self.__nbOfUrls == len(self.__bannedUrls):
      self.__bannedUrls = []  # retry all urls
      gLogger.debug("Retrying again all URLs")

    if len(self.__bannedUrls) > 0 and len(urlsList) > 1:
      # we have host which is not accessible. We remove that host from the list.
      # We only remove if we have more than one instance
      for i in self.__bannedUrls:
        gLogger.debug("Removing banned URL", "%s" % i)
        urlsList.remove(i)

    # Take the first URL from the list
    # randUrls = List.randomize( urlsList ) + failoverUrls

    sURL = urlsList[0]

    # If we have banned URLs, and several URLs at disposals, we make sure that the selected sURL
    # is not on a host which is banned. If it is, we take the next one in the list using __selectUrl
    # If we have banned URLs, and several URLs at disposals, we make sure that the selected sURL
    # is not on a host which is banned. If it is, we take the next one in the list using __selectUrl

    if len(self.__bannedUrls) > 0 and self.__nbOfUrls > 2:  # when we have multiple services then we can
      # have a situation when two services are running on the same machine with different ports...
      retVal = Network.splitURL(sURL)
      nexturl = None
      if retVal['OK']:
        nexturl = retVal['Value']

        found = False
        for i in self.__bannedUrls:
          retVal = Network.splitURL(i)
          if retVal['OK']:
            bannedurl = retVal['Value']
          else:
            break
          # We found a banned URL on the same host as the one we are running on
          if nexturl[1] == bannedurl[1]:
            found = True
            break
        if found:
          nexturl = self.__selectUrl(nexturl, urlsList[1:])
          if nexturl:  # an url found which is in different host
            sURL = nexturl
    gLogger.debug("Discovering URL for service", "%s -> %s" % (self._destinationSrv, sURL))
    return S_OK(sURL)
Ejemplo n.º 23
0
        path = "/".join( self._destinationSrv.split( "/" )[3:] )
        finalURL = "%s/%s" % ( gatewayURL, path )
        gLogger.debug( "Gateway URL conversion:\n %s -> %s" % ( self._destinationSrv, finalURL ) )
        return S_OK( finalURL )

    if gatewayURL:
      gLogger.debug( "Using gateway", gatewayURL )
      return S_OK( "%s/%s" % ( gatewayURL, self._destinationSrv ) )

    try:
      urls = getServiceURL( self._destinationSrv, setup = self.setup )
    except Exception, e:
      return S_ERROR( "Cannot get URL for %s in setup %s: %s" % ( self._destinationSrv, self.setup, str( e ) ) )
    if not urls:
      return S_ERROR( "URL for service %s not found" % self._destinationSrv )
    sURL = List.randomize( List.fromChar( urls, "," ) )[0]
    gLogger.debug( "Discovering URL for service", "%s -> %s" % ( self._destinationSrv, sURL ) )
    return S_OK( sURL )

  def __checkThreadID( self ):
    if not self.__initStatus[ 'OK' ]:
      return self.__initStatus
    cThID = thread.get_ident()
    if not self.__allowedThreadID:
      self.__allowedThreadID = cThID
    elif cThID != self.__allowedThreadID :
      msgTxt = """
=======DISET client thread safety error========================
Client %s
can only run on thread %s
and this is thread %s
Ejemplo n.º 24
0
    def _prepareJDL(self, taskQueueDict, workingDirectory, pilotOptions,
                    pilotsToSubmit, ceMask, submitPrivatePilot, privateTQ):
        """
      Write JDL for Pilot Submission
    """
        rbList = []
        # Select Randomly one RB from the list
        rb = List.randomize(self.resourceBrokers)[0]
        rbList.append('"https://%s:7443/glite_wms_wmproxy_server"' % rb)

        lbList = []
        for lb in self.loggingServers:
            lbList.append('"https://%s:9000"' % lb)
        lbList = List.randomize(lbList)

        nPilots = 1
        vo = gConfig.getValue('/DIRAC/VirtualOrganization', '')
        if privateTQ or vo not in ['lhcb']:
            extraReq = "True"
        else:
            if submitPrivatePilot:
                extraReq = "! AllowsGenericPilot"
            else:
                extraReq = "AllowsGenericPilot"

        myProxyServer = self.myProxyServer.strip()
        if not myProxyServer:
            #Random string to avoid caching
            myProxyServer = "%s.cern.ch" % md5(str(
                time.time())).hexdigest()[:10]

        wmsClientJDL = """
RetryCount = 0;
ShallowRetryCount = 0;
AllowsGenericPilot = Member( "VO-lhcb-pilot" , other.GlueHostApplicationSoftwareRunTimeEnvironment );
Requirements = pilotRequirements && %s;
MyProxyServer = "%s";
WmsClient = [
  ErrorStorage = "%s/pilotError";
  OutputStorage = "%s/pilotOutput";
# ListenerPort = 44000;
  ListenerStorage = "%s/Storage";
  RetryCount = 0;
  ShallowRetryCount = 0;
  WMProxyEndPoints = { %s };
  LBEndPoints = { %s };
  EnableServiceDiscovery = false;
  MyProxyServer = "%s";
  JdlDefaultAttributes =  [
    requirements  =  ( other.GlueCEStateStatus == "Production" || other.GlueCEStateStatus == "Special" );
    AllowZippedISB  =  true;
    SignificantAttributes  =  {"Requirements", "Rank", "FuzzyRank"};
    PerusalFileEnable  =  false;
  ];
];
""" % (extraReq, myProxyServer, workingDirectory, workingDirectory,
        workingDirectory, ', '.join(rbList), ', '.join(lbList), myProxyServer)

        if pilotsToSubmit > 1:
            wmsClientJDL += """
JobType = "Parametric";
Parameters= %s;
ParameterStep =1;
ParameterStart = 0;
""" % pilotsToSubmit
            nPilots = pilotsToSubmit

        (pilotJDL, pilotRequirements) = self._JobJDL(taskQueueDict,
                                                     pilotOptions, ceMask)

        jdl = os.path.join(workingDirectory,
                           '%s.jdl' % taskQueueDict['TaskQueueID'])
        jdl = self._writeJDL(jdl, [pilotJDL, wmsClientJDL])

        return {
            'JDL': jdl,
            'Requirements': pilotRequirements + " && " + extraReq,
            'Pilots': nPilots,
            'RB': rb
        }
Ejemplo n.º 25
0
  def execute(self):
    """ Main execution function.
    """
    #Have to work out if the module is part of the last step i.e.
    #user jobs can have any number of steps and we only want
    #to run the finalization once. Not a problem if this is not the last step so return S_OK()
    resultLS = self.isLastStep()
    if not resultLS['OK']:
      return S_OK()

    self.logWorkingDirectory()

    resultIV = self.resolveInputVariables()
    if not resultIV['OK']:
      self.log.error("Failed to resolve input parameters:", resultIV['Message'])
      return resultIV

    self.log.info('Initializing %s' % self.version)
    if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
      self.log.verbose('Workflow status = %s, step status = %s' % (self.workflowStatus['OK'],
                                                                   self.stepStatus['OK']))
      return S_OK('No output data upload attempted')

    if not self.userOutputData:
      self.log.info('No user output data is specified for this job, nothing to do')
      return S_OK('No output data to upload')

    #Determine the final list of possible output files for the
    #workflow and all the parameters needed to upload them.
    outputList = self.getOutputList()

    userOutputLFNs = []
    if self.userOutputData:
      resultOLfn = self.constructOutputLFNs()
      if not resultOLfn['OK']:
        self.log.error('Could not create user LFNs', resultOLfn['Message'])
        return resultOLfn
      userOutputLFNs = resultOLfn['Value']

    self.log.verbose('Calling getCandidateFiles( %s, %s, %s)' % (outputList, userOutputLFNs, self.outputDataFileMask))
    self.log.debug("IgnoreAppErrors? '%s' " % self.ignoreapperrors)
    resultCF = self.getCandidateFiles(outputList, userOutputLFNs, self.outputDataFileMask)
    if not resultCF['OK']:
      if not self.ignoreapperrors:
        self.log.error(resultCF['Message'])
        self.setApplicationStatus(resultCF['Message'])
        return S_OK()
    fileDict = resultCF['Value']

    resultFMD = self.getFileMetadata(fileDict)
    if not resultFMD['OK']:
      if not self.ignoreapperrors:
        self.log.error(resultFMD['Message'])
        self.setApplicationStatus(resultFMD['Message'])
        return S_OK()

    if not resultFMD['Value']:
      if not self.ignoreapperrors:
        self.log.info('No output data files were determined to be uploaded for this workflow')
        self.setApplicationStatus('No Output Data Files To Upload')
        return S_OK()

    fileMetadata = resultFMD['Value']

    #First get the local (or assigned) SE to try first for upload and others in random fashion
    resultSEL = getDestinationSEList('Tier1-USER', DIRAC.siteName(), outputmode='local')
    if not resultSEL['OK']:
      self.log.error('Could not resolve output data SE', resultSEL['Message'])
      self.setApplicationStatus('Failed To Resolve OutputSE')
      return resultSEL
    localSE = resultSEL['Value']

    orderedSEs = [ se for se in self.defaultOutputSE if se not in localSE and se not in self.userOutputSE]

    orderedSEs = localSE + List.randomize(orderedSEs)
    if self.userOutputSE:
      prependSEs = []
      for userSE in self.userOutputSE:
        if not userSE in orderedSEs:
          prependSEs.append(userSE)
      orderedSEs = prependSEs + orderedSEs

    self.log.info('Ordered list of output SEs is: %s' % (', '.join(orderedSEs)))
    final = {}
    for fileName, metadata in fileMetadata.iteritems():
      final[fileName] = metadata
      final[fileName]['resolvedSE'] = orderedSEs

    #At this point can exit and see exactly what the module will upload
    self.printOutputInfo(final)
    if not self.enable:
      return S_OK('Module is disabled by control flag')

    #Instantiate the failover transfer client with the global request object
    failoverTransfer = FailoverTransfer(self._getRequestContainer())

    #One by one upload the files with failover if necessary
    filesToReplicate = {}
    filesToFailover = {}
    filesUploaded = []
    if not self.failoverTest:
      self.transferAndRegisterFiles(final, failoverTransfer, filesToFailover, filesUploaded, filesToReplicate)
    else:
      filesToFailover = final

    ##if there are files to be failovered, we do it now
    resultTRFF = self.transferRegisterAndFailoverFiles(failoverTransfer, filesToFailover, filesUploaded)
    cleanUp = resultTRFF['Value']['cleanUp']

    #For files correctly uploaded must report LFNs to job parameters
    if filesUploaded:
      report = ', '.join( filesUploaded )
      self.jobReport.setJobParameter( 'UploadedOutputData', report )

    self.workflow_commons['Request'] = failoverTransfer.request

    #If some or all of the files failed to be saved to failover
    if cleanUp:
      #Leave any uploaded files just in case it is useful for the user
      #do not try to replicate any files.
      return S_ERROR('Failed To Upload Output Data')

    #If there is now at least one replica for uploaded files can trigger replication
    datMan = DataManager( catalogs = self.userFileCatalog )
    self.log.info('Sleeping for 10 seconds before attempting replication of recently uploaded files')
    time.sleep(10)
    for lfn, repSE in filesToReplicate.items():
      resultRAR = datMan.replicateAndRegister(lfn, repSE)
      if not resultRAR['OK']:
        self.log.info('Replication failed with below error but file already exists in Grid storage with \
        at least one replica:\n%s' % (resultRAR))

    self.generateFailoverFile()

    self.setApplicationStatus('Job Finished Successfully')
    return S_OK('Output data uploaded')
Ejemplo n.º 26
0
def main():

    from DIRAC.Core.Base import Script

    Script.registerSwitch("p:", "run_number=", "Run Number", setRunNumber)
    Script.registerSwitch("T:", "template=", "Template", setCorsikaTemplate)
    Script.registerSwitch("E:", "executable=", "Executable", setExecutable)
    Script.registerSwitch("S:", "simtelConfig=", "SimtelConfig", setConfig)
    Script.registerSwitch("V:", "version=", "Version", setVersion)
    Script.registerSwitch("M:", "mode=", "Mode", setMode)
    Script.registerSwitch("C:", "savecorsika=", "Save Corsika", setSaveCorsika)

    from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient
    from DIRAC.Resources.Catalog.FileCatalog import FileCatalog

    Script.parseCommandLine()
    global fcc, fcL, storage_element

    from CTADIRAC.Core.Utilities.SoftwareInstallation import getSoftwareEnviron
    from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwareEnviron
    from CTADIRAC.Core.Utilities.SoftwareInstallation import workingArea
    from CTADIRAC.Core.Workflow.Modules.CorsikaApp import CorsikaApp
    from CTADIRAC.Core.Workflow.Modules.Read_CtaApp import Read_CtaApp
    from DIRAC.Core.Utilities.Subprocess import systemCall

    jobID = os.environ['JOBID']
    jobID = int(jobID)
    global jobReport
    jobReport = JobReport(jobID)

    ###########
    ## Checking MD coherence
    fc = FileCatalog('LcgFileCatalog')
    res = fc._getCatalogConfigDetails('DIRACFileCatalog')
    print 'DFC CatalogConfigDetails:', res
    res = fc._getCatalogConfigDetails('LcgFileCatalog')
    print 'LCG CatalogConfigDetails:', res

    fcc = FileCatalogClient()
    fcL = FileCatalog('LcgFileCatalog')

    from DIRAC.Interfaces.API.Dirac import Dirac
    dirac = Dirac()

    #############
    simtelConfigFilesPath = 'sim_telarray/multi'
    simtelConfigFile = simtelConfigFilesPath + '/multi_cta-ultra5.cfg'
    #simtelConfigFile = simtelConfigFilesPath + '/multi_cta-prod1s.cfg'
    createGlobalsFromConfigFiles('prodConfigFile', corsikaTemplate, version)

    ######################Building prod Directory Metadata #######################
    resultCreateProdDirMD = createProdFileSystAndMD()
    if not resultCreateProdDirMD['OK']:
        DIRAC.gLogger.error('Failed to create prod Directory MD')
        jobReport.setApplicationStatus('Failed to create prod Directory MD')
        DIRAC.gLogger.error('Metadata coherence problem, no file produced')
        DIRAC.exit(-1)
    else:
        print 'prod Directory MD successfully created'

    ######################Building corsika Directory Metadata #######################

    resultCreateCorsikaDirMD = createCorsikaFileSystAndMD()
    if not resultCreateCorsikaDirMD['OK']:
        DIRAC.gLogger.error('Failed to create corsika Directory MD')
        jobReport.setApplicationStatus('Failed to create corsika Directory MD')
        DIRAC.gLogger.error(
            'Metadata coherence problem, no corsikaFile produced')
        DIRAC.exit(-1)
    else:
        print 'corsika Directory MD successfully created'

    ############ Producing Corsika File
    global CorsikaSimtelPack
    CorsikaSimtelPack = os.path.join('corsika_simhessarray', version,
                                     'corsika_simhessarray')
    install_CorsikaSimtelPack(version, 'sim')
    cs = CorsikaApp()
    cs.setSoftwarePackage(CorsikaSimtelPack)
    cs.csExe = executable
    cs.csArguments = [
        '--run-number', run_number, '--run', 'corsika', corsikaTemplate
    ]
    corsikaReturnCode = cs.execute()

    if corsikaReturnCode != 0:
        DIRAC.gLogger.error('Corsika Application: Failed')
        jobReport.setApplicationStatus('Corsika Application: Failed')
        DIRAC.exit(-1)
###################### rename of corsika output file #######################
    rundir = 'run' + run_number
    filein = rundir + '/' + corsikaOutputFileName
    corsikaFileName = particle + '_' + thetaP + '_' + phiP + '_alt' + obslev + '_' + 'run' + run_number + '.corsika.gz'
    mv_cmd = 'mv ' + filein + ' ' + corsikaFileName
    if (os.system(mv_cmd)):
        DIRAC.exit(-1)
########################

########################
## files spread in 1000-runs subDirectories
    runNum = int(run_number)
    subRunNumber = '%03d' % runNum
    runNumModMille = runNum % 1000
    runNumTrunc = (runNum - runNumModMille) / 1000
    runNumSeriesDir = '%03dxxx' % runNumTrunc
    print 'runNumSeriesDir=', runNumSeriesDir

    ### create corsika tar luisa ####################
    corsikaTarName = particle + '_' + thetaP + '_' + phiP + '_alt' + obslev + '_' + 'run' + run_number + '.corsika.tar.gz'
    filetar1 = rundir + '/' + 'input'
    filetar2 = rundir + '/' + 'DAT' + run_number + '.dbase'
    filetar3 = rundir + '/run' + str(int(run_number)) + '.log'
    cmdTuple = [
        '/bin/tar', 'zcf', corsikaTarName, filetar1, filetar2, filetar3
    ]
    DIRAC.gLogger.notice('Executing command tuple:', cmdTuple)
    ret = systemCall(0, cmdTuple, sendOutput)
    if not ret['OK']:
        DIRAC.gLogger.error('Failed to execute tar')
        DIRAC.exit(-1)

######################################################
    corsikaOutFileDir = os.path.join(corsikaDirPath, particle, 'Data',
                                     runNumSeriesDir)
    corsikaOutFileLFN = os.path.join(corsikaOutFileDir, corsikaFileName)
    corsikaRunNumberSeriesDirExist = fcc.isDirectory(
        corsikaOutFileDir)['Value']['Successful'][corsikaOutFileDir]
    newCorsikaRunNumberSeriesDir = (
        corsikaRunNumberSeriesDirExist != True
    )  # if new runFileSeries, will need to add new MD

    #### create a file to DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK ################
    f = open('DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK', 'w')
    f.close()

    if savecorsika == 'True':
        DIRAC.gLogger.notice('Put and register corsika File in LFC and DFC:',
                             corsikaOutFileLFN)
        ret = dirac.addFile(corsikaOutFileLFN, corsikaFileName,
                            storage_element)

        res = CheckCatalogCoherence(corsikaOutFileLFN)

        if res != DIRAC.S_OK:
            DIRAC.gLogger.error('Job failed: Catalog Coherence problem found')
            jobReport.setApplicationStatus('OutputData Upload Error')
            DIRAC.exit(-1)

        if not ret['OK']:
            DIRAC.gLogger.error('Error during addFile call:', ret['Message'])
            jobReport.setApplicationStatus('OutputData Upload Error')
            DIRAC.exit(-1)

    # put and register corsikaTarFile:
        corsikaTarFileDir = os.path.join(corsikaDirPath, particle, 'Log',
                                         runNumSeriesDir)
        corsikaTarFileLFN = os.path.join(corsikaTarFileDir, corsikaTarName)

        ##### If storage element is IN2P3-tape save simtel file on disk ###############
        if storage_element == 'CC-IN2P3-Tape':
            storage_element = 'CC-IN2P3-Disk'

        DIRAC.gLogger.notice(
            'Put and register corsikaTar File in LFC and DFC:',
            corsikaTarFileLFN)
        ret = dirac.addFile(corsikaTarFileLFN, corsikaTarName, storage_element)

        ####Checking and restablishing catalog coherence #####################
        res = CheckCatalogCoherence(corsikaTarFileLFN)
        if res != DIRAC.S_OK:
            DIRAC.gLogger.error('Job failed: Catalog Coherence problem found')
            jobReport.setApplicationStatus('OutputData Upload Error')
            DIRAC.exit(-1)

        if not ret['OK']:
            DIRAC.gLogger.error('Error during addFile call:', ret['Message'])
            jobReport.setApplicationStatus('OutputData Upload Error')
            DIRAC.exit(-1)
######################################################################

        if newCorsikaRunNumberSeriesDir:
            insertRunFileSeriesMD(corsikaOutFileDir, runNumTrunc)
            insertRunFileSeriesMD(corsikaTarFileDir, runNumTrunc)

###### insert corsika File Level metadata ############################################
        corsikaFileMD = {}
        corsikaFileMD['runNumber'] = int(run_number)
        corsikaFileMD['jobID'] = jobID
        corsikaFileMD['corsikaReturnCode'] = corsikaReturnCode
        corsikaFileMD['nbShowers'] = nbShowers

        result = fcc.setMetadata(corsikaOutFileLFN, corsikaFileMD)
        print "result setMetadata=", result
        if not result['OK']:
            print 'ResultSetMetadata:', result['Message']

        result = fcc.setMetadata(corsikaTarFileLFN, corsikaFileMD)
        print "result setMetadata=", result
        if not result['OK']:
            print 'ResultSetMetadata:', result['Message']

#####  Exit now if only corsika simulation required
    if (mode == 'corsika_standalone'):
        DIRAC.exit()

############ Producing SimTel File
######################Building simtel Directory Metadata #######################

    cfg_dict = {
        "4MSST": 'cta-prod2-4m-dc',
        "SCSST": 'cta-prod2-sc-sst',
        "STD": 'cta-prod2',
        "NSBX3": 'cta-prod2',
        "ASTRI": 'cta-prod2-astri',
        "SCMST": 'cta-prod2-sc3',
        "NORTH": 'cta-prod2n'
    }

    if simtelConfig == "6INROW":
        all_configs = ["4MSST", "SCSST", "ASTRI", "NSBX3", "STD", "SCMST"]
    elif simtelConfig == "5INROW":
        all_configs = ["4MSST", "SCSST", "ASTRI", "NSBX3", "STD"]
    elif simtelConfig == "3INROW":
        all_configs = ["SCSST", "STD", "SCMST"]
    else:
        all_configs = [simtelConfig]

############################################
#for current_conf in all_configs:
#DIRAC.gLogger.notice('current conf is',current_conf)
#if current_conf == "SCMST":
#current_version = version + '_sc3'
#DIRAC.gLogger.notice('current version is', current_version)
#if os.path.isdir('sim_telarray'):
#DIRAC.gLogger.notice('Package found in the local area. Removing package...')
#cmd = 'rm -R sim_telarray corsika-6990 hessioxxx corsika-run'
#if(os.system(cmd)):
#DIRAC.exit( -1 )
#install_CorsikaSimtelPack(current_version)
#else:
#current_version = version
#DIRAC.gLogger.notice('current version is', current_version)
#############################################################

    for current_conf in all_configs:
        DIRAC.gLogger.notice('current conf is', current_conf)
        if current_conf == "SCMST":
            current_version = version + '_sc3'
            DIRAC.gLogger.notice('current version is', current_version)
            installSoftwareEnviron(CorsikaSimtelPack, workingArea(), 'sim-sc3')
        else:
            current_version = version
            DIRAC.gLogger.notice('current version is', current_version)

########################################################

        global simtelDirPath
        global simtelProdVersion

        simtelProdVersion = current_version + '_simtel'
        simtelDirPath = os.path.join(corsikaParticleDirPath, simtelProdVersion)

        resultCreateSimtelDirMD = createSimtelFileSystAndMD(current_conf)
        if not resultCreateSimtelDirMD['OK']:
            DIRAC.gLogger.error('Failed to create simtelArray Directory MD')
            jobReport.setApplicationStatus(
                'Failed to create simtelArray Directory MD')
            DIRAC.gLogger.error(
                'Metadata coherence problem, no simtelArray File produced')
            DIRAC.exit(-1)
        else:
            DIRAC.gLogger.notice('simtel Directory MD successfully created')

############## check simtel data file LFN exists ########################
        simtelFileName = particle + '_' + str(thetaP) + '_' + str(
            phiP) + '_alt' + str(
                obslev) + '_' + 'run' + run_number + '.simtel.gz'
        simtelDirPath_conf = simtelDirPath + '_' + current_conf
        simtelOutFileDir = os.path.join(simtelDirPath_conf, 'Data',
                                        runNumSeriesDir)
        simtelOutFileLFN = os.path.join(simtelOutFileDir, simtelFileName)
        res = CheckCatalogCoherence(simtelOutFileLFN)
        if res == DIRAC.S_OK:
            DIRAC.gLogger.notice('Current conf already done', current_conf)
            continue

#### execute simtelarray ################
        fd = open('run_sim.sh', 'w')
        fd.write("""#! /bin/sh  
source ./Corsika_simhessarrayEnv.sh
export SVNPROD2=$PWD
export SVNTAG=SVN-PROD2_rev10503
export CORSIKA_IO_BUFFER=800MB
cp ../grid_prod2-repro.sh .
ln -s ../%s
ln -s ../$SVNTAG
./grid_prod2-repro.sh %s %s""" %
                 (corsikaFileName, corsikaFileName, current_conf))
        fd.close()
        ####################################

        os.system('chmod u+x run_sim.sh')
        cmdTuple = ['./run_sim.sh']
        ret = systemCall(0, cmdTuple, sendOutputSimTel)
        simtelReturnCode, stdout, stderr = ret['Value']

        if (os.system('grep Broken simtel.log') == 0):
            DIRAC.gLogger.error('Broken string found in simtel.log')
            jobReport.setApplicationStatus('Broken pipe')
            DIRAC.exit(-1)

        if not ret['OK']:
            DIRAC.gLogger.error('Failed to execute run_sim.sh')
            DIRAC.gLogger.error('run_sim.sh status is:', simtelReturnCode)
            DIRAC.exit(-1)

##   check simtel data/log/histo Output File exist
        cfg = cfg_dict[current_conf]
        #cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Data/*.simtel.gz ' + simtelFileName
        if current_conf == "SCMST":
            cmdprefix = 'mv sim-sc3/Data/sim_telarray/' + cfg + '/0.0deg/'
        else:
            cmdprefix = 'mv sim/Data/sim_telarray/' + cfg + '/0.0deg/'

        cmd = cmdprefix + 'Data/*' + cfg + '_*.simtel.gz ' + simtelFileName
        if (os.system(cmd)):
            DIRAC.exit(-1)

############################################
        simtelRunNumberSeriesDirExist = fcc.isDirectory(
            simtelOutFileDir)['Value']['Successful'][simtelOutFileDir]
        newSimtelRunFileSeriesDir = (
            simtelRunNumberSeriesDirExist != True
        )  # if new runFileSeries, will need to add new MD

        simtelLogFileName = particle + '_' + str(thetaP) + '_' + str(
            phiP) + '_alt' + str(obslev) + '_' + 'run' + run_number + '.log.gz'
        #cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Log/*.log.gz ' + simtelLogFileName
        cmd = cmdprefix + 'Log/*' + cfg + '_*.log.gz ' + simtelLogFileName
        if (os.system(cmd)):
            DIRAC.exit(-1)
        simtelOutLogFileDir = os.path.join(simtelDirPath_conf, 'Log',
                                           runNumSeriesDir)
        simtelOutLogFileLFN = os.path.join(simtelOutLogFileDir,
                                           simtelLogFileName)

        simtelHistFileName = particle + '_' + str(thetaP) + '_' + str(
            phiP) + '_alt' + str(
                obslev) + '_' + 'run' + run_number + '.hdata.gz'
        #cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Histograms/*.hdata.gz ' + simtelHistFileName
        cmd = cmdprefix + 'Histograms/*' + cfg + '_*.hdata.gz ' + simtelHistFileName
        if (os.system(cmd)):
            DIRAC.exit(-1)
        simtelOutHistFileDir = os.path.join(simtelDirPath_conf, 'Histograms',
                                            runNumSeriesDir)
        simtelOutHistFileLFN = os.path.join(simtelOutHistFileDir,
                                            simtelHistFileName)

        ########### quality check on Histo #############################################
        fd = open('check_histo.sh', 'w')
        fd.write("""#! /bin/sh  
nsim=$(list_histograms %s|fgrep 'Histogram 6 '|sed 's/^.*contents: //'| sed 's:/.*$::')
nevents=%d
if [ $nsim -lt $(( $nevents - 20 )) ]; then
echo 'nsim found:' $nsim
echo 'nsim expected:' $nevents
exit 1
else
echo 'nsim found:' $nsim
echo 'nsim expected:' $nevents
fi
""" % (simtelHistFileName, int(nbShowers) * int(cscat)))
        fd.close()

        ret = getSoftwareEnviron(CorsikaSimtelPack)

        if not ret['OK']:
            error = ret['Message']
            DIRAC.gLogger.error(error, CorsikaSimtelPack)
            DIRAC.exit(-1)

        corsikaEnviron = ret['Value']

        os.system('chmod u+x check_histo.sh')
        cmdTuple = ['./check_histo.sh']
        DIRAC.gLogger.notice('Executing command tuple:', cmdTuple)
        ret = systemCall(0, cmdTuple, sendOutput, env=corsikaEnviron)
        checkHistoReturnCode, stdout, stderr = ret['Value']

        if not ret['OK']:
            DIRAC.gLogger.error('Failed to execute check_histo.sh')
            DIRAC.gLogger.error('check_histo.sh status is:',
                                checkHistoReturnCode)
            DIRAC.exit(-1)

        if (checkHistoReturnCode != 0):
            DIRAC.gLogger.error('Failure during check_histo.sh')
            DIRAC.gLogger.error('check_histo.sh status is:',
                                checkHistoReturnCode)
            jobReport.setApplicationStatus('Histo check Failed')
            DIRAC.exit(-1)

########## quality check on Log #############################
        cmd = 'zcat %s | grep Finished.' % simtelLogFileName
        DIRAC.gLogger.notice('Executing system call:', cmd)
        if (os.system(cmd)):
            jobReport.setApplicationStatus('Log check Failed')
            DIRAC.exit(-1)

################################################
        from DIRAC.Core.Utilities import List
        from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
        opsHelper = Operations()

        global seList
        seList = opsHelper.getValue('ProductionOutputs/SimtelProd', [])
        seList = List.randomize(seList)

        DIRAC.gLogger.notice('SeList is:', seList)

        #########  Upload simtel data/log/histo ##############################################

        res = upload_to_seList(simtelOutFileLFN, simtelFileName)

        if res != DIRAC.S_OK:
            DIRAC.gLogger.error('OutputData Upload Error', simtelOutFileLFN)
            jobReport.setApplicationStatus('OutputData Upload Error')
            DIRAC.exit(-1)

        res = CheckCatalogCoherence(simtelOutLogFileLFN)
        if res == DIRAC.S_OK:
            DIRAC.gLogger.notice('Log file already exists. Removing:',
                                 simtelOutLogFileLFN)
            ret = dirac.removeFile(simtelOutLogFileLFN)

        res = upload_to_seList(simtelOutLogFileLFN, simtelLogFileName)

        if res != DIRAC.S_OK:
            DIRAC.gLogger.error('Upload simtel Log Error', simtelOutLogFileLFN)
            DIRAC.gLogger.notice('Removing simtel data file:',
                                 simtelOutFileLFN)
            ret = dirac.removeFile(simtelOutFileLFN)
            jobReport.setApplicationStatus('OutputData Upload Error')
            DIRAC.exit(-1)

        res = CheckCatalogCoherence(simtelOutHistFileLFN)
        if res == DIRAC.S_OK:
            DIRAC.gLogger.notice('Histo file already exists. Removing:',
                                 simtelOutHistFileLFN)
            ret = dirac.removeFile(simtelOutHistFileLFN)

        res = upload_to_seList(simtelOutHistFileLFN, simtelHistFileName)

        if res != DIRAC.S_OK:
            DIRAC.gLogger.error('Upload simtel Histo Error',
                                simtelOutHistFileLFN)
            DIRAC.gLogger.notice('Removing simtel data file:',
                                 simtelOutFileLFN)
            ret = dirac.removeFile(simtelOutFileLFN)
            DIRAC.gLogger.notice('Removing simtel log file:',
                                 simtelOutLogFileLFN)
            ret = dirac.removeFile(simtelOutLogFileLFN)
            jobReport.setApplicationStatus('OutputData Upload Error')
            DIRAC.exit(-1)

#    simtelRunNumberSeriesDirExist = fcc.isDirectory(simtelOutFileDir)['Value']['Successful'][simtelOutFileDir]
#    newSimtelRunFileSeriesDir = (simtelRunNumberSeriesDirExist != True)  # if new runFileSeries, will need to add new MD

        if newSimtelRunFileSeriesDir:
            print 'insertRunFileSeriesMD'
            insertRunFileSeriesMD(simtelOutFileDir, runNumTrunc)
            insertRunFileSeriesMD(simtelOutLogFileDir, runNumTrunc)
            insertRunFileSeriesMD(simtelOutHistFileDir, runNumTrunc)
        else:
            print 'NotinsertRunFileSeriesMD'

###### simtel File level metadata ############################################
        simtelFileMD = {}
        simtelFileMD['runNumber'] = int(run_number)
        simtelFileMD['jobID'] = jobID
        simtelFileMD['simtelReturnCode'] = simtelReturnCode

        result = fcc.setMetadata(simtelOutFileLFN, simtelFileMD)
        print "result setMetadata=", result
        if not result['OK']:
            print 'ResultSetMetadata:', result['Message']

        result = fcc.setMetadata(simtelOutLogFileLFN, simtelFileMD)
        print "result setMetadata=", result
        if not result['OK']:
            print 'ResultSetMetadata:', result['Message']

        result = fcc.setMetadata(simtelOutHistFileLFN, simtelFileMD)
        print "result setMetadata=", result
        if not result['OK']:
            print 'ResultSetMetadata:', result['Message']

        if savecorsika == 'True':
            result = fcc.addFileAncestors(
                {simtelOutFileLFN: {
                    'Ancestors': [corsikaOutFileLFN]
                }})
            print 'result addFileAncestor:', result

            result = fcc.addFileAncestors(
                {simtelOutLogFileLFN: {
                    'Ancestors': [corsikaOutFileLFN]
                }})
            print 'result addFileAncestor:', result

            result = fcc.addFileAncestors(
                {simtelOutHistFileLFN: {
                    'Ancestors': [corsikaOutFileLFN]
                }})
            print 'result addFileAncestor:', result

#####  Exit now if only corsika simulation required
        if (mode == 'corsika_simtel'):
            continue

######### run read_cta #######################################

        rcta = Read_CtaApp()
        rcta.setSoftwarePackage(CorsikaSimtelPack)
        rcta.rctaExe = 'read_cta'

        powerlaw_dict = {
            'gamma': '-2.57',
            'gamma_ptsrc': '-2.57',
            'proton': '-2.70',
            'electron': '-3.21'
        }
        dstFileName = particle + '_' + str(thetaP) + '_' + str(
            phiP) + '_alt' + str(
                obslev) + '_' + 'run' + run_number + '.simtel-dst0.gz'
        dstHistoFileName = particle + '_' + str(thetaP) + '_' + str(
            phiP) + '_alt' + str(
                obslev) + '_' + 'run' + run_number + '.hdata-dst0.gz'

        ## added some options starting from Armazones_2K prod.
        rcta.rctaArguments = [
            '-r', '4', '-u', '--integration-scheme', '4',
            '--integration-window', '7,3', '--tail-cuts', '6,8', '--min-pix',
            '2', '--min-amp', '20', '--type', '1,0,0,400', '--tail-cuts',
            '9,12', '--min-amp', '20', '--type', '2,0,0,100', '--tail-cuts',
            '8,11', '--min-amp', '19', '--type', '3,0,0,40', '--tail-cuts',
            '6,9', '--min-amp', '15', '--type', '4,0,0,15', '--tail-cuts',
            '3.7,5.5', '--min-amp', '8', '--type', '5,0,0,70,5.6',
            '--tail-cuts', '2.4,3.2', '--min-amp', '5.6', '--dst-level', '0',
            '--dst-file', dstFileName, '--histogram-file', dstHistoFileName,
            '--powerlaw', powerlaw_dict[particle], simtelFileName
        ]

        rctaReturnCode = rcta.execute()

        if rctaReturnCode != 0:
            DIRAC.gLogger.error('read_cta Application: Failed')
            jobReport.setApplicationStatus('read_cta Application: Failed')
            DIRAC.exit(-1)

######## run dst quality checks ######################################

        fd = open('check_dst_histo.sh', 'w')
        fd.write("""#! /bin/sh  
dsthistfilename=%s
dstfile=%s
n6="$(list_histograms -h 6 ${dsthistfilename} | grep 'Histogram of type' | sed 's/.*bins, //' | sed 's/ entries.//')" 
n12001="$(list_histograms -h 12001 ${dsthistfilename} | grep 'Histogram of type' | sed 's/.*bins, //' | sed 's/ entries.//')" 
if [ $n6 -ne $n12001 ]; then
echo 'n6 found:' $n6
echo 'n12001 found:' $n12001
exit 1
else
echo 'n6 found:' $n6
echo 'n12001 found:' $n12001
fi

n12002="$(list_histograms -h 12002 ${dsthistfilename} | grep 'Histogram of type' | sed 's/.*bins, //' | sed 's/ entries.//')" 
nev="$(statio ${dstfile} | egrep '^2010' | cut -f2)"
if [ -z "$nev" ]; then nev="0"; fi

if [ $nev -ne $n12002 ]; then
echo 'nev found:' $nev
echo 'n12002 found:' $n12002
exit 1
else
echo 'nev found:' $nev
echo 'n12002 found:' $n12002
fi
""" % (dstHistoFileName, dstFileName))
        fd.close()

        os.system('chmod u+x check_dst_histo.sh')
        cmdTuple = ['./check_dst_histo.sh']
        DIRAC.gLogger.notice('Executing command tuple:', cmdTuple)
        ret = systemCall(0, cmdTuple, sendOutput, env=corsikaEnviron)
        checkHistoReturnCode, stdout, stderr = ret['Value']

        if not ret['OK']:
            DIRAC.gLogger.error('Failed to execute check_dst_histo.sh')
            DIRAC.gLogger.error('check_dst_histo.sh status is:',
                                checkHistoReturnCode)
            DIRAC.exit(-1)

        if (checkHistoReturnCode != 0):
            DIRAC.gLogger.error('Failure during check_dst_histo.sh')
            DIRAC.gLogger.error('check_dst_histo.sh status is:',
                                checkHistoReturnCode)
            jobReport.setApplicationStatus('Histo check Failed')
            DIRAC.exit(-1)

############create MD and upload dst data/histo ##########################################################

        global dstDirPath
        global dstProdVersion

        dstProdVersion = current_version + '_dst'
        dstDirPath = os.path.join(simtelDirPath_conf, dstProdVersion)

        dstOutFileDir = os.path.join(dstDirPath, 'Data', runNumSeriesDir)
        dstOutFileLFN = os.path.join(dstOutFileDir, dstFileName)

        resultCreateDstDirMD = createDstFileSystAndMD()
        if not resultCreateDstDirMD['OK']:
            DIRAC.gLogger.error('Failed to create Dst Directory MD')
            jobReport.setApplicationStatus('Failed to create Dst Directory MD')
            DIRAC.gLogger.error(
                'Metadata coherence problem, no Dst File produced')
            DIRAC.exit(-1)
        else:
            DIRAC.gLogger.notice('Dst Directory MD successfully created')
############################################################

        res = CheckCatalogCoherence(dstOutFileLFN)
        if res == DIRAC.S_OK:
            DIRAC.gLogger.notice('dst file already exists. Removing:',
                                 dstOutFileLFN)
            ret = dirac.removeFile(dstOutFileLFN)

        res = upload_to_seList(dstOutFileLFN, dstFileName)

        if res != DIRAC.S_OK:
            DIRAC.gLogger.error('Upload dst Error', dstOutFileLFN)
            jobReport.setApplicationStatus('OutputData Upload Error')
            DIRAC.exit(-1)

##############################################################
        dstHistoFileDir = os.path.join(dstDirPath, 'Histograms',
                                       runNumSeriesDir)
        dstHistoFileLFN = os.path.join(dstHistoFileDir, dstHistoFileName)

        res = CheckCatalogCoherence(dstHistoFileLFN)
        if res == DIRAC.S_OK:
            DIRAC.gLogger.notice('dst histo file already exists. Removing:',
                                 dstHistoFileLFN)
            ret = dirac.removeFile(dstHistoFileLFN)

        res = upload_to_seList(dstHistoFileLFN, dstHistoFileName)

        if res != DIRAC.S_OK:
            DIRAC.gLogger.error('Upload dst Error', dstHistoFileName)
            jobReport.setApplicationStatus('OutputData Upload Error')
            DIRAC.exit(-1)

########### Insert RunNumSeries MD ##########################

        dstRunNumberSeriesDirExist = fcc.isDirectory(
            dstOutFileDir)['Value']['Successful'][dstOutFileDir]
        newDstRunFileSeriesDir = (
            dstRunNumberSeriesDirExist != True
        )  # if new runFileSeries, will need to add new MD

        if newDstRunFileSeriesDir:
            insertRunFileSeriesMD(dstOutFileDir, runNumTrunc)
            insertRunFileSeriesMD(dstHistoFileDir, runNumTrunc)

####### dst File level metadata ###############################################
        dstFileMD = {}
        dstFileMD['runNumber'] = int(run_number)
        dstFileMD['jobID'] = jobID
        dstFileMD['rctaReturnCode'] = rctaReturnCode

        result = fcc.setMetadata(dstOutFileLFN, dstFileMD)
        print "result setMetadata=", result
        if not result['OK']:
            print 'ResultSetMetadata:', result['Message']

        result = fcc.setMetadata(dstHistoFileLFN, dstFileMD)
        print "result setMetadata=", result
        if not result['OK']:
            print 'ResultSetMetadata:', result['Message']

########## set the ancestors for dst #####################################

        result = fcc.addFileAncestors(
            {dstOutFileLFN: {
                'Ancestors': [simtelOutFileLFN]
            }})
        print 'result addFileAncestor:', result

        result = fcc.addFileAncestors(
            {dstHistoFileLFN: {
                'Ancestors': [simtelOutFileLFN]
            }})
        print 'result addFileAncestor:', result


######################################################

    DIRAC.exit()
Ejemplo n.º 27
0
    def __findServiceURL(self):
        if not self.__initStatus['OK']:
            return self.__initStatus
        gatewayURL = False
        if self.KW_IGNORE_GATEWAYS not in self.kwargs or not self.kwargs[
                self.KW_IGNORE_GATEWAYS]:
            dRetVal = gConfig.getOption("/DIRAC/Gateways/%s" %
                                        DIRAC.siteName())
            if dRetVal['OK']:
                rawGatewayURL = List.randomize(
                    List.fromChar(dRetVal['Value'], ","))[0]
                gatewayURL = "/".join(rawGatewayURL.split("/")[:3])

        for protocol in gProtocolDict.keys():
            if self._destinationSrv.find("%s://" % protocol) == 0:
                gLogger.debug("Already given a valid url",
                              self._destinationSrv)
                if not gatewayURL:
                    return S_OK(self._destinationSrv)
                gLogger.debug(
                    "Reconstructing given URL to pass through gateway")
                path = "/".join(self._destinationSrv.split("/")[3:])
                finalURL = "%s/%s" % (gatewayURL, path)
                gLogger.debug("Gateway URL conversion:\n %s -> %s" %
                              (self._destinationSrv, finalURL))
                return S_OK(finalURL)

        if gatewayURL:
            gLogger.debug("Using gateway", gatewayURL)
            return S_OK("%s/%s" % (gatewayURL, self._destinationSrv))

        try:
            urls = getServiceURL(self._destinationSrv, setup=self.setup)
        except Exception as e:
            return S_ERROR("Cannot get URL for %s in setup %s: %s" %
                           (self._destinationSrv, self.setup, repr(e)))
        if not urls:
            return S_ERROR("URL for service %s not found" %
                           self._destinationSrv)

        urlsList = List.fromChar(urls, ",")
        self.__nbOfUrls = len(urlsList)
        self.__nbOfRetry = 2 if self.__nbOfUrls > 2 else 3  # we retry 2 times all services, if we run more than 2 services
        if len(urlsList) == len(self.__bannedUrls):
            self.__bannedUrls = []  # retry all urls
            gLogger.debug("Retrying again all URLs")

        if len(self.__bannedUrls) > 0 and len(urlsList) > 1:
            # we have host which is not accessible. We remove that host from the list.
            # We only remove if we have more than one instance
            for i in self.__bannedUrls:
                gLogger.debug("Removing banned URL", "%s" % i)
                urlsList.remove(i)

        randUrls = List.randomize(urlsList)
        sURL = randUrls[0]

        if len(
                self.__bannedUrls
        ) > 0 and self.__nbOfUrls > 2:  # when we have multiple services then we can have a situation
            # when two service are running on the same machine with different port...

            retVal = Network.splitURL(sURL)
            nexturl = None
            if retVal['OK']:
                nexturl = retVal['Value']

                found = False
                for i in self.__bannedUrls:
                    retVal = Network.splitURL(i)
                    if retVal['OK']:
                        bannedurl = retVal['Value']
                    else:
                        break

                    if nexturl[1] == bannedurl[1]:
                        found = True
                        break
                if found:
                    nexturl = self.__selectUrl(nexturl, randUrls[1:])
                    if nexturl:  # an url found which is in different host
                        sURL = nexturl
        gLogger.debug("Discovering URL for service",
                      "%s -> %s" % (self._destinationSrv, sURL))
        return S_OK(sURL)
Ejemplo n.º 28
0
        path = "/".join( self._destinationSrv.split( "/" )[3:] )
        finalURL = "%s/%s" % ( gatewayURL, path )
        gLogger.debug( "Gateway URL conversion:\n %s -> %s" % ( self._destinationSrv, finalURL ) )
        return S_OK( finalURL )

    if gatewayURL:
      gLogger.debug( "Using gateway", gatewayURL )
      return S_OK( "%s/%s" % ( gatewayURL, self._destinationSrv ) )

    try:
      urls = getServiceURL( self._destinationSrv, setup = self.setup )
    except Exception, e:
      return S_ERROR( "Cannot get URL for %s in setup %s: %s" % ( self._destinationSrv, self.setup, str( e ) ) )
    if not urls:
      return S_ERROR( "URL for service %s not found" % self._destinationSrv )
    sURL = List.randomize( List.fromChar( urls, "," ) )[0]
    gLogger.debug( "Discovering URL for service", "%s -> %s" % ( self._destinationSrv, sURL ) )
    return S_OK( sURL )

  def __checkThreadID( self ):
    if not self.__initStatus[ 'OK' ]:
      return self.__initStatus
    cThID = thread.get_ident()
    if not self.__allowedThreadID:
      self.__allowedThreadID = cThID
    elif cThID != self.__allowedThreadID :
      msgTxt = """
=======DISET client thread safety error========================
Client %s
can only run on thread %s
and this is thread %s
Ejemplo n.º 29
0
  def execute(self):
    """ Main execution function.
    """
    #Have to work out if the module is part of the last step i.e.
    #user jobs can have any number of steps and we only want
    #to run the finalization once. Not a problem if this is not the last step so return S_OK()
    resultLS = self.isLastStep()
    if not resultLS['OK']:
      return S_OK()

    self.logWorkingDirectory()

    resultIV = self.resolveInputVariables()
    if not resultIV['OK']:
      self.log.error("Failed to resolve input parameters:", resultIV['Message'])
      return resultIV

    self.log.info('Initializing %s' % self.version)
    if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
      self.log.verbose('Workflow status = %s, step status = %s' % (self.workflowStatus['OK'],
                                                                   self.stepStatus['OK']))
      return S_OK('No output data upload attempted')

    if not self.userOutputData:
      self.log.info('No user output data is specified for this job, nothing to do')
      return S_OK('No output data to upload')

    #Determine the final list of possible output files for the
    #workflow and all the parameters needed to upload them.
    outputList = self.getOutputList()

    userOutputLFNs = []
    if self.userOutputData:
      resultOLfn = self.constructOutputLFNs()
      if not resultOLfn['OK']:
        self.log.error('Could not create user LFNs', resultOLfn['Message'])
        return resultOLfn
      userOutputLFNs = resultOLfn['Value']

    self.log.verbose('Calling getCandidateFiles( %s, %s, %s)' % (outputList, userOutputLFNs, self.outputDataFileMask))
    self.log.debug("IgnoreAppErrors? '%s' " % self.ignoreapperrors)
    resultCF = self.getCandidateFiles(outputList, userOutputLFNs, self.outputDataFileMask)
    if not resultCF['OK']:
      if not self.ignoreapperrors:
        self.log.error(resultCF['Message'])
        self.setApplicationStatus(resultCF['Message'])
        return S_OK()
    fileDict = resultCF['Value']

    resultFMD = self.getFileMetadata(fileDict)
    if not resultFMD['OK']:
      if not self.ignoreapperrors:
        self.log.error(resultFMD['Message'])
        self.setApplicationStatus(resultFMD['Message'])
        return S_OK()

    if not resultFMD['Value']:
      if not self.ignoreapperrors:
        self.log.info('No output data files were determined to be uploaded for this workflow')
        self.setApplicationStatus('No Output Data Files To Upload')
        return S_OK()

    fileMetadata = resultFMD['Value']

    #First get the local (or assigned) SE to try first for upload and others in random fashion
    resultSEL = getDestinationSEList('Tier1-USER', DIRAC.siteName(), outputmode='local')
    if not resultSEL['OK']:
      self.log.error('Could not resolve output data SE', resultSEL['Message'])
      self.setApplicationStatus('Failed To Resolve OutputSE')
      return resultSEL
    localSE = resultSEL['Value']

    orderedSEs = [ se for se in self.defaultOutputSE if se not in localSE and se not in self.userOutputSE]

    orderedSEs = localSE + List.randomize(orderedSEs)
    if self.userOutputSE:
      prependSEs = []
      for userSE in self.userOutputSE:
        if userSE not in orderedSEs:
          prependSEs.append(userSE)
      orderedSEs = prependSEs + orderedSEs

    self.log.info('Ordered list of output SEs is: %s' % (', '.join(orderedSEs)))
    final = {}
    for fileName, metadata in fileMetadata.iteritems():
      final[fileName] = metadata
      final[fileName]['resolvedSE'] = orderedSEs

    #At this point can exit and see exactly what the module will upload
    self.printOutputInfo(final)
    if not self.enable:
      return S_OK('Module is disabled by control flag')

    self.injectJobIndex( final )

    #Instantiate the failover transfer client with the global request object
    failoverTransfer = FailoverTransfer(self._getRequestContainer())

    #One by one upload the files with failover if necessary
    filesToReplicate = {}
    filesToFailover = {}
    filesUploaded = []
    if not self.failoverTest:
      self.transferAndRegisterFiles(final, failoverTransfer, filesToFailover, filesUploaded, filesToReplicate)
    else:
      filesToFailover = final

    ##if there are files to be failovered, we do it now
    resultTRFF = self.transferRegisterAndFailoverFiles(failoverTransfer, filesToFailover, filesUploaded)
    cleanUp = resultTRFF['Value']['cleanUp']

    #For files correctly uploaded must report LFNs to job parameters
    if filesUploaded:
      report = ', '.join( filesUploaded )
      self.jobReport.setJobParameter( 'UploadedOutputData', report )

    self.workflow_commons['Request'] = failoverTransfer.request

    #If some or all of the files failed to be saved to failover
    if cleanUp:
      #Leave any uploaded files just in case it is useful for the user
      #do not try to replicate any files.
      return S_ERROR('Failed To Upload Output Data')

    #If there is now at least one replica for uploaded files can trigger replication
    datMan = DataManager( catalogs = self.userFileCatalog )
    self.log.info('Sleeping for 10 seconds before attempting replication of recently uploaded files')
    time.sleep(10)
    for lfn, repSE in filesToReplicate.items():
      resultRAR = datMan.replicateAndRegister(lfn, repSE)
      if not resultRAR['OK']:
        self.log.info('Replication failed with below error but file already exists in Grid storage with \
        at least one replica:\n%s' % (resultRAR))

    self.generateFailoverFile()

    self.setApplicationStatus('Job Finished Successfully')
    return S_OK('Output data uploaded')
Ejemplo n.º 30
0
def main():

    from DIRAC.Core.Base import Script

    Script.registerSwitch("S:", "simtelConfig=", "SimtelConfig", setConfig)
    Script.registerSwitch("V:", "version=", "Version", setVersion)

    from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient
    from DIRAC.Resources.Catalog.FileCatalog import FileCatalog

    Script.parseCommandLine()
    DIRAC.gLogger.setLevel('INFO')

    global fcc, fcL

    from CTADIRAC.Core.Utilities.SoftwareInstallation import checkSoftwarePackage
    from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwarePackage
    from CTADIRAC.Core.Utilities.SoftwareInstallation import installSoftwareEnviron
    from CTADIRAC.Core.Utilities.SoftwareInstallation import localArea
    from CTADIRAC.Core.Utilities.SoftwareInstallation import sharedArea
    from CTADIRAC.Core.Utilities.SoftwareInstallation import workingArea
    from DIRAC.Core.Utilities.Subprocess import systemCall
    from DIRAC.WorkloadManagementSystem.Client.JobReport import JobReport

    global jobID
    jobID = os.environ['JOBID']
    jobReport = JobReport(int(jobID))

    ###########
    ## Checking MD coherence
    fc = FileCatalog('LcgFileCatalog')
    res = fc._getCatalogConfigDetails('DIRACFileCatalog')
    print 'DFC CatalogConfigDetails:', res
    res = fc._getCatalogConfigDetails('LcgFileCatalog')
    print 'LCG CatalogConfigDetails:', res

    fcc = FileCatalogClient()
    fcL = FileCatalog('LcgFileCatalog')

    from DIRAC.Interfaces.API.Dirac import Dirac
    dirac = Dirac()
    ############################

    install_CorsikaSimtelPack(version)
    #############
    # simtelConfigFile should be built from ???
    #simtelConfigFilesPath = 'sim_telarray/multi'
    #simtelConfigFile = simtelConfigFilesPath + '/multi_cta-ultra5.cfg'
    #createGlobalsFromConfigFiles(simtelConfigFile)
    #createGlobalsFromConfigFiles(current_version)
    #######################
    ## files spread in 1000-runs subDirectories

    global corsikaFileLFN
    corsikaFileLFN = dirac.getJobJDL(jobID)['Value']['InputData']
    print 'corsikaFileLFN is ' + corsikaFileLFN
    corsikaFileName = os.path.basename(corsikaFileLFN)
    run_number = corsikaFileName.split('run')[1].split('.corsika.gz')[
        0]  # run001412.corsika.gz

    runNum = int(run_number)
    subRunNumber = '%03d' % runNum
    runNumModMille = runNum % 1000
    runNumTrunc = (runNum - runNumModMille) / 1000
    runNumSeriesDir = '%03dxxx' % runNumTrunc
    print 'runNumSeriesDir=', runNumSeriesDir

    f = open('DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK', 'w')
    f.close()

    ############ Producing SimTel File
    ######################Building simtel Directory Metadata #######################

    cfg_dict = {
        "4MSST": 'cta-prod2-4m-dc',
        "SCSST": 'cta-prod2-sc-sst',
        "STD": 'cta-prod2',
        "NSBX3": 'cta-prod2',
        "ASTRI": 'cta-prod2-astri',
        "SCMST": 'cta-prod2-sc3',
        "NORTH": 'cta-prod2n'
    }

    if simtelConfig == "6INROW":
        all_configs = ["4MSST", "SCSST", "ASTRI", "NSBX3", "STD", "SCMST"]
    elif simtelConfig == "5INROW":
        all_configs = ["4MSST", "SCSST", "ASTRI", "NSBX3", "STD"]
    elif simtelConfig == "3INROW":
        all_configs = ["SCSST", "STD", "SCMST"]
    else:
        all_configs = [simtelConfig]

    for current_conf in all_configs:

        DIRAC.gLogger.notice('current conf is', current_conf)

        if current_conf == "SCMST":
            current_version = version + '_sc3'
            DIRAC.gLogger.notice('current version is', current_version)
            if os.path.isdir('sim_telarray'):
                DIRAC.gLogger.notice(
                    'Package found in the local area. Removing package...')
                cmd = 'rm -R sim_telarray corsika-6990 hessioxxx corsika-run'
                if (os.system(cmd)):
                    DIRAC.exit(-1)
                install_CorsikaSimtelPack(current_version)
        else:
            current_version = version
            DIRAC.gLogger.notice('current version is', current_version)

########################################################

        createGlobalsFromConfigFiles(current_version)

        resultCreateSimtelDirMD = createSimtelFileSystAndMD(
            current_conf, current_version)
        if not resultCreateSimtelDirMD['OK']:
            DIRAC.gLogger.error('Failed to create simtelArray Directory MD')
            jobReport.setApplicationStatus(
                'Failed to create simtelArray Directory MD')
            DIRAC.gLogger.error(
                'Metadata coherence problem, no simtelArray File produced')
            DIRAC.exit(-1)
        else:
            print 'simtel Directory MD successfully created'

############## introduce file existence check here ########################
        simtelFileName = particle + '_' + str(thetaP) + '_' + str(
            phiP) + '_alt' + str(
                obslev) + '_' + 'run' + run_number + '.simtel.gz'
        simtelDirPath_conf = simtelDirPath + '_' + current_conf
        simtelOutFileDir = os.path.join(simtelDirPath_conf, 'Data',
                                        runNumSeriesDir)
        simtelOutFileLFN = os.path.join(simtelOutFileDir, simtelFileName)

        res = CheckCatalogCoherence(simtelOutFileLFN)
        if res == DIRAC.S_OK:
            DIRAC.gLogger.notice('Current conf already done', current_conf)
            continue

#### execute simtelarray ################
        fd = open('run_sim.sh', 'w')
        fd.write("""#! /bin/sh  
  export SVNPROD2=$PWD
  export SVNTAG=SVN-PROD2
  export CORSIKA_IO_BUFFER=800MB
  ./grid_prod2-repro.sh %s %s""" % (corsikaFileName, current_conf))
        fd.close()

        os.system('chmod u+x grid_prod2-repro.sh')
        os.system('chmod u+x run_sim.sh')
        cmdTuple = ['./run_sim.sh']
        ret = systemCall(0, cmdTuple, sendOutputSimTel)
        simtelReturnCode, stdout, stderr = ret['Value']

        if (os.system('grep Broken simtel.log')):
            DIRAC.gLogger.notice('not broken')
        else:
            DIRAC.gLogger.notice('broken')
            jobReport.setApplicationStatus('Broken pipe')
            DIRAC.exit(-1)

        if not ret['OK']:
            DIRAC.gLogger.error('Failed to execute run_sim.sh')
            DIRAC.gLogger.error('run_sim.sh status is:', simtelReturnCode)
            DIRAC.exit(-1)

## putAndRegister simtel data/log/histo Output File:
        cfg = cfg_dict[current_conf]
        cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Data/*.simtel.gz ' + simtelFileName
        if (os.system(cmd)):
            DIRAC.exit(-1)

############################################
        simtelRunNumberSeriesDirExist = fcc.isDirectory(
            simtelOutFileDir)['Value']['Successful'][simtelOutFileDir]
        newSimtelRunFileSeriesDir = (
            simtelRunNumberSeriesDirExist != True
        )  # if new runFileSeries, will need to add new MD

        simtelLogFileName = particle + '_' + str(thetaP) + '_' + str(
            phiP) + '_alt' + str(obslev) + '_' + 'run' + run_number + '.log.gz'
        cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Log/*.log.gz ' + simtelLogFileName
        if (os.system(cmd)):
            DIRAC.exit(-1)
        simtelOutLogFileDir = os.path.join(simtelDirPath_conf, 'Log',
                                           runNumSeriesDir)
        simtelOutLogFileLFN = os.path.join(simtelOutLogFileDir,
                                           simtelLogFileName)

        simtelHistFileName = particle + '_' + str(thetaP) + '_' + str(
            phiP) + '_alt' + str(
                obslev) + '_' + 'run' + run_number + '.hdata.gz'
        cmd = 'mv Data/sim_telarray/' + cfg + '/0.0deg/Histograms/*.hdata.gz ' + simtelHistFileName
        if (os.system(cmd)):
            DIRAC.exit(-1)
        simtelOutHistFileDir = os.path.join(simtelDirPath_conf, 'Histograms',
                                            runNumSeriesDir)
        simtelOutHistFileLFN = os.path.join(simtelOutHistFileDir,
                                            simtelHistFileName)

        ########### quality check on Histo Missing because it needs the NSHOW #############################################
        ########## quality check on Log #############################
        cmd = 'zcat %s | grep Finished.' % simtelLogFileName
        DIRAC.gLogger.notice('Executing system call:', cmd)
        if (os.system(cmd)):
            jobReport.setApplicationStatus('Log check Failed')
            DIRAC.exit(-1)

################################################
        from DIRAC.Core.Utilities import List
        from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
        opsHelper = Operations()

        global seList
        seList = opsHelper.getValue('ProductionOutputs/SimtelProd', [])
        seList = List.randomize(seList)

        DIRAC.gLogger.notice('SeList is:', seList)

        #########  Upload simtel data/log/histo ##############################################
        res = upload_to_seList(simtelOutFileLFN, simtelFileName)

        if res != DIRAC.S_OK:
            DIRAC.gLogger.error('OutputData Upload Error', simtelOutFileLFN)
            jobReport.setApplicationStatus('OutputData Upload Error')
            DIRAC.exit(-1)

        res = CheckCatalogCoherence(simtelOutLogFileLFN)
        if res == DIRAC.S_OK:
            DIRAC.gLogger.notice('Log file already exists. Removing:',
                                 simtelOutLogFileLFN)
            ret = dirac.removeFile(simtelOutLogFileLFN)

        res = upload_to_seList(simtelOutLogFileLFN, simtelLogFileName)

        if res != DIRAC.S_OK:
            DIRAC.gLogger.error('Upload simtel Log Error', simtelOutLogFileLFN)
            DIRAC.gLogger.notice('Removing simtel data file:',
                                 simtelOutFileLFN)
            ret = dirac.removeFile(simtelOutFileLFN)
            jobReport.setApplicationStatus('OutputData Upload Error')
            DIRAC.exit(-1)

        res = CheckCatalogCoherence(simtelOutHistFileLFN)
        if res == DIRAC.S_OK:
            DIRAC.gLogger.notice('Histo file already exists. Removing:',
                                 simtelOutHistFileLFN)
            ret = dirac.removeFile(simtelOutHistFileLFN)

        res = upload_to_seList(simtelOutHistFileLFN, simtelHistFileName)

        if res != DIRAC.S_OK:
            DIRAC.gLogger.error('Upload simtel Histo Error',
                                simtelOutHistFileLFN)
            DIRAC.gLogger.notice('Removing simtel data file:',
                                 simtelOutFileLFN)
            ret = dirac.removeFile(simtelOutFileLFN)
            DIRAC.gLogger.notice('Removing simtel log file:',
                                 simtelOutLogFileLFN)
            ret = dirac.removeFile(simtelOutLogFileLFN)
            jobReport.setApplicationStatus('OutputData Upload Error')
            DIRAC.exit(-1)
####################################################################

        if newSimtelRunFileSeriesDir:
            insertRunFileSeriesMD(simtelOutFileDir, runNumTrunc)
            insertRunFileSeriesMD(simtelOutLogFileDir, runNumTrunc)
            insertRunFileSeriesMD(simtelOutHistFileDir, runNumTrunc)


###### simtel File level metadata ############################################
        simtelFileMD = {}
        simtelFileMD['runNumber'] = int(run_number)
        simtelFileMD['jobID'] = jobID
        simtelFileMD['simtelReturnCode'] = simtelReturnCode

        result = fcc.setMetadata(simtelOutFileLFN, simtelFileMD)
        print "result setMetadata=", result
        if not result['OK']:
            print 'ResultSetMetadata:', result['Message']

        result = fcc.setMetadata(simtelOutLogFileLFN, simtelFileMD)
        print "result setMetadata=", result
        if not result['OK']:
            print 'ResultSetMetadata:', result['Message']

        result = fcc.setMetadata(simtelOutHistFileLFN, simtelFileMD)
        print "result setMetadata=", result
        if not result['OK']:
            print 'ResultSetMetadata:', result['Message']

        result = fcc.addFileAncestors(
            {simtelOutFileLFN: {
                'Ancestors': [corsikaFileLFN]
            }})
        print 'result addFileAncestor:', result

        result = fcc.addFileAncestors(
            {simtelOutLogFileLFN: {
                'Ancestors': [corsikaFileLFN]
            }})
        print 'result addFileAncestor:', result

        result = fcc.addFileAncestors(
            {simtelOutHistFileLFN: {
                'Ancestors': [corsikaFileLFN]
            }})
        print 'result addFileAncestor:', result

        result = fcc.setMetadata(simtelOutFileLFN, simtelFileMD)
        if not result['OK']:
            print 'ResultSetMetadata:', result['Message']

    DIRAC.exit()
Ejemplo n.º 31
0
  def __findServiceURL( self ):
    if not self.__initStatus[ 'OK' ]:
      return self.__initStatus
    gatewayURL = False
    if self.KW_IGNORE_GATEWAYS not in self.kwargs or not self.kwargs[ self.KW_IGNORE_GATEWAYS ]:
      dRetVal = gConfig.getOption( "/DIRAC/Gateways/%s" % DIRAC.siteName() )
      if dRetVal[ 'OK' ]:
        rawGatewayURL = List.randomize( List.fromChar( dRetVal[ 'Value'], "," ) )[0]
        gatewayURL = "/".join( rawGatewayURL.split( "/" )[:3] )

    for protocol in gProtocolDict.keys():
      if self._destinationSrv.find( "%s://" % protocol ) == 0:
        gLogger.debug( "Already given a valid url", self._destinationSrv )
        if not gatewayURL:
          return S_OK( self._destinationSrv )
        gLogger.debug( "Reconstructing given URL to pass through gateway" )
        path = "/".join( self._destinationSrv.split( "/" )[3:] )
        finalURL = "%s/%s" % ( gatewayURL, path )
        gLogger.debug( "Gateway URL conversion:\n %s -> %s" % ( self._destinationSrv, finalURL ) )
        return S_OK( finalURL )

    if gatewayURL:
      gLogger.debug( "Using gateway", gatewayURL )
      return S_OK( "%s/%s" % ( gatewayURL, self._destinationSrv ) )

    try:
      urls = getServiceURL( self._destinationSrv, setup = self.setup )
    except Exception as e:
      return S_ERROR( "Cannot get URL for %s in setup %s: %s" % ( self._destinationSrv, self.setup, repr( e ) ) )
    if not urls:
      return S_ERROR( "URL for service %s not found" % self._destinationSrv )

    urlsList = List.fromChar( urls, "," )
    self.__nbOfUrls = len( urlsList )
    self.__nbOfRetry = 2 if self.__nbOfUrls > 2 else 3 # we retry 2 times all services, if we run more than 2 services
    if len( urlsList ) == len( self.__bannedUrls ):
      self.__bannedUrls = []  # retry all urls
      gLogger.debug( "Retrying again all URLs" )

    if len( self.__bannedUrls ) > 0 and len( urlsList ) > 1 :
      # we have host which is not accessible. We remove that host from the list.
      # We only remove if we have more than one instance
      for i in self.__bannedUrls:
        gLogger.debug( "Removing banned URL", "%s" % i )
        urlsList.remove( i )

    randUrls = List.randomize( urlsList )
    sURL = randUrls[0]

    if len( self.__bannedUrls ) > 0 and self.__nbOfUrls > 2:  # when we have multiple services then we can have a situation
      # when two service are running on the same machine with different port...

      retVal = Network.splitURL( sURL )
      nexturl = None
      if retVal['OK']:
        nexturl = retVal['Value']

        found = False
        for i in self.__bannedUrls:
          retVal = Network.splitURL( i )
          if retVal['OK']:
            bannedurl = retVal['Value']
          else:
            break

          if nexturl[1] == bannedurl[1]:
            found = True
            break
        if found:
          nexturl = self.__selectUrl( nexturl, randUrls[1:] )
          if nexturl:  # an url found which is in different host
            sURL = nexturl
    gLogger.debug( "Discovering URL for service", "%s -> %s" % ( self._destinationSrv, sURL ) )
    return S_OK( sURL )
Ejemplo n.º 32
0
  def execute(self):
    """ Main execution function.
    """
    #Have to work out if the module is part of the last step i.e. 
    #user jobs can have any number of steps and we only want 
    #to run the finalization once.
    currentStep = int(self.step_commons['STEP_NUMBER'])
    totalSteps = int(self.workflow_commons['TotalSteps'])
    if currentStep == totalSteps:
      self.lastStep = True
    else:
      self.log.verbose('Current step = %s, total steps of workflow = %s, UserJobFinalization will enable itself only \
      at the last workflow step.' % (currentStep, totalSteps))            
        
    if not self.lastStep:
      return S_OK()    
    
    result = self.resolveInputVariables()
    if not result['OK']:
      self.log.error(result['Message'])
      return result

    self.log.info('Initializing %s' % self.version)
    if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
      self.log.verbose('Workflow status = %s, step status = %s' % (self.workflowStatus['OK'], 
                                                                   self.stepStatus['OK']))
      return S_OK('No output data upload attempted')
    
    if not self.userOutputData:
      self.log.info('No user output data is specified for this job, nothing to do')
      return S_OK('No output data to upload')
        
    #Determine the final list of possible output files for the
    #workflow and all the parameters needed to upload them.
    outputList = []
    for i in self.userOutputData:
      outputList.append({'outputPath' : string.upper(string.split(i, '.')[-1]),
                         'outputDataSE' : self.userOutputSE,
                         'outputFile' : os.path.basename(i)})

    userOutputLFNs = []
    if self.userOutputData:
      self.log.info('Constructing user output LFN(s) for %s' % (string.join(self.userOutputData, ', ')))
      if not self.jobID:
        self.jobID = 12345
      owner = ''
      if self.workflow_commons.has_key('Owner'):
        owner = self.workflow_commons['Owner']
      else:
        res = self.getCurrentOwner()
        if not res['OK']:
          return S_ERROR('Could not obtain owner from proxy')
        owner = res['Value']
      vo = ''
      if self.workflow_commons.has_key('VO'):
        vo = self.workflow_commons['VO']
      else:
        res = self.getCurrentVO()
        if not res['OK']:
          return S_ERROR('Could not obtain VO from proxy')
        vo = res['Value']
      
      result = constructUserLFNs(int(self.jobID), vo, owner, self.userOutputData, self.userOutputPath)
      if not result['OK']:
        self.log.error('Could not create user LFNs', result['Message'])
        return result
      userOutputLFNs = result['Value']

    self.log.verbose('Calling getCandidateFiles( %s, %s, %s)' % (outputList, userOutputLFNs, self.outputDataFileMask))
    result = self.getCandidateFiles(outputList, userOutputLFNs, self.outputDataFileMask)
    if not result['OK']:
      if not self.ignoreapperrors:
        self.setApplicationStatus(result['Message'])
        return S_OK()
    
    fileDict = result['Value']      
    result = self.getFileMetadata(fileDict)
    if not result['OK']:
      if not self.ignoreapperrors:
        self.setApplicationStatus(result['Message'])
        return S_OK()

    if not result['Value']:
      if not self.ignoreapperrors:
        self.log.info('No output data files were determined to be uploaded for this workflow')
        self.setApplicationStatus('No Output Data Files To Upload')
        return S_OK()

    fileMetadata = result['Value']
    
    #First get the local (or assigned) SE to try first for upload and others in random fashion
    result = getDestinationSEList('Tier1-USER', DIRAC.siteName(), outputmode='local')
    if not result['OK']:
      self.log.error('Could not resolve output data SE', result['Message'])
      self.setApplicationStatus('Failed To Resolve OutputSE')
      return result      
    
    localSE = result['Value']
    self.log.verbose('Site Local SE for user outputs is: %s' % (localSE))
    orderedSEs = self.defaultOutputSE  
    for se in localSE:
      if se in orderedSEs:
        orderedSEs.remove(se)
    for se in self.userOutputSE:
      if se in orderedSEs:
        orderedSEs.remove(se)  

    orderedSEs = localSE + List.randomize(orderedSEs)    
    if self.userOutputSE:
      prependSEs = []
      for userSE in self.userOutputSE:
        if not userSE in orderedSEs:
          prependSEs.append(userSE)
      orderedSEs = prependSEs + orderedSEs
    
    self.log.info('Ordered list of output SEs is: %s' % (string.join(orderedSEs, ', ')))    
    final = {}
    for fileName, metadata in fileMetadata.items():
      final[fileName] = metadata
      final[fileName]['resolvedSE'] = orderedSEs

    #At this point can exit and see exactly what the module will upload
    if not self.enable:
      self.log.info('Module is disabled by control flag, would have attempted \
      to upload the following files %s' % string.join(final.keys(), ', '))
      for fileName, metadata in final.items():
        self.log.info('--------%s--------' % fileName)
        for n, v in metadata.items():
          self.log.info('%s = %s' %(n, v))

      return S_OK('Module is disabled by control flag')

    #Instantiate the failover transfer client with the global request object
    failoverTransfer = FailoverTransfer(self.request)

    #One by one upload the files with failover if necessary
    replication = {}
    failover = {}
    uploaded = []
    if not self.failoverTest:
      for fileName, metadata in final.items():
        self.log.info("Attempting to store file %s to the following SE(s):\n%s" % (fileName, 
                                                                                   string.join(metadata['resolvedSE'], 
                                                                                               ', ')))
        result = failoverTransfer.transferAndRegisterFile(fileName, metadata['localpath'], metadata['lfn'],
                                                          metadata['resolvedSE'], fileGUID = metadata['guid'], 
                                                          fileCatalog = self.userFileCatalog)
        if not result['OK']:
          self.log.error('Could not transfer and register %s with metadata:\n %s' % (fileName, metadata))
          failover[fileName] = metadata
        else:
          #Only attempt replication after successful upload
          lfn = metadata['lfn']
          uploaded.append(lfn)          
          seList = metadata['resolvedSE']
          replicateSE = ''
          if result['Value'].has_key('uploadedSE'):
            uploadedSE = result['Value']['uploadedSE']            
            for se in seList:
              if not se == uploadedSE:
                replicateSE = se
                break
          
          if replicateSE and lfn:
            self.log.info('Will attempt to replicate %s to %s' % (lfn, replicateSE))    
            replication[lfn] = replicateSE            
    else:
      failover = final

    cleanUp = False
    for fileName, metadata in failover.items():
      random.shuffle(self.failoverSEs)
      targetSE = metadata['resolvedSE'][0]
      metadata['resolvedSE'] = self.failoverSEs
      result = failoverTransfer.transferAndRegisterFileFailover(fileName, metadata['localpath'], metadata['lfn'],
                                                                targetSE, metadata['resolvedSE'], 
                                                                fileGUID = metadata['guid'], 
                                                                fileCatalog = self.userFileCatalog)
      if not result['OK']:
        self.log.error('Could not transfer and register %s with metadata:\n %s' % (fileName, metadata))
        cleanUp = True
        continue #for users can continue even if one completely fails
      else:
        lfn = metadata['lfn']
        uploaded.append(lfn)

    #For files correctly uploaded must report LFNs to job parameters
    if uploaded:
      report = string.join( uploaded, ', ' )
      self.jobReport.setJobParameter( 'UploadedOutputData', report )

    #Now after all operations, retrieve potentially modified request object
    result = failoverTransfer.getRequestObject()
    if not result['OK']:
      self.log.error(result)
      return S_ERROR('Could Not Retrieve Modified Request')

    self.request = result['Value']

    #If some or all of the files failed to be saved to failover
    if cleanUp:
      self.workflow_commons['Request'] = self.request
      #Leave any uploaded files just in case it is useful for the user
      #do not try to replicate any files.
      return S_ERROR('Failed To Upload Output Data')
    
    #If there is now at least one replica for uploaded files can trigger replication
    rm = ReplicaManager()
    self.log.info('Sleeping for 10 seconds before attempting replication of recently uploaded files')
    time.sleep(10)
    for lfn, repSE in replication.items():
      result = rm.replicateAndRegister(lfn, repSE, catalog = self.userFileCatalog)
      if not result['OK']:
        self.log.info('Replication failed with below error but file already exists in Grid storage with \
        at least one replica:\n%s' % (result))

    self.workflow_commons['Request'] = self.request
    
    #Now must ensure if any pending requests are generated that these are propagated to the job wrapper
    reportRequest = None
    if self.jobReport:
      result = self.jobReport.generateRequest()
      if not result['OK']:
        self.log.warn('Could not generate request for job report with result:\n%s' % (result))
      else:
        reportRequest = result['Value']
    if reportRequest:
      self.log.info('Populating request with job report information')
      self.request.update(reportRequest)
    
    if not self.request.isEmpty()['Value']:
      request_string = self.request.toXML()['Value']
      # Write out the request string
      fname = 'user_job_%s_request.xml' % (self.jobID)
      xmlfile = open(fname, 'w')
      xmlfile.write(request_string)
      xmlfile.close()
      self.log.info('Creating failover request for deferred operations for job %s:' % self.jobID)
      result = self.request.getDigest()
      if result['OK']:
        digest = result['Value']
        self.log.info(digest)
    
    self.setApplicationStatus('Job Finished Successfully')
    return S_OK('Output data uploaded')
Ejemplo n.º 33
0
    
    urls = List.fromChar( urls, "," )
    self.__nbOfUrls = len( urls )
    self.__nbOfRetry = 2 if self.__nbOfUrls > 2 else 3 # we retry 2 times all services, if we run more than 2 services
    if len( urls ) == len( self.__bannedUrls ):
      self.__bannedUrls = []  # retry all urls
      gLogger.debug( "Retrying again all URLs" )      
      
    if len( self.__bannedUrls ) > 0 and len( urls ) > 1 :
      # we have host which is not accessible. We remove that host from the list.
      # We only remove if we have more than one instance
      for i in self.__bannedUrls:
        gLogger.debug( "Removing banned URL", "%s" % i )
        urls.remove( i )
        
    sURL = List.randomize( urls )[0]

    gLogger.debug( "Discovering URL for service", "%s -> %s" % ( self._destinationSrv, sURL ) )
    return S_OK( sURL )

  def __checkThreadID( self ):
    if not self.__initStatus[ 'OK' ]:
      return self.__initStatus
    cThID = thread.get_ident()
    if not self.__allowedThreadID:
      self.__allowedThreadID = cThID
    elif cThID != self.__allowedThreadID :
      msgTxt = """
=======DISET client thread safety error========================
Client %s
can only run on thread %s
Ejemplo n.º 34
0
  def __findServiceURL(self):
    """
        Discovers the URL of a service, taking into account gateways, multiple URLs, banned URLs


        If the site on which we run is configured to use gateways (/DIRAC/Gateways/<siteName>),
        these URLs will be used. To ignore the gateway, it is possible to set KW_IGNORE_GATEWAYS
        to False in kwargs.

        If self._destinationSrv (given as constructor attribute) is a properly formed URL,
        we just return this one. If we have to use a gateway, we just replace the server name in the url.

        The list of URLs defined in the CS (<System>/URLs/<Component>) is randomized

        This method also sets some attributes:
          * self.__nbOfUrls = number of URLs
          * self.__nbOfRetry = 2 if we have more than 2 urls, otherwise 3
          * self.__bannedUrls is reinitialized if all the URLs are banned

        :return: the selected URL

    """
    if not self.__initStatus['OK']:
      return self.__initStatus

    # Load the Gateways URLs for the current site Name
    gatewayURL = False
    if self.KW_IGNORE_GATEWAYS not in self.kwargs or not self.kwargs[self.KW_IGNORE_GATEWAYS]:
      dRetVal = gConfig.getOption("/DIRAC/Gateways/%s" % DIRAC.siteName())
      if dRetVal['OK']:
        rawGatewayURL = List.randomize(List.fromChar(dRetVal['Value'], ","))[0]
        gatewayURL = "/".join(rawGatewayURL.split("/")[:3])

    # If what was given as constructor attribute is a properly formed URL,
    # we just return this one.
    # If we have to use a gateway, we just replace the server name in it
    for protocol in gProtocolDict:
      if self._destinationSrv.find("%s://" % protocol) == 0:
        gLogger.debug("Already given a valid url", self._destinationSrv)
        if not gatewayURL:
          return S_OK(self._destinationSrv)
        gLogger.debug("Reconstructing given URL to pass through gateway")
        path = "/".join(self._destinationSrv.split("/")[3:])
        finalURL = "%s/%s" % (gatewayURL, path)
        gLogger.debug("Gateway URL conversion:\n %s -> %s" % (self._destinationSrv, finalURL))
        return S_OK(finalURL)

    if gatewayURL:
      gLogger.debug("Using gateway", gatewayURL)
      return S_OK("%s/%s" % (gatewayURL, self._destinationSrv))

    # We extract the list of URLs from the CS (System/URLs/Component)
    try:
      urls = getServiceURL(self._destinationSrv, setup=self.setup)
    except Exception as e:
      return S_ERROR("Cannot get URL for %s in setup %s: %s" % (self._destinationSrv, self.setup, repr(e)))
    if not urls:
      return S_ERROR("URL for service %s not found" % self._destinationSrv)

    failoverUrls = []
    # Try if there are some failover URLs to use as last resort
    try:
      failoverUrlsStr = getServiceFailoverURL(self._destinationSrv, setup=self.setup)
      if failoverUrlsStr:
        failoverUrls = failoverUrlsStr.split(',')
    except Exception as e:
      pass

    # We randomize the list, and add at the end the failover URLs (System/FailoverURLs/Component)
    urlsList = List.randomize(List.fromChar(urls, ",")) + failoverUrls
    self.__nbOfUrls = len(urlsList)
    self.__nbOfRetry = 2 if self.__nbOfUrls > 2 else 3  # we retry 2 times all services, if we run more than 2 services
    if self.__nbOfUrls == len(self.__bannedUrls):
      self.__bannedUrls = []  # retry all urls
      gLogger.debug("Retrying again all URLs")

    if len(self.__bannedUrls) > 0 and len(urlsList) > 1:
      # we have host which is not accessible. We remove that host from the list.
      # We only remove if we have more than one instance
      for i in self.__bannedUrls:
        gLogger.debug("Removing banned URL", "%s" % i)
        urlsList.remove(i)

    # Take the first URL from the list
    #randUrls = List.randomize( urlsList ) + failoverUrls

    sURL = urlsList[0]

    # If we have banned URLs, and several URLs at disposals, we make sure that the selected sURL
    # is not on a host which is banned. If it is, we take the next one in the list using __selectUrl
    # If we have banned URLs, and several URLs at disposals, we make sure that the selected sURL
    # is not on a host which is banned. If it is, we take the next one in the list using __selectUrl

    if len(self.__bannedUrls) > 0 and self.__nbOfUrls > 2:  # when we have multiple services then we can
      # have a situation when two services are running on the same machine with different ports...
      retVal = Network.splitURL(sURL)
      nexturl = None
      if retVal['OK']:
        nexturl = retVal['Value']

        found = False
        for i in self.__bannedUrls:
          retVal = Network.splitURL(i)
          if retVal['OK']:
            bannedurl = retVal['Value']
          else:
            break
          # We found a banned URL on the same host as the one we are running on
          if nexturl[1] == bannedurl[1]:
            found = True
            break
        if found:
          nexturl = self.__selectUrl(nexturl, urlsList[1:])
          if nexturl:  # an url found which is in different host
            sURL = nexturl
    gLogger.debug("Discovering URL for service", "%s -> %s" % (self._destinationSrv, sURL))
    return S_OK(sURL)
Ejemplo n.º 35
0
    def _prepareJDL(self, taskQueueDict, workingDirectory, pilotOptions,
                    pilotsToSubmit, ceMask, submitPrivatePilot, privateTQ):
        """
      Write JDL for Pilot Submission
    """
        # RB = List.randomize( self.resourceBrokers )[0]
        LDs = []
        NSs = []
        LBs = []
        # Select Randomly one RB from the list
        RB = List.randomize(self.resourceBrokers)[0]
        LDs.append('"%s:9002"' % RB)
        LBs.append('"%s:9000"' % RB)

        for LB in self.loggingServers:
            NSs.append('"%s:7772"' % LB)

        LD = ', '.join(LDs)
        NS = ', '.join(NSs)
        LB = ', '.join(LBs)

        vo = getVO()
        if privateTQ or vo not in ['lhcb']:
            extraReq = "True"
        else:
            if submitPrivatePilot:
                extraReq = "! AllowsGenericPilot"
            else:
                extraReq = "AllowsGenericPilot"

        rbJDL = """
AllowsGenericPilot = Member( "VO-lhcb-pilot" , other.GlueHostApplicationSoftwareRunTimeEnvironment );
Requirements = pilotRequirements && other.GlueCEStateStatus == "Production" && %s;
RetryCount = 0;
ErrorStorage = "%s/pilotError";
OutputStorage = "%s/pilotOutput";
# ListenerPort = 44000;
ListenerStorage = "%s/Storage";
VirtualOrganisation = "lhcb";
LoggingTimeout = 30;
LoggingSyncTimeout = 30;
LoggingDestination = { %s };
# Default NS logger level is set to 0 (null)
# max value is 6 (very ugly)
NSLoggerLevel = 0;
DefaultLogInfoLevel = 0;
DefaultStatusLevel = 0;
NSAddresses = { %s };
LBAddresses = { %s };
MyProxyServer = "no-myproxy.cern.ch";
""" % (extraReq, workingDirectory, workingDirectory, workingDirectory, LD, NS,
        LB)

        pilotJDL, pilotRequirements = self._JobJDL(taskQueueDict, pilotOptions,
                                                   ceMask)

        jdl = os.path.join(workingDirectory,
                           '%s.jdl' % taskQueueDict['TaskQueueID'])
        jdl = self._writeJDL(jdl, [pilotJDL, rbJDL])

        return {
            'JDL': jdl,
            'Requirements': pilotRequirements + " && " + extraReq,
            'Pilots': pilotsToSubmit,
            'RB': RB
        }
Ejemplo n.º 36
0
  def _prepareJDL( self, taskQueueDict, workingDirectory, pilotOptions,
                   pilotsToSubmit, ceMask, submitPrivatePilot, privateTQ ):
    """
      Write JDL for Pilot Submission
    """
    rbList = []
    # Select Randomly one RB from the list
    rb = List.randomize( self.resourceBrokers )[0]
    rbList.append( '"https://%s:7443/glite_wms_wmproxy_server"' % rb )

    lbList = []
    for lb in self.loggingServers:
      lbList.append( '"https://%s:9000"' % lb )
    lbList = List.randomize( lbList )

    nPilots = 1
    vo = gConfig.getValue( '/DIRAC/VirtualOrganization', '' )
    if privateTQ or vo not in ['lhcb']:
      extraReq = "True"
    else:
      if submitPrivatePilot:
        extraReq = "! AllowsGenericPilot"
      else:
        extraReq = "AllowsGenericPilot"

    myProxyServer = self.myProxyServer.strip()
    if not myProxyServer:
      #Random string to avoid caching
      myProxyServer = "%s.cern.ch" % md5( str( time.time() ) ).hexdigest()[:10]

    wmsClientJDL = """
RetryCount = 0;
ShallowRetryCount = 0;
AllowsGenericPilot = Member( "VO-lhcb-pilot" , other.GlueHostApplicationSoftwareRunTimeEnvironment );
Requirements = pilotRequirements && %s;
MyProxyServer = "%s";
WmsClient = [
  ErrorStorage = "%s/pilotError";
  OutputStorage = "%s/pilotOutput";
# ListenerPort = 44000;
  ListenerStorage = "%s/Storage";
  RetryCount = 0;
  ShallowRetryCount = 0;
  WMProxyEndPoints = { %s };
  LBEndPoints = { %s };
  EnableServiceDiscovery = false;
  MyProxyServer = "%s";
  JdlDefaultAttributes =  [
    requirements  =  ( other.GlueCEStateStatus == "Production" || other.GlueCEStateStatus == "Special" );
    AllowZippedISB  =  true;
    SignificantAttributes  =  {"Requirements", "Rank", "FuzzyRank"};
    PerusalFileEnable  =  false;
  ];
];
""" % ( extraReq, myProxyServer,
        workingDirectory, workingDirectory,
        workingDirectory, ', '.join( rbList ),
        ', '.join( lbList ), myProxyServer )

    if pilotsToSubmit > 1:
      wmsClientJDL += """
JobType = "Parametric";
Parameters= %s;
ParameterStep =1;
ParameterStart = 0;
""" % pilotsToSubmit
      nPilots = pilotsToSubmit


    ( pilotJDL , pilotRequirements ) = self._JobJDL( taskQueueDict, pilotOptions, ceMask )

    jdl = os.path.join( workingDirectory, '%s.jdl' % taskQueueDict['TaskQueueID'] )
    jdl = self._writeJDL( jdl, [pilotJDL, wmsClientJDL] )

    return {'JDL':jdl, 'Requirements':pilotRequirements + " && " + extraReq, 'Pilots':nPilots, 'RB':rb }