예제 #1
0
    def _logError( self , userMsg = None , logMsg = None ):

        if userMsg :
            sm = StatusManager()
            sm.setStatus( self.work_dir , Status( code = 5 , message =  userMsg ) )
            

        if logMsg :
            rf_log.error( "%s/%s : %s" %( self._service.getName() ,
                                          self._job.getKey() ,
                                          logMsg
                                          )
                        )
예제 #2
0
 def _run(self , serviceName, xmlEnv ):
     """
     Load the execution engine configured for the current job.

     @param serviceName: the name of the service, used as prefix of the log message
     @type serviceName: string
     @param xmlEnv: not used in the part of the method visible here
     @raise MobyleError: if the execution engine cannot be loaded. Before the
                         exception is raised the failure is logged on rc_log
                         and the status of self._dirPath is set to code 5.
     """
     dispatcher = _cfg.getDispatcher()

     execution_config = dispatcher.getExecutionConfig( self.jobState )
     try:
         exec_engine = executionLoader( execution_config =  execution_config )
     except MobyleError as err :
         # "except ... as" and "raise Class( msg )" are valid on Python >= 2.6 and on Python 3
         msg = "unknown execution system : %s" %err
         rc_log.critical("%s : %s" %( serviceName ,
                                      msg
                                      ), exc_info = True
         )
         # the user only sees a generic message, the details go to the log
         sm = StatusManager()
         sm.setStatus( self._dirPath , Status( code = 5 , message = 'Mobyle internal server error' ) )
         raise MobyleError( msg )
예제 #3
0
def getStatus(jobID):
    """
    Return the current status of a local job, refreshing the stored status
    from the execution engine when the stored one can still change.

    @param jobID: the url of the job
    @type jobID: string
    @return: the current status of the job
    @rtype: status object (as returned by StatusManager.getStatus or by the
            getStatus method of the execution engine)
    @raise NotImplementedError: if the protocol of jobID is "http"
    @raise MobyleError: re-raised, after being logged on u_log, when loading
                        the execution engine or querying it fails
    @raise OSError: if the user is not the owner of the process (not raised
                    directly here; presumably comes from the execution engine)
    """
    from Mobyle.JobState import normUri
    from urlparse import urlparse
    from Mobyle.StatusManager import StatusManager

    parsed = urlparse(normUri(jobID))
    protocol, path = parsed[0], parsed[2]
    if protocol == "http":
        raise NotImplementedError("trying to querying a distant server")

    if path.endswith("index.xml"):
        # drop "index.xml" and the character in front of it (presumably "/")
        path = path[:-10]
    sm = StatusManager()

    oldStatus = sm.getStatus(path)
    # 'killed' , 'finished' , 'error': the status cannot change anymore
    # 'building': these jobs do not have a batch number yet
    if not oldStatus.isQueryable():
        return oldStatus

    adm = Admin(path)
    batch = adm.getExecutionAlias()
    jobNum = adm.getNumber()
    if batch is None or jobNum is None:
        return oldStatus

    try:
        exec_engine = executionLoader(jobID=jobID)
        newStatus = exec_engine.getStatus(jobNum)
    except MobyleError as err:
        u_log.error(str(err), exc_info=True)
        # bare raise keeps the original traceback
        raise
    if not newStatus.isKnown():
        return oldStatus
    if newStatus != oldStatus:
        sm.setStatus(path, newStatus)
    return newStatus
 def __init__( self , execution_config  ):
     """
     Keep the execution configuration, its alias and a status manager.

     @param execution_config: the configuration of the Execution
     @type execution_config: ExecutionConfig instance
     """
     config = Config()
     self._cfg = config
     self.execution_config = execution_config
     # the alias is looked up in the general configuration
     alias = config.getAliasFromConfig( execution_config )
     self.execution_config_alias = alias
     self.status_manager = StatusManager()
    def testCreation(self):
        """
        StatusManager.create( jobDir , status ) must write self.filename with
        a <status> root holding exactly a <value> and a <message> child that
        reflect the given Status.
        """
        # ( Status keyword arguments , expected <value> text , expected <message> text )
        cases = (
            ( { 'code' : -1 } , 'unknown' , None ) ,
            ( { 'code' : 0 } , 'building' , None ) ,
            ( { 'code' : 1 , 'message' : 'test message' } , 'submitted' , 'test message' ) ,
            ( { 'string' : 'running' } , 'running' , None ) ,
        )
        for status_args , expected_value , expected_message in cases :
            StatusManager.create( self.jobDir , Status( **status_args ) )
            root = etree.parse( self.filename ).getroot()
            self.assertEqual( root.tag , 'status' )
            children = list( root )
            self.assertEqual( len( children ) , 2 )
            value_node , message_node = children
            self.assertEqual( value_node.tag , 'value' )
            self.assertEqual( value_node.text , expected_value )
            self.assertEqual( message_node.tag , 'message' )
            self.assertEqual( message_node.text , expected_message )
            # remove the file so that the next case starts from scratch
            os.unlink( self.filename )
    def testGetStatus( self ):
        """
        A Status written with StatusManager.create must be read back equal by
        StatusManager.getStatus, with or without a message.
        """
        cases = (
            { 'string' : 'running' } ,
            { 'string' : 'killed' , 'message' : "your job has been canceled" } ,
        )
        for status_args in cases :
            expected = Status( **status_args )
            StatusManager.create( self.jobDir, expected )
            manager = StatusManager()
            self.assertEqual( manager.getStatus( self.jobDir ) , expected )
            os.unlink( self.filename )
예제 #7
0
def isExecuting(jobID):
    """
    Tell whether a local job is currently executing.

    @param jobID: the url of the job
    @type jobID: string
    @return: True if the job is currently executing ( submitted , running , pending , hold ).
             False otherwise ( building, finished , error , killed )
    @rtype: boolean
    @raise NotImplementedError: if the protocol of jobID is "http"
    @raise MobyleError: if the job has no number or no execution alias while
                        its stored status is still queryable, or (after being
                        logged on u_log) if the execution engine cannot be
                        loaded or queried
    @raise OSError: if the user is not the owner of the process (not raised
                    directly here; presumably comes from the execution engine)
    """
    from Mobyle.JobState import normUri
    from urlparse import urlparse
    from Mobyle.StatusManager import StatusManager

    parsed = urlparse(normUri(jobID))
    protocol, path = parsed[0], parsed[2]
    if protocol == "http":
        raise NotImplementedError("trying to querying a distant server")

    if path.endswith("index.xml"):
        # drop "index.xml" and the character in front of it (presumably "/")
        path = path[:-10]
    adm = Admin(path)
    batch = adm.getExecutionAlias()
    jobNum = adm.getNumber()

    if batch is None or jobNum is None:
        # without alias or number the job can only be in a status that is
        # not queryable; anything else means the .admin file is inconsistent
        sm = StatusManager()
        status = sm.getStatus(path)
        if not status.isQueryable():
            return False
        raise MobyleError("inconsistency in .admin file %s" % path)
    try:
        execKlass = executionLoader(jobID=jobID)
        newStatus = execKlass.getStatus(jobNum)
    except MobyleError as err:
        u_log.error(str(err), exc_info=True)
        # bare raise keeps the original traceback
        raise
    # The original fell off the end here and returned None whatever the
    # status was. A queryable status is treated as "executing", mirroring the
    # not-queryable => False branch above.
    return newStatus.isQueryable()
    def testConcurency(self):
        """
        Check the behaviour of StatusManager while another process holds a
        lock on the status file.

        Each scenario forks: the child locks self.filename and pauses until it
        receives SIGALRM, the father sleeps one second, runs its assertions,
        then signals the child and waits for it.
        """
        status = Status( string='submitted' )
        StatusManager.create( self.jobDir, status )

        ## sub-process start
        # scenario 1: the child holds a shared lock
        childPid = os.fork()
        if childPid: #father
            sleep(1) # leave the child the time to take its lock
            sm = StatusManager()
            # the status can still be read ...
            self.assertEqual( status , sm.getStatus( self.jobDir ) )
            # ... but writing it must fail
            self.assertRaises( IOError , sm.setStatus , self.jobDir, status )
            os.kill( childPid , signal.SIGALRM ) # wake the child up
            os.wait()

        else: #child
            # handler is defined elsewhere in the module
            signal.signal(signal.SIGALRM, handler)
            File = open( self.filename , 'r' )
            fcntl.lockf( File , fcntl.LOCK_SH | fcntl.LOCK_NB )
            signal.pause() # hold the lock until the father sends SIGALRM
            fcntl.lockf( File , fcntl.LOCK_UN  )
            File.close()
            os._exit(0) # leave without running the rest of the test in the child
        ## sub-process end

        ## sub-process start
        # scenario 2: the child holds an exclusive lock
        childPid = os.fork()
        if childPid: #father
            sleep(1) # leave the child the time to take its lock
            sm = StatusManager()
            recieved_status = sm.getStatus( self.jobDir )
            # reading is expected to give the "unknown" status ...
            self.assertEqual( recieved_status , Status( string= "unknown" )  )
            # ... and writing must fail
            self.assertRaises( IOError , sm.setStatus , self.jobDir, status )
            os.kill( childPid , signal.SIGALRM ) # wake the child up
            os.wait()

        else: #child
            signal.signal(signal.SIGALRM, handler)
            File = open( self.filename , 'r+' )
            fcntl.lockf( File , fcntl.LOCK_EX | fcntl.LOCK_NB )
            signal.pause() # hold the lock until the father sends SIGALRM
            fcntl.lockf( File , fcntl.LOCK_UN  )
            File.close()
            os._exit(0) # leave without running the rest of the test in the child
class ExecutionSystem(object):
    """
    Abstract base class of the execution systems.

    run() prepares the environment of a job, delegates the execution to
    self._run() -- which is called but not defined in this class, so concrete
    classes have to provide it together with getStatus() and kill() -- and
    finally records the resulting status through a StatusManager.
    """

    def __init__( self , execution_config  ):
        """
        @param execution_config: the configuration of the Execution
        @type execution_config: ExecutionConfig instance
        """
        self._cfg = Config()
        self.execution_config = execution_config
        self.execution_config_alias = self._cfg.getAliasFromConfig( self.execution_config )
        self.status_manager = StatusManager()

    @staticmethod
    def _composePath( xml_path , binary_path , system_path ):
        """
        Build the PATH given to the job: the PATH of the xml environment, then
        the configured binary directories, then the PATH of this process.

        Missing or empty parts are skipped, so the result never contains an
        empty element coming from them (an empty PATH element designates the
        current directory).

        @param xml_path: the PATH found in the xml environment, or None
        @type xml_path: string
        @param binary_path: the configured directories, may be empty or None
        @type binary_path: list of strings
        @param system_path: the PATH of the current process
        @type system_path: string
        @return: the elements joined with ':'
        @rtype: string
        """
        parts = []
        if xml_path :
            parts.append( xml_path )
        if binary_path :
            parts.extend( binary_path )
        parts.append( system_path )
        return ":".join( parts )

    def run( self , commandLine , dirPath , serviceName , jobState , xmlEnv = None):
        """
        Run the job through self._run() and store the status it returns.

        @param commandLine: the command to be executed
        @type commandLine: String
        @param dirPath: the absolute path to directory where the job will be executed (normally we are already in)
        @type dirPath: String
        @param serviceName: the name of the service
        @type serviceName: string
        @param jobState:
        @type jobState: a L{JobState} instance
        @param xmlEnv: the environment variables defined for the job. When
                       given, the dictionary is modified in place: PATH is
                       recomputed and every other variable of os.environ is
                       copied into it.
        @type xmlEnv: dictionary
        @raise MobyleError: if the current working directory is not dirPath
        """
        self.jobState = jobState
        if dirPath[-1] == '/':
            dirPath = dirPath[:-1]
        jobKey = os.path.split( dirPath )[1]
        if os.getcwd() != os.path.abspath( dirPath ):
            msg = "the child process execute itself in a wrong directory"
            self._logError( dirPath , serviceName ,jobKey,
                            userMsg = "Mobyle internal server error" ,
                            logMsg = msg  )
            raise MobyleError( msg )

        if xmlEnv is None:
            xmlEnv = {}

        # record the queue chosen by the dispatcher in the Admin data of the job
        dispatcher = self._cfg.getDispatcher()
        queue = dispatcher.getQueue( jobState )
        adm = Admin( dirPath )
        adm.setQueue( queue )
        adm.commit()

        xmlEnv[ 'PATH' ] = self._composePath( xmlEnv.get( 'PATH' ) ,
                                              self._cfg.binary_path() ,
                                              os.environ[ 'PATH' ] )
        # every other variable of the process environment replaces the xml one
        for var , value in os.environ.items():
            if var != 'PATH':
                xmlEnv[ var ] = value
        self._returncode = None
        accounting = self._cfg.accounting()
        if accounting:
            beg_time = time.time()

        ###################################
        mobyleStatus = self._run( commandLine , dirPath , serviceName , jobKey , jobState , queue , xmlEnv )
        ###################################

        if accounting:
            end_time = time.time()
            a_log = getLogger( 'Mobyle.account' )
            #%d trunc time to second
            #%f for millisecond
            record = { 'serviceName' : serviceName ,
                       'jobkey' : jobKey ,
                       'exec_class' : self.execution_config.execution_class_name ,
                       'queue' : queue ,
                       'beg_time' : beg_time ,
                       'end_time' : end_time ,
                       'ela_time' : end_time - beg_time ,
                       'status' : mobyleStatus ,
                       }
            a_log.info( "%(serviceName)s/%(jobkey)s : %(exec_class)s/%(queue)s : %(beg_time)d-%(end_time)d %(ela_time)d : %(status)s" % record )
        self.status_manager.setStatus( dirPath , mobyleStatus )


    def getStatus( self ,  number ):
        """
        abstract method. this method must be implemented in child classes
        @param number: the identifier of the job for the execution system
        @type number:
        @return the status of the job
        @rtype:
        @raise NotImplementedError: always, in this base class
        """
        raise NotImplementedError( "Must be Implemented in child classes" )

    def kill(  self , number ):
        """
        kill the Job
        abstract method. this method must be implemented in child classes
        @param number: the identifier of the job for the execution system
        @type number:
        @raise NotImplementedError: always, in this base class
        """
        raise NotImplementedError( "Must be Implemented in child classes" )


    def _logError( self , dirPath , serviceName , jobKey , userMsg = None , logMsg = None ):
        """
        Report an error to the user and/or to the log file.

        @param dirPath: the directory of the job whose status is updated
        @type dirPath: string
        @param serviceName: the name of the service, used in the log message
        @type serviceName: string
        @param jobKey: the key of the job, used in the log message
        @type jobKey: string
        @param userMsg: when true-ish, the status of the job is set to a Status
                        built with code 5 and this message
        @type userMsg: string
        @param logMsg: when true-ish, logged at error level on _log
        @type logMsg: string
        """
        if userMsg :
            self.status_manager.setStatus( dirPath, Status( code = 5 , message = userMsg ) )

        if logMsg :
            _log.error( "%s/%s : %s" %( serviceName ,
                                        jobKey ,
                                        logMsg
                                      )
                        )
    def testSetstatus( self):
        """
        setStatus must replace a status that is not ended, and must leave an
        ended status ( here 'finished' ) untouched.
        """
        StatusManager.create( self.jobDir, Status( string='submitted' ) )
        manager = StatusManager()

        # submitted -> pending : accepted
        pending = Status( string= 'pending' )
        manager.setStatus( self.jobDir , pending )
        self.assertEqual( manager.getStatus( self.jobDir ) , pending )

        # pending -> finished : accepted
        finished = Status( string='finished' , message = 'your job finnished with an unusual status code, check youre results carefully')
        manager.setStatus( self.jobDir , finished )
        self.assertEqual( manager.getStatus( self.jobDir ) , finished )

        # finished -> running : an ended status cannot be changed anymore
        running = Status( string= 'running')
        manager.setStatus( self.jobDir , running )
        current = manager.getStatus( self.jobDir )
        self.assertNotEqual( current , running )
        self.assertEqual( current , finished )
        os.unlink( self.filename )
예제 #11
0
 def __init__(self, id=None, workflow=None, email=None, email_notify = 'auto', session=None, workflowID = None):
     """
     Rebuild an existing workflow job from its id, or create a new one from
     a workflow definition.

     @param id: the identifier of this workflow (it's used to rebuild WorkflowJob using its id)
     @type id: string
     @param workflow: the workflow definition used to create a new job
     @type workflow: a L{Workflow} instance
     @param email: the user email address (ignored when a session is given:
                   the email of the session is used instead)
     @type email: L{EmailAddress} instance or a string
     @param email_notify: if the user must be or not notified of the results at the end of the Job.
     the 3 authorized values for this argument are:
       - 'true' to notify the results to the user
       - 'false' to Not notify the results to the user
       - 'auto' to notify the results based on the job elapsed time and the config  EMAIL_DELAY
     @type email_notify: string
     @param session: the session owner of this workflow (if session is set workflowID must be None )
     @type session: a L{Session} instance
     @param workflowID: the ID of the workflow owner of this workflow
     @type workflowID: string
     @raise MobyleError: if both session and workflowID are given, or if no
                         email address is available while email_notify is not 'false'
     """
     self.cfg = ConfigManager.Config()
     self.status_manager = StatusManager()
     if id:
         log.debug("accessing WorkflowJob %s" %(id))
         self.id = id
         self.jobState = JobState( id )
     else:
         log.debug("creating WorkflowJob for workflow '%s'" %(workflow.name))
         self.workflow = workflow
         if session and workflowID:
             msg = "try to instanciate a workflow with 2 owners: session %s & workflowID %s" %( session.getKey(),
                                                                                                workflowID
                                                                                               )
             log.error( msg )
             raise MobyleError( msg )
         self.session = session
         # Default value: without it the attribute did not exist when neither
         # a session nor an email was given, and the check below failed with
         # an AttributeError instead of the intended MobyleError.
         self.email = None
         if session :
             session_email = session.getEmail()
             if session_email:
                 self.email = EmailAddress( session_email )
         elif email : #there is an email without session
             if isinstance( email , EmailAddress ):
                 self.email = email
             else:
                 self.email = EmailAddress( email )

         self.email_notify =  email_notify
         if self.email_notify != 'false' and not self.email:
             raise MobyleError( "email adress must be specified when email_notify is set to %s" % email_notify )

         self.parameters = {}
         for parameter in self.workflow.parameters:
             # setting parameters which have a default value (important for hidden parameters which are not
             # accessed by JobFacade...
             if not(parameter.isout) and parameter.vdef is not None:
                 self.set_value(parameter.name, value=str(parameter.vdef))
         # job is just an "environment" folder for the job
         # it contains the instanciation of the job runner which seems to be hardcoded as "command runner"...
         self._job = Job( service = self.workflow,
                          cfg = self.cfg,
                          userEmail = self.email,
                          session = self.session,
                          workflowID = workflowID ,
                          )
         self.jobState = self._job.jobState
         self.id = self._job.getURL()
예제 #12
0
class WorkflowJob(object):
    
    def __init__(self, id=None, workflow=None, email=None, email_notify = 'auto', session=None, workflowID = None):
        """
        Rebuild an existing workflow job from its id, or create a new one from
        a workflow definition.

        @param id: the identifier of this workflow (it's used to rebuild WorkflowJob using its id)
        @type id: string
        @param workflow: the workflow definition used to create a new job
        @type workflow: a L{Workflow} instance
        @param email: the user email address (ignored when a session is given:
                      the email of the session is used instead)
        @type email: L{EmailAddress} instance or a string
        @param email_notify: if the user must be or not notified of the results at the end of the Job.
        the 3 authorized values for this argument are:
          - 'true' to notify the results to the user
          - 'false' to Not notify the results to the user
          - 'auto' to notify the results based on the job elapsed time and the config  EMAIL_DELAY
        @type email_notify: string
        @param session: the session owner of this workflow (if session is set workflowID must be None )
        @type session: a L{Session} instance
        @param workflowID: the ID of the workflow owner of this workflow
        @type workflowID: string
        @raise MobyleError: if both session and workflowID are given, or if no
                            email address is available while email_notify is not 'false'
        """
        self.cfg = ConfigManager.Config()
        self.status_manager = StatusManager()
        if id:
            log.debug("accessing WorkflowJob %s" %(id))
            self.id = id
            self.jobState = JobState( id )
        else:
            log.debug("creating WorkflowJob for workflow '%s'" %(workflow.name))
            self.workflow = workflow
            if session and workflowID:
                msg = "try to instanciate a workflow with 2 owners: session %s & workflowID %s" %( session.getKey(),
                                                                                                   workflowID
                                                                                                  )
                log.error( msg )
                raise MobyleError( msg )
            self.session = session
            # Default value: without it the attribute did not exist when
            # neither a session nor an email was given, and the check below
            # failed with an AttributeError instead of the intended MobyleError.
            self.email = None
            if session :
                session_email = session.getEmail()
                if session_email:
                    self.email = EmailAddress( session_email )
            elif email : #there is an email without session
                if isinstance( email , EmailAddress ):
                    self.email = email
                else:
                    self.email = EmailAddress( email )

            self.email_notify =  email_notify
            if self.email_notify != 'false' and not self.email:
                raise MobyleError( "email adress must be specified when email_notify is set to %s" % email_notify )

            self.parameters = {}
            for parameter in self.workflow.parameters:
                # setting parameters which have a default value (important for hidden parameters which are not
                # accessed by JobFacade...
                if not(parameter.isout) and parameter.vdef is not None:
                    self.set_value(parameter.name, value=str(parameter.vdef))
            # job is just an "environment" folder for the job
            # it contains the instanciation of the job runner which seems to be hardcoded as "command runner"...
            self._job = Job( service = self.workflow,
                             cfg = self.cfg,
                             userEmail = self.email,
                             session = self.session,
                             workflowID = workflowID ,
                             )
            self.jobState = self._job.jobState
            self.id = self._job.getURL()
            
    def getDir(self):        
        """ returns the absolute path of the workflow job directory """
        return self.jobState.getDir()
    
    def set_status(self, status):
        """Log the change at debug level, then store status for the job directory."""
        log.debug("setting job %s status to %s" % (self.id, status))
        manager = self.status_manager
        manager.setStatus( self.getDir() , status )
        
    def set_value(self, parameter_name, value=None, src=None, srcFileName=None):
        """
        Set a parameter value.

        The value (or its source) is remembered in self.parameters. Unless the
        value is true-ish and equal to the default of the workflow parameter,
        it is also recorded in the job state, through a data file for the
        file datatypes, and the job state is committed.

        @param parameter_name: the name of a parameter of self.workflow
                               (an unknown name ends in an IndexError)
        @param value: the value of the parameter
        @param src: where the data must be taken from when no value is given
        @param srcFileName: the name of the file in src
        @raise NotImplementedError: for an output parameter whose datatype is not a file
        """
        matching = [p for p in self.workflow.parameters if p.name==parameter_name]
        wf_parameter = matching[0]
        if value is None and src is None:
            log.error("no VALUE or SOURCE URL specified for %s parameter." % parameter_name)
        elif value is not None:
            log.debug("setting %s parameter value to %s" %(parameter_name, value))
        else:
            log.debug("copying %s parameter value from %s/%s" %(parameter_name, src,srcFileName))
        self.parameters.update( { parameter_name : value ,
                                  parameter_name + '.src' : src ,
                                  parameter_name + '.srcFileName' : srcFileName ,
                                  } )
        if value and value==wf_parameter.vdef:
            # nothing to record in the job state for a default value
            log.debug("setting %s parameter value to default value %s" %(parameter_name, wf_parameter.vdef))
            return
        # save input value in a file
        # link this file from the JobState xml
        datatype = wf_parameter.type.datatype
        class_name = datatype.class_name
        superclass_name = datatype.superclass_name
        factory = DataTypeFactory()
        if superclass_name in (None, ""):
            dt = factory.newDataType(class_name)
        else:
            dt = factory.newDataType(superclass_name, class_name)
        mobyle_type = MobyleType(dt)
        job_parameter = Parameter(mobyle_type, name=parameter_name)
        job_parameter._isout = wf_parameter.isout
        if dt.isFile():
            if src:
                src = DataProvider.get(src)
            file_name, size = mobyle_type.toFile( value , self , parameter_name+'.data', src , srcFileName  )
            if wf_parameter.isout:
                self.jobState.setOutputDataFile(parameter_name, [(file_name, size, None)])
            else:
                self.jobState.setInputDataFile(parameter_name, (file_name, size, None))
        elif wf_parameter.isout:
            raise NotImplementedError() # so far Mobyle does not manage non-file outputs
        else:
            self.jobState.setInputDataValue(parameter_name, value)
        self.jobState.commit()
    
    def setValue(self, parameter_name, value=None, src=None, srcFileName=None):
        """MobyleJob-style set value method, called from JobFacade"""
        if type(value)==tuple:
            return self.set_value(parameter_name, value=value[1], src=value[2],srcFileName=value[3])
        else:
            return self.set_value(parameter_name, value=value, src=src,srcFileName=srcFileName)
        
    def getJobid(self):
        """MobyleJob-style get job id method, called from JobFacade"""
        return self.id

    def getDate(self):
        """MobyleJob-style get date method, called from JobFacade"""
        return time.strptime(self.get_date(),"%x  %X")
    
    def getStatus(self):
        """MobyleJob-style get status method, called from JobFacade"""
        return self.status_manager.getStatus( self.getDir() )
            
    def get_value(self, parameter_name):
        """get a parameter value"""
        return self.parameters.get(parameter_name,None)

    def get_date(self):
        """get the job date as a string"""
        return self.jobState.getDate()

    def get_id(self):
        """get the job id"""
        return self.id
    
    def run(self):
        """
        Submit the job asynchronously.

        The job is validated and its status set to submitted (code 1), then
        the process forks. The child detaches itself (setsid), redirects its
        standard streams (stdin from /dev/null, stdout and stderr appended to
        the "log" file of the job directory), publishes the .admin file of
        the job in the admin directory through a symbolic link while
        self.srun() runs, zips the results, mails them according to
        self.email_notify and exits with code 0.

        @raise WorkflowJobError: in the child, if the symbolic link cannot be
                                 created or removed (the job status is set to
                                 error first)
        """
        self.validate()
        self.set_status(Status( code = 1 )) # status = submitted

        # over_limit is expected to raise (a UserValueError according to the
        # original comment) if the number of jobs is over the accepted limit
        if self.email is not None:
            self._job.over_limit( self.email , '' )

        self._child_pid = os.fork()
        if self._child_pid==0:
            #Child code
            os.setsid()
            # 0o664 is the portable spelling (Python >= 2.6 and 3) of the octal mode 0664
            log_fd = os.open("%s/log" % self.jobState.getDir(), os.O_APPEND | os.O_WRONLY | os.O_CREAT , 0o664 )
            devnull = os.open( "/dev/null" , os.O_RDWR )
            os.dup2( devnull , sys.stdin.fileno() )
            os.close( devnull)
            os.dup2( log_fd  , sys.stdout.fileno() )
            os.dup2( log_fd  , sys.stderr.fileno() )
            os.close( log_fd )
            atexit.register( self.log , "child exit for workflow id: %s" % self.get_id())

            ################################################
            service = self._job.getService()
            serviceName = service.getName()
            jobKey = self._job.getKey()

            linkName = "%s/%s.%s" %( self.cfg.admindir() ,
                                     serviceName ,
                                     jobKey
                                     )
            try:
                os.symlink(
                           os.path.join( self.getDir() , '.admin') ,
                           linkName
                           )
            except OSError as err:
                self.set_status(Status(string="error", message="workflow execution failed"))
                msg = "can't create symbolic link %s in ADMINDIR: %s" %( linkName , err )
                log.critical( "%s/%s : %s" %( serviceName, jobKey, msg ), exc_info = True )
                raise WorkflowJobError( msg )

            ################################################
            t0 = time.time()
            self.srun()
            t1 = time.time()
            ################################################
            try:
                os.unlink( linkName )
            except OSError as err:
                self.set_status(Status(string="error", message="workflow execution failed"))
                msg = "can't remove symbolic link %s in ADMINDIR: %s" %( linkName , err )
                log.critical( "%s/%s : %s" %( serviceName, jobKey, msg ), exc_info= True )
                raise WorkflowJobError( msg )
            ################################################
            try:
                zipFileName = self.zip_results()
            except Exception :
                # a failure of the zip must not prevent the notification
                msg = "an error occured during the zipping results :\n\n"
                log.critical( "%s : %s" %( self.id , msg ) , exc_info = True)
                zipFileName = None

            # 'auto' : notify only if the job ran longer than the configured delay
            # 'true' : always notify
            # anything else ( 'false' ) : never notify
            # The 'true' case used to be nested under the elapsed time test
            # of 'auto' and therefore never sent anything.
            if self.email_notify == 'auto':
                must_notify = ( t1 - t0 ) > self.cfg.email_delay()
            else:
                must_notify = self.email_notify == 'true'
            if must_notify:
                emailResults(  self.cfg ,
                               self.email, #userEmail,
                               registry,
                               self.id,
                               self.getDir(),
                               self.workflow.getName(),
                               self._job.getKey(),
                               FileName = zipFileName )
            sys.exit(0) #exit with no error