コード例 #1
0
    def registerToMaster(self):
        """Register this worker with the master and start the heartbeat thread.

        The master reference is copied with the bootstrap contact timeout and
        wrapped in a StandingCall, then registerWorker() is called with this
        worker's uuid.  The call yields the worker id (stored in self.wid) and
        the IOR of a file server, for which a FileTransferClient is created
        and stored in self.ftc.  The worker's vcard is written to 'vcard.txt'
        in the current directory and uploaded to the worker's workspace
        subdirectory.  Finally the HeartbeatThread is created and started.

        Raises:
            CORBA.TRANSIENT: logged as an error and re-raised.
            DIANE_CORBA.XHangup: logged as a warning and re-raised.
        """
        try:
            bootmaster = StandingCall(copy_ref_with_timeout(self.master,config.BOOTSTRAP_CONTACT_TIMEOUT), 1, max_repeat=config.BOOTSTRAP_CONTACT_REPEAT)

            self.wid,file_server_ior = bootmaster.registerWorker(self.uuid,1) # ordinary workers have capacity 1

            file_server = self.program.orb.string_to_object(file_server_ior)

            from diane.FileTransfer import FileTransferClient
            self.ftc = FileTransferClient(file_server,self.uuid)
            import diane.workspace
            # collect and upload vcard
            vcard = self.make_vcard()
            # close the file explicitly so that its content is flushed to disk
            # before the upload, independently of garbage collection
            vcard_file = open('vcard.txt','w')
            try:
                vcard_file.write(repr(vcard))
            finally:
                vcard_file.close()
            self.ftc.upload('vcard.txt',os.path.join(diane.workspace.workerSubdir(self.wid),'vcard.txt'))

            #TEST - multiple registrations
            #for i in range(1000):
            #    self.wid = self.master.registerWorker(self._this())
            #logger.info('master ping %s',str(self.master.ping()))

            self.heartbeat_thread = HeartbeatThread(self,self.master)
            self.heartbeat_thread.start()
        except CORBA.TRANSIENT:
            # let the logging module do the formatting (same style as the warning below)
            logger.error('unable to establish connection to the master after timeout=%d seconds',config.BOOTSTRAP_CONTACT_TIMEOUT)
            raise
        except DIANE_CORBA.XHangup:
            logger.warning('I was refused a registration (my peer_id=%s)',self.uuid)
            raise
コード例 #2
0
 def __init__(self,ior,program):
     """Connect to the file transfer server identified by ior.

     ior     -- stringified object reference, resolved with program.orb
     program -- object providing the ORB through its 'orb' attribute

     The resolved reference is narrowed to DIANE_CORBA.FileTransferServer
     and wrapped in a FileTransferClient (self.client).  The default
     transfer options (self.opts) are created with md5_lazy=True.
     """
     server_ref = program.orb.string_to_object(ior)
     import DIANE_CORBA, CORBA
     from diane.FileTransfer import FileTransferClient
     narrowed_server = server_ref._narrow(DIANE_CORBA.FileTransferServer)
     self.client = FileTransferClient(narrowed_server)
     self.opts = None
     ## MD5OPTS - BEGIN
     from diane.FileTransfer import FileTransferOptions
     default_opts = FileTransferOptions(md5_lazy=True)
     self.opts = default_opts
コード例 #3
0
class DIANEFileTransferClient:
    """Convenience wrapper around diane.FileTransfer.FileTransferClient.

    The instance keeps the underlying client in self.client and the default
    transfer options in self.opts.
    """
    def __init__(self,ior,program):
        """Connect to the file transfer server identified by ior.

        ior     -- stringified object reference, resolved with program.orb
        program -- object providing the ORB through its 'orb' attribute
        """
        server_ref = program.orb.string_to_object(ior)
        import DIANE_CORBA, CORBA
        from diane.FileTransfer import FileTransferClient
        narrowed_server = server_ref._narrow(DIANE_CORBA.FileTransferServer)
        self.client = FileTransferClient(narrowed_server)
        self.opts = None
        ## MD5OPTS - BEGIN
        from diane.FileTransfer import FileTransferOptions
        default_opts = FileTransferOptions(md5_lazy=True)
        self.opts = default_opts
        ## MD5OPTS - END

    def upload_file(self,name,dest=None,opts=None):
        """Upload name to dest and return the result of client.upload().

        When opts is not given (or is false) the instance defaults are used.
        """
        effective_opts = opts or self.opts ## MD5OPTS
        return self.client.upload(name,dest,opts=effective_opts)

    def download_file(self,name,dest=None):
        """Download name and return the result of client.download().

        When dest is not given (or is false) name is used as the destination.
        The instance default options are always applied.
        """
        target = dest or name
        return self.client.download(target,name,opts=self.opts) ## MD5OPTS
コード例 #4
0
class WorkerAgent(BaseThread):
    """ Worker Agent.
    """
    def __init__(self,program,peer,ds_enabled,labels):
        """Create the worker agent thread (named 'WorkerAgent').

        program    -- stored as self.program
        peer       -- stored as self.ds when ds_enabled is true,
                      otherwise as self.master (the other one is set to None)
        ds_enabled -- selects which of the two attributes receives peer
        labels     -- stored as self.labels

        A fresh uuid string is generated for the worker.  The Ganga job UUID
        is taken from the GANGA_JOB_UUID environment variable or, if that is
        empty, from the file 'ganga_job_uuid' in the current directory; it
        stays an empty string if neither is available.
        """
        BaseThread.__init__(self,name='WorkerAgent')
        from diane.util.compatibility import uuid
        self.uuid = str(uuid())
        self.program = program
        self.ds_enabled = ds_enabled
        if ds_enabled:
            self.ds = peer
            self.master = None
        else:
            self.ds = None
            self.master = peer
        self.labels = labels
        self.heartbeat_thread = None
        self.wid = None
        self.application = None
        self.finalization_cookie = None
        self.ftc = None #file transfer client connected to the default file transfer server

        # if this worker is run as a Ganga job (optional) then retrieve the Ganga Job UUID to make it available
        # in monitoring/vcards etc...

        self.ganga_job_uuid =  os.environ.get('GANGA_JOB_UUID', '')

        if not self.ganga_job_uuid:
            try:
                uuid_file = open('ganga_job_uuid', 'r')
                # close the handle explicitly instead of leaving it to garbage collection
                try:
                    self.ganga_job_uuid = uuid_file.read()
                finally:
                    uuid_file.close()
            except IOError:
                # the file is optional: keep the empty string
                pass


    # The worker agent can be pickled so that the out-of-process Worker Servlet gets (restricted) access to its attributes.
    def __getstate__(self):
        """Return a copy of the instance dictionary prepared for pickling.

        The 'program', 'ds', 'master', 'application' and 'heartbeat_thread'
        entries are set to None, and so is every entry whose name contains
        '_Thread__'.  The instance itself is not modified.
        """
        snapshot = dict(self.__dict__)
        for attr in ('program', 'ds', 'master', 'application', 'heartbeat_thread'):
            snapshot[attr] = None
        # blank out all private data members (also the ones coming from the thread base classes)
        # FIXME: needs to be more generic
        mangled_names = [key for key in snapshot if '_Thread__' in key]
        for key in mangled_names:
            snapshot[key] = None
        return snapshot
    
    def make_vcard(self):
        """Return the result of vcard.make_vcard() with the Ganga job UUID passed as an extra."""
        import vcard as vcard_module
        extra_fields = {'GANGA_JOB_UUID': self.ganga_job_uuid}
        return vcard_module.make_vcard(extras = extra_fields)


    def registerToMaster(self):
        """Register this worker with the master and start the heartbeat thread.

        The master reference is copied with the bootstrap contact timeout and
        wrapped in a StandingCall, then registerWorker() is called with this
        worker's uuid.  The call yields the worker id (stored in self.wid) and
        the IOR of a file server, for which a FileTransferClient is created
        and stored in self.ftc.  The worker's vcard is written to 'vcard.txt'
        in the current directory and uploaded to the worker's workspace
        subdirectory.  Finally the HeartbeatThread is created and started.

        Raises:
            CORBA.TRANSIENT: logged as an error and re-raised.
            DIANE_CORBA.XHangup: logged as a warning and re-raised.
        """
        try:
            bootmaster = StandingCall(copy_ref_with_timeout(self.master,config.BOOTSTRAP_CONTACT_TIMEOUT), 1, max_repeat=config.BOOTSTRAP_CONTACT_REPEAT)

            self.wid,file_server_ior = bootmaster.registerWorker(self.uuid,1) # ordinary workers have capacity 1

            file_server = self.program.orb.string_to_object(file_server_ior)

            from diane.FileTransfer import FileTransferClient
            self.ftc = FileTransferClient(file_server,self.uuid)
            import diane.workspace
            # collect and upload vcard
            vcard = self.make_vcard()
            # close the file explicitly so that its content is flushed to disk
            # before the upload, independently of garbage collection
            vcard_file = open('vcard.txt','w')
            try:
                vcard_file.write(repr(vcard))
            finally:
                vcard_file.close()
            self.ftc.upload('vcard.txt',os.path.join(diane.workspace.workerSubdir(self.wid),'vcard.txt'))

            #TEST - multiple registrations
            #for i in range(1000):
            #    self.wid = self.master.registerWorker(self._this())
            #logger.info('master ping %s',str(self.master.ping()))

            self.heartbeat_thread = HeartbeatThread(self,self.master)
            self.heartbeat_thread.start()
        except CORBA.TRANSIENT:
            # let the logging module do the formatting (same style as the warning below)
            logger.error('unable to establish connection to the master after timeout=%d seconds',config.BOOTSTRAP_CONTACT_TIMEOUT)
            raise
        except DIANE_CORBA.XHangup:
            logger.warning('I was refused a registration (my peer_id=%s)',self.uuid)
            raise
                
    def run(self):
        """Thread body: register, initialize the application and process tasks.

        Sequence, as far as visible in this block:
          1. log the configuration and register with the master
             (registerToMaster);
          2. fetch the (app_boot, app_init) pair from the master, create the
             application proxy, register finalize_application as an at-exit
             handler of the program, initialize the application and send the
             initialization output to the master;
          3. until should_stop() is true: sleep PULL_REQUEST_DELAY, fetch a
             task, run do_work() on it and report the result to the master.

        A status message is sent through MSGWrap before and after the main
        steps.  An ApplicationFailure raised by do_work() is reported to the
        master as the streamed exception with error=1.  ApplicationFailure
        and ApplicationCritical raised anywhere else inside the outer try
        block are caught and ignored, which ends the method.
        """

        import MSGWrap
        
        from diane.config import log_configuration
        log_configuration(title='initial configuration')        

        # payload passed to every MSGWrap.sendStatus() call below; extended as more ids become known
        msg_data = { '_worker_uuid' : self.uuid }

        try:
            self.registerToMaster()

            # wrapper through which the master is called below; it is given the heartbeat delay and the should_stop callback
            master = StandingCall(self.master, config.HEARTBEAT_DELAY, should_stop = self.should_stop)
            
            
            app_boot,app_init = master.get_init_data(self.uuid) #(config.HEARTBEAT_DELAY,-1,self.should_stop,self.master,'get_init_data',self.uuid)
            # app_boot is decoded here only to read the ids for the status messages;
            # the still-encoded app_boot is what is passed to create_application_proxy()
            _boot = streamer.loads(app_boot)
            msg_data['_master_uuid'] = _boot.master_uuid
            msg_data['_runid'] = _boot.runid
            # NOTE(review): os is not referenced anywhere in the visible part of this method - the import looks redundant
            import os
            msg_data['ganga_job_uuid'] = self.ganga_job_uuid

            # FIXME: if worker restart enabled, save diane.config.__all_configs and restore it after run has finished
            MSGWrap.sendStatus('_worker_create_application_proxy_start', msg_data)
            self.application = create_application_proxy(app_boot,app_init,agent=self)
            MSGWrap.sendStatus('_worker_create_application_proxy_finish', msg_data)
            
            self.program.registerAtExitHandler(self.finalize_application)
            
            MSGWrap.sendStatus('_worker_initialize_start', msg_data)
            app_init_output = self.application.initialize(app_init)
            MSGWrap.sendStatus('_worker_initialize_finish', msg_data)

            # config may have been updated and the value of config.HEARTBEAT_DELAY may have changed -> need to create the object again
            # FIXME: use a REFERENCE to config.HEARTBEAT_DELAY
            master = StandingCall(self.master, config.HEARTBEAT_DELAY, should_stop = self.should_stop)

            master.put_init_result(self.uuid,app_init_output,0) #(config.HEARTBEAT_DELAY,-1,self.should_stop,self.master,'put_init_result',self.uuid,app_init_output,0)

            # task loop: one task is fetched, processed and reported per iteration
            while not self.should_stop():
                time.sleep(config.PULL_REQUEST_DELAY) # PENDING: this parameter should be dynamically controlled by the master
                tid,task_data = master.get_task_data(self.uuid) #(config.HEARTBEAT_DELAY,-1,self.should_stop,self.master,'get_task_data',self.uuid)
                try:
                    msg_data['tid'] = tid
                    MSGWrap.sendStatus('_worker_do_work_start', msg_data)
                    task_result = self.application.do_work(task_data)
                    MSGWrap.sendStatus('_worker_do_work_finish', msg_data)
                    error = 0
                except diane.application.ApplicationFailure,x: # recoverable problem
                    # the streamed exception replaces the task result; the '_worker_do_work_finish' status is not sent in this case
                    task_result = streamer.dumps(x)
                    error = 1
                    #FIXME: reporting failure is not yet well-defined
                
                master.put_task_result(self.uuid,tid,task_result,error) #(config.HEARTBEAT_DELAY,-1,self.should_stop,self.master,'put_task_result',self.uuid,tid,task_result,error)

        # both handlers below swallow the exception without logging or reporting it
        except diane.application.ApplicationFailure,x: # recoverable problem but raised by the application init
            pass
        except diane.application.ApplicationCritical,x: # unrecoverable problem
            pass