Beispiel #1
0
 def processLine(self):
     """
     Read requests from standard input and dispatch them until input ends.

     A daemon thread running do_CALLBACK is started first. Every line read
     is split on whitespace: a request with exactly four fields whose
     operation is registered in self.methods is executed, anything else
     (including a blank line) is answered with 'WRONG COMMAND'.
     FINALIZE, INIT, SUBMIT and RECOVER run in the reading thread; the
     remaining operations are queued on the thread pool.

     The loop stops at end of input. Any exception raised while reading
     or dispatching is logged as a warning and also stops the loop.
     """
     try:
         worker = threading.Thread(target=self.do_CALLBACK)
         # The 'daemon' attribute replaces the deprecated setDaemon().
         worker.daemon = True
         worker.start()
         self._configure = Configuration()
         pool = ThreadPool(self._min_thread, self._max_thread)
         while True:
             line = sys.stdin.readline()
             if not line:
                 # readline() returns '' only at end of input: stop
                 # reading instead of failing on an empty field list.
                 self.logger.debug('End of input, leaving the command loop')
                 break
             fields = line.split()
             command = ' '.join(fields)
             self.logger.debug(command)
             # A blank line has no fields; it is reported as a wrong
             # command instead of raising IndexError.
             OPERATION = fields[0].upper() if fields else None
             if len(fields) == 4 and OPERATION in self.methods:
                 handler = self.methods[OPERATION]
                 if OPERATION in ('FINALIZE', 'INIT', 'SUBMIT', 'RECOVER'):
                     handler(self, command)
                 else:
                     pool.add_task(handler, self, command)
             else:
                 out = 'WRONG COMMAND'
                 self.message.stdout(out)
                 self.logger.debug(out)
     except Exception as err:
         self.logger.warning(str(err), exc_info=1)
Beispiel #2
0
 def _update_com(self, host):
     """
     Return the communicator of the resource that matches ``host``.

     ``host`` is either a resource name or a string of the form
     ``<resource>::<rest>``; in the latter case only the part before
     ``::`` is compared with the configured resource names. Resources
     that define ``cloud_provider`` are never matched. The communicator
     is created on first use and cached in self._communicator.

     The configuration is reloaded and checked on every call, and the
     whole lookup runs while holding self._lock.

     @param host : host part of the URL of the request
     @type host : string
     @return : the communicator of the matching resource, or None when
         no configured resource matches
     @raise Exception : if the configuration check reports errors
     """
     with self._lock:
         self.logger.debug("_update_com function - It's looking for a communicator for the host: %s" % host)
         if not self._configure:
             self._configure = Configuration()
         # TODO: this needs to be optimized - the configuration is
         # reloaded on every call instead of only when it has changed.
         self._configure.load()
         errors = self._configure.check()
         if errors:
             message = ' '.join(errors)
             self.logger.error(message)
             raise Exception(message)
         # Only the part before '::' names the resource.
         wanted = host.split('::', 1)[0]
         for resname, resdict in list(self._configure.resources.items()):
             self.logger.debug("    The current resource to which it's being compared is %s" % resname)
             if 'cloud_provider' in resdict:
                 continue
             if resname != wanted:
                 continue
             # This must be an independent check: as the last arm of the
             # matching elif chain it was skipped for '<resource>::<rest>'
             # hosts, which then failed with KeyError below.
             if resname not in self._communicator:
                 self.logger.debug("    Since they are the same, its Communicator() will be returned")
                 self._communicator[resname] = self._configure.make_communicators()[resname]
             communicator = self._communicator[resname]
             self.logger.debug("\nCommunicator for %s:\n    communicator: %s\n    username: %s\n    frontend: %s" % (resname, resdict['communicator'], communicator.username, communicator.frontend))
             return communicator
         return None
Beispiel #3
0
def run(arg):
    """
    Configure console logging and run the requested resource action.

    Messages are written to standard output, at DEBUG level when
    arg['--dbg'] is set and at INFO level otherwise. The action is
    resource.edit() when arg['edit'] is set, resource.check_frontends()
    when arg['check'] is set, and resource.list() otherwise.

    A KeyboardInterrupt is silently ignored; any other exception
    (including the one raised when the daemon is not alive) is reported
    through logging.error and not propagated.

    @param arg : parsed command line options
    @type arg : dict
    """
    # The logger did not work because "import logging" is executed several
    # times and the first one wins. The module is reloaded to make it work:
    # https://stackoverflow.com/questions/20240464/python-logging-file-is-not-working-when-using-logging-basicconfig
    try:
        from importlib import reload
    except ImportError:
        # Python 2: importlib has no reload(); the 'imp' module (removed
        # in Python 3.12) still provides it there.
        from imp import reload
    reload(logging)
    logging.basicConfig(format='%(message)s',
                        level=logging.DEBUG if arg['--dbg'] else logging.INFO,
                        stream=sys.stdout)

    try:
        config = Configuration()
        daemon = Daemon()
        if not daemon.is_alive():
            raise Exception('DRM4G is stopped.')
        resource = Resource(config)
        if arg['edit']:
            resource.edit()
        elif arg['check']:
            resource.check_frontends()
        else:
            resource.list()
    except KeyboardInterrupt:
        pass
    except Exception as err:
        logging.error(str(err))
Beispiel #4
0
def run(arg):
    """
    Initialise, delete or list the identity of a configured resource.

    The resource is named by arg['<resource_name>']. An Agent is used
    unless the resource communicator is 'local', and a Proxy is used when
    the resource lrms is 'cream' or 'rocci'; a resource needing neither
    is rejected. arg['init'] and arg['delete'] select the action, and
    without either of them the current identity is listed/checked.

    Every exception is reported through logger.error and not propagated.

    @param arg : parsed command line options
    @type arg : dict
    """
    try:
        if not Daemon().is_alive():
            raise Exception('DRM4G is stopped.')
        config = Configuration()
        config.load()
        problems = config.check()
        name = arg['<resource_name>']
        if problems:
            raise Exception("Review the configuration of '%s'." % (name))
        if name not in config.resources:
            raise Exception("'%s' is not a configured resource." % (name))
        lrms = config.resources.get(name)['lrms']
        communicator = config.resources.get(name)['communicator']
        needs_proxy = lrms in ('cream', 'rocci')
        needs_agent = communicator != 'local'
        if not (needs_proxy or needs_agent):
            raise Exception("'%s' does not have an identity to configure." %
                            (name))
        if needs_proxy:
            comm = config.make_communicators()[name]
            if communicator == 'op_ssh':
                # paramiko will always be used to renew the grid
                # certificate, so the communicator is switched temporarily
                config.resources.get(name)['communicator'] = 'pk_ssh'
                comm = config.make_communicators()[name]
            proxy = Proxy(config.resources[name], comm)
            # Restore the configured communicator and rebuild communicators
            config.resources.get(name)['communicator'] = communicator
            config.make_communicators()
        if needs_agent:
            agent = Agent(config.resources[name])
        if arg['init']:
            if needs_agent:
                agent.start()
                agent.add_key(arg['--lifetime'])
                agent.copy_key()
            if needs_proxy:
                proxy.configure()
                proxy.create(arg['--lifetime'])
        elif arg['delete']:
            if needs_proxy:
                proxy.destroy()
            if needs_agent:
                agent.delete_key()
        else:
            if needs_agent:
                agent.list_key()
            if needs_proxy:
                proxy.check()
    except Exception as failure:
        logger.error(str(failure))
Beispiel #5
0
 def processLine(self):
     """
     Read requests from standard input and dispatch them until input ends.

     Every line read is split on whitespace: a request with exactly six
     fields whose operation is registered in self.methods is executed,
     anything else (including a blank line) is answered with
     'WRONG COMMAND'. FINALIZE and INIT run in the reading thread; the
     remaining operations are queued on the thread pool.

     The loop stops at end of input. Any exception raised while reading
     or dispatching is logged as a warning and also stops the loop.
     """
     try:
         pool = ThreadPool(self._min_thread, self._max_thread)
         self._configure = Configuration()
         while True:
             line = sys.stdin.readline()
             if not line:
                 # readline() returns '' only at end of input: stop
                 # reading instead of failing on an empty field list.
                 self.logger.debug('End of input, leaving the command loop')
                 break
             fields = line.split()
             command = ' '.join(fields)
             self.logger.debug(command)
             # A blank line has no fields; it is reported as a wrong
             # command instead of raising IndexError.
             OPERATION = fields[0].upper() if fields else None
             if len(fields) == 6 and OPERATION in self.methods:
                 handler = self.methods[OPERATION]
                 if OPERATION in ('FINALIZE', 'INIT'):
                     handler(self, command)
                 else:
                     pool.add_task(handler, self, command)
             else:
                 out = 'WRONG COMMAND'
                 self.message.stdout(out)
                 self.logger.debug(out)
     except Exception as err:
         self.logger.warning(str(err), exc_info=1)
Beispiel #6
0
    def do_DISCOVER(self, args, output=True):
        """
        Discovers hosts (i.e. DISCOVER - - -)

        The configuration is reloaded and checked, then every resource
        that is neither disabled ('enable' set to 'false') nor a cloud
        resource ('cloud_provider' defined) is connected to and asked for
        its hosts. A failure on one resource is logged and does not stop
        the discovery of the others.

        @param args : arguments of operation
        @type args : string
        @param output : when true the result line is also written through
            self.message.stdout; it is always logged at debug level
        @type output : boolean
        """
        OPERATION, HID, HOST, ARGS = args.split()
        try:
            self._config = Configuration()
            self._config.load()
            errors = self._config.check()
            if errors:
                # Explicit raise: an assert would be stripped under
                # "python -O" and let a broken configuration through.
                raise Exception(' '.join(errors))

            self._resources = self._config.make_resources()
            communicators = self._config.make_communicators()
            found = []
            for resname in sorted(self._resources.keys()):
                settings = self._config.resources[resname]
                if settings['enable'].lower() == 'false':
                    continue
                if 'cloud_provider' in settings:
                    continue
                try:
                    resource = self._resources[resname]['Resource']
                    resource.Communicator = communicators[resname]
                    resource.Communicator.connect()
                    try:
                        # Each entry keeps the leading blank of the
                        # original "hosts + ' ' + ..." concatenation.
                        found.append(" " + resource.hosts())
                    finally:
                        # Close the connection even when hosts() fails.
                        resource.Communicator.close()
                except Exception as err:
                    self.logger.error(err, exc_info=1)
            hosts = "".join(found)
            out = 'DISCOVER %s SUCCESS %s' % (HID, hosts)
        except Exception as err:
            out = 'DISCOVER - FAILURE %s' % str(err)
            self.logger.error(err, exc_info=1)
        if output:
            self.message.stdout(out)
        self.logger.debug(out)
Beispiel #7
0
def run(arg):
    """
    Configure console logging and run the requested resource action.

    Messages go to standard output, at DEBUG level when arg['--dbg'] is
    set and at INFO level otherwise. arg['edit'] selects resource.edit(),
    arg['check'] selects resource.check_frontends(), and with neither of
    them resource.list() is called.

    A KeyboardInterrupt is silently ignored; any other exception is
    reported through logging.error and not propagated.

    @param arg : parsed command line options
    @type arg : dict
    """
    if arg['--dbg']:
        verbosity = logging.DEBUG
    else:
        verbosity = logging.INFO
    logging.basicConfig(format='%(message)s', level=verbosity,
                        stream=sys.stdout)
    try:
        configuration = Configuration()
        if not Daemon().is_alive():
            raise Exception('DRM4G is stopped.')
        manager = Resource(configuration)
        if arg['edit']:
            manager.edit()
        elif arg['check']:
            manager.check_frontends()
        else:
            manager.list()
    except KeyboardInterrupt:
        pass
    except Exception as failure:
        logging.error(str(failure))
Beispiel #8
0
def run(arg):
    """
    Initialise, delete or list the identity of a configured resource.

    The resource is named by arg['<resource>']. An Agent is used when the
    resource communicator is 'ssh' and a Proxy when its lrms is 'cream';
    a resource needing neither is rejected. arg['init'] and arg['delete']
    select the action, and without either of them the current identity
    is listed/checked.

    Messages go to standard output, at DEBUG level when arg['--dbg'] is
    set and at INFO level otherwise. A KeyboardInterrupt is silently
    ignored; any other exception is reported through logging.error.

    @param arg : parsed command line options
    @type arg : dict
    """
    if arg['--dbg']:
        verbosity = logging.DEBUG
    else:
        verbosity = logging.INFO
    logging.basicConfig(format='%(message)s', level=verbosity,
                        stream=sys.stdout)
    try:
        if not Daemon().is_alive():
            raise Exception('DRM4G is stopped.')
        config = Configuration()
        config.load()
        problems = config.check()
        name = arg['<resource>']
        if problems:
            raise Exception("Review the configuration of '%s'." % (name))
        if name not in config.resources:
            raise Exception("'%s' is not a configured resource." % (name))
        lrms = config.resources.get(name)['lrms']
        communicator = config.resources.get(name)['communicator']
        uses_proxy = lrms == 'cream'
        uses_agent = communicator == 'ssh'
        if not (uses_proxy or uses_agent):
            raise Exception("'%s' does not have an identity to configure." %
                            (name))
        if uses_proxy:
            settings = config.resources[name]
            proxy = Proxy(settings, config.make_communicators()[name])
        if uses_agent:
            agent = Agent(config.resources[name])
        if arg['init']:
            if uses_agent:
                agent.start()
                agent.add_key(arg['--lifetime'])
                agent.copy_key()
            if uses_proxy:
                proxy.configure()
                proxy.create(arg['--lifetime'])
        elif arg['delete']:
            if uses_proxy:
                proxy.destroy()
            if uses_agent:
                agent.delete_key()
        else:
            if uses_agent:
                agent.list_key()
            if uses_proxy:
                proxy.check()
    except KeyboardInterrupt:
        pass
    except Exception as failure:
        logging.error(str(failure))
Beispiel #9
0
def run(arg):
    """
    Run the resource action selected on the command line.

    arg['edit'] calls resource.edit() without looking at the daemon.
    Every other action first requires the daemon to be alive and is
    chosen, in this order, by arg['check'], arg['create'],
    arg['destroy'] and arg['--all']; with none of them set
    resource.list() is called.

    Every exception is reported through logger.error and not propagated.

    @param arg : parsed command line options
    @type arg : dict
    """
    try:
        manager = Resource(Configuration())
        if arg['edit']:
            manager.edit()
            return
        # All remaining actions need a running daemon.
        if not Daemon().is_alive():
            raise Exception('DRM4G is stopped.')
        if arg['check']:
            manager.check_frontends()
        elif arg['create']:
            manager.create_vms()
        elif arg['destroy']:
            manager.destroy_vms()
        elif arg['--all']:
            manager.list_resources()
        else:
            manager.list()
    except Exception as failure:
        logger.error(str(failure))
Beispiel #10
0
class GwTmMad (object):
    """
    Transfer manager MAD

    The File Transfer Driver interfaces with Grid Data Management Services
    and is responsible for file staging, remote working directory set-up
    and remote host clean up.

    The format to send a request to the Transfer MAD, through its standard
    input, is:

    OPERATION JID TID EXE_MODE SRC_URL DST_URL

    Where:

    -OPERATION: Can be one of the following:
        -INIT: Initializes the MAD, JID should be max number of jobs.
        -START: Init transfer associated with job JID
        -END: Finish transfer associated with job JID
        -MKDIR: Creates directory SRC_URL
        -RMDIR: Removes directory SRC_URL
        -CP: start a copy of SRC_URL  to DST_URL, with identification TID,
            and associated with job JID.
        -FINALIZE: Finalizes the MAD.
    -JID: Is a job identifier, chosen by GridWay.
    -TID: Transfer identifier, only relevant for command CP.
    -EXE_MODE: If equal to 'X' file will be given execution permissions,
        only relevant for command CP.

    The format to receive a response from the MAD, through its standard
    output, is:

    OPERATION JID TID RESULT INFO

    Where:

    -OPERATION: Is the operation specified in the request that originated
        the response or CALLBACK, in the case of an asynchronous notification
        of a state change.
    -JID: It is the job identifier, as provided in the START request.
    -TID: It is the transfer identifier, as provided in the CP request.
    -RESULT: It is the result of the operation. Could be SUCCESS or FAILURE.
    -INFO: If RESULT is FAILURE, it contains the cause of failure.

    """

    logger  = logging.getLogger(__name__)
    message = Send()

    def __init__(self):
        # Bounds handed to the ThreadPool created in processLine().
        self._max_thread   = 30
        self._min_thread   = 5
        # Serialises the configuration reload and communicator lookup.
        self._lock         = threading.Lock()
        # Communicators already created, indexed by resource name.
        self._communicator = dict()
        # Configuration object, created on first use.
        self._configure    = None

    def do_INIT(self, args):
        """
        INIT: Initializes the MAD, JID should be max number of jobs.
        (i.e. INIT JID - - - -)
        @param args : arguments of operation
        @type args : string
        """
        out = 'INIT - - SUCCESS -'
        self.message.stdout( out )
        self.logger.debug( out )

    def do_START(self, args):
        """
        START: Init transfer associated with job JID.(i.e. START JID - - - -)
        @param args : arguments of operation
        @type args : string
        """
        out = 'START %s - SUCCESS -' % ( args.split( ) [ 1 ] )
        self.message.stdout( out )
        self.logger.debug( out )

    def do_END(self, args):
        """
        END: Finish transfer associated with job JID .(i.e. END JID - - - -)
        @param args : arguments of operation
        @type args : string
        """
        out = 'END %s - SUCCESS -' % ( args.split( ) [ 1 ] )
        self.message.stdout( out )
        self.logger.debug( out )

    def do_FINALIZE(self, args):
        """
        Finalizes the MAD (i.e. FINALIZE - - - - -)

        The response is sent and the process is then terminated with
        sys.exit(0).
        @param args : arguments of operation
        @type args : string
        """
        out = 'FINALIZE %s - SUCCESS -' % ( args.split( ) [ 1 ] )
        self.message.stdout( out )
        self.logger.debug( out )
        sys.exit( 0 )

    def do_MKDIR(self, args):
        """
        MKDIR: Creates directory SRC_URL (i.e. MKDIR JID - - SRC_URL -)

        rmDirectory is called on SRC_URL before mkDirectory, so an
        existing directory is removed first.
        @param args : arguments of operation
        @type args : string
        """
        OPERATION, JID, TID, EXE_MODE, SRC_URL, DST_URL = args.split()
        try:
            com = self._update_com( urlparse( SRC_URL ).host )
            com.rmDirectory( SRC_URL )
            com.mkDirectory( SRC_URL )
            out = 'MKDIR %s - SUCCESS -' % ( JID )
        except Exception as err :
            out = 'MKDIR %s - FAILURE %s' % ( JID , str( err ) )
            # The traceback is only available inside the handler.
            self.logger.error( err , exc_info=1 )
        self.message.stdout( out )
        self.logger.debug( out )

    def do_RMDIR(self, args):
        """
        RMDIR: Removes directory SRC_URL (i.e. RMDIR JID - - SRC_URL -)
        @param args : arguments of operation
        @type args : string
        """
        OPERATION, JID, TID, EXE_MODE, SRC_URL, DST_URL = args.split()
        try:
            com = self._update_com( urlparse( SRC_URL ).host )
            com.rmDirectory( SRC_URL )
            out = 'RMDIR %s - SUCCESS -' % ( JID )
        except Exception as err :
            out = 'RMDIR %s - FAILURE %s' % ( JID , str( err ) )
            # The traceback is only available inside the handler.
            self.logger.error( err , exc_info=1 )
        self.message.stdout( out )
        self.logger.debug( out )

    def do_CP(self, args):
        """
        CP: start a copy of SRC_URL  to DST_URL, with identification TID,
        and associated with job JID.(i.e. CP JID TID - SRC_URL DST_URL)

        The communicator is looked up from DST_URL when SRC_URL contains
        'file:', and from SRC_URL otherwise.
        @param args : arguments of operation
        @type args : string
        """
        OPERATION, JID, TID, EXE_MODE, SRC_URL, DST_URL = args.split()
        if 'file:' in SRC_URL:
            url = DST_URL
        else:
            url = SRC_URL
        try:
            com = self._update_com( urlparse( url ).host )
            com.copy( SRC_URL , DST_URL , EXE_MODE )
            out = 'CP %s %s SUCCESS -' % ( JID , TID )
        except Exception as err :
            out = 'CP %s %s FAILURE %s' % ( JID , TID , str( err ) )
            # The traceback is only available inside the handler.
            self.logger.error( err , exc_info=1 )
        self.message.stdout( out )
        self.logger.debug( out )

    # Dispatch table used by processLine(): operation name -> handler,
    # stored as plain functions and called as handler(self, args).
    methods = {'INIT'    : do_INIT,
               'START'   : do_START,
               'END'     : do_END,
               'MKDIR'   : do_MKDIR,
               'RMDIR'   : do_RMDIR,
               'CP'      : do_CP,
               'FINALIZE': do_FINALIZE}

    def processLine(self):
        """
        Read requests from standard input and dispatch them until input ends.

        Every line read is split on whitespace: a request with exactly six
        fields whose operation is registered in self.methods is executed,
        anything else (including a blank line) is answered with
        'WRONG COMMAND'. FINALIZE and INIT run in the reading thread; the
        remaining operations are queued on the thread pool.

        The loop stops at end of input. Any exception raised while reading
        or dispatching is logged as a warning and also stops the loop.
        """
        try:
            pool = ThreadPool( self._min_thread , self._max_thread )
            self._configure = Configuration()
            while True:
                line = sys.stdin.readline()
                if not line:
                    # readline() returns '' only at end of input: stop
                    # reading instead of failing on an empty field list.
                    self.logger.debug('End of input, leaving the command loop')
                    break
                fields = line.split()
                command = ' '.join(fields)
                self.logger.debug(command)
                # A blank line has no fields; it is reported as a wrong
                # command instead of raising IndexError.
                OPERATION = fields[0].upper() if fields else None
                if len(fields) == 6 and OPERATION in self.methods:
                    handler = self.methods[OPERATION]
                    if OPERATION in ('FINALIZE', 'INIT'):
                        handler(self, command)
                    else:
                        pool.add_task(handler, self, command)
                else:
                    out = 'WRONG COMMAND'
                    self.message.stdout(out)
                    self.logger.debug(out)
        except Exception as err :
            self.logger.warning( str ( err ) , exc_info=1 )

    def _update_com(self, host):
        """
        Return the communicator of the resource that matches ``host``.

        ``host`` is either a resource name or a string of the form
        ``<resource>::<rest>``; in the latter case only the part before
        ``::`` is compared with the configured resource names. Resources
        that define ``cloud_provider`` are never matched. The communicator
        is created on first use and cached in self._communicator.

        The configuration is reloaded and checked on every call, and the
        whole lookup runs while holding self._lock.

        @param host : host part of the URL of the request
        @type host : string
        @return : the communicator of the matching resource, or None when
            no configured resource matches
        @raise Exception : if the configuration check reports errors
        """
        with self._lock :
            self.logger.debug( "_update_com function - It's looking for a communicator for the host: %s" % host )
            if not self._configure:
                self._configure = Configuration()
            # TODO: this needs to be optimized - the configuration is
            # reloaded on every call instead of only when it has changed.
            self._configure.load()
            errors = self._configure.check()
            if errors :
                message = ' '.join( errors )
                self.logger.error ( message )
                raise Exception ( message )
            # Only the part before '::' names the resource.
            wanted = host.split( '::' , 1 )[ 0 ]
            for resname, resdict in list(self._configure.resources.items()) :
                self.logger.debug( "    The current resource to which it's being compared is %s" % resname )
                if 'cloud_provider' in resdict :
                    continue
                if resname != wanted :
                    continue
                # This must be an independent check: as the last arm of the
                # matching elif chain it was skipped for '<resource>::<rest>'
                # hosts, which then failed with KeyError below.
                if resname not in self._communicator:
                    self.logger.debug( "    Since they are the same, its Communicator() will be returned")
                    self._communicator[ resname ] = self._configure.make_communicators()[resname]
                communicator = self._communicator[ resname ]
                self.logger.debug( "\nCommunicator for %s:\n    communicator: %s\n    username: %s\n    frontend: %s" % (resname, resdict[ 'communicator' ], communicator.username, communicator.frontend) )
                return communicator
            return None
Beispiel #11
0
class GwEmMad(object):
    """
    Execution manager MAD

    GridWay uses a Middleware Access Driver (MAD) module to submit,
    control and monitor the execution of jobs.

    The format to send a request to the Execution MAD, through its
    standard input, is:
    OPERATION JID HOST/JM RSL

    Where:

    -OPERATION: Can be one of the following:
        -INIT: Initializes the MAD (i.e. INIT - - -).
        -SUBMIT: Submits a job(i.e. SUBMIT JID HOST/JM RSL).
        -POLL: Polls a job to obtain its state (i.e. POLL JID - -).
        -RECOVER: Re-attaches to an already submitted job
            (i.e. RECOVER JID HOST:REMOTE_JOB_ID -).
        -CANCEL: Cancels a job (i.e. CANCEL JID - -).
        -FINALIZE: Finalizes the MAD (i.e. FINALIZE - - -).
    -JID: Is a job identifier, chosen by GridWay.
    -HOST: If the operation is SUBMIT, it specifies the resource contact
        to submit the job. Otherwise it is ignored.
    -JM: If the operation is SUBMIT, it specifies the job manager to submit
        the job. Otherwise it is ignored.
    -RSL: If the operation is SUBMIT, it specifies the resource specification
        to submit the job. Otherwise it is ignored.

    The format to receive a response from the MAD, through its standard output, is:

    OPERATION JID RESULT INFO

    Where:

    -OPERATION: Is the operation specified in the request that originated
        the response or CALLBACK, in the case of an asynchronous notification
        of a state change.
    -JID: It is the job identifier, as provided in the submission request.
    -RESULT: It is the result of the operation. Could be SUCCESS or FAILURE
    -INFO: If RESULT is FAILURE, it contains the cause of failure. Otherwise,
        if OPERATION is POLL or CALLBACK,it contains the state of the job.
    """
    logger = logging.getLogger(__name__)
    message = Send()

    def __init__(self):
        self._callback_interval = 30  #seconds
        # Bounds handed to the ThreadPool created in processLine().
        self._max_thread = 10
        self._min_thread = 3
        # Jobs known to this MAD, indexed by GridWay job identifier.
        self._job_list = List()
        # Configuration object, created in processLine().
        self._configure = None
        # Communicators already created, indexed by resource name.
        self._communicators = dict()
        # Serialises the configuration reload and resource lookup.
        self._lock = threading.Lock()

    def do_INIT(self, args):
        """
        Initializes the MAD (i.e. INIT - - -)
        @param args : arguments of operation
        @type args : string
        """
        out = 'INIT - SUCCESS -'
        self.message.stdout(out)
        self.logger.debug(out)

    def do_SUBMIT(self, args):
        """
        Submits a job(i.e. SUBMIT JID HOST/JM RSL).

        A wrapper file is created locally from the job template, copied
        to the remote resource and submitted; on success the job is
        stored in the job list under JID.
        @param args : arguments of operation
        @type args : string
        """
        OPERATION, JID, HOST_JM, RSL = args.split()
        try:
            HOST, JM = HOST_JM.rsplit('/', 1)
            # Init Job class
            job, communicator = self._update_resource(HOST)
            job.Communicator = communicator
            # Parse rsl
            rsl = Rsl2Parser(RSL).parser()
            if 'project' in job.resfeatures:
                rsl['project'] = job.resfeatures['project']
            if 'parallel_env' in job.resfeatures:
                rsl['parallel_env'] = job.resfeatures['parallel_env']
            if 'vo' in job.resfeatures and "::" in HOST:
                _, host = HOST.split('::')
                job.resfeatures['host'] = host
                job.resfeatures['jm'] = JM
                job.resfeatures['env_file'] = join(dirname(RSL), "job.env")
                job.resfeatures['queue'] = rsl['queue']
            # Update remote directories
            ABS_REMOTE_JOBS_DIR = job.get_abs_directory(
                job.resfeatures.get('scratch', REMOTE_JOBS_DIR))
            for key in ["stdout", "stderr", "executable"]:
                rsl[key] = join(ABS_REMOTE_JOBS_DIR, rsl[key])
            # Create and copy wrapper_drm4g file
            local_file = join(dirname(RSL),
                              "wrapper_drm4g.%s" % RSL.split('.')[-1])
            remote_file = join(dirname(rsl['executable']), 'wrapper_drm4g')
            job.createWrapper(local_file, job.jobTemplate(rsl))
            job.copyWrapper(local_file, remote_file)
            # Execute wrapper_drm4g
            job.JobId = job.jobSubmit(remote_file)
            self._job_list.put(JID, job)
            out = 'SUBMIT %s SUCCESS %s:%s' % (JID, HOST, job.JobId)
        except Exception as err:
            out = 'SUBMIT %s FAILURE %s' % (JID, str(err))
            self.logger.error(err, exc_info=1)
        self.message.stdout(out)
        self.logger.debug(out)

    def do_FINALIZE(self, args):
        """
        Finalizes the MAD (i.e. FINALIZE - - -).

        The response is sent and the process is then terminated with
        sys.exit(0).
        @param args : arguments of operation
        @type args : string
        """
        out = 'FINALIZE - SUCCESS -'
        self.message.stdout(out)
        self.logger.debug(out)
        sys.exit(0)

    def do_POLL(self, args):
        """
        Polls a job to obtain its state (i.e. POLL JID - -).

        Only the status stored in the job list is reported; a JID that is
        not in the list is answered with FAILURE.
        @param args : arguments of operation
        @type args : string
        """
        OPERATION, JID, HOST_JM, RSL = args.split()
        try:
            if self._job_list.has_key(JID):
                job = self._job_list.get(JID)
                status = job.getStatus()
                out = 'POLL %s SUCCESS %s' % (JID, status)
            else:
                out = 'POLL %s FAILURE Job not submitted' % (JID)
        except Exception as err:
            out = 'POLL %s FAILURE %s' % (JID, str(err))
            self.logger.error(err, exc_info=1)
        self.message.stdout(out)
        self.logger.debug(out)

    def do_RECOVER(self, args):
        """
        Recovers a previously submitted job
        (i.e. RECOVER JID HOST:REMOTE_JOB_ID -).

        A job object is built for HOST, bound to the remote job
        identifier, its status is refreshed and it is stored in the job
        list under JID.
        @param args : arguments of operation
        @type args : string
        """
        OPERATION, JID, HOST_JM, RSL = args.split()
        try:
            HOST, remote_job_id = HOST_JM.split(':', 1)
            job, communicator = self._update_resource(HOST)
            job.Communicator = communicator
            job.JobId = remote_job_id
            job.refreshJobStatus()
            self._job_list.put(JID, job)
            out = 'RECOVER %s SUCCESS %s' % (JID, job.getStatus())
        except Exception as err:
            out = 'RECOVER %s FAILURE %s' % (JID, str(err))
            self.logger.error(err, exc_info=1)
        self.message.stdout(out)
        self.logger.debug(out)

    def do_CALLBACK(self):
        """
        Periodically refresh every known job and report state changes.

        Runs forever: every self._callback_interval seconds each job in
        the job list is refreshed, and a CALLBACK line is sent when its
        status changed or is DONE/FAILED. Jobs in DONE or FAILED state
        are removed from the job list.
        """
        while True:
            time.sleep(self._callback_interval)
            self.logger.debug("CALLBACK new iteration ...")
            for JID, job in self._job_list.items():
                try:
                    self.logger.debug("CALLBACK checking job '%s'" % JID)
                    oldStatus = job.getStatus()
                    job.refreshJobStatus()
                    newStatus = job.getStatus()
                    if oldStatus != newStatus or newStatus == 'DONE' or newStatus == 'FAILED':
                        if newStatus == 'DONE' or newStatus == 'FAILED':
                            self._job_list.delete(JID)
                            time.sleep(0.1)
                        out = 'CALLBACK %s SUCCESS %s' % (JID, newStatus)
                        self.message.stdout(out)
                        self.logger.debug(out)
                except Exception as err:
                    out = 'CALLBACK %s FAILURE %s' % (JID, str(err))
                    self.logger.error(err, exc_info=1)
                    self.message.stdout(out)

    def do_CANCEL(self, args):
        """
        Cancels a job (i.e. CANCEL JID - -).
        @param args : arguments of operation
        @type args : string
        """
        OPERATION, JID, HOST_JM, RSL = args.split()
        try:
            if self._job_list.has_key(JID):
                self._job_list.get(JID).jobCancel()
                out = 'CANCEL %s SUCCESS -' % (JID)
            else:
                out = 'CANCEL %s FAILURE Job not submitted' % (JID)
        except Exception as err:
            out = 'CANCEL %s FAILURE %s' % (JID, str(err))
            self.logger.error(err, exc_info=1)
        self.message.stdout(out)
        self.logger.debug(out)

    # Dispatch table used by processLine(): operation name -> handler,
    # stored as plain functions and called as handler(self, args).
    methods = {
        'INIT': do_INIT,
        'SUBMIT': do_SUBMIT,
        'POLL': do_POLL,
        'RECOVER': do_RECOVER,
        'CANCEL': do_CANCEL,
        'FINALIZE': do_FINALIZE
    }

    def processLine(self):
        """
        Read requests from standard input and dispatch them until input ends.

        A daemon thread running do_CALLBACK is started first. Every line
        read is split on whitespace: a request with exactly four fields
        whose operation is registered in self.methods is executed,
        anything else (including a blank line) is answered with
        'WRONG COMMAND'. FINALIZE, INIT, SUBMIT and RECOVER run in the
        reading thread; the remaining operations are queued on the
        thread pool.

        The loop stops at end of input. Any exception raised while reading
        or dispatching is logged as a warning and also stops the loop.
        """
        try:
            worker = threading.Thread(target=self.do_CALLBACK)
            # The 'daemon' attribute replaces the deprecated setDaemon().
            worker.daemon = True
            worker.start()
            self._configure = Configuration()
            pool = ThreadPool(self._min_thread, self._max_thread)
            while True:
                line = sys.stdin.readline()
                if not line:
                    # readline() returns '' only at end of input: stop
                    # reading instead of failing on an empty field list.
                    self.logger.debug('End of input, leaving the command loop')
                    break
                fields = line.split()
                command = ' '.join(fields)
                self.logger.debug(command)
                # A blank line has no fields; it is reported as a wrong
                # command instead of raising IndexError.
                OPERATION = fields[0].upper() if fields else None
                if len(fields) == 4 and OPERATION in self.methods:
                    handler = self.methods[OPERATION]
                    if OPERATION in ('FINALIZE', 'INIT', 'SUBMIT', 'RECOVER'):
                        handler(self, command)
                    else:
                        pool.add_task(handler, self, command)
                else:
                    out = 'WRONG COMMAND'
                    self.message.stdout(out)
                    self.logger.debug(out)
        except Exception as err:
            self.logger.warning(str(err), exc_info=1)

    def _update_resource(self, host):
        """
        Return a job object and the communicator for the resource of ``host``.

        ``host`` is either a resource name or ``<resource>::<rest>``, in
        which case only the part before ``::`` is compared with the
        configured resource names. Resources that define
        ``cloud_provider`` are never matched. Communicators are created on
        first use and cached in self._communicators; the job object comes
        from a fresh make_resources() call. The whole lookup runs while
        holding self._lock.

        @param host : resource contact of the request
        @type host : string
        @return : (job, communicator) for the matching resource, or None
            when no configured resource matches
        @raise Exception : if the configuration check reports errors
        """
        with self._lock:
            # NOTE(review): the configuration is reloaded when
            # check_update() is false; this looks inverted if
            # check_update() reports "the file has changed" - confirm
            # against Configuration.check_update.
            if not self._configure.check_update(
            ) or not self._configure.resources:
                self._configure.load()
                errors = self._configure.check()
                if errors:
                    self.logger.error(' '.join(errors))
                    raise Exception(' '.join(errors))
            for resname, resdict in list(self._configure.resources.items()):
                if 'cloud_provider' in self._configure.resources[resname].keys(
                ):
                    continue
                if '::' in host:
                    _resname, _ = host.split('::')
                    if resname != _resname:
                        continue
                elif resname != host:
                    continue
                if resname not in self._communicators:
                    self._communicators[
                        resname] = self._configure.make_communicators(
                        )[resname]
                job = self._configure.make_resources()[resname]['Job']
                communicator = self._communicators[resname]
                return job, communicator
Beispiel #12
0
class GwImMad(object):
    """
    Information manager MAD

    The format to send a request to the Information MAD, through its standard input, is:

        OPERATION HID HOST ARGS

    Where:
    -OPERATION: Can be one of the following:
        -INIT: Initializes the MAD (i.e. INIT - - -).
        -DISCOVER: Discovers hosts (i.e. DISCOVER - - - ).
        -MONITOR: Monitors a host (i.e. MONITOR HID HOST -).
        -FINALIZE: Finalizes the MAD (i.e. FINALIZE - - -).
    -HID: if the operation is MONITOR, it is a host identifier, chosen by GridWay. Otherwise it is ignored.
    -HOST: If the operation is MONITOR it specifies the host to monitor. Otherwise it is ignored.

    The format to receive a response from the MAD, through its standard output, is:

        OPERATION HID RESULT INFO

    Where:
    -OPERATION: Is the operation specified in the request that originated the response.
    -HID: It is the host identifier, as provided in the submission request.
    -RESULT: It is the result of the operation. Could be SUCCESS or FAILURE.
    -INFO: If RESULT is FAILURE, it contains the cause of failure. Otherwise, if OPERATION
        is DISCOVER, it contains a list of discovered hosts, or if OPERATION is MONITOR,
        it contains a list of host attributes.
    """

    logger = logging.getLogger(__name__)
    message = Send()

    def __init__(self):
        # Resources built by the last DISCOVER, keyed by resource name.
        self._resources = dict()
        # Configuration loaded by the last DISCOVER (None until then).
        self._config = None

    def _reply(self, out, output=True, failure=None):
        """
        Send a response line and record it in the debug log.
        @param out : response line
        @type out : string
        @param output : whether the line is also written through self.message
        @type output : boolean
        @param failure : sys.exc_info() tuple captured inside the handler of
                         the error being reported, or None on success
        @type failure : tuple
        """
        if output:
            self.message.stdout(out)
        # The exception info has to be handed over explicitly: once the
        # 'except' block is left, Python 3 no longer exposes the exception
        # through sys.exc_info(), so exc_info=1 would log 'NoneType: None'.
        self.logger.debug(out, exc_info=failure)

    def do_INIT(self, args):
        """
        Initializes the MAD (i.e. INIT - - -)
        @param args : arguments of operation
        @type args : string
        """
        out = 'INIT - SUCCESS -'
        self.message.stdout(out)
        self.logger.debug(out)

    def do_DISCOVER(self, args, output=True):
        """
        Discovers hosts (i.e. DISCOVER - - -)

        Reloads the configuration, rebuilds the resources and asks every
        enabled, non-cloud resource for its hosts. A resource that fails is
        logged and skipped; a configuration error makes the whole operation
        answer FAILURE.
        @param args : arguments of operation
        @type args : string
        @param output : whether the response is written through self.message
        @type output : boolean
        """
        _, HID, _, _ = args.split()
        failure = None
        try:
            self._config = Configuration()
            self._config.load()
            errors = self._config.check()
            # Explicit raise instead of 'assert': assertions are removed
            # when Python runs with -O.
            if errors:
                raise Exception(' '.join(errors))

            self._resources = self._config.make_resources()
            communicators = self._config.make_communicators()
            hosts = ""
            for resname in sorted(self._resources.keys()):
                settings = self._config.resources[resname]
                if settings['enable'].lower() == 'false':
                    continue
                if 'cloud_provider' in settings:
                    continue
                try:
                    resource = self._resources[resname]['Resource']
                    resource.Communicator = communicators[resname]
                    resource.Communicator.connect()
                    try:
                        # Every entry is prefixed with a blank, as before.
                        hosts = hosts + " " + resource.hosts()
                    finally:
                        # Release the connection even if hosts() fails.
                        resource.Communicator.close()
                except Exception as err:
                    self.logger.error(err, exc_info=1)
            out = 'DISCOVER %s SUCCESS %s' % (HID, hosts)
        except Exception as err:
            failure = sys.exc_info()
            out = 'DISCOVER - FAILURE %s' % str(err)
        self._reply(out, output, failure)

    def do_MONITOR(self, args, output=True):
        """
        Monitors a host (i.e. MONITOR HID HOST -)

        Uses the resources built by the last DISCOVER; when none matches the
        host the response is a FAILURE.
        @param args : arguments of operation
        @type args : string
        @param output : whether the response is written through self.message
        @type output : boolean
        """
        _, HID, HOST, _ = args.split()
        failure = None
        try:
            info = ""
            for resname, resdict in list(self._resources.items()):
                # NOTE(review): a disabled resource reached before the one
                # that owns HOST aborts the whole lookup - confirm that this
                # is intended rather than skipping it as DISCOVER does.
                if self._config.resources[resname]['enable'].lower(
                ) == 'false':
                    raise Exception("Resource '%s' is not enable" % resname)
                if HOST in resdict['Resource'].host_list:
                    info = resdict['Resource'].host_properties(HOST)
                    resdict['Resource'].Communicator.close()
                    break
            # Explicit raise instead of 'assert': with -O the assertion would
            # vanish and an empty SUCCESS would be reported.
            if not info:
                raise Exception("Host '%s' is not available" % HOST)
            out = 'MONITOR %s SUCCESS %s' % (HID, info)
        except Exception as err:
            failure = sys.exc_info()
            out = 'MONITOR %s FAILURE %s' % (HID, str(err))
        self._reply(out, output, failure)

    def do_FINALIZE(self, args):
        """
        Finalizes the MAD (i.e. FINALIZE - - -)
        @param args : arguments of operation
        @type args : string
        """
        out = 'FINALIZE - SUCCESS -'
        self.message.stdout(out)
        self.logger.debug(out)
        sys.exit(0)

    # Dispatch table used by processLine; the values are plain functions, so
    # they are called with the instance passed explicitly.
    methods = {
        'INIT': do_INIT,
        'DISCOVER': do_DISCOVER,
        'MONITOR': do_MONITOR,
        'FINALIZE': do_FINALIZE,
    }

    def processLine(self):
        """
        Choose the OPERATION through the command line

        Reads requests from the standard input until it is closed. A request
        must have exactly four fields and a known OPERATION; anything else
        (including a blank line) is answered with 'WRONG COMMAND'.
        """
        try:
            while True:
                line = sys.stdin.readline()
                if not line:
                    # readline() returns an empty string only at end of file.
                    self.logger.debug('End of input, leaving the command loop')
                    break
                fields = line.split()
                self.logger.debug(' '.join(fields))
                operation = fields[0].upper() if fields else ''
                if len(fields) == 4 and operation in self.methods:
                    self.methods[operation](self, ' '.join(fields))
                else:
                    out = 'WRONG COMMAND'
                    self.message.stdout(out)
                    self.logger.debug(out)
        except Exception as e:
            self.logger.warning(str(e), exc_info=1)