コード例 #1
0
ファイル: aviaryoperations.py プロジェクト: ssorj/boneyard
    def set_job_attribute(self, scheduler, job_id, name, value, callback, submission):
        '''
        Asynchronously set a single attribute on a job via the Aviary
        job service.

        scheduler -- supplies Machine (host to contact), Pool and Name
        job_id -- value stored in the "job" field of the Aviary JobID
        name, value -- the attribute to set; it is always sent with
        type "STRING"
        callback -- required; called with the tuple produced by
        _cb_args_dataless() once the remote call has finished
        submission -- supplies Name and Owner for the JobID

        The call itself runs on a CallThread; nothing is returned.
        '''
        assert callback

        operation = "setJobAttribute"
        client = self.job_client_pool.get_object()

        def on_done(outcome):
            # Hand the client back to the pool before reporting anything
            self.job_client_pool.return_object(client)
            outcome = self._pretty_result(outcome, scheduler.Machine)
            # Reshape the result into the standard callback arguments
            callback(*self._cb_args_dataless(outcome))

        self._setup_client(client,
                           self.job_servers,   # server lookup object
                           scheduler.Machine,  # host we want
                           operation)

        # Job id parameter (see job wsdl)
        target = client.factory.create('ns0:JobID')
        target.job = job_id
        target.pool = scheduler.Pool
        target.scheduler = scheduler.Name
        target.submission.name = submission.Name
        target.submission.owner = submission.Owner

        # Attribute parameter built from name and value
        attribute = client.factory.create('ns0:Attribute')
        attribute.name = name
        attribute.type = "STRING"
        attribute.value = value

        CallThread(self.call_client_retry, on_done,
                   client, operation, target, attribute).start()
コード例 #2
0
ファイル: aviaryoperations.py プロジェクト: ssorj/boneyard
    def _control_job(self, scheduler, job_id, reason, submission,
                     meth_name, *args, **kwargs):

        callback = "callback" in kwargs and kwargs["callback"] or None
        default = "default" in kwargs and kwargs["default"] or None
        timeout = "timeout" in kwargs and kwargs["timeout"] or 5

        client = self.job_client_pool.get_object()
        self._setup_client(client, 
                           self.job_servers,    # server lookup object
                           scheduler.Machine,  # host we want
                           meth_name)

        meth = getattr(client.service, meth_name)

        # Make a job id parameter (see job wsdl)
        jobId = client.factory.create('ns0:JobID')
        jobId.job = job_id
        jobId.pool = scheduler.Pool
        jobId.scheduler = scheduler.Name
        jobId.submission.name = submission.Name
        jobId.submission.owner = submission.Owner

        if callback:
            def my_callback(result):
                self.job_client_pool.return_object(client)
                # Fix up the exception message if necessary
                result = self._pretty_result(result, scheduler.Machine)
                cb_args = self._cb_args_dataless(result)
                callback(*cb_args)

            t = CallThread(self.call_client_retry, my_callback,
                           client, meth_name, jobId, reason)
            t.start()
        else:
            def my_process_results(result):
                # Fix up the exception message if necessary
                result = self._pretty_result(result, scheduler.Machine)
                return self._cb_args_dataless(result)

            res = self._call_sync(my_process_results, self.call_client_retry, 
                                  client, meth_name, jobId, reason) 
            self.job_client_pool.return_object(client)
            return res;
コード例 #3
0
ファイル: aviaryoperations.py プロジェクト: ssorj/boneyard
    def get_job_summaries(self, submission, callback, machine_name):
        '''
        Asynchronously fetch the job summaries belonging to a submission.

        submission -- supplies Name and Owner for the SubmissionID
        callback -- required; called as callback(exception, None) when
        the call produced an exception, otherwise as
        callback(status, {"Jobs": jobs}) where jobs is a list of dicts
        when status is "OK" and the response carries jobs, else None
        machine_name -- host to contact

        The call itself runs on a CallThread; nothing is returned.
        '''
        assert callback

        def epoch_seconds(stamp):
            # datetime.datetime -> int seconds since the epoch.
            # timetuple() drops any microseconds, which is fine since
            # values coming back from condor should not have them anyway.
            return int(time.mktime(stamp.timetuple()))

        def optional_text(summary, field):
            # Optional suds text field -> plain str, "" when absent
            if not hasattr(summary, field):
                return ""
            return str(getattr(summary, field))

        def to_canonical(summary):
            # Reshape one aviary job summary into the canonical form
            # cumin expects (historically the QMF form).
            cluster, proc = summary.id.job.split(".")
            return {
                "ClusterId": int(cluster),
                "Cmd": str(summary.cmd),
                "EnteredCurrentStatus": epoch_seconds(summary.last_update),
                # GlobalJobId will not match the QMF value because the
                # qdate portion of the name is missing
                "GlobalJobId": summary.id.scheduler + "#" + summary.id.job,
                "JobStatus": str(summary.job_status),
                "ProcId": int(proc),
                "QDate": epoch_seconds(summary.queued),
                # These may be null...
                "Args": optional_text(summary, "args1"),
                "ReleaseReason": optional_text(summary, "released"),
                "HoldReason": optional_text(summary, "held"),
            }

        def on_done(outcome):
            # Undo the attribute override before the client is reused
            client.set_enable_attributes(False)
            self.query_client_pool.return_object(client)
            outcome = self._pretty_result(outcome, machine_name)
            if isinstance(outcome, Exception):
                callback(outcome, None)
                return

            response = outcome[0]
            status = _AviaryCommon._get_status(response.status)
            jobs = None
            if status == "OK" and hasattr(response, "jobs"):
                jobs = [to_canonical(item) for item in response.jobs]
            callback(status, {"Jobs": jobs})

        operation = "getSubmissionSummary"
        client = self.query_client_pool.get_object()
        self._setup_client(client,
                           self.query_servers,  # server lookup object
                           machine_name,        # host we want
                           operation)

        # The job summaries are only included in the submission summary
        # response when this extra attribute is set on the client.
        client.set_enable_attributes(True)
        client.set_attributes({"includeJobSummaries": "true"})

        # Submission id parameter (see query wsdl)
        target = client.factory.create('ns0:SubmissionID')
        target.name = submission.Name
        target.owner = submission.Owner

        CallThread(self.call_client_retry, on_done,
                   client, operation, target).start()
コード例 #4
0
ファイル: aviaryoperations.py プロジェクト: ssorj/boneyard
    def submit_job(self, scheduler, ad, callback):
        '''
        Asynchronously submit the job described by a job ad to Aviary.

        scheduler -- supplies Machine, the host to contact
        ad -- mapping of job attribute names to values.  The entries
        named in basic_attrs are passed positionally; every other entry
        except "!!descriptors" is sent as an extra attribute, and
        ad["!!descriptors"] decides which extras are expressions.
        callback -- required; called as callback(exception, None) when
        the call produced an exception, otherwise as
        callback(status, job_id) where job_id is the id from the
        response when status is "OK" and None otherwise.

        The call itself runs on a CallThread; nothing is returned.
        '''
        assert callback

        def my_callback(result):
            # Turn this back off before we put it back in the pool
            # so allowOverrides isn't set for someone else...
            job_client.set_enable_attributes(False)
            self.job_client_pool.return_object(job_client)
            result = self._pretty_result(result, scheduler.Machine)
            if isinstance(result, Exception):
                callback(result, None)
                return

            # The aviary response has the job id available,
            # we'll pass it anyway even though Cumin does not care
            # at the present time
            status = _AviaryCommon._get_status(result.status)
            if status == "OK" and hasattr(result, "id"):
                job_id = result.id
            else:
                job_id = None
            callback(status, job_id)

        job_client = self.job_client_pool.get_object()
        self._setup_client(job_client,
                           self.job_servers,   # server lookup object
                           scheduler.Machine,  # host we want
                           "submitJob")

        # Set basic attributes in the order defined by aviary-job.wsdl.
        args = list()
        basic_attrs = ("Cmd", "Args", "Owner", "Iwd", "Submission")
        for attr in basic_attrs:
            try:
                args.append(ad[attr])
            except KeyError:
                # Only a missing key is tolerated here; anything else is
                # a real error and is allowed to propagate.
                # Someone may be unhappy if this is a required param!
                # Let the downstream code generate an error.
                # NOTE(review): skipping an entry shifts the positions of
                # the ones that follow -- confirm this is intended.
                pass

        # Add empty list for Aviary's basic requirement value...
        args.append([])

        # and let's let Requirements remain an unrestricted expression so that
        # we can just pass through the value from Cumin without interfering.
        # To do that, we need to specify Requirements through the
        # "extras" fields and set allowOverrides to True.
        # (otherwise, Requirements will be limited to particular
        # resource constraint types defined by aviary)
        job_client.set_enable_attributes(True)
        job_client.set_attributes({"allowOverrides": True})
        extras = list()
        for k, v in ad.iteritems():
            # We don't need to send descriptors down to aviary
            # and basic_attrs have already been filled in
            if k == "!!descriptors" or k in basic_attrs:
                continue

            extra = job_client.factory.create('ns0:Attribute')
            extra.name = k
            # But we do need to look in descriptors to find expressions...
            if k in ad["!!descriptors"]:
                extra.type = "EXPRESSION"
            else:
                try:
                    extra.type = self.type_to_aviary[type(v)]
                except KeyError:
                    extra.type = "UNDEFINED"
            extra.value = v
            extras.append(extra)

        # Important, extras itself must be added as an embedded list or
        # suds will consider only a single item
        args.append(extras)

        t = CallThread(self.call_client_retry, my_callback,
                       job_client, "submitJob", *args)
        t.start()
コード例 #5
0
ファイル: wallabyoperations.py プロジェクト: ssorj/boneyard
    class WallabyOperations(object):
        '''
        Wrapper around the Wallaby client library.
        '''
        def __init__(self, broker_uri, refresh_interval=None, sasl_mech_list=None):
            '''
            Set up state for talking to Wallaby; no connection is made here.

            broker_uri -- the URI used to connect to a QMF message broker
            where a Wallaby agent is connected.  A bare hostname is the
            simplest form; a full URI may specify
            scheme://user/password@host:port or any subset of those
            components as long as the host is included.  Examples:

                localhost
                localhost:5672
                amqp://fred/secret@localhost:1234

            refresh_interval -- default refresh interval in seconds for
            every item maintained by the internal caching thread.  None
            makes the caching thread wait forever before refreshing an
            item after a successful call, unless refresh() is used.
            Intervals may be set per item with the set_interval() method.

            sasl_mech_list -- restricts the list of sasl mechanisms
            allowed when connecting to a QMF message broker.  If the
            broker URL contains no credentials the default is ANONYMOUS;
            if it does contain credentials the default is
            'PLAIN DIGEST-MD5'.
            '''
            self.broker_uri = broker_uri
            self.sasl_mech_list = get_sasl_mechanisms(broker_uri, sasl_mech_list)

            # Wallaby Store object and QMF broker, not yet available
            self._store = None
            self._broker = None

            # Cache maintenance thread, and the flag that asks it to stop
            self._maintain_cache = None
            self._stop = False

            # Cached data.  Every key except WBTypes.TAGS is the name of
            # an attribute on the Wallaby Store object.  The TAGS data is
            # a subset of the GROUPS produced in this module, so it is
            # generated rather than fetched.
            cache = dict()
            for kind in (WBTypes.NODES, WBTypes.GROUPS, WBTypes.FEATURES):
                cache[kind] = self.CacheData(refresh_interval)
            cache[WBTypes.TAGS] = self.CacheData(refresh_interval,
                                                 synthetic=self._generate_tag_data)
            self._cache = cache

            # Names of the nodes that are members of each tag
            self._nodes_by_tag = dict()

            # Name of the partition group, kept so it can be filtered
            # out of the tags/groups that we return
            self._partition_group = None

            # One lock guards everything shared with the caching thread;
            # the condition built on it is used to wake that thread.
            self._lock = Lock()
            self._condition = Condition(self._lock)

        def start(self, retry_secs=5):
            '''
            Start the caching thread.

            This thread will attempt to connect to the broker and retrieve
            a Store object from the Wallaby agent.  If successful, it will
            periodically retrieve and cache data from Wallaby.

            Only one caching thread may run at a time.  The thread may
            be restarted if it has previously been stopped.

            Note, for the moment start() and stop() are not thread safe.  They
            should only be called from a single thread.

            retry_secs -- how often the caching thread will retry failed
            operations.  This includes attempts to connect to the broker
            and retrieve a Store object as well as calls to Wallaby that
            return no data.

            Returns True if a new caching thread was started.  Returns False
            if a caching thread is already alive or if broker_uri is None.
            '''
            # The connection to the broker can actually take a long
            # time to complete. We don't want to hang a calling function,
            # so we handle the connection and retrieval of the
            # initial Store object from Wallaby in a thread.
            # (There may need to be more work here if the broker or wallaby
            # going away and coming back causes a problem, but with
            # manageConnections=True and well-known agent/object ids for
            # Wallaby it appears to recover on its own...)

            # Similarly, getting node lists etc may take a long time
            # especially over a slow network.  So we use the same thread
            # to retrieve things like node lists at defined intervals.

            # 'self' here is really a term of art since this is a local
            # function, but it refers to the WallabyOperations object
            # so the code reads nicely
            def maintain_cache(self):

                # Get initial connection and Store object
                # NOTE(review): this assigns self.broker, while __init__
                # initialises self._broker -- confirm which name is intended.
                self.session = Session(manageConnections=True)
                self.broker = self.session.addBroker(self.broker_uri, mechanisms=self.sasl_mech_list)
                while not self._stop:
                    self._store = self._get_store()
                    if self._store is not None:
                        setup(self._store)
                        self._partition_group = self._store.getPartitionGroup().name
                        log.debug("WallabyOperations: found wallaby store object")
                        break

                    # Check stop inside the lock to make sure that we don't miss
                    # a signal or a "stop" that was set while we were iterating.
                    self._condition.acquire()
                    if not self._stop:
                        self._condition.wait(retry_secs)
                    self._condition.release()

                # Init remaining time til next update to 0 for each
                # cached item in case the thread was restarted
                for attr, val in self._cache.iteritems():
                    val.remaining = 0

                # Okay, now we're ready to retrieve data
                while not self._stop:
                    start_processing = time.time()
                    for attr, val in self._cache.iteritems():
                        if self._stop:
                            break

                        # val.remaining is the number of seconds left before
                        # the next update of this data item.  None is "forever".
                        # Synthetic items are not retrieved from the store.
                        if not val.synthetic and \
                           val.remaining is not None and val.remaining <= 0:
                            # Equivalent to getattr(self._store, attr, []),
                            # routed through get_values for timing/logging.
                            d = get_values(attr, getattr, self._store, attr, [])
                            # If the data is empty, _set_cache will leave the
                            # remaining field set to 0 for the attribute so we
                            # will try to get it again on our next retry.
                            # Otherwise, remaining will be reset to the full
                            # interval for this attribute.
                            self._set_cache(attr, d)

                    # Now handle the synthetics.  val.synthetic generates
                    # and stores its own results, so the return value of
                    # get_values is ignored here.
                    for attr, val in self._cache.iteritems():
                        if self._stop:
                            break

                        if val.synthetic and \
                           val.remaining is not None and val.remaining <= 0:
                            get_values(attr, val.synthetic, *val.args)
                            
                    log.debug("WallabyOperations: total refresh processing time %s" \
                              % (time.time() - start_processing))

                    # Find out how long we should sleep for.
                    # Based on min remaining times for all items
                    # If minimum is 0 because we have items waiting
                    # for a retry, we fall back on retry_secs as a minimum.
                    sleep_time = self._find_min_remaining(min=retry_secs)

                    self._condition.acquire()
                    if not self._stop:
                        # Could be signaled, so track the actual sleep time
                        log.debug("WallabyOperations: cache thread sleeping for"\
                                  " %s seconds" % sleep_time)
                        bed_time = time.time()
                        self._condition.wait(sleep_time)
                        slept = time.time() - bed_time
                        log.debug("WallabyOperations: cache thread slept for"\
                                  " %s seconds" % slept)

                        # When we wake up from sleep here, we already
                        # have the lock so we might as well check refresh
                        # and adjust the "remaining" values
                        for attr, val in self._cache.iteritems():
                            if val.refresh: # Force an update
                                val.remaining = 0
                                val.refresh = False
                            elif val.remaining is not None:
                                val.remaining -= slept
                    self._condition.release()

                # Clear cache if we have been stopped....
                for attr in self._cache:
                    self._set_cache(attr, [])
                self._store = None

                # Have to clean up the broker.  This is best effort: any
                # failure while removing the broker is deliberately ignored.
                try:
                    self.session.delBroker(self.broker)
                except:
                    pass

            #end maintain_cache

            # Run call(*args), logging how long it took.  Any exception
            # raised by the call is swallowed and [] is returned instead.
            def get_values(attr, call, *args):
                log.debug("WallabyOperations: refreshing %s" % attr)
                try:
                    # Wallaby API uses extensions to __getattr__ on
                    # the Store to retrieve objects from the Broker
                    # and return a list of proxy objects.
                    start = time.time()
                    d = call(*args)
                except:
                    d = []
                delta = time.time() - start
                log.debug("WallabyOperations: %s seconds to refresh %s" % (delta, attr)) 
                return d

            # Wrap the entire cache thread with an exception handler
            # NOTE(review): any exception escaping maintain_cache ends the
            # thread silently here -- nothing is logged and the cache is
            # not cleared.  Consider at least logging the failure.
            def wrap_maintain_cache():
               try:
                  maintain_cache(self)
                  log.debug("WallabyOperations: cache maintenance thread exited")
               except:
                  pass

            if self._maintain_cache is not None and \
               self._maintain_cache.isAlive():
                # No, you can't start another one.
                return False

            # Clear any stop request left over from a previous run
            self._stop = False

            if self.broker_uri is not None:
#              self._maintain_cache = CallThread(cProfile.runctx('maintain_cache(self)', globals(), locals(), filename='sage.stats'), None)
               self._maintain_cache = CallThread(wrap_maintain_cache, None)
               self._maintain_cache.daemon = True
               self._maintain_cache.start()
               log.debug("WallabyOperations: start cache maintenance thread")
               return True
            return False

        def stop(self, wait=False, timeout=None):
            '''
            Stop the caching thread.

            Wake the caching thread if asleep and cause it to exit.
            The thread may be restarted again with a call to start()
            once it has successfully exited.  On exit, the thread will
            null out cached data.

            wait -- if True the call will block until the thread exits or
            "timeout" seconds has passed if "timeout" is not None.

            timeout -- how long to wait for the thread to exit if "wait" is True.
            A value of None means wait forever.

            Note, for the moment start() and stop() are not thread safe.  They
            should only be called from a single thread.
            '''
            if self._maintain_cache is not None:
                self._condition.acquire()
                self._stop = True
                self._condition.notify()
                self._condition.release()
                if wait and self._maintain_cache.isAlive():
                    log.debug("WallabyOperations: waiting for cache maintenance thread to exit")
                    self._maintain_cache.join(timeout)
                log.debug("WallabyOperations: stopped cache maintenance thread")

        def refresh(self, *items):
            '''
            Wake the caching thread if asleep and cause it to iterate.

            items -- what data to refresh.  If "items" is an empty
            tuple, refresh all data otherwise refresh only the data specified.
            Attributes of WBTypes define valid values for elements of "items"
            '''
            self._condition.acquire()
            try:
                if len(items) == 0:
                    do_notify = True
                    for attr, val in self._cache.iteritems():
                        val.refresh = True
                else:
                    do_notify = False
                    for attr in items:
                        if attr in self._cache:
                            do_notify = True
                            self._cache[attr].refresh = True
                if do_notify:
                    self._condition.notify()
            finally:
                self._condition.release()

        def get_data(self, which, valuefilter=None):
            '''
            Return the cached values for the specified category.

            The values returned are proxy objects constructed by the
            Wallaby client library.  An unknown category yields [].

            which -- specifies the category.  Attributes of WBTypes
            define valid values for "which"

            valuefilter -- optional; only consulted for WBTypes.NODES.
            Its "nodeName" entry, with every "%" removed, is used as a
            substring that node names must contain.  A value of "%%%"
            or one that is empty once the "%" are removed disables
            filtering.
            '''
            self._lock.acquire()
            try:
                values = []
                if which in self._cache:
                    values = self._cache[which].data.values()

                # Node names are the only thing that can be filtered
                if which == WBTypes.NODES and valuefilter is not None:
                    pattern = valuefilter["nodeName"]
                    if pattern != "%%%":
                        needle = pattern.replace("%", "")
                        if needle != "":
                            values = [item for item in values
                                      if needle in item.name]
                return values
            finally:
                self._lock.release()

        def get_names(self, which):
            '''
            Return a list of cached names for the specified category.

            The values returned will be the names of objects constructed
            by the Wallaby client library.

            which -- specifies the category.  Attributes of WBTypes 
            define valid values for "which"
            '''
            d = []
            self._lock.acquire()
            try:
                if which in self._cache:
                    d = self._cache[which].data.keys()
            finally:
                self._lock.release()
            return d            

        def get_node_by_name(self, name):
            '''
            Look up a cached wallaby.Node object by its name.

            None is returned when name does not designate a
            currently cached object.
            '''
            category = WBTypes.NODES
            return self._lookup_by_name(category, name)

        def get_group_by_name(self, name):
            '''
            Look up a cached wallaby.Group object by its name.

            None is returned when name does not designate a
            currently cached object.
            '''
            category = WBTypes.GROUPS
            return self._lookup_by_name(category, name)

        def get_tag_by_name(self, name):
            '''
            Look up a cached wallaby.Tag object by its name.

            None is returned when name does not designate a
            currently cached object.
            '''
            category = WBTypes.TAGS
            return self._lookup_by_name(category, name)

        def get_feature_by_name(self, name):
            '''
            Look up a cached wallaby.Feature object by its name.

            None is returned when name does not designate a
            currently cached object.
            '''
            category = WBTypes.FEATURES
            return self._lookup_by_name(category, name)

        def get_node_names(self, tag):
            '''
            Return the names of the nodes associated with a tag.

            tag -- either the tag name (exactly a str or unicode) or
            any other object, whose "name" attribute is used instead.

            The result is the cached list of node names for the tag,
            or [] when nothing is cached under that name.
            '''
            if type(tag) not in (str, unicode):
                key = tag.name
            else:
                key = tag

            self._lock.acquire()
            try:
                return self._nodes_by_tag.get(key, [])
            finally:
                self._lock.release()

        def get_tag_names(self, node):
            '''
            Return the names of the tags associated with a node.

            node -- either the node name (exactly a str or unicode) or
            an object with a "name" attribute.

            The result is whatever getTags() returns for the cached node.
            [] is returned when no name can be derived from "node" or
            when no node is cached under that name.
            '''
            if type(node) in (str, unicode):
                key = node
            elif hasattr(node, "name"):
                key = node.name
            else:
                key = None

            if key is None:
                log.debug("WallabyOperations: get_tag_names(), parameter 'node' yields no name, returning []")
                return []

            self._lock.acquire()
            try:
                nodes = self._cache[WBTypes.NODES].data
                if key in nodes:
                    return nodes[key].getTags()
                return []
            finally:
                self._lock.release()
  
        def create_tags(self, names):
            '''
            Create new tags in the Wallaby store.

            Refresh the cached lists of groups and tags.
            '''
            if self._store is None:
                log.debug("WallabyOperations: create_tag, store object not yet created")
                return False
            try:
                self._lock.acquire()
                try:                    
                    for name in names:
                        self._store.addTag(name)
                except Exception, e:
                    log.debug("WallabyOperations: create_tag, exception suppressed, %s" % str(e))
                    return False
            finally:
                self._lock.release()
            return True

        def remove_tags(self, names):
            '''
            Remove a set of tags from the Wallaby store.

            Check the cached list of tags for the
            tag name first.  Refresh cached lists of
            groups, tags, and nodes.
            '''
            if self._store is None:
                log.debug("WallabyOperations: remove_tag, store object not yet created")
                return False

            for name in names:
                if self.get_tag_by_name(name) is not None:
                    try:
                        self._store.removeGroup(name)
                    except Exception, e:
                        log.debug("WallabyOperations: remove_tag, exception suppressed, %s" % str(e))
                        return False
            return True