def serve_forever(self):
    """ Run the container until killed. """
    log.debug("In Container.serve_forever")

    if not self.proc_manager.proc_sup.running:
        self.start()

    # Exit if immediate==True and children len is ok
    num_procs = len(self.proc_manager.proc_sup.children)
    immediate = CFG.system.get('immediate', False)
    if immediate and num_procs == 1:  # only spawned greenlet is the CC-Agent
        log.debug("Container.serve_forever exiting due to CFG.system.immediate")
    else:
        # print a warning just in case
        if immediate and num_procs != 1:
            log.warn("CFG.system.immediate=True but number of spawned processes is not 1 (%d)", num_procs)

        try:
            # This just waits in this Greenlet for all child processes to complete,
            # which is triggered somewhere else.
            self.proc_manager.proc_sup.join_children()
        except (KeyboardInterrupt, SystemExit) as ex:
            if hasattr(self, 'gl_parent_watch') and self.gl_parent_watch is not None:
                # Remove the greenlet that watches the parent process
                self.gl_parent_watch.kill()
            # Let the caller handle this
            raise
        except:
            log.exception('Unhandled error! Forcing container shutdown')
def on_channel_request_close(self, ch):
    """
    Close callback for pooled Channels.

    When a new, pooled Channel is created that this Node manages, it will specify this
    as the close callback in order to prevent that Channel from actually closing.
    """
    log.debug("NodeB: on_channel_request_close\n\tChType %s, Ch#: %d", ch.__class__, ch.get_channel_id())

    assert ch.get_channel_id() in self._pool_map
    with self._lock:
        chid = self._pool_map.pop(ch.get_channel_id())
        log.debug("Releasing BiDir pool Pika #%d, our id #%d", ch.get_channel_id(), chid)
        self._pool.release_id(chid)

        # reset channel
        ch.reset()

        # sanity check: if auto delete got turned on, we must remove this channel from the pool
        if ch._queue_auto_delete:
            log.warn("A pooled channel now has _queue_auto_delete set true, we must remove it: check what caused this as it's likely a timing error")
            self._bidir_pool.pop(chid)
            self._pool._ids_free.remove(chid)
def on_channel_close(self, transport, code, text):
    """
    Callback for when the Pika channel is closed.
    """
    # make callback to user event if we've closed
    if self._close_event is not None:
        self._close_event.set()
        self._close_event = None

    # remove transport so we don't try to use it again
    # (all?) calls are protected via _ensure_transport, which raises a ChannelError if you try to do anything with it.
    self._transport = None

    # make callback if it exists!
    if not (code == 0 or code == 200) and self._closed_error_callback:
        # run in try block because this can shutter the entire connection
        try:
            self._closed_error_callback(self, code, text)
        except Exception as e:
            log.warn("Closed error callback caught an exception: %s", str(e))

    # fixup channel state fsm, but only if we're not executing a transition right now
    if self._fsm.current_state != self.S_CLOSED and self._fsm.next_state is None:
        self._fsm.current_state = self.S_CLOSED
def deactivate_data_process(self, data_process_id=""): data_process_obj = self.read_data_process(data_process_id) if not data_process_obj.input_subscription_id: log.warn("data process '%s' has no subscription id to deactivate", data_process_id) return subscription_obj = self.clients.pubsub_management.read_subscription(data_process_obj.input_subscription_id) if subscription_obj.activated: #update the producer context with the deactivation time # todo: update the setting of this contect with the return vals from process_dispatcher:schedule_process after convert producer_obj = self._get_process_producer(data_process_id) producertype = type(producer_obj).__name__ if producer_obj.producer_context.type_ == OT.DataProcessProducerContext : log.debug("data_process '%s' (producer '%s'): deactivation_time = %s ", data_process_id, producer_obj._id, str(IonTime().to_string())) producer_obj.producer_context.deactivation_time = IonTime().to_string() self.clients.resource_registry.update(producer_obj) subscription_id = data_process_obj.input_subscription_id log.debug("Deactivating subscription '%s'", subscription_id) self.clients.pubsub_management.deactivate_subscription(subscription_id=subscription_id)
def _unregister_process(self, process_id, service_instance):
    # Remove process registration in resource registry
    if service_instance._proc_res_id:
        self.container.resource_registry.delete(service_instance._proc_res_id, del_associations=True)

    # Cleanup for specific process types
    if service_instance._proc_type == "service":
        # Check if this is the last process for this service and do auto delete service resources here
        svcproc_list, _ = self.container.resource_registry.find_objects(service_instance._proc_svc_id,
                                                                        "hasProcess", "Process", id_only=True)
        if not svcproc_list:
            self.container.resource_registry.delete(service_instance._proc_svc_id, del_associations=True)

    elif service_instance._proc_type == "agent":
        self.container.directory.unregister_safe("/Agents", service_instance.id)

    # Remove internal registration in container
    del self.procs[process_id]
    if service_instance._proc_name in self.procs_by_name:
        del self.procs_by_name[service_instance._proc_name]
    else:
        log.warn("Process name %s not in local registry", service_instance.name)
def _transform(self, obj):
    # Note: This check to detect an IonObject is a bit risky (only type_)
    if isinstance(obj, dict) and "type_" in obj:
        objc = obj
        otype = objc['type_'].encode('ascii')   # Correct?

        # don't supply a dict - we want the object to initialize with all its defaults intact,
        # which preserves things like IonEnumObject and invokes the setattr behavior we want there.
        ion_obj = self._obj_registry.new(otype)

        # get outdated attributes in data that are not defined in the current schema
        extra_attributes = objc.viewkeys() - ion_obj._schema.viewkeys() - BUILT_IN_ATTRS
        for extra in extra_attributes:
            objc.pop(extra)
            log.info('discard %s not in current schema' % extra)

        for k, v in objc.iteritems():
            # unicode translate to utf8
            if isinstance(v, unicode):
                v = str(v.encode('utf8'))

            # CouchDB adds _attachments and puts metadata in it
            # in pyon metadata is in the document
            # so we discard _attachments while transforming between the two
            if k not in ("type_", "_attachments", "_conflicts"):
                setattr(ion_obj, k, v)
            if k == "_conflicts":
                log.warn("CouchDB conflict detected for ID=%s (ignored): %s", obj.get('_id', None), v)

        return ion_obj

    return obj
def find_res_by_lcstate(self, lcstate, restype=None, id_only=False, filter=None):
    log.debug("find_res_by_lcstate(lcstate=%s, restype=%s)", lcstate, restype)
    if type(id_only) is not bool:
        raise BadRequest('id_only must be type bool, not %s' % type(id_only))
    if '_' in lcstate:
        log.warn("Search for compound lcstate restricted to maturity: %s", lcstate)
        lcstate, _ = lcstate.split("_", 1)
    filter = filter if filter is not None else {}
    qual_ds_name = self._get_datastore_name()
    if id_only:
        query = "SELECT id, name, type_, lcstate, availability FROM " + qual_ds_name
    else:
        query = "SELECT id, name, type_, lcstate, availability, doc FROM " + qual_ds_name
    query_clause = " WHERE "
    query_args = dict(type_=restype, lcstate=lcstate)

    is_maturity = lcstate not in AvailabilityStates
    if is_maturity:
        query_clause += "lcstate=%(lcstate)s"
    else:
        query_clause += "availability=%(lcstate)s"

    if restype:
        query_clause += " AND type_=%(type_)s"

    query_clause = self._add_access_filter(filter, qual_ds_name, query_clause, query_args)
    extra_clause = filter.get("extra_clause", "")
    with self.pool.cursor(**self.cursor_args) as cur:
        cur.execute(query + query_clause + extra_clause, query_args)
        rows = cur.fetchall()

    res_assocs = [dict(id=self._prep_id(row[0]), name=row[1], type=row[2],
                       lcstate=row[3] if is_maturity else row[4]) for row in rows]
    log.debug("find_res_by_lcstate() found %s objects", len(res_assocs))
    return self._prepare_find_return(rows, res_assocs, id_only=id_only)
def _ensure_transport(self):
    """
    Ensures this Channel has been activated with the Node.
    """
    # log.debug("BaseChannel._ensure_transport (current: %s)", self._transport is not None)
    if not self._transport:
        raise ChannelError("No transport attached")

    if not self._lock:
        raise ChannelError("No lock available")

    # is lock already acquired? spit out a notice
    if self._lock._is_owned():
        log.warn("INTERLEAVE DETECTED:\n\nCURRENT STACK:\n%s\n\nSTACK THAT LOCKED: %s\n",
                 "".join(traceback.format_stack()), "".join(self._lock_trace))

    with self._lock:
        # we could wait and wait, and it gets closed, and unless we check again, we'd never know!
        if not self._transport:
            raise ChannelError("No transport attached")

        self._lock_trace = traceback.format_stack()
        try:
            yield
        finally:
            self._lock_trace = None
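# Hypothetical usage sketch (assumption, not part of the original source): the yield above
# suggests _ensure_transport is wrapped as a context manager (e.g. with contextlib.contextmanager)
# so callers can hold the channel lock while touching the transport. The basic_ack call below
# is illustrative only and may not match the real transport API.
#
#     with self._ensure_transport():
#         self._transport.basic_ack(delivery_tag)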
def _spawned_proc_failed(self, gproc):
    log.error("ProcManager._spawned_proc_failed: %s, %s", gproc, gproc.exception)

    # for now - don't worry about the mapping, if we get a failure, just kill the container.
    # leave the mapping in place for potential expansion later.

    # # look it up in mapping
    # if not gproc in self._spawned_proc_to_process:
    #     log.warn("No record of gproc %s in our map (%s)", gproc, self._spawned_proc_to_process)
    #     return
    prc = self._spawned_proc_to_process.get(gproc, None)
    #
    # # make sure prc is in our list
    # if not prc in self.procs.values():
    #     log.warn("prc %s not found in procs list", prc)
    #     return

    # stop the rest of the process
    if prc is not None:
        try:
            self.terminate_process(prc.id, False)
        except Exception as e:
            log.warn("Problem while stopping rest of failed process %s: %s", prc, e)
        finally:
            self._call_proc_state_changed(prc, ProcessStateEnum.FAILED)
    else:
        log.warn("No ION process found for failed proc manager child: %s", gproc)

    #self.container.fail_fast("Container process (%s) failed: %s" % (svc, gproc.exception))

    # Stop the container if this was the last process
    if not self.procs and CFG.get_safe("container.processes.exit_once_empty", False):
        self.container.fail_fast("Terminating container after last process (%s) failed: %s" % (gproc, gproc.exception))
def update_service_access_policy(self, service_name, service_op='', delete_policy=False):

    if self.policy_decision_point_manager is not None:

        try:
            # First update any access policy rules
            rules = self.policy_client.get_active_service_access_policy_rules(service_name, self._container_org_name)
            self.policy_decision_point_manager.load_service_policy_rules(service_name, rules)

        except Exception, e:
            # If the resource does not exist, just ignore it - but log a warning.
            log.warn("The service %s is not found or there was an error applying access policy: %s" % (service_name, e.message))

        # Next update any precondition policies
        try:
            proc = self.container.proc_manager.get_a_local_process(service_name)
            if proc is not None:
                op_preconditions = self.policy_client.get_active_process_operation_preconditions(service_name, service_op, self._container_org_name)
                if op_preconditions:
                    for op in op_preconditions:
                        for pre in op.preconditions:
                            self.unregister_process_operation_precondition(proc, op.op, pre)
                            if not delete_policy:
                                self.register_process_operation_precondition(proc, op.op, pre)
                else:
                    # Unregister all...just in case
                    self.unregister_all_process_operation_precondition(proc, service_op)

        except Exception, e:
            # If the resource does not exist, just ignore it - but log a warning.
            log.warn("The process %s is not found for op %s or there was an error applying access policy: %s" % (service_name, service_op, e.message))
def on_channel_close(self, code, text):
    """
    Callback for when the Pika channel is closed.
    """
    logmeth = log.debug
    if not (code == 0 or code == 200):
        logmeth = log.error

    logmeth("BaseChannel.on_channel_close\n\tchannel number: %s\n\tcode: %d\n\ttext: %s",
            self.get_channel_id(), code, text)

    # remove amq_chan so we don't try to use it again
    # (all?) calls are protected via _ensure_amq_chan, which raises a ChannelError if you try to do anything with it.
    self._amq_chan = None

    # make callback if it exists!
    if not (code == 0 or code == 200) and self._closed_error_callback:
        # run in try block because this can shutter the entire connection
        try:
            self._closed_error_callback(self, code, text)
        except Exception, e:
            log.warn("Closed error callback caught an exception: %s", str(e))
def __init__(self, xp_name=None, event_type=None, origin=None, queue_name=None,
             sub_type=None, origin_type=None, pattern=None):
    self.event_type = event_type
    self.sub_type = sub_type
    self.origin_type = origin_type
    self.origin = origin

    xp_name = xp_name or get_events_exchange_point()
    if pattern:
        binding = pattern
    else:
        binding = self._topic(event_type, origin, sub_type, origin_type)
    self.binding = binding

    # TODO: Provide a case where we can have multiple bindings (e.g. different event_types)

    # prefix the queue_name, if specified, with the sysname
    # this is because queue names transcend xp boundaries (see R1 OOIION-477)
    if queue_name is not None:
        if not queue_name.startswith(bootstrap.get_sys_name()):
            queue_name = "%s.%s" % (bootstrap.get_sys_name(), queue_name)
            log.warn("queue_name specified, prepending sys_name to it: %s", queue_name)

    # set this name to be picked up by inherited folks
    self._ev_recv_name = (xp_name, queue_name)
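# Hypothetical usage sketch (assumption, not part of the original source): constructing an
# event subscriber mixin like the one above. The class name EventSubscriber is assumed for
# illustration; only parameters shown in the __init__ above are used.
#
#     sub = EventSubscriber(event_type="ResourceModifiedEvent",
#                           origin="instrument_agent_1",
#                           queue_name="ingest_events")   # sysname prefix is added automatically
#     # sub.binding now holds the routing-key pattern derived from event_type/origin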
def stop(self):
    log.debug("ProcManager stopping ...")

    from pyon.datastore.couchdb.couchdb_datastore import CouchDB_DataStore
    stats1 = CouchDB_DataStore._stats.get_stats()

    # Call quit on procs to give them ability to clean up
    # @TODO terminate_process is not gl-safe
    # gls = map(lambda k: spawn(self.terminate_process, k), self.procs.keys())
    # join(gls)
    procs_list = sorted(self.procs.values(), key=lambda proc: proc._proc_start_time, reverse=True)
    for proc in procs_list:
        self.terminate_process(proc.id)

    # TODO: Have a choice of shutdown behaviors for waiting on children, timeouts, etc
    self.proc_sup.shutdown(CFG.cc.timeout.shutdown)

    if self.procs:
        log.warn("ProcManager procs not empty: %s", self.procs)
    if self.procs_by_name:
        log.warn("ProcManager procs_by_name not empty: %s", self.procs_by_name)

    # Remove Resource registration
    self.container.resource_registry.delete(self.cc_id, del_associations=True)
    # TODO: Check associations to processes

    stats2 = CouchDB_DataStore._stats.get_stats()
    stats3 = CouchDB_DataStore._stats.diff_stats(stats2, stats1)
    log.debug("Datastore stats difference during stop(): %s", stats3)

    log.debug("ProcManager stopped, OK.")
def process_csv(self, fuc):

    # CSV file open here
    csv_filename = fuc.get('path', None)
    if csv_filename is None:
        raise BadRequest("uploaded file has no path")

    # keep track of the number of calibrations we actually process
    nupdates = 0

    updates = {}  # keys are reference_designators, use to update object store after parsing CSV

    with open(csv_filename, 'rb') as csvfile:
        # eliminate blank lines
        csvfile = (row for row in csvfile if len(row.strip()) > 0)
        # eliminate commented lines
        csvfile = (row for row in csvfile if not row.startswith('#'))
        # open CSV reader
        csv_reader = csv.reader(csvfile, delimiter=',')
        # iterate the rows returned by csv.reader
        for row in csv_reader:
            if len(row) != 6:
                log.warn("invalid calibration line %s" % ','.join(row))
                continue
            try:
                ipn = row[0]            # instrument_property_number
                name = row[1]           # calibration_name
                value = float(row[2])   # calibration_value
                units = row[3]
                description = row[4]    # description
                start_date = row[5]     # start_date TODO date object?
                d = {
                    'value': value,
                    'units': units,
                    'description': description,
                    'start_date': start_date
                }
            except ValueError as e:
                continue  # TODO error message? or just skip?
            # get ipn key
            if ipn not in updates:
                updates[ipn] = {}  # initialize empty dict
            if name not in updates[ipn]:
                updates[ipn][name] = []  # will be array of dicts
            updates[ipn][name].append(d)
            nupdates = nupdates + 1

    # insert the updates into object store
    self.update_object_store(updates)

    # update FileUploadContext object (change status to complete)
    fuc['status'] = 'UploadCalibrationProcessing process complete - %d updates added to object store' % nupdates
    self.object_store.update_doc(fuc)

    # remove uploaded file
    try:
        os.remove(csv_filename)
    except OSError:
        pass  # TODO take action to get this removed
def delete_doc(self, doc, datastore_name=""): if not datastore_name: datastore_name = self.datastore_name try: datastore_dict = self.root[datastore_name] except KeyError: raise BadRequest('Data store ' + datastore_name + ' does not exist.') if type(doc) is str: object_id = doc else: object_id = doc["_id"] log.info('Deleting object %s/%s' % (datastore_name, object_id)) if object_id in datastore_dict.keys(): if self._is_in_association(object_id, datastore_name): obj = self.read(object_id, "", datastore_name) log.warn( "XXXXXXX Attempt to delete object %s that still has associations" % str(obj)) # raise BadRequest("Object cannot be deleted until associations are broken") # Find all version dicts and delete them for key in datastore_dict.keys(): if key.find(object_id + '_version_') == 0: del datastore_dict[key] # Delete the HEAD dict del datastore_dict[object_id] # Delete the version counter dict del datastore_dict['__' + object_id + '_version_counter'] else: raise NotFound('Object with id ' + object_id + ' does not exist.') log.info('Delete result: True')
def read_all_docs(self):
    docs_each_pass = 10001  # +1 b/c will have to skip first doc on all except first read
    documents = []
    not_first_pass = False
    more_to_read = True
    last_id = None
    while more_to_read:
        try:
            if not_first_pass:
                # begin at last doc returned in last read
                results = self.db.view("_all_docs", include_docs=False, limit=docs_each_pass, startkey_docid=last_id)
                rows = results.rows
            else:
                results = self.db.view("_all_docs", include_docs=False, limit=docs_each_pass - 1)
                rows = results.rows
        except:
            log.warn("exception reading all docs (done?)", exc_info=True)
            break
        if len(rows) < docs_each_pass:
            more_to_read = False
        if not_first_pass:
            rows = rows[1:]
        for row in rows:
            last_id = row.id
            documents.append(last_id)
        not_first_pass = True
    log.debug("read total %d ids", len(documents))
    return documents
def serve_forever(self):
    """ Run the container until killed. """
    log.debug("In Container.serve_forever")

    if not self.proc_manager.proc_sup.running:
        self.start()

    # serve forever short-circuits if immediate is on and children len is ok
    num_procs = len(self.proc_manager.proc_sup.children)
    immediate = CFG.system.get('immediate', False)
    if not (immediate and num_procs == 1):  # only spawned greenlet is the CC-Agent

        # print a warning just in case
        if immediate and num_procs != 1:
            log.warn("CFG.system.immediate=True but number of spawned processes is not 1 (%d)", num_procs)

        try:
            # This just waits in this Greenlet for all child processes to complete,
            # which is triggered somewhere else.
            self.proc_manager.proc_sup.join_children()
        except (KeyboardInterrupt, SystemExit) as ex:
            log.info('Received a kill signal, shutting down the container.')

            watch_parent = CFG.system.get('watch_parent', None)
            if watch_parent:
                watch_parent.kill()
        except:
            log.exception('Unhandled error! Forcing container shutdown')
    else:
        log.debug("Container.serve_forever short-circuiting due to CFG.system.immediate")

    self.proc_manager.proc_sup.shutdown(CFG.cc.timeout.shutdown)
def _spawned_proc_failed(self, gproc):
    log.error("ProcManager._spawned_proc_failed: %s, %s", gproc, gproc.exception)

    # for now - don't worry about the mapping, if we get a failure, just kill the container.
    # leave the mapping in place for potential expansion later.

    # # look it up in mapping
    # if not gproc in self._spawned_proc_to_process:
    #     log.warn("No record of gproc %s in our map (%s)", gproc, self._spawned_proc_to_process)
    #     return
    prc = self._spawned_proc_to_process.get(gproc, None)
    #
    # # make sure prc is in our list
    # if not prc in self.procs.values():
    #     log.warn("prc %s not found in procs list", prc)
    #     return

    # stop the rest of the process
    if prc is not None:
        try:
            self.terminate_process(prc.id, False)
        except Exception as e:
            log.warn("Problem while stopping rest of failed process %s: %s", prc, e)
        finally:
            self._call_proc_state_changed(prc, ProcessStateEnum.FAILED)
    else:
        log.warn("No ION process found for failed proc manager child: %s", gproc)
def _notify_stop(self):
    """
    Called when the process is about to be shut down.

    Instructs all listeners to close, puts a StopIteration into the synchronized queue,
    and waits for the listeners to close and for the control queue to exit.
    """
    for listener in self.listeners:
        try:
            listener.close()
        except Exception as ex:
            tb = traceback.format_exc()
            log.warn("Could not close listener, attempting to ignore: %s\nTraceback:\n%s", ex, tb)

    self._ctrl_queue.put(StopIteration)

    # wait_children will join them and then get() them, which may raise an exception if any of them
    # died with an exception.
    self.thread_manager.wait_children(30)

    PyonThread._notify_stop(self)

    # run the cleanup method if we have one
    if self._cleanup_method is not None:
        try:
            self._cleanup_method(self)
        except Exception as ex:
            log.warn("Cleanup method error, attempting to ignore: %s\nTraceback: %s", ex, traceback.format_exc())
def on_start(self):
    self.data_source_subscriber = EventSubscriber(event_type=OT.ResourceModifiedEvent,
                                                  origin_type=RT.DataSource,
                                                  callback=self._register_data_source)
    self.provider_subscriber = EventSubscriber(event_type=OT.ResourceModifiedEvent,
                                               origin_type=RT.ExternalDataProvider,
                                               callback=self._register_provider)
    self.data_source_subscriber.start()
    self.provider_subscriber.start()

    self.rr = self.container.resource_registry

    self.using_eoi_services = CFG.get_safe('eoi.meta.use_eoi_services', False)
    self.server = CFG.get_safe('eoi.importer_service.server', "localhost") + ":" + \
                  str(CFG.get_safe('eoi.importer_service.port', 8844))

    log.info("Using geoservices=" + str(self.using_eoi_services))
    if not self.using_eoi_services:
        log.warn("not using geoservices...")

    self.importer_service_available = self.check_for_importer_service()
    if not self.importer_service_available:
        log.warn("not using importer service...")
def serve_forever(self):
    """ Run the container until killed. """
    log.debug("In Container.serve_forever")

    if not self.proc_manager.proc_sup.running:
        self.start()

    # serve forever short-circuits if immediate is on and children len is ok
    num_procs = len(self.proc_manager.proc_sup.children)
    immediate = CFG.system.get('immediate', False)
    if not (immediate and num_procs == 1):  # only spawned greenlet is the CC-Agent

        # print a warning just in case
        if immediate and num_procs != 1:
            log.warn("CFG.system.immediate=True but number of spawned processes is not 1 (%d)", num_procs)

        try:
            # This just waits in this Greenlet for all child processes to complete,
            # which is triggered somewhere else.
            self.proc_manager.proc_sup.join_children()
        except (KeyboardInterrupt, SystemExit) as ex:
            log.info('Received a kill signal, shutting down the container.')

            if hasattr(self, 'gl_parent_watch') and self.gl_parent_watch is not None:
                self.gl_parent_watch.kill()
        except:
            log.exception('Unhandled error! Forcing container shutdown')
    else:
        log.debug("Container.serve_forever short-circuiting due to CFG.system.immediate")

    self.proc_manager.proc_sup.shutdown(CFG.cc.timeout.shutdown)
def target(self, *args, **kwargs):
    """
    Control entrypoint. Setup the base properties for this process (mainly a listener).
    """
    if self.name:
        threading.current_thread().name = "%s-target" % self.name

    # start time
    self._start_time = int(get_ion_ts())

    # spawn control flow loop
    self._ctrl_thread = self.thread_manager.spawn(self._control_flow)

    # wait on control flow loop, heartbeating as appropriate
    while not self._ctrl_thread.ev_exit.wait(timeout=self._heartbeat_secs):
        hbst = self.heartbeat()

        if not all(hbst):
            log.warn("Heartbeat status for process %s returned %s", self, hbst)
            if self._heartbeat_stack is not None:
                stack_out = "".join(traceback.format_list(self._heartbeat_stack))
            else:
                stack_out = "N/A"

            #raise PyonHeartbeatError("Heartbeat failed: %s, stacktrace:\n%s" % (hbst, stack_out))
            log.warn("Heartbeat failed: %s, stacktrace:\n%s", hbst, stack_out)

    # this is almost a no-op as we don't fall out of the above loop without
    # exiting the ctrl_thread, but having this line here makes testing much
    # easier.
    self._ctrl_thread.join()
def delete_doc(self, doc, datastore_name=""): if not datastore_name: datastore_name = self.datastore_name try: db = self.server[datastore_name] except ValueError: raise BadRequest("Data store name %s is invalid" % datastore_name) if type(doc) is str: log.info('Deleting object %s/%s' % (datastore_name, doc)) if self._is_in_association(doc, datastore_name): obj = self.read(doc, datastore_name) log.warn("XXXXXXX Attempt to delete object %s that still has associations" % str(obj)) # raise BadRequest("Object cannot be deleted until associations are broken") try: del db[doc] except ResourceNotFound: raise NotFound('Object with id %s does not exist.' % str(doc)) else: log.info('Deleting object %s/%s' % (datastore_name, doc["_id"])) if self._is_in_association(doc["_id"], datastore_name): log.warn("XXXXXXX Attempt to delete object %s that still has associations" % str(doc)) # raise BadRequest("Object cannot be deleted until associations are broken") try: res = db.delete(doc) except ResourceNotFound: raise NotFound('Object with id %s does not exist.' % str(doc["_id"])) log.debug('Delete result: %s' % str(res))
def _ems_available(self):
    """
    Returns True if the EMS is (likely) available and the auto_register CFG entry is True.

    Has the side effect of bootstrapping the org_id and default_xs's id/rev from the RR.
    Therefore, cannot be a property.
    """
    if CFG.container.get('exchange', {}).get('auto_register', False):
        # ok now make sure it's in the directory
        svc_de = self.container.directory.lookup('/Services/exchange_management')
        if svc_de is not None:
            if not self.org_id:
                # find the default Org
                org_ids = self._rr_client.find_resources(RT.Org, id_only=True)
                if not (len(org_ids) and len(org_ids[0]) == 1):
                    log.warn("EMS available but could not find Org")
                    return False

                self.org_id = org_ids[0][0]
                log.debug("Bootstrapped Container exchange manager with org id: %s", self.org_id)
            return True

    return False
def start(self):
    log.debug("GovernanceController starting ...")
    self._CFG = CFG

    self.enabled = CFG.get_safe('interceptor.interceptors.governance.config.enabled', False)
    if not self.enabled:
        log.warn("GovernanceInterceptor disabled by configuration")
    self.policy_event_subscriber = None

    # Containers default to not Org Boundary and ION Root Org
    self._is_container_org_boundary = CFG.get_safe('container.org_boundary', False)
    self._container_org_name = CFG.get_safe('container.org_name', CFG.get_safe('system.root_org', 'ION'))
    self._container_org_id = None
    self._system_root_org_name = CFG.get_safe('system.root_org', 'ION')

    self._is_root_org_container = (self._container_org_name == self._system_root_org_name)

    self.system_actor_id = None
    self.system_actor_user_header = None

    self.rr_client = ResourceRegistryServiceProcessClient(process=self.container)
    self.policy_client = PolicyManagementServiceProcessClient(process=self.container)

    if self.enabled:
        config = CFG.get_safe('interceptor.interceptors.governance.config')
        self.initialize_from_config(config)

        self.policy_event_subscriber = EventSubscriber(event_type=OT.PolicyEvent, callback=self.policy_event_callback)
        self.policy_event_subscriber.start()

        self._policy_snapshot = self._get_policy_snapshot()
        self._log_policy_update("start_governance_ctrl", message="Container start")
def get_method_arguments(module, method_name, **kwargs):
    """
    Returns a dict of the allowable method parameters
    @param module:
    @param method_name:
    @param kwargs:
    @return:
    """
    param_dict = {}

    if hasattr(module, method_name):
        try:
            # This will fail running unit tests with mock objects - BOO!
            method_args = inspect.getargspec(getattr(module, method_name))
            for arg in method_args[0]:
                if kwargs.has_key(arg):
                    param_dict[arg] = kwargs[arg]

        except Exception as e:
            # Log a warning and simply return an empty dict
            log.warn('Cannot determine the arguments for method: %s in module: %s: %s', module, method_name, e.message)

    return param_dict
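# Illustrative usage sketch (assumption, not part of the original source): filter an
# arbitrary kwargs dict down to the parameters the target callable actually declares.
# The Sampler class and its read() method are made up for this example.
#
#     class Sampler(object):
#         def read(self, channel, timeout=5):
#             return channel, timeout
#
#     get_method_arguments(Sampler, 'read', channel=3, timeout=10, unused='dropped')
#     # -> {'channel': 3, 'timeout': 10}; 'unused' is filtered out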
def _spawn_agent_process(self, process_id, name, module, cls, config):
    """
    Spawn a process acting as agent process.
    Attach to service pid.
    """
    service_instance = self._create_service_instance(process_id, name, module, cls, config)
    if not isinstance(service_instance, ResourceAgent):
        raise ContainerConfigError("Agent process must extend ResourceAgent")

    self._service_init(service_instance)
    self._set_service_endpoint(service_instance, service_instance.id)
    self._service_start(service_instance)

    # Directory registration
    caps = service_instance.get_capabilities()
    self.container.directory.register("/Agents", service_instance.id,
                                      **dict(name=service_instance._proc_name,
                                             container=service_instance.container.id,
                                             resource_id=service_instance.resource_id,
                                             agent_id=service_instance.agent_id,
                                             def_id=service_instance.agent_def_id,
                                             capabilities=caps))

    if not service_instance.resource_id:
        log.warn("Agent process id=%s does not define resource_id!!" % service_instance.id)

    return service_instance
def _cleanup_outdated_entries(self, dir_entries, common="key"):
    """
    This function takes all DirEntry from the list and removes all but the most recent one
    by ts_updated timestamp. It returns the most recent DirEntry and removes the others by
    direct datastore operations. If there are multiple entries with most recent timestamp,
    the first encountered is kept and the others non-deterministically removed.
    Note: This operation can be called for DirEntries without common keys, e.g. for all
    entries registering an agent for a device.
    """
    if not dir_entries:
        return
    newest_entry = dir_entries[0]
    try:
        for de in dir_entries:
            if int(de.ts_updated) > int(newest_entry.ts_updated):
                newest_entry = de

        remove_list = [de for de in dir_entries if de is not newest_entry]
        log.info("Attempting to cleanup these directory entries: %s" % remove_list)
        for de in remove_list:
            try:
                self.dir_store.delete(de)
            except Exception as ex:
                log.warn("Removal of outdated %s directory entry failed: %s" % (common, de))
        log.info("Cleanup of %s old %s directory entries succeeded" % (len(remove_list), common))
    except Exception as ex:
        log.warn("Cleanup of multiple directory entries for %s failed: %s" % (common, str(ex)))

    return newest_entry
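# Hypothetical usage sketch (assumption, not part of the original source): deduplicate the
# directory entries registered for one device's agent. find_by_value and the '/Agents' path
# mirror calls seen elsewhere in this listing but are illustrative here.
#
#     agent_entries = self.find_by_value('/Agents', 'resource_id', resource_id)
#     newest = self._cleanup_outdated_entries(agent_entries, common="agent")
#     # only the entry with the largest ts_updated remains in the datastore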
def _execute(self, cprefix, command):
    if not command:
        raise iex.BadRequest("execute argument 'command' not present")
    if not command.command:
        raise iex.BadRequest("command not set")

    cmd_res = IonObject("AgentCommandResult", command_id=command.command_id, command=command.command)
    cmd_func = getattr(self, cprefix + str(command.command), None)
    if cmd_func:
        cmd_res.ts_execute = get_ion_ts()
        try:
            res = cmd_func(*command.args, **command.kwargs)
            cmd_res.status = 0
            cmd_res.result = res
        except iex.IonException as ex:
            # TODO: Distinguish application vs. uncaught exception
            cmd_res.status = getattr(ex, 'status_code', -1)
            cmd_res.result = str(ex)
            log.warn("Agent command %s failed with trace=%s" % (command.command, traceback.format_exc()))
    else:
        log.info("Agent command not supported: %s" % (command.command))
        ex = iex.NotFound("Command not supported: %s" % command.command)
        cmd_res.status = iex.NotFound.status_code
        cmd_res.result = str(ex)

    sub_type = "%s.%s" % (command.command, cmd_res.status)
    post_event = self._event_publisher._create_event(event_type=self.COMMAND_EVENT_TYPE,
                                                     origin=self.resource_id,
                                                     origin_type=self.ORIGIN_TYPE,
                                                     sub_type=sub_type,
                                                     command=command,
                                                     result=cmd_res)
    post_event = self._post_execute_event_hook(post_event)
    success = self._event_publisher._publish_event(post_event, origin=post_event.origin)

    return cmd_res
def _add_range(self, coverage_id=None, name=None, definition=None, axis=None, constraint=None,
               mesh_location='vertex', values_path=None, unit_of_measure_code=None):
    """
    @brief Adds a coordinate axis range set to a coverage
    @param name The name of this range, i.e. temp_data
    @param definition URL to definition
    @param axis Name of physical Axis, i.e. Temperature, Time etc.
    @param constraint A constraint object
    @param mesh_location
    @param values_path Location of data points in HDF e.g. /fields/temp
    @param unit_of_measure_code Code for unit of measure
    """
    if name in self._ident:
        log.warn('Field name "%s" already in identifiables!' % name)
        return 0

    range = RangeSet(
        definition=definition,
        constraint=constraint,
        nil_values_ids=self._nil_value_name,
        mesh_location=CategoryElement(value=mesh_location),
        values_path=values_path,
        unit_of_measure=UnitReferenceProperty(code=unit_of_measure_code)
    )
    self._ident[name] = range

    coverage = self._ident[coverage_id]
    coverage.range_id = name
def service_policy_event_callback(self, *args, **kwargs):
    service_policy_event = args[0]
    log.debug('Service related policy event received: %s', str(service_policy_event.__dict__))

    policy_id = service_policy_event.origin
    service_name = service_policy_event.service_name
    service_op = service_policy_event.op

    if service_name:
        if self.container.proc_manager.is_local_service_process(service_name):
            self.update_service_access_policy(service_name, service_op)
        elif self.container.proc_manager.is_local_agent_process(service_name):
            self.update_service_access_policy(service_name, service_op)

    else:
        if self.policy_decision_point_manager is not None:
            try:
                rules = self.policy_client.get_active_service_access_policy_rules('', self._container_org_name)
                self.policy_decision_point_manager.load_common_service_policy_rules(rules)

                # Reload all policies for existing services
                for service_name in self.policy_decision_point_manager.get_list_service_policies():
                    if self.container.proc_manager.is_local_service_process(service_name):
                        self.update_service_access_policy(service_name)

            except Exception, e:
                # If the resource does not exist, just ignore it - but log a warning.
                log.warn("There was an error applying access policy: %s" % e.message)
def delete_doc(self, doc, datastore_name="", del_associations=False): ds, datastore_name = self._get_datastore(datastore_name) doc_id = doc if type(doc) is str else doc["_id"] log.debug('Deleting object %s/%s', datastore_name, doc_id) if del_associations: assoc_ids = self.find_associations(anyobj=doc_id, id_only=True) self.delete_doc_mult(assoc_ids) # for aid in assoc_ids: # self.delete(aid, datastore_name=datastore_name) # log.info("Deleted %n associations for object %s", len(assoc_ids), doc_id) elif self._is_in_association(doc_id, datastore_name): bad_doc = self.read(doc_id) if doc: log.warn("XXXXXXX Attempt to delete %s object %s that still has associations" % (bad_doc.type_, doc_id)) else: log.warn("XXXXXXX Attempt to delete object %s that still has associations" % doc_id) # raise BadRequest("Object cannot be deleted until associations are broken") try: if type(doc) is str: del ds[doc_id] else: ds.delete(doc) self._count(delete=1) except ResourceNotFound: raise NotFound('Object with id %s does not exist.' % doc_id)
def delete_doc(self, doc, datastore_name=""): if not datastore_name: datastore_name = self.datastore_name try: datastore_dict = self.root[datastore_name] except KeyError: raise BadRequest('Data store ' + datastore_name + ' does not exist.') if type(doc) is str: object_id = doc else: object_id = doc["_id"] log.info('Deleting object %s/%s' % (datastore_name, object_id)) if object_id in datastore_dict.keys(): if self._is_in_association(object_id, datastore_name): obj = self.read(object_id, "", datastore_name) log.warn("XXXXXXX Attempt to delete object %s that still has associations" % str(obj)) # raise BadRequest("Object cannot be deleted until associations are broken") # Find all version dicts and delete them for key in datastore_dict.keys(): if key.find(object_id + '_version_') == 0: del datastore_dict[key] # Delete the HEAD dict del datastore_dict[object_id] # Delete the version counter dict del datastore_dict['__' + object_id + '_version_counter'] else: raise NotFound('Object with id ' + object_id + ' does not exist.') log.info('Delete result: True')
def delete_xs(self, xs, use_ems=True):
    """
    @type xs    ExchangeSpace
    """
    log.debug("ExchangeManager.delete_xs: %s", xs)

    name = xs._exchange   # @TODO this feels wrong
    self.xs_by_name.pop(name, None)   # EMS may be running on the same container, which touches this same dict
                                      # so delete in the safest way possible
                                      # @TODO: does this mean we need to sync xs_by_name and friends in the datastore?

    if use_ems and self._ems_available():
        log.debug("Using EMS to delete_xs")
        xso = self._get_xs_obj(name)

        self._ems_client.delete_exchange_space(xso._id, headers=self._build_security_headers())
        del self._xs_cache[name]
    else:
        try:
            xs.delete()
        except TransportError as ex:
            log.warn("Could not delete XS (%s): %s", name, ex)
def delete(self, object_id='', del_associations=False):
    res_obj = self.read(object_id)
    if not res_obj:
        raise NotFound("Resource %s does not exist" % object_id)

    if not del_associations:
        self._delete_owners(object_id)

    # Update first to RETIRED to give ElasticSearch a hint
    res_obj.lcstate = LCS.RETIRED
    res_obj.availability = AS.PRIVATE
    self.rr_store.update(res_obj)

    if del_associations:
        assoc_ids = self.find_associations(anyside=object_id, id_only=True)
        self.rr_store.delete_doc_mult(assoc_ids, object_type="Association")
        #log.debug("Deleted %s associations for resource %s", len(assoc_ids), object_id)

    elif self._is_in_association(object_id):
        log.warn("Deleting object %s that still has associations" % object_id)

    res = self.rr_store.delete(object_id)

    if self.container.has_capability(self.container.CCAP.EVENT_PUBLISHER):
        self.event_pub.publish_event(event_type="ResourceModifiedEvent",
                                     origin=res_obj._id, origin_type=res_obj._get_type(),
                                     sub_type="DELETE",
                                     mod_type=ResourceModificationType.DELETE)

    return res
def delete_xp(self, xp, use_ems=True):
    log.debug("ExchangeManager.delete_xp: name=%s", 'TODO')  # xp.build_xname())

    name = xp._exchange   # @TODO: not right
    self.xn_by_name.pop(name, None)   # EMS may be running on the same container, which touches this same dict
                                      # so delete in the safest way possible
                                      # @TODO: does this mean we need to sync xs_by_name and friends in the datastore?

    if use_ems and self._ems_available():
        log.debug("Using EMS to delete_xp")

        # find the XP object via RR
        xpo_ids = self._rr.find_resources(RT.ExchangePoint, name=name, id_only=True)
        if not (len(xpo_ids) and len(xpo_ids[0]) == 1):
            log.warn("Could not find XP in RR with name of %s", name)

        xpo_id = xpo_ids[0][0]
        self._ems_client.delete_exchange_point(xpo_id, headers=self._build_security_headers())
    else:
        try:
            xp.delete()
        except TransportError as ex:
            log.warn("Could not delete XP (%s): %s", name, ex)
def _cleanup_outdated_entries(self, dir_entries, common="key"):
    """
    This function takes all DirEntry from the list and removes all but the most recent one
    by ts_updated timestamp. It returns the most recent DirEntry and removes the others by
    direct datastore operations.
    """
    if not dir_entries:
        return
    newest_entry = dir_entries[0]
    try:
        remove_list = []
        for de in dir_entries:
            if int(de.ts_updated) > int(newest_entry.ts_updated):
                remove_list.append(newest_entry)
                newest_entry = de
            elif de.key != newest_entry.key:
                remove_list.append(de)

        log.info("Attempting to cleanup these directory entries: %s" % remove_list)
        for de in remove_list:
            try:
                self.dir_store.delete(de)
            except Exception as ex:
                log.warn("Removal of outdated %s directory entry failed: %s" % (common, de))
        log.info("Cleanup of %s old %s directory entries succeeded" % (len(remove_list), common))
    except Exception as ex:
        log.warn("Cleanup of multiple directory entries for %s failed: %s" % (common, str(ex)))

    return newest_entry
def delete_xn(self, xn, use_ems=False):
    log.debug("ExchangeManager.delete_xn: name=%s", "TODO")  # xn.build_xlname())

    name = xn._queue   # @TODO feels wrong
    self.xn_by_name.pop(name, None)   # EMS may be running on the same container, which touches this same dict
                                      # so delete in the safest way possible
                                      # @TODO: does this mean we need to sync xs_by_name and friends in the datastore?

    if use_ems and self._ems_available():
        log.debug("Using EMS to delete_xn")

        # find the XN object via RR?
        xno_ids = self._rr.find_resources(RT.ExchangeName, name=name, id_only=True)
        if not (len(xno_ids) and len(xno_ids[0]) == 1):
            log.warn("Could not find XN in RR with name of %s", name)

        xno_id = xno_ids[0][0]
        self._ems_client.undeclare_exchange_name(xno_id, headers=self._build_security_headers())   # "canonical name" currently understood to be RR id
    else:
        try:
            xn.delete()
        except TransportError as ex:
            log.warn("Could not delete XN (%s): %s", name, ex)
def is_service_available(self, service_name, local_rr_only=False):

    try:
        service_resource = None
        from pyon.core.bootstrap import container_instance
        from interface.objects import ServiceStateEnum
        # Use container direct RR connection if available, otherwise use messaging to the RR service
        if hasattr(container_instance, 'has_capability') and container_instance.has_capability('RESOURCE_REGISTRY'):
            service_resource, _ = container_instance.resource_registry.find_resources(restype='Service', name=service_name)
        else:
            if not local_rr_only:
                from interface.services.coi.iresource_registry_service import ResourceRegistryServiceClient
                rr_client = ResourceRegistryServiceClient(container_instance.node)
                service_resource, _ = rr_client.find_resources(restype='Service', name=service_name)

        # The service is available only if there is a single RR object for it and it is in one of these states:
        if service_resource and len(service_resource) > 1:
            log.warn("Found multiple service instances registered under name %s: %s", service_name, service_resource)
        if service_resource and (service_resource[0].state == ServiceStateEnum.READY or service_resource[0].state == ServiceStateEnum.STEADY):
            return True
        elif service_resource:
            log.warn("Call to is_service_available() failed although a Service resource exists: %s", service_resource)

        return False

    except Exception, e:
        return False
def _sync_call(self, func, cb_arg, *args, **kwargs):
    """
    Functionally similar to the generic blocking_cb but with error support that's Channel specific.
    """
    ar = AsyncResult()

    def cb(*args, **kwargs):
        ret = list(args)
        if len(kwargs):
            ret.append(kwargs)
        ar.set(ret)

    eb = lambda ch, *args: ar.set(TransportError("_sync_call could not complete due to an error (%s)" % args))

    kwargs[cb_arg] = cb
    with self._push_close_cb(eb):
        func(*args, **kwargs)
        ret_vals = ar.get(timeout=10)

    if isinstance(ret_vals, TransportError):

        # mark this channel as poison, do not use again!
        # don't test for type here, we don't want to have to import PyonSelectConnection
        if hasattr(self._client.transport, 'connection') and hasattr(self._client.transport.connection, 'mark_bad_channel'):
            self._client.transport.connection.mark_bad_channel(self._client.channel_number)
        else:
            log.warn("Could not mark channel # (%s) as bad, Pika could be corrupt", self._client.channel_number)

        raise ret_vals

    if len(ret_vals) == 0:
        return None
    elif len(ret_vals) == 1:
        return ret_vals[0]

    return tuple(ret_vals)
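# Hypothetical usage sketch (assumption, not part of the original source): _sync_call bridges
# Pika's callback-style channel methods into a blocking call for the current greenlet.
# 'callback' is the kwarg name old Pika channel methods use for their completion callback;
# the queue name below is illustrative only.
#
#     frame = self._sync_call(self._client.queue_declare, 'callback',
#                             queue='ion.example_queue', durable=False, auto_delete=True)
#     # frame holds whatever the broker's reply callback delivered (e.g. a DeclareOk frame)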
def incoming(self, invocation):
    #log.debug("ValidateInterceptor.incoming: %s", invocation)
    if self.enabled:
        payload = invocation.message

        # If payload is IonObject, convert from dict to object for processing
        if "format" in invocation.headers and isinstance(payload, dict):
            clzz = invocation.headers["format"]
            if is_ion_object(clzz):
                payload = IonObject(clzz, payload)

        #log.debug("Payload, pre-validate: %s", payload)

        # IonObject _validate will throw AttributeError on validation failure.
        # Raise corresponding BadRequest exception into message stack.
        # Ideally the validator should pass on problems, but for now just log
        # any errors and keep going, since logging and seeing invalid situations are better
        # than skipping validation altogether.
        def validate_ionobj(obj):
            if isinstance(obj, IonObjectBase):
                obj._validate()
            return obj

        try:
            walk(payload, validate_ionobj)
        except AttributeError as e:
            if invocation.headers.has_key("raise-exception") and invocation.headers['raise-exception']:
                log.warn('message failed validation: %s\nheaders %s\npayload %s', e.message, invocation.headers, payload)
                raise BadRequest(e.message)
            else:
                log.warn('message failed validation, but allowing it anyway: %s\nheaders %s\npayload %s', e.message, invocation.headers, payload)
    return invocation
def _spawn_agent_process(self, process_id, name, module, cls, config):
    """
    Spawn a process acting as agent process.
    Attach to service pid.
    """
    service_instance = self._create_service_instance(process_id, name, module, cls, config)
    if not isinstance(service_instance, ResourceAgent):
        raise ContainerConfigError("Agent process must extend ResourceAgent")

    # Set the resource ID if we get it through the config
    resource_id = get_safe(service_instance.CFG, "agent.resource_id")
    if resource_id:
        service_instance.resource_id = resource_id

    rsvc = ProcessRPCServer(node=self.container.node,
                            from_name=service_instance.id,
                            service=service_instance,
                            process=service_instance)

    # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
    cleanup = lambda _: self._cleanup_method(service_instance.id, rsvc)

    proc = self.proc_sup.spawn(name=service_instance.id,
                               service=service_instance,
                               listeners=[rsvc],
                               proc_name=service_instance._proc_name,
                               cleanup_method=cleanup)
    self.proc_sup.ensure_ready(proc, "_spawn_agent_process for %s" % service_instance.id)

    # map gproc to service_instance
    self._spawned_proc_to_process[proc.proc] = service_instance

    # set service's reference to process
    service_instance._process = proc

    # Now call the on_init of the agent.
    self._service_init(service_instance)

    if not service_instance.resource_id:
        log.warn("New agent pid=%s has no resource_id set" % process_id)

    self._service_start(service_instance)

    proc.start_listeners()

    if service_instance.resource_id:
        # look to load any existing policies for this resource
        if self._is_policy_management_service_available() and self.container.governance_controller:
            self.container.governance_controller.update_resource_access_policy(service_instance.resource_id)
    else:
        log.warn("Agent process id=%s does not define resource_id!!" % service_instance.id)

    return service_instance
def set_process_stats_callback(stats_cb):
    """ Sets a callback function (hook) to push stats after a process operation call. """
    global stats_callback
    if stats_cb is None:
        pass
    elif stats_callback:
        log.warn("Stats callback already defined")
    stats_callback = stats_cb
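# Illustrative usage sketch (assumption, not part of the original source): install a hook
# that logs the stats passed after each process operation call. The shape of the stats
# argument is assumed for illustration.
#
#     def push_stats(stats):
#         log.debug("process op stats: %s", stats)
#
#     set_process_stats_callback(push_stats)   # a later call replaces the hook (with a warning)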
def find_user_info_by_email(self, user_email=''):
    #return self.clients.resource_registry.find_resources_ext(restype=RT.UserInfo, attr_name="contact.email", attr_value=user_email, id_only=False)
    user_infos, _ = self.clients.resource_registry.find_resources_ext(RT.UserInfo, attr_name="contact.email", attr_value=user_email)
    if len(user_infos) > 1:
        log.warn("More than one UserInfo found for email '%s': %s" % (user_email, [ui._id for ui in user_infos]))
    if user_infos:
        return user_infos[0]
    return None
def reject(self, delivery_tag, requeue=False):
    assert delivery_tag in self._unacked

    with self._lock_unacked:
        _, queue, m = self._unacked.pop(delivery_tag)
        if requeue:
            log.warn("REQUEUE: EXPERIMENTAL %s", delivery_tag)
            self._queues[queue].put(m)
def _cleanup_pid(self):
    if self.pidfile:
        log.debug("Cleanup pidfile: %s", self.pidfile)
        try:
            os.remove(self.pidfile)
        except Exception, e:
            log.warn("Pidfile could not be deleted: %s" % str(e))
        self.pidfile = None
def start(self):
    log.debug("ExchangeManager.start")

    total_count = 0

    def handle_failure(name, node):
        log.warn("Node %s could not be started", name)
        node.ready.set()   # let it fall out below

    # Establish connection(s) to broker
    for name, cfgkey in CFG.container.messaging.server.iteritems():
        if not cfgkey:
            continue

        if cfgkey not in CFG.server:
            raise ExchangeManagerError("Config key %s (name: %s) (from CFG.container.messaging.server) not in CFG.server" % (cfgkey, name))

        total_count += 1
        log.debug("Starting connection: %s", name)

        # start it with a zero timeout so it comes right back to us
        try:
            node, ioloop = messaging.make_node(CFG.server[cfgkey], name, 0)

            # install a finished handler directly on the ioloop just for this startup period
            fail_handle = lambda _: handle_failure(name, node)
            ioloop.link(fail_handle)

            # wait for the node ready event, with a large timeout just in case
            node_ready = node.ready.wait(timeout=15)

            # remove the finished handler, we don't care about it here
            ioloop.unlink(fail_handle)

            # only add to our list if we started successfully
            if not node.running:
                ioloop.kill()   # make sure ioloop dead
            else:
                self._nodes[name] = node
                self._ioloops[name] = ioloop

        except socket.error as e:
            log.warn("Could not start connection %s due to socket error, continuing", name)

    fail_count = total_count - len(self._nodes)
    if fail_count > 0 or total_count == 0:
        if fail_count == total_count:
            raise ExchangeManagerError("No node connection was able to start (%d nodes attempted, %d nodes failed)" % (total_count, fail_count))

        log.warn("Some nodes could not be started, ignoring for now")   # @TODO change when ready

    self._transport = AMQPTransport.get_instance()

    # load interceptors into each
    map(lambda x: x.setup_interceptors(CFG.interceptor), self._nodes.itervalues())
    log.debug("Started %d connections (%s)", len(self._nodes), ",".join(self._nodes.iterkeys()))
def get_values(self, field_name=''):
    hdf_path = self._get_hdf_path(field_name)
    try:
        array = self._decoder.read_hdf_dataset(hdf_path)
    except KeyError, ke:
        log.warn('Could not find requested dataset. Datasets present in hdf file: "%s"', self._decoder.list_datasets())
        raise ke
def _spawn_agent_process(self, process_id, name, module, cls, config):
    """
    Spawn a process acting as agent process.
    Attach to service pid.
    """
    service_instance = self._create_service_instance(process_id, name, module, cls, config)
    if not isinstance(service_instance, ResourceAgent):
        raise ContainerConfigError("Agent process must extend ResourceAgent")

    # Set the resource ID if we get it through the config
    resource_id = get_safe(service_instance.CFG, "agent.resource_id")
    if resource_id:
        service_instance.resource_id = resource_id

    # Now call the on_init of the agent.
    self._service_init(service_instance)

    if not service_instance.resource_id:
        log.warn("New agent pid=%s has no resource_id set" % process_id)

    self._service_start(service_instance)

    rsvc = ProcessRPCServer(node=self.container.node,
                            from_name=service_instance.id,
                            service=service_instance,
                            process=service_instance)

    proc = self.proc_sup.spawn(name=service_instance.id,
                               service=service_instance,
                               listeners=[rsvc],
                               proc_name=service_instance._proc_name)
    self.proc_sup.ensure_ready(proc, "_spawn_agent_process for %s" % service_instance.id)

    # map gproc to service_instance
    self._spawned_proc_to_process[proc.proc] = service_instance

    # set service's reference to process
    service_instance._process = proc

    # Directory registration
    caps = service_instance.get_capabilities()
    self.container.directory.register("/Agents", service_instance.id,
                                      **dict(name=service_instance._proc_name,
                                             container=service_instance.container.id,
                                             resource_id=service_instance.resource_id,
                                             agent_id=service_instance.agent_id,
                                             def_id=service_instance.agent_def_id,
                                             capabilities=caps))

    if not service_instance.resource_id:
        log.warn("Agent process id=%s does not define resource_id!!" % service_instance.id)

    return service_instance
def _get_agent_process_id(cls, resource_id):
    agent_procs = bootstrap.container_instance.directory.find_by_value('/Agents', 'resource_id', resource_id)
    if agent_procs:
        if len(agent_procs) > 1:
            log.warn("Inconsistency: More than one agent registered for resource_id=%s: %s" % (resource_id, agent_procs))
        agent_id = agent_procs[0].key
        return str(agent_id)
    return None
def find_events(self, event_type=None, origin=None, start_ts=None, end_ts=None, id_only=False, **kwargs):
    """
    Returns an ordered list of event objects for given query arguments.
    Return format is list of (event_id, event_key, event object) tuples
    """
    log.trace("Retrieving persistent event for event_type=%s, origin=%s, start_ts=%s, end_ts=%s, descending=%s, limit=%s",
              event_type, origin, start_ts, end_ts, kwargs.get("descending", None), kwargs.get("limit", None))
    events = None

    design_name = "event"
    view_name = None
    start_key = []
    end_key = []
    if origin and event_type:
        view_name = "by_origintype"
        start_key = [origin, event_type]
        end_key = [origin, event_type]
    elif origin:
        view_name = "by_origin"
        start_key = [origin]
        end_key = [origin]
    elif event_type:
        view_name = "by_type"
        start_key = [event_type]
        end_key = [event_type]
    elif start_ts or end_ts:
        view_name = "by_time"
        start_key = []
        end_key = []
    else:
        view_name = "by_time"
        if kwargs.get("limit", 0) < 1:
            kwargs["limit"] = 100
            log.warn("Querying all events, no limit given. Set limit to 100")

    if start_ts:
        start_key.append(start_ts)
    if end_ts:
        end_key.append(end_ts)

    events = self.event_store.find_by_view(design_name, view_name,
                                           start_key=start_key, end_key=end_key,
                                           id_only=id_only, **kwargs)
    return events
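# Hypothetical usage sketch (assumption, not part of the original source): query recent
# events for one origin. Timestamps are the millisecond-string form used elsewhere in this
# code base; the values below are illustrative only.
#
#     events = self.find_events(event_type="ResourceModifiedEvent",
#                               origin=resource_id,
#                               start_ts="1364853000000",
#                               end_ts="1364939400000",
#                               descending=True, limit=50)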