Example #1
    def test_get_valid_resource_commitment(self):
        from pyon.util.containers import get_ion_ts_millis

        # create ION org and an actor
        ion_org = IonObject(RT.Org, name='ION')
        ion_org_id, _ = self.rr.create(ion_org)
        ion_org._id = ion_org_id
        actor = IonObject(RT.ActorIdentity, name='actor1')
        actor_id, _ = self.rr.create(actor)

        # create an expired commitment in the org
        ts = get_ion_ts_millis() - 50000
        com_obj = IonObject(RT.Commitment, provider=ion_org_id, consumer=actor_id, commitment=True, expiration=ts)
        com_id, _ = self.rr.create(com_obj)
        self.rr.create_association(ion_org_id, PRED.hasCommitment, com_id)
        c = get_valid_resource_commitments(ion_org_id, actor_id)
        # verify that the commitment is not returned
        self.assertIsNone(c)

        # create a commitment that has not expired yet
        ts = get_ion_ts_millis() + 50000
        com_obj = IonObject(RT.Commitment, provider=ion_org_id, consumer=actor_id, commitment=True, expiration=ts)
        com_id, _ = self.rr.create(com_obj)
        self.rr.create_association(ion_org_id, PRED.hasCommitment, com_id)
        c = get_valid_resource_commitments(ion_org_id, actor_id)

        # verify that the commitment is returned
        self.assertIsNotNone(c)
Example #2
def get_valid_resource_commitments(resource_id=None, actor_id=None):
    '''
    Returns the list of valid (unexpired) commitments for the specified resource.
    If the optional actor_id is supplied, the result is filtered by actor_id.
    @param resource_id  id of the resource to find commitments for
    @param actor_id  optional id of the consumer actor to filter by
    @return  list of Commitment objects, or None if there are none
    '''
    log.debug("Finding commitments for resource_id: %s and actor_id: %s" % (resource_id, actor_id))

    if resource_id is None:
        return None

    try:
        gov_controller = bootstrap.container_instance.governance_controller
        commitments,_ = gov_controller.rr.find_objects(resource_id, PRED.hasCommitment, RT.Commitment)
        if not commitments:
            return None

        cur_time = get_ion_ts_millis()
        commitment_list = []
        for com in commitments:
            if (actor_id is None or com.consumer == actor_id) and (int(com.expiration) == 0 or (int(com.expiration) > 0 and cur_time < int(com.expiration))):
                commitment_list.append(com)

        if commitment_list:
            return commitment_list

    except Exception as e:
        log.error(e)
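
A hedged usage sketch for the helper above; the ids below are hypothetical placeholders (in practice they come from the resource registry, as in the test in Example #1):

commitments = get_valid_resource_commitments("resource_id_1", "actor_id_1")
if commitments is None:
    print("no valid commitment for this resource/actor")
else:
    for com in commitments:
        print("commitment %s -> %s, expires: %s" % (com.provider, com.consumer, com.expiration))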
Example #3
def get_valid_resource_commitments(resource_id=None, actor_id=None):
    """
    Returns the list of valid (unexpired) commitments for the specified resource.
    If the optional actor_id is supplied, the result is filtered by actor_id.
    """
    log.debug("Finding commitments for resource_id: %s and actor_id: %s", resource_id, actor_id)
    if resource_id is None:
        return None

    try:
        gov_controller = bootstrap.container_instance.governance_controller
        commitments, _ = gov_controller.rr.find_subjects(RT.Commitment, PRED.hasTarget, resource_id, id_only=False)
        if not commitments:
            return None

        cur_time = get_ion_ts_millis()
        commitment_list = [com for com in commitments if (actor_id is None or com.consumer == actor_id) and
                           (int(com.expiration) == 0 or (int(com.expiration) > 0 and cur_time < int(com.expiration)))]
        if commitment_list:
            return commitment_list

    except Exception:
        log.exception("Could not determine actor resource commitments")

    return None
Example #4
def get_valid_principal_commitments(principal_id=None, consumer_id=None):
    """
    Returns the list of valid (unexpired) commitments for the specified principal (org or actor).
    If the optional consumer_id (actor) is supplied, the result is filtered by consumer_id.
    """
    log.debug("Finding commitments for principal: %s", principal_id)
    if principal_id is None:
        return None

    try:
        gov_controller = bootstrap.container_instance.governance_controller
        commitments, _ = gov_controller.rr.find_objects(principal_id, PRED.hasCommitment, RT.Commitment, id_only=False)
        if not commitments:
            return None

        cur_time = get_ion_ts_millis()
        commitment_list = [com for com in commitments if (consumer_id is None or com.consumer == consumer_id) and
                           (int(com.expiration) == 0 or (int(com.expiration) > 0 and cur_time < int(com.expiration)))]
        if commitment_list:
            return commitment_list

    except Exception:
        log.exception("Could not determine principal commitments")

    return None
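
Both helpers above share the same validity predicate. A sketch that isolates it for testing, assuming (as the code implies) that expiration is a stringified millisecond timestamp and 0 means "never expires":

def is_unexpired(expiration, cur_time):
    # expiration: stringified ms timestamp; 0 means no expiration
    exp = int(expiration)
    return exp == 0 or (exp > 0 and cur_time < exp)

# quick checks mirroring the filters above
assert is_unexpired("0", 1000)        # no expiration -> always valid
assert is_unexpired("2000", 1000)     # expires in the future -> valid
assert not is_unexpired("500", 1000)  # already expired -> invalid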
Example #5
    def find_commitments(self,
                         org_id='',
                         resource_id='',
                         actor_id='',
                         exclusive=False,
                         include_expired=False):
        """Returns all commitments in specified org and optionally a given actor and/or optionally a given resource.
        If exclusive == True, only return exclusive commitments.
        """
        self._validate_resource_id("org_id", org_id, RT.Org, optional=True)
        self._validate_resource_id("actor_id",
                                   actor_id,
                                   RT.ActorIdentity,
                                   optional=True)
        if not org_id and not resource_id and not actor_id:
            raise BadRequest("Must restrict search for commitments")

        if resource_id:
            com_objs, _ = self.rr.find_subjects(RT.Commitment,
                                                PRED.hasTarget,
                                                resource_id,
                                                id_only=False)
            if actor_id:
                com_objs = [c for c in com_objs if c.consumer == actor_id]
            if org_id:
                com_objs = [c for c in com_objs if c.provider == org_id]
        elif actor_id:
            com_objs, _ = self.rr.find_objects(actor_id,
                                               PRED.hasCommitment,
                                               RT.Commitment,
                                               id_only=False)
            if org_id:
                com_objs = [c for c in com_objs if c.provider == org_id]
        else:
            com_objs, _ = self.rr.find_objects(org_id,
                                               PRED.hasCommitment,
                                               RT.Commitment,
                                               id_only=False)

        if exclusive:
            com_objs = [
                c for c in com_objs
                if c.commitment.type_ == OT.ResourceCommitment
                and c.commitment.exclusive
            ]
        else:
            com_objs = [
                c for c in com_objs
                if c.commitment.type_ != OT.ResourceCommitment or (
                    c.commitment.type_ == OT.ResourceCommitment
                    and not c.commitment.exclusive)
            ]
        if not include_expired:
            cur_time = get_ion_ts_millis()
            com_objs = [
                c for c in com_objs
                if int(c.expiration) == 0 or cur_time < int(c.expiration)
            ]

        return com_objs
Example #6
    def find_commitments(self, org_id='', resource_id='', actor_id='', exclusive=False, include_expired=False):
        """Returns all commitments in specified org and optionally a given actor and/or optionally a given resource.
        If exclusive == True, only return exclusive commitments.
        """
        self._validate_resource_id("org_id", org_id, RT.Org, optional=True)
        self._validate_resource_id("actor_id", actor_id, RT.ActorIdentity, optional=True)
        if not org_id and not resource_id and not actor_id:
            raise BadRequest("Must restrict search for commitments")

        if resource_id:
            com_objs, _ = self.rr.find_subjects(RT.Commitment, PRED.hasTarget, resource_id, id_only=False)
            if actor_id:
                com_objs = [c for c in com_objs if c.consumer == actor_id]
            if org_id:
                com_objs = [c for c in com_objs if c.provider == org_id]
        elif actor_id:
            com_objs, _ = self.rr.find_objects(actor_id, PRED.hasCommitment, RT.Commitment, id_only=False)
            if org_id:
                com_objs = [c for c in com_objs if c.provider == org_id]
        else:
            com_objs, _ = self.rr.find_objects(org_id, PRED.hasCommitment, RT.Commitment, id_only=False)

        if exclusive:
            com_objs = [c for c in com_objs if c.commitment.type_ == OT.ResourceCommitment and c.commitment.exclusive]
        else:
            com_objs = [c for c in com_objs if c.commitment.type_ != OT.ResourceCommitment or (
                        c.commitment.type_ == OT.ResourceCommitment and not c.commitment.exclusive)]
        if not include_expired:
            cur_time = get_ion_ts_millis()
            com_objs = [c for c in com_objs if int(c.expiration) == 0 or cur_time < int(c.expiration)]

        return com_objs
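
A hedged usage sketch for find_commitments; the service instance and ids are hypothetical placeholders:

# all unexpired, non-exclusive commitments in an org (hypothetical instance/ids)
coms = org_svc.find_commitments(org_id="org_1")
# only exclusive resource commitments on a device
excl = org_svc.find_commitments(resource_id="device_1", exclusive=True)
# an actor's commitments in the org, including expired ones
hist = org_svc.find_commitments(org_id="org_1", actor_id="actor_1", include_expired=True)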
Example #7
    def target(self, *args, **kwargs):
        """
        Control entry point. Set up the base properties for this process (mainly a listener).
        """
        if self.name:
            threading.current_thread().name = "%s-target" % self.name

        # start time
        self._start_time = get_ion_ts_millis()
        self._proc_interval_num = self._start_time / STAT_INTERVAL_LENGTH

        # spawn control flow loop
        self._ctrl_thread = self.thread_manager.spawn(self._control_flow)
        self._ctrl_thread.proc._glname = "ION Proc CL %s" % self.name

        # wait on control flow loop, heartbeating as appropriate
        while not self._ctrl_thread.ev_exit.wait(timeout=self._heartbeat_secs):
            hbst = self.heartbeat()

            if not all(hbst):
                log.warn("Heartbeat status for process %s returned %s", self, hbst)
                if self._heartbeat_stack is not None:
                    stack_out = "".join(traceback.format_list(self._heartbeat_stack))
                else:
                    stack_out = "N/A"

                #raise PyonHeartbeatError("Heartbeat failed: %s, stacktrace:\n%s" % (hbst, stack_out))
                log.warn("Heartbeat failed: %s, stacktrace:\n%s", hbst, stack_out)

        # this is almost a no-op as we don't fall out of the above loop without
        # exiting the ctrl_thread, but having this line here makes testing much
        # easier.
        self._ctrl_thread.join()
Example #8
    def test_get_valid_org_commitment(self):
        from pyon.util.containers import get_ion_ts_millis

        # create ION org and an actor
        ion_org = IonObject(RT.Org, name='ION')
        ion_org_id, _ = self.rr.create(ion_org)
        ion_org._id = ion_org_id
        actor = IonObject(RT.ActorIdentity, name='actor1')
        actor_id, _ = self.rr.create(actor)
        device = IonObject(RT.TestDevice, name="device1")
        device_id, _ = self.rr.create(device)

        # create an expired commitment in the org
        ts = get_ion_ts_millis() - 50000
        com_obj = IonObject(RT.Commitment,
                            provider=ion_org_id,
                            consumer=actor_id,
                            commitment=True,
                            expiration=ts)
        com_id, _ = self.rr.create(com_obj)
        self.rr.create_association(ion_org_id, PRED.hasCommitment, com_id)
        c = get_valid_principal_commitments(ion_org_id, actor_id)
        # verify that the commitment is not returned
        self.assertIsNone(c)

        self.rr.create_association(com_id, PRED.hasTarget, device_id)
        c = get_valid_resource_commitments(device_id, actor_id)
        # verify that the commitment is not returned
        self.assertIsNone(c)

        # create a commitment that has not expired yet
        ts = get_ion_ts_millis() + 50000
        com_obj = IonObject(RT.Commitment,
                            provider=ion_org_id,
                            consumer=actor_id,
                            commitment=True,
                            expiration=ts)
        com_id, _ = self.rr.create(com_obj)
        self.rr.create_association(ion_org_id, PRED.hasCommitment, com_id)
        c = get_valid_principal_commitments(ion_org_id, actor_id)
        # verify that the commitment is returned
        self.assertIsNotNone(c)

        self.rr.create_association(com_id, PRED.hasTarget, device_id)
        c = get_valid_resource_commitments(device_id, actor_id)
        # verify that the commitment is returned
        self.assertIsNotNone(c)
Example #9
    def heartbeat(self):
        """
        Returns a 3-tuple indicating whether everything is ok.

        Should only be called after the process has been started.
        Checks the following:
            - All attached endpoints are alive + listening (this means ready)
            - The control flow greenlet is alive + listening or processing

        @return 3-tuple indicating (listeners ok, ctrl thread ok, heartbeat status). Use all() on it for a
                boolean indication of success.
        """
        listeners_ok = True
        for l in self.listeners:
            if not (l in self._listener_map
                    and not self._listener_map[l].proc.dead
                    and l.get_ready_event().is_set()):
                listeners_ok = False

        ctrl_thread_ok = self._ctrl_thread.running

        # are we currently processing something?
        heartbeat_ok = True
        if self._ctrl_current is not None:
            st = traceback.extract_stack(self._ctrl_thread.proc.gr_frame)

            if self._ctrl_current == self._heartbeat_op:

                if st == self._heartbeat_stack:
                    self._heartbeat_count += 1  # we've seen this before! increment count

                    # we've been in this for the last X ticks, or it's been X seconds, fail this part of the heartbeat
                    if self._heartbeat_count > CFG.get_safe('container.timeout.heartbeat_proc_count_threshold', 30) or \
                       get_ion_ts_millis() - int(self._heartbeat_time) >= CFG.get_safe('container.timeout.heartbeat_proc_time_threshold', 30) * 1000:
                        heartbeat_ok = False
                else:
                    # it's made some progress
                    self._heartbeat_count = 1
                    self._heartbeat_stack = st
                    self._heartbeat_time = get_ion_ts()
            else:
                self._heartbeat_op = self._ctrl_current
                self._heartbeat_count = 1
                self._heartbeat_time = get_ion_ts()
                self._heartbeat_stack = st

        else:
            self._heartbeat_op = None
            self._heartbeat_count = 0

        #log.debug("%s %s %s", listeners_ok, ctrl_thread_ok, heartbeat_ok)
        return (listeners_ok, ctrl_thread_ok, heartbeat_ok)
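
The stuck-operation check above compares successive traceback.extract_stack() snapshots of the control greenlet's frame: an unchanged stack across ticks means no visible progress. A distilled, self-contained sketch of that technique (class and threshold names are illustrative, not pyon's):

import traceback

class StuckDetector(object):
    """Counts consecutive ticks in which a frame's stack has not changed."""

    def __init__(self, count_threshold=30):
        self.count_threshold = count_threshold
        self._last_stack = None
        self._count = 0

    def tick(self, frame):
        st = traceback.extract_stack(frame)
        if st == self._last_stack:
            self._count += 1       # same stack as last tick: no visible progress
        else:
            self._last_stack = st  # stack changed: progress was made
            self._count = 1
        return self._count <= self.count_threshold  # False means "looks stuck"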
Example #10
    def heartbeat(self):
        """
        Returns a 3-tuple indicating whether everything is ok.

        Should only be called after the process has been started.
        Checks the following:
            - All attached endpoints are alive + listening (this means ready)
            - The control flow greenlet is alive + listening or processing

        @return 3-tuple indicating (listeners ok, ctrl thread ok, heartbeat status). Use all() on it for a
                boolean indication of success.
        """
        listeners_ok = True
        for l in self.listeners:
            if not (l in self._listener_map and not self._listener_map[l].proc.dead and l.get_ready_event().is_set()):
                listeners_ok = False

        ctrl_thread_ok = self._ctrl_thread.running

        # are we currently processing something?
        heartbeat_ok = True
        if self._ctrl_current is not None:
            st = traceback.extract_stack(self._ctrl_thread.proc.gr_frame)

            if self._ctrl_current == self._heartbeat_op:

                if st == self._heartbeat_stack:
                    self._heartbeat_count += 1  # we've seen this before! increment count

                    # we've been in this for the last X ticks, or it's been X seconds, fail this part of the heartbeat
                    if self._heartbeat_count > CFG.get_safe('container.timeout.heartbeat_proc_count_threshold', 30) or \
                       get_ion_ts_millis() - int(self._heartbeat_time) >= CFG.get_safe('container.timeout.heartbeat_proc_time_threshold', 30) * 1000:
                        heartbeat_ok = False
                else:
                    # it's made some progress
                    self._heartbeat_count = 1
                    self._heartbeat_stack = st
                    self._heartbeat_time  = get_ion_ts()
            else:
                self._heartbeat_op      = self._ctrl_current
                self._heartbeat_count   = 1
                self._heartbeat_time    = get_ion_ts()
                self._heartbeat_stack   = st

        else:
            self._heartbeat_op      = None
            self._heartbeat_count   = 0

        #log.debug("%s %s %s", listeners_ok, ctrl_thread_ok, heartbeat_ok)
        return (listeners_ok, ctrl_thread_ok, heartbeat_ok)
Example #11
    def publish_event_object(self, event_object):
        """
        Publishes an event of given type for the given origin. Event_type defaults to an
        event_type set when initializing the EventPublisher. Other kwargs fill out the fields
        of the event. This operation raises an exception on failure.
        @param event_object     the event object to be published
        @retval event_object    the event object which was published
        """
        if not event_object:
            raise BadRequest("Must provide event_object")

        event_object.base_types = event_object._get_extends()

        topic = self._topic(event_object)  # Routing key generated using type_, base_types, origin, origin_type, sub_type
        to_name = (self._send_name.exchange, topic)

        current_time = get_ion_ts_millis()

        # Ensure valid created timestamp if supplied
        if event_object.ts_created:

            if not is_valid_ts(event_object.ts_created):
                raise BadRequest("The ts_created value is not a valid timestamp: '%s'" % (event_object.ts_created))

            # Reject events that are too far in the future
            if int(event_object.ts_created) > (current_time + VALID_EVENT_TIME_PERIOD):
                raise BadRequest("This ts_created value is too far in the future:'%s'" % (event_object.ts_created))

            # Reject events that are older than specified time
            if int(event_object.ts_created) < (current_time - VALID_EVENT_TIME_PERIOD):
                raise BadRequest("This ts_created value is too old:'%s'" % (event_object.ts_created))

        else:
            event_object.ts_created = str(current_time)

        # Set the actor id if not already set
        if not event_object.actor_id:
            event_object.actor_id = self._get_actor_id()

        # Validate this object - ideally the validator should pass on problems, but for now just log
        # any errors and keep going, since seeing invalid situations is better than skipping validation.
        try:
            event_object._validate()
        except Exception as e:
            log.exception(e)
Example #12
    def time_stats(self):
        """
        Returns a 5-tuple of (total time, idle time, processing time, time since prior interval start,
        busy since prior interval start), all in ms (int).
        """
        now = get_ion_ts_millis()
        running_time = now - self._start_time
        idle_time = running_time - self._proc_time

        cur_interval = now / STAT_INTERVAL_LENGTH
        now_since_prior = now - (cur_interval - 1) * STAT_INTERVAL_LENGTH

        if cur_interval == self._proc_interval_num:
            proc_time_since_prior = self._proc_time - self._proc_time_prior2
        elif cur_interval - 1 == self._proc_interval_num:
            proc_time_since_prior = self._proc_time - self._proc_time_prior
        else:
            proc_time_since_prior = 0

        return (running_time, idle_time, self._proc_time, now_since_prior, proc_time_since_prior)
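
time_stats relies on _record_proc_time rolling the prior-interval counters whenever a new STAT_INTERVAL_LENGTH bucket begins. That helper is not shown in this example, so the following is only a plausible sketch consistent with the fields used above, not the actual implementation:

    def _record_proc_time(self, cur_time):
        # Assumed behavior: snapshot accumulated processing time at interval boundaries
        cur_interval = cur_time / STAT_INTERVAL_LENGTH  # integer division under Python 2
        if cur_interval != self._proc_interval_num:
            if cur_interval - 1 == self._proc_interval_num:
                # advanced into the adjacent interval: keep one interval of history
                self._proc_time_prior2 = self._proc_time_prior
                self._proc_time_prior = self._proc_time
            else:
                # skipped one or more intervals: both snapshots collapse to the current total
                self._proc_time_prior2 = self._proc_time
                self._proc_time_prior = self._proc_time
            self._proc_interval_num = cur_interval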
Example #13
    def time_stats(self):
        """
        Returns a 5-tuple of (total time, idle time, processing time, time since prior interval start,
        busy since prior interval start), all in ms (int).
        """
        now = get_ion_ts_millis()
        running_time = now - self._start_time
        idle_time = running_time - self._proc_time

        cur_interval = now / STAT_INTERVAL_LENGTH
        now_since_prior = now - (cur_interval - 1) * STAT_INTERVAL_LENGTH

        if cur_interval == self._proc_interval_num:
            proc_time_since_prior = self._proc_time - self._proc_time_prior2
        elif cur_interval - 1 == self._proc_interval_num:
            proc_time_since_prior = self._proc_time - self._proc_time_prior
        else:
            proc_time_since_prior = 0

        return (running_time, idle_time, self._proc_time, now_since_prior,
                proc_time_since_prior)
Example #14
    def target(self, *args, **kwargs):
        """
        Entry point for the main process greenlet.
        Set up the base properties for this process (mainly the control thread).
        """
        if self.name:
            threading.current_thread().name = "%s-target" % self.name

        # start time
        self._start_time = get_ion_ts_millis()
        self._proc_interval_num = self._start_time / STAT_INTERVAL_LENGTH

        # spawn control flow loop
        self._ctrl_thread = self.thread_manager.spawn(self._control_flow)
        self._ctrl_thread.proc._glname = "ION Proc CL %s" % self.name

        # wait on control flow loop, heartbeating as appropriate
        while not self._ctrl_thread.ev_exit.wait(timeout=self._heartbeat_secs):
            hbst = self.heartbeat()

            if not all(hbst):
                log.warn("Heartbeat status for process %s returned %s", self,
                         hbst)
                if self._heartbeat_stack is not None:
                    stack_out = "".join(
                        traceback.format_list(self._heartbeat_stack))
                else:
                    stack_out = "N/A"

                #raise PyonHeartbeatError("Heartbeat failed: %s, stacktrace:\n%s" % (hbst, stack_out))
                log.warn("Heartbeat failed: %s, stacktrace:\n%s", hbst,
                         stack_out)

        # this is almost a no-op as we don't fall out of the above loop without
        # exiting the ctrl_thread, but having this line here makes testing much easier.
        self._ctrl_thread.join()
Example #15
    def publish_event_object(self, event_object):
        """
        Publishes an event of given type for the given origin. Event_type defaults to an
        event_type set when initializing the EventPublisher. Other kwargs fill out the fields
        of the event. This operation raises an exception on failure.
        @param event_object     the event object to be published
        @retval event_object    the event object which was published
        """
        if not event_object:
            raise BadRequest("Must provide event_object")

        event_object.base_types = event_object._get_extends()

        topic = self._topic(event_object)  # Routing key generated using type_, base_types, origin, origin_type, sub_type
        container = (hasattr(self, '_process') and hasattr(self._process, 'container') and self._process.container) or BaseEndpoint._get_container_instance()
        if container and container.has_capability(container.CCAP.EXCHANGE_MANAGER):
            # make sure we are an xp, if not, upgrade
            if not isinstance(self._send_name, XOTransport):

                default_nt = NameTrio(self.get_events_exchange_point())
                if isinstance(self._send_name, NameTrio) \
                   and self._send_name.exchange == default_nt.exchange \
                   and self._send_name.queue == default_nt.queue \
                   and self._send_name.binding == default_nt.binding:
                    self._send_name = container.create_xp(self._events_xp)
                else:
                    self._send_name = container.create_xp(self._send_name)

            xp = self._send_name
            to_name = xp.create_route(topic)
        else:
            to_name = (self._send_name.exchange, topic)

        current_time = get_ion_ts_millis()

        # Ensure valid created timestamp if supplied
        if event_object.ts_created:

            if not is_valid_ts(event_object.ts_created):
                raise BadRequest("The ts_created value is not a valid timestamp: '%s'" % (event_object.ts_created))

            # Reject events that are too far in the future
            if int(event_object.ts_created) > (current_time + VALID_EVENT_TIME_PERIOD):
                raise BadRequest("This ts_created value is too far in the future:'%s'" % (event_object.ts_created))

            # Reject events that are older than specified time
            if int(event_object.ts_created) < (current_time - VALID_EVENT_TIME_PERIOD):
                raise BadRequest("This ts_created value is too old:'%s'" % (event_object.ts_created))

        else:
            event_object.ts_created = str(current_time)

        # Set the actor id if not already set
        if not event_object.actor_id:
            event_object.actor_id = self._get_actor_id()

        # Validate this object - ideally the validator should pass on problems, but for now just log
        # any errors and keep going, since seeing invalid situations is better than skipping validation.
        try:
            event_object._validate()
        except Exception as e:
            log.exception(e)


        # Ensure the event object has a unique id
        if '_id' in event_object:
            raise BadRequest("The event object cannot contain a _id field '%s'" % (event_object))

        # Generate a unique ID for this event
        event_object._id = create_unique_event_id()

        try:
            self.publish(event_object, to_name=to_name)
        except Exception as ex:
            log.exception("Failed to publish event (%s): '%s'" % (ex.message, event_object))
            raise

        return event_object
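
The ts_created handling above amounts to a symmetric time-window check; a small hedged helper that captures it, assuming VALID_EVENT_TIME_PERIOD is a window in milliseconds:

def check_ts_created(ts_created, current_time, window_millis):
    # returns an error message if the timestamp falls outside the window, else None
    ts = int(ts_created)
    if ts > current_time + window_millis:
        return "ts_created too far in the future: '%s'" % ts_created
    if ts < current_time - window_millis:
        return "ts_created too old: '%s'" % ts_created
    return None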
Example #16
    def _compute_proc_stats(self, start_proc_time):
        cur_time = get_ion_ts_millis()
        self._record_proc_time(cur_time)
        proc_time = cur_time - start_proc_time
        self._proc_time += proc_time
Example #17
    def _control_flow(self):
        """
        Main process thread of execution method.

        This method is run inside a greenlet and exists for each ION process. Listeners
        attached to the process, either RPC Servers or Subscribers, synchronize their calls
        by placing future calls into the queue by calling _routing_call.  This is all done
        automatically for you by the Container's Process Manager.

        This method blocks until there are calls to be made in the synchronized queue, and
        then calls from within this greenlet.  Any exception raised is caught and re-raised
        in the greenlet that originally scheduled the call.  If successful, the AsyncResult
        created at scheduling time is set with the result of the call.
        """
        if self.name:
            svc_name = "unnamed-service"
            if self.service is not None and hasattr(self.service, 'name'):
                svc_name = self.service.name
            threading.current_thread().name = "%s-%s-ctrl" % (svc_name, self.name)

        self._ready_control.set()

        for calltuple in self._ctrl_queue:
            calling_gl, ar, call, callargs, callkwargs, context = calltuple
            #log.debug("control_flow making call: %s %s %s (has context: %s)", call, callargs, callkwargs, context is not None)

            res = None
            start_proc_time = get_ion_ts_millis()
            self._record_proc_time(start_proc_time)

            # check context for expiration
            if context is not None and 'reply-by' in context:
                if start_proc_time >= int(context['reply-by']):
                    log.info("control_flow: attempting to process message already exceeding reply-by, ignore")

                    # raise a timeout in the calling thread to allow endpoints to continue processing
                    e = IonTimeout("Reply-by time has already occurred (reply-by: %s, op start time: %s)" % (context['reply-by'], start_proc_time))
                    calling_gl.kill(exception=e, block=False)

                    continue

            # also check ar if it is set, if it is, that means it is cancelled
            if ar.ready():
                log.info("control_flow: attempting to process message that has been cancelled, ignore")
                continue

            try:
                with self.service.push_context(context):
                    with self.service.container.context.push_context(context):
                        self._ctrl_current = ar
                        res = call(*callargs, **callkwargs)
            except OperationInterruptedException:
                # endpoint layer takes care of response as it's the one that caused this
                log.debug("Operation interrupted")
                pass
            except Exception as e:
                # raise the exception in the calling greenlet, and don't
                # wait for it to die - it's likely not going to do so.

                # try decorating the args of the exception with the true traceback
                # this should be reported by ThreadManager._child_failed
                exc = PyonThreadTraceback("IonProcessThread _control_flow caught an exception (call: %s, *args %s, **kwargs %s, context %s)\nTrue traceback captured by IonProcessThread' _control_flow:\n\n%s" % (call, callargs, callkwargs, context, traceback.format_exc()))
                e.args = e.args + (exc,)

                # HACK HACK HACK
                # we know that we only handle TypeError and IonException derived things, so only forward those if appropriate
                if isinstance(e, (TypeError, IonException)):
                    calling_gl.kill(exception=e, block=False)
                else:
                    # otherwise, swallow/record/report and hopefully we can continue on our way
                    self._errors.append((call, callargs, callkwargs, context, e, exc))

                    log.warn(exc)
                    log.warn("Attempting to continue...")

                    # have to raise something friendlier on the client side
                    calling_gl.kill(exception=ContainerError(str(exc)), block=False)
            finally:
                self._compute_proc_stats(start_proc_time)

                self._ctrl_current = None

            ar.set(res)
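
_control_flow consumes call tuples that a companion _routing_call is expected to enqueue from endpoint greenlets; that producer is not shown here, so this is only a sketch of the pattern under gevent (names and signature assumed):

import gevent
from gevent.event import AsyncResult

def routing_call_sketch(ctrl_queue, call, context, *callargs, **callkwargs):
    # enqueue a call for the control greenlet and return an AsyncResult the
    # caller can block on; mirrors the calltuple unpacked in _control_flow
    ar = AsyncResult()
    ctrl_queue.put((gevent.getcurrent(), ar, call, callargs, callkwargs, context))
    return ar  # caller then does: result = ar.get(timeout=...)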
Example #18
    def _is_lock_expired(self, lock_entry):
        if not lock_entry:
            raise BadRequest("No lock entry provided")
        return 0 < lock_entry.attributes[LOCK_EXPIRES_ATTR] <= get_ion_ts_millis()
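
The chained comparison above reads as "an expiration is set (non-zero) and it has already passed". A quick check of the three cases, reusing get_ion_ts_millis from pyon.util.containers:

from pyon.util.containers import get_ion_ts_millis

now = get_ion_ts_millis()
assert not (0 < 0 <= now)           # 0 means no expiration: never expires
assert (0 < now - 1 <= now)         # past, non-zero expiration: expired
assert not (0 < now + 1000 <= now)  # future expiration: not yet expired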
Example #19
    def _compute_proc_stats(self, start_proc_time):
        cur_time = get_ion_ts_millis()
        self._record_proc_time(cur_time)
        proc_time = cur_time - start_proc_time
        self._proc_time += proc_time
Example #20
    def _control_flow(self):
        """
        Entry point for process control thread of execution.

        This method is run by the control greenlet for each ION process. Listeners attached
        to the process, either RPC Servers or Subscribers, synchronize calls to the process
        by placing call requests into the queue by calling _routing_call.

        This method blocks until there are calls to be made in the synchronized queue, and
        then calls from within this greenlet.  Any exception raised is caught and re-raised
        in the greenlet that originally scheduled the call.  If successful, the AsyncResult
        created at scheduling time is set with the result of the call.
        """
        svc_name = getattr(self.service, "name", "unnamed-service") if self.service else "unnamed-service"
        proc_id = getattr(self.service, "id", "unknown-pid") if self.service else "unknown-pid"
        if self.name:
            threading.current_thread().name = "%s-%s" % (svc_name, self.name)
        thread_base_name = threading.current_thread().name

        self._ready_control.set()

        for calltuple in self._ctrl_queue:
            calling_gl, ar, call, callargs, callkwargs, context = calltuple
            request_id = (context or {}).get("request-id", None)
            if request_id:
                threading.current_thread().name = thread_base_name + "-" + str(request_id)
            #log.debug("control_flow making call: %s %s %s (has context: %s)", call, callargs, callkwargs, context is not None)

            res = None
            start_proc_time = get_ion_ts_millis()
            self._record_proc_time(start_proc_time)

            # check context for expiration
            if context is not None and 'reply-by' in context:
                if start_proc_time >= int(context['reply-by']):
                    log.info("control_flow: attempting to process message already exceeding reply-by, ignore")

                    # raise a timeout in the calling thread to allow endpoints to continue processing
                    e = IonTimeout("Reply-by time has already occurred (reply-by: %s, op start time: %s)" %
                                   (context['reply-by'], start_proc_time))
                    calling_gl.kill(exception=e, block=False)

                    continue

            # If ar is set, means it is cancelled
            if ar.ready():
                log.info("control_flow: attempting to process message that has been cancelled, ignore")
                continue

            init_db_stats()
            try:
                # ******************************************************************
                # ****** THIS IS WHERE THE RPC OPERATION/SERVICE CALL IS MADE ******

                with self.service.push_context(context), \
                     self.service.container.context.push_context(context):
                    self._ctrl_current = ar
                    res = call(*callargs, **callkwargs)

                # ****** END CALL, EXCEPTION HANDLING FOLLOWS                 ******
                # ******************************************************************

            except OperationInterruptedException:
                # endpoint layer takes care of response as it's the one that caused this
                log.debug("Operation interrupted")
                pass

            except Exception as e:
                if self._log_call_exception:
                    log.exception("PROCESS exception: %s" % e.message)

                # Raise the exception in the calling greenlet.
                # Try decorating the args of the exception with the true traceback -
                # this should be reported by ThreadManager._child_failed
                exc = PyonThreadTraceback(
                    "IonProcessThread _control_flow caught an exception "
                    "(call: %s, *args %s, **kwargs %s, context %s)\n"
                    "True traceback captured by IonProcessThread' _control_flow:\n\n%s"
                    % (call, callargs, callkwargs, context,
                       traceback.format_exc()))
                e.args = e.args + (exc, )

                if isinstance(e, (TypeError, IonException)):
                    # Pass through known process exceptions, in particular IonException
                    calling_gl.kill(exception=e, block=False)
                else:
                    # Otherwise, wrap unknown, forward and hopefully we can continue on our way
                    self._errors.append(
                        (call, callargs, callkwargs, context, e, exc))

                    log.warn(exc)
                    log.warn("Attempting to continue...")

                    # Note: Too large exception string will crash the container (when passed on as msg header).
                    exception_str = str(exc)
                    if len(exception_str) > 10000:
                        exception_str = (
                            "Exception string representation too large. "
                            "Begin and end of the exception:\n" +
                            exception_str[:2000] + "\n...\n" +
                            exception_str[-2000:])
                    calling_gl.kill(exception=ContainerError(exception_str),
                                    block=False)
            finally:
                try:
                    # Compute statistics
                    self._compute_proc_stats(start_proc_time)

                    db_stats = get_db_stats()
                    if db_stats:
                        if self._warn_call_dbstmt_threshold > 0 and db_stats.get("count.all", 0) >= self._warn_call_dbstmt_threshold:
                            stats_str = ", ".join("{}={}".format(k, db_stats[k]) for k in sorted(db_stats.keys()))
                            log.warn("PROC_OP '%s.%s' EXCEEDED DB THRESHOLD. stats=%s", svc_name, call.__name__, stats_str)
                        elif self._log_call_dbstats:
                            stats_str = ", ".join("{}={}".format(k, db_stats[k]) for k in sorted(db_stats.keys()))
                            log.info("PROC_OP '%s.%s' DB STATS: %s", svc_name, call.__name__, stats_str)
                    clear_db_stats()

                    if stats_callback:
                        stats_callback(proc_id=proc_id,
                                       proc_name=self.name,
                                       svc=svc_name,
                                       op=call.__name__,
                                       request_id=request_id,
                                       context=context,
                                       db_stats=db_stats,
                                       proc_stats=self.time_stats,
                                       result=res,
                                       exc=None)
                except Exception:
                    log.exception("Error computing process call stats")

                self._ctrl_current = None
                threading.current_thread().name = thread_base_name

            # Set response in AsyncEvent of caller (endpoint greenlet)
            ar.set(res)
Example #21
    def publish_event_object(self, event_object):
        """
        Publishes an event of given type for the given origin. Event_type defaults to an
        event_type set when initializing the EventPublisher. Other kwargs fill out the fields
        of the event. This operation raises an exception on failure.
        @param event_object     the event object to be published
        @retval event_object    the event object which was published
        """
        if not event_object:
            raise BadRequest("Must provide event_object")

        event_object.base_types = event_object._get_extends()

        topic = self._topic(event_object)  # Routing key generated using type_, base_types, origin, origin_type, sub_type
        container = (hasattr(self, '_process') and hasattr(self._process, 'container') and self._process.container) \
                    or BaseEndpoint._get_container_instance()
        if container and container.has_capability(container.CCAP.EXCHANGE_MANAGER):
            # make sure we are an xp, if not, upgrade
            if not isinstance(self._send_name, XOTransport):

                default_nt = NameTrio(self.get_events_exchange_point())
                if isinstance(self._send_name, NameTrio) \
                   and self._send_name.exchange == default_nt.exchange \
                   and self._send_name.queue == default_nt.queue \
                   and self._send_name.binding == default_nt.binding:
                    self._send_name = container.create_xp(self._events_xp)
                else:
                    self._send_name = container.create_xp(self._send_name)

            xp = self._send_name
            to_name = xp.create_route(topic)
        else:
            to_name = (self._send_name.exchange, topic)

        current_time = get_ion_ts_millis()

        # Ensure valid created timestamp if supplied
        if event_object.ts_created:

            if not is_valid_ts(event_object.ts_created):
                raise BadRequest(
                    "The ts_created value is not a valid timestamp: '%s'" %
                    (event_object.ts_created))

            # Reject events that are too far in the future
            if int(event_object.ts_created) > (current_time +
                                               VALID_EVENT_TIME_PERIOD):
                raise BadRequest(
                    "This ts_created value is too far in the future:'%s'" %
                    (event_object.ts_created))

            # Reject events that are older than specified time
            if int(event_object.ts_created) < (current_time -
                                               VALID_EVENT_TIME_PERIOD):
                raise BadRequest("This ts_created value is too old:'%s'" %
                                 (event_object.ts_created))

        else:
            event_object.ts_created = str(current_time)

        # Set the actor id if not already set
        if not event_object.actor_id:
            event_object.actor_id = self._get_actor_id()

        # Validate this object - ideally the validator should pass on problems, but for now just log
        # any errors and keep going, since seeing invalid situations is better than skipping validation.
        try:
            event_object._validate()
        except Exception as e:
            log.exception(e)

        # Ensure the event object has a unique id
        if '_id' in event_object:
            raise BadRequest("The event object cannot contain a _id field '%s'" % (event_object))

        # Generate a unique ID for this event
        event_object._id = create_unique_event_id()

        try:
            self.publish(event_object, to_name=to_name)
        except Exception as ex:
            log.exception("Failed to publish event (%s): '%s'" %
                          (ex.message, event_object))
            raise

        return event_object
Example #22
    def _is_lock_expired(self, lock_entry):
        if not lock_entry:
            raise BadRequest("No lock entry provided")
        return 0 < lock_entry.attributes[LOCK_EXPIRES_ATTR] <= get_ion_ts_millis()
Example #23
    def _control_flow(self):
        """
        Entry point for process control thread of execution.

        This method is run by the control greenlet for each ION process. Listeners attached
        to the process, either RPC Servers or Subscribers, synchronize calls to the process
        by placing call requests into the queue by calling _routing_call.

        This method blocks until there are calls to be made in the synchronized queue, and
        then calls from within this greenlet.  Any exception raised is caught and re-raised
        in the greenlet that originally scheduled the call.  If successful, the AsyncResult
        created at scheduling time is set with the result of the call.
        """
        if self.name:
            svc_name = "unnamed-service"
            if self.service is not None and hasattr(self.service, 'name'):
                svc_name = self.service.name
            threading.current_thread().name = "%s-%s" % (svc_name, self.name)
        thread_base_name = threading.current_thread().name

        self._ready_control.set()

        for calltuple in self._ctrl_queue:
            calling_gl, ar, call, callargs, callkwargs, context = calltuple
            request_id = (context or {}).get("request-id", None)
            if request_id:
                threading.current_thread().name = thread_base_name + "-" + str(request_id)
            #log.debug("control_flow making call: %s %s %s (has context: %s)", call, callargs, callkwargs, context is not None)

            res = None
            start_proc_time = get_ion_ts_millis()
            self._record_proc_time(start_proc_time)

            # check context for expiration
            if context is not None and 'reply-by' in context:
                if start_proc_time >= int(context['reply-by']):
                    log.info("control_flow: attempting to process message already exceeding reply-by, ignore")

                    # raise a timeout in the calling thread to allow endpoints to continue processing
                    e = IonTimeout("Reply-by time has already occurred (reply-by: %s, op start time: %s)" % (context['reply-by'], start_proc_time))
                    calling_gl.kill(exception=e, block=False)

                    continue

            # If ar is set, means it is cancelled
            if ar.ready():
                log.info("control_flow: attempting to process message that has been cancelled, ignore")
                continue

            init_db_stats()
            try:
                # ******************************************************************
                # ****** THIS IS WHERE THE RPC OPERATION/SERVICE CALL IS MADE ******

                with self.service.push_context(context):
                    with self.service.container.context.push_context(context):
                        self._ctrl_current = ar
                        res = call(*callargs, **callkwargs)

                # ****** END CALL, EXCEPTION HANDLING FOLLOWS                 ******
                # ******************************************************************

            except OperationInterruptedException:
                # endpoint layer takes care of response as it's the one that caused this
                log.debug("Operation interrupted")
                pass

            except Exception as e:
                if self._log_call_exception:
                    log.exception("PROCESS exception: %s" % e.message)

                # Raise the exception in the calling greenlet.
                # Try decorating the args of the exception with the true traceback -
                # this should be reported by ThreadManager._child_failed
                exc = PyonThreadTraceback("IonProcessThread _control_flow caught an exception (call: %s, *args %s, **kwargs %s, context %s)\nTrue traceback captured by IonProcessThread' _control_flow:\n\n%s" % (
                        call, callargs, callkwargs, context, traceback.format_exc()))
                e.args = e.args + (exc,)

                # HACK HACK HACK
                # we know that we only handle TypeError and IonException derived things, so only forward those if appropriate
                if isinstance(e, (TypeError, IonException)):
                    calling_gl.kill(exception=e, block=False)
                else:
                    # otherwise, swallow/record/report and hopefully we can continue on our way
                    self._errors.append((call, callargs, callkwargs, context, e, exc))

                    log.warn(exc)
                    log.warn("Attempting to continue...")

                    # Have to raise something friendlier on the client side.
                    # Note: passing a too-large exception string representation to
                    # calling_gl.kill(exception=ContainerError(str(exc)), block=False) can crash the container,
                    # so truncate it first.
                    exceptions_str = str(exc)
                    if len(exceptions_str) > 10000:
                        exceptions_str = (
                            "Exception string representation too large to include in full. "
                            "Begin and end of the exception:\n"
                            + exceptions_str[:2000] + "\n...\n" + exceptions_str[-2000:]
                        )
                    calling_gl.kill(exception=ContainerError(exceptions_str), block=False)
            finally:
                try:
                    self._compute_proc_stats(start_proc_time)

                    db_stats = get_db_stats()
                    if db_stats:
                        if self._warn_call_dbstmt_threshold > 0 and db_stats.get("count.all", 0) >= self._warn_call_dbstmt_threshold:
                            stats_str = ", ".join("{}={}".format(k, db_stats[k]) for k in sorted(db_stats.keys()))
                            log.warn("PROC_OP '%s.%s' EXCEEDED DB THRESHOLD. stats=%s", call.__module__, call.__name__, stats_str)
                        elif self._log_call_dbstats:
                            stats_str = ", ".join("{}={}".format(k, db_stats[k]) for k in sorted(db_stats.keys()))
                            log.info("PROC_OP '%s.%s' DB STATS: %s", call.__module__, call.__name__, stats_str)
                    clear_db_stats()
                except Exception:
                    log.exception("Error computing process call stats")

                self._ctrl_current = None
                threading.current_thread().name = thread_base_name

            ar.set(res)
Example #24
    def acquire_lock(self, key, timeout=LOCK_EXPIRES_DEFAULT, lock_holder=None, lock_info=None):
        """
        Attempts to atomically acquire a lock with the given key and namespace.
        If holder is given and holder already has the lock, renew.
        Checks for expired locks.
        @param timeout  Secs until lock expiration or 0 for no expiration
        @param lock_holder  Str value identifying lock holder for subsequent exclusive access
        @param lock_info  Dict value for additional attributes describing lock
        @retval  bool - could lock be acquired?
        """
        if not key:
            raise BadRequest("Missing argument: key")
        if "/" in key:
            raise BadRequest("Invalid argument value: key")

        lock_attrs = {LOCK_EXPIRES_ATTR: get_ion_ts_millis() + int(1000*timeout) if timeout else 0,
                      LOCK_HOLDER_ATTR: lock_holder or ""}
        if lock_info:
            lock_attrs.update(lock_info)
        expires = int(lock_attrs[LOCK_EXPIRES_ATTR])  # Check type just to be sure
        if expires and get_ion_ts_millis() > expires:
            raise BadRequest("Invalid lock expiration value: %s" % expires)

        direntry = self._create_dir_entry(LOCK_DIR_PATH, key, attributes=lock_attrs)
        lock_result = False
        try:
            # This is an atomic operation. It relies on the unique key constraint of the directory service
            self.dir_store.create(direntry, create_unique_directory_id())
            lock_result = True
        except BadRequest as ex:
            if ex.message.startswith("DirEntry already exists"):
                de_old = self.lookup(LOCK_DIR_PATH, key, return_entry=True)
                if de_old:
                    if self._is_lock_expired(de_old):
                        # Lock is expired: remove, try to relock
                        # Note: even as holder, it's safer to reacquire in this case than renew
                        log.warn("Removing expired lock: %s/%s", de_old.parent, de_old.key)
                        try:
                            # This is safe, because if the lock was deleted + recreated in the meantime, it has a different id
                            self._delete_lock(de_old)
                            # Try recreate - may fail again due to concurrency
                            self.dir_store.create(direntry, create_unique_directory_id())
                            lock_result = True
                        except BadRequest as ex:
                            if not ex.message.startswith("DirEntry already exists"):
                                log.exception("Error releasing/reacquiring expired lock %s", de_old.key)
                        except Exception:
                            log.exception("Error releasing/reacquiring expired lock %s", de_old.key)
                    elif lock_holder and de_old.attributes[LOCK_HOLDER_ATTR] == lock_holder:
                        # Holder currently holds the lock: renew
                        log.debug("Renewing lock %s/%s for holder %s", de_old.parent, de_old.key, lock_holder)
                        de_old.attributes = lock_attrs
                        try:
                            self.dir_store.update(de_old)
                            lock_result = True
                        except Exception:
                            log.exception("Error renewing expired lock %s", de_old.key)
                # We do nothing if we could not find the lock now...
            else:
                raise

        log.debug("Directory.acquire_lock(%s): %s -> %s", key, lock_attrs, lock_result)

        return lock_result
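
A hedged usage sketch for the seconds-based variant above; the directory instance and the critical-section helper are hypothetical, and release_lock is assumed to be the companion operation:

# hypothetical directory instance; lock expires in 30 secs unless renewed
if directory.acquire_lock("leader", timeout=30, lock_holder="worker_1"):
    try:
        do_exclusive_work()               # hypothetical critical section
    finally:
        directory.release_lock("leader")  # assumed companion call
else:
    pass  # another holder has an unexpired lock; retry later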
Example #25
    def acquire_lock(self, key, timeout=LOCK_EXPIRES_DEFAULT, lock_holder=None, lock_info=None):
        """
        Attempts to atomically acquire a lock with the given key and namespace.
        If holder is given and holder already has the lock, renew.
        Checks for expired locks.
        @param timeout  Int value of millis until lock expiration or 0 for no expiration
        @param lock_holder  Str value identifying lock holder for subsequent exclusive access
        @param lock_info  Dict value for additional attributes describing lock
        @retval  bool - could lock be acquired?
        """
        if not key:
            raise BadRequest("Missing argument: key")
        if "/" in key:
            raise BadRequest("Invalid argument value: key")

        lock_attrs = {LOCK_EXPIRES_ATTR: get_ion_ts_millis() + timeout if timeout else 0,
                      LOCK_HOLDER_ATTR: lock_holder or ""}
        if lock_info:
            lock_attrs.update(lock_info)
        expires = int(lock_attrs[LOCK_EXPIRES_ATTR])  # Check type just to be sure
        if expires and get_ion_ts_millis() > expires:
            raise BadRequest("Invalid lock expiration value: %s" % expires)

        direntry = self._create_dir_entry(LOCK_DIR_PATH, key, attributes=lock_attrs)
        lock_result = False
        try:
            # This is an atomic operation. It relies on the unique key constraint of the directory service
            self.dir_store.create(direntry, create_unique_directory_id())
            lock_result = True
        except BadRequest as ex:
            if ex.message.startswith("DirEntry already exists"):
                de_old = self.lookup(LOCK_DIR_PATH, key, return_entry=True)
                if de_old:
                    if self._is_lock_expired(de_old):
                        # Lock is expired: remove, try to relock
                        # Note: even as holder, it's safer to reacquire in this case than renew
                        log.warn("Removing expired lock: %s/%s", de_old.parent, de_old.key)
                        try:
                            # This is safe, because if the lock was deleted + recreated in the meantime, it has a different id
                            self._delete_lock(de_old)
                            # Try recreate - may fail again due to concurrency
                            self.dir_store.create(direntry, create_unique_directory_id())
                            lock_result = True
                        except Exception:
                            log.exception("Error releasing/reacquiring expired lock %s", de_old.key)
                    elif lock_holder and de_old.attributes[LOCK_HOLDER_ATTR] == lock_holder:
                        # Holder currently holds the lock: renew
                        log.info("Renewing lock %s/%s for holder %s", de_old.parent, de_old.key, lock_holder)
                        de_old.attributes = lock_attrs
                        try:
                            self.dir_store.update(de_old)
                            lock_result = True
                        except Exception:
                            log.exception("Error renewing expired lock %s", de_old.key)
                # We do nothing if we could not find the lock now...
            else:
                raise

        log.debug("Directory.acquire_lock(%s): %s -> %s", key, lock_attrs, lock_result)

        return lock_result