Exemple #1
0
    def __init__(self, pubsub, sub_key, delivery_lock, delivery_list,
                 deliver_pubsub_msg_cb, confirm_pubsub_msg_delivered_cb,
                 sub_config):
        """ Keeps references to the objects and callbacks given on input, derives delivery settings
        from sub_config and, as the very last step, passes self.run to spawn_greenlet.
        """
        # Objects and callbacks received from the caller
        self.pubsub = pubsub
        self.sub_key = sub_key
        self.sub_config = sub_config
        self.delivery_lock = delivery_lock
        self.delivery_list = delivery_list
        self.deliver_pubsub_msg_cb = deliver_pubsub_msg_cb
        self.confirm_pubsub_msg_delivered_cb = confirm_pubsub_msg_delivered_cb

        # Settings copied from the subscription's configuration
        self.topic_name = sub_config.topic_name
        self.wait_sock_err = self.sub_config.wait_sock_err
        self.wait_non_sock_err = self.sub_config.wait_non_sock_err

        # The configured interval is divided by 1000.0 before it is stored
        self.delivery_interval = self.sub_config.task_delivery_interval / 1000.0

        # Runtime state of this task
        self.keep_running = True
        self.last_run = utcnow_as_ms()
        self.delivery_counter = 1

        # Messages are always wrapped in a list unless batch_size is exactly 1
        # and wrap_one_msg_in_list is not set - only then is self.wrap_in_list False.
        # With any other batch_size a list is always sent, no matter what.
        sends_single_bare_msg = self.sub_config.delivery_batch_size == 1 and \
            not self.sub_config.wrap_one_msg_in_list
        self.wrap_in_list = not sends_single_bare_msg

        spawn_greenlet(self.run)
Exemple #2
0
 def __init__(self):
     """ Creates a message whose attributes all start with placeholder values,
     except for recv_time which is set to the current result of utcnow_as_ms.
     """
     # Where and when the message was received
     self.recv_time = utcnow_as_ms()
     self.server_name = None
     self.server_pid = None
     self.cluster_id = None

     # Topic and subscription the message is related to
     self.topic = None
     self.topic_id = None
     self.topic_name = None
     self.sub_key = None
     self.is_in_sub_queue = None
     self.pub_pattern_matched = None
     self.sub_pattern_matched = {}

     # Identifiers and correlation
     self.pub_msg_id = None
     self.pub_correl_id = None
     self.in_reply_to = None
     self.ext_client_id = None
     self.published_by_id = None
     self.group_id = None
     self.position_in_group = None

     # Payload
     self.data = ''
     self.data_prefix = ''
     self.data_prefix_short = ''
     self.mime_type = None
     self.size = None

     # Delivery metadata
     self.priority = None
     self.has_gd = None
     self.delivery_status = None
     self.delivery_count = 0

     # Timestamps and their ISO-formatted counterparts
     self.pub_time = None
     self.ext_pub_time = None
     self.expiration = None
     self.expiration_time = None
     self.pub_time_iso = None
     self.ext_pub_time_iso = None
     self.expiration_time_iso = None
Exemple #3
0
 def enqueue_gd_messages_by_sub_key(self, sub_key, session=None):
     """ Holding the lock of the input sub_key, obtains its GD messages through
     self._fetch_gd_messages_by_sub_key_list and passes them on to self._enqueue_gd_messages_by_sub_key.
     """
     sub_key_lock = self.sub_key_locks[sub_key]

     with sub_key_lock:

         # The current time is read only after the lock has been acquired
         now = utcnow_as_ms()

         fetched = self._fetch_gd_messages_by_sub_key_list([sub_key], now, session)
         self._enqueue_gd_messages_by_sub_key(sub_key, fetched)
Exemple #4
0
    def handle(
        self,
        _msg='Cleaning up WSX pub/sub, channel:`%s`, now:`%s (%s)`, md:`%s`, ma:`%s` (%s)'
    ):
        """ For each WSX channel given on input, deletes subscriptions selected by self._run_max_allowed_query
        for that channel's cut-off time and publishes a broker message with the topics and sub_keys affected.
        All deletions are committed together once every channel has been processed.
        """

        # Input is a multi-line list of WSX channel name -> max timeout accepted
        channel_to_timeout = parse_extra_into_dict(self.request.raw_request)

        with closing(self.odb.session()) as session:

            for channel_name, max_delta in channel_to_timeout.items():

                # The timeout is given in minutes whereas the comparison below is made in seconds
                max_delta = max_delta * 60

                # Anything whose last interaction is older than the cut-off is considered stale
                current_time = utcnow_as_ms()
                cut_off = current_time - max_delta

                # Positional arguments shared by both log entries below
                log_args = (
                    channel_name,
                    datetime_from_ms(current_time * 1000),
                    current_time,
                    max_delta,
                    datetime_from_ms(cut_off * 1000),
                    cut_off,
                )

                # sub_keys about to be deleted are collected up front because reading them back
                # from the DELETE statement itself is not portable.
                stale_items = self._run_max_allowed_query(session, SubscriptionSelect(), channel_name, cut_off)
                sub_key_list = [stale.sub_key for stale in stale_items]

                # Log only if there is anything to clean up
                if sub_key_list:
                    self.logger.debug(_msg, *log_args)
                    logger_pubsub.info(_msg, *log_args)

                # Topics that the sub_keys were related to - required by the broker message
                topic_sub_keys = get_topic_sub_keys_from_sub_keys(session, self.server.cluster_id, sub_key_list)

                # Delete old connections for this channel from SQL ..
                self._run_max_allowed_query(session, SubscriptionDelete(), channel_name, cut_off)

                # .. and let other processes know so they can update their in-RAM structures.
                broker_msg = {
                    'topic_sub_keys': topic_sub_keys,
                    'action': PUBSUB.SUBSCRIPTION_DELETE.value,
                }
                self.broker_client.publish(broker_msg)

            # One commit for all the deletions
            session.commit()
Exemple #5
0
    def _subscribe_impl(self, ctx):
        """ Creates a subscription described by the input ctx: stores it in SQL, moves available messages
        to the new subscriber's queue, fills in the response and publishes a broker message about it.
        Raises PubSubSubscriptionExists if a client without ws_channel_id is already subscribed to the topic.
        """
        lock_name = 'zato.pubsub.subscribe.%s.%s' % (ctx.topic_name, ctx.endpoint_id)

        with self.lock(lock_name):

            with closing(self.odb.session()) as session:

                # Only WebSocket clients may subscribe to the same topic more than once
                if not ctx.ws_channel_id and \
                   has_subscription(session, ctx.cluster_id, ctx.topic.id, ctx.endpoint_id):
                    raise PubSubSubscriptionExists(
                        self.cid, 'Subscription to topic `{}` already exists'.format(ctx.topic.name))

                creation_time = utcnow_as_ms()
                ctx.creation_time = creation_time
                ctx.sub_key = new_sub_key()

                # WSX clients need additional SQL models to accompany the subscription
                if ctx.ws_channel_id:

                    # Persists across multiple WSX connections
                    add_wsx_subscription(
                        session, ctx.cluster_id, ctx.is_internal, ctx.sub_key, ctx.ext_client_id, ctx.ws_channel_id)

                    # Transient - dropped each time a WSX client disconnects
                    self.pubsub.add_ws_client_pubsub_keys(
                        session, ctx.sql_ws_client_id, ctx.sub_key, ctx.ws_channel_name, ctx.ws_pub_client_id)

                    # The WebSocket connection object is told to handle this sub_key from now on
                    ctx.web_socket.pubsub_tool.add_sub_key(ctx.sub_key)

                # The subscription object itself ..
                ps_sub = add_subscription(session, ctx.cluster_id, ctx)

                # .. flushed at once because its ID is needed below in INSERT from SELECT.
                session.flush()

                # All available messages are moved to the new subscriber's queue
                queue_depth = move_messages_to_sub_queue(
                    session, ctx.cluster_id, ctx.topic.id, ctx.endpoint_id, ps_sub.id, creation_time)

                session.commit()

                # Response for the caller
                payload = self.response.payload
                payload.sub_key = ctx.sub_key
                payload.queue_depth = queue_depth

                # Message for workers about the new subscription
                broker_input = Bunch()
                broker_input.topic_name = ctx.topic.name
                broker_input.endpoint_type = self.endpoint_type

                for attr_name in sub_broker_attrs:
                    broker_input[attr_name] = getattr(ps_sub, attr_name, None)

                broker_input.action = BROKER_MSG_PUBSUB.SUBSCRIPTION_CREATE.value
                self.broker_client.publish(broker_input)
Exemple #6
0
    def enqueue_initial_messages(self,
                                 sub_key,
                                 topic_name,
                                 endpoint_name,
                                 _group_size=20):
        """ Looks up IDs of messages already waiting in SQL for the input sub_key and enqueues the messages
        in groups of up to _group_size IDs each. Any exception is logged as a warning rather than re-raised.
        """
        with self.sub_key_locks[sub_key]:

            pub_time_max = utcnow_as_ms()
            session = None

            try:

                # A single SQL session serves all the queries below
                session = self.pubsub.server.odb.session()

                # IDs of messages already queued up - they will be split into groups to fetch
                id_rows = self.pubsub.get_initial_sql_msg_ids_by_sub_key(session, sub_key, pub_time_max)
                msg_ids = [row.pub_msg_id for row in id_rows]

                # Nothing to enqueue
                if not msg_ids:
                    return

                len_msg_ids = len(msg_ids)

                if len_msg_ids == 1:
                    suffix = ' '
                else:
                    suffix = 's '

                groups = list(grouper(_group_size, msg_ids))
                len_groups = len(groups)

                # Both loggers are used because this runs during server startup
                # and users should know that their server has extra work to do.
                for _logger in logger, logger_zato:
                    _logger.info(
                        'Found %d initial message%sto enqueue for sub_key:`%s` (%s -> %s), `%s`, g:%d, gs:%d',
                        len_msg_ids, suffix, sub_key, topic_name, endpoint_name, msg_ids, len_groups, _group_size)

                for idx, group in enumerate(groups, 1):

                    # Falsy elements of a group are skipped
                    group_msg_ids = list(filter(None, group))

                    logger.info(
                        'Enqueuing group %d/%d (gs:%d) (%s, %s -> %s) `%s`',
                        idx, len_groups, _group_size, sub_key, topic_name, endpoint_name, group_msg_ids)

                    msg_list = self.pubsub.get_sql_messages_by_msg_id_list(
                        session, sub_key, pub_time_max, group_msg_ids)
                    self._enqueue_gd_messages_by_sub_key(sub_key, msg_list)

            except Exception:
                exc_formatted = format_exc()
                for _logger in logger, logger_zato:
                    _logger.warn(
                        'Could not enqueue initial messages for `%s` (%s -> %s), e:`%s`',
                        sub_key, topic_name, endpoint_name, exc_formatted)

            finally:
                if session:
                    session.close()
Exemple #7
0
def get_queue_depth_by_topic_id_list(session, cluster_id, topic_id_list):
    """ Returns a list of (topic_id, depth) rows, one for each topic from topic_id_list that has
    at least one matching message in the cluster. A message is counted only if its delivery status
    is `_initialized` and its expiration time has not passed yet.
    """
    # Read the current time once so the whole query uses a single reference point
    now = utcnow_as_ms()

    # The non-aggregated column in SELECT and the column in GROUP BY must be the same one,
    # otherwise databases enforcing strict GROUP BY rules reject the query.
    return session.query(PubSubEnqMsg.topic_id, func.count(PubSubEnqMsg.topic_id)).\
        filter(PubSubEnqMsg.topic_id.in_(topic_id_list)).\
        filter(PubSubEnqMsg.cluster_id==cluster_id).\
        filter(PubSubEnqMsg.delivery_status==_initialized).\
        filter(PubSubEnqMsg.pub_msg_id==PubSubMessage.pub_msg_id).\
        filter(PubSubMessage.expiration_time>=now).\
        group_by(PubSubEnqMsg.topic_id).\
        all()
Exemple #8
0
    def handle(self):
        """ Acknowledges delivery of messages from input msg_id_list for the input sub_key.
        Does nothing if the list of message IDs is empty.
        """
        request_input = self.request.input
        sub_key = request_input.sub_key
        msg_id_list = request_input.msg_id_list

        # No message IDs on input, no SQL session is needed at all
        if not msg_id_list:
            return

        with closing(self.odb.session()) as session:

            # Issue SQL UPDATE ..
            acknowledge_delivery(session, self.server.cluster_id, sub_key, msg_id_list, utcnow_as_ms())

            # .. and commit the transaction.
            session.commit()
Exemple #9
0
    def run(self,
            default_sleep_time=0.1,
            _run_deliv_status=PUBSUB.RUN_DELIVERY_STATUS):
        """ Main loop of the delivery task - runs for as long as self.keep_running is True.

        Each time self._should_wake returns True, self._run_delivery is invoked with self.delivery_lock held
        and its result decides what happens next:

        * OK - the loop continues immediately without sleeping
        * NO_MSG - sleeps for default_sleep_time seconds
        * anything else - logs a warning and sleeps for self.wait_sock_err (SOCKET_ERROR)
          or self.wait_non_sock_err (all other results)

        Note that the sleeps above take place while self.delivery_lock is still held.
        Any exception is logged as a warning and ends the loop.
        """
        logger.info('Starting delivery task for sub_key:`%s`', self.sub_key)

        try:
            while self.keep_running:
                if self._should_wake():

                    with self.delivery_lock:

                        # Update last run time to be able to wake up in time for the next delivery
                        self.last_run = utcnow_as_ms()

                        # Status of this delivery attempt, one of the values from _run_deliv_status
                        result = self._run_delivery()

                        # On success, go straight to the next iteration without sleeping.
                        if result == _run_deliv_status.OK:
                            continue

                        # There was nothing to deliver so wait a moment before checking again.
                        elif result == _run_deliv_status.NO_MSG:
                            sleep(default_sleep_time)

                        # Otherwise, sleep for a longer time because our endpoint must have returned an error.
                        # After this sleep, self._run_delivery will again attempt to deliver all messages
                        # we queued up. Note that we are the only delivery task for this sub_key so when we sleep here
                        # for a moment, we do not block other deliveries.
                        else:
                            sleep_time = self.wait_sock_err if result == _run_deliv_status.SOCKET_ERROR else self.wait_non_sock_err
                            msg = 'Sleeping for {}s after `{}` in sub_key:`{}`'.format(
                                sleep_time, result, self.sub_key)
                            logger.warn(msg)
                            logger_zato.warn(msg)
                            sleep(sleep_time)

                else:

                    # Wait for our turn
                    sleep(default_sleep_time)

        # The exception object itself is not needed - format_exc reads the exception
        # currently being handled and its only positional argument is a traceback depth limit.
        except Exception:
            error_msg = 'Exception in delivery task for sub_key:`%s`, e:`%s`'
            e_formatted = format_exc()
            logger.warn(error_msg, self.sub_key, e_formatted)
            logger_zato.warn(error_msg, self.sub_key, e_formatted)
Exemple #10
0
    def handle(self):
        """ Updates an existing message with data and metadata from input and saves it through self._save_item.

        An SQL session is opened only if self._message_update_has_gd is set and it is always closed on exit.
        On output, the response contains msg_id, found, size and expiration_time, the last one being None
        if the message has no expiration.
        """
        input = self.request.input
        self.response.payload.msg_id = input.msg_id
        session = self.odb.session() if self._message_update_has_gd else None

        try:
            # Get that from its storage, no matter what it is
            item = self._get_item(input, session)

            # NOTE(review): a missing item is handled only if there is an SQL session - presumably
            # self._get_item never returns an empty result without one; confirm against its implementations.
            if session and (not item):
                self.response.payload.found = False
                return

            item.data = input.data.encode('utf8')
            item.data_prefix = input.data[:self.pubsub.data_prefix_len].encode('utf8')
            item.data_prefix_short = input.data[:self.pubsub.data_prefix_short_len].encode('utf8')
            item.size = len(input.data)
            item.expiration = get_expiration(self.cid, input)
            item.priority = get_priority(self.cid, input)

            item.msg_id = input.msg_id
            item.pub_correl_id = input.correl_id
            item.in_reply_to = input.in_reply_to
            item.mime_type = input.mime_type

            if item.expiration:

                # Expiration is counted either from now or from the original publication time
                if self.request.input.exp_from_now:
                    from_ = utcnow_as_ms()
                else:
                    from_ = item.pub_time
                item.expiration_time = from_ + (item.expiration / 1000.0)
            else:
                # No expiration means no expiration time - this must be a falsy value
                # because the response below multiplies any truthy one by 1000.0.
                item.expiration_time = None

            # Save data to its storage, SQL for GD and RAM for non-GD messages
            found = self._save_item(item, input, session)

            self.response.payload.found = found
            self.response.payload.size = item.size
            self.response.payload.expiration_time = datetime_from_ms(
                item.expiration_time * 1000.0) if item.expiration_time else None
        finally:
            if session:
                session.close()
Exemple #11
0
    def handle(self):
        """ Returns queue depth for each sub_key from input, as a dictionary keyed by sub_key.
        Input must contain either sub_key or sub_key_list, with the former taking precedence.
        """
        request_input = self.request.input
        request_input.require_any('sub_key', 'sub_key_list')

        # Either element is accepted on input but a list is what we always work with
        if request_input.sub_key:
            sub_key_list = [request_input.sub_key]
        else:
            sub_key_list = request_input.sub_key_list

        with closing(self.odb.session()) as session:

            # The current time is read anew for each sub_key
            queue_depth = {
                sub_key: get_queue_depth_by_sub_key(session, self.server.cluster_id, sub_key, utcnow_as_ms())
                for sub_key in sub_key_list
            }

        self.response.payload.queue_depth = queue_depth
Exemple #12
0
    def handle(self, _batch_size=PUBSUB.DEFAULT.GET_BATCH_SIZE):
        """ Reads up to batch_size messages for the input sub_key and appends each of them
        to the response as a dictionary. If input has no batch_size, _batch_size is used instead.
        """
        request_input = self.request.input
        batch_size = request_input.batch_size or _batch_size

        with closing(self.odb.session()) as session:

            messages = get_messages(
                session, self.server.cluster_id, request_input.sub_key, batch_size, utcnow_as_ms())

            for msg in messages:

                # This timestamp is optional, unlike the other ones below
                if msg.ext_pub_time:
                    ext_pub_time = datetime_from_ms(msg.ext_pub_time)
                else:
                    ext_pub_time = None

                out = {
                    'msg_id': msg.msg_id,
                    'correl_id': msg.correl_id,
                    'in_reply_to': msg.in_reply_to,
                    'priority': msg.priority,
                    'size': msg.size,
                    'data_format': msg.data_format,
                    'mime_type': msg.mime_type,
                    'data': msg.data,
                    'expiration': msg.expiration,
                    'expiration_time': datetime_from_ms(msg.expiration_time),
                    'ext_client_id': msg.ext_client_id,
                    'ext_pub_time': ext_pub_time,
                    'topic_name': msg.topic_name,
                    'recv_time': datetime_from_ms(msg.recv_time),
                    'delivery_count': msg.delivery_count,
                }
                self.response.payload.append(out)

            # The session needs to be committed because the underlying query issued SELECT FOR UPDATE
            session.commit()
Exemple #13
0
    def __init__(self, pubsub_tool, pubsub, sub_key, delivery_lock,
                 delivery_list, deliver_pubsub_msg,
                 confirm_pubsub_msg_delivered_cb, sub_config):
        """ Keeps references to the objects and callbacks given on input, derives delivery settings
        from sub_config and, as the very last step, passes self.run to spawn_greenlet.
        """
        # Objects and callbacks received from the caller
        self.pubsub_tool = pubsub_tool
        self.pubsub = pubsub
        self.sub_key = sub_key
        self.sub_config = sub_config
        self.delivery_lock = delivery_lock
        self.delivery_list = delivery_list
        self.deliver_pubsub_msg = deliver_pubsub_msg
        self.confirm_pubsub_msg_delivered_cb = confirm_pubsub_msg_delivered_cb

        # Settings copied from the subscription's configuration, both waiting times stored as floats
        self.topic_name = sub_config.topic_name
        self.wait_sock_err = float(self.sub_config.wait_sock_err)
        self.wait_non_sock_err = float(self.sub_config.wait_non_sock_err)
        self.delivery_max_retry = self.sub_config.delivery_max_retry
        self.previous_delivery_method = self.sub_config.delivery_method

        # The configured interval is divided by 1000.0 before it is stored
        self.delivery_interval = self.sub_config.task_delivery_interval / 1000.0

        # Runtime state of this task
        self.keep_running = True
        self.last_run = utcnow_as_ms()

        # How many messages have been processed so far
        self.delivery_counter = 0

        # Messages requested to be deleted while a delivery was in progress, checked before each delivery
        self.delete_requested = []

        # A lock for micro-operations such as changing or consulting the contents of self.delete_requested
        self.interrupt_lock = RLock()

        # Messages are always wrapped in a list unless batch_size is exactly 1
        # and wrap_one_msg_in_list is not set - only then is self.wrap_in_list False.
        # With any other batch_size a list is always sent, no matter what.
        sends_single_bare_msg = self.sub_config.delivery_batch_size == 1 and \
            not self.sub_config.wrap_one_msg_in_list
        self.wrap_in_list = not sends_single_bare_msg

        spawn_greenlet(self.run)
Exemple #14
0
    def handle(self, _utcnow=datetime.utcnow):
        """ Updates an SQL message, looked up by input cluster_id and msg_id, with data and metadata from input.
        The response's found flag tells whether there was such a message at all.
        """
        request_input = self.request.input

        with closing(self.odb.session()) as session:

            query = session.query(PubSubMessage)
            query = query.filter(PubSubMessage.cluster_id==request_input.cluster_id)
            query = query.filter(PubSubMessage.pub_msg_id==request_input.msg_id)
            msg = query.first()

            # No such message, nothing to update
            if not msg:
                self.response.payload.found = False
                return

            data = request_input.data

            # Data and both of its prefixes are stored as UTF-8 ..
            msg.data = data.encode('utf8')
            msg.data_prefix = data[:self.pubsub.data_prefix_len].encode('utf8')
            msg.data_prefix_short = data[:self.pubsub.data_prefix_short_len].encode('utf8')

            # .. whereas size is the length of data as it was given on input.
            msg.size = len(data)

            msg.expiration = get_expiration(self.cid, request_input)
            msg.priority = get_priority(self.cid, request_input)

            msg.pub_correl_id = request_input.correl_id
            msg.in_reply_to = request_input.in_reply_to
            msg.mime_type = request_input.mime_type

            if not msg.expiration:
                msg.expiration_time = None
            else:
                # Expiration is counted either from now or from the original publication time
                from_ = utcnow_as_ms() if self.request.input.exp_from_now else msg.pub_time
                msg.expiration_time = from_ + (msg.expiration / 1000.0)

            session.add(msg)
            session.commit()

            if msg.expiration_time:
                expiration_time = datetime_from_ms(msg.expiration_time * 1000.0)
            else:
                expiration_time = None

            self.response.payload.found = True
            self.response.payload.size = msg.size
            self.response.payload.expiration_time = expiration_time
Exemple #15
0
    def __init__(self, config, server_name, server_pid):
        """ Builds a topic out of its configuration and the name and PID of the server it belongs to.
        """
        self.config = config
        self.server_name = server_name
        self.server_pid = server_pid

        # These attributes are copied from configuration as they are
        for attr_name in (
            'id',
            'name',
            'is_active',
            'is_internal',
            'max_depth_gd',
            'max_depth_non_gd',
            'has_gd',
            'depth_check_freq',
            'pub_buffer_size_gd',
            'task_delivery_interval',
            'meta_store_frequency',
        ):
            setattr(self, attr_name, getattr(config, attr_name))

        # The name of the event log is built from the server's name and PID along with the topic's name
        event_log_name = 't.{}.{}.{}'.format(self.server_name, self.server_pid, self.name)
        self.event_log = EventLog(event_log_name)

        self.set_hooks()

        # The configured sync interval is divided by 1000.0 before it is stored.
        # For now, it is the same for GD and non-GD messages so one value serves both types.
        self.task_sync_interval = config.task_sync_interval / 1000.0

        # Counters of messages published to this topic from the current server only,
        # i.e. they are not global ones.
        self.msg_pub_counter = 0
        self.msg_pub_counter_gd = 0
        self.msg_pub_counter_non_gd = 0

        # When subscribers were last notified about messages from the current server,
        # which, again, is not a global value.
        self.last_synced = utcnow_as_ms()

        # Whether a GD or non-GD message has been published to this topic since self.last_synced
        # was last updated. The flags are changed through PubSub with a lock for this topic held.
        self.sync_has_gd_msg = False
        self.sync_has_non_gd_msg = False

        # The last time a GD message was published to this topic
        self.gd_pub_time_max = None
Exemple #16
0
 def __init__(self):
     """ Creates a message whose attributes all start with placeholder values,
     except for recv_time which is set to the current result of utcnow_as_ms.
     """
     # Where and when the message was received
     self.recv_time = utcnow_as_ms()
     self.server_name = None
     self.server_pid = None
     self.cluster_id = None

     # Topic and subscription the message is related to
     self.topic = None
     self.topic_id = None
     self.topic_name = None
     self.sub_key = None
     self.is_in_sub_queue = None
     self.pub_pattern_matched = None
     self.sub_pattern_matched = {}

     # Identifiers and correlation
     self.pub_msg_id = None
     self.pub_correl_id = None
     self.in_reply_to = None
     self.ext_client_id = None
     self.published_by_id = None
     self.group_id = None
     self.position_in_group = None

     # Payload
     self.data = ''
     self.data_prefix = ''
     self.data_prefix_short = ''
     self.mime_type = None
     self.size = None

     # May be set by hooks to provide an explicitly serialized output for this message
     self.serialized = None

     # Delivery metadata
     self.priority = None
     self.has_gd = None
     self.delivery_status = None
     self.delivery_count = 0
     self.reply_to_sk = []
     self.deliver_to_sk = []

     # Timestamps and their ISO-formatted counterparts
     self.pub_time = None
     self.ext_pub_time = None
     self.expiration = None
     self.expiration_time = None
     self.pub_time_iso = None
     self.ext_pub_time_iso = None
     self.expiration_time_iso = None

     # Context objects
     self.user_ctx = None
     self.zato_ctx = None

     # To make this class look more like an SQLAlchemy one
     setattr(self, GENERIC.ATTR_NAME, None)
Exemple #17
0
    def handle(
        self,
        _msg='Cleaning up WSX pub/sub, channel:`%s`, now:`%s (%s)`, md:`%s`, ma:`%s` (%s)'
    ):
        """ For each WSX channel given on input, deletes that channel's subscriptions whose last interaction time
        is older than the channel's cut-off time. All deletions are committed together at the end.
        """

        # Input is a multi-line list of WSX channel name -> max timeout accepted
        channel_to_timeout = parse_extra_into_dict(self.request.raw_request)

        with closing(self.odb.session()) as session:

            for channel_name, max_delta in channel_to_timeout.items():

                # The timeout is given in minutes whereas the comparison below is made in seconds
                max_delta = max_delta * 60

                # The last interaction time of each connection must not be older than the cut-off
                current_time = utcnow_as_ms()
                cut_off = current_time - max_delta

                # Positional arguments shared by both log entries below
                log_args = (
                    channel_name,
                    datetime_from_ms(current_time * 1000),
                    current_time,
                    max_delta,
                    datetime_from_ms(cut_off * 1000),
                    cut_off,
                )

                self.logger.info(_msg, *log_args)
                logger_pubsub.info(_msg, *log_args)

                # A statement to delete old connections for this channel ..
                delete_stale = (
                    SubscriptionDelete()
                    .where(SubscriptionTable.c.ws_channel_id==WSXChannelTable.c.id)
                    .where(WSXChannelTable.c.name==channel_name)
                    .where(SubscriptionTable.c.last_interaction_time < cut_off)
                )

                # .. which can be executed now.
                session.execute(delete_stale)

            # One commit for all the deletions
            session.commit()
Exemple #18
0
    def handle(self):
        """ Publishes one or more messages from input to the topic named in input.topic_name.
        Raises NotFound if there is no such topic.
        """
        request_input = self.request.input
        pubsub = self.server.worker_store.pubsub  # type: PubSub

        # Returns the publication pattern matched or raises an exception that is not caught here
        endpoint_id, pattern_matched = self.get_pattern_matched(request_input.endpoint_id, request_input)

        try:
            topic = pubsub.get_topic_by_name(request_input.topic_name)  # type: Topic
        except KeyError:
            raise NotFound(self.cid, 'No such topic `{}`'.format(request_input.topic_name))

        # Time is always counted in milliseconds since UNIX epoch
        now = utcnow_as_ms()

        # All subscribers for that topic from the local worker store
        subscriptions_by_topic = pubsub.get_subscriptions_by_topic(topic.name)
        has_subscribers = bool(subscriptions_by_topic)

        # A non-empty input.data_list is a list of messages, each with its own metadata.
        # Without it, the other input parameters describe what to publish.
        data_list = request_input.data_list or None

        # Input may contain a mix of GD and non-GD messages which need to be extracted separately
        msg_id_list, gd_msg_list, non_gd_msg_list = self._get_messages_from_data(
            topic, data_list, request_input, now, pattern_matched, endpoint_id, has_subscribers)

        endpoint_name = pubsub.get_endpoint_by_id(endpoint_id).name

        # A wrapper object for all the input data and metadata ..
        ctx = PubCtx(
            self.server.cluster_id, pubsub, topic, endpoint_id, endpoint_name, subscriptions_by_topic,
            msg_id_list, gd_msg_list, non_gd_msg_list, pattern_matched, request_input.get('ext_client_id'),
            False, now)

        # .. which is everything that is needed to publish the message(s).
        self._publish(ctx)
Exemple #19
0
    def _subscribe_impl(self, ctx):
        """ Invoked by subclasses to subscribe callers using input pub/sub config context.

        All the work is carried out while holding the lock returned by self.lock for ctx.topic_name
        and within a single SQL session. In order, the method:

        * Rejects the request if a non-WebSocket endpoint already has a subscription to the topic
        * Generates a new sub_key and adds the subscription to SQL
        * Calls move_messages_to_sub_queue for the new sub_key
        * Registers the subscription with self.pubsub and, for WebSocket clients, with the client's pubsub_tool
        * Commits the session and sets sub_key and queue_depth in the response payload
        * Publishes the subscription's configuration through self.broker_client

        Events describing each stage are emitted through self.pubsub.emit_about_to_subscribe
        and self.pubsub.emit_in_subscribe_impl.

        Raises PubSubSubscriptionExists if a non-WebSocket endpoint is already subscribed to ctx.topic.
        """
        with self.lock('zato.pubsub.subscribe.%s' % (ctx.topic_name)):

            # Emit events about an upcoming subscription. Note that all the 'sub.sk.N' events report ctx.sub_key,
            # that is, the value found in the input context, rather than the sub_key generated further below.
            self.pubsub.emit_about_to_subscribe({
                'stage': 'sub.sk.1',
                'sub_key': ctx.sub_key
            })

            self.pubsub.emit_about_to_subscribe({
                'stage': 'init.ctx',
                'data': ctx
            })

            self.pubsub.emit_about_to_subscribe({
                'stage': 'sub.sk.2',
                'sub_key': ctx.sub_key
            })

            # Endpoint on whose behalf the subscription will be made
            endpoint = self.pubsub.get_endpoint_by_id(ctx.endpoint_id)

            # Event log
            self.pubsub.emit_in_subscribe_impl({
                'stage': 'endpoint',
                'data': endpoint,
            })

            self.pubsub.emit_about_to_subscribe({
                'stage': 'sub.sk.3',
                'sub_key': ctx.sub_key
            })

            with closing(self.odb.session()) as session:

                with session.no_autoflush:

                    # Non-WebSocket clients cannot subscribe to the same topic multiple times
                    if not ctx.ws_channel_id:

                        # Event log
                        self.pubsub.emit_in_subscribe_impl({
                            'stage': 'no_ctx_ws_channel_id',
                            'data': ctx.ws_channel_id
                        })

                        self.pubsub.emit_about_to_subscribe({
                            'stage': 'sub.sk.4',
                            'sub_key': ctx.sub_key
                        })

                        if has_subscription(session, ctx.cluster_id,
                                            ctx.topic.id, ctx.endpoint_id):

                            # Event log - each value needs its own key, otherwise the last one
                            # silently overwrites the previous ones in the dict.
                            self.pubsub.emit_in_subscribe_impl({
                                'stage': 'has_subscription',
                                'data': {
                                    'ctx.cluster_id': ctx.cluster_id,
                                    'ctx.topic_id': ctx.topic.id,
                                    'ctx.endpoint_id': ctx.endpoint_id,
                                }
                            })

                            raise PubSubSubscriptionExists(
                                self.cid,
                                'Endpoint `{}` is already subscribed to topic `{}`'
                                .format(endpoint.name, ctx.topic.name))

                    # Is it a WebSockets client?
                    is_wsx = bool(ctx.ws_channel_id)

                    self.pubsub.emit_about_to_subscribe({
                        'stage': 'sub.sk.5',
                        'sub_key': ctx.sub_key
                    })

                    # The same timestamp is used for the subscription's creation time
                    # and for the SQL helpers called below.
                    ctx.creation_time = now = utcnow_as_ms()
                    sub_key = new_sub_key(self.endpoint_type,
                                          ctx.ext_client_id)

                    self.pubsub.emit_in_subscribe_impl({
                        'stage': 'new_sk_generated',
                        'data': {
                            'sub_key': sub_key,
                        }
                    })

                    # Event log
                    self.pubsub.emit_in_subscribe_impl({
                        'stage': 'before_add_subscription',
                        'data': {
                            'is_wsx': is_wsx,
                            'ctx.creation_time': ctx.creation_time,
                            'sub_key': sub_key,
                            'sub_sk': sorted(self.pubsub.subscriptions_by_sub_key),
                        }
                    })

                    # Create a new subscription object and flush the session because the subscription's ID
                    # may be needed for the WSX subscription
                    ps_sub = add_subscription(session, ctx.cluster_id, sub_key,
                                              ctx)
                    session.flush()

                    # Event log
                    self.pubsub.emit_in_subscribe_impl({
                        'stage': 'after_add_subscription',
                        'data': {
                            'ctx.cluster_id': ctx.cluster_id,
                            'ps_sub': ps_sub.asdict(),
                            'sub_sk': sorted(self.pubsub.subscriptions_by_sub_key),
                        }
                    })

                    # Common configuration for WSX and broker messages
                    sub_config = Bunch()
                    sub_config.topic_name = ctx.topic.name
                    sub_config.task_delivery_interval = ctx.topic.task_delivery_interval
                    sub_config.endpoint_name = endpoint.name
                    sub_config.endpoint_type = self.endpoint_type
                    sub_config.unsub_on_wsx_close = ctx.unsub_on_wsx_close
                    sub_config.ext_client_id = ctx.ext_client_id

                    # Copy the remaining attributes from the SQL object, using None for any that it does not have
                    for name in sub_broker_attrs:
                        sub_config[name] = getattr(ps_sub, name, None)

                    #
                    # At this point there may be several cases depending on whether there are already other subscriptions
                    # or messages in the topic.
                    #
                    # * If there are subscribers, then this method will not move any messages because the messages
                    #   will have been already moved to queues of other subscribers before we are called
                    #
                    # * If there are no subscribers but there are messages in the topic then this subscriber will become
                    #   the sole recipient of the messages (we don't have any intrinsic foreknowledge of when, if at all,
                    #   other subscribers can appear)
                    #
                    # * If there are no subscribers and no messages in the topic then this is a no-op
                    #

                    move_messages_to_sub_queue(session, ctx.cluster_id,
                                               ctx.topic.id, ctx.endpoint_id,
                                               ctx.sub_pattern_matched,
                                               sub_key, now)

                    # Subscription's ID is available only now, after the session was flushed
                    sub_config.id = ps_sub.id

                    # Update current server's pub/sub config
                    self.pubsub.add_subscription(sub_config)

                    if is_wsx:

                        # Event log
                        self.pubsub.emit_in_subscribe_impl({
                            'stage': 'before_wsx_sub',
                            'data': {
                                'is_wsx': is_wsx,
                                'sub_sk': sorted(self.pubsub.subscriptions_by_sub_key),
                            }
                        })

                        # This object persists across multiple WSX connections
                        wsx_sub = add_wsx_subscription(
                            session, ctx.cluster_id, ctx.is_internal, sub_key,
                            ctx.ext_client_id, ctx.ws_channel_id, ps_sub.id)

                        # Event log
                        self.pubsub.emit_in_subscribe_impl({
                            'stage': 'after_wsx_sub',
                            'data': {
                                'wsx_sub': wsx_sub.asdict(),
                                'sub_sk': sorted(self.pubsub.subscriptions_by_sub_key),
                            }
                        })

                        # This object will be transient - dropped each time a WSX client disconnects
                        self.pubsub.add_wsx_client_pubsub_keys(
                            session, ctx.sql_ws_client_id, sub_key,
                            ctx.ws_channel_name, ctx.ws_pub_client_id,
                            ctx.web_socket.get_peer_info_dict())

                        # Let the WebSocket connection object know that it should handle this particular sub_key
                        ctx.web_socket.pubsub_tool.add_sub_key(sub_key)

                    # Commit all changes
                    session.commit()

                    # Produce response
                    self.response.payload.sub_key = sub_key

                    if is_wsx:

                        # Let the pub/sub task know it can fetch any messages possibly enqueued for that subscriber,
                        # note that since this is a new subscription, it is certain that only GD messages may be available,
                        # never non-GD ones.
                        ctx.web_socket.pubsub_tool.enqueue_gd_messages_by_sub_key(
                            sub_key)

                        gd_depth, non_gd_depth = ctx.web_socket.pubsub_tool.get_queue_depth(
                            sub_key)
                        self.response.payload.queue_depth = gd_depth + non_gd_depth
                    else:

                        # TODO:
                        # This should be read from that client's delivery task instead of SQL so as to include
                        # non-GD messages too.

                        self.response.payload.queue_depth = get_queue_depth_by_sub_key(
                            session, ctx.cluster_id, sub_key, now)

                # Notify workers of a new subscription
                sub_config.action = BROKER_MSG_PUBSUB.SUBSCRIPTION_CREATE.value

                # Append information about current server which will let all workers
                # know if they should create a subscription object (if they are different) or not.
                sub_config.server_receiving_subscription_id = self.server.id
                sub_config.server_receiving_subscription_pid = self.server.pid
                sub_config.is_api_call = True

                logger_pubsub.info('Subscription created `%s`', sub_config)

                self.broker_client.publish(sub_config)
Exemple #20
0
 def _cleanup(self, session):
     """ Calls delete_msg_expired for this server's cluster, using the current time,
     and returns a tuple of its result and the 'expired' label.
     """
     cluster_id = self.server.cluster_id
     now = utcnow_as_ms()
     return delete_msg_expired(session, cluster_id, None, now), 'expired'
Exemple #21
0
 def _cleanup(self, session):
     """ Calls delete_msg_delivered for this server's cluster, using the current time,
     and returns a tuple of its result and the 'delivered (from topic)' label.
     """
     cluster_id = self.server.cluster_id
     now = utcnow_as_ms()
     return delete_msg_delivered(session, cluster_id, now), 'delivered (from topic)'
Exemple #22
0
    def handle(self):
        """ Publishes the message or messages given on input to the topic named input.topic_name.

        Resolves the publication pattern and the topic, selects the subscriptions that should receive
        the data (all of the topic's subscriptions or only those listed in input.deliver_to_sk), builds
        the lists of GD and non-GD messages, wraps everything in a PubCtx object and hands it over to self._publish.

        Raises NotFound if the topic does not exist and ServiceUnavailable if it is not active.
        """
        request = self.request.input
        pubsub = self.server.worker_store.pubsub  # type: PubSub

        # Either returns the publication pattern matched or raises an exception which is deliberately not caught here
        endpoint_id, pub_pattern_matched = self.get_pub_pattern_matched(
            request.endpoint_id, request)

        try:
            topic = pubsub.get_topic_by_name(request.topic_name)  # type: Topic
        except KeyError:
            raise NotFound(self.cid,
                           'No such topic `{}`'.format(request.topic_name))

        # Messages cannot be published to inactive topics
        if not topic.is_active:
            raise ServiceUnavailable(
                self.cid, 'Topic is inactive `{}`'.format(request.topic_name))

        # Time is always counted in milliseconds since UNIX epoch
        now = utcnow_as_ms()

        # All the subscriptions to this topic that the local worker store knows of
        topic_subs = pubsub.get_subscriptions_by_topic(topic.name)
        len_all_sub = len(topic_subs)

        if request.deliver_to_sk:

            # Only selected subscribers are to receive the message(s) so any other ones are filtered out
            has_all = False
            subscriptions_by_topic = [
                item for item in topic_subs if item.sub_key in request.deliver_to_sk
            ]

        else:

            # Each of the topic's subscribers will receive the message(s)
            has_all = True
            subscriptions_by_topic = topic_subs

        # Used for logging only
        subs_for_log = []

        # Until proven otherwise, all WSX clients are assumed to have a server
        has_wsx_no_server = False

        for sub in subscriptions_by_topic:

            subs_for_log.append({sub.sub_key: sub.sub_pattern_matched})

            # Nothing more to do for sub_keys that have a server
            if self.pubsub.get_sub_key_server(sub.sub_key):
                continue

            if has_logger_pubsub_debug:
                logger_pubsub.debug(
                    'No sk_server for sub_key `%s` among `%s`',
                    sub.sub_key,
                    sorted(self.pubsub.sub_key_servers.keys()))

            # At least one WSX subscriber has no server, which means that it is not connected.
            # Later on, all the messages will need to be turned into GD ones because of that.
            has_wsx_no_server = True

        logger_pubsub.info(
            'Subscriptions for topic `%s` `%s` (a:%d, %d/%d, cid:%s)',
            topic.name, subs_for_log, has_all, len(subscriptions_by_topic),
            len_all_sub, self.cid)

        # A non-empty data_list is a list of messages, each with its own metadata. Without it,
        # there is a single string to publish and it is described by other input parameters.
        data_list = request.data_list or None

        # GD and non-GD messages may be mixed on input, hence they are extracted into separate lists
        msg_id_list, gd_msg_list, non_gd_msg_list = self._get_messages_from_data(
            topic, data_list, request, now, pub_pattern_matched, endpoint_id,
            subscriptions_by_topic, has_wsx_no_server,
            request.get('reply_to_sk', None))

        # A single object to hold all the input data and metadata
        endpoint_name = pubsub.get_endpoint_by_id(endpoint_id).name
        ext_client_id = request.get('ext_client_id')

        ctx = PubCtx(self.server.cluster_id, pubsub, topic, endpoint_id,
                     endpoint_name, subscriptions_by_topic, msg_id_list,
                     gd_msg_list, non_gd_msg_list, pub_pattern_matched,
                     ext_client_id, False, now)

        # Everything is ready, the message(s) can be published now
        self._publish(ctx)
Exemple #23
0
    def _get_delete_messages_by_sub_keys(self, topic_id, sub_keys, delete_msg=True, delete_sub=False):
        """ Low-level implementation of retrieve_messages_by_sub_keys which must be called with self.lock held.

        Returns a list of messages, without duplicates, found for input sub_keys in self.sub_key_to_msg_id.
        Messages that are missing from self.msg_id_to_msg or whose expiration_time has been reached are not returned.

        If delete_msg is True, each message returned is also removed from self.msg_id_to_msg, from
        self.topic_msg_id[topic_id] and from the per-sub_key containers of all the input sub_keys.
        If delete_sub is True, sub_keys are removed from self.sub_key_to_msg_id too, but note that this happens
        only while messages are being deleted, i.e. not if there was nothing to delete.

        An empty sub_keys on input results in an empty list on output.
        """
        now = utcnow_as_ms() # We cannot return expired messages
        msg_seen = set() # We cannot have duplicates on output
        out = []

        # A list of messages that will be optionally deleted before they are returned
        to_delete_msg = set()

        # First, collect data for all sub_keys ..
        for sub_key in sub_keys:

            for msg_id in self.sub_key_to_msg_id.get(sub_key, []):

                # We already had this message marked for output
                if msg_id in msg_seen:
                    continue

                # Mark as already seen
                msg_seen.add(msg_id)

                msg = self.msg_id_to_msg.get(msg_id)
                if not msg:
                    logger.warn('Msg `%s` not found in self.msg_id_to_msg', msg_id)
                    continue

                # Filter out expired messages
                if now >= msg['expiration_time']:
                    continue

                out.append(msg)

                # Only messages that are actually returned may be deleted
                if delete_msg:
                    to_delete_msg.add(msg_id)

        # Note that there is no "del sub_key" here - the name is unbound if sub_keys is empty,
        # in which case deleting it would raise an exception, and the loop below rebinds it anyway.

        # Delete all messages marked to be deleted ..
        for msg_id in to_delete_msg:

            # .. first, direct mappings ..
            self.msg_id_to_msg.pop(msg_id, None)

            logger.info('Deleting msg from mapping dict `%s`, before:`%s`', msg_id, self.msg_id_to_msg)

            # .. now, remove the message from topic ..
            self.topic_msg_id[topic_id].remove(msg_id)

            logger.info('Deleting msg from mapping topic `%s`, after:`%s`', msg_id, self.topic_msg_id)

            # .. now, find the message for each sub_key ..
            for sub_key in sub_keys:
                sub_key_to_msg_id = self.sub_key_to_msg_id.get(sub_key)

                # We need this if statement because it is possible that a client is subscribed to a topic
                # but it will not receive a particular message. This is possible if the message is a response
                # to a previous request and the latter used reply_to_sk, in which case only that one sub_key pointed to
                # by reply_to_sk will get the response, which ultimately means that self.sub_key_to_msg_id
                # will not have this response for current sub_key.
                if sub_key_to_msg_id:

                    # .. delete the message itself - but we need to catch the exception raised if it is missing
                    # because to_delete_msg contains all messages to be deleted and we do not know
                    # if this particular message belonged to this particular sub_key or not. Both exceptions
                    # are caught because sets raise KeyError whereas lists raise ValueError.
                    try:
                        sub_key_to_msg_id.remove(msg_id)
                    except (KeyError, ValueError):
                        pass # OK, message was not found for this sub_key

                    # .. now delete the sub_key if we are explicitly told to (e.g. during unsubscribe)
                    if delete_sub:
                        del self.sub_key_to_msg_id[sub_key]

        return out
Exemple #24
0
def move_messages_to_sub_queue(session, cluster_id, topic_id, endpoint_id, sub_pattern_matched, sub_key, pub_time_max,
    _initialized=_initialized):
    """ Enqueues for sub_key all of a topic's messages whose expiration time is greater than pub_time_max,
    which are not flagged as being in a subscriber's queue and which sub_key does not have in its queue yet.
    The messages enqueued are then flagged as being in a subscriber's queue. Nothing is inserted or updated
    if no such messages are found. The caller must hold a global lock for the topic because the function
    issues several queries that are not atomic as a whole.
    """
    # Messages that this sub_key already has in its queue - they will not be enqueued again
    already_enqueued = session.query(PubSubEndpointEnqueuedMessage.pub_msg_id)
    already_enqueued = already_enqueued.filter(PubSubEndpointEnqueuedMessage.sub_key==sub_key)

    creation_time = utcnow_as_ms()

    # What the SELECT returns - two columns of each message found plus values that are constant for all the rows
    selected = (
        PubSubMessage.pub_msg_id,
        PubSubMessage.topic_id,
        expr.bindparam('creation_time', creation_time),
        expr.bindparam('endpoint_id', endpoint_id),
        expr.bindparam('sub_pattern_matched', sub_pattern_matched),
        expr.bindparam('sub_key', sub_key),
        expr.bindparam('is_in_staging', False),
        expr.bindparam('cluster_id', cluster_id),
    )

    # Conditions that a message in the topic needs to meet to be moved to the subscriber's queue
    conditions = (
        PubSubMessage.topic_id==topic_id,
        PubSubMessage.cluster_id==cluster_id,
        PubSubMessage.expiration_time > pub_time_max,
        ~PubSubMessage.is_in_sub_queue,
        PubSubMessage.pub_msg_id.notin_(already_enqueued),
    )

    # The same SELECT is used to find message IDs right below and as the source of rows for the INSERT later on
    select_messages = session.query(*selected)
    for condition in conditions:
        select_messages = select_messages.filter(condition)

    # The IDs are pulled into Python so that the INSERT and UPDATE run only if there is anything to move
    msg_ids = [row.pub_msg_id for row in select_messages.all()]

    if not msg_ids:
        return

    # Columns of the subscriber's queue that the INSERT populates, in the same order as in the SELECT
    target_columns = (
        PubSubEndpointEnqueuedMessage.pub_msg_id,
        PubSubEndpointEnqueuedMessage.topic_id,
        expr.column('creation_time'),
        expr.column('endpoint_id'),
        expr.column('sub_pattern_matched'),
        expr.column('sub_key'),
        expr.column('is_in_staging'),
        expr.column('cluster_id'),
    )

    # Add references to the topic's messages to the subscriber's queue
    session.execute(insert(PubSubEndpointEnqueuedMessage).from_select(target_columns, select_messages))

    # Flag the messages as being delivered to this subscriber, which means that no other subscriber
    # will ever receive them. Only the messages just enqueued are updated, never any other ones.
    mark_as_enqueued = update(MsgTable).values({
        'is_in_sub_queue': True,
    })
    mark_as_enqueued = mark_as_enqueued.where(and_(
        MsgTable.c.pub_msg_id.in_(msg_ids),
        ~MsgTable.c.is_in_sub_queue
    ))

    session.execute(mark_as_enqueued)
Exemple #25
0
    def _handle_new_messages(self, ctx, delta=60):
        """ A callback invoked when there is at least one new message to be handled for input sub_keys.
        If ctx.has_gd is True, it means that at least one GD message is available and an SQL session is opened
        to fetch such messages. If ctx.non_gd_msg_list is not empty, it is a list of non-GD messages for sub_keys.

        The delta parameter is subtracted from current time before the result is stored in self.last_gd_run
        for each sub_key that had GD messages pushed.

        Exceptions raised while handling the messages are logged rather than propagated. The SQL session,
        if one was opened, is always committed and closed at the end.
        """
        session = None
        try:
            if ctx.has_gd:
                session = self.pubsub.server.odb.session()
            else:
                if not ctx.non_gd_msg_list:
                    # This is an unusual situation but not an erroneous one because it is possible
                    # that we were triggered to deliver messages that have already expired in the meantime,
                    # in which case we just log on info level rather than warn.
                    logger.info('No messages received ({}) for cid:`{}`, has_gd:`{}` and sub_key_list:`{}`'.format(
                        ctx.non_gd_msg_list, ctx.cid, ctx.has_gd, ctx.sub_key_list))
                    return

            logger.info('Handle new messages, cid:%s, gd:%d, sub_keys:%s, len_non_gd:%d bg:%d',
                ctx.cid, int(ctx.has_gd), ctx.sub_key_list, len(ctx.non_gd_msg_list), ctx.is_bg_call)

            # Maps sub_keys to lists of GD messages found for them
            gd_msg_list = {}

            # We need to have the broad lock first to read in messages for all the sub keys
            with self.lock:

                # Get messages for all sub_keys on input and break them out by each sub_key separately,
                # provided that we have a flag indicating that there should be some GD messages around in the database.
                if ctx.has_gd:
                    for msg in self._fetch_gd_messages_by_sub_key_list(ctx.sub_key_list, ctx.pub_time_max, session):
                        _sk_msg_list = gd_msg_list.setdefault(msg.sub_key, [])
                        _sk_msg_list.append(msg)

                # Note how we subtract delta seconds from current time - this is because
                # it is possible that there will be new messages enqueued in between our last
                # run and current time's generation - the difference will be likely just a few
                # milliseconds but to play it safe we use by default a generous slice of 60 seconds.
                # This is fine because any SQL queries depending on this value will also
                # include other filters such as delivery_status.
                # NOTE(review): delta is subtracted directly from the value that utcnow_as_ms returns,
                # confirm that both are expressed in the same unit.
                new_now = utcnow_as_ms() - delta

                # Go over all sub_keys given on input and carry out all operations while holding a lock for each sub_key
                for sub_key in ctx.sub_key_list:

                    with self.sub_key_locks[sub_key]:

                        # Accept all input non-GD messages
                        if ctx.non_gd_msg_list:
                            self._add_non_gd_messages_by_sub_key(sub_key, ctx.non_gd_msg_list)

                        # Push all GD messages, if there are any at all for this sub_key
                        if ctx.has_gd and sub_key in gd_msg_list:

                            topic_name = self.pubsub.get_topic_name_by_sub_key(sub_key)
                            self._push_gd_messages_by_sub_key(sub_key, topic_name, gd_msg_list[sub_key])

                            self.last_gd_run[sub_key] = new_now

                            logger.info('Storing last_gd_run of `%r` for sub_key:%s (d:%s)', new_now, sub_key, delta)

        except Exception:
            e = format_exc()
            logger.warn(e)
            logger_zato.warn(e)

        finally:
            if session:
                # The session must be closed even if the commit fails, otherwise it would never be released
                try:
                    session.commit()
                finally:
                    session.close()
Exemple #26
0
def move_messages_to_sub_queue(session,
                               cluster_id,
                               topic_id,
                               endpoint_id,
                               sub_key,
                               pub_time_max,
                               _initialized=_initialized):
    """ Enqueues for sub_key all of a topic's messages whose expiration time is greater than pub_time_max,
    which are not flagged as being in a subscriber's queue and which sub_key does not have in its queue yet.
    Afterwards, messages whose IDs are returned by get_sql_msg_ids_by_sub_key for that sub_key are flagged
    as being in a subscriber's queue. The function does not return any value. The caller must hold a global lock
    for the topic because the function issues several queries that are not atomic as a whole.
    """
    # Messages that this sub_key already has in its queue - they will not be enqueued again
    already_enqueued = session.query(PubSubEndpointEnqueuedMessage.pub_msg_id)
    already_enqueued = already_enqueued.filter(PubSubEndpointEnqueuedMessage.sub_key==sub_key)

    creation_time = utcnow_as_ms()

    # What the SELECT returns - two columns of each message found plus values that are constant for all the rows
    selected = (
        PubSubMessage.pub_msg_id,
        PubSubMessage.topic_id,
        expr.bindparam('creation_time', creation_time),
        expr.bindparam('endpoint_id', endpoint_id),
        expr.bindparam('sub_key', sub_key),
        expr.bindparam('is_in_staging', False),
        expr.bindparam('cluster_id', cluster_id),
    )

    # Conditions that a message in the topic needs to meet to be moved to the subscriber's queue
    conditions = (
        PubSubMessage.topic_id==topic_id,
        PubSubMessage.cluster_id==cluster_id,
        PubSubMessage.expiration_time > pub_time_max,
        ~PubSubMessage.is_in_sub_queue,
        PubSubMessage.pub_msg_id.notin_(already_enqueued),
    )

    # This SELECT is the source of rows for the INSERT below
    select_messages = session.query(*selected)
    for condition in conditions:
        select_messages = select_messages.filter(condition)

    # Columns of the subscriber's queue that the INSERT populates, in the same order as in the SELECT
    target_columns = (
        PubSubEndpointEnqueuedMessage.pub_msg_id,
        PubSubEndpointEnqueuedMessage.topic_id,
        expr.column('creation_time'),
        expr.column('endpoint_id'),
        expr.column('sub_key'),
        expr.column('is_in_staging'),
        expr.column('cluster_id'),
    )

    # Add references to the topic's messages to the subscriber's queue
    session.execute(insert(PubSubEndpointEnqueuedMessage).from_select(target_columns, select_messages))

    # Flag the messages as being delivered to this subscriber, which means that no other subscriber
    # will ever receive them. Only messages found for the current sub_key are updated, never any other ones.
    msg_ids = get_sql_msg_ids_by_sub_key(session, cluster_id, sub_key, None,
                                         pub_time_max)

    mark_as_enqueued = update(MsgTable).values({
        'is_in_sub_queue': True,
    })
    mark_as_enqueued = mark_as_enqueued.where(and_(
        MsgTable.c.pub_msg_id.in_(msg_ids),
        ~MsgTable.c.is_in_sub_queue
    ))

    session.execute(mark_as_enqueued)
Exemple #27
0
    def handle(self):
        """ Publishes the message or messages given on input to the topic named input.topic_name.

        GD messages, if there are any, are stored in SQL under a per-topic lock and in one transaction,
        along with references to them in each subscriber's queue. Optionally, the publication is written
        to the pub/sub audit log and, if the topic has subscribers, pub/sub tasks are notified of the new messages.
        The response contains msg_id if exactly one message was published or msg_id_list otherwise.

        Raises NotFound if the topic does not exist and ServiceUnavailable if the publication
        would have exceeded the topic's max depth.
        """
        request = self.request.input
        pubsub = self.server.worker_store.pubsub  # type: PubSub

        # Either returns the publication pattern matched or raises an exception which is deliberately not caught here
        endpoint_id, pattern_matched = self.get_pattern_matched(
            request.endpoint_id, request)

        try:
            topic = pubsub.get_topic_by_name(request.topic_name)  # type: Topic
        except KeyError:
            raise NotFound(self.cid,
                           'No such topic `{}`'.format(request.topic_name))

        # Time is always counted in milliseconds since UNIX epoch
        now = utcnow_as_ms()

        # A non-empty data_list is a list of messages, each with its own metadata. Without it,
        # there is a single string to publish and it is described by other input parameters.
        data_list = request.data_list or None

        # GD and non-GD messages may be mixed on input, hence they are extracted into separate lists
        msg_id_list, gd_msg_list, non_gd_msg_list = self._get_messages_from_data(
            topic, data_list, request, now, pattern_matched, endpoint_id)

        gd_count = len(gd_msg_list)
        has_gd = bool(gd_count)

        # All the subscriptions to this topic that the local worker store knows of
        subscriptions_by_topic = pubsub.get_subscriptions_by_topic(
            request.topic_name)

        # A topic without subscribers is worth a warning so that it does not go unnoticed
        if not subscriptions_by_topic:
            self.logger.warn('No subscribers found for topic `%s`',
                             request.topic_name)

        cluster_id = self.server.cluster_id
        has_pubsub_audit_log = self.server.has_pubsub_audit_log

        # Depth will be known only if there are GD messages and only if it is checked in this invocation
        current_depth = 'n/a'

        # An SQL transaction is needed only if there are any GD messages on input
        if has_gd:

            # The global lock for that topic rules out any interference from other publishers
            with self.lock('zato.pubsub.publish.%s' % request.topic_name):

                with closing(self.odb.session()) as session:

                    # Depth is not necessarily checked each time, the topic object decides if it is needed now
                    topic.incr_gd_depth_check()

                    if topic.needs_gd_depth_check():

                        # What the depth of this topic would be with the new messages included
                        depth_after_pub = get_topic_depth(
                            session, cluster_id, topic.id) + gd_count

                        # Reject the publication if max depth would have been exceeded
                        if depth_after_pub > topic.max_depth_gd:
                            raise ServiceUnavailable(
                                self.cid,
                                'Publication rejected - would have exceeded max depth for `{}`'
                                .format(topic.name))

                        # This updates the local variable only
                        current_depth = depth_after_pub

                    # Whereas this updates data in SQL
                    incr_topic_depth(session, cluster_id, topic.id, now,
                                     gd_count)

                    # INSERT rows to the topic, one for each message published
                    insert_topic_messages(session, self.cid, gd_msg_list)

                    # Add the messages to each subscriber's queue
                    if subscriptions_by_topic:
                        insert_queue_messages(session, cluster_id,
                                              subscriptions_by_topic,
                                              gd_msg_list, topic.id, now)

                    # Commit all the queries above in one go
                    session.commit()

                    # Metadata is updated in background
                    spawn(self._update_pub_metadata, cluster_id, topic.id,
                          endpoint_id, now, gd_msg_list, pattern_matched)

        # At this point the commit succeeded or there were no GD messages to begin with,
        # which means that the publication may be now stored in the audit log, if it is enabled.
        if has_pubsub_audit_log:

            audit_msg = (
                'PUB. CID:`%s`, topic:`%s`, from:`%s`, ext_client_id:`%s`, pattern:`%s`, new_depth:`%s`'
                ', GD data:`%s`, non-GD data:`%s`'
            )
            endpoint_name = self.pubsub.endpoints[endpoint_id].name
            ext_client_id = request.get('ext_client_id') or 'n/a'

            logger_audit.info(audit_msg, self.cid, topic.name, endpoint_name,
                              ext_client_id, pattern_matched, current_depth,
                              gd_msg_list, non_gd_msg_list)

        # Pub/sub task runners are notified of new messages, also in background, but only if there are both
        # subscribers and messages - the latter may be missing, for instance, if a hook service filtered out
        # all the messages given on input.
        if subscriptions_by_topic and (non_gd_msg_list or has_gd):
            self._notify_pubsub_tasks(topic.id, topic.name,
                                      subscriptions_by_topic,
                                      non_gd_msg_list, has_gd)

        # A single msg_id is returned if only one message was published,
        # otherwise it is a list of IDs, one for each message.
        if gd_count + len(non_gd_msg_list) == 1:
            self.response.payload.msg_id = msg_id_list[0]
        else:
            self.response.payload.msg_id_list = msg_id_list
Exemple #28
0
 def _cleanup(self, session):
     """ Calls delete_expired with the input SQL session, this server's cluster ID
     and the current value of utcnow_as_ms().

     Returns a two-element tuple: whatever delete_expired returned, followed by the 'expired' label.
     """
     cluster_id = self.server.cluster_id
     now = utcnow_as_ms()

     deleted = delete_expired(session, cluster_id, now)
     return deleted, 'expired'
Exemple #29
0
    def _subscribe_impl(self, ctx):
        """ Creates a new subscription out of the input pub/sub config context. Invoked by subclasses.

        The whole operation runs under a lock named after ctx.topic_name and inside a single SQL session.
        On success, self.response.payload receives the new sub_key along with the subscriber's queue depth
        and a SUBSCRIPTION_CREATE message is published through self.broker_client.

        Raises PubSubSubscriptionExists if a caller without ctx.ws_channel_id already has
        a subscription to the topic.
        """
        lock_name = 'zato.pubsub.subscribe.%s' % (ctx.topic_name)

        with self.lock(lock_name), closing(self.odb.session()) as session:

            # A caller is treated as a WebSockets (WSX) one if it has a channel ID assigned
            is_wsx = bool(ctx.ws_channel_id)

            # Only WSX clients may subscribe to the same topic more than once
            if not is_wsx and has_subscription(session, ctx.cluster_id, ctx.topic.id, ctx.endpoint_id):
                endpoint_name = self.pubsub.get_endpoint_by_id(ctx.endpoint_id).name
                error = 'Endpoint `{}` is already subscribed to topic `{}`'.format(endpoint_name, ctx.topic.name)
                raise PubSubSubscriptionExists(self.cid, error)

            now = utcnow_as_ms()
            ctx.creation_time = now
            ctx.sub_key = new_sub_key()

            # The session is flushed right after the subscription is added
            # because the ID of the latter may be required by the WSX subscription below.
            ps_sub = add_subscription(session, ctx.cluster_id, ctx)
            session.flush()

            # WSX clients need their own accompanying SQL models
            if is_wsx:

                # Persistent - survives across multiple WSX connections
                add_wsx_subscription(
                    session, ctx.cluster_id, ctx.is_internal, ctx.sub_key,
                    ctx.ext_client_id, ctx.ws_channel_id, ps_sub.id)

                # Transient - dropped each time a WSX client disconnects
                self.pubsub.add_ws_client_pubsub_keys(
                    session, ctx.sql_ws_client_id, ctx.sub_key,
                    ctx.ws_channel_name, ctx.ws_pub_client_id)

            # Configuration shared by WSX and broker messages
            topic = ctx.topic
            sub_config = Bunch()
            sub_config.topic_name = topic.name
            sub_config.task_delivery_interval = topic.task_delivery_interval
            sub_config.endpoint_type = self.endpoint_type

            # Attributes missing from the subscription object are stored as None
            for attr_name in sub_broker_attrs:
                sub_config[attr_name] = getattr(ps_sub, attr_name, None)

            #
            # Now move all the messages available to the new subscriber's queue. We hold the topic's global lock,
            # the same one that publications work under, so what happens depends on what the topic already has:
            #
            # * With other subscribers around, nothing is moved - their queues will have already received
            #   the messages before we were called under this lock
            #
            # * With messages but no subscribers, this subscriber becomes the sole recipient of the messages
            #   (there is no way to know in advance when, if at all, other subscribers can appear)
            #
            # * With neither subscribers nor messages, this is a no-op
            #
            move_messages_to_sub_queue(
                session, ctx.cluster_id, ctx.topic.id, ctx.endpoint_id, ctx.sub_key, now)

            # The ID can be read now that the session has been flushed
            sub_config.id = ps_sub.id

            # Let the current server's pub/sub config know about the subscription
            self.pubsub.add_subscription(sub_config)

            # The WebSocket connection object needs to know that this sub_key is its to handle
            if is_wsx:
                ctx.web_socket.pubsub_tool.add_sub_key(ctx.sub_key)

            # Everything above is committed in one go
            session.commit()

            # Build the response
            payload = self.response.payload
            payload.sub_key = ctx.sub_key

            if is_wsx:

                # The pub/sub task may now fetch messages possibly enqueued for this subscriber. The subscription
                # is a new one so only GD messages may be available at this point, never non-GD ones.
                pubsub_tool = ctx.web_socket.pubsub_tool
                pubsub_tool.enqueue_gd_messages_by_sub_key(ctx.sub_key)

                gd_depth, non_gd_depth = pubsub_tool.get_queue_depth(ctx.sub_key)
                payload.queue_depth = gd_depth + non_gd_depth

            else:

                # TODO:
                # Read it from that client's delivery task rather than from SQL
                # so that non-GD messages are included too.
                payload.queue_depth = get_queue_depth_by_sub_key(session, ctx.cluster_id, ctx.sub_key, now)

            # Tell workers about the new subscription
            sub_config.action = BROKER_MSG_PUBSUB.SUBSCRIPTION_CREATE.value
            sub_config.add_subscription = not ctx.ws_channel_id  # WSX clients already had their subscriptions created above

            self.broker_client.publish(sub_config)
Exemple #30
0
    def run(self, default_sleep_time=0.1, _status=PUBSUB.RUN_DELIVERY_STATUS, _notify_methods=_notify_methods):
        """ Runs the delivery task's main loop.

        The loop keeps going for as long as self.keep_running is true. In each iteration, if the current
        delivery method is one of _notify_methods and self._should_wake() allows it, self.run_delivery()
        is invoked under self.delivery_lock and its result decides how long to sleep before the next iteration.

        default_sleep_time - how long to sleep, in seconds, if there was nothing to deliver
                             or if it is not our turn to run yet
        _status            - an object with the OK, NO_MSG and SOCKET_ERROR attributes that the result
                             of self.run_delivery() is compared to
        _notify_methods    - a container of delivery methods for which this task delivers messages itself

        Any exception raised in the loop is logged to both loggers, which also ends the loop.
        """
        logger.info('Starting delivery task for sub_key:`%s` (%s, %s)',
            self.sub_key, self.topic_name, self.sub_config.delivery_method)

        #
        # Before starting anything, check if there are any messages already queued up in the database for this task.
        # This may happen, for instance, if:
        #
        # * Our delivery_method is `pull`
        # * Some messages get published to the topic but the subscriber never gets them
        # * Our server is restarted
        # * The server is ultimately brought up and we need to find these messages that were previously
        #   published but never delivered
        #
        # Since this is about messages taken from the database, by definition, all of them must be GD ones.
        #
        self.pubsub_tool.enqueue_initial_messages(self.sub_key, self.topic_name, self.sub_config.endpoint_name)

        try:
            while self.keep_running:

                # Apparently, our delivery method has changed since the last time our self.sub_config
                # was modified, so we can log this fact and store it for later use.
                if self.sub_config.delivery_method != self.previous_delivery_method:
                    logger.info('Changed delivery_method from `%s` to `%s` for `%s` (%s -> %s)`',
                        self.previous_delivery_method, self.sub_config.delivery_method, self.sub_key,
                        self.topic_name, self.sub_config.endpoint_name)

                    # Our new value is now the last value too until potentially overridden at one point
                    self.previous_delivery_method = self.sub_config.delivery_method

                # We are a task that does not notify endpoints of anything - they will query us themselves
                # so in such a case we can sleep for a while and repeat the loop - perhaps in the meantime
                # someone will change delivery_method to one that allows for notifications to be sent.
                # If not, we will be simply looping forever, checking periodically
                # if we can send notifications already.
                if self.sub_config.delivery_method not in _notify_methods:
                    sleep(5)
                    continue

                if self._should_wake():

                    with self.delivery_lock:

                        # Update last run time to be able to wake up in time for the next delivery
                        self.last_run = utcnow_as_ms()

                        # The result indicates whether all currently lined up messages
                        # have been successfully delivered.
                        result = self.run_delivery()

                        # On success, go straight to the next iteration without sleeping.
                        if result == _status.OK:
                            continue

                        # There was nothing to deliver so sleep for a moment before checking again.
                        elif result == _status.NO_MSG:
                            sleep(default_sleep_time)

                        # Otherwise, sleep for a longer time because our endpoint must have returned an error.
                        # After this sleep, self.run_delivery will again attempt to deliver all messages
                        # we queued up. Note that we are the only delivery task for this sub_key so when we sleep here
                        # for a moment, we do not block other deliveries.
                        else:
                            sleep_time = self.wait_sock_err if result == _status.SOCKET_ERROR else self.wait_non_sock_err
                            msg = 'Sleeping for {}s after `{}` in sub_key:`{}`'.format(sleep_time, result, self.sub_key)

                            # Logger.warn is a deprecated alias, hence Logger.warning is used
                            logger.warning(msg)
                            logger_zato.warning(msg)
                            sleep(sleep_time)

                else:

                    # Wait for our turn
                    sleep(default_sleep_time)

        except Exception:

            # Log the full traceback to both loggers - note that the loop is not resumed afterwards
            error_msg = 'Exception in delivery task for sub_key:`%s`, e:`%s`'
            e_formatted = format_exc()
            logger.warning(error_msg, self.sub_key, e_formatted)
            logger_zato.warning(error_msg, self.sub_key, e_formatted)