Example #1
 def prepareQueueMessageList(self, activity_tool, message_list):
   db = activity_tool.getSQLConnection()
   quote = db.string_literal
   def insert(reset_uid):
     values = self._insert_separator.join(values_list)
     del values_list[:]
     for _ in xrange(UID_ALLOCATION_TRY_COUNT):
       if reset_uid:
         reset_uid = False
          # Overflow will result in an IntegrityError.
         db.query(b"SET @uid := %d" % getrandbits(UID_SAFE_BITSIZE))
       try:
         db.query(self._insert_template % (str2bytes(self.sql_table), values))
       except MySQLdb.IntegrityError as e:
         if e.args[0] != DUP_ENTRY:
           raise
         reset_uid = True
       else:
         break
     else:
       raise RuntimeError("Maximum retry for prepareQueueMessageList reached")
   i = 0
   reset_uid = True
   values_list = []
   max_payload = self._insert_max_payload
   sep_len = len(self._insert_separator)
   hasDependency = self._hasDependency
   for m in message_list:
     if m.is_registered:
       active_process_uid = m.active_process_uid
       date = m.activity_kw.get('at_date')
       row = b','.join((
         b'@uid+%d' % i,
         quote('/'.join(m.object_path)),
         b'NULL' if active_process_uid is None else str2bytes(str(active_process_uid)),
         b"UTC_TIMESTAMP(6)" if date is None else quote(render_datetime(date)),
         quote(m.method_id),
         b'-1' if hasDependency(m) else b'0',
         str2bytes(str(m.activity_kw.get('priority', 1))),
         str2bytes(str(m.activity_kw.get('node', 0))),
         quote(m.getGroupId()),
         quote(m.activity_kw.get('tag', b'')),
         quote(m.activity_kw.get('serialization_tag', b'')),
         quote(Message.dump(m))))
       i += 1
       n = sep_len + len(row)
       max_payload -= n
       if max_payload < 0:
         if values_list:
           insert(reset_uid)
           reset_uid = False
           max_payload = self._insert_max_payload - n
         else:
           raise ValueError("max_allowed_packet too small to insert message")
       values_list.append(row)
   if values_list:
     insert(reset_uid)
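
A note on the batching above: rows are accumulated until the joined payload would exceed the connection's max_allowed_packet budget, at which point the pending batch is flushed; a single row that is too large on its own is a hard error. A minimal, self-contained sketch of that idea (hypothetical names, no MySQL involved):

def batch_rows(rows, max_payload, separator=b',\n'):
  """Yield lists of rows whose joined size stays within max_payload."""
  batch = []
  remaining = max_payload
  for row in rows:
    n = len(separator) + len(row)
    remaining -= n
    if remaining < 0:
      if not batch:
        raise ValueError("max_allowed_packet too small to insert message")
      yield batch
      batch = []
      remaining = max_payload - n
    batch.append(row)
  if batch:
    yield batch

# Example: 10-byte rows with a 25-byte budget are flushed as 2 + 2 + 1.
rows = [b'x' * 10 for _ in range(5)]
print([len(batch) for batch in batch_rows(rows, 25, separator=b',')])
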
Example #2
 def render(value, render_string):
   if isinstance(value, _SQLTEST_NO_QUOTE_TYPE_SET):
     return column_op + str2bytes(str(value))
   if isinstance(value, DateTime):
     value = render_datetime(value)
   if isinstance(value, basestring):
     return column_op + render_string(value)
   assert op == "=", value
   if value is None: # XXX: see comment in SQLBase._getMessageList
     return column + b" IS NULL"
   for x in value:
     return b"%s IN (%s)" % (column, str2bytes(', '.join(map(
       str if isinstance(x, _SQLTEST_NO_QUOTE_TYPE_SET) else
       render_datetime if isinstance(x, DateTime) else
       render_string, value))))
   return b"0"
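
render() above chooses a renderer per value: numbers and other "no quote" types pass through, DateTime values are formatted, strings go through the DB-API quoting function, None turns into an IS NULL test, and any remaining iterable becomes an IN (...) list. A standalone sketch of the IN-list case only, with a hypothetical quote() standing in for db.string_literal:

def quote(s):
  # Hypothetical stand-in for MySQLdb's string_literal.
  return b"'" + s.encode('utf-8').replace(b"'", b"''") + b"'"

def render_in(column, values):
  """Render "column IN (...)", quoting strings and passing ints through."""
  rendered = [str(v).encode() if isinstance(v, int) else quote(v)
              for v in values]
  return b"%s IN (%s)" % (column, b', '.join(rendered))

print(render_in(b'tag', ['sync', 'ship']))  # b"tag IN ('sync', 'ship')"
print(render_in(b'priority', [1, 2, 3]))    # b"priority IN (1, 2, 3)"
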
Example #3
def encodeKey(key):
  """
    Encode the key like 'Quoted Printable'.
  """
  # According to memcached's protocol.txt, the key cannot contain
  # control characters or white space.
  return encodestring(str2bytes(key), True).replace(b'\n', b'').replace(b'\r', b'')
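
encodestring here is presumably quopri.encodestring (the docstring mentions Quoted-Printable and the boolean argument matches its quotetabs parameter). Under that assumption, and with str2bytes taken as a plain UTF-8 encode, a quick check that encoded keys contain no whitespace:

from quopri import encodestring

def str2bytes(s):
  # Assumed helper: encode text, pass bytes through unchanged.
  return s.encode('utf-8') if isinstance(s, str) else s

def encodeKey(key):
  # Quote spaces/tabs too (quotetabs=True), then strip the line breaks the
  # codec inserts, so the key satisfies memcached's protocol.txt.
  return encodestring(str2bytes(key), True).replace(b'\n', b'').replace(b'\r', b'')

print(encodeKey('person module/1 2'))  # b'person=20module/1=202'
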
Example #4
 def hasActivitySQL(self, quote, only_valid=False, only_invalid=False, **kw):
   where = [sqltest_dict[k](v, quote) for (k, v) in six.iteritems(kw) if v]
   if only_valid:
     where.append(b'processing_node > %d' % INVOKE_ERROR_STATE)
   if only_invalid:
     where.append(b'processing_node <= %d' % INVOKE_ERROR_STATE)
   return b"SELECT 1 FROM %s WHERE %s LIMIT 1" % (
     str2bytes(self.sql_table), b" AND ".join(where) or b"1")
Example #5
 def _getMessageList(self, db, count=1000, src__=0, **kw):
   # XXX: Because most columns have NOT NULL constraint, conditions with None
   #      value should be ignored, instead of trying to render them
   #      (with comparisons with NULL).
   q = db.string_literal
   sql = b'\n  AND '.join(sqltest_dict[k](v, q) for k, v in six.iteritems(kw))
   sql = b"SELECT * FROM %s%s\nORDER BY priority, date, uid%s" % (
     str2bytes(self.sql_table),
     sql and b'\nWHERE ' + sql,
     b'' if count is None else b'\nLIMIT %d' % count,
   )
   return sql if src__ else Results(db.query(sql, max_rows=0))
Example #6
 def getPriority(self, activity_tool, processing_node, node_set=None):
   if node_set is None:
     q = (b"SELECT 3*priority, date FROM %s"
       b" WHERE processing_node=0 AND date <= UTC_TIMESTAMP(6)"
       b" ORDER BY priority, date LIMIT 1" % str2bytes(self.sql_table))
   else:
     subquery = ("(SELECT 3*priority{} as effective_priority, date FROM %s"
       " WHERE {} AND processing_node=0 AND date <= UTC_TIMESTAMP(6)"
       " ORDER BY priority, date LIMIT 1)" % self.sql_table).format
     node = 'node=%s' % processing_node
     # "ALL" on all but one, to incur deduplication cost only once.
     # "UNION ALL" between the two naturally distinct sets.
     q = (b"SELECT * FROM (%s UNION ALL %s UNION %s%s) as t"
       b" ORDER BY effective_priority, date LIMIT 1" % (
         str2bytes(subquery(-1, node)),
         str2bytes(subquery('', 'node=0')),
         str2bytes(subquery('+IF(node, IF(%s, -1, 1), 0)' % node, 'node>=0')),
         b' UNION ALL ' + str2bytes(subquery(-1, 'node IN (%s)' % ','.join(map(str, node_set)))) if node_set else b'',
       ))
   result = activity_tool.getSQLConnection().query(q, 0)[1]
   if result:
     return result[0]
   return Queue.getPriority(self, activity_tool, processing_node, node_set)
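
In the node_set branch above, one subquery is built per group of candidate nodes and the results are merged with UNION, each branch ranked by an effective_priority that rewards the local node. The subquery string is templated in two stages, which is easy to misread: '%' substitutes the table name once, then the bound str.format fills the priority offset and the node filter for each branch. A standalone sketch (the table name 'message' is hypothetical):

sql_table = 'message'
subquery = ("(SELECT 3*priority{} as effective_priority, date FROM %s"
  " WHERE {} AND processing_node=0 AND date <= UTC_TIMESTAMP(6)"
  " ORDER BY priority, date LIMIT 1)" % sql_table).format

print(subquery(-1, 'node=12'))  # branch for this node: priority bonus of -1
print(subquery('', 'node=0'))   # branch for messages without node preference
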
Example #7
 def initialize(self, activity_tool, clear):
   db = activity_tool.getSQLConnection()
   create = self.createTableSQL()
   if clear:
     db.query("DROP TABLE IF EXISTS " + self.sql_table)
     db.query(create)
   else:
     src = db.upgradeSchema(create, create_if_not_exists=1,
                                    initialize=self._initialize)
     if src:
       LOG('CMFActivity', INFO, "%r table upgraded\n%s"
           % (self.sql_table, src))
   self._insert_max_payload = (db.getMaxAllowedPacket()
     + len(self._insert_separator)
     - len(self._insert_template % (str2bytes(self.sql_table), b'')))
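
The budget computed by the last statement is max_allowed_packet minus the constant part of the INSERT template, plus one separator length because the first row is not preceded by a separator. A tiny sketch of the arithmetic with made-up values (the real template and separator are the _insert_template and _insert_separator class attributes):

insert_template = b"INSERT INTO %s (uid, path, message) VALUES %s"
insert_separator = b",\n"
max_allowed_packet = 16 * 1024 * 1024  # hypothetical server setting

insert_max_payload = (max_allowed_packet
  + len(insert_separator)
  - len(insert_template % (b"message", b'')))
print(insert_max_payload)  # bytes left for the joined VALUES rows
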
Example #8
 def insert(reset_uid):
   values = self._insert_separator.join(values_list)
   del values_list[:]
   for _ in xrange(UID_ALLOCATION_TRY_COUNT):
     if reset_uid:
       reset_uid = False
        # Overflow will result in an IntegrityError.
       db.query(b"SET @uid := %d" % getrandbits(UID_SAFE_BITSIZE))
     try:
       db.query(self._insert_template % (str2bytes(self.sql_table), values))
     except MySQLdb.IntegrityError as e:
       if e.args[0] != DUP_ENTRY:
         raise
       reset_uid = True
     else:
       break
   else:
     raise RuntimeError("Maximum retry for prepareQueueMessageList reached")
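
The helper seeds the @uid session variable with a random base, relies on the table's unique index on uid to raise a duplicate-entry error when the block collides with existing rows, and retries with a fresh base a bounded number of times. A stripped-down sketch of that allocation strategy with the table replaced by an in-memory set (the constants here are illustrative, not the real values):

from random import getrandbits

UID_SAFE_BITSIZE = 31
UID_ALLOCATION_TRY_COUNT = 10
allocated = set()  # stands in for the unique uid index of the table

def allocate(count):
  """Reserve `count` consecutive uids, retrying on collision."""
  for _ in range(UID_ALLOCATION_TRY_COUNT):
    base = getrandbits(UID_SAFE_BITSIZE)         # SET @uid := %d
    uid_list = [base + i for i in range(count)]  # @uid+0, @uid+1, ...
    if allocated.isdisjoint(uid_list):           # no IntegrityError
      allocated.update(uid_list)
      return uid_list
    # collision: loop again with a fresh random base
  raise RuntimeError("Maximum retry for prepareQueueMessageList reached")

print(allocate(3))
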
Example #9
 def query(self, query_string, max_rows=1000):
     """Execute 'query_string' and return at most 'max_rows'."""
     self._use_TM and self._register()
     desc = None
     if not isinstance(query_string, bytes):
         query_string = str2bytes(query_string)
     # XXX deal with a typical mistake that the user appends
     # an unnecessary and rather harmful semicolon at the end.
     # Unfortunately, MySQLdb does not want to be graceful.
     if query_string[-1:] == b';':
         query_string = query_string[:-1]
     for qs in query_string.split(b'\0'):
         qs = qs.strip()
         if qs:
             select_match = match_select(qs)
             if select_match:
                 query_timeout = getTimeLeft()
                 if query_timeout is not None:
                     statement, select = select_match.groups()
                     if statement:
                         statement += b", max_statement_time=%f" % query_timeout
                     else:
                         statement = b"max_statement_time=%f" % query_timeout
                     qs = b"SET STATEMENT %s FOR SELECT %s" % (statement,
                                                               select)
                 if max_rows:
                     qs = b"%s LIMIT %d" % (qs, max_rows)
             c = self._query(qs)
             if c:
                 if desc is not None is not c.describe():
                     raise Exception(
                         'Multiple select schema are not allowed')
                 desc = c.describe()
                 result = c.fetch_row(max_rows)
     if desc is None:
         return (), ()
     get_def = self.defs.get
     items = [{
         'name': d[0],
         'type': get_def(d[1], "t"),
         'width': d[2],
         'null': d[6]
     } for d in desc]
     return items, result
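
Before execution, the query string is normalised: a trailing semicolon is dropped (MySQLdb refuses it), the string is split on NUL bytes so several statements can be chained in one call, and SELECTs get an optional max_statement_time plus a LIMIT. A simplified sketch of that preprocessing alone (match_select and the timeout handling are left out; the names are illustrative):

def preprocess(query_string, max_rows=1000):
  if not isinstance(query_string, bytes):
    query_string = query_string.encode('utf-8')
  if query_string[-1:] == b';':  # tolerate a harmless trailing ';'
    query_string = query_string[:-1]
  statement_list = []
  for qs in query_string.split(b'\0'):
    qs = qs.strip()
    if qs:
      if max_rows and qs.upper().startswith(b'SELECT'):
        qs = b"%s LIMIT %d" % (qs, max_rows)
      statement_list.append(qs)
  return statement_list

print(preprocess("SELECT * FROM message\0COMMIT;", max_rows=5))
# [b'SELECT * FROM message LIMIT 5', b'COMMIT']
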
Example #10
def DA__call__(self, REQUEST=None, __ick__=None, src__=0, test__=0, **kw):
    """Call the database method

    The arguments to the method should be passed via keyword
    arguments, or in a single mapping object. If no arguments are
    given, and if the method was invoked through the Web, then the
    method will try to acquire and use the Web REQUEST object as
    the argument mapping.

    The returned value is a sequence of record objects.
    """
    __traceback_supplement__ = (SQLMethodTracebackSupplement, self)

    c = kw.pop("connection_id", None)
    #if c is not None:
      #LOG("DA", 300, "connection %s provided to %s" %(c, self.id))
    # patch: dynamic brain configuration
    zsql_brain = kw.pop('zsql_brain', None)
    # patch end


    if REQUEST is None:
        if kw: REQUEST=kw
        else:
            if hasattr(self, 'REQUEST'): REQUEST=self.REQUEST
            else: REQUEST={}

    # Patch to implement dynamic connection id
    # Connection id is retrieved from the user preference
    if c is None:
      # XXX cleaner solution will be needed
      if not (self.connection_id in ('cmf_activity_sql_connection',
                                     'erp5_sql_transactionless_connection')
              or 'portal_catalog' in self.getPhysicalPath()):
        portal = self.getPortalObject()
        if 'portal_archives' in portal.__dict__:
          archive_id = portal.portal_preferences.getPreferredArchive()
          if archive_id:
            archive_id = archive_id.split('/')[-1]
            #LOG("DA__call__, archive_id 2", 300, archive_id)
            archive = portal.portal_archives._getOb(archive_id, None)
            if archive is not None:
              c = archive.getConnectionId()
              #LOG("DA call", INFO, "retrieved connection %s from preference" %(c,))

    if c is None:
      # connection hook
      c = self.connection_id
      # for backwards compatibility
      hk = self.connection_hook
      # go get the connection hook and call it
      if hk: c = getattr(self, hk)()
    #LOG("DA__call__ connection", 300, c)
    try: dbc=getattr(self, c)
    except AttributeError:
        raise AttributeError(
            "The database connection <em>%s</em> cannot be found." % (
            c))

    try: DB__=dbc()
    except: raise DatabaseError(
        '%s is not connected to a database' % self.id)

    p = aq_parent(self) # None if no aq_parent

    argdata=self._argdata(REQUEST)
    argdata['sql_delimiter']='\0'
    argdata['sql_quote__']=dbc.sql_quote__

    security=getSecurityManager()
    security.addContext(self)
    try:
        query = str2bytes(self.template(p, **argdata))
    except TypeError as msg:
        msg = str(msg)
        if 'client' in msg:
            raise NameError("'client' may not be used as an "
                    "argument name in this context")
        raise
    finally:
        security.removeContext(self)

    if src__: return query

    if self.cache_time_ > 0 and self.max_cache_ > 0:
        result=self._cached_result(DB__, query, self.max_rows_, c)
    else:
      try:
#         if 'portal_ids' in query:
#           LOG("DA query", INFO, "query = %s" %(query,))
        result=DB__.query(query, self.max_rows_)
      except:
        LOG("DA call raise", ERROR, "DB = %s, c = %s, query = %s" %(DB__, c, query), error=True)
        raise

    # patch: dynamic brain configuration
    if zsql_brain is not None:
        try:
          class_file_, class_name_ = zsql_brain.rsplit('.', 1)
        except:
          #import pdb; pdb.post_mortem()
          raise
        brain = getBrain(class_file_, class_name_)
        # XXX remove this logging for performance
        LOG(__name__, INFO, "Using special brain: %r\n" % (brain,))
    else:
        brain = getBrain(self.class_file_, self.class_name_)

    if type(result) is type(''):
        f=BytesIO()
        f.write(result)
        f.seek(0)
        result=RDB.File(f,brain,p)
    else:
        result=Results(result, brain, p)
    columns=result._searchable_result_columns()
    if test__ and columns != self._col: self._col=columns

    # If run in test mode, return both the query and results so
    # that the template doesn't have to be rendered twice!
    if test__: return query, result

    return result
Example #11
def unconvert(encoding,S):
    if encoding == 'base64':
        return base64.decodestring(S)
    else:
        return str2bytes(eval(b"'" + S.replace(b'\n', b'') + b"'"))
Example #12
def save_global(self, tag, data):
    a = data[1]
    return save_put(self, GLOBAL + str2bytes(a['module']) + b'\n' +
                    str2bytes(a['name']) + b'\n', a)
Example #13
def modifyRequest(self, req, resp):
    """Copies cookie-supplied credentials to the basic auth fields.

  Returns a flag indicating what the user is trying to do with
  cookies: ATTEMPT_NONE, ATTEMPT_LOGIN, or ATTEMPT_RESUME.  If
  cookie login is disabled for this request, raises
  CookieCrumblerDisabled.
  """
    enabled = (req.__class__ is HTTPRequest
               and req['REQUEST_METHOD'] in ('HEAD', 'GET', 'PUT', 'POST')
               and 'WEBDAV_SOURCE_PORT' not in req.environ)
    if enabled:
        req.post_traverse(balancer_cookie_hook, (self, req, resp))

        # attempt may contain information about an earlier attempt to
        # authenticate using a higher-up cookie crumbler within the
        # same request.
        attempt = getattr(req, '_cookie_auth', ATTEMPT_NONE)

        if attempt == ATTEMPT_NONE:
            if req._auth:
                # An auth header was provided and no cookie crumbler
                # created it.  The user must be using basic auth.
                enabled = False
            elif self.pw_cookie in req and self.name_cookie in req:
                # Attempt to log in and set cookies.
                attempt = ATTEMPT_LOGIN
                name = req[self.name_cookie]
                pw = req[self.pw_cookie]
                ac = bytes2str(
                    standard_b64encode(str2bytes('%s:%s' %
                                                 (name, pw))).rstrip())
                self._setAuthHeader(ac, req, resp)
                if req.get(self.persist_cookie, 0):
                    # Persist the user name (but not the pw or session)
                    expires = (DateTime() + 365).toZone('GMT').rfc822()
                    resp.setCookie(self.name_cookie,
                                   name,
                                   path=self.getCookiePath(),
                                   expires=expires)
                else:
                    # Expire the user name
                    resp.expireCookie(self.name_cookie,
                                      path=self.getCookiePath())
                method = self.getCookieMethod('setAuthCookie',
                                              self.defaultSetAuthCookie)
                method(resp, self.auth_cookie, quote(ac))
            elif self.auth_cookie in req:
                # Attempt to resume a session if the cookie is valid.
                # Copy __ac to the auth header.
                ac = unquote(req[self.auth_cookie])
                if ac and ac != 'deleted':
                    try:
                        standard_b64decode(str2bytes(ac))
                    except:
                        # Not a valid auth header.
                        pass
                    else:
                        attempt = ATTEMPT_RESUME
                        self._setAuthHeader(ac, req, resp)
                        self.delRequestVar(req, self.auth_cookie)
                        method = self.getCookieMethod('twiddleAuthCookie',
                                                      None)
                        if method is not None:
                            method(resp, self.auth_cookie, quote(ac))

    self.delRequestVar(req, self.auth_cookie)
    self.delRequestVar(req, self.name_cookie)
    self.delRequestVar(req, self.pw_cookie)
    if not enabled:
        raise CookieCrumblerDisabled
    req._cookie_auth = attempt
    return attempt
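
On ATTEMPT_LOGIN the name and password cookies are folded into a standard Basic-auth token, and on ATTEMPT_RESUME the stored token is checked by base64-decoding it. A minimal sketch of just that encoding step, assuming str2bytes/bytes2str are plain UTF-8 encode/decode helpers:

from base64 import standard_b64encode, standard_b64decode

def make_basic_auth_token(name, pw):
  # '%s:%s' credentials, base64-encoded, as placed into the auth cookie.
  return standard_b64encode(('%s:%s' % (name, pw)).encode('utf-8')).decode().rstrip()

token = make_basic_auth_token('alice', 'secret')
print(token)                               # YWxpY2U6c2VjcmV0
print(standard_b64decode(token.encode()))  # b'alice:secret' (the resume-path check)
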
Example #14
 def load(line):
      # getProcessableMessageList already fetches messages with the same
     # group_method_id, so what remains to be filtered on are path and
     # method_id.
     # XXX: What about tag ?
     path = line.path
     method_id = line.method_id
     key = path, method_id
     uid = line.uid
     original_uid = path_and_method_id_dict.get(key)
     if original_uid is None:
         sql_method_id = b" AND method_id = %s AND group_method_id = %s" % (
             quote(method_id), quote(line.group_method_id))
         m = Message.load(line.message, uid=uid, line=line)
         merge_parent = m.activity_kw.get('merge_parent')
         try:
             if merge_parent:
                 path_list = []
                 while merge_parent != path:
                     path = path.rsplit('/', 1)[0]
                     assert path
                     original_uid = path_and_method_id_dict.get(
                         (path, method_id))
                     if original_uid is not None:
                         return None, original_uid, [uid]
                     path_list.append(path)
                 uid_list = []
                 if path_list:
                     # Select parent messages.
                     result = Results(
                         db.query(
                             b"SELECT * FROM message"
                             b" WHERE processing_node IN (0, %d) AND path IN (%s)%s"
                             b" ORDER BY path LIMIT 1 FOR UPDATE" % (
                                 processing_node,
                                 b','.join(map(quote, path_list)),
                                 sql_method_id,
                             ), 0))
                     if result:  # found a parent
                         # mark child as duplicate
                         uid_list.append(uid)
                         # switch to parent
                         line = result[0]
                         key = line.path, method_id
                         uid = line.uid
                         m = Message.load(line.message,
                                          uid=uid,
                                          line=line)
                 # return unreserved similar children
                 path = line.path
                 result = db.query(
                     b"SELECT uid FROM message"
                     b" WHERE processing_node = 0 AND (path = %s OR path LIKE %s)"
                     b"%s FOR UPDATE" % (
                         quote(path),
                         quote(path.replace('_', r'\_') + '/%'),
                         str2bytes(sql_method_id),
                     ), 0)[1]
                 reserve_uid_list = [x for x, in result]
                 uid_list += reserve_uid_list
                 if not line.processing_node:
                     # reserve found parent
                     reserve_uid_list.append(uid)
             else:
                 # Select duplicates.
                 result = db.query(
                     b"SELECT uid FROM message"
                     b" WHERE processing_node = 0 AND path = %s%s FOR UPDATE"
                     % (
                         quote(path),
                         sql_method_id,
                     ), 0)[1]
                 reserve_uid_list = uid_list = [x for x, in result]
             if reserve_uid_list:
                 self.assignMessageList(db, processing_node,
                                        reserve_uid_list)
             else:
                 db.query("COMMIT")  # XXX: useful ?
         except:
             self._log(WARNING, 'Failed to reserve duplicates')
             db.query("ROLLBACK")
             raise
         if uid_list:
             self._log(TRACE,
                       'Reserved duplicate messages: %r' % uid_list)
         path_and_method_id_dict[key] = uid
         return m, uid, uid_list
     # We know that original_uid != uid because caller skips lines we returned
     # earlier.
     return None, original_uid, [uid]
Example #15
  def getProcessableMessageList(self, activity_tool, processing_node,
                                node_family_id_list):
    """
      Always true:
        For each reserved message, delete redundant messages when it gets
        reserved (definitely lost, but they are expendable since redundant).

      - reserve a message
      - if this message has a group_method_id:
        - reserve a bunch of messages
        - until the total "cost" of the group goes over 1
          - get one message from the reserved bunch (this message will be
            "needed")
          - update the total cost
        - unreserve "unneeded" messages
      - return still-reserved message list and a group_method_id

      If any error happens in above described process, try to unreserve all
      messages already reserved in that process.
      If it fails, complain loudly that some messages might still be in an
      unclean state.

      Returned values:
          3-tuple:
          - list of messages
          - group_method_id
          - uid_to_duplicate_uid_list_dict
    """
    db = activity_tool.getSQLConnection()
    now_date = getNow(db)
    uid_to_duplicate_uid_list_dict = {}
    try:
      while 1: # not a loop
        # Select messages that were either assigned manually or left
        # unprocessed after a shutdown. Most of the time, there's none.
        # To minimize the probability of deadlocks, we also COMMIT so that a
        # new transaction starts on the first 'FOR UPDATE' query, which is all
        # the more important as the current one started with getPriority().
        result = db.query(b"SELECT * FROM %s WHERE processing_node=%d"
          b" ORDER BY priority, date LIMIT 1\0COMMIT" % (
          str2bytes(self.sql_table), processing_node), 0)
        already_assigned = result[1]
        if already_assigned:
          result = Results(result)
        else:
          result = self.getReservedMessageList(db, now_date, processing_node,
                                               1, node_set=node_family_id_list)
          if not result:
            break
          # So reserved documents are properly released even if load raises.
          for line in result:
            uid_to_duplicate_uid_list_dict[line.uid] = []
        load = self.getProcessableMessageLoader(db, processing_node)
        m, uid, uid_list = load(result[0])
        message_list = [m]
        uid_to_duplicate_uid_list_dict[uid] = uid_list
        group_method_id = m.line.group_method_id
        if group_method_id[0] != '\0':
          # Count the number of objects to prevent too many objects.
          cost = m.getGroupMethodCost()
          assert 0 < cost <= 1, (self.sql_table, uid)
          count = m.getObjectCount(activity_tool)
          # this is heuristic (messages with same group_method_id
          # are likely to have the same group_method_cost)
          limit = int(1. / cost + 1 - count)
          if limit > 1: # <=> cost * count < 1
            cost *= count
            # Retrieve objects which have the same group method.
            result = iter(already_assigned
              and Results(db.query(b"SELECT * FROM %s"
                b" WHERE processing_node=%d AND group_method_id=%s"
                b" ORDER BY priority, date LIMIT %d" % (
                str2bytes(self.sql_table), processing_node,
                db.string_literal(group_method_id), limit), 0))
                # Do not optimize rare case: keep the code simple by not
                # adding more results from getReservedMessageList if the
                # limit is not reached.
              or self.getReservedMessageList(db, now_date, processing_node,
                limit, group_method_id, node_family_id_list))
            for line in result:
              if line.uid in uid_to_duplicate_uid_list_dict:
                continue
              m, uid, uid_list = load(line)
              if m is None:
                uid_to_duplicate_uid_list_dict[uid] += uid_list
                continue
              uid_to_duplicate_uid_list_dict[uid] = uid_list
              cost += m.getObjectCount(activity_tool) * \
                      m.getGroupMethodCost()
              message_list.append(m)
              if cost >= 1:
                # Unreserve extra messages as soon as possible.
                uid_list = [line.uid for line in result if line.uid != uid]
                if uid_list:
                  self.assignMessageList(db, 0, uid_list)
        return message_list, group_method_id, uid_to_duplicate_uid_list_dict
    except:
      self._log(WARNING, 'Exception while reserving messages.')
      if uid_to_duplicate_uid_list_dict:
        to_free_uid_list = ensure_list(uid_to_duplicate_uid_list_dict.keys())
        for uid_list in six.itervalues(uid_to_duplicate_uid_list_dict):
          to_free_uid_list += uid_list
        try:
          self.assignMessageList(db, 0, to_free_uid_list)
        except:
          self._log(ERROR, 'Failed to free messages: %r' % to_free_uid_list)
        else:
          if to_free_uid_list:
            self._log(TRACE, 'Freed messages %r' % to_free_uid_list)
      else:
        self._log(TRACE, '(no message was reserved)')
    return (), None, None
Example #16
  def getReservedMessageList(self, db, date, processing_node, limit,
                             group_method_id=None, node_set=None):
    """
      Get and reserve a list of messages.
      limit
        Maximum number of messages to fetch.
        This number is not guaranteed to be reached when not enough
        messages are pending execution.
    """
    assert limit
    quote = db.string_literal
    query = db.query
    args = (str2bytes(self.sql_table), sqltest_dict['to_date'](date, quote),
            b' AND group_method_id=' + quote(group_method_id)
            if group_method_id else b'' , limit)

    # Note: Not all write accesses to our table are protected by this lock.
    # This lock is not here for data consistency reasons, but to avoid wasting
    # time on SQL deadlocks caused by the varied lock ordering chosen by the
    # database. These queries specifically seem to be extremely prone to such
    # deadlocks, so prevent them from attempting to run in parallel on a given
    # activity table.
    # If more accesses are found to cause a significant waste of time because
    # of deadlocks, then they should acquire such lock as well. But
    # preemptively applying such lock everywhere without checking the amount
    # of waste is unlikely to produce a net gain.
    # XXX: timeout may benefit from being tweaked, but one second seems like a
    # reasonable starting point.
    # XXX: locking could probably be skipped altogether on clusters with few
    # enough processing nodes, as there should be little deadlocks and the
    # tradeoff becomes unfavorable to explicit locks. What threshold to
    # choose ?
    with SQLLock(db, self.sql_table, timeout=1) as acquired:
      if not acquired:
        # This table is busy, check for work to do elsewhere
        return ()
      # Get reservable messages.
      # During normal operation, sorting by date (as last criteria) is fairer
      # for users and reduce the probability to do the same work several times
      # (think of an object that is modified several times in a short period of
      # time).
      if node_set is None:
        result = Results(query(
          b"SELECT * FROM %s WHERE processing_node=0 AND %s%s"
          b" ORDER BY priority, date LIMIT %d FOR UPDATE" % args, 0))
      else:
        # We'd like to write
        #   ORDER BY priority, IF(node, IF(node={node}, -1, 1), 0), date
        # but this makes indices inefficient.
        subquery = (b"(SELECT *, 3*priority%%s as effective_priority FROM %s"
          b" WHERE %%s AND processing_node=0 AND %s%s"
          b" ORDER BY priority, date LIMIT %d FOR UPDATE)" % args)
        node = b'node=%d' % processing_node
        result = Results(query(
          # "ALL" on all but one, to incur deduplication cost only once.
          # "UNION ALL" between the two naturally distinct sets.
          b"SELECT * FROM (%s UNION ALL %s UNION %s%s) as t"
          b" ORDER BY effective_priority, date LIMIT %d"% (
              subquery % (b'-1', node),
              subquery % (b'', b'node=0'),
              subquery % (b'+IF(node, IF(%s, -1, 1), 0)' % node, b'node>=0'),
              b' UNION ALL ' + subquery % (str2bytes(str(-1)), str2bytes('node IN (%s)' % ','.join(map(str, node_set)))) if node_set else b'',
              limit), 0))
      if result:
        # Reserve messages.
        uid_list = [x.uid for x in result]
        self.assignMessageList(db, processing_node, uid_list)
        self._log(TRACE, 'Reserved messages: %r' % uid_list)
        return result
    return ()
Example #17
 def _getExecutableMessageSet(self, activity_tool, db, message_list):
   """
   Return, from given message list, the set of messages which have all their
   dependencies satisfied.
   """
   # Principle of operation:
   # For each dependency type used in given message list, find all messages
   # matching any of the dependency values used in given message list.
   # This provides the SQL database with structurally simple queries that it
   # should be able to optimise easily.
   # Further refinements:
   # - Any blocked message is ignored in further dependency type lookups (we
   #   already know it is blocked, no point in checking further).
   # - Test the most popular dependency types first, with the expectation
   #   that these will find most of the blockers, reducing the set of
   #   activities left to test and (with the refinement above) reducing the
   #   probability of having to run further queries (if there are other
   #   dependency types to test)
   dependency_tester_dict = _DEPENDENCY_TESTER_DICT
   # dependency_name (str): Something like 'serialization_tag', etc
   # dependency_value (any): dependency_name-dependent structure and meaning.
   # dependency_dict: define the dependencies to check, and which messages are
   # blocked by each found blocker.
   #   [dependency_name][dependency_value] -> message set
   dependency_dict = defaultdict(lambda: defaultdict(set))
   # message_dependency_dict: define which message has which dependencies, to
   # efficiently remove a message from dependency_dict once it is found to be
   # blocked.
   #   [message][dependency_name] -> dependency_value
   message_dependency_dict = defaultdict(dict)
   for message in message_list:
     for (
       dependency_name,
       dependency_value,
     ) in six.iteritems(message.activity_kw):
       try:
         column_list, _, _ = dependency_tester_dict[dependency_name]
       except KeyError:
         continue
       # There are 2 types of dependencies:
       # - monovalued (most), which accepts a single value and a vector of
       #   values.
       # - n-valued (after_path_and_method_id and after_tag_and_method_id)
       #   which accept a n-vector, each item being a single value or a vector
       #   of values.
       # Convert every form into its vector equivalent form, ignoring
       # conditions which cannot match any activity, and (for n-valued)
       # enumerate all possible combinations for later reverse-lookup.
       column_count = len(column_list)
       if column_count == 1:
         if dependency_value is None:
           continue
         dependency_value_list = [
           x
           for x in (
             (dependency_value, )
             if isinstance(
               dependency_value,
               _SQLTEST_NON_SEQUENCE_TYPE_SET,
             ) else
             dependency_value
           )
           # None values cannot match any activity, ignore them.
           if x is not None
         ]
       else:
         try:
           if (
             len(dependency_value) != column_count or
             None in dependency_value
           ):
             # Malformed or impossible to match dependency, ignore.
             continue
         except TypeError:
           # Malformed dependency, ignore.
           continue
          # Note: if any resulting item ends up empty (ex: it only contained
         # None), product will return an empty list.
         dependency_value_list = list(product(*(
           (
             (dependency_column_value, )
             if isinstance(
               dependency_column_value,
               _SQLTEST_NON_SEQUENCE_TYPE_SET,
             ) else
             (x for x in dependency_column_value if x is not None)
           )
           for dependency_column_value in dependency_value
         )))
       if not dependency_value_list:
         continue
       message_dependency_dict[message][dependency_name] = dependency_value_list
       dependency_value_dict = dependency_dict[dependency_name]
       for dependency_value in dependency_value_list:
         dependency_value_dict[dependency_value].add(message)
   # Messages are supposed valid until blockage is found.
   result = set(message_list)
   # Messages for which a blockage is found, so removal of this message from
   # further dependency processing is delayed to the next iteration, to avoid
   # doing such work if there is no such further iteration.
   new_blocked_message_set = set()
   quote = db.string_literal
   table_name_list = activity_tool.getSQLTableNameSet()
   for (
     dependency_name,
     dependency_value_dict,
   ) in sorted(
     six.iteritems(dependency_dict),
     # Test first the condition with the most values.
     # XXX: after_path=('foo', 'bar') counts as 2 points for after_path
     # despite being a single activity. Is there a fairer (while cheap) way ?
     key=lambda dependency_dict_item: sum(
       len(message_set)
       for message_set in six.itervalues(dependency_dict_item[1])
     ),
     reverse=True,
   ):
     # Previous iteration found blocked messages.
     # Find which activities, and remove their values from dependency_dict
     # so these activities are not tested in further queries (we already
     # know they are blocked).
     while new_blocked_message_set:
       blocked_message = new_blocked_message_set.pop()
       for (
         message_dependency_name,
         message_dependency_value_list,
       ) in six.iteritems(message_dependency_dict[blocked_message]):
         message_dependency_value_dict = dependency_dict[message_dependency_name]
         if not message_dependency_value_dict:
           # This dependency was already dropped or evaluated, nothing to
           # cleanup here.
           continue
         for message_dependency_value in message_dependency_value_list:
           message_set = message_dependency_value_dict[message_dependency_value]
           message_set.remove(blocked_message)
           if not message_set:
              # No more messages wait for this value for this dependency, drop
             # the entry.
             del message_dependency_value_dict[message_dependency_value]
         # Note: no point in editing dependency_dict if
         # message_dependency_value_dict is empty, the outer loop is working
         # on a copy.
     if not dependency_value_dict:
       # No more non-blocked message for this dependency, skip it.
       continue
     column_list, to_sql, min_processing_node = dependency_tester_dict[
       dependency_name
     ]
     row2key = (
       _ITEMGETTER0
       if len(column_list) == 1 else
       _IDENTITY
     )
     base_sql_suffix = b' WHERE processing_node > %i AND (%%s) LIMIT 1)' % (
       min_processing_node,
     )
     sql_suffix_list = [
       base_sql_suffix % to_sql(dependency_value, quote)
       for dependency_value in dependency_value_dict
     ]
     base_sql_prefix = b'(SELECT %s FROM ' % (
       b','.join([ str2bytes(c) for c in column_list ]),
     )
     subquery_list = [
       base_sql_prefix + str2bytes(table_name) + sql_suffix
       for table_name in table_name_list
       for sql_suffix in sql_suffix_list
     ]
     while subquery_list:
       # Join queries with a UNION, to reduce per-query latency.
       # Also, limit the number of subqueries per query, as their number can
       # largely exceed the number of activities being considered multiplied
        # by the number of activity tables: it is also proportional to the
       # number of distinct values being looked for in the current column.
       for row in db.query(
          b' UNION '.join(subquery_list[:_MAX_DEPENDENCY_UNION_SUBQUERY_COUNT]),
         max_rows=0,
       )[1]:
         # Each row is a value which blocks some activities.
         dependent_message_set = dependency_value_dict[row2key(row)]
         # queue blocked messages for processing in the beginning of next
         # outermost iteration.
         new_blocked_message_set |= dependent_message_set
         # ...but update result immediately, in case there is no next
         # outermost iteration.
         result -= dependent_message_set
        del subquery_list[:_MAX_DEPENDENCY_UNION_SUBQUERY_COUNT]
     dependency_value_dict.clear()
   return result
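
The two bookkeeping mappings built at the top of this method are easier to see with toy values. In this illustration messages are replaced by plain strings and only the after_tag dependency is used:

from collections import defaultdict

dependency_dict = defaultdict(lambda: defaultdict(set))  # [name][value] -> message set
message_dependency_dict = defaultdict(dict)              # [message][name] -> value list

# m1 waits on tag 'sync'; m2 waits on tags 'sync' and 'ship'.
for message, tag_list in (('m1', ('sync',)), ('m2', ('sync', 'ship'))):
  message_dependency_dict[message]['after_tag'] = tag_list
  for tag in tag_list:
    dependency_dict['after_tag'][tag].add(message)

print(dict(dependency_dict['after_tag']))
# {'sync': {'m1', 'm2'}, 'ship': {'m2'}}
# A blocker found for 'sync' blocks both m1 and m2; message_dependency_dict
# then lets the loop above drop their other values from dependency_dict.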