Example #1
0
def bulk_get_streams(realm, stream_names):
    if isinstance(realm, Realm):
        realm_id = realm.id
    else:
        realm_id = realm

    def fetch_streams_by_name(stream_names):
        # This should be just
        #
        # Stream.objects.select_related("realm").filter(name__iexact__in=stream_names,
        #                                               realm_id=realm_id)
        #
        # But chaining __in and __iexact doesn't work with Django's
        # ORM, so we have the following hack to construct the relevant where clause
        if len(stream_names) == 0:
            return []
        upper_list = ", ".join(["UPPER(%s)"] * len(stream_names))
        where_clause = "UPPER(zerver_stream.name::text) IN (%s)" % (upper_list,)
        return get_active_streams(realm_id).select_related("realm").extra(
            where=[where_clause],
            params=stream_names)

    return generic_bulk_cached_fetch(lambda stream_name: get_stream_cache_key(stream_name, realm),
                                     fetch_streams_by_name,
                                     [stream_name.lower() for stream_name in stream_names],
                                     id_fetcher=lambda stream: stream.name.lower())
Example #2
0
def user_ids_to_users(user_ids: List[int], realm: Realm) -> List[UserProfile]:
    # TODO: Consider adding a flag to control whether deactivated
    # users should be included.

    def fetch_users_by_id(user_ids: List[int]) -> List[UserProfile]:
        if len(user_ids) == 0:
            return []

        return list(UserProfile.objects.filter(id__in=user_ids).select_related())

    user_profiles_by_id = generic_bulk_cached_fetch(
        cache_key_function=user_profile_by_id_cache_key,
        query_function=fetch_users_by_id,
        object_ids=user_ids
    )  # type: Dict[int, UserProfile]

    found_user_ids = user_profiles_by_id.keys()
    missed_user_ids = [user_id for user_id in user_ids if user_id not in found_user_ids]
    if missed_user_ids:
        raise JsonableError(_("Invalid user ID: %s" % (missed_user_ids[0])))

    user_profiles = list(user_profiles_by_id.values())
    for user_profile in user_profiles:
        if user_profile.realm != realm:
            raise JsonableError(_("Invalid user ID: %s" % (user_profile.id,)))
    return user_profiles
Example #3
0
def messages_for_ids(message_ids: List[int],
                     user_message_flags: Dict[int, List[str]],
                     search_fields: Dict[int, Dict[str, str]],
                     apply_markdown: bool,
                     client_gravatar: bool,
                     allow_edit_history: bool) -> List[Dict[str, Any]]:

    cache_transformer = MessageDict.build_dict_from_raw_db_row
    id_fetcher = lambda row: row['id']

    message_dicts = generic_bulk_cached_fetch(to_dict_cache_key_id,
                                              MessageDict.get_raw_db_rows,
                                              message_ids,
                                              id_fetcher=id_fetcher,
                                              cache_transformer=cache_transformer,
                                              extractor=extract_message_dict,
                                              setter=stringify_message_dict)

    message_list = []  # type: List[Dict[str, Any]]

    for message_id in message_ids:
        msg_dict = message_dicts[message_id]
        msg_dict.update({"flags": user_message_flags[message_id]})
        if message_id in search_fields:
            msg_dict.update(search_fields[message_id])
        # Make sure that we never send message edit history to clients
        # in realms with allow_edit_history disabled.
        if "edit_history" in msg_dict and not allow_edit_history:
            del msg_dict["edit_history"]
        message_list.append(msg_dict)

    MessageDict.post_process_dicts(message_list, apply_markdown, client_gravatar)

    return message_list
Example #4
0
def bulk_get_recipients(type, type_ids):
    def cache_key_function(type_id):
        return get_recipient_cache_key(type, type_id)
    def query_function(type_ids):
        return Recipient.objects.filter(type=type, type_id__in=type_ids)

    return generic_bulk_cached_fetch(cache_key_function, query_function, type_ids,
                                     id_fetcher=lambda recipient: recipient.type_id)
Example #5
0
def bulk_get_users(emails: List[str], realm: Optional[Realm],
                   base_query: 'QuerySet[UserProfile]'=None) -> Dict[str, UserProfile]:
    if base_query is None:
        assert realm is not None
        query = UserProfile.objects.filter(realm=realm, is_active=True)
        realm_id = realm.id
    else:
        # WARNING: Currently, this code path only really supports one
        # version of `base_query` being used (because otherwise,
        # they'll share the cache, which can screw up the filtering).
        # If you're using this flow, you'll need to re-do any filters
        # in base_query in the code itself; base_query is just a perf
        # optimization.
        query = base_query
        realm_id = 0

    def fetch_users_by_email(emails: List[str]) -> List[UserProfile]:
        # This should be just
        #
        # UserProfile.objects.select_related("realm").filter(email__iexact__in=emails,
        #                                                    realm=realm)
        #
        # But chaining __in and __iexact doesn't work with Django's
        # ORM, so we have the following hack to construct the relevant where clause
        if len(emails) == 0:
            return []

        upper_list = ", ".join(["UPPER(%s)"] * len(emails))
        where_clause = "UPPER(zerver_userprofile.email::text) IN (%s)" % (upper_list,)
        return query.select_related("realm").extra(
            where=[where_clause],
            params=emails)

    return generic_bulk_cached_fetch(
        # Use a separate cache key to protect us from conflicts with
        # the get_user cache.
        lambda email: 'bulk_get_users:' + user_profile_cache_key_id(email, realm_id),
        fetch_users_by_email,
        [email.lower() for email in emails],
        id_fetcher=lambda user_profile: user_profile.email.lower()
    )
Example #6
0
def bulk_get_users(emails: List[str], realm: Optional[Realm],
                   base_query: 'QuerySet[UserProfile]'=None) -> Dict[str, UserProfile]:
    if base_query is None:
        assert realm is not None
        query = UserProfile.objects.filter(realm=realm, is_active=True)
        realm_id = realm.id
    else:
        # WARNING: Currently, this code path only really supports one
        # version of `base_query` being used (because otherwise,
        # they'll share the cache, which can screw up the filtering).
        # If you're using this flow, you'll need to re-do any filters
        # in base_query in the code itself; base_query is just a perf
        # optimization.
        query = base_query
        realm_id = 0

    def fetch_users_by_email(emails: List[str]) -> List[UserProfile]:
        # This should be just
        #
        # UserProfile.objects.select_related("realm").filter(email__iexact__in=emails,
        #                                                    realm=realm)
        #
        # But chaining __in and __iexact doesn't work with Django's
        # ORM, so we have the following hack to construct the relevant where clause
        where_clause = "upper(zerver_userprofile.email::text) IN (SELECT upper(email) FROM unnest(%s) AS email)"
        return query.select_related("realm").extra(
            where=[where_clause],
            params=(emails,))

    def user_to_email(user_profile: UserProfile) -> str:
        return user_profile.email.lower()

    return generic_bulk_cached_fetch(
        # Use a separate cache key to protect us from conflicts with
        # the get_user cache.
        lambda email: 'bulk_get_users:' + user_profile_cache_key_id(email, realm_id),
        fetch_users_by_email,
        [email.lower() for email in emails],
        id_fetcher=user_to_email,
    )
Example #7
0
    def test_empty_object_ids_list(self) -> None:
        class CustomException(Exception):
            pass

        def cache_key_function(
            email: str
        ) -> str:  # nocoverage -- this is just here to make sure it's not called
            raise CustomException("The cache key function was called")

        def query_function(
            emails: List[str]
        ) -> List[
                UserProfile]:  # nocoverage -- this is just here to make sure it's not called
            raise CustomException("The query function was called")

        # query_function and cache_key_function shouldn't be called, because
        # objects_ids is empty, so there's nothing to do.
        result: Dict[str, UserProfile] = generic_bulk_cached_fetch(
            cache_key_function=cache_key_function,
            query_function=query_function,
            object_ids=[])
        self.assertEqual(result, {})
Example #8
0
def messages_for_ids(
    message_ids: List[int],
    user_message_flags: Dict[int, List[str]],
    search_fields: Dict[int, Dict[str, str]],
    apply_markdown: bool,
    client_gravatar: bool,
    allow_edit_history: bool,
) -> List[Dict[str, Any]]:

    cache_transformer = MessageDict.build_dict_from_raw_db_row
    id_fetcher = lambda row: row["id"]

    message_dicts = generic_bulk_cached_fetch(
        to_dict_cache_key_id,
        MessageDict.get_raw_db_rows,
        message_ids,
        id_fetcher=id_fetcher,
        cache_transformer=cache_transformer,
        extractor=extract_message_dict,
        setter=stringify_message_dict,
    )

    message_list: List[Dict[str, Any]] = []

    for message_id in message_ids:
        msg_dict = message_dicts[message_id]
        msg_dict.update(flags=user_message_flags[message_id])
        if message_id in search_fields:
            msg_dict.update(search_fields[message_id])
        # Make sure that we never send message edit history to clients
        # in realms with allow_edit_history disabled.
        if "edit_history" in msg_dict and not allow_edit_history:
            del msg_dict["edit_history"]
        message_list.append(msg_dict)

    MessageDict.post_process_dicts(message_list, apply_markdown,
                                   client_gravatar)

    return message_list
Example #9
0
File: users.py Project: zackw/zulip
def user_ids_to_users(user_ids: Sequence[int], realm: Realm) -> List[UserProfile]:
    # TODO: Consider adding a flag to control whether deactivated
    # users should be included.

    def fetch_users_by_id(user_ids: List[int]) -> List[UserProfile]:
        return list(UserProfile.objects.filter(id__in=user_ids).select_related())

    user_profiles_by_id: Dict[int, UserProfile] = generic_bulk_cached_fetch(
        cache_key_function=user_profile_by_id_cache_key,
        query_function=fetch_users_by_id,
        object_ids=user_ids,
    )

    found_user_ids = user_profiles_by_id.keys()
    missed_user_ids = [user_id for user_id in user_ids if user_id not in found_user_ids]
    if missed_user_ids:
        raise JsonableError(_("Invalid user ID: %s") % (missed_user_ids[0],))

    user_profiles = list(user_profiles_by_id.values())
    for user_profile in user_profiles:
        if user_profile.realm != realm:
            raise JsonableError(_("Invalid user ID: %s") % (user_profile.id,))
    return user_profiles
Example #10
0
def bulk_fetch_display_recipients(
    recipient_tuples: Set[Tuple[int, int,
                                int]]) -> Dict[int, DisplayRecipientT]:
    """
    Takes set of tuples of the form (recipient_id, recipient_type, recipient_type_id)
    Returns dict mapping recipient_id to corresponding display_recipient
    """

    # Build dict mapping recipient id to (type, type_id) of the corresponding recipient:
    recipient_id_to_type_pair_dict = {
        recipient[0]: (recipient[1], recipient[2])
        for recipient in recipient_tuples
    }
    # And the inverse mapping:
    type_pair_to_recipient_id_dict = {(recipient[1], recipient[2]):
                                      recipient[0]
                                      for recipient in recipient_tuples}

    stream_recipients = {
        recipient
        for recipient in recipient_tuples if recipient[1] == Recipient.STREAM
    }
    personal_and_huddle_recipients = recipient_tuples - stream_recipients

    def stream_query_function(
            recipient_ids: List[int]) -> List[TinyStreamResult]:
        stream_ids = [
            recipient_id_to_type_pair_dict[recipient_id][1]
            for recipient_id in recipient_ids
        ]
        return Stream.objects.filter(id__in=stream_ids).values('name', 'id')

    def stream_id_fetcher(stream: TinyStreamResult) -> int:
        return type_pair_to_recipient_id_dict[(Recipient.STREAM, stream['id'])]

    def stream_cache_transformer(stream: TinyStreamResult) -> str:
        return stream['name']

    # ItemT = Stream, CacheItemT = str (name), ObjKT = int (recipient_id)
    stream_display_recipients: Dict[int, str] = generic_bulk_cached_fetch(
        cache_key_function=display_recipient_cache_key,
        query_function=stream_query_function,
        object_ids=[recipient[0] for recipient in stream_recipients],
        id_fetcher=stream_id_fetcher,
        cache_transformer=stream_cache_transformer,
    )

    # Now we have to create display_recipients for personal and huddle messages.
    # We do this via generic_bulk_cached_fetch, supplying apprioprate functions to it.

    def personal_and_huddle_query_function(
        recipient_ids: List[int]
    ) -> List[Tuple[int, List[UserDisplayRecipient]]]:
        """
        Return a list of tuples of the form (recipient_id, [list of UserProfiles])
        where [list of UserProfiles] has users corresponding to the recipient,
        so the receiving userin Recipient.PERSONAL case,
        or in Personal.HUDDLE case - users in the huddle.
        This is a pretty hacky return value, but it needs to be in this form,
        for this function to work as the query_function in generic_bulk_cached_fetch.
        """

        recipients = [
            Recipient(id=recipient_id,
                      type=recipient_id_to_type_pair_dict[recipient_id][0],
                      type_id=recipient_id_to_type_pair_dict[recipient_id][1])
            for recipient_id in recipient_ids
        ]

        # Find all user ids whose UserProfiles we will need to fetch:
        user_ids_to_fetch: Set[int] = set()
        huddle_user_ids: Dict[int, List[int]] = {}
        huddle_user_ids = bulk_get_huddle_user_ids([
            recipient for recipient in recipients
            if recipient.type == Recipient.HUDDLE
        ])
        for recipient in recipients:
            if recipient.type == Recipient.PERSONAL:
                user_ids_to_fetch.add(recipient.type_id)
            else:
                user_ids_to_fetch = user_ids_to_fetch.union(
                    huddle_user_ids[recipient.id])

        # Fetch the needed UserProfiles:
        user_profiles: Dict[
            int, UserDisplayRecipient] = bulk_get_user_profile_by_id(
                list(user_ids_to_fetch))

        # Build the return value:
        result: List[Tuple[int, List[UserDisplayRecipient]]] = []
        for recipient in recipients:
            if recipient.type == Recipient.PERSONAL:
                result.append(
                    (recipient.id, [user_profiles[recipient.type_id]]))
            else:
                result.append((recipient.id, [
                    user_profiles[user_id]
                    for user_id in huddle_user_ids[recipient.id]
                ]))

        return result

    def personal_and_huddle_cache_transformer(
        db_object: Tuple[int, List[UserDisplayRecipient]]
    ) -> List[UserDisplayRecipient]:
        """
        Takes an element of the list returned by the query_function, maps it to the final
        display_recipient list.
        """
        user_profile_list = db_object[1]
        display_recipient = user_profile_list

        return display_recipient

    def personal_and_huddle_id_fetcher(
            db_object: Tuple[int, List[UserDisplayRecipient]]) -> int:
        # db_object is a tuple, with recipient_id in the first position
        return db_object[0]

    # ItemT = Tuple[int, List[UserDisplayRecipient]] (recipient_id, list of corresponding users)
    # CacheItemT = List[UserDisplayRecipient] (display_recipient list)
    # ObjKT = int (recipient_id)
    personal_and_huddle_display_recipients = generic_bulk_cached_fetch(
        cache_key_function=display_recipient_cache_key,
        query_function=personal_and_huddle_query_function,
        object_ids=[
            recipient[0] for recipient in personal_and_huddle_recipients
        ],
        id_fetcher=personal_and_huddle_id_fetcher,
        cache_transformer=personal_and_huddle_cache_transformer)

    # Glue the dicts together and return:
    return {
        **stream_display_recipients,
        **personal_and_huddle_display_recipients
    }
Example #11
0
def get_old_messages_backend(request, user_profile,
                             anchor = REQ(converter=int),
                             num_before = REQ(converter=to_non_negative_int),
                             num_after = REQ(converter=to_non_negative_int),
                             narrow = REQ('narrow', converter=narrow_parameter, default=None),
                             use_first_unread_anchor = REQ(default=False, converter=ujson.loads),
                             apply_markdown=REQ(default=True,
                                                converter=ujson.loads)):
    # type: (HttpRequest, UserProfile, int, int, int, Optional[List[Dict[str, Any]]], bool, bool) -> HttpResponse
    include_history = ok_to_include_history(narrow, user_profile.realm)

    if include_history and not use_first_unread_anchor:
        query = select([column("id").label("message_id")], None, table("zerver_message"))
        inner_msg_id_col = literal_column("zerver_message.id")
    elif narrow is None:
        query = select([column("message_id"), column("flags")],
                       column("user_profile_id") == literal(user_profile.id),
                       table("zerver_usermessage"))
        inner_msg_id_col = column("message_id")
    else:
        # TODO: Don't do this join if we're not doing a search
        query = select([column("message_id"), column("flags")],
                       column("user_profile_id") == literal(user_profile.id),
                       join(table("zerver_usermessage"), table("zerver_message"),
                            literal_column("zerver_usermessage.message_id") ==
                            literal_column("zerver_message.id")))
        inner_msg_id_col = column("message_id")

    num_extra_messages = 1
    is_search = False

    if narrow is not None:
        # Add some metadata to our logging data for narrows
        verbose_operators = []
        for term in narrow:
            if term['operator'] == "is":
                verbose_operators.append("is:" + term['operand'])
            else:
                verbose_operators.append(term['operator'])
        request._log_data['extra'] = "[%s]" % (",".join(verbose_operators),)

        # Build the query for the narrow
        num_extra_messages = 0
        builder = NarrowBuilder(user_profile, inner_msg_id_col)
        search_term = None # type: Optional[Dict[str, Any]]
        for term in narrow:
            if term['operator'] == 'search':
                if not is_search:
                    search_term = term
                    query = query.column(column("subject")).column(column("rendered_content"))
                    is_search = True
                else:
                    # Join the search operators if there are multiple of them
                    search_term['operand'] += ' ' + term['operand']
            else:
                query = builder.add_term(query, term)
        if is_search:
            query = builder.add_term(query, search_term)

    # We add 1 to the number of messages requested if no narrow was
    # specified to ensure that the resulting list always contains the
    # anchor message.  If a narrow was specified, the anchor message
    # might not match the narrow anyway.
    if num_after != 0:
        num_after += num_extra_messages
    else:
        num_before += num_extra_messages

    sa_conn = get_sqlalchemy_connection()
    if use_first_unread_anchor:
        condition = column("flags").op("&")(UserMessage.flags.read.mask) == 0

        # We exclude messages on muted topics when finding the first unread
        # message in this narrow
        muting_conditions = exclude_muting_conditions(user_profile, narrow)
        if muting_conditions:
            condition = and_(condition, *muting_conditions)

        first_unread_query = query.where(condition)
        first_unread_query = first_unread_query.order_by(inner_msg_id_col.asc()).limit(1)
        first_unread_result = list(sa_conn.execute(first_unread_query).fetchall())
        if len(first_unread_result) > 0:
            anchor = first_unread_result[0][0]
        else:
            anchor = LARGER_THAN_MAX_MESSAGE_ID

    before_query = None
    after_query = None
    if num_before != 0:
        before_anchor = anchor
        if num_after != 0:
            # Don't include the anchor in both the before query and the after query
            before_anchor = anchor - 1
        before_query = query.where(inner_msg_id_col <= before_anchor) \
                            .order_by(inner_msg_id_col.desc()).limit(num_before)
    if num_after != 0:
        after_query = query.where(inner_msg_id_col >= anchor) \
                           .order_by(inner_msg_id_col.asc()).limit(num_after)

    if anchor == LARGER_THAN_MAX_MESSAGE_ID:
        # There's no need for an after_query if we're targeting just the target message.
        after_query = None

    if before_query is not None:
        if after_query is not None:
            query = union_all(before_query.self_group(), after_query.self_group())
        else:
            query = before_query
    elif after_query is not None:
        query = after_query
    else:
        # This can happen when a narrow is specified.
        query = query.where(inner_msg_id_col == anchor)

    main_query = alias(query)
    query = select(main_query.c, None, main_query).order_by(column("message_id").asc())
    # This is a hack to tag the query we use for testing
    query = query.prefix_with("/* get_old_messages */")
    query_result = list(sa_conn.execute(query).fetchall())

    # The following is a little messy, but ensures that the code paths
    # are similar regardless of the value of include_history.  The
    # 'user_messages' dictionary maps each message to the user's
    # UserMessage object for that message, which we will attach to the
    # rendered message dict before returning it.  We attempt to
    # bulk-fetch rendered message dicts from remote cache using the
    # 'messages' list.
    search_fields = dict() # type: Dict[int, Dict[str, Text]]
    message_ids = [] # type: List[int]
    user_message_flags = {} # type: Dict[int, List[str]]
    if include_history:
        message_ids = [row[0] for row in query_result]

        # TODO: This could be done with an outer join instead of two queries
        user_message_flags = dict((user_message.message_id, user_message.flags_list()) for user_message in
                                  UserMessage.objects.filter(user_profile=user_profile,
                                                             message__id__in=message_ids))
        for row in query_result:
            message_id = row[0]
            if user_message_flags.get(message_id) is None:
                user_message_flags[message_id] = ["read", "historical"]
            if is_search:
                (_, subject, rendered_content, content_matches, subject_matches) = row
                search_fields[message_id] = get_search_fields(rendered_content, subject,
                                                              content_matches, subject_matches)
    else:
        for row in query_result:
            message_id = row[0]
            flags = row[1]
            user_message_flags[message_id] = parse_usermessage_flags(flags)

            message_ids.append(message_id)

            if is_search:
                (_, _, subject, rendered_content, content_matches, subject_matches) = row
                search_fields[message_id] = get_search_fields(rendered_content, subject,
                                                              content_matches, subject_matches)

    cache_transformer = lambda row: MessageDict.build_dict_from_raw_db_row(row, apply_markdown)
    id_fetcher = lambda row: row['id']

    message_dicts = generic_bulk_cached_fetch(lambda message_id: to_dict_cache_key_id(message_id, apply_markdown),
                                              Message.get_raw_db_rows,
                                              message_ids,
                                              id_fetcher=id_fetcher,
                                              cache_transformer=cache_transformer,
                                              extractor=extract_message_dict,
                                              setter=stringify_message_dict)

    message_list = []
    for message_id in message_ids:
        msg_dict = message_dicts[message_id]
        msg_dict.update({"flags": user_message_flags[message_id]})
        msg_dict.update(search_fields.get(message_id, {}))
        message_list.append(msg_dict)

    statsd.incr('loaded_old_messages', len(message_list))
    ret = {'messages': message_list,
           "result": "success",
           "msg": ""}
    return json_success(ret)