Example #1
0
 def add_attachment(info: Dict[str, Any]) -> None:
     """Pass the attachment described by ``info`` on to ``build_attachment``.

     ``info`` must provide the keys 'message_ids', 'sender_id', 'mtime',
     'size', 'name' and 'target_path'. ``realm_id`` and ``attachments`` are
     not parameters; they are taken from the surrounding scope.
     """
     build_attachment(
         realm_id=realm_id,
         message_ids=info['message_ids'],
         user_id=info['sender_id'],
         fileinfo={
             # Minor lie: the mtime is reported as the creation time.
             'created': info['mtime'],
             'size': info['size'],
             'name': info['name'],
         },
         s3_path=info['target_path'],
         zerver_attachment=attachments,
     )
Example #2
0
 def add_attachment(info: Dict[str, Any]) -> None:
     """Forward one attachment's metadata from ``info`` to ``build_attachment``.

     Reads 'message_ids', 'sender_id', 'mtime', 'size', 'name' and
     'target_path' from ``info``. ``realm_id`` and ``attachments`` come from
     the surrounding scope rather than from the arguments.
     """
     build_attachment(
         realm_id=realm_id,
         message_ids=info['message_ids'],
         user_id=info['sender_id'],
         fileinfo={
             # The modification time stands in for the creation time (minor lie).
             'created': info['mtime'],
             'size': info['size'],
             'name': info['name'],
         },
         s3_path=info['target_path'],
         zerver_attachment=attachments,
     )
Example #3
0
File: slack.py Project: kyoki/zulip
def channel_message_to_zerver_message(realm_id: int, users: List[ZerverFieldsT],
                                      added_users: AddedUsersT,
                                      added_recipient: AddedRecipientsT,
                                      all_messages: List[ZerverFieldsT],
                                      zerver_realmemoji: List[ZerverFieldsT],
                                      zerver_subscription: List[ZerverFieldsT],
                                      added_channels: AddedChannelsT,
                                      id_list: Tuple[int, int, int, int],
                                      domain_name: str) -> Tuple[List[ZerverFieldsT],
                                                                 List[ZerverFieldsT],
                                                                 List[ZerverFieldsT],
                                                                 List[ZerverFieldsT],
                                                                 List[ZerverFieldsT],
                                                                 Tuple[int, int, int, int]]:
    """
    Convert the Slack messages in all_messages into Zulip message records,
    along with the usermessage, reaction, upload and attachment records
    that go with them.

    Skipped entirely: messages with no sending user, pin/unpin notices,
    channel join/leave/rename notices, and messages whose text cannot be
    converted to Zulip markdown.

    id_list holds the id counters to start from, in the order
    (message, usermessage, reaction, attachment).

    Returns:
    1. zerver_message, which is a list of the messages
    2. zerver_usermessage, which is a list of the usermessages
    3. zerver_attachment, which is a list of the attachments
    4. uploads_list, which is a list of uploads to be mapped in uploads records.json
    5. reaction_list, which is a list of all user reactions
    6. id_list, which is a tuple of max ids of messages, usermessages, reactions and attachments
    """
    message_id_count, usermessage_id_count, reaction_id_count, attachment_id_count = id_list
    zerver_message = []  # type: List[ZerverFieldsT]
    zerver_usermessage = []  # type: List[ZerverFieldsT]
    uploads_list = []  # type: List[ZerverFieldsT]
    zerver_attachment = []  # type: List[ZerverFieldsT]
    reaction_list = []  # type: List[ZerverFieldsT]

    # For unicode emoji
    with open(NAME_TO_CODEPOINT_PATH) as fp:
        name_to_codepoint = ujson.load(fp)

    for message in all_messages:
        user = get_message_sending_user(message)
        if not user:
            # Ignore messages without user names
            # These are sometimes produced by Slack
            continue

        subtype = message.get('subtype', False)
        if subtype in [
                # Zulip doesn't have a pinned_item concept
                "pinned_item",
                "unpinned_item",
                # Slack's channel join/leave notices are spammy
                "channel_join",
                "channel_leave",
                "channel_name"
        ]:
            continue

        has_attachment = has_image = False
        try:
            content, mentioned_users_id, has_link = convert_to_zulip_markdown(
                message['text'], users, added_channels, added_users)
        except Exception:
            # Best effort: report the offending message and keep importing
            print("Slack message unexpectedly missing text representation:")
            print(json.dumps(message, indent=4))
            continue
        rendered_content = None

        recipient_id = added_recipient[message['channel_name']]
        message_id = message_id_count

        # Process message reactions
        if 'reactions' in message:
            reaction_id_count = build_reactions(reaction_list, message['reactions'], added_users,
                                                message_id, reaction_id_count, name_to_codepoint,
                                                zerver_realmemoji)

        # Process different subtypes of slack messages

        # Subtypes which have only the action in the message should
        # be rendered with '/me' in the content initially
        # For example "sh_room_created" has the message 'started a call'
        # which should be displayed as '/me started a call'
        if subtype in ["bot_add", "sh_room_created", "me_message"]:
            content = ('/me %s' % (content))

        files = message.get('files', [])
        if subtype == 'file_share':
            # In Slack messages, uploads can either have the subtype as 'file_share' or
            # have the upload information in 'files' keyword
            files = [message['file']]

        # NOTE(review): each file replaces `content`, so the converted message
        # text and any earlier file links are dropped -- confirm this is intended.
        for fileinfo in files:
            url = fileinfo['url_private']
            # For attachments with slack download link
            if 'files.slack.com' in url:
                has_attachment = has_link = True
                # Only ever switch has_image on: a later non-image file must
                # not clear the flag set by an earlier image in the same message.
                if 'image' in fileinfo['mimetype']:
                    has_image = True

                file_user = [iterate_user for iterate_user in users if message['user'] == iterate_user['id']]
                file_user_email = get_user_email(file_user[0], domain_name)

                s3_path, content = get_attachment_path_and_content(fileinfo, realm_id)

                # construct attachments
                build_uploads(added_users[user], realm_id, file_user_email, fileinfo, s3_path,
                              uploads_list)

                attachment_id = attachment_id_count
                build_attachment(realm_id, message_id, attachment_id, added_users[user],
                                 fileinfo, s3_path, zerver_attachment)
                attachment_id_count += 1
            # For attachments with link not from slack
            # Example: Google drive integration
            else:
                has_link = True
                if 'title' in fileinfo:
                    file_name = fileinfo['title']
                else:
                    file_name = fileinfo['name']
                content = '[%s](%s)' % (file_name, url)

        # construct message
        subject = 'imported from slack'

        zulip_message = build_message(subject, float(message['ts']), message_id, content,
                                      rendered_content, added_users[user], recipient_id,
                                      has_image, has_link, has_attachment)
        zerver_message.append(zulip_message)

        # construct usermessages
        usermessage_id_count = build_usermessages(
            zerver_usermessage, usermessage_id_count, zerver_subscription,
            recipient_id, mentioned_users_id, message_id)

        message_id_count += 1

    id_list = (message_id_count, usermessage_id_count,
               reaction_id_count, attachment_id_count)
    return zerver_message, zerver_usermessage, zerver_attachment, uploads_list, \
        reaction_list, id_list
Example #4
0
def process_message_files(message: ZerverFieldsT,
                          domain_name: str,
                          realm_id: int,
                          message_id: int,
                          user: str,
                          users: List[ZerverFieldsT],
                          added_users: AddedUsersT,
                          zerver_attachment: List[ZerverFieldsT],
                          uploads_list: List[ZerverFieldsT]) -> Dict[str, Any]:
    """
    Process the files attached to one Slack message.

    Files whose 'url_private' contains 'files.slack.com' are treated as
    uploads: they are handed to build_uploads and build_attachment together
    with uploads_list and zerver_attachment. Every other file is treated as
    an external link (e.g. the Google Drive integration) and only contributes
    a markdown link.

    Returns a dict with:
    * content: one markdown link per file, joined with newlines
    * has_attachment: True if at least one Slack-hosted file was seen
    * has_image: True if at least one Slack-hosted file has an image mimetype
    * has_link: True if at least one file was processed
    """
    has_attachment = False
    has_image = False
    has_link = False

    if message.get('subtype') == 'file_share':
        # In Slack messages, uploads can either have the subtype as 'file_share' or
        # have the upload information in 'files' keyword
        files = [message['file']]
    else:
        files = message.get('files', [])

    markdown_links = []

    for fileinfo in files:
        url = fileinfo['url_private']

        if 'files.slack.com' in url:
            # For attachments with slack download link
            has_attachment = True
            has_link = True
            # Only ever switch has_image on: a later non-image file must not
            # clear the flag set by an earlier image in the same message.
            if 'image' in fileinfo['mimetype']:
                has_image = True

            file_user = [iterate_user for iterate_user in users if message['user'] == iterate_user['id']]
            file_user_email = get_user_email(file_user[0], domain_name)

            s3_path, content_for_link = get_attachment_path_and_content(fileinfo, realm_id)
            markdown_links.append(content_for_link)

            # construct attachments
            build_uploads(added_users[user], realm_id, file_user_email, fileinfo, s3_path,
                          uploads_list)

            build_attachment(realm_id, {message_id}, added_users[user],
                             fileinfo, s3_path, zerver_attachment)
        else:
            # For attachments with link not from slack
            # Example: Google drive integration
            has_link = True
            if 'title' in fileinfo:
                file_name = fileinfo['title']
            else:
                file_name = fileinfo['name']
            markdown_links.append('[%s](%s)' % (file_name, url))

    content = '\n'.join(markdown_links)

    return dict(
        content=content,
        has_attachment=has_attachment,
        has_image=has_image,
        has_link=has_link,
    )
Example #5
0
def process_message_files(message: ZerverFieldsT, domain_name: str,
                          realm_id: int, message_id: int, slack_user_id: str,
                          users: List[ZerverFieldsT],
                          slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
                          zerver_attachment: List[ZerverFieldsT],
                          uploads_list: List[ZerverFieldsT]) -> Dict[str, Any]:
    """
    Process the files attached to one Slack message.

    Files in 'tombstone' or 'hidden_by_limit' mode are ignored. Files whose
    'url_private' contains 'files.slack.com' are treated as uploads: they are
    handed to build_uploads and build_attachment together with uploads_list
    and zerver_attachment. Every other file is treated as an external link
    (e.g. the Google Drive integration) and only contributes a markdown link.

    Returns a dict with:
    * content: one markdown link per processed file, joined with newlines
    * has_attachment: True if at least one Slack-hosted file was seen
    * has_image: True if at least one Slack-hosted file has an image mimetype
    * has_link: True if at least one file was processed
    """
    has_attachment = False
    has_image = False
    has_link = False

    if message.get('subtype') == 'file_share':
        # In Slack messages, uploads can either have the subtype as 'file_share' or
        # have the upload information in 'files' keyword
        files = [message['file']]
    else:
        files = message.get('files', [])

    markdown_links = []

    for fileinfo in files:
        if fileinfo.get('mode', '') in ['tombstone', 'hidden_by_limit']:
            # Slack sometimes includes tombstone mode files with no
            # real data on the actual file (presumably in cases where
            # the file was deleted). hidden_by_limit mode is for files
            # that are hidden because of 10k cap in free plan.
            continue

        url = fileinfo['url_private']

        if 'files.slack.com' in url:
            # For attachments with slack download link
            has_attachment = True
            has_link = True
            # Only ever switch has_image on: a later non-image file must not
            # clear the flag set by an earlier image in the same message.
            if 'image' in fileinfo['mimetype']:
                has_image = True

            file_user = [
                iterate_user for iterate_user in users
                if message['user'] == iterate_user['id']
            ]
            file_user_email = get_user_email(file_user[0], domain_name)

            s3_path, content_for_link = get_attachment_path_and_content(
                fileinfo, realm_id)
            markdown_links.append(content_for_link)

            zulip_user_id = slack_user_id_to_zulip_user_id[slack_user_id]

            build_uploads(zulip_user_id, realm_id, file_user_email, fileinfo,
                          s3_path, uploads_list)

            build_attachment(realm_id, {message_id}, zulip_user_id,
                             fileinfo, s3_path, zerver_attachment)
        else:
            # For attachments with link not from slack
            # Example: Google drive integration
            has_link = True
            if 'title' in fileinfo:
                file_name = fileinfo['title']
            else:
                file_name = fileinfo['name']
            markdown_links.append('[%s](%s)' % (file_name, url))

    content = '\n'.join(markdown_links)

    return dict(
        content=content,
        has_attachment=has_attachment,
        has_image=has_image,
        has_link=has_link,
    )
Example #6
0
def process_message_attachments(
    attachments: List[Dict[str, Any]],
    realm_id: int,
    message_id: int,
    user_id: int,
    user_handler: UserHandler,
    zerver_attachment: List[ZerverFieldsT],
    uploads_list: List[ZerverFieldsT],
    mattermost_data_dir: str,
    output_dir: str,
) -> Tuple[str, bool]:
    """Import the attachments of a single message.

    For each attachment, the file at ``<mattermost_data_dir>/data/<path>`` is
    given a fresh randomised upload path, recorded in ``uploads_list``,
    passed to ``build_attachment`` along with ``zerver_attachment``, and
    copied to ``<output_dir>/uploads/<upload path>``.

    Returns ``(content, has_image)``: the newline-joined markdown links to
    the uploads, and whether any file name ends in one of IMAGE_EXTENSIONS.
    """
    image_seen = False
    links = []

    for entry in attachments:
        relative_path = entry["path"]
        source_path = os.path.join(mattermost_data_dir, "data", relative_path)

        file_name = relative_path.split("/")[-1]
        # Whatever follows the last dot (the whole name when there is none).
        extension = "." + file_name.split(".")[-1]
        if extension.lower() in IMAGE_EXTENSIONS:
            image_seen = True

        path_parts = (
            str(realm_id),
            format(random.randint(0, 255), "x"),
            secrets.token_urlsafe(18),
            sanitize_name(file_name),
        )
        s3_path = "/".join(path_parts)
        links.append(f"[{file_name}](/user_uploads/{s3_path})")

        fileinfo = {
            "name": file_name,
            "size": os.path.getsize(source_path),
            "created": os.path.getmtime(source_path),
        }

        uploads_list.append(
            {
                "path": s3_path,
                "realm_id": realm_id,
                "content_type": None,
                "user_profile_id": user_id,
                "last_modified": fileinfo["created"],
                "user_profile_email": user_handler.get_user(user_id=user_id)["email"],
                "s3_path": s3_path,
                "size": fileinfo["size"],
            }
        )

        build_attachment(
            realm_id=realm_id,
            message_ids={message_id},
            user_id=user_id,
            fileinfo=fileinfo,
            s3_path=s3_path,
            zerver_attachment=zerver_attachment,
        )

        # Place a copy of the file under output_dir at its upload path.
        destination_path = os.path.join(output_dir, "uploads", s3_path)
        os.makedirs(os.path.dirname(destination_path), exist_ok=True)
        shutil.copyfile(source_path, destination_path)

    return "\n".join(links), image_seen
Example #7
0
def channel_message_to_zerver_message(
    realm_id: int, users: List[ZerverFieldsT], added_users: AddedUsersT,
    added_recipient: AddedRecipientsT, all_messages: List[ZerverFieldsT],
    zerver_realmemoji: List[ZerverFieldsT],
    zerver_subscription: List[ZerverFieldsT], added_channels: AddedChannelsT,
    id_list: Tuple[int, int, int, int], domain_name: str
) -> Tuple[List[ZerverFieldsT], List[ZerverFieldsT], List[ZerverFieldsT],
           List[ZerverFieldsT], List[ZerverFieldsT], Tuple[int, int, int,
                                                           int]]:
    """
    Convert the Slack messages in all_messages into Zulip message records,
    along with the usermessage, reaction, upload and attachment records
    that go with them.

    Skipped entirely: messages with no sending user, pin/unpin notices,
    channel join/leave/rename notices, and messages whose text cannot be
    converted to Zulip markdown.

    id_list holds the id counters to start from, in the order
    (message, usermessage, reaction, attachment).

    Returns:
    1. zerver_message, which is a list of the messages
    2. zerver_usermessage, which is a list of the usermessages
    3. zerver_attachment, which is a list of the attachments
    4. uploads_list, which is a list of uploads to be mapped in uploads records.json
    5. reaction_list, which is a list of all user reactions
    6. id_list, which is a tuple of max ids of messages, usermessages, reactions and attachments
    """
    message_id_count, usermessage_id_count, reaction_id_count, attachment_id_count = id_list
    zerver_message = []  # type: List[ZerverFieldsT]
    zerver_usermessage = []  # type: List[ZerverFieldsT]
    uploads_list = []  # type: List[ZerverFieldsT]
    zerver_attachment = []  # type: List[ZerverFieldsT]
    reaction_list = []  # type: List[ZerverFieldsT]

    # For unicode emoji
    with open(NAME_TO_CODEPOINT_PATH) as fp:
        name_to_codepoint = ujson.load(fp)

    for message in all_messages:
        user = get_message_sending_user(message)
        if not user:
            # Ignore messages without user names
            # These are sometimes produced by Slack
            continue

        subtype = message.get('subtype', False)
        if subtype in [
                # Zulip doesn't have a pinned_item concept
                "pinned_item",
                "unpinned_item",
                # Slack's channel join/leave notices are spammy
                "channel_join",
                "channel_leave",
                "channel_name"
        ]:
            continue

        has_attachment = has_image = False
        try:
            content, mentioned_users_id, has_link = convert_to_zulip_markdown(
                message['text'], users, added_channels, added_users)
        except Exception:
            # Best effort: report the offending message and keep importing
            print("Slack message unexpectedly missing text representation:")
            print(json.dumps(message, indent=4))
            continue
        rendered_content = None

        recipient_id = added_recipient[message['channel_name']]
        message_id = message_id_count

        # Process message reactions
        if 'reactions' in message:
            reaction_id_count = build_reactions(
                reaction_list, message['reactions'], added_users, message_id,
                reaction_id_count, name_to_codepoint, zerver_realmemoji)

        # Process different subtypes of slack messages

        # Subtypes which have only the action in the message should
        # be rendered with '/me' in the content initially
        # For example "sh_room_created" has the message 'started a call'
        # which should be displayed as '/me started a call'
        if subtype in ["bot_add", "sh_room_created", "me_message"]:
            content = ('/me %s' % (content))

        files = message.get('files', [])
        if subtype == 'file_share':
            # In Slack messages, uploads can either have the subtype as 'file_share' or
            # have the upload information in 'files' keyword
            files = [message['file']]

        # NOTE(review): each file replaces `content`, so the converted message
        # text and any earlier file links are dropped -- confirm this is intended.
        for fileinfo in files:
            url = fileinfo['url_private']
            # For attachments with slack download link
            if 'files.slack.com' in url:
                has_attachment = has_link = True
                # Only ever switch has_image on: a later non-image file must
                # not clear the flag set by an earlier image in the same message.
                if 'image' in fileinfo['mimetype']:
                    has_image = True

                file_user = [
                    iterate_user for iterate_user in users
                    if message['user'] == iterate_user['id']
                ]
                file_user_email = get_user_email(file_user[0], domain_name)

                s3_path, content = get_attachment_path_and_content(
                    fileinfo, realm_id)

                # construct attachments
                build_uploads(added_users[user], realm_id, file_user_email,
                              fileinfo, s3_path, uploads_list)

                attachment_id = attachment_id_count
                build_attachment(realm_id, message_id, attachment_id,
                                 added_users[user], fileinfo, s3_path,
                                 zerver_attachment)
                attachment_id_count += 1
            # For attachments with link not from slack
            # Example: Google drive integration
            else:
                has_link = True
                if 'title' in fileinfo:
                    file_name = fileinfo['title']
                else:
                    file_name = fileinfo['name']
                content = '[%s](%s)' % (file_name, url)

        # construct message
        zulip_message = dict(
            sending_client=1,
            rendered_content_version=1,  # This is Zulip-specific
            has_image=has_image,
            subject='imported from slack',  # This is Zulip-specific
            pub_date=float(message['ts']),
            id=message_id,
            # attachment will be posted in the subsequent message;
            # this is how Slack does it, i.e. less like email
            has_attachment=has_attachment,
            edit_history=None,
            sender=added_users[user],  # map slack id to zulip id
            content=content,
            rendered_content=rendered_content,  # slack doesn't cache this
            recipient=recipient_id,
            last_edit_time=None,
            has_link=has_link)
        zerver_message.append(zulip_message)

        # construct usermessages
        usermessage_id_count = build_usermessages(
            zerver_usermessage, usermessage_id_count, zerver_subscription,
            recipient_id, mentioned_users_id, message_id)

        message_id_count += 1

    id_list = (message_id_count, usermessage_id_count, reaction_id_count,
               attachment_id_count)
    return zerver_message, zerver_usermessage, zerver_attachment, uploads_list, \
        reaction_list, id_list
Example #8
0
def process_message_attachment(
    upload: Dict[str, Any],
    realm_id: int,
    message_id: int,
    user_id: int,
    user_handler: UserHandler,
    zerver_attachment: List[ZerverFieldsT],
    uploads_list: List[ZerverFieldsT],
    upload_id_to_upload_data_map: Dict[str, Dict[str, Any]],
    output_dir: str,
) -> Tuple[str, bool]:
    """Import one uploaded file attached to a message.

    The file's chunks are looked up in ``upload_id_to_upload_data_map`` by
    ``upload["_id"]``, joined, and written to
    ``<output_dir>/uploads/<upload path>``. An upload record is appended to
    ``uploads_list`` and ``build_attachment`` is called with
    ``zerver_attachment``.

    Returns ``(attachment_content, has_image)``: the upload's description
    followed by a markdown link to the file, and whether the subtype of
    ``upload["type"]`` corresponds to one of IMAGE_EXTENSIONS.
    """
    file_data = upload_id_to_upload_data_map[upload["_id"]]
    file_name = upload["name"]

    # Treat the part after the last "/" of the type as a file extension.
    extension = "." + upload["type"].split("/")[-1]
    has_image = extension.lower() in IMAGE_EXTENSIONS

    path_parts = (
        str(realm_id),
        format(random.randint(0, 255), "x"),
        secrets.token_urlsafe(18),
        sanitize_name(file_name),
    )
    s3_path = "/".join(path_parts)

    # Build the attachment from chunks and save it to s3_path.
    destination_path = os.path.join(output_dir, "uploads", s3_path)
    os.makedirs(os.path.dirname(destination_path), exist_ok=True)
    with open(destination_path, "wb") as out_file:
        out_file.write(b"".join(file_data["chunk"]))

    description = file_data["description"]
    attachment_content = f"{description}\n\n[{file_name}](/user_uploads/{s3_path})"

    fileinfo = {
        "name": file_name,
        "size": file_data["size"],
        "created": float(file_data["_updatedAt"].timestamp()),
    }

    # A separate local keeps the ``upload`` argument from being rebound.
    upload_record = {
        "path": s3_path,
        "realm_id": realm_id,
        "content_type": upload["type"],
        "user_profile_id": user_id,
        "last_modified": fileinfo["created"],
        "user_profile_email": user_handler.get_user(user_id=user_id)["email"],
        "s3_path": s3_path,
        "size": fileinfo["size"],
    }
    uploads_list.append(upload_record)

    build_attachment(
        realm_id=realm_id,
        message_ids={message_id},
        user_id=user_id,
        fileinfo=fileinfo,
        s3_path=s3_path,
        zerver_attachment=zerver_attachment,
    )

    return attachment_content, has_image
Example #9
0
def process_message_files(
    message: ZerverFieldsT,
    domain_name: str,
    realm_id: int,
    message_id: int,
    slack_user_id: str,
    users: List[ZerverFieldsT],
    slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
    zerver_attachment: List[ZerverFieldsT],
    uploads_list: List[ZerverFieldsT],
) -> Dict[str, Any]:
    """
    Process the files attached to one Slack message.

    Files in "tombstone" or "hidden_by_limit" mode are ignored. Files whose
    "url_private" contains "files.slack.com" are treated as uploads: they are
    handed to build_uploads and build_attachment together with uploads_list
    and zerver_attachment. Every other file is treated as an external link
    (e.g. the Google Drive integration) and only contributes a markdown link.

    Returns a dict with:
    * content: one markdown link per processed file, joined with newlines
    * has_attachment: True if at least one Slack-hosted file was seen
    * has_image: True if at least one Slack-hosted file has an image mimetype
    * has_link: True if at least one file was processed
    """
    has_attachment = False
    has_image = False
    has_link = False

    if message.get("subtype") == "file_share":
        # In Slack messages, uploads can either have the subtype as 'file_share' or
        # have the upload information in 'files' keyword
        files = [message["file"]]
    else:
        files = message.get("files", [])

    markdown_links = []

    for fileinfo in files:
        if fileinfo.get("mode", "") in ["tombstone", "hidden_by_limit"]:
            # Slack sometimes includes tombstone mode files with no
            # real data on the actual file (presumably in cases where
            # the file was deleted). hidden_by_limit mode is for files
            # that are hidden because of 10k cap in free plan.
            continue

        url = fileinfo["url_private"]

        if "files.slack.com" in url:
            # For attachments with Slack download link
            has_attachment = True
            has_link = True
            # Only ever switch has_image on: a later non-image file must not
            # clear the flag set by an earlier image in the same message.
            if "image" in fileinfo["mimetype"]:
                has_image = True

            file_user = [
                iterate_user for iterate_user in users if message["user"] == iterate_user["id"]
            ]
            file_user_email = get_user_email(file_user[0], domain_name)

            s3_path, content_for_link = get_attachment_path_and_content(fileinfo, realm_id)
            markdown_links.append(content_for_link)

            zulip_user_id = slack_user_id_to_zulip_user_id[slack_user_id]

            build_uploads(
                zulip_user_id,
                realm_id,
                file_user_email,
                fileinfo,
                s3_path,
                uploads_list,
            )

            build_attachment(
                realm_id,
                {message_id},
                zulip_user_id,
                fileinfo,
                s3_path,
                zerver_attachment,
            )
        else:
            # For attachments with link not from Slack
            # Example: Google drive integration
            has_link = True
            if "title" in fileinfo:
                file_name = fileinfo["title"]
            else:
                file_name = fileinfo["name"]
            markdown_links.append(f"[{file_name}]({url})")

    content = "\n".join(markdown_links)

    return dict(
        content=content,
        has_attachment=has_attachment,
        has_image=has_image,
        has_link=has_link,
    )
Example #10
0
def process_message_files(message: ZerverFieldsT,
                          domain_name: str,
                          realm_id: int,
                          message_id: int,
                          user: str,
                          users: List[ZerverFieldsT],
                          added_users: AddedUsersT,
                          zerver_attachment: List[ZerverFieldsT],
                          uploads_list: List[ZerverFieldsT]) -> Dict[str, Any]:
    """
    Process the files attached to one Slack message.

    Files whose 'url_private' contains 'files.slack.com' are treated as
    uploads: they are handed to build_uploads and build_attachment together
    with uploads_list and zerver_attachment. Every other file is treated as
    an external link (e.g. the Google Drive integration) and only contributes
    a markdown link.

    Returns a dict with:
    * content: one markdown link per file, joined with newlines
    * has_attachment: True if at least one Slack-hosted file was seen
    * has_image: True if at least one Slack-hosted file has an image mimetype
    * has_link: True if at least one file was processed
    """
    has_attachment = False
    has_image = False
    has_link = False

    if message.get('subtype') == 'file_share':
        # In Slack messages, uploads can either have the subtype as 'file_share' or
        # have the upload information in 'files' keyword
        files = [message['file']]
    else:
        files = message.get('files', [])

    markdown_links = []

    for fileinfo in files:
        url = fileinfo['url_private']

        if 'files.slack.com' in url:
            # For attachments with slack download link
            has_attachment = True
            has_link = True
            # Only ever switch has_image on: a later non-image file must not
            # clear the flag set by an earlier image in the same message.
            if 'image' in fileinfo['mimetype']:
                has_image = True

            file_user = [iterate_user for iterate_user in users if message['user'] == iterate_user['id']]
            file_user_email = get_user_email(file_user[0], domain_name)

            s3_path, content_for_link = get_attachment_path_and_content(fileinfo, realm_id)
            markdown_links.append(content_for_link)

            # construct attachments
            build_uploads(added_users[user], realm_id, file_user_email, fileinfo, s3_path,
                          uploads_list)

            build_attachment(realm_id, {message_id}, added_users[user],
                             fileinfo, s3_path, zerver_attachment)
        else:
            # For attachments with link not from slack
            # Example: Google drive integration
            has_link = True
            if 'title' in fileinfo:
                file_name = fileinfo['title']
            else:
                file_name = fileinfo['name']
            markdown_links.append('[%s](%s)' % (file_name, url))

    content = '\n'.join(markdown_links)

    return dict(
        content=content,
        has_attachment=has_attachment,
        has_image=has_image,
        has_link=has_link,
    )