Example #1
def write_upload_data(output_dir: str, realm_id: int) -> None:
    uploads_records = []  # type: List[ZerverFieldsT]
    uploads_folder = os.path.join(output_dir, 'uploads')
    os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True)

    attachments = []  # type: List[ZerverFieldsT]
    attachment = {"zerver_attachment": attachments}
    create_converted_data_files(uploads_records, output_dir, '/uploads/records.json')
    create_converted_data_files(attachment, output_dir, '/attachment.json')
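Every example on this page funnels its output through create_converted_data_files, which is not itself shown in these snippets. As a rough sketch only (an assumption about the helper, not its verbatim implementation), it serializes the given structure to JSON at output_dir + file_path, creating parent directories as needed:

import json
import os
from typing import Any

def create_converted_data_files(data: Any, output_dir: str, file_path: str) -> None:
    # Sketch only: dump `data` as JSON to, e.g., output_dir + '/uploads/records.json'.
    output_file = output_dir + file_path
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, "w") as fp:
        json.dump(data, fp, indent=4)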
Example #2
def do_convert_data(gitter_data_file: str, output_dir: str, threads: int=6) -> None:
    # Subdomain is set by the user while running the import command
    realm_subdomain = ""
    domain_name = settings.EXTERNAL_HOST

    os.makedirs(output_dir, exist_ok=True)
    # output directory should be empty initially
    if os.listdir(output_dir):
        raise Exception("Output directory should be empty!")

    # Read data from the gitter file
    with open(gitter_data_file, "r") as fp:
        gitter_data = ujson.load(fp)

    realm, avatar_list, user_map = gitter_workspace_to_realm(
        domain_name, gitter_data, realm_subdomain)

    subscriber_map = make_subscriber_map(
        zerver_subscription=realm['zerver_subscription'],
    )

    # For user mentions
    user_short_name_to_full_name = {}
    for userprofile in realm['zerver_userprofile']:
        user_short_name_to_full_name[userprofile['short_name']] = userprofile['full_name']

    convert_gitter_workspace_messages(
        gitter_data, output_dir, subscriber_map, user_map,
        user_short_name_to_full_name)

    avatar_folder = os.path.join(output_dir, 'avatars')
    # realm_id is presumably a module-level constant (realm_id = 0) in this
    # version of the gitter converter; it is not defined locally here.
    avatar_realm_folder = os.path.join(avatar_folder, str(realm_id))
    os.makedirs(avatar_realm_folder, exist_ok=True)
    avatar_records = process_avatars(avatar_list, avatar_folder, realm_id, threads)

    attachment = {"zerver_attachment": []}  # type: Dict[str, List[Any]]

    # IO realm.json
    create_converted_data_files(realm, output_dir, '/realm.json')
    # IO emoji records
    create_converted_data_files([], output_dir, '/emoji/records.json')
    # IO avatar records
    create_converted_data_files(avatar_records, output_dir, '/avatars/records.json')
    # IO uploads records
    create_converted_data_files([], output_dir, '/uploads/records.json')
    # IO attachments records
    create_converted_data_files(attachment, output_dir, '/attachment.json')

    subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])

    logging.info('######### DATA CONVERSION FINISHED #########\n')
    logging.info("Zulip data dump created at %s" % (output_dir))
Example #3
def write_avatar_data(raw_user_data: List[ZerverFieldsT],
                      output_dir: str,
                      realm_id: int) -> None:
    avatar_folder = os.path.join(output_dir, 'avatars')
    avatar_realm_folder = os.path.join(avatar_folder, str(realm_id))
    os.makedirs(avatar_realm_folder, exist_ok=True)

    avatar_records = convert_avatar_data(
        avatar_folder=avatar_folder,
        raw_data=raw_user_data,
        realm_id=realm_id,
    )

    create_converted_data_files(avatar_records, output_dir, '/avatars/records.json')
Example #5
def do_convert_data(gitter_data_file: str, output_dir: str, threads: int=6) -> None:
    # Subdomain is set by the user while running the import command
    realm_subdomain = ""
    domain_name = settings.EXTERNAL_HOST

    os.makedirs(output_dir, exist_ok=True)
    # output directory should be empty initially
    if os.listdir(output_dir):
        raise Exception("Output directory should be empty!")

    # Read data from the gitter file
    with open(gitter_data_file) as fp:
        gitter_data = json.load(fp)

    realm, avatar_list, user_map = gitter_workspace_to_realm(
        domain_name, gitter_data, realm_subdomain)

    # For user mentions
    user_short_name_to_full_name = {}
    for userprofile in realm['zerver_userprofile']:
        user_short_name_to_full_name[userprofile['short_name']] = userprofile['full_name']

    convert_gitter_workspace_messages(
        gitter_data, output_dir, realm['zerver_subscription'], user_map,
        user_short_name_to_full_name)

    avatar_folder = os.path.join(output_dir, 'avatars')
    avatar_realm_folder = os.path.join(avatar_folder, str(realm_id))
    os.makedirs(avatar_realm_folder, exist_ok=True)
    avatar_records = process_avatars(avatar_list, avatar_folder, realm_id, threads)

    attachment = {"zerver_attachment": []}  # type: Dict[str, List[Any]]

    # IO realm.json
    create_converted_data_files(realm, output_dir, '/realm.json')
    # IO emoji records
    create_converted_data_files([], output_dir, '/emoji/records.json')
    # IO avatar records
    create_converted_data_files(avatar_records, output_dir, '/avatars/records.json')
    # IO uploads records
    create_converted_data_files([], output_dir, '/uploads/records.json')
    # IO attachments records
    create_converted_data_files(attachment, output_dir, '/attachment.json')

    subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])

    logging.info('######### DATA CONVERSION FINISHED #########\n')
    logging.info("Zulip data dump created at %s" % (output_dir))
Example #6
def process_raw_message_batch(
    realm_id: int,
    raw_messages: List[Dict[str, Any]],
    subscriber_map: Dict[int, Set[int]],
    user_handler: UserHandler,
    is_pm_data: bool,
    output_dir: str,
    zerver_realmemoji: List[ZerverFieldsT],
    total_reactions: List[ZerverFieldsT],
    uploads_list: List[ZerverFieldsT],
    zerver_attachment: List[ZerverFieldsT],
    upload_id_to_upload_data_map: Dict[str, Dict[str, Any]],
) -> None:
    def fix_mentions(content: str, mention_user_ids: Set[int],
                     rc_channel_mention_data: List[Dict[str, str]]) -> str:
        # Fix user mentions
        for user_id in mention_user_ids:
            user = user_handler.get_user(user_id=user_id)
            rc_mention = "@{short_name}".format(**user)
            zulip_mention = "@**{full_name}**".format(**user)
            content = content.replace(rc_mention, zulip_mention)

        content = content.replace("@all", "@**all**")
        # We don't have an equivalent for Rocket.Chat's @here mention
        # which mentions all users active in the channel.
        content = content.replace("@here", "@**all**")

        # Fix channel mentions
        for mention_data in rc_channel_mention_data:
            rc_mention = mention_data["rc_mention"]
            zulip_mention = mention_data["zulip_mention"]
            content = content.replace(rc_mention, zulip_mention)

        return content

    mention_map: Dict[int, Set[int]] = {}
    zerver_message: List[ZerverFieldsT] = []

    for raw_message in raw_messages:
        message_id = NEXT_ID("message")
        mention_user_ids = raw_message["mention_user_ids"]
        mention_map[message_id] = mention_user_ids

        content = fix_mentions(
            content=raw_message["content"],
            mention_user_ids=mention_user_ids,
            rc_channel_mention_data=raw_message["rc_channel_mention_data"],
        )

        if len(content) > 10000:  # nocoverage
            logging.info("skipping too-long message of length %s",
                         len(content))
            continue

        date_sent = raw_message["date_sent"]
        sender_user_id = raw_message["sender_id"]
        recipient_id = raw_message["recipient_id"]

        rendered_content = None

        has_attachment = False
        has_image = False
        has_link = raw_message["has_link"]

        if "file" in raw_message:
            has_attachment = True
            has_link = True

            attachment_content, has_image = process_message_attachment(
                upload=raw_message["file"],
                realm_id=realm_id,
                message_id=message_id,
                user_id=sender_user_id,
                user_handler=user_handler,
                uploads_list=uploads_list,
                zerver_attachment=zerver_attachment,
                upload_id_to_upload_data_map=upload_id_to_upload_data_map,
                output_dir=output_dir,
            )

            content += attachment_content

        topic_name = raw_message["topic_name"]

        message = build_message(
            content=content,
            message_id=message_id,
            date_sent=date_sent,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            topic_name=topic_name,
            user_id=sender_user_id,
            has_image=has_image,
            has_link=has_link,
            has_attachment=has_attachment,
        )
        zerver_message.append(message)
        build_reactions(
            total_reactions=total_reactions,
            reactions=raw_message["reactions"],
            message_id=message_id,
            zerver_realmemoji=zerver_realmemoji,
        )

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=mention_map,
    )

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    dump_file_id = NEXT_ID("dump_file_id" + str(realm_id))
    message_file = f"/messages-{dump_file_id:06}.json"
    create_converted_data_files(message_json, output_dir, message_file)
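NEXT_ID hands out sequential integer ids per key ('message', 'dump_file_id' plus the realm id, and so on). Its real definition lives in the shared import utilities and is not shown here; a minimal stand-in consistent with how these examples use it (an illustrative assumption) is a per-name counter:

from typing import Dict

_id_counters: Dict[str, int] = {}

def NEXT_ID(name: str) -> int:
    # Return 1, 2, 3, ... independently for each `name` key.
    _id_counters[name] = _id_counters.get(name, 0) + 1
    return _id_counters[name]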
Example #7
    def write_info(self, output_dir: str, realm_id: int) -> None:
        attachments = []  # type: List[Dict[str, Any]]
        uploads_records = []  # type: List[Dict[str, Any]]

        def add_attachment(info: Dict[str, Any]) -> None:
            build_attachment(
                realm_id=realm_id,
                message_ids=info['message_ids'],
                user_id=info['sender_id'],
                fileinfo=dict(
                    created=info['mtime'],  # minor lie
                    size=info['size'],
                    name=info['name'],
                ),
                s3_path=info['target_path'],
                zerver_attachment=attachments,
            )

        def add_upload(info: Dict[str, Any]) -> None:
            target_path = info['target_path']
            upload_rec = dict(
                size=info['size'],
                user_profile_id=info['sender_id'],
                realm_id=realm_id,
                s3_path=target_path,
                path=target_path,
                content_type=None,
            )
            uploads_records.append(upload_rec)

        def make_full_target_path(info: Dict[str, Any]) -> str:
            target_path = info['target_path']
            full_target_path = os.path.join(
                output_dir,
                'uploads',
                target_path,
            )
            full_target_path = os.path.abspath(full_target_path)
            os.makedirs(os.path.dirname(full_target_path), exist_ok=True)
            return full_target_path

        def copy_file(info: Dict[str, Any]) -> None:
            source_path = info['local_fn']
            target_path = make_full_target_path(info)
            shutil.copyfile(source_path, target_path)

        logging.info('Start processing attachment files')

        for info in self.info_dict.values():
            add_attachment(info)
            add_upload(info)
            copy_file(info)

        uploads_folder = os.path.join(output_dir, 'uploads')
        os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True)

        attachment = dict(
            zerver_attachment=attachments
        )

        create_converted_data_files(uploads_records, output_dir, '/uploads/records.json')
        create_converted_data_files(attachment, output_dir, '/attachment.json')

        logging.info('Done processing attachment files')
Example #8
def convert_slack_workspace_messages(slack_data_dir: str, users: List[ZerverFieldsT], realm_id: int,
                                     added_users: AddedUsersT, added_recipient: AddedRecipientsT,
                                     added_channels: AddedChannelsT, realm: ZerverFieldsT,
                                     zerver_realmemoji: List[ZerverFieldsT], domain_name: str,
                                     output_dir: str,
                                     chunk_size: int=MESSAGE_BATCH_CHUNK_SIZE) -> Tuple[List[ZerverFieldsT],
                                                                                        List[ZerverFieldsT],
                                                                                        List[ZerverFieldsT]]:
    """
    Returns:
    1. reactions, a list of the reactions
    2. uploads, a list of uploads to be written to uploads/records.json
    3. attachment, a list of the attachments
    """
    all_messages = get_all_messages(slack_data_dir, added_channels)

    # We sort the messages by timestamp so that they appear in the proper
    # date order.
    all_messages = sorted(all_messages, key=lambda message: message['ts'])

    logging.info('######### IMPORTING MESSAGES STARTED #########\n')

    total_reactions = []  # type: List[ZerverFieldsT]
    total_attachments = []  # type: List[ZerverFieldsT]
    total_uploads = []  # type: List[ZerverFieldsT]

    # The messages are stored in batches
    low_index = 0
    upper_index = low_index + chunk_size
    dump_file_id = 1

    subscriber_map = make_subscriber_map(
        zerver_subscription=realm['zerver_subscription'],
    )

    while True:
        message_data = all_messages[low_index:upper_index]
        if len(message_data) == 0:
            break
        zerver_message, zerver_usermessage, attachment, uploads, reactions = \
            channel_message_to_zerver_message(
                realm_id, users, added_users, added_recipient, message_data,
                zerver_realmemoji, subscriber_map, added_channels,
                domain_name)

        message_json = dict(
            zerver_message=zerver_message,
            zerver_usermessage=zerver_usermessage)

        message_file = "/messages-%06d.json" % (dump_file_id,)
        logging.info("Writing Messages to %s\n" % (output_dir + message_file))
        create_converted_data_files(message_json, output_dir, message_file)

        total_reactions += reactions
        total_attachments += attachment
        total_uploads += uploads

        low_index = upper_index
        upper_index = chunk_size + low_index
        dump_file_id += 1

    logging.info('######### IMPORTING MESSAGES FINISHED #########\n')
    return total_reactions, total_uploads, total_attachments
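On the caller side, the three returned lists feed the rest of the export: reactions end up in realm.json, while the uploads and attachments get their own records files, as the Slack do_convert_data examples further down show. In outline, adapted to the signature above (surrounding variable names assumed from those examples):

# Caller-side outline; slack_data_dir, user_list, uploads_folder, threads, etc.
# are assumed to be set up as in the do_convert_data examples on this page.
reactions, uploads_list, zerver_attachment = convert_slack_workspace_messages(
    slack_data_dir, user_list, realm_id, added_users, added_recipient,
    added_channels, realm, realm['zerver_realmemoji'], domain_name, output_dir)

realm['zerver_reaction'] = reactions
uploads_records = process_uploads(uploads_list, uploads_folder, threads)
attachment = {"zerver_attachment": zerver_attachment}
create_converted_data_files(uploads_records, output_dir, '/uploads/records.json')
create_converted_data_files(attachment, output_dir, '/attachment.json')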
Example #9
def do_convert_data(input_tar_file: str,
                    output_dir: str,
                    masking_content: bool,
                    api_token: Optional[str] = None,
                    slim_mode: bool = False) -> None:
    input_data_dir = untar_input_file(input_tar_file)

    attachment_handler = AttachmentHandler()
    user_handler = UserHandler()
    subscriber_handler = SubscriberHandler()
    user_id_mapper = IdMapper()
    stream_id_mapper = IdMapper()

    realm_id = 0
    realm = make_realm(realm_id=realm_id)

    # users.json -> UserProfile
    raw_user_data = read_user_data(data_dir=input_data_dir)
    convert_user_data(
        user_handler=user_handler,
        slim_mode=slim_mode,
        user_id_mapper=user_id_mapper,
        raw_data=raw_user_data,
        realm_id=realm_id,
    )
    normal_users = user_handler.get_normal_users()
    # Don't write zerver_userprofile here, because we
    # may add more users later.

    # streams.json -> Stream
    raw_stream_data = read_room_data(data_dir=input_data_dir)
    zerver_stream = convert_room_data(
        raw_data=raw_stream_data,
        subscriber_handler=subscriber_handler,
        stream_id_mapper=stream_id_mapper,
        user_id_mapper=user_id_mapper,
        realm_id=realm_id,
        api_token=api_token,
    )
    realm['zerver_stream'] = zerver_stream

    zerver_recipient = build_recipients(
        zerver_userprofile=normal_users,
        zerver_stream=zerver_stream,
    )
    realm['zerver_recipient'] = zerver_recipient

    if api_token is None:
        if slim_mode:
            public_stream_subscriptions: List[ZerverFieldsT] = []
        else:
            public_stream_subscriptions = build_public_stream_subscriptions(
                zerver_userprofile=normal_users,
                zerver_recipient=zerver_recipient,
                zerver_stream=zerver_stream,
            )

        private_stream_subscriptions = build_stream_subscriptions(
            get_users=subscriber_handler.get_users,
            zerver_recipient=zerver_recipient,
            zerver_stream=[
                stream_dict for stream_dict in zerver_stream
                if stream_dict['invite_only']
            ],
        )
        stream_subscriptions = public_stream_subscriptions + private_stream_subscriptions
    else:
        stream_subscriptions = build_stream_subscriptions(
            get_users=subscriber_handler.get_users,
            zerver_recipient=zerver_recipient,
            zerver_stream=zerver_stream,
        )

    personal_subscriptions = build_personal_subscriptions(
        zerver_recipient=zerver_recipient, )
    zerver_subscription = personal_subscriptions + stream_subscriptions

    realm['zerver_subscription'] = zerver_subscription

    zerver_realmemoji = write_emoticon_data(
        realm_id=realm_id,
        data_dir=input_data_dir,
        output_dir=output_dir,
    )
    realm['zerver_realmemoji'] = zerver_realmemoji

    subscriber_map = make_subscriber_map(
        zerver_subscription=zerver_subscription, )

    logging.info('Start importing message data')
    for message_key in [
            'UserMessage', 'NotificationMessage', 'PrivateUserMessage'
    ]:
        write_message_data(
            realm_id=realm_id,
            slim_mode=slim_mode,
            message_key=message_key,
            zerver_recipient=zerver_recipient,
            subscriber_map=subscriber_map,
            data_dir=input_data_dir,
            output_dir=output_dir,
            masking_content=masking_content,
            stream_id_mapper=stream_id_mapper,
            user_id_mapper=user_id_mapper,
            user_handler=user_handler,
            attachment_handler=attachment_handler,
        )

    # Order is important here...don't write users until
    # we process everything else, since we may introduce
    # mirror users when processing messages.
    realm['zerver_userprofile'] = user_handler.get_all_users()
    realm['sort_by_date'] = True

    create_converted_data_files(realm, output_dir, '/realm.json')

    logging.info('Start importing avatar data')
    write_avatar_data(
        raw_user_data=raw_user_data,
        output_dir=output_dir,
        user_id_mapper=user_id_mapper,
        realm_id=realm_id,
    )

    attachment_handler.write_info(
        output_dir=output_dir,
        realm_id=realm_id,
    )

    logging.info('Start making tarball')
    subprocess.check_call(
        ["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])
    logging.info('Done making tarball')
Example #11
    def write_info(self, output_dir: str, realm_id: int) -> None:
        attachments = []  # type: List[Dict[str, Any]]
        uploads_records = []  # type: List[Dict[str, Any]]

        def add_attachment(info: Dict[str, Any]) -> None:
            build_attachment(
                realm_id=realm_id,
                message_ids=info['message_ids'],
                attachment_id=NEXT_ID('attachment'),
                user_id=info['sender_id'],
                fileinfo=dict(
                    created=info['mtime'],  # minor lie
                    size=info['size'],
                    name=info['name'],
                ),
                s3_path=info['target_path'],
                zerver_attachment=attachments,
            )

        def add_upload(info: Dict[str, Any]) -> None:
            target_path = info['target_path']
            upload_rec = dict(
                size=info['size'],
                user_profile_id=info['sender_id'],
                realm_id=realm_id,
                s3_path=target_path,
                path=target_path,
                content_type=None,
                last_modified=None,
            )
            uploads_records.append(upload_rec)

        def make_full_target_path(info: Dict[str, Any]) -> str:
            target_path = info['target_path']
            full_target_path = os.path.join(
                output_dir,
                'uploads',
                target_path,
            )
            full_target_path = os.path.abspath(full_target_path)
            os.makedirs(os.path.dirname(full_target_path), exist_ok=True)
            return full_target_path

        def copy_file(info: Dict[str, Any]) -> None:
            source_path = info['local_fn']
            target_path = make_full_target_path(info)
            shutil.copyfile(source_path, target_path)

        logging.info('Start processing attachment files')

        for info in self.info_dict.values():
            add_attachment(info)
            add_upload(info)
            copy_file(info)

        uploads_folder = os.path.join(output_dir, 'uploads')
        os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True)

        attachment = dict(zerver_attachment=attachments)

        create_converted_data_files(uploads_records, output_dir,
                                    '/uploads/records.json')
        create_converted_data_files(attachment, output_dir, '/attachment.json')

        logging.info('Done processing attachment files')
Example #12
def process_raw_message_batch(realm_id: int, raw_messages: List[Dict[str,
                                                                     Any]],
                              subscriber_map: Dict[int, Set[int]],
                              user_id_mapper: IdMapper,
                              user_handler: UserHandler,
                              attachment_handler: AttachmentHandler,
                              get_recipient_id: Callable[[ZerverFieldsT], int],
                              is_pm_data: bool, output_dir: str) -> None:
    def fix_mentions(content: str, mention_user_ids: Set[int]) -> str:
        for user_id in mention_user_ids:
            user = user_handler.get_user(user_id=user_id)
            hipchat_mention = '@{short_name}'.format(**user)
            zulip_mention = '@**{full_name}**'.format(**user)
            content = content.replace(hipchat_mention, zulip_mention)

        content = content.replace('@here', '@**all**')
        return content

    mention_map = dict()  # type: Dict[int, Set[int]]

    def make_message(message_id: int,
                     raw_message: ZerverFieldsT) -> ZerverFieldsT:
        # One side effect here:
        mention_user_ids = {
            user_id_mapper.get(id)
            for id in set(raw_message['mention_user_ids'])
            if user_id_mapper.has(id)
        }
        mention_map[message_id] = mention_user_ids

        content = fix_mentions(
            content=raw_message['content'],
            mention_user_ids=mention_user_ids,
        )
        pub_date = raw_message['pub_date']
        recipient_id = get_recipient_id(raw_message)
        rendered_content = None

        if is_pm_data:
            topic_name = ''
        else:
            topic_name = 'imported from hipchat'
        user_id = raw_message['sender_id']

        # Another side effect:
        extra_content = attachment_handler.handle_message_data(
            realm_id=realm_id,
            message_id=message_id,
            sender_id=user_id,
            attachment=raw_message['attachment'],
            files_dir=raw_message['files_dir'],
        )

        if extra_content:
            has_attachment = True
            content += '\n' + extra_content
        else:
            has_attachment = False

        return build_message(
            content=content,
            message_id=message_id,
            pub_date=pub_date,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            topic_name=topic_name,
            user_id=user_id,
            has_attachment=has_attachment,
        )

    zerver_message = [
        make_message(message_id=NEXT_ID('message'), raw_message=raw_message)
        for raw_message in raw_messages
    ]

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=mention_map,
    )

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    dump_file_id = NEXT_ID('dump_file_id')
    message_file = "/messages-%06d.json" % (dump_file_id, )
    create_converted_data_files(message_json, output_dir, message_file)
Example #13
def do_convert_data(slack_zip_file: str, output_dir: str, token: str, threads: int = 6) -> None:
    # Subdomain is set by the user while running the import command
    realm_subdomain = ""
    realm_id = 0
    domain_name = settings.EXTERNAL_HOST

    check_token_access(token)

    slack_data_dir = slack_zip_file.replace(".zip", "")
    if not os.path.exists(slack_data_dir):
        os.makedirs(slack_data_dir)

    os.makedirs(output_dir, exist_ok=True)
    if os.listdir(output_dir):
        raise Exception("Output directory should be empty!")

    subprocess.check_call(["unzip", "-q", slack_zip_file, "-d", slack_data_dir])

    # We get the user data from Slack's legacy token API method, which is deprecated,
    # but we use it because user email data is only available through this method.
    user_list = get_slack_api_data("https://slack.com/api/users.list", "members", token=token)
    fetch_shared_channel_users(user_list, slack_data_dir, token)

    custom_emoji_list = get_slack_api_data("https://slack.com/api/emoji.list", "emoji", token=token)

    (
        realm,
        slack_user_id_to_zulip_user_id,
        slack_recipient_name_to_zulip_recipient_id,
        added_channels,
        added_mpims,
        dm_members,
        avatar_list,
        emoji_url_map,
    ) = slack_workspace_to_realm(
        domain_name, realm_id, user_list, realm_subdomain, slack_data_dir, custom_emoji_list
    )

    reactions, uploads_list, zerver_attachment = convert_slack_workspace_messages(
        slack_data_dir,
        user_list,
        realm_id,
        slack_user_id_to_zulip_user_id,
        slack_recipient_name_to_zulip_recipient_id,
        added_channels,
        added_mpims,
        dm_members,
        realm,
        realm["zerver_userprofile"],
        realm["zerver_realmemoji"],
        domain_name,
        output_dir,
    )

    # Move zerver_reactions to realm.json file
    realm["zerver_reaction"] = reactions

    emoji_folder = os.path.join(output_dir, "emoji")
    os.makedirs(emoji_folder, exist_ok=True)
    emoji_records = process_emojis(realm["zerver_realmemoji"], emoji_folder, emoji_url_map, threads)

    avatar_folder = os.path.join(output_dir, "avatars")
    avatar_realm_folder = os.path.join(avatar_folder, str(realm_id))
    os.makedirs(avatar_realm_folder, exist_ok=True)
    avatar_records = process_avatars(
        avatar_list, avatar_folder, realm_id, threads, size_url_suffix="-512"
    )

    uploads_folder = os.path.join(output_dir, "uploads")
    os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True)
    uploads_records = process_uploads(uploads_list, uploads_folder, threads)
    attachment = {"zerver_attachment": zerver_attachment}

    team_info_dict = get_slack_api_data("https://slack.com/api/team.info", "team", token=token)
    realm_icons_folder = os.path.join(output_dir, "realm_icons")
    realm_icon_records = fetch_team_icons(
        realm["zerver_realm"][0], team_info_dict, realm_icons_folder
    )

    create_converted_data_files(realm, output_dir, "/realm.json")
    create_converted_data_files(emoji_records, output_dir, "/emoji/records.json")
    create_converted_data_files(avatar_records, output_dir, "/avatars/records.json")
    create_converted_data_files(uploads_records, output_dir, "/uploads/records.json")
    create_converted_data_files(attachment, output_dir, "/attachment.json")
    create_converted_data_files(realm_icon_records, output_dir, "/realm_icons/records.json")

    rm_tree(slack_data_dir)
    subprocess.check_call(["tar", "-czf", output_dir + ".tar.gz", output_dir, "-P"])

    logging.info("######### DATA CONVERSION FINISHED #########\n")
    logging.info("Zulip data dump created at %s", output_dir)
Example #14
def do_convert_data(slack_zip_file: str, output_dir: str, token: str, threads: int=6) -> None:
    # Subdomain is set by the user while running the import command
    realm_subdomain = ""
    realm_id = 0
    domain_name = settings.EXTERNAL_HOST

    slack_data_dir = slack_zip_file.replace('.zip', '')
    if not os.path.exists(slack_data_dir):
        os.makedirs(slack_data_dir)

    os.makedirs(output_dir, exist_ok=True)
    # output directory should be empty initially
    if os.listdir(output_dir):
        raise Exception('Output directory should be empty!')

    subprocess.check_call(['unzip', '-q', slack_zip_file, '-d', slack_data_dir])
    # with zipfile.ZipFile(slack_zip_file, 'r') as zip_ref:
    #     zip_ref.extractall(slack_data_dir)

    # We get the user data from Slack's legacy token API method, which is deprecated,
    # but we use it because user email data is only available through this method.
    user_list = get_slack_api_data(token, "https://slack.com/api/users.list", "members")
    # Get custom emoji from the Slack API
    custom_emoji_list = get_slack_api_data(token, "https://slack.com/api/emoji.list", "emoji")

    realm, added_users, added_recipient, added_channels, avatar_list, \
        emoji_url_map = slack_workspace_to_realm(domain_name, realm_id, user_list,
                                                 realm_subdomain,
                                                 slack_data_dir, custom_emoji_list)

    reactions, uploads_list, zerver_attachment = convert_slack_workspace_messages(
        slack_data_dir, user_list, realm_id, added_users, added_recipient, added_channels,
        realm, realm['zerver_userprofile'], realm['zerver_realmemoji'], domain_name, output_dir)

    # Move zerver_reactions to realm.json file
    realm['zerver_reaction'] = reactions

    emoji_folder = os.path.join(output_dir, 'emoji')
    os.makedirs(emoji_folder, exist_ok=True)
    emoji_records = process_emojis(realm['zerver_realmemoji'], emoji_folder, emoji_url_map, threads)

    avatar_folder = os.path.join(output_dir, 'avatars')
    avatar_realm_folder = os.path.join(avatar_folder, str(realm_id))
    os.makedirs(avatar_realm_folder, exist_ok=True)
    avatar_records = process_avatars(avatar_list, avatar_folder, realm_id, threads, size_url_suffix='-512')

    uploads_folder = os.path.join(output_dir, 'uploads')
    os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True)
    uploads_records = process_uploads(uploads_list, uploads_folder, threads)
    attachment = {"zerver_attachment": zerver_attachment}

    # IO realm.json
    create_converted_data_files(realm, output_dir, '/realm.json')
    # IO emoji records
    create_converted_data_files(emoji_records, output_dir, '/emoji/records.json')
    # IO avatar records
    create_converted_data_files(avatar_records, output_dir, '/avatars/records.json')
    # IO uploads records
    create_converted_data_files(uploads_records, output_dir, '/uploads/records.json')
    # IO attachments records
    create_converted_data_files(attachment, output_dir, '/attachment.json')

    # remove slack dir
    rm_tree(slack_data_dir)
    subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])

    logging.info('######### DATA CONVERSION FINISHED #########\n')
    logging.info("Zulip data dump created at %s" % (output_dir))
Example #15
def convert_slack_workspace_messages(slack_data_dir: str, users: List[ZerverFieldsT], realm_id: int,
                                     added_users: AddedUsersT, added_recipient: AddedRecipientsT,
                                     added_channels: AddedChannelsT, realm: ZerverFieldsT,
                                     zerver_userprofile: List[ZerverFieldsT],
                                     zerver_realmemoji: List[ZerverFieldsT], domain_name: str,
                                     output_dir: str,
                                     chunk_size: int=MESSAGE_BATCH_CHUNK_SIZE) -> Tuple[List[ZerverFieldsT],
                                                                                        List[ZerverFieldsT],
                                                                                        List[ZerverFieldsT]]:
    """
    Returns:
    1. reactions, a list of the reactions
    2. uploads, a list of uploads to be written to uploads/records.json
    3. attachment, a list of the attachments
    """

    long_term_idle = process_long_term_idle_users(slack_data_dir, users, added_users,
                                                  added_channels, zerver_userprofile)

    # Now, we actually import the messages.
    all_messages = get_messages_iterator(slack_data_dir, added_channels)
    logging.info('######### IMPORTING MESSAGES STARTED #########\n')

    total_reactions = []  # type: List[ZerverFieldsT]
    total_attachments = []  # type: List[ZerverFieldsT]
    total_uploads = []  # type: List[ZerverFieldsT]

    # The messages are stored in batches
    dump_file_id = 1

    subscriber_map = make_subscriber_map(
        zerver_subscription=realm['zerver_subscription'],
    )

    while True:
        message_data = []
        _counter = 0
        for msg in all_messages:
            _counter += 1
            message_data.append(msg)
            if _counter == chunk_size:
                break
        if len(message_data) == 0:
            break

        zerver_message, zerver_usermessage, attachment, uploads, reactions = \
            channel_message_to_zerver_message(
                realm_id, users, added_users, added_recipient, message_data,
                zerver_realmemoji, subscriber_map, added_channels,
                domain_name, long_term_idle)

        message_json = dict(
            zerver_message=zerver_message,
            zerver_usermessage=zerver_usermessage)

        message_file = "/messages-%06d.json" % (dump_file_id,)
        logging.info("Writing Messages to %s\n" % (output_dir + message_file))
        create_converted_data_files(message_json, output_dir, message_file)

        total_reactions += reactions
        total_attachments += attachment
        total_uploads += uploads

        dump_file_id += 1

    logging.info('######### IMPORTING MESSAGES FINISHED #########\n')
    return total_reactions, total_uploads, total_attachments
Example #16
def do_convert_data(input_tar_file: str, output_dir: str) -> None:
    input_data_dir = untar_input_file(input_tar_file)

    attachment_handler = AttachmentHandler()
    user_handler = UserHandler()

    realm_id = 0
    realm = make_realm(realm_id=realm_id)

    # users.json -> UserProfile
    raw_user_data = read_user_data(data_dir=input_data_dir)
    convert_user_data(
        user_handler=user_handler,
        raw_data=raw_user_data,
        realm_id=realm_id,
    )
    normal_users = user_handler.get_normal_users()
    # Don't write zerver_userprofile here, because we
    # may add more users later.

    # streams.json -> Stream
    raw_stream_data = read_room_data(data_dir=input_data_dir)
    zerver_stream = convert_room_data(
        raw_data=raw_stream_data,
        realm_id=realm_id,
    )
    realm['zerver_stream'] = zerver_stream

    zerver_recipient = build_recipients(
        zerver_userprofile=normal_users,
        zerver_stream=zerver_stream,
    )
    realm['zerver_recipient'] = zerver_recipient

    zerver_subscription = build_subscriptions(
        zerver_userprofile=normal_users,
        zerver_recipient=zerver_recipient,
        zerver_stream=zerver_stream,
    )
    realm['zerver_subscription'] = zerver_subscription

    zerver_realmemoji = write_emoticon_data(
        realm_id=realm_id,
        data_dir=input_data_dir,
        output_dir=output_dir,
    )
    realm['zerver_realmemoji'] = zerver_realmemoji

    logging.info('Start importing message data')
    for message_key in ['UserMessage',
                        'PrivateUserMessage']:
        write_message_data(
            realm_id=realm_id,
            message_key=message_key,
            zerver_recipient=zerver_recipient,
            zerver_subscription=zerver_subscription,
            data_dir=input_data_dir,
            output_dir=output_dir,
            user_handler=user_handler,
            attachment_handler=attachment_handler,
        )

    # Order is important here...don't write users until
    # we process everything else, since we may introduce
    # mirror users when processing messages.
    realm['zerver_userprofile'] = user_handler.get_all_users()
    create_converted_data_files(realm, output_dir, '/realm.json')

    logging.info('Start importing avatar data')
    write_avatar_data(
        raw_user_data=raw_user_data,
        output_dir=output_dir,
        realm_id=realm_id,
    )

    attachment_handler.write_info(
        output_dir=output_dir,
        realm_id=realm_id,
    )

    logging.info('Start making tarball')
    subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])
    logging.info('Done making tarball')
Example #17
def do_convert_data(rocketchat_data_dir: str, output_dir: str) -> None:
    # Get all required exported data in a dictionary
    rocketchat_data = rocketchat_data_to_dict(rocketchat_data_dir)

    # Subdomain is set by the user while running the import command
    realm_subdomain = ""
    realm_id = 0
    domain_name = settings.EXTERNAL_HOST

    realm = make_realm(realm_id, realm_subdomain, domain_name,
                       rocketchat_data["instance"][0])

    user_id_to_user_map: Dict[str, Dict[str, Any]] = map_user_id_to_user(
        rocketchat_data["user"])
    username_to_user_id_map: Dict[str, str] = map_username_to_user_id(
        user_id_to_user_map)

    user_handler = UserHandler()
    subscriber_handler = SubscriberHandler()
    user_id_mapper = IdMapper()
    stream_id_mapper = IdMapper()
    huddle_id_mapper = IdMapper()

    process_users(
        user_id_to_user_map=user_id_to_user_map,
        realm_id=realm_id,
        domain_name=domain_name,
        user_handler=user_handler,
        user_id_mapper=user_id_mapper,
    )

    room_id_to_room_map: Dict[str, Dict[str, Any]] = {}
    team_id_to_team_map: Dict[str, Dict[str, Any]] = {}
    dsc_id_to_dsc_map: Dict[str, Dict[str, Any]] = {}
    direct_id_to_direct_map: Dict[str, Dict[str, Any]] = {}
    huddle_id_to_huddle_map: Dict[str, Dict[str, Any]] = {}

    categorize_channels_and_map_with_id(
        channel_data=rocketchat_data["room"],
        room_id_to_room_map=room_id_to_room_map,
        team_id_to_team_map=team_id_to_team_map,
        dsc_id_to_dsc_map=dsc_id_to_dsc_map,
        direct_id_to_direct_map=direct_id_to_direct_map,
        huddle_id_to_huddle_map=huddle_id_to_huddle_map,
    )

    zerver_stream = convert_channel_data(
        room_id_to_room_map=room_id_to_room_map,
        team_id_to_team_map=team_id_to_team_map,
        stream_id_mapper=stream_id_mapper,
        realm_id=realm_id,
    )
    realm["zerver_stream"] = zerver_stream

    # Add stream subscription data to `subscriber_handler`
    convert_stream_subscription_data(
        user_id_to_user_map=user_id_to_user_map,
        dsc_id_to_dsc_map=dsc_id_to_dsc_map,
        zerver_stream=zerver_stream,
        stream_id_mapper=stream_id_mapper,
        user_id_mapper=user_id_mapper,
        subscriber_handler=subscriber_handler,
    )

    zerver_huddle = convert_huddle_data(
        huddle_id_to_huddle_map=huddle_id_to_huddle_map,
        huddle_id_mapper=huddle_id_mapper,
        user_id_mapper=user_id_mapper,
        subscriber_handler=subscriber_handler,
    )
    realm["zerver_huddle"] = zerver_huddle

    all_users = user_handler.get_all_users()

    zerver_recipient = build_recipients(
        zerver_userprofile=all_users,
        zerver_stream=zerver_stream,
        zerver_huddle=zerver_huddle,
    )
    realm["zerver_recipient"] = zerver_recipient

    stream_subscriptions = build_stream_subscriptions(
        get_users=subscriber_handler.get_users,
        zerver_recipient=zerver_recipient,
        zerver_stream=zerver_stream,
    )

    huddle_subscriptions = build_huddle_subscriptions(
        get_users=subscriber_handler.get_users,
        zerver_recipient=zerver_recipient,
        zerver_huddle=zerver_huddle,
    )

    personal_subscriptions = build_personal_subscriptions(
        zerver_recipient=zerver_recipient, )

    zerver_subscription = personal_subscriptions + stream_subscriptions + huddle_subscriptions
    realm["zerver_subscription"] = zerver_subscription

    zerver_realmemoji = build_custom_emoji(
        realm_id=realm_id,
        custom_emoji_data=rocketchat_data["custom_emoji"],
        output_dir=output_dir,
    )
    realm["zerver_realmemoji"] = zerver_realmemoji

    subscriber_map = make_subscriber_map(
        zerver_subscription=zerver_subscription, )

    stream_id_to_recipient_id: Dict[int, int] = {}
    huddle_id_to_recipient_id: Dict[int, int] = {}
    user_id_to_recipient_id: Dict[int, int] = {}

    map_receiver_id_to_recipient_id(
        zerver_recipient=zerver_recipient,
        stream_id_to_recipient_id=stream_id_to_recipient_id,
        huddle_id_to_recipient_id=huddle_id_to_recipient_id,
        user_id_to_recipient_id=user_id_to_recipient_id,
    )

    channel_messages: List[Dict[str, Any]] = []
    private_messages: List[Dict[str, Any]] = []

    separate_channel_and_private_messages(
        messages=rocketchat_data["message"],
        direct_id_to_direct_map=direct_id_to_direct_map,
        huddle_id_to_huddle_map=huddle_id_to_huddle_map,
        channel_messages=channel_messages,
        private_messages=private_messages,
    )

    total_reactions: List[ZerverFieldsT] = []
    uploads_list: List[ZerverFieldsT] = []
    zerver_attachment: List[ZerverFieldsT] = []

    upload_id_to_upload_data_map = map_upload_id_to_upload_data(
        rocketchat_data["upload"])

    # Process channel messages
    process_messages(
        realm_id=realm_id,
        messages=channel_messages,
        subscriber_map=subscriber_map,
        is_pm_data=False,
        username_to_user_id_map=username_to_user_id_map,
        user_id_mapper=user_id_mapper,
        user_handler=user_handler,
        user_id_to_recipient_id=user_id_to_recipient_id,
        stream_id_mapper=stream_id_mapper,
        stream_id_to_recipient_id=stream_id_to_recipient_id,
        huddle_id_mapper=huddle_id_mapper,
        huddle_id_to_recipient_id=huddle_id_to_recipient_id,
        room_id_to_room_map=room_id_to_room_map,
        dsc_id_to_dsc_map=dsc_id_to_dsc_map,
        direct_id_to_direct_map=direct_id_to_direct_map,
        huddle_id_to_huddle_map=huddle_id_to_huddle_map,
        zerver_realmemoji=zerver_realmemoji,
        total_reactions=total_reactions,
        uploads_list=uploads_list,
        zerver_attachment=zerver_attachment,
        upload_id_to_upload_data_map=upload_id_to_upload_data_map,
        output_dir=output_dir,
    )
    # Process private messages
    process_messages(
        realm_id=realm_id,
        messages=private_messages,
        subscriber_map=subscriber_map,
        is_pm_data=True,
        username_to_user_id_map=username_to_user_id_map,
        user_id_mapper=user_id_mapper,
        user_handler=user_handler,
        user_id_to_recipient_id=user_id_to_recipient_id,
        stream_id_mapper=stream_id_mapper,
        stream_id_to_recipient_id=stream_id_to_recipient_id,
        huddle_id_mapper=huddle_id_mapper,
        huddle_id_to_recipient_id=huddle_id_to_recipient_id,
        room_id_to_room_map=room_id_to_room_map,
        dsc_id_to_dsc_map=dsc_id_to_dsc_map,
        direct_id_to_direct_map=direct_id_to_direct_map,
        huddle_id_to_huddle_map=huddle_id_to_huddle_map,
        zerver_realmemoji=zerver_realmemoji,
        total_reactions=total_reactions,
        uploads_list=uploads_list,
        zerver_attachment=zerver_attachment,
        upload_id_to_upload_data_map=upload_id_to_upload_data_map,
        output_dir=output_dir,
    )
    realm["zerver_reaction"] = total_reactions
    realm["zerver_userprofile"] = user_handler.get_all_users()
    realm["sort_by_date"] = True

    create_converted_data_files(realm, output_dir, "/realm.json")
    # TODO: Add support for importing avatars
    create_converted_data_files([], output_dir, "/avatars/records.json")

    # Import attachments
    attachment: Dict[str, List[Any]] = {"zerver_attachment": zerver_attachment}
    create_converted_data_files(attachment, output_dir, "/attachment.json")
    create_converted_data_files(uploads_list, output_dir,
                                "/uploads/records.json")

    logging.info("Start making tarball")
    subprocess.check_call(
        ["tar", "-czf", output_dir + ".tar.gz", output_dir, "-P"])
    logging.info("Done making tarball")
Example #18
def process_message_file(fn: str,
                         get_recipient_id: Callable[[ZerverFieldsT], int],
                         message_key: str,
                         user_map: Dict[int, ZerverFieldsT],
                         zerver_subscription: List[ZerverFieldsT],
                         data_dir: str,
                         output_dir: str) -> None:

    def fix_mentions(content: str,
                     mention_user_ids: List[int]) -> str:
        for user_id in mention_user_ids:
            user = user_map[user_id]
            hipchat_mention = '@{short_name}'.format(**user)
            zulip_mention = '@**{full_name}**'.format(**user)
            content = content.replace(hipchat_mention, zulip_mention)

        content = content.replace('@here', '@**all**')
        return content

    def get_raw_messages(fn: str) -> List[ZerverFieldsT]:
        dir = os.path.dirname(fn)
        fn_id = int(os.path.basename(dir))
        with open(fn) as fp:
            data = json.load(fp)

        flat_data = [
            d[message_key]
            for d in data
            if message_key in d
        ]

        return [
            dict(
                fn_id=fn_id,
                sender_id=d['sender']['id'],
                receiver_id=d.get('receiver', {}).get('id'),
                content=d['message'],
                mention_user_ids=d['mentions'],
                pub_date=str_date_to_float(d['timestamp']),
            )
            for d in flat_data
        ]

    raw_messages = get_raw_messages(fn)

    mention_map = dict()  # type: Dict[int, Set[int]]

    def make_message(message_id: int, raw_message: ZerverFieldsT) -> ZerverFieldsT:
        # One side effect here:
        mention_map[message_id] = set(raw_message['mention_user_ids'])

        content = fix_mentions(
            content=raw_message['content'],
            mention_user_ids=raw_message['mention_user_ids'],
        )
        pub_date = raw_message['pub_date']
        recipient_id = get_recipient_id(raw_message)
        rendered_content = None
        subject = 'archived'
        user_id = raw_message['sender_id']

        return build_message(
            content=content,
            message_id=message_id,
            pub_date=pub_date,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            subject=subject,
            user_id=user_id,
        )

    zerver_message = [
        make_message(
            message_id=NEXT_ID('message'),
            raw_message=raw_message
        )
        for raw_message in raw_messages
    ]

    subscriber_map = dict()  # type: Dict[int, Set[int]]
    for sub in zerver_subscription:
        user_id = sub['user_profile']
        recipient_id = sub['recipient']
        if recipient_id not in subscriber_map:
            subscriber_map[recipient_id] = set()
        subscriber_map[recipient_id].add(user_id)

    zerver_usermessage = []

    for message in zerver_message:
        message_id = message['id']
        recipient_id = message['recipient']
        mention_user_ids = mention_map[message_id]
        user_ids = subscriber_map.get(recipient_id, set())
        for user_id in user_ids:
            is_mentioned = user_id in mention_user_ids
            user_message = build_user_message(
                id=NEXT_ID('user_message'),
                user_id=user_id,
                message_id=message_id,
                is_mentioned=is_mentioned,
            )
            zerver_usermessage.append(user_message)

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    dump_file_id = NEXT_ID('dump_file_id')
    message_file = "/messages-%06d.json" % (dump_file_id,)
    create_converted_data_files(message_json, output_dir, message_file)
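The subscriber_map built inline above maps each recipient id to the set of subscribed user ids, which is what the make_subscriber_map helper used in the other examples provides. A sketch consistent with that inline loop (an assumption about the helper, not its verbatim source):

from typing import Any, Dict, List, Set

def make_subscriber_map(zerver_subscription: List[Dict[str, Any]]) -> Dict[int, Set[int]]:
    # Same logic as the inline loop above: recipient id -> set of subscriber user ids.
    subscriber_map: Dict[int, Set[int]] = {}
    for sub in zerver_subscription:
        subscriber_map.setdefault(sub['recipient'], set()).add(sub['user_profile'])
    return subscriber_map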
Example #19
def process_raw_message_batch(realm_id: int,
                              raw_messages: List[Dict[str, Any]],
                              subscriber_map: Dict[int, Set[int]],
                              user_id_mapper: IdMapper,
                              user_handler: UserHandler,
                              get_recipient_id_from_receiver_name: Callable[[str, int], int],
                              is_pm_data: bool,
                              output_dir: str,
                              zerver_realmemoji: List[Dict[str, Any]],
                              total_reactions: List[Dict[str, Any]],
                              ) -> None:

    def fix_mentions(content: str, mention_user_ids: Set[int]) -> str:
        for user_id in mention_user_ids:
            user = user_handler.get_user(user_id=user_id)
            mattermost_mention = '@{short_name}'.format(**user)
            zulip_mention = '@**{full_name}**'.format(**user)
            content = content.replace(mattermost_mention, zulip_mention)

        content = content.replace('@channel', '@**all**')
        content = content.replace('@all', '@**all**')
        # We don't have an equivalent for Mattermost's @here mention which mentions all users
        # online in the channel.
        content = content.replace('@here', '@**all**')
        return content

    mention_map: Dict[int, Set[int]] = dict()
    zerver_message = []

    import html2text
    h = html2text.HTML2Text()

    pm_members = {}

    for raw_message in raw_messages:
        message_id = NEXT_ID('message')
        mention_user_ids = get_mentioned_user_ids(raw_message, user_id_mapper)
        mention_map[message_id] = mention_user_ids

        content = fix_mentions(
            content=raw_message['content'],
            mention_user_ids=mention_user_ids,
        )
        content = h.handle(content)

        if len(content) > 10000:  # nocoverage
            logging.info('skipping too-long message of length %s', len(content))
            continue

        date_sent = raw_message['date_sent']
        sender_user_id = raw_message['sender_id']
        if "channel_name" in raw_message:
            recipient_id = get_recipient_id_from_receiver_name(raw_message["channel_name"], Recipient.STREAM)
        elif "huddle_name" in raw_message:
            recipient_id = get_recipient_id_from_receiver_name(raw_message["huddle_name"], Recipient.HUDDLE)
        elif "pm_members" in raw_message:
            members = raw_message["pm_members"]
            member_ids = {user_id_mapper.get(member) for member in members}
            pm_members[message_id] = member_ids
            if sender_user_id == user_id_mapper.get(members[0]):
                recipient_id = get_recipient_id_from_receiver_name(members[1], Recipient.PERSONAL)
            else:
                recipient_id = get_recipient_id_from_receiver_name(members[0], Recipient.PERSONAL)
        else:
            raise AssertionError("raw_message without channel_name, huddle_name or pm_members key")

        rendered_content = None

        topic_name = 'imported from mattermost'

        message = build_message(
            content=content,
            message_id=message_id,
            date_sent=date_sent,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            topic_name=topic_name,
            user_id=sender_user_id,
            has_attachment=False,
        )
        zerver_message.append(message)
        build_reactions(realm_id, total_reactions, raw_message["reactions"], message_id,
                        user_id_mapper, zerver_realmemoji)

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=mention_map,
    )

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    dump_file_id = NEXT_ID('dump_file_id' + str(realm_id))
    message_file = "/messages-%06d.json" % (dump_file_id,)
    create_converted_data_files(message_json, output_dir, message_file)
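A minimal, self-contained sketch of the mention rewriting that fix_mentions performs above, assuming user dicts with short_name and full_name keys (the function and sample names here are illustrative, not part of the source):

from typing import Dict, List

def rewrite_mentions(content: str, users: List[Dict[str, str]]) -> str:
    # Replace Mattermost-style @short_name mentions with Zulip's @**Full Name** syntax.
    for user in users:
        content = content.replace('@' + user['short_name'], '@**{}**'.format(user['full_name']))
    # Channel-wide mentions collapse to @**all**; Mattermost's @here has no direct Zulip equivalent.
    for channel_mention in ('@channel', '@all', '@here'):
        content = content.replace(channel_mention, '@**all**')
    return content

# rewrite_mentions('@channel please review, @alice',
#                  [{'short_name': 'alice', 'full_name': 'Alice Doe'}])
# -> '@**all** please review, @**Alice Doe**'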
Ejemplo n.º 20
0
def write_emoticon_data(
    realm_id: int, custom_emoji_data: List[Dict[str, Any]], data_dir: str, output_dir: str
) -> List[ZerverFieldsT]:
    """
    This function does most of the work for processing emoticons, the bulk
    of which is copying files.  We also write a json file with metadata.
    Finally, we return a list of RealmEmoji dicts to our caller.

    In our data_dir we have a pretty simple setup:

        The exported JSON file will have emoji rows if it contains any custom emoji
            {
                "type": "emoji",
                "emoji": {"name": "peerdium", "image": "exported_emoji/h15ni7kf1bnj7jeua4qhmctsdo/image"}
            }
            {
                "type": "emoji",
                "emoji": {"name": "tick", "image": "exported_emoji/7u7x8ytgp78q8jir81o9ejwwnr/image"}
            }

        exported_emoji/ - contains a bunch of image files:
            exported_emoji/7u7x8ytgp78q8jir81o9ejwwnr/image
            exported_emoji/h15ni7kf1bnj7jeua4qhmctsdo/image

    We move all the relevant files to Zulip's more nested
    directory structure.
    """

    logging.info("Starting to process emoticons")

    flat_data = [
        dict(
            path=d["image"],
            name=d["name"],
        )
        for d in custom_emoji_data
    ]

    emoji_folder = os.path.join(output_dir, "emoji")
    os.makedirs(emoji_folder, exist_ok=True)

    def process(data: ZerverFieldsT) -> ZerverFieldsT:
        source_sub_path = data["path"]
        source_path = os.path.join(data_dir, source_sub_path)

        target_fn = data["name"]
        target_sub_path = RealmEmoji.PATH_ID_TEMPLATE.format(
            realm_id=realm_id,
            emoji_file_name=target_fn,
        )
        target_path = os.path.join(emoji_folder, target_sub_path)

        os.makedirs(os.path.dirname(target_path), exist_ok=True)

        source_path = os.path.abspath(source_path)
        target_path = os.path.abspath(target_path)

        shutil.copyfile(source_path, target_path)

        return dict(
            path=target_path,
            s3_path=target_path,
            file_name=target_fn,
            realm_id=realm_id,
            name=data["name"],
        )

    emoji_records = list(map(process, flat_data))
    create_converted_data_files(emoji_records, output_dir, "/emoji/records.json")

    realmemoji = [
        build_realm_emoji(
            realm_id=realm_id,
            name=rec["name"],
            id=NEXT_ID("realmemoji"),
            file_name=rec["file_name"],
        )
        for rec in emoji_records
    ]
    logging.info("Done processing emoticons")

    return realmemoji
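For reference, the target paths built above follow RealmEmoji.PATH_ID_TEMPLATE; the value quoted in the HipChat examples' comments further down is "{realm_id}/emoji/images/{emoji_file_name}". A small sketch under that assumption:

import os

# Assumed value, quoted from the PATH_ID_TEMPLATE comments in the HipChat examples below.
PATH_ID_TEMPLATE = "{realm_id}/emoji/images/{emoji_file_name}"

def emoji_target_path(emoji_folder: str, realm_id: int, emoji_file_name: str) -> str:
    # Mirrors the target_sub_path construction in write_emoticon_data above.
    sub_path = PATH_ID_TEMPLATE.format(realm_id=realm_id, emoji_file_name=emoji_file_name)
    return os.path.join(emoji_folder, sub_path)

# emoji_target_path('/tmp/converted/emoji', 2, 'tick') -> '/tmp/converted/emoji/2/emoji/images/tick'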
Ejemplo n.º 21
0
def process_raw_message_batch(
    realm_id: int,
    raw_messages: List[Dict[str, Any]],
    subscriber_map: Dict[int, Set[int]],
    user_id_mapper: IdMapper,
    user_handler: UserHandler,
    get_recipient_id: Callable[[ZerverFieldsT], int],
    is_pm_data: bool,
    output_dir: str,
    zerver_realmemoji: List[Dict[str, Any]],
    total_reactions: List[Dict[str, Any]],
) -> None:
    def fix_mentions(content: str, mention_user_ids: Set[int]) -> str:
        for user_id in mention_user_ids:
            user = user_handler.get_user(user_id=user_id)
            mattermost_mention = '@{short_name}'.format(**user)
            zulip_mention = '@**{full_name}**'.format(**user)
            content = content.replace(mattermost_mention, zulip_mention)

        content = content.replace('@channel', '@**all**')
        content = content.replace('@all', '@**all**')
        # Mattermost's @here mention (all users currently online in the channel) has no
        # Zulip equivalent, so we map it to @**all** as well.
        content = content.replace('@here', '@**all**')
        return content

    mention_map = dict()  # type: Dict[int, Set[int]]
    zerver_message = []

    import html2text
    h = html2text.HTML2Text()

    name_to_codepoint = get_name_to_codepoint_dict()

    for raw_message in raw_messages:
        message_id = NEXT_ID('message')
        mention_user_ids = get_mentioned_user_ids(raw_message, user_id_mapper)
        mention_map[message_id] = mention_user_ids

        content = fix_mentions(
            content=raw_message['content'],
            mention_user_ids=mention_user_ids,
        )
        content = h.handle(content)

        if len(content) > 10000:  # nocoverage
            logging.info('skipping too-long message of length %s' %
                         (len(content), ))
            continue

        pub_date = raw_message['pub_date']
        try:
            recipient_id = get_recipient_id(raw_message)
        except KeyError:
            logging.debug(
                "Could not find recipient_id for a message, skipping.")
            continue

        rendered_content = None

        topic_name = 'imported from mattermost'
        user_id = raw_message['sender_id']

        message = build_message(
            content=content,
            message_id=message_id,
            pub_date=pub_date,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            topic_name=topic_name,
            user_id=user_id,
            has_attachment=False,
        )
        zerver_message.append(message)
        build_reactions(realm_id, total_reactions, raw_message["reactions"],
                        message_id, name_to_codepoint, user_id_mapper,
                        zerver_realmemoji)

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=mention_map,
    )

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    dump_file_id = NEXT_ID('dump_file_id' + str(realm_id))
    message_file = "/messages-%06d.json" % (dump_file_id, )
    create_converted_data_files(message_json, output_dir, message_file)
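NEXT_ID is used throughout these converters as a per-key id sequence (message ids, realmemoji ids, dump file ids). Its implementation is not shown in this listing; a plausible minimal stand-in, offered only as an assumption, is a dictionary of counters:

from collections import defaultdict
from itertools import count
from typing import Callable, Dict, Iterator

def make_next_id() -> Callable[[str], int]:
    counters: Dict[str, Iterator[int]] = defaultdict(lambda: count(1))
    def next_id(key: str) -> int:
        # Each key gets its own monotonically increasing sequence starting at 1.
        return next(counters[key])
    return next_id

# NEXT_ID = make_next_id()
# NEXT_ID('message') -> 1, NEXT_ID('message') -> 2, NEXT_ID('realmemoji') -> 1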
Ejemplo n.º 22
0
def write_emoticon_data(realm_id: int, data_dir: str,
                        output_dir: str) -> List[ZerverFieldsT]:
    '''
    This function does most of the work for processing emoticons, the bulk
    of which is copying files.  We also write a json file with metadata.
    Finally, we return a list of RealmEmoji dicts to our caller.

    In our data_dir we have a pretty simple setup:

        emoticons.json - has very simple metadata on emojis:

          {
            "Emoticon": {
              "id": 9875487,
              "path": "emoticons/yasss.jpg",
              "shortcut": "yasss"
            }
          },
          {
            "Emoticon": {
              "id": 718017,
              "path": "emoticons/yayyyyy.gif",
              "shortcut": "yayyyyy"
            }
          }

        emoticons/ - contains a bunch of image files:

            slytherinsnake.gif
            spanishinquisition.jpg
            sparkle.png
            spiderman.gif
            stableparrot.gif
            stalkerparrot.gif
            supergirl.png
            superman.png

    We move all the relevant files to Zulip's more nested
    directory structure.
    '''

    logging.info('Starting to process emoticons')

    fn = 'emoticons.json'
    data_file = os.path.join(data_dir, fn)
    if not os.path.exists(data_file):
        logging.warning("HipChat export does not contain emoticons.json.")
        logging.warning("As a result, custom emoji cannot be imported.")
        return []

    with open(data_file) as f:
        data = ujson.load(f)

    if isinstance(data, dict) and 'Emoticons' in data:
        # Handle the hc-migrate export format for emoticons.json.
        flat_data = [
            dict(
                path=d['path'],
                name=d['shortcut'],
            ) for d in data['Emoticons']
        ]
    else:
        flat_data = [
            dict(
                path=d['Emoticon']['path'],
                name=d['Emoticon']['shortcut'],
            ) for d in data
        ]

    emoji_folder = os.path.join(output_dir, 'emoji')
    os.makedirs(emoji_folder, exist_ok=True)

    def process(data: ZerverFieldsT) -> ZerverFieldsT:
        source_sub_path = data['path']
        source_fn = os.path.basename(source_sub_path)
        source_path = os.path.join(data_dir, source_sub_path)

        # Use our template from RealmEmoji
        # PATH_ID_TEMPLATE = "{realm_id}/emoji/images/{emoji_file_name}"
        target_fn = source_fn
        target_sub_path = RealmEmoji.PATH_ID_TEMPLATE.format(
            realm_id=realm_id,
            emoji_file_name=target_fn,
        )
        target_path = os.path.join(emoji_folder, target_sub_path)

        os.makedirs(os.path.dirname(target_path), exist_ok=True)

        source_path = os.path.abspath(source_path)
        target_path = os.path.abspath(target_path)

        shutil.copyfile(source_path, target_path)

        return dict(
            path=target_path,
            s3_path=target_path,
            file_name=target_fn,
            realm_id=realm_id,
            name=data['name'],
        )

    emoji_records = list(map(process, flat_data))
    create_converted_data_files(emoji_records, output_dir,
                                '/emoji/records.json')

    realmemoji = [
        build_realm_emoji(
            realm_id=realm_id,
            name=rec['name'],
            id=NEXT_ID('realmemoji'),
            file_name=rec['file_name'],
        ) for rec in emoji_records
    ]
    logging.info('Done processing emoticons')

    return realmemoji
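The branch above accepts two layouts of emoticons.json: the hc-migrate format (a dict with an "Emoticons" list) and the classic list of {"Emoticon": ...} rows. The same normalization, sketched in isolation:

from typing import Any, Dict, List

def normalize_emoticon_data(data: Any) -> List[Dict[str, str]]:
    # hc-migrate exports wrap the rows in an "Emoticons" key; older exports are a flat list.
    if isinstance(data, dict) and 'Emoticons' in data:
        return [dict(path=d['path'], name=d['shortcut']) for d in data['Emoticons']]
    return [dict(path=d['Emoticon']['path'], name=d['Emoticon']['shortcut']) for d in data]

# normalize_emoticon_data([{'Emoticon': {'id': 1, 'path': 'emoticons/yasss.jpg', 'shortcut': 'yasss'}}])
# -> [{'path': 'emoticons/yasss.jpg', 'name': 'yasss'}]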
Ejemplo n.º 23
0
def process_raw_message_batch(
    realm_id: int,
    raw_messages: List[Dict[str, Any]],
    subscriber_map: Dict[int, Set[int]],
    user_id_mapper: IdMapper,
    user_handler: UserHandler,
    get_recipient_id_from_receiver_name: Callable[[str, int], int],
    is_pm_data: bool,
    output_dir: str,
    zerver_realmemoji: List[Dict[str, Any]],
    total_reactions: List[Dict[str, Any]],
    uploads_list: List[ZerverFieldsT],
    zerver_attachment: List[ZerverFieldsT],
    mattermost_data_dir: str,
) -> None:
    def fix_mentions(content: str, mention_user_ids: Set[int]) -> str:
        for user_id in mention_user_ids:
            user = user_handler.get_user(user_id=user_id)
            mattermost_mention = "@{short_name}".format(**user)
            zulip_mention = "@**{full_name}**".format(**user)
            content = content.replace(mattermost_mention, zulip_mention)

        content = content.replace("@channel", "@**all**")
        content = content.replace("@all", "@**all**")
        # Mattermost's @here mention (all users currently online in the channel) has no
        # Zulip equivalent, so we map it to @**all** as well.
        content = content.replace("@here", "@**all**")
        return content

    mention_map: Dict[int, Set[int]] = {}
    zerver_message = []

    import html2text

    h = html2text.HTML2Text()

    pm_members = {}

    for raw_message in raw_messages:
        message_id = NEXT_ID("message")
        mention_user_ids = get_mentioned_user_ids(raw_message, user_id_mapper)
        mention_map[message_id] = mention_user_ids

        content = fix_mentions(
            content=raw_message["content"],
            mention_user_ids=mention_user_ids,
        )
        content = h.handle(content)

        if len(content) > 10000:  # nocoverage
            logging.info("skipping too-long message of length %s", len(content))
            continue

        date_sent = raw_message["date_sent"]
        sender_user_id = raw_message["sender_id"]
        if "channel_name" in raw_message:
            recipient_id = get_recipient_id_from_receiver_name(
                raw_message["channel_name"], Recipient.STREAM
            )
        elif "huddle_name" in raw_message:
            recipient_id = get_recipient_id_from_receiver_name(
                raw_message["huddle_name"], Recipient.HUDDLE
            )
        elif "pm_members" in raw_message:
            members = raw_message["pm_members"]
            member_ids = {user_id_mapper.get(member) for member in members}
            pm_members[message_id] = member_ids
            if sender_user_id == user_id_mapper.get(members[0]):
                recipient_id = get_recipient_id_from_receiver_name(members[1], Recipient.PERSONAL)
            else:
                recipient_id = get_recipient_id_from_receiver_name(members[0], Recipient.PERSONAL)
        else:
            raise AssertionError("raw_message without channel_name, huddle_name or pm_members key")

        rendered_content = None

        has_attachment = False
        has_image = False
        has_link = False
        if "attachments" in raw_message:
            has_attachment = True
            has_link = True

            attachment_markdown, has_image = process_message_attachments(
                attachments=raw_message["attachments"],
                realm_id=realm_id,
                message_id=message_id,
                user_id=sender_user_id,
                user_handler=user_handler,
                zerver_attachment=zerver_attachment,
                uploads_list=uploads_list,
                mattermost_data_dir=mattermost_data_dir,
                output_dir=output_dir,
            )

            content += attachment_markdown

        topic_name = "imported from mattermost"

        message = build_message(
            content=content,
            message_id=message_id,
            date_sent=date_sent,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            topic_name=topic_name,
            user_id=sender_user_id,
            has_image=has_image,
            has_link=has_link,
            has_attachment=has_attachment,
        )
        zerver_message.append(message)
        build_reactions(
            realm_id,
            total_reactions,
            raw_message["reactions"],
            message_id,
            user_id_mapper,
            zerver_realmemoji,
        )

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=mention_map,
    )

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    dump_file_id = NEXT_ID("dump_file_id" + str(realm_id))
    message_file = f"/messages-{dump_file_id:06}.json"
    create_converted_data_files(message_json, output_dir, message_file)
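For one-to-one private messages, the recipient chosen above is whichever of the two members is not the sender. A compact sketch of that decision, assuming the same IdMapper.get interface used in the example (the helper name is hypothetical):

from typing import Any, List

def personal_recipient_name(members: List[str], sender_user_id: int, user_id_mapper: Any) -> str:
    # If the sender maps to the first member, the message goes to the second, and vice versa.
    if sender_user_id == user_id_mapper.get(members[0]):
        return members[1]
    return members[0]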
Ejemplo n.º 24
0
def do_convert_data(input_tar_file: str, output_dir: str) -> None:
    input_data_dir = untar_input_file(input_tar_file)

    realm_id = 0
    realm = make_realm(realm_id=realm_id)

    # users.json -> UserProfile
    raw_user_data = read_user_data(data_dir=input_data_dir)
    zerver_userprofile = convert_user_data(
        raw_data=raw_user_data,
        realm_id=realm_id,
    )
    realm['zerver_userprofile'] = zerver_userprofile

    # streams.json -> Stream
    raw_stream_data = read_room_data(data_dir=input_data_dir)
    zerver_stream = convert_room_data(
        raw_data=raw_stream_data,
        realm_id=realm_id,
    )
    realm['zerver_stream'] = zerver_stream

    zerver_recipient = build_recipients(
        zerver_userprofile=zerver_userprofile,
        zerver_stream=zerver_stream,
    )
    realm['zerver_recipient'] = zerver_recipient

    zerver_subscription = build_subscriptions(
        zerver_userprofile=zerver_userprofile,
        zerver_recipient=zerver_recipient,
        zerver_stream=zerver_stream,
    )
    realm['zerver_subscription'] = zerver_subscription

    zerver_realmemoji = write_emoticon_data(
        realm_id=realm_id,
        data_dir=input_data_dir,
        output_dir=output_dir,
    )
    realm['zerver_realmemoji'] = zerver_realmemoji

    create_converted_data_files(realm, output_dir, '/realm.json')

    for message_key in ['UserMessage',
                        'PrivateUserMessage']:
        write_message_data(
            realm_id=realm_id,
            message_key=message_key,
            zerver_recipient=zerver_recipient,
            zerver_subscription=zerver_subscription,
            zerver_userprofile=zerver_userprofile,
            data_dir=input_data_dir,
            output_dir=output_dir,
        )

    write_avatar_data(
        raw_user_data=raw_user_data,
        output_dir=output_dir,
        realm_id=realm_id,
    )

    write_upload_data(
        output_dir=output_dir,
        realm_id=realm_id,
    )

    subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])
Ejemplo n.º 25
0
def do_convert_data(mattermost_data_dir: str, output_dir: str, masking_content: bool) -> None:
    username_to_user: Dict[str, Dict[str, Any]] = {}

    os.makedirs(output_dir, exist_ok=True)
    if os.listdir(output_dir):  # nocoverage
        raise Exception("Output directory should be empty!")

    mattermost_data_file = os.path.join(mattermost_data_dir, "export.json")
    mattermost_data = mattermost_data_file_to_dict(mattermost_data_file)

    username_to_user = create_username_to_user_mapping(mattermost_data["user"])

    for team in mattermost_data["team"]:
        realm_id = NEXT_ID("realm_id")
        team_name = team["name"]

        user_handler = UserHandler()
        subscriber_handler = SubscriberHandler()
        user_id_mapper = IdMapper()
        stream_id_mapper = IdMapper()
        huddle_id_mapper = IdMapper()

        print("Generating data for", team_name)
        realm = make_realm(realm_id, team)
        realm_output_dir = os.path.join(output_dir, team_name)

        reset_mirror_dummy_users(username_to_user)
        label_mirror_dummy_users(
            len(mattermost_data["team"]), team_name, mattermost_data, username_to_user
        )

        convert_user_data(
            user_handler=user_handler,
            user_id_mapper=user_id_mapper,
            user_data_map=username_to_user,
            realm_id=realm_id,
            team_name=team_name,
        )

        zerver_stream = convert_channel_data(
            channel_data=mattermost_data["channel"],
            user_data_map=username_to_user,
            subscriber_handler=subscriber_handler,
            stream_id_mapper=stream_id_mapper,
            user_id_mapper=user_id_mapper,
            realm_id=realm_id,
            team_name=team_name,
        )
        realm["zerver_stream"] = zerver_stream

        zerver_huddle: List[ZerverFieldsT] = []
        if len(mattermost_data["team"]) == 1:
            zerver_huddle = convert_huddle_data(
                huddle_data=mattermost_data["direct_channel"],
                user_data_map=username_to_user,
                subscriber_handler=subscriber_handler,
                huddle_id_mapper=huddle_id_mapper,
                user_id_mapper=user_id_mapper,
                realm_id=realm_id,
                team_name=team_name,
            )
            realm["zerver_huddle"] = zerver_huddle

        all_users = user_handler.get_all_users()

        zerver_recipient = build_recipients(
            zerver_userprofile=all_users,
            zerver_stream=zerver_stream,
            zerver_huddle=zerver_huddle,
        )
        realm["zerver_recipient"] = zerver_recipient

        stream_subscriptions = build_stream_subscriptions(
            get_users=subscriber_handler.get_users,
            zerver_recipient=zerver_recipient,
            zerver_stream=zerver_stream,
        )

        huddle_subscriptions = build_huddle_subscriptions(
            get_users=subscriber_handler.get_users,
            zerver_recipient=zerver_recipient,
            zerver_huddle=zerver_huddle,
        )

        personal_subscriptions = build_personal_subscriptions(
            zerver_recipient=zerver_recipient,
        )

        # Mattermost currently supports only exporting messages from channels.
        # Personal messages and huddles are not exported.
        zerver_subscription = personal_subscriptions + stream_subscriptions + huddle_subscriptions
        realm["zerver_subscription"] = zerver_subscription

        zerver_realmemoji = write_emoticon_data(
            realm_id=realm_id,
            custom_emoji_data=mattermost_data["emoji"],
            data_dir=mattermost_data_dir,
            output_dir=realm_output_dir,
        )
        realm["zerver_realmemoji"] = zerver_realmemoji

        subscriber_map = make_subscriber_map(
            zerver_subscription=zerver_subscription,
        )

        total_reactions: List[Dict[str, Any]] = []
        uploads_list: List[ZerverFieldsT] = []
        zerver_attachment: List[ZerverFieldsT] = []

        write_message_data(
            num_teams=len(mattermost_data["team"]),
            team_name=team_name,
            realm_id=realm_id,
            post_data=mattermost_data["post"],
            zerver_recipient=zerver_recipient,
            subscriber_map=subscriber_map,
            output_dir=realm_output_dir,
            masking_content=masking_content,
            stream_id_mapper=stream_id_mapper,
            huddle_id_mapper=huddle_id_mapper,
            user_id_mapper=user_id_mapper,
            user_handler=user_handler,
            zerver_realmemoji=zerver_realmemoji,
            total_reactions=total_reactions,
            uploads_list=uploads_list,
            zerver_attachment=zerver_attachment,
            mattermost_data_dir=mattermost_data_dir,
        )
        realm["zerver_reaction"] = total_reactions
        realm["zerver_userprofile"] = user_handler.get_all_users()
        realm["sort_by_date"] = True

        create_converted_data_files(realm, realm_output_dir, "/realm.json")
        # Mattermost currently doesn't support exporting avatars
        create_converted_data_files([], realm_output_dir, "/avatars/records.json")

        # Export message attachments
        attachment: Dict[str, List[Any]] = {"zerver_attachment": zerver_attachment}
        create_converted_data_files(uploads_list, realm_output_dir, "/uploads/records.json")
        create_converted_data_files(attachment, realm_output_dir, "/attachment.json")

        logging.info("Start making tarball")
        subprocess.check_call(["tar", "-czf", realm_output_dir + ".tar.gz", realm_output_dir, "-P"])
        logging.info("Done making tarball")
Ejemplo n.º 26
0
def write_emoticon_data(realm_id: int,
                        data_dir: str,
                        output_dir: str) -> List[ZerverFieldsT]:
    '''
    This function does most of the work for processing emoticons, the bulk
    of which is copying files.  We also write a json file with metadata.
    Finally, we return a list of RealmEmoji dicts to our caller.

    In our data_dir we have a pretty simple setup:

        emoticons.json - has very simple metadata on emojis:

          {
            "Emoticon": {
              "id": 9875487,
              "path": "emoticons/yasss.jpg",
              "shortcut": "yasss"
            }
          },
          {
            "Emoticon": {
              "id": 718017,
              "path": "emoticons/yayyyyy.gif",
              "shortcut": "yayyyyy"
            }
          }

        emoticons/ - contains a bunch of image files:

            slytherinsnake.gif
            spanishinquisition.jpg
            sparkle.png
            spiderman.gif
            stableparrot.gif
            stalkerparrot.gif
            supergirl.png
            superman.png

    We move all the relevant files to Zulip's more nested
    directory structure.
    '''

    logging.info('Starting to process emoticons')

    fn = 'emoticons.json'
    data_file = os.path.join(data_dir, fn)
    with open(data_file) as f:
        data = json.load(f)

    flat_data = [
        dict(
            path=d['Emoticon']['path'],
            name=d['Emoticon']['shortcut'],
        )
        for d in data
    ]

    emoji_folder = os.path.join(output_dir, 'emoji')
    os.makedirs(emoji_folder, exist_ok=True)

    def process(data: ZerverFieldsT) -> ZerverFieldsT:
        source_sub_path = data['path']
        source_fn = os.path.basename(source_sub_path)
        source_path = os.path.join(data_dir, source_sub_path)

        # Use our template from RealmEmoji
        # PATH_ID_TEMPLATE = "{realm_id}/emoji/images/{emoji_file_name}"
        target_fn = source_fn
        target_sub_path = RealmEmoji.PATH_ID_TEMPLATE.format(
            realm_id=realm_id,
            emoji_file_name=target_fn,
        )
        target_path = os.path.join(emoji_folder, target_sub_path)

        os.makedirs(os.path.dirname(target_path), exist_ok=True)

        source_path = os.path.abspath(source_path)
        target_path = os.path.abspath(target_path)

        shutil.copyfile(source_path, target_path)

        return dict(
            path=target_path,
            file_name=target_fn,
            realm_id=realm_id,
            name=data['name'],
        )

    emoji_records = list(map(process, flat_data))
    create_converted_data_files(emoji_records, output_dir, '/emoji/records.json')

    realmemoji = [
        build_realm_emoji(
            realm_id=realm_id,
            name=rec['name'],
            id=NEXT_ID('realmemoji'),
            file_name=rec['file_name'],
        )
        for rec in emoji_records
    ]
    logging.info('Done processing emoticons')

    return realmemoji
Ejemplo n.º 27
0
def process_raw_message_batch(realm_id: int,
                              raw_messages: List[Dict[str, Any]],
                              subscriber_map: Dict[int, Set[int]],
                              user_id_mapper: IdMapper,
                              user_handler: UserHandler,
                              attachment_handler: AttachmentHandler,
                              get_recipient_id: Callable[[ZerverFieldsT], int],
                              is_pm_data: bool,
                              output_dir: str) -> None:
    def fix_mentions(content: str, mention_user_ids: Set[int]) -> str:
        for user_id in mention_user_ids:
            user = user_handler.get_user(user_id=user_id)
            hipchat_mention = '@{short_name}'.format(**user)
            zulip_mention = '@**{full_name}**'.format(**user)
            content = content.replace(hipchat_mention, zulip_mention)

        content = content.replace('@here', '@**all**')
        return content

    mention_map: Dict[int, Set[int]] = dict()

    zerver_message = []

    import html2text
    h = html2text.HTML2Text()

    for raw_message in raw_messages:
        # One side effect here: we record this message's mentions in mention_map below.

        message_id = NEXT_ID('message')
        mention_user_ids = {
            user_id_mapper.get(mention_id)
            for mention_id in set(raw_message['mention_user_ids'])
            if user_id_mapper.has(mention_id)
        }
        mention_map[message_id] = mention_user_ids

        content = fix_mentions(
            content=raw_message['content'],
            mention_user_ids=mention_user_ids,
        )
        content = h.handle(content)

        if len(content) > 10000:
            logging.info('skipping too-long message of length %s' %
                         (len(content), ))
            continue

        date_sent = raw_message['date_sent']

        try:
            recipient_id = get_recipient_id(raw_message)
        except KeyError:
            logging.debug(
                "Could not find recipient_id for a message, skipping.")
            continue

        rendered_content = None

        if is_pm_data:
            topic_name = ''
        else:
            topic_name = 'imported from hipchat'
        user_id = raw_message['sender_id']

        # Another side effect: the attachment handler records attachment data for this
        # message, in addition to returning any extra content to append.
        extra_content = attachment_handler.handle_message_data(
            realm_id=realm_id,
            message_id=message_id,
            sender_id=user_id,
            attachment=raw_message['attachment'],
            files_dir=raw_message['files_dir'],
        )

        if extra_content:
            has_attachment = True
            content += '\n' + extra_content
        else:
            has_attachment = False

        message = build_message(
            content=content,
            message_id=message_id,
            date_sent=date_sent,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            topic_name=topic_name,
            user_id=user_id,
            has_attachment=has_attachment,
        )
        zerver_message.append(message)

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=mention_map,
    )

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    dump_file_id = NEXT_ID('dump_file_id')
    message_file = "/messages-%06d.json" % (dump_file_id, )
    create_converted_data_files(message_json, output_dir, message_file)
Ejemplo n.º 28
0
def process_message_file(fn: str,
                         get_recipient_id: Callable[[ZerverFieldsT], int],
                         message_key: str,
                         user_map: Dict[int, ZerverFieldsT],
                         zerver_subscription: List[ZerverFieldsT],
                         data_dir: str,
                         output_dir: str) -> None:

    def fix_mentions(content: str,
                     mention_user_ids: List[int]) -> str:
        for user_id in mention_user_ids:
            user = user_map[user_id]
            hipchat_mention = '@{short_name}'.format(**user)
            zulip_mention = '@**{full_name}**'.format(**user)
            content = content.replace(hipchat_mention, zulip_mention)

        content = content.replace('@here', '@**all**')
        return content

    def get_raw_messages(fn: str) -> List[ZerverFieldsT]:
        dirname = os.path.dirname(fn)
        fn_id = int(os.path.basename(dirname))
        with open(fn) as f:
            data = json.load(f)

        flat_data = [
            d[message_key]
            for d in data
            if message_key in d
        ]

        return [
            dict(
                fn_id=fn_id,
                sender_id=d['sender']['id'],
                receiver_id=d.get('receiver', {}).get('id'),
                content=d['message'],
                mention_user_ids=d['mentions'],
                pub_date=str_date_to_float(d['timestamp']),
            )
            for d in flat_data
        ]

    raw_messages = get_raw_messages(fn)

    mention_map = dict()  # type: Dict[int, Set[int]]

    def make_message(message_id: int, raw_message: ZerverFieldsT) -> ZerverFieldsT:
        # One side effect here: we record this message's mentions in mention_map.
        mention_map[message_id] = set(raw_message['mention_user_ids'])

        content = fix_mentions(
            content=raw_message['content'],
            mention_user_ids=raw_message['mention_user_ids'],
        )
        pub_date = raw_message['pub_date']
        recipient_id = get_recipient_id(raw_message)
        rendered_content = None
        subject = 'archived'
        user_id = raw_message['sender_id']

        return build_message(
            content=content,
            message_id=message_id,
            pub_date=pub_date,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            subject=subject,
            user_id=user_id,
        )

    zerver_message = [
        make_message(
            message_id=NEXT_ID('message'),
            raw_message=raw_message
        )
        for raw_message in raw_messages
    ]

    subscriber_map = dict()  # type: Dict[int, Set[int]]
    for sub in zerver_subscription:
        user_id = sub['user_profile']
        recipient_id = sub['recipient']
        if recipient_id not in subscriber_map:
            subscriber_map[recipient_id] = set()
        subscriber_map[recipient_id].add(user_id)

    zerver_usermessage = []

    for message in zerver_message:
        message_id = message['id']
        recipient_id = message['recipient']
        mention_user_ids = mention_map[message_id]
        user_ids = subscriber_map.get(recipient_id, set())
        for user_id in user_ids:
            is_mentioned = user_id in mention_user_ids
            user_message = build_user_message(
                id=NEXT_ID('user_message'),
                user_id=user_id,
                message_id=message_id,
                is_mentioned=is_mentioned,
            )
            zerver_usermessage.append(user_message)

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    dump_file_id = NEXT_ID('dump_file_id')
    message_file = "/messages-%06d.json" % (dump_file_id,)
    create_converted_data_files(message_json, output_dir, message_file)
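The subscriber_map built above (recipient id mapped to the set of subscribed user ids) can be written a bit more compactly with collections.defaultdict; a sketch of the same construction:

from collections import defaultdict
from typing import Any, Dict, List, Set

def build_subscriber_map(zerver_subscription: List[Dict[str, Any]]) -> Dict[int, Set[int]]:
    subscriber_map: Dict[int, Set[int]] = defaultdict(set)
    for sub in zerver_subscription:
        # Each subscription row links a user_profile to a recipient.
        subscriber_map[sub['recipient']].add(sub['user_profile'])
    return subscriber_map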
Ejemplo n.º 29
0
def convert_slack_workspace_messages(
    slack_data_dir: str,
    users: List[ZerverFieldsT],
    realm_id: int,
    slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
    slack_recipient_name_to_zulip_recipient_id: SlackToZulipRecipientT,
    added_channels: AddedChannelsT,
    added_mpims: AddedMPIMsT,
    dm_members: DMMembersT,
    realm: ZerverFieldsT,
    zerver_userprofile: List[ZerverFieldsT],
    zerver_realmemoji: List[ZerverFieldsT],
    domain_name: str,
    output_dir: str,
    chunk_size: int = MESSAGE_BATCH_CHUNK_SIZE
) -> Tuple[List[ZerverFieldsT], List[ZerverFieldsT], List[ZerverFieldsT]]:
    """
    Returns:
    1. reactions, which is a list of the reactions
    2. uploads, which is a list of uploads to be mapped in uploads records.json
    3. attachment, which is a list of the attachments
    """

    long_term_idle = process_long_term_idle_users(
        slack_data_dir, users, slack_user_id_to_zulip_user_id, added_channels,
        added_mpims, dm_members, zerver_userprofile)

    all_messages = get_messages_iterator(slack_data_dir, added_channels,
                                         added_mpims, dm_members)
    logging.info('######### IMPORTING MESSAGES STARTED #########\n')

    total_reactions = []  # type: List[ZerverFieldsT]
    total_attachments = []  # type: List[ZerverFieldsT]
    total_uploads = []  # type: List[ZerverFieldsT]

    dump_file_id = 1

    subscriber_map = make_subscriber_map(
        zerver_subscription=realm['zerver_subscription'],
    )

    while True:
        message_data = []
        _counter = 0
        for msg in all_messages:
            _counter += 1
            message_data.append(msg)
            if _counter == chunk_size:
                break
        if len(message_data) == 0:
            break

        zerver_message, zerver_usermessage, attachment, uploads, reactions = \
            channel_message_to_zerver_message(
                realm_id, users, slack_user_id_to_zulip_user_id, slack_recipient_name_to_zulip_recipient_id,
                message_data, zerver_realmemoji, subscriber_map, added_channels, dm_members,
                domain_name, long_term_idle)

        message_json = dict(zerver_message=zerver_message,
                            zerver_usermessage=zerver_usermessage)

        message_file = "/messages-%06d.json" % (dump_file_id, )
        logging.info("Writing Messages to %s\n" %
                     (output_dir + message_file, ))
        create_converted_data_files(message_json, output_dir, message_file)

        total_reactions += reactions
        total_attachments += attachment
        total_uploads += uploads

        dump_file_id += 1

    logging.info('######### IMPORTING MESSAGES FINISHED #########\n')
    return total_reactions, total_uploads, total_attachments
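The inner while loop above drains the message iterator in fixed-size batches. The same chunking can be sketched with itertools.islice:

from itertools import islice
from typing import Any, Dict, Iterator, List

def iter_message_chunks(all_messages: Iterator[Dict[str, Any]],
                        chunk_size: int) -> Iterator[List[Dict[str, Any]]]:
    while True:
        # Take up to chunk_size messages; an empty chunk means the iterator is exhausted.
        chunk = list(islice(all_messages, chunk_size))
        if not chunk:
            return
        yield chunk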
Ejemplo n.º 30
0
def do_convert_data(input_tar_file: str, output_dir: str) -> None:
    input_data_dir = untar_input_file(input_tar_file)

    realm_id = 0
    realm = make_realm(realm_id=realm_id)

    # users.json -> UserProfile
    raw_user_data = read_user_data(data_dir=input_data_dir)
    zerver_userprofile = convert_user_data(
        raw_data=raw_user_data,
        realm_id=realm_id,
    )
    realm['zerver_userprofile'] = zerver_userprofile

    # streams.json -> Stream
    raw_stream_data = read_room_data(data_dir=input_data_dir)
    zerver_stream = convert_room_data(
        raw_data=raw_stream_data,
        realm_id=realm_id,
    )
    realm['zerver_stream'] = zerver_stream

    zerver_recipient = build_recipients(
        zerver_userprofile=zerver_userprofile,
        zerver_stream=zerver_stream,
    )
    realm['zerver_recipient'] = zerver_recipient

    zerver_subscription = build_subscriptions(
        zerver_userprofile=zerver_userprofile,
        zerver_recipient=zerver_recipient,
        zerver_stream=zerver_stream,
    )
    realm['zerver_subscription'] = zerver_subscription

    zerver_realmemoji = write_emoticon_data(
        realm_id=realm_id,
        data_dir=input_data_dir,
        output_dir=output_dir,
    )
    realm['zerver_realmemoji'] = zerver_realmemoji

    create_converted_data_files(realm, output_dir, '/realm.json')

    logging.info('Start importing message data')
    for message_key in ['UserMessage',
                        'PrivateUserMessage']:
        write_message_data(
            realm_id=realm_id,
            message_key=message_key,
            zerver_recipient=zerver_recipient,
            zerver_subscription=zerver_subscription,
            zerver_userprofile=zerver_userprofile,
            data_dir=input_data_dir,
            output_dir=output_dir,
        )

    logging.info('Start importing avatar data')
    write_avatar_data(
        raw_user_data=raw_user_data,
        output_dir=output_dir,
        realm_id=realm_id,
    )

    write_upload_data(
        output_dir=output_dir,
        realm_id=realm_id,
    )

    logging.info('Start making tarball')
    subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])
    logging.info('Done making tarball')
Ejemplo n.º 31
0
def do_convert_data(slack_zip_file: str, output_dir: str, token: str, threads: int=6) -> None:
    # Subdomain is set by the user while running the import command
    realm_subdomain = ""
    realm_id = 0
    domain_name = settings.EXTERNAL_HOST

    slack_data_dir = slack_zip_file.replace('.zip', '')
    if not os.path.exists(slack_data_dir):
        os.makedirs(slack_data_dir)

    os.makedirs(output_dir, exist_ok=True)
    # output directory should be empty initially
    if os.listdir(output_dir):
        raise Exception('Output directory should be empty!')

    subprocess.check_call(['unzip', '-q', slack_zip_file, '-d', slack_data_dir])
    # with zipfile.ZipFile(slack_zip_file, 'r') as zip_ref:
    #     zip_ref.extractall(slack_data_dir)

    # We get the user data via the legacy-token method of the Slack API, which is deprecated,
    # but we use it because user email data is only available through this method.
    user_list = get_slack_api_data(token, "https://slack.com/api/users.list", "members")
    # Get custom emoji from slack api
    custom_emoji_list = get_slack_api_data(token, "https://slack.com/api/emoji.list", "emoji")

    realm, added_users, added_recipient, added_channels, avatar_list, \
        emoji_url_map = slack_workspace_to_realm(domain_name, realm_id, user_list,
                                                 realm_subdomain,
                                                 slack_data_dir, custom_emoji_list)

    reactions, uploads_list, zerver_attachment = convert_slack_workspace_messages(
        slack_data_dir, user_list, realm_id, added_users, added_recipient, added_channels,
        realm, realm['zerver_realmemoji'], domain_name, output_dir)

    # Move zerver_reactions to realm.json file
    realm['zerver_reaction'] = reactions

    emoji_folder = os.path.join(output_dir, 'emoji')
    os.makedirs(emoji_folder, exist_ok=True)
    emoji_records = process_emojis(realm['zerver_realmemoji'], emoji_folder, emoji_url_map, threads)

    avatar_folder = os.path.join(output_dir, 'avatars')
    avatar_realm_folder = os.path.join(avatar_folder, str(realm_id))
    os.makedirs(avatar_realm_folder, exist_ok=True)
    avatar_records = process_avatars(avatar_list, avatar_folder, realm_id, threads, size_url_suffix='-512')

    uploads_folder = os.path.join(output_dir, 'uploads')
    os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True)
    uploads_records = process_uploads(uploads_list, uploads_folder, threads)
    attachment = {"zerver_attachment": zerver_attachment}

    # IO realm.json
    create_converted_data_files(realm, output_dir, '/realm.json')
    # IO emoji records
    create_converted_data_files(emoji_records, output_dir, '/emoji/records.json')
    # IO avatar records
    create_converted_data_files(avatar_records, output_dir, '/avatars/records.json')
    # IO uploads records
    create_converted_data_files(uploads_records, output_dir, '/uploads/records.json')
    # IO attachments records
    create_converted_data_files(attachment, output_dir, '/attachment.json')

    # remove slack dir
    rm_tree(slack_data_dir)
    subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])

    logging.info('######### DATA CONVERSION FINISHED #########\n')
    logging.info("Zulip data dump created at %s" % (output_dir))
Ejemplo n.º 32
0
def build_custom_emoji(realm_id: int,
                       custom_emoji_data: Dict[str, List[Dict[str, Any]]],
                       output_dir: str) -> List[ZerverFieldsT]:
    logging.info("Starting to process custom emoji")

    emoji_folder = os.path.join(output_dir, "emoji")
    os.makedirs(emoji_folder, exist_ok=True)

    zerver_realmemoji: List[ZerverFieldsT] = []
    emoji_records: List[ZerverFieldsT] = []

    # Map emoji file_id to emoji file data
    emoji_file_data = {}
    for emoji_file in custom_emoji_data["file"]:
        emoji_file_data[emoji_file["_id"]] = {
            "filename": emoji_file["filename"],
            "chunks": []
        }
    for emoji_chunk in custom_emoji_data["chunk"]:
        emoji_file_data[emoji_chunk["files_id"]]["chunks"].append(
            emoji_chunk["data"])

    # Build custom emoji
    for rc_emoji in custom_emoji_data["emoji"]:
        # Note: this id format depends on the upstream database layout and may change with it.
        emoji_file_id = ".".join([rc_emoji["name"], rc_emoji["extension"]])

        emoji_file_info = emoji_file_data[emoji_file_id]

        emoji_filename = emoji_file_info["filename"]
        emoji_data = b"".join(emoji_file_info["chunks"])

        target_sub_path = RealmEmoji.PATH_ID_TEMPLATE.format(
            realm_id=realm_id,
            emoji_file_name=emoji_filename,
        )
        target_path = os.path.join(emoji_folder, target_sub_path)

        os.makedirs(os.path.dirname(target_path), exist_ok=True)
        with open(target_path, "wb") as e_file:
            e_file.write(emoji_data)

        emoji_aliases = [rc_emoji["name"]]
        emoji_aliases.extend(rc_emoji["aliases"])

        for alias in emoji_aliases:
            emoji_record = dict(
                path=target_path,
                s3_path=target_path,
                file_name=emoji_filename,
                realm_id=realm_id,
                name=alias,
            )
            emoji_records.append(emoji_record)

            realmemoji = build_realm_emoji(
                realm_id=realm_id,
                name=alias,
                id=NEXT_ID("realmemoji"),
                file_name=emoji_filename,
            )
            zerver_realmemoji.append(realmemoji)

    create_converted_data_files(emoji_records, output_dir,
                                "/emoji/records.json")
    logging.info("Done processing emoji")

    return zerver_realmemoji
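A hypothetical shape for custom_emoji_data as consumed above, covering the three collections the function reads (file, chunk, emoji); the values are illustrative only:

custom_emoji_data = {
    "file": [
        {"_id": "tada.png", "filename": "tada.png"},
    ],
    "chunk": [
        # Binary image data is stored in one or more chunks keyed by files_id.
        {"files_id": "tada.png", "data": b"\x89PNG..."},
    ],
    "emoji": [
        # The file id is rebuilt as "<name>.<extension>" in the function above.
        {"name": "tada", "extension": "png", "aliases": ["celebrate"]},
    ],
}
# zerver_realmemoji = build_custom_emoji(realm_id=3, custom_emoji_data=custom_emoji_data,
#                                        output_dir='/tmp/out')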