Example #1
0
def process_raw_message_batch(realm_id: int, raw_messages: List[Dict[str,
                                                                     Any]],
                              subscriber_map: Dict[int, Set[int]],
                              user_id_mapper: IdMapper,
                              user_handler: UserHandler,
                              attachment_handler: AttachmentHandler,
                              get_recipient_id: Callable[[ZerverFieldsT], int],
                              is_pm_data: bool, output_dir: str) -> None:
    def fix_mentions(content: str, mention_user_ids: Set[int]) -> str:
        for user_id in mention_user_ids:
            user = user_handler.get_user(user_id=user_id)
            hipchat_mention = '@{short_name}'.format(**user)
            zulip_mention = '@**{full_name}**'.format(**user)
            content = content.replace(hipchat_mention, zulip_mention)

        content = content.replace('@here', '@**all**')
        return content

    mention_map: Dict[int, Set[int]] = dict()

    zerver_message = []

    import html2text
    h = html2text.HTML2Text()

    for raw_message in raw_messages:
        # One side effect here:

        message_id = NEXT_ID('message')
        mention_user_ids = {
            user_id_mapper.get(id)
            for id in set(raw_message['mention_user_ids'])
            if user_id_mapper.has(id)
        }
        mention_map[message_id] = mention_user_ids

        content = fix_mentions(
            content=raw_message['content'],
            mention_user_ids=mention_user_ids,
        )
        content = h.handle(content)

        if len(content) > 10000:
            logging.info('skipping too-long message of length %s' %
                         (len(content), ))
            continue

        date_sent = raw_message['date_sent']

        try:
            recipient_id = get_recipient_id(raw_message)
        except KeyError:
            logging.debug(
                "Could not find recipient_id for a message, skipping.")
            continue

        rendered_content = None

        if is_pm_data:
            topic_name = ''
        else:
            topic_name = 'imported from hipchat'
        user_id = raw_message['sender_id']

        # Another side effect:
        extra_content = attachment_handler.handle_message_data(
            realm_id=realm_id,
            message_id=message_id,
            sender_id=user_id,
            attachment=raw_message['attachment'],
            files_dir=raw_message['files_dir'],
        )

        if extra_content:
            has_attachment = True
            content += '\n' + extra_content
        else:
            has_attachment = False

        message = build_message(
            content=content,
            message_id=message_id,
            date_sent=date_sent,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            topic_name=topic_name,
            user_id=user_id,
            has_attachment=has_attachment,
        )
        zerver_message.append(message)

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=mention_map,
    )

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    dump_file_id = NEXT_ID('dump_file_id')
    message_file = "/messages-%06d.json" % (dump_file_id, )
    create_converted_data_files(message_json, output_dir, message_file)
Example #2
0
def do_convert_data(input_tar_file: str,
                    output_dir: str,
                    masking_content: bool,
                    api_token: Optional[str] = None,
                    slim_mode: bool = False) -> None:
    input_data_dir = untar_input_file(input_tar_file)

    attachment_handler = AttachmentHandler()
    user_handler = UserHandler()
    subscriber_handler = SubscriberHandler()
    user_id_mapper = IdMapper()
    stream_id_mapper = IdMapper()

    realm_id = 0
    realm = make_realm(realm_id=realm_id)

    # users.json -> UserProfile
    raw_user_data = read_user_data(data_dir=input_data_dir)
    convert_user_data(
        user_handler=user_handler,
        slim_mode=slim_mode,
        user_id_mapper=user_id_mapper,
        raw_data=raw_user_data,
        realm_id=realm_id,
    )
    normal_users = user_handler.get_normal_users()
    # Don't write zerver_userprofile here, because we
    # may add more users later.

    # streams.json -> Stream
    raw_stream_data = read_room_data(data_dir=input_data_dir)
    zerver_stream = convert_room_data(
        raw_data=raw_stream_data,
        subscriber_handler=subscriber_handler,
        stream_id_mapper=stream_id_mapper,
        user_id_mapper=user_id_mapper,
        realm_id=realm_id,
        api_token=api_token,
    )
    realm['zerver_stream'] = zerver_stream

    zerver_recipient = build_recipients(
        zerver_userprofile=normal_users,
        zerver_stream=zerver_stream,
    )
    realm['zerver_recipient'] = zerver_recipient

    if api_token is None:
        if slim_mode:
            public_stream_subscriptions: List[ZerverFieldsT] = []
        else:
            public_stream_subscriptions = build_public_stream_subscriptions(
                zerver_userprofile=normal_users,
                zerver_recipient=zerver_recipient,
                zerver_stream=zerver_stream,
            )

        private_stream_subscriptions = build_stream_subscriptions(
            get_users=subscriber_handler.get_users,
            zerver_recipient=zerver_recipient,
            zerver_stream=[
                stream_dict for stream_dict in zerver_stream
                if stream_dict['invite_only']
            ],
        )
        stream_subscriptions = public_stream_subscriptions + private_stream_subscriptions
    else:
        stream_subscriptions = build_stream_subscriptions(
            get_users=subscriber_handler.get_users,
            zerver_recipient=zerver_recipient,
            zerver_stream=zerver_stream,
        )

    personal_subscriptions = build_personal_subscriptions(
        zerver_recipient=zerver_recipient, )
    zerver_subscription = personal_subscriptions + stream_subscriptions

    realm['zerver_subscription'] = zerver_subscription

    zerver_realmemoji = write_emoticon_data(
        realm_id=realm_id,
        data_dir=input_data_dir,
        output_dir=output_dir,
    )
    realm['zerver_realmemoji'] = zerver_realmemoji

    subscriber_map = make_subscriber_map(
        zerver_subscription=zerver_subscription, )

    logging.info('Start importing message data')
    for message_key in [
            'UserMessage', 'NotificationMessage', 'PrivateUserMessage'
    ]:
        write_message_data(
            realm_id=realm_id,
            slim_mode=slim_mode,
            message_key=message_key,
            zerver_recipient=zerver_recipient,
            subscriber_map=subscriber_map,
            data_dir=input_data_dir,
            output_dir=output_dir,
            masking_content=masking_content,
            stream_id_mapper=stream_id_mapper,
            user_id_mapper=user_id_mapper,
            user_handler=user_handler,
            attachment_handler=attachment_handler,
        )

    # Order is important here...don't write users until
    # we process everything else, since we may introduce
    # mirror users when processing messages.
    realm['zerver_userprofile'] = user_handler.get_all_users()
    realm['sort_by_date'] = True

    create_converted_data_files(realm, output_dir, '/realm.json')

    logging.info('Start importing avatar data')
    write_avatar_data(
        raw_user_data=raw_user_data,
        output_dir=output_dir,
        user_id_mapper=user_id_mapper,
        realm_id=realm_id,
    )

    attachment_handler.write_info(
        output_dir=output_dir,
        realm_id=realm_id,
    )

    logging.info('Start making tarball')
    subprocess.check_call(
        ["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])
    logging.info('Done making tarball')
Example #3
0
def do_convert_data(input_tar_file: str, output_dir: str) -> None:
    input_data_dir = untar_input_file(input_tar_file)

    attachment_handler = AttachmentHandler()
    user_handler = UserHandler()

    realm_id = 0
    realm = make_realm(realm_id=realm_id)

    # users.json -> UserProfile
    raw_user_data = read_user_data(data_dir=input_data_dir)
    convert_user_data(
        user_handler=user_handler,
        raw_data=raw_user_data,
        realm_id=realm_id,
    )
    normal_users = user_handler.get_normal_users()
    # Don't write zerver_userprofile here, because we
    # may add more users later.

    # streams.json -> Stream
    raw_stream_data = read_room_data(data_dir=input_data_dir)
    zerver_stream = convert_room_data(
        raw_data=raw_stream_data,
        realm_id=realm_id,
    )
    realm['zerver_stream'] = zerver_stream

    zerver_recipient = build_recipients(
        zerver_userprofile=normal_users,
        zerver_stream=zerver_stream,
    )
    realm['zerver_recipient'] = zerver_recipient

    zerver_subscription = build_subscriptions(
        zerver_userprofile=normal_users,
        zerver_recipient=zerver_recipient,
        zerver_stream=zerver_stream,
    )
    realm['zerver_subscription'] = zerver_subscription

    zerver_realmemoji = write_emoticon_data(
        realm_id=realm_id,
        data_dir=input_data_dir,
        output_dir=output_dir,
    )
    realm['zerver_realmemoji'] = zerver_realmemoji

    logging.info('Start importing message data')
    for message_key in ['UserMessage',
                        'PrivateUserMessage']:
        write_message_data(
            realm_id=realm_id,
            message_key=message_key,
            zerver_recipient=zerver_recipient,
            zerver_subscription=zerver_subscription,
            data_dir=input_data_dir,
            output_dir=output_dir,
            user_handler=user_handler,
            attachment_handler=attachment_handler,
        )

    # Order is important here...don't write users until
    # we process everything else, since we may introduce
    # mirror users when processing messages.
    realm['zerver_userprofile'] = user_handler.get_all_users()
    create_converted_data_files(realm, output_dir, '/realm.json')

    logging.info('Start importing avatar data')
    write_avatar_data(
        raw_user_data=raw_user_data,
        output_dir=output_dir,
        realm_id=realm_id,
    )

    attachment_handler.write_info(
        output_dir=output_dir,
        realm_id=realm_id,
    )

    logging.info('Start making tarball')
    subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])
    logging.info('Done making tarball')