Ejemplo n.º 1
0
def do_convert_data(gitter_data_file: str,
                    output_dir: str,
                    threads: int = 6) -> None:
    """Convert a Gitter JSON export into a Zulip data dump.

    The converted files are written below ``output_dir`` and the directory
    is then archived as ``output_dir + '.tar.gz'``.

    Args:
        gitter_data_file: Path of the Gitter export (a JSON file).
        output_dir: Destination directory; created if missing and required
            to be empty.
        threads: Forwarded unchanged to ``process_avatars``.

    Raises:
        Exception: If ``output_dir`` already contains any entries.
        subprocess.CalledProcessError: If the ``tar`` invocation fails.
    """
    # The subdomain is chosen by the user when the import command is run,
    # so the conversion leaves it blank.
    realm_subdomain = ""
    domain_name = settings.EXTERNAL_HOST

    # The destination has to start out empty.
    os.makedirs(output_dir, exist_ok=True)
    if len(os.listdir(output_dir)) > 0:
        raise Exception("Output directory should be empty!")

    # Load the Gitter export.
    with open(gitter_data_file, "rb") as export_file:
        raw_export = export_file.read()
    gitter_data = orjson.loads(raw_export)

    converted = gitter_workspace_to_realm(domain_name, gitter_data,
                                          realm_subdomain)
    realm, avatar_list, user_map, stream_map = converted

    subscriber_map = make_subscriber_map(
        zerver_subscription=realm['zerver_subscription'])

    # Lookup table used when converting user mentions.
    user_short_name_to_full_name = {
        profile['short_name']: profile['full_name']
        for profile in realm['zerver_userprofile']
    }

    convert_gitter_workspace_messages(
        gitter_data,
        output_dir,
        subscriber_map,
        user_map,
        stream_map,
        user_short_name_to_full_name,
    )

    # NOTE(review): realm_id is not defined in this function; it is
    # presumably a module-level constant -- confirm.
    avatar_folder = os.path.join(output_dir, 'avatars')
    os.makedirs(os.path.join(avatar_folder, str(realm_id)), exist_ok=True)
    avatar_records = process_avatars(avatar_list, avatar_folder, realm_id,
                                     threads)

    attachment: Dict[str, List[Any]] = {"zerver_attachment": []}

    # Write the output files: realm, emoji records, avatar records,
    # uploads records and attachments (in that order).
    output_files = (
        (realm, '/realm.json'),
        ([], '/emoji/records.json'),
        (avatar_records, '/avatars/records.json'),
        ([], '/uploads/records.json'),
        (attachment, '/attachment.json'),
    )
    for payload, relative_path in output_files:
        create_converted_data_files(payload, output_dir, relative_path)

    tarball_path = output_dir + '.tar.gz'
    subprocess.check_call(["tar", "-czf", tarball_path, output_dir, '-P'])

    logging.info('######### DATA CONVERSION FINISHED #########\n')
    logging.info("Zulip data dump created at %s", output_dir)
Ejemplo n.º 2
0
def do_convert_data(gitter_data_file: str, output_dir: str, threads: int=6) -> None:
    """Convert a Gitter JSON export into a Zulip data dump.

    The converted files are written below ``output_dir`` and the directory
    is then archived as ``output_dir + '.tar.gz'``.

    Args:
        gitter_data_file: Path of the Gitter export (a JSON file).
        output_dir: Destination directory; created if missing and required
            to be empty.
        threads: Forwarded unchanged to ``process_avatars``.

    Raises:
        Exception: If ``output_dir`` already contains any entries.
        subprocess.CalledProcessError: If the ``tar`` invocation fails.
    """
    #  Subdomain is set by the user while running the import commands
    realm_subdomain = ""
    domain_name = settings.EXTERNAL_HOST

    os.makedirs(output_dir, exist_ok=True)
    # output directory should be empty initially
    if os.listdir(output_dir):
        raise Exception("Output directory should be empty!")

    # Read data from the gitter file.  JSON is UTF-8 encoded, so decode it
    # explicitly instead of relying on the locale's default encoding.
    with open(gitter_data_file, "r", encoding="utf-8") as fp:
        gitter_data = ujson.load(fp)

    realm, avatar_list, user_map = gitter_workspace_to_realm(
        domain_name, gitter_data, realm_subdomain)

    subscriber_map = make_subscriber_map(
        zerver_subscription=realm['zerver_subscription'],
    )

    # For user mentions
    user_short_name_to_full_name = {
        userprofile['short_name']: userprofile['full_name']
        for userprofile in realm['zerver_userprofile']
    }

    convert_gitter_workspace_messages(
        gitter_data, output_dir, subscriber_map, user_map,
        user_short_name_to_full_name)

    # NOTE(review): realm_id is not defined in this function; it is
    # presumably a module-level constant -- confirm.
    avatar_folder = os.path.join(output_dir, 'avatars')
    avatar_realm_folder = os.path.join(avatar_folder, str(realm_id))
    os.makedirs(avatar_realm_folder, exist_ok=True)
    avatar_records = process_avatars(avatar_list, avatar_folder, realm_id, threads)

    attachment = {"zerver_attachment": []}  # type: Dict[str, List[Any]]

    # IO realm.json
    create_converted_data_files(realm, output_dir, '/realm.json')
    # IO emoji records
    create_converted_data_files([], output_dir, '/emoji/records.json')
    # IO avatar records
    create_converted_data_files(avatar_records, output_dir, '/avatars/records.json')
    # IO uploads records
    create_converted_data_files([], output_dir, '/uploads/records.json')
    # IO attachments records
    create_converted_data_files(attachment, output_dir, '/attachment.json')

    subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P'])

    logging.info('######### DATA CONVERSION FINISHED #########\n')
    # Pass the argument to logging instead of pre-formatting the message.
    logging.info("Zulip data dump created at %s", output_dir)
Ejemplo n.º 3
0
def convert_slack_workspace_messages(
        slack_data_dir: str,
        users: List[ZerverFieldsT],
        realm_id: int,
        added_users: AddedUsersT,
        added_recipient: AddedRecipientsT,
        added_channels: AddedChannelsT,
        realm: ZerverFieldsT,
        zerver_realmemoji: List[ZerverFieldsT],
        domain_name: str,
        output_dir: str,
        chunk_size: int=MESSAGE_BATCH_CHUNK_SIZE) -> Tuple[List[ZerverFieldsT],
                                                           List[ZerverFieldsT],
                                                           List[ZerverFieldsT]]:
    """
    Convert the Slack messages in batches of ``chunk_size`` and write each
    batch to its own ``/messages-NNNNNN.json`` file under ``output_dir``.

    Returns a 3-tuple:
    1. reactions, the reactions collected over all batches
    2. uploads, the uploads to be mapped in uploads records.json
    3. attachment, the attachments collected over all batches
    """
    # Sort by timestamp so that messages are processed in date order.
    all_messages = sorted(get_all_messages(slack_data_dir, added_channels),
                          key=lambda message: message['ts'])

    logging.info('######### IMPORTING MESSAGES STARTED #########\n')

    total_reactions = []  # type: List[ZerverFieldsT]
    total_attachments = []  # type: List[ZerverFieldsT]
    total_uploads = []  # type: List[ZerverFieldsT]

    subscriber_map = make_subscriber_map(
        zerver_subscription=realm['zerver_subscription'])

    # Walk the sorted list one fixed-size window at a time; an empty window
    # means everything has been written.
    batch_start = 0
    dump_file_id = 1
    message_data = all_messages[batch_start:batch_start + chunk_size]
    while message_data:
        converted = channel_message_to_zerver_message(
            realm_id, users, added_users, added_recipient, message_data,
            zerver_realmemoji, subscriber_map, added_channels,
            domain_name)
        zerver_message, zerver_usermessage, attachment, uploads, reactions = converted

        message_json = {
            'zerver_message': zerver_message,
            'zerver_usermessage': zerver_usermessage,
        }

        message_file = "/messages-%06d.json" % (dump_file_id,)
        logging.info("Writing Messages to %s\n" % (output_dir + message_file))
        create_converted_data_files(message_json, output_dir, message_file)

        total_reactions.extend(reactions)
        total_attachments.extend(attachment)
        total_uploads.extend(uploads)

        batch_start += chunk_size
        dump_file_id += 1
        message_data = all_messages[batch_start:batch_start + chunk_size]

    logging.info('######### IMPORTING MESSAGES FINISHED #########\n')
    return total_reactions, total_uploads, total_attachments
Ejemplo n.º 4
0
def convert_slack_workspace_messages(
    slack_data_dir: str,
    users: List[ZerverFieldsT],
    realm_id: int,
    slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
    slack_recipient_name_to_zulip_recipient_id: SlackToZulipRecipientT,
    added_channels: AddedChannelsT,
    added_mpims: AddedMPIMsT,
    dm_members: DMMembersT,
    realm: ZerverFieldsT,
    zerver_userprofile: List[ZerverFieldsT],
    zerver_realmemoji: List[ZerverFieldsT],
    domain_name: str,
    output_dir: str,
    chunk_size: int = MESSAGE_BATCH_CHUNK_SIZE
) -> Tuple[List[ZerverFieldsT], List[ZerverFieldsT], List[ZerverFieldsT]]:
    """
    Convert the Slack messages in batches of ``chunk_size`` and write each
    batch to its own ``/messages-NNNNNN.json`` file under ``output_dir``.

    Returns a 3-tuple:
    1. reactions, the reactions collected over all batches
    2. uploads, the uploads to be mapped in uploads records.json
    3. attachment, the attachments collected over all batches
    """
    long_term_idle = process_long_term_idle_users(
        slack_data_dir,
        users,
        slack_user_id_to_zulip_user_id,
        added_channels,
        added_mpims,
        dm_members,
        zerver_userprofile,
    )

    all_messages = get_messages_iterator(
        slack_data_dir, added_channels, added_mpims, dm_members)
    logging.info('######### IMPORTING MESSAGES STARTED #########\n')

    total_reactions = []  # type: List[ZerverFieldsT]
    total_attachments = []  # type: List[ZerverFieldsT]
    total_uploads = []  # type: List[ZerverFieldsT]

    subscriber_map = make_subscriber_map(
        zerver_subscription=realm['zerver_subscription'])

    dump_file_id = 1
    while True:
        # Collect the next batch.  This assumes all_messages is a one-shot
        # iterator, so each pass resumes where the previous batch stopped.
        message_data = []
        for msg in all_messages:
            message_data.append(msg)
            if len(message_data) == chunk_size:
                break
        if not message_data:
            # Nothing left to convert.
            break

        converted = channel_message_to_zerver_message(
            realm_id, users, slack_user_id_to_zulip_user_id,
            slack_recipient_name_to_zulip_recipient_id,
            message_data, zerver_realmemoji, subscriber_map,
            added_channels, dm_members, domain_name, long_term_idle)
        zerver_message, zerver_usermessage, attachment, uploads, reactions = converted

        message_json = {
            'zerver_message': zerver_message,
            'zerver_usermessage': zerver_usermessage,
        }

        message_file = "/messages-%06d.json" % (dump_file_id, )
        logging.info("Writing Messages to %s\n" %
                     (output_dir + message_file, ))
        create_converted_data_files(message_json, output_dir, message_file)

        total_reactions.extend(reactions)
        total_attachments.extend(attachment)
        total_uploads.extend(uploads)

        dump_file_id += 1

    logging.info('######### IMPORTING MESSAGES FINISHED #########\n')
    return total_reactions, total_uploads, total_attachments
Ejemplo n.º 5
0
def do_convert_data(input_tar_file: str,
                    output_dir: str,
                    masking_content: bool,
                    api_token: Optional[str] = None,
                    slim_mode: bool = False) -> None:
    """Convert the export in ``input_tar_file`` into data files under
    ``output_dir`` and archive the result as ``output_dir + '.tar.gz'``.

    Args:
        input_tar_file: Export archive, unpacked with ``untar_input_file``.
        output_dir: Directory that receives the converted files.
        masking_content: Forwarded to ``write_message_data``.
        api_token: Forwarded to ``convert_room_data``.  When it is ``None``
            stream subscriptions are assembled from public streams plus
            invite-only streams; otherwise they are built for every stream
            from the subscriber handler.
        slim_mode: Forwarded to user/message conversion; when set (and no
            ``api_token`` is given) public stream subscriptions are skipped.

    Raises:
        subprocess.CalledProcessError: If the ``tar`` invocation fails.
    """
    input_data_dir = untar_input_file(input_tar_file)

    attachment_handler = AttachmentHandler()
    user_handler = UserHandler()
    subscriber_handler = SubscriberHandler()
    user_id_mapper = IdMapper()
    stream_id_mapper = IdMapper()

    realm_id = 0
    realm = make_realm(realm_id=realm_id)

    # Users.  realm['zerver_userprofile'] is deliberately filled in much
    # later, because message processing may still add users.
    raw_user_data = read_user_data(data_dir=input_data_dir)
    convert_user_data(
        user_handler=user_handler,
        slim_mode=slim_mode,
        user_id_mapper=user_id_mapper,
        raw_data=raw_user_data,
        realm_id=realm_id,
    )
    normal_users = user_handler.get_normal_users()

    # Rooms -> streams.
    zerver_stream = convert_room_data(
        raw_data=read_room_data(data_dir=input_data_dir),
        subscriber_handler=subscriber_handler,
        stream_id_mapper=stream_id_mapper,
        user_id_mapper=user_id_mapper,
        realm_id=realm_id,
        api_token=api_token,
    )
    realm['zerver_stream'] = zerver_stream

    zerver_recipient = build_recipients(
        zerver_userprofile=normal_users,
        zerver_stream=zerver_stream,
    )
    realm['zerver_recipient'] = zerver_recipient

    if api_token is not None:
        stream_subscriptions = build_stream_subscriptions(
            get_users=subscriber_handler.get_users,
            zerver_recipient=zerver_recipient,
            zerver_stream=zerver_stream,
        )
    else:
        public_stream_subscriptions: List[ZerverFieldsT] = []
        if not slim_mode:
            public_stream_subscriptions = build_public_stream_subscriptions(
                zerver_userprofile=normal_users,
                zerver_recipient=zerver_recipient,
                zerver_stream=zerver_stream,
            )

        invite_only_streams = [
            stream_dict for stream_dict in zerver_stream
            if stream_dict['invite_only']
        ]
        private_stream_subscriptions = build_stream_subscriptions(
            get_users=subscriber_handler.get_users,
            zerver_recipient=zerver_recipient,
            zerver_stream=invite_only_streams,
        )
        stream_subscriptions = public_stream_subscriptions + private_stream_subscriptions

    personal_subscriptions = build_personal_subscriptions(
        zerver_recipient=zerver_recipient)
    zerver_subscription = personal_subscriptions + stream_subscriptions
    realm['zerver_subscription'] = zerver_subscription

    zerver_realmemoji = write_emoticon_data(
        realm_id=realm_id,
        data_dir=input_data_dir,
        output_dir=output_dir,
    )
    realm['zerver_realmemoji'] = zerver_realmemoji

    subscriber_map = make_subscriber_map(
        zerver_subscription=zerver_subscription)

    logging.info('Start importing message data')
    message_keys = ('UserMessage', 'NotificationMessage', 'PrivateUserMessage')
    for message_key in message_keys:
        write_message_data(
            realm_id=realm_id,
            slim_mode=slim_mode,
            message_key=message_key,
            zerver_recipient=zerver_recipient,
            subscriber_map=subscriber_map,
            data_dir=input_data_dir,
            output_dir=output_dir,
            masking_content=masking_content,
            stream_id_mapper=stream_id_mapper,
            user_id_mapper=user_id_mapper,
            user_handler=user_handler,
            attachment_handler=attachment_handler,
        )

    # Ordering matters: users are only recorded once all messages have been
    # processed, since message processing may introduce mirror users.
    realm['zerver_userprofile'] = user_handler.get_all_users()
    realm['sort_by_date'] = True

    create_converted_data_files(realm, output_dir, '/realm.json')

    logging.info('Start importing avatar data')
    write_avatar_data(
        raw_user_data=raw_user_data,
        output_dir=output_dir,
        user_id_mapper=user_id_mapper,
        realm_id=realm_id,
    )

    attachment_handler.write_info(
        output_dir=output_dir,
        realm_id=realm_id,
    )

    logging.info('Start making tarball')
    tarball_path = output_dir + '.tar.gz'
    subprocess.check_call(["tar", "-czf", tarball_path, output_dir, '-P'])
    logging.info('Done making tarball')
Ejemplo n.º 6
0
def do_convert_data(mattermost_data_dir: str, output_dir: str, masking_content: bool) -> None:
    """Convert a Mattermost export into one data dump per team.

    ``export.json`` is read from ``mattermost_data_dir``.  For every team in
    the export a sub-directory named after the team is filled under
    ``output_dir`` and archived as ``<sub-directory>.tar.gz``.

    Args:
        mattermost_data_dir: Directory containing ``export.json``.
        output_dir: Destination directory; created if missing and required
            to be empty.
        masking_content: Forwarded to ``write_message_data``.

    Raises:
        Exception: If ``output_dir`` already contains any entries.
        subprocess.CalledProcessError: If a ``tar`` invocation fails.
    """
    os.makedirs(output_dir, exist_ok=True)
    if os.listdir(output_dir):  # nocoverage
        raise Exception("Output directory should be empty!")

    mattermost_data = mattermost_data_file_to_dict(
        os.path.join(mattermost_data_dir, "export.json")
    )

    username_to_user: Dict[str, Dict[str, Any]] = create_username_to_user_mapping(
        mattermost_data["user"]
    )

    teams = mattermost_data["team"]
    num_teams = len(teams)

    for team in teams:
        realm_id = NEXT_ID("realm_id")
        team_name = team["name"]

        # Handlers and id mappers are created fresh for each team.
        user_handler = UserHandler()
        subscriber_handler = SubscriberHandler()
        user_id_mapper = IdMapper()
        stream_id_mapper = IdMapper()
        huddle_id_mapper = IdMapper()

        print("Generating data for", team_name)
        realm = make_realm(realm_id, team)
        realm_output_dir = os.path.join(output_dir, team_name)

        reset_mirror_dummy_users(username_to_user)
        label_mirror_dummy_users(num_teams, team_name, mattermost_data, username_to_user)

        convert_user_data(
            user_handler=user_handler,
            user_id_mapper=user_id_mapper,
            user_data_map=username_to_user,
            realm_id=realm_id,
            team_name=team_name,
        )

        zerver_stream = convert_channel_data(
            channel_data=mattermost_data["channel"],
            user_data_map=username_to_user,
            subscriber_handler=subscriber_handler,
            stream_id_mapper=stream_id_mapper,
            user_id_mapper=user_id_mapper,
            realm_id=realm_id,
            team_name=team_name,
        )
        realm["zerver_stream"] = zerver_stream

        # Huddles are only converted when the export holds a single team.
        zerver_huddle: List[ZerverFieldsT] = []
        if num_teams == 1:
            zerver_huddle = convert_huddle_data(
                huddle_data=mattermost_data["direct_channel"],
                user_data_map=username_to_user,
                subscriber_handler=subscriber_handler,
                huddle_id_mapper=huddle_id_mapper,
                user_id_mapper=user_id_mapper,
                realm_id=realm_id,
                team_name=team_name,
            )
            realm["zerver_huddle"] = zerver_huddle

        zerver_recipient = build_recipients(
            zerver_userprofile=user_handler.get_all_users(),
            zerver_stream=zerver_stream,
            zerver_huddle=zerver_huddle,
        )
        realm["zerver_recipient"] = zerver_recipient

        stream_subscriptions = build_stream_subscriptions(
            get_users=subscriber_handler.get_users,
            zerver_recipient=zerver_recipient,
            zerver_stream=zerver_stream,
        )
        huddle_subscriptions = build_huddle_subscriptions(
            get_users=subscriber_handler.get_users,
            zerver_recipient=zerver_recipient,
            zerver_huddle=zerver_huddle,
        )
        personal_subscriptions = build_personal_subscriptions(
            zerver_recipient=zerver_recipient,
        )

        zerver_subscription = personal_subscriptions + stream_subscriptions + huddle_subscriptions
        realm["zerver_subscription"] = zerver_subscription

        zerver_realmemoji = write_emoticon_data(
            realm_id=realm_id,
            custom_emoji_data=mattermost_data["emoji"],
            data_dir=mattermost_data_dir,
            output_dir=realm_output_dir,
        )
        realm["zerver_realmemoji"] = zerver_realmemoji

        subscriber_map = make_subscriber_map(
            zerver_subscription=zerver_subscription,
        )

        # These lists are handed to write_message_data and written out
        # below; presumably it fills them in place -- verify against its
        # implementation.
        total_reactions: List[Dict[str, Any]] = []
        uploads_list: List[ZerverFieldsT] = []
        zerver_attachment: List[ZerverFieldsT] = []

        write_message_data(
            num_teams=num_teams,
            team_name=team_name,
            realm_id=realm_id,
            post_data=mattermost_data["post"],
            zerver_recipient=zerver_recipient,
            subscriber_map=subscriber_map,
            output_dir=realm_output_dir,
            masking_content=masking_content,
            stream_id_mapper=stream_id_mapper,
            huddle_id_mapper=huddle_id_mapper,
            user_id_mapper=user_id_mapper,
            user_handler=user_handler,
            zerver_realmemoji=zerver_realmemoji,
            total_reactions=total_reactions,
            uploads_list=uploads_list,
            zerver_attachment=zerver_attachment,
            mattermost_data_dir=mattermost_data_dir,
        )
        realm["zerver_reaction"] = total_reactions
        # Users are read again only after the messages have been processed.
        realm["zerver_userprofile"] = user_handler.get_all_users()
        realm["sort_by_date"] = True

        attachment: Dict[str, List[Any]] = {"zerver_attachment": zerver_attachment}

        # Output files: realm data, an empty avatar record list (no avatars
        # are converted here), then upload records and attachments.
        output_files = (
            (realm, "/realm.json"),
            ([], "/avatars/records.json"),
            (uploads_list, "/uploads/records.json"),
            (attachment, "/attachment.json"),
        )
        for payload, relative_path in output_files:
            create_converted_data_files(payload, realm_output_dir, relative_path)

        logging.info("Start making tarball")
        tarball_path = realm_output_dir + ".tar.gz"
        subprocess.check_call(["tar", "-czf", tarball_path, realm_output_dir, "-P"])
        logging.info("Done making tarball")
Ejemplo n.º 7
0
def do_convert_data(rocketchat_data_dir: str, output_dir: str) -> None:
    """Convert a Rocket.Chat export into data files under ``output_dir``
    and archive the result as ``output_dir + '.tar.gz'``.

    Args:
        rocketchat_data_dir: Directory handed to ``rocketchat_data_to_dict``
            to load the exported data.
        output_dir: Directory that receives the converted files.

    Raises:
        subprocess.CalledProcessError: If the ``tar`` invocation fails.
    """
    # Load everything the conversion needs into a single dictionary.
    rocketchat_data = rocketchat_data_to_dict(rocketchat_data_dir)

    # The subdomain is chosen by the user when the import command is run.
    realm_subdomain = ""
    realm_id = 0
    domain_name = settings.EXTERNAL_HOST

    realm = make_realm(
        realm_id,
        realm_subdomain,
        domain_name,
        rocketchat_data["instance"][0],
    )

    user_id_to_user_map: Dict[str, Dict[str, Any]] = map_user_id_to_user(
        rocketchat_data["user"]
    )
    username_to_user_id_map: Dict[str, str] = map_username_to_user_id(
        user_id_to_user_map
    )

    user_handler = UserHandler()
    subscriber_handler = SubscriberHandler()
    user_id_mapper = IdMapper()
    stream_id_mapper = IdMapper()
    huddle_id_mapper = IdMapper()

    process_users(
        user_id_to_user_map=user_id_to_user_map,
        realm_id=realm_id,
        domain_name=domain_name,
        user_handler=user_handler,
        user_id_mapper=user_id_mapper,
    )

    # Empty maps handed to categorize_channels_and_map_with_id, which
    # presumably populates them in place -- verify against its implementation.
    room_id_to_room_map: Dict[str, Dict[str, Any]] = {}
    team_id_to_team_map: Dict[str, Dict[str, Any]] = {}
    dsc_id_to_dsc_map: Dict[str, Dict[str, Any]] = {}
    direct_id_to_direct_map: Dict[str, Dict[str, Any]] = {}
    huddle_id_to_huddle_map: Dict[str, Dict[str, Any]] = {}

    categorize_channels_and_map_with_id(
        channel_data=rocketchat_data["room"],
        room_id_to_room_map=room_id_to_room_map,
        team_id_to_team_map=team_id_to_team_map,
        dsc_id_to_dsc_map=dsc_id_to_dsc_map,
        direct_id_to_direct_map=direct_id_to_direct_map,
        huddle_id_to_huddle_map=huddle_id_to_huddle_map,
    )

    zerver_stream = convert_channel_data(
        room_id_to_room_map=room_id_to_room_map,
        team_id_to_team_map=team_id_to_team_map,
        stream_id_mapper=stream_id_mapper,
        realm_id=realm_id,
    )
    realm["zerver_stream"] = zerver_stream

    # Registers stream subscription data with `subscriber_handler`.
    convert_stream_subscription_data(
        user_id_to_user_map=user_id_to_user_map,
        dsc_id_to_dsc_map=dsc_id_to_dsc_map,
        zerver_stream=zerver_stream,
        stream_id_mapper=stream_id_mapper,
        user_id_mapper=user_id_mapper,
        subscriber_handler=subscriber_handler,
    )

    zerver_huddle = convert_huddle_data(
        huddle_id_to_huddle_map=huddle_id_to_huddle_map,
        huddle_id_mapper=huddle_id_mapper,
        user_id_mapper=user_id_mapper,
        subscriber_handler=subscriber_handler,
    )
    realm["zerver_huddle"] = zerver_huddle

    zerver_recipient = build_recipients(
        zerver_userprofile=user_handler.get_all_users(),
        zerver_stream=zerver_stream,
        zerver_huddle=zerver_huddle,
    )
    realm["zerver_recipient"] = zerver_recipient

    stream_subscriptions = build_stream_subscriptions(
        get_users=subscriber_handler.get_users,
        zerver_recipient=zerver_recipient,
        zerver_stream=zerver_stream,
    )
    huddle_subscriptions = build_huddle_subscriptions(
        get_users=subscriber_handler.get_users,
        zerver_recipient=zerver_recipient,
        zerver_huddle=zerver_huddle,
    )
    personal_subscriptions = build_personal_subscriptions(
        zerver_recipient=zerver_recipient)

    zerver_subscription = personal_subscriptions + stream_subscriptions + huddle_subscriptions
    realm["zerver_subscription"] = zerver_subscription

    zerver_realmemoji = build_custom_emoji(
        realm_id=realm_id,
        custom_emoji_data=rocketchat_data["custom_emoji"],
        output_dir=output_dir,
    )
    realm["zerver_realmemoji"] = zerver_realmemoji

    subscriber_map = make_subscriber_map(
        zerver_subscription=zerver_subscription)

    # Empty lookup tables handed to map_receiver_id_to_recipient_id.
    stream_id_to_recipient_id: Dict[int, int] = {}
    huddle_id_to_recipient_id: Dict[int, int] = {}
    user_id_to_recipient_id: Dict[int, int] = {}

    map_receiver_id_to_recipient_id(
        zerver_recipient=zerver_recipient,
        stream_id_to_recipient_id=stream_id_to_recipient_id,
        huddle_id_to_recipient_id=huddle_id_to_recipient_id,
        user_id_to_recipient_id=user_id_to_recipient_id,
    )

    # Empty lists handed to separate_channel_and_private_messages.
    channel_messages: List[Dict[str, Any]] = []
    private_messages: List[Dict[str, Any]] = []

    separate_channel_and_private_messages(
        messages=rocketchat_data["message"],
        direct_id_to_direct_map=direct_id_to_direct_map,
        huddle_id_to_huddle_map=huddle_id_to_huddle_map,
        channel_messages=channel_messages,
        private_messages=private_messages,
    )

    total_reactions: List[ZerverFieldsT] = []
    uploads_list: List[ZerverFieldsT] = []
    zerver_attachment: List[ZerverFieldsT] = []

    upload_id_to_upload_data_map = map_upload_id_to_upload_data(
        rocketchat_data["upload"]
    )

    # Both process_messages passes take the same arguments apart from the
    # message list and the private-message flag.
    shared_message_kwargs: Dict[str, Any] = dict(
        realm_id=realm_id,
        subscriber_map=subscriber_map,
        username_to_user_id_map=username_to_user_id_map,
        user_id_mapper=user_id_mapper,
        user_handler=user_handler,
        user_id_to_recipient_id=user_id_to_recipient_id,
        stream_id_mapper=stream_id_mapper,
        stream_id_to_recipient_id=stream_id_to_recipient_id,
        huddle_id_mapper=huddle_id_mapper,
        huddle_id_to_recipient_id=huddle_id_to_recipient_id,
        room_id_to_room_map=room_id_to_room_map,
        dsc_id_to_dsc_map=dsc_id_to_dsc_map,
        direct_id_to_direct_map=direct_id_to_direct_map,
        huddle_id_to_huddle_map=huddle_id_to_huddle_map,
        zerver_realmemoji=zerver_realmemoji,
        total_reactions=total_reactions,
        uploads_list=uploads_list,
        zerver_attachment=zerver_attachment,
        upload_id_to_upload_data_map=upload_id_to_upload_data_map,
        output_dir=output_dir,
    )
    # Channel messages first, then private messages.
    message_passes = (
        (channel_messages, False),
        (private_messages, True),
    )
    for messages, is_pm_data in message_passes:
        process_messages(
            messages=messages,
            is_pm_data=is_pm_data,
            **shared_message_kwargs,
        )

    realm["zerver_reaction"] = total_reactions
    realm["zerver_userprofile"] = user_handler.get_all_users()
    realm["sort_by_date"] = True

    attachment: Dict[str, List[Any]] = {"zerver_attachment": zerver_attachment}

    # Output files: realm data, an empty avatar record list (avatar import
    # is still a TODO), attachments and upload records.
    output_files = (
        (realm, "/realm.json"),
        ([], "/avatars/records.json"),
        (attachment, "/attachment.json"),
        (uploads_list, "/uploads/records.json"),
    )
    for payload, relative_path in output_files:
        create_converted_data_files(payload, output_dir, relative_path)

    logging.info("Start making tarball")
    tarball_path = output_dir + ".tar.gz"
    subprocess.check_call(["tar", "-czf", tarball_path, output_dir, "-P"])
    logging.info("Done making tarball")
Ejemplo n.º 8
0
def convert_slack_workspace_messages(
        slack_data_dir: str,
        users: List[ZerverFieldsT],
        realm_id: int,
        added_users: AddedUsersT,
        added_recipient: AddedRecipientsT,
        added_channels: AddedChannelsT,
        realm: ZerverFieldsT,
        zerver_userprofile: List[ZerverFieldsT],
        zerver_realmemoji: List[ZerverFieldsT],
        domain_name: str,
        output_dir: str,
        chunk_size: int=MESSAGE_BATCH_CHUNK_SIZE) -> Tuple[List[ZerverFieldsT],
                                                           List[ZerverFieldsT],
                                                           List[ZerverFieldsT]]:
    """
    Convert the Slack messages in batches of ``chunk_size`` and write each
    batch to its own ``/messages-NNNNNN.json`` file under ``output_dir``.

    Returns a 3-tuple:
    1. reactions, the reactions collected over all batches
    2. uploads, the uploads to be mapped in uploads records.json
    3. attachment, the attachments collected over all batches
    """
    long_term_idle = process_long_term_idle_users(
        slack_data_dir, users, added_users, added_channels, zerver_userprofile)

    # The actual message import starts here.
    all_messages = get_messages_iterator(slack_data_dir, added_channels)
    logging.info('######### IMPORTING MESSAGES STARTED #########\n')

    total_reactions = []  # type: List[ZerverFieldsT]
    total_attachments = []  # type: List[ZerverFieldsT]
    total_uploads = []  # type: List[ZerverFieldsT]

    subscriber_map = make_subscriber_map(
        zerver_subscription=realm['zerver_subscription'])

    # Each batch is written to its own numbered dump file.
    dump_file_id = 1
    while True:
        # Collect the next batch.  This assumes all_messages is a one-shot
        # iterator, so each pass resumes where the previous batch stopped.
        message_data = []
        for position, msg in enumerate(all_messages, start=1):
            message_data.append(msg)
            if position == chunk_size:
                break
        if not message_data:
            # Nothing left to convert.
            break

        converted = channel_message_to_zerver_message(
            realm_id, users, added_users, added_recipient, message_data,
            zerver_realmemoji, subscriber_map, added_channels,
            domain_name, long_term_idle)
        zerver_message, zerver_usermessage, attachment, uploads, reactions = converted

        message_json = {
            'zerver_message': zerver_message,
            'zerver_usermessage': zerver_usermessage,
        }

        message_file = "/messages-%06d.json" % (dump_file_id,)
        logging.info("Writing Messages to %s\n" % (output_dir + message_file))
        create_converted_data_files(message_json, output_dir, message_file)

        total_reactions.extend(reactions)
        total_attachments.extend(attachment)
        total_uploads.extend(uploads)

        dump_file_id += 1

    logging.info('######### IMPORTING MESSAGES FINISHED #########\n')
    return total_reactions, total_uploads, total_attachments
Ejemplo n.º 9
0
def convert_slack_workspace_messages(
        slack_data_dir: str,
        users: List[ZerverFieldsT],
        realm_id: int,
        added_users: AddedUsersT,
        added_recipient: AddedRecipientsT,
        added_channels: AddedChannelsT,
        realm: ZerverFieldsT,
        zerver_realmemoji: List[ZerverFieldsT],
        domain_name: str,
        output_dir: str,
        chunk_size: int=MESSAGE_BATCH_CHUNK_SIZE) -> Tuple[List[ZerverFieldsT],
                                                           List[ZerverFieldsT],
                                                           List[ZerverFieldsT]]:
    """
    Convert the Slack messages in batches of ``chunk_size`` and write each
    batch to its own ``/messages-NNNNNN.json`` file under ``output_dir``.

    Returns a 3-tuple:
    1. reactions, the reactions collected over all batches
    2. uploads, the uploads to be mapped in uploads records.json
    3. attachment, the attachments collected over all batches
    """
    unsorted_messages = get_all_messages(slack_data_dir, added_channels)

    # Order by timestamp so that the messages end up in date order.
    all_messages = sorted(unsorted_messages, key=lambda message: message['ts'])

    logging.info('######### IMPORTING MESSAGES STARTED #########\n')

    total_reactions = []  # type: List[ZerverFieldsT]
    total_attachments = []  # type: List[ZerverFieldsT]
    total_uploads = []  # type: List[ZerverFieldsT]

    subscriber_map = make_subscriber_map(
        zerver_subscription=realm['zerver_subscription'])

    # Step through the sorted list window by window; the loop ends at the
    # first empty window.
    window_start = 0
    dump_file_id = 1
    while True:
        window_end = window_start + chunk_size
        message_data = all_messages[window_start:window_end]
        if not message_data:
            break

        converted = channel_message_to_zerver_message(
            realm_id, users, added_users, added_recipient, message_data,
            zerver_realmemoji, subscriber_map, added_channels,
            domain_name)
        zerver_message, zerver_usermessage, attachment, uploads, reactions = converted

        message_json = {
            'zerver_message': zerver_message,
            'zerver_usermessage': zerver_usermessage,
        }

        message_file = "/messages-%06d.json" % (dump_file_id,)
        logging.info("Writing Messages to %s\n" % (output_dir + message_file))
        create_converted_data_files(message_json, output_dir, message_file)

        total_reactions.extend(reactions)
        total_attachments.extend(attachment)
        total_uploads.extend(uploads)

        window_start = window_end
        dump_file_id += 1

    logging.info('######### IMPORTING MESSAGES FINISHED #########\n')
    return total_reactions, total_uploads, total_attachments