def process_raw_message_batch(realm_id: int,
                              raw_messages: List[Dict[str, Any]],
                              subscriber_map: Dict[int, Set[int]],
                              user_id_mapper: IdMapper,
                              user_handler: UserHandler,
                              attachment_handler: AttachmentHandler,
                              get_recipient_id: Callable[[ZerverFieldsT], int],
                              is_pm_data: bool,
                              output_dir: str) -> None:
    """Convert one batch of raw HipChat messages and hand it off for dumping.

    For every raw message this rewrites mentions, runs the content through
    html2text, resolves the recipient, lets ``attachment_handler`` process
    the attachment data, and builds a message row with ``build_message``.
    The rows, plus the result of ``make_user_messages``, are passed to
    ``create_converted_data_files`` under a ``/messages-NNNNNN.json`` name.

    A message is skipped when its converted content is longer than 10000
    characters, or when ``get_recipient_id`` raises ``KeyError`` for it.
    A message id is drawn from ``NEXT_ID`` before those checks, so skipped
    messages still consume an id.

    Args:
        realm_id: Forwarded to ``attachment_handler.handle_message_data``.
        raw_messages: Dicts read via the keys ``mention_user_ids``,
            ``content``, ``date_sent``, ``sender_id``, ``attachment`` and
            ``files_dir``.
        subscriber_map: Forwarded to ``make_user_messages``.
        user_id_mapper: Translates the ids in ``mention_user_ids``; ids it
            does not know (``has`` is false) are dropped.
        user_handler: Source of the ``short_name`` / ``full_name`` used to
            rewrite mentions.
        attachment_handler: Called once per kept message; a truthy return
            value is appended to the content on a new line.
        get_recipient_id: Maps a raw message to its recipient id.
        is_pm_data: Selects an empty topic (true) or
            ``'imported from hipchat'`` (false); also forwarded to
            ``make_user_messages``.
        output_dir: Forwarded to ``create_converted_data_files``.
    """

    def fix_mentions(content: str, mention_user_ids: Set[int]) -> str:
        # Collect (HipChat handle, Zulip syntax) pairs for every mentioned user.
        replacements = []
        for user_id in mention_user_ids:
            user = user_handler.get_user(user_id=user_id)
            hipchat_mention = '@{short_name}'.format(**user)
            zulip_mention = '@**{full_name}**'.format(**user)
            replacements.append((hipchat_mention, zulip_mention))

        # Replace the longest handle first.  Otherwise a handle that is a
        # prefix of another mentioned handle (e.g. "@al" and "@alice") could
        # be substituted inside the longer one and corrupt it.
        replacements.sort(key=lambda pair: len(pair[0]), reverse=True)
        for hipchat_mention, zulip_mention in replacements:
            content = content.replace(hipchat_mention, zulip_mention)

        content = content.replace('@here', '@**all**')
        return content

    mention_map: Dict[int, Set[int]] = {}
    zerver_message = []

    import html2text
    h = html2text.HTML2Text()

    for raw_message in raw_messages:
        # One side effect here: an id is consumed even if the message is
        # skipped further down.
        message_id = NEXT_ID('message')
        mention_user_ids = {
            user_id_mapper.get(hipchat_user_id)
            for hipchat_user_id in set(raw_message['mention_user_ids'])
            if user_id_mapper.has(hipchat_user_id)
        }
        mention_map[message_id] = mention_user_ids

        content = fix_mentions(
            content=raw_message['content'],
            mention_user_ids=mention_user_ids,
        )
        content = h.handle(content)

        if len(content) > 10000:
            logging.info('skipping too-long message of length %s', len(content))
            continue

        date_sent = raw_message['date_sent']

        try:
            recipient_id = get_recipient_id(raw_message)
        except KeyError:
            logging.debug("Could not find recipient_id for a message, skipping.")
            continue

        rendered_content = None

        if is_pm_data:
            topic_name = ''
        else:
            topic_name = 'imported from hipchat'
        user_id = raw_message['sender_id']

        # Another side effect: the handler is invoked for every kept message.
        extra_content = attachment_handler.handle_message_data(
            realm_id=realm_id,
            message_id=message_id,
            sender_id=user_id,
            attachment=raw_message['attachment'],
            files_dir=raw_message['files_dir'],
        )

        if extra_content:
            has_attachment = True
            content += '\n' + extra_content
        else:
            has_attachment = False

        message = build_message(
            content=content,
            message_id=message_id,
            date_sent=date_sent,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            topic_name=topic_name,
            user_id=user_id,
            has_attachment=has_attachment,
        )
        zerver_message.append(message)

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=mention_map,
    )

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    dump_file_id = NEXT_ID('dump_file_id')
    message_file = "/messages-%06d.json" % (dump_file_id, )
    create_converted_data_files(message_json, output_dir, message_file)
def process_raw_message_batch(
    realm_id: int,
    raw_messages: List[Dict[str, Any]],
    subscriber_map: Dict[int, Set[int]],
    user_id_mapper: IdMapper,
    user_handler: UserHandler,
    get_recipient_id_from_receiver_name: Callable[[str, int], int],
    is_pm_data: bool,
    output_dir: str,
    zerver_realmemoji: List[Dict[str, Any]],
    total_reactions: List[Dict[str, Any]],
    uploads_list: List[ZerverFieldsT],
    zerver_attachment: List[ZerverFieldsT],
    mattermost_data_dir: str,
) -> None:
    """Convert one batch of raw Mattermost messages and hand it off for dumping.

    For every raw message this rewrites mentions, runs the content through
    html2text, resolves the recipient from whichever of ``channel_name``,
    ``huddle_name`` or ``pm_members`` is present, processes attachments
    when an ``attachments`` key exists, builds a message row with
    ``build_message`` and calls ``build_reactions`` for it.  The rows, plus
    the result of ``make_user_messages``, are passed to
    ``create_converted_data_files`` under a ``/messages-NNNNNN.json`` name.

    Messages whose converted content is longer than 10000 characters are
    skipped; they still consume a message id from ``NEXT_ID``.

    Args:
        realm_id: Forwarded to the attachment and reaction helpers and used
            in the ``NEXT_ID`` key for the dump file counter.
        raw_messages: Dicts read via the keys ``content``, ``date_sent``,
            ``sender_id``, ``reactions``, optionally ``attachments``, and
            one of ``channel_name`` / ``huddle_name`` / ``pm_members``.
        subscriber_map: Forwarded to ``make_user_messages``.
        user_id_mapper: Used to translate ``pm_members`` entries and passed
            to ``get_mentioned_user_ids`` and ``build_reactions``.
        user_handler: Source of the ``short_name`` / ``full_name`` used to
            rewrite mentions; also forwarded to attachment processing.
        get_recipient_id_from_receiver_name: Maps a receiver name and a
            ``Recipient`` type constant to a recipient id.
        is_pm_data: Forwarded to ``make_user_messages``.
        output_dir: Forwarded to attachment processing and to
            ``create_converted_data_files``.
        zerver_realmemoji: Forwarded to ``build_reactions``.
        total_reactions: Forwarded to ``build_reactions``.
        uploads_list: Forwarded to ``process_message_attachments``.
        zerver_attachment: Forwarded to ``process_message_attachments``.
        mattermost_data_dir: Forwarded to ``process_message_attachments``.

    Raises:
        AssertionError: If a raw message has none of ``channel_name``,
            ``huddle_name`` or ``pm_members``.
    """

    def fix_mentions(content: str, mention_user_ids: Set[int]) -> str:
        # Collect (Mattermost handle, Zulip syntax) pairs for every mentioned user.
        replacements = []
        for user_id in mention_user_ids:
            user = user_handler.get_user(user_id=user_id)
            mattermost_mention = "@{short_name}".format(**user)
            zulip_mention = "@**{full_name}**".format(**user)
            replacements.append((mattermost_mention, zulip_mention))

        # Replace the longest handle first.  Otherwise a handle that is a
        # prefix of another mentioned handle (e.g. "@al" and "@alice") could
        # be substituted inside the longer one and corrupt it.
        replacements.sort(key=lambda pair: len(pair[0]), reverse=True)
        for mattermost_mention, zulip_mention in replacements:
            content = content.replace(mattermost_mention, zulip_mention)

        content = content.replace("@channel", "@**all**")
        content = content.replace("@all", "@**all**")
        # We don't have an equivalent for Mattermost's @here mention which mentions all users
        # online in the channel.
        content = content.replace("@here", "@**all**")
        return content

    mention_map: Dict[int, Set[int]] = {}
    zerver_message = []

    import html2text

    h = html2text.HTML2Text()

    # NOTE(review): pm_members is filled in below but never read in this
    # function.  It is kept because building it calls user_id_mapper.get for
    # every member -- confirm whether that call matters before removing it.
    pm_members = {}

    for raw_message in raw_messages:
        # An id is consumed even if the message is skipped further down.
        message_id = NEXT_ID("message")
        mention_user_ids = get_mentioned_user_ids(raw_message, user_id_mapper)
        mention_map[message_id] = mention_user_ids

        content = fix_mentions(
            content=raw_message["content"],
            mention_user_ids=mention_user_ids,
        )
        content = h.handle(content)

        if len(content) > 10000:  # nocoverage
            logging.info("skipping too-long message of length %s", len(content))
            continue

        date_sent = raw_message["date_sent"]
        sender_user_id = raw_message["sender_id"]

        if "channel_name" in raw_message:
            recipient_id = get_recipient_id_from_receiver_name(
                raw_message["channel_name"], Recipient.STREAM
            )
        elif "huddle_name" in raw_message:
            recipient_id = get_recipient_id_from_receiver_name(
                raw_message["huddle_name"], Recipient.HUDDLE
            )
        elif "pm_members" in raw_message:
            members = raw_message["pm_members"]
            member_ids = {user_id_mapper.get(member) for member in members}
            pm_members[message_id] = member_ids
            # The recipient is whichever of the first two members is not
            # the sender.
            if sender_user_id == user_id_mapper.get(members[0]):
                recipient_id = get_recipient_id_from_receiver_name(members[1], Recipient.PERSONAL)
            else:
                recipient_id = get_recipient_id_from_receiver_name(members[0], Recipient.PERSONAL)
        else:
            raise AssertionError("raw_message without channel_name, huddle_name or pm_members key")

        rendered_content = None

        has_attachment = False
        has_image = False
        has_link = False
        if "attachments" in raw_message:
            has_attachment = True
            has_link = True

            attachment_markdown, has_image = process_message_attachments(
                attachments=raw_message["attachments"],
                realm_id=realm_id,
                message_id=message_id,
                user_id=sender_user_id,
                user_handler=user_handler,
                zerver_attachment=zerver_attachment,
                uploads_list=uploads_list,
                mattermost_data_dir=mattermost_data_dir,
                output_dir=output_dir,
            )

            content += attachment_markdown

        topic_name = "imported from mattermost"

        message = build_message(
            content=content,
            message_id=message_id,
            date_sent=date_sent,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            topic_name=topic_name,
            user_id=sender_user_id,
            has_image=has_image,
            has_link=has_link,
            has_attachment=has_attachment,
        )
        zerver_message.append(message)
        build_reactions(
            realm_id,
            total_reactions,
            raw_message["reactions"],
            message_id,
            user_id_mapper,
            zerver_realmemoji,
        )

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=mention_map,
    )

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    dump_file_id = NEXT_ID("dump_file_id" + str(realm_id))
    message_file = f"/messages-{dump_file_id:06}.json"
    create_converted_data_files(message_json, output_dir, message_file)
def process_raw_message_batch(realm_id: int,
                              raw_messages: List[Dict[str, Any]],
                              subscriber_map: Dict[int, Set[int]],
                              user_id_mapper: IdMapper,
                              user_handler: UserHandler,
                              get_recipient_id_from_receiver_name: Callable[[str, int], int],
                              is_pm_data: bool,
                              output_dir: str,
                              zerver_realmemoji: List[Dict[str, Any]],
                              total_reactions: List[Dict[str, Any]],
                              ) -> None:
    """Convert one batch of raw Mattermost messages and hand it off for dumping.

    For every raw message this rewrites mentions, runs the content through
    html2text, resolves the recipient from whichever of ``channel_name``,
    ``huddle_name`` or ``pm_members`` is present, builds a message row with
    ``build_message`` (always with ``has_attachment=False``) and calls
    ``build_reactions`` for it.  The rows, plus the result of
    ``make_user_messages``, are passed to ``create_converted_data_files``
    under a ``/messages-NNNNNN.json`` name.

    Messages whose converted content is longer than 10000 characters are
    skipped; they still consume a message id from ``NEXT_ID``.

    Args:
        realm_id: Forwarded to ``build_reactions`` and used in the
            ``NEXT_ID`` key for the dump file counter.
        raw_messages: Dicts read via the keys ``content``, ``date_sent``,
            ``sender_id``, ``reactions``, and one of ``channel_name`` /
            ``huddle_name`` / ``pm_members``.
        subscriber_map: Forwarded to ``make_user_messages``.
        user_id_mapper: Used to translate ``pm_members`` entries and passed
            to ``get_mentioned_user_ids`` and ``build_reactions``.
        user_handler: Source of the ``short_name`` / ``full_name`` used to
            rewrite mentions.
        get_recipient_id_from_receiver_name: Maps a receiver name and a
            ``Recipient`` type constant to a recipient id.
        is_pm_data: Forwarded to ``make_user_messages``.
        output_dir: Forwarded to ``create_converted_data_files``.
        zerver_realmemoji: Forwarded to ``build_reactions``.
        total_reactions: Forwarded to ``build_reactions``.

    Raises:
        AssertionError: If a raw message has none of ``channel_name``,
            ``huddle_name`` or ``pm_members``.
    """

    def fix_mentions(content: str, mention_user_ids: Set[int]) -> str:
        # Collect (Mattermost handle, Zulip syntax) pairs for every mentioned user.
        replacements = []
        for user_id in mention_user_ids:
            user = user_handler.get_user(user_id=user_id)
            mattermost_mention = '@{short_name}'.format(**user)
            zulip_mention = '@**{full_name}**'.format(**user)
            replacements.append((mattermost_mention, zulip_mention))

        # Replace the longest handle first.  Otherwise a handle that is a
        # prefix of another mentioned handle (e.g. "@al" and "@alice") could
        # be substituted inside the longer one and corrupt it.
        replacements.sort(key=lambda pair: len(pair[0]), reverse=True)
        for mattermost_mention, zulip_mention in replacements:
            content = content.replace(mattermost_mention, zulip_mention)

        content = content.replace('@channel', '@**all**')
        content = content.replace('@all', '@**all**')
        # We don't have an equivalent for Mattermost's @here mention which mentions all users
        # online in the channel.
        content = content.replace('@here', '@**all**')
        return content

    mention_map: Dict[int, Set[int]] = {}
    zerver_message = []

    import html2text
    h = html2text.HTML2Text()

    # NOTE(review): pm_members is filled in below but never read in this
    # function.  It is kept because building it calls user_id_mapper.get for
    # every member -- confirm whether that call matters before removing it.
    pm_members = {}

    for raw_message in raw_messages:
        # An id is consumed even if the message is skipped further down.
        message_id = NEXT_ID('message')
        mention_user_ids = get_mentioned_user_ids(raw_message, user_id_mapper)
        mention_map[message_id] = mention_user_ids

        content = fix_mentions(
            content=raw_message['content'],
            mention_user_ids=mention_user_ids,
        )
        content = h.handle(content)

        if len(content) > 10000:  # nocoverage
            logging.info('skipping too-long message of length %s', len(content))
            continue

        date_sent = raw_message['date_sent']
        sender_user_id = raw_message['sender_id']

        if "channel_name" in raw_message:
            recipient_id = get_recipient_id_from_receiver_name(
                raw_message["channel_name"], Recipient.STREAM)
        elif "huddle_name" in raw_message:
            recipient_id = get_recipient_id_from_receiver_name(
                raw_message["huddle_name"], Recipient.HUDDLE)
        elif "pm_members" in raw_message:
            members = raw_message["pm_members"]
            member_ids = {user_id_mapper.get(member) for member in members}
            pm_members[message_id] = member_ids
            # The recipient is whichever of the first two members is not
            # the sender.
            if sender_user_id == user_id_mapper.get(members[0]):
                recipient_id = get_recipient_id_from_receiver_name(members[1], Recipient.PERSONAL)
            else:
                recipient_id = get_recipient_id_from_receiver_name(members[0], Recipient.PERSONAL)
        else:
            raise AssertionError("raw_message without channel_name, huddle_name or pm_members key")

        rendered_content = None

        topic_name = 'imported from mattermost'

        message = build_message(
            content=content,
            message_id=message_id,
            date_sent=date_sent,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            topic_name=topic_name,
            user_id=sender_user_id,
            has_attachment=False,
        )
        zerver_message.append(message)
        build_reactions(realm_id, total_reactions, raw_message["reactions"], message_id,
                        user_id_mapper, zerver_realmemoji)

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=mention_map,
    )

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    dump_file_id = NEXT_ID('dump_file_id' + str(realm_id))
    message_file = "/messages-%06d.json" % (dump_file_id,)
    create_converted_data_files(message_json, output_dir, message_file)
def process_raw_message_batch(
    realm_id: int,
    raw_messages: List[Dict[str, Any]],
    subscriber_map: Dict[int, Set[int]],
    user_id_mapper: IdMapper,
    user_handler: UserHandler,
    get_recipient_id: Callable[[ZerverFieldsT], int],
    is_pm_data: bool,
    output_dir: str,
    zerver_realmemoji: List[Dict[str, Any]],
    total_reactions: List[Dict[str, Any]],
) -> None:
    """Convert one batch of raw Mattermost messages and hand it off for dumping.

    For every raw message this rewrites mentions, runs the content through
    html2text, resolves the recipient with ``get_recipient_id``, builds a
    message row with ``build_message`` (always with
    ``has_attachment=False``) and calls ``build_reactions`` for it.  The
    rows, plus the result of ``make_user_messages``, are passed to
    ``create_converted_data_files`` under a ``/messages-NNNNNN.json`` name.

    A message is skipped when its converted content is longer than 10000
    characters, or when ``get_recipient_id`` raises ``KeyError`` for it.
    Skipped messages still consume a message id from ``NEXT_ID``.

    Args:
        realm_id: Forwarded to ``build_reactions`` and used in the
            ``NEXT_ID`` key for the dump file counter.
        raw_messages: Dicts read via the keys ``content``, ``pub_date``,
            ``sender_id`` and ``reactions``.
        subscriber_map: Forwarded to ``make_user_messages``.
        user_id_mapper: Passed to ``get_mentioned_user_ids`` and
            ``build_reactions``.
        user_handler: Source of the ``short_name`` / ``full_name`` used to
            rewrite mentions.
        get_recipient_id: Maps a raw message to its recipient id.
        is_pm_data: Forwarded to ``make_user_messages``.
        output_dir: Forwarded to ``create_converted_data_files``.
        zerver_realmemoji: Forwarded to ``build_reactions``.
        total_reactions: Forwarded to ``build_reactions``.
    """

    def fix_mentions(content: str, mention_user_ids: Set[int]) -> str:
        # Collect (Mattermost handle, Zulip syntax) pairs for every mentioned user.
        replacements = []
        for user_id in mention_user_ids:
            user = user_handler.get_user(user_id=user_id)
            mattermost_mention = '@{short_name}'.format(**user)
            zulip_mention = '@**{full_name}**'.format(**user)
            replacements.append((mattermost_mention, zulip_mention))

        # Replace the longest handle first.  Otherwise a handle that is a
        # prefix of another mentioned handle (e.g. "@al" and "@alice") could
        # be substituted inside the longer one and corrupt it.
        replacements.sort(key=lambda pair: len(pair[0]), reverse=True)
        for mattermost_mention, zulip_mention in replacements:
            content = content.replace(mattermost_mention, zulip_mention)

        content = content.replace('@channel', '@**all**')
        content = content.replace('@all', '@**all**')
        # We don't have an equivalent for Mattermost's @here mention which mentions all users
        # online in the channel.
        content = content.replace('@here', '@**all**')
        return content

    mention_map = {}  # type: Dict[int, Set[int]]
    zerver_message = []

    import html2text
    h = html2text.HTML2Text()

    # Fetched once per batch and shared by every build_reactions call below.
    name_to_codepoint = get_name_to_codepoint_dict()

    for raw_message in raw_messages:
        # An id is consumed even if the message is skipped further down.
        message_id = NEXT_ID('message')
        mention_user_ids = get_mentioned_user_ids(raw_message, user_id_mapper)
        mention_map[message_id] = mention_user_ids

        content = fix_mentions(
            content=raw_message['content'],
            mention_user_ids=mention_user_ids,
        )
        content = h.handle(content)

        if len(content) > 10000:  # nocoverage
            logging.info('skipping too-long message of length %s', len(content))
            continue

        pub_date = raw_message['pub_date']
        try:
            recipient_id = get_recipient_id(raw_message)
        except KeyError:
            logging.debug("Could not find recipient_id for a message, skipping.")
            continue

        rendered_content = None

        topic_name = 'imported from mattermost'
        user_id = raw_message['sender_id']

        message = build_message(
            content=content,
            message_id=message_id,
            pub_date=pub_date,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            topic_name=topic_name,
            user_id=user_id,
            has_attachment=False,
        )
        zerver_message.append(message)
        build_reactions(realm_id, total_reactions, raw_message["reactions"], message_id,
                        name_to_codepoint, user_id_mapper, zerver_realmemoji)

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=mention_map,
    )

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    dump_file_id = NEXT_ID('dump_file_id' + str(realm_id))
    message_file = "/messages-%06d.json" % (dump_file_id, )
    create_converted_data_files(message_json, output_dir, message_file)
def process_raw_message_batch(
    realm_id: int,
    raw_messages: List[Dict[str, Any]],
    subscriber_map: Dict[int, Set[int]],
    user_handler: UserHandler,
    is_pm_data: bool,
    output_dir: str,
    zerver_realmemoji: List[ZerverFieldsT],
    total_reactions: List[ZerverFieldsT],
    uploads_list: List[ZerverFieldsT],
    zerver_attachment: List[ZerverFieldsT],
    upload_id_to_upload_data_map: Dict[str, Dict[str, Any]],
) -> None:
    """Convert one batch of raw Rocket.Chat messages and hand it off for dumping.

    For every raw message this rewrites user and channel mentions,
    processes the upload when a ``file`` key exists, builds a message row
    with ``build_message`` and calls ``build_reactions`` for it.  The rows,
    plus the result of ``make_user_messages``, are passed to
    ``create_converted_data_files`` under a ``/messages-NNNNNN.json`` name.

    Messages whose content is longer than 10000 characters after mention
    rewriting are skipped; they still consume a message id from ``NEXT_ID``.

    Args:
        realm_id: Forwarded to ``process_message_attachment`` and used in
            the ``NEXT_ID`` key for the dump file counter.
        raw_messages: Dicts read via the keys ``mention_user_ids``,
            ``content``, ``rc_channel_mention_data``, ``date_sent``,
            ``sender_id``, ``recipient_id``, ``has_link``, ``topic_name``,
            ``reactions`` and optionally ``file``.
        subscriber_map: Forwarded to ``make_user_messages``.
        user_handler: Source of the ``short_name`` / ``full_name`` used to
            rewrite mentions; also forwarded to attachment processing.
        is_pm_data: Forwarded to ``make_user_messages``.
        output_dir: Forwarded to attachment processing and to
            ``create_converted_data_files``.
        zerver_realmemoji: Forwarded to ``build_reactions``.
        total_reactions: Forwarded to ``build_reactions``.
        uploads_list: Forwarded to ``process_message_attachment``.
        zerver_attachment: Forwarded to ``process_message_attachment``.
        upload_id_to_upload_data_map: Forwarded to
            ``process_message_attachment``.
    """

    def fix_mentions(
        content: str, mention_user_ids: Set[int], rc_channel_mention_data: List[Dict[str, str]]
    ) -> str:
        # Fix user mentions
        user_replacements = []
        for user_id in mention_user_ids:
            user = user_handler.get_user(user_id=user_id)
            rc_mention = "@{short_name}".format(**user)
            zulip_mention = "@**{full_name}**".format(**user)
            user_replacements.append((rc_mention, zulip_mention))

        # Replace the longest handle first.  Otherwise a handle that is a
        # prefix of another mentioned handle (e.g. "@al" and "@alice") could
        # be substituted inside the longer one and corrupt it.
        user_replacements.sort(key=lambda pair: len(pair[0]), reverse=True)
        for rc_mention, zulip_mention in user_replacements:
            content = content.replace(rc_mention, zulip_mention)

        content = content.replace("@all", "@**all**")
        # We don't have an equivalent for Rocket.Chat's @here mention
        # which mentions all users active in the channel.
        content = content.replace("@here", "@**all**")

        # Fix channel mentions, again longest first for the same reason
        # (e.g. "#general" versus "#general-dev").
        channel_replacements = [
            (mention_data["rc_mention"], mention_data["zulip_mention"])
            for mention_data in rc_channel_mention_data
        ]
        channel_replacements.sort(key=lambda pair: len(pair[0]), reverse=True)
        for rc_mention, zulip_mention in channel_replacements:
            content = content.replace(rc_mention, zulip_mention)

        return content

    mention_map: Dict[int, Set[int]] = {}
    zerver_message: List[ZerverFieldsT] = []

    for raw_message in raw_messages:
        # An id is consumed even if the message is skipped further down.
        message_id = NEXT_ID("message")
        mention_user_ids = raw_message["mention_user_ids"]
        mention_map[message_id] = mention_user_ids

        content = fix_mentions(
            content=raw_message["content"],
            mention_user_ids=mention_user_ids,
            rc_channel_mention_data=raw_message["rc_channel_mention_data"],
        )

        if len(content) > 10000:  # nocoverage
            logging.info("skipping too-long message of length %s", len(content))
            continue

        date_sent = raw_message["date_sent"]
        sender_user_id = raw_message["sender_id"]
        recipient_id = raw_message["recipient_id"]

        rendered_content = None

        has_attachment = False
        has_image = False
        has_link = raw_message["has_link"]

        if "file" in raw_message:
            # An upload always counts as both an attachment and a link.
            has_attachment = True
            has_link = True

            attachment_content, has_image = process_message_attachment(
                upload=raw_message["file"],
                realm_id=realm_id,
                message_id=message_id,
                user_id=sender_user_id,
                user_handler=user_handler,
                uploads_list=uploads_list,
                zerver_attachment=zerver_attachment,
                upload_id_to_upload_data_map=upload_id_to_upload_data_map,
                output_dir=output_dir,
            )

            content += attachment_content

        topic_name = raw_message["topic_name"]

        message = build_message(
            content=content,
            message_id=message_id,
            date_sent=date_sent,
            recipient_id=recipient_id,
            rendered_content=rendered_content,
            topic_name=topic_name,
            user_id=sender_user_id,
            has_image=has_image,
            has_link=has_link,
            has_attachment=has_attachment,
        )
        zerver_message.append(message)
        build_reactions(
            total_reactions=total_reactions,
            reactions=raw_message["reactions"],
            message_id=message_id,
            zerver_realmemoji=zerver_realmemoji,
        )

    zerver_usermessage = make_user_messages(
        zerver_message=zerver_message,
        subscriber_map=subscriber_map,
        is_pm_data=is_pm_data,
        mention_map=mention_map,
    )

    message_json = dict(
        zerver_message=zerver_message,
        zerver_usermessage=zerver_usermessage,
    )

    dump_file_id = NEXT_ID("dump_file_id" + str(realm_id))
    message_file = f"/messages-{dump_file_id:06}.json"
    create_converted_data_files(message_json, output_dir, message_file)