def add_attachment(info: Dict[str, Any]) -> None:
    """Record one uploaded file as an attachment.

    Reads ``message_ids``, ``sender_id``, ``mtime``, ``size``, ``name`` and
    ``target_path`` from ``info`` and hands them to ``build_attachment``.
    ``realm_id`` and ``attachments`` are not parameters; they are resolved
    from the surrounding scope.
    """
    message_ids = info['message_ids']
    sender_id = info['sender_id']
    file_metadata = {
        # minor lie: the modification time is reported as the creation time
        'created': info['mtime'],
        'size': info['size'],
        'name': info['name'],
    }
    target_path = info['target_path']

    build_attachment(
        realm_id=realm_id,
        message_ids=message_ids,
        user_id=sender_id,
        fileinfo=file_metadata,
        s3_path=target_path,
        zerver_attachment=attachments,
    )
def add_attachment(info: Dict[str, Any]) -> None:
    """Record one uploaded file as an attachment.

    Reads ``message_ids``, ``sender_id``, ``mtime``, ``size``, ``name`` and
    ``target_path`` from ``info`` and hands them to ``build_attachment``.
    ``realm_id`` and ``attachments`` are not parameters; they are resolved
    from the surrounding scope.
    """
    message_ids = info['message_ids']
    sender_id = info['sender_id']
    file_metadata = {
        # minor lie: the modification time is reported as the creation time
        'created': info['mtime'],
        'size': info['size'],
        'name': info['name'],
    }
    target_path = info['target_path']

    build_attachment(
        realm_id=realm_id,
        message_ids=message_ids,
        user_id=sender_id,
        fileinfo=file_metadata,
        s3_path=target_path,
        zerver_attachment=attachments,
    )
def channel_message_to_zerver_message(realm_id: int, users: List[ZerverFieldsT],
                                      added_users: AddedUsersT,
                                      added_recipient: AddedRecipientsT,
                                      all_messages: List[ZerverFieldsT],
                                      zerver_realmemoji: List[ZerverFieldsT],
                                      zerver_subscription: List[ZerverFieldsT],
                                      added_channels: AddedChannelsT,
                                      id_list: Tuple[int, int, int, int],
                                      domain_name: str) -> Tuple[List[ZerverFieldsT],
                                                                 List[ZerverFieldsT],
                                                                 List[ZerverFieldsT],
                                                                 List[ZerverFieldsT],
                                                                 List[ZerverFieldsT],
                                                                 Tuple[int, int, int, int]]:
    """
    Convert the Slack messages in all_messages into Zulip message records.

    id_list carries the next ids to use for messages, usermessages,
    reactions and attachments; the advanced counters are returned so the
    caller can continue numbering from them.

    Returns:
    1. zerver_message, which is a list of the messages
    2. zerver_usermessage, which is a list of the usermessages
    3. zerver_attachment, which is a list of the attachments
    4. uploads_list, which is a list of uploads to be mapped in uploads records.json
    5. reaction_list, which is a list of all user reactions
    6. id_list, which is a tuple of max ids of messages, usermessages, reactions and attachments
    """
    message_id_count, usermessage_id_count, reaction_id_count, attachment_id_count = id_list
    zerver_message = []  # type: List[ZerverFieldsT]
    zerver_usermessage = []  # type: List[ZerverFieldsT]
    uploads_list = []  # type: List[ZerverFieldsT]
    zerver_attachment = []  # type: List[ZerverFieldsT]
    reaction_list = []  # type: List[ZerverFieldsT]

    # For unicode emoji
    with open(NAME_TO_CODEPOINT_PATH) as fp:
        name_to_codepoint = ujson.load(fp)

    for message in all_messages:
        user = get_message_sending_user(message)
        if not user:
            # Ignore messages without user names
            # These are sometimes produced by Slack
            continue

        subtype = message.get('subtype', False)
        if subtype in [
                # Zulip doesn't have a pinned_item concept
                "pinned_item",
                "unpinned_item",
                # Slack's channel join/leave notices are spammy
                "channel_join",
                "channel_leave",
                "channel_name"
        ]:
            continue

        has_attachment = has_image = False
        try:
            content, mentioned_users_id, has_link = convert_to_zulip_markdown(
                message['text'], users, added_channels, added_users)
        except Exception:
            # Deliberately best-effort: report the offending message and
            # keep importing the rest.
            print("Slack message unexpectedly missing text representation:")
            print(json.dumps(message, indent=4))
            continue
        rendered_content = None

        recipient_id = added_recipient[message['channel_name']]
        message_id = message_id_count

        # Process message reactions
        if 'reactions' in message:
            reaction_id_count = build_reactions(reaction_list, message['reactions'],
                                                added_users, message_id,
                                                reaction_id_count, name_to_codepoint,
                                                zerver_realmemoji)

        # Process different subtypes of slack messages

        # Subtypes which have only the action in the message should
        # be rendered with '/me' in the content initially
        # For example "sh_room_created" has the message 'started a call'
        # which should be displayed as '/me started a call'
        if subtype in ["bot_add", "sh_room_created", "me_message"]:
            content = ('/me %s' % (content))

        files = message.get('files', [])
        if subtype == 'file_share':
            # In Slack messages, uploads can either have the subtype as 'file_share' or
            # have the upload information in 'files' keyword
            files = [message['file']]

        for fileinfo in files:
            url = fileinfo['url_private']
            if 'files.slack.com' in url:
                # For attachments with slack download link
                has_attachment = has_link = True
                # has_image describes the whole message, so a later
                # non-image file must not reset it to False.
                if 'image' in fileinfo['mimetype']:
                    has_image = True

                # Raises IndexError when message['user'] is not in users.
                file_user = [iterate_user for iterate_user in users
                             if message['user'] == iterate_user['id']]
                file_user_email = get_user_email(file_user[0], domain_name)

                # NOTE: the link returned here replaces the message content.
                s3_path, content = get_attachment_path_and_content(fileinfo, realm_id)

                # construct attachments
                build_uploads(added_users[user], realm_id, file_user_email, fileinfo,
                              s3_path, uploads_list)

                build_attachment(realm_id, message_id, attachment_id_count,
                                 added_users[user], fileinfo, s3_path, zerver_attachment)
                attachment_id_count += 1
            else:
                # For attachments with link not from slack
                # Example: Google drive integration
                has_link = True
                if 'title' in fileinfo:
                    file_name = fileinfo['title']
                else:
                    file_name = fileinfo['name']
                content = '[%s](%s)' % (file_name, fileinfo['url_private'])

        # construct message
        subject = 'imported from slack'
        zulip_message = build_message(subject, float(message['ts']), message_id, content,
                                      rendered_content, added_users[user], recipient_id,
                                      has_image, has_link, has_attachment)
        zerver_message.append(zulip_message)

        # construct usermessages
        usermessage_id_count = build_usermessages(
            zerver_usermessage, usermessage_id_count, zerver_subscription,
            recipient_id, mentioned_users_id, message_id)

        message_id_count += 1

    id_list = (message_id_count, usermessage_id_count,
               reaction_id_count, attachment_id_count)
    return zerver_message, zerver_usermessage, zerver_attachment, uploads_list, \
        reaction_list, id_list
def process_message_files(message: ZerverFieldsT,
                          domain_name: str,
                          realm_id: int,
                          message_id: int,
                          user: str,
                          users: List[ZerverFieldsT],
                          added_users: AddedUsersT,
                          zerver_attachment: List[ZerverFieldsT],
                          uploads_list: List[ZerverFieldsT]) -> Dict[str, Any]:
    """Process the files attached to one Slack message.

    A file whose ``url_private`` contains ``files.slack.com`` is recorded
    through ``build_uploads`` and ``build_attachment`` (which receive
    ``uploads_list`` and ``zerver_attachment``). Any other file is only
    turned into a markdown link.

    Returns a dict with the keys:
      * ``content``: the markdown links for all files, one per line
      * ``has_attachment``: True if any file was hosted on Slack
      * ``has_image``: True if any Slack-hosted file has an image mimetype
      * ``has_link``: True if any file was processed
    """
    has_attachment = False
    has_image = False
    has_link = False

    files = message.get('files', [])

    subtype = message.get('subtype')

    if subtype == 'file_share':
        # In Slack messages, uploads can either have the subtype as 'file_share' or
        # have the upload information in 'files' keyword
        files = [message['file']]

    markdown_links = []

    for fileinfo in files:
        url = fileinfo['url_private']

        if 'files.slack.com' in url:
            # For attachments with slack download link
            has_attachment = True
            has_link = True
            # has_image describes the whole message, so a later non-image
            # file must not reset it to False.
            if 'image' in fileinfo['mimetype']:
                has_image = True

            # Raises IndexError when message['user'] is not in users.
            file_user = [iterate_user for iterate_user in users
                         if message['user'] == iterate_user['id']]
            file_user_email = get_user_email(file_user[0], domain_name)

            s3_path, content_for_link = get_attachment_path_and_content(fileinfo, realm_id)
            markdown_links.append(content_for_link)

            # construct attachments
            build_uploads(added_users[user], realm_id, file_user_email, fileinfo,
                          s3_path, uploads_list)

            build_attachment(realm_id, {message_id}, added_users[user],
                             fileinfo, s3_path, zerver_attachment)
        else:
            # For attachments with link not from slack
            # Example: Google drive integration
            has_link = True
            file_name = fileinfo['title'] if 'title' in fileinfo else fileinfo['name']
            markdown_links.append('[%s](%s)' % (file_name, url))

    content = '\n'.join(markdown_links)

    return dict(
        content=content,
        has_attachment=has_attachment,
        has_image=has_image,
        has_link=has_link,
    )
def process_message_files(message: ZerverFieldsT,
                          domain_name: str,
                          realm_id: int,
                          message_id: int,
                          slack_user_id: str,
                          users: List[ZerverFieldsT],
                          slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
                          zerver_attachment: List[ZerverFieldsT],
                          uploads_list: List[ZerverFieldsT]) -> Dict[str, Any]:
    """Process the files attached to one Slack message.

    Files in 'tombstone' or 'hidden_by_limit' mode are skipped. A file whose
    ``url_private`` contains ``files.slack.com`` is recorded through
    ``build_uploads`` and ``build_attachment`` (which receive
    ``uploads_list`` and ``zerver_attachment``). Any other file is only
    turned into a markdown link.

    Returns a dict with the keys:
      * ``content``: the markdown links for all files, one per line
      * ``has_attachment``: True if any file was hosted on Slack
      * ``has_image``: True if any Slack-hosted file has an image mimetype
      * ``has_link``: True if any non-skipped file was processed
    """
    has_attachment = False
    has_image = False
    has_link = False

    files = message.get('files', [])

    subtype = message.get('subtype')

    if subtype == 'file_share':
        # In Slack messages, uploads can either have the subtype as 'file_share' or
        # have the upload information in 'files' keyword
        files = [message['file']]

    markdown_links = []

    for fileinfo in files:
        if fileinfo.get('mode', '') in ['tombstone', 'hidden_by_limit']:
            # Slack sometimes includes tombstone mode files with no
            # real data on the actual file (presumably in cases where
            # the file was deleted). hidden_by_limit mode is for files
            # that are hidden because of 10k cap in free plan.
            continue

        url = fileinfo['url_private']

        if 'files.slack.com' in url:
            # For attachments with slack download link
            has_attachment = True
            has_link = True
            # has_image describes the whole message, so a later non-image
            # file must not reset it to False.
            if 'image' in fileinfo['mimetype']:
                has_image = True

            # Raises IndexError when message['user'] is not in users.
            file_user = [
                iterate_user for iterate_user in users
                if message['user'] == iterate_user['id']
            ]
            file_user_email = get_user_email(file_user[0], domain_name)

            s3_path, content_for_link = get_attachment_path_and_content(
                fileinfo, realm_id)
            markdown_links.append(content_for_link)

            zulip_user_id = slack_user_id_to_zulip_user_id[slack_user_id]
            build_uploads(zulip_user_id, realm_id, file_user_email,
                          fileinfo, s3_path, uploads_list)

            build_attachment(realm_id, {message_id}, zulip_user_id,
                             fileinfo, s3_path, zerver_attachment)
        else:
            # For attachments with link not from slack
            # Example: Google drive integration
            has_link = True
            file_name = fileinfo['title'] if 'title' in fileinfo else fileinfo['name']
            markdown_links.append('[%s](%s)' % (file_name, url))

    content = '\n'.join(markdown_links)

    return dict(
        content=content,
        has_attachment=has_attachment,
        has_image=has_image,
        has_link=has_link,
    )
def process_message_attachments(
    attachments: List[Dict[str, Any]],
    realm_id: int,
    message_id: int,
    user_id: int,
    user_handler: UserHandler,
    zerver_attachment: List[ZerverFieldsT],
    uploads_list: List[ZerverFieldsT],
    mattermost_data_dir: str,
    output_dir: str,
) -> Tuple[str, bool]:
    """Import the attachments of one message from a Mattermost data directory.

    For each entry in ``attachments`` the file at
    ``<mattermost_data_dir>/data/<path>`` is given a fresh upload path,
    described by a record appended to ``uploads_list``, registered through
    ``build_attachment`` and copied to ``<output_dir>/uploads/<upload path>``.

    Returns a tuple ``(content, has_image)``: the markdown links for all
    attachments joined by newlines, and whether any attachment's extension
    is listed in ``IMAGE_EXTENSIONS``.
    """
    has_image = False
    link_lines = []

    for entry in attachments:
        relative_path = entry["path"]
        source_path = os.path.join(mattermost_data_dir, "data", relative_path)

        # Last path component, and the text after its final dot.
        file_name = relative_path.rsplit("/", 1)[-1]
        extension = "." + file_name.rsplit(".", 1)[-1]
        if extension.lower() in IMAGE_EXTENSIONS:
            has_image = True

        path_parts = (
            str(realm_id),
            format(random.randint(0, 255), "x"),
            secrets.token_urlsafe(18),
            sanitize_name(file_name),
        )
        s3_path = "/".join(path_parts)
        link_lines.append(f"[{file_name}](/user_uploads/{s3_path})")

        size = os.path.getsize(source_path)
        # The modification time is used as the creation time.
        modified_at = os.path.getmtime(source_path)
        fileinfo = {"name": file_name, "size": size, "created": modified_at}

        uploader_email = user_handler.get_user(user_id=user_id)["email"]
        uploads_list.append(
            {
                "path": s3_path,
                "realm_id": realm_id,
                "content_type": None,
                "user_profile_id": user_id,
                "last_modified": modified_at,
                "user_profile_email": uploader_email,
                "s3_path": s3_path,
                "size": size,
            }
        )

        build_attachment(
            realm_id=realm_id,
            message_ids={message_id},
            user_id=user_id,
            fileinfo=fileinfo,
            s3_path=s3_path,
            zerver_attachment=zerver_attachment,
        )

        # Copy the attachment file to output_dir
        destination_path = os.path.join(output_dir, "uploads", s3_path)
        os.makedirs(os.path.dirname(destination_path), exist_ok=True)
        shutil.copyfile(source_path, destination_path)

    return "\n".join(link_lines), has_image
def channel_message_to_zerver_message(
    realm_id: int,
    users: List[ZerverFieldsT],
    added_users: AddedUsersT,
    added_recipient: AddedRecipientsT,
    all_messages: List[ZerverFieldsT],
    zerver_realmemoji: List[ZerverFieldsT],
    zerver_subscription: List[ZerverFieldsT],
    added_channels: AddedChannelsT,
    id_list: Tuple[int, int, int, int],
    domain_name: str
) -> Tuple[List[ZerverFieldsT],
           List[ZerverFieldsT],
           List[ZerverFieldsT],
           List[ZerverFieldsT],
           List[ZerverFieldsT],
           Tuple[int, int, int, int]]:
    """
    Convert the Slack messages in all_messages into Zulip message records.

    id_list carries the next ids to use for messages, usermessages,
    reactions and attachments; the advanced counters are returned so the
    caller can continue numbering from them.

    Returns:
    1. zerver_message, which is a list of the messages
    2. zerver_usermessage, which is a list of the usermessages
    3. zerver_attachment, which is a list of the attachments
    4. uploads_list, which is a list of uploads to be mapped in uploads records.json
    5. reaction_list, which is a list of all user reactions
    6. id_list, which is a tuple of max ids of messages, usermessages, reactions and attachments
    """
    message_id_count, usermessage_id_count, reaction_id_count, attachment_id_count = id_list
    zerver_message = []  # type: List[ZerverFieldsT]
    zerver_usermessage = []  # type: List[ZerverFieldsT]
    uploads_list = []  # type: List[ZerverFieldsT]
    zerver_attachment = []  # type: List[ZerverFieldsT]
    reaction_list = []  # type: List[ZerverFieldsT]

    # For unicode emoji
    with open(NAME_TO_CODEPOINT_PATH) as fp:
        name_to_codepoint = ujson.load(fp)

    for message in all_messages:
        user = get_message_sending_user(message)
        if not user:
            # Ignore messages without user names
            # These are sometimes produced by Slack
            continue

        subtype = message.get('subtype', False)
        if subtype in [
                # Zulip doesn't have a pinned_item concept
                "pinned_item",
                "unpinned_item",
                # Slack's channel join/leave notices are spammy
                "channel_join",
                "channel_leave",
                "channel_name"
        ]:
            continue

        has_attachment = has_image = False
        try:
            content, mentioned_users_id, has_link = convert_to_zulip_markdown(
                message['text'], users, added_channels, added_users)
        except Exception:
            # Deliberately best-effort: report the offending message and
            # keep importing the rest.
            print("Slack message unexpectedly missing text representation:")
            print(json.dumps(message, indent=4))
            continue
        rendered_content = None

        recipient_id = added_recipient[message['channel_name']]
        message_id = message_id_count

        # Process message reactions
        if 'reactions' in message:
            reaction_id_count = build_reactions(
                reaction_list, message['reactions'], added_users,
                message_id, reaction_id_count, name_to_codepoint,
                zerver_realmemoji)

        # Process different subtypes of slack messages

        # Subtypes which have only the action in the message should
        # be rendered with '/me' in the content initially
        # For example "sh_room_created" has the message 'started a call'
        # which should be displayed as '/me started a call'
        if subtype in ["bot_add", "sh_room_created", "me_message"]:
            content = ('/me %s' % (content))

        files = message.get('files', [])
        if subtype == 'file_share':
            # In Slack messages, uploads can either have the subtype as 'file_share' or
            # have the upload information in 'files' keyword
            files = [message['file']]

        for fileinfo in files:
            url = fileinfo['url_private']
            if 'files.slack.com' in url:
                # For attachments with slack download link
                has_attachment = has_link = True
                # has_image describes the whole message, so a later
                # non-image file must not reset it to False.
                if 'image' in fileinfo['mimetype']:
                    has_image = True

                # Raises IndexError when message['user'] is not in users.
                file_user = [
                    iterate_user for iterate_user in users
                    if message['user'] == iterate_user['id']
                ]
                file_user_email = get_user_email(file_user[0], domain_name)

                # NOTE: the link returned here replaces the message content.
                s3_path, content = get_attachment_path_and_content(
                    fileinfo, realm_id)

                # construct attachments
                build_uploads(added_users[user], realm_id, file_user_email,
                              fileinfo, s3_path, uploads_list)

                build_attachment(realm_id, message_id, attachment_id_count,
                                 added_users[user], fileinfo, s3_path,
                                 zerver_attachment)
                attachment_id_count += 1
            else:
                # For attachments with link not from slack
                # Example: Google drive integration
                has_link = True
                if 'title' in fileinfo:
                    file_name = fileinfo['title']
                else:
                    file_name = fileinfo['name']
                content = '[%s](%s)' % (file_name, fileinfo['url_private'])

        # construct message
        zulip_message = dict(
            sending_client=1,
            rendered_content_version=1,  # This is Zulip-specific
            has_image=has_image,
            subject='imported from slack',  # This is Zulip-specific
            pub_date=float(message['ts']),
            id=message_id,
            # attachment will be posted in the subsequent message;
            # this is how Slack does it, i.e. less like email
            has_attachment=has_attachment,
            edit_history=None,
            sender=added_users[user],  # map slack id to zulip id
            content=content,
            rendered_content=rendered_content,  # slack doesn't cache this
            recipient=recipient_id,
            last_edit_time=None,
            has_link=has_link)
        zerver_message.append(zulip_message)

        # construct usermessages
        usermessage_id_count = build_usermessages(
            zerver_usermessage, usermessage_id_count, zerver_subscription,
            recipient_id, mentioned_users_id, message_id)

        message_id_count += 1

    id_list = (message_id_count, usermessage_id_count,
               reaction_id_count, attachment_id_count)
    return zerver_message, zerver_usermessage, zerver_attachment, uploads_list, \
        reaction_list, id_list
def process_message_attachment(
    upload: Dict[str, Any],
    realm_id: int,
    message_id: int,
    user_id: int,
    user_handler: UserHandler,
    zerver_attachment: List[ZerverFieldsT],
    uploads_list: List[ZerverFieldsT],
    upload_id_to_upload_data_map: Dict[str, Dict[str, Any]],
    output_dir: str,
) -> Tuple[str, bool]:
    """Import a single uploaded file belonging to one message.

    The stored data for ``upload["_id"]`` is looked up in
    ``upload_id_to_upload_data_map``; its chunks are concatenated and
    written to ``<output_dir>/uploads/<upload path>``. A record describing
    the file is appended to ``uploads_list`` and the file is registered
    through ``build_attachment``.

    Returns a tuple ``(attachment_content, has_image)``: the stored
    description followed by a markdown link to the file, and whether the
    last component of ``upload["type"]`` maps to an extension listed in
    ``IMAGE_EXTENSIONS``.
    """
    stored_data = upload_id_to_upload_data_map[upload["_id"]]
    file_name = upload["name"]

    # The extension is derived from the part of the type after the last "/".
    type_suffix = upload["type"].split("/")[-1]
    extension = f".{type_suffix}"
    has_image = extension.lower() in IMAGE_EXTENSIONS

    path_parts = (
        str(realm_id),
        format(random.randint(0, 255), "x"),
        secrets.token_urlsafe(18),
        sanitize_name(file_name),
    )
    s3_path = "/".join(path_parts)

    # Build the attachment from chunks and save it to s3_path.
    destination_path = os.path.join(output_dir, "uploads", s3_path)
    os.makedirs(os.path.dirname(destination_path), exist_ok=True)
    with open(destination_path, "wb") as destination:
        destination.write(b"".join(stored_data["chunk"]))

    description = stored_data["description"]
    attachment_content = f"{description}\n\n[{file_name}](/user_uploads/{s3_path})"

    size = stored_data["size"]
    updated_at = float(stored_data["_updatedAt"].timestamp())
    fileinfo = {"name": file_name, "size": size, "created": updated_at}

    content_type = upload["type"]
    uploader_email = user_handler.get_user(user_id=user_id)["email"]
    upload_record = {
        "path": s3_path,
        "realm_id": realm_id,
        "content_type": content_type,
        "user_profile_id": user_id,
        "last_modified": updated_at,
        "user_profile_email": uploader_email,
        "s3_path": s3_path,
        "size": size,
    }
    uploads_list.append(upload_record)

    build_attachment(
        realm_id=realm_id,
        message_ids={message_id},
        user_id=user_id,
        fileinfo=fileinfo,
        s3_path=s3_path,
        zerver_attachment=zerver_attachment,
    )

    return attachment_content, has_image
def process_message_files(
    message: ZerverFieldsT,
    domain_name: str,
    realm_id: int,
    message_id: int,
    slack_user_id: str,
    users: List[ZerverFieldsT],
    slack_user_id_to_zulip_user_id: SlackToZulipUserIDT,
    zerver_attachment: List[ZerverFieldsT],
    uploads_list: List[ZerverFieldsT],
) -> Dict[str, Any]:
    """Process the files attached to one Slack message.

    Files in "tombstone" or "hidden_by_limit" mode are skipped. A file whose
    ``url_private`` contains ``files.slack.com`` is recorded through
    ``build_uploads`` and ``build_attachment`` (which receive
    ``uploads_list`` and ``zerver_attachment``). Any other file is only
    turned into a markdown link.

    Returns a dict with the keys:
      * ``content``: the markdown links for all files, one per line
      * ``has_attachment``: True if any file was hosted on Slack
      * ``has_image``: True if any Slack-hosted file has an image mimetype
      * ``has_link``: True if any non-skipped file was processed
    """
    has_attachment = False
    has_image = False
    has_link = False

    files = message.get("files", [])

    subtype = message.get("subtype")

    if subtype == "file_share":
        # In Slack messages, uploads can either have the subtype as 'file_share' or
        # have the upload information in 'files' keyword
        files = [message["file"]]

    markdown_links = []

    for fileinfo in files:
        if fileinfo.get("mode", "") in ["tombstone", "hidden_by_limit"]:
            # Slack sometimes includes tombstone mode files with no
            # real data on the actual file (presumably in cases where
            # the file was deleted). hidden_by_limit mode is for files
            # that are hidden because of 10k cap in free plan.
            continue

        url = fileinfo["url_private"]

        if "files.slack.com" in url:
            # For attachments with Slack download link
            has_attachment = True
            has_link = True
            # has_image describes the whole message, so a later non-image
            # file must not reset it to False.
            if "image" in fileinfo["mimetype"]:
                has_image = True

            # Raises IndexError when message["user"] is not in users.
            file_user = [
                iterate_user for iterate_user in users if message["user"] == iterate_user["id"]
            ]
            file_user_email = get_user_email(file_user[0], domain_name)

            s3_path, content_for_link = get_attachment_path_and_content(fileinfo, realm_id)
            markdown_links.append(content_for_link)

            zulip_user_id = slack_user_id_to_zulip_user_id[slack_user_id]
            build_uploads(
                zulip_user_id,
                realm_id,
                file_user_email,
                fileinfo,
                s3_path,
                uploads_list,
            )

            build_attachment(
                realm_id,
                {message_id},
                zulip_user_id,
                fileinfo,
                s3_path,
                zerver_attachment,
            )
        else:
            # For attachments with link not from Slack
            # Example: Google drive integration
            has_link = True
            file_name = fileinfo["title"] if "title" in fileinfo else fileinfo["name"]
            markdown_links.append("[{}]({})".format(file_name, url))

    content = "\n".join(markdown_links)

    return dict(
        content=content,
        has_attachment=has_attachment,
        has_image=has_image,
        has_link=has_link,
    )
def process_message_files(message: ZerverFieldsT,
                          domain_name: str,
                          realm_id: int,
                          message_id: int,
                          user: str,
                          users: List[ZerverFieldsT],
                          added_users: AddedUsersT,
                          zerver_attachment: List[ZerverFieldsT],
                          uploads_list: List[ZerverFieldsT]) -> Dict[str, Any]:
    """Process the files attached to one Slack message.

    A file whose ``url_private`` contains ``files.slack.com`` is recorded
    through ``build_uploads`` and ``build_attachment`` (which receive
    ``uploads_list`` and ``zerver_attachment``). Any other file is only
    turned into a markdown link.

    Returns a dict with the keys:
      * ``content``: the markdown links for all files, one per line
      * ``has_attachment``: True if any file was hosted on Slack
      * ``has_image``: True if any Slack-hosted file has an image mimetype
      * ``has_link``: True if any file was processed
    """
    has_attachment = False
    has_image = False
    has_link = False

    files = message.get('files', [])

    subtype = message.get('subtype')

    if subtype == 'file_share':
        # In Slack messages, uploads can either have the subtype as 'file_share' or
        # have the upload information in 'files' keyword
        files = [message['file']]

    markdown_links = []

    for fileinfo in files:
        url = fileinfo['url_private']

        if 'files.slack.com' in url:
            # For attachments with slack download link
            has_attachment = True
            has_link = True
            # has_image describes the whole message, so a later non-image
            # file must not reset it to False.
            if 'image' in fileinfo['mimetype']:
                has_image = True

            # Raises IndexError when message['user'] is not in users.
            file_user = [iterate_user for iterate_user in users
                         if message['user'] == iterate_user['id']]
            file_user_email = get_user_email(file_user[0], domain_name)

            s3_path, content_for_link = get_attachment_path_and_content(fileinfo, realm_id)
            markdown_links.append(content_for_link)

            # construct attachments
            build_uploads(added_users[user], realm_id, file_user_email, fileinfo,
                          s3_path, uploads_list)

            build_attachment(realm_id, {message_id}, added_users[user],
                             fileinfo, s3_path, zerver_attachment)
        else:
            # For attachments with link not from slack
            # Example: Google drive integration
            has_link = True
            file_name = fileinfo['title'] if 'title' in fileinfo else fileinfo['name']
            markdown_links.append('[%s](%s)' % (file_name, url))

    content = '\n'.join(markdown_links)

    return dict(
        content=content,
        has_attachment=has_attachment,
        has_image=has_image,
        has_link=has_link,
    )