def test_file_name(self) -> None:
    self.assertEqual(sanitize_name(u'test.txt'), u'test.txt')
    self.assertEqual(sanitize_name(u'.hidden'), u'.hidden')
    self.assertEqual(sanitize_name(u'.hidden.txt'), u'.hidden.txt')
    self.assertEqual(sanitize_name(u'tarball.tar.gz'), u'tarball.tar.gz')
    self.assertEqual(sanitize_name(u'.hidden_tarball.tar.gz'), u'.hidden_tarball.tar.gz')
    self.assertEqual(sanitize_name(u'Testing{}*&*#().ta&&%$##&&r.gz'), u'Testing.tar.gz')
    self.assertEqual(sanitize_name(u'*testingfile?*.txt'), u'testingfile.txt')
    self.assertEqual(sanitize_name(u'snowman☃.txt'), u'snowman.txt')
    self.assertEqual(sanitize_name(u'테스트.txt'), u'테스트.txt')
    self.assertEqual(sanitize_name(u'~/."\`\?*"u0`000ssh/test.t**{}ar.gz'), u'.u0000sshtest.tar.gz')
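# The assertions above pin down sanitize_name's behavior without showing its body. Below is
# a minimal sketch consistent with those assertions; it is an illustration only, not Zulip's
# actual implementation (which lives in zerver/lib/upload), and the regex allow-list is an
# assumption inferred from the expected outputs.
import re
import unicodedata

def sanitize_name_sketch(value: str) -> str:
    # Normalize, then keep only word characters, whitespace, '.', '_', and '-'.
    value = unicodedata.normalize('NFKC', value)
    value = re.sub(r'[^\w\s._-]', '', value).strip()
    # Collapse runs of whitespace/hyphens into a single '-'.
    return re.sub(r'[-\s]+', '-', value)

assert sanitize_name_sketch('Testing{}*&*#().ta&&%$##&&r.gz') == 'Testing.tar.gz'
assert sanitize_name_sketch('snowman☃.txt') == 'snowman.txt'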
def test_file_name(self) -> None:
    """
    Unicode filenames should be processed correctly.
    """
    self.login(self.example_email("hamlet"))
    for expected in ["Здравейте.txt", "test"]:
        fp = StringIO("bah!")
        fp.name = urllib.parse.quote(expected)

        result = self.client_post("/json/user_uploads", {'f1': fp})
        assert sanitize_name(expected) in result.json()['uri']
def get_attachment_path_and_content(fileinfo: ZerverFieldsT, realm_id: int) -> Tuple[str, str]:
    # Should be kept in sync with its equivalent in zerver/lib/uploads in the function
    # 'upload_message_image'
    s3_path = "/".join([
        str(realm_id),
        format(random.randint(0, 255), 'x'),
        random_name(18),
        sanitize_name(fileinfo['name'])
    ])
    attachment_path = ('/user_uploads/%s' % (s3_path))
    content = '[%s](%s)' % (fileinfo['title'], attachment_path)

    return s3_path, content
def get_attachment_path_and_content(fileinfo: ZerverFieldsT, realm_id: int) -> Tuple[str, str]:
    # Should be kept in sync with its equivalent in zerver/lib/uploads in the function
    # 'upload_message_file'
    s3_path = "/".join([
        str(realm_id),
        'SlackImportAttachment',  # This is a special placeholder which should be kept
                                  # in sync with 'exports.py' function 'import_message_data'
        format(random.randint(0, 255), 'x'),
        random_name(18),
        sanitize_name(fileinfo['name'])
    ])
    attachment_path = ('/user_uploads/%s' % (s3_path))
    content = '[%s](%s)' % (fileinfo['title'], attachment_path)

    return s3_path, content
def get_attachment_path_and_content(fileinfo: ZerverFieldsT, realm_id: int) -> Tuple[str, str]:
    # Should be kept in sync with its equivalent in zerver/lib/uploads in the function
    # 'upload_message_file'
    s3_path = "/".join(
        [
            str(realm_id),
            "SlackImportAttachment",  # This is a special placeholder which should be kept
            # in sync with 'exports.py' function 'import_message_data'
            format(random.randint(0, 255), "x"),
            secrets.token_urlsafe(18),
            sanitize_name(fileinfo["name"]),
        ]
    )
    attachment_path = f"/user_uploads/{s3_path}"
    content = "[{}]({})".format(fileinfo["title"], attachment_path)

    return s3_path, content
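# An illustrative call to the helper above; the fileinfo values are invented, and the middle
# path components differ on every call because they come from random/secrets:
# fileinfo = {"name": "notes.pdf", "title": "notes.pdf"}
# s3_path, content = get_attachment_path_and_content(fileinfo, realm_id=2)
# s3_path resembles  "2/SlackImportAttachment/<hex byte>/<url-safe token>/notes.pdf"
# content resembles  "[notes.pdf](/user_uploads/2/SlackImportAttachment/<hex byte>/<url-safe token>/notes.pdf)"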
def import_uploads_s3(bucket_name: str, import_dir: Path, processing_avatars: bool = False,
                      processing_emojis: bool = False) -> None:
    upload_backend = S3UploadBackend()
    conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
    bucket = conn.get_bucket(bucket_name, validate=True)

    records_filename = os.path.join(import_dir, "records.json")
    with open(records_filename) as records_file:
        records = ujson.loads(records_file.read())

    re_map_foreign_keys_internal(records, 'records', 'realm_id', related_table="realm",
                                 id_field=True)
    re_map_foreign_keys_internal(records, 'records', 'user_profile_id',
                                 related_table="user_profile", id_field=True)
    for record in records:
        key = Key(bucket)

        if processing_avatars:
            # For avatars, we need to rehash the user's email with the
            # new server's avatar salt
            avatar_path = user_avatar_path_from_ids(record['user_profile_id'], record['realm_id'])
            key.key = avatar_path
            if record['s3_path'].endswith('.original'):
                key.key += '.original'
        if processing_emojis:
            # For emojis we follow the function 'upload_emoji_image'
            emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format(
                realm_id=record['realm_id'],
                emoji_file_name=record['file_name'])
            key.key = emoji_path
        else:
            # Should be kept in sync with its equivalent in zerver/lib/uploads in the
            # function 'upload_message_image'
            s3_file_name = "/".join([
                str(record['realm_id']),
                random_name(18),
                sanitize_name(os.path.basename(record['path']))
            ])
            key.key = s3_file_name
            path_maps['attachment_path'][record['path']] = s3_file_name

        user_profile_id = int(record['user_profile_id'])
        # Support email gateway bot and other cross-realm messages
        if user_profile_id in id_maps["user_profile"]:
            logging.info("Uploaded by ID mapped user: %s!" % (user_profile_id,))
            user_profile_id = id_maps["user_profile"][user_profile_id]
        user_profile = get_user_profile_by_id(user_profile_id)
        key.set_metadata("user_profile_id", str(user_profile.id))
        key.set_metadata("realm_id", str(user_profile.realm_id))
        key.set_metadata("orig_last_modified", record['last_modified'])

        headers = {'Content-Type': record['content_type']}

        key.set_contents_from_filename(os.path.join(import_dir, record['path']), headers=headers)

        if processing_avatars:
            # TODO: Ideally, we'd do this in a separate pass, after
            # all the avatars have been uploaded, since we may end up
            # unnecessarily resizing images just before the medium-size
            # image in the export is uploaded.  See the local uploads
            # code path for more notes.
            upload_backend.ensure_medium_avatar_image(user_profile=user_profile)
def import_uploads_local(import_dir: Path, processing_avatars: bool = False,
                         processing_emojis: bool = False) -> None:
    records_filename = os.path.join(import_dir, "records.json")
    with open(records_filename) as records_file:
        records = ujson.loads(records_file.read())

    re_map_foreign_keys_internal(records, 'records', 'realm_id', related_table="realm",
                                 id_field=True)
    if not processing_emojis:
        re_map_foreign_keys_internal(records, 'records', 'user_profile_id',
                                     related_table="user_profile", id_field=True)
    for record in records:
        if processing_avatars:
            # For avatars, we need to rehash the user ID with the
            # new server's avatar salt
            avatar_path = user_avatar_path_from_ids(record['user_profile_id'], record['realm_id'])
            file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars", avatar_path)
            if record['s3_path'].endswith('.original'):
                file_path += '.original'
            else:
                file_path += '.png'
        elif processing_emojis:
            # For emojis we follow the function 'upload_emoji_image'
            emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format(
                realm_id=record['realm_id'],
                emoji_file_name=record['file_name'])
            file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars", emoji_path)
        else:
            # Should be kept in sync with its equivalent in zerver/lib/uploads in the
            # function 'upload_message_image'
            s3_file_name = "/".join([
                str(record['realm_id']),
                random_name(18),
                sanitize_name(os.path.basename(record['path']))
            ])
            file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "files", s3_file_name)
            path_maps['attachment_path'][record['path']] = s3_file_name

        orig_file_path = os.path.join(import_dir, record['path'])
        if not os.path.exists(os.path.dirname(file_path)):
            subprocess.check_call(["mkdir", "-p", os.path.dirname(file_path)])
        shutil.copy(orig_file_path, file_path)

    if processing_avatars:
        # Ensure that we have medium-size avatar images for every
        # avatar.  TODO: This implementation is hacky, both in that it
        # does get_user_profile_by_id for each user, and in that it
        # might be better to require the export to just have these.
        upload_backend = LocalUploadBackend()
        for record in records:
            if record['s3_path'].endswith('.original'):
                user_profile = get_user_profile_by_id(record['user_profile_id'])
                avatar_path = user_avatar_path_from_ids(user_profile.id, record['realm_id'])
                medium_file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars",
                                                avatar_path) + '-medium.png'
                if os.path.exists(medium_file_path):
                    # We remove the image here primarily to deal with
                    # issues when running the import script multiple
                    # times in development (where one might reuse the
                    # same realm ID from a previous iteration).
                    os.remove(medium_file_path)
                upload_backend.ensure_medium_avatar_image(user_profile=user_profile)
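# path_maps is module-level state written by both import paths above: each original
# attachment path from the export is mapped to the freshly generated local/S3 path,
# presumably so attachment links inside message bodies can be rewritten later in the import.
# A sketch of its shape (the concrete paths below are invented for illustration):
# path_maps = {
#     'attachment_path': {
#         '2/ab/OldRandomName/photo.png': '13/NewRandomName/photo.png',
#     },
# }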
def process_message_attachments(
    attachments: List[Dict[str, Any]],
    realm_id: int,
    message_id: int,
    user_id: int,
    user_handler: UserHandler,
    zerver_attachment: List[ZerverFieldsT],
    uploads_list: List[ZerverFieldsT],
    mattermost_data_dir: str,
    output_dir: str,
) -> Tuple[str, bool]:
    has_image = False

    markdown_links = []

    for attachment in attachments:
        attachment_path = attachment["path"]
        attachment_full_path = os.path.join(mattermost_data_dir, "data", attachment_path)

        file_name = attachment_path.split("/")[-1]
        file_ext = f'.{file_name.split(".")[-1]}'

        if file_ext.lower() in IMAGE_EXTENSIONS:
            has_image = True

        s3_path = "/".join(
            [
                str(realm_id),
                format(random.randint(0, 255), "x"),
                secrets.token_urlsafe(18),
                sanitize_name(file_name),
            ]
        )
        content_for_link = f"[{file_name}](/user_uploads/{s3_path})"

        markdown_links.append(content_for_link)

        fileinfo = {
            "name": file_name,
            "size": os.path.getsize(attachment_full_path),
            "created": os.path.getmtime(attachment_full_path),
        }

        upload = dict(
            path=s3_path,
            realm_id=realm_id,
            content_type=None,
            user_profile_id=user_id,
            last_modified=fileinfo["created"],
            user_profile_email=user_handler.get_user(user_id=user_id)["email"],
            s3_path=s3_path,
            size=fileinfo["size"],
        )
        uploads_list.append(upload)

        build_attachment(
            realm_id=realm_id,
            message_ids={message_id},
            user_id=user_id,
            fileinfo=fileinfo,
            s3_path=s3_path,
            zerver_attachment=zerver_attachment,
        )

        # Copy the attachment file to output_dir
        attachment_out_path = os.path.join(output_dir, "uploads", s3_path)
        os.makedirs(os.path.dirname(attachment_out_path), exist_ok=True)
        shutil.copyfile(attachment_full_path, attachment_out_path)

    content = "\n".join(markdown_links)
    return content, has_image
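# IMAGE_EXTENSIONS is not shown in this excerpt; the comparison above assumes it is a
# collection of lowercase dotted suffixes, something along the lines of:
# IMAGE_EXTENSIONS = [".bmp", ".gif", ".jpg", ".jpeg", ".png", ".webp"]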
def process_message_attachment(
    upload: Dict[str, Any],
    realm_id: int,
    message_id: int,
    user_id: int,
    user_handler: UserHandler,
    zerver_attachment: List[ZerverFieldsT],
    uploads_list: List[ZerverFieldsT],
    upload_id_to_upload_data_map: Dict[str, Dict[str, Any]],
    output_dir: str,
) -> Tuple[str, bool]:
    upload_file_data = upload_id_to_upload_data_map[upload["_id"]]
    file_name = upload["name"]
    file_ext = f'.{upload["type"].split("/")[-1]}'

    has_image = False
    if file_ext.lower() in IMAGE_EXTENSIONS:
        has_image = True

    s3_path = "/".join(
        [
            str(realm_id),
            format(random.randint(0, 255), "x"),
            secrets.token_urlsafe(18),
            sanitize_name(file_name),
        ]
    )

    # Build the attachment from chunks and save it to s3_path.
    file_out_path = os.path.join(output_dir, "uploads", s3_path)
    os.makedirs(os.path.dirname(file_out_path), exist_ok=True)
    with open(file_out_path, "wb") as upload_file:
        upload_file.write(b"".join(upload_file_data["chunk"]))

    attachment_content = (
        f'{upload_file_data["description"]}\n\n[{file_name}](/user_uploads/{s3_path})'
    )

    fileinfo = {
        "name": file_name,
        "size": upload_file_data["size"],
        "created": float(upload_file_data["_updatedAt"].timestamp()),
    }

    upload = dict(
        path=s3_path,
        realm_id=realm_id,
        content_type=upload["type"],
        user_profile_id=user_id,
        last_modified=fileinfo["created"],
        user_profile_email=user_handler.get_user(user_id=user_id)["email"],
        s3_path=s3_path,
        size=fileinfo["size"],
    )
    uploads_list.append(upload)

    build_attachment(
        realm_id=realm_id,
        message_ids={message_id},
        user_id=user_id,
        fileinfo=fileinfo,
        s3_path=s3_path,
        zerver_attachment=zerver_attachment,
    )

    return attachment_content, has_image
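# The Rocket.Chat-specific inputs above are not defined in this excerpt. Judging by the keys
# the function reads, upload_id_to_upload_data_map is assumed to look roughly like:
# upload_id_to_upload_data_map = {
#     "abc123": {
#         "chunk": [b"...first part...", b"...second part..."],  # raw file bytes, in order
#         "size": 2048,
#         "description": "quarterly report",
#         "_updatedAt": datetime.datetime(2021, 5, 1, 12, 0, 0),
#     },
# }
# while `upload` itself carries at least "_id", "name", and "type" (a MIME type such as
# "image/png", whose subtype is used above to guess the file extension).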
def import_uploads_s3(bucket_name: str, import_dir: Path, processing_avatars: bool=False,
                      processing_emojis: bool=False) -> None:
    upload_backend = S3UploadBackend()
    conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
    bucket = conn.get_bucket(bucket_name, validate=True)

    records_filename = os.path.join(import_dir, "records.json")
    with open(records_filename) as records_file:
        records = ujson.loads(records_file.read())

    re_map_foreign_keys_internal(records, 'records', 'realm_id', related_table="realm",
                                 id_field=True)
    timestamp = datetime_to_timestamp(timezone_now())
    if not processing_emojis:
        re_map_foreign_keys_internal(records, 'records', 'user_profile_id',
                                     related_table="user_profile", id_field=True)
    for record in records:
        key = Key(bucket)

        if processing_avatars:
            # For avatars, we need to rehash the user's email with the
            # new server's avatar salt
            avatar_path = user_avatar_path_from_ids(record['user_profile_id'], record['realm_id'])
            key.key = avatar_path
            if record['s3_path'].endswith('.original'):
                key.key += '.original'
        elif processing_emojis:
            # For emojis we follow the function 'upload_emoji_image'
            emoji_path = RealmEmoji.PATH_ID_TEMPLATE.format(
                realm_id=record['realm_id'],
                emoji_file_name=record['file_name'])
            key.key = emoji_path
            record['last_modified'] = timestamp
        else:
            # Should be kept in sync with its equivalent in zerver/lib/uploads in the
            # function 'upload_message_image'
            s3_file_name = "/".join([
                str(record['realm_id']),
                random_name(18),
                sanitize_name(os.path.basename(record['path']))
            ])
            key.key = s3_file_name
            path_maps['attachment_path'][record['s3_path']] = s3_file_name

        # Exported custom emoji from tools like Slack don't have
        # the data for what user uploaded them in `user_profile_id`.
        if not processing_emojis:
            user_profile_id = int(record['user_profile_id'])
            # Support email gateway bot and other cross-realm messages
            if user_profile_id in id_maps["user_profile"]:
                logging.info("Uploaded by ID mapped user: %s!" % (user_profile_id,))
                user_profile_id = id_maps["user_profile"][user_profile_id]
            user_profile = get_user_profile_by_id(user_profile_id)
            key.set_metadata("user_profile_id", str(user_profile.id))

        key.set_metadata("orig_last_modified", record['last_modified'])
        key.set_metadata("realm_id", str(record['realm_id']))

        # Zulip exports will always have a content-type, but third-party exports might not.
        content_type = record.get("content_type", guess_type(record['s3_path'])[0])
        headers = {'Content-Type': content_type}

        key.set_contents_from_filename(os.path.join(import_dir, record['path']), headers=headers)

    if processing_avatars:
        # Ensure that we have medium-size avatar images for every
        # avatar.  TODO: This implementation is hacky, both in that it
        # does get_user_profile_by_id for each user, and in that it
        # might be better to require the export to just have these.
        upload_backend = S3UploadBackend()
        for record in records:
            if record['s3_path'].endswith('.original'):
                user_profile = get_user_profile_by_id(record['user_profile_id'])
                upload_backend.ensure_medium_avatar_image(user_profile=user_profile)
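# A quick illustration of the content-type fallback used above: guess_type comes from the
# standard library mimetypes module and keys off the extension at the end of the stored path.
# from mimetypes import guess_type
# guess_type("2/abcDEF/photo.png")[0]       # -> "image/png"
# guess_type("2/abcDEF/archive.tar.gz")[0]  # -> "application/x-tar" (the "gzip" encoding is
#                                           #    returned separately as the second element)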
def import_uploads(import_dir: Path, processing_avatars: bool=False,
                   processing_emojis: bool=False) -> None:
    if processing_avatars and processing_emojis:
        raise AssertionError("Cannot import avatars and emojis at the same time!")
    if processing_avatars:
        logging.info("Importing avatars")
    elif processing_emojis:
        logging.info("Importing emojis")
    else:
        logging.info("Importing uploaded files")

    records_filename = os.path.join(import_dir, "records.json")
    with open(records_filename) as records_file:
        records = ujson.loads(records_file.read())  # type: List[Dict[str, Any]]
    timestamp = datetime_to_timestamp(timezone_now())

    re_map_foreign_keys_internal(records, 'records', 'realm_id', related_table="realm",
                                 id_field=True)
    if not processing_emojis:
        re_map_foreign_keys_internal(records, 'records', 'user_profile_id',
                                     related_table="user_profile", id_field=True)

    s3_uploads = settings.LOCAL_UPLOADS_DIR is None

    if s3_uploads:
        if processing_avatars or processing_emojis:
            bucket_name = settings.S3_AVATAR_BUCKET
        else:
            bucket_name = settings.S3_AUTH_UPLOADS_BUCKET
        conn = S3Connection(settings.S3_KEY, settings.S3_SECRET_KEY)
        bucket = conn.get_bucket(bucket_name, validate=True)

    count = 0
    for record in records:
        count += 1
        if count % 1000 == 0:
            logging.info("Processed %s/%s uploads" % (count, len(records)))

        if processing_avatars:
            # For avatars, we need to rehash the user ID with the
            # new server's avatar salt
            relative_path = user_avatar_path_from_ids(record['user_profile_id'],
                                                      record['realm_id'])
            if record['s3_path'].endswith('.original'):
                relative_path += '.original'
            else:
                relative_path += '.png'
        elif processing_emojis:
            # For emojis we follow the function 'upload_emoji_image'
            relative_path = RealmEmoji.PATH_ID_TEMPLATE.format(
                realm_id=record['realm_id'],
                emoji_file_name=record['file_name'])
            record['last_modified'] = timestamp
        else:
            # Should be kept in sync with its equivalent in zerver/lib/uploads in the
            # function 'upload_message_file'
            relative_path = "/".join([
                str(record['realm_id']),
                random_name(18),
                sanitize_name(os.path.basename(record['path']))
            ])
            path_maps['attachment_path'][record['s3_path']] = relative_path

        if s3_uploads:
            key = Key(bucket)
            key.key = relative_path
            # Exported custom emoji from tools like Slack don't have
            # the data for what user uploaded them in `user_profile_id`.
            if not processing_emojis:
                user_profile_id = int(record['user_profile_id'])
                # Support email gateway bot and other cross-realm messages
                if user_profile_id in ID_MAP["user_profile"]:
                    logging.info("Uploaded by ID mapped user: %s!" % (user_profile_id,))
                    user_profile_id = ID_MAP["user_profile"][user_profile_id]
                user_profile = get_user_profile_by_id(user_profile_id)
                key.set_metadata("user_profile_id", str(user_profile.id))

            if 'last_modified' in record:
                key.set_metadata("orig_last_modified", record['last_modified'])
            key.set_metadata("realm_id", str(record['realm_id']))

            # Zulip exports will always have a content-type, but third-party exports might not.
            content_type = record.get("content_type")
            if content_type is None:
                content_type = guess_type(record['s3_path'])[0]
            headers = {'Content-Type': content_type}  # type: Dict[str, Any]

            key.set_contents_from_filename(os.path.join(import_dir, record['path']),
                                           headers=headers)
        else:
            if processing_avatars or processing_emojis:
                file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars", relative_path)
            else:
                file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "files", relative_path)
            orig_file_path = os.path.join(import_dir, record['path'])
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            shutil.copy(orig_file_path, file_path)

    if processing_avatars:
        from zerver.lib.upload import upload_backend
        # Ensure that we have medium-size avatar images for every
        # avatar.  TODO: This implementation is hacky, both in that it
        # does get_user_profile_by_id for each user, and in that it
        # might be better to require the export to just have these.
        for record in records:
            if record['s3_path'].endswith('.original'):
                user_profile = get_user_profile_by_id(record['user_profile_id'])
                if settings.LOCAL_UPLOADS_DIR is not None:
                    avatar_path = user_avatar_path_from_ids(user_profile.id, record['realm_id'])
                    medium_file_path = os.path.join(settings.LOCAL_UPLOADS_DIR, "avatars",
                                                    avatar_path) + '-medium.png'
                    if os.path.exists(medium_file_path):
                        # We remove the image here primarily to deal with
                        # issues when running the import script multiple
                        # times in development (where one might reuse the
                        # same realm ID from a previous iteration).
                        os.remove(medium_file_path)
                upload_backend.ensure_medium_avatar_image(user_profile=user_profile)
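# A hedged sketch of how import_uploads might be driven from a realm import. The
# subdirectory names below ("uploads", "avatars", "emoji") are assumptions about the export
# layout, not something this excerpt establishes; each directory is expected to contain its
# own records.json, since import_uploads reads it from the directory it is given.
def import_all_uploads_sketch(import_dir: Path) -> None:
    import_uploads(os.path.join(import_dir, "uploads"))
    import_uploads(os.path.join(import_dir, "avatars"), processing_avatars=True)
    emoji_dir = os.path.join(import_dir, "emoji")
    if os.path.exists(emoji_dir):
        import_uploads(emoji_dir, processing_emojis=True)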