def test_get_valid_s3_key():
    """get_valid_s3_key should avoid duplicate S3 keys"""
    site = WebsiteFactory.create()
    site_prefix = site.starter.config.get("root-url-path").rstrip("/")
    file_1 = DriveFileFactory.create(
        name="(file).PnG", website=site, mime_type="image/png", s3_key=None
    )
    file_1.s3_key = file_1.get_valid_s3_key()
    assert file_1.s3_key == f"{site_prefix}/{site.name}/file.png"
    file_1.save()
    file_2 = DriveFileFactory.create(
        name="File!.pNG", website=site, mime_type="image/png", s3_key=None
    )
    file_2.s3_key = file_2.get_valid_s3_key()
    assert file_2.s3_key == f"{site_prefix}/{site.name}/file2.png"
    file_2.save()
    file_3 = DriveFileFactory.create(
        name="FILE?.png", website=site, mime_type="image/png", s3_key=None
    )
    file_3.s3_key = file_3.get_valid_s3_key()
    assert file_3.s3_key == f"{site_prefix}/{site.name}/file3.png"
    # Same filename on a different website should not be deduplicated
    file_4 = DriveFileFactory.create(name="(file).PnG", mime_type="image/png", s3_key=None)
    assert file_4.get_valid_s3_key() == f"{site_prefix}/{file_4.website.name}/file.png"

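# The parametrize cases below are a hypothetical reconstruction -- the original
# decorator is not in this excerpt. The values follow the test's own assertions:
# the final status should reflect whether none, some, or all drive files failed,
# and sync_errors should combine file-level and site-level errors.
@pytest.mark.parametrize(
    ("file_errors", "site_errors", "status"),
    [
        ([None, None], [], WebsiteSyncStatus.COMPLETE),
        (["file error", None], ["site error"], WebsiteSyncStatus.ERRORS),
        (["file error 1", "file error 2"], None, WebsiteSyncStatus.FAILED),
    ],
)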
def test_update_sync_status(file_errors, site_errors, status):
    """update_sync_status should update the website sync_status field as expected"""
    now = now_in_utc()
    website = WebsiteFactory.create(
        synced_on=now,
        sync_status=WebsiteSyncStatus.PROCESSING,
        sync_errors=site_errors,
    )
    for error in file_errors:
        DriveFileFactory.create(
            website=website,
            sync_error=error,
            sync_dt=now,
            resource=(
                WebsiteContentFactory.create(
                    type=CONTENT_TYPE_RESOURCE, website=website
                )
                if not error
                else None
            ),
            status=(
                DriveFileStatus.COMPLETE if error is None else DriveFileStatus.FAILED
            ),
        )
    # An extra file synced after `now`; presumably excluded from this status update
    DriveFileFactory.create(
        website=website,
        sync_dt=now_in_utc() + timedelta(seconds=10),
        resource=WebsiteContentFactory.create(
            type=CONTENT_TYPE_RESOURCE, website=website
        ),
    )
    update_sync_status(website, now)
    website.refresh_from_db()
    assert website.sync_status == status
    assert sorted(website.sync_errors) == sorted(
        [error for error in file_errors if error] + (site_errors or [])
    )

def test_delete_from_s3(mocker):
    """Deleting a DriveFile should also delete it from S3"""
    mock_delete_s3_objects = mocker.patch("gdrive_sync.signals.delete_s3_objects")
    drive_file = DriveFileFactory.create()
    drive_file.delete()
    mock_delete_s3_objects.delay.assert_called_once_with(drive_file.s3_key)

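# Hypothetical mime_type values (the original parametrize decorator is not in
# this excerpt); the test creates .docx files, so any Word document mime type
# that maps to RESOURCE_TYPE_DOCUMENT should exercise the same path.
@pytest.mark.parametrize(
    "mime_type",
    [
        "application/msword",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    ],
)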
def test_create_gdrive_resource_content(mime_type, mock_get_s3_content_type):
    """create_gdrive_resource_content should create a WebsiteContent object linked to a DriveFile object"""
    filenames = ["word.docx", "word!.docx", "(word?).docx"]
    deduped_names = ["word", "word2", "word3"]
    website = WebsiteFactory.create()
    for filename, deduped_name in zip(filenames, deduped_names):
        drive_file = DriveFileFactory.create(
            website=website,
            name=filename,
            s3_key=f"test/path/{deduped_name}.docx",
            mime_type=mime_type,
        )
        create_gdrive_resource_content(drive_file)
        content = WebsiteContent.objects.filter(
            website=website,
            title=filename,
            type="resource",
            is_page_content=True,
        ).first()
        assert content is not None
        assert content.dirpath == "content/resource"
        assert content.filename == deduped_name
        assert content.metadata["resourcetype"] == RESOURCE_TYPE_DOCUMENT
        assert content.metadata["file_type"] == mime_type
        assert content.metadata["image"] == ""
        drive_file.refresh_from_db()
        assert drive_file.resource == content

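# Assumed parametrization: the test body branches on raises_exception, so the
# missing decorator presumably covered both outcomes.
@pytest.mark.parametrize("raises_exception", [True, False])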
def test_import_recent_files_nonvideos(settings, mocker, mocked_celery, raises_exception):
    """import_recent_files should import non-video files"""
    mocker.patch("gdrive_sync.tasks.api.is_gdrive_enabled", return_value=True)
    settings.DRIVE_SHARED_ID = "test_drive"
    settings.DRIVE_UPLOADS_PARENT_FOLDER_ID = "parent"
    website = WebsiteFactory.create()
    parent_tree_responses = [
        [
            {"id": "parent", "name": "ancestor_exists"},
            {
                "id": LIST_FILE_RESPONSES[0]["files"][i]["parents"][0],
                "name": website.short_id,
            },
            {"id": "abc123", "name": DRIVE_FOLDER_FILES_FINAL},
        ]
        for i in range(2)
    ]
    mocker.patch("gdrive_sync.api.get_parent_tree", side_effect=parent_tree_responses)
    mocker.patch(
        "gdrive_sync.tasks.api.query_files",
        return_value=LIST_FILE_RESPONSES[0]["files"],
    )
    side_effect = (
        [Exception(), Exception()]
        if raises_exception
        else [
            DriveFileFactory.create(file_id=LIST_FILE_RESPONSES[0]["files"][0]["id"]),
            None,
        ]
    )
    mocker.patch("gdrive_sync.tasks.api.process_file_result", side_effect=side_effect)
    mock_process_drive_file_task = mocker.patch("gdrive_sync.tasks.process_drive_file.s")
    mock_log = mocker.patch("gdrive_sync.tasks.log.exception")
    last_dt = datetime.strptime("2021-01-01", "%Y-%m-%d").replace(tzinfo=pytz.UTC)
    if not raises_exception:
        # a successful run replaces itself with a celery chain, which the
        # mocked_celery fixture surfaces as replace_exception_class
        with pytest.raises(mocked_celery.replace_exception_class):
            import_recent_files.delay(last_dt=last_dt)
    else:
        import_recent_files.delay(last_dt=last_dt)
    # video files should never be processed by this task
    with pytest.raises(AssertionError):
        mock_process_drive_file_task.assert_any_call(
            LIST_VIDEO_RESPONSES[1]["files"][0]["id"]
        )
    if not raises_exception:
        mock_process_drive_file_task.assert_any_call(
            LIST_FILE_RESPONSES[0]["files"][0]["id"]
        )
    assert mock_log.call_count == (
        len(LIST_FILE_RESPONSES[0]["files"]) if raises_exception else 0
    )

def test_create_gdrive_resource_content_forbidden_name(mock_get_s3_content_type):
    """Content for a google drive file with a forbidden name should have its filename attribute modified"""
    drive_file = DriveFileFactory.create(
        name=f"{CONTENT_FILENAMES_FORBIDDEN[0]}.pdf",
        s3_key=f"test/path/{CONTENT_FILENAMES_FORBIDDEN[0]}.pdf",
        mime_type="application/pdf",
    )
    create_gdrive_resource_content(drive_file)
    drive_file.refresh_from_db()
    assert (
        drive_file.resource.filename
        == f"{CONTENT_FILENAMES_FORBIDDEN[0]}-{CONTENT_TYPE_RESOURCE}"
    )

def test_stream_to_s3_error(mocker):
    """stream_to_s3 should mark the DriveFile as failed and revoke the drive permission if the download errors"""
    mocker.patch("gdrive_sync.api.boto3")
    mock_service = mocker.patch("gdrive_sync.api.get_drive_service")
    mocker.patch("gdrive_sync.api.streaming_download", side_effect=HTTPError())
    drive_file = DriveFileFactory.create()
    with pytest.raises(HTTPError):
        api.stream_to_s3(drive_file)
    drive_file.refresh_from_db()
    assert drive_file.status == DriveFileStatus.UPLOAD_FAILED
    mock_service.return_value.permissions.return_value.delete.assert_called_once()

def test_create_gdrive_resource_content_update(mock_get_s3_content_type):
    """create_gdrive_resource_content should update a WebsiteContent object linked to a DriveFile object"""
    content = WebsiteContentFactory.create(file="test/path/old.doc")
    drive_file = DriveFileFactory.create(
        website=content.website, s3_key="test/path/word.docx", resource=content
    )
    assert content.file != drive_file.s3_key
    create_gdrive_resource_content(drive_file)
    content.refresh_from_db()
    drive_file.refresh_from_db()
    assert content.file == drive_file.s3_key
    assert drive_file.resource == content

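# Assumed parametrization; the sample current_s3_key is hypothetical, since only
# its truthiness matters to the expected_key branches below.
@pytest.mark.parametrize("is_video", [True, False])
@pytest.mark.parametrize("current_s3_key", [None, "courses/website/current-file.png"])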
def test_stream_to_s3(settings, mocker, is_video, current_s3_key):
    """stream_to_s3 should make expected drive api and S3 upload calls"""
    mock_service = mocker.patch("gdrive_sync.api.get_drive_service")
    mock_download = mocker.patch("gdrive_sync.api.streaming_download")
    mock_boto3 = mocker.patch("gdrive_sync.api.boto3")
    mock_bucket = mock_boto3.resource.return_value.Bucket.return_value
    drive_file = DriveFileFactory.create(
        name="A (Test) File!.ext",
        s3_key=current_s3_key,
        mime_type="video/mp4" if is_video else "application/pdf",
        drive_path=f"website/{DRIVE_FOLDER_VIDEOS_FINAL if is_video else DRIVE_FOLDER_FILES_FINAL}",
    )
    api.stream_to_s3(drive_file)
    mock_service.return_value.permissions.return_value.create.assert_called_once()
    if current_s3_key:
        expected_key = current_s3_key
    elif is_video:
        expected_key = (
            f"{settings.DRIVE_S3_UPLOAD_PREFIX}/{drive_file.website.name}"
            f"/{drive_file.file_id}/a-test-file.ext"
        )
    else:
        expected_key = f"{drive_file.s3_prefix}/{drive_file.website.name}/a-test-file.ext"
    if is_video:
        expected_extra_args = {
            "ContentType": drive_file.mime_type,
            "ACL": "public-read",
            "ContentDisposition": "attachment",
        }
    else:
        expected_extra_args = {
            "ContentType": drive_file.mime_type,
            "ACL": "public-read",
        }
    mock_bucket.upload_fileobj.assert_called_with(
        Fileobj=mocker.ANY,
        Key=expected_key,
        ExtraArgs=expected_extra_args,
    )
    mock_download.assert_called_once_with(drive_file)
    mock_service.return_value.permissions.return_value.delete.assert_called_once()
    drive_file.refresh_from_db()
    assert drive_file.status == DriveFileStatus.UPLOAD_COMPLETE
    assert drive_file.s3_key == expected_key

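# Assumed cases: AWS settings either fully populated (values mirror those used
# in test_transcode_gdrive_video_error below) or entirely absent.
@pytest.mark.parametrize(
    ("account_id", "region", "role_name"),
    [("accountABC", "us-east-1", "roleDEF"), (None, None, None)],
)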
def test_transcode_gdrive_video(settings, mocker, account_id, region, role_name):
    """transcode_gdrive_video should create a Video object and call create_media_convert_job"""
    settings.AWS_ACCOUNT_ID = account_id
    settings.AWS_REGION = region
    settings.AWS_ROLE_NAME = role_name
    mock_convert_job = mocker.patch("gdrive_sync.api.create_media_convert_job")
    drive_file = DriveFileFactory.create()
    transcode_gdrive_video(drive_file)
    drive_file.refresh_from_db()
    if account_id and region and role_name:
        assert drive_file.video.source_key == drive_file.s3_key
        mock_convert_job.assert_called_once_with(drive_file.video)
    else:
        assert drive_file.video is None
        mock_convert_job.assert_not_called()

def test_transcode_gdrive_video_error(settings, mocker):
    """Video status should be set to failure if a client error occurs"""
    settings.AWS_ACCOUNT_ID = "accountABC"
    settings.AWS_REGION = "us-east-1"
    settings.AWS_ROLE_NAME = "roleDEF"
    mocker.patch(
        "gdrive_sync.api.create_media_convert_job",
        side_effect=ClientError({"Error": {}}, "transcode"),
    )
    mock_log = mocker.patch("gdrive_sync.api.log.exception")
    drive_file = DriveFileFactory.create()
    with pytest.raises(ClientError):
        transcode_gdrive_video(drive_file)
    drive_file.refresh_from_db()
    mock_log.assert_called_once_with(
        "Error creating transcode job for %s", drive_file.video.source_key
    )
    assert drive_file.video.status == VideoStatus.FAILED

def test_create_gdrive_resource_content_error(mocker):
    """create_gdrive_resource_content should log an exception and update the status if something goes wrong"""
    mocker.patch(
        "gdrive_sync.api.get_s3_content_type",
        # side_effect (not return_value) so the mock raises the exception
        side_effect=Exception("Could not determine resource type"),
    )
    mock_log = mocker.patch("gdrive_sync.api.log.exception")
    content = WebsiteContentFactory.create()
    drive_file = DriveFileFactory.create(
        website=content.website, s3_key="test/path/word.docx", resource=content
    )
    create_gdrive_resource_content(drive_file)
    content.refresh_from_db()
    drive_file.refresh_from_db()
    assert drive_file.status == DriveFileStatus.FAILED
    mock_log.assert_called_once_with(
        "Error creating resource for drive file %s", drive_file.file_id
    )

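# Assumed parametrization over both branches exercised by the call-count
# assertions below.
@pytest.mark.parametrize("is_video", [True, False])
@pytest.mark.parametrize("has_error", [True, False])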
def test_process_drive_file(mocker, is_video, has_error):
    """The necessary steps should be run to process a google drive file"""
    drive_file = DriveFileFactory.create(
        drive_path=(DRIVE_FOLDER_VIDEOS_FINAL if is_video else DRIVE_FOLDER_FILES_FINAL)
    )
    mock_stream_s3 = mocker.patch(
        "gdrive_sync.tasks.api.stream_to_s3",
        side_effect=[Exception("No bucket") if has_error else None],
    )
    mock_transcode = mocker.patch("gdrive_sync.tasks.api.transcode_gdrive_video")
    mock_create_resource = mocker.patch(
        "gdrive_sync.tasks.api.create_gdrive_resource_content"
    )
    mock_log = mocker.patch("gdrive_sync.tasks.log.exception")
    process_drive_file.delay(drive_file.file_id)
    assert mock_stream_s3.call_count == 1
    assert mock_transcode.call_count == (1 if is_video and not has_error else 0)
    assert mock_create_resource.call_count == (0 if has_error else 1)
    assert mock_log.call_count == (1 if has_error else 0)

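# Assumed parametrization: is_enabled toggles the YT_CLIENT_ID setting below.
@pytest.mark.parametrize("is_enabled", [True, False])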
def test_update_youtube_statuses(
    settings,
    mocker,
    youtube_video_files_processing,
    youtube_video_files_new,
    is_enabled,
):
    """
    Test that the correct number of VideoFile objects have their statuses
    updated to the correct value.
    """
    if not is_enabled:
        settings.YT_CLIENT_ID = None
    mock_youtube = mocker.patch("videos.tasks.YouTubeApi")
    mock_youtube.return_value.video_status.return_value = YouTubeStatus.PROCESSED
    mock_mail_youtube_upload_success = mocker.patch(
        "videos.tasks.mail_youtube_upload_success"
    )
    mocker.patch("gdrive_sync.api.get_resource_type", return_value=RESOURCE_TYPE_VIDEO)
    for video_file in youtube_video_files_processing:
        drive_file = DriveFileFactory.create(video=video_file.video)
        create_gdrive_resource_content(drive_file)
    update_youtube_statuses()
    assert VideoFile.objects.filter(
        destination_status=YouTubeStatus.PROCESSED, status=VideoFileStatus.COMPLETE
    ).count() == (3 if is_enabled else 0)
    if is_enabled:
        mock_youtube.assert_called_once()
        for video_file in youtube_video_files_processing:
            mock_mail_youtube_upload_success.assert_any_call(video_file)
            assert video_file.video.drivefile_set.first().resource.metadata == {
                "resourcetype": "Video",
                "file_type": video_file.video.drivefile_set.first().mime_type,
                "video_files": {
                    "video_thumbnail_file": YT_THUMBNAIL_IMG.format(
                        video_id=video_file.destination_id
                    )
                },
                "video_metadata": {"youtube_id": video_file.destination_id},
                "image": "",
            }
    else:
        mock_youtube.assert_not_called()
        mock_mail_youtube_upload_success.assert_not_called()

@pytest.fixture
def video_group(settings):
    """Collection of model objects for testing video views"""
    drive_file_id = "abc123"
    drive_file_name = "testvid.avi"
    website = WebsiteFactory.create()
    video = VideoFactory.create(
        source_key=f"{settings.DRIVE_S3_UPLOAD_PREFIX}/{website.short_id}/{drive_file_id}/{drive_file_name}",
        status=VideoStatus.TRANSCODING,
    )
    video_job = VideoJobFactory.create(video=video)
    drive_file = DriveFileFactory.create(
        file_id=drive_file_id,
        name=drive_file_name,
        video=video,
        s3_key=video.source_key,
    )
    return SimpleNamespace(video=video, video_job=video_job, drive_file=drive_file)

def test_import_website_files(mocker, mocked_celery, mock_gdrive_files):  # pylint:disable=unused-argument
    """import_website_files should run process_file_result for each drive file and trigger tasks"""
    mocker.patch("gdrive_sync.tasks.api.is_gdrive_enabled", return_value=True)
    website = WebsiteFactory.create()
    drive_files = DriveFileFactory.create_batch(2, website=website)
    mock_process_file_result = mocker.patch(
        "gdrive_sync.tasks.api.process_file_result", side_effect=drive_files
    )
    mock_process_gdrive_file = mocker.patch("gdrive_sync.tasks.process_drive_file.s")
    mock_sync_content = mocker.patch("gdrive_sync.tasks.sync_website_content.si")
    mock_update_status = mocker.patch("gdrive_sync.tasks.update_website_status.si")
    with pytest.raises(mocked_celery.replace_exception_class):
        import_website_files.delay(website.name)
    assert mock_process_file_result.call_count == 2
    for drive_file in drive_files:
        mock_process_gdrive_file.assert_any_call(drive_file.file_id)
    mock_sync_content.assert_called_once_with(website.name)
    website.refresh_from_db()
    mock_update_status.assert_called_once_with(website.pk, website.synced_on)

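# Plausible reconstruction of the missing parametrize decorators. The datetime
# values are hypothetical; the assertions below depend only on which of
# arg_last_dt / tracker_last_dt is set and on the truthiness of the rest, and
# "parent" matches the ancestor id appended when parent_folder_in_ancestors.
@pytest.mark.parametrize("arg_last_dt", [None, datetime(2021, 1, 1, tzinfo=pytz.UTC)])
@pytest.mark.parametrize("tracker_last_dt", [None, datetime(2021, 1, 1, tzinfo=pytz.UTC)])
@pytest.mark.parametrize("parent_folder", [None, "parent"])
@pytest.mark.parametrize("parent_folder_in_ancestors", [True, False])
@pytest.mark.parametrize("same_checksum", [True, False])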
def test_import_recent_files_videos(
    settings,
    mocker,
    mocked_celery,
    arg_last_dt,
    tracker_last_dt,
    parent_folder,
    parent_folder_in_ancestors,
    same_checksum,
):
    """import_recent_files should create the expected video objects and call S3 tasks"""
    mocker.patch("gdrive_sync.tasks.api.is_gdrive_enabled", return_value=True)
    settings.DRIVE_SHARED_ID = "test_drive"
    settings.DRIVE_UPLOADS_PARENT_FOLDER_ID = parent_folder
    website = WebsiteFactory.create()
    DriveFileFactory.create(
        file_id=LIST_VIDEO_RESPONSES[1]["files"][0]["id"],
        name=LIST_VIDEO_RESPONSES[1]["files"][0]["name"],
        checksum=(
            LIST_VIDEO_RESPONSES[1]["files"][0]["md5Checksum"]
            if same_checksum is True
            else "differentmd5"
        ),
        status=DriveFileStatus.COMPLETE,
    )
    parent_tree_responses = [
        [
            {
                "id": LIST_VIDEO_RESPONSES[0]["files"][0]["parents"][0],
                "name": website.short_id,
            },
            {"id": "abc123", "name": DRIVE_FOLDER_VIDEOS_FINAL},
        ],
        [
            {
                "id": LIST_VIDEO_RESPONSES[0]["files"][1]["parents"][0],
                "name": "no-matching-website",
            },
            {"id": "xyz987", "name": DRIVE_FOLDER_VIDEOS_FINAL},
        ],
        [
            {
                "id": LIST_VIDEO_RESPONSES[0]["files"][0]["parents"][0],
                "name": website.short_id,
            },
            {"id": "def456", "name": DRIVE_FOLDER_VIDEOS_FINAL},
        ],
        [
            {
                "id": LIST_VIDEO_RESPONSES[0]["files"][1]["parents"][0],
                "name": "no-matching-website",
            },
            {"id": "ghi789", "name": DRIVE_FOLDER_VIDEOS_FINAL},
        ],
    ]
    if parent_folder_in_ancestors:
        for response in parent_tree_responses:
            response.append({"id": "parent", "name": "ancestor_exists"})
    mocker.patch("gdrive_sync.api.get_parent_tree", side_effect=parent_tree_responses)
    mock_list_files = mocker.patch(
        "gdrive_sync.tasks.api.query_files",
        return_value=LIST_VIDEO_RESPONSES[0]["files"] + LIST_VIDEO_RESPONSES[1]["files"],
    )
    mock_process_func = mocker.patch("gdrive_sync.tasks.process_drive_file.s")
    mock_sync_content_task = mocker.patch("gdrive_sync.tasks.sync_website_content.si")
    tracker = DriveApiQueryTrackerFactory.create(
        api_call=DRIVE_API_FILES, last_dt=tracker_last_dt
    )
    if parent_folder_in_ancestors or parent_folder is None:
        with pytest.raises(mocked_celery.replace_exception_class):
            import_recent_files.delay(last_dt=arg_last_dt)
    else:
        import_recent_files.delay(last_dt=arg_last_dt)
    last_dt = arg_last_dt or tracker_last_dt
    last_dt_str = last_dt.strftime("%Y-%m-%dT%H:%M:%S.%f") if last_dt else None
    base_query = "(not trashed and not mimeType = 'application/vnd.google-apps.folder')"
    expected_query = (
        f"{base_query} and (modifiedTime > '{last_dt_str}' or createdTime > '{last_dt_str}')"
        if last_dt
        else base_query
    )
    mock_list_files.assert_called_once_with(query=expected_query, fields=DRIVE_FILE_FIELDS)
    tracker.refresh_from_db()
    for i in range(2):
        if (i == 1 and same_checksum) or (
            parent_folder and not parent_folder_in_ancestors
        ):
            # chained tasks should not be run (wrong folder, or same checksum & name)
            with pytest.raises(AssertionError):
                mock_process_func.assert_any_call(
                    LIST_VIDEO_RESPONSES[i]["files"][0]["id"]
                )
        else:
            # chained tasks should be run
            mock_process_func.assert_any_call(LIST_VIDEO_RESPONSES[i]["files"][0]["id"])
            assert tracker.last_dt == datetime.strptime(
                LIST_VIDEO_RESPONSES[0]["files"][0]["modifiedTime"],
                "%Y-%m-%dT%H:%M:%S.%fZ",
            ).replace(tzinfo=pytz.utc)
            mock_sync_content_task.assert_any_call(website.name)
        if not parent_folder or parent_folder_in_ancestors:
            # a DriveFile should be created for the matching website only
            assert DriveFile.objects.filter(
                file_id=LIST_VIDEO_RESPONSES[i]["files"][0]["id"]
            ).exists()
            assert not DriveFile.objects.filter(
                file_id=LIST_VIDEO_RESPONSES[i]["files"][1]["id"]
            ).exists()