Ejemplo n.º 1
0
def test_get_valid_s3_key():
    """get_valid_s3_key avoids dupe s3 keys"""
    site = WebsiteFactory.create()
    prefix = site.starter.config.get("root-url-path").rstrip("/")

    def make_file(raw_name, **extra):
        # All files share the same mime type and start without an s3 key.
        return DriveFileFactory.create(
            name=raw_name, mime_type="image/png", s3_key=None, **extra
        )

    # Three names that all sanitize to "file" — keys must be deduplicated
    # in creation order by appending an increasing numeric suffix.
    first = make_file("(file).PnG", website=site)
    first.s3_key = first.get_valid_s3_key()
    assert first.s3_key == f"{prefix}/{site.name}/file.png"
    first.save()

    second = make_file("File!.pNG", website=site)
    second.s3_key = second.get_valid_s3_key()
    assert second.s3_key == f"{prefix}/{site.name}/file2.png"
    second.save()

    third = make_file("FILE?.png", website=site)
    third.s3_key = third.get_valid_s3_key()
    assert third.s3_key == f"{prefix}/{site.name}/file3.png"

    # A colliding name under a different website gets no dedup suffix.
    other = make_file("(file).PnG")
    assert other.get_valid_s3_key() == f"{prefix}/{other.website.name}/file.png"
Ejemplo n.º 2
0
def test_update_sync_status(file_errors, site_errors, status):
    """update_sync_status should update the website sync_status field as expected"""
    sync_time = now_in_utc()
    website = WebsiteFactory.create(
        synced_on=sync_time,
        sync_status=WebsiteSyncStatus.PROCESSING,
        sync_errors=site_errors,
    )
    for file_error in file_errors:
        # A file without an error gets a resource and a COMPLETE status;
        # otherwise it is marked FAILED with no resource attached.
        succeeded = file_error is None
        DriveFileFactory.create(
            website=website,
            sync_error=file_error,
            sync_dt=sync_time,
            resource=(
                WebsiteContentFactory.create(
                    type=CONTENT_TYPE_RESOURCE, website=website
                )
                if not file_error
                else None
            ),
            status=(
                DriveFileStatus.COMPLETE if succeeded else DriveFileStatus.FAILED
            ),
        )
    # A file synced after the checkpoint time must not affect this update.
    DriveFileFactory.create(
        website=website,
        sync_dt=now_in_utc() + timedelta(seconds=10),
        resource=WebsiteContentFactory.create(
            type=CONTENT_TYPE_RESOURCE, website=website
        ),
    )
    update_sync_status(website, sync_time)
    website.refresh_from_db()
    assert website.sync_status == status
    expected_errors = [err for err in file_errors if err] + (site_errors or [])
    assert sorted(website.sync_errors) == sorted(expected_errors)
Ejemplo n.º 3
0
def test_delete_from_s3(mocker):
    """Deleting a DriveFile should also delete it from S3"""
    patched_delete = mocker.patch("gdrive_sync.signals.delete_s3_objects")
    gdrive_file = DriveFileFactory.create()
    expected_key = gdrive_file.s3_key
    gdrive_file.delete()
    # The post-delete signal should enqueue an async S3 delete for the key.
    patched_delete.delay.assert_called_once_with(expected_key)
Ejemplo n.º 4
0
def test_create_gdrive_resource_content(mime_type, mock_get_s3_content_type):
    """create_resource_from_gdrive should create a WebsiteContent object linked to a DriveFile object"""
    website = WebsiteFactory.create()
    # (original drive name, deduplicated filename stem) pairs — the raw
    # names all sanitize to "word" and must be made unique in order.
    name_pairs = [
        ("word.docx", "word"),
        ("word!.docx", "word2"),
        ("(word?).docx", "word3"),
    ]
    for original_name, unique_name in name_pairs:
        gdrive_file = DriveFileFactory.create(
            website=website,
            name=original_name,
            s3_key=f"test/path/{unique_name}.docx",
            mime_type=mime_type,
        )
        create_gdrive_resource_content(gdrive_file)
        resource = WebsiteContent.objects.filter(
            website=website,
            title=original_name,
            type="resource",
            is_page_content=True,
        ).first()
        assert resource is not None
        assert resource.dirpath == "content/resource"
        assert resource.filename == unique_name
        assert resource.metadata["resourcetype"] == RESOURCE_TYPE_DOCUMENT
        assert resource.metadata["file_type"] == mime_type
        assert resource.metadata["image"] == ""
        gdrive_file.refresh_from_db()
        assert gdrive_file.resource == resource
Ejemplo n.º 5
0
def test_import_recent_files_nonvideos(settings, mocker, mocked_celery,
                                       raises_exception):
    """
    import_recent_files should import non-video files

    When per-file processing succeeds, the task replaces itself with a celery
    chain (the mocked replace raises ``replace_exception_class``); when every
    file raises, errors are logged and no chain is scheduled.
    """
    mocker.patch("gdrive_sync.tasks.api.is_gdrive_enabled", return_value=True)
    settings.DRIVE_SHARED_ID = "test_drive"
    settings.DRIVE_UPLOADS_PARENT_FOLDER_ID = "parent"
    website = WebsiteFactory.create()

    # Ancestor tree for each queried file: configured parent folder ->
    # website folder -> final (non-video) files folder, so both files are
    # treated as importable non-videos for this website.
    parent_tree_responses = [[
        {
            "id": "parent",
            "name": "ancestor_exists",
        },
        {
            "id": LIST_FILE_RESPONSES[0]["files"][i]["parents"][0],
            "name": website.short_id,
        },
        {
            "id": "abc123",
            "name": DRIVE_FOLDER_FILES_FINAL
        },
    ] for i in range(2)]
    mocker.patch("gdrive_sync.api.get_parent_tree",
                 side_effect=parent_tree_responses)

    mocker.patch(
        "gdrive_sync.tasks.api.query_files",
        return_value=LIST_FILE_RESPONSES[0]["files"],
    )

    # Either both files blow up, or the first yields a DriveFile and the
    # second is skipped (process_file_result returns None for it).
    side_effect = ([Exception(), Exception()] if raises_exception else [
        DriveFileFactory.create(
            file_id=LIST_FILE_RESPONSES[0]["files"][0]["id"]),
        None,
    ])
    mocker.patch("gdrive_sync.tasks.api.process_file_result",
                 side_effect=side_effect)
    mock_process_drive_file_task = mocker.patch(
        "gdrive_sync.tasks.process_drive_file.s")
    mock_log = mocker.patch("gdrive_sync.tasks.log.exception")

    # In the success path the mocked celery replace() raises, so the task
    # call must be wrapped in pytest.raises; in the error path it completes.
    if not raises_exception:
        with pytest.raises(mocked_celery.replace_exception_class):
            import_recent_files.delay(last_dt=datetime.strptime(
                "2021-01-01", "%Y-%m-%d").replace(tzinfo=pytz.UTC), )
    else:
        import_recent_files.delay(last_dt=datetime.strptime(
            "2021-01-01", "%Y-%m-%d").replace(tzinfo=pytz.UTC), )
    # Video files from the other fixture must never be scheduled here.
    with pytest.raises(AssertionError):
        mock_process_drive_file_task.assert_any_call(
            LIST_VIDEO_RESPONSES[1]["files"][0]["id"])
    if not raises_exception:
        mock_process_drive_file_task.assert_any_call(
            LIST_FILE_RESPONSES[0]["files"][0]["id"])
    # One logged exception per failed file, none on success.
    assert mock_log.call_count == (len(LIST_FILE_RESPONSES[0]["files"])
                                   if raises_exception else 0)
Ejemplo n.º 6
0
def test_create_gdrive_resource_content_forbidden_name(
        mock_get_s3_content_type):
    """content for a google drive file with a forbidden name should have its filename attribute modified"""
    forbidden_stem = CONTENT_FILENAMES_FORBIDDEN[0]
    gdrive_file = DriveFileFactory.create(
        name=f"{forbidden_stem}.pdf",
        s3_key=f"test/path/{forbidden_stem}.pdf",
        mime_type="application/pdf",
    )
    create_gdrive_resource_content(gdrive_file)
    gdrive_file.refresh_from_db()
    # The forbidden stem gets the content type appended to disambiguate it.
    expected_filename = f"{forbidden_stem}-{CONTENT_TYPE_RESOURCE}"
    assert gdrive_file.resource.filename == expected_filename
Ejemplo n.º 7
0
def test_stream_to_s3_error(mocker):
    """Task should make expected drive api and S3 upload calls"""
    mocker.patch("gdrive_sync.api.boto3")
    drive_service_mock = mocker.patch("gdrive_sync.api.get_drive_service")
    # Force the download step to fail mid-stream.
    mocker.patch("gdrive_sync.api.streaming_download", side_effect=HTTPError())
    gdrive_file = DriveFileFactory.create()
    with pytest.raises(HTTPError):
        api.stream_to_s3(gdrive_file)
    gdrive_file.refresh_from_db()
    assert gdrive_file.status == DriveFileStatus.UPLOAD_FAILED
    # The temporary drive permission should still be cleaned up on failure.
    permissions_mock = drive_service_mock.return_value.permissions.return_value
    permissions_mock.delete.assert_called_once()
Ejemplo n.º 8
0
def test_create_gdrive_resource_content_update(mock_get_s3_content_type):
    """create_resource_from_gdrive should update a WebsiteContent object linked to a DriveFile object"""
    existing_content = WebsiteContentFactory.create(file="test/path/old.doc")
    gdrive_file = DriveFileFactory.create(
        website=existing_content.website,
        s3_key="test/path/word.docx",
        resource=existing_content,
    )
    # Sanity check: the stored file path is stale before the update.
    assert existing_content.file != gdrive_file.s3_key
    create_gdrive_resource_content(gdrive_file)
    existing_content.refresh_from_db()
    gdrive_file.refresh_from_db()
    assert existing_content.file == gdrive_file.s3_key
    assert gdrive_file.resource == existing_content
Ejemplo n.º 9
0
def test_stream_to_s3(settings, mocker, is_video, current_s3_key):
    """stream_to_s3 should make expected drive api and S3 upload calls"""
    drive_service_mock = mocker.patch("gdrive_sync.api.get_drive_service")
    download_mock = mocker.patch("gdrive_sync.api.streaming_download")
    boto_mock = mocker.patch("gdrive_sync.api.boto3")
    bucket_mock = boto_mock.resource.return_value.Bucket.return_value
    final_folder = (
        DRIVE_FOLDER_VIDEOS_FINAL if is_video else DRIVE_FOLDER_FILES_FINAL
    )
    gdrive_file = DriveFileFactory.create(
        name="A (Test) File!.ext",
        s3_key=current_s3_key,
        mime_type="video/mp4" if is_video else "application/pdf",
        drive_path=f"website/{final_folder}",
    )
    api.stream_to_s3(gdrive_file)
    permissions = drive_service_mock.return_value.permissions.return_value
    permissions.create.assert_called_once()

    # An existing key is reused; otherwise a sanitized key is generated,
    # with videos going under the transcode upload prefix + file id.
    if current_s3_key:
        expected_key = current_s3_key
    elif is_video:
        expected_key = (
            f"{settings.DRIVE_S3_UPLOAD_PREFIX}/{gdrive_file.website.name}"
            f"/{gdrive_file.file_id}/a-test-file.ext"
        )
    else:
        expected_key = (
            f"{gdrive_file.s3_prefix}/{gdrive_file.website.name}/a-test-file.ext"
        )

    # Videos additionally get a ContentDisposition header.
    expected_extra_args = {
        "ContentType": gdrive_file.mime_type,
        "ACL": "public-read",
    }
    if is_video:
        expected_extra_args["ContentDisposition"] = "attachment"

    bucket_mock.upload_fileobj.assert_called_with(
        Fileobj=mocker.ANY,
        Key=expected_key,
        ExtraArgs=expected_extra_args,
    )
    download_mock.assert_called_once_with(gdrive_file)
    permissions.delete.assert_called_once()
    gdrive_file.refresh_from_db()
    assert gdrive_file.status == DriveFileStatus.UPLOAD_COMPLETE
    assert gdrive_file.s3_key == expected_key
Ejemplo n.º 10
0
def test_transcode_gdrive_video(settings, mocker, account_id, region,
                                role_name):
    """ transcode_gdrive_video should create Video object and call create_media_convert_job"""
    settings.AWS_ACCOUNT_ID = account_id
    settings.AWS_REGION = region
    settings.AWS_ROLE_NAME = role_name
    convert_job_mock = mocker.patch("gdrive_sync.api.create_media_convert_job")
    gdrive_file = DriveFileFactory.create()
    transcode_gdrive_video(gdrive_file)
    gdrive_file.refresh_from_db()
    # Transcoding only happens when all three AWS settings are present.
    transcoding_configured = bool(account_id and region and role_name)
    if transcoding_configured:
        assert gdrive_file.video.source_key == gdrive_file.s3_key
        convert_job_mock.assert_called_once_with(gdrive_file.video)
    else:
        assert gdrive_file.video is None
        convert_job_mock.assert_not_called()
Ejemplo n.º 11
0
def test_transcode_gdrive_video_error(settings, mocker):
    """Video status should be set to failure if a client error occurs"""
    settings.AWS_ACCOUNT_ID = "accountABC"
    settings.AWS_REGION = "us-east-1"
    settings.AWS_ROLE_NAME = "roleDEF"
    # Make the MediaConvert job creation fail with a boto ClientError.
    mocker.patch(
        "gdrive_sync.api.create_media_convert_job",
        side_effect=ClientError({"Error": {}}, "transcode"),
    )
    log_mock = mocker.patch("gdrive_sync.api.log.exception")
    gdrive_file = DriveFileFactory.create()
    with pytest.raises(ClientError):
        transcode_gdrive_video(gdrive_file)
    gdrive_file.refresh_from_db()
    log_mock.assert_called_once_with(
        "Error creating transcode job for %s", gdrive_file.video.source_key
    )
    assert gdrive_file.video.status == VideoStatus.FAILED
Ejemplo n.º 12
0
def test_create_gdrive_resource_content_error(mocker):
    """create_resource_from_gdrive should log an exception, update status if something goes wrong"""
    # Use side_effect (not return_value) so the patched call actually RAISES.
    # With return_value the mock would merely return an Exception instance,
    # never triggering the error-handling path this test is meant to cover.
    mocker.patch(
        "gdrive_sync.api.get_s3_content_type",
        side_effect=Exception("Could not determine resource type"),
    )
    mock_log = mocker.patch("gdrive_sync.api.log.exception")
    content = WebsiteContentFactory.create()
    drive_file = DriveFileFactory.create(website=content.website,
                                         s3_key="test/path/word.docx",
                                         resource=content)
    create_gdrive_resource_content(drive_file)
    content.refresh_from_db()
    drive_file.refresh_from_db()
    # The failure must be recorded on the drive file and logged once.
    assert drive_file.status == DriveFileStatus.FAILED
    mock_log.assert_called_once_with(
        "Error creating resource for drive file %s", drive_file.file_id)
Ejemplo n.º 13
0
def test_process_drive_file(mocker, is_video, has_error):
    """The necessary steps should be run to process a google drive file"""
    final_folder = (
        DRIVE_FOLDER_VIDEOS_FINAL if is_video else DRIVE_FOLDER_FILES_FINAL
    )
    gdrive_file = DriveFileFactory.create(drive_path=final_folder)
    # Optionally make the S3 streaming step fail on its single invocation.
    stream_mock = mocker.patch(
        "gdrive_sync.tasks.api.stream_to_s3",
        side_effect=[Exception("No bucket") if has_error else None],
    )
    transcode_mock = mocker.patch(
        "gdrive_sync.tasks.api.transcode_gdrive_video")
    resource_mock = mocker.patch(
        "gdrive_sync.tasks.api.create_gdrive_resource_content")
    log_mock = mocker.patch("gdrive_sync.tasks.log.exception")
    process_drive_file.delay(gdrive_file.file_id)
    assert stream_mock.call_count == 1
    # Transcoding only runs for videos that streamed successfully.
    expected_transcodes = 1 if is_video and not has_error else 0
    assert transcode_mock.call_count == expected_transcodes
    assert resource_mock.call_count == (0 if has_error else 1)
    assert log_mock.call_count == (1 if has_error else 0)
Ejemplo n.º 14
0
def test_update_youtube_statuses(
    settings,
    mocker,
    youtube_video_files_processing,
    youtube_video_files_new,
    is_enabled,
):
    """
    Test that the correct number of YouTubeVideo objects have their statuses updated to the correct value.
    """
    # Clearing YT_CLIENT_ID disables the YouTube integration entirely.
    if not is_enabled:
        settings.YT_CLIENT_ID = None
    mock_youtube = mocker.patch("videos.tasks.YouTubeApi")
    mock_youtube.return_value.video_status.return_value = YouTubeStatus.PROCESSED
    mock_mail_youtube_upload_success = mocker.patch(
        "videos.tasks.mail_youtube_upload_success"
    )
    mocker.patch("gdrive_sync.api.get_resource_type", return_value=RESOURCE_TYPE_VIDEO)
    # Give each in-flight video file a drive file with a linked resource so
    # the status update can write youtube metadata into the resource.
    for video_file in youtube_video_files_processing:
        drive_file = DriveFileFactory.create(video=video_file.video)
        create_gdrive_resource_content(drive_file)
    update_youtube_statuses()
    # All 3 processing files should be marked PROCESSED/COMPLETE when the
    # integration is enabled; none should change when it is disabled.
    assert VideoFile.objects.filter(
        destination_status=YouTubeStatus.PROCESSED, status=VideoFileStatus.COMPLETE
    ).count() == (3 if is_enabled else 0)
    if is_enabled:
        mock_youtube.assert_called_once()
        for video_file in youtube_video_files_processing:
            mock_mail_youtube_upload_success.assert_any_call(video_file)
            # The linked resource gains youtube ids and a thumbnail URL.
            assert video_file.video.drivefile_set.first().resource.metadata == {
                "resourcetype": "Video",
                "file_type": video_file.video.drivefile_set.first().mime_type,
                "video_files": {
                    "video_thumbnail_file": YT_THUMBNAIL_IMG.format(
                        video_id=video_file.destination_id
                    )
                },
                "video_metadata": {"youtube_id": video_file.destination_id},
                "image": "",
            }
    else:
        mock_youtube.assert_not_called()
        mock_mail_youtube_upload_success.assert_not_called()
Ejemplo n.º 15
0
def video_group(settings):
    """ Collection of model objects for testing video views"""
    file_id = "abc123"
    file_name = "testvid.avi"
    website = WebsiteFactory.create()
    # Source key mirrors the layout stream_to_s3 uses for video uploads.
    source_key = (
        f"{settings.DRIVE_S3_UPLOAD_PREFIX}/{website.short_id}"
        f"/{file_id}/{file_name}"
    )
    video = VideoFactory.create(
        source_key=source_key,
        status=VideoStatus.TRANSCODING,
    )
    video_job = VideoJobFactory.create(video=video)
    drive_file = DriveFileFactory.create(
        file_id=file_id,
        name=file_name,
        video=video,
        s3_key=video.source_key,
    )
    return SimpleNamespace(
        video=video,
        video_job=video_job,
        drive_file=drive_file,
    )
Ejemplo n.º 16
0
def test_import_website_files(mocker, mocked_celery, mock_gdrive_files):  # pylint:disable=unused-argument
    """import_website_files should run process_file_result for each drive file and trigger tasks"""
    mocker.patch("gdrive_sync.tasks.api.is_gdrive_enabled", return_value=True)
    website = WebsiteFactory.create()
    existing_files = DriveFileFactory.create_batch(2, website=website)
    process_result_mock = mocker.patch(
        "gdrive_sync.tasks.api.process_file_result",
        side_effect=existing_files,
    )
    process_file_mock = mocker.patch("gdrive_sync.tasks.process_drive_file.s")
    sync_content_mock = mocker.patch(
        "gdrive_sync.tasks.sync_website_content.si")
    update_status_mock = mocker.patch(
        "gdrive_sync.tasks.update_website_status.si")
    # The mocked celery replace() raises, signalling the chain was built.
    with pytest.raises(mocked_celery.replace_exception_class):
        import_website_files.delay(website.name)
    assert process_result_mock.call_count == 2
    for existing_file in existing_files:
        process_file_mock.assert_any_call(existing_file.file_id)
    sync_content_mock.assert_called_once_with(website.name)
    website.refresh_from_db()
    update_status_mock.assert_called_once_with(website.pk, website.synced_on)
Ejemplo n.º 17
0
def test_import_recent_files_videos(
    settings,
    mocker,
    mocked_celery,
    arg_last_dt,
    tracker_last_dt,
    parent_folder,
    parent_folder_in_ancestors,
    same_checksum,
):
    """import_recent_files should created expected video objects and call s3 tasks"""
    mocker.patch("gdrive_sync.tasks.api.is_gdrive_enabled", return_value=True)
    settings.DRIVE_SHARED_ID = "test_drive"
    settings.DRIVE_UPLOADS_PARENT_FOLDER_ID = parent_folder
    website = WebsiteFactory.create()
    # Pre-existing DriveFile for the second video; when same_checksum is
    # True it should be treated as already imported and skipped.
    DriveFileFactory.create(
        file_id=LIST_VIDEO_RESPONSES[1]["files"][0]["id"],
        name=LIST_VIDEO_RESPONSES[1]["files"][0]["name"],
        checksum=(LIST_VIDEO_RESPONSES[1]["files"][0]["md5Checksum"]
                  if same_checksum is True else "differentmd5"),
        status=DriveFileStatus.COMPLETE,
    )

    # Ancestor trees: files alternate between the known website folder and
    # a folder with no matching website (those files are never imported).
    parent_tree_responses = [
        [
            {
                "id": LIST_VIDEO_RESPONSES[0]["files"][0]["parents"][0],
                "name": website.short_id,
            },
            {
                "id": "abc123",
                "name": DRIVE_FOLDER_VIDEOS_FINAL
            },
        ],
        [
            {
                "id": LIST_VIDEO_RESPONSES[0]["files"][1]["parents"][0],
                "name": "no-matching-website",
            },
            {
                "id": "xyz987",
                "name": DRIVE_FOLDER_VIDEOS_FINAL
            },
        ],
        [
            {
                "id": LIST_VIDEO_RESPONSES[0]["files"][0]["parents"][0],
                "name": website.short_id,
            },
            {
                "id": "def456",
                "name": DRIVE_FOLDER_VIDEOS_FINAL
            },
        ],
        [
            {
                "id": LIST_VIDEO_RESPONSES[0]["files"][1]["parents"][0],
                "name": "no-matching-website",
            },
            {
                "id": "ghi789",
                "name": DRIVE_FOLDER_VIDEOS_FINAL
            },
        ],
    ]

    if parent_folder_in_ancestors:
        for response in parent_tree_responses:
            response.append({
                "id": "parent",
                "name": "ancestor_exists",
            })

    mocker.patch("gdrive_sync.api.get_parent_tree",
                 side_effect=parent_tree_responses)

    mock_list_files = mocker.patch(
        "gdrive_sync.tasks.api.query_files",
        return_value=LIST_VIDEO_RESPONSES[0]["files"] +
        LIST_VIDEO_RESPONSES[1]["files"],
    )
    mock_process_func = mocker.patch("gdrive_sync.tasks.process_drive_file.s")
    mock_sync_content_task = mocker.patch(
        "gdrive_sync.tasks.sync_website_content.si")

    tracker = DriveApiQueryTrackerFactory.create(api_call=DRIVE_API_FILES,
                                                 last_dt=tracker_last_dt)

    # When files are importable the mocked celery replace() raises.
    if parent_folder_in_ancestors or parent_folder is None:
        with pytest.raises(mocked_celery.replace_exception_class):
            import_recent_files.delay(last_dt=arg_last_dt)
    else:
        import_recent_files.delay(last_dt=arg_last_dt)

    # The drive query filters on the most recent known datetime, preferring
    # the explicit argument over the tracker's stored value.
    last_dt = arg_last_dt or tracker_last_dt
    last_dt_str = last_dt.strftime("%Y-%m-%dT%H:%M:%S.%f") if last_dt else None
    base_query = "(not trashed and not mimeType = 'application/vnd.google-apps.folder')"
    expected_query = (
        f"{base_query} and (modifiedTime > '{last_dt_str}' or createdTime > '{last_dt_str}')"
        if last_dt else base_query)

    mock_list_files.assert_called_once_with(query=expected_query,
                                            fields=DRIVE_FILE_FIELDS)
    tracker.refresh_from_db()
    for i in range(2):
        if (i == 1 and same_checksum) or (
                parent_folder and not parent_folder_in_ancestors
        ):  # chained tasks should not be run (wrong folder, or same checksum & name)
            # NOTE: the original wrapped this in `assert ...`, which made the
            # pytest.raises always pass (assert_any_call returns None, and
            # `assert None` itself raises AssertionError). The bare call is
            # required so this genuinely verifies the task was NOT scheduled.
            with pytest.raises(AssertionError):
                mock_process_func.assert_any_call(
                    LIST_VIDEO_RESPONSES[i]["files"][0]["id"])
        else:  # chained tasks should be run
            mock_process_func.assert_any_call(
                LIST_VIDEO_RESPONSES[i]["files"][0]["id"])
            assert (tracker.last_dt == datetime.strptime(
                LIST_VIDEO_RESPONSES[0]["files"][0]["modifiedTime"],
                "%Y-%m-%dT%H:%M:%S.%fZ",
            ).replace(tzinfo=pytz.utc))
            mock_sync_content_task.assert_any_call(website.name)
        if (not parent_folder
                or parent_folder_in_ancestors):  # DriveFile should be created
            assert DriveFile.objects.filter(
                file_id=LIST_VIDEO_RESPONSES[i]["files"][0]["id"]).exists()
        # Files under "no-matching-website" must never produce a DriveFile.
        assert (DriveFile.objects.filter(
            file_id=LIST_VIDEO_RESPONSES[i]["files"][1]["id"]).exists() is
                False)