Esempio n. 1
0
def videos_missing_captions(website: Website) -> List[WebsiteContent]:
    """
    Return the video WebsiteContent objects of a website that have no captions assigned

    A video counts as missing captions when its captions metadata field
    (settings.YT_FIELD_CAPTIONS) is None or an empty string.

    Args:
        website (Website): the website whose content is searched

    Returns:
        An empty list if the website is not an OCW site, otherwise the
        filtered WebsiteContent queryset.
    """
    if not is_ocw_site(website):
        return []

    resource_type_lookup = get_dict_query_field("metadata",
                                                settings.FIELD_RESOURCETYPE)
    captions_lookup = get_dict_query_field("metadata",
                                           settings.YT_FIELD_CAPTIONS)

    # Video resources that belong to this website
    is_site_video = Q(website=website) & Q(
        **{resource_type_lookup: RESOURCE_TYPE_VIDEO})
    # Captions value is either None or an empty string
    captions_unset = Q(**{captions_lookup: None}) | Q(**{captions_lookup: ""})

    return WebsiteContent.objects.filter(is_site_video & captions_unset)
Esempio n. 2
0
def update_youtube_metadata(website: Website, version=VERSION_DRAFT):
    """
    Update YouTube video metadata via the API

    Does nothing unless YouTube is enabled and the website is an OCW site.
    Only video resources with a non-empty YouTube id that also have a matching
    VideoFile for this website are updated. A failure on one video is logged
    and does not prevent the remaining videos from being processed.

    Args:
        website (Website): the website whose video resources are updated
        version (str): the publish version; when it equals VERSION_LIVE the
            video privacy is set to "public", otherwise privacy is passed as None
    """
    if not is_youtube_enabled() or not is_ocw_site(website):
        return
    query_id_field = get_dict_query_field("metadata", settings.YT_FIELD_ID)
    video_resources = website.websitecontent_set.filter(
        Q(metadata__resourcetype=RESOURCE_TYPE_VIDEO)).exclude(
            Q(**{query_id_field: None}) | Q(**{query_id_field: ""}))
    # Bail out before constructing the API client when there is nothing to
    # update; exists() only needs to find a single row.
    if not video_resources.exists():
        return
    youtube = YouTubeApi()
    for video_resource in video_resources:
        youtube_id = get_dict_field(video_resource.metadata,
                                    settings.YT_FIELD_ID)
        # do not run this for any old imported videos
        if not VideoFile.objects.filter(video__website=website,
                                        destination_id=youtube_id).exists():
            continue
        try:
            youtube.update_video(
                video_resource,
                privacy=("public" if version == VERSION_LIVE else None),
            )
        # Catch Exception rather than using a bare except so that
        # SystemExit / KeyboardInterrupt still propagate.
        except Exception:  # pylint:disable=broad-except
            log.exception(
                "Unexpected error updating metadata for video resource %d",
                video_resource.id,
            )
Esempio n. 3
0
def start_transcript_job(video_id: int):
    """
    Use threeplay api to order a transcript for video

    The video is uploaded to 3Play under a folder named after the website's
    short_id. If the upload response contains a file id, a transcript is
    ordered and the video status is set to SUBMITTED_FOR_TRANSCRIPTION.

    Args:
        video_id (int): primary key of the Video to transcribe; if no such
            video exists the function returns without doing anything
    """

    video = Video.objects.filter(pk=video_id).last()
    if video is None:
        # filter().last() yields None for an unknown id; nothing to submit
        return

    folder_name = video.website.short_id
    youtube_id = video.youtube_id()

    query_youtube_id_field = get_dict_query_field("metadata",
                                                  settings.YT_FIELD_ID)

    video_resource = (WebsiteContent.objects.filter(
        website=video.website).filter(
            Q(**{query_youtube_id_field: youtube_id})).first())

    # Prefer the resource title; fall back to the source file name
    if video_resource:
        title = video_resource.title
    else:
        title = video.source_key.split("/")[-1]

    response = threeplay_api.threeplay_upload_video_request(
        folder_name, youtube_id, title)

    # Tolerate a response with no (or a null) "data" entry instead of raising
    # AttributeError; a missing file id simply means no transcript is ordered.
    threeplay_file_id = ((response or {}).get("data") or {}).get("id")

    if threeplay_file_id:
        threeplay_api.threeplay_order_transcript_request(
            video.id, threeplay_file_id)
        video.status = VideoStatus.SUBMITTED_FOR_TRANSCRIPTION
        video.save()
Esempio n. 4
0
def videos_with_unassigned_youtube_ids(
        website: Website) -> List[WebsiteContent]:
    """
    Return the video WebsiteContent objects of a website with unassigned youtube ids

    Args:
        website (Website): the website whose content is searched

    Returns:
        An empty list if the website is not an OCW site, otherwise the
        filtered WebsiteContent queryset.
    """
    if not is_ocw_site(website):
        return []
    query_resource_type_field = get_dict_query_field(
        "metadata", settings.FIELD_RESOURCETYPE)
    # Build the lookup path with the same helper used for the resource type
    # instead of re-implementing the dotted-path join by hand.
    query_id_field = get_dict_query_field("metadata", settings.YT_FIELD_ID)
    # The id counts as unassigned when the lookup is null (presumably this
    # covers a key that is absent altogether - confirm against the field
    # type), or when it is None or an empty string.
    return WebsiteContent.objects.filter(
        Q(website=website)
        & Q(**{query_resource_type_field: RESOURCE_TYPE_VIDEO})
        & (Q(**{f"{query_id_field}__isnull": True})
           | Q(**{query_id_field: None})
           | Q(**{query_id_field: ""})))
Esempio n. 5
0
def update_transcripts_for_video(video_id: int):
    """
    Update transcripts for a video

    If threeplay_api reports that transcripts were updated, the video is
    marked COMPLETE (when it was not already). For OCW sites the transcript
    and captions file names are then written into the metadata of every
    matching video resource. When this call is the one that completed the
    video and no video on the website is missing captions afterwards, a
    single "transcripts complete" notification is mailed.

    Args:
        video_id (int): primary key of the Video to update
    """
    video = Video.objects.get(id=video_id)
    if not threeplay_api.update_transcripts_for_video(video):
        return

    first_transcript_download = False
    if video.status != VideoStatus.COMPLETE:
        video.status = VideoStatus.COMPLETE
        video.save()
        first_transcript_download = True

    website = video.website
    if not is_ocw_site(website):
        return

    search_fields = {
        get_dict_query_field("metadata", settings.FIELD_RESOURCETYPE):
            RESOURCE_TYPE_VIDEO,
        get_dict_query_field("metadata", settings.YT_FIELD_ID):
            video.youtube_id(),
    }

    resource_updated = False
    for video_resource in website.websitecontent_set.filter(**search_fields):
        metadata = video_resource.metadata
        set_dict_field(
            metadata,
            settings.YT_FIELD_TRANSCRIPT,
            video.pdf_transcript_file.name,
        )
        set_dict_field(
            metadata,
            settings.YT_FIELD_CAPTIONS,
            video.webvtt_transcript_file.name,
        )
        video_resource.save()
        resource_updated = True

    # Check completion once, after every matching resource has been saved,
    # so the notification is sent at most once per call and the
    # missing-captions query is not repeated for each resource. As before,
    # nothing is sent when no resource matched.
    if (resource_updated and first_transcript_download
            and len(videos_missing_captions(website)) == 0):
        mail_transcripts_complete_notification(website)
Esempio n. 6
0
def videos_with_truncatable_text(website: Website) -> List[WebsiteContent]:
    """
    Return the video WebsiteContent objects whose text fields will be truncated in YouTube

    A video is included when its metadata description is longer than
    YT_MAX_LENGTH_DESCRIPTION or its title is longer than YT_MAX_LENGTH_TITLE.

    Args:
        website (Website): the website whose content is searched

    Returns:
        An empty list if the website is not an OCW site, otherwise the
        annotated and filtered WebsiteContent queryset.
    """
    if not is_ocw_site(website):
        return []

    resource_type_lookup = get_dict_query_field("metadata",
                                                settings.FIELD_RESOURCETYPE)

    # Description pulled out of the metadata field as text so its length
    # can be measured in the database
    description_text = Cast(
        KeyTextTransform(settings.YT_FIELD_DESCRIPTION, "metadata"),
        CharField(),
    )
    with_lengths = WebsiteContent.objects.annotate(
        desc_len=Length(description_text)).annotate(title_len=Length("title"))

    is_site_video = Q(website=website) & Q(
        **{resource_type_lookup: RESOURCE_TYPE_VIDEO})
    exceeds_limit = (Q(desc_len__gt=YT_MAX_LENGTH_DESCRIPTION)
                     | Q(title_len__gt=YT_MAX_LENGTH_TITLE))

    return with_lengths.filter(is_site_video & exceeds_limit)
Esempio n. 7
0
def test_get_dict_query_field():
    """get_dict_query_field should join the field name and dotted key path with double underscores"""
    dotted_key = "video_files.video_captions_file"
    expected_lookup = "metadata__video_files__video_captions_file"
    assert get_dict_query_field("metadata", dotted_key) == expected_lookup