def videos_missing_captions(website: Website) -> List[WebsiteContent]:
    """
    Find the video resources of a website that have no captions assigned.

    Args:
        website: the site whose content is inspected.

    Returns:
        An empty list when the site is not an OCW site; otherwise the
        WebsiteContent query result restricted to this website, to the video
        resource type, and to rows whose captions metadata field is either
        None or an empty string.
    """
    if not is_ocw_site(website):
        return []

    resource_type_lookup = get_dict_query_field(
        "metadata", settings.FIELD_RESOURCETYPE
    )
    captions_lookup = get_dict_query_field("metadata", settings.YT_FIELD_CAPTIONS)

    belongs_to_site = Q(website=website)
    is_video = Q(**{resource_type_lookup: RESOURCE_TYPE_VIDEO})
    # "Unassigned" covers both a null value and a blank string.
    captions_unset = Q(**{captions_lookup: None}) | Q(**{captions_lookup: ""})

    return WebsiteContent.objects.filter(belongs_to_site & is_video & captions_unset)
def update_youtube_metadata(website: Website, version=VERSION_DRAFT):
    """
    Update YouTube video metadata via the API

    Does nothing unless YouTube integration is enabled and the website is an
    OCW site. Only video resources that carry a non-empty YouTube id in their
    metadata are considered, and of those only the ones for which a matching
    VideoFile exists for this website.

    Args:
        website: the site whose video resources should be synced.
        version: publishing version; when it equals VERSION_LIVE the video
            privacy is set to "public", otherwise no privacy value is passed.

    A failure while updating one video is logged and does not stop the
    remaining videos from being processed.
    """
    if not is_youtube_enabled() or not is_ocw_site(website):
        return

    query_id_field = get_dict_query_field("metadata", settings.YT_FIELD_ID)
    video_resources = website.websitecontent_set.filter(
        Q(metadata__resourcetype=RESOURCE_TYPE_VIDEO)
    ).exclude(Q(**{query_id_field: None}) | Q(**{query_id_field: ""}))
    # exists() answers "is there anything to do?" without counting every row.
    if not video_resources.exists():
        return

    youtube = YouTubeApi()
    # The privacy setting depends only on the version, so compute it once.
    privacy = "public" if version == VERSION_LIVE else None

    for video_resource in video_resources:
        youtube_id = get_dict_field(video_resource.metadata, settings.YT_FIELD_ID)
        # do not run this for any old imported videos
        if not VideoFile.objects.filter(
            video__website=website, destination_id=youtube_id
        ).exists():
            continue
        try:
            youtube.update_video(video_resource, privacy=privacy)
        except Exception:  # pylint:disable=broad-except
            # Best effort per video: log and move on. Unlike a bare except,
            # this lets KeyboardInterrupt / SystemExit propagate.
            log.exception(
                "Unexpected error updating metadata for video resource %d",
                video_resource.id,
            )
def start_transcript_job(video_id: int):
    """
    Use threeplay api to order a transcript for video

    The video is uploaded to 3Play under a folder named after the website's
    short id. The title sent along is the title of the WebsiteContent resource
    whose metadata carries the video's YouTube id, or, when no such resource
    exists, the last path segment of the video's source key. If the upload
    response contains a file id, a transcript is ordered and the video is
    marked as submitted for transcription.

    Args:
        video_id: primary key of the Video to transcribe. If no such video
            exists the function returns without doing anything.
    """
    video = Video.objects.filter(pk=video_id).last()
    if video is None:
        # The video is gone (or the id is wrong); there is nothing to order.
        return

    folder_name = video.website.short_id
    youtube_id = video.youtube_id()

    query_youtube_id_field = get_dict_query_field("metadata", settings.YT_FIELD_ID)
    video_resource = (
        WebsiteContent.objects.filter(website=video.website)
        .filter(Q(**{query_youtube_id_field: youtube_id}))
        .first()
    )
    if video_resource:
        title = video_resource.title
    else:
        title = video.source_key.split("/")[-1]

    response = threeplay_api.threeplay_upload_video_request(
        folder_name, youtube_id, title
    )
    # A response without a "data" payload must not raise here; it is treated
    # the same as a response without a file id.
    threeplay_file_id = ((response or {}).get("data") or {}).get("id")

    if threeplay_file_id:
        threeplay_api.threeplay_order_transcript_request(video.id, threeplay_file_id)
        video.status = VideoStatus.SUBMITTED_FOR_TRANSCRIPTION
        video.save()
def videos_with_unassigned_youtube_ids(website: Website) -> List[WebsiteContent]:
    """
    Return a list of WebsiteContent objects for videos with unassigned youtube ids

    Args:
        website: the site whose content is inspected.

    Returns:
        An empty list when the site is not an OCW site; otherwise the
        WebsiteContent query result restricted to this website, to the video
        resource type, and to rows whose YouTube id metadata field matches an
        isnull lookup, equals None, or is an empty string.
    """
    if not is_ocw_site(website):
        return []

    query_resource_type_field = get_dict_query_field(
        "metadata", settings.FIELD_RESOURCETYPE
    )
    # Build the lookup path with the shared helper rather than joining the
    # dotted setting by hand, so every metadata query is built the same way.
    query_id_field = get_dict_query_field("metadata", settings.YT_FIELD_ID)

    id_unassigned = (
        Q(**{f"{query_id_field}__isnull": True})
        | Q(**{query_id_field: None})
        | Q(**{query_id_field: ""})
    )
    return WebsiteContent.objects.filter(
        Q(website=website)
        & Q(**{query_resource_type_field: RESOURCE_TYPE_VIDEO})
        & id_unassigned
    )
def update_transcripts_for_video(video_id: int):
    """
    Refresh the transcript files of a video and propagate them to its resources.

    When the threeplay update reports success, the video is marked COMPLETE
    (and saved) if it was not already. For OCW sites, every video resource of
    the website whose metadata YouTube id matches the video then gets its
    transcript and captions metadata fields set to the names of the video's
    PDF and WebVTT transcript files. If this call was the one that moved the
    video to COMPLETE and no video of the site is missing captions any more,
    the transcripts-complete notification is mailed.

    Args:
        video_id: id of the Video to update.
    """
    video = Video.objects.get(id=video_id)
    if not threeplay_api.update_transcripts_for_video(video):
        return

    # True only when this call is the one that flips the video to COMPLETE.
    newly_completed = video.status != VideoStatus.COMPLETE
    if newly_completed:
        video.status = VideoStatus.COMPLETE
        video.save()

    site = video.website
    if not is_ocw_site(site):
        return

    lookup = {
        get_dict_query_field(
            "metadata", settings.FIELD_RESOURCETYPE
        ): RESOURCE_TYPE_VIDEO,
        get_dict_query_field("metadata", settings.YT_FIELD_ID): video.youtube_id(),
    }
    for resource in site.websitecontent_set.filter(**lookup):
        resource_metadata = resource.metadata
        set_dict_field(
            resource_metadata,
            settings.YT_FIELD_TRANSCRIPT,
            video.pdf_transcript_file.name,
        )
        set_dict_field(
            resource_metadata,
            settings.YT_FIELD_CAPTIONS,
            video.webvtt_transcript_file.name,
        )
        resource.save()

    if newly_completed and not videos_missing_captions(site):
        mail_transcripts_complete_notification(site)
def videos_with_truncatable_text(website: Website) -> List[WebsiteContent]:
    """
    Find video resources whose title or description exceeds the YouTube limits.

    Args:
        website: the site whose content is inspected.

    Returns:
        An empty list when the site is not an OCW site; otherwise the
        WebsiteContent query result restricted to this website and to the
        video resource type, keeping rows whose description metadata is longer
        than YT_MAX_LENGTH_DESCRIPTION or whose title is longer than
        YT_MAX_LENGTH_TITLE.
    """
    if not is_ocw_site(website):
        return []

    resource_type_lookup = get_dict_query_field(
        "metadata", settings.FIELD_RESOURCETYPE
    )

    # Length of the description stored in the metadata, read as text.
    description_length = Length(
        Cast(
            KeyTextTransform(settings.YT_FIELD_DESCRIPTION, "metadata"),
            CharField(),
        )
    )
    measured = WebsiteContent.objects.annotate(desc_len=description_length).annotate(
        title_len=Length("title")
    )

    exceeds_limit = Q(desc_len__gt=YT_MAX_LENGTH_DESCRIPTION) | Q(
        title_len__gt=YT_MAX_LENGTH_TITLE
    )
    return measured.filter(
        Q(website=website)
        & Q(**{resource_type_lookup: RESOURCE_TYPE_VIDEO})
        & exceeds_limit
    )
def test_get_dict_query_field():
    """get_dict_query_field joins the field name and a dotted path with double underscores"""
    expected = "metadata__video_files__video_captions_file"
    actual = get_dict_query_field("metadata", "video_files.video_captions_file")
    assert actual == expected