def post(self, request):
    """
    Create a video transcript record from the posted metadata.

    Arguments:
        request: A WSGI request. Its ``data`` must contain ``video_id``,
            ``name``, ``language_code``, ``provider`` and ``file_format``.

    Returns:
        Response: HTTP 200 (no body) once the metadata has been handed to
        ``create_or_update_video_transcript``. HTTP 400 with a ``message``
        when a required param is missing, when the file format or provider
        is not among the supported choices, or when a transcript already
        exists for the given video and language code.
    """
    attrs = ('video_id', 'name', 'language_code', 'provider', 'file_format')
    missing = [attr for attr in attrs if attr not in request.data]
    if missing:
        missing_attrs = ' and '.join(missing)
        # ``Logger.warn`` is a deprecated alias; ``warning`` is the supported name.
        LOGGER.warning('[VAL] Required transcript params are missing. %s', missing_attrs)
        return Response(
            status=status.HTTP_400_BAD_REQUEST,
            data=dict(message=u'{missing} must be specified.'.format(missing=missing_attrs)),
        )

    video_id = request.data['video_id']
    language_code = request.data['language_code']
    transcript_name = request.data['name']
    provider = request.data['provider']
    file_format = request.data['file_format']

    # Reject unknown file formats before touching the database.
    supported_formats = sorted(dict(TranscriptFormat.CHOICES).keys())
    if file_format not in supported_formats:
        message = (
            u'"{format}" transcript file type is not supported. Supported formats are "{supported_formats}"'
        ).format(format=file_format, supported_formats=supported_formats)
        return Response(status=status.HTTP_400_BAD_REQUEST, data={'message': message})

    # Reject unknown transcription providers as well.
    supported_providers = sorted(dict(TranscriptProviderType.CHOICES).keys())
    if provider not in supported_providers:
        message = (
            u'"{provider}" provider is not supported. Supported transcription providers are "{supported_providers}"'
        ).format(provider=provider, supported_providers=supported_providers)
        return Response(status=status.HTTP_400_BAD_REQUEST, data={'message': message})

    # This endpoint only creates: an existing transcript is never overridden.
    transcript = VideoTranscript.get_or_none(video_id, language_code)
    if transcript is None:
        create_or_update_video_transcript(video_id, language_code, metadata={
            'provider': provider,
            'file_name': transcript_name,
            'file_format': file_format
        })
        response = Response(status=status.HTTP_200_OK)
    else:
        message = (
            u'Can not override existing transcript for video "{video_id}" and language code "{language}".'
        ).format(video_id=video_id, language=language_code)
        response = Response(status=status.HTTP_400_BAD_REQUEST, data={'message': message})

    return response
def get_video_transcript(video_id, language_code):
    """
    Look up a video transcript and return its serialized info.

    Arguments:
        video_id(unicode): A video id, it can be an edx_video_id or an external video id
            extracted from external sources of a video component.
        language_code(unicode): The language code of the requested transcript.

    Returns:
        The ``data`` of a ``TranscriptSerializer`` built from the transcript,
        or None when the lookup does not yield a transcript.
    """
    found = VideoTranscript.get_or_none(video_id=video_id, language_code=language_code)
    if not found:
        return None
    return TranscriptSerializer(found).data
def get_video_transcript_url(video_id, language_code):
    """
    Return the url of a video transcript, or None if there is no transcript.

    Arguments:
        video_id: it can be an edx_video_id or an external_id extracted from
            external sources in a video component.
        language_code: language code of a video transcript
    """
    transcript = VideoTranscript.get_or_none(video_id, language_code)
    return transcript.url() if transcript else None
def create_transcript_objects(xml):
    """
    Create or update a VideoTranscript for each transcript element found in the xml.

    Elements are located with the ``.//transcripts/transcript`` path. When a
    KeyError is raised while handling an element (e.g. a required attribute is
    absent), a warning is logged and that element is skipped so that the
    remaining elements are still processed.

    Arguments:
        xml (Element): lxml Element object
    """
    for transcript in xml.findall('.//transcripts/transcript'):
        try:
            VideoTranscript.create_or_update(
                transcript.attrib['video_id'],
                transcript.attrib['language_code'],
                transcript.attrib['file_name'],
                transcript.attrib['file_format'],
                transcript.attrib['provider'],
            )
        except KeyError:
            # ``Logger.warn`` is a deprecated alias; ``warning`` is the supported name.
            logger.warning(
                "VAL: Required attributes are missing from xml, xml=[%s]",
                etree.tostring(transcript).strip()
            )
def post(self, request):
    """
    Create a video transcript record from the posted metadata.

    Arguments:
        request: A WSGI request. Its ``data`` must contain ``video_id``,
            ``name``, ``language_code``, ``provider`` and ``file_format``.

    Returns:
        Response: HTTP 200 (no body) once the metadata has been handed to
        ``create_or_update_video_transcript``. HTTP 400 with a ``message``
        when a required param is missing, when the file format or provider
        is not among the supported choices, or when a transcript already
        exists for the given video and language code.
    """
    attrs = ('video_id', 'name', 'language_code', 'provider', 'file_format')
    missing = [attr for attr in attrs if attr not in request.data]
    if missing:
        joined_missing = ' and '.join(missing)
        # ``Logger.warn`` is a deprecated alias; ``warning`` is the supported name.
        LOGGER.warning(
            '[VAL] Required transcript params are missing. %s', joined_missing
        )
        return Response(
            status=status.HTTP_400_BAD_REQUEST,
            data=dict(message=u'{missing} must be specified.'.format(missing=joined_missing))
        )

    video_id = request.data['video_id']
    language_code = request.data['language_code']
    transcript_name = request.data['name']
    provider = request.data['provider']
    file_format = request.data['file_format']

    # Validate the file format against the declared choices.
    supported_formats = sorted(dict(TranscriptFormat.CHOICES).keys())
    if file_format not in supported_formats:
        message = (
            u'"{format}" transcript file type is not supported. Supported formats are "{supported_formats}"'
        ).format(format=file_format, supported_formats=supported_formats)
        return Response(status=status.HTTP_400_BAD_REQUEST, data={'message': message})

    # Validate the provider against the declared choices.
    supported_providers = sorted(dict(TranscriptProviderType.CHOICES).keys())
    if provider not in supported_providers:
        message = (
            u'"{provider}" provider is not supported. Supported transcription providers are "{supported_providers}"'
        ).format(provider=provider, supported_providers=supported_providers)
        return Response(status=status.HTTP_400_BAD_REQUEST, data={'message': message})

    # Only create a transcript when none exists yet; never override.
    transcript = VideoTranscript.get_or_none(video_id, language_code)
    if transcript is None:
        create_or_update_video_transcript(video_id, language_code, metadata={
            'provider': provider,
            'file_name': transcript_name,
            'file_format': file_format
        })
        response = Response(status=status.HTTP_200_OK)
    else:
        message = (
            u'Can not override existing transcript for video "{video_id}" and language code "{language}".'
        ).format(video_id=video_id, language=language_code)
        response = Response(status=status.HTTP_400_BAD_REQUEST, data={'message': message})

    return response
def delete_video_transcript(video_id, language_code):
    """
    Remove the transcript attached to a video, if there is one.

    Nothing happens when no transcript is found for the given video and
    language code.

    Arguments:
        video_id: id identifying the video to which the transcript is associated.
        language_code: language code of a video transcript.
    """
    existing = VideoTranscript.get_or_none(video_id, language_code)
    if not existing:
        return

    # First drop the transcript content from storage ...
    existing.transcript.delete()
    # ... then drop the transcript metadata from db.
    existing.delete()
    logger.info('Transcript is removed for video "%s" and language code "%s"', video_id, language_code)
def create_or_update_video_transcript(video_id, language_code, metadata, file_data=None):
    """
    Create or update the transcript of an existing video.

    Arguments:
        video_id: it can be an edx_video_id or an external_id extracted from
            external sources in a video component.
        language_code: language code of a video transcript
        metadata (dict): A dict containing (to be overwritten) properties. Only
            the ``provider``, ``language_code``, ``file_name`` and ``file_format``
            entries with a truthy value are kept.
        file_data (InMemoryUploadedFile): Transcript data to be saved for a course video.

    Returns:
        The video transcript url, or None when no Video with the given
        edx_video_id exists.

    Raises:
        InvalidTranscriptFormat: the given file format is not a supported choice.
        InvalidTranscriptProvider: the given provider is not a supported choice.
    """
    # Keep only the wanted properties that carry a value.
    wanted_props = ['provider', 'language_code', 'file_name', 'file_format']
    filtered = {}
    for prop, value in metadata.items():
        if prop in wanted_props and value:
            filtered[prop] = value
    metadata = filtered

    file_format = metadata.get('file_format')
    if file_format:
        known_formats = list(dict(TranscriptFormat.CHOICES))
        if file_format not in known_formats:
            raise InvalidTranscriptFormat(
                '{} transcript format is not supported'.format(file_format)
            )

    provider = metadata.get('provider')
    if provider:
        known_providers = list(dict(TranscriptProviderType.CHOICES))
        if provider not in known_providers:
            raise InvalidTranscriptProvider(
                '{} transcript provider is not supported'.format(provider)
            )

    try:
        # A transcript can only be attached to a video that is present in edxval.
        video = Video.objects.get(edx_video_id=video_id)
        saved_transcript, __ = VideoTranscript.create_or_update(
            video, language_code, metadata, file_data
        )
    except Video.DoesNotExist:
        return None

    return saved_transcript.url()
def get_video_transcript_data(video_id, language_code):
    """
    Fetch the file name and content of a video transcript.

    Arguments:
        video_id(unicode): An id identifying the Video.
        language_code(unicode): The language code of the requested transcript.

    Returns:
        A dict with ``file_name`` and ``content`` keys, or None when no
        transcript is found.

    Raises:
        Any exception hit while reading the transcript is logged and re-raised.
    """
    transcript = VideoTranscript.get_or_none(video_id, language_code)
    if not transcript:
        return None

    try:
        return {
            'file_name': transcript.filename,
            'content': transcript.transcript.file.read(),
        }
    except Exception:
        logger.exception(
            '[edx-val] Error while retrieving transcript for video=%s -- language_code=%s',
            video_id, language_code)
        raise
def create_or_update_video_transcript(
        video_id,
        language_code,
        file_name,
        file_format,
        provider,
        file_data=None,
):
    """
    Create or update the transcript of an existing video.

    Arguments:
        video_id: it can be an edx_video_id or an external_id extracted from
            external sources in a video component.
        language_code: language code of a video transcript
        file_name: file name of a video transcript
        file_format: format of the transcript
        provider: transcript provider
        file_data (InMemoryUploadedFile): Transcript data to be saved for a course video.

    Returns:
        video transcript url

    Raises:
        InvalidTranscriptFormat: ``file_format`` is not a supported choice.
        InvalidTranscriptProvider: ``provider`` is not a supported choice.
    """
    known_formats = dict(TranscriptFormat.CHOICES).keys()
    if file_format not in known_formats:
        message = '{} transcript format is not supported'.format(file_format)
        raise InvalidTranscriptFormat(message)

    known_providers = dict(TranscriptProviderType.CHOICES).keys()
    if provider not in known_providers:
        message = '{} transcript provider is not supported'.format(provider)
        raise InvalidTranscriptProvider(message)

    # The second element of the returned pair is not needed here.
    saved_transcript, __ = VideoTranscript.create_or_update(
        video_id, language_code, file_name, file_format, provider, file_data,
    )
    return saved_transcript.url()
def get_video_transcript_data(video_id, language_code):
    """
    Return the file name and content of the requested video transcript.

    Arguments:
        video_id(unicode): An id identifying the Video.
        language_code(unicode): The language code of the requested transcript.

    Returns:
        A dict containing the transcript ``file_name`` and its ``content``;
        None when there is no matching transcript.

    Raises:
        Whatever error occurs while reading the transcript, after logging it.
    """
    record = VideoTranscript.get_or_none(video_id, language_code)
    if record:
        try:
            transcript_data = dict(
                file_name=record.filename,
                content=record.transcript.file.read(),
            )
        except Exception:
            logger.exception(
                '[edx-val] Error while retrieving transcript for video=%s -- language_code=%s',
                video_id,
                language_code
            )
            raise
        return transcript_data
    return None
def create_or_update_video_transcript(video_id, language_code, metadata, file_data=None):
    """
    Create or update the transcript attached to an existing video.

    Arguments:
        video_id: it can be an edx_video_id or an external_id extracted from
            external sources in a video component.
        language_code: language code of a video transcript
        metadata (dict): A dict containing (to be overwritten) properties; entries
            other than ``provider``, ``language_code``, ``file_name`` and
            ``file_format``, or with a falsy value, are ignored.
        file_data (InMemoryUploadedFile): Transcript data to be saved for a course video.

    Returns:
        The video transcript url, or None if the video is not present in edxval.

    Raises:
        InvalidTranscriptFormat: the file format is not a supported choice.
        InvalidTranscriptProvider: the provider is not a supported choice.
    """
    # Filter wanted properties, dropping the ones without a value.
    wanted = ('provider', 'language_code', 'file_name', 'file_format')
    metadata = dict(
        (prop, value)
        for prop, value in six.iteritems(metadata)
        if prop in wanted and value
    )

    file_format = metadata.get('file_format')
    format_unsupported = file_format and file_format not in list(dict(TranscriptFormat.CHOICES))
    if format_unsupported:
        raise InvalidTranscriptFormat('{} transcript format is not supported'.format(file_format))

    provider = metadata.get('provider')
    provider_unsupported = provider and provider not in list(dict(TranscriptProviderType.CHOICES))
    if provider_unsupported:
        raise InvalidTranscriptProvider('{} transcript provider is not supported'.format(provider))

    try:
        # Transcripts can only be attached to a video that exists in edxval.
        video = Video.objects.get(edx_video_id=video_id)
        transcript, __ = VideoTranscript.create_or_update(video, language_code, metadata, file_data)
    except Video.DoesNotExist:
        return None

    return transcript.url()
def create(self, validated_data):
    """
    Create the video transcript.

    Arguments:
        validated_data (dict): keyword arguments passed through unchanged
            to ``VideoTranscript.create``.

    Returns:
        Whatever ``VideoTranscript.create`` returns.
    """
    new_transcript = VideoTranscript.create(**validated_data)
    return new_transcript
def import_transcript_from_fs(edx_video_id, language_code, file_name, provider, resource_fs, static_dir):
    """
    Read a transcript file from the import file system and store it as a transcript record.

    The import is silently skipped (after logging a warning where noted) when:
      * a transcript already exists and overriding imported transcripts is not enabled;
      * the transcript file is not found (warning);
      * the file content cannot be decoded as utf-8 (warning);
      * the new content duplicates the existing transcript file;
      * the transcript format cannot be determined from the content (warning).

    Arguments:
        edx_video_id (str): Video id of the video.
        language_code (unicode): Language code of the requested transcript.
        file_name (unicode): File name of the transcript file.
        provider (unicode): Transcript provider.
        resource_fs (OSFS): Import file system.
        static_dir (str): The Directory to retrieve transcript file.
    """
    current_transcript = VideoTranscript.get_or_none(edx_video_id, language_code)

    # An already existing transcript may only be replaced when overriding
    # existing imported transcripts is enabled.
    if current_transcript and not OVERRIDE_EXISTING_IMPORTED_TRANSCRIPTS.is_enabled():
        return

    # Read the file from the import file system.
    try:
        with resource_fs.open(combine(static_dir, file_name), 'r', encoding='utf-8-sig') as transcript_file:
            transcript_text = transcript_file.read()
    except ResourceNotFound:
        # A transcript file missing from the course OLX is not an error.
        logger.warning(
            '[edx-val] "%s" transcript "%s" for video "%s" is not found.',
            language_code,
            file_name,
            edx_video_id
        )
        return
    except UnicodeDecodeError:
        # Non-utf8 transcript content is not an error either.
        logger.warning(
            '[edx-val] "%s" transcript "%s" for video "%s" contains a non-utf8 file content.',
            language_code,
            file_name,
            edx_video_id
        )
        return

    # Store the content utf8 encoded.
    content_file = ContentFile(transcript_text.encode('utf-8'))

    # Nothing to do when the new content duplicates the existing transcript.
    if current_transcript and is_duplicate_file(content_file, current_transcript.transcript.file):
        return

    # Derive the file format from the transcript content.
    try:
        detected_format = get_transcript_format(transcript_text)
    except Error:
        # Do not raise; simply do not create the transcript record.
        logger.warning(
            '[edx-val] Error while getting transcript format for video=%s -- language_code=%s --file_name=%s',
            edx_video_id,
            language_code,
            file_name
        )
        return

    # Create (or update) the transcript record.
    transcript_metadata = {
        'provider': provider,
        'file_format': detected_format,
        'language_code': language_code,
    }
    create_or_update_video_transcript(
        video_id=edx_video_id,
        language_code=language_code,
        metadata=transcript_metadata,
        file_data=content_file,
    )