Exemple #1
0
    def post(self, request):
        """
        Creates a video transcript instance with the given information.

        ``request.data`` must contain ``video_id``, ``name``, ``language_code``,
        ``provider`` and ``file_format``. The file format must be a key of
        ``TranscriptFormat.CHOICES`` and the provider a key of
        ``TranscriptProviderType.CHOICES``.

        Arguments:
            request: A WSGI request.

        Returns:
            Response: HTTP 400 with a ``message`` when a required param is
            missing, the format or provider is unsupported, or a transcript
            already exists for the video and language code. HTTP 200 once
            ``create_or_update_video_transcript`` has been called.
        """
        attrs = ('video_id', 'name', 'language_code', 'provider',
                 'file_format')
        missing = [attr for attr in attrs if attr not in request.data]
        if missing:
            # Join once; the same text goes to the log and to the client.
            missing_text = ' and '.join(missing)
            # ``Logger.warn`` is a deprecated alias; ``warning`` is the
            # supported spelling.
            LOGGER.warning('[VAL] Required transcript params are missing. %s',
                           missing_text)
            return Response(
                status=status.HTTP_400_BAD_REQUEST,
                data=dict(message=u'{missing} must be specified.'.format(
                    missing=missing_text)))

        video_id = request.data['video_id']
        language_code = request.data['language_code']
        transcript_name = request.data['name']
        provider = request.data['provider']
        file_format = request.data['file_format']

        # Sorted lists are kept because they are rendered into the error
        # messages below.
        supported_formats = sorted(dict(TranscriptFormat.CHOICES).keys())
        if file_format not in supported_formats:
            message = (
                u'"{format}" transcript file type is not supported. Supported formats are "{supported_formats}"'
            ).format(format=file_format, supported_formats=supported_formats)
            return Response(status=status.HTTP_400_BAD_REQUEST,
                            data={'message': message})

        supported_providers = sorted(
            dict(TranscriptProviderType.CHOICES).keys())
        if provider not in supported_providers:
            message = (
                u'"{provider}" provider is not supported. Supported transcription providers are "{supported_providers}"'
            ).format(provider=provider,
                     supported_providers=supported_providers)
            return Response(status=status.HTTP_400_BAD_REQUEST,
                            data={'message': message})

        # This endpoint only creates; an existing transcript is never
        # overwritten.
        transcript = VideoTranscript.get_or_none(video_id, language_code)
        if transcript is not None:
            message = (
                u'Can not override existing transcript for video "{video_id}" and language code "{language}".'
            ).format(video_id=video_id, language=language_code)
            return Response(status=status.HTTP_400_BAD_REQUEST,
                            data={'message': message})

        create_or_update_video_transcript(video_id,
                                          language_code,
                                          metadata={
                                              'provider': provider,
                                              'file_name': transcript_name,
                                              'file_format': file_format
                                          })
        return Response(status=status.HTTP_200_OK)
Exemple #2
0
def get_video_transcript(video_id, language_code):
    """
    Look up a video transcript and return its serialized info.

    Arguments:
        video_id(unicode): A video id, it can be an edx_video_id or an external video id extracted from
        external sources of a video component.
        language_code(unicode): the language code of the requested transcript.

    Returns:
        The ``data`` of a TranscriptSerializer built from the transcript, or
        None when the lookup yields no transcript.
    """
    found = VideoTranscript.get_or_none(video_id=video_id, language_code=language_code)
    if not found:
        return None
    return TranscriptSerializer(found).data
Exemple #3
0
def get_video_transcript_url(video_id, language_code):
    """
    Return the url of a video transcript, or None if there is no transcript.

    Arguments:
        video_id: it can be an edx_video_id or an external_id extracted from external sources in a video component.
        language_code: language code of a video transcript
    """
    found = VideoTranscript.get_or_none(video_id, language_code)
    return found.url() if found else None
Exemple #4
0
def get_video_transcript_url(video_id, language_code):
    """
    Return the url of a video transcript, or None if there is no transcript.

    Arguments:
        video_id: it can be an edx_video_id or an external_id extracted from external sources in a video component.
        language_code: language code of a video transcript
    """
    found = VideoTranscript.get_or_none(video_id, language_code)
    if not found:
        return None
    return found.url()
Exemple #5
0
def create_transcript_objects(xml):
    """
    Create VideoTranscript objects.

    Every ``transcripts/transcript`` element found under ``xml`` is passed to
    ``VideoTranscript.create_or_update`` using its ``video_id``,
    ``language_code``, ``file_name``, ``file_format`` and ``provider``
    attributes. When a KeyError is raised for an element, a warning is logged
    and processing continues with the next element.

    Arguments:
        xml (Element): lxml Element object
    """
    for transcript in xml.findall('.//transcripts/transcript'):
        try:
            VideoTranscript.create_or_update(
                transcript.attrib['video_id'],
                transcript.attrib['language_code'],
                transcript.attrib['file_name'],
                transcript.attrib['file_format'],
                transcript.attrib['provider'],
            )
        except KeyError:
            # ``Logger.warn`` is a deprecated alias; ``warning`` is the
            # supported spelling.
            logger.warning(
                "VAL: Required attributes are missing from xml, xml=[%s]",
                etree.tostring(transcript).strip())
Exemple #6
0
def get_video_transcript(video_id, language_code):
    """
    Look up a video transcript and return its serialized info.

    Arguments:
        video_id(unicode): A video id, it can be an edx_video_id or an external video id extracted from
        external sources of a video component.
        language_code(unicode): the language code of the requested transcript.

    Returns:
        The ``data`` of a TranscriptSerializer built from the transcript, or
        None when the lookup yields no transcript.
    """
    match = VideoTranscript.get_or_none(video_id=video_id, language_code=language_code)
    if match:
        return TranscriptSerializer(match).data
    return None
Exemple #7
0
    def post(self, request):
        """
        Creates a video transcript instance with the given information.

        ``request.data`` must contain ``video_id``, ``name``, ``language_code``,
        ``provider`` and ``file_format``. The file format must be a key of
        ``TranscriptFormat.CHOICES`` and the provider a key of
        ``TranscriptProviderType.CHOICES``.

        Arguments:
            request: A WSGI request.

        Returns:
            Response: HTTP 400 with a ``message`` when a required param is
            missing, the format or provider is unsupported, or a transcript
            already exists for the video and language code. HTTP 200 once
            ``create_or_update_video_transcript`` has been called.
        """
        attrs = ('video_id', 'name', 'language_code', 'provider', 'file_format')
        missing = [attr for attr in attrs if attr not in request.data]
        if missing:
            # Join once; the same text goes to the log and to the client.
            missing_text = ' and '.join(missing)
            # ``Logger.warn`` is a deprecated alias; ``warning`` is the
            # supported spelling.
            LOGGER.warning('[VAL] Required transcript params are missing. %s', missing_text)
            return Response(
                status=status.HTTP_400_BAD_REQUEST,
                data=dict(message=u'{missing} must be specified.'.format(missing=missing_text))
            )

        video_id = request.data['video_id']
        language_code = request.data['language_code']
        transcript_name = request.data['name']
        provider = request.data['provider']
        file_format = request.data['file_format']

        # Sorted lists are kept because they are rendered into the error
        # messages below.
        supported_formats = sorted(dict(TranscriptFormat.CHOICES).keys())
        if file_format not in supported_formats:
            message = (
                u'"{format}" transcript file type is not supported. Supported formats are "{supported_formats}"'
            ).format(format=file_format, supported_formats=supported_formats)
            return Response(status=status.HTTP_400_BAD_REQUEST, data={'message': message})

        supported_providers = sorted(dict(TranscriptProviderType.CHOICES).keys())
        if provider not in supported_providers:
            message = (
                u'"{provider}" provider is not supported. Supported transcription providers are "{supported_providers}"'
            ).format(provider=provider, supported_providers=supported_providers)
            return Response(status=status.HTTP_400_BAD_REQUEST, data={'message': message})

        # This endpoint only creates; an existing transcript is never
        # overwritten.
        transcript = VideoTranscript.get_or_none(video_id, language_code)
        if transcript is not None:
            message = (
                u'Can not override existing transcript for video "{video_id}" and language code "{language}".'
            ).format(video_id=video_id, language=language_code)
            return Response(status=status.HTTP_400_BAD_REQUEST, data={'message': message})

        create_or_update_video_transcript(video_id, language_code, metadata={
            'provider': provider,
            'file_name': transcript_name,
            'file_format': file_format
        })
        return Response(status=status.HTTP_200_OK)
Exemple #8
0
def delete_video_transcript(video_id, language_code):
    """
    Remove the transcript of an existing video, if there is one.

    Nothing happens when no transcript is found for the given video and
    language code.

    Arguments:
        video_id: id identifying the video to which the transcript is associated.
        language_code: language code of a video transcript.
    """
    doomed = VideoTranscript.get_or_none(video_id, language_code)
    if not doomed:
        return

    # The transcript content is deleted first, then the record itself.
    doomed.transcript.delete()
    doomed.delete()
    logger.info('Transcript is removed for video "%s" and language code "%s"', video_id, language_code)
Exemple #9
0
def delete_video_transcript(video_id, language_code):
    """
    Remove the transcript of an existing video, if there is one.

    Nothing happens when no transcript is found for the given video and
    language code.

    Arguments:
        video_id: id identifying the video to which the transcript is associated.
        language_code: language code of a video transcript.
    """
    target = VideoTranscript.get_or_none(video_id, language_code)
    if not target:
        return

    # The transcript content is deleted first, then the record itself.
    target.transcript.delete()
    target.delete()
    logger.info('Transcript is removed for video "%s" and language code "%s"', video_id, language_code)
Exemple #10
0
def create_or_update_video_transcript(video_id,
                                      language_code,
                                      metadata,
                                      file_data=None):
    """
    Create or update the transcript attached to an existing video.

    Arguments:
        video_id: it can be an edx_video_id or an external_id extracted from external sources in a video component.
        language_code: language code of a video transcript
        metadata (dict): A dict containing (to be overwritten) properties
        file_data (InMemoryUploadedFile): Transcript data to be saved for a course video.

    Returns:
        video transcript url, or None when no Video matches ``video_id``.

    Raises:
        InvalidTranscriptFormat: a ``file_format`` was given that is not a
            key of ``TranscriptFormat.CHOICES``.
        InvalidTranscriptProvider: a ``provider`` was given that is not a
            key of ``TranscriptProviderType.CHOICES``.
    """
    # Keep only the recognised properties that carry a truthy value.
    wanted_props = ['provider', 'language_code', 'file_name', 'file_format']
    filtered = {}
    for prop, value in metadata.items():
        if prop in wanted_props and value:
            filtered[prop] = value
    metadata = filtered

    file_format = metadata.get('file_format')
    if file_format:
        known_formats = list(dict(TranscriptFormat.CHOICES))
        if file_format not in known_formats:
            raise InvalidTranscriptFormat(
                '{} transcript format is not supported'.format(file_format))

    provider = metadata.get('provider')
    if provider:
        known_providers = list(dict(TranscriptProviderType.CHOICES))
        if provider not in known_providers:
            raise InvalidTranscriptProvider(
                '{} transcript provider is not supported'.format(provider))

    try:
        # Video should be present in edxval in order to attach transcripts to it.
        video = Video.objects.get(edx_video_id=video_id)
        video_transcript, __ = VideoTranscript.create_or_update(
            video, language_code, metadata, file_data)
    except Video.DoesNotExist:
        return None
    else:
        return video_transcript.url()
Exemple #11
0
def get_video_transcript_data(video_id, language_code):
    """
    Fetch the file name and content of a video transcript.

    Arguments:
        video_id(unicode): An id identifying the Video.
        language_code(unicode): it will be the language code of the requested transcript.

    Returns:
        A dict containing transcript file name and its content, or None when
        no transcript is found.

    Raises:
        Any exception hit while reading the transcript; it is logged and
        then re-raised.
    """
    found = VideoTranscript.get_or_none(video_id, language_code)
    if not found:
        return None

    try:
        return {
            'file_name': found.filename,
            'content': found.transcript.file.read(),
        }
    except Exception:
        logger.exception(
            '[edx-val] Error while retrieving transcript for video=%s -- language_code=%s',
            video_id, language_code)
        raise
Exemple #12
0
def create_or_update_video_transcript(
    video_id,
    language_code,
    file_name,
    file_format,
    provider,
    file_data=None,
):
    """
    Create or update the transcript attached to an existing video.

    Arguments:
        video_id: it can be an edx_video_id or an external_id extracted from external sources in a video component.
        language_code: language code of a video transcript
        file_name: file name of a video transcript
        file_format: format of the transcript
        provider: transcript provider
        file_data (InMemoryUploadedFile): Transcript data to be saved for a course video.

    Returns:
        video transcript url

    Raises:
        InvalidTranscriptFormat: ``file_format`` is not a key of
            ``TranscriptFormat.CHOICES``.
        InvalidTranscriptProvider: ``provider`` is not a key of
            ``TranscriptProviderType.CHOICES``.
    """
    known_formats = dict(TranscriptFormat.CHOICES)
    if file_format not in known_formats:
        raise InvalidTranscriptFormat(
            '{} transcript format is not supported'.format(file_format))

    known_providers = dict(TranscriptProviderType.CHOICES)
    if provider not in known_providers:
        raise InvalidTranscriptProvider(
            '{} transcript provider is not supported'.format(provider))

    # The second element of the returned pair is not needed here.
    result = VideoTranscript.create_or_update(
        video_id,
        language_code,
        file_name,
        file_format,
        provider,
        file_data,
    )
    video_transcript, __ = result

    return video_transcript.url()
Exemple #13
0
def get_video_transcript_data(video_id, language_code):
    """
    Fetch the file name and content of a video transcript.

    Arguments:
        video_id(unicode): An id identifying the Video.
        language_code(unicode): it will be the language code of the requested transcript.

    Returns:
        A dict containing transcript file name and its content, or None when
        no transcript is found.

    Raises:
        Any exception hit while reading the transcript; it is logged and
        then re-raised.
    """
    record = VideoTranscript.get_or_none(video_id, language_code)
    if not record:
        return None

    try:
        return {'file_name': record.filename, 'content': record.transcript.file.read()}
    except Exception:
        logger.exception(
            '[edx-val] Error while retrieving transcript for video=%s -- language_code=%s',
            video_id,
            language_code
        )
        raise
Exemple #14
0
def create_or_update_video_transcript(video_id, language_code, metadata, file_data=None):
    """
    Create or update the transcript attached to an existing video.

    Arguments:
        video_id: it can be an edx_video_id or an external_id extracted from external sources in a video component.
        language_code: language code of a video transcript
        metadata (dict): A dict containing (to be overwritten) properties
        file_data (InMemoryUploadedFile): Transcript data to be saved for a course video.

    Returns:
        video transcript url, or None when no Video matches ``video_id``.

    Raises:
        InvalidTranscriptFormat: a ``file_format`` was given that is not a
            key of ``TranscriptFormat.CHOICES``.
        InvalidTranscriptProvider: a ``provider`` was given that is not a
            key of ``TranscriptProviderType.CHOICES``.
    """
    # Keep only the recognised properties that carry a truthy value.
    wanted_props = ['provider', 'language_code', 'file_name', 'file_format']
    filtered = {}
    for prop, value in six.iteritems(metadata):
        if prop in wanted_props and value:
            filtered[prop] = value
    metadata = filtered

    file_format = metadata.get('file_format')
    if file_format:
        known_formats = list(dict(TranscriptFormat.CHOICES))
        if file_format not in known_formats:
            raise InvalidTranscriptFormat('{} transcript format is not supported'.format(file_format))

    provider = metadata.get('provider')
    if provider:
        known_providers = list(dict(TranscriptProviderType.CHOICES))
        if provider not in known_providers:
            raise InvalidTranscriptProvider('{} transcript provider is not supported'.format(provider))

    try:
        # Video should be present in edxval in order to attach transcripts to it.
        video = Video.objects.get(edx_video_id=video_id)
        video_transcript, __ = VideoTranscript.create_or_update(video, language_code, metadata, file_data)
    except Video.DoesNotExist:
        return None
    else:
        return video_transcript.url()
Exemple #15
0
 def create(self, validated_data):
     """
     Build a video transcript from the validated fields.

     Arguments:
         validated_data: mapping whose items are forwarded as keyword
             arguments to ``VideoTranscript.create``.

     Returns:
         Whatever ``VideoTranscript.create`` returns.
     """
     new_transcript = VideoTranscript.create(**validated_data)
     return new_transcript
Exemple #16
0
def import_transcript_from_fs(edx_video_id, language_code, file_name, provider, resource_fs, static_dir):
    """
    Import a transcript file from a file system and store it as a transcript record.

    The import is skipped, and the function returns without creating or
    updating anything, when:
      * a transcript already exists and OVERRIDE_EXISTING_IMPORTED_TRANSCRIPTS
        is not enabled;
      * the file is not found (ResourceNotFound) or cannot be decoded
        (UnicodeDecodeError) -- a warning is logged;
      * a transcript already exists and ``is_duplicate_file`` reports the new
        content as a duplicate of its file;
      * ``get_transcript_format`` raises ``Error`` -- a warning is logged.

    Arguments:
        edx_video_id (str): Video id of the video.
        language_code (unicode): Language code of the requested transcript.
        file_name (unicode): File name of the transcript file.
        provider (unicode): Transcript provider.
        resource_fs (OSFS): Import file system.
        static_dir (str): The Directory to retrieve transcript file.
    """
    current_transcript = VideoTranscript.get_or_none(edx_video_id, language_code)

    # An existing transcript may only be replaced when overriding imported
    # transcripts is enabled.
    if current_transcript:
        if not OVERRIDE_EXISTING_IMPORTED_TRANSCRIPTS.is_enabled():
            return

    # Read the transcript text from the import file system.
    try:
        with resource_fs.open(combine(static_dir, file_name), 'r', encoding='utf-8-sig') as transcript_file:
            transcript_text = transcript_file.read()
    except ResourceNotFound:
        # A transcript file missing from the course OLX is not fatal.
        logger.warning(
            '[edx-val] "%s" transcript "%s" for video "%s" is not found.',
            language_code,
            file_name,
            edx_video_id
        )
        return
    except UnicodeDecodeError:
        # Non-utf8 transcript content is not fatal either.
        logger.warning(
            '[edx-val] "%s" transcript "%s" for video "%s" contains a non-utf8 file content.',
            language_code,
            file_name,
            edx_video_id
        )
        return

    # Re-encode the text as utf8 and wrap it for storage.
    new_content = ContentFile(transcript_text.encode('utf-8'))

    # Skip the import when the new content duplicates the existing file.
    if current_transcript:
        if is_duplicate_file(new_content, current_transcript.transcript.file):
            return

    # Work out the file format from the transcript text itself.
    try:
        detected_format = get_transcript_format(transcript_text)
    except Error:
        # Not fatal; the transcript record is simply not created.
        logger.warning(
            '[edx-val] Error while getting transcript format for video=%s -- language_code=%s --file_name=%s',
            edx_video_id,
            language_code,
            file_name
        )
        return

    transcript_metadata = {
        'provider': provider,
        'file_format': detected_format,
        'language_code': language_code,
    }
    create_or_update_video_transcript(
        video_id=edx_video_id,
        language_code=language_code,
        metadata=transcript_metadata,
        file_data=new_content,
    )
Exemple #17
0
 def create(self, validated_data):
     """
     Build a video transcript from the validated fields.

     Arguments:
         validated_data: mapping whose items are forwarded as keyword
             arguments to ``VideoTranscript.create``.

     Returns:
         Whatever ``VideoTranscript.create`` returns.
     """
     created = VideoTranscript.create(**validated_data)
     return created