Example #1
0
def xml_for_definition(definition_key):
    """
    Load a definition's OLX file from Blockstore and parse it into an etree.

    Arguments:
        definition_key: key identifying the definition. Its ``bundle_uuid``,
            ``olx_path``, ``bundle_version`` and ``draft_name`` attributes
            are used to locate the OLX file.

    Returns:
        The node returned by ``etree.fromstring`` for the OLX file's data.

    Raises:
        NoSuchDefinition: if the OLX file is not found in the bundle. The
            original ``BundleFileNotFound`` is attached as its cause.
    """
    try:
        xml_str = get_bundle_file_data_with_cache(
            bundle_uuid=definition_key.bundle_uuid,
            path=definition_key.olx_path,
            bundle_version=definition_key.bundle_version,
            draft_name=definition_key.draft_name,
        )
    except blockstore_api.BundleFileNotFound as err:
        # Chain explicitly so the underlying Blockstore failure is reported
        # as the direct cause instead of an incidental nested exception.
        raise NoSuchDefinition("OLX file {} not found in bundle {}.".format(
            definition_key.olx_path, definition_key.bundle_uuid,
        )) from err
    return etree.fromstring(xml_str)
Example #2
0
    def get_transcript_from_blockstore(self, language, output_format, transcripts):
        """
        Return this video's transcript, loaded from Blockstore.

        The transcript file is read from the ``static/`` folder that sits next
        to the block's OLX file in its Blockstore bundle. Only ``.srt`` files
        are accepted as the stored format; other output formats are produced
        by converting the SRT data.

        Arguments:
            language (str): transcript language; falls back to "en" when falsy.
            output_format (str): one of Transcript.SRT, Transcript.SJSON or
                Transcript.TXT.
            transcripts (dict): maps a language code to the transcript filename.

        Returns:
            tuple containing content, filename, mimetype

        Raises:
            NotFoundError: if the output format is invalid, the language has no
                transcript, the file is not a .srt file, the file is missing
                from the bundle, or the resulting transcript is empty.
        """
        language = language or "en"

        if output_format not in (Transcript.SRT, Transcript.SJSON, Transcript.TXT):
            raise NotFoundError(f"Invalid transcript format `{output_format}`")
        if language not in transcripts:
            raise NotFoundError(
                f"Video {self.scope_ids.usage_id} does not have a transcript "
                f"file defined for the '{language}' language in its OLX."
            )
        filename = transcripts[language]
        if not filename.endswith(".srt"):
            raise NotFoundError(
                "Video XBlocks in Blockstore only support .srt transcript files."
            )

        # Transcript files live in the 'static/' folder beside the OLX file.
        def_id = self.scope_ids.def_id
        path = def_id.olx_path.rpartition("/")[0] + "/static/" + filename
        try:
            content_binary = blockstore_cache.get_bundle_file_data_with_cache(
                def_id.bundle_uuid, path, def_id.bundle_version, def_id.draft_name
            )
        except blockstore_api.BundleFileNotFound as err:
            raise NotFoundError(
                f"Transcript file '{path}' missing for video XBlock {self.scope_ids.usage_id}"
            ) from err

        # Now convert the transcript data to the requested format:
        filename_no_extension = os.path.splitext(filename)[0]
        output_filename = f"{filename_no_extension}.{output_format}"
        srt_content = content_binary.decode("utf-8")
        if output_format == Transcript.SRT:
            # Stored data is already SRT (enforced above); return it untouched.
            output_transcript = srt_content
        else:
            # The filename and MIME type below advertise `output_format`, so
            # the content must actually be converted to match them.
            output_transcript = Transcript.convert(
                srt_content,
                input_format=Transcript.SRT,
                output_format=output_format,
            )
        if not output_transcript.strip():
            raise NotFoundError("The transcript is empty.")
        return output_transcript, output_filename, Transcript.mime_types[output_format]
Example #3
0
def get_transcript_from_blockstore(video_block, language, output_format,
                                   transcripts_info):
    """
    Get video transcript from Blockstore.

    Blockstore expects video transcripts to be placed into the 'static/'
    subfolder of the XBlock's folder in a Blockstore bundle. For example, if the
    video XBlock's definition is in the standard location of
        video/video1/definition.xml
    Then the .srt files should be placed at e.g.
        video/video1/static/video1-en.srt
    This is the same place where other public static files are placed for other
    XBlocks, such as image files used by HTML blocks.

    Video XBlocks in Blockstore must set the 'transcripts' XBlock field to a
    JSON dictionary listing the filename of the transcript for each language:
        <video
            youtube_id_1_0="3_yD_cEKoCk"
            transcripts='{"en": "3_yD_cEKoCk-en.srt"}'
            display_name="Welcome Video with Transcript"
            download_track="true"
        />

    This method is tested in openedx/core/djangoapps/content_libraries/tests/test_static_assets.py

    Arguments:
        video_block (Video XBlock): The video XBlock
        language (str): transcript language
        output_format (str): transcript output format
        transcripts_info (dict): transcript info for a video, from video_block.get_transcripts_info()

    Returns:
        tuple containing content, filename, mimetype

    Raises:
        NotFoundError: if the output format is invalid, no transcript is
            defined for the language, the transcript is not a .srt file, the
            file is missing from the bundle (the original BundleFileNotFound
            is attached as the cause), or the converted transcript is empty.
    """
    if output_format not in (Transcript.SRT, Transcript.SJSON, Transcript.TXT):
        raise NotFoundError(
            'Invalid transcript format `{output_format}`'.format(
                output_format=output_format))
    transcripts = transcripts_info['transcripts']
    if language not in transcripts:
        raise NotFoundError(
            "Video {} does not have a transcript file defined for the '{}' language in its OLX."
            .format(
                video_block.scope_ids.usage_id,
                language,
            ))
    filename = transcripts[language]
    if not filename.endswith('.srt'):
        # We want to standardize on .srt
        raise NotFoundError(
            "Video XBlocks in Blockstore only support .srt transcript files.")
    # Try to load the transcript file out of Blockstore
    # In lieu of an XBlock API for this (like block.runtime.resources_fs), we use the blockstore API directly.
    def_id = video_block.scope_ids.def_id
    path = def_id.olx_path.rpartition('/')[0] + '/static/' + filename
    try:
        content_binary = blockstore_cache.get_bundle_file_data_with_cache(
            def_id.bundle_uuid,
            path,
            def_id.bundle_version,  # Either bundle_version or draft_name will be set.
            def_id.draft_name,
        )
    except blockstore_api.BundleFileNotFound as err:
        # Chain explicitly so the Blockstore lookup failure is reported as the
        # direct cause of the NotFoundError.
        raise NotFoundError(
            "Transcript file '{}' missing for video XBlock {}".format(
                path,
                video_block.scope_ids.usage_id,
            )) from err
    # Now convert the transcript data to the requested format:
    filename_no_extension = os.path.splitext(filename)[0]
    output_filename = '{}.{}'.format(filename_no_extension, output_format)
    output_transcript = Transcript.convert(
        content_binary.decode('utf-8'),
        input_format=Transcript.SRT,
        output_format=output_format,
    )
    if not output_transcript.strip():
        raise NotFoundError('No transcript content')
    return output_transcript, output_filename, Transcript.mime_types[output_format]