Ejemplo n.º 1
0
def extract_from_nimbus_attachment_caption(
        div_tag, processing_options: NimbusProcessingOptions):
    """
    Build a Caption from a nimbus attachment caption tag.

    Returns None when the tag is not a 'div'. When the tag has no text at all
    (for example a caption that only holds a <br>) the Caption contains a
    single empty TextItem, otherwise it contains the items produced by
    process_child_items for the tag.

    Passing None as div_tag raises AttributeError on the name lookup; callers
    in this code base catch that to detect a missing caption.
    """
    if div_tag.name != 'div':
        return None

    if div_tag.text == '':
        # empty caption, e.g. one that only has a <br> in it
        caption_contents = [TextItem(processing_options, '')]
    else:
        caption_contents = process_child_items(div_tag, processing_options)

    return Caption(processing_options, caption_contents)
Ejemplo n.º 2
0
    def extract_embed_block_quote_if_present():
        """
        Inner function to extract a block quote and its caption from an embed div.

        The exported remote frame and iframe work using process_child_items, but a
        twitter block quote repeats itself twice in nimbus html, so the blockquote
        extractor is used here instead. This also results in the contents being
        indented as a blockquote, like a twitter tweet is.

        Returns an EmbedNimbus when a blockquote is found and yields data,
        otherwise returns None.
        """
        block_quote_tag = div_tag.find('blockquote')
        if not block_quote_tag:
            return None

        data = html_data_extractors.extract_from_blockquote(
            block_quote_tag, processing_options)

        # Only the blockquote has been extracted so far, so the caption has to be
        # fetched separately. Asking for forgiveness: an AttributeError means there
        # is no caption data, which is OK, and an empty entry is made instead.
        try:
            caption_items = extract_from_nimbus_attachment_caption(
                div_tag.find('div', class_='attachment-caption'),
                processing_options)
        except AttributeError:
            caption_items = Caption(processing_options,
                                    [TextItem(processing_options, '')])

        if not data:
            return None

        return EmbedNimbus(processing_options, data, caption_items)
def extract_from_figure_caption(
        tag,
        processing_options: ProcessingOptions,
        note_specific_tag_cleaning: Optional[Callable] = None):
    """
    Build a Caption from the child items of a figure caption tag.

    The tag, the processing options and the optional note specific tag cleaning
    callable are handed to process_child_items, and the items it returns become
    the contents of the returned Caption.
    """
    return Caption(
        processing_options,
        process_child_items(tag, processing_options, note_specific_tag_cleaning),
    )
    def test_figure_none_for_image(self, processing_options):
        """A figure whose image entry is None renders only the figcaption inside the figure element."""
        image_object = None

        # The original assigned caption_object to itself in a chained assignment
        # (a copy-paste slip); a single assignment is all that is needed.
        caption_object = Caption(processing_options, [TextItem(processing_options, "a caption")])

        expected = '<figure><figcaption>a caption</figcaption></figure>'

        result = html_string_builders.figure((image_object, caption_object))

        assert result == expected
    def test_figure(self, processing_options):
        """A figure with an image and a caption renders an img element followed by a figcaption."""
        image = ImageEmbed(processing_options, "an image", "image.png", Path("image.pdf"), "200", "300")
        # the src in the expected html comes out as the target path set here
        image.target_path = Path("image.png")
        caption = Caption(processing_options, [TextItem(processing_options, "a caption")])

        expected_html = '<figure><img src="image.png" alt="an image" width="200" height="300"><figcaption>a caption</figcaption></figure>'

        assert html_string_builders.figure((image, caption)) == expected_html
Ejemplo n.º 6
0
    def test_figure_none_for_image(self, processing_options):
        """A Figure whose image entry is None produces markdown holding only the italicised caption."""
        caption = Caption(processing_options, [TextItem(processing_options, "a caption")])
        figure_without_image = Figure(processing_options, (None, caption))

        markdown_text = figure_without_image.markdown()

        assert markdown_text == '*a caption*\n\n'
Ejemplo n.º 7
0
    def test_figure(self, processing_options):
        """A Figure with an image and a caption produces an image link line followed by the italicised caption."""
        image = ImageEmbed(processing_options, "an image", "image.png", Path("image.pdf"), "200", "300")
        # the link target in the expected markdown comes out as the target path set here
        image.target_path = Path("image.png")
        caption = Caption(processing_options, [TextItem(processing_options, "a caption")])

        figure_with_image = Figure(processing_options, (image, caption))
        markdown_text = figure_with_image.markdown()

        assert markdown_text == '![an image|200x300](image.png)\n*a caption*\n\n'
Ejemplo n.º 8
0
def get_caption_text(div_tag, processing_options):
    """
    Return the caption for a nimbus attachment div.

    If div_tag itself carries the 'attachment-caption' class it is used as the
    caption tag, otherwise the first descendant div with that class is looked
    up. The chosen tag is passed to extract_from_nimbus_attachment_caption.

    Any AttributeError raised along the way (for example because no caption
    tag was found) results in a Caption holding a single empty TextItem.
    """
    try:
        is_caption_div = (div_tag.get('class')
                          and 'attachment-caption' in div_tag['class'])
        caption_tag = (div_tag if is_caption_div
                       else div_tag.find('div', class_='attachment-caption'))

        return extract_from_nimbus_attachment_caption(
            caption_tag, processing_options)
    except AttributeError:
        # no caption data available - fall back to an empty caption entry
        return Caption(processing_options,
                       [TextItem(processing_options, '')])