def handle_starttag(self, name, attrs, state, contentstate):
    """
    Begin an entity range for an inline entity element.

    A new Entity is built from ``self.entity_type``, ``self.mutability`` and
    the result of ``self.get_attribute_data``, and registered on
    ``contentstate``. An EntityRange for it, starting at the current end of
    the block text, is added to the current block and pushed onto
    ``state.current_entity_ranges``; its length is not set here.

    ``attrs`` is a list of (name, value) tuples; ``name`` is not used.
    """
    if state.current_block is None:
        # Inline entity element encountered at the top level - start a new
        # paragraph block to contain it. The shared helper is used so that
        # state.has_preceding_nonatomic_block is set along with the
        # current block and whitespace mode.
        add_paragraph_block(state, contentstate)

    if state.leading_whitespace == FORCE_WHITESPACE:
        # any pending whitespace should be output before handling this tag,
        # and subsequent whitespace should be collapsed into it (= stripped)
        state.current_block.text += ' '
        state.leading_whitespace = STRIP_WHITESPACE

    # get_attribute_data works with a dict rather than (name, value) tuples
    attr_dict = dict(attrs)

    entity = Entity(self.entity_type, self.mutability,
                    self.get_attribute_data(attr_dict))
    key = contentstate.add_entity(entity)

    entity_range = EntityRange(key)
    entity_range.offset = len(state.current_block.text)
    state.current_block.entity_ranges.append(entity_range)
    state.current_entity_ranges.append(entity_range)
Esempio n. 2
0
 def create_block(self, name, attrs, state, contentstate):
     """
     Create the block for a list item element.

     The block type is ``state.list_item_type`` and the depth is
     ``state.list_depth``; the key is looked up in ``attrs`` under
     BLOCK_KEY_NAME (None when absent).

     Raises AssertionError if ``state.list_item_type`` is None.
     """
     if state.list_item_type is None:
         # Raised explicitly instead of via an ``assert`` statement so that
         # the check is not stripped when Python runs with -O.
         raise AssertionError(
             "%s element found outside of an enclosing list element" % name
         )
     return Block(
         state.list_item_type, depth=state.list_depth, key=attrs.get(BLOCK_KEY_NAME)
     )
def add_paragraph_block(state, contentstate):
    """
    Append a fresh 'unstyled' (paragraph) block to ``contentstate`` and make
    it the block that ``state`` is currently writing into.

    Intended for element handlers that are not paragraph elements themselves
    but need a paragraph to exist for the output to be correct. The new block
    takes its depth from ``state.list_depth``.
    """
    paragraph = Block('unstyled', depth=state.list_depth)
    contentstate.blocks.append(paragraph)

    # Point the parser state at the new paragraph: whitespace at its start is
    # stripped, and a non-atomic block is now known to precede what follows.
    state.has_preceding_nonatomic_block = True
    state.leading_whitespace = STRIP_WHITESPACE
    state.current_block = paragraph
    def handle_starttag(self, name, attrs, state, contentstate):
        """
        Emit an 'atomic' block holding a single entity for this element.

        The entity comes from ``self.create_entity`` and is registered on
        ``contentstate``; the atomic block's text is a single space covered
        by one entity range pointing at that entity. ``attrs`` arrives as a
        list of (name, value) tuples and is handed on as a dict.
        """
        # forcibly close any block that illegally contains this one
        state.current_block = None

        entity = self.create_entity(name, dict(attrs), state, contentstate)

        # one-character range covering the placeholder space below
        placeholder_range = EntityRange(contentstate.add_entity(entity))
        placeholder_range.offset = 0
        placeholder_range.length = 1

        atomic_block = Block('atomic', depth=state.list_depth)
        contentstate.blocks.append(atomic_block)
        atomic_block.text = ' '
        atomic_block.entity_ranges.append(placeholder_range)
    def handle_starttag(self, name, attrs, state, contentstate):
        """
        Begin an inline style range for an inline style element.

        An InlineStyleRange for ``self.style``, starting at the current end
        of the block text, is added to the current block and pushed onto
        ``state.current_inline_styles``; its length is not set here.
        ``name`` and ``attrs`` are not used.
        """
        if state.current_block is None:
            # Inline style element encountered at the top level - start a new
            # paragraph block to contain it. The shared helper is used so that
            # state.has_preceding_nonatomic_block is set along with the
            # current block and whitespace mode.
            add_paragraph_block(state, contentstate)

        if state.leading_whitespace == FORCE_WHITESPACE:
            # any pending whitespace should be output before handling this tag,
            # and subsequent whitespace should be collapsed into it (= stripped)
            state.current_block.text += ' '
            state.leading_whitespace = STRIP_WHITESPACE

        inline_style_range = InlineStyleRange(self.style)
        inline_style_range.offset = len(state.current_block.text)
        state.current_block.inline_style_ranges.append(inline_style_range)
        state.current_inline_styles.append(inline_style_range)
Esempio n. 6
0
    def handle_starttag(self, name, attrs, state, contentstate):
        """
        Emit an 'atomic' block holding a single entity for this element,
        preceded by a spacer paragraph when no non-atomic block comes before it.

        The entity comes from ``self.create_entity`` and is registered on
        ``contentstate``; the atomic block's text is a single space covered
        by one entity range pointing at that entity. On return no block is
        current and ``state.has_preceding_nonatomic_block`` is False.
        ``attrs`` arrives as a list of (name, value) tuples.
        """
        # forcibly close any block that illegally contains this one
        state.current_block = None

        if not state.has_preceding_nonatomic_block:
            # if this block is NOT preceded by a non-atomic block,
            # need to insert a spacer paragraph
            add_paragraph_block(state, contentstate)
            # The spacer must not stay the current block: it sits *before*
            # the atomic block, so text arriving after this element would
            # otherwise be appended to it and end up ahead of the atomic block.
            state.current_block = None

        attr_dict = dict(attrs)  # convert attrs from list of (name, value) tuples to a dict
        entity = self.create_entity(name, attr_dict, state, contentstate)
        key = contentstate.add_entity(entity)

        block = Block('atomic', depth=state.list_depth)
        contentstate.blocks.append(block)
        block.text = ' '
        # one-character range covering the placeholder space
        entity_range = EntityRange(key)
        entity_range.offset = 0
        entity_range.length = 1
        block.entity_ranges.append(entity_range)
        state.has_preceding_nonatomic_block = False
    def handle_data(self, content):
        """
        Add a text node to the current block, collapsing whitespace.

        Every whitespace run in ``content`` is first reduced to one space.
        Leading and trailing spaces are never written directly: they are
        tracked through ``state.leading_whitespace`` so that a space is only
        emitted once more text (or an inline element) follows it within the
        same block. Text arriving while no block is open starts a new
        'unstyled' block, unless it is whitespace only.
        """
        # normalise whitespace sequences to a single space
        text = re.sub(WHITESPACE_RE, ' ', content)
        whitespace_only = text == ' '

        if self.state.current_block is None:
            if whitespace_only:
                # ignore top-level whitespace
                return
            # create a new paragraph block for this content
            self.add_block(Block('unstyled', depth=self.state.list_depth))

        state = self.state

        if whitespace_only:
            # In strip mode this node is insignificant and is skipped.
            # Otherwise the space is deferred rather than written: force mode
            # makes the next text node or inline element emit it, and if none
            # follows before the block ends it is rightfully dropped.
            if state.leading_whitespace != STRIP_WHITESPACE:
                state.leading_whitespace = FORCE_WHITESPACE
            return

        # strip or add leading whitespace according to the leading_whitespace flag
        mode = state.leading_whitespace
        if mode == STRIP_WHITESPACE:
            text = text.lstrip()
        elif mode == FORCE_WHITESPACE and not text.startswith(' '):
            text = ' ' + text

        if text.endswith(' '):
            # Hold back the trailing space in case the block ends here; force
            # mode makes any following text or inline element start with it.
            text = text.rstrip()
            state.leading_whitespace = FORCE_WHITESPACE
        else:
            # no trailing whitespace here - leading whitespace at the start
            # of the next text node should be respected
            state.leading_whitespace = KEEP_WHITESPACE

        state.current_block.text += text
 def create_block(self, name, attrs, state, contentstate):
     """Return a new block of type ``self.block_type`` at the current list depth."""
     new_block = Block(self.block_type, depth=state.list_depth)
     return new_block
 def create_block(self, name, attrs, state, contentstate):
     """
     Create the block for a list item element.

     The block type is ``state.list_item_type`` and the depth is
     ``state.list_depth``.

     Raises AssertionError if ``state.list_item_type`` is None.
     """
     if state.list_item_type is None:
         # Raised explicitly instead of via an ``assert`` statement so that
         # the check is not stripped when Python runs with -O.
         raise AssertionError(
             "%s element found outside of an enclosing list element" % name
         )
     return Block(state.list_item_type, depth=state.list_depth)
 def create_block(self, name, attrs, state, contentstate):
     """
     Return a new block of type ``self.block_type`` at the current list depth.

     The block key is looked up in ``attrs`` under BLOCK_KEY_NAME and is None
     when the attribute is absent.
     """
     block_options = {
         'depth': state.list_depth,
         'key': attrs.get(BLOCK_KEY_NAME),
     }
     return Block(self.block_type, **block_options)
Esempio n. 11
0
    def handle_starttag(self, name, attrs, state, contentstate):
        """
        Emit an 'atomic' block for this element, splitting any open block
        around it.

        If a block is open when the element is encountered, its open inline
        style and entity ranges are ended at the current text position, and a
        new block of the same type and depth - carrying fresh ranges for the
        same styles / entities - is appended after the atomic block and
        becomes the current block. A spacer paragraph is inserted first when
        ``state.has_preceding_nonatomic_block`` is false.

        ``attrs`` arrives as a list of (name, value) tuples.
        """
        interrupted = state.current_block
        continuation = None

        if interrupted:
            # An atomic block inside another block (e.g. a paragraph) is invalid
            # in contentstate. Recover by closing the open block together with
            # its inline styles / entities and reopening an identical one
            # afterwards: same type and depth as the block being interrupted.
            continuation = Block(interrupted.type, depth=interrupted.depth)

            for open_style in state.current_inline_styles:
                # the open style ends at the current text position...
                open_style.length = len(interrupted.text) - open_style.offset
                # ...and one of the same type begins at the start of the next block
                carried_style = InlineStyleRange(open_style.style)
                carried_style.offset = 0
                continuation.inline_style_ranges.append(carried_style)

            for open_entity in state.current_entity_ranges:
                # the open entity range ends at the current text position...
                open_entity.length = len(interrupted.text) - open_entity.offset
                # ...and a range for the same entity begins at the start of the next block
                carried_entity = EntityRange(open_entity.key)
                carried_entity.offset = 0
                continuation.entity_ranges.append(carried_entity)

            state.current_block = None

        if not state.has_preceding_nonatomic_block:
            # no non-atomic block precedes this one, so a spacer paragraph is needed
            add_paragraph_block(state, contentstate)
            # un-set it as the current block straight away, so that subsequent
            # invocations of this handler don't think we're inside it
            state.current_block = None

        entity = self.create_entity(name, dict(attrs), state, contentstate)

        # one-character range covering the placeholder space below
        placeholder_range = EntityRange(contentstate.add_entity(entity))
        placeholder_range.offset = 0
        placeholder_range.length = 1

        atomic_block = Block('atomic', depth=state.list_depth)
        contentstate.blocks.append(atomic_block)
        atomic_block.text = ' '
        atomic_block.entity_ranges.append(placeholder_range)
        state.has_preceding_nonatomic_block = False

        if continuation:
            # The replica of the interrupted block becomes the current block.
            # Closing tags met later in the document for the carried-over
            # styles / entities will then close the ranges that begin here.
            contentstate.blocks.append(continuation)
            state.current_block = continuation
            state.current_inline_styles = continuation.inline_style_ranges.copy()
            state.current_entity_ranges = continuation.entity_ranges.copy()
            state.has_preceding_nonatomic_block = True
            state.leading_whitespace = STRIP_WHITESPACE