Example #1
0
class Table(XmlModel):
    """Model for the ``tbl`` XML element; its rows are ``TableRow`` items."""

    XML_TAG = 'tbl'

    rows = XmlCollection(TableRow)

    def calculate_table_cell_spans(self):
        """
        Count how many rows each vertically merged cell covers.

        Cells are matched by their position within ``row.cells``; no
        adjustment is made here for cells that span several columns.

        A cell whose vertical merge value is ``'restart'`` opens a group for
        its position and counts as one row. Every following cell at the same
        position whose value is ``'continue'`` (also assumed when ``'val'``
        is absent) adds one row to the open group. A cell without properties
        or without a vertical merge closes the group for its position.

        Returns ``None`` when the table has no rows, otherwise a plain dict
        mapping each group-opening cell to its row count.
        """
        if not self.rows:
            return

        open_group_by_column = {}
        row_count_by_cell = defaultdict(int)
        for row in self.rows:
            for column_index, cell in enumerate(row.cells):
                properties = cell.properties
                if properties is None or properties.vertical_merge is None:
                    # Missing properties are treated like an omitted merge
                    # element: the cell belongs to no group and any group
                    # open at this position is closed.
                    open_group_by_column[column_index] = None
                    continue
                if not properties:
                    continue

                merge_kind = properties.vertical_merge.get('val', 'continue')
                if merge_kind == 'restart':
                    open_group_by_column[column_index] = cell
                    row_count_by_cell[cell] += 1
                elif merge_kind == 'continue':
                    group_owner = open_group_by_column.get(column_index)
                    if group_owner:
                        row_count_by_cell[group_owner] += 1
        return dict(row_count_by_cell)
Example #2
0
class ItemsModel(XmlModel):
    """Model for the ``items`` XML element."""

    XML_TAG = 'items'

    # Two ways of declaring a child are used here: an explicit
    # ('apple', AppleModel) pair (presumably tag name plus model -- confirm
    # against XmlCollection) and a bare model class.
    children = XmlCollection(('apple', AppleModel), OrangeModel)
Example #3
0
class SimpleField(XmlModel):
    """
    Model for the ``fldSimple`` XML element.

    The ``instr`` attribute carries the field instruction text, which
    ``parse_instr`` splits into a field type and its arguments.
    """

    XML_TAG = 'fldSimple'

    instr = XmlAttribute()

    children = XmlCollection(
        Run,
        Hyperlink,
        SmartTagRun,
        InsertedRun,
        DeletedRun,
        SdtRun,
    )

    def _parse_instr_into_field_type_and_arg_string(self):
        """
        Match ``instr`` into (field type, remaining argument string).

        Returns the regex match object, or ``None`` when ``instr`` is missing
        or contains no non-whitespace token.
        """
        if self.instr is None:
            # re.match raises TypeError on None; a field without an
            # instruction simply has nothing to parse.
            return None
        # Raw string: same pattern as before, without relying on Python
        # passing the invalid '\s' escape through unchanged.
        return re.match(r'^\s*([^\s]+)\s*(.*)$', self.instr)

    def _parse_instr_arg_string_to_args(self, arg_string):
        """
        Split an argument string into ``(quoted, bare)`` tuples.

        For each argument exactly one of the two entries is non-empty: the
        first for a double-quoted argument (quotes removed), the second for a
        whitespace-delimited one.
        """
        return re.findall(r'\s*(?:"([^"]+)"|([^\s]+))+', arg_string)

    def parse_instr(self):
        """
        Parse the field instruction.

        Returns ``None`` when the instruction is missing or cannot be
        matched, ``(field_type, None)`` when there are no arguments, and
        ``(field_type, [arg, ...])`` otherwise.
        """
        match = self._parse_instr_into_field_type_and_arg_string()
        if not match:
            return
        field_type = match.group(1)
        raw_field_args = match.group(2)
        if not raw_field_args:
            return field_type, None
        parsed_args = self._parse_instr_arg_string_to_args(raw_field_args)
        if not parsed_args:
            return field_type, None
        field_args = [quoted or bare for quoted, bare in parsed_args]
        return field_type, field_args
Example #4
0
class Hyperlink(XmlModel):
    """Model for the ``hyperlink`` XML element."""

    XML_TAG = 'hyperlink'

    hyperlink_id = XmlAttribute(name='id')
    anchor = XmlAttribute(name='anchor')
    children = XmlCollection(Run)

    @memoized
    def get_target_uri(self):
        """
        Resolve the hyperlink target through the container's package part.

        Returns ``None`` when there is no container, no package part, or the
        relationship lookup raises ``KeyError``. Otherwise returns the
        relationship's target URI, with ``#<anchor>`` appended when an anchor
        is set.
        """
        container = self.container
        if not container:
            return None
        package_part = container.package_part
        if not package_part:
            return None

        try:
            relationship = package_part.get_relationship(
                relationship_id=self.hyperlink_id,
            )
        except KeyError:
            return None

        anchor = self.anchor
        if not anchor:
            return relationship.target_uri
        return '{0}#{1}'.format(relationship.target_uri, anchor)

    @property
    def target_uri(self):
        """The value of ``get_target_uri()``."""
        return self.get_target_uri()

    @target_uri.setter
    def target_uri(self, target_uri):
        # Writes the value into the memo of get_target_uri for this instance;
        # presumably later reads then return it -- confirm against the
        # `memoized` helper.
        self.get_target_uri.memo.set_cache(target_uri, self)
Example #5
0
class SmartTagRun(XmlModel):
    """Model for the ``smartTag`` XML element."""

    XML_TAG = 'smartTag'

    # The class lists itself by dotted name rather than by reference,
    # presumably because the name is not bound yet while the class body
    # executes -- confirm how XmlCollection resolves strings.
    children = XmlCollection(Run, 'wordprocessing.SmartTagRun')
Example #6
0
class Numbering(XmlModel):
    """
    Model for the ``numbering`` XML element.

    On construction the collected elements are indexed twice: abstract
    numbering definitions by ``abstract_num_id`` and numbering instances by
    ``num_id``.
    """

    XML_TAG = 'numbering'

    elements = XmlCollection(AbstractNum, NumberingInstance)

    def __init__(self, **kwargs):
        """Build the id lookups; raise AssertionError on any other element."""
        super(Numbering, self).__init__(**kwargs)

        self._abstract_nums = {}
        self._nums = {}

        for element in self.elements:
            if isinstance(element, AbstractNum):
                self._abstract_nums[element.abstract_num_id] = element
                continue
            if isinstance(element, NumberingInstance):
                self._nums[element.num_id] = element
                continue
            raise AssertionError(
                'Unexpected element type {type} encountered'.format(
                    type=element.__class__.__name__,
                )
            )

    def get_numbering_definition(self, num_id):
        """
        Return the abstract definition referenced by instance ``num_id``.

        Returns ``None`` when the instance is unknown or its
        ``abstract_num_id`` has no matching definition.
        """
        instance = self._nums.get(num_id)
        if instance:
            return self._abstract_nums.get(instance.abstract_num_id)
        return None
Example #7
0
class AbstractNum(XmlModel):
    """
    Model for the ``abstractNum`` XML element.

    The collected ``Level`` items are additionally indexed by ``level_id``.
    """

    XML_TAG = 'abstractNum'

    abstract_num_id = XmlAttribute(name='abstractNumId')
    name = XmlChild(attrname='val')

    levels = XmlCollection(Level)

    def __init__(self, **kwargs):
        super(AbstractNum, self).__init__(**kwargs)

        # Later levels win when two levels share the same level_id.
        self._levels = {level.level_id: level for level in self.levels}

    def get_level(self, level_id):
        """Return the level registered under ``level_id``, or ``None``."""
        return self._levels.get(level_id)

    @staticmethod
    def _get_left_indentation(level):
        """Return a level's left indentation, or 0 when it declares none."""
        properties = level.paragraph_properties
        if properties is None:
            # A level without paragraph properties is treated like one
            # without an explicit indentation instead of raising
            # AttributeError.
            return 0
        return properties.to_int('indentation_left', default=0)

    def get_indentation_between_levels(self):
        """
        Return the indentation step between two consecutive levels.

        Depending on the Word version the default indentation between levels
        may differ. Only the first two levels are inspected, as the others
        follow the same step.

        Falls back to 720 when fewer than two levels are defined. A missing
        indentation on either level counts as 0.
        """
        try:
            first_level = self.levels[0]
            second_level = self.levels[1]
        except IndexError:
            # Default step; presumably in the same unit as
            # 'indentation_left' -- confirm.
            return 720

        lvl0_ind = self._get_left_indentation(first_level)
        lvl1_ind = self._get_left_indentation(second_level)
        return lvl1_ind - lvl0_ind
Example #8
0
class NumberingInstance(XmlModel):
    """Model for the ``num`` XML element."""

    XML_TAG = 'num'

    # Read from the element's own 'numId' attribute.
    num_id = XmlAttribute(name='numId')
    # Read from the 'val' attribute of the 'abstractNumId' child.
    abstract_num_id = XmlChild(attrname='val', name='abstractNumId')

    level_overrides = XmlCollection(LevelOverride)
Example #9
0
class TableCell(XmlModel):
    """Model for the ``tc`` XML element."""

    XML_TAG = 'tc'

    properties = XmlChild(type=TableCellProperties)

    # Table is added in wordprocessing.table
    children = XmlCollection(Paragraph)
Example #10
0
class InsertedRun(XmlModel):
    """Model for the ``ins`` XML element."""

    XML_TAG = 'ins'

    # TODO Needs DeletedRun
    children = XmlCollection(Run, SmartTagRun)
Example #11
0
class Fallback(XmlModel):
    """Model for the ``Fallback`` XML element; accepts any child."""

    XML_TAG = 'Fallback'

    # Referring lazily to the grandparent's children XmlCollection would be
    # preferable. That is not possible yet: there is no good way to represent
    # lazy fields, and fields cannot reference their parents before they are
    # initialized with content.
    children = XmlCollection(allow_all_children=True)
Example #12
0
class EmbeddedObject(XmlModel):
    """
    Model for the ``object`` XML element; its children are ``Shape`` items.

    Reference (the URL continues on the second line):
    https://msdn.microsoft.com/en-us/library/documentformat.openxml
    .wordprocessing.embeddedobject%28v=office.15%29.aspx
    """

    XML_TAG = 'object'

    children = XmlCollection(
        Shape,
    )
Example #13
0
class DeletedRun(XmlModel):
    """Model for the ``del`` XML element."""

    XML_TAG = 'del'

    # The class lists itself by dotted name, presumably so that deleted runs
    # can nest -- confirm how XmlCollection resolves strings.
    # TODO Needs InsertedRun
    children = XmlCollection(Run, SmartTagRun, 'wordprocessing.DeletedRun')
Example #14
0
class SdtContentBlock(XmlModel):
    """Model for an ``sdtContent`` XML element holding block-level items."""

    XML_TAG = 'sdtContent'

    # SdtBlock is not accepted as a child here (it was left commented out).
    children = XmlCollection(Paragraph, Table, InsertedRun, DeletedRun)
Example #15
0
class Endnote(XmlModel):
    """Model for the ``endnote`` XML element."""

    XML_TAG = 'endnote'

    # Read from the element's 'id' attribute.
    endnote_id = XmlAttribute(name='id')

    children = XmlCollection(Paragraph, Table, InsertedRun, DeletedRun)
Example #16
0
class SdtContentRun(XmlModel):
    """Model for an ``sdtContent`` XML element holding run-level items."""

    XML_TAG = 'sdtContent'

    # SdtRun is not accepted as a child here (it was left commented out).
    children = XmlCollection(
        Run, Hyperlink, SmartTagRun, InsertedRun, DeletedRun,
    )
Example #17
0
class Shape(XmlModel):
    """Model for the ``shape`` XML element."""

    XML_TAG = 'shape'

    style = XmlAttribute()
    children = XmlCollection(ImageData, 'vml.Textbox')

    # TODO perhaps we could have a prepare_style, or clean_style convention?
    def get_style(self):
        """
        Parse the ``style`` attribute into a dict of declarations.

        The attribute is split on ``;`` and each item on its first ``:``.
        Names and values are returned exactly as written (no whitespace
        stripping). Items without a ``:`` -- for example the blank left
        behind by a trailing ``'; '`` -- are skipped instead of raising
        ``ValueError``. When a name occurs more than once the last value
        wins. Returns an empty dict when there is no style.
        """
        if not self.style:
            return {}

        declarations = {}
        for item in self.style.split(';'):
            name, separator, value = item.partition(':')
            if not separator:
                # Not a "name:value" pair (empty or malformed item).
                continue
            declarations[name] = value
        return declarations
Example #18
0
class Styles(XmlModel):
    """
    Model for the ``styles`` XML element.

    After construction ``styles_by_type`` is a plain dict of the form
    ``{style_type: {style_id: style}}`` built from the collected styles.
    """

    XML_TAG = 'styles'

    styles = XmlCollection(Style)

    def __init__(self, styles=None, *args, **kwargs):
        super(Styles, self).__init__(styles=styles, *args, **kwargs)

        grouped = {}
        for style in self.styles:
            # A later style replaces an earlier one with the same type/id.
            grouped.setdefault(style.style_type, {})[style.style_id] = style
        self.styles_by_type = grouped

    def get_styles_by_type(self, style_type):
        """Return the ``{style_id: style}`` dict for a type, or ``{}``."""
        no_styles = {}
        return self.styles_by_type.get(style_type, no_styles)
Example #19
0
class Endnotes(XmlModel):
    """
    Model for the ``endnotes`` XML element.

    Endnotes that have a truthy ``endnote_id`` are indexed by that id.
    """

    XML_TAG = 'endnotes'

    children = XmlCollection(Endnote)

    def __init__(self, *args, **kwargs):
        super(Endnotes, self).__init__(*args, **kwargs)

        # Endnotes without an id are left out of the index; on duplicate ids
        # the later endnote wins.
        self._endnote_by_id = {
            endnote.endnote_id: endnote
            for endnote in self.children
            if endnote.endnote_id
        }

    def get_endnote_by_id(self, endnote_id):
        """Return the endnote indexed under ``endnote_id``, or ``None``."""
        return self._endnote_by_id.get(endnote_id)
Example #20
0
class Footnotes(XmlModel):
    """
    Model for the ``footnotes`` XML element.

    Footnotes that have a truthy ``footnote_id`` are indexed by that id.
    """

    XML_TAG = 'footnotes'

    children = XmlCollection(Footnote)

    def __init__(self, *args, **kwargs):
        super(Footnotes, self).__init__(*args, **kwargs)

        # Footnotes without an id are left out of the index; on duplicate
        # ids the later footnote wins.
        self._footnote_by_id = {
            footnote.footnote_id: footnote
            for footnote in self.children
            if footnote.footnote_id
        }

    def get_footnote_by_id(self, footnote_id):
        """Return the footnote indexed under ``footnote_id``, or ``None``."""
        return self._footnote_by_id.get(footnote_id)
Example #21
0
class AbstractNum(XmlModel):
    """
    Model for the ``abstractNum`` XML element.

    The collected ``Level`` items are additionally indexed by ``level_id``.
    """

    XML_TAG = 'abstractNum'

    abstract_num_id = XmlAttribute(name='abstractNumId')
    name = XmlChild(attrname='val')

    levels = XmlCollection(Level)

    def __init__(self, **kwargs):
        super(AbstractNum, self).__init__(**kwargs)

        # Later levels win when two levels share the same level_id.
        self._levels = {level.level_id: level for level in self.levels}

    def get_level(self, level_id):
        """Return the level registered under ``level_id``, or ``None``."""
        return self._levels.get(level_id)
Example #22
0
class Textbox(XmlModel):
    """Model for the ``textbox`` XML element."""

    XML_TAG = 'textbox'

    # The child model is named by dotted string rather than imported here.
    children = XmlCollection('wordprocessing.TxBxContent')
Example #23
0
class TxBxContent(XmlModel):
    """Model for the ``txbxContent`` XML element."""

    XML_TAG = 'txbxContent'

    # Both child models are named by dotted string rather than imported here.
    children = XmlCollection('wordprocessing.Paragraph', 'wordprocessing.Table')
Example #24
0
class SmartTagRun(XmlModel):
    """Model for the ``smartTag`` XML element; its children are ``Run`` items."""

    XML_TAG = 'smartTag'

    children = XmlCollection(Run)
Example #25
0
class Paragraph(XmlModel):
    """
    Model for the ``p`` XML element.

    Besides exposing its properties and children, the class offers helpers
    for style lookup, numbering lookup and plain-text extraction.
    """

    XML_TAG = 'p'

    properties = XmlChild(type=ParagraphProperties)

    children = XmlCollection(
        Run,
        Hyperlink,
        SmartTagRun,
        InsertedRun,
        DeletedRun,
        SdtRun,
        SimpleField,
    )

    def __init__(self, **kwargs):
        super(Paragraph, self).__init__(**kwargs)
        self._effective_properties = None

    @property
    def effective_properties(self):
        """The paragraph's own properties, remembered once they are truthy."""
        # TODO need to calculate effective properties like Run
        if not self._effective_properties:
            self._effective_properties = self.properties
        return self._effective_properties

    def has_structured_document_parent(self):
        """Return whether any ancestor is an ``SdtBlock``."""
        # Imported here rather than at module level, presumably to avoid a
        # circular import -- confirm.
        from pydocx.openxml.wordprocessing import SdtBlock
        return self.has_ancestor(SdtBlock)

    def get_style_chain_stack(self):
        """
        Yield the styles of this paragraph's style chain.

        Nothing is yielded when the paragraph has no properties, no parent
        style, or when the container exposes no style definitions part.
        """
        if not self.properties:
            return

        parent_style = self.properties.parent_style
        if not parent_style:
            return

        # TODO the getattr is necessary because of footnotes. From the context
        # of a footnote, a paragraph's container is the footnote part, which
        # doesn't have access to the style_definitions_part
        part = getattr(self.container, 'style_definitions_part', None)
        if part:
            style_stack = part.get_style_chain_stack('paragraph', parent_style)
            for result in style_stack:
                yield result

    @property
    def heading_style(self):
        """
        The first style in the style chain that is a heading, or ``None``.

        The result (including ``None``) is stored on first access and reused
        afterwards; assigning to the property overrides it.
        """
        if hasattr(self, '_heading_style'):
            return getattr(self, '_heading_style')
        heading_style = None
        for style in self.get_style_chain_stack():
            if style.is_a_heading():
                heading_style = style
                break
        self.heading_style = heading_style
        return heading_style

    @heading_style.setter
    def heading_style(self, style):
        self._heading_style = style

    def get_numbering_definition(self):
        """
        Return the numbering definition this paragraph refers to.

        Returns ``None`` when the container has no numbering definitions
        part, or when the paragraph has no (numbering) properties.
        """
        # TODO add memoization

        # TODO the getattr is necessary because of footnotes. From the context
        # of a footnote, a paragraph's container is the footnote part, which
        # doesn't have access to the numbering_definitions_part
        part = getattr(self.container, 'numbering_definitions_part', None)
        if not part:
            return
        if not self.effective_properties:
            return
        numbering_properties = self.effective_properties.numbering_properties
        if not numbering_properties:
            return
        return part.numbering.get_numbering_definition(
            num_id=numbering_properties.num_id,
        )

    def get_numbering_level(self):
        """
        Return the level of the numbering definition used by this paragraph.

        Returns ``None`` when there is no numbering definition or no
        (numbering) properties.
        """
        # TODO add memoization
        numbering_definition = self.get_numbering_definition()
        if not numbering_definition:
            return
        if not self.effective_properties:
            return
        numbering_properties = self.effective_properties.numbering_properties
        if not numbering_properties:
            return
        return numbering_definition.get_level(
            level_id=numbering_properties.level_id,
        )

    @property
    def runs(self):
        """Yield the direct children that are ``Run`` instances."""
        for p_child in self.children:
            if isinstance(p_child, Run):
                yield p_child

    def get_text(self, tab_char=None):
        """
        Return a string of all of the contained Text nodes concatenated
        together. If `tab_char` is set, then any TabChar encountered will be
        represented in the returned text using the specified string.

        Only runs that are direct children of the paragraph are considered.

        For example:

        Given the following paragraph XML definition:

            <p>
                <r>
                    <t>abc</t>
                </r>
                <r>
                    <t>def</t>
                </r>
            </p>

        `get_text()` will return 'abcdef'
        """
        text = []
        for run in self.runs:
            for r_child in run.children:
                if isinstance(r_child, Text):
                    if r_child.text:
                        text.append(r_child.text)
                if tab_char and isinstance(r_child, TabChar):
                    text.append(tab_char)
        return ''.join(text)

    def get_number_of_initial_tabs(self):
        """
        Return the number of initial TabChars.

        Counting walks the leading ``Run`` children in order and stops at the
        first child that is not a ``Run`` or at the first run child that is
        not a ``TabChar``. Runs without children do not interrupt the count.
        """
        tab_count = 0
        for p_child in self.children:
            if not isinstance(p_child, Run):
                break
            for r_child in p_child.children:
                if not isinstance(r_child, TabChar):
                    # Stop counting altogether: tabs in later runs are no
                    # longer "initial" once other content has been seen.
                    return tab_count
                tab_count += 1
        return tab_count
Example #26
0
class AlternateContent(XmlModel):
    """Model for the ``AlternateContent`` XML element."""

    XML_TAG = 'AlternateContent'

    # Only the Fallback child is modelled here.
    children = XmlCollection(
        Fallback,
    )
Example #27
0
class TableRow(XmlModel):
    """Model for the ``tr`` XML element; its cells are ``TableCell`` items."""

    XML_TAG = 'tr'

    cells = XmlCollection(TableCell)
Example #28
0
class Picture(XmlModel):
    """Model for the ``pict`` XML element."""

    XML_TAG = 'pict'

    children = XmlCollection(
        Shape,
        Rect,
    )
Example #29
0
class Run(XmlModel):
    """
    Model for the ``r`` XML element.

    Besides its own properties, a run can report the properties it inherits
    from its parent paragraph's style chain and from its own style chain.
    """

    XML_TAG = 'r'

    properties = XmlChild(type=RunProperties)

    children = XmlCollection(
        EmbeddedObject,
        TabChar,
        Break,
        NoBreakHyphen,
        Text,
        Drawing,
        Picture,
        DeletedText,
        FootnoteReference,
        FootnoteReferenceMark,
        EndnoteReference,
        EndnoteReferenceMark,
        FieldChar,
        FieldCode,
        AlternateContent,
    )

    def get_style_chain_stack(self):
        """
        Yield the styles of this run's style chain.

        Nothing is yielded when the run has no properties, no parent style,
        or when the container exposes no style definitions part.
        """
        if not self.properties:
            return

        parent_style = self.properties.parent_style
        if not parent_style:
            return

        # TODO the getattr is necessary because of footnotes. From the context
        # of a footnote, a paragraph's container is the footnote part, which
        # doesn't have access to the style_definitions_part
        part = getattr(self.container, 'style_definitions_part', None)
        if part:
            for result in part.get_style_chain_stack(
                    'character', parent_style):
                yield result

    def _get_properties_inherited_from_parent_paragraph(self):
        """
        Collect run property fields from the parent paragraph's style chain.

        The chain is applied in reverse order, so styles that come earlier in
        the chain override later ones. Returns an empty dict when the run has
        no ``Paragraph`` ancestor.
        """
        from pydocx.openxml.wordprocessing.paragraph import Paragraph

        collected = {}

        paragraph = self.get_first_ancestor(Paragraph)
        if not paragraph:
            return collected

        for style in reversed(list(paragraph.get_style_chain_stack())):
            run_properties = style.run_properties
            if run_properties:
                collected.update(dict(run_properties.fields))
        return collected

    def _get_inherited_properties_from_parent_style(self):
        """
        Collect run property fields from this run's own style chain.

        The chain is applied in reverse order, so styles that come earlier in
        the chain override later ones.
        """
        collected = {}
        for style in reversed(list(self.get_style_chain_stack())):
            run_properties = style.run_properties
            if run_properties:
                collected.update(dict(run_properties.fields))
        return collected

    @property
    def inherited_properties(self):
        """
        ``RunProperties`` built from everything the run inherits.

        Fields coming from the run's own style chain override those coming
        from the parent paragraph's style chain.
        """
        merged = {}
        from_paragraph = self._get_properties_inherited_from_parent_paragraph()
        merged.update(from_paragraph)
        from_style = self._get_inherited_properties_from_parent_style()
        merged.update(from_style)
        return RunProperties(**merged)

    @property
    @memoized
    def effective_properties(self):
        """
        ``RunProperties`` combining inherited and directly set fields.

        Fields set on the run's own properties override inherited ones.
        """
        combined = dict(self.inherited_properties.fields)
        if self.properties:
            combined.update(dict(self.properties.fields))
        return RunProperties(**combined)