class Hyperlink(XmlModel):
    """Model for a ``hyperlink`` element whose children are ``Run`` items."""
    XML_TAG = 'hyperlink'

    # Passed as ``relationship_id`` when looking the link up in the
    # container's package part.
    hyperlink_id = XmlAttribute(name='id')
    # When set, appended to the resolved target as ``#<anchor>``.
    anchor = XmlAttribute(name='anchor')
    children = XmlCollection(
        Run,
    )

    @memoized
    def get_target_uri(self):
        """
        Resolve the URI this hyperlink points at.

        Returns ``None`` when there is no container, when the container has
        no package part, or when the package part raises ``KeyError`` for
        this hyperlink's relationship id. Otherwise returns the
        relationship's target URI, followed by ``#<anchor>`` when an anchor
        is set.
        """
        package_part = self.container.package_part if self.container else None
        if not package_part:
            return None

        try:
            relationship = package_part.get_relationship(
                relationship_id=self.hyperlink_id,
            )
        except KeyError:
            # Unknown relationship id: treat the link as having no target.
            return None

        fragment = self.anchor
        if fragment:
            return '{0}#{1}'.format(relationship.target_uri, fragment)
        return relationship.target_uri

    @property
    def target_uri(self):
        """Attribute-style access to ``get_target_uri()``."""
        return self.get_target_uri()

    @target_uri.setter
    def target_uri(self, target_uri):
        # Hands the value to the memo's set_cache for this instance;
        # presumably later get_target_uri() calls then return it -- confirm
        # against the memoized implementation.
        self.get_target_uri.memo.set_cache(target_uri, self)
class Style(XmlModel):
    """Model for a ``style`` element."""
    XML_TAG = 'style'

    # ``type`` attribute; declared default is 'paragraph'.
    style_type = XmlAttribute(name='type', default='paragraph')
    # ``styleId`` attribute; declared default is the empty string.
    style_id = XmlAttribute(name='styleId', default='')
    # Declared with attrname='val' and an empty-string default.
    name = XmlChild(attrname='val', default='')
    run_properties = XmlChild(type=RunProperties)
    # Declared against the ``basedOn`` child with attrname='val'.
    parent_style = XmlChild(name='basedOn', attrname='val')

    def is_a_heading(self):
        """
        Return True when the style name starts with 'heading', ignoring case.

        An empty or missing name is never a heading.
        """
        return bool(self.name) and self.name.lower().startswith('heading')
class SimpleField(XmlModel):
    """
    Model for a ``fldSimple`` element.

    The ``instr`` attribute holds the field instruction text, which
    ``parse_instr`` splits into a field type and its arguments.
    """
    XML_TAG = 'fldSimple'

    instr = XmlAttribute()

    children = XmlCollection(
        Run,
        Hyperlink,
        SmartTagRun,
        InsertedRun,
        DeletedRun,
        SdtRun,
    )

    def _parse_instr_into_field_type_and_arg_string(self):
        """
        Split ``instr`` into its first token and the remaining text.

        Returns a match object whose group 1 is the first
        whitespace-delimited token and whose group 2 is everything after it,
        or ``None`` when ``instr`` is missing, empty or has no token.
        """
        if not self.instr:
            # re.match raises TypeError on None; a missing instruction is
            # reported the same way as an unparsable one.
            return None
        # Raw string: '\s' in a plain literal is an invalid escape sequence.
        return re.match(r'^\s*([^\s]+)\s*(.*)$', self.instr)

    def _parse_instr_arg_string_to_args(self, arg_string):
        """
        Tokenize an argument string.

        Returns the ``re.findall`` result: a list of ``(quoted, bare)``
        tuples in which the group that did not take part in the match is
        the empty string.
        """
        return re.findall(r'\s*(?:"([^"]+)"|([^\s]+))+', arg_string)

    def parse_instr(self):
        """
        Parse the field instruction.

        Returns ``None`` when the instruction cannot be parsed,
        ``(field_type, None)`` when the field type has no arguments, and
        ``(field_type, [arg, ...])`` otherwise. Quoted arguments are returned
        without their surrounding double quotes.
        """
        match = self._parse_instr_into_field_type_and_arg_string()
        if not match:
            return None

        field_type = match.group(1)
        raw_field_args = match.group(2)
        if not raw_field_args:
            return field_type, None

        tokens = self._parse_instr_arg_string_to_args(raw_field_args)
        if not tokens:
            return field_type, None

        # Each token is (quoted, bare); exactly one of the two is non-empty.
        field_args = [quoted or bare for quoted, bare in tokens]
        return field_type, field_args
class NumberingInstance(XmlModel):
    """Model for a ``num`` element."""
    XML_TAG = 'num'

    # ``numId`` attribute.
    num_id = XmlAttribute(name='numId')

    # Declared against the ``abstractNumId`` child with attrname='val'.
    abstract_num_id = XmlChild(name='abstractNumId', attrname='val')

    # Collection of LevelOverride children.
    level_overrides = XmlCollection(LevelOverride)
class AbstractNum(XmlModel):
    """Model for an ``abstractNum`` element and its ``Level`` children."""
    XML_TAG = 'abstractNum'

    # ``abstractNumId`` attribute.
    abstract_num_id = XmlAttribute(name='abstractNumId')

    name = XmlChild(attrname='val')

    levels = XmlCollection(Level)

    def __init__(self, **kwargs):
        super(AbstractNum, self).__init__(**kwargs)

        # Index the levels by their level_id so get_level is a plain lookup.
        # If two levels share an id, the later one wins.
        self._levels = dict(
            (lvl.level_id, lvl) for lvl in self.levels
        )

    def get_level(self, level_id):
        """Return the level with the given id, or None when there is none."""
        return self._levels.get(level_id)

    def get_indentation_between_levels(self):
        """
        Return the left-indentation step between consecutive levels.

        Different Word versions may use a different default indentation
        between levels. Only the first two levels are compared, since the
        remaining levels follow the same step. When an IndexError occurs
        (for instance when fewer than two levels exist) the step is 720.
        """
        # NOTE(review): if a level's paragraph_properties can be None this
        # raises AttributeError rather than using the default -- confirm.
        try:
            # Levels are read in order 0 then 1, each immediately converted.
            left_indents = [
                self.levels[position].paragraph_properties.to_int(
                    'indentation_left', default=0)
                for position in (0, 1)
            ]
        except IndexError:
            return 720  # default one

        return left_indents[1] - left_indents[0]
class Break(XmlModel):
    """Model for a ``br`` element."""
    XML_TAG = 'br'

    # ``type`` attribute of the break.
    break_type = XmlAttribute(name='type')

    def is_page_break(self):
        """Return True when the break type is exactly 'page'."""
        return self.break_type == 'page'
class RFonts(XmlModel):
    """Model for an ``rFonts`` element."""
    XML_TAG = 'rFonts'

    hint = XmlAttribute(name='hint')

    # Font attributes.
    ascii = XmlAttribute(name='ascii')
    h_ansi = XmlAttribute(name='hAnsi')
    east_asia = XmlAttribute(name='eastAsia')
    cs = XmlAttribute(name='cs')

    # Theme counterparts. Note that 'cstheme' is all lowercase, unlike the
    # other theme attribute names.
    ascii_theme = XmlAttribute(name='asciiTheme')
    h_ansi_theme = XmlAttribute(name='hAnsiTheme')
    east_asia_theme = XmlAttribute(name='eastAsiaTheme')
    cs_theme = XmlAttribute(name='cstheme')

    def is_symbol(self):
        """Return True when the ``hAnsi`` font is exactly 'Symbol'."""
        return self.h_ansi == 'Symbol'
class Endnote(XmlModel):
    """Model for an ``endnote`` element."""
    XML_TAG = 'endnote'

    # ``id`` attribute of the endnote.
    endnote_id = XmlAttribute(name='id')

    # Child types collected under an endnote.
    children = XmlCollection(
        Paragraph,
        Table,
        InsertedRun,
        DeletedRun,
    )
class Shape(XmlModel):
    """
    Model for a ``shape`` element.

    The ``style`` attribute is a ``;``-separated list of ``name:value``
    declarations; ``get_style`` exposes it as a dictionary.
    """
    XML_TAG = 'shape'

    style = XmlAttribute()

    # The textbox model is referenced by dotted string name, not by class.
    children = XmlCollection(
        ImageData,
        'vml.Textbox',
    )

    # TODO perhaps we could have a prepare_style, or clean_style convention?
    def get_style(self):
        """
        Parse the ``style`` attribute into a ``{name: value}`` dictionary.

        Declarations are split on ``;`` and each one on its first ``:``.
        Surrounding whitespace is stripped from names and values so that
        lookups such as ``get_style().get('width')`` work for
        ``"a:b; width:1"``. Fragments without a ``:`` (for example the
        whitespace left by a trailing ``"; "``) are skipped instead of
        raising ValueError. Returns an empty dictionary when ``style`` is
        missing or empty.
        """
        if not self.style:
            return {}

        declarations = {}
        for fragment in self.style.split(';'):
            name, separator, value = fragment.partition(':')
            if not separator:
                # Not a "name:value" pair; nothing to record.
                continue
            declarations[name.strip()] = value.strip()
        return declarations
class ImageData(XmlModel):
    """Model for an ``imagedata`` element."""
    XML_TAG = 'imagedata'

    # TODO We need namespaced attributes, because of conflicts like this. This
    # attribute is in the relationship namespace, and there's another attribute
    # named "id" which is in the default (in this case VML) namespace.
    # See https://msdn.microsoft.com/en-us/library/documentformat.openxml.vml.imagedata%28v=office.14%29.aspx  # noqa
    relationship_id = XmlAttribute(name='id')

    def get_picture_extents(self):
        """
        Return ``(width, height)`` taken from the parent's style.

        Values come straight from ``self.parent.get_style()``; a missing
        'width' or 'height' entry is reported as 0.
        """
        parent_style = self.parent.get_style()
        # TODO if width/height are missing units, "px" is implied
        return parent_style.get('width', 0), parent_style.get('height', 0)
class EndnoteReference(XmlModel):
    """Model for an ``endnoteReference`` element."""
    XML_TAG = 'endnoteReference'

    # ``id`` attribute; used to look the endnote up in the endnotes part.
    endnote_id = XmlAttribute(name='id')

    @property
    def endnote(self):
        """
        Return the endnote this reference points at.

        Returns ``None`` when the reference has no id, when it has no
        container, or when the container has no endnotes part. Otherwise
        returns whatever ``get_endnote_by_id`` yields for this id.
        """
        if not self.endnote_id:
            return None

        container = self.container
        if not container:
            # A reference without a container cannot reach an endnotes
            # part; report "no endnote" instead of raising AttributeError.
            return None

        part = container.endnotes_part
        if not part:
            return None

        return part.endnotes.get_endnote_by_id(endnote_id=self.endnote_id)
class Level(XmlModel):
    """Model for an ``lvl`` element."""
    XML_TAG = 'lvl'

    # ``ilvl`` attribute.
    level_id = XmlAttribute(name='ilvl')

    # Children declared with attrname='val'.
    start = XmlChild(attrname='val')
    num_format = XmlChild(name='numFmt', attrname='val')
    restart = XmlChild(name='lvlRestart', attrname='val')
    paragraph_style = XmlChild(name='pStyle', attrname='val')

    # Children declared by model type.
    run_properties = XmlChild(type=RunProperties)
    paragraph_properties = XmlChild(type=ParagraphProperties)

    def is_bullet_format(self):
        """Return True when the number format is exactly 'bullet'."""
        return self.num_format == 'bullet'

    def format_is_none(self):
        """
        Return True when the level has no usable number format.

        That is the case when the format is missing or empty, or when it
        equals 'none' ignoring case.
        """
        return not self.num_format or self.num_format.lower() == 'none'
class AbstractNum(XmlModel):
    """Model for an ``abstractNum`` element and its ``Level`` children."""
    XML_TAG = 'abstractNum'

    # ``abstractNumId`` attribute.
    abstract_num_id = XmlAttribute(name='abstractNumId')

    name = XmlChild(attrname='val')

    levels = XmlCollection(Level)

    def __init__(self, **kwargs):
        super(AbstractNum, self).__init__(**kwargs)

        # Lookup table from level_id to level; a repeated id keeps the last
        # level seen.
        self._levels = {
            entry.level_id: entry
            for entry in self.levels
        }

    def get_level(self, level_id):
        """Return the level with the given id, or None when there is none."""
        return self._levels.get(level_id)
class FieldChar(XmlModel):
    """Model for a ``fldChar`` element."""
    XML_TAG = 'fldChar'

    # Raw ``fldCharType`` attribute; read it through ``char_type``.
    _char_type = XmlAttribute(name='fldCharType')

    @property
    def char_type(self):
        """Lower-cased ``fldCharType``, or None when it is missing or empty."""
        return self._char_type.lower() if self._char_type else None

    def is_type_begin(self):
        """Return True when the character type is 'begin'."""
        return self.char_type == 'begin'

    def is_type_separate(self):
        """Return True when the character type is 'separate'."""
        return self.char_type == 'separate'

    def is_type_end(self):
        """Return True when the character type is 'end'."""
        return self.char_type == 'end'
class Document(XmlModel):
    """Model for a ``document`` element."""
    XML_TAG = 'document'

    # ``conformance`` attribute.
    conformance = XmlAttribute(name='conformance')

    # Single child of type Body.
    body = XmlChild(type=Body)
class OrangeModel(XmlModel):
    """Model for an ``orange`` element."""
    XML_TAG = 'orange'

    # Declared without an explicit name; the declared default is 'Organic'.
    type = XmlAttribute(default='Organic')
class LevelOverride(XmlModel):
    """Model for an ``lvlOverride`` element."""
    XML_TAG = 'lvlOverride'

    # ``ilvl`` attribute.
    level_id = XmlAttribute(name='ilvl')

    # Declared against the ``startOverride`` child with attrname='val'.
    start_override = XmlChild(name='startOverride', attrname='val')

    # Single child of type Level.
    level = XmlChild(type=Level)
class Blip(XmlModel):
    """Model for a ``blip`` element."""
    XML_TAG = 'blip'

    # ``embed`` attribute.
    embedded_picture_id = XmlAttribute(name='embed')

    # ``link`` attribute.
    linked_picture_id = XmlAttribute(name='link')
class Transform2D(XmlModel):
    """Model for an ``xfrm`` element."""
    XML_TAG = 'xfrm'

    # Single child of type Extents.
    extents = XmlChild(type=Extents)

    # ``rot`` attribute; the declared default is None.
    rotate = XmlAttribute(name='rot', default=None)
class DocPr(XmlModel):
    """Model for a ``docPr`` element."""
    XML_TAG = 'docPr'

    # ``title`` attribute.
    title = XmlAttribute(name='title')

    # ``descr`` attribute.
    descr = XmlAttribute(name='descr')
class Extents(XmlModel):
    """Model for an ``ext`` element."""
    XML_TAG = 'ext'

    # ``cx`` attribute.
    length = XmlAttribute(name='cx')

    # ``cy`` attribute.
    width = XmlAttribute(name='cy')
class Bookmark(XmlModel):
    """Model for a ``bookmarkStart`` element."""
    XML_TAG = 'bookmarkStart'

    # ``name`` attribute of the bookmark.
    name = XmlAttribute(name='name')
class AppleModel(XmlModel):
    """Model for an ``apple`` element."""
    XML_TAG = 'apple'

    # Declared without an explicit name; the declared default is
    # 'Honey Crisp'.
    type = XmlAttribute(default='Honey Crisp')