Exemple #1
0
# Element names that hold style definitions, paired with the category label
# stored on each Style record. A tuple (rather than a dict) keeps the lookup
# order deterministic, which matters because the first definition of a given
# style name wins.
_STYLE_CATEGORIES = (
    ("office:styles", "named"),
    ("office:automatic-styles", "automatic"),
)

# A non-negative decimal number optionally followed by an alphabetic unit,
# e.g. "12pt", "0.5in", ".25cm" or "14".
_LENGTH_PATTERN = r"(\d*\.?\d+)([a-zA-Z]+)?"

# Multipliers used to convert a font size to points. Sizes with any other
# unit (or none) are taken as-is. 28 is the rounded points-per-centimetre
# factor this code has always used.
_POINTS_PER_UNIT = {"in": 72, "cm": 28}


def _parse_length(value):
    """Split a length such as ``"0.5in"`` into ``(number, units)``.

    Returns ``None`` when *value* does not start with a non-negative number.
    ``units`` is ``None`` when no alphabetic unit follows the number.
    """
    match = re.match(_LENGTH_PATTERN, value)
    if match is None:
        return None
    return float(match.group(1)), match.group(2)


def _has_border(p_prop, attr):
    """Return True when *attr* is present on *p_prop* and is not ``"none"``."""
    return p_prop.hasAttribute(attr) and p_prop.getAttribute(attr) != "none"


def _apply_paragraph_properties(style_record, p_prop):
    """Copy border and left-margin flags from a paragraph-properties element."""
    style_record.border_top = _has_border(p_prop, "fo:border-top")
    style_record.border_bottom = _has_border(p_prop, "fo:border-bottom")
    if p_prop.hasAttribute("fo:margin-left"):
        parsed = _parse_length(p_prop.getAttribute("fo:margin-left"))
        if parsed is not None:
            # Compare the full decimal value so that fractional margins such
            # as "0.5in" count as a real margin.
            style_record.margin_left = parsed[0] > 0


def _apply_text_properties(style_record, t_prop):
    """Copy font and decoration attributes from a text-properties element."""
    if t_prop.hasAttribute("fo:font-weight"):
        style_record.font_weight = t_prop.getAttribute("fo:font-weight")
    if t_prop.hasAttribute("fo:font-style"):
        style_record.font_style = t_prop.getAttribute("fo:font-style")
    if t_prop.hasAttribute("fo:font-size"):
        parsed = _parse_length(t_prop.getAttribute("fo:font-size"))
        if parsed is not None:
            number, units = parsed
            # The stored size is truncated to a whole number.
            style_record.font_size = int(number * _POINTS_PER_UNIT.get(units, 1))
    # The attribute is namespaced ("style:text-position") in ODF; the bare
    # name is still accepted so that input relying on it keeps working.
    for attr in ("style:text-position", "text-position"):
        if t_prop.hasAttribute(attr):
            style_record.text_position = t_prop.getAttribute(attr)
            break
    if t_prop.hasAttribute("style:text-underline-style"):
        style_record.text_underline_style = t_prop.getAttribute("style:text-underline-style")


def _build_style(wk, node, name, category):
    """Create a Style record for the style definition element *node*."""
    display_name = node.getAttribute("style:display-name")
    style_record = Style(parent=wk, workspace=wk, name=name,
                         display_name=(display_name or name), category=category)

    # When a style holds several property elements, later ones overwrite the
    # values set by earlier ones.
    for p_prop in node.getElementsByTagName("style:paragraph-properties"):
        _apply_paragraph_properties(style_record, p_prop)
    for t_prop in node.getElementsByTagName("style:text-properties"):
        _apply_text_properties(style_record, t_prop)

    style_record.list_style_bullet = len(node.getElementsByTagName("text:list-level-style-bullet")) > 0
    style_record.list_style_number = len(node.getElementsByTagName("text:list-level-style-number")) > 0
    return style_record


def add_extracted_styles(wk, content_xml, styles_xml):
    """Build Style records for the styles used in a document and store them.

    Every element carrying a ``style:name`` attribute inside an
    ``office:styles`` or ``office:automatic-styles`` block of either DOM is
    turned into a ``Style`` record. When the same name is defined more than
    once, the first definition found wins (``content_xml`` is scanned before
    ``styles_xml``). Only styles referenced through a ``text:style-name``
    attribute somewhere in ``content_xml`` are passed to ``db.put``.

    Args:
        wk: Object passed as both ``parent`` and ``workspace`` of each Style.
        content_xml: DOM of the document content (must provide
            ``getElementsByTagName``).
        styles_xml: DOM of the document styles (same interface).

    Returns:
        None. The selected Style records are handed to ``db.put`` in a
        single call.
    """
    # Round up all the styles that exist.
    styles = {}
    for xml in (content_xml, styles_xml):
        for tag, category in _STYLE_CATEGORIES:
            for style_block in xml.getElementsByTagName(tag):
                for node in style_block.getElementsByTagName("*"):
                    if not node.hasAttribute("style:name"):
                        continue
                    name = node.getAttribute("style:name")
                    if name not in styles:
                        styles[name] = _build_style(wk, node, name, category)

    # Only store the ones that are actually referenced by the content.
    styles_in_use = {}
    for el in content_xml.getElementsByTagName("*"):
        if el.hasAttribute("text:style-name"):
            name = el.getAttribute("text:style-name")
            if name in styles and name not in styles_in_use:
                styles_in_use[name] = styles[name]

    db.put(list(styles_in_use.values()))