Python xpathの例、docxcompose.utils.xpath Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_properties.py プロジェクト: bicyclemicycle/docxcompose

    def test_complex_docprop_with_multiple_textnode_in_same_run_are_updated(
            self):
        """A cached value split over two parts ends up as one cached value."""
        doc = Document(docx_path('two_textnodes_in_run_docproperty.docx'))
        all_paragraphs = xpath(doc.element.body, '//w:p')
        assert len(all_paragraphs) == 1, 'input file contains one paragraph'
        instr_texts = xpath(doc.element.body, '//w:instrText')
        assert len(instr_texts) == 1, \
            'input contains one complex field docproperty'

        # Before the update the cached value is spread over two parts.
        values_before = cached_complex_field_values(all_paragraphs[0])
        assert len(values_before) == 2, \
            'doc property value is scattered over 2 parts'
        assert ''.join(values_before) == 'Hello there'

        CustomProperties(doc).update_all()

        # Look the paragraph up again and inspect the refreshed cache.
        refreshed_paragraph = xpath(doc.element.body, '//w:p')[0]
        values_after = cached_complex_field_values(refreshed_paragraph)
        assert len(values_after) == 1, \
            'doc property value has been reset to one cached value'
        assert values_after[0] == 'i will be spllchecked!'

コード例 #2

0

ファイルを表示

 def add_diagrams(self, doc, element):
     """Re-point diagram relationship ids found in ``element``.

     For each ``dgm:relIds`` node that has an ``r:dm`` attribute, the
     ``dm``, ``lo``, ``qs`` and ``cs`` relationship ids are resolved through
     ``doc.part.rels`` and replaced by the id of a relationship from
     ``self.doc.part`` to the same target part.
     """
     # While waiting docxcompose 1.3.3
     for rel_ids in xpath(element, './/dgm:relIds[@r:dm]'):
         relationship_types = (
             ('dm', RT.DIAGRAM_DATA),
             ('lo', RT.DIAGRAM_LAYOUT),
             ('qs', RT.DIAGRAM_QUICK_STYLE),
             ('cs', RT.DIAGRAM_COLORS),
         )
         for suffix, rel_type in relationship_types:
             attr_name = '{%s}%s' % (NS['r'], suffix)
             old_rid = rel_ids.get(attr_name)
             target_part = doc.part.rels[old_rid].target_part
             rel_ids.set(
                 attr_name, self.doc.part.relate_to(target_part, rel_type))

コード例 #3

0

ファイルを表示

def test_complex_docprop_with_multiple_textnode_in_same_run_are_updated():
    """A cached value split over two text nodes ends up in a single node."""
    doc = Document(docx_path('two_textnodes_in_run_docproperty.docx'))
    all_paragraphs = xpath(doc.element.body, '//w:p')
    assert 1 == len(all_paragraphs), 'input file contains one paragraph'
    instr_texts = xpath(doc.element.body, '//w:instrText')
    assert 1 == len(instr_texts), \
        'input contains one complex field docproperty'

    # Before the update the cached value is spread over two nodes.
    nodes_before = xpath(all_paragraphs[0], XPATH_CACHED_DOCPROPERTY_VALUES)
    assert 2 == len(nodes_before), \
        'doc property value is scattered over 2 parts'
    joined_before = ''.join([node.text for node in nodes_before])
    assert 'Hello there' == joined_before

    CustomProperties(doc).update_all()

    # Look the paragraph up again and inspect the refreshed cache.
    first_paragraph = xpath(doc.element.body, '//w:p')[0]
    nodes_after = xpath(first_paragraph, XPATH_CACHED_DOCPROPERTY_VALUES)
    assert 1 == len(nodes_after), \
        'doc property value has been reset to one cached value'
    assert 'i will be spllchecked!' == nodes_after[0].text

コード例 #4

0

ファイルを表示

ファイル: properties.py プロジェクト: goerz/docxcompose

    def __setitem__(self, key, value):
        """Set the value of a property.

        When no ``cp:property`` named ``key`` exists the property is added;
        otherwise the first child of the first match is replaced by a newly
        built value element and the part is updated.
        """
        query = u'.//cp:property[@name="{}"]'.format(key)
        matches = xpath(self._element, query)
        if matches:
            old_value_el = matches[0][0]
            replacement = value2vt(value)
            old_value_el.getparent().replace(old_value_el, replacement)
            self._update_part()
        else:
            self.add(key, value)

コード例 #5

0

ファイルを表示

ファイル: composer.py プロジェクト: Inujel/docxcompose

    def _next_numbering_ids(self):
        """Return the next unused ``(numId, abstractNumId)`` pair."""
        part = self.numbering_part()

        # numIds start at 1, so that is the answer when none exist yet.
        used_num_ids = [num.numId for num in xpath(part.element, './/w:num')]
        next_num_id = max(used_num_ids) + 1 if used_num_ids else 1

        # abstractNumIds start at 0.
        raw_anum_ids = xpath(
            part.element, './/w:abstractNum/@w:abstractNumId')
        used_anum_ids = [int(raw) for raw in raw_anum_ids]
        next_anum_id = max(used_anum_ids) + 1 if used_anum_ids else 0

        return next_num_id, next_anum_id

コード例 #6

0

ファイルを表示

ファイル: composer.py プロジェクト: 4teamwork/docxcompose

    def add_shapes(self, doc, element):
        """Re-link images referenced by ``v:shape/v:imagedata`` nodes.

        Each referenced image part is looked up in our package by SHA1 and
        added when absent; the node's ``r:id`` is then replaced by the id of
        a relationship from ``self.doc.part`` to that image part.
        """
        for imagedata in xpath(element, './/v:shape/v:imagedata'):
            id_attr = '{%s}id' % NS['r']
            source_part = doc.part.rels[imagedata.get(id_attr)].target_part

            target_part = self.pkg.image_parts._get_by_sha1(source_part.sha1)
            if target_part is None:
                # Not in our package yet: add it.
                target_part = self.pkg.image_parts._add_image_part(
                    ImageWrapper(source_part))

            imagedata.set(
                id_attr, self.doc.part.relate_to(target_part, RT.IMAGE))

コード例 #7

0

ファイルを表示

 def _get_fieldname_string(self):
     """Return the concatenated text of the field's name nodes.

     The name may be spread over several runs, so the text nodes of every
     run positioned before the separate run (or before the end run when
     there is no separate run) are joined.
     """
     boundary_run = self.get_separate_run()
     if boundary_run is None:
         boundary_run = self.end_run
     boundary_index = self.w_p.index(boundary_run)

     text_nodes = []
     for run in self._runs:
         if self.w_p.index(run) < boundary_index:
             text_nodes.extend(xpath(run, self.XPATH_TEXTS))
     return "".join(node.text for node in text_nodes)

コード例 #8

0

ファイルを表示

ファイル: test_properties.py プロジェクト: lunasxk/docxcompose

    def test_multiple_identical_docprops_get_updated(self):
        """Every paragraph showing the same docproperty is refreshed."""
        doc = Document(docx_path('multiple_identical_properties.docx'))
        paragraph_count = len(doc.paragraphs)
        assert 3 == paragraph_count, 'input file should contain 3 paragraphs'
        for para in doc.paragraphs:
            instr_texts = xpath(para._p, './/w:instrText')
            assert 1 == len(instr_texts), \
                'paragraph should contain one complex field docproperties'

            assert u'Foo' == para.text

        CustomProperties(doc).update_all()

        for position, para in enumerate(doc.paragraphs, 1):
            assert u'Bar' == para.text, 'docprop {} was not updated'.format(position)

コード例 #9

0

ファイルを表示

    def get_separate_run(self):
        """Return the separate run of this complex field, or None.

        The separate node is optional in OOXML. The first following
        separate only counts when it sits before the end run in the
        paragraph.
        """
        candidates = xpath(self.w_r, self.XPATH_FOLLOWING_SEPARATES)
        if candidates:
            first = candidates[0]
            if self.w_p.index(first) < self.w_p.index(self.end_run):
                return first
        return None

コード例 #10

0

ファイルを表示

ファイル: test_properties.py プロジェクト: goerz/docxcompose

    def test_removes_simple_field_but_keeps_value(self):
        """Dissolving a simple field removes the field but keeps its text."""
        doc = Document(
            docx_path('outdated_docproperty_with_umlauts.docx'))
        paragraph_count = len(doc.paragraphs)
        assert 1 == paragraph_count, 'input file should contain 1 paragraph'

        field_text_query = (
            u'.//w:fldSimple[contains(@w:instr, \'DOCPROPERTY "F\xfc\xfc"\')]//w:t')
        matches_before = xpath(doc.element.body, field_text_query)
        assert 1 == len(matches_before), 'should contain one simple field docproperty'

        assert u'Hie chund ds property: ' == doc.paragraphs[0].text
        assert u'xxx' == matches_before[0].text

        CustomProperties(doc).dissolve_fields(u"F\xfc\xfc")

        matches_after = xpath(doc.element.body, field_text_query)
        assert 0 == len(matches_after), 'should not contain any docproperties anymore'
        # when simple field is removed, the value is moved one up in the hierarchy
        assert u'Hie chund ds property: xxx' == doc.paragraphs[0].text

コード例 #11

0

ファイルを表示

    def nullify(self, key):
        """Blank a text property, delete any other kind of property.

        Raises KeyError when no property named ``key`` exists.
        """
        query = u'.//cp:property[@name="{}"]'.format(key)
        matches = xpath(self._element, query)
        if not matches:
            raise KeyError(key)

        value_el = matches[0][0]
        if not is_text_property(value_el):
            del self[key]
            return
        self[key] = ''

コード例 #12

0

ファイルを表示

    def add_images(self, doc, element):
        """Re-link images embedded by blips found in ``element``.

        Each embedded image part is looked up in our package by SHA1 and
        added when absent; the blip's ``r:embed`` is then replaced by the
        id of a relationship from ``self.doc.part`` to that image part.
        """
        for blip in xpath(element, '(.//a:blip|.//asvg:svgBlip)[@r:embed]'):
            embed_attr = '{%s}embed' % NS['r']
            source_part = doc.part.rels[blip.get(embed_attr)].target_part

            target_part = self.pkg.image_parts._get_by_sha1(source_part.sha1)
            if target_part is None:
                # Not in our package yet: add it.
                target_part = self.pkg.image_parts._add_image_part(
                    ImageWrapper(source_part))

            blip.set(
                embed_attr, self.doc.part.relate_to(target_part, RT.IMAGE))

コード例 #13

0

ファイルを表示

ファイル: test_properties.py プロジェクト: bicyclemicycle/docxcompose

    def test_multiple_identical_docprops_get_updated(self):
        """Updating one property refreshes every paragraph that shows it."""
        doc = Document(docx_path('multiple_identical_properties.docx'))
        paragraph_count = len(doc.paragraphs)
        assert paragraph_count == 3, 'input file should contain 3 paragraphs'
        for para in doc.paragraphs:
            instr_texts = xpath(para._p, './/w:instrText')
            assert len(instr_texts) == 1, \
                'paragraph should contain one complex field docproperties'

            assert para.text == u'Foo'

        CustomProperties(doc).update("Text Property", "New value")

        for position, para in enumerate(doc.paragraphs, 1):
            assert para.text == u'New value',\
                'docprop {} was not updated'.format(position)

コード例 #14

0

ファイルを表示

    def __delitem__(self, key):
        """Delete a property and renumber the remaining pids.

        Raises KeyError when no property named ``key`` exists.
        """
        query = u'.//cp:property[@name="{}"]'.format(key)
        matches = xpath(self._element, query)
        if not matches:
            raise KeyError(key)

        doomed = matches[0]
        doomed.getparent().remove(doomed)

        # Renumber pids consecutively, starting at MIN_PID.
        for new_pid, remaining in enumerate(self._element, MIN_PID):
            remaining.set('pid', text_type(new_pid))

        self._update_part()

コード例 #15

0

ファイルを表示

ファイル: properties.py プロジェクト: iStar2021/docxcompose

    def add(self, name, value):
        """Append a new property named ``name`` holding ``value``.

        The new pid is one above the highest existing pid, or ``MIN_PID``
        when there are no properties yet.
        """
        existing_pids = [
            int(raw) for raw in xpath(self._element, u'.//cp:property/@pid')]
        new_pid = max(existing_pids) + 1 if existing_pids else MIN_PID

        prop_el = parse_xml('<cp:property xmlns:cp="{}"/>'.format(NS['cp']))
        prop_el.set('fmtid', CUSTOM_PROPERTY_FMTID)
        prop_el.set('name', name)
        prop_el.set('pid', text_type(new_pid))
        prop_el.append(value2vt(value))
        self._element.append(prop_el)

        self._update_part()

コード例 #16

0

ファイルを表示

    def update(self, name):
        """Update a property field value.

        Reads the property ``name`` through ``self.get`` and writes its
        textual form into the cached text of the first matching simple field
        and of the first matching complex field in the document body.

        Booleans are written as ``Y``/``N``, datetimes are formatted with
        ``strftime('%x')`` and every other value is converted with
        ``text_type``.
        """
        value = self.get(name)
        if isinstance(value, bool):
            value = u'Y' if value else u'N'
        elif isinstance(value, datetime):
            value = value.strftime('%x')
        else:
            value = text_type(value)

        # Simple field
        sfield = xpath(
            self.doc.element.body,
            u'.//w:fldSimple[contains(@w:instr, \'DOCPROPERTY "{}"\')]'.format(name))
        if sfield:
            text = xpath(sfield[0], './/w:t')
            if text:
                text[0].text = value

        # Complex field
        cfield = xpath(
            self.doc.element.body,
            u'.//w:instrText[contains(.,\'DOCPROPERTY "{}"\')]'.format(name))
        if cfield:
            # instrText -> run -> paragraph
            w_p = cfield[0].getparent().getparent()
            runs = xpath(
                w_p,
                u'.//w:r[following-sibling::w:r/w:fldChar/@w:fldCharType="end"'
                u' and preceding-sibling::w:r/w:fldChar/@w:fldCharType="separate"]')
            if runs:
                first_w_r = runs[0]
                text = xpath(first_w_r, u'.//w:t')
                if text:
                    text[0].text = value
                # remove any additional text-nodes inside the first run. we
                # update the first text-node only with the full cached
                # docproperty value. if for some reason the initial cached
                # value is split into multiple text nodes we remove any
                # additional node after updating the first node.
                #
                # The nodes were found with the descendant axis, so they are
                # removed through their actual parent: calling ``remove`` on
                # an element that is not the direct parent raises ValueError.
                for unnecessary_w_t in text[1:]:
                    unnecessary_w_t.getparent().remove(unnecessary_w_t)

                # if there are multiple runs between "separate" and "end" they
                # all may contain a piece of the cached docproperty value. we
                # can't reliably handle this situation and only update the
                # first node in the first run with the full cached value. it
                # appears any additional runs with text nodes should then be
                # removed to avoid duplicating parts of the cached docproperty
                # value.
                for w_r in runs[1:]:
                    text = xpath(w_r, u'.//w:t')
                    if text:
                        # Same reasoning as above: the run may be nested
                        # deeper than a direct child of the paragraph.
                        w_r.getparent().remove(w_r)

コード例 #17

0

ファイルを表示

 def get(self, name):
     """Return the value of the property ``name`` as a Python object.

     The value is read from the first child of the first matching
     ``cp:property``; its tag suffix selects the conversion (``lpwstr`` ->
     text, ``i4`` -> int, ``bool`` -> bool, ``filetime`` -> parsed W3CDTF).
     Returns None when the property is missing or has another tag.
     """
     matches = xpath(
         self._element,
         u'.//cp:property[@name="{}"]'.format(name))
     if not matches:
         return None

     value_el = list(matches[0])[0]
     tag = value_el.tag
     if tag.endswith(u'}lpwstr'):
         return value_el.text
     if tag.endswith(u'}i4'):
         return int(value_el.text)
     if tag.endswith(u'}bool'):
         return value_el.text.lower() == u'true'
     if tag.endswith(u'}filetime'):
         return CT_CoreProperties._parse_W3CDTF_to_datetime(value_el.text)
     return None

コード例 #18

0

ファイルを表示

ファイル: test_properties.py プロジェクト: lunasxk/docxcompose

    def test_complex_field_gets_updated(self):
        """Updating the number property changes only its paragraph text."""
        doc = Document(docx_path('docproperties.docx'))
        paragraph_count = len(doc.paragraphs)
        assert 6 == paragraph_count, 'input file should contain 6 paragraphs'

        instr_texts = xpath(doc.element.body, './/w:instrText')
        assert 5 == len(instr_texts),\
            'input should contain five complex field docproperties'

        expected = [
            u'Custom Doc Properties',
            u'Text: Foo Bar',
            u'Number: 123',
            u'Boolean: Y',
            u'Date: 11.06.2019',
            u'Float: 1.1',
        ]
        texts_before = [para.text for para in doc.paragraphs]
        assert texts_before == expected

        CustomProperties(doc).update("Number Property", 423)

        expected[2] = u'Number: 423'
        texts_after = [para.text for para in doc.paragraphs]
        assert texts_after == expected

コード例 #19

0

ファイルを表示

ファイル: composer.py プロジェクト: 4teamwork/docxcompose

    def add_images(self, doc, element):
        """Re-link images embedded by blips found in ``element``.

        Each embedded image part is looked up in our package by SHA1 and
        added when absent; the blip's ``r:embed`` is then replaced by the
        id of a relationship from ``self.doc.part`` to that image part. A
        blip that also carries an ``r:link`` gets that relationship added
        through ``add_relationship`` and its id rewritten as well.
        """
        for blip in xpath(element, '(.//a:blip|.//asvg:svgBlip)[@r:embed]'):
            embed_attr = '{%s}embed' % NS['r']
            source_part = doc.part.rels[blip.get(embed_attr)].target_part

            target_part = self.pkg.image_parts._get_by_sha1(source_part.sha1)
            if target_part is None:
                # Not in our package yet: add it.
                target_part = self.pkg.image_parts._add_image_part(
                    ImageWrapper(source_part))

            blip.set(
                embed_attr, self.doc.part.relate_to(target_part, RT.IMAGE))

            # An image can be embedded and carry an external reference at
            # the same time, so the link relationship is handled too.
            link_attr = '{%s}link' % NS['r']
            link_rid = blip.get(link_attr)
            if not link_rid:
                continue
            source_rel = doc.part.rels[link_rid]
            copied_rel = self.add_relationship(None, self.doc.part, source_rel)
            blip.set(link_attr, copied_rel.rId)

コード例 #20

0

ファイルを表示

    def values(self):
        """Return a list with the converted value of each ``cp:property``.

        The list is empty when there is no properties element.
        """
        if self._element is None:
            return []

        return [
            vt2value(prop_el[0])
            for prop_el in xpath(self._element, u'.//cp:property')
        ]

コード例 #21

0

ファイルを表示

    def keys(self):
        """Return a list with the ``name`` attribute of each ``cp:property``.

        The list is empty when there is no properties element.
        """
        if self._element is None:
            return []

        return [
            prop_el.get('name')
            for prop_el in xpath(self._element, u'.//cp:property')
        ]

コード例 #22

0

ファイルを表示

ファイル: docx_B.py プロジェクト: GingerNg/P_Stack

    src_func(key5)

    # Start from an empty Document and pass two documents loaded from
    # root_path to MyComposer.replace.
    composer = MyComposer(Document())
    # composer.append(Document(root_path+""+".docx"))
    composer.replace(Document(root_path + "/公有云服务介绍" + ".docx"))
    composer.replace(Document(root_path + "/机器预填【部分】" + ".docx"))

    # Same path as the second document passed to composer.replace above.
    dest_file_path = root_path + "/机器预填【部分】.docx"
    docx_obj = Document(dest_file_path)
    # NOTE(review): gindex is not used in the visible part of this snippet.
    gindex = 1
    # Walk the direct children of the document body.
    for element in docx_obj.element.body:
        if isinstance(element, CT_SectPr):
            # Section properties elements are skipped.
            continue
        else:
            # print(type(element))
            # Despite its name this holds the w:sdt descendants of the
            # current body child.
            bookmarks_start = xpath(element, './/w:sdt')
            if bookmarks_start:
                children = element.getchildren()
                for child in children:
                    # Only direct children whose tag is w:alias are handled.
                    if child.tag == "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}alias":
                        # index = int(bookmarks_start[0].get(
                        #     "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"))
                        # NOTE(review): w:val is read from the first w:sdt
                        # match, not from `child` -- confirm this is intended.
                        key = bookmarks_start[0].get(
                            "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val"
                        )
                        print(key)
                        src_ee = srcs.get(key)
                        if src_ee is not None:
                            # A source entry exists for this key: drop the
                            # alias child from the body element.
                            element.remove(child)
                            # bookmarks_start = [src_element]
                            # Second lookup of the same key; the snippet
                            # ends before src_ees is used.
                            src_ees = srcs.get(key)

コード例 #23

0

ファイルを表示

    def items(self):
        """Return a list of ``(name, value)`` pairs, one per ``cp:property``.

        The list is empty when there is no properties element.
        """
        if self._element is None:
            return []

        return [
            (prop_el.get('name'), vt2value(prop_el[0]))
            for prop_el in xpath(self._element, u'.//cp:property')
        ]

コード例 #24

0

ファイルを表示

ファイル: sdt.py プロジェクト: uonxhou/docxcompose

 def tags_by_alias(self, alias):
     """Return the ``w:sdt`` elements in the body whose alias is ``alias``."""
     body = self.doc.element.body
     query = './/w:sdt/w:sdtPr/w:alias[@w:val="%s"]/ancestor::w:sdt' % alias
     return xpath(body, query)

コード例 #25

0

ファイルを表示

 def update(self, value):
     """Write the formatted ``value`` into the first ``w:t`` below the node.

     Does nothing when the node contains no ``w:t``.
     """
     text_nodes = xpath(self.node, './/w:t')
     if not text_nodes:
         return
     text_nodes[0].text = self._format_value(value)

コード例 #26

0

ファイルを表示

ファイル: composer.py プロジェクト: 4teamwork/docxcompose

    def restart_first_numbering(self, doc, element):
        """Restart numbering for the first use of a numbered style.

        Nothing happens when restarting is disabled, when ``element`` has
        no paragraph style, when the style was already restarted, is
        unknown to ``self.doc``, has an outline level, has no resolvable
        ``w:num``, or when level 0 of its abstract numbering is a bullet.
        Otherwise a copy of the ``w:num`` with a start override of 1 on
        level 0 is inserted under a new numId and the paragraph is pointed
        at it.
        """
        if not self.restart_numbering:
            return

        style_ids = xpath(element, './/w:pStyle/@w:val')
        if not style_ids:
            return
        style_id = style_ids[0]
        if style_id in self._numbering_restarted:
            return

        style_el = self.doc.styles.element.get_by_id(style_id)
        if style_el is None:
            return
        if xpath(style_el, './/w:outlineLvl'):
            # Styles with an outline level are probably headings.
            # Do not restart numbering of headings
            return

        # A numId referenced from the paragraph itself wins; otherwise fall
        # back to the numId of the style.
        num_ids = (xpath(element, './/w:numPr/w:numId/@w:val')
                   or xpath(style_el, './/w:numId/@w:val'))
        if not num_ids:
            return
        num_id = num_ids[0]

        numbering_part = self.numbering_part()
        num_els = xpath(numbering_part.element,
                        './/w:num[@w:numId="%s"]' % num_id)
        if not num_els:
            # Styles with no numbering element should not be processed
            return
        num_el = num_els[0]

        anum_id = xpath(num_el, './/w:abstractNumId/@w:val')[0]
        anum_els = xpath(
            numbering_part.element,
            './/w:abstractNum[@w:abstractNumId="%s"]' % anum_id)
        level0_formats = xpath(anum_els[0],
                               './/w:lvl[@w:ilvl="0"]/w:numFmt/@w:val')
        # Do not restart numbering of bullets
        if level0_formats and level0_formats[0] == 'bullet':
            return

        # Copy the w:num and let its level 0 start again at 1.
        restarted_num = deepcopy(num_el)
        restarted_num.append(parse_xml(
            '<w:lvlOverride xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"'
            ' w:ilvl="0"><w:startOverride w:val="1"/></w:lvlOverride>'))
        next_num_id, _next_anum_id = self._next_numbering_ids()
        restarted_num.numId = next_num_id
        self._insert_num(restarted_num)

        # Point the paragraph at the new numId.
        ppr = xpath(
            element,
            './/w:pPr/w:pStyle[@w:val="%s"]/parent::w:pPr' % style_id)[0]
        existing_num_prs = xpath(ppr, './/w:numPr')
        if existing_num_prs:
            num_pr = existing_num_prs[0]
            old_num_id = num_pr.numId.val
            self._replace_mapped_num_id(old_num_id, next_num_id)
            num_pr.numId.val = next_num_id
        else:
            ppr.append(parse_xml(
                '<w:numPr xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">'
                '<w:ilvl w:val="0"/><w:numId w:val="%s"/></w:numPr>' %
                next_num_id))
        self._numbering_restarted.add(style_id)

コード例 #27

0

ファイルを表示

 def begin_run(self):
     """Return the last node matched by ``XPATH_PRECEDING_BEGINS``.

     Raises InvalidComplexField when nothing matches.
     """
     preceding_begins = xpath(self.w_r, self.XPATH_PRECEDING_BEGINS)
     if preceding_begins:
         return preceding_begins[-1]
     raise InvalidComplexField(
         "Complex field without begin node is not supported")

コード例 #28

0

ファイルを表示

ファイル: composer.py プロジェクト: 4teamwork/docxcompose

 def remove_header_and_footer_references(self, doc, element):
     """Detach every header and footer reference found below ``element``."""
     query = './/w:headerReference|.//w:footerReference'
     for reference in xpath(element, query):
         parent = reference.getparent()
         parent.remove(reference)

コード例 #29

0

ファイルを表示

 def _runs(self):
     """Return the ``w:r`` siblings that follow the begin run."""
     following_runs = xpath(self.begin_run, "./following-sibling::w:r")
     return following_runs

コード例 #30

0

ファイルを表示

ファイル: composer.py プロジェクト: 4teamwork/docxcompose

    def add_styles(self, doc, element):
        """Add styles from the given document used in the given element.

        For every style id referenced by a ``w:tblStyle``, ``w:pStyle`` or
        ``w:rStyle`` in ``element``:

        * when the mapped id is unknown to ``self.doc``, the style
          definition (and its linked style, if any) is deep-copied from
          ``doc`` and its numberings are added;
        * otherwise the abstractNumId used by ``doc``'s style is mapped to
          the one used by our style in ``self.anum_id_mapping``;
        * references in ``element`` are rewritten when the mapped id
          differs from the original id.

        A referenced style whose definition cannot be found in ``doc`` is
        skipped rather than failing while copying a missing definition.
        """
        our_style_ids = [s.style_id for s in self.doc.styles]
        # de-duplicate ids and keep order to make sure tests are not flaky
        used_style_ids = list(
            OrderedDict.fromkeys([
                e.val for e in xpath(element,
                                     './/w:tblStyle|.//w:pStyle|.//w:rStyle')
            ]))

        for style_id in used_style_ids:
            our_style_id = self.mapped_style_id(style_id)
            if our_style_id not in our_style_ids:
                source_style = doc.styles.element.get_by_id(style_id)
                if source_style is None:
                    # The element references a style the source document
                    # does not define, so there is nothing to copy.
                    continue
                style_element = deepcopy(source_style)
                self.doc.styles.element.append(style_element)
                self.add_numberings(doc, style_element)
                # Also add linked styles
                linked_style_ids = xpath(style_element, './/w:link/@w:val')
                if linked_style_ids:
                    linked_style_id = linked_style_ids[0]
                    our_linked_style_id = self.mapped_style_id(linked_style_id)
                    if our_linked_style_id not in our_style_ids:
                        our_linked_style = doc.styles.element.get_by_id(
                            linked_style_id)
                        # The link may point at a style that is not defined
                        # in the source document.
                        if our_linked_style is not None:
                            self.doc.styles.element.append(
                                deepcopy(our_linked_style))
            else:
                # Create a mapping for abstractNumIds used in existing styles
                # This is used when adding numberings to avoid having multiple
                # <w:abstractNum> elements for the same style.
                style_element = doc.styles.element.get_by_id(style_id)
                if style_element is not None:
                    num_ids = xpath(style_element, './/w:numId/@w:val')
                    if num_ids:
                        anum_ids = xpath(
                            doc.part.numbering_part.element,
                            './/w:num[@w:numId="%s"]/w:abstractNumId/@w:val' %
                            num_ids[0])
                        if anum_ids:
                            our_style_element = self.doc.styles.element.get_by_id(
                                our_style_id)
                            our_num_ids = xpath(our_style_element,
                                                './/w:numId/@w:val')
                            if our_num_ids:
                                numbering_part = self.numbering_part()
                                our_anum_ids = xpath(
                                    numbering_part.element,
                                    './/w:num[@w:numId="%s"]/w:abstractNumId/@w:val'
                                    % our_num_ids[0])
                                if our_anum_ids:
                                    self.anum_id_mapping[int(
                                        anum_ids[0])] = int(our_anum_ids[0])

            # Replace language-specific style id with our style id
            if our_style_id != style_id and our_style_id is not None:
                style_elements = xpath(
                    element, './/w:tblStyle[@w:val="%(styleid)s"]|'
                    './/w:pStyle[@w:val="%(styleid)s"]|'
                    './/w:rStyle[@w:val="%(styleid)s"]' %
                    dict(styleid=style_id))
                for el in style_elements:
                    el.val = our_style_id
            # Update our style ids
            our_style_ids = [s.style_id for s in self.doc.styles]