Python P Examples

Programming Language: Python

Namespace/Package Name: lxml.html.builder

Class/Type: P

Examples at hotexamples.com: 5

Python P - 5 examples found. These are the top-rated real-world Python examples of lxml.html.builder.P, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

P(5)

Frequently Used Methods

P (5)

Example #1

Show file

def clean_stars(div: HtmlElement) -> None:
    """Tidy up the markup inside ``div`` in place.

    Steps, in order:

    1. Drop every node the ``elements`` helper finds for ``p[strong[em]]``
       and then for ``.//hr``.
    2. Insert a fixed ``<h1>`` title as the first child of ``div``.
    3. Replace the node found for ``./p[1]`` with an ``<h2>`` holding its
       title-cased text content.
    4. Replace the node found for ``./p[strong[a]]`` with a
       ``<p class="breakabove">`` wrapping a single ``<a class="internal">``
       whose text is the old paragraph's text content and whose ``href`` is
       taken from the node found for ``./p/strong/a``.

    ``elements``, ``element`` and ``replace`` are helpers defined outside
    this function; nothing is returned.
    """
    # Same two queries, run in the same order as before.
    for query in ("p[strong[em]]", ".//hr"):
        for unwanted in elements(div, query):
            unwanted.drop_tree()

    div.insert(0, H1("Unspeakable Desolation Pouring Down From the Stars"))

    subtitle_para = element(div, "./p[1]")
    replace(subtitle_para, H2(subtitle_para.text_content().title()))

    link_para = element(div, "./p[strong[a]]")
    # The anchor is looked up from ``div``, not from ``link_para``.
    link = element(div, "./p/strong/a")
    replacement = P(
        CLASS("breakabove"),
        A(
            link_para.text_content(),
            CLASS("internal"),
            href=link.attrib["href"],
        ),
    )
    replace(link_para, replacement)

Example #2

Show file

File: to_html.py Project: yesplease/calibre

    def convert_p(self, p):
        """Convert the paragraph element ``p`` into an HTML element.

        A fresh ``P()`` element is created and filled with the spans that
        ``self.convert_run`` returns for every ``w:r`` descendant whose
        nearest enclosing paragraph is ``p`` itself. While walking the
        descendants the method also:

        * records ``dest -> p`` in ``self.object_map``, the converted runs in
          ``self.layers[p]`` and the paragraph frame in ``self.frame_map[p]``;
        * turns ``w:bookmarkStart`` names and ``TOC`` field instructions into
          entries of ``self.anchor_map`` and ``id`` attributes on the output;
        * registers spans that sit inside a ``w:hyperlink`` in
          ``self.link_map`` / ``self.link_source_map``.

        Afterwards the tag is switched to ``h1``..``h6`` for ``heading <n>``
        styles, ``dir="rtl"`` is set for bidi paragraphs, runs of spans with
        the same border are wrapped in a shared ``SPAN()``, and NBSP filler is
        added for empty paragraphs and trailing ``<br>`` elements.

        Returns the new element (``dest``).
        """
        dest = P()
        self.object_map[dest] = p
        style = self.styles.resolve_paragraph(p)
        self.layers[p] = []
        self.frame_map[p] = style.frame
        self.add_frame(dest, style.frame)

        # current_anchor: anchor id generated for a bookmark/TOC field that
        # has not been attached to an output element yet.
        # current_hyperlink: last <w:hyperlink> seen; cleared again as soon as
        # a run turns out to have no hyperlink ancestor.
        current_anchor = None
        current_hyperlink = None
        hl_xpath = self.namespace.XPath('ancestor::w:hyperlink[1]')

        def p_parent(x):
            # Ensure that nested <w:p> tags are handled. These can occur if a
            # textbox is present inside a paragraph.
            # Climbs with getparent() and returns the first ancestor whose tag
            # ends with '}p'. If x.tag.endswith raises AttributeError (for
            # example because getparent() returned None) the loop stops and
            # the function implicitly returns None.
            while True:
                x = x.getparent()
                try:
                    if x.tag.endswith('}p'):
                        return x
                except AttributeError:
                    break

        for x in self.namespace.descendants(p, 'w:r', 'w:bookmarkStart', 'w:hyperlink', 'w:instrText'):
            # Skip anything that belongs to a paragraph nested inside p.
            if p_parent(x) is not p:
                continue
            if x.tag.endswith('}r'):
                span = self.convert_run(x)
                if current_anchor is not None:
                    # Put the pending id on the paragraph itself while it is
                    # still empty, otherwise on this span.
                    (dest if len(dest) == 0 else span).set('id', current_anchor)
                    current_anchor = None
                if current_hyperlink is not None:
                    try:
                        # [0] raises IndexError when the run has no
                        # w:hyperlink ancestor, i.e. the link has ended.
                        hl = hl_xpath(x)[0]
                        self.link_map[hl].append(span)
                        self.link_source_map[hl] = self.current_rels
                        x.set('is-link', '1')
                    except IndexError:
                        current_hyperlink = None
                dest.append(span)
                self.layers[p].append(x)
            elif x.tag.endswith('}bookmarkStart'):
                anchor = self.namespace.get(x, 'w:name')
                if anchor and anchor not in self.anchor_map and anchor != '_GoBack':
                    # _GoBack is a special bookmark inserted by Word 2010 for
                    # the return to previous edit feature, we ignore it
                    old_anchor = current_anchor
                    # generate_anchor receives the anchor values already in
                    # use (presumably so the new one does not collide).
                    self.anchor_map[anchor] = current_anchor = generate_anchor(anchor, frozenset(itervalues(self.anchor_map)))
                    if old_anchor is not None:
                        # The previous anchor was not applied to any element,
                        # so re-point every name that mapped to it at the new
                        # anchor. tuple() snapshots the items because the
                        # mapping is modified inside the loop.
                        for a, t in tuple(iteritems(self.anchor_map)):
                            if t == old_anchor:
                                self.anchor_map[a] = current_anchor
            elif x.tag.endswith('}hyperlink'):
                current_hyperlink = x
            elif x.tag.endswith('}instrText') and x.text and x.text.strip().startswith('TOC '):
                # A TOC field instruction: register an anchor under a random
                # uuid key and remember it as self.toc_anchor.
                old_anchor = current_anchor
                anchor = unicode_type(uuid.uuid4())
                self.anchor_map[anchor] = current_anchor = generate_anchor('toc', frozenset(itervalues(self.anchor_map)))
                self.toc_anchor = current_anchor
                if old_anchor is not None:
                    # The previous anchor was not applied to any element
                    for a, t in tuple(iteritems(self.anchor_map)):
                        if t == old_anchor:
                            self.anchor_map[a] = current_anchor
        if current_anchor is not None:
            # An anchor is still pending after the loop (e.g. the paragraph
            # had no <w:r> descendants), so attach it to the paragraph.
            dest.set('id', current_anchor)
            current_anchor = None

        # Paragraph styles named "heading <n>" become <h1>..<h6>; n is clamped
        # to the range 1..6.
        m = re.match(r'heading\s+(\d+)$', style.style_name or '', re.IGNORECASE)
        if m is not None:
            n = min(6, max(1, int(m.group(1))))
            dest.tag = 'h%d' % n

        if style.bidi is True:
            dest.set('dir', 'rtl')

        # Group consecutive spans whose run styles report the same border.
        # NOTE: `style` is rebound here from the paragraph style to run styles.
        # NOTE(review): the span that ends a group is not used to start the
        # next one, and a group still open when the loop finishes is never
        # added to common_borders -- confirm this is intended.
        border_runs = []
        common_borders = []
        for span in dest:
            run = self.object_map[span]
            style = self.styles.resolve_run(run)
            if not border_runs or border_runs[-1][1].same_border(style):
                border_runs.append((span, style))
            elif border_runs:
                if len(border_runs) > 1:
                    common_borders.append(border_runs)
                border_runs = []

        # Move the border CSS of each group from the individual run styles
        # onto one wrapper SPAN carrying a registered 'text_border' class.
        for border_run in common_borders:
            spans = []
            bs = {}
            for span, style in border_run:
                style.get_border_css(bs)
                style.clear_border_css()
                spans.append(span)
            if bs:
                cls = self.styles.register(bs, 'text_border')
                wrapper = self.wrap_elems(spans, SPAN())
                wrapper.set('class', cls)

        # When len(dest) == 0 neither loop above executed, so `style` is still
        # the paragraph style when has_visible_border() is reached.
        if not dest.text and len(dest) == 0 and not style.has_visible_border():
            # Empty paragraph add a non-breaking space so that it is rendered
            # by WebKit
            dest.text = NBSP

        # If the last element in a block is a <br> the <br> is not rendered in
        # HTML, unless it is followed by a trailing space. Word, on the other
        # hand inserts a blank line for trailing <br>s.
        if len(dest) > 0 and not dest[-1].tail:
            if dest[-1].tag == 'br':
                dest[-1].tail = NBSP
            elif len(dest[-1]) > 0 and dest[-1][-1].tag == 'br' and not dest[-1][-1].tail:
                dest[-1][-1].tail = NBSP

        return dest

Example #3

Show file

    def convert_p(self, p):
        """Convert the paragraph element ``p`` into an HTML element.

        Creates a ``P()`` element, appends the span returned by
        ``self.convert_run`` for every ``w:r`` descendant of ``p``, and
        records bookkeeping in ``self.object_map``, ``self.layers``,
        ``self.anchor_map`` and ``self.link_map``. The tag becomes
        ``h1``..``h6`` for ``heading <n>`` styles, ``dir="rtl"`` is set for
        right-to-left paragraphs, and groups of spans with the same border
        are wrapped in a shared ``SPAN()``.

        Returns the new element (``dest``).
        """
        dest = P()
        self.object_map[dest] = p
        style = self.styles.resolve_paragraph(p)
        self.layers[p] = []
        self.add_frame(dest, style.frame)

        # current_anchor: anchor id generated for a bookmark that has not
        # been attached to an output element yet.
        # current_hyperlink: last <w:hyperlink> seen; cleared when a run has
        # no hyperlink ancestor.
        current_anchor = None
        current_hyperlink = None

        for x in descendants(p, 'w:r', 'w:bookmarkStart', 'w:hyperlink'):
            if x.tag.endswith('}r'):
                span = self.convert_run(x)
                if current_anchor is not None:
                    # Put the pending id on the paragraph itself while it is
                    # still empty, otherwise on this span.
                    (dest if len(dest) == 0 else span).set(
                        'id', current_anchor)
                    current_anchor = None
                if current_hyperlink is not None:
                    hl = ancestor(x, 'w:hyperlink')
                    if hl is not None:
                        self.link_map[hl].append(span)
                    else:
                        # This run is outside any hyperlink: the link ended.
                        current_hyperlink = None
                dest.append(span)
                self.layers[p].append(x)
            elif x.tag.endswith('}bookmarkStart'):
                anchor = get(x, 'w:name')
                # NOTE(review): if another bookmark arrives before any run,
                # current_anchor is overwritten and the earlier anchor value
                # stays in anchor_map without being set as an id; a bookmark
                # after the last run is likewise never applied -- confirm.
                # dict.itervalues() is the Python 2 API.
                if anchor and anchor not in self.anchor_map:
                    self.anchor_map[anchor] = current_anchor = generate_anchor(
                        anchor, frozenset(self.anchor_map.itervalues()))
            elif x.tag.endswith('}hyperlink'):
                current_hyperlink = x

        # Paragraph styles named "heading <n>" become <h1>..<h6>; n is clamped
        # to the range 1..6.
        m = re.match(r'heading\s+(\d+)$', style.style_name or '',
                     re.IGNORECASE)
        if m is not None:
            n = min(6, max(1, int(m.group(1))))
            dest.tag = 'h%d' % n

        if style.direction == 'rtl':
            dest.set('dir', 'rtl')

        # Group consecutive spans whose run styles report the same border.
        # NOTE: `style` is rebound here from the paragraph style to run styles.
        # NOTE(review): the span that ends a group is not used to start the
        # next one, and a group still open when the loop finishes is never
        # added to common_borders -- confirm this is intended.
        border_runs = []
        common_borders = []
        for span in dest:
            run = self.object_map[span]
            style = self.styles.resolve_run(run)
            if not border_runs or border_runs[-1][1].same_border(style):
                border_runs.append((span, style))
            elif border_runs:
                if len(border_runs) > 1:
                    common_borders.append(border_runs)
                border_runs = []

        # Move the border CSS of each group from the individual run styles
        # onto one wrapper SPAN carrying a registered 'text_border' class.
        for border_run in common_borders:
            spans = []
            bs = {}
            for span, style in border_run:
                style.get_border_css(bs)
                style.clear_border_css()
                spans.append(span)
            if bs:
                cls = self.styles.register(bs, 'text_border')
                wrapper = self.wrap_elems(spans, SPAN())
                wrapper.set('class', cls)

        return dest

Example #4

Show file

File: importxml.py Project: hanyu1986/editorsnotes

    def handle_filemaker(self, root, **options):
        """Import the rows of a FileMaker XML export as documents.

        ``root`` is the parsed XML root; it is queried with ``root.xpath``
        using the ``fmp`` namespace from ``NS``. ``options`` must contain
        ``'username'`` (the user recorded as creator/updater) and
        ``'force_update'`` (update transcripts even when metadata did not
        change).

        For every ``fmp:ROW`` the method creates or updates a collection
        document, a card document, its topic assignments, links, metadata and
        transcript. Rows whose ``Export`` value is ``'No'`` are deleted if
        already imported and skipped otherwise. Rows that fail validation are
        reported on ``self.stderr`` as warnings and skipped. Summary counts
        are written to ``self.stderr`` at the end.

        Raises ``CommandError`` when the user does not exist or when the field
        list in the file differs from the expected one.
        """

        # Prefix prepended to card IDs to build import IDs.
        ID_PREFIX = 'inglis:'

        # Load user to create/update documents as.
        try:
            user = User.objects.get(username=options['username'])
        except User.DoesNotExist:
            raise CommandError('unknown user: %s' % options['username'])

        # Verify that we have the fields we expect.
        expected_fields = [
            'CardID', 'Export', 'CardHeading', 'Transcription', 'CardType',
            'CardFormat', 'Language', 'ContributorJoin::Contributors',
            'SubjectJoin::Subject', 'OrganizationJoin::Organizations',
            'Citation', 'CatalogLink', 'ResourceLink', 'AdditionalNotes',
            'DateEntered'
        ]
        fields = root.xpath('./fmp:METADATA/fmp:FIELD/@NAME', namespaces=NS)
        if fields != expected_fields:
            # Comprehension variables are deliberately distinct from every
            # name the helper closures below rely on.
            new_fields = [
                name for name in fields if name not in expected_fields]
            missing_fields = [
                name for name in expected_fields if name not in fields]
            message = 'fields have changed:\n'
            message += '\n'.join(ndiff(expected_fields, fields)) + '\n\n'
            # Build each section separately so that they end up on separate
            # lines when both are present.
            sections = []
            if missing_fields:
                sections.append('Missing fields:\n  ' +
                                '\n  '.join(missing_fields))
            if new_fields:
                sections.append('Unexpected fields:\n  ' +
                                '\n  '.join(new_fields))
            message += '\n'.join(sections)
            raise CommandError(message)

        # Utility functions for accessing XML data.
        def text(e):
            # Text of e with each <BR> child rendered as a newline; any other
            # child element is rejected.
            content = e.text or ''
            for child in e:
                if child.tag != '{%s}BR' % NS['fmp']:
                    raise CommandError('Unexpected element: %s' % child)
                content += ('\n%s' % (child.tail or ''))
            return content.strip()

        def values(row, field):
            # Distinct non-empty texts in the column of `row` that sits at
            # the position of `field` in expected_fields.
            cell = row[expected_fields.index(field)]
            return list(set(v for v in (text(e) for e in cell) if v))

        def value(row, field):
            # Single value of `field`, None when absent, error when ambiguous.
            v = values(row, field)
            if len(v) == 0:
                return None
            elif len(v) == 1:
                return v[0]
            else:
                raise CommandError('multiple values for %s in record %s' %
                                   (field, row.get('RECORDID')))

        def row_to_dict(row):
            # 'Join::' fields are multi-valued lists, all others are scalars.
            d = {}
            for field in expected_fields:
                if 'Join::' in field:
                    d[field] = values(row, field)
                else:
                    d[field] = value(row, field)
            return d

        def assign_topic(document, user, topic_name, topic_type=''):
            # Attach the topic named `topic_name` to `document`, creating the
            # topic if needed. Returns 1 when a topic was created, else 0.
            topic, topic_created = Topic.objects.get_or_create(
                slug=Topic.make_slug(topic_name),
                defaults={
                    'preferred_name': topic_name,
                    'creator': user,
                    'last_updater': user
                })
            topic.type = topic_type
            topic.save()
            TopicAssignment.objects.create(content_object=document,
                                           topic=topic,
                                           creator=user)
            return 1 if topic_created else 0

        # Statistics.
        created_count = 0
        collections_created_count = 0
        topics_created_count = 0
        changed_count = 0
        unchanged_count = 0
        skipped_count = 0
        deleted_count = 0

        for row in root.xpath('./fmp:RESULTSET/fmp:ROW', namespaces=NS):
            try:
                md = row_to_dict(row)
                for field in [
                        'CardID', 'CardHeading', 'CardType', 'Transcription'
                ]:
                    if md[field] is None:
                        raise CommandError('missing %s value in record %s' %
                                           (field, row.get('RECORDID')))
            except CommandError as e:
                # A bad row is reported and skipped; it does not abort the
                # whole import.
                self.stderr.write(self.style.ERROR('Warning: %s\n' % e))
                skipped_count += 1
                continue

            if md['Export'] == 'No':
                # Cards flagged as not exported are removed if they were
                # imported earlier, otherwise just skipped.
                exists = Document.objects.filter(import_id__exact='%s%s' %
                                                 (ID_PREFIX, md['CardID']))
                if exists:
                    exists[0].delete()
                    deleted_count += 1
                else:
                    skipped_count += 1
                continue

            # NOTE(review): ID_PREFIX already ends with ':', so collection
            # ids contain a double colon ('inglis::<heading>'). Left as is
            # because this value is used as the get_or_create lookup key.
            collection_id = ID_PREFIX + (':%s' % md['CardHeading'])
            collection_description = P('%s (Agnes Inglis cards)' %
                                       md['CardHeading'])
            collection, collection_created = Document.objects.get_or_create(
                import_id=collection_id,
                defaults={
                    'description': collection_description,
                    'creator': user,
                    'last_updater': user
                })
            if collection_created:
                collections_created_count += 1

            description = P('%s -- %s (Agnes Inglis card #%s)' %
                            (md['CardHeading'], md['CardType'], md['CardID']))
            document, created = Document.objects.get_or_create(
                import_id=(ID_PREFIX + md['CardID']),
                defaults={
                    'description': description,
                    'creator': user,
                    'last_updater': user
                })
            document.description = description
            document.collection = collection
            document.language = md['Language']
            document.save()

            # Set document topics: drop the existing assignments, then
            # recreate them from the row.
            for topic_assignment in document.related_topics.all():
                topic_assignment.delete()

            for topic_name in md['ContributorJoin::Contributors']:
                topics_created_count += assign_topic(document, user,
                                                     topic_name, 'PER')
            for topic_name in md['OrganizationJoin::Organizations']:
                topics_created_count += assign_topic(document, user,
                                                     topic_name, 'ORG')
            for topic_name in md['SubjectJoin::Subject']:
                topics_created_count += assign_topic(document, user,
                                                     topic_name)

            # Set document links.
            for link in document.links.all():
                link.delete()
            for url in [md['CatalogLink'], md['ResourceLink']]:
                if url is not None:
                    document.links.create(url=url, creator=user)

            # Set document metadata.
            changed = document.set_metadata(md, user)

            # Create or update document transcript: one <p> whose lines are
            # separated by <br> elements (the trailing <br> is sliced off).
            transcript_lines = md['Transcription'].split('\n')
            transcript_html = P(*list(
                chain.from_iterable(
                    (line, BR()) for line in transcript_lines))[:-1])
            if created:
                Transcript.objects.create(document=document,
                                          content=transcript_html,
                                          creator=user,
                                          last_updater=user)
                created_count += 1
            elif changed or options['force_update']:
                document.transcript.content = transcript_html
                document.transcript.last_updater = user
                document.transcript.save()
                document.last_updater = user
                document.save()
                changed_count += 1
            else:
                unchanged_count += 1

        self.stderr.write('%s records skipped.\n' % skipped_count)
        self.stderr.write('%s records deleted.\n' % deleted_count)
        self.stderr.write('%s new documents created.\n' % created_count)
        self.stderr.write('%s new collections created.\n' %
                          collections_created_count)
        self.stderr.write('%s new topics created.\n' % topics_created_count)
        self.stderr.write('%s documents updated.\n' % changed_count)
        self.stderr.write('%s documents unchanged.\n' % unchanged_count)

Example #5

Show file

File: to_html.py Project: kmshi/calibre

    def convert_p(self, p):
        """Convert the paragraph element ``p`` into an HTML element.

        Creates a ``P()`` element, appends the span returned by
        ``self.convert_run`` for every ``w:r`` descendant of ``p``, and
        records bookkeeping in ``self.object_map``, ``self.layers``,
        ``self.anchor_map``, ``self.link_map`` and ``self.link_source_map``.
        The tag becomes ``h1``..``h6`` for ``heading <n>`` styles,
        ``dir="rtl"`` is set for right-to-left paragraphs, groups of spans
        with the same border are wrapped in a shared ``SPAN()``, and NBSP
        filler is added for empty paragraphs and trailing ``<br>`` elements.

        Returns the new element (``dest``).
        """
        dest = P()
        self.object_map[dest] = p
        style = self.styles.resolve_paragraph(p)
        self.layers[p] = []
        self.add_frame(dest, style.frame)

        # current_anchor: anchor id generated for a bookmark that has not
        # been attached to an output element yet.
        # current_hyperlink: last <w:hyperlink> seen; cleared when a run has
        # no hyperlink ancestor.
        current_anchor = None
        current_hyperlink = None
        hl_xpath = XPath('ancestor::w:hyperlink[1]')

        for x in descendants(p, 'w:r', 'w:bookmarkStart', 'w:hyperlink'):
            if x.tag.endswith('}r'):
                span = self.convert_run(x)
                if current_anchor is not None:
                    # Put the pending id on the paragraph itself while it is
                    # still empty, otherwise on this span.
                    (dest if len(dest) == 0 else span).set(
                        'id', current_anchor)
                    current_anchor = None
                if current_hyperlink is not None:
                    try:
                        # [0] raises IndexError when the run has no
                        # w:hyperlink ancestor, i.e. the link has ended.
                        hl = hl_xpath(x)[0]
                        self.link_map[hl].append(span)
                        self.link_source_map[hl] = self.current_rels
                        x.set('is-link', '1')
                    except IndexError:
                        current_hyperlink = None
                dest.append(span)
                self.layers[p].append(x)
            elif x.tag.endswith('}bookmarkStart'):
                anchor = get(x, 'w:name')
                # NOTE(review): an anchor still pending after the last run is
                # never set as an id in this version -- confirm.
                # dict.itervalues()/iteritems() are the Python 2 API.
                if anchor and anchor not in self.anchor_map:
                    old_anchor = current_anchor
                    self.anchor_map[anchor] = current_anchor = generate_anchor(
                        anchor, frozenset(self.anchor_map.itervalues()))
                    if old_anchor is not None:
                        # The previous anchor was not applied to any element,
                        # so re-point every name that mapped to it at the new
                        # anchor. tuple() snapshots the items because the
                        # mapping is modified inside the loop.
                        for a, t in tuple(self.anchor_map.iteritems()):
                            if t == old_anchor:
                                self.anchor_map[a] = current_anchor
            elif x.tag.endswith('}hyperlink'):
                current_hyperlink = x

        # Paragraph styles named "heading <n>" become <h1>..<h6>; n is clamped
        # to the range 1..6.
        m = re.match(r'heading\s+(\d+)$', style.style_name or '',
                     re.IGNORECASE)
        if m is not None:
            n = min(6, max(1, int(m.group(1))))
            dest.tag = 'h%d' % n

        if style.direction == 'rtl':
            dest.set('dir', 'rtl')

        # Group consecutive spans whose run styles report the same border.
        # NOTE: `style` is rebound here from the paragraph style to run styles.
        # NOTE(review): the span that ends a group is not used to start the
        # next one, and a group still open when the loop finishes is never
        # added to common_borders -- confirm this is intended.
        border_runs = []
        common_borders = []
        for span in dest:
            run = self.object_map[span]
            style = self.styles.resolve_run(run)
            if not border_runs or border_runs[-1][1].same_border(style):
                border_runs.append((span, style))
            elif border_runs:
                if len(border_runs) > 1:
                    common_borders.append(border_runs)
                border_runs = []

        # Move the border CSS of each group from the individual run styles
        # onto one wrapper SPAN carrying a registered 'text_border' class.
        for border_run in common_borders:
            spans = []
            bs = {}
            for span, style in border_run:
                style.get_border_css(bs)
                style.clear_border_css()
                spans.append(span)
            if bs:
                cls = self.styles.register(bs, 'text_border')
                wrapper = self.wrap_elems(spans, SPAN())
                wrapper.set('class', cls)

        if not dest.text and len(dest) == 0:
            # Empty paragraph add a non-breaking space so that it is rendered
            # by WebKit
            dest.text = NBSP

        # If the last element in a block is a <br> the <br> is not rendered in
        # HTML, unless it is followed by a trailing space. Word, on the other
        # hand inserts a blank line for trailing <br>s.
        if len(dest) > 0 and not dest[-1].tail:
            if dest[-1].tag == 'br':
                dest[-1].tail = NBSP
            elif len(dest[-1]) > 0 and dest[-1][
                    -1].tag == 'br' and not dest[-1][-1].tail:
                dest[-1][-1].tail = NBSP

        return dest