Example #1
0
def parse_html_lxml(html):
    '''
    Parse an HTML logfile with lxml and return a list of Message objects.

    Only <div> elements matched by //html/body/div are considered. A div is
    converted when its class attribute contains "message" and what remains
    after removing "message" and stripping whitespace is exactly "incoming"
    or "outgoing". If reading a div raises, the traceback is printed and
    that div is skipped.
    '''

    root = lxml.html.document_fromstring(html, parser = lxmlparser())
    parsed = []

    for node in root.xpath('//html/body/div'):
        try:
            css_class = node.attrib.get('class', '')
            if 'message' not in css_class:
                continue

            direction = css_class.replace('message', '').strip()
            if direction not in ('incoming', 'outgoing'):
                continue

            sender = node.find_class('buddy')[0].text

            # the timestamp attribute is optional; only parse it when present
            when = node.attrib.get('timestamp')
            if when is not None:
                when = parse_timestamp(when)

            body = render_contents(node.find_class('msgcontent')[0])
            is_auto = boolify(node.attrib.get('auto', 'false'))
        except Exception:
            # best effort: report the broken entry and move on to the next
            print_exc()
            continue

        # Message construction stays outside the try so its errors propagate
        parsed.append(Message(buddy = S(name=sender),
                              timestamp = when,
                              message = body,
                              type = direction,
                              auto = is_auto,
                              has_autotext = is_auto,
                              ))

    return parsed
Example #2
0
    def from_xml(self, x):
        '''
        Fill in this object's fields from the Atom XML element ``x``.

        Children of ``x`` are read as attributes named
        ``{<constants.NS.Atom>}<tag>``; presumably ``x`` is an lxml.objectify
        element -- TODO confirm against callers.

        Sets ``id``, ``title``, ``author_id``, ``author_url``,
        ``author_name``, ``source``, ``url``, ``icon_url``, ``preview_url``
        and ``contents`` directly in ``self.__dict__``. Returns None.

        Links that carry no ``href`` attribute are ignored, so the URL fields
        are either None or a real attribute value.
        '''
        def atomget(k, default=None):
            # look up an Atom-namespaced child of x
            return getattr(x, '{%s}%s' % (constants.NS.Atom, k), default)

        # named entry_id locally so the builtin id() is not shadowed
        entry_id = unicode(atomget('id') or '')
        title = unicode(atomget('title') or '')

        author_tag = atomget('author')
        if author_tag is None:
            author_name = author_id = author_url = None
        else:
            author_name = unicode(author_tag.name)
            # the author's uri is used as both its id and its url
            author_id = author_url = unicode(author_tag.uri)

        source_tag = atomget('source')
        if source_tag is None:
            source = None
        else:
            source = ActivitySource()
            source.populate(source_tag, InputType.XML)

        icon_url = None
        preview_url = None
        url = None

        for link in atomget('link', []):
            rel = link.attrib.get('rel')
            if rel not in ('icon', 'preview', 'alternate'):
                continue

            href = link.attrib.get('href')
            if href is None:
                # unicode(None) would store the text u'None' as a URL, and
                # for 'alternate' it would also block a later valid link
                continue
            href = unicode(href)

            if rel == 'icon':
                icon_url = href
            elif rel == 'preview':
                preview_url = href
            elif url is None:
                # rel == 'alternate': only the first usable one is kept
                url = href

        # NOTE(review): a content element without a type attribute is still
        # recorded with the text u'None' as its type; left unchanged because
        # callers may expect a string here -- confirm before altering.
        contents = [(unicode(content.attrib.get('type')),
                     htmlutils.render_contents(content))
                    for content in atomget('content', [])]

        self.__dict__.update(
            id=entry_id,
            title=title,
            author_id=author_id,
            author_url=author_url,
            author_name=author_name,
            source=source,
            url=url,
            icon_url=icon_url,
            preview_url=preview_url,
            contents=contents,
        )
Example #3
0
    def from_xml(self, x):
        """
        Populate this object from the Atom XML element ``x``.

        Each child is fetched with ``getattr(x, "{<constants.NS.Atom>}<tag>")``;
        presumably ``x`` is an lxml.objectify element -- TODO confirm against
        callers.

        Writes ``id``, ``title``, ``author_id``, ``author_url``,
        ``author_name``, ``source``, ``url``, ``icon_url``, ``preview_url``
        and ``contents`` into ``self.__dict__`` and returns None.

        A link element with no ``href`` attribute is skipped, so each URL
        field ends up as None or as an actual attribute value.
        """
        def atomget(k, default=None):
            # fetch an Atom-namespaced child of x, or default when absent
            return getattr(x, "{%s}%s" % (constants.NS.Atom, k), default)

        # local is called entry_id to avoid shadowing the builtin id()
        entry_id = unicode(atomget("id") or "")
        title = unicode(atomget("title") or "")

        author_tag = atomget("author")
        if author_tag is None:
            author_name = author_id = author_url = None
        else:
            author_name = unicode(author_tag.name)
            # the uri serves as both the author's id and the author's url
            author_id = author_url = unicode(author_tag.uri)

        source_tag = atomget("source")
        if source_tag is None:
            source = None
        else:
            source = ActivitySource()
            source.populate(source_tag, InputType.XML)

        icon_url = None
        preview_url = None
        url = None

        for link in atomget("link", []):
            rel = link.attrib.get("rel")
            if rel not in ("icon", "preview", "alternate"):
                continue

            href = link.attrib.get("href")
            if href is None:
                # converting None with unicode() would record the text
                # u"None" as a URL and, for "alternate", shadow a later
                # link that does have an href
                continue
            href = unicode(href)

            if rel == "icon":
                icon_url = href
            elif rel == "preview":
                preview_url = href
            elif url is None:
                # rel == "alternate": keep only the first usable one
                url = href

        # NOTE(review): a missing type attribute is still stored as the text
        # u"None"; kept as-is since callers may rely on getting a string --
        # confirm before changing.
        contents = [
            (unicode(content.attrib.get("type")), htmlutils.render_contents(content))
            for content in atomget("content", [])
        ]

        self.__dict__.update(
            id=entry_id,
            title=title,
            author_id=author_id,
            author_url=author_url,
            author_name=author_name,
            source=source,
            url=url,
            icon_url=icon_url,
            preview_url=preview_url,
            contents=contents,
        )
Example #4
0
def parse_html_lxml(html):
    '''
    Turn the HTML of a logfile into a list of Message objects using lxml.

    Every div matched by //html/body/div is inspected. Divs whose class
    attribute lacks "message", or whose class (minus "message", stripped) is
    neither "incoming" nor "outgoing", are ignored. Any exception raised
    while extracting a div's fields is printed and that div is dropped.
    '''

    directions = ('incoming', 'outgoing')
    messages = []

    document = lxml.html.document_fromstring(html, parser=lxmlparser())
    for entry in document.xpath('//html/body/div'):
        try:
            classes = entry.attrib.get('class', '')
            if 'message' not in classes:
                continue

            kind = classes.replace('message', '').strip()
            if kind not in directions:
                continue

            name = entry.find_class('buddy')[0].text
            stamp = entry.attrib.get('timestamp')
            # leave a missing timestamp as None rather than parsing it
            stamp = parse_timestamp(stamp) if stamp is not None else None
            text = render_contents(entry.find_class('msgcontent')[0])
            autoreply = boolify(entry.attrib.get('auto', 'false'))
        except Exception:
            print_exc()
        else:
            # only reached when every field above was read successfully
            record = Message(
                buddy=S(name=name),
                timestamp=stamp,
                message=text,
                type=kind,
                auto=autoreply,
                has_autotext=autoreply,
            )
            messages.append(record)

    return messages
Example #5
0
def tree_to_string(tree):
    'Returns the result of render_contents(tree).'
    # Previously defined twice in a row with identical bodies; the second
    # definition only rebound the first, so a single one is kept.
    return render_contents(tree)