Example #1
0
    def _parse(self, source, encoding):
        """Translate parser events from ``source`` into template events.

        ``source`` is wrapped in an ``XMLParser`` (given ``self.filename`` and
        ``encoding``) unless it already is a ``Stream``.  The events are then
        collected into the local ``stream`` list.

        Raises ``TemplateSyntaxError`` when a ``<?python ?>`` processing
        instruction is met while ``self.allow_exec`` is false, or when
        building its ``Suite`` raises ``SyntaxError``.

        NOTE(review): only the TEXT, ``<?python ?>`` and COMMENT branches are
        visible here and no ``return`` is shown -- the method presumably
        continues beyond this excerpt; confirm against the full source.
        """
        if not isinstance(source, Stream):
            source = XMLParser(source,
                               filename=self.filename,
                               encoding=encoding)
        stream = []

        for kind, data, pos in source:

            if kind is TEXT:
                # The text is handed to interpolate() and every event it
                # yields is stored instead of the original TEXT event.
                # Note that this inner loop rebinds kind/data/pos.
                # pos[1] and pos[2] are presumably line and column -- TODO
                # confirm against the parser's position tuples.
                for kind, data, pos in interpolate(data,
                                                   self.filepath,
                                                   pos[1],
                                                   pos[2],
                                                   lookup=self.lookup):
                    stream.append((kind, data, pos))

            elif kind is PI and data[0] == 'python':
                # <?python ... ?> blocks are only accepted when the template
                # was created with allow_exec enabled.
                if not self.allow_exec:
                    raise TemplateSyntaxError('Python code blocks not allowed',
                                              self.filepath, *pos[1:])
                try:
                    suite = Suite(data[1],
                                  self.filepath,
                                  pos[1],
                                  lookup=self.lookup)
                except SyntaxError, err:
                    # Offset the error position by the position of the
                    # processing instruction; a missing lineno/offset on the
                    # SyntaxError falls back to 1 and 0 respectively.
                    raise TemplateSyntaxError(err, self.filepath,
                                              pos[1] + (err.lineno or 1) - 1,
                                              pos[2] + (err.offset or 0))
                stream.append((EXEC, suite, pos))

            elif kind is COMMENT:
                # Comments whose text (ignoring leading whitespace) starts
                # with '!' are dropped; all other comments are kept.
                if not data.lstrip().startswith('!'):
                    stream.append((kind, data, pos))
Example #2
0
 def test_text_node_pos_single_line(self):
     # The text node follows the six-character ``<elem>`` start tag, so the
     # expected position is line 1, column 6 (no filename was given).
     markup = '<elem>foo bar</elem>'
     kind, data, pos = list(XMLParser(StringIO(markup)))[1]
     self.assertEqual(Stream.TEXT, kind)
     self.assertEqual('foo bar', data)
     self.assertEqual((None, 1, 6), pos)
Example #3
0
 def test_undefined_entity_with_dtd(self):
     # Declaring the XHTML DOCTYPE must not make an unknown entity
     # (``&junk;``) acceptable: consuming the parser has to raise ParseError.
     markup = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
     <html>&junk;</html>
     """
     self.assertRaises(ParseError, list, XMLParser(StringIO(markup)))
Example #4
0
    def subtemplates(self, tpl=None, _active=None):
        """Collect the templates pulled in through XInclude, transitively.

        :param tpl: template whose includes are collected; defaults to
            ``self``.  Its ``path`` attribute names the file that is parsed.
        :param _active: internal bookkeeping -- the paths of the templates
            currently being expanded further up the call chain.  Callers
            should leave it unset.
        :return: list of loaded templates: the direct includes of ``tpl`` in
            document order, followed by the templates those include in turn.

        Includes that have no ``href`` attribute, or whose ``href`` cannot be
        loaded (``TemplateNotFound``), are skipped.  A template that
        (directly or indirectly) includes one of its own ancestors is listed
        but not expanded again, so include cycles terminate instead of
        recursing without bound.
        """
        if tpl is None:
            tpl = self
        if _active is None:
            _active = set()

        found = []
        f = open(tpl.path, 'r')
        try:
            for kind, data, pos in XMLParser(f, filename=tpl.path):
                if kind is not START:
                    continue
                tag, attrib = data
                if tag.namespace != 'http://www.w3.org/2001/XInclude' \
                        or tag.localname != 'include':
                    continue
                subtpl_ident = attrib.get('href')
                if subtpl_ident is None:
                    # Nothing to resolve; treat it like an ident that cannot
                    # be loaded rather than handing None to the loader.
                    continue
                try:
                    subtpl = self.loader.load(subtpl_ident)
                except TemplateNotFound:
                    # This will fail later, here we just need to ignore
                    # template idents that are dynamically computed.
                    pass
                else:
                    found.append(subtpl)
        finally:
            f.close()

        _active.add(tpl.path)
        try:
            # ``found`` grows while it is walked: templates discovered through
            # recursion are appended and visited in turn, hence the explicit
            # index instead of a ``for`` loop over a list being mutated.
            index = 0
            while index < len(found):
                subtemplate = found[index]
                index += 1
                if subtemplate.path in _active:
                    # Already being expanded by an enclosing call: descending
                    # again would never terminate.
                    continue
                for new_subtpl in self.subtemplates(subtemplate, _active):
                    if new_subtpl not in found:
                        found.append(new_subtpl)
        finally:
            _active.discard(tpl.path)

        return found
Example #5
0
 def test_html_entity_in_attribute(self):
     # ``&nbsp;`` inside an attribute value is expected to be resolved to
     # U+00A0; the self-closing element yields a START then an END event.
     markup = '<p title="&nbsp;"/>'
     parsed = list(XMLParser(StringIO(markup)))

     start_kind, start_data, start_pos = parsed[0]
     self.assertEqual(Stream.START, start_kind)
     self.assertEqual(u'\xa0', start_data[1].get('title'))

     end_kind, end_data, end_pos = parsed[1]
     self.assertEqual(Stream.END, end_kind)
Example #6
0
 def test_latin1_encoded_xmldecl(self):
     # The input is ISO-8859-1 bytes and only the XML declaration names the
     # encoding; the text node must still come back as the unicode U+00F6.
     encoded = u"""<?xml version="1.0" encoding="iso-8859-1" ?>
     <div>\xf6</div>
     """.encode('iso-8859-1')
     kind, data, pos = list(XMLParser(StringIO(encoded)))[2]
     self.assertEqual(Stream.TEXT, kind)
     self.assertEqual(u'\xf6', data)
Example #7
0
 def test_xmldecl_encoding(self):
     # An XML declaration carrying only version and encoding: the first
     # event must be XML_DECL with standalone reported as -1.
     markup = '<?xml version="1.0" encoding="utf-8" ?><root />'
     first_event = list(XMLParser(StringIO(markup)))[0]
     kind, (version, encoding, standalone), pos = first_event
     self.assertEqual(Stream.XML_DECL, kind)
     self.assertEqual(u'1.0', version)
     self.assertEqual(u'utf-8', encoding)
     self.assertEqual(-1, standalone)
Example #8
0
 def test_xmldecl_standalone(self):
     # An XML declaration with ``standalone="yes"`` and no encoding: the
     # event must report encoding None and standalone 1.
     markup = '<?xml version="1.0" standalone="yes" ?><root />'
     first_event = list(XMLParser(StringIO(markup)))[0]
     kind, (version, encoding, standalone), pos = first_event
     self.assertEqual(Stream.XML_DECL, kind)
     self.assertEqual(u'1.0', version)
     self.assertEqual(None, encoding)
     self.assertEqual(1, standalone)
Example #9
0
    def test_text_node_pos_multi_line(self):
        text = '''<elem>foo
bar</elem>'''
        events = list(XMLParser(StringIO(text)))
        kind, data, pos = events[1]
        self.assertEqual(Stream.TEXT, kind)
        self.assertEqual(u'foo\nbar', data)
        if sys.version_info[:2] >= (2, 4):
            self.assertEqual((None, 1, -1), pos)
Example #10
0
 def test_html_entity_with_dtd(self):
     # With the XHTML DOCTYPE declared, ``&nbsp;`` is expected to arrive as
     # a TEXT event holding U+00A0 (third event of the stream).
     markup = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
     <html>&nbsp;</html>
     """
     kind, data, pos = list(XMLParser(StringIO(markup)))[2]
     self.assertEqual(Stream.TEXT, kind)
     self.assertEqual(u'\xa0', data)
Example #11
0
 def test_element_attribute_order(self):
     # Attributes must be reported in the order they appear in the markup.
     markup = '<elem title="baz" id="foo" class="bar" />'
     kind, data, pos = list(XMLParser(StringIO(markup)))[0]
     self.assertEqual(Stream.START, kind)
     tag, attrib = data
     self.assertEqual(u'elem', tag)
     expected_pairs = [(u'title', u'baz'), (u'id', u'foo'), (u'class', u'bar')]
     for index, pair in enumerate(expected_pairs):
         self.assertEqual(pair, attrib[index])
Example #12
0
    def extract_javascript_script(fileobj, keywords, comment_tags, options):
        """Extract messages from JavaScript embedded in ``<script>`` tags.

        ``fileobj`` is parsed as XML, every ``<script type="text/javascript">``
        element is rendered into an in-memory buffer, and that buffer is
        handed to `extract_javascript` together with the unchanged
        ``keywords``, ``comment_tags`` and ``options``.
        """
        from genshi.core import Stream
        from genshi.input import XMLParser

        script_xpath = '//script[@type="text/javascript"]'
        buf = StringIO()
        events = Stream(XMLParser(fileobj))
        events.select(script_xpath).render(out=buf)
        # Rewind so the delegate reads the rendered scripts from the start.
        buf.seek(0)
        return extract_javascript(buf, keywords, comment_tags, options)
Example #13
0
class XHTMLParser(object):
    """Iterate over the parser events of an XHTML fragment.

    The fragment is wrapped in a synthetic ``<div>`` before it is given to
    ``XMLParser``.  Events seen while the nesting depth is zero -- which
    covers the wrapper's own START and END -- are skipped.
    """

    def __init__(self, text):
        wrapped = "<div>%s</div>" % text
        self.parser = XMLParser(StringIO(wrapped))
        self.depth = 0

    def __iter__(self):
        self.iter = self.parser.__iter__()
        return self

    def next(self):
        """Return the next event that lies inside the wrapper element."""
        while True:
            event = self.iter.next()
            kind = event[0]
            if kind == 'END':
                self.depth -= 1
            # Depth after an END has been applied but before a START is:
            # zero means the event belongs to the wrapper level.
            depth_before_start = self.depth
            if kind == 'START':
                self.depth += 1
            if depth_before_start:
                return event
Example #14
0
class XHTMLParser(object):
    """Iterate over the parser events of an XHTML fragment.

    The fragment is wrapped in a synthetic ``<div>`` before it is given to
    ``XMLParser``.  Events seen while the nesting depth is zero -- which
    covers the wrapper's own START and END -- are skipped.
    """

    def __init__(self, text):
        wrapped = "<div>%s</div>" % text
        self.parser = XMLParser(StringIO(wrapped))
        self.depth = 0

    def __iter__(self):
        self.iter = self.parser.__iter__()
        return self

    def next(self):
        """Return the next event that lies inside the wrapper element."""
        while True:
            event = self.iter.next()
            kind = event[0]
            if kind == 'END':
                self.depth -= 1
            # Depth after an END has been applied but before a START is:
            # zero means the event belongs to the wrapper level.
            depth_before_start = self.depth
            if kind == 'START':
                self.depth += 1
            if depth_before_start:
                return event
Example #15
0
 def test_undefined_entity_without_dtd(self):
     # Without any DOCTYPE an unknown entity (``&junk;``) must make
     # consuming the parser raise ParseError.
     markup = '<html>&junk;</html>'
     self.assertRaises(ParseError, list, XMLParser(StringIO(markup)))
Example #16
0
 def test_html_entity_without_dtd(self):
     # ``&nbsp;`` is expected to resolve to U+00A0 even though the document
     # declares no DOCTYPE.
     markup = '<html>&nbsp;</html>'
     kind, data, pos = list(XMLParser(StringIO(markup)))[1]
     self.assertEqual(Stream.TEXT, kind)
     self.assertEqual(u'\xa0', data)
Example #17
0
 def __init__(self, text):
     # The fragment is wrapped in a <div> before parsing (presumably so the
     # parser always sees a single root element); depth starts at zero.
     wrapped = "<div>%s</div>" % text
     self.parser = XMLParser(StringIO(wrapped))
     self.depth = 0
Example #18
0
 def test_latin1_encoded(self):
     # ISO-8859-1 bytes with the encoding passed explicitly to the parser:
     # the text node must come back as the unicode U+00F6.
     encoded = u'<div>\xf6</div>'.encode('iso-8859-1')
     parser = XMLParser(StringIO(encoded), encoding='iso-8859-1')
     kind, data, pos = list(parser)[1]
     self.assertEqual(Stream.TEXT, kind)
     self.assertEqual(u'\xf6', data)
Example #19
0
 def test_unicode_input(self):
     # Unicode input containing U+2013 must yield that same character in
     # the TEXT event.
     markup = u'<div>\u2013</div>'
     kind, data, pos = list(XMLParser(StringIO(markup)))[1]
     self.assertEqual(Stream.TEXT, kind)
     self.assertEqual(u'\u2013', data)
Example #20
0
def run(script, doc, output_file=None, options=None):
    """Process a Genshi template.

    ``script`` is the path of the template file and ``doc`` the XML document
    text it is applied to.  ``options`` is an optional mapping whose items
    are passed as keyword arguments to the template ``Context``.

    * Without ``output_file`` (filter mode) the parser events of ``doc`` are
      made available to the template as ``input`` and the rendered XML is
      returned.
    * With ``output_file`` (template mode) ``doc`` is parsed with feedparser,
      its entries are annotated (source configuration, ``new_feed`` /
      ``new_date`` markers, streams for text constructs), the result is
      pushed onto the context, and the rendered XML is written to
      ``output_file``; nothing is returned.

    In template mode every configured subscription is also appended to the
    module-level ``subscriptions`` list on each call.
    """
    # A ``None`` default replaces the shared mutable ``{}`` default.
    if options is None:
        options = {}
    context = Context(**options)

    # Close the template file even when parsing the template fails.
    tmpl_fileobj = open(script)
    try:
        tmpl = MarkupTemplate(tmpl_fileobj, script)
    finally:
        tmpl_fileobj.close()

    if not output_file:
        # filter
        context.push({'input': XMLParser(StringIO(doc))})
    else:
        # template
        import time
        from planet import config, feedparser
        from planet.spider import filename

        # gather a list of subscriptions, feeds
        global subscriptions
        feeds = []
        sources = config.cache_sources_directory()
        for sub in config.subscriptions():
            data = feedparser.parse(filename(sources, sub))
            data.feed.config = norm(dict(config.parser.items(sub)))
            # only feeds that carry a link are listed
            if data.feed.has_key('link'):
                feeds.append((data.feed.config.get('name', ''), data.feed))
            subscriptions.append(norm(sub))
        feeds.sort()

        # annotate each entry
        new_date_format = config.new_date_format()
        parsed = feedparser.parse(StringIO(doc))
        parsed.feeds = [value for name, value in feeds]
        last_feed = None
        last_date = None
        for entry in parsed.entries:
            entry.source.config = find_config(config, entry.source)

            # add new_feed and new_date fields; when both dates are present
            # the updated date wins over the published date
            entry.new_feed = entry.source.id
            entry.new_date = date = None
            if entry.has_key('published_parsed'):
                date = entry.published_parsed
            if entry.has_key('updated_parsed'):
                date = entry.updated_parsed
            if date:
                entry.new_date = time.strftime(new_date_format, date)

            # remove new_feed and new_date fields if not "new"
            if entry.new_date == last_date:
                entry.new_date = None
                if entry.new_feed == last_feed:
                    entry.new_feed = None
                else:
                    last_feed = entry.new_feed
            elif entry.new_date:
                last_date = entry.new_date
                last_feed = None

            # add streams for all text constructs
            for key in entry.keys():
                if key.endswith("_detail") and entry[key].has_key('type') and \
                        entry[key].has_key('value'):
                    streamify(entry[key], entry.source.planet_bozo)
            if entry.has_key('content'):
                for content in entry.content:
                    streamify(content, entry.source.planet_bozo)

        # add cumulative feed information to the Genshi context
        parsed.feed.config = dict(config.parser.items('Planet', True))
        context.push(parsed)

    # apply template
    output = tmpl.generate(context).render('xml')

    if output_file:
        # Close the output file even when the write fails.
        out_file = open(output_file, 'w')
        try:
            out_file.write(output)
        finally:
            out_file.close()
    else:
        return output
Example #21
0
 def __init__(self, text):
     # The fragment is wrapped in a <div> before parsing (presumably so the
     # parser always sees a single root element); depth starts at zero.
     wrapped = "<div>%s</div>" % text
     self.parser = XMLParser(StringIO(wrapped))
     self.depth = 0
Example #22
0
#!/usr/bin/python
import sys
from pprint import pprint

from genshi.input import XMLParser

# Pretty-print every event the Genshi XML parser produces for the file named
# by the first command-line argument.
with open(sys.argv[1]) as source:
    events = list(XMLParser(source))
    pprint(events)