def startElement(self, name, attrs):
    """Handle an opening tag while parsing a feed/entry document.

    Tags opened while "content" or "summary" is the innermost open
    element are dropped without being forwarded to SAXTracker, so the
    markup embedded in those elements is ignored.

    Otherwise the tag is forwarded to SAXTracker.startElement and then:

    * "feed" pushes its xml:base ("" when absent) and makes the site the
      current object;
    * "entry" creates a new item through the factory, adds it to the
      site, pushes its xml:base and clears the pending summary;
    * "link" whose rel is "alternate" (the default when rel is missing)
      sets the link of the entry or of the site, depending on which of
      the two directly encloses it.
    """
    open_elems = self._elemstack
    if open_elems and open_elems[-1] in ("content", "summary"):
        return  # ignore the embedded markup

    # The enclosing element is captured before delegating to SAXTracker.
    parent = None
    if open_elems:
        parent = open_elems[-1]

    SAXTracker.startElement(self, name, attrs)

    if name == "feed":
        self._push_base(attrs.get("xml:base", ""))
        self._obj = self._site
    elif name == "entry":
        self._obj = self._factory.make_item(self._site)
        self._site.add_item(self._obj)
        self._push_base(attrs.get("xml:base", ""))
        self._summary = None
    elif name == "link" and attrs.get("rel", "alternate") == "alternate":
        # href is only looked up once we know the link is one we handle.
        if parent == "entry":
            self._obj.set_link(self._get_full_url(attrs["href"]))
        elif parent == "feed":
            self._site.set_link(self._get_full_url(attrs["href"]))
def __init__(self):
    """Create the handler with a fresh Server as the current object.

    Only the "param" element is passed to SAXTracker as a tracked
    element.  No collection, feed or attribute set is current yet.
    """
    SAXTracker.__init__(self, ["param"])
    server = Server(None, None)
    self._server = server
    self._coll = self._feed = None
    self._obj = server  # the server itself is the initial current object
    self._attrs = None
def __init__(self, site, factory):
    """Create a handler that fills ``site`` using objects from ``factory``.

    The list of element names below is handed to SAXTracker.  The base
    stack starts with the URL reported by ``site.get_url()``.
    """
    tracked = [
        "title",
        "id",
        "published",
        "content",
        "name",
        "summary",
        "subtitle",
        "updated",
    ]
    SAXTracker.__init__(self, tracked)

    self._site = site
    self._obj = None
    self._factory = factory
    self._bases = [self._site.get_url()]
    self._summary = None  # no summary seen yet
def __init__(self, site, factory):
    """Create a handler that fills ``site`` using objects from ``factory``.

    The list of element names below is handed to SAXTracker.  No object
    is current until an element that selects one is opened.
    """
    tracked = [
        "title",
        "link",
        "description",
        "url",
        "name",
        "language",
        "rating",
        "copyright",
        "pubDate",
        "lastBuildDate",
        "docs",
        "webMaster",
        "managingEditor",
        "guid",
        "atom:summary",
        "author",
        "dc:creator",
        "dc:date",
    ]
    SAXTracker.__init__(self, tracked)

    self._site = site
    self._obj = None
    self._factory = factory
def startElement(self, name, attrs):
    """Register one feed in the registry for every "outline" element.

    The feed is created by the factory from the outline's "xmlUrl"
    attribute, falling back to "htmlUrl" (and to None when neither is
    present).  Its title and format come from the "title" and "type"
    attributes, and its link is set only when "htmlUrl" is present.

    Every tag, outline or not, is first forwarded to
    SAXTracker.startElement.
    """
    SAXTracker.startElement(self, name, attrs)
    if name != "outline":
        return

    html_url = attrs.get("htmlUrl")
    feed = self._factory.make_site(attrs.get("xmlUrl", html_url))
    feed.set_title(attrs.get("title"))
    feed.set_format(attrs.get("type"))
    # Membership test instead of has_key(): has_key() is gone from
    # Python 3 mappings and SAX attribute objects, while ``in`` works on
    # both Python 2 and Python 3.
    if "htmlUrl" in attrs:
        feed.set_link(html_url)
    self._registry.add_feed(feed)
def endElement(self, name):
    """Store the text of a closed element on the current object or site.

    The tag is first forwarded to SAXTracker.endElement; the text used
    below is whatever ``self._contents`` holds afterwards.

    * title, link, description, guid, author and pubDate are set on the
      current object (``self._obj``).
    * atom:summary, dc:creator and dc:date are fallbacks: they are only
      used when the current object has no description, author or
      publication date yet (dc:creator additionally requires the current
      object to be an Item).
    * url inside image and name inside textinput are stored on the
      site's image and text input.
    * language, rating, copyright, lastBuildDate, docs, managingEditor
      and webMaster are stored as attributes of the site.
    * Closing image, textinput or item makes the site the current object
      again, so that channel-level elements that follow are not written
      onto the object that has just been completed.
    """
    SAXTracker.endElement(self, name)

    # Innermost element still open once this one has been closed.
    parent = None
    if self._elemstack:
        parent = self._elemstack[-1]

    # Elements whose text is stored verbatim as an attribute of the site.
    site_fields = {
        "language": "lang",
        "rating": "rating",
        "copyright": "copyright",
        "lastBuildDate": "lastbuild",
        "docs": "docs",
        "managingEditor": "editor",
        "webMaster": "webmaster",
    }

    if name == "title":
        self._obj.set_title(self._contents)
    elif name == "link":
        self._obj.set_link(self._contents)
    elif name == "description":
        self._obj.set_description(self._contents)
    elif name == "guid":
        self._obj.set_guid(self._contents)
    elif name == "atom:summary":
        # Only a fallback for a missing description.
        if not self._obj.get_description():
            self._obj.set_description(self._contents)
    elif name == "author":
        self._obj.set_author(self._contents)
    elif name == "dc:creator":
        # Only a fallback for a missing author, and only on items.
        if isinstance(self._obj, Item) and not self._obj.get_author():
            self._obj.set_author(self._contents)
    elif name == "dc:date":
        # Only a fallback for a missing publication date.
        if not self._obj.get_pubdate():
            self._obj.set_pubdate(self._contents)
    elif name == "url" and parent == "image":
        self._site.image.url = self._contents
    elif name == "name" and parent == "textinput":
        self._site.input.name = self._contents
    elif name == "pubDate":
        self._obj.set_pubdate(self._contents)
    elif name in site_fields:
        setattr(self._site, site_fields[name], self._contents)
    elif name in ("image", "textinput") or \
            (name == "item" and parent != "keywords"):
        # Restore the site as the current object.  Previously this was
        # done for image only, so a channel-level title, link or
        # description following a text input or an item ended up on
        # that stale object.  The "keywords" test mirrors the condition
        # under which an opening item tag creates a new item.
        self._obj = self._site
def startElement(self, name, attrs):
    """Build the server configuration objects as their elements open.

    After forwarding the tag to SAXTracker.startElement:

    * "param" remembers its attributes for later use;
    * "collection" creates a Collection from the "title" and "id"
      attributes and adds it to the server;
    * "relation" creates a CSVFragmentFeed from the "source", "type",
      "pattern" and "timestamp" attributes and adds it to the current
      collection;
    * "property" adds a Column built from the "column" and "uri"
      attributes to the current feed.

    The attributes named above are required: a missing one raises
    whatever ``attrs[...]`` raises for an unknown key.
    """
    SAXTracker.startElement(self, name, attrs)

    if name == "param":
        self._attrs = attrs
        return

    if name == "collection":
        self._coll = Collection(attrs["title"], attrs["id"],
                                None, self._server)
        self._server.add_collection(self._coll)
        return

    if name == "relation":
        self._feed = CSVFragmentFeed(attrs["source"], attrs["type"],
                                     attrs["pattern"], attrs["timestamp"])
        self._coll.add_feed(self._feed)
        return

    if name == "property":
        self._feed.add_column(Column(attrs["column"], attrs["uri"], True))
def endElement(self, name):
    """Store the text of a closed element on the current entry or the site.

    Closing tags seen while "content" or "summary" is the innermost open
    element are dropped (they belong to embedded markup), except for the
    tag that closes that very element.  Every other tag is forwarded to
    SAXTracker.endElement; the text used below is whatever
    ``self._contents`` holds afterwards.

    * feed: pops its base.
    * entry: pops its base, inherits the site's editor as author when
      the entry has none, falls back to the summary when it has no
      description, and makes the site the current object again.
    * title, published, content: set on the current object.
    * updated (inside entry): publication date fallback.
    * subtitle (inside feed): becomes the site's description.
    * id: link fallback for the site when inside feed, guid of the
      entry when inside entry.
    * summary: remembered as a fallback for a missing content element.
    * name inside author: editor of the site or author of the entry,
      depending on what encloses the author element.
    """
    stack = self._elemstack
    if stack and stack[-1] in ("content", "summary") and name != stack[-1]:
        return  # ignore the embedded markup

    SAXTracker.endElement(self, name)

    # Innermost element still open once this one has been closed.
    parent = None
    if self._elemstack:
        parent = self._elemstack[-1]

    if name == "feed":
        self._pop_base()
    elif name == "entry":
        self._pop_base()
        if self._obj.get_author() is None:
            self._obj.set_author(self._site.get_editor())  # inherit
        if self._obj.get_description() is None and \
                self._summary is not None:
            self._obj.set_description(self._summary)  # fallback
        # The entry is complete: feed-level elements that follow (for
        # instance a title placed after the entries) must land on the
        # site, not on this entry.
        self._obj = self._site
    elif name == "title":
        self._obj.set_title(self._contents)
    elif name == "subtitle" and parent == "feed":
        self._site.set_description(self._contents)
    elif name == "id":
        if parent == "feed":
            # Feed-level data always goes to the site, as it does for
            # feed-level link and subtitle elements.
            if self._site.get_link() is None:
                self._site.set_link(self._contents)
        elif parent == "entry":
            self._obj.set_guid(self._contents)
    elif name == "published":
        self._obj.set_pubdate(self._contents)
    elif name == "updated" and parent == "entry":
        if not self._obj.get_pubdate():
            self._obj.set_pubdate(self._contents)
    elif name == "content":
        self._obj.set_description(self._contents)
    elif name == "summary":
        self._summary = self._contents  # use if no content element
    elif name == "name" and parent == "author":
        # The element enclosing the author decides who gets the name.
        # Guard the lookup so an author element with nothing above it
        # does not raise IndexError.
        grandparent = None
        if len(self._elemstack) >= 2:
            grandparent = self._elemstack[-2]
        if grandparent == "feed":
            self._site.set_editor(self._contents)
        elif grandparent == "entry":
            self._obj.set_author(self._contents)
def startElement(self, name, attrs):
    """Select the object that the following elements will describe.

    * rss with a "version" attribute: a version other than "0.91" is
      recorded in the site's error list.  Such a tag does not change the
      current object.
    * rss without a version, and channel: the site becomes current.
    * item (unless directly inside keywords): a new item is created by
      the factory, added to the site and becomes current.
    * image / textinput: a new image / text input is created by the
      factory, stored on the site and becomes current.

    The tag is forwarded to SAXTracker.startElement only after this
    selection has been made.
    """
    # Enclosing element, read before SAXTracker sees the new tag.
    parent = None
    if self._elemstack:
        parent = self._elemstack[-1]

    # Membership test instead of has_key(): has_key() is gone from
    # Python 3 mappings and SAX attribute objects, while ``in`` works on
    # both Python 2 and Python 3.
    if name == "rss" and "version" in attrs:
        version = attrs["version"]
        # NOTE(review): only 0.91 is accepted silently; confirm whether
        # other RSS versions should really be reported as unknown.
        if version != "0.91":
            self._site.errors.append("Unknown RSS version %s" % version)
    elif name == "rss" or name == "channel":
        self._obj = self._site
    elif name == "item" and parent != "keywords":
        self._obj = self._factory.make_item(self._site)
        self._site.add_item(self._obj)
    elif name == "image":
        self._site.image = self._factory.make_image()
        self._obj = self._site.image
    elif name == "textinput":
        self._site.input = self._factory.make_text_input()
        self._obj = self._site.input

    SAXTracker.startElement(self, name, attrs)
def endElement(self, name):
    """Apply a closed "param" element to the current object.

    After forwarding the tag to SAXTracker.endElement, a "param" element
    results in a call to set_param with the current object, the "name"
    attribute remembered from the opening tag, and ``self._contents``.
    Other elements need no further work.
    """
    SAXTracker.endElement(self, name)
    if name != "param":
        return
    set_param(self._obj, self._attrs["name"], self._contents)
def endElement(self, name):
    """Use the text of a closed "title" element as the registry title.

    The tag is always forwarded to SAXTracker.endElement first; the
    title is taken from ``self._contents`` afterwards.
    """
    SAXTracker.endElement(self, name)
    if name != "title":
        return
    self._registry.set_title(self._contents)
def __init__(self, registry, factory):
    """Create a handler that fills ``registry`` using ``factory``.

    Only the "title" element is passed to SAXTracker as a tracked
    element.
    """
    tracked = ["title"]
    SAXTracker.__init__(self, tracked)

    self._registry = registry
    self._factory = factory