def feed_page(self, name, data):
    """Consume one stored page, or the link-info listing.

    When *name* equals ``self.linkinfo``, *data* is read as a listing with
    one Python expression per non-empty line; each line must evaluate to a
    ``(name, strs)`` pair, which is stored as ``self.dic[name] = strs``.

    Any other *name* must contain a ``/``: the text after the first slash
    is prefixed with ``http://`` to form the base URL.  If no link-info
    name is configured, the text before the slash is remembered as
    ``self.baseid`` and the page is fed through ``HTMLLinkFinder``.  The
    page is then parsed and added to ``self.analyzer`` unless
    ``self.acldb`` is set and rejects the name.

    A ``ValueError`` raised anywhere while handling a page (including a
    *name* without a slash) silently skips that page.  Returns None.
    """
    if name == self.linkinfo:
        print >>stderr, 'Loading: %r' % name
        for line in data.split('\n'):
            if not line:
                continue
            # SECURITY: eval() executes whatever expression the line holds.
            # This is only safe while the link-info data is produced by a
            # trusted tool; if it can come from outside, switch to
            # ast.literal_eval, which accepts plain literals only.
            # A distinct local is used so the `name` argument is not rebound.
            (key, strs) = eval(line)
            self.dic[key] = strs
        return

    try:
        # str.index raises ValueError when there is no '/', which the
        # handler below turns into "skip this page".
        slash = name.index('/')
        base_href = 'http://' + name[slash+1:]
        if not self.linkinfo:
            # No link-info listing configured: scan the page itself.
            # NOTE(review): presumably HTMLLinkFinder reports links back to
            # this object (it is passed `self` twice) -- confirm.
            self.baseid = name[:slash]
            handler = HTMLLinkFinder(self, base_href, self)
            parser = HTMLParser3(handler, charset=self.default_charset)
            parser.feed(data)
            parser.close()
        if not self.acldb or self.acldb.allowed(name):
            tree = parsestr(data, charset=self.default_charset,
                            base_href=base_href)
            page_id = self.analyzer.add_tree(name, tree)
            print >>stderr, 'Added: %d: %s' % (page_id, name)
        else:
            print >>stderr, 'Skipped: %s' % name
    except ValueError:
        # Deliberately best-effort: the try also covers the parsing calls
        # above, so a ValueError from them drops the page as well.
        pass
    return
def feed_page(self, name, data):
    """Parse *data* with the default charset and pass the tree to feed_tree.

    The parsed result is forwarded together with *name* to
    ``self.feed_tree``.  Returns None.
    """
    tree = parsestr(data, charset=self.default_charset)
    self.feed_tree(name, tree)
    return None