def next(self):
    """Read one record from ``self.iterable`` and return it as a Document.

    A record is one line holding the document URI, followed by zero or
    more lines each holding a link URI. A line that is exactly ``"\n"``
    ends the record; if the iterable runs out first, the links gathered
    so far are used. Every URI is stripped of surrounding whitespace and
    decoded from UTF-8.

    Whatever ``self.iterable.next()`` raises when no first line is
    available (presumably StopIteration -- confirm against the iterable
    in use) propagates to the caller unchanged.
    """
    # The first line of the record is the document's own URI.
    header_line = self.iterable.next()
    doc_uri = header_line.strip().decode('utf8')

    # Remaining lines are link URIs, up to the "\n" separator line.
    collected_links = []
    for raw_line in self.iterable:
        if raw_line == "\n":
            break
        link_uri = raw_line.strip().decode('utf8')
        collected_links.append(link_uri)

    # Hand the links to the new document via cache_link_uris().
    document = Document(doc_uri)
    document.cache_link_uris(collected_links)
    return document
def test_cache_link_uris(self):
    """Check that cache_link_uris() overrides the document's link URIs.

    Uses ``assertEqual`` rather than the deprecated ``assertEquals``
    alias, which newer unittest versions no longer provide.
    """
    doc = Document('http://stanford.edu/', '<a href="a.html">a</a>')

    # Before caching, link_uris is expected to hold the absolute form of
    # the single anchor in the markup passed to the constructor.
    self.assertEqual(['http://stanford.edu/a.html'], doc.link_uris)

    # After caching, link_uris is expected to hold exactly the supplied list.
    doc.cache_link_uris(['http://stanford.edu/other.html'])
    self.assertEqual(['http://stanford.edu/other.html'], doc.link_uris)