Exemple #1
0
    def next(self):
        """Read one document record from ``self.iterable`` and return it.

        The first line consumed is the document URI; every following line is
        a link URI, up to (and consuming) the first blank line or the end of
        the input. Each line is stripped and decoded as UTF-8.

        Returns:
            A ``Document`` built from the document URI, with the collected
            link URIs handed to its ``cache_link_uris`` method.

        Raises:
            StopIteration: propagated from the underlying iterator when it
                has no line left to start a new record.
        """
        # The builtin next() (Python 2.6+) calls whichever iterator protocol
        # method the running interpreter uses, so the iterable is not
        # required to expose a literal ``.next`` attribute.
        uri = next(self.iterable).strip().decode('utf8')

        link_uris = []
        for line in self.iterable:
            stripped = line.strip()
            # A line that is empty once stripped ends the record. Comparing
            # the raw line against "\n" missed "\r\n" separators and
            # whitespace-only lines, which were then stored as empty URIs.
            if not stripped:
                break
            link_uris.append(stripped.decode('utf8'))

        doc = Document(uri)
        doc.cache_link_uris(link_uris)
        return doc
Exemple #2
0
 def test_cache_link_uris(self):
     """Check that cache_link_uris() replaces the document's link URIs."""
     doc = Document('http://stanford.edu/', '<a href="a.html">a</a>')
     # Before caching, link_uris reflects the anchor in the supplied markup.
     # assertEqual is used because the assertEquals alias is deprecated and
     # no longer exists in recent unittest releases.
     self.assertEqual(['http://stanford.edu/a.html'], doc.link_uris)
     doc.cache_link_uris(['http://stanford.edu/other.html'])
     # After caching, only the explicitly supplied URIs are reported.
     self.assertEqual(['http://stanford.edu/other.html'], doc.link_uris)