def test_empty_doc(self):
    """An empty fixture document yields no links and no tokens.

    Regression test: both accessors must return an empty list instead of
    raising on the document stored in the 'empty-doc' fixture.
    """
    emptydoc1 = fixtures.dumpdocs('empty-doc')[
        'http://aero-comlab.stanford.edu/sichoi/tet_mesh.html']
    # should not raise 'ParserError: Document is empty'
    self.assertEqual([], emptydoc1.link_uris)
    # should not raise "AttributeError: 'NoneType' object has no attribute
    # 'text_content'"
    self.assertEqual([], emptydoc1.tokens())
def test_writes_file1(self):
    """Writing the 'small1' fixture links produces the expected link file.

    Each document's link URIs are added to a LinkFileOutputStream backed by
    an in-memory buffer, and the buffer contents are compared against the
    module-level ``small1_links`` text.
    """
    out = StringIO.StringIO()
    writer = LinkFileOutputStream(out)
    docs = fixtures.dumpdocs("small1")
    # The order of add() calls is kept explicit because the expected output
    # is compared as a single string.
    for uri in ("http://example.com/",
                "http://example.com/about",
                "http://example.com/contact"):
        writer.add(uri, docs[uri].link_uris)
    self.assertEqual(small1_links, out.getvalue())
def test_parses_file1(self):
    """Parsing ``small1_links`` recovers the links of the 'small1' fixture.

    The link file is fed to LinkFile line by line; the uri -> link list
    mapping it yields must equal the mapping built from the fixture docs.
    """
    # splitlines(True) keeps the line terminators on each line.
    linkfile = LinkFile(small1_links.splitlines(True))
    doclinks = dict((doc.uri, list(doc.link_uris)) for doc in linkfile)
    exp_doclinks = dict(
        (uri, list(doc.link_uris))
        for (uri, doc) in fixtures.dumpdocs("small1").items())
    self.assertEqual(exp_doclinks, doclinks)
def test_raw_None(self):
    """The document in the 'raw-None' fixture exposes ``html_parser`` as None.

    NOTE(review): the fixture name suggests a document whose raw content is
    None -- confirm against the fixture definition.
    """
    rawNone = fixtures.dumpdocs('raw-None')[
        'http://cse.stanford.edu/class/cs201/projects-00-01/napster/index.html']
    self.assertEqual(None, rawNone.html_parser)