def test05_from_disk_paths(self):
    """Check from_disk() when explicit ``paths`` are supplied.

    Exercises an empty path list, one directory, two directories and a
    single file, first with a mapper rooted at dir1 and then with a mapper
    rooted at the parent testdata directory.
    """
    rlb = ResourceListBuilder()
    rlb.mapper = Mapper(
        ['http://example.org/t', 'resync/test/testdata/dir1'])
    # no path, should get no resources
    rl = rlb.from_disk(paths=[])
    self.assertEqual(len(rl), 0)
    # full path, 2 resources
    rl = rlb.from_disk(paths=['resync/test/testdata/dir1'])
    self.assertEqual(len(rl), 2)
    # new object with mapper covering larger space of disk
    rlb = ResourceListBuilder(set_path=True)
    rlb.mapper = Mapper(['http://example.org/t', 'resync/test/testdata'])
    # same path with 2 resources
    rl = rlb.from_disk(paths=['resync/test/testdata/dir1'])
    self.assertEqual(len(rl), 2)
    # two paths, 3 resources in total
    rl = rlb.from_disk(
        paths=['resync/test/testdata/dir1', 'resync/test/testdata/dir2'])
    self.assertEqual(len(rl), 3)
    # path that is just a single file
    rl = rlb.from_disk(paths=['resync/test/testdata/dir1/file_a'])
    self.assertEqual(len(rl), 1)
    rli = iter(rl)
    # Use the next() builtin: the iterator .next() method exists only on
    # Python 2, whereas next() works on Python 2.6+ and Python 3.
    r = next(rli)
    self.assertTrue(r is not None)
    self.assertEqual(r.uri, 'http://example.org/t/dir1/file_a')
    self.assertEqual(r.lastmod, '2012-07-25T17:13:46Z')
    self.assertEqual(r.md5, None)
    self.assertEqual(r.length, 20)
    self.assertEqual(r.path, 'resync/test/testdata/dir1/file_a')
def test05_from_disk_paths(self):
    """Check from_disk() when explicit ``paths`` are supplied.

    Exercises an empty path list, one directory, two directories and a
    single file, first with a mapper rooted at dir1 and then with a mapper
    rooted at the parent testdata directory.
    """
    rlb = ResourceListBuilder()
    rlb.mapper = Mapper(['http://example.org/t', 'resync/test/testdata/dir1'])
    # no path, should get no resources
    rl = rlb.from_disk(paths=[])
    self.assertEqual(len(rl), 0)
    # full path, 2 resources
    rl = rlb.from_disk(paths=['resync/test/testdata/dir1'])
    self.assertEqual(len(rl), 2)
    # new object with mapper covering larger space of disk
    rlb = ResourceListBuilder(set_path=True)
    rlb.mapper = Mapper(['http://example.org/t', 'resync/test/testdata'])
    # same path with 2 resources
    rl = rlb.from_disk(paths=['resync/test/testdata/dir1'])
    self.assertEqual(len(rl), 2)
    # both directories together, 3 resources
    rl = rlb.from_disk(paths=['resync/test/testdata/dir1',
                              'resync/test/testdata/dir2'])
    self.assertEqual(len(rl), 3)
    # path that is just a single file
    rl = rlb.from_disk(paths=['resync/test/testdata/dir1/file_a'])
    self.assertEqual(len(rl), 1)
    rli = iter(rl)
    # next(rli) rather than rli.next(): the method form is Python 2 only.
    r = next(rli)
    self.assertTrue(r is not None)
    self.assertEqual(r.uri, 'http://example.org/t/dir1/file_a')
    self.assertEqual(r.lastmod, '2012-07-25T17:13:46Z')
    self.assertEqual(r.md5, None)
    self.assertEqual(r.length, 20)
    self.assertEqual(r.path, 'resync/test/testdata/dir1/file_a')
def test2_pretty_output(self):
    """Compare the pretty-printed XML of the dir1 scan against a fixed document."""
    builder = ResourceListBuilder()
    builder.mapper = Mapper(["http://example.org/t", "resync/test/testdata/dir1"])
    resource_list = builder.from_disk()
    # Blank the modified entry so it is not written; this keeps the
    # output easy to compare.
    resource_list.md["modified"] = None
    actual_xml = resource_list.as_xml(pretty_xml=True)
    # One literal per output line; adjacent literals are joined by the parser.
    expected_xml = (
        '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">\n'
        '<rs:md capability="resourcelist" />\n'
        '<url><loc>http://example.org/t/file_a</loc><lastmod>2012-07-25T17:13:46Z</lastmod><rs:md length="20" /></url>\n'
        '<url><loc>http://example.org/t/file_b</loc><lastmod>2001-09-09T01:46:40Z</lastmod><rs:md length="45" /></url>\n'
        '</urlset>'
    )
    self.assertEqual(actual_xml, expected_xml)
def test04_data(self):
    """Fetch file_a by URI from a scan built with set_path and set_md5 enabled."""
    builder = ResourceListBuilder(set_md5=True, set_path=True)
    builder.mapper = Mapper(['http://example.org/t', 'tests/testdata/dir1'])
    scanned = builder.from_disk()
    self.assertEqual(len(scanned), 2)
    # Look the resource up by URI rather than relying on iteration order
    file_a_uri = 'http://example.org/t/file_a'
    file_a = scanned.resources.get(file_a_uri)
    self.assertTrue(file_a is not None)
    self.assertEqual(file_a.uri, file_a_uri)
    self.assertEqual(file_a.lastmod, '2012-07-25T17:13:46Z')
    self.assertEqual(file_a.md5, 'a/Jv1mYBtSjS4LR+qoft/Q==')
    self.assertEqual(file_a.path, 'tests/testdata/dir1/file_a')
def test04_data(self):
    """Check uri, lastmod, md5 and path of file_a when set_path and set_md5 are on."""
    disk_root = 'resync/test/testdata/dir1'
    builder = ResourceListBuilder(set_path=True, set_md5=True)
    builder.mapper = Mapper(['http://example.org/t', disk_root])
    listing = builder.from_disk()
    self.assertEqual(len(listing), 2)
    # Resources are keyed by URI in listing.resources
    entry = listing.resources.get('http://example.org/t/file_a')
    self.assertTrue(entry is not None)
    self.assertEqual(entry.uri, 'http://example.org/t/file_a')
    self.assertEqual(entry.lastmod, '2012-07-25T17:13:46Z')
    self.assertEqual(entry.md5, 'a/Jv1mYBtSjS4LR+qoft/Q==')
    self.assertEqual(entry.path, 'resync/test/testdata/dir1/file_a')
def test4_data(self):
    """Scan dir1 with do_md5 and set_path, then inspect the file_a entry."""
    builder = ResourceListBuilder(do_md5=True)
    builder.mapper = Mapper(["http://example.org/t", "resync/test/testdata/dir1"])
    # set_path is passed to from_disk() here, not to the constructor
    listing = builder.from_disk(set_path=True)
    self.assertEqual(len(listing), 2)
    wanted_uri = "http://example.org/t/file_a"
    resource = listing.resources.get(wanted_uri)
    self.assertTrue(resource is not None)
    self.assertEqual(resource.uri, wanted_uri)
    self.assertEqual(resource.lastmod, "2012-07-25T17:13:46Z")
    self.assertEqual(resource.md5, "a/Jv1mYBtSjS4LR+qoft/Q==")
    self.assertEqual(resource.path, "resync/test/testdata/dir1/file_a")
def test04_data(self):
    """Fetch file_a by URI from a scan built with set_path and set_hashes=['md5']."""
    builder = ResourceListBuilder(set_hashes=['md5'], set_path=True)
    builder.mapper = Mapper(['http://example.org/t', 'tests/testdata/dir1'])
    scanned = builder.from_disk()
    self.assertEqual(len(scanned), 2)
    # Look the resource up by URI rather than relying on iteration order
    file_a_uri = 'http://example.org/t/file_a'
    file_a = scanned.resources.get(file_a_uri)
    self.assertTrue(file_a is not None)
    self.assertEqual(file_a.uri, file_a_uri)
    self.assertEqual(file_a.lastmod, '2012-07-25T17:13:46Z')
    self.assertEqual(file_a.md5, '6bf26fd66601b528d2e0b47eaa87edfd')
    self.assertEqual(file_a.path, 'tests/testdata/dir1/file_a')
def test06_odd_file_names(self):
    """Verify we can read unicode file names properly."""
    rlb = ResourceListBuilder()
    rlb.mapper = Mapper(['x:', 'tests/testdata/odd_file_names'])
    rl = rlb.from_disk(paths=['tests/testdata/odd_file_names'])
    # Get list of URIs to test
    uris = [x.uri for x in rl]
    # assertIn/assertNotIn report the missing member and the container on
    # failure, unlike assertTrue(x in y) which only reports "False is not true".
    self.assertIn('x:/not_odd.txt', uris)
    # NOTE(review): 'with&ersand.txt' may be a mangled form of
    # 'with&ampersand.txt' -- confirm against the testdata directory.
    self.assertIn('x:/with&ersand.txt', uris)
    self.assertIn('x:/with spaces.txt', uris)
    # File names for accented chars represented with combining chars
    self.assertIn(u'x:/Pi\u006e\u0303a_Colada.txt', uris)
    self.assertNotIn(u'x:/Pi\u00f1a_Colada.txt', uris)
    self.assertIn(u'x:/A_\u0041\u0303_tilde.txt', uris)
    self.assertNotIn(u'x:/A_\u00c3_tilde.txt', uris)
    # Snowman is single char
    # NOTE(review): unlike the URIs above, this one has no '/' after 'x:',
    # so the negative check may pass trivially -- confirm the intent.
    self.assertNotIn(u'x:snowman_\u2603.txt', uris)
def test03_set_md5(self):
    """Scan dir1 with set_md5=True and check each resource's attributes."""
    builder = ResourceListBuilder(set_md5=True)
    builder.mapper = Mapper(['http://example.org/t', 'tests/testdata/dir1'])
    scanned = builder.from_disk()
    self.assertEqual(len(scanned), 2)
    # (uri, lastmod, md5, length) expected for each resource, in iteration order
    expected = (
        ('http://example.org/t/file_a', '2012-07-25T17:13:46Z',
         'a/Jv1mYBtSjS4LR+qoft/Q==', 20),
        ('http://example.org/t/file_b', '2001-09-09T01:46:40Z',
         'RS5Uva4WJqxdbnvoGzneIQ==', 45),
    )
    resources = iter(scanned)
    for uri, lastmod, md5, length in expected:
        resource = next(resources)
        self.assertEqual(resource.uri, uri)
        self.assertEqual(resource.lastmod, lastmod)
        self.assertEqual(resource.md5, md5)
        self.assertEqual(resource.length, length)
        # set_path was not requested, so path is expected to be None
        self.assertEqual(resource.path, None)
def test02_no_length(self):
    """Scan dir1 with set_length=False and check that length stays None."""
    builder = ResourceListBuilder(set_length=False)
    builder.mapper = Mapper(['http://example.org/t', 'tests/testdata/dir1'])
    scanned = builder.from_disk()
    self.assertEqual(len(scanned), 2)
    # (uri, lastmod) expected for each resource, in iteration order
    expected = (
        ('http://example.org/t/file_a', '2012-07-25T17:13:46Z'),
        ('http://example.org/t/file_b', '2001-09-09T01:46:40Z'),
    )
    resources = iter(scanned)
    for uri, lastmod in expected:
        resource = next(resources)
        self.assertEqual(resource.uri, uri)
        self.assertEqual(resource.lastmod, lastmod)
        # md5, length and path are all expected to be unset in this mode
        self.assertEqual(resource.md5, None)
        self.assertEqual(resource.length, None)
        self.assertEqual(resource.path, None)
def test03_set_md5(self):
    """Scan dir1 with set_md5=True and check both resources.

    Each resource is expected to carry uri, lastmod, md5 and length, while
    path stays None.
    """
    rlb = ResourceListBuilder(set_md5=True)
    rlb.mapper = Mapper(['http://example.org/t', 'resync/test/testdata/dir1'])
    rl = rlb.from_disk()
    self.assertEqual(len(rl), 2)
    rli = iter(rl)
    # next(rli) rather than rli.next(): the method form is Python 2 only,
    # while the builtin works on Python 2.6+ and Python 3.
    r = next(rli)
    self.assertEqual(r.uri, 'http://example.org/t/file_a')
    self.assertEqual(r.lastmod, '2012-07-25T17:13:46Z')
    self.assertEqual(r.md5, 'a/Jv1mYBtSjS4LR+qoft/Q==')
    self.assertEqual(r.length, 20)
    self.assertEqual(r.path, None)
    r = next(rli)
    self.assertEqual(r.uri, 'http://example.org/t/file_b')
    self.assertEqual(r.lastmod, '2001-09-09T01:46:40Z')
    self.assertEqual(r.md5, 'RS5Uva4WJqxdbnvoGzneIQ==')
    self.assertEqual(r.length, 45)
    self.assertEqual(r.path, None)
def test02_no_length(self):
    """Scan dir1 with set_length=False and check both resources.

    Each resource is expected to carry uri and lastmod, while md5, length
    and path stay None.
    """
    rlb = ResourceListBuilder(set_length=False)
    rlb.mapper = Mapper(['http://example.org/t', 'resync/test/testdata/dir1'])
    rl = rlb.from_disk()
    self.assertEqual(len(rl), 2)
    rli = iter(rl)
    # next(rli) rather than rli.next(): the method form is Python 2 only,
    # while the builtin works on Python 2.6+ and Python 3.
    r = next(rli)
    self.assertEqual(r.uri, 'http://example.org/t/file_a')
    self.assertEqual(r.lastmod, '2012-07-25T17:13:46Z')
    self.assertEqual(r.md5, None)
    self.assertEqual(r.length, None)
    self.assertEqual(r.path, None)
    r = next(rli)
    self.assertEqual(r.uri, 'http://example.org/t/file_b')
    self.assertEqual(r.lastmod, '2001-09-09T01:46:40Z')
    self.assertEqual(r.md5, None)
    self.assertEqual(r.length, None)
    self.assertEqual(r.path, None)
def test03_set_hashes(self):
    """Scan dir1 with set_hashes=['md5'] and check each resource's attributes."""
    builder = ResourceListBuilder(set_hashes=['md5'])
    builder.mapper = Mapper(['http://example.org/t', 'tests/testdata/dir1'])
    scanned = builder.from_disk()
    self.assertEqual(len(scanned), 2)
    # (uri, lastmod, md5, length) expected for each resource, in iteration order
    expected = (
        ('http://example.org/t/file_a', '2012-07-25T17:13:46Z',
         '6bf26fd66601b528d2e0b47eaa87edfd', 20),
        ('http://example.org/t/file_b', '2001-09-09T01:46:40Z',
         '452e54bdae1626ac5d6e7be81b39de21', 45),
    )
    resources = iter(scanned)
    for uri, lastmod, md5, length in expected:
        resource = next(resources)
        self.assertEqual(resource.uri, uri)
        self.assertEqual(resource.lastmod, lastmod)
        self.assertEqual(resource.md5, md5)
        self.assertEqual(resource.length, length)
        # set_path was not requested, so path is expected to be None
        self.assertEqual(resource.path, None)
def test3_with_md5(self):
    """Check that XML from a do_md5 scan carries hash and length per resource.

    The lastmod value is matched loosely by pattern; the md5 hash and the
    length attributes are matched literally.
    """
    rlb = ResourceListBuilder(do_md5=True)
    rlb.mapper = Mapper(["http://example.org/t", "resync/test/testdata/dir1"])
    rl = rlb.from_disk()
    xml = rl.as_xml()
    # Patterns are raw strings so that regex escapes such as \w and \+ are
    # not parsed as (invalid) string escape sequences.
    # must escape + in md5
    self.assertIsNotNone(
        re.search(
            r'<loc>http://example.org/t/file_a</loc><lastmod>[\w\:\-]+Z</lastmod><rs:md hash="md5:a/Jv1mYBtSjS4LR\+qoft/Q==" length="20" />',
            xml,
        )
    )
    self.assertIsNotNone(
        re.search(
            r'<loc>http://example.org/t/file_b</loc><lastmod>[\w\:\-]+Z</lastmod><rs:md hash="md5:RS5Uva4WJqxdbnvoGzneIQ==" length="45" />',
            xml,
        )
    )
def test01_simple_scan(self):
    """Scan dir1 with default builder settings and check resources and timestamps."""
    builder = ResourceListBuilder()
    builder.mapper = Mapper(['http://example.org/t', 'tests/testdata/dir1'])
    scanned = builder.from_disk()
    self.assertEqual(len(scanned), 2)
    # (uri, lastmod, length) expected for each resource, in iteration order
    expected = (
        ('http://example.org/t/file_a', '2012-07-25T17:13:46Z', 20),
        ('http://example.org/t/file_b', '2001-09-09T01:46:40Z', 45),
    )
    resources = iter(scanned)
    for uri, lastmod, length in expected:
        resource = next(resources)
        self.assertEqual(resource.uri, uri)
        self.assertEqual(resource.lastmod, lastmod)
        self.assertEqual(resource.md5, None)
        self.assertEqual(resource.length, length)
        self.assertEqual(resource.path, None)
    # Make sure at and completed were set
    self.assertTrue(scanned.md_at is not None)
    self.assertTrue(scanned.md_completed is not None)
def test01_simple_scan(self):
    """Scan dir1 with default builder settings and check both resources.

    Each resource is expected to carry uri, lastmod and length, with md5
    and path left as None. The list's md_at and md_completed must be set.
    """
    rlb = ResourceListBuilder()
    rlb.mapper = Mapper(['http://example.org/t', 'resync/test/testdata/dir1'])
    rl = rlb.from_disk()
    self.assertEqual(len(rl), 2)
    rli = iter(rl)
    # next(rli) rather than rli.next(): the method form is Python 2 only,
    # while the builtin works on Python 2.6+ and Python 3.
    r = next(rli)
    self.assertEqual(r.uri, 'http://example.org/t/file_a')
    self.assertEqual(r.lastmod, '2012-07-25T17:13:46Z')
    self.assertEqual(r.md5, None)
    self.assertEqual(r.length, 20)
    self.assertEqual(r.path, None)
    r = next(rli)
    self.assertEqual(r.uri, 'http://example.org/t/file_b')
    self.assertEqual(r.lastmod, '2001-09-09T01:46:40Z')
    self.assertEqual(r.md5, None)
    self.assertEqual(r.length, 45)
    self.assertEqual(r.path, None)
    # Make sure at and completed were set
    self.assertTrue(rl.md_at is not None)
    self.assertTrue(rl.md_completed is not None)