def test05_from_disk_paths(self):
     """Check from_disk() when it is given explicit lists of paths.

     Exercises an empty list, one directory, two directories and a single
     file, then checks the attributes of the resource built from that file.
     """
     rlb = ResourceListBuilder()
     rlb.mapper = Mapper(
         ['http://example.org/t', 'resync/test/testdata/dir1'])
     # no path, should get no resources
     rl = rlb.from_disk(paths=[])
     self.assertEqual(len(rl), 0)
     # full path, 2 resources
     rl = rlb.from_disk(paths=['resync/test/testdata/dir1'])
     self.assertEqual(len(rl), 2)
     # new object with mapper covering larger space of disk
     rlb = ResourceListBuilder(set_path=True)
     rlb.mapper = Mapper(['http://example.org/t', 'resync/test/testdata'])
     # same path with 2 resources
     rl = rlb.from_disk(paths=['resync/test/testdata/dir1'])
     self.assertEqual(len(rl), 2)
     # two paths (dir1 and dir2), 3 resources in total
     rl = rlb.from_disk(
         paths=['resync/test/testdata/dir1', 'resync/test/testdata/dir2'])
     self.assertEqual(len(rl), 3)
     # path that is just a single file
     rl = rlb.from_disk(paths=['resync/test/testdata/dir1/file_a'])
     self.assertEqual(len(rl), 1)
     rli = iter(rl)
     # Builtin next() works on Python 2.6+ and 3; the iterator method
     # .next() exists only on Python 2.
     r = next(rli)
     self.assertTrue(r is not None)
     self.assertEqual(r.uri, 'http://example.org/t/dir1/file_a')
     self.assertEqual(r.lastmod, '2012-07-25T17:13:46Z')
     self.assertEqual(r.md5, None)
     self.assertEqual(r.length, 20)
     # path is expected here because the builder was made with set_path=True
     self.assertEqual(r.path, 'resync/test/testdata/dir1/file_a')
 def test05_from_disk_paths(self):
     """Check from_disk() when it is given explicit lists of paths.

     Exercises an empty list, one directory, two directories and a single
     file, then checks the attributes of the resource built from that file.
     """
     rlb = ResourceListBuilder()
     rlb.mapper = Mapper(['http://example.org/t', 'resync/test/testdata/dir1'])
     # no path, should get no resources
     rl = rlb.from_disk(paths=[])
     self.assertEqual(len(rl), 0)
     # full path, 2 resources
     rl = rlb.from_disk(paths=['resync/test/testdata/dir1'])
     self.assertEqual(len(rl), 2)
     # new object with mapper covering larger space of disk
     rlb = ResourceListBuilder(set_path=True)
     rlb.mapper = Mapper(['http://example.org/t', 'resync/test/testdata'])
     # same path with 2 resources
     rl = rlb.from_disk(paths=['resync/test/testdata/dir1'])
     self.assertEqual(len(rl), 2)
     # two paths (dir1 and dir2), 3 resources in total
     rl = rlb.from_disk(paths=['resync/test/testdata/dir1', 'resync/test/testdata/dir2'])
     self.assertEqual(len(rl), 3)
     # path that is just a single file
     rl = rlb.from_disk(paths=['resync/test/testdata/dir1/file_a'])
     self.assertEqual(len(rl), 1)
     rli = iter(rl)
     # Builtin next() works on Python 2.6+ and 3; the iterator method
     # .next() exists only on Python 2.
     r = next(rli)
     self.assertTrue(r is not None)
     self.assertEqual(r.uri, 'http://example.org/t/dir1/file_a')
     self.assertEqual(r.lastmod, '2012-07-25T17:13:46Z')
     self.assertEqual(r.md5, None)
     self.assertEqual(r.length, 20)
     # path is expected here because the builder was made with set_path=True
     self.assertEqual(r.path, 'resync/test/testdata/dir1/file_a')
 def test2_pretty_output(self):
     """Compare the pretty-printed XML for dir1 with a fixed expected document."""
     builder = ResourceListBuilder()
     builder.mapper = Mapper(["http://example.org/t", "resync/test/testdata/dir1"])
     resource_list = builder.from_disk()
     # Blank out "modified" so it is not written and the output is easy to test
     resource_list.md["modified"] = None
     pretty = resource_list.as_xml(pretty_xml=True)
     expected = '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">\n<rs:md capability="resourcelist" />\n<url><loc>http://example.org/t/file_a</loc><lastmod>2012-07-25T17:13:46Z</lastmod><rs:md length="20" /></url>\n<url><loc>http://example.org/t/file_b</loc><lastmod>2001-09-09T01:46:40Z</lastmod><rs:md length="45" /></url>\n</urlset>'
     self.assertEqual(pretty, expected)
Exemple #4
0
 def test04_data(self):
     """Scan dir1 with set_path and set_md5 enabled and inspect file_a.

     The resource is looked up by URI in ``rl.resources`` and its uri,
     lastmod, md5 and path attributes are compared with fixed values.
     """
     builder = ResourceListBuilder(set_path=True, set_md5=True)
     builder.mapper = Mapper(['http://example.org/t', 'tests/testdata/dir1'])
     resource_list = builder.from_disk()
     self.assertEqual(len(resource_list), 2)
     file_a = resource_list.resources.get('http://example.org/t/file_a')
     self.assertTrue(file_a is not None)
     # Attribute name -> expected value, checked in this order
     expectations = (
         ('uri', 'http://example.org/t/file_a'),
         ('lastmod', '2012-07-25T17:13:46Z'),
         ('md5', 'a/Jv1mYBtSjS4LR+qoft/Q=='),
         ('path', 'tests/testdata/dir1/file_a'),
     )
     for attr_name, wanted in expectations:
         self.assertEqual(getattr(file_a, attr_name), wanted)
 def test04_data(self):
     """Scan dir1 with set_path and set_md5 enabled and inspect file_a.

     The resource is looked up by URI in ``rl.resources`` and its uri,
     lastmod, md5 and path attributes are compared with fixed values.
     """
     builder = ResourceListBuilder(set_path=True, set_md5=True)
     builder.mapper = Mapper(['http://example.org/t', 'resync/test/testdata/dir1'])
     resource_list = builder.from_disk()
     self.assertEqual(len(resource_list), 2)
     file_a = resource_list.resources.get('http://example.org/t/file_a')
     self.assertTrue(file_a is not None)
     # Attribute name -> expected value, checked in this order
     expectations = (
         ('uri', 'http://example.org/t/file_a'),
         ('lastmod', '2012-07-25T17:13:46Z'),
         ('md5', 'a/Jv1mYBtSjS4LR+qoft/Q=='),
         ('path', 'resync/test/testdata/dir1/file_a'),
     )
     for attr_name, wanted in expectations:
         self.assertEqual(getattr(file_a, attr_name), wanted)
 def test4_data(self):
     """Scan dir1 with do_md5 on the builder and set_path on from_disk().

     file_a is fetched by URI from ``rl.resources`` and its uri, lastmod,
     md5 and path attributes are compared with fixed values.
     """
     builder = ResourceListBuilder(do_md5=True)
     builder.mapper = Mapper(["http://example.org/t", "resync/test/testdata/dir1"])
     resource_list = builder.from_disk(set_path=True)
     self.assertEqual(len(resource_list), 2)
     file_a = resource_list.resources.get("http://example.org/t/file_a")
     self.assertTrue(file_a is not None)
     # Attribute name -> expected value, checked in this order
     expectations = (
         ("uri", "http://example.org/t/file_a"),
         ("lastmod", "2012-07-25T17:13:46Z"),
         ("md5", "a/Jv1mYBtSjS4LR+qoft/Q=="),
         ("path", "resync/test/testdata/dir1/file_a"),
     )
     for attr_name, wanted in expectations:
         self.assertEqual(getattr(file_a, attr_name), wanted)
 def test04_data(self):
     """Scan dir1 with set_path and set_hashes=['md5'] and inspect file_a.

     The resource is looked up by URI in ``rl.resources`` and its uri,
     lastmod, md5 and path attributes are compared with fixed values.
     """
     builder = ResourceListBuilder(set_path=True, set_hashes=['md5'])
     builder.mapper = Mapper(['http://example.org/t', 'tests/testdata/dir1'])
     resource_list = builder.from_disk()
     self.assertEqual(len(resource_list), 2)
     file_a = resource_list.resources.get('http://example.org/t/file_a')
     self.assertTrue(file_a is not None)
     # Attribute name -> expected value, checked in this order
     expectations = (
         ('uri', 'http://example.org/t/file_a'),
         ('lastmod', '2012-07-25T17:13:46Z'),
         ('md5', '6bf26fd66601b528d2e0b47eaa87edfd'),
         ('path', 'tests/testdata/dir1/file_a'),
     )
     for attr_name, wanted in expectations:
         self.assertEqual(getattr(file_a, attr_name), wanted)
 def test06_odd_file_names(self):
     """Verify we can read unicode file names properly."""
     rlb = ResourceListBuilder()
     rlb.mapper = Mapper(['x:', 'tests/testdata/odd_file_names'])
     rl = rlb.from_disk(paths=['tests/testdata/odd_file_names'])
     # Get list of URIs to test
     uris = [x.uri for x in rl]
     # assertIn/assertNotIn report the missing/unexpected item and the
     # container on failure, unlike assertTrue/assertFalse on `in`.
     self.assertIn('x:/not_odd.txt', uris)
     self.assertIn('x:/with&ampersand.txt', uris)
     self.assertIn('x:/with spaces.txt', uris)
     # File names for accented chars represented with combining chars
     self.assertIn(u'x:/Pi\u006e\u0303a_Colada.txt', uris)
     self.assertNotIn(u'x:/Pi\u00f1a_Colada.txt', uris)
     self.assertIn(u'x:/A_\u0041\u0303_tilde.txt', uris)
     self.assertNotIn(u'x:/A_\u00c3_tilde.txt', uris)
     # Snowman is single char
     # NOTE(review): unlike the URIs above this one has no '/' after 'x:',
     # so the check may pass vacuously -- confirm the intended URI.
     self.assertNotIn(u'x:snowman_\u2603.txt', uris)
Exemple #9
0
 def test03_set_md5(self):
     """Scan dir1 with set_md5=True and check both resources in iteration order."""
     builder = ResourceListBuilder(set_md5=True)
     builder.mapper = Mapper(['http://example.org/t', 'tests/testdata/dir1'])
     resource_list = builder.from_disk()
     self.assertEqual(len(resource_list), 2)
     # (uri, lastmod, md5, length, path) expected for each resource in turn
     expected_rows = (
         ('http://example.org/t/file_a', '2012-07-25T17:13:46Z',
          'a/Jv1mYBtSjS4LR+qoft/Q==', 20, None),
         ('http://example.org/t/file_b', '2001-09-09T01:46:40Z',
          'RS5Uva4WJqxdbnvoGzneIQ==', 45, None),
     )
     resources = iter(resource_list)
     for uri, lastmod, md5, length, path in expected_rows:
         resource = next(resources)
         self.assertEqual(resource.uri, uri)
         self.assertEqual(resource.lastmod, lastmod)
         self.assertEqual(resource.md5, md5)
         self.assertEqual(resource.length, length)
         self.assertEqual(resource.path, path)
Exemple #10
0
 def test02_no_length(self):
     """Scan dir1 with set_length=False and check that no length is recorded."""
     builder = ResourceListBuilder(set_length=False)
     builder.mapper = Mapper(['http://example.org/t', 'tests/testdata/dir1'])
     resource_list = builder.from_disk()
     self.assertEqual(len(resource_list), 2)
     # (uri, lastmod) expected for each resource in turn; md5, length and
     # path are expected to be None for both.
     expected_rows = (
         ('http://example.org/t/file_a', '2012-07-25T17:13:46Z'),
         ('http://example.org/t/file_b', '2001-09-09T01:46:40Z'),
     )
     resources = iter(resource_list)
     for uri, lastmod in expected_rows:
         resource = next(resources)
         self.assertEqual(resource.uri, uri)
         self.assertEqual(resource.lastmod, lastmod)
         self.assertEqual(resource.md5, None)
         self.assertEqual(resource.length, None)
         self.assertEqual(resource.path, None)
 def test03_set_md5(self):
     """Scan dir1 with set_md5=True and check both resources in iteration order."""
     rlb = ResourceListBuilder(set_md5=True)
     rlb.mapper = Mapper(['http://example.org/t', 'resync/test/testdata/dir1'])
     rl = rlb.from_disk()
     self.assertEqual(len(rl), 2)
     rli = iter(rl)
     # Builtin next() works on Python 2.6+ and 3; the iterator method
     # .next() exists only on Python 2.
     r = next(rli)
     self.assertEqual(r.uri, 'http://example.org/t/file_a')
     self.assertEqual(r.lastmod, '2012-07-25T17:13:46Z')
     self.assertEqual(r.md5, 'a/Jv1mYBtSjS4LR+qoft/Q==')
     self.assertEqual(r.length, 20)
     self.assertEqual(r.path, None)
     r = next(rli)
     self.assertEqual(r.uri, 'http://example.org/t/file_b')
     self.assertEqual(r.lastmod, '2001-09-09T01:46:40Z')
     self.assertEqual(r.md5, 'RS5Uva4WJqxdbnvoGzneIQ==')
     self.assertEqual(r.length, 45)
     self.assertEqual(r.path, None)
 def test02_no_length(self):
     """Scan dir1 with set_length=False and check that no length is recorded."""
     rlb = ResourceListBuilder(set_length=False)
     rlb.mapper = Mapper(['http://example.org/t', 'resync/test/testdata/dir1'])
     rl = rlb.from_disk()
     self.assertEqual(len(rl), 2)
     rli = iter(rl)
     # Builtin next() works on Python 2.6+ and 3; the iterator method
     # .next() exists only on Python 2.
     r = next(rli)
     self.assertEqual(r.uri, 'http://example.org/t/file_a')
     self.assertEqual(r.lastmod, '2012-07-25T17:13:46Z')
     self.assertEqual(r.md5, None)
     self.assertEqual(r.length, None)
     self.assertEqual(r.path, None)
     r = next(rli)
     self.assertEqual(r.uri, 'http://example.org/t/file_b')
     self.assertEqual(r.lastmod, '2001-09-09T01:46:40Z')
     self.assertEqual(r.md5, None)
     self.assertEqual(r.length, None)
     self.assertEqual(r.path, None)
 def test03_set_hashes(self):
     """Scan dir1 with set_hashes=['md5'] and check both resources in iteration order."""
     builder = ResourceListBuilder(set_hashes=['md5'])
     builder.mapper = Mapper(['http://example.org/t', 'tests/testdata/dir1'])
     resource_list = builder.from_disk()
     self.assertEqual(len(resource_list), 2)
     # (uri, lastmod, md5, length, path) expected for each resource in turn
     expected_rows = (
         ('http://example.org/t/file_a', '2012-07-25T17:13:46Z',
          '6bf26fd66601b528d2e0b47eaa87edfd', 20, None),
         ('http://example.org/t/file_b', '2001-09-09T01:46:40Z',
          '452e54bdae1626ac5d6e7be81b39de21', 45, None),
     )
     resources = iter(resource_list)
     for uri, lastmod, md5, length, path in expected_rows:
         resource = next(resources)
         self.assertEqual(resource.uri, uri)
         self.assertEqual(resource.lastmod, lastmod)
         self.assertEqual(resource.md5, md5)
         self.assertEqual(resource.length, length)
         self.assertEqual(resource.path, path)
 def test3_with_md5(self):
     """Check that the XML for dir1 carries md5 hash and length for each file.

     The builder is created with do_md5=True and the serialized XML is
     searched with one regular expression per expected <url> entry.
     """
     rlb = ResourceListBuilder(do_md5=True)
     rlb.mapper = Mapper(["http://example.org/t", "resync/test/testdata/dir1"])
     rl = rlb.from_disk()
     xml = rl.as_xml()
     # Patterns are raw strings so that the regex escapes (\w, \:, \-, \+)
     # are not parsed as invalid string escape sequences.
     self.assertNotEqual(
         None,
         re.search(
             r'<loc>http://example.org/t/file_a</loc><lastmod>[\w\:\-]+Z</lastmod><rs:md hash="md5:a/Jv1mYBtSjS4LR\+qoft/Q==" length="20" />',
             xml,
         ),
     )  # must escape + in md5
     self.assertNotEqual(
         None,
         re.search(
             r'<loc>http://example.org/t/file_b</loc><lastmod>[\w\:\-]+Z</lastmod><rs:md hash="md5:RS5Uva4WJqxdbnvoGzneIQ==" length="45" />',
             xml,
         ),
     )
Exemple #15
0
 def test01_simple_scan(self):
     """Scan dir1 with a default builder and check both resources and list metadata."""
     builder = ResourceListBuilder()
     builder.mapper = Mapper(['http://example.org/t', 'tests/testdata/dir1'])
     resource_list = builder.from_disk()
     self.assertEqual(len(resource_list), 2)
     # (uri, lastmod, length) expected for each resource in turn; md5 and
     # path are expected to be None for both.
     expected_rows = (
         ('http://example.org/t/file_a', '2012-07-25T17:13:46Z', 20),
         ('http://example.org/t/file_b', '2001-09-09T01:46:40Z', 45),
     )
     resources = iter(resource_list)
     for uri, lastmod, length in expected_rows:
         resource = next(resources)
         self.assertEqual(resource.uri, uri)
         self.assertEqual(resource.lastmod, lastmod)
         self.assertEqual(resource.md5, None)
         self.assertEqual(resource.length, length)
         self.assertEqual(resource.path, None)
     # The list's "at" and "completed" values must have been set
     for md_attr in ('md_at', 'md_completed'):
         self.assertTrue(getattr(resource_list, md_attr) is not None)
 def test01_simple_scan(self):
     """Scan dir1 with a default builder and check both resources and list metadata."""
     rlb = ResourceListBuilder()
     rlb.mapper = Mapper(['http://example.org/t', 'resync/test/testdata/dir1'])
     rl = rlb.from_disk()
     self.assertEqual(len(rl), 2)
     rli = iter(rl)
     # Builtin next() works on Python 2.6+ and 3; the iterator method
     # .next() exists only on Python 2.
     r = next(rli)
     self.assertEqual(r.uri, 'http://example.org/t/file_a')
     self.assertEqual(r.lastmod, '2012-07-25T17:13:46Z')
     self.assertEqual(r.md5, None)
     self.assertEqual(r.length, 20)
     self.assertEqual(r.path, None)
     r = next(rli)
     self.assertEqual(r.uri, 'http://example.org/t/file_b')
     self.assertEqual(r.lastmod, '2001-09-09T01:46:40Z')
     self.assertEqual(r.md5, None)
     self.assertEqual(r.length, 45)
     self.assertEqual(r.path, None)
     # Make sure at and completed were set
     self.assertTrue(rl.md_at is not None)
     self.assertTrue(rl.md_completed is not None)