def test05_from_disk_paths(self):
    """Test from_disk() with explicit paths= argument.

    Covers: empty path list, the full mapped directory, a wider mapper
    with one and two sub-directories, and a path that is a single file.
    """
    rlb = ResourceListBuilder()
    rlb.mapper = Mapper(
        ['http://example.org/t', 'resync/test/testdata/dir1'])
    # No path, should get no resources
    rl = rlb.from_disk(paths=[])
    self.assertEqual(len(rl), 0)
    # Full path, 2 resources
    rl = rlb.from_disk(paths=['resync/test/testdata/dir1'])
    self.assertEqual(len(rl), 2)
    # New object with mapper covering larger space of disk
    rlb = ResourceListBuilder(set_path=True)
    rlb.mapper = Mapper(['http://example.org/t', 'resync/test/testdata'])
    # Same path with 2 resources
    rl = rlb.from_disk(paths=['resync/test/testdata/dir1'])
    self.assertEqual(len(rl), 2)
    # Two paths with 3 resources in total
    rl = rlb.from_disk(
        paths=['resync/test/testdata/dir1', 'resync/test/testdata/dir2'])
    self.assertEqual(len(rl), 3)
    # Path that is just a single file
    rl = rlb.from_disk(paths=['resync/test/testdata/dir1/file_a'])
    self.assertEqual(len(rl), 1)
    rli = iter(rl)
    # next(rli) rather than rli.next(): iterator.next() is Python 2
    # only; the rest of this suite already uses the builtin next()
    r = next(rli)
    self.assertTrue(r is not None)
    self.assertEqual(r.uri, 'http://example.org/t/dir1/file_a')
    self.assertEqual(r.lastmod, '2012-07-25T17:13:46Z')
    self.assertEqual(r.md5, None)
    self.assertEqual(r.length, 20)
    self.assertEqual(r.path, 'resync/test/testdata/dir1/file_a')
def test06_mapper_unsafe(self):
    """Check unsafe() detection for various mapping configurations."""
    # Explicit URL=path or URL=URL mappings are safe
    safe_cases = [
        (['http://example.com/=/tmp/a'], {}),
        (['http://example.com/=http://example.com/'], {}),
        (['http://example.com/'], {'use_default_path': True}),
    ]
    for mappings, kwargs in safe_cases:
        self.assertFalse(Mapper(mappings, **kwargs).unsafe())
    # A single local argument supplied -> unsafe
    self.assertTrue(Mapper(['/tmp/a'], use_default_path=True).unsafe())
    # One good mapping plus one bad one -> unsafe overall
    self.assertTrue(
        Mapper(['http://example.com/=/tmp/a', '/tmp/a=/tmp']).unsafe())
def test00_mapper_creation(self):
    """Mapper accepts pair-list and uri=path forms equivalently."""
    m1 = Mapper(['http://e.org/p/', '/tmp/q/'])
    m2 = Mapper(mappings=['http://e.org/p', '/tmp/q'])
    m3 = Mapper(['http://e.org/p/=/tmp/q/'])
    # All three construction forms yield the same single mapping
    for mapper in (m1, m2, m3):
        self.assertEqual(len(mapper), 1)
    self.assertEqual(str(m1), str(m2))
    self.assertEqual(str(m1), str(m3))
    # Two mappings; order is significant in the string form
    m4 = Mapper(['http://e.org/p/=/tmp/q/', 'http://e.org/r/=/tmp/s/'])
    m5 = Mapper(['http://e.org/r/=/tmp/s/', 'http://e.org/p/=/tmp/q/'])
    self.assertEqual(len(m4), 2)
    self.assertEqual(len(m5), 2)
    self.assertNotEqual(str(m4), str(m5))
def test_src_to_dst(self):
    """Map source URIs to destination paths (legacy two-arg Mapper API)."""
    m = Mapper('http://e.org/p', '/tmp/q')
    for src, dst in [('http://e.org/p', '/tmp/q'),
                     ('http://e.org/p/aa', '/tmp/q/aa'),
                     ('http://e.org/p/aa/bb', '/tmp/q/aa/bb'),
                     ('http://e.org/p/aa/bb/', '/tmp/q/aa/bb/')]:
        self.assertEqual(m.src_to_dst(src), dst)
    # NOTE(review): plain prefix replacement maps this too; arguably it
    # should throw an error instead -- confirm against current Mapper
    self.assertEqual(m.src_to_dst('http://e.org/pa'), '/tmp/qa')
def test02_mapper_dst_to_src(self):
    """dst_to_src maps paths under /tmp/q/ and rejects everything else."""
    m = Mapper(['http://e.org/p/', '/tmp/q/'])
    for dst, src in [('/tmp/q/', 'http://e.org/p/'),
                     ('/tmp/q/bb', 'http://e.org/p/bb'),
                     ('/tmp/q/bb/cc', 'http://e.org/p/bb/cc')]:
        self.assertEqual(m.dst_to_src(dst), src)
    # Paths outside the mapped space raise MapperError
    for bad in ('/tmp/q', '/tmp/qa', 'nomatch'):
        self.assertRaises(MapperError, m.dst_to_src, bad)
def test1_simple_output(self):
    """Check XML serialization of a simple two-resource inventory."""
    builder = InventoryBuilder(verbose=True)
    builder.mapper = Mapper(
        ['http://example.org/t', 'resync/test/testdata/dir1'])
    inv = builder.from_disk()
    # Expected output assembled from adjacent literals for readability;
    # the resulting string is identical to the serialized form
    expected = (
        "<?xml version='1.0' encoding='UTF-8'?>\n"
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" '
        'xmlns:rs="http://www.openarchives.org/rs/terms/">'
        '<url><loc>http://example.org/t/file_a</loc>'
        '<lastmod>2012-07-25T17:13:46Z</lastmod>'
        '<rs:size>20</rs:size></url>'
        '<url><loc>http://example.org/t/file_b</loc>'
        '<lastmod>2001-09-09T01:46:40Z</lastmod>'
        '<rs:size>45</rs:size></url></urlset>')
    self.assertEqual(Sitemap().resources_as_xml(inv), expected)
def test1_simple_output(self):
    """Check XML serialization of a simple two-file inventory."""
    builder = InventoryBuilder(verbose=True)
    builder.mapper = Mapper(
        ['http://example.org/t', 'resync/test/testdata/dir1'])
    inv = builder.from_disk()
    # Expected output assembled from adjacent literals for readability;
    # the resulting string is identical to the serialized form
    expected = (
        "<?xml version='1.0' encoding='UTF-8'?>\n"
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" '
        'xmlns:rs="http://resourcesync.org/change/0.1">'
        '<url><loc>http://example.org/t/file_a</loc>'
        '<lastmod>2012-03-14T17:46:04</lastmod>'
        '<rs:size>20</rs:size></url>'
        '<url><loc>http://example.org/t/file_b</loc>'
        '<lastmod>2012-03-14T17:46:25</lastmod>'
        '<rs:size>45</rs:size></url></urlset>')
    self.assertEqual(Sitemap().inventory_as_xml(inv), expected)
def test01_mapper_src_to_dst(self):
    """src_to_dst maps URIs under the prefix and rejects others."""
    m = Mapper(['http://e.org/p/', '/tmp/q/'])
    for src, dst in [('http://e.org/p/', '/tmp/q/'),
                     ('http://e.org/p/aa', '/tmp/q/aa'),
                     ('http://e.org/p/aa/bb', '/tmp/q/aa/bb'),
                     ('http://e.org/p/aa/bb/', '/tmp/q/aa/bb/')]:
        self.assertEqual(m.src_to_dst(src), dst)
    # URIs outside the mapped space raise MapperError
    for bad in ('http://e.org/p', 'http://e.org/pa', 'nomatch'):
        self.assertRaises(MapperError, m.src_to_dst, bad)
def test04_data(self):
    """Check resource metadata including md5 and path (set_md5 form)."""
    builder = ResourceListBuilder(set_path=True, set_md5=True)
    builder.mapper = Mapper(['http://example.org/t', 'tests/testdata/dir1'])
    rlist = builder.from_disk()
    self.assertEqual(len(rlist), 2)
    res = rlist.resources.get('http://example.org/t/file_a')
    self.assertTrue(res is not None)
    self.assertEqual(res.uri, 'http://example.org/t/file_a')
    self.assertEqual(res.lastmod, '2012-07-25T17:13:46Z')
    self.assertEqual(res.md5, 'a/Jv1mYBtSjS4LR+qoft/Q==')
    self.assertEqual(res.path, 'tests/testdata/dir1/file_a')
def test04_data(self):
    """Check resource metadata with set_hashes=['md5'] (hex digest form)."""
    builder = ResourceListBuilder(set_path=True, set_hashes=['md5'])
    builder.mapper = Mapper(['http://example.org/t', 'tests/testdata/dir1'])
    rlist = builder.from_disk()
    self.assertEqual(len(rlist), 2)
    res = rlist.resources.get('http://example.org/t/file_a')
    self.assertTrue(res is not None)
    self.assertEqual(res.uri, 'http://example.org/t/file_a')
    self.assertEqual(res.lastmod, '2012-07-25T17:13:46Z')
    self.assertEqual(res.md5, '6bf26fd66601b528d2e0b47eaa87edfd')
    self.assertEqual(res.path, 'tests/testdata/dir1/file_a')
def test_11_write_multifile(self):
    """Write a resource list as a sitemapindex plus component sitemaps.

    Four resources with max_sitemap_entries=2 force the output to be
    split; reads back the components, the index alone, and the full set.

    Fixes: assertEquals is a deprecated alias (removed in Python 3.12),
    replaced by assertEqual; the temporary directory is now removed in a
    finally block so it is not leaked when an assertion fails.
    """
    tempdir = tempfile.mkdtemp(prefix='test_resource_list_multifile')
    try:
        rl = ResourceList()
        rl.mapper = Mapper(['http://localhost/=%s/' % (tempdir)])
        rl.add(Resource(uri='http://localhost/a'))
        rl.add(Resource(uri='http://localhost/b'))
        rl.add(Resource(uri='http://localhost/c'))
        rl.add(Resource(uri='http://localhost/d'))
        rl.max_sitemap_entries = 2
        # First try writing without multifile allowed
        rl.allow_multifile = False
        self.assertRaises(ListBaseIndexError, rl.write,
                          basename=os.path.join(tempdir, 'sitemap.xml'))
        # Second actually do it
        rl.allow_multifile = True
        rl.write(basename=os.path.join(tempdir, 'sitemap.xml'))
        # Check the two component sitemaps
        rl1 = ResourceList()
        rl1.read(os.path.join(tempdir, 'sitemap00000.xml'))
        self.assertEqual(len(rl1), 2)
        self.assertEqual(rl1.capability, 'resourcelist')
        self.assertFalse(rl1.sitemapindex)
        i = iter(rl1)
        self.assertEqual(next(i).uri, 'http://localhost/a')
        self.assertEqual(next(i).uri, 'http://localhost/b')
        rl2 = ResourceList()
        rl2.read(os.path.join(tempdir, 'sitemap00001.xml'))
        self.assertEqual(len(rl2), 2)
        i = iter(rl2)
        self.assertEqual(next(i).uri, 'http://localhost/c')
        self.assertEqual(next(i).uri, 'http://localhost/d')
        # Check the sitemapindex (read just as index)
        rli = ResourceList()
        rli.read(os.path.join(tempdir, 'sitemap.xml'), index_only=True)
        self.assertEqual(len(rli), 2)
        i = iter(rli)
        self.assertEqual(rli.capability, 'resourcelist')
        self.assertTrue(rli.sitemapindex)
        self.assertEqual(next(i).uri, 'http://localhost/sitemap00000.xml')
        self.assertEqual(next(i).uri, 'http://localhost/sitemap00001.xml')
        # Check the sitemapindex and components
        rli = ResourceList(mapper=rl.mapper)
        rli.read(os.path.join(tempdir, 'sitemap.xml'))
        self.assertEqual(len(rli), 4)
        self.assertEqual(rli.capability, 'resourcelist')
        self.assertFalse(rli.sitemapindex)
        i = iter(rli)
        self.assertEqual(next(i).uri, 'http://localhost/a')
        self.assertEqual(next(i).uri, 'http://localhost/b')
        self.assertEqual(next(i).uri, 'http://localhost/c')
        self.assertEqual(next(i).uri, 'http://localhost/d')
    finally:
        # Cleanup tempdir even on test failure
        shutil.rmtree(tempdir)
def test4_data(self):
    """Resource attributes from a disk scan with md5 enabled."""
    builder = InventoryBuilder(do_md5=True)
    builder.mapper = Mapper(
        ['http://example.org/t', 'resync/test/testdata/dir1'])
    inv = builder.from_disk()
    self.assertEqual(len(inv), 2)
    res = inv.resources.get('http://example.org/t/file_a')
    self.assertTrue(res is not None)
    self.assertEqual(res.uri, 'http://example.org/t/file_a')
    self.assertEqual(res.lastmod, '2012-03-14T17:46:04')
    self.assertEqual(res.md5, '6bf26fd66601b528d2e0b47eaa87edfd')
    self.assertEqual(res.file, 'resync/test/testdata/dir1/file_a')
def test_02_read_with_mapper(self):
    """Read a sitemapindex whose component URIs need local mapping."""
    rl = ResourceList()
    rl.mapper = Mapper(['http://localhost/=tests/testdata/sitemapindex2/'])
    rl.read('tests/testdata/sitemapindex2/sitemap_mapper.xml')
    self.assertEqual(len(rl.resources), 17,
                     '17 resources from 3 sitemaps listed')
    sr = sorted(rl.uris())
    # Spot-check first few and last entries of the sorted URI list
    spot_checks = {0: 'http://localhost:8888/resources/1',
                   1: 'http://localhost:8888/resources/10',
                   2: 'http://localhost:8888/resources/100',
                   3: 'http://localhost:8888/resources/1000',
                   16: 'http://localhost:8888/resources/826'}
    for index, uri in spot_checks.items():
        self.assertEqual(sr[index], uri)
def test4_data(self):
    """Resource attributes from a disk scan with md5 enabled (base64)."""
    builder = InventoryBuilder(do_md5=True)
    builder.mapper = Mapper(
        ['http://example.org/t', 'resync/test/testdata/dir1'])
    inv = builder.from_disk()
    self.assertEqual(len(inv), 2)
    res = inv.resources.get('http://example.org/t/file_a')
    self.assertTrue(res is not None)
    self.assertEqual(res.uri, 'http://example.org/t/file_a')
    self.assertEqual(res.lastmod, '2012-07-25T17:13:46Z')
    self.assertEqual(res.md5, 'a/Jv1mYBtSjS4LR+qoft/Q==')
    self.assertEqual(res.file, 'resync/test/testdata/dir1/file_a')
def test05_path_from_uri(self):
    """path_from_uri converts URIs into local path fragments."""
    m = Mapper()
    cases = [
        # Plain names and relative paths pass through unchanged
        ('a_file', 'a_file'),
        ('some_path/a_file', 'some_path/a_file'),
        # Scheme is stripped; separators become underscores
        ('http://localhost/p', 'localhost_p'),
        ('http://localhost:8888/p', 'localhost_8888_p'),
        ('https://localhost:8888/p', 'localhost_8888_p'),
        # Trailing slash is dropped
        ('http://example.com', 'example.com'),
        ('http://example.com/', 'example.com'),
        ('http://example.com/ex1', 'example.com_ex1'),
        ('http://example.com/ex1/', 'example.com_ex1'),
    ]
    for uri, path in cases:
        self.assertEqual(m.path_from_uri(uri), path)
def test06_odd_file_names(self):
    """Verify that unicode and awkward file names are read properly."""
    builder = ResourceListBuilder()
    builder.mapper = Mapper(['x:', 'tests/testdata/odd_file_names'])
    rlist = builder.from_disk(paths=['tests/testdata/odd_file_names'])
    # Collect URIs to test membership against
    uris = set(res.uri for res in rlist)
    for present in ('x:/not_odd.txt', 'x:/with&ersand.txt',
                    'x:/with spaces.txt'):
        self.assertTrue(present in uris)
    # File names for accented chars are represented with combining
    # characters, not the precomposed single code points
    self.assertTrue(u'x:/Pi\u006e\u0303a_Colada.txt' in uris)
    self.assertFalse(u'x:/Pi\u00f1a_Colada.txt' in uris)
    self.assertTrue(u'x:/A_\u0041\u0303_tilde.txt' in uris)
    self.assertFalse(u'x:/A_\u00c3_tilde.txt' in uris)
    # Snowman is a single character
    self.assertFalse(u'x:snowman_\u2603.txt' in uris)
def test3_with_md5(self):
    """XML output includes size and md5 checksum when do_md5 is set.

    Fix: the regex patterns are now raw strings — '\\w' etc. inside a
    plain literal is an invalid escape sequence (DeprecationWarning,
    scheduled to become a SyntaxError).
    """
    ib = InventoryBuilder(do_md5=True)
    ib.mapper = Mapper(
        ['http://example.org/t', 'resync/test/testdata/dir1'])
    i = ib.from_disk()
    s = Sitemap()
    xml = s.inventory_as_xml(i)
    self.assertNotEqual(
        None,
        re.search(
            r'<loc>http://example.org/t/file_a</loc><lastmod>[\w\:\-]+</lastmod><rs:size>20</rs:size><rs:md5>6bf26fd66601b528d2e0b47eaa87edfd</rs:md5>',
            xml),
        'size/checksum for file_a')
    self.assertNotEqual(
        None,
        re.search(
            r'<loc>http://example.org/t/file_b</loc><lastmod>[\w\:\-]+</lastmod><rs:size>45</rs:size><rs:md5>452e54bdae1626ac5d6e7be81b39de21</rs:md5>',
            xml),
        'size/checksum for file_b')
def test3_with_md5(self):
    """XML output includes size and base64 md5 fixity when do_md5 is set.

    Fix: the regex patterns are now raw strings — '\\w' etc. inside a
    plain literal is an invalid escape sequence (DeprecationWarning,
    scheduled to become a SyntaxError). The '+' in the base64 md5 value
    must still be escaped for the regex.
    """
    ib = InventoryBuilder(do_md5=True)
    ib.mapper = Mapper(
        ['http://example.org/t', 'resync/test/testdata/dir1'])
    i = ib.from_disk()
    s = Sitemap()
    xml = s.resources_as_xml(i)
    self.assertNotEqual(
        None,
        re.search(
            r'<loc>http://example.org/t/file_a</loc><lastmod>[\w\:\-]+Z</lastmod><rs:size>20</rs:size><rs:fixity type="md5">a/Jv1mYBtSjS4LR\+qoft/Q==</rs:fixity>',
            xml))
    self.assertNotEqual(
        None,
        re.search(
            r'<loc>http://example.org/t/file_b</loc><lastmod>[\w\:\-]+Z</lastmod><rs:size>45</rs:size><rs:fixity type="md5">RS5Uva4WJqxdbnvoGzneIQ==</rs:fixity>',
            xml))
def test03_set_hashes(self):
    """Scan with set_hashes=['md5']: hex digests recorded, no path."""
    builder = ResourceListBuilder(set_hashes=['md5'])
    builder.mapper = Mapper(['http://example.org/t', 'tests/testdata/dir1'])
    rlist = builder.from_disk()
    self.assertEqual(len(rlist), 2)
    expected = [
        ('http://example.org/t/file_a', '2012-07-25T17:13:46Z',
         '6bf26fd66601b528d2e0b47eaa87edfd', 20),
        ('http://example.org/t/file_b', '2001-09-09T01:46:40Z',
         '452e54bdae1626ac5d6e7be81b39de21', 45),
    ]
    for res, (uri, lastmod, md5, length) in zip(rlist, expected):
        self.assertEqual(res.uri, uri)
        self.assertEqual(res.lastmod, lastmod)
        self.assertEqual(res.md5, md5)
        self.assertEqual(res.length, length)
        self.assertEqual(res.path, None)
def test03_set_md5(self):
    """Scan with set_md5: base64 md5 digests recorded, no path."""
    builder = ResourceListBuilder(set_md5=True)
    builder.mapper = Mapper(['http://example.org/t', 'tests/testdata/dir1'])
    rlist = builder.from_disk()
    self.assertEqual(len(rlist), 2)
    expected = [
        ('http://example.org/t/file_a', '2012-07-25T17:13:46Z',
         'a/Jv1mYBtSjS4LR+qoft/Q==', 20),
        ('http://example.org/t/file_b', '2001-09-09T01:46:40Z',
         'RS5Uva4WJqxdbnvoGzneIQ==', 45),
    ]
    for res, (uri, lastmod, md5, length) in zip(rlist, expected):
        self.assertEqual(res.uri, uri)
        self.assertEqual(res.lastmod, lastmod)
        self.assertEqual(res.md5, md5)
        self.assertEqual(res.length, length)
        self.assertEqual(res.path, None)
def test02_no_length(self):
    """With set_length=False neither length nor md5/path is recorded."""
    builder = ResourceListBuilder(set_length=False)
    builder.mapper = Mapper(['http://example.org/t', 'tests/testdata/dir1'])
    rlist = builder.from_disk()
    self.assertEqual(len(rlist), 2)
    expected = [('http://example.org/t/file_a', '2012-07-25T17:13:46Z'),
                ('http://example.org/t/file_b', '2001-09-09T01:46:40Z')]
    for res, (uri, lastmod) in zip(rlist, expected):
        self.assertEqual(res.uri, uri)
        self.assertEqual(res.lastmod, lastmod)
        self.assertEqual(res.md5, None)
        self.assertEqual(res.length, None)
        self.assertEqual(res.path, None)
def test01_simple_scan(self):
    """Default scan: uri, lastmod and length set; no md5 or path."""
    builder = ResourceListBuilder()
    builder.mapper = Mapper(['http://example.org/t', 'tests/testdata/dir1'])
    rlist = builder.from_disk()
    self.assertEqual(len(rlist), 2)
    expected = [('http://example.org/t/file_a', '2012-07-25T17:13:46Z', 20),
                ('http://example.org/t/file_b', '2001-09-09T01:46:40Z', 45)]
    for res, (uri, lastmod, length) in zip(rlist, expected):
        self.assertEqual(res.uri, uri)
        self.assertEqual(res.lastmod, lastmod)
        self.assertEqual(res.md5, None)
        self.assertEqual(res.length, length)
        self.assertEqual(res.path, None)
    # Make sure at and completed timestamps were set
    self.assertTrue(rlist.md_at is not None)
    self.assertTrue(rlist.md_completed is not None)
def __init__(self, checksum=False, verbose=False, dryrun=False):
    """Initialize Client with default configuration.

    Args:
        checksum: if True, checksums are used (stored on self.checksum
            and consulted by sync code).
        verbose: if True, progress information is printed.
        dryrun: if True, actions are reported but not performed --
            presumably consulted by sync/audit methods; confirm there.
    """
    super(Client, self).__init__()
    self.checksum = checksum
    self.verbose = verbose
    self.dryrun = dryrun
    self.logger = logging.getLogger('resync.client')
    # Mapper translating between source URIs and local paths;
    # replaced via set_mappings()
    self.mapper = Mapper()
    self.resource_list_name = 'resourcelist.xml'
    self.change_list_name = 'changelist.xml'
    self.dump_format = None
    self.exclude_patterns = []
    self.sitemap_name = None
    # Allow output to be split over multiple sitemap files by default
    self.allow_multifile = True
    self.noauth = False
    self.strictauth = False
    self.max_sitemap_entries = None
    self.ignore_failures = False
    self.pretty_xml = True
    # Default file names
    self.status_file = '.resync-client-status.cfg'
    self.default_resource_dump = 'resourcedump.zip'
    self.default_change_dump = 'changedump.zip'
def test00_mapper_creation(self):
    """Mapper creation from the accepted forms, plus parse error cases."""
    m1 = Mapper(['http://e.org/p/', '/tmp/q/'])
    m2 = Mapper(mappings=['http://e.org/p', '/tmp/q'])
    m3 = Mapper(['http://e.org/p/=/tmp/q/'])
    # All three construction forms yield the same single mapping
    for mapper in (m1, m2, m3):
        self.assertEqual(len(mapper), 1)
    self.assertEqual(str(m1), str(m2))
    self.assertEqual(str(m1), str(m3))
    # Two mappings; order is significant in the string form
    m4 = Mapper(['http://e.org/p/=/tmp/q/', 'http://e.org/r/=/tmp/s/'])
    m5 = Mapper(['http://e.org/r/=/tmp/s/', 'http://e.org/p/=/tmp/q/'])
    self.assertEqual(len(m4), 2)
    self.assertEqual(len(m5), 2)
    self.assertNotEqual(str(m4), str(m5))
    # Error cases
    m6 = Mapper()
    # Too many equals signs
    self.assertRaises(MapperError, m6.parse, ['a=b=c'])
    self.assertRaises(MapperError, m6.parse, ['a=b=c=d'])
    # Duplicated source or destination
    self.assertRaises(MapperError, m6.parse, ['a=b', 'a=c'])
    self.assertRaises(MapperError, m6.parse, ['x=z', 'y=z'])
def test04_mapper2_dst_to_src(self):
    """dst_to_src with two mappings picks the matching one."""
    m = Mapper(['http://e.org/p=/tmp/q', 'http://e.org/r=/tmp/s'])
    for dst, src in [('/tmp/q/', 'http://e.org/p/'),
                     ('/tmp/q/bb', 'http://e.org/p/bb'),
                     ('/tmp/s/', 'http://e.org/r/'),
                     ('/tmp/s/bb', 'http://e.org/r/bb')]:
        self.assertEqual(m.dst_to_src(dst), src)
def sync_or_audit(self, src_uri, dst_path, allow_deletion=False,
                  audit_only=False):
    """Synchronize (or just audit) dst_path against src_uri.

    Fetches the source inventory from src_uri, builds the destination
    inventory from dst_path mapped back to source URIs, compares them,
    reports status, and (unless audit_only) updates changed/added
    resources and, if allow_deletion, removes deleted ones.

    Fixes: Python 2 print statements converted to print() calls (syntax
    errors under Python 3, which the rest of this code targets); local
    variable renamed so it no longer shadows the builtin 'file'.

    Raises:
        ClientFatalError: if the source inventory cannot be read or is
            empty.
    """
    # 1. Get inventories from both src and dst
    # 1.a source inventory
    ib = InventoryBuilder()
    try:
        src_inventory = ib.get(src_uri)
    except IOError as e:
        raise ClientFatalError("Can't read source inventory (%s)" % str(e))
    if (self.verbose):
        print("Read src inventory from %s, %d resources listed" % (
            src_uri, len(src_inventory)))
    if (len(src_inventory) == 0):
        raise ClientFatalError(
            "Aborting as there are no resources to sync")
    if (self.checksum and not src_inventory.has_md5()):
        # Can't verify what the source doesn't provide
        self.checksum = False
        print("Not calculating checksums on destination as not present in source inventory")
    # 1.b destination inventory mapped back to source URIs
    segments = src_uri.split('/')
    segments.pop()
    url_prefix = '/'.join(segments)
    ib.do_md5 = self.checksum
    dst_inventory = ib.from_disk(dst_path, url_prefix)
    # 2. Compare these inventories respecting any comparison options
    (num_same, changed, deleted, added) = dst_inventory.compare(src_inventory)
    # 3. Report status and planned actions
    status = " IN SYNC "
    if (len(changed) > 0 or len(deleted) > 0 or len(added) > 0):
        status = "NOT IN SYNC"
    print("Status: %s (same=%d, changed=%d, deleted=%d, added=%d)" %
          (status, num_same, len(changed), len(deleted), len(added)))
    if (audit_only):
        return
    # 4. Grab files to do sync
    mapper = Mapper(url_prefix, dst_path)
    for uri in changed:
        dst_file = mapper.src_to_dst(uri)
        if (self.verbose):
            print("changed: %s -> %s" % (uri, dst_file))
        self.update_resource(uri, dst_file,
                             src_inventory.resources[uri].timestamp)
    for uri in added:
        dst_file = mapper.src_to_dst(uri)
        if (self.verbose):
            print("added: %s -> %s" % (uri, dst_file))
        self.update_resource(uri, dst_file,
                             src_inventory.resources[uri].timestamp)
    for uri in deleted:
        if (allow_deletion):
            dst_file = mapper.src_to_dst(uri)
            if (self.verbose):
                print("deleted: %s -> %s" % (uri, dst_file))
            os.unlink(dst_file)
        else:
            if (self.verbose):
                print("would delete %s (--delete to enable)" % uri)
def set_mappings(self, mappings):
    """Build and set Mapper object based on input mappings.

    Replaces self.mapper with a new Mapper built from mappings;
    use_default_path=True allows a bare source URI with a locally
    derived path (see Mapper).
    """
    self.mapper = Mapper(mappings, use_default_path=True)
def test_dst_to_src(self):
    """Map local paths back to URIs (legacy two-arg Mapper API)."""
    m = Mapper('http://e.org/p', '/tmp/q')
    for dst, src in [('/tmp/q', 'http://e.org/p'),
                     ('/tmp/q/bb', 'http://e.org/p/bb'),
                     ('/tmp/q/bb/cc', 'http://e.org/p/bb/cc')]:
        self.assertEqual(m.dst_to_src(dst), src)
def test03_mapper2_src_to_dst(self):
    """src_to_dst with two mappings picks the matching one."""
    m = Mapper(['http://e.org/p=/tmp/q', 'http://e.org/r=/tmp/s'])
    for src, dst in [('http://e.org/p/', '/tmp/q/'),
                     ('http://e.org/p/aa', '/tmp/q/aa'),
                     ('http://e.org/r/', '/tmp/s/'),
                     ('http://e.org/r/aa', '/tmp/s/aa')]:
        self.assertEqual(m.src_to_dst(src), dst)
def test07_default_src_uri(self):
    """default_src_uri is the first mapping's source; error if none."""
    self.assertEqual(Mapper(['a=b']).default_src_uri(), 'a')
    # With several mappings the first still wins
    self.assertEqual(Mapper(['a=b', 'b=c']).default_src_uri(), 'a')
    # No mappings at all -> MapperError
    self.assertRaises(MapperError, Mapper().default_src_uri)