def test13_mime_type(self):
    """mime_type is stored, can be cleared, and defaults to None."""
    res = Resource(uri='tv1', mime_type='text/plain')
    self.assertEqual(res.mime_type, 'text/plain')
    # clearing the attribute works
    res.mime_type = None
    self.assertEqual(res.mime_type, None)
    # and a Resource built without one defaults to None
    self.assertEqual(Resource(uri='tv2').mime_type, None)
def test_01_resource_str(self):
    """A Resource with lastmod serializes as a <url> element with <lastmod>."""
    r1 = Resource("a3")
    r1.lastmod = "2012-01-11T01:02:03"
    self.assertEqual(
        Sitemap().resource_as_xml(r1),
        "<?xml version='1.0' encoding='UTF-8'?>\n<url><loc>a3</loc><lastmod>2012-01-11T01:02:03</lastmod></url>",
    )
def test1c_same(self):
    """Same with lastmod instead of direct timestamp"""
    r1 = Resource("a")
    r1lm = "2012-01-01T00:00:00Z"
    r1.lastmod = r1lm
    r2 = Resource("a")
    # every one of these lastmod spellings should parse to the same instant
    for r2lm in (
        "2012",
        "2012-01",
        "2012-01-01",
        "2012-01-01T00:00Z",
        "2012-01-01T00:00:00Z",
        "2012-01-01T00:00:00.000000Z",
        "2012-01-01T00:00:00.000000000000Z",
        "2012-01-01T00:00:00.000000000001Z",  # below resolution
        "2012-01-01T00:00:00.00+00:00",
        "2012-01-01T00:00:00.00-00:00",
        "2012-01-01T02:00:00.00-02:00",
        "2011-12-31T23:00:00.00+01:00",
    ):
        r2.lastmod = r2lm
        self.assertEqual(
            r1.timestamp, r2.timestamp,
            ("%s (%f) == %s (%f)" % (r1lm, r1.timestamp, r2lm, r2.timestamp))
        )
        self.assertEqual(r1, r2)
def test1d_same(self):
    """Same with slight timestamp diff"""
    r1 = Resource("a")
    r1.lastmod = "2012-01-02T01:02:03Z"
    r2 = Resource("a")
    r2.lastmod = "2012-01-02T01:02:03.99Z"
    # the raw timestamps differ (0.99s) yet the resources compare equal
    self.assertNotEqual(r1.timestamp, r2.timestamp)
    self.assertEqual(r1, r2)
def test01d_same(self):
    """Same with slight timestamp diff"""
    r1 = Resource('a')
    r1.lastmod = '2012-01-02T01:02:03Z'
    r2 = Resource('a')
    r2.lastmod = '2012-01-02T01:02:03.99Z'
    # raw timestamps differ (0.99s) yet the resources compare equal
    self.assertNotEqual(r1.timestamp, r2.timestamp)
    self.assertEqual(r1, r2)
def test_08_print_non_ascii_uri(self):
    """Verify that valid Unicode uri values give good XML out."""
    m = ResourceList(md={'capability': 'resourcelist', 'modified': None})
    m.add(Resource(uri=u'a_\u00c3_b'))
    m.add(Resource(uri=u'c_\u1234_d'))
    xml = Sitemap().resources_as_xml(m)
    # the non-ASCII characters must survive serialization unescaped
    self.assertTrue(re.search(u'<loc>a_.*_b</loc>', xml))
    self.assertTrue(re.search(u'<loc>a_\u00c3_b</loc>', xml))
    self.assertTrue(re.search(u'<loc>c_\u1234_d</loc>', xml))
def test_build_archives_ex_3_2(self):
    """Resource List Archive Index listing 2 component Resource List Archives"""
    rlai = ResourceListArchive()
    rlai.sitemapindex = True
    rlai.up = 'http://example.com/dataset1/capabilitylist.xml'
    rlai.add(Resource(uri='http://example.com/resourcelistarchive00001.xml'))
    rlai.add(Resource(uri='http://example.com/resourcelistarchive00002.xml'))
    # compare generated XML against the canned example document
    ex_xml = self._open_ex('archives_ex_3_2').read()
    self._assert_xml_equal(rlai.as_xml(), ex_xml)
def test_03_resource_str_hashes(self):
    """Hash attributes serialize as a combined rs:md hash attribute."""
    r1 = Resource('03hashes', 1234.1)
    r1.md5 = 'aaa'
    r1.sha1 = 'bbb'
    self.assertEqual(
        Sitemap().resource_as_xml(r1),
        "<url><loc>03hashes</loc><lastmod>1970-01-01T00:20:34.100000Z</lastmod><rs:md hash=\"md5:aaa sha-1:bbb\" /></url>")
    # adding sha256 extends the combined hash attribute
    r1.sha256 = 'ccc'
    self.assertEqual(
        Sitemap().resource_as_xml(r1),
        "<url><loc>03hashes</loc><lastmod>1970-01-01T00:20:34.100000Z</lastmod><rs:md hash=\"md5:aaa sha-1:bbb sha-256:ccc\" /></url>")
    # clearing sha1 removes just that entry
    r1.sha1 = None
    self.assertEqual(
        Sitemap().resource_as_xml(r1),
        "<url><loc>03hashes</loc><lastmod>1970-01-01T00:20:34.100000Z</lastmod><rs:md hash=\"md5:aaa sha-256:ccc\" /></url>")
def test02_dump_check_files(self):
    """check_files() succeeds when all resources have paths; sizes are summed."""
    cl = ChangeList()
    cl.add(Resource('http://ex.org/a', length=7,
                    path='tests/testdata/a', change="updated"))
    cl.add(Resource('http://ex.org/b', length=21,
                    path='tests/testdata/b', change="updated"))
    d = Dump(resources=cl)
    self.assertTrue(d.check_files())
    # total_size is the sum of the two lengths (7 + 21)
    self.assertEqual(d.total_size, 28)
def test10_no_path(self):
    """check_files() raises DumpError when a resource lacks a local path."""
    rl = ResourceList()
    # NOTE(review): path prefix 'resync/test/testdata' differs from the
    # 'tests/testdata' used by other tests here — confirm which layout
    # is current (irrelevant to the DumpError expected for 'b' below).
    rl.add(Resource('http://ex.org/a', length=7,
                    path='resync/test/testdata/a'))
    rl.add(Resource('http://ex.org/b', length=21))
    d = Dump(rl)
    self.assertRaises(DumpError, d.check_files)
def test6_has_md5(self):
    """has_md5() flips to True once any member resource gains an md5."""
    first = Resource(uri="a")
    second = Resource(uri="b")
    inventory = Inventory()
    # empty inventory: no hashes
    self.assertFalse(inventory.has_md5())
    inventory.add(first)
    inventory.add(second)
    # still no hashes set on any member
    self.assertFalse(inventory.has_md5())
    first.md5 = "aabbcc"
    self.assertTrue(inventory.has_md5())
def test07_has_md5(self):
    """has_md5() is False until some resource in the list has an md5."""
    r1 = Resource(uri='a')
    r2 = Resource(uri='b')
    i = ResourceList()
    self.assertFalse(i.has_md5())
    i.add(r1)
    i.add(r2)
    self.assertFalse(i.has_md5())
    r1.md5 = "aabbcc"
    self.assertTrue(i.has_md5())
def test09_changetypeerror(self):
    """change attribute validation raises ChangeTypeError for bad values.

    Fixed: the original set ``Resource.CHANGE_TYPES = False`` and never
    restored it, leaking disabled validation into every later test; the
    class attribute is now restored in a ``finally`` block.
    """
    r1 = Resource('a')
    self.assertEqual(r1.change, None)
    r1.change = 'deleted'
    self.assertEqual(r1.change, 'deleted')
    self.assertRaises(ChangeTypeError, Resource, 'a', change="bad")
    # disable checking, restoring the class-level setting afterwards
    saved_change_types = Resource.CHANGE_TYPES
    try:
        Resource.CHANGE_TYPES = False
        r1 = Resource('a', change="bad")
        self.assertEqual(r1.change, 'bad')
    finally:
        Resource.CHANGE_TYPES = saved_change_types
def test02_order(self):
    """Iterating a ResourceSet yields resources sorted by URI."""
    rs = ResourceSet()
    # insert deliberately out of order
    for uri in ('a2', 'a3', 'a1'):
        rs.add(Resource(uri))
    it = iter(rs)
    for expected in ('a1', 'a2', 'a3'):
        self.assertEqual(next(it).uri, expected)
    # and the iterator is exhausted after the three entries
    self.assertRaises(StopIteration, next, it)
def test_09_print_subset(self):
    """inventory_as_xml with entries= prints only the named subset, in order.

    Fixed: the original rebound ``r3`` twice, so resource 'c' was never
    added to the inventory; use a distinct ``r4`` and add all four.
    The expected XML is unchanged as only 'd' and 'b' are printed.
    """
    r1 = Resource(uri='a', lastmod='2001-01-01', size=1234)
    r2 = Resource(uri='b', lastmod='2002-02-02', size=56789)
    r3 = Resource(uri='c', lastmod='2003-03-03', size=0)
    r4 = Resource(uri='d', lastmod='2003-03-04', size=444)
    m = Inventory()
    m.add(r1)
    m.add(r2)
    m.add(r3)
    m.add(r4)
    self.assertEqual(
        Sitemap().inventory_as_xml(m, entries=['d', 'b']),
        "<?xml version='1.0' encoding='UTF-8'?>\n<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:rs=\"http://resourcesync.org/change/0.1\"><url><loc>d</loc><lastmod>2003-03-04T00:00:00</lastmod><rs:size>444</rs:size></url><url><loc>b</loc><lastmod>2002-02-02T00:00:00</lastmod><rs:size>56789</rs:size></url></urlset>")
def test6_has_md5(self):
    """has_md5() is False until some resource in the inventory has an md5."""
    r1 = Resource(uri='a')
    r2 = Resource(uri='b')
    m = Inventory()
    self.assertFalse(m.has_md5())
    m.add(r1)
    m.add(r2)
    self.assertFalse(m.has_md5())
    r1.md5 = "aabbcc"
    self.assertTrue(m.has_md5())
def test01_print(self):
    """as_xml() of a ListBaseWithIndex serializes all entries with metadata."""
    lb = ListBaseWithIndex()
    lb.add(Resource(uri='a', lastmod='2001-01-01', length=1234))
    lb.add(Resource(uri='b', lastmod='2002-02-02', length=56789))
    lb.add(Resource(uri='c', lastmod='2003-03-03', length=0))
    lb.md['from'] = None  # avoid now being added
    self.assertEqual(
        lb.as_xml(),
        '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/"><rs:md capability="unknown" /><url><loc>a</loc><lastmod>2001-01-01T00:00:00Z</lastmod><rs:md length="1234" /></url><url><loc>b</loc><lastmod>2002-02-02T00:00:00Z</lastmod><rs:md length="56789" /></url><url><loc>c</loc><lastmod>2003-03-03T00:00:00Z</lastmod><rs:md length="0" /></url></urlset>')
def test11_md_completed_roundtrips(self):
    """md_completed values normalize to canonical UTC 'Z' form on read-back."""
    r = Resource('a')
    r.md_completed = '2013-04-14'
    self.assertEqual(r.md_completed, '2013-04-14T00:00:00Z')
    r.md_completed = '2013-04-14T00:00:00+00:00'
    self.assertEqual(r.md_completed, '2013-04-14T00:00:00Z')
    r.md_completed = '2013-04-14T00:00:00-00:00'
    self.assertEqual(r.md_completed, '2013-04-14T00:00:00Z')
    r.md_completed = '2013-04-14T18:37:36Z'
    self.assertEqual(r.md_completed, '2013-04-14T18:37:36Z')
def test6_has_md5(self):
    """has_md5() is False until some resource in the list has an md5."""
    r1 = Resource(uri='a')
    r2 = Resource(uri='b')
    i = ResourceList()
    self.assertFalse(i.has_md5())
    i.add(r1)
    i.add(r2)
    self.assertFalse(i.has_md5())
    r1.md5 = "aabbcc"
    self.assertTrue(i.has_md5())
def test09_changetypeerror(self):
    """change attribute validation raises ChangeTypeError for bad values.

    Fixed: the original set ``Resource.CHANGE_TYPES = False`` and never
    restored it, leaking disabled validation into every later test; the
    class attribute is now restored in a ``finally`` block.
    """
    r1 = Resource('a')
    self.assertEqual(r1.change, None)
    r1.change = 'deleted'
    self.assertEqual(r1.change, 'deleted')
    self.assertRaises(ChangeTypeError, Resource, 'a', change="bad")
    # disable checking, restoring the class-level setting afterwards
    saved_change_types = Resource.CHANGE_TYPES
    try:
        Resource.CHANGE_TYPES = False
        r1 = Resource('a', change="bad")
        self.assertEqual(r1.change, 'bad')
    finally:
        Resource.CHANGE_TYPES = saved_change_types
def test10_md_at_roundtrips(self):
    """md_at values normalize to canonical UTC 'Z' form on read-back."""
    r = Resource('a')
    r.md_at = '2013-03-14'
    self.assertEqual(r.md_at, '2013-03-14T00:00:00Z')
    r.md_at = '2013-03-14T00:00:00+00:00'
    self.assertEqual(r.md_at, '2013-03-14T00:00:00Z')
    r.md_at = '2013-03-14T00:00:00-00:00'
    self.assertEqual(r.md_at, '2013-03-14T00:00:00Z')
    r.md_at = '2013-03-14T18:37:36Z'
    self.assertEqual(r.md_at, '2013-03-14T18:37:36Z')
def test_build_ex_17(self):
    """Resource Dump with 3 entries and some metadata"""
    rd = ResourceDump()
    rd.up = 'http://example.com/dataset1/capabilitylist.xml'
    rd.md_at = "2013-01-03T09:00:00Z"
    rd.md_completed = "2013-01-03T09:04:00Z"
    # each zip part carries its own md_at/md_completed window and a
    # 'contents' link to its manifest
    z1 = Resource(uri='http://example.com/resourcedump-part1.zip',
                  mime_type="application/zip",
                  length=4765,
                  md_at="2013-01-03T09:00:00Z",
                  md_completed="2013-01-03T09:02:00Z")
    z1.link_set(rel="contents",
                href="http://example.com/resourcedump_manifest-part1.xml",
                mime_type="application/xml")
    rd.add(z1)
    z2 = Resource(uri='http://example.com/resourcedump-part2.zip',
                  mime_type="application/zip",
                  length=9875,
                  md_at="2013-01-03T09:01:00Z",
                  md_completed="2013-01-03T09:03:00Z")
    z2.link_set(rel="contents",
                href="http://example.com/resourcedump_manifest-part2.xml",
                mime_type="application/xml")
    rd.add(z2)
    z3 = Resource(uri='http://example.com/resourcedump-part3.zip',
                  mime_type="application/zip",
                  length=2298,
                  md_at="2013-01-03T09:03:00Z",
                  md_completed="2013-01-03T09:04:00Z")
    z3.link_set(rel="contents",
                href="http://example.com/resourcedump_manifest-part3.xml",
                mime_type="application/xml")
    rd.add(z3)
    # compare generated XML against the canned example document
    ex_xml = self._open_ex('resourcesync_ex_17').read()
    self._assert_xml_equal(rd.as_xml(), ex_xml)
def test_build_ex_22(self):
    """Change Dump with three dump files"""
    cd = ChangeDump()
    cd.up = 'http://example.com/dataset1/capabilitylist.xml'
    cd.md_from = "2013-01-01T00:00:00Z"
    # one zip per day, each with its md_from/md_until window and a
    # 'contents' manifest set via the convenience attribute
    z1 = Resource(uri='http://example.com/20130101-changedump.zip',
                  lastmod='2013-01-01T23:59:59Z',
                  length=3109,
                  md_from="2013-01-01T00:00:00Z",
                  md_until="2013-01-02T00:00:00Z",
                  mime_type="application/zip")
    z1.contents = 'http://example.com/20130101-changedump-manifest.xml'
    z2 = Resource(uri='http://example.com/20130102-changedump.zip',
                  lastmod='2013-01-02T23:59:59Z',
                  length=6629,
                  md_from="2013-01-02T00:00:00Z",
                  md_until="2013-01-03T00:00:00Z",
                  mime_type="application/zip")
    z2.contents = 'http://example.com/20130102-changedump-manifest.xml'
    z3 = Resource(uri='http://example.com/20130103-changedump.zip',
                  lastmod='2013-01-03T23:59:59Z',
                  length=8124,
                  md_from="2013-01-03T00:00:00Z",
                  md_until="2013-01-04T00:00:00Z",
                  mime_type="application/zip")
    z3.contents = 'http://example.com/20130103-changedump-manifest.xml'
    cd.add([z1, z2, z3])
    # compare generated XML against the canned example document
    ex_xml = self._open_ex('resourcesync_ex_22').read()
    self._assert_xml_equal(cd.as_xml(), ex_xml)
def test18_update_resource(self):
    """Exercise Client.update_resource: dryrun, failures, and success paths."""
    c = Client()
    resource = Resource(uri='http://example.org/dir/2')
    filename = os.path.join(self.tmpdir, 'dir/resource2')
    # dryrun
    with LogCapture() as lc:
        c.dryrun = True
        c.logger = logging.getLogger('resync.client')
        n = c.update_resource(resource, filename)
        self.assertEqual(n, 0)
        self.assertTrue(lc.records[-1].msg.startswith(
            'dryrun: would GET http://example.org/dir/2 '))
    c.dryrun = False
    # get from file uri that does not exist
    resource = Resource(uri='file:tests/testdata/i_do_not_exist')
    self.assertRaises(ClientFatalError, c.update_resource,
                      resource, filename)
    # get from file uri that does not exist but with c.ignore_failures to
    # log
    resource = Resource(uri='file:tests/testdata/i_do_not_exist')
    with LogCapture() as lc:
        c.logger = logging.getLogger('resync.client')
        c.ignore_failures = True
        n = c.update_resource(resource, filename)
        self.assertEqual(n, 0)
        self.assertTrue(lc.records[-1].msg.startswith(
            'Failed to GET file:tests/testdata/i_do_not_exist '))
    # get from file uri
    resource = Resource(
        uri='file:tests/testdata/examples_from_spec/resourcesync_ex_1.xml',
        length=355, md5='abc', timestamp=10)
    c.last_timestamp = 0
    with LogCapture() as lc:
        c.logger = logging.getLogger('resync.client')
        n = c.update_resource(resource, filename)
        self.assertEqual(n, 1)
        self.assertTrue(lc.records[-1].msg.startswith('Event: {'))
    # get from file uri with length and md5 warnings
    resource = Resource(
        uri='file:tests/testdata/examples_from_spec/resourcesync_ex_1.xml',
        length=111, md5='abc', timestamp=10)
    c.last_timestamp = 0
    with LogCapture() as lc:
        c.logger = logging.getLogger('resync.client')
        c.hashes = set(['md5'])
        n = c.update_resource(resource, filename)
        self.assertEqual(n, 1)
        # warnings are logged after the Event record, most recent last
        self.assertTrue(lc.records[-1].msg.startswith('MD5 mismatch '))
        self.assertTrue(
            lc.records[-2].msg.startswith('Downloaded size for '))
        self.assertTrue(lc.records[-3].msg.startswith('Event: {'))
def test_04_resource_str(self):
    """rs:ln link elements serialize after rs:md, in insertion order."""
    r1 = Resource(uri='4a', lastmod="2013-01-02", length=9999, md5='ab54de')
    r1.ln = [{'rel': 'duplicate',
              'pri': '1',
              'href': 'http://mirror1.example.com/res1',
              'modified': '2013-01-02T18:00:00Z'}]
    self.assertEqual(
        Sitemap().resource_as_xml(r1),
        "<url><loc>4a</loc><lastmod>2013-01-02T00:00:00Z</lastmod><rs:md hash=\"md5:ab54de\" length=\"9999\" /><rs:ln href=\"http://mirror1.example.com/res1\" modified=\"2013-01-02T18:00:00Z\" pri=\"1\" rel=\"duplicate\" /></url>")
    # add another two rs:ln's
    r1.ln.append({'rel': 'num2'})
    r1.ln.append({'rel': 'num3'})
    self.assertEqual(
        Sitemap().resource_as_xml(r1),
        "<url><loc>4a</loc><lastmod>2013-01-02T00:00:00Z</lastmod><rs:md hash=\"md5:ab54de\" length=\"9999\" /><rs:ln href=\"http://mirror1.example.com/res1\" modified=\"2013-01-02T18:00:00Z\" pri=\"1\" rel=\"duplicate\" /><rs:ln rel=\"num2\" /><rs:ln rel=\"num3\" /></url>")
def test20_as_xml(self):
    """as_xml() output includes resourcelist capability and resource entries."""
    rl = ResourceList()
    rl.add(Resource('a', timestamp=1))
    rl.add(Resource('b', timestamp=2))
    xml = rl.as_xml()
    self.assertTrue(re.search(r'<rs:md .*capability="resourcelist"', xml),
                    'XML has capability')
    self.assertTrue(
        re.search(
            r'<url><loc>a</loc><lastmod>1970-01-01T00:00:01Z</lastmod></url>',
            xml),
        'XML has resource a')
def test_09_print_from_iter(self):
    """resources_as_xml accepts a plain iterator over resources."""
    r1 = Resource(uri='a', lastmod='2001-01-01', length=1234)
    r2 = Resource(uri='b', lastmod='2002-02-02', length=56789)
    m = ResourceList()
    m.add(r1)
    m.add(r2)
    # pass an iterator rather than the ResourceList itself
    i = iter(m)
    self.assertEqual(
        Sitemap().resources_as_xml(i),
        "<?xml version='1.0' encoding='UTF-8'?>\n<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:rs=\"http://www.openarchives.org/rs/terms/\"><url><loc>a</loc><lastmod>2001-01-01T00:00:00Z</lastmod><rs:md length=\"1234\" /></url><url><loc>b</loc><lastmod>2002-02-02T00:00:00Z</lastmod><rs:md length=\"56789\" /></url></urlset>")
def test01_as_xml(self):
    """as_xml() output includes resourcedump capability and resource entries.

    Fixed: the ``.`` in the loc pattern was unescaped and so matched any
    character; escaped so the regex checks the literal filename 'a.zip'.
    """
    rd = ResourceDump()
    rd.add(Resource('a.zip', timestamp=1))
    rd.add(Resource('b.zip', timestamp=2))
    xml = rd.as_xml()
    self.assertTrue(re.search(r'<rs:md .*capability="resourcedump"', xml),
                    'XML has capability')
    self.assertTrue(
        re.search(
            r'<url><loc>a\.zip</loc><lastmod>1970-01-01T00:00:01Z</lastmod></url>',
            xml),
        'XML has resource a')
def test03_with_repeats_again(self):
    """A ChangeList accepts repeated URIs, growing in length each time."""
    r1 = Resource(uri='a', length=1, change='created')
    r2 = Resource(uri='b', length=2, change='created')
    i = ChangeList()
    i.add(r1)
    i.add(r2)
    self.assertEqual(len(i), 2)
    # Can add another Resource with same URI
    r1d = Resource(uri='a', length=10, change='created')
    i.add(r1d)
    self.assertEqual(len(i), 3)
def test_11_write_multifile(self):
    """write() with allow_multifile splits into sitemaps plus a sitemapindex.

    Fixed: replaced the deprecated ``assertEquals`` alias with
    ``assertEqual`` (the alias was removed in Python 3.12).
    """
    tempdir = tempfile.mkdtemp(prefix='test_resource_list_multifile')
    rl = ResourceList()
    rl.mapper = Mapper(['http://localhost/=%s/' % (tempdir)])
    rl.add(Resource(uri='http://localhost/a'))
    rl.add(Resource(uri='http://localhost/b'))
    rl.add(Resource(uri='http://localhost/c'))
    rl.add(Resource(uri='http://localhost/d'))
    rl.max_sitemap_entries = 2
    # first try writing without mutlifile allowed
    rl.allow_multifile = False
    self.assertRaises(ListBaseIndexError, rl.write,
                      basename=os.path.join(tempdir, 'sitemap.xml'))
    # second actually do it
    rl.allow_multifile = True
    rl.write(basename=os.path.join(tempdir, 'sitemap.xml'))
    # check the two component sitemaps
    rl1 = ResourceList()
    rl1.read(os.path.join(tempdir, 'sitemap00000.xml'))
    self.assertEqual(len(rl1), 2)
    self.assertEqual(rl1.capability, 'resourcelist')
    self.assertFalse(rl1.sitemapindex)
    i = iter(rl1)
    self.assertEqual(next(i).uri, 'http://localhost/a')
    self.assertEqual(next(i).uri, 'http://localhost/b')
    rl2 = ResourceList()
    rl2.read(os.path.join(tempdir, 'sitemap00001.xml'))
    self.assertEqual(len(rl2), 2)
    i = iter(rl2)
    self.assertEqual(next(i).uri, 'http://localhost/c')
    self.assertEqual(next(i).uri, 'http://localhost/d')
    # check the sitemapindex (read just as index)
    rli = ResourceList()
    rli.read(os.path.join(tempdir, 'sitemap.xml'), index_only=True)
    self.assertEqual(len(rli), 2)
    i = iter(rli)
    self.assertEqual(rli.capability, 'resourcelist')
    self.assertTrue(rli.sitemapindex)
    self.assertEqual(next(i).uri, 'http://localhost/sitemap00000.xml')
    self.assertEqual(next(i).uri, 'http://localhost/sitemap00001.xml')
    # check the sitemapindex and components
    rli = ResourceList(mapper=rl.mapper)
    rli.read(os.path.join(tempdir, 'sitemap.xml'))
    self.assertEqual(len(rli), 4)
    self.assertEqual(rli.capability, 'resourcelist')
    self.assertFalse(rli.sitemapindex)
    i = iter(rli)
    self.assertEqual(next(i).uri, 'http://localhost/a')
    self.assertEqual(next(i).uri, 'http://localhost/b')
    self.assertEqual(next(i).uri, 'http://localhost/c')
    self.assertEqual(next(i).uri, 'http://localhost/d')
    # cleanup tempdir
    shutil.rmtree(tempdir)
def test_build_ex_07(self):
    """A Source Description document """
    sd = SourceDescription()
    sd.describedby = 'http://example.com/info-about-source.xml'
    r = Resource(uri='http://example.com/dataset1/capabilitylist.xml',
                 capability='capabilitylist')
    r.link_set(rel='describedby',
               href='http://example.com/info_about_set1_of_resources.xml')
    sd.add(r)
    # compare generated XML against the canned example document
    ex_xml = self._open_ex('resourcesync_ex_7').read()
    self._assert_xml_equal(sd.as_xml(), ex_xml)
def test2_iter(self):
    """Iterating a ResourceContainer visits resources in insertion order."""
    rc = ResourceContainer(resources=[])
    for ts, uri in enumerate(('a', 'b', 'c', 'd'), start=1):
        rc.resources.append(Resource(uri, timestamp=ts))
    seen = [entry for entry in rc]
    self.assertEqual(len(seen), 4)
    self.assertEqual(seen[0].uri, 'a')
    self.assertEqual(seen[3].uri, 'd')
def test_build_archives_ex_3_1(self):
    """Resource List Archive listing 3 Resource Lists"""
    rla = ResourceListArchive()
    rla.up = 'http://example.com/dataset1/capabilitylist.xml'
    rla.add(Resource(uri='http://example.com/resourcelist1.xml',
                     md_at='2012-11-03T09:00:00Z'))
    rla.add(Resource(uri='http://example.com/resourcelist2.xml',
                     md_at='2012-12-03T09:00:00Z'))
    rla.add(Resource(uri='http://example.com/resourcelist3.xml',
                     md_at='2013-01-03T09:00:00Z'))
    # compare generated XML against the canned example document
    ex_xml = self._open_ex('archives_ex_3_1').read()
    self._assert_xml_equal(rla.as_xml(), ex_xml)
def test08_iter(self):
    """Iterating a ResourceList visits all added resources in order."""
    rl = ResourceList()
    for ts, uri in enumerate(('a', 'b', 'c', 'd'), start=1):
        rl.add(Resource(uri, timestamp=ts))
    collected = list(rl)
    self.assertEqual(len(collected), 4)
    self.assertEqual(collected[0].uri, 'a')
    self.assertEqual(collected[3].uri, 'd')
def test1_same(self):
    """compare() of two identical inventories reports no differences."""
    src = Inventory()
    dst = Inventory()
    # populate both sides with the same two resources
    for inventory in (src, dst):
        inventory.add(Resource('a', timestamp=1))
        inventory.add(Resource('b', timestamp=2))
    (num_same, changed, deleted, added) = dst.compare(src)
    self.assertEqual(num_same, 2, "2 things unchanged")
    self.assertEqual(changed, [], "nothing changed")
    self.assertEqual(deleted, [], "nothing deleted")
    self.assertEqual(added, [], "nothing added")
def test19_delete_resource(self):
    """Exercise Client.delete_resource: nodelete, dryrun, failure, success."""
    c = Client()
    resource = Resource(uri='http://example.org/1')
    filename = os.path.join(self.tmpdir, 'resource1')
    c.last_timestamp = 5
    # no delete, no timestamp update
    with LogCapture() as lc:
        c.logger = logging.getLogger('resync.client')
        n = c.delete_resource(resource, filename)
        self.assertEqual(n, 0)
        self.assertEqual(
            lc.records[-1].msg,
            'nodelete: would delete http://example.org/1 (--delete to enable)'
        )
        self.assertEqual(c.last_timestamp, 5)
    # no delete but timestamp update
    resource.timestamp = 10
    with LogCapture() as lc:
        c.logger = logging.getLogger('resync.client')
        n = c.delete_resource(resource, filename)
        self.assertEqual(n, 0)
        self.assertEqual(
            lc.records[-1].msg,
            'nodelete: would delete http://example.org/1 (--delete to enable)'
        )
        self.assertEqual(c.last_timestamp, 10)
    # allow delete but dryrun
    with LogCapture() as lc:
        c.dryrun = True
        c.logger = logging.getLogger('resync.client')
        n = c.delete_resource(resource, filename, allow_deletion=True)
        self.assertEqual(n, 0)
        self.assertTrue(lc.records[-1].msg.startswith(
            'dryrun: would delete http://example.org/1'))
    c.dryrun = False
    # allow delete but no resource present
    with LogCapture() as lc:
        c.logger = logging.getLogger('resync.client')
        n = c.delete_resource(resource, filename, allow_deletion=True)
        self.assertEqual(n, 0)
        self.assertTrue(lc.records[-1].msg.startswith(
            'Failed to DELETE http://example.org/1'))
    # successful deletion, first make file...
    with open(filename, 'w') as fh:
        fh.write('delete me')
        fh.close()
    with LogCapture() as lc:
        c.logger = logging.getLogger('resync.client')
        n = c.delete_resource(resource, filename, allow_deletion=True)
        self.assertEqual(n, 1)
        self.assertTrue(lc.records[-1].msg.startswith('Event: {'))
        self.assertTrue(lc.records[-2].msg.startswith(
            'deleted: http://example.org/1 ->'))
def test5_add_iterable(self):
    """add() accepts an iterable; duplicates raise unless replace=True."""
    r1 = Resource(uri='a', size=1)
    r2 = Resource(uri='b', size=2)
    i = Inventory()
    i.add([r1, r2])
    self.assertRaises(InventoryDupeError, i.add, r1)
    self.assertRaises(InventoryDupeError, i.add, r2)
    # allow dupes
    r1d = Resource(uri='a', size=10)
    i.add([r1d], replace=True)
    self.assertEqual(len(i), 2)
    # the replacement resource took the place of the original 'a'
    self.assertEqual(i.resources['a'].size, 10)
def test05_write(self):
    """write() then parse() round-trips a ResourceDump via a file."""
    rd = ResourceDump()
    rd.add(Resource('aa.zip', timestamp=1))
    rd.add(Resource('bb.zip', timestamp=2))
    dumpf = os.path.join(self.tmpdir, "test05_dump.xml")
    rd.write(basename=dumpf)
    self.assertTrue(os.path.exists(dumpf))
    # Now read that back
    rd2 = ResourceDump()
    rd2.parse(dumpf)
    self.assertEqual(len(rd2), 2)
    self.assertEqual(rd2.uris(), ['aa.zip', 'bb.zip'])
def test06_add_iterable(self):
    """add() accepts an iterable; duplicates raise unless replace=True."""
    r1 = Resource(uri='a', length=1)
    r2 = Resource(uri='b', length=2)
    i = ResourceList()
    i.add([r1, r2])
    self.assertRaises(ResourceListDupeError, i.add, r1)
    self.assertRaises(ResourceListDupeError, i.add, r2)
    # allow dupes
    r1d = Resource(uri='a', length=10)
    i.add([r1d], replace=True)
    self.assertEqual(len(i), 2)
    # the replacement resource took the place of the original 'a'
    self.assertEqual(i.resources['a'].length, 10)
def test05_iter(self):
    """Iterating a ChangeList yields entries in insertion order."""
    i = ChangeList()
    i.add(Resource('a', timestamp=1, change='created'))
    i.add(Resource('b', timestamp=2, change='created'))
    i.add(Resource('c', timestamp=3, change='created'))
    i.add(Resource('d', timestamp=4, change='created'))
    resources = []
    for r in i:
        resources.append(r)
    self.assertEqual(len(resources), 4)
    self.assertEqual(resources[0].uri, 'a')
    self.assertEqual(resources[3].uri, 'd')
def test07_hashes(self):
    """hashes() reports the set of hash types present across all resources."""
    r1 = Resource(uri='a')
    r2 = Resource(uri='b')
    i = ResourceList()
    self.assertEqual(i.hashes(), set())
    i.add(r1)
    i.add(r2)
    self.assertEqual(i.hashes(), set())
    r1.md5 = "aabbcc"
    self.assertEqual(i.hashes(), set(['md5']))
    # sha1 is reported under its serialized name 'sha-1'
    r2.sha1 = "ddeeff"
    self.assertEqual(i.hashes(), set(['md5', 'sha-1']))
def test05_lastmod_roundtrips(self):
    """lastmod values normalize to canonical UTC 'Z' form on read-back."""
    r = Resource('a')
    r.lastmod = '2012-03-14'
    self.assertEqual(r.lastmod, '2012-03-14T00:00:00Z')
    r.lastmod = '2012-03-14T00:00:00+00:00'
    # print r.timestamp
    self.assertEqual(r.lastmod, '2012-03-14T00:00:00Z')
    r.lastmod = '2012-03-14T00:00:00-00:00'
    # print r.timestamp
    self.assertEqual(r.lastmod, '2012-03-14T00:00:00Z')
    r.lastmod = '2012-03-14T18:37:36Z'
    # print r.timestamp
    self.assertEqual(r.lastmod, '2012-03-14T18:37:36Z')
def test00_dump_zip_resource_list(self):
    """write_zip() produces a valid zip archive with three entries."""
    rl = ResourceDumpManifest()
    rl.add(Resource('http://ex.org/a', length=7, path='tests/testdata/a'))
    rl.add(Resource('http://ex.org/b', length=21, path='tests/testdata/b'))
    d = Dump()
    zipf = os.path.join(self.tmpdir, "test00_dump.zip")
    d.write_zip(resources=rl, dumpfile=zipf)  # named args
    self.assertTrue(os.path.exists(zipf))
    self.assertTrue(zipfile.is_zipfile(zipf))
    zo = zipfile.ZipFile(zipf, 'r')
    # 3 entries: presumably the two resources plus a manifest — confirm
    self.assertEqual(len(zo.namelist()), 3)
    zo.close()
    os.unlink(zipf)
def test5_lastmod_roundtrips(self):
    """lastmod values normalize to canonical UTC 'Z' form on read-back.

    Fixed: the debug output used Python 2 ``print`` statements, which
    are a SyntaxError under Python 3; converted to ``print()`` calls.
    """
    r = Resource("a")
    r.lastmod = "2012-03-14"
    self.assertEqual(r.lastmod, "2012-03-14T00:00:00Z")
    r.lastmod = "2012-03-14T00:00:00+00:00"
    print(r.timestamp)
    self.assertEqual(r.lastmod, "2012-03-14T00:00:00Z")
    r.lastmod = "2012-03-14T00:00:00-00:00"
    print(r.timestamp)
    self.assertEqual(r.lastmod, "2012-03-14T00:00:00Z")
    r.lastmod = "2012-03-14T18:37:36Z"
    print(r.timestamp)
    self.assertEqual(r.lastmod, "2012-03-14T18:37:36Z")
def test_build_ex_03(self):
    """Simple Change List document """
    cl = ChangeList()
    cl.md_from = '2013-01-02T00:00:00Z'
    cl.md_until = '2013-01-03T00:00:00Z'
    cl.add(Resource(uri='http://example.com/res2.pdf',
                    lastmod='2013-01-02T13:00:00Z',
                    change="updated"))
    cl.add(Resource(uri='http://example.com/res3.tiff',
                    lastmod='2013-01-02T18:00:00Z',
                    change='deleted'))
    # compare generated XML against the canned example document
    ex_xml = self._open_ex('resourcesync_ex_3').read()
    self._assert_xml_equal(cl.as_xml(), ex_xml)
def test04_write(self):
    """write() then parse() round-trips a ListBase with a custom capability."""
    lb = ListBase(capability_name='special')
    lb.add(Resource(uri='http://example.org/lemon'))
    lb.add(Resource(uri='http://example.org/orange'))
    basename = os.path.join(self.tmpdir, 'lb.xml')
    lb.write(basename=basename)
    self.assertTrue(os.path.exists(basename))
    # and now parse back
    fh = open(basename, 'r')
    lb2 = ListBase(capability_name='special')
    lb2.parse(fh=fh)
    self.assertEqual(lb2.capability, 'special')
    self.assertEqual(len(lb2), 2)
def test_build_ex_02(self):
    """Slightly more complex Resource List document """
    rl = ResourceList()
    rl.md_at = '2013-01-03T09:00:00Z'
    rl.add(Resource(uri='http://example.com/res1',
                    lastmod='2013-01-02T13:00:00Z',
                    md5='1584abdf8ebdc9802ac0c6a7402c03b6'))
    r2 = Resource(uri='http://example.com/res2',
                  lastmod='2013-01-02T14:00:00Z',
                  md5='1e0d5cb8ef6ba40c99b14c0237be735e')
    r2.link_set(rel="duplicate", href="http://mirror.example.com/res2")
    rl.add(r2)
    # compare generated XML against the canned example document
    ex_xml = self._open_ex('resourcesync_ex_2').read()
    self._assert_xml_equal(rl.as_xml(), ex_xml)
def test_build_ex_30(self):
    """Change List entry with a collection link (spec example 30)."""
    cl = ChangeList()
    cl.up = "http://example.com/dataset1/capabilitylist.xml"
    cl.md_from = "2013-01-03T00:00:00Z"
    c1 = Resource(uri="http://example.com/res1",
                  lastmod="2013-01-03T07:00:00Z",
                  change="updated",
                  md5="1584abdf8ebdc9802ac0c6a7402c03b6",
                  length=8876,
                  mime_type="text/html")
    c1.link_add(rel="collection",
                href="http://example.com/aggregation/0601007")
    cl.add(c1)
    self._assert_xml_equal_ex(cl.as_xml(), 'resourcesync_ex_30')
def test_build_ex_26(self):
    """Change List entry with a canonical link (spec example 26)."""
    cl = ChangeList()
    cl.up = "http://example.com/dataset1/capabilitylist.xml"
    cl.md_from = "2013-01-03T00:00:00Z"
    c1 = Resource(uri="http://example.com/res1.html",
                  lastmod="2013-01-03T18:00:00Z",
                  change="updated",
                  md5="1584abdf8ebdc9802ac0c6a7402c03b6",
                  length=8876)
    c1.link_add(rel="canonical",
                href="http://example.com/res1",
                modified="2013-01-03T18:00:00Z")
    cl.add(c1)
    self._assert_xml_equal_ex(cl.as_xml(), 'resourcesync_ex_26')
def test_build_ex_28(self):
    """Change List entries cross-linking a resource and its metadata record."""
    cl = ChangeList()
    cl.up = "http://example.com/dataset1/capabilitylist.xml"
    cl.md_from = "2013-01-03T00:00:00Z"
    c1 = Resource(uri="http://example.com/res2.pdf",
                  lastmod="2013-01-03T18:00:00Z",
                  change="updated",
                  md5="1584abdf8ebdc9802ac0c6a7402c03b6",
                  length=8876,
                  mime_type="application/pdf")
    c1.link_set(rel="describedby",
                href="http://example.com/res2_dublin-core_metadata.xml",
                modified="2013-01-01T12:00:00Z",
                type="application/xml")
    c2 = Resource(uri="http://example.com/res2_dublin-core_metadata.xml",
                  lastmod="2013-01-03T19:00:00Z",
                  change="updated",
                  mime_type="application/xml")
    # reverse link back to the PDF, carrying its hash/length/type
    c2.link_set(rel="describes",
                href="http://example.com/res2.pdf",
                modified="2013-01-03T18:00:00Z",
                hash="md5:1584abdf8ebdc9802ac0c6a7402c03b6",
                length="8876",
                type="application/pdf")
    c2.link_set(rel="profile", href="http://purl.org/dc/elements/1.1/")
    cl.add([c1, c2])
    self._assert_xml_equal_ex(cl.as_xml(), 'resourcesync_ex_28')
def test19_delete_resource(self):
    """Exercise Client.delete_resource: nodelete, dryrun, failure, success."""
    c = Client()
    resource = Resource(uri='http://example.org/1')
    filename = os.path.join(self.tmpdir, 'resource1')
    c.last_timestamp = 5
    # no delete, no timestamp update
    with LogCapture() as lc:
        c.logger = logging.getLogger('resync.client')
        n = c.delete_resource(resource, filename)
        self.assertEqual(n, 0)
        self.assertEqual(
            lc.records[-1].msg,
            'nodelete: would delete http://example.org/1 (--delete to enable)')
        self.assertEqual(c.last_timestamp, 5)
    # no delete but timestamp update
    resource.timestamp = 10
    with LogCapture() as lc:
        c.logger = logging.getLogger('resync.client')
        n = c.delete_resource(resource, filename)
        self.assertEqual(n, 0)
        self.assertEqual(
            lc.records[-1].msg,
            'nodelete: would delete http://example.org/1 (--delete to enable)')
        self.assertEqual(c.last_timestamp, 10)
    # allow delete but dryrun
    with LogCapture() as lc:
        c.dryrun = True
        c.logger = logging.getLogger('resync.client')
        n = c.delete_resource(resource, filename, allow_deletion=True)
        self.assertEqual(n, 0)
        self.assertTrue(
            lc.records[-1].msg.startswith('dryrun: would delete http://example.org/1'))
    c.dryrun = False
    # allow delete but no resource present
    with LogCapture() as lc:
        c.logger = logging.getLogger('resync.client')
        n = c.delete_resource(resource, filename, allow_deletion=True)
        self.assertEqual(n, 0)
        self.assertTrue(
            lc.records[-1].msg.startswith('Failed to DELETE http://example.org/1'))
    # successful deletion, first make file...
    with open(filename, 'w') as fh:
        fh.write('delete me')
        fh.close()
    with LogCapture() as lc:
        c.logger = logging.getLogger('resync.client')
        n = c.delete_resource(resource, filename, allow_deletion=True)
        self.assertEqual(n, 1)
        self.assertTrue(lc.records[-1].msg.startswith('Event: {'))
        self.assertTrue(
            lc.records[-2].msg.startswith('deleted: http://example.org/1 ->'))
def add_capability_list(self, capability_list=None):
    """Add a capability list.

    Adds either a CapabilityList object specified in capability_list
    or else creates a Resource with the URI given in capability_list
    and adds that to the Source Description
    """
    # duck-typed check: anything with a uri attribute is treated as a
    # CapabilityList-like object
    if (hasattr(capability_list, 'uri')):
        r = Resource(uri=capability_list.uri,
                     capability=capability_list.capability_name)
        if (capability_list.describedby is not None):
            r.link_set(rel='describedby',
                       href=capability_list.describedby)
    else:
        # else assume a plain URI string
        r = Resource(uri=capability_list,
                     capability='capabilitylist')
    self.add(r)
def test_build_ex_25(self):
    """Change List entry with two alternate links (spec example 25)."""
    cl = ChangeList()
    cl.up = "http://example.com/dataset1/capabilitylist.xml"
    cl.md_from = "2013-01-03T11:00:00Z"
    c1 = Resource(uri="http://example.com/res1",
                  lastmod="2013-01-03T18:00:00Z",
                  change="updated")
    c1.link_add(rel="alternate",
                href="http://example.com/res1.html",
                modified="2013-01-03T18:00:00Z",
                type="text/html")  # FIXME - inconsistent
    c1.link_add(rel="alternate",
                href="http://example.com/res1.pdf",
                modified="2013-01-03T18:00:00Z",
                type="application/pdf")
    cl.add(c1)
    self._assert_xml_equal_ex(cl.as_xml(), 'resourcesync_ex_25')
def test_build_ex_24(self):
    """Change List entry with duplicate links via link_set and link_add."""
    cl = ChangeList()
    cl.up = "http://example.com/dataset1/capabilitylist.xml"
    cl.md_from = "2013-01-03T00:00:00Z"
    c1 = Resource(uri="http://example.com/res1",
                  lastmod="2013-01-03T18:00:00Z",
                  change="updated",
                  md5="1584abdf8ebdc9802ac0c6a7402c03b6",
                  length=8876,
                  mime_type="text/html")
    # Resource.link_set with add or change link depending on one with
    # the particular rel exists unless allow_duplicates=True.
    # Resource.link_add will always add. Test both here...
    c1.link_set(rel="duplicate",
                href="http://mirror1.example.com/res1",
                pri="1",
                modified="2013-01-03T18:00:00Z")
    c1.link_set(rel="duplicate",
                href="http://mirror2.example.com/res1",
                pri="2",
                modified="2013-01-03T18:00:00Z",
                allow_duplicates=True)
    c1.link_add(rel="duplicate",
                href="gsiftp://gridftp.example.com/res1",
                pri="3",
                modified="2013-01-03T18:00:00Z")
    cl.add(c1)
    self._assert_xml_equal_ex(cl.as_xml(), 'resourcesync_ex_24')
def test_build_ex_33(self):
    """Change List from an aggregator with a via link (spec example 33)."""
    cl = ChangeList()
    cl.up = "http://aggregator2.example.com/dataset1/capabilitylist.xml"
    cl.md_from = "2013-01-03T12:00:00Z"
    c1 = Resource(uri="http://aggregator2.example.com/res1.html",
                  lastmod="2013-01-04T09:00:00Z",
                  change="updated",
                  md5="1584abdf8ebdc9802ac0c6a7402c03b6",
                  length=8876,
                  mime_type="text/html")
    # via link back to the original source copy
    c1.link_add(rel="via",
                href="http://original.example.com/res1.html",
                modified="2013-01-03T07:00:00Z",
                hash="md5:1584abdf8ebdc9802ac0c6a7402c03b6",
                length="8876",
                type="text/html")
    cl.add(c1)
    self._assert_xml_equal_ex(cl.as_xml(), 'resourcesync_ex_33')
def test_build_ex_27(self):
    """Change List entries with patch links (spec example 27)."""
    cl = ChangeList()
    cl.up = "http://example.com/dataset1/capabilitylist.xml"
    cl.md_from = "2013-01-03T00:00:00Z"
    c1 = Resource(uri="http://example.com/res4",
                  lastmod="2013-01-03T17:00:00Z",
                  change="updated",
                  sha256="f4OxZX_x_DFGFDgghgdfb6rtSx-iosjf6735432nklj",
                  length=56778,
                  mime_type="application/json")
    c1.link_set(rel="http://www.openarchives.org/rs/terms/patch",
                href="http://example.com/res4-json-patch",
                modified="2013-01-03T17:00:00Z",
                hash="sha-256:y66dER_t_HWEIKpesdkeb7rtSc-ippjf9823742opld",
                # FIXME - inconsistent
                length=73,
                type="application/json-patch")
    c2 = Resource(uri="http://example.com/res5-full.tiff",
                  lastmod="2013-01-03T18:00:00Z",
                  change="updated",
                  sha256="f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk",
                  length="9788456778",
                  mime_type="image/tiff")
    c2.link_set(rel="http://www.openarchives.org/rs/terms/patch",
                href="http://example.com/res5-diff",
                modified="2013-01-03T18:00:00Z",
                hash="sha-256:h986gT_t_87HTkjHYE76G558hY-jdfgy76t55sadJUYT",
                length=4533,
                type="application/x-tiff-diff")
    cl.add([c1, c2])
    self._assert_xml_equal_ex(cl.as_xml(), 'resourcesync_ex_27')
def test01c_same(self):
    """Same with lastmod instead of direct timestamp"""
    r1 = Resource('a')
    r1lm = '2012-01-01T00:00:00Z'
    r1.lastmod = r1lm
    r2 = Resource('a')
    # every one of these lastmod spellings should parse to the same instant
    for r2lm in ('2012', '2012-01', '2012-01-01',
                 '2012-01-01T00:00Z', '2012-01-01T00:00:00Z',
                 '2012-01-01T00:00:00.000000Z',
                 '2012-01-01T00:00:00.000000000000Z',
                 '2012-01-01T00:00:00.000000000001Z',  # below resolution
                 '2012-01-01T00:00:00.00+00:00',
                 '2012-01-01T00:00:00.00-00:00',
                 '2012-01-01T02:00:00.00-02:00',
                 '2011-12-31T23:00:00.00+01:00'):
        r2.lastmod = r2lm
        self.assertEqual(
            r1.timestamp, r2.timestamp,
            ('%s (%f) == %s (%f)' % (r1lm, r1.timestamp, r2lm, r2.timestamp)))
        self.assertEqual(r1, r2)
def test1c_same(self):
    """Same with lastmod instead of direct timestamp.

    Fixed: the timestamp comparison was asserted twice in a row; the
    redundant message-less duplicate has been removed, keeping the one
    that reports which lastmod form failed.
    """
    r1 = Resource("a")
    r1.lastmod = "2012-01-02"
    r2 = Resource("a")
    # every one of these lastmod spellings should parse to the same instant
    for r2lm in (
        "2012-01-02",
        "2012-01-02T00:00",
        "2012-01-02T00:00:00",
        "2012-01-02 00:00:00",
        "2012-01-02T00:00:00.00",
        "2012-01-02T00:00:00.000000000000",
        "2012-01-02T00:00:00.000000000001",  # below resolution
        "2012-01-02T00:00:00.00Z",
        "2012-01-02T00:00:00.00+0000",
        "2012-01-02T00:00:00.00-0000",
        "2012-01-02T00:00:00.00+00:00",
        "2012-01-02T00:00:00.00-00:00",
        "2012-01-02T00:00:00.00+02:00",  # FIXME - TZ info currently ignored
    ):
        r2.lastmod = r2lm
        self.assertEqual(r1.timestamp, r2.timestamp,
                         ("2012-01-02 == %s" % r2lm))
        self.assertEqual(r1, r2)