def test1c_same(self):
    """Same with lastmod instead of direct timestamp.

    Sets ``lastmod`` on one resource to a plain date and on a second
    resource with the same URI to a series of alternative spellings of that
    instant, then checks that the timestamps and the resources compare equal.
    """
    r1 = Resource('a')
    r1.lastmod = '2012-01-02'
    r2 = Resource('a')
    equivalent_lastmods = (
        '2012-01-02',
        '2012-01-02T00:00',
        '2012-01-02T00:00:00',
        '2012-01-02 00:00:00',
        '2012-01-02T00:00:00.00',
        '2012-01-02T00:00:00.000000000000',
        '2012-01-02T00:00:00.000000000001',  # below resolution
        '2012-01-02T00:00:00.00Z',
        '2012-01-02T00:00:00.00+0000',
        '2012-01-02T00:00:00.00-0000',
        '2012-01-02T00:00:00.00+00:00',
        '2012-01-02T00:00:00.00-00:00',
        '2012-01-02T00:00:00.00+02:00'  # FIXME - TZ info currently ignored
    )
    for r2lm in equivalent_lastmods:
        r2.lastmod = r2lm
        # One timestamp assertion, carrying the message, so that a failure
        # reports which spelling broke. A message-less duplicate used to run
        # first and hide that information.
        self.assertEqual(r1.timestamp, r2.timestamp,
                         ('2012-01-02 == %s' % r2lm))
        self.assertEqual(r1, r2)
def test_build_ex_22(self):
    """Change Dump with three dump files.

    Builds a ChangeDump holding three zip resources, each with a
    ``contents`` reference to its manifest, and compares the generated XML
    with the stored example ``resourcesync_ex_22``.
    """
    cd = ChangeDump()
    cd.up = 'http://example.com/dataset1/capabilitylist.xml'
    cd.md_from = "2013-01-01T00:00:00Z"
    z1 = Resource(uri='http://example.com/20130101-changedump.zip',
                  lastmod='2013-01-01T23:59:59Z',
                  length=3109,
                  md_from="2013-01-01T00:00:00Z",
                  md_until="2013-01-02T00:00:00Z",
                  mime_type="application/zip")
    z1.contents = 'http://example.com/20130101-changedump-manifest.xml'
    z2 = Resource(uri='http://example.com/20130102-changedump.zip',
                  lastmod='2013-01-02T23:59:59Z',
                  length=6629,
                  md_from="2013-01-02T00:00:00Z",
                  md_until="2013-01-03T00:00:00Z",
                  mime_type="application/zip")
    z2.contents = 'http://example.com/20130102-changedump-manifest.xml'
    z3 = Resource(uri='http://example.com/20130103-changedump.zip',
                  lastmod='2013-01-03T23:59:59Z',
                  length=8124,
                  md_from="2013-01-03T00:00:00Z",
                  md_until="2013-01-04T00:00:00Z",
                  mime_type="application/zip")
    z3.contents = 'http://example.com/20130103-changedump-manifest.xml'
    cd.add([z1, z2, z3])
    ex_file = self._open_ex('resourcesync_ex_22')
    try:
        ex_xml = ex_file.read()
    finally:
        # Close the example file explicitly rather than leaving the open
        # handle to the garbage collector.
        ex_file.close()
    self._assert_xml_equal(cd.as_xml(), ex_xml)
def test06_add_changed_resources(self):
    """Accumulate created and updated resources in a ChangeList.

    Adds two 'created' entries, then two 'updated' entries (one URI is
    repeated), and finally loads the change list into a ResourceList with
    ``replace=True`` to check that repeated URIs end up as a single entry.
    """
    added = ResourceList()
    added.add(Resource('a', timestamp=1, change='created'))
    added.add(Resource('d', timestamp=4, change='created'))
    self.assertEqual(len(added), 2, "2 things in added resource_list")
    changes = ChangeList()
    changes.add_changed_resources(added, change='created')
    self.assertEqual(len(changes), 2, "2 things added")
    i = iter(changes)
    first = next(i)
    self.assertEqual(first.uri, 'a', "changes[0].uri=a")
    self.assertEqual(first.timestamp, 1, "changes[0].timestamp=1")
    self.assertEqual(first.change, 'created', "changes[0].change=created")
    second = next(i)
    self.assertEqual(second.timestamp, 4, "changes[1].timestamp=4")
    self.assertEqual(second.change, 'created', "changes[1].change=created")
    # Now add some with updated (one same, one diff)
    updated = ResourceList()
    updated.add(Resource('a', timestamp=5, change='created'))
    updated.add(Resource('b', timestamp=6, change='created'))
    self.assertEqual(len(updated), 2, "2 things in updated resource_list")
    changes.add_changed_resources(updated, change='updated')
    self.assertEqual(len(changes), 4, "4 = 2 old + 2 things updated")
    # Make new resource_list from the changes which should not have dupes
    dst = ResourceList()
    dst.add(changes, replace=True)
    self.assertEqual(len(dst), 3, "3 unique resources")
    # For 'a' the entry with timestamp 5 came after the one with timestamp 1,
    # so it is the one expected to remain.
    self.assertEqual(dst.resources['a'].timestamp, 5)
    self.assertEqual(dst.resources['a'].change, 'updated')
    self.assertEqual(dst.resources['b'].timestamp, 6)
    self.assertEqual(dst.resources['b'].change, 'updated')
    self.assertEqual(dst.resources['d'].timestamp, 4)
    self.assertEqual(dst.resources['d'].change, 'created')
def test_build_ex_21(self):
    """Change List which points back to index.

    Builds a ChangeList with ``up`` and ``index`` references and four change
    entries, then compares the generated XML with the stored example
    ``resourcesync_ex_21``.
    """
    cl = ChangeList()
    cl.up = 'http://example.com/dataset1/capabilitylist.xml'
    cl.index = 'http://example.com/dataset1/changelist.xml'
    cl.md_from = "2013-01-02T00:00:00Z"
    cl.md_until = "2013-01-03T00:00:00Z"
    cl.add(Resource(uri='http://example.com/res7.html',
                    lastmod='2013-01-02T12:00:00Z',
                    change='created'))
    cl.add(Resource(uri='http://example.com/res9.pdf',
                    lastmod='2013-01-02T13:00:00Z',
                    change='updated'))
    cl.add(Resource(uri='http://example.com/res5.tiff',
                    lastmod='2013-01-02T19:00:00Z',
                    change='deleted'))
    cl.add(Resource(uri='http://example.com/res7.html',
                    lastmod='2013-01-02T20:00:00Z',
                    change='updated'))
    ex_file = self._open_ex('resourcesync_ex_21')
    try:
        ex_xml = ex_file.read()
    finally:
        # Close the example file explicitly rather than leaving the open
        # handle to the garbage collector.
        ex_file.close()
    self._assert_xml_equal(cl.as_xml(), ex_xml)
def test13_mime_type(self):
    """Check reading, clearing and the default of ``Resource.mime_type``."""
    r = Resource(uri='tv1', mime_type='text/plain')
    self.assertEqual(r.mime_type, 'text/plain')
    r.mime_type = None
    # Identity check: None is a singleton, and assertIsNone gives a clearer
    # failure message than assertEqual(..., None).
    self.assertIsNone(r.mime_type)
    r = Resource(uri='tv2')
    self.assertIsNone(r.mime_type)
def test_build_ex_17(self):
    """Resource Dump with 3 entries and some metadata.

    Each zip entry gets a ``contents`` link to its manifest. The generated
    XML is compared with the stored example ``resourcesync_ex_17``.
    """
    rd = ResourceDump()
    rd.up = 'http://example.com/dataset1/capabilitylist.xml'
    rd.md_at = "2013-01-03T09:00:00Z"
    rd.md_completed = "2013-01-03T09:04:00Z"
    z1 = Resource(uri='http://example.com/resourcedump-part1.zip',
                  mime_type="application/zip",
                  length=4765,
                  md_at="2013-01-03T09:00:00Z",
                  md_completed="2013-01-03T09:02:00Z")
    z1.link_set(rel="contents",
                href="http://example.com/resourcedump_manifest-part1.xml",
                mime_type="application/xml")
    rd.add(z1)
    z2 = Resource(uri='http://example.com/resourcedump-part2.zip',
                  mime_type="application/zip",
                  length=9875,
                  md_at="2013-01-03T09:01:00Z",
                  md_completed="2013-01-03T09:03:00Z")
    z2.link_set(rel="contents",
                href="http://example.com/resourcedump_manifest-part2.xml",
                mime_type="application/xml")
    rd.add(z2)
    z3 = Resource(uri='http://example.com/resourcedump-part3.zip',
                  mime_type="application/zip",
                  length=2298,
                  md_at="2013-01-03T09:03:00Z",
                  md_completed="2013-01-03T09:04:00Z")
    z3.link_set(rel="contents",
                href="http://example.com/resourcedump_manifest-part3.xml",
                mime_type="application/xml")
    rd.add(z3)
    ex_file = self._open_ex('resourcesync_ex_17')
    try:
        ex_xml = ex_file.read()
    finally:
        # Close the example file explicitly rather than leaving the open
        # handle to the garbage collector.
        ex_file.close()
    self._assert_xml_equal(rd.as_xml(), ex_xml)
def test_build_ex_27(self):
    """Build a Change List whose two updated resources each link to a patch.

    The generated XML is compared with the stored example
    ``resourcesync_ex_27``.
    """
    patch_rel = "http://www.openarchives.org/rs/terms/patch"

    change_list = ChangeList()
    change_list.up = "http://example.com/dataset1/capabilitylist.xml"
    change_list.md_from = "2013-01-03T00:00:00Z"

    json_res = Resource(
        uri="http://example.com/res4",
        lastmod="2013-01-03T17:00:00Z",
        change="updated",
        sha256="f4OxZX_x_DFGFDgghgdfb6rtSx-iosjf6735432nklj",
        length=56778,
        mime_type="application/json"
    )
    json_res.link_set(
        rel=patch_rel,
        href="http://example.com/res4-json-patch",
        modified="2013-01-03T17:00:00Z",
        # FIXME - inconsistent
        hash="sha-256:y66dER_t_HWEIKpesdkeb7rtSc-ippjf9823742opld",
        length=73,
        type="application/json-patch"
    )

    tiff_res = Resource(
        uri="http://example.com/res5-full.tiff",
        lastmod="2013-01-03T18:00:00Z",
        change="updated",
        sha256="f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk",
        length="9788456778",
        mime_type="image/tiff"
    )
    tiff_res.link_set(
        rel=patch_rel,
        href="http://example.com/res5-diff",
        modified="2013-01-03T18:00:00Z",
        hash="sha-256:h986gT_t_87HTkjHYE76G558hY-jdfgy76t55sadJUYT",
        length=4533,
        type="application/x-tiff-diff"
    )

    change_list.add([json_res, tiff_res])
    self._assert_xml_equal_ex(change_list.as_xml(), 'resourcesync_ex_27')
def test18_as_xml_part(self):
    """Exercise ``as_xml_part`` error cases and the content of single parts."""
    entries = [
        Resource(uri='a', lastmod='2006-01-01', length=12),
        Resource(uri='b', lastmod='2007-02-02', length=34),
        Resource(uri='c', lastmod='2008-03-03', length=56)
    ]
    listing = ListBaseWithIndex(resources=entries)

    # Allow unlimited entries, part makes no sense
    listing.max_sitemap_entries = None
    self.assertRaises(ListBaseIndexError, listing.as_xml_part)

    # Request after end
    listing.max_sitemap_entries = 1
    self.assertRaises(ListBaseIndexError, listing.as_xml_part, part_number=9)

    # Allow only 1 entry: part 1 is expected to contain just 'b'
    listing.max_sitemap_entries = 1
    part_xml = listing.as_xml_part(part_number=1)
    for pattern, present in ((r'<loc>a</loc>', False),
                             (r'<loc>b</loc>', True),
                             (r'<loc>c</loc>', False)):
        check = self.assertTrue if present else self.assertFalse
        check(re.search(pattern, part_xml))

    # Request truncated: with 2 per part, part 1 is expected to hold just 'c'
    listing.max_sitemap_entries = 2
    part_xml = listing.as_xml_part(part_number=1)
    for pattern, present in ((r'<loc>a</loc>', False),
                             (r'<loc>b</loc>', False),
                             (r'<loc>c</loc>', True)):
        check = self.assertTrue if present else self.assertFalse
        check(re.search(pattern, part_xml))
def test_build_ex_18(self):
    """Resource Dump Manifest with 2 entries and some metadata.

    The generated XML is compared with the stored example
    ``resourcesync_ex_18``.
    """
    rdm = ResourceDumpManifest()
    rdm.up = 'http://example.com/dataset1/capabilitylist.xml'
    rdm.md_at = "2013-01-03T09:00:00Z"
    rdm.md_completed = "2013-01-03T09:02:00Z"
    rdm.add(Resource(uri='http://example.com/res1',
                     lastmod='2013-01-02T13:00:00Z',
                     md5='1584abdf8ebdc9802ac0c6a7402c03b6',
                     length=8876,
                     mime_type='text/html',
                     path='/resources/res1'))
    rdm.add(Resource(uri='http://example.com/res2',
                     lastmod='2013-01-02T14:00:00Z',
                     md5='1e0d5cb8ef6ba40c99b14c0237be735e',
                     sha256='854f61290e2e197a11bc91063afce22e43f8ccc655237050ace766adc68dc784',
                     length=14599,
                     mime_type='application/pdf',
                     path='/resources/res2'))
    ex_file = self._open_ex('resourcesync_ex_18')
    try:
        ex_xml = ex_file.read()
    finally:
        # Close the example file explicitly rather than leaving the open
        # handle to the garbage collector.
        ex_file.close()
    self._assert_xml_equal(rdm.as_xml(), ex_xml)
def test_build_ex_28(self):
    """Build a Change List linking a PDF and its metadata record both ways.

    The generated XML is compared with the stored example
    ``resourcesync_ex_28``.
    """
    change_list = ChangeList()
    change_list.up = "http://example.com/dataset1/capabilitylist.xml"
    change_list.md_from = "2013-01-03T00:00:00Z"

    # The PDF, with a "describedby" link to its metadata record
    pdf = Resource(
        uri="http://example.com/res2.pdf",
        lastmod="2013-01-03T18:00:00Z",
        change="updated",
        md5="1584abdf8ebdc9802ac0c6a7402c03b6",
        length=8876,
        mime_type="application/pdf"
    )
    pdf.link_set(
        rel="describedby",
        href="http://example.com/res2_dublin-core_metadata.xml",
        modified="2013-01-01T12:00:00Z",
        type="application/xml"
    )

    # The metadata record, with "describes" and "profile" links
    metadata = Resource(
        uri="http://example.com/res2_dublin-core_metadata.xml",
        lastmod="2013-01-03T19:00:00Z",
        change="updated",
        mime_type="application/xml"
    )
    metadata.link_set(
        rel="describes",
        href="http://example.com/res2.pdf",
        modified="2013-01-03T18:00:00Z",
        hash="md5:1584abdf8ebdc9802ac0c6a7402c03b6",
        length="8876",
        type="application/pdf"
    )
    metadata.link_set(rel="profile", href="http://purl.org/dc/elements/1.1/")

    change_list.add([pdf, metadata])
    self._assert_xml_equal_ex(change_list.as_xml(), 'resourcesync_ex_28')
def test_build_ex_23(self):
    """Build a Change Dump Manifest with four change entries.

    The generated XML is compared with the stored example
    ``resourcesync_ex_23``.
    """
    manifest = ChangeDumpManifest()
    manifest.up = "http://example.com/dataset1/capabilitylist.xml"
    manifest.md_from = "2013-01-02T00:00:00Z"
    manifest.md_until = "2013-01-03T00:00:00Z"
    entries = (
        Resource(uri="http://example.com/res7.html",
                 lastmod="2013-01-02T12:00:00Z",
                 change="created",
                 md5="1c1b0e264fa9b7e1e9aa6f9db8d6362b",
                 length=4339,
                 mime_type="text/html",
                 path="/changes/res7.html"),
        Resource(uri="http://example.com/res9.pdf",
                 lastmod="2013-01-02T13:00:00Z",
                 change="updated",
                 md5="f906610c3d4aa745cb2b986f25b37c5a",
                 length=38297,
                 mime_type="application/pdf",
                 path="/changes/res9.pdf"),
        Resource(uri="http://example.com/res5.tiff",
                 lastmod="2013-01-02T19:00:00Z",
                 change="deleted"),
        Resource(uri="http://example.com/res7.html",
                 lastmod="2013-01-02T20:00:00Z",
                 change="updated",
                 md5="0988647082c8bc51778894a48ec3b576",
                 length="5426",  # should also take string
                 mime_type="text/html",
                 path="/changes/res7-v2.html"),
    )
    for entry in entries:
        manifest.add(entry)
    self._assert_xml_equal_ex(manifest.as_xml(), 'resourcesync_ex_23')
def test_08_print(self):
    """Compare the full XML of a three-entry ListBaseWithIndex with a literal."""
    listing = ListBaseWithIndex()
    for uri, lastmod, length in (('a', '2001-01-01', 1234),
                                 ('b', '2002-02-02', 56789),
                                 ('c', '2003-03-03', 0)):
        listing.add(Resource(uri=uri, lastmod=lastmod, length=length))
    listing.md['from'] = None  # avoid now being added
    expected_xml = (
        '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">'
        '<rs:md capability="unknown" />'
        '<url><loc>a</loc><lastmod>2001-01-01T00:00:00Z</lastmod><rs:md length="1234" /></url>'
        '<url><loc>b</loc><lastmod>2002-02-02T00:00:00Z</lastmod><rs:md length="56789" /></url>'
        '<url><loc>c</loc><lastmod>2003-03-03T00:00:00Z</lastmod><rs:md length="0" /></url>'
        '</urlset>'
    )
    self.assertEqual(listing.as_xml(), expected_xml)
def test20_as_xml(self):
    """Check that ResourceList XML has the capability and a resource entry."""
    rl = ResourceList()
    rl.add(Resource('a', timestamp=1))
    rl.add(Resource('b', timestamp=2))
    xml = rl.as_xml()
    # No debug print here: the former ``print xml`` statement cluttered test
    # output and is a syntax error on Python 3.
    self.assertTrue(re.search(r'<rs:md .*capability="resourcelist"', xml),
                    'XML has capability')
    self.assertTrue(
        re.search(r'<url><loc>a</loc><lastmod>1970-01-01T00:00:01Z</lastmod></url>', xml),
        'XML has resource a')
def test04_change_list(self):
    """A ChangeList given five distinct created resources has length five."""
    change_list = ChangeList()
    # URIs 'a'..'e' paired with timestamps 1..5
    for stamp, name in enumerate('abcde', 1):
        change_list.add(Resource(name, timestamp=stamp, change='created'))
    self.assertEqual(len(change_list), 5, "5 things in src")
def test01d_same(self):
    """Same with slight timestamp diff.

    The two timestamps are expected to differ, yet the resources are still
    expected to compare equal.
    """
    whole_second = Resource('a')
    fractional = Resource('a')
    whole_second.lastmod = '2012-01-02T01:02:03Z'
    fractional.lastmod = '2012-01-02T01:02:03.99Z'
    self.assertNotEqual(whole_second.timestamp, fractional.timestamp)
    self.assertEqual(whole_second, fractional)
def test02_set_with_repeats(self):
    """A ChangeList keeps every change, including repeated URIs."""
    change_list = ChangeList()
    history = (
        ('a', 1, 'updated'),
        ('b', 1, 'created'),
        ('c', 1, 'deleted'),
        ('a', 2, 'deleted'),
        ('b', 2, 'updated'),
    )
    for uri, stamp, kind in history:
        change_list.add(Resource(uri, timestamp=stamp, change=kind))
    self.assertEqual(len(change_list), 5, "5 changes in change_list")
def test5_add(self):
    """Adding the same resource to an Inventory twice raises ValueError."""
    first, second = Resource(uri='a'), Resource(uri='b')
    inventory = Inventory()
    for res in (first, second):
        inventory.add(res)
        # A second add of the same resource must be rejected
        self.assertRaises(ValueError, inventory.add, res)
def test01_as_xml(self):
    """Check that ChangeDump XML has the capability and a resource entry."""
    cd = ChangeDump()
    cd.add(Resource('a.zip', timestamp=1))
    cd.add(Resource('b.zip', timestamp=2))
    xml = cd.as_xml()
    self.assertTrue(re.search(r'<rs:md .*capability="changedump"', xml),
                    'XML has capability')
    # The dot is escaped so that only the literal URI "a.zip" matches; an
    # unescaped "." would accept any character in that position.
    self.assertTrue(
        re.search(r'<url><loc>a\.zip</loc><lastmod>1970-01-01T00:00:01Z</lastmod></url>', xml),
        'XML has resource a')
def test_build_ex_01(self):
    """Simple Resource List document.

    The generated XML is compared with the stored example
    ``resourcesync_ex_1``.
    """
    rl = ResourceList()
    rl.md_at = '2013-01-03T09:00:00Z'
    rl.add(Resource('http://example.com/res1'))
    rl.add(Resource('http://example.com/res2'))
    ex_file = self._open_ex('resourcesync_ex_1')
    try:
        ex_xml = ex_file.read()
    finally:
        # Close the example file explicitly rather than leaving the open
        # handle to the garbage collector.
        ex_file.close()
    self._assert_xml_equal(rl.as_xml(), ex_xml)
def test_08_print_non_ascii_uri(self):
    """Verify that valid Unicode uri values give good XML out."""
    resource_list = ResourceList(
        md={'capability': 'resourcelist', 'modified': None})
    for uri in (u'a_\u00c3_b', u'c_\u1234_d'):
        resource_list.add(Resource(uri=uri))
    xml = Sitemap().resources_as_xml(resource_list)
    # Each non-ASCII URI must show up inside a <loc> element
    for pattern in (u'<loc>a_.*_b</loc>',
                    u'<loc>a_\u00c3_b</loc>',
                    u'<loc>c_\u1234_d</loc>'):
        self.assertTrue(re.search(pattern, xml))
def test10_no_path(self):
    """``Dump.check_files`` raises DumpError when a resource has no path."""
    resource_list = ResourceList()
    with_path = Resource('http://ex.org/a', length=7,
                         path='resync/test/testdata/a')
    without_path = Resource('http://ex.org/b', length=21)
    resource_list.add(with_path)
    resource_list.add(without_path)
    dump = Dump(resource_list)
    self.assertRaises(DumpError, dump.check_files)
def test_build_archives_ex_3_2(self):
    """Resource List Archive Index listing 2 component Resource List Archives.

    The generated XML is compared with the stored example
    ``archives_ex_3_2``.
    """
    rlai = ResourceListArchive()
    rlai.sitemapindex = True
    rlai.up = 'http://example.com/dataset1/capabilitylist.xml'
    rlai.add(Resource(uri='http://example.com/resourcelistarchive00001.xml'))
    rlai.add(Resource(uri='http://example.com/resourcelistarchive00002.xml'))
    ex_file = self._open_ex('archives_ex_3_2')
    try:
        ex_xml = ex_file.read()
    finally:
        # Close the example file explicitly rather than leaving the open
        # handle to the garbage collector.
        ex_file.close()
    self._assert_xml_equal(rlai.as_xml(), ex_xml)
def test02_dump_check_files(self):
    """``Dump.check_files`` succeeds for a ChangeList and sums the lengths."""
    change_list = ChangeList()
    for uri, length, path in (
            ('http://ex.org/a', 7, 'tests/testdata/a'),
            ('http://ex.org/b', 21, 'tests/testdata/b')):
        change_list.add(
            Resource(uri, length=length, path=path, change="updated"))
    dump = Dump(resources=change_list)
    self.assertTrue(dump.check_files())
    # 7 + 21
    self.assertEqual(dump.total_size, 28)
def test07_as_xml(self):
    """Check capability, ``from`` precision and an entry in ChangeList XML."""
    change_list = ChangeList()
    change_list.md_from = '1970-01-01T00:00:00Z'
    for uri, stamp in (('a', 1), ('b', 2)):
        change_list.add(Resource(uri, timestamp=stamp, change='updated'))
    xml = change_list.as_xml()

    has_capability = re.search(r'<rs:md .*capability="changelist"', xml)
    self.assertTrue(has_capability, 'XML has capability')

    has_from = re.search(
        r'<rs:md .*from="\d\d\d\d\-\d\d\-\d\dT\d\d:\d\d:\d\dZ"', xml)
    self.assertTrue(has_from,
                    'XML has from to seconds precision (and not more)')

    has_resource_a = re.search(
        r'<url><loc>a</loc><lastmod>1970-01-01T00:00:01Z</lastmod>', xml)
    self.assertTrue(has_resource_a, 'XML has resource a')
def test01_add(self):
    """A ResourceSet grows by one for each distinct resource added."""
    resource_set = ResourceSet()
    self.assertEqual(len(resource_set), 0)
    for expected_len, uri in enumerate(('a', 'b', 'c'), 1):
        resource_set.add(Resource(uri))
        self.assertEqual(len(resource_set), expected_len)
def test07_has_md5(self):
    """``ResourceList.has_md5`` becomes true once a member carries an md5."""
    resource_list = ResourceList()
    # Nothing added yet
    self.assertFalse(resource_list.has_md5())

    with_md5_later = Resource(uri='a')
    plain = Resource(uri='b')
    for res in (with_md5_later, plain):
        resource_list.add(res)
    # Members present, but none has an md5
    self.assertFalse(resource_list.has_md5())

    with_md5_later.md5 = "aabbcc"
    self.assertTrue(resource_list.has_md5())
def test_09_print_subset(self):
    """Write only the requested subset of an Inventory as XML.

    Four resources are added, but only 'd' and 'b' are requested via
    ``entries``; the expected output contains just those two, in that order.
    """
    r1 = Resource(uri='a', lastmod='2001-01-01', size=1234)
    r2 = Resource(uri='b', lastmod='2002-02-02', size=56789)
    r3 = Resource(uri='c', lastmod='2003-03-03', size=0)
    # Previously this was also bound to ``r3``, which silently dropped
    # resource 'c' from the inventory.
    r4 = Resource(uri='d', lastmod='2003-03-04', size=444)
    m = Inventory()
    m.add(r1)
    m.add(r2)
    m.add(r3)
    m.add(r4)
    expected_xml = (
        "<?xml version='1.0' encoding='UTF-8'?>\n"
        "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:rs=\"http://resourcesync.org/change/0.1\">"
        "<url><loc>d</loc><lastmod>2003-03-04T00:00:00</lastmod><rs:size>444</rs:size></url>"
        "<url><loc>b</loc><lastmod>2002-02-02T00:00:00</lastmod><rs:size>56789</rs:size></url>"
        "</urlset>"
    )
    self.assertEqual(Sitemap().inventory_as_xml(m, entries=['d', 'b']),
                     expected_xml)
def test6_has_md5(self):
    """``Inventory.has_md5`` becomes true once a member carries an md5."""
    inventory = Inventory()
    # Nothing added yet
    self.assertFalse(inventory.has_md5())

    with_md5_later = Resource(uri='a')
    plain = Resource(uri='b')
    for res in (with_md5_later, plain):
        inventory.add(res)
    # Members present, but none has an md5
    self.assertFalse(inventory.has_md5())

    with_md5_later.md5 = "aabbcc"
    self.assertTrue(inventory.has_md5())
def test02_order(self):
    """Iterating a ResourceSet yields resources sorted by URI, then stops."""
    resource_set = ResourceSet()
    # Deliberately added out of order
    for uri in ('a2', 'a3', 'a1'):
        resource_set.add(Resource(uri))
    walker = iter(resource_set)
    for expected_uri in ('a1', 'a2', 'a3'):
        self.assertEqual(next(walker).uri, expected_uri)
    self.assertRaises(StopIteration, next, walker)
def test09_changetypeerror(self):
    """Check validation of ``Resource.change`` and how to switch it off.

    A bad change value raises ChangeTypeError while checking is active;
    with ``Resource.CHANGE_TYPES`` set to False the same value is accepted.
    """
    r1 = Resource('a')
    self.assertEqual(r1.change, None)
    r1.change = 'deleted'
    self.assertEqual(r1.change, 'deleted')
    self.assertRaises(ChangeTypeError, Resource, 'a', change="bad")
    # disable checking
    saved_change_types = Resource.CHANGE_TYPES
    Resource.CHANGE_TYPES = False
    try:
        r1 = Resource('a', change="bad")
        self.assertEqual(r1.change, 'bad')
    finally:
        # CHANGE_TYPES is class-level state; restore it so that disabling
        # the check does not leak into tests that run afterwards.
        Resource.CHANGE_TYPES = saved_change_types