Beispiel #1
0
 def test1c_same(self):
     """Same with lastmod instead of direct timestamp.

     r1 is given the bare date '2012-01-02'; r2 is assigned each alternative
     spelling in turn and must yield the same timestamp and compare equal
     to r1.
     """
     r1 = Resource('a')
     r1.lastmod = '2012-01-02'
     r2 = Resource('a')
     for r2lm in (
             '2012-01-02',
             '2012-01-02T00:00',
             '2012-01-02T00:00:00',
             '2012-01-02 00:00:00',
             '2012-01-02T00:00:00.00',
             '2012-01-02T00:00:00.000000000000',
             '2012-01-02T00:00:00.000000000001',  # below resolution
             '2012-01-02T00:00:00.00Z',
             '2012-01-02T00:00:00.00+0000',
             '2012-01-02T00:00:00.00-0000',
             '2012-01-02T00:00:00.00+00:00',
             '2012-01-02T00:00:00.00-00:00',
             '2012-01-02T00:00:00.00+02:00'  # FIXME - TZ info currently ignored
     ):
         r2.lastmod = r2lm
         # The message names the variant so a failure identifies the
         # offending lastmod string.
         self.assertEqual(r1.timestamp, r2.timestamp,
                          ('2012-01-02 == %s' % r2lm))
         self.assertEqual(r1, r2,
                          ('resources equal for lastmod %s' % r2lm))
Beispiel #2
0
 def test_build_ex_22(self):
     """Change Dump with three dump files.

     Builds the dump programmatically and compares its XML with the
     'resourcesync_ex_22' example document.
     """
     cd = ChangeDump()
     cd.up = 'http://example.com/dataset1/capabilitylist.xml'
     cd.md_from = "2013-01-01T00:00:00Z"
     z1 = Resource(uri='http://example.com/20130101-changedump.zip',
                   lastmod='2013-01-01T23:59:59Z',
                   length=3109,
                   md_from="2013-01-01T00:00:00Z",
                   md_until="2013-01-02T00:00:00Z",
                   mime_type="application/zip")
     z1.contents = 'http://example.com/20130101-changedump-manifest.xml'
     z2 = Resource(uri='http://example.com/20130102-changedump.zip',
                   lastmod='2013-01-02T23:59:59Z',
                   length=6629,
                   md_from="2013-01-02T00:00:00Z",
                   md_until="2013-01-03T00:00:00Z",
                   mime_type="application/zip")
     z2.contents = 'http://example.com/20130102-changedump-manifest.xml'
     z3 = Resource(uri='http://example.com/20130103-changedump.zip',
                   lastmod='2013-01-03T23:59:59Z',
                   length=8124,
                   md_from="2013-01-03T00:00:00Z",
                   md_until="2013-01-04T00:00:00Z",
                   mime_type="application/zip")
     z3.contents = 'http://example.com/20130103-changedump-manifest.xml'
     cd.add([z1, z2, z3])
     # Close the example file deterministically instead of leaking the handle.
     # NOTE(review): relies on _open_ex returning a file object usable as a
     # context manager - confirm against its definition.
     with self._open_ex('resourcesync_ex_22') as fh:
         ex_xml = fh.read()
     self._assert_xml_equal(cd.as_xml(), ex_xml)
 def test06_add_changed_resources(self):
     """add_changed_resources appends entries tagged with the given change.

     Two batches are added to one ChangeList ('created', then 'updated');
     the result is then folded into a ResourceList with replace=True to
     check that the later entry for a repeated uri wins.
     """
     added = ResourceList()
     added.add(Resource('a', timestamp=1, change='created'))
     added.add(Resource('d', timestamp=4, change='created'))
     self.assertEqual(len(added), 2, "2 things in added resource_list")
     changes = ChangeList()
     changes.add_changed_resources(added, change='created')
     self.assertEqual(len(changes), 2, "2 things added")
     i = iter(changes)
     first = next(i)
     self.assertEqual(first.uri, 'a', "changes[0].uri=a")
     self.assertEqual(first.timestamp, 1, "changes[0].timestamp=1")
     self.assertEqual(first.change, 'created', "changes[0].change=created")
     second = next(i)
     self.assertEqual(second.uri, 'd', "changes[1].uri=d")
     self.assertEqual(second.timestamp, 4, "changes[1].timestamp=4")
     self.assertEqual(second.change, 'created', "changes[1].change=created")
     # Now add some with updated (one uri already present, one new)
     updated = ResourceList()
     updated.add(Resource('a', timestamp=5, change='created'))
     updated.add(Resource('b', timestamp=6, change='created'))
     self.assertEqual(len(updated), 2, "2 things in updated resource_list")
     changes.add_changed_resources(updated, change='updated')
     self.assertEqual(len(changes), 4, "4 = 2 old + 2 things updated")
     # Make new resource_list from the changes which should not have dupes
     dst = ResourceList()
     dst.add(changes, replace=True)
     self.assertEqual(len(dst), 3, "3 unique resources")
     # For 'a' the entry with timestamp 5 was added after the one with
     # timestamp 1, so it is the one that remains.
     self.assertEqual(dst.resources['a'].timestamp, 5)
     self.assertEqual(dst.resources['a'].change, 'updated')
     self.assertEqual(dst.resources['b'].timestamp, 6)
     self.assertEqual(dst.resources['b'].change, 'updated')
     self.assertEqual(dst.resources['d'].timestamp, 4)
     self.assertEqual(dst.resources['d'].change, 'created')
 def test_build_ex_21(self):
     """Change List which points back to index.

     Builds the list programmatically and compares its XML with the
     'resourcesync_ex_21' example document.
     """
     cl = ChangeList()
     cl.up = 'http://example.com/dataset1/capabilitylist.xml'
     cl.index = 'http://example.com/dataset1/changelist.xml'
     cl.md_from = "2013-01-02T00:00:00Z"
     cl.md_until = "2013-01-03T00:00:00Z"
     cl.add(
         Resource(uri='http://example.com/res7.html',
                  lastmod='2013-01-02T12:00:00Z',
                  change='created'))
     cl.add(
         Resource(uri='http://example.com/res9.pdf',
                  lastmod='2013-01-02T13:00:00Z',
                  change='updated'))
     cl.add(
         Resource(uri='http://example.com/res5.tiff',
                  lastmod='2013-01-02T19:00:00Z',
                  change='deleted'))
     # res7.html appears a second time: created earlier, updated later.
     cl.add(
         Resource(uri='http://example.com/res7.html',
                  lastmod='2013-01-02T20:00:00Z',
                  change='updated'))
     # Close the example file deterministically instead of leaking the handle.
     # NOTE(review): relies on _open_ex returning a file object usable as a
     # context manager - confirm against its definition.
     with self._open_ex('resourcesync_ex_21') as fh:
         ex_xml = fh.read()
     self._assert_xml_equal(cl.as_xml(), ex_xml)
Beispiel #5
0
 def test13_mime_type(self):
     """mime_type is settable via the constructor, clearable, and defaults to None."""
     r = Resource(uri='tv1', mime_type='text/plain')
     self.assertEqual(r.mime_type, 'text/plain')
     r.mime_type = None
     # assertIsNone checks identity with None and reports failures clearly.
     self.assertIsNone(r.mime_type)
     r = Resource(uri='tv2')
     self.assertIsNone(r.mime_type)
Beispiel #6
0
 def test_build_ex_17(self):
     """Resource Dump with 3 entries and some metadata.

     Each zip entry carries a 'contents' link to its manifest; the XML is
     compared with the 'resourcesync_ex_17' example document.
     """
     rd = ResourceDump()
     rd.up = 'http://example.com/dataset1/capabilitylist.xml'
     rd.md_at = "2013-01-03T09:00:00Z"
     rd.md_completed = "2013-01-03T09:04:00Z"
     z1 = Resource(uri='http://example.com/resourcedump-part1.zip',
                   mime_type="application/zip",
                   length=4765,
                   md_at="2013-01-03T09:00:00Z",
                   md_completed="2013-01-03T09:02:00Z")
     z1.link_set(rel="contents",
                 href="http://example.com/resourcedump_manifest-part1.xml",
                 mime_type="application/xml")
     rd.add(z1)
     z2 = Resource(uri='http://example.com/resourcedump-part2.zip',
                   mime_type="application/zip",
                   length=9875,
                   md_at="2013-01-03T09:01:00Z",
                   md_completed="2013-01-03T09:03:00Z")
     z2.link_set(rel="contents",
                 href="http://example.com/resourcedump_manifest-part2.xml",
                 mime_type="application/xml")
     rd.add(z2)
     z3 = Resource(uri='http://example.com/resourcedump-part3.zip',
                   mime_type="application/zip",
                   length=2298,
                   md_at="2013-01-03T09:03:00Z",
                   md_completed="2013-01-03T09:04:00Z")
     z3.link_set(rel="contents",
                 href="http://example.com/resourcedump_manifest-part3.xml",
                 mime_type="application/xml")
     rd.add(z3)
     # Close the example file deterministically instead of leaking the handle.
     # NOTE(review): relies on _open_ex returning a file object usable as a
     # context manager - confirm against its definition.
     with self._open_ex('resourcesync_ex_17') as fh:
         ex_xml = fh.read()
     self._assert_xml_equal(rd.as_xml(), ex_xml)
Beispiel #7
0
 def test_build_ex_27(self):
     """Change List whose two updated resources each carry a patch link.

     The generated XML is compared with the 'resourcesync_ex_27' example.
     """
     patch_rel = "http://www.openarchives.org/rs/terms/patch"

     changelist = ChangeList()
     changelist.up = "http://example.com/dataset1/capabilitylist.xml"
     changelist.md_from = "2013-01-03T00:00:00Z"

     # First entry: JSON resource with a json-patch link.
     json_res = Resource(
         uri="http://example.com/res4",
         lastmod="2013-01-03T17:00:00Z",
         change="updated",
         sha256="f4OxZX_x_DFGFDgghgdfb6rtSx-iosjf6735432nklj",
         length=56778,
         mime_type="application/json")
     json_res.link_set(
         rel=patch_rel,
         href="http://example.com/res4-json-patch",
         modified="2013-01-03T17:00:00Z",
         hash="sha-256:y66dER_t_HWEIKpesdkeb7rtSc-ippjf9823742opld",  # FIXME - inconsistent
         length=73,
         type="application/json-patch")

     # Second entry: TIFF resource (length passed as a string) with a diff link.
     tiff_res = Resource(
         uri="http://example.com/res5-full.tiff",
         lastmod="2013-01-03T18:00:00Z",
         change="updated",
         sha256="f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk",
         length="9788456778",
         mime_type="image/tiff")
     tiff_res.link_set(
         rel=patch_rel,
         href="http://example.com/res5-diff",
         modified="2013-01-03T18:00:00Z",
         hash="sha-256:h986gT_t_87HTkjHYE76G558hY-jdfgy76t55sadJUYT",
         length=4533,
         type="application/x-tiff-diff")

     changelist.add([json_res, tiff_res])
     self._assert_xml_equal_ex(changelist.as_xml(), 'resourcesync_ex_27')
Beispiel #8
0
 def test18_as_xml_part(self):
     """Check as_xml_part error cases and which <loc> entries a part holds."""
     entries = [
         Resource(uri='a', lastmod='2006-01-01', length=12),
         Resource(uri='b', lastmod='2007-02-02', length=34),
         Resource(uri='c', lastmod='2008-03-03', length=56)
     ]
     lb = ListBaseWithIndex(resources=entries)

     # With no entry limit a "part" is meaningless, so an error is expected.
     lb.max_sitemap_entries = None
     self.assertRaises(ListBaseIndexError, lb.as_xml_part)

     # Asking for a part beyond the end is an error too.
     lb.max_sitemap_entries = 1
     self.assertRaises(ListBaseIndexError, lb.as_xml_part, part_number=9)

     # One entry per part: part 1 is expected to hold only 'b'.
     lb.max_sitemap_entries = 1
     part_xml = lb.as_xml_part(part_number=1)
     for pattern, present in ((r'<loc>a</loc>', False),
                              (r'<loc>b</loc>', True),
                              (r'<loc>c</loc>', False)):
         self.assertEqual(bool(re.search(pattern, part_xml)), present)

     # Two entries per part: part 1 is the truncated tail holding only 'c'.
     lb.max_sitemap_entries = 2
     part_xml = lb.as_xml_part(part_number=1)
     for pattern, present in ((r'<loc>a</loc>', False),
                              (r'<loc>b</loc>', False),
                              (r'<loc>c</loc>', True)):
         self.assertEqual(bool(re.search(pattern, part_xml)), present)
 def test_build_ex_18(self):
     """Resource Dump Manifest with 2 entries and some metadata.

     Builds the manifest programmatically and compares its XML with the
     'resourcesync_ex_18' example document.
     """
     rdm = ResourceDumpManifest()
     rdm.up = 'http://example.com/dataset1/capabilitylist.xml'
     rdm.md_at = "2013-01-03T09:00:00Z"
     rdm.md_completed = "2013-01-03T09:02:00Z"
     rdm.add(
         Resource(uri='http://example.com/res1',
                  lastmod='2013-01-02T13:00:00Z',
                  md5='1584abdf8ebdc9802ac0c6a7402c03b6',
                  length=8876,
                  mime_type='text/html',
                  path='/resources/res1'))
     rdm.add(
         Resource(
             uri='http://example.com/res2',
             lastmod='2013-01-02T14:00:00Z',
             md5='1e0d5cb8ef6ba40c99b14c0237be735e',
             sha256=
             '854f61290e2e197a11bc91063afce22e43f8ccc655237050ace766adc68dc784',
             length=14599,
             mime_type='application/pdf',
             path='/resources/res2'))
     # Close the example file deterministically instead of leaking the handle.
     # NOTE(review): relies on _open_ex returning a file object usable as a
     # context manager - confirm against its definition.
     with self._open_ex('resourcesync_ex_18') as fh:
         ex_xml = fh.read()
     self._assert_xml_equal(rdm.as_xml(), ex_xml)
Beispiel #10
0
 def test_build_ex_28(self):
     """Change List linking a PDF and its metadata record to each other.

     The PDF entry has a 'describedby' link, the metadata entry has
     'describes' and 'profile' links; the XML is compared with the
     'resourcesync_ex_28' example.
     """
     changelist = ChangeList()
     changelist.up = "http://example.com/dataset1/capabilitylist.xml"
     changelist.md_from = "2013-01-03T00:00:00Z"

     pdf_res = Resource(
         uri="http://example.com/res2.pdf",
         lastmod="2013-01-03T18:00:00Z",
         change="updated",
         md5="1584abdf8ebdc9802ac0c6a7402c03b6",
         length=8876,
         mime_type="application/pdf")
     pdf_res.link_set(
         rel="describedby",
         href="http://example.com/res2_dublin-core_metadata.xml",
         modified="2013-01-01T12:00:00Z",
         type="application/xml")

     metadata_res = Resource(
         uri="http://example.com/res2_dublin-core_metadata.xml",
         lastmod="2013-01-03T19:00:00Z",
         change="updated",
         mime_type="application/xml")
     metadata_res.link_set(
         rel="describes",
         href="http://example.com/res2.pdf",
         modified="2013-01-03T18:00:00Z",
         hash="md5:1584abdf8ebdc9802ac0c6a7402c03b6",
         length="8876",
         type="application/pdf")
     metadata_res.link_set(
         rel="profile",
         href="http://purl.org/dc/elements/1.1/")

     changelist.add([pdf_res, metadata_res])
     self._assert_xml_equal_ex(changelist.as_xml(), 'resourcesync_ex_28')
Beispiel #11
0
 def test_build_ex_23(self):
     """Change Dump Manifest with four change entries.

     Entries are added in order (created, updated, deleted, updated) and
     the XML is compared with the 'resourcesync_ex_23' example.
     """
     manifest = ChangeDumpManifest()
     manifest.up = "http://example.com/dataset1/capabilitylist.xml"
     manifest.md_from = "2013-01-02T00:00:00Z"
     manifest.md_until = "2013-01-03T00:00:00Z"

     entries = [
         Resource(uri="http://example.com/res7.html",
                  lastmod="2013-01-02T12:00:00Z",
                  change="created",
                  md5="1c1b0e264fa9b7e1e9aa6f9db8d6362b",
                  length=4339,
                  mime_type="text/html",
                  path="/changes/res7.html"),
         Resource(uri="http://example.com/res9.pdf",
                  lastmod="2013-01-02T13:00:00Z",
                  change="updated",
                  md5="f906610c3d4aa745cb2b986f25b37c5a",
                  length=38297,
                  mime_type="application/pdf",
                  path="/changes/res9.pdf"),
         Resource(uri="http://example.com/res5.tiff",
                  lastmod="2013-01-02T19:00:00Z",
                  change="deleted"),
         Resource(uri="http://example.com/res7.html",
                  lastmod="2013-01-02T20:00:00Z",
                  change="updated",
                  md5="0988647082c8bc51778894a48ec3b576",
                  length="5426",  # should also take string
                  mime_type="text/html",
                  path="/changes/res7-v2.html"),
     ]
     for entry in entries:
         manifest.add(entry)

     self._assert_xml_equal_ex(manifest.as_xml(), 'resourcesync_ex_23')
 def test_08_print(self):
     """as_xml of a three-entry ListBaseWithIndex matches the exact expected XML."""
     listing = ListBaseWithIndex()
     for uri, lastmod, length in (('a', '2001-01-01', 1234),
                                  ('b', '2002-02-02', 56789),
                                  ('c', '2003-03-03', 0)):
         listing.add(Resource(uri=uri, lastmod=lastmod, length=length))
     listing.md['from'] = None  # avoid now being added
     # Adjacent literals concatenate into the single expected document.
     expected = (
         '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
         '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" '
         'xmlns:rs="http://www.openarchives.org/rs/terms/">'
         '<rs:md capability="unknown" />'
         '<url><loc>a</loc><lastmod>2001-01-01T00:00:00Z</lastmod>'
         '<rs:md length="1234" /></url>'
         '<url><loc>b</loc><lastmod>2002-02-02T00:00:00Z</lastmod>'
         '<rs:md length="56789" /></url>'
         '<url><loc>c</loc><lastmod>2003-03-03T00:00:00Z</lastmod>'
         '<rs:md length="0" /></url>'
         '</urlset>'
     )
     self.assertEqual(listing.as_xml(), expected)
Beispiel #13
0
 def test20_as_xml(self):
     """Resource List XML carries the capability and a plain entry for 'a'."""
     rl = ResourceList()
     rl.add(Resource('a', timestamp=1))
     rl.add(Resource('b', timestamp=2))
     xml = rl.as_xml()
     # No debug printing here: the old Python 2 print statement was a syntax
     # error under Python 3 and only added noise to test output.
     self.assertTrue(re.search(r'<rs:md .*capability="resourcelist"', xml), 'XML has capability')
     self.assertTrue(re.search(r'<url><loc>a</loc><lastmod>1970-01-01T00:00:01Z</lastmod></url>', xml), 'XML has resource a')
 def test04_change_list(self):
     """A ChangeList reports five entries after five 'created' additions."""
     changelist = ChangeList()
     # uris 'a'..'e' are paired with timestamps 1..5
     for stamp, uri in enumerate('abcde', 1):
         changelist.add(Resource(uri, timestamp=stamp, change='created'))
     self.assertEqual(len(changelist), 5, "5 things in src")
Beispiel #15
0
 def test01d_same(self):
     """Resources compare equal despite a sub-second lastmod difference.

     The two timestamps themselves are expected to differ.
     """
     whole_second = Resource('a')
     fractional = Resource('a')
     whole_second.lastmod = '2012-01-02T01:02:03Z'
     fractional.lastmod = '2012-01-02T01:02:03.99Z'
     self.assertNotEqual(whole_second.timestamp, fractional.timestamp)
     self.assertEqual(whole_second, fractional)
 def test02_set_with_repeats(self):
     """Repeated uris are kept as separate entries in a ChangeList."""
     events = (
         ('a', 1, 'updated'),
         ('b', 1, 'created'),
         ('c', 1, 'deleted'),
         ('a', 2, 'deleted'),
         ('b', 2, 'updated'),
     )
     changelist = ChangeList()
     for uri, stamp, kind in events:
         changelist.add(Resource(uri, timestamp=stamp, change=kind))
     self.assertEqual(len(changelist), 5, "5 changes in change_list")
Beispiel #17
0
 def test5_add(self):
     """Adding a resource to an Inventory a second time raises ValueError."""
     first = Resource(uri='a')
     second = Resource(uri='b')
     inventory = Inventory()
     for resource in (first, second):
         # The initial add is accepted; repeating it must be rejected.
         inventory.add(resource)
         self.assertRaises(ValueError, inventory.add, resource)
 def test01_as_xml(self):
     """Change Dump XML carries the capability and a plain entry for 'a.zip'."""
     cd = ChangeDump()
     cd.add(Resource('a.zip', timestamp=1))
     cd.add(Resource('b.zip', timestamp=2))
     xml = cd.as_xml()
     self.assertTrue(re.search(r'<rs:md .*capability="changedump"', xml), 'XML has capability')
     # The dot is escaped so the pattern matches the literal uri 'a.zip'
     # rather than any character in that position.
     self.assertTrue(re.search(r'<url><loc>a\.zip</loc><lastmod>1970-01-01T00:00:01Z</lastmod></url>', xml), 'XML has resource a')
Beispiel #19
0
 def test_build_ex_01(self):
     """Simple Resource List document.

     Builds a two-entry list and compares its XML with the
     'resourcesync_ex_1' example document.
     """
     rl = ResourceList()
     rl.md_at = '2013-01-03T09:00:00Z'
     rl.add(Resource('http://example.com/res1'))
     rl.add(Resource('http://example.com/res2'))
     # Close the example file deterministically instead of leaking the handle.
     # NOTE(review): relies on _open_ex returning a file object usable as a
     # context manager - confirm against its definition.
     with self._open_ex('resourcesync_ex_1') as fh:
         ex_xml = fh.read()
     self._assert_xml_equal(rl.as_xml(), ex_xml)
Beispiel #20
0
 def test_08_print_non_ascii_uri(self):
     """Non-ASCII Unicode uris must appear intact in the generated XML."""
     resource_list = ResourceList(md={'capability': 'resourcelist', 'modified': None})
     for uri in (u'a_\u00c3_b', u'c_\u1234_d'):
         resource_list.add(Resource(uri=uri))
     xml = Sitemap().resources_as_xml(resource_list)
     # A loose wildcard match first, then the exact non-ASCII characters.
     expected_patterns = (
         u'<loc>a_.*_b</loc>',
         u'<loc>a_\u00c3_b</loc>',
         u'<loc>c_\u1234_d</loc>',
     )
     for pattern in expected_patterns:
         self.assertTrue(re.search(pattern, xml))
Beispiel #21
0
 def test10_no_path(self):
     """Expect DumpError from check_files when one resource is given no path."""
     with_path = Resource('http://ex.org/a',
                          length=7,
                          path='resync/test/testdata/a')
     without_path = Resource('http://ex.org/b', length=21)
     resource_list = ResourceList()
     resource_list.add(with_path)
     resource_list.add(without_path)
     dump = Dump(resource_list)
     self.assertRaises(DumpError, dump.check_files)
Beispiel #22
0
 def test_build_archives_ex_3_2(self):
     """Resource List Archive Index listing 2 component Resource List Archives.

     The sitemapindex flag is set before adding the two component
     archives; the XML is compared with the 'archives_ex_3_2' example.
     """
     rlai = ResourceListArchive()
     rlai.sitemapindex = True
     rlai.up = 'http://example.com/dataset1/capabilitylist.xml'
     rlai.add(Resource(uri='http://example.com/resourcelistarchive00001.xml'))
     rlai.add(Resource(uri='http://example.com/resourcelistarchive00002.xml'))
     # Close the example file deterministically instead of leaking the handle.
     # NOTE(review): relies on _open_ex returning a file object usable as a
     # context manager - confirm against its definition.
     with self._open_ex('archives_ex_3_2') as fh:
         ex_xml = fh.read()
     self._assert_xml_equal(rlai.as_xml(), ex_xml)
Beispiel #23
0
 def test02_dump_check_files(self):
     """check_files succeeds for two resources with paths and totals 28 bytes."""
     changelist = ChangeList()
     specs = (
         ('http://ex.org/a', 7, 'tests/testdata/a'),
         ('http://ex.org/b', 21, 'tests/testdata/b'),
     )
     for uri, size, local_path in specs:
         changelist.add(Resource(uri, length=size,
                                 path=local_path, change="updated"))
     dump = Dump(resources=changelist)
     self.assertTrue(dump.check_files())
     # 7 + 21 from the declared lengths above
     self.assertEqual(dump.total_size, 28)
 def test07_as_xml(self):
     """Change List XML has the capability, a seconds-precision from, and 'a'."""
     changelist = ChangeList()
     changelist.md_from = '1970-01-01T00:00:00Z'
     for stamp, uri in enumerate(('a', 'b'), 1):
         changelist.add(Resource(uri, timestamp=stamp, change='updated'))
     document = changelist.as_xml()
     checks = (
         (r'<rs:md .*capability="changelist"',
          'XML has capability'),
         (r'<rs:md .*from="\d\d\d\d\-\d\d\-\d\dT\d\d:\d\d:\d\dZ"',
          'XML has from to seconds precision (and not more)'),
         (r'<url><loc>a</loc><lastmod>1970-01-01T00:00:01Z</lastmod>',
          'XML has resource a'),
     )
     for pattern, message in checks:
         self.assertTrue(re.search(pattern, document), message)
 def test01_add(self):
     """len() of a ResourceSet grows by one with each distinct resource added."""
     resource_set = ResourceSet()
     self.assertEqual(len(resource_set), 0)
     for expected_size, uri in enumerate(('a', 'b', 'c'), 1):
         resource_set.add(Resource(uri))
         self.assertEqual(len(resource_set), expected_size)
Beispiel #26
0
 def test07_has_md5(self):
     """has_md5 is false until a contained resource is given an md5."""
     plain = Resource(uri='b')
     hashed_later = Resource(uri='a')
     resource_list = ResourceList()
     # Empty list: nothing can have an md5.
     self.assertFalse(resource_list.has_md5())
     resource_list.add(hashed_later)
     resource_list.add(plain)
     self.assertFalse(resource_list.has_md5())
     # Set md5 on a resource that is already in the list.
     hashed_later.md5 = "aabbcc"
     self.assertTrue(resource_list.has_md5())
Beispiel #27
0
 def test_09_print_subset(self):
     """inventory_as_xml with entries= emits only the named uris, in that order.

     The inventory holds four resources; only 'd' and 'b' are requested.
     """
     r1 = Resource(uri='a', lastmod='2001-01-01', size=1234)
     r2 = Resource(uri='b', lastmod='2002-02-02', size=56789)
     r3 = Resource(uri='c', lastmod='2003-03-03', size=0)
     # Distinct name: reusing r3 here used to discard resource 'c' before it
     # was ever added to the inventory.
     r4 = Resource(uri='d', lastmod='2003-03-04', size=444)
     m = Inventory()
     m.add(r1)
     m.add(r2)
     m.add(r3)
     m.add(r4)
     expected = (
         "<?xml version='1.0' encoding='UTF-8'?>\n"
         "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" "
         "xmlns:rs=\"http://resourcesync.org/change/0.1\">"
         "<url><loc>d</loc><lastmod>2003-03-04T00:00:00</lastmod>"
         "<rs:size>444</rs:size></url>"
         "<url><loc>b</loc><lastmod>2002-02-02T00:00:00</lastmod>"
         "<rs:size>56789</rs:size></url>"
         "</urlset>"
     )
     self.assertEqual(Sitemap().inventory_as_xml(m, entries=['d', 'b']), expected)
Beispiel #28
0
 def test6_has_md5(self):
     """Inventory.has_md5 turns true once a contained resource gets an md5."""
     hashed_later = Resource(uri='a')
     plain = Resource(uri='b')
     inventory = Inventory()
     # Empty inventory first, then populated but still without any md5.
     self.assertFalse(inventory.has_md5())
     for resource in (hashed_later, plain):
         inventory.add(resource)
     self.assertFalse(inventory.has_md5())
     # Set md5 on a resource that is already in the inventory.
     hashed_later.md5 = "aabbcc"
     self.assertTrue(inventory.has_md5())
 def test02_order(self):
     """Iterating a ResourceSet yields uris in sorted order, not insertion order."""
     resource_set = ResourceSet()
     for uri in ('a2', 'a3', 'a1'):
         resource_set.add(Resource(uri))
     cursor = iter(resource_set)
     for expected_uri in ('a1', 'a2', 'a3'):
         self.assertEqual(next(cursor).uri, expected_uri)
     # The iterator must be exhausted after the three entries.
     self.assertRaises(StopIteration, next, cursor)
Beispiel #30
0
 def test09_changetypeerror(self):
     """Invalid change values raise ChangeTypeError unless checking is disabled."""
     r1 = Resource('a')
     self.assertIsNone(r1.change)
     r1.change = 'deleted'
     self.assertEqual(r1.change, 'deleted')
     self.assertRaises(ChangeTypeError, Resource, 'a', change="bad")
     # Disable checking, but restore the class attribute afterwards so the
     # override does not leak into tests that run later.
     saved_change_types = Resource.CHANGE_TYPES
     Resource.CHANGE_TYPES = False
     try:
         r1 = Resource('a', change="bad")
         self.assertEqual(r1.change, 'bad')
     finally:
         Resource.CHANGE_TYPES = saved_change_types