コード例 #1
0
 def test48_write_dump_if_requested(self):
     """Exercise Client.write_dump_if_requested with and without a dump file.

     Without a dump file the call must return a false value. With a dump
     file, the text collected by capture_stdout() must contain 'FIXME'.
     """
     client = Client()
     # Case 1: no dump file requested
     outcome = client.write_dump_if_requested(ChangeList(), None)
     self.assertFalse(outcome)
     # Case 2: dump file requested, inspect the captured output
     with capture_stdout() as captured:
         client.write_dump_if_requested(ChangeList(), '/tmp/a_file')
     found = re.search(r'FIXME', captured.result)
     self.assertTrue(found)
コード例 #2
0
 def test_build_ex_27(self):
     """Build a Change List of two updated resources that each carry a
     patch link, and compare its XML with example 'resourcesync_ex_27'."""
     patch_rel = "http://www.openarchives.org/rs/terms/patch"

     change_list = ChangeList()
     change_list.up = "http://example.com/dataset1/capabilitylist.xml"
     change_list.md_from = "2013-01-03T00:00:00Z"

     # First resource: JSON document plus its JSON patch
     json_res = Resource(
         uri="http://example.com/res4",
         lastmod="2013-01-03T17:00:00Z",
         change="updated",
         sha256="f4OxZX_x_DFGFDgghgdfb6rtSx-iosjf6735432nklj",
         length=56778,
         mime_type="application/json")
     json_res.link_set(
         rel=patch_rel,
         href="http://example.com/res4-json-patch",
         modified="2013-01-03T17:00:00Z",
         # FIXME - inconsistent
         hash="sha-256:y66dER_t_HWEIKpesdkeb7rtSc-ippjf9823742opld",
         length=73,
         type="application/json-patch")

     # Second resource: TIFF image plus its diff (length given as a string)
     tiff_res = Resource(
         uri="http://example.com/res5-full.tiff",
         lastmod="2013-01-03T18:00:00Z",
         change="updated",
         sha256="f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk",
         length="9788456778",
         mime_type="image/tiff")
     tiff_res.link_set(
         rel=patch_rel,
         href="http://example.com/res5-diff",
         modified="2013-01-03T18:00:00Z",
         hash="sha-256:h986gT_t_87HTkjHYE76G558hY-jdfgy76t55sadJUYT",
         length=4533,
         type="application/x-tiff-diff")

     change_list.add([json_res, tiff_res])
     generated_xml = change_list.as_xml()
     self._assert_xml_equal_ex(generated_xml, 'resourcesync_ex_27')
コード例 #3
0
 def test_build_ex_24(self):
     """Build a Change List whose single resource has three 'duplicate'
     links, and compare its XML with example 'resourcesync_ex_24'."""
     stamp = "2013-01-03T18:00:00Z"

     change_list = ChangeList()
     change_list.up = "http://example.com/dataset1/capabilitylist.xml"
     change_list.md_from = "2013-01-03T00:00:00Z"

     res = Resource(
         uri="http://example.com/res1",
         lastmod=stamp,
         change="updated",
         md5="1584abdf8ebdc9802ac0c6a7402c03b6",
         length=8876,
         mime_type="text/html")

     # Resource.link_set will add or change a link depending on whether one
     # with the particular rel already exists, unless allow_duplicates=True.
     # Resource.link_add will always add. Both paths are exercised here.
     res.link_set(
         rel="duplicate",
         href="http://mirror1.example.com/res1",
         pri="1",
         modified=stamp)
     res.link_set(
         rel="duplicate",
         href="http://mirror2.example.com/res1",
         pri="2",
         modified=stamp,
         allow_duplicates=True)
     res.link_add(
         rel="duplicate",
         href="gsiftp://gridftp.example.com/res1",
         pri="3",
         modified=stamp)

     change_list.add(res)
     generated_xml = change_list.as_xml()
     self._assert_xml_equal_ex(generated_xml, 'resourcesync_ex_24')
コード例 #4
0
 def test_build_ex_21(self):
     """Build a Change List that points back to its index and compare the
     XML with the contents of example 'resourcesync_ex_21'."""
     change_list = ChangeList()
     change_list.up = 'http://example.com/dataset1/capabilitylist.xml'
     change_list.index = 'http://example.com/dataset1/changelist.xml'
     change_list.md_from = "2013-01-02T00:00:00Z"
     change_list.md_until = "2013-01-03T00:00:00Z"

     # (uri, lastmod, change) for each entry, in the order they are added
     entries = (
         ('http://example.com/res7.html', '2013-01-02T12:00:00Z', 'created'),
         ('http://example.com/res9.pdf', '2013-01-02T13:00:00Z', 'updated'),
         ('http://example.com/res5.tiff', '2013-01-02T19:00:00Z', 'deleted'),
         ('http://example.com/res7.html', '2013-01-02T20:00:00Z', 'updated'),
     )
     for uri, lastmod, change in entries:
         change_list.add(Resource(uri=uri, lastmod=lastmod, change=change))

     expected_xml = self._open_ex('resourcesync_ex_21').read()
     self._assert_xml_equal(change_list.as_xml(), expected_xml)
コード例 #5
0
 def test_build_ex_28(self):
     """Build a Change List with a PDF and its metadata record linked to
     each other, and compare its XML with example 'resourcesync_ex_28'."""
     pdf_uri = "http://example.com/res2.pdf"
     meta_uri = "http://example.com/res2_dublin-core_metadata.xml"

     change_list = ChangeList()
     change_list.up = "http://example.com/dataset1/capabilitylist.xml"
     change_list.md_from = "2013-01-03T00:00:00Z"

     # The PDF, described by the metadata record
     pdf_res = Resource(
         uri=pdf_uri,
         lastmod="2013-01-03T18:00:00Z",
         change="updated",
         md5="1584abdf8ebdc9802ac0c6a7402c03b6",
         length=8876,
         mime_type="application/pdf")
     pdf_res.link_set(
         rel="describedby",
         href=meta_uri,
         modified="2013-01-01T12:00:00Z",
         type="application/xml")

     # The metadata record, which describes the PDF and declares a profile
     meta_res = Resource(
         uri=meta_uri,
         lastmod="2013-01-03T19:00:00Z",
         change="updated",
         mime_type="application/xml")
     meta_res.link_set(
         rel="describes",
         href=pdf_uri,
         modified="2013-01-03T18:00:00Z",
         hash="md5:1584abdf8ebdc9802ac0c6a7402c03b6",
         length="8876",
         type="application/pdf")
     meta_res.link_set(
         rel="profile",
         href="http://purl.org/dc/elements/1.1/")

     change_list.add([pdf_res, meta_res])
     generated_xml = change_list.as_xml()
     self._assert_xml_equal_ex(generated_xml, 'resourcesync_ex_28')
コード例 #6
0
 def test06_add_changed_resources(self):
     """Check ChangeList.add_changed_resources and later de-duplication.

     Two batches of resources are added with different change types, then
     the change list is loaded into a ResourceList with replace=True and
     the surviving entries are checked.
     """
     # First batch: two resources, added as 'created'
     created_rl = ResourceList()
     for uri, stamp in (('a', 1), ('d', 4)):
         created_rl.add(Resource(uri, timestamp=stamp, change='created'))
     self.assertEqual(len(created_rl), 2, "2 things in added resource_list")

     change_list = ChangeList()
     change_list.add_changed_resources(created_rl, change='created')
     self.assertEqual(len(change_list), 2, "2 things added")

     entries = iter(change_list)
     entry_one = next(entries)
     self.assertEqual(entry_one.uri, 'a', "changes[0].uri=a")
     self.assertEqual(entry_one.timestamp, 1, "changes[0].timestamp=1")
     self.assertEqual(entry_one.change, 'created')
     entry_two = next(entries)
     self.assertEqual(entry_two.timestamp, 4, "changes[1].timestamp=4")
     self.assertEqual(entry_two.change, 'created', "changes[1].change=createdd")

     # Second batch: one URI already seen ('a') and one new ('b'), added
     # with change='updated' although the resources say 'created'
     updated_rl = ResourceList()
     for uri, stamp in (('a', 5), ('b', 6)):
         updated_rl.add(Resource(uri, timestamp=stamp, change='created'))
     self.assertEqual(len(updated_rl), 2, "2 things in updated resource_list")
     change_list.add_changed_resources(updated_rl, change='updated')
     self.assertEqual(len(change_list), 4, "4 = 2 old + 2 things updated")

     # A resource list built from the changes should hold no duplicates
     merged = ResourceList()
     merged.add(change_list, replace=True)
     self.assertEqual(len(merged), 3, "3 unique resources")
     # For 'a' the later entry (timestamp 5) is expected rather than 1
     expectations = (
         ('a', 5, 'updated'),
         ('b', 6, 'updated'),
         ('d', 4, 'created'),
     )
     for uri, stamp, change in expectations:
         self.assertEqual(merged.resources[uri].timestamp, stamp)
         self.assertEqual(merged.resources[uri].change, change)
コード例 #7
0
    def test09_parse_no_capability(self):
        """Parsing a document without a capability must raise SitemapParseError."""
        # The document has a <url> entry but no top-level <rs:md capability=...>
        document = (
            '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
            '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">'
            '<url><loc>http://example.com/res1</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:md change="updated"/></url>'
            '</urlset>'
        )
        change_list = ChangeList()
        stream = io.StringIO(document)
        with self.assertRaises(SitemapParseError):
            change_list.parse(fh=stream)
コード例 #8
0
 def test02_set_with_repeats(self):
     """Adding entries with repeated URIs keeps every entry in the list."""
     change_list = ChangeList()
     # (uri, timestamp, change); 'a' and 'b' each appear twice
     entries = (
         ('a', 1, 'updated'),
         ('b', 1, 'created'),
         ('c', 1, 'deleted'),
         ('a', 2, 'deleted'),
         ('b', 2, 'updated'),
     )
     for uri, stamp, change in entries:
         change_list.add(Resource(uri, timestamp=stamp, change=change))
     self.assertEqual(len(change_list), 5, "5 changes in change_list")
コード例 #9
0
 def test04_change_list(self):
     """Five 'created' entries added to a ChangeList give a length of 5."""
     change_list = ChangeList()
     # URIs 'a'..'e' paired with timestamps 1..5
     for stamp, uri in enumerate('abcde', 1):
         change_list.add(Resource(uri, timestamp=stamp, change='created'))
     self.assertEqual(len(change_list), 5, "5 things in src")
コード例 #10
0
ファイル: test_dump.py プロジェクト: ramonmassip/resync
 def test02_dump_check_files(self):
     """Dump.check_files succeeds for two test files and totals their sizes."""
     change_list = ChangeList()
     # (uri, length, local path) of the two resources under test
     fixtures = (
         ('http://ex.org/a', 7, 'tests/testdata/a'),
         ('http://ex.org/b', 21, 'tests/testdata/b'),
     )
     for uri, size, local_path in fixtures:
         change_list.add(
             Resource(uri, length=size, path=local_path, change="updated"))
     dump = Dump(resources=change_list)
     self.assertTrue(dump.check_files())
     # 7 + 21
     self.assertEqual(dump.total_size, 28)
コード例 #11
0
 def test07_as_xml(self):
     """Check that the XML from as_xml() matches three expected patterns."""
     change_list = ChangeList()
     change_list.md_from = '1970-01-01T00:00:00Z'
     change_list.add(Resource('a', timestamp=1, change='updated'))
     change_list.add(Resource('b', timestamp=2, change='updated'))
     generated_xml = change_list.as_xml()

     # (pattern, failure message) pairs, checked in this order
     checks = (
         (r'<rs:md .*capability="changelist"',
          'XML has capability'),
         (r'<rs:md .*from="\d\d\d\d\-\d\d\-\d\dT\d\d:\d\d:\d\dZ"',
          'XML has from to seconds precision (and not more)'),
         (r'<url><loc>a</loc><lastmod>1970-01-01T00:00:01Z</lastmod>',
          'XML has resource a'),
     )
     for pattern, message in checks:
         self.assertTrue(re.search(pattern, generated_xml), message)
コード例 #12
0
    def test10_parse_bad_capability(self):
        """Parsing a document whose capability is "bad_capability" must
        raise SitemapParseError."""
        document = (
            '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
            '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">'
            '<rs:md capability="bad_capability" from="2013-01-01"/>'
            '<url><loc>http://example.com/bad_res_1</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:md change="updated"/></url>'
            '</urlset>'
        )
        change_list = ChangeList()
        stream = io.StringIO(document)
        with self.assertRaises(SitemapParseError):
            change_list.parse(fh=stream)
コード例 #13
0
 def test03_multiple(self):
     """Register a ResourceList and a ChangeList as capabilities and check
     that both appear in the CapabilityList XML."""
     capability_list = CapabilityList()
     resource_list = ResourceList()
     capability_list.add_capability(resource_list, "rl.xml")
     change_list = ChangeList()
     capability_list.add_capability(change_list, "cl.xml")
     self.assertEqual(len(capability_list), 2)

     generated_xml = capability_list.as_xml()
     rl_match = re.search(
         r'<loc>rl.xml</loc><rs:md capability="resourcelist" />',
         generated_xml)
     self.assertTrue(rl_match)
     cl_match = re.search(
         r'<loc>cl.xml</loc><rs:md capability="changelist" />',
         generated_xml)
     self.assertTrue(cl_match)
コード例 #14
0
 def test_build_ex_13(self):
     """Build a Capability List with four capabilities and compare the XML
     with the contents of example 'resourcesync_ex_13'."""
     capability_list = CapabilityList()
     capability_list.describedby = 'http://example.com/info_about_set1_of_resources.xml'
     capability_list.up = 'http://example.com/resourcesync_description.xml'

     # Added in this order: resource list, resource dump, change list, change dump
     resource_list = ResourceList(uri='http://example.com/dataset1/resourcelist.xml')
     capability_list.add_capability(capability=resource_list)
     resource_dump = ResourceDump(uri='http://example.com/dataset1/resourcedump.xml')
     capability_list.add_capability(capability=resource_dump)
     change_list = ChangeList(uri='http://example.com/dataset1/changelist.xml')
     capability_list.add_capability(capability=change_list)
     change_dump = ChangeDump(uri='http://example.com/dataset1/changedump.xml')
     capability_list.add_capability(capability=change_dump)

     expected_xml = self._open_ex('resourcesync_ex_13').read()
     self._assert_xml_equal(capability_list.as_xml(), expected_xml)
コード例 #15
0
 def test03_with_repeats_again(self):
     """A second Resource with an already-present URI is still added."""
     res_a = Resource(uri='a', length=1, change='created')
     res_b = Resource(uri='b', length=2, change='created')
     change_list = ChangeList()
     for res in (res_a, res_b):
         change_list.add(res)
     self.assertEqual(len(change_list), 2)

     # Same URI as res_a but a different length: the list grows to three
     res_a_again = Resource(uri='a', length=10, change='created')
     change_list.add(res_a_again)
     self.assertEqual(len(change_list), 3)
コード例 #16
0
 def test_ex_03(self):
     """Parse resourcesync_ex_3, a simple change list with 2 resources,
     and check the URIs, lastmod values and change types."""
     change_list = ChangeList()
     change_list.parse('tests/testdata/examples_from_spec/resourcesync_ex_3.xml')
     self.assertEqual(len(change_list.resources), 2, '2 resources')

     expected_uris = ['http://example.com/res2.pdf',
                      'http://example.com/res3.tiff']
     self.assertEqual(sorted(change_list.uris()), expected_uris)

     # lastmod values first, then change types, each in list order
     self.assertEqual(change_list.resources[0].lastmod, '2013-01-02T13:00:00Z')
     self.assertEqual(change_list.resources[1].lastmod, '2013-01-02T18:00:00Z')
     self.assertEqual(change_list.resources[0].change, 'updated')
     self.assertEqual(change_list.resources[1].change, 'deleted')
コード例 #17
0
 def test_build_ex_31(self):
     """Build a Change List with one updated HTML resource and compare its
     XML with example 'resourcesync_ex_31'."""
     change_list = ChangeList()
     change_list.up = "http://example.com/dataset1/capabilitylist.xml"
     change_list.md_from = "2013-01-03T00:00:00Z"

     html_res = Resource(
         uri="http://original.example.com/res1.html",
         lastmod="2013-01-03T07:00:00Z",
         change="updated",
         md5="1584abdf8ebdc9802ac0c6a7402c03b6",
         length=8876,
         mime_type="text/html")
     change_list.add(html_res)

     generated_xml = change_list.as_xml()
     self._assert_xml_equal_ex(generated_xml, 'resourcesync_ex_31')
コード例 #18
0
    def test08_parse(self):
        """Parse an in-memory Change List and check the resource count and
        the capability and md_from metadata."""
        document = (
            '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
            '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">'
            '<rs:md capability="changelist" from="2013-01-01"/>'
            '<url><loc>/tmp/rs_test/src/file_a</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:md change="updated" length="12" /></url>'
            '<url><loc>/tmp/rs_test/src/file_b</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:md change="deleted" length="32" /></url>'
            '</urlset>'
        )
        change_list = ChangeList()
        change_list.parse(fh=io.StringIO(document))

        self.assertEqual(len(change_list.resources), 2, 'got 2 resources')
        metadata = change_list.md
        self.assertEqual(metadata['capability'], 'changelist', 'capability set')
        self.assertEqual(change_list.md['md_from'], '2013-01-01')
コード例 #19
0
 def test05_iter(self):
     """Iterating a ChangeList yields its entries in the order added."""
     change_list = ChangeList()
     # URIs 'a'..'d' paired with timestamps 1..4
     for stamp, uri in enumerate('abcd', 1):
         change_list.add(Resource(uri, timestamp=stamp, change='created'))

     seen = [entry for entry in change_list]
     self.assertEqual(len(seen), 4)
     self.assertEqual(seen[0].uri, 'a')
     self.assertEqual(seen[3].uri, 'd')
コード例 #20
0
 def test_build_ex_03(self):
     """Build a simple Change List document and compare the XML with the
     contents of example 'resourcesync_ex_3'."""
     change_list = ChangeList()
     change_list.md_from = '2013-01-02T00:00:00Z'
     change_list.md_until = '2013-01-03T00:00:00Z'

     pdf_res = Resource(
         uri='http://example.com/res2.pdf',
         lastmod='2013-01-02T13:00:00Z',
         change="updated")
     change_list.add(pdf_res)
     tiff_res = Resource(
         uri='http://example.com/res3.tiff',
         lastmod='2013-01-02T18:00:00Z',
         change='deleted')
     change_list.add(tiff_res)

     expected_xml = self._open_ex('resourcesync_ex_3').read()
     self._assert_xml_equal(change_list.as_xml(), expected_xml)
コード例 #21
0
 def test_build_ex_30(self):
     """Build a Change List whose resource has a 'collection' link and
     compare its XML with example 'resourcesync_ex_30'."""
     change_list = ChangeList()
     change_list.up = "http://example.com/dataset1/capabilitylist.xml"
     change_list.md_from = "2013-01-03T00:00:00Z"

     res = Resource(
         uri="http://example.com/res1",
         lastmod="2013-01-03T07:00:00Z",
         change="updated",
         md5="1584abdf8ebdc9802ac0c6a7402c03b6",
         length=8876,
         mime_type="text/html")
     res.link_add(
         rel="collection",
         href="http://example.com/aggregation/0601007")
     change_list.add(res)

     generated_xml = change_list.as_xml()
     self._assert_xml_equal_ex(generated_xml, 'resourcesync_ex_30')
コード例 #22
0
 def test_build_ex_26(self):
     """Build a Change List whose resource has a 'canonical' link and
     compare its XML with example 'resourcesync_ex_26'."""
     stamp = "2013-01-03T18:00:00Z"

     change_list = ChangeList()
     change_list.up = "http://example.com/dataset1/capabilitylist.xml"
     change_list.md_from = "2013-01-03T00:00:00Z"

     res = Resource(
         uri="http://example.com/res1.html",
         lastmod=stamp,
         change="updated",
         md5="1584abdf8ebdc9802ac0c6a7402c03b6",
         length=8876)
     res.link_add(
         rel="canonical",
         href="http://example.com/res1",
         modified=stamp)
     change_list.add(res)

     generated_xml = change_list.as_xml()
     self._assert_xml_equal_ex(generated_xml, 'resourcesync_ex_26')
コード例 #23
0
 def test_build_ex_25(self):
     """Build a Change List whose resource has two 'alternate' links and
     compare its XML with example 'resourcesync_ex_25'."""
     stamp = "2013-01-03T18:00:00Z"

     change_list = ChangeList()
     change_list.up = "http://example.com/dataset1/capabilitylist.xml"
     change_list.md_from = "2013-01-03T11:00:00Z"

     res = Resource(
         uri="http://example.com/res1",
         lastmod=stamp,
         change="updated")
     # FIXME - inconsistent
     res.link_add(
         rel="alternate",
         href="http://example.com/res1.html",
         modified=stamp,
         type="text/html")
     res.link_add(
         rel="alternate",
         href="http://example.com/res1.pdf",
         modified=stamp,
         type="application/pdf")
     change_list.add(res)

     generated_xml = change_list.as_xml()
     self._assert_xml_equal_ex(generated_xml, 'resourcesync_ex_25')
コード例 #24
0
 def test01_add(self):
     """Check CapabilityList.add for a single item, a duplicate, a
     duplicate with replace=True, and two different items."""
     first = Resource(uri='http://example.org/r1')

     # Single item
     single = CapabilityList()
     single.add(first)
     self.assertEqual(len(single), 1)

     # Adding the same item again raises and leaves the length unchanged
     with self.assertRaises(ResourceSetDupeError):
         single.add(first)
     self.assertEqual(len(single), 1)

     # The same item twice is accepted when replace=True
     replaced = CapabilityList()
     replaced.add([first, first], replace=True)
     self.assertEqual(len(replaced), 1)

     # Two different items
     mixed = CapabilityList()
     second = ChangeList(uri='http://example.org/r2')
     mixed.add([first, second])
     self.assertEqual(len(mixed), 2)
コード例 #25
0
 def test_build_ex_33(self):
     """Build a Change List whose resource has a 'via' link to the original
     and compare its XML with example 'resourcesync_ex_33'."""
     digest = "1584abdf8ebdc9802ac0c6a7402c03b6"

     change_list = ChangeList()
     change_list.up = "http://aggregator2.example.com/dataset1/capabilitylist.xml"
     change_list.md_from = "2013-01-03T12:00:00Z"

     res = Resource(
         uri="http://aggregator2.example.com/res1.html",
         lastmod="2013-01-04T09:00:00Z",
         change="updated",
         md5=digest,
         length=8876,
         mime_type="text/html")
     # The link repeats the digest in "md5:<hex>" form, length as a string
     res.link_add(
         rel="via",
         href="http://original.example.com/res1.html",
         modified="2013-01-03T07:00:00Z",
         hash="md5:" + digest,
         length="8876",
         type="text/html")
     change_list.add(res)

     generated_xml = change_list.as_xml()
     self._assert_xml_equal_ex(generated_xml, 'resourcesync_ex_33')
コード例 #26
0
 def test_build_ex_19(self):
     """Build a Change List with 4 changes and no md_until ('open'), and
     compare the XML with the contents of example 'resourcesync_ex_19'."""
     change_list = ChangeList()
     change_list.up = 'http://example.com/dataset1/capabilitylist.xml'
     change_list.md_from = "2013-01-03T00:00:00Z"

     # (uri, lastmod, change) for each entry, in the order they are added
     entries = (
         ('http://example.com/res1.html', '2013-01-03T11:00:00Z', 'created'),
         ('http://example.com/res2.pdf', '2013-01-03T13:00:00Z', 'updated'),
         ('http://example.com/res3.tiff', '2013-01-03T18:00:00Z', 'deleted'),
         ('http://example.com/res2.pdf', '2013-01-03T21:00:00Z', 'updated'),
     )
     for uri, lastmod, change in entries:
         change_list.add(Resource(uri=uri, lastmod=lastmod, change=change))

     expected_xml = self._open_ex('resourcesync_ex_19').read()
     self._assert_xml_equal(change_list.as_xml(), expected_xml)
コード例 #27
0
 def write_change_list(self,
                       paths=None,
                       outfile=None,
                       ref_sitemap=None,
                       newref_sitemap=None,
                       empty=None,
                       links=None,
                       dump=None):
     """Write a change list.

     Unless both ref_sitemap and newref_sitemap are specified, the Change
     List is calculated between the reference and the current state of
     files on disk. The files on disk are scanned based either on the paths
     setting or else on the mappings.

     Arguments:
     paths - passed to build_resource_list() when no newref_sitemap is given
     outfile - basename handed to ChangeList.write(); if None the XML is
         printed instead
     ref_sitemap - passed to read_reference_resource_list() to obtain the
         old state
     newref_sitemap - if not None, the new state is read from it instead of
         being built from disk
     empty - if true, no comparison is done and the Change List is written
         with no entries
     links - passed to the ChangeList constructor as ln
     dump - passed to build_resource_list() as set_path and to
         write_dump_if_requested()
     """
     cl = ChangeList(ln=links)
     if not empty:
         # 1. Get and parse reference sitemap
         old_rl = self.read_reference_resource_list(ref_sitemap)
         # 2. Depending on whether a newref_sitemap was specified, either
         # read that or build resource list from files on disk
         if newref_sitemap is None:
             # Get resource list from disk
             new_rl = self.build_resource_list(paths=paths, set_path=dump)
         else:
             new_rl = self.read_reference_resource_list(
                 newref_sitemap, name='new reference')
         # 3. Calculate change list; unchanged resources are not recorded
         (_same, updated, deleted, created) = old_rl.compare(new_rl)
         cl.add_changed_resources(updated, change='updated')
         cl.add_changed_resources(deleted, change='deleted')
         cl.add_changed_resources(created, change='created')
     # 4. Write out change list
     cl.mapper = self.mapper
     cl.pretty_xml = self.pretty_xml
     if self.max_sitemap_entries is not None:
         cl.max_sitemap_entries = self.max_sitemap_entries
     if outfile is None:
         # Single-argument call form: valid on Python 3 and prints the same
         # text under the Python 2 print statement
         print(cl.as_xml())
     else:
         cl.write(basename=outfile)
     self.write_dump_if_requested(cl, dump)
コード例 #28
0
 def test_build_ex_29(self):
     """Build a Change List whose resource has memento, timegate and
     timemap links, and compare its XML with example 'resourcesync_ex_29'."""
     change_list = ChangeList()
     change_list.up = "http://example.com/dataset1/capabilitylist.xml"
     change_list.md_from = "2013-01-03T00:00:00Z"

     res = Resource(
         uri="http://example.com/res1",
         lastmod="2013-01-03T18:00:00Z",
         change="updated",
         md5="1584abdf8ebdc9802ac0c6a7402c03b6",
         length=8876,
         mime_type="text/html")
     res.link_add(
         rel="memento",
         href="http://example.com/20130103070000/res1",
         modified="2013-01-02T18:00:00Z",
         hash="md5:1584abdf8ebdc9802ac0c6a7402c03b6",
         length="8876",
         type="text/html")
     res.link_add(
         rel="timegate",
         href="http://example.com/timegate/http://example.com/res1")
     res.link_add(
         rel="timemap",
         href="http://example.com/timemap/http://example.com/res1",
         type="application/link-format")
     change_list.add(res)

     generated_xml = change_list.as_xml()
     self._assert_xml_equal_ex(generated_xml, 'resourcesync_ex_29')
コード例 #29
0
 def test01_add_if_changed(self):
     """add_if_changed accepts a Resource with a change type and raises
     ChangeTypeError for one created without it."""
     change_list = ChangeList()
     with_change = Resource('a', timestamp=1, change='updated')
     change_list.add_if_changed(with_change)
     self.assertEqual(len(change_list), 1)

     # Built before the check so that only add_if_changed is under test
     without_change = Resource('c', timestamp=3)
     with self.assertRaises(ChangeTypeError):
         change_list.add_if_changed(without_change)
コード例 #30
0
 def generate(self):
     """Return a new ChangeList holding each entry of self.changes."""
     result = ChangeList()
     # Entries are added one at a time, in iteration order
     for entry in self.changes:
         result.add(entry)
     return result