def test03_parse(self):
    # Parse a capability list supplied as a string through the ``str``
    # keyword, then check the declared capability and the single entry.
    xml = (
        '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" '
        'xmlns:rs="http://www.openarchives.org/rs/terms/">'
        '<rs:md capability="capabilitylist" from="2013-02-07T22:39:00" />'
        '<url><loc>http://example.org/resourcelist.xml</loc>'
        '<rs:md capability="resourcelist" /></url>'
        '</urlset>'
    )
    caplist = CapabilityList()
    caplist.parse(str=xml)
    self.assertEqual(caplist.capability, 'capabilitylist')
    self.assertEqual(len(caplist.resources), 1, 'got 1 resource')
    # Single-element unpacking: raises if there is not exactly one entry
    (entry,) = caplist.resources
    self.assertEqual(entry.uri,
                     'http://example.org/resourcelist.xml',
                     'resourcelist uri')
    self.assertEqual(entry.capability, 'resourcelist')
def test04_parse(self):
    # Same document as a string, this time handed to parse() through the
    # ``str_data`` keyword.
    header = '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
    body = (
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" '
        'xmlns:rs="http://www.openarchives.org/rs/terms/">'
        '<rs:md capability="capabilitylist" from="2013-02-07T22:39:00" />'
        '<url><loc>http://example.org/resourcelist.xml</loc>'
        '<rs:md capability="resourcelist" /></url>'
        '</urlset>'
    )
    xml = header + body
    parsed = CapabilityList()
    parsed.parse(str_data=xml)
    self.assertEqual(parsed.capability, 'capabilitylist')
    self.assertEqual(len(parsed.resources), 1, 'got 1 resource')
    # Unpack the sole entry; fails loudly if the count is not exactly one
    (only,) = parsed.resources
    self.assertEqual(only.uri,
                     'http://example.org/resourcelist.xml',
                     'resourcelist uri')
    self.assertEqual(only.capability, 'resourcelist')
def test03_capability_list_links(self):
    # Run the client to write a capability list with two capabilities and
    # three link options, parse the output, and check the recorded links.
    xml = run_resync(['--write-capabilitylist=resourcelist=rl,changedump=cd',
                      '--describedby-link=a',
                      '--sourcedescription-link=b',
                      # original note: "will be ignored" (presumably refers
                      # to this last option -- confirm)
                      '--capabilitylist-link=c'])
    capl = CapabilityList()
    capl.parse(fh=io.BytesIO(xml))
    self.assertEqual(len(capl), 2)
    # Look each link up once; test for None by identity so a missing link
    # is reported as such rather than via an equality comparison.
    describedby = capl.link('describedby')
    self.assertIsNotNone(describedby)
    self.assertEqual(describedby['href'], 'a')
    up = capl.link('up')
    self.assertIsNotNone(up)
    self.assertEqual(up['href'], 'b')
def test03_capability_list_links(self):
    # Run the client to produce a capability list with two capabilities and
    # three link options, parse the output, and check the recorded links.
    xml = run_resync(['--capabilitylist=resourcelist=rl,changedump=cd',
                      '--describedby-link=a',
                      '--sourcedescription-link=b',
                      # original note: "will be ignored" (presumably refers
                      # to this last option -- confirm)
                      '--capabilitylist-link=c'])
    capl = CapabilityList()
    capl.parse(fh=StringIO.StringIO(xml))
    self.assertEqual(len(capl), 2)
    # Look each link up once; test for None by identity so a missing link
    # is reported as such rather than via an equality comparison.
    describedby = capl.link('describedby')
    self.assertIsNotNone(describedby)
    self.assertEqual(describedby['href'], 'a')
    up = capl.link('up')
    self.assertIsNotNone(up)
    self.assertEqual(up['href'], 'b')
def test_ex_06(self):
    """resourcesync_ex_6 is a simple capability list with three capabilities

    Parses the example file and checks the capabilities that are present
    (with the URI recorded for each) as well as lookups that must fail.
    """
    capl = CapabilityList()
    capl.parse('tests/testdata/examples_from_spec/resourcesync_ex_6.xml')
    self.assertEqual(len(capl.resources), 3, '3 capabilities')
    # Capabilities expected in the example, paired with their URIs
    present = (
        ('resourcelist', 'http://example.com/dataset1/resourcelist.xml'),
        ('resourcedump', 'http://example.com/dataset1/resourcedump.xml'),
        ('changelist', 'http://example.com/dataset1/changelist.xml'),
    )
    for name, expected_uri in present:
        self.assertTrue(capl.has_capability(name))
        self.assertEqual(capl.capability_info(name).uri, expected_uri)
    # Lookups that must come back false: no name given, an unknown name,
    # and 'capabilitylist' itself
    self.assertFalse(capl.has_capability())
    self.assertFalse(capl.has_capability('bogus'))
    self.assertFalse(capl.has_capability('capabilitylist'))
# NOTE(review): the statements up to the blank lines below appear to be the
# tail of a definition that starts before this chunk. Their original
# indentation could not be recovered -- re-indent to the enclosing scope.
new_lasttime = time_sorted_resources[-1]["time"].strftime(
    "%Y-%m-%dT%H:%M:%SZ")
# Write this time to the timefile; the context manager closes the file even
# if the write raises.
with open(args.time_file, "w") as timefile_out:
    timefile_out.write(new_lasttime)


def get_resource_lists(resources):
    """Download every resource list referenced by *resources* and process it.

    Each value's ``uri`` is fetched, the response text is parsed as a
    ResourceList, and the parsed resources are passed to ``get_resources``.

    Args:
        resources: mapping whose values expose a ``uri`` attribute (called
            below with ``capabilitylist.resources``).
    """
    # Only the values are used; values() is available on Python 2 and 3
    # mappings alike, whereas iteritems() exists on Python 2 only.
    for resource_list_resource in resources.values():
        resource_list_response = requests.get(resource_list_resource.uri)
        resource_list = ResourceList()
        resource_list.parse(str=resource_list_response.text)
        get_resources(resource_list.resources)


# Download the source description XML from the configured URI.
# --> the URI should actually be discovered via robots.txt or .well-known
source_desc_response = requests.get(args.source_description_uri)
source_desc = SourceDescription()
source_desc.parse(str=source_desc_response.text)
# Single-element unpacking: fails unless exactly one capability list is listed
[capabilitylist_resource] = source_desc.resources

# Download the capability list obtained from the source description
capabilitylist_response = requests.get(capabilitylist_resource.uri)
capabilitylist = CapabilityList()
capabilitylist.parse(str=capabilitylist_response.text)

# Download the resource lists obtained from the capability list
get_resource_lists(capabilitylist.resources)
# NOTE(review): the statements up to the blank lines below appear to be the
# tail of a definition that starts before this chunk. Their original
# indentation could not be recovered -- re-indent to the enclosing scope.
# Store the modification time of the newest resource
new_lasttime = time_sorted_resources[-1]["time"].strftime("%Y-%m-%dT%H:%M:%SZ")
# Write this time to the timefile; the context manager closes the file even
# if the write raises.
with open(args.time_file, "w") as timefile_out:
    timefile_out.write(new_lasttime)


def get_resource_lists(resources):
    """Download every resource list referenced by *resources* and process it.

    Each value's ``uri`` is fetched, the response text is parsed as a
    ResourceList, and the parsed resources are passed to ``get_resources``.

    Args:
        resources: mapping whose values expose a ``uri`` attribute (called
            below with ``capabilitylist.resources``).
    """
    # Only the values are used; values() is available on Python 2 and 3
    # mappings alike, whereas iteritems() exists on Python 2 only.
    for resource_list_resource in resources.values():
        resource_list_response = requests.get(resource_list_resource.uri)
        resource_list = ResourceList()
        resource_list.parse(str=resource_list_response.text)
        get_resources(resource_list.resources)


# Download the source description XML from the configured URI.
# --> the URI should actually be discovered via robots.txt or .well-known
source_desc_response = requests.get(args.source_description_uri)
source_desc = SourceDescription()
source_desc.parse(str=source_desc_response.text)
# Single-element unpacking: fails unless exactly one capability list is listed
[capabilitylist_resource] = source_desc.resources

# Download the capability list obtained from the source description
capabilitylist_response = requests.get(capabilitylist_resource.uri)
capabilitylist = CapabilityList()
capabilitylist.parse(str=capabilitylist_response.text)

# Download the resource lists obtained from the capability list
get_resource_lists(capabilitylist.resources)