Esempio n. 1
0
 def test03_parse(self):
     """Parse a capability list supplied as an XML string and check its single entry."""
     document = (
         '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
         '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" '
         'xmlns:rs="http://www.openarchives.org/rs/terms/">'
         '<rs:md capability="capabilitylist" from="2013-02-07T22:39:00" />'
         '<url><loc>http://example.org/resourcelist.xml</loc>'
         '<rs:md capability="resourcelist" /></url></urlset>'
     )
     parsed = CapabilityList()
     parsed.parse(str=document)
     self.assertEqual(parsed.capability, 'capabilitylist')
     self.assertEqual(len(parsed.resources), 1, 'got 1 resource')
     # Single-element unpacking raises unless exactly one resource is present.
     (entry,) = parsed.resources
     self.assertEqual(entry.uri,
                      'http://example.org/resourcelist.xml',
                      'resourcelist uri')
     self.assertEqual(entry.capability, 'resourcelist')
Esempio n. 2
0
 def test04_parse(self):
     """Parse a capability list passed via ``str_data`` and verify the one resource it lists."""
     xml_text = (
         '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
         '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" '
         'xmlns:rs="http://www.openarchives.org/rs/terms/">'
         '<rs:md capability="capabilitylist" from="2013-02-07T22:39:00" />'
         '<url><loc>http://example.org/resourcelist.xml</loc>'
         '<rs:md capability="resourcelist" /></url></urlset>'
     )
     capability_list = CapabilityList()
     capability_list.parse(str_data=xml_text)
     self.assertEqual(capability_list.capability, 'capabilitylist')
     self.assertEqual(len(capability_list.resources), 1, 'got 1 resource')
     # Unpacking into a one-element target fails if the count is not exactly one.
     (only_resource,) = capability_list.resources
     self.assertEqual(only_resource.uri,
                      'http://example.org/resourcelist.xml',
                      'resourcelist uri')
     self.assertEqual(only_resource.capability, 'resourcelist')
Esempio n. 3
0
 def test03_capability_list_links(self):
     """Check the links on the capability list returned by ``run_resync``.

     The returned bytes are parsed into a ``CapabilityList``, which must
     contain two entries, a 'describedby' link with href 'a' and an 'up'
     link with href 'b'.
     """
     cli_args = [
         '--write-capabilitylist=resourcelist=rl,changedump=cd',
         '--describedby-link=a',
         '--sourcedescription-link=b',
         '--capabilitylist-link=c',  # will be ignored
     ]
     output = run_resync(cli_args)
     parsed = CapabilityList()
     parsed.parse(fh=io.BytesIO(output))
     self.assertEqual(len(parsed), 2)
     # Same checks, in the same order, for each expected link relation.
     for rel, href in (('describedby', 'a'), ('up', 'b')):
         self.assertNotEqual(parsed.link(rel), None)
         self.assertEqual(parsed.link(rel)['href'], href)
Esempio n. 4
0
 def test03_capability_list_links(self):
     """Verify link handling in the capability list returned by ``run_resync``.

     The returned text is wrapped in a ``StringIO`` and parsed; the result
     must have two entries plus 'describedby' (href 'a') and 'up'
     (href 'b') links.
     """
     options = [
         '--capabilitylist=resourcelist=rl,changedump=cd',
         '--describedby-link=a',
         '--sourcedescription-link=b',
         '--capabilitylist-link=c',  # will be ignored
     ]
     result = run_resync(options)
     listing = CapabilityList()
     listing.parse(fh=StringIO.StringIO(result))
     self.assertEqual(len(listing), 2)
     # Each relation is checked for presence first, then for its href.
     for relation, expected_href in (('describedby', 'a'), ('up', 'b')):
         self.assertNotEqual(listing.link(relation), None)
         self.assertEqual(listing.link(relation)['href'], expected_href)
Esempio n. 5
0
 def test_ex_06(self):
     """resourcesync_ex_6: a simple capability list offering three capabilities."""
     listing = CapabilityList()
     listing.parse('tests/testdata/examples_from_spec/resourcesync_ex_6.xml')
     self.assertEqual(len(listing.resources), 3, '3 capabilities')
     # Capabilities that must be present, with the URI each one points at.
     expected = (
         ('resourcelist', 'http://example.com/dataset1/resourcelist.xml'),
         ('resourcedump', 'http://example.com/dataset1/resourcedump.xml'),
         ('changelist', 'http://example.com/dataset1/changelist.xml'),
     )
     for name, uri in expected:
         self.assertTrue(listing.has_capability(name))
         self.assertEqual(listing.capability_info(name).uri, uri)
     # Capabilities that must not be reported, starting with the no-argument call.
     self.assertFalse(listing.has_capability())
     for absent in ('bogus', 'capabilitylist'):
         self.assertFalse(listing.has_capability(absent))
Esempio n. 6
0
    new_lasttime = time_sorted_resources[-1]["time"].strftime(
        "%Y-%m-%dT%H:%M:%SZ")
    # write this time to the timefile
    timefile_out = open(args.time_file, "w")
    timefile_out.write(new_lasttime)
    timefile_out.close()


# Downloads all resource lists
def get_resource_lists(resources):
    """Download and process every resource list referenced by *resources*.

    For each value yielded by ``resources.iteritems()``, the entry's
    ``uri`` is fetched with ``requests.get``, the response text is parsed
    into a ``ResourceList``, and the parsed resources are passed to
    ``get_resources``.
    """
    for _key, entry in resources.iteritems():
        body = requests.get(entry.uri).text
        parsed = ResourceList()
        parsed.parse(str=body)
        get_resources(parsed.resources)


# Download the source description XML from the URI held in
# args.source_description_uri.
# --> should actually be discovered via robots.txt or a .well-known URI
source_desc_response = requests.get(args.source_description_uri)
source_desc = SourceDescription()
source_desc.parse(str=source_desc_response.text)
# Single-element unpacking: raises unless there is exactly one resource.
[capabilitylist_resource] = source_desc.resources

# Download the capability list obtained from the source description
capabilitylist_response = requests.get(capabilitylist_resource.uri)
capabilitylist = CapabilityList()
capabilitylist.parse(str=capabilitylist_response.text)

# Download the resource lists obtained from the capability list
get_resource_lists(capabilitylist.resources)
Esempio n. 7
0
	# store newest modified time of newest resource
	new_lasttime = time_sorted_resources[-1]["time"].strftime("%Y-%m-%dT%H:%M:%SZ")
	# write this time to the timefile
	timefile_out = open(args.time_file, "w")
	timefile_out.write(new_lasttime)
	timefile_out.close()

# Downloads all resource lists
def get_resource_lists(resources):
	"""Fetch each resource list named in *resources* and hand its entries on.

	Iterates ``resources.iteritems()``; for every value, the ``uri`` is
	requested with ``requests.get``, the response text is parsed into a
	new ``ResourceList``, and that list's resources are passed to
	``get_resources``.
	"""
	for _unused_key, listed in resources.iteritems():
		xml_text = requests.get(listed.uri).text
		fetched_list = ResourceList()
		fetched_list.parse(str=xml_text)
		get_resources(fetched_list.resources)



# Download the source description XML from the URI held in
# args.source_description_uri.
# --> should actually be discovered via robots.txt or a .well-known URI
source_desc_response = requests.get(args.source_description_uri)
source_desc = SourceDescription()
source_desc.parse(str=source_desc_response.text)
# Single-element unpacking: raises unless there is exactly one resource.
[capabilitylist_resource] = source_desc.resources

# Download the capability list obtained from the source description
capabilitylist_response =  requests.get(capabilitylist_resource.uri)
capabilitylist = CapabilityList()
capabilitylist.parse(str=capabilitylist_response.text)

# Download the resource lists obtained from the capability list
get_resource_lists(capabilitylist.resources)