def test01_resourcelist(self):
    """Serialize a CapabilityList holding one ResourceList entry plus md 'modified'."""
    resource_list = ResourceList()
    capability_list = CapabilityList()
    capability_list.add_capability(resource_list, "http://example.org/resourcelist.xml")
    capability_list.md['modified'] = "2013-02-07T22:39:00"
    self.assertEqual(len(capability_list), 1)
    # Expected serialization, broken into adjacent literals for readability
    expected = (
        '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">'
        '<rs:md capability="capabilitylist" modified="2013-02-07T22:39:00" />'
        '<url><loc>http://example.org/resourcelist.xml</loc>'
        '<rs:md capability="resourcelist" /></url></urlset>'
    )
    self.assertEqual(capability_list.as_xml(), expected)
def test03_parse(self):
    """Parse a capability list document (via the str= keyword) and check its entry."""
    xml = (
        '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">'
        '<rs:md capability="capabilitylist" from="2013-02-07T22:39:00" />'
        '<url><loc>http://example.org/resourcelist.xml</loc>'
        '<rs:md capability="resourcelist" /></url></urlset>'
    )
    capability_list = CapabilityList()
    capability_list.parse(str=xml)
    self.assertEqual(capability_list.capability, 'capabilitylist')
    self.assertEqual(len(capability_list.resources), 1, 'got 1 resource')
    # Unpack the single entry; raises if there is not exactly one
    [entry] = capability_list.resources
    self.assertEqual(entry.uri, 'http://example.org/resourcelist.xml', 'resourcelist uri')
    self.assertEqual(entry.capability, 'resourcelist')
def test04_parse(self):
    """Parse a capability list document (via the str_data= keyword) and check its entry."""
    xml = (
        '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">'
        '<rs:md capability="capabilitylist" from="2013-02-07T22:39:00" />'
        '<url><loc>http://example.org/resourcelist.xml</loc>'
        '<rs:md capability="resourcelist" /></url></urlset>'
    )
    capability_list = CapabilityList()
    capability_list.parse(str_data=xml)
    self.assertEqual(capability_list.capability, 'capabilitylist')
    self.assertEqual(len(capability_list.resources), 1, 'got 1 resource')
    # Unpack the single entry; raises if there is not exactly one
    [entry] = capability_list.resources
    self.assertEqual(entry.uri, 'http://example.org/resourcelist.xml', 'resourcelist uri')
    self.assertEqual(entry.capability, 'resourcelist')
def test02_explore_show_summary(self):
    """Explorer summary output for a dummy two-entry capability list."""
    caps = CapabilityList()
    caps.add(Resource('uri:resourcelist'))
    caps.add(Resource('uri:changelist'))
    explorer = Explorer()
    with capture_stdout() as capturer:
        explorer.explore_show_summary(caps, False, [])
    output = capturer.result
    self.assertTrue(re.search(r'Parsed \(unknown capability\) document with 2 entries:', output))
    # Note: entries are listed changelist first even though resourcelist was added first
    self.assertTrue(re.search(r'\[1\] uri:changelist', output))
    self.assertTrue(re.search(r'\[2\] uri:resourcelist', output))
def test_build_ex_06(self):
    """Simple Capability List document."""
    capability_list = CapabilityList()
    capability_list.describedby = 'http://example.com/info_about_set1_of_resources.xml'
    capability_list.up = 'http://example.com/resourcesync_description.xml'
    capability_list.add_capability(
        uri='http://example.com/dataset1/resourcelist.xml', name='resourcelist')
    capability_list.add_capability(
        uri='http://example.com/dataset1/resourcedump.xml', name='resourcedump')
    capability_list.add_capability(
        uri='http://example.com/dataset1/changelist.xml', name='changelist')
    # Compare against the canned example document from the specification
    expected_xml = self._open_ex('resourcesync_ex_6').read()
    self._assert_xml_equal(capability_list.as_xml(), expected_xml)
def test_build_ex_06(self):
    """Simple Capability List document."""
    capl = CapabilityList()
    capl.describedby = 'http://example.com/info_about_set1_of_resources.xml'
    capl.up = 'http://example.com/resourcesync_description.xml'
    capl.add_capability(uri='http://example.com/dataset1/resourcelist.xml',
                        name='resourcelist')
    capl.add_capability(uri='http://example.com/dataset1/resourcedump.xml',
                        name='resourcedump')
    capl.add_capability(uri='http://example.com/dataset1/changelist.xml',
                        name='changelist')
    # Compare against the canned example document from the specification
    expected_xml = self._open_ex('resourcesync_ex_6').read()
    self._assert_xml_equal(capl.as_xml(), expected_xml)
def test_build_ex_13(self):
    """Capability List document with 4 entries."""
    capl = CapabilityList()
    capl.describedby = 'http://example.com/info_about_set1_of_resources.xml'
    capl.up = 'http://example.com/resourcesync_description.xml'
    # Pass fully-constructed capability objects rather than uri/name pairs
    capl.add_capability(
        capability=ResourceList(uri='http://example.com/dataset1/resourcelist.xml'))
    capl.add_capability(
        capability=ResourceDump(uri='http://example.com/dataset1/resourcedump.xml'))
    capl.add_capability(
        capability=ChangeList(uri='http://example.com/dataset1/changelist.xml'))
    capl.add_capability(
        capability=ChangeDump(uri='http://example.com/dataset1/changedump.xml'))
    # Compare against the canned example document from the specification
    expected_xml = self._open_ex('resourcesync_ex_13').read()
    self._assert_xml_equal(capl.as_xml(), expected_xml)
def get(self):
    """Implement GET for Capability List."""
    cap_list = CapabilityList()
    cap_list.describedby = self.source.describedby_uri
    cap_list.up = self.source.source_description_uri
    cap_list.add_capability(uri=self.source.resource_list_builder.uri,
                            name='resourcelist')
    # A changelist capability is advertised only when a change memory exists
    if self.source.has_changememory:
        cap_list.add_capability(uri=self.source.changememory.base_uri,
                                name='changelist')
    self.set_header("Content-Type", "application/xml")
    self.write(cap_list.as_xml())
def publish_metadata(self, new_zips, exluded_zip=None):
    """
    (Re)publish metadata with addition of new_zips.

    An excluded zip will be removed from previously published metadata.

    :param new_zips: a resourcelist with newly created zip resources
    :param exluded_zip: local path to zip file that will be removed from
        previously published metadata.
        NOTE(review): parameter keeps the historical 'exluded' spelling;
        renaming it would break keyword callers.
    """
    # URLs and local paths of the two documents this method maintains.
    rs_dump_url = self.publish_url + RS_RESOURCE_DUMP_XML
    rs_dump_path = os.path.join(self.publish_dir, RS_RESOURCE_DUMP_XML)
    capa_list_url = self.publish_url + RS_CAPABILITY_LIST_XML
    capa_list_path = os.path.join(self.publish_dir, RS_CAPABILITY_LIST_XML)
    rs_dump = ResourceDump()
    # Load existing resource-dump, if any. Else set start time.
    if os.path.isfile(rs_dump_path):
        with open(rs_dump_path, "r") as rs_dump_file:
            sm = Sitemap()
            sm.parse_xml(rs_dump_file, resources=rs_dump)
    else:
        # First publication: record the start time and the "up" link
        # pointing at the capability list.
        rs_dump.md_at = w3cdt.datetime_to_str(no_fractions=True)
        rs_dump.link_set(rel="up", href=capa_list_url)
    # Remove excluded zip, if any
    if exluded_zip:
        loc = self.publish_url + os.path.basename(exluded_zip)
        if loc in rs_dump.resources:
            del rs_dump.resources[loc]
        else:
            # The zip we were asked to retract is not in the published dump
            raise RuntimeError("Could not find %s in %s" % (loc, rs_dump_path))
    # Add new zips
    for resource in new_zips:
        rs_dump.add(resource)
    # Write resource-dump.xml, stamping the completion time first.
    rs_dump.md_completed = w3cdt.datetime_to_str(no_fractions=True)
    with open(rs_dump_path, "w") as rs_dump_file:
        rs_dump_file.write(rs_dump.as_xml())
    # The publish directory name is a urlsafe-base64 encoding of the graph IRI.
    # There are several ways to decode base64, among them
    # iri = base64.b64decode(os.path.basename(self.publish_dir)).rstrip('\n')
    # iri = base64.b64decode(os.path.basename(self.publish_dir), '-_').rstrip('\n')
    iri = base64.urlsafe_b64decode(os.path.basename(
        self.publish_dir)).rstrip('\n')
    print "New %s for graph %s" % (RS_RESOURCE_DUMP_XML, iri)
    print "See %s" % rs_dump_url
    # Write capability-list.xml, but only once: it is created on first
    # publication and left untouched afterwards.
    if not os.path.isfile(capa_list_path):
        capa_list = CapabilityList()
        capa_list.link_set(rel="up", href=self.src_desc_url)
        capa_list.add_capability(rs_dump, rs_dump_url)
        with open(capa_list_path,
                  "w") as capa_list_file:
            capa_list_file.write(capa_list.as_xml())
        print "New %s. See %s" % (RS_CAPABILITY_LIST_XML, capa_list_url)
def write_capability_list(self, capabilities=None, outfile=None, links=None): """Write a Capability List to outfile or STDOUT""" capl = CapabilityList(ln=links) capl.pretty_xml = self.pretty_xml if (capabilities is not None): for name in capabilities.keys(): capl.add_capability(name=name, uri=capabilities[name]) if (outfile is None): print capl.as_xml() else: capl.write(basename=outfile)
def write_capability_list(self,capabilities=None,outfile=None,links=None): """Write a Capability List to outfile or STDOUT""" capl = CapabilityList(ln=links) capl.pretty_xml = self.pretty_xml if (capabilities is not None): for name in capabilities.keys(): capl.add_capability(name=name, uri=capabilities[name]) if (outfile is None): print capl.as_xml() else: capl.write(basename=outfile)
def test_build_ex_12(self):
    """Source Description document with describedby links."""
    source_desc = SourceDescription()
    source_desc.describedby = 'http://example.com/info_about_source.xml'
    # Three capability lists, each with its own describedby link
    first = CapabilityList(uri='http://example.com/capabilitylist1.xml')
    first.describedby = 'http://example.com/info_about_set1_of_resources.xml'
    source_desc.add_capability_list(first)
    second = CapabilityList(uri='http://example.com/capabilitylist2.xml')
    second.describedby = 'http://example.com/info_about_set2_of_resources.xml'
    source_desc.add_capability_list(second)
    third = CapabilityList(uri='http://example.com/capabilitylist3.xml')
    third.describedby = 'http://example.com/info_about_set3_of_resources.xml'
    source_desc.add_capability_list(third)
    # Compare against the canned example document from the specification
    expected_xml = self._open_ex('resourcesync_ex_12').read()
    self._assert_xml_equal(source_desc.as_xml(), expected_xml)
def test03_capability_list_links(self):
    """Capability list written by the client carries describedby and up links."""
    xml = run_resync([
        '--capabilitylist=resourcelist=rl,changedump=cd',
        '--describedby-link=a',
        '--sourcedescription-link=b',
        '--capabilitylist-link=c'])  # will be ignored
    capl = CapabilityList()
    capl.parse(fh=StringIO.StringIO(xml))
    self.assertEqual(len(capl), 2)
    describedby_link = capl.link('describedby')
    self.assertNotEqual(describedby_link, None)
    self.assertEqual(describedby_link['href'], 'a')
    up_link = capl.link('up')
    self.assertNotEqual(up_link, None)
    self.assertEqual(up_link['href'], 'b')
def get(self):
    """Serve the source's Capability List as XML."""
    caps = CapabilityList()
    caps.describedby = self.source.describedby_uri
    caps.add_capability(uri=self.source.resource_list_builder.uri,
                        name='resourcelist')
    # Only advertise a changelist when the source keeps a change memory
    if self.source.has_changememory:
        caps.add_capability(uri=self.source.changememory.base_uri,
                            name='changelist')
    self.set_header("Content-Type", "application/xml")
    self.write(caps.as_xml())
def test03_multiple(self):
    """A capability list with two capabilities serializes both entries."""
    caps = CapabilityList()
    rl = ResourceList()
    caps.add_capability(rl, "rl.xml")
    cl = ChangeList()
    caps.add_capability(cl, "cl.xml")
    self.assertEqual(len(caps), 2)
    xml = caps.as_xml()
    # Escape the literal dots so the patterns cannot match stray characters
    # (unescaped '.' would also accept e.g. 'rlXxml')
    self.assertTrue(re.search(r'<loc>rl\.xml</loc><rs:md capability="resourcelist" />', xml))
    self.assertTrue(re.search(r'<loc>cl\.xml</loc><rs:md capability="changelist" />', xml))
def test02_resourcelist(self):
    """Serialize a CapabilityList with an rs:md 'from' attribute set."""
    resource_list = ResourceList()
    capability_list = CapabilityList()
    capability_list.add_capability(resource_list, "http://example.org/resourcelist.xml")
    capability_list.md['from'] = "2013-02-07T22:39:00"
    self.assertEqual(len(capability_list), 1)
    # Expected serialization, broken into adjacent literals for readability
    expected = (
        '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">'
        '<rs:md capability="capabilitylist" from="2013-02-07T22:39:00" />'
        '<url><loc>http://example.org/resourcelist.xml</loc>'
        '<rs:md capability="resourcelist" /></url></urlset>'
    )
    self.assertEqual(capability_list.as_xml(), expected)
def test01_add(self):
    """Exercise add() for single, duplicate, replace and multiple resources."""
    r1 = Resource(uri='http://example.org/r1')
    # one resource
    caps = CapabilityList()
    caps.add(r1)
    self.assertEqual(len(caps), 1)
    # adding the same resource again raises and leaves the list unchanged
    self.assertRaises(ResourceSetDupeError, caps.add, r1)
    self.assertEqual(len(caps), 1)
    # duplicates are tolerated with replace=True
    caps = CapabilityList()
    caps.add([r1, r1], replace=True)
    self.assertEqual(len(caps), 1)
    # two distinct resources both land in the list
    caps = CapabilityList()
    r2 = ChangeList(uri='http://example.org/r2')
    caps.add([r1, r2])
    self.assertEqual(len(caps), 2)
def publish_metadata(self, new_zips, exluded_zip=None):
    """
    (Re)publish metadata with addition of new_zips.

    An excluded zip will be removed from previously published metadata.

    :param new_zips: a resourcelist with newly created zip resources
    :param exluded_zip: local path to zip file that will be removed from
        previously published metadata.
        NOTE(review): parameter keeps the historical 'exluded' spelling;
        renaming it would break keyword callers.
    """
    # URLs and local paths of the two documents this method maintains.
    rs_dump_url = self.publish_url + RS_RESOURCE_DUMP_XML
    rs_dump_path = os.path.join(self.publish_dir, RS_RESOURCE_DUMP_XML)
    capa_list_url = self.publish_url + RS_CAPABILITY_LIST_XML
    capa_list_path = os.path.join(self.publish_dir, RS_CAPABILITY_LIST_XML)
    rs_dump = ResourceDump()
    # Load existing resource-dump, if any. Else set start time.
    if os.path.isfile(rs_dump_path):
        with open(rs_dump_path, "r") as rs_dump_file:
            sm = Sitemap()
            sm.parse_xml(rs_dump_file, resources=rs_dump)
    else:
        # First publication: record the start time and the "up" link
        # pointing at the capability list.
        rs_dump.md_at = w3cdt.datetime_to_str(no_fractions=True)
        rs_dump.link_set(rel="up", href=capa_list_url)
    # Remove excluded zip, if any
    if exluded_zip:
        loc = self.publish_url + os.path.basename(exluded_zip)
        if loc in rs_dump.resources:
            del rs_dump.resources[loc]
        else:
            # The zip we were asked to retract is not in the published dump
            raise RuntimeError("Could not find %s in %s" % (loc, rs_dump_path))
    # Add new zips
    for resource in new_zips:
        rs_dump.add(resource)
    # Write resource-dump.xml, stamping the completion time first.
    rs_dump.md_completed = w3cdt.datetime_to_str(no_fractions=True)
    with open(rs_dump_path, "w") as rs_dump_file:
        rs_dump_file.write(rs_dump.as_xml())
    # The publish directory name is a urlsafe-base64 encoding of the graph IRI.
    # There are several ways to decode base64, among them
    # iri = base64.b64decode(os.path.basename(self.publish_dir)).rstrip('\n')
    # iri = base64.b64decode(os.path.basename(self.publish_dir), '-_').rstrip('\n')
    iri = base64.urlsafe_b64decode(os.path.basename(self.publish_dir)).rstrip("\n")
    print "New %s for graph %s" % (RS_RESOURCE_DUMP_XML, iri)
    print "See %s" % rs_dump_url
    # Write capability-list.xml, but only once: it is created on first
    # publication and left untouched afterwards.
    if not os.path.isfile(capa_list_path):
        capa_list = CapabilityList()
        capa_list.link_set(rel="up", href=self.src_desc_url)
        capa_list.add_capability(rs_dump, rs_dump_url)
        with open(capa_list_path,
                  "w") as capa_list_file:
            capa_list_file.write(capa_list.as_xml())
        print "New %s. See %s" % (RS_CAPABILITY_LIST_XML, capa_list_url)
def test03_capability_list_links(self):
    """Capability list written by the client carries describedby and up links."""
    xml = run_resync(['--write-capabilitylist=resourcelist=rl,changedump=cd',
                      '--describedby-link=a',
                      '--sourcedescription-link=b',
                      '--capabilitylist-link=c'])  # will be ignored
    capl = CapabilityList()
    capl.parse(fh=io.BytesIO(xml))
    self.assertEqual(len(capl), 2)
    describedby_link = capl.link('describedby')
    self.assertNotEqual(describedby_link, None)
    self.assertEqual(describedby_link['href'], 'a')
    up_link = capl.link('up')
    self.assertNotEqual(up_link, None)
    self.assertEqual(up_link['href'], 'b')
def test02_multiple(self):
    """A capability list with two capabilities serializes both entries."""
    caps = CapabilityList()
    rl = ResourceList()
    caps.add_capability(rl, "rl.xml")
    cl = ChangeList()
    caps.add_capability(cl, "cl.xml")
    self.assertEqual(len(caps), 2)
    xml = caps.as_xml()
    # Escape the literal dots so the patterns cannot match stray characters
    # (unescaped '.' would also accept e.g. 'rlXxml')
    self.assertTrue(re.search(r'<loc>rl\.xml</loc><rs:md capability="resourcelist" />', xml))
    self.assertTrue(re.search(r'<loc>cl\.xml</loc><rs:md capability="changelist" />', xml))
def test06_explore_show_summary(self):
    """Explorer summary output for empty and for populated capability lists."""
    explorer = Explorer()
    # empty capability list
    with capture_stdout() as capturer:
        explorer.explore_show_summary(list=CapabilityList())
    self.assertTrue(re.search(r'Parsed \(unknown capability\) document with 0 entries:',
                              capturer.result))
    # dummy two-entry capability list
    caps = CapabilityList()
    caps.add(Resource('uri:resourcelist'))
    caps.add(Resource('uri:changelist'))
    with capture_stdout() as capturer:
        explorer.explore_show_summary(caps, False, [])
    output = capturer.result
    self.assertTrue(re.search(r'Parsed \(unknown capability\) document with 2 entries:', output))
    # Note: entries are listed changelist first even though resourcelist was added first
    self.assertTrue(re.search(r'\[1\] uri:changelist', output))
    self.assertTrue(re.search(r'\[2\] uri:resourcelist', output))
# store newest modified time of newest resource new_lasttime = time_sorted_resources[-1]["time"].strftime("%Y-%m-%dT%H:%M:%SZ") # write this time to the timefile timefile_out = open(args.time_file, "w") timefile_out.write(new_lasttime) timefile_out.close() # Downloads all resource lists def get_resource_lists(resources): for key, resource_list_resource in resources.iteritems(): resource_list_response = requests.get(resource_list_resource.uri) resource_list = ResourceList() resource_list.parse(str=resource_list_response.text) get_resources(resource_list.resources) # Download URI of the source description XML from # --> should actually be either via robots.txt or/in .well-known source_desc_response = requests.get(args.source_description_uri) source_desc = SourceDescription() source_desc.parse(str=source_desc_response.text) [capabilitylist_resource] = source_desc.resources # Download capablity list obtained from source description capabilitylist_response = requests.get(capabilitylist_resource.uri) capabilitylist = CapabilityList() capabilitylist.parse(str=capabilitylist_response.text) # Download resource lists obtained from capability list get_resource_lists(capabilitylist.resources)
new_lasttime = time_sorted_resources[-1]["time"].strftime( "%Y-%m-%dT%H:%M:%SZ") # write this time to the timefile timefile_out = open(args.time_file, "w") timefile_out.write(new_lasttime) timefile_out.close() # Downloads all resource lists def get_resource_lists(resources): for key, resource_list_resource in resources.iteritems(): resource_list_response = requests.get(resource_list_resource.uri) resource_list = ResourceList() resource_list.parse(str=resource_list_response.text) get_resources(resource_list.resources) # Download URI of the source description XML from # --> should actually be either via robots.txt or/in .well-known source_desc_response = requests.get(args.source_description_uri) source_desc = SourceDescription() source_desc.parse(str=source_desc_response.text) [capabilitylist_resource] = source_desc.resources # Download capablity list obtained from source description capabilitylist_response = requests.get(capabilitylist_resource.uri) capabilitylist = CapabilityList() capabilitylist.parse(str=capabilitylist_response.text) # Download resource lists obtained from capability list get_resource_lists(capabilitylist.resources)
raw_ts[8:10] + ":" + raw_ts[10:12] + ":" + raw_ts[12:14] + "Z" ) timestamps.append(ts) rl.add(Resource(args.resource_url + filename, lastmod=ts)) # Print to file at args.resource_dir + "/resource-list.xml" resource_list_file = open(args.resource_dir + "/resource-list.xml", "w") resource_list_file.write(rl.as_xml()) resource_list_file.close() print "Wrote resource list to: " + args.resource_dir + "/resource-list.xml" timestamps.sort() caps = CapabilityList() caps.add_capability(rl, args.resource_url + "resource-list.xml") if len(timestamps) > 0: caps.md['from'] = timestamps[0] # Print to file at args.resource_dir + "/capability-list.xml" capability_list_file = open(args.resource_dir + "/capability-list.xml", "w") capability_list_file.write(caps.as_xml()) capability_list_file.close() print "Wrote capability list to: " + args.resource_dir + "/capability-list.xml" rsd = SourceDescription() rsd.md_at = None rsd.add_capability_list(args.resource_url + "capability-list.xml")
continue _, raw_ts = filename.split("-") ts = (raw_ts[:4] + "-" + raw_ts[4:6] + "-" + raw_ts[6:8] + "T" + raw_ts[8:10] + ":" + raw_ts[10:12] + ":" + raw_ts[12:14] + "Z") timestamps.append(ts) rl.add(Resource(args.resource_url + filename, lastmod=ts)) # Print to file at args.resource_dir + "/resource-list.xml" resource_list_file = open(args.resource_dir + "/resource-list.xml", "w") resource_list_file.write(rl.as_xml()) resource_list_file.close() print "Wrote resource list to: " + args.resource_dir + "/resource-list.xml" timestamps.sort() caps = CapabilityList() caps.add_capability(rl, args.resource_url + "resource-list.xml") if len(timestamps) > 0: caps.md['from'] = timestamps[0] # Print to file at args.resource_dir + "/capability-list.xml" capability_list_file = open(args.resource_dir + "/capability-list.xml", "w") capability_list_file.write(caps.as_xml()) capability_list_file.close() print "Wrote capability list to: " + args.resource_dir + "/capability-list.xml" rsd = SourceDescription() rsd.md_at = None rsd.add_capability_list(args.resource_url + "capability-list.xml")
def test_ex_06(self):
    """resourcesync_ex_6 is a simple capability list with three capabilities"""
    capl = CapabilityList()
    capl.parse('tests/testdata/examples_from_spec/resourcesync_ex_6.xml')
    self.assertEqual(len(capl.resources), 3, '3 capabilities')
    # Each advertised capability is present with the expected URI
    expected = {
        'resourcelist': 'http://example.com/dataset1/resourcelist.xml',
        'resourcedump': 'http://example.com/dataset1/resourcedump.xml',
        'changelist': 'http://example.com/dataset1/changelist.xml',
    }
    for capability_name, capability_uri in expected.items():
        self.assertTrue(capl.has_capability(capability_name))
        self.assertEqual(capl.capability_info(capability_name).uri, capability_uri)
    # Check some that aren't present (including the document's own type)
    self.assertFalse(capl.has_capability())
    self.assertFalse(capl.has_capability('bogus'))
    self.assertFalse(capl.has_capability('capabilitylist'))