def get(self):
    """Serve the Source Description document in response to a GET request.

    A SourceDescription is filled in from ``self.source`` (its
    ``describedby_uri`` and ``capability_list_uri``) and written to the
    response as XML with an ``application/xml`` content type.
    """
    description = SourceDescription()
    description.describedby = self.source.describedby_uri
    description.add_capability_list(self.source.capability_list_uri)

    # Header first, then the serialized body.
    self.set_header("Content-Type", "application/xml")
    xml_body = description.as_xml()
    self.write(xml_body)
def test01_empty(self):
    """A Source Description with no capability lists has length 0 and
    serializes to a urlset holding only the describedby link and md element.
    """
    expected_xml = '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/"><rs:ln href="http://example.org/about" rel="describedby" /><rs:md capability="description" /></urlset>'

    description = SourceDescription()
    description.describedby = "http://example.org/about"
    self.assertEqual(len(description), 0)

    # Clear md_at before serializing; the expected XML has no "at" attribute.
    description.md_at = None
    self.assertEqual(description.as_xml(), expected_xml)
def text_ex_07(self):
    """resourcesync_ex_7 is a Source Description that lists a single Capability List.

    NOTE(review): the method name starts with ``text_`` rather than ``test_``,
    so a unittest loader using the default ``test`` method prefix will not
    collect it -- confirm whether that is intentional before renaming.
    """
    description = SourceDescription()
    description.read(uri='tests/testdata/examples_from_spec/resourcesync_ex_7.xml')
    self.assertEqual(len(description.resources), 1, '1 capability list')

    capability_list = description.resources[0]
    # Checked in this order; the first mismatch fails the test.
    expectations = (
        ('uri', 'http://example.com/dataset1/capabilitylist.xml'),
        ('capability', 'resourcelist'),
        ('describedby', 'http://example.com/info_about_set1_of_resources.xml'),
    )
    for attribute, expected in expectations:
        self.assertEqual(getattr(capability_list, attribute), expected)
def synchronize(self):
    """Publish the resources found in source_dir to sink_dir.

    Publication follows the ResourceSync Framework. Steps:

    1. Create ``source_dir`` and ``sink_dir`` when they do not exist.
    2. Verify the handshake; return without publishing when
       ``verify_handshake()`` yields ``None``.
    3. Make sure the well-known directory exists under ``sink_dir`` and load
       the existing source description from ``src_desc_path``, if any.
    4. Run ``__execute_sync__`` once per immediate subdirectory of
       ``source_dir`` when FILE_INDEX exists there, otherwise once for
       ``source_dir`` itself.
    5. Rewrite the source description when it is new or its number of
       resources changed, then call ``report()``.
    """
    for directory in (self.source_dir, self.sink_dir):
        if not os.path.isdir(directory):
            os.makedirs(directory)
            print("Created %s" % directory)

    self.handshake = self.verify_handshake()
    if self.handshake is None:
        return

    # Initial resource description.
    wellknown = os.path.join(self.sink_dir, RS_WELL_KNOWN)
    if not os.path.isdir(wellknown):
        os.makedirs(wellknown)

    src_desc = SourceDescription()
    new_src_desc = True
    # Initialised up front so the comparison below never depends on
    # short-circuit evaluation to avoid an unbound name.
    count_lists = 0

    # Load existing resource description, if any.
    if os.path.isfile(self.src_desc_path):
        new_src_desc = False
        with open(self.src_desc_path, "r") as src_desc_file:
            sm = Sitemap()
            sm.parse_xml(src_desc_file, resources=src_desc)
        count_lists = len(src_desc.resources)

    # The existence of FILE_INDEX indicates that resources live in
    # subdirectories of source_dir rather than directly in it.
    index_file = os.path.join(self.source_dir, FILE_INDEX)
    if os.path.isfile(index_file):
        # First os.walk() entry is (dirpath, dirnames, filenames) for the
        # top directory; the builtin next() works on Python 2.6+ and 3.
        for dirname in next(os.walk(self.source_dir))[1]:
            source = os.path.join(self.source_dir, dirname)
            sink = os.path.join(self.sink_dir, dirname)
            publish_url = self.publish_url + dirname + "/"
            self.__execute_sync__(source, sink, publish_url, src_desc)
    else:
        self.__execute_sync__(self.source_dir, self.sink_dir, self.publish_url, src_desc)

    if new_src_desc or count_lists != len(src_desc.resources):
        # Publish the (new or changed) resource description.
        with open(self.src_desc_path, "w") as src_desc_file:
            src_desc_file.write(src_desc.as_xml())
        print("New resource description. See %s" % self.src_desc_url)

    self.report()
def test_ex_12(self):
    """resourcesync_ex_12 is a Source Description that talks about 3 sets of resources."""
    expected_uris = [
        'http://example.com/capabilitylist1.xml',
        'http://example.com/capabilitylist2.xml',
        'http://example.com/capabilitylist3.xml',
    ]

    description = SourceDescription()
    description.read(uri='tests/testdata/examples_from_spec/resourcesync_ex_12.xml')
    self.assertEqual(len(description), 3)
    self.assertEqual(description.uris(), expected_uris)

    # Look the first capability list up by its URI and check its details.
    first = description['http://example.com/capabilitylist1.xml']
    self.assertEqual(first.capability, 'capabilitylist')
    self.assertEqual(first.describedby, 'http://example.com/info_about_set1_of_resources.xml')
def test04_parse(self):
    """Parsing a Source Description from a string exposes its describedby
    link, its capability and its three capability-list resources.
    """
    document = '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/"><rs:ln href="http://example.org/about" rel="describedby" /><rs:md capability="description" /><url><loc>http://example.org/ds1/cl.xml</loc><rs:md capability="capabilitylist" /></url><url><loc>http://example.org/ds2/cl.xml</loc><rs:md capability="capabilitylist" /></url><url><loc>http://example.org/ds3/cl.xml</loc><rs:md capability="capabilitylist" /></url></urlset>'

    description = SourceDescription()
    description.parse(str=document)

    self.assertEqual(
        description.link_href('describedby'),
        'http://example.org/about',
        'describedby link',
    )
    self.assertEqual(description.capability, 'description')
    self.assertEqual(len(description.resources), 3, 'got 3 capacility lists')

    # Unpacking into three names fails unless there are exactly three entries.
    first, _second, _third = description.resources
    self.assertEqual(first.uri, 'http://example.org/ds1/cl.xml')
    self.assertEqual(first.capability, 'capabilitylist')
def test_build_ex_12(self):
    """Source Description document with describedby links.

    Builds a description holding three capability lists, each with its own
    describedby link, and compares the XML with the resourcesync_ex_12 example.
    """
    description = SourceDescription()
    description.describedby = 'http://example.com/info_about_source.xml'

    # (capability list URI, describedby URI), added in this order.
    capability_lists = (
        ('http://example.com/capabilitylist1.xml',
         'http://example.com/info_about_set1_of_resources.xml'),
        ('http://example.com/capabilitylist2.xml',
         'http://example.com/info_about_set2_of_resources.xml'),
        ('http://example.com/capabilitylist3.xml',
         'http://example.com/info_about_set3_of_resources.xml'),
    )
    for list_uri, about_uri in capability_lists:
        capability_list = CapabilityList(uri=list_uri)
        capability_list.describedby = about_uri
        description.add_capability_list(capability_list)

    expected_xml = self._open_ex('resourcesync_ex_12').read()
    self._assert_xml_equal(description.as_xml(), expected_xml)
def test03_a_bunch(self):
    """Adding three capability lists by URI yields length 3 and one <url>
    entry per list, in insertion order, in the serialized XML.
    """
    expected_xml = '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/"><rs:ln href="http://example.org/about" rel="describedby" /><rs:md capability="description" /><url><loc>http://example.org/ds1/cl.xml</loc><rs:md capability="capabilitylist" /></url><url><loc>http://example.org/ds2/cl.xml</loc><rs:md capability="capabilitylist" /></url><url><loc>http://example.org/ds3/cl.xml</loc><rs:md capability="capabilitylist" /></url></urlset>'

    description = SourceDescription()
    description.describedby = "http://example.org/about"
    self.assertEqual(len(description), 0)

    for capability_list_uri in (
        "http://example.org/ds1/cl.xml",
        "http://example.org/ds2/cl.xml",
        "http://example.org/ds3/cl.xml",
    ):
        description.add_capability_list(capability_list_uri)

    self.assertEqual(len(description), 3)
    self.assertEqual(description.as_xml(), expected_xml)
def write_source_description(self, capability_lists=None, outfile=None, links=None):
    """Write a ResourceSync Description document to outfile or STDOUT.

    Parameters:
        capability_lists: iterable of capability list URIs to add, or None
            to add none.
        outfile: basename passed to the description's ``write``; when None
            the XML is printed to STDOUT instead.
        links: passed to ``SourceDescription`` as its ``ln`` argument.
    """
    description = SourceDescription(ln=links)
    description.pretty_xml = self.pretty_xml

    uris = () if capability_lists is None else capability_lists
    for uri in uris:
        description.add_capability_list(uri)

    if outfile is not None:
        description.write(basename=outfile)
        return
    # Single parenthesised argument: same output as the print statement.
    print(description.as_xml())
def write_source_description(self, capability_lists=None, outfile=None, links=None):
    """Write a ResourceSync Description document to outfile or STDOUT.

    A ``SourceDescription`` is created with ``links`` as its ``ln`` argument
    and inherits ``self.pretty_xml``. Every URI in ``capability_lists`` (when
    given) is added as a capability list. The result is written via
    ``write(basename=outfile)``, or printed when ``outfile`` is None.
    """
    source_description = SourceDescription(ln=links)
    source_description.pretty_xml = self.pretty_xml

    if capability_lists is not None:
        for capability_list_uri in capability_lists:
            source_description.add_capability_list(capability_list_uri)

    to_stdout = outfile is None
    if to_stdout:
        # Single parenthesised argument: same output as the print statement.
        print(source_description.as_xml())
    else:
        source_description.write(basename=outfile)
def test_build_ex_07(self):
    """A Source Description document.

    Builds a description with one capability-list resource carrying a
    describedby link and compares the XML with the resourcesync_ex_7 example.
    """
    description = SourceDescription()
    description.describedby = 'http://example.com/info-about-source.xml'

    capability_list = Resource(
        uri='http://example.com/dataset1/capabilitylist.xml',
        capability='capabilitylist',
    )
    capability_list.link_set(
        rel='describedby',
        href='http://example.com/info_about_set1_of_resources.xml',
    )
    description.add(capability_list)

    expected_xml = self._open_ex('resourcesync_ex_7').read()
    self._assert_xml_equal(description.as_xml(), expected_xml)
# NOTE(review): the statements up to and including the timefile write look like
# the tail of an enclosing function whose header is not visible in this
# fragment -- re-indent them to match that scope when applying.

# Store the modified time of the newest resource (last entry once sorted by time).
new_lasttime = time_sorted_resources[-1]["time"].strftime("%Y-%m-%dT%H:%M:%SZ")

# Write this time to the timefile; the context manager closes the handle
# even when the write raises.
with open(args.time_file, "w") as timefile_out:
    timefile_out.write(new_lasttime)


def get_resource_lists(resources):
    """Download all resource lists and fetch the resources they describe.

    For every entry in ``resources`` the entry's ``uri`` is fetched with
    ``requests.get``, the response text is parsed into a ``ResourceList``,
    and that list's resources are handed to ``get_resources``.
    """
    for _key, resource_list_resource in resources.iteritems():
        resource_list_response = requests.get(resource_list_resource.uri)
        resource_list = ResourceList()
        resource_list.parse(str=resource_list_response.text)
        get_resources(resource_list.resources)


# Download the source description XML from the configured URI.
# --> should actually be discovered via robots.txt or .well-known
source_desc_response = requests.get(args.source_description_uri)
source_desc = SourceDescription()
source_desc.parse(str=source_desc_response.text)
# Unpacking into a one-element target requires exactly one capability list.
[capabilitylist_resource] = source_desc.resources

# Download the capability list obtained from the source description.
capabilitylist_response = requests.get(capabilitylist_resource.uri)
capabilitylist = CapabilityList()
capabilitylist.parse(str=capabilitylist_response.text)

# Download the resource lists obtained from the capability list.
get_resource_lists(capabilitylist.resources)
def test01_empty(self):
    """An empty Source Description reports length 0 and serializes to a
    urlset containing only the describedby link and the md element.
    """
    source_description = SourceDescription()
    source_description.describedby = "http://example.org/about"
    self.assertEqual(len(source_description), 0)

    # Clear md_at before serializing; the expected XML has no "at" attribute.
    source_description.md_at = None
    serialized = source_description.as_xml()
    self.assertEqual(
        serialized,
        '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/"><rs:ln href="http://example.org/about" rel="describedby" /><rs:md capability="description" /></urlset>',
    )
# NOTE(review): the statements up to and including the timefile write look like
# the tail of an enclosing function whose header is not visible in this
# fragment -- re-indent them to match that scope when applying.
new_lasttime = time_sorted_resources[-1]["time"].strftime(
    "%Y-%m-%dT%H:%M:%SZ")

# Write this time to the timefile. Using a context manager guarantees the
# handle is closed even if the write fails.
with open(args.time_file, "w") as timefile_out:
    timefile_out.write(new_lasttime)


def get_resource_lists(resources):
    """Download all resource lists and process the resources they contain.

    Each value yielded by ``resources.iteritems()`` must expose a ``uri``;
    that URI is requested, the response text is parsed as a ``ResourceList``
    and the parsed resources are passed on to ``get_resources``.
    """
    for _key, resource_list_resource in resources.iteritems():
        resource_list_response = requests.get(resource_list_resource.uri)
        resource_list = ResourceList()
        resource_list.parse(str=resource_list_response.text)
        get_resources(resource_list.resources)


# Download the source description XML from the configured URI.
# --> should actually be discovered via robots.txt or .well-known
source_desc_response = requests.get(args.source_description_uri)
source_desc = SourceDescription()
source_desc.parse(str=source_desc_response.text)
# Exactly one capability list is expected; anything else raises on unpacking.
[capabilitylist_resource] = source_desc.resources

# Download the capability list obtained from the source description.
capabilitylist_response = requests.get(capabilitylist_resource.uri)
capabilitylist = CapabilityList()
capabilitylist.parse(str=capabilitylist_response.text)

# Download the resource lists obtained from the capability list.
get_resource_lists(capabilitylist.resources)
# Write the resource list to args.resource_dir + "/resource-list.xml".
# Each path is built once and reused for both the write and the message;
# the context managers close the files even when a write raises.
resource_list_path = args.resource_dir + "/resource-list.xml"
with open(resource_list_path, "w") as resource_list_file:
    resource_list_file.write(rl.as_xml())
print("Wrote resource list to: " + resource_list_path)

# The capability list advertises the resource list; its "from" metadata is
# the earliest timestamp, when any timestamps were collected.
timestamps.sort()
caps = CapabilityList()
caps.add_capability(rl, args.resource_url + "resource-list.xml")
if len(timestamps) > 0:
    caps.md['from'] = timestamps[0]

# Write the capability list to args.resource_dir + "/capability-list.xml".
capability_list_path = args.resource_dir + "/capability-list.xml"
with open(capability_list_path, "w") as capability_list_file:
    capability_list_file.write(caps.as_xml())
print("Wrote capability list to: " + capability_list_path)

# The source description points at the capability list; md_at is cleared
# before serializing.
rsd = SourceDescription()
rsd.md_at = None
rsd.add_capability_list(args.resource_url + "capability-list.xml")

# Write the source description to args.resource_dir + "/resourcesync".
source_description_path = args.resource_dir + "/resourcesync"
with open(source_description_path, "w") as source_description_file:
    source_description_file.write(rsd.as_xml())
print("Wrote source description to: " + source_description_path)