Ejemplo n.º 1
0
 def test01_resourcelist(self):
     """Serialize a capability list holding one resource list entry and a ``modified`` md value."""
     expected_xml = '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/"><rs:md capability="capabilitylist" modified="2013-02-07T22:39:00" /><url><loc>http://example.org/resourcelist.xml</loc><rs:md capability="resourcelist" /></url></urlset>'
     resource_list = ResourceList()
     capability_list = CapabilityList()
     capability_list.add_capability(resource_list, "http://example.org/resourcelist.xml")
     capability_list.md['modified'] = "2013-02-07T22:39:00"
     # Exactly one capability was registered, and the XML must match character for character.
     self.assertEqual(len(capability_list), 1)
     self.assertEqual(capability_list.as_xml(), expected_xml)
Ejemplo n.º 2
0
 def test03_parse(self):
     """Parse a capability list from an XML string and check its single entry."""
     document = '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/"><rs:md capability="capabilitylist" from="2013-02-07T22:39:00" /><url><loc>http://example.org/resourcelist.xml</loc><rs:md capability="resourcelist" /></url></urlset>'
     parsed = CapabilityList()
     parsed.parse(str=document)
     self.assertEqual(parsed.capability, 'capabilitylist')
     self.assertEqual(len(parsed.resources), 1, 'got 1 resource')
     # Single-element unpacking: fails if the list does not hold exactly one entry.
     (entry,) = parsed.resources
     self.assertEqual(entry.uri, 'http://example.org/resourcelist.xml', 'resourcelist uri')
     self.assertEqual(entry.capability, 'resourcelist')
Ejemplo n.º 3
0
 def test04_parse(self):
     """Parse a capability list passed as ``str_data`` and check its single entry."""
     document = '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/"><rs:md capability="capabilitylist" from="2013-02-07T22:39:00" /><url><loc>http://example.org/resourcelist.xml</loc><rs:md capability="resourcelist" /></url></urlset>'
     parsed = CapabilityList()
     parsed.parse(str_data=document)
     self.assertEqual(parsed.capability, 'capabilitylist')
     self.assertEqual(len(parsed.resources), 1, 'got 1 resource')
     # Single-element unpacking: fails if the list does not hold exactly one entry.
     (entry,) = parsed.resources
     self.assertEqual(entry.uri, 'http://example.org/resourcelist.xml', 'resourcelist uri')
     self.assertEqual(entry.capability, 'resourcelist')
Ejemplo n.º 4
0
 def test02_explore_show_summary(self):
     """Check the summary printed for a hand-built list with two resources."""
     listing = CapabilityList()
     for uri in ('uri:resourcelist', 'uri:changelist'):
         listing.add(Resource(uri))
     explorer = Explorer()
     with capture_stdout() as captured:
         explorer.explore_show_summary(listing, False, [])
     # The expected output numbers the entries with changelist before resourcelist.
     expected_patterns = (
         r'Parsed \(unknown capability\) document with 2 entries:',
         r'\[1\] uri:changelist',
         r'\[2\] uri:resourcelist',
     )
     for pattern in expected_patterns:
         self.assertTrue(re.search(pattern, captured.result))
Ejemplo n.º 5
0
 def test_build_ex_06(self):
     """Build a simple Capability List and compare it with the 'resourcesync_ex_6' example."""
     capability_list = CapabilityList()
     capability_list.describedby = 'http://example.com/info_about_set1_of_resources.xml'
     capability_list.up = 'http://example.com/resourcesync_description.xml'
     # (name, uri) pairs, added in this order.
     entries = (
         ('resourcelist', 'http://example.com/dataset1/resourcelist.xml'),
         ('resourcedump', 'http://example.com/dataset1/resourcedump.xml'),
         ('changelist', 'http://example.com/dataset1/changelist.xml'),
     )
     for name, uri in entries:
         capability_list.add_capability(uri=uri, name=name)
     expected = self._open_ex('resourcesync_ex_6').read()
     self._assert_xml_equal(capability_list.as_xml(), expected)
Ejemplo n.º 6
0
 def test_build_ex_06(self):
     """Build a three-capability Capability List and compare it with 'resourcesync_ex_6'."""
     built = CapabilityList()
     built.describedby = 'http://example.com/info_about_set1_of_resources.xml'
     built.up = 'http://example.com/resourcesync_description.xml'
     # Each capability is registered by name together with the URI of its document.
     capabilities = [
         dict(uri='http://example.com/dataset1/resourcelist.xml', name='resourcelist'),
         dict(uri='http://example.com/dataset1/resourcedump.xml', name='resourcedump'),
         dict(uri='http://example.com/dataset1/changelist.xml', name='changelist'),
     ]
     for keywords in capabilities:
         built.add_capability(**keywords)
     reference_xml = self._open_ex('resourcesync_ex_6').read()
     self._assert_xml_equal(built.as_xml(), reference_xml)
Ejemplo n.º 7
0
 def test_build_ex_13(self):
     """Build a Capability List with 4 entries and compare it with 'resourcesync_ex_13'."""
     capability_list = CapabilityList()
     capability_list.describedby = 'http://example.com/info_about_set1_of_resources.xml'
     capability_list.up = 'http://example.com/resourcesync_description.xml'
     # Each capability object is created and added before the next one is built.
     documents = (
         (ResourceList, 'http://example.com/dataset1/resourcelist.xml'),
         (ResourceDump, 'http://example.com/dataset1/resourcedump.xml'),
         (ChangeList, 'http://example.com/dataset1/changelist.xml'),
         (ChangeDump, 'http://example.com/dataset1/changedump.xml'),
     )
     for document_class, uri in documents:
         capability_list.add_capability(capability=document_class(uri=uri))
     expected = self._open_ex('resourcesync_ex_13').read()
     self._assert_xml_equal(capability_list.as_xml(), expected)
Ejemplo n.º 8
0
 def get(self):
     """Handle GET: build the Capability List for this source and write it as XML."""
     source = self.source
     capl = CapabilityList()
     capl.describedby = source.describedby_uri
     capl.up = source.source_description_uri
     capl.add_capability(name='resourcelist',
                         uri=source.resource_list_builder.uri)
     # The change list is advertised only when the source keeps a change memory.
     if source.has_changememory:
         capl.add_capability(name='changelist',
                             uri=source.changememory.base_uri)
     self.set_header("Content-Type", "application/xml")
     self.write(capl.as_xml())
Ejemplo n.º 9
0
    def publish_metadata(self, new_zips, exluded_zip=None):
        """
        (Re)publish the resource dump with new_zips added, and create the capability list if it is missing.

        An existing resource dump file is parsed first; otherwise a fresh dump gets its ``md_at``
        timestamp and an "up" link to the capability list. The excluded zip (if any) is removed,
        the new zips are added, and the dump is written back with a ``md_completed`` timestamp.

        :param new_zips: resources for newly created zips; iterated and added one by one
        :param exluded_zip: local path to a zip file whose entry is removed from the resource dump;
            a falsy value (the default) removes nothing
        :raises RuntimeError: if exluded_zip is given but no matching entry is in the resource dump
        """
        dump_url = self.publish_url + RS_RESOURCE_DUMP_XML
        dump_path = os.path.join(self.publish_dir, RS_RESOURCE_DUMP_XML)
        capabilities_url = self.publish_url + RS_CAPABILITY_LIST_XML
        capabilities_path = os.path.join(self.publish_dir, RS_CAPABILITY_LIST_XML)

        dump = ResourceDump()

        if not os.path.isfile(dump_path):
            # Nothing published yet: record the start time and link up to the capability list.
            dump.md_at = w3cdt.datetime_to_str(no_fractions=True)
            dump.link_set(rel="up", href=capabilities_url)
        else:
            # Continue from the resource dump that is already on disk.
            with open(dump_path, "r") as existing_dump:
                Sitemap().parse_xml(existing_dump, resources=dump)

        # Drop the excluded zip; failing to find it is treated as an error.
        if exluded_zip:
            excluded_loc = self.publish_url + os.path.basename(exluded_zip)
            if excluded_loc not in dump.resources:
                raise RuntimeError("Could not find %s in %s" % (excluded_loc, dump_path))
            del dump.resources[excluded_loc]

        for zip_resource in new_zips:
            dump.add(zip_resource)

        # Write resource-dump.xml
        dump.md_completed = w3cdt.datetime_to_str(no_fractions=True)
        with open(dump_path, "w") as dump_out:
            dump_out.write(dump.as_xml())

        # The name of the publish directory is decoded as url-safe base64 and reported as the graph.
        encoded_name = os.path.basename(self.publish_dir)
        iri = base64.urlsafe_b64decode(encoded_name).rstrip('\n')

        print("New %s for graph %s" % (RS_RESOURCE_DUMP_XML, iri))
        print("See %s" % dump_url)

        # An existing capability-list.xml is left untouched.
        if os.path.isfile(capabilities_path):
            return

        capabilities = CapabilityList()
        capabilities.link_set(rel="up", href=self.src_desc_url)
        capabilities.add_capability(dump, dump_url)
        with open(capabilities_path, "w") as capabilities_out:
            capabilities_out.write(capabilities.as_xml())

        print("New %s. See %s" % (RS_CAPABILITY_LIST_XML, capabilities_url))
Ejemplo n.º 10
0
 def write_capability_list(self, capabilities=None, outfile=None, links=None):
     """Build a Capability List and write it to outfile, or print it to STDOUT.

     capabilities -- optional mapping of capability name to URI; one entry is added per key
     outfile -- handed to the list's write() as ``basename``; None prints the XML instead
     links -- handed to the CapabilityList constructor as ``ln``
     """
     capability_list = CapabilityList(ln=links)
     capability_list.pretty_xml = self.pretty_xml
     names = capabilities.keys() if capabilities is not None else ()
     for name in names:
         capability_list.add_capability(name=name, uri=capabilities[name])
     if outfile is not None:
         capability_list.write(basename=outfile)
     else:
         print(capability_list.as_xml())
Ejemplo n.º 11
0
 def write_capability_list(self,
                           capabilities=None,
                           outfile=None,
                           links=None):
     """Write a Capability List to outfile, or to STDOUT when outfile is None.

     Each key of ``capabilities`` (when given) is added as a capability name with
     its mapped value as the URI; ``links`` is passed to CapabilityList as ``ln``.
     """
     document = CapabilityList(ln=links)
     document.pretty_xml = self.pretty_xml
     if capabilities is not None:
         for capability_name in capabilities.keys():
             capability_uri = capabilities[capability_name]
             document.add_capability(name=capability_name, uri=capability_uri)
     if outfile is not None:
         document.write(basename=outfile)
         return
     print(document.as_xml())
Ejemplo n.º 12
0
 def test_build_ex_12(self):
     """Build a Source Description with describedby links and compare it with 'resourcesync_ex_12'."""
     source_description = SourceDescription()
     source_description.describedby = 'http://example.com/info_about_source.xml'
     # (capability list uri, describedby uri) pairs, added in this order.
     capability_lists = (
         ('http://example.com/capabilitylist1.xml',
          'http://example.com/info_about_set1_of_resources.xml'),
         ('http://example.com/capabilitylist2.xml',
          'http://example.com/info_about_set2_of_resources.xml'),
         ('http://example.com/capabilitylist3.xml',
          'http://example.com/info_about_set3_of_resources.xml'),
     )
     for list_uri, info_uri in capability_lists:
         capability_list = CapabilityList(uri=list_uri)
         capability_list.describedby = info_uri
         source_description.add_capability_list(capability_list)
     expected = self._open_ex('resourcesync_ex_12').read()
     self._assert_xml_equal(source_description.as_xml(), expected)
Ejemplo n.º 13
0
 def test03_capability_list_links(self):
     """Run resync with link options and check the links in the capability list it outputs."""
     options = [
         '--capabilitylist=resourcelist=rl,changedump=cd',
         '--describedby-link=a',
         '--sourcedescription-link=b',
         '--capabilitylist-link=c',  # will be ignored
     ]
     output = run_resync(options)
     parsed = CapabilityList()
     parsed.parse(fh=StringIO.StringIO(output))
     self.assertEqual(len(parsed), 2)
     # Each expected link must be present and point at the value given on the command line.
     for rel, href in (('describedby', 'a'), ('up', 'b')):
         self.assertNotEqual(parsed.link(rel), None)
         self.assertEqual(parsed.link(rel)['href'], href)
Ejemplo n.º 14
0
 def get(self):
     """Handle GET: build the Capability List for this source and write it as XML."""
     source = self.source
     capl = CapabilityList()
     capl.describedby = source.describedby_uri
     capl.add_capability(name='resourcelist',
                         uri=source.resource_list_builder.uri)
     # The change list is advertised only when the source keeps a change memory.
     if source.has_changememory:
         capl.add_capability(name='changelist',
                             uri=source.changememory.base_uri)
     self.set_header("Content-Type", "application/xml")
     self.write(capl.as_xml())
Ejemplo n.º 15
0
 def test03_multiple(self):
     """Add a resource list and a change list, then look for both entries in the XML."""
     capability_list = CapabilityList()
     for document_class, location in ((ResourceList, "rl.xml"), (ChangeList, "cl.xml")):
         capability_list.add_capability(document_class(), location)
     self.assertEqual(len(capability_list), 2)
     serialized = capability_list.as_xml()
     expected_fragments = (
         r'<loc>rl.xml</loc><rs:md capability="resourcelist" />',
         r'<loc>cl.xml</loc><rs:md capability="changelist" />',
     )
     for fragment in expected_fragments:
         self.assertTrue(re.search(fragment, serialized))
Ejemplo n.º 16
0
 def test02_resourcelist(self):
     """Serialize a capability list holding one resource list entry and a ``from`` md value."""
     expected_xml = '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/"><rs:md capability="capabilitylist" from="2013-02-07T22:39:00" /><url><loc>http://example.org/resourcelist.xml</loc><rs:md capability="resourcelist" /></url></urlset>'
     resource_list = ResourceList()
     capability_list = CapabilityList()
     capability_list.add_capability(resource_list, "http://example.org/resourcelist.xml")
     capability_list.md['from'] = "2013-02-07T22:39:00"
     # Exactly one capability was registered, and the XML must match character for character.
     self.assertEqual(len(capability_list), 1)
     self.assertEqual(capability_list.as_xml(), expected_xml)
Ejemplo n.º 17
0
 def test01_add(self):
     """Exercise add(): one resource, a rejected duplicate, a replaced duplicate, two distinct entries."""
     # A single resource is stored.
     capability_list = CapabilityList()
     first = Resource(uri='http://example.org/r1')
     capability_list.add(first)
     self.assertEqual(len(capability_list), 1)

     # Adding the same resource again is rejected and leaves the list unchanged.
     with self.assertRaises(ResourceSetDupeError):
         capability_list.add(first)
     self.assertEqual(len(capability_list), 1)

     # With replace=True a duplicate within one call collapses to a single entry.
     capability_list = CapabilityList()
     capability_list.add([first, first], replace=True)
     self.assertEqual(len(capability_list), 1)

     # Two entries with different URIs are both kept.
     capability_list = CapabilityList()
     second = ChangeList(uri='http://example.org/r2')
     capability_list.add([first, second])
     self.assertEqual(len(capability_list), 2)
Ejemplo n.º 18
0
    def publish_metadata(self, new_zips, exluded_zip=None):
        """
        (Re)publish the resource dump with new_zips added; write the capability list when absent.

        Steps: load the resource dump from disk if the file exists (otherwise stamp ``md_at`` and
        set the "up" link), remove the excluded zip, add the new zips, stamp ``md_completed`` and
        write the dump. The capability list file is only created when it does not exist yet.

        :param new_zips: resources for newly created zips; iterated and added one by one
        :param exluded_zip: local path to a zip file whose entry is removed from the resource dump;
            a falsy value (the default) removes nothing
        :raises RuntimeError: if exluded_zip is given but no matching entry is in the resource dump
        """
        resource_dump_url = self.publish_url + RS_RESOURCE_DUMP_XML
        resource_dump_path = os.path.join(self.publish_dir, RS_RESOURCE_DUMP_XML)
        capability_list_url = self.publish_url + RS_CAPABILITY_LIST_XML
        capability_list_path = os.path.join(self.publish_dir, RS_CAPABILITY_LIST_XML)

        resource_dump = ResourceDump()

        already_published = os.path.isfile(resource_dump_path)
        if already_published:
            # Pick up the entries of the resource dump that is already on disk.
            with open(resource_dump_path, "r") as published:
                parser = Sitemap()
                parser.parse_xml(published, resources=resource_dump)
        else:
            # First publication: record the start time and link up to the capability list.
            resource_dump.md_at = w3cdt.datetime_to_str(no_fractions=True)
            resource_dump.link_set(rel="up", href=capability_list_url)

        # Remove the excluded zip; an unknown location is an error.
        if exluded_zip:
            stale_loc = self.publish_url + os.path.basename(exluded_zip)
            if stale_loc not in resource_dump.resources:
                raise RuntimeError("Could not find %s in %s" % (stale_loc, resource_dump_path))
            del resource_dump.resources[stale_loc]

        for new_zip in new_zips:
            resource_dump.add(new_zip)

        # Write resource-dump.xml
        resource_dump.md_completed = w3cdt.datetime_to_str(no_fractions=True)
        with open(resource_dump_path, "w") as target:
            target.write(resource_dump.as_xml())

        # The name of the publish directory is decoded as url-safe base64 and reported as the graph.
        directory_name = os.path.basename(self.publish_dir)
        iri = base64.urlsafe_b64decode(directory_name).rstrip("\n")

        print("New %s for graph %s" % (RS_RESOURCE_DUMP_XML, iri))
        print("See %s" % resource_dump_url)

        # An existing capability-list.xml is left untouched.
        if os.path.isfile(capability_list_path):
            return

        capability_list = CapabilityList()
        capability_list.link_set(rel="up", href=self.src_desc_url)
        capability_list.add_capability(resource_dump, resource_dump_url)
        with open(capability_list_path, "w") as target:
            target.write(capability_list.as_xml())

        print("New %s. See %s" % (RS_CAPABILITY_LIST_XML, capability_list_url))
Ejemplo n.º 19
0
 def test03_capability_list_links(self):
     """Run resync with link options and check the links in the capability list it outputs."""
     options = [
         '--write-capabilitylist=resourcelist=rl,changedump=cd',
         '--describedby-link=a',
         '--sourcedescription-link=b',
         '--capabilitylist-link=c',  # will be ignored
     ]
     output = run_resync(options)
     parsed = CapabilityList()
     parsed.parse(fh=io.BytesIO(output))
     self.assertEqual(len(parsed), 2)
     # Each expected link must be present and point at the value given on the command line.
     for rel, href in (('describedby', 'a'), ('up', 'b')):
         self.assertNotEqual(parsed.link(rel), None)
         self.assertEqual(parsed.link(rel)['href'], href)
Ejemplo n.º 20
0
 def test02_multiple(self):
     """Add a resource list and a change list, then look for both entries in the XML."""
     capability_list = CapabilityList()
     for document_class, location in ((ResourceList, "rl.xml"), (ChangeList, "cl.xml")):
         capability_list.add_capability(document_class(), location)
     self.assertEqual(len(capability_list), 2)
     serialized = capability_list.as_xml()
     expected_fragments = (
         r'<loc>rl.xml</loc><rs:md capability="resourcelist" />',
         r'<loc>cl.xml</loc><rs:md capability="changelist" />',
     )
     for fragment in expected_fragments:
         self.assertTrue(re.search(fragment, serialized))
Ejemplo n.º 21
0
 def test06_explore_show_summary(self):
     """Check the printed summary for an empty list and for a list with two resources."""
     explorer = Explorer()

     # Empty capability list, passed by keyword.
     with capture_stdout() as captured:
         explorer.explore_show_summary(list=CapabilityList())
     self.assertTrue(
         re.search(r'Parsed \(unknown capability\) document with 0 entries:', captured.result))

     # Hand-built list with two resources, passed positionally.
     listing = CapabilityList()
     for uri in ('uri:resourcelist', 'uri:changelist'):
         listing.add(Resource(uri))
     with capture_stdout() as captured:
         explorer.explore_show_summary(listing, False, [])
     # The expected output numbers the entries with changelist before resourcelist.
     expected_patterns = (
         r'Parsed \(unknown capability\) document with 2 entries:',
         r'\[1\] uri:changelist',
         r'\[2\] uri:resourcelist',
     )
     for pattern in expected_patterns:
         self.assertTrue(re.search(pattern, captured.result))
Ejemplo n.º 22
0
	# store newest modified time of newest resource
	new_lasttime = time_sorted_resources[-1]["time"].strftime("%Y-%m-%dT%H:%M:%SZ")
	# write this time to the timefile
	timefile_out = open(args.time_file, "w")
	timefile_out.write(new_lasttime)
	timefile_out.close()

# Downloads all resource lists
def get_resource_lists(resources):
	"""Fetch and parse every resource list in `resources`, passing each list's resources to get_resources().

	`resources` is iterated with iteritems(); only the values are used, and each
	value must expose a `uri` attribute that is requested over HTTP.
	"""
	for _unused_key, list_entry in resources.iteritems():
		response = requests.get(list_entry.uri)
		parsed_list = ResourceList()
		parsed_list.parse(str=response.text)
		get_resources(parsed_list.resources)



# Download the source description XML from the URI given in
# args.source_description_uri and parse it.
# --> the URI should actually be discovered either via robots.txt or in .well-known
source_desc_response = requests.get(args.source_description_uri)
source_desc = SourceDescription()
source_desc.parse(str=source_desc_response.text)
# Single-element unpacking: iterating source_desc.resources must yield exactly
# one item, otherwise this line raises ValueError.
[capabilitylist_resource] = source_desc.resources

# Download and parse the capability list obtained from the source description
capabilitylist_response =  requests.get(capabilitylist_resource.uri)
capabilitylist = CapabilityList()
capabilitylist.parse(str=capabilitylist_response.text)

# Download the resource lists obtained from the capability list
get_resource_lists(capabilitylist.resources)
Ejemplo n.º 23
0
    new_lasttime = time_sorted_resources[-1]["time"].strftime(
        "%Y-%m-%dT%H:%M:%SZ")
    # write this time to the timefile
    timefile_out = open(args.time_file, "w")
    timefile_out.write(new_lasttime)
    timefile_out.close()


# Downloads all resource lists
def get_resource_lists(resources):
    """Download each resource list named in `resources` and process its resources.

    `resources` is iterated with iteritems(); the keys are ignored. Every value's
    `uri` is requested over HTTP, the response text is parsed as a ResourceList,
    and the parsed resources are passed to get_resources().
    """
    for _ignored_key, entry in resources.iteritems():
        http_response = requests.get(entry.uri)
        downloaded = ResourceList()
        downloaded.parse(str=http_response.text)
        get_resources(downloaded.resources)


# Download the source description XML from the URI given in
# args.source_description_uri and parse it.
# --> the URI should actually be discovered either via robots.txt or in .well-known
source_desc_response = requests.get(args.source_description_uri)
source_desc = SourceDescription()
source_desc.parse(str=source_desc_response.text)
# Single-element unpacking: iterating source_desc.resources must yield exactly
# one item, otherwise this line raises ValueError.
[capabilitylist_resource] = source_desc.resources

# Download and parse the capability list obtained from the source description
capabilitylist_response = requests.get(capabilitylist_resource.uri)
capabilitylist = CapabilityList()
capabilitylist.parse(str=capabilitylist_response.text)

# Download the resource lists obtained from the capability list
get_resource_lists(capabilitylist.resources)
Ejemplo n.º 24
0
		raw_ts[8:10] + ":" +
		raw_ts[10:12] + ":" +
		raw_ts[12:14] + "Z"
	)
	timestamps.append(ts)
	rl.add(Resource(args.resource_url + filename, lastmod=ts))

# Write the resource list to args.resource_dir + "/resource-list.xml".
# The context manager closes the file even if as_xml() or write() raises.
with open(args.resource_dir + "/resource-list.xml", "w") as resource_list_file:
	resource_list_file.write(rl.as_xml())
print("Wrote resource list to: " + args.resource_dir + "/resource-list.xml")

# Sorted in place so that timestamps[0] is the smallest timestamp string.
timestamps.sort()

caps = CapabilityList()
caps.add_capability(rl, args.resource_url + "resource-list.xml")
# 'from' is only set when at least one timestamp was collected.
if timestamps:
	caps.md['from'] = timestamps[0]

# Write the capability list to args.resource_dir + "/capability-list.xml",
# again closing the file on every exit path.
with open(args.resource_dir + "/capability-list.xml", "w") as capability_list_file:
	capability_list_file.write(caps.as_xml())

print("Wrote capability list to: " + args.resource_dir + "/capability-list.xml")

# Start the source description and register the capability list by its URL.
rsd = SourceDescription()
rsd.md_at = None
rsd.add_capability_list(args.resource_url + "capability-list.xml")
Ejemplo n.º 25
0
        continue
    _, raw_ts = filename.split("-")
    ts = (raw_ts[:4] + "-" + raw_ts[4:6] + "-" + raw_ts[6:8] + "T" +
          raw_ts[8:10] + ":" + raw_ts[10:12] + ":" + raw_ts[12:14] + "Z")
    timestamps.append(ts)
    rl.add(Resource(args.resource_url + filename, lastmod=ts))

# Write the resource list to args.resource_dir + "/resource-list.xml".
# The context manager closes the file even if as_xml() or write() raises.
with open(args.resource_dir + "/resource-list.xml", "w") as resource_list_file:
    resource_list_file.write(rl.as_xml())
print("Wrote resource list to: " + args.resource_dir + "/resource-list.xml")

# Sorted in place so that timestamps[0] is the smallest timestamp string.
timestamps.sort()

caps = CapabilityList()
caps.add_capability(rl, args.resource_url + "resource-list.xml")
# 'from' is only set when at least one timestamp was collected.
if timestamps:
    caps.md['from'] = timestamps[0]

# Write the capability list to args.resource_dir + "/capability-list.xml",
# again closing the file on every exit path.
with open(args.resource_dir + "/capability-list.xml", "w") as capability_list_file:
    capability_list_file.write(caps.as_xml())

print("Wrote capability list to: " + args.resource_dir + "/capability-list.xml")

# Start the source description and register the capability list by its URL.
rsd = SourceDescription()
rsd.md_at = None
rsd.add_capability_list(args.resource_url + "capability-list.xml")
Ejemplo n.º 26
0
 def test_ex_06(self):
     """Parse resourcesync_ex_6.xml and check which capabilities it reports."""
     parsed = CapabilityList()
     parsed.parse('tests/testdata/examples_from_spec/resourcesync_ex_6.xml')
     self.assertEqual(len(parsed.resources), 3, '3 capabilities')
     # Capabilities that must be present, with the URI each one is expected to carry.
     present = (
         ('resourcelist', 'http://example.com/dataset1/resourcelist.xml'),
         ('resourcedump', 'http://example.com/dataset1/resourcedump.xml'),
         ('changelist', 'http://example.com/dataset1/changelist.xml'),
     )
     for name, uri in present:
         self.assertTrue(parsed.has_capability(name))
         self.assertEqual(parsed.capability_info(name).uri, uri)
     # Lookups that must fail: no name at all, an unknown name, and the list's own type.
     for absent_args in ((), ('bogus',), ('capabilitylist',)):
         self.assertFalse(parsed.has_capability(*absent_args))
Ejemplo n.º 27
0
 def test01_add(self):
     """Cover add() for a new resource, a duplicate, a duplicate with replace, and distinct entries."""
     # One resource.
     listing = CapabilityList()
     resource_one = Resource(uri='http://example.org/r1')
     listing.add(resource_one)
     self.assertEqual(len(listing), 1)

     # The same resource a second time raises and does not change the size.
     with self.assertRaises(ResourceSetDupeError):
         listing.add(resource_one)
     self.assertEqual(len(listing), 1)

     # replace=True accepts the duplicate and keeps one entry.
     listing = CapabilityList()
     listing.add([resource_one, resource_one], replace=True)
     self.assertEqual(len(listing), 1)

     # Entries with different URIs are counted separately.
     listing = CapabilityList()
     resource_two = ChangeList(uri='http://example.org/r2')
     listing.add([resource_one, resource_two])
     self.assertEqual(len(listing), 2)