예제 #1
0
 def get(self):
     """Respond to GET with the Source Description serialized as XML.

     The description takes its describedby value from the source's
     ``describedby_uri`` and lists the source's ``capability_list_uri``.
     """
     document = SourceDescription()
     source = self.source
     document.describedby = source.describedby_uri
     document.add_capability_list(source.capability_list_uri)
     # Declare the payload type before the body is written.
     self.set_header("Content-Type", "application/xml")
     body = document.as_xml()
     self.write(body)
예제 #2
0
 def test01_empty(self):
     """Serialize a Source Description that lists no capability lists."""
     expected_xml = (
         '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
         '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"'
         ' xmlns:rs="http://www.openarchives.org/rs/terms/">'
         '<rs:ln href="http://example.org/about" rel="describedby" />'
         '<rs:md capability="description" />'
         '</urlset>'
     )
     description = SourceDescription()
     description.describedby = "http://example.org/about"
     self.assertEqual(len(description), 0)
     # md_at is cleared before serializing; the expected <rs:md> element
     # above carries only the capability attribute.
     description.md_at = None
     self.assertEqual(description.as_xml(), expected_xml)
예제 #3
0
 def text_ex_07(self):
     """Read spec example 7: a Source Description listing one Capability List."""
     # NOTE(review): the name starts with "text_" rather than "test_", so a
     # runner that collects methods by the "test" prefix would skip it --
     # confirm whether this is intentional.
     description = SourceDescription()
     description.read(
         uri='tests/testdata/examples_from_spec/resourcesync_ex_7.xml')
     entries = description.resources
     self.assertEqual(len(entries), 1, '1 capability list')
     capability_list = entries[0]
     self.assertEqual(capability_list.uri,
                      'http://example.com/dataset1/capabilitylist.xml')
     # NOTE(review): expects 'resourcelist' for an entry whose URI names a
     # capability list -- verify against the example file.
     self.assertEqual(capability_list.capability, 'resourcelist')
     self.assertEqual(capability_list.describedby,
                      'http://example.com/info_about_set1_of_resources.xml')
예제 #4
0
    def synchronize(self):
        """
        Publish the resources found in source_dir in accordance with the
        Resourcesync Framework in sink_dir.

        Sequence:
          1. Create source_dir and sink_dir when they do not exist yet.
          2. Store the result of verify_handshake() in self.handshake and
             return early when it is None.
          3. Make sure the RS_WELL_KNOWN directory exists under sink_dir.
          4. Parse the document at src_desc_path, if that file exists, into
             a SourceDescription.
          5. Call __execute_sync__ once per immediate subdirectory of
             source_dir when FILE_INDEX is present there, otherwise once
             for source_dir itself.
          6. (Re)write the document at src_desc_path when it did not exist
             before or when the number of resources in it changed.
          7. Call report().

        Progress messages go to stdout. print(...) is called with a single
        argument and the next() builtin is used on the walk generator so
        the method behaves the same on Python 2.6+ and Python 3.
        """
        for directory in (self.source_dir, self.sink_dir):
            if not os.path.isdir(directory):
                os.makedirs(directory)
                print("Created %s" % directory)

        self.handshake = self.verify_handshake()
        if self.handshake is None:
            return

        # Make sure the well-known directory exists in the sink.
        wellknown = os.path.join(self.sink_dir, RS_WELL_KNOWN)
        if not os.path.isdir(wellknown):
            os.makedirs(wellknown)

        src_desc = SourceDescription()
        # Load the existing resource description, if any.
        new_src_desc = not os.path.isfile(self.src_desc_path)
        if not new_src_desc:
            with open(self.src_desc_path, "r") as src_desc_file:
                Sitemap().parse_xml(src_desc_file, resources=src_desc)

        # Remember how many entries there were so a change can be detected.
        count_lists = len(src_desc.resources)

        # The existence of FILE_INDEX in source_dir means each immediate
        # subdirectory is synchronized separately; otherwise source_dir
        # itself is synchronized.
        index_file = os.path.join(self.source_dir, FILE_INDEX)
        if os.path.isfile(index_file):
            # The first item yielded by os.walk describes source_dir itself;
            # index 1 of that tuple is the list of its subdirectory names.
            for dirname in next(os.walk(self.source_dir))[1]:
                source = os.path.join(self.source_dir, dirname)
                sink = os.path.join(self.sink_dir, dirname)
                publish_url = self.publish_url + dirname + "/"
                self.__execute_sync__(source, sink, publish_url, src_desc)
        else:
            self.__execute_sync__(self.source_dir, self.sink_dir,
                                  self.publish_url, src_desc)

        if new_src_desc or count_lists != len(src_desc.resources):
            # Publish the resource description.
            with open(self.src_desc_path, "w") as src_desc_file:
                src_desc_file.write(src_desc.as_xml())
                print("New resource description. See %s" % self.src_desc_url)

        self.report()
예제 #5
0
    def synchronize(self):
        """
        Publish the resources found in source_dir in accordance with the
        Resourcesync Framework in sink_dir.

        Sequence:
          1. Create source_dir and sink_dir when they do not exist yet.
          2. Store the result of verify_handshake() in self.handshake and
             return early when it is None.
          3. Make sure the RS_WELL_KNOWN directory exists under sink_dir.
          4. Parse the document at src_desc_path, if that file exists, into
             a SourceDescription.
          5. Call __execute_sync__ once per immediate subdirectory of
             source_dir when FILE_INDEX is present there, otherwise once
             for source_dir itself.
          6. (Re)write the document at src_desc_path when it did not exist
             before or when the number of resources in it changed.
          7. Call report().

        Progress messages go to stdout. print(...) is called with a single
        argument and the next() builtin is used on the walk generator so
        the method behaves the same on Python 2.6+ and Python 3.
        """
        for directory in (self.source_dir, self.sink_dir):
            if not os.path.isdir(directory):
                os.makedirs(directory)
                print("Created %s" % directory)

        self.handshake = self.verify_handshake()
        if self.handshake is None:
            return

        # Make sure the well-known directory exists in the sink.
        wellknown = os.path.join(self.sink_dir, RS_WELL_KNOWN)
        if not os.path.isdir(wellknown):
            os.makedirs(wellknown)

        src_desc = SourceDescription()
        # Load the existing resource description, if any.
        new_src_desc = not os.path.isfile(self.src_desc_path)
        if not new_src_desc:
            with open(self.src_desc_path, "r") as src_desc_file:
                Sitemap().parse_xml(src_desc_file, resources=src_desc)

        # Remember how many entries there were so a change can be detected.
        count_lists = len(src_desc.resources)

        # The existence of FILE_INDEX in source_dir means each immediate
        # subdirectory is synchronized separately; otherwise source_dir
        # itself is synchronized.
        index_file = os.path.join(self.source_dir, FILE_INDEX)
        if os.path.isfile(index_file):
            # The first item yielded by os.walk describes source_dir itself;
            # index 1 of that tuple is the list of its subdirectory names.
            for dirname in next(os.walk(self.source_dir))[1]:
                source = os.path.join(self.source_dir, dirname)
                sink = os.path.join(self.sink_dir, dirname)
                publish_url = self.publish_url + dirname + "/"
                self.__execute_sync__(source, sink, publish_url, src_desc)
        else:
            self.__execute_sync__(self.source_dir, self.sink_dir, self.publish_url, src_desc)

        if new_src_desc or count_lists != len(src_desc.resources):
            # Publish the resource description.
            with open(self.src_desc_path, "w") as src_desc_file:
                src_desc_file.write(src_desc.as_xml())
                print("New resource description. See %s" % self.src_desc_url)

        self.report()
예제 #6
0
 def test_ex_12(self):
     """Read spec example 12: a Source Description covering 3 sets of resources."""
     first_uri = 'http://example.com/capabilitylist1.xml'
     expected_uris = [first_uri,
                      'http://example.com/capabilitylist2.xml',
                      'http://example.com/capabilitylist3.xml']
     description = SourceDescription()
     description.read(
         uri='tests/testdata/examples_from_spec/resourcesync_ex_12.xml')
     self.assertEqual(len(description), 3)
     self.assertEqual(description.uris(), expected_uris)
     # Entries are looked up by URI; check the first one in detail.
     first = description[first_uri]
     self.assertEqual(first.capability, 'capabilitylist')
     self.assertEqual(first.describedby,
                      'http://example.com/info_about_set1_of_resources.xml')
예제 #7
0
 def test04_parse(self):
     """Parse a Source Description with three capability lists from a string."""
     document = (
         '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
         '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"'
         ' xmlns:rs="http://www.openarchives.org/rs/terms/">'
         '<rs:ln href="http://example.org/about" rel="describedby" />'
         '<rs:md capability="description" />'
         '<url><loc>http://example.org/ds1/cl.xml</loc>'
         '<rs:md capability="capabilitylist" /></url>'
         '<url><loc>http://example.org/ds2/cl.xml</loc>'
         '<rs:md capability="capabilitylist" /></url>'
         '<url><loc>http://example.org/ds3/cl.xml</loc>'
         '<rs:md capability="capabilitylist" /></url>'
         '</urlset>'
     )
     description = SourceDescription()
     description.parse(str=document)
     self.assertEqual(description.link_href('describedby'),
                      'http://example.org/about',
                      'describedby link')
     self.assertEqual(description.capability, 'description')
     self.assertEqual(len(description.resources), 3, 'got 3 capacility lists')
     # Unpacking also enforces that exactly three entries were parsed.
     first, _second, _third = description.resources
     self.assertEqual(first.uri, 'http://example.org/ds1/cl.xml')
     self.assertEqual(first.capability, 'capabilitylist')
예제 #8
0
 def test_build_ex_12(self):
     """Build a Source Description with describedby links (spec example 12)."""
     # (capability list URI, describedby URI) pairs, added in this order.
     capability_lists = (
         ('http://example.com/capabilitylist1.xml',
          'http://example.com/info_about_set1_of_resources.xml'),
         ('http://example.com/capabilitylist2.xml',
          'http://example.com/info_about_set2_of_resources.xml'),
         ('http://example.com/capabilitylist3.xml',
          'http://example.com/info_about_set3_of_resources.xml'),
     )
     description = SourceDescription()
     description.describedby = 'http://example.com/info_about_source.xml'
     for list_uri, about_uri in capability_lists:
         capability_list = CapabilityList(uri=list_uri)
         capability_list.describedby = about_uri
         description.add_capability_list(capability_list)
     expected_xml = self._open_ex('resourcesync_ex_12').read()
     self._assert_xml_equal(description.as_xml(), expected_xml)
예제 #9
0
 def test03_a_bunch(self):
     """Serialize a Source Description after adding three capability lists."""
     expected_xml = (
         '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
         '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"'
         ' xmlns:rs="http://www.openarchives.org/rs/terms/">'
         '<rs:ln href="http://example.org/about" rel="describedby" />'
         '<rs:md capability="description" />'
         '<url><loc>http://example.org/ds1/cl.xml</loc>'
         '<rs:md capability="capabilitylist" /></url>'
         '<url><loc>http://example.org/ds2/cl.xml</loc>'
         '<rs:md capability="capabilitylist" /></url>'
         '<url><loc>http://example.org/ds3/cl.xml</loc>'
         '<rs:md capability="capabilitylist" /></url>'
         '</urlset>'
     )
     description = SourceDescription()
     description.describedby = "http://example.org/about"
     self.assertEqual(len(description), 0)
     for list_uri in ("http://example.org/ds1/cl.xml",
                      "http://example.org/ds2/cl.xml",
                      "http://example.org/ds3/cl.xml"):
         description.add_capability_list(list_uri)
     self.assertEqual(len(description), 3)
     self.assertEqual(description.as_xml(), expected_xml)
예제 #10
0
 def write_source_description(self,
                              capability_lists=None,
                              outfile=None,
                              links=None):
     """Write a ResourceSync Description document to outfile or STDOUT.

     Parameters:
         capability_lists: optional iterable of capability list URIs; each
             one is passed to add_capability_list(). None adds nothing.
         outfile: passed as ``basename`` to the description's write();
             when None the XML is printed to STDOUT instead.
         links: passed to SourceDescription as ``ln``.
     """
     rsd = SourceDescription(ln=links)
     rsd.pretty_xml = self.pretty_xml
     if capability_lists is not None:
         for uri in capability_lists:
             rsd.add_capability_list(uri)
     if outfile is None:
         # Single-argument print(...) produces the same output on
         # Python 2 and Python 3.
         print(rsd.as_xml())
     else:
         rsd.write(basename=outfile)
예제 #11
0
파일: client.py 프로젝트: EHRI/resync
 def write_source_description(self,capability_lists=None,outfile=None,links=None):
     """Write a ResourceSync Description document to outfile or STDOUT.

     Parameters:
         capability_lists: optional iterable of capability list URIs; each
             one is passed to add_capability_list(). None adds nothing.
         outfile: passed as ``basename`` to the description's write();
             when None the XML is printed to STDOUT instead.
         links: passed to SourceDescription as ``ln``.
     """
     rsd = SourceDescription(ln=links)
     rsd.pretty_xml = self.pretty_xml
     if capability_lists is not None:
         for uri in capability_lists:
             rsd.add_capability_list(uri)
     if outfile is None:
         # Single-argument print(...) produces the same output on
         # Python 2 and Python 3.
         print(rsd.as_xml())
     else:
         rsd.write(basename=outfile)
예제 #12
0
 def get(self):
     """Handle GET by returning the Source Description as an XML document.

     A new SourceDescription is filled from ``self.source`` (its
     ``describedby_uri`` and ``capability_list_uri``) on every request.
     """
     description = SourceDescription()
     origin = self.source
     description.describedby = origin.describedby_uri
     description.add_capability_list(origin.capability_list_uri)
     # The header is set first, then the serialized document is written.
     self.set_header("Content-Type", "application/xml")
     payload = description.as_xml()
     self.write(payload)
예제 #13
0
 def test_build_ex_07(self):
     """Build spec example 7 and compare it with the stored example XML."""
     description = SourceDescription()
     description.describedby = 'http://example.com/info-about-source.xml'
     capability_list = Resource(
         uri='http://example.com/dataset1/capabilitylist.xml',
         capability='capabilitylist',
     )
     capability_list.link_set(
         rel='describedby',
         href='http://example.com/info_about_set1_of_resources.xml',
     )
     description.add(capability_list)
     expected_xml = self._open_ex('resourcesync_ex_7').read()
     self._assert_xml_equal(description.as_xml(), expected_xml)
예제 #14
0
	# store newest modified time of newest resource
	new_lasttime = time_sorted_resources[-1]["time"].strftime("%Y-%m-%dT%H:%M:%SZ")
	# write this time to the timefile
	timefile_out = open(args.time_file, "w")
	timefile_out.write(new_lasttime)
	timefile_out.close()

# Downloads all resource lists
def get_resource_lists(resources):
    """Download and process every resource list referenced by *resources*.

    resources: mapping whose values expose a ``uri`` attribute. Each uri is
        fetched with requests.get, the response text is parsed into a
        ResourceList, and that list's ``resources`` are handed to
        get_resources().
    """
    # Only the values are used. .values() exists on both Python 2 and 3,
    # whereas .iteritems() was removed in Python 3.
    for resource_list_resource in resources.values():
        resource_list_response = requests.get(resource_list_resource.uri)
        resource_list = ResourceList()
        resource_list.parse(str=resource_list_response.text)
        get_resources(resource_list.resources)



# Download the source description XML from args.source_description_uri.
# NOTE(review): per the original author, the URI should actually be
# discovered via robots.txt or the .well-known location.
source_desc_response = requests.get(args.source_description_uri)
source_desc = SourceDescription()
source_desc.parse(str=source_desc_response.text)
# Single-element unpacking: this raises unless the source description
# yields exactly one resource.
[capabilitylist_resource] = source_desc.resources

# Download the capability list obtained from the source description
capabilitylist_response =  requests.get(capabilitylist_resource.uri)
capabilitylist = CapabilityList()
capabilitylist.parse(str=capabilitylist_response.text)

# Download the resource lists obtained from the capability list
get_resource_lists(capabilitylist.resources)
예제 #15
0
 def test01_empty(self):
     """Check the XML produced for a Source Description with no entries."""
     expected_xml = (
         '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
         '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"'
         ' xmlns:rs="http://www.openarchives.org/rs/terms/">'
         '<rs:ln href="http://example.org/about" rel="describedby" />'
         '<rs:md capability="description" />'
         '</urlset>'
     )
     description = SourceDescription()
     description.describedby = "http://example.org/about"
     self.assertEqual(len(description), 0)
     # md_at is cleared before serializing; the expected <rs:md> element
     # above carries only the capability attribute.
     description.md_at = None
     self.assertEqual(description.as_xml(), expected_xml)
예제 #16
0
    new_lasttime = time_sorted_resources[-1]["time"].strftime(
        "%Y-%m-%dT%H:%M:%SZ")
    # write this time to the timefile
    timefile_out = open(args.time_file, "w")
    timefile_out.write(new_lasttime)
    timefile_out.close()


# Downloads all resource lists
def get_resource_lists(resources):
    """Download and process every resource list referenced by *resources*.

    resources: mapping whose values expose a ``uri`` attribute. Each uri is
        fetched with requests.get, the response text is parsed into a
        ResourceList, and that list's ``resources`` are handed to
        get_resources().
    """
    # Only the values are used. .values() exists on both Python 2 and 3,
    # whereas .iteritems() was removed in Python 3.
    for resource_list_resource in resources.values():
        resource_list_response = requests.get(resource_list_resource.uri)
        resource_list = ResourceList()
        resource_list.parse(str=resource_list_response.text)
        get_resources(resource_list.resources)


# Download the source description XML from args.source_description_uri.
# NOTE(review): per the original author, the URI should actually be
# discovered via robots.txt or the .well-known location.
source_desc_response = requests.get(args.source_description_uri)
source_desc = SourceDescription()
source_desc.parse(str=source_desc_response.text)
# Single-element unpacking: this raises unless the source description
# yields exactly one resource.
[capabilitylist_resource] = source_desc.resources

# Download the capability list obtained from the source description
capabilitylist_response = requests.get(capabilitylist_resource.uri)
capabilitylist = CapabilityList()
capabilitylist.parse(str=capabilitylist_response.text)

# Download the resource lists obtained from the capability list
get_resource_lists(capabilitylist.resources)
예제 #17
0
# Write the resource list to args.resource_dir + "/resource-list.xml".
# The with-block closes the file even if serializing or writing fails.
with open(args.resource_dir + "/resource-list.xml", "w") as resource_list_file:
    resource_list_file.write(rl.as_xml())
print("Wrote resource list to: " + args.resource_dir + "/resource-list.xml")

timestamps.sort()

caps = CapabilityList()
caps.add_capability(rl, args.resource_url + "resource-list.xml")
if timestamps:
    # After sorting, the first entry is the smallest timestamp.
    caps.md['from'] = timestamps[0]

# Write the capability list to args.resource_dir + "/capability-list.xml".
with open(args.resource_dir + "/capability-list.xml", "w") as capability_list_file:
    capability_list_file.write(caps.as_xml())

print("Wrote capability list to: " + args.resource_dir + "/capability-list.xml")

rsd = SourceDescription()
rsd.md_at = None
rsd.add_capability_list(args.resource_url + "capability-list.xml")

# Write the source description to args.resource_dir + "/resourcesync".
with open(args.resource_dir + "/resourcesync", "w") as source_description_file:
    source_description_file.write(rsd.as_xml())

print("Wrote source description to: " + args.resource_dir + "/resourcesync")
예제 #18
0
# Write the resource list to args.resource_dir + "/resource-list.xml".
# The with-block closes the file even if serializing or writing fails.
with open(args.resource_dir + "/resource-list.xml", "w") as resource_list_file:
    resource_list_file.write(rl.as_xml())
print("Wrote resource list to: " + args.resource_dir + "/resource-list.xml")

timestamps.sort()

caps = CapabilityList()
caps.add_capability(rl, args.resource_url + "resource-list.xml")
if timestamps:
    # After sorting, the first entry is the smallest timestamp.
    caps.md['from'] = timestamps[0]

# Write the capability list to args.resource_dir + "/capability-list.xml".
with open(args.resource_dir + "/capability-list.xml", "w") as capability_list_file:
    capability_list_file.write(caps.as_xml())

print("Wrote capability list to: " + args.resource_dir + "/capability-list.xml")

rsd = SourceDescription()
rsd.md_at = None
rsd.add_capability_list(args.resource_url + "capability-list.xml")

# Write the source description to args.resource_dir + "/resourcesync".
with open(args.resource_dir + "/resourcesync", "w") as source_description_file:
    source_description_file.write(rsd.as_xml())

print("Wrote source description to: " + args.resource_dir + "/resourcesync")