Esempio n. 1
0
 def test01_as_xml(self):
     """Serialise a two-entry ChangeDump and check capability and resource a.zip.

     The resource added with ``timestamp=1`` is expected to appear with a
     ``lastmod`` of ``1970-01-01T00:00:01Z``.
     """
     dump = ChangeDump()
     for name, stamp in (('a.zip', 1), ('b.zip', 2)):
         dump.add(Resource(name, timestamp=stamp))
     serialized = dump.as_xml()
     capability_match = re.search(r'<rs:md .*capability="changedump"', serialized)
     self.assertTrue(capability_match, 'XML has capability')
     resource_match = re.search(
         r'<url><loc>a.zip</loc><lastmod>1970-01-01T00:00:01Z</lastmod></url>',
         serialized)
     self.assertTrue(resource_match, 'XML has resource a')
Esempio n. 2
0
 def test01_as_xml(self):
     """Check that ChangeDump.as_xml() emits the capability and resource a.zip.

     Two resources are added; the XML must contain an ``<rs:md>`` element
     with ``capability="changedump"`` and the ``<url>`` entry for a.zip with
     its timestamp rendered as ``1970-01-01T00:00:01Z``.
     """
     rd = ChangeDump()
     rd.add( Resource('a.zip',timestamp=1) )
     rd.add( Resource('b.zip',timestamp=2) )
     xml = rd.as_xml()
     # No debug print here: the former Python 2 ``print xml`` statement is a
     # SyntaxError on Python 3 and only cluttered the test output.
     self.assertTrue( re.search(r'<rs:md .*capability="changedump"', xml), 'XML has capability' )
     self.assertTrue( re.search(r'<url><loc>a.zip</loc><lastmod>1970-01-01T00:00:01Z</lastmod></url>', xml), 'XML has resource a' )
Esempio n. 3
0
 def test01_as_xml(self):
     """as_xml() output carries the changedump capability and the a.zip entry."""
     capability_pattern = r'<rs:md .*capability="changedump"'
     resource_a_pattern = r"<url><loc>a.zip</loc><lastmod>1970-01-01T00:00:01Z</lastmod></url>"

     dump = ChangeDump()
     dump.add(Resource("a.zip", timestamp=1))
     dump.add(Resource("b.zip", timestamp=2))
     output = dump.as_xml()

     self.assertTrue(re.search(capability_pattern, output), "XML has capability")
     self.assertTrue(re.search(resource_a_pattern, output), "XML has resource a")
Esempio n. 4
0
    def test10_parse(self):
        xml='<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n\
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">\
<rs:md capability="changedump" from="2013-01-01"/>\
<url><loc>http://example.com/a.zip</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:md length="12345" /></url>\
<url><loc>http://example.com/b.zip</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:md length="56789" /></url>\
</urlset>'
        rd=ChangeDump()
        rd.parse(fh=io.StringIO(xml))
        self.assertEqual( len(rd.resources), 2, 'got 2 resource dumps')
        self.assertEqual( rd.md['capability'], 'changedump', 'capability set' )
        self.assertEqual( rd.md_from, '2013-01-01' )
        self.assertTrue( 'http://example.com/a.zip' in rd.resources )
        self.assertTrue( rd.resources['http://example.com/a.zip'].length, 12345 )
        self.assertTrue( 'http://example.com/b.zip' in rd.resources )
        self.assertTrue( rd.resources['http://example.com/b.zip'].length, 56789 )
Esempio n. 5
0
    def test10_parse(self):
        xml = '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n\
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">\
<rs:md capability="changedump" from="2013-01-01"/>\
<url><loc>http://example.com/a.zip</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:md length="12345" /></url>\
<url><loc>http://example.com/b.zip</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:md length="56789" /></url>\
</urlset>'
        rd = ChangeDump()
        rd.parse(fh=io.StringIO(xml))
        self.assertEqual(len(rd.resources), 2, 'got 2 resource dumps')
        self.assertEqual(rd.md['capability'], 'changedump', 'capability set')
        self.assertEqual(rd.md_from, '2013-01-01')
        self.assertTrue('http://example.com/a.zip' in rd.resources)
        self.assertTrue(rd.resources['http://example.com/a.zip'].length, 12345)
        self.assertTrue('http://example.com/b.zip' in rd.resources)
        self.assertTrue(rd.resources['http://example.com/b.zip'].length, 56789)
Esempio n. 6
0
 def test_build_ex_22(self):
     """Change Dump with three dump files"""
     # One row per dump file:
     # (uri, lastmod, length, md_from, md_until, manifest uri)
     dump_files = (
         ('http://example.com/20130101-changedump.zip',
          '2013-01-01T23:59:59Z', 3109,
          "2013-01-01T00:00:00Z", "2013-01-02T00:00:00Z",
          'http://example.com/20130101-changedump-manifest.xml'),
         ('http://example.com/20130102-changedump.zip',
          '2013-01-02T23:59:59Z', 6629,
          "2013-01-02T00:00:00Z", "2013-01-03T00:00:00Z",
          'http://example.com/20130102-changedump-manifest.xml'),
         ('http://example.com/20130103-changedump.zip',
          '2013-01-03T23:59:59Z', 8124,
          "2013-01-03T00:00:00Z", "2013-01-04T00:00:00Z",
          'http://example.com/20130103-changedump-manifest.xml'),
     )

     cd = ChangeDump()
     cd.up = 'http://example.com/dataset1/capabilitylist.xml'
     cd.md_from = "2013-01-01T00:00:00Z"

     zips = []
     for uri, lastmod, length, start, end, manifest in dump_files:
         entry = Resource(uri=uri,
                          lastmod=lastmod,
                          length=length,
                          md_from=start,
                          md_until=end,
                          mime_type="application/zip")
         entry.contents = manifest
         zips.append(entry)
     cd.add(zips)

     expected_xml = self._open_ex('resourcesync_ex_22').read()
     self._assert_xml_equal(cd.as_xml(), expected_xml)
Esempio n. 7
0
    def test12_parse_bad_capability(self):
        """Parsing must fail when <rs:md> declares capability="bad_capability"."""
        document = (
            "<?xml version='1.0' encoding='UTF-8'?>\n"
            '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">'
            '<rs:md capability="bad_capability" from="2013-01-01"/>'
            '<url><loc>http://example.com/bad_res_1</loc><lastmod>2012-03-14T18:37:36Z</lastmod></url>'
            '</urlset>'
        )
        dump = ChangeDump()
        stream = io.StringIO(document)
        with self.assertRaises(SitemapParseError):
            dump.parse(fh=stream)
Esempio n. 8
0
    def test11_parse_no_capability(self):
        """Parsing must fail when the top-level <rs:md> has no capability attribute."""
        header = "<?xml version='1.0' encoding='UTF-8'?>\n"
        body = ''.join([
            '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">',
            '<rs:md at="2013-01-01"/>',
            '<url><loc>http://example.com/a.zip</loc><lastmod>2012-03-14T18:37:36Z</lastmod><rs:md length="12" /></url>',
            '</urlset>',
        ])
        source = io.StringIO(header + body)
        dump = ChangeDump()
        self.assertRaises(SitemapParseError, dump.parse, fh=source)
Esempio n. 9
0
 def test_build_ex_13(self):
     """Capability List document with 4 entries"""
     cl = CapabilityList()
     cl.describedby = 'http://example.com/info_about_set1_of_resources.xml'
     cl.up = 'http://example.com/resourcesync_description.xml'
     # Capabilities are added in this order: (document class, its URI).
     entries = (
         (ResourceList, 'http://example.com/dataset1/resourcelist.xml'),
         (ResourceDump, 'http://example.com/dataset1/resourcedump.xml'),
         (ChangeList, 'http://example.com/dataset1/changelist.xml'),
         (ChangeDump, 'http://example.com/dataset1/changedump.xml'),
     )
     for document_cls, document_uri in entries:
         cl.add_capability(capability=document_cls(uri=document_uri))
     expected_xml = self._open_ex('resourcesync_ex_13').read()
     self._assert_xml_equal(cl.as_xml(), expected_xml)
Esempio n. 10
0
    def get_change_dump_xml(self, from_date):
        """
        Build the Change Dump XML for this repository.

        One zip resource is added for every non-deleted record change
        returned by ``_get_record_changes_with_interval(from_date)``. A
        change that raises while being converted is logged and skipped so
        that the remaining changes are still published.

        :param from_date: Passed through to
            ``_get_record_changes_with_interval`` to select record changes.
        :return: The Change Dump document as produced by
            ``ChangeDump.as_xml()``, or None when ``_validation()`` fails.
        """
        if not self._validation():
            return None
        change_dump = ChangeDump()
        change_dump.up = '{}resync/capability.xml'.format(request.url_root)
        change_dump.index = '{}resync/{}/changedump.xml'.format(
            request.url_root, self.repository_id)

        record_changes = self._get_record_changes_with_interval(from_date)

        for data in record_changes:
            try:
                next_ch = self._next_change(data, record_changes)
                if data.get('status') == 'deleted':
                    continue
                # "<record_id>.<record_version>" path segment shared by the
                # zip location and its manifest link.
                record_key = '{}.{}'.format(data.get('record_id'),
                                            data.get('record_version'))
                loc = '{}resync/{}/{}/change_dump_content.zip'.format(
                    request.url_root, self.repository_id, record_key)

                # The interval ends at the 'updated' value of the change
                # returned by _next_change when there is one, otherwise at
                # the current UTC time.
                md_until = next_ch.get('updated') if next_ch else None
                if not md_until:
                    md_until = datetime.datetime.now(
                        datetime.timezone.utc).isoformat()

                rc = Resource(loc,
                              lastmod=data.get("updated"),
                              mime_type='application/zip',
                              md_from=data.get('updated'),
                              md_until=md_until,
                              ln=[])
                if self.change_dump_manifest:
                    rc.ln.append({
                        'rel': 'contents',
                        'href':
                        '{}resync/{}/{}/changedump_manifest.xml'.format(
                            request.url_root, self.repository_id,
                            record_key),
                        'type': 'application/xml'
                    })
                change_dump.add(rc)
            except Exception:
                # Best effort: one bad change must not prevent the rest of
                # the dump from being generated. logger.exception records
                # the traceback in the application log rather than stdout.
                current_app.logger.exception(
                    'Failed to add record change to change dump')
                continue

        return change_dump.as_xml()