コード例 #1
0
 def test04_dump_multi_file_max_size(self):
     """Write 12 resources with max_size=2000 and inspect the two zip files.

     Resources 'a' through 'l' are added to a ResourceList, dumped with
     Dump.write(), and the member names (and, for the first archive, the
     member sizes) of the resulting zip files are checked.
     """
     rl = ResourceList()
     for letter in map(chr, range(ord('a'), ord('l') + 1)):
         uri = 'http://ex.org/%s' % (letter)
         fname = 'tests/testdata/a_to_z/%s' % (letter)
         rl.add(Resource(uri, path=fname))
     self.assertEqual(len(rl), 12)
     d2 = Dump(rl)
     tmpbase = os.path.join(self.tmpdir, 'test0f_')
     d2.max_size = 2000
     n = d2.write(tmpbase)
     self.assertEqual(n, 2, 'expect to write 2 dump files')
     self.assertTrue(os.path.isfile(tmpbase + '00000.zip'))
     self.assertTrue(os.path.isfile(tmpbase + '00001.zip'))
     # Look at the first file in detail; the context manager closes the
     # archive even when one of the assertions fails
     zipf = tmpbase + '00000.zip'
     with zipfile.ZipFile(zipf, 'r') as zo:
         self.assertEqual(zo.namelist(),
                          ['manifest.xml', 'a', 'b', 'c', 'd', 'e', 'f'])
         # The size of manifest.xml is not checked here
         self.assertEqual(zo.getinfo('a').file_size, 9)
         self.assertEqual(zo.getinfo('b').file_size, 1116)
         self.assertEqual(zo.getinfo('c').file_size, 32)
         self.assertEqual(zo.getinfo('d').file_size, 13)
         self.assertEqual(zo.getinfo('e').file_size, 20)
         self.assertEqual(zo.getinfo('f').file_size, 1625)
     os.unlink(zipf)
     # Check the second file has the expected contents
     zipf = tmpbase + '00001.zip'
     with zipfile.ZipFile(zipf, 'r') as zo:
         self.assertEqual(zo.namelist(),
                          ['manifest.xml', 'g', 'h', 'i', 'j', 'k', 'l'])
     os.unlink(zipf)
コード例 #2
0
 def test20_as_xml(self):
     """Serialize a two-resource list and look for the capability and resource a."""
     resource_list = ResourceList()
     for uri, stamp in (('a', 1), ('b', 2)):
         resource_list.add(Resource(uri, timestamp=stamp))
     serialized = resource_list.as_xml()
     capability_match = re.search(r'<rs:md .*capability="resourcelist"', serialized)
     self.assertTrue(capability_match, 'XML has capability')
     resource_match = re.search(
         r'<url><loc>a</loc><lastmod>1970-01-01T00:00:01Z</lastmod></url>',
         serialized)
     self.assertTrue(resource_match, 'XML has resource a')
コード例 #3
0
ファイル: test_dump.py プロジェクト: semantalytics/resync
 def test00_dump_creation(self):
     """Run check_files() on a two-resource list and expect total_size 28."""
     resources = ResourceList()
     entries = (
         ("http://ex.org/a", 1, "resync/test/testdata/a"),
         ("http://ex.org/b", 2, "resync/test/testdata/b"),
     )
     for uri, length, path in entries:
         resources.add(Resource(uri, length=length, path=path))
     dump = Dump()
     dump.check_files(resource_list=resources)
     self.assertEqual(dump.total_size, 28)
コード例 #4
0
 def test11_bad_size(self):
     """A resource declared with length=9999 passes only with check_length=False.

     The default check_files() call is expected to raise DumpError.
     """
     resources = ResourceList()
     oversized = Resource('http://ex.org/a', length=9999,
                          path='tests/testdata/a')
     resources.add(oversized)
     dump = Dump(resources)
     unchecked_ok = dump.check_files(check_length=False)
     self.assertTrue(unchecked_ok)
     self.assertRaises(DumpError, dump.check_files)
コード例 #5
0
 def test5_add_changed_resources(self):
     """Add created and updated resources to a ChangeList, then collapse it.

     Two resource lists are passed to add_changed_resources() with changes
     'created' and 'updated'. The combined change list is then added to a
     fresh ResourceList with replace=True, which should leave one entry
     per URI.
     """
     added = ResourceList()
     added.add(Resource('a', timestamp=1))
     added.add(Resource('d', timestamp=4))
     self.assertEqual(len(added), 2, "2 things in added resource_list")
     changes = ChangeList()
     changes.add_changed_resources(added, change='created')
     self.assertEqual(len(changes), 2, "2 things added")
     i = iter(changes)
     # Use the next() builtin: i.next() only exists on Python 2 iterators
     first = next(i)
     self.assertEqual(first.uri, 'a', "changes[0].uri=a")
     self.assertEqual(first.timestamp, 1, "changes[0].timestamp=1")
     self.assertEqual(first.change, 'created')
     second = next(i)
     self.assertEqual(second.timestamp, 4, "changes[1].timestamp=4")
     self.assertEqual(second.change, 'created', "changes[1].change=created")
     # Now add some with updated (one same URI, one different)
     updated = ResourceList()
     updated.add(Resource('a', timestamp=5))
     updated.add(Resource('b', timestamp=6))
     self.assertEqual(len(updated), 2, "2 things in updated resource_list")
     changes.add_changed_resources(updated, change='updated')
     self.assertEqual(len(changes), 4, "4 = 2 old + 2 things updated")
     # Make new resource_list from the changes which should not have dupes
     dst = ResourceList()
     dst.add(changes, replace=True)
     self.assertEqual(len(dst), 3, "3 unique resources")
     # The later entry for 'a' (timestamp 5) replaces the earlier one (1)
     self.assertEqual(dst.resources['a'].timestamp, 5)
     self.assertEqual(dst.resources['a'].change, 'updated')
     self.assertEqual(dst.resources['b'].timestamp, 6)
     self.assertEqual(dst.resources['b'].change, 'updated')
     self.assertEqual(dst.resources['d'].timestamp, 4)
     self.assertEqual(dst.resources['d'].change, 'created')
コード例 #6
0
ファイル: test_dump.py プロジェクト: uweschmitt/resync
 def test04_dump_multi_file_max_size(self):
     """Write 12 resources with max_size=2000 and inspect the two zip files.

     Resources 'a' through 'l' are added to a ResourceList, dumped with
     Dump.write(), and the member names (and, for the first archive, the
     member sizes) of the resulting zip files are checked.
     """
     rl = ResourceList()
     # range() instead of xrange(): works on both Python 2 and Python 3
     for letter in map(chr, range(ord('a'), ord('l') + 1)):
         uri = 'http://ex.org/%s' % (letter)
         fname = 'resync/test/testdata/a_to_z/%s' % (letter)
         rl.add(Resource(uri, path=fname))
     self.assertEqual(len(rl), 12)
     d2 = Dump(rl)
     tmpbase = os.path.join(self.tmpdir, 'test0f_')
     d2.max_size = 2000
     n = d2.write(tmpbase)
     self.assertEqual(n, 2, 'expect to write 2 dump files')
     self.assertTrue(os.path.isfile(tmpbase + '00000.zip'))
     self.assertTrue(os.path.isfile(tmpbase + '00001.zip'))
     # Look at the first file in detail; the context manager closes the
     # archive even when one of the assertions fails
     zipf = tmpbase + '00000.zip'
     with zipfile.ZipFile(zipf, 'r') as zo:
         self.assertEqual(zo.namelist(),
                          ['manifest.xml', 'a', 'b', 'c', 'd', 'e', 'f'])
         # The size of manifest.xml is not checked here
         self.assertEqual(zo.getinfo('a').file_size, 9)
         self.assertEqual(zo.getinfo('b').file_size, 1116)
         self.assertEqual(zo.getinfo('c').file_size, 32)
         self.assertEqual(zo.getinfo('d').file_size, 13)
         self.assertEqual(zo.getinfo('e').file_size, 20)
         self.assertEqual(zo.getinfo('f').file_size, 1625)
     os.unlink(zipf)
     # Check the second file has the expected contents
     zipf = tmpbase + '00001.zip'
     with zipfile.ZipFile(zipf, 'r') as zo:
         self.assertEqual(zo.namelist(),
                          ['manifest.xml', 'g', 'h', 'i', 'j', 'k', 'l'])
     os.unlink(zipf)
コード例 #7
0
 def test20_as_xml(self):
     """Serialize a two-resource list and look for the capability and resource a.

     The leftover debug ``print xml`` statement has been removed: it only
     added noise to the test output and is a syntax error on Python 3.
     """
     rl = ResourceList()
     rl.add(Resource('a', timestamp=1))
     rl.add(Resource('b', timestamp=2))
     xml = rl.as_xml()
     self.assertTrue(re.search(r'<rs:md .*capability="resourcelist"', xml),
                     'XML has capability')
     self.assertTrue(
         re.search(r'<url><loc>a</loc><lastmod>1970-01-01T00:00:01Z</lastmod></url>', xml),
         'XML has resource a')
コード例 #8
0
 def test_build_ex_01(self):
     """Build a simple Resource List and compare it with example resourcesync_ex_1."""
     resource_list = ResourceList()
     resource_list.md_at = '2013-01-03T09:00:00Z'
     for uri in ('http://example.com/res1', 'http://example.com/res2'):
         resource_list.add(Resource(uri))
     expected_xml = self._open_ex('resourcesync_ex_1').read()
     generated_xml = resource_list.as_xml()
     self._assert_xml_equal(generated_xml, expected_xml)
コード例 #9
0
 def test_09_print_from_iter(self):
     """Pass an iterator over a ResourceList to resources_as_xml and compare the XML."""
     listing = ResourceList()
     listing.add(Resource(uri='a', lastmod='2001-01-01', length=1234))
     listing.add(Resource(uri='b', lastmod='2002-02-02', length=56789))
     resource_iter = iter(listing)
     produced = Sitemap().resources_as_xml(resource_iter)
     expected = "<?xml version='1.0' encoding='UTF-8'?>\n<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:rs=\"http://www.openarchives.org/rs/terms/\"><url><loc>a</loc><lastmod>2001-01-01T00:00:00Z</lastmod><rs:md length=\"1234\" /></url><url><loc>b</loc><lastmod>2002-02-02T00:00:00Z</lastmod><rs:md length=\"56789\" /></url></urlset>"
     self.assertEqual(produced, expected)
コード例 #10
0
ファイル: test_sitemap.py プロジェクト: resync/resync
 def test_08_print_non_ascii_uri(self):
     """Check that non-ASCII URIs show up inside <loc> elements of the XML."""
     resources = ResourceList(md={'capability': 'resourcelist', 'modified': None})
     for uri in (u'a_\u00c3_b', u'c_\u1234_d'):
         resources.add(Resource(uri=uri))
     xml = Sitemap().resources_as_xml(resources)
     expected_patterns = (
         u'<loc>a_.*_b</loc>',
         u'<loc>a_\u00c3_b</loc>',
         u'<loc>c_\u1234_d</loc>',
     )
     for pattern in expected_patterns:
         self.assertTrue(re.search(pattern, xml))
コード例 #11
0
ファイル: test_dump.py プロジェクト: uweschmitt/resync
 def test10_no_path(self):
     """check_files() should raise DumpError when a resource is added without a path."""
     with_path = Resource('http://ex.org/a', length=7,
                          path='resync/test/testdata/a')
     resources = ResourceList()
     resources.add(with_path)
     # Second resource is created without a path argument
     resources.add(Resource('http://ex.org/b', length=21))
     dump = Dump(resources)
     self.assertRaises(DumpError, dump.check_files)
コード例 #12
0
 def test20_as_xml(self):
     """Serialize a two-resource list and check capability, modified and resource a.

     The leftover debug ``print xml`` statement has been removed: it only
     added noise to the test output and is a syntax error on Python 3.
     """
     rl = ResourceList()
     rl.add(Resource('a', timestamp=1))
     rl.add(Resource('b', timestamp=2))
     xml = rl.as_xml()
     self.assertTrue(re.search(r'<rs:md .*capability="resourcelist"', xml),
                     'XML has capability')
     # The pattern requires the closing quote right after "Z", so a
     # timestamp with fractional seconds would not match
     self.assertTrue(
         re.search(r'<rs:md .*modified="\d\d\d\d\-\d\d\-\d\dT\d\d:\d\d:\d\dZ"', xml),
         'XML has modified to seconds precision (and not more)')
     self.assertTrue(
         re.search(r'<url><loc>a</loc><lastmod>1970-01-01T00:00:01Z</lastmod></url>', xml),
         'XML has resource a')
コード例 #13
0
ファイル: test_sitemap.py プロジェクト: ramonmassip/resync
 def test_08_print_non_ascii_uri(self):
     """Check that non-ASCII URIs show up inside <loc> elements of the XML."""
     listing = ResourceList(md={'capability': 'resourcelist', 'modified': None})
     for uri in (u'a_\u00c3_b', u'c_\u1234_d'):
         listing.add(Resource(uri=uri))
     rendered = Sitemap().resources_as_xml(listing)
     for pattern in (u'<loc>a_.*_b</loc>',
                     u'<loc>a_\u00c3_b</loc>',
                     u'<loc>c_\u1234_d</loc>'):
         found = re.search(pattern, rendered)
         self.assertTrue(found)
コード例 #14
0
 def test07_has_md5(self):
     """has_md5() stays false until a listed resource is given an md5 value."""
     first = Resource(uri='a')
     second = Resource(uri='b')
     resources = ResourceList()
     self.assertFalse(resources.has_md5())
     for res in (first, second):
         resources.add(res)
     self.assertFalse(resources.has_md5())
     # Set md5 on a resource that is already in the list
     first.md5 = "aabbcc"
     self.assertTrue(resources.has_md5())
コード例 #15
0
 def test07_has_md5(self):
     """has_md5() stays false until a listed resource is given an md5 value."""
     res_a = Resource(uri='a')
     res_b = Resource(uri='b')
     listing = ResourceList()
     self.assertFalse(listing.has_md5())
     listing.add(res_a)
     listing.add(res_b)
     self.assertFalse(listing.has_md5())
     # Set md5 on a resource that is already in the list
     res_a.md5 = "aabbcc"
     self.assertTrue(listing.has_md5())
コード例 #16
0
 def test_08_print(self):
     """Render a three-resource list with resources_as_xml and compare the exact XML."""
     listing = ResourceList(md={'capability':'resourcelist','modified':None})
     listing.add(Resource(uri='a', lastmod='2001-01-01', length=1234))
     listing.add(Resource(uri='b', lastmod='2002-02-02', length=56789))
     listing.add(Resource(uri='c', lastmod='2003-03-03', length=0))
     produced = Sitemap().resources_as_xml(listing)
     expected = "<?xml version='1.0' encoding='UTF-8'?>\n<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:rs=\"http://www.openarchives.org/rs/terms/\"><rs:md capability=\"resourcelist\" /><url><loc>a</loc><lastmod>2001-01-01T00:00:00Z</lastmod><rs:md length=\"1234\" /></url><url><loc>b</loc><lastmod>2002-02-02T00:00:00Z</lastmod><rs:md length=\"56789\" /></url><url><loc>c</loc><lastmod>2003-03-03T00:00:00Z</lastmod><rs:md length=\"0\" /></url></urlset>"
     self.assertEqual(produced, expected)
コード例 #17
0
ファイル: test_sitemap.py プロジェクト: ramonmassip/resync
 def test_09_print_from_iter(self):
     """Pass an iterator over a ResourceList to resources_as_xml and compare the XML."""
     resources = ResourceList()
     for res in (Resource(uri='a', lastmod='2001-01-01', length=1234),
                 Resource(uri='b', lastmod='2002-02-02', length=56789)):
         resources.add(res)
     iterator = iter(resources)
     actual_xml = Sitemap().resources_as_xml(iterator)
     expected_xml = "<?xml version='1.0' encoding='UTF-8'?>\n<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:rs=\"http://www.openarchives.org/rs/terms/\"><url><loc>a</loc><lastmod>2001-01-01T00:00:00Z</lastmod><rs:md length=\"1234\" /></url><url><loc>b</loc><lastmod>2002-02-02T00:00:00Z</lastmod><rs:md length=\"56789\" /></url></urlset>"
     self.assertEqual(actual_xml, expected_xml)
コード例 #18
0
 def test_11_write_multifile(self):
     """Write a 4-resource list as two sitemaps plus an index and read them back.

     With max_sitemap_entries=2, write() must raise ListBaseIndexError
     while allow_multifile is False, and otherwise produce
     sitemap00000.xml, sitemap00001.xml and the index sitemap.xml. The
     temporary directory is removed even when an assertion fails.
     """
     tempdir = tempfile.mkdtemp(prefix='test_resource_list_multifile')
     try:
         rl = ResourceList()
         rl.mapper = Mapper(['http://localhost/=%s/' % (tempdir)])
         rl.add(Resource(uri='http://localhost/a'))
         rl.add(Resource(uri='http://localhost/b'))
         rl.add(Resource(uri='http://localhost/c'))
         rl.add(Resource(uri='http://localhost/d'))
         rl.max_sitemap_entries = 2
         # first try writing without multifile allowed
         rl.allow_multifile = False
         self.assertRaises(ListBaseIndexError,
                           rl.write,
                           basename=os.path.join(tempdir, 'sitemap.xml'))
         # second actually do it
         rl.allow_multifile = True
         rl.write(basename=os.path.join(tempdir, 'sitemap.xml'))
         # check the two component sitemaps
         # (assertEqual replaces the deprecated assertEquals alias)
         rl1 = ResourceList()
         rl1.read(os.path.join(tempdir, 'sitemap00000.xml'))
         self.assertEqual(len(rl1), 2)
         self.assertEqual(rl1.capability, 'resourcelist')
         self.assertFalse(rl1.sitemapindex)
         i = iter(rl1)
         self.assertEqual(next(i).uri, 'http://localhost/a')
         self.assertEqual(next(i).uri, 'http://localhost/b')
         rl2 = ResourceList()
         rl2.read(os.path.join(tempdir, 'sitemap00001.xml'))
         self.assertEqual(len(rl2), 2)
         i = iter(rl2)
         self.assertEqual(next(i).uri, 'http://localhost/c')
         self.assertEqual(next(i).uri, 'http://localhost/d')
         # check the sitemapindex (read just as index)
         rli = ResourceList()
         rli.read(os.path.join(tempdir, 'sitemap.xml'), index_only=True)
         self.assertEqual(len(rli), 2)
         i = iter(rli)
         self.assertEqual(rli.capability, 'resourcelist')
         self.assertTrue(rli.sitemapindex)
         self.assertEqual(next(i).uri, 'http://localhost/sitemap00000.xml')
         self.assertEqual(next(i).uri, 'http://localhost/sitemap00001.xml')
         # check the sitemapindex and components
         rli = ResourceList(mapper=rl.mapper)
         rli.read(os.path.join(tempdir, 'sitemap.xml'))
         self.assertEqual(len(rli), 4)
         self.assertEqual(rli.capability, 'resourcelist')
         self.assertFalse(rli.sitemapindex)
         i = iter(rli)
         self.assertEqual(next(i).uri, 'http://localhost/a')
         self.assertEqual(next(i).uri, 'http://localhost/b')
         self.assertEqual(next(i).uri, 'http://localhost/c')
         self.assertEqual(next(i).uri, 'http://localhost/d')
     finally:
         # cleanup tempdir regardless of the test outcome
         shutil.rmtree(tempdir)
コード例 #19
0
ファイル: test_resource_list.py プロジェクト: resync/resync
 def test07_hashes(self):
     """hashes() should report each hash type once a listed resource carries it."""
     first = Resource(uri='a')
     second = Resource(uri='b')
     listing = ResourceList()
     self.assertEqual(listing.hashes(), set())
     for res in (first, second):
         listing.add(res)
     self.assertEqual(listing.hashes(), set())
     first.md5 = "aabbcc"
     self.assertEqual(listing.hashes(), {'md5'})
     # Setting the sha1 attribute is expected to be reported as 'sha-1'
     second.sha1 = "ddeeff"
     self.assertEqual(listing.hashes(), {'md5', 'sha-1'})
コード例 #20
0
 def test08_iter(self):
     """Iterating a ResourceList yields all four resources, 'a' first and 'd' last."""
     listing = ResourceList()
     for uri, stamp in (('a', 1), ('b', 2), ('c', 3), ('d', 4)):
         listing.add(Resource(uri, timestamp=stamp))
     collected = [res for res in listing]
     self.assertEqual(len(collected), 4)
     self.assertEqual(collected[0].uri, 'a')
     self.assertEqual(collected[3].uri, 'd')
コード例 #21
0
 def test08_iter(self):
     """Iterating a ResourceList yields all four resources, 'a' first and 'd' last."""
     resource_list = ResourceList()
     entries = (('a', 1), ('b', 2), ('c', 3), ('d', 4))
     for uri, timestamp in entries:
         resource_list.add(Resource(uri, timestamp=timestamp))
     seen = [item for item in resource_list]
     self.assertEqual(len(seen), 4)
     first, last = seen[0], seen[3]
     self.assertEqual(first.uri, 'a')
     self.assertEqual(last.uri, 'd')
コード例 #22
0
 def test06_add_iterable(self):
     """add() accepts a list of resources; duplicates need replace=True."""
     res_a = Resource(uri='a', length=1)
     res_b = Resource(uri='b', length=2)
     listing = ResourceList()
     listing.add([res_a, res_b])
     # Adding either resource again without replace must be rejected
     for duplicate in (res_a, res_b):
         self.assertRaises(ResourceListDupeError, listing.add, duplicate)
     # With replace=True the entry for 'a' is overwritten
     replacement = Resource(uri='a', length=10)
     listing.add([replacement], replace=True)
     self.assertEqual(len(listing), 2)
     self.assertEqual(listing.resources['a'].length, 10)
コード例 #23
0
 def test06_add_iterable(self):
     """add() accepts a list of resources; duplicates need replace=True."""
     first = Resource(uri='a', length=1)
     second = Resource(uri='b', length=2)
     resources = ResourceList()
     resources.add([first, second])
     # Re-adding an existing URI without replace raises
     self.assertRaises(ResourceListDupeError, resources.add, first)
     self.assertRaises(ResourceListDupeError, resources.add, second)
     # Replacement of 'a' is allowed when replace=True is given
     first_replacement = Resource(uri='a', length=10)
     resources.add([first_replacement], replace=True)
     self.assertEqual(len(resources), 2)
     stored = resources.resources['a']
     self.assertEqual(stored.length, 10)
コード例 #24
0
 def test07_hashes(self):
     """hashes() should report each hash type once a listed resource carries it."""
     res_a = Resource(uri='a')
     res_b = Resource(uri='b')
     resources = ResourceList()
     self.assertEqual(resources.hashes(), set())
     resources.add(res_a)
     resources.add(res_b)
     self.assertEqual(resources.hashes(), set())
     res_a.md5 = "aabbcc"
     self.assertEqual(resources.hashes(), {'md5'})
     # Setting the sha1 attribute is expected to be reported as 'sha-1'
     res_b.sha1 = "ddeeff"
     self.assertEqual(resources.hashes(), {'md5', 'sha-1'})
コード例 #25
0
 def test_11_write_multifile(self):
     """Write a 4-resource list as two sitemaps plus an index and read them back.

     With max_sitemap_entries=2, write() must raise ListBaseIndexError
     while allow_multifile is False, and otherwise produce
     sitemap00000.xml, sitemap00001.xml and the index sitemap.xml. The
     temporary directory is removed even when an assertion fails.
     """
     tempdir = tempfile.mkdtemp(prefix='test_resource_list_multifile_dir')
     try:
         rl = ResourceList()
         rl.mapper = Mapper(['http://localhost/=%s/' % (tempdir)])
         rl.add(Resource(uri='http://localhost/a'))
         rl.add(Resource(uri='http://localhost/b'))
         rl.add(Resource(uri='http://localhost/c'))
         rl.add(Resource(uri='http://localhost/d'))
         rl.max_sitemap_entries = 2
         # first try writing without multifile allowed
         rl.allow_multifile = False
         self.assertRaises(ListBaseIndexError, rl.write,
                           basename=os.path.join(tempdir, 'sitemap.xml'))
         # second actually do it
         rl.allow_multifile = True
         rl.write(basename=os.path.join(tempdir, 'sitemap.xml'))
         # check the two component sitemaps
         # (assertEqual replaces the deprecated assertEquals alias)
         rl1 = ResourceList()
         rl1.read(os.path.join(tempdir, 'sitemap00000.xml'))
         self.assertEqual(len(rl1), 2)
         self.assertEqual(rl1.capability, 'resourcelist')
         self.assertFalse(rl1.sitemapindex)
         i = iter(rl1)
         self.assertEqual(next(i).uri, 'http://localhost/a')
         self.assertEqual(next(i).uri, 'http://localhost/b')
         rl2 = ResourceList()
         rl2.read(os.path.join(tempdir, 'sitemap00001.xml'))
         self.assertEqual(len(rl2), 2)
         i = iter(rl2)
         self.assertEqual(next(i).uri, 'http://localhost/c')
         self.assertEqual(next(i).uri, 'http://localhost/d')
         # check the sitemapindex (read just as index)
         rli = ResourceList()
         rli.read(os.path.join(tempdir, 'sitemap.xml'), index_only=True)
         self.assertEqual(len(rli), 2)
         i = iter(rli)
         self.assertEqual(rli.capability, 'resourcelist')
         self.assertTrue(rli.sitemapindex)
         self.assertEqual(next(i).uri, 'http://localhost/sitemap00000.xml')
         self.assertEqual(next(i).uri, 'http://localhost/sitemap00001.xml')
         # check the sitemapindex and components
         rli = ResourceList(mapper=rl.mapper)
         rli.read(os.path.join(tempdir, 'sitemap.xml'))
         self.assertEqual(len(rli), 4)
         self.assertEqual(rli.capability, 'resourcelist')
         self.assertFalse(rli.sitemapindex)
         i = iter(rli)
         self.assertEqual(next(i).uri, 'http://localhost/a')
         self.assertEqual(next(i).uri, 'http://localhost/b')
         self.assertEqual(next(i).uri, 'http://localhost/c')
         self.assertEqual(next(i).uri, 'http://localhost/d')
     finally:
         # cleanup tempdir regardless of the test outcome
         shutil.rmtree(tempdir)
コード例 #26
0
ファイル: test_sitemap.py プロジェクト: ramonmassip/resync
 def test_07_print(self):
     """Render a three-resource list with resources_as_xml and compare the exact XML."""
     resources = ResourceList(md={'capability': 'resourcelist', 'modified': None})
     for res in (Resource(uri='a', lastmod='2001-01-01', length=1234),
                 Resource(uri='b', lastmod='2002-02-02', length=56789),
                 Resource(uri='c', lastmod='2003-03-03', length=0)):
         resources.add(res)
     actual_xml = Sitemap().resources_as_xml(resources)
     expected_xml = "<?xml version='1.0' encoding='UTF-8'?>\n<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:rs=\"http://www.openarchives.org/rs/terms/\"><rs:md capability=\"resourcelist\" /><url><loc>a</loc><lastmod>2001-01-01T00:00:00Z</lastmod><rs:md length=\"1234\" /></url><url><loc>b</loc><lastmod>2002-02-02T00:00:00Z</lastmod><rs:md length=\"56789\" /></url><url><loc>c</loc><lastmod>2003-03-03T00:00:00Z</lastmod><rs:md length=\"0\" /></url></urlset>"
     self.assertEqual(actual_xml, expected_xml)
コード例 #27
0
ファイル: test_resource_list.py プロジェクト: resync/resync
    def test33_write(self):
        """Write a resource list and an index, then parse each file back.

        Sitemap.parsed_index is expected to be false for the plain list and
        true for the list written with sitemapindex set.
        """
        # Plain resource list
        plain = ResourceList()
        for uri in ('http://example.com/test/a',
                    'http://example.com/test/b',
                    'http://example.com/test/c'):
            plain.add(Resource(uri=uri, timestamp=1))
        plain_path = os.path.join(self.tmpdir, 'test33_write_resourcelist.xml')
        plain.write(basename=plain_path)
        with open(plain_path, 'r') as fh:
            parser = Sitemap()
            parser.parse_xml(fh=fh)
            self.assertFalse(parser.parsed_index)

        # Resource list index
        index = ResourceList()
        for uri in ('http://example.com/test/resourcelist00000.xml',
                    'http://example.com/test/resourcelist00001.xml',
                    'http://example.com/test/resourcelist00002.xml'):
            index.add(Resource(uri=uri, timestamp=1))
        index.sitemapindex = True
        index_path = os.path.join(self.tmpdir, 'test33_write_resourcelist-index.xml')
        index.write(basename=index_path)
        with open(index_path, 'r') as fh:
            parser = Sitemap()
            parser.parse_xml(fh=fh)
            self.assertTrue(parser.parsed_index)
コード例 #28
0
 def test_build_ex_02(self):
     """Build a Resource List with md5 values and a duplicate link; compare with example 2."""
     resource_list = ResourceList()
     resource_list.md_at = '2013-01-03T09:00:00Z'
     first = Resource(uri='http://example.com/res1',
                      lastmod='2013-01-02T13:00:00Z',
                      md5='1584abdf8ebdc9802ac0c6a7402c03b6')
     resource_list.add(first)
     second = Resource(uri='http://example.com/res2',
                       lastmod='2013-01-02T14:00:00Z',
                       md5='1e0d5cb8ef6ba40c99b14c0237be735e')
     # Attach the rel="duplicate" link before the resource is added
     second.link_set(rel="duplicate", href="http://mirror.example.com/res2")
     resource_list.add(second)
     expected_xml = self._open_ex('resourcesync_ex_2').read()
     generated_xml = resource_list.as_xml()
     self._assert_xml_equal(generated_xml, expected_xml)
コード例 #29
0
 def test_build_ex_08(self):
     """Build a simple Resource List Index by hand and compare with example 8.

     An index like this would normally come out of writing a large
     Resource List across several files, but it can also be assembled
     manually as done here.
     """
     index = ResourceList()
     index.sitemapindex = True
     index.md_at = '2013-01-03T09:00:00Z'
     for uri in ('http://example.com/resourcelist-part1.xml',
                 'http://example.com/resourcelist-part2.xml'):
         index.add(Resource(uri=uri))
     expected_xml = self._open_ex('resourcesync_ex_8').read()
     generated_xml = index.as_xml()
     self._assert_xml_equal(generated_xml, expected_xml)
コード例 #30
0
 def test02_changed(self):
     """compare() reports both resources as changed when only timestamps differ."""
     source = ResourceList()
     for uri, stamp in (('a', 1), ('b', 2)):
         source.add(Resource(uri, timestamp=stamp))
     destination = ResourceList()
     for uri, stamp in (('a', 3), ('b', 4)):
         destination.add(Resource(uri, timestamp=stamp))
     same, changed, deleted, added = destination.compare(source)
     self.assertEqual(len(same), 0, "0 things unchanged")
     self.assertEqual(len(changed), 2, "2 things changed")
     changed_iter = iter(changed)
     self.assertEqual(next(changed_iter).uri, 'a', "first was a")
     self.assertEqual(next(changed_iter).uri, 'b', "second was b")
     self.assertEqual(len(deleted), 0, "nothing deleted")
     self.assertEqual(len(added), 0, "nothing added")
コード例 #31
0
 def test02_changed(self):
     """compare() reports both resources as changed when only timestamps differ."""
     src_list = ResourceList()
     src_list.add(Resource('a', timestamp=1))
     src_list.add(Resource('b', timestamp=2))
     dst_list = ResourceList()
     dst_list.add(Resource('a', timestamp=3))
     dst_list.add(Resource('b', timestamp=4))
     result = dst_list.compare(src_list)
     same, changed, deleted, added = result
     self.assertEqual(len(same), 0, "0 things unchanged")
     self.assertEqual(len(changed), 2, "2 things changed")
     it = iter(changed)
     first_changed = next(it)
     self.assertEqual(first_changed.uri, 'a', "first was a")
     second_changed = next(it)
     self.assertEqual(second_changed.uri, 'b', "second was b")
     self.assertEqual(len(deleted), 0, "nothing deleted")
     self.assertEqual(len(added), 0, "nothing added")
コード例 #32
0
 def test_build_ex_16(self):
     """Build a Resource List with up and index links; compare with example 16."""
     resource_list = ResourceList()
     resource_list.up = 'http://example.com/dataset1/capabilitylist.xml'
     resource_list.index = 'http://example.com/dataset1/resourcelist-index.xml'
     resource_list.md_at = "2013-01-03T09:00:00Z"
     pdf_resource = Resource(uri='http://example.com/res3',
                             lastmod='2013-01-02T13:00:00Z',
                             md5='1584abdf8ebdc9802ac0c6a7402c8753',
                             length=4385,
                             mime_type="application/pdf")
     resource_list.add(pdf_resource)
     png_resource = Resource(uri='http://example.com/res4',
                             lastmod='2013-01-02T14:00:00Z',
                             md5='4556abdf8ebdc9802ac0c6a7402c9881',
                             length=883,
                             mime_type="image/png")
     resource_list.add(png_resource)
     expected_xml = self._open_ex('resourcesync_ex_16').read()
     generated_xml = resource_list.as_xml()
     self._assert_xml_equal(generated_xml, expected_xml)
コード例 #33
0
 def test03_dump_multi_file_max_size(self):
     """Write 12 resources with max_files=4 and inspect the three zip files.

     Despite the method name, the limit exercised here is Dump.max_files
     (number of files per dump), not max_size. Resources 'a' through 'l'
     are expected to be split four per archive.
     """
     rl = ResourceList()
     for letter in map(chr, range(ord('a'), ord('l') + 1)):
         uri = 'http://ex.org/%s' % (letter)
         fname = 'tests/testdata/a_to_z/%s' % (letter)
         rl.add(Resource(uri, path=fname))
     self.assertEqual(len(rl), 12)
     # Limit the number of files per dump rather than the size
     d2 = Dump(rl)
     tmpbase = os.path.join(self.tmpdir, 'test03_')
     d2.max_files = 4
     n = d2.write(tmpbase)
     self.assertEqual(n, 3, 'expect to write 3 dump files')
     self.assertTrue(os.path.isfile(tmpbase + '00000.zip'))
     self.assertTrue(os.path.isfile(tmpbase + '00001.zip'))
     self.assertTrue(os.path.isfile(tmpbase + '00002.zip'))
     # Look at the first file in detail; the context manager closes the
     # archive even when one of the assertions fails
     zipf = tmpbase + '00000.zip'
     with zipfile.ZipFile(zipf, 'r') as zo:
         self.assertEqual(zo.namelist(),
                          ['manifest.xml', 'a', 'b', 'c', 'd'])
         # The size of manifest.xml is not checked here
         self.assertEqual(zo.getinfo('a').file_size, 9)
         self.assertEqual(zo.getinfo('b').file_size, 1116)
         self.assertEqual(zo.getinfo('c').file_size, 32)
         self.assertEqual(zo.getinfo('d').file_size, 13)
     os.unlink(zipf)
     # Check second and third files have expected contents
     zipf = tmpbase + '00001.zip'
     with zipfile.ZipFile(zipf, 'r') as zo:
         self.assertEqual(zo.namelist(),
                          ['manifest.xml', 'e', 'f', 'g', 'h'])
     os.unlink(zipf)
     zipf = tmpbase + '00002.zip'
     with zipfile.ZipFile(zipf, 'r') as zo:
         self.assertEqual(zo.namelist(),
                          ['manifest.xml', 'i', 'j', 'k', 'l'])
     os.unlink(zipf)
コード例 #34
0
ファイル: test_dump.py プロジェクト: uweschmitt/resync
 def test03_dump_multi_file_max_size(self):
     """Write 12 resources with max_files=4 and inspect the three zip files.

     Despite the method name, the limit exercised here is Dump.max_files
     (number of files per dump), not max_size. Resources 'a' through 'l'
     are expected to be split four per archive.
     """
     rl = ResourceList()
     # range() instead of xrange(): works on both Python 2 and Python 3
     for letter in map(chr, range(ord('a'), ord('l') + 1)):
         uri = 'http://ex.org/%s' % (letter)
         fname = 'resync/test/testdata/a_to_z/%s' % (letter)
         rl.add(Resource(uri, path=fname))
     self.assertEqual(len(rl), 12)
     # Limit the number of files per dump rather than the size
     d2 = Dump(rl)
     tmpbase = os.path.join(self.tmpdir, 'test03_')
     d2.max_files = 4
     n = d2.write(tmpbase)
     self.assertEqual(n, 3, 'expect to write 3 dump files')
     self.assertTrue(os.path.isfile(tmpbase + '00000.zip'))
     self.assertTrue(os.path.isfile(tmpbase + '00001.zip'))
     self.assertTrue(os.path.isfile(tmpbase + '00002.zip'))
     # Look at the first file in detail; the context manager closes the
     # archive even when one of the assertions fails
     zipf = tmpbase + '00000.zip'
     with zipfile.ZipFile(zipf, 'r') as zo:
         self.assertEqual(zo.namelist(),
                          ['manifest.xml', 'a', 'b', 'c', 'd'])
         # The size of manifest.xml is not checked here
         self.assertEqual(zo.getinfo('a').file_size, 9)
         self.assertEqual(zo.getinfo('b').file_size, 1116)
         self.assertEqual(zo.getinfo('c').file_size, 32)
         self.assertEqual(zo.getinfo('d').file_size, 13)
     os.unlink(zipf)
     # Check second and third files have expected contents
     zipf = tmpbase + '00001.zip'
     with zipfile.ZipFile(zipf, 'r') as zo:
         self.assertEqual(zo.namelist(),
                          ['manifest.xml', 'e', 'f', 'g', 'h'])
     os.unlink(zipf)
     zipf = tmpbase + '00002.zip'
     with zipfile.ZipFile(zipf, 'r') as zo:
         self.assertEqual(zo.namelist(),
                          ['manifest.xml', 'i', 'j', 'k', 'l'])
     os.unlink(zipf)
コード例 #35
0
 def test_build_ex_14(self):
     """Build a two-entry Resource List with metadata; compare with example 14."""
     resource_list = ResourceList()
     resource_list.up = 'http://example.com/dataset1/capabilitylist.xml'
     resource_list.md_at = "2013-01-03T09:00:00Z"
     resource_list.md_completed = "2013-01-03T09:01:00Z"
     html_resource = Resource(uri='http://example.com/res1',
                              lastmod='2013-01-02T13:00:00Z',
                              md5='1584abdf8ebdc9802ac0c6a7402c03b6',
                              length=8876,
                              mime_type="text/html")
     resource_list.add(html_resource)
     # The second resource carries both an md5 and a sha256 value
     pdf_resource = Resource(uri='http://example.com/res2',
                             lastmod='2013-01-02T14:00:00Z',
                             md5='1e0d5cb8ef6ba40c99b14c0237be735e',
                             sha256='854f61290e2e197a11bc91063afce22e43f8ccc655237050ace766adc68dc784',
                             length=14599,
                             mime_type="application/pdf")
     resource_list.add(pdf_resource)
     expected_xml = self._open_ex('resourcesync_ex_14').read()
     generated_xml = resource_list.as_xml()
     self._assert_xml_equal(generated_xml, expected_xml)
コード例 #36
0
 def test06_add_changed_resources(self):
     """Add created and updated resources to a ChangeList, then collapse it.

     Two resource lists are passed to add_changed_resources() with changes
     'created' and 'updated'. The combined change list is then added to a
     fresh ResourceList with replace=True, which should leave one entry
     per URI.
     """
     created_list = ResourceList()
     for uri, stamp in (('a', 1), ('d', 4)):
         created_list.add(Resource(uri, timestamp=stamp, change='created'))
     self.assertEqual(len(created_list), 2, "2 things in added resource_list")
     change_list = ChangeList()
     change_list.add_changed_resources(created_list, change='created')
     self.assertEqual(len(change_list), 2, "2 things added")
     change_iter = iter(change_list)
     head = next(change_iter)
     self.assertEqual(head.uri, 'a', "changes[0].uri=a")
     self.assertEqual(head.timestamp, 1, "changes[0].timestamp=1")
     self.assertEqual(head.change, 'created')
     follower = next(change_iter)
     self.assertEqual(follower.timestamp, 4, "changes[1].timestamp=4")
     self.assertEqual(follower.change, 'created', "changes[1].change=createdd")
     # Second batch: one URI already present ('a'), one new ('b')
     updated_list = ResourceList()
     for uri, stamp in (('a', 5), ('b', 6)):
         updated_list.add(Resource(uri, timestamp=stamp, change='created'))
     self.assertEqual(len(updated_list), 2, "2 things in updated resource_list")
     change_list.add_changed_resources(updated_list, change='updated')
     self.assertEqual(len(change_list), 4, "4 = 2 old + 2 things updated")
     # Collapse the changes into a resource list without duplicate URIs
     collapsed = ResourceList()
     collapsed.add(change_list, replace=True)
     self.assertEqual(len(collapsed), 3, "3 unique resources")
     # For 'a' the later entry (timestamp 5) is expected to win over 1
     self.assertEqual(collapsed.resources['a'].timestamp, 5)
     self.assertEqual(collapsed.resources['a'].change, 'updated')
     self.assertEqual(collapsed.resources['b'].timestamp, 6)
     self.assertEqual(collapsed.resources['b'].change, 'updated')
     self.assertEqual(collapsed.resources['d'].timestamp, 4)
     self.assertEqual(collapsed.resources['d'].change, 'created')
コード例 #37
0
    def test33_write(self):
        """Write a resource list and an index, then parse each file back.

        Sitemap.parsed_index is expected to be false for the plain list and
        true for the list written with sitemapindex set.
        """
        # Plain resource list
        resource_list = ResourceList()
        list_uris = (
            'http://example.com/test/a',
            'http://example.com/test/b',
            'http://example.com/test/c',
        )
        for uri in list_uris:
            resource_list.add(Resource(uri=uri, timestamp=1))
        list_path = os.path.join(self.tmpdir, 'test33_write_resourcelist.xml')
        resource_list.write(basename=list_path)
        with open(list_path, 'r') as handle:
            sitemap = Sitemap()
            sitemap.parse_xml(fh=handle)
            self.assertFalse(sitemap.parsed_index)

        # Resource list index
        resource_index = ResourceList()
        index_uris = (
            'http://example.com/test/resourcelist00000.xml',
            'http://example.com/test/resourcelist00001.xml',
            'http://example.com/test/resourcelist00002.xml',
        )
        for uri in index_uris:
            resource_index.add(Resource(uri=uri, timestamp=1))
        resource_index.sitemapindex = True
        index_path = os.path.join(self.tmpdir,
                                  'test33_write_resourcelist-index.xml')
        resource_index.write(basename=index_path)
        with open(index_path, 'r') as handle:
            sitemap = Sitemap()
            sitemap.parse_xml(fh=handle)
            self.assertTrue(sitemap.parsed_index)
コード例 #38
0
 def test_build_ex_15(self):
     """Build a Resource List Index with metadata; compare with example 15."""
     # ResourceListOrdered is used because the order in the example is non-canonical
     index = ResourceList(resources_class=ResourceListOrdered)
     index.sitemapindex = True
     index.up = 'http://example.com/dataset1/capabilitylist.xml'
     index.md_at = "2013-01-03T09:00:00Z"
     index.md_completed = "2013-01-03T09:10:00Z"
     components = (
         ('http://example.com/resourcelist1.xml', '2013-01-03T09:00:00Z'),
         ('http://example.com/resourcelist2.xml', '2013-01-03T09:03:00Z'),
         ('http://example.com/resourcelist3.xml', '2013-01-03T09:07:00Z'),
     )
     for uri, at in components:
         index.add(Resource(uri=uri, md_at=at))
     expected_xml = self._open_ex('resourcesync_ex_15').read()
     generated_xml = index.as_xml()
     self._assert_xml_equal(generated_xml, expected_xml)
コード例 #39
0
 def test03_deleted(self):
     """compare() reports resources present only in dst as deleted.

     dst holds a, b, c and d while src holds only a and b (with matching
     timestamps), so c and d are expected in the deleted result.
     """
     src = ResourceList()
     src.add(Resource('a', timestamp=1))
     src.add(Resource('b', timestamp=2))
     dst = ResourceList()
     dst.add(Resource('a', timestamp=1))
     dst.add(Resource('b', timestamp=2))
     dst.add(Resource('c', timestamp=3))
     dst.add(Resource('d', timestamp=4))
     (same, changed, deleted, added) = dst.compare(src)
     self.assertEqual(len(same), 2, "2 things unchanged")
     self.assertEqual(len(changed), 0, "nothing changed")
     self.assertEqual(len(deleted), 2, "c and d deleted")
     i = iter(deleted)
     # Use the next() builtin: i.next() only exists on Python 2 iterators
     self.assertEqual(next(i).uri, 'c', "first was c")
     self.assertEqual(next(i).uri, 'd', "second was d")
     self.assertEqual(len(added), 0, "nothing added")
コード例 #40
0
ファイル: 01_make_resourcelist.py プロジェクト: EHRI/resync
from resync.resource_list import ResourceList
from resync.resource import Resource
from resync.sitemap import Sitemap

# Build a two-entry resource list and print it as pretty-printed XML.
rl = ResourceList()
rl.add(Resource('http://example.com/res1', lastmod='2013-01-01'))
rl.add(Resource('http://example.com/res2', lastmod='2013-01-02'))
# A parenthesised single-argument print behaves the same on Python 2 and 3,
# whereas the bare print statement is a syntax error on Python 3.
print(rl.as_xml(pretty_xml=True))
コード例 #41
0
# Collect every "rdfpatch-" file in the resource directory into a resource
# list, deriving each entry's lastmod from the characters after the prefix.
rl = ResourceList()
timestamps = []
for filename in listdir(args.resource_dir):
    if not filename.startswith("rdfpatch-"):
        continue
    # NOTE: this unpacking raises ValueError when the name contains more
    # than one "-"; kept as in the original so unexpected names stay loud.
    _, raw_ts = filename.split("-")
    # Re-punctuate the first 14 characters as "....-..-..T..:..:..Z"
    # (presumably a compact YYYYMMDDhhmmss stamp -- confirm with producer).
    ts = "%s-%s-%sT%s:%s:%sZ" % (
        raw_ts[:4], raw_ts[4:6], raw_ts[6:8],
        raw_ts[8:10], raw_ts[10:12], raw_ts[12:14],
    )
    timestamps.append(ts)
    rl.add(Resource(args.resource_url + filename, lastmod=ts))

# Print to file at args.resource_dir + "/resource-list.xml"
resource_list_path = args.resource_dir + "/resource-list.xml"
# The with-block closes the file even if as_xml() or write() raises.
with open(resource_list_path, "w") as resource_list_file:
    resource_list_file.write(rl.as_xml())
# Parenthesised single-argument print works on both Python 2 and 3.
print("Wrote resource list to: " + resource_list_path)

timestamps.sort()

caps = CapabilityList()
caps.add_capability(rl, args.resource_url + "resource-list.xml")
# Record the first timestamp in sorted order, if any files were found.
if timestamps:
    caps.md['from'] = timestamps[0]

# Print to file at args.resource_dir + "/capability-list.xml"
コード例 #42
0
from resync.resource_list import ResourceList
from resync.resource import Resource
from resync.sitemap import Sitemap

# Build a two-entry resource list and print it as pretty-printed XML.
rl = ResourceList()
rl.add(Resource('http://example.com/res1', lastmod='2013-01-01'))
rl.add(Resource('http://example.com/res2', lastmod='2013-01-02'))
# A parenthesised single-argument print behaves the same on Python 2 and 3,
# whereas the bare print statement is a syntax error on Python 3.
print(rl.as_xml(pretty_xml=True))
コード例 #43
0
# Normalise the base URL so filenames can be appended directly.
# endswith() also copes with an empty string, where [-1] would raise
# IndexError.
if not args.resource_url.endswith('/'):
    args.resource_url += '/'

if not isdir(args.resource_dir):
    raise IOError(args.resource_dir + " is not a directory")

# Collect every "rdfpatch-" file in the resource directory into a resource
# list, deriving each entry's lastmod from the characters after the prefix.
rl = ResourceList()
timestamps = []
for filename in listdir(args.resource_dir):
    if not filename.startswith("rdfpatch-"):
        continue
    # NOTE: this unpacking raises ValueError when the name contains more
    # than one "-"; kept as in the original so unexpected names stay loud.
    _, raw_ts = filename.split("-")
    # Re-punctuate the first 14 characters as "....-..-..T..:..:..Z"
    # (presumably a compact YYYYMMDDhhmmss stamp -- confirm with producer).
    ts = "%s-%s-%sT%s:%s:%sZ" % (
        raw_ts[:4], raw_ts[4:6], raw_ts[6:8],
        raw_ts[8:10], raw_ts[10:12], raw_ts[12:14],
    )
    timestamps.append(ts)
    rl.add(Resource(args.resource_url + filename, lastmod=ts))

# Print to file at args.resource_dir + "/resource-list.xml"
resource_list_path = args.resource_dir + "/resource-list.xml"
# The with-block closes the file even if as_xml() or write() raises.
with open(resource_list_path, "w") as resource_list_file:
    resource_list_file.write(rl.as_xml())
# Parenthesised single-argument print works on both Python 2 and 3.
print("Wrote resource list to: " + resource_list_path)

timestamps.sort()

caps = CapabilityList()
caps.add_capability(rl, args.resource_url + "resource-list.xml")
# Record the first timestamp in sorted order, if any files were found.
if timestamps:
    caps.md['from'] = timestamps[0]

# Print to file at args.resource_dir + "/capability-list.xml"
コード例 #44
0
ファイル: test_dump.py プロジェクト: uweschmitt/resync
 def test11_bad_size(self):
     """check_files() succeeds with check_length=False but raises DumpError
     when called with its defaults.
     """
     resources = ResourceList()
     entry = Resource('http://ex.org/a',
                      length=9999,
                      path='resync/test/testdata/a')
     resources.add(entry)
     dump = Dump(resources)
     # With the length check disabled the call is expected to return truthy.
     length_ignored = dump.check_files(check_length=False)
     self.assertTrue(length_ignored)
     # With default arguments the same dump is expected to raise.
     self.assertRaises(DumpError, dump.check_files)
コード例 #45
0
ファイル: test_dump.py プロジェクト: uweschmitt/resync
 def test10_no_path(self):
     """check_files() raises DumpError for a list whose second resource is
     created without a path argument.
     """
     with_path = Resource('http://ex.org/a',
                          length=7,
                          path='resync/test/testdata/a')
     without_path = Resource('http://ex.org/b', length=21)
     resources = ResourceList()
     for entry in (with_path, without_path):
         resources.add(entry)
     dump = Dump(resources)
     self.assertRaises(DumpError, dump.check_files)