Beispiel #1
0
 def test_build_ex_24(self):
     """Build a change list whose only resource carries three "duplicate" links.

     The serialized XML is handed to ``_assert_xml_equal_ex`` together with
     the example name ``resourcesync_ex_24``.
     """
     stamp = "2013-01-03T18:00:00Z"
     changes = ChangeList()
     changes.up = "http://example.com/dataset1/capabilitylist.xml"
     changes.md_from = "2013-01-03T00:00:00Z"
     res = Resource(uri="http://example.com/res1",
                    lastmod=stamp,
                    change="updated",
                    md5="1584abdf8ebdc9802ac0c6a7402c03b6",
                    length=8876,
                    mime_type="text/html")
     # Note carried over from the original author: Resource.link_set adds a
     # link, or changes the existing one with the same rel, unless
     # allow_duplicates=True is given; Resource.link_add always adds.
     # Both entry points are exercised below.
     res.link_set(rel="duplicate", href="http://mirror1.example.com/res1",
                  pri="1", modified=stamp)
     res.link_set(rel="duplicate", href="http://mirror2.example.com/res1",
                  pri="2", modified=stamp, allow_duplicates=True)
     res.link_add(rel="duplicate", href="gsiftp://gridftp.example.com/res1",
                  pri="3", modified=stamp)
     changes.add(res)
     self._assert_xml_equal_ex(changes.as_xml(), 'resourcesync_ex_24')
Beispiel #2
0
 def test_build_ex_27(self):
     """Build a change list of two updated resources, each linking to a patch.

     The serialized XML and the example name ``resourcesync_ex_27`` are
     passed to ``_assert_xml_equal_ex``.
     """
     patch_rel = "http://www.openarchives.org/rs/terms/patch"
     changes = ChangeList()
     changes.up = "http://example.com/dataset1/capabilitylist.xml"
     changes.md_from = "2013-01-03T00:00:00Z"

     # JSON resource together with its JSON patch link.
     json_res = Resource(
         uri="http://example.com/res4",
         lastmod="2013-01-03T17:00:00Z",
         change="updated",
         sha256="f4OxZX_x_DFGFDgghgdfb6rtSx-iosjf6735432nklj",
         length=56778,
         mime_type="application/json")
     json_res.link_set(
         rel=patch_rel,
         href="http://example.com/res4-json-patch",
         modified="2013-01-03T17:00:00Z",
         # FIXME - inconsistent (marker kept from the original author)
         hash="sha-256:y66dER_t_HWEIKpesdkeb7rtSc-ippjf9823742opld",
         length=73,
         type="application/json-patch")

     # TIFF resource together with its diff link; the resource length is
     # deliberately left as a string, exactly as in the original test.
     tiff_res = Resource(
         uri="http://example.com/res5-full.tiff",
         lastmod="2013-01-03T18:00:00Z",
         change="updated",
         sha256="f4OxZX_x_FO5LcGBSKHWXfwtSx-j1ncoSt3SABJtkGk",
         length="9788456778",
         mime_type="image/tiff")
     tiff_res.link_set(
         rel=patch_rel,
         href="http://example.com/res5-diff",
         modified="2013-01-03T18:00:00Z",
         hash="sha-256:h986gT_t_87HTkjHYE76G558hY-jdfgy76t55sadJUYT",
         length=4533,
         type="application/x-tiff-diff")

     changes.add([json_res, tiff_res])
     self._assert_xml_equal_ex(changes.as_xml(), 'resourcesync_ex_27')
Beispiel #3
0
 def test_build_ex_28(self):
     """Build a change list pairing a PDF with its metadata record.

     The PDF links to the metadata via ``describedby``; the metadata links
     back via ``describes`` and also carries a ``profile`` link. The XML and
     the example name ``resourcesync_ex_28`` go to ``_assert_xml_equal_ex``.
     """
     pdf_uri = "http://example.com/res2.pdf"
     metadata_uri = "http://example.com/res2_dublin-core_metadata.xml"
     pdf_md5 = "1584abdf8ebdc9802ac0c6a7402c03b6"

     changes = ChangeList()
     changes.up = "http://example.com/dataset1/capabilitylist.xml"
     changes.md_from = "2013-01-03T00:00:00Z"

     pdf = Resource(uri=pdf_uri,
                    lastmod="2013-01-03T18:00:00Z",
                    change="updated",
                    md5=pdf_md5,
                    length=8876,
                    mime_type="application/pdf")
     pdf.link_set(rel="describedby",
                  href=metadata_uri,
                  modified="2013-01-01T12:00:00Z",
                  type="application/xml")

     metadata = Resource(uri=metadata_uri,
                         lastmod="2013-01-03T19:00:00Z",
                         change="updated",
                         mime_type="application/xml")
     # The link length is passed as a string, as in the original test.
     metadata.link_set(rel="describes",
                       href=pdf_uri,
                       modified="2013-01-03T18:00:00Z",
                       hash="md5:" + pdf_md5,
                       length="8876",
                       type="application/pdf")
     metadata.link_set(rel="profile",
                       href="http://purl.org/dc/elements/1.1/")

     changes.add([pdf, metadata])
     self._assert_xml_equal_ex(changes.as_xml(), 'resourcesync_ex_28')
Beispiel #4
0
 def test02_dump_check_files(self):
     """Check that a Dump built from a change list validates its files.

     Two updated resources with lengths 7 and 21 are added, so the dump's
     ``total_size`` is expected to be 28 after ``check_files()``.
     """
     entries = (
         ('http://ex.org/a', 7, 'resync/test/testdata/a'),
         ('http://ex.org/b', 21, 'resync/test/testdata/b'),
     )
     changes = ChangeList()
     for uri, size, local_path in entries:
         changes.add(Resource(uri, length=size, path=local_path,
                              change="updated"))
     dump = Dump(resources=changes)
     self.assertTrue(dump.check_files())
     self.assertEqual(dump.total_size, 28)
 def test07_as_xml(self):
     """Serialize a two-resource change list and check key parts of the XML."""
     changes = ChangeList()
     changes.md_from = '1970-01-01T00:00:00Z'
     for uri, seconds in (('a', 1), ('b', 2)):
         changes.add(Resource(uri, timestamp=seconds, change='updated'))
     xml = changes.as_xml()

     # Patterns are matched in the same order as the original assertions.
     capability_re = r'<rs:md .*capability="changelist"'
     from_re = r'<rs:md .*from="\d\d\d\d\-\d\d\-\d\dT\d\d:\d\d:\d\dZ"'
     resource_a_re = r'<url><loc>a</loc><lastmod>1970-01-01T00:00:01Z</lastmod>'

     self.assertTrue(re.search(capability_re, xml), 'XML has capability')
     self.assertTrue(re.search(from_re, xml),
                     'XML has from to seconds precision (and not more)')
     self.assertTrue(re.search(resource_a_re, xml), 'XML has resource a')
Beispiel #6
0
 def test02_dump_check_files(self):
     """Verify ``Dump.check_files`` and ``total_size`` for two resources.

     The resources have lengths 7 and 21, hence the expected total of 28.
     """
     changes = ChangeList()
     first = Resource('http://ex.org/a', length=7,
                      path='tests/testdata/a', change="updated")
     second = Resource('http://ex.org/b', length=21,
                       path='tests/testdata/b', change="updated")
     changes.add(first)
     changes.add(second)
     dump = Dump(resources=changes)
     self.assertTrue(dump.check_files())
     self.assertEqual(dump.total_size, 28)
 def test03_with_repeats_again(self):
     """Check that a change list keeps a second entry for an existing URI."""
     changes = ChangeList()
     for uri, size in (('a', 1), ('b', 2)):
         changes.add(Resource(uri=uri, length=size, change='created'))
     self.assertEqual(len(changes), 2)
     # A further Resource with an already-used URI is added as a new entry.
     changes.add(Resource(uri='a', length=10, change='created'))
     self.assertEqual(len(changes), 3)
Beispiel #8
0
 def test_build_ex_31(self):
     """Build a change list with one updated HTML resource and no links.

     The XML and the example name ``resourcesync_ex_31`` are passed to
     ``_assert_xml_equal_ex``.
     """
     changes = ChangeList()
     changes.up = "http://example.com/dataset1/capabilitylist.xml"
     changes.md_from = "2013-01-03T00:00:00Z"
     changes.add(Resource(uri="http://original.example.com/res1.html",
                          lastmod="2013-01-03T07:00:00Z",
                          change="updated",
                          md5="1584abdf8ebdc9802ac0c6a7402c03b6",
                          length=8876,
                          mime_type="text/html"))
     self._assert_xml_equal_ex(changes.as_xml(), 'resourcesync_ex_31')
Beispiel #9
0
 def test4_iter(self):
     """Iterate over a change list and check the first and last URIs."""
     changes = ChangeList()
     for seconds, uri in enumerate("abcd", 1):
         changes.add(Resource(uri, timestamp=seconds, change="created"))
     seen = [entry for entry in changes]
     self.assertEqual(len(seen), 4)
     self.assertEqual(seen[0].uri, "a")
     self.assertEqual(seen[3].uri, "d")
 def test4_iter(self):
     """Iterate over a change list built from resources without a change type."""
     changes = ChangeList()
     for uri, seconds in (('a', 1), ('b', 2), ('c', 3), ('d', 4)):
         changes.add(Resource(uri, timestamp=seconds))
     collected = [item for item in changes]
     self.assertEqual(len(collected), 4)
     first, last = collected[0], collected[3]
     self.assertEqual(first.uri, 'a')
     self.assertEqual(last.uri, 'd')
 def test05_iter(self):
     """Iterate over four 'created' entries and check the first and last URIs."""
     changes = ChangeList()
     uris = ['a', 'b', 'c', 'd']
     for position, uri in enumerate(uris):
         # Timestamps run 1..4, matching the position in the list.
         changes.add(Resource(uri, timestamp=position + 1, change='created'))
     visited = []
     visited.extend(entry for entry in changes)
     self.assertEqual(len(visited), 4)
     self.assertEqual(visited[0].uri, 'a')
     self.assertEqual(visited[3].uri, 'd')
Beispiel #12
0
 def test_build_ex_03(self):
     """Simple Change List document.

     Builds a change list with one updated and one deleted resource and
     compares its XML with the contents of the ``resourcesync_ex_3``
     example via ``_assert_xml_equal``.
     """
     cl = ChangeList()
     cl.md_from = '2013-01-02T00:00:00Z'
     cl.md_until = '2013-01-03T00:00:00Z'
     cl.add(Resource(uri='http://example.com/res2.pdf',
                     lastmod='2013-01-02T13:00:00Z',
                     change="updated"))
     cl.add(Resource(uri='http://example.com/res3.tiff',
                     lastmod='2013-01-02T18:00:00Z',
                     change='deleted'))
     # Close the example file deterministically instead of leaking the
     # handle returned by _open_ex.
     with self._open_ex('resourcesync_ex_3') as ex_file:
         ex_xml = ex_file.read()
     self._assert_xml_equal(cl.as_xml(), ex_xml)
 def test_build_ex_21(self):
     """Change List which points back to index.

     Adds four changes (created, updated, deleted, updated) in order and
     compares the XML with the contents of the ``resourcesync_ex_21``
     example via ``_assert_xml_equal``.
     """
     cl = ChangeList()
     cl.up = 'http://example.com/dataset1/capabilitylist.xml'
     cl.index = 'http://example.com/dataset1/changelist.xml'
     cl.md_from = "2013-01-02T00:00:00Z"
     cl.md_until = "2013-01-03T00:00:00Z"
     # (uri, lastmod, change) in the order the changes are added.
     entries = (
         ('http://example.com/res7.html', '2013-01-02T12:00:00Z', 'created'),
         ('http://example.com/res9.pdf', '2013-01-02T13:00:00Z', 'updated'),
         ('http://example.com/res5.tiff', '2013-01-02T19:00:00Z', 'deleted'),
         ('http://example.com/res7.html', '2013-01-02T20:00:00Z', 'updated'),
     )
     for uri, lastmod, change in entries:
         cl.add(Resource(uri=uri, lastmod=lastmod, change=change))
     # Close the example file deterministically instead of leaking the
     # handle returned by _open_ex.
     with self._open_ex('resourcesync_ex_21') as ex_file:
         ex_xml = ex_file.read()
     self._assert_xml_equal(cl.as_xml(), ex_xml)
Beispiel #14
0
 def test20_as_xml(self):
     """Serialize a small change list and check selected XML fragments."""
     changes = ChangeList()
     changes.md_from = "1970-01-01T00:00:00Z"
     changes.add(Resource("a", timestamp=1, change="updated"))
     changes.add(Resource("b", timestamp=2, change="updated"))
     xml = changes.as_xml()
     # (pattern, failure message) pairs, checked in the original order.
     expectations = [
         (r'<rs:md .*capability="changelist"', "XML has capability"),
         (r'<rs:md .*from="\d\d\d\d\-\d\d\-\d\dT\d\d:\d\d:\d\dZ"',
          "XML has from to seconds precision (and not more)"),
         (r"<url><loc>a</loc><lastmod>1970-01-01T00:00:01Z</lastmod>",
          "XML has resource a"),
     ]
     for pattern, message in expectations:
         self.assertTrue(re.search(pattern, xml), message)
Beispiel #15
0
 def test_build_ex_26(self):
     """Build a change list whose resource carries a ``canonical`` link.

     The XML and the example name ``resourcesync_ex_26`` are passed to
     ``_assert_xml_equal_ex``.
     """
     stamp = "2013-01-03T18:00:00Z"
     changes = ChangeList()
     changes.up = "http://example.com/dataset1/capabilitylist.xml"
     changes.md_from = "2013-01-03T00:00:00Z"
     page = Resource(uri="http://example.com/res1.html",
                     lastmod=stamp,
                     change="updated",
                     md5="1584abdf8ebdc9802ac0c6a7402c03b6",
                     length=8876)
     page.link_add(rel="canonical", href="http://example.com/res1",
                   modified=stamp)
     changes.add(page)
     self._assert_xml_equal_ex(changes.as_xml(), 'resourcesync_ex_26')
Beispiel #16
0
 def test_build_ex_30(self):
     """Build a change list whose resource carries a ``collection`` link.

     The XML and the example name ``resourcesync_ex_30`` are passed to
     ``_assert_xml_equal_ex``.
     """
     changes = ChangeList()
     changes.up = "http://example.com/dataset1/capabilitylist.xml"
     changes.md_from = "2013-01-03T00:00:00Z"
     member = Resource(
         uri="http://example.com/res1",
         lastmod="2013-01-03T07:00:00Z",
         change="updated",
         md5="1584abdf8ebdc9802ac0c6a7402c03b6",
         length=8876,
         mime_type="text/html")
     member.link_add(
         rel="collection",
         href="http://example.com/aggregation/0601007")
     changes.add(member)
     self._assert_xml_equal_ex(changes.as_xml(), 'resourcesync_ex_30')
 def test3_change_list(self):
     """Add five distinct resources and check the change list length."""
     changes = ChangeList()
     for seconds, uri in enumerate(('a', 'b', 'c', 'd', 'e'), 1):
         changes.add(Resource(uri, timestamp=seconds))
     self.assertEqual(len(changes), 5, "5 things in src")
Beispiel #18
0
 def test_build_ex_25(self):
     """Build a change list whose resource has two ``alternate`` links.

     The XML and the example name ``resourcesync_ex_25`` are passed to
     ``_assert_xml_equal_ex``.
     """
     stamp = "2013-01-03T18:00:00Z"
     changes = ChangeList()
     changes.up = "http://example.com/dataset1/capabilitylist.xml"
     changes.md_from = "2013-01-03T11:00:00Z"
     res = Resource(uri="http://example.com/res1",
                    lastmod=stamp,
                    change="updated")
     # FIXME - inconsistent (marker kept from the original author; it was
     # attached to the text/html link below)
     alternates = (
         ("http://example.com/res1.html", "text/html"),
         ("http://example.com/res1.pdf", "application/pdf"),
     )
     for href, media_type in alternates:
         res.link_add(rel="alternate", href=href, modified=stamp,
                      type=media_type)
     changes.add(res)
     self._assert_xml_equal_ex(changes.as_xml(), 'resourcesync_ex_25')
Beispiel #19
0
 def test_build_ex_33(self):
     """Build an aggregator change list whose resource has a ``via`` link.

     The XML and the example name ``resourcesync_ex_33`` are passed to
     ``_assert_xml_equal_ex``.
     """
     digest = "1584abdf8ebdc9802ac0c6a7402c03b6"
     changes = ChangeList()
     changes.up = "http://aggregator2.example.com/dataset1/capabilitylist.xml"
     changes.md_from = "2013-01-03T12:00:00Z"
     copy = Resource(uri="http://aggregator2.example.com/res1.html",
                     lastmod="2013-01-04T09:00:00Z",
                     change="updated",
                     md5=digest,
                     length=8876,
                     mime_type="text/html")
     # The link length is a string here, unlike the integer resource length.
     copy.link_add(rel="via",
                   href="http://original.example.com/res1.html",
                   modified="2013-01-03T07:00:00Z",
                   hash="md5:" + digest,
                   length="8876",
                   type="text/html")
     changes.add(copy)
     self._assert_xml_equal_ex(changes.as_xml(), 'resourcesync_ex_33')
 def test1_set_with_repeats(self):
     """Check that repeated URIs are kept as separate change list entries."""
     changes = ChangeList()
     # (uri, timestamp) pairs; 'a' and 'b' each appear twice.
     sequence = [('a', 1), ('b', 1), ('c', 1), ('a', 2), ('b', 2)]
     for uri, seconds in sequence:
         changes.add(Resource(uri, timestamp=seconds))
     self.assertEqual(len(changes), 5, "5 changes in change_list")
 def test04_change_list(self):
     """Add five 'created' resources and check the change list length."""
     changes = ChangeList()
     for seconds, uri in enumerate('abcde', 1):
         changes.add(Resource(uri, timestamp=seconds, change='created'))
     self.assertEqual(len(changes), 5, "5 things in src")
 def test02_set_with_repeats(self):
     """Check that repeated URIs with differing change types are all kept."""
     changes = ChangeList()
     # (uri, timestamp, change) triples in insertion order.
     sequence = (
         ('a', 1, 'updated'),
         ('b', 1, 'created'),
         ('c', 1, 'deleted'),
         ('a', 2, 'deleted'),
         ('b', 2, 'updated'),
     )
     for uri, seconds, kind in sequence:
         changes.add(Resource(uri, timestamp=seconds, change=kind))
     self.assertEqual(len(changes), 5, "5 changes in change_list")
Beispiel #23
0
 def test1_set_with_repeats(self):
     """Add five changes covering three URIs and check that all are counted."""
     changes = ChangeList()
     first_round = [("a", "updated"), ("b", "created"), ("c", "deleted")]
     second_round = [("a", "deleted"), ("b", "updated")]
     for uri, kind in first_round:
         changes.add(Resource(uri, timestamp=1, change=kind))
     for uri, kind in second_round:
         changes.add(Resource(uri, timestamp=2, change=kind))
     self.assertEqual(len(changes), 5, "5 changes in change_list")
Beispiel #24
0
 def test3_change_list(self):
     """Add five 'created' resources with distinct URIs and check the length."""
     changes = ChangeList()
     uris = ["a", "b", "c", "d", "e"]
     for index, uri in enumerate(uris):
         # Timestamps run 1..5 alongside the URIs.
         changes.add(Resource(uri, timestamp=index + 1, change="created"))
     self.assertEqual(len(changes), 5, "5 things in src")
Beispiel #25
0
 def test_build_ex_29(self):
     """Build a change list whose resource has memento/timegate/timemap links.

     The XML and the example name ``resourcesync_ex_29`` are passed to
     ``_assert_xml_equal_ex``.
     """
     digest = "1584abdf8ebdc9802ac0c6a7402c03b6"
     changes = ChangeList()
     changes.up = "http://example.com/dataset1/capabilitylist.xml"
     changes.md_from = "2013-01-03T00:00:00Z"
     res = Resource(
         uri="http://example.com/res1",
         lastmod="2013-01-03T18:00:00Z",
         change="updated",
         md5=digest,
         length=8876,
         mime_type="text/html")
     # The memento link carries full metadata; its length is a string.
     res.link_add(
         rel="memento",
         href="http://example.com/20130103070000/res1",
         modified="2013-01-02T18:00:00Z",
         hash="md5:" + digest,
         length="8876",
         type="text/html")
     res.link_add(
         rel="timegate",
         href="http://example.com/timegate/http://example.com/res1")
     res.link_add(
         rel="timemap",
         href="http://example.com/timemap/http://example.com/res1",
         type="application/link-format")
     changes.add(res)
     self._assert_xml_equal_ex(changes.as_xml(), 'resourcesync_ex_29')
Beispiel #26
0
 def test_build_ex_19(self):
     """Change List with 4 changes, 'open' as no until.

     Only ``md_from`` is set. The XML is compared with the contents of the
     ``resourcesync_ex_19`` example via ``_assert_xml_equal``.
     """
     cl = ChangeList()
     cl.up = 'http://example.com/dataset1/capabilitylist.xml'
     cl.md_from = "2013-01-03T00:00:00Z"
     # (uri, lastmod, change) in the order the changes are added.
     entries = (
         ('http://example.com/res1.html', '2013-01-03T11:00:00Z', 'created'),
         ('http://example.com/res2.pdf', '2013-01-03T13:00:00Z', 'updated'),
         ('http://example.com/res3.tiff', '2013-01-03T18:00:00Z', 'deleted'),
         ('http://example.com/res2.pdf', '2013-01-03T21:00:00Z', 'updated'),
     )
     for uri, lastmod, change in entries:
         cl.add(Resource(uri=uri, lastmod=lastmod, change=change))
     # Close the example file deterministically instead of leaking the
     # handle returned by _open_ex.
     with self._open_ex('resourcesync_ex_19') as ex_file:
         ex_xml = ex_file.read()
     self._assert_xml_equal(cl.as_xml(), ex_xml)
Beispiel #27
0
 def test_build_ex_21(self):
     """Change List which points back to index.

     Sets ``up``, ``index``, ``md_from`` and ``md_until``, adds four changes
     and compares the XML with the contents of the ``resourcesync_ex_21``
     example via ``_assert_xml_equal``.
     """
     cl = ChangeList()
     cl.up = 'http://example.com/dataset1/capabilitylist.xml'
     cl.index = 'http://example.com/dataset1/changelist.xml'
     cl.md_from = "2013-01-02T00:00:00Z"
     cl.md_until = "2013-01-03T00:00:00Z"
     cl.add(Resource(uri='http://example.com/res7.html',
                     lastmod='2013-01-02T12:00:00Z',
                     change='created'))
     cl.add(Resource(uri='http://example.com/res9.pdf',
                     lastmod='2013-01-02T13:00:00Z',
                     change='updated'))
     cl.add(Resource(uri='http://example.com/res5.tiff',
                     lastmod='2013-01-02T19:00:00Z',
                     change='deleted'))
     cl.add(Resource(uri='http://example.com/res7.html',
                     lastmod='2013-01-02T20:00:00Z',
                     change='updated'))
     # Close the example file deterministically instead of leaking the
     # handle returned by _open_ex.
     with self._open_ex('resourcesync_ex_21') as ex_file:
         ex_xml = ex_file.read()
     self._assert_xml_equal(cl.as_xml(), ex_xml)
Beispiel #28
0
    def get_change_list_content_xml(self,
                                    from_date,
                                    from_date_args=None,
                                    to_date_args=None):
        """
        Get change list xml.

        :param from_date: Passed to ``_get_record_changes_with_interval`` to
            obtain the record changes to list.
        :param from_date_args: Optional lower bound. When truthy it is parsed
            with ``parse_date`` and changes updated before it are skipped.
        :param to_date_args: Optional upper bound. When truthy it is parsed
            with ``parse_date`` and changes updated after it are skipped.
        :return: Change list XML, or ``None`` when ``_validation()`` fails.
        """
        if not self._validation():
            return None

        # Kept as a function-level import, as in the original code
        # (presumably to avoid an import cycle -- TODO confirm).
        from .utils import parse_date
        if from_date_args:
            from_date_args = parse_date(from_date_args)
        if to_date_args:
            to_date_args = parse_date(to_date_args)

        change_list = ChangeList()
        change_list.up = INVENIO_CAPABILITY_URL.format(request.url_root)
        change_list.index = '{}resync/{}/changelist.xml'.format(
            request.url_root,
            self.repository_id,
        )

        record_changes = self._get_record_changes_with_interval(from_date)

        for data in record_changes:
            try:
                updated = data.get("updated")
                if from_date_args or to_date_args:
                    # Parse the update time once and reuse it for both
                    # bounds; it is only parsed when a bound is in effect.
                    updated_date = parse_date(updated)
                    if from_date_args and from_date_args > updated_date:
                        continue
                    if to_date_args and to_date_args < updated_date:
                        continue

                record_id = data.get('record_id')
                status = data.get('status')
                pid_object = PersistentIdentifier.get('recid', record_id)
                latest_pid = PIDVersioning(child=pid_object).last_child
                versioned_id = '{}.{}'.format(record_id,
                                              data.get('record_version'))
                is_latest = str(latest_pid.pid_value) == versioned_id

                # Non-deleted changes to an older version point at that
                # specific version; everything else points at the record id.
                if not is_latest and status != 'deleted':
                    record_ref = versioned_id
                else:
                    record_ref = record_id
                loc = '{}resync/{}/records/{}'.format(
                    request.url_root, self.repository_id, record_ref)

                change_list.add(Resource(
                    loc,
                    lastmod=updated,
                    change=status,
                    md_at=updated,
                ))
            except Exception:
                # Best effort: a failing record must not abort the whole
                # list. Log the traceback through the application logger
                # rather than printing it to stdout.
                current_app.logger.exception(
                    'Failed to add record change to change list: %s', data)

        return change_list.as_xml()
Beispiel #29
0
 def generate(self):
     """Return a new ChangeList holding every entry of ``self.changes``.

     Entries are added one at a time, in iteration order.
     """
     result = ChangeList()
     append = result.add
     for entry in self.changes:
         append(entry)
     return result
Beispiel #30
0
 def generate(self):
     """Collect ``self.changes`` into a freshly created ChangeList.

     Each change is passed to ``ChangeList.add`` individually, preserving
     the order in which ``self.changes`` yields them.
     """
     collected = ChangeList()
     pending = iter(self.changes)
     for item in pending:
         collected.add(item)
     return collected