Ejemplo n.º 1
0
 def test2_pretty_output(self):
     # Build an inventory from the dir1 test data and check the sitemap XML
     # produced with pretty_xml switched on (one element per line).
     builder = InventoryBuilder()
     builder.mapper = Mapper(['http://example.org/t', 'resync/test/testdata/dir1'])
     inventory = builder.from_disk()
     sitemap = Sitemap()
     sitemap.pretty_xml = True
     xml = sitemap.resources_as_xml(inventory)
     # Adjacent literals are concatenated; each piece ends where the
     # expected output has a newline.
     expected = (
         '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
         '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">\n'
         '<url><loc>http://example.org/t/file_a</loc><lastmod>2012-07-25T17:13:46Z</lastmod><rs:size>20</rs:size></url>\n'
         '<url><loc>http://example.org/t/file_b</loc><lastmod>2001-09-09T01:46:40Z</lastmod><rs:size>45</rs:size></url>\n'
         '</urlset>'
     )
     self.assertEqual(xml, expected)
Ejemplo n.º 2
0
 def test2_pretty_output(self):
     # Inventory of the dir1 test data, serialised with pretty_xml enabled,
     # must match the expected multi-line sitemap document.
     inv_builder = InventoryBuilder()
     inv_builder.mapper = Mapper(['http://example.org/t', 'resync/test/testdata/dir1'])
     inv = inv_builder.from_disk()
     writer = Sitemap()
     writer.pretty_xml = True
     actual = writer.inventory_as_xml(inv)
     # One literal per expected output line; adjacent literals concatenate.
     wanted = (
         '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
         '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://resourcesync.org/change/0.1">\n'
         '<url><loc>http://example.org/t/file_a</loc><lastmod>2012-03-14T17:46:04</lastmod><rs:size>20</rs:size></url>\n'
         '<url><loc>http://example.org/t/file_b</loc><lastmod>2012-03-14T17:46:25</lastmod><rs:size>45</rs:size></url>\n'
         '</urlset>'
     )
     self.assertEqual(actual, wanted)
Ejemplo n.º 3
0
 def test1_simple_output(self):
     # Default (non-pretty) serialisation: everything after the XML
     # declaration is expected on a single line.
     builder = InventoryBuilder()
     inventory = builder.from_disk('resync/test/testdata/dir1', 'http://example.org/t')
     xml = Sitemap().inventory_as_xml(inventory)
     expected = (
         '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
         '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://resourcesync.org/change/0.1">'
         '<url><loc>http://example.org/t/file_a</loc><lastmod>2012-03-14T17:46:04</lastmod><rs:size>20</rs:size></url>'
         '<url><loc>http://example.org/t/file_b</loc><lastmod>2012-03-14T17:46:25</lastmod><rs:size>45</rs:size></url>'
         '</urlset>'
     )
     self.assertEqual(xml, expected)
 def test3_with_md5(self):
     # With do_md5 enabled, each <url> entry in the generated XML must carry
     # the size and md5 element for its file. lastmod is matched loosely
     # because only its character set is checked, not its value.
     ib = InventoryBuilder(do_md5=True)
     i = ib.from_disk('resync/test/testdata/dir1', 'http://example.org/t')
     s = Sitemap()
     xml = s.inventory_as_xml(i)
     # Raw strings: \w, \: and \- are regex escapes, not string escapes, and
     # are flagged as invalid escape sequences in an ordinary literal.
     file_a_pattern = r'<loc>http://example.org/t/file_a</loc><lastmod>[\w\:\-]+</lastmod><rs:size>20</rs:size><rs:md5>6bf26fd66601b528d2e0b47eaa87edfd</rs:md5>'
     file_b_pattern = r'<loc>http://example.org/t/file_b</loc><lastmod>[\w\:\-]+</lastmod><rs:size>45</rs:size><rs:md5>452e54bdae1626ac5d6e7be81b39de21</rs:md5>'
     self.assertNotEqual(None, re.search(file_a_pattern, xml), 'size/checksum for file_a')
     self.assertNotEqual(None, re.search(file_b_pattern, xml), 'size/checksum for file_b')
Ejemplo n.º 5
0
 def test3_with_md5(self):
     # With do_md5 enabled, each <url> entry in the generated XML must carry
     # the size and an rs:fixity element of type md5 for its file. lastmod is
     # matched loosely (character set plus trailing Z), not by value.
     ib = InventoryBuilder(do_md5=True)
     ib.mapper = Mapper(['http://example.org/t', 'resync/test/testdata/dir1'])
     i = ib.from_disk()
     s = Sitemap()
     xml = s.resources_as_xml(i)
     # Raw strings: \w, \:, \- and \+ are regex escapes, not string escapes,
     # and are flagged as invalid escape sequences in an ordinary literal.
     # The + inside the md5 value must be escaped so it is matched literally.
     file_a_pattern = r'<loc>http://example.org/t/file_a</loc><lastmod>[\w\:\-]+Z</lastmod><rs:size>20</rs:size><rs:fixity type="md5">a/Jv1mYBtSjS4LR\+qoft/Q==</rs:fixity>'
     file_b_pattern = r'<loc>http://example.org/t/file_b</loc><lastmod>[\w\:\-]+Z</lastmod><rs:size>45</rs:size><rs:fixity type="md5">RS5Uva4WJqxdbnvoGzneIQ==</rs:fixity>'
     self.assertNotEqual(None, re.search(file_a_pattern, xml))
     self.assertNotEqual(None, re.search(file_b_pattern, xml))
Ejemplo n.º 6
0
 def test1_simple_output(self):
     # Default (non-pretty) serialisation of the dir1 test data: everything
     # after the XML declaration is expected on a single line.
     builder = InventoryBuilder(verbose=True)
     builder.mapper = Mapper(['http://example.org/t', 'resync/test/testdata/dir1'])
     inventory = builder.from_disk()
     xml = Sitemap().resources_as_xml(inventory)
     expected = (
         '<?xml version=\'1.0\' encoding=\'UTF-8\'?>\n'
         '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:rs="http://www.openarchives.org/rs/terms/">'
         '<url><loc>http://example.org/t/file_a</loc><lastmod>2012-07-25T17:13:46Z</lastmod><rs:size>20</rs:size></url>'
         '<url><loc>http://example.org/t/file_b</loc><lastmod>2001-09-09T01:46:40Z</lastmod><rs:size>45</rs:size></url>'
         '</urlset>'
     )
     self.assertEqual(xml, expected)
Ejemplo n.º 7
0
    def sync_or_audit(self, allow_deletion=False, audit_only=False):
        """Compare the destination with the source sitemap and optionally sync.

        Reads the source inventory from self.sitemap, builds the destination
        inventory from disk, prints a status summary and, unless audit_only
        is set, updates/creates/deletes local files to match the source.

        Parameters:
            allow_deletion - when true, local files for resources reported as
                deleted are removed (only reported when self.dryrun is set)
            audit_only - when true, return right after the status line

        Raises ClientFatalError when no mappings are configured, when the
        source inventory cannot be read (IOError), or when it is empty.

        Side effect: self.checksum is set to False when
        src_inventory.has_md5() is false.
        """
        # NOTE: every print below uses the single-argument parenthesised
        # form, which prints the same text under the Python 2 print
        # statement and is also valid Python 3.
        ### 0. Sanity checks
        if (len(self.mappings)<1):
            raise ClientFatalError("No source to destination mapping specified")
        ### 1. Get inventories from both src and dst
        # 1.a source inventory
        ib = InventoryBuilder(verbose=self.verbose,mapper=self.mapper)
        try:
            if (self.verbose):
                print("Reading sitemap %s ..." % (self.sitemap))
            src_inventory = ib.get(self.sitemap)
        except IOError as e:
            raise ClientFatalError("Can't read source inventory from %s (%s)" % (self.sitemap,str(e)))
        if (self.verbose):
            print("Read source inventory, %d resources listed" % (len(src_inventory)))
        if (len(src_inventory)==0):
            raise ClientFatalError("Aborting as there are no resources to sync")
        if (self.checksum and not src_inventory.has_md5()):
            self.checksum=False
            print("Not calculating checksums on destination as not present in source inventory")
        # 1.b destination inventory mapped back to source URIs
        ib.do_md5=self.checksum
        dst_inventory = ib.from_disk()
        ### 2. Compare these inventories respecting any comparison options
        (num_same,updated,deleted,created)=dst_inventory.compare(src_inventory)
        ### 3. Report status and planned actions
        status = "  IN SYNC  "
        if (len(updated)>0 or len(deleted)>0 or len(created)>0):
            status = "NOT IN SYNC"
        print("Status: %s (same=%d, updated=%d, deleted=%d, created=%d)" %
              (status,num_same,len(updated),len(deleted),len(created)))

        if (audit_only):
            return
        ### 4. Grab files to do sync
        # dst_file (rather than `file`) avoids shadowing the Python 2 builtin.
        for uri in updated:
            dst_file = self.mapper.src_to_dst(uri)
            if (self.verbose):
                print("updated: %s -> %s" % (uri,dst_file))
            self.update_resource(uri,dst_file,src_inventory.resources[uri].timestamp)
        # NOTE(review): unlike the updated loop, nothing is printed here in
        # verbose mode -- confirm whether that is intentional.
        for uri in created:
            dst_file = self.mapper.src_to_dst(uri)
            self.update_resource(uri,dst_file,src_inventory.resources[uri].timestamp)
        for uri in deleted:
            if (allow_deletion):
                dst_file = self.mapper.src_to_dst(uri)
                if (self.dryrun):
                    print("dryrun: would delete %s -> %s" % (uri,dst_file))
                else:
                    os.unlink(dst_file)
                    if (self.verbose):
                        print("deleted: %s -> %s" % (uri,dst_file))
            else:
                if (self.verbose):
                    print("nodelete: would delete %s (--delete to enable)" % uri)
Ejemplo n.º 8
0
 def test4_data(self):
     # Build the dir1 inventory with md5 enabled and check every field
     # recorded for file_a.
     builder = InventoryBuilder(do_md5=True)
     builder.mapper = Mapper(['http://example.org/t', 'resync/test/testdata/dir1'])
     inventory = builder.from_disk()
     self.assertEqual(len(inventory), 2)
     file_a_uri = 'http://example.org/t/file_a'
     file_a = inventory.resources.get(file_a_uri)
     self.assertTrue(file_a is not None)
     self.assertEqual(file_a.uri, file_a_uri)
     self.assertEqual(file_a.lastmod, '2012-03-14T17:46:04')
     self.assertEqual(file_a.md5, '6bf26fd66601b528d2e0b47eaa87edfd')
     self.assertEqual(file_a.file, 'resync/test/testdata/dir1/file_a')
Ejemplo n.º 9
0
 def test4_data(self):
     # Build the dir1 inventory with md5 enabled and check every field
     # recorded for file_a.
     builder = InventoryBuilder(do_md5=True)
     builder.mapper = Mapper(['http://example.org/t', 'resync/test/testdata/dir1'])
     inventory = builder.from_disk()
     self.assertEqual(len(inventory), 2)
     file_a_uri = 'http://example.org/t/file_a'
     file_a = inventory.resources.get(file_a_uri)
     self.assertTrue(file_a is not None)
     self.assertEqual(file_a.uri, file_a_uri)
     self.assertEqual(file_a.lastmod, '2012-07-25T17:13:46Z')
     self.assertEqual(file_a.md5, 'a/Jv1mYBtSjS4LR+qoft/Q==')
     self.assertEqual(file_a.file, 'resync/test/testdata/dir1/file_a')
Ejemplo n.º 10
0
    def sync_or_audit(self, src_uri, dst_path, allow_deletion=False, 
                      audit_only=False):
        """Compare dst_path with the inventory at src_uri and optionally sync.

        Reads the source inventory from src_uri, builds the destination
        inventory from dst_path (mapped onto src_uri minus its last path
        segment), prints a status summary and, unless audit_only is set,
        updates/adds/deletes local files to match the source.

        Parameters:
            src_uri - location the source inventory is read from
            dst_path - local path holding the destination copy
            allow_deletion - when true, local files for resources reported
                as deleted are unlinked
            audit_only - when true, return right after the status line

        Raises ClientFatalError when the source inventory cannot be read
        (IOError) or when it is empty.

        Side effect: self.checksum is set to False when
        src_inventory.has_md5() is false.
        """
        # NOTE: every print below uses the single-argument parenthesised
        # form, which prints the same text under the Python 2 print
        # statement and is also valid Python 3.
        ### 1. Get inventories from both src and dst
        # 1.a source inventory
        ib = InventoryBuilder()
        try:
            src_inventory = ib.get(src_uri)
        except IOError as e:
            raise ClientFatalError("Can't read source inventory (%s)" % str(e))
        if (self.verbose):
            print("Read src inventory from %s, %d resources listed" % (src_uri,len(src_inventory)))
        if (len(src_inventory)==0):
            raise ClientFatalError("Aborting as there are no resources to sync")
        if (self.checksum and not src_inventory.has_md5()):
            self.checksum=False
            print("Not calculating checksums on destination as not present in source inventory")
        # 1.b destination inventory mapped back to source URIs
        # Everything before the last '/' ('' when src_uri has no '/').
        url_prefix = src_uri.rpartition('/')[0]
        ib.do_md5=self.checksum
        dst_inventory = ib.from_disk(dst_path,url_prefix)
        ### 2. Compare these inventories respecting any comparison options
        (num_same,changed,deleted,added)=dst_inventory.compare(src_inventory)
        ### 3. Report status and planned actions
        status = "  IN SYNC  "
        if (len(changed)>0 or len(deleted)>0 or len(added)>0):
            status = "NOT IN SYNC"
        print("Status: %s (same=%d, changed=%d, deleted=%d, added=%d)" %
              (status,num_same,len(changed),len(deleted),len(added)))

        if (audit_only):
            return
        ### 4. Grab files to do sync
        mapper = Mapper(url_prefix,dst_path)
        # dst_file (rather than `file`) avoids shadowing the Python 2 builtin.
        for uri in changed:
            dst_file = mapper.src_to_dst(uri)
            if (self.verbose):
                print("changed: %s -> %s" % (uri,dst_file))
            self.update_resource(uri,dst_file,src_inventory.resources[uri].timestamp)
        for uri in added:
            dst_file = mapper.src_to_dst(uri)
            if (self.verbose):
                print("added: %s -> %s" % (uri,dst_file))
            self.update_resource(uri,dst_file,src_inventory.resources[uri].timestamp)
        for uri in deleted:
            if (allow_deletion):
                dst_file = mapper.src_to_dst(uri)
                if (self.verbose):
                    print("deleted: %s -> %s" % (uri,dst_file))
                os.unlink(dst_file)
            else:
                if (self.verbose):
                    print("would delete %s (--delete to enable)" % uri)
Ejemplo n.º 11
0
    def inventory(self):
        """Build and return the inventory of what is currently on disk.

        The scan is driven by self.mapper; self.checksum and self.verbose
        are passed through to the builder. Raises ClientFatalError when no
        mappings are configured.
        """
        # Refuse to scan when there is nothing mapped
        if len(self.mappings) == 0:
            raise ClientFatalError("No source to destination mapping specified")
        # Scan the disk using the configured mapper
        builder = InventoryBuilder(
            do_md5=self.checksum,
            verbose=self.verbose,
            mapper=self.mapper,
        )
        disk_inventory = builder.from_disk()
        return disk_inventory
Ejemplo n.º 12
0
    def inventory(self):
        """Build one inventory covering every mapped directory on disk.

        Walks self.mappings (base path -> base URI) in sorted base-path order
        and feeds each pair to the builder, which receives the inventory
        accumulated so far. self.checksum controls md5 calculation.
        """
        builder = InventoryBuilder(do_md5=self.checksum)
        combined = Inventory()
        ordered_paths = sorted(self.mappings.keys())
        for path in ordered_paths:
            uri = self.mappings[path]
            combined = builder.from_disk(path, uri, inventory=combined)
        return combined
Ejemplo n.º 13
0
    def inventory(self):
        """Return a single inventory for all directories in self.mappings.

        Each base path (taken in sorted order) is scanned with its base URI;
        the result of every scan is handed to the next one so the entries
        accumulate. md5 calculation follows self.checksum.
        """
        scanner = InventoryBuilder(do_md5=self.checksum)
        result = Inventory()
        paths = self.mappings.keys()
        for local_path in sorted(paths):
            result = scanner.from_disk(
                local_path,
                self.mappings[local_path],
                inventory=result,
            )
        return result
Ejemplo n.º 14
0
 def test4_data(self):
     # Inventory of dir1 with md5 enabled: two resources, and file_a must
     # carry the expected uri, lastmod, md5 and local file path.
     base_uri = 'http://example.org/t'
     builder = InventoryBuilder(do_md5=True)
     builder.mapper = Mapper([base_uri, 'resync/test/testdata/dir1'])
     inv = builder.from_disk()
     self.assertEqual(len(inv), 2)
     res = inv.resources.get('http://example.org/t/file_a')
     self.assertTrue(res is not None)
     self.assertEqual(res.uri, 'http://example.org/t/file_a')
     self.assertEqual(res.lastmod, '2012-07-25T17:13:46Z')
     self.assertEqual(res.md5, 'a/Jv1mYBtSjS4LR+qoft/Q==')
     self.assertEqual(res.file, 'resync/test/testdata/dir1/file_a')
Ejemplo n.º 15
0
 def test4_data(self):
     # Inventory of dir1 with md5 enabled: two resources, and file_a must
     # carry the expected uri, lastmod, md5 and local file path.
     base_uri = 'http://example.org/t'
     builder = InventoryBuilder(do_md5=True)
     builder.mapper = Mapper([base_uri, 'resync/test/testdata/dir1'])
     inv = builder.from_disk()
     self.assertEqual(len(inv), 2)
     res = inv.resources.get('http://example.org/t/file_a')
     self.assertTrue(res is not None)
     self.assertEqual(res.uri, 'http://example.org/t/file_a')
     self.assertEqual(res.lastmod, '2012-03-14T17:46:04')
     self.assertEqual(res.md5, '6bf26fd66601b528d2e0b47eaa87edfd')
     self.assertEqual(res.file, 'resync/test/testdata/dir1/file_a')
Ejemplo n.º 16
0
    def inventory(self):
        """Build and return the inventory of what is currently on disk.

        The scan uses self.mapper, honours self.checksum for md5 calculation
        and registers self.exclude_patterns with the builder before scanning.
        Raises ClientFatalError when no mappings are configured.
        """
        # Refuse to scan when there is nothing mapped
        if len(self.mappings) == 0:
            raise ClientFatalError("No source to destination mapping specified")
        # Configure the builder, then scan the disk
        builder = InventoryBuilder(
            do_md5=self.checksum,
            mapper=self.mapper,
        )
        builder.add_exclude_files(self.exclude_patterns)
        disk_inventory = builder.from_disk()
        return disk_inventory
Ejemplo n.º 17
0
    def inventory(self):
        """Scan the mapped locations on disk and return the inventory.

        Uses self.mapper for the scan, self.checksum to decide on md5
        calculation, and passes self.exclude_patterns to the builder first.
        Raises ClientFatalError when no mappings are configured.
        """
        # Step 0: there must be at least one mapping
        mapping_count = len(self.mappings)
        if mapping_count < 1:
            raise ClientFatalError("No source to destination mapping specified")
        # Step 1: build from disk
        scanner = InventoryBuilder(do_md5=self.checksum, mapper=self.mapper)
        scanner.add_exclude_files(self.exclude_patterns)
        return scanner.from_disk()
Ejemplo n.º 18
0
 def test3_with_md5(self):
     # With do_md5 enabled, each <url> entry in the generated XML must carry
     # the size and md5 element for its file. lastmod is matched loosely
     # because only its character set is checked, not its value.
     ib = InventoryBuilder(do_md5=True)
     i = ib.from_disk('resync/test/testdata/dir1', 'http://example.org/t')
     s = Sitemap()
     xml = s.inventory_as_xml(i)
     # Raw strings: \w, \: and \- are regex escapes, not string escapes, and
     # are flagged as invalid escape sequences in an ordinary literal.
     self.assertNotEqual(
         None,
         re.search(
             r'<loc>http://example.org/t/file_a</loc><lastmod>[\w\:\-]+</lastmod><rs:size>20</rs:size><rs:md5>6bf26fd66601b528d2e0b47eaa87edfd</rs:md5>',
             xml), 'size/checksum for file_a')
     self.assertNotEqual(
         None,
         re.search(
             r'<loc>http://example.org/t/file_b</loc><lastmod>[\w\:\-]+</lastmod><rs:size>45</rs:size><rs:md5>452e54bdae1626ac5d6e7be81b39de21</rs:md5>',
             xml), 'size/checksum for file_b')
Ejemplo n.º 19
0
 def test3_with_md5(self):
     # With do_md5 enabled, each <url> entry in the generated XML must carry
     # the size and an rs:fixity element of type md5 for its file. lastmod is
     # matched loosely (character set plus trailing Z), not by value.
     ib = InventoryBuilder(do_md5=True)
     ib.mapper = Mapper(
         ['http://example.org/t', 'resync/test/testdata/dir1'])
     i = ib.from_disk()
     s = Sitemap()
     xml = s.resources_as_xml(i)
     # Raw strings: \w, \:, \- and \+ are regex escapes, not string escapes,
     # and are flagged as invalid escape sequences in an ordinary literal.
     self.assertNotEqual(
         None,
         re.search(
             r'<loc>http://example.org/t/file_a</loc><lastmod>[\w\:\-]+Z</lastmod><rs:size>20</rs:size><rs:fixity type="md5">a/Jv1mYBtSjS4LR\+qoft/Q==</rs:fixity>',
             xml))  # the + in the md5 value must be escaped to match literally
     self.assertNotEqual(
         None,
         re.search(
             r'<loc>http://example.org/t/file_b</loc><lastmod>[\w\:\-]+Z</lastmod><rs:size>45</rs:size><rs:fixity type="md5">RS5Uva4WJqxdbnvoGzneIQ==</rs:fixity>',
             xml))
Ejemplo n.º 20
0
 def incremental(self, allow_deletion=False, changeset_uri=None):
     """Apply the changes listed in a source changeset to the destination.

     The changeset location is either derived from changeset_uri or taken
     from the 'current' link among the capabilities of the sitemap at
     self.sitemap. Each listed resource is then updated, created or deleted
     locally according to its changetype.

     Parameters:
         allow_deletion - when true, local files for DELETED resources are
             unlinked (only logged when self.dryrun is set)
         changeset_uri - explicit changeset location; when empty/None the
             location is looked up in the sitemap

     Raises:
         ClientFatalError - no mappings configured, sitemap or changeset
             unreadable, no 'current' link found, empty changeset, or a
             resource outside the authority of the changeset (and sitemap)
             while self.noauth is not set
         ClientError - a resource has an unrecognised changetype

     Side effect: self.checksum is set to False when
     src_changeset.has_md5() is false.
     """
     self.logger.debug("Starting incremental sync")
     ### 0. Sanity checks
     if (len(self.mappings) < 1):
         raise ClientFatalError(
             "No source to destination mapping specified")
     ### 1. Get URI of changeset, from sitemap or explicit
     if (changeset_uri):
         # Translate as necessary using maps
         changeset = self.sitemap_changeset_uri(changeset_uri)
     else:
         # Get sitemap
         try:
             self.logger.info("Reading sitemap %s" % (self.sitemap))
             src_sitemap = Sitemap(allow_multifile=self.allow_multifile,
                                   mapper=self.mapper)
             src_inventory = src_sitemap.read(uri=self.sitemap,
                                              index_only=True)
             self.logger.debug("Finished reading sitemap/sitemapindex")
         except Exception as e:
             raise ClientFatalError(
                 "Can't read source sitemap from %s (%s)" %
                 (self.sitemap, str(e)))
         # Extract changeset location
         # FIXME - need to completely rework the way we handle/store capabilities
         links = self.extract_links(src_inventory.capabilities)
         if ('current' not in links):
             raise ClientFatalError(
                 "Failed to extract changeset location from sitemap %s" %
                 (self.sitemap))
         changeset = links['current']
     ### 2. Read changeset from source
     try:
         self.logger.info("Reading changeset %s" % (changeset))
         src_sitemap = Sitemap(allow_multifile=self.allow_multifile,
                               mapper=self.mapper)
         src_changeset = src_sitemap.read(uri=changeset, changeset=True)
         self.logger.debug("Finished reading changeset")
     except Exception as e:
         raise ClientFatalError("Can't read source changeset from %s (%s)" %
                                (changeset, str(e)))
     self.logger.info("Read source changeset, %d resources listed" %
                      (len(src_changeset)))
     if (len(src_changeset) == 0):
         raise ClientFatalError(
             "Aborting as there are no resources to sync")
     if (self.checksum and not src_changeset.has_md5()):
         self.checksum = False
         self.logger.info(
             "Not calculating checksums on destination as not present in source inventory"
         )
     ### 3. Check that sitemap has authority over URIs listed
     # FIXME - What does authority mean for changeset? Here use both the
     # changeset URI and, if we used it, the sitemap URI
     uauth_cs = UrlAuthority(changeset)
     # The sitemap only vouches for resources when the changeset location
     # was discovered through it; None marks "sitemap not consulted".
     uauth_sm = None
     if (not changeset_uri):
         uauth_sm = UrlAuthority(self.sitemap)
     for resource in src_changeset:
         if (not uauth_cs.has_authority_over(resource.uri)
                 and (uauth_sm is None
                      or not uauth_sm.has_authority_over(resource.uri))):
             if (self.noauth):
                 self.logger.warning(
                     "Changeset (%s) mentions resource at a location it does not have authority over (%s)"
                     % (changeset, resource.uri))
             else:
                 raise ClientFatalError(
                     "Aborting as changeset (%s) mentions resource at a location it does not have authority over (%s), override with --noauth"
                     % (changeset, resource.uri))
     ### 4. Apply changes
     for resource in src_changeset:
         uri = resource.uri
         # dst_file (rather than `file`) avoids shadowing the Python 2 builtin
         dst_file = self.mapper.src_to_dst(uri)
         if (resource.changetype == 'UPDATED'):
             self.logger.info("updated: %s -> %s" % (uri, dst_file))
             self.update_resource(resource, dst_file, 'UPDATED')
         elif (resource.changetype == 'CREATED'):
             self.logger.info("created: %s -> %s" % (uri, dst_file))
             self.update_resource(resource, dst_file, 'CREATED')
         elif (resource.changetype == 'DELETED'):
             if (allow_deletion):
                 if (self.dryrun):
                     self.logger.info("dryrun: would delete %s -> %s" %
                                      (uri, dst_file))
                 else:
                     os.unlink(dst_file)
                     self.logger.info("deleted: %s -> %s" % (uri, dst_file))
                     self.log_event(
                         ResourceChange(resource=resource,
                                        changetype="DELETED"))
             else:
                 self.logger.info(
                     "nodelete: would delete %s (--delete to enable)" % uri)
         else:
             raise ClientError("Unknown change type %s" %
                               (resource.changetype))
     self.logger.debug("Completed incremental stuff")
Ejemplo n.º 21
0
    def sync_or_audit(self,
                      src_uri,
                      dst_path,
                      allow_deletion=False,
                      audit_only=False):
        """Compare dst_path with the inventory at src_uri and optionally sync.

        Reads the source inventory from src_uri, builds the destination
        inventory from dst_path (mapped onto src_uri minus its last path
        segment), prints a status summary and, unless audit_only is set,
        updates/adds/deletes local files to match the source.

        Parameters:
            src_uri - location the source inventory is read from
            dst_path - local path holding the destination copy
            allow_deletion - when true, local files for resources reported
                as deleted are unlinked
            audit_only - when true, return right after the status line

        Raises ClientFatalError when the source inventory cannot be read
        (IOError) or when it is empty.

        Side effect: self.checksum is set to False when
        src_inventory.has_md5() is false.
        """
        # NOTE: every print below uses the single-argument parenthesised
        # form, which prints the same text under the Python 2 print
        # statement and is also valid Python 3.
        ### 1. Get inventories from both src and dst
        # 1.a source inventory
        ib = InventoryBuilder()
        try:
            src_inventory = ib.get(src_uri)
        except IOError as e:
            raise ClientFatalError("Can't read source inventory (%s)" % str(e))
        if (self.verbose):
            print("Read src inventory from %s, %d resources listed" % (
                src_uri, len(src_inventory)))
        if (len(src_inventory) == 0):
            raise ClientFatalError(
                "Aborting as there are no resources to sync")
        if (self.checksum and not src_inventory.has_md5()):
            self.checksum = False
            print("Not calculating checksums on destination as not present in source inventory")
        # 1.b destination inventory mapped back to source URIs
        # Everything before the last '/' ('' when src_uri has no '/').
        url_prefix = src_uri.rpartition('/')[0]
        ib.do_md5 = self.checksum
        dst_inventory = ib.from_disk(dst_path, url_prefix)
        ### 2. Compare these inventories respecting any comparison options
        (num_same, changed, deleted,
         added) = dst_inventory.compare(src_inventory)
        ### 3. Report status and planned actions
        status = "  IN SYNC  "
        if (len(changed) > 0 or len(deleted) > 0 or len(added) > 0):
            status = "NOT IN SYNC"
        print("Status: %s (same=%d, changed=%d, deleted=%d, added=%d)" %
              (status, num_same, len(changed), len(deleted), len(added)))

        if (audit_only):
            return
        ### 4. Grab files to do sync
        mapper = Mapper(url_prefix, dst_path)
        # dst_file (rather than `file`) avoids shadowing the Python 2 builtin.
        for uri in changed:
            dst_file = mapper.src_to_dst(uri)
            if (self.verbose):
                print("changed: %s -> %s" % (uri, dst_file))
            self.update_resource(uri, dst_file,
                                 src_inventory.resources[uri].timestamp)
        for uri in added:
            dst_file = mapper.src_to_dst(uri)
            if (self.verbose):
                print("added: %s -> %s" % (uri, dst_file))
            self.update_resource(uri, dst_file,
                                 src_inventory.resources[uri].timestamp)
        for uri in deleted:
            if (allow_deletion):
                dst_file = mapper.src_to_dst(uri)
                if (self.verbose):
                    print("deleted: %s -> %s" % (uri, dst_file))
                os.unlink(dst_file)
            else:
                if (self.verbose):
                    print("would delete %s (--delete to enable)" % uri)
Ejemplo n.º 22
0
 def sync_or_audit(self, allow_deletion=False, audit_only=False):
     """Audit the destination against the source sitemap, optionally syncing.

     Reads the sitemap at self.sitemap, builds the destination inventory
     from disk, logs a status line and, unless audit_only is set, checks
     URL authority and then updates, creates and deletes local files.

     allow_deletion enables removal of files for deleted resources (only
     logged when self.dryrun is set). Raises ClientFatalError on missing
     mappings, an unreadable or empty source inventory, or a resource
     outside the sitemap's authority when self.noauth is not set.
     self.checksum is switched off when the source has no md5 data.
     """
     if audit_only:
         action = "audit"
     else:
         action = "sync"
     self.logger.debug("Starting " + action)
     # --- 0. Sanity checks
     if len(self.mappings) < 1:
         raise ClientFatalError("No source to destination mapping specified")
     # --- 1. Inventories from both sides
     # 1.a source inventory, read from the sitemap
     builder = InventoryBuilder(mapper=self.mapper)
     try:
         self.logger.info("Reading sitemap %s" % (self.sitemap))
         reader = Sitemap(allow_multifile=self.allow_multifile, mapper=self.mapper)
         src_inventory = reader.read(uri=self.sitemap)
         self.logger.debug("Finished reading sitemap")
     except Exception as e:
         raise ClientFatalError("Can't read source inventory from %s (%s)" % (self.sitemap, str(e)))
     self.logger.info("Read source inventory, %d resources listed" % (len(src_inventory)))
     if len(src_inventory) == 0:
         raise ClientFatalError("Aborting as there are no resources to sync")
     if self.checksum:
         if not src_inventory.has_md5():
             self.checksum = False
             self.logger.info("Not calculating checksums on destination as not present in source inventory")
     # 1.b destination inventory, mapped back to source URIs
     builder.do_md5 = self.checksum
     dst_inventory = builder.from_disk()
     # --- 2. Compare the two inventories
     comparison = dst_inventory.compare(src_inventory)
     (same, updated, deleted, created) = comparison
     # --- 3. Report status and planned actions
     out_of_sync = len(updated) > 0 or len(deleted) > 0 or len(created) > 0
     status = "NOT IN SYNC" if out_of_sync else "  IN SYNC  "
     counts = (status, len(same), len(updated), len(deleted), len(created))
     self.logger.warning("Status: %s (same=%d, updated=%d, deleted=%d, created=%d)" % counts)
     if audit_only:
         self.logger.debug("Completed " + action)
         return
     # --- 4. The sitemap must have authority over every URI it lists
     uauth = UrlAuthority(self.sitemap)
     for resource in src_inventory:
         if uauth.has_authority_over(resource.uri):
             continue
         if not self.noauth:
             raise ClientFatalError(
                 "Aborting as sitemap (%s) mentions resource at a location it does not have authority over (%s), override with --noauth"
                 % (self.sitemap, resource.uri)
             )
         self.logger.info(
             "Sitemap (%s) mentions resource at a location it does not have authority over (%s)"
             % (self.sitemap, resource.uri)
         )
     # --- 5. Bring the destination in line with the source
     for resource in updated:
         src = resource.uri
         dst = self.mapper.src_to_dst(src)
         self.logger.info("updated: %s -> %s" % (src, dst))
         self.update_resource(resource, dst, "UPDATED")
     for resource in created:
         src = resource.uri
         dst = self.mapper.src_to_dst(src)
         self.logger.info("created: %s -> %s" % (src, dst))
         self.update_resource(resource, dst, "CREATED")
     for resource in deleted:
         src = resource.uri
         if not allow_deletion:
             self.logger.info("nodelete: would delete %s (--delete to enable)" % src)
             continue
         dst = self.mapper.src_to_dst(src)
         if self.dryrun:
             self.logger.info("dryrun: would delete %s -> %s" % (src, dst))
             continue
         os.unlink(dst)
         self.logger.info("deleted: %s -> %s" % (src, dst))
         self.log_event(ResourceChange(resource=resource, changetype="DELETED"))
     self.logger.debug("Completed " + action)
Ejemplo n.º 23
0
    def sync_or_audit(self, allow_deletion=False, audit_only=False):
        """Compare the destination with the source sitemap and optionally sync.

        Reads the source inventory from self.sitemap, builds the destination
        inventory from disk, prints a status summary and, unless audit_only
        is set, updates/creates/deletes local files to match the source.

        Parameters:
            allow_deletion - when true, local files for resources reported as
                deleted are removed (only reported when self.dryrun is set)
            audit_only - when true, return right after the status line

        Raises ClientFatalError when no mappings are configured, when the
        source inventory cannot be read (IOError), or when it is empty.

        Side effect: self.checksum is set to False when
        src_inventory.has_md5() is false.
        """
        # NOTE: every print below uses the single-argument parenthesised
        # form, which prints the same text under the Python 2 print
        # statement and is also valid Python 3.
        ### 0. Sanity checks
        if (len(self.mappings) < 1):
            raise ClientFatalError(
                "No source to destination mapping specified")
        ### 1. Get inventories from both src and dst
        # 1.a source inventory
        ib = InventoryBuilder(verbose=self.verbose, mapper=self.mapper)
        try:
            if (self.verbose):
                print("Reading sitemap %s ..." % (self.sitemap))
            src_inventory = ib.get(self.sitemap)
        except IOError as e:
            raise ClientFatalError("Can't read source inventory from %s (%s)" %
                                   (self.sitemap, str(e)))
        if (self.verbose):
            print("Read source inventory, %d resources listed" % (
                len(src_inventory)))
        if (len(src_inventory) == 0):
            raise ClientFatalError(
                "Aborting as there are no resources to sync")
        if (self.checksum and not src_inventory.has_md5()):
            self.checksum = False
            print("Not calculating checksums on destination as not present in source inventory")
        # 1.b destination inventory mapped back to source URIs
        ib.do_md5 = self.checksum
        dst_inventory = ib.from_disk()
        ### 2. Compare these inventories respecting any comparison options
        (num_same, updated, deleted,
         created) = dst_inventory.compare(src_inventory)
        ### 3. Report status and planned actions
        status = "  IN SYNC  "
        if (len(updated) > 0 or len(deleted) > 0 or len(created) > 0):
            status = "NOT IN SYNC"
        print("Status: %s (same=%d, updated=%d, deleted=%d, created=%d)" %
              (status, num_same, len(updated), len(deleted), len(created)))

        if (audit_only):
            return
        ### 4. Grab files to do sync
        # dst_file (rather than `file`) avoids shadowing the Python 2 builtin.
        for uri in updated:
            dst_file = self.mapper.src_to_dst(uri)
            if (self.verbose):
                print("updated: %s -> %s" % (uri, dst_file))
            self.update_resource(uri, dst_file,
                                 src_inventory.resources[uri].timestamp)
        # NOTE(review): unlike the updated loop, nothing is printed here in
        # verbose mode -- confirm whether that is intentional.
        for uri in created:
            dst_file = self.mapper.src_to_dst(uri)
            self.update_resource(uri, dst_file,
                                 src_inventory.resources[uri].timestamp)
        for uri in deleted:
            if (allow_deletion):
                dst_file = self.mapper.src_to_dst(uri)
                if (self.dryrun):
                    print("dryrun: would delete %s -> %s" % (uri, dst_file))
                else:
                    os.unlink(dst_file)
                    if (self.verbose):
                        print("deleted: %s -> %s" % (uri, dst_file))
            else:
                if (self.verbose):
                    print("nodelete: would delete %s (--delete to enable)" % uri)
Ejemplo n.º 24
0
 def sync_or_audit(self, allow_deletion=False, audit_only=False):
     """Audit the destination against the source sitemap, optionally syncing.

     Reads the sitemap at self.sitemap, builds the destination inventory
     from disk, logs a status line and, unless audit_only is set, checks
     URL authority and then updates, creates and deletes local files.

     allow_deletion enables removal of files for deleted resources (only
     logged when self.dryrun is set). Raises ClientFatalError on missing
     mappings, an unreadable or empty source inventory, or a resource
     outside the sitemap's authority when self.noauth is not set.
     self.checksum is switched off when the source has no md5 data.
     """
     action = 'sync'
     if audit_only:
         action = 'audit'
     self.logger.debug("Starting " + action)
     # Step 0: sanity checks
     if len(self.mappings) < 1:
         raise ClientFatalError("No source to destination mapping specified")
     # Step 1: inventories from both src and dst
     # 1.a source inventory
     inv_builder = InventoryBuilder(mapper=self.mapper)
     try:
         self.logger.info("Reading sitemap %s" % (self.sitemap))
         sitemap_reader = Sitemap(allow_multifile=self.allow_multifile,
                                  mapper=self.mapper)
         src_inventory = sitemap_reader.read(uri=self.sitemap)
         self.logger.debug("Finished reading sitemap")
     except Exception as e:
         read_error = "Can't read source inventory from %s (%s)" % (
             self.sitemap, str(e))
         raise ClientFatalError(read_error)
     self.logger.info(
         "Read source inventory, %d resources listed" % (len(src_inventory)))
     if len(src_inventory) == 0:
         raise ClientFatalError("Aborting as there are no resources to sync")
     if self.checksum and not src_inventory.has_md5():
         self.checksum = False
         self.logger.info(
             "Not calculating checksums on destination as not present in source inventory")
     # 1.b destination inventory mapped back to source URIs
     inv_builder.do_md5 = self.checksum
     dst_inventory = inv_builder.from_disk()
     # Step 2: compare the inventories
     same, updated, deleted, created = dst_inventory.compare(src_inventory)
     # Step 3: report status and planned actions
     if len(updated) > 0 or len(deleted) > 0 or len(created) > 0:
         status = "NOT IN SYNC"
     else:
         status = "  IN SYNC  "
     self.logger.warning(
         "Status: %s (same=%d, updated=%d, deleted=%d, created=%d)" %
         (status, len(same), len(updated), len(deleted), len(created)))
     if audit_only:
         self.logger.debug("Completed " + action)
         return
     # Step 4: check that the sitemap has authority over the URIs listed
     uauth = UrlAuthority(self.sitemap)
     for resource in src_inventory:
         if uauth.has_authority_over(resource.uri):
             continue
         if self.noauth:
             self.logger.info(
                 "Sitemap (%s) mentions resource at a location it does not have authority over (%s)"
                 % (self.sitemap, resource.uri))
             continue
         raise ClientFatalError(
             "Aborting as sitemap (%s) mentions resource at a location it does not have authority over (%s), override with --noauth"
             % (self.sitemap, resource.uri))
     # Step 5: grab files to do the sync
     for resource in updated:
         uri = resource.uri
         local_path = self.mapper.src_to_dst(uri)
         self.logger.info("updated: %s -> %s" % (uri, local_path))
         self.update_resource(resource, local_path, 'UPDATED')
     for resource in created:
         uri = resource.uri
         local_path = self.mapper.src_to_dst(uri)
         self.logger.info("created: %s -> %s" % (uri, local_path))
         self.update_resource(resource, local_path, 'CREATED')
     for resource in deleted:
         uri = resource.uri
         if not allow_deletion:
             self.logger.info(
                 "nodelete: would delete %s (--delete to enable)" % uri)
             continue
         local_path = self.mapper.src_to_dst(uri)
         if self.dryrun:
             self.logger.info(
                 "dryrun: would delete %s -> %s" % (uri, local_path))
         else:
             os.unlink(local_path)
             self.logger.info("deleted: %s -> %s" % (uri, local_path))
             deletion = ResourceChange(resource=resource, changetype="DELETED")
             self.log_event(deletion)
     self.logger.debug("Completed " + action)