Ejemplo n.º 1
0
 def lastmod(self, lastmod):
     """Set timestamp from an W3C Datetime Last-Modified value"""
     if lastmod is None:
         self.timestamp = None
         return
     if lastmod == "":
         raise ValueError("Attempt to set empty lastmod")
     self.timestamp = str_to_datetime(lastmod)
Ejemplo n.º 2
0
    def incremental(self,
                    allow_deletion=False,
                    change_list_uri=None,
                    from_datetime=None):
        """Incremental synchronization

        Use Change List to do incremental sync
        """
        self.logger.debug("Starting incremental sync")
        ### 0. Sanity checks
        if (len(self.mapper) < 1):
            raise ClientFatalError(
                "No source to destination mapping specified")
        if (self.mapper.unsafe()):
            raise ClientFatalError(
                "Source to destination mappings unsafe: %s" % str(self.mapper))
        from_timestamp = None
        if (from_datetime is not None):
            try:
                from_timestamp = str_to_datetime(from_datetime)
            except ValueError:
                raise ClientFatalError("Bad datetime in --from (%s)" %
                                       from_datetime)
    ### 1. Work out where to start from
        if (from_timestamp is None):
            from_timestamp = ClientState().get_state(self.sitemap)
            if (from_timestamp is None):
                raise ClientFatalError(
                    "Cannot do incremental sync. No stored timestamp for this site, and no explicit --from."
                )
    ### 2. Get URI of change list, from sitemap or explicit
        if (change_list_uri):
            # Translate as necessary using maps
            change_list = self.sitemap_uri(change_list_uri)
        else:
            # Try default name
            change_list = self.sitemap_uri(self.change_list_name)
    ### 3. Read change list from source
        try:
            self.logger.info("Reading change list %s" % (change_list))
            src_change_list = ChangeList()
            src_change_list.read(uri=change_list)
            self.logger.debug("Finished reading change list")
        except Exception as e:
            raise ClientFatalError(
                "Can't read source change list from %s (%s)" %
                (change_list, str(e)))
        self.logger.info("Read source change list, %d changes listed" %
                         (len(src_change_list)))
        #if (len(src_change_list)==0):
        #    raise ClientFatalError("Aborting as there are no resources to sync")
        if (self.checksum and not src_change_list.has_md5()):
            self.checksum = False
            self.logger.info(
                "Not calculating checksums on destination as not present in source change list"
            )
    # Check all changes have timestamp and record last
        self.last_timestamp = 0
        for resource in src_change_list:
            if (resource.timestamp is None):
                raise ClientFatalError(
                    "Aborting - missing timestamp for change in %s" % (uri))
            if (resource.timestamp > self.last_timestamp):
                self.last_timestamp = resource.timestamp
    ### 4. Check that the change list has authority over URIs listed
    # FIXME - What does authority mean for change list? Here use both the
    # change list URI and, if we used it, the sitemap URI
        if (not self.noauth):
            uauth_cs = UrlAuthority(change_list, self.strictauth)
            if (not change_list_uri):
                uauth_sm = UrlAuthority(self.sitemap)
                for resource in src_change_list:
                    if (not uauth_cs.has_authority_over(resource.uri) and
                        (change_list_uri
                         or not uauth_sm.has_authority_over(resource.uri))):
                        raise ClientFatalError(
                            "Aborting as change list (%s) mentions resource at a location it does not have authority over (%s), override with --noauth"
                            % (change_list, resource.uri))
    ### 5. Prune entries before starting timestamp and dupe changes for a resource
        num_skipped = src_change_list.prune_before(from_timestamp)
        if (num_skipped > 0):
            self.logger.info("Skipped %d changes before %s" %
                             (num_skipped, datetime_to_str(from_timestamp)))
        num_dupes = src_change_list.prune_dupes()
        if (num_dupes > 0):
            self.logger.info("Removed %d prior changes" % (num_dupes))
    # Review and log status before
    # FIXME - should at this stage prune the change list to pick out
    # only the last change for each resource
        to_update = 0
        to_create = 0
        to_delete = 0
        for resource in src_change_list:
            if (resource.change == 'updated'):
                to_update += 1
            elif (resource.change == 'created'):
                to_create += 1
            elif (resource.change == 'deleted'):
                to_delete += 1
            else:
                raise ClientError("Unknown change type %s" % (resource.change))
    # Log status based on what we know from the Change List. Exit if
    # either there are no changes or if there are only deletions and
    # we don't allow deletion
        in_sync = ((to_update + to_delete + to_create) == 0)
        self.log_status(in_sync=in_sync,
                        incremental=True,
                        created=to_create,
                        updated=to_update,
                        deleted=to_delete)
        if (in_sync or ((to_update + to_create) == 0 and not allow_deletion)):
            self.logger.debug("Completed incremental")
            return
    ### 6. Apply changes at same time or after from_timestamp
        delete_msg = (", and delete %d resources" %
                      to_delete) if (allow_deletion) else ''
        self.logger.warning("Will apply %d changes%s" %
                            (len(src_change_list), delete_msg))
        num_updated = 0
        num_deleted = 0
        num_created = 0
        for resource in src_change_list:
            uri = resource.uri
            file = self.mapper.src_to_dst(uri)
            if (resource.change == 'updated'):
                self.logger.info("updated: %s -> %s" % (uri, file))
                self.update_resource(resource, file, 'updated')
                num_updated += 1
            elif (resource.change == 'created'):
                self.logger.info("created: %s -> %s" % (uri, file))
                self.update_resource(resource, file, 'created')
                num_created += 1
            elif (resource.change == 'deleted'):
                num_deleted += self.delete_resource(resource, file,
                                                    allow_deletion)
            else:
                raise ClientError("Unknown change type %s" % (resource.change))
    ### 7. Report status and planned actions
        self.log_status(incremental=True,
                        created=num_created,
                        updated=num_updated,
                        deleted=num_deleted,
                        to_delete=to_delete)
        ### 8. Record last timestamp we have seen
        if (self.last_timestamp > 0):
            ClientState().set_state(self.sitemap, self.last_timestamp)
            self.logger.info("Written last timestamp %s for incremental sync" %
                             (datetime_to_str(self.last_timestamp)))

    ### 9. Done
        self.logger.debug("Completed incremental sync")
Ejemplo n.º 3
0
    def incremental(self, allow_deletion=False, change_list_uri=None, from_datetime=None):
	"""Incremental synchronization

        """
        self.logger.debug("Starting incremental sync")
        ### 0. Sanity checks
        if (len(self.mappings)<1):
            raise ClientFatalError("No source to destination mapping specified")
        from_timestamp = None
        if (from_datetime is not None):
            try:
                from_timestamp = str_to_datetime(from_datetime)
            except ValueError:
                raise ClientFatalError("Bad datetime in --from (%s)" % from_datetime)
        ### 1. Work out where to start from
        if (from_timestamp is None):
            from_timestamp=ClientState().get_state(self.sitemap)
            if (from_timestamp is None):
                raise ClientFatalError("No stored timestamp for this site, and no explicit --from")
        ### 2. Get URI of change list, from sitemap or explicit
        if (change_list_uri):
            # Translate as necessary using maps
            change_list = self.sitemap_uri(change_list_uri)
        else:
            # Try default name
            change_list = self.sitemap_uri(self.change_list_name)
        ### 3. Read change list from source
        try:
            self.logger.info("Reading change list %s" % (change_list))
            src_change_list = ChangeList()
            src_change_list.read(uri=change_list)
            self.logger.debug("Finished reading change list")
        except Exception as e:
            raise ClientFatalError("Can't read source change list from %s (%s)" % (change_list,str(e)))
        self.logger.info("Read source change list, %d changes listed" % (len(src_change_list)))
        #if (len(src_change_list)==0):
        #    raise ClientFatalError("Aborting as there are no resources to sync")
        if (self.checksum and not src_change_list.has_md5()):
            self.checksum=False
            self.logger.info("Not calculating checksums on destination as not present in source change list")
        # Check all changes have timestamp and record last
        self.last_timestamp = 0
        for resource in src_change_list:
            if (resource.timestamp is None):
                raise ClientFatalError("Aborting - missing timestamp for change in %s" % (uri))
            if (resource.timestamp > self.last_timestamp):
                self.last_timestamp = resource.timestamp
        ### 4. Check that the change list has authority over URIs listed
        # FIXME - What does authority mean for change list? Here use both the
        # change list URI and, if we used it, the sitemap URI
        uauth_cs = UrlAuthority(change_list)
        if (not change_list_uri):
            uauth_sm = UrlAuthority(self.sitemap)
        for resource in src_change_list:
            if (not uauth_cs.has_authority_over(resource.uri) and 
                (change_list_uri or not uauth_sm.has_authority_over(resource.uri))):
                if (self.noauth):
                    #self.logger.info("Change list (%s) mentions resource at a location it does not have authority over (%s)" % (change_list,resource.uri))
                    pass
                else:
                    raise ClientFatalError("Aborting as change list (%s) mentions resource at a location it does not have authority over (%s), override with --noauth" % (change_list,resource.uri))
        ### 5. Prune entries before starting timestamp and dupe changes for a resource
        num_skipped = src_change_list.prune_before(from_timestamp)
        if (num_skipped>0):
            self.logger.info("Skipped %d changes before %s" % (num_skipped,datetime_to_str(from_timestamp)))
        num_dupes = src_change_list.prune_dupes()
        if (num_dupes>0):
            self.logger.info("Removed %d prior changes" % (num_dupes))
        ### 6. Apply changes at same time or after from_timestamp
        self.logger.info("Applying %d changes" % (len(src_change_list)))
        num_updated = 0
        num_deleted = 0
        num_created = 0
        for resource in src_change_list:
            uri = resource.uri
            file = self.mapper.src_to_dst(uri)
            if (resource.change == 'updated'):
                self.logger.info("updated: %s -> %s" % (uri,file))
                self.update_resource(resource,file,'updated')
                num_updated+=1
            elif (resource.change == 'created'):
                self.logger.info("created: %s -> %s" % (uri,file))
                self.update_resource(resource,file,'created')
                num_created+=1
            elif (resource.change == 'deleted'):
                self.delete_resource(resource,file,allow_deletion)
                num_deleted+=1
            else:
                raise ClientError("Unknown change type %s" % (resource.change) )
        ### 7. Report status and planned actions
        self.log_status(in_sync=((num_updated+num_deleted+num_created)==0),
                        incremental=True,created=num_created, updated=num_updated, 
                        deleted=num_deleted)
        ### 8. Record last timestamp we have seen
        if (self.last_timestamp>0):
            ClientState().set_state(self.sitemap,self.last_timestamp)
            self.logger.info("Written last timestamp %s for incremental sync" % (datetime_to_str(self.last_timestamp)))
        ### 9. Done
        self.logger.debug("Completed incremental sync")
Ejemplo n.º 4
0
 def md_until(self, md_until):
     """Set md_until value from a W3C Datetime value"""
     self._set_extra( 'ts_until', str_to_datetime(md_until, context='md_until datetime') )
Ejemplo n.º 5
0
 def md_from(self, md_from):
     """Set md_from value from a W3C Datetime value"""
     self._set_extra( 'ts_from', str_to_datetime(md_from, context='md_from datetime') )
Ejemplo n.º 6
0
 def md_completed(self, md_completed):
     """Set md_completed value from a W3C Datetime value"""
     self._set_extra( 'ts_completed', str_to_datetime(md_completed, context='md_completed datetime') )
Ejemplo n.º 7
0
 def md_at(self, md_at):
     """Set at value from a W3C Datetime value"""
     self._set_extra( 'ts_at', str_to_datetime(md_at, context='md_at datetime') )
Ejemplo n.º 8
0
 def lastmod(self, lastmod):
     """Set timestamp from a W3C Datetime Last-Modified value"""
     self.timestamp = str_to_datetime(lastmod, context='lastmod')