Beispiel #1
0
    def save(self, snapshot, backup_id, cinder):
        """Prepare the backup ``Worker`` used to save ``snapshot``.

        Renames the logger, sets the process title and builds a ``Worker``
        for ``snapshot['origin']``.  If building it raises
        ``exc.ClientException`` with ``http_status`` 404, the container is
        created and the ``Worker`` is rebuilt around a blank manifest.

        :param snapshot: mapping; its ``'origin'`` and ``'size'`` keys are read
        :param backup_id: string appended to the process title
        :param cinder: not referenced in this part of the method
        :raises exc.ClientException: when ``http_status`` is not 404
        """
        job_stats_path = self._stats_file(snapshot['origin'])
        logger.rename('lunr.storage.helper.backup.save')
        setproctitle("lunr-save: " + backup_id)
        # NOTE(review): looks like a bytes -> GB conversion; confirm the unit
        # of snapshot['size'].
        size = snapshot['size'] / 1024 / 1024 / 1024

        try:
            op_start = time()
            worker = Worker(snapshot['origin'],
                            conf=self.conf,
                            stats_path=job_stats_path)
        except exc.ClientException as e:
            # Only a 404 is handled here; everything else propagates.
            if e.http_status != 404:
                raise
            op_start = time()
            conn = get_conn(self.conf)
            conn.put_container(snapshot['origin'])
            logger.warning("failed to retrieve manifest;"
                           " first time backup for this volume?")
            # TODO: write the block_size on the manifest at create?
            # Round the block count up so a partial trailing block is covered.
            block_count, remainder = divmod(snapshot['size'], BLOCK_SIZE)
            if remainder:
                block_count += 1
            # The initial backup is the only time we need to worry about
            # creating a new manifest for the worker.
            worker = Worker(snapshot['origin'],
                            conf=self.conf,
                            manifest=Manifest.blank(block_count),
                            stats_path=job_stats_path)
Beispiel #2
0
    def create(self, volume_id, size=None, backup_source_volume_id=None,
               backup_id=None, image_id=None, callback=None, lock=None,
               account=None, cinder=None, scrub_callback=None):
        """Start creating a volume, validating the source image if given.

        When ``image_id`` is set, the image is looked up through the
        management glance connection, rejected unless its status is ACTIVE
        and its ``min_disk`` is at most 127, and a conversion scratch volume
        is created for it.

        :param size: passed to ``self._get_size_str``
        :param image_id: glance image to build the volume from (optional)
        :param account: passed as ``tenant_id`` to the glance connections
        :raises InvalidImage: for a non-active or oversized image, or when
            glance raises ``GlanceError``

        The remaining parameters are not referenced in this part of the
        method.
        """

        op_start = time()

        size_str = self._get_size_str(size)
        tmp_vol = None
        snet_glance = None

        if image_id:
            mgmt_glance = get_glance_conn(self.conf, tenant_id=account,
                                          glance_urls=self.glance_mgmt_urls)
            snet_glance = get_glance_conn(self.conf, tenant_id=account)
            try:
                glance_start = time()
                image = mgmt_glance.head(image_id)
                logger.info('STAT: glance.head %r. Time: %r' %
                            (image_id, time() - glance_start))
                # An image without a status attribute is treated as ACTIVE.
                status = getattr(image, 'status', 'ACTIVE')
                if status.upper() != 'ACTIVE':
                    raise InvalidImage("Non-active image status: %s" % status)
                min_disk = getattr(image, 'min_disk', 0)
                if min_disk > 127:
                    raise InvalidImage("Image > 127GB: %s" % image_id)
                # Size the scratch space from min_disk when the image has
                # one, otherwise use the configured default.
                if min_disk:
                    multiplier = self._get_scratch_multiplier(image)
                    convert_gbs = int(min_disk * multiplier)
                else:
                    convert_gbs = self.convert_gbs
                tmp_vol = self.create_convert_scratch(image, convert_gbs)
            except GlanceError as e:
                logger.warning("Error fetching glance image: %s" % e)
                raise InvalidImage("Error fetching image: %s" % image_id)
Beispiel #3
0
    def create(self, volume_id, size=None, backup_source_volume_id=None,
               backup_id=None, image_id=None, callback=None, lock=None,
               account=None, cinder=None, scrub_callback=None):
        """Start creating a volume, validating the source image if given.

        When ``image_id`` is set, the image is looked up through the
        management glance connection, rejected unless its status is ACTIVE
        and its ``min_disk`` is at most 127, and a conversion scratch volume
        is created for it.

        :param size: passed to ``self._get_size_str``
        :param image_id: glance image to build the volume from (optional)
        :param account: passed as ``tenant_id`` to the glance connection
        :raises InvalidImage: for a non-active or oversized image, or when
            glance raises ``GlanceError``

        The remaining parameters are not referenced in this part of the
        method.
        """

        op_start = time()

        size_str = self._get_size_str(size)
        tmp_vol = None

        if image_id:
            mgmt_glance = get_glance_conn(self.conf, tenant_id=account,
                                          glance_urls=self.glance_mgmt_urls)
            try:
                glance_start = time()
                image = mgmt_glance.head(image_id)
                logger.info('STAT: glance.head %r. Time: %r' %
                            (image_id, time() - glance_start))
                # An image without a status attribute is treated as ACTIVE.
                status = getattr(image, 'status', 'ACTIVE')
                if status.upper() != 'ACTIVE':
                    raise InvalidImage("Non-active image status: %s" % status)
                min_disk = getattr(image, 'min_disk', 0)
                if min_disk > 127:
                    raise InvalidImage("Image > 127GB: %s" % image_id)
                # Size the scratch space from min_disk when the image has
                # one, otherwise use the configured default.
                if min_disk:
                    multiplier = self._get_scratch_multiplier(image)
                    convert_gbs = int(min_disk * multiplier)
                else:
                    convert_gbs = self.convert_gbs
                tmp_vol = self.create_convert_scratch(image, convert_gbs)
            except GlanceError as e:
                logger.warning("Error fetching glance image: %s" % e)
                raise InvalidImage("Error fetching image: %s" % image_id)
Beispiel #4
0
    def save(self, snapshot, backup_id, cinder):
        """Prepare the backup ``Worker`` used to save ``snapshot``.

        Renames the logger, sets the process title and builds a ``Worker``
        for ``snapshot['origin']``.  If building it raises
        ``exc.ClientException`` with ``http_status`` 404, the container is
        created and the ``Worker`` is rebuilt around a blank manifest.

        :param snapshot: mapping; its ``'origin'`` and ``'size'`` keys are read
        :param backup_id: string appended to the process title
        :param cinder: not referenced in this part of the method
        :raises exc.ClientException: when ``http_status`` is not 404
        """
        job_stats_path = self._stats_file(snapshot['origin'])
        logger.rename('lunr.storage.helper.backup.save')
        setproctitle("lunr-save: " + backup_id)
        # NOTE(review): looks like a bytes -> GB conversion; confirm the unit
        # of snapshot['size'].
        size = snapshot['size'] / 1024 / 1024 / 1024

        try:
            op_start = time()
            worker = Worker(snapshot['origin'], conf=self.conf,
                            stats_path=job_stats_path)
        except exc.ClientException as e:
            # Only a 404 is handled here; everything else propagates.
            if e.http_status != 404:
                raise
            op_start = time()
            conn = get_conn(self.conf)
            conn.put_container(snapshot['origin'])
            logger.warning("failed to retrieve manifest;"
                           " first time backup for this volume?")
            # TODO: write the block_size on the manifest at create?
            # Round the block count up so a partial trailing block is covered.
            block_count, remainder = divmod(snapshot['size'], BLOCK_SIZE)
            if remainder:
                block_count += 1
            # The initial backup is the only time we need to worry about
            # creating a new manifest for the worker.
            worker = Worker(snapshot['origin'], conf=self.conf,
                            manifest=Manifest.blank(block_count),
                            stats_path=job_stats_path)
Beispiel #5
0
 def progress_callback(percent):
     """Publish clone progress as cinder volume metadata.

     ``cinder`` and ``clone_id`` come from the enclosing scope.  Nothing is
     sent when ``cinder`` is falsy, and a ``CinderError`` is logged rather
     than raised.

     :param percent: number rendered with two decimals and a trailing ``%``
     """
     try:
         if cinder:
             cinder.update_volume_metadata(
                 clone_id, {'clone-progress': "%.2f%%" % percent})
     except CinderError as e:
         logger.warning(
             "Error updating clone-progress metadata: %s" % e)
Beispiel #6
0
 def progress_callback(percent):
     """Publish clone progress as cinder volume metadata.

     ``cinder`` and ``clone_id`` come from the enclosing scope.  Nothing is
     sent when ``cinder`` is falsy, and a ``CinderError`` is logged rather
     than raised.

     :param percent: number rendered with two decimals and a trailing ``%``
     """
     try:
         if cinder:
             cinder.update_volume_metadata(
                 clone_id, {'clone-progress': "%.2f%%" % percent})
     except CinderError as e:
         logger.warning("Error updating clone-progress metadata: %s" %
                        e)
Beispiel #7
0
 def callback():
     """Finish a backup: drop the snapshot and mark the backup AVAILABLE.

     ``self``, ``snapshot`` and ``cinder`` come from the enclosing scope.
     When ``cinder`` is truthy its snapshot progress is set to "100%"; a
     ``CinderError`` from that call is logged rather than raised.
     """
     self.helper.volumes.delete(snapshot['id'])
     self.helper.make_api_request('backups', self.id,
                                  data={'status': 'AVAILABLE'})
     if cinder:
         try:
             cinder.snapshot_progress(self.id, "100%")
         except cinderclient.CinderError as e:
             logger.warning('Error updating snapshot progress: %s' % e)
Beispiel #8
0
 def head(self, image_id):
     """Return the glance image record for ``image_id``.

     If the lookup raises one of the handled glance exceptions, the failure
     is logged, the client is re-initialised via ``self._init_client()`` and
     the lookup is attempted again.
     """
     try:
         image = self.client.images.get(image_id)
     except (glance_exc.BaseException, glance_exc.HTTPException) as err:
         details = (self.glance_url, image_id, err)
         logger.warning(
             "Exception in glance.head, host: %s, id: %s, error: %s" %
             details)
         # NOTE(review): presumably _init_client() eventually raises once no
         # endpoint is left, which is what ends the retries -- confirm.
         self._init_client()
         image = self.head(image_id)
     return image
Beispiel #9
0
 def head(self, image_id):
     """Return the glance image record for ``image_id``.

     If the lookup raises one of the handled glance exceptions, the failure
     is logged, the client is re-initialised via ``self._init_client()`` and
     the lookup is attempted again.
     """
     try:
         image = self.client.images.get(image_id)
     except (glance_exc.BaseException, glance_exc.HTTPException) as err:
         details = (self.glance_url, image_id, err)
         logger.warning(
             "Exception in glance.head, host: %s, id: %s, error: %s" %
             details)
         # NOTE(review): presumably _init_client() eventually raises once no
         # endpoint is left, which is what ends the retries -- confirm.
         self._init_client()
         image = self.head(image_id)
     return image
Beispiel #10
0
 def callback():
     """Finish a backup: drop the snapshot and mark the backup AVAILABLE.

     ``self``, ``snapshot`` and ``cinder`` come from the enclosing scope.
     When ``cinder`` is truthy its snapshot progress is set to "100%"; a
     ``CinderError`` from that call is logged rather than raised.
     """
     self.helper.volumes.delete(snapshot['id'])
     self.helper.make_api_request('backups',
                                  self.id,
                                  data={'status': 'AVAILABLE'})
     if cinder:
         try:
             cinder.snapshot_progress(self.id, "100%")
         except cinderclient.CinderError as e:
             logger.warning('Error updating snapshot progress: %s' % e)
Beispiel #11
0
    def prune(self, volume, backup_id):
        """Build the ``Worker`` used to prune ``backup_id`` from ``volume``.

        Returns early (``None``) when building the ``Worker`` raises
        ``exc.ClientException`` with ``http_status`` 404.

        :param volume: mapping; its ``"id"`` key is read
        :param backup_id: string appended to the process title
        :raises exc.ClientException: when ``http_status`` is not 404
        """
        logger.rename("lunr.storage.helper.backup.prune")
        setproctitle("lunr-prune: " + backup_id)

        try:
            op_start = time()
            worker = Worker(volume["id"], self.conf)
        except exc.ClientException as e:
            # If the manifest doesn't exist, we consider the backup deleted.
            # If anything else happens, we bail.
            if e.http_status != 404:
                raise
            logger.warning("No manifest found pruning volume: %s" % volume["id"])
            return
Beispiel #12
0
    def prune(self, volume, backup_id):
        """Build the ``Worker`` used to prune ``backup_id`` from ``volume``.

        Returns early (``None``) when building the ``Worker`` raises
        ``exc.ClientException`` with ``http_status`` 404.

        :param volume: mapping; its ``'id'`` key is read
        :param backup_id: string appended to the process title
        :raises exc.ClientException: when ``http_status`` is not 404
        """
        logger.rename('lunr.storage.helper.backup.prune')
        setproctitle("lunr-prune: " + backup_id)

        try:
            op_start = time()
            worker = Worker(volume['id'], self.conf)
        except exc.ClientException as e:
            # If the manifest doesn't exist, we consider the backup deleted.
            # If anything else happens, we bail.
            if e.http_status != 404:
                raise
            logger.warning('No manifest found pruning volume: %s' %
                           volume['id'])
            return
Beispiel #13
0
 def audit(self, volume):
     """Build the ``Worker`` used to audit the backups of ``volume``.

     If building it raises ``exc.ClientException`` with ``http_status`` 404,
     the container is created and the ``Worker`` is rebuilt around an empty
     manifest.

     :param volume: mapping; its ``"id"`` key is read
     :raises exc.ClientException: when ``http_status`` is not 404
     """
     logger.rename("lunr.storage.helper.backup.audit")
     setproctitle("lunr-audit: " + volume["id"])
     try:
         op_start = time()
         worker = Worker(volume["id"], self.conf)
     except exc.ClientException as e:
         # Only a 404 is handled here; everything else propagates.
         if e.http_status != 404:
             raise
         op_start = time()
         conn = get_conn(self.conf)
         conn.put_container(volume["id"])
         logger.warning("failed to retrieve manifest;"
                        " auditing volume with no backups")
         # creating a blank manifest for the worker
         worker = Worker(volume["id"], conf=self.conf,
                         manifest=Manifest.blank(0))
Beispiel #14
0
 def run(self, now=None):
     """Restart scrub jobs for the volumes ``self.suspects`` returns.

     For each suspect volume a request is sent through ``self.delete``;
     when that call is truthy the volume's ``last_modified`` is refreshed.
     The session is committed afterwards, or closed when the database
     raises ``OperationalError``.

     :param now: reference time passed to ``self.suspects``; falls back to
         ``datetime.now()`` when falsy
     """
     now = now or datetime.now()
     try:
         for volume in self.suspects(self.span, now=now).all():
             log.info("Long running scrub '%s' on node '%s'" %
                      (volume.id, volume.node.id))
             url = 'http://%s:%s/volumes/%s' % (volume.node.hostname,
                                                volume.node.port, volume.id)
             # Send the request for this volume to the storage server
             # (presumably an HTTP DELETE that re-triggers the scrub).
             if self.delete(url):
                 log.info("Restarted scrub job '%s' on node '%s'" %
                          (volume.id, volume.node.id))
                 # Update last_modified so we don't restart next time we run
                 volume.last_modified = func.now()
         self._sess.commit()
     except OperationalError:
         logger.warning("DB error", exc_info=True)
         self._sess.close()
Beispiel #15
0
    def write_raw_image(self, glance, image, path):
        """Download ``image`` from ``glance`` into the file at ``path``.

        The chunk iterator returned by ``glance.get(image.id)`` is written to
        ``path``.  If reading the chunks raises ``GlanceError`` or
        ``socket.timeout`` the download is started again from scratch.

        :param glance: object whose ``get(image_id)`` returns an iterable of
            byte chunks
        :param image: object whose ``id`` attribute identifies the image
        :param path: destination file, opened with mode ``'wb'``
        :raises GlanceError: when ``glance.get`` itself fails
        """
        op_start = time()
        with open(path, 'wb') as f:
            # Try until we run out of glances.
            while True:
                try:
                    chunks = glance.get(image.id)
                except GlanceError as e:
                    logger.warning("Error fetching glance image: %s" % e)
                    raise

                # A previous attempt may have written part of the image
                # before failing; start over with an empty file so the
                # retry does not get appended after stale bytes.
                f.seek(0)
                f.truncate()

                try:
                    for chunk in chunks:
                        f.write(chunk)
                    break
                # Glanceclient doesn't handle socket timeouts for chunk reads.
                except (GlanceError, socket.timeout):
                    continue
Beispiel #16
0
    def write_raw_image(self, glance, image, path):
        """Download ``image`` from ``glance`` into the file at ``path``.

        The chunk iterator returned by ``glance.get(image.id)`` is written to
        ``path``.  If reading the chunks raises ``GlanceError`` or
        ``socket.timeout`` the download is started again from scratch.

        :param glance: object whose ``get(image_id)`` returns an iterable of
            byte chunks
        :param image: object whose ``id`` attribute identifies the image
        :param path: destination file, opened with mode ``'wb'``
        :raises GlanceError: when ``glance.get`` itself fails
        """
        op_start = time()
        with open(path, 'wb') as f:
            # Try until we run out of glances.
            while True:
                try:
                    chunks = glance.get(image.id)
                except GlanceError as e:
                    logger.warning("Error fetching glance image: %s" % e)
                    raise

                # A previous attempt may have written part of the image
                # before failing; start over with an empty file so the
                # retry does not get appended after stale bytes.
                f.seek(0)
                f.truncate()

                try:
                    for chunk in chunks:
                        f.write(chunk)
                    break
                # Glanceclient doesn't handle socket timeouts for chunk reads.
                except (GlanceError, socket.timeout):
                    continue
Beispiel #17
0
 def call(self, request):
     # Match the Request URL to an action
     action = self.match(request)
     for attempts in range(0, 3):
         try:
             result = action(request)
             self.helper.commit()
             return result
         except OperationalError, e:
             if hasattr(e, 'orig') and e.orig.args[0] == 2006:
                 # MySQL server has gone away
                 logger.warning("DB connection error attempt #%d"
                                % attempts, exc_info=True)
                 sleep(2 ** attempts)
                 continue
             logger.exception("Database Error: %s" % e)
             raise HTTPInternalServerError("Internal database error")
         finally:
Beispiel #18
0
 def audit(self, volume):
     """Build the ``Worker`` used to audit the backups of ``volume``.

     If building it raises ``exc.ClientException`` with ``http_status`` 404,
     the container is created and the ``Worker`` is rebuilt around an empty
     manifest.

     :param volume: mapping; its ``'id'`` key is read
     :raises exc.ClientException: when ``http_status`` is not 404
     """
     logger.rename('lunr.storage.helper.backup.audit')
     setproctitle("lunr-audit: " + volume['id'])
     try:
         op_start = time()
         worker = Worker(volume['id'], self.conf)
     except exc.ClientException as e:
         # Only a 404 is handled here; everything else propagates.
         if e.http_status != 404:
             raise
         op_start = time()
         conn = get_conn(self.conf)
         conn.put_container(volume['id'])
         logger.warning("failed to retrieve manifest;"
                        " auditing volume with no backups")
         # creating a blank manifest for the worker
         worker = Worker(volume['id'],
                         conf=self.conf,
                         manifest=Manifest.blank(0))
Beispiel #19
0
 def run(self, now=None):
     """Restart prune jobs for the backups ``self.suspects`` returns.

     Backups for which ``self.locked`` is truthy are only logged.  For the
     others a request is sent through ``self.delete``; when that call is
     truthy the backup's ``last_modified`` is refreshed.  The session is
     committed afterwards, or closed when the database raises
     ``OperationalError``.

     :param now: reference time passed to ``self.suspects``; falls back to
         ``datetime.now()`` when falsy
     """
     now = now or datetime.now()
     try:
         for backup in self.suspects(self.span, now=now).all():
             log.info("Long running prune '%s' on node '%s'" %
                      (backup.id, backup.volume.node.id))
             if not self.locked(backup):
                 # DELETE /volumes/{volume_id}/backups/{backup_id}
                 #        ?account=account.id
                 if self.delete(self.url(backup),
                                account=backup.account.id):
                     log.info("Restarting prune '%s' on node '%s'" %
                              (backup.id, backup.volume.node.id))
                     # Update last_modified so we don't restart next time
                     backup.last_modified = func.now()
         self._sess.commit()
     except OperationalError:
         logger.warning("DB error", exc_info=True)
         self._sess.close()
Beispiel #20
0
 def run(self, now=None):
     """Restart prune jobs for the backups ``self.suspects`` returns.

     Backups for which ``self.locked`` is truthy are only logged.  For the
     others a request is sent through ``self.delete``; when that call is
     truthy the backup's ``last_modified`` is refreshed.  The session is
     committed afterwards, or closed when the database raises
     ``OperationalError``.

     :param now: reference time passed to ``self.suspects``; falls back to
         ``datetime.now()`` when falsy
     """
     now = now or datetime.now()
     try:
         for backup in self.suspects(self.span, now=now).all():
             log.info("Long running prune '%s' on node '%s'"
                      % (backup.id, backup.volume.node.id))
             if not self.locked(backup):
                 # DELETE /volumes/{volume_id}/backups/{backup_id}
                 #        ?account=account.id
                 if self.delete(self.url(backup),
                                account=backup.account.id):
                     log.info("Restarting prune '%s' on node '%s'"
                              % (backup.id, backup.volume.node.id))
                     # Update last_modified so we don't restart next time
                     backup.last_modified = func.now()
         self._sess.commit()
     except OperationalError:
         logger.warning("DB error", exc_info=True)
         self._sess.close()
Beispiel #21
0
 def run(self, now=None):
     """Restart scrub jobs for the volumes ``self.suspects`` returns.

     For each suspect volume a request is sent through ``self.delete``;
     when that call is truthy the volume's ``last_modified`` is refreshed.
     The session is committed afterwards, or closed when the database
     raises ``OperationalError``.

     :param now: reference time passed to ``self.suspects``; falls back to
         ``datetime.now()`` when falsy
     """
     now = now or datetime.now()
     try:
         for volume in self.suspects(self.span, now=now).all():
             log.info("Long running scrub '%s' on node '%s'"
                      % (volume.name, volume.node.id))
             url = 'http://%s:%s/volumes/%s' % (volume.node.hostname,
                                                volume.node.port,
                                                volume.name)
             # Send the request for this volume to the storage server
             # (presumably an HTTP DELETE that re-triggers the scrub).
             if self.delete(url):
                 log.info("Restarted scrub job '%s' on node '%s'"
                          % (volume.name, volume.node.id))
                 # Update last_modified so we don't restart next time we run
                 volume.last_modified = func.now()
         self._sess.commit()
     except OperationalError:
         logger.warning("DB error", exc_info=True)
         self._sess.close()
Beispiel #22
0
 def run(self, now=None):
     """Request an audit for the backups ``self.suspects`` returns.

     Backups for which ``self.locked`` is truthy are only logged.  For the
     others a request is sent through ``self.put``; when that call is
     truthy the backup's ``last_modified`` is refreshed.  The session is
     committed afterwards, or closed when the database raises
     ``OperationalError``.

     :param now: reference time passed to ``self.suspects``; falls back to
         ``datetime.now()`` when falsy
     """
     now = now or datetime.now()
     try:
         for backup in self.suspects(self.span, now=now).all():
             log.info("Backup audit suspect: '%s' on node '%s'"
                      % (backup.id, backup.volume.node.id))
             if not self.locked(backup):
                 # PUT /volumes/{id}/audit
                 #     ?backup_id=backup_id&account=account_id
                 if self.put(self.url(backup, '/audit'),
                             account=backup.account.id,
                             backup_id=backup.id):
                     log.info("Running audit for backup '%s' on node '%s'"
                              % (backup.id, backup.volume.node.id))
                     # Update last_modified so we don't restart next time
                     backup.last_modified = func.now()
         self._sess.commit()
     except OperationalError:
         logger.warning("DB error", exc_info=True)
         self._sess.close()
Beispiel #23
0
 def run(self, now=None):
     """Request an audit for the backups ``self.suspects`` returns.

     Backups for which ``self.locked`` is truthy are only logged.  For the
     others a request is sent through ``self.put``; when that call is
     truthy the backup's ``last_modified`` is refreshed.  The session is
     committed afterwards, or closed when the database raises
     ``OperationalError``.

     :param now: reference time passed to ``self.suspects``; falls back to
         ``datetime.now()`` when falsy
     """
     now = now or datetime.now()
     try:
         for backup in self.suspects(self.span, now=now).all():
             log.info("Backup audit suspect: '%s' on node '%s'" %
                      (backup.id, backup.volume.node.id))
             if not self.locked(backup):
                 # PUT /volumes/{id}/audit
                 #     ?backup_id=backup_id&account=account_id
                 if self.put(self.url(backup, '/audit'),
                             account=backup.account.id,
                             backup_id=backup.id):
                     log.info("Running audit for backup '%s' on node '%s'" %
                              (backup.id, backup.volume.node.id))
                     # Update last_modified so we don't restart next time
                     backup.last_modified = func.now()
         self._sess.commit()
     except OperationalError:
         logger.warning("DB error", exc_info=True)
         self._sess.close()
Beispiel #24
0
    def run(self, now=None):
        """Complete detaches for the exports ``self.find`` returns.

        Each export is logged; when ``self.connected`` is falsy for its
        volume, ``self.detach`` is called and the export's ``last_modified``
        is refreshed.  The session is committed afterwards, or closed when
        the database raises ``OperationalError``.

        :param now: reference time passed to ``self.find``; falls back to
            ``datetime.now()`` when falsy
        """
        now = now or datetime.now()
        # Build the query for exports selected by self.find for this span
        query = self.find(self.span, now=now)

        try:
            # Attempt to clean up detached exports
            for export in query.all():
                log.info("Found stuck detach '%s' on node '%s'" %
                         (export.id, export.volume.node.id))
                # Ask the node if xen initiator is connected to the target
                if not self.connected(export.volume):
                    log.info("Asking cinder to complete the detach for '%s'" %
                             (export.id))
                    # Tell cinder about the detach
                    self.detach(export)
                    # Update last_modified so we don't restart next time
                    export.last_modified = func.now()
            self._sess.commit()
        except OperationalError:
            logger.warning("DB error", exc_info=True)
            self._sess.close()
Beispiel #25
0
def get_registration_exceptions(local_info, node_info):
    """Compare local node settings with what the API server has registered.

    Every key of ``local_info`` is looked up in ``node_info``.  For keys
    containing ``'hostname'`` whose registered value differs from the local
    one, the registered value is first resolved with
    ``socket.gethostbyname`` so that a hostname can match a local address.
    If resolution fails the unresolved registered value is compared.

    :param local_info: mapping of setting name to the locally expected value
    :param node_info: mapping of setting name to the registered value
    :returns: dict of ``{key: local value}`` for every key whose registered
        value does not match; keys missing from ``node_info`` are logged and
        left out
    """
    exceptions = {}
    for k, v in local_info.items():
        try:
            node_value = node_info[k]
        except KeyError:
            logger.error("During registration; missing '%s' key in api "
                         "server response" % k)
            continue
        if 'hostname' in k and node_value != v:
            try:
                node_value = socket.gethostbyname(node_value)
            except socket.error:
                # skip hostname translation on failure; the registered
                # value is compared as-is below
                pass
        if node_value != v:
            logger.warning("Invalid '%s' registered "
                           "as %r != %r" % (k, node_value, v))
            exceptions[k] = v
        else:
            logger.info("Verified '%s' registered as '%s'" % (k, v))
    return exceptions
Beispiel #26
0
def get_registration_exceptions(local_info, node_info):
    """Compare local node settings with what the API server has registered.

    Every key of ``local_info`` is looked up in ``node_info``.  For keys
    containing ``'hostname'`` whose registered value differs from the local
    one, the registered value is first resolved with
    ``socket.gethostbyname`` so that a hostname can match a local address.
    If resolution fails the unresolved registered value is compared.

    :param local_info: mapping of setting name to the locally expected value
    :param node_info: mapping of setting name to the registered value
    :returns: dict of ``{key: local value}`` for every key whose registered
        value does not match; keys missing from ``node_info`` are logged and
        left out
    """
    exceptions = {}
    for k, v in local_info.items():
        try:
            node_value = node_info[k]
        except KeyError:
            logger.error("During registration; missing '%s' key in api "
                         "server response" % k)
            continue
        if 'hostname' in k and node_value != v:
            try:
                node_value = socket.gethostbyname(node_value)
            except socket.error:
                # skip hostname translation on failure; the registered
                # value is compared as-is below
                pass
        if node_value != v:
            logger.warning("Invalid '%s' registered "
                           "as %r != %r" % (k, node_value, v))
            exceptions[k] = v
        else:
            logger.info("Verified '%s' registered as '%s'" % (k, v))
    return exceptions
Beispiel #27
0
    def run(self, now=None):
        """Complete detaches for the exports ``self.find`` returns.

        Each export is logged; when ``self.connected`` is falsy for its
        volume, ``self.detach`` is called and the export's ``last_modified``
        is refreshed.  The session is committed afterwards, or closed when
        the database raises ``OperationalError``.

        :param now: reference time passed to ``self.find``; falls back to
            ``datetime.now()`` when falsy
        """
        now = now or datetime.now()
        # Build the query for exports selected by self.find for this span
        query = self.find(self.span, now=now)

        try:
            # Attempt to clean up detached exports
            for export in query.all():
                log.info("Found stuck detach '%s' on node '%s'"
                         % (export.id, export.volume.node.id))
                # Ask the node if xen initiator is connected to the target
                if not self.connected(export.volume):
                    log.info("Asking cinder to complete the detach for '%s'"
                             % (export.id))
                    # Tell cinder about the detach
                    self.detach(export)
                    # Update last_modified so we don't restart next time
                    export.last_modified = func.now()
            self._sess.commit()
        except OperationalError:
            logger.warning("DB error", exc_info=True)
            self._sess.close()
Beispiel #28
0
 def run(self, now=None):
     """Restart restore jobs for the volumes ``self.suspects`` returns.

     For each suspect restore the source backup is loaded and a request is
     sent through ``self.put``; when that call is truthy the restore's
     ``last_modified`` is refreshed.  Restores whose backup row cannot be
     found are logged and skipped.  The session is committed afterwards, or
     closed when the database raises ``OperationalError``.

     :param now: reference time passed to ``self.suspects``; falls back to
         ``datetime.now()`` when falsy
     """
     now = now or datetime.now()
     try:
         for restore in self.suspects(self.span, now=now).all():
             log.info("Long running restore '%s' on node '%s'"
                      % (restore.id, restore.node.id))
             url = 'http://%s:%s/volumes/%s' % (restore.node.hostname,
                                                restore.node.port,
                                                restore.id)
             # Get the Backup the restore is of
             backup = self._sess.query(Backup).get(restore.restore_of)
             if backup is None:
                 # Query.get() yields None when no row matches; skip this
                 # restore instead of failing the whole run on it.
                 logger.warning("Backup '%s' for restore '%s' not found"
                                % (restore.restore_of, restore.id))
                 continue
             # Make a restore create call to the storage server
             if self.put(url, backup_source_volume_id=backup.volume_id,
                         backup_id=backup.id, size=backup.size,
                         account=backup.account.id):
                 log.info("Restarted restore job '%s' on node '%s'"
                          % (restore.id, restore.node.id))
                 # Update last_modified so we don't restart next time
                 restore.last_modified = func.now()
         self._sess.commit()
     except OperationalError:
         logger.warning("DB error", exc_info=True)
         self._sess.close()
Beispiel #29
0
    def run(self, now=None):
        """Restart the backups ``self.suspects`` returns.

        For each suspect backup the creation timestamp is rebuilt from
        ``created_at`` and a request is sent through ``self.put``; when that
        call is truthy the backup's ``last_modified`` is refreshed.  The
        session is committed afterwards, or closed when the database raises
        ``OperationalError``.

        :param now: reference time passed to ``self.suspects``; falls back
            to ``datetime.now()`` when falsy
        """
        now = now or datetime.now()
        # Find all suspect backups that are older than span
        query = self.suspects(self.span, now=now)

        try:
            # Attempt to restart all the backups that are suspect
            for backup in query.all():
                log.info("Long running backup '%s' on node '%s'"
                         % (backup.id, backup.volume.node.id))
                # Re-create the timestamp used
                timestamp = int(mktime(backup.created_at.timetuple()))
                # Make a create call to the storage server
                if self.put(self.url(backup), timestamp=timestamp,
                            account=backup.account.id):
                    log.info("Restarted backup '%s' on node '%s'"
                             % (backup.id, backup.volume.node.id))
                    # Update last_modified so we don't restart next time
                    backup.last_modified = func.now()
            self._sess.commit()
        except OperationalError:
            logger.warning("DB error", exc_info=True)
            self._sess.close()
Beispiel #30
0
    def run(self, now=None):
        """Restart the backups ``self.suspects`` returns.

        For each suspect backup the creation timestamp is rebuilt from
        ``created_at`` and a request is sent through ``self.put``; when that
        call is truthy the backup's ``last_modified`` is refreshed.  The
        session is committed afterwards, or closed when the database raises
        ``OperationalError``.

        :param now: reference time passed to ``self.suspects``; falls back
            to ``datetime.now()`` when falsy
        """
        now = now or datetime.now()
        # Find all suspect backups that are older than span
        query = self.suspects(self.span, now=now)

        try:
            # Attempt to restart all the backups that are suspect
            for backup in query.all():
                log.info("Long running backup '%s' on node '%s'" %
                         (backup.id, backup.volume.node.id))
                # Re-create the timestamp used
                timestamp = int(mktime(backup.created_at.timetuple()))
                # Make a create call to the storage server
                if self.put(self.url(backup),
                            timestamp=timestamp,
                            account=backup.account.id):
                    log.info("Restarted backup '%s' on node '%s'" %
                             (backup.id, backup.volume.node.id))
                    # Update last_modified so we don't restart next time
                    backup.last_modified = func.now()
            self._sess.commit()
        except OperationalError:
            logger.warning("DB error", exc_info=True)
            self._sess.close()
Beispiel #31
0
 def run(self, now=None):
     """Restart restore jobs for the volumes ``self.suspects`` returns.

     For each suspect restore the source backup is loaded and a request is
     sent through ``self.put``; when that call is truthy the restore's
     ``last_modified`` is refreshed.  Restores whose backup row cannot be
     found are logged and skipped.  The session is committed afterwards, or
     closed when the database raises ``OperationalError``.

     :param now: reference time passed to ``self.suspects``; falls back to
         ``datetime.now()`` when falsy
     """
     now = now or datetime.now()
     try:
         for restore in self.suspects(self.span, now=now).all():
             log.info("Long running restore '%s' on node '%s'" %
                      (restore.id, restore.node.id))
             url = 'http://%s:%s/volumes/%s' % (
                 restore.node.hostname, restore.node.port, restore.id)
             # Get the Backup the restore is of
             backup = self._sess.query(Backup).get(restore.restore_of)
             if backup is None:
                 # Query.get() yields None when no row matches; skip this
                 # restore instead of failing the whole run on it.
                 logger.warning("Backup '%s' for restore '%s' not found" %
                                (restore.restore_of, restore.id))
                 continue
             # Make a restore create call to the storage server
             if self.put(url,
                         backup_source_volume_id=backup.volume_id,
                         backup_id=backup.id,
                         size=backup.size,
                         account=backup.account.id):
                 log.info("Restarted restore job '%s' on node '%s'" %
                          (restore.id, restore.node.id))
                 # Update last_modified so we don't restart next time
                 restore.last_modified = func.now()
         self._sess.commit()
     except OperationalError:
         logger.warning("DB error", exc_info=True)
         self._sess.close()
Beispiel #32
0
        try:
            op_start = time()
            worker = Worker(volume['id'], self.conf)
        except exc.ClientException, e:
            # If the manifest doesn't exist, We consider the backup deleted.
            # If anything else happens, we bail.
            if e.http_status != 404:
                raise
            logger.warning('No manifest found pruning volume: %s' %
                           volume['id'])
            return
        try:
            history = worker.delete(backup_id)
        except NotFound, e:
            logger.warning("backup_id: '%s' missing from manifest in prune" %
                           backup_id)
            return
        duration = time() - op_start
        logger.info('STAT: pruning %r. Time: %r s' % (backup_id, duration))

    def delete(self, volume, backup_id, callback=None, lock=None):
        """Hand ``self.prune`` to ``spawn`` for ``volume`` / ``backup_id``.

        :param volume: forwarded to ``self.prune`` through ``spawn``
        :param backup_id: forwarded to ``self.prune`` through ``spawn``
        :param callback: passed to ``spawn`` as ``callback``
        :param lock: passed to ``spawn`` as its first argument
        """
        job = self.prune
        options = dict(callback=callback, skip_fork=self.skip_fork)
        spawn(lock, job, volume, backup_id, **options)

    def audit(self, volume):
        logger.rename('lunr.storage.helper.backup.audit')
        setproctitle("lunr-audit: " + volume['id'])
        try:
            op_start = time()
            worker = Worker(volume['id'], self.conf)
        except exc.ClientException, e:
Beispiel #33
0
        try:
            op_start = time()
            worker = Worker(volume['id'], self.conf)
        except exc.ClientException, e:
            # If the manifest doesn't exist, We consider the backup deleted.
            # If anything else happens, we bail.
            if e.http_status != 404:
                raise
            logger.warning('No manifest found pruning volume: %s' %
                           volume['id'])
            return
        try:
            history = worker.delete(backup_id)
        except NotFound, e:
            logger.warning("backup_id: '%s' missing from manifest in prune" %
                           backup_id)
            return
        duration = time() - op_start
        logger.info('STAT: pruning %r. Time: %r s' % (backup_id, duration))

    def delete(self, volume, backup_id, callback=None, lock=None):
        """Hand ``self.prune`` to ``spawn`` for ``volume`` / ``backup_id``.

        :param volume: forwarded to ``self.prune`` through ``spawn``
        :param backup_id: forwarded to ``self.prune`` through ``spawn``
        :param callback: passed to ``spawn`` as ``callback``
        :param lock: passed to ``spawn`` as its first argument
        """
        job = self.prune
        options = dict(callback=callback, skip_fork=self.skip_fork)
        spawn(lock, job, volume, backup_id, **options)

    def audit(self, volume):
        logger.rename('lunr.storage.helper.backup.audit')
        setproctitle("lunr-audit: " + volume['id'])