Esempio n. 1
0
    def report_state(self):
        """Update the state of this service in the datastore."""
        if not self.manager.is_working():
            # NOTE(dulek): If manager reports a problem we're not sending
            # heartbeats - to indicate that service is actually down.
            LOG.error(
                _LE('Manager for service %(binary)s %(host)s is '
                    'reporting problems, not sending heartbeat. '
                    'Service will appear "down".'), {
                        'binary': self.binary,
                        'host': self.host
                    })
            return

        ctxt = context.get_admin_context()
        zone = CONF.storage_availability_zone
        try:
            try:
                service_ref = objects.Service.get_by_id(ctxt, self.service_id)
            except exception.NotFound:
                LOG.debug('The service database object disappeared, '
                          'recreating it.')
                self._create_service_ref(ctxt)
                service_ref = objects.Service.get_by_id(ctxt, self.service_id)

            service_ref.report_count += 1
            if zone != service_ref.availability_zone:
                service_ref.availability_zone = zone

            service_ref.save()

            # TODO(termie): make this pattern be more elegant.
            if getattr(self, 'model_disconnected', False):
                self.model_disconnected = False
                LOG.error(_LE('Recovered model server connection!'))

        except db_exc.DBConnectionError:
            if not getattr(self, 'model_disconnected', False):
                self.model_disconnected = True
                LOG.exception(_LE('model server went away'))

        # NOTE(jsbryant) Other DB errors can happen in HA configurations.
        # such errors shouldn't kill this thread, so we handle them here.
        except db_exc.DBError:
            if not getattr(self, 'model_disconnected', False):
                self.model_disconnected = True
                LOG.exception(_LE('DBError encountered: '))

        except Exception:
            if not getattr(self, 'model_disconnected', False):
                self.model_disconnected = True
                LOG.exception(_LE('Exception encountered: '))
Esempio n. 2
0
def main():
    objects.register_all()
    gmr_opts.set_defaults(CONF)
    CONF(sys.argv[1:], project='waterfall', version=version.version_string())
    config.set_middleware_defaults()
    logging.setup(CONF, "waterfall")
    LOG = logging.getLogger('waterfall.all')

    utils.monkey_patch()

    gmr.TextGuruMeditation.setup_autorun(version, conf=CONF)

    rpc.init(CONF)

    launcher = service.process_launcher()
    # waterfall-api
    try:
        server = service.WSGIService('osapi_workflow')
        launcher.launch_service(server, workers=server.workers or 1)
    except (Exception, SystemExit):
        LOG.exception(_LE('Failed to load osapi_workflow'))

    for binary in ['waterfall-scheduler', 'waterfall-backup']:
        try:
            launcher.launch_service(service.Service.create(binary=binary))
        except (Exception, SystemExit):
            LOG.exception(_LE('Failed to load %s'), binary)

    # waterfall-workflow
    try:
        if CONF.enabled_backends:
            for backend in CONF.enabled_backends:
                CONF.register_opt(workflow_cmd.host_opt, group=backend)
                backend_host = getattr(CONF, backend).backend_host
                host = "%s@%s" % (backend_host or CONF.host, backend)
                server = service.Service.create(host=host,
                                                service_name=backend,
                                                binary='waterfall-workflow')
                # Dispose of the whole DB connection pool here before
                # starting another process.  Otherwise we run into cases
                # where child processes share DB connections which results
                # in errors.
                session.dispose_engine()
                launcher.launch_service(server)
        else:
            server = service.Service.create(binary='waterfall-workflow')
            launcher.launch_service(server)
    except (Exception, SystemExit):
        LOG.exception(_LE('Failed to load conder-workflow'))

    launcher.wait()
Esempio n. 3
0
 def _heartbeat(self):
     try:
         self.coordinator.heartbeat()
         return True
     except coordination.ToozConnectionError:
         LOG.exception(
             _LE('Connection error while sending a heartbeat '
                 'to coordination backend.'))
         raise
     except coordination.ToozError:
         LOG.exception(
             _LE('Error sending a heartbeat to coordination '
                 'backend.'))
     return False
Esempio n. 4
0
    def _error(self, inner, req):
        if not isinstance(inner, exception.QuotaError):
            LOG.exception(_LE("Caught error: %(type)s %(error)s"), {
                'type': type(inner),
                'error': inner
            })
        safe = getattr(inner, 'safe', False)
        headers = getattr(inner, 'headers', None)
        status = getattr(inner, 'code', 500)
        if status is None:
            status = 500

        msg_dict = dict(url=req.url, status=status)
        LOG.info(_LI("%(url)s returned with HTTP %(status)d"), msg_dict)
        outer = self.status_to_type(status)
        if headers:
            outer.headers = headers
        # NOTE(johannes): We leave the explanation empty here on
        # purpose. It could possibly have sensitive information
        # that should not be returned back to the user. See
        # bugs 868360 and 874472
        # NOTE(eglynn): However, it would be over-conservative and
        # inconsistent with the EC2 API to hide every exception,
        # including those that are safe to expose, see bug 1021373
        if safe:
            msg = (inner.msg if isinstance(inner, exception.WaterfallException)
                   else six.text_type(inner))
            params = {
                'exception': inner.__class__.__name__,
                'explanation': msg
            }
            outer.explanation = _('%(exception)s: %(explanation)s') % params
        return wsgi.Fault(outer)
Esempio n. 5
0
def require_driver_initialized(driver):
    """Verifies if `driver` is initialized

    If the driver is not initialized, an exception will be raised.

    :params driver: The driver instance.
    :raises: `exception.DriverNotInitialized`
    """
    # we can't do anything if the driver didn't init
    if not driver.initialized:
        driver_name = driver.__class__.__name__
        LOG.error(_LE("Workflow driver %s not initialized"), driver_name)
        raise exception.DriverNotInitialized()
Esempio n. 6
0
    def __init__(self, message=None, **kwargs):
        self.kwargs = kwargs
        self.kwargs['message'] = message

        if 'code' not in self.kwargs:
            try:
                self.kwargs['code'] = self.code
            except AttributeError:
                pass

        for k, v in self.kwargs.items():
            if isinstance(v, Exception):
                self.kwargs[k] = six.text_type(v)

        if self._should_format():
            try:
                message = self.message % kwargs

            except Exception:
                exc_info = sys.exc_info()
                # kwargs doesn't match a variable in the message
                # log the issue and the kwargs
                LOG.exception(_LE('Exception in string format operation'))
                for name, value in kwargs.items():
                    LOG.error(_LE("%(name)s: %(value)s"),
                              {'name': name, 'value': value})
                if CONF.fatal_exception_format_errors:
                    six.reraise(*exc_info)
                # at least get the core message out if something happened
                message = self.message
        elif isinstance(message, Exception):
            message = six.text_type(message)

        # NOTE(luisg): We put the actual message in 'msg' so that we can access
        # it, because if we try to access the message via 'message' it will be
        # overshadowed by the class' message attribute
        self.msg = message
        super(WaterfallException, self).__init__(message)
Esempio n. 7
0
    def _check_extension(self, extension):
        """Checks for required methods in extension objects."""
        try:
            LOG.debug('Ext name: %s', extension.name)
            LOG.debug('Ext alias: %s', extension.alias)
            LOG.debug('Ext description: %s',
                      ' '.join(extension.__doc__.strip().split()))
            LOG.debug('Ext namespace: %s', extension.namespace)
            LOG.debug('Ext updated: %s', extension.updated)
        except AttributeError:
            LOG.exception(_LE("Exception loading extension."))
            return False

        return True
Esempio n. 8
0
 def start(self):
     """Connect to coordination backend and start heartbeat."""
     if not self.started:
         try:
             self._dead = threading.Event()
             self._start()
             self.started = True
             # NOTE(bluex): Start heartbeat in separate thread to avoid
             # being blocked by long coroutines.
             if self.coordinator and self.coordinator.requires_beating:
                 self._ev = eventlet.spawn(
                     lambda: tpool.execute(self.heartbeat))
         except coordination.ToozError:
             LOG.exception(_LE('Error starting coordination backend.'))
             raise
         LOG.info(_LI('Coordination backend started successfully.'))
Esempio n. 9
0
def robust_file_write(directory, filename, data):
    """Robust file write.

    Use "write to temp file and rename" model for writing the
    persistence file.

    :param directory: Target directory to create a file.
    :param filename: File name to store specified data.
    :param data: String data.
    """
    tempname = None
    dirfd = None
    try:
        dirfd = os.open(directory, os.O_DIRECTORY)

        # write data to temporary file
        with tempfile.NamedTemporaryFile(prefix=filename,
                                         dir=directory,
                                         delete=False) as tf:
            tempname = tf.name
            tf.write(data.encode('utf-8'))
            tf.flush()
            os.fdatasync(tf.fileno())
            tf.close()

            # Fsync the directory to ensure the fact of the existence of
            # the temp file hits the disk.
            os.fsync(dirfd)
            # If destination file exists, it will be replaced silently.
            os.rename(tempname, os.path.join(directory, filename))
            # Fsync the directory to ensure the rename hits the disk.
            os.fsync(dirfd)
    except OSError:
        with excutils.save_and_reraise_exception():
            LOG.error(_LE("Failed to write persistence file: %(path)s."),
                      {'path': os.path.join(directory, filename)})
            if os.path.isfile(tempname):
                os.unlink(tempname)
    finally:
        if dirfd:
            os.close(dirfd)