Beispiel #1
0
    def __init__(self, name=None, guid=None, data=None):
        """
        Initialise a service instance.

        @param name: name of the service, the guid is used when no name is given
        @param guid: unique id of the service, a new uuid4 is generated when not given
        @param data: optional initial data, applied on top of the service data
        """
        uid = self.template_uid
        template_module = sys.modules.get(str(uid))
        self.template_dir = os.path.dirname(template_module.__file__)

        self.guid = guid if guid else str(uuid4())
        self.name = name if name else self.guid
        self._public = False

        # directory on the filesystem in which this service is stored
        path_parts = [
            config.data_repo.path,
            uid.host,
            uid.account,
            uid.repo,
            uid.name,
            self.name,
            self.guid,
        ]
        self._path = os.path.join(*path_parts)

        self.api = ZeroRobotAPI()

        self.data = ServiceData(self)
        if data:
            self.data.update(data)
        self.state = ServiceState()
        self.task_list = TaskList(self)

        self._delete_callback = []

        # greenlets owned by this service: the task executor and the recurring save
        self.gl_mgr = GreenletsMgr()
        executor = gevent.Greenlet(self._run)
        self.gl_mgr.add('executor', executor)
        self.recurring_action('save', 10)

        self.logger = _configure_logger(self.guid)
Beispiel #2
0
 def state(self):
     """
     Fetch the service identified by self.guid through the zrobot client
     and return a new ServiceState filled with the states it reports.
     """
     # TODO: handle exceptions
     remote, _ = self._zrobot_client.api.services.GetService(self.guid)
     result = ServiceState()
     for entry in remote.state:
         result.set(entry.category, entry.tag, entry.state.value)
     return result
Beispiel #3
0
    def test_save_load(self):
        """A state loaded from the categories of another state exposes equal categories."""
        original = ServiceState()
        for tag, value in (('tcp-80', 'ok'), ('tcp-81', 'error')):
            original.set('network', tag, value)

        with tempfile.TemporaryDirectory():
            restored = ServiceState()
            restored.load(original.categories)
            self.assertDictEqual(original.categories, restored.categories)
    def test_tlog_failure(self):
        """An internal log message carrying a tlog IO error must flag that tlog shard as in error."""
        state = ServiceState()
        level = LOG_LVL_MESSAGE_INTERNAL
        payload = self.encoder.dumps({'error': 'IO error', 'tlog': '192.168.0.1:9000'})
        _health_monitoring(state, level, payload, None)

        assert 'tlog_shards' in state.categories
        assert state.categories['tlog_shards']['192.168.0.1:9000'] == SERVICE_STATE_ERROR
Beispiel #5
0
    def test_set_state(self):
        """Values set on a state can be read back, and an unsupported value is rejected."""
        state = ServiceState()
        state.set('network', 'tcp-80', 'ok')
        state.set('network', 'tcp-81', 'error')

        checks = (
            ('tcp-80', 'ok', "state should be ok"),
            ('tcp-81', 'error', "state should be error"),
        )
        for tag, expected, message in checks:
            self.assertEqual(state.categories['network'][tag], expected, message)

        rejected = self.assertRaises(
            BadServiceStateError,
            msg="support state should only be 'ok', 'error' or 'skipped'")
        with rejected:
            state.set('network', 'tcp-80', 'other')
Beispiel #6
0
 def test_repr(self):
     """str() of a state must equal str() of the equivalent nested dict."""
     state = ServiceState()
     for tag, value in (('tcp-80', 'ok'), ('tcp-81', 'error')):
         state.set('network', tag, value)

     expected = {'network': {'tcp-80': 'ok', 'tcp-81': 'error'}}
     self.assertEqual(str(state), str(expected))
Beispiel #7
0
class TemplateBase:
    """
    This is the base class any service should inherit from.

    The child class will implement actions on this class.
    """

    # The developer of the template needs to set the version of the template
    version = None
    # This is the unique identifier of the template. This is set during template loading
    template_uid = None
    # path of the template on disk. This is set during template loading
    template_dir = None

    def __init__(self, name=None, guid=None, data=None):
        """
        Initialise a service instance.

        @param name: name of the service, the guid is used when no name is given
        @param guid: unique id of the service, a new uuid4 is generated when not given
        @param data: optional initial data, applied on top of the service data
        """
        self.template_dir = os.path.dirname(
            sys.modules.get(str(self.template_uid)).__file__)
        self.guid = guid or str(uuid4())
        self.name = name or self.guid
        self._public = False
        # location on the filesystem where to store the service
        self._path = os.path.join(config.data_repo.path,
                                  self.template_uid.host,
                                  self.template_uid.account,
                                  self.template_uid.repo,
                                  self.template_uid.name, self.name, self.guid)

        self.api = ZeroRobotAPI()

        self.data = ServiceData(self)
        if data:
            self.data.update(data)
        self.state = ServiceState()
        self.task_list = TaskList(self)

        # names of the actions to schedule when the service gets deleted
        self._delete_callback = []

        # start the greenlets of this service
        self.gl_mgr = GreenletsMgr()
        self.gl_mgr.add('executor', gevent.Greenlet(self._run))
        self.recurring_action('save', 10)

        self.logger = _configure_logger(self.guid)

    def validate(self):
        """
        This method is called on all services during robot startup
        after all the services have been loaded

        in here you can implement some logic to ensure that all the requirements
        of your service are still met after a restart of the 0-robot
        """
        pass

    def save(self):
        """
        serialize the service metadata, state, data and task list to files
        located in the directory of the service (self._path)

        return the path where the service is saved
        :raises RuntimeError: raised when the path of the service is not set
        """
        if self._path is None:
            raise RuntimeError(
                "service._path is None, don't know where to save the service")

        os.makedirs(self._path, exist_ok=True)

        j.data.serializer.yaml.dump(
            os.path.join(self._path, 'service.yaml'), {
                'template': str(self.template_uid),
                'version': self.version,
                'name': self.name,
                'guid': self.guid,
                'public': self._public,
            })
        self.state.save(os.path.join(self._path, 'state.yaml'))
        self.data.save(os.path.join(self._path, 'data.yaml'))
        self.task_list.save(os.path.join(self._path, 'tasks.yaml'))
        return self._path

    def _run(self):
        """
        _run is responsible to walk over the task list to execute actions
        and handle responses from other service
        """
        # wait to start the processing of task list after the service is fully loaded
        if config.SERVICE_LOADED:
            config.SERVICE_LOADED.wait()

        while True:
            try:
                task = self.task_list.get()
                task.service = self
                try:
                    task.execute()
                finally:
                    # whatever the outcome of the task, record its duration
                    task_latency.labels(action_name=task.action_name,
                                        template_uid=str(
                                            self.template_uid)).observe(
                                                task.duration)
                    # notify the task list that this task is done
                    self.task_list.done(task)
                    if task.state == TASK_STATE_ERROR:
                        self.logger.error(
                            "error executing action %s:\n%s" %
                            (task.action_name, task.eco.traceback))
            except gevent.GreenletExit:
                # TODO: graceful shutdown
                # make sure the task storage is closed properly
                self.task_list._done.close()
                return
            except:
                # catch-all: log the error and keep processing the task list
                self.logger.exception(
                    "Uncaught exception in service task loop!")

    def schedule_action(self, action, args=None):
        """
        Add an action to the task list of this service.
        This method should never be called directly by the user.
        It will always be called by another service.
        Or from a local service or from a remote service through RPC

        @param action: action is the name of the action to add to the task list
        @param args: dictionary of the arguments to pass to the action
        return the task created for this action
        """
        return self._schedule_action(action, args)

    def _schedule_action(self, action, args=None, priority=PRIORITY_NORMAL):
        """
        Validate the action and its arguments, then put a task for it in the task list

        @param action: name of the action to add to the task list
        @param args: dictionary of the arguments to pass to the action
        @param priority: priority used when the task is put in the task list
        return the task created for this action
        :raises ActionNotFoundError: raised when the action doesn't exist on the service or is not callable
        :raises BadActionArgumentError: raised when a mandatory argument is missing
                                        or when an argument is not part of the signature of the action
        """
        if not hasattr(self, action):
            raise ActionNotFoundError("service %s doesn't have action %s" %
                                      (self.name, action))

        method = getattr(self, action)
        if not callable(method):
            raise ActionNotFoundError("%s is not a function" % action)

        # make sure the argument we pass are correct
        kwargs_enable = False
        s = inspect.signature(method, follow_wrapped=True)
        for param in s.parameters.values():
            if param.kind == param.VAR_KEYWORD:
                kwargs_enable = True
            if args is None:
                args = {}
            # a parameter without default value that is not **kwargs is mandatory
            if param.default == s.empty and param.name not in args and param.kind != param.VAR_KEYWORD:
                raise BadActionArgumentError(
                    "parameter %s is mandatory but not passed to in args" %
                    param.name)

        if args is not None:
            # unknown arguments are only accepted when the action takes **kwargs
            signature_keys = set(s.parameters.keys())
            args_keys = set(args.keys())
            diff = args_keys.difference(signature_keys)
            if diff and not kwargs_enable:
                raise BadActionArgumentError(
                    'arguments "%s" are not present in the signature of the action'
                    % ','.join(diff))

        task = Task(method, args)
        self.task_list.put(task, priority=priority)
        return task

    def recurring_action(self, action, period):
        """
        configure an action to be executed every period second

        It will ensure that the action from service is scheduled at best every period second.

        Since we don't have control over how long other tasks from the task list take,
        we can only ensure that the action is never going to be scheduled faster than every period second

        That means that it can be a longer time than period second during which the action is not executed

        @param action: a method or string that match the name of the method we want to make recurring
        @param period: minimum number of seconds between 2 scheduling of the action
        """
        if inspect.ismethod(action) or inspect.isfunction(action):
            action = action.__name__

        gl = gevent.Greenlet(_recurring_action, self, action, period)
        self.gl_mgr.add("recurring_" + action, gl)

    def delete(self, wait=False, timeout=60, die=False):
        """
        Delete the service.

        If you overwrite this method in your template,
        make sure to always call this method at the end of your method
        e.g: super().delete()

        @param wait: if True, wait for the task currently executed (if any) to finish
        @param timeout: maximum time passed to the wait on the current task
        @param die: not used by this implementation
        """
        self.logger.info("deleting service %s (%s)", self.name, self.guid)

        # empty the task list
        self.task_list.clear()

        # wait for the current task to finish if there is any
        if self.task_list.current and wait:
            self.task_list.current.wait(timeout=timeout)

        if not self.task_list.empty():
            self.logger.warning(
                "service %s stop processing its task list, while some task remains in the queue",
                self.name)

        # schedule and wait for all the cleanup actions
        delete_tasks = []
        for action in self._delete_callback:
            task = self.schedule_action(action)
            delete_tasks.append(task)
        wait_all(delete_tasks, timeout=30, die=False)

        # stop all recurring action and processing of task list
        self.gl_mgr.stop_all(wait=True, timeout=5)

        # close resources of logging handlers
        for h in self.logger.handlers:
            if hasattr(h, 'close'):
                h.close()

        # remove data from disk
        if self._path and os.path.exists(self._path):
            shutil.rmtree(self._path)

        # remove logs from disk
        log_file = os.path.join(j.dirs.LOGDIR, 'zrobot', self.guid)
        for f in glob.glob(log_file + '*'):
            os.remove(f)

        # remove from memory
        scol.delete(self)

    def update_data(self, data):
        """
        This method needs to be implemented by child class
        when you want to control the update of the schema data

        This method is called every time the schema data is changed
        by a blueprint.

        @param data: is a dict with the new schema data
        """
        pass

    def add_delete_callback(self, action):
        """
        register an action to be executed before the service is deleted

        when the 'delete' method of the service will be called, all the actions
        registered with this method will be executed before the service is deleted

        Use this when you have some cleanup/uninstall actions

        :param action: the action to be executed before delete
        :type action: method or name of the method
        :raises ActionNotFoundError: raised when the action passed in not present on the service or if action is not a method
        """
        if isinstance(action, str):
            if not hasattr(self, action):
                raise ActionNotFoundError("service %s doesn't have action %s" %
                                          (self.name, action))

            method = getattr(self, action)
            if not callable(method):
                raise ActionNotFoundError("%s is not a function" % action)
            action_name = action
        elif inspect.ismethod(action):
            action_name = action.__func__.__name__
        else:
            # refuse anything else, otherwise None would be registered
            # and 'delete' would fail when scheduling it
            raise ActionNotFoundError("%s is not a method" % (action,))

        # an action is registered only once
        if action_name not in self._delete_callback:
            self._delete_callback.append(action_name)
Beispiel #8
0
    def test_delete(self):
        """Deleting a tag or a whole category removes it, and deleting unknown entries does not raise."""
        state = ServiceState()
        state.set('network', 'tcp-80', 'ok')
        state.set('network', 'tcp-81', 'error')

        # sanity check of the starting point
        initial = (
            ('tcp-80', 'ok', "state should be ok"),
            ('tcp-81', 'error', "state should be error"),
        )
        for tag, expected, message in initial:
            self.assertEqual(state.categories['network'][tag], expected, message)

        # remove a single tag
        state.delete('network', 'tcp-80')
        with self.assertRaises(StateCategoryNotExistsError,
                               msg='tag tcp-80 should not exist'):
            state.get('network', 'tcp-80')

        # remove the whole category
        state.delete('network')
        self.assertEqual(state.categories, {}, "state should be empty now")

        # deleting a category or a tag that doesn't exist must be silent
        state.set('network', 'tcp-80', 'ok')
        state.delete('noexsits')
        state.delete('network', 'noexists')
Beispiel #9
0
    def test_check(self):
        """check() is truthy on a matching state and raises StateCheckError on any mismatch."""
        state = ServiceState()
        state.set('network', 'tcp-80', 'ok')

        self.assertTrue(state.check('network', 'tcp-80', 'ok'))

        # (category, tag, expected value) combinations that must be rejected
        failing_checks = (
            ('network', 'tcp-80', 'error'),
            ('network', '', 'ok'),
            ('foo', 'tcp-80', 'ok'),
            ('', '', 'ok'),
            (None, 'tcp-80', 'ok'),
            ('network', None, 'ok'),
        )
        for category, tag, expected in failing_checks:
            with self.assertRaises(StateCheckError):
                self.assertFalse(state.check(category, tag, expected))
Beispiel #10
0
    def test_save_load(self):
        """A saved state file holds the yaml dump of the categories and can be loaded back."""
        source = ServiceState()
        source.set('network', 'tcp-80', 'ok')
        source.set('network', 'tcp-81', 'error')

        with tempfile.TemporaryDirectory() as workdir:
            target = os.path.join(workdir, 'zrobot-test-state.yml')
            source.save(target)

            # the file content must be the yaml serialization of the categories
            with open(target) as fd:
                on_disk = fd.read()
            expected = j.data.serializer.yaml.dumps(source.categories)
            self.assertEqual(on_disk, expected)

            # loading the file in a new state gives back the same categories
            reloaded = ServiceState()
            reloaded.load(target)
            self.assertDictEqual(source.categories, reloaded.categories)
Beispiel #11
0
    def test_get_state(self):
        """get() returns a whole category or a single tag, and raises on unknown category or tag."""
        state = ServiceState()
        state.set('network', 'tcp-80', 'ok')
        state.set('network', 'tcp-81', 'error')

        whole_category = state.get('network')
        self.assertDictEqual(whole_category, {'tcp-80': 'ok', 'tcp-81': 'error'})

        single_tag = state.get('network', 'tcp-80')
        self.assertDictEqual(single_tag, {'tcp-80': 'ok'})

        # an unknown category and an unknown tag both raise the same error
        missing_lookups = (
            (('foo',), 'category should not exist'),
            (('network', 'foo'), 'tag should not exist'),
        )
        for lookup, message in missing_lookups:
            with self.assertRaises(StateCategoryNotExistsError, msg=message):
                state.get(*lookup)
Beispiel #12
0
 def test_create_state(self):
     """A freshly created state must expose categories that are not None."""
     fresh = ServiceState()
     message = "categories dictionnary should not be None"
     self.assertIsNotNone(fresh.categories, message)