def blocking_shutdown():
            log.info("%s beginning shutdown" % self.fqdn)
            self._enter_shutdown()

            shutdown_start_time = IMLDateTime.utcnow()
            self.shutdown_agent()
            self._cluster.leave(self.nodename)
            shutdown_end_time = IMLDateTime.utcnow()

            shutdown_time = (shutdown_end_time - shutdown_start_time).seconds
            while shutdown_time < MIN_SHUTDOWN_DURATION:
                if not simulate_shutdown:
                    break

                remaining_delay = MIN_SHUTDOWN_DURATION - shutdown_time
                log.info("%s, sleeping for %d seconds on shutdown" %
                         (self.fqdn, remaining_delay))

                shutdown_time += remaining_delay
                time.sleep(remaining_delay)

            self._exit_shutdown()
            log.info("%s shutdown complete" % self.fqdn)

            if reboot:
                log.info("%s rebooting" % self.fqdn)
                self.startup(simulate_shutdown)
Esempio n. 2
0
    def test_message_callback(self):
        """Test that when a callback is included in a Message(), it is invoked
        after the message is sent"""

        client = mock.Mock()
        client._fqdn = "test_server"
        client.device_plugins = mock.Mock()
        client.device_plugins.get_plugins = mock.Mock(
            return_value=['test_plugin'])
        client.boot_time = IMLDateTime.utcnow()
        client.start_time = IMLDateTime.utcnow()

        callback = mock.Mock()

        # Disable poll() so that it's not trying to set up sessions, just doing passthrough of messages
        with mock.patch("chroma_agent.agent_client.HttpWriter.poll"):
            try:
                writer = HttpWriter(client)
                writer.start()

                message = Message("DATA",
                                  "test_plugin", {'key1': 'val1'},
                                  'session_foo',
                                  666,
                                  callback=callback)
                writer.put(message)

                TIMEOUT = 2
                i = 0
                while True:
                    if client.post.call_count and callback.call_count:
                        break
                    else:
                        time.sleep(1)
                        i += 1
                        if i > TIMEOUT:
                            raise RuntimeError(
                                "Timeout waiting for .post() and callback (%s %s)"
                                %
                                (client.post.call_count, callback.call_count))

                # Should have sent back the result
                self.assertEqual(client.post.call_count, 1)
                self.assertDictEqual(
                    client.post.call_args[0][0], {
                        'messages': [message.dump(client._fqdn)],
                        'server_boot_time': client.boot_time.isoformat() + "Z",
                        'client_start_time':
                        client.start_time.isoformat() + "Z"
                    })

                # Should have invoked the callback
                self.assertEqual(callback.call_count, 1)
            finally:
                writer.stop()
                writer.join()
Esempio n. 3
0
    def start_session(self):

        #  This fake plugin needs to look at it corosync defined peers of
        #  this fake server and determine
        #  which are online.  This happens in production by shelling out the
        #  call crm_mon --one-shot --as-xml

        #  To simulate this, the _server object which is a FakeServer, must
        #  be able to tell this server what it's peers are.

        #  This implementation looks at ALL the servers in the simulator,
        #  and those ones that are also join'ed in the cluster are online.

        log.debug('cluster nodes:  %s' % self._server._cluster.state['nodes'])

        nodes = [
            (node_dict['nodename'], node_dict['online'])
            for node_dict in self._server._cluster.state['nodes'].values()
        ]

        log.debug('Nodes and state:  %s' % nodes)

        dt = IMLDateTime.utcnow().isoformat()
        message = self.get_test_message(utc_iso_date_str=dt,
                                        node_status_list=nodes)

        log.debug(message)
        return message
Esempio n. 4
0
    def _scan(self, initial=False):
        started_at = IMLDateTime.utcnow().isoformat()
        audit = local.LocalAudit()

        # Only set resource_locations if we have the management package
        try:
            from chroma_agent.action_plugins import manage_targets
            resource_locations = manage_targets.get_resource_locations()
        except ImportError:
            resource_locations = None

        mounts = self._scan_mounts()

        if initial:
            packages = scan_packages()
        else:
            packages = None

        # FIXME: HYD-1095 we should be sending a delta instead of a full dump every time
        # FIXME: At this time the 'capabilities' attribute is unused on the manager
        return {
            "started_at": started_at,
            "agent_version": agent_version(),
            "capabilities": plugin_manager.ActionPluginManager().capabilities,
            "metrics": audit.metrics(),
            "properties": audit.properties(),
            "mounts": mounts,
            "packages": packages,
            "resource_locations": resource_locations
        }
 def inject_log_message(self, message):
     log.debug("Injecting log message %s/%s" % (self.fqdn, message))
     self._log_messages.append({
         'source': 'cluster_sim',
         'severity': 1,
         'facility': 1,
         'message': message,
         'datetime': IMLDateTime.utcnow().isoformat()
     })
Esempio n. 6
0
    def test_priorities(self):
        """
        Test that messages are consumed for POST based on the priority of the payload (data plane), or at the highest
        priority if no payload (control plane)
        """

        client = mock.Mock()
        client._fqdn = "test_server"
        client.boot_time = IMLDateTime.utcnow()
        client.start_time = IMLDateTime.utcnow()

        writer = HttpWriter(client)

        def inject_messages(*args, **kwargs):
            # A control plane message
            writer.put(
                Message("SESSION_CREATE_REQUEST", "plugin_fuz", None, None,
                        None))

            low_body = DevicePluginMessage('low', PRIO_LOW)
            normal_body = DevicePluginMessage('normal', PRIO_NORMAL)
            high_body = DevicePluginMessage('high', PRIO_HIGH)
            writer.put(Message("DATA", "plugin_foo", low_body, "foo", 0))
            writer.put(Message("DATA", "plugin_bar", normal_body, "foo", 1))
            writer.put(Message("DATA", "plugin_baz", high_body, "foo", 2))

        inject_messages()
        writer.send()
        self.assertEqual(client.post.call_count, 1)
        messages = client.post.call_args[0][0]['messages']

        self.assertEqual(len(messages), 4)
        # First two messages (of equal priority) arrive in order or insertion
        self.assertEqual(messages[0]['plugin'], 'plugin_fuz')
        self.assertEqual(messages[1]['plugin'], 'plugin_baz')
        # Remaining messages arrive in priority order
        self.assertEqual(messages[2]['plugin'], 'plugin_bar')
        self.assertEqual(messages[3]['plugin'], 'plugin_foo')
    def pop_log_messages(self):
        messages = self._log_messages

        d = datetime.datetime.now()
        messages.extend([{
            'source': 'cluster_sim',
            'severity': 1,
            'facility': 1,
            'message': "%s %s %s" % (self.fqdn, d, a),
            'datetime': IMLDateTime.utcnow().isoformat()
        } for a in range(0, self.log_rate)])

        self._log_messages = []
        return messages
Esempio n. 8
0
 def start_session(self):
     return {
         'log_lines': [{
             'source':
             'cluster_sim',
             'severity':
             1,
             'facility':
             1,
             'message':
             'Lustre: Cluster simulator syslog session start %s %s' %
             (self._server.fqdn, datetime.datetime.now()),
             'datetime':
             IMLDateTime.utcnow().isoformat() + 'Z'
         }]
     }
        def blocking_startup():
            log.info("%s beginning startup" % self.fqdn)

            startup_time = 0
            # Mixing shutdown/startup state here is a little weird, but
            # this only happens when it's explicitly requested by a caller
            # that wants full server simulation. The end result is that
            # simulating a reboot is easier if we deal with waiting for
            # the shutdown to finish here before moving on to startup.
            while simulate_bootup and self.shutting_down:
                log.info("%s, waiting for shutdown (%d)" %
                         (self.fqdn, startup_time))
                startup_time += 1
                time.sleep(1)

            # This may happen to the losing thread(s) during startup after
            # a multi-PDU power cycle.
            if self.starting_up:
                log.info(
                    "%s exiting startup because another thread is already doing it?"
                    % self.fqdn)
                return

            self._enter_startup()
            self.boot_time = IMLDateTime.utcnow()

            while startup_time < MIN_STARTUP_DURATION:
                if not simulate_bootup:
                    break

                remaining_delay = MIN_STARTUP_DURATION - startup_time
                log.info("%s, sleeping for %d seconds on boot" %
                         (self.fqdn, remaining_delay))

                startup_time += remaining_delay
                time.sleep(remaining_delay)

            self.start_agent()
            self._cluster.join(self.nodename)

            self._exit_startup()
            log.info("%s startup complete" % self.fqdn)
Esempio n. 10
0
    def update_session(self, first=False):
        mounts = []
        for resource in self._server._cluster.get_running_resources(
                self._server.nodename):
            mounts.append({
                'device': resource['device_path'],
                'fs_uuid': resource['uuid'],
                'mount_point': resource['mount_point'],
                'recovery_status': {}
            })

        if first:
            packages = self._server.scan_packages()
        else:
            packages = None

        return {
            'resource_locations': self._server._cluster.resource_locations(),
            'capabilities': ['manage_targets'],
            'metrics': {
                'raw': {
                    'node': self._server.get_node_stats(),
                    'lustre': self._server.get_lustre_stats(),
                    'lustre_client_mounts': self._server.lustre_client_mounts
                }
            },
            'packages': packages,
            'mounts': mounts,
            "properties": {
                'zfs_installed': False,
                'distro': 'CentOS',
                'distro_version': 6.6,
                'python_version_major_minor': 2.6,
                'python_patchlevel': 6,
                'kernel_version': '2.6.32-504.8.1.el6_lustre.x86_64'
            },
            'started_at': IMLDateTime.utcnow().isoformat() + 'Z',
            'agent_version': 'dummy'
        }
Esempio n. 11
0
    def test_oversized_messages(self):
        """
        Test that oversized messages are dropped and the session is terminated
        """
        # Monkey-patch this setting to a lower limit to make testing easier
        MAX_BYTES_PER_POST = 1024
        from chroma_agent.agent_client import MAX_BYTES_PER_POST as LARGE_MAX_BYTES_PER_POST

        def set_post_limit(size):
            import chroma_agent.agent_client
            chroma_agent.agent_client.MAX_BYTES_PER_POST = size

        self.addCleanup(set_post_limit, LARGE_MAX_BYTES_PER_POST)
        set_post_limit(MAX_BYTES_PER_POST)

        client = mock.Mock()
        client._fqdn = "test_server"
        client.boot_time = IMLDateTime.utcnow()
        client.start_time = IMLDateTime.utcnow()

        writer = HttpWriter(client)

        def fake_post(envelope):
            if len(json.dumps(envelope)) > MAX_BYTES_PER_POST:
                daemon_log.info("fake_post(): rejecting oversized message")
                raise HttpError()

        client.post = mock.Mock(side_effect=fake_post)
        TestPlugin = mock.Mock()

        mock_plugin_instance = mock.Mock()
        mock_plugin_instance.start_session = mock.Mock(
            return_value={'foo': 'bar'})
        client.device_plugins.get = mock.Mock(
            return_value=lambda (plugin_name): mock_plugin_instance)
        client.device_plugins.get_plugins = mock.Mock(
            return_value={'test_plugin': TestPlugin})
        client.sessions = SessionTable(client)

        daemon_log.setLevel(logging.DEBUG)

        import string
        from random import choice
        oversized_string = "".join(
            choice(string.printable) for i in range(MAX_BYTES_PER_POST))

        # There should be one message to set up the session
        writer.poll('test_plugin')
        self.assertTrue(writer.send())
        self.assertEqual(client.post.call_count, 1)
        messages = client.post.call_args[0][0]['messages']
        self.assertEqual(len(messages), 1)
        self.assertEqual(messages[0]['type'], "SESSION_CREATE_REQUEST")
        # Pretend we got a SESSION_CREATE_RESPONSE
        client.sessions.create('test_plugin', 'id_foo')
        self.assertEqual(len(client.sessions._sessions), 1)

        # Inject a normal and an oversized message
        normal_body = DevicePluginMessage('normal', PRIO_NORMAL)
        oversized_body = DevicePluginMessage(oversized_string, PRIO_NORMAL)
        writer.put(Message("DATA", "test_plugin", normal_body, "id_foo", 0))
        writer.put(Message("DATA", "test_plugin", oversized_body, "id_foo", 1))

        # Only the normal message should get through
        self.assertTrue(writer.send())
        self.assertEqual(client.post.call_count, 2)
        messages = client.post.call_args[0][0]['messages']
        self.assertEqual(len(messages), 1)
        self.assertEqual(messages[0]['type'], "DATA")

        # The oversized message should be dropped and the session
        # terminated
        self.assertFalse(writer.send())
        self.assertEqual(client.post.call_count, 3)
        self.assertEqual(len(client.sessions._sessions), 0)

        # However, we should eventually get a new session for the
        # offending plugin
        writer.poll('test_plugin')
        self.assertTrue(writer.send())
        self.assertEqual(client.post.call_count, 4)
        messages = client.post.call_args[0][0]['messages']
        self.assertEqual(len(messages), 1)
        self.assertEqual(messages[0]['type'], "SESSION_CREATE_REQUEST")
Esempio n. 12
0
    def test_session_backoff(self):
        """Test that when messages to the manager are being dropped due to POST failure,
        sending SESSION_CREATE_REQUEST messages has a power-of-two backoff wait"""
        client = mock.Mock()
        client._fqdn = "test_server"
        client.boot_time = IMLDateTime.utcnow()
        client.start_time = IMLDateTime.utcnow()

        writer = client.writer = HttpWriter(client)
        reader = client.reader = HttpReader(client)

        daemon_log.setLevel(logging.DEBUG)

        TestPlugin = mock.Mock()

        mock_plugin_instance = mock.Mock()
        mock_plugin_instance.start_session = mock.Mock(
            return_value={'foo': 'bar'})
        client.device_plugins.get = mock.Mock(
            return_value=lambda (plugin_name): mock_plugin_instance)
        client.device_plugins.get_plugins = mock.Mock(
            return_value={'test_plugin': TestPlugin})
        client.sessions = SessionTable(client)

        client.post = mock.Mock(side_effect=HttpError())

        # Pick an arbitrary time to use as a base for simulated waits
        t_0 = datetime.datetime.now()

        old_datetime = datetime.datetime
        try:

            def expect_message_at(t):
                datetime.datetime.now = mock.Mock(
                    return_value=t - datetime.timedelta(seconds=0.1))
                writer.poll('test_plugin')
                self.assertEqual(writer._messages.qsize(), 0)

                datetime.datetime.now = mock.Mock(
                    return_value=t + datetime.timedelta(seconds=0.1))
                writer.poll('test_plugin')
                self.assertEqual(writer._messages.qsize(), 1)

            datetime.datetime = mock.Mock()
            datetime.datetime.now = mock.Mock(return_value=t_0)

            # Stage 1: failing to POST, backing off
            # =====================================

            # Poll should put some session creation messages
            writer.poll('test_plugin')
            self.assertEqual(writer._messages.qsize(), 1)
            # Another poll immediately after shouldn't add any messages (MIN_SESSION_BACKOFF hasn't passed)
            writer.poll('test_plugin')
            self.assertEqual(writer._messages.qsize(), 1)

            # Send should consume the messages, and they go to nowhere because the POST fails
            writer.send()
            client.post.assert_called_once()
            self.assertEqual(len(client.post.call_args[0][0]['messages']), 1)

            # First time boundary: where the first repeat should happen
            from chroma_agent.agent_client import MIN_SESSION_BACKOFF
            t_1 = t_0 + MIN_SESSION_BACKOFF

            expect_message_at(t_1)

            # Have another crack at sending, it should fail and empty the queue
            writer.send()
            self.assertTrue(writer._messages.empty())

            # Second time boundary: where the second repeat should happen
            t_2 = t_1 + MIN_SESSION_BACKOFF * 2
            expect_message_at(t_2)

            # Stage 2: success in POST, session creation
            # ==========================================

            # This time we'll let the message go through, and a session to begin.
            client.post = mock.Mock()
            writer.send()

            # HttpReader receives a response from the manager, and should reset the backoff counters.
            reader._handle_messages([{
                'type': 'SESSION_CREATE_RESPONSE',
                'plugin': 'test_plugin',
                'session_id': 'id_foo',
                'session_seq': 0,
                'body': {}
            }])
            self.assertEqual(len(client.sessions._sessions), 1)
            session = client.sessions.get('test_plugin')

            # State 3: POSTs start failing again, see delay again
            # ===================================================

            # Break the POST link again
            client.post = mock.Mock(side_effect=HttpError())

            # Poll will get a DATA message from initial_scan
            session.initial_scan = mock.Mock(return_value={'foo': 'bar'})
            writer.poll('test_plugin')
            session.initial_scan.assert_called_once()
            self.assertFalse(writer._messages.empty())

            # Send will fail to send it, and as a result destroy the session
            writer.send()
            self.assertTrue(writer._messages.empty())
            self.assertEqual(len(client.sessions._sessions), 0)

            # Move to some point beyond the first backoff cycle
            t_3 = t_0 + datetime.timedelta(seconds=60)
            datetime.datetime.now = mock.Mock(return_value=t_3)

            writer.poll('test_plugin')
            self.assertEqual(writer._messages.qsize(), 1)
            writer.poll('test_plugin')
            self.assertEqual(writer._messages.qsize(), 1)
            writer.send()
            self.assertEqual(writer._messages.qsize(), 0)

            # Check the backoff time has gone back to MIN_SESSION_BACKOFF
            t_4 = t_3 + MIN_SESSION_BACKOFF
            expect_message_at(t_4)
        finally:
            datetime.datetime = old_datetime
    def __init__(self,
                 simulator,
                 fake_cluster,
                 server_id,
                 fqdn,
                 nodename,
                 nid_tuples_or_network_interfaces,
                 worker=False,
                 client_mounts=None):
        self.fqdn = fqdn

        super(FakeServer, self).__init__(simulator.folder)

        self.nodename = nodename

        # When reloaded from a file the network interfaces are presented as a dict but when created from code they
        # are represented as nid_tuples.
        if type(nid_tuples_or_network_interfaces) is dict:
            self.network_interfaces = nid_tuples_or_network_interfaces
        else:
            self.network_interfaces = {}
            interface_no = 0
            for nid in nid_tuples_or_network_interfaces:
                self.network_interfaces[nid[0]] = {
                    'interface_no': interface_no,
                    'type': nid[1],
                    'lnd_type': nid[1],
                    'lnd_network': nid[2]
                }
                interface_no += 1

        self.boot_time = IMLDateTime.utcnow()
        self.id = server_id

        self.log_rate = 0
        self._log_messages = []
        self._simulator = simulator
        self._devices = simulator.devices
        self._cluster = fake_cluster

        self._lock = threading.Lock()
        self._agent_client = None

        self._cluster.join(nodename, fqdn=fqdn)

        if simulator.folder:
            # If simulator is persistent, save Crypto in same place
            crypto_folder = os.path.join(simulator.folder,
                                         "%s_crypto" % self.fqdn)
        else:
            # If simulator is ephemeral, create temp dir for Crypto
            crypto_folder = tempfile.mkdtemp()

        if not os.path.exists(crypto_folder):
            os.makedirs(crypto_folder)
        self.crypto = Crypto(crypto_folder)

        if not client_mounts:
            client_mounts = {}

        self.state['id'] = server_id
        self.state['network_interfaces'] = self.network_interfaces
        self.state['nodename'] = nodename
        self.state['fqdn'] = fqdn
        self.state['worker'] = worker
        self.state['client_mounts'] = client_mounts
        self.state['packages'][
            'chroma-agent'] = self._simulator.available_packages(
                self.node_type)[
                    'chroma-agent']  # The agent is installed at this point.
        self.save()