def configure_logging():
    """Route the agent's console and daemon loggers to stderr.

    The console logger's level is set to INFO and the daemon logger's level
    to WARNING.  Each logger receives its own stderr stream handler.
    """
    import logging
    from chroma_agent.log import console_log, daemon_log

    # Order matters only in that each logger gets its handler before its level.
    logger_levels = (
        (console_log, logging.INFO),
        (daemon_log, logging.WARNING),
    )
    for logger, level in logger_levels:
        logger.addHandler(logging.StreamHandler(sys.stderr))
        logger.setLevel(level)
def main():
    """Command-line entry point for the simulated benchmarks.

    Parses the command line, obtains a simulator and runs the selected
    sub-command (``reset``, ``log_ingest_rate``, ``server_count_limit``,
    ``concurrent_registration_limit`` or ``filesystem_size_limit``).

    By default an in-process ``ClusterSimulator`` is started; when
    ``--remote_simulator`` is given, an XML-RPC proxy to a simulator on
    ``localhost:SIMULATOR_PORT`` is used instead.

    This function never returns: the process is terminated with ``os._exit``
    so that lingering threads cannot keep it alive.  The exit status is 0 when
    the sub-command completes and -1 when it raises.
    """
    parser = argparse.ArgumentParser(description="Simulated benchmarks")
    # NOTE(review): --remote_simulator and --debug use argparse's default
    # "store" action, so they expect a value and any non-empty value enables
    # them (including the string "False").  Left unchanged so existing
    # invocations keep working.
    parser.add_argument('--remote_simulator', required=False,
                        help="Disable built-in simulator (run it separately)", default=False)
    parser.add_argument('--debug', required=False, help="Enable DEBUG-level logs", default=False)
    parser.add_argument('--url', required=False, help="Manager URL", default="https://localhost:8000")
    parser.add_argument('--username', required=False, help="REST API username", default='admin')
    parser.add_argument('--password', required=False, help="REST API password", default='lustre')
    parser.add_argument('--servers', help="server count", default=8, type=int)

    # Each sub-command stores a callable taking (args, simulator) as `func`.
    subparsers = parser.add_subparsers()

    reset_parser = subparsers.add_parser("reset")
    reset_parser.set_defaults(
        func=lambda args, simulator: Benchmark(args, simulator).reset())

    log_ingest_parser = subparsers.add_parser("log_ingest_rate")
    log_ingest_parser.set_defaults(
        func=lambda args, simulator: LogIngestRate(args, simulator).run_benchmark())

    server_count_limit_parser = subparsers.add_parser("server_count_limit")
    server_count_limit_parser.set_defaults(
        func=lambda args, simulator: ServerCountLimit(args, simulator).run_benchmark())

    concurrent_registration_parser = subparsers.add_parser("concurrent_registration_limit")
    concurrent_registration_parser.set_defaults(
        func=lambda args, simulator: ConcurrentRegistrationLimit(args, simulator).run_benchmark())

    filesystem_size_parser = subparsers.add_parser("filesystem_size_limit")
    filesystem_size_parser.set_defaults(
        func=lambda args, simulator: FilesystemSizeLimit(args, simulator).run_benchmark())

    args = parser.parse_args()
    if args.debug:
        log.setLevel(logging.DEBUG)

    if not args.remote_simulator:
        log.info("Starting simulator...")

        # Enable logging by agent code run within simulator
        from chroma_agent.log import daemon_log
        daemon_log.setLevel(logging.DEBUG)
        handler = logging.FileHandler("chroma-agent.log")
        handler.setFormatter(
            logging.Formatter('[%(asctime)s] %(message)s', '%d/%b/%Y:%H:%M:%S'))
        daemon_log.addHandler(handler)
        daemon_log.info("Enabled agent logging within simulator")

        from cluster_sim.simulator import ClusterSimulator
        simulator = ClusterSimulator(folder=None, url=args.url + "/")
        simulator.power.setup(1)
        simulator.start_all()
        simulator.setup(0, 0, 0, nid_count=1, cluster_size=4, pdu_count=1, su_size=0)
    else:
        simulator = xmlrpclib.ServerProxy("http://localhost:%s" % SIMULATOR_PORT, allow_none=True)

    # Assume failure until the benchmark has run to completion.
    exit_status = -1
    try:
        log.info("Starting benchmark...")
        args.func(args, simulator)
    except BaseException:
        # Because we do a hard exit at the end here, explicitly log terminating
        # exceptions or they would get lost.  BaseException is deliberate: the
        # process is about to exit regardless of what was raised.
        log.error(traceback.format_exc())
    else:
        log.info("Complete.")
        exit_status = 0
    finally:
        # Do a hard exit to avoid dealing with lingering threads (not the cleanest, but
        # this isn't production code).
        os._exit(exit_status)
def test_oversized_messages(self):
    """
    Test that oversized messages are dropped and the session is terminated

    The POST size limit is patched down to 1024 bytes for the duration of the
    test, and client.post is replaced by a fake that raises HttpError for any
    envelope whose JSON encoding exceeds that limit.
    """
    # Monkey-patch this setting to a lower limit to make testing easier
    MAX_BYTES_PER_POST = 1024
    from chroma_agent.agent_client import MAX_BYTES_PER_POST as LARGE_MAX_BYTES_PER_POST

    def set_post_limit(size):
        import chroma_agent.agent_client
        chroma_agent.agent_client.MAX_BYTES_PER_POST = size

    # Register the restore before patching so the original limit always comes back.
    self.addCleanup(set_post_limit, LARGE_MAX_BYTES_PER_POST)
    set_post_limit(MAX_BYTES_PER_POST)

    client = mock.Mock()
    client._fqdn = "test_server"
    client.boot_time = IMLDateTime.utcnow()
    client.start_time = IMLDateTime.utcnow()

    writer = HttpWriter(client)

    def fake_post(envelope):
        if len(json.dumps(envelope)) > MAX_BYTES_PER_POST:
            daemon_log.info("fake_post(): rejecting oversized message")
            raise HttpError()

    client.post = mock.Mock(side_effect=fake_post)

    TestPlugin = mock.Mock()

    mock_plugin_instance = mock.Mock()
    mock_plugin_instance.start_session = mock.Mock(return_value={'foo': 'bar'})
    # Plain lambda parameter: the parenthesised form is a SyntaxError on Python 3.
    client.device_plugins.get = mock.Mock(
        return_value=lambda plugin_name: mock_plugin_instance)
    client.device_plugins.get_plugins = mock.Mock(
        return_value={'test_plugin': TestPlugin})
    client.sessions = SessionTable(client)

    daemon_log.setLevel(logging.DEBUG)

    import string
    from random import choice
    # A body of exactly the limit's length guarantees the enclosing envelope exceeds it.
    oversized_string = "".join(
        choice(string.printable) for _ in range(MAX_BYTES_PER_POST))

    # There should be one message to set up the session
    writer.poll('test_plugin')
    self.assertTrue(writer.send())
    self.assertEqual(client.post.call_count, 1)
    messages = client.post.call_args[0][0]['messages']
    self.assertEqual(len(messages), 1)
    self.assertEqual(messages[0]['type'], "SESSION_CREATE_REQUEST")

    # Pretend we got a SESSION_CREATE_RESPONSE
    client.sessions.create('test_plugin', 'id_foo')
    self.assertEqual(len(client.sessions._sessions), 1)

    # Inject a normal and an oversized message
    normal_body = DevicePluginMessage('normal', PRIO_NORMAL)
    oversized_body = DevicePluginMessage(oversized_string, PRIO_NORMAL)
    writer.put(Message("DATA", "test_plugin", normal_body, "id_foo", 0))
    writer.put(Message("DATA", "test_plugin", oversized_body, "id_foo", 1))

    # Only the normal message should get through
    self.assertTrue(writer.send())
    self.assertEqual(client.post.call_count, 2)
    messages = client.post.call_args[0][0]['messages']
    self.assertEqual(len(messages), 1)
    self.assertEqual(messages[0]['type'], "DATA")

    # The oversized message should be dropped and the session
    # terminated
    self.assertFalse(writer.send())
    self.assertEqual(client.post.call_count, 3)
    self.assertEqual(len(client.sessions._sessions), 0)

    # However, we should eventually get a new session for the
    # offending plugin
    writer.poll('test_plugin')
    self.assertTrue(writer.send())
    self.assertEqual(client.post.call_count, 4)
    messages = client.post.call_args[0][0]['messages']
    self.assertEqual(len(messages), 1)
    self.assertEqual(messages[0]['type'], "SESSION_CREATE_REQUEST")
from blockdevice import BlockDevice
from ..lib.shell import Shell

try:
    # FIXME: this should be avoided, implicit knowledge of something outside the package
    from chroma_agent.log import daemon_log as log
except ImportError:
    # Fallback when chroma_agent is not importable: use a logger named after
    # this module and, if it has no handlers yet, log to a local file at DEBUG.
    # NOTE(review): the nesting of this fallback setup under `except` was
    # reconstructed from whitespace-collapsed source -- confirm against the
    # original file.
    log = logging.getLogger(__name__)

    if not log.handlers:
        handler = logging.FileHandler('blockdevice_zfs.log')
        handler.setFormatter(
            logging.Formatter(
                "[%(asctime)s: %(levelname)s/%(name)s] %(message)s"))
        log.addHandler(handler)
        log.setLevel(logging.DEBUG)


# NOTE(review): presumably raised when something expected to be a zpool is
# not one; the raise sites are outside this view -- verify.
class NotZpoolException(Exception):
    pass


class BlockDeviceZfs(BlockDevice):
    """BlockDevice subclass whose supported device type is 'zfs'."""

    # From lustre_disk.h
    # Bit flags, one per server type; they can be OR-ed together as below.
    LDD_F_SV_TYPE_MDT = 0x0001
    LDD_F_SV_TYPE_OST = 0x0002
    LDD_F_SV_TYPE_MGS = 0x0004
    # Combined mask with both the MGS and MDT bits set.
    LDD_F_SV_TYPE_MGS_or_MDT = (LDD_F_SV_TYPE_MGS | LDD_F_SV_TYPE_MDT)

    # NOTE(review): presumably consulted by the BlockDevice base class to pick
    # the implementation for a device type -- verify against BlockDevice.
    _supported_device_types = ['zfs']
def test_session_backoff(self):
    """Test that when messages to the manager are being dropped due to POST failure,
    sending SESSION_CREATE_REQUEST messages has a power-of-two backoff wait

    datetime.datetime is replaced with a mock for the duration of the test so
    that "now" can be moved to arbitrary points in time; the real class is
    restored in the finally block.
    """
    client = mock.Mock()
    client._fqdn = "test_server"
    client.boot_time = IMLDateTime.utcnow()
    client.start_time = IMLDateTime.utcnow()
    writer = client.writer = HttpWriter(client)
    reader = client.reader = HttpReader(client)

    daemon_log.setLevel(logging.DEBUG)

    TestPlugin = mock.Mock()

    mock_plugin_instance = mock.Mock()
    mock_plugin_instance.start_session = mock.Mock(return_value={'foo': 'bar'})
    # Plain lambda parameter: the parenthesised form is a SyntaxError on Python 3.
    client.device_plugins.get = mock.Mock(
        return_value=lambda plugin_name: mock_plugin_instance)
    client.device_plugins.get_plugins = mock.Mock(
        return_value={'test_plugin': TestPlugin})
    client.sessions = SessionTable(client)

    client.post = mock.Mock(side_effect=HttpError())

    # Pick an arbitrary time to use as a base for simulated waits
    t_0 = datetime.datetime.now()

    old_datetime = datetime.datetime
    try:
        def expect_message_at(t):
            """Assert that polling just before t queues nothing and just after t queues one message."""
            datetime.datetime.now = mock.Mock(
                return_value=t - datetime.timedelta(seconds=0.1))
            writer.poll('test_plugin')
            self.assertEqual(writer._messages.qsize(), 0)

            datetime.datetime.now = mock.Mock(
                return_value=t + datetime.timedelta(seconds=0.1))
            writer.poll('test_plugin')
            self.assertEqual(writer._messages.qsize(), 1)

        datetime.datetime = mock.Mock()
        datetime.datetime.now = mock.Mock(return_value=t_0)

        # Stage 1: failing to POST, backing off
        # =====================================

        # Poll should put some session creation messages
        writer.poll('test_plugin')
        self.assertEqual(writer._messages.qsize(), 1)

        # Another poll immediately after shouldn't add any messages (MIN_SESSION_BACKOFF hasn't passed)
        writer.poll('test_plugin')
        self.assertEqual(writer._messages.qsize(), 1)

        # Send should consume the messages, and they go to nowhere because the POST fails
        writer.send()
        # Explicit call_count check: on older mock releases assert_called_once()
        # is an auto-created attribute and passes unconditionally.
        self.assertEqual(client.post.call_count, 1)
        self.assertEqual(len(client.post.call_args[0][0]['messages']), 1)

        # First time boundary: where the first repeat should happen
        from chroma_agent.agent_client import MIN_SESSION_BACKOFF
        t_1 = t_0 + MIN_SESSION_BACKOFF
        expect_message_at(t_1)

        # Have another crack at sending, it should fail and empty the queue
        writer.send()
        self.assertTrue(writer._messages.empty())

        # Second time boundary: where the second repeat should happen
        t_2 = t_1 + MIN_SESSION_BACKOFF * 2
        expect_message_at(t_2)

        # Stage 2: success in POST, session creation
        # ==========================================

        # This time we'll let the message go through, and a session to begin.
        client.post = mock.Mock()
        writer.send()

        # HttpReader receives a response from the manager, and should reset the backoff counters.
        reader._handle_messages([{
            'type': 'SESSION_CREATE_RESPONSE',
            'plugin': 'test_plugin',
            'session_id': 'id_foo',
            'session_seq': 0,
            'body': {}
        }])
        self.assertEqual(len(client.sessions._sessions), 1)

        session = client.sessions.get('test_plugin')

        # Stage 3: POSTs start failing again, see delay again
        # ===================================================

        # Break the POST link again
        client.post = mock.Mock(side_effect=HttpError())

        # Poll will get a DATA message from initial_scan
        session.initial_scan = mock.Mock(return_value={'foo': 'bar'})
        writer.poll('test_plugin')
        self.assertEqual(session.initial_scan.call_count, 1)
        self.assertFalse(writer._messages.empty())

        # Send will fail to send it, and as a result destroy the session
        writer.send()
        self.assertTrue(writer._messages.empty())
        self.assertEqual(len(client.sessions._sessions), 0)

        # Move to some point beyond the first backoff cycle
        t_3 = t_0 + datetime.timedelta(seconds=60)
        datetime.datetime.now = mock.Mock(return_value=t_3)
        writer.poll('test_plugin')
        self.assertEqual(writer._messages.qsize(), 1)
        writer.poll('test_plugin')
        self.assertEqual(writer._messages.qsize(), 1)
        writer.send()
        self.assertEqual(writer._messages.qsize(), 0)

        # Check the backoff time has gone back to MIN_SESSION_BACKOFF
        t_4 = t_3 + MIN_SESSION_BACKOFF
        expect_message_at(t_4)
    finally:
        # Always put the real datetime class back, even if an assertion failed.
        datetime.datetime = old_datetime