def __init__(self, context, num_nodes, zk, security_protocol=SecurityConfig.PLAINTEXT,
             interbroker_security_protocol=SecurityConfig.PLAINTEXT,
             sasl_mechanism=SecurityConfig.SASL_MECHANISM_GSSAPI,
             topics=None, version=TRUNK, quota_config=None,
             jmx_object_names=None, jmx_attributes=[]):
    """
    :type context
    :type zk: ZookeeperService
    :type topics: dict
    """
    Service.__init__(self, context, num_nodes)
    JmxMixin.__init__(self, num_nodes, jmx_object_names, jmx_attributes)
    self.log_level = "DEBUG"

    self.zk = zk
    self.quota_config = quota_config

    self.security_protocol = security_protocol
    self.interbroker_security_protocol = interbroker_security_protocol
    self.sasl_mechanism = sasl_mechanism
    self.topics = topics

    for node in self.nodes:
        node.version = version
        node.config = KafkaConfig(**{config_property.BROKER_ID: self.idx(node)})
def __init__(self, context, agent_nodes=None, client_services=None,
             agent_port=DEFAULT_AGENT_PORT, coordinator_port=DEFAULT_COORDINATOR_PORT):
    """
    Create a Trogdor service.

    :param context:             The test context.
    :param agent_nodes:         The nodes to run the agents on.
    :param client_services:     Services whose nodes we should run agents on.
    :param agent_port:          The port to use for the trogdor_agent daemons.
    :param coordinator_port:    The port to use for the trogdor_coordinator daemons.
    """
    Service.__init__(self, context, num_nodes=1)
    self.coordinator_node = self.nodes[0]
    if client_services is not None:
        for client_service in client_services:
            for node in client_service.nodes:
                self.nodes.append(node)
    if agent_nodes is not None:
        for agent_node in agent_nodes:
            self.nodes.append(agent_node)
    if len(self.nodes) == 1:
        raise RuntimeError("You must supply at least one agent node to run the service on.")
    self.agent_port = agent_port
    self.coordinator_port = coordinator_port
def test_producer_and_consumer(self, security_protocol, interbroker_security_protocol=None, new_consumer=True):
    """
    Setup: 1 node zk + 3 node kafka cluster
    Concurrently produce and consume 10e6 messages with a single producer and a single consumer,
    using new consumer if new_consumer == True

    Return aggregate throughput statistics for both producer and consumer.

    (Under the hood, this runs ProducerPerformance.java, and ConsumerPerformance.scala)
    """
    if interbroker_security_protocol is None:
        interbroker_security_protocol = security_protocol
    self.start_kafka(security_protocol, interbroker_security_protocol)
    num_records = 10 * 1000 * 1000  # 10e6

    self.producer = ProducerPerformanceService(
        self.test_context, 1, self.kafka,
        topic=TOPIC_REP_THREE,
        num_records=num_records, record_size=DEFAULT_RECORD_SIZE, throughput=-1,
        settings={'acks': 1, 'batch.size': self.batch_size, 'buffer.memory': self.buffer_memory}
    )
    self.consumer = ConsumerPerformanceService(
        self.test_context, 1, self.kafka, topic=TOPIC_REP_THREE,
        new_consumer=new_consumer, messages=num_records)
    Service.run_parallel(self.producer, self.consumer)

    data = {
        "producer": compute_aggregate_throughput(self.producer),
        "consumer": compute_aggregate_throughput(self.consumer)
    }
    summary = ["Producer + consumer:", str(data)]
    self.logger.info("\n".join(summary))
    return data
def start(self, add_principals=""):
    self.open_port(self.security_protocol)
    self.open_port(self.interbroker_security_protocol)
    self.start_minikdc(add_principals)
    self._ensure_zk_chroot()

    Service.start(self)

    self.logger.info("Waiting for brokers to register at ZK")
    expected_broker_ids = set(self.nodes)
    wait_until(lambda: {node for node in self.nodes if self.is_registered(node)} == expected_broker_ids,
               timeout_sec=30, backoff_sec=1,
               err_msg="Kafka servers didn't register at ZK within 30 seconds")

    # Create topics if necessary
    if self.topics is not None:
        for topic, topic_cfg in self.topics.items():
            if topic_cfg is None:
                topic_cfg = {}

            topic_cfg["topic"] = topic
            self.create_topic(topic_cfg)
def start(self, add_principals=""):
    self.open_port(self.security_protocol)
    self.interbroker_listener.open = True
    self.start_minikdc(add_principals)
    self._ensure_zk_chroot()

    Service.start(self)

    self.logger.info("Waiting for brokers to register at ZK")
    expected_broker_ids = set(self.nodes)
    wait_until(lambda: {node for node in self.nodes if self.is_registered(node)} == expected_broker_ids,
               timeout_sec=30, backoff_sec=1,
               err_msg="Kafka servers didn't register at ZK within 30 seconds")

    # Create topics if necessary
    if self.topics is not None:
        for topic, topic_cfg in self.topics.items():
            if topic_cfg is None:
                topic_cfg = {}

            topic_cfg["topic"] = topic
            self.create_topic(topic_cfg)
def test_producer_and_consumer(self):
    self.logger.info("BENCHMARK: Producer + Consumer")
    self.producer = ProducerPerformanceService(
        self.test_context, 1, self.kafka,
        topic="test-rep-three",
        num_records=self.msgs_default, record_size=self.msg_size_default, throughput=-1,
        settings={'acks': 1, 'batch.size': self.batch_size, 'buffer.memory': self.buffer_memory})

    self.consumer = ConsumerPerformanceService(
        self.test_context, 1, self.kafka,
        topic="test-rep-three", num_records=self.msgs_default, throughput=-1, threads=1)

    Service.run_parallel(self.producer, self.consumer)

    data = {
        "producer": compute_throughput(self.producer),
        "consumer": compute_throughput(self.consumer)
    }
    summary = ["Producer + consumer:", str(data)]
    self.logger.info("\n".join(summary))
    return data
def check_allocate_free(self):
    cluster = JsonCluster(
        {"nodes": [
            {"ssh_config": {"host": "localhost1"}},
            {"ssh_config": {"host": "localhost2"}},
            {"ssh_config": {"host": "localhost3"}}]})
    assert len(cluster) == 3
    assert cluster.num_available_nodes() == 3

    nodes = cluster.alloc(Service.setup_node_spec(num_nodes=1))
    nodes_hostnames = self.cluster_hostnames(nodes)
    assert len(cluster) == 3
    assert cluster.num_available_nodes() == 2

    nodes2 = cluster.alloc(Service.setup_node_spec(num_nodes=2))
    nodes2_hostnames = self.cluster_hostnames(nodes2)
    assert len(cluster) == 3
    assert cluster.num_available_nodes() == 0

    assert nodes_hostnames.isdisjoint(nodes2_hostnames)

    cluster.free(nodes)
    assert cluster.num_available_nodes() == 1

    cluster.free(nodes2)
    assert cluster.num_available_nodes() == 3
def check_parsing(self):
    """Checks that RemoteAccounts are generated correctly from input JSON"""
    node = JsonCluster(
        {"nodes": [
            {"ssh_config": {"host": "hostname"}}]}).alloc(Service.setup_node_spec(num_nodes=1))[0]

    assert node.account.hostname == "hostname"
    assert node.account.user is None

    ssh_config = {
        "host": "hostname",
        "user": "******",
        "hostname": "localhost",
        "port": 22
    }
    node = JsonCluster(
        {"nodes": [
            {"hostname": "hostname", "user": "******", "ssh_config": ssh_config}]}).alloc(
        Service.setup_node_spec(num_nodes=1))[0]

    assert node.account.hostname == "hostname"
    assert node.account.user == "user"

    # check ssh configs
    assert node.account.ssh_config.host == "hostname"
    assert node.account.ssh_config.user == "user"
    assert node.account.ssh_config.hostname == "localhost"
    assert node.account.ssh_config.port == 22
def check_allocate_free(self):
    cluster = JsonCluster(
        {"nodes": [
            {"ssh_config": {"host": "localhost1"}},
            {"ssh_config": {"host": "localhost2"}},
            {"ssh_config": {"host": "localhost3"}}]})
    assert len(cluster) == 3
    assert cluster.num_available_nodes() == 3

    nodes = cluster.alloc(Service.setup_cluster_spec(num_nodes=1))
    nodes_hostnames = self.cluster_hostnames(nodes)
    assert len(cluster) == 3
    assert cluster.num_available_nodes() == 2

    nodes2 = cluster.alloc(Service.setup_cluster_spec(num_nodes=2))
    nodes2_hostnames = self.cluster_hostnames(nodes2)
    assert len(cluster) == 3
    assert cluster.num_available_nodes() == 0

    assert nodes_hostnames.isdisjoint(nodes2_hostnames)

    cluster.free(nodes)
    assert cluster.num_available_nodes() == 1

    cluster.free(nodes2)
    assert cluster.num_available_nodes() == 3
def check_parsing(self):
    """Checks that RemoteAccounts are generated correctly from input JSON"""
    node = JsonCluster(
        {"nodes": [
            {"ssh_config": {"host": "hostname"}}]}).alloc(Service.setup_cluster_spec(num_nodes=1))[0]

    assert node.account.hostname == "hostname"
    assert node.account.user is None

    ssh_config = {
        "host": "hostname",
        "user": "******",
        "hostname": "localhost",
        "port": 22
    }
    node = JsonCluster(
        {"nodes": [
            {"hostname": "hostname", "user": "******", "ssh_config": ssh_config}]}).alloc(
        Service.setup_cluster_spec(num_nodes=1))[0]

    assert node.account.hostname == "hostname"
    assert node.account.user == "user"

    # check ssh configs
    assert node.account.ssh_config.host == "hostname"
    assert node.account.ssh_config.user == "user"
    assert node.account.ssh_config.hostname == "localhost"
    assert node.account.ssh_config.port == 22
def start(self, add_principals="", use_zk_to_create_topic=True):
    if self.zk_client_secure and not self.zk.zk_client_secure_port:
        raise Exception("Unable to start Kafka: TLS to Zookeeper requested but Zookeeper secure port not enabled")

    self.open_port(self.security_protocol)
    self.interbroker_listener.open = True

    self.start_minikdc_if_necessary(add_principals)
    self._ensure_zk_chroot()

    Service.start(self)

    self.logger.info("Waiting for brokers to register at ZK")
    expected_broker_ids = set(self.nodes)
    wait_until(lambda: {node for node in self.nodes if self.is_registered(node)} == expected_broker_ids,
               timeout_sec=30, backoff_sec=1,
               err_msg="Kafka servers didn't register at ZK within 30 seconds")

    # Create topics if necessary
    if self.topics is not None:
        for topic, topic_cfg in self.topics.items():
            if topic_cfg is None:
                topic_cfg = {}

            topic_cfg["topic"] = topic
            self.create_topic(topic_cfg, use_zk_to_create_topic=use_zk_to_create_topic)
def __init__(self, context, nodes, target, mirror, persist="/mnt/kibosh"):
    """
    Create a Kibosh service.

    :param context:     The TestContext object.
    :param nodes:       The nodes to put the Kibosh FS on. Kibosh allocates no nodes of its own.
    :param target:      The target directory, which Kibosh exports a view of.
    :param mirror:      The mirror directory, where Kibosh injects faults.
    :param persist:     Where the log files and pid files will be created.
    """
    Service.__init__(self, context, num_nodes=0)
    if len(nodes) == 0:
        raise RuntimeError("You must supply at least one node to run the service on.")
    for node in nodes:
        self.nodes.append(node)

    self.target = target
    self.mirror = mirror
    self.persist = persist

    self.control_path = os.path.join(self.mirror, "kibosh_control")
    self.pidfile_path = os.path.join(self.persist, "pidfile")
    self.stdout_stderr_path = os.path.join(self.persist, "kibosh-stdout-stderr.log")
    self.log_path = os.path.join(self.persist, "kibosh.log")
    self.logs = {
        "kibosh-stdout-stderr.log": {
            "path": self.stdout_stderr_path,
            "collect_default": True},
        "kibosh.log": {
            "path": self.log_path,
            "collect_default": True}
    }
def __init__(self, context, num_nodes, zk, security_protocol=SecurityConfig.PLAINTEXT,
             interbroker_security_protocol=SecurityConfig.PLAINTEXT,
             topics=None, quota_config=None, jmx_object_names=None, jmx_attributes=[]):
    """
    :type context
    :type zk: ZookeeperService
    :type topics: dict
    """
    Service.__init__(self, context, num_nodes)
    JmxMixin.__init__(self, num_nodes, jmx_object_names, jmx_attributes)
    self.zk = zk

    if security_protocol == SecurityConfig.SSL or interbroker_security_protocol == SecurityConfig.SSL:
        self.security_config = SecurityConfig(SecurityConfig.SSL)
    else:
        self.security_config = SecurityConfig(SecurityConfig.PLAINTEXT)
    self.security_protocol = security_protocol
    self.interbroker_security_protocol = interbroker_security_protocol
    self.port = 9092 if security_protocol == SecurityConfig.PLAINTEXT else 9093
    self.topics = topics
    self.quota_config = quota_config
def free(self):
    # We only want to deallocate the coordinator node, not the agent nodes. So we
    # change self.nodes to include only the coordinator node, and then invoke
    # the base class' free method.
    if self.coordinator_node is not None:
        self.nodes = [self.coordinator_node]
        self.coordinator_node = None
        Service.free(self)
def __init__(self, context, num_nodes, zk, security_protocol=SecurityConfig.PLAINTEXT,
             interbroker_security_protocol=SecurityConfig.PLAINTEXT,
             client_sasl_mechanism=SecurityConfig.SASL_MECHANISM_GSSAPI,
             interbroker_sasl_mechanism=SecurityConfig.SASL_MECHANISM_GSSAPI,
             authorizer_class_name=None, topics=None, version=DEV_BRANCH,
             jmx_object_names=None, jmx_attributes=None,
             zk_connect_timeout=5000, zk_session_timeout=6000,
             server_prop_overides=None, zk_chroot=None):
    """
    :type context
    :type zk: ZookeeperService
    :type topics: dict
    """
    Service.__init__(self, context, num_nodes)
    JmxMixin.__init__(self, num_nodes=num_nodes, jmx_object_names=jmx_object_names,
                      jmx_attributes=(jmx_attributes or []),
                      root=KafkaService.PERSISTENT_ROOT)

    self.zk = zk

    self.security_protocol = security_protocol
    self.interbroker_security_protocol = interbroker_security_protocol
    self.client_sasl_mechanism = client_sasl_mechanism
    self.interbroker_sasl_mechanism = interbroker_sasl_mechanism
    self.topics = topics
    self.minikdc = None
    self.authorizer_class_name = authorizer_class_name
    self.zk_set_acl = False
    if server_prop_overides is None:
        self.server_prop_overides = []
    else:
        self.server_prop_overides = server_prop_overides
    self.log_level = "DEBUG"
    self.zk_chroot = zk_chroot

    #
    # In a heavily loaded and not very fast machine, it is
    # sometimes necessary to give more time for the zk client
    # to have its session established, especially if the client
    # is authenticating and waiting for the SaslAuthenticated
    # in addition to the SyncConnected event.
    #
    # The default value for zookeeper.connect.timeout.ms is
    # 2 seconds and here we increase it to 5 seconds, but
    # it can be overridden by setting the corresponding parameter
    # for this constructor.
    self.zk_connect_timeout = zk_connect_timeout

    # Also allow the session timeout to be provided explicitly,
    # primarily so that test cases can depend on it when waiting
    # e.g. brokers to deregister after a hard kill.
    self.zk_session_timeout = zk_session_timeout

    self.port_mappings = {
        'PLAINTEXT': Port('PLAINTEXT', 9092, False),
        'SSL': Port('SSL', 9093, False),
        'SASL_PLAINTEXT': Port('SASL_PLAINTEXT', 9094, False),
        'SASL_SSL': Port('SASL_SSL', 9095, False)
    }

    for node in self.nodes:
        node.version = version
        node.config = KafkaConfig(**{config_property.BROKER_ID: self.idx(node)})
def __init__(self, test_context, kafka, streams_class_name, user_test_args1,
             user_test_args2=None, user_test_args3=None, user_test_args4=None):
    Service.__init__(self, test_context, num_nodes=1)
    self.kafka = kafka
    self.args = {'streams_class_name': streams_class_name,
                 'user_test_args1': user_test_args1,
                 'user_test_args2': user_test_args2,
                 'user_test_args3': user_test_args3,
                 'user_test_args4': user_test_args4}
    self.log_level = "DEBUG"
def __init__(self, context, num_nodes, zk, security_protocol=SecurityConfig.PLAINTEXT,
             interbroker_security_protocol=SecurityConfig.PLAINTEXT,
             sasl_mechanism=SecurityConfig.SASL_MECHANISM_GSSAPI,
             authorizer_class_name=None, topics=None, version=TRUNK, quota_config=None,
             jmx_object_names=None, jmx_attributes=[], zk_connect_timeout=5000):
    """
    :type context
    :type zk: ZookeeperService
    :type topics: dict
    """
    Service.__init__(self, context, num_nodes)
    JmxMixin.__init__(self, num_nodes, jmx_object_names, jmx_attributes)

    self.zk = zk
    self.quota_config = quota_config

    self.security_protocol = security_protocol
    self.interbroker_security_protocol = interbroker_security_protocol
    self.sasl_mechanism = sasl_mechanism
    self.topics = topics
    self.minikdc = None
    self.authorizer_class_name = authorizer_class_name

    #
    # In a heavily loaded and not very fast machine, it is
    # sometimes necessary to give more time for the zk client
    # to have its session established, especially if the client
    # is authenticating and waiting for the SaslAuthenticated
    # in addition to the SyncConnected event.
    #
    # The default value for zookeeper.connect.timeout.ms is
    # 2 seconds and here we increase it to 5 seconds, but
    # it can be overridden by setting the corresponding parameter
    # for this constructor.
    self.zk_connect_timeout = zk_connect_timeout

    self.port_mappings = {
        'PLAINTEXT': Port('PLAINTEXT', 9092, False),
        'SSL': Port('SSL', 9093, False),
        'SASL_PLAINTEXT': Port('SASL_PLAINTEXT', 9094, False),
        'SASL_SSL': Port('SASL_SSL', 9095, False)
    }

    for node in self.nodes:
        node.version = version
        node.config = KafkaConfig(**{config_property.BROKER_ID: self.idx(node)})
def test_producer_and_consumer(self, compression_type="none", security_protocol="PLAINTEXT",
                               interbroker_security_protocol=None, new_consumer=True,
                               client_version=str(DEV_BRANCH), broker_version=str(DEV_BRANCH)):
    """
    Setup: 1 node zk + 3 node kafka cluster
    Concurrently produce and consume 10e6 messages with a single producer and a single consumer,
    using new consumer if new_consumer == True

    Return aggregate throughput statistics for both producer and consumer.

    (Under the hood, this runs ProducerPerformance.java, and ConsumerPerformance.scala)
    """
    client_version = KafkaVersion(client_version)
    broker_version = KafkaVersion(broker_version)
    self.validate_versions(client_version, broker_version)
    if interbroker_security_protocol is None:
        interbroker_security_protocol = security_protocol
    self.start_kafka(security_protocol, interbroker_security_protocol, broker_version)
    num_records = 10 * 1000 * 1000  # 10e6

    self.producer = ProducerPerformanceService(
        self.test_context, 1, self.kafka,
        topic=TOPIC_REP_THREE,
        num_records=num_records, record_size=DEFAULT_RECORD_SIZE, throughput=-1, version=client_version,
        settings={
            'acks': 1,
            'compression.type': compression_type,
            'batch.size': self.batch_size,
            'buffer.memory': self.buffer_memory})
    self.consumer = ConsumerPerformanceService(
        self.test_context, 1, self.kafka, topic=TOPIC_REP_THREE,
        new_consumer=new_consumer, messages=num_records)
    Service.run_parallel(self.producer, self.consumer)

    data = {
        "producer": compute_aggregate_throughput(self.producer),
        "consumer": compute_aggregate_throughput(self.consumer)
    }
    summary = ["Producer + consumer:", str(data)]
    self.logger.info("\n".join(summary))
    return data
def start(self):
    Service.start(self)

    # Create topics if necessary
    if self.topics is not None:
        for topic, topic_cfg in self.topics.items():
            if topic_cfg is None:
                topic_cfg = {}

            topic_cfg["topic"] = topic
            self.create_topic(topic_cfg)
def start(self):
    if self.security_config.has_sasl_kerberos:
        if self.minikdc is None:
            self.minikdc = MiniKdc(self.context, self.nodes)
            self.minikdc.start()

    Service.start(self)

    # Create topics if necessary
    if self.topics is not None:
        for topic, topic_cfg in self.topics.items():
            if topic_cfg is None:
                topic_cfg = {}

            topic_cfg["topic"] = topic
            self.create_topic(topic_cfg)
def start(self, add_principals=""):
    self.open_port(self.security_protocol)
    self.open_port(self.interbroker_security_protocol)

    self.start_minikdc(add_principals)
    Service.start(self)

    # Create topics if necessary
    if self.topics is not None:
        for topic, topic_cfg in self.topics.items():
            if topic_cfg is None:
                topic_cfg = {}

            topic_cfg["topic"] = topic
            self.create_topic(topic_cfg)
def start(self):
    if self.security_config.has_sasl_kerberos:
        self.minikdc = MiniKdc(self.context, self.nodes)
        self.minikdc.start()
    else:
        self.minikdc = None

    Service.start(self)

    # Create topics if necessary
    if self.topics is not None:
        for topic, topic_cfg in self.topics.items():
            if topic_cfg is None:
                topic_cfg = {}

            topic_cfg["topic"] = topic
            self.create_topic(topic_cfg)
def check_free_too_many(self):
    n = 10
    cluster = FiniteSubcluster([MockFiniteSubclusterNode() for _ in range(n)])
    nodes = cluster.alloc(Service.setup_cluster_spec(num_nodes=n))

    with pytest.raises(NodeNotPresentError):
        nodes.append(MockFiniteSubclusterNode())
        cluster.free(nodes)
def check_with_changing_cluster_availability(self):
    """Modify cluster usage in between calls to next()"""
    scheduler = TestScheduler(self.tc_list, self.cluster)

    # allocate 60 nodes; only test_id 0 should be available
    nodes = self.cluster.alloc(Service.setup_cluster_spec(num_nodes=60))
    assert self.cluster.num_available_nodes() == 40
    t = scheduler.next()
    assert t.test_id == 0
    assert scheduler.peek() is None

    # return 10 nodes, so 50 are available in the cluster
    # next test from the scheduler should be test id 1
    return_nodes = nodes[:10]
    keep_nodes = nodes[10:]
    self.cluster.free(return_nodes)
    assert self.cluster.num_available_nodes() == 50
    t = scheduler.next()
    assert t.test_id == 1
    assert scheduler.peek() is None

    # return remaining nodes, so cluster is fully available
    # next test from scheduler should be test id 2
    return_nodes = keep_nodes
    self.cluster.free(return_nodes)
    assert self.cluster.num_available_nodes() == len(self.cluster)
    t = scheduler.next()
    assert t.test_id == 2
def check_with_changing_cluster_availability(self):
    """Modify cluster usage in between calls to next()"""
    scheduler = TestScheduler(self.tc_list, self.cluster)

    # allocate 60 nodes; only test_id 0 should be available
    slots = self.cluster.alloc(Service.setup_node_spec(num_nodes=60))
    assert self.cluster.num_available_nodes() == 40
    t = scheduler.next()
    assert t.test_id == 0
    assert scheduler.peek() is None

    # return 10 nodes, so 50 are available in the cluster
    # next test from the scheduler should be test id 1
    return_slots = slots[:10]
    keep_slots = slots[10:]
    self.cluster.free(return_slots)
    assert self.cluster.num_available_nodes() == 50
    t = scheduler.next()
    assert t.test_id == 1
    assert scheduler.peek() is None

    # return remaining nodes, so cluster is fully available
    # next test from scheduler should be test id 2
    return_slots = keep_slots
    self.cluster.free(return_slots)
    assert self.cluster.num_available_nodes() == len(self.cluster)
    t = scheduler.next()
    assert t.test_id == 2
def check_free_too_many(self):
    n = 10
    cluster = FiniteSubcluster([MockFiniteSubclusterNode() for _ in range(n)])
    nodes = cluster.alloc(Service.setup_node_spec(num_nodes=n))

    with pytest.raises(AssertionError):
        nodes.append(object())
        cluster.free(nodes)
def __init__(self, context, num_nodes, zk, security_protocol=SecurityConfig.PLAINTEXT,
             interbroker_security_protocol=SecurityConfig.PLAINTEXT,
             client_sasl_mechanism=SecurityConfig.SASL_MECHANISM_GSSAPI,
             interbroker_sasl_mechanism=SecurityConfig.SASL_MECHANISM_GSSAPI,
             authorizer_class_name=None, topics=None, version=TRUNK, quota_config=None,
             jmx_object_names=None, jmx_attributes=[], zk_connect_timeout=5000):
    """
    :type context
    :type zk: ZookeeperService
    :type topics: dict
    """
    Service.__init__(self, context, num_nodes)
    JmxMixin.__init__(self, num_nodes, jmx_object_names, jmx_attributes)

    self.zk = zk
    self.quota_config = quota_config

    self.security_protocol = security_protocol
    self.interbroker_security_protocol = interbroker_security_protocol
    self.client_sasl_mechanism = client_sasl_mechanism
    self.interbroker_sasl_mechanism = interbroker_sasl_mechanism
    self.topics = topics
    self.minikdc = None
    self.authorizer_class_name = authorizer_class_name

    #
    # In a heavily loaded and not very fast machine, it is
    # sometimes necessary to give more time for the zk client
    # to have its session established, especially if the client
    # is authenticating and waiting for the SaslAuthenticated
    # in addition to the SyncConnected event.
    #
    # The default value for zookeeper.connect.timeout.ms is
    # 2 seconds and here we increase it to 5 seconds, but
    # it can be overridden by setting the corresponding parameter
    # for this constructor.
    self.zk_connect_timeout = zk_connect_timeout

    self.port_mappings = {
        'PLAINTEXT': Port('PLAINTEXT', 9092, False),
        'SSL': Port('SSL', 9093, False),
        'SASL_PLAINTEXT': Port('SASL_PLAINTEXT', 9094, False),
        'SASL_SSL': Port('SASL_SSL', 9095, False)
    }

    for node in self.nodes:
        node.version = version
        node.config = KafkaConfig(**{config_property.BROKER_ID: self.idx(node)})
def check_cluster_file_read(self, monkeypatch):
    """Check the behavior of VagrantCluster when cluster_file is specified and the file exists.
    VagrantCluster should read cluster information from cluster_file.
    """
    self._set_monkeypatch_attr(monkeypatch)

    # To verify that VagrantCluster reads cluster information from the cluster_file, the
    # content in the file is intentionally made different from that returned by _vagrant_ssh_config().
    nodes_expected = []
    node1_expected = {
        "externally_routable_ip": "127.0.0.3",
        "ssh_config": {
            "host": "worker3",
            "hostname": "127.0.0.3",
            "user": "******",
            "port": 2222,
            "password": "******",
            "identityfile": "/path/to/identfile3"
        }
    }
    nodes_expected.append(node1_expected)

    node2_expected = {
        "externally_routable_ip": "127.0.0.2",
        "ssh_config": {
            "host": "worker2",
            "hostname": "127.0.0.2",
            "user": "******",
            "port": 2223,
            "password": None,
            "identityfile": "/path/to/indentfile2"
        }
    }
    nodes_expected.append(node2_expected)

    cluster_json_expected = {}
    cluster_json_expected["nodes"] = nodes_expected
    json.dump(cluster_json_expected, open(self.cluster_file, 'w+'),
              indent=2, separators=(',', ': '), sort_keys=True)

    # Load the cluster from the json file we just created
    cluster = VagrantCluster(cluster_file=self.cluster_file, is_type_based=False)
    assert len(cluster) == 2
    assert cluster.num_available_nodes() == 2

    node2, node3 = cluster.alloc(Service.setup_cluster_spec(num_nodes=2))

    assert node3.account.hostname == "worker2"
    assert node3.account.user == "vagrant"
    assert node3.account.ssh_hostname == '127.0.0.2'
    assert node3.account.ssh_config.to_json() == node2_expected["ssh_config"]

    assert node2.account.hostname == "worker3"
    assert node2.account.user == "vagrant"
    assert node2.account.ssh_hostname == '127.0.0.3'
    assert node2.account.ssh_config.to_json() == node1_expected["ssh_config"]
def test_producer_and_consumer(self, compression_type="none", security_protocol="PLAINTEXT",
                               interbroker_security_protocol=None,
                               client_version=str(DEV_BRANCH), broker_version=str(DEV_BRANCH)):
    """
    Setup: 1 node zk + 3 node kafka cluster
    Concurrently produce and consume 10e6 messages with a single producer and a single consumer.

    Return aggregate throughput statistics for both producer and consumer.

    (Under the hood, this runs ProducerPerformance.java, and ConsumerPerformance.scala)
    """
    client_version = KafkaVersion(client_version)
    broker_version = KafkaVersion(broker_version)
    self.validate_versions(client_version, broker_version)
    if interbroker_security_protocol is None:
        interbroker_security_protocol = security_protocol
    self.start_kafka(security_protocol, interbroker_security_protocol, broker_version)
    num_records = 10 * 1000 * 1000  # 10e6

    self.producer = ProducerPerformanceService(
        self.test_context, 1, self.kafka,
        topic=TOPIC_REP_THREE,
        num_records=num_records, record_size=DEFAULT_RECORD_SIZE, throughput=-1, version=client_version,
        settings={
            'acks': 1,
            'compression.type': compression_type,
            'batch.size': self.batch_size,
            'buffer.memory': self.buffer_memory}
    )
    self.consumer = ConsumerPerformanceService(
        self.test_context, 1, self.kafka, topic=TOPIC_REP_THREE, messages=num_records)
    Service.run_parallel(self.producer, self.consumer)

    data = {
        "producer": compute_aggregate_throughput(self.producer),
        "consumer": compute_aggregate_throughput(self.consumer)
    }
    summary = ["Producer + consumer:", str(data)]
    self.logger.info("\n".join(summary))
    return data
def test_producer_and_consumer(self):
    self.logger.info("BENCHMARK: Producer + Consumer")
    self.producer = ProducerPerformanceService(
        self.test_context, 1, self.kafka,
        topic="test-rep-three",
        num_records=self.msgs_default, record_size=self.msg_size_default, throughput=-1,
        settings={'acks': 1, 'batch.size': self.batch_size, 'buffer.memory': self.buffer_memory}
    )
    self.consumer = ConsumerPerformanceService(
        self.test_context, 1, self.kafka,
        topic="test-rep-three", num_records=self.msgs_default, throughput=-1, threads=1
    )

    Service.run_parallel(self.producer, self.consumer)

    summary = [
        "Producer + consumer:",
        " Producer: %s" % throughput(self.producer),
        " Consumer: %s" % throughput(self.consumer)]
    self.logger.info("\n".join(summary))
def check_allocate_free(self):
    n = 10
    cluster = FiniteSubcluster([MockFiniteSubclusterNode() for _ in range(n)])
    assert len(cluster) == n
    assert cluster.num_available_nodes() == n

    nodes = cluster.alloc(Service.setup_cluster_spec(num_nodes=1))
    assert len(nodes) == 1
    assert len(cluster) == n
    assert cluster.num_available_nodes() == n - 1

    nodes2 = cluster.alloc(Service.setup_cluster_spec(num_nodes=2))
    assert len(nodes2) == 2
    assert len(cluster) == n
    assert cluster.num_available_nodes() == n - 3

    cluster.free(nodes)
    assert cluster.num_available_nodes() == n - 2

    cluster.free(nodes2)
    assert cluster.num_available_nodes() == n
def test_producer_and_consumer(self, new_consumer=False):
    """
    Setup: 1 node zk + 3 node kafka cluster
    Concurrently produce and consume 10e6 messages with a single producer and a single consumer,
    using new consumer if new_consumer == True

    Return aggregate throughput statistics for both producer and consumer.

    (Under the hood, this runs ProducerPerformance.java, and ConsumerPerformance.scala)
    """
    num_records = 10 * 1000 * 1000  # 10e6

    self.producer = ProducerPerformanceService(
        self.test_context, 1, self.kafka,
        topic=TOPIC_REP_THREE,
        num_records=num_records, record_size=DEFAULT_RECORD_SIZE, throughput=-1,
        settings={
            'acks': 1,
            'batch.size': self.batch_size,
            'buffer.memory': self.buffer_memory})
    self.consumer = ConsumerPerformanceService(
        self.test_context, 1, self.kafka, topic=TOPIC_REP_THREE,
        new_consumer=new_consumer, messages=num_records)
    Service.run_parallel(self.producer, self.consumer)

    data = {
        "producer": compute_aggregate_throughput(self.producer),
        "consumer": compute_aggregate_throughput(self.consumer)
    }
    summary = ["Producer + consumer:", str(data)]
    self.logger.info("\n".join(summary))
    return data
def __init__(self, context, agent_nodes, agent_port=DEFAULT_AGENT_PORT,
             coordinator_port=DEFAULT_COORDINATOR_PORT):
    """
    Create a Trogdor service.

    :param context:             The test context.
    :param agent_nodes:         The nodes to run the agents on.
    :param agent_port:          The port to use for the trogdor_agent daemons.
    :param coordinator_port:    The port to use for the trogdor_coordinator daemons.
    """
    Service.__init__(self, context, num_nodes=1)
    self.coordinator_node = self.nodes[0]
    if len(agent_nodes) == 0:
        raise RuntimeError("You must supply at least one node to run the service on.")
    for agent_node in agent_nodes:
        self.nodes.append(agent_node)
    self.agent_port = agent_port
    self.coordinator_port = coordinator_port
def __init__(self, context, num_nodes, zk, security_protocol=SecurityConfig.PLAINTEXT,
             interbroker_security_protocol=SecurityConfig.PLAINTEXT,
             sasl_mechanism=SecurityConfig.SASL_MECHANISM_GSSAPI,
             topics=None, version=TRUNK, quota_config=None,
             jmx_object_names=None, jmx_attributes=[]):
    """
    :type context
    :type zk: ZookeeperService
    :type topics: dict
    """
    Service.__init__(self, context, num_nodes)
    JmxMixin.__init__(self, num_nodes, jmx_object_names, jmx_attributes)
    self.log_level = "DEBUG"

    self.zk = zk
    self.quota_config = quota_config

    self.security_protocol = security_protocol
    self.interbroker_security_protocol = interbroker_security_protocol
    self.sasl_mechanism = sasl_mechanism
    self.topics = topics

    for node in self.nodes:
        node.version = version
        node.config = KafkaConfig(**{config_property.BROKER_ID: self.idx(node)})
def check_non_empty_cluster_too_small(self):
    """Ensure that scheduler does not return tests if the cluster does not have enough available nodes."""
    scheduler = TestScheduler(self.tc_list, self.cluster)
    assert len(scheduler) == len(self.tc_list)
    assert scheduler.peek() is not None

    # alloc all cluster nodes so none are available
    self.cluster.alloc(Service.setup_cluster_spec(num_nodes=len(self.cluster)))
    assert self.cluster.num_available_nodes() == 0

    # peeking should not yield an object
    assert scheduler.peek() is None
def check_non_empty_cluster_too_small(self):
    """Ensure that scheduler does not return tests if the cluster does not have enough available nodes."""
    scheduler = TestScheduler(self.tc_list, self.cluster)
    assert len(scheduler) == len(self.tc_list)
    assert scheduler.peek() is not None

    # alloc all cluster nodes so none are available
    self.cluster.alloc(Service.setup_cluster_spec(num_nodes=len(self.cluster)))
    assert self.cluster.num_available_nodes() == 0

    # peeking etc should not yield an object
    assert scheduler.peek() is None
    with pytest.raises(RuntimeError):
        scheduler.next()
def check_one_host_parsing(self, monkeypatch):
    """Check the behavior of VagrantCluster when cluster_file is not specified.
    VagrantCluster should read cluster information from _vagrant_ssh_config().
    """
    self._set_monkeypatch_attr(monkeypatch)

    cluster = VagrantCluster(is_type_based=False)
    assert len(cluster) == 2
    assert cluster.num_available_nodes() == 2

    node1, node2 = cluster.alloc(Service.setup_cluster_spec(num_nodes=2))

    assert node1.account.hostname == "worker1"
    assert node1.account.user == "vagrant"
    assert node1.account.ssh_hostname == '127.0.0.1'

    assert node2.account.hostname == "worker2"
    assert node2.account.user == "vagrant"
    assert node2.account.ssh_hostname == '127.0.0.2'
def _preallocate_subcluster(self, test_context):
    """Preallocate the subcluster which will be used to run the test.

    Side effect: store association between the test_id and the preallocated subcluster.

    :param test_context
    :return None
    """
    test_cluster_compare = self.cluster.test_capacity_comparison(test_context)
    assert test_cluster_compare >= 0
    if test_cluster_compare == 0 and self.max_parallel > 1:
        self._log(logging.WARNING,
                  "Test %s is using entire cluster. It's possible this test has no associated cluster metadata."
                  % test_context.test_id)

    self._test_cluster[TestKey(test_context.test_id, self.test_counter)] = \
        FiniteSubcluster(self.cluster.alloc(Service.setup_node_spec(node_spec=test_context.expected_node_spec)))
def check_request_free(self):
    available = self.cluster.num_available_nodes()
    initial_size = len(self.cluster)

    # Should be able to allocate arbitrarily many nodes
    nodes = self.cluster.alloc(Service.setup_cluster_spec(num_nodes=100))
    assert len(nodes) == 100
    for i, node in enumerate(nodes):
        assert node.account.hostname == 'localhost%d' % i
        assert node.account.ssh_hostname == 'localhost'
        assert node.account.ssh_config.hostname == 'localhost'
        assert node.account.ssh_config.port == 22
        assert node.account.user is None

    assert self.cluster.num_available_nodes() == (available - 100)
    assert len(self.cluster) == initial_size  # This shouldn't change

    self.cluster.free(nodes)
    assert self.cluster.num_available_nodes() == available
def _handle_finished(self, event):
    test_key = TestKey(event["test_id"], event["test_index"])
    self.receiver.send(self.event_response.finished(event))

    result = event['result']
    if result.test_status == FAIL and self.exit_first:
        self.stop_testing = True

    # Transition this test from running to finished
    del self.active_tests[test_key]
    self.finished_tests[test_key] = event
    self.results.append(result)

    # Free nodes used by the test
    subcluster = self._test_cluster[test_key]
    test_context = self._test_context[event["test_id"]]
    self.cluster.free(subcluster.alloc(Service.setup_node_spec(node_spec=test_context.expected_node_spec)))
    del self._test_cluster[test_key]

    # Join on the finished test process
    self._client_procs[test_key].join()

    # Report partial result summaries - it is helpful to have partial test reports available if the
    # ducktape process is killed with a SIGKILL partway through
    test_results = copy.copy(self.results)  # shallow copy
    reporters = [
        SimpleFileSummaryReporter(test_results),
        HTMLSummaryReporter(test_results),
        JSONReporter(test_results)
    ]
    for r in reporters:
        r.report()

    if self._should_print_separator:
        terminal_width, y = get_terminal_size()
        self._log(logging.INFO, "~" * int(2 * terminal_width / 3))
def check_with_changing_cluster_availability(self):
    """Modify cluster usage in between calls to next()"""
    scheduler = TestScheduler(self.tc_list, self.cluster)

    # start with 100-node cluster (configured in setup_method())
    # allocate 60 nodes; only test_id 0 (which needs 10 nodes) should be available
    nodes = self.cluster.alloc(Service.setup_cluster_spec(num_nodes=60))
    assert self.cluster.num_available_nodes() == 40
    t = scheduler.peek()
    assert t == self.tc0
    scheduler.remove(t)
    assert scheduler.peek() is None

    # return 10 nodes, so 50 are available in the cluster
    # next test from the scheduler should be test id 1 (which needs 50 nodes)
    return_nodes = nodes[:10]
    keep_nodes = nodes[10:]
    self.cluster.free(return_nodes)
    assert self.cluster.num_available_nodes() == 50
    t = scheduler.peek()
    assert t == self.tc1
    scheduler.remove(t)
    assert scheduler.peek() is None

    # return remaining nodes, so cluster is fully available
    # next test from scheduler should be test id 2 (which needs 100 nodes)
    return_nodes = keep_nodes
    self.cluster.free(return_nodes)
    assert self.cluster.num_available_nodes() == len(self.cluster)
    t = scheduler.peek()
    assert t == self.tc2
    scheduler.remove(t)

    # scheduler should become empty now
    assert len(scheduler) == 0
    assert scheduler.peek() is None
def check_exhausts_supply(self):
    cluster = JsonCluster(self.single_node_cluster_json)
    with pytest.raises(InsufficientResourcesError):
        cluster.alloc(Service.setup_cluster_spec(num_nodes=2))
def free(self):
    """Clear the nodes list."""
    # Because the filesystem runs on nodes which have been allocated by other services, those nodes
    # are not deallocated here.
    self.nodes = []
    Service.free(self)
def check_alloc_too_many(self):
    n = 10
    cluster = FiniteSubcluster([MockFiniteSubclusterNode() for _ in range(n)])
    with pytest.raises(InsufficientResourcesError):
        cluster.alloc(Service.setup_cluster_spec(num_nodes=(n + 1)))
def __init__(self, context, kafka):
    Service.__init__(self, context, num_nodes=1)
    self.bootstrap_servers = kafka.bootstrap_servers(validate=False)
    self.producer_node = self.nodes[0].account.hostname
def free(self):
    Service.free(self)