Example #1
 def setup_method(self, _):
     self.cluster = FakeCluster(100)
     self.tc_list = [
         FakeContext(0, expected_num_nodes=10, expected_cluster_spec=ClusterSpec.simple_linux(10)),
         FakeContext(1, expected_num_nodes=50, expected_cluster_spec=ClusterSpec.simple_linux(50)),
         FakeContext(2, expected_num_nodes=100, expected_cluster_spec=ClusterSpec.simple_linux(100)),
     ]
Example #2
 def min_cluster_spec(self):
     """
     Returns the minimum cluster specification that would be required to run all the currently
     extant services.
     """
     cluster_spec = ClusterSpec()
     for service in self._services.values():
         cluster_spec.add(service.cluster_spec)
     return cluster_spec
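
A minimal standalone sketch of the aggregation above, assuming the usual ducktape import path and that add() merges another spec's nodes (both calls appear in this example):

    from ducktape.cluster.cluster_spec import ClusterSpec

    combined = ClusterSpec()  # starts empty, as in the method above
    for spec in [ClusterSpec.simple_linux(3), ClusterSpec.simple_linux(2)]:
        combined.add(spec)    # merge each service-style spec
    assert combined.size() == 5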
Example #3
 def check_from_dict(self):
     empty = ClusterSpec.empty()
     assert "[]" == str(empty)
     node_specs_dict = {
         'cpu': 2,
         'mem': '2GB',
         'disk': '30GB',
         'num_nodes': 2
     }
     custom_linux_1 = ClusterSpec.from_dict(node_specs_dict)
     assert '[{"additional_disks(GB)": {}, "cpu": 2, "disk(GB)": 30.0, "mem(GB)": 2.0, "num_nodes": 2, "os": "linux"}]' \
            == str(custom_linux_1)
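
As a brief follow-up sketch (same assumed import; LINUX is the constant ducktape uses for the default OS), the parsed spec can be sized and its nodes iterated:

    spec = ClusterSpec.from_dict({'cpu': 2, 'mem': '2GB', 'disk': '30GB', 'num_nodes': 2})
    assert spec.size() == 2
    for node in spec.nodes:  # every node defaults to Linux, matching "os": "linux" above
        assert node.operating_system == LINUX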
Example #4
 def setup_method(self, _):
     self.cluster = FakeCluster(100)
     self.tc_list = [
         FakeContext(0,
                     expected_num_nodes=10,
                     expected_cluster_spec=ClusterSpec.simple_linux(10)),
         FakeContext(1,
                     expected_num_nodes=50,
                     expected_cluster_spec=ClusterSpec.simple_linux(50)),
         FakeContext(2,
                     expected_num_nodes=100,
                     expected_cluster_spec=ClusterSpec.simple_linux(100)),
     ]
Example #5
    def expected_cluster_spec(self):
        """
        The cluster spec we expect this test to consume when run.

        :return:            A ClusterSpec object.
        """
        cluster_size = self.cluster_use_metadata.get(CLUSTER_SIZE_KEYWORD)
        if cluster_size is not None:
            return ClusterSpec.simple_linux(cluster_size)
        elif self.cluster is None:
            return ClusterSpec.empty()
        else:
            return self.cluster.all()
Example #6
 def spec(self, linux_nodes, windows_nodes):
     nodes = []
     for i in range(linux_nodes):
         nodes.append(NodeSpec(LINUX))
     for i in range(windows_nodes):
         nodes.append(NodeSpec(WINDOWS))
     return ClusterSpec(nodes)
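
A hedged call-site sketch for the helper above, from inside the same test class (NodeSpec, LINUX, and WINDOWS are assumed to come from ducktape.cluster.cluster_spec):

    mixed = self.spec(linux_nodes=2, windows_nodes=1)
    assert mixed.size() == 3  # two Linux nodes plus one Windows node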
Example #7
File: waltz_test.py Project: zzlbuaa/waltz
    def get_server_service(self, cluster_num_partitions=None, num_nodes=None):
        """
        Return a Waltz Server service that uses config.ini for configuration.
        Optional arguments can be passed to override default settings.
        """
        cluster_num_partitions = cluster_num_partitions or int(self.zk_cfg['ClusterNumPartitions'])
        num_nodes = num_nodes or int(self.server_cfg['NumNodes'])
        cpu = int(self.server_cfg['NumCpuCores'])
        mem = self.server_cfg['MemSize']
        disk = self.server_cfg['DiskSize']
        cluster_spec = ClusterSpec.from_dict({'cpu': cpu, 'mem': mem, 'disk': disk, 'num_nodes': num_nodes})
        zk = self.zk_cfg['ZkUrl']
        cluster_root = self.zk_cfg['ClusterRoot']
        cluster_name = self.zk_cfg['ClusterName']
        port = int(self.server_cfg['Port'])
        jetty_port = int(self.server_cfg['JettyPort'])
        lib_dir = self.server_cfg['LibDir']
        config_file_dir = self.server_cfg['ConfigFileDir']
        ssl_configs = {"ssl_keystore_loc": self.server_cfg['SslKeystoreLoc'],
                       "ssl_keystore_pwd": self.server_cfg['SslKeystorePwd'],
                       "ssl_truststore_loc": self.server_cfg['SslTruststoreLoc'],
                       "ssl_truststore_pwd": self.server_cfg['SslTruststorePwd']}

        return WaltzServerService(self.test_context, cluster_spec, zk, cluster_root, cluster_name, cluster_num_partitions, \
                                  port, jetty_port, lib_dir, config_file_dir, ssl_configs)
Example #8
 def get_verifiable_client(self, num_nodes=None):
     """
     Return a verifiable Waltz client that uses config.ini for configuration.
     Optional arguments can be passed to override default settings.
     """
     num_nodes = num_nodes or int(self.client_cfg['NumNodes'])
     cpu = int(self.client_cfg['NumCpuCores'])
     mem = self.client_cfg['MemSize']
     disk = self.client_cfg['DiskSize']
     cluster_spec = ClusterSpec.from_dict({
         'cpu': cpu,
         'mem': mem,
         'disk': disk,
         'num_nodes': num_nodes
     })
     zk = self.zk_cfg['ZkUrl']
     cluster_root = self.zk_cfg['ClusterRoot']
     lib_dir = self.client_cfg['LibDir']
     config_file_dir = self.client_cfg['ConfigFileDir']
     ssl_configs = {
         "ssl_keystore_loc": self.client_cfg['SslKeystoreLoc'],
         "ssl_keystore_pwd": self.client_cfg['SslKeystorePwd'],
         "ssl_truststore_loc": self.client_cfg['SslTruststoreLoc'],
         "ssl_truststore_pwd": self.client_cfg['SslTruststorePwd']
     }
     return VerifiableClient(self.test_context, cluster_spec, zk,
                             cluster_root, lib_dir, config_file_dir,
                             ssl_configs)
Example #9
    def expected_cluster_spec(self):
        """
        The cluster spec we expect this test to consume when run.

        :return:            A ClusterSpec object.
        """
        cluster_spec = self.cluster_use_metadata.get(CLUSTER_SPEC_KEYWORD)
        cluster_size = self.cluster_use_metadata.get(CLUSTER_SIZE_KEYWORD)
        if cluster_spec is not None:
            return cluster_spec
        elif cluster_size is not None:
            return ClusterSpec.simple_linux(cluster_size)
        elif self.cluster is None:
            return ClusterSpec.empty()
        else:
            return self.cluster.all()
Example #10
    def check_cluster_spec(self):
        """"
        Check cluster_spec.
        """
        @cluster(cluster_spec=ClusterSpec.simple_linux(10))
        def function():
            return 0

        test_context_list = expand_function(func=function,
                                            sess_ctx=mock_session_ctx())
        assert len(test_context_list) == 1
        inserted_spec = test_context_list[0].cluster_use_metadata[
            CLUSTER_SPEC_KEYWORD]

        assert inserted_spec.size() == 10
        for node in inserted_spec.nodes:
            assert node.operating_system == LINUX

        test_context_list = expand_function(
            func=function, sess_ctx=mock_session_ctx(cluster_size="100"))
        assert len(test_context_list) == 1
        inserted_spec = test_context_list[0].cluster_use_metadata[
            CLUSTER_SPEC_KEYWORD]

        assert inserted_spec.size() == 100
        for node in inserted_spec.nodes:
            assert node.operating_system == LINUX
Example #11
 def set_up_trogdor(self, num_agent_nodes):
     self.agent_nodes = self.test_context.cluster.alloc(
         ClusterSpec.simple_linux(num_agent_nodes))
     self.trogdor = TrogdorService(context=self.test_context,
                                   agent_nodes=self.agent_nodes)
     for agent_node in self.agent_nodes:
         agent_node.account.logger = self.trogdor.logger
     self.trogdor.start()
Example #12
 def check_cluster_property(self):
     exp_cluster = ClusterSpec.simple_linux(5)
     tc = TestContext(session_context=ducktape_mock.session_context(),
                      cluster=exp_cluster,
                      cls=DummyTest,
                      function=DummyTest.test_function_description)
     test_obj = tc.cls(tc)
     assert test_obj.cluster == exp_cluster
Example #13
File: _mark.py Project: wwjiang007/ignite
    def _modify_metadata(self, new_size):
        cluster_spec = self.metadata.get(CLUSTER_SPEC_KEYWORD)
        cluster_size = self.metadata.get(CLUSTER_SIZE_KEYWORD)

        if cluster_spec is not None and not cluster_spec.empty():
            node_spec = next(iter(cluster_spec))
            self.metadata[CLUSTER_SPEC_KEYWORD] = ClusterSpec.from_nodes([node_spec] * new_size)
        elif cluster_size is not None:
            self.metadata[CLUSTER_SIZE_KEYWORD] = new_size
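
The resize trick above can be exercised on its own; a sketch assuming NodeSpec and LINUX as in the other examples:

    template = NodeSpec(LINUX)
    scaled = ClusterSpec.from_nodes([template] * 4)  # same per-node shape, new size
    assert scaled.size() == 4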
Example #14
 def check_from_list(self):
     empty = ClusterSpec.empty()
     assert "[]" == str(empty)
     node_specs_dict_list = [{
         'cpu': 2,
         'mem': '2GB',
         'disk': '20GB',
         'num_nodes': 2
     }, {
         'cpu': 4,
         'mem': '4GB',
         'disk': '40GB',
         'num_nodes': 4
     }]
     custom_linux_2 = ClusterSpec.from_list(node_specs_dict_list)
     assert '[{"additional_disks(GB)": {}, "cpu": 2, "disk(GB)": 20.0, "mem(GB)": 2.0, "num_nodes": 2, "os": "linux"},' \
            ' {"additional_disks(GB)": {}, "cpu": 4, "disk(GB)": 40.0, "mem(GB)": 4.0, "num_nodes": 4, "os": "linux"}]' \
            == str(custom_linux_2)
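
A hedged equivalence sketch: from_list should behave like merging the per-entry from_dict specs with add() (see Example #2); the total sizes, at least, ought to agree:

    entries = [{'cpu': 2, 'mem': '2GB', 'disk': '20GB', 'num_nodes': 2},
               {'cpu': 4, 'mem': '4GB', 'disk': '40GB', 'num_nodes': 4}]
    merged = ClusterSpec()
    for entry in entries:
        merged.add(ClusterSpec.from_dict(entry))
    assert merged.size() == ClusterSpec.from_list(entries).size() == 6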
Example #15
class TestClusterSpec(Test):
    @cluster(cluster_spec=ClusterSpec.simple_linux(2))
    def test_create_two_node_service(self):
        self.service = GenericService(self.test_context, 2)
        for node in self.service.nodes:
            node.account.ssh("echo hi")

    @cluster(cluster_spec=ClusterSpec.from_nodes(
        [
            NodeSpec(operating_system=WINDOWS),
            NodeSpec(operating_system=LINUX),
            NodeSpec()  # this one is also linux
        ]
    ))
    def three_nodes_test(self):
        self.service = GenericService(self.test_context, 3)
        for node in self.service.nodes:
            node.account.ssh("echo hi")
Example #16
    def preallocated_nodes(self):
        if not self._preallocated_nodes:
            self._preallocated_nodes = self.test_context.cluster.alloc(
                ClusterSpec.simple_linux(self.node_prealloc_count))

            for node in self._preallocated_nodes:
                self.logger.debug(f'Allocated node {node.name}')

        return self._preallocated_nodes
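
A companion sketch to the lazy allocation above: nodes taken with cluster.alloc() should eventually be handed back. ducktape clusters expose free() for this, though the exact name is worth verifying against your ducktape version:

    def free_preallocated_nodes(self):
        if self._preallocated_nodes:
            # free() is assumed here; check your cluster implementation
            self.test_context.cluster.free(self._preallocated_nodes)
            self._preallocated_nodes = None  # force re-allocation on next use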
Example #17
 def set_up_kibosh(self, num_nodes):
     self.nodes = self.test_context.cluster.alloc(ClusterSpec.simple_linux(num_nodes))
     for node in self.nodes:
         node.account.logger = self.logger
         node.account.ssh("mkdir -p -- %s %s" % (KiboshTest.TARGET, KiboshTest.MIRROR))
     self.kibosh = KiboshService(self.test_context, self.nodes,
                                 KiboshTest.TARGET, KiboshTest.MIRROR)
     for node in self.nodes:
         node.account.logger = self.kibosh.logger
     self.kibosh.start()
Example #18
 def setup_cluster_spec(num_nodes=None, cluster_spec=None):
     if num_nodes is None:
         if cluster_spec is None:
             raise RuntimeError("You must set either num_nodes or cluster_spec.")
         else:
             return cluster_spec
     else:
         if cluster_spec is not None:
             raise RuntimeError("You must set only one of (num_nodes, cluster_spec)")
         return ClusterSpec.simple_linux(num_nodes)
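
A usage sketch for the mutual-exclusion helper above, exercising all three branches:

    spec = setup_cluster_spec(num_nodes=3)
    assert spec.size() == 3  # built via simple_linux

    passthrough = ClusterSpec.simple_linux(2)
    assert setup_cluster_spec(cluster_spec=passthrough) is passthrough

    try:
        setup_cluster_spec()  # neither argument set
    except RuntimeError:
        pass                 # expected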
Example #19
 def set_up_kibosh(self, num_nodes):
     self.nodes = self.test_context.cluster.alloc(
         ClusterSpec.simple_linux(num_nodes))
     for node in self.nodes:
         node.account.logger = self.logger
         node.account.ssh("mkdir -p -- %s %s" %
                          (KiboshTest.TARGET, KiboshTest.MIRROR))
     self.kibosh = KiboshService(self.test_context, self.nodes,
                                 KiboshTest.TARGET, KiboshTest.MIRROR)
     for node in self.nodes:
         node.account.logger = self.kibosh.logger
     self.kibosh.start()
Example #20
    def check_basic_usage_cluster_spec(self):
        num_nodes = 200

        @cluster(cluster_spec=ClusterSpec.simple_linux(num_nodes))
        def function():
            return "hi"
        assert hasattr(function, "marks")

        test_context_list = MarkedFunctionExpander(function=function).expand()
        assert len(test_context_list) == 1
        assert len(test_context_list[0].expected_cluster_spec.nodes.os_to_nodes) == 1
        assert len(test_context_list[0].expected_cluster_spec.nodes.os_to_nodes.get('linux')) == num_nodes
Example #21
    def check_service_constructor(self):
        """Check that BackgroundThreadService constructor corresponds to the base class's one."""
        exp_spec = ClusterSpec.simple_linux(10)
        service = BackgroundThreadService(self.context, cluster_spec=exp_spec)
        assert service.cluster_spec == exp_spec

        service = BackgroundThreadService(self.context, num_nodes=20)
        assert service.cluster_spec.size() == 20

        with pytest.raises(RuntimeError):
            BackgroundThreadService(self.context,
                                    num_nodes=20,
                                    cluster_spec=exp_spec)
Example #22
 def __init__(self, test_context):
     si_settings = SISettings(
         log_segment_size=1024 * 1024,
         cloud_storage_segment_max_upload_interval_sec=5,
         cloud_storage_enable_remote_read=True,
         cloud_storage_enable_remote_write=True)
     self.scale = Scale(test_context)
     self._bucket = si_settings.cloud_storage_bucket
     super().__init__(test_context=test_context, si_settings=si_settings)
     self._ctx = test_context
     self._producer = None
     self._consumer = None
     self._verifier_node = test_context.cluster.alloc(
         ClusterSpec.simple_linux(1))[0]
     self.logger.info(f"Verifier node name: {self._verifier_node.name}")
Example #23
    def check_basic_usage_cluster_spec(self):
        num_nodes = 200

        @cluster(cluster_spec=ClusterSpec.simple_linux(num_nodes))
        def function():
            return "hi"

        assert hasattr(function, "marks")

        test_context_list = MarkedFunctionExpander(function=function).expand()
        assert len(test_context_list) == 1
        assert len(
            test_context_list[0].expected_cluster_spec.nodes.os_to_nodes) == 1
        assert len(
            test_context_list[0].expected_cluster_spec.nodes.os_to_nodes.get(
                'linux')) == num_nodes
Example #24
class BenchmarkTest(ProduceConsumeValidateTest):
    """
    A benchmark of Waltz producer/consumer performance.
    """
    MIN_CLUSTER_SPEC = ClusterSpec.from_list([
        {'cpu': 1, 'mem': '1GB', 'disk': '25GB', 'additional_disks': {'/dev/sdb': '100GB'}, 'num_nodes': 3},
        {'cpu': 1, 'mem': '3GB', 'disk': '15GB', 'num_nodes': 2},
        {'cpu': 1, 'mem': '1GB', 'disk': '25GB', 'num_nodes': 1}])

    def __init__(self, test_context):
        super(BenchmarkTest, self).__init__(test_context=test_context)

    @cluster(cluster_spec=MIN_CLUSTER_SPEC)
    @parametrize(txn_size=512, txn_per_thread=1000, num_thread=100, interval=10, lock_pool_size=0, num_active_partitions=1, timeout=360)
    @parametrize(txn_size=512, txn_per_thread=1000, num_thread=100, interval=20, lock_pool_size=0, num_active_partitions=1, timeout=360)
    @parametrize(txn_size=512, txn_per_thread=2000, num_thread=50, interval=10, lock_pool_size=0, num_active_partitions=1, timeout=360)
    @parametrize(txn_size=1024, txn_per_thread=1000, num_thread=100, interval=10, lock_pool_size=0, num_active_partitions=1, timeout=360)
    @parametrize(txn_size=512, txn_per_thread=100, num_thread=100, interval=10, lock_pool_size=64, num_active_partitions=1, timeout=360)
    @parametrize(txn_size=512, txn_per_thread=100, num_thread=100, interval=10, lock_pool_size=128, num_active_partitions=1, timeout=360)
    @parametrize(txn_size=512, txn_per_thread=100, num_thread=100, interval=10, lock_pool_size=128, num_active_partitions=2, timeout=360)
    def test_producer_performance(self, txn_size, txn_per_thread, num_thread, interval, lock_pool_size, num_active_partitions, timeout):
        test_cmd = self.performance_cli.producer_test_cmd(self.log_file_path, txn_size, txn_per_thread, num_thread,
                                                          interval, lock_pool_size, num_active_partitions)
        test_output = self.run_produce_consume_validate(lambda: self.simple_validation_func(test_cmd, timeout))
        self.print_producer_performance(test_output)

    @cluster(cluster_spec=MIN_CLUSTER_SPEC)
    @parametrize(txn_size=512, num_txn=100000, num_active_partitions=1, timeout=360)
    @parametrize(txn_size=512, num_txn=100000, num_active_partitions=4, timeout=360)
    @parametrize(txn_size=1024, num_txn=100000, num_active_partitions=1, timeout=360)
    def test_consumer_performance(self, txn_size, num_txn, num_active_partitions, timeout):
        test_cmd = self.performance_cli.consumer_test_cmd(self.log_file_path, txn_size, num_txn, num_active_partitions)
        test_output = self.run_produce_consume_validate(lambda: self.simple_validation_func(test_cmd, timeout))
        self.print_consumer_performance(test_output)

    def print_producer_performance(self, test_output):
        performance = search(".*transactions(.|\n)*MilliSec\/Transaction.*", test_output).group(0)
        print("\n####################### PRODUCER PERFORMANCE REPORT #######################\n" + \
              "\n{performance}\n".format(performance=performance) + \
              "\n###########################################################################\n")

    def print_consumer_performance(self, test_output):
        performance = search(".*transactions(.|\n)*MB/sec.*", test_output).group(0)
        print("\n####################### CONSUMER PERFORMANCE REPORT #######################\n" + \
              "\n{performance}\n".format(performance=performance) + \
              "\n###########################################################################\n")
Example #25
    def get_storage_service(self, num_nodes=None):
        """
        Return a Waltz Storage service that uses config.ini for configuration.
        Optional arguments can be passed to override default settings.
        """
        num_nodes = num_nodes or int(self.storage_cfg['NumNodes'])
        cpu = int(self.storage_cfg['NumCpuCores'])
        mem = self.storage_cfg['MemSize']
        disk = self.storage_cfg['DiskSize']
        additional_disks = json.loads(self.storage_cfg['AdditionalDisks'])
        cluster_spec = ClusterSpec.from_dict({
            'cpu': cpu,
            'mem': mem,
            'disk': disk,
            'num_nodes': num_nodes,
            'additional_disks': additional_disks
        })
        zk = self.zk_cfg['ZkUrl']
        cluster_root = self.zk_cfg['ClusterRoot']
        port = int(self.storage_cfg['Port'])
        admin_port = int(self.storage_cfg['AdminPort'])
        jetty_port = int(self.storage_cfg['JettyPort'])
        lib_dir = self.storage_cfg['LibDir']
        data_dir = self.storage_cfg['DataDir']
        config_file_dir = self.storage_cfg['ConfigFileDir']
        ssl_configs = {
            "ssl_keystore_loc": self.storage_cfg['SslKeystoreLoc'],
            "ssl_keystore_pwd": self.storage_cfg['SslKeystorePwd'],
            "ssl_truststore_loc": self.storage_cfg['SslTruststoreLoc'],
            "ssl_truststore_pwd": self.storage_cfg['SslTruststorePwd']
        }

        return WaltzStorageService(self.test_context, cluster_spec, zk,
                                   cluster_root, port, admin_port, jetty_port,
                                   lib_dir, data_dir, config_file_dir,
                                   ssl_configs)
Example #26
    def min_cluster_spec(self):
        """
        Returns a specification for the minimal cluster we need to run this test.

        This method replaces the deprecated min_cluster_size.  Unlike min_cluster_size, it can handle
        non-Linux operating systems.

        In general, most Tests don't need to override this method.  The default implementation
        seen here works well in most cases.  However, the default implementation only takes into account
        the services that exist at the time of the call.  You may need to override this method if you add
        new services during the course of your test.

        :return:            A ClusterSpec object.
        """
        try:
            # If the Test overrode the deprecated min_cluster_size method, we will use that.
            num_linux_nodes = self.min_cluster_size()
            return ClusterSpec.simple_linux(num_linux_nodes)
        except NotImplementedError:
            # Otherwise, ask the service registry what kind of cluster spec we need for currently
            # extant services.
            return self.test_context.services.min_cluster_spec()
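
Per the docstring's caveat about services added mid-test, a hedged override sketch (the extra node count is illustrative):

    def min_cluster_spec(self):
        spec = self.test_context.services.min_cluster_spec()
        spec.add(ClusterSpec.simple_linux(2))  # headroom for services started later
        return spec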
Example #28
 def __init__(self, test_context, *args, **kwargs):
     test_name = test_context.test_name
     si_params = self.test_defaults.get(
         test_name) or self.test_defaults.get('default')
     si_settings = SISettings(**si_params)
     self._segment_size = si_params['log_segment_size']
     extra_rp_conf = {
         'disable_metrics': True,
         'election_timeout_ms': 5000,
         'raft_heartbeat_interval_ms': 500,
         'segment_fallocation_step': 0x1000,
         'retention_bytes': self._segment_size,
     }
     super().__init__(test_context,
                      num_brokers=3,
                      extra_rp_conf=extra_rp_conf,
                      si_settings=si_settings)
     self._ctx = test_context
     self._verifier_node = test_context.cluster.alloc(
         ClusterSpec.simple_linux(1))[0]
     self.logger.info(
         f"Verifier node name: {self._verifier_node.name}, segment_size: {self._segment_size}"
     )
Example #29
File: waltz_test.py Project: wrp/waltz
    def get_storage_service(self,
                            cluster_key,
                            cluster_num_partitions=None,
                            num_nodes=None):
        """
        Return a Waltz Storage service that uses config.ini for configuration.
        Optional arguments can be passed to override default settings.
        """
        cluster_num_partitions = cluster_num_partitions or int(
            self.zk_cfg['ClusterNumPartitions'])
        num_nodes = num_nodes or int(self.storage_cfg['NumNodes'])
        node_type = self.storage_cfg['NodeType']
        cluster_spec = ClusterSpec.from_dict({node_type: num_nodes})
        zk = self.zk_cfg['ZkUrl']
        cluster_root = self.zk_cfg['ClusterRoot']
        port = int(self.storage_cfg['Port'])
        admin_port = int(self.storage_cfg['AdminPort'])
        jetty_port = int(self.storage_cfg['JettyPort'])
        lib_dir = self.storage_cfg['LibDir']
        data_dir = self.storage_cfg['DataDir']
        config_file_dir = self.storage_cfg['ConfigFileDir']

        return WaltzStorageService(self.test_context, cluster_spec, zk, cluster_root, cluster_num_partitions, \
                                   cluster_key, port, admin_port, jetty_port, lib_dir, data_dir, config_file_dir)
Example #30
 def check_to_string(self):
     empty = ClusterSpec.empty()
     assert "[]" == str(empty)
     simple_linux_5 = ClusterSpec.simple_linux(5)
     assert '[{"num_nodes": 5, "os": "linux"}]' == str(simple_linux_5)
Example #31
 def used(self):
     return ClusterSpec.from_nodes(self._in_use_nodes)
Example #32
 def available(self):
     return ClusterSpec.from_nodes(self._available_nodes)
Example #33
 def check_cluster_spec_sizes(self):
     simple_linux_2 = ClusterSpec.simple_linux(2)
     assert 2 == len(simple_linux_2)
     assert 0 == len(ClusterSpec.empty())
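
A one-line follow-up sketch: len() and the size() method used in the other examples agree on a ClusterSpec:

    spec = ClusterSpec.simple_linux(3)
    assert len(spec) == spec.size() == 3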
Example #34
class RecoveryTest(ProduceConsumeValidateTest):
    """
    Test Waltz recovery by running offline recovery with CLI tools,
    including recover dirty replicas, bring up offline replica, and
    so on.
    """
    MIN_CLUSTER_SPEC = ClusterSpec.from_list([{
        'cpu': 1,
        'mem': '1GB',
        'disk': '25GB',
        'additional_disks': {
            '/dev/sdb': '100GB'
        },
        'num_nodes': 3
    }, {
        'cpu': 1,
        'mem': '3GB',
        'disk': '15GB',
        'num_nodes': 2
    }, {
        'cpu': 1,
        'mem': '1GB',
        'disk': '25GB',
        'num_nodes': 1
    }])

    def __init__(self, test_context):
        super(RecoveryTest, self).__init__(test_context=test_context)

    @cluster(cluster_spec=MIN_CLUSTER_SPEC)
    @parametrize(num_active_partitions=1,
                 txn_per_client=250,
                 num_clients=1,
                 interval=100,
                 timeout=240)
    def test_recover_dirty_replica(self, num_active_partitions, txn_per_client,
                                   num_clients, interval, timeout):
        src_replica_idx = 0
        dst_replica_idx = 2
        self.run_produce_consume_validate(lambda: self.recover_dirty_replica(
            src_replica_idx, dst_replica_idx, num_active_partitions,
            txn_per_client, num_clients, interval, timeout))

    @cluster(cluster_spec=MIN_CLUSTER_SPEC)
    @parametrize(num_active_partitions=1,
                 txn_per_client=250,
                 num_clients=1,
                 interval=100,
                 timeout=240)
    def test_bring_replica_back_online(self, num_active_partitions,
                                       txn_per_client, num_clients, interval,
                                       timeout):
        offline_replica_idx = 0

        self.run_produce_consume_validate(
            lambda: self.bring_replica_back_online(
                offline_replica_idx, num_active_partitions, txn_per_client,
                num_clients, interval, timeout))

    def recover_dirty_replica(self, src_replica_idx, dst_replica_idx,
                              num_active_partitions, txn_per_client,
                              num_clients, interval, timeout):
        """
        A validation function that tests offline recovery of a dirty replica.

        :param src_replica_idx: The index of the source replica that the new replica recovers from
        :param dst_replica_idx: The index of the destination replica
        :param num_active_partitions: Number of active partitions
        :param txn_per_client: Number of transactions per client
        :param num_clients: Number of total clients
        :param interval: Average interval (in milliseconds) between transactions
        :param timeout: Test timeout
        """
        port = self.waltz_storage.port
        admin_port = self.waltz_storage.admin_port
        src_node = self.waltz_storage.nodes[src_replica_idx]
        src_node_hostname = src_node.account.ssh_hostname
        src_storage = self.get_host(src_node_hostname, admin_port)
        dst_node = self.waltz_storage.nodes[dst_replica_idx]
        dst_node_hostname = dst_node.account.ssh_hostname
        dst_storage = self.get_host(dst_node_hostname, admin_port)
        partition = randrange(num_active_partitions)

        # Step 1: Submit transactions to all replicas.
        cmd = self.client_cli.validate_txn_cmd(num_active_partitions,
                                               txn_per_client, num_clients,
                                               interval)
        self.verifiable_client.start(cmd)
        wait_until(lambda: self.is_max_transaction_id_updated(
            src_storage, port, partition, -1),
                   timeout_sec=timeout)

        # Step 2: Mark destination replica offline for reads and writes
        self.storage_set_availability(storage=dst_storage,
                                      partition=partition,
                                      online=False)

        # Step 3: Trigger recovery to update source replicas' low watermark.
        self.trigger_recovery(bounce_node_idx=src_replica_idx)
        wait_until(lambda: self.is_triggered_recovery_completed(),
                   timeout_sec=timeout)
        src_node_local_low_watermark = self.get_storage_local_low_watermark(
            self.get_host(src_node_hostname, admin_port), partition)

        # Step 4: Run recovery operation on offline replica.
        # Source replica's partition low watermark will be used as target for recovery.
        self.storage_recover_partition(source_storage=src_storage,
                                       destination_storage=dst_storage,
                                       destination_storage_port=port,
                                       partition=partition,
                                       batch_size=20)

        # Step 5: Check that the destination replica has caught up with the source replica.
        dst_node_max_transaction_id = self.get_storage_max_transaction_id(
            self.get_host(dst_node_hostname, admin_port), port, partition,
            True)
        assert src_node_local_low_watermark == dst_node_max_transaction_id, \
            "partition recovery failed on storage {}, expected max transaction ID = {}, actual max transaction ID = {}" \
            .format(dst_node_hostname, src_node_local_low_watermark, dst_node_max_transaction_id)

        # Step 6: Wait until validation complete.
        wait_until(
            lambda: self.verifiable_client.task_complete(),
            timeout_sec=timeout,
            err_msg="verifiable_client failed to complete task in %d seconds."
            % timeout)

    def bring_replica_back_online(self, offline_replica_idx,
                                  num_active_partitions, txn_per_client,
                                  num_clients, interval, timeout):
        """
        A validation function that tests whether a replica can successfully recover when brought back online.

        :param offline_replica_idx: The index of offline replica
        :param num_active_partitions: Number of active partitions
        :param txn_per_client: Number of transactions per client
        :param num_clients: Number of total clients
        :param interval: Average interval (in milliseconds) between transactions
        :param timeout: Test timeout
        """
        admin_port = self.waltz_storage.admin_port
        node = self.waltz_storage.nodes[offline_replica_idx]
        hostname = node.account.ssh_hostname
        partition = randrange(num_active_partitions)

        # Step 1: Produce a number of transactions.
        cmd = self.client_cli.validate_txn_cmd(num_active_partitions,
                                               txn_per_client, num_clients,
                                               interval)
        self.verifiable_client.start(cmd)

        # Step 2: Mark storage node 0 offline for reads and writes.
        storage = self.get_host(hostname, admin_port)
        self.storage_set_availability(storage=storage,
                                      partition=partition,
                                      online=False)
        storage_session_id_offline = self.get_storage_session_id(
            self.get_host(hostname, admin_port), partition)

        # Step 3: Mark storage node online. Wait until recovery is completed.
        self.storage_set_availability(storage=storage,
                                      partition=partition,
                                      online=True)
        wait_until(lambda: self.is_triggered_recovery_completed(),
                   timeout_sec=timeout)

        # Step 4: Check if storage node's session ID bumps up by 1.
        storage_session_id_online = self.get_storage_session_id(
            storage, partition)
        assert storage_session_id_online == storage_session_id_offline + 1, \
               "recovery failed to complete on storage {}, expected session ID = {}, actual session ID = {}" \
               .format(hostname, storage_session_id_offline + 1, storage_session_id_online)

        # Step 5: Wait until all transactions appended.
        wait_until(
            lambda: self.verifiable_client.task_complete(),
            timeout_sec=timeout,
            err_msg="verifiable_client failed to complete task in %d seconds."
            % timeout)
Example #35
 def min_cluster_spec(self):
     """ This test uses many nodes, wow!"""
     return ClusterSpec.simple_linux(1000)
Example #37
class TestClusterSpec(Test):
    @cluster(cluster_spec=ClusterSpec.simple_linux(2))
    def test_create_two_node_service(self):
        self.service = GenericService(self.test_context, 2)
        for node in self.service.nodes:
            node.account.ssh("echo hi")
Example #38
 def set_up_trogdor(self, num_agent_nodes):
     self.agent_nodes = self.test_context.cluster.alloc(ClusterSpec.simple_linux(num_agent_nodes))
     self.trogdor = TrogdorService(context=self.test_context, agent_nodes=self.agent_nodes)
     for agent_node in self.agent_nodes:
         agent_node.account.logger = self.trogdor.logger
     self.trogdor.start()
Example #39
class SmokeTest(ProduceConsumeValidateTest):
    """
    A class of torture tests that turns on a bunch of ZK, storage, server,
    and client nodes. Fire transactions while turning things off and on,
    to ensure Waltz can recover from expected failure.
    """
    MIN_CLUSTER_SPEC = ClusterSpec.from_list([{
        'cpu': 1,
        'mem': '1GB',
        'disk': '25GB',
        'additional_disks': {
            '/dev/sdb': '100GB'
        },
        'num_nodes': 3
    }, {
        'cpu': 1,
        'mem': '3GB',
        'disk': '15GB',
        'num_nodes': 2
    }, {
        'cpu': 1,
        'mem': '1GB',
        'disk': '25GB',
        'num_nodes': 1
    }])

    def __init__(self, test_context):
        super(SmokeTest, self).__init__(test_context=test_context)

    @cluster(cluster_spec=MIN_CLUSTER_SPEC)
    @parametrize(num_active_partitions=1,
                 txn_per_client=500,
                 num_clients=10,
                 interval=120,
                 timeout=240)
    @parametrize(num_active_partitions=4,
                 txn_per_client=500,
                 num_clients=10,
                 interval=120,
                 timeout=240)
    def test_produce_consume_no_torture(self, num_active_partitions,
                                        txn_per_client, num_clients, interval,
                                        timeout):
        validation_cmd = self.client_cli.validate_txn_cmd(
            num_active_partitions, txn_per_client, num_clients, interval)
        self.run_produce_consume_validate(
            lambda: self.simple_validation_func(validation_cmd, timeout))

    @cluster(cluster_spec=MIN_CLUSTER_SPEC)
    @parametrize(num_active_partitions=1,
                 txn_per_client=500,
                 num_clients=10,
                 interval=120,
                 timeout=480)
    @parametrize(num_active_partitions=4,
                 txn_per_client=500,
                 num_clients=10,
                 interval=120,
                 timeout=480)
    def test_produce_consume_while_bouncing_storage_nodes(
            self, num_active_partitions, txn_per_client, num_clients, interval,
            timeout):
        validation_cmd = self.client_cli.validate_txn_cmd(
            num_active_partitions, txn_per_client, num_clients, interval)
        validation_result = self.run_produce_consume_validate(
            lambda: self.simple_validation_func(validation_cmd, timeout),
            lambda: self._bounce_storage_nodes(3))
        assert "exception" not in validation_result.lower(
        ), "Test failed with exception:\n{}".format(validation_result)

    @cluster(cluster_spec=MIN_CLUSTER_SPEC)
    @parametrize(num_active_partitions=1,
                 txn_per_client=500,
                 num_clients=2,
                 interval=120,
                 timeout=240)
    @parametrize(num_active_partitions=4,
                 txn_per_client=500,
                 num_clients=2,
                 interval=120,
                 timeout=240)
    def test_produce_consume_while_killing_a_server_node(
            self, num_active_partitions, txn_per_client, num_clients, interval,
            timeout):
        validation_cmd = self.client_cli.validate_txn_cmd(
            num_active_partitions, txn_per_client, num_clients, interval)
        self.run_produce_consume_validate(
            lambda: self.simple_validation_func(validation_cmd, timeout),
            lambda: self._kill_a_server_node(num_active_partitions))

    def _bounce_storage_nodes(self, interval):
        storage_node_bounce_scheduler = NodeBounceScheduler(
            service=self.waltz_storage,
            interval=interval,
            stop_condition=lambda: self.verifiable_client.task_complete())
        storage_node_bounce_scheduler.start()

    def _kill_a_server_node(self, num_active_partitions):
        node_idx = self.get_server_node_idx(randrange(num_active_partitions))
        cmd_list = [{
            "action": NodeBounceScheduler.IDLE
        }, {
            "action": NodeBounceScheduler.STOP_A_NODE,
            "node": node_idx
        }]
        server_node_bounce_scheduler = NodeBounceScheduler(
            service=self.waltz_server,
            interval=3,
            stop_condition=lambda: self.verifiable_client.task_complete(),
            iterable_cmd_list=iter(cmd_list))
        server_node_bounce_scheduler.start()