def test_read_throughput_vs_latency_cache_only(self):  # pylint: disable=invalid-name
    """
    Throughput vs. latency test where one user runs a latency workload and another a high-throughput one.
    The load is run on the part of the data set that was warmed up into the cache (CACHE_ONLY_LOAD).

    Throughput - latency test:
    - Add SLA and grant to user (before any load)
         - user190 with 190 shares
         - user950 with 950 shares
    - Each user runs load from own loader (round robin):
         - user950 runs load with throttle (latency workload)
         - user190 runs load with high throughput

    Expected results: latency 99th of the user950 workload when it runs in parallel with the workload of
                      user190 is not significantly increased relative to the latency of the user950
                      workload when it is run alone
    """
    duration_min = 5  # minutes

    # Only the first half of the partitions is warmed up; the same key limit is passed to both
    # stress commands as "max_rows_for_read", so the test reads the data that was warmed up.
    max_key_for_read = int(self.num_of_partitions*0.5)

    session = self.prepare_schema()
    self.create_test_data()

    # Warm up the cache so the reads of this test are served from the cache
    self.warm_up_cache_before_test(max_key_for_read=max_key_for_read, stress_duration=30)

    # Define Service Levels/Roles/Users: one user/role/service level triple per shares value.
    # NOTE(review): the password literal contains no format placeholder, so "'******' % share" raises
    # TypeError as written - it looks like a masked value; confirm the intended password format.
    read_users = [
        {'user': User(session=session, name='user%d' % share, password='******' % share),
         'role': Role(session=session, name='role%d' % share),
         'service_level': ServiceLevel(session=session, name='sla%d' % share, service_shares=share)}
        for share in (190, 950)
    ]

    # Create Service Levels/Roles/Users
    self.create_auths(entities_list_of_dict=read_users)

    # The first user (190 shares) drives throughput, the second one (950 shares) is the latency user
    throughput_cmd = self.define_read_cassandra_stress_command(
        user=read_users[0],
        load_type=self.CACHE_ONLY_LOAD,
        workload_type=self.WORKLOAD_THROUGHPUT,
        threads=200,
        stress_duration_min=duration_min,
        max_rows_for_read=max_key_for_read)
    latency_cmd = self.define_read_cassandra_stress_command(
        user=read_users[1],
        load_type=self.CACHE_ONLY_LOAD,
        workload_type=self.WORKLOAD_LATENCY,
        threads=250,
        stress_duration_min=duration_min,
        max_rows_for_read=max_key_for_read)

    # NOTE(review): the 'troughput' spelling of the key is kept as is - it is a runtime key that is
    # presumably looked up by _throughput_latency_tests_run; verify before renaming.
    read_cmds = {'troughput': throughput_cmd, 'latency': latency_cmd}

    self._throughput_latency_tests_run(read_users=read_users, read_cmds=read_cmds,
                                       latency_user=read_users[1])
def _two_users_load_througput_workload(self, shares, load):
    """
    Run the throughput workload for two users at the same time and validate that both the scheduler
    runtime and the cassandra-stress "op rate" follow the ratio expected from the users' shares.

    The users/roles/service levels created here are always removed at the end (clean_auth runs in
    the "finally" clause), whether the validations pass or not.

    :param shares: shares values, one per user; each value is used to name the user/role/service level
                   and as "service_shares". The first two users are the ones that run load.
    :param load: load type, passed as "load_type" to define_read_cassandra_stress_command
    """
    session = self.prepare_schema()
    self.create_test_data()

    # Define Service Levels/Roles/Users
    # NOTE(review): the password literal contains no format placeholder, so "'******' % share" raises
    # TypeError as written - it looks like a masked value; confirm the intended password format.
    read_users = [
        {'user': User(session=session, name='user%d' % share, password='******' % share),
         'role': Role(session=session, name='role%d' % share),
         'service_level': ServiceLevel(session=session, name='sla%d' % share, service_shares=share)}
        for share in shares
    ]

    # The expected ratio is calculated from the users definition only (no metrics are passed)
    expected_shares_ratio = self.calculate_metrics_ratio_per_user(two_users_list=read_users)

    # Create Service Levels/Roles/Users
    self.create_auths(entities_list_of_dict=read_users)

    stress_duration = 10  # minutes
    # Same throughput command for the first and for the second user
    read_cmds = [
        self.define_read_cassandra_stress_command(user=read_users[user_idx],
                                                  load_type=load,
                                                  workload_type=self.WORKLOAD_THROUGHPUT,
                                                  threads=250,
                                                  stress_duration_min=stress_duration)
        for user_idx in (0, 1)
    ]

    try:
        start_time = time.time()

        stress_params = {'stress_cmd': read_cmds, 'round_robin': True}
        read_queue = self.run_stress_and_verify_threads(params=stress_params)
        results = self.get_c_s_stats(read_queue=read_queue, users=read_users, statistic_name='op rate')

        self.validate_if_scylla_load_high_enough(start_time=start_time,
                                                 wait_cpu_utilization=self.MIN_CPU_UTILIZATION)
        end_time = time.time()

        # Scheduler runtime is validated for the [start_time, end_time] window
        self.validate_scheduler_runtime(start_time=start_time,
                                        end_time=end_time,
                                        read_users=read_users,
                                        expected_ratio=expected_shares_ratio)

        self.assertTrue(results, msg='Not received cassandra-stress results')

        self.log.debug('Validate cassandra-stress ops deviation')
        actual_shares_ratio = self.calculate_metrics_ratio_per_user(two_users_list=read_users,
                                                                    metrics=results)
        self.validate_deviation(expected_ratio=expected_shares_ratio,
                                actual_ratio=actual_shares_ratio,
                                msg='Validate cassandra-stress ops.')
    finally:
        self.clean_auth(entities_list_of_dict=read_users)
def test_read_throughput_vs_latency_cache_and_disk(self):  # pylint: disable=invalid-name
    """
    Throughput vs. latency test where one user runs a latency workload and another a high-throughput one.
    The load is run on the full data set (that is read from both the cache and the disk) - MIXED_LOAD.

    Throughput - latency test:
    - Add SLA and grant to user (before any load)
         - user190 with 190 shares
         - user950 with 950 shares
    - Each user runs load from own loader (round robin):
         - user950 runs load with throttle (latency workload)
         - user190 runs load with high throughput

    Expected results: latency 99th of the user950 workload when it runs in parallel with the workload of
                      user190 is not significantly increased relative to the latency of the user950
                      workload when it is run alone
    """
    duration_min = 10  # minutes

    session = self.prepare_schema()
    self.create_test_data()

    # Define Service Levels/Roles/Users: one user/role/service level triple per shares value.
    # NOTE(review): the password literal contains no format placeholder, so "'******' % share" raises
    # TypeError as written - it looks like a masked value; confirm the intended password format.
    read_users = [
        {'user': User(session=session, name='user%d' % share, password='******' % share),
         'role': Role(session=session, name='role%d' % share),
         'service_level': ServiceLevel(session=session, name='sla%d' % share, service_shares=share)}
        for share in (190, 950)
    ]

    # Create Service Levels/Roles/Users
    self.create_auths(entities_list_of_dict=read_users)

    # Define stress commands: the first user (190 shares) drives throughput,
    # the second one (950 shares) is the latency user
    throughput_cmd = self.define_read_cassandra_stress_command(
        user=read_users[0],
        load_type=self.MIXED_LOAD,
        workload_type=self.WORKLOAD_THROUGHPUT,
        threads=200,
        stress_duration_min=duration_min)
    latency_cmd = self.define_read_cassandra_stress_command(
        user=read_users[1],
        load_type=self.MIXED_LOAD,
        workload_type=self.WORKLOAD_LATENCY,
        threads=250,
        stress_duration_min=duration_min)

    # NOTE(review): the 'troughput' spelling of the key is kept as is - it is a runtime key that is
    # presumably looked up by _throughput_latency_tests_run; verify before renaming.
    read_cmds = {'troughput': throughput_cmd, 'latency': latency_cmd}

    self._throughput_latency_tests_run(read_users=read_users, read_cmds=read_cmds,
                                       latency_user=read_users[1])
def test_read_50perc_write_50perc_load(self):
    """
    Throughput vs. latency run with a mixed cassandra-stress command (write_ratio 1 : read_ratio 1).

    Test scenario:
    - Add SLA and grant to user (before any load)
         - user190 with 190 shares
         - user950 with 950 shares
    - Each user runs load from own loader (round robin)
    - Expect OPS ratio between two loads is 1:5 (e.g. 190:950)
    - Expect scheduler run time between two loads is 1:5 (e.g. 190:950)

    NOTE(review): the expectations above come from the original description. All the validation is
    delegated to _throughput_latency_tests_run, so confirm that they match what that helper checks.
    """
    session = self.prepare_schema()
    self.create_test_data()

    stress_duration_min = 10

    # Define Service Levels/Roles/Users: one user/role/service level triple per shares value.
    # NOTE(review): the password literal contains no format placeholder, so "'******' % share" raises
    # TypeError as written - it looks like a masked value; confirm the intended password format.
    read_users = [
        {'user': User(session=session, name='user%d' % share, password='******' % share),
         'role': Role(session=session, name='role%d' % share),
         'service_level': ServiceLevel(session=session, name='sla%d' % share, service_shares=share)}
        for share in (190, 950)
    ]

    # Create Service Levels/Roles/Users
    self.create_auths(entities_list_of_dict=read_users)

    def mixed_cmd(user, workload_type):
        # Both commands differ only in the user and the workload type.
        # A fresh "kwargs" dict is built on every call, as in the two original literal dicts.
        return self.define_read_cassandra_stress_command(user=user,
                                                         load_type=self.MIXED_LOAD,
                                                         workload_type=workload_type,
                                                         threads=120,
                                                         stress_duration_min=stress_duration_min,
                                                         stress_command=self.STRESS_MIXED_CMD,
                                                         kwargs={'write_ratio': 1, 'read_ratio': 1})

    # NOTE(review): the 'troughput' spelling of the key is kept as is - it is a runtime key that is
    # presumably looked up by _throughput_latency_tests_run; verify before renaming.
    read_cmds = {'troughput': mixed_cmd(read_users[0], self.WORKLOAD_THROUGHPUT),
                 'latency': mixed_cmd(read_users[1], self.WORKLOAD_LATENCY)}

    self._throughput_latency_tests_run(read_users=read_users, read_cmds=read_cmds,
                                       latency_user=read_users[1])
def test_read_throughput_vs_latency_disk_only(self):  # pylint: disable=invalid-name
    """
    Throughput vs. latency test where one user runs a latency workload and another a high-throughput one.
    The load uses DISK_ONLY_LOAD: Scylla is restarted on every node, only the first quarter of the
    partitions is warmed up, and the stress commands are meant to read rows that are not in the cache.

    Throughput - latency test:
    - Add SLA and grant to user (before any load)
         - user190 with 190 shares
         - user950 with 950 shares
    - Each user runs load from own loader (round robin):
         - user950 runs load with throttle (latency workload)
         - user190 runs load with high throughput

    Expected results: latency 99th of the user950 workload when it runs in parallel with the workload of
                      user190 is not significantly increased relative to the latency of the user950
                      workload when it is run alone
    """
    duration_min = 5  # minutes

    session = self.prepare_schema()
    self.create_test_data_and_wait_no_compaction()

    # Restart Scylla on every node (presumably to start the test with an empty cache - confirm)
    for db_node in self.db_cluster.nodes:
        db_node.stop_scylla_server(verify_up=False, verify_down=True)
        db_node.start_scylla_server(verify_up=True, verify_down=False)

    # Select part of the records to warm the cache (all this data will be in the cache).
    # cassandra-stress "-pop" parameter will start from a row number greater than "max_key_for_cache"
    # (for read from the disk)
    max_key_for_cache = int(self.num_of_partitions * 0.25)

    # Warm up the cache to guarantee the read will be from disk
    self.warm_up_cache_before_test(max_key_for_read=max_key_for_cache, stress_duration=30)

    # Define Service Levels/Roles/Users: one user/role/service level triple per shares value.
    # NOTE(review): the password literal contains no format placeholder, so "'******' % share" raises
    # TypeError as written - it looks like a masked value; confirm the intended password format.
    read_users = [
        {'user': User(session=session, name='user%d' % share, password='******' % share),
         'role': Role(session=session, name='role%d' % share),
         'service_level': ServiceLevel(session=session, name='sla%d' % share, service_shares=share)}
        for share in (190, 950)
    ]

    # Create Service Levels/Roles/Users
    self.create_auths(entities_list_of_dict=read_users)

    # The first user (190 shares) drives throughput, the second one (950 shares) is the latency user.
    # Every command gets its own "max_rows_for_read" limit.
    throughput_cmd = self.define_read_cassandra_stress_command(
        user=read_users[0],
        load_type=self.DISK_ONLY_LOAD,
        workload_type=self.WORKLOAD_THROUGHPUT,
        threads=200,
        stress_duration_min=duration_min,
        max_rows_for_read=max_key_for_cache * 2)
    latency_cmd = self.define_read_cassandra_stress_command(
        user=read_users[1],
        load_type=self.DISK_ONLY_LOAD,
        workload_type=self.WORKLOAD_LATENCY,
        threads=250,
        stress_duration_min=duration_min,
        max_rows_for_read=max_key_for_cache * 3)
    latency_only_cmd = self.define_read_cassandra_stress_command(
        user=read_users[1],
        load_type=self.DISK_ONLY_LOAD,
        workload_type=self.WORKLOAD_LATENCY,
        threads=250,
        stress_duration_min=duration_min,
        max_rows_for_read=max_key_for_cache)

    # NOTE(review): this test spells the key 'throughput' while the other throughput/latency tests
    # of this file use 'troughput'; confirm which key _throughput_latency_tests_run expects.
    read_cmds = {'throughput': throughput_cmd,
                 'latency': latency_cmd,
                 'latency_only': latency_only_cmd}

    # TODO: improvement_expected number and calculation of actual improvement was set by Eliran for the
    # TODO: cache only test. Should be adjusted for this test
    improvement_expected = 1.8

    self._throughput_latency_tests_run(read_users=read_users,
                                       read_cmds=read_cmds,
                                       latency_user=read_users[1],
                                       improvement_expected=improvement_expected)
def test_workload_types(self):
    """
    Test scenario: run 2 workload types (batch, interactive) using
    Roles with relevant ServiceLevel objects attached to them.
    Validate that the metrics differ and that the difference is
    within the expected margins.

    The comparison of the two workloads is stored in "self._comparison_results", logged and then
    uploaded by upload_c_s_comparison_to_es().
    """
    session = self.prepare_schema()
    self.create_test_data(rows_amount=100_000)
    stress_duration_min = 180

    # Define Service Levels/Roles/Users.
    # Creation order is kept: both roles first, then both service levels, then the attachments.
    interactive_role = Role(session=session, name="interactive", password="******",
                            login=True, verbose=True).create()
    batch_role = Role(session=session, name="batch", password="******",
                      login=True, verbose=True).create()

    interactive_sla = ServiceLevel(session=session, name="interactive", shares=None,
                                   workload_type="interactive").create()
    batch_sla = ServiceLevel(session=session, name="batch", shares=None,
                             workload_type="batch").create()

    interactive_role.attach_service_level(interactive_sla)
    batch_role.attach_service_level(batch_sla)

    def throughput_cmd(role):
        # Both roles run the very same mixed throughput command; only the role differs.
        # A fresh "kwargs" dict is built on every call, as in the two original literal dicts.
        return self.define_read_cassandra_stress_command(
            role=role,
            load_type=self.MIXED_LOAD,
            c_s_workload_type=self.WORKLOAD_THROUGHPUT,
            threads=120,
            stress_duration_min=stress_duration_min,
            stress_command=self.STRESS_MIXED_CMD,
            kwargs={'write_ratio': 1, 'read_ratio': 1})

    read_cmds = {
        'throughput_interactive': throughput_cmd(interactive_role),
        'throughput_batch': throughput_cmd(batch_role),
    }

    self.log.debug('Running interactive and batch workloads in sequence...')
    stress_params = {
        'stress_cmd': [read_cmds['throughput_interactive'], read_cmds['throughput_batch']],
        'round_robin': True,
    }
    workloads_queue = self.run_stress_and_verify_threads(params=stress_params)

    self._comparison_results = self._compare_workloads_c_s_metrics(workloads_queue)
    self.log.info("C-S comparison results:\n%s", self._comparison_results)
    self.upload_c_s_comparison_to_es()