Example #1
0
    def _run_stress(self, node, loader_idx, cpu_idx, keyspace_idx):  # pylint: disable=too-many-locals
        stress_cmd = self.create_stress_cmd(node, loader_idx, keyspace_idx)

        if self.profile:
            with open(self.profile, encoding="utf-8") as profile_file:
                LOGGER.info('Profile content:\n%s', profile_file.read())
            node.remoter.send_files(self.profile,
                                    os.path.join(
                                        '/tmp',
                                        os.path.basename(self.profile)),
                                    delete_dst=True)

        # Get next word after `cassandra-stress' in stress_cmd.
        # Do it this way because stress_cmd can contain env variables before `cassandra-stress'.
        stress_cmd_opt = stress_cmd.split("cassandra-stress",
                                          1)[1].split(None, 1)[0]

        LOGGER.info('Stress command:\n%s', stress_cmd)

        os.makedirs(node.logdir, exist_ok=True)
        log_file_name = \
            os.path.join(node.logdir, f'cassandra-stress-l{loader_idx}-c{cpu_idx}-k{keyspace_idx}-{uuid.uuid4()}.log')

        LOGGER.debug('cassandra-stress local log: %s', log_file_name)

        # This tag will be output in the header of c-stress result,
        # we parse it to know the loader & cpu info in _parse_cs_summary().
        tag = f'TAG: loader_idx:{loader_idx}-cpu_idx:{cpu_idx}-keyspace_idx:{keyspace_idx}'

        if self.stress_num > 1:
            node_cmd = f'STRESS_TEST_MARKER={self.shell_marker}; taskset -c {cpu_idx} {stress_cmd}'
        else:
            node_cmd = f'STRESS_TEST_MARKER={self.shell_marker}; {stress_cmd}'
        node_cmd = f'echo {tag}; {node_cmd}'

        result = None

        # Disable cassandra-stress logging by copying logback-tools.xml into the working directory
        node.remoter.run("cp /etc/scylla/cassandra/logback-tools.xml .",
                         ignore_status=True)

        with CassandraStressExporter(instance_name=node.cql_ip_address,
                                     metrics=nemesis_metrics_obj(),
                                     stress_operation=stress_cmd_opt,
                                     stress_log_filename=log_file_name,
                                     loader_idx=loader_idx, cpu_idx=cpu_idx), \
                CassandraStressEventsPublisher(node=node, cs_log_filename=log_file_name) as publisher, \
                CassandraStressEvent(node=node, stress_cmd=self.stress_cmd,
                                     log_file_name=log_file_name) as cs_stress_event:
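            # Tie events parsed from the c-s log to this run by reusing the stress event's id.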
            publisher.event_id = cs_stress_event.event_id
            try:
                result = node.remoter.run(cmd=node_cmd,
                                          timeout=self.timeout,
                                          log_file=log_file_name)
            except Exception as exc:  # pylint: disable=broad-except
                cs_stress_event.severity = Severity.CRITICAL if self.stop_test_on_failure else Severity.ERROR
                cs_stress_event.add_error(
                    errors=[format_stress_cmd_error(exc)])

        return node, result, cs_stress_event
    def __init__(
            self,
            loader_set,
            stress_cmd,
            timeout,
            stress_num=1,
            node_list=None,
            round_robin=False,  # pylint: disable=too-many-arguments
            params=None):
        self.loader_set = loader_set
        self.stress_cmd = stress_cmd
        self.timeout = timeout
        self.stress_num = stress_num
        self.node_list = node_list if node_list else []
        self.round_robin = round_robin
        self.params = params if params else dict()

        self.executor = None
        self.results_futures = []
        self.max_workers = 0

        for operation in self.collectible_ops:
            gauge_name = self.gauge_name(operation)
            if gauge_name not in self.METRICS:
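                # Create the gauge once and cache it in the class-level METRICS registry.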
                metrics = nemesis_metrics_obj()
                self.METRICS[gauge_name] = metrics.create_gauge(
                    gauge_name, 'Gauge for ycsb metrics',
                    ['instance', 'loader_idx', 'type'])
    def _run_stress_bench(self, node, loader_idx, stress_cmd, node_list):
        if self.sb_mode == ScyllaBenchModes.WRITE and self.sb_workload == ScyllaBenchWorkloads.TIMESERIES:
            node.parent_cluster.sb_write_timeseries_ts = write_timestamp = time.time_ns()
            LOGGER.debug("Set start-time: %s", write_timestamp)
            stress_cmd = re.sub(r"SET_WRITE_TIMESTAMP", f"{write_timestamp}", stress_cmd)
            LOGGER.debug("Replaced stress command: %s", stress_cmd)

        elif self.sb_mode == ScyllaBenchModes.READ and self.sb_workload == ScyllaBenchWorkloads.TIMESERIES:
            write_timestamp = wait_for(
                lambda: node.parent_cluster.sb_write_timeseries_ts,
                step=5,
                timeout=30,
                text='Waiting for "scylla-bench -workload=timeseries -mode=write" to start, to pick up its timestamp')
            LOGGER.debug("Found write timestamp %s", write_timestamp)
            stress_cmd = re.sub(r"GET_WRITE_TIMESTAMP", f"{write_timestamp}", stress_cmd)
            LOGGER.debug("Replaced stress command: %s", stress_cmd)
        else:
            LOGGER.debug("Scylla bench command: %s", stress_cmd)

        os.makedirs(node.logdir, exist_ok=True)

        log_file_name = os.path.join(
            node.logdir, f'scylla-bench-l{loader_idx}-{uuid.uuid4()}.log')
        # Select first seed node to send the scylla-bench cmds
        ips = node_list[0].cql_ip_address

        with ScyllaBenchStressExporter(instance_name=node.cql_ip_address,
                                       metrics=nemesis_metrics_obj(),
                                       stress_operation=self.sb_mode,
                                       stress_log_filename=log_file_name,
                                       loader_idx=loader_idx), \
                ScyllaBenchStressEventsPublisher(node=node, sb_log_filename=log_file_name) as publisher, \
                ScyllaBenchEvent(node=node, stress_cmd=self.stress_cmd,
                                 log_file_name=log_file_name) as scylla_bench_event:
            publisher.event_id = scylla_bench_event.event_id
            result = None
            try:
                result = node.remoter.run(
                    cmd="/$HOME/go/bin/{name} -nodes {ips}".format(
                        name=stress_cmd.strip(), ips=ips),
                    timeout=self.timeout,
                    log_file=log_file_name)
            except Exception as exc:  # pylint: disable=broad-except
                errors_str = format_stress_cmd_error(exc)
                if "truncate: seastar::rpc::timeout_error" in errors_str:
                    scylla_bench_event.severity = Severity.ERROR
                elif self.stop_test_on_failure:
                    scylla_bench_event.severity = Severity.CRITICAL
                else:
                    scylla_bench_event.severity = Severity.ERROR

                scylla_bench_event.add_error([errors_str])

        return node, result
    def _run_stress(self, node, loader_idx, cpu_idx, keyspace_idx):
        stress_cmd = self.create_stress_cmd(node, loader_idx, keyspace_idx)

        if self.profile:
            with open(self.profile, encoding="utf-8") as profile_file:
                LOGGER.info('Profile content:\n%s', profile_file.read())
            node.remoter.send_files(self.profile,
                                    os.path.join(
                                        '/tmp',
                                        os.path.basename(self.profile)),
                                    delete_dst=True)

        stress_cmd_opt = stress_cmd.split()[1]

        LOGGER.info('Stress command:\n%s', stress_cmd)

        log_dir = os.path.join(self.output_dir, self.loader_set.name)
        os.makedirs(log_dir, exist_ok=True)
        log_file_name = os.path.join(
            log_dir,
            f'cassandra-stress-l{loader_idx}-c{cpu_idx}-k{keyspace_idx}-{uuid.uuid4()}.log'
        )

        LOGGER.debug('cassandra-stress local log: %s', log_file_name)

        # This tag will be output in the header of c-stress result,
        # we parse it to know the loader & cpu info in _parse_cs_summary().
        tag = f'TAG: loader_idx:{loader_idx}-cpu_idx:{cpu_idx}-keyspace_idx:{keyspace_idx}'

        if self.stress_num > 1:
            node_cmd = f'STRESS_TEST_MARKER={self.shell_marker}; taskset -c {cpu_idx} {stress_cmd}'
        else:
            node_cmd = f'STRESS_TEST_MARKER={self.shell_marker}; {stress_cmd}'

        node_cmd = f'echo {tag}; {node_cmd}'

        CassandraStressEvent(type='start',
                             node=str(node),
                             stress_cmd=stress_cmd)

        with CassandraStressExporter(instance_name=node.ip_address,
                                     metrics=nemesis_metrics_obj(),
                                     cs_operation=stress_cmd_opt,
                                     cs_log_filename=log_file_name,
                                     loader_idx=loader_idx, cpu_idx=cpu_idx), \
                CassandraStressEventsPublisher(node=node, cs_log_filename=log_file_name):

            result = node.remoter.run(cmd=node_cmd,
                                      timeout=self.timeout,
                                      ignore_status=True,
                                      log_file=log_file_name)

        CassandraStressEvent(type='finish',
                             node=str(node),
                             stress_cmd=stress_cmd,
                             log_file_name=log_file_name)

        return node, result
Example #5
0
    def _run_stress(self, node, loader_idx, cpu_idx, keyspace_idx):  # pylint: disable=too-many-locals
        stress_cmd = self.create_stress_cmd(node, loader_idx, keyspace_idx)

        if self.profile:
            with open(self.profile, encoding="utf-8") as profile_file:
                LOGGER.info('Profile content:\n%s', profile_file.read())
            node.remoter.send_files(self.profile,
                                    os.path.join(
                                        '/tmp',
                                        os.path.basename(self.profile)),
                                    delete_dst=True)

        stress_cmd_opt = stress_cmd.split()[1]

        LOGGER.info('Stress command:\n%s', stress_cmd)

        os.makedirs(node.logdir, exist_ok=True)
        log_file_name = \
            os.path.join(node.logdir, f'cassandra-stress-l{loader_idx}-c{cpu_idx}-k{keyspace_idx}-{uuid.uuid4()}.log')

        LOGGER.debug('cassandra-stress local log: %s', log_file_name)

        # This tag will be output in the header of c-stress result,
        # we parse it to know the loader & cpu info in _parse_cs_summary().
        tag = f'TAG: loader_idx:{loader_idx}-cpu_idx:{cpu_idx}-keyspace_idx:{keyspace_idx}'

        if self.stress_num > 1:
            node_cmd = f'STRESS_TEST_MARKER={self.shell_marker}; taskset -c {cpu_idx} {stress_cmd}'
        else:
            node_cmd = f'STRESS_TEST_MARKER={self.shell_marker}; {stress_cmd}'
        node_cmd = f'echo {tag}; {node_cmd}'

        result = None

        CassandraStressEvent.start(node=node, stress_cmd=stress_cmd).publish()
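        # Both the metrics exporter and the events publisher follow the same c-s log file while the command runs.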
        with CassandraStressExporter(instance_name=node.ip_address,
                                     metrics=nemesis_metrics_obj(),
                                     stress_operation=stress_cmd_opt,
                                     stress_log_filename=log_file_name,
                                     loader_idx=loader_idx, cpu_idx=cpu_idx), \
                CassandraStressEventsPublisher(node=node, cs_log_filename=log_file_name):
            try:
                result = node.remoter.run(cmd=node_cmd,
                                          timeout=self.timeout,
                                          log_file=log_file_name)
            except Exception as exc:
                event_type = CassandraStressEvent.failure if self.stop_test_on_failure else CassandraStressEvent.error
                event_type(node=node,
                           stress_cmd=stress_cmd,
                           log_file_name=log_file_name,
                           errors=[
                               format_stress_cmd_error(exc),
                           ]).publish()
        CassandraStressEvent.finish(node=node,
                                    stress_cmd=stress_cmd,
                                    log_file_name=log_file_name).publish()

        return node, result
Example #6
0
    def _run_stress(self, node, loader_idx, cpu_idx, keyspace_idx):
        stress_cmd = self.create_stress_cmd(node, loader_idx, keyspace_idx)

        if self.profile:
            with open(self.profile, encoding="utf-8") as fp:
                LOGGER.info('Profile content:\n%s', fp.read())
            node.remoter.send_files(
                self.profile,
                os.path.join('/tmp', os.path.basename(self.profile)))

        stress_cmd_opt = stress_cmd.split()[1]

        LOGGER.info('Stress command:\n%s', stress_cmd)

        log_dir = os.path.join(self.output_dir, self.loader_set.name)
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
        log_file_name = os.path.join(
            log_dir, 'cassandra-stress-l%s-c%s-k%s-%s.log' %
            (loader_idx, cpu_idx, keyspace_idx, uuid.uuid4()))

        LOGGER.debug('cassandra-stress local log: %s', log_file_name)

        # This tag will be output in the header of c-stress result,
        # we parse it to know the loader & cpu info in _parse_cs_summary().
        tag = 'TAG: loader_idx:%s-cpu_idx:%s-keyspace_idx:%s' % (
            loader_idx, cpu_idx, keyspace_idx)

        if self.stress_num > 1:
            node_cmd = 'taskset -c %s bash -c "%s"' % (cpu_idx, stress_cmd)
        else:
            node_cmd = stress_cmd

        node_cmd = 'echo %s; %s' % (tag, node_cmd)

        CassandraStressEvent(type='start',
                             node=str(node),
                             stress_cmd=stress_cmd)

        with CassandraStressExporter(instance_name=node.ip_address,
                                     metrics=nemesis_metrics_obj(),
                                     cs_operation=stress_cmd_opt,
                                     cs_log_filename=log_file_name,
                                     loader_idx=loader_idx, cpu_idx=cpu_idx), \
                CassandraStressEventsPublisher(node=node, cs_log_filename=log_file_name):

            result = node.remoter.run(cmd=node_cmd,
                                      timeout=self.timeout,
                                      ignore_status=True,
                                      log_file=log_file_name)

        CassandraStressEvent(type='finish',
                             node=str(node),
                             stress_cmd=stress_cmd,
                             log_file_name=log_file_name)

        return node, result
Example #7
0
    def run(self):
        events_gauge = nemesis_metrics_obj().create_gauge('sct_events_gauge',
                                                          'Gauge for sct events',
                                                          ['event_type', 'type', 'severity', 'node'])

        for event_type, message_data in EVENTS_PROCESSES['MainDevice'].subscribe_events(stop_event=self.stop_event):
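            # Expose the latest timestamp per (event type, type, severity, node) label combination.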
            events_gauge.labels(event_type,
                                getattr(message_data, 'type', ''),
                                message_data.severity,
                                getattr(message_data, 'node', '')).set(message_data.timestamp)
Example #8
0
    def _run_stress_bench(self, node, loader_idx, stress_cmd, node_list):
        read_gap = 480  # reads start after writes; a read may look back from before its start time up to the current time, spanning several sstables
        stress_cmd = re.sub(r"SCT_TIME", f"{int(time.time()) - read_gap}", stress_cmd)
        LOGGER.debug("Replaced stress command: %s", stress_cmd)

        ScyllaBenchEvent.start(node=node, stress_cmd=stress_cmd).publish()
        os.makedirs(node.logdir, exist_ok=True)

        log_file_name = os.path.join(
            node.logdir, f'scylla-bench-l{loader_idx}-{uuid.uuid4()}.log')
        # Select first seed node to send the scylla-bench cmds
        ips = node_list[0].ip_address

        # Find stress mode:
        #    "scylla-bench -workload=sequential -mode=write -replication-factor=3 -partition-count=100"
        #    "scylla-bench -workload=uniform -mode=read -replication-factor=3 -partition-count=100"
        found = re.search(r"-mode=(.+?) ", stress_cmd)
        stress_cmd_opt = found.group(1)

        with ScyllaBenchStressExporter(instance_name=node.ip_address,
                                       metrics=nemesis_metrics_obj(),
                                       stress_operation=stress_cmd_opt,
                                       stress_log_filename=log_file_name,
                                       loader_idx=loader_idx), \
                ScyllaBenchStressEventsPublisher(node=node, sb_log_filename=log_file_name):
            result = None
            try:
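                # The scylla-bench binary is expected under $HOME/go/bin on the loader; target nodes are passed via -nodes.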
                result = node.remoter.run(
                    cmd="/$HOME/go/bin/{name} -nodes {ips}".format(
                        name=stress_cmd.strip(), ips=ips),
                    timeout=self.timeout,
                    log_file=log_file_name)
            except Exception as exc:  # pylint: disable=broad-except
                errors_str = format_stress_cmd_error(exc)
                if "truncate: seastar::rpc::timeout_error" in errors_str:
                    event_type = ScyllaBenchEvent.timeout
                elif self.stop_test_on_failure:
                    event_type = ScyllaBenchEvent.failure
                else:
                    event_type = ScyllaBenchEvent.error
                event_type(
                    node=node,
                    stress_cmd=stress_cmd,
                    log_file_name=log_file_name,
                    errors=[
                        errors_str,
                    ],
                ).publish()
            else:
                ScyllaBenchEvent.finish(node=node,
                                        stress_cmd=stress_cmd,
                                        log_file_name=log_file_name).publish()

        return node, result
    def __init__(self, loader_node, loader_idx, ycsb_log_filename):
        super().__init__()
        self.loader_node = loader_node
        self.loader_idx = loader_idx
        self.ycsb_log_filename = ycsb_log_filename
        self.uuid = generate_random_string(10)
        for operation in self.collectible_ops:
            gauge_name = self.gauge_name(operation)
            if gauge_name not in self.METRICS:
                metrics = nemesis_metrics_obj()
                self.METRICS[gauge_name] = metrics.create_gauge(
                    gauge_name, 'Gauge for ycsb metrics',
                    ['instance', 'loader_idx', 'uuid', 'type'])
Example #10
0
    def __init__(self, loader_node, loader_idx, ndbench_log_filename):
        super().__init__()
        self.loader_node = loader_node
        self.loader_idx = loader_idx
        self.ndbench_log_filename = ndbench_log_filename

        for operation in self.collectible_ops:
            gauge_name = self.gauge_name(operation)
            if gauge_name not in self.METRICS:
                metrics = nemesis_metrics_obj()
                self.METRICS[gauge_name] = metrics.create_gauge(
                    gauge_name, 'Gauge for ndbench metrics',
                    ['instance', 'loader_idx', 'type'])
    def run(self) -> None:
        events_gauge = \
            nemesis_metrics_obj().create_gauge("sct_events_gauge",
                                               "Gauge for SCT events",
                                               ["event_type", "type", "subtype", "severity", "node", ])

        for event_tuple in self.inbound_events():
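            # Log and suppress any failure so one bad event doesn't stop the dumper.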
            with verbose_suppress("PrometheusDumper failed to process %s",
                                  event_tuple):
                event_class, event = event_tuple  # try to unpack event from EventsDevice
                events_gauge.labels(
                    event_class,  # pylint: disable=no-member
                    getattr(event, "type", ""),
                    getattr(event, "subtype", ""),
                    event.severity,
                    getattr(event, "node", "")).set(event.timestamp)
    def _run_stress_harry(self, node, loader_idx, stress_cmd, node_list):
        CassandraHarryEvent.start(node=node, stress_cmd=stress_cmd).publish()
        os.makedirs(node.logdir, exist_ok=True)

        log_file_name = os.path.join(
            node.logdir, f'cassandra-harry-l{loader_idx}-{uuid.uuid4()}.log')
        # Select first seed node to send the cassandra-harry cmds
        ip = node_list[0].private_ip_address

        with CassandraHarryStressExporter(instance_name=node.ip_address,
                                          metrics=nemesis_metrics_obj(),
                                          stress_operation='write',
                                          stress_log_filename=log_file_name,
                                          loader_idx=loader_idx), \
                CassandraHarryStressEventsPublisher(node=node, harry_log_filename=log_file_name):
            result = None
            try:
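                # cassandra-harry is pointed at the selected node via its -node option.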
                result = node.remoter.run(cmd=f"{stress_cmd} -node {ip}",
                                          timeout=self.timeout,
                                          log_file=log_file_name)
            except Exception as exc:  # pylint: disable=broad-except
                errors_str = format_stress_cmd_error(exc)
                if "timeout" in errors_str:
                    event_type = CassandraHarryEvent.timeout
                elif self.stop_test_on_failure:
                    event_type = CassandraHarryEvent.failure
                else:
                    event_type = CassandraHarryEvent.error
                event_type(
                    node=node,
                    stress_cmd=stress_cmd,
                    log_file_name=log_file_name,
                    errors=[
                        errors_str,
                    ],
                ).publish()
            else:
                CassandraHarryEvent.finish(
                    node=node,
                    stress_cmd=stress_cmd,
                    log_file_name=log_file_name).publish()

        return node, result
Example #13
0
    def setUpClass(cls):
        cls.prom_address = start_metrics_server()
        cls.metrics = nemesis_metrics_obj()
Example #14
0
    def __init__(self, *args, **kwargs):
        super(GrowClusterTest, self).__init__(*args, **kwargs)
        self._cluster_starting_size = self.params.get('n_db_nodes')
        self._cluster_target_size = self.params.get('cluster_target_size')
        self.metrics_srv = prometheus.nemesis_metrics_obj()
    def __init__(self, *args, **kwargs):
        super(GrowClusterTest, self).__init__(*args, **kwargs)
        self._cluster_starting_size = self.params.get('n_db_nodes', default=3)
        self._cluster_target_size = self.params.get('cluster_target_size', default=5)
        self.metrics_srv = prometheus.nemesis_metrics_obj()