def prepare_iostat(self, clusters, test):
    """Create and register an IO(stat) collector for every cluster.

    If tests are run locally, no paths are defined, hence use the paths
    that are set by the server itself.  Get those paths via the REST API.
    """
    rest = None
    if test.cluster_spec.paths:
        # Paths come from the cluster spec; shared by all clusters.
        data_path, index_path = test.cluster_spec.paths
    else:
        # No paths in the spec: fall back to asking each master node.
        rest = RestHelper(test.cluster_spec)
    for cluster in clusters:
        settings = copy(self.settings)
        settings.cluster = cluster
        settings.master_node = self.clusters[cluster]
        if rest is not None:
            # Per-cluster paths reported by the server itself.
            data_path, index_path = rest.get_data_path(
                settings.master_node)
        partitions = {'data': data_path}
        if hasattr(test, 'ddocs'):  # all instances of IndexTest have it
            partitions['index'] = index_path
        settings.partitions = partitions
        io_collector = IO(settings)
        self.collectors.append(io_collector)
def __init__(self, cluster_spec, test_config, verbose, experiment=None):
    """Wire up all helpers needed by a performance test run.

    NOTE: legacy Python 2 code (``.next()``).  Construction order matters:
    ``self.rest`` and ``self.remote`` must exist before the build version
    is resolved, and the build feeds the reporting helpers created after it.
    """
    self.cluster_spec = cluster_spec
    self.test_config = test_config
    self.target_iterator = TargetIterator(cluster_spec, test_config)
    self.memcached = MemcachedHelper(test_config)
    self.monitor = Monitor(cluster_spec)
    self.rest = RestHelper(cluster_spec)
    self.remote = RemoteHelper(cluster_spec, test_config, verbose)
    if experiment:
        self.experiment = ExperimentHelper(experiment,
                                           cluster_spec, test_config)
    self.master_node = cluster_spec.yield_masters().next()
    if self.remote.gateways:
        # Sync Gateway deployment: version comes from the gateway, not CB.
        self.build = SyncGatewayRequestHelper().get_version(
            self.remote.gateways[0]
        )
    else:
        self.build = self.rest.get_version(self.master_node)
    self.cbagent = CbAgent(self)
    self.metric_helper = MetricHelper(self)
    self.reporter = Reporter(self)
    self.reports = {}          # report name -> report payload
    self.snapshots = []        # cbmonitor snapshot ids
    self.master_events = []    # collected master event logs
    if self.test_config.test_case.use_workers:
        self.worker_manager = WorkerManager(cluster_spec, test_config)
def __init__(self, cluster_spec: ClusterSpec, test_config: TestConfig,
             verbose: bool):
    """Wire up every helper a performance test needs.

    Order matters: ``self.rest`` must exist before the build version is
    fetched, and the build string feeds the ShowFast reporter.
    """
    self.cluster_spec = cluster_spec
    self.test_config = test_config

    # Helpers that only need the raw spec/config objects.
    self.target_iterator = TargetIterator(cluster_spec, test_config)
    self.cluster = ClusterManager(cluster_spec, test_config)
    self.memcached = MemcachedHelper(test_config)
    self.monitor = Monitor(cluster_spec, test_config, verbose)
    self.rest = RestHelper(cluster_spec)
    self.remote = RemoteHelper(cluster_spec, verbose)
    self.profiler = Profiler(cluster_spec, test_config)

    # Resolve the master node, then ask it which build is installed.
    master = next(cluster_spec.masters)
    self.master_node = master
    self.build = self.rest.get_version(master)

    self.metrics = MetricHelper(self)
    self.reporter = ShowFastReporter(cluster_spec, test_config, self.build)

    self.cbmonitor_snapshots = []
    self.cbmonitor_clusters = []

    if self.test_config.test_case.use_workers:
        self.worker_manager = WorkerManager(cluster_spec, test_config,
                                            verbose)
def __init__(self, cluster_spec: ClusterSpec, test_config: TestConfig):
    """Capture test configuration and per-cluster connection details."""
    self.test_config = test_config
    self.rest = RestHelper(cluster_spec)
    self.master_node = next(cluster_spec.masters)
    username, password = cluster_spec.ssh_credentials
    self.ssh_username = username
    self.ssh_password = password
def fetch_maps(self):
    """Return ``{bucket: (vbucket map, server list)}`` for every bucket."""
    rest = RestHelper(self.cluster_spec)
    master = next(self.cluster_spec.masters)
    return {
        bucket: (rest.get_vbmap(master, bucket),
                 rest.get_server_list(master, bucket))
        for bucket in self.test_config.buckets
    }
def fetch_maps(self):
    """Return a dict of (vbmap, server list) tuples keyed by bucket name.

    NOTE: legacy Python 2 variant (``.next()``) of the py3 ``fetch_maps``
    above; kept as-is for the old codepath.
    """
    rest = RestHelper(self.cluster_spec)
    master_node = self.cluster_spec.yield_masters().next()
    maps = {}
    for bucket in self.test_config.buckets:
        vbmap = rest.get_vbmap(master_node, bucket)
        server_list = rest.get_server_list(master_node, bucket)
        maps[bucket] = (vbmap, server_list)
    return maps
def __init__(self, cluster_spec, test_config, options):
    """Record CLI options and probe the first worker for its client OS."""
    self.test_config = test_config
    self.cluster_spec = cluster_spec
    self.client_settings = self.test_config.client_settings.__dict__
    self.options = options
    self.remote = RemoteHelper(self.cluster_spec, options.verbose)
    first_worker = self.cluster_spec.workers[0]
    self.client_os = RemoteHelper.detect_client_os(
        first_worker, self.cluster_spec).lower()
    self.rest = RestHelper(self.cluster_spec, self.test_config,
                           options.verbose)
    # Installed server version, as a comparable tuple.
    master = next(self.cluster_spec.masters)
    self.cb_version = version_tuple(self.rest.get_version(host=master))
def __init__(self, cluster_spec: ClusterSpec, test_config: TestConfig,
             verbose: bool = False):
    """Build the REST/remote/monitoring helpers shared by cluster ops."""
    self.cluster_spec = cluster_spec
    self.test_config = test_config
    # REST + SSH access to the cluster under test.
    self.rest = RestHelper(cluster_spec)
    self.remote = RemoteHelper(cluster_spec, verbose)
    # Health/warmup monitoring and direct memcached access.
    self.monitor = Monitor(cluster_spec, test_config, verbose)
    self.memcached = MemcachedHelper(test_config)
    masters = self.cluster_spec.masters
    self.master_node = next(masters)
    self.initial_nodes = test_config.cluster.initial_nodes
def __init__(self, cluster_spec: ClusterSpec, test_config: TestConfig):
    """Keep REST access, SSH credentials and profiling knobs at hand."""
    self.test_config = test_config
    self.rest = RestHelper(cluster_spec)
    self.master_node = next(cluster_spec.masters)
    credentials = cluster_spec.ssh_credentials
    self.ssh_username, self.ssh_password = credentials
    self.cluster_spec = cluster_spec
    self.profiling_settings = test_config.profiling_settings
    # Destination directory for `perf record` output on the server.
    self.linux_perf_path = '/opt/couchbase/var/lib/couchbase/logs/'
def __init__(self, cluster_spec: ClusterSpec, test_config: TestConfig,
             verbose: bool = False):
    """Set up cluster helpers and resolve the installed build version."""
    self.cluster_spec = cluster_spec
    self.test_config = test_config
    # Kubernetes-style deployments take a different management path.
    self.dynamic_infra = self.cluster_spec.dynamic_infrastructure
    self.rest = RestHelper(cluster_spec)
    self.remote = RemoteHelper(cluster_spec, verbose)
    self.monitor = Monitor(cluster_spec, test_config, verbose)
    self.memcached = MemcachedHelper(test_config)
    master = next(self.cluster_spec.masters)
    self.master_node = master
    self.initial_nodes = test_config.cluster.initial_nodes
    self.build = self.rest.get_version(master)
def __init__(self, cluster_spec: ClusterSpec, test_config: TestConfig,
             build: str):
    """Store the reporting context, tagging the build with its label."""
    self.cluster_spec = cluster_spec
    self.test_config = test_config
    # The ShowFast label is appended verbatim to the build string.
    label = test_config.showfast.build_label
    self.build = build + label
    self.master_node = next(self.cluster_spec.masters)
    self.rest = RestHelper(cluster_spec, test_config)
def __init__(self, cluster_spec, test_config, experiment=None):
    """Legacy Python 2 test constructor (``.next()``).

    NOTE(review): ``MemcachedHelper`` is passed ``cluster_spec`` here while
    every other variant in this file passes ``test_config`` — looks
    suspicious; verify against the MemcachedHelper signature of this era.
    """
    self.cluster_spec = cluster_spec
    self.test_config = test_config
    self.target_iterator = TargetIterator(self.cluster_spec,
                                          self.test_config)
    self.memcached = MemcachedHelper(cluster_spec)
    self.monitor = Monitor(cluster_spec)
    self.rest = RestHelper(cluster_spec)
    self.remote = RemoteHelper(cluster_spec)
    if experiment:
        self.experiment = ExperimentHelper(experiment,
                                           cluster_spec, test_config)
    self.master_node = cluster_spec.yield_masters().next()
    self.build = self.rest.get_version(self.master_node)
    self.cbagent = CbAgent(self)
    self.metric_helper = MetricHelper(self)
    self.reporter = Reporter(self)
    self.reports = {}          # report name -> report payload
    self.snapshots = []        # cbmonitor snapshot ids
    self.master_events = []    # collected master event logs
    if self.test_config.test_case.use_workers:
        self.worker_manager = WorkerManager(cluster_spec, test_config)
def __init__(self, cluster_spec, test_config, verbose):
    """Cache cluster topology generators and sizing settings."""
    self.cluster_spec = cluster_spec
    self.test_config = test_config
    self.rest = RestHelper(cluster_spec)
    self.remote = RemoteHelper(cluster_spec, test_config, verbose)
    self.monitor = Monitor(cluster_spec)
    self.memcached = MemcachedHelper(test_config)
    # Generators: clusters is materialized once, the rest stay callable.
    self.clusters = cluster_spec.yield_clusters()
    self.servers = cluster_spec.yield_servers
    self.masters = cluster_spec.yield_masters
    cluster_settings = test_config.cluster
    self.initial_nodes = cluster_settings.initial_nodes
    self.mem_quota = cluster_settings.mem_quota
    # Default to a single server group when none is configured.
    self.group_number = cluster_settings.group_number or 1
def __init__(self, cluster_spec, test_config, verbose=False):
    """Store helpers plus the per-service memory quota settings."""
    self.cluster_spec = cluster_spec
    self.test_config = test_config
    self.rest = RestHelper(cluster_spec)
    self.remote = RemoteHelper(cluster_spec, test_config, verbose)
    self.monitor = Monitor(cluster_spec, test_config, verbose)
    self.memcached = MemcachedHelper(test_config)
    self.master_node = next(self.cluster_spec.masters)
    cluster = test_config.cluster
    self.initial_nodes = cluster.initial_nodes
    # RAM quotas (MB) for each service, straight from the test config.
    self.mem_quota = cluster.mem_quota
    self.index_mem_quota = cluster.index_mem_quota
    self.fts_mem_quota = cluster.fts_index_mem_quota
    self.analytics_mem_quota = cluster.analytics_mem_quota
def main(): usage = '%prog -v version -c cluster-spec' parser = OptionParser(usage) parser.add_option('-v', '--version', dest='version') parser.add_option('-c', dest='cluster_spec_fname', help='path to cluster specification file', metavar='cluster.spec') parser.add_option('--verbose', dest='verbose', action='store_true', help='enable verbose logging') parser.add_option('-o', dest='toy', help='optional toy build ID', metavar='couchstore') parser.add_option('-t', dest='test_config_fname', help='path to test configuration file', metavar='my_test.test') parser.add_option('-e', '--edition', dest='cluster_edition', default='enterprise', help='the cluster edition (community or enterprise)') parser.add_option('--url', dest='url', default=None, help='The http URL to a Couchbase RPM that should be' ' installed. This overrides the URL to be installed.') options, args = parser.parse_args() cluster_spec = ClusterSpec() cluster_spec.parse(options.cluster_spec_fname) test_config = TestConfig() test_config.parse(options.test_config_fname) cm = ClusterManager(cluster_spec, test_config, options.verbose) installer = CouchbaseInstaller(cluster_spec, options) if True: installer.install() if cm.remote: cm.tune_logging() cm.restart_with_sfwi() cm.restart_with_alternative_num_vbuckets() cm.restart_with_alternative_num_cpus() cm.restart_with_tcmalloc_aggressive_decommit() cm.disable_moxi() cm.configure_internal_settings() cm.set_data_path() cm.set_services() cm.set_mem_quota() cm.set_index_mem_quota() cm.set_auth() time.sleep(30) """host = cluster_spec.yield_masters().next()""" host_ip = cluster_spec.yield_masters().next().split(':')[0] URL = 'http://' + host_ip + ':8093' logger.info('logging the URL: {}'.format(URL)) conn = urllib3.connection_from_url(URL) rest = RestHelper(cluster_spec) airline_result = do_airline_benchmarks(conn, rest, host_ip, installer.remote, cluster_spec) beer_result = do_beer_queries(conn, rest, host_ip, installer.remote) print 'beer_result is', beer_result 
sys.exit(not (airline_result and beer_result))
def __init__(self, cluster_spec: ClusterSpec, test_config: TestConfig,
             verbose: bool):
    """Assemble the helpers for a Sync Gateway performance test."""
    self.cluster_spec = cluster_spec
    self.test_config = test_config
    self.memcached = MemcachedHelper(test_config)
    self.remote = RemoteHelper(cluster_spec, test_config, verbose)
    self.rest = RestHelper(cluster_spec)
    # The build is the Sync Gateway version reported by the master node.
    master = next(cluster_spec.masters)
    self.master_node = master
    self.build = self.rest.get_sgversion(master)
    self.metrics = MetricHelper(self)
    self.reporter = ShowFastReporter(cluster_spec, test_config, self.build)
    if self.test_config.test_case.use_workers:
        self.worker_manager = WorkerManager(cluster_spec, test_config,
                                            verbose)
    # Access settings carry the Sync Gateway specific knobs.
    self.settings = self.test_config.access_settings
    self.settings.syncgateway_settings = self.test_config.syncgateway_settings
    self.profiler = Profiler(cluster_spec, test_config)
    self.cluster = ClusterManager(cluster_spec, test_config)
    self.target_iterator = TargetIterator(cluster_spec, test_config)
    self.monitor = Monitor(cluster_spec, test_config, verbose)
def __init__(self, *args, **kwargs):
    """Parse CLI options, load the spec/config pair, then defer to unittest."""
    options, args = get_options()
    # NOTE(review): `args` from get_options() shadows the *args parameter,
    # so super().__init__ below receives the CLI leftovers — the later
    # revision of this class uses `_args` instead; confirm intent.
    self.cluster_spec = ClusterSpec()
    self.cluster_spec.parse(options.cluster_spec_fname, args)
    self.test_config = TestConfig()
    self.test_config.parse(options.test_config_fname, args)
    self.target_iterator = TargetIterator(self.cluster_spec,
                                          self.test_config)
    self.memcached = MemcachedHelper(self.test_config)
    self.remote = RemoteHelper(self.cluster_spec, self.test_config)
    self.rest = RestHelper(self.cluster_spec)
    super(FunctionalTest, self).__init__(*args, **kwargs)
def __init__(self, *args, **kwargs):
    """Parse CLI options plus optional config overrides, then init unittest."""
    options, _args = get_options()
    # Overrides arrive as "section.option.value" tokens, comma-separated.
    if _args:
        override = (arg.split('.') for arg in ' '.join(_args).split(','))
    else:
        override = _args
    self.cluster_spec = ClusterSpec()
    self.cluster_spec.parse(options.cluster_spec_fname)
    self.test_config = TestConfig()
    self.test_config.parse(options.test_config_fname, override)
    self.target_iterator = TargetIterator(self.cluster_spec,
                                          self.test_config)
    self.memcached = MemcachedHelper(self.test_config)
    self.remote = RemoteHelper(self.cluster_spec, self.test_config)
    self.rest = RestHelper(self.cluster_spec)
    super(FunctionalTest, self).__init__(*args, **kwargs)
class Profiler:
    """Collect Go pprof profiles and Linux ``perf`` profiles from nodes."""

    # Per-service ports exposing the Go debug (pprof) endpoints.
    DEBUG_PORTS = {
        'fts':       8094,
        'index':     9102,
        'goxdcr':    9998,
        'kv':        9998,  # will be deprecated in future
        'n1ql':      8093,
        'eventing':  8096,
        'projector': 9999,
    }

    # pprof endpoints keyed by profile type; '{}' is the local tunnel port.
    ENDPOINTS = {
        'cpu': 'http://127.0.0.1:{}/debug/pprof/profile',
        'heap': 'http://127.0.0.1:{}/debug/pprof/heap',
        'goroutine': 'http://127.0.0.1:{}/debug/pprof/goroutine?debug=2',
    }

    def __init__(self, cluster_spec: ClusterSpec, test_config: TestConfig):
        self.test_config = test_config
        self.rest = RestHelper(cluster_spec)
        self.master_node = next(cluster_spec.masters)
        self.ssh_username, self.ssh_password = cluster_spec.ssh_credentials
        self.cluster_spec = cluster_spec
        self.profiling_settings = test_config.profiling_settings
        # Where `perf record` writes its data file on the server.
        self.linux_perf_path = '/opt/couchbase/var/lib/couchbase/logs/'

    def new_tunnel(self, host: str, port: int) -> SSHTunnelForwarder:
        """Open an SSH tunnel to a debug port bound to localhost on *host*."""
        return SSHTunnelForwarder(
            ssh_address_or_host=host,
            ssh_username=self.ssh_username,
            ssh_password=self.ssh_password,
            remote_bind_address=('127.0.0.1', port),
        )

    def save(self, host: str, service: str, profile: str, content: bytes):
        """Persist a collected profile under a unique local file name."""
        fname = '{}_{}_{}_{}.pprof'.format(host, service, profile, uhex()[:6])
        logger.info('Collected {} '.format(fname))
        with open(fname, 'wb') as fh:
            fh.write(content)

    def linux_perf_profile(self, host: str, fname: str, path: str):
        """Run ``perf record`` against memcached on *host* via SSH."""
        client = paramiko.SSHClient()
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        try:
            client.connect(hostname=host,
                           username=self.ssh_username,
                           password=self.ssh_password)
        except Exception:
            logger.info(
                'Cannot connect to the "{}" via SSH Server'.format(host))
            exit()
        logger.info('Capturing linux profile using linux perf record ')
        cmd = 'perf record -a -F {} -g --call-graph {} ' \
              '-p $(pgrep memcached) -o {}{} ' \
              '-- sleep {}'.format(self.profiling_settings.linux_perf_frequency,
                                   self.profiling_settings.linux_perf_callgraph,
                                   path, fname,
                                   self.profiling_settings.linux_perf_profile_duration)
        _, stdout, _ = client.exec_command(cmd)
        exit_status = stdout.channel.recv_exit_status()
        if exit_status == 0:
            logger.info(
                "linux perf record: linux perf profile capture completed")
        else:
            # BUG FIX: exit_status used to be passed as a spare logging
            # argument with no placeholder, so it was never rendered.
            logger.info(
                "perf record failed , exit_status : {}".format(exit_status))
        client.close()

    def profile(self, host: str, service: str, profile: str):
        """Collect one profile from one node, via perf or a pprof endpoint."""
        logger.info('Collecting {} profile on {}'.format(profile, host))
        if self.profiling_settings.linux_perf_profile_flag:
            logger.info('Collecting {} profile on {} using linux perf '
                        'reccord'.format(profile, host))
            fname = 'linux_{}_{}_{}_perf.data'.format(host, profile,
                                                      uhex()[:4])
            self.linux_perf_profile(host=host, fname=fname,
                                    path=self.linux_perf_path)
        else:
            # Look up the endpoint/port only on the path that uses them.
            endpoint = self.ENDPOINTS[profile]
            port = self.DEBUG_PORTS[service]
            with self.new_tunnel(host, port) as tunnel:
                url = endpoint.format(tunnel.local_bind_port)
                response = requests.get(url=url, auth=self.rest.auth)
                self.save(host, service, profile, response.content)

    def timer(self, **kwargs):
        """Schedule repeated profile collection on a background timer."""
        timer = Timer(
            function=self.profile,
            interval=self.test_config.profiling_settings.interval,
            num_runs=self.test_config.profiling_settings.num_profiles,
            kwargs=kwargs,
        )
        timer.start()

    def schedule(self):
        """Set up timers for every configured service/profile/node combo."""
        for service in self.test_config.profiling_settings.services:
            logger.info(
                'Scheduling profiling of "{}" services'.format(service))
            # The projector runs on KV nodes, so resolve those instead.
            if service == 'projector':
                active_nodes_by_role = self.rest.get_active_nodes_by_role(
                    self.master_node, role='kv')
            else:
                active_nodes_by_role = self.rest.get_active_nodes_by_role(
                    self.master_node, role=service)
            for server in active_nodes_by_role:
                for profile in self.test_config.profiling_settings.profiles:
                    self.timer(host=server, service=service, profile=profile)
class ClusterManager:
    """Configure and manage a Couchbase cluster over REST and SSH.

    Combines REST calls (``self.rest``), remote shell access
    (``self.remote``) and state polling (``self.monitor``).
    """

    def __init__(self, cluster_spec: ClusterSpec, test_config: TestConfig,
                 verbose: bool = False):
        self.cluster_spec = cluster_spec
        self.test_config = test_config
        self.rest = RestHelper(cluster_spec)
        self.remote = RemoteHelper(cluster_spec, verbose)
        self.monitor = Monitor(cluster_spec, test_config, verbose)
        self.memcached = MemcachedHelper(test_config)
        self.master_node = next(self.cluster_spec.masters)
        self.initial_nodes = test_config.cluster.initial_nodes

    def is_compatible(self, min_release: str) -> bool:
        """Check the server release against a minimum version.

        NOTE(review): returns on the first master only, and compares
        versions as plain strings — fine for same-length dotted releases,
        verify for mixed-width versions (e.g. '10.x').
        """
        for master in self.cluster_spec.masters:
            version = self.rest.get_version(master)
            return version >= min_release

    def set_data_path(self):
        """Point every node's data directory at the spec'd path."""
        for server in self.cluster_spec.servers:
            self.remote.change_owner(server, self.cluster_spec.data_path)
            self.rest.set_data_path(server, self.cluster_spec.data_path)

    def set_index_path(self):
        """Point every node's index directory at the spec'd path."""
        for server in self.cluster_spec.servers:
            self.remote.change_owner(server, self.cluster_spec.index_path)
            self.rest.set_index_path(server, self.cluster_spec.index_path)

    def set_analytics_path(self):
        """Create per-device analytics paths and register them on cbas nodes."""
        paths = []
        for path in self.cluster_spec.analytics_paths:
            for i in range(self.test_config.analytics_settings.num_io_devices):
                io_device = '{}/dev{}'.format(path, i)
                paths.append(io_device)
        for server in self.cluster_spec.servers_by_role('cbas'):
            for path in self.cluster_spec.analytics_paths:
                self.remote.change_owner(server, path)
            self.rest.set_analytics_paths(server, paths)

    def rename(self):
        """Assign each node its canonical hostname via REST."""
        for server in self.cluster_spec.servers:
            self.rest.rename(server)

    def set_auth(self):
        """Set administrator credentials on every node."""
        for server in self.cluster_spec.servers:
            self.rest.set_auth(server)

    def set_mem_quotas(self):
        """Apply the configured RAM quota for each enabled service."""
        for master in self.cluster_spec.masters:
            self.rest.set_mem_quota(master,
                                    self.test_config.cluster.mem_quota)
            self.rest.set_index_mem_quota(
                master, self.test_config.cluster.index_mem_quota)
            # Optional services only get a quota when one is configured.
            if self.test_config.cluster.fts_index_mem_quota:
                self.rest.set_fts_index_mem_quota(
                    master, self.test_config.cluster.fts_index_mem_quota)
            if self.test_config.cluster.analytics_mem_quota:
                self.rest.set_analytics_mem_quota(
                    master, self.test_config.cluster.analytics_mem_quota)
            if self.test_config.cluster.eventing_mem_quota:
                self.rest.set_eventing_mem_quota(
                    master, self.test_config.cluster.eventing_mem_quota)

    def set_query_settings(self):
        """Push cbq-engine settings to the first query node, then log them."""
        logger.info('Setting query settings')
        query_nodes = self.cluster_spec.servers_by_role('n1ql')
        if query_nodes:
            settings = self.test_config.n1ql_settings.cbq_settings
            if settings:
                self.rest.set_query_settings(query_nodes[0], settings)
            settings = self.rest.get_query_settings(query_nodes[0])
            settings = pretty_dict(settings)
            logger.info('Query settings: {}'.format(settings))

    def set_index_settings(self):
        """Push GSI settings to the first index node, then log them."""
        logger.info('Setting index settings')
        index_nodes = self.cluster_spec.servers_by_role('index')
        if index_nodes:
            settings = self.test_config.gsi_settings.settings
            if settings:
                self.rest.set_index_settings(index_nodes[0], settings)
            settings = self.rest.get_index_settings(index_nodes[0])
            settings = pretty_dict(settings)
            logger.info('Index settings: {}'.format(settings))

    def set_services(self):
        """Assign service roles per node (requires server >= 4.0.0)."""
        if not self.is_compatible(min_release='4.0.0'):
            return
        for master in self.cluster_spec.masters:
            roles = self.cluster_spec.roles[master]
            self.rest.set_services(master, roles)

    def add_nodes(self):
        """Join the remaining initial nodes to each cluster's master."""
        for (_, servers), initial_nodes in zip(self.cluster_spec.clusters,
                                               self.initial_nodes):
            if initial_nodes < 2:  # Single-node cluster
                continue
            master = servers[0]
            for node in servers[1:initial_nodes]:
                roles = self.cluster_spec.roles[node]
                self.rest.add_node(master, node, roles)

    def rebalance(self):
        """Trigger a rebalance on each cluster and wait for completion."""
        for (_, servers), initial_nodes in zip(self.cluster_spec.clusters,
                                               self.initial_nodes):
            master = servers[0]
            known_nodes = servers[:initial_nodes]
            ejected_nodes = []
            self.rest.rebalance(master, known_nodes, ejected_nodes)
            self.monitor.monitor_rebalance(master)
        self.wait_until_healthy()

    def increase_bucket_limit(self, num_buckets: int):
        """Raise the per-cluster maximum number of buckets."""
        for master in self.cluster_spec.masters:
            self.rest.increase_bucket_limit(master, num_buckets)

    def flush_buckets(self):
        """Flush (empty) every test bucket on every cluster."""
        for master in self.cluster_spec.masters:
            for bucket_name in self.test_config.buckets:
                self.rest.flush_bucket(host=master, bucket=bucket_name)

    def delete_buckets(self):
        """Delete every test bucket on every cluster."""
        for master in self.cluster_spec.masters:
            for bucket_name in self.test_config.buckets:
                self.rest.delete_bucket(host=master, name=bucket_name)

    def create_buckets(self):
        """Create the test buckets, splitting the RAM quota evenly."""
        mem_quota = self.test_config.cluster.mem_quota
        if self.test_config.cluster.num_buckets > 7:
            # Default bucket limit is 10; leave headroom for extras.
            self.increase_bucket_limit(
                self.test_config.cluster.num_buckets + 3)
        if self.test_config.cluster.eventing_metadata_bucket_mem_quota:
            # Reserve RAM for eventing buckets created separately below.
            mem_quota -= (
                self.test_config.cluster.eventing_metadata_bucket_mem_quota +
                self.test_config.cluster.eventing_bucket_mem_quota)
        per_bucket_quota = mem_quota // self.test_config.cluster.num_buckets
        for master in self.cluster_spec.masters:
            for bucket_name in self.test_config.buckets:
                self.rest.create_bucket(
                    host=master,
                    name=bucket_name,
                    ram_quota=per_bucket_quota,
                    password=self.test_config.bucket.password,
                    replica_number=self.test_config.bucket.replica_number,
                    replica_index=self.test_config.bucket.replica_index,
                    eviction_policy=self.test_config.bucket.eviction_policy,
                    bucket_type=self.test_config.bucket.bucket_type,
                    conflict_resolution_type=self.test_config.bucket.conflict_resolution_type,
                    compression_mode=self.test_config.bucket.compression_mode,
                )

    def create_eventing_buckets(self):
        """Create the eventing source buckets, if a quota is configured."""
        if not self.test_config.cluster.eventing_bucket_mem_quota:
            return
        per_bucket_quota = \
            self.test_config.cluster.eventing_bucket_mem_quota \
            // self.test_config.cluster.eventing_buckets
        for master in self.cluster_spec.masters:
            for bucket_name in self.test_config.eventing_buckets:
                self.rest.create_bucket(
                    host=master,
                    name=bucket_name,
                    ram_quota=per_bucket_quota,
                    password=self.test_config.bucket.password,
                    replica_number=self.test_config.bucket.replica_number,
                    replica_index=self.test_config.bucket.replica_index,
                    eviction_policy=self.test_config.bucket.eviction_policy,
                    bucket_type=self.test_config.bucket.bucket_type,
                    conflict_resolution_type=self.test_config.bucket.conflict_resolution_type,
                )

    def create_eventing_metadata_bucket(self):
        """Create the dedicated eventing metadata bucket, if configured."""
        if not self.test_config.cluster.eventing_metadata_bucket_mem_quota:
            return
        for master in self.cluster_spec.masters:
            self.rest.create_bucket(
                host=master,
                name=self.test_config.cluster.EVENTING_METADATA_BUCKET_NAME,
                ram_quota=self.test_config.cluster.eventing_metadata_bucket_mem_quota,
                password=self.test_config.bucket.password,
                replica_number=self.test_config.bucket.replica_number,
                replica_index=self.test_config.bucket.replica_index,
                eviction_policy=self.test_config.bucket.EVICTION_POLICY,
                bucket_type=self.test_config.bucket.BUCKET_TYPE,
            )

    def configure_auto_compaction(self):
        """Apply and log the auto-compaction settings per cluster."""
        compaction_settings = self.test_config.compaction
        for master in self.cluster_spec.masters:
            self.rest.configure_auto_compaction(master, compaction_settings)
            settings = self.rest.get_auto_compaction_settings(master)
            logger.info('Auto-compaction settings: {}'
                        .format(pretty_dict(settings)))

    def configure_internal_settings(self):
        """Apply ns_server internal settings from the test config."""
        internal_settings = self.test_config.internal_settings
        for master in self.cluster_spec.masters:
            for parameter, value in internal_settings.items():
                self.rest.set_internal_settings(
                    master, {parameter: maybe_atoi(value)})

    def configure_xdcr_settings(self):
        """Apply cluster-wide XDCR settings from the test config."""
        xdcr_cluster_settings = self.test_config.xdcr_cluster_settings
        for master in self.cluster_spec.masters:
            for parameter, value in xdcr_cluster_settings.items():
                self.rest.set_xdcr_cluster_settings(
                    master, {parameter: maybe_atoi(value)})

    def tweak_memory(self):
        """Normalize OS memory behavior (swap, caches, THP) on all nodes."""
        self.remote.reset_swap()
        self.remote.drop_caches()
        self.remote.set_swappiness()
        self.remote.disable_thp()

    def restart_with_alternative_num_vbuckets(self):
        """Restart with a non-default vbucket count, when configured."""
        num_vbuckets = self.test_config.cluster.num_vbuckets
        if num_vbuckets is not None:
            self.remote.restart_with_alternative_num_vbuckets(num_vbuckets)

    def restart_with_alternative_bucket_options(self):
        """Apply custom buckets settings.

        Tune bucket settings (e.g., max_num_shards or max_num_auxio) using
        "/diag/eval" and restart the entire cluster.
        """
        if self.test_config.bucket_extras:
            self.remote.enable_nonlocal_diag_eval()
        cmd = 'ns_bucket:update_bucket_props("{}", ' \
              '[{{extra_config_string, "{}={}"}}]).'
        for option, value in self.test_config.bucket_extras.items():
            logger.info('Changing {} to {}'.format(option, value))
            for master in self.cluster_spec.masters:
                for bucket in self.test_config.buckets:
                    diag_eval = cmd.format(bucket, option, value)
                    self.rest.run_diag_eval(master, diag_eval)
        if self.test_config.bucket_extras:
            self.remote.restart()
            self.wait_until_healthy()

    def tune_logging(self):
        """Adjust log rotation and restart the servers to apply it."""
        self.remote.tune_log_rotation()
        self.remote.restart()

    def enable_auto_failover(self):
        """Enable auto-failover with the configured timeout bounds."""
        failover_min = self.test_config.bucket.failover_min
        failover_max = self.test_config.bucket.failover_max
        for master in self.cluster_spec.masters:
            self.rest.enable_auto_failover(master, failover_min,
                                           failover_max)

    def wait_until_warmed_up(self):
        """Block until every couchbase bucket finishes warmup."""
        # Ephemeral/memcached buckets have no warmup phase.
        if self.test_config.bucket.bucket_type in ('ephemeral',
                                                   'memcached'):
            return
        for master in self.cluster_spec.masters:
            for bucket in self.test_config.buckets:
                self.monitor.monitor_warmup(self.memcached, master, bucket)

    def wait_until_healthy(self):
        """Block until all nodes (incl. analytics) report healthy/active."""
        for master in self.cluster_spec.masters:
            self.monitor.monitor_node_health(master)
            for analytics_node in self.rest.get_active_nodes_by_role(
                    master, 'cbas'):
                self.monitor.monitor_analytics_node_active(analytics_node)

    def gen_disabled_audit_events(self, master: str) -> List[str]:
        """Return the default-disabled audit events minus the extras to keep."""
        curr_settings = self.rest.get_audit_settings(master)
        curr_disabled = {str(event) for event in curr_settings['disabled']}
        disabled = curr_disabled - self.test_config.audit_settings.extra_events
        return list(disabled)

    def enable_audit(self):
        """Enable auditing on EE clusters >= 4.0.0 when requested."""
        if not self.is_compatible(min_release='4.0.0') or \
                self.rest.is_community(self.master_node):
            return
        if not self.test_config.audit_settings.enabled:
            return
        for master in self.cluster_spec.masters:
            disabled = []
            if self.test_config.audit_settings.extra_events:
                disabled = self.gen_disabled_audit_events(master)
            self.rest.enable_audit(master, disabled)

    def generate_ce_roles(self) -> List[str]:
        """Community Edition supports only the admin role."""
        return ['admin']

    def generate_ee_roles(self) -> List[str]:
        """Return per-bucket role templates supported by this EE server."""
        existing_roles = {r['role']
                          for r in self.rest.get_rbac_roles(self.master_node)}
        roles = []
        for role in (
                'bucket_admin',
                'data_dcp_reader',
                'data_monitoring',
                'data_reader_writer',
                'data_reader',
                'data_writer',
                'fts_admin',
                'fts_searcher',
                'query_delete',
                'query_insert',
                'query_select',
                'query_update',
                'views_admin',
        ):
            if role in existing_roles:
                # '{bucket}' is filled in later, per bucket.
                roles.append(role + '[{bucket}]')
        return roles

    def delete_rbac_users(self):
        """Remove per-bucket RBAC users (servers >= 5.0 only)."""
        if not self.is_compatible(min_release='5.0'):
            return
        for master in self.cluster_spec.masters:
            for bucket in self.test_config.buckets:
                self.rest.delete_rbac_user(
                    host=master,
                    bucket=bucket
                )

    def add_rbac_users(self):
        """Create an admin user plus one user per bucket with bucket roles."""
        if not self.rest.supports_rbac(self.master_node):
            logger.info('RBAC not supported - skipping adding RBAC users')
            return
        if self.rest.is_community(self.master_node):
            roles = self.generate_ce_roles()
        else:
            roles = self.generate_ee_roles()
        for master in self.cluster_spec.masters:
            admin_user, admin_password = self.cluster_spec.rest_credentials
            self.rest.add_rbac_user(
                host=master,
                user=admin_user,
                password=admin_password,
                roles=['admin'],
            )
            for bucket in self.test_config.buckets:
                bucket_roles = [role.format(bucket=bucket) for role in roles]
                bucket_roles.append("admin")
                self.rest.add_rbac_user(
                    host=master,
                    user=bucket,  # Backward compatibility
                    password=self.test_config.bucket.password,
                    roles=bucket_roles,
                )

    def throttle_cpu(self):
        """Limit online CPU cores on the servers, when configured."""
        if self.remote.os == 'Cygwin':
            return
        self.remote.enable_cpu()
        if self.test_config.cluster.online_cores:
            self.remote.disable_cpu(self.test_config.cluster.online_cores)

    def tune_memory_settings(self):
        """Apply a kernel memory limit to the configured services' nodes."""
        kernel_memory = self.test_config.cluster.kernel_mem_limit
        if kernel_memory:
            for service in self.test_config.cluster.kernel_mem_limit_services:
                for server in self.cluster_spec.servers_by_role(service):
                    self.remote.tune_memory_settings(host_string=server,
                                                     size=kernel_memory)
            self.monitor.wait_for_servers()

    def reset_memory_settings(self):
        """Remove any kernel memory limits and wait for servers to return."""
        for service in self.test_config.cluster.kernel_mem_limit_services:
            for server in self.cluster_spec.servers_by_role(service):
                self.remote.reset_memory_settings(host_string=server)
        self.monitor.wait_for_servers()

    def flush_iptables(self):
        """Clear firewall rules left over from failure-injection tests."""
        self.remote.flush_iptables()

    def clear_login_history(self):
        """Truncate wtmp so login records don't skew long-running hosts."""
        self.remote.clear_wtmp()

    def disable_wan(self):
        """Remove any WAN emulation (latency/loss) from the servers."""
        self.remote.disable_wan()

    def enable_ipv6(self):
        """Switch the cluster to IPv6 when the spec requests it."""
        if self.test_config.cluster.ipv6:
            self.remote.enable_ipv6()

    def set_x509_certificates(self):
        """Install x509 certificates and enable client cert authentication."""
        logger.info('Setting x509 settings')
        if self.test_config.access_settings.ssl_mode == "auth":
            self.remote.setup_x509()
            for host in self.cluster_spec.servers:
                self.rest.upload_cluster_certificate(host)
            for host in self.cluster_spec.servers:
                self.rest.reload_cluster_certificate(host)
                self.rest.enable_certificate_auth(host)
class ClusterManager(object):
    """Legacy (Python 2 era) cluster configuration manager.

    Older variant of ClusterManager: uses ``yield_*`` generator accessors
    and server groups; superseded by the py3 class elsewhere in this file.
    """

    def __init__(self, cluster_spec, test_config):
        self.cluster_spec = cluster_spec
        self.test_config = test_config
        self.rest = RestHelper(cluster_spec)
        self.remote = RemoteHelper(cluster_spec)
        self.monitor = Monitor(cluster_spec)
        # NOTE(review): other variants pass test_config to MemcachedHelper;
        # verify against the MemcachedHelper signature of this era.
        self.memcached = MemcachedHelper(cluster_spec)
        # clusters is materialized once; the rest remain callables.
        self.clusters = cluster_spec.yield_clusters()
        self.servers = cluster_spec.yield_servers
        self.masters = cluster_spec.yield_masters
        self.hostnames = cluster_spec.yield_hostnames
        self.initial_nodes = test_config.cluster.initial_nodes
        self.mem_quota = test_config.cluster.mem_quota
        self.group_number = test_config.cluster.group_number or 1

    def set_data_path(self):
        """Set the data/index directories from the cluster spec."""
        data_path, index_path = self.cluster_spec.paths
        for server in self.servers():
            self.rest.set_data_path(server, data_path, index_path)

    def set_auth(self):
        """Set administrator credentials on every node."""
        for server in self.servers():
            self.rest.set_auth(server)

    def set_mem_quota(self):
        """Apply the KV RAM quota to every node."""
        for server in self.servers():
            self.rest.set_mem_quota(server, self.mem_quota)

    def disable_moxi(self):
        """Disable the moxi proxy when the test config requests it."""
        if self.test_config.cluster.disable_moxi is not None:
            self.remote.disable_moxi()

    def create_server_groups(self):
        """Create additional server groups ("Group 2", ...)."""
        for master in self.masters():
            for i in range(1, self.group_number):
                name = 'Group {}'.format(i + 1)
                self.rest.create_server_group(master, name=name)

    def add_nodes(self):
        """Join the initial nodes (into server groups) and rebalance."""
        for (_, servers), initial_nodes in zip(self.clusters,
                                               self.initial_nodes):
            if initial_nodes < 2:  # Single-node cluster
                continue

            # Adding initial nodes
            master = servers[0]
            if self.group_number > 1:
                groups = self.rest.get_server_groups(master)
            else:
                groups = {}
            for i, host_port in enumerate(servers[1:initial_nodes],
                                          start=1):
                host = host_port.split(':')[0]
                uri = groups.get(server_group(servers[:initial_nodes],
                                              self.group_number, i))
                self.rest.add_node(master, host, uri)

            # Rebalance
            master = servers[0]
            known_nodes = servers[:initial_nodes]
            ejected_nodes = []
            self.rest.rebalance(master, known_nodes, ejected_nodes)
            self.monitor.monitor_rebalance(master)

    def create_buckets(self):
        """Create the test buckets, splitting the RAM quota evenly."""
        ram_quota = self.mem_quota / self.test_config.cluster.num_buckets
        replica_number = self.test_config.bucket.replica_number
        replica_index = self.test_config.bucket.replica_index
        eviction_policy = self.test_config.bucket.eviction_policy
        threads_number = self.test_config.bucket.threads_number
        for master in self.masters():
            for bucket_name in self.test_config.buckets:
                self.rest.create_bucket(host_port=master,
                                        name=bucket_name,
                                        ram_quota=ram_quota,
                                        replica_number=replica_number,
                                        replica_index=replica_index,
                                        eviction_policy=eviction_policy,
                                        threads_number=threads_number,
                                        )

    def configure_auto_compaction(self):
        """Apply the auto-compaction settings per cluster."""
        compaction_settings = self.test_config.compaction
        for master in self.masters():
            self.rest.configure_auto_compaction(master, compaction_settings)

    def configure_internal_settings(self):
        """Apply ns_server internal settings (values coerced to int)."""
        internal_settings = self.test_config.internal_settings
        for master in self.masters():
            for parameter, value in internal_settings.items():
                self.rest.set_internal_settings(master,
                                                {parameter: int(value)})

    def tweak_memory(self):
        """Normalize OS memory behavior (swap, caches, THP) on all nodes."""
        self.remote.reset_swap()
        self.remote.drop_caches()
        self.remote.set_swappiness()
        self.remote.disable_thp()

    def restart_with_alternative_num_vbuckets(self):
        """Restart with a non-default vbucket count, when configured."""
        num_vbuckets = self.test_config.cluster.num_vbuckets
        if num_vbuckets is not None:
            self.remote.restart_with_alternative_num_vbuckets(num_vbuckets)

    def restart_with_alternative_bucket_options(self):
        """Tune bucket extras via /diag/eval, then restart the cluster."""
        cmd = 'ns_bucket:update_bucket_props("{}", ' \
              '[{{extra_config_string, "{}={}"}}]).'
        for option in ('max_num_shards', 'max_threads'):
            value = getattr(self.test_config.bucket, option)
            if value:
                logger.info('Changing {} to {}'.format(option, value))
                for master in self.masters():
                    for bucket in self.test_config.buckets:
                        diag_eval = cmd.format(bucket, option, value)
                        self.rest.run_diag_eval(master, diag_eval)
        self.remote.restart()

    def restart_with_alternative_num_cpus(self):
        """Restart the server pinned to a reduced CPU count, if configured."""
        num_cpus = self.test_config.cluster.num_cpus
        if num_cpus:
            self.remote.restart_with_alternative_num_cpus(num_cpus)

    def enable_auto_failover(self):
        """Enable auto-failover on every cluster."""
        for master in self.masters():
            self.rest.enable_auto_failover(master)

    def wait_until_warmed_up(self):
        """Block until every target bucket finishes warmup."""
        target_iterator = TargetIterator(self.cluster_spec,
                                         self.test_config)
        for target in target_iterator:
            host = target.node.split(':')[0]
            self.monitor.monitor_warmup(self.memcached, host, target.bucket)

    def change_watermarks(self):
        """Set memcached flusher watermarks derived from the RAM quota."""
        watermark_settings = self.test_config.watermark_settings
        for hostname, initial_nodes in zip(self.hostnames(),
                                           self.initial_nodes):
            for bucket in self.test_config.buckets:
                for key, val in watermark_settings.items():
                    # Convert the relative watermark into absolute bytes.
                    val = self.memcached.calc_watermark(val, self.mem_quota)
                    self.memcached.set_flusher_param(hostname, bucket,
                                                     key, val)

    def start_cbq_engine(self):
        """Start cbq-engine on the servers when the test config asks."""
        if self.test_config.cluster.run_cbq:
            self.remote.start_cbq()
def run_query(rest: RestHelper, node: str, query: Query) -> float:
    """Execute one analytics statement and return its client-side latency.

    Args:
        rest: REST helper used to submit the statement.
        node: Analytics node the statement is sent to.
        query: Query object whose ``statement`` attribute holds the text.

    Returns:
        Round-trip latency in seconds, as observed by the client.
    """
    # perf_counter() is monotonic and high-resolution; time.time() is a
    # wall clock that can jump under NTP adjustment and skew latencies.
    t0 = time.perf_counter()
    response = rest.exec_analytics_statement(node, query.statement)
    latency = time.perf_counter() - t0  # Latency in seconds
    # Persist the server-side metrics reported alongside the response.
    store_metrics(query.statement, response.json()['metrics'])
    return latency
class PerfTest:
    """Base class for performance tests.

    Wires up the shared helpers (REST, remote shell, memcached, monitor,
    profiler, workers) around a cluster spec and a test config, and
    provides the common phases (load/hot load/access), health checks,
    restore utilities and KPI plumbing that concrete tests build on.
    """

    # cbmonitor metric collectors enabled for this test; subclasses override.
    COLLECTORS = {}

    # Local file the cluster certificate is written to when
    # node-to-node encryption is enabled (see download_certificate()).
    ROOT_CERTIFICATE = 'root.pem'

    def __init__(self, cluster_spec: ClusterSpec, test_config: TestConfig,
                 verbose: bool):
        self.cluster_spec = cluster_spec
        self.test_config = test_config
        self.dynamic_infra = self.cluster_spec.dynamic_infrastructure
        self.target_iterator = TargetIterator(cluster_spec, test_config)
        self.cluster = ClusterManager(cluster_spec, test_config)
        self.remote = RemoteHelper(cluster_spec, verbose)
        self.profiler = Profiler(cluster_spec, test_config)
        # First master drives version lookup and cluster-wide REST calls.
        self.master_node = next(cluster_spec.masters)
        self.memcached = MemcachedHelper(test_config)
        self.monitor = Monitor(cluster_spec, test_config, verbose)
        self.rest = RestHelper(cluster_spec, test_config)
        self.build = self.rest.get_version(self.master_node)
        self.metrics = MetricHelper(self)
        self.reporter = ShowFastReporter(cluster_spec, test_config, self.build)
        self.cbmonitor_snapshots = []
        self.cbmonitor_clusters = []
        if self.test_config.test_case.use_workers:
            self.worker_manager = WorkerManager(cluster_spec, test_config,
                                                verbose)
        if self.test_config.cluster.enable_n2n_encryption:
            # TLS is on: fetch the root certificate up front so later
            # secure connections can verify the cluster.
            self.download_certificate()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Collect failure evidence before tearing the test down.
        failure = self.debug()

        self.tear_down()

        if exc_type == KeyboardInterrupt:
            logger.warn('The test was interrupted')
            # Returning True suppresses the KeyboardInterrupt.
            return True

        if failure:
            logger.interrupt(failure)

    @property
    def query_nodes(self) -> List[str]:
        """Active nodes running the N1QL (query) service."""
        return self.rest.get_active_nodes_by_role(self.master_node, 'n1ql')

    @property
    def index_nodes(self) -> List[str]:
        """Active nodes running the index service."""
        return self.rest.get_active_nodes_by_role(self.master_node, 'index')

    @property
    def fts_nodes(self) -> List[str]:
        """Active nodes running the full-text search service."""
        return self.rest.get_active_nodes_by_role(self.master_node, 'fts')

    @property
    def analytics_nodes(self) -> List[str]:
        """Active nodes running the analytics (cbas) service."""
        return self.rest.get_active_nodes_by_role(self.master_node, 'cbas')

    @property
    def eventing_nodes(self) -> List[str]:
        """Active nodes running the eventing service."""
        return self.rest.get_active_nodes_by_role(self.master_node, 'eventing')

    def tear_down(self):
        """Stop workers, gather artifacts and restore host settings."""
        if self.test_config.profiling_settings.linux_perf_profile_flag:
            self.collect_linux_perf_profiles()

        if self.test_config.test_case.use_workers:
            self.worker_manager.download_celery_logs()
            self.worker_manager.terminate()

        if self.test_config.cluster.online_cores:
            # Re-enable any CPU cores that were taken offline for the test.
            self.remote.enable_cpu()

        if self.test_config.cluster.kernel_mem_limit:
            self.collect_logs()
            self.cluster.reset_memory_settings()

    def collect_linux_perf_profiles(self):
        """Render and download Linux `perf` profiles from the servers."""
        self.remote.generate_linux_perf_script()
        self.remote.get_linuxperf_files()

    def collect_logs(self):
        """Run cbcollect_info and move each host's zip into the CWD."""
        self.remote.collect_info()

        # collect_info drops zips into per-hostname directories; flatten
        # them to '<hostname>.zip' next to the test output.
        for hostname in self.cluster_spec.servers:
            for fname in glob.glob('{}/*.zip'.format(hostname)):
                shutil.move(fname, '{}.zip'.format(hostname))

    def reset_memory_settings(self):
        """Undo kernel memory limits on all affected servers and wait."""
        if self.test_config.cluster.kernel_mem_limit:
            for service in self.test_config.cluster.kernel_mem_limit_services:
                for server in self.cluster_spec.servers_by_role(service):
                    self.remote.reset_memory_settings(host_string=server)
            self.monitor.wait_for_servers()

    def debug(self) -> Optional[str]:
        """Return a failure description, or None if the cluster looks sane.

        Checks core dumps, rebalance state and failovers; the last
        non-empty finding wins.
        """
        failure = self.check_core_dumps()
        failure = self.check_rebalance() or failure
        return self.check_failover() or failure

    def download_certificate(self):
        """Fetch the cluster root certificate and save it locally."""
        cert = self.rest.get_certificate(self.master_node)
        with open(self.ROOT_CERTIFICATE, 'w') as fh:
            fh.write(cert)

    def check_rebalance(self) -> Optional[str]:
        """Return an error string if any cluster is not balanced."""
        if self.dynamic_infra:
            # Not applicable on dynamic (Kubernetes) infrastructure.
            pass
        else:
            for master in self.cluster_spec.masters:
                if self.rest.is_not_balanced(master):
                    return 'The cluster is not balanced'

    def check_failover(self) -> Optional[str]:
        """Return an error string if any unexpected failover happened."""
        if self.dynamic_infra:
            return

        # Tests that deliberately fail nodes over are exempt.
        if hasattr(self, 'rebalance_settings'):
            if self.rebalance_settings.failover or \
                    self.rebalance_settings.graceful_failover:
                return

        for master in self.cluster_spec.masters:
            num_failovers = self.rest.get_failover_counter(master)
            if num_failovers:
                return 'Failover happened {} time(s)'.format(num_failovers)

    def check_core_dumps(self) -> Optional[str]:
        """Return a pretty-printed map of core dumps per host, if any."""
        dumps_per_host = self.remote.detect_core_dumps()
        core_dumps = {
            host: dumps for host, dumps in dumps_per_host.items() if dumps
        }
        if core_dumps:
            return pretty_dict(core_dumps)

    def restore(self):
        """Restore the configured backup repo via cbbackupmgr on the servers."""
        logger.info('Restoring data')
        self.remote.purge_restore_progress(
            self.test_config.restore_settings.backup_storage,
            self.test_config.restore_settings.backup_repo)
        self.remote.restore_data(
            self.test_config.restore_settings.backup_storage,
            self.test_config.restore_settings.backup_repo,
        )

    def fts_collections_restore(self):
        """Restore data, remapping _default._default into a named collection.

        The remap is applied only when the default collection is not a
        load target for the bucket.
        """
        restore_mapping = None
        collection_map = self.test_config.collection.collection_map
        for target in self.target_iterator:
            if not collection_map.get(target.bucket, {}).get(
                    "_default", {}).get("_default", {}).get('load', 0):
                restore_mapping = \
                    "{0}._default._default={0}.scope-1.collection-1"\
                    .format(target.bucket)
            # NOTE(review): restore runs once per target — confirm this is
            # intended for multi-bucket iterators.
            logger.info('Restoring data')
            self.remote.purge_restore_progress(
                self.test_config.restore_settings.backup_storage,
                self.test_config.restore_settings.backup_repo)
            self.remote.restore_data(
                self.test_config.restore_settings.backup_storage,
                self.test_config.restore_settings.backup_repo,
                map_data=restore_mapping)

    def fts_cbimport(self):
        """Export/import data into the configured scopes and collections."""
        logger.info('Restoring data into collections')
        num_collections = self.test_config.jts_access_settings.collections_number
        scope_prefix = self.test_config.jts_access_settings.scope_prefix
        collection_prefix = self.test_config.jts_access_settings.collection_prefix
        scope = self.test_config.jts_access_settings.scope_number
        name_of_backup = self.test_config.restore_settings.backup_repo
        self.remote.export_data(num_collections, collection_prefix,
                                scope_prefix, scope, name_of_backup)

    def restore_local(self):
        """Restore the backup repo using a locally extracted cbbackupmgr."""
        logger.info('Restoring data')
        local.extract_cb(filename='couchbase.rpm')
        local.purge_restore_progress(
            self.cluster_spec,
            archive=self.test_config.restore_settings.backup_storage,
            repo=self.test_config.restore_settings.backup_repo)
        local.cbbackupmgr_restore(
            master_node=self.master_node,
            cluster_spec=self.cluster_spec,
            threads=self.test_config.restore_settings.threads,
            archive=self.test_config.restore_settings.backup_storage,
            repo=self.test_config.restore_settings.backup_repo,
            include_data=self.test_config.backup_settings.include_data,
            map_data=self.test_config.restore_settings.map_data,
            use_tls=self.test_config.restore_settings.use_tls)

    def load_tpcds_json_data(self):
        """Import the TPC-DS JSON data set, per collection when mapped."""
        logger.info('Importing data')
        if self.test_config.collection.collection_map is not None:
            # Only collections flagged with 'load' == 1 receive data.
            cm = self.test_config.collection.collection_map
            for bucket in self.test_config.buckets:
                for scope in cm[bucket]:
                    for collection in cm[bucket][scope]:
                        if cm[bucket][scope][collection]['load'] == 1:
                            self.remote.load_tpcds_data_json_collection(
                                self.test_config.import_settings.import_file,
                                bucket,
                                scope,
                                collection,
                                self.test_config.
                                import_settings.docs_per_collections
                            )
        else:
            for bucket in self.test_config.buckets:
                self.remote.load_tpcds_data_json(
                    self.test_config.import_settings.import_file,
                    bucket,
                )

    def compact_bucket(self, wait: bool = True):
        """Trigger compaction on every target bucket, optionally waiting."""
        for target in self.target_iterator:
            self.rest.trigger_bucket_compaction(target.node, target.bucket)

        if wait:
            for target in self.target_iterator:
                self.monitor.monitor_task(target.node, 'bucket_compaction')

    def wait_for_persistence(self):
        """Block until disk/DCP queues drain and replicas catch up."""
        for target in self.target_iterator:
            self.monitor.monitor_disk_queues(target.node, target.bucket)
            self.monitor.monitor_dcp_queues(target.node, target.bucket)
            self.monitor.monitor_replica_count(target.node, target.bucket)

    def wait_for_indexing(self):
        """Block until all index nodes finish building configured indexes."""
        if self.test_config.index_settings.statements:
            for server in self.index_nodes:
                self.monitor.monitor_indexing(server)

    def check_num_items(self, bucket_items: dict = None,
                        max_retry: int = None):
        """Verify item counts (actives + replicas) on every target.

        Args:
            bucket_items: optional per-bucket expected active-item counts;
                when omitted, counts are derived from the load settings
                (and the collection map, if one is in use).
            max_retry: passed through to the monitor's retry loop.
        """
        if bucket_items:
            for target in self.target_iterator:
                num_items = bucket_items.get(target.bucket, None)
                if num_items:
                    num_items = num_items * (
                        1 + self.test_config.bucket.replica_number)
                    self.monitor.monitor_num_items(target.node, target.bucket,
                                                   num_items,
                                                   max_retry=max_retry)
        elif getattr(self.test_config.load_settings, 'collections', None):
            for target in self.target_iterator:
                num_load_targets = 0
                target_scope_collections = \
                    self.test_config.load_settings.collections[target.bucket]
                for scope in target_scope_collections.keys():
                    for collection in target_scope_collections[scope].keys():
                        if target_scope_collections[scope][collection][
                                'load'] == 1:
                            num_load_targets += 1
                # Items are split evenly across load targets; the floor
                # division mirrors how the loader distributes documents.
                num_items = \
                    (self.test_config.load_settings.items // num_load_targets) * \
                    num_load_targets * \
                    (1 + self.test_config.bucket.replica_number)
                self.monitor.monitor_num_items(target.node, target.bucket,
                                               num_items, max_retry=max_retry)
        else:
            num_items = self.test_config.load_settings.items * (
                1 + self.test_config.bucket.replica_number)
            for target in self.target_iterator:
                self.monitor.monitor_num_items(target.node, target.bucket,
                                               num_items, max_retry=max_retry)

    def reset_kv_stats(self):
        """Zero memcached stats on every KV server, per bucket."""
        master_node = next(self.cluster_spec.masters)
        if self.test_config.cluster.enable_n2n_encryption:
            # With TLS enabled, go through the cbstats CLI instead of the
            # plain memcached connection.
            local.get_cbstats(self.master_node, 11210, "reset",
                              self.cluster_spec)
        else:
            for bucket in self.test_config.buckets:
                for server in self.rest.get_server_list(master_node, bucket):
                    port = self.rest.get_memcached_port(server)
                    self.memcached.reset_stats(server, port, bucket)

    def create_indexes(self):
        """Create and build the configured GSI indexes (or an FTS index).

        After each CREATE/BUILD statement, polls the index status and
        throttles while 10 or more indexes are still building.
        """
        logger.info('Creating and building indexes')

        if not self.test_config.index_settings.couchbase_fts_index_name:
            create_statements = []
            build_statements = []

            # Split statements into CREATE and BUILD groups (whitespace-
            # and case-insensitive matching).
            for statement in self.test_config.index_settings.statements:
                check_stmt = statement.replace(" ", "").upper()
                if 'CREATEINDEX' in check_stmt \
                        or 'CREATEPRIMARYINDEX' in check_stmt:
                    create_statements.append(statement)
                elif 'BUILDINDEX' in check_stmt:
                    build_statements.append(statement)

            for statement in create_statements:
                logger.info('Creating index: ' + statement)
                self.rest.exec_n1ql_statement(self.query_nodes[0], statement)
                cont = False
                while not cont:
                    building = 0
                    index_status = self.rest.get_index_status(
                        self.index_nodes[0])
                    index_list = index_status['status']
                    for index in index_list:
                        if index['status'] != "Ready" and index[
                                'status'] != "Created":
                            building += 1
                    if building < 10:
                        cont = True
                    else:
                        time.sleep(10)

            for statement in build_statements:
                logger.info('Building index: ' + statement)
                self.rest.exec_n1ql_statement(self.query_nodes[0], statement)
                cont = False
                while not cont:
                    building = 0
                    index_status = self.rest.get_index_status(
                        self.index_nodes[0])
                    index_list = index_status['status']
                    for index in index_list:
                        if index['status'] != "Ready" and index[
                                'status'] != "Created":
                            building += 1
                    if building < 10:
                        cont = True
                    else:
                        time.sleep(10)

            logger.info('Index Create and Build Complete')
        else:
            self.create_fts_index_n1ql()

    def create_fts_index_n1ql(self):
        """Create the configured FTS index and wait for indexing to finish."""
        logger.info("Creating FTS index")
        definition = read_json(
            self.test_config.index_settings.couchbase_fts_index_configfile)
        bucket_name = self.test_config.buckets[0]
        definition.update(
            {'name': self.test_config.index_settings.couchbase_fts_index_name})
        if self.test_config.collection.collection_map:
            # Retarget the index from the default mapping to the first
            # non-default scope/collection pair.
            collection_map = self.test_config.collection.collection_map
            definition["params"]["doc_config"][
                "mode"] = "scope.collection.type_field"
            # NOTE(review): [1:][0] assumes '_default' is the first key in
            # the scope map — verify against the collection map builder.
            scope_name = list(collection_map[bucket_name].keys())[1:][0]
            collection_name = list(
                collection_map[bucket_name][scope_name].keys())[0]
            ind_type_mapping = \
                copy.deepcopy(definition["params"]["mapping"]["default_mapping"])
            definition["params"]["mapping"]["default_mapping"][
                "enabled"] = False
            new_type_mapping_name = "{}.{}".format(scope_name, collection_name)
            definition["params"]["mapping"]["types"] = {
                new_type_mapping_name: ind_type_mapping
            }
        logger.info('Index definition: {}'.format(pretty_dict(definition)))
        self.rest.create_fts_index(
            self.fts_nodes[0],
            self.test_config.index_settings.couchbase_fts_index_name,
            definition)
        self.monitor.monitor_fts_indexing_queue(
            self.fts_nodes[0],
            self.test_config.index_settings.couchbase_fts_index_name,
            int(self.test_config.access_settings.items))

    def create_functions(self):
        """Create the configured N1QL (UDF) functions."""
        logger.info('Creating n1ql functions')
        for statement in self.test_config.n1ql_function_settings.statements:
            self.rest.exec_n1ql_statement(self.query_nodes[0], statement)

    def sleep(self):
        """Idle for the access-phase duration (no load generated)."""
        access_settings = self.test_config.access_settings
        logger.info('Running phase for {} seconds'.format(
            access_settings.time))
        time.sleep(access_settings.time)

    def run_phase(self,
                  phase: str,
                  task: Callable,
                  settings: PhaseSettings,
                  target_iterator: Iterable,
                  timer: int = None,
                  wait: bool = True):
        """Dispatch a workload phase to the workers.

        Args:
            phase: human-readable phase name, used for logging only.
            task: worker callable executed against each target.
            settings: phase settings passed to each worker.
            target_iterator: targets (node/bucket pairs) to run against.
            timer: optional phase duration handed to the workers.
            wait: when True, block until all workers finish.
        """
        logger.info('Running {}: {}'.format(phase, pretty_dict(settings)))
        self.worker_manager.run_tasks(task, settings, target_iterator, timer)
        if wait:
            self.worker_manager.wait_for_workers()

    def load(self,
             task: Callable = spring_task,
             settings: PhaseSettings = None,
             target_iterator: Iterable = None):
        """Run the initial load phase (blocking)."""
        if settings is None:
            settings = self.test_config.load_settings
        if target_iterator is None:
            target_iterator = self.target_iterator
        self.run_phase('load phase', task, settings, target_iterator)

    def hot_load(self,
                 task: Callable = spring_task,
                 settings: PhaseSettings = None,
                 target_iterator: Iterable = None):
        """Run the working-set warm-up phase (blocking)."""
        if settings is None:
            settings = self.test_config.hot_load_settings
        if target_iterator is None:
            target_iterator = self.target_iterator
        self.run_phase('hot load phase', task, settings, target_iterator)

    def xattr_load(self,
                   task: Callable = spring_task,
                   target_iterator: Iterable = None):
        """Run the extended-attributes load phase (blocking)."""
        if target_iterator is None:
            target_iterator = self.target_iterator
        settings = self.test_config.xattr_load_settings
        self.run_phase('xattr phase', task, settings, target_iterator)

    def access(self,
               task: Callable = spring_task,
               settings: PhaseSettings = None,
               target_iterator: Iterable = None):
        """Run the timed access phase and wait for it to finish."""
        if settings is None:
            settings = self.test_config.access_settings
        if target_iterator is None:
            target_iterator = self.target_iterator
        self.run_phase('access phase', task, settings, target_iterator,
                       timer=settings.time)

    def access_bg(self,
                  task: Callable = spring_task,
                  settings: PhaseSettings = None,
                  target_iterator: Iterable = None):
        """Start the access phase in the background (non-blocking)."""
        if settings is None:
            settings = self.test_config.access_settings
        if target_iterator is None:
            target_iterator = self.target_iterator
        self.run_phase('background access phase', task, settings,
                       target_iterator, timer=settings.time, wait=False)

    def report_kpi(self, *args, **kwargs):
        """Publish KPIs via _report_kpi() when stats reporting is enabled."""
        if self.test_config.stats_settings.enabled:
            self._report_kpi(*args, **kwargs)

    def _report_kpi(self, *args, **kwargs):
        # Hook for subclasses; the base test reports nothing.
        pass

    def _measure_curr_ops(self) -> int:
        """Sum cmd_get + cmd_set across all KV nodes and buckets."""
        ops = 0
        for bucket in self.test_config.buckets:
            for server in self.rest.get_active_nodes_by_role(
                    self.master_node, "kv"):
                port = self.rest.get_memcached_port(server)
                stats = self.memcached.get_stats(server, port, bucket)
                for stat in 'cmd_get', 'cmd_set':
                    ops += int(stats[stat])
        return ops

    def _measure_disk_ops(self):
        """Collect per-server background-fetch and create/update op counts.

        Returns a dict: server -> {'get_ops': int, 'set_ops': int}.
        NOTE(review): ret_stats[server] is re-initialized inside the bucket
        loop, so with multiple buckets only the last bucket's numbers
        survive — confirm single-bucket usage or accumulate instead.
        """
        ret_stats = dict()
        for bucket in self.test_config.buckets:
            for server in self.rest.get_active_nodes_by_role(
                    self.master_node, "kv"):
                ret_stats[server] = dict()
                port = self.rest.get_memcached_port(server)
                stats = self.memcached.get_stats(server, port, bucket)
                ret_stats[server]["get_ops"] = int(stats['ep_bg_fetched'])
                # Creates + updates across active, replica and pending
                # vbuckets count as "set" operations.
                sets = \
                    int(stats['vb_active_ops_create']) \
                    + int(stats['vb_replica_ops_create']) \
                    + int(stats['vb_pending_ops_create']) \
                    + int(stats['vb_active_ops_update']) \
                    + int(stats['vb_replica_ops_update']) \
                    + int(stats['vb_pending_ops_update'])
                ret_stats[server]["set_ops"] = sets
        return ret_stats
class ClusterManager: def __init__(self, cluster_spec: ClusterSpec, test_config: TestConfig, verbose: bool = False): self.cluster_spec = cluster_spec self.test_config = test_config self.dynamic_infra = self.cluster_spec.dynamic_infrastructure self.rest = RestHelper(cluster_spec) self.remote = RemoteHelper(cluster_spec, verbose) self.monitor = Monitor(cluster_spec, test_config, verbose) self.memcached = MemcachedHelper(test_config) self.master_node = next(self.cluster_spec.masters) self.initial_nodes = test_config.cluster.initial_nodes self.build = self.rest.get_version(self.master_node) def is_compatible(self, min_release: str) -> bool: for master in self.cluster_spec.masters: version = self.rest.get_version(master) return version >= min_release def set_data_path(self): if self.dynamic_infra: return for server in self.cluster_spec.servers: self.remote.change_owner(server, self.cluster_spec.data_path) self.rest.set_data_path(server, self.cluster_spec.data_path) def set_index_path(self): if self.dynamic_infra: return for server in self.cluster_spec.servers: self.remote.change_owner(server, self.cluster_spec.index_path) self.rest.set_index_path(server, self.cluster_spec.index_path) def set_analytics_path(self): if self.dynamic_infra: return paths = [] for path in self.cluster_spec.analytics_paths: for i in range(self.test_config.analytics_settings.num_io_devices): io_device = '{}/dev{}'.format(path, i) paths.append(io_device) for server in self.cluster_spec.servers_by_role('cbas'): for path in self.cluster_spec.analytics_paths: self.remote.change_owner(server, path) self.rest.set_analytics_paths(server, paths) def rename(self): if self.dynamic_infra: return else: for server in self.cluster_spec.servers: self.rest.rename(server) def set_auth(self): if self.dynamic_infra: return else: for server in self.cluster_spec.servers: self.rest.set_auth(server) def set_mem_quotas(self): if self.dynamic_infra: cluster = self.remote.get_cluster() 
cluster['spec']['cluster']['dataServiceMemoryQuota'] = \ '{}Mi'.format(self.test_config.cluster.mem_quota) cluster['spec']['cluster']['indexServiceMemoryQuota'] = \ '{}Mi'.format(self.test_config.cluster.index_mem_quota) if self.test_config.cluster.fts_index_mem_quota: cluster['spec']['cluster']['searchServiceMemoryQuota'] = \ '{}Mi'.format(self.test_config.cluster.fts_index_mem_quota) if self.test_config.cluster.analytics_mem_quota: cluster['spec']['cluster']['analyticsServiceMemoryQuota'] = \ '{}Mi'.format(self.test_config.cluster.analytics_mem_quota) if self.test_config.cluster.eventing_mem_quota: cluster['spec']['cluster']['eventingServiceMemoryQuota'] = \ '{}Mi'.format(self.test_config.cluster.eventing_mem_quota) self.remote.update_cluster_config(cluster) else: for master in self.cluster_spec.masters: self.rest.set_mem_quota(master, self.test_config.cluster.mem_quota) self.rest.set_index_mem_quota( master, self.test_config.cluster.index_mem_quota) if self.test_config.cluster.fts_index_mem_quota: self.rest.set_fts_index_mem_quota( master, self.test_config.cluster.fts_index_mem_quota) if self.test_config.cluster.analytics_mem_quota: self.rest.set_analytics_mem_quota( master, self.test_config.cluster.analytics_mem_quota) if self.test_config.cluster.eventing_mem_quota: self.rest.set_eventing_mem_quota( master, self.test_config.cluster.eventing_mem_quota) def set_query_settings(self): logger.info('Setting query settings') if self.dynamic_infra: return query_nodes = self.cluster_spec.servers_by_role('n1ql') if query_nodes: settings = self.test_config.n1ql_settings.cbq_settings if settings: self.rest.set_query_settings(query_nodes[0], settings) settings = self.rest.get_query_settings(query_nodes[0]) settings = pretty_dict(settings) logger.info('Query settings: {}'.format(settings)) def set_index_settings(self): logger.info('Setting index settings') index_nodes = self.cluster_spec.servers_by_role('index') if index_nodes: settings = 
self.test_config.gsi_settings.settings if settings: if self.dynamic_infra: cluster = self.remote.get_cluster() cluster['spec']['cluster']['indexStorageSetting'] = \ settings['indexer.settings.storage_mode'] self.remote.update_cluster_config(cluster, timeout=300, reboot=True) logger.info('Index settings: {}'.format(settings)) else: self.rest.set_index_settings(index_nodes[0], settings) settings = self.rest.get_index_settings(index_nodes[0]) settings = pretty_dict(settings) logger.info('Index settings: {}'.format(settings)) def set_services(self): if self.dynamic_infra: cluster = self.remote.get_cluster() server_types = dict() server_roles = self.cluster_spec.roles for server, role in server_roles.items(): role = role\ .replace('kv', 'data')\ .replace('n1ql', 'query') server_type_count = server_types.get(role, 0) server_types[role] = server_type_count + 1 istio = 'false' if self.cluster_spec.istio_enabled(cluster_name='k8s_cluster_1'): istio = 'true' cluster_servers = [] operator_version = self.remote.get_operator_version() operator_major = int(operator_version.split(".")[0]) operator_minor = int(operator_version.split(".")[1]) for server_role, server_role_count in server_types.items(): node_selector = { '{}_enabled'.format( service.replace('data', 'kv').replace('query', 'n1ql')): 'true' for service in server_role.split(",") } node_selector['NodeRoles'] = 'couchbase1' spec = { 'imagePullSecrets': [{ 'name': 'regcred' }], 'nodeSelector': node_selector, } if (operator_major, operator_minor) <= (2, 1): spec['containers'] = [] pod_def =\ { 'spec': spec, 'metadata': { 'annotations': {'sidecar.istio.io/inject': istio} } } server_def = \ { 'name': server_role.replace(",", "-"), 'services': server_role.split(","), 'pod': pod_def, 'size': server_role_count, 'volumeMounts': {'default': 'couchbase_kv'} } cluster_servers.append(server_def) cluster['spec']['servers'] = cluster_servers self.remote.update_cluster_config(cluster, timeout=300, reboot=True) else: if not 
self.is_compatible(min_release='4.0.0'): return for master in self.cluster_spec.masters: roles = self.cluster_spec.roles[master] self.rest.set_services(master, roles) def add_nodes(self): if self.dynamic_infra: return for (_, servers), initial_nodes \ in zip(self.cluster_spec.clusters, self.initial_nodes): if initial_nodes < 2: # Single-node cluster continue master = servers[0] for node in servers[1:initial_nodes]: roles = self.cluster_spec.roles[node] self.rest.add_node(master, node, roles) def rebalance(self): if self.dynamic_infra: return for (_, servers), initial_nodes \ in zip(self.cluster_spec.clusters, self.initial_nodes): master = servers[0] known_nodes = servers[:initial_nodes] ejected_nodes = [] self.rest.rebalance(master, known_nodes, ejected_nodes) self.monitor.monitor_rebalance(master) self.wait_until_healthy() def increase_bucket_limit(self, num_buckets: int): if self.dynamic_infra: return for master in self.cluster_spec.masters: self.rest.increase_bucket_limit(master, num_buckets) def flush_buckets(self): for master in self.cluster_spec.masters: for bucket_name in self.test_config.buckets: self.rest.flush_bucket(host=master, bucket=bucket_name) def delete_buckets(self): for master in self.cluster_spec.masters: for bucket_name in self.test_config.buckets: self.rest.delete_bucket(host=master, name=bucket_name) def create_buckets(self): mem_quota = self.test_config.cluster.mem_quota if self.test_config.cluster.num_buckets > 7: self.increase_bucket_limit(self.test_config.cluster.num_buckets + 3) if self.test_config.cluster.eventing_metadata_bucket_mem_quota: mem_quota -= ( self.test_config.cluster.eventing_metadata_bucket_mem_quota + self.test_config.cluster.eventing_bucket_mem_quota) per_bucket_quota = mem_quota // self.test_config.cluster.num_buckets if self.dynamic_infra: self.remote.delete_all_buckets() for bucket_name in self.test_config.buckets: self.remote.create_bucket(bucket_name, per_bucket_quota, self.test_config.bucket) else: if 
self.test_config.bucket.backend_storage == 'magma': self.enable_developer_preview() for master in self.cluster_spec.masters: for bucket_name in self.test_config.buckets: self.rest.create_bucket( host=master, name=bucket_name, ram_quota=per_bucket_quota, password=self.test_config.bucket.password, replica_number=self.test_config.bucket.replica_number, replica_index=self.test_config.bucket.replica_index, eviction_policy=self.test_config.bucket. eviction_policy, bucket_type=self.test_config.bucket.bucket_type, backend_storage=self.test_config.bucket. backend_storage, conflict_resolution_type=self.test_config.bucket. conflict_resolution_type, compression_mode=self.test_config.bucket. compression_mode, ) def create_collections(self): if self.dynamic_infra: return collection_map = self.test_config.collection.collection_map for master in self.cluster_spec.masters: if collection_map is not None: if self.test_config.collection.use_bulk_api: for bucket in collection_map.keys(): create_scopes = [] for scope in collection_map[bucket]: scope_collections = [] for collection in collection_map[bucket][scope]: scope_collections.append({"name": collection}) create_scopes.append({ "name": scope, "collections": scope_collections }) self.rest.set_collection_map(master, bucket, {"scopes": create_scopes}) else: for bucket in collection_map.keys(): delete_default = True for scope in collection_map[bucket]: if scope == '_default': for collection in collection_map[bucket][ scope]: if collection == "_default": delete_default = False if delete_default: self.rest.delete_collection( master, bucket, '_default', '_default') for bucket in collection_map.keys(): for scope in collection_map[bucket]: if scope != '_default': self.rest.create_scope(master, bucket, scope) for collection in collection_map[bucket][scope]: if collection != '_default': self.rest.create_collection( master, bucket, scope, collection) def create_eventing_buckets(self): if not self.test_config.cluster.eventing_bucket_mem_quota: 
return if self.dynamic_infra: return per_bucket_quota = \ self.test_config.cluster.eventing_bucket_mem_quota \ // self.test_config.cluster.eventing_buckets for master in self.cluster_spec.masters: for bucket_name in self.test_config.eventing_buckets: self.rest.create_bucket( host=master, name=bucket_name, ram_quota=per_bucket_quota, password=self.test_config.bucket.password, replica_number=self.test_config.bucket.replica_number, replica_index=self.test_config.bucket.replica_index, eviction_policy=self.test_config.bucket.eviction_policy, bucket_type=self.test_config.bucket.bucket_type, conflict_resolution_type=self.test_config.bucket. conflict_resolution_type, ) def create_eventing_metadata_bucket(self): if not self.test_config.cluster.eventing_metadata_bucket_mem_quota: return if self.dynamic_infra: return for master in self.cluster_spec.masters: self.rest.create_bucket( host=master, name=self.test_config.cluster.EVENTING_METADATA_BUCKET_NAME, ram_quota=self.test_config.cluster. eventing_metadata_bucket_mem_quota, password=self.test_config.bucket.password, replica_number=self.test_config.bucket.replica_number, replica_index=self.test_config.bucket.replica_index, eviction_policy=self.test_config.bucket.EVICTION_POLICY, bucket_type=self.test_config.bucket.BUCKET_TYPE, ) def configure_auto_compaction(self): compaction_settings = self.test_config.compaction if self.dynamic_infra: cluster = self.remote.get_cluster() db = int(compaction_settings.db_percentage) view = int(compaction_settings.view_percentage) para = bool(str(compaction_settings.parallel).lower()) auto_compaction = cluster['spec']['cluster']\ .get('autoCompaction', {'databaseFragmentationThreshold': {'percent': 30}, 'viewFragmentationThreshold': {'percent': 30}, 'parallelCompaction': False}) db_percent = auto_compaction.get('databaseFragmentationThreshold', {'percent': 30}) db_percent['percent'] = db auto_compaction['databaseFragmentationThreshold'] = db_percent views_percent = 
auto_compaction.get('viewFragmentationThreshold', {'percent': 30}) views_percent['percent'] = view auto_compaction['viewFragmentationThreshold'] = views_percent auto_compaction['parallelCompaction'] = para self.remote.update_cluster_config(cluster) else: for master in self.cluster_spec.masters: self.rest.configure_auto_compaction(master, compaction_settings) settings = self.rest.get_auto_compaction_settings(master) logger.info('Auto-compaction settings: {}'.format( pretty_dict(settings))) def configure_internal_settings(self): internal_settings = self.test_config.internal_settings for master in self.cluster_spec.masters: for parameter, value in internal_settings.items(): if self.dynamic_infra: raise Exception( 'not supported for dynamic infrastructure yet') else: self.rest.set_internal_settings( master, {parameter: maybe_atoi(value)}) def configure_xdcr_settings(self): xdcr_cluster_settings = self.test_config.xdcr_cluster_settings if self.dynamic_infra: return for master in self.cluster_spec.masters: for parameter, value in xdcr_cluster_settings.items(): self.rest.set_xdcr_cluster_settings( master, {parameter: maybe_atoi(value)}) def tweak_memory(self): if self.dynamic_infra: return self.remote.reset_swap() self.remote.drop_caches() self.remote.set_swappiness() self.remote.disable_thp() def enable_n2n_encryption(self): if self.dynamic_infra: return if self.test_config.cluster.enable_n2n_encryption: for master in self.cluster_spec.masters: self.remote.enable_n2n_encryption( master, self.test_config.cluster.enable_n2n_encryption) def restart_with_alternative_num_vbuckets(self): num_vbuckets = self.test_config.cluster.num_vbuckets if num_vbuckets is not None: if self.dynamic_infra: raise Exception('not supported for dynamic infrastructure yet') else: self.remote.restart_with_alternative_num_vbuckets(num_vbuckets) def restart_with_alternative_bucket_options(self): """Apply custom buckets settings. 
Tune bucket settings (e.g., max_num_shards or max_num_auxio) using "/diag/eval" and restart the entire cluster. """ if self.dynamic_infra: return if self.test_config.bucket_extras: self.remote.enable_nonlocal_diag_eval() cmd = 'ns_bucket:update_bucket_props("{}", ' \ '[{{extra_config_string, "{}={}"}}]).' for option, value in self.test_config.bucket_extras.items(): if re.search("^num_.*_threads$", option): self.rest.set_num_threads(self.master_node, option, value) else: logger.info('Changing {} to {}'.format(option, value)) for master in self.cluster_spec.masters: for bucket in self.test_config.buckets: diag_eval = cmd.format(bucket, option, value) self.rest.run_diag_eval(master, diag_eval) if self.test_config.bucket_extras: self.disable_auto_failover() self.remote.restart() self.wait_until_healthy() self.enable_auto_failover() def tune_logging(self): if self.dynamic_infra: return self.remote.tune_log_rotation() self.remote.restart() def enable_auto_failover(self): enabled = self.test_config.bucket.autofailover_enabled failover_min = self.test_config.bucket.failover_min failover_max = self.test_config.bucket.failover_max if self.dynamic_infra: cluster = self.remote.get_cluster() cluster['spec']['cluster']['autoFailoverMaxCount'] = 1 cluster['spec']['cluster']['autoFailoverServerGroup'] = bool( enabled) cluster['spec']['cluster']['autoFailoverOnDataDiskIssues'] = bool( enabled) cluster['spec']['cluster']['autoFailoverOnDataDiskIssuesTimePeriod'] = \ '{}s'.format(10) cluster['spec']['cluster']['autoFailoverTimeout'] = \ '{}s'.format(failover_max) self.remote.update_cluster_config(cluster) else: for master in self.cluster_spec.masters: self.rest.set_auto_failover(master, enabled, failover_min, failover_max) def disable_auto_failover(self): enabled = 'false' failover_min = self.test_config.bucket.failover_min failover_max = self.test_config.bucket.failover_max if self.dynamic_infra: cluster = self.remote.get_cluster() cluster['spec']['cluster']['autoFailoverMaxCount'] 
= 1 cluster['spec']['cluster']['autoFailoverServerGroup'] = bool( enabled) cluster['spec']['cluster']['autoFailoverOnDataDiskIssues'] = bool( enabled) cluster['spec']['cluster']['autoFailoverOnDataDiskIssuesTimePeriod'] = \ '{}s'.format(10) cluster['spec']['cluster']['autoFailoverTimeout'] = \ '{}s'.format(failover_max) self.remote.update_cluster_config(cluster) else: for master in self.cluster_spec.masters: self.rest.set_auto_failover(master, enabled, failover_min, failover_max) def wait_until_warmed_up(self): if self.test_config.bucket.bucket_type in ('ephemeral', 'memcached'): return if self.dynamic_infra: self.remote.wait_for_cluster_ready() else: for master in self.cluster_spec.masters: for bucket in self.test_config.buckets: self.monitor.monitor_warmup(self.memcached, master, bucket) def wait_until_healthy(self): if self.dynamic_infra: self.remote.wait_for_cluster_ready() else: for master in self.cluster_spec.masters: self.monitor.monitor_node_health(master) for analytics_node in self.rest.get_active_nodes_by_role( master, 'cbas'): self.monitor.monitor_analytics_node_active(analytics_node) def gen_disabled_audit_events(self, master: str) -> List[str]: curr_settings = self.rest.get_audit_settings(master) curr_disabled = {str(event) for event in curr_settings['disabled']} disabled = curr_disabled - self.test_config.audit_settings.extra_events return list(disabled) def enable_audit(self): if self.dynamic_infra: return if not self.is_compatible(min_release='4.0.0') or \ self.rest.is_community(self.master_node): return if not self.test_config.audit_settings.enabled: return for master in self.cluster_spec.masters: disabled = [] if self.test_config.audit_settings.extra_events: disabled = self.gen_disabled_audit_events(master) self.rest.enable_audit(master, disabled) def generate_ce_roles(self) -> List[str]: return ['admin'] def generate_ee_roles(self) -> List[str]: existing_roles = { r['role'] for r in self.rest.get_rbac_roles(self.master_node) } roles = [] for role 
in ( 'bucket_admin', 'data_dcp_reader', 'data_monitoring', 'data_reader_writer', 'data_reader', 'data_writer', 'fts_admin', 'fts_searcher', 'query_delete', 'query_insert', 'query_select', 'query_update', 'views_admin', ): if role in existing_roles: roles.append(role + '[{bucket}]') return roles def delete_rbac_users(self): if not self.is_compatible(min_release='5.0'): return for master in self.cluster_spec.masters: for bucket in self.test_config.buckets: self.rest.delete_rbac_user(host=master, bucket=bucket) def add_rbac_users(self): if self.dynamic_infra: self.remote.create_from_file( "cloud/operator/2/1/user-password-secret.yaml") # self.remote.create_from_file("cloud/operator/2/1/admin-user.yaml") self.remote.create_from_file("cloud/operator/2/1/bucket-user.yaml") self.remote.create_from_file( "cloud/operator/2/1/rbac-admin-group.yaml") self.remote.create_from_file( "cloud/operator/2/1/rbac-admin-role-binding.yaml") else: if not self.rest.supports_rbac(self.master_node): logger.info('RBAC not supported - skipping adding RBAC users') return if self.rest.is_community(self.master_node): roles = self.generate_ce_roles() else: roles = self.generate_ee_roles() for master in self.cluster_spec.masters: admin_user, admin_password = self.cluster_spec.rest_credentials self.rest.add_rbac_user( host=master, user=admin_user, password=admin_password, roles=['admin'], ) buckets = self.test_config.buckets + self.test_config.eventing_buckets for bucket in buckets: bucket_roles = [ role.format(bucket=bucket) for role in roles ] bucket_roles.append("admin") self.rest.add_rbac_user( host=master, user=bucket, # Backward compatibility password=self.test_config.bucket.password, roles=bucket_roles, ) def add_extra_rbac_users(self, num_users): if not self.rest.supports_rbac(self.master_node): logger.info('RBAC not supported - skipping adding RBAC users') return if self.rest.is_community(self.master_node): roles = self.generate_ce_roles() else: roles = self.generate_ee_roles() for master 
in self.cluster_spec.masters: admin_user, admin_password = self.cluster_spec.rest_credentials self.rest.add_rbac_user( host=master, user=admin_user, password=admin_password, roles=['admin'], ) for bucket in self.test_config.buckets: bucket_roles = [role.format(bucket=bucket) for role in roles] bucket_roles.append("admin") for i in range(1, num_users + 1): user = '******'.format(user_number=str(i)) self.rest.add_rbac_user( host=master, user=user, password=self.test_config.bucket.password, roles=bucket_roles, ) def throttle_cpu(self): if self.dynamic_infra: cluster = self.remote.get_cluster() if self.test_config.cluster.enable_cpu_cores: server_groups = cluster['spec']['servers'] updated_server_groups = [] default_cpu = 80 for server_group in server_groups: resources = server_group.get('resources', {}) limits = resources.get('limits', {}) limits['cpu'] = default_cpu resources['limits'] = limits server_group['resources'] = resources updated_server_groups.append(server_group) cluster['spec']['servers'] = updated_server_groups if self.test_config.cluster.online_cores: server_groups = cluster['spec']['servers'] updated_server_groups = [] online_vcpus = self.test_config.cluster.online_cores * 2 for server_group in server_groups: resources = server_group.get('resources', {}) limits = resources.get('limits', {}) limits['cpu'] = online_vcpus resources['limits'] = limits server_group['resources'] = resources updated_server_groups.append(server_group) cluster['spec']['servers'] = updated_server_groups self.remote.update_cluster_config(cluster, timeout=300, reboot=True) else: if self.remote.os == 'Cygwin': return if self.test_config.cluster.enable_cpu_cores: self.remote.enable_cpu() if self.test_config.cluster.online_cores: self.remote.disable_cpu(self.test_config.cluster.online_cores) def tune_memory_settings(self): kernel_memory = self.test_config.cluster.kernel_mem_limit if kernel_memory: if self.dynamic_infra: cluster = self.remote.get_cluster() server_groups = 
cluster['spec']['servers'] tune_services = set() # CAO uses different service names than perfrunner for service in self.test_config.cluster.kernel_mem_limit_services: if service == 'kv': service = 'data' elif service == 'n1ql': service = 'query' elif service == 'fts': service = 'search' elif service == 'cbas': service = 'analytics' tune_services.add(service) updated_server_groups = [] default_mem = '128Gi' for server_group in server_groups: services_in_group = set(server_group['services']) resources = server_group.get('resources', {}) limits = resources.get('limits', {}) mem_limit = limits.get('memory', default_mem) if services_in_group.intersection( tune_services) and kernel_memory != 0: mem_limit = '{}Mi'.format(kernel_memory) limits['memory'] = mem_limit resources['limits'] = limits server_group['resources'] = resources updated_server_groups.append(server_group) cluster['spec']['servers'] = updated_server_groups self.remote.update_cluster_config(cluster, timeout=300, reboot=True) else: for service in self.test_config.cluster.kernel_mem_limit_services: for server in self.cluster_spec.servers_by_role(service): self.remote.tune_memory_settings(host_string=server, size=kernel_memory) self.monitor.wait_for_servers() def reset_memory_settings(self): if self.dynamic_infra: return for service in self.test_config.cluster.kernel_mem_limit_services: for server in self.cluster_spec.servers_by_role(service): self.remote.reset_memory_settings(host_string=server) self.monitor.wait_for_servers() def flush_iptables(self): if self.dynamic_infra: return self.remote.flush_iptables() def clear_login_history(self): if self.dynamic_infra: return self.remote.clear_wtmp() def disable_wan(self): if self.dynamic_infra: return self.remote.disable_wan() def enable_ipv6(self): if self.dynamic_infra: return if self.test_config.cluster.ipv6: version, build_number = self.build.split('-') build = tuple(map(int, version.split('.'))) + (int(build_number), ) if build < (6, 5, 0, 0): 
self.remote.update_ip_family_rest() else: self.remote.update_ip_family_cli() self.remote.enable_ipv6() def set_x509_certificates(self): if self.dynamic_infra: return logger.info('Setting x509 settings') if self.test_config.access_settings.ssl_mode == "auth": self.remote.setup_x509() for host in self.cluster_spec.servers: self.rest.upload_cluster_certificate(host) for host in self.cluster_spec.servers: self.rest.reload_cluster_certificate(host) self.rest.enable_certificate_auth(host) def set_cipher_suite(self): if self.dynamic_infra: return if self.test_config.access_settings.cipher_list: check_cipher_suit = self.rest.get_cipher_suite(self.master_node) logger.info('current cipher suit: {}'.format(check_cipher_suit)) self.rest.set_cipher_suite( self.master_node, self.test_config.access_settings.cipher_list) check_cipher_suit = self.rest.get_cipher_suite(self.master_node) logger.info('new cipher suit: {}'.format(check_cipher_suit)) def set_min_tls_version(self): if self.dynamic_infra: return if self.test_config.access_settings.min_tls_version: check_tls_version = self.rest.get_minimum_tls_version( self.master_node) logger.info('current tls version: {}'.format(check_tls_version)) self.rest.set_minimum_tls_version( self.master_node, self.test_config.access_settings.min_tls_version) check_tls_version = self.rest.get_minimum_tls_version( self.master_node) logger.info('new tls version: {}'.format(check_tls_version)) def get_debug_rpm_url(self): release, build_number = self.build.split('-') build = tuple(map(int, release.split('.'))) + (int(build_number), ) if build > (7, 0, 0, 0): release = 'cheshire-cat' elif build > (6, 5, 0, 0) and build < (7, 0, 0, 0): release = 'mad-hatter' elif build < (6, 5, 0, 0): release = 'alice' centos_version = self.remote.detect_centos_release() rpm_url = 'http://latestbuilds.service.couchbase.com/builds/' \ 'latestbuilds/couchbase-server/{}/{}/' \ 'couchbase-server-enterprise-debuginfo-{}-centos{}.x86_64.rpm' \ ''.format(release, 
build_number, self.build, centos_version) return rpm_url def install_cb_debug_rpm(self): self.remote.install_cb_debug_rpm(url=self.get_debug_rpm_url()) def enable_developer_preview(self): release, build_number = self.build.split('-') build = tuple(map(int, release.split('.'))) + (int(build_number), ) if build > (7, 0, 0, 4698) or build < (0, 0, 0, 9999): self.remote.enable_developer_preview()
class ClusterManager(object):
    """Legacy cluster bring-up/configuration manager (Python-2 era).

    Wraps the REST API, SSH (RemoteHelper), memcached and monitoring
    helpers to take a freshly installed set of nodes through quota,
    service, bucket and tuning configuration.
    """

    def __init__(self, cluster_spec, test_config, verbose):
        self.cluster_spec = cluster_spec
        self.test_config = test_config

        self.rest = RestHelper(cluster_spec)
        self.remote = RemoteHelper(cluster_spec, test_config, verbose)
        self.monitor = Monitor(cluster_spec)
        self.memcached = MemcachedHelper(test_config)

        # Generator factories (call to get a fresh iterator each time)
        self.clusters = cluster_spec.yield_clusters
        self.servers = cluster_spec.yield_servers
        self.masters = cluster_spec.yield_masters

        self.initial_nodes = test_config.cluster.initial_nodes
        self.mem_quota = test_config.cluster.mem_quota
        self.index_mem_quota = test_config.cluster.index_mem_quota
        self.fts_index_mem_quota = test_config.cluster.fts_index_mem_quota
        self.group_number = test_config.cluster.group_number or 1
        self.roles = cluster_spec.roles

    def set_data_path(self):
        """Set custom data/index disk paths when the spec defines them."""
        if self.cluster_spec.paths:
            data_path, index_path = self.cluster_spec.paths
            for server in self.servers():
                self.rest.set_data_path(server, data_path, index_path)

    def set_auth(self):
        """Initialize administrator credentials on every node."""
        for server in self.servers():
            self.rest.set_auth(server)

    def set_mem_quota(self):
        """Set the data-service RAM quota on every node."""
        for server in self.servers():
            self.rest.set_mem_quota(server, self.mem_quota)

    def set_index_mem_quota(self):
        """Set the index-service RAM quota on every node."""
        for server in self.servers():
            self.rest.set_index_mem_quota(server, self.index_mem_quota)

    def set_fts_index_mem_quota(self):
        """Set the FTS RAM quota on every node."""
        for server in self.servers():
            self.rest.set_fts_index_mem_quota(server,
                                              self.fts_index_mem_quota)

    def set_query_settings(self):
        """Push N1QL settings to every query node."""
        settings = self.test_config.n1ql_settings.settings
        for _, servers in self.cluster_spec.yield_servers_by_role('n1ql'):
            for server in servers:
                self.rest.set_query_settings(server, settings)

    def set_index_settings(self):
        """Push secondary-index settings (skipped for memdb storage).

        A full restart is required for the settings to take effect;
        sleep gives the nodes time to come back up.
        """
        if self.test_config.secondaryindex_settings.db != 'memdb':
            settings = self.test_config.secondaryindex_settings.settings
            for _, servers in self.cluster_spec.yield_servers_by_role('index'):
                for server in servers:
                    self.rest.set_index_settings(server, settings)
            self.remote.restart()
            time.sleep(60)
        else:
            logger.info("DB type is memdb. Not setting the indexer settings to take the default settings")

    def set_services(self):
        """Assign the configured service roles to each cluster's master."""
        for (_, servers), initial_nodes in zip(self.clusters(),
                                               self.initial_nodes):
            master = servers[0]
            self.rest.set_services(master, self.roles[master])

    def disable_moxi(self):
        """Remove the moxi proxy when the test disables it."""
        if self.test_config.cluster.disable_moxi is not None:
            self.remote.disable_moxi()

    def create_server_groups(self):
        """Create extra server (rack-zone) groups beyond the default one."""
        for master in self.masters():
            for i in range(1, self.group_number):
                name = 'Group {}'.format(i + 1)
                self.rest.create_server_group(master, name=name)

    def add_nodes(self):
        """Add the initial nodes to each cluster and rebalance them in."""
        for (_, servers), initial_nodes in zip(self.clusters(),
                                               self.initial_nodes):

            if initial_nodes < 2:  # Single-node cluster
                continue

            # Adding initial nodes
            master = servers[0]
            if self.group_number > 1:
                groups = self.rest.get_server_groups(master)
            else:
                groups = {}
            for i, host_port in enumerate(servers[1:initial_nodes],
                                          start=1):
                uri = groups.get(server_group(servers[:initial_nodes],
                                              self.group_number, i))
                self.rest.add_node(master, host_port, self.roles[host_port],
                                   uri)

            # Rebalance
            master = servers[0]
            known_nodes = servers[:initial_nodes]
            ejected_nodes = []
            self.rest.rebalance(master, known_nodes, ejected_nodes)
            self.monitor.monitor_rebalance(master)

    def create_buckets(self, empty_buckets=False):
        """Create the test buckets (or the empty buckets).

        The RAM quota is split evenly across all buckets, empty ones
        included. NOTE(review): this is Python-2-style division — under
        Python 3 the quota becomes a float; confirm the REST layer accepts
        that.
        """
        ram_quota = self.mem_quota / (self.test_config.cluster.num_buckets +
                                      self.test_config.cluster.emptybuckets)
        replica_number = self.test_config.bucket.replica_number
        replica_index = self.test_config.bucket.replica_index
        eviction_policy = self.test_config.bucket.eviction_policy
        threads_number = self.test_config.bucket.threads_number
        proxy_port = self.test_config.bucket.proxy_port
        password = self.test_config.bucket.password
        buckets = self.test_config.emptybuckets if empty_buckets \
            else self.test_config.buckets

        for master in self.masters():
            for bucket_name in buckets:
                self.rest.create_bucket(host_port=master,
                                        name=bucket_name,
                                        ram_quota=ram_quota,
                                        replica_number=replica_number,
                                        replica_index=replica_index,
                                        eviction_policy=eviction_policy,
                                        threads_number=threads_number,
                                        password=password,
                                        proxy_port=proxy_port)

    def configure_auto_compaction(self):
        """Apply the test's auto-compaction settings on every cluster."""
        compaction_settings = self.test_config.compaction
        for master in self.masters():
            self.rest.configure_auto_compaction(master, compaction_settings)

    def configure_internal_settings(self):
        """Apply ns_server internal settings (values coerced to int)."""
        internal_settings = self.test_config.internal_settings
        for master in self.masters():
            for parameter, value in internal_settings.items():
                self.rest.set_internal_settings(master,
                                                {parameter: int(value)})

    def configure_xdcr_settings(self):
        """Apply cluster-wide XDCR settings (values coerced to int)."""
        xdcr_cluster_settings = self.test_config.xdcr_cluster_settings
        for master in self.masters():
            for parameter, value in xdcr_cluster_settings.items():
                self.rest.set_xdcr_cluster_settings(master,
                                                    {parameter: int(value)})

    def tweak_memory(self):
        """Normalize OS memory state: swap, page cache, swappiness, THP."""
        self.remote.reset_swap()
        self.remote.drop_caches()
        self.remote.set_swappiness()
        self.remote.disable_thp()

    def restart_with_alternative_num_vbuckets(self):
        """Restart the cluster with a non-default vBucket count."""
        num_vbuckets = self.test_config.cluster.num_vbuckets
        if num_vbuckets is not None:
            self.remote.restart_with_alternative_num_vbuckets(num_vbuckets)

    def restart_with_alternative_bucket_options(self):
        """Apply non-default bucket engine options via /diag/eval and
        restart the cluster so they take effect."""
        cmd = 'ns_bucket:update_bucket_props("{}", ' \
              '[{{extra_config_string, "{}={}"}}]).'

        for option in ('defragmenter_enabled',
                       'exp_pager_stime',
                       'ht_locks',
                       'max_num_shards',
                       'max_threads',
                       'warmup_min_memory_threshold',
                       'bfilter_enabled'):
            # -1/None mean "leave the server default in place"
            value = getattr(self.test_config.bucket, option)
            if value != -1 and value is not None:
                logger.info('Changing {} to {}'.format(option, value))
                for master in self.masters():
                    for bucket in self.test_config.buckets:
                        diag_eval = cmd.format(bucket, option, value)
                        self.rest.run_diag_eval(master, diag_eval)
        # NOTE(review): restart happens even when no option changed —
        # confirm whether an unconditional restart is intended here
        self.remote.restart()

    def tune_logging(self):
        """Tighten log rotation and restart so it takes effect."""
        self.remote.tune_log_rotation()
        self.remote.restart()

    def restart_with_alternative_num_cpus(self):
        """Restart server processes pinned to a reduced CPU count."""
        num_cpus = self.test_config.cluster.num_cpus
        if num_cpus:
            self.remote.restart_with_alternative_num_cpus(num_cpus)

    def restart_with_tcmalloc_aggressive_decommit(self):
        """Restart with tcmalloc aggressive decommit enabled."""
        if self.test_config.cluster.tcmalloc_aggressive_decommit:
            self.remote.restart_with_tcmalloc_aggressive_decommit()

    def restart_with_sfwi(self):
        """Restart with SFWI (spatial filter) option enabled."""
        if self.test_config.cluster.sfwi:
            self.remote.restart_with_sfwi()

    def enable_auto_failover(self):
        """Turn on auto-failover on every cluster."""
        for master in self.masters():
            self.rest.enable_auto_failover(master)

    def wait_until_warmed_up(self):
        """Block until every (node, bucket) target finishes warmup."""
        target_iterator = TargetIterator(self.cluster_spec, self.test_config)
        for target in target_iterator:
            self.monitor.monitor_warmup(self.memcached, target.node,
                                        target.bucket)

    def wait_until_healthy(self):
        """Block until every master reports a healthy node state."""
        for master in self.cluster_spec.yield_masters():
            self.monitor.monitor_node_health(master)

    def change_watermarks(self):
        """Override ep-engine memory watermarks via memcached.

        NOTE(review): zipping servers with initial_nodes (one entry per
        cluster) truncates the iteration to the first len(initial_nodes)
        servers, and initial_nodes itself is unused in the body — confirm
        whether this should simply iterate all servers.
        """
        watermark_settings = self.test_config.watermark_settings
        for host_port, initial_nodes in zip(self.servers(),
                                            self.initial_nodes):
            host = host_port.split(':')[0]
            memcached_port = 11210
            for bucket in self.test_config.buckets:
                for key, val in watermark_settings.items():
                    # Watermarks are expressed relative to the RAM quota
                    val = self.memcached.calc_watermark(val, self.mem_quota)
                    self.memcached.set_flusher_param(host, memcached_port,
                                                     bucket, key, val)

    def start_cbq_engine(self):
        """Start the standalone cbq query engine when requested."""
        if self.test_config.cluster.run_cbq:
            self.remote.start_cbq()

    def change_dcp_io_threads(self):
        """Raise DCP I/O threads for memdb storage and restart."""
        if self.test_config.secondaryindex_settings.db == 'memdb':
            self.remote.set_dcp_io_threads()
            time.sleep(30)
            self.remote.restart()
class ClusterManager(object):
    """Cluster bring-up/configuration manager (4.x-era revision).

    Adds node renaming, version-gated FTS quota/service assignment and a
    standalone rebalance step compared to the older revision.
    """

    def __init__(self, cluster_spec, test_config, verbose):
        self.cluster_spec = cluster_spec
        self.test_config = test_config

        self.rest = RestHelper(cluster_spec)
        self.remote = RemoteHelper(cluster_spec, test_config, verbose)
        self.monitor = Monitor(cluster_spec)
        self.memcached = MemcachedHelper(test_config)

        # Generator factories (call to get a fresh iterator each time)
        self.clusters = cluster_spec.yield_clusters
        self.servers = cluster_spec.yield_servers
        self.masters = cluster_spec.yield_masters

        self.initial_nodes = test_config.cluster.initial_nodes
        self.mem_quota = test_config.cluster.mem_quota
        self.index_mem_quota = test_config.cluster.index_mem_quota
        self.fts_index_mem_quota = test_config.cluster.fts_index_mem_quota
        self.group_number = test_config.cluster.group_number or 1
        self.roles = cluster_spec.roles

    def set_data_path(self):
        """Set custom data/index disk paths when the spec defines them."""
        if self.cluster_spec.paths:
            data_path, index_path = self.cluster_spec.paths
            for server in self.servers():
                self.rest.set_data_path(server, data_path, index_path)

    def rename(self):
        """Assign each node its hostname via the REST API."""
        for server in self.servers():
            self.rest.rename(server)

    def set_auth(self):
        """Initialize administrator credentials on every node."""
        for server in self.servers():
            self.rest.set_auth(server)

    def set_mem_quota(self):
        """Set the data-service RAM quota on every node."""
        for server in self.servers():
            self.rest.set_mem_quota(server, self.mem_quota)

    def set_index_mem_quota(self):
        """Set the index-service RAM quota on every node."""
        for server in self.servers():
            self.rest.set_index_mem_quota(server, self.index_mem_quota)

    def set_fts_index_mem_quota(self):
        """Set the FTS RAM quota (FTS was introduced in 4.5.0).

        NOTE(review): the version gate is a lexicographic string
        comparison — e.g. '10.0.0' < '4.5.0' is True — confirm the
        version format makes this safe.
        """
        master_node = self.masters().next()  # Python 2 iterator protocol
        version = self.rest.get_version(master_node)
        if version < '4.5.0':  # FTS was introduced in 4.5.0
            return

        for server in self.servers():
            self.rest.set_fts_index_mem_quota(server,
                                              self.fts_index_mem_quota)

    def set_query_settings(self):
        """Push N1QL settings to every query node."""
        settings = self.test_config.n1ql_settings.settings
        for _, servers in self.cluster_spec.yield_servers_by_role('n1ql'):
            for server in servers:
                self.rest.set_query_settings(server, settings)

    def set_index_settings(self):
        """Push secondary-index settings and log the effective values."""
        settings = self.test_config.secondaryindex_settings.settings
        for _, servers in self.cluster_spec.yield_servers_by_role('index'):
            for server in servers:
                if settings:
                    self.rest.set_index_settings(server, settings)
                # Always log what the node ended up with
                curr_settings = self.rest.get_index_settings(server)
                curr_settings = pretty_dict(curr_settings)
                logger.info("Index settings: {}".format(curr_settings))

    def set_services(self):
        """Assign service roles to each master (services exist in 4.0+).

        NOTE(review): same lexicographic version-string comparison caveat
        as set_fts_index_mem_quota.
        """
        master_node = self.masters().next()  # Python 2 iterator protocol
        version = self.rest.get_version(master_node)
        if version < '4.0.0':  # Service were introduced in 4.0.0
            return

        for (_, servers), initial_nodes in zip(self.clusters(),
                                               self.initial_nodes):
            master = servers[0]
            self.rest.set_services(master, self.roles[master])

    def create_server_groups(self):
        """Create extra server (rack-zone) groups beyond the default one."""
        for master in self.masters():
            for i in range(1, self.group_number):
                name = 'Group {}'.format(i + 1)
                self.rest.create_server_group(master, name=name)

    def add_nodes(self):
        """Add the initial nodes to each cluster (rebalance is separate)."""
        for (_, servers), initial_nodes in zip(self.clusters(),
                                               self.initial_nodes):

            if initial_nodes < 2:  # Single-node cluster
                continue

            master = servers[0]
            if self.group_number > 1:
                groups = self.rest.get_server_groups(master)
            else:
                groups = {}
            for i, host_port in enumerate(servers[1:initial_nodes],
                                          start=1):
                uri = groups.get(server_group(servers[:initial_nodes],
                                              self.group_number, i))
                self.rest.add_node(master, host_port, self.roles[host_port],
                                   uri)

    def rebalance(self):
        """Rebalance each cluster over its initial nodes and wait until
        both the rebalance and node health checks pass."""
        for (_, servers), initial_nodes in zip(self.clusters(),
                                               self.initial_nodes):
            master = servers[0]
            known_nodes = servers[:initial_nodes]
            ejected_nodes = []
            self.rest.rebalance(master, known_nodes, ejected_nodes)
            self.monitor.monitor_rebalance(master)
        self.wait_until_healthy()

    def create_buckets(self):
        """Create the test buckets, splitting the RAM quota evenly.

        NOTE(review): Python-2-style division — under Python 3 the quota
        becomes a float.
        """
        ram_quota = self.mem_quota / self.test_config.cluster.num_buckets

        for master in self.masters():
            for bucket_name in self.test_config.buckets:
                self.rest.create_bucket(
                    host_port=master,
                    name=bucket_name,
                    ram_quota=ram_quota,
                    password=self.test_config.bucket.password,
                    replica_number=self.test_config.bucket.replica_number,
                    replica_index=self.test_config.bucket.replica_index,
                    eviction_policy=self.test_config.bucket.eviction_policy,
                    conflict_resolution_type=self.test_config.bucket.conflict_resolution_type,
                )

    def configure_auto_compaction(self):
        """Apply auto-compaction settings and log the effective values."""
        compaction_settings = self.test_config.compaction
        for master in self.masters():
            self.rest.configure_auto_compaction(master, compaction_settings)
            settings = self.rest.get_auto_compaction_settings(master)
            logger.info('Auto-compaction settings: {}'
                        .format(pretty_dict(settings)))

    def configure_internal_settings(self):
        """Apply ns_server internal settings (values coerced to int)."""
        internal_settings = self.test_config.internal_settings
        for master in self.masters():
            for parameter, value in internal_settings.items():
                self.rest.set_internal_settings(master,
                                                {parameter: int(value)})

    def configure_xdcr_settings(self):
        """Apply cluster-wide XDCR settings (values coerced to int)."""
        xdcr_cluster_settings = self.test_config.xdcr_cluster_settings
        for master in self.masters():
            for parameter, value in xdcr_cluster_settings.items():
                self.rest.set_xdcr_cluster_settings(master,
                                                    {parameter: int(value)})

    def tweak_memory(self):
        """Normalize OS memory state: swap, page cache, swappiness, THP."""
        self.remote.reset_swap()
        self.remote.drop_caches()
        self.remote.set_swappiness()
        self.remote.disable_thp()

    def restart_with_alternative_num_vbuckets(self):
        """Restart the cluster with a non-default vBucket count."""
        num_vbuckets = self.test_config.cluster.num_vbuckets
        if num_vbuckets is not None:
            self.remote.restart_with_alternative_num_vbuckets(num_vbuckets)

    def restart_with_alternative_bucket_options(self):
        """Apply custom buckets settings (e.g., max_num_shards or max_num_auxio)
        using "/diag/eval" and restart the entire cluster."""
        cmd = 'ns_bucket:update_bucket_props("{}", ' \
              '[{{extra_config_string, "{}={}"}}]).'

        for option, value in self.test_config.bucket_extras.items():
            logger.info('Changing {} to {}'.format(option, value))
            for master in self.masters():
                for bucket in self.test_config.buckets:
                    diag_eval = cmd.format(bucket, option, value)
                    self.rest.run_diag_eval(master, diag_eval)

        # Restart only when something actually changed
        if self.test_config.bucket_extras:
            self.remote.restart()
            self.wait_until_healthy()

    def tune_logging(self):
        """Tighten log rotation and restart so it takes effect."""
        self.remote.tune_log_rotation()
        self.remote.restart()

    def enable_auto_failover(self):
        """Turn on auto-failover on every cluster."""
        for master in self.masters():
            self.rest.enable_auto_failover(master)

    def wait_until_warmed_up(self):
        """Block until every bucket on every cluster finishes warmup."""
        for master in self.cluster_spec.yield_masters():
            for bucket in self.test_config.buckets:
                self.monitor.monitor_warmup(self.memcached, master, bucket)

    def wait_until_healthy(self):
        """Block until every master reports a healthy node state."""
        for master in self.cluster_spec.yield_masters():
            self.monitor.monitor_node_health(master)
class PerfTest(object):
    """Legacy base class for performance tests (Python-2 era).

    Owns the cluster/REST/monitoring helpers, drives the load/access
    workload phases through the worker manager, and validates cluster
    state (balance, failovers) on exit.
    """

    # Per-test stat collectors, overridden by subclasses
    COLLECTORS = {}

    def __init__(self, cluster_spec, test_config, experiment=None):
        self.cluster_spec = cluster_spec
        self.test_config = test_config

        self.target_iterator = TargetIterator(self.cluster_spec,
                                              self.test_config)

        # NOTE(review): every other revision constructs MemcachedHelper
        # with test_config, not cluster_spec — confirm this argument
        self.memcached = MemcachedHelper(cluster_spec)
        self.monitor = Monitor(cluster_spec)
        self.rest = RestHelper(cluster_spec)
        self.remote = RemoteHelper(cluster_spec)

        if experiment:
            self.experiment = ExperimentHelper(experiment,
                                               cluster_spec, test_config)

        self.master_node = cluster_spec.yield_masters().next()  # Python 2
        self.build = self.rest.get_version(self.master_node)

        self.cbagent = CbAgent(self)
        self.metric_helper = MetricHelper(self)
        self.reporter = Reporter(self)
        self.reports = {}
        self.snapshots = []
        self.master_events = []

        if self.test_config.test_case.use_workers:
            self.worker_manager = WorkerManager(cluster_spec, test_config)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Tear down workers, collect debug info on failure, then verify
        the cluster is balanced and no unexpected failover happened."""
        if self.test_config.test_case.use_workers:
            self.worker_manager.terminate()
        if exc_type != exc.KeyboardInterrupt:
            self.debug()
        for master in self.cluster_spec.yield_masters():
            if not self.rest.is_balanced(master):
                logger.interrupt('Rebalance failed')

            num_failovers = self.rest.get_failover_counter(master)
            if hasattr(self, 'rebalance_settings'):
                # Deliberate failover tests are allowed to fail nodes over
                if self.rebalance_settings.failover or \
                        self.rebalance_settings.graceful_failover:
                    continue
            if num_failovers:
                logger.interrupt(
                    'Failover happened {} time(s)'.format(num_failovers)
                )

    def compact_bucket(self):
        """Trigger compaction on every bucket and wait for completion."""
        for master in self.cluster_spec.yield_masters():
            for bucket in self.test_config.buckets:
                self.rest.trigger_bucket_compaction(master, bucket)
        for master in self.cluster_spec.yield_masters():
            self.monitor.monitor_task(master, 'bucket_compaction')

    def wait_for_persistence(self):
        """Wait until disk queues drain and TAP replication catches up."""
        for master in self.cluster_spec.yield_masters():
            for bucket in self.test_config.buckets:
                self.monitor.monitor_disk_queue(master, bucket)
                self.monitor.monitor_tap_replication(master, bucket)

    def load(self):
        """Run the initial data-load phase and wait for the workers."""
        load_settings = self.test_config.load_settings
        log_phase('load phase', load_settings)
        self.worker_manager.run_workload(load_settings, self.target_iterator)
        self.worker_manager.wait_for_workers()

    def hot_load(self):
        """Run the working-set warm-up phase and wait for the workers."""
        hot_load_settings = self.test_config.hot_load_settings
        log_phase('hot load phase', hot_load_settings)
        self.worker_manager.run_workload(hot_load_settings,
                                         self.target_iterator)
        self.worker_manager.wait_for_workers()

    def access(self):
        """Run the measured access phase and wait for the workers."""
        access_settings = self.test_config.access_settings
        log_phase('access phase', access_settings)
        self.worker_manager.run_workload(access_settings,
                                         self.target_iterator)
        self.worker_manager.wait_for_workers()

    def access_bg(self):
        """Start the access workload in the background (timer-bounded)."""
        access_settings = self.test_config.access_settings
        log_phase('access in background', access_settings)
        self.worker_manager.run_workload(access_settings,
                                         self.target_iterator,
                                         timer=access_settings.time)

    def access_bg_with_ddocs(self):
        """Start a background access workload that also exercises views."""
        access_settings = self.test_config.access_settings
        log_phase('access phase', access_settings)
        index_type = self.test_config.index_settings.index_type
        self.worker_manager.run_workload(access_settings,
                                         self.target_iterator,
                                         timer=access_settings.time,
                                         ddocs=self.ddocs,
                                         index_type=index_type)

    def timer(self):
        """Sleep for the configured access-phase duration."""
        access_settings = self.test_config.access_settings
        logger.info('Running phase for {} seconds'
                    .format(access_settings.time))
        time.sleep(access_settings.time)

    def debug(self):
        """Collect cbcollect_info archives and web logs for post-mortem."""
        self.remote.collect_info()
        for hostname in self.cluster_spec.yield_hostnames():
            for fname in glob.glob('{}/*.zip'.format(hostname)):
                shutil.move(fname, '{}.zip'.format(hostname))
        self.reporter.save_web_logs()
class PerfTest:
    """Base class for performance tests (mid-era, Python 3 revision).

    Compared to the legacy version: failure checks are aggregated into a
    string rather than raised inline, workload phases go through the
    generic run_phase(), and TLS support is available via a downloaded
    root certificate.
    """

    # Per-test stat collectors, overridden by subclasses
    COLLECTORS = {}

    # Where the cluster root certificate is written locally
    ROOT_CERTIFICATE = 'root.pem'

    def __init__(self,
                 cluster_spec: ClusterSpec,
                 test_config: TestConfig,
                 verbose: bool):
        self.cluster_spec = cluster_spec
        self.test_config = test_config

        self.target_iterator = TargetIterator(cluster_spec, test_config)

        self.cluster = ClusterManager(cluster_spec, test_config)
        self.memcached = MemcachedHelper(test_config)
        self.monitor = Monitor(cluster_spec, test_config, verbose)
        self.rest = RestHelper(cluster_spec)
        self.remote = RemoteHelper(cluster_spec, verbose)
        self.index = IndexHelper(cluster_spec, test_config, self.rest,
                                 self.monitor)

        self.master_node = next(cluster_spec.masters)
        self.build = self.rest.get_version(self.master_node)

        self.metrics = MetricHelper(self)
        self.reporter = ShowFastReporter(cluster_spec, test_config,
                                         self.build)

        self.cbmonitor_snapshots = []
        self.cbmonitor_clusters = []

        if self.test_config.test_case.use_workers:
            self.worker_manager = WorkerManager(cluster_spec, test_config,
                                                verbose)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Record failures before tear-down, swallow Ctrl-C, otherwise
        abort the run if any failure was detected."""
        failure = self.check_failures()

        self.tear_down()

        if exc_type == KeyboardInterrupt:
            logger.warn('The test was interrupted')
            return True  # suppress the KeyboardInterrupt

        if failure:
            logger.interrupt(failure)

    def tear_down(self):
        """Stop workers and undo CPU/memory throttling applied at setup."""
        if self.test_config.test_case.use_workers:
            self.worker_manager.download_celery_logs()
            self.worker_manager.terminate()

        if self.test_config.cluster.online_cores:
            self.remote.enable_cpu()

        if self.test_config.cluster.kernel_mem_limit:
            self.cluster.reset_memory_settings()

    def reset_memory_settings(self):
        """Reset kernel memory limits on all affected servers."""
        if self.test_config.cluster.kernel_mem_limit:
            for service in self.test_config.cluster.kernel_mem_limit_services:
                for server in self.cluster_spec.servers_by_role(service):
                    self.remote.reset_memory_settings(host_string=server)
            self.monitor.wait_for_servers()

    def check_failures(self) -> str:
        """Return a description of the first detected failure, or None.

        Checks run in order core dumps -> rebalance -> failover; the
        later checks take precedence in the returned message.
        """
        failure = self.check_core_dumps()
        failure = self.check_rebalance() or failure
        return self.check_failover() or failure

    def download_certificate(self) -> None:
        """Fetch the cluster root certificate and store it locally."""
        cert = self.rest.get_certificate(self.master_node)
        with open(self.ROOT_CERTIFICATE, 'w') as fh:
            fh.write(cert)

    def check_rebalance(self) -> str:
        """Return an error message if any cluster is not balanced."""
        for master in self.cluster_spec.masters:
            if self.rest.is_not_balanced(master):
                return 'The cluster is not balanced'

    def check_failover(self) -> str:
        """Return an error message if an unexpected failover happened.

        Deliberate failover tests (rebalance_settings.failover /
        graceful_failover) are exempt.
        """
        if hasattr(self, 'rebalance_settings'):
            if self.rebalance_settings.failover or \
                    self.rebalance_settings.graceful_failover:
                return

        for master in self.cluster_spec.masters:
            num_failovers = self.rest.get_failover_counter(master)
            if num_failovers:
                return 'Failover happened {} time(s)'.format(num_failovers)

    def check_core_dumps(self) -> str:
        """Return a pretty-printed map of hosts with core dumps, if any."""
        dumps_per_host = self.remote.detect_core_dumps()
        core_dumps = {
            host: dumps for host, dumps in dumps_per_host.items() if dumps
        }
        if core_dumps:
            return pretty_dict(core_dumps)

    def compact_bucket(self, wait: bool = True) -> None:
        """Trigger compaction on every target bucket; optionally wait."""
        for target in self.target_iterator:
            self.rest.trigger_bucket_compaction(target.node, target.bucket)

        if wait:
            for target in self.target_iterator:
                self.monitor.monitor_task(target.node, 'bucket_compaction')

    def wait_for_persistence(self) -> None:
        """Wait until disk queues and DCP replication queues drain."""
        for target in self.target_iterator:
            self.monitor.monitor_disk_queues(target.node, target.bucket)
            self.monitor.monitor_dcp_queues(target.node, target.bucket)

    def check_num_items(self) -> None:
        """Verify each bucket holds the expected number of loaded items."""
        for target in self.target_iterator:
            self.monitor.monitor_num_items(target.node, target.bucket,
                                           self.test_config.load_settings.items)

    def restore(self) -> None:
        """Restore a pre-built backup and warm the buckets up."""
        with RestoreHelper(self.cluster_spec, self.test_config) as rh:
            rh.restore()
            rh.warmup()

    def reset_kv_stats(self):
        """Zero memcached stats on every data node so measurements start
        from a clean baseline."""
        master_node = next(self.cluster_spec.masters)
        for bucket in self.test_config.buckets:
            for server in self.rest.get_server_list(master_node, bucket):
                port = self.rest.get_memcached_port(server)
                self.memcached.reset_stats(server, port, bucket)

    def run_phase(self,
                  phase: str,
                  task: Callable, settings: PhaseSettings,
                  target_iterator: TargetIterator,
                  timer: int = None,
                  wait: bool = True) -> None:
        """Dispatch one workload phase to the workers.

        :param timer: optional phase duration (background phases)
        :param wait: block until all workers complete when True
        """
        logger.info('Running {}: {}'.format(phase, pretty_dict(settings)))
        self.worker_manager.run_tasks(task, settings, target_iterator, timer)
        if wait:
            self.worker_manager.wait_for_workers()

    def load(self,
             task: Callable = spring_task,
             settings: PhaseSettings = None,
             target_iterator: TargetIterator = None) -> None:
        """Run the initial data-load phase (blocking)."""
        if settings is None:
            settings = self.test_config.load_settings
        if target_iterator is None:
            target_iterator = self.target_iterator

        self.run_phase('load phase', task, settings, target_iterator)

    def hot_load(self,
                 task: Callable = spring_task) -> None:
        """Run the working-set warm-up phase (blocking)."""
        settings = self.test_config.hot_load_settings

        self.run_phase('hot load phase', task, settings,
                       self.target_iterator)

    def access(self,
               task: Callable = spring_task,
               settings: PhaseSettings = None) -> None:
        """Run the measured access phase (blocking, timer-bounded)."""
        if settings is None:
            settings = self.test_config.access_settings

        self.run_phase('access phase', task, settings, self.target_iterator,
                       settings.time)

    def trigger_tasks(self,
                      task: Callable = spring_task,
                      settings: PhaseSettings = None,
                      target_iterator: TargetIterator = None,
                      wait: bool = True) -> None:
        """Run an arbitrary task phase, optionally without blocking."""
        if settings is None:
            settings = self.test_config.access_settings
        if target_iterator is None:
            target_iterator = self.target_iterator

        self.run_phase('trigger task phase', task, settings, target_iterator,
                       settings.time, wait)

    def access_bg(self,
                  task: Callable = spring_task,
                  settings: PhaseSettings = None,
                  target_iterator: TargetIterator = None) -> None:
        """Start the access workload in the background (non-blocking)."""
        if settings is None:
            settings = self.test_config.access_settings
        if target_iterator is None:
            target_iterator = self.target_iterator

        self.run_phase('background access phase', task, settings,
                       target_iterator, settings.time, False)

    def sleep(self) -> None:
        """Sleep for the configured access-phase duration."""
        access_settings = self.test_config.access_settings
        logger.info('Running phase for {} seconds'
                    .format(access_settings.time))
        time.sleep(access_settings.time)

    def report_kpi(self, *args, **kwargs) -> None:
        """Publish KPIs when stats collection is enabled."""
        if self.test_config.stats_settings.enabled:
            self._report_kpi(*args, **kwargs)

    def _report_kpi(self, *args, **kwargs) -> None:
        # Subclasses override this with their actual metric reporting
        pass
class PerfTest:
    """Base class for perfrunner performance tests.

    Wires up the REST/remote/monitoring helpers, implements the context
    manager protocol (tear-down plus post-run sanity checks on exit), and
    provides the generic load/access phase machinery that concrete tests
    compose in their `run()` methods.
    """

    # Per-test overrides of which cbmonitor collectors are enabled.
    COLLECTORS = {}

    # Local file name used when downloading the cluster root certificate.
    ROOT_CERTIFICATE = 'root.pem'

    def __init__(self, cluster_spec: ClusterSpec, test_config: TestConfig,
                 verbose: bool):
        self.cluster_spec = cluster_spec
        self.test_config = test_config

        self.target_iterator = TargetIterator(cluster_spec, test_config)

        self.cluster = ClusterManager(cluster_spec, test_config)
        self.memcached = MemcachedHelper(test_config)
        self.monitor = Monitor(cluster_spec, test_config, verbose)
        self.rest = RestHelper(cluster_spec)
        self.remote = RemoteHelper(cluster_spec, verbose)
        self.profiler = Profiler(cluster_spec, test_config)

        self.master_node = next(cluster_spec.masters)
        self.build = self.rest.get_version(self.master_node)

        self.metrics = MetricHelper(self)
        self.reporter = ShowFastReporter(cluster_spec, test_config, self.build)

        self.cbmonitor_snapshots = []
        self.cbmonitor_clusters = []

        # Worker pool is only needed by tests that actually run phases.
        if self.test_config.test_case.use_workers:
            self.worker_manager = WorkerManager(cluster_spec, test_config,
                                                verbose)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Run sanity checks before tearing down, then fail the test if any
        # check reported a problem. Ctrl-C is swallowed (test interrupted).
        failure = self.debug()

        self.tear_down()

        if exc_type == KeyboardInterrupt:
            logger.warn('The test was interrupted')
            return True

        if failure:
            logger.interrupt(failure)

    @property
    def query_nodes(self) -> List[str]:
        """Active nodes running the N1QL (query) service."""
        return self.rest.get_active_nodes_by_role(self.master_node, 'n1ql')

    @property
    def index_nodes(self) -> List[str]:
        """Active nodes running the GSI (index) service."""
        return self.rest.get_active_nodes_by_role(self.master_node, 'index')

    @property
    def fts_nodes(self) -> List[str]:
        """Active nodes running the full-text search service."""
        return self.rest.get_active_nodes_by_role(self.master_node, 'fts')

    @property
    def analytics_nodes(self) -> List[str]:
        """Active nodes running the analytics (CBAS) service."""
        return self.rest.get_active_nodes_by_role(self.master_node, 'cbas')

    @property
    def eventing_nodes(self) -> List[str]:
        """Active nodes running the eventing service."""
        return self.rest.get_active_nodes_by_role(self.master_node, 'eventing')

    def tear_down(self):
        """Stop workers and undo host-level tweaks applied for the test."""
        if self.test_config.test_case.use_workers:
            self.worker_manager.download_celery_logs()
            self.worker_manager.terminate()

        if self.test_config.cluster.online_cores:
            self.remote.enable_cpu()

        if self.test_config.cluster.kernel_mem_limit:
            self.collect_logs()
            self.cluster.reset_memory_settings()

    def collect_logs(self):
        """Run cbcollect_info and move each host's zip to '<host>.zip'."""
        self.remote.collect_info()

        for hostname in self.cluster_spec.servers:
            for fname in glob.glob('{}/*.zip'.format(hostname)):
                shutil.move(fname, '{}.zip'.format(hostname))

    def reset_memory_settings(self):
        """Remove kernel memory limits from affected nodes and wait for them."""
        if self.test_config.cluster.kernel_mem_limit:
            for service in self.test_config.cluster.kernel_mem_limit_services:
                for server in self.cluster_spec.servers_by_role(service):
                    self.remote.reset_memory_settings(host_string=server)
            self.monitor.wait_for_servers()

    def debug(self) -> str:
        """Run post-test sanity checks; highest-priority failure wins.

        Reported message priority: failover > unbalanced cluster > core dumps.
        """
        failure = self.check_core_dumps()
        failure = self.check_rebalance() or failure
        return self.check_failover() or failure

    def download_certificate(self):
        """Save the cluster root TLS certificate to ROOT_CERTIFICATE."""
        cert = self.rest.get_certificate(self.master_node)
        with open(self.ROOT_CERTIFICATE, 'w') as fh:
            fh.write(cert)

    def check_rebalance(self) -> Optional[str]:
        """Return an error message if any cluster is unbalanced, else None."""
        for master in self.cluster_spec.masters:
            if self.rest.is_not_balanced(master):
                return 'The cluster is not balanced'

    def check_failover(self) -> Optional[str]:
        """Return an error message if an unexpected failover happened.

        Tests that deliberately trigger failover are exempt from the check.
        """
        if hasattr(self, 'rebalance_settings'):
            if self.rebalance_settings.failover or \
                    self.rebalance_settings.graceful_failover:
                return

        for master in self.cluster_spec.masters:
            num_failovers = self.rest.get_failover_counter(master)
            if num_failovers:
                return 'Failover happened {} time(s)'.format(num_failovers)

    def check_core_dumps(self) -> Optional[str]:
        """Return a pretty-printed map of hosts with core dumps, else None."""
        dumps_per_host = self.remote.detect_core_dumps()
        core_dumps = {
            host: dumps for host, dumps in dumps_per_host.items() if dumps
        }
        if core_dumps:
            return pretty_dict(core_dumps)

    def restore(self):
        """Restore a backup repo on the remote nodes."""
        logger.info('Restoring data')
        self.remote.restore_data(
            self.test_config.restore_settings.backup_storage,
            self.test_config.restore_settings.backup_repo,
        )

    def restore_local(self):
        """Restore a backup repo using a locally extracted cbbackupmgr."""
        logger.info('Restoring data')
        local.extract_cb(filename='couchbase.rpm')
        local.cbbackupmgr_restore(
            master_node=self.master_node,
            cluster_spec=self.cluster_spec,
            threads=self.test_config.restore_settings.threads,
            archive=self.test_config.restore_settings.backup_storage,
            repo=self.test_config.restore_settings.backup_repo,
        )

    def import_data(self):
        """Import the configured data file into every test bucket."""
        logger.info('Importing data')
        for bucket in self.test_config.buckets:
            self.remote.import_data(
                self.test_config.restore_settings.import_file,
                bucket,
            )

    def compact_bucket(self, wait: bool = True):
        """Trigger compaction on every target bucket; optionally wait for it."""
        for target in self.target_iterator:
            self.rest.trigger_bucket_compaction(target.node, target.bucket)

        if wait:
            for target in self.target_iterator:
                self.monitor.monitor_task(target.node, 'bucket_compaction')

    def wait_for_persistence(self):
        """Block until disk, DCP and replication queues are fully drained."""
        for target in self.target_iterator:
            self.monitor.monitor_disk_queues(target.node, target.bucket)
            self.monitor.monitor_dcp_queues(target.node, target.bucket)
            self.monitor.monitor_replica_count(target.node, target.bucket)

    def wait_for_indexing(self):
        """Block until all index nodes finish building configured indexes."""
        if self.test_config.index_settings.statements:
            for server in self.index_nodes:
                self.monitor.monitor_indexing(server)

    def check_num_items(self):
        """Block until buckets hold the expected active + replica item count."""
        num_items = self.test_config.load_settings.items * (
            1 + self.test_config.bucket.replica_number
        )
        for target in self.target_iterator:
            self.monitor.monitor_num_items(target.node, target.bucket,
                                           num_items)

    def reset_kv_stats(self):
        """Zero the memcached stats counters on every server of every bucket."""
        master_node = next(self.cluster_spec.masters)
        for bucket in self.test_config.buckets:
            for server in self.rest.get_server_list(master_node, bucket):
                port = self.rest.get_memcached_port(server)
                self.memcached.reset_stats(server, port, bucket)

    def create_indexes(self):
        """Create indexes: N1QL statements, or an FTS index if one is named."""
        logger.info('Creating and building indexes')

        if not self.test_config.index_settings.couchbase_fts_index_name:
            for statement in self.test_config.index_settings.statements:
                self.rest.exec_n1ql_statement(self.query_nodes[0], statement)
        else:
            self.create_fts_index_n1ql()

    def create_fts_index_n1ql(self):
        """Create the configured FTS index and wait until its queue drains."""
        definition = read_json(
            self.test_config.index_settings.couchbase_fts_index_configfile)
        definition.update(
            {'name': self.test_config.index_settings.couchbase_fts_index_name})
        logger.info('Index definition: {}'.format(pretty_dict(definition)))
        self.rest.create_fts_index(
            self.fts_nodes[0],
            self.test_config.index_settings.couchbase_fts_index_name,
            definition)
        self.monitor.monitor_fts_indexing_queue(
            self.fts_nodes[0],
            self.test_config.index_settings.couchbase_fts_index_name,
            int(self.test_config.access_settings.items))

    def create_functions(self):
        """Register the configured N1QL functions via the query service."""
        logger.info('Creating n1ql functions')
        for statement in self.test_config.n1ql_function_settings.statements:
            self.rest.exec_n1ql_statement(self.query_nodes[0], statement)

    def sleep(self):
        """Idle for the configured access time (passive observation phase)."""
        access_settings = self.test_config.access_settings
        logger.info('Running phase for {} seconds'.format(
            access_settings.time))
        time.sleep(access_settings.time)

    def run_phase(self,
                  phase: str,
                  task: Callable,
                  settings: PhaseSettings,
                  target_iterator: Iterable,
                  timer: int = None,
                  wait: bool = True):
        """Dispatch `task` to the worker pool; optionally wait for completion."""
        logger.info('Running {}: {}'.format(phase, pretty_dict(settings)))
        self.worker_manager.run_tasks(task, settings, target_iterator, timer)
        if wait:
            self.worker_manager.wait_for_workers()

    def load(self,
             task: Callable = spring_task,
             settings: PhaseSettings = None,
             target_iterator: Iterable = None):
        """Run the data load phase (defaults come from the test config)."""
        if settings is None:
            settings = self.test_config.load_settings
        if target_iterator is None:
            target_iterator = self.target_iterator
        self.run_phase('load phase', task, settings, target_iterator)

    def hot_load(self, task: Callable = spring_task):
        """Run the working-set warm-up phase."""
        settings = self.test_config.hot_load_settings
        self.run_phase('hot load phase', task, settings, self.target_iterator)

    def xattr_load(self,
                   task: Callable = spring_task,
                   target_iterator: Iterable = None):
        """Run the extended-attributes load phase."""
        if target_iterator is None:
            target_iterator = self.target_iterator
        settings = self.test_config.xattr_load_settings
        self.run_phase('xattr phase', task, settings, target_iterator)

    def access(self,
               task: Callable = spring_task,
               settings: PhaseSettings = None,
               target_iterator: Iterable = None):
        """Run the timed foreground access phase and wait for it to finish."""
        if settings is None:
            settings = self.test_config.access_settings
        if target_iterator is None:
            target_iterator = self.target_iterator
        self.run_phase('access phase', task, settings, target_iterator,
                       timer=settings.time)

    def access_bg(self,
                  task: Callable = spring_task,
                  settings: PhaseSettings = None,
                  target_iterator: Iterable = None):
        """Start the access phase in the background (does not wait)."""
        if settings is None:
            settings = self.test_config.access_settings
        if target_iterator is None:
            target_iterator = self.target_iterator
        self.run_phase('background access phase', task, settings,
                       target_iterator, timer=settings.time, wait=False)

    def report_kpi(self, *args, **kwargs):
        """Publish KPIs unless stats reporting is disabled in the config."""
        if self.test_config.stats_settings.enabled:
            self._report_kpi(*args, **kwargs)

    def _report_kpi(self, *args, **kwargs):
        """Hook for subclasses to compute and post test-specific metrics."""
        pass

    def _measure_curr_ops(self) -> int:
        """Sum cmd_get + cmd_set across all KV nodes and buckets."""
        ops = 0
        for bucket in self.test_config.buckets:
            for server in self.rest.get_active_nodes_by_role(
                    self.master_node, "kv"):
                port = self.rest.get_memcached_port(server)

                stats = self.memcached.get_stats(server, port, bucket)
                # Stats keys are bytes as returned by the memcached client.
                for stat in b'cmd_get', b'cmd_set':
                    ops += int(stats[stat])
        return ops
class Profiler:
    """Collect Go pprof profiles from Couchbase services over SSH tunnels."""

    # Per-service pprof debug ports (forwarded via SSH, bound on localhost).
    DEBUG_PORTS = {
        'fts': 8094,
        'index': 9102,
        'kv': 9998,  # goxdcr
        'n1ql': 8093,
    }

    # pprof endpoint templates, keyed by profile type.
    ENDPOINTS = {
        'cpu': 'http://127.0.0.1:{}/debug/pprof/profile',
        'heap': 'http://127.0.0.1:{}/debug/pprof/heap',
        'goroutine': 'http://127.0.0.1:{}/debug/pprof/goroutine?debug=2',
    }

    def __init__(self, cluster_spec: ClusterSpec, test_config: TestConfig):
        self.test_config = test_config
        self.rest = RestHelper(cluster_spec)
        self.master_node = next(cluster_spec.masters)
        self.ssh_username, self.ssh_password = cluster_spec.ssh_credentials

    def new_tunnel(self, host: str, port: int) -> SSHTunnelForwarder:
        """Create an SSH tunnel forwarding a local port to `port` on `host`."""
        forwarder = SSHTunnelForwarder(
            ssh_address_or_host=host,
            ssh_username=self.ssh_username,
            ssh_password=self.ssh_password,
            remote_bind_address=('127.0.0.1', port),
        )
        return forwarder

    def save(self, host: str, service: str, profile: str, content: bytes):
        """Persist raw profile bytes under a unique .pprof file name."""
        out_name = '{}_{}_{}_{}.pprof'.format(host, service, profile,
                                              uhex()[:6])
        with open(out_name, 'wb') as out:
            out.write(content)

    def profile(self, host: str, service: str, profile: str):
        """Fetch one profile from `host` through a fresh tunnel and save it."""
        logger.info('Collecting {} profile on {}'.format(profile, host))

        url_template = self.ENDPOINTS[profile]
        debug_port = self.DEBUG_PORTS[service]

        with self.new_tunnel(host, debug_port) as tunnel:
            endpoint_url = url_template.format(tunnel.local_bind_port)
            response = requests.get(url=endpoint_url, auth=self.rest.auth)
            self.save(host, service, profile, response.content)

    def timer(self, **kwargs):
        """Schedule repeated profile collection on a background timer."""
        Timer(
            function=self.profile,
            interval=self.test_config.profiling_settings.interval,
            num_runs=self.test_config.profiling_settings.num_profiles,
            kwargs=kwargs,
        ).start()

    def schedule(self):
        """Arm timers for every configured service/node/profile combination."""
        profiling = self.test_config.profiling_settings
        for service in profiling.services:
            logger.info('Scheduling profiling of "{}" services'.format(service))
            servers = self.rest.get_active_nodes_by_role(self.master_node,
                                                         role=service)
            for server in servers:
                for profile in profiling.profiles:
                    self.timer(host=server, service=service, profile=profile)
class Profiler:
    """Collect Go pprof profiles from Couchbase services over SSH tunnels."""

    # Per-service pprof debug ports (forwarded via SSH, bound on localhost).
    DEBUG_PORTS = {
        'fts': 8094,
        'index': 9102,
        'kv': 9998,  # goxdcr
        'n1ql': 8093,
    }

    # pprof endpoint templates, keyed by profile type.
    ENDPOINTS = {
        'cpu': 'http://127.0.0.1:{}/debug/pprof/profile',
        'heap': 'http://127.0.0.1:{}/debug/pprof/heap',
        'goroutine': 'http://127.0.0.1:{}/debug/pprof/goroutine?debug=2',
    }

    def __init__(self, cluster_spec: ClusterSpec, test_config: TestConfig):
        self.test_config = test_config
        self.rest = RestHelper(cluster_spec)
        self.master_node = next(cluster_spec.masters)
        self.ssh_username, self.ssh_password = cluster_spec.ssh_credentials

    def new_tunnel(self, host: str, port: int) -> SSHTunnelForwarder:
        """Create an SSH tunnel forwarding a local port to `port` on `host`."""
        return SSHTunnelForwarder(
            ssh_address_or_host=host,
            ssh_username=self.ssh_username,
            ssh_password=self.ssh_password,
            remote_bind_address=('127.0.0.1', port),
        )

    def save(self, host: str, service: str, profile: str, content: bytes):
        """Persist raw profile bytes under a unique .pprof file name."""
        fname = '{}_{}_{}_{}.pprof'.format(host, service, profile, uhex()[:6])
        with open(fname, 'wb') as fh:
            fh.write(content)

    def profile(self, host: str, service: str, profile: str):
        """Fetch one profile from `host` through a fresh tunnel and save it."""
        logger.info('Collecting {} profile on {}'.format(profile, host))

        endpoint = self.ENDPOINTS[profile]
        port = self.DEBUG_PORTS[service]
        with self.new_tunnel(host, port) as tunnel:
            url = endpoint.format(tunnel.local_bind_port)
            # Fix: pass cluster credentials — the debug endpoints are served
            # by authenticated Couchbase services, and the sibling Profiler
            # implementation in this file authenticates the same request.
            response = requests.get(url=url, auth=self.rest.auth)
            self.save(host, service, profile, response.content)

    def timer(self, **kwargs):
        """Schedule repeated profile collection on a background timer."""
        timer = Timer(
            function=self.profile,
            interval=self.test_config.profiling_settings.interval,
            num_runs=self.test_config.profiling_settings.num_profiles,
            kwargs=kwargs,
        )
        timer.start()

    def schedule(self):
        """Arm timers for every configured service/node/profile combination."""
        for service in self.test_config.profiling_settings.services:
            logger.info(
                'Scheduling profiling of "{}" services'.format(service))
            for server in self.rest.get_active_nodes_by_role(self.master_node,
                                                             role=service):
                for profile in self.test_config.profiling_settings.profiles:
                    self.timer(host=server, service=service, profile=profile)
class ClusterManager(object):
    """Configure a freshly installed cluster: paths, quotas, nodes, buckets.

    Wraps the REST API and remote (SSH) helpers; the yield_* attributes are
    generator factories provided by the cluster spec.
    """

    def __init__(self, cluster_spec, test_config, verbose):
        self.cluster_spec = cluster_spec
        self.test_config = test_config

        self.rest = RestHelper(cluster_spec)
        self.remote = RemoteHelper(cluster_spec, test_config, verbose)
        self.monitor = Monitor(cluster_spec)
        self.memcached = MemcachedHelper(test_config)

        # Generator factories (call to iterate), not materialized lists.
        self.clusters = cluster_spec.yield_clusters
        self.servers = cluster_spec.yield_servers
        self.masters = cluster_spec.yield_masters

        self.initial_nodes = test_config.cluster.initial_nodes
        self.mem_quota = test_config.cluster.mem_quota
        self.index_mem_quota = test_config.cluster.index_mem_quota
        self.group_number = test_config.cluster.group_number or 1
        self.roles = cluster_spec.roles

    def set_data_path(self):
        """Apply custom data/index paths when the spec defines them."""
        if self.cluster_spec.paths:
            data_path, index_path = self.cluster_spec.paths
            for server in self.servers():
                self.rest.set_data_path(server, data_path, index_path)

    def set_auth(self):
        """Initialize administrator credentials on every node."""
        for server in self.servers():
            self.rest.set_auth(server)

    def set_mem_quota(self):
        """Set the KV memory quota on every node."""
        for server in self.servers():
            self.rest.set_mem_quota(server, self.mem_quota)

    def set_index_mem_quota(self):
        """Set the index service memory quota on every node."""
        for server in self.servers():
            self.rest.set_index_mem_quota(server, self.index_mem_quota)

    def set_query_settings(self):
        """Push N1QL settings to every query node."""
        settings = self.test_config.n1ql_settings.settings
        for _, servers in self.cluster_spec.yield_servers_by_role('n1ql'):
            for server in servers:
                self.rest.set_query_settings(server, settings)

    def set_index_settings(self):
        """Push GSI settings to every index node, then restart the cluster.

        NOTE(review): placement of the restart/sleep relative to the loop was
        reconstructed from collapsed formatting — verify against history.
        """
        settings = self.test_config.secondaryindex_settings.settings
        for _, servers in self.cluster_spec.yield_servers_by_role('index'):
            for server in servers:
                self.rest.set_index_settings(server, settings)
        self.remote.restart()
        # Allow the services time to come back up after the restart.
        time.sleep(60)

    def set_services(self):
        """Assign the configured service roles to each cluster's master."""
        for (_, servers), initial_nodes in zip(self.clusters(),
                                               self.initial_nodes):
            master = servers[0]
            self.rest.set_services(master, self.roles[master])

    def disable_moxi(self):
        """Disable the moxi proxy if the test config requests it."""
        if self.test_config.cluster.disable_moxi is not None:
            self.remote.disable_moxi()

    def create_server_groups(self):
        """Create extra server groups ('Group 2'..) beyond the default one."""
        for master in self.masters():
            for i in range(1, self.group_number):
                name = 'Group {}'.format(i + 1)
                self.rest.create_server_group(master, name=name)

    def add_nodes(self):
        """Add the initial nodes to each cluster and rebalance."""
        for (_, servers), initial_nodes in zip(self.clusters(),
                                               self.initial_nodes):

            if initial_nodes < 2:  # Single-node cluster
                continue

            # Adding initial nodes
            master = servers[0]
            if self.group_number > 1:
                groups = self.rest.get_server_groups(master)
            else:
                groups = {}
            for i, host_port in enumerate(servers[1:initial_nodes],
                                          start=1):
                uri = groups.get(
                    server_group(servers[:initial_nodes],
                                 self.group_number, i))
                self.rest.add_node(master, host_port, self.roles[host_port],
                                   uri)

            # Rebalance
            master = servers[0]
            known_nodes = servers[:initial_nodes]
            ejected_nodes = []
            self.rest.rebalance(master, known_nodes, ejected_nodes)
            self.monitor.monitor_rebalance(master)

    def create_buckets(self, emptyBuckets=False):
        """Create test buckets (or empty buckets), splitting the KV quota."""
        ram_quota = self.mem_quota / (self.test_config.cluster.num_buckets +
                                      self.test_config.cluster.emptybuckets)
        replica_number = self.test_config.bucket.replica_number
        replica_index = self.test_config.bucket.replica_index
        eviction_policy = self.test_config.bucket.eviction_policy
        threads_number = self.test_config.bucket.threads_number
        proxyPort = self.test_config.bucket.proxyPort
        password = self.test_config.bucket.password
        buckets = self.test_config.emptybuckets if emptyBuckets \
            else self.test_config.buckets

        for master in self.masters():
            for bucket_name in buckets:
                self.rest.create_bucket(host_port=master,
                                        name=bucket_name,
                                        ram_quota=ram_quota,
                                        replica_number=replica_number,
                                        replica_index=replica_index,
                                        eviction_policy=eviction_policy,
                                        threads_number=threads_number,
                                        password=password,
                                        proxyPort=proxyPort)

    def configure_auto_compaction(self):
        """Apply the configured auto-compaction thresholds to each cluster."""
        compaction_settings = self.test_config.compaction
        for master in self.masters():
            self.rest.configure_auto_compaction(master, compaction_settings)

    def configure_internal_settings(self):
        """Set ns_server internal settings (values coerced to int)."""
        internal_settings = self.test_config.internal_settings
        for master in self.masters():
            for parameter, value in internal_settings.items():
                self.rest.set_internal_settings(master,
                                                {parameter: int(value)})

    def tweak_memory(self):
        """Reset swap/caches and apply memory-related OS tunings."""
        self.remote.reset_swap()
        self.remote.drop_caches()
        self.remote.set_swappiness()
        self.remote.disable_thp()

    def restart_with_alternative_num_vbuckets(self):
        """Restart the cluster with a non-default vBucket count, if set."""
        num_vbuckets = self.test_config.cluster.num_vbuckets
        if num_vbuckets is not None:
            self.remote.restart_with_alternative_num_vbuckets(num_vbuckets)

    def restart_with_alternative_bucket_options(self):
        """Apply non-default bucket engine options via diag/eval, then restart.

        Only options explicitly set in the config (not -1 / None) are pushed.
        """
        cmd = 'ns_bucket:update_bucket_props("{}", ' \
              '[{{extra_config_string, "{}={}"}}]).'

        for option in ('defragmenter_enabled', 'exp_pager_stime', 'ht_locks',
                       'max_num_shards', 'max_threads',
                       'warmup_min_memory_threshold', 'bfilter_enabled'):
            value = getattr(self.test_config.bucket, option)
            if value != -1 and value is not None:
                logger.info('Changing {} to {}'.format(option, value))
                for master in self.masters():
                    for bucket in self.test_config.buckets:
                        diag_eval = cmd.format(bucket, option, value)
                        self.rest.run_diag_eval(master, diag_eval)
        self.remote.restart()

    def tune_logging(self):
        """Adjust log rotation and restart so the change takes effect."""
        self.remote.tune_log_rotation()
        self.remote.restart()

    def restart_with_alternative_num_cpus(self):
        """Restart the server pinned to a reduced CPU count, if configured."""
        num_cpus = self.test_config.cluster.num_cpus
        if num_cpus:
            self.remote.restart_with_alternative_num_cpus(num_cpus)

    def restart_with_tcmalloc_aggressive_decommit(self):
        """Restart with tcmalloc aggressive decommit, if configured."""
        if self.test_config.cluster.tcmalloc_aggressive_decommit:
            self.remote.restart_with_tcmalloc_aggressive_decommit()

    def restart_with_sfwi(self):
        """Restart with SFWI enabled, if configured."""
        if self.test_config.cluster.sfwi:
            self.remote.restart_with_sfwi()

    def enable_auto_failover(self):
        """Enable automatic failover on every cluster."""
        for master in self.masters():
            self.rest.enable_auto_failover(master)

    def wait_until_warmed_up(self):
        """Block until every target bucket finishes warming up."""
        target_iterator = TargetIterator(self.cluster_spec, self.test_config)
        for target in target_iterator:
            self.monitor.monitor_warmup(self.memcached, target.node,
                                        target.bucket)

    def wait_until_healthy(self):
        """Block until every cluster master reports healthy node status."""
        for master in self.cluster_spec.yield_masters():
            self.monitor.monitor_node_health(master)

    def change_watermarks(self):
        """Translate percentage watermarks to bytes and push via memcached."""
        watermark_settings = self.test_config.watermark_settings
        for host_port, initial_nodes in zip(self.servers(),
                                            self.initial_nodes):
            host = host_port.split(':')[0]
            memcached_port = self.rest.get_memcached_port(host_port)
            for bucket in self.test_config.buckets:
                for key, val in watermark_settings.items():
                    val = self.memcached.calc_watermark(val, self.mem_quota)
                    self.memcached.set_flusher_param(host, memcached_port,
                                                     bucket, key, val)

    def start_cbq_engine(self):
        """Start the standalone cbq engine if the test config requests it."""
        if self.test_config.cluster.run_cbq:
            self.remote.start_cbq()
class ClusterManager(object):
    """Earlier-generation cluster configurator (pre-roles, pre-index quota).

    Same responsibility as the other ClusterManager variants in this file:
    configure paths, auth, quotas, nodes and buckets on a fresh cluster.
    """

    def __init__(self, cluster_spec, test_config):
        self.cluster_spec = cluster_spec
        self.test_config = test_config

        self.rest = RestHelper(cluster_spec)
        self.remote = RemoteHelper(cluster_spec)
        self.monitor = Monitor(cluster_spec)
        # NOTE(review): other variants construct MemcachedHelper from
        # test_config; here it receives cluster_spec — confirm intentional.
        self.memcached = MemcachedHelper(cluster_spec)

        # clusters is materialized here; the others remain factories.
        self.clusters = cluster_spec.yield_clusters()
        self.servers = cluster_spec.yield_servers
        self.masters = cluster_spec.yield_masters
        self.hostnames = cluster_spec.yield_hostnames

        self.initial_nodes = test_config.cluster.initial_nodes
        self.mem_quota = test_config.cluster.mem_quota
        self.group_number = test_config.cluster.group_number or 1

    def set_data_path(self):
        """Apply the spec's data/index paths on every node."""
        data_path, index_path = self.cluster_spec.paths
        for server in self.servers():
            self.rest.set_data_path(server, data_path, index_path)

    def set_auth(self):
        """Initialize administrator credentials on every node."""
        for server in self.servers():
            self.rest.set_auth(server)

    def set_mem_quota(self):
        """Set the KV memory quota on every node."""
        for server in self.servers():
            self.rest.set_mem_quota(server, self.mem_quota)

    def disable_moxi(self):
        """Disable the moxi proxy if the test config requests it."""
        if self.test_config.cluster.disable_moxi is not None:
            self.remote.disable_moxi()

    def create_server_groups(self):
        """Create extra server groups ('Group 2'..) beyond the default one."""
        for master in self.masters():
            for i in range(1, self.group_number):
                name = 'Group {}'.format(i + 1)
                self.rest.create_server_group(master, name=name)

    def add_nodes(self):
        """Add the initial nodes to each cluster and rebalance."""
        for (_, servers), initial_nodes in zip(self.clusters,
                                               self.initial_nodes):

            if initial_nodes < 2:  # Single-node cluster
                continue

            # Adding initial nodes
            master = servers[0]
            if self.group_number > 1:
                groups = self.rest.get_server_groups(master)
            else:
                groups = {}
            for i, host_port in enumerate(servers[1:initial_nodes],
                                          start=1):
                host = host_port.split(':')[0]
                uri = groups.get(
                    server_group(servers[:initial_nodes],
                                 self.group_number, i))
                self.rest.add_node(master, host, uri)

            # Rebalance
            master = servers[0]
            known_nodes = servers[:initial_nodes]
            ejected_nodes = []
            self.rest.rebalance(master, known_nodes, ejected_nodes)
            self.monitor.monitor_rebalance(master)

    def create_buckets(self):
        """Create the test buckets, splitting the KV quota evenly."""
        ram_quota = self.mem_quota / self.test_config.cluster.num_buckets
        replica_number = self.test_config.bucket.replica_number
        replica_index = self.test_config.bucket.replica_index
        eviction_policy = self.test_config.bucket.eviction_policy
        threads_number = self.test_config.bucket.threads_number

        for master in self.masters():
            for bucket_name in self.test_config.buckets:
                self.rest.create_bucket(
                    host_port=master,
                    name=bucket_name,
                    ram_quota=ram_quota,
                    replica_number=replica_number,
                    replica_index=replica_index,
                    eviction_policy=eviction_policy,
                    threads_number=threads_number,
                )

    def configure_auto_compaction(self):
        """Apply the configured auto-compaction thresholds to each cluster."""
        compaction_settings = self.test_config.compaction
        for master in self.masters():
            self.rest.configure_auto_compaction(master, compaction_settings)

    def configure_internal_settings(self):
        """Set ns_server internal settings (values coerced to int)."""
        internal_settings = self.test_config.internal_settings
        for master in self.masters():
            for parameter, value in internal_settings.items():
                self.rest.set_internal_settings(master,
                                                {parameter: int(value)})

    def tweak_memory(self):
        """Reset swap/caches and apply memory-related OS tunings."""
        self.remote.reset_swap()
        self.remote.drop_caches()
        self.remote.set_swappiness()
        self.remote.disable_thp()

    def restart_with_alternative_num_vbuckets(self):
        """Restart the cluster with a non-default vBucket count, if set."""
        num_vbuckets = self.test_config.cluster.num_vbuckets
        if num_vbuckets is not None:
            self.remote.restart_with_alternative_num_vbuckets(num_vbuckets)

    def restart_with_alternative_bucket_options(self):
        """Apply non-default bucket engine options via diag/eval, then restart."""
        cmd = 'ns_bucket:update_bucket_props("{}", ' \
              '[{{extra_config_string, "{}={}"}}]).'

        for option in ('max_num_shards', 'max_threads'):
            value = getattr(self.test_config.bucket, option)
            if value:
                logger.info('Changing {} to {}'.format(option, value))
                for master in self.masters():
                    for bucket in self.test_config.buckets:
                        diag_eval = cmd.format(bucket, option, value)
                        self.rest.run_diag_eval(master, diag_eval)
        self.remote.restart()

    def restart_with_alternative_num_cpus(self):
        """Restart the server pinned to a reduced CPU count, if configured."""
        num_cpus = self.test_config.cluster.num_cpus
        if num_cpus:
            self.remote.restart_with_alternative_num_cpus(num_cpus)

    def enable_auto_failover(self):
        """Enable automatic failover on every cluster."""
        for master in self.masters():
            self.rest.enable_auto_failover(master)

    def wait_until_warmed_up(self):
        """Block until every target bucket finishes warming up."""
        target_iterator = TargetIterator(self.cluster_spec, self.test_config)
        for target in target_iterator:
            host = target.node.split(':')[0]
            self.monitor.monitor_warmup(self.memcached, host, target.bucket)

    def change_watermarks(self):
        """Translate percentage watermarks to bytes and push via memcached."""
        watermark_settings = self.test_config.watermark_settings
        for hostname, initial_nodes in zip(self.hostnames(),
                                           self.initial_nodes):
            for bucket in self.test_config.buckets:
                for key, val in watermark_settings.items():
                    val = self.memcached.calc_watermark(val, self.mem_quota)
                    self.memcached.set_flusher_param(hostname, bucket,
                                                     key, val)

    def start_cbq_engine(self):
        """Start the standalone cbq engine if the test config requests it."""
        if self.test_config.cluster.run_cbq:
            self.remote.start_cbq()
class SGPerfTest(PerfTest):
    """Sync Gateway performance test driven by YCSB workload generators.

    Overrides PerfTest wiring: the build version comes from Sync Gateway
    rather than the Couchbase server, and phases are dispatched through the
    Sync Gateway-specific worker tasks.
    """

    COLLECTORS = {'disk': False, 'ns_server': False,
                  'ns_server_overview': False,
                  'active_tasks': False, 'syncgateway_stats': True}

    ALL_HOSTNAMES = True
    # Local directory the remote YCSB logs are copied into.
    LOCAL_DIR = "YCSB"

    def __init__(self, cluster_spec: ClusterSpec, test_config: TestConfig,
                 verbose: bool):
        self.cluster_spec = cluster_spec
        self.test_config = test_config
        self.memcached = MemcachedHelper(test_config)
        self.remote = RemoteHelper(cluster_spec, test_config, verbose)
        self.rest = RestHelper(cluster_spec)
        # self.build = os.environ.get('SGBUILD') or "0.0.0-000"
        self.master_node = next(cluster_spec.masters)
        self.build = self.rest.get_sgversion(self.master_node)
        self.metrics = MetricHelper(self)
        self.reporter = ShowFastReporter(cluster_spec, test_config,
                                         self.build)
        if self.test_config.test_case.use_workers:
            self.worker_manager = WorkerManager(cluster_spec, test_config,
                                                verbose)
        self.settings = self.test_config.access_settings
        self.settings.syncgateway_settings = \
            self.test_config.syncgateway_settings
        self.profiler = Profiler(cluster_spec, test_config)
        self.cluster = ClusterManager(cluster_spec, test_config)
        self.target_iterator = TargetIterator(cluster_spec, test_config)
        self.monitor = Monitor(cluster_spec, test_config, verbose)

    def download_ycsb(self):
        """Clone YCSB on the workers (remote) or on this machine (local)."""
        if self.worker_manager.is_remote:
            self.remote.clone_ycsb(
                repo=self.test_config.syncgateway_settings.repo,
                branch=self.test_config.syncgateway_settings.branch,
                worker_home=self.worker_manager.WORKER_HOME,
                ycsb_instances=int(
                    self.test_config.syncgateway_settings
                    .instances_per_client))
        else:
            local.clone_ycsb(
                repo=self.test_config.syncgateway_settings.repo,
                branch=self.test_config.syncgateway_settings.branch)

    def collect_execution_logs(self):
        """Pull YCSB logs from the remote workers into a clean LOCAL_DIR."""
        if self.worker_manager.is_remote:
            if os.path.exists(self.LOCAL_DIR):
                shutil.rmtree(self.LOCAL_DIR, ignore_errors=True)
            os.makedirs(self.LOCAL_DIR)
            self.remote.get_syncgateway_YCSB_logs(
                self.worker_manager.WORKER_HOME,
                self.test_config.syncgateway_settings,
                self.LOCAL_DIR)

    def run_sg_phase(self,
                     phase: str,
                     task: Callable,
                     settings: PhaseSettings,
                     timer: int = None,
                     distribute: bool = False) -> None:
        """Dispatch a Sync Gateway task to the workers and wait for them."""
        logger.info('Running {}: {}'.format(phase, pretty_dict(settings)))
        self.worker_manager.run_sg_tasks(task, settings, timer, distribute,
                                         phase)
        self.worker_manager.wait_for_workers()

    def start_memcached(self):
        """Start the memcached instance the workload relies on."""
        self.run_sg_phase("start memcached",
                          syncgateway_task_start_memcached,
                          self.settings, self.settings.time, False)

    def load_users(self):
        """Create the Sync Gateway test users."""
        self.run_sg_phase("load users", syncgateway_task_load_users,
                          self.settings, self.settings.time, False)

    def init_users(self):
        """Initialize users, only when authentication is enabled."""
        if self.test_config.syncgateway_settings.auth == 'true':
            self.run_sg_phase("init users", syncgateway_task_init_users,
                              self.settings, self.settings.time, False)

    def grant_access(self):
        """Grant channel access to users, when configured."""
        if self.test_config.syncgateway_settings.grant_access == 'true':
            self.run_sg_phase("grant access to users",
                              syncgateway_task_grant_access,
                              self.settings, self.settings.time, False)

    def load_docs(self):
        """Load the document data set through Sync Gateway."""
        self.run_sg_phase("load docs", syncgateway_task_load_docs,
                          self.settings, self.settings.time, False)

    @with_stats
    @with_profiles
    def run_test(self):
        """Run the measured workload (stats and profiling collected)."""
        self.run_sg_phase("run test", syncgateway_task_run_test,
                          self.settings, self.settings.time, True)

    def compress_sg_logs(self):
        """Compress Sync Gateway logs on the remote hosts."""
        self.remote.compress_sg_logs()

    def get_sg_logs(self):
        """Download Sync Gateway logs from each of the initial nodes."""
        initial_nodes = int(self.test_config.syncgateway_settings.nodes)
        ssh_user, ssh_pass = self.cluster_spec.ssh_credentials
        for _server in range(initial_nodes):
            server = self.cluster_spec.servers[_server]
            local.get_sg_logs(host=server, ssh_user=ssh_user,
                              ssh_pass=ssh_pass)

    def run(self):
        """Full test sequence: setup phases, measured run, KPI reporting."""
        self.download_ycsb()
        self.start_memcached()
        self.load_users()
        self.load_docs()
        self.init_users()
        self.grant_access()
        self.run_test()
        self.report_kpi()

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Unlike PerfTest.__exit__, no debug()/failure handling here; only
        # worker shutdown and host-level cleanup.
        if self.test_config.test_case.use_workers:
            self.worker_manager.download_celery_logs()
            self.worker_manager.terminate()

        if self.test_config.cluster.online_cores:
            self.remote.enable_cpu()

        if self.test_config.cluster.kernel_mem_limit:
            self.remote.reset_memory_settings()
            self.monitor.wait_for_servers()
class ClusterManager:
    """Provisions and configures a Couchbase cluster before a test run.

    Thin orchestration layer over RestHelper (REST API), RemoteHelper
    (SSH), Monitor and MemcachedHelper. Most methods iterate over all
    clusters in the spec via `cluster_spec.masters`.
    """

    def __init__(self, cluster_spec: ClusterSpec, test_config: TestConfig,
                 verbose: bool = False):
        self.cluster_spec = cluster_spec
        self.test_config = test_config
        self.rest = RestHelper(cluster_spec)
        self.remote = RemoteHelper(cluster_spec, verbose)
        self.monitor = Monitor(cluster_spec, test_config, verbose)
        self.memcached = MemcachedHelper(test_config)

        # Master of the first cluster in the spec.
        self.master_node = next(self.cluster_spec.masters)

        self.initial_nodes = test_config.cluster.initial_nodes

    def is_compatible(self, min_release: str) -> bool:
        """Return True if the server version is >= min_release.

        NOTE(review): returns inside the loop, so only the FIRST cluster's
        master is ever checked — presumably intentional for homogeneous
        clusters, but verify for multi-cluster specs.
        """
        for master in self.cluster_spec.masters:
            version = self.rest.get_version(master)
            return version >= min_release

    def set_data_path(self):
        for server in self.cluster_spec.servers:
            # Paths must be owned by the couchbase user before the REST call.
            self.remote.change_owner(server, self.cluster_spec.data_path)
            self.rest.set_data_path(server, self.cluster_spec.data_path)

    def set_index_path(self):
        for server in self.cluster_spec.servers:
            self.remote.change_owner(server, self.cluster_spec.index_path)
            self.rest.set_index_path(server, self.cluster_spec.index_path)

    def set_analytics_path(self):
        """Configure analytics I/O devices: num_io_devices sub-dirs per path."""
        paths = []
        for path in self.cluster_spec.analytics_paths:
            for i in range(self.test_config.analytics_settings.num_io_devices):
                io_device = '{}/dev{}'.format(path, i)
                paths.append(io_device)
        for server in self.cluster_spec.servers_by_role('cbas'):
            for path in self.cluster_spec.analytics_paths:
                self.remote.change_owner(server, path)
            self.rest.set_analytics_paths(server, paths)

    def rename(self):
        # Assign each node its canonical hostname.
        for server in self.cluster_spec.servers:
            self.rest.rename(server)

    def set_auth(self):
        # Set the administrator credentials on every node.
        for server in self.cluster_spec.servers:
            self.rest.set_auth(server)

    def set_mem_quotas(self):
        """Apply per-service memory quotas; optional quotas only when set."""
        for master in self.cluster_spec.masters:
            self.rest.set_mem_quota(master,
                                    self.test_config.cluster.mem_quota)
            self.rest.set_index_mem_quota(
                master, self.test_config.cluster.index_mem_quota)
            if self.test_config.cluster.fts_index_mem_quota:
                self.rest.set_fts_index_mem_quota(
                    master, self.test_config.cluster.fts_index_mem_quota)
            if self.test_config.cluster.analytics_mem_quota:
                self.rest.set_analytics_mem_quota(
                    master, self.test_config.cluster.analytics_mem_quota)
            if self.test_config.cluster.eventing_mem_quota:
                self.rest.set_eventing_mem_quota(
                    master, self.test_config.cluster.eventing_mem_quota)

    def set_query_settings(self):
        """Push cbq-engine settings to the first query node and log them."""
        logger.info('Setting query settings')
        query_nodes = self.cluster_spec.servers_by_role('n1ql')
        if query_nodes:
            settings = self.test_config.n1ql_settings.cbq_settings
            if settings:
                self.rest.set_query_settings(query_nodes[0], settings)
            # Read back the effective settings for the test log.
            settings = self.rest.get_query_settings(query_nodes[0])
            settings = pretty_dict(settings)
            logger.info('Query settings: {}'.format(settings))

    def set_index_settings(self):
        """Push GSI settings to the first index node and log them."""
        logger.info('Setting index settings')
        index_nodes = self.cluster_spec.servers_by_role('index')
        if index_nodes:
            settings = self.test_config.gsi_settings.settings
            if settings:
                self.rest.set_index_settings(index_nodes[0], settings)
            settings = self.rest.get_index_settings(index_nodes[0])
            settings = pretty_dict(settings)
            logger.info('Index settings: {}'.format(settings))

    def set_services(self):
        # Multi-service topologies require 4.0+; older servers are kv-only.
        if not self.is_compatible(min_release='4.0.0'):
            return
        for master in self.cluster_spec.masters:
            roles = self.cluster_spec.roles[master]
            self.rest.set_services(master, roles)

    def add_nodes(self):
        """Join non-master nodes (up to initial_nodes) to each cluster."""
        for (_, servers), initial_nodes in zip(self.cluster_spec.clusters,
                                               self.initial_nodes):
            if initial_nodes < 2:  # Single-node cluster
                continue
            master = servers[0]
            for node in servers[1:initial_nodes]:
                roles = self.cluster_spec.roles[node]
                self.rest.add_node(master, node, roles)

    def rebalance(self):
        """Trigger the initial rebalance and wait until clusters are healthy."""
        for (_, servers), initial_nodes in zip(self.cluster_spec.clusters,
                                               self.initial_nodes):
            master = servers[0]
            known_nodes = servers[:initial_nodes]
            ejected_nodes = []
            self.rest.rebalance(master, known_nodes, ejected_nodes)
            self.monitor.monitor_rebalance(master)
        self.wait_until_healthy()

    def increase_bucket_limit(self, num_buckets: int):
        for master in self.cluster_spec.masters:
            self.rest.increase_bucket_limit(master, num_buckets)

    def flush_buckets(self):
        for master in self.cluster_spec.masters:
            for bucket_name in self.test_config.buckets:
                self.rest.flush_bucket(host=master, bucket=bucket_name)

    def delete_buckets(self):
        for master in self.cluster_spec.masters:
            for bucket_name in self.test_config.buckets:
                self.rest.delete_bucket(host=master, name=bucket_name)

    def create_buckets(self):
        """Create the data buckets, splitting the KV quota evenly.

        Eventing bucket quotas (if configured) are carved out of the total
        before the per-bucket split.
        """
        mem_quota = self.test_config.cluster.mem_quota
        if self.test_config.cluster.num_buckets > 7:
            # Default server-side bucket limit is 10; raise it with headroom.
            self.increase_bucket_limit(self.test_config.cluster.num_buckets + 3)

        if self.test_config.cluster.eventing_metadata_bucket_mem_quota:
            mem_quota -= (
                self.test_config.cluster.eventing_metadata_bucket_mem_quota +
                self.test_config.cluster.eventing_bucket_mem_quota)

        per_bucket_quota = mem_quota // self.test_config.cluster.num_buckets
        for master in self.cluster_spec.masters:
            for bucket_name in self.test_config.buckets:
                self.rest.create_bucket(
                    host=master,
                    name=bucket_name,
                    ram_quota=per_bucket_quota,
                    password=self.test_config.bucket.password,
                    replica_number=self.test_config.bucket.replica_number,
                    replica_index=self.test_config.bucket.replica_index,
                    eviction_policy=self.test_config.bucket.eviction_policy,
                    bucket_type=self.test_config.bucket.bucket_type,
                    conflict_resolution_type=self.test_config.bucket.
                    conflict_resolution_type,
                    compression_mode=self.test_config.bucket.compression_mode,
                )

    def create_eventing_buckets(self):
        """Create source buckets for eventing workloads (quota split evenly)."""
        if not self.test_config.cluster.eventing_bucket_mem_quota:
            return
        per_bucket_quota = \
            self.test_config.cluster.eventing_bucket_mem_quota \
            // self.test_config.cluster.eventing_buckets
        for master in self.cluster_spec.masters:
            for bucket_name in self.test_config.eventing_buckets:
                self.rest.create_bucket(
                    host=master,
                    name=bucket_name,
                    ram_quota=per_bucket_quota,
                    password=self.test_config.bucket.password,
                    replica_number=self.test_config.bucket.replica_number,
                    replica_index=self.test_config.bucket.replica_index,
                    eviction_policy=self.test_config.bucket.eviction_policy,
                    bucket_type=self.test_config.bucket.bucket_type,
                    conflict_resolution_type=self.test_config.bucket.
                    conflict_resolution_type,
                )

    def create_eventing_metadata_bucket(self):
        """Create the dedicated eventing metadata bucket (fixed name/policy)."""
        if not self.test_config.cluster.eventing_metadata_bucket_mem_quota:
            return
        for master in self.cluster_spec.masters:
            self.rest.create_bucket(
                host=master,
                name=self.test_config.cluster.EVENTING_METADATA_BUCKET_NAME,
                ram_quota=self.test_config.cluster.
                eventing_metadata_bucket_mem_quota,
                password=self.test_config.bucket.password,
                replica_number=self.test_config.bucket.replica_number,
                replica_index=self.test_config.bucket.replica_index,
                eviction_policy=self.test_config.bucket.EVICTION_POLICY,
                bucket_type=self.test_config.bucket.BUCKET_TYPE,
            )

    def configure_auto_compaction(self):
        compaction_settings = self.test_config.compaction
        for master in self.cluster_spec.masters:
            self.rest.configure_auto_compaction(master, compaction_settings)
            settings = self.rest.get_auto_compaction_settings(master)
            logger.info('Auto-compaction settings: {}'.format(
                pretty_dict(settings)))

    def configure_internal_settings(self):
        internal_settings = self.test_config.internal_settings
        for master in self.cluster_spec.masters:
            for parameter, value in internal_settings.items():
                # maybe_atoi: values come from config files as strings.
                self.rest.set_internal_settings(master,
                                                {parameter: maybe_atoi(value)})

    def configure_xdcr_settings(self):
        xdcr_cluster_settings = self.test_config.xdcr_cluster_settings
        for master in self.cluster_spec.masters:
            for parameter, value in xdcr_cluster_settings.items():
                self.rest.set_xdcr_cluster_settings(
                    master, {parameter: maybe_atoi(value)})

    def tweak_memory(self):
        """Normalize host memory state (swap, caches, THP) before the run."""
        self.remote.reset_swap()
        self.remote.drop_caches()
        self.remote.set_swappiness()
        self.remote.disable_thp()

    def enable_n2n_encryption(self):
        if self.test_config.cluster.enable_n2n_encryption:
            for master in self.cluster_spec.masters:
                self.remote.enable_n2n_encryption(
                    master, self.test_config.cluster.enable_n2n_encryption)

    def restart_with_alternative_num_vbuckets(self):
        num_vbuckets = self.test_config.cluster.num_vbuckets
        if num_vbuckets is not None:
            self.remote.restart_with_alternative_num_vbuckets(num_vbuckets)

    def restart_with_alternative_bucket_options(self):
        """Apply custom buckets settings.

        Tune bucket settings (e.g., max_num_shards or max_num_auxio)
        using "/diag/eval" and restart the entire cluster.
        """
        if self.test_config.bucket_extras:
            self.remote.enable_nonlocal_diag_eval()

        cmd = 'ns_bucket:update_bucket_props("{}", ' \
              '[{{extra_config_string, "{}={}"}}]).'

        for option, value in self.test_config.bucket_extras.items():
            # Reader/writer thread counts have dedicated REST endpoints;
            # everything else goes through diag/eval.
            if option == 'num_writer_threads':
                self.rest.set_num_writer_threads(self.master_node, int(value))
            elif option == 'num_reader_threads':
                self.rest.set_num_reader_threads(self.master_node, int(value))
            else:
                logger.info('Changing {} to {}'.format(option, value))
                for master in self.cluster_spec.masters:
                    for bucket in self.test_config.buckets:
                        diag_eval = cmd.format(bucket, option, value)
                        self.rest.run_diag_eval(master, diag_eval)

        if self.test_config.bucket_extras:
            # diag/eval changes only take effect after a full restart.
            self.remote.restart()
            self.wait_until_healthy()

    def tune_logging(self):
        self.remote.tune_log_rotation()
        self.remote.restart()

    def enable_auto_failover(self):
        enabled = self.test_config.bucket.autofailover_enabled
        failover_min = self.test_config.bucket.failover_min
        failover_max = self.test_config.bucket.failover_max
        for master in self.cluster_spec.masters:
            self.rest.set_auto_failover(master, enabled,
                                        failover_min, failover_max)

    def wait_until_warmed_up(self):
        # Ephemeral/memcached buckets have no persisted data to warm up.
        if self.test_config.bucket.bucket_type in ('ephemeral', 'memcached'):
            return
        for master in self.cluster_spec.masters:
            for bucket in self.test_config.buckets:
                self.monitor.monitor_warmup(self.memcached, master, bucket)

    def wait_until_healthy(self):
        for master in self.cluster_spec.masters:
            self.monitor.monitor_node_health(master)

            for analytics_node in self.rest.get_active_nodes_by_role(
                    master, 'cbas'):
                self.monitor.monitor_analytics_node_active(analytics_node)

    def gen_disabled_audit_events(self, master: str) -> List[str]:
        """Return the current disabled-event set minus the extra events
        this test wants enabled."""
        curr_settings = self.rest.get_audit_settings(master)
        curr_disabled = {str(event) for event in curr_settings['disabled']}
        disabled = curr_disabled - self.test_config.audit_settings.extra_events
        return list(disabled)

    def enable_audit(self):
        # Auditing is EE-only and requires 4.0+.
        if not self.is_compatible(min_release='4.0.0') or \
                self.rest.is_community(self.master_node):
            return
        if not self.test_config.audit_settings.enabled:
            return
        for master in self.cluster_spec.masters:
            disabled = []
            if self.test_config.audit_settings.extra_events:
                disabled = self.gen_disabled_audit_events(master)
            self.rest.enable_audit(master, disabled)

    def generate_ce_roles(self) -> List[str]:
        # Community Edition only supports the full admin role.
        return ['admin']

    def generate_ee_roles(self) -> List[str]:
        """Return per-bucket role templates supported by this server build."""
        existing_roles = {r['role']
                          for r in self.rest.get_rbac_roles(self.master_node)}

        roles = []
        for role in (
                'bucket_admin',
                'data_dcp_reader',
                'data_monitoring',
                'data_reader_writer',
                'data_reader',
                'data_writer',
                'fts_admin',
                'fts_searcher',
                'query_delete',
                'query_insert',
                'query_select',
                'query_update',
                'views_admin',
        ):
            if role in existing_roles:
                roles.append(role + '[{bucket}]')

        return roles

    def delete_rbac_users(self):
        # RBAC was introduced in 5.0.
        if not self.is_compatible(min_release='5.0'):
            return
        for master in self.cluster_spec.masters:
            for bucket in self.test_config.buckets:
                self.rest.delete_rbac_user(host=master, bucket=bucket)

    def add_rbac_users(self):
        """Create the admin user plus one user per bucket (user == bucket
        name, for backward compatibility with pre-RBAC clients)."""
        if not self.rest.supports_rbac(self.master_node):
            logger.info('RBAC not supported - skipping adding RBAC users')
            return

        if self.rest.is_community(self.master_node):
            roles = self.generate_ce_roles()
        else:
            roles = self.generate_ee_roles()

        for master in self.cluster_spec.masters:
            admin_user, admin_password = self.cluster_spec.rest_credentials
            self.rest.add_rbac_user(
                host=master,
                user=admin_user,
                password=admin_password,
                roles=['admin'],
            )

            for bucket in self.test_config.buckets:
                bucket_roles = [role.format(bucket=bucket) for role in roles]
                bucket_roles.append("admin")
                self.rest.add_rbac_user(
                    host=master,
                    user=bucket,  # Backward compatibility
                    password=self.test_config.bucket.password,
                    roles=bucket_roles,
                )

    def throttle_cpu(self):
        if self.remote.os == 'Cygwin':
            return

        # Re-enable everything first so repeated runs start from a known state.
        self.remote.enable_cpu()

        if self.test_config.cluster.online_cores:
            self.remote.disable_cpu(self.test_config.cluster.online_cores)

    def tune_memory_settings(self):
        """Apply kernel memory limits to the nodes of the targeted services."""
        kernel_memory = self.test_config.cluster.kernel_mem_limit
        if kernel_memory:
            for service in self.test_config.cluster.kernel_mem_limit_services:
                for server in self.cluster_spec.servers_by_role(service):
                    self.remote.tune_memory_settings(host_string=server,
                                                     size=kernel_memory)
            # Tuning restarts the nodes; wait for them to come back.
            self.monitor.wait_for_servers()

    def reset_memory_settings(self):
        for service in self.test_config.cluster.kernel_mem_limit_services:
            for server in self.cluster_spec.servers_by_role(service):
                self.remote.reset_memory_settings(host_string=server)
        self.monitor.wait_for_servers()

    def flush_iptables(self):
        self.remote.flush_iptables()

    def clear_login_history(self):
        self.remote.clear_wtmp()

    def disable_wan(self):
        self.remote.disable_wan()

    def enable_ipv6(self):
        if self.test_config.cluster.ipv6:
            self.remote.enable_ipv6()

    def set_x509_certificates(self):
        """Install x509 certs and enable client certificate auth (ssl auth
        mode only)."""
        logger.info('Setting x509 settings')
        if self.test_config.access_settings.ssl_mode == "auth":
            self.remote.setup_x509()
            for host in self.cluster_spec.servers:
                self.rest.upload_cluster_certificate(host)
            for host in self.cluster_spec.servers:
                self.rest.reload_cluster_certificate(host)
                self.rest.enable_certificate_auth(host)

    def set_cipher_suite(self):
        if self.test_config.access_settings.cipher_list:
            check_cipher_suit = self.rest.get_cipher_suite(self.master_node)
            logger.info('current cipher suit: {}'.format(check_cipher_suit))
            self.rest.set_cipher_suite(
                self.master_node, self.test_config.access_settings.cipher_list)
            check_cipher_suit = self.rest.get_cipher_suite(self.master_node)
            logger.info('new cipher suit: {}'.format(check_cipher_suit))

    def set_min_tls_version(self):
        if self.test_config.access_settings.min_tls_version:
            check_tls_version = self.rest.get_minimum_tls_version(
                self.master_node)
            logger.info('current tls version: {}'.format(check_tls_version))
            self.rest.set_minimum_tls_version(
                self.master_node,
                self.test_config.access_settings.min_tls_version)
            check_tls_version = self.rest.get_minimum_tls_version(
                self.master_node)
            logger.info('new tls version: {}'.format(check_tls_version))
class PerfTest(object):
    """Base class for performance tests (legacy variant).

    Subclasses declare stats collectors via COLLECTORS and implement the
    concrete test phases on top of the load/access helpers below.
    """

    # Collector name -> enabled flag; consulted by the stats machinery.
    COLLECTORS = {}

    # Seconds to wait between triggering and monitoring cluster tasks.
    MONITORING_DELAY = 10

    def __init__(self, cluster_spec, test_config, verbose, experiment=None):
        self.cluster_spec = cluster_spec
        self.test_config = test_config

        self.target_iterator = TargetIterator(cluster_spec, test_config)

        self.memcached = MemcachedHelper(test_config)
        self.monitor = Monitor(cluster_spec)
        self.rest = RestHelper(cluster_spec)
        self.remote = RemoteHelper(cluster_spec, test_config, verbose)

        if experiment:
            self.experiment = ExperimentHelper(experiment,
                                               cluster_spec, test_config)

        # Python 2 iterator API; first master of the first cluster.
        self.master_node = cluster_spec.yield_masters().next()
        if self.remote.gateways:
            # Sync Gateway deployments report their own version.
            self.build = SyncGatewayRequestHelper().get_version(
                self.remote.gateways[0]
            )
        else:
            self.build = self.rest.get_version(self.master_node)

        self.cbagent = CbAgent(self)
        self.metric_helper = MetricHelper(self)
        self.reporter = Reporter(self)
        self.reports = {}
        self.snapshots = []
        self.master_events = []

        if self.test_config.test_case.use_workers:
            self.worker_manager = WorkerManager(cluster_spec, test_config)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Terminate workers, collect debug info, then sanity-check the
        cluster (balanced, no unexpected failovers)."""
        if self.test_config.test_case.use_workers:
            self.worker_manager.terminate()
        if exc_type != exc.KeyboardInterrupt and '--nodebug' not in sys.argv:
            self.debug()

        self.check_core_dumps()

        for master in self.cluster_spec.yield_masters():
            if not self.rest.is_balanced(master):
                logger.interrupt('Rebalance failed')

            self.check_failover(master)

    def check_failover(self, master):
        """Abort the run if an unplanned failover occurred."""
        if hasattr(self, 'rebalance_settings'):
            # Failover is expected for failover/graceful-failover tests.
            if self.rebalance_settings.failover or \
                    self.rebalance_settings.graceful_failover:
                return

        num_failovers = self.rest.get_failover_counter(master)
        if num_failovers:
            logger.interrupt(
                'Failover happened {} time(s)'.format(num_failovers)
            )

    def check_core_dumps(self):
        """Abort the run if any server produced a core dump."""
        dumps_per_host = self.remote.detect_core_dumps()
        core_dumps = {
            host: dumps for host, dumps in dumps_per_host.items() if dumps
        }
        if core_dumps:
            logger.interrupt(pretty_dict(core_dumps))

    def compact_bucket(self):
        """Trigger compaction on every bucket, then wait for completion."""
        for master in self.cluster_spec.yield_masters():
            for bucket in self.test_config.buckets:
                self.rest.trigger_bucket_compaction(master, bucket)

        time.sleep(self.MONITORING_DELAY)

        for master in self.cluster_spec.yield_masters():
            self.monitor.monitor_task(master, 'bucket_compaction')

    def wait_for_persistence(self):
        """Block until disk, TAP and UPR/DCP queues have drained."""
        for master in self.cluster_spec.yield_masters():
            for bucket in self.test_config.buckets:
                self.monitor.monitor_disk_queues(master, bucket)
                self.monitor.monitor_tap_queues(master, bucket)
                self.monitor.monitor_upr_queues(master, bucket)

    def load(self):
        load_settings = self.test_config.load_settings
        log_phase('load phase', load_settings)
        self.worker_manager.run_workload(load_settings, self.target_iterator)
        self.worker_manager.wait_for_workers()

    def hot_load(self):
        # Second pass over the working set to populate server caches.
        hot_load_settings = self.test_config.hot_load_settings
        log_phase('hot load phase', hot_load_settings)
        self.worker_manager.run_workload(hot_load_settings,
                                         self.target_iterator)
        self.worker_manager.wait_for_workers()

    def access(self):
        """Foreground (blocking) access phase."""
        access_settings = self.test_config.access_settings
        log_phase('access phase', access_settings)
        self.worker_manager.run_workload(access_settings,
                                         self.target_iterator)
        self.worker_manager.wait_for_workers()

    def access_bg(self):
        """Background access phase; returns immediately, bounded by a timer."""
        access_settings = self.test_config.access_settings
        log_phase('access phase in background', access_settings)
        access_settings.index_type = self.test_config.index_settings.index_type
        # Propagate optional per-test attributes set by subclasses.
        access_settings.n1ql = getattr(self, 'n1ql', None)
        access_settings.ddocs = getattr(self, 'ddocs', None)
        self.worker_manager.run_workload(access_settings,
                                         self.target_iterator,
                                         timer=access_settings.time)

    def timer(self):
        access_settings = self.test_config.access_settings
        logger.info('Running phase for {} seconds'.format(access_settings.time))
        time.sleep(access_settings.time)

    def debug(self):
        """Collect cbcollect_info archives (and gateway logs if present)."""
        self.remote.collect_info()
        for hostname in self.cluster_spec.yield_hostnames():
            for fname in glob.glob('{}/*.zip'.format(hostname)):
                shutil.move(fname, '{}.zip'.format(hostname))

        self.reporter.save_web_logs()

        if self.test_config.cluster.run_cbq:
            self.remote.collect_cbq_logs()
            for hostname in self.cluster_spec.yield_hostnames():
                for fname in glob.glob('{}/cbq.log'.format(hostname)):
                    shutil.move(fname, '{}-cbq.log'.format(hostname))

        if self.cluster_spec.gateways:
            self.remote.collect_info_gateway()
            self.remote.collect_info_gateload()
            self.reporter.check_sgw_logs()
            self.reporter.save_expvar()
class PerfTest(object):
    """Base class for performance tests (legacy variant with restore
    support and pluggable phase settings)."""

    # Collector name -> enabled flag; consulted by the stats machinery.
    COLLECTORS = {}

    # Seconds to wait between triggering and monitoring cluster tasks.
    MONITORING_DELAY = 10

    def __init__(self, cluster_spec, test_config, verbose):
        self.cluster_spec = cluster_spec
        self.test_config = test_config

        self.target_iterator = TargetIterator(cluster_spec, test_config)

        self.memcached = MemcachedHelper(test_config)
        self.monitor = Monitor(cluster_spec)
        self.rest = RestHelper(cluster_spec)
        self.remote = RemoteHelper(cluster_spec, test_config, verbose)
        self.restore_helper = RestoreHelper(cluster_spec, test_config,
                                            verbose)
        # Python 2 iterator API; first master of the first cluster.
        self.master_node = cluster_spec.yield_masters().next()
        self.build = self.rest.get_version(self.master_node)

        self.cbagent = CbAgent(self, verbose=verbose)
        self.metric_helper = MetricHelper(self)
        self.reporter = Reporter(self)
        self.reports = {}
        self.snapshots = []
        self.master_events = []

        if self.test_config.test_case.use_workers:
            self.worker_manager = WorkerManager(cluster_spec, test_config)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Terminate workers, then sanity-check the cluster (no core dumps,
        balanced, no unexpected failovers)."""
        if self.test_config.test_case.use_workers:
            self.worker_manager.terminate()
        if exc_type != exc.KeyboardInterrupt:
            self.check_core_dumps()
            for master in self.cluster_spec.yield_masters():
                if not self.rest.is_balanced(master):
                    logger.interrupt('Rebalance failed')
                self.check_failover(master)

    def check_failover(self, master):
        """Abort the run if an unplanned failover occurred."""
        if hasattr(self, 'rebalance_settings'):
            # Failover is expected for failover/graceful-failover tests.
            if self.rebalance_settings.failover or \
                    self.rebalance_settings.graceful_failover:
                return

        num_failovers = self.rest.get_failover_counter(master)
        if num_failovers:
            logger.interrupt(
                'Failover happened {} time(s)'.format(num_failovers)
            )

    def check_core_dumps(self):
        """Abort the run if any server produced a core dump."""
        dumps_per_host = self.remote.detect_core_dumps()
        core_dumps = {
            host: dumps for host, dumps in dumps_per_host.items() if dumps
        }
        if core_dumps:
            logger.interrupt(pretty_dict(core_dumps))

    def compact_bucket(self):
        """Trigger compaction on every bucket, then wait for completion."""
        for master in self.cluster_spec.yield_masters():
            for bucket in self.test_config.buckets:
                self.rest.trigger_bucket_compaction(master, bucket)

        time.sleep(self.MONITORING_DELAY)

        for master in self.cluster_spec.yield_masters():
            self.monitor.monitor_task(master, 'bucket_compaction')

    def wait_for_persistence(self):
        """Block until disk, TAP and DCP queues have drained."""
        for master in self.cluster_spec.yield_masters():
            for bucket in self.test_config.buckets:
                self.monitor.monitor_disk_queues(master, bucket)
                self.monitor.monitor_tap_queues(master, bucket)
                self.monitor.monitor_dcp_queues(master, bucket)

    def restore(self):
        """Restore a pre-built data set instead of running a load phase."""
        self.restore_helper.restore()
        self.restore_helper.warmup()

    def load(self, load_settings=None, target_iterator=None):
        # Both arguments default to the test-level configuration.
        if load_settings is None:
            load_settings = self.test_config.load_settings
        if target_iterator is None:
            target_iterator = self.target_iterator
        log_phase('load phase', load_settings)
        self.worker_manager.run_workload(load_settings, target_iterator)
        self.worker_manager.wait_for_workers()

    def hot_load(self):
        # Second pass over the working set to populate server caches.
        hot_load_settings = self.test_config.hot_load_settings
        log_phase('hot load phase', hot_load_settings)
        self.worker_manager.run_workload(hot_load_settings,
                                         self.target_iterator)
        self.worker_manager.wait_for_workers()

    def access(self, access_settings=None):
        """Foreground (blocking) access phase."""
        if access_settings is None:
            access_settings = self.test_config.access_settings
        log_phase('access phase', access_settings)
        self.worker_manager.run_workload(access_settings,
                                         self.target_iterator)
        self.worker_manager.wait_for_workers()

    def access_bg(self, access_settings=None, target_iterator=None):
        """Background access phase; returns immediately, bounded by a timer."""
        if access_settings is None:
            access_settings = self.test_config.access_settings
        if target_iterator is None:
            target_iterator = self.target_iterator
        log_phase('access phase in background', access_settings)
        access_settings.index_type = self.test_config.index_settings.index_type
        # Propagate optional per-test attribute set by subclasses.
        access_settings.ddocs = getattr(self, 'ddocs', None)
        self.worker_manager.run_workload(access_settings,
                                         target_iterator,
                                         timer=access_settings.time)

    def timer(self):
        access_settings = self.test_config.access_settings
        logger.info('Running phase for {} seconds'.format(access_settings.time))
        time.sleep(access_settings.time)

    def report_kpi(self, *args, **kwargs):
        # Only publish KPIs when stats collection is enabled.
        if self.test_config.stats_settings.enabled:
            self._report_kpi(*args, **kwargs)

    def _report_kpi(self, *args, **kwargs):
        # Subclasses override this with the actual metric calculation.
        pass
def main():
    """Entry point for the N1QL query benchmark driver.

    Parses CLI options, installs Couchbase on the target cluster,
    configures it, runs the airline/beer query benchmarks, stores the
    results, and exits non-zero on benchmark failure.
    """
    usage = '%prog -v version -c cluster-spec -f conffile -b build -t tag'

    parser = OptionParser(usage)

    parser.add_option('-v', '--version', dest='version')
    parser.add_option('-c', dest='cluster_spec_fname',
                      help='path to cluster specification file',
                      metavar='cluster.spec')
    parser.add_option('--verbose', dest='verbose', action='store_true',
                      help='enable verbose logging')
    parser.add_option('-o', dest='toy',
                      help='optional toy build ID', metavar='couchstore')
    parser.add_option('-t', dest='test_config_fname',
                      help='path to test configuration file',
                      metavar='my_test.test')
    parser.add_option('-e', '--edition', dest='cluster_edition',
                      default='enterprise',
                      help='the cluster edition (community or enterprise)')
    parser.add_option('--url', dest='url', default=None,
                      help='The http URL to a Couchbase RPM that should be'
                           ' installed. This overrides the URL to be installed.')
    parser.add_option('-f', '--filename', dest='filename')
    parser.add_option('-b', '--build', dest='build')
    parser.add_option('-n', '--number', dest='number')

    # Parse exactly once (the original parsed twice with identical results).
    options, args = parser.parse_args()

    test_id = options.number + '_' + options.build

    # Let I/O and JSON errors propagate with their original traceback;
    # the previous try/except only re-raised them anyway.
    with open(options.filename) as data_file:
        data = json.load(data_file)

    mng_data = manage_test_result(['beer_queries', 'travel_queries'])
    mng_data.create_cb_instance(data["couchbase_server"],
                                data["couchbase_query_bucket"])

    cluster_spec = ClusterSpec()
    cluster_spec.parse(options.cluster_spec_fname, args)
    test_config = TestConfig()
    test_config.parse(options.test_config_fname, args)

    cm = ClusterManager(cluster_spec, test_config, options.verbose)
    installer = CouchbaseInstaller(cluster_spec, options)
    installer.install()  # was wrapped in a no-op `if True:`

    if cm.remote:
        # SSH-level tuning; only possible when remote access is available.
        cm.tune_logging()
        cm.restart_with_sfwi()
        cm.restart_with_alternative_num_vbuckets()
        cm.restart_with_alternative_num_cpus()
        cm.restart_with_tcmalloc_aggressive_decommit()
        cm.disable_moxi()

    cm.configure_internal_settings()
    cm.set_data_path()
    cm.set_services()
    cm.set_mem_quota()
    cm.set_index_mem_quota()
    cm.set_auth()
    time.sleep(30)  # give the cluster time to settle after configuration

    # next() builtin instead of the Python-2-only .next() method.
    host_ip = next(cluster_spec.yield_masters()).split(':')[0]
    URL = 'http://' + host_ip + ':8093'
    logger.info('logging the URL: {}'.format(URL))
    conn = urllib3.connection_from_url(URL)
    rest = RestHelper(cluster_spec)

    airline_result = do_airline_benchmarks(mng_data, conn, rest, host_ip,
                                           installer.remote, cluster_spec)
    beer_result = do_beer_queries(mng_data, conn, rest, host_ip,
                                  installer.remote)
    print('beer_result is', beer_result)
    # mng_data.cb_load_test(data["couchbase_query_bucket"], beer_result)

    mng_data.load_data_query_benchmark(data["couchbase_query_bucket"],
                                       'query_benchmark', test_id,
                                       options.version)
    mng_data.show_data_query_benchmark(data["couchbase_query_bucket"],
                                       'query_benchmark', test_id)

    # Non-zero exit status if either benchmark failed.
    sys.exit(not (airline_result and beer_result))
class ClusterManager:
    """Provisions and configures a Couchbase cluster (legacy variant).

    Thin orchestration layer over RestHelper (REST API), RemoteHelper
    (SSH), Monitor and MemcachedHelper.
    """

    def __init__(self, cluster_spec, test_config, verbose=False):
        self.cluster_spec = cluster_spec
        self.test_config = test_config

        self.rest = RestHelper(cluster_spec)
        self.remote = RemoteHelper(cluster_spec, test_config, verbose)
        self.monitor = Monitor(cluster_spec, test_config, verbose)
        self.memcached = MemcachedHelper(test_config)

        # Master of the first cluster in the spec.
        self.master_node = next(self.cluster_spec.masters)

        self.initial_nodes = test_config.cluster.initial_nodes
        self.mem_quota = test_config.cluster.mem_quota
        self.index_mem_quota = test_config.cluster.index_mem_quota
        self.fts_mem_quota = test_config.cluster.fts_index_mem_quota
        self.analytics_mem_quota = test_config.cluster.analytics_mem_quota

    def is_compatible(self, min_release):
        """Return True if the server version is >= min_release.

        NOTE(review): returns inside the loop, so only the FIRST cluster's
        master is checked — presumably fine for homogeneous clusters.
        """
        for master in self.cluster_spec.masters:
            version = self.rest.get_version(master)
            return version >= min_release

    def set_data_path(self):
        if self.cluster_spec.paths:
            data_path, index_path = self.cluster_spec.paths
            for server in self.cluster_spec.servers:
                self.rest.set_data_path(server, data_path, index_path)

    def rename(self):
        # Assign each node its canonical hostname.
        for server in self.cluster_spec.servers:
            self.rest.rename(server)

    def set_auth(self):
        # Set the administrator credentials on every node.
        for server in self.cluster_spec.servers:
            self.rest.set_auth(server)

    def set_mem_quota(self):
        """Apply per-service memory quotas (FTS requires 4.5+)."""
        for master in self.cluster_spec.masters:
            self.rest.set_mem_quota(master, self.mem_quota)
            self.rest.set_index_mem_quota(master, self.index_mem_quota)
            if self.is_compatible(min_release='4.5.0'):
                self.rest.set_fts_index_mem_quota(master, self.fts_mem_quota)
            if self.analytics_mem_quota:
                self.rest.set_analytics_mem_quota(master,
                                                  self.analytics_mem_quota)

    def set_query_settings(self):
        settings = self.test_config.n1ql_settings.settings
        for server in self.cluster_spec.servers_by_role('n1ql'):
            if settings:
                self.rest.set_query_settings(server, settings)

    def set_index_settings(self):
        settings = self.test_config.gsi_settings.settings
        for server in self.cluster_spec.servers_by_role('index'):
            if settings:
                self.rest.set_index_settings(server, settings)
            # Log the effective settings regardless of whether we changed them.
            curr_settings = self.rest.get_index_settings(server)
            curr_settings = pretty_dict(curr_settings)
            logger.info("Index settings: {}".format(curr_settings))

    def set_services(self):
        # Multi-service topologies require 4.0+.
        if not self.is_compatible(min_release='4.0.0'):
            return
        for server in self.cluster_spec.servers:
            roles = self.cluster_spec.roles[server]
            if 'kv' in roles:
                self.rest.set_services(server, roles)

    def add_nodes(self):
        """Join non-master nodes (up to initial_nodes) to each cluster."""
        for (_, servers), initial_nodes in zip(self.cluster_spec.clusters,
                                               self.initial_nodes):
            if initial_nodes < 2:  # Single-node cluster
                continue
            master = servers[0]
            for node in servers[1:initial_nodes]:
                roles = self.cluster_spec.roles[node]
                self.rest.add_node(master, node, roles)

    def rebalance(self):
        """Trigger the initial rebalance and wait until clusters are healthy."""
        for (_, servers), initial_nodes in zip(self.cluster_spec.clusters,
                                               self.initial_nodes):
            master = servers[0]
            known_nodes = servers[:initial_nodes]
            ejected_nodes = []
            self.rest.rebalance(master, known_nodes, ejected_nodes)
            self.monitor.monitor_rebalance(master)
        self.wait_until_healthy()

    def create_buckets(self):
        """Create data buckets, eventing buckets and the eventing metadata
        bucket, splitting the configured quotas evenly."""
        if self.test_config.cluster.eventing_bucket_mem_quota:
            # Eventing buckets get their own quota; carve it out of the total.
            ram_quota = (self.mem_quota -
                         self.test_config.cluster.eventing_bucket_mem_quota) \
                // self.test_config.cluster.num_buckets
        else:
            ram_quota = self.mem_quota // self.test_config.cluster.num_buckets

        for master in self.cluster_spec.masters:
            for bucket_name in self.test_config.buckets:
                self.rest.create_bucket(
                    host=master,
                    name=bucket_name,
                    ram_quota=ram_quota,
                    password=self.test_config.bucket.password,
                    replica_number=self.test_config.bucket.replica_number,
                    replica_index=self.test_config.bucket.replica_index,
                    eviction_policy=self.test_config.bucket.eviction_policy,
                    bucket_type=self.test_config.bucket.bucket_type,
                    conflict_resolution_type=self.test_config.bucket.
                    conflict_resolution_type,
                )

        if self.test_config.cluster.eventing_bucket_mem_quota:
            # BUGFIX: the subtraction must happen before the division.
            # The original expression `quota - 1000 // n` only subtracted
            # `1000 // n` (floor division binds tighter), over-allocating
            # the eventing buckets.  1000 MB is reserved for the "eventing"
            # metadata bucket created below.
            ram_quota = \
                (self.test_config.cluster.eventing_bucket_mem_quota - 1000) \
                // self.test_config.cluster.eventing_buckets
            for master in self.cluster_spec.masters:
                # Create buckets for eventing operations
                for bucket_name in self.test_config.eventing_buckets:
                    self.rest.create_bucket(
                        host=master,
                        name=bucket_name,
                        ram_quota=ram_quota,
                        password=self.test_config.bucket.password,
                        replica_number=self.test_config.bucket.replica_number,
                        replica_index=self.test_config.bucket.replica_index,
                        eviction_policy=self.test_config.bucket.
                        eviction_policy,
                        bucket_type=self.test_config.bucket.bucket_type,
                        conflict_resolution_type=self.test_config.bucket.
                        conflict_resolution_type,
                    )
                # Create eventing metadata bucket
                self.rest.create_bucket(
                    host=master,
                    name="eventing",
                    ram_quota=1000,
                    password="******",
                    replica_number=0,
                    replica_index=0,
                    eviction_policy="valueOnly",
                    bucket_type="membase",
                )

    def configure_auto_compaction(self):
        compaction_settings = self.test_config.compaction
        for master in self.cluster_spec.masters:
            self.rest.configure_auto_compaction(master, compaction_settings)
            settings = self.rest.get_auto_compaction_settings(master)
            logger.info('Auto-compaction settings: {}'.format(
                pretty_dict(settings)))

    def configure_internal_settings(self):
        internal_settings = self.test_config.internal_settings
        for master in self.cluster_spec.masters:
            for parameter, value in internal_settings.items():
                self.rest.set_internal_settings(master,
                                                {parameter: int(value)})

    def configure_xdcr_settings(self):
        xdcr_cluster_settings = self.test_config.xdcr_cluster_settings
        for master in self.cluster_spec.masters:
            for parameter, value in xdcr_cluster_settings.items():
                self.rest.set_xdcr_cluster_settings(master,
                                                    {parameter: int(value)})

    def tweak_memory(self):
        """Normalize host memory state (swap, caches, THP) before the run."""
        self.remote.reset_swap()
        self.remote.drop_caches()
        self.remote.set_swappiness()
        self.remote.disable_thp()

    def restart_with_alternative_num_vbuckets(self):
        num_vbuckets = self.test_config.cluster.num_vbuckets
        if num_vbuckets is not None:
            self.remote.restart_with_alternative_num_vbuckets(num_vbuckets)

    def restart_with_alternative_bucket_options(self):
        """Apply custom buckets settings.

        Tune bucket settings (e.g., max_num_shards or max_num_auxio)
        using "/diag/eval" and restart the entire cluster.
        """
        cmd = 'ns_bucket:update_bucket_props("{}", ' \
              '[{{extra_config_string, "{}={}"}}]).'

        for option, value in self.test_config.bucket_extras.items():
            logger.info('Changing {} to {}'.format(option, value))
            for master in self.cluster_spec.masters:
                for bucket in self.test_config.buckets:
                    diag_eval = cmd.format(bucket, option, value)
                    self.rest.run_diag_eval(master, diag_eval)

        if self.test_config.bucket_extras:
            # diag/eval changes only take effect after a full restart.
            self.remote.restart()
            self.wait_until_healthy()

    def tune_logging(self):
        self.remote.tune_log_rotation()
        self.remote.restart()

    def tune_analytics_logging(self):
        for analytics_node in self.cluster_spec.servers_by_role("cbas"):
            self.rest.set_analytics_loglevel(
                analytics_node, self.test_config.cluster.analytics_log_level)
            self.rest.restart_analytics(analytics_node)

    def enable_auto_failover(self):
        for master in self.cluster_spec.masters:
            self.rest.enable_auto_failover(master)

    def wait_until_warmed_up(self):
        # Ephemeral/memcached buckets have no persisted data to warm up.
        if self.test_config.bucket.bucket_type in ('ephemeral', 'memcached'):
            return
        for master in self.cluster_spec.masters:
            for bucket in self.test_config.buckets:
                self.monitor.monitor_warmup(self.memcached, master, bucket)

    def wait_until_healthy(self):
        for master in self.cluster_spec.masters:
            self.monitor.monitor_node_health(master)
        for analytics_node in self.cluster_spec.servers_by_role("cbas"):
            self.monitor.monitor_analytics_node_active(analytics_node)

    def enable_audit(self):
        # Auditing is EE-only and requires 4.0+.
        if not self.is_compatible(min_release='4.0.0') or \
                self.rest.is_community(self.master_node):
            return
        for master in self.cluster_spec.masters:
            self.rest.enable_audit(master)

    def generate_ce_roles(self) -> List[str]:
        # Community Edition only supports the full admin role.
        return ['admin']

    def generate_ee_roles(self) -> List[str]:
        """Return per-bucket role templates supported by this server build.

        BUGFIX: the original computed `roles` and then discarded it with
        `return ['admin']`; the sibling implementation of this class
        returns the computed list, which is clearly the intent here.
        """
        existing_roles = {r['role']
                          for r in self.rest.get_rbac_roles(self.master_node)}

        roles = []
        for role in (
                'bucket_admin',
                'data_dcp_reader',
                'data_monitoring',
                'data_reader_writer',
                'data_reader',
                'data_writer',
                'fts_admin',
                'fts_searcher',
                'query_delete',
                'query_insert',
                'query_select',
                'query_update',
                'views_admin',
        ):
            if role in existing_roles:
                roles.append(role + '[{bucket}]')

        return roles

    def add_rbac_users(self):
        """Create one RBAC user per bucket (user name == bucket name)."""
        if not self.is_compatible(min_release='5.0'):
            return

        if self.rest.is_community(self.master_node):
            roles = self.generate_ce_roles()
        else:
            roles = self.generate_ee_roles()

        for master in self.cluster_spec.masters:
            for bucket in self.test_config.buckets:
                bucket_roles = [role.format(bucket=bucket) for role in roles]
                self.rest.add_rbac_user(
                    host=master,
                    bucket=bucket,
                    password=self.test_config.bucket.password,
                    roles=bucket_roles,
                )

    def throttle_cpu(self):
        if self.remote.os == 'Cygwin':
            return

        # Re-enable everything first so repeated runs start from a known state.
        self.remote.enable_cpu()

        if self.test_config.cluster.online_cores:
            self.remote.disable_cpu(self.test_config.cluster.online_cores)

    def tune_memory_settings(self):
        """Apply a kernel memory limit to all servers, then wait for them."""
        kernel_memory = self.test_config.cluster.kernel_mem_limit
        if kernel_memory:
            self.remote.tune_memory_settings(size=kernel_memory)
            self.monitor.wait_for_servers()

    def flush_iptables(self):
        self.remote.flush_iptables()
class PerfTest:
    """Base class for performance tests: setup, phases, checks, reporting."""

    # Per-test map of stats collectors; subclasses override as needed
    COLLECTORS = {}

    # Local file name used when downloading the cluster certificate
    ROOT_CERTIFICATE = 'root.pem'

    def __init__(self,
                 cluster_spec: ClusterSpec,
                 test_config: TestConfig,
                 verbose: bool):
        self.cluster_spec = cluster_spec
        self.test_config = test_config

        self.target_iterator = TargetIterator(cluster_spec, test_config)

        self.cluster = ClusterManager(cluster_spec, test_config)
        self.memcached = MemcachedHelper(test_config)
        self.monitor = Monitor(cluster_spec, test_config, verbose)
        self.rest = RestHelper(cluster_spec)
        self.remote = RemoteHelper(cluster_spec, verbose)
        self.profiler = Profiler(cluster_spec, test_config)

        self.master_node = next(cluster_spec.masters)
        self.build = self.rest.get_version(self.master_node)

        self.metrics = MetricHelper(self)
        self.reporter = ShowFastReporter(cluster_spec, test_config, self.build)

        self.cbmonitor_snapshots = []
        self.cbmonitor_clusters = []

        # Worker manager is created only when the test case uses workers
        if self.test_config.test_case.use_workers:
            self.worker_manager = WorkerManager(cluster_spec, test_config,
                                                verbose)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Collect failure evidence BEFORE tearing anything down
        failure = self.debug()

        self.tear_down()

        # A manual interruption is reported but suppressed (returns True)
        if exc_type == KeyboardInterrupt:
            logger.warn('The test was interrupted')
            return True

        if failure:
            logger.interrupt(failure)

    @property
    def query_nodes(self) -> List[str]:
        """Active nodes running the Query (N1QL) service."""
        return self.rest.get_active_nodes_by_role(self.master_node, 'n1ql')

    @property
    def index_nodes(self) -> List[str]:
        """Active nodes running the Index service."""
        return self.rest.get_active_nodes_by_role(self.master_node, 'index')

    @property
    def fts_nodes(self) -> List[str]:
        """Active nodes running the Full-Text Search service."""
        return self.rest.get_active_nodes_by_role(self.master_node, 'fts')

    @property
    def analytics_nodes(self) -> List[str]:
        """Active nodes running the Analytics service."""
        return self.rest.get_active_nodes_by_role(self.master_node, 'cbas')

    @property
    def eventing_nodes(self) -> List[str]:
        """Active nodes running the Eventing service."""
        return self.rest.get_active_nodes_by_role(self.master_node, 'eventing')

    def tear_down(self):
        """Stop workers and undo host-level tuning applied for the test."""
        if self.test_config.test_case.use_workers:
            self.worker_manager.download_celery_logs()
            self.worker_manager.terminate()

        if self.test_config.cluster.online_cores:
            self.remote.enable_cpu()

        # Logs are collected only for kernel-memory-limited runs, before
        # the memory settings are restored
        if self.test_config.cluster.kernel_mem_limit:
            self.collect_logs()
            self.cluster.reset_memory_settings()

    def collect_logs(self):
        """Run cbcollect_info and move per-host archives to <host>.zip."""
        self.remote.collect_info()

        for hostname in self.cluster_spec.servers:
            for fname in glob.glob('{}/*.zip'.format(hostname)):
                shutil.move(fname, '{}.zip'.format(hostname))

    def reset_memory_settings(self):
        """Restore default memory settings on all tuned servers."""
        if self.test_config.cluster.kernel_mem_limit:
            for service in self.test_config.cluster.kernel_mem_limit_services:
                for server in self.cluster_spec.servers_by_role(service):
                    self.remote.reset_memory_settings(host_string=server)
            self.monitor.wait_for_servers()

    def debug(self) -> Optional[str]:
        """Run post-test sanity checks; return a failure description or None.

        Later checks take precedence in the returned message.
        """
        failure = self.check_core_dumps()
        failure = self.check_rebalance() or failure
        return self.check_failover() or failure

    def download_certificate(self):
        """Fetch the cluster root certificate and write it to disk."""
        cert = self.rest.get_certificate(self.master_node)
        with open(self.ROOT_CERTIFICATE, 'w') as fh:
            fh.write(cert)

    def check_rebalance(self) -> Optional[str]:
        """Return an error message if any cluster is not balanced."""
        for master in self.cluster_spec.masters:
            if self.rest.is_not_balanced(master):
                return 'The cluster is not balanced'

    def check_failover(self) -> Optional[str]:
        """Return an error message if an unexpected failover happened.

        Tests that deliberately fail nodes over are exempt.
        """
        if hasattr(self, 'rebalance_settings'):
            if self.rebalance_settings.failover or \
                    self.rebalance_settings.graceful_failover:
                return

        for master in self.cluster_spec.masters:
            num_failovers = self.rest.get_failover_counter(master)
            if num_failovers:
                return 'Failover happened {} time(s)'.format(num_failovers)

    def check_core_dumps(self) -> Optional[str]:
        """Return a pretty-printed summary of core dumps, if any were found."""
        dumps_per_host = self.remote.detect_core_dumps()
        core_dumps = {
            host: dumps for host, dumps in dumps_per_host.items() if dumps
        }
        if core_dumps:
            return pretty_dict(core_dumps)

    def restore(self):
        """Restore data on the remote servers from a backup repository."""
        logger.info('Restoring data')
        self.remote.restore_data(
            self.test_config.restore_settings.backup_storage,
            self.test_config.restore_settings.backup_repo,
        )

    def restore_local(self):
        """Restore data from a local archive using cbbackupmgr."""
        logger.info('Restoring data')
        local.extract_cb(filename='couchbase.rpm')
        local.cbbackupmgr_restore(
            master_node=self.master_node,
            cluster_spec=self.cluster_spec,
            threads=self.test_config.restore_settings.threads,
            archive=self.test_config.restore_settings.backup_storage,
            repo=self.test_config.restore_settings.backup_repo,
        )

    def import_data(self):
        """Import the configured data file into every test bucket."""
        logger.info('Importing data')
        for bucket in self.test_config.buckets:
            self.remote.import_data(
                self.test_config.restore_settings.import_file,
                bucket,
            )

    def compact_bucket(self, wait: bool = True):
        """Trigger compaction on every target; optionally wait for completion."""
        for target in self.target_iterator:
            self.rest.trigger_bucket_compaction(target.node, target.bucket)

        if wait:
            for target in self.target_iterator:
                self.monitor.monitor_task(target.node, 'bucket_compaction')

    def wait_for_persistence(self):
        """Block until disk and DCP queues drain on every target."""
        for target in self.target_iterator:
            self.monitor.monitor_disk_queues(target.node, target.bucket)
            self.monitor.monitor_dcp_queues(target.node, target.bucket)

    def wait_for_indexing(self):
        """Block until indexing finishes on all index nodes (if any indexes)."""
        if self.test_config.index_settings.statements:
            for server in self.index_nodes:
                self.monitor.monitor_indexing(server)

    def check_num_items(self):
        """Verify the expected item count (actives plus replicas) per target."""
        num_items = self.test_config.load_settings.items * (
            1 + self.test_config.bucket.replica_number
        )
        for target in self.target_iterator:
            self.monitor.monitor_num_items(target.node, target.bucket,
                                           num_items)

    def reset_kv_stats(self):
        """Reset memcached stats on every KV server for every bucket."""
        master_node = next(self.cluster_spec.masters)
        for bucket in self.test_config.buckets:
            for server in self.rest.get_server_list(master_node, bucket):
                port = self.rest.get_memcached_port(server)
                self.memcached.reset_stats(server, port, bucket)

    def create_indexes(self):
        """Execute the configured index DDL statements on one query node."""
        logger.info('Creating and building indexes')

        for statement in self.test_config.index_settings.statements:
            self.rest.exec_n1ql_statement(self.query_nodes[0], statement)

    def create_functions(self):
        """Execute the configured N1QL function statements on one query node."""
        logger.info('Creating n1ql functions')

        for statement in self.test_config.n1ql_function_settings.statements:
            self.rest.exec_n1ql_statement(self.query_nodes[0], statement)

    def sleep(self):
        """Idle for the configured access-phase duration."""
        access_settings = self.test_config.access_settings
        logger.info('Running phase for {} seconds'.format(access_settings.time))
        time.sleep(access_settings.time)

    def run_phase(self,
                  phase: str,
                  task: Callable,
                  settings: PhaseSettings,
                  target_iterator: Iterable,
                  timer: int = None,
                  wait: bool = True):
        """Dispatch a workload phase to the workers.

        When wait is False the phase runs in the background and the
        caller is responsible for synchronizing with the workers.
        """
        logger.info('Running {}: {}'.format(phase, pretty_dict(settings)))
        self.worker_manager.run_tasks(task, settings, target_iterator, timer)
        if wait:
            self.worker_manager.wait_for_workers()

    def load(self,
             task: Callable = spring_task,
             settings: PhaseSettings = None,
             target_iterator: Iterable = None):
        """Run the initial data-load phase (defaults from the test config)."""
        if settings is None:
            settings = self.test_config.load_settings
        if target_iterator is None:
            target_iterator = self.target_iterator
        self.run_phase('load phase', task, settings, target_iterator)

    def hot_load(self,
                 task: Callable = spring_task):
        """Run the hot-load (working-set warm-up) phase."""
        settings = self.test_config.hot_load_settings
        self.run_phase('hot load phase', task, settings, self.target_iterator)

    def xattr_load(self,
                   task: Callable = spring_task,
                   target_iterator: Iterable = None):
        """Run the extended-attributes load phase."""
        if target_iterator is None:
            target_iterator = self.target_iterator
        settings = self.test_config.xattr_load_settings
        self.run_phase('xattr phase', task, settings, target_iterator)

    def access(self,
               task: Callable = spring_task,
               settings: PhaseSettings = None,
               target_iterator: Iterable = None):
        """Run the timed access (measurement) phase and wait for it."""
        if settings is None:
            settings = self.test_config.access_settings
        if target_iterator is None:
            target_iterator = self.target_iterator
        self.run_phase('access phase', task, settings, target_iterator,
                       timer=settings.time)

    def access_bg(self,
                  task: Callable = spring_task,
                  settings: PhaseSettings = None,
                  target_iterator: Iterable = None):
        """Start the access phase in the background (does not wait)."""
        if settings is None:
            settings = self.test_config.access_settings
        if target_iterator is None:
            target_iterator = self.target_iterator
        self.run_phase('background access phase', task, settings,
                       target_iterator, timer=settings.time, wait=False)

    def report_kpi(self, *args, **kwargs):
        """Publish KPIs, but only when stats collection is enabled."""
        if self.test_config.stats_settings.enabled:
            self._report_kpi(*args, **kwargs)

    def _report_kpi(self, *args, **kwargs):
        # Subclasses override this hook with test-specific KPI reporting
        pass