Example #1
    def stress_seq_range(self, row_count, command_part1, command_part2):
        load_ip_count = len(self.load_ips)
        row_count_per_ip = row_count // load_ip_count

        # Boundary points of the key space; the last load generator
        # absorbs any remainder of the integer division.
        range_points = [0]
        for i in range(load_ip_count):
            range_points.append(range_points[-1] + row_count_per_ip)
        range_points[-1] = row_count

        # One contiguous, non-overlapping, inclusive sequence range per
        # load generator; n is the number of keys in that range.
        population_commands = []
        for i in range(len(range_points) - 1):
            n = range_points[i + 1] - range_points[i]
            population_commands.append(
                f' n={n} -pop seq={range_points[i] + 1}..{range_points[i + 1]} '
            )

        log(population_commands)

        log_important("Cassandra-Stress: started")
        run_parallel(
            self.__stress,
            [(ip, 10 if i > 0 else 0,
              command_part1 + pop_command + command_part2)
             for i, (ip, pop_command
                     ) in enumerate(zip(self.load_ips, population_commands))])
        log_important("Cassandra-Stress: done")
    def __stress(self, ip, cmd):
        full_cmd = f'go/bin/scylla-bench {cmd}'

        dt = datetime.now().strftime("%d-%m-%Y_%H-%M-%S")
        full_cmd = full_cmd + f" 2>&1 | tee -a scylla-bench-{dt}.log"
        log(full_cmd)
        self.__new_ssh(ip).exec(full_cmd)
    def run(self, command):
        log_important(
            f'Disk Explorer run: started [{datetime.now().strftime("%H:%M:%S")}]')
        log(f"python3 diskplorer.py {command}")
        run_parallel(self.__run, [(ip, command) for ip in self.ips])
        log_important(
            f'Disk Explorer run: done [{datetime.now().strftime("%H:%M:%S")}]')

    def stress(self, command, load_index=None):
        if load_index is None:
            log_important("scylla-bench: started")
            run_parallel(self.__stress,
                         [(ip, command) for ip in self.load_ips])
            log_important("scylla-bench: done")
        else:
            log(f"using load_index {load_index}")
            self.__stress(self.load_ips[load_index], command)
    def start(self):
        log(f"Starting Scylla nodes {self.cluster_public_ips}")
        for public_ip in self.cluster_public_ips:
            ssh = self.__new_ssh(public_ip)
            ssh.exec("sudo systemctl start scylla-server")

        for public_ip in self.cluster_public_ips:
            wait_for_cql_start(public_ip)
            log_machine(public_ip, "Node finished bootstrapping")
        log(f"Starting Scylla nodes {self.cluster_public_ips}: done")
    def stop(self, load_index=None, erase_data=False):
        if load_index is None:
            log("Not implemented!")
        else:
            self.nodetool("drain", load_index=load_index)
            ssh = self.__new_ssh(self.cluster_public_ips[load_index])
            ssh.exec("sudo systemctl stop scylla-server")

            if erase_data:
                ssh.exec("sudo rm -rf /var/lib/scylla/data/*")
                ssh.exec("sudo rm -rf /var/lib/scylla/commitlog/*")
Example #7
    def exec(self, command):
        """
        Runs the given perf command on the remote machines.
        The command needs to be the full command, e.g. 'sudo perf record ...'.
        """
        log_important("Perf: started")
        log(command)
        pssh = PSSH(self.ip_list, self.user, self.ssh_options)
        pssh.exec(f"""
                cd /tmp
                {command}
                """)
        log_important("Perf: done")
def clear_cluster(cluster_public_ips, cluster_user, ssh_options, duration_seconds=90):
    log_important("Shutting down cluster and removing all data")
    pssh = PSSH(cluster_public_ips, cluster_user, ssh_options)
    # pssh.exec("nodetool flush")
    log("Stopping scylla")
    pssh.exec("sudo systemctl stop scylla-server")
    log("Removing data dir")
    pssh.exec("sudo rm -fr /var/lib/scylla/data/*")
    log("Removing commit log")
    pssh.exec("sudo rm -fr /var/lib/scylla/commitlog/*")
    log("Starting scylla")
    pssh.exec("sudo systemctl start scylla-server")
    log(f"Waiting {duration_seconds} seconds")
    sleep(duration_seconds)
    log_important("Cluster cleared and restarted")
    def __trim_recursively(self, dir):
        # Nothing to trim if neither a warmup nor a cooldown period is set.
        if self.warmup_seconds is None and self.cooldown_seconds is None:
            return

        log_important("HdrLogProcessor.trim_recursively: started")

        for hdr_file in glob.iglob(dir + '/*/*.hdr', recursive=True):
            filename = os.path.basename(hdr_file)
            if filename.startswith("trimmed_"):
                continue

            log(hdr_file)
            self.__trim(hdr_file)

        log_important("HdrLogProcessor.trim_recursively")
    def __trim(self, file):
        filename = os.path.basename(file)
        filename_no_ext = os.path.splitext(filename)[0]

        old_cwd = os.getcwd()
        new_cwd = os.path.dirname(os.path.realpath(file))
        os.chdir(new_cwd)

        args = f'union -if {filename} -of trimmed_{filename_no_ext}.hdr'
        if self.warmup_seconds is not None:
            args = f'{args} -start {self.warmup_seconds}'
        if self.cooldown_seconds is not None:
            args = f'{args} -end {self.cooldown_seconds}'

        cmd = f'{self.java_path} -cp {self.lib_dir}/processor.jar CommandDispatcherMain {args}'
        log(cmd)
        os.system(cmd)
        os.chdir(old_cwd)
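    # For illustration: with warmup_seconds=60, cooldown_seconds=30, and an
    # input file latency.hdr, the args string composed above becomes
    #   union -if latency.hdr -of trimmed_latency.hdr -start 60 -end 30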
    def collect_results(self, dir, warmup_seconds=None, cooldown_seconds=None):
        """
        Parameters
        ----------
        dir: str
            The download directory.
        warmup_seconds: str
            The warmup period in seconds. If set, additional files are
            created with the warmup period trimmed.
        cooldown_seconds: str
            The cooldown period in seconds. If set, additional files are
            created with the cooldown period trimmed.
        """

        log_important("Collecting results: started")
        run_parallel(self.__collect, [(ip, dir) for ip in self.load_ips])

        p = HdrLogProcessor(self.properties,
                            warmup_seconds=warmup_seconds,
                            cooldown_seconds=cooldown_seconds)
        p.process(dir)

        log_important("Collecting results: done")
        log(f"Results can be found in [{dir}]")
    def insert(self,
               partition_count,
               nodes,
               partition_offset=0,
               concurrency=64,
               clustering_row_count=1,
               extra_args=""):
        log_important(f"Inserting {partition_count} partitions")
        start_seconds = time.time()

        # todo: partitions can be lost if there is a remainder, because
        # the integer division truncates.
        pc_per_lg = partition_count // len(self.load_ips)

        cmd_list = []
        for i in range(0, len(self.load_ips)):
            cmd = f"""-workload sequential
                      -clustering-row-count {clustering_row_count}
                      -mode write
                      -partition-count {pc_per_lg}
                      -partition-offset {partition_offset}
                      -nodes {nodes}
                      -concurrency {concurrency}
                      {extra_args}"""
            # collapse all whitespace (including the newlines above)
            # into single spaces.
            cmd = " ".join(cmd.split())
            cmd_list.append(cmd)
            partition_offset = partition_offset + pc_per_lg

        futures = []
        for i in range(0, len(self.load_ips)):
            f = self.async_stress(cmd_list[i], load_index=i)
            futures.append(f)
            if i == 0:
                # give the first load generator some extra time to create
                # the keyspace and tables before the others start.
                time.sleep(10)

        for f in futures:
            f.join()

        duration_seconds = time.time() - start_seconds
        log(f"Duration : {duration_seconds} seconds")
        log(f"Insertion rate: {partition_count // duration_seconds} items/second"
            )
        log_important(f"Inserting {partition_count} partitions: done")
Example #14
    def __stress(self, ip, startup_delay, cmd):
        time.sleep(startup_delay)

        if self.scylla_tools:
            cs_cmd = f'cassandra-stress {cmd}'
        else:
            cassandra_version = self.properties['cassandra_version']
            cassandra_stress_dir = f'apache-cassandra-{cassandra_version}/tools/bin'
            cs_cmd = f'{cassandra_stress_dir}/cassandra-stress {cmd}'

        log(cs_cmd)
        dt = datetime.now().strftime("%d-%m-%Y_%H-%M-%S")
        cs_cmd = cs_cmd + f" 2>&1 | tee -a cassandra-stress-{dt}.log"

        full_cmd = f"""
        set -e
        set -o pipefail
        {cs_cmd}
        set +o pipefail
        """

        self.__new_ssh(ip).exec(full_cmd)
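# Why `set -o pipefail` above: the stress output is piped through tee, and
# without pipefail the pipeline's exit status is tee's (0), hiding failures.
# A standalone demonstration:
import subprocess

no_pf = subprocess.run(['bash', '-c', 'false | tee /dev/null'])
with_pf = subprocess.run(['bash', '-c', 'set -o pipefail; false | tee /dev/null'])
print(no_pf.returncode, with_pf.returncode)  # prints: 0 1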
Example #15
    def install(self):
        ips = ','.join(self.public_ips)
        log(f'[{ips}] raid: started creating RAID')
        pssh = PSSH(self.public_ips, self.user, self.properties['ssh_options'])
        pssh.exec(f"""
            if [[ ! -b /dev/md/{self.raid_device_name} ]]; then
                sudo mdadm --create --verbose /dev/md/{self.raid_device_name} --chunk=256 --metadata=1.2 --level={self.level} --force --raid-devices=$(ls {self.device_name_wildcard} | wc -l) {self.device_name_wildcard}
                
                # /dev/md/raid_device_name maps to /dev/md[0-9]+
                MD_NAME=$(basename $(readlink /dev/md/{self.raid_device_name}))

                # Tuning
                sudo sh -c "echo 1 > /sys/block/$MD_NAME/queue/nomerges"
                sudo sh -c "echo 8 > /sys/block/$MD_NAME/queue/read_ahead_kb"
                sudo sh -c "echo none > /sys/block/$MD_NAME/queue/scheduler"

                sudo mkfs.xfs -f /dev/$MD_NAME
                mkdir {self.raid_device_name}
                sudo mount /dev/$MD_NAME {self.raid_device_name}
                sudo chown $(id -u) {self.raid_device_name}
            fi
        """)
        log(f'[{ips}] raid: finished creating RAID')
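# The $(ls ... | wc -l) above counts the member devices. A Python
# equivalent, with a placeholder wildcard standing in for
# self.device_name_wildcard:
import glob

device_name_wildcard = '/dev/nvme*n1'  # placeholder
print(f'--raid-devices={len(glob.glob(device_name_wildcard))}')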
def restart_cluster(cluster_public_ips, cluster_user, ssh_options, duration_seconds=90):
    log_important("Restart cluster ")
    pssh = PSSH(cluster_public_ips, cluster_user, ssh_options)
    log("nodetool drain")
    pssh.exec("nodetool drain")
    log("sudo systemctl restart scylla-server")
    pssh.exec("sudo systemctl restart scylla-server")
    log(f"Waiting {duration_seconds} seconds")
    sleep(duration_seconds)
    log_important("Cluster restarted")
Example #17
    def append_env_configuration(self, configuration):
        log(f"Appending cassandra-env.sh configuration to nodes "
            f"{self.cluster_public_ips}: {configuration}")
        pssh = PSSH(self.cluster_public_ips, self.ssh_user,
                    self.properties['ssh_options'])
        path_prefix = 'cassandra-raid/' if self.setup_raid else './'
        cmd = (f"echo '{configuration}' >> {path_prefix}"
               f"apache-cassandra-{self.cassandra_version}/conf/cassandra-env.sh")
        log(cmd)
        pssh.exec(cmd)
Example #18
    def insert(self,
               profile,
               item_count,
               nodes,
               mode="native cql3",
               rate="threads=100",
               sequence_start=None):
        log_important(f"Inserting {item_count} items")
        start_seconds = time.time()

        per_load_generator = item_count // len(self.load_ips)
        start = 1 if sequence_start is None else sequence_start

        end = start + per_load_generator - 1
        cmd_list = []
        for i in range(0, len(self.load_ips)):
            cmd = f'user profile={profile} "ops(insert=1)" n={per_load_generator} no-warmup -pop seq={start}..{end} -mode {mode} -rate {rate} -node {nodes}'
            log(self.load_ips[i] + " " + cmd)
            cmd_list.append(cmd)
            start = end + 1
            end = end + per_load_generator

        futures = []
        for i in range(0, len(self.load_ips)):
            f = self.async_stress(cmd_list[i], load_index=i)
            futures.append(f)
            if i == 0:
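                # give the first generator extra time to create the schema.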
                time.sleep(10)

        for f in futures:
            f.join()

        duration_seconds = time.time() - start_seconds
        log(f"Duration : {duration_seconds} seconds")
        log(f"Insertion rate: {item_count // duration_seconds} items/second")
        log_important(f"Inserting {item_count} items: done")
    def __merge_recursively(self, dir):
        log_important("HdrLogProcessor.merge_recursively: started")
        log(dir)
        # todo: be careful with merging the merge file.
        files_map = {}

        for hdr_file in glob.iglob(dir + '/*/*.hdr', recursive=True):
            log(hdr_file)
            base = os.path.splitext(os.path.basename(hdr_file))[0]
            files = files_map.get(base)
            if files is None:
                files = []
                files_map[base] = files
            files.append(hdr_file)

        for name, files in files_map.items():
            input_args = ""
            for file in files:
                input_args = input_args + " -ifp " + file
            cmd = f'{self.java_path} -cp {self.lib_dir}/processor.jar CommandDispatcherMain union {input_args} -of {dir}/{name}.hdr'
            log(cmd)
            os.system(cmd)

        log_important("HdrLogProcessor.merge_recursively")
    def __process_recursively(self, dir):
        log_important("HdrLogProcessor.process_recursively: started")
        for hdr_file in glob.iglob(dir + '/**/*.hdr', recursive=True):
            log(hdr_file)
            self.__process(hdr_file)
        log_important("HdrLogProcessor.process_recursively: done")
    def download(self, dir):
        log_important("Disk Explorer Download: started")
        run_parallel(self.__download, [(ip, dir) for ip in self.ips])
        log_important("Disk Explorer Download: done")
        log(f"Results can be found in [{dir}]")
Example #22
    def __init__(self, ip_list, user, ssh_options):
        log(ip_list)
        self.updated = False
        self.ip_list = ip_list
        self.user = user
        self.ssh_options = ssh_options
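    # The original exec() is not shown in this example. A minimal sketch of
    # what it could look like, assuming plain ssh and the fields set above;
    # this is an assumption, not the project's implementation.
    def exec_sketch(self, command):
        import shlex
        import subprocess
        from concurrent.futures import ThreadPoolExecutor

        def run_one(ip):
            # run the command on one host; check=True surfaces failures
            subprocess.run(
                f'ssh {self.ssh_options} {self.user}@{ip} {shlex.quote(command)}',
                shell=True, check=True)

        # run on all hosts in parallel; consuming the iterator re-raises
        # the first failure, if any
        with ThreadPoolExecutor(max_workers=len(self.ip_list)) as pool:
            list(pool.map(run_one, self.ip_list))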
Example #23
    def run(self, options):
        log_important("fio run: started")
        log(f"sudo fio {options}")
        run_parallel(self.__run, [(ip, options) for ip in self.ips])
        log_important("fio run: done")

    def append_configuration(self, configuration):
        log(f"Appending configuration to nodes {self.cluster_public_ips}: {configuration}")
        pssh = PSSH(self.cluster_public_ips, self.ssh_user, self.properties['ssh_options'])
        pssh.exec(f"sudo sh -c \"echo '{configuration}' >> /etc/scylla/scylla.yaml\"")