def _setup_g5k_kube_volumes(self, kube_workers, n_pv=3):
    """Prepare local volume directories on the workers and deploy the PV resources.

    The method remounts /dev/sda5 on /tmp, creates `n_pv` directories under
    /tmp/pv that are bind-mounted to /mnt/disks, deploys
    local_persistentvolume.yaml and storageClass.yaml, then waits on the pods
    labelled app.kubernetes.io/instance=local-volume-provisioner.

    Parameters
    ----------
    kube_workers: list
        the Kubernetes worker hosts on which the shell commands are executed
    n_pv: int
        the number of volume directories created on each worker
    """
    logger.info('Setting volumes on %s kubernetes workers' % len(kube_workers))
    cmd = 'umount /dev/sda5; mount -t ext4 /dev/sda5 /tmp'
    execute_cmd(cmd, kube_workers)

    logger.debug(
        'Create n_pv partitions on the physical disk to make a PV can be shared'
    )
    # Every statement of the loop body is terminated explicitly with ';' so
    # the command stays valid shell no matter how the literal is wrapped.
    cmd = ('for i in $(seq 1 %s); do '
           'mkdir -p /tmp/pv/vol${i}; '
           'mkdir -p /mnt/disks/vol${i}; '
           'mount --bind /tmp/pv/vol${i} /mnt/disks/vol${i}; '
           'done') % n_pv
    execute_cmd(cmd, kube_workers)

    logger.info('Creating local persistance volumes on Kubernetes cluster')
    logger.debug('Init configurator: k8s_resources_configurator')
    configurator = k8s_resources_configurator()
    antidote_k8s_dir = self.configs['exp_env']['antidotedb_yaml_path']
    deploy_files = [
        os.path.join(antidote_k8s_dir, 'local_persistentvolume.yaml'),
        os.path.join(antidote_k8s_dir, 'storageClass.yaml'),
    ]
    configurator.deploy_k8s_resources(files=deploy_files)

    logger.info('Waiting for setting local persistance volumes')
    configurator.wait_k8s_resources(
        resource='pod',
        label_selectors='app.kubernetes.io/instance=local-volume-provisioner',
    )
def clean_exp_env(self, kube_namespace, n_fmke_client_per_dc):
    """Reset the namespace and the result folder before a new run.

    The namespace is deleted and created again. When FMKe clients are used,
    /tmp/results is emptied on every node labelled service=fmke; the node
    addresses are read from the flannel public-ip annotation.

    Parameters
    ----------
    kube_namespace: str
        the name of the K8s namespace to recreate
    n_fmke_client_per_dc: int
        the result folders are only cleaned when this value is greater than 0
    """
    logger.info('1. Cleaning the experiment environment')
    logger.info(
        'Deleting all k8s running resources from the previous run in namespace "%s"'
        % kube_namespace)
    logger.debug(
        'Delete namespace "%s" to delete all the running resources, then create it again'
        % kube_namespace)

    k8s = k8s_resources_configurator()
    k8s.delete_namespace(kube_namespace)
    k8s.create_namespace(kube_namespace)

    # Nothing else to clean when no FMKe client is deployed
    if not n_fmke_client_per_dc > 0:
        return

    logger.info(
        'Delete all files in /tmp/results folder on fmke_client nodes')
    node_list = k8s.get_k8s_resources(resource='node',
                                      label_selectors='service=fmke',
                                      kube_namespace=kube_namespace)
    node_ips = list()
    for node in node_list.items:
        node_ips.append(
            node.metadata.annotations['flannel.alpha.coreos.com/public-ip'])

    execute_cmd('rm -rf /tmp/results && mkdir -p /tmp/results', node_ips)
def _setup_g5k_kube_volumes(self, kube_workers, n_pv=3):
    """Create the local volume directories and deploy the PV resources.

    Parameters
    ----------
    kube_workers: list
        the Kubernetes worker hosts on which the shell commands are executed
    n_pv: int
        the number of volume directories created on each worker; defaults to
        3, the value that used to be hard-coded in this method
    """
    logger.info("Setting volumes on %s kubernetes workers" % len(kube_workers))

    # Remount the /dev/sda5 partition on /tmp
    cmd = 'umount /dev/sda5; mount -t ext4 /dev/sda5 /tmp'
    execute_cmd(cmd, kube_workers)

    # Joining with '; ' guarantees a separator between every shell statement
    # of the loop (directory creation followed by the bind mount).
    loop_statements = [
        'for i in $(seq 1 %s)' % n_pv,
        'do mkdir -p /tmp/pv/vol${i}',
        'mkdir -p /mnt/disks/vol${i}',
        'mount --bind /tmp/pv/vol${i} /mnt/disks/vol${i}',
        'done',
    ]
    cmd = '; '.join(loop_statements)
    execute_cmd(cmd, kube_workers)

    logger.info("Creating local persistance volumes on Kubernetes workers")
    logger.debug("Init configurator: k8s_resources_configurator")
    configurator = k8s_resources_configurator()
    antidote_k8s_dir = self.args.yaml_path
    deploy_files = [
        os.path.join(antidote_k8s_dir, 'local_persistentvolume.yaml'),
        os.path.join(antidote_k8s_dir, 'storageClass.yaml'),
    ]
    configurator.deploy_k8s_resources(files=deploy_files)

    logger.info('Waiting for setting local persistance volumes')
    configurator.wait_k8s_resources(
        resource='pod',
        label_selectors="app.kubernetes.io/instance=local-volume-provisioner")
def _install_kubeadm(self):
    """Install kubelet, kubeadm and kubectl on every host of `self.hosts`.

    Steps, each executed on all hosts:
    1. write the bridge netfilter settings to /etc/sysctl.d/k8s.conf and
       reload them with `sysctl --system`
    2. turn off swap
    3. register the Kubernetes apt key and repository
    4. install the kubelet, kubeadm and kubectl packages
    """
    logger.info('Starting installing kubeadm on %s nodes' % len(self.hosts))

    logger.debug('Turning off Firewall on hosts')
    # A here-document needs one entry per line and a closing delimiter, so
    # the command is assembled with explicit newlines and a final EOF.
    cmd = '\n'.join([
        'cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf',
        'net.bridge.bridge-nf-call-ip6tables = 1',
        'net.bridge.bridge-nf-call-iptables = 1',
        'EOF',
    ])
    execute_cmd(cmd, self.hosts)
    cmd = 'sudo sysctl --system'
    execute_cmd(cmd, self.hosts)

    logger.debug('Turning off swap on hosts')
    cmd = 'swapoff -a'
    execute_cmd(cmd, self.hosts)

    logger.debug('Installing kubeadm kubelet kubectl')
    configurator = packages_configurator()
    configurator.install_packages(['apt-transport-https', 'curl'], self.hosts)

    cmd = 'curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -'
    execute_cmd(cmd, self.hosts)

    cmd = '\n'.join([
        'cat <<EOF | sudo tee /etc/apt/sources.list.d/kubernetes.list',
        'deb https://apt.kubernetes.io/ kubernetes-xenial main',
        'EOF',
    ])
    execute_cmd(cmd, self.hosts)

    configurator.install_packages(['kubelet', 'kubeadm', 'kubectl'], self.hosts)
def _setup_g5k_kube_volumes(self, kube_workers, n_pv=3):
    """Set up the local persistent volumes used by the Kubernetes cluster.

    On each worker /dev/sda5 is remounted on /tmp and `n_pv` directories
    /tmp/pv/vol<i> are bind-mounted to /mnt/disks/vol<i>. The persistent
    volume and storage class yaml files found in the configured
    `antidote_yaml_path` are then deployed.

    Parameters
    ----------
    kube_workers: list
        the Kubernetes worker hosts on which the shell commands are executed
    n_pv: int
        the number of volume directories created on each worker
    """
    n_workers = len(kube_workers)
    logger.info("Setting volumes on %s kubernetes workers" % n_workers)

    remount_cmd = "umount /dev/sda5; mount -t ext4 /dev/sda5 /tmp"
    execute_cmd(remount_cmd, kube_workers)

    logger.debug(
        "Create n_pv partitions on the physical disk to make a PV can be shared"
    )
    # The loop is written as a one-liner with ';' after each statement, so it
    # does not rely on line breaks inside the string to be valid shell.
    volumes_cmd = (
        "for i in $(seq 1 %s); do mkdir -p /tmp/pv/vol${i}; "
        "mkdir -p /mnt/disks/vol${i}; "
        "mount --bind /tmp/pv/vol${i} /mnt/disks/vol${i}; done" % n_pv
    )
    execute_cmd(volumes_cmd, kube_workers)

    logger.info("Creating local persistance volumes on Kubernetes cluster")
    logger.debug("Init configurator: k8s_resources_configurator")
    configurator = k8s_resources_configurator()
    yaml_dir = self.configs["exp_env"]["antidote_yaml_path"]
    configurator.deploy_k8s_resources(files=[
        os.path.join(yaml_dir, "local_persistentvolume.yaml"),
        os.path.join(yaml_dir, "storageClass.yaml"),
    ])

    logger.info("Waiting for setting local persistance volumes")
    configurator.wait_k8s_resources(
        resource="pod",
        label_selectors="app.kubernetes.io/instance=local-volume-provisioner",
    )
def setup_env(self, kube_master_site, kube_namespace):
    """Provision the OVHCloud nodes and configure them for the experiment.

    Parameters
    ----------
    kube_master_site:
        the region in which the Kubernetes master is chosen when
        `self.args.kube_master` is not given
    kube_namespace:
        the namespace handed over to `self.config_host`

    Returns
    -------
    kube_master:
        the IP address of the Kubernetes master
    node_ids_file:
        the `node_ids_file` attribute of the provisioner

    Raises
    ------
    ValueError
        if none of the provisioned nodes can be identified as the master
    """
    logger.info('STARTING SETTING THE EXPERIMENT ENVIRONMENT')
    logger.info('Starting provisioning nodes on OVHCloud')

    provisioner = ovh_provisioner(configs=self.configs,
                                  node_ids_file=self.args.node_ids_file)
    provisioner.provisioning()

    self.nodes = provisioner.nodes
    self.hosts = provisioner.hosts
    node_ids_file = provisioner.node_ids_file

    kube_master = self.args.kube_master
    # Explicit default: the lookups below may not match any node
    kube_master_id = None
    if kube_master is None:
        # No master given: take the first node located in kube_master_site
        for node in self.nodes:
            if node['region'] == kube_master_site:
                kube_master = node['ipAddresses'][0]['ip']
                kube_master_id = node['id']
                break
    else:
        # Master given by IP: find the id of the matching node
        for node in self.nodes:
            if node['ipAddresses'][0]['ip'] == kube_master:
                kube_master_id = node['id']
                break
    if kube_master_id is None:
        # Fail with a clear message instead of an unbound-variable error later
        raise ValueError(
            'Cannot find the Kubernetes master among the provisioned nodes '
            '(kube_master=%s, kube_master_site=%s)' % (kube_master, kube_master_site))

    # Group the remaining nodes by region; each new node is put in front of
    # the nodes already collected for its region.
    clusters = dict()
    for node in self.nodes:
        if node['id'] == kube_master_id:
            continue
        cluster = node['region']
        clusters[cluster] = [node] + clusters.get(cluster, list())

    # Keep, per region, as many nodes as the largest requested n_nodes_per_dc
    data_nodes = list()
    for region, nodes in clusters.items():
        data_nodes += nodes[0:max(self.normalized_parameters['n_nodes_per_dc'])]
    data_hosts = [node['ipAddresses'][0]['ip'] for node in data_nodes]
    self.data_node_ids = [node['id'] for node in data_nodes]

    if self.args.attach_volume:
        logger.info('Attaching external volumes to %s nodes' % len(data_nodes))
        provisioner.attach_volume(nodes=data_nodes)

        logger.info('Formatting the new external volumes')
        # The assignment must be its own statement (terminated by ';'),
        # otherwise $disk is still empty when mkfs.ext4 is expanded.
        cmd = ("disk=$(ls -lt /dev/ | grep '^b' | head -n 1 | awk {'print $NF'}); "
               "mkfs.ext4 -F /dev/$disk; "
               "mount -t ext4 /dev/$disk /tmp; "
               "chmod 777 /tmp")
        execute_cmd(cmd, data_hosts)

    self.config_host(kube_master, kube_namespace)

    logger.info('Kubernetes master: %s' % kube_master)
    logger.info('FINISH SETTING THE EXPERIMENT ENVIRONMENT\n')
    return kube_master, node_ids_file
def deploy_kubernetes_cluster(self):
    """Deploy a Kubernetes cluster on `self.hosts` with kubeadm.

    Docker and kubeadm are installed on all hosts, the master is initialised
    with the flannel pod network, and every other host joins the cluster with
    the join command printed by the master.

    Returns
    -------
    kube_master:
        the host used as Kubernetes master (`self.hosts[0]` when
        `self.kube_master` was not set)
    kube_workers: list
        all the other hosts
    """
    configurator = docker_configurator(self.hosts)
    configurator.config_docker()

    self._install_kubeadm()

    if self.kube_master is None:
        self.kube_master = self.hosts[0]
        kube_workers = self.hosts[1:]
    else:
        kube_workers = [host for host in self.hosts if host != self.kube_master]

    logger.info('Initializing kubeadm on master')
    cmd = 'kubeadm init --pod-network-cidr=10.244.0.0/16'
    execute_cmd(cmd, [self.kube_master])

    # Three independent statements: they need explicit separators to be
    # executed one after the other.
    cmd = ('mkdir -p $HOME/.kube; '
           'cp -i /etc/kubernetes/admin.conf $HOME/.kube/config; '
           'chown $(id -u):$(id -g) $HOME/.kube/config')
    execute_cmd(cmd, [self.kube_master])

    cmd = 'kubectl apply -f https://github.com/coreos/flannel/raw/master/Documentation/kube-flannel.yml'
    execute_cmd(cmd, [self.kube_master])

    cmd = 'kubeadm token create --print-join-command'
    _, result = execute_cmd(cmd, [self.kube_master])

    logger.debug('Adding %s kube workers' % len(kube_workers))
    # Keep only what follows the last 'kubeadm join' in the master's output
    cmd = 'kubeadm join' + \
        result.processes[0].stdout.split('kubeadm join')[-1]
    execute_cmd(cmd.strip(), kube_workers)

    logger.info('Deploying Kubernetes cluster successfully')
    logger.info('Kubernetes master: %s' % self.kube_master)

    return self.kube_master, kube_workers
def clean_k8s_resources(self, kube_namespace):
    """Remove everything left by the previous run in the given namespace.

    The namespace is deleted and created again, then /tmp/results is emptied
    on the nodes labelled service_g5k=fmke_client.

    Parameters
    ----------
    kube_namespace: str
        the name of the K8s namespace to recreate
    """
    logger.info('1. Deleting all k8s resource from the previous run in namespace "%s"' % kube_namespace)
    logger.debug('Delete namespace "%s" to delete all the resources, then create it again' % kube_namespace)

    k8s = k8s_resources_configurator()
    k8s.delete_namespace(kube_namespace)
    k8s.create_namespace(kube_namespace)

    logger.debug('Delete all files in /tmp/results folder on fmke_client nodes')
    client_nodes = k8s.get_k8s_resources_name(
        resource='node',
        label_selectors='service_g5k=fmke_client',
        kube_namespace=kube_namespace,
    )
    execute_cmd('rm -rf /tmp/results && mkdir -p /tmp/results', client_nodes)
def deploy_glusterfs(self, gluster_hosts, indices, gluster_mountpoint, gluster_volume_name):
    """Create, start and mount a GlusterFS volume on a subset of the hosts.

    Parameters
    ----------
    gluster_hosts: list
        all the GlusterFS hosts
    indices: list of int
        positions (in `gluster_hosts`) of the hosts that take part in the volume
    gluster_mountpoint: str
        the directory on which the volume is mounted on every selected host
    gluster_volume_name: str
        the name of the volume to create

    Returns
    -------
    tuple
        (True, the list of selected hosts)
    """
    ordered_indices = sorted(indices)
    selected_hosts = list()
    for position, hostname in enumerate(gluster_hosts):
        if position in ordered_indices:
            selected_hosts.append(hostname)
    logger.info('Creating volumes on %s hosts: \n %s' % (len(selected_hosts), selected_hosts))

    volume_path = '/tmp/glusterd/volume'
    execute_cmd('mkdir -p %s' % volume_path, selected_hosts)

    # One brick per selected host, addressed by its gluster-<index> name
    bricks = " ".join(
        "gluster-%s.%s.local:%s" % (position, hostname, volume_path)
        for position, hostname in zip(ordered_indices, selected_hosts)
    )
    first_host = selected_hosts[0]
    create_cmd = 'gluster --mode=script volume create %s replica 3 %s force' % (gluster_volume_name, bricks)
    execute_cmd(create_cmd, first_host)

    logger.info('Starting volumes on hosts')
    start_cmd = 'gluster --mode=script volume start %s' % gluster_volume_name
    execute_cmd(start_cmd, selected_hosts[0])

    mount_cmd = ' mkdir -p %s && mount -t glusterfs gluster-0:%s %s' % (
        gluster_mountpoint, gluster_volume_name, gluster_mountpoint)
    execute_cmd(mount_cmd, selected_hosts)

    logger.info("Finish deploying glusterfs")
    return True, selected_hosts
def config_host(self, hosts):
    """Install Docker on the hosts and start one AntidoteDB container on each.

    Parameters
    ----------
    hosts: list
        the hosts on which AntidoteDB is configured
    """
    logger.info("Starting configure AntidoteDB on nodes")

    logger.debug("Init configurator: docker_configurator")
    docker = docker_configurator(hosts)
    docker.config_docker()

    # Each step: (log message, command executed on all hosts)
    steps = (
        ("Pull AntidoteDB docker image",
         'docker pull antidotedb/antidote'),
        ("Run AntidoteDB container",
         'docker run -d --name antidote -p "8087:8087" antidotedb/antidote'),
    )
    for message, command in steps:
        logger.info(message)
        execute_cmd(command, hosts)

    logger.info("Finish configuring AntidoteDB on all hosts")
def set_latency(self, latency, antidote_dc):
    """Add a network delay between the hosts of different clusters.

    Half of `latency` is applied, with `tcset` on the flannel.1 interface,
    on the hosts of each cluster towards every pod IP of the other clusters.

    Parameters
    ----------
    latency: number
        the latency value; the delay configured per host is `latency / 2`
    antidote_dc: dict
        maps a cluster name to a dict holding at least the keys
        'host_names' and 'pod_ips'
    """
    logger.info("--Setting network latency=%s on hosts" % latency)
    half_delay = latency / 2

    for source_name, source_info in antidote_dc.items():
        for peer_name, peer_info in antidote_dc.items():
            # A cluster gets no delay towards its own pods
            if peer_name == source_name:
                continue
            for pod_ip in peer_info['pod_ips']:
                tcset_cmd = "tcset flannel.1 --delay %s --network %s" % (half_delay, pod_ip)
                execute_cmd(tcset_cmd, source_info['host_names'])
def _get_os_name(self, host):
    '''Detect the operating system of a host from the output of hostnamectl

    The command is retried (with a 10 second pause) up to MAX_RETRIES times
    as long as it prints nothing.

    Parameters
    ----------
    host: str
        the host name or IP address of a host

    Returns
    -------
    os_name: str
        the key of OS_NAMES found in the "Operating System" line, or None
        when no key matches or when every attempt printed nothing
    os_full_name: str
        the value of OS_NAMES for that key, or None
    '''
    cmd = 'hostnamectl | grep "Operating System"'
    for _attempt in range(MAX_RETRIES):
        _, result = execute_cmd(cmd, host)
        os_info = result.processes[0].stdout.strip().lower()

        if not os_info:
            # Empty output: wait and try again
            logger.info('---> Retrying: "%s" on host %s ' % (cmd, host))
            sleep(10)
            continue

        for code_name, full_name in OS_NAMES.items():
            if code_name in os_info:
                logger.debug('OS of %s: %s' % (host, full_name))
                return code_name, full_name
        # The host answered but its OS is not one of OS_NAMES
        return None, None

    return None, None
def config_host(self, kube_master_site, kube_namespace):
    """Get a Kubernetes cluster ready and prepare its workers for elmerfs.

    Without `self.args.kube_master` a new cluster is configured through
    `self.config_kube`; otherwise the workers are read from the existing
    cluster. The elmerfs dependencies and Filebench are then installed on
    the workers and a /tmp/dc-<hostname> directory is created on each.

    Parameters
    ----------
    kube_master_site: str
        hosts whose name starts with this value are candidates for the master
    kube_namespace: str
        the namespace handed over to `self.config_kube`
    """
    kube_master = self.args.kube_master

    if self.args.kube_master is not None:
        # Reuse an already deployed cluster
        logger.info('Kubernetes master: %s' % kube_master)
        self._get_credential(kube_master)

        k8s = k8s_resources_configurator()
        deployed_hosts = k8s.get_k8s_resources(resource='node')
        kube_workers = list()
        for deployed in deployed_hosts.items:
            kube_workers.append(deployed.metadata.name)
        kube_workers.remove(kube_master)
    else:
        antidote_hosts = list()
        for cluster in self.configs['clusters']:
            cluster_name = cluster['cluster']
            cluster_hosts = [
                name for name in self.hosts if name.startswith(cluster_name)
            ]
            n_hosts = cluster['n_nodes']
            # The master site contributes one more host (for the master)
            if cluster_name == self.configs['exp_env']['kube_master_site']:
                n_hosts = n_hosts + 1
            antidote_hosts.extend(cluster_hosts[0:n_hosts])

        # First selected host that belongs to the master site
        kube_master = next(
            (name for name in antidote_hosts if name.startswith(kube_master_site)),
            kube_master,
        )
        kube_workers = self.config_kube(kube_master, antidote_hosts, kube_namespace)

    logger.info('Installing elmerfs dependencies')
    packages = packages_configurator()
    packages.install_packages(['libfuse2', 'wget', 'jq'], kube_workers)

    # Mount point of elmerfs on each worker
    execute_cmd('mkdir -p /tmp/dc-$(hostname)', kube_workers)

    # Filebench is the tool used to run the experiments
    logger.info('Installing Filebench')
    self.install_filebench(kube_workers)
def config_docker(self):
    """Install Docker on all the hosts stored in `self.hosts`

    wget is installed first, then the get-docker.sh script is downloaded
    from https://get.docker.com and executed. The value returned by each
    `execute_cmd` call is kept in `self.error_hosts`.
    """
    host_count_message = '%s hosts' % len(self.hosts)
    logger.info('Starting installing Docker on ' + host_count_message)

    packages = packages_configurator()
    packages.install_packages(['wget'], self.hosts)

    logger.info('Downloading the official get_docker script')
    download_cmd = 'wget https://get.docker.com -O get-docker.sh'
    self.error_hosts = execute_cmd(download_cmd, self.hosts)

    logger.info('Installing Docker by using get_docker script')
    install_cmd = 'sh get-docker.sh'
    self.error_hosts = execute_cmd(install_cmd, self.hosts)

    logger.info('Finish installing Docker on %s hosts' % len(self.hosts))
def clean_exp_env(self, kube_namespace, n_fmke_client_per_dc):
    """Clean what the previous run left behind.

    Parameters
    ----------
    kube_namespace: str
        the K8s namespace that is deleted and created again
    n_fmke_client_per_dc: int
        when greater than 0, /tmp/results is also emptied on the nodes
        labelled service=fmke
    """
    logger.info('1. Cleaning the experiment environment')

    deleting_msg = 'Deleting all k8s running resources from the previous run in namespace "%s"'
    logger.info(deleting_msg % kube_namespace)
    recreate_msg = 'Delete namespace "%s" to delete all the running resources, then create it again'
    logger.debug(recreate_msg % kube_namespace)

    # Dropping the namespace removes every resource running inside it
    configurator = k8s_resources_configurator()
    configurator.delete_namespace(kube_namespace)
    configurator.create_namespace(kube_namespace)

    has_fmke_clients = n_fmke_client_per_dc > 0
    if has_fmke_clients:
        logger.info(
            'Delete old result files in /tmp/results on FMKe client nodes')
        selector = dict(resource='node',
                        label_selectors='service=fmke',
                        kube_namespace=kube_namespace)
        fmke_nodes = configurator.get_k8s_resources_name(**selector)
        execute_cmd('rm -rf /tmp/results && mkdir -p /tmp/results', fmke_nodes)
def install_packages_with_dnf(self, packages, hosts):
    '''Install the given packages with dnf

    Any exception raised while running the command is logged and not
    propagated.

    Parameters
    ----------
    packages: list of string
        the names of the packages to install

    hosts: list of string
        the hostnames on which the packages are installed
    '''
    n_hosts = len(hosts)
    logger.debug("Installing packages: %s on %s hosts" % (', '.join(packages), n_hosts))

    package_args = ' '.join(packages)
    cmd = "dnf update -y -q && dnf install -y -q %s" % package_args
    try:
        execute_cmd(cmd, hosts)
    except Exception as error:
        message = "---> Bug [%s] with command: %s" % (error, cmd)
        logger.error(message, exc_info=True)
def deploy_docker_swarm_cluster(self):
    """Deploy a Docker Swarm cluster on `self.hosts`.

    Docker is installed on all hosts, a swarm is initialised on the manager
    (advertising the first address printed by `hostname -I`) and every other
    host joins it as a worker.

    Returns
    -------
    ds_manager:
        the host used as swarm manager (`self.hosts[0]` when
        `self.ds_manager` was not set)
    ds_workers: list
        all the other hosts
    """
    docker = docker_configurator(self.hosts)
    docker.config_docker()

    if self.ds_manager is not None:
        ds_workers = [name for name in self.hosts if name != self.ds_manager]
    else:
        self.ds_manager = self.hosts[0]
        ds_workers = self.hosts[1:]

    logger.info('Getting IP of docker swarm manager')
    _, ip_result = execute_cmd("hostname -I", [self.ds_manager])
    # `hostname -I` may print several addresses: keep the first one
    addresses = ip_result.processes[0].stdout.strip().split(' ')
    ds_manager_ip = addresses[0]

    logger.info('Creating a new swarm')
    execute_cmd('docker swarm init --advertise-addr %s' % ds_manager_ip, [self.ds_manager])

    logger.info('Joining all docker swarm worker')
    token_cmd = 'docker swarm join-token worker | grep "docker swarm join"'
    _, token_result = execute_cmd(token_cmd, [self.ds_manager])
    # The manager prints the exact command the workers have to run
    join_cmd = token_result.processes[0].stdout.strip()
    execute_cmd(join_cmd, ds_workers)

    logger.info('Finish deploying docker swarm cluster')
    logger.info('Docker swarm manager: %s (%s)' % (self.ds_manager, ds_manager_ip))

    return self.ds_manager, ds_workers
def setup_env(self):
    """Set up the experiment environment requested by the user

    Two steps are performed:
    1. Provisioning the hosts with `g5k_provisioner`, using the options
       found in `self.args` (config file, OAR job ids, ...).
    2. Configuring the packages/services needed on those hosts.

    Returns
    -------
    oar_job_ids:
        the `oar_result` attribute of the provisioner
    """
    provisioner_options = dict(
        config_file_path=self.args.config_file_path,
        keep_alive=self.args.keep_alive,
        out_of_chart=self.args.out_of_chart,
        oar_job_ids=self.args.oar_job_ids,
        no_deploy_os=self.args.no_deploy_os,
        is_reservation=self.args.is_reservation,
        job_name="cloudal",
    )
    provisioner = g5k_provisioner(**provisioner_options)
    provisioner.provisioning()

    self.hosts = provisioner.hosts
    oar_job_ids = provisioner.oar_result
    self.oar_result = provisioner.oar_result

    ######################################################
    # The configuration of your applications starts here #
    ######################################################

    # Example 1: install a few dependencies on all hosts
    packages = packages_configurator()
    packages.install_packages(['sysstat', 'htop'], self.hosts)

    # Example 2: deploy a well-known service with a configurator of cloudal
    docker = docker_configurator(self.hosts)
    docker.config_docker()

    # Example 3: run an arbitrary command on all hosts
    logger.info("Downloading cloudal")
    clone_cmd = "cd /tmp/ && git clone https://github.com/ntlinh16/cloudal.git"
    execute_cmd(clone_cmd, self.hosts)

    return oar_job_ids
def run_workflow(self, comb, sweeper):
    """Perform one run of the experiment for one combination of parameters.

    The combination is marked as done in the sweeper when the run succeeds
    and cancelled when an ExecuteCommandException is raised.

    Parameters
    ----------
    comb: dict
        one combination of parameters taken from the combination queue
    sweeper:
        the sweeper that keeps track of the combinations

    Returns
    -------
    sweeper
        the sweeper given as argument
    """
    comb_ok = False
    try:
        logger.info('Performing combination: ' + slugify(comb))

        # This is the place for the code of one run of your experiment.
        # The parameters of the current combination (declared in the config
        # file exp_setting.yaml) are available in `comb`.

        # Example: read the parameters and write them to a file on each host
        values = tuple(comb['parameter_%s' % number] for number in range(1, 6))
        message = "parameter_1: %s, parameter_2: %s, parameter_3: %s, parameter_4: %s, parameter_5: %s" % values
        execute_cmd("echo %s > /tmp/result_$(hostname).txt" % message, self.hosts)

        # Example: fetch the result files from the hosts into local_result_dir
        get_results(
            comb=comb,
            hosts=self.hosts,
            remote_result_files=['/tmp/result_*.txt'],
            local_result_dir=self.configs['exp_env']['results_dir'],
        )
        comb_ok = True
    except ExecuteCommandException:
        comb_ok = False
    finally:
        if not comb_ok:
            sweeper.cancel(comb)
            logger.warning(slugify(comb) + ' is canceled')
        else:
            sweeper.done(comb)
            logger.info('Finish combination: %s' % slugify(comb))
        n_remaining = len(sweeper.get_remaining())
        logger.info('%s combinations remaining\n' % n_remaining)
    return sweeper
def copy_time(self, comb, kube_master):
    """Run the timed copy script on one host of the smallest cluster.

    Parameters
    ----------
    comb: dict
        the current combination, handed over to `self.save_results`
    kube_master: str
        the Kubernetes master, which is never chosen to perform the copy
    """
    # The cluster with the fewest AntidoteDB instances performs the copy
    smallest_cluster = min(
        self.configs["exp_env"]["antidote_clusters"],
        key=lambda info: info["n_antidotedb_per_dc"],
    )
    # Take the first non-master host that belongs to this cluster
    candidates = [
        name for name in self.hosts
        if name != kube_master and name.startswith(smallest_cluster["cluster"])
    ]
    host = candidates[0]

    logger.info("----Copying file to elmerfs mount point on %s" % host)
    script_args = {
        "copy_script_file": "/tmp/convergence_files/timing_copy_file.sh",
        "file_path": "/tmp/convergence_files/sample",
        "dest_path": "/tmp/dc-$(hostname)/sample",
        "log_path": "/tmp/results/time_$(hostname)_start",
        "hash": self.configs['exp_env']['convergence_checksum'],
        "checksum_path": "/tmp/results/checksum_copy_$(hostname)",
        "mount_check_path": "/tmp/results/checkelmerfs_$(hostname)",
    }
    cmd = "bash %(copy_script_file)s %(file_path)s %(dest_path)s %(log_path)s %(hash)s %(checksum_path)s %(mount_check_path)s" % script_args
    execute_cmd(cmd, host)

    self.save_results(comb, host)
def install_packages_with_apt(self, packages, hosts):
    '''Install the given packages with apt-get

    Any exception raised while running the command is logged and not
    propagated.

    Parameters
    ----------
    packages: list of string
        the names of the packages to install

    hosts: list of string
        the hostnames on which the packages are installed
    '''
    package_names = ', '.join(packages)
    logger.debug("Installing packages: %s on %s hosts" % (package_names, len(hosts)))

    # The three steps are chained: each one only runs if the previous succeeded
    steps = [
        "export DEBIAN_FRONTEND=noninteractive",
        "apt-get update",
        "apt-get install -q -y --allow-change-held-packages %s" % ' '.join(packages),
    ]
    cmd = " && ".join(steps)
    try:
        execute_cmd(cmd, hosts)
    except Exception as error:
        logger.error("---> Bug [%s] with command: %s" % (error, cmd),
                     exc_info=True)
def _convergence(self, src_host, dest_hosts):
    """Copy a file on the source host and wait for the checksum processes.

    A periodic checksum script is started on the destination hosts, the
    sample file is copied with the timing script on the source host, then
    the destination hosts are polled every 30 seconds (at most 200 times).
    If checksum processes are still found after the last poll they are killed.

    Parameters
    ----------
    src_host: str
        the host on which the sample file is copied
    dest_hosts: list
        the hosts on which the checksum script runs

    Returns
    -------
    bool
        always True
    """
    logger.info("----Start checksum process on destination hosts")
    execute_cmd("touch /tmp/dc-$(hostname)/sample", src_host)
    sleep(5)

    watcher_cmd = "bash /tmp/convergence_files/periodically_checksum.sh /tmp/dc-$(hostname)/sample %s /tmp/results/time_$(hostname)_end" % (
        self.configs['exp_env']['convergence_checksum'])
    execute_cmd(watcher_cmd, dest_hosts, mode='start')

    logger.info("----Copying file to elmerfs mount point on source host")
    script_args = {
        "copy_script_file": "/tmp/convergence_files/timing_copy_file.sh",
        "file_path": "/tmp/convergence_files/sample",
        "dest_path": "/tmp/dc-$(hostname)/sample",
        "log_path": "/tmp/results/time_$(hostname)_start",
        "hash": self.configs['exp_env']['convergence_checksum'],
        "checksum_path": "/tmp/results/checksum_copy_$(hostname)",
        "mount_check_path": "/tmp/results/checkelmerfs_$(hostname)",
    }
    copy_cmd = "bash %(copy_script_file)s %(file_path)s %(dest_path)s %(log_path)s %(hash)s %(checksum_path)s %(mount_check_path)s" % script_args
    execute_cmd(copy_cmd, src_host)

    logger.info(
        "----Waiting for checksum process on all destination hosts complete"
    )
    probe_cmd = "ps aux | grep periodically_checksum | grep sample | awk '{print$2}'"
    checksum_ok = False
    for _round in range(200):
        sleep(30)
        _, probe = execute_cmd(probe_cmd, dest_hosts)
        # More than one line of output on a host is treated as "still running"
        still_running = any(
            len(proc.stdout.strip().split('\n')) > 1 for proc in probe.processes
        )
        if not still_running:
            checksum_ok = True
            break

    if not checksum_ok:
        execute_cmd("pkill -f periodically_checksum", dest_hosts)
    return True
def install_glusterfs(self, hosts):
    """Install GlusterFS on the hosts and connect them as peers.

    The glusterfs-server package is installed and glusterd started on every
    host. The gluster-<index> names of all hosts are appended to /etc/hosts
    everywhere, then each of them is probed from the first host.

    Parameters
    ----------
    hosts: list
        the hosts of the GlusterFS cluster
    """
    logger.info('Installing GlusterFS')
    packages = packages_configurator()
    packages.install_packages(["glusterfs-server"], hosts)

    execute_cmd('systemctl start glusterd', hosts)

    # One /etc/hosts entry per host, built from its first IP address
    host_entries = list()
    for index, host in enumerate(hosts):
        _, ip_result = execute_cmd("hostname -I | awk '{print $1}'", host)
        host_ip = ip_result.processes[0].stdout.strip()
        entry = "%s gluster-%s.%s.local gluster-%s " % (host_ip, index, host, index)
        host_entries.append(entry)
    execute_cmd("echo '%s' >> /etc/hosts" % "\n".join(host_entries), hosts)

    for index in range(len(hosts)):
        execute_cmd('gluster peer probe gluster-%s' % index, hosts[0])
def install_filebench(self, hosts):
    """Build and install Filebench 1.5-alpha3 from its source archive.

    Parameters
    ----------
    hosts: list
        the hosts on which Filebench is installed
    """
    build_dependencies = ['build-essential', 'bison', 'flex', 'libtool']
    packages_configurator().install_packages(build_dependencies, hosts)

    # Download, unpack and build; each command runs on all hosts in turn
    commands = (
        'wget https://github.com/filebench/filebench/archive/refs/tags/1.5-alpha3.tar.gz -P /tmp/ -N',
        'tar -xf /tmp/1.5-alpha3.tar.gz --directory /tmp/',
        'cd /tmp/filebench-1.5-alpha3/ && libtoolize && aclocal && autoheader && automake --add-missing && autoconf && ./configure && make && make install',
    )
    for command in commands:
        execute_cmd(command, hosts)
def run_mailserver(self, elmerfs_hosts, duration, n_client):
    """Run the Filebench varmail workload on randomly chosen elmerfs hosts.

    Parameters
    ----------
    elmerfs_hosts: list
        the hosts among which the Filebench hosts are sampled
    duration: int
        the value written in the `run` line of varmail.f
    n_client: int
        the number of hosts to run on; the value 100 is a special case that
        runs on a single host and keeps the default number of threads

    Returns
    -------
    tuple
        (True, the list of hosts on which Filebench was run)
    """
    if n_client == 100:
        n_hosts = 1
    else:
        n_hosts = n_client
    hosts = random.sample(elmerfs_hosts, n_hosts)

    logger.info('Dowloading Filebench configuration file')
    cmd = 'wget https://raw.githubusercontent.com/filebench/filebench/master/workloads/varmail.f -P /tmp/ -N'
    execute_cmd(cmd, hosts)

    logger.info('Editing the configuration file')
    # Raw string: the backslash that escapes '/' for sed must reach the shell
    # untouched, and "\/" is not a valid Python escape sequence.
    cmd = r'sed -i "s/tmp/tmp\/dc-$(hostname)/g" /tmp/varmail.f'
    execute_cmd(cmd, hosts)
    cmd = 'sed -i "s/run 60/run %s/g" /tmp/varmail.f' % duration
    execute_cmd(cmd, hosts)
    cmd = 'sed -i "s/name=bigfileset/name=bigfileset-$(hostname)/g" /tmp/varmail.f'
    execute_cmd(cmd, hosts)
    cmd = 'sed -i "s/meandirwidth=1000000/meandirwidth=1000/g" /tmp/varmail.f'
    execute_cmd(cmd, hosts)
    if n_client != 100:
        cmd = 'sed -i "s/nthreads=16/nthreads=32/g" /tmp/varmail.f'
        execute_cmd(cmd, hosts)

    logger.info('Clearing cache ')
    cmd = 'rm -rf /tmp/dc-$(hostname)/bigfileset'
    execute_cmd(cmd, hosts)
    cmd = 'sync; echo 3 > /proc/sys/vm/drop_caches'
    execute_cmd(cmd, hosts)

    logger.info('hosts = %s' % hosts)
    logger.info('Running filebench in %s second' % duration)
    cmd = 'setarch $(arch) -R filebench -f /tmp/varmail.f > /tmp/results/filebench_$(hostname)'
    execute_cmd(cmd, hosts)
    return True, hosts
def deploy_monitoring(self, kube_master, kube_namespace):
    """Deploy Prometheus and Grafana to monitor the AntidoteDB pods.

    The antidote_stats repository is cloned on the master, the three
    `*.template` files found in the configured `monitoring_yaml_path` are
    filled in and written next to their template, and the resulting
    resources are deployed in the given namespace.

    Parameters
    ----------
    kube_master: str
        the hostname of the Kubernetes master
    kube_namespace: str
        the namespace in which the monitoring resources are deployed

    Raises
    ------
    RuntimeError
        if the master node reports no address of type InternalIP
    """
    logger.info("Deploying monitoring system")
    monitoring_k8s_dir = self.configs['exp_env']['monitoring_yaml_path']

    def render_template(output_name, replacements):
        # Fill '<output_name>.template' with the (placeholder, value) pairs,
        # applied in order, and return the path of the file written.
        with open(os.path.join(monitoring_k8s_dir, output_name + '.template')) as f:
            doc = f.read()
        for placeholder, value in replacements:
            doc = doc.replace(placeholder, value)
        output_path = os.path.join(monitoring_k8s_dir, output_name)
        with open(output_path, 'w') as f:
            f.write(doc)
        return output_path

    logger.info("Deleting old deployment")
    cmd = "rm -rf /root/antidote_stats"
    execute_cmd(cmd, kube_master)

    logger.debug("Init configurator: k8s_resources_configurator")
    configurator = k8s_resources_configurator()

    cmd = "git clone https://github.com/AntidoteDB/antidote_stats.git"
    execute_cmd(cmd, kube_master)
    logger.info("Setting to allow pods created on kube_master")
    cmd = "kubectl taint nodes --all node-role.kubernetes.io/master-"
    execute_cmd(cmd, kube_master, is_continue=True)

    pods = configurator.get_k8s_resources_name(resource='pod',
                                               label_selectors='app=antidote',
                                               kube_namespace=kube_namespace)
    antidote_info = ["%s.antidote:3001" % pod for pod in pods]

    logger.debug('Modify the prometheus.yml file with antidote instances info')
    prometheus_configmap_file = render_template(
        'prometheus.yml', [('antidotedc_info', '%s' % antidote_info)])
    configurator.create_configmap(file=prometheus_configmap_file,
                                  namespace=kube_namespace,
                                  configmap_name='prometheus-configmap')

    logger.debug('Modify the deploy_prometheus.yaml file with kube_master info')
    kube_master_info = configurator.get_k8s_resources(
        resource='node',
        label_selectors='kubernetes.io/hostname=%s' % kube_master)
    kube_master_ip = None
    for item in kube_master_info.items[0].status.addresses:
        if item.type == 'InternalIP':
            kube_master_ip = item.address
    if kube_master_ip is None:
        # Without this check the first use below fails on an unbound variable
        raise RuntimeError(
            'Cannot find the InternalIP address of node %s' % kube_master)

    master_replacements = [
        ('kube_master_ip', '%s' % kube_master_ip),
        ("kube_master_hostname", '%s' % kube_master),
    ]
    prometheus_deploy_file = render_template('deploy_prometheus.yaml',
                                             master_replacements)

    logger.info("Starting Prometheus service")
    configurator.deploy_k8s_resources(files=[prometheus_deploy_file],
                                      namespace=kube_namespace)
    logger.info('Waiting until Prometheus instance is up')
    configurator.wait_k8s_resources(resource='pod',
                                    label_selectors="app=prometheus",
                                    kube_namespace=kube_namespace)

    logger.debug('Modify the deploy_grafana.yaml file with kube_master info')
    grafana_deploy_file = render_template('deploy_grafana.yaml',
                                          master_replacements)

    # Point the Grafana datasource of the cloned repository to the master
    file = '/root/antidote_stats/monitoring/grafana-config/provisioning/datasources/all.yml'
    cmd = """ sed -i "s/localhost/%s/" %s """ % (kube_master_ip, file)
    execute_cmd(cmd, kube_master)

    logger.info("Starting Grafana service")
    configurator.deploy_k8s_resources(files=[grafana_deploy_file],
                                      namespace=kube_namespace)
    logger.info('Waiting until Grafana instance is up')
    configurator.wait_k8s_resources(resource='pod',
                                    label_selectors="app=grafana",
                                    kube_namespace=kube_namespace)

    logger.info("Finish deploying monitoring system\n")
    logger.info("Connect to Grafana at: http://%s:3000" % kube_master_ip)
    logger.info("Connect to Prometheus at: http://%s:9090" % kube_master_ip)
def perform_combination(self, kube_namespace, concurrent_clients):
    """Deploy the FMKe clients of every cluster and wait for their jobs.

    For each cluster listed in the configuration, a fmke_client config file
    and a create_fmke_client yaml file are generated from their templates in
    the configured `fmke_yaml_path`. The config files are uploaded to the
    fmke_client nodes, the jobs are deployed, and the method waits on the
    jobs labelled app=fmke-client.

    Parameters
    ----------
    kube_namespace: str
        the namespace in which the FMKe pods are looked up and the client
        jobs are deployed
    concurrent_clients: int
        the value written in the `{concurrent, ...}.` entry of each
        fmke_client config file
    """
    logger.info('-----------------------------------------------------------------')
    logger.info('5. Starting deploying fmke client to stress the Antidote database')
    fmke_client_k8s_dir = self.configs['exp_env']['fmke_yaml_path']

    # Remove the files generated by a previous run; templates are kept
    logger.debug('Delete old k8s yaml files if exists')
    for filename in os.listdir(fmke_client_k8s_dir):
        if filename.startswith('create_fmke_client_') or filename.startswith('fmke_client_'):
            if '.template' not in filename:
                try:
                    os.remove(os.path.join(fmke_client_k8s_dir, filename))
                except OSError:
                    logger.debug("Error while deleting file")

    logger.debug('Create fmke_client folder on each fmke_client node')
    configurator = k8s_resources_configurator()
    exp_nodes = configurator.get_k8s_resources_name(resource='node',
                                                    label_selectors='service_g5k=fmke_client')
    cmd = 'mkdir -p /tmp/fmke_client'
    execute_cmd(cmd, exp_nodes)

    logger.debug('Create fmke_client config files to stress database for each Antidote DC')
    file_path = os.path.join(fmke_client_k8s_dir, 'fmke_client.config.template')

    fmke_list = configurator.get_k8s_resources(resource='pod',
                                               label_selectors='app=fmke',
                                               kube_namespace=kube_namespace)
    for cluster in self.configs['exp_env']['clusters']:
        # IPs of the FMKe pods whose name contains the cluster name
        fmke_IPs = list()
        for fmke in fmke_list.items:
            if cluster in fmke.metadata.name:
                fmke_IPs.append(fmke.status.pod_ip)
        # One port entry (9090) per FMKe IP
        fmke_ports = [9090 for i in range(0, len(fmke_IPs))]
        # Modify fmke_client config files with new values
        with open(file_path) as f:
            doc = f.read().replace('["127.0.0.1"]', '%s' % fmke_IPs)
            doc = doc.replace("[9090]", '%s' % fmke_ports)
            doc = doc.replace("{concurrent, 16}.", '{concurrent, %s}.' % concurrent_clients)
            # The Python list repr uses single quotes: turn every single
            # quote of the document into a double quote
            doc = doc.replace("'", '"')
        file_path2 = os.path.join(fmke_client_k8s_dir, 'fmke_client_%s.config' % cluster)
        with open(file_path2, 'w') as f:
            f.write(doc)
        logger.debug('Upload fmke_client config files to kube_master to be used by kubectl to run fmke_client pods')
        getput_file(hosts=exp_nodes, file_paths=[file_path2], dest_location='/tmp/fmke_client/', action='put')

    logger.debug('Create create_fmke_client.yaml files to run job stress for each Antidote DC')
    file_path = os.path.join(fmke_client_k8s_dir, 'create_fmke_client.yaml.template')
    with open(file_path) as f:
        doc = yaml.safe_load(f)
    fmke_client_files = list()
    # The same loaded document is updated and dumped once per cluster
    for cluster in self.configs['exp_env']['clusters']:
        doc['spec']['parallelism'] = self.configs['exp_env']['n_fmke_client_per_dc']
        doc['spec']['completions'] = self.configs['exp_env']['n_fmke_client_per_dc']
        doc['metadata']['name'] = 'fmke-client-%s' % cluster
        # postStart hook: copy the config file of this cluster into place
        doc['spec']['template']['spec']['containers'][0]['lifecycle']['postStart']['exec']['command'] = [
            "cp", "/cluster_node/fmke_client_%s.config" % cluster, "/fmke_client/fmke_client.config"]
        doc['spec']['template']['spec']['nodeSelector'] = {
            'service_g5k': 'fmke_client', 'cluster_g5k': '%s' % cluster}
        file_path = os.path.join(fmke_client_k8s_dir, 'create_fmke_client_%s.yaml' % cluster)
        with open(file_path, 'w') as f:
            yaml.safe_dump(doc, f)
        fmke_client_files.append(file_path)

    logger.info("Running fmke client instances on each DC")
    logger.debug("Init configurator: k8s_resources_configurator")
    configurator = k8s_resources_configurator()
    configurator.deploy_k8s_resources(files=fmke_client_files, namespace=kube_namespace)

    # Read the duration from the config template; '0' is used when the
    # template has no '{duration' line
    t = '0'
    with open(os.path.join(fmke_client_k8s_dir, 'fmke_client.config.template')) as search:
        for line in search:
            line = line.rstrip()  # remove '\n' at end of line
            if "{duration" in line:
                t = line.split(',')[1].split('}')[0].strip()
    # Duration plus a margin of 5, multiplied by 60
    # (presumably minutes converted to seconds; confirm against wait_k8s_resources)
    timeout = (int(t) + 5)*60

    logger.info("Stressing database in %s minutes ....." % t)
    configurator.wait_k8s_resources(resource='job',
                                    label_selectors="app=fmke-client",
                                    timeout=timeout,
                                    kube_namespace=kube_namespace)
    logger.info("Finish stressing Antidote database")
def run_mailserver(self, hosts, mountpoint, duration, n_threads):
    """Run the Filebench varmail workload on the given hosts.

    Parameters
    ----------
    hosts: list
        the hosts on which Filebench is run
    mountpoint: str
        the text that replaces every "tmp" in varmail.f (it is inserted as is
        in a sed expression)
    duration: int
        the value written in the `run` line of varmail.f; the method sleeps
        `duration + 60` seconds after starting Filebench
    n_threads: int
        the value written in the `nthreads` setting of varmail.f

    Returns
    -------
    bool
        False when the output of a Filebench process contains
        'Failed to create filesets', True otherwise
    """
    logger.info('Dowloading Filebench configuration file')
    execute_cmd('wget https://raw.githubusercontent.com/filebench/filebench/master/workloads/varmail.f -P /tmp/ -N', hosts)

    logger.info('Editing the configuration file')
    edit_cmds = [
        'sed -i "s/tmp/%s/g" /tmp/varmail.f' % mountpoint,
        'sed -i "s/run 60/run %s/g" /tmp/varmail.f' % duration,
        'sed -i "s/name=bigfileset/name=bigfileset-$(hostname)/g" /tmp/varmail.f',
        'sed -i "s/meandirwidth=1000000/meandirwidth=1000/g" /tmp/varmail.f',
        'sed -i "s/nthreads=16/nthreads=%s/g" /tmp/varmail.f' % n_threads,
    ]
    for edit_cmd in edit_cmds:
        execute_cmd(edit_cmd, hosts)

    logger.info('Clearing cache ')
    for clear_cmd in ('rm -rf /tmp/dc-$(hostname)/bigfileset',
                      'sync; echo 3 > /proc/sys/vm/drop_caches'):
        execute_cmd(clear_cmd, hosts)

    logger.info('Running mailserver on hosts:\n%s' % hosts)
    logger.info('Running filebench in %s second' % duration)
    run_cmd = 'setarch $(arch) -R filebench -f /tmp/varmail.f > /tmp/results/filebench_$(hostname)'
    _, results = execute_cmd(run_cmd, hosts, mode='start')

    failed = any('Failed to create filesets' in process.stdout.strip()
                 for process in results.processes)
    if failed:
        logger.info('Cannot run filebench.')
        return False

    # Filebench was started with mode='start': give it the time of the run
    # plus 60 more seconds before returning
    sleep(duration + 60)
    return True
def deploy_fmke_client(self, fmke_yaml_path, test_duration, concurrent_clients, n_total_fmke_clients, workload=None,
                       kube_namespace='default'):
    """Deploy FMKe client on the given K8s cluster

    For every pod labelled app=fmke in the namespace, a dedicated
    fmke_client config file and a create_fmke_client yaml file are generated
    from the templates found in fmke_yaml_path, the config file is uploaded
    to the pod's host, and all generated yaml files are then deployed
    together.

    Parameters
    ----------
    fmke_yaml_path: str
        a path to the K8s yaml deployment files
    test_duration: int
        the duration to perform the workload (reported in minutes in the
        log message)
    concurrent_clients: int
        the number of concurrent clients
    n_total_fmke_clients: int
        the total number of clients need to be deployed on the system
    workload: dict
        the workload ratio of FMKe benchmark
    kube_namespace: str
        the name of K8s namespace

    Raises
    ------
    CancelException
        if the number of deployed FMKe client pods differs from
        n_total_fmke_clients, or if waiting for the FMKe client jobs fails
    """
    logger.debug('Delete old k8s yaml files if exists')
    generated_prefixes = ('create_fmke_client_', 'fmke_client_')
    for filename in os.listdir(fmke_yaml_path):
        # Templates are the inputs of this method and must survive the clean-up
        if not filename.startswith(generated_prefixes) or '.template' in filename:
            continue
        try:
            os.remove(os.path.join(fmke_yaml_path, filename))
        except OSError:
            logger.debug('Error while deleting file')

    # Stays None when no custom workload is requested
    operations = None
    if workload:
        logger.debug('Create the new workload ratio')
        new_workload = ',\n'.join(' {%s, %s}' % (key, val) for key, val in workload.items())
        operations = '{operations,[\n%s\n]}.' % new_workload

    logger.debug('Init configurator: k8s_resources_configurator')
    configurator = k8s_resources_configurator()
    fmke_list = configurator.get_k8s_resources(resource='pod',
                                               label_selectors='app=fmke',
                                               kube_namespace=kube_namespace)

    # Both templates are loop-invariant, so they are read only once.
    config_file_path = os.path.join(fmke_yaml_path, 'fmke_client.config.template')
    create_file_path = os.path.join(fmke_yaml_path, 'create_fmke_client.yaml.template')
    with open(config_file_path) as f:
        config_template = f.read()
    with open(create_file_path) as f:
        job_doc = yaml.safe_load(f)

    fmke_client_files = list()
    for fmke in fmke_list.items:
        node = fmke.spec.node_name.split('.')[0]

        # Modify fmke_client config files with new values
        logger.debug('Create fmke_client config files to stress database for each AntidoteDB DC')
        doc = config_template.replace('127.0.0.1', '%s' % fmke.status.pod_ip)
        doc = doc.replace('{concurrent, 16}.', '{concurrent, %s}.' % concurrent_clients)
        doc = doc.replace('{duration, 3}.', '{duration, %s}.' % test_duration)
        doc = doc.replace("'", '"')
        if operations is not None:
            # A callable replacement inserts the text verbatim; a plain string
            # would have its backslashes parsed as regex escapes/group refs.
            # With re.S the pattern swallows everything from "{operations" to
            # the end of the file.
            doc = re.sub(r'{operations.*', lambda _match: operations, doc, flags=re.S)
        file_path = os.path.join(fmke_yaml_path, 'fmke_client_%s.config' % node)
        with open(file_path, 'w') as f:
            f.write(doc)

        logger.debug('Create fmke_client folder on each fmke_client node')
        execute_cmd('mkdir -p /tmp/fmke_client', fmke.status.host_ip)

        logger.debug('Upload fmke_client config files to kube_master to be used by kubectl to run fmke_client pods')
        getput_file(hosts=fmke.status.host_ip,
                    file_paths=[file_path],
                    dest_location='/tmp/fmke_client/',
                    action='put')

        logger.debug('Create create_fmke_client.yaml files to deploy one FMKe client')
        # Every per-node field is overwritten on each iteration, so the parsed
        # template can be reused safely from one pod to the next.
        pod_spec = job_doc['spec']['template']['spec']
        job_doc['metadata']['name'] = 'fmke-client-%s' % node
        pod_spec['containers'][0]['lifecycle']['postStart']['exec']['command'] = [
            'cp',
            '/cluster_node/fmke_client_%s.config' % node,
            '/fmke_client/fmke_client.config',
        ]
        pod_spec['nodeSelector'] = {
            'service': 'fmke',
            'kubernetes.io/hostname': '%s' % fmke.spec.node_name,
        }
        file_path = os.path.join(fmke_yaml_path, 'create_fmke_client_%s.yaml' % node)
        with open(file_path, 'w') as f:
            yaml.safe_dump(job_doc, f)
        fmke_client_files.append(file_path)

    logger.info('Starting FMKe client instances on each AntidoteDB DC')
    configurator.deploy_k8s_resources(files=fmke_client_files, namespace=kube_namespace)
    # Leave the pods some time to be created before counting them
    sleep(20)

    logger.info('Checking if deploying enough the number of running FMKe_client or not')
    fmke_client_list = configurator.get_k8s_resources_name(resource='pod',
                                                           label_selectors='app=fmke-client',
                                                           kube_namespace=kube_namespace)
    if len(fmke_client_list) != n_total_fmke_clients:
        logger.info('n_fmke_client = %s, n_deployed_fmke_client = %s' % (n_total_fmke_clients, len(fmke_client_list)))
        raise CancelException('Cannot deploy enough FMKe_client')

    logger.info('Stressing database in %s minutes .....' % test_duration)
    deploy_ok = configurator.wait_k8s_resources(resource='job',
                                                label_selectors='app=fmke-client',
                                                timeout=(test_duration + 5) * 60,
                                                kube_namespace=kube_namespace)
    if not deploy_ok:
        logger.error('Cannot wait until all FMKe client instances running completely')
        raise CancelException('Cannot wait until all FMKe client instances running completely')

    logger.info('Finish stressing AntidoteDB cluster')
def deploy_elmerfs(self, kube_master, kube_namespace, elmerfs_hosts):
    """Build (when no binary is configured) and start elmerfs on the given hosts

    Any elmerfs process already running on a host is killed and its mount
    point /tmp/dc-$(hostname) is unmounted and removed. When the
    "elmerfs_path" setting is None, the project is cloned and built with
    Docker on kube_master and the resulting binary is fetched locally. The
    binary is then uploaded to every elmerfs host and started against the
    AntidoteDB pods (label app=antidote) located on the same node.

    Parameters
    ----------
    kube_master:
        the host on which the elmerfs project is cloned and built
    kube_namespace: str
        the name of K8s namespace in which the AntidoteDB pods are looked up
    elmerfs_hosts: list
        the hosts on which elmerfs is deployed; they are matched against
        the node names of the AntidoteDB pods

    Returns
    -------
    bool
        True if an elmerfs process was found on every host, False if a host
        has no AntidoteDB pod or if elmerfs could not be started on it
    """
    logger.info("Starting deploying elmerfs on hosts")
    # NOTE(review): the original code had the installation of libfuse2, wget
    # and jq on the elmerfs hosts commented out; presumably the hosts already
    # provide them - confirm.

    exp_env = self.configs["exp_env"]
    elmerfs_repo = exp_env["elmerfs_repo"]
    elmerfs_version = exp_env["elmerfs_version"]
    elmerfs_file_path = exp_env["elmerfs_path"]

    # NOTE(review): elmerfs_repo is read and defaulted, but the download step
    # below always clones the scality/elmerfs GitHub repository.
    if elmerfs_repo is None:
        elmerfs_repo = "https://github.com/scality/elmerfs"
    if elmerfs_version is None:
        elmerfs_version = "latest"

    logger.info("Killing elmerfs process if it is running")
    for host in elmerfs_hosts:
        _, r = execute_cmd("pidof elmerfs", host)
        # split() copes with empty output and with any whitespace separator
        pids = r.processes[0].stdout.split()
        if not pids:
            continue
        for pid in pids:
            execute_cmd("kill %s" % pid, host)
        execute_cmd("umount /tmp/dc-$(hostname)", host)
        execute_cmd("rm -rf /tmp/dc-$(hostname)", host)

    logger.info("Delete elmerfs project folder on host (if existing)")
    execute_cmd("rm -rf /tmp/elmerfs_repo", kube_master)

    if elmerfs_file_path is None:
        logger.info("Downloading elmerfs project from the repo")
        # The release tag is looked up through the GitHub API and handed to
        # git clone by xargs
        cmd = (
            'curl -H "Accept: application/vnd.github.v3+json" '
            'https://api.github.com/repos/scality/elmerfs/releases/%s '
            '| jq ".tag_name" '
            '| xargs -I tag_name git clone https://github.com/scality/elmerfs.git '
            '--branch tag_name --single-branch /tmp/elmerfs_repo'
        ) % elmerfs_version
        execute_cmd(cmd, kube_master)

        cmd = "cd /tmp/elmerfs_repo && git submodule update --init --recursive"
        execute_cmd(cmd, kube_master)

        # The here-document is closed explicitly with its EOF delimiter
        dockerfile_lines = [
            "cat <<EOF | sudo tee /tmp/elmerfs_repo/Dockerfile",
            "FROM rust:1.47",
            "RUN mkdir /elmerfs",
            "WORKDIR /elmerfs",
            "COPY . .",
            "RUN apt-get update && apt-get -y install libfuse-dev",
            "RUN cargo build --release",
            'CMD ["/bin/bash"]',
            "EOF",
        ]
        execute_cmd("\n".join(dockerfile_lines) + "\n", kube_master)

        logger.info("Building elmerfs")
        cmd = "cd /tmp/elmerfs_repo/ && docker build -t elmerfs ."
        execute_cmd(cmd, kube_master)

        # Run the image once so the release binary can be copied out of the
        # container, then drop the container
        cmd = (
            "docker run --name elmerfs elmerfs "
            "&& docker cp -L elmerfs:/elmerfs/target/release/main /tmp/elmerfs "
            "&& docker rm elmerfs"
        )
        execute_cmd(cmd, kube_master)

        getput_file(
            hosts=[kube_master],
            file_paths=["/tmp/elmerfs"],
            dest_location="/tmp",
            action="get",
        )
        elmerfs_file_path = "/tmp/elmerfs"

    logger.info("Uploading elmerfs binary file from local to %s elmerfs hosts" % len(elmerfs_hosts))
    getput_file(
        hosts=elmerfs_hosts,
        file_paths=[elmerfs_file_path],
        dest_location="/tmp",
        action="put",
    )
    cmd = "chmod +x /tmp/elmerfs && mkdir -p /tmp/dc-$(hostname)"
    execute_cmd(cmd, elmerfs_hosts)

    logger.debug("Getting IP of antidoteDB instances on nodes")
    antidote_ips = dict()
    configurator = k8s_resources_configurator()
    pod_list = configurator.get_k8s_resources(
        resource="pod",
        label_selectors="app=antidote",
        kube_namespace=kube_namespace,
    )
    for pod in pod_list.items:
        antidote_ips.setdefault(pod.spec.node_name, list()).append(pod.status.pod_ip)

    for host in elmerfs_hosts:
        if host not in antidote_ips:
            # Without an AntidoteDB pod on this node there is nothing for
            # elmerfs to connect to: report the failure instead of crashing.
            logger.info("Cannot deploy elmerfs on host %s" % host)
            return False

        antidote_options = ["--antidote=%s:8087" % ip for ip in antidote_ips[host]]
        elmerfs_cmd = (
            "RUST_BACKTRACE=1 RUST_LOG=debug nohup /tmp/elmerfs %s --mount=/tmp/dc-$(hostname) "
            "--force-view=$ELMERFS_UID > /tmp/elmer.log 2>&1" % " ".join(antidote_options)
        )
        logger.info("Starting elmerfs on %s with cmd: %s" % (host, elmerfs_cmd))
        execute_cmd(elmerfs_cmd, host, mode='start')
        sleep(10)

        # Check up to 10 times, restarting elmerfs whenever no process is found
        for _ in range(10):
            _, r = execute_cmd("pidof elmerfs", host)
            if r.processes[0].stdout.split():
                break
            execute_cmd(elmerfs_cmd, host, mode="start")
            sleep(10)
        else:
            logger.info("Cannot deploy elmerfs on host %s" % host)
            return False

    logger.info("Finish deploying elmerfs\n")
    return True