def _check_signatures(self, hostname, cname, jar_sha256sum, script_sha256):
    """Verify the checksums of the application and script inside an image.

    Runs a remote script on ``hostname`` that removes a stale
    ``/tmp/run.cid`` and then runs ``sha256sum -c`` twice inside a throwaway
    container of image ``cname``: once against ``jar_sha256sum`` and once
    against ``script_sha256``.

    Args:
        hostname: Host on which the remote script is executed.
        cname: Docker image name to run the checks in.
        jar_sha256sum: Checksum line fed to ``sha256sum -c`` for the app.
        script_sha256: Checksum line fed to ``sha256sum -c`` for the script.

    Raises:
        WorkflowError: If the script reports a failure (exit codes 100, 101,
            102) or finishes with any other non-zero return code.
    """
    self.logger.info('>>> Check signatures.')

    # The script ends with an explicit ``exit 0``: without it the exit status
    # of a fully successful run would be that of the final (false)
    # ``[[ $? -ne 0 ]]`` test, i.e. non-zero.
    script = f"""  # noqa: E501
        if [[ -f /tmp/run.cid ]]; then
            cat ~/.passwd | sudo -S -p '' rm /tmp/run.cid
        fi
        [[ $? -ne 0 ]] && exit 100

        cat ~/.passwd | sudo -S -p '' docker run --rm -i --entrypoint='' {cname} bash -c 'sha256sum -c < <(echo {jar_sha256sum})'
        [[ $? -ne 0 ]] && exit 101

        cat ~/.passwd | sudo -S -p '' docker run --rm -i --entrypoint='' {cname} bash -c 'sha256sum -c < <(echo {script_sha256})'
        [[ $? -ne 0 ]] && exit 102

        exit 0
    """.rstrip()

    returncode, _, _ = self._run_remote_script(hostname, script)

    if returncode == 0:
        return
    if returncode == 101:
        raise WorkflowError(
            f"Failed to check {hostname} app signature.",
            error_code=self.FAILED_TO_CHECK_IMAGE_APP_SIGNATURE)
    if returncode == 102:
        raise WorkflowError(
            f"Failed to check script signature on {hostname}.",
            error_code=self.FAILED_TO_CHECK_SCRIPT_SIGNATURE)
    # Exit code 100 and every unexpected code (e.g. a transport failure) must
    # not be mistaken for a successful verification.
    raise WorkflowError(
        f"Failed to check signature on {hostname}.",
        error_code=returncode)
def _upload_docker_file(self, hostname):
    """Copy the local ``dockerfile`` directory to the remote host via scp.

    The local directory is ``<root>/dockerfile`` where ``<root>`` is the part
    of this module's directory path up to and including the first
    ``benchmarker/`` segment.

    Args:
        hostname: Remote host that receives the files.

    Raises:
        WorkflowError: With error code 1051 if the local dockerfile directory
            cannot be located, otherwise with
            ``FAILED_TO_UPLOAD_DOCKER_FILE`` if the upload fails.
    """
    self.logger.info('>>> Upload Dockerfile and entrypoint.sh .')
    remote = self.workspace.remote

    marker = 'benchmarker/'
    # A trailing separator lets a module that lives directly inside
    # ``benchmarker/`` match the marker as well.
    module_dir = os.path.abspath(os.path.dirname(__file__)) + '/'
    marker_pos = module_dir.find(marker)
    if marker_pos < 0:
        # ``find`` returns -1 here; slicing with it would yield a bogus path.
        raise WorkflowError(
            'Failed to upload Dockerfile due to file not exists.',  # noqa: E501
            error_code=1051)
    root_path = module_dir[:marker_pos + len(marker)]
    dockerfile_path = os.path.abspath(root_path + 'dockerfile')

    # ``|| exit 2`` keeps an scp failure from being masked by the final
    # ``exit 0``.
    script = f"""
        if [[ -d {dockerfile_path} ]]; then
            scp -r \
                {dockerfile_path}/* \
                {remote.user}@{hostname}:{remote.docker_file} || exit 2
        else
            echo "Dockerfile not exists."
            exit 1
        fi
        exit 0
    """.rstrip()

    returncode, _, _ = self._run_local_script(script)
    if returncode == 0:
        return

    if returncode == 1:
        raise WorkflowError(
            'Failed to upload Dockerfile due to file not exists.',  # noqa: E501
            error_code=1051)

    raise WorkflowError('Failed to upload Dockerfile.',
                        error_code=self.FAILED_TO_UPLOAD_DOCKER_FILE)
def _warmup_then_pressure(self):
    """Run a warmup ``wrk`` pass, then a pressure pass, and record the QPS.

    Both passes use the same script template and connection count; only the
    duration differs. The output of the pressure pass is handed to
    ``self._extract_qps`` and the result stored in ``self.qps_result``.

    Raises:
        WorkflowError: If either local script returns a non-zero code.
    """
    cfg = self.config

    wrk_template = """
        sleep {sleep}
        wrk -t{threads} -c{connections} -d{duration} -T{timeout} \
            --script=./benchmark/wrk.lua \
            --latency http://{hostname}:{port}/invoke
        exit 0
    """.rstrip()

    # Pre-bind the values shared by the warmup and the pressure run.
    render = partial(
        wrk_template.format,
        timeout=cfg.wrk_timeout,
        hostname=cfg.consumer_hostname,
        port=cfg.consumer_port)

    connections = cfg.connections

    # Phase 1: warmup.
    self.logger.info('>>> Warmup.')
    warmup_script = render(
        sleep=5,
        threads=cfg.wrk_threads,
        connections=connections,
        duration=cfg.warmup_duration)
    warmup_code, _, _ = self._run_local_script(warmup_script)
    if warmup_code != 0:
        raise WorkflowError(
            'Failed to warmup applications.',
            error_code=self.FAILED_TO_WARMUP_APPLICATIONS)

    # Phase 2: pressure; its output carries the figures we report.
    self.logger.info(f">>> Pressure with {connections} connections.")
    pressure_script = render(
        sleep=5,
        threads=cfg.wrk_threads,
        connections=connections,
        duration=cfg.pressure_duration)
    pressure_code, pressure_out, _ = self._run_local_script(pressure_script)
    if pressure_code != 0:
        raise WorkflowError(
            f"Failed to pressure applications with {connections} connections.",
            error_code=self.FAILED_TO_PRESSURE_APPLICATIONS)

    qps = self._extract_qps(pressure_out)
    self.logger.info('QPS = %s', qps)
    self.qps_result = qps
def _lock_local_workspace(self):
    """Create the local workspace home and take its lock file.

    The home directory is created if needed, then the lock file is created
    exclusively; an already existing lock file means the workspace is held.

    Raises:
        WorkflowError: With ``FAILED_TO_LOCK_LOCAL_WORKSPACE`` if the home
            directory cannot be created, the lock file already exists, or
            the lock file cannot be created for any other reason.
    """
    self.logger.info('>>> Lock local workspace.')
    local = self.workspace.local

    # ``exist_ok=True`` avoids the check-then-create race of
    # ``exists()`` + ``mkdir()``.
    try:
        Path(local.home).mkdir(parents=True, exist_ok=True)
    except OSError as err:
        raise WorkflowError(
            'Failed to lock local workspace.',
            error_code=self.FAILED_TO_LOCK_LOCAL_WORKSPACE) from err

    try:
        # ``exist_ok=False`` makes the creation exclusive.
        Path(local.lock_file).touch(exist_ok=False)
    except FileExistsError as err:
        raise WorkflowError(
            'Failed to lock local workspace due to lock file exists.',
            error_code=self.FAILED_TO_LOCK_LOCAL_WORKSPACE) from err
    except Exception as err:
        raise WorkflowError(
            'Failed to lock local workspace.',
            error_code=self.FAILED_TO_LOCK_LOCAL_WORKSPACE) from err
def _build_docker_images(self, hostname, user_code_address, cname):
    """Build the ``cname`` Docker image on a remote host.

    Runs ``docker build --no-cache`` on ``hostname`` with
    ``user_code_address`` passed as a build argument and tags the result
    ``<cname>:latest``.

    Args:
        hostname: Host on which the image is built.
        user_code_address: Value of the ``user_code_address`` build argument.
        cname: Image name; also the suffix of the Dockerfile path.

    Raises:
        WorkflowError: With ``FAILED_TO_BUILD_DOCKER_IMAGES`` if the remote
            script returns a non-zero code.
    """
    import shlex

    self.logger.info(f">>> build docker images on {hostname}.")
    remote = self.workspace.remote

    # Quote the address so characters such as ``&``, ``?`` or ``;`` are passed
    # to docker verbatim instead of being interpreted by the remote shell.
    quoted_address = shlex.quote(str(user_code_address))

    script = f"""
        cat ~/.passwd | sudo -S -p '' docker build --no-cache \
            --build-arg user_code_address={quoted_address} \
            --tag={cname}:latest {remote.docker_file}/{self.DOCKERFILE_PREFIX_PATH}{cname}
    """.rstrip()

    returncode, _, _ = self._run_remote_script(hostname, script)
    if returncode != 0:
        raise WorkflowError('Failed to build Docker images.',
                            error_code=self.FAILED_TO_BUILD_DOCKER_IMAGES)
def _lock_remote_task_home(self, hostname):
    """Take the remote task-home lock by creating its lock file.

    Args:
        hostname: Host on which the lock file is created.

    Raises:
        WorkflowError: With error code 1041 if the lock file already exists,
            otherwise with ``FAILED_TO_LOCK_REMOTE_TASK_HOME`` if the lock
            cannot be taken.
    """
    self.logger.info(f">>> Lock remote task home on {hostname}.")
    remote = self.workspace.remote

    # With ``noclobber`` set, the ``>`` redirect refuses to overwrite an
    # existing file, so testing for and creating the lock happen in a single
    # step instead of a racy "check, then touch". A creation failure that is
    # not caused by an existing lock file is reported with exit code 2
    # rather than being hidden behind ``exit 0``.
    script = """
        if ( set -o noclobber; : > {ws.lock_file} ) 2> /dev/null; then
            exit 0
        fi
        if [[ -f {ws.lock_file} ]]; then
            echo "Lock file exists."
            exit 1
        fi
        echo "Cannot create lock file."
        exit 2
    """.format(ws=remote).rstrip()

    returncode, _, _ = self._run_remote_script(hostname, script)
    if returncode == 0:
        return

    if returncode == 1:
        raise WorkflowError(
            'Failed to lock remote task home due to lock file exists.',
            error_code=1041)

    raise WorkflowError('Failed to lock remote task home.',
                        error_code=self.FAILED_TO_LOCK_REMOTE_TASK_HOME)
def _create_remote_task_home(self, hostname):
    """Create the task home and its ``dockerfile`` directory on a remote host.

    Args:
        hostname: Host on which the directories are created.

    Raises:
        WorkflowError: With ``FAILED_TO_CREATE_REMOTE_TASK_HOME`` if the
            remote script returns a non-zero code.
    """
    self.logger.info(f">>> Create remote task home on {hostname}.")
    remote = self.workspace.remote

    # ``|| exit 1`` propagates a failing mkdir; the trailing ``exit 0`` alone
    # would report success even when a directory could not be created.
    script = """
        mkdir -p {ws.task_home} || exit 1
        mkdir -p {ws.task_home}/dockerfile || exit 1
        exit 0
    """.format(ws=remote).rstrip()

    returncode, _, _ = self._run_remote_script(hostname, script)
    if returncode != 0:
        raise WorkflowError(
            'Failed to create remote task home.',
            error_code=self.FAILED_TO_CREATE_REMOTE_TASK_HOME)
def start(self):
    """Start the consumer container and wait until its port accepts connections.

    The remote script recreates ``<task_home>/consumer``, launches the
    ``consumer`` image detached, then polls the consumer port with ``ncat``
    until a connection succeeds or ``max_attempts`` retries are used up.

    Raises:
        WorkflowError: With ``FAILED_TO_START_CONSUMER_SERVICE`` if the
            remote script returns a non-zero code.
    """
    self.logger.info('>>> Start consumer service.')

    config = self.config
    consumer_port = config.consumer_port
    task_home = self.workspace.remote.task_home
    cpu_set = config.consumer_cpu_set
    memory = config.consumer_memory
    max_attempts = config.max_attempts
    sleep = config.sleep_interval
    provider_small_ip = config.provider_small_ip
    provider_medium_ip = config.provider_medium_ip
    provider_large_ip = config.provider_large_ip

    # NOTE: --cpu-period / --cpu-quota are not passed to docker; the
    # container is constrained by --cpuset-cpus and --memory only.
    script = f"""  # noqa: E501
        CONSUMER_HOME={task_home}/consumer
        rm -rf $CONSUMER_HOME
        mkdir -p $CONSUMER_HOME/logs

        cat ~/.passwd | sudo -S -p '' docker run -d \
            --net=host \
            --name=consumer \
            --cpuset-cpus={cpu_set} \
            --cidfile=$CONSUMER_HOME/run.cid \
            --memory={memory} \
            --ulimit nofile=4096:20480 \
            -p {consumer_port}:{consumer_port} \
            --add-host=provider-small:{provider_small_ip} \
            --add-host=provider-medium:{provider_medium_ip} \
            --add-host=provider-large:{provider_large_ip} \
            -v $CONSUMER_HOME/logs:/root/runtime/logs \
            consumer
    """

    # The ncat status is saved in r1 and the test must read r1: after the
    # ``r1=$?`` assignment, ``$?`` holds the status of the assignment itself
    # (always 0), which would end the loop successfully on the first pass.
    script += f"""  # noqa: E501
        ATTEMPTS=0
        MAX_ATTEMPTS={max_attempts}
        while true; do
            echo "Trying to connect consumer..."
            ncat -v -w 1 --send-only localhost {consumer_port}; r1=$?
            if [[ $r1 -eq 0 ]]; then
                exit 0
            fi
            if [[ $ATTEMPTS -eq $MAX_ATTEMPTS ]]; then
                echo "Cannot connect to consumer service after $ATTEMPTS attempts."
                exit 1
            fi
            ATTEMPTS=$((ATTEMPTS+1))
            echo "Waiting for {sleep} seconds... ($ATTEMPTS/$MAX_ATTEMPTS)"
            sleep {sleep}
        done
    """.rstrip()

    returncode, _, _ = self._run_remote_script(self.hostname, script)
    if returncode != 0:
        raise WorkflowError(
            'Failed to start consumer service.',
            error_code=self.FAILED_TO_START_CONSUMER_SERVICE)
def _start(self, hostname, scale):
    """Start one provider container and wait until its port accepts connections.

    The remote script recreates ``<task_home>/provider-<scale>``, launches
    the ``provider`` image detached under the name ``provider-<scale>``,
    then polls the provider port with ``ncat`` until a connection succeeds
    or ``max_attempts`` retries are used up.

    Args:
        hostname: Host on which the provider runs and which is polled.
        scale: ``"small"`` or ``"medium"``; any other value selects the
            large provider settings.

    Raises:
        WorkflowError: With ``FAILED_TO_START_PROVIDER_SERVICES`` if the
            remote script returns a non-zero code.
    """
    self.logger.info(f">>> Start provider services on {hostname}.")

    config = self.config
    task_home = self.workspace.remote.task_home
    max_attempts = config.max_attempts
    sleep = config.sleep_interval

    if scale == "small":
        cpu_set = config.small_provider_cpu_set
        memory = config.small_provider_memory
        provider_port = config.provider_small_port
    elif scale == "medium":
        cpu_set = config.medium_provider_cpu_set
        memory = config.medium_provider_memory
        provider_port = config.provider_medium_port
    else:
        cpu_set = config.large_provider_cpu_set
        memory = config.large_provider_memory
        provider_port = config.provider_large_port

    # NOTE: --cpu-period / --cpu-quota are not passed to docker; the
    # container is constrained by --cpuset-cpus and --memory only.
    script = f"""
        PROVIDER_HOME={task_home}/provider-{scale}
        rm -rf $PROVIDER_HOME
        mkdir -p $PROVIDER_HOME/logs

        cat ~/.passwd | sudo -S -p '' docker run -d \
            --net=host \
            --name=provider-{scale} \
            --cpuset-cpus={cpu_set} \
            --cidfile=$PROVIDER_HOME/run.cid \
            --memory={memory} \
            --ulimit nofile=4096:20480 \
            -p {provider_port}:{provider_port} \
            -v $PROVIDER_HOME/logs:/root/runtime/logs \
            provider provider-{scale}
    """.rstrip()

    # The progress message names the provider actually being started rather
    # than always saying "provider-small".
    script += f"""  # noqa: E501
        ATTEMPTS=0
        MAX_ATTEMPTS={max_attempts}
        while true; do
            echo "Trying to connect provider-{scale}..."
            ncat -v -w 1 --send-only {hostname} {provider_port}; r1=$?
            if [[ $r1 -eq 0 ]]; then
                exit 0
            fi
            if [[ $ATTEMPTS -eq $MAX_ATTEMPTS ]]; then
                echo "Cannot connect to some of the provider services after $ATTEMPTS attempts."
                exit 1
            fi
            ATTEMPTS=$((ATTEMPTS+1))
            echo "Waiting for {sleep} seconds... ($ATTEMPTS/$MAX_ATTEMPTS)"
            sleep {sleep}
        done
    """.rstrip()

    returncode, _, _ = self._run_remote_script(hostname, script)
    if returncode != 0:
        raise WorkflowError(
            'Failed to start provider services.',
            error_code=self.FAILED_TO_START_PROVIDER_SERVICES)