def build(self, watch=False):
    build_cmd = ['mlt', 'build']
    if watch:
        build_cmd.append('--watch')
    build_proc = Popen(build_cmd, cwd=self.project_dir)

    if watch:
        # ensure that `mlt build --watch` has started
        time.sleep(1)
        # we need to simulate our training file changing
        run_popen("echo \"print('hello')\" >> {}".format(self.train_file),
                  shell=True).wait()

        # wait for 30 seconds (for timeout) or until we've built our image
        # then kill the build proc or it won't terminate
        start = time.time()
        while not os.path.exists(self.build_json):
            time.sleep(1)
            if time.time() - start >= 30:
                break
        build_proc.kill()
    else:
        assert build_proc.wait() == 0

    assert os.path.isfile(self.build_json)
    with open(self.build_json) as f:
        build_data = json.loads(f.read())
        assert 'last_container' in build_data and \
            'last_build_duration' in build_data

    # verify that we created a docker image
    assert run_popen(
        "docker image inspect {}".format(build_data['last_container']),
        shell=True, stdout=None, stderr=None).wait() == 0
def deploy(self, no_push=False, interactive=False):
    deploy_cmd = ['mlt', 'deploy']
    if no_push:
        deploy_cmd.append('--no-push')
    if interactive:
        deploy_cmd.append('--interactive')
    p = Popen(deploy_cmd, cwd=self.project_dir)
    out, err = p.communicate()
    assert p.wait() == 0

    if not no_push:
        assert os.path.isfile(self.deploy_json)
        with open(self.deploy_json) as f:
            deploy_data = json.loads(f.read())
            assert 'last_push_duration' in deploy_data and \
                'last_remote_container' in deploy_data
        # verify that the docker image has been pushed to our registry
        # need to decode because in python3 this output is in bytes
        assert 'true' in run_popen(
            "{} | jq .repositories | jq 'contains([\"{}\"])'".format(
                self.registry_catalog_call, self.app_name),
            shell=True).stdout.read().decode("utf-8")

    # verify that our job did indeed get deployed to k8s
    # TODO: fix this check: https://github.com/IntelAI/mlt/issues/105
    assert run_popen(
        "kubectl get jobs --namespace={}".format(self.namespace),
        shell=True).wait() == 0
# NOTE: the `yield` below means this function is consumed as a context
# manager; the @contextmanager decorator (from contextlib) is assumed
# from the surrounding module.
@contextmanager
def clone_repo(repo):
    destination = tempfile.mkdtemp()
    process_helpers.run_popen("git clone {} {}".format(repo, destination),
                              shell=True, stdout=False, stderr=False).wait()
    try:
        yield destination
    finally:
        shutil.rmtree(destination)
def _build(self):
    last_build_duration = files.fetch_action_arg(
        'build', 'last_build_duration')

    schema.validate()

    started_build_time = time.time()

    container_name = "{}:{}".format(self.config['name'], uuid.uuid4())
    print("Starting build {}".format(container_name))

    template_parameters = config_helpers.\
        get_template_parameters(self.config)
    params = ""
    for key, val in template_parameters.items():
        params += "{}={} ".format(key.upper(), val)

    build_cmd = "CONTAINER_NAME={} {}make build".format(
        container_name, params)

    if self.args['--verbose']:
        build_process = process_helpers.run_popen(build_cmd,
                                                  shell=True,
                                                  stdout=True,
                                                  stderr=True)
    else:
        build_process = process_helpers.run_popen(build_cmd, shell=True)
        with process_helpers.prevent_deadlock(build_process):
            progress_bar.duration_progress(
                'Building {}'.format(self.config["name"]),
                last_build_duration,
                lambda: build_process.poll() is not None)

    if build_process.poll() != 0:
        # When we have an error, get the stdout and error output
        # and display them both with the error output in red.
        output, error_msg = build_process.communicate()
        if output:
            print(output.decode("utf-8"))
        if error_msg:
            error_handling.throw_error(error_msg.decode("utf-8"), 'red')

    built_time = time.time()

    # Write last container to file
    with open('.build.json', 'w') as f:
        f.write(json.dumps({
            "last_container": container_name,
            "last_build_duration": built_time - started_build_time
        }))

    print("Built {}".format(container_name))
@contextmanager  # assumed from contextlib, as above
def clone_repo(repo):
    destination = tempfile.mkdtemp()
    process_helpers.run_popen(
        "git clone {} {}".format(repo, destination),
        shell=True, stdout=False, stderr=False).wait()
    try:
        yield destination
    finally:
        # This is really a bug in 'shutil' as described here:
        # https://bugs.python.org/issue29699
        if os.path.exists(destination):
            shutil.rmtree(destination)
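# A minimal usage sketch for clone_repo, not part of the original source. It
# assumes the function is exposed via a git_helpers module and decorated with
# contextlib.contextmanager, matching how the template-update action later in
# this section consumes it. The repo URL and import path are illustrative.
from mlt.utils import git_helpers  # assumed import path

with git_helpers.clone_repo("https://github.com/IntelAI/mlt.git") as temp_clone:
    # the clone lives in a temporary directory for the duration of the block
    print("cloned to {}".format(temp_clone))
# on exit, the finally block removes the temporary directory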
def test_run_popen_failed_cmd(popen_mock):
    """If the cmd isn't valid assert some sort of error output + SystemExit"""
    bad_cmd = "foo bar"
    bad_cmd_output = "Not a valid command"
    popen_mock.side_effect = CalledProcessError(
        returncode=2, cmd=bad_cmd, output=bad_cmd_output)
    with catch_stdout() as caught_output:
        with pytest.raises(SystemExit):
            run_popen(bad_cmd)
        output = caught_output.getvalue().strip()
    assert output == bad_cmd_output
def _push_docker(self):
    self.remote_container_name = "{}/{}".format(
        self.config['registry'], self.container_name)
    self._tag()
    push_cmd = ["docker", "push", self.remote_container_name]
    if self.args['--verbose']:
        self.push_process = process_helpers.run_popen(
            push_cmd, stdout=True, stderr=True)
        self.push_process.wait()
        # add newline to separate push output from container deploy output
        print('')
    else:
        self.push_process = process_helpers.run_popen(push_cmd)
def _get_pods_by_start_time(self):
    """helper func to return pods in current namespace by start time"""
    return process_helpers.run_popen(
        "kubectl get pods --namespace {} ".format(self.namespace) +
        "--sort-by=.status.startTime", shell=True
    ).stdout.read().decode('utf-8').strip().splitlines()
def undeploy(self):
    p = Popen(['mlt', 'undeploy'], cwd=self.project_dir)
    assert p.wait() == 0
    # verify no more deployment job
    assert run_popen("kubectl get jobs --namespace={}".format(
        self.namespace), shell=True).wait() == 0
def _generic_status(self, job, namespace, job_type):
    """displays simple pod information"""
    status = process_helpers.run_popen(
        ["kubectl", "get", "pods",
         "--namespace", namespace,
         "-o", "wide",
         "-a", "-l", "job-name={}".format(job)],
        stdout=True, stderr=True)
    status.wait()
def _get_logs(prefix, since, namespace):
    """ Fetches logs using kubetail """
    log_cmd = ["kubetail", prefix, "--since", since,
               "--namespace", namespace]
    try:
        logs = process_helpers.run_popen(log_cmd,
                                         stdout=True,
                                         stderr=subprocess.PIPE)
        output, error_msg = logs.communicate()
        if output:
            print(output)
        if error_msg:
            if 'command not found' in error_msg:
                print(colored(
                    "Please install `{}`. "
                    "It is a prerequisite "
                    "for `mlt logs` to work".format(error_msg.split()[1]),
                    'red'))
            else:
                print(colored(error_msg, 'red'))
            sys.exit(1)
    except KeyboardInterrupt:
        sys.exit()
def _get_events(filter_tag, namespace):
    """ Fetches events """
    events_cmd = "kubectl get events --namespace {}".format(namespace)
    try:
        events = process_helpers.run_popen(events_cmd, shell=True)
        header_line = True
        header = events.stdout.readline()
        while True:
            output = events.stdout.readline()
            if output == '' and events.poll() is not None:
                error = events.stderr.readline()
                if error:
                    raise Exception(error)
                break
            # `!=` replaces the original `is not`, an identity check that
            # is unreliable for string comparison
            if output != '' and filter_tag in output:
                if header_line:
                    print(header)
                    header_line = False
                sys.stdout.write(output)
                sys.stdout.flush()
        if header_line:
            print("No events to display for this job")
    except Exception as ex:
        if 'command not found' in str(ex):
            print("Please install `{}`. "
                  "It is a prerequisite for `mlt events` "
                  "to work".format(str(ex).split()[1]))
        else:
            print("Exception: {}".format(ex))
        sys.exit()
def _build(self):
    last_build_duration = files.fetch_action_arg('build',
                                                 'last_build_duration')

    started_build_time = time.time()

    container_name = "{}:{}".format(self.config['name'], uuid.uuid4())
    print("Starting build {}".format(container_name))

    # Add bar
    build_process = process_helpers.run_popen(
        "CONTAINER_NAME={} make build".format(container_name),
        shell=True)
    progress_bar.duration_progress(
        'Building', last_build_duration,
        lambda: build_process.poll() is not None)
    if build_process.poll() != 0:
        print(colored(
            build_process.communicate()[0].decode("utf-8"), 'red'))
        sys.exit(1)

    built_time = time.time()

    # Write last container to file
    with open('.build.json', 'w') as f:
        f.write(json.dumps({
            "last_container": container_name,
            "last_build_duration": built_time - started_build_time
        }))

    print("Built {}".format(container_name))
def _get_logs(prefix, since, namespace):
    """ Fetches logs using kubetail """
    log_cmd = "kubetail {} --since {} " \
              "--namespace {}".format(prefix, since, namespace)
    try:
        # TODO: remove shell=True and make log_cmd a list.
        logs = process_helpers.run_popen(log_cmd, shell=True)
        while True:
            output = logs.stdout.readline()
            if output == '' and logs.poll() is not None:
                error = logs.stderr.readline()
                if error:
                    raise Exception(error)
                break
            if output:
                if 'No pods exists that matches' not in output:
                    print(output.strip())
    except Exception as ex:
        if 'command not found' in str(ex):
            print("Please install `{}`. "
                  "It is a prerequisite for `mlt logs` "
                  "to work".format(str(ex).split()[1]))
        else:
            print("Exception: {}".format(ex))
        sys.exit()
@contextmanager  # assumed from contextlib, as above
def clone_repo(repo):
    destination = tempfile.mkdtemp()
    process_helpers.run_popen("git clone {} {}".format(repo, destination),
                              shell=True, stdout=False, stderr=False).wait()

    # If the template repo is a local path, then copy the local directory over
    # the git clone so that the templates reflect the local changes.
    if not is_git_repo(repo):
        copy_tree(repo, destination)

    try:
        yield destination
    finally:
        # This is really a bug in 'shutil' as described here:
        # https://bugs.python.org/issue29699
        if os.path.exists(destination):
            shutil.rmtree(destination)
def _fetch_registry_catalog_call(self):
    """returns either a local registry curl call or one for gcr"""
    if 'gcr' in self.registry:
        gcr_token = run_popen(
            "gcloud auth print-access-token",
            shell=True).stdout.read().decode("utf-8").strip()
        catalog_call = 'curl -v -u _token:{} '.format(gcr_token) + \
            '"https://gcr.io/v2/_catalog"'
    else:
        catalog_call = 'curl --noproxy \"*\" registry:5000/v2/_catalog'
    return catalog_call
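# Illustrative consumer of _fetch_registry_catalog_call, not from the original
# source; it mirrors the deploy tests in this section, which pipe the returned
# curl command through jq to confirm the app's repository is in the catalog.
catalog_call = self._fetch_registry_catalog_call()
repositories = run_popen(
    "{} | jq .repositories".format(catalog_call),
    shell=True).stdout.read().decode("utf-8")
assert self.app_name in repositories  # hypothetical assertion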
def _exec_into_pod(self, podname):
    """wait til pod comes up and then exec into it"""
    print("Connecting to pod...")
    tries = 0
    while True:
        pod = process_helpers.run_popen(
            "kubectl get pods --namespace {} {} -o json".format(
                self.namespace, podname),
            shell=True).stdout.read().decode('utf-8')
        if not pod:
            continue

        # check if pod is in running state
        # gcr stores an auth token which could be returned as part
        # of the pod json data
        pod = json.loads(pod)
        if pod.get('items') or pod.get('status'):
            # if there's more than 1 thing returned, we have
            # `pod['items']['status']` otherwise we will always have
            # `pod['status']`, so by the second if below we're safe
            # first item is what we care about (or only item)
            if pod.get('items'):
                pod = pod['items'][0]
            if pod['status']['phase'] == 'Running':
                break

        if tries == self.args['--retries']:
            raise ValueError("Pod {} not Running".format(podname))
        tries += 1
        # `end=""` replaces the original Python 2-style trailing comma
        print("Retrying {}/{} \r".format(tries, self.args['--retries']),
              end="")
        time.sleep(1)

    # Get shell to the specified pod running in the user's namespace
    kubectl_exec = ["kubectl", "exec", "-it", podname,
                    "--namespace", self.namespace,
                    "--", "/bin/bash",
                    "-c", "cd /src/app; bash"]
    process_helpers.run_popen(kubectl_exec, stdout=None, stderr=None).wait()
@contextmanager  # assumed from contextlib, as above
def clone_repo(repo):
    destination = tempfile.mkdtemp()
    process_helpers.run_popen("git clone {} {}".format(repo, destination),
                              shell=True, stdout=False, stderr=False).wait()

    # If the template repo is a local path, then copy the local directory over
    # the git clone so that the templates reflect the local changes.
    if not is_git_repo(repo):
        copy_tree(repo, destination)

    try:
        yield destination
    finally:
        # This is really a bug in 'shutil' as described here:
        # https://bugs.python.org/issue29699
        # The option ignore_errors is set to True mainly for unit tests:
        # https://stackoverflow.com/questions/303200/how-do-i-remove-delete-a-folder-that-is-not-empty-with-python
        if os.path.exists(destination):
            shutil.rmtree(destination, ignore_errors=True)
def _crd_status(self, job, namespace, job_type):
    """Handles statuses for various crd deployments
       CRDs handled:
           1. TFJob
           2. PyTorchJob
    """
    label = 'tf_job_name' if job_type == 'tfjob' else 'pytorch_job_name'
    print("CRD: {}".format(job_type.upper()))
    status = process_helpers.run_popen(
        ["kubectl", "get", job_type, job, "--namespace", namespace])
    out, err = status.communicate()
    if status.wait() != 0:
        print("The job may have been undeployed.")
    else:
        print(out.decode('utf-8'))

    print("Pods: ")
    status = process_helpers.run_popen(
        ["kubectl", "get", "pods", "--namespace", namespace,
         "-o", "wide", "-a", "-l", "{}={}".format(label, job)],
        stdout=True, stderr=True)
    status.communicate()
def _get_most_recent_podname(self):
    """don't know of a better way to do this; grab the pod created
       by the job we just deployed
       this gets the most recent pod by name, so we can exec
       into it once everything is done deploying
    """
    pod = process_helpers.run_popen(
        "kubectl get pods --namespace {} ".format(self.namespace) +
        "--sort-by=.status.startTime", shell=True
    ).stdout.read().decode('utf-8').strip().splitlines()
    if pod:
        # we want last pod listed, podname is always first
        return pod[-1].split()[0]
    else:
        raise ValueError("No pods found in namespace: {}".format(
            self.namespace))
def checking_crds_on_k8(crd_set):
    """Check whether the given CRDs are installed on Kubernetes;
       returns the subset of `crd_set` that is missing.
    """
    try:
        current_crds_json = process_helpers.run_popen(
            "kubectl get crd -o json", shell=True
        ).stdout.read().decode('utf-8')
        current_crds = set([
            str(x['metadata']['name'])
            for x in json.loads(current_crds_json)['items']
        ])
        return crd_set - current_crds
    except Exception as ex:
        print("Crd_Checking - Exception: {}".format(ex))
        return set()
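# Usage sketch for checking_crds_on_k8, not from the original source. The CRD
# names below are the standard Kubeflow ones backing the TFJob/PyTorchJob
# statuses handled by _crd_status above; the helper returns whichever are
# missing from the cluster.
required_crds = {"tfjobs.kubeflow.org", "pytorchjobs.kubeflow.org"}
missing_crds = checking_crds_on_k8(required_crds)
if missing_crds:
    print("CRDs not installed: {}".format(", ".join(sorted(missing_crds))))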
def _grab_latest_pod_or_tfjob(self):
    """grabs latest pod by startTime"""
    if self.template in self.tfjob_templates:
        # NOTE: do we need some sort of time filter here too?
        obj_string = "kubectl get tfjob -a --namespace {} -o json".format(
            self.namespace)
    else:
        obj_string = "kubectl get pods -a --namespace {}".format(
            self.namespace) + " --sort-by=.status.startTime -o json"
    objs = run_popen(obj_string, shell=True).stdout.read().decode('utf-8')
    if objs:
        objs = json.loads(objs).get('items')[-1]['status']
        # tfjob is special and puts the `Succeeded` status on the `state`
        # rather than the `phase` like everything else
        return objs["state"] if "tfjob" in obj_string else objs["phase"]
    else:
        raise ValueError("No pod(s) deployed to namespace {}".format(
            self.namespace))
def check_for_pods_readiness(namespace, filter_tag, retries):
    print("Checking for pod(s) readiness")
    tries = 0
    pods_found = 0
    pods_running = 0
    while True:
        if tries == retries:
            print("Max retries Reached.")
            break

        # TODO: Remove shell=True and make it work.
        # decode so the substring checks below operate on str in Python 3
        pods = process_helpers.run_popen(
            "kubectl get pods --namespace {} ".format(namespace),
            shell=True).stdout.read().decode('utf-8').strip().splitlines()
        if not pods:
            tries += 1
            print("Retrying {}/{}".format(tries, retries))
            sleep(1)
            continue
        else:
            for pod in pods:
                if filter_tag in pod:
                    pods_found += 1
                    status = str(pod.split()[2].strip())
                    if status == 'Running':
                        pods_running += 1
                    else:
                        tries += 1
                        print("Retrying {}/{}".format(tries, retries))
                        sleep(1)
                        continue
            if pods_running == pods_found and pods_found > 0:
                break
            else:
                pods_found = 0
                pods_running = 0
                tries += 1
                print("Retrying {}/{}".format(tries, retries))
                sleep(1)
    return pods_found > 0
def check_for_pods_readiness(namespace, filter_tag, retries):
    print("Checking for pod(s) readiness")
    tries = 0
    pods_found = 0
    pods_running = 0
    while True:
        if tries == retries:
            print("Max retries Reached.")
            break
        try:
            kubectl_cmd = ["kubectl", "get", "pods",
                           "--namespace", namespace, "--show-all"]
            pods = process_helpers.run_popen(kubectl_cmd)\
                .stdout.read().strip().splitlines()
            if not pods:
                tries += 1
                # `end=""` replaces the original Python 2-style trailing comma
                print("Retrying {}/{} \r".format(tries, retries), end="")
                sleep(1)
                continue
            else:
                for pod in pods:
                    if filter_tag in pod.decode('utf-8'):
                        pods_found += 1
                        status = pod.split()[2].strip().decode('utf-8')
                        if status in ['Running', 'Completed']:
                            pods_running += 1
                if pods_running == pods_found and pods_found > 0:
                    break
                else:
                    pods_found = 0
                    pods_running = 0
                    tries += 1
                    print("Retrying {}/{} \r".format(tries, retries),
                          end="")
                    sleep(1)
        except KeyboardInterrupt:
            sys.exit()
    return pods_running > 0
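# Usage sketch for check_for_pods_readiness, not from the original source;
# the namespace and filter tag are hypothetical. A False return means the
# matching pods never all reached Running/Completed within the retry budget.
if not check_for_pods_readiness("my-namespace", "my-app", 10):
    print("Deployment did not become ready in time.")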
def deploy(self, no_push=False, interactive=False):
    deploy_cmd = ['mlt', 'deploy']
    if no_push:
        deploy_cmd.append('--no-push')
    if interactive:
        deploy_cmd.append('--interactive')
    p = Popen(deploy_cmd, cwd=self.project_dir)
    out, err = p.communicate()
    assert p.wait() == 0

    if not no_push:
        assert os.path.isfile(self.deploy_json)
        with open(self.deploy_json) as f:
            deploy_data = json.loads(f.read())
            assert 'last_push_duration' in deploy_data and \
                'last_remote_container' in deploy_data
        # verify that the docker image has been pushed to our registry
        # need to decode because in python3 this output is in bytes
        assert 'true' in run_popen(
            "{} | jq .repositories | jq 'contains([\"{}\"])'".format(
                self.registry_catalog_call, self.app_name),
            shell=True).stdout.read().decode("utf-8")

    self._verify_pod_success(interactive)
def _launch_popen_call(self, command, cwd=None, return_output=False,
                       shell=False, stdout=PIPE, stderr=PIPE,
                       stderr_is_not_okay=False, wait=False,
                       preexec_fn=None, allow_err=False,
                       expected_err_msg=None):
    """Lightweight wrapper that launches run_popen and handles
       dumping output if there was an error

       cwd: where to launch popen call from
       return_output: whether to return the process itself or the
            output and error instead. Need 2 options for things like
            the `--watch` flag, where we kill the proc later
       shell: whether to shell out the popen call. NOTE: if True,
            the command needs to be a string. This is helpful for
            things like `|` in the command.
       stderr_is_not_okay: some commands like `git checkout` or
            progressbar dump their output to stderr so we need to
            allow it. THEORY: stderr is unbuffered so dynamic
            progressbars might want to dump to stderr. This is for
            those commands that actually don't want any stderr.
       wait: for the build watch command, we don't want to call
            `.wait()` on it because we need to check when we edit
            the watched file
       preexec_fn: runs a func after the fork() call but before
            exec() to run the shell. Useful for killing subprocesses
            of subprocesses (like `mlt deploy -l` --> `kubetail`)
       allow_err: if True, we expect the command to fail and want to
            test against an expected error message `expected_err_msg`
       expected_err_msg: the expected error message if the command
            execution fails; used when `allow_err` is True
    """
    if cwd is None:
        # default to project dir if that's defined, otherwise just use /tmp
        cwd = getattr(self, 'project_dir', '/tmp')

    p = run_popen(command, stdout=stdout, stderr=stderr,
                  shell=shell, cwd=cwd, preexec_fn=preexec_fn)
    if not wait:
        out, err = p.communicate()
        error_msg = "Popen call failed:\nSTDOUT:{}\n\nSTDERR:{}".format(
            str(out), colored(str(err), 'red'))
        if allow_err and p.wait() != 0:
            output = out.decode("utf-8").strip()
            assert output == expected_err_msg, error_msg
        else:
            assert p.wait() == 0, error_msg
        if stderr_is_not_okay is True:
            assert not err, error_msg

        if return_output:
            out = out.decode('utf-8').strip()
            return out, err
        else:
            return p
    else:
        return p
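# Usage sketch for _launch_popen_call from a hypothetical test method, not
# part of the original source: run `mlt status` in the project dir, capture
# its output, and require a clean stderr. The assertion is illustrative.
out, err = self._launch_popen_call(['mlt', 'status'],
                                   return_output=True,
                                   stderr_is_not_okay=True)
assert self.app_name in out  # hypothetical assertion on captured output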
def test_run_popen_invalid_cmd(popen_mock):
    """Assert that passing something other than a string or list
       causes SystemExit
    """
    with pytest.raises(SystemExit) as pytest_raised_err:
        run_popen(0)
    assert pytest_raised_err.value.code == 1
def test_run_popen(popen):
    """Popen call should succeed"""
    popen.return_value = 0
    result = run_popen(['ls', '/tmp'])
    assert result == 0
def action(self):
    """Update the template instance with the new template version if
       the template has been updated
    """
    if "template_name" not in self.config or \
            "template_git_sha" not in self.config:
        print("ERROR: mlt.json does not have either template_name "
              "or template_git_sha. Template update is not possible.")
        return

    app_name = self.config["name"]
    template_name = self.config["template_name"]
    current_template_git_sha = self.config["template_git_sha"]
    orig_project_backup_dir = self._get_backup_dir_name(app_name)

    with git_helpers.clone_repo(self.template_repo) as temp_clone:
        application_dir = os.getcwd()
        clone_template_dir = os.path.join(temp_clone,
                                          constants.TEMPLATES_DIR,
                                          template_name)
        if not os.path.exists(clone_template_dir):
            print("Unable to update, template {} does "
                  "not exist in MLT git repo.".format(template_name))
            return

        latest_template_git_sha = \
            git_helpers.get_latest_sha(clone_template_dir)
        if current_template_git_sha == latest_template_git_sha:
            print("Template is up to date, no need for update.")
        else:
            print("Template is not up to date, updating template...")
            copy_tree(application_dir, orig_project_backup_dir)
            os.chdir(temp_clone)

            # create temp-branch using the git sha from which the template
            # was initiated and clean un-tracked files
            cmd = "git checkout -f {} -b temp-branch && git clean -f ."\
                .format(current_template_git_sha)
            process_helpers.run_popen(cmd, shell=True)

            # copy app dir content to temp clone template dir
            copy_tree(application_dir, clone_template_dir)

            # if there are any uncommitted changes to temp-branch,
            # commit them, otherwise 'pull' from master will fail
            output = process_helpers.run("git status".split(" "))
            if "Your branch is up-to-date" not in output:
                process_helpers.run("git add --all".split(" "))
                commit_command = "git commit --message 'temp-commit'"
                process_helpers.run(commit_command.split(" "))

            # merge the latest template changes by pulling from master
            # into temp-branch
            try:
                process_helpers.run("git pull origin master".split(" "),
                                    raise_on_failure=True)
            except CalledProcessError as e:
                # when the automatic merge fails, do not error out;
                # let the user review and fix conflicts.
                # for other errors, exit
                error_string = "Automatic merge failed; " \
                               "fix conflicts and then commit the result"
                if error_string not in e.output:
                    error_handling.throw_error(e.output)

            # copy content of clone template dir back to app dir
            copy_tree(clone_template_dir, application_dir)
            print("The latest template changes have been merged using "
                  "git; please review the changes for conflicts.")
            print("Backup directory path: {}".format(
                os.path.abspath(orig_project_backup_dir)))
            os.chdir(application_dir)
def test_run_popen_shell(popen):
    """Popen call with shell=True should succeed"""
    popen.return_value = 0
    result = run_popen('ls /tmp', shell=True)
    assert result == 0
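# A hedged sketch of the run_popen contract exercised by the tests above; the
# real mlt implementation may differ. Inferred behavior: accept a list or
# string command, exit with code 1 on any other type, and surface the output
# of a CalledProcessError before exiting.
import sys
from subprocess import PIPE, Popen, CalledProcessError


def run_popen_sketch(command, stdout=PIPE, stderr=PIPE, shell=False, **kwargs):
    if not isinstance(command, (list, str)):
        # matches test_run_popen_invalid_cmd: SystemExit with code 1
        sys.exit(1)
    try:
        return Popen(command, stdout=stdout, stderr=stderr, shell=shell,
                     **kwargs)
    except CalledProcessError as e:
        # matches test_run_popen_failed_cmd: print the output, then exit
        print(e.output)
        sys.exit(1)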