def _create(self):
    """Creates a syncing spec for the current app deployment.

    Reads the deployed run id from .push.json, invokes the template's
    `sync-create` make target, and records the spec name in .sync.json.
    """
    with open('.push.json', 'r') as push_json:
        deploy_data = json.load(push_json)

    # A single app can only have one active syncing spec at a time.
    if sync_helpers.get_sync_spec() is not None:
        error_handling.throw_error(
            "Syncing spec has been already created for this app")

    run_id = deploy_data.get('app_run_id', "")
    sync_job_name = '-'.join([self.config["name"], run_id])
    make_env = dict(os.environ,
                    SYNC_SPEC=sync_job_name,
                    NAMESPACE=self.config['namespace'],
                    JOB_NAME=sync_job_name)
    try:
        subprocess.check_output(["make", "sync-create"],
                                env=make_env,
                                stderr=subprocess.STDOUT)
        # Record the spec name so later sync sub-commands can find it.
        with open('.sync.json', 'w') as sync_json:
            json.dump({'sync_spec': sync_job_name}, sync_json, indent=2)
        print("Syncing spec is created successfully")
    except subprocess.CalledProcessError as e:
        if "No rule to make target `sync-create'" in str(e.output):
            # TODO: when we have a template updating capability, add a
            # note recommending that the user updates their template to
            # get the sync create command
            print("This app does not support the `mlt sync create` "
                  "command. No `sync-create` target was found in the "
                  "Makefile.")
        else:
            print("Error while creating sync spec: {}".format(e.output))
def _get_events(self, filter_tag, namespace):
    """Fetches kubernetes events in `namespace` and streams the lines
    matching `filter_tag` to stdout (with the kubectl header printed
    once before the first match).
    """
    events_cmd = "kubectl get events --namespace {}".format(namespace)
    try:
        events = process_helpers.run_popen(events_cmd, shell=True)
        header_line = True
        header = events.stdout.readline()
        while True:
            output = events.stdout.readline().decode('utf-8')
            if output == '' and events.poll() is not None:
                error = events.stderr.readline()
                if error:
                    raise Exception(error)
                break
            # BUG FIX: was `output is not ''` -- identity comparison
            # against a str literal is undefined behavior (relies on
            # CPython interning and raises SyntaxWarning on 3.8+);
            # use equality instead.
            if output != '' and filter_tag and filter_tag in output:
                if header_line:
                    print(header.decode('utf-8'))
                    header_line = False
                sys.stdout.write(output)
                sys.stdout.flush()
        if header_line:
            print("No events to display for this job")
    except Exception as ex:
        if 'command not found' in str(ex):
            # BUG FIX: .format() previously bound only to the final
            # "to work" fragment, which has no placeholder, so the
            # message was printed with a literal `{}` in it. Group the
            # fragments so the placeholder is actually filled.
            error_msg = ("Please install `{}`. "
                         "It is a prerequisite for `mlt events` "
                         "to work").format(str(ex).split()[1])
        else:
            error_msg = "Exception: {}".format(ex)
        error_handling.throw_error(error_msg)
def _delete(self):
    """Tears down the app's syncing spec via the template's
    `sync-delete` make target and scrubs the record from .sync.json.
    """
    spec_name = sync_helpers.get_sync_spec()
    if spec_name is None:
        error_handling.throw_error(
            "No syncing spec has been created for this app yet")

    make_env = dict(os.environ, SYNC_SPEC=spec_name)
    try:
        subprocess.check_output(["make", "sync-delete"],
                                env=make_env,
                                stderr=subprocess.STDOUT)
        # Drop the recorded spec name, rewriting the file in place.
        with open('.sync.json', 'r+') as sync_json:
            sync_data = json.load(sync_json)
            if 'sync_spec' in sync_data:
                del sync_data['sync_spec']
                sync_json.seek(0)
                json.dump(sync_data, sync_json, indent=2)
                sync_json.truncate()
        print("Syncing spec is successfully deleted")
    except subprocess.CalledProcessError as e:
        make_output = str(e.output)
        if "No rule to make target `sync-delete'" in make_output:
            # TODO: when we have a template updating capability, add a
            # note recommending that the user updates their template to
            # get the sync delete command
            print("This app does not support the `mlt sync delete` "
                  "command. No `sync-delete` target was found in the "
                  "Makefile.")
        elif "{} does not exist. Did you mean something else".format(
                spec_name) in make_output:
            print("No syncing spec has been created for this app yet")
        else:
            print("Error while deleting syncing spec: {}".format(e.output))
def _reload(self):
    """Restarts the sync agent via the template's `sync-reload` target."""
    spec_name = sync_helpers.get_sync_spec()
    if spec_name is None:
        error_handling.throw_error(
            "No syncing spec has been created for this app yet")

    try:
        subprocess.check_output(
            ["make", "sync-reload"],
            env=dict(os.environ, SYNC_SPEC=spec_name),
            stderr=subprocess.STDOUT)
        print("Sync agent is restarted")
    except subprocess.CalledProcessError as e:
        make_output = str(e.output)
        if "No rule to make target `sync-reload'" in make_output:
            # TODO: when we have a template updating capability, add a
            # note recommending that the user updates their template to
            # get the sync reload command
            print("This app does not support the `mlt sync reload` "
                  "command. No `sync-reload` target was found in the "
                  "Makefile.")
        elif "{} does not exist. Did you mean something else".format(
                spec_name) in make_output:
            print("Syncing spec has not been created for this app yet")
        else:
            print("Error while reloading sync agent: {}".format(e.output))
def load_config():
    """Reads and returns the parsed mlt.json config for the current
    directory; errors out if the directory was not built by `mlt init`.
    """
    if not os.path.isfile(constants.MLT_CONFIG):
        # throw_error terminates, so the open below only runs when the
        # config file exists.
        error_handling.throw_error("This command requires you to be in an "
                                   "`mlt init` built directory.")
    with open(constants.MLT_CONFIG) as config_file:
        return json.load(config_file)
def run(command, cwd=None, raise_on_failure=False):
    """Runs `command` and returns its decoded stdout.

    On a non-zero exit, either re-raises the CalledProcessError
    (raise_on_failure=True) or reports the captured output and exits.
    """
    try:
        raw_output = check_output(command, cwd=cwd)
    except CalledProcessError as failure:
        if raise_on_failure:
            raise failure
        error_handling.throw_error(failure.output)
    return raw_output.decode("utf-8")
def _build(self):
    """Runs `make build` for the app container, showing raw output in
    --verbose mode or a progress bar otherwise, and records the
    container name and build duration in .build.json.
    """
    # Previous build duration seeds the progress bar's time estimate.
    last_build_duration = files.fetch_action_arg(
        'build', 'last_build_duration')

    schema.validate()

    started_build_time = time.time()

    # Fresh uuid tag so every build yields a unique container name.
    container_name = "{}:{}".format(self.config['name'], uuid.uuid4())
    print("Starting build {}".format(container_name))

    # Template parameters are forwarded to make as KEY=value pairs
    # prepended to the command line.
    template_parameters = config_helpers.\
        get_template_parameters(self.config)
    params = ""
    for key, val in template_parameters.items():
        params += "{}={} ".format(key.upper(), val)

    build_cmd = "CONTAINER_NAME={} {}make build".format(
        container_name, params)

    if self.args['--verbose']:
        build_process = process_helpers.run_popen(build_cmd,
                                                  shell=True,
                                                  stdout=True,
                                                  stderr=True)
    else:
        build_process = process_helpers.run_popen(build_cmd, shell=True)
        with process_helpers.prevent_deadlock(build_process):
            progress_bar.duration_progress(
                'Building {}'.format(
                    self.config["name"]), last_build_duration,
                lambda: build_process.poll() is not None)
    # NOTE(review): in verbose mode poll() is typically still None here
    # (None != 0 is True), so this branch also acts as the wait via
    # communicate() -- confirm that is intentional.
    if build_process.poll() != 0:
        # When we have an error, get the stdout and error output
        # and display them both with the error output in red.
        output, error_msg = build_process.communicate()
        if output:
            print(output.decode("utf-8"))
        if error_msg:
            error_handling.throw_error(error_msg.decode("utf-8"), 'red')

    built_time = time.time()

    # Write last container to file
    with open('.build.json', 'w') as f:
        f.write(json.dumps({
            "last_container": container_name,
            "last_build_duration": built_time - started_build_time
        }))

    print("Built {}".format(container_name))
def action(self):
    """deletes current kubernetes namespace"""
    # A synced app must be unsynced before it can be undeployed.
    if sync_helpers.get_sync_spec() is not None:
        error_handling.throw_error(
            "This app is currently being synced, please run "
            "`mlt sync delete` to unsync first", "red")

    namespace = self.config['namespace']
    jobs = files.get_deployed_jobs(job_names_only=True)
    if not jobs:
        error_handling.throw_error("This app has not been deployed yet.")

    requested_job = self.args.get('--job-name')
    if requested_job:
        # Undeploy exactly the job the user named.
        if requested_job not in jobs:
            error_handling.throw_error(
                'Job name {} not found in: {}'.format(requested_job, jobs))
        self._undeploy_jobs(namespace, requested_job)
    elif self.args.get('--all') or len(jobs) == 1:
        # Either the user asked for everything, or there is only one
        # job so no ambiguity exists.
        self._undeploy_jobs(namespace, jobs, all_jobs=True)
    else:
        error_handling.throw_error(
            "Multiple jobs are found under this application, "
            "please try `mlt undeploy --all` or specify a single "
            "job to undeploy using "
            "`mlt undeploy --job-name <job-name>`")
def _check_update_yaml_for_sync(self):
    """Uncomment the KSYNC sections in the app's k8s yaml templates.

    Returns True when at least one template line was changed (i.e. the
    template supports syncing), False otherwise.
    """
    # update k8s-template job spec, to keep the containers
    # running so that we can update code locally and have ksync
    # upload it to the running containers
    k8s_template_dir = os.path.join(self.app_name, "k8s-templates")
    if not os.path.exists(k8s_template_dir):
        return False

    k8s_template_specs = []
    for filename in os.listdir(k8s_template_dir):
        if fnmatch.fnmatch(filename, '*.yaml'):
            k8s_template_specs.append(
                os.path.join(k8s_template_dir, filename))

    # Accumulates one True per line actually uncommented, across all
    # yaml files processed.
    return_val_list = []
    for filename in k8s_template_specs:
        with open(filename, 'r+') as f:
            orig_filedata = f.readlines()
            # find matching begin and end commented sections for KSYNC and
            # exit with error if those sections are not properly matching
            begin_comment_indices = \
                [i for i, x in enumerate(orig_filedata)
                 if "### BEGIN KSYNC SECTION" in x]
            end_comment_indices = \
                [i for i, x in enumerate(orig_filedata)
                 if "### END KSYNC SECTION" in x]
            if len(begin_comment_indices) != len(end_comment_indices):
                error_handling.throw_error(
                    "KSYNC comment section in file {} is "
                    "malformed".format(filename), 'red')

            final_filedata = deepcopy(orig_filedata)
            # using matched begin and end pairs for KSYNC comments, create
            # new lines to be written to the file
            for (begin, end) in zip(begin_comment_indices,
                                    end_comment_indices):
                for k in range(begin, end + 1):
                    # NOTE(review): replacing '# ' with a single space
                    # shifts the uncommented yaml one column left --
                    # confirm the replacement string preserves the
                    # intended indentation.
                    final_filedata[k] = orig_filedata[k].replace(
                        '# ', ' ')
                    if final_filedata[k] != orig_filedata[k]:
                        return_val_list.append(True)

            # NOTE(review): the [:-1] slice skips the most recent
            # change when deciding whether to write this file back --
            # looks off-by-one; confirm intent.
            if True in return_val_list[:-1]:
                f.seek(0)
                for line in final_filedata:
                    f.write(line)
                f.truncate()
    return True in return_val_list
def action(self):
    """Dispatches to the requested `mlt sync` sub-command after
    verifying the app is sync-enabled and deployed.
    """
    # '.stignore' only exists for apps initialized with --enable-sync.
    if not os.path.isfile('.stignore'):
        error_handling.throw_error(
            "This app is not initialized with '--enable-sync' option")
    # '.push.json' only exists once the app has been deployed.
    if not os.path.isfile('.push.json'):
        error_handling.throw_error("This app has not been deployed yet")

    # Call the specified sub-command
    if self.args.get('create'):
        self._create()
    elif self.args.get('reload'):
        self._reload()
    else:
        self._delete()
def _poll_docker_proc(self):
    """Non-verbose push mode: shows a progress bar while the docker
    push runs, then surfaces any failure output.
    """
    expected_duration = files.fetch_action_arg(
        'push', 'last_push_duration')
    pusher = self.push_process
    with process_helpers.prevent_deadlock(pusher):
        progress_bar.duration_progress(
            'Pushing {}'.format(self.config["name"]),
            expected_duration,
            lambda: pusher.poll() is not None)

    # A non-zero exit means the push failed; dump stdout and show the
    # stderr message in red before exiting.
    if pusher.poll() != 0:
        out_bytes, err_bytes = pusher.communicate()
        print(out_bytes.decode("utf-8"))
        error_handling.throw_error(err_bytes.decode("utf-8"), 'red')
def _find_config(self, param_keys, add_if_not_found,
                 key_not_found_error=""):
    """ Finds the specified parameter in the config.  If
    add_if_not_found is True, then the parameter is added if it does
    not exist.  Otherwise, throws an error.

    :param param_keys: list of keys naming the nested parameter; all
        but the last key are traversed and the dict that should hold
        the final key is returned.
    :param add_if_not_found: create missing intermediate dicts rather
        than erroring out.
    :param key_not_found_error: message for throw_error when a key is
        missing and add_if_not_found is False.
    :returns: the dict that contains (or should contain) the final key
    """
    matched_config = self.config
    for n in param_keys[:-1]:
        if n not in matched_config:
            if not add_if_not_found:
                error_handling.throw_error(key_not_found_error)
            # BUG FIX: create the missing level AND descend into it.
            # Previously the empty dict was created but never entered,
            # so the parent dict was returned and nested parameters
            # were written at the wrong nesting level (leaving an
            # orphaned empty dict behind).
            matched_config[n] = {}
        matched_config = matched_config[n]
    return matched_config
def _remove_config(self, param_keys):
    """Deletes the config parameter named by param_keys from the MLT
    config file; displays an error if it is not present.
    """
    missing_msg = "Unable to find config '{}'. To see " \
                  "list of configs, use `mlt template_config list`.". \
        format(self.args.get('<name>'))

    # Locate the dict holding the final key; errors out (and exits) if
    # the path does not exist.
    parent = self._find_config(param_keys,
                               add_if_not_found=False,
                               key_not_found_error=missing_msg)

    # Remove the leaf key and persist the updated config to disk.
    leaf_key = param_keys[-1]
    if leaf_key not in parent:
        error_handling.throw_error(missing_msg)
    del parent[leaf_key]

    config_helpers.update_config(self.config)
def _get_logs(prefix, since, namespace):
    """ Fetches logs using kubetail

    Streams pod logs matching `prefix` in `namespace`, starting `since`
    ago. Exits cleanly on Ctrl-C.
    """
    log_cmd = ["kubetail", prefix, "--since", since,
               "--namespace", namespace]
    try:
        logs = process_helpers.run_popen(log_cmd, stdout=True,
                                         stderr=subprocess.PIPE)
        output, error_msg = logs.communicate()
        if output:
            print(output)
        if error_msg:
            # BUG FIX: stderr captured from a PIPE is bytes on
            # Python 3; decode before substring matching to avoid a
            # TypeError.
            if isinstance(error_msg, bytes):
                error_msg = error_msg.decode('utf-8')
            if 'command not found' in error_msg:
                # BUG FIX: added the missing space so the message no
                # longer reads "...`kubetail`.It is a prerequisite...".
                error_msg = "Please install `{}`. ".format(
                    error_msg.split()[1]) + \
                    "It is a prerequisite for `mlt logs` to work"
            error_handling.throw_error(error_msg, 'red')
    except KeyboardInterrupt:
        sys.exit()
def get_only_one_job(job_desired, error_msg):
    """Resolves which single deployed job the user means.

    job_desired: `--job-name` parameter string passed to us
    error_msg: What to print if > 1 job or job not found

    Prints a friendly message and exits (rather than raising a
    traceback) when the choice is ambiguous, the requested job is
    missing, or nothing is deployed. Assumes error handling of whether
    jobs exist happens elsewhere.
    """
    jobs = get_deployed_jobs(job_names_only=True)

    if job_desired:
        # --job-name was passed in to us
        if job_desired not in jobs:
            error_handling.throw_error(
                "Job {} not found.\nJobs to choose from are:\n{}".format(
                    job_desired, '\n'.join(jobs)))
        return get_truncated_job_name(job_desired)

    # No --job-name given: acceptable only when exactly one job exists.
    if len(jobs) > 1:
        error_handling.throw_error('{}\nJobs to choose from are:\n{}'.format(
            error_msg, '\n'.join(jobs)))
    if jobs:
        return get_truncated_job_name(jobs[0])
    error_handling.throw_error("No jobs are deployed.")
def action(self):
    """Displays status (and UTC creation time) for up to `<count>` of
    the jobs deployed for this app.
    """
    if not os.path.isfile('.push.json'):
        error_handling.throw_error("This app has not been deployed yet")
    namespace = self.config['namespace']
    # assumes self.args["<count>"] was already coerced to an int by arg
    # validation (slicing with a str would raise TypeError) -- TODO confirm
    jobs = files.get_deployed_jobs()[:self.args["<count>"]]
    # display status for only `--count` amount of jobs
    for job in jobs:
        # job paths look like 'k8s/<job_name>'; strip the dir prefix
        job_name = job.replace('k8s/', '')
        print('Job: {} -- Creation Time: {}'.format(
            # replacing tzinfo with UTC to print `+0000` so users know
            # output is in utc
            # TODO: better way to print this?
            job_name, datetime.utcfromtimestamp(
                int(os.path.getmtime(job))).replace(
                tzinfo=timezone('UTC'))))
        self._display_status(job_name, namespace)
        # TODO: something more fancy to separate different statuses?
        print('')
    if len(jobs) == 0:
        print("No jobs are deployed.")
def run_popen(command, shell=False, stdout=PIPE, stderr=PIPE, cwd=None,
              preexec_fn=None):
    """Launches `command` via Popen and returns the process handle.

    Passing False for stdout or stderr discards that stream via
    os.devnull; None is a valid option that we want to allow (inherit
    the parent's stream).
    """
    with open(os.devnull, 'w') as discard:
        if stdout is False:
            stdout = discard
        if stderr is False:
            stderr = discard
        # Only strings (shell=True) and argument lists are valid.
        if not isinstance(command, (str, list)):
            error_handling.throw_error(
                "The following command is invalid:\n{}".format(command))
        try:
            return Popen(command, stdout=stdout, stderr=stderr,
                         shell=shell, cwd=cwd, preexec_fn=preexec_fn)
        except CalledProcessError as e:
            error_handling.throw_error(e.output)
def _undeploy_jobs(self, namespace, jobs, all_jobs=False):
    """undeploy the jobs passed to us

    jobs: 1 or more jobs to undeploy
    NOTE: right now there's no case in which some template has both
          custom and not custom jobs because we check for custom job
          by if there's a Makefile in the top level of the project
    """
    # Normalize to a list so single-job and multi-job calls share the
    # same code path.
    job_list = jobs if isinstance(jobs, list) else [jobs]

    if files.is_custom('undeploy:'):
        # Custom templates drive undeploy through their own Makefile
        # target, one job at a time.
        for job in job_list:
            self._custom_undeploy(job, namespace)
            self.remove_job_dir(os.path.join('k8s', job))
        return

    # Non-custom: delete the k8s objects recursively with kubectl.
    delete_target = 'k8s'
    if not all_jobs:
        # only way all_jobs won't be false is if there's
        # a --job-name flag passed or there's only 1 job to undeploy
        if len(job_list) != 1:
            error_handling.throw_error(
                "There should be only 1 job to undeploy, "
                "something went wrong. Please file a bug on "
                "https://github.com/IntelAI/mlt")
        delete_target = os.path.join(delete_target, job_list[0])
    process_helpers.run([
        "kubectl", "--namespace", namespace, "delete", "-f",
        delete_target, "--recursive"
    ], raise_on_failure=True)
    # TODO: have this not be in a loop
    for job in job_list:
        self.remove_job_dir(os.path.join('k8s', job))
def _display_status(self, job, namespace):
    """detects what kind of job was deployed and calls the correct
    status display function

    :param job: job name to report status for
    :param namespace: kubernetes namespace the job lives in
    """
    # Dispatch table: k8s object kind -> status display function.
    status_options = {
        "job": self._generic_status,
        "tfjob": self._crd_status,
        "pytorchjob": self._crd_status,
        # experiments have yaml templates but also a bash script to call
        "experiment": self._custom_status
    }
    # if we have more than 1 k8 object created and types don't match
    # go with a custom job type since we won't know what kubectl call
    # to make to get status from everything
    # also, if `status:` in Makefile we'll assume it's custom always
    job_types, all_same_job_type = files.get_job_kinds()
    if (job_types and not all_same_job_type) or files.is_custom('status:'):
        job_types = "custom"
    elif job_types:
        # All kinds match: collapse the set to its single member.
        job_types = job_types.pop()
    try:
        # Unknown kinds fall back to the custom status handler.
        status_options.get(job_types, self._custom_status)(
            job, namespace, job_types)
    except subprocess.CalledProcessError as e:
        if "No rule to make target `status'" in str(e.output):
            # TODO: when we have a template updating capability, add a
            # note recommending that the user updates their template to
            # get the status command
            error_msg = "This app does not support the `mlt status` " + \
                "command. No `status` target was found in the Makefile."
        else:
            error_msg = "Error while getting app status: {}".format(
                e.output)
        error_handling.throw_error(error_msg)
def action(self):
    """Creates a new git repository based on an mlt template in the
    current working directory.
    """
    template_name = self.args["--template"]
    template_repo = self.args["--template-repo"]
    skip_crd_check = self.args["--skip-crd-check"]
    with git_helpers.clone_repo(template_repo) as temp_clone:
        templates_directory = os.path.join(
            temp_clone, constants.TEMPLATES_DIR, template_name)
        try:
            # The template configs get pulled into the mlt.json file, so
            # don't grab a copy of that in this app's directory
            copytree(templates_directory, self.app_name,
                     ignore=ignore_patterns(constants.TEMPLATE_CONFIG))

            # Get the template configs from the template and include them
            # when building the mlt json file
            param_file = os.path.join(templates_directory,
                                      constants.TEMPLATE_CONFIG)
            template_params = config_helpers.\
                get_template_parameters_from_file(param_file)

            # Record the template's git sha so `mlt template update`
            # can detect when the upstream template has changed.
            template_git_sha = git_helpers.get_latest_sha(
                os.path.join(temp_clone, constants.TEMPLATES_DIR,
                             template_name))
            if not skip_crd_check:
                kubernetes_helpers.check_crds(app_name=self.app_name)

            if self.args["--enable-sync"]:
                if localhost_helpers.binary_path('ksync'):
                    # Syncthing uses '.stignore' to ignore files during
                    # sync we also don't want to upload unneeded local data
                    app_ignore_file = os.path.join(self.app_name,
                                                   ".gitignore")
                    ksync_ignore_file = os.path.join(self.app_name,
                                                     ".stignore")
                    if self._check_update_yaml_for_sync():
                        # Seed .stignore from .gitignore and always
                        # exclude git metadata from the sync.
                        copyfile(app_ignore_file, ksync_ignore_file)
                        with open(ksync_ignore_file, 'a+') as f:
                            f.write("\n.git/**")
                    else:
                        error_handling.throw_error(
                            "This app doesn't support syncing", 'yellow')
                else:
                    error_handling.throw_error(
                        "ksync is not installed on localhost.", 'red')

            data = self._build_mlt_json(template_params, template_git_sha)

            # If the app has option for debugging failures, grab the
            # Kubernetes debug wrapper file and put it in the app directory
            if any(param["name"] == "debug_on_fail"
                   for param in template_params):
                self._enable_debug_on_fail(temp_clone)

            with open(os.path.join(self.app_name,
                                   constants.MLT_CONFIG), 'w') as f:
                json.dump(data, f, indent=2)

            self._init_git_repo()
        except OSError as exc:
            # errno 17 == EEXIST: target app directory already exists.
            if exc.errno == 17:
                error_msg = "Directory '{}' already exists: ".format(
                    self.app_name) + \
                    "delete before trying to initialize new application"
                color = 'red'
            else:
                error_msg = traceback.format_exc()
                color = None
            error_handling.throw_error(error_msg, color)
def action(self):
    """Update the template instance with new template version if
    template is updated

    Backs up the project, replays the user's changes onto a temp git
    branch created at the template's original sha, then merges in the
    latest template via `git pull origin master`.
    """
    if "template_name" not in self.config or \
            "template_git_sha" not in self.config:
        print("ERROR: mlt.json does not have either template_name "
              "or template_git_sha. Template update is not possible.")
        return

    app_name = self.config["name"]
    template_name = self.config["template_name"]
    current_template_git_sha = self.config["template_git_sha"]

    orig_project_backup_dir = self._get_backup_dir_name(app_name)
    with git_helpers.clone_repo(self.template_repo) as temp_clone:
        application_dir = os.getcwd()
        clone_template_dir = os.path.join(temp_clone,
                                          constants.TEMPLATES_DIR,
                                          template_name)
        if not os.path.exists(clone_template_dir):
            print("Unable to update, template {} does "
                  "not exist in MLT git repo.".format(template_name))
            return

        latest_template_git_sha = \
            git_helpers.get_latest_sha(clone_template_dir)
        if current_template_git_sha == latest_template_git_sha:
            print("Template is up to date, no need for update.")
        else:
            print("Template is not up to date, updating template...")
            # Full project backup before we touch anything.
            copy_tree(application_dir, orig_project_backup_dir)
            os.chdir(temp_clone)

            # create temp-branch using git sha from which template
            # was initiated and clean un-tracked files
            # NOTE(review): run_popen returns without waiting for the
            # checkout to finish before the copy below -- looks racy;
            # confirm whether this should wait (e.g. process_helpers.run).
            cmd = "git checkout -f {} -b temp-branch && git clean -f .". \
                format(current_template_git_sha)
            process_helpers.run_popen(cmd, shell=True)

            # copy app dir content to temp clone template dir
            copy_tree(application_dir, clone_template_dir)

            # if there are any uncommitted changes to temp-branch,
            # commit them otherwise 'pull' from master will fail.
            output = process_helpers.run("git status".split(" "))
            if "Your branch is up-to-date" not in output:
                process_helpers.run("git add --all ".split(" "))
                commit_command = "git commit --message 'temp-commit'"
                process_helpers.run(commit_command.split(" "))

            # merging latest template changes by pulling from master
            # into temp-branch
            try:
                process_helpers.run("git pull origin master".split(" "),
                                    raise_on_failure=True)
            except CalledProcessError as e:
                # When auto merge failed do not error out,
                # let user review and fix conflicts
                # for other errors exit
                # NOTE(review): e.output may be bytes on Python 3, which
                # would make this `in` test raise TypeError -- confirm.
                error_string = "Automatic merge failed; " \
                               "fix conflicts and then commit the result"
                if error_string not in e.output:
                    error_handling.throw_error(e.output)

            # copy content of clone template dir back to app dir
            copy_tree(clone_template_dir, application_dir)
            print("Latest template changes have merged using git, "
                  "please review changes for conflicts. ")
            print("Backup directory path: {}".format(
                os.path.abspath(orig_project_backup_dir)))
            os.chdir(application_dir)