Example #1
0
    def decorator_hidden(func):
        '''Register a hidden option named *name* on the pending command.'''

        @functools.wraps(func)
        def inner_hidden(*args, **kwargs):
            return func(*args, **kwargs)

        if debug_decorators:
            console.print("hidden decorator called, name=", name, ", func=",
                          func.__name__)

        global current_cmd_info
        if not current_cmd_info:
            errors.internal_error(
                "@hidden decorators must be followed by a single @command decorator"
            )

        # a hidden is just an option with its "hidden" attribute set
        option_info = {
            "name": name,
            "hidden": True,
            "type": type if isinstance(type, str) else type.__name__,
            "default": default,
            "help": help,
        }

        update_or_insert_argument(current_cmd_info, "options", option_info)

        return inner_hidden
Example #2
0
    def decorator_keyword_arg(func):
        '''Register a keyword argument named *name* on the pending command.'''

        @functools.wraps(func)
        def inner_keyword_arg(*args, **kwargs):
            return func(*args, **kwargs)

        if debug_decorators:
            console.print("keyword_arg decorator called, name=", name,
                          ", func=", func.__name__)

        global current_cmd_info
        if not current_cmd_info:
            errors.internal_error(
                "@keyword_arg decorators must be followed by a single @command decorator"
            )

        arg_info = {
            "name": name,
            "keywords": keywords,
            "required": required,
            "type": type if isinstance(type, str) else type.__name__,
            "help": help,
            "default": default,
        }

        update_or_insert_argument(current_cmd_info, "arguments", arg_info)

        return inner_keyword_arg
Example #3
0
    def report_on_runs(self, runs_by_exper, stage, max_items=None):
        '''Print a formatted table of runs per experiment for the given stage.'''
        total_reported = 0

        console.print("target={} runs: {}".format(self.compute, stage))

        for exper_name in sorted(runs_by_exper):
            # keep only the runs that belong to this stage
            stage_runs = [r for r in runs_by_exper[exper_name]
                          if self.match_stage(r, stage)]
            if not stage_runs:
                continue

            console.print("\nruns for experiment {}:".format(exper_name))

            builder = ReportBuilder(self.config, self.store, client=None)

            if max_items and len(stage_runs) > max_items:
                stage_runs = stage_runs[:max_items]

            text, rows = builder.build_formatted_table(
                stage_runs,
                ["xt_run_name", "status", "id", "number", "PORTAL_URL"],
                max_col_width=100)
            console.print(text)
            total_reported += len(stage_runs)

        if total_reported:
            console.print("total runs {}: {}".format(total_reported, stage))
        else:
            console.print("  no {} runs found\n".format(stage))
    def _restart_runs_for_node(self):
        '''
        non-atomic update of all active runs for this node: set to constants.WAITING
        '''
        match_elem = {
            "node_id": self.node_id,
            "status": {"$in": [constants.STARTED, constants.RESTARTED]},
        }
        query = {"_id": self.job_id, "active_runs": {"$elemMatch": match_elem}}

        def update_one_entry():
            # this will only update a single array entry at a time (using mongo 3.2)
            return self.mongo.mongo_db["__jobs__"].find_and_modify(
                query,
                update={"$set": {"active_runs.$.status": constants.WAITING}},
                new=True)

        # loop until no more matching runs remain
        while True:
            doc = self.mongo.mongo_with_retries("_restart_runs_for_node",
                                                update_one_entry)
            if not doc:
                break
            console.print("_restart_runs_for_node: found a run on node=" +
                          self.node_id)
Example #5
0
    def decorator_clone(func):
        '''Copy arguments/options from the *source* command onto the pending command.'''

        @functools.wraps(func)
        def inner_clone(*args, **kwargs):
            return func(*args, **kwargs)

        if debug_decorators:
            console.print("clone decorator called, source=", source, ", func=",
                          func.__name__)

        global current_cmd_info, root_cmd_info

        if not current_cmd_info:
            errors.internal_error(
                "@clone decorators must be followed by a single @command or @root decorator"
            )

        # find the command being cloned from (source words are "_" separated)
        source_cmd_info = get_command_by_words(source.split("_"))

        if arguments:
            current_cmd_info["arguments"] += source_cmd_info["arguments"]
        if options:
            current_cmd_info["options"] += source_cmd_info["options"]

        return inner_clone
    def root(self, name, value):
        '''Apply a single root-level flag (name, value) to global XT state.'''
        if name == "help":
            # deferred; processed later in the pipeline
            pass

        elif name == "console":
            console.set_level(value)

        elif name == "stack-trace":
            utils.show_stack_trace = value

        elif name == "new":
            # relaunch this command in a fresh console window, then exit
            if value and process_utils.can_create_console_window():
                dispatch_cmd = qfe.current_dispatcher.dispatch_cmd
                echo_cmd = "xt " + dispatch_cmd.replace("--new", "--echo", 1)

                process_utils.run_cmd_in_new_console(echo_cmd)
                errors.early_exit_without_error()

        elif name == "echo":
            if value:
                console.print("xt " + qfe.current_dispatcher.dispatch_cmd,
                              flush=True)

        elif name == "quick-start":
            # already handled earlier
            pass

        elif name == "prep":
            self.prep_machine_for_controller()

        else:
            errors.syntax_error("unrecognized root flag=" + name)
Example #7
0
    def decorator_flag(func):
        '''Register a boolean flag option named *name* on the pending command.

        The wrapped function is returned unchanged; registration happens at
        decoration time by updating current_cmd_info["options"].
        '''

        @functools.wraps(func)
        def wrapper_flag(*args, **kwargs):
            return func(*args, **kwargs)

        if debug_decorators:
            console.print("flag decorator called, name=", name, ", func=",
                          func.__name__)

        global current_cmd_info, root_cmd_info

        # validate BEFORE building the option, consistent with the other
        # option/argument decorators in this file
        if not current_cmd_info:
            errors.internal_error(
                "@flag decorators must be followed by a single @command or @root decorator"
            )

        # a flag is really just a type=flag option
        option_info = {
            "name": name,
            "hidden": False,
            "type": "flag",
            "multiple": False,
            "default": default,
            "help": help
        }

        update_or_insert_argument(current_cmd_info, "options", option_info)

        return wrapper_flag
Example #8
0
    def decorator_option(func):
        '''Register an option named *name* on the pending command.'''

        @functools.wraps(func)
        def inner_option(*args, **kwargs):
            return func(*args, **kwargs)

        if debug_decorators:
            console.print("option decorator called, name=", name, ", func=",
                          func.__name__)

        global current_cmd_info
        if not current_cmd_info:
            errors.internal_error(
                "@option decorators must be followed by a single @command decorator"
            )

        option_info = {
            "name": name,
            "hidden": False,
            "required": required,
            "type": type if isinstance(type, str) else type.__name__,
            "multiple": multiple,
            "default": default,
            "values": values,
            "help": help,
        }

        update_or_insert_argument(current_cmd_info, "options", option_info)

        return inner_option
Example #9
0
    def decorator_command(func):
        '''Register *func* as an XT command in the global command-name tree.'''

        @functools.wraps(func)
        def inner_command(*args, **kwargs):
            return func(*args, **kwargs)

        # begin actual decorator processing
        global first_command
        if first_command:
            first_command = False

        # command name defaults to the function name, underscores -> spaces
        cmd_name = name if name else func.__name__.replace("_", " ")

        if debug_decorators:
            console.print("command decorator called, func=", func.__name__)

        # walk (and extend) the nested word tree for this command name
        node = commands
        for word in cmd_name.split(" "):
            node = node.setdefault(word, {})

        cmd_info = {
            "name": cmd_name,
            "options_before_args": options_before_args,
            "keyword_optional": keyword_optional,
            "pass_by_args": pass_by_args,
            "group": group,
            "func": func,
            "arguments": [],
            "options": [],
            "examples": [],
            "faqs": [],
            "hidden": False,
            "see_alsos": [],
            "kwgroup": kwgroup,
            "kwhelp": kwhelp,
            "help": help
        }

        # the "" key marks the command record at this tree node
        node[""] = cmd_info

        if keyword_optional:
            # only 1 command can use this
            if "" in commands:
                errors.internal_error(
                    "processing command decoration for '{}'; only 1 command can use 'keyword_optional'"
                    .format(func.__name__))
            commands[""] = cmd_info

        # subsequent option/argument decorators attach to this command
        global current_cmd_info
        current_cmd_info = cmd_info
        # end actual decorator processing

        return inner_command
Example #10
0
    def syntax_error(self, msg):
        '''Report a syntax error with usage help, then raise or exit.'''
        # show the message plus the syntax of the current command
        console.print(msg)
        self.show_current_command_syntax()

        # either raise (for callers that catch) or exit the process
        if self.raise_syntax_exception:
            errors.syntax_error("syntax error")
        errors.syntax_error_exit()
Example #11
0
 def warning(self, *msg_args):
     '''Report an xt_config warning.

     When the internal "raise" setting is on, escalate to a config_error;
     otherwise just print the message.
     '''
     # join the prefix and all stringified args (idiomatic; avoids += in a loop)
     msg = " ".join(["WARNING: xt_config file -"] +
                    [str(arg) for arg in msg_args])

     if self.get("internal", "raise", suppress_warning=True):
         errors.config_error(msg)
     else:
         console.print(msg)
Example #12
0
def debug_break():
    '''Block until a VS Code debugger attaches, then stop at a breakpoint.'''
    import ptvsd

    # 5678 is the default attach port in the VS Code debug configurations
    console.print("Waiting for debugger attach")
    ptvsd.enable_attach(address=('localhost', 5678), redirect_output=True)
    ptvsd.wait_for_attach()

    breakpoint()
Example #13
0
    def rerun(self, run_name, workspace, response):
        '''Re-submit a previous run, letting the user edit the original xt command.

        run_name, workspace: identify the run to replay.
        response: optional canned reply for automation; any "$cmd" inside it is
        replaced with the original command line. When empty, the user is
        prompted interactively with the original command as the default.
        '''
        # NOTE: validate_run_name() call must be AFTER we call process_named_options()
        run_name, workspace = run_helper.parse_run_name(workspace, run_name)

        # extract "prompt" and "args" from cmdline
        cmdline, xt_cmdline, box_name, parent_name, node_index = self.get_info_for_run(
            workspace, run_name)

        #console.print("cmdline=", cmdline)
        prompt = ""

        if xt_cmdline:
            args = "  " + xt_cmdline
        else:
            # legacy run; just use subset of xt cmd
            args = "  xt " + cmdline

        console.print("edit/accept xt cmd for {}/{}".format(
            workspace, run_name))
        if response:
            # allow user to supplement the cmd with automation
            if "$cmd" in response:
                response = response.replace("$cmd", args)
            console.print(response)
        else:
            response = pc_utils.input_with_default(prompt, args)

        # keep RERUN cmd simple by reusing parse_python_or_run_cmd()
        full_cmd = response.strip()
        #console.print("  new_cmd=" + full_cmd)
        if not full_cmd.startswith("xt "):
            errors.syntax_error(
                "command must start with 'xt ': {}".format(full_cmd))

        # this temp dir cannot be removed immediately after job is submitted (TBD why)
        tmp_dir = file_utils.make_tmp_dir("rerun_cmd")
        job_id = self.store.get_job_id_of_run(workspace, run_name)
        capture.download_before_files(self.store,
                                      job_id,
                                      workspace,
                                      run_name,
                                      tmp_dir,
                                      silent=True,
                                      log_events=False)

        # move to tmp_dir so files get captured correctly
        prev_cwd = os.getcwd()
        os.chdir(tmp_dir)

        try:
            # recursive invoke of QFE parser to parse command (original + user additions)
            args = full_cmd.split(" ")
            args = args[1:]  # drop the "xt" at beginning
            inner_dispatch(args, is_rerun=True)
        finally:
            # change back to original dir
            os.chdir(prev_cwd)
Example #14
0
 def print_cancel_all_results(self, cancel_results_by_boxes):
     '''Print the cancel status of every result, grouped by target box.'''
     for target, target_results in cancel_results_by_boxes.items():
         console.print("Target: {}".format(target))

         for res in target_results:
             console.print(
                 "canceled: {}, service_status: {}, simple_status: {}".
                 format(res.get("cancelled"),
                        res.get("service_status"),
                        res.get("simple_status")))
    def zip(self, files, zipfile):
        '''Zip files matching *files* (skipping .git/__pycache__) into *zipfile*.'''
        # expand the include spec into concrete filenames
        filenames = file_helper.get_filenames_from_include_lists(
            [files], [".git", "__pycache__"], recursive=True)

        # archive names drop the source dir plus its trailing separator
        source_dir = os.path.dirname(files)
        prefix_len = 1 + len(source_dir)

        file_helper.zip_up_filenames(zipfile, filenames, True, prefix_len)

        console.print("{:,} files written to: {}".format(
            len(filenames), zipfile))
Example #16
0
def get_merged_config(create_if_needed=True,
                      local_overrides_path=None,
                      suppress_warning=False,
                      mini=False):
    '''Load the default XT config, then merge the local override file on top.

    Returns the merged config object with its mini_mode attribute set.

    NOTE(review): create_if_needed and mini are not referenced in this body —
    presumably consumed elsewhere or legacy; confirm before removing.
    '''

    fn_default = get_default_config_path()
    config = load_and_validate_config(fn_default, validate_as_default=True)

    # apply local override file, if present
    fn_overrides = local_overrides_path if local_overrides_path else constants.FN_CONFIG_FILE
    fn_overrides = os.path.realpath(fn_overrides)

    # both env vars present => launched on a compute node by the XT script
    sc = os.getenv("XT_STORE_CREDS")
    mc = os.getenv("XT_MONGO_CONN_STR")
    if sc and mc:
        # we are running on compute node (launched by script)
        console.print(
            "XT: detected run on compute node; setting mini_mode=False")
        config.mini_mode = False
    else:
        # get mini_mode value from default config (modified further below)
        config.mini_mode = not config.get("general", "advanced-mode")
        if config.mini_mode:
            suppress_warning = True

    if os.path.exists(fn_overrides):
        overrides = load_and_validate_config(fn_overrides,
                                             validate_as_default=False)

        if not overrides.data:
            console.warning("local xt_config.yaml file contains no properties")
        else:
            # allow overrides to override the mini_mode flag
            if not (sc and mc):
                config.mini_mode = not overrides.get(
                    "general", "advanced-mode", suppress_warning=True)

            # hardcoded MINI options (can be overwritten by local confile file)
            if config.mini_mode:
                # single workspace
                config.data["general"]["workspace"] = "txt"

                # single target
                config.data["xt-services"]["target"] = "batch"

            # merge the overrides config with the default config
            merge_configs(config, overrides)

    else:
        if not suppress_warning:
            console.print("warning: no local config file found")

    console.detail("after loading/validation of merged config files")

    return config
Example #17
0
    def remove_cache(self, ws_name):
        '''Delete the on-disk run-summary cache directory for workspace *ws_name*.'''
        if not self.run_cache_dir:
            return

        # expand the cache path template for this workspace
        base = os.path.expanduser(self.run_cache_dir)
        cache_fn = (base + "/" + constants.RUN_SUMMARY_CACHE_FN).replace(
            "$ws", ws_name)
        cache_dir = os.path.dirname(cache_fn)

        if os.path.exists(cache_dir):
            console.print("  zapping cache_dir=", cache_dir)
            file_utils.zap_dir(cache_dir)
Example #18
0
 def load_runs(self, all_run_records, plot_x_metric_name,
               plot_y_metric_name, hist_x_metric_name):
     '''Populate self.runs from records, skipping parent runs, capped at MAX_NUM_RUNS.'''
     self.runs = []

     for rec in all_run_records:
         candidate = Run(rec, plot_x_metric_name, plot_y_metric_name,
                         hist_x_metric_name)

         # runs without metric reports are parent runs - exclude them
         if not candidate.metric_reports:
             continue

         self.runs.append(candidate)

         if MAX_NUM_RUNS > 0 and len(self.runs) == MAX_NUM_RUNS:
             break

     console.print("{} runs downloaded".format(len(self.runs)))
Example #19
0
    def monitor_with_jupyter(self, workspace, run_name):
        '''Create a Jupyter notebook that monitors an Azure ML run and print how to launch it.'''
        if not self.is_aml_ws(workspace):
            errors.combo_error(
                "the monitor command is only supported for Azure ML runs")

        run_name, actual_ws = run_helper.parse_run_name(workspace, run_name)

        notebook_fn = self.azure_ml.make_monitor_notebook(actual_ws, run_name)
        notebook_dir = os.path.dirname(notebook_fn)

        monitor_cmd = "jupyter notebook --notebook-dir=" + notebook_dir
        console.print("monitoring notebook created; to run:")
        console.print("  " + monitor_cmd)
    def addr(self, box):
        '''Print the address and (when available) controller/tensorboard ports of *box*.'''
        box_name = box

        info = box_information.get_box_addr(self.config, box_name, self.store)
        box_addr = info["box_addr"]
        controller_port = info["controller_port"]
        tb_port = info["tensorboard_port"]

        if controller_port:
            # bug fix: tb_port was passed to format() but the format string was
            # missing its "={}" placeholder, so it was silently dropped
            console.print(
                "{} address: {}, controller port={}, tensorboard port={}".
                format(box_name, box_addr, controller_port, tb_port))
        else:
            console.print("{} address: {}".format(box_name, box_addr))
Example #21
0
    def collect_logs(self, workspace, run_names, log_path):
        '''Collect log files for the matching runs and report the total count.'''
        run_names, actual_ws = run_helper.parse_run_list(
            self.store, workspace, run_names)
        if len(run_names) == 0:
            self.store_error("No matching runs found")

        grok_server = None  # self.config.get("logging", "grok-server")

        # sum the per-run collection counts
        total = sum(
            self.core.collect_logs_for_run(actual_ws, run_name, log_path,
                                           grok_server)
            for run_name in run_names)

        console.print("{} log file collected to grok server: {}".format(
            total, grok_server))
Example #22
0
    def cancel_runs_by_property(self, prop_name, prop_value, box_name):
        '''Cancel runs matching prop_name/prop_value via box_name's controller.

        Returns the controller's cancel results, or None on failure.
        '''
        cancel_results = None

        try:
            # connect to the specified box first
            connected = self.client.change_box(box_name)
            if connected:
                cancel_results = self.client.cancel_runs_by_property(
                    prop_name, prop_value)
            else:
                console.print(
                    "couldn't connect to controller for {}".format(box_name))
        except BaseException as ex:
            # best-effort: report and fall through to return None
            errors.report_exception(ex)

        return cancel_results
    def docker_login(self, target, docker):
        '''Log in to the docker registry configured for *target*/*docker*; print the result.'''
        reg_creds = self.get_registry_creds(target, docker)

        if not reg_creds:
            if docker:
                errors.env_error(
                    "no dockers entry defined for docker '{}'".format(docker))
            else:
                errors.env_error(
                    "no docker property defined for target '{}'".format(
                        target))

        text = self.core.docker_login(reg_creds["login-server"],
                                      reg_creds["username"],
                                      reg_creds["password"])
        console.print(text)
Example #24
0
    def mongo_with_retries(self, name, mongo_cmd, ignore_error=False):
        '''Run *mongo_cmd* (a zero-arg callable) with up to 25 retries.

        name: label used in retry/error messages.
        ignore_error: when True, swallow the first error and return None.
        Returns the command result, or None if it never succeeded.
        Raises the last exception when all retries are exhausted.
        '''
        retry_count = 25
        result = None
        import pymongo.errors

        for i in range(retry_count):
            try:
                result = mongo_cmd()
                break
            # watch out for these exceptions: AutoReconnect, OperationFailure (and ???)
            except BaseException as ex:  # pymongo.errors.OperationFailure as ex:

                # since we cannot config logger to supress stderr, don't log this
                #logger.exception("Error in mongo_with_retries, ex={}".format(ex))

                # pymongo.errors.OperationFailure: Message: {"Errors":["Request rate is large"]}
                if ignore_error:
                    console.print(
                        "ignoring mongo-db error: name={}, ex={}".format(
                            name, ex))
                    break

                if i == retry_count - 1:
                    # we couldn't recover - signal a hard error/failure
                    raise ex

                # we get hit hard on the "Request rate is large" errors when running
                # large jobs (500 simultaneous runs), so beef up the backoff times to
                # [1,61] so we don't die with a hard failure here
                if i == 0:
                    # first retry: short random backoff in [1, 11)
                    backoff = 1 + 10 * np.random.random()
                    self.retry_errors += 1
                else:
                    # later retries: longer random backoff in [1, 61)
                    backoff = 1 + 60 * np.random.random()

                # truncate the exception message to keep the retry line readable
                ex_code = ex.code if hasattr(ex, "code") else ""
                ex_msg = str(ex)[0:60] + "..."

                console.print(
                    "retrying mongo-db: name={}, retry={}/{}, backoff={:.2f}, ex.code={}, ex.msg={}"
                    .format(name, i + 1, retry_count, backoff, ex_code,
                            ex_msg))

                time.sleep(backoff)

        return result
    def create_demo(self, destination, response, overwrite):
        '''
        Copy the XT demo files into *destination* and import the xt-demo workspace.

        This command will remove the specified destination directory if it exists
        (prompting the user for approval; *response* can supply a canned answer).
        Specifying the current directory as the destination will produce an error.
        '''

        # set up from_dir
        from_dir = file_utils.get_xtlib_dir() + "/demo_files"

        # set up dest_dir
        dest_dir = destination
        if not dest_dir:
            errors.syntax_error("An output directory must be specified")

        create = True
        console.print("creating demo files at: {}".format(
            os.path.abspath(dest_dir)))

        if os.path.exists(dest_dir):
            # destructive step: get explicit confirmation before deleting
            answer = pc_utils.input_response(
                "'{}' already exists; OK to delete? (y/n): ".format(dest_dir),
                response)
            if answer != "y":
                create = False

        if create:
            file_utils.ensure_dir_deleted(dest_dir)

            shutil.copytree(from_dir, dest_dir)
            #file_utils.copy_tree(from_dir, dest_dir)

            if not self.store.does_workspace_exist("xt-demo"):
                # import xt-demo workspace from archive file
                console.print(
                    "importing xt-demo workspace (usually takes about 30 seconds)"
                )
                impl_storage_api = ImplStorageApi(self.config, self.store)

                fn_archive = os.path.join(file_utils.get_xtlib_dir(),
                                          "demo_files", "xt-demo-archive.zip")
                impl_storage_api.import_workspace(fn_archive,
                                                  "xt-demo",
                                                  "xtd",
                                                  overwrite=overwrite,
                                                  show_output=False)
Example #26
0
    def parse_string_list(self, tok, scanner, pipe_objects_enabled=True):
        '''Parse a comma-separated string list (or the "$" pipe-objects marker).

        tok: current token; scanner: token source with a .scan() method.
        Returns (value, next_tok) where value is the parsed list of strings
        and next_tok is the first unconsumed token.
        '''
        # declared once here; the duplicate inner "global" was redundant
        global pipe_object_list

        if not tok:
            # empty string specified
            value = []
            tok = scanner.scan()  # skip over the empty string
        elif tok == "$":
            if pipe_objects_enabled:
                pipe_object_list = get_xt_objects_from_cmd_piping()
                console.diag("pipe_object_list: {}".format(pipe_object_list))

            if pipe_objects_enabled and pipe_object_list:
                value = pipe_object_list
                console.print("replacing '$' with: ", value)
            else:
                errors.combo_error(
                    "'$' can only be used for piping the output of a previous XT command into this run"
                )

            # mark pipe objects as having been consumed by this parsing
            pipe_object_list = None

            tok = scanner.scan()  # skip over the $
        else:
            # scan a comma separated list of tokens (some of which can be single quoted strings)
            value = []

            while tok is not None:
                if tok.startswith("--"):
                    # start of an option: list ends here
                    break

                value.append(self.expand_system_values(tok))

                tok = scanner.scan()
                if tok != ",":
                    break

                tok = scanner.scan()  # skip over the comma

        return value, tok
Example #27
0
    def download_runs(self, store, ws_name, run_group_name, run_group_type,
                      hp_config_cloud_path, hp_config_local_dir):
        '''Download the hp-config file and all-runs records for a run group.

        Files are stored at the EXPERIMENT level when run_group_name is
        "experiment", otherwise at the JOB level.
        Returns (local_config_file_path, allrun_records).
        '''
        # Download the all_runs file
        local_cache_path = "{}/{}/{}/".format(hp_config_local_dir, ws_name,
                                              run_group_type)
        local_config_file_path = "{}{}".format(local_cache_path,
                                               "hp-config.yaml")

        if run_group_name == "experiment":
            console.print(
                "downloading runs for EXPERIMENT={}...".format(run_group_type))
            # files are at EXPERIMENT LEVEL
            # read SWEEPS file
            if not store.does_experiment_file_exist(ws_name, run_group_type,
                                                    hp_config_cloud_path):
                errors.store_error(
                    "missing experiment hp_config file (ws={}, exper={}, fn={})"
                    .format(ws_name, run_group_type, hp_config_cloud_path))
            store.download_file_from_experiment(ws_name, run_group_type,
                                                hp_config_cloud_path,
                                                local_config_file_path)

            # read ALLRUNS info aggregated in EXPERIMENT
            allrun_records = store.get_all_runs(run_group_name, ws_name,
                                                run_group_type)
        else:
            console.print(
                "downloading runs for JOB={}...".format(run_group_type))
            # files are at JOB LEVEL
            # read SWEEPS file
            if not store.does_job_file_exist(run_group_type,
                                             hp_config_cloud_path):
                errors.store_error(
                    "missing job hp_config file (job={}, fn={})".format(
                        run_group_type, hp_config_cloud_path))
            store.download_file_from_job(run_group_type, hp_config_cloud_path,
                                         local_config_file_path)

            # read ALLRUNS info aggregated in JOB
            allrun_records = store.get_all_runs(run_group_name, ws_name,
                                                run_group_type)

        console.diag("after downloading all runs")
        return local_config_file_path, allrun_records
Example #28
0
    def restart_controller(self, job_id, node_index, delay):
        '''Restart the XT controller on the given job node and print the outcome.'''
        result = None

        # get the connection string for the job/node
        cs_plus = job_helper.get_client_cs(self.core, job_id, node_index)
        cs = cs_plus["cs"]

        with XTClient(self.config, cs, cs_plus["box_secret"]) as xtc:
            if xtc.connect():
                result = xtc.restart_controller(delay)

        if result:
            console.print("controller restarted")
        else:
            console.print(
                "could not connect to controller: ip={}, port={}".format(
                    cs["ip"], cs["port"]))
Example #29
0
    def import_workspace(self,
                         input_file,
                         new_workspace,
                         job_prefix,
                         overwrite,
                         show_output=True):
        '''Import a workspace archive into *new_workspace*, staging via a temp dir.'''
        if not job_prefix:
            errors.combo_error("job prefix cannot be blank")

        # the temporary staging directory cleans itself up on exit
        with tempfile.TemporaryDirectory(prefix="import-") as temp_dir:
            self.import_workspace_core(temp_dir, input_file, new_workspace,
                                       job_prefix, overwrite,
                                       show_output=show_output)

        if show_output:
            console.print("  import completed")
Example #30
0
    def run(self, timeout=None):
        '''Show the plot window, optionally auto-closing it after *timeout* seconds.'''
        if not self.runs:
            console.print("error - no valid runs found")
            return

        if timeout:
            # background thread closes all plot windows once the timeout expires
            from threading import Thread

            def close_after(delay):
                console.print("set_timer called: timeout=", delay)
                time.sleep(delay)
                console.diag("timer triggered!")
                plt.close("all")

            timer_thread = Thread(target=close_after, args=[timeout])
            timer_thread.daemon = True  # mark as background thread
            timer_thread.start()

        plt.show()