Example #1
    def __init__(self, env):
        self.env = env
        # collect all model functions marked as 'predict' commands, keyed by name
        self.fns = OrderedDict()
        for model_fn in Command.find_functions(env.model):
            cmd = Command.discover(model_fn)
            if cmd.kind == 'predict':
                self.fns[cmd.name] = (cmd, model_fn)

        # without at least one predict function there is nothing to serve
        if not self.fns:
            raise VergeMLError(f"@{env.AI} can't be run as a REST service.")
Example #2
def main(argv=None, plugins=PLUGINS):
    if argv is None:
        argv = sys.argv[1:]
    _configure_logging()
    try:
        run(argv, plugins=plugins)

    except VergeMLError as e:
        # NOTE: if the error occurs before the environment is created, ENV will not be set yet.
        from vergeml.env import ENV
        # If there is an error with the config file but the user asked for
        # 'ml help <topic>' where <topic> is the topic suggested by VergeML,
        # display the help message instead of the error.
        if argv == ["help", e.help_topic]:
            help_cmd = HelpCommand('help')
            print(help_cmd.get_help(ENV, e.help_topic))
        else:
            # Display the error. We can't use logging.error here because pytest
            # does not capture stderr emitted via logging, so fall back to print.
            err_string = str(e).strip()
            print("Error! " + err_string, file=sys.stderr)
            # find all command topics
            commands = list(plugins.keys('vergeml.cmd'))
            if ENV and ENV.model_plugin:
                fns = Command.find_functions(ENV.model_plugin)
                mcommands = list(map(lambda f: Command.discover(f).name, fns))
                commands.extend(mcommands)
            # if the error is just one line and there is command help available, display the help message too.
            if (e.help_topic and len(err_string.splitlines()) == 1
                    and e.help_topic in commands):
                print("")
                help_cmd = HelpCommand('help')
                help_topic = "" if e.help_topic == "*help*" else e.help_topic
                print(help_cmd.get_help(ENV, help_topic, short=True))
            # else just hint at the help topic
            elif e.help_topic:
                print("", file=sys.stderr)
                help_topic = "" if e.help_topic == "*help*" else " " + e.help_topic
                print(f"See 'ml help" + help_topic + "'.", file=sys.stderr)

    except Exception as err:  # pylint: disable=W0703
        if err.__class__.__name__ == 'ResourceExhaustedError':
            print("Error! Your GPU ran out of memory.")
            print(
                "Try lowering resource usage by decreasing model parameters such as batch size."
            )
        else:
            raise err
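For reference, main() accepts argv=None and falls back to sys.argv[1:], so it can serve directly as a console entry point. A minimal sketch, assuming the module is executed as a script:

if __name__ == '__main__':
    # Uses sys.argv[1:] and the module-level PLUGINS registry by default.
    main()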
Example #3
    def format_general_help(self, env=None, short=False):
        buffer = io.StringIO()
        print(USAGE, file=buffer)
        print("", file=buffer)

        terms = set(glossary.LONG_DESCR.keys())
        terms.update(self.plugins.keys('vergeml.cmd'))
        rng = random.Random()
        rng.seed(datetime.datetime.now().timestamp())
        random_term = rng.choice(list(terms))

        if env and env.model:
            print(f"Current Model: {env.get('model')}", file=buffer)
            print("", file=buffer)

        print("General Help:", file=buffer)
        for topic, descr in _GENERAL_HELP:
            print("  {:<16} {}".format(topic, descr), file=buffer)
        print("", file=buffer)

        print("Commands:", file=buffer)
        for cmd_name in self.plugins.keys('vergeml.cmd'):
            descr = Command.discover(self.plugins.get('vergeml.cmd',
                                                      cmd_name)).descr
            print("  {:<16} {}".format(cmd_name, descr), file=buffer)
        print("", file=buffer)

        if env and env.model:
            print("Model Commands:", file=buffer)
            for fn in Command.find_functions(env.model):
                cmd = Command.discover(fn)
                print("  {:<16} {}".format(cmd.name, cmd.descr), file=buffer)
            print("", file=buffer)

        if not short:
            print(
                "See 'ml help <command>' or 'ml help <topic>' to read about a specific subcommand or topic.",
                file=buffer)
            print(f"For example, try 'ml help {random_term}'", file=buffer)

        return buffer.getvalue().strip()
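Each help section above relies on the same '{:<16} {}' pattern to left-align names in a 16-character column. A small standalone sketch of that formatting (the topic/description pairs are made up for illustration):

import io

buffer = io.StringIO()
for topic, descr in [("models", "List the available models."),
                     ("options", "Show VergeML options.")]:
    # '{:<16}' pads the name to 16 characters so the descriptions line up.
    print("  {:<16} {}".format(topic, descr), file=buffer)
print(buffer.getvalue(), end="")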
Example #4
    def __init__(self,
                 model=None,
                 project_file=None,
                 samples_dir=None,
                 test_split=None,
                 val_split=None,
                 cache_dir=None,
                 random_seed=None,
                 trainings_dir=None,
                 project_dir=None,
                 AI=None,
                 is_global_instance=False,
                 config=None,
                 plugins=PLUGINS,
                 display=DISPLAY):
        """Configure, train and save the results.

        :param model:           Name of the model plugin.
        :param project_file:    Optional path to the project file.
        :param samples_dir:     The directory where samples can be found. [default: samples]
        :param test_split:      The test split. [default: 10%]
        :param val_split:       The val split. [default: 10%]
        :param cache_dir:       The directory used for caching [default: .cache]
        :param random_seed:     Random seed. [default 2204]
        :param trainings_dir:   The directory to save training results to. [default: trainings]
        :param project_dir:     The directory of the project. [default: current directory]
        :param AI:              Optional name of a trained AI.
        :is_global_instance:    If true, this env can be accessed under the global var env.ENV. [default: false] 
        :config:                Additional configuration to pass to env, i.e. if not using a project file
        """

        super().__init__()

        # avoid the mutable default argument pitfall for config
        config = config or {}

        # when called from the command line, we need to have a global instance
        if is_global_instance:
            global ENV
            ENV = self

        # setup the display
        self.display = display
        # set the name of the AI if given
        self.AI = AI
        # this holds the model object (not the name of the model)
        self.model = None
        # the results class (responsible for updating data.yaml with the latest results during training)
        self.results = None
        # when a training is started, this holds the object responsible for coordinating the training
        self.training = None
        # hold a proxy to the data loader
        self._data = None

        self.plugins = plugins

        # set up the base options from constructor arguments
        self._config = {}
        self._config['samples-dir'] = samples_dir
        self._config['test-split'] = test_split
        self._config['val-split'] = val_split
        self._config['cache-dir'] = cache_dir
        self._config['random-seed'] = random_seed
        self._config['trainings-dir'] = trainings_dir
        self._config['model'] = model

        validators = {}
        # add validators for commands
        for k, v in plugins.all('vergeml.cmd').items():
            cmd = Command.discover(v)
            validators[cmd.name] = ValidateOptions(cmd.options,
                                                   k,
                                                   plugins=plugins)
        # now it gets a bit tricky - we need to peek at the model name
        # to find the right validators to create for model commands.
        peek_model_name = model
        peek_trainings_dir = trainings_dir
        # to do this, we have to first have a look at the project file
        try:
            project_doc = load_yaml_file(project_file) if project_file else {}
            # only update model name if empty (project file does not override command line)
            peek_model_name = peek_model_name or project_doc.get('model', None)
            # pick up trainings-dir in the same way
            peek_trainings_dir = peek_trainings_dir or project_doc.get(
                'trainings-dir', None)
            # if we don't have a trainings dir yet, set to default
            peek_trainings_dir = peek_trainings_dir or os.path.join(
                project_dir or "", "trainings")
            # now, try to load the data.yaml file and see if we have a model definition there
            data_doc = load_yaml_file(peek_trainings_dir, AI,
                                      "data.yaml") if AI else {}
            # if we do, this overrides everything, also the one from the command line
            peek_model_name = data_doc.get('model', peek_model_name)
            # finally, if we have a model name, set up validators
            if peek_model_name:
                model_plugin = plugins.get("vergeml.model", peek_model_name)
                for fn in Command.find_functions(model_plugin, plugins=plugins):
                    cmd = Command.discover(fn)
                    validators[cmd.name] = ValidateOptions(
                        cmd.options, cmd.name, plugins)
        except Exception:
            # in this case we don't care if something went wrong - the error
            # will be reported later
            pass
        # finally, validators for device and data sections
        validators['device'] = ValidateDevice('device', plugins)
        validators['data'] = ValidateData('data', plugins)

        # merge project file
        if project_file:
            doc = _load_and_configure(project_file, 'project file', validators)
            # the project file DOES NOT override values passed to the environment
            # TODO reserved: hyperparameters and results
            for k, v in doc.items():
                if k not in self._config or self._config[k] is None:
                    self._config[k] = v

        # after the project file is loaded, fill missing values
        project_dir = project_dir or ''
        defaults = {
            'samples-dir': os.path.join(project_dir, "samples"),
            'test-split': '10%',
            'val-split': '10%',
            'cache-dir': os.path.join(project_dir, ".cache"),
            'random-seed': 2204,
            'trainings-dir': os.path.join(project_dir, "trainings"),
        }
        for k, v in defaults.items():
            if self._config[k] is None:
                self._config[k] = v

        # verify split values
        for split in ('val-split', 'test-split'):
            spltype, splval = parse_split(self._config[split])
            if spltype == 'dir':
                path = os.path.join(project_dir, splval)
                if not os.path.exists(path):
                    raise VergeMLError(
                        f"Invalid value for option {split} - no such directory: {splval}",
                        f"Please set {split} to a percentage, number or directory.",
                        hint_key=split,
                        hint_type='value',
                        help_topic='split')
                self._config[split] = path

        # need to have data_file variable in outer scope for later when reporting errors
        data_file = None
        if self.AI:
            ai_path = os.path.join(self._config['trainings-dir'], self.AI)
            if not os.path.exists(ai_path):
                raise VergeMLError("AI not found: {}".format(self.AI))
            # merge data.yaml
            data_file = os.path.join(self._config['trainings-dir'], self.AI,
                                     'data.yaml')
            if not os.path.exists(data_file):
                raise VergeMLError(
                    "data.yaml file not found for AI {}: {}".format(
                        self.AI, data_file))
            doc = load_yaml_file(data_file, 'data file')
            self._config['hyperparameters'] = doc.get('hyperparameters', {})
            self._config['results'] = doc.get('results', {})
            self._config['model'] = doc.get('model')
            self.results = _Results(self, data_file)

        try:
            # merge device and data config
            self._config.update(apply_config(config, validators))
        except VergeMLError as e:
            # improve the error message when this runs on the command line
            if is_global_instance and e.hint_key:
                key = e.hint_key
                e.message = f"Option --{key}: " + e.message
            raise e

        if self._config['model']:
            # load the model plugin
            modelname = self._config['model']
            self.model = plugins.get("vergeml.model", modelname)

            if not self.model:
                message = f"Unknown model name '{modelname}'"
                suggestion = did_you_mean(plugins.keys('vergeml.model'),
                                          modelname) or "See 'ml help models'."

                # if model was passed in via --model
                if model and is_global_instance:
                    message = f"Invalid value for option --model: {message}"
                else:
                    res = None
                    if not res and data_file:
                        # first check if model was defined in the data file
                        res = _check_definition(data_file, 'model', 'value')
                    if not res and project_file:
                        # next check the project file
                        res = _check_definition(project_file, 'model', 'value')
                    if res:
                        filename, definition = res
                        line, column, length = definition
                        # display a nice error message
                        message = display_err_in_file(
                            filename, line, column, f"{message} {suggestion}",
                            length)
                        # set suggestion to None since it is now contained in message
                        suggestion = None
                raise VergeMLError(message, suggestion)
            else:
                # instantiate the model plugin
                self.model = self.model(modelname, plugins)

        # update env from validators
        for _, plugin in validators.items():
            for k, v in plugin.values.items():
                self._config[k] = v

        # always set up numpy and python
        self.configure('python')
        self.configure('numpy')
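A minimal construction sketch follows. The class name Environment, the argument values, and the assumption that get() reads the merged configuration keys shown above are not part of the excerpt and are illustrative only:

# Hypothetical usage; the class name and argument values are assumptions.
env = Environment(project_dir='.', is_global_instance=True)
# After defaults are filled in, 'samples-dir' resolves to './samples'.
print(env.get('samples-dir'))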
Example #5
    def format_topics(self, env):
        buffer = io.StringIO()

        print("General Help:", file=buffer)
        for topic, descr in _GENERAL_HELP:
            print("  {:<16} {}".format(topic, descr), file=buffer)
        print("", file=buffer)

        print("Commands:", file=buffer)
        for cmd_name in self.plugins.keys('vergeml.cmd'):
            descr = Command.discover(self.plugins.get('vergeml.cmd',
                                                      cmd_name)).descr
            print("  {:<16} {}".format(cmd_name, descr), file=buffer)
        print("", file=buffer)

        if env and env.model:
            print("Model Commands:", file=buffer)
            for fn in Command.find_functions(env.model):
                cmd = Command.discover(fn)
                print("  {:<16} {}".format(cmd.name, cmd.descr), file=buffer)
            print("", file=buffer)

        print("Configuration:", file=buffer)
        for topic, descr in _CONFIGURATION_HELP:
            print("  {:<16} {}".format(topic, descr), file=buffer)
        print("", file=buffer)

        inputs = []
        for k in self.plugins.keys('vergeml.io'):
            plugin = self.plugins.get('vergeml.io', k)
            source = Source.discover(plugin)
            inputs.append((k, source.descr))

        print("Data Input:", file=buffer)
        print(_get_table(inputs, IND=2, colon=False), file=buffer)
        print("", file=buffer)

        ops = {}
        for k in self.plugins.keys('vergeml.operation'):
            plugin = self.plugins.get('vergeml.operation', k)
            op = Operation.discover(plugin)
            topic = op.topic or "general"
            descr = op.descr
            ops.setdefault(topic, [])
            ops[topic].append((k, descr))

        for k, v in sorted(ops.items()):
            topic = k.capitalize()
            print(f"{topic} Operations:", file=buffer)
            print(format_info_text(_get_table(v), indent=2), file=buffer)
            print("", file=buffer)

        models = []
        for name in self.plugins.keys("vergeml.model"):
            plugin = self.plugins.get('vergeml.model', name)
            model = Model.discover(plugin)
            models.append((name, model.descr))

        if models:
            print("Models:", file=buffer)
            print(_get_table(models), file=buffer)
            print("", file=buffer)

        print("Glossary:", file=buffer)
        items = ", ".join(glossary.LONG_DESCR.keys())
        print(format_info_text(items, indent=2), file=buffer)

        return buffer.getvalue().strip()
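The operations listing groups plugins by topic via dict.setdefault before printing each group. The same grouping can be expressed with collections.defaultdict; a generic sketch with made-up plugin names:

from collections import defaultdict

# Made-up (name, topic, description) triples standing in for discovered operations.
discovered = [("flip", "image", "Flip images horizontally."),
              ("crop", "image", "Crop images to a fixed size."),
              ("normalize", None, "Normalize sample values.")]

ops = defaultdict(list)
for name, topic, descr in discovered:
    ops[topic or "general"].append((name, descr))

for topic, entries in sorted(ops.items()):
    print(f"{topic.capitalize()} Operations:")
    for name, descr in entries:
        print("  {:<16} {}".format(name, descr))
    print("")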
Example #6
    def get_help(self, env=None, topic="", short=False):

        if topic:
            model_commands = {}
            if env and env.model:
                for fn in Command.find_functions(env.model):
                    cmd = Command.discover(fn)
                    model_commands[cmd.name] = cmd

            if topic == "-a":
                return self.format_topics(env)
            # show VergeML options
            elif topic == "options":
                return self.format_options()

            # display the glossary
            elif topic == "glossary":
                return self.format_glossary()

            # show available models
            elif topic == "models":
                return _with_header(self.format_models(),
                                    help="models",
                                    topic=topic)

            # explain the data.input section
            elif topic == "input":
                return _with_header(self.format_input_list(),
                                    help="configuration",
                                    topic=topic)

            # explain the data.cache section
            elif topic == "cache":
                return _with_header(format_info_text(_CACHE_HELP),
                                    help="configuration",
                                    topic=topic)

            # explain the data.output section
            elif topic == "output":
                return _with_header(format_info_text(_OUTPUT_HELP),
                                    help="configuration",
                                    topic=topic)

            # explain preprocessing
            elif topic in ("preprocess", "preprocessing"):
                return _with_header(self.format_preprocessing_list(),
                                    help="configuration",
                                    topic=topic)

            # explain the data section
            elif topic == "data":
                return _with_header(format_info_text(_DATA_HELP.strip()),
                                    help="configuration",
                                    topic=topic)

            # explain the device section
            elif topic == "device":
                return _with_header(format_info_text(_DEVICE_HELP.strip()),
                                    help="configuration",
                                    topic=topic)

            # show a random robot
            elif topic == "random robot":
                robot = ascii_robot(datetime.datetime.now(),
                                    random_robot_name(datetime.datetime.now()))
                return f"\n{robot}\n"

            elif ":" in topic and topic.split(
                    ":", 1)[0] in self.plugins.keys('vergeml.cmd'):
                command, subcommand = topic.split(":")
                cmd = Command.discover(self.plugins.get(
                    'vergeml.cmd', command))
                subcommand_option = next(
                    filter(lambda o: bool(o.subcommand), cmd.options), None)
                if not subcommand_option:
                    raise VergeMLError(f"{command} takes no subcommand",
                                       help_topic=command)

                plugin = self.plugins.get(subcommand_option.subcommand,
                                          subcommand)
                if not plugin:
                    raise VergeMLError(f"Invalid {subcommand_option.name}",
                                       help_topic=command)

                cmd = Command.discover(plugin)
                return cmd.usage(short, parent_command=command)

            # display model command help
            elif topic in model_commands:
                return model_commands[topic].usage(short)

            # display command help
            elif topic in self.plugins.keys('vergeml.cmd'):
                cmd = Command.discover(self.plugins.get('vergeml.cmd', topic))
                return cmd.usage(short)

            elif topic in self.plugins.keys('vergeml.operation'):
                return _with_header(self.format_source_or_operation(
                    topic, 'vergeml.operation', Operation),
                                    help="preprocessing operation",
                                    topic=topic)

            elif topic in self.plugins.keys('vergeml.io'):
                return _with_header(self.format_source_or_operation(
                    topic, 'vergeml.io', Source),
                                    help="data source",
                                    topic=topic)

            elif topic in self.plugins.keys('vergeml.model'):
                return _with_header(self.format_model(topic),
                                    help="models",
                                    topic=topic)

            # show a glossary entry
            elif glossary.long_descr(topic):
                topic = glossary.SYNONYMS.get(topic, topic)
                return _with_header(format_info_text(
                    glossary.long_descr(topic)),
                                    help="glossary",
                                    topic=topic)

            # show base options help
            elif topic in dict(HELP_OPTIONS):
                return _with_header(format_info_text(
                    dict(HELP_OPTIONS).get(topic)),
                                    help="base options",
                                    topic=topic)

            else:
                candidates = set()
                candidates.update(map(lambda h: h[0], _GENERAL_HELP))
                candidates.update(self.plugins.keys("vergeml.cmd"))
                candidates.update(map(lambda h: h[0], _CONFIGURATION_HELP))
                candidates.update(self.plugins.keys("vergeml.io"))
                candidates.update(self.plugins.keys("vergeml.operation"))
                candidates.update(self.plugins.keys("vergeml.model"))
                if env and env.model:
                    for fn in Command.find_functions(env.model):
                        cmd = Command.discover(fn)
                        candidates.add(cmd.name)
                candidates.update(glossary.LONG_DESCR.keys())
                candidates.update(glossary.SYNONYMS.keys())

                suggestion = did_you_mean(list(candidates), topic)
                if suggestion:
                    return f"No help found for topic '{topic}'. " + suggestion
                else:
                    return f"No help found for topic '{topic}'."

        else:
            return self.format_general_help(env, short=short)
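As in main() above, the help command is constructed with its own name and then queried for a topic. A small usage sketch; with env=None, model commands are skipped and only built-in topics, plugins and glossary entries resolve:

help_cmd = HelpCommand('help')
# 'models' is one of the built-in topics handled explicitly above.
print(help_cmd.get_help(env=None, topic='models'))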
Example #7
def run(argv, plugins=PLUGINS):
    try:
        argv = _forgive_wrong_option_order(argv)
        args, config, rest = _parsebase(argv, plugins=plugins)
    except getopt.GetoptError as err:
        if err.opt:
            opt = err.opt.lstrip("-")
            dashes = '-' if len(opt) == 1 else '--'
            raise VergeMLError(f"Invalid option {dashes}{opt}.", help_topic='options')
        else:
            raise VergeMLError(f"Invalid option.", help_topic='options')
    
    if 'version' in args:
       print_version()
       exit()

    args = _prepare_args(args)
    ai_names, after_names = parse_ai_names(rest)

    AI = next(iter(ai_names), None)
  
    env = _env_from_args(args, config, AI, plugins=plugins)

    if after_names:
        cmdname = after_names.pop(0) 
    else: 
        cmdname = 'help'
        rest = ['help']

    if ":" in cmdname:
        cmdname = cmdname.split(":")[0]

    plugin = None
    cmd_plugin = plugins.get('vergeml.cmd', cmdname)
    if cmd_plugin:
        plugin = cmd_plugin(cmdname, plugins=plugins)
    elif env.model:
        for model_fn in Command.find_functions(env.model):
            if cmdname == Command.discover(model_fn).name:
                plugin = model_fn
                break

    if not plugin:
        # collect all possible command names
        command_names = set(plugins.keys('vergeml.cmd'))
        if env.model:
            model_commands = set(map(lambda f: Command.discover(f).name,
                                     Command.find_functions(env.model)))
            command_names.update(model_commands)

        raise VergeMLError(f"Invalid command '{cmdname}'.",  
                           suggestion=did_you_mean(command_names, cmdname),
                           help_topic='*help*')
    try:
        env_conf = env.get(cmdname) or {}
        cmd = Command.discover(plugin)
        assert cmd
        args = cmd.parse(rest, env_conf)

        if not cmd.free_form:
            # set defaults
            for opt in cmd.options:
                if opt.name not in args:
                    args[opt.name] = opt.default
            # merge back into env
            for k, v in args.items():
                env.set(f"{cmdname}.{k}", v)
        env.set("command", cmdname)
        env.set_defaults(cmdname, args, plugins=plugins)
    
        try:
            # return the result for unit testing
            return plugin(args, env)
        finally:
            if env.training is not None:
                env.cancel_training()
       
    except KeyboardInterrupt:
        # silence the stacktrace
        pass
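Because run() returns the plugin's result (see the comment above the inner return), it can be driven directly from tests. A hedged sketch, assuming the built-in 'help' command needs no project file:

# Hypothetical test-style invocation; the return value depends on the command plugin.
result = run(['help'])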