Example 1
def test_command_usage5():
    cmd = Command('train',
                  options=[
                      Option(name='optimizer', type=str),
                      Option(name='learning-rate', default=0.0001, short='l')
                  ])
    assert cmd.usage() == USAGE_5
Example 2
def test_command_2():
    cmd = Command('run', options=[Option('<args>', type=list), Option('@AIs', type=list)])
    assert cmd.parse(["run", "tensorboard"]) == {'@AIs': [], '<args>': ["tensorboard"]}
    assert cmd.parse(["@funky-terminator", "run", "tensorboard"]) == \
        {'@AIs': ['funky-terminator'], '<args>': ["tensorboard"]}
    assert cmd.parse(["@funky-terminator", "@touchy-brobot", "run", "tensorboard", "--port=2204"]) == \
        {'@AIs': ['funky-terminator', 'touchy-brobot'], '<args>': ["tensorboard", "--port=2204"]}
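The assertions above rely on leading @-tokens being peeled off the argument list before the command name. A minimal sketch of that split, in the spirit of the parse_ai_names helper referenced in Example 28 (the function name here is illustrative, not the actual VergeML implementation):

# Illustrative only: strip leading "@name" tokens from argv, mirroring the
# behavior the assertions above expect.
def split_ai_names(argv):
    ais, rest = [], list(argv)
    while rest and rest[0].startswith("@"):
        ais.append(rest.pop(0).lstrip("@"))
    return ais, rest

assert split_ai_names(["@funky-terminator", "run", "tensorboard"]) == \
    (["funky-terminator"], ["run", "tensorboard"])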
Example 3
def test_command_usage6():
    cmd = Command('train', options=[
        Option(name='a', type=str, default="A"),
        Option(name='b', type=str, default="B"),
        Option(name='c', type=str, default="C"),
    ])
    assert cmd.usage() == USAGE_6
Example 4
def test_command_3():
    cmd = Command('predict', options=[Option(name="@AI")])
    assert cmd.parse(["@stubborn-dishwasher", "predict"]) == {
        '@AI': 'stubborn-dishwasher'
    }
    with pytest.raises(VergeMLError):
        cmd.parse(["predict"])
Example 5
def test_command_usage9():
    cmd = Command('predict', options=[
        Option(name='<file>', type='Optional[str]', descr="The file to use when predicting."),
        Option(name='a', type=str, default="A"),
        Option(name='b', type=str, default="B"),
        Option(name='c', type=str, default="C"),
    ])
    assert cmd.usage() == USAGE_9
Example 6
def test_command_usage8():
    cmd = Command('predict', options=[
        Option(name='<file>', type='Optional[str]'),
        Option(name='a', type=str, default="A"),
        Option(name='b', type=str, default="B"),
        Option(name='c', type=str, default="C"),
    ])
    assert cmd.usage() == USAGE_8
Example 7
def test_command_7():
    cmd = Command('help', options=[Option(name='<topic>'),
                                   Option(name="@AI", type='Optional[@]')], free_form=True)
    assert cmd.parse(["@funky-robot", "help", "--option=xyz", "something"]) == \
           ('funky-robot', ["--option=xyz", "something"])

    assert cmd.parse(["help", "--option=xyz", "something"]) == \
           (None, ["--option=xyz", "something"])
Example 8
def test_command_usage11():
    cmd = Command('predict',
                  options=[
                      Option(name='@AIs', type="List[AI]"),
                      Option(name='threshold',
                             default=0.2,
                             descr="Prediction Threshold.")
                  ])
    assert cmd.usage() == USAGE_11
Example 9
def test_command_usage4():
    cmd = Command('predict',
                  options=[
                      Option(name='@AI'),
                      Option(name='threshold',
                             default=0.2,
                             descr="Prediction Threshold.")
                  ])
    assert cmd.usage() == USAGE_4
Example 10
    def __init__(self, env):
        self.env = env
        self.fns = OrderedDict()
        for model_fn in Command.find_functions(env.model):
            cmd = Command.discover(model_fn)
            if cmd.kind == 'predict':
                self.fns[cmd.name] = (cmd, model_fn)

        if not self.fns:
            raise VergeMLError(f"@{env.AI} can't be run as a REST service.")
Example 11
def test_command_usage13():
    cmd = Command('predict',
                  long_descr="Make a prediction.",
                  examples="ml @skynet predict",
                  options=[
                      Option(name='@AIs', type="List[AI]"),
                      Option(name='threshold',
                             default=0.2,
                             descr="Prediction Threshold.")
                  ])
    assert cmd.usage() == USAGE_13
Example 12
def main(argv=None, plugins=PLUGINS):
    if argv is None:
        argv = sys.argv[1:]
    _configure_logging()
    try:
        run(argv, plugins=plugins)

    except VergeMLError as e:
        # NOTE: if the error occurs before the environment is created, ENV will still be empty.
        from vergeml.env import ENV
        # If there is an error with the config file but the user just typed
        # 'ml help <topic>' (where <topic> is the topic suggested by VergeML),
        # try to display the help message instead of the error.
        if ["help", e.help_topic] == argv:
            help = HelpCommand('help')
            print(help.get_help(ENV, e.help_topic))
        else:
            # Display the error. We can't use logging.error because pytest
            # does not capture stderr emitted via logging, so fall back to print.
            err_string = str(e).strip()
            print("Error! " + err_string, file=sys.stderr)
            # find all command topics
            commands = list(plugins.keys('vergeml.cmd'))
            if ENV and ENV.model_plugin:
                fns = Command.find_functions(ENV.model_plugin)
                mcommands = list(map(lambda f: Command.discover(f).name, fns))
                commands.extend(mcommands)
            # If the error is a single line and command help is available,
            # display the help message too.
            if e.help_topic and len(err_string.splitlines()) == 1 \
                    and e.help_topic in plugins.keys('vergeml.cmd'):
                print("")
                help = HelpCommand('help')
                help_topic = "" if e.help_topic == "*help*" else e.help_topic
                print(help.get_help(ENV, help_topic, short=True))
            # else just hint at the help topic
            elif e.help_topic:
                print("", file=sys.stderr)
                help_topic = "" if e.help_topic == "*help*" else " " + e.help_topic
                print(f"See 'ml help" + help_topic + "'.", file=sys.stderr)

    except Exception as err:  # pylint: disable=W0703
        if err.__class__.__name__ == 'ResourceExhaustedError':
            print("Error! Your GPU ran out of memory.")
            print(
                "Try lowering resource usage by decreasing model parameters such as batch size."
            )
        else:
            raise err
Example 13
def test_command_1():
    cmd = Command('train', options=[Option('epochs', 20, int, validate='>=1')])
    assert cmd.parse(["train", "--epochs=14"]) == {'epochs': 14}

    with pytest.raises(VergeMLError):
        cmd.parse(["train", "--epochs=abc"])

    with pytest.raises(VergeMLError):
        cmd.parse(["train", "--epochz=14"])

    with pytest.raises(VergeMLError):
        cmd.parse(["train", "--epochs=-1"])
Example 14
    def format_preprocessing_list(self):
        buffer = io.StringIO()

        cmd = Command.discover(PreprocessCommand)
        print(cmd.usage(), file=buffer)
        print("", file=buffer)
        print("Configuration:", file=buffer)

        print(format_info_text(_PREPROCESS_HELP.strip(), indent=2),
              file=buffer)
        print("", file=buffer)

        ops = {}
        for k in self.plugins.keys('vergeml.operation'):
            plugin = self.plugins.get('vergeml.operation', k)
            op = Operation.discover(plugin)
            topic = op.topic or "general"
            descr = op.descr
            ops.setdefault(topic, [])
            ops[topic].append((k, descr))

        for k, v in sorted(ops.items()):
            topic = k.capitalize()
            print(f"{topic} Operations:", file=buffer)
            print(format_info_text(_get_table(v), indent=2), file=buffer)
            print("", file=buffer)

        return buffer.getvalue().strip()
Example 15
def _parse_args(args, env):
    args = args[1]

    comps = []
    for idx, arg in enumerate(args):
        if arg in ('-gt', '-lt', '-eq', '-neq', '-gte', '-lte'):
            start, end = idx - 1, idx + 1
            if start < 0 or end >= len(args):
                raise VergeMLError("Invalid options.", help_topic='list')
            comps.append((start, end))

    cargs = []
    for start, end in reversed(comps):
        cargs.append(args[start:end+1])
        del args[start:end+1]

    cmd = deepcopy(Command.discover(ListCommand))
    cmd.free_form = False
    args.insert(0, 'list')
    args = cmd.parse(args)

    # If present, read settings from the config file.
    config = parse_command(cmd, env.get(cmd.name))

    # Set missing args from the config file
    for k, arg in config.items():
        args.setdefault(k, arg)

    # Set missing args from default
    for opt in cmd.options:
        if opt.name not in args and (opt.default is not None or not opt.is_required()):
            args[opt.name] = opt.default

    return args, cargs
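To make the windowing concrete, here is the same extraction run standalone on a sample argument list (illustrative values):

# Each comparison operator claims its left and right neighbors; deleting in
# reverse keeps earlier indices valid, exactly as in _parse_args above.
args = ['acc', '-gt', '0.9', '--sort=acc']
comps = [(idx - 1, idx + 1) for idx, arg in enumerate(args) if arg == '-gt']
cargs = []
for start, end in reversed(comps):
    cargs.append(args[start:end + 1])
    del args[start:end + 1]
assert cargs == [['acc', '-gt', '0.9']] and args == ['--sort=acc']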
Example 16
    def __init__(self, name, plugins=PLUGINS):
        self.name = name
        self.plugins = plugins

        # avoid circular dependency
        from vergeml.command import Command

        cmd = Command.discover(self)
        assert cmd
        cmd.name = name
Example 17
    def format_general_help(self, env=None, short=False):
        buffer = io.StringIO()
        print(USAGE, file=buffer)
        print("", file=buffer)

        terms = set(glossary.LONG_DESCR.keys())
        terms.update(self.plugins.keys('vergeml.cmd'))
        rng = random.Random()
        rng.seed(datetime.datetime.now().timestamp())
        random_term = rng.choice(list(terms))

        if env and env.model:
            print(f"Current Model: {env.get('model')}", file=buffer)
            print("", file=buffer)

        print("General Help:", file=buffer)
        for topic, descr in _GENERAL_HELP:
            print("  {:<16} {}".format(topic, descr), file=buffer)
        print("", file=buffer)

        print("Commands:", file=buffer)
        for cmd_name in self.plugins.keys('vergeml.cmd'):
            descr = Command.discover(self.plugins.get('vergeml.cmd',
                                                      cmd_name)).descr
            print("  {:<16} {}".format(cmd_name, descr), file=buffer)
        print("", file=buffer)

        if env and env.model:
            print("Model Commands:", file=buffer)
            for fn in Command.find_functions(env.model):
                cmd = Command.discover(fn)
                print("  {:<16} {}".format(cmd.name, cmd.descr), file=buffer)
            print("", file=buffer)

        if not short:
            print(
                "See 'ml help <command>' or 'ml help <topic>' to read about a specific subcommand or topic.",
                file=buffer)
            print(f"For example, try 'ml help {random_term}'", file=buffer)

        return buffer.getvalue().strip()
Example 18
    def decorator(o):
        assert getattr(o, _CMD_META_KEY, None) is None
        _name = name or getattr(o, '__name__', None)
        options = list(reversed(Option.discover(o)))
        cmd = Command(_name,
                      descr=descr,
                      long_descr=long_descr,
                      examples=examples,
                      options=options,
                      free_form=free_form,
                      kind=kind)
        setattr(o, _CMD_META_KEY, cmd)
        return o
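Read together with Example 10, the factory around this decorator presumably closes over name, descr, and the other keywords. A minimal self-contained sketch of the same metadata-attachment pattern, with a plain dict standing in for Command and an assumed attribute name:

# Hypothetical sketch; _CMD_META_KEY and the factory name `command` are
# assumptions, not the actual VergeML API.
_CMD_META_KEY = '__vergeml_cmd__'

def command(name=None, **kwargs):
    def decorator(o):
        assert getattr(o, _CMD_META_KEY, None) is None
        setattr(o, _CMD_META_KEY, {'name': name or o.__name__, **kwargs})
        return o
    return decorator

@command(descr="Make a prediction.")
def predict(args, env):
    pass

assert getattr(predict, _CMD_META_KEY)['name'] == 'predict'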
Example 19
def test_command_5():
    options = [
        Option('threshold', type=float, validate=">0", short='t'),
        Option('id', default=False, type=bool, flag=True, short='i')
    ]
    cmd = Command('predict', options=options)
    assert cmd.parse(["predict", "--threshold=0.2"]) == {'threshold': 0.2, 'id': False}
    assert cmd.parse(["predict", "-t0.2"]) == {'threshold': 0.2, 'id': False}
    assert cmd.parse(["predict", "-t0.2", "--id"]) == {'threshold': 0.2, 'id': True}
    assert cmd.parse(["predict", "-t0.2", "-i"]) == {'threshold': 0.2, 'id': True}
Example 20
    def format_topics(self, env):
        buffer = io.StringIO()

        print("General Help:", file=buffer)
        for topic, descr in _GENERAL_HELP:
            print("  {:<16} {}".format(topic, descr), file=buffer)
        print("", file=buffer)

        print("Commands:", file=buffer)
        for cmd_name in self.plugins.keys('vergeml.cmd'):
            descr = Command.discover(self.plugins.get('vergeml.cmd',
                                                      cmd_name)).descr
            print("  {:<16} {}".format(cmd_name, descr), file=buffer)
        print("", file=buffer)

        if env and env.model:
            print("Model Commands:", file=buffer)
            for fn in Command.find_functions(env.model):
                cmd = Command.discover(fn)
                print("  {:<16} {}".format(cmd.name, cmd.descr), file=buffer)
            print("", file=buffer)

        print("Configuration:", file=buffer)
        for topic, descr in _CONFIGURATION_HELP:
            print("  {:<16} {}".format(topic, descr), file=buffer)
        print("", file=buffer)

        inputs = []
        for k in self.plugins.keys('vergeml.io'):
            plugin = self.plugins.get('vergeml.io', k)
            source = Source.discover(plugin)
            inputs.append((k, source.descr))

        print("Data Input:", file=buffer)
        print(_get_table(inputs, IND=2, colon=False), file=buffer)
        print("", file=buffer)

        ops = {}
        for k in self.plugins.keys('vergeml.operation'):
            plugin = self.plugins.get('vergeml.operation', k)
            op = Operation.discover(plugin)
            topic = op.topic or "general"
            descr = op.descr
            ops.setdefault(topic, [])
            ops[topic].append((k, descr))

        for k, v in sorted(ops.items()):
            topic = k.capitalize()
            print(f"{topic} Operations:", file=buffer)
            print(format_info_text(_get_table(v), indent=2), file=buffer)
            print("", file=buffer)

        models = []
        for name in self.plugins.keys("vergeml.model"):
            plugin = self.plugins.get('vergeml.model', name)
            model = Model.discover(plugin)
            models.append((name, model.descr))

        if models:
            print("Models:", file=buffer)
            print(_get_table(models), file=buffer)
            print("", file=buffer)

        print("Glossary:", file=buffer)
        items = ", ".join(glossary.LONG_DESCR.keys())
        print(format_info_text(items, indent=2), file=buffer)

        return buffer.getvalue().strip()
Example 21
    def get_help(self, env=None, topic="", short=False):

        if topic:
            model_commands = {}
            if env and env.model:
                for fn in Command.find_functions(env.model):
                    cmd = Command.discover(fn)
                    model_commands[cmd.name] = cmd

            if topic == "-a":
                return self.format_topics(env)
            # show VergeML options
            elif topic == "options":
                return self.format_options()

            # display the glossary
            elif topic == "glossary":
                return self.format_glossary()

            # show available models
            elif topic == "models":
                return _with_header(self.format_models(),
                                    help="models",
                                    topic=topic)

            # explain the data.input section
            elif topic == "input":
                return _with_header(self.format_input_list(),
                                    help="configuration",
                                    topic=topic)

            # explain the data.cache section
            elif topic == "cache":
                return _with_header(format_info_text(_CACHE_HELP),
                                    help="configuration",
                                    topic=topic)

            # explain the data.output section
            elif topic == "output":
                return _with_header(format_info_text(_OUTPUT_HELP),
                                    help="configuration",
                                    topic=topic)

            # explain preprocessing
            elif topic in ("preprocess", "preprocessing"):
                return _with_header(self.format_preprocessing_list(),
                                    help="configuration",
                                    topic=topic)

            # explain the data section
            elif topic == "data":
                return _with_header(format_info_text(_DATA_HELP.strip()),
                                    help="configuration",
                                    topic=topic)

            # explain the device section
            elif topic == "device":
                return _with_header(format_info_text(_DEVICE_HELP.strip()),
                                    help="configuration",
                                    topic=topic)

            # show a random robot
            elif topic == "random robot":
                robot = ascii_robot(datetime.datetime.now(),
                                    random_robot_name(datetime.datetime.now()))
                return f"\n{robot}\n"

            elif ":" in topic and topic.split(
                    ":", 1)[0] in self.plugins.keys('vergeml.cmd'):
                command, subcommand = topic.split(":")
                cmd = Command.discover(self.plugins.get(
                    'vergeml.cmd', command))
                subcommand_option = next(
                    filter(lambda o: bool(o.subcommand), cmd.options), None)
                if not subcommand_option:
                    raise VergeMLError(f"{command} takes no subcommand",
                                       help_topic=command)

                plugin = self.plugins.get(subcommand_option.subcommand,
                                          subcommand)
                if not plugin:
                    raise VergeMLError(f"Invalid {subcommand_option.name}",
                                       help_topic=command)

                cmd = Command.discover(plugin)
                return cmd.usage(short, parent_command=command)

            # display model command help
            elif topic in model_commands:
                return model_commands[topic].usage(short)

            # display command help
            elif topic in self.plugins.keys('vergeml.cmd'):
                cmd = Command.discover(self.plugins.get('vergeml.cmd', topic))
                return cmd.usage(short)

            elif topic in self.plugins.keys('vergeml.operation'):
                return _with_header(self.format_source_or_operation(
                    topic, 'vergeml.operation', Operation),
                                    help="preprocessing operation",
                                    topic=topic)

            elif topic in self.plugins.keys('vergeml.io'):
                return _with_header(self.format_source_or_operation(
                    topic, 'vergeml.io', Source),
                                    help="data source",
                                    topic=topic)

            elif topic in self.plugins.keys('vergeml.model'):
                return _with_header(self.format_model(topic),
                                    help="models",
                                    topic=topic)

            # show a glossary entry
            elif glossary.long_descr(topic):
                topic = glossary.SYNONYMS.get(topic, topic)
                return _with_header(format_info_text(
                    glossary.long_descr(topic)),
                                    help="glossary",
                                    topic=topic)

            # show base options help
            elif topic in dict(HELP_OPTIONS):
                return _with_header(format_info_text(
                    dict(HELP_OPTIONS).get(topic)),
                                    help="base options",
                                    topic=topic)

            else:
                candidates = set()
                candidates.update(map(lambda h: h[0], _GENERAL_HELP))
                candidates.update(self.plugins.keys("vergeml.cmd"))
                candidates.update(map(lambda h: h[0], _CONFIGURATION_HELP))
                candidates.update(self.plugins.keys("vergeml.io"))
                candidates.update(self.plugins.keys("vergeml.operation"))
                candidates.update(self.plugins.keys("vergeml.model"))
                if env and env.model:
                    for fn in Command.find_functions(env.model):
                        cmd = Command.discover(fn)
                        candidates.add(cmd.name)
                candidates.update(glossary.LONG_DESCR.keys())
                candidates.update(glossary.SYNONYMS.keys())

                suggestion = did_you_mean(list(candidates), topic)
                if suggestion:
                    return f"No help found for topic '{topic}'. " + suggestion
                else:
                    return f"No help found for topic '{topic}'."

        else:
            return self.format_general_help(env, short=short)
Example 22
    def __call__(self, args, env):
        args = args[1]

        comps = []
        for ix, arg in enumerate(args):
            if arg in ('-gt', '-lt', '-eq', '-neq', '-gte', '-lte'):
                start, end = ix - 1, ix + 1
                if start < 0 or end >= len(args):
                    raise VergeMLError("Invalid options.", help_topic='list')
                comps.append((start, end))

        comp_args = []
        for start, end in reversed(comps):
            comp_args.append(args[start:end+1])
            del args[start:end+1]

        cmd = deepcopy(Command.discover(ListCommand))
        cmd.free_form = False
        args.insert(0, 'list')
        args = cmd.parse(args)
        args.setdefault('sort', 'created_at')
        args.setdefault('order', 'asc')
        args.setdefault('columns', None)
        args.setdefault('output', 'table')

        train_dir = env.get("trainings-dir")
        if not os.path.exists(train_dir):
            return

        info = {}
        hyper = {}

        for AI in os.listdir(train_dir):
            data_yaml = os.path.join(train_dir, AI, 'data.yaml')
            if os.path.isfile(data_yaml):
                with open(data_yaml) as f:
                    doc = yaml.safe_load(f)
            else:
                doc = {}
            info[AI] = {}
            hyper[AI] = {}

            if 'model' in doc:
                info[AI]['model'] = doc['model']

            if 'results' in doc:
                info[AI].update(doc['results'])

            if 'hyperparameters' in doc:
                hyper[AI].update(doc['hyperparameters'])

        if args['columns']:
            theader = ['AI'] + [s.strip() for s in args['columns'].split(",")]
            exclude = []
        else:
            theader = ['AI', 'model', 'status', 'num_samples', 'training_start', 'epochs']
            exclude = ['training_end', 'steps', 'created_at']

        sort = [s.strip() for s in args['sort'].split(",")]

        info = OrderedDict(sorted(info.items(), reverse=(args['order'] == 'asc'),
                           key=lambda x: [x[1].get(s, 0) for s in sort]))

        tdata = []
        left_align = set([0])

        for AI, results in info.items():
            rdata = [""] * len(theader)
            rdata[0] = "@" + AI

            if not _filter(results, hyper[AI], comp_args):
                continue

            for k, v in sorted(results.items()):
                if k in exclude and not args['columns']:
                    continue

                if k not in theader and not args['columns'] and isinstance(v, (str, int, float)):
                    theader.append(k)
                    rdata.append(None)
                if k in theader:
                    pos = theader.index(k)

                    if k in ('training_start', 'training_end', 'created_at'):
                        v = datetime.utcfromtimestamp(v)
                        v = v.strftime("%Y-%m-%d %H:%M")
                    elif isinstance(v, float):
                        v = "%.4f" % v
                    elif isinstance(v, str):
                        left_align.add(pos)

                    rdata[pos] = v

            for k, v in sorted(hyper[AI].items()):
                if k in theader:
                    pos = theader.index(k)
                    if isinstance(v, float):
                        v = "%.4f" % v
                    elif isinstance(v, str):
                        left_align.add(pos)

                    rdata[pos] = v

            tdata.append(rdata)
        
        if args['output'] == 'table':
            if not tdata:
                return
            tdata.insert(0, theader)
            print(DISPLAY.table(tdata, left_align=left_align).getvalue(fit=True))
        elif args['output'] == 'json':
            res = []
            for row in tdata:
                res.append(dict(zip(theader, row)))
            print(json.dumps(res))

        elif args['output'] == 'csv':
            writer = csv.writer(sys.stdout)
            writer.writerow(theader)
            for row in tdata:
                writer.writerow(row)
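The sort above tolerates rows that lack a sort column by substituting 0; the core pattern in isolation (illustrative values):

# Multi-column sort as used above: a missing sort key counts as 0, so sparse
# rows cluster at one end instead of raising KeyError.
rows = {'a': {'acc': 0.9}, 'b': {}, 'c': {'acc': 0.5}}
sort = ['acc']
ordered = sorted(rows.items(), key=lambda x: [x[1].get(s, 0) for s in sort])
assert [k for k, _ in ordered] == ['b', 'c', 'a']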
Example 23
def test_command_4():
    cmd = Command('predict', options=[Option(name="@AI", type="Optional[AI]")])
    assert cmd.parse(["@stubborn-dishwasher", "predict"]) == {
        '@AI': 'stubborn-dishwasher'
    }
    assert cmd.parse(["predict"]) == {'@AI': None}
Example 24
def test_command_6():
    cmd = Command('new', options=[Option(name='<project-name>', type='str')])
    assert cmd.parse(["new", "xxx"]) == {'<project-name>': "xxx"}
    with pytest.raises(VergeMLError):
        cmd.parse(["new"])
Example 25
def test_command_usage1():
    cmd = Command('new', options=[Option(name='<project-name>')])
    assert cmd.usage() == USAGE_1
Example 26
def test_command_usage2():
    cmd = Command('train',
                  options=[Option(name='learning-rate', default=0.0001)])
    assert cmd.usage() == USAGE_2
Example 27
    def __init__(self,
                 model=None,
                 project_file=None,
                 samples_dir=None,
                 test_split=None,
                 val_split=None,
                 cache_dir=None,
                 random_seed=None,
                 trainings_dir=None,
                 project_dir=None,
                 AI=None,
                 is_global_instance=False,
                 config={},
                 plugins=PLUGINS,
                 display=DISPLAY):
        """Configure, train and save the results.

        :param model:           Name of the model plugin.
        :param project_file:    Optional path to the project file.
        :param samples_dir:     The directory where samples can be found. [default: samples]
        :param test_split:      The test split. [default: 10%]
        :param val_split:       The val split. [default: 10%]
        :param cache_dir:       The directory used for caching [default: .cache]
        :param random_seed:     Random seed. [default: 2204]
        :param trainings_dir:   The directory to save training results to. [default: trainings]
        :param project_dir:     The directory of the project. [default: current directory]
        :param AI:              Optional name of a trained AI.
        :param is_global_instance: If true, this env can be accessed under the global var env.ENV. [default: false]
        :param config:          Additional configuration to pass to env, e.g. when not using a project file.
        """

        super().__init__()

        # when called from the command line, we need to have a global instance
        if is_global_instance:
            global ENV
            ENV = self

        # setup the display
        self.display = display
        # set the name of the AI if given
        self.AI = AI
        # this holds the model object (not the name of the model)
        self.model = None
        # the results class (responsible for updating data.yaml with the latest results during training)
        self.results = None
        # when a training is started, this holds the object responsible for coordinating the training
        self.training = None
        # hold a proxy to the data loader
        self._data = None

        self.plugins = plugins

        # set up the base options from constructor arguments
        self._config = {}
        self._config['samples-dir'] = samples_dir
        self._config['test-split'] = test_split
        self._config['val-split'] = val_split
        self._config['cache-dir'] = cache_dir
        self._config['random-seed'] = random_seed
        self._config['trainings-dir'] = trainings_dir
        self._config['model'] = model

        validators = {}
        # add validators for commands
        for k, v in plugins.all('vergeml.cmd').items():
            cmd = Command.discover(v)
            validators[cmd.name] = ValidateOptions(cmd.options,
                                                   k,
                                                   plugins=plugins)
        # now it gets a bit tricky - we need to peek at the model name
        # to find the right validators to create for model commands.
        peek_model_name = model
        peek_trainings_dir = trainings_dir
        # to do this, we have to first have a look at the project file
        try:
            project_doc = load_yaml_file(project_file) if project_file else {}
            # only update model name if empty (project file does not override command line)
            peek_model_name = peek_model_name or project_doc.get('model', None)
            # pick up trainings-dir in the same way
            peek_trainings_dir = peek_trainings_dir or project_doc.get(
                'trainings-dir', None)
            # if we don't have a trainings dir yet, set to default
            peek_trainings_dir = peek_trainings_dir or os.path.join(
                project_dir or "", "trainings")
            # now, try to load the data.yaml file and see if we have a model definition there
            data_doc = load_yaml_file(peek_trainings_dir, AI,
                                      "data.yaml") if AI else {}
            # if we do, this overrides everything, also the one from the command line
            peek_model_name = data_doc.get('model', peek_model_name)
            # finally, if we have a model name, set up validators
            if peek_model_name:
                for fn in Command.find_functions(plugins.get(
                        "vergeml.model", peek_model_name),
                                                 plugins=plugins):
                    cmd = Command.discover(fn)
                    validators[cmd.name] = ValidateOptions(
                        cmd.options, cmd.name, plugins)
        except Exception:
            # in this case we don't care if something went wrong - the error
            # will be reported later
            pass
        # finally, validators for device and data sections
        validators['device'] = ValidateDevice('device', plugins)
        validators['data'] = ValidateData('data', plugins)

        # merge project file
        if project_file:
            doc = _load_and_configure(project_file, 'project file', validators)
            # the project file DOES NOT override values passed to the environment
            # TODO reserved: hyperparameters and results
            for k, v in doc.items():
                if k not in self._config or self._config[k] is None:
                    self._config[k] = v

        # after the project file is loaded, fill missing values
        project_dir = project_dir or ''
        defaults = {
            'samples-dir': os.path.join(project_dir, "samples"),
            'test-split': '10%',
            'val-split': '10%',
            'cache-dir': os.path.join(project_dir, ".cache"),
            'random-seed': 2204,
            'trainings-dir': os.path.join(project_dir, "trainings"),
        }
        for k, v in defaults.items():
            if self._config[k] is None:
                self._config[k] = v

        # verify split values
        for split in ('val-split', 'test-split'):
            spltype, splval = parse_split(self._config[split])
            if spltype == 'dir':
                path = os.path.join(project_dir, splval)
                if not os.path.exists(path):
                    raise VergeMLError(
                        f"Invalid value for option {split} - no such directory: {splval}",
                        f"Please set {split} to a percentage, number or directory.",
                        hint_key=split,
                        hint_type='value',
                        help_topic='split')
                self._config[split] = path

        # need to have data_file variable in outer scope for later when reporting errors
        data_file = None
        if self.AI:
            ai_path = os.path.join(self._config['trainings-dir'], self.AI)
            if not os.path.exists(ai_path):
                raise VergeMLError("AI not found: {}".format(self.AI))
            # merge data.yaml
            data_file = os.path.join(self._config['trainings-dir'], self.AI,
                                     'data.yaml')
            if not os.path.exists(data_file):
                raise VergeMLError(
                    "data.yaml file not found for AI {}: {}".format(
                        self.AI, data_file))
            doc = load_yaml_file(data_file, 'data file')
            self._config['hyperparameters'] = doc.get('hyperparameters', {})
            self._config['results'] = doc.get('results', {})
            self._config['model'] = doc.get('model')
            self.results = _Results(self, data_file)

        try:
            # merge device and data config
            self._config.update(apply_config(config, validators))
        except VergeMLError as e:
            # improve the error message when this runs on the command line
            if is_global_instance and e.hint_key:
                key = e.hint_key
                e.message = f"Option --{key}: " + e.message
            raise e

        if self._config['model']:
            # load the model plugin
            modelname = self._config['model']
            self.model = plugins.get("vergeml.model", modelname)

            if not self.model:
                message = f"Unknown model name '{modelname}'"
                suggestion = did_you_mean(plugins.keys('vergeml.model'),
                                          modelname) or "See 'ml help models'."

                # if model was passed in via --model
                if model and is_global_instance:
                    message = f"Invalid value for option --model: {message}"
                else:
                    res = None
                    if not res and data_file:
                        # first check if model was defined in the data file
                        res = _check_definition(data_file, 'model', 'value')
                    if not res and project_file:
                        # next check the project file
                        res = _check_definition(project_file, 'model', 'value')
                    if res:
                        filename, definition = res
                        line, column, length = definition
                        # display a nice error message
                        message = display_err_in_file(
                            filename, line, column, f"{message} {suggestion}",
                            length)
                        # set suggestion to None since it is now contained in message
                        suggestion = None
                raise VergeMLError(message, suggestion)
            else:
                # instantiate the model plugin
                self.model = self.model(modelname, plugins)

        # update env from validators
        for _, plugin in validators.items():
            for k, v in plugin.values.items():
                self._config[k] = v

        # always set up numpy and python
        self.configure('python')
        self.configure('numpy')
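The precedence implemented above (explicit constructor arguments over the project file over built-in defaults) reduces to a small merge; a condensed sketch of that rule, not the actual Env code:

# A key set to None falls through to the project file, and from there to the
# built-in default; explicit non-None values always win.
def merge_config(explicit, project_doc, defaults):
    config = dict(explicit)
    for source in (project_doc, defaults):
        for k, v in source.items():
            if config.get(k) is None:
                config[k] = v
    return config

cfg = merge_config({'model': 'inception', 'random-seed': None},
                   {'random-seed': 7}, {'random-seed': 2204, 'model': None})
assert cfg == {'model': 'inception', 'random-seed': 7}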
Example 28
def run(argv, plugins=PLUGINS):
    try:
        argv = _forgive_wrong_option_order(argv)
        args, config, rest = _parsebase(argv, plugins=plugins)
    except getopt.GetoptError as err:
        if err.opt:
            opt = err.opt.lstrip("-")
            dashes = '-' if len(opt) == 1 else '--'
            raise VergeMLError(f"Invalid option {dashes}{opt}.", help_topic='options')
        else:
            raise VergeMLError(f"Invalid option.", help_topic='options')
    
    if 'version' in args:
        print_version()
        exit()

    args = _prepare_args(args)
    ai_names, after_names = parse_ai_names(rest)

    AI = next(iter(ai_names), None)
  
    env = _env_from_args(args, config, AI, plugins=plugins)

    if after_names:
        cmdname = after_names.pop(0)
    else:
        cmdname = 'help'
        rest = ['help']

    if ":" in cmdname:
        cmdname = cmdname.split(":")[0]

    plugin = None
    cmd_plugin = plugins.get('vergeml.cmd', cmdname)
    if cmd_plugin:
        plugin = cmd_plugin(cmdname, plugins=plugins)
    elif env.model:
        for model_fn in Command.find_functions(env.model):
            if cmdname == Command.discover(model_fn).name:
                plugin = model_fn
                break

    if not plugin:
        # collect all possible command names
        command_names = set(plugins.keys('vergeml.cmd'))
        if env.model:
            model_commands = set(map(lambda f: Command.discover(f).name, Command.find_functions(env.model)))
            command_names.update(model_commands)

        raise VergeMLError(f"Invalid command '{cmdname}'.",  
                           suggestion=did_you_mean(command_names, cmdname),
                           help_topic='*help*')
    try:
        env_conf = env.get(cmdname) or {}
        cmd = Command.discover(plugin)
        assert cmd
        args = cmd.parse(rest, env_conf)

        if not cmd.free_form:
            # set defaults
            for opt in cmd.options:
                if opt.name not in args:
                    args[opt.name] = opt.default
            # merge back into env
            for k, v in args.items():
                env.set(f"{cmdname}.{k}", v)
        env.set("command", cmdname)
        env.set_defaults(cmdname, args, plugins=plugins)
    
        try:
            # return the result for unit testing
            return plugin(args, env)
        finally:
            if env.training is not None:
                env.cancel_training()
       
    except KeyboardInterrupt:
        # silence the stacktrace
        pass