Esempio n. 1
0
    def write_preview(self, output_dir: str, split: str, sample: Sample):
        """Save a sample image and expose it per split and label via symlinks.

        The image is written below ``<output_dir>/.data``. For every label in
        ``sample.y`` a symlink pointing at that file is created in
        ``<output_dir>/<split>/<label>``.

        :param output_dir: root directory of the preview output
        :param split: name of the split the sample belongs to
        :param sample: the sample to write; ``x`` is the image, ``y`` the labels

        :raises VergeMLError: if ``sample.x`` is not an ``ImageType`` or
                              ``sample.y`` is not a ``Labels`` instance
        """
        # Reject anything that is not an image with label ground truth.
        if not isinstance(sample.x, ImageType):
            raise VergeMLError(f"Can't write sample with type: {type(sample.x)}")

        if not isinstance(sample.y, Labels):
            raise VergeMLError(
                f"Can't write ground truth with type: {type(sample.y)}")

        # The actual image data lives in the hidden .data directory.
        basename = os.path.basename(sample.meta['filename'])
        name = fixext(basename, sample.x)
        target = self.preview_filename(os.path.join(output_dir, ".data", name))
        sample.x.save(target)

        # One directory per split/label, each holding a link to the image, so
        # that split and label are visible in a file manager.
        for label in sample.y:
            label_dir = os.path.join(output_dir, split, label)
            if not os.path.exists(label_dir):
                os.makedirs(label_dir)
            link = self.preview_filename(os.path.join(label_dir, name))
            os.symlink(os.path.abspath(target), link)
Esempio n. 2
0
    def transform_xy(self, x, y, rng):
        """Cut the same randomly placed window out of ``x`` and/or ``y``.

        Only the arguments that are ``ImageType`` instances are cropped; the
        others are returned unchanged. The window has the size
        ``self.width`` x ``self.height`` and its offset is drawn from ``rng``.

        :param x: sample data
        :param y: ground truth
        :param rng: random number generator providing ``randint(a, b)``

        :return: the tuple ``(x, y)`` after cropping
        :raises VergeMLError: if neither value is an image or the smallest
                              image is smaller than the crop window
        """
        x_is_image = isinstance(x, ImageType)
        y_is_image = isinstance(y, ImageType)
        images = [img for img, keep in ((x, x_is_image), (y, y_is_image)) if keep]

        if not images:
            raise VergeMLError("random_crop needs samples of type image")

        # The window has to fit into every image, so the smallest one decides.
        avail_width = min(img.size[0] for img in images)
        avail_height = min(img.size[1] for img in images)

        if avail_width < self.width:
            raise VergeMLError(
                f"Can't crop sample with width {avail_width} to {self.width}.")

        if avail_height < self.height:
            raise VergeMLError(
                f"Can't crop sample with height {avail_height} to {self.height}.")

        left = rng.randint(0, avail_width - self.width)
        top = rng.randint(0, avail_height - self.height)
        box = (left, top, left + self.width, top + self.height)

        if x_is_image:
            x = x.crop(box)
        if y_is_image:
            y = y.crop(box)

        return x, y
Esempio n. 3
0
def load_yaml_file(filename, label='config file', loader=yaml.Loader):
    """Load a YAML config file and return its content as a dict.

    An empty document yields an empty dict.

    :param filename: path of the YAML file to read
    :param label: human readable name of the file, used in error messages
    :param loader: loader class handed to ``yaml.load``

    :return: the parsed document
    :raises VergeMLError: if the file can't be opened, contains a YAML error
                          or its top level is not a mapping
    """
    # NOTE(review): the default yaml.Loader is not restricted to plain data.
    # If these files can come from untrusted sources, pass yaml.SafeLoader.
    try:
        with open(filename, "r") as file:
            res = yaml.load(file.read(), Loader=loader) or {}
    except yaml.YAMLError as err:
        # The attributes may be absent (plain YAMLError) or present but None,
        # so test the values rather than using hasattr().
        mark = getattr(err, 'problem_mark', None)
        problem = getattr(err, 'problem', None)

        if mark is not None:
            message = f"Could not read {label} {filename}:"
            message += "\n" + display_err_in_file(filename, mark.line,
                                                  mark.column, problem)
        elif problem:
            message = f"Could not read {label} {filename}: {problem}"
        else:
            message = f"Could not read {label} {filename}: YAML Error"

        suggestion = f"There is a syntax error in your {label} - please fix it and try again."

        raise VergeMLError(message, suggestion) from err
    except OSError as err:
        msg = "Please ensure the file exists and you have the required access privileges."
        raise VergeMLError(
            f"Could not open {label} {filename}: {err.strerror}", msg) from err

    if not isinstance(res, dict):
        msg = f"Please ensure that {label} consists of key value pairs."
        raise VergeMLError(f"Invalid {label}: {filename}", msg)

    return res
Esempio n. 4
0
    def __init__(self,
                 width: int,
                 height: int,
                 x: int = None,
                 y: int = None,
                 position: str = "center",
                 apply=None):
        """Configure a crop to a fixed size.

        :param width: width of the cropped result
        :param height: height of the cropped result
        :param x: absolute x coordinate of the crop origin (requires ``y``)
        :param y: absolute y coordinate of the crop origin (requires ``x``)
        :param position: one of "top-left", "top-right", "bottom-left",
                         "bottom-right" or "center"
        :param apply: passed through to the base class constructor

        :raises VergeMLError: if only one of ``x``/``y`` is given or
                              ``position`` is not a valid value
        """
        super().__init__(apply)

        # Compare against None instead of truthiness: 0 is a legitimate
        # coordinate and must not count as "not specified".
        if (x is None) != (y is None):
            raise VergeMLError(
                "Must specify both x and y when using absolute coordinates")

        valid_positions = ("top-left", "top-right", "bottom-left",
                           "bottom-right", "center")

        if position not in valid_positions:
            raise VergeMLError("position must be one of: {}".format(
                ", ".join(valid_positions)))

        self.width = width
        self.height = height
        self.x = x
        self.y = y
        self.position = position
Esempio n. 5
0
    def _parse_subcommand(self, argv, rest):
        """Delegate parsing to a sub command given as ``command:subcommand``.

        :param argv: the full argument list
        :param rest: remaining arguments; the first one is expected to carry
                     the ``command:subcommand`` token

        :return: the parsed arguments of the sub command, including the name
                 of the sub command under the sub option's name, or ``None``
                 when this command declares no sub option
        :raises VergeMLError: if the sub command is missing or unknown, or if
                              parsing the sub command fails
        """
        sub_option = next(
            (opt for opt in self.options if bool(opt.subcommand)), None)

        if not sub_option:
            return None

        token = rest[0]
        if ":" not in token:
            raise VergeMLError(f"Missing {sub_option.name}.",
                               help_topic=self.name)

        cmd_name, sub_name = token.split(":", 1)
        assert cmd_name == self.name

        # Work on a copy where the combined token is replaced by the plain
        # sub command name.
        argv = deepcopy(argv)
        argv[argv.index(token)] = sub_name

        plugin = self.plugins.get(sub_option.subcommand, sub_name)
        if not plugin:
            raise VergeMLError(f"Invalid {sub_option.name}.",
                               help_topic=self.name)

        sub_cmd = Command.discover(plugin)
        try:
            parsed = sub_cmd.parse(argv)
            parsed[sub_option.name] = sub_name
            return parsed
        except VergeMLError as err:
            # Point the help hint at the sub command.
            err.help_topic = f"{cmd_name}:{sub_name}"
            raise err
Esempio n. 6
0
    def __call__(self, args, env):
        """Start tensorboard for the requested AIs and open it in a browser.

        Every directory in the trainings dir that has a ``stats`` directory
        containing an ``events.out*`` file counts as an AI with tensorboard
        stats. When ``args["@AIs"]`` is empty, all of them are shown.

        :param args: command arguments; reads ``@AIs``, ``port`` and the
                     optional ``host``
        :param env: the environment; reads ``trainings-dir``

        :raises VergeMLError: if there are no trainings, a requested AI has no
                              stats, or the tensorboard executable is missing
        """
        trainings_dir = env.get('trainings-dir')

        if not os.path.exists(trainings_dir):
            raise VergeMLError(
                "No trainings found.",
                "To run tensorboard, please train an AI first.")

        ais_with_tbstats = []
        for entry in os.listdir(trainings_dir):
            if entry.startswith("."):
                continue

            stats_dir = os.path.join(trainings_dir, entry, "stats")
            if not os.path.exists(stats_dir):
                continue

            if any(name.startswith("events.out")
                   for name in os.listdir(stats_dir)):
                ais_with_tbstats.append(entry)

        AIs = args["@AIs"] or ais_with_tbstats

        if not AIs:
            raise VergeMLError(
                "No trainings found.",
                "To run tensorboard, please train an AI first.")

        for AI in AIs:
            if AI not in ais_with_tbstats:
                raise VergeMLError(
                    "No tensorboard stats found for @{}".format(AI))

        # tensorboard takes a comma separated list of name:path pairs.
        logdirs = ",".join(
            AI + ":" + os.path.join(trainings_dir, AI, "stats") for AI in AIs)
        cmd = ["tensorboard", "--logdir", logdirs, "--port", str(args['port'])]
        if 'host' in args and args['host']:
            cmd.append('--host')
            cmd.append(args['host'])

        try:
            for line in _run_command(cmd):
                line = line.decode('utf-8').rstrip()
                # Open the browser when the output contains an http URL.
                match = re.match(r".*?(http:[^ ]*)", line)
                if match:
                    webbrowser.open(match.group(1))
                print(line)
        except FileNotFoundError as err:
            raise VergeMLError(
                "Command 'tensorboard' not found.",
                "Please install tensorboard (pip install tensorboard)") from err
Esempio n. 7
0
 def samples_dir(self):
     """Return the configured samples directory.

     :raises VergeMLError: if the path does not exist or is not a directory
     """
     configured = self._config['samples-dir']

     if os.path.isdir(configured):
         return configured

     # Not a directory: tell apart "wrong kind of file" from "missing".
     if os.path.exists(configured):
         raise VergeMLError(f'Configured samples-dir is not a directory: {configured}')

     raise VergeMLError(f'Could not find samples directory: {configured}')
Esempio n. 8
0
def evaluate_args(cnn, trainings_dir, variant, alpha, size):
    """Validate the architecture related options of a CNN.

    ``cnn`` is either the name of a known architecture or ``@<name>``, which
    refers to the checkpoint ``<trainings_dir>/<name>/checkpoints/model.h5``.

    :param cnn: architecture name or ``@`` reference to a trained model
    :param trainings_dir: directory containing the trainings
    :param variant: architecture variant or ``'*auto*'``
    :param alpha: alpha value (checked for the mobilenet architectures)
    :param size: image size or ``'*auto*'`` (checked for the mobilenets)

    :raises VergeMLError: if any of the values is invalid
    """
    auto = '*auto*'

    if cnn.startswith('@'):
        checkpoint = os.path.join(trainings_dir, cnn.lstrip('@'),
                                  'checkpoints', 'model.h5')
        if not os.path.isfile(checkpoint):
            raise VergeMLError(f"Unknown CNN: {cnn}")
    elif cnn not in ARCHITECTURES:
        raise VergeMLError(f"Unknown CNN: {cnn}")

    if cnn == 'densenet':
        chosen = DENSENET_VARIANTS[0] if variant == auto else variant
        if chosen not in DENSENET_VARIANTS:
            raise VergeMLError(f"Invalid densenet variant: {chosen}")

    elif cnn in ('mobilenet', 'mobilenet-v2'):
        # Both mobilenet generations share the checks, only the allowed
        # values differ.
        if cnn == 'mobilenet':
            sizes, alphas = MOBILENET_SIZES, MOBILENET_ALPHA_VALUES
        else:
            sizes, alphas = MOBILENET_V2_SIZES, MOBILENET_V2_ALPHA_VALUES

        if size not in sizes and size != auto:
            raise VergeMLError(f"Invalid mobilenet size: {size}")
        if alpha not in alphas:
            raise VergeMLError(f"Invalid alpha value: {alpha}")

    elif cnn == 'nasnet':
        if variant not in NASNET_VARIANTS and variant != auto:
            raise VergeMLError(f"Invalid nasnet variant: {variant}")
Esempio n. 9
0
def _parse_args(args, env):
    """Parse the arguments of the list command.

    Comparison expressions (``<left> -gt <right>`` and friends) are cut out
    of the argument list first; what remains is parsed as regular options of
    the list command and completed from the config file and option defaults.

    Note that the list in ``args[1]`` is modified in place.

    :param args: a sequence whose second item is the list of arguments
    :param env: the environment, used to look up the command configuration

    :return: a tuple ``(parsed arguments, comparison expressions)``
    :raises VergeMLError: if a comparison operator lacks an operand
    """
    args = args[1]
    operators = ('-gt', '-lt', '-eq', '-neq', '-gte', '-lte')

    # Locate every "<left> <operator> <right>" triple.
    spans = []
    for pos, token in enumerate(args):
        if token not in operators:
            continue
        first, last = pos - 1, pos + 1
        if first < 0 or last >= len(args):
            raise VergeMLError("Invalid options.", help_topic='list')
        spans.append((first, last))

    # Remove the triples back to front so earlier indices stay valid.
    cargs = []
    for first, last in reversed(spans):
        stop = last + 1
        cargs.append(args[first:stop])
        del args[first:stop]

    cmd = deepcopy(Command.discover(ListCommand))
    cmd.free_form = False
    args.insert(0, 'list')
    parsed = cmd.parse(args)

    # Values from the config file fill in what was not given explicitly.
    for key, value in parse_command(cmd, env.get(cmd.name)).items():
        parsed.setdefault(key, value)

    # Finally fall back to the option defaults.
    for opt in cmd.options:
        if opt.name in parsed:
            continue
        if opt.default is not None or not opt.is_required():
            parsed[opt.name] = opt.default

    return parsed, cargs
Esempio n. 10
0
    def _load_yaml_and_configure(self, path, label, cache, device, device_memory): # pylint: disable=R0913
        """Load a YAML file and parse its device, data and random-seed settings.

        When a ``VergeMLError`` carrying a ``hint_key`` occurs and the
        corresponding definition can be located in the file, the error
        message is replaced by an excerpt of the file before re-raising.

        :param path: path of the YAML file
        :param label: human readable name of the file for error messages
        :param cache: passed to ``parse_data``
        :param device: passed to ``parse_device`` as ``device_id``
        :param device_memory: passed to ``parse_device``

        :return: the loaded document with parsed ``device`` and ``data``
        :raises VergeMLError: if loading or validating the file fails
        """
        doc = load_yaml_file(path, label)
        try:
            device_conf = doc.get('device', {})
            doc['device'] = parse_device(device_conf,
                                         device_id=device,
                                         device_memory=device_memory)

            data_conf = doc.get('data', {})
            doc['data'] = parse_data(data_conf, cache=cache, plugins=self.plugins)

            if 'random-seed' in doc and not isinstance(doc['random-seed'], int):
                raise VergeMLError('Invalid value option random-seed.',
                                   'random-seed must be an integer value.',
                                   hint_type='value',
                                   hint_key='random-seed')
        except VergeMLError as err:
            if err.hint_key:
                with open(path) as file:
                    definition = yaml_find_definition(file, err.hint_key, err.hint_type)

                if definition:
                    line, column, length = definition
                    err.message = display_err_in_file(path, line, column, str(err), length)
                    # The suggestion is already part of the formatted message.
                    err.suggestion = None

            raise err

        return doc
Esempio n. 11
0
    def split(self, num_samples: int):
        """Split the dataset in train, val and test sets by percentage or absolute count.

        The sizes of the val and test sets are taken from ``self.val_perc`` /
        ``self.test_perc`` (percentages) or, when those are not set, from
        ``self.val_num`` / ``self.test_num`` (absolute counts). The sample
        indices are shuffled with a generator seeded by ``self.random_seed``.

        :param num_samples: the total number of samples

        :return: a tuple of index lists for (train, val, test)
        :raises VergeMLError: if val and test together need more samples than
                              are available
        """

        val_num = int(num_samples * self.val_perc // 100) if self.val_perc else self.val_num or 0
        test_num = int(num_samples * self.test_perc // 100) if self.test_perc else self.test_num or 0

        if val_num + test_num > num_samples:
            hint_key = None
            hint_type = None

            # Point the error at the option that holds an absolute number.
            # NOTE(review): assumes the options are named 'val-split' and
            # 'test-split' in the project file, as the suggestion text below
            # says - confirm the exact key path.
            if self.val_num:
                hint_key = 'val-split'
                hint_type = 'value'
            elif self.test_num:
                hint_key = 'test-split'
                hint_type = 'value'

            raise VergeMLError("There are not enough samples to provide the configured number for the val and test split",
                               "If you use absolute numbers for 'val-split' or 'test-split', try to lower them",
                               help_topic='split', hint_key=hint_key, hint_type=hint_type)

        rng = random.Random(self.random_seed)
        indices = rng.sample(range(num_samples), num_samples)
        val, test, train = indices[:val_num], indices[val_num:val_num + test_num], indices[val_num + test_num:]
        return train, val, test
Esempio n. 12
0
    def __call__(self, args, env):
        """Download the cats and dogs dataset into the samples directory.

        The archive is downloaded and extracted, the ``PetImages/Cat`` and
        ``PetImages/Dog`` folders are copied to ``<samples-dir>/cat`` and
        ``<samples-dir>/dog``, and the download directory is removed again.

        :param args: command arguments (not used here)
        :param env: the environment; reads ``samples-dir``

        :raises VergeMLError: if ``cat`` or ``dog`` already exists in the
                              samples directory
        """
        samples_dir = env.get('samples-dir')
        for label in ("cat", "dog"):
            dest = os.path.join(samples_dir, label)
            if os.path.exists(dest):
                raise VergeMLError(
                    "Directory {} already exists in samples dir: {}".format(
                        label, dest))
        print("Downloading cats and dogs to {}.".format(samples_dir))
        src_dir = self.download_files([(_URL, "catsdogs.zip")], env)
        path = os.path.join(src_dir, "catsdogs.zip")

        print("Extracting data.")
        # The context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(path, 'r') as zipf:
            zipf.extractall(src_dir)

        for file, dest in (("PetImages/Dog", "dog"), ("PetImages/Cat", "cat")):
            shutil.copytree(os.path.join(src_dir, file),
                            os.path.join(samples_dir, dest))

        shutil.rmtree(src_dir)

        # NOTE(review): these two images are deleted right after the copy -
        # presumably they are corrupt in the archive; confirm and document.
        os.unlink(os.path.join(samples_dir, "cat", "666.jpg"))
        os.unlink(os.path.join(samples_dir, "dog", "11702.jpg"))

        print("Finished downloading cats and dogs.")
Esempio n. 13
0
def _invalid_option(key, help_topic=None, suggestion=None, kind='value'):
    """Build (without raising) the error for an invalid option.

    :param key: name of the offending option; also used as ``hint_key``
    :param help_topic: help topic attached to the error
    :param suggestion: suggestion attached to the error
    :param kind: ``'value'`` when the value is invalid; any other value
                 marks the option itself as invalid. Used as ``hint_type``.

    :return: the ``VergeMLError`` instance
    """
    if kind == 'value':
        label = "Invalid value for option"
    else:
        label = "Invalid option"

    message = f"{label} '{key}'."
    return VergeMLError(message,
                        suggestion,
                        help_topic=help_topic,
                        hint_type=kind,
                        hint_key=key)
Esempio n. 14
0
 def __init__(self, variants, apply=None):
     """Store the ``variants`` setting of the augment operation.

     :param variants: must be an ``int``
     :param apply: passed through to the base class constructor

     :raises VergeMLError: if ``variants`` is not an ``int``
     """
     super().__init__(apply)

     if isinstance(variants, int):
         self.variants = variants
     else:
         raise VergeMLError("The parameter 'variants' of 'augment' must be of type 'int'.")
Esempio n. 15
0
def _load_and_configure(file, label, validators):
    """Load a YAML file and run the validators on its content.

    When a ``VergeMLError`` carrying a ``hint_key`` occurs and the
    corresponding definition can be located in the file, the error message
    is replaced by an excerpt of the file before re-raising.

    :param file: path of the YAML file
    :param label: human readable name of the file for error messages
    :param validators: passed to ``apply_config``

    :return: the validated document
    :raises VergeMLError: if loading or validating the file fails
    """
    doc = load_yaml_file(file, label)
    try:
        doc = apply_config(doc, validators)
        seed_is_invalid = ('random-seed' in doc
                           and not isinstance(doc['random-seed'], int))
        if seed_is_invalid:
            raise VergeMLError('Invalid value option random-seed.',
                               'random-seed must be an integer value.',
                               hint_type='value',
                               hint_key='random-seed')
    except VergeMLError as e:
        if e.hint_key:
            with open(file) as f:
                definition = yaml_find_definition(f, e.hint_key, e.hint_type)

            if definition:
                line, column, length = definition
                e.message = display_err_in_file(file, line, column, str(e),
                                                length)
                # The suggestion is already part of the formatted message.
                e.suggestion = None

        raise e

    return doc
Esempio n. 16
0
    def _get_classes_from_json(self):
        """Read ``labels.txt`` and ``classes.json`` from the samples directory.

        The non-empty, stripped lines of ``labels.txt`` become
        ``self.meta['labels']``. The content of ``classes.json`` is stored in
        ``self.classes``; the keys of its ``'files'`` mapping are turned into
        paths below the samples directory.

        :raises VergeMLError: if one of the two files is missing
        """

        def _existing(filename):
            # Resolve a file in the samples dir, insisting that it exists.
            path = os.path.join(self.samples_dir, filename)
            if not os.path.exists(path):
                raise VergeMLError(f"{filename} is missing")
            return path

        with open(_existing("labels.txt")) as label_file:
            stripped = (line.strip() for line in label_file.read().splitlines())
            labels = Labels(filter(None, stripped))

        with open(_existing("classes.json")) as classes_file:
            self.classes = json.load(classes_file)

        # Keys always use "/" as separator; rebuild them as native paths
        # prefixed with the samples directory.
        self.classes['files'] = {
            os.path.join(self.samples_dir, *key.split("/")): value
            for key, value in self.classes['files'].items()
        }
        self.meta['labels'] = labels
Esempio n. 17
0
def _prepare_args(args):
    """Prepare args by appending the project dir and setting defaults.

    Works on a deep copy of ``args``:

    - ``file`` defaults to ``<project-dir>/vergeml.yaml`` when that file
      exists and is then renamed to ``project-file``
    - ``random-seed`` is converted to ``int``
    - ``cache``, ``device`` and ``device-memory`` are validated

    :param args: the raw arguments (a dict)

    :return: the prepared copy of the arguments
    :raises VergeMLError: if one of the values is invalid
    """
    args = deepcopy(args)
    project_dir = args.get('project-dir', '')

    if 'file' not in args:
        default_file = os.path.join(project_dir, "vergeml.yaml")
        if os.path.exists(default_file):
            args['file'] = default_file

    if 'file' in args:
        args['project-file'] = args.pop('file')

    if 'random-seed' in args:
        try:
            args['random-seed'] = int(args['random-seed'])
        except ValueError as err:
            # NOTE(review): the third positional argument is a tuple here,
            # whereas other call sites pass hint_type/hint_key keywords -
            # confirm against the VergeMLError signature.
            raise VergeMLError("Invalid value for --random-seed.",
                               "--random-seed must be an integer value.",
                               ('value', 'random-seed')) from err

    cache_opts = ('none', 'disk', 'mem', 'disk-in', 'mem-in')

    if 'cache' in args:
        if args['cache'] not in cache_opts:
            raise VergeMLError("Invalid value for --cache.",
                               "Must be one of: " + ", ".join(cache_opts),
                               help_topic='cache')

    if 'device' in args:
        # fullmatch: the whole value has to be valid, not just a prefix
        # (re.match would accept e.g. "cpu0").
        if not re.fullmatch(r"gpu:[0-9]+|gpu|cpu|auto", args['device']):
            raise VergeMLError(
                "Invalid value for --device.",
                "Please specify a valid device, e.g gpu:0 or cpu.",
                help_topic='device')

    if 'device-memory' in args:
        # Same here: "50%x" must not pass because it starts with "50%".
        if not re.fullmatch(r"(([1-9]?[0-9]|100)%|(0\.[0-9]+)|1\.0)|auto",
                            args['device-memory']):
            raise VergeMLError(
                "Invalid value for --device-memory.",
                "Please specify device memory as a percentage, e.g. 100%.",
                help_topic='device')

    return args
Esempio n. 18
0
    def __call__(self, args, env):
        """Run the download plugin selected by ``args['dataset']``.

        :param args: command arguments; reads ``dataset``
        :param env: the environment; reads ``samples-dir``

        :raises VergeMLError: if the samples directory does not exist
        """
        samples_dir = env.get('samples-dir')
        if not os.path.exists(samples_dir):
            raise VergeMLError(f"samples dir does not exist: {samples_dir}")

        # Look up the plugin class for the dataset, instantiate and run it.
        downloader_class = self.plugins.get('vergeml.download', args['dataset'])
        downloader = downloader_class()
        downloader(args, env)
Esempio n. 19
0
    def _setup_ops(self):
        """Set up the preprocessing operations from env.

        Reads the ``data.preprocess`` list from the environment. Each entry
        is either a mapping with an ``op`` key plus the arguments of the
        operation, or a plain string naming an operation without arguments.
        The entries are checked against the constructor arguments of the
        ``vergeml.operation`` plugin and the instantiated operations are
        stored in ``self.ops``.

        :raises VergeMLError: if an entry has no name, the plugin is unknown,
                              or arguments are missing or unknown
        """

        # set up preprocessing operations
        self.ops = []

        for conf in self.env.get('data.preprocess') or []:
            if isinstance(conf, str):
                # Shorthand form. The name has to be stored under 'op',
                # which is the key read below.
                conf = dict(op=conf)
            else:
                conf = conf.copy()

            # every preprocessing operation needs an 'op' entry naming it;
            # remove it so that only constructor arguments remain
            name = conf.pop('op', None)
            if not name:
                raise VergeMLError("Name missing in data.preprocess item.")

            # look up the preprocessing plugin
            plugin = self.plugins.get('vergeml.operation', name)
            if not plugin:
                raise VergeMLError(
                    "preprocess plugin not found: {}".format(name))

            # check arguments; the first introspected argument is skipped
            # (presumably self)
            intro = introspect(plugin)
            accepted = intro.args[1:]
            mandatory = set(accepted).difference(intro.defaults.keys())
            missing = mandatory.difference(conf.keys())
            unknown = set(conf.keys()).difference(accepted)

            # TODO type checking

            # report missing or unknown arguments
            if missing:
                msg = "preprocess operation {} is missing argument(s): {}"
                raise VergeMLError(msg.format(name, missing))

            if unknown:
                msg = "preprocess operation {} received unknown argument(s): {}"
                raise VergeMLError(msg.format(name, unknown))

            self.ops.append(plugin(**conf))
Esempio n. 20
0
    def _parse_opts(self, rest):
        """Parse the options in ``rest`` with getopt.

        The getopt specification is derived from ``self.options``; @names
        and argument options are ignored here.

        :param rest: the arguments to parse

        :return: a tuple ``(parsed options, leftover arguments)`` as returned
                 by ``getopt.getopt``
        :raises VergeMLError: if getopt rejects the arguments
        """
        longopts = []
        shortopts = ""

        for opt in self.options:

            # Arguments and @names are dealt with elsewhere.
            if opt.is_at_option() or opt.is_argument_option():
                continue

            # Long options: flags take no value, everything else does ("=").
            if opt.flag:
                assert opt.has_type(str, bool)
            longopts.append(opt.name if opt.flag else opt.name + "=")

            # Short options: booleans take no value, everything else
            # does (":").
            if opt.short:
                assert opt.short not in shortopts
                shortopts += opt.short if opt.has_type(bool) else opt.short + ":"

        try:
            parsed = getopt.getopt(rest, shortopts, longopts)
        except getopt.GetoptError as err:
            if not err.opt:
                raise VergeMLError("Invalid option.", help_topic=self.name)

            # getopt named the offending option: offer a nicer message with
            # a suggestion drawn from all known option names.
            candidates = list(shortopts.replace(":", ""))
            candidates += [name.rstrip("=") for name in longopts]
            suggestion = did_you_mean(candidates, err.opt)
            dashes = '-' if len(err.opt) == 1 else '--'
            raise VergeMLError(f"Invalid option {dashes}{err.opt}",
                               suggestion,
                               help_topic=self.name)

        args, extra = parsed
        return args, extra
Esempio n. 21
0
def get_custom_architecture(name, trainings_dir, output_layer):
    """Load a trained model and cut it off at ``output_layer``.

    The model is read from
    ``<trainings_dir>/<name>/checkpoints/model.h5``; leading ``@``
    characters of ``name`` are stripped.

    :param name: name of the training, optionally prefixed with ``@``
    :param trainings_dir: directory containing the trainings
    :param output_layer: index (``int``) or name of the output layer

    :return: a model from the original input to the chosen layer's output
    :raises VergeMLError: if the layer can't be found
    """
    from keras.models import load_model, Model

    checkpoint = os.path.join(trainings_dir, name.lstrip("@"),
                              'checkpoints', 'model.h5')
    model = load_model(checkpoint)

    if isinstance(output_layer, int):
        try:
            layer = model.layers[output_layer]
        except Exception:
            raise VergeMLError(f'output-layer {output_layer} not found - model has only {len(model.layers)} layers.')
    else:
        try:
            layer = model.get_layer(output_layer)
        except Exception:
            layer_names = [candidate.name for candidate in model.layers]
            raise VergeMLError(f'output-layer named {output_layer} not found.',
                               suggestion=did_you_mean(layer_names, output_layer))

    return Model(inputs=model.input, outputs=layer.output)
Esempio n. 22
0
    def __init__(self, env):
        """Collect the predict functions of the model in ``env``.

        Every function found on ``env.model`` whose command is of kind
        ``'predict'`` is stored in ``self.fns`` under the command name.

        :param env: the environment; reads ``model`` and ``AI``

        :raises VergeMLError: if the model has no predict function
        """
        self.env = env
        self.fns = OrderedDict()

        for model_fn in Command.find_functions(env.model):
            cmd = Command.discover(model_fn)
            if cmd.kind != 'predict':
                continue
            self.fns[cmd.name] = (cmd, model_fn)

        if not self.fns:
            raise VergeMLError(f"@{env.AI} can't be run as a REST service.")
Esempio n. 23
0
def get_imagenet_architecture(architecture, variant, size, alpha, output_layer, include_top=False, weights='imagenet'):
    """Instantiate one of the keras application models.

    :param architecture: name of the architecture, e.g. ``'resnet-50'``
    :param variant: variant for densenet and nasnet, or ``'auto'``
    :param size: edge length of the square input image; ``'auto'`` resolves
                 the size via ``get_image_size``
    :param alpha: alpha value, used by the mobilenet architectures only
    :param output_layer: ``'last'`` to keep the whole model, otherwise the
                         index (``int``) or name of the layer to cut at
    :param include_top: passed to the keras constructor; requires
                        ``output_layer == 'last'``
    :param weights: passed to the keras constructor

    :return: the keras model
    :raises VergeMLError: if the architecture, the densenet variant or the
                          output layer is unknown
    """
    from keras import applications, Model

    if include_top:
        assert output_layer == 'last'

    if size == 'auto':
        size = get_image_size(architecture, variant, size)

    common = dict(weights=weights,
                  include_top=include_top,
                  input_shape=(size, size, 3))

    # Constructor names are resolved lazily with getattr so that only the
    # requested class has to exist in the installed keras.
    densenets = {
        'densenet-121': 'DenseNet121',
        'densenet-169': 'DenseNet169',
        'densenet-201': 'DenseNet201',
    }
    plain = {
        'inception-resnet-v2': 'InceptionResNetV2',
        'resnet-50': 'ResNet50',
        'vgg-16': 'VGG16',
        'vgg-19': 'VGG19',
        'xception': 'Xception',
        'inception-v3': 'InceptionV3',
    }

    if architecture == 'densenet':
        if variant == 'auto':
            variant = 'densenet-121'
        if variant not in densenets:
            # Previously this fell through and failed with an unbound
            # local further down.
            raise VergeMLError("Invalid densenet variant: {}".format(variant))
        model = getattr(applications, densenets[variant])(**common)
    elif architecture == 'mobilenet':
        model = applications.MobileNet(alpha=alpha, **common)
    elif architecture == 'mobilenet-v2':
        model = applications.MobileNetV2(alpha=alpha, **common)
    elif architecture == 'nasnet':
        if variant == 'auto':
            variant = 'large'
        if variant == 'large':
            model = applications.NASNetLarge(**common)
        else:
            model = applications.NASNetMobile(**common)
    elif architecture in plain:
        model = getattr(applications, plain[architecture])(**common)
    else:
        raise VergeMLError("Unknown CNN: {}".format(architecture))

    if output_layer != 'last':
        try:
            if isinstance(output_layer, int):
                layer = model.layers[output_layer]
            else:
                layer = model.get_layer(output_layer)
        except Exception as err:
            raise VergeMLError('layer not found: {}'.format(output_layer)) from err
        model = Model(inputs=model.input, outputs=layer.output)

    return model
Esempio n. 24
0
File: pr.py Progetto: mme/vergeml
    def __call__(self, args, env):
        """Plot the precision-recall curve for one class.

        :param args: command arguments; reads ``class`` and ``@AI``
        :param env: the environment, passed to ``load_labels`` and
                    ``load_predictions``

        :raises VergeMLError: if labels or predictions can't be loaded or
                              the class is unknown
        """

        import numpy as np
        import matplotlib.pyplot as plt
        from sklearn.metrics import average_precision_score
        from sklearn.metrics import precision_recall_curve
        from vergeml.plots import load_labels, load_predictions

        try:
            labels = load_labels(env)
        except FileNotFoundError as err:
            raise VergeMLError(
                "Can't plot PR curve - not supported by model.") from err

        nclasses = len(labels)
        if args['class'] not in labels:
            raise VergeMLError("Unknown class: " + args['class'])

        try:
            y_test, y_score = load_predictions(env, nclasses)
        except FileNotFoundError as err:
            raise VergeMLError(
                "Can't plot PR curve - not supported by model.") from err

        # From:
        # https://scikit-learn.org/stable/auto_examples/model_selection/plot_precision_recall.html#sphx-glr-auto-examples-model-selection-plot-precision-recall-py

        # Select the column of the requested class. The builtin int is used
        # because the np.int alias no longer exists in current NumPy.
        ix = labels.index(args['class'])
        y_test = y_test[:, ix].astype(int)
        y_score = y_score[:, ix]

        precision, recall, _ = precision_recall_curve(y_test, y_score)
        average_precision = average_precision_score(y_test, y_score)

        plt.step(recall, precision, color='b', alpha=0.2, where='post')
        plt.fill_between(recall, precision, alpha=0.2, color='b', step='post')

        plt.xlabel('Recall ({})'.format(args['class']))
        plt.ylabel('Precision ({})'.format(args['class']))
        plt.ylim([0.0, 1.05])
        plt.xlim([0.0, 1.0])
        plt.title('Precision-Recall curve for @{0}: AP={1:0.2f}'.format(
            args['@AI'], average_precision))
        plt.show()
Esempio n. 25
0
    def __call__(self, args, env):
        """Write a preview of the samples into the given directory.

        The output directory is created when it does not exist yet.

        :param args: command arguments; reads ``<directory>``, ``split`` and
                     ``num-samples``
        :param env: the environment; reads ``data``

        :raises VergeMLError: if ``_preview`` returns a falsy result
        """
        output_dir = args['<directory>']

        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        if _preview(env.data, output_dir, args['split'], args['num-samples']):
            return

        raise VergeMLError("Command preprocess not supported.")
Esempio n. 26
0
    def transform(self, img, rng):
        """Crop ``img`` to ``self.width`` x ``self.height``.

        When both ``self.x`` and ``self.y`` are set, they are the absolute
        origin of the crop; otherwise ``self.position`` decides where the
        window is placed.

        :param img: the image to crop; must provide ``size`` and ``crop``
        :param rng: random number generator (not used by this operation)

        :return: the cropped image
        :raises VergeMLError: if the image is too small for the crop window
        """
        width, height = img.size

        if width < self.width:
            raise VergeMLError("Can't crop sample with width {} to {}.".format(
                width, self.width))

        if height < self.height:
            raise VergeMLError(
                "Can't crop sample with height {} to {}.".format(
                    height, self.height))

        # Test against None instead of truthiness so that an explicit origin
        # of (0, 0) is honoured instead of falling back to self.position.
        if self.x is not None and self.y is not None:

            if width < self.width + self.x:
                raise VergeMLError(
                    "Can't crop sample with width {} to {} from x {}.".format(
                        width, self.width, self.x))
            if height < self.height + self.y:
                raise VergeMLError(
                    "Can't crop sample with height {} to {} from y {}.".format(
                        height, self.height, self.y))

            x = self.x
            y = self.y

        elif self.position == "top-left":
            x, y = 0, 0
        elif self.position == "top-right":
            x, y = width - self.width, 0
        elif self.position == "bottom-left":
            x, y = 0, height - self.height
        elif self.position == "bottom-right":
            x, y = width - self.width, height - self.height
        elif self.position == "center":
            x = math.floor(width / 2 - self.width / 2)
            y = math.floor(height / 2 - self.height / 2)

        # crop box: (left, upper, right, lower)
        params = x, y, x + self.width, y + self.height
        return img.crop(params)
Esempio n. 27
0
    def _setup_input(self):
        """Instantiate the input plugin configured in env.

        The plugin is looked up in the ``vergeml.io`` group by the value of
        ``data.input.type``. It receives a copy of the ``data.input``
        configuration, updated with the base config and without the ``type``
        entry. The instance is stored in ``self.input``.

        :raises VergeMLError: if no input type is configured or no plugin
                              with that name exists
        """
        input_name = self.env.get('data.input.type')
        if not input_name:
            raise VergeMLError("data.input.type is not defined.")

        # Work on a copy so the configuration in env stays untouched.
        settings = self.env.get('data.input').copy()
        settings.update(self._base_env_config())

        input_class = self.plugins.get('vergeml.io', input_name)
        if not input_class:
            raise VergeMLError(f"input name not found: {input_name}")

        # TODO validate configuration and set defaults
        # 'type' only selects the plugin; it is not passed on to it.
        del settings['type']

        self.input = input_class(settings)
Esempio n. 28
0
    def _wrap_call(cmd, fun, args, env):
        """Complete the arguments of a command and call its function.

        Unless the command is free form, arguments that are not given are
        filled in from the config file and then from the option defaults,
        and required arguments are checked. The command is registered as
        ``env.current_command`` before ``fun`` is called.

        :param cmd: the command description
        :param fun: the function implementing the command
        :param args: the arguments given by the caller (left untouched)
        :param env: the environment

        :return: whatever ``fun`` returns
        :raises VergeMLError: if a required argument is missing
        """
        fn_args = deepcopy(args)
        config_name = cmd.name

        if env.current_command:
            # Check whether the previous command declares a sub option that
            # names this command.
            parent_cmd = env.current_command[0]
            sub_option = next(
                (opt for opt in parent_cmd.options if opt.subcommand), None)

            if sub_option and args.get(sub_option.name) == cmd.name:
                # We are a sub command: the config lives below the parent.
                config_name = parent_cmd.name + '.' + cmd.name

        # Free form commands deal with their arguments manually.
        if not cmd.free_form:
            # Settings from the config file, if there are any
            config = parse_command(cmd, env.get(config_name))
            for key, value in config.items():
                fn_args.setdefault(key, value)

            # Option defaults for everything that is still missing
            for opt in cmd.options:
                if opt.name in fn_args:
                    continue
                if opt.default is not None or not opt.is_required():
                    fn_args[opt.name] = opt.default

            # Whatever is required and still missing is an error.
            for opt in cmd.options:
                if opt.is_required() and opt.name not in fn_args:

                    # TODO show --name only when called via the command line
                    raise VergeMLError(f'Missing argument --{opt.name}.',
                                       help_topic=cmd.name)

        # Let the environment know which command is being executed.
        env.current_command = (cmd, fn_args)

        # Set up defaults for the command. This also gives models a chance
        # to alter the configuration of the environment before the command
        # runs.
        env.set_defaults(cmd.name, fn_args)

        return fun(fn_args, env)
Esempio n. 29
0
    def _validate_preprocess(self, value):
        """Validate the ``data.preprocess`` entries and merge them into ``self.values``.

        Every entry of ``value`` must be a dict with an ``'op'`` key naming a
        plugin registered in the ``vergeml.operation`` group. The remaining
        keys are validated against that operation's options (all options
        except ``op``) using a ``ValidatePreprocess`` validator. The validated
        entries are merged into ``self.values`` under ``data.preprocess``.

        Raises:
            VergeMLError: if an entry is not a dict, lacks the ``'op'`` key,
                names an unknown operation, or fails option validation. The
                error's hint key contains the index of the offending entry.
        """
        operations = []
        for ix, config in enumerate(value):
            # Location of this entry in the project file, used for error hints.
            entry_key = 'data.preprocess.' + str(ix)

            if not isinstance(config, dict):
                raise VergeMLError("Invalid entry in preprocess - must be key value pairs.",
                                   "Please fix the entry in the project file.",
                                   help_topic="preprocess",
                                   hint_type='key',
                                   hint_key=entry_key)
            if 'op' not in config:
                raise VergeMLError("Invalid entry in preprocess - missing 'op' key.",
                                   "Please fix the entry in the project file.",
                                   help_topic="preprocess",
                                   hint_type='key',
                                   hint_key=entry_key)

            op_name = config['op']
            plugin = self.plugins.get("vergeml.operation", op_name)
            if not plugin:
                # The hint must carry the real entry index; the previous
                # string lacked the f prefix and contained a literal "{ix}".
                raise VergeMLError(f"Invalid entry in preprocess - unknown operation '{op_name}'.",
                                   "Please fix the entry in the project file.",
                                   help_topic="preprocess",
                                   hint_type='value',
                                   hint_key=f"{entry_key}.op")

            op = Operation.discover(plugin)
            # 'op' selects the operation and is not one of its own options.
            options = [o for o in op.options if o.name != 'op']
            validator = ValidatePreprocess(options, op_name, self.plugins)

            # Validate a copy without the 'op' key so the caller's entry is
            # left untouched.
            config = deepcopy(config)
            del config['op']
            try:
                apply_config(config, {None: validator})
            except VergeMLError as err:
                # Re-root the validator's hint key under this entry. Errors
                # without a hint key are passed on unchanged so that string
                # concatenation with None cannot mask the original error.
                if err.hint_key:
                    err.hint_key = f"{entry_key}.{err.hint_key}"
                raise

            validator.values['op'] = op_name
            operations.append(validator.values)

        dict_merge(self.values, dict(data=dict(preprocess=operations)))
Esempio n. 30
0
    def _load_trained_model(self):
        """Load a trained model's hyperparameters and results.

        Reads ``data.yaml`` from the model's directory below the configured
        ``trainings-dir``, copies its ``hyperparameters``, ``results`` and
        ``model`` entries into ``self._config`` and sets ``self.results``.

        Returns:
            The path of the ``data.yaml`` file that was loaded.

        Raises:
            VergeMLError: if the model directory or its ``data.yaml`` file
                does not exist.
        """
        model_dir = os.path.join(self._config['trainings-dir'], self.trained_model)
        if not os.path.exists(model_dir):
            raise VergeMLError("Trained model not found: {}".format(self.trained_model))

        # Merge data.yaml
        data_file = os.path.join(model_dir, 'data.yaml')
        if not os.path.exists(data_file):
            raise VergeMLError("data.yaml file not found for {}: {}".format(
                self.trained_model, data_file))

        doc = load_yaml_file(data_file, 'data file')

        # Missing hyperparameters/results default to empty dicts; a missing
        # model entry is stored as None.
        loaded = {
            'hyperparameters': doc.get('hyperparameters', {}),
            'results': doc.get('results', {}),
            'model': doc.get('model'),
        }
        self._config.update(loaded)

        self.results = _Results(self, data_file)

        return data_file