Example #1
def check_one_shot_input(data, target):
    if not isinstance(target, str):
        raise TypeError("'target' must be of type string.")
    if isinstance(data, _tc.SFrame):
        _tkutl._raise_error_if_column_exists(data, target, "data", target)
        image_column_name = _tkutl._find_only_image_column(data)
        target_column_name = target
        dataset_to_augment = data
    elif isinstance(data, _tc.Image):
        image_column_name = "image"
        target_column_name = "target"
        dataset_to_augment = _tc.SFrame({
            image_column_name: [data],
            target_column_name: [target]
        })
    else:
        raise TypeError("'data' must be of type SFrame or Image.")
    return dataset_to_augment, image_column_name, target_column_name
Example #2
def check_one_shot_input(data, target, backgrounds):
    if backgrounds is not None and not(isinstance(backgrounds, _tc.SArray)):
        raise TypeError("'backgrounds' must be None or an SArray.")
    if (isinstance(backgrounds, _tc.SArray) and len(backgrounds) == 0):
        raise _ToolkitError('Unable to train with no background images')
    if not isinstance(target, str):
        raise TypeError("'target' must be of type string.")
    if isinstance(data, _tc.SFrame):
        _tkutl._raise_error_if_column_exists(data, target, "data", target)
        image_column_name = _tkutl._find_only_image_column(data)
        target_column_name = target
        dataset_to_augment = data
    elif isinstance(data, _tc.Image):
        image_column_name = "image"
        target_column_name = "target"
        dataset_to_augment = _tc.SFrame({image_column_name: [data],
                                         target_column_name: [target]})
    else:
        raise TypeError("'data' must be of type SFrame or Image.")
    return dataset_to_augment, image_column_name, target_column_name
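The helper above normalizes either input form into the same triple. A minimal, hypothetical usage sketch (assuming turicreate is imported as _tc and the image path exists):

# Hypothetical usage sketch; not part of the library code above.
import turicreate as _tc

starter = _tc.Image("/path/to/starter_logo.png")

# A single Image is wrapped into a one-row SFrame with default column names.
data, image_col, target_col = check_one_shot_input(starter, "logo", backgrounds=None)

# An SFrame must already contain one image column and the named target column.
sf = _tc.SFrame({"image": [starter], "logo": ["starter_logo"]})
data, image_col, target_col = check_one_shot_input(sf, "logo", backgrounds=None)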
Example #3
def create(dataset, annotations=None, feature=None, model='darknet-yolo',
           classes=None, max_iterations=0, verbose=True, **kwargs):
    """
    Create a :class:`ObjectDetector` model.

    Parameters
    ----------
    dataset : SFrame
        Input data. The columns named by the ``feature`` and ``annotations``
        parameters will be extracted for training the detector.

    annotations : string
        Name of the column containing the object detection annotations.
        This column should be a list of dictionaries, with each dictionary
        representing a bounding box of an object instance. Here is an example
        of the annotations for a single image with two object instances::

            [{'label': 'dog',
              'type': 'rectangle',
              'coordinates': {'x': 223, 'y': 198,
                              'width': 130, 'height': 230}},
             {'label': 'cat',
              'type': 'rectangle',
              'coordinates': {'x': 40, 'y': 73,
                              'width': 80, 'height': 123}}]

        The value for `x` is the horizontal center of the box paired with
        `width` and `y` is the vertical center of the box paired with `height`.
        'None' (the default) indicates the only list column in `dataset` should
        be used for the annotations.

    feature : string
        Name of the column containing the input images. 'None' (the default)
        indicates the only image column in `dataset` should be used as the
        feature.

    model : string, optional
        Object detection model to use:

           - "darknet-yolo" : Fast and medium-sized model

    classes : list, optional
        List of strings containing the names of the classes of objects.
        Inferred from the data if not provided.

    max_iterations : int, optional
        The number of training iterations. If 0 (the default), the number of
        iterations is determined automatically based on the amount of data you
        provide.

    verbose : bool, optional
        If True, print progress updates and model details.

    Returns
    -------
    out : ObjectDetector
        A trained :class:`ObjectDetector` model.

    See Also
    --------
    ObjectDetector

    Examples
    --------
    .. sourcecode:: python

        # Train an object detector model
        >>> model = turicreate.object_detector.create(data)

        # Make predictions on the training set and add them as a column to the SFrame
        >>> data['predictions'] = model.predict(data)

        # Visualize predictions by generating a new column of marked up images
        >>> data['image_pred'] = turicreate.object_detector.util.draw_bounding_boxes(data['image'], data['predictions'])
    """
    _raise_error_if_not_sframe(dataset, "dataset")
    from ._mx_detector import YOLOLoss as _YOLOLoss
    from ._model import tiny_darknet as _tiny_darknet
    from ._sframe_loader import SFrameDetectionIter as _SFrameDetectionIter
    from ._manual_scheduler import ManualScheduler as _ManualScheduler
    import mxnet as _mx
    if len(dataset) == 0:
        raise _ToolkitError('Unable to train on empty dataset')

    _numeric_param_check_range('max_iterations', max_iterations, 0, _six.MAXSIZE)
    start_time = _time.time()

    supported_detectors = ['darknet-yolo']

    if feature is None:
        feature = _tkutl._find_only_image_column(dataset)
        if verbose:
            print("Using '%s' as feature column" % feature)
    if annotations is None:
        annotations = _tkutl._find_only_column_of_type(dataset,
                                                       target_type=list,
                                                       type_name='list',
                                                       col_name='annotations')
        if verbose:
            print("Using '%s' as annotations column" % annotations)

    _raise_error_if_not_detection_sframe(dataset, feature, annotations,
                                         require_annotations=True)

    _tkutl._check_categorical_option_type('model', model,
            supported_detectors)

    base_model = model.split('-', 1)[0]
    ref_model = _pre_trained_models.OBJECT_DETECTION_BASE_MODELS[base_model]()

    params = {
        'anchors': [
            (1.0, 2.0), (1.0, 1.0), (2.0, 1.0),
            (2.0, 4.0), (2.0, 2.0), (4.0, 2.0),
            (4.0, 8.0), (4.0, 4.0), (8.0, 4.0),
            (8.0, 16.0), (8.0, 8.0), (16.0, 8.0),
            (16.0, 32.0), (16.0, 16.0), (32.0, 16.0),
        ],
        'grid_shape': [13, 13],
        'batch_size': 32,
        'aug_resize': 0,
        'aug_rand_crop': 0.9,
        'aug_rand_pad': 0.9,
        'aug_rand_gray': 0.0,
        'aug_aspect_ratio': 1.25,
        'aug_hue': 0.05,
        'aug_brightness': 0.05,
        'aug_saturation': 0.05,
        'aug_contrast': 0.05,
        'aug_horizontal_flip': True,
        'aug_min_object_covered': 0,
        'aug_min_eject_coverage': 0.5,
        'aug_area_range': (.15, 2),
        'aug_pca_noise': 0.0,
        'aug_max_attempts': 20,
        'aug_inter_method': 2,
        'lmb_coord_xy': 10.0,
        'lmb_coord_wh': 10.0,
        'lmb_obj': 100.0,
        'lmb_noobj': 5.0,
        'lmb_class': 2.0,
        'non_maximum_suppression_threshold': 0.45,
        'rescore': True,
        'clip_gradients': 0.025,
        'learning_rate': 1.0e-3,
        'shuffle': True,
    }

    if '_advanced_parameters' in kwargs:
        # Make sure no additional parameters are provided
        new_keys = set(kwargs['_advanced_parameters'].keys())
        set_keys = set(params.keys()) 
        unsupported = new_keys - set_keys
        if unsupported:
            raise _ToolkitError('Unknown advanced parameters: {}'.format(unsupported))

        params.update(kwargs['_advanced_parameters'])

    anchors = params['anchors']
    num_anchors = len(anchors)

    num_gpus = _mxnet_utils.get_num_gpus_in_use(max_devices=params['batch_size'])
    batch_size_each = params['batch_size'] // max(num_gpus, 1)
    # Note, this may slightly alter the batch size to fit evenly on the GPUs
    batch_size = max(num_gpus, 1) * batch_size_each
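    # For example, with 3 GPUs in use and batch_size=32, batch_size_each
    # becomes 10 and the effective batch_size is rounded down to 30.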

    grid_shape = params['grid_shape']
    input_image_shape = (3,
                         grid_shape[0] * ref_model.spatial_reduction,
                         grid_shape[1] * ref_model.spatial_reduction)

    try:
        instances = (dataset.stack(annotations, new_column_name='_bbox', drop_na=True)
                            .unpack('_bbox', limit=['label']))
    except (TypeError, RuntimeError):
        # If this fails, the annotation format is invalid at the coarsest level
        raise _ToolkitError("Annotations format is invalid. Must be a list of "
                            "dictionaries containing 'label' and 'coordinates'.")
    num_images = len(dataset)
    num_instances = len(instances)
    if classes is None:
        classes = instances['_bbox.label'].unique()
    classes = sorted(classes)

    # Make a class-to-index look-up table
    class_to_index = {name: index for index, name in enumerate(classes)}
    num_classes = len(classes)

    # Create data loader
    loader = _SFrameDetectionIter(dataset,
                                  batch_size=batch_size,
                                  input_shape=input_image_shape[1:],
                                  output_shape=grid_shape,
                                  anchors=anchors,
                                  class_to_index=class_to_index,
                                  aug_params=params,
                                  shuffle=params['shuffle'],
                                  loader_type='augmented',
                                  feature_column=feature,
                                  annotations_column=annotations)

    # Predictions per anchor box: x/y + w/h + object confidence + class probs
    preds_per_box = 5 + num_classes
    output_size = preds_per_box * num_anchors
    ymap_shape = (batch_size_each,) + tuple(grid_shape) + (num_anchors, preds_per_box)
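    # For example, with 20 classes and the 15 default anchors above, preds_per_box = 25,
    # output_size = 375, and ymap_shape = (batch_size_each, 13, 13, 15, 25).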

    net = _tiny_darknet(output_size=output_size)

    loss = _YOLOLoss(input_shape=input_image_shape[1:],
                     output_shape=grid_shape,
                     batch_size=batch_size_each,
                     num_classes=num_classes,
                     anchors=anchors,
                     parameters=params)

    base_lr = params['learning_rate']
    if max_iterations == 0:
        # Set number of iterations through a heuristic
        num_iterations_raw = 5000 * _np.sqrt(num_instances) / batch_size
        num_iterations = 1000 * max(1, int(round(num_iterations_raw / 1000)))
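        # For example, ~2000 bounding boxes at batch size 32 gives
        # 5000 * sqrt(2000) / 32, which is about 6988 and rounds to 7000 iterations.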
    else:
        num_iterations = max_iterations

    steps = [num_iterations // 2, 3 * num_iterations // 4, num_iterations]
    steps_and_factors = [(step, 10**(-i)) for i, step in enumerate(steps)]
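    # For example, num_iterations=4000 yields steps=(2000, 3000, 4000) and
    # factors=(1, 0.1, 0.01): the learning rate drops by 10x at the halfway
    # and three-quarter marks.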

    steps, factors = zip(*steps_and_factors)
    lr_scheduler = _ManualScheduler(step=steps, factor=factors)

    ctx = _mxnet_utils.get_mxnet_context(max_devices=batch_size)

    net_params = net.collect_params()
    net_params.initialize(_mx.init.Xavier(), ctx=ctx)
    net_params['conv7_weight'].initialize(_mx.init.Xavier(factor_type='avg'), ctx=ctx, force_reinit=True)
    net_params['conv8_weight'].initialize(_mx.init.Uniform(0.00005), ctx=ctx, force_reinit=True)
    # Initialize object confidence low, preventing an unnecessary adjustment
    # period toward conservative estimates
    bias = _np.zeros(output_size, dtype=_np.float32)
    bias[4::preds_per_box] -= 6
    from ._mx_detector import ConstantArray
    net_params['conv8_bias'].initialize(ConstantArray(bias), ctx, force_reinit=True)

    # Take a subset and then load the rest of the parameters. It is possible to
    # do allow_missing=True directly on net_params. However, this will more
    # easily hide bugs caused by names getting out of sync.
    ref_model.available_parameters_subset(net_params).load(ref_model.model_path, ctx)

    options = {'learning_rate': base_lr, 'lr_scheduler': lr_scheduler,
               'momentum': 0.9, 'wd': 0.00005, 'rescale_grad': 1.0}
    clip_grad = params.get('clip_gradients')
    if clip_grad:
        options['clip_gradient'] = clip_grad

    trainer = _mx.gluon.Trainer(net.collect_params(), 'sgd', options)

    iteration = 0
    smoothed_loss = None
    last_time = 0
    while iteration < num_iterations:
        loader.reset()
        for batch in loader:
            data = _mx.gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            label = _mx.gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)

            Ls = []
            with _mx.autograd.record():
                for x, y in zip(data, label):
                    z = net(x)
                    z0 = _mx.nd.transpose(z, [0, 2, 3, 1]).reshape(ymap_shape)
                    L = loss(z0, y)
                    Ls.append(L)
                for L in Ls:
                    L.backward()

            cur_loss = _np.mean([L.asnumpy()[0] for L in Ls])
            if smoothed_loss is None:
                smoothed_loss = cur_loss
            else:
                smoothed_loss = 0.9 * smoothed_loss + 0.1 * cur_loss
            trainer.step(1)
            iteration += 1
            cur_time = _time.time()
            if verbose and cur_time > last_time + 10:
                print('{now:%Y-%m-%d %H:%M:%S}  Training {cur_iter:{width}d}/{num_iterations:{width}d}  Loss {loss:6.3f}'.format(
                    now=_datetime.now(), cur_iter=iteration, num_iterations=num_iterations,
                    loss=smoothed_loss, width=len(str(num_iterations))))
                last_time = cur_time
            if iteration == num_iterations:
                break

    training_time = _time.time() - start_time

    # Save the model
    state = {
        '_model': net,
        '_class_to_index': class_to_index,
        '_training_time_as_string': _seconds_as_string(training_time),
        '_grid_shape': grid_shape,
        'anchors': anchors,
        'model': model,
        'classes': classes,
        'batch_size': batch_size,
        'input_image_shape': input_image_shape,
        'feature': feature,
        'non_maximum_suppression_threshold': params['non_maximum_suppression_threshold'],
        'annotations': annotations,
        'num_classes': num_classes,
        'num_examples': num_images,
        'num_bounding_boxes': num_instances,
        'training_time': training_time,
        'training_epochs': loader.cur_epoch,
        'training_iterations': iteration,
        'max_iterations': max_iterations,
        'training_loss': smoothed_loss,
    }
    return ObjectDetector(state)
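As an illustration of the annotations format documented above, here is a hedged sketch (the image path is hypothetical) that builds a one-image training SFrame by hand and trains a detector on it:

import turicreate as tc

# Hypothetical image path; x/y are the box centers, as described in the docstring.
img = tc.Image("/path/to/dog_and_cat.jpg")
train = tc.SFrame({
    "image": [img],
    "annotations": [[
        {"label": "dog", "type": "rectangle",
         "coordinates": {"x": 223, "y": 198, "width": 130, "height": 230}},
        {"label": "cat", "type": "rectangle",
         "coordinates": {"x": 40, "y": 73, "width": 80, "height": 123}},
    ]],
})
model = tc.object_detector.create(train, feature="image",
                                  annotations="annotations", max_iterations=1)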
Example #4
def create(
        dataset,
        target,
        feature=None,
        model='resnet-50',
        l2_penalty=0.01,
        l1_penalty=0.0,
        solver='auto',
        feature_rescaling=True,
        convergence_threshold=_DEFAULT_SOLVER_OPTIONS['convergence_threshold'],
        step_size=_DEFAULT_SOLVER_OPTIONS['step_size'],
        lbfgs_memory_level=_DEFAULT_SOLVER_OPTIONS['lbfgs_memory_level'],
        max_iterations=_DEFAULT_SOLVER_OPTIONS['max_iterations'],
        class_weights=None,
        validation_set='auto',
        verbose=True,
        seed=None,
        batch_size=64):
    """
    Create a :class:`ImageClassifier` model.

    Parameters
    ----------
    dataset : SFrame
        Input data. The column named by the 'feature' parameter will be
        extracted for modeling.

    target : string, or int
        Name of the column containing the target variable. The values in this
        column must be of string or integer type. String target variables are
        automatically mapped to integers in the order in which they are provided.
        For example, a target variable with 'cat' and 'dog' as possible
        values is mapped to 0 and 1 respectively with 0 being the base class
        and 1 being the reference class. Use `model.classes` to retrieve
        the order in which the classes are mapped.

    feature : string, optional
        Name of the column containing the input images. 'None' (the default)
        indicates the only image column in `dataset` should be used as the
        feature.

    l2_penalty : float, optional
        Weight on l2 regularization of the model. The larger this weight, the
        more the model coefficients shrink toward 0. This introduces bias into
        the model but decreases variance, potentially leading to better
        predictions. The default value is 0.01; setting this parameter to 0
        corresponds to unregularized logistic regression. See the ridge
        regression reference for more detail.

    l1_penalty : float, optional
        Weight on l1 regularization of the model. Like the l2 penalty, the
        higher the l1 penalty, the more the estimated coefficients shrink toward
        0. The l1 penalty, however, completely zeros out sufficiently small
        coefficients, automatically indicating features that are not useful
        for the model. The default weight of 0 prevents any features from
        being discarded. See the LASSO regression reference for more detail.

    solver : string, optional
        Name of the solver to be used to solve the regression. See the
        references for more detail on each solver. Available solvers are:

        - *auto (default)*: automatically chooses the best solver for the data
          and model parameters.
        - *newton*: Newton-Raphson
        - *lbfgs*: limited memory BFGS
        - *fista*: accelerated gradient descent

        For this model, the Newton-Raphson method is equivalent to the
        iteratively re-weighted least squares algorithm. If the l1_penalty is
        greater than 0, use the 'fista' solver.

        The model is trained using a carefully engineered collection of methods
        that are automatically picked based on the input data. The ``newton``
        method  works best for datasets with plenty of examples and few features
        (long datasets). Limited memory BFGS (``lbfgs``) is a robust solver for
        wide datasets (i.e., datasets with many coefficients). ``fista`` is the
        default solver for l1-regularized linear regression. The solvers are all
        automatically tuned and the default options should function well. See
        the solver options guide for setting additional parameters for each of
        the solvers.

        See the user guide for additional details on how the solver is chosen.
        (see `here
        <https://apple.github.io/turicreate/docs/userguide/supervised-learning/linear-regression.html>`_)

    feature_rescaling : boolean, optional
        Feature rescaling is an important pre-processing step that ensures that
        all features are on the same scale. An l2-norm rescaling is performed
        to make sure that all features are of the same norm. Categorical
        features are also rescaled by rescaling the dummy variables that are
        used to represent them. The coefficients are returned in original scale
        of the problem. This process is particularly useful when features
        vary widely in their ranges.

    convergence_threshold : float, optional
        Convergence is tested using variation in the training objective. The
        variation in the training objective is calculated using the difference
        between the objective values between two steps. Consider reducing this
        below the default value (0.01) for a more accurately trained model.
        Beware of overfitting (i.e., a model that works well only on the training
        data) if this parameter is set to a very low value.

    lbfgs_memory_level : float, optional
        The L-BFGS algorithm keeps track of gradient information from the
        previous ``lbfgs_memory_level`` iterations. The storage requirement for
        each of these gradients is the ``num_coefficients`` in the problem.
        Increasing the ``lbfgs_memory_level`` can help improve the quality of
        the model trained. Setting this to more than ``max_iterations`` has the
        same effect as setting it to ``max_iterations``.

    model : string, optional
        Uses a pretrained model to bootstrap an image classifier:

           - "resnet-50" : Uses a pretrained resnet model.
                           Exported Core ML model will be ~90M.

           - "squeezenet_v1.1" : Uses a pretrained squeezenet model.
                                 Exported Core ML model will be ~4.7M.

           - "VisionFeaturePrint_Scene": Uses an OS internal feature extractor.
                                          Only available on iOS 12.0+,
                                          macOS 10.14+ and tvOS 12.0+.
                                          Exported Core ML model will be ~41K.

        Models are downloaded from the internet if not available locally. Once
        downloaded, the models are cached for future use.

    step_size : float, optional
        The starting step size to use for the ``fista`` solver. The default is
        1.0, which is an aggressive setting. If the first iteration takes
        a considerable amount of time, reducing this parameter may speed up
        model training.

    class_weights : {dict, `auto`}, optional
        Weights the examples in the training data according to the given class
        weights. If set to `None`, all classes are assigned weight one. The
        `auto` mode sets the class weight to be inversely proportional to the
        number of examples in the training data with the given class.

    validation_set : SFrame, optional
        A dataset for monitoring the model's generalization performance.
        The format of this SFrame must be the same as the training set.
        By default this argument is set to 'auto' and a validation set is
        automatically sampled and used for progress printing. If
        validation_set is set to None, then no additional metrics
        are computed. The default value is 'auto'.

    max_iterations : int, optional
        The maximum number of allowed passes through the data. More passes over
        the data can result in a more accurately trained model. Consider
        increasing this (the default value is 10) if the training accuracy is
        low and the *Grad-Norm* in the display is large.

    verbose : bool, optional
        If True, prints progress updates and model details.

    seed : int, optional
        Seed for random number generation. Set this value to ensure that the
        same model is created every time.

    batch_size : int, optional
        If you are getting memory errors, try decreasing this value. If you
        have a powerful computer, increasing this value may improve performance.

    Returns
    -------
    out : ImageClassifier
        A trained :class:`ImageClassifier` model.

    Examples
    --------
    .. sourcecode:: python

        >>> model = turicreate.image_classifier.create(data, target='is_expensive')

        # Make predictions (in various forms)
        >>> predictions = model.predict(data)      # predictions
        >>> predictions = model.classify(data)     # predictions with confidence
        >>> predictions = model.predict_topk(data) # Top-5 predictions (multiclass)

        # Evaluate the model with ground truth data
        >>> results = model.evaluate(data)

    See Also
    --------
    ImageClassifier
    """
    start_time = _time.time()

    # Check model parameter
    allowed_models = list(_pre_trained_models.MODELS.keys())
    if _mac_ver() >= (10, 14):
        allowed_models.append('VisionFeaturePrint_Scene')

        # Also, to make sure existing code doesn't break, replace incorrect name
        # with the correct name version
        if model == "VisionFeaturePrint_Screen":
            print(
                "WARNING: Correct spelling of model name is VisionFeaturePrint_Scene; VisionFeaturePrint_Screen will be removed in subsequent versions."
            )
            model = "VisionFeaturePrint_Scene"

    _tkutl._check_categorical_option_type('model', model, allowed_models)

    # Check dataset parameter
    if len(dataset) == 0:
        raise _ToolkitError('Unable to train on empty dataset')
    if (feature is not None) and (feature not in dataset.column_names()):
        raise _ToolkitError("Image feature column '%s' does not exist" %
                            feature)
    if target not in dataset.column_names():
        raise _ToolkitError("Target column '%s' does not exist" % target)

    if batch_size < 1:
        raise ValueError("'batch_size' must be greater than or equal to 1")

    if not (isinstance(validation_set, _tc.SFrame) or validation_set == 'auto'
            or validation_set is None):
        raise TypeError("Unrecognized value for 'validation_set'.")

    if feature is None:
        feature = _tkutl._find_only_image_column(dataset)

    feature_extractor = _image_feature_extractor._create_feature_extractor(
        model)

    # Extract features
    extracted_features = _tc.SFrame({
        target:
        dataset[target],
        '__image_features__':
        feature_extractor.extract_features(dataset,
                                           feature,
                                           verbose=verbose,
                                           batch_size=batch_size),
    })
    if isinstance(validation_set, _tc.SFrame):
        extracted_features_validation = _tc.SFrame({
            target:
            validation_set[target],
            '__image_features__':
            feature_extractor.extract_features(validation_set,
                                               feature,
                                               verbose=verbose,
                                               batch_size=batch_size),
        })
    else:
        extracted_features_validation = validation_set

    # Train a classifier using the extracted features
    extracted_features[target] = dataset[target]
    lr_model = _tc.logistic_classifier.create(
        extracted_features,
        features=['__image_features__'],
        target=target,
        max_iterations=max_iterations,
        validation_set=extracted_features_validation,
        seed=seed,
        verbose=verbose,
        l2_penalty=l2_penalty,
        l1_penalty=l1_penalty,
        solver=solver,
        feature_rescaling=feature_rescaling,
        convergence_threshold=convergence_threshold,
        step_size=step_size,
        lbfgs_memory_level=lbfgs_memory_level,
        class_weights=class_weights)

    # set input image shape
    if model in _pre_trained_models.MODELS:
        input_image_shape = _pre_trained_models.MODELS[model].input_image_shape
    else:  # model == VisionFeaturePrint_Scene
        input_image_shape = (3, 299, 299)

    # Save the model
    state = {
        'classifier': lr_model,
        'model': model,
        'max_iterations': max_iterations,
        'feature_extractor': feature_extractor,
        'input_image_shape': input_image_shape,
        'target': target,
        'feature': feature,
        'num_features': 1,
        'num_classes': lr_model.num_classes,
        'classes': lr_model.classes,
        'num_examples': lr_model.num_examples,
        'training_time': _time.time() - start_time,
        'training_loss': lr_model.training_loss,
    }
    return ImageClassifier(state)
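A hedged usage sketch for this variant (the saved-SFrame path is hypothetical). The extra regularization and solver parameters are forwarded to the underlying logistic classifier; per the solver notes above, a non-zero l1_penalty should be paired with the 'fista' solver.

import turicreate as tc

# Hypothetical training data with an image column and a string 'label' column.
data = tc.SFrame("/path/to/train_data.sframe")
model = tc.image_classifier.create(
    data,
    target="label",
    model="squeezenet_v1.1",   # smaller exported Core ML model (~4.7M)
    l1_penalty=0.1,            # sparse coefficients; pair with the 'fista' solver
    solver="fista",
    validation_set=None,       # skip the automatic validation split
    batch_size=32)             # reduce if feature extraction runs out of memory
predictions = model.predict(data)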
Example #5
def create(dataset,
           label=None,
           feature=None,
           model="resnet-50",
           verbose=True,
           batch_size=64):
    """
    Create a :class:`ImageSimilarityModel` model.

    Parameters
    ----------
    dataset : SFrame
        Input data. The column named by the 'feature' parameter will be
        extracted for modeling.

    label : string
        Name of the SFrame column with row labels to be used as uuid's to
        identify the data. If 'label' is set to None, row numbers are used to
        identify reference dataset rows when the model is queried.

    feature : string
        Name of the column containing the input images. 'None' (the default)
        indicates that the SFrame has only one column of Image type and that will
        be used for similarity.

    model: string, optional
        Uses a pretrained model to bootstrap an image similarity model

           - "resnet-50" : Uses a pretrained resnet model.

           - "squeezenet_v1.1" : Uses a pretrained squeezenet model.

           - "VisionFeaturePrint_Scene": Uses an OS internal feature extractor.
                                          Only available on iOS 12.0+,
                                          macOS 10.14+ and tvOS 12.0+.

        Models are downloaded from the internet if not available locally. Once
        downloaded, the models are cached for future use.

    verbose : bool, optional
        If True, print progress updates and model details.

    batch_size : int, optional
        If you are getting memory errors, try decreasing this value. If you
        have a powerful computer, increasing this value may improve performance.

    Returns
    -------
    out : ImageSimilarityModel
        A trained :class:`ImageSimilarityModel` model.

    See Also
    --------
    ImageSimilarityModel

    Examples
    --------
    .. sourcecode:: python

        # Train an image similarity model
        >>> model = turicreate.image_similarity.create(data)

        # Query the model for similar images
        >>> similar_images = model.query(data)
        +-------------+-----------------+-------------------+------+
        | query_label | reference_label |      distance     | rank |
        +-------------+-----------------+-------------------+------+
        |      0      |        0        |        0.0        |  1   |
        |      0      |       519       |   12.5319706301   |  2   |
        |      0      |       1619      |   12.5563764596   |  3   |
        |      0      |       186       |   12.6132604915   |  4   |
        |      0      |       1809      |   12.9180964745   |  5   |
        |      1      |        1        | 2.02304872852e-06 |  1   |
        |      1      |       1579      |   11.4288186151   |  2   |
        |      1      |       1237      |   12.3764325949   |  3   |
        |      1      |        80       |   12.7264363676   |  4   |
        |      1      |        58       |   12.7675058558   |  5   |
        +-------------+-----------------+-------------------+------+
        [500 rows x 4 columns]
    """
    start_time = _time.time()
    if not isinstance(dataset, _tc.SFrame):
        raise TypeError("'dataset' must be of type SFrame.")

    # Check parameters
    allowed_models = list(_pre_trained_models.IMAGE_MODELS.keys())
    if _mac_ver() >= (10, 14):
        allowed_models.append("VisionFeaturePrint_Scene")

        # Also, to make sure existing code doesn't break, replace incorrect name
        # with the correct name version
        if model == "VisionFeaturePrint_Screen":
            print(
                "WARNING: Correct spelling of model name is VisionFeaturePrint_Scene.  VisionFeaturePrint_Screen will be removed in future releases."
            )
            model = "VisionFeaturePrint_Scene"

    _tkutl._check_categorical_option_type("model", model, allowed_models)
    if len(dataset) == 0:
        raise _ToolkitError("Unable to train on empty dataset")
    if (label is not None) and (label not in dataset.column_names()):
        raise _ToolkitError("Row label column '%s' does not exist" % label)
    if (feature is not None) and (feature not in dataset.column_names()):
        raise _ToolkitError("Image feature column '%s' does not exist" %
                            feature)
    if batch_size < 1:
        raise ValueError("'batch_size' must be greater than or equal to 1")

    # Set defaults
    if feature is None:
        feature = _tkutl._find_only_image_column(dataset)

    feature_extractor = _image_feature_extractor._create_feature_extractor(
        model)

    # Extract features
    extracted_features = _tc.SFrame({
        "__image_features__":
        feature_extractor.extract_features(dataset,
                                           feature,
                                           verbose=verbose,
                                           batch_size=batch_size),
    })

    # Train a similarity model using the extracted features
    if label is not None:
        extracted_features[label] = dataset[label]
    nn_model = _tc.nearest_neighbors.create(
        extracted_features,
        label=label,
        features=["__image_features__"],
        verbose=verbose,
    )

    # set input image shape
    if model in _pre_trained_models.IMAGE_MODELS:
        input_image_shape = _pre_trained_models.IMAGE_MODELS[
            model].input_image_shape
    else:  # model == VisionFeaturePrint_Scene
        input_image_shape = (3, 299, 299)

    # Save the model
    state = {
        "similarity_model": nn_model,
        "model": model,
        "feature_extractor": feature_extractor,
        "input_image_shape": input_image_shape,
        "label": label,
        "feature": feature,
        "num_features": 1,
        "num_examples": nn_model.num_examples,
        "training_time": _time.time() - start_time,
    }
    return ImageSimilarityModel(state)
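A hedged sketch of the 'label' parameter described above (directory paths are hypothetical): query results then reference rows by that label instead of by row number.

import turicreate as tc

# Hypothetical reference images; use the file name as the row label.
refs = tc.image_analysis.load_images("/path/to/reference_images", with_path=True)
refs["id"] = refs["path"].apply(lambda p: p.split("/")[-1])
model = tc.image_similarity.create(refs, label="id", model="squeezenet_v1.1")

# Query with new images; the 'reference_label' column holds the 'id' values.
queries = tc.image_analysis.load_images("/path/to/query_images")
similar = model.query(queries, k=3)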
Example #6
def annotate(data, image_column=None, annotation_column="annotations"):
    """
    Annotate images using a GUI assisted application. When the GUI is
    terminated an SFrame with the representative images and annotations is
    returned.

    Parameters
    ----------
    data : SArray | SFrame
        The data containing the input images.

    image_column: string, optional
        The name of the input column in the SFrame that contains the image that
        needs to be annotated. In case `data` is of type SArray, then the
        output SFrame contains a column (with this name) containing the input
        images.

    annotation_column : string, optional
        The column containing the annotations in the output SFrame.

    Returns
    -------
    out : SFrame
        A new SFrame that contains the newly annotated data.

    Examples
    --------
    >>> import turicreate as tc
    >>> images = tc.image_analysis.load_images("path/to/images")
    >>> print(images)
        +------------------------+--------------------------+
        |          path          |          image           |
        +------------------------+--------------------------+
        | /Users/username/Doc... | Height: 1712 Width: 1952 |
        | /Users/username/Doc... | Height: 1386 Width: 1000 |
        | /Users/username/Doc... |  Height: 536 Width: 858  |
        | /Users/username/Doc... | Height: 1512 Width: 2680 |
        +------------------------+--------------------------+
        [4 rows x 2 columns]

    >>> images = tc.image_classifier.annotate(images)
    >>> print(images)
        +------------------------+--------------------------+-------------------+
        |          path          |          image           |    annotations    |
        +------------------------+--------------------------+-------------------+
        | /Users/username/Doc... | Height: 1712 Width: 1952 |        dog        |
        | /Users/username/Doc... | Height: 1386 Width: 1000 |        dog        |
        | /Users/username/Doc... |  Height: 536 Width: 858  |        cat        |
        | /Users/username/Doc... | Height: 1512 Width: 2680 |       mouse       |
        +------------------------+--------------------------+-------------------+
        [4 rows x 3 columns]

    """
    # Check Value of Column Variables
    if not isinstance(data, (__tc.SFrame, __tc.SArray)):
        raise TypeError('"data" must be of type SFrame or SArray.')

    # Check if Value is Empty
    if len(data) == 0:
        raise Exception("input data cannot be empty")

    if image_column is None:
        image_column = _tkutl._find_only_image_column(data)

    if image_column is None:
        raise ValueError("'image_column' cannot be 'None'")

    if not isinstance(image_column, str):
        raise TypeError("'image_column' has to be of type 'str'")

    if annotation_column is None:
        annotation_column = ""

    if not isinstance(annotation_column, str):
        raise TypeError("'annotation_column' has to be of type 'str'")

    # Check Data Structure
    if type(data) == __tc.data_structures.image.Image:
        data = __tc.SFrame({image_column: __tc.SArray([data])})

    elif type(data) == __tc.data_structures.sframe.SFrame:
        if data.shape[0] == 0:
            return data
        if not (data[image_column].dtype == __tc.data_structures.image.Image):
            raise TypeError("'data[image_column]' must be an SFrame or SArray")

    elif type(data) == __tc.data_structures.sarray.SArray:
        if data.shape[0] == 0:
            return data

        data = __tc.SFrame({image_column: data})
    else:
        raise TypeError("'data' must be an SFrame or SArray")

    annotation_window = __tc.extensions.create_image_classification_annotation(
        data, [image_column], annotation_column)

    with _QuietProgress(False):
        annotation_window.annotate(_get_client_app_path())
        return annotation_window.returnAnnotations()
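A hedged sketch of the SArray path described in the docstring (the image directory is hypothetical): a bare SArray of images is wrapped into an SFrame whose image column takes the name given by image_column.

import turicreate as tc

# Hypothetical images; pass the raw image SArray rather than the whole SFrame.
images = tc.image_analysis.load_images("/path/to/images")["image"]
labeled = annotate(images, image_column="image", annotation_column="annotations")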
Example #7
def create(dataset, target, feature = None, model = 'resnet-50',
           validation_set='auto', max_iterations = 10, verbose = True,
           seed = None, batch_size=64):
    """
    Create a :class:`ImageClassifier` model.

    Parameters
    ----------
    dataset : SFrame
        Input data. The column named by the 'feature' parameter will be
        extracted for modeling.

    target : string, or int
        Name of the column containing the target variable. The values in this
        column must be of string or integer type. String target variables are
        automatically mapped to integers in the order in which they are provided.
        For example, a target variable with 'cat' and 'dog' as possible
        values is mapped to 0 and 1 respectively with 0 being the base class
        and 1 being the reference class. Use `model.classes` to retrieve
        the order in which the classes are mapped.

    feature : string, optional
        Name of the column containing the input images. 'None' (the default)
        indicates the only image column in `dataset` should be used as the
        feature.

    model : string, optional
        Uses a pretrained model to bootstrap an image classifier:

           - "resnet-50" : Uses a pretrained resnet model.
                           Exported Core ML model will be ~90M.

           - "squeezenet_v1.1" : Uses a pretrained squeezenet model.
                                 Exported Core ML model will be ~4.7M.

           - "VisionFeaturePrint_Screen": Uses an OS internal feature extractor.
                                          Only available on iOS 12.0+,
                                          macOS 10.14+ and tvOS 12.0+.
                                          Exported Core ML model will be ~41K.

        Models are downloaded from the internet if not available locally. Once
        downloaded, the models are cached for future use.

    validation_set : SFrame, optional
        A dataset for monitoring the model's generalization performance.
        The format of this SFrame must be the same as the training set.
        By default this argument is set to 'auto' and a validation set is
        automatically sampled and used for progress printing. If
        validation_set is set to None, then no additional metrics
        are computed. The default value is 'auto'.

    max_iterations : int, optional
        The maximum number of allowed passes through the data. More passes over
        the data can result in a more accurately trained model. Consider
        increasing this (the default value is 10) if the training accuracy is
        low and the *Grad-Norm* in the display is large.

    verbose : bool, optional
        If True, prints progress updates and model details.

    seed : int, optional
        Seed for random number generation. Set this value to ensure that the
        same model is created every time.

    batch_size : int, optional
        If you are getting memory errors, try decreasing this value. If you
        have a powerful computer, increasing this value may improve performance.

    Returns
    -------
    out : ImageClassifier
        A trained :class:`ImageClassifier` model.

    Examples
    --------
    .. sourcecode:: python

        >>> model = turicreate.image_classifier.create(data, target='is_expensive')

        # Make predictions (in various forms)
        >>> predictions = model.predict(data)      # predictions
        >>> predictions = model.classify(data)     # predictions with confidence
        >>> predictions = model.predict_topk(data) # Top-5 predictions (multiclass)

        # Evaluate the model with ground truth data
        >>> results = model.evaluate(data)

    See Also
    --------
    ImageClassifier
    """
    start_time = _time.time()

    # Check model parameter
    allowed_models = list(_pre_trained_models.MODELS.keys())
    if _mac_ver() >= (10,14):
        allowed_models.append('VisionFeaturePrint_Screen')
    _tkutl._check_categorical_option_type('model', model, allowed_models)

    # Check dataset parameter
    if len(dataset) == 0:
        raise _ToolkitError('Unable to train on empty dataset')
    if (feature is not None) and (feature not in dataset.column_names()):
        raise _ToolkitError("Image feature column '%s' does not exist" % feature)
    if target not in dataset.column_names():
        raise _ToolkitError("Target column '%s' does not exist" % target)

    if batch_size < 1:
        raise ValueError("'batch_size' must be greater than or equal to 1")

    if not (isinstance(validation_set, _tc.SFrame) or validation_set == 'auto' or validation_set is None):
        raise TypeError("Unrecognized value for 'validation_set'.")

    if feature is None:
        feature = _tkutl._find_only_image_column(dataset)

    feature_extractor = _image_feature_extractor._create_feature_extractor(model)

    # Extract features
    extracted_features = _tc.SFrame({
        target: dataset[target],
        '__image_features__': feature_extractor.extract_features(dataset, feature, verbose=verbose, batch_size=batch_size),
        })
    if isinstance(validation_set, _tc.SFrame):
        extracted_features_validation = _tc.SFrame({
            target: validation_set[target],
            '__image_features__': feature_extractor.extract_features(validation_set, feature, verbose=verbose, batch_size=batch_size),
        })
    else:
        extracted_features_validation = validation_set

    # Train a classifier using the extracted features
    extracted_features[target] = dataset[target]
    lr_model = _tc.logistic_classifier.create(extracted_features,
                                              features=['__image_features__'],
                                              target=target,
                                              max_iterations=max_iterations,
                                              validation_set=extracted_features_validation,
                                              seed=seed,
                                              verbose=verbose)

    # set input image shape
    if model in _pre_trained_models.MODELS:
        input_image_shape = _pre_trained_models.MODELS[model].input_image_shape
    else:    # model == VisionFeaturePrint_Screen
        input_image_shape = (3, 299, 299)

    # Save the model
    state = {
        'classifier': lr_model,
        'model': model,
        'max_iterations': max_iterations,
        'feature_extractor': feature_extractor,
        'input_image_shape': input_image_shape,
        'target': target,
        'feature': feature,
        'num_features': 1,
        'num_classes': lr_model.num_classes,
        'classes': lr_model.classes,
        'num_examples': lr_model.num_examples,
        'training_time': _time.time() - start_time,
        'training_loss': lr_model.training_loss,
    }
    return ImageClassifier(state)
Example #8
def create(dataset,
           target,
           feature=None,
           model='resnet-50',
           max_iterations=10,
           verbose=True,
           seed=None):
    """
    Create a :class:`ImageClassifier` model.

    Parameters
    ----------
    dataset : SFrame
        Input data. The column named by the 'feature' parameter will be
        extracted for modeling.

    target : string, or int
        Name of the column containing the target variable. The values in this
        column must be of string or integer type. String target variables are
        automatically mapped to integers in the order in which they are provided.
        For example, a target variable with 'cat' and 'dog' as possible
        values is mapped to 0 and 1 respectively with 0 being the base class
        and 1 being the reference class. Use `model.classes` to retrieve
        the order in which the classes are mapped.

    feature : string, optional
        Name of the column containing the input images. 'None' (the default)
        indicates the only image column in `dataset` should be used as the
        feature.

    model : string, optional
        Uses a pretrained model to bootstrap an image classifier

           - "resnet-50" : Uses a pretrained resnet model.
           - "squeezenet_v1.1" : Uses a pretrained squeezenet model.

        Models are downloaded from the internet if not available locally. Once
        downloaded, the models are cached for future use.

    max_iterations : int, optional
        The maximum number of allowed passes through the data. More passes over
        the data can result in a more accurately trained model. Consider
        increasing this (the default value is 10) if the training accuracy is
        low and the *Grad-Norm* in the display is large.

    verbose : bool, optional
        If True, prints progress updates and model details.

    seed : int, optional
        Seed for random number generation. Set this value to ensure that the
        same model is created every time.

    Returns
    -------
    out : ImageClassifier
        A trained :class:`ImageClassifier` model.

    Examples
    --------
    .. sourcecode:: python

        >>> model = turicreate.image_classifier.create(data, target='is_expensive')

        # Make predictions (in various forms)
        >>> predictions = model.predict(data)      # predictions
        >>> predictions = model.classify(data)     # predictions with confidence
        >>> predictions = model.predict_topk(data) # Top-5 predictions (multiclass)

        # Evaluate the model with ground truth data
        >>> results = model.evaluate(data)

    See Also
    --------
    ImageClassifier
    """
    start_time = _time.time()

    # Check parameters
    _tkutl._check_categorical_option_type('model', model,
                                          _pre_trained_models.MODELS.keys())
    if len(dataset) == 0:
        raise _ToolkitError('Unable to train on empty dataset')
    if (feature is not None) and (feature not in dataset.column_names()):
        raise _ToolkitError("Image feature column '%s' does not exist" %
                            feature)
    if target not in dataset.column_names():
        raise _ToolkitError("Target column '%s' does not exist" % target)

    if feature is None:
        feature = _tkutl._find_only_image_column(dataset)

    # Load pre-trained model & feature extractor
    ptModel = _pre_trained_models.MODELS[model]()
    feature_extractor = _image_feature_extractor.MXFeatureExtractor(ptModel)

    # Extract features
    extracted_features = _tc.SFrame({
        target:
        dataset[target],
        '__image_features__':
        feature_extractor.extract_features(dataset, feature, verbose=verbose),
    })

    # Train a classifier using the extracted features
    extracted_features[target] = dataset[target]
    lr_model = _tc.logistic_classifier.create(extracted_features,
                                              features=['__image_features__'],
                                              target=target,
                                              max_iterations=max_iterations,
                                              seed=seed,
                                              verbose=verbose)

    # Save the model
    state = {
        'classifier': lr_model,
        'model': model,
        'max_iterations': max_iterations,
        'feature_extractor': feature_extractor,
        'input_image_shape': ptModel.input_image_shape,
        'target': target,
        'feature': feature,
        'num_features': 1,
        'num_classes': lr_model.num_classes,
        'classes': lr_model.classes,
        'num_examples': lr_model.num_examples,
        'training_time': _time.time() - start_time,
        'training_loss': lr_model.training_loss,
    }
    return ImageClassifier(state)
Example #9
def create(dataset, label=None, feature=None, model='resnet-50', verbose=True):
    """
    Create a :class:`ImageSimilarityModel` model.

    Parameters
    ----------
    dataset : SFrame
        Input data. The column named by the 'feature' parameter will be
        extracted for modeling.

    label : string
        Name of the SFrame column with row labels to be used as uuid's to
        identify the data. If 'label' is set to None, row numbers are used to
        identify reference dataset rows when the model is queried.

    feature : string
        Name of the column containing the input images. 'None' (the default)
        indicates that the SFrame has only one column of Image type and that
        will be used for similarity.

    model: string, optional
        Uses a pretrained model to bootstrap an image similarity model

           - "resnet-50" : Uses a pretrained resnet model.

        Models are downloaded from the internet if not available locally. Once
        downloaded, the models are cached for future use.

    verbose : bool, optional
        If True, print progress updates and model details.

    Returns
    -------
    out : ImageSimilarityModel
        A trained :class:`ImageSimilarityModel` model.

    See Also
    --------
    ImageSimilarityModel

    Examples
    --------
    .. sourcecode:: python

        # Train an image similarity model
        >>> model = turicreate.image_similarity.create(data)

        # Query the model for similar images
        >>> similar_images = model.query(data)
        +-------------+-----------------+-------------------+------+
        | query_label | reference_label |      distance     | rank |
        +-------------+-----------------+-------------------+------+
        |      0      |        0        |        0.0        |  1   |
        |      0      |       519       |   12.5319706301   |  2   |
        |      0      |       1619      |   12.5563764596   |  3   |
        |      0      |       186       |   12.6132604915   |  4   |
        |      0      |       1809      |   12.9180964745   |  5   |
        |      1      |        1        | 2.02304872852e-06 |  1   |
        |      1      |       1579      |   11.4288186151   |  2   |
        |      1      |       1237      |   12.3764325949   |  3   |
        |      1      |        80       |   12.7264363676   |  4   |
        |      1      |        58       |   12.7675058558   |  5   |
        +-------------+-----------------+-------------------+------+
        [500 rows x 4 columns]
    """
    start_time = _time.time()

    # Check parameters
    _tkutl._check_categorical_option_type('model', model,
                                          _pre_trained_models.MODELS.keys())
    if len(dataset) == 0:
        raise _ToolkitError('Unable to train on empty dataset')
    if (label is not None) and (label not in dataset.column_names()):
        raise _ToolkitError("Row label column '%s' does not exist" % label)
    if (feature is not None) and (feature not in dataset.column_names()):
        raise _ToolkitError("Image feature column '%s' does not exist" %
                            feature)

    # Set defaults
    if feature is None:
        feature = _tkutl._find_only_image_column(dataset)

    # Load pre-trained model & feature extractor
    ptModel = _pre_trained_models.MODELS[model]()
    feature_extractor = _image_feature_extractor.MXFeatureExtractor(ptModel)

    # Extract features
    extracted_features = _tc.SFrame({
        '__image_features__':
        feature_extractor.extract_features(dataset, feature, verbose=verbose),
    })

    # Train a similarity model using the extracted features
    if label is not None:
        extracted_features[label] = dataset[label]
    nn_model = _tc.nearest_neighbors.create(extracted_features,
                                            label=label,
                                            features=['__image_features__'],
                                            verbose=verbose)

    # Save the model
    state = {
        'similarity_model': nn_model,
        'model': model,
        'feature_extractor': feature_extractor,
        'input_image_shape': ptModel.input_image_shape,
        'label': label,
        'feature': feature,
        'num_features': 1,
        'num_examples': nn_model.num_examples,
        'training_time': _time.time() - start_time,
    }
    return ImageSimilarityModel(state)
Example #10
    def stylize(self, images, style=None, verbose=True, max_size=800, batch_size = 4):
        """
        Stylize an SFrame of Images given a style index or a list of
        styles.

        Parameters
        ----------
        images : SFrame | Image
            A dataset that has the same content image column that was used
            during training.

        style : int or list, optional
            The selected style or list of styles to use on the ``images``. If
            `None`, all styles will be applied to each image in ``images``.

        verbose : bool, optional
            If True, print progress updates.

        max_size : int or tuple
            Max input image size that will not get resized during stylization.

            Images with a side larger than this value will be scaled down due
            to time and memory constraints. If a tuple, it is interpreted as
            (max width, max height). Without resizing, larger input images take
            more time to stylize. Resizing can affect the quality of the final
            stylized image.

        batch_size : int, optional
            If you are getting memory errors, try decreasing this value. If you
            have a powerful computer, increasing this value may improve
            performance.

        Returns
        -------
        out : SFrame or SArray or turicreate.Image
            If ``style`` is a list, an SFrame is always returned. If ``style``
            is a single integer, the output type will match the input type
            (Image, SArray, or SFrame).

        See Also
        --------
        create

        Examples
        --------
        >>> image = tc.Image("/path/to/image.jpg")
        >>> stylized_images = model.stylize(image, style=[0, 1])
        Data:
        +--------+-------+------------------------+
        | row_id | style |     stylized_image     |
        +--------+-------+------------------------+
        |   0    |   0   | Height: 256 Width: 256 |
        |   0    |   1   | Height: 256 Width: 256 |
        +--------+-------+------------------------+
        [2 rows x 3 columns]

        >>> images = tc.image_analysis.load_images('/path/to/images')
        >>> stylized_images = model.stylize(images)
        Data:
        +--------+-------+------------------------+
        | row_id | style |     stylized_image     |
        +--------+-------+------------------------+
        |   0    |   0   | Height: 256 Width: 256 |
        |   0    |   1   | Height: 256 Width: 256 |
        |   0    |   2   | Height: 256 Width: 256 |
        |   0    |   3   | Height: 256 Width: 256 |
        |   1    |   0   | Height: 640 Width: 648 |
        |   1    |   1   | Height: 640 Width: 648 |
        |   1    |   2   | Height: 640 Width: 648 |
        |   1    |   3   | Height: 640 Width: 648 |
        +--------+-------+------------------------+
        [8 rows x 3 columns]
        """
        if batch_size < 1:
            raise _ToolkitError("'batch_size' must be greater than or equal to 1")

        from ._sframe_loader import SFrameSTIter as _SFrameSTIter
        import mxnet as _mx
        from mxnet import gluon as _gluon
        set_of_all_idx = self._style_indices()
        style, single_style = self._style_input_check(style)

        if isinstance(max_size, _six.integer_types):
            input_shape = (max_size, max_size)
        else:
            # Outward-facing, we use (width, height), but internally we use
            # (height, width)
            input_shape = max_size[::-1]

        images, unpack = self._canonize_content_input(images, single_style=single_style)

        dataset_size = len(images)
        output_size = dataset_size * len(style)
        batch_size_each = min(batch_size, output_size)
        num_mxnet_gpus = _mxnet_utils.get_num_gpus_in_use(max_devices=batch_size_each)

        if num_mxnet_gpus == 0:
            # CPU processing prefers native size to prevent stylizing
            # unnecessary regions
            batch_size_each = 1
            loader_type = 'favor-native-size'
        else:
            # GPU processing prefers batches of same size, using padding
            # for smaller images
            loader_type = 'pad'

        self._model.batch_size = batch_size_each
        self._model.hybridize()

        ctx = _mxnet_utils.get_mxnet_context(max_devices=batch_size_each)
        batch_size = max(num_mxnet_gpus, 1) * batch_size_each
        last_time = 0
        if dataset_size == 0:
            raise _ToolkitError("SFrame cannot be empty")
        content_feature = _tkutl._find_only_image_column(images)
        _raise_error_if_not_training_sframe(images, content_feature)

        max_h = 0
        max_w = 0
        oversized_count = 0
        for img in images[content_feature]:
            if img.height > input_shape[0] or img.width > input_shape[1]:
                oversized_count += 1
            max_h = max(img.height, max_h)
            max_w = max(img.width, max_w)

        if input_shape[0] > max_h:
            input_shape = (max_h, input_shape[1])
        if input_shape[1] > max_w:
            input_shape = (input_shape[0], max_w)

        # If we find large images, let's switch to sequential iterator
        # pre-processing, to prevent memory issues.
        sequential = max(max_h, max_w) > 2000

        if verbose and output_size != 1:
            print('Stylizing {} image(s) using {} style(s)'.format(dataset_size, len(style)))
            if oversized_count > 0:
                print('Scaling down {} image(s) exceeding {}x{}'.format(oversized_count, input_shape[1], input_shape[0]))

        content_images_loader = _SFrameSTIter(images, batch_size,
                                              shuffle=False,
                                              feature_column=content_feature,
                                              input_shape=input_shape,
                                              num_epochs=1,
                                              loader_type=loader_type,
                                              repeat_each_image=len(style),
                                              sequential=sequential)

        sb = _tc.SFrameBuilder([int, int, _tc.Image],
                               column_names=['row_id', 'style', 'stylized_{}'.format(self.content_feature)])

        count = 0
        for i, batch in enumerate(content_images_loader):
            if loader_type == 'favor-native-size':
                c_data = [batch.data[0][0].expand_dims(0)]
            else:
                c_data = _gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            indices_data = _gluon.utils.split_and_load(_mx.nd.array(batch.repeat_indices, dtype=_np.int64),
                                                       ctx_list=ctx, batch_axis=0)
            outputs = []
            for b_img, b_indices in zip(c_data, indices_data):
                mx_style = _mx.nd.array(style, dtype=_np.int64, ctx=b_indices.context)
                b_batch_styles = mx_style[b_indices]
                output = self._model(b_img, b_batch_styles)
                outputs.append(output)

            image_data = _np.concatenate([
                (output.asnumpy().transpose(0, 2, 3, 1) * 255).astype(_np.uint8)
                for output in outputs], axis=0)

            batch_styles = [style[idx] for idx in batch.repeat_indices]

            for b in range(batch_size - (batch.pad or 0)):
                image = image_data[b]
                # Crop to remove added padding
                crop = batch.crop[b]
                cropped_image = image[crop[0]:crop[1], crop[2]:crop[3]]
                tc_img = _tc.Image(_image_data=cropped_image.tobytes(),
                                   _width=cropped_image.shape[1],
                                   _height=cropped_image.shape[0],
                                   _channels=cropped_image.shape[2],
                                   _format_enum=2,
                                   _image_data_size=cropped_image.size)
                sb.append([batch.indices[b], batch_styles[b], tc_img])
                count += 1

            cur_time = _time.time()
            if verbose and output_size != 1 and (cur_time > last_time + 10 or count == output_size):
                print('Stylizing {curr_image:{width}d}/{max_n:{width}d}'.
                      format(curr_image=count, max_n=output_size, width=len(str(output_size))))
                last_time = cur_time

        return unpack(sb.close())
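
A minimal usage sketch (not part of the toolkit source) for the method above, assuming an already trained StyleTransfer model `model` and a placeholder image directory; it applies two styles with a (width, height) size cap and joins the stylized rows back to the inputs by `row_id`:

import turicreate as tc

images = tc.image_analysis.load_images('/path/to/images')

# Apply styles 0 and 1, scaling down any image wider than 1024 or taller than 768.
stylized = model.stylize(images, style=[0, 1], max_size=(1024, 768), batch_size=2)

# 'row_id' indexes rows of the input SFrame, so results can be joined back to it.
images = images.add_row_number('row_id')
combined = images.join(stylized, on='row_id')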
Example #11
0
def create(style_dataset, content_dataset, style_feature=None,
        content_feature=None, max_iterations=None, model='resnet-16',
        verbose=True, batch_size=6, **kwargs):
    """
    Create a :class:`StyleTransfer` model.

    Parameters
    ----------
    style_dataset : SFrame
        Input style images. The column named by the ``style_feature`` parameter
        will be extracted for training the model.

    content_dataset : SFrame
        Input content images. The column named by the ``content_feature`` parameter
        will be extracted for training the model.

    style_feature : string
        Name of the column containing the input images in the style SFrame.
        'None' (the default) indicates the only image column in the style SFrame
        should be used as the feature.

    content_feature : string
        Name of the column containing the input images in the content SFrame.
        'None' (the default) indicates the only image column in the content
        SFrame should be used as the feature.

    max_iterations : int
        The number of training iterations. If 'None' (the default), then it will
        be automatically determined based on the amount of data you provide.

    model : string optional
        Style transfer model to use:

            - "resnet-16" : Fast and small-sized residual network that uses
                            VGG-16 as reference network during training.

    batch_size : int, optional
        If you are getting memory errors, try decreasing this value. If you
        have a powerful computer, increasing this value may improve training
        throughput.

    verbose : bool, optional
        If True, print progress updates and model details.


    Returns
    -------
    out : StyleTransfer
        A trained :class:`StyleTransfer` model.

    See Also
    --------
    StyleTransfer

    Examples
    --------
    .. sourcecode:: python

        # Create datasets
        >>> content_dataset = turicreate.image_analysis.load_images('content_images/')
        >>> style_dataset = turicreate.image_analysis.load_images('style_images/')

        # Train a style transfer model
        >>> model = turicreate.style_transfer.create(style_dataset, content_dataset)

        # Stylize images on all styles
        >>> stylized_images = model.stylize(content_dataset)

        # Visualize the stylized images
        >>> stylized_images.explore()

    """
    if len(style_dataset) == 0:
        raise _ToolkitError("style_dataset SFrame cannot be empty")
    if len(content_dataset) == 0:
        raise _ToolkitError("content_dataset SFrame cannot be empty")
    if batch_size < 1:
        raise _ToolkitError("'batch_size' must be greater than or equal to 1")

    from ._sframe_loader import SFrameSTIter as _SFrameSTIter
    import mxnet as _mx

    if style_feature is None:
        style_feature = _tkutl._find_only_image_column(style_dataset)
    if content_feature is None:
        content_feature = _tkutl._find_only_image_column(content_dataset)
    if verbose:
        print("Using '{}' in style_dataset as feature column and using "
              "'{}' in content_dataset as feature column".format(style_feature, content_feature))

    _raise_error_if_not_training_sframe(style_dataset, style_feature)
    _raise_error_if_not_training_sframe(content_dataset, content_feature)

    params = {
        'batch_size': batch_size,
        'vgg16_content_loss_layer': 2,  # conv3_3 layer
        'lr': 0.001,
        'content_loss_mult': 1.0,
        'style_loss_mult': [1e-4, 1e-4, 1e-4, 1e-4],  # conv 1-4 layers
        'finetune_all_params': False,
        'print_loss_breakdown': False,
        'input_shape': (256, 256),
        'training_content_loader_type': 'stretch',
        'use_augmentation': False,
        'sequential_image_processing': False,
        # Only used if use_augmentation is True
        'aug_resize': 0,
        'aug_min_object_covered': 0,
        'aug_rand_crop': 0.9,
        'aug_rand_pad': 0.9,
        'aug_rand_gray': 0.0,
        'aug_aspect_ratio': 1.25,
        'aug_hue': 0.05,
        'aug_brightness': 0.05,
        'aug_saturation': 0.05,
        'aug_contrast': 0.05,
        'aug_horizontal_flip': True,
        'aug_area_range': (.05, 1.5),
        'aug_pca_noise': 0.0,
        'aug_max_attempts': 20,
        'aug_inter_method': 2,
    }

    if '_advanced_parameters' in kwargs:
        # Make sure no additional parameters are provided
        new_keys = set(kwargs['_advanced_parameters'].keys())
        set_keys = set(params.keys())
        unsupported = new_keys - set_keys
        if unsupported:
            raise _ToolkitError('Unknown advanced parameters: {}'.format(unsupported))

        params.update(kwargs['_advanced_parameters'])

    _content_loss_mult = params['content_loss_mult']
    _style_loss_mult = params['style_loss_mult']

    num_gpus = _mxnet_utils.get_num_gpus_in_use(max_devices=params['batch_size'])
    batch_size_each = params['batch_size'] // max(num_gpus, 1)
    batch_size = max(num_gpus, 1) * batch_size_each
    input_shape = params['input_shape']

    iterations = 0
    if max_iterations is None:
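        # Heuristic: roughly 500 iterations per style image plus a 2000-iteration base.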
        max_iterations = len(style_dataset) * 500 + 2000
        if verbose:
            print('Setting max_iterations to be {}'.format(max_iterations))

    # data loader
    if params['use_augmentation']:
        content_loader_type = '%s-with-augmentation' % params['training_content_loader_type']
    else:
        content_loader_type = params['training_content_loader_type']

    content_images_loader = _SFrameSTIter(content_dataset, batch_size, shuffle=True,
                                  feature_column=content_feature, input_shape=input_shape,
                                  loader_type=content_loader_type, aug_params=params,
                                  sequential=params['sequential_image_processing'])
    ctx = _mxnet_utils.get_mxnet_context(max_devices=params['batch_size'])

    num_styles = len(style_dataset)

    # TRANSFORMER MODEL
    from ._model import Transformer as _Transformer
    transformer_model_path = _pre_trained_models.STYLE_TRANSFER_BASE_MODELS[model]().get_model_path()
    transformer = _Transformer(num_styles, batch_size_each)
    transformer.collect_params().initialize(ctx=ctx)
    transformer.load_params(transformer_model_path, ctx, allow_missing=True)
    # For some reason, the transformer fails to hybridize for training, so we
    # avoid this until resolved
    # transformer.hybridize()

    # VGG MODEL
    from ._model import Vgg16 as _Vgg16
    vgg_model_path = _pre_trained_models.STYLE_TRANSFER_BASE_MODELS['Vgg16']().get_model_path()
    vgg_model = _Vgg16()
    vgg_model.collect_params().initialize(ctx=ctx)
    vgg_model.load_params(vgg_model_path, ctx=ctx, ignore_extra=True)
    vgg_model.hybridize()

    # TRAINER
    from mxnet import gluon as _gluon
    from ._model import gram_matrix as _gram_matrix

    if params['finetune_all_params']:
        trainable_params = transformer.collect_params()
    else:
        trainable_params = transformer.collect_params('.*gamma|.*beta')

    trainer = _gluon.Trainer(trainable_params, 'adam', {'learning_rate': params['lr']})
    mse_loss = _gluon.loss.L2Loss()
    start_time = _time.time()
    smoothed_loss = None
    last_time = 0

    cuda_gpus = _mxnet_utils.get_gpus_in_use(max_devices=params['batch_size'])
    num_mxnet_gpus = len(cuda_gpus)

    if verbose:
        # Estimate memory usage (based on experiments)
        cuda_mem_req = 260 + batch_size_each * 880 + num_styles * 1.4

        _tkutl._print_neural_compute_device(cuda_gpus=cuda_gpus, use_mps=False,
                                            cuda_mem_req=cuda_mem_req, has_mps_impl=False)
    #
    # Pre-compute gram matrices for style images
    #
    if verbose:
        print('Analyzing visual features of the style images')

    style_images_loader = _SFrameSTIter(style_dataset, batch_size, shuffle=False, num_epochs=1,
                                        feature_column=style_feature, input_shape=input_shape,
                                        loader_type='stretch',
                                        sequential=params['sequential_image_processing'])
    num_layers = len(params['style_loss_mult'])
    gram_chunks = [[] for _ in range(num_layers)]
    for s_batch in style_images_loader:
        s_data = _gluon.utils.split_and_load(s_batch.data[0], ctx_list=ctx, batch_axis=0)
        for s in s_data:
            vgg16_s = _vgg16_data_prep(s)
            ret = vgg_model(vgg16_s)
            grams = [_gram_matrix(x) for x in ret]
            for i, gram in enumerate(grams):
                if gram.context != _mx.cpu(0):
                    gram = gram.as_in_context(_mx.cpu(0))
                gram_chunks[i].append(gram)
    del style_images_loader

    grams = [
        # The concatenated styles may be padded, so we slice overflow
        _mx.nd.concat(*chunks, dim=0)[:num_styles]
        for chunks in gram_chunks
    ]

    # A context->grams look-up table, where all the gram matrices have been
    # distributed
    ctx_grams = {}
    if ctx[0] == _mx.cpu(0):
        ctx_grams[_mx.cpu(0)] = grams
    else:
        for ctx0 in ctx:
            ctx_grams[ctx0] = [gram.as_in_context(ctx0) for gram in grams]

    #
    # Training loop
    #

    vgg_content_loss_layer = params['vgg16_content_loss_layer']
    rs = _np.random.RandomState(1234)
    while iterations < max_iterations:
        content_images_loader.reset()
        for c_batch in content_images_loader:
            c_data = _gluon.utils.split_and_load(c_batch.data[0], ctx_list=ctx, batch_axis=0)

            Ls = []
            curr_content_loss = []
            curr_style_loss = []
            with _mx.autograd.record():
                for c in c_data:
                    # Randomize styles to train
                    indices = _mx.nd.array(rs.randint(num_styles, size=batch_size_each),
                                           dtype=_np.int64, ctx=c.context)

                    # Generate pastiche
                    p = transformer(c, indices)

                    # mean subtraction
                    vgg16_p = _vgg16_data_prep(p)
                    vgg16_c = _vgg16_data_prep(c)

                    # vgg forward
                    p_vgg_outputs = vgg_model(vgg16_p)

                    c_vgg_outputs = vgg_model(vgg16_c)
                    c_content_layer = c_vgg_outputs[vgg_content_loss_layer]
                    p_content_layer = p_vgg_outputs[vgg_content_loss_layer]

                    # Calculate Loss
                    # Style Loss between style image and stylized image
                    # Ls = sum of L2 norm of gram matrix of vgg16's conv layers
                    style_losses = []
                    for gram, p_vgg_output, style_loss_mult in zip(ctx_grams[c.context], p_vgg_outputs, _style_loss_mult):
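                        # 'gram' holds the precomputed gram matrices of every style
                        # image at this VGG layer; indexing with the sampled style
                        # ids selects the targets for this batch.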
                        gram_s_vgg = gram[indices]
                        gram_p_vgg = _gram_matrix(p_vgg_output)

                        style_losses.append(style_loss_mult * mse_loss(gram_s_vgg, gram_p_vgg))

                    style_loss = _mx.nd.add_n(*style_losses)

                    # Content Loss between content image and stylized image
                    # Lc = L2 norm at a single layer in vgg16
                    content_loss = _content_loss_mult * mse_loss(c_content_layer,
                                                                 p_content_layer)

                    curr_content_loss.append(content_loss)
                    curr_style_loss.append(style_loss)
                    # Divide loss by large number to get into a more legible
                    # range
                    total_loss = (content_loss + style_loss) / 10000.0
                    Ls.append(total_loss)
                for L in Ls:
                    L.backward()

            cur_loss = _np.mean([L.asnumpy()[0] for L in Ls])

            if smoothed_loss is None:
                smoothed_loss = cur_loss
            else:
                smoothed_loss = 0.9 * smoothed_loss + 0.1 * cur_loss
            iterations += 1
            trainer.step(batch_size)

            if verbose and iterations == 1:
                # Print progress table header
                column_names = ['Iteration', 'Loss', 'Elapsed Time']
                num_columns = len(column_names)
                column_width = max(map(lambda x: len(x), column_names)) + 2
                hr = '+' + '+'.join(['-' * column_width] * num_columns) + '+'
                print(hr)
                print(('| {:<{width}}' * num_columns + '|').format(*column_names, width=column_width-1))
                print(hr)

            cur_time = _time.time()
            if verbose and (cur_time > last_time + 10 or iterations == max_iterations):
                # Print progress table row
                elapsed_time = cur_time - start_time
                print("| {cur_iter:<{width}}| {loss:<{width}.3f}| {time:<{width}.1f}|".format(
                    cur_iter = iterations, loss = smoothed_loss,
                    time = elapsed_time , width = column_width-1))
                if params['print_loss_breakdown']:
                    print_content_loss = _np.mean([L.asnumpy()[0] for L in curr_content_loss])
                    print_style_loss = _np.mean([L.asnumpy()[0] for L in curr_style_loss])
                    print('Total Loss: {:6.3f} | Content Loss: {:6.3f} | Style Loss: {:6.3f}'.format(cur_loss, print_content_loss, print_style_loss))
                last_time = cur_time
            if iterations == max_iterations:
                if verbose:
                    # 'hr' is only defined when verbose is True
                    print(hr)
                break

    training_time = _time.time() - start_time
    style_sa = style_dataset[style_feature]
    idx_column = _tc.SArray(range(0, style_sa.shape[0]))
    style_sframe = _tc.SFrame({"style": idx_column, style_feature: style_sa})

    # Save the model state
    state = {
        '_model': transformer,
        '_training_time_as_string': _seconds_as_string(training_time),
        'batch_size': batch_size,
        'num_styles': num_styles,
        'model': model,
        'input_image_shape': input_shape,
        'styles': style_sframe,
        'num_content_images': len(content_dataset),
        'training_time': training_time,
        'max_iterations': max_iterations,
        'training_iterations': iterations,
        'training_epochs': content_images_loader.cur_epoch,
        'style_feature': style_feature,
        'content_feature': content_feature,
        "_index_column": "style",
        'training_loss': smoothed_loss,
    }

    return StyleTransfer(state)
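
A hedged sketch (not from the toolkit source) of the `_advanced_parameters` hook validated above; the keys come from the `params` dict in this function, and the dataset paths are placeholders:

import turicreate as tc

style_dataset = tc.image_analysis.load_images('style_images/')
content_dataset = tc.image_analysis.load_images('content_images/')

# Only keys already present in 'params' are accepted; anything else raises
# a ToolkitError listing the unknown names.
model = tc.style_transfer.create(
    style_dataset, content_dataset,
    max_iterations=1000,
    _advanced_parameters={
        'print_loss_breakdown': True,  # also report content/style loss separately
        'use_augmentation': True,      # turn on the aug_* options listed above
    })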
Example #12
0
def create(dataset,
           annotations=None,
           feature=None,
           model="darknet-yolo",
           classes=None,
           batch_size=0,
           max_iterations=0,
           verbose=True,
           grid_shape=[13, 13],
           **kwargs):
    """
    Create a :class:`ObjectDetector` model.

    Parameters
    ----------
    dataset : SFrame
        Input data. The columns named by the ``feature`` and ``annotations``
        parameters will be extracted for training the detector.

    annotations : string
        Name of the column containing the object detection annotations.  This
        column should be a list of dictionaries (or a single dictionary), with
        each dictionary representing a bounding box of an object instance. Here
        is an example of the annotations for a single image with two object
        instances::

            [{'label': 'dog',
              'type': 'rectangle',
              'coordinates': {'x': 223, 'y': 198,
                              'width': 130, 'height': 230}},
             {'label': 'cat',
              'type': 'rectangle',
              'coordinates': {'x': 40, 'y': 73,
                              'width': 80, 'height': 123}}]

        The value for `x` is the horizontal center of the box paired with
        `width` and `y` is the vertical center of the box paired with `height`.
        'None' (the default) indicates the only list column in `dataset` should
        be used for the annotations.

    feature : string
        Name of the column containing the input images. 'None' (the default)
        indicates the only image column in `dataset` should be used as the
        feature.

    model : string optional
        Object detection model to use:

           - "darknet-yolo" : Fast and medium-sized model

    grid_shape : array optional
        Shape of the grid used for object detection. Higher values increase
        precision for small objects, but at a higher computational cost.

           - [13, 13] : Default grid value for a fast and medium-sized model

    classes : list optional
        List of strings containing the names of the classes of objects.
        Inferred from the data if not provided.

    batch_size : int
        The number of images per training iteration. If 0, then it will be
        automatically determined based on resource availability.

    max_iterations : int
        The number of training iterations. If 0, then it will be automatically
        determined based on the amount of data you provide.

    verbose : bool, optional
        If True, print progress updates and model details.

    Returns
    -------
    out : ObjectDetector
        A trained :class:`ObjectDetector` model.

    See Also
    --------
    ObjectDetector

    Examples
    --------
    .. sourcecode:: python

        # Train an object detector model
        >>> model = turicreate.object_detector.create(data)

        # Make predictions on the training set and add them as a column to the SFrame
        >>> data['predictions'] = model.predict(data)

        # Visualize predictions by generating a new column of marked up images
        >>> data['image_pred'] = turicreate.object_detector.util.draw_bounding_boxes(data['image'], data['predictions'])
    """
    _raise_error_if_not_sframe(dataset, "dataset")

    if len(dataset) == 0:
        raise _ToolkitError("Unable to train on empty dataset")

    _numeric_param_check_range("max_iterations", max_iterations, 0,
                               _six.MAXSIZE)
    start_time = _time.time()

    supported_detectors = ["darknet-yolo"]

    if feature is None:
        feature = _tkutl._find_only_image_column(dataset)
        if verbose:
            print("Using '%s' as feature column" % feature)
    if annotations is None:
        annotations = _tkutl._find_only_column_of_type(
            dataset,
            target_type=[list, dict],
            type_name="list",
            col_name="annotations")
        if verbose:
            print("Using '%s' as annotations column" % annotations)

    _raise_error_if_not_detection_sframe(dataset,
                                         feature,
                                         annotations,
                                         require_annotations=True)
    _tkutl._handle_missing_values(dataset, feature, "dataset")
    _tkutl._check_categorical_option_type("model", model, supported_detectors)

    base_model = model.split("-", 1)[0]
    ref_model = _pre_trained_models.OBJECT_DETECTION_BASE_MODELS[base_model]()

    pretrained_model = _pre_trained_models.OBJECT_DETECTION_BASE_MODELS[
        "darknet_mlmodel"]()
    pretrained_model_path = pretrained_model.get_model_path()

    params = {
        "anchors": [
            (1.0, 2.0),
            (1.0, 1.0),
            (2.0, 1.0),
            (2.0, 4.0),
            (2.0, 2.0),
            (4.0, 2.0),
            (4.0, 8.0),
            (4.0, 4.0),
            (8.0, 4.0),
            (8.0, 16.0),
            (8.0, 8.0),
            (16.0, 8.0),
            (16.0, 32.0),
            (16.0, 16.0),
            (32.0, 16.0),
        ],
        "grid_shape":
        grid_shape,
        "aug_resize":
        0,
        "aug_rand_crop":
        0.9,
        "aug_rand_pad":
        0.9,
        "aug_rand_gray":
        0.0,
        "aug_aspect_ratio":
        1.25,
        "aug_hue":
        0.05,
        "aug_brightness":
        0.05,
        "aug_saturation":
        0.05,
        "aug_contrast":
        0.05,
        "aug_horizontal_flip":
        True,
        "aug_min_object_covered":
        0,
        "aug_min_eject_coverage":
        0.5,
        "aug_area_range": (0.15, 2),
        "aug_pca_noise":
        0.0,
        "aug_max_attempts":
        20,
        "aug_inter_method":
        2,
        "lmb_coord_xy":
        10.0,
        "lmb_coord_wh":
        10.0,
        "lmb_obj":
        100.0,
        "lmb_noobj":
        5.0,
        "lmb_class":
        2.0,
        "non_maximum_suppression_threshold":
        0.45,
        "rescore":
        True,
        "clip_gradients":
        0.025,
        "weight_decay":
        0.0005,
        "sgd_momentum":
        0.9,
        "learning_rate":
        1.0e-3,
        "shuffle":
        True,
        "mps_loss_mult":
        8,
        # This large buffer size (8 batches) is an attempt to mitigate against
        # the SFrame shuffle operation that can occur after each epoch.
        "io_thread_buffer_size":
        8,
        "mlmodel_path":
        pretrained_model_path,
    }

    # create tensorflow model here
    import turicreate.toolkits.libtctensorflow

    if classes is None:
        classes = []

    _raise_error_if_not_iterable(classes)
    _raise_error_if_not_iterable(grid_shape)

    grid_shape = [int(x) for x in grid_shape]
    assert len(grid_shape) == 2

    tf_config = {
        "grid_height": params["grid_shape"][0],
        "grid_width": params["grid_shape"][1],
        "mlmodel_path": params["mlmodel_path"],
        "classes": classes,
        "compute_final_metrics": False,
        "verbose": verbose,
        "model": "darknet-yolo",
    }

    # If batch_size or max_iterations = 0, they will be automatically
    # generated in C++.
    if batch_size > 0:
        tf_config["batch_size"] = batch_size

    if max_iterations > 0:
        tf_config["max_iterations"] = max_iterations

    model = _tc.extensions.object_detector()
    model.train(
        data=dataset,
        annotations_column_name=annotations,
        image_column_name=feature,
        options=tf_config,
    )
    return ObjectDetector(model_proxy=model, name="object_detector")
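
A hedged sketch (not part of the source above) of calling this TensorFlow-backed create(); the SFrame path is a placeholder and its 'annotations' column is assumed to follow the bounding-box dictionary format shown in the docstring:

import turicreate as tc

# Placeholder: a saved SFrame with an 'image' column and an 'annotations'
# column in the documented bounding-box format.
data = tc.SFrame('annotated_images.sframe')

model = tc.object_detector.create(data,
                                  annotations='annotations',
                                  feature='image',
                                  grid_shape=[13, 13],
                                  batch_size=32,
                                  max_iterations=100)

# Predictions come back in the same annotation format.
data['predictions'] = model.predict(data)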
Example #13
0
def create(style_dataset,
           content_dataset,
           style_feature=None,
           content_feature=None,
           max_iterations=None,
           model='resnet-16',
           verbose=True,
           batch_size=1,
           **kwargs):
    """
    Create a :class:`StyleTransfer` model.

    Parameters
    ----------
    style_dataset : SFrame
        Input style images. The column named by the ``style_feature`` parameter
        will be extracted for training the model.

    content_dataset : SFrame
        Input content images. The column named by the ``content_feature`` parameter
        will be extracted for training the model.

    style_feature : string
        Name of the column containing the input images in the style SFrame.
        'None' (the default) indicates the only image column in the style SFrame
        should be used as the feature.

    content_feature : string
        Name of the column containing the input images in the content SFrame.
        'None' (the default) indicates the only image column in the content
        SFrame should be used as the feature.

    max_iterations : int
        The number of training iterations. If 'None' (the default), then it will
        be automatically determined based on the amount of data you provide.

    model : string optional
        Style transfer model to use:

            - "resnet-16" : Fast and small-sized residual network that uses
                            VGG-16 as reference network during training.

    batch_size : int, optional
        If you are getting memory errors, try decreasing this value. If you
        have a powerful computer, increasing this value may improve training
        throughput.

    verbose : bool, optional
        If True, print progress updates and model details.


    Returns
    -------
    out : StyleTransfer
        A trained :class:`StyleTransfer` model.

    See Also
    --------
    StyleTransfer

    Examples
    --------
    .. sourcecode:: python

        # Create datasets
        >>> content_dataset = turicreate.image_analysis.load_images('content_images/')
        >>> style_dataset = turicreate.image_analysis.load_images('style_images/')

        # Train a style transfer model
        >>> model = turicreate.style_transfer.create(style_dataset, content_dataset)

        # Stylize images on all styles
        >>> stylized_images = model.stylize(content_dataset)

        # Visualize the stylized images
        >>> stylized_images.explore()

    """
    if not isinstance(style_dataset, _tc.SFrame):
        raise TypeError('"style_dataset" must be of type SFrame.')
    if not isinstance(content_dataset, _tc.SFrame):
        raise TypeError('"content_dataset" must be of type SFrame.')
    if len(style_dataset) == 0:
        raise _ToolkitError("style_dataset SFrame cannot be empty")
    if len(content_dataset) == 0:
        raise _ToolkitError("content_dataset SFrame cannot be empty")
    if batch_size < 1:
        raise _ToolkitError("'batch_size' must be greater than or equal to 1")
    if max_iterations is not None and (not isinstance(max_iterations, int)
                                       or max_iterations < 0):
        raise _ToolkitError(
            "'max_iterations' must be an integer greater than or equal to 0")

    if style_feature is None:
        style_feature = _tkutl._find_only_image_column(style_dataset)

    if content_feature is None:
        content_feature = _tkutl._find_only_image_column(content_dataset)
    if verbose:
        print("Using '{}' in style_dataset as feature column and using "
              "'{}' in content_dataset as feature column".format(
                  style_feature, content_feature))

    _raise_error_if_not_training_sframe(style_dataset, style_feature)
    _raise_error_if_not_training_sframe(content_dataset, content_feature)
    _tkutl._handle_missing_values(style_dataset, style_feature,
                                  'style_dataset')
    _tkutl._handle_missing_values(content_dataset, content_feature,
                                  'content_dataset')

    params = {
        'batch_size': batch_size,
        'vgg16_content_loss_layer': 2,  # conv3_3 layer
        'lr': 0.001,
        'content_loss_mult': 1.0,
        'style_loss_mult': [1e-4, 1e-4, 1e-4, 1e-4],  # conv 1-4 layers
        'finetune_all_params': True,
        'pretrained_weights': False,
        'print_loss_breakdown': False,
        'input_shape': (256, 256),
        'training_content_loader_type': 'stretch',
        'use_augmentation': False,
        'sequential_image_processing': False,
        # Only used if use_augmentation is True
        'aug_resize': 0,
        'aug_min_object_covered': 0,
        'aug_rand_crop': 0.9,
        'aug_rand_pad': 0.9,
        'aug_rand_gray': 0.0,
        'aug_aspect_ratio': 1.25,
        'aug_hue': 0.05,
        'aug_brightness': 0.05,
        'aug_saturation': 0.05,
        'aug_contrast': 0.05,
        'aug_horizontal_flip': True,
        'aug_area_range': (.05, 1.5),
        'aug_pca_noise': 0.0,
        'aug_max_attempts': 20,
        'aug_inter_method': 2,
        'checkpoint': False,
        'checkpoint_prefix': 'style_transfer',
        'checkpoint_increment': 1000
    }

    if '_advanced_parameters' in kwargs:
        # Make sure no additional parameters are provided
        new_keys = set(kwargs['_advanced_parameters'].keys())
        set_keys = set(params.keys())
        unsupported = new_keys - set_keys
        if unsupported:
            raise _ToolkitError(
                'Unknown advanced parameters: {}'.format(unsupported))

        params.update(kwargs['_advanced_parameters'])

    name = 'style_transfer'

    import turicreate as _turicreate

    # Imports tensorflow
    import turicreate.toolkits.libtctensorflow

    model = _turicreate.extensions.style_transfer()
    pretrained_resnet_model = _pre_trained_models.STYLE_TRANSFER_BASE_MODELS[
        'resnet-16']()
    pretrained_vgg16_model = _pre_trained_models.STYLE_TRANSFER_BASE_MODELS[
        'Vgg16']()
    options = {}
    options['image_height'] = params['input_shape'][0]
    options['image_width'] = params['input_shape'][1]
    options['content_feature'] = content_feature
    options['style_feature'] = style_feature
    if verbose is not None:
        options['verbose'] = verbose
    else:
        options['verbose'] = False
    if batch_size is not None:
        options['batch_size'] = batch_size
    if max_iterations is not None:
        options['max_iterations'] = max_iterations
    options['num_styles'] = len(style_dataset)
    options['resnet_mlmodel_path'] = pretrained_resnet_model.get_model_path(
        'coreml')
    options['vgg_mlmodel_path'] = pretrained_vgg16_model.get_model_path(
        'coreml')

    model.train(style_dataset[style_feature], content_dataset[content_feature],
                options)
    return StyleTransfer(model_proxy=model, name=name)
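
A hedged sketch (not from the source) of the TensorFlow-backed create() above, naming the feature columns explicitly instead of relying on auto-detection; paths are placeholders:

import turicreate as tc

style_dataset = tc.image_analysis.load_images('style_images/')
content_dataset = tc.image_analysis.load_images('content_images/')

# load_images() names its image column 'image', so pass that explicitly.
model = tc.style_transfer.create(style_dataset, content_dataset,
                                 style_feature='image',
                                 content_feature='image',
                                 max_iterations=500,
                                 batch_size=2)

stylized = model.stylize(content_dataset, style=0)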
Example #14
0
    def stylize(self,
                images,
                style=None,
                verbose=True,
                max_size=800,
                batch_size=4):
        """
        Stylize an SFrame of Images given a style index or a list of
        styles.

        Parameters
        ----------
        images : SFrame | SArray | turicreate.Image
            A dataset that has the same content image column that was used
            during training.

        style : None | int | list
            The selected style or list of styles to use on the ``images``. If
            `None`, all styles will be applied to each image in ``images``.

        verbose : bool, optional
            If True, print progress updates.

        max_size : int or tuple
            Max input image size that will not get resized during stylization.

            Images with a side larger than this value will be scaled down, due
            to time and memory constraints. If a tuple, it is interpreted as
            (max width, max height). Without resizing, larger input images take
            more time to stylize. Resizing can affect the quality of the final
            stylized image.

        batch_size : int, optional
            If you are getting memory errors, try decreasing this value. If you
            have a powerful computer, increasing this value may improve
            performance.

        Returns
        -------
        out : SFrame or SArray or turicreate.Image
            If ``style`` is a list, an SFrame is always returned. If ``style``
            is a single integer, the output type will match the input type
            (Image, SArray, or SFrame).

        See Also
        --------
        create

        Examples
        --------
        >>> image = tc.Image("/path/to/image.jpg")
        >>> stylized_images = model.stylize(image, style=[0, 1])
        Data:
        +--------+-------+------------------------+
        | row_id | style |     stylized_image     |
        +--------+-------+------------------------+
        |   0    |   0   | Height: 256 Width: 256 |
        |   0    |   1   | Height: 256 Width: 256 |
        +--------+-------+------------------------+
        [2 rows x 3 columns]

        >>> images = tc.image_analysis.load_images('/path/to/images')
        >>> stylized_images = model.stylize(images)
        Data:
        +--------+-------+------------------------+
        | row_id | style |     stylized_image     |
        +--------+-------+------------------------+
        |   0    |   0   | Height: 256 Width: 256 |
        |   0    |   1   | Height: 256 Width: 256 |
        |   0    |   2   | Height: 256 Width: 256 |
        |   0    |   3   | Height: 256 Width: 256 |
        |   1    |   0   | Height: 640 Width: 648 |
        |   1    |   1   | Height: 640 Width: 648 |
        |   1    |   2   | Height: 640 Width: 648 |
        |   1    |   3   | Height: 640 Width: 648 |
        +--------+-------+------------------------+
        [8 rows x 3 columns]
        """
        if not isinstance(images, (_tc.SFrame, _tc.SArray, _tc.Image)):
            raise TypeError(
                '"images" parameter must be of type SFrame, SArray or turicreate.Image.'
            )
        if isinstance(images, (_tc.SFrame, _tc.SArray)) and len(images) == 0:
            raise _ToolkitError('"images" parameter cannot be empty')
        if style is not None and not isinstance(style, (int, list)):
            raise TypeError(
                '"style" parameter must be None, an int or a list')
        if not isinstance(max_size, int):
            raise TypeError('"max_size" parameter must be an int')
        if max_size < 1:
            raise _ToolkitError(
                "'max_size' must be greater than or equal to 1")
        if not isinstance(batch_size, int):
            raise TypeError('"batch_size" parameter must be an int')
        if batch_size < 1:
            raise _ToolkitError(
                "'batch_size' must be greater than or equal to 1")

        options = {}
        options['style_idx'] = style
        options['verbose'] = verbose
        options['max_size'] = max_size
        options['batch_size'] = batch_size

        if isinstance(style, list) or style is None:
            if isinstance(images, _tc.SFrame):
                image_feature = _tkutl._find_only_image_column(images)
                stylized_images = self.__proxy__.predict(
                    images[image_feature], options)
                stylized_images = stylized_images.rename(
                    {'stylized_image': 'stylized_' + str(image_feature)})
                return stylized_images
            return self.__proxy__.predict(images, options)
        else:
            if isinstance(images, _tc.SFrame):
                if len(images) == 0:
                    raise _ToolkitError("SFrame cannot be empty")
                image_feature = _tkutl._find_only_image_column(images)
                stylized_images = self.__proxy__.predict(
                    images[image_feature], options)
                stylized_images = stylized_images.rename(
                    {'stylized_image': 'stylized_' + str(image_feature)})
                return stylized_images
            elif isinstance(images, (_tc.Image)):
                stylized_images = self.__proxy__.predict(images, options)
                return stylized_images["stylized_image"][0]
            elif isinstance(images, (_tc.SArray)):
                stylized_images = self.__proxy__.predict(images, options)
                return stylized_images["stylized_image"]
Example #15
0
def annotate(data, image_column=None, annotation_column='annotations'):
    """
        Annotate your images loaded in either an SFrame or SArray format.

        The annotate util is a GUI-assisted application used to create labels in
        SArray Image data. Specifying a column with dtype Image in an SFrame
        works as well, since SFrames are composed of multiple SArrays.

        When the GUI is terminated, an SFrame is returned with the representative
        images and annotations.

        The returned SFrame includes the newly created annotations.

        Parameters
        --------------
        data : SArray | SFrame
            The data containing the images. If the data type is 'SArray'
            the 'image_column', and 'annotation_column' variables are used to construct
            a new 'SFrame' containing the 'SArray' data for annotation.
            If the data type is 'SFrame' the 'image_column', and 'annotation_column'
            variables are used to annotate the images.

        image_column : string, optional
            If the data type is 'SFrame' and the 'image_column' parameter is
            specified, that column name is used as the image column for the
            annotation. If the data type is 'SFrame' and 'image_column' is left
            empty, a default column name of 'image' is used. If the data type is
            'SArray', the 'image_column' is used to construct the 'SFrame' data
            for the annotation.

        annotation_column : string, optional
            If the data type is 'SFrame' and the 'annotation_column' parameter is
            specified, that column name is used as the annotation column for the
            annotation. If the data type is 'SFrame' and 'annotation_column' is
            left empty, a default column name of 'annotation' is used. If the
            data type is 'SArray', the 'annotation_column' is used to construct
            the 'SFrame' data for the annotation.


        Returns
        -------

        out : SFrame
            A new SFrame that contains the newly annotated data.

        Examples
        --------

        >>> import turicreate as tc
        >>> images = tc.image_analysis.load_images("path/to/images")
        >>> print(images)

            Columns:

                path    str
                image   Image

            Rows: 4

            Data:
            +------------------------+--------------------------+
            |          path          |          image           |
            +------------------------+--------------------------+
            | /Users/username/Doc... | Height: 1712 Width: 1952 |
            | /Users/username/Doc... | Height: 1386 Width: 1000 |
            | /Users/username/Doc... |  Height: 536 Width: 858  |
            | /Users/username/Doc... | Height: 1512 Width: 2680 |
            +------------------------+--------------------------+
            [4 rows x 2 columns]

        >>> images = tc.image_classifier.annotate(images)
        >>> print(images)

            Columns:
                path    str
                image   Image
                annotation  str

            Rows: 4

            Data:
            +------------------------+--------------------------+-------------------+
            |          path          |          image           |    annotation     |
            +------------------------+--------------------------+-------------------+
            | /Users/username/Doc... | Height: 1712 Width: 1952 |        dog        |
            | /Users/username/Doc... | Height: 1386 Width: 1000 |        dog        |
            | /Users/username/Doc... |  Height: 536 Width: 858  |        cat        |
            | /Users/username/Doc... | Height: 1512 Width: 2680 |       mouse       |
            +------------------------+--------------------------+-------------------+
            [4 rows x 3 columns]


    """

    # Check Value of Column Variables
    if image_column is None:
        image_column = _tkutl._find_only_image_column(data)

    if image_column is None:
        raise ValueError("'image_column' cannot be 'None'")

    if type(image_column) != str:
        raise TypeError("'image_column' has to be of type 'str'")

    if annotation_column is None:
        annotation_column = ""

    if type(annotation_column) != str:
        raise TypeError("'annotation_column' has to be of type 'str'")

    # Check Data Structure
    if type(data) == __tc.data_structures.image.Image:
        data = __tc.SFrame({image_column: __tc.SArray([data])})

    elif type(data) == __tc.data_structures.sframe.SFrame:
        if (data.shape[0] == 0):
            return data

        if not (data[image_column].dtype == __tc.data_structures.image.Image):
            raise TypeError("'data[image_column]' must be a column of type Image")

    elif type(data) == __tc.data_structures.sarray.SArray:
        if (data.shape[0] == 0):
            return data

        data = __tc.SFrame({image_column: data})
    else:
        raise TypeError("'data' must be an SFrame or SArray")

    _warning_annotations()

    annotation_window = __tc.extensions.create_image_classification_annotation(
        data, [image_column], annotation_column)
    annotation_window.annotate(_get_client_app_path())

    return annotation_window.returnAnnotations()
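
A hedged usage sketch (not part of the source) of the annotation flow above, using a placeholder image folder and passing an SArray so that `image_column`/`annotation_column` name the columns of the SFrame handed to the GUI:

import turicreate as tc

images = tc.image_analysis.load_images('path/to/images')

# Annotate the image SArray directly; the GUI returns an SFrame with the
# images plus the new annotation column.
labeled = tc.image_classifier.annotate(images['image'],
                                       image_column='image',
                                       annotation_column='annotations')

print(labeled.column_names())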