Code Example #1
File: image_analysis.py  Project: chrinide/turicreate
def _find_only_image_extracted_features_column(sframe, model_name):
    """
    Finds the only column in `sframe` whose type is array.array and whose
    length matches the last layer of the model in use.
    If there are zero or more than one such columns, an exception will
    be raised.
    """
    from array import array

    feature_column = _tkutl._find_only_column_of_type(sframe,
                                                      target_type=array,
                                                      type_name="array",
                                                      col_name="deep_features")
    if _is_image_deep_feature_sarray(sframe[feature_column], model_name):
        return feature_column
    else:
        raise _ToolkitError(
            'No "{col_name}" column specified and no column with expected type "{type_name}" is found.'
            .format(col_name="deep_features", type_name="array"))
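A quick usage sketch for this helper (hypothetical: the SFrame contents, the
1000-element feature length, and direct access to the private helper are
assumptions for illustration; the actual length depends on the model's last
layer):

# Hypothetical usage sketch -- assumes this module's private helper is
# importable and that 1000 matches the last-layer size of the chosen model.
import array
import turicreate as tc

sf = tc.SFrame({
    'deep_features': [array.array('d', [0.0] * 1000) for _ in range(3)],
    'label': ['dog', 'cat', 'dog'],
})

# Scans for the single array-typed column whose length matches the model's
# last layer; raises _ToolkitError if there are zero or multiple candidates.
column = _find_only_image_extracted_features_column(sf, 'squeezenet_v1.1')
print(column)  # -> 'deep_features'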
Code Example #2
def create(dataset, annotations=None, feature=None, model='darknet-yolo',
           classes=None, max_iterations=0, verbose=True, **kwargs):
    """
    Create an :class:`ObjectDetector` model.

    Parameters
    ----------
    dataset : SFrame
        Input data. The columns named by the ``feature`` and ``annotations``
        parameters will be extracted for training the detector.

    annotations : string
        Name of the column containing the object detection annotations.
        This column should be a list of dictionaries, with each dictionary
        representing a bounding box of an object instance. Here is an example
        of the annotations for a single image with two object instances::

            [{'label': 'dog',
              'type': 'rectangle',
              'coordinates': {'x': 223, 'y': 198,
                              'width': 130, 'height': 230}},
             {'label': 'cat',
              'type': 'rectangle',
              'coordinates': {'x': 40, 'y': 73,
                              'width': 80, 'height': 123}}]

        The value for `x` is the horizontal center of the box and is paired
        with `width`; `y` is the vertical center and is paired with `height`.
        'None' (the default) indicates the only list column in `dataset` should
        be used for the annotations.

    feature : string
        Name of the column containing the input images. 'None' (the default)
        indicates the only image column in `dataset` should be used as the
        feature.

    model : string, optional
        Object detection model to use:

           - "darknet-yolo" : Fast and medium-sized model

    classes : list, optional
        List of strings containing the names of the classes of objects.
        Inferred from the data if not provided.

    max_iterations : int
        The number of training iterations. If 0, it will be automatically
        determined based on the amount of data you provide.

    verbose : bool, optional
        If True, print progress updates and model details.

    Returns
    -------
    out : ObjectDetector
        A trained :class:`ObjectDetector` model.

    See Also
    --------
    ObjectDetector

    Examples
    --------
    .. sourcecode:: python

        # Train an object detector model
        >>> model = turicreate.object_detector.create(data)

        # Make predictions on the training set and add them as a column to the SFrame
        >>> data['predictions'] = model.predict(data)

        # Visualize predictions by generating a new column of marked up images
        >>> data['image_pred'] = turicreate.object_detector.util.draw_bounding_boxes(data['image'], data['predictions'])
    """
    _raise_error_if_not_sframe(dataset, "dataset")
    from ._mx_detector import YOLOLoss as _YOLOLoss
    from ._model import tiny_darknet as _tiny_darknet
    from ._sframe_loader import SFrameDetectionIter as _SFrameDetectionIter
    from ._manual_scheduler import ManualScheduler as _ManualScheduler
    import mxnet as _mx
    if len(dataset) == 0:
        raise _ToolkitError('Unable to train on empty dataset')

    _numeric_param_check_range('max_iterations', max_iterations, 0, _six.MAXSIZE)
    start_time = _time.time()

    supported_detectors = ['darknet-yolo']

    if feature is None:
        feature = _tkutl._find_only_image_column(dataset)
        if verbose:
            print("Using '%s' as feature column" % feature)
    if annotations is None:
        annotations = _tkutl._find_only_column_of_type(dataset,
                                                       target_type=list,
                                                       type_name='list',
                                                       col_name='annotations')
        if verbose:
            print("Using '%s' as annotations column" % annotations)

    _raise_error_if_not_detection_sframe(dataset, feature, annotations,
                                         require_annotations=True)

    _tkutl._check_categorical_option_type('model', model,
            supported_detectors)

    base_model = model.split('-', 1)[0]
    ref_model = _pre_trained_models.OBJECT_DETECTION_BASE_MODELS[base_model]()

    params = {
        'anchors': [
            (1.0, 2.0), (1.0, 1.0), (2.0, 1.0),
            (2.0, 4.0), (2.0, 2.0), (4.0, 2.0),
            (4.0, 8.0), (4.0, 4.0), (8.0, 4.0),
            (8.0, 16.0), (8.0, 8.0), (16.0, 8.0),
            (16.0, 32.0), (16.0, 16.0), (32.0, 16.0),
        ],
        'grid_shape': [13, 13],
        'batch_size': 32,
        'aug_resize': 0,
        'aug_rand_crop': 0.9,
        'aug_rand_pad': 0.9,
        'aug_rand_gray': 0.0,
        'aug_aspect_ratio': 1.25,
        'aug_hue': 0.05,
        'aug_brightness': 0.05,
        'aug_saturation': 0.05,
        'aug_contrast': 0.05,
        'aug_horizontal_flip': True,
        'aug_min_object_covered': 0,
        'aug_min_eject_coverage': 0.5,
        'aug_area_range': (.15, 2),
        'aug_pca_noise': 0.0,
        'aug_max_attempts': 20,
        'aug_inter_method': 2,
        'lmb_coord_xy': 10.0,
        'lmb_coord_wh': 10.0,
        'lmb_obj': 100.0,
        'lmb_noobj': 5.0,
        'lmb_class': 2.0,
        'non_maximum_suppression_threshold': 0.45,
        'rescore': True,
        'clip_gradients': 0.025,
        'learning_rate': 1.0e-3,
        'shuffle': True,
    }

    if '_advanced_parameters' in kwargs:
        # Make sure no additional parameters are provided
        new_keys = set(kwargs['_advanced_parameters'].keys())
        set_keys = set(params.keys())
        unsupported = new_keys - set_keys
        if unsupported:
            raise _ToolkitError('Unknown advanced parameters: {}'.format(unsupported))

        params.update(kwargs['_advanced_parameters'])

    anchors = params['anchors']
    num_anchors = len(anchors)

    num_gpus = _mxnet_utils.get_num_gpus_in_use(max_devices=params['batch_size'])
    batch_size_each = params['batch_size'] // max(num_gpus, 1)
    # Note, this may slightly alter the batch size to fit evenly on the GPUs
    batch_size = max(num_gpus, 1) * batch_size_each

    grid_shape = params['grid_shape']
    input_image_shape = (3,
                         grid_shape[0] * ref_model.spatial_reduction,
                         grid_shape[1] * ref_model.spatial_reduction)

    try:
        instances = (dataset.stack(annotations, new_column_name='_bbox', drop_na=True)
                            .unpack('_bbox', limit=['label']))
    except (TypeError, RuntimeError):
        # If this fails, the annotation format is invalid at the coarsest level
        raise _ToolkitError("Annotations format is invalid. Must be a list of "
                            "dictionaries containing 'label' and 'coordinates'.")
    num_images = len(dataset)
    num_instances = len(instances)
    if classes is None:
        classes = instances['_bbox.label'].unique()
    classes = sorted(classes)

    # Make a class-to-index look-up table
    class_to_index = {name: index for index, name in enumerate(classes)}
    num_classes = len(classes)

    # Create data loader
    loader = _SFrameDetectionIter(dataset,
                                  batch_size=batch_size,
                                  input_shape=input_image_shape[1:],
                                  output_shape=grid_shape,
                                  anchors=anchors,
                                  class_to_index=class_to_index,
                                  aug_params=params,
                                  shuffle=params['shuffle'],
                                  loader_type='augmented',
                                  feature_column=feature,
                                  annotations_column=annotations)

    # Predictions per anchor box: x/y + w/h + object confidence + class probs
    preds_per_box = 5 + num_classes
    output_size = preds_per_box * num_anchors
    ymap_shape = (batch_size_each,) + tuple(grid_shape) + (num_anchors, preds_per_box)

    net = _tiny_darknet(output_size=output_size)

    loss = _YOLOLoss(input_shape=input_image_shape[1:],
                     output_shape=grid_shape,
                     batch_size=batch_size_each,
                     num_classes=num_classes,
                     anchors=anchors,
                     parameters=params)

    base_lr = params['learning_rate']
    if max_iterations == 0:
        # Set number of iterations through a heuristic
        num_iterations_raw = 5000 * _np.sqrt(num_instances) / batch_size
        num_iterations = 1000 * max(1, int(round(num_iterations_raw / 1000)))
    else:
        num_iterations = max_iterations

    steps = [num_iterations // 2, 3 * num_iterations // 4, num_iterations]
    steps_and_factors = [(step, 10**(-i)) for i, step in enumerate(steps)]

    steps, factors = zip(*steps_and_factors)
    lr_scheduler = _ManualScheduler(step=steps, factor=factors)

    ctx = _mxnet_utils.get_mxnet_context(max_devices=batch_size)

    net_params = net.collect_params()
    net_params.initialize(_mx.init.Xavier(), ctx=ctx)
    net_params['conv7_weight'].initialize(_mx.init.Xavier(factor_type='avg'), ctx=ctx, force_reinit=True)
    net_params['conv8_weight'].initialize(_mx.init.Uniform(0.00005), ctx=ctx, force_reinit=True)
    # Initialize object confidence low, preventing an unnecessary adjustment
    # period toward conservative estimates
    bias = _np.zeros(output_size, dtype=_np.float32)
    bias[4::preds_per_box] -= 6
    from ._mx_detector import ConstantArray
    net_params['conv8_bias'].initialize(ConstantArray(bias), ctx, force_reinit=True)

    # Take a subset and then load the rest of the parameters. It is possible
    # to pass allow_missing=True directly on net_params. However, this would
    # more easily hide bugs caused by names getting out of sync.
    ref_model.available_parameters_subset(net_params).load(ref_model.model_path, ctx)

    options = {'learning_rate': base_lr, 'lr_scheduler': lr_scheduler,
               'momentum': 0.9, 'wd': 0.00005, 'rescale_grad': 1.0}
    clip_grad = params.get('clip_gradients')
    if clip_grad:
        options['clip_gradient'] = clip_grad

    trainer = _mx.gluon.Trainer(net.collect_params(), 'sgd', options)

    iteration = 0
    smoothed_loss = None
    last_time = 0
    while iteration < num_iterations:
        loader.reset()
        for batch in loader:
            data = _mx.gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            label = _mx.gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)

            Ls = []
            with _mx.autograd.record():
                for x, y in zip(data, label):
                    z = net(x)
                    z0 = _mx.nd.transpose(z, [0, 2, 3, 1]).reshape(ymap_shape)
                    L = loss(z0, y)
                    Ls.append(L)
                for L in Ls:
                    L.backward()

            cur_loss = _np.mean([L.asnumpy()[0] for L in Ls])
            if smoothed_loss is None:
                smoothed_loss = cur_loss
            else:
                smoothed_loss = 0.9 * smoothed_loss + 0.1 * cur_loss
            trainer.step(1)
            iteration += 1
            cur_time = _time.time()
            if verbose and cur_time > last_time + 10:
                print('{now:%Y-%m-%d %H:%M:%S}  Training {cur_iter:{width}d}/{num_iterations:{width}d}  Loss {loss:6.3f}'.format(
                    now=_datetime.now(), cur_iter=iteration, num_iterations=num_iterations,
                    loss=smoothed_loss, width=len(str(num_iterations))))
                last_time = cur_time
            if iteration == num_iterations:
                break

    training_time = _time.time() - start_time

    # Save the model
    state = {
        '_model': net,
        '_class_to_index': class_to_index,
        '_training_time_as_string': _seconds_as_string(training_time),
        '_grid_shape': grid_shape,
        'anchors': anchors,
        'model': model,
        'classes': classes,
        'batch_size': batch_size,
        'input_image_shape': input_image_shape,
        'feature': feature,
        'non_maximum_suppression_threshold': params['non_maximum_suppression_threshold'],
        'annotations': annotations,
        'num_classes': num_classes,
        'num_examples': num_images,
        'num_bounding_boxes': num_instances,
        'training_time': training_time,
        'training_epochs': loader.cur_epoch,
        'training_iterations': iteration,
        'max_iterations': max_iterations,
        'training_loss': smoothed_loss,
    }
    return ObjectDetector(state)
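To see the inferred-column behavior and the iteration heuristic above in
action, here is a hedged end-to-end sketch (the images/ directory, the
single-box annotations, and an MXNet-backed turicreate build are assumptions):

# Hedged training sketch -- paths and annotation values are illustrative.
import turicreate as tc

data = tc.image_analysis.load_images('images/', with_path=False)
data['annotations'] = [
    [{'label': 'dog', 'type': 'rectangle',
      'coordinates': {'x': 223, 'y': 198, 'width': 130, 'height': 230}}]
    for _ in range(len(data))
]

# feature and annotations are inferred (the only image column and the only
# list column). With max_iterations=0, the heuristic above picks roughly
# 5000 * sqrt(num_instances) / batch_size iterations, rounded to the
# nearest thousand.
model = tc.object_detector.create(data, max_iterations=0)
data['predictions'] = model.predict(data)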
Code Example #3
def create(dataset,
           annotations=None,
           feature=None,
           model="darknet-yolo",
           classes=None,
           batch_size=0,
           max_iterations=0,
           verbose=True,
           grid_shape=[13, 13],
           **kwargs):
    """
    Create an :class:`ObjectDetector` model.

    Parameters
    ----------
    dataset : SFrame
        Input data. The columns named by the ``feature`` and ``annotations``
        parameters will be extracted for training the detector.

    annotations : string
        Name of the column containing the object detection annotations.  This
        column should be a list of dictionaries (or a single dictionary), with
        each dictionary representing a bounding box of an object instance. Here
        is an example of the annotations for a single image with two object
        instances::

            [{'label': 'dog',
              'type': 'rectangle',
              'coordinates': {'x': 223, 'y': 198,
                              'width': 130, 'height': 230}},
             {'label': 'cat',
              'type': 'rectangle',
              'coordinates': {'x': 40, 'y': 73,
                              'width': 80, 'height': 123}}]

        The value for `x` is the horizontal center of the box and is paired
        with `width`; `y` is the vertical center and is paired with `height`.
        'None' (the default) indicates the only list column in `dataset` should
        be used for the annotations.

    feature : string
        Name of the column containing the input images. 'None' (the default)
        indicates the only image column in `dataset` should be used as the
        feature.

    model : string, optional
        Object detection model to use:

           - "darknet-yolo" : Fast and medium-sized model

    grid_shape : array, optional
        Shape of the grid used for object detection. Higher values increase
        precision for small objects, but at a higher computational cost.

           - [13, 13] : Default grid value for a Fast and medium-sized model

    classes : list, optional
        List of strings containing the names of the classes of objects.
        Inferred from the data if not provided.

    batch_size : int
        The number of images per training iteration. If 0, then it will be
        automatically determined based on resource availability.

    max_iterations : int
        The number of training iterations. If 0, it will be automatically
        determined based on the amount of data you provide.

    verbose : bool, optional
        If True, print progress updates and model details.

    Returns
    -------
    out : ObjectDetector
        A trained :class:`ObjectDetector` model.

    See Also
    --------
    ObjectDetector

    Examples
    --------
    .. sourcecode:: python

        # Train an object detector model
        >>> model = turicreate.object_detector.create(data)

        # Make predictions on the training set and add them as a column to the SFrame
        >>> data['predictions'] = model.predict(data)

        # Visualize predictions by generating a new column of marked up images
        >>> data['image_pred'] = turicreate.object_detector.util.draw_bounding_boxes(data['image'], data['predictions'])
    """
    _raise_error_if_not_sframe(dataset, "dataset")

    if len(dataset) == 0:
        raise _ToolkitError("Unable to train on empty dataset")

    _numeric_param_check_range("max_iterations", max_iterations, 0,
                               _six.MAXSIZE)
    start_time = _time.time()

    supported_detectors = ["darknet-yolo"]

    if feature is None:
        feature = _tkutl._find_only_image_column(dataset)
        if verbose:
            print("Using '%s' as feature column" % feature)
    if annotations is None:
        annotations = _tkutl._find_only_column_of_type(
            dataset,
            target_type=[list, dict],
            type_name="list",
            col_name="annotations")
        if verbose:
            print("Using '%s' as annotations column" % annotations)

    _raise_error_if_not_detection_sframe(dataset,
                                         feature,
                                         annotations,
                                         require_annotations=True)
    _tkutl._handle_missing_values(dataset, feature, "dataset")
    _tkutl._check_categorical_option_type("model", model, supported_detectors)

    base_model = model.split("-", 1)[0]
    ref_model = _pre_trained_models.OBJECT_DETECTION_BASE_MODELS[base_model]()

    pretrained_model = _pre_trained_models.OBJECT_DETECTION_BASE_MODELS[
        "darknet_mlmodel"]()
    pretrained_model_path = pretrained_model.get_model_path()

    params = {
        "anchors": [
            (1.0, 2.0), (1.0, 1.0), (2.0, 1.0),
            (2.0, 4.0), (2.0, 2.0), (4.0, 2.0),
            (4.0, 8.0), (4.0, 4.0), (8.0, 4.0),
            (8.0, 16.0), (8.0, 8.0), (16.0, 8.0),
            (16.0, 32.0), (16.0, 16.0), (32.0, 16.0),
        ],
        "grid_shape": grid_shape,
        "aug_resize": 0,
        "aug_rand_crop": 0.9,
        "aug_rand_pad": 0.9,
        "aug_rand_gray": 0.0,
        "aug_aspect_ratio": 1.25,
        "aug_hue": 0.05,
        "aug_brightness": 0.05,
        "aug_saturation": 0.05,
        "aug_contrast": 0.05,
        "aug_horizontal_flip": True,
        "aug_min_object_covered": 0,
        "aug_min_eject_coverage": 0.5,
        "aug_area_range": (0.15, 2),
        "aug_pca_noise": 0.0,
        "aug_max_attempts": 20,
        "aug_inter_method": 2,
        "lmb_coord_xy": 10.0,
        "lmb_coord_wh": 10.0,
        "lmb_obj": 100.0,
        "lmb_noobj": 5.0,
        "lmb_class": 2.0,
        "non_maximum_suppression_threshold": 0.45,
        "rescore": True,
        "clip_gradients": 0.025,
        "weight_decay": 0.0005,
        "sgd_momentum": 0.9,
        "learning_rate": 1.0e-3,
        "shuffle": True,
        "mps_loss_mult": 8,
        # This large buffer size (8 batches) is an attempt to mitigate against
        # the SFrame shuffle operation that can occur after each epoch.
        "io_thread_buffer_size": 8,
        "mlmodel_path": pretrained_model_path,
    }

    # create tensorflow model here
    import turicreate.toolkits.libtctensorflow

    if classes is None:
        classes = []

    _raise_error_if_not_iterable(classes)
    _raise_error_if_not_iterable(grid_shape)

    grid_shape = [int(x) for x in grid_shape]
    assert len(grid_shape) == 2

    tf_config = {
        "grid_height": params["grid_shape"][0],
        "grid_width": params["grid_shape"][1],
        "mlmodel_path": params["mlmodel_path"],
        "classes": classes,
        "compute_final_metrics": False,
        "verbose": verbose,
        "model": "darknet-yolo",
    }

    # If batch_size or max_iterations = 0, they will be automatically
    # generated in C++.
    if batch_size > 0:
        tf_config["batch_size"] = batch_size

    if max_iterations > 0:
        tf_config["max_iterations"] = max_iterations

    model = _tc.extensions.object_detector()
    model.train(
        data=dataset,
        annotations_column_name=annotations,
        image_column_name=feature,
        options=tf_config,
    )
    return ObjectDetector(model_proxy=model, name="object_detector")
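For comparison with Code Example #2, a hedged sketch of calling this
TensorFlow-backed variant (the SFrame path and the 26x26 grid are
illustrative assumptions):

# Hedged usage sketch -- assumes a turicreate build with the TensorFlow
# backend; 'annotated_images.sframe' is an illustrative path.
import turicreate as tc

data = tc.SFrame('annotated_images.sframe')  # image + annotations columns

# A finer grid such as [26, 26] raises precision on small objects at extra
# compute; batch_size=0 and max_iterations=0 defer both choices to the
# backend, as noted in the code above.
model = tc.object_detector.create(
    data,
    model='darknet-yolo',
    grid_shape=[26, 26],
    batch_size=0,
    max_iterations=0,
)
data['predictions'] = model.predict(data)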