def _find_only_image_extracted_features_column(sframe, model_name):
    """
    Return the name of the single deep-feature column in `sframe`.

    The candidate is the only column of type ``array.array`` (located via
    ``_tkutl._find_only_column_of_type``). It is accepted only if
    ``_is_image_deep_feature_sarray`` confirms that it holds deep features
    for `model_name`.

    Parameters
    ----------
    sframe : SFrame
        Data set to search.

    model_name : string
        Name of the model whose extracted features are expected.

    Returns
    -------
    out : string
        Name of the deep-feature column.

    Raises
    ------
    ToolkitError
        If the candidate column does not pass the deep-feature check. The
        column lookup helper is expected to raise when there are zero or
        more than one array columns.
    """
    from array import array as _array_type

    candidate = _tkutl._find_only_column_of_type(
        sframe,
        target_type=_array_type,
        type_name="array",
        col_name="deep_features",
    )

    # Guard clause: reject an array column that is not a deep-feature column.
    if not _is_image_deep_feature_sarray(sframe[candidate], model_name):
        message = 'No "{col_name}" column specified and no column with expected type "{type_name}" is found.'.format(
            col_name="deep_features", type_name="array"
        )
        raise _ToolkitError(message)

    return candidate
def create(dataset, annotations=None, feature=None, model='darknet-yolo',
           classes=None, max_iterations=0, verbose=True, **kwargs):
    """
    Create a :class:`ObjectDetector` model.

    Parameters
    ----------
    dataset : SFrame
        Input data. The columns named by the ``feature`` and ``annotations``
        parameters will be extracted for training the detector.

    annotations : string
        Name of the column containing the object detection annotations.
        This column should be a list of dictionaries, with each dictionary
        representing a bounding box of an object instance. Here is an example
        of the annotations for a single image with two object instances::

            [{'label': 'dog',
              'type': 'rectangle',
              'coordinates': {'x': 223, 'y': 198,
                              'width': 130, 'height': 230}},
             {'label': 'cat',
              'type': 'rectangle',
              'coordinates': {'x': 40, 'y': 73,
                              'width': 80, 'height': 123}}]

        The value for `x` is the horizontal center of the box paired with
        `width` and `y` is the vertical center of the box paired with
        `height`. 'None' (the default) indicates the only list column in
        `dataset` should be used for the annotations.

    feature : string
        Name of the column containing the input images. 'None' (the default)
        indicates the only image column in `dataset` should be used as the
        feature.

    model : string optional
        Object detection model to use:

           - "darknet-yolo" : Fast and medium-sized model

    classes : list optional
        List of strings containing the names of the classes of objects.
        Inferred from the data if not provided. The list is sorted before
        class indices are assigned.

    max_iterations : int
        The number of training iterations. If 0, then it will be
        automatically determined based on the amount of data you provide.

    verbose : bool, optional
        If True, print progress updates and model details.

    **kwargs
        Only the key ``_advanced_parameters`` is read: a dictionary whose
        entries override the internal default training parameters. Keys that
        are not already present in the default parameter table are rejected.

    Returns
    -------
    out : ObjectDetector
        A trained :class:`ObjectDetector` model.

    Raises
    ------
    ToolkitError
        If `dataset` is empty, if ``_advanced_parameters`` contains unknown
        keys, or if the annotations column cannot be stacked/unpacked into
        per-instance labels. The validation helpers called at the top of the
        function may raise as well.

    See Also
    --------
    ObjectDetector

    Examples
    --------
    .. sourcecode:: python

        # Train an object detector model
        >>> model = turicreate.object_detector.create(data)

        # Make predictions on the training set and add them as a column to
        # the SFrame
        >>> data['predictions'] = model.predict(data)

        # Visualize predictions by generating a new column of marked up images
        >>> data['image_pred'] = turicreate.object_detector.util.draw_bounding_boxes(data['image'], data['predictions'])
    """
    _raise_error_if_not_sframe(dataset, "dataset")
    # Imports are function-local, so mxnet and the detector internals are only
    # loaded when training is actually requested.
    from ._mx_detector import YOLOLoss as _YOLOLoss
    from ._model import tiny_darknet as _tiny_darknet
    from ._sframe_loader import SFrameDetectionIter as _SFrameDetectionIter
    from ._manual_scheduler import ManualScheduler as _ManualScheduler
    import mxnet as _mx
    if len(dataset) == 0:
        raise _ToolkitError('Unable to train on empty dataset')

    _numeric_param_check_range('max_iterations', max_iterations, 0, _six.MAXSIZE)
    # Reference point for the reported total training time (includes setup).
    start_time = _time.time()

    supported_detectors = ['darknet-yolo']

    # Auto-detect the image / annotations columns when they are not named.
    if feature is None:
        feature = _tkutl._find_only_image_column(dataset)
        if verbose:
            print("Using '%s' as feature column" % feature)
    if annotations is None:
        annotations = _tkutl._find_only_column_of_type(dataset,
                                                       target_type=list,
                                                       type_name='list',
                                                       col_name='annotations')
        if verbose:
            print("Using '%s' as annotations column" % annotations)

    _raise_error_if_not_detection_sframe(dataset, feature, annotations,
                                         require_annotations=True)

    _tkutl._check_categorical_option_type('model', model,
                                          supported_detectors)

    # 'darknet-yolo' -> 'darknet': the prefix before the first dash selects
    # the pre-trained base network.
    base_model = model.split('-', 1)[0]
    ref_model = _pre_trained_models.OBJECT_DETECTION_BASE_MODELS[base_model]()

    # Default training parameters; every key can be overridden through
    # kwargs['_advanced_parameters'].
    params = {
        'anchors': [
            (1.0, 2.0), (1.0, 1.0), (2.0, 1.0),
            (2.0, 4.0), (2.0, 2.0), (4.0, 2.0),
            (4.0, 8.0), (4.0, 4.0), (8.0, 4.0),
            (8.0, 16.0), (8.0, 8.0), (16.0, 8.0),
            (16.0, 32.0), (16.0, 16.0), (32.0, 16.0),
        ],
        'grid_shape': [13, 13],
        'batch_size': 32,
        'aug_resize': 0,
        'aug_rand_crop': 0.9,
        'aug_rand_pad': 0.9,
        'aug_rand_gray': 0.0,
        'aug_aspect_ratio': 1.25,
        'aug_hue': 0.05,
        'aug_brightness': 0.05,
        'aug_saturation': 0.05,
        'aug_contrast': 0.05,
        'aug_horizontal_flip': True,
        'aug_min_object_covered': 0,
        'aug_min_eject_coverage': 0.5,
        'aug_area_range': (.15, 2),
        'aug_pca_noise': 0.0,
        'aug_max_attempts': 20,
        'aug_inter_method': 2,
        'lmb_coord_xy': 10.0,
        'lmb_coord_wh': 10.0,
        'lmb_obj': 100.0,
        'lmb_noobj': 5.0,
        'lmb_class': 2.0,
        'non_maximum_suppression_threshold': 0.45,
        'rescore': True,
        'clip_gradients': 0.025,
        'learning_rate': 1.0e-3,
        'shuffle': True,
    }

    if '_advanced_parameters' in kwargs:
        # Make sure no additional parameters are provided
        new_keys = set(kwargs['_advanced_parameters'].keys())
        set_keys = set(params.keys())
        unsupported = new_keys - set_keys
        if unsupported:
            raise _ToolkitError('Unknown advanced parameters: {}'.format(unsupported))

        params.update(kwargs['_advanced_parameters'])

    anchors = params['anchors']
    num_anchors = len(anchors)

    # Split the requested batch evenly across the devices in use; max(.., 1)
    # covers the case where no GPU is used.
    num_gpus = _mxnet_utils.get_num_gpus_in_use(max_devices=params['batch_size'])
    batch_size_each = params['batch_size'] // max(num_gpus, 1)
    # Note, this may slightly alter the batch size to fit evenly on the GPUs
    batch_size = max(num_gpus, 1) * batch_size_each

    # Input resolution is the output grid scaled by the base network's
    # spatial reduction factor; the leading 3 is the channel dimension.
    grid_shape = params['grid_shape']
    input_image_shape = (3,
                         grid_shape[0] * ref_model.spatial_reduction,
                         grid_shape[1] * ref_model.spatial_reduction)

    # Flatten annotations to one row per bounding box, keeping only the label,
    # so instances can be counted and classes inferred.
    try:
        instances = (dataset.stack(annotations, new_column_name='_bbox', drop_na=True)
                            .unpack('_bbox', limit=['label']))
    except (TypeError, RuntimeError):
        # If this fails, the annotation format is invalid at the coarsest level
        raise _ToolkitError("Annotations format is invalid. Must be a list of "
                            "dictionaries containing 'label' and 'coordinates'.")
    num_images = len(dataset)
    num_instances = len(instances)
    if classes is None:
        classes = instances['_bbox.label'].unique()
    # Sorting applies to user-provided classes as well, so index assignment
    # below is deterministic.
    classes = sorted(classes)

    # Make a class-to-index look-up table
    class_to_index = {name: index for index, name in enumerate(classes)}
    num_classes = len(classes)

    # Create data loader
    loader = _SFrameDetectionIter(dataset,
                                  batch_size=batch_size,
                                  input_shape=input_image_shape[1:],
                                  output_shape=grid_shape,
                                  anchors=anchors,
                                  class_to_index=class_to_index,
                                  aug_params=params,
                                  shuffle=params['shuffle'],
                                  loader_type='augmented',
                                  feature_column=feature,
                                  annotations_column=annotations)

    # Predictions per anchor box: x/y + w/h + object confidence + class probs
    preds_per_box = 5 + num_classes
    output_size = preds_per_box * num_anchors
    # Per-device target layout: (batch, grid_h, grid_w, anchor, prediction).
    ymap_shape = (batch_size_each,) + tuple(grid_shape) + (num_anchors, preds_per_box)

    net = _tiny_darknet(output_size=output_size)

    loss = _YOLOLoss(input_shape=input_image_shape[1:],
                     output_shape=grid_shape,
                     batch_size=batch_size_each,
                     num_classes=num_classes,
                     anchors=anchors,
                     parameters=params)

    base_lr = params['learning_rate']
    if max_iterations == 0:
        # Set number of iterations through a heuristic: grows with the square
        # root of the instance count, rounded to a multiple of 1000 (min 1000)
        num_iterations_raw = 5000 * _np.sqrt(num_instances) / batch_size
        num_iterations = 1000 * max(1, int(round(num_iterations_raw / 1000)))
    else:
        num_iterations = max_iterations

    # Schedule milestones at 1/2, 3/4 and the end of training, paired with
    # factors 1, 0.1 and 0.01. How they are applied is up to ManualScheduler.
    steps = [num_iterations // 2, 3 * num_iterations // 4, num_iterations]
    steps_and_factors = [(step, 10**(-i)) for i, step in enumerate(steps)]

    steps, factors = zip(*steps_and_factors)
    lr_scheduler = _ManualScheduler(step=steps, factor=factors)

    ctx = _mxnet_utils.get_mxnet_context(max_devices=batch_size)

    # Initialize everything with Xavier first, then force-reinitialize the
    # last two conv layers with layer-specific initializers.
    net_params = net.collect_params()
    net_params.initialize(_mx.init.Xavier(), ctx=ctx)
    net_params['conv7_weight'].initialize(_mx.init.Xavier(factor_type='avg'), ctx=ctx, force_reinit=True)
    net_params['conv8_weight'].initialize(_mx.init.Uniform(0.00005), ctx=ctx, force_reinit=True)
    # Initialize object confidence low, preventing an unnecessary adjustment
    # period toward conservative estimates. Index 4 of every box's prediction
    # vector is the object-confidence slot (after x/y and w/h).
    bias = _np.zeros(output_size, dtype=_np.float32)
    bias[4::preds_per_box] -= 6
    from ._mx_detector import ConstantArray
    net_params['conv8_bias'].initialize(ConstantArray(bias), ctx, force_reinit=True)

    # Take a subset and then load the rest of the parameters. It is possible to
    # do allow_missing=True directly on net_params. However, this will more
    # easily hide bugs caused by names getting out of sync.
    ref_model.available_parameters_subset(net_params).load(ref_model.model_path, ctx)

    options = {'learning_rate': base_lr, 'lr_scheduler': lr_scheduler,
               'momentum': 0.9, 'wd': 0.00005, 'rescale_grad': 1.0}
    # Gradient clipping is skipped when 'clip_gradients' is falsy (0 / None).
    clip_grad = params.get('clip_gradients')
    if clip_grad:
        options['clip_gradient'] = clip_grad

    trainer = _mx.gluon.Trainer(net.collect_params(), 'sgd', options)

    iteration = 0
    smoothed_loss = None
    last_time = 0
    # NOTE(review): the nesting of this loop was reconstructed from a
    # whitespace-collapsed copy of the file -- confirm against the canonical
    # source (in particular that backward() runs inside autograd.record()).
    while iteration < num_iterations:
        # Each pass of the outer loop restarts the loader for another sweep.
        loader.reset()
        for batch in loader:
            # Shard the batch along axis 0, one slice per device context.
            data = _mx.gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
            label = _mx.gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)

            Ls = []
            with _mx.autograd.record():
                for x, y in zip(data, label):
                    z = net(x)
                    # Move axis 1 of the network output last, then reshape to
                    # the per-device target layout expected by the loss.
                    z0 = _mx.nd.transpose(z, [0, 2, 3, 1]).reshape(ymap_shape)
                    L = loss(z0, y)
                    Ls.append(L)
                for L in Ls:
                    L.backward()

            # Average the per-device losses, then keep an exponential moving
            # average (decay 0.9) for progress reporting.
            cur_loss = _np.mean([L.asnumpy()[0] for L in Ls])
            if smoothed_loss is None:
                smoothed_loss = cur_loss
            else:
                smoothed_loss = 0.9 * smoothed_loss + 0.1 * cur_loss
            trainer.step(1)
            iteration += 1
            cur_time = _time.time()
            # Throttle progress output to at most one line every 10 seconds.
            if verbose and cur_time > last_time + 10:
                print('{now:%Y-%m-%d %H:%M:%S} Training {cur_iter:{width}d}/{num_iterations:{width}d} Loss {loss:6.3f}'.format(
                    now=_datetime.now(), cur_iter=iteration, num_iterations=num_iterations,
                    loss=smoothed_loss, width=len(str(num_iterations))))
                last_time = cur_time
            # Stop mid-sweep once the iteration budget is exhausted.
            if iteration == num_iterations:
                break

    training_time = _time.time() - start_time

    # Save the model
    state = {
        '_model': net,
        '_class_to_index': class_to_index,
        '_training_time_as_string': _seconds_as_string(training_time),
        '_grid_shape': grid_shape,
        'anchors': anchors,
        'model': model,
        'classes': classes,
        'batch_size': batch_size,
        'input_image_shape': input_image_shape,
        'feature': feature,
        'non_maximum_suppression_threshold': params['non_maximum_suppression_threshold'],
        'annotations': annotations,
        'num_classes': num_classes,
        'num_examples': num_images,
        'num_bounding_boxes': num_instances,
        'training_time': training_time,
        'training_epochs': loader.cur_epoch,
        'training_iterations': iteration,
        'max_iterations': max_iterations,
        'training_loss': smoothed_loss,
    }
    return ObjectDetector(state)
def create(dataset, annotations=None, feature=None, model="darknet-yolo",
           classes=None, batch_size=0, max_iterations=0, verbose=True,
           grid_shape=[13, 13], **kwargs):
    """
    Create a :class:`ObjectDetector` model.

    Parameters
    ----------
    dataset : SFrame
        Input data. The columns named by the ``feature`` and ``annotations``
        parameters will be extracted for training the detector.

    annotations : string
        Name of the column containing the object detection annotations.
        This column should be a list of dictionaries (or a single dictionary),
        with each dictionary representing a bounding box of an object
        instance. Here is an example of the annotations for a single image
        with two object instances::

            [{'label': 'dog',
              'type': 'rectangle',
              'coordinates': {'x': 223, 'y': 198,
                              'width': 130, 'height': 230}},
             {'label': 'cat',
              'type': 'rectangle',
              'coordinates': {'x': 40, 'y': 73,
                              'width': 80, 'height': 123}}]

        The value for `x` is the horizontal center of the box paired with
        `width` and `y` is the vertical center of the box paired with
        `height`. 'None' (the default) indicates the only list column in
        `dataset` should be used for the annotations.

    feature : string
        Name of the column containing the input images. 'None' (the default)
        indicates the only image column in `dataset` should be used as the
        feature.

    model : string optional
        Object detection model to use:

           - "darknet-yolo" : Fast and medium-sized model

    grid_shape : array optional
        Shape of the grid used for object detection, as two values
        ``[height, width]``. Each entry is converted with ``int()``. Higher
        values increase precision for small objects, but at a higher
        computational cost

           - [13, 13] : Default grid value for a Fast and medium-sized model

    classes : list optional
        List of strings containing the names of the classes of objects.
        Inferred from the data if not provided.

    batch_size: int
        The number of images per training iteration. If 0, then it will be
        automatically determined based on resource availability.

    max_iterations : int
        The number of training iterations. If 0, then it will be
        automatically determined based on the amount of data you provide.

    verbose : bool, optional
        If True, print progress updates and model details.

    **kwargs
        Accepted for backward compatibility; not read by this function.

    Returns
    -------
    out : ObjectDetector
        A trained :class:`ObjectDetector` model.

    Raises
    ------
    ToolkitError
        If `dataset` is empty or if `grid_shape` does not contain exactly two
        values. The validation helpers called by this function may raise as
        well, and non-integer `grid_shape` entries propagate whatever
        ``int()`` raises.

    See Also
    --------
    ObjectDetector

    Examples
    --------
    .. sourcecode:: python

        # Train an object detector model
        >>> model = turicreate.object_detector.create(data)

        # Make predictions on the training set and add them as a column to
        # the SFrame
        >>> data['predictions'] = model.predict(data)

        # Visualize predictions by generating a new column of marked up images
        >>> data['image_pred'] = turicreate.object_detector.util.draw_bounding_boxes(data['image'], data['predictions'])
    """
    # NOTE: the list default for `grid_shape` is kept for signature
    # compatibility. It is safe because the argument is never mutated; it is
    # copied into a fresh list below.
    _raise_error_if_not_sframe(dataset, "dataset")

    if len(dataset) == 0:
        raise _ToolkitError("Unable to train on empty dataset")

    _numeric_param_check_range("max_iterations", max_iterations, 0, _six.MAXSIZE)

    supported_detectors = ["darknet-yolo"]

    # Auto-detect the image / annotations columns when they are not named.
    if feature is None:
        feature = _tkutl._find_only_image_column(dataset)
        if verbose:
            print("Using '%s' as feature column" % feature)
    if annotations is None:
        annotations = _tkutl._find_only_column_of_type(
            dataset,
            target_type=[list, dict],
            type_name="list",
            col_name="annotations",
        )
        if verbose:
            print("Using '%s' as annotations column" % annotations)

    _raise_error_if_not_detection_sframe(dataset, feature, annotations,
                                         require_annotations=True)
    _tkutl._handle_missing_values(dataset, feature, "dataset")
    _tkutl._check_categorical_option_type("model", model, supported_detectors)

    # 'darknet-yolo' -> 'darknet'. The instance is not used further in this
    # function; the construction is kept in case it has side effects
    # (NOTE(review): confirm whether it can be dropped).
    base_model = model.split("-", 1)[0]
    ref_model = _pre_trained_models.OBJECT_DETECTION_BASE_MODELS[base_model]()

    pretrained_model = _pre_trained_models.OBJECT_DETECTION_BASE_MODELS[
        "darknet_mlmodel"]()
    pretrained_model_path = pretrained_model.get_model_path()

    # create tensorflow model here
    import turicreate.toolkits.libtctensorflow

    # `is None` rather than `== None`: array-like inputs may overload `==`
    # elementwise, which makes the comparison unusable as a condition.
    if classes is None:
        classes = []

    _raise_error_if_not_iterable(classes)
    _raise_error_if_not_iterable(grid_shape)

    # Normalize and validate the grid BEFORE it is stored or forwarded, so
    # the backend always receives exactly two ints. An explicit raise is used
    # because `assert` statements are stripped when Python runs with -O.
    grid_shape = [int(x) for x in grid_shape]
    if len(grid_shape) != 2:
        raise _ToolkitError(
            "'grid_shape' must contain exactly two values (height, width); "
            "got {} value(s)".format(len(grid_shape))
        )

    # Default training parameters. Only 'grid_shape' and 'mlmodel_path' are
    # read below in this function.
    params = {
        "anchors": [
            (1.0, 2.0), (1.0, 1.0), (2.0, 1.0),
            (2.0, 4.0), (2.0, 2.0), (4.0, 2.0),
            (4.0, 8.0), (4.0, 4.0), (8.0, 4.0),
            (8.0, 16.0), (8.0, 8.0), (16.0, 8.0),
            (16.0, 32.0), (16.0, 16.0), (32.0, 16.0),
        ],
        "grid_shape": grid_shape,
        "aug_resize": 0,
        "aug_rand_crop": 0.9,
        "aug_rand_pad": 0.9,
        "aug_rand_gray": 0.0,
        "aug_aspect_ratio": 1.25,
        "aug_hue": 0.05,
        "aug_brightness": 0.05,
        "aug_saturation": 0.05,
        "aug_contrast": 0.05,
        "aug_horizontal_flip": True,
        "aug_min_object_covered": 0,
        "aug_min_eject_coverage": 0.5,
        "aug_area_range": (0.15, 2),
        "aug_pca_noise": 0.0,
        "aug_max_attempts": 20,
        "aug_inter_method": 2,
        "lmb_coord_xy": 10.0,
        "lmb_coord_wh": 10.0,
        "lmb_obj": 100.0,
        "lmb_noobj": 5.0,
        "lmb_class": 2.0,
        "non_maximum_suppression_threshold": 0.45,
        "rescore": True,
        "clip_gradients": 0.025,
        "weight_decay": 0.0005,
        "sgd_momentum": 0.9,
        "learning_rate": 1.0e-3,
        "shuffle": True,
        "mps_loss_mult": 8,
        # This large buffer size (8 batches) is an attempt to mitigate against
        # the SFrame shuffle operation that can occur after each epoch.
        "io_thread_buffer_size": 8,
        "mlmodel_path": pretrained_model_path,
    }

    tf_config = {
        "grid_height": params["grid_shape"][0],
        "grid_width": params["grid_shape"][1],
        "mlmodel_path": params["mlmodel_path"],
        "classes": classes,
        "compute_final_metrics": False,
        "verbose": verbose,
        "model": "darknet-yolo",
    }

    # If batch_size or max_iterations = 0, they will be automatically
    # generated in C++.
    if batch_size > 0:
        tf_config["batch_size"] = batch_size

    if max_iterations > 0:
        tf_config["max_iterations"] = max_iterations

    # Distinct local name so the `model` argument is not shadowed.
    model_proxy = _tc.extensions.object_detector()
    model_proxy.train(
        data=dataset,
        annotations_column_name=annotations,
        image_column_name=feature,
        options=tf_config,
    )
    return ObjectDetector(model_proxy=model_proxy, name="object_detector")