def train_model_retinanet(model,
                          dataset,
                          expt='',
                          test_size=.2,
                          n_epoch=10,
                          batch_size=1,
                          num_gpus=None,
                          include_masks=False,
                          panoptic=False,
                          panoptic_weight=0.1,
                          transforms=['watershed'],
                          transforms_kwargs={},
                          anchor_params=None,
                          pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
                          min_objects=3,
                          mask_size=(28, 28),
                          optimizer=SGD(lr=0.01, decay=1e-6,
                                        momentum=0.9, nesterov=True),
                          log_dir='/data/tensorboard_logs',
                          model_dir='/data/models',
                          model_name=None,
                          sigma=3.0,
                          alpha=0.25,
                          gamma=2.0,
                          score_threshold=0.01,
                          iou_threshold=0.5,
                          max_detections=100,
                          weighted_average=True,
                          lr_sched=rate_scheduler(lr=0.01, decay=0.95),
                          rotation_range=0,
                          flip=True,
                          shear=0,
                          zoom_range=0,
                          compute_map=True,
                          seed=0,
                          **kwargs):
    """Train a RetinaNet model from the given backbone.

    Adapted from:
        https://github.com/fizyr/keras-retinanet &
        https://github.com/fizyr/keras-maskrcnn

    Args:
        model (tensorflow.keras.Model): The model to train. It is compiled
            here with the RetinaNet losses and the given optimizer.
        dataset (str): Path to a dataset to train the model with. Its base
            name (without extension) is used in the default model name.
        expt (str): Experiment, substring to include in model name.
        test_size (float): Portion of the data to leave as test data
            (passed to ``get_data``).
        n_epoch (int): Number of training epochs.
        batch_size (int): Batch size passed to the data generators.
        num_gpus (int): The number of GPUs to train on. If None, the value
            returned by ``train_utils.count_gpus()`` is used.
        include_masks (bool): Whether to generate masks using MaskRCNN.
            When True a ``'masks'`` loss is added and ``model`` itself is
            used for evaluation.
        panoptic (bool): Whether to include semantic segmentation heads.
        panoptic_weight (float): Weight applied to the semantic loss.
        transforms (list): List of transform names as strings.
            Each transform will have its own semantic segmentation head.
        transforms_kwargs (dict): Optional values for the transforms in
            ``transforms``; passed through unchanged to the generators.
        anchor_params (AnchorParameters): Struct containing anchor
            parameters. If None, default values are used.
        pyramid_levels (list): Pyramid levels to attach
            the object detection heads to.
        min_objects (int): If a training image has fewer than min_objects
            objects, the image will not be used for training.
        mask_size (tuple): The size of the masks.
        optimizer (object): Pre-initialized optimizer object
            (SGD, Adam, etc.)
        log_dir (str): Filepath to save tensorboard logs. If None,
            disables the tensorboard callback.
        model_dir (str): Directory to save the model file.
        model_name (str): Name of the model (and name of output file).
            If None, it is built from today's date, the dataset name and
            ``expt``.
        sigma (float): The point where the loss changes from L2 to L1.
        alpha (float): Scale the focal weight with alpha.
        gamma (float): Take the power of the focal weight with gamma.
        score_threshold (float): The score confidence threshold
            to use for detections.
        iou_threshold (float): The threshold used to consider
            when a detection is positive or negative.
        max_detections (int): The maximum number of detections
            to use per image.
        weighted_average (bool): Use a weighted average in evaluation.
        lr_sched (function): Learning rate scheduler function.
        rotation_range (int): Maximum rotation range for image
            augmentation.
        flip (bool): Enables horizontal and vertical flipping for
            augmentation.
        shear (int): Shear range for image augmentation (passed to the
            training generator as ``shear_range``).
        zoom_range (tuple): Minimum and maximum zoom values (0.8, 1.2).
        compute_map (bool): Whether to compute mAP at end of training.
        seed (int): Random seed passed to ``get_data`` and the generators.
        kwargs (dict): Accepted for backward compatibility; not used by
            this function.

    Returns:
        tensorflow.keras.Model: The trained model.

    Raises:
        ValueError: If ``num_gpus >= 2`` and there are fewer validation
            samples than GPUs.
    """
    # NOTE(review): the list/dict/optimizer defaults in the signature are
    # created once at definition time and shared between calls. This
    # function only passes them through; kept as-is for compatibility.
    is_channels_first = K.image_data_format() == 'channels_first'

    if model_name is None:
        todays_date = datetime.datetime.now().strftime('%Y-%m-%d')
        data_name = os.path.splitext(os.path.basename(dataset))[0]
        model_name = '{}_{}_{}'.format(todays_date, data_name, expt)
    model_path = os.path.join(model_dir, '{}.h5'.format(model_name))
    loss_path = os.path.join(model_dir, '{}.npz'.format(model_name))

    train_dict, test_dict = get_data(dataset, seed=seed, test_size=test_size)

    channel_axis = 1 if is_channels_first else -1
    n_classes = model.layers[-1].output_shape[channel_axis]

    # One entry per layer whose name contains 'semantic'.
    if panoptic:
        n_semantic_classes = [
            layer.output_shape[channel_axis] for layer in model.layers
            if 'semantic' in layer.name
        ]
    else:
        n_semantic_classes = []

    # the data, shuffled and split between train and test sets
    print('X_train shape:', train_dict['X'].shape)
    print('y_train shape:', train_dict['y'].shape)
    print('X_test shape:', test_dict['X'].shape)
    print('y_test shape:', test_dict['y'].shape)
    print('Output Shape:', model.layers[-1].output_shape)
    print('Number of Classes:', n_classes)

    if num_gpus is None:
        num_gpus = train_utils.count_gpus()

    # NOTE(review): a threshold of 1e6 means the MultiGpuModel wrapping is
    # effectively never applied -- presumably intentional; confirm.
    if num_gpus >= 1e6:
        batch_size = batch_size * num_gpus
        model = train_utils.MultiGpuModel(model, num_gpus)

    print('Training on {} GPUs'.format(num_gpus))

    # evaluation of model is done on `retinanet_bbox`
    if include_masks:
        prediction_model = model
    else:
        prediction_model = retinanet_bbox(
            model,
            nms=True,
            anchor_params=anchor_params,
            num_semantic_heads=len(n_semantic_classes),
            panoptic=panoptic,
            class_specific_filter=False)

    retinanet_losses = losses.RetinaNetLosses(sigma=sigma,
                                              alpha=alpha,
                                              gamma=gamma,
                                              iou_threshold=iou_threshold,
                                              mask_size=mask_size)

    def semantic_loss(n_classes):
        """Build the weighted semantic loss for a head with n_classes."""
        # Keras calls loss functions positionally as (y_true, y_pred);
        # the arguments are forwarded in that same order.
        def _semantic_loss(y_true, y_pred):
            return panoptic_weight * losses.weighted_categorical_crossentropy(
                y_true, y_pred, n_classes=n_classes)
        return _semantic_loss

    loss = {
        'regression': retinanet_losses.regress_loss,
        'classification': retinanet_losses.classification_loss
    }

    if include_masks:
        loss['masks'] = retinanet_losses.mask_loss

    if panoptic:
        # Give losses for all of the semantic heads
        for layer in model.layers:
            if 'semantic' in layer.name:
                n_head_classes = layer.output_shape[channel_axis]
                loss[layer.name] = semantic_loss(n_head_classes)

    model.compile(loss=loss, optimizer=optimizer)

    if num_gpus >= 2:
        # Each GPU must have at least one validation example
        n_val = test_dict['y'].shape[0]
        if n_val < num_gpus:
            # Argument order matches the placeholders: GPUs first, then
            # the number of validation samples.
            raise ValueError('Not enough validation data for {} GPUs. '
                             'Received {} validation sample.'.format(
                                 num_gpus, n_val))

        # When using multiple GPUs and skip_connections,
        # the training data must be evenly distributed across all GPUs
        num_train = train_dict['y'].shape[0]
        nb_samples = num_train - num_train % batch_size
        if nb_samples:
            train_dict['y'] = train_dict['y'][:nb_samples]
            train_dict['X'] = train_dict['X'][:nb_samples]

    # this will do preprocessing and realtime data augmentation
    datagen = image_generators.RetinaNetGenerator(
        rotation_range=rotation_range,
        shear_range=shear,
        zoom_range=zoom_range,
        horizontal_flip=flip,
        vertical_flip=flip)

    # validation data is never augmented
    datagen_val = image_generators.RetinaNetGenerator(
        rotation_range=0,
        shear_range=0,
        zoom_range=0,
        horizontal_flip=0,
        vertical_flip=0)

    # Shapes are always guessed in this version (no backbone argument).
    compute_shapes = guess_shapes

    train_data = datagen.flow(
        train_dict,
        seed=seed,
        include_masks=include_masks,
        panoptic=panoptic,
        transforms=transforms,
        transforms_kwargs=transforms_kwargs,
        pyramid_levels=pyramid_levels,
        min_objects=min_objects,
        anchor_params=anchor_params,
        compute_shapes=compute_shapes,
        batch_size=batch_size)

    val_data = datagen_val.flow(
        test_dict,
        seed=seed,
        include_masks=include_masks,
        panoptic=panoptic,
        transforms=transforms,
        transforms_kwargs=transforms_kwargs,
        pyramid_levels=pyramid_levels,
        min_objects=min_objects,
        anchor_params=anchor_params,
        compute_shapes=compute_shapes,
        batch_size=batch_size)

    train_callbacks = get_callbacks(
        model_path,
        lr_sched=lr_sched,
        tensorboard_log_dir=log_dir,
        save_weights_only=num_gpus >= 2,
        monitor='val_loss',
        verbose=1)

    # NOTE(review): assumes the tensorboard callback is the last element
    # returned by get_callbacks when log_dir is set -- confirm.
    eval_callback = RedirectModel(
        Evaluate(val_data,
                 iou_threshold=iou_threshold,
                 score_threshold=score_threshold,
                 max_detections=max_detections,
                 tensorboard=train_callbacks[-1] if log_dir else None,
                 weighted_average=weighted_average),
        prediction_model)

    train_callbacks.append(eval_callback)

    # fit the model on the batches generated by datagen.flow()
    loss_history = model.fit_generator(
        train_data,
        steps_per_epoch=train_data.y.shape[0] // batch_size,
        epochs=n_epoch,
        validation_data=val_data,
        validation_steps=val_data.y.shape[0] // batch_size,
        callbacks=train_callbacks)

    model.save_weights(model_path)
    np.savez(loss_path, loss_history=loss_history.history)

    if compute_map:
        average_precisions = evaluate(
            val_data,
            prediction_model,
            iou_threshold=iou_threshold,
            score_threshold=score_threshold,
            max_detections=max_detections,
        )

        # print evaluation
        total_instances = []
        precisions = []
        for label, (average_precision, num_annotations) in \
                average_precisions.items():
            print('{:.0f} instances of class'.format(num_annotations),
                  label,
                  'with average precision: {:.4f}'.format(average_precision))
            total_instances.append(num_annotations)
            precisions.append(average_precision)

        if sum(total_instances) == 0:
            print('No test instances found.')
        else:
            print(
                'mAP using the weighted average of precisions among classes: {:.4f}'
                .format(
                    sum([a * b for a, b in zip(total_instances, precisions)]) /
                    sum(total_instances)))
            # Unweighted mean over the classes that have annotations.
            print('mAP: {:.4f}'.format(
                sum(precisions) / sum(x > 0 for x in total_instances)))

    return model
def train_model_retinanet(model,
                          dataset,
                          backbone,
                          expt='',
                          test_size=.1,
                          n_epoch=10,
                          batch_size=1,
                          num_gpus=None,
                          include_masks=False,
                          panoptic=False,
                          panoptic_weight=1,
                          anchor_params=None,
                          pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
                          mask_size=(28, 28),
                          optimizer=SGD(lr=0.01, decay=1e-6,
                                        momentum=0.9, nesterov=True),
                          log_dir='/data/tensorboard_logs',
                          model_dir='/data/models',
                          model_name=None,
                          sigma=3.0,
                          alpha=0.25,
                          gamma=2.0,
                          score_threshold=0.01,
                          iou_threshold=0.5,
                          max_detections=100,
                          weighted_average=True,
                          lr_sched=rate_scheduler(lr=0.01, decay=0.95),
                          rotation_range=0,
                          flip=True,
                          shear=0,
                          zoom_range=0,
                          seed=None,
                          **kwargs):
    """Train a RetinaNet model from the given backbone

    Adapted from:
        https://github.com/fizyr/keras-retinanet &
        https://github.com/fizyr/keras-maskrcnn

    Args:
        model (tensorflow.keras.Model): The model to train. It is compiled
            here with the RetinaNet losses and the given optimizer.
        dataset (str): Path to the dataset, passed to ``get_data``. Its
            base name (without extension) is used in the default model
            name.
        backbone (str): Backbone name. If it contains ``'vgg'`` or
            ``'densenet'``, shapes are computed with
            ``make_shapes_callback(model)``; otherwise ``guess_shapes``
            is used.
        expt (str): Substring to include in the default model name.
        test_size (float): Passed to ``get_data`` as ``test_size``.
        n_epoch (int): Number of training epochs.
        batch_size (int): Batch size passed to the data generators.
        num_gpus (int): Number of GPUs. If None, the value returned by
            ``train_utils.count_gpus()`` is used.
        include_masks (bool): If True, a ``'masks'`` loss is added and
            ``model`` itself is used for evaluation instead of the
            ``retinanet_bbox`` wrapper.
        panoptic (bool): If True, a ``'semantic'`` loss is added; the
            model must then contain a layer named ``'semantic'``.
        panoptic_weight (float): Multiplier applied to the semantic loss.
        anchor_params: Passed to ``retinanet_bbox`` and the generators.
        pyramid_levels (list): Passed to the data generators.
        mask_size (tuple): Passed to ``losses.RetinaNetLosses``.
        optimizer (object): Pre-initialized optimizer used to compile
            the model.
        log_dir (str): Base directory for tensorboard logs; logs go to
            ``log_dir/model_name``.
        model_dir (str): Directory in which the weights (``.h5``) and the
            loss history (``.npz``) are saved.
        model_name (str): Name of the model and of the output files. If
            None, built from today's date, the dataset name and ``expt``.
        sigma (float): Passed to ``losses.RetinaNetLosses``.
        alpha (float): Passed to ``losses.RetinaNetLosses``.
        gamma (float): Passed to ``losses.RetinaNetLosses``.
        score_threshold (float): Passed to the evaluation routines.
        iou_threshold (float): Passed to the losses and the evaluation
            routines.
        max_detections (int): Passed to the evaluation routines.
        weighted_average (bool): Passed to the ``Evaluate`` callback.
        lr_sched (function): Schedule given to
            ``callbacks.LearningRateScheduler``.
        rotation_range (int): Rotation range for training augmentation.
        flip (bool): Enables horizontal and vertical flipping for
            training augmentation.
        shear (int): Shear range for training augmentation.
        zoom_range: Zoom range for training augmentation.
        seed (int): Random seed passed to ``get_data`` and the generators.
        kwargs (dict): Accepted for backward compatibility; not used by
            this function.

    Returns:
        tensorflow.keras.Model: The trained model.

    Raises:
        ValueError: If ``num_gpus >= 2`` and there are fewer validation
            samples than GPUs.
    """
    is_channels_first = K.image_data_format() == 'channels_first'

    if model_name is None:
        todays_date = datetime.datetime.now().strftime('%Y-%m-%d')
        data_name = os.path.splitext(os.path.basename(dataset))[0]
        model_name = '{}_{}_{}'.format(todays_date, data_name, expt)
    model_path = os.path.join(model_dir, '{}.h5'.format(model_name))
    loss_path = os.path.join(model_dir, '{}.npz'.format(model_name))

    train_dict, test_dict = get_data(dataset, seed=seed, test_size=test_size)

    channel_axis = 1 if is_channels_first else -1
    n_classes = model.layers[-1].output_shape[channel_axis]

    # Only defined (and only needed by semantic_loss) when panoptic is set.
    if panoptic:
        n_semantic_classes = model.get_layer(
            name='semantic').output_shape[channel_axis]

    # the data, shuffled and split between train and test sets
    print('X_train shape:', train_dict['X'].shape)
    print('y_train shape:', train_dict['y'].shape)
    print('X_test shape:', test_dict['X'].shape)
    print('y_test shape:', test_dict['y'].shape)
    print('Output Shape:', model.layers[-1].output_shape)
    print('Number of Classes:', n_classes)

    if num_gpus is None:
        num_gpus = train_utils.count_gpus()

    # NOTE(review): a threshold of 1e6 means the MultiGpuModel wrapping is
    # effectively never applied -- presumably intentional; confirm.
    if num_gpus >= 1e6:
        batch_size = batch_size * num_gpus
        model = train_utils.MultiGpuModel(model, num_gpus)

    print('Training on {} GPUs'.format(num_gpus))

    # evaluation of model is done on `retinanet_bbox`
    if include_masks:
        prediction_model = model
    else:
        prediction_model = retinanet_bbox(model,
                                          nms=True,
                                          anchor_params=anchor_params,
                                          panoptic=panoptic,
                                          class_specific_filter=False)

    retinanet_losses = losses.RetinaNetLosses(sigma=sigma,
                                              alpha=alpha,
                                              gamma=gamma,
                                              iou_threshold=iou_threshold,
                                              mask_size=mask_size)

    # Keras calls loss functions positionally as (y_true, y_pred); the
    # arguments are forwarded in that same order.
    def semantic_loss(y_true, y_pred):
        return panoptic_weight * losses.weighted_categorical_crossentropy(
            y_true, y_pred, n_classes=n_semantic_classes)

    loss = {
        'regression': retinanet_losses.regress_loss,
        'classification': retinanet_losses.classification_loss
    }

    if include_masks:
        loss['masks'] = retinanet_losses.mask_loss

    if panoptic:
        loss['semantic'] = semantic_loss

    model.compile(loss=loss, optimizer=optimizer)

    if num_gpus >= 2:
        # Each GPU must have at least one validation example
        n_val = test_dict['y'].shape[0]
        if n_val < num_gpus:
            # Argument order matches the placeholders: GPUs first, then
            # the number of validation samples.
            raise ValueError('Not enough validation data for {} GPUs. '
                             'Received {} validation sample.'.format(
                                 num_gpus, n_val))

        # When using multiple GPUs and skip_connections,
        # the training data must be evenly distributed across all GPUs
        num_train = train_dict['y'].shape[0]
        nb_samples = num_train - num_train % batch_size
        if nb_samples:
            train_dict['y'] = train_dict['y'][:nb_samples]
            train_dict['X'] = train_dict['X'][:nb_samples]

    # this will do preprocessing and realtime data augmentation
    datagen = image_generators.RetinaNetGenerator(
        rotation_range=rotation_range,
        shear_range=shear,
        zoom_range=zoom_range,
        horizontal_flip=flip,
        vertical_flip=flip)

    # validation data is never augmented
    datagen_val = image_generators.RetinaNetGenerator(
        rotation_range=0,
        shear_range=0,
        zoom_range=0,
        horizontal_flip=0,
        vertical_flip=0)

    if 'vgg' in backbone or 'densenet' in backbone:
        compute_shapes = make_shapes_callback(model)
    else:
        compute_shapes = guess_shapes

    train_data = datagen.flow(
        train_dict,
        seed=seed,
        include_masks=include_masks,
        panoptic=panoptic,
        pyramid_levels=pyramid_levels,
        anchor_params=anchor_params,
        compute_shapes=compute_shapes,
        batch_size=batch_size)

    val_data = datagen_val.flow(
        test_dict,
        seed=seed,
        include_masks=include_masks,
        panoptic=panoptic,
        pyramid_levels=pyramid_levels,
        anchor_params=anchor_params,
        compute_shapes=compute_shapes,
        batch_size=batch_size)

    tensorboard_callback = callbacks.TensorBoard(
        log_dir=os.path.join(log_dir, model_name))

    # fit the model on the batches generated by datagen.flow()
    loss_history = model.fit_generator(
        train_data,
        steps_per_epoch=train_data.y.shape[0] // batch_size,
        epochs=n_epoch,
        validation_data=val_data,
        validation_steps=val_data.y.shape[0] // batch_size,
        callbacks=[
            callbacks.LearningRateScheduler(lr_sched),
            callbacks.ModelCheckpoint(
                model_path,
                monitor='val_loss',
                verbose=1,
                save_best_only=True,
                save_weights_only=num_gpus >= 2),
            tensorboard_callback,
            callbacks.ReduceLROnPlateau(
                monitor='loss',
                factor=0.1,
                patience=10,
                verbose=1,
                mode='auto',
                min_delta=0.0001,
                cooldown=0,
                min_lr=0),
            # Evaluation runs against prediction_model, not the
            # training model.
            RedirectModel(
                Evaluate(val_data,
                         iou_threshold=iou_threshold,
                         score_threshold=score_threshold,
                         max_detections=max_detections,
                         tensorboard=tensorboard_callback,
                         weighted_average=weighted_average),
                prediction_model),
        ])

    model.save_weights(model_path)
    np.savez(loss_path, loss_history=loss_history.history)

    average_precisions = evaluate(
        val_data,
        prediction_model,
        iou_threshold=iou_threshold,
        score_threshold=score_threshold,
        max_detections=max_detections,
    )

    # print evaluation
    total_instances = []
    precisions = []
    for label, (average_precision, num_annotations) in \
            average_precisions.items():
        print('{:.0f} instances of class'.format(num_annotations),
              label,
              'with average precision: {:.4f}'.format(average_precision))
        total_instances.append(num_annotations)
        precisions.append(average_precision)

    if sum(total_instances) == 0:
        print('No test instances found.')
    else:
        print(
            'mAP using the weighted average of precisions among classes: {:.4f}'
            .format(
                sum([a * b for a, b in zip(total_instances, precisions)]) /
                sum(total_instances)))
        # Unweighted mean over the classes that have annotations.
        print('mAP: {:.4f}'.format(
            sum(precisions) / sum(x > 0 for x in total_instances)))

    return model
def train_model_retinanet(model,
                          dataset,
                          backbone,
                          expt='',
                          test_size=.1,
                          n_epoch=10,
                          batch_size=1,
                          num_gpus=None,
                          include_masks=False,
                          mask_size=(28, 28),
                          optimizer=SGD(lr=0.01, decay=1e-6,
                                        momentum=0.9, nesterov=True),
                          log_dir='/data/tensorboard_logs',
                          model_dir='/data/models',
                          model_name=None,
                          sigma=3.0,
                          alpha=0.25,
                          gamma=2.0,
                          score_threshold=0.01,
                          iou_threshold=0.5,
                          max_detections=100,
                          weighted_average=True,
                          lr_sched=rate_scheduler(lr=0.01, decay=0.95),
                          rotation_range=0,
                          flip=True,
                          shear=0,
                          zoom_range=0,
                          **kwargs):
    """Train a RetinaNet model from the given backbone

    Adapted from:
        https://github.com/fizyr/keras-retinanet &
        https://github.com/fizyr/keras-maskrcnn

    Args:
        model (tensorflow.keras.Model): The model to train. It is compiled
            here with the losses defined below and the given optimizer.
        dataset (str): Path to the dataset, passed to ``get_data``. Its
            base name (without extension) is used in the default model
            name.
        backbone (str): Backbone name. If it contains ``'vgg'`` or
            ``'densenet'``, shapes are computed with
            ``make_shapes_callback(model)``; otherwise ``guess_shapes``
            is used.
        expt (str): Substring to include in the default model name.
        test_size (float): Passed to ``get_data`` as ``test_size``.
        n_epoch (int): Number of training epochs.
        batch_size (int): Batch size passed to the data generators.
        num_gpus (int): Number of GPUs. If None, the value returned by
            ``train_utils.count_gpus()`` is used.
        include_masks (bool): If True, a ``'masks'`` loss is added and
            ``model`` itself is used for evaluation instead of the
            ``retinanet_bbox`` wrapper.
        mask_size (tuple): Height and width the predicted masks are
            reshaped to, and the target masks resized to, in the mask
            loss.
        optimizer (object): Pre-initialized optimizer used to compile
            the model.
        log_dir (str): Base directory for tensorboard logs; logs go to
            ``log_dir/model_name``.
        model_dir (str): Directory in which the weights (``.h5``) and the
            loss history (``.npz``) are saved.
        model_name (str): Name of the model and of the output files. If
            None, built from today's date, the dataset name and ``expt``.
        sigma (float): Passed to ``losses.smooth_l1``.
        alpha (float): Passed to ``losses.focal``.
        gamma (float): Passed to ``losses.focal``.
        score_threshold (float): Passed to the evaluation routines.
        iou_threshold (float): Minimum IoU for a predicted box to take
            part in the mask loss; also passed to the evaluation routines.
        max_detections (int): Passed to the evaluation routines.
        weighted_average (bool): Passed to the ``Evaluate`` callback.
        lr_sched (function): Schedule given to
            ``callbacks.LearningRateScheduler``.
        rotation_range (int): Rotation range for training augmentation.
        flip (bool): Enables horizontal and vertical flipping for
            training augmentation.
        shear (int): Shear range for training augmentation.
        zoom_range: Zoom range for training augmentation.
        kwargs (dict): Accepted for backward compatibility; not used by
            this function.

    Returns:
        tensorflow.keras.Model: The trained model.

    Raises:
        ValueError: If ``num_gpus >= 2`` and there are fewer validation
            samples than GPUs.
    """
    is_channels_first = K.image_data_format() == 'channels_first'

    if model_name is None:
        todays_date = datetime.datetime.now().strftime('%Y-%m-%d')
        data_name = os.path.splitext(os.path.basename(dataset))[0]
        model_name = '{}_{}_{}'.format(todays_date, data_name, expt)
    model_path = os.path.join(model_dir, '{}.h5'.format(model_name))
    loss_path = os.path.join(model_dir, '{}.npz'.format(model_name))

    train_dict, test_dict = get_data(dataset, mode='conv', test_size=test_size)

    n_classes = model.layers[-1].output_shape[1 if is_channels_first else -1]

    # the data, shuffled and split between train and test sets
    print('X_train shape:', train_dict['X'].shape)
    print('y_train shape:', train_dict['y'].shape)
    print('X_test shape:', test_dict['X'].shape)
    print('y_test shape:', test_dict['y'].shape)
    print('Output Shape:', model.layers[-1].output_shape)
    print('Number of Classes:', n_classes)

    if num_gpus is None:
        num_gpus = train_utils.count_gpus()

    # NOTE(review): a threshold of 1e6 means the MultiGpuModel wrapping is
    # effectively never applied -- presumably intentional; confirm.
    if num_gpus >= 1e6:
        batch_size = batch_size * num_gpus
        model = train_utils.MultiGpuModel(model, num_gpus)

    print('Training on {} GPUs'.format(num_gpus))

    def regress_loss(y_true, y_pred):
        """Smooth L1 loss over positive anchors, normalized by their count."""
        # separate target and state
        regression = y_pred
        regression_target = y_true[..., :-1]
        anchor_state = y_true[..., -1]

        # keep only the positive anchors (anchor_state == 1)
        indices = tf.where(K.equal(anchor_state, 1))
        regression = tf.gather_nd(regression, indices)
        regression_target = tf.gather_nd(regression_target, indices)

        # compute the loss
        loss = losses.smooth_l1(regression_target, regression, sigma=sigma)

        # compute the normalizer: the number of positive anchors
        normalizer = K.maximum(1, K.shape(indices)[0])
        normalizer = K.cast(normalizer, dtype=K.floatx())

        return K.sum(loss) / normalizer

    def classification_loss(y_true, y_pred):
        """Focal loss over non-ignored anchors, normalized by positives."""
        # TODO: try weighted_categorical_crossentropy
        labels = y_true[..., :-1]
        # -1 for ignore, 0 for background, 1 for object
        anchor_state = y_true[..., -1]

        classification = y_pred
        # filter out "ignore" anchors
        indices = tf.where(K.not_equal(anchor_state, -1))
        labels = tf.gather_nd(labels, indices)
        classification = tf.gather_nd(classification, indices)

        # compute the loss
        loss = losses.focal(labels, classification, alpha=alpha, gamma=gamma)

        # compute the normalizer: the number of positive anchors
        normalizer = tf.where(K.equal(anchor_state, 1))
        normalizer = K.cast(K.shape(normalizer)[0], K.floatx())
        normalizer = K.maximum(K.cast_to_floatx(1.0), normalizer)

        return K.sum(loss) / normalizer

    def mask_loss(y_true, y_pred):
        """Binary cross-entropy mask loss; 0 when y_true has an empty axis."""
        def _mask(y_true, y_pred, iou_threshold=0.5, mask_size=(28, 28)):
            # split up the different predicted blobs
            boxes = y_pred[:, :, :4]
            masks = y_pred[:, :, 4:]

            # split up the different blobs
            annotations = y_true[:, :, :5]
            width = K.cast(y_true[0, 0, 5], dtype='int32')
            height = K.cast(y_true[0, 0, 6], dtype='int32')
            masks_target = y_true[:, :, 7:]

            # reshape the masks back to their original size
            masks_target = K.reshape(
                masks_target,
                (K.shape(masks_target)[0] * K.shape(masks_target)[1],
                 height, width))
            masks = K.reshape(
                masks,
                (K.shape(masks)[0] * K.shape(masks)[1],
                 mask_size[0], mask_size[1], -1))

            # batch size > 1 fix
            boxes = K.reshape(boxes, (-1, K.shape(boxes)[2]))
            annotations = K.reshape(annotations,
                                    (-1, K.shape(annotations)[2]))

            # compute overlap of boxes with annotations
            iou = overlap(boxes, annotations)
            argmax_overlaps_inds = K.argmax(iou, axis=1)
            max_iou = K.max(iou, axis=1)

            # keep the boxes whose best IoU is >= iou_threshold
            indices = tf.where(K.greater_equal(max_iou, iou_threshold))
            boxes = tf.gather_nd(boxes, indices)
            masks = tf.gather_nd(masks, indices)
            argmax_overlaps_inds = tf.gather_nd(argmax_overlaps_inds, indices)
            argmax_overlaps_inds = K.cast(argmax_overlaps_inds, 'int32')
            labels = K.gather(annotations[:, 4], argmax_overlaps_inds)
            labels = K.cast(labels, 'int32')

            # make normalized boxes
            x1 = boxes[:, 0]
            y1 = boxes[:, 1]
            x2 = boxes[:, 2]
            y2 = boxes[:, 3]
            boxes = K.stack([
                y1 / (K.cast(height, dtype=K.floatx()) - 1),
                x1 / (K.cast(width, dtype=K.floatx()) - 1),
                (y2 - 1) / (K.cast(height, dtype=K.floatx()) - 1),
                (x2 - 1) / (K.cast(width, dtype=K.floatx()) - 1),
            ], axis=1)

            # crop and resize masks_target
            # append a fake channel dimension
            masks_target = K.expand_dims(masks_target, axis=3)
            masks_target = tf.image.crop_and_resize(
                masks_target,
                boxes,
                argmax_overlaps_inds,
                mask_size)

            # remove fake channel dimension
            masks_target = masks_target[:, :, :, 0]

            # gather the predicted masks using the annotation label
            masks = tf.transpose(masks, (0, 3, 1, 2))
            label_indices = K.stack([tf.range(K.shape(labels)[0]), labels],
                                    axis=1)
            masks = tf.gather_nd(masks, label_indices)

            # compute mask loss
            mask_loss = K.binary_crossentropy(masks_target, masks)
            normalizer = K.shape(masks)[0] * K.shape(masks)[1] * \
                K.shape(masks)[2]
            normalizer = K.maximum(K.cast(normalizer, K.floatx()), 1)
            mask_loss = K.sum(mask_loss) / normalizer

            return mask_loss

        # if there are no masks annotations, return 0;
        # else, compute the masks loss
        return tf.cond(
            K.any(K.equal(K.shape(y_true), 0)),
            lambda: K.cast_to_floatx(0.0),
            lambda: _mask(y_true, y_pred,
                          iou_threshold=iou_threshold,
                          mask_size=mask_size))

    # evaluation of model is done on `retinanet_bbox`
    if include_masks:
        prediction_model = model
    else:
        prediction_model = retinanet_bbox(model,
                                          nms=True,
                                          class_specific_filter=False)

    loss = {'regression': regress_loss, 'classification': classification_loss}

    if include_masks:
        loss['masks'] = mask_loss

    model.compile(loss=loss, optimizer=optimizer)

    if num_gpus >= 2:
        # Each GPU must have at least one validation example
        n_val = test_dict['y'].shape[0]
        if n_val < num_gpus:
            # Argument order matches the placeholders: GPUs first, then
            # the number of validation samples.
            raise ValueError('Not enough validation data for {} GPUs. '
                             'Received {} validation sample.'.format(
                                 num_gpus, n_val))

        # When using multiple GPUs and skip_connections,
        # the training data must be evenly distributed across all GPUs
        num_train = train_dict['y'].shape[0]
        nb_samples = num_train - num_train % batch_size
        if nb_samples:
            train_dict['y'] = train_dict['y'][:nb_samples]
            train_dict['X'] = train_dict['X'][:nb_samples]

    # this will do preprocessing and realtime data augmentation
    datagen = image_generators.RetinaNetGenerator(
        rotation_range=rotation_range,
        shear_range=shear,
        zoom_range=zoom_range,
        horizontal_flip=flip,
        vertical_flip=flip)

    # validation data is never augmented
    datagen_val = image_generators.RetinaNetGenerator(
        rotation_range=0,
        shear_range=0,
        zoom_range=0,
        horizontal_flip=0,
        vertical_flip=0)

    if 'vgg' in backbone or 'densenet' in backbone:
        compute_shapes = make_shapes_callback(model)
    else:
        compute_shapes = guess_shapes

    train_data = datagen.flow(
        train_dict,
        include_masks=include_masks,
        compute_shapes=compute_shapes,
        batch_size=batch_size)

    val_data = datagen_val.flow(
        test_dict,
        include_masks=include_masks,
        compute_shapes=compute_shapes,
        batch_size=batch_size)

    tensorboard_callback = callbacks.TensorBoard(
        log_dir=os.path.join(log_dir, model_name))

    # fit the model on the batches generated by datagen.flow()
    loss_history = model.fit_generator(
        train_data,
        steps_per_epoch=train_data.y.shape[0] // batch_size,
        epochs=n_epoch,
        validation_data=val_data,
        validation_steps=val_data.y.shape[0] // batch_size,
        callbacks=[
            callbacks.LearningRateScheduler(lr_sched),
            callbacks.ModelCheckpoint(
                model_path,
                monitor='val_loss',
                verbose=1,
                save_best_only=True,
                save_weights_only=num_gpus >= 2),
            tensorboard_callback,
            callbacks.ReduceLROnPlateau(
                monitor='loss',
                factor=0.1,
                patience=10,
                verbose=1,
                mode='auto',
                min_delta=0.0001,
                cooldown=0,
                min_lr=0),
            # Evaluation runs against prediction_model, not the
            # training model.
            RedirectModel(
                Evaluate(val_data,
                         iou_threshold=iou_threshold,
                         score_threshold=score_threshold,
                         max_detections=max_detections,
                         tensorboard=tensorboard_callback,
                         weighted_average=weighted_average),
                prediction_model),
        ])

    model.save_weights(model_path)
    np.savez(loss_path, loss_history=loss_history.history)

    average_precisions = evaluate(
        val_data,
        prediction_model,
        iou_threshold=iou_threshold,
        score_threshold=score_threshold,
        max_detections=max_detections,
    )

    # print evaluation
    total_instances = []
    precisions = []
    for label, (average_precision, num_annotations) in \
            average_precisions.items():
        print('{:.0f} instances of class'.format(num_annotations),
              label,
              'with average precision: {:.4f}'.format(average_precision))
        total_instances.append(num_annotations)
        precisions.append(average_precision)

    if sum(total_instances) == 0:
        print('No test instances found.')
    else:
        print(
            'mAP using the weighted average of precisions among classes: {:.4f}'
            .format(
                sum([a * b for a, b in zip(total_instances, precisions)]) /
                sum(total_instances)))
        # Unweighted mean over the classes that have annotations.
        print('mAP: {:.4f}'.format(
            sum(precisions) / sum(x > 0 for x in total_instances)))

    return model