def load_data(path='3T3_NIH.npz', test_size=.2, seed=0):
    """Loads the 3T3-NIH dataset.

    Args:
        path: path where to cache the dataset locally
            (relative to ~/.keras/datasets).
        test_size: fraction of data to reserve as test data
        seed: the seed for randomly shuffling the dataset

    Returns:
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
    """
    basepath = os.path.expanduser(os.path.join('~', '.keras', 'datasets'))
    prefix = path.split(os.path.sep)[:-1]
    data_dir = os.path.join(basepath, *prefix) if prefix else basepath
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    elif not os.path.isdir(data_dir):
        raise IOError('{} exists but is not a directory'.format(data_dir))

    path = get_file(path,
                    origin='https://deepcell-data.s3.amazonaws.com/nuclei/3T3_NIH.npz',
                    file_hash='954b6f4ad6a71435b84c40726837e4ba')

    train_dict, test_dict = get_data(path, test_size=test_size, seed=seed)

    x_train, y_train = train_dict['X'], train_dict['y']
    x_test, y_test = test_dict['X'], test_dict['y']
    return (x_train, y_train), (x_test, y_test)
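# Example usage (a sketch; the first call downloads the .npz archive from the
# deepcell-data S3 bucket into ~/.keras/datasets, so network access is assumed):
#
#     (x_train, y_train), (x_test, y_test) = load_data(test_size=0.2, seed=0)
#     print(x_train.shape, y_train.shape)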
def test_get_data(self):
    test_size = .1
    img_w, img_h = 30, 30
    X = np.random.random((10, img_w, img_h, 1))
    y = np.random.randint(3, size=(10, img_w, img_h, 1))

    temp_dir = self.get_temp_dir()
    good_file = os.path.join(temp_dir, 'good.npz')
    np.savez(good_file, X=X, y=y)

    train_dict, test_dict = data_utils.get_data(good_file, test_size=test_size)

    X_test, X_train = test_dict['X'], train_dict['X']
    self.assertIsInstance(train_dict, dict)
    self.assertIsInstance(test_dict, dict)
    self.assertAlmostEqual(X_test.size / (X_test.size + X_train.size), test_size)

    # test bad filepath
    bad_file = os.path.join(temp_dir, 'bad.npz')
    np.savez(bad_file, X_bad=X, y_bad=y)
    with self.assertRaises(KeyError):
        _, _ = data_utils.get_data(bad_file)

    # test siamese_daughters mode
    good_file = os.path.join(temp_dir, 'siamese.trks')
    self._write_test_trks(good_file)

    train_dict, test_dict = data_utils.get_data(
        good_file, mode='siamese_daughters', test_size=test_size)

    X_test, X_train = test_dict['X'], train_dict['X']
    d_test, d_train = test_dict['daughters'], train_dict['daughters']
    self.assertIsInstance(train_dict, dict)
    self.assertIsInstance(test_dict, dict)
    self.assertIsInstance(d_test, list)
    self.assertIsInstance(d_train, list)
    self.assertEqual(len(d_train), X_train.shape[0])
    self.assertEqual(len(d_test), X_test.shape[0])
    self.assertAlmostEqual(X_test.size / (X_test.size + X_train.size), test_size)
def load_data(path='mousebrain.npz'):
    """Loads the mouse brain dataset.

    # Arguments
        path: path where to cache the dataset locally
            (relative to ~/.keras/datasets).

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
    """
    path = get_file(path,
                    origin='https://deepcell-data.s3.amazonaws.com/nuclei/mousebrain.npz',
                    file_hash='9c91304f7da7cc5559f46b2c5fc2eace')

    train_dict, test_dict = get_data(path, seed=0, test_size=.2)

    x_train, y_train = train_dict['X'], train_dict['y']
    x_test, y_test = test_dict['X'], test_dict['y']
    return (x_train, y_train), (x_test, y_test)
def load_data(path='HEK293.npz'):
    """Loads the HEK293 dataset.

    # Arguments
        path: path where to cache the dataset locally
            (relative to ~/.keras/datasets).

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
    """
    path = get_file(path,
                    origin='https://deepcell-data.s3.amazonaws.com/nuclei/HEK293.npz',
                    file_hash='c0bbfba54b90e63a2010133a198e6e63')

    train_dict, test_dict = get_data(path, seed=0, test_size=.2)

    x_train, y_train = train_dict['X'], train_dict['y']
    x_test, y_test = test_dict['X'], test_dict['y']
    return (x_train, y_train), (x_test, y_test)
def load_data(path='HeLa_S3.npz'):
    """Loads the HeLa S3 dataset.

    # Arguments
        path: path where to cache the dataset locally
            (relative to ~/.keras/datasets).

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
    """
    path = get_file(path,
                    origin='https://deepcell-data.s3.amazonaws.com/nuclei/HeLa_S3.npz',
                    file_hash='42c631726713bbb180d4a0a07c2e8107')

    train_dict, test_dict = get_data(path, seed=0, test_size=.2)

    x_train, y_train = train_dict['X'], train_dict['y']
    x_test, y_test = test_dict['X'], test_dict['y']
    return (x_train, y_train), (x_test, y_test)
def load_data(path='3T3_NIH.npz'):
    """Loads the 3T3-NIH dataset.

    # Arguments
        path: path where to cache the dataset locally
            (relative to ~/.keras/datasets).

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
    """
    path = get_file(path,
                    origin='https://deepcell-data.s3.amazonaws.com/nuclei/3T3_NIH.npz',
                    file_hash='954b6f4ad6a71435b84c40726837e4ba')

    train_dict, test_dict = get_data(path, seed=0, test_size=.2)

    x_train, y_train = train_dict['X'], train_dict['y']
    x_test, y_test = test_dict['X'], test_dict['y']
    return (x_train, y_train), (x_test, y_test)
def train_model_sample(model, dataset, expt='', test_size=.1, n_epoch=10,
                       batch_size=32, num_gpus=None, transform=None,
                       window_size=None, balance_classes=True,
                       max_class_samples=None,
                       log_dir='/data/tensorboard_logs',
                       model_dir='/data/models', model_name=None,
                       focal=False, gamma=0.5,
                       optimizer=SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True),
                       lr_sched=rate_scheduler(lr=0.01, decay=0.95),
                       rotation_range=0, flip=False, shear=0, zoom_range=0,
                       seed=None, **kwargs):
    is_channels_first = K.image_data_format() == 'channels_first'

    if model_name is None:
        todays_date = datetime.datetime.now().strftime('%Y-%m-%d')
        data_name = os.path.splitext(os.path.basename(dataset))[0]
        model_name = '{}_{}_{}'.format(todays_date, data_name, expt)
    model_path = os.path.join(model_dir, '{}.h5'.format(model_name))
    loss_path = os.path.join(model_dir, '{}.npz'.format(model_name))

    train_dict, test_dict = get_data(dataset, test_size=test_size, seed=seed)

    n_classes = model.layers[-1].output_shape[1 if is_channels_first else -1]

    # the data, shuffled and split between train and test sets
    print('X_train shape:', train_dict['X'].shape)
    print('y_train shape:', train_dict['y'].shape)
    print('X_test shape:', test_dict['X'].shape)
    print('y_test shape:', test_dict['y'].shape)
    print('Output Shape:', model.layers[-1].output_shape)
    print('Number of Classes:', n_classes)

    def loss_function(y_true, y_pred):
        if isinstance(transform, str) and transform.lower() == 'disc':
            return losses.discriminative_instance_loss(y_true, y_pred)
        if focal:
            return losses.weighted_focal_loss(
                y_true, y_pred, gamma=gamma, n_classes=n_classes)
        return losses.weighted_categorical_crossentropy(
            y_true, y_pred, n_classes=n_classes)

    if num_gpus is None:
        num_gpus = train_utils.count_gpus()

    if num_gpus >= 2:
        batch_size = batch_size * num_gpus
        model = train_utils.MultiGpuModel(model, num_gpus)

    print('Training on {} GPUs'.format(num_gpus))

    model.compile(loss=loss_function, optimizer=optimizer, metrics=['accuracy'])

    if train_dict['X'].ndim == 4:
        DataGenerator = image_generators.SampleDataGenerator
        window_size = window_size if window_size else (30, 30)
    elif train_dict['X'].ndim == 5:
        DataGenerator = image_generators.SampleMovieDataGenerator
        window_size = window_size if window_size else (30, 30, 3)
    else:
        raise ValueError('Expected `X` to have ndim 4 or 5. Got',
                         train_dict['X'].ndim)

    # this will do preprocessing and realtime data augmentation
    datagen = DataGenerator(
        rotation_range=rotation_range,
        shear_range=shear,
        zoom_range=zoom_range,
        horizontal_flip=flip,
        vertical_flip=flip)

    # no validation augmentation
    datagen_val = DataGenerator(
        rotation_range=0,
        shear_range=0,
        zoom_range=0,
        horizontal_flip=0,
        vertical_flip=0)

    train_data = datagen.flow(
        train_dict,
        batch_size=batch_size,
        transform=transform,
        transform_kwargs=kwargs,
        window_size=window_size,
        balance_classes=balance_classes,
        max_class_samples=max_class_samples)

    val_data = datagen_val.flow(
        test_dict,
        batch_size=batch_size,
        transform=transform,
        transform_kwargs=kwargs,
        window_size=window_size,
        balance_classes=False,
        max_class_samples=max_class_samples)

    # fit the model on the batches generated by datagen.flow()
    loss_history = model.fit_generator(
        train_data,
        steps_per_epoch=train_data.y.shape[0] // batch_size,
        epochs=n_epoch,
        validation_data=val_data,
        validation_steps=val_data.y.shape[0] // batch_size,
        callbacks=[
            callbacks.LearningRateScheduler(lr_sched),
            callbacks.ModelCheckpoint(
                model_path, monitor='val_loss', verbose=1,
                save_best_only=True, save_weights_only=num_gpus >= 2),
            callbacks.TensorBoard(log_dir=os.path.join(log_dir, model_name))
        ])

    np.savez(loss_path, loss_history=loss_history.history)

    return model
def train_model_siamese_daughter(model, dataset, expt='', test_size=.1,
                                 n_epoch=100, batch_size=1, num_gpus=None,
                                 crop_dim=32, min_track_length=1,
                                 neighborhood_scale_size=10, features=None,
                                 optimizer=SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True),
                                 log_dir='/data/tensorboard_logs',
                                 model_dir='/data/models', model_name=None,
                                 focal=False, gamma=0.5,
                                 lr_sched=rate_scheduler(lr=0.01, decay=0.95),
                                 rotation_range=0, flip=True, shear=0,
                                 zoom_range=0, seed=None, **kwargs):
    is_channels_first = K.image_data_format() == 'channels_first'

    if model_name is None:
        todays_date = datetime.datetime.now().strftime('%Y-%m-%d')
        data_name = os.path.splitext(os.path.basename(dataset))[0]
        model_name = '{}_{}_[{}]_neighs={}_epochs={}_seed={}_{}'.format(
            todays_date, data_name, ','.join(f[0] for f in sorted(features)),
            neighborhood_scale_size, n_epoch, seed, expt)
    model_path = os.path.join(model_dir, '{}.h5'.format(model_name))
    loss_path = os.path.join(model_dir, '{}.npz'.format(model_name))

    print('training on dataset:', dataset)
    print('saving model at:', model_path)
    print('saving loss at:', loss_path)

    train_dict, val_dict = get_data(dataset, mode='siamese_daughters',
                                    seed=seed, test_size=test_size)

    # the data, shuffled and split between train and test sets
    print('X_train shape:', train_dict['X'].shape)
    print('y_train shape:', train_dict['y'].shape)
    print('X_test shape:', val_dict['X'].shape)
    print('y_test shape:', val_dict['y'].shape)
    print('Output Shape:', model.layers[-1].output_shape)

    n_classes = model.layers[-1].output_shape[1 if is_channels_first else -1]

    def loss_function(y_true, y_pred):
        if focal:
            return losses.weighted_focal_loss(y_true, y_pred,
                                              gamma=gamma,
                                              n_classes=n_classes,
                                              from_logits=False)
        return losses.weighted_categorical_crossentropy(y_true, y_pred,
                                                        n_classes=n_classes,
                                                        from_logits=False)

    if num_gpus is None:
        num_gpus = train_utils.count_gpus()

    if num_gpus >= 2:
        batch_size = batch_size * num_gpus
        model = train_utils.MultiGpuModel(model, num_gpus)

    print('Training on {} GPUs'.format(num_gpus))

    model.compile(loss=loss_function, optimizer=optimizer, metrics=['accuracy'])

    print('Using real-time data augmentation.')

    # this will do preprocessing and realtime data augmentation
    datagen = image_generators.SiameseDataGenerator(
        rotation_range=rotation_range,
        shear_range=shear,
        zoom_range=zoom_range,
        horizontal_flip=flip,
        vertical_flip=flip)

    datagen_val = image_generators.SiameseDataGenerator(
        rotation_range=0,
        zoom_range=0,
        shear_range=0,
        horizontal_flip=0,
        vertical_flip=0)

    total_train_pairs = tracking_utils.count_pairs(train_dict['y'], same_probability=5.0)
    total_test_pairs = tracking_utils.count_pairs(val_dict['y'], same_probability=5.0)

    train_data = datagen.flow(train_dict,
                              crop_dim=crop_dim,
                              batch_size=batch_size,
                              min_track_length=min_track_length,
                              neighborhood_scale_size=neighborhood_scale_size,
                              features=features)

    val_data = datagen_val.flow(val_dict,
                                crop_dim=crop_dim,
                                batch_size=batch_size,
                                min_track_length=min_track_length,
                                neighborhood_scale_size=neighborhood_scale_size,
                                features=features)

    print('total_train_pairs:', total_train_pairs)
    print('total_test_pairs:', total_test_pairs)
    print('batch size:', batch_size)
    print('validation_steps: ', total_test_pairs // batch_size)

    # fit the model on the batches generated by datagen.flow()
    loss_history = model.fit_generator(
        train_data,
        steps_per_epoch=total_train_pairs // batch_size,
        epochs=n_epoch,
        validation_data=val_data,
        validation_steps=total_test_pairs // batch_size,
        callbacks=[
            callbacks.LearningRateScheduler(lr_sched),
            callbacks.ModelCheckpoint(
                model_path, monitor='val_loss', verbose=1,
                save_best_only=True, save_weights_only=num_gpus >= 2),
            callbacks.TensorBoard(log_dir=os.path.join(log_dir, model_name))
        ])

    model.save_weights(model_path)
    np.savez(loss_path, loss_history=loss_history.history)

    return model
def train_model_sample(model, dataset, expt='', test_size=.2, n_epoch=10,
                       batch_size=32, num_gpus=None, transform=None,
                       window_size=None, balance_classes=True,
                       max_class_samples=None,
                       log_dir='/data/tensorboard_logs',
                       model_dir='/data/models', model_name=None,
                       focal=False, gamma=0.5,
                       optimizer=SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True),
                       lr_sched=rate_scheduler(lr=0.01, decay=0.95),
                       rotation_range=0, flip=False, shear=0, zoom_range=0,
                       seed=0, **kwargs):
    """Train a model using sample mode.

    Args:
        model (tensorflow.keras.Model): The model to train.
        dataset (str): Path to a dataset to train the model with.
        expt (str): Experiment, substring to include in model name.
        test_size (float): Fraction of data to leave as test data.
        n_epoch (int): Number of training epochs.
        batch_size (int): Number of batches per training step.
        num_gpus (int): The number of GPUs to train on.
        transform (str): Defines the transformation of the training data.
            One of 'watershed', 'fgbg', 'pixelwise'.
        window_size (tuple(int, int)): Size of the sampling window.
        balance_classes (bool): Whether to perform class-balancing on data.
        max_class_samples (int): Maximum number of examples per class to sample.
        log_dir (str): Filepath to save tensorboard logs. If None, disables
            the tensorboard callback.
        model_dir (str): Directory to save the model file.
        model_name (str): Name of the model (and name of output file).
        focal (bool): If true, uses focal loss.
        gamma (float): Parameter for focal loss.
        optimizer (object): Pre-initialized optimizer object (SGD, Adam, etc.).
        lr_sched (function): Learning rate scheduler function.
        rotation_range (int): Maximum rotation range for image augmentation.
        flip (bool): Enables horizontal and vertical flipping for augmentation.
        shear (int): Maximum shear range for image augmentation.
        zoom_range (tuple): Minimum and maximum zoom values, e.g. (0.8, 1.2).
        seed (int): Random seed.
        kwargs (dict): Other parameters to pass to _transform_masks.

    Returns:
        tensorflow.keras.Model: The trained model
    """
    is_channels_first = K.image_data_format() == 'channels_first'

    if model_name is None:
        todays_date = datetime.datetime.now().strftime('%Y-%m-%d')
        data_name = os.path.splitext(os.path.basename(dataset))[0]
        model_name = '{}_{}_{}'.format(todays_date, data_name, expt)
    model_path = os.path.join(model_dir, '{}.h5'.format(model_name))
    loss_path = os.path.join(model_dir, '{}.npz'.format(model_name))

    train_dict, test_dict = get_data(dataset, test_size=test_size, seed=seed)

    n_classes = model.layers[-1].output_shape[1 if is_channels_first else -1]

    # the data, shuffled and split between train and test sets
    print('X_train shape:', train_dict['X'].shape)
    print('y_train shape:', train_dict['y'].shape)
    print('X_test shape:', test_dict['X'].shape)
    print('y_test shape:', test_dict['y'].shape)
    print('Output Shape:', model.layers[-1].output_shape)
    print('Number of Classes:', n_classes)

    def loss_function(y_true, y_pred):
        if isinstance(transform, str) and transform.lower() == 'disc':
            return losses.discriminative_instance_loss(y_true, y_pred)
        if focal:
            return losses.weighted_focal_loss(
                y_true, y_pred, gamma=gamma, n_classes=n_classes)
        return losses.weighted_categorical_crossentropy(
            y_true, y_pred, n_classes=n_classes)

    if num_gpus is None:
        num_gpus = train_utils.count_gpus()

    print('Training on {} GPUs'.format(num_gpus))

    model.compile(loss=loss_function, optimizer=optimizer, metrics=['accuracy'])

    if train_dict['X'].ndim == 4:
        DataGenerator = image_generators.SampleDataGenerator
        window_size = window_size if window_size else (30, 30)
    elif train_dict['X'].ndim == 5:
        DataGenerator = image_generators.SampleMovieDataGenerator
        window_size = window_size if window_size else (30, 30, 3)
    else:
        raise ValueError('Expected `X` to have ndim 4 or 5. Got',
                         train_dict['X'].ndim)

    # this will do preprocessing and realtime data augmentation
    datagen = DataGenerator(
        rotation_range=rotation_range,
        shear_range=shear,
        zoom_range=zoom_range,
        horizontal_flip=flip,
        vertical_flip=flip)

    # no validation augmentation
    datagen_val = DataGenerator(
        rotation_range=0,
        shear_range=0,
        zoom_range=0,
        horizontal_flip=0,
        vertical_flip=0)

    train_data = datagen.flow(
        train_dict,
        seed=seed,
        batch_size=batch_size,
        transform=transform,
        transform_kwargs=kwargs,
        window_size=window_size,
        balance_classes=balance_classes,
        max_class_samples=max_class_samples)

    val_data = datagen_val.flow(
        test_dict,
        seed=seed,
        batch_size=batch_size,
        transform=transform,
        transform_kwargs=kwargs,
        window_size=window_size,
        balance_classes=False,
        max_class_samples=max_class_samples)

    train_callbacks = get_callbacks(
        model_path,
        lr_sched=lr_sched,
        tensorboard_log_dir=log_dir,
        save_weights_only=num_gpus >= 2,
        monitor='val_loss',
        verbose=1)

    # fit the model on the batches generated by datagen.flow()
    loss_history = model.fit(
        train_data,
        steps_per_epoch=train_data.y.shape[0] // batch_size,
        epochs=n_epoch,
        validation_data=val_data,
        validation_steps=val_data.y.shape[0] // batch_size,
        callbacks=train_callbacks)

    np.savez(loss_path, loss_history=loss_history.history)

    return model
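# Example usage (a sketch; 'nuclei_train.npz' is a placeholder path, and the
# model is assumed to be a deepcell sample-mode model such as
# model_zoo.bn_feature_net_2D -- any model whose final layer matches the
# transform's class count should work):
#
#     from deepcell import model_zoo
#     fgbg_model = model_zoo.bn_feature_net_2D(n_features=2, receptive_field=61)
#     fgbg_model = train_model_sample(fgbg_model, 'nuclei_train.npz',
#                                     expt='fgbg', transform='fgbg',
#                                     window_size=(30, 30), n_epoch=1)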
def train_model_siamese_daughter(model, dataset, expt='', test_size=.2,
                                 n_epoch=100, batch_size=1, num_gpus=None,
                                 crop_dim=32, min_track_length=1,
                                 neighborhood_scale_size=10, features=None,
                                 optimizer=SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True),
                                 log_dir='/data/tensorboard_logs',
                                 model_dir='/data/models', model_name=None,
                                 focal=False, gamma=0.5,
                                 lr_sched=rate_scheduler(lr=0.01, decay=0.95),
                                 rotation_range=0, flip=True, shear=0,
                                 zoom_range=0, seed=0, **kwargs):
    is_channels_first = K.image_data_format() == 'channels_first'

    if model_name is None:
        todays_date = datetime.datetime.now().strftime('%Y-%m-%d')
        data_name = os.path.splitext(os.path.basename(dataset))[0]
        model_name = '{}_{}_[{}]_neighs={}_epochs={}_seed={}_{}'.format(
            todays_date, data_name, ','.join(f[0] for f in sorted(features)),
            neighborhood_scale_size, n_epoch, seed, expt)
    model_path = os.path.join(model_dir, '{}.h5'.format(model_name))
    loss_path = os.path.join(model_dir, '{}.npz'.format(model_name))

    print('training on dataset:', dataset)
    print('saving model at:', model_path)
    print('saving loss at:', loss_path)

    train_dict, val_dict = get_data(dataset, mode='siamese_daughters',
                                    seed=seed, test_size=test_size)

    # the data, shuffled and split between train and test sets
    print('X_train shape:', train_dict['X'].shape)
    print('y_train shape:', train_dict['y'].shape)
    print('X_test shape:', val_dict['X'].shape)
    print('y_test shape:', val_dict['y'].shape)
    print('Output Shape:', model.layers[-1].output_shape)

    n_classes = model.layers[-1].output_shape[1 if is_channels_first else -1]

    def loss_function(y_true, y_pred):
        if focal:
            return losses.weighted_focal_loss(y_true, y_pred,
                                              gamma=gamma,
                                              n_classes=n_classes,
                                              from_logits=False)
        return losses.weighted_categorical_crossentropy(y_true, y_pred,
                                                        n_classes=n_classes,
                                                        from_logits=False)

    if num_gpus is None:
        num_gpus = train_utils.count_gpus()

    print('Training on {} GPUs'.format(num_gpus))

    model.compile(loss=loss_function, optimizer=optimizer, metrics=['accuracy'])

    print('Using real-time data augmentation.')

    # this will do preprocessing and realtime data augmentation
    datagen = image_generators.SiameseDataGenerator(
        rotation_range=rotation_range,
        shear_range=shear,
        zoom_range=zoom_range,
        horizontal_flip=flip,
        vertical_flip=flip)

    datagen_val = image_generators.SiameseDataGenerator(
        rotation_range=0,
        zoom_range=0,
        shear_range=0,
        horizontal_flip=0,
        vertical_flip=0)

    # same_probability values have varied from 0.5 to 5.0
    total_train_pairs = tracking_utils.count_pairs(train_dict['y'], same_probability=5.0)
    total_test_pairs = tracking_utils.count_pairs(val_dict['y'], same_probability=5.0)

    train_data = datagen.flow(train_dict,
                              seed=seed,
                              crop_dim=crop_dim,
                              batch_size=batch_size,
                              min_track_length=min_track_length,
                              neighborhood_scale_size=neighborhood_scale_size,
                              features=features)

    val_data = datagen_val.flow(val_dict,
                                seed=seed,
                                crop_dim=crop_dim,
                                batch_size=batch_size,
                                min_track_length=min_track_length,
                                neighborhood_scale_size=neighborhood_scale_size,
                                features=features)

    print('total_train_pairs:', total_train_pairs)
    print('total_test_pairs:', total_test_pairs)
    print('batch size:', batch_size)
    print('validation_steps: ', total_test_pairs // batch_size)

    # Make dicts to map the two generator outputs to the Dataset and model.
    # input here is model input and output is model output
    features = sorted(features)

    input_type_dict = {}
    input_shape_dict = {}
    for feature in features:
        feature_name1 = '{}_input1'.format(feature)
        feature_name2 = '{}_input2'.format(feature)

        input_type_dict[feature_name1] = tf.float32
        input_type_dict[feature_name2] = tf.float32

        if feature == 'appearance':
            app1 = tuple([None, train_data.min_track_length,
                          train_data.crop_dim, train_data.crop_dim, 1])
            app2 = tuple([None, 1, train_data.crop_dim, train_data.crop_dim, 1])

            input_shape_dict[feature_name1] = app1
            input_shape_dict[feature_name2] = app2

        elif feature == 'distance':
            dist1 = tuple([None, train_data.min_track_length, 2])
            dist2 = tuple([None, 1, 2])

            input_shape_dict[feature_name1] = dist1
            input_shape_dict[feature_name2] = dist2

        elif feature == 'neighborhood':
            neighborhood_size = 2 * train_data.neighborhood_scale_size + 1
            neigh1 = tuple([None, train_data.min_track_length,
                            neighborhood_size, neighborhood_size, 1])
            neigh2 = tuple([None, 1, neighborhood_size, neighborhood_size, 1])

            input_shape_dict[feature_name1] = neigh1
            input_shape_dict[feature_name2] = neigh2

        elif feature == 'regionprop':
            rprop1 = tuple([None, train_data.min_track_length, 3])
            rprop2 = tuple([None, 1, 3])

            input_shape_dict[feature_name1] = rprop1
            input_shape_dict[feature_name2] = rprop2

    output_type_dict = {'classification': tf.int32}
    # output shape has to be None because we don't know how many cells there are
    output_shape_dict = {'classification': (None, 3)}

    train_dataset = Dataset.from_generator(
        lambda: train_data,
        (input_type_dict, output_type_dict),
        output_shapes=(input_shape_dict, output_shape_dict))
    val_dataset = Dataset.from_generator(
        lambda: val_data,
        (input_type_dict, output_type_dict),
        output_shapes=(input_shape_dict, output_shape_dict))

    train_callbacks = get_callbacks(
        model_path,
        lr_sched=lr_sched,
        tensorboard_log_dir=log_dir,
        save_weights_only=num_gpus >= 2,
        monitor='val_loss',
        verbose=1)

    # fit the model on the batches generated by datagen.flow()
    loss_history = model.fit(
        train_dataset,
        steps_per_epoch=total_train_pairs // batch_size,
        epochs=n_epoch,
        validation_data=val_dataset,
        validation_steps=total_test_pairs // batch_size,
        callbacks=train_callbacks)

    np.savez(loss_path, loss_history=loss_history.history)

    return model
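# Minimal runnable sketch of the dict-in/dict-out pattern used above with
# Dataset.from_generator (hypothetical generator and shapes, not the
# SiameseDataGenerator; the input dict keys must match the model's named
# input layers):
#
#     import numpy as np
#     import tensorflow as tf
#
#     def _gen():
#         while True:
#             x = {'appearance_input1': np.zeros((1, 5, 32, 32, 1), 'float32')}
#             y = {'classification': np.zeros((1, 3), 'int32')}
#             yield x, y
#
#     ds = tf.data.Dataset.from_generator(
#         _gen,
#         ({'appearance_input1': tf.float32}, {'classification': tf.int32}),
#         output_shapes=({'appearance_input1': (None, 5, 32, 32, 1)},
#                        {'classification': (None, 3)}))
#     x_batch, y_batch = next(iter(ds))
#     print(y_batch['classification'].shape)  # (1, 3)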
def train_model_retinanet(model, dataset, backbone, expt='', test_size=.1,
                          n_epoch=10, batch_size=1, num_gpus=None,
                          include_masks=False, panoptic=False,
                          panoptic_weight=1, anchor_params=None,
                          pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
                          mask_size=(28, 28),
                          optimizer=SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True),
                          log_dir='/data/tensorboard_logs',
                          model_dir='/data/models', model_name=None,
                          sigma=3.0, alpha=0.25, gamma=2.0,
                          score_threshold=0.01, iou_threshold=0.5,
                          max_detections=100, weighted_average=True,
                          lr_sched=rate_scheduler(lr=0.01, decay=0.95),
                          rotation_range=0, flip=True, shear=0, zoom_range=0,
                          seed=None, **kwargs):
    """Train a RetinaNet model from the given backbone.

    Adapted from:
        https://github.com/fizyr/keras-retinanet &
        https://github.com/fizyr/keras-maskrcnn
    """
    is_channels_first = K.image_data_format() == 'channels_first'

    if model_name is None:
        todays_date = datetime.datetime.now().strftime('%Y-%m-%d')
        data_name = os.path.splitext(os.path.basename(dataset))[0]
        model_name = '{}_{}_{}'.format(todays_date, data_name, expt)
    model_path = os.path.join(model_dir, '{}.h5'.format(model_name))
    loss_path = os.path.join(model_dir, '{}.npz'.format(model_name))

    train_dict, test_dict = get_data(dataset, seed=seed, test_size=test_size)

    channel_axis = 1 if is_channels_first else -1
    n_classes = model.layers[-1].output_shape[channel_axis]

    if panoptic:
        n_semantic_classes = model.get_layer(
            name='semantic').output_shape[channel_axis]

    # the data, shuffled and split between train and test sets
    print('X_train shape:', train_dict['X'].shape)
    print('y_train shape:', train_dict['y'].shape)
    print('X_test shape:', test_dict['X'].shape)
    print('y_test shape:', test_dict['y'].shape)
    print('Output Shape:', model.layers[-1].output_shape)
    print('Number of Classes:', n_classes)

    if num_gpus is None:
        num_gpus = train_utils.count_gpus()

    # NOTE: the 1e6 threshold effectively disables multi-GPU wrapping
    if num_gpus >= 1e6:
        batch_size = batch_size * num_gpus
        model = train_utils.MultiGpuModel(model, num_gpus)

    print('Training on {} GPUs'.format(num_gpus))

    # evaluation of model is done on `retinanet_bbox`
    if include_masks:
        prediction_model = model
    else:
        prediction_model = retinanet_bbox(model,
                                          nms=True,
                                          anchor_params=anchor_params,
                                          panoptic=panoptic,
                                          class_specific_filter=False)

    retinanet_losses = losses.RetinaNetLosses(sigma=sigma,
                                              alpha=alpha,
                                              gamma=gamma,
                                              iou_threshold=iou_threshold,
                                              mask_size=mask_size)

    def semantic_loss(y_pred, y_true):
        return panoptic_weight * losses.weighted_categorical_crossentropy(
            y_pred, y_true, n_classes=n_semantic_classes)

    loss = {
        'regression': retinanet_losses.regress_loss,
        'classification': retinanet_losses.classification_loss
    }

    if include_masks:
        loss['masks'] = retinanet_losses.mask_loss

    if panoptic:
        loss['semantic'] = semantic_loss

    model.compile(loss=loss, optimizer=optimizer)

    if num_gpus >= 2:
        # Each GPU must have at least one validation example
        if test_dict['y'].shape[0] < num_gpus:
            raise ValueError('Not enough validation data for {} GPUs. '
                             'Received {} validation samples.'.format(
                                 num_gpus, test_dict['y'].shape[0]))

        # When using multiple GPUs and skip_connections,
        # the training data must be evenly distributed across all GPUs
        num_train = train_dict['y'].shape[0]
        nb_samples = num_train - num_train % batch_size
        if nb_samples:
            train_dict['y'] = train_dict['y'][:nb_samples]
            train_dict['X'] = train_dict['X'][:nb_samples]

    # this will do preprocessing and realtime data augmentation
    datagen = image_generators.RetinaNetGenerator(
        # fill_mode='constant',  # for rotations
        rotation_range=rotation_range,
        shear_range=shear,
        zoom_range=zoom_range,
        horizontal_flip=flip,
        vertical_flip=flip)

    datagen_val = image_generators.RetinaNetGenerator(
        # fill_mode='constant',  # for rotations
        rotation_range=0,
        shear_range=0,
        zoom_range=0,
        horizontal_flip=0,
        vertical_flip=0)

    if 'vgg' in backbone or 'densenet' in backbone:
        compute_shapes = make_shapes_callback(model)
    else:
        compute_shapes = guess_shapes

    train_data = datagen.flow(train_dict,
                              seed=seed,
                              include_masks=include_masks,
                              panoptic=panoptic,
                              pyramid_levels=pyramid_levels,
                              anchor_params=anchor_params,
                              compute_shapes=compute_shapes,
                              batch_size=batch_size)

    val_data = datagen_val.flow(test_dict,
                                seed=seed,
                                include_masks=include_masks,
                                panoptic=panoptic,
                                pyramid_levels=pyramid_levels,
                                anchor_params=anchor_params,
                                compute_shapes=compute_shapes,
                                batch_size=batch_size)

    tensorboard_callback = callbacks.TensorBoard(
        log_dir=os.path.join(log_dir, model_name))

    # fit the model on the batches generated by datagen.flow()
    loss_history = model.fit_generator(
        train_data,
        steps_per_epoch=train_data.y.shape[0] // batch_size,
        epochs=n_epoch,
        validation_data=val_data,
        validation_steps=val_data.y.shape[0] // batch_size,
        callbacks=[
            callbacks.LearningRateScheduler(lr_sched),
            callbacks.ModelCheckpoint(
                model_path, monitor='val_loss', verbose=1,
                save_best_only=True, save_weights_only=num_gpus >= 2),
            tensorboard_callback,
            callbacks.ReduceLROnPlateau(
                monitor='loss', factor=0.1, patience=10, verbose=1,
                mode='auto', min_delta=0.0001, cooldown=0, min_lr=0),
            RedirectModel(
                Evaluate(val_data,
                         iou_threshold=iou_threshold,
                         score_threshold=score_threshold,
                         max_detections=max_detections,
                         tensorboard=tensorboard_callback,
                         weighted_average=weighted_average),
                prediction_model),
        ])

    model.save_weights(model_path)
    np.savez(loss_path, loss_history=loss_history.history)

    average_precisions = evaluate(
        val_data,
        prediction_model,
        iou_threshold=iou_threshold,
        score_threshold=score_threshold,
        max_detections=max_detections,
    )

    # print evaluation
    total_instances = []
    precisions = []
    for label, (average_precision, num_annotations) in average_precisions.items():
        print('{:.0f} instances of class'.format(num_annotations),
              label, 'with average precision: {:.4f}'.format(average_precision))
        total_instances.append(num_annotations)
        precisions.append(average_precision)

    if sum(total_instances) == 0:
        print('No test instances found.')
    else:
        print('mAP using the weighted average of precisions among classes: {:.4f}'.format(
            sum([a * b for a, b in zip(total_instances, precisions)]) /
            sum(total_instances)))
        print('mAP: {:.4f}'.format(
            sum(precisions) / sum(x > 0 for x in total_instances)))

    return model
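# Worked example of the even-distribution trim above (illustrative numbers):
# with num_train = 103 and batch_size = 4, nb_samples = 103 - 103 % 4 = 100,
# so the last 3 samples are dropped and every batch is full on every GPU.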
def train_model_siamese(model=None, dataset=None, optimizer=None, expt='',
                        it=0, batch_size=1, n_epoch=100,
                        direc_save='/data/models', direc_data='/data/npz_data',
                        focal=False, gamma=0.5,
                        lr_sched=rate_scheduler(lr=0.01, decay=0.95),
                        rotation_range=0, flip=True, shear=0,
                        class_weight=None):
    is_channels_first = K.image_data_format() == 'channels_first'
    training_data_file_name = os.path.join(direc_data, dataset + '.npz')
    todays_date = datetime.datetime.now().strftime('%Y-%m-%d')

    file_name_save = os.path.join(
        direc_save, '{}_{}_{}_{}.h5'.format(todays_date, dataset, expt, it))
    file_name_save_loss = os.path.join(
        direc_save, '{}_{}_{}_{}.npz'.format(todays_date, dataset, expt, it))

    train_dict, test_dict = get_data(training_data_file_name, mode='siamese')

    class_weights = train_dict['class_weights']

    # the data, shuffled and split between train and test sets
    print('X_train shape:', train_dict['X'].shape)
    print('y_train shape:', train_dict['y'].shape)
    print('X_test shape:', test_dict['X'].shape)
    print('y_test shape:', test_dict['y'].shape)
    print('Output Shape:', model.layers[-1].output_shape)

    n_classes = model.layers[-1].output_shape[1 if is_channels_first else -1]

    def loss_function(y_true, y_pred):
        if focal:
            return losses.weighted_focal_loss(y_true, y_pred,
                                              gamma=gamma,
                                              n_classes=n_classes,
                                              from_logits=False)
        else:
            return losses.weighted_categorical_crossentropy(
                y_true, y_pred, n_classes=n_classes, from_logits=False)

    model.compile(loss=loss_function, optimizer=optimizer, metrics=['accuracy'])

    print('Using real-time data augmentation.')

    # this will do preprocessing and realtime data augmentation
    datagen = generators.SiameseDataGenerator(
        rotation_range=rotation_range,  # randomly rotate images by 0 to rotation_range degrees
        shear_range=shear,  # randomly shear images in the range (radians, -shear_range to shear_range)
        horizontal_flip=flip,  # randomly flip images
        vertical_flip=flip)  # randomly flip images

    datagen_val = generators.SiameseDataGenerator(
        rotation_range=0,
        shear_range=0,
        horizontal_flip=0,
        vertical_flip=0)

    def count_pairs(y):
        """Compute number of training samples needed to
        (statistically speaking) observe all cell pairs.

        Assume that the number of images is encoded in the second dimension.
        Assume that y values are a cell-uniquely-labeled mask.
        Assume that a cell is paired with one of its other frames 50% of
        the time and a frame from another cell 50% of the time.
        """
        # TODO: channels_first axes
        total_pairs = 0
        for image_set in range(y.shape[0]):
            set_cells = 0
            cells_per_image = []
            for image in range(y.shape[1]):
                image_cells = int(y[image_set, image, :, :, :].max())
                set_cells = set_cells + image_cells
                cells_per_image.append(image_cells)

            # Since there are many more possible non-self pairings than
            # there are self pairings, we want to estimate the number of
            # possible non-self pairings and then multiply that number by
            # two, since the odds of getting a non-self pairing are 50%,
            # to find out how many pairs we would need to sample to
            # (statistically speaking) observe all possible cell-frame pairs.
            # We're going to assume that the average cell is present in
            # every frame. This will lead to an underestimate of the number
            # of possible non-self pairings, but it's unclear how
            # significant the underestimate is.
            average_cells_per_frame = int(
                sum(cells_per_image) / len(cells_per_image))
            non_self_cellframes = (average_cells_per_frame - 1) * len(cells_per_image)
            non_self_pairings = non_self_cellframes * max(cells_per_image)

            cell_pairings = non_self_pairings * 2
            total_pairs = total_pairs + cell_pairings
        return total_pairs

    # This shouldn't remain long term.
    magic_number = 2048  # A power of 2 chosen just to reduce training time.

    total_train_pairs = count_pairs(train_dict['y'])
    total_train_pairs = int(total_train_pairs // magic_number)

    total_test_pairs = count_pairs(test_dict['y'])
    total_test_pairs = int(total_test_pairs // magic_number)

    # fit the model on the batches generated by datagen.flow()
    loss_history = model.fit_generator(
        datagen.flow(train_dict, batch_size=batch_size),
        steps_per_epoch=total_train_pairs // batch_size,
        epochs=n_epoch,
        validation_data=datagen_val.flow(test_dict, batch_size=batch_size),
        validation_steps=total_test_pairs // batch_size,
        callbacks=[
            callbacks.LearningRateScheduler(lr_sched),
            callbacks.ModelCheckpoint(
                file_name_save, monitor='val_loss', verbose=1,
                save_best_only=True, save_weights_only=False),
        ])

    model.save_weights(file_name_save)
    np.savez(file_name_save_loss, loss_history=loss_history.history)

    return model
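# Worked example of the pair estimate in count_pairs (illustrative numbers):
# an image set with 10 frames of 20 cells each gives
#     average_cells_per_frame = 20
#     non_self_cellframes = (20 - 1) * 10 = 190
#     non_self_pairings = 190 * 20 = 3800
#     cell_pairings = 3800 * 2 = 7600
# so roughly 7600 sampled pairs are needed to expect to observe every
# cell-frame pair for that image set.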
def train_model_conv(model, dataset, expt='', test_size=.1, n_epoch=10,
                     batch_size=1, num_gpus=None, frames_per_batch=5,
                     transform=None,
                     optimizer=SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True),
                     log_dir='/data/tensorboard_logs',
                     direc_save='/data/models', direc_data='/data/npz_data',
                     focal=False, gamma=0.5,
                     lr_sched=rate_scheduler(lr=0.01, decay=0.95),
                     rotation_range=0, flip=True, shear=0, zoom_range=0,
                     **kwargs):
    is_channels_first = K.image_data_format() == 'channels_first'

    todays_date = datetime.datetime.now().strftime('%Y-%m-%d')
    basename = '{}_{}_{}'.format(todays_date, dataset, expt)
    file_name_save = os.path.join(direc_save, '{}.h5'.format(basename))
    file_name_save_loss = os.path.join(direc_save, '{}.npz'.format(basename))

    training_data_file_name = os.path.join(direc_data, dataset + '.npz')
    train_dict, test_dict = get_data(training_data_file_name,
                                     mode='conv', test_size=test_size)

    n_classes = model.layers[-1].output_shape[1 if is_channels_first else -1]

    # the data, shuffled and split between train and test sets
    print('X_train shape:', train_dict['X'].shape)
    print('y_train shape:', train_dict['y'].shape)
    print('X_test shape:', test_dict['X'].shape)
    print('y_test shape:', test_dict['y'].shape)
    print('Output Shape:', model.layers[-1].output_shape)
    print('Number of Classes:', n_classes)

    def loss_function(y_true, y_pred):
        if isinstance(transform, str) and transform.lower() == 'disc':
            return losses.discriminative_instance_loss(y_true, y_pred)
        if focal:
            return losses.weighted_focal_loss(
                y_true, y_pred, gamma=gamma, n_classes=n_classes)
        return losses.weighted_categorical_crossentropy(
            y_true, y_pred, n_classes=n_classes)

    if num_gpus is None:
        devices = device_lib.list_local_devices()
        gpus = [d for d in devices if d.name.lower().startswith('/device:gpu')]
        num_gpus = len(gpus)

    if num_gpus >= 2:
        batch_size = batch_size * num_gpus
        model = MultiGpuModel(model, num_gpus)

    print('Training on {} GPUs'.format(num_gpus))

    model.compile(loss=loss_function, optimizer=optimizer, metrics=['accuracy'])

    if isinstance(model.output_shape, list):
        skip = len(model.output_shape) - 1
    else:
        skip = None

    if train_dict['X'].ndim == 4:
        DataGenerator = generators.ImageFullyConvDataGenerator
    elif train_dict['X'].ndim == 5:
        DataGenerator = generators.MovieDataGenerator
    else:
        raise ValueError('Expected `X` to have ndim 4 or 5. Got',
                         train_dict['X'].ndim)

    if num_gpus >= 2:
        # Each GPU must have at least one validation example
        if test_dict['y'].shape[0] < num_gpus:
            raise ValueError('Not enough validation data for {} GPUs. '
                             'Received {} validation samples.'.format(
                                 num_gpus, test_dict['y'].shape[0]))

        # When using multiple GPUs and skip_connections,
        # the training data must be evenly distributed across all GPUs
        num_train = train_dict['y'].shape[0]
        nb_samples = num_train - num_train % batch_size
        if nb_samples:
            train_dict['y'] = train_dict['y'][:nb_samples]
            train_dict['X'] = train_dict['X'][:nb_samples]

    # this will do preprocessing and realtime data augmentation
    datagen = DataGenerator(
        rotation_range=rotation_range,
        shear_range=shear,
        zoom_range=zoom_range,
        horizontal_flip=flip,
        vertical_flip=flip)

    datagen_val = DataGenerator(
        rotation_range=0,
        shear_range=0,
        zoom_range=0,
        horizontal_flip=0,
        vertical_flip=0)

    if train_dict['X'].ndim == 5:
        train_data = datagen.flow(train_dict,
                                  skip=skip,
                                  batch_size=batch_size,
                                  transform=transform,
                                  transform_kwargs=kwargs,
                                  frames_per_batch=frames_per_batch)

        val_data = datagen_val.flow(test_dict,
                                    skip=skip,
                                    batch_size=batch_size,
                                    transform=transform,
                                    transform_kwargs=kwargs,
                                    frames_per_batch=frames_per_batch)
    else:
        train_data = datagen.flow(train_dict,
                                  skip=skip,
                                  batch_size=batch_size,
                                  transform=transform,
                                  transform_kwargs=kwargs)

        val_data = datagen_val.flow(test_dict,
                                    skip=skip,
                                    batch_size=batch_size,
                                    transform=transform,
                                    transform_kwargs=kwargs)

    # fit the model on the batches generated by datagen.flow()
    loss_history = model.fit_generator(
        train_data,
        steps_per_epoch=train_data.y.shape[0] // batch_size,
        epochs=n_epoch,
        validation_data=val_data,
        validation_steps=val_data.y.shape[0] // batch_size,
        callbacks=[
            callbacks.LearningRateScheduler(lr_sched),
            callbacks.ModelCheckpoint(
                file_name_save, monitor='val_loss', verbose=1,
                save_best_only=True, save_weights_only=num_gpus >= 2),
            callbacks.TensorBoard(log_dir=os.path.join(log_dir, basename))
        ])

    model.save_weights(file_name_save)
    np.savez(file_name_save_loss, loss_history=loss_history.history)

    return model
def train_model_retinanet(model, dataset, expt='', test_size=.2, n_epoch=10,
                          batch_size=1, num_gpus=None, include_masks=False,
                          panoptic=False, panoptic_weight=0.1,
                          transforms=['watershed'], transforms_kwargs={},
                          anchor_params=None,
                          pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
                          min_objects=3, mask_size=(28, 28),
                          optimizer=SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True),
                          log_dir='/data/tensorboard_logs',
                          model_dir='/data/models', model_name=None,
                          sigma=3.0, alpha=0.25, gamma=2.0,
                          score_threshold=0.01, iou_threshold=0.5,
                          max_detections=100, weighted_average=True,
                          lr_sched=rate_scheduler(lr=0.01, decay=0.95),
                          rotation_range=0, flip=True, shear=0, zoom_range=0,
                          compute_map=True, seed=0, **kwargs):
    """Train a RetinaNet model from the given backbone.

    Adapted from:
        https://github.com/fizyr/keras-retinanet &
        https://github.com/fizyr/keras-maskrcnn

    Args:
        model (tensorflow.keras.Model): The model to train.
        dataset (str): Path to a dataset to train the model with.
        expt (str): Experiment, substring to include in model name.
        test_size (float): Fraction of data to leave as test data.
        n_epoch (int): Number of training epochs.
        batch_size (int): Number of batches per training step.
        num_gpus (int): The number of GPUs to train on.
        include_masks (bool): Whether to generate masks using MaskRCNN.
        panoptic (bool): Whether to include semantic segmentation heads.
        panoptic_weight (float): Weight applied to the semantic loss.
        transforms (list): List of transform names as strings. Each transform
            will have its own semantic segmentation head.
        transforms_kwargs (list): List of dicts of optional values for each
            transform in transforms.
        anchor_params (AnchorParameters): Struct containing anchor parameters.
            If None, default values are used.
        pyramid_levels (list): Pyramid levels to attach
            the object detection heads to.
        min_objects (int): If a training image has fewer than min_objects
            objects, the image will not be used for training.
        mask_size (tuple): The size of the masks.
        log_dir (str): Filepath to save tensorboard logs. If None, disables
            the tensorboard callback.
        model_dir (str): Directory to save the model file.
        model_name (str): Name of the model (and name of output file).
        sigma (float): The point where the loss changes from L2 to L1.
        alpha (float): Scale the focal weight with alpha.
        gamma (float): Take the power of the focal weight with gamma.
        iou_threshold (float): The threshold used to consider when a detection
            is positive or negative.
        score_threshold (float): The score confidence threshold
            to use for detections.
        max_detections (int): The maximum number of detections to use
            per image.
        weighted_average (bool): Use a weighted average in evaluation.
        optimizer (object): Pre-initialized optimizer object (SGD, Adam, etc.).
        lr_sched (function): Learning rate scheduler function.
        rotation_range (int): Maximum rotation range for image augmentation.
        flip (bool): Enables horizontal and vertical flipping for augmentation.
        shear (int): Maximum shear range for image augmentation.
        zoom_range (tuple): Minimum and maximum zoom values, e.g. (0.8, 1.2).
        seed (int): Random seed.
        compute_map (bool): Whether to compute mAP at end of training.
        kwargs (dict): Other parameters to pass to _transform_masks.

    Returns:
        tensorflow.keras.Model: The trained model
    """
    is_channels_first = K.image_data_format() == 'channels_first'

    if model_name is None:
        todays_date = datetime.datetime.now().strftime('%Y-%m-%d')
        data_name = os.path.splitext(os.path.basename(dataset))[0]
        model_name = '{}_{}_{}'.format(todays_date, data_name, expt)
    model_path = os.path.join(model_dir, '{}.h5'.format(model_name))
    loss_path = os.path.join(model_dir, '{}.npz'.format(model_name))

    train_dict, test_dict = get_data(dataset, seed=seed, test_size=test_size)

    channel_axis = 1 if is_channels_first else -1
    n_classes = model.layers[-1].output_shape[channel_axis]

    if panoptic:
        n_semantic_classes = [
            layer.output_shape[channel_axis] for layer in model.layers
            if 'semantic' in layer.name
        ]
    else:
        n_semantic_classes = []

    # the data, shuffled and split between train and test sets
    print('X_train shape:', train_dict['X'].shape)
    print('y_train shape:', train_dict['y'].shape)
    print('X_test shape:', test_dict['X'].shape)
    print('y_test shape:', test_dict['y'].shape)
    print('Output Shape:', model.layers[-1].output_shape)
    print('Number of Classes:', n_classes)

    if num_gpus is None:
        num_gpus = train_utils.count_gpus()

    # NOTE: the 1e6 threshold effectively disables multi-GPU wrapping
    if num_gpus >= 1e6:
        batch_size = batch_size * num_gpus
        model = train_utils.MultiGpuModel(model, num_gpus)

    print('Training on {} GPUs'.format(num_gpus))

    # evaluation of model is done on `retinanet_bbox`
    if include_masks:
        prediction_model = model
    else:
        prediction_model = retinanet_bbox(
            model,
            nms=True,
            anchor_params=anchor_params,
            num_semantic_heads=len(n_semantic_classes),
            panoptic=panoptic,
            class_specific_filter=False)

    retinanet_losses = losses.RetinaNetLosses(sigma=sigma,
                                              alpha=alpha,
                                              gamma=gamma,
                                              iou_threshold=iou_threshold,
                                              mask_size=mask_size)

    def semantic_loss(n_classes):
        def _semantic_loss(y_pred, y_true):
            return panoptic_weight * losses.weighted_categorical_crossentropy(
                y_pred, y_true, n_classes=n_classes)
        return _semantic_loss

    loss = {
        'regression': retinanet_losses.regress_loss,
        'classification': retinanet_losses.classification_loss
    }

    if include_masks:
        loss['masks'] = retinanet_losses.mask_loss

    if panoptic:
        # Give losses for all of the semantic heads
        for layer in model.layers:
            if 'semantic' in layer.name:
                n_classes = layer.output_shape[channel_axis]
                loss[layer.name] = semantic_loss(n_classes)

    model.compile(loss=loss, optimizer=optimizer)

    if num_gpus >= 2:
        # Each GPU must have at least one validation example
        if test_dict['y'].shape[0] < num_gpus:
            raise ValueError('Not enough validation data for {} GPUs. '
                             'Received {} validation samples.'.format(
                                 num_gpus, test_dict['y'].shape[0]))

        # When using multiple GPUs and skip_connections,
        # the training data must be evenly distributed across all GPUs
        num_train = train_dict['y'].shape[0]
        nb_samples = num_train - num_train % batch_size
        if nb_samples:
            train_dict['y'] = train_dict['y'][:nb_samples]
            train_dict['X'] = train_dict['X'][:nb_samples]

    # this will do preprocessing and realtime data augmentation
    datagen = image_generators.RetinaNetGenerator(
        # fill_mode='constant',  # for rotations
        rotation_range=rotation_range,
        shear_range=shear,
        zoom_range=zoom_range,
        horizontal_flip=flip,
        vertical_flip=flip)

    datagen_val = image_generators.RetinaNetGenerator(
        # fill_mode='constant',  # for rotations
        rotation_range=0,
        shear_range=0,
        zoom_range=0,
        horizontal_flip=0,
        vertical_flip=0)

    # if 'vgg' in backbone or 'densenet' in backbone:
    #     compute_shapes = make_shapes_callback(model)
    # else:
    #     compute_shapes = guess_shapes
    compute_shapes = guess_shapes

    train_data = datagen.flow(train_dict,
                              seed=seed,
                              include_masks=include_masks,
                              panoptic=panoptic,
                              transforms=transforms,
                              transforms_kwargs=transforms_kwargs,
                              pyramid_levels=pyramid_levels,
                              min_objects=min_objects,
                              anchor_params=anchor_params,
                              compute_shapes=compute_shapes,
                              batch_size=batch_size)

    val_data = datagen_val.flow(test_dict,
                                seed=seed,
                                include_masks=include_masks,
                                panoptic=panoptic,
                                transforms=transforms,
                                transforms_kwargs=transforms_kwargs,
                                pyramid_levels=pyramid_levels,
                                min_objects=min_objects,
                                anchor_params=anchor_params,
                                compute_shapes=compute_shapes,
                                batch_size=batch_size)

    train_callbacks = get_callbacks(
        model_path,
        lr_sched=lr_sched,
        tensorboard_log_dir=log_dir,
        save_weights_only=num_gpus >= 2,
        monitor='val_loss',
        verbose=1)

    eval_callback = RedirectModel(
        Evaluate(val_data,
                 iou_threshold=iou_threshold,
                 score_threshold=score_threshold,
                 max_detections=max_detections,
                 tensorboard=train_callbacks[-1] if log_dir else None,
                 weighted_average=weighted_average),
        prediction_model)

    train_callbacks.append(eval_callback)

    # fit the model on the batches generated by datagen.flow()
    loss_history = model.fit_generator(
        train_data,
        steps_per_epoch=train_data.y.shape[0] // batch_size,
        epochs=n_epoch,
        validation_data=val_data,
        validation_steps=val_data.y.shape[0] // batch_size,
        callbacks=train_callbacks)

    model.save_weights(model_path)
    np.savez(loss_path, loss_history=loss_history.history)

    if compute_map:
        average_precisions = evaluate(
            val_data,
            prediction_model,
            iou_threshold=iou_threshold,
            score_threshold=score_threshold,
            max_detections=max_detections,
        )

        # print evaluation
        total_instances = []
        precisions = []
        for label, (average_precision, num_annotations) in average_precisions.items():
            print('{:.0f} instances of class'.format(num_annotations),
                  label, 'with average precision: {:.4f}'.format(average_precision))
            total_instances.append(num_annotations)
            precisions.append(average_precision)

        if sum(total_instances) == 0:
            print('No test instances found.')
        else:
            print('mAP using the weighted average of precisions among classes: {:.4f}'.format(
                sum([a * b for a, b in zip(total_instances, precisions)]) /
                sum(total_instances)))
            print('mAP: {:.4f}'.format(
                sum(precisions) / sum(x > 0 for x in total_instances)))

    return model
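# Worked example of the two mAP figures printed above (made-up AP values):
# two classes with (average_precision, num_annotations) of (0.9, 50) and
# (0.5, 10) give
#     weighted mAP = (0.9 * 50 + 0.5 * 10) / (50 + 10) = 50 / 60 ~= 0.8333
#     unweighted mAP = (0.9 + 0.5) / 2 = 0.7
# so the weighted figure favors classes with more test instances.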
def train_model_retinanet(model, dataset, backbone, expt='', test_size=.1,
                          n_epoch=10, batch_size=1, num_gpus=None,
                          include_masks=False, mask_size=(28, 28),
                          optimizer=SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True),
                          log_dir='/data/tensorboard_logs',
                          model_dir='/data/models', model_name=None,
                          sigma=3.0, alpha=0.25, gamma=2.0,
                          score_threshold=0.01, iou_threshold=0.5,
                          max_detections=100, weighted_average=True,
                          lr_sched=rate_scheduler(lr=0.01, decay=0.95),
                          rotation_range=0, flip=True, shear=0, zoom_range=0,
                          **kwargs):
    """Train a RetinaNet model from the given backbone.

    Adapted from:
        https://github.com/fizyr/keras-retinanet &
        https://github.com/fizyr/keras-maskrcnn
    """
    is_channels_first = K.image_data_format() == 'channels_first'

    if model_name is None:
        todays_date = datetime.datetime.now().strftime('%Y-%m-%d')
        data_name = os.path.splitext(os.path.basename(dataset))[0]
        model_name = '{}_{}_{}'.format(todays_date, data_name, expt)
    model_path = os.path.join(model_dir, '{}.h5'.format(model_name))
    loss_path = os.path.join(model_dir, '{}.npz'.format(model_name))

    train_dict, test_dict = get_data(dataset, mode='conv', test_size=test_size)

    n_classes = model.layers[-1].output_shape[1 if is_channels_first else -1]

    # the data, shuffled and split between train and test sets
    print('X_train shape:', train_dict['X'].shape)
    print('y_train shape:', train_dict['y'].shape)
    print('X_test shape:', test_dict['X'].shape)
    print('y_test shape:', test_dict['y'].shape)
    print('Output Shape:', model.layers[-1].output_shape)
    print('Number of Classes:', n_classes)

    if num_gpus is None:
        num_gpus = train_utils.count_gpus()

    # NOTE: the 1e6 threshold effectively disables multi-GPU wrapping
    if num_gpus >= 1e6:
        batch_size = batch_size * num_gpus
        model = train_utils.MultiGpuModel(model, num_gpus)

    print('Training on {} GPUs'.format(num_gpus))

    def regress_loss(y_true, y_pred):
        # separate target and state
        regression = y_pred
        regression_target = y_true[..., :-1]
        anchor_state = y_true[..., -1]

        # filter out "ignore" anchors
        indices = tf.where(K.equal(anchor_state, 1))
        regression = tf.gather_nd(regression, indices)
        regression_target = tf.gather_nd(regression_target, indices)

        # compute the loss
        loss = losses.smooth_l1(regression_target, regression, sigma=sigma)

        # compute the normalizer: the number of positive anchors
        normalizer = K.maximum(1, K.shape(indices)[0])
        normalizer = K.cast(normalizer, dtype=K.floatx())

        return K.sum(loss) / normalizer

    def classification_loss(y_true, y_pred):
        # TODO: try weighted_categorical_crossentropy
        labels = y_true[..., :-1]
        # -1 for ignore, 0 for background, 1 for object
        anchor_state = y_true[..., -1]

        classification = y_pred

        # filter out "ignore" anchors
        indices = tf.where(K.not_equal(anchor_state, -1))
        labels = tf.gather_nd(labels, indices)
        classification = tf.gather_nd(classification, indices)

        # compute the loss
        loss = losses.focal(labels, classification, alpha=alpha, gamma=gamma)

        # compute the normalizer: the number of positive anchors
        normalizer = tf.where(K.equal(anchor_state, 1))
        normalizer = K.cast(K.shape(normalizer)[0], K.floatx())
        normalizer = K.maximum(K.cast_to_floatx(1.0), normalizer)

        return K.sum(loss) / normalizer

    def mask_loss(y_true, y_pred):
        def _mask(y_true, y_pred, iou_threshold=0.5, mask_size=(28, 28)):
            # split up the different predicted blobs
            boxes = y_pred[:, :, :4]
            masks = y_pred[:, :, 4:]

            # split up the different blobs
            annotations = y_true[:, :, :5]
            width = K.cast(y_true[0, 0, 5], dtype='int32')
            height = K.cast(y_true[0, 0, 6], dtype='int32')
            masks_target = y_true[:, :, 7:]

            # reshape the masks back to their original size
            masks_target = K.reshape(
                masks_target,
                (K.shape(masks_target)[0] * K.shape(masks_target)[1],
                 height, width))
            masks = K.reshape(
                masks,
                (K.shape(masks)[0] * K.shape(masks)[1],
                 mask_size[0], mask_size[1], -1))

            # batch size > 1 fix
            boxes = K.reshape(boxes, (-1, K.shape(boxes)[2]))
            annotations = K.reshape(annotations, (-1, K.shape(annotations)[2]))

            # compute overlap of boxes with annotations
            iou = overlap(boxes, annotations)
            argmax_overlaps_inds = K.argmax(iou, axis=1)
            max_iou = K.max(iou, axis=1)

            # filter those with IoU > 0.5
            indices = tf.where(K.greater_equal(max_iou, iou_threshold))
            boxes = tf.gather_nd(boxes, indices)
            masks = tf.gather_nd(masks, indices)
            argmax_overlaps_inds = tf.gather_nd(argmax_overlaps_inds, indices)
            argmax_overlaps_inds = K.cast(argmax_overlaps_inds, 'int32')
            labels = K.gather(annotations[:, 4], argmax_overlaps_inds)
            labels = K.cast(labels, 'int32')

            # make normalized boxes
            x1 = boxes[:, 0]
            y1 = boxes[:, 1]
            x2 = boxes[:, 2]
            y2 = boxes[:, 3]
            boxes = K.stack([
                y1 / (K.cast(height, dtype=K.floatx()) - 1),
                x1 / (K.cast(width, dtype=K.floatx()) - 1),
                (y2 - 1) / (K.cast(height, dtype=K.floatx()) - 1),
                (x2 - 1) / (K.cast(width, dtype=K.floatx()) - 1),
            ], axis=1)

            # crop and resize masks_target
            # append a fake channel dimension
            masks_target = K.expand_dims(masks_target, axis=3)
            masks_target = tf.image.crop_and_resize(
                masks_target, boxes, argmax_overlaps_inds, mask_size)
            # remove fake channel dimension
            masks_target = masks_target[:, :, :, 0]

            # gather the predicted masks using the annotation label
            masks = tf.transpose(masks, (0, 3, 1, 2))
            label_indices = K.stack([tf.range(K.shape(labels)[0]), labels], axis=1)
            masks = tf.gather_nd(masks, label_indices)

            # compute mask loss
            mask_loss = K.binary_crossentropy(masks_target, masks)
            normalizer = K.shape(masks)[0] * K.shape(masks)[1] * K.shape(masks)[2]
            normalizer = K.maximum(K.cast(normalizer, K.floatx()), 1)
            mask_loss = K.sum(mask_loss) / normalizer

            return mask_loss

        # if there are no masks annotations, return 0;
        # else, compute the masks loss
        return tf.cond(
            K.any(K.equal(K.shape(y_true), 0)),
            lambda: K.cast_to_floatx(0.0),
            lambda: _mask(y_true, y_pred,
                          iou_threshold=iou_threshold,
                          mask_size=mask_size))

    # evaluation of model is done on `retinanet_bbox`
    if include_masks:
        prediction_model = model
    else:
        prediction_model = retinanet_bbox(model, nms=True,
                                          class_specific_filter=False)

    loss = {'regression': regress_loss, 'classification': classification_loss}

    if include_masks:
        loss['masks'] = mask_loss

    model.compile(loss=loss, optimizer=optimizer)

    if num_gpus >= 2:
        # Each GPU must have at least one validation example
        if test_dict['y'].shape[0] < num_gpus:
            raise ValueError('Not enough validation data for {} GPUs. '
                             'Received {} validation samples.'.format(
                                 num_gpus, test_dict['y'].shape[0]))

        # When using multiple GPUs and skip_connections,
        # the training data must be evenly distributed across all GPUs
        num_train = train_dict['y'].shape[0]
        nb_samples = num_train - num_train % batch_size
        if nb_samples:
            train_dict['y'] = train_dict['y'][:nb_samples]
            train_dict['X'] = train_dict['X'][:nb_samples]

    # this will do preprocessing and realtime data augmentation
    datagen = image_generators.RetinaNetGenerator(
        # fill_mode='constant',  # for rotations
        rotation_range=rotation_range,
        shear_range=shear,
        zoom_range=zoom_range,
        horizontal_flip=flip,
        vertical_flip=flip)

    datagen_val = image_generators.RetinaNetGenerator(
        # fill_mode='constant',  # for rotations
        rotation_range=0,
        shear_range=0,
        zoom_range=0,
        horizontal_flip=0,
        vertical_flip=0)

    if 'vgg' in backbone or 'densenet' in backbone:
        compute_shapes = make_shapes_callback(model)
    else:
        compute_shapes = guess_shapes

    train_data = datagen.flow(train_dict,
                              include_masks=include_masks,
                              compute_shapes=compute_shapes,
                              batch_size=batch_size)

    val_data = datagen_val.flow(test_dict,
                                include_masks=include_masks,
                                compute_shapes=compute_shapes,
                                batch_size=batch_size)

    tensorboard_callback = callbacks.TensorBoard(
        log_dir=os.path.join(log_dir, model_name))

    # fit the model on the batches generated by datagen.flow()
    loss_history = model.fit_generator(
        train_data,
        steps_per_epoch=train_data.y.shape[0] // batch_size,
        epochs=n_epoch,
        validation_data=val_data,
        validation_steps=val_data.y.shape[0] // batch_size,
        callbacks=[
            callbacks.LearningRateScheduler(lr_sched),
            callbacks.ModelCheckpoint(
                model_path, monitor='val_loss', verbose=1,
                save_best_only=True, save_weights_only=num_gpus >= 2),
            tensorboard_callback,
            callbacks.ReduceLROnPlateau(
                monitor='loss', factor=0.1, patience=10, verbose=1,
                mode='auto', min_delta=0.0001, cooldown=0, min_lr=0),
            RedirectModel(
                Evaluate(val_data,
                         iou_threshold=iou_threshold,
                         score_threshold=score_threshold,
                         max_detections=max_detections,
                         tensorboard=tensorboard_callback,
                         weighted_average=weighted_average),
                prediction_model),
        ])

    model.save_weights(model_path)
    np.savez(loss_path, loss_history=loss_history.history)

    average_precisions = evaluate(
        val_data,
        prediction_model,
        iou_threshold=iou_threshold,
        score_threshold=score_threshold,
        max_detections=max_detections,
    )

    # print evaluation
    total_instances = []
    precisions = []
    for label, (average_precision, num_annotations) in average_precisions.items():
        print('{:.0f} instances of class'.format(num_annotations),
              label, 'with average precision: {:.4f}'.format(average_precision))
        total_instances.append(num_annotations)
        precisions.append(average_precision)

    if sum(total_instances) == 0:
        print('No test instances found.')
    else:
        print('mAP using the weighted average of precisions among classes: {:.4f}'.format(
            sum([a * b for a, b in zip(total_instances, precisions)]) /
            sum(total_instances)))
        print('mAP: {:.4f}'.format(
            sum(precisions) / sum(x > 0 for x in total_instances)))

    return model
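# Minimal runnable sketch of the crop-and-resize step in mask_loss above
# (illustrative shapes; boxes are normalized [y1, x1, y2, x2] and the third
# argument picks the source image for each box):
#
#     import numpy as np
#     import tensorflow as tf
#
#     full_masks = np.zeros((1, 64, 64, 1), dtype='float32')  # one full-size mask
#     full_masks[0, 16:48, 16:48, 0] = 1.0                    # a square object
#     boxes = np.array([[0.25, 0.25, 0.75, 0.75]], dtype='float32')
#     crops = tf.image.crop_and_resize(full_masks, boxes, [0], crop_size=(28, 28))
#     print(crops.shape)  # (1, 28, 28, 1): a 28x28 mask target per box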