def testOptimizerWithCallbacks(self):
    np.random.seed(1331)
    input_np = np.random.random((10, 3))
    output_np = np.random.random((10, 4))
    a = input_layer.Input(shape=(3,), name='input_a')
    model = sequential.Sequential()
    model.add(core.Dense(4, name='dense'))
    model.add(core.Dropout(0.5, name='dropout'))
    model(a)
    optimizer = gradient_descent.SGD(learning_rate=0.1)
    model.compile(optimizer, loss='mse', metrics=['mae'])

    # This does not reduce the LR after the first epoch (due to low delta).
    cbks = [
        callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                                    min_delta=0, patience=1, cooldown=5)
    ]
    model.fit(input_np, output_np, batch_size=10,
              validation_data=(input_np, output_np),
              callbacks=cbks, epochs=2, verbose=0)
    self.assertAllClose(
        float(backend.get_value(model.optimizer.lr)), 0.1, atol=1e-4)

    # This should reduce the LR after the first epoch (due to high delta).
    cbks = [
        callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                                    min_delta=10, patience=1, cooldown=5)
    ]
    model.fit(input_np, output_np, batch_size=10,
              validation_data=(input_np, output_np),
              callbacks=cbks, epochs=2, verbose=2)
    self.assertAllClose(
        float(backend.get_value(model.optimizer.lr)), 0.01, atol=1e-4)
def test_validate_callbacks_predefined_callbacks(self):
    supported_predefined_callbacks = [
        callbacks.TensorBoard(),
        callbacks.CSVLogger(filename='./log.csv'),
        callbacks.EarlyStopping(),
        callbacks.ModelCheckpoint(filepath='./checkpoint'),
        callbacks.TerminateOnNaN(),
        callbacks.ProgbarLogger(),
        callbacks.History(),
        callbacks.RemoteMonitor()
    ]
    distributed_training_utils.validate_callbacks(
        supported_predefined_callbacks, adam.Adam())

    unsupported_predefined_callbacks = [
        callbacks.ReduceLROnPlateau(),
        callbacks.LearningRateScheduler(schedule=lambda epoch: 0.001)
    ]
    for callback in unsupported_predefined_callbacks:
        with self.assertRaisesRegex(ValueError,
                                    'You must specify a Keras Optimizer V2'):
            distributed_training_utils.validate_callbacks(
                [callback], v1_adam.AdamOptimizer())
def create_callbacks(early_stopping, model_checkpoint, reduce_lr_on_plateau,
                     tensor_board):
    '''
    Build the list of callbacks.
    :param early_stopping: stop training if the 'monitor' value has not changed for 'patience' epochs
    :param model_checkpoint: save the network weights with the best 'monitor' value
    :param reduce_lr_on_plateau: reduce the learning rate during training
    :param tensor_board:
    :return:
    '''
    callbacks_list = []

    # if early_stopping:
    #     callbacks_list.append(callbacks.EarlyStopping(monitor='val_acc', patience=7))

    if model_checkpoint:
        callbacks_list.append(
            callbacks.ModelCheckpoint(
                filepath='weight_checkpoints/weights.{epoch:02d}-{val_loss:.2f}.hdf5',
                monitor='val_loss',
                save_best_only=True))

    if reduce_lr_on_plateau:
        callbacks_list.append(
            callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10))

    # if tensor_board:
    #     callbacks_list.append(callbacks.TensorBoard(log_dir='log_dir', histogram_freq=1))

    return callbacks_list
def get_callbacks(use_early_stopping=True, use_reduce_lr=True):
    callback_list = []

    if use_early_stopping:
        callback_list.append(
            callbacks.EarlyStopping(monitor='val_loss', min_delta=0,
                                    patience=10, verbose=keras_verbosity,
                                    mode='auto'))

    if use_reduce_lr:
        callback_list.append(
            callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                                        patience=5, verbose=keras_verbosity,
                                        mode='auto',
                                        # 'epsilon' is the legacy Keras name for 'min_delta'
                                        epsilon=0.0001,
                                        cooldown=0, min_lr=0))

    return callback_list
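# A minimal, self-contained usage sketch (not from the project above) showing how
# a callback list like the one returned by get_callbacks() is consumed by
# model.fit. The toy model and random data below are illustrative assumptions;
# note that monitoring 'val_loss' requires validation data (validation_data or
# validation_split) to be supplied to fit().
import numpy as np
import tensorflow as tf
from tensorflow.keras import callbacks

inputs = tf.keras.Input(shape=(4,))
outputs = tf.keras.layers.Dense(1)(inputs)
model = tf.keras.Model(inputs, outputs)
model.compile(optimizer='sgd', loss='mse')

callback_list = [
    callbacks.EarlyStopping(monitor='val_loss', patience=10),
    callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5),
]

x = np.random.random((32, 4)).astype('float32')
y = np.random.random((32, 1)).astype('float32')
model.fit(x, y, validation_split=0.25, epochs=3,
          callbacks=callback_list, verbose=0)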
def dense_train(space):
    ''' train a dense feed-forward network on the training / validation set -> give predictions of Y '''

    params = space.copy()

    input_shape = (X_train.shape[-1],)  # input shape depends on x_fields used
    input_img = Input(shape=input_shape)

    init_nodes = params['init_nodes']   # first dense layer - number of nodes
    nodes_mult = params['nodes_mult']   # nodes growth rate
    mult_freq = params['mult_freq']     # grow every X layers
    mult_start = params['mult_start']   # grow from layer X
    end_nodes = params['end_nodes']     # maximum number of nodes

    if params['num_Dense_layer'] < 4:
        params['init_nodes'] = init_nodes = 16

    d_1 = Dense(init_nodes, activation=params['activation'])(input_img)  # remove kernel_regularizer=regularizers.l1(params['l1'])
    d_1 = Dropout(params['dropout'])(d_1)

    for i in range(1, params['num_Dense_layer']):
        temp_nodes = int(min(
            init_nodes * (2 ** (nodes_mult * max((i - mult_start + 3) // mult_freq, 0))),
            end_nodes))
        d_1 = Dense(temp_nodes, activation=params['activation'])(d_1)

        if i != params['num_Dense_layer'] - 1:  # last dense layer has no dropout
            d_1 = Dropout(params['dropout'])(d_1)

    f_x = Dense(1)(d_1)

    callbacks_list = [
        callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10),
        callbacks.EarlyStopping(monitor='val_loss', patience=10, mode='auto')
    ]  # add callbacks

    lr_val = 10 ** -int(params['learning_rate'])
    adam = optimizers.Adam(lr=lr_val)
    model = Model(input_img, f_x)
    model.compile(adam, loss='mae')
    model.summary()

    history = model.fit(X_train, Y_train,
                        epochs=50,
                        batch_size=params['batch_size'],
                        validation_data=(X_valid, Y_valid),
                        callbacks=callbacks_list,
                        verbose=1)

    Y_test_pred = model.predict(X_test)
    Y_train_pred = model.predict(X_train)
    Y_valid_pred = model.predict(X_valid)

    return Y_test_pred, Y_train_pred, Y_valid_pred, history
def callableForTestReduceLROnPlateau(model, test_obj, train_ds, num_epoch,
                                     steps, strategy, saving_filepath,
                                     **kwargs):
    cbks = [
        callbacks.ReduceLROnPlateau(monitor='loss', factor=0.1, min_delta=1,
                                    patience=1, cooldown=5, verbose=1)
    ]

    # It is expected that the learning rate would drop by `factor` within
    # 3 epochs with `min_delta=1`.
    model.fit(x=train_ds, epochs=3, steps_per_epoch=steps, callbacks=cbks)
    test_obj.assertAllClose(
        float(K.get_value(model.optimizer.lr)), 0.0001, atol=1e-8)

    # It is expected that the learning rate would drop by another `factor`
    # within 3 epochs with `min_delta=1`.
    model.fit(x=train_ds, epochs=3, steps_per_epoch=steps, callbacks=cbks)
    test_obj.assertAllClose(
        float(K.get_value(model.optimizer.lr)), 0.00001, atol=1e-8)
def create_learning_rate_reducer(cfg_solver: dict) -> callbacks.ReduceLROnPlateau:
    """Create a ReduceLROnPlateau callback.

    Args:
        cfg_solver: dict, solver subsection of config.

    Returns:
        ReduceLROnPlateau, ReduceLROnPlateau callback.
    """
    params = cfg_solver["learning_rate_reducer"]
    params["verbose"] = 1
    return callbacks.ReduceLROnPlateau(**params)
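# A minimal sketch (illustrative keys, not taken from any real config file) of the
# "learning_rate_reducer" sub-dict that create_learning_rate_reducer() above expects.
# Every key is forwarded verbatim via ReduceLROnPlateau(**params), so only valid
# ReduceLROnPlateau arguments may appear in the config.
from tensorflow.keras import callbacks

cfg_solver = {
    "learning_rate_reducer": {
        "monitor": "val_loss",   # assumed metric name
        "factor": 0.5,
        "patience": 3,
        "min_lr": 1e-6,
    }
}

params = dict(cfg_solver["learning_rate_reducer"])
params["verbose"] = 1            # mirrors what the factory hard-codes
reduce_lr = callbacks.ReduceLROnPlateau(**params)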
def get_callbacks(args):
    """Define callbacks for distributed training."""
    callbacks = [
        # This is necessary to ensure consistent initialization of all workers
        hvd.callbacks.BroadcastGlobalVariablesCallback(0),
        # Note: must be in the list before the ReduceLROnPlateau or other
        # metrics-based callbacks.
        hvd.callbacks.MetricAverageCallback(),
        # Adjust Learning Rate
        hvd.callbacks.LearningRateWarmupCallback(warmup_epochs=args.warmup_epochs)
    ]

    if args.train_only:
        # Reduce learning rate on a schedule
        onethirds_point = int(math.floor(args.epochs / 3))
        twothirds_point = int(math.floor(args.epochs / 3 * 2))
        callbacks.append(
            hvd.callbacks.LearningRateScheduleCallback(
                start_epoch=args.warmup_epochs,
                end_epoch=onethirds_point,
                multiplier=1.))
        callbacks.append(
            hvd.callbacks.LearningRateScheduleCallback(
                start_epoch=onethirds_point,
                end_epoch=twothirds_point,
                multiplier=1e-1))
        callbacks.append(
            hvd.callbacks.LearningRateScheduleCallback(
                start_epoch=twothirds_point,
                end_epoch=args.epochs + 1,
                multiplier=1e-2))
    else:
        # Reduce learning rate on validation loss plateau
        callbacks.append(
            cb.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5,
                                 min_lr=0.001,
                                 verbose=1 if hvd.rank() == 0 else 0))

    if args.early_stopping:
        callbacks.append(
            cb.EarlyStopping(monitor='loss', patience=7,
                             restore_best_weights=True))

    print('Callbacks created on rank ' + str(hvd.rank()))
    return callbacks
def callb(path_checkpoint):
    callback_checkpoint = tf_cb.ModelCheckpoint(filepath=path_checkpoint,
                                                monitor='loss',
                                                verbose=1,
                                                save_weights_only=True,
                                                save_best_only=True)
    callback_earlystopping = tf_cb.EarlyStopping(monitor='loss',
                                                 patience=20,
                                                 verbose=1)
    callback_reduce_lr = tf_cb.ReduceLROnPlateau(monitor='loss',
                                                 factor=0.98,
                                                 min_lr=0.3e-4,
                                                 patience=0,
                                                 verbose=1)
    callBacks = [callback_checkpoint, callback_earlystopping, callback_reduce_lr]
    return callBacks
def test_TensorBoard_with_ReduceLROnPlateau(self):
    with self.cached_session():
        temp_dir = self.get_temp_dir()
        self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)

        (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data(
            train_samples=TRAIN_SAMPLES,
            test_samples=TEST_SAMPLES,
            input_shape=(INPUT_DIM,),
            num_classes=NUM_CLASSES)
        y_test = np_utils.to_categorical(y_test)
        y_train = np_utils.to_categorical(y_train)

        model = testing_utils.get_small_sequential_mlp(
            num_hidden=NUM_HIDDEN, num_classes=NUM_CLASSES, input_dim=INPUT_DIM)
        model.compile(loss='binary_crossentropy', optimizer='sgd',
                      metrics=['accuracy'])

        cbks = [
            callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                                        patience=4, verbose=1),
            callbacks_v1.TensorBoard(log_dir=temp_dir)
        ]
        model.fit(x_train, y_train, batch_size=BATCH_SIZE,
                  validation_data=(x_test, y_test),
                  callbacks=cbks, epochs=2, verbose=0)

        assert os.path.exists(temp_dir)
def create_learning_rate_reducer(
        cfg_solver: dict,
        metrics_names: List[str]) -> callbacks.ReduceLROnPlateau:
    """Create a ReduceLROnPlateau callback.

    Args:
        cfg_solver: dict, solver subsection of config.
        metrics_names: list[str], 'metrics' names.

    Returns:
        ReduceLROnPlateau, ReduceLROnPlateau callback.

    Raises:
        ValueError, monitor not in 'metrics' names.
    """
    monitor = cfg_solver["learning_rate_reducer"]["monitor"]
    val_metrics_names = [f"val_{mm}" for mm in metrics_names]
    if (monitor not in metrics_names) and (monitor not in val_metrics_names):
        raise ValueError(
            f"monitor: {monitor} not found in model metrics names: "
            f"{metrics_names + val_metrics_names}")

    params = cfg_solver["learning_rate_reducer"]
    params["verbose"] = 1
    return callbacks.ReduceLROnPlateau(**params)
def train_model_retinanet(model, dataset, backbone, expt='', test_size=.1,
                          n_epoch=10, batch_size=1, num_gpus=None,
                          include_masks=False, panoptic=False,
                          panoptic_weight=1, anchor_params=None,
                          pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
                          mask_size=(28, 28),
                          optimizer=SGD(lr=0.01, decay=1e-6,
                                        momentum=0.9, nesterov=True),
                          log_dir='/data/tensorboard_logs',
                          model_dir='/data/models', model_name=None,
                          sigma=3.0, alpha=0.25, gamma=2.0,
                          score_threshold=0.01, iou_threshold=0.5,
                          max_detections=100, weighted_average=True,
                          lr_sched=rate_scheduler(lr=0.01, decay=0.95),
                          rotation_range=0, flip=True, shear=0, zoom_range=0,
                          seed=None, **kwargs):
    """Train a RetinaNet model from the given backbone.

    Adapted from:
        https://github.com/fizyr/keras-retinanet &
        https://github.com/fizyr/keras-maskrcnn
    """
    is_channels_first = K.image_data_format() == 'channels_first'

    if model_name is None:
        todays_date = datetime.datetime.now().strftime('%Y-%m-%d')
        data_name = os.path.splitext(os.path.basename(dataset))[0]
        model_name = '{}_{}_{}'.format(todays_date, data_name, expt)
    model_path = os.path.join(model_dir, '{}.h5'.format(model_name))
    loss_path = os.path.join(model_dir, '{}.npz'.format(model_name))

    train_dict, test_dict = get_data(dataset, seed=seed, test_size=test_size)

    channel_axis = 1 if is_channels_first else -1
    n_classes = model.layers[-1].output_shape[channel_axis]

    if panoptic:
        n_semantic_classes = model.get_layer(
            name='semantic').output_shape[channel_axis]

    # the data, shuffled and split between train and test sets
    print('X_train shape:', train_dict['X'].shape)
    print('y_train shape:', train_dict['y'].shape)
    print('X_test shape:', test_dict['X'].shape)
    print('y_test shape:', test_dict['y'].shape)
    print('Output Shape:', model.layers[-1].output_shape)
    print('Number of Classes:', n_classes)

    if num_gpus is None:
        num_gpus = train_utils.count_gpus()

    if num_gpus >= 1e6:
        batch_size = batch_size * num_gpus
        model = train_utils.MultiGpuModel(model, num_gpus)

    print('Training on {} GPUs'.format(num_gpus))

    # evaluation of model is done on `retinanet_bbox`
    if include_masks:
        prediction_model = model
    else:
        prediction_model = retinanet_bbox(model, nms=True,
                                          anchor_params=anchor_params,
                                          panoptic=panoptic,
                                          class_specific_filter=False)

    retinanet_losses = losses.RetinaNetLosses(sigma=sigma, alpha=alpha,
                                              gamma=gamma,
                                              iou_threshold=iou_threshold,
                                              mask_size=mask_size)

    def semantic_loss(y_pred, y_true):
        return panoptic_weight * losses.weighted_categorical_crossentropy(
            y_pred, y_true, n_classes=n_semantic_classes)

    loss = {
        'regression': retinanet_losses.regress_loss,
        'classification': retinanet_losses.classification_loss
    }

    if include_masks:
        loss['masks'] = retinanet_losses.mask_loss

    if panoptic:
        loss['semantic'] = semantic_loss

    model.compile(loss=loss, optimizer=optimizer)

    if num_gpus >= 2:
        # Each GPU must have at least one validation example
        if test_dict['y'].shape[0] < num_gpus:
            raise ValueError('Not enough validation data for {} GPUs. '
                             'Received {} validation sample.'.format(
                                 test_dict['y'].shape[0], num_gpus))

        # When using multiple GPUs and skip_connections,
        # the training data must be evenly distributed across all GPUs
        num_train = train_dict['y'].shape[0]
        nb_samples = num_train - num_train % batch_size
        if nb_samples:
            train_dict['y'] = train_dict['y'][:nb_samples]
            train_dict['X'] = train_dict['X'][:nb_samples]

    # this will do preprocessing and realtime data augmentation
    datagen = image_generators.RetinaNetGenerator(
        # fill_mode='constant',  # for rotations
        rotation_range=rotation_range,
        shear_range=shear,
        zoom_range=zoom_range,
        horizontal_flip=flip,
        vertical_flip=flip)

    datagen_val = image_generators.RetinaNetGenerator(
        # fill_mode='constant',  # for rotations
        rotation_range=0,
        shear_range=0,
        zoom_range=0,
        horizontal_flip=0,
        vertical_flip=0)

    if 'vgg' in backbone or 'densenet' in backbone:
        compute_shapes = make_shapes_callback(model)
    else:
        compute_shapes = guess_shapes

    train_data = datagen.flow(train_dict,
                              seed=seed,
                              include_masks=include_masks,
                              panoptic=panoptic,
                              pyramid_levels=pyramid_levels,
                              anchor_params=anchor_params,
                              compute_shapes=compute_shapes,
                              batch_size=batch_size)

    val_data = datagen_val.flow(test_dict,
                                seed=seed,
                                include_masks=include_masks,
                                panoptic=panoptic,
                                pyramid_levels=pyramid_levels,
                                anchor_params=anchor_params,
                                compute_shapes=compute_shapes,
                                batch_size=batch_size)

    tensorboard_callback = callbacks.TensorBoard(
        log_dir=os.path.join(log_dir, model_name))

    # fit the model on the batches generated by datagen.flow()
    loss_history = model.fit_generator(
        train_data,
        steps_per_epoch=train_data.y.shape[0] // batch_size,
        epochs=n_epoch,
        validation_data=val_data,
        validation_steps=val_data.y.shape[0] // batch_size,
        callbacks=[
            callbacks.LearningRateScheduler(lr_sched),
            callbacks.ModelCheckpoint(model_path, monitor='val_loss',
                                      verbose=1, save_best_only=True,
                                      save_weights_only=num_gpus >= 2),
            tensorboard_callback,
            callbacks.ReduceLROnPlateau(monitor='loss', factor=0.1,
                                        patience=10, verbose=1, mode='auto',
                                        min_delta=0.0001, cooldown=0,
                                        min_lr=0),
            RedirectModel(
                Evaluate(val_data,
                         iou_threshold=iou_threshold,
                         score_threshold=score_threshold,
                         max_detections=max_detections,
                         tensorboard=tensorboard_callback,
                         weighted_average=weighted_average),
                prediction_model),
        ])

    model.save_weights(model_path)
    np.savez(loss_path, loss_history=loss_history.history)

    average_precisions = evaluate(
        val_data,
        prediction_model,
        iou_threshold=iou_threshold,
        score_threshold=score_threshold,
        max_detections=max_detections,
    )

    # print evaluation
    total_instances = []
    precisions = []
    for label, (average_precision, num_annotations) in average_precisions.items():
        print('{:.0f} instances of class'.format(num_annotations), label,
              'with average precision: {:.4f}'.format(average_precision))
        total_instances.append(num_annotations)
        precisions.append(average_precision)

    if sum(total_instances) == 0:
        print('No test instances found.')
    else:
        print('mAP using the weighted average of precisions among classes: {:.4f}'.format(
            sum([a * b for a, b in zip(total_instances, precisions)]) /
            sum(total_instances)))
        print('mAP: {:.4f}'.format(
            sum(precisions) / sum(x > 0 for x in total_instances)))

    return model
print(model.summary())
# '''

file_path = os.path.join(save_dir, model_name)
checkpoint = callbacks.ModelCheckpoint(
    file_path,
    monitor='val_predictions_categorical_accuracy',
    verbose=1,
    save_best_only=True,
    mode='auto',
    save_weights_only=True,
    period=1)
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='val_predictions_loss',
    factor=0.25,
    patience=10,
    verbose=1,
    mode='auto',
    min_delta=1e-6,
    cooldown=0,
    min_lr=0)
csv_logger = callbacks.CSVLogger(os.path.join(save_dir, 'Log_V1.log'),
                                 separator=',', append=False)

train_data_generator = Train_data_generator(batch_size)
valid_data_generator = Valid_data_generator(batch_size)

model.fit_generator(generator=train_data_generator,
                    steps_per_epoch=int(210030 / batch_size),
                    epochs=epochs,
                    verbose=1,
                    callbacks=[checkpoint, reduce_lr, csv_logger],
                    validation_data=valid_data_generator,
def rnn_train(space):  # functional API
    ''' train a GRU-based recurrent network on the training / validation set -> give predictions of Y '''

    params = space.copy()

    lookback = 20             # lookback = 5Y * 4Q = 20Q
    x_fields = 10             # lgbm top15 features -> 10 features in rnn

    inputs_loss_weight = 0.1  # loss weights for individual outputs from each rnn model
    dense_loss_weight = 2     # loss weights for final output
    loss_weights = [inputs_loss_weight] * x_fields + [dense_loss_weight]  # loss weights for training

    loss = [args.objective] * (x_fields + 1)  # use MAE loss function for all inputs and final
    metrics = [args.objective] * (x_fields + 1)

    input_img = Input(shape=(lookback, x_fields))
    outputs = []
    states = []

    for col in range(10):  # build model for each feature
        g_1 = K.expand_dims(input_img[:, :, col], axis=2)  # add dimension to certain feature: shape = (samples, 20, 1)

        for i in range(params['num_gru_layer']):
            temp_nodes = int(min(params['gru_nodes'] * (2 ** (params['gru_nodes_mult'] * i)), 8))
            extra = dict(return_sequences=True)

            if args.bi == False:
                if i == params['num_gru_layer'] - 1:
                    extra = dict(return_sequences=False)
                    g_state = GRU(temp_nodes, **extra)(g_1)  # forecast state
                elif i == 0:
                    g_1 = GRU(temp_nodes, **extra)(g_1)
                else:
                    g_1 = GRU(temp_nodes, dropout=params['gru_dropout'], **extra)(g_1)
            else:  # try the bidirectional one
                if i == params['num_gru_layer'] - 1:
                    extra = dict(return_sequences=False)
                    g_state = GRU(temp_nodes, **extra)(g_1)  # forecast state
                elif i == 0:
                    g_1 = Bidirectional(GRU(temp_nodes, **extra))(g_1)
                else:
                    g_1 = Bidirectional(GRU(temp_nodes, dropout=params['gru_dropout'], **extra))(g_1)

        g_output = Dense(1)(g_state)
        states.append(g_state)
        outputs.append(g_output)

    f_x = Concatenate(axis=1)(states)
    for i in range(params['num_dense_layer']):  # for second or third dense layers
        f_x = Dense(10)(f_x)

    f_x = Dense(1, name='final_dense')(f_x)
    outputs.append(f_x)

    model = Model(inputs=input_img, outputs=outputs)  # outputs = 10 forecast states + final forecast

    callbacks_list = [
        callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10),
        callbacks.EarlyStopping(monitor='val_loss', patience=10, mode='auto')
    ]  # add callbacks

    lr_val = 10 ** -int(params['learning_rate'])
    adam = optimizers.Adam(lr=lr_val)
    model.compile(adam, loss=loss, metrics=metrics, loss_weights=loss_weights)
    model.summary()

    history = model.fit(X_train, [Y_train] * (x_fields + 1),
                        epochs=50,
                        batch_size=params['batch_size'],
                        validation_data=(X_valid, [Y_valid] * (x_fields + 1)),
                        verbose=1,
                        callbacks=callbacks_list)

    Y_test_pred = model.predict(X_test)[-1]  # final dense predictions
    Y_train_pred = model.predict(X_train)[-1]
    Y_valid_pred = model.predict(X_valid)[-1]

    return Y_test_pred, Y_train_pred, Y_valid_pred, history
def train_model_retinanet(model, dataset, backbone, expt='', test_size=.1,
                          n_epoch=10, batch_size=1, num_gpus=None,
                          include_masks=False, mask_size=(28, 28),
                          optimizer=SGD(lr=0.01, decay=1e-6,
                                        momentum=0.9, nesterov=True),
                          log_dir='/data/tensorboard_logs',
                          model_dir='/data/models', model_name=None,
                          sigma=3.0, alpha=0.25, gamma=2.0,
                          score_threshold=0.01, iou_threshold=0.5,
                          max_detections=100, weighted_average=True,
                          lr_sched=rate_scheduler(lr=0.01, decay=0.95),
                          rotation_range=0, flip=True, shear=0, zoom_range=0,
                          **kwargs):
    """Train a RetinaNet model from the given backbone.

    Adapted from:
        https://github.com/fizyr/keras-retinanet &
        https://github.com/fizyr/keras-maskrcnn
    """
    is_channels_first = K.image_data_format() == 'channels_first'

    if model_name is None:
        todays_date = datetime.datetime.now().strftime('%Y-%m-%d')
        data_name = os.path.splitext(os.path.basename(dataset))[0]
        model_name = '{}_{}_{}'.format(todays_date, data_name, expt)
    model_path = os.path.join(model_dir, '{}.h5'.format(model_name))
    loss_path = os.path.join(model_dir, '{}.npz'.format(model_name))

    train_dict, test_dict = get_data(dataset, mode='conv', test_size=test_size)

    n_classes = model.layers[-1].output_shape[1 if is_channels_first else -1]

    # the data, shuffled and split between train and test sets
    print('X_train shape:', train_dict['X'].shape)
    print('y_train shape:', train_dict['y'].shape)
    print('X_test shape:', test_dict['X'].shape)
    print('y_test shape:', test_dict['y'].shape)
    print('Output Shape:', model.layers[-1].output_shape)
    print('Number of Classes:', n_classes)

    if num_gpus is None:
        num_gpus = train_utils.count_gpus()

    if num_gpus >= 1e6:
        batch_size = batch_size * num_gpus
        model = train_utils.MultiGpuModel(model, num_gpus)

    print('Training on {} GPUs'.format(num_gpus))

    def regress_loss(y_true, y_pred):
        # separate target and state
        regression = y_pred
        regression_target = y_true[..., :-1]
        anchor_state = y_true[..., -1]

        # filter out "ignore" anchors
        indices = tf.where(K.equal(anchor_state, 1))
        regression = tf.gather_nd(regression, indices)
        regression_target = tf.gather_nd(regression_target, indices)

        # compute the loss
        loss = losses.smooth_l1(regression_target, regression, sigma=sigma)

        # compute the normalizer: the number of positive anchors
        normalizer = K.maximum(1, K.shape(indices)[0])
        normalizer = K.cast(normalizer, dtype=K.floatx())

        return K.sum(loss) / normalizer

    def classification_loss(y_true, y_pred):
        # TODO: try weighted_categorical_crossentropy
        labels = y_true[..., :-1]
        # -1 for ignore, 0 for background, 1 for object
        anchor_state = y_true[..., -1]

        classification = y_pred

        # filter out "ignore" anchors
        indices = tf.where(K.not_equal(anchor_state, -1))
        labels = tf.gather_nd(labels, indices)
        classification = tf.gather_nd(classification, indices)

        # compute the loss
        loss = losses.focal(labels, classification, alpha=alpha, gamma=gamma)

        # compute the normalizer: the number of positive anchors
        normalizer = tf.where(K.equal(anchor_state, 1))
        normalizer = K.cast(K.shape(normalizer)[0], K.floatx())
        normalizer = K.maximum(K.cast_to_floatx(1.0), normalizer)

        return K.sum(loss) / normalizer

    def mask_loss(y_true, y_pred):
        def _mask(y_true, y_pred, iou_threshold=0.5, mask_size=(28, 28)):
            # split up the different predicted blobs
            boxes = y_pred[:, :, :4]
            masks = y_pred[:, :, 4:]

            # split up the different blobs
            annotations = y_true[:, :, :5]
            width = K.cast(y_true[0, 0, 5], dtype='int32')
            height = K.cast(y_true[0, 0, 6], dtype='int32')
            masks_target = y_true[:, :, 7:]

            # reshape the masks back to their original size
            masks_target = K.reshape(
                masks_target,
                (K.shape(masks_target)[0] * K.shape(masks_target)[1],
                 height, width))
            masks = K.reshape(
                masks,
                (K.shape(masks)[0] * K.shape(masks)[1],
                 mask_size[0], mask_size[1], -1))

            # batch size > 1 fix
            boxes = K.reshape(boxes, (-1, K.shape(boxes)[2]))
            annotations = K.reshape(annotations, (-1, K.shape(annotations)[2]))

            # compute overlap of boxes with annotations
            iou = overlap(boxes, annotations)
            argmax_overlaps_inds = K.argmax(iou, axis=1)
            max_iou = K.max(iou, axis=1)

            # filter those with IoU > 0.5
            indices = tf.where(K.greater_equal(max_iou, iou_threshold))
            boxes = tf.gather_nd(boxes, indices)
            masks = tf.gather_nd(masks, indices)
            argmax_overlaps_inds = tf.gather_nd(argmax_overlaps_inds, indices)
            argmax_overlaps_inds = K.cast(argmax_overlaps_inds, 'int32')
            labels = K.gather(annotations[:, 4], argmax_overlaps_inds)
            labels = K.cast(labels, 'int32')

            # make normalized boxes
            x1 = boxes[:, 0]
            y1 = boxes[:, 1]
            x2 = boxes[:, 2]
            y2 = boxes[:, 3]
            boxes = K.stack([
                y1 / (K.cast(height, dtype=K.floatx()) - 1),
                x1 / (K.cast(width, dtype=K.floatx()) - 1),
                (y2 - 1) / (K.cast(height, dtype=K.floatx()) - 1),
                (x2 - 1) / (K.cast(width, dtype=K.floatx()) - 1),
            ], axis=1)

            # crop and resize masks_target
            # append a fake channel dimension
            masks_target = K.expand_dims(masks_target, axis=3)
            masks_target = tf.image.crop_and_resize(
                masks_target, boxes, argmax_overlaps_inds, mask_size)
            # remove fake channel dimension
            masks_target = masks_target[:, :, :, 0]

            # gather the predicted masks using the annotation label
            masks = tf.transpose(masks, (0, 3, 1, 2))
            label_indices = K.stack([tf.range(K.shape(labels)[0]), labels], axis=1)
            masks = tf.gather_nd(masks, label_indices)

            # compute mask loss
            mask_loss = K.binary_crossentropy(masks_target, masks)
            normalizer = K.shape(masks)[0] * K.shape(masks)[1] * K.shape(masks)[2]
            normalizer = K.maximum(K.cast(normalizer, K.floatx()), 1)
            mask_loss = K.sum(mask_loss) / normalizer

            return mask_loss

        # if there are no masks annotations, return 0; else, compute the masks loss
        return tf.cond(
            K.any(K.equal(K.shape(y_true), 0)),
            lambda: K.cast_to_floatx(0.0),
            lambda: _mask(y_true, y_pred,
                          iou_threshold=iou_threshold,
                          mask_size=mask_size))

    # evaluation of model is done on `retinanet_bbox`
    if include_masks:
        prediction_model = model
    else:
        prediction_model = retinanet_bbox(model, nms=True,
                                          class_specific_filter=False)

    loss = {'regression': regress_loss, 'classification': classification_loss}

    if include_masks:
        loss['masks'] = mask_loss

    model.compile(loss=loss, optimizer=optimizer)

    if num_gpus >= 2:
        # Each GPU must have at least one validation example
        if test_dict['y'].shape[0] < num_gpus:
            raise ValueError('Not enough validation data for {} GPUs. '
                             'Received {} validation sample.'.format(
                                 test_dict['y'].shape[0], num_gpus))

        # When using multiple GPUs and skip_connections,
        # the training data must be evenly distributed across all GPUs
        num_train = train_dict['y'].shape[0]
        nb_samples = num_train - num_train % batch_size
        if nb_samples:
            train_dict['y'] = train_dict['y'][:nb_samples]
            train_dict['X'] = train_dict['X'][:nb_samples]

    # this will do preprocessing and realtime data augmentation
    datagen = image_generators.RetinaNetGenerator(
        # fill_mode='constant',  # for rotations
        rotation_range=rotation_range,
        shear_range=shear,
        zoom_range=zoom_range,
        horizontal_flip=flip,
        vertical_flip=flip)

    datagen_val = image_generators.RetinaNetGenerator(
        # fill_mode='constant',  # for rotations
        rotation_range=0,
        shear_range=0,
        zoom_range=0,
        horizontal_flip=0,
        vertical_flip=0)

    if 'vgg' in backbone or 'densenet' in backbone:
        compute_shapes = make_shapes_callback(model)
    else:
        compute_shapes = guess_shapes

    train_data = datagen.flow(train_dict,
                              include_masks=include_masks,
                              compute_shapes=compute_shapes,
                              batch_size=batch_size)

    val_data = datagen_val.flow(test_dict,
                                include_masks=include_masks,
                                compute_shapes=compute_shapes,
                                batch_size=batch_size)

    tensorboard_callback = callbacks.TensorBoard(
        log_dir=os.path.join(log_dir, model_name))

    # fit the model on the batches generated by datagen.flow()
    loss_history = model.fit_generator(
        train_data,
        steps_per_epoch=train_data.y.shape[0] // batch_size,
        epochs=n_epoch,
        validation_data=val_data,
        validation_steps=val_data.y.shape[0] // batch_size,
        callbacks=[
            callbacks.LearningRateScheduler(lr_sched),
            callbacks.ModelCheckpoint(model_path, monitor='val_loss',
                                      verbose=1, save_best_only=True,
                                      save_weights_only=num_gpus >= 2),
            tensorboard_callback,
            callbacks.ReduceLROnPlateau(monitor='loss', factor=0.1,
                                        patience=10, verbose=1, mode='auto',
                                        min_delta=0.0001, cooldown=0,
                                        min_lr=0),
            RedirectModel(
                Evaluate(val_data,
                         iou_threshold=iou_threshold,
                         score_threshold=score_threshold,
                         max_detections=max_detections,
                         tensorboard=tensorboard_callback,
                         weighted_average=weighted_average),
                prediction_model),
        ])

    model.save_weights(model_path)
    np.savez(loss_path, loss_history=loss_history.history)

    average_precisions = evaluate(
        val_data,
        prediction_model,
        iou_threshold=iou_threshold,
        score_threshold=score_threshold,
        max_detections=max_detections,
    )

    # print evaluation
    total_instances = []
    precisions = []
    for label, (average_precision, num_annotations) in average_precisions.items():
        print('{:.0f} instances of class'.format(num_annotations), label,
              'with average precision: {:.4f}'.format(average_precision))
        total_instances.append(num_annotations)
        precisions.append(average_precision)

    if sum(total_instances) == 0:
        print('No test instances found.')
    else:
        print('mAP using the weighted average of precisions among classes: {:.4f}'.format(
            sum([a * b for a, b in zip(total_instances, precisions)]) /
            sum(total_instances)))
        print('mAP: {:.4f}'.format(
            sum(precisions) / sum(x > 0 for x in total_instances)))

    return model
              image_width, image_depth),
              pooling=None,
              size_final_dense=256,
              num_classes=num_classes,
              trainable=True,
              weights=None)
# model = multi_gpu_model(model, gpus=2)

# Now train it
opt_RMSprop = RMSprop(lr=0.0002)
model.compile(optimizer=opt_RMSprop,
              loss='categorical_crossentropy',
              metrics=['accuracy'])
callback_lr_plateau = callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                  factor=0.2,
                                                  patience=5)

train_start = time.time()
os.makedirs(os.path.dirname(dir_tensorboard_logs), exist_ok=True)  # Make tensorboard log directory
model.fit(dataset_train,
          epochs=num_epochs,
          steps_per_epoch=num_steps_per_epoch,
          validation_data=dataset_valid,
          validation_steps=num_steps_per_epoch_valid,
          callbacks=[callback_tensorboard, callback_lr_plateau])
print("Training time: %s seconds" % (time.time() - train_start))
print(model.summary())

# Save the model
def main(params):
    '''create the save dir if it does not exist'''
    try:
        os.stat(params["save_path"])
    except:
        os.makedirs(params["save_path"])

    '''load data files'''
    img_dir = os.path.join(params["data_dir"], "train_images")
    label_dir = os.path.join(params["data_dir"], "train_labels")
    img_val_dir = os.path.join(params["data_dir"], "test_images")
    label_val_dir = os.path.join(params["data_dir"], "test_labels")

    ids_train = [i for i in os.listdir(img_dir)]
    ids_val = [i for i in os.listdir(img_val_dir)]
    num_training_examples = len(ids_train)

    # im_batch, labels_batch, im_displayed \
    #     = lnf.get_clinic_train_data(im_dir=img_dir, seg_dir=label_dir,
    #                                 img_shape=params["img_shape"],
    #                                 batch_size=params["batch_size"])
    #
    # # Running next element in our graph will produce a batch of images
    # plt.figure(figsize=(10, 10))
    #
    # plt.subplot(2, 2, 1)
    # plt.imshow(im_batch[0, :, :, :])
    #
    # plt.subplot(2, 2, 2)
    # plt.imshow(labels_batch[0, :, :, 0])
    #
    # plt.subplot(2, 2, 3)
    # plt.imshow(im_batch[1, :, :, :])
    #
    # plt.subplot(2, 2, 4)
    # plt.imshow(labels_batch[1, :, :, 0])
    #
    # plt.show()

    '''get model'''
    inputs, outputs = model_fn(params["img_shape"])
    model = models.Model(inputs=[inputs], outputs=[outputs])

    '''Compile model'''
    adam = optimizers.Adam(lr=params["learning_rate"], beta_1=0.9, beta_2=0.999,
                           epsilon=None, decay=0.0, amsgrad=False)
    if params["loss_function"] == "dice_loss":
        model.compile(optimizer=adam, loss=dice_loss, metrics=[dice_loss])
    if params["loss_function"] == "bce_dice_loss":
        model.compile(optimizer=adam, loss=bce_dice_loss, metrics=[dice_loss])
    model.summary()

    '''train and save model'''
    save_model_path = os.path.join(params["save_path"], "weights.hdf5")
    cp = tf.keras.callbacks.ModelCheckpoint(filepath=save_model_path,
                                            monitor='val_dice_loss',
                                            save_best_only=True,
                                            verbose=1,
                                            save_weights_only=True)
    learning_rate_reduction = callbacks.ReduceLROnPlateau(monitor='val_dice_loss',
                                                          patience=5,
                                                          verbose=0,
                                                          factor=0.5,
                                                          min_lr=0.0001)

    if params["continuing_training"]:
        '''Load the model's trained weights'''
        model = models.load_model(save_model_path,
                                  custom_objects={'bce_dice_loss': bce_dice_loss,
                                                  'dice_loss': dice_loss})

    for i in range(0, params["epochs"] * len(ids_train)):
        im_batch_val, labels_batch_val, im_displayed_val \
            = lnf.get_clinic_train_data(im_dir=img_val_dir, seg_dir=label_val_dir,
                                        img_shape=params["img_shape"],
                                        batch_size=params["batch_size"])
        im_batch, labels_batch, im_displayed \
            = lnf.get_clinic_train_data(im_dir=img_dir, seg_dir=label_dir,
                                        img_shape=params["img_shape"],
                                        batch_size=params["batch_size"])

        history = model.fit(x=im_batch,
                            y=labels_batch,
                            steps_per_epoch=2,
                            validation_data=(im_batch_val, labels_batch_val),
                            validation_steps=1,
                            callbacks=[cp, learning_rate_reduction])

        '''Visualize the training process'''
        dice = history.history['dice_loss']
        val_dice = history.history['val_dice_loss']
        loss = history.history['loss']
        val_loss = history.history['val_loss']

        np.save(os.path.join(params["save_path"], "train_loss"), np.array(loss))
        np.save(os.path.join(params["save_path"], "validation_loss"), np.array(val_loss))
        np.save(os.path.join(params["save_path"], "train_dice"), np.array(dice))
        np.save(os.path.join(params["save_path"], "validation_dice"), np.array(val_dice))