def test_reduce_to_binary_crossentropy():
    """Focal loss with gamma=0 should be the same as cross-entropy."""
    # From probabilities
    for y_true, y_pred in product(Y_TRUE, Y_PRED_PROB):
        # tf.keras.losses.binary_crossentropy averages its output along the
        # last axis, so we do the same here
        focal_loss = binary_focal_loss(y_true=y_true, y_pred=y_pred, gamma=0)
        focal_loss = tf.math.reduce_mean(focal_loss, axis=-1)
        ce = tf.keras.losses.binary_crossentropy(y_true=y_true, y_pred=y_pred)
        tf.debugging.assert_near(focal_loss, ce)

    # From logits
    for y_true, y_pred in product(Y_TRUE, Y_PRED_LOGITS):
        focal_loss = binary_focal_loss(y_true=y_true, y_pred=y_pred, gamma=0,
                                       from_logits=True)
        ce = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.dtypes.cast(y_true, dtype=tf.float32),
            logits=tf.dtypes.cast(y_pred, dtype=tf.float32),
        )
        tf.debugging.assert_near(focal_loss, ce)

    # From logits, with positive class weighting
    for y_true, y_pred, pos_weight in product(Y_TRUE, Y_PRED_LOGITS, (1, 2)):
        focal_loss = binary_focal_loss(y_true=y_true, y_pred=y_pred, gamma=0,
                                       from_logits=True,
                                       pos_weight=pos_weight)
        ce = tf.nn.weighted_cross_entropy_with_logits(
            labels=tf.dtypes.cast(y_true, dtype=tf.float32),
            logits=tf.dtypes.cast(y_pred, dtype=tf.float32),
            pos_weight=pos_weight,
        )
        tf.debugging.assert_near(focal_loss, ce)
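# The tests in this file iterate over module-level fixtures Y_TRUE,
# Y_PRED_PROB and Y_PRED_LOGITS that are not shown in this excerpt. A minimal
# sketch of what such fixtures might look like (illustrative names and values,
# not the actual fixtures): the same labels/predictions are provided as a
# Python list, a NumPy array and a tf.Tensor so every input type is exercised.
# The imports below are assumed by the rest of the excerpt; binary_focal_loss
# and BinaryFocalLoss are the implementations under test, imported elsewhere.
from itertools import product

import numpy as np
import tensorflow as tf

_y_true = [0, 1, 1, 0, 1]
_y_pred_logits = [-5.0, 3.0, 0.1, -2.0, 4.0]
_y_pred_prob = [float(p)
                for p in 1.0 / (1.0 + np.exp(-np.asarray(_y_pred_logits)))]

Y_TRUE = (_y_true, np.asarray(_y_true), tf.constant(_y_true))
Y_PRED_PROB = (_y_pred_prob, np.asarray(_y_pred_prob),
               tf.constant(_y_pred_prob))
Y_PRED_LOGITS = (_y_pred_logits, np.asarray(_y_pred_logits),
                 tf.constant(_y_pred_logits))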
def test_computation_sanity_checks():
    """Make sure the focal loss computation behaves as expected."""
    for y_true, y_pred_logits in product(Y_TRUE, Y_PRED_LOGITS):
        for y_pred_prob, pos_weight in product(Y_PRED_PROB, (None, 1, 2)):
            for gamma, label_smoothing in product((0, 1, 2), (None, 0.1, 0.5)):
                focal_loss_prob = binary_focal_loss(
                    y_true=y_true,
                    y_pred=y_pred_prob,
                    gamma=gamma,
                    from_logits=False,
                    pos_weight=pos_weight,
                    label_smoothing=label_smoothing,
                )
                focal_loss_logits = binary_focal_loss(
                    y_true=y_true,
                    y_pred=y_pred_logits,
                    gamma=gamma,
                    from_logits=True,
                    pos_weight=pos_weight,
                    label_smoothing=label_smoothing,
                )
                losses = [focal_loss_prob, focal_loss_logits]

                if not (isinstance(y_true, tf.Tensor)
                        or isinstance(y_pred_logits, tf.Tensor)):
                    numpy_focal_loss_logits = numpy_binary_focal_loss(
                        y_true=y_true,
                        y_pred=y_pred_logits,
                        gamma=gamma,
                        from_logits=True,
                        pos_weight=pos_weight,
                        label_smoothing=label_smoothing,
                    )
                    losses.append(numpy_focal_loss_logits)

                if not (isinstance(y_true, tf.Tensor)
                        or isinstance(y_pred_prob, tf.Tensor)):
                    numpy_focal_loss_prob = numpy_binary_focal_loss(
                        y_true=y_true,
                        y_pred=y_pred_prob,
                        gamma=gamma,
                        from_logits=False,
                        pos_weight=pos_weight,
                        label_smoothing=label_smoothing,
                    )
                    losses.append(numpy_focal_loss_prob)

                for i, loss_1 in enumerate(losses):
                    for loss_2 in losses[(i + 1):]:
                        tf.debugging.assert_near(loss_1, loss_2,
                                                 atol=1e-5, rtol=1e-5)
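# numpy_binary_focal_loss, used above as a reference implementation, is
# defined elsewhere in the test suite. A sketch of such a reference, assuming
# the usual focal loss definition
#     loss = -pos_weight * y * (1 - p)**gamma * log(p)
#            - (1 - y) * p**gamma * log(1 - p)
# with optional label smoothing applied to y first. Illustrative only; the
# helper actually used by these tests may differ in detail.
def numpy_binary_focal_loss(y_true, y_pred, gamma, from_logits=False,
                            pos_weight=None, label_smoothing=None):
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if from_logits:
        # Convert logits to probabilities with the sigmoid function
        y_pred = 1.0 / (1.0 + np.exp(-y_pred))
    if label_smoothing is not None:
        # Smooth the hard 0/1 labels toward 0.5
        y_true = (1.0 - label_smoothing) * y_true + 0.5 * label_smoothing
    pos_term = y_true * ((1.0 - y_pred) ** gamma) * np.log(y_pred)
    neg_term = (1.0 - y_true) * (y_pred ** gamma) * np.log(1.0 - y_pred)
    if pos_weight is not None:
        pos_term = pos_weight * pos_term
    return -(pos_term + neg_term)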
def rpn_class_anchorless_loss_graph(rpn_match, rpn_class_logits):
    """RPN anchor classification loss, computed with binary focal loss.

    rpn_match: anchor match targets with a trailing singleton dimension;
        nonzero entries mark anchors that contribute to the loss, and a value
        of 1 marks the positive (foreground) class.
    rpn_class_logits: per-anchor BG/FG classifier logits; the last channel is
        treated as the foreground logit.
    """
    # Squeeze last dim to simplify
    rpn_match = tf.squeeze(rpn_match, -1)

    # Keep only anchors with a nonzero match value (positives and negatives);
    # neutral anchors are ignored
    indices = tf.where(K.not_equal(rpn_match, 0))
    target_class = tf.gather_nd(rpn_match, indices)
    rpn_class_logits = tf.gather_nd(rpn_class_logits, indices)

    # Convert match values to binary class labels (1 = foreground)
    target_class = K.cast(K.equal(target_class, 1), tf.int32)

    # Number of positive anchors, used to normalize the summed loss
    pos_counts = K.sum(K.cast(K.equal(target_class, 1), tf.float32))

    loss = binary_focal_loss(y_true=target_class,
                             y_pred=rpn_class_logits[..., -1],
                             gamma=2, pos_weight=0.75, from_logits=True)
    loss = K.sum(loss) / pos_counts

    # Alternative formulations, kept for reference:
    # loss = BinaryFocalLoss(gamma=2, pos_weight=0.75, from_logits=True)(
    #     y_true=target_class, y_pred=rpn_class_logits[..., -1])
    # loss = focal_loss(gamma=2.0, alpha=0.75, from_logits=True)(
    #     y_true=target_class, y_pred=rpn_class_logits[..., -1])
    # loss = K.sum(loss) / pos_counts

    return loss
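# A quick standalone smoke test of the RPN focal loss on dummy tensors. The
# shapes used here ([batch, anchors, 1] match targets and [batch, anchors, 2]
# BG/FG logits) and the values are illustrative assumptions. Note that the
# division by pos_counts yields inf/nan when a batch has no positive anchors;
# guarding with K.maximum(pos_counts, 1) is one possible safeguard.
def _smoke_test_rpn_class_loss():
    # 1 image, 4 anchors: 2 positive, 1 negative, 1 neutral; random logits
    dummy_rpn_match = tf.constant([[[1], [-1], [0], [1]]], dtype=tf.int32)
    dummy_rpn_logits = tf.random.normal([1, 4, 2])
    return rpn_class_anchorless_loss_graph(dummy_rpn_match, dummy_rpn_logits)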
def test_computation_sanity_checks(self, y_true, y_pred_logits, y_pred_prob,
                                   pos_weight, gamma, label_smoothing):
    """Make sure the focal loss computation behaves as expected."""
    focal_loss_prob = binary_focal_loss(
        y_true=y_true,
        y_pred=y_pred_prob,
        gamma=gamma,
        from_logits=False,
        pos_weight=pos_weight,
        label_smoothing=label_smoothing,
    )
    focal_loss_logits = binary_focal_loss(
        y_true=y_true,
        y_pred=y_pred_logits,
        gamma=gamma,
        from_logits=True,
        pos_weight=pos_weight,
        label_smoothing=label_smoothing,
    )
    losses = [focal_loss_prob, focal_loss_logits]

    if not (isinstance(y_true, tf.Tensor)
            or isinstance(y_pred_logits, tf.Tensor)):
        numpy_focal_loss_logits = numpy_binary_focal_loss(
            y_true=y_true,
            y_pred=y_pred_logits,
            gamma=gamma,
            from_logits=True,
            pos_weight=pos_weight,
            label_smoothing=label_smoothing,
        )
        losses.append(numpy_focal_loss_logits)

    if not (isinstance(y_true, tf.Tensor)
            or isinstance(y_pred_prob, tf.Tensor)):
        numpy_focal_loss_prob = numpy_binary_focal_loss(
            y_true=y_true,
            y_pred=y_pred_prob,
            gamma=gamma,
            from_logits=False,
            pos_weight=pos_weight,
            label_smoothing=label_smoothing,
        )
        losses.append(numpy_focal_loss_prob)

    for i, loss_1 in enumerate(losses):
        for loss_2 in losses[(i + 1):]:
            self.assertAllClose(loss_1, loss_2, atol=1e-5, rtol=1e-5)
def train():
    sample_input = np.empty(
        [FRAMES_PER_VIDEO, FRAME_HEIGHT, FRAME_WIDTH, FRAME_CHANNEL],
        dtype=np.uint8)

    # Read the pre-split training and validation sets
    d_train = pd.read_csv(os.path.join('train.csv'))
    d_valid = pd.read_csv(os.path.join('test.csv'))

    # Number of distinct classes in the training set
    nb_classes = len(set(d_train['class']))

    video_train_generator = video_gen(
        d_train, FRAMES_PER_VIDEO, FRAME_HEIGHT, FRAME_WIDTH, FRAME_CHANNEL,
        nb_classes, batch_size=BATCH_SIZE, augmentations=True)
    video_val_generator = video_gen(
        d_valid, FRAMES_PER_VIDEO, FRAME_HEIGHT, FRAME_WIDTH, FRAME_CHANNEL,
        nb_classes, batch_size=BATCH_SIZE, augmentations=False)

    # Get model
    # model = densenet121_3D_DropOut(sample_input.shape, nb_classes)
    model = T3D169_Dropout(sample_input.shape, nb_classes, d_rate=0.2)

    checkpoint = ModelCheckpoint('T3D_weights.hdf5', monitor='val_loss',
                                 verbose=1, save_best_only=True, mode='min',
                                 save_weights_only=True)
    # reduceLROnPlat = ReduceLROnPlateau(monitor='val_loss', factor=0.1,
    #                                    patience=20, verbose=1, mode='min',
    #                                    min_delta=0.0001, cooldown=2,
    #                                    min_lr=1e-6)
    csvLogger = CSVLogger('T3D_history.csv', append=True)
    tensorboard = TensorBoard(log_dir=f'./logs/T3D-{int(time.time())}')
    callbacks_list = [checkpoint, csvLogger, tensorboard]

    # Compile model; binary_focal_loss(alpha, gamma) is the factory-style
    # implementation that returns a Keras-compatible loss function
    # optim = Adam(lr=1e-4, decay=1e-6)
    # optim = SGD(lr=0.1, momentum=0.9, decay=1e-4, nesterov=True)
    optim = AdaBound()
    model.compile(optimizer=optim,
                  loss=[binary_focal_loss(alpha=.25, gamma=2)],
                  metrics=['accuracy'])

    if os.path.exists('./T3D_weights.hdf5'):
        print('Pre-existing model weights found, loading weights.......')
        model.load_weights('./T3D_weights.hdf5')
        print('Weights loaded')

    # Train model
    print('Training started....')
    train_steps = len(d_train) // BATCH_SIZE
    val_steps = len(d_valid) // BATCH_SIZE

    history = model.fit_generator(
        video_train_generator,
        steps_per_epoch=train_steps,
        epochs=EPOCHS,
        validation_data=video_val_generator,
        validation_steps=val_steps,
        verbose=1,
        callbacks=callbacks_list,
        workers=1,
        use_multiprocessing=True,
    )

    model.save(MODEL_FILE_NAME)
def test_train_dummy_binary_classifier():
    """Train a simple model to make sure that BinaryFocalLoss works."""
    # Data/model parameters
    n_examples = 100
    n_features = 16
    epochs = 3
    random_state = np.random.RandomState(0)

    # Generate some fake data
    x = random_state.binomial(n=1, p=0.5, size=(n_examples, n_features))
    x = 2.0 * x.astype(np.float32) - 1.0
    weights = 100 * np.ones(shape=(n_features, 1)).astype(np.float32)
    y = (x.dot(weights) > 0).astype(np.int8)

    # Number of positive and negative examples
    n_pos = y.sum()
    n_neg = n_examples - n_pos

    for pos_weight in (None, (n_neg / n_pos)):
        for gamma, label_smoothing in product((0, 2), (None, 0.1)):
            for from_logits in (True, False):
                if from_logits:
                    activation = None
                else:
                    activation = 'sigmoid'

                # Just a linear classifier (without bias term)
                model = tf.keras.Sequential(layers=[
                    tf.keras.layers.Input(shape=n_features),
                    tf.keras.layers.Dense(units=1, use_bias=False,
                                          activation=activation),
                ])
                model.compile(
                    optimizer='sgd',
                    loss=BinaryFocalLoss(gamma=gamma, pos_weight=pos_weight,
                                         from_logits=from_logits,
                                         label_smoothing=label_smoothing),
                    metrics=['accuracy'],
                )

                stop_on_nan = tf.keras.callbacks.TerminateOnNaN()
                history = model.fit(x, y, batch_size=n_examples,
                                    epochs=epochs, callbacks=[stop_on_nan])
                history = history.history

                # Check that we didn't stop early: if we did then we
                # encountered NaNs during training, and that shouldn't happen
                assert len(history['loss']) == epochs

                # Check that BinaryFocalLoss and binary_focal_loss agree (at
                # least when averaged)
                model_loss, *_ = model.evaluate(x, y)

                y_pred = model.predict(x)
                loss = binary_focal_loss(y_true=y, y_pred=y_pred,
                                         gamma=gamma,
                                         pos_weight=pos_weight,
                                         from_logits=from_logits,
                                         label_smoothing=label_smoothing)
                loss = tf.math.reduce_mean(loss)
                tf.debugging.assert_near(loss, model_loss)
def test_reduce_to_binary_crossentropy_from_probabilities(self, y_true,
                                                          y_pred):
    """Focal loss with gamma=0 should be the same as cross-entropy."""
    # tf.keras.losses.binary_crossentropy averages its output along the last
    # axis, so we do the same here
    focal_loss = binary_focal_loss(y_true=y_true, y_pred=y_pred, gamma=0)
    focal_loss = tf.math.reduce_mean(focal_loss, axis=-1)
    ce = tf.keras.losses.binary_crossentropy(y_true=y_true, y_pred=y_pred)
    self.assertAllClose(focal_loss, ce)
def test_reduce_to_binary_crossentropy_from_logits(self, y_true, y_pred):
    """Focal loss with gamma=0 should be the same as cross-entropy."""
    focal_loss = binary_focal_loss(y_true=y_true, y_pred=y_pred, gamma=0,
                                   from_logits=True)
    ce = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.dtypes.cast(y_true, dtype=tf.dtypes.float32),
        logits=tf.dtypes.cast(y_pred, dtype=tf.dtypes.float32),
    )
    self.assertAllClose(focal_loss, ce)
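# The function-style test at the top of this file also covers the pos_weight
# case. A parameterized class-method counterpart in the same style as the two
# tests above might look like the sketch below; it assumes the test class's
# parameterization also supplies pos_weight:
def test_reduce_to_weighted_crossentropy_from_logits(self, y_true, y_pred,
                                                     pos_weight):
    """Focal loss with gamma=0 should match weighted cross-entropy."""
    focal_loss = binary_focal_loss(y_true=y_true, y_pred=y_pred, gamma=0,
                                   from_logits=True, pos_weight=pos_weight)
    ce = tf.nn.weighted_cross_entropy_with_logits(
        labels=tf.dtypes.cast(y_true, dtype=tf.dtypes.float32),
        logits=tf.dtypes.cast(y_pred, dtype=tf.dtypes.float32),
        pos_weight=pos_weight,
    )
    self.assertAllClose(focal_loss, ce)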
def test_train_dummy_binary_classifier(self, n_examples, n_features, epochs,
                                       pos_weight, gamma, label_smoothing,
                                       from_logits, random_state):
    """Train a simple model to make sure that BinaryFocalLoss works."""
    # Generate some fake data
    x = random_state.binomial(n=1, p=0.5, size=(n_examples, n_features))
    x = 2.0 * x.astype(np.float32) - 1.0
    weights = 100 * np.ones(shape=(n_features, 1)).astype(np.float32)
    y = (x.dot(weights) > 0).astype(np.int8)

    model = get_dummy_binary_classifier(n_features=n_features, gamma=gamma,
                                        pos_weight=pos_weight,
                                        label_smoothing=label_smoothing,
                                        from_logits=from_logits)
    history = model.fit(x, y, batch_size=n_examples, epochs=epochs,
                        callbacks=[tf.keras.callbacks.TerminateOnNaN()])
    history = history.history

    # Check that we didn't stop early: if we did then we encountered NaNs
    # during training, and that shouldn't happen
    self.assertEqual(len(history['loss']), epochs)

    # Check that BinaryFocalLoss and binary_focal_loss agree (at least when
    # averaged)
    model_loss, *_ = model.evaluate(x, y)

    y_pred = model.predict(x)
    loss = binary_focal_loss(y_true=y, y_pred=y_pred, gamma=gamma,
                             pos_weight=pos_weight, from_logits=from_logits,
                             label_smoothing=label_smoothing)
    loss = tf.math.reduce_mean(loss)
    self.assertAllClose(loss, model_loss)
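# get_dummy_binary_classifier is not shown in this excerpt. Based on the
# inline model construction in the function-style test above, a sketch of what
# such a helper might look like (assumed, not the verified original):
def get_dummy_binary_classifier(n_features, gamma, pos_weight,
                                label_smoothing, from_logits):
    # Just a linear classifier (without bias term); skip the sigmoid when the
    # loss consumes raw logits
    activation = None if from_logits else 'sigmoid'
    model = tf.keras.Sequential(layers=[
        tf.keras.layers.Input(shape=n_features),
        tf.keras.layers.Dense(units=1, use_bias=False, activation=activation),
    ])
    model.compile(
        optimizer='sgd',
        loss=BinaryFocalLoss(gamma=gamma, pos_weight=pos_weight,
                             from_logits=from_logits,
                             label_smoothing=label_smoothing),
        metrics=['accuracy'],
    )
    return model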