def custom_cross_entropy(self, y_true, y_pred):
    """Payoff-weighted binary cross-entropy.

    ``y_true`` is passed through ``splitter`` to obtain the labels and the
    payoffs (per the original comment, the payoffs are carried in the last
    dimension of ``y_true``).  The false-negative and false-positive terms
    of the cross-entropy are then scaled by weights that depend on
    ``self.method``:

    * ``'lay'``:  fn weight = 0.95,          fp weight = |payoffs|
    * ``'back'``: fn weight = |payoffs|,     fp weight = 1

    Args:
        y_true: Target tensor that ``splitter`` separates into labels and
            payoffs.
        y_pred: Predicted probabilities.

    Returns:
        The negated mean of the weighted log-likelihood terms.

    Raises:
        ValueError: If ``self.method`` is neither ``'lay'`` nor ``'back'``.
    """
    # Fail fast with a clear message; previously an unknown method fell
    # through both branches and crashed later on an unbound weight name.
    if self.method not in ('lay', 'back'):
        raise ValueError(
            'Unknown method {!r}; expected "lay" or "back".'.format(self.method)
        )

    y_true, payoffs = splitter(y_true)

    # Only the fn/fp terms contribute to the loss, so only those two
    # weights are built (the tp/tn terms are not part of this loss).
    if self.method == 'lay':
        fp_weight = K.abs(payoffs)
        fn_weight = 0.95
    else:  # 'back'
        fp_weight = 1  # cost
        fn_weight = K.abs(payoffs)  # cost

    loss = -K.mean(
        # fn cost (not backing if it should)
        fn_weight * y_true * K.log(y_pred + _EPSILON)
        # fp cost (backing the wrong one)
        + fp_weight * (1 - y_true) * K.log(1 - y_pred + _EPSILON)
    )
    return loss
def custom_cross_entropy_with_weight_tensor(self, y_true, y_pred):
    """Binary cross-entropy scaled by a per-sample confusion-matrix weight.

    ``y_true`` is passed through ``splitter`` to obtain the labels and the
    payoffs (per the original comment, the payoffs are carried in the last
    dimension of ``y_true``).  Predictions and labels are clipped to
    ``[0, 1]`` and rounded to decide, per element, which confusion-matrix
    cell (tp / tn / fn / fp) it falls into; each cell has its own weight
    chosen by ``self.method``.

    Args:
        y_true: Target tensor that ``splitter`` separates into labels and
            payoffs.
        y_pred: Predicted probabilities.

    Returns:
        ``binary_crossentropy(y_true, y_pred)`` multiplied by the weight
        tensor.

    Raises:
        ValueError: If ``self.method`` is neither ``'lay'`` nor ``'back'``.
    """
    # Fail fast with a clear message; previously an unknown method fell
    # through both branches and crashed later on an unbound weight name.
    if self.method not in ('lay', 'back'):
        raise ValueError(
            'Unknown method {!r}; expected "lay" or "back".'.format(self.method)
        )

    y_true, payoffs = splitter(y_true)

    # Hard 0/1 decisions for predictions and labels.
    y_pred_pos = K.round(K.clip(y_pred, 0, 1))
    y_pred_neg = 1 - y_pred_pos
    y_pos = K.round(K.clip(y_true, 0, 1))
    y_neg = 1 - y_pos

    # Confusion-matrix indicators for every element in the batch.
    tp = y_pos * y_pred_pos
    tn = y_neg * y_pred_neg
    fn = y_pos * y_pred_neg
    fp = y_neg * y_pred_pos

    if self.method == 'lay':
        tp_weight = K.abs(payoffs)
        fp_weight = K.abs(payoffs)
        tn_weight = 1
        fn_weight = 0.95
    else:  # 'back'
        tp_weight = K.abs(payoffs)  # tp (correctly backing)
        fp_weight = 1  # fp cost (backing the wrong one)
        tn_weight = 0  # tn (correctly not backing)
        fn_weight = K.abs(payoffs)  # fn cost (not backing if it should)

    # Exactly one indicator is 1 per element, so this selects that
    # element's weight.
    weight_tensor = (
        tp_weight * tp + fp_weight * fp + tn_weight * tn + fn_weight * fn
    )

    loss = binary_crossentropy(y_true, y_pred)
    weighted_loss = loss * weight_tensor
    return weighted_loss
def huber_loss(y_true, y_pred, clip_value=1):
    """Element-wise Huber loss between ``y_true`` and ``y_pred``.

    Quadratic (``0.5 * x**2``) where ``|x| < clip_value`` and linear
    (``clip_value * (|x| - 0.5 * clip_value)``) elsewhere, with
    ``x = y_true - y_pred``.  See
    https://en.wikipedia.org/wiki/Huber_loss and
    https://medium.com/@karpathy/yes-you-should-understand-backprop-e2f06eab496b

    Args:
        y_true: Target values.
        y_pred: Predicted values.
        clip_value: Positive threshold between the quadratic and linear
            regimes; ``np.inf`` yields a pure squared loss.

    Returns:
        Tensor of per-element losses (no reduction is applied).

    Raises:
        RuntimeError: If the Keras backend is neither TensorFlow nor Theano.
    """
    assert clip_value > 0.

    error = y_true - y_pred
    if np.isinf(clip_value):
        # Special case for infinity: TensorFlow has problems evaluating
        # the comparison `K.abs(x) < np.inf`.
        return .5 * K.square(error)

    abs_error = K.abs(error)
    in_quadratic_zone = abs_error < clip_value
    quadratic = .5 * K.square(error)
    linear = clip_value * (abs_error - .5 * clip_value)

    backend_name = K.backend()
    if backend_name == 'tensorflow':
        import tensorflow as tf
        # Use `tf.select` when this TensorFlow build exposes it, otherwise
        # `tf.where`; argument order is (condition, if_true, if_false).
        chooser = tf.select if hasattr(tf, 'select') else tf.where
        return chooser(in_quadratic_zone, quadratic, linear)
    if backend_name == 'theano':
        from theano import tensor as T
        return T.switch(in_quadratic_zone, quadratic, linear)
    raise RuntimeError('Unknown backend "{}".'.format(K.backend()))
def __call__(self, loss):
    """Add L1/L2 activity penalties on the attached layer's output to ``loss``.

    Each penalty is the batch-axis (axis 0) mean of ``|output|`` or
    ``output**2``, summed over the remaining entries and scaled by
    ``self.l1`` / ``self.l2``.

    Args:
        loss: The loss tensor to augment.

    Returns:
        ``K.in_train_phase(penalised_loss, loss)`` -- the penalised loss is
        the training-phase value and the untouched ``loss`` the alternative.

    Raises:
        Exception: If no ``layer`` attribute has been set on the instance.
    """
    if not hasattr(self, 'layer'):
        raise Exception('Need to call `set_layer` on '
                        'ActivityRegularizer instance '
                        'before calling the instance.')

    activations = self.layer.output
    l1_penalty = self.l1 * K.sum(K.mean(K.abs(activations), axis=0))
    l2_penalty = self.l2 * K.sum(K.mean(K.square(activations), axis=0))

    penalised_loss = loss + l1_penalty
    penalised_loss += l2_penalty
    return K.in_train_phase(penalised_loss, loss)
def siamese_net(shape=(image_shape()[0], image_shape()[1], 1)):
    """Build a two-input siamese model that outputs a single sigmoid score.

    Both inputs go through the same encoder returned by
    ``get_encoding_model(shape)`` (so the encoder is shared); the
    element-wise absolute difference of the two encodings is then fed
    through three Dense -> ReLU -> Dropout(0.4) -> BatchNormalization
    stages (16, 16 and 4 units) and a final 1-unit sigmoid layer.

    Args:
        shape: Input shape for each branch.  The default is evaluated once,
            at definition time, from ``image_shape()`` with a trailing
            dimension of 1.

    Returns:
        A ``Model`` taking ``[left, right]`` inputs and producing the
        sigmoid output.
    """
    input_a = Input(shape=shape)
    input_b = Input(shape=shape)

    # Shared image encoder; its summary is printed as a side effect.
    encoder = get_encoding_model(shape)
    encoder.summary()

    encoded_a = encoder(input_a)
    encoded_b = encoder(input_b)

    # Element-wise |a - b| of the two encodings.
    abs_difference = Lambda(lambda pair: K.abs(pair[0] - pair[1]))
    features = abs_difference([encoded_a, encoded_b])

    # Three identical-looking stages that differ only in width.
    for units in (16, 16, 4):
        features = Dense(units)(features)
        features = ReLU()(features)
        features = Dropout(0.4)(features)
        features = BatchNormalization()(features)

    similarity = Dense(1, activation='sigmoid')(features)
    return Model(inputs=[input_a, input_b], outputs=similarity)
def hybrid_loss(self, y_true, y_pred, delta=0.5):
    """Blend of absolute and squared error, element-wise.

    Computes ``delta * |y_true - y_pred| + (1 - delta) * (y_true - y_pred)**2``.

    Args:
        y_true: Target values.
        y_pred: Predicted values.
        delta: Weight of the absolute-error term; the squared-error term
            gets ``1 - delta``.

    Returns:
        Tensor of per-element blended losses (no reduction is applied).
    """
    residual = y_true - y_pred
    absolute_term = K.abs(residual)
    squared_term = K.square(residual)
    return delta * absolute_term + (1 - delta) * squared_term
def smoothL1(y_true, y_pred):
    """Summed smooth-L1 (Huber-style) loss with threshold ``HUBER_DELTA``.

    For each element with absolute error ``e``: ``0.5 * e**2`` when
    ``e < HUBER_DELTA``, otherwise ``HUBER_DELTA * (e - 0.5 * HUBER_DELTA)``.

    Args:
        y_true: Target values.
        y_pred: Predicted values.

    Returns:
        The sum of the per-element losses.
    """
    abs_error = K.abs(y_true - y_pred)
    quadratic_branch = 0.5 * abs_error ** 2
    linear_branch = HUBER_DELTA * (abs_error - 0.5 * HUBER_DELTA)
    per_element = tf.where(abs_error < HUBER_DELTA, quadratic_branch, linear_branch)
    return K.sum(per_element)
def sequence_mask(sequence):
    """Flag the positions of ``sequence`` that have any non-zero entry along axis 2.

    The maximum absolute value along axis 2 is non-negative, so its sign is
    0 where every entry on that axis is zero and 1 otherwise.

    Args:
        sequence: Tensor with at least three axes.

    Returns:
        Tensor with axis 2 reduced away, holding 0/1 values.
    """
    largest_magnitude = K.max(K.abs(sequence), axis=2)
    return K.sign(largest_magnitude)
def __init__(self, n_classes, input_dims, lr, top_rnns=True, metrics_eval_discard_first_classes=2):
    """Build, compile and plot the tagging model.

    The network takes an input named ``bert_encodings`` of shape
    ``(None, input_dims)`` per sample, optionally passes it through two
    layers produced by ``get_bi_lstm()``, and ends in a softmax ``Dense``
    layer with ``n_classes`` units.  The un-replicated model is kept in
    ``self.model_save``; ``self.model`` is either that same model or a
    ``multi_gpu_model`` wrapper when more than one GPU device is listed.

    Args:
        n_classes: Number of output classes.
        input_dims: Size of the last input dimension.
        lr: Learning rate handed to ``Adam``.
        top_rnns: When truthy, stack the two ``get_bi_lstm()`` layers on
            top of the input.
        metrics_eval_discard_first_classes: Number of leading class
            indices excluded when building the mask for the counting
            metrics.
    """
    self.train_history = None

    # --- network definition -------------------------------------------
    encodings = Input(shape=(None, input_dims), dtype='float32', name='bert_encodings')
    hidden = encodings
    if top_rnns:
        for _ in range(2):
            hidden = get_bi_lstm()(hidden)
    probabilities = Dense(n_classes, activation='softmax')(hidden)
    self.model_save = Model(encodings, probabilities)

    # --- device selection ---------------------------------------------
    # Count GPUs from the device info manually, because virtual devices
    # (e.g. XLA_GPU) cause a wrong number.
    gpus = sum(
        1 for device in device_lib.list_local_devices()
        if device.device_type == 'GPU'
    )
    if gpus > 1:
        self.model = multi_gpu_model(self.model_save, gpus=gpus, cpu_relocation=True)
        logging.info(f"Training using {gpus} GPUs...")
    else:
        self.model = self.model_save
        logging.info("Training using single GPU or CPU...")

    # --- metrics --------------------------------------------------------
    def kept_positions(y_true):
        # 1) Sum the gold values along the class axis, skipping the first
        #    `metrics_eval_discard_first_classes` classes (per the original
        #    comments this is meant to mask out entries such as <PAD>).
        # 2) Broadcast the result back to the original layout by stacking
        #    it `n_classes` times along a new last axis.
        retained = K.sum(y_true[:, :, metrics_eval_discard_first_classes:], axis=-1)
        return K.stack([retained] * n_classes, axis=-1)

    # NOTE(review): ANDCounter is defined elsewhere; presumably it counts
    # entries for which all returned condition tensors hold -- confirm.
    counters = [
        # gold AND rounded prediction
        ANDCounter(
            conditions_and=lambda y_true, y_pred: (
                y_true,
                K.round(y_pred),
                kept_positions(y_true),
            ),
            name='tp'),
        # |gold - 1| AND rounded prediction
        ANDCounter(
            conditions_and=lambda y_true, y_pred: (
                K.abs(y_true - K.ones_like(y_true)),
                K.round(y_pred),
                kept_positions(y_true),
            ),
            name='fp'),
        # gold AND |rounded prediction - 1|
        ANDCounter(
            conditions_and=lambda y_true, y_pred: (
                y_true,
                K.abs(K.round(y_pred) - K.ones_like(y_pred)),
                kept_positions(y_true),
            ),
            name='fn'),
        # gold only, restricted to the kept positions
        ANDCounter(
            conditions_and=lambda y_true, y_pred: (
                y_true,
                kept_positions(y_true),
            ),
            name='total_count'),
    ]

    # --- compile and plot -----------------------------------------------
    optimizer = Adam(lr=lr)
    self.model.compile(
        loss='categorical_crossentropy',
        optimizer=optimizer,
        metrics=counters + ['acc'],
    )
    plot_model(self.model, to_file='model.png', show_shapes=True)