def huber_loss(arg):
    y_true, y_pred = arg
    x = y_true - y_pred
    squared_loss = .5 * K.square(x)
    linear_loss = K.abs(x) - .5
    # Quadratic term for small errors, linear term for large ones.
    loss = tf.where(K.abs(x) < 1., squared_loss, linear_loss)
    return K.sum(loss, axis=-1)
def huber_loss(y_true, y_pred, clip_value):
    # Huber loss, see https://en.wikipedia.org/wiki/Huber_loss and
    # https://medium.com/@karpathy/yes-you-should-understand-backprop-e2f06eab496b
    # for details.
    assert clip_value > 0.

    x = y_true - y_pred
    if np.isinf(clip_value):
        # Special case for infinity since TensorFlow has problems
        # if we compare `K.abs(x) < np.inf`.
        return .5 * K.square(x)

    condition = K.abs(x) < clip_value
    squared_loss = .5 * K.square(x)
    linear_loss = clip_value * (K.abs(x) - .5 * clip_value)
    if K.backend() == 'tensorflow':
        import tensorflow as tf
        if hasattr(tf, 'select'):
            return tf.select(condition, squared_loss, linear_loss)  # condition, true, false
        else:
            return tf.where(condition, squared_loss, linear_loss)  # condition, true, false
    elif K.backend() == 'theano':
        from theano import tensor as T
        return T.switch(condition, squared_loss, linear_loss)
    else:
        raise RuntimeError('Unknown backend "{}".'.format(K.backend()))
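For a loss with an extra hyperparameter like `clip_value` above, Keras still expects a two-argument callable at compile time. A minimal usage sketch; the wrapper name, the clip value of 1.0, and the `model` object are illustrative assumptions, not part of the snippet above:

def clipped_huber(y_true, y_pred):
    # Fix the extra hyperparameter so Keras sees a (y_true, y_pred) signature.
    return huber_loss(y_true, y_pred, clip_value=1.0)

# `model` is assumed to be an already-built Keras model.
model.compile(optimizer='adam', loss=clipped_huber)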
def huberloss(y_true, y_pred):
    err = y_true - y_pred
    cond = K.abs(err) < 1.0
    L2 = 0.5 * K.square(err)
    L1 = K.abs(err) - 0.5
    loss = tf.where(cond, L2, L1)  # Keras does not cover where function in tensorflow :-(
    return K.mean(loss)
def loss(y_true, y_pred):
    y_true = denormalize(y_true, y_mean, y_std)
    y_pred = denormalize(y_pred, y_mean, y_std)
    diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true), K.epsilon(), None))
    return 100. * K.mean(diff, axis=-1)
def stock_loss(y_true, y_pred):
    alpha = 100.
    loss = K.switch(K.less(y_true * y_pred, 0),
                    alpha * y_pred**2 - K.sign(y_true) * y_pred + K.abs(y_true),
                    K.abs(y_true - y_pred))
    return K.mean(loss, axis=-1)
def _huber_loss(self, y_true, y_pred, clip_delta=1.0):
    error = y_true - y_pred
    cond = K.abs(error) <= clip_delta
    squared_loss = 0.5 * K.square(error)
    # Linear branch of the Huber loss, used when |error| > clip_delta.
    linear_loss = 0.5 * K.square(clip_delta) + clip_delta * (K.abs(error) - clip_delta)
    return K.mean(tf.where(cond, squared_loss, linear_loss))
def metrics_mape(rate_true, rate_pred):
    if args.norm_ans:
        rate_true = denormalize(rate_true, rate_mean, rate_std)
        rate_pred = denormalize(rate_pred, rate_mean, rate_std)
    diff = K.abs((rate_true - rate_pred) / K.clip(K.abs(rate_true), K.epsilon(), None))
    return 100. * K.mean(diff, axis=-1)
def first_order(var_x, axis=1):
    """ First Order Function from Shoanlu GAN """
    img_nrows = var_x.shape[1]
    img_ncols = var_x.shape[2]
    if axis == 1:
        return K.abs(var_x[:, :img_nrows - 1, :img_ncols - 1, :] - var_x[:, 1:, :img_ncols - 1, :])
    if axis == 2:
        return K.abs(var_x[:, :img_nrows - 1, :img_ncols - 1, :] - var_x[:, :img_nrows - 1, 1:, :])
    return None
def func(y_true, y_pred):
    Y_true = K.reshape(y_true, (-1, ) + img_shape)
    Y_pred = K.reshape(y_pred, (-1, ) + img_shape)
    t1 = K.pow(K.abs(Y_true[:, :, :, 1:, :] - Y_true[:, :, :, :-1, :]) -
               K.abs(Y_pred[:, :, :, 1:, :] - Y_pred[:, :, :, :-1, :]), alpha)
    t2 = K.pow(K.abs(Y_true[:, :, :, :, :-1] - Y_true[:, :, :, :, 1:]) -
               K.abs(Y_pred[:, :, :, :, :-1] - Y_pred[:, :, :, :, 1:]), alpha)
    out = K.mean(K.batch_flatten(t1 + t2), -1)
    return out
def first_order(self, x, axis=1):
    img_nrows = x.shape[1]
    img_ncols = x.shape[2]
    if axis == 1:
        return K.abs(x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, 1:, :img_ncols - 1, :])
    elif axis == 2:
        return K.abs(x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, :img_nrows - 1, 1:, :])
    else:
        return None
def setup(self):
    distorted_A, fake_A, fake_sz64_A, mask_A, self.path_A, self.path_mask_A, self.path_abgr_A, self.path_bgr_A = self.cycle_variables(self.model.netGA)
    distorted_B, fake_B, fake_sz64_B, mask_B, self.path_B, self.path_mask_B, self.path_abgr_B, self.path_bgr_B = self.cycle_variables(self.model.netGB)
    real_A = Input(shape=self.model.img_shape)
    real_B = Input(shape=self.model.img_shape)

    if self.use_lsgan:
        self.loss_fn = lambda output, target: K.mean(K.abs(K.square(output - target)))
    else:
        self.loss_fn = lambda output, target: -K.mean(
            K.log(output + 1e-12) * target + K.log(1 - output + 1e-12) * (1 - target))

    # ========== Define Perceptual Loss Model ==========
    if self.use_perceptual_loss:
        from keras.models import Model
        from keras_vggface.vggface import VGGFace
        vggface = VGGFace(include_top=False, model='resnet50', input_shape=(224, 224, 3))
        vggface.trainable = False
        out_size55 = vggface.layers[36].output
        out_size28 = vggface.layers[78].output
        out_size7 = vggface.layers[-2].output
        vggface_feat = Model(vggface.input, [out_size55, out_size28, out_size7])
        vggface_feat.trainable = False
    else:
        vggface_feat = None

    loss_DA, loss_GA = self.define_loss(self.model.netDA, real_A, fake_A, fake_sz64_A, distorted_A, vggface_feat)
    loss_DB, loss_GB = self.define_loss(self.model.netDB, real_B, fake_B, fake_sz64_B, distorted_B, vggface_feat)

    if self.use_mask_refinement:
        loss_GA += 1e-3 * K.mean(K.square(mask_A))
        loss_GB += 1e-3 * K.mean(K.square(mask_B))
    else:
        loss_GA += 3e-3 * K.mean(K.abs(mask_A))
        loss_GB += 3e-3 * K.mean(K.abs(mask_B))

    w_fo = 0.01
    loss_GA += w_fo * K.mean(self.first_order(mask_A, axis=1))
    loss_GA += w_fo * K.mean(self.first_order(mask_A, axis=2))
    loss_GB += w_fo * K.mean(self.first_order(mask_B, axis=1))
    loss_GB += w_fo * K.mean(self.first_order(mask_B, axis=2))

    weightsDA = self.model.netDA.trainable_weights
    weightsGA = self.model.netGA.trainable_weights
    weightsDB = self.model.netDB.trainable_weights
    weightsGB = self.model.netGB.trainable_weights

    # Adam(..).get_updates(...)
    training_updates = Adam(lr=self.lrD, beta_1=0.5).get_updates(weightsDA, [], loss_DA)
    self.netDA_train = K.function([distorted_A, real_A], [loss_DA], training_updates)
    training_updates = Adam(lr=self.lrG, beta_1=0.5).get_updates(weightsGA, [], loss_GA)
    self.netGA_train = K.function([distorted_A, real_A], [loss_GA], training_updates)

    training_updates = Adam(lr=self.lrD, beta_1=0.5).get_updates(weightsDB, [], loss_DB)
    self.netDB_train = K.function([distorted_B, real_B], [loss_DB], training_updates)
    training_updates = Adam(lr=self.lrG, beta_1=0.5).get_updates(weightsGB, [], loss_GB)
    self.netGB_train = K.function([distorted_B, real_B], [loss_GB], training_updates)
def huber_loss(y_true, y_pred):
    error = y_true - y_pred
    condition = K.abs(error) < HUBER_LOSS_DELTA
    l2 = 0.5 * K.square(error)
    l1 = HUBER_LOSS_DELTA * (K.abs(error) - 0.5 * HUBER_LOSS_DELTA)
    loss = tf.where(condition, l2, l1)
    return K.mean(loss)
def myLoss(y_true, y_pred):
    p1 = K.mean(K.abs(y_pred - y_true), axis=-1)
    print("Shape: " + str(K.int_shape(y_pred)))
    # t2 = tf.slice(y_pred, 2, -1)
    yy = y_true - y_pred
    t2 = yy[:, 2:, :]
    t3 = yy[:, 1:-1, :]
    # t3 = tf.slice(y_pred, 1, -2)
    print("Shape2: " + str(K.int_shape(t2)[1]))
    print("Shape3: " + str(K.int_shape(t3)[1]))
    return p1 + K.sum(K.abs(t3 - t2)) / K.int_shape(t3)[1]
def normals_metric(y_true, y_pred):
    y_true = K.variable(y_true)
    y_pred = K.variable(y_pred)

    y_true = K.expand_dims(y_true, 0)

    filter_y = K.variable(np.array([[0., -0.5, 0.],
                                    [0., 0., 0.],
                                    [0., 0.5, 0.]]).reshape(3, 3, 1, 1))

    filter_x = K.variable(np.array([[0., 0., 0.],
                                    [0.5, 0., -0.5],
                                    [0., 0., 0.]]).reshape(3, 3, 1, 1))

    dzdx = K.conv2d(K.exp(y_true), filter_x, padding='same')
    dzdy = K.conv2d(K.exp(y_true), filter_y, padding='same')

    dzdx_ = dzdx * -1.0
    dzdy_ = dzdy * -1.0

    mag_norm = K.pow(dzdx, 2) + K.pow(dzdy, 2) + 1.0
    mag_norm = K.sqrt(mag_norm)

    N3 = 1.0 / mag_norm
    N1 = dzdx_ / mag_norm
    N2 = dzdy_ / mag_norm

    normals = K.concatenate(tensors=[N1, N2, N3], axis=-1)

    dzdx_pred = K.conv2d(K.exp(y_pred), filter_x, padding='same')
    dzdy_pred = K.conv2d(K.exp(y_pred), filter_y, padding='same')

    mag_norm_pred = K.pow(dzdx_pred, 2) + K.pow(dzdy_pred, 2) + 1.0
    mag_norm_pred = K.sqrt(mag_norm_pred)

    grad_x = K.concatenate(tensors=[1.0 / mag_norm_pred, 0.0 / mag_norm_pred, dzdx_pred / mag_norm_pred], axis=-1)
    grad_y = K.concatenate(tensors=[0.0 / mag_norm_pred, 1.0 / mag_norm_pred, dzdy_pred / mag_norm_pred], axis=-1)

    dot_term_x = K.mean(K.sum(normals[0, :, :, :] * grad_x[0, :, :, :], axis=-1, keepdims=True), axis=-1)
    dot_term_y = K.mean(K.sum(normals[0, :, :, :] * grad_y[0, :, :, :], axis=-1, keepdims=True), axis=-1)

    dot_term_x = K.abs(dot_term_x)
    dot_term_y = K.abs(dot_term_y)

    return K.eval(K.mean(dot_term_x)), K.eval(K.mean(dot_term_y))
def rpn_loss_regr_fixed_num(y_true, y_pred):
    if K.image_dim_ordering() == 'th':
        x = y_true[:, 4 * num_anchors:, :, :] - y_pred
        x_abs = K.abs(x)
        x_bool = K.less_equal(x_abs, 1.0)
        return lambda_rpn_regr * K.sum(
            y_true[:, :4 * num_anchors, :, :] * (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(
            epsilon + y_true[:, :4 * num_anchors, :, :])
    else:
        x = y_true[:, :, :, 4 * num_anchors:] - y_pred
        x_abs = K.abs(x)
        x_bool = K.cast(K.less_equal(x_abs, 1.0), tf.float32)
        return lambda_rpn_regr * K.sum(
            y_true[:, :, :, :4 * num_anchors] * (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(
            epsilon + y_true[:, :, :, :4 * num_anchors])
def call(self, x):
    # Sample from the noise distribution.
    e_i = K.random_normal((self.input_dim, self.units))
    e_j = K.random_normal((self.units,))

    # We use the factorized Gaussian noise variant from Section 3 of Fortunato et al.
    eW = K.sign(e_i) * (K.sqrt(K.abs(e_i))) * K.sign(e_j) * (K.sqrt(K.abs(e_j)))
    eB = K.sign(e_j) * (K.abs(e_j) ** (1 / 2))

    noise_injected_weights = K.dot(x, self.mu_weight + (self.sigma_weight * eW))
    noise_injected_bias = self.mu_bias + (self.sigma_bias * eB)
    output = K.bias_add(noise_injected_weights, noise_injected_bias)
    if self.activation is not None:
        output = self.activation(output)
    return output
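For reference, the factorized Gaussian noise that this layer implements (Fortunato et al., "Noisy Networks for Exploration") is usually written as below; the symbols follow the paper, not names from the code. Note that this snippet draws `e_i` with shape `(input_dim, units)` rather than a length-`input_dim` vector, so it matches the paper's factorization only up to that choice.

f(x) = \operatorname{sgn}(x)\sqrt{|x|}, \qquad
\varepsilon^{w}_{i,j} = f(\varepsilon_i)\,f(\varepsilon_j), \qquad
\varepsilon^{b}_{j} = f(\varepsilon_j),

y = \bigl(\mu^{w} + \sigma^{w} \odot \varepsilon^{w}\bigr)^{\top} x
  + \bigl(\mu^{b} + \sigma^{b} \odot \varepsilon^{b}\bigr).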
def metrics_mae(rate_true, rate_pred):
    if args.norm_ans:
        rate_true = denormalize(rate_true, rate_mean, rate_std)
        rate_pred = denormalize(rate_pred, rate_mean, rate_std)
    rate_true = K.round(rate_true)
    rate_pred = K.round(rate_pred)
    return K.mean(K.abs(rate_pred - rate_true), axis=-1)
def __call__(self, x):
    regularization = 0
    if self.l1:
        regularization += self.l1 * K.sum(K.abs(K.sum(x, axis=self.axis) - 1.))
    if self.l2:
        regularization += self.l2 * K.sum(K.square(K.sum(x, axis=self.axis) - 1.))
    return regularization
def sigmoid_cross_entropy(y_true, y_pred):
    z = K.flatten(y_true)
    x = K.flatten(y_pred)
    q = 10
    l = (1 + (q - 1) * z)
    # K.maximum gives the element-wise max(-x, 0) needed for the numerically
    # stable form log(1 + exp(-|x|)) + max(-x, 0) = log(1 + exp(-x)).
    loss = (K.sum((1 - z) * x) + K.sum(l * (K.log(1 + K.exp(-K.abs(x))) + K.maximum(-x, 0.)))) / 500
    return loss
def get_loss(self):
    loss = 0.0
    if self.l1:
        loss += K.mean(K.abs(self.p)) * self.l1
    if self.l2:
        loss += K.mean(K.square(self.p)) * self.l2
    return loss
def huber_loss(y_true, y_pred, clip_value):
    # Huber loss, see https://en.wikipedia.org/wiki/Huber_loss and
    # https://medium.com/@karpathy/yes-you-should-understand-backprop-e2f06eab496b for details.
    assert clip_value > 0.

    x = y_true - y_pred
    if np.isinf(clip_value):
        # Special case for infinity since TensorFlow has problems if we compare `K.abs(x) < np.inf`.
        return .5 * K.square(x)

    condition = K.abs(x) < clip_value
    squared_loss = .5 * K.square(x)
    linear_loss = clip_value * (K.abs(x) - .5 * clip_value)
    if hasattr(tf, 'select'):
        return tf.select(condition, squared_loss, linear_loss)  # condition, true, false
    else:
        return tf.where(condition, squared_loss, linear_loss)  # condition, true, false
def get_updates(self, params, constraints, loss):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    t = self.iterations + 1
    lr_t = self.lr / (1. - K.pow(self.beta_1, t))

    shapes = [K.get_variable_shape(p) for p in params]
    # zero init of 1st moment
    ms = [K.zeros(shape) for shape in shapes]
    # zero init of exponentially weighted infinity norm
    us = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + ms + us

    for p, g, m, u in zip(params, grads, ms, us):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        u_t = K.maximum(self.beta_2 * u, K.abs(g))
        p_t = p - self.get_param_learning_rate_t(p, t, lr_t) * m_t / (u_t + self.epsilon)

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(u, u_t))

        new_p = p_t
        # apply constraints
        if p in constraints:
            c = constraints[p]
            new_p = c(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
def build_loss(self):
    # Infinity norm
    if np.isinf(self.p):
        value = K.max(self.img)
    else:
        value = K.pow(K.sum(K.pow(K.abs(self.img), self.p)), 1. / self.p)

    return normalize(self.img, value)
def dice_coef(y_true, y_pred, smooth=1): """ Dice = (2*|X & Y|)/ (|X|+ |Y|) = 2*sum(|A*B|)/(sum(A^2)+sum(B^2)) ref: https://arxiv.org/pdf/1606.04797v1.pdf """ intersection = K.sum(K.abs(y_true * y_pred), axis=-1) return (2. * intersection + smooth) / (K.sum(K.square(y_true),-1) + K.sum(K.square(y_pred),-1) + smooth)
def mask_aware_max(x):
    mask = K.not_equal(K.sum(K.abs(x), axis=2, keepdims=True), 0)
    mask = K.cast(mask, 'float32')
    vecmin = K.min(x, axis=1, keepdims=True)

    xstar = x + (vecmin * (1 - mask))  # setting masked values to the min value
    return K.max(xstar, axis=1, keepdims=False)
def toChw(self, position):
    samples, targetDim = K.shape(position)
    position = K.reshape(position, (samples, 2, 2))
    centroid = K.sum(position, axis=1) / 2.0
    hw = K.abs(THEO.diff(position, axis=1)[:, 0, :])
    chw = K.concatenate((centroid, hw), axis=1)
    chw = chw[:, [0, 1, 3, 2]]  # Changing from cwh to chw
    return chw
def get_similarity(self):
    ''' Specify similarity in configuration under 'similarity_params' -> 'mode'
    If a parameter is needed for the model, specify it in 'similarity_params'

    Example configuration:

    config = {
        ... other parameters ...
        'similarity_params': {
            'mode': 'gesd',
            'gamma': 1,
            'c': 1,
        }
    }

    cosine: dot(a, b) / sqrt(dot(a, a) * dot(b, b))
    polynomial: (gamma * dot(a, b) + c) ^ d
    sigmoid: tanh(gamma * dot(a, b) + c)
    rbf: exp(-gamma * l2_norm(a - b) ^ 2)
    euclidean: 1 / (1 + l2_norm(a - b))
    exponential: exp(-gamma * l2_norm(a - b))
    gesd: euclidean * sigmoid
    aesd: (euclidean + sigmoid) / 2
    '''
    params = self.similarity_params
    similarity = params['mode']

    axis = lambda a: len(a._keras_shape) - 1
    dot = lambda a, b: K.batch_dot(a, b, axes=axis(a))
    l2_norm = lambda a, b: K.sqrt(K.sum((a - b) ** 2, axis=axis(a), keepdims=True))
    l1_norm = lambda a, b: K.sum(K.abs(a - b), axis=axis(a), keepdims=True)

    if similarity == 'cosine':
        return lambda x: dot(x[0], x[1]) / K.sqrt(dot(x[0], x[0]) * dot(x[1], x[1]))
    elif similarity == 'polynomial':
        return lambda x: (params['gamma'] * dot(x[0], x[1]) + params['c']) ** params['d']
    elif similarity == 'sigmoid':
        return lambda x: K.tanh(params['gamma'] * dot(x[0], x[1]) + params['c'])
    elif similarity == 'rbf':
        return lambda x: K.exp(-1 * params['gamma'] * l2_norm(x[0], x[1]) ** 2)
    elif similarity == 'euclidean':
        return lambda x: 1 / (1 + l2_norm(x[0], x[1]))
    elif similarity == 'l1':
        return lambda x: -l1_norm(x[0], x[1])
    elif similarity == 'exponential':
        return lambda x: K.exp(-1 * params['gamma'] * l2_norm(x[0], x[1]))
    elif similarity == 'gesd':
        euclidean = lambda x: 1 / (1 + l2_norm(x[0], x[1]))
        sigmoid = lambda x: 1 / (1 + K.exp(-1 * params['gamma'] * (dot(x[0], x[1]) + params['c'])))
        return lambda x: euclidean(x) * sigmoid(x)
    elif similarity == 'aesd':
        euclidean = lambda x: 0.5 / (1 + l2_norm(x[0], x[1]))
        sigmoid = lambda x: 0.5 / (1 + K.exp(-1 * params['gamma'] * (dot(x[0], x[1]) + params['c'])))
        return lambda x: euclidean(x) * sigmoid(x) if False else euclidean(x) + sigmoid(x)
    else:
        raise Exception('Invalid similarity: {}'.format(similarity))
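The returned callable takes a pair of tensors, so it is typically wrapped in a `Lambda` layer. A minimal sketch, assuming `question_embedding` and `answer_embedding` are Keras tensors of the same shape and `self` owns `get_similarity` (the variable names are illustrative, not from the snippet):

from keras.layers import Lambda

similarity = self.get_similarity()
# Apply the configured similarity to a pair of equally-shaped embeddings.
score = Lambda(similarity)([question_embedding, answer_embedding])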
def calculateGpu(self, gtPosition, predPosition):
    pShape = K.shape(gtPosition)
    inputDim = K.ndim(gtPosition)
    gtPosition = K.reshape(gtPosition, (-1, pShape[-1]))
    predPosition = K.reshape(predPosition, (-1, pShape[-1]))
    left = K.maximum(predPosition[:, 0], gtPosition[:, 0])
    top = K.maximum(predPosition[:, 1], gtPosition[:, 1])
    right = K.minimum(predPosition[:, 2], gtPosition[:, 2])
    bottom = K.minimum(predPosition[:, 3], gtPosition[:, 3])
    intersect = (right - left) * ((right - left) > 0) * (bottom - top) * ((bottom - top) > 0)
    label_area = K.abs(gtPosition[:, 2] - gtPosition[:, 0]) * K.abs(gtPosition[:, 3] - gtPosition[:, 1])
    predict_area = K.abs(predPosition[:, 2] - predPosition[:, 0]) * K.abs(predPosition[:, 3] - predPosition[:, 1])
    union = label_area + predict_area - intersect
    iou = intersect / union
    # iouShape = K.concatenate([pShape[:-1], (1, )])
    iou = THT.reshape(iou, (pShape[0], pShape[1], 1), ndim=inputDim)
    return iou
def __call__(self, x):
    regularization = 0
    dimorder = self.axis + list(set(range(K.ndim(x))) - set(self.axis))
    lp = laplacian1d(K.permute_dimensions(x, dimorder))
    if self.l1:
        regularization += K.sum(self.l1 * K.abs(lp))
    if self.l2:
        regularization += K.sum(self.l2 * K.square(lp))
    return regularization
def staircase_loss(y_true, y_pred, var_a=16.0, cnst=1.0/255.0):
    """ Keras Staircase Loss """
    height = cnst
    width = cnst
    var_x = K.clip(K.abs(y_true - y_pred) - 0.5 * cnst, 0.0, 1.0)
    loss = height * (K.tanh(var_a * ((var_x / width) - tf.floor(var_x / width) - 0.5)) /
                     (2.0 * K.tanh(var_a / 2.0)) + 0.5 + tf.floor(var_x / width))
    loss += 1e-10
    return K.mean(loss, axis=-1)
def dice_coef(y_true, y_pred, smooth=1):
    intersection = K.sum(K.abs(y_true * y_pred), axis=-1)
    return (2. * intersection + smooth) / (
        K.sum(K.square(y_true), -1) + K.sum(K.square(y_pred), -1) + smooth)
def linesearch_morethuente(n, x, f, g, s, step, xp, gp, wp, evaluate, param):
    # Check the input parameters for errors.
    if step <= 0.0:
        return LBFGSResult.ERR_INVALIDPARAMETERS, f, step

    # Compute the initial gradient in the search direction.
    dginit = K.dot(g, s)

    # Make sure that s points to a descent direction.
    if dginit > 0.0:
        return LBFGSResult.ERR_INCREASEGRADIENT, f, step

    # Initialize local variables.
    finit = f
    count = 0
    uinfo = 0
    brackt = False
    stage1 = True
    dgtest = param.ftol * dginit
    width = param.max_step - param.min_step
    prev_width = 2.0 * width

    # The variables stx, fx, dgx contain the values of the step,
    # function, and directional derivative at the best step.
    # The variables sty, fy, dgy contain the value of the step,
    # function, and derivative at the other endpoint of
    # the interval of uncertainty.
    # The variables stp, f, dg contain the values of the step,
    # function, and derivative at the current step.
    stx = sty = 0.0
    fx = fy = finit
    dgx = dgy = dginit
    stp = step

    while True:
        # Set the minimum and maximum steps to correspond to the
        # present interval of uncertainty.
        if brackt:
            stmin = K.minimum(stx, sty)
            stmax = K.maximum(stx, sty)
        else:
            stmin = stx
            stmax = stp + 4.0 * (stp - stx)

        # Clip the step in the range of [stpmin, stpmax].
        stp = K.clip(stp, param.min_step, param.max_step)

        # If an unusual termination is to occur then let
        # stp be the lowest point obtained so far.
        if brackt and (stp <= stmin or stmax <= stp or param.max_linesearch < count or uinfo != 0 or
                       (stmax - stmin) <= (param.xtol * stmax)):
            stp = stx

        # Compute the current value of x.
        veccpy(x, xp, n)
        vecadd(x, s, stp, n)

        # Evaluate the function and gradient values.
        f = evaluate(x, g, n, stp)
        dg = K.dot(g, s)
        ftest1 = finit + stp * dgtest

        # Increase the number of iterations.
        count += 1

        # Test for errors and convergence.
        if brackt and (stp <= stmin or stmax <= stp or uinfo != 0):
            # Rounding errors prevent further progress.
            return LBFGSResult.ERR_ROUNDING_ERROR, f, stp
        if stp == param.max_step and f <= ftest1 and dg <= dgtest:
            # The step is the maximum value.
            return LBFGSResult.ERR_MAXIMUMSTEP, f, stp
        if stp == param.min_step and (ftest1 < f or dgtest <= dg):
            # The step is the minimum value.
            return LBFGSResult.ERR_MINIMUMSTEP, f, stp
        if brackt and (stmax - stmin) <= (param.xtol * stmax):
            # Relative width of the interval of uncertainty is at most xtol.
            return LBFGSResult.ERR_WIDTHTOOSMALL, f, stp
        if param.max_linesearch <= count:
            # Maximum number of iterations.
            return LBFGSResult.ERR_MAXIMUMLINESEARCH, f, stp
        if f <= ftest1 and K.abs(dg) <= param.gtol * (-dginit):
            # The sufficient decrease condition and the directional derivative condition hold.
            return count, f, stp

        # In the first stage we seek a step for which the modified
        # function has a nonpositive value and nonnegative derivative.
        if stage1 and f <= ftest1 and (K.minimum(param.ftol, param.gtol) * dginit) <= dg:
            stage1 = False

        # A modified function is used to predict the step only if
        # we have not obtained a step for which the modified
        # function has a nonpositive function value and nonnegative
        # derivative, and if a lower function value has been
        # obtained but the decrease is not sufficient.
        if stage1 and ftest1 < f and f <= fx:
            # Define the modified function and derivative values.
            fm = f - stp * dgtest
            fxm = fx - stx * dgtest
            fym = fy - sty * dgtest
            dgm = dg - dgtest
            dgxm = dgx - dgtest
            dgym = dgy - dgtest

            # Call update_trial_interval() to update the interval of
            # uncertainty and to compute the new step.
            (uinfo, stx, fxm, dgxm, sty, fym, dgym, stp, fm, dgm, brackt) = update_trial_interval(
                stx, fxm, dgxm, sty, fym, dgym, stp, fm, dgm, stmin, stmax, brackt)

            # Reset the function and gradient values for f.
            fx = fxm + stx * dgtest
            fy = fym + sty * dgtest
            dgx = dgxm + dgtest
            dgy = dgym + dgtest
        else:
            # Call update_trial_interval() to update the interval of
            # uncertainty and to compute the new step.
            (uinfo, stx, fx, dgx, sty, fy, dgy, stp, f, dg, brackt) = update_trial_interval(
                stx, fx, dgx, sty, fy, dgy, stp, f, dg, stmin, stmax, brackt)

        # Force a sufficient decrease in the interval of uncertainty.
        if brackt:
            if 0.66 * prev_width <= K.abs(sty - stx):
                stp = stx + 0.5 * (sty - stx)
            prev_width = width
            width = K.abs(sty - stx)
def imit_mean_absolute_error(y_true, y_pred):
    return K.mean(K.abs(y_pred - y_true), axis=-1)
def disag_error(y_true, y_pred):
    return K.sum(K.square(K.abs(y_pred - y_true)), axis=-1) / 2
def get_siamese_model(input_shape): """ Model architecture based on the one provided in: http://www.cs.utoronto.ca/~gkoch/files/msc-thesis.pdf """ # Define the tensors for the two input images left_input = Input(input_shape) right_input = Input(input_shape) # Convolutional Neural Network model = Sequential() model.add( Conv2D(64, (10, 10), activation='relu', input_shape=input_shape, kernel_initializer=initialize_weights, kernel_regularizer=l2(2e-4))) model.add(MaxPooling2D()) model.add( Conv2D(128, (7, 7), activation='relu', kernel_initializer=initialize_weights, bias_initializer=initialize_bias, kernel_regularizer=l2(2e-4))) model.add(MaxPooling2D()) model.add( Conv2D(128, (4, 4), activation='relu', kernel_initializer=initialize_weights, bias_initializer=initialize_bias, kernel_regularizer=l2(2e-4))) model.add(MaxPooling2D()) model.add( Conv2D(256, (4, 4), activation='relu', kernel_initializer=initialize_weights, bias_initializer=initialize_bias, kernel_regularizer=l2(2e-4))) model.add(Flatten()) model.add( Dense(10, activation='sigmoid', kernel_regularizer=l2(1e-3), kernel_initializer=initialize_weights, bias_initializer=initialize_bias)) # Generate the encodings (feature vectors) for the two images encoded_l = model(left_input) encoded_r = model(right_input) # Add a customized layer to compute the absolute difference between the encodings L1_layer = Lambda(lambda tensors: K.abs(tensors[0] - tensors[1])) L1_distance = L1_layer([encoded_l, encoded_r]) # Add a dense layer with a sigmoid unit to generate the similarity score prediction = Dense(1, activation='sigmoid', bias_initializer=initialize_bias)(L1_distance) # Connect the inputs with the outputs siamese_net = Model(inputs=[left_input, right_input], outputs=prediction) # return the model return siamese_net
def l1_loss(y_true, y_pred):
    return K.mean(K.abs(y_pred - y_true))
def l1_reg(weight_matrix):
    return 0.01 * K.sum(K.abs(weight_matrix))
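Keras accepts any callable with this signature as a regularizer, so the function can be attached directly to a layer. A minimal sketch (the layer size and activation are arbitrary choices for illustration):

from keras.layers import Dense

# The callable is invoked on the layer's kernel and its value is added to the total loss.
layer = Dense(64, activation='relu', kernel_regularizer=l1_reg)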
def _g_std(p, epsilon):
    mu, s = p
    return mu + K.abs(s) * epsilon
def mean_absolute_error(y_true, y_pred):
    return K.mean(K.abs(y_pred - y_true), axis=0)[-1]
def sae(y_true, y_pred):
    return K.sum(K.abs(y_pred - y_true))
def call(self, inputs, **kwargs):
    outputs = K.clip(K.abs(inputs), K.epsilon(), None) * K.sign(inputs)
    return outputs
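This `call` keeps the sign of each input but clamps its magnitude to at least `K.epsilon()`, which is useful before divisions or logarithms. The same transform can be sketched as a standalone `Lambda` layer; this is illustrative only and not part of the original class:

from keras.layers import Lambda

# sign(x) * max(|x|, epsilon): identical values, but never exactly zero.
safe_nonzero = Lambda(lambda t: K.clip(K.abs(t), K.epsilon(), None) * K.sign(t))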
def generator_l1_loss(y_true, y_pred):
    BATCH_SIZE = 10
    return K.mean(K.abs(K.flatten(y_pred) - K.flatten(y_true)), axis=-1)
    'mixed4': 2.,
    'mixed5': 1.5,
}

layer_dict = dict([(layer.name, layer) for layer in model.layers])

loss = K.variable(0.)
for layer_name in layer_contributions:
    coeff = layer_contributions[layer_name]
    activation = layer_dict[layer_name].output
    scaling = K.prod(K.cast(K.shape(activation), 'float32'))
    # Only non-border pixels contribute, to avoid border artifacts.
    loss += coeff * K.sum(K.square(activation[:, 2:-2, 2:-2, :])) / scaling

dream = model.input
grads = K.gradients(loss, dream)[0]
grads /= K.maximum(K.mean(K.abs(grads)), 1e-7)

outputs = [loss, grads]
fetch_loss_and_grads = K.function([dream], outputs)

def eval_loss_and_grads(x):
    outs = fetch_loss_and_grads([x])
    loss_value = outs[0]
    grad_values = outs[1]
    return loss_value, grad_values

def gradient_ascent(x, iterations, step, max_loss=None):
    for i in range(iterations):
        loss_value, grad_values = eval_loss_and_grads(x)
        # Stop early if the loss exceeds max_loss, otherwise take a gradient-ascent step.
        if max_loss is not None and loss_value > max_loss:
            break
        x += step * grad_values
    return x
settings = {
    'features': {
        'block4_conv1': 0.08,
        'block4_conv2': 0.03,
        'block4_conv3': 0.04
    },
}

for layer_name in settings['features']:
    coeff = settings['features'][layer_name]
    x = layer_dict[layer_name].output
    scaling = K.prod(K.cast(K.shape(x), 'float32'))
    loss = coeff * K.sum(K.square(x[:, 2: -2, 2: -2, :])) / scaling

grads = K.gradients(loss, dream)[0]
grads /= K.maximum(K.mean(K.abs(grads)), K.epsilon())

outputs = [loss, grads]
fetch_loss_and_grads = K.function([dream], outputs)

def eval_loss_and_grads(x):
    outs = fetch_loss_and_grads([x])
    loss_value = outs[0]
    grad_values = outs[1]
    return loss_value, grad_values

def gradient_ascent(x, iterations, step, max_loss=None):
    for i in range(iterations):
        loss_value, grad_values = eval_loss_and_grads(x)
        # Stop early if the loss exceeds max_loss, otherwise take a gradient-ascent step.
        if max_loss is not None and loss_value > max_loss:
            break
        x += step * grad_values
    return x
def coeff(y_true, y_pred, smooth, tresh):
    return K.sqrt(K.sum(K.square(y_true - y_pred) * K.abs(y_true)))
def _g_var(p, epsilon):
    mu, var = p
    return mu + K.sqrt(K.abs(var)) * epsilon
def call(self, inputs, **kwargs):
    return [K.abs(inputs), K.abs(inputs)]
def _g_std_chol(p, epsilon):
    mu, s, chol = p
    epsilon = K.batch_dot(epsilon, chol, axes=(1, 2))
    return mu + K.abs(s) * epsilon
def __call__(self, x):
    regularization = self.alpha * K.sum(K.abs(x - self.value))
    return regularization
def _g_var_chol(p, epsilon):
    mu, var, chol = p
    epsilon = K.batch_dot(epsilon, chol, axes=(1, 2))
    return mu + K.sqrt(K.abs(var)) * epsilon
def L1_loss(self, y_true, y_pred):
    return k.mean(k.abs(y_true - y_pred))
def smooth_l1_loss(y_true, y_pred):
    diff = K.abs(y_true - y_pred)
    less_than_one = K.cast(K.less(diff, 1.0), "float32")
    loss = (less_than_one * 0.5 * diff**2) + (1 - less_than_one) * (diff - 0.5)
    return loss
def owlqn_x1norm(x, start, n):
    s = 0.0
    for i in np.arange(start, n):
        s += K.abs(x[i])
    return s
def max_error(y_true, y_pred):
    return K.max(K.abs(y_true - y_pred))
def earth_mover_loss(y_true, y_pred):
    cdf_ytrue = K.cumsum(y_true, axis=-1)
    cdf_ypred = K.cumsum(y_pred, axis=-1)
    samplewise_emd = K.sqrt(K.mean(K.square(K.abs(cdf_ytrue - cdf_ypred)), axis=-1))
    return K.mean(samplewise_emd)
def extrap_loss(y_true, y_hat):
    y_true = y_true[:, 1:]
    y_hat = y_hat[:, 1:]
    return 0.5 * K.mean(K.abs(y_true - y_hat), axis=-1)
def update_trial_interval(x, fx, dx, y, fy, dy, t, ft, dt, tmin, tmax, brackt):
    nx, nfx, ndx, ny, nfy, ndy, nt, nft, ndt, nbrackt = x, fx, dx, y, fy, dy, t, ft, dt, brackt

    # Check the input parameters for errors.
    if nbrackt:
        if t <= K.minimum(x, y) or K.maximum(x, y) <= t:
            # The trial value t is out of the interval.
            return LBFGSResult.ERR_OUTOFINTERVAL, nx, nfx, ndx, ny, nfy, ndy, nt, nft, ndt, nbrackt
        if 0. <= dx * (t - x):
            # The function must decrease from x.
            return LBFGSResult.ERR_INCREASEGRADIENT, nx, nfx, ndx, ny, nfy, ndy, nt, nft, ndt, nbrackt
        if tmax < tmin:
            # Incorrect tmin and tmax specified.
            return LBFGSResult.ERR_INCORRECT_TMINMAX, nx, nfx, ndx, ny, nfy, ndy, nt, nft, ndt, nbrackt

    # flag whether the value is close to upper bound.
    bound = False
    # flag whether dt and dx have the same sign.
    dsign = dt * dx > 0.0
    # minimizer of an interpolated cubic.
    mc = 0.0
    # minimizer of an interpolated quadratic.
    mq = 0.0

    # Trial value selection.
    if fx < ft:
        # Case 1: a higher function value.
        # The minimum is brackt. If the cubic minimizer is closer
        # to x than the quadratic one, the cubic one is taken, else
        # the average of the minimizers is taken.
        nbrackt = True
        bound = True
        mc = CUBIC_MINIMIZER(x, fx, dx, t, ft, dt)
        mq = QUARD_MINIMIZER(x, fx, dx, t, ft)
        if K.abs(mc - x) < K.abs(mq - x):
            nt = mc
        else:
            nt = mc + 0.5 * (mq - mc)
    elif dsign:
        # Case 2: a lower function value and derivatives of
        # opposite sign. The minimum is brackt. If the cubic
        # minimizer is closer to x than the quadratic (secant) one,
        # the cubic one is taken, else the quadratic one is taken.
        nbrackt = True
        bound = False
        mc = CUBIC_MINIMIZER(x, fx, dx, t, ft, dt)
        mq = QUARD_MINIMIZER2(x, dx, t, dt)
        if K.abs(mc - t) > K.abs(mq - t):
            nt = mc
        else:
            nt = mq
    elif K.abs(dt) < K.abs(dx):
        # Case 3: a lower function value, derivatives of the
        # same sign, and the magnitude of the derivative decreases.
        # The cubic minimizer is only used if the cubic tends to
        # infinity in the direction of the minimizer or if the minimum
        # of the cubic is beyond t. Otherwise the cubic minimizer is
        # defined to be either tmin or tmax. The quadratic (secant)
        # minimizer is also computed and if the minimum is brackt
        # then the minimizer closest to x is taken, else the one
        # farthest away is taken.
        bound = True
        mc = CUBIC_MINIMIZER2(x, fx, dx, t, ft, dt, tmin, tmax)
        mq = QUARD_MINIMIZER2(x, dx, t, dt)
        if nbrackt:
            if K.abs(t - mc) < K.abs(t - mq):
                nt = mc
            else:
                nt = mq
        else:
            if K.abs(t - mc) > K.abs(t - mq):
                nt = mc
            else:
                nt = mq
    else:
        # Case 4: a lower function value, derivatives of the
        # same sign, and the magnitude of the derivative does
        # not decrease. If the minimum is not brackt, the step
        # is either tmin or tmax, else the cubic minimizer is taken.
        bound = False
        if nbrackt:
            nt = CUBIC_MINIMIZER(t, ft, dt, y, fy, dy)
        elif x < t:
            nt = tmax
        else:
            nt = tmin

    # Update the interval of uncertainty. This update does not
    # depend on the new step or the case analysis above.
    #
    # - Case a: if f(x) < f(t),
    #       x <- x, y <- t.
    # - Case b: if f(t) <= f(x) && f'(t)*f'(x) > 0,
    #       x <- t, y <- y.
    # - Case c: if f(t) <= f(x) && f'(t)*f'(x) < 0,
    #       x <- t, y <- x.
    if fx < ft:
        # Case a
        ny = t
        nfy = ft
        ndy = dt
    else:
        # Case c
        if dsign:
            ny = x
            nfy = fx
            ndy = dx
        # Cases b and c
        nx = t
        nfx = ft
        ndx = dt

    # Clip the new trial value in [tmin, tmax].
    nt = K.clip(nt, tmin, tmax)

    # Redefine the new trial value if it is close to the upper bound
    # of the interval.
    if nbrackt and bound:
        mq = x + 0.66 * (y - x)
        if x < y:
            nt = K.minimum(mq, nt)
        else:
            nt = K.maximum(mq, nt)

    return 0, nx, nfx, ndx, ny, nfy, ndy, nt, nft, ndt, nbrackt
def wm_b_box_evaluation_with_output_rounding(step_size, reported_precision, model, watermark, key_embed,
                                             grad_select, sample_size, num_pixels, watermark_key_set_x,
                                             target_label, num_classes):
    input_shape = watermark_key_set_x.shape[1:]
    idx = random.sample(range(watermark_key_set_x.shape[0]), sample_size)
    grad_x = watermark_key_set_x[idx, ::]

    y_true = K.placeholder(shape=model.output.shape)
    model_output = K.placeholder(shape=model.output.shape)
    cross_ent = K.categorical_crossentropy(y_true, model_output)
    get_cross_ent = K.function([model_output, y_true], [cross_ent])

    estimated_grads = np.zeros((num_pixels, 1))
    grad_select_indeces = []
    number_of_career_nodes = int(sum(K.get_value(grad_select).tolist()))
    for index, selected in enumerate(K.get_value(grad_select).tolist()):
        if selected == 1.0:
            grad_select_indeces.append(int(index))

    start_predicted_probs = model.predict(grad_x)
    for i, career_node in enumerate(grad_select_indeces):
        rounded_predicted_probs = np.around(start_predicted_probs, reported_precision)
        start_cross_ents = get_cross_ent([
            rounded_predicted_probs,
            keras.utils.to_categorical(target_label * np.ones(grad_x.shape[0], ), num_classes)
        ])[0]

        grad_x_moved = grad_x + K.get_value(
            K.reshape(step_size * K.cast(keras.utils.to_categorical([career_node], num_pixels),
                                         dtype=K.floatx()),
                      shape=input_shape))
        end_predicted_probs = model.predict(grad_x_moved)
        rounded_predicted_probs = np.around(end_predicted_probs, reported_precision)
        end_cross_ents = get_cross_ent([
            rounded_predicted_probs,
            keras.utils.to_categorical(target_label * np.ones(grad_x.shape[0], ), num_classes)
        ])[0]

        grads = (end_cross_ents - start_cross_ents) / step_size
        estimated_grads[career_node] = np.mean(grads)

        end_predicted_probs = None
        end_cross_ents = None
        rounded_predicted_probs = None
        start_cross_ents = None

        print("Memory usage : ", py.memory_info()[0] // (10**6),
              ' career node [', i, '/', number_of_career_nodes, ']', end='\r')

    estimated_grads = K.variable(estimated_grads)
    projection = K.cast(K.reshape(0 <= (K.dot(key_embed, estimated_grads)), watermark.shape), K.floatx())
    matching_accuracy = K.get_value(1.00 - K.mean(K.abs(projection - watermark)))

    # Memory Release
    estimated_grads = None
    start_predicted_probs = None
    end_predicted_probs = None
    grad_select_indeces = None
    grad_x = None
    projection = None

    print('\nMatching_accuracy : ', matching_accuracy)
    return matching_accuracy
def ew_maL1(x):
    return K.abs(x[0] - x[1])
def get_siamese_model(input_shape): """ Model architecture """ # Define the tensors for the two inputs left_input = Input(input_shape) right_input = Input(input_shape) # Convolutional Neural Network model = Sequential() model.add( Conv1D(64, 10, activation='relu', input_shape=input_shape, kernel_initializer=initialize_weights, kernel_regularizer=l2(2e-4))) model.add(MaxPooling1D()) model.add( Conv1D(128, 7, activation='relu', kernel_initializer=initialize_weights, kernel_regularizer=l2(2e-4))) model.add(MaxPooling1D()) model.add( Conv1D(128, 4, activation='relu', kernel_initializer=initialize_weights, kernel_regularizer=l2(2e-4))) model.add(MaxPooling1D()) model.add( Conv1D(256, 4, activation='relu', kernel_initializer=initialize_weights, kernel_regularizer=l2(2e-4))) model.add(MaxPooling1D()) model.add(Flatten()) model.add( Dense(512, activation='sigmoid', kernel_regularizer=l2(1e-3), kernel_initializer=initialize_weights, bias_initializer=initialize_bias)) # Generate the encodings (feature vectors) for the two images encoded_l = model(left_input) encoded_r = model(right_input) # Add a customized layer to compute the absolute difference between the encodings L1_layer = Lambda(lambda tensors: K.abs(tensors[0] - tensors[1])) L1_distance = L1_layer([encoded_l, encoded_r]) # Add a dense layer with a sigmoid unit to generate the similarity score prediction = Dense(1, activation='sigmoid', bias_initializer=initialize_bias)(L1_distance) # Connect the inputs with the outputs siamese_net = Model(inputs=[left_input, right_input], outputs=prediction) # return the model optimizer = Adam(lr=0.00006) siamese_net.compile(loss="binary_crossentropy", optimizer=optimizer) return siamese_net