def test_get_img_shape_on_2d_image():
    """Check `utils.get_img_shape` for both image data formats.

    Whatever the backend layout, the helper is expected to report the
    shape as (samples, channels, dim1, dim2).
    """
    n, channels, dim1, dim2 = 5, 4, 1, 2
    expected = (n, channels, dim1, dim2)

    # Channels-first input is already laid out in the expected order.
    K.set_image_data_format('channels_first')
    channels_first_input = K.ones(shape=(n, channels, dim1, dim2))
    assert expected == utils.get_img_shape(channels_first_input)

    # Channels-last input must be reported in the same order.
    K.set_image_data_format('channels_last')
    channels_last_input = K.ones(shape=(n, dim1, dim2, channels))
    assert expected == utils.get_img_shape(channels_last_input)
def build(self):
    """Create the weight matrix and bias, then register the regularizers.

    The weight has shape (input_shape[2], output_dim); the bias has shape
    (output_dim,) and is initialized to ones.
    """
    feature_dim = self.input_shape[2]

    self.W = self.init((feature_dim, self.output_dim),
                       name='{}_W'.format(self.name))
    # The bias deliberately starts at one (a zero init was tried and dropped).
    self.b = K.ones((self.output_dim,), name='{}_b'.format(self.name))
    self.trainable_weights = [self.W, self.b]

    self.regularizers = []
    # Parameter regularizers are attached to the variable they constrain.
    for regularizer, param in ((self.W_regularizer, self.W),
                               (self.b_regularizer, self.b)):
        if regularizer:
            regularizer.set_param(param)
            self.regularizers.append(regularizer)

    # The activity regularizer is attached to the layer itself.
    if self.activity_regularizer:
        self.activity_regularizer.set_layer(self)
        self.regularizers.append(self.activity_regularizer)

    if self.initial_weights is None:
        return
    self.set_weights(self.initial_weights)
    del self.initial_weights
def test_dropout(layer_class):
    """Exercise dropout and recurrent dropout on a recurrent layer class.

    Runs once with `unroll=True` and once with `unroll=False`.
    """
    sequence_shape = (timesteps, embedding_dim)
    batch_shape = (num_samples, timesteps, embedding_dim)

    for unroll in (True, False):
        layer_test(
            layer_class,
            kwargs={
                'units': units,
                'dropout': 0.1,
                'recurrent_dropout': 0.1,
                'unroll': unroll,
            },
            input_shape=batch_shape,
        )

        # Test that dropout is applied during training
        symbolic_input = K.ones(batch_shape)
        training_layer = layer_class(units,
                                     dropout=0.5,
                                     recurrent_dropout=0.5,
                                     input_shape=sequence_shape)
        output = training_layer(symbolic_input)
        assert output._uses_learning_phase

        # With an explicit training flag the output no longer depends on
        # the learning phase.
        output = training_layer(symbolic_input, training=True)
        assert not output._uses_learning_phase

        # Test that dropout is not applied during testing
        data = np.random.random(batch_shape)
        inference_layer = layer_class(units,
                                      dropout=0.5,
                                      recurrent_dropout=0.5,
                                      unroll=unroll,
                                      input_shape=sequence_shape)
        model = Sequential([inference_layer])
        assert model.uses_learning_phase
        first_prediction = model.predict(data)
        second_prediction = model.predict(data)
        assert_allclose(first_prediction, second_prediction)
def get_output(self, train=False):
    """Return log occupancies, normalized over a steric-hindrance window.

    The input `X` is negated and passed to `theano_calc_log_occs` together
    with `self.chem_affinity`. When `self.steric_hindrance_win_len` is
    non-zero, a log normalization factor computed from windowed sums is
    subtracted.

    NOTE(review): this block uses a Python 2 `print` statement and
    Theano-style tensor methods (`X.shape[i]`, `.sum(...)`, `TT.*`);
    `TT` is presumably `theano.tensor` -- confirm before porting.
    """
    # Debug trace of the layer's declared output shape.
    print "LogNormalizedOccupancy", self.output_shape
    X = self.get_input(train)
    # calculate the log occupancies
    log_occs = theano_calc_log_occs(-X, self.chem_affinity)
    # reshape the output so that the forward and reverse complement
    # occupancies are viewed as different tracks
    log_occs = K.reshape(log_occs, (X.shape[0], 1, 2*X.shape[1], X.shape[3]))
    if self.steric_hindrance_win_len == 0:
        # No window configured: nothing to normalize by.
        log_norm_factor = 0
    else:
        # correct occupancies for overlapping binding sites
        occs = K.exp(log_occs)
        # All-ones kernel of width 2*win_len-1 along the last axis, so the
        # convolution below is a sliding-window sum.
        kernel = K.ones((1, 1, 1, 2*self.steric_hindrance_win_len-1), dtype='float32')
        win_occ_sum = K.conv2d(occs, kernel, border_mode='same').sum(axis=2, keepdims=True)
        # exp(sum(log(1 - occ))) over the window, i.e. the product of (1 - occ).
        win_prb_all_unbnd = TT.exp(
            K.conv2d(K.log(1-occs), kernel, border_mode='same')).sum(axis=2, keepdims=True)
        log_norm_factor = TT.log(win_occ_sum + win_prb_all_unbnd)
    #start = max(0, self.steric_hindrance_win_len-1)
    #stop = min(self.output_shape[3],
    #           self.output_shape[3]-(self.steric_hindrance_win_len-1))
    #rv = log_occs[:,:,:,start:stop] - log_norm_factor
    rv = (log_occs - log_norm_factor)
    # Move the doubled track axis back to position 1 for the layer output.
    return K.reshape(
        rv, (X.shape[0], 2*X.shape[1], 1, X.shape[3])
    )
def residual_drop(x, input_shape, output_shape, strides=(1, 1)):
    """Build a gated two-convolution residual block.

    The block computes conv(x) + shortcut(x), applies ReLU, and then
    selects between that result and the bare shortcut through a `gate`
    variable. A dict holding the block's `death_rate` and `gate` variables
    is appended to the module-level `add_tables` list.

    Args:
        x: input tensor of the block.
        input_shape: shape of `x` without the batch axis; index 0 is read
            as the channel count.
        output_shape: shape of the block output without the batch axis;
            index 0 is used as the number of filters.
        strides: convolution stride, also used to pool the shortcut.

    Returns:
        The output tensor of the final gating `Lambda` layer.

    NOTE(review): reads a module-level `death_rate` that is not defined in
    this block -- confirm it is set before this function is called.
    """
    global add_tables

    nb_filter = output_shape[0]
    # Residual branch: conv -> BN -> ReLU -> conv -> BN (channel axis 1).
    conv = Convolution2D(nb_filter, 3, 3, subsample=strides, border_mode="same")(x)
    conv = BatchNormalization(axis=1)(conv)
    conv = Activation("relu")(conv)
    conv = Convolution2D(nb_filter, 3, 3, border_mode="same")(conv)
    conv = BatchNormalization(axis=1)(conv)

    # Down-sample the shortcut so it matches a strided residual branch.
    if strides[0] >= 2:
        x = AveragePooling2D(strides)(x)

    # Widen the shortcut along the channel axis when the block adds channels.
    if (output_shape[0] - input_shape[0]) > 0:
        pad_shape = (1, output_shape[0] - input_shape[0], output_shape[1], output_shape[2])
        # NOTE(review): the extra channels are filled with ones, not zeros -- confirm intended.
        padding = K.ones(pad_shape)
        padding = K.repeat_elements(padding, K.shape(x)[0], axis=0)
        x = Lambda(lambda y: K.concatenate([y, padding], axis=1), output_shape=output_shape)(x)

    _death_rate = K.variable(death_rate)
    # In the test phase the residual branch is scaled by (1 - death_rate);
    # in the training phase it is passed through unchanged.
    scale = K.ones_like(conv) - _death_rate
    conv = Lambda(lambda c: K.in_test_phase(scale * c, c), output_shape=output_shape)(conv)

    out = merge([conv, x], mode="sum")
    out = Activation("relu")(out)

    # gate == 1 selects the full block output, otherwise the shortcut only.
    gate = K.variable(1, dtype="uint8")
    add_tables += [{"death_rate": _death_rate, "gate": gate}]
    return Lambda(lambda tensors: K.switch(gate, tensors[0], tensors[1]), output_shape=output_shape)([out, x])
def build(self, input_shape):
    """Build the parent LSTM weights, then add the gain and bias vectors.

    Three gain vectors (`self.gs`, initialized to ones) and three bias
    vectors (`self.bs`, initialized to zeros) are created and appended to
    `self.trainable_weights`. The first two have length `4 * output_dim`,
    the last one has length `output_dim`.

    Args:
        input_shape: shape forwarded unchanged to the parent `build`.
    """
    super(LSTM_LN, self).build(input_shape)
    self.gs, self.bs = [], []
    # `range` works on both Python 2 and 3 (the original used `xrange`).
    for i in range(3):
        f = 1 if i == 2 else 4
        size = (f * self.output_dim,)
        # The original mixed `str.format` with a `%i` placeholder, so the
        # index was never substituted and every variable ended up with the
        # same literal name ("<name>_g%i"). Use `{}` for both fields.
        self.gs.append(K.ones(size, name='{}_g{}'.format(self.name, i)))
        self.bs.append(K.zeros(size, name='{}_b{}'.format(self.name, i)))

    self.trainable_weights += self.gs + self.bs
def call(self, inputs, training=None):
    """Normalize `inputs` with the stored moving statistics.

    Computes (inputs - moving_mean) / sqrt(moving_variance + epsilon),
    then scales by gamma and shifts by beta. `training` is not used.
    """
    static_shape = K.int_shape(inputs)
    # Shape of ones everywhere except the normalized axis, so the
    # per-feature vectors broadcast against `inputs`.
    stat_shape = [1] * len(static_shape)
    stat_shape[self.axis] = static_shape[self.axis]

    mean = K.reshape(self.moving_mean, stat_shape)
    variance = K.reshape(self.moving_variance, stat_shape)
    gamma = K.reshape(self.gamma, stat_shape)
    beta = K.reshape(self.beta, stat_shape)

    numerator = K.ones(shape=stat_shape, dtype='float32')
    denominator = K.sqrt(variance + K.constant(self.epsilon, dtype='float32'))
    inv_std = numerator / denominator

    centered = inputs - mean
    return centered * inv_std * gamma + beta
def tversky_loss(y_true, y_pred):
    """Tversky loss with alpha = beta = 0.5.

    Sums are taken over axes (0, 1, 2); the per-class ratios are then
    summed and subtracted from the size of the last axis.
    """
    alpha = beta = 0.5
    reduce_axes = (0, 1, 2)

    ones = K.ones(K.shape(y_true))
    not_pred = ones - y_pred  # proba that voxels are not class i
    not_true = ones - y_true

    overlap = K.sum(y_pred * y_true, reduce_axes)
    pred_only = K.sum(y_pred * not_true, reduce_axes)
    true_only = K.sum(not_pred * y_true, reduce_axes)

    denominator = overlap + alpha * pred_only + beta * true_only
    # when summing over classes, the total has dynamic range [0 Ncl]
    total = K.sum(overlap / denominator)

    n_classes = K.cast(K.shape(y_true)[-1], 'float32')
    return n_classes - total
def call(self, x, mask=None):
    """Gather rows of the (optionally dropped-out) weight matrix for `x`.

    Row 0 of the weight matrix is always multiplied by zero before the
    lookup. `mask` is not used.
    """
    if 0. < self.dropout < 1.:
        keep_prob = 1. - self.dropout
        # One Bernoulli draw per row, rescaled by 1 / keep_prob.
        row_keep = K.random_binomial((self.input_dim,), p=keep_prob)
        row_keep = K.expand_dims(row_keep * (1. / keep_prob))
        weights = K.in_train_phase(self.W * row_keep, self.W)
    else:
        weights = self.W

    # Column vector [0, 1, 1, ..., 1]: zeroes out row 0 only.
    row_mask = K.concatenate(
        [K.zeros((1,)), K.ones((self.input_dim - 1,))], axis=0)
    row_mask = K.expand_dims(row_mask)

    return K.gather(weights * row_mask, x)
def call(self, x):
    """Apply the corner correction to `x`.

    x[i] -> x[i] + sum(k, Corner(k, i) * x[k] * prod(j, j!=k, (1-x[j])))

    x is [mb, n]; the intermediate R is [mb, n, n].
    """
    # [mb, n, 1] dot [1, n] -> [mb, n, n]: repeat (1 - x) along a new last axis.
    ones_row = K.ones((1, self.dim))
    complement = K.dot(1.0 - x[..., None], ones_row)

    # Force the diagonal to one so that x[k] is excluded from its own product.
    eye = K.eye(self.dim)[None, ...]
    off_diagonal = 1.0 - eye
    # R[mb, i, j] = { 1 - x[mb, i] if i != j, else 1 }
    R = complement * off_diagonal + eye

    Q = x * K.prod(R, axis=1)
    correction = K.dot(Q, self.corners)
    return x + correction
def loss_tv(self, mask, y_comp):
    """Total variation loss, used for smoothing the hole region, see eq. 6."""
    n_channels = mask.shape[3]

    # Dilate the hole region (1 - mask) with a 3x3 kernel of all 1s.
    ones_kernel = K.ones(shape=(3, 3, n_channels, n_channels))
    hole = 1 - mask
    dilated = K.conv2d(hole, ones_kernel,
                       data_format='channels_last', padding='same')

    # Binarize to {0., 1.} and keep only the dilated hole region of y_comp.
    dilated = K.cast(K.greater(dilated, 0), 'float32')
    P = dilated * y_comp

    # L1 distance between neighbours along axis 1 and along axis 2.
    tv_axis1 = self.l1(P[:, 1:, :, :], P[:, :-1, :, :])
    tv_axis2 = self.l1(P[:, :, 1:, :], P[:, :, :-1, :])
    return tv_axis1 + tv_axis2
def _flat_lrp(layer, R, parameter):
    """Distribute relevance for each output evenly to the output neurons'
    receptive fields.

    `parameter` is accepted but not used.
    """
    print('_maxpooling3d_flat_lrp')

    pool = layer.pool_size
    out_shape = layer.output_shape

    # Uniform weights over one pooling window, normalized to sum to one.
    Z = K.ones((pool[0], pool[1], pool[2], 1), dtype=K.floatx())
    Zs = K.sum(Z, axis=[0, 1, 2], keepdims=True)
    uniform = Z / Zs

    # Insert three singleton axes so each output broadcasts over its window.
    relevance = K.reshape(
        R,
        (-1, out_shape[1], out_shape[2], out_shape[3], 1, 1, 1, out_shape[4]))
    result = uniform * relevance

    return patches.restitch_volume_patches(
        result, layer.input_shape, layer.pool_size, layer.strides,
        layer.padding)
def _generate_controller_dropout_mask(self, inputs, training=None):
    """Create the dropout masks applied to the controller.

    When `self.controller_dropout` lies strictly between 0 and 1, three
    masks of shape (units, memory.shape[0]) are stored in
    `self._controller_dropout_mask`; otherwise the attribute is set to
    None.

    Args:
        inputs: not used; kept for interface compatibility.
        training: forwarded to `K.in_train_phase` to select the
            dropped-out mask or the all-ones mask.
    """
    if 0 < self.controller_dropout < 1:
        ones = K.ones((self.units, self.memory.shape[0]))

        def dropped_inputs():
            # The guard above tests `controller_dropout`, so the mask must
            # be drawn with that same rate. The original used
            # `self.dropout` here, which applied the wrong rate.
            return K.dropout(ones, self.controller_dropout)

        self._controller_dropout_mask = [
            K.in_train_phase(dropped_inputs, ones, training=training)
            for _ in range(3)
        ]
    else:
        self._controller_dropout_mask = None
def call(self, x):
    """Update the running mean/count and return the mean tiled per sample.

    The result is scaled by min(1, new_count / cap).
    """
    batch_size = K.shape(x)[0]

    # get new mean and count, and register them as update ops
    new_mean, new_count = _mean_update(self.mean, self.count, x, self.cap)
    self.add_update([(self.count, new_count), (self.mean, new_mean)], x)

    # prep for broadcasting: ones of shape (batch,) + mean.shape
    tiled_shape = tf.concat(
        (K.reshape(batch_size, (1,)), K.shape(self.mean)), 0)
    ones = K.ones(tiled_shape)

    # the first few 1000 should not matter that much towards this cost
    ramp = K.minimum(1., new_count / self.cap)
    tiled_mean = ones * K.expand_dims(new_mean, 0)
    return ramp * tiled_mean
def get_attention_decoder_mask(length):
    """Calculate the bias that keeps the decoder autoregressive.

    Builds a tensor that masks out illegal connections, so the prediction
    at position i cannot draw information from future positions.

    Args:
        length: int length of sequences in batch.

    Returns:
        float tensor of shape [1, 1, length, length]
    """
    neg_inf = -1e9
    # Keep the lower triangle (diagonal included): positions <= i are legal.
    all_ones = K.ones((length, length))
    allowed = K.tf.matrix_band_part(all_ones, -1, 0)
    allowed = K.reshape(allowed, (1, 1, length, length))
    # Legal positions get a bias of 0, illegal ones a large negative bias.
    return neg_inf * (1.0 - allowed)
def build(self, input_shape):
    """Create the weights used by the partial-convolution layer.

    The layer takes a list of two inputs, [img, mask]; only the shape of
    the first is read here.

    Creates:
        self.kernel: trainable image kernel of shape
            kernel_size + (n_channels, filters).
        self.kernel_mask: all-ones kernel of the same shape, used to
            convolve the mask.
        self.pconv_padding: ((rows, rows), (cols, cols)) zero padding.
        self.window_size: kernel_size[0] * kernel_size[1], used for
            normalization.
        self.bias: trainable bias of shape (filters,), or None when
            `use_bias` is False.

    Args:
        input_shape: list of shapes for [img, mask].
    """
    assert isinstance(input_shape, list)

    channel_axis = 1 if self.data_format == 'channels_first' else -1
    n_channels = input_shape[0][channel_axis]

    # Kernel for the image convolution.
    kernel_shape = self.kernel_size + (n_channels, self.filters)
    self.kernel = self.add_weight(
        name='kernel',
        shape=kernel_shape,
        initializer=self.kernel_initializer,
        regularizer=self.kernel_regularizer,
        trainable=True,
    )

    # Fixed all-ones kernel for the mask convolution.
    self.kernel_mask = K.ones(shape=kernel_shape, name='mask-kernel')

    # Padding that keeps the output the same size as the input. The
    # original derived both pairs from kernel_size[0], which is only right
    # for square kernels; the column padding must follow kernel_size[1].
    pad_rows = (self.kernel_size[0] - 1) // 2
    pad_cols = (self.kernel_size[1] - 1) // 2
    self.pconv_padding = ((pad_rows, pad_rows), (pad_cols, pad_cols))

    # Window size - used for normalization
    self.window_size = self.kernel_size[0] * self.kernel_size[1]

    if self.use_bias:
        self.bias = self.add_weight(shape=(self.filters,),
                                    initializer=self.bias_initializer,
                                    name='bias',
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)
    else:
        # Always define the attribute so later reads cannot raise
        # AttributeError.
        self.bias = None

    self.built = True
def get_updates(self, params, loss):
    """Build the list of update ops for one optimization step.

    For every parameter the optimizer keeps four state tensors of the same
    shape: the previous gradient, a per-weight factor `zeta` (initialized
    to ones), and two moving averages `z` and `theta` (initialized to
    zeros). The "Line N" comments refer to the lines of the algorithm this
    code follows.

    Args:
        params: list of parameter variables to update.
        loss: loss tensor to differentiate.

    Returns:
        `self.updates`, the list of `K.update` ops.

    NOTE(review): `new_theta` is computed and stored but does not enter
    `weight_delta` in the active code path -- see the TODO below.
    """
    grads = self.get_gradients(loss, params)
    shapes = [K.get_variable_shape(p) for p in params]
    old_grads = [K.zeros(shape) for shape in shapes]
    zetas = [K.ones(shape) for shape in shapes]
    zs = [K.zeros(shape) for shape in shapes]
    thetas = [K.zeros(shape) for shape in shapes]

    self.weights = [self.iterations]  # + thetas
    # prev_weight_deltas = [K.zeros(shape) for shape in shapes]
    # self.weights = delta_ws + old_grads
    # TODO: understand self.weights
    self.updates = []

    for param, grad, old_grad, zeta, z, theta in zip(
            params, grads, old_grads, zetas, zs, thetas):
        # Line 4 to 8
        # zeta grows (capped at zeta_max) when the gradient keeps its sign,
        # shrinks (floored at zeta_min) when the sign flips, and is left
        # unchanged when the product is exactly zero.
        new_zeta = K.switch(
            K.greater(grad * old_grad, 0),
            K.minimum(zeta * self.eta_pos, self.zeta_max),
            K.switch(K.less(grad * old_grad, 0),
                     K.maximum(zeta * self.eta_neg, self.zeta_min),
                     zeta)
        )  # note that I added a 'if gradient = 0 then zeta' condition

        # Line 9
        new_z = self.alpha * z + (1 - self.alpha) * new_zeta
        # Line 10
        new_theta = self.alpha_b * theta + (1 - self.alpha_b) * K.square(grad)
        # Line 11
        weight_delta = -self.lr / new_z * grad  #/ (K.pow(new_theta, self.theta_pow) + 1e-11)  # added epsilon to prevent zero division
        # weight_delta = -self.lr * (new_zeta/new_z) * grad  # * (1 / K.sqrt(new_theta + 1e-11))  # added epsilon to prevent zero division
        # TODO: Figure this out! It seems like the theta part in particular seems to be breaking the calculation
        # Also, it seems like we should be taking the sign of grad rather than multiplying it directly.
        # weight_delta = -new_z * (grad/new_theta)

        # Line 12
        new_param = param + weight_delta

        # Apply constraints
        #if param in constraints:
        #    c = constraints[param]
        #    new_param = c(new_param)

        # Persist the new parameter value and all per-parameter state.
        self.updates.append(K.update(param, new_param))
        self.updates.append(K.update(zeta, new_zeta))
        self.updates.append(K.update(old_grad, grad))
        self.updates.append(K.update(z, new_z))
        self.updates.append(K.update(theta, new_theta))
    return self.updates
def call(self, x):
    """Compute the order-p kernel-pooling feature vector of a 4-D input.

    The output concatenates a zeroth-order term, the spatial mean of `x`
    (first order), and FFT-based sketch products for orders >= 2, then
    applies a signed square root and L2 normalization.

    Args:
        x: 4-D tensor; the assertion message states (batches, H, W, C).

    Returns:
        L2-normalized 2-D tensor with one row per batch element.

    NOTE(review): `_fft` / `_ifft` are helpers defined elsewhere; the
    meaning of their `False, 128` arguments is not visible here.
    """
    assert K.ndim(
        x) == 4, 'Should only call KP layer on input_shape (batches,H,W,C)'
    input_dims = K.shape(x)

    # zeroth and first order terms
    zeroth = self.alpha[0] * K.ones((input_dims[0], 1))
    first = self.alpha[1] * tf.reduce_mean(x, axis=[1, 2])

    # flatten to feature vectors
    x_flat = K.reshape(x, (-1, self.C))

    # Compute the Count Sketches C_t over feature vectors
    sketches = []
    for t in range(self.p):
        sketches.append(
            tf.transpose(
                tf.sparse_tensor_dense_matmul(self.sketch_matrices[t],
                                              x_flat,
                                              adjoint_a=True,
                                              adjoint_b=True)))

    # stack and reshape [(b*h*w, d_i)], len=p --> (b, h*w, p, d_i)
    x_sketches = K.reshape(K.stack(sketches, axis=-2),
                           (input_dims[0], -1, self.p, self.d))

    # Compute fft (operates on inner-most axis)
    x_fft = _fft(
        tf.complex(real=x_sketches, imag=K.zeros_like(x_sketches)), False,
        128)

    # Cumulative product along order dimension, discard first order
    x_fft_cp = K.cumprod(x_fft, axis=-2)[:, :, 1:, :]

    # Inverse fft, avg pool over spatial locations
    x_ifft = tf.reduce_mean(tf.real(_ifft(x_fft_cp, False, 128)), axis=1)

    # Apply weights over orders p >= 2
    x_p = x_ifft * K.reshape(self.alpha[2:], (1, self.p - 1, 1))

    # Concatenate to full order-p kernel approximation vector
    phi_x = K.concatenate(
        [zeroth, first, K.reshape(x_p, (input_dims[0], -1))])

    # Return the transformed + l2-normed kernel vector
    # (signed square root; the 1e-12 keeps sqrt away from exactly zero)
    phi_x = tf.multiply(tf.sign(phi_x), tf.sqrt(tf.abs(phi_x) + 1e-12))
    return tf.nn.l2_normalize(phi_x, axis=-1)
def build(self, input_shape):
    """Create the image kernel, mask kernel, padding and bias.

    Adapted from the original _Conv() layer of Keras.

    Args:
        input_shape: list of shapes for [img, mask]; only the image shape
            is read.

    Raises:
        ValueError: if the channel dimension of the image input is None.
    """
    channel_axis = 1 if self.data_format == 'channels_first' else -1

    if input_shape[0][channel_axis] is None:
        raise ValueError(
            'The channel dimension of the inputs should be defined. Found `None`.'
        )

    self.input_dim = input_shape[0][channel_axis]

    # Image kernel
    kernel_shape = self.kernel_size + (self.input_dim, self.filters)
    self.kernel = self.add_weight(shape=kernel_shape,
                                  initializer=self.kernel_initializer,
                                  name='img_kernel',
                                  regularizer=self.kernel_regularizer,
                                  constraint=self.kernel_constraint)

    # Mask kernel: fixed all-ones tensor with the image kernel's shape.
    self.kernel_mask = K.ones(shape=kernel_shape)

    # Calculate padding size to achieve zero-padding. The original derived
    # both pairs from kernel_size[0], which is only right for square
    # kernels; the column padding must follow kernel_size[1].
    pad_rows = (self.kernel_size[0] - 1) // 2
    pad_cols = (self.kernel_size[1] - 1) // 2
    self.pconv_padding = ((pad_rows, pad_rows), (pad_cols, pad_cols))

    # Window size - used for normalization
    self.window_size = self.kernel_size[0] * self.kernel_size[1]

    if self.use_bias:
        self.bias = self.add_weight(shape=(self.filters,),
                                    initializer=self.bias_initializer,
                                    name='bias',
                                    regularizer=self.bias_regularizer,
                                    constraint=self.bias_constraint)
    else:
        self.bias = None
    self.built = True
def call(self, inputs):
    """Convolve xnorized inputs and kernel, then rescale the result.

    The output is multiplied by a per-filter kernel magnitude and by a
    per-location input magnitude; both scales are gradient-stopped.
    """
    _, kernel_b = xnorize(self.kernel, self.H)
    _, inputs_b = xnorize(inputs)

    conv_kwargs = dict(strides=self.strides,
                       padding=self.padding,
                       data_format=self.data_format,
                       dilation_rate=self.dilation_rate)
    outputs = K.conv2d(inputs_b, kernel_b, **conv_kwargs)

    # Kernel scale: mean |w| per filter, shape (filters,).
    flat_kernel = K.reshape(self.kernel, (-1, self.filters))
    kernel_a = K.stop_gradient(K.mean(K.abs(flat_kernel), axis=0))

    # Input scale: channel-mean of |x| summed over each kernel window by
    # convolving with an all-ones single-channel kernel.
    channels_first = self.data_format == 'channels_first'
    channel_axis = 1 if channels_first else -1
    magnitude = K.mean(K.abs(inputs), axis=channel_axis, keepdims=True)
    window = K.ones(self.kernel_size + (1, 1))
    inputs_a = K.conv2d(magnitude, window, **conv_kwargs)

    # Put the filter axis of kernel_a where the channel axis of outputs is.
    if channels_first:
        kernel_scale = K.expand_dims(
            K.expand_dims(K.expand_dims(kernel_a, 0), -1), -1)
    else:
        kernel_scale = K.expand_dims(
            K.expand_dims(K.expand_dims(kernel_a, 0), 0), 0)
    outputs = outputs * K.stop_gradient(inputs_a) * kernel_scale

    if self.use_bias:
        outputs = K.bias_add(outputs, self.bias,
                             data_format=self.data_format)

    if self.activation is None:
        return outputs
    return self.activation(outputs)
def call(self, inputs, **kwargs):
    """Apply inference-time normalization using the stored statistics.

    Returns (inputs - mean) / sqrt(variance + eps) * gamma + beta, where
    eps is `self._epsilon_const`. Extra keyword arguments are ignored.
    """
    rank = len(K.int_shape(inputs))
    # All-ones shape except along the normalized axis.
    target_shape = [1] * rank
    target_shape[self.axis] = K.int_shape(inputs)[self.axis]

    def as_broadcastable(vector):
        # Reshape a per-feature vector so it broadcasts against `inputs`.
        return K.reshape(vector, target_shape)

    mean = as_broadcastable(self.moving_mean)
    variance = as_broadcastable(self.moving_variance)
    gamma = as_broadcastable(self.gamma)
    beta = as_broadcastable(self.beta)

    one = K.ones(shape=target_shape, dtype='float32')
    inv_std = one / K.sqrt(variance + self._epsilon_const)
    return (inputs - mean) * inv_std * gamma + beta
def test_unpickle_custom_loss(tmpdir):
    """Round-trip a custom loss through pickle and compare it to the original.

    The unpickled loss must give the same value as the original on fixed
    inputs and keep its `__name__`.
    """
    filename = os.path.join(tmpdir.dirname, 'custom_loss.pickle')
    try:
        misc.pickle_custom_loss(utility.mean_absolute_error_keras, filename)
        loss = misc.unpickle_custom_loss(filename)

        y_true = K.ones((10, 20))
        y_pred = K.zeros((10, 20))
        expected = K.eval(utility.mean_absolute_error_keras(y_true, y_pred))
        assert expected == K.eval(loss(y_true, y_pred))
        assert loss.__name__ == 'mean_absolute_error_keras'
    finally:
        # Remove the pickle even when an assertion fails. The file lives
        # in the parent of `tmpdir`, so it would otherwise be left behind.
        if os.path.exists(filename):
            os.remove(filename)
def _tversky(y_true, y_pred, tv_alpha, tv_beta, classes_weight):
    """Tversky loss for a single class.

    Sums are taken over axes 0 and 1. The ratio is multiplied by
    `classes_weight`, summed, and subtracted from one.
    """
    smooth = 1e-10  # keeps the denominator away from zero
    reduce_axes = [0, 1]
    ones = K.ones(K.shape(y_true))

    overlap = tf.reduce_sum(y_true * y_pred, axis=reduce_axes)
    pred_only = tf.reduce_sum(y_pred * (ones - y_true), axis=reduce_axes)
    true_only = tf.reduce_sum((ones - y_pred) * y_true, axis=reduce_axes)

    penalty = tv_alpha * pred_only + tv_beta * true_only
    denominator = overlap + penalty + smooth
    index = tf.reduce_sum(overlap / denominator * classes_weight)
    return 1 - index
def get_weightnorm_params_and_grads(p, g):
    """Re-express parameter `p` and gradient `g` in weight-norm (V, g) form.

    Norms are taken over every axis except the last (assumes we're using
    tensorflow!).

    Returns:
        Tuple (V, V_norm, V_scaler, g_param, grad_g, grad_V).
    """
    ps = K.get_variable_shape(p)

    # construct weight scaler: V_scaler = g/||V||
    # init to ones, so effective parameters don't change
    V_scaler = K.ones((ps[-1],))

    norm_axes = list(range(len(ps) - 1))
    # Shape that broadcasts a last-axis vector against `p`.
    broadcast_shape = [1] * len(norm_axes) + [-1]

    # get V parameters = ||V||/g * W
    V = p / tf.reshape(V_scaler, broadcast_shape)

    # split V_scaler into ||V|| and g parameters
    V_norm = tf.sqrt(tf.reduce_sum(tf.square(V), norm_axes))
    g_param = V_scaler * V_norm

    # get grad in V,g parameters
    grad_g = tf.reduce_sum(g * V, norm_axes) / V_norm
    projection = tf.reshape(grad_g / V_norm, broadcast_shape) * V
    grad_V = tf.reshape(V_scaler, broadcast_shape) * (g - projection)

    return V, V_norm, V_scaler, g_param, grad_g, grad_V
def joint_opt_loss(soft_labels, preds, alpha=0.8, beta=0.4):
    """Combine classification, prior and entropy terms into one loss.

    loss = L_c + alpha * L_p + beta * L_e

    The prior is a fixed uniform distribution over 9 classes.

    NOTE(review): `preds` is passed through softmax below, which suggests
    logits, yet `K.categorical_crossentropy` is called without
    `from_logits=True` -- confirm which the callers provide.
    """
    n_classes = 9
    # introduce prior prob distribution p
    prior = K.ones(n_classes) / n_classes

    prob = tf.nn.softmax(preds, dim=1)
    batch_mean_prob = K.mean(prob, axis=0)

    # Classification term (ignore constant).
    classification = K.categorical_crossentropy(soft_labels, preds)
    # Cross-entropy between the prior and the batch-averaged prediction.
    prior_term = -K.sum(K.log(batch_mean_prob) * prior)
    # Mean entropy of the per-sample predictions.
    log_prob = tf.nn.log_softmax(preds, dim=1)
    entropy_term = -K.mean(K.sum(prob * log_prob, axis=-1))

    return classification + alpha * prior_term + beta * entropy_term
def call(self, inputs):
    """Convolve integerized inputs and kernel, then rescale the result.

    The output is multiplied by a per-filter kernel scale
    (max |w| / 2**bits) and by a per-location input scale built from
    max |x| / 2**bits over channels; both scales are gradient-stopped.
    """
    _, kernel_i = integerize(self.kernel, self.bits)
    _, inputs_i = integerize(inputs, self.bits)

    # Only the channel axis is needed here. The original also set a local
    # 'NHWC'/'NCHW' string that was never read; it has been removed.
    channel_axis = -1 if self.data_format == 'channels_last' else 1

    outputs = K.conv2d(inputs_i,
                       kernel_i,
                       strides=self.strides,
                       padding=self.padding,
                       data_format=self.data_format)

    # Per-filter kernel scale, shape (filters,).
    kernel_m = K.reshape(self.kernel, (-1, self.filters))
    kernel_k = K.stop_gradient(
        K.max(K.abs(kernel_m), axis=0) / 2**self.bits)

    # Per-location input scale, summed over each kernel window by
    # convolving with an all-ones single-channel kernel.
    inputs_m = K.max(K.abs(inputs), axis=channel_axis,
                     keepdims=True) / 2**self.bits
    ones = K.ones(self.kernel_size + (1, 1))
    inputs_k = K.conv2d(inputs_m,
                        ones,
                        strides=self.strides,
                        padding=self.padding,
                        data_format=self.data_format)

    # Put the filter axis of kernel_k where the channel axis of outputs is.
    if self.data_format == 'channels_first':
        outputs = outputs * \
            K.stop_gradient(inputs_k) * \
            K.expand_dims(K.expand_dims(K.expand_dims(kernel_k, 0), -1), -1)
    else:
        outputs = outputs * \
            K.stop_gradient(inputs_k) * \
            K.expand_dims(K.expand_dims(K.expand_dims(kernel_k, 0), 0), 0)

    if self.use_bias:
        outputs = K.bias_add(outputs,
                             self.bias,
                             data_format=self.data_format)

    if self.activation is not None:
        return self.activation(outputs)
    return outputs
def get_updates(self, params, loss, contraints=None):
    """Build the list of update ops for one optimization step.

    For every parameter the optimizer keeps: the previous gradient, a
    shadow copy of the accumulated weight, a per-weight factor `zeta`
    (initialized to ones), and two moving averages `Z` and `theta`
    (initialized to zeros).

    Args:
        params: list of parameter variables to update.
        loss: loss tensor to differentiate.
        contraints: unused; the (misspelled) name is kept so existing
            keyword callers keep working.

    Returns:
        `self.updates`, the list of update ops.
    """
    self.updates = [K.update_add(self.iterations, 1)]
    grads = self.get_gradients(loss, params)
    shapes = [K.int_shape(p) for p in params]
    old_grads = [K.zeros(shape) for shape in shapes]
    weights = [K.zeros(shape) for shape in shapes]

    # Learning rate, with optional time-based decay.
    learning_rate = self.learning_rate
    if self.initial_decay > 0:
        learning_rate *= (1. / (1. + self.decay * self.iterations))

    # Line 2 - initialise per-parameter state
    zeta = [K.ones(shape) for shape in shapes]
    Z = [K.zeros(shape) for shape in shapes]
    theta = [K.zeros(shape) for shape in shapes]

    for p, g, w, expMA, prevZ, prevTheta, old_g in zip(
            params, grads, weights, zeta, Z, theta, old_grads):
        change = g * old_g
        pos_change = K.greater(change, 0.)
        neg_change = K.less(change, 0.)

        # Line 3-8: grow zeta while the gradient keeps its sign, shrink it
        # when the sign flips, otherwise leave it unchanged. The original
        # referenced an undefined `expmA` in the shrink branch, which
        # raised NameError as soon as the graph was built.
        zeta_t = K.switch(
            pos_change,
            K.minimum(expMA * self.eta_plus, self.zeta_max),
            K.switch(neg_change,
                     K.maximum(expMA * self.eta_minus, self.zeta_min),
                     expMA))
        zeta_clip = K.clip(zeta_t, self.zeta_min, self.zeta_max)

        # Lines 9-12: Update weights for t with amendments as proposed for line 11
        Z_t = (self.alpha * prevZ) + ((1 - self.alpha) * zeta_t)
        theta_t = (self.alpha * prevTheta) + ((1 - self.alpha) * K.square(g))
        wChange = -(learning_rate * (zeta_clip / zeta_t) * g) / K.sqrt(
            theta_t + self.epsilon)
        new_weight = w + wChange
        p_update = p - w + new_weight

        self.updates.append(K.update(p, p_update))
        self.updates.append(K.update(w, new_weight))
        self.updates.append(K.update(expMA, zeta_t))
        # Remember this gradient for the next sign comparison. Without
        # this update `old_g` stays zero forever and `change` is always 0.
        self.updates.append(K.update(old_g, g))
        self.updates.append(K.update(prevZ, Z_t))
        self.updates.append(K.update(prevTheta, theta_t))
    return self.updates
def log_pz(y_true, y_pred):
    """Weighted log-likelihood of the sampled z under the unit Gaussian.

    Supervised points get weight one; unsupervised points are weighted by
    how likely their associated y value was (as predicted by p(y|x)).
    `y_true` and `y_pred` are not used; the tensors are read from
    module-level names.
    """
    half_batch = BATCH_SIZE // 2

    supervised_weights = K.ones(shape=half_batch)
    unsupervised_weights = K.reshape(y_un, shape=[-1])
    weights = K.concatenate([supervised_weights, unsupervised_weights], 0)

    loss_per_point = weights * K.mean(
        unit_gaussian_ll(q_z__y_x_output), axis=1)

    # First chunk is the supervised part; the remaining CLASSES chunks are
    # the unsupervised samples, summed across classes below.
    chunks = tf.split(loss_per_point, num_or_size_splits=CLASSES + 1, axis=0)
    sup_loss = chunks[0]
    unsupervised = K.concatenate(chunks[1:])
    un_loss = K.sum(K.reshape(unsupervised, [half_batch, CLASSES]), axis=1)

    return K.concatenate([sup_loss, un_loss])
def build(self, input_shape):
    """Create the gamma/beta parameters and the running statistics.

    All four variables have shape (input_shape[self.axis],). Gamma and
    beta are trainable; the running mean (zeros) and running std (ones)
    are not.
    """
    self.input_spec = [InputSpec(shape=input_shape)]
    param_shape = (input_shape[self.axis],)

    def named(suffix):
        # Variable names are "<layer name>_<suffix>".
        return '{}_{}'.format(self.name, suffix)

    self.gamma = self.gamma_init(param_shape, name=named('gamma'))
    self.beta = self.beta_init(param_shape, name=named('beta'))
    self.trainable_weights = [self.gamma, self.beta]

    self.running_mean = K.zeros(param_shape, name=named('running_mean'))
    self.running_std = K.ones(param_shape, name=named('running_std'))
    self.non_trainable_weights = [self.running_mean, self.running_std]

    if self.initial_weights is None:
        return
    self.set_weights(self.initial_weights)
    del self.initial_weights
def sinc(band, t_right):
    """Build a symmetric sinc curve from its right half.

    Evaluates sin(2*pi*band*t) / (2*pi*band*t) on `t_right`, mirrors it
    to get the left half, and puts a single 1 in the middle.
    """
    debug_print("sinc")
    debug_print(" band", band)
    debug_print(" t_right", t_right)

    phase = 2 * math.pi * band * t_right
    y_right = K.sin(phase) / phase
    debug_print(" y_right", y_right)

    # The left half is the right half reversed along axis 0.
    y_left = K.reverse(y_right, 0)
    debug_print(" y_left", y_left)

    centre = K.variable(K.ones(1))
    y = K.concatenate([y_left, centre, y_right])
    debug_print(" y", y)
    return y
def weight_loss(y_true, y_pred, class_weight=None):
    """Binary cross-entropy, reweighted by the per-sample label balance.

    Entries where y_true is 1 are weighted by the fraction of zero labels
    in that sample, and vice versa; the result is averaged over axis 1.
    """
    output = K.binary_crossentropy(y_true, y_pred)
    if class_weight is not None:
        output *= class_weight

    # Per-sample counts of positive and zero labels.
    n_pos = K.sum(y_true, axis=1)
    n_neg = K.sum(K.cast(K.equal(y_true, 0.), K.floatx()), axis=1)
    n_neg = K.reshape(n_neg, (-1, 1))
    n_pos = K.reshape(n_pos, (-1, 1))

    # Tile both counts across the label axis.
    row_of_ones = K.ones((1, K.shape(y_true)[1]))
    n_pos *= row_of_ones
    n_neg *= row_of_ones

    balance = (y_true * n_neg + (1.0 - y_true) * n_pos) / (n_neg + n_pos)
    output *= balance
    return K.mean(output, axis=1)
def log_px(y_true, y_pred):
    """Weighted log-likelihood of the true images under p(x|a,y,z).

    Supervised points get weight one; unsupervised samples are weighted
    by how likely their associated y value was (as predicted by
    p(y|a,x)). `y_true` and `y_pred` are not used; the tensors are read
    from module-level names.
    """
    half_batch = BATCH_SIZE // 2

    supervised_weights = K.ones(shape=(half_batch))
    unsupervised_weights = K.reshape(y_un, shape=[-1])
    weights = K.concatenate([supervised_weights, unsupervised_weights], 0)

    # Negative cross-entropy, i.e. a log-likelihood.
    reconstruction = keras.losses.binary_crossentropy(
        rep_img_input, p_x__y_z_mean)
    loss_per_point = -weights * reconstruction

    # First chunk is the supervised part; the remaining CLASSES chunks are
    # the unsupervised samples, summed across classes below.
    chunks = tf.split(loss_per_point, num_or_size_splits=CLASSES + 1, axis=0)
    sup_loss = chunks[0]
    unsupervised = K.concatenate(chunks[1:])
    un_loss = K.sum(K.reshape(unsupervised, [half_batch, CLASSES]), axis=1)

    return K.concatenate([sup_loss, un_loss])
def tversky_loss(y_true, y_pred):
    """Tversky loss with alpha = beta = 0.5.

    Returns (size of the last axis) minus the sum of the per-class
    Tversky ratios; sums are taken over axes (0, 1, 2).
    """
    alpha = 0.5
    beta = 0.5
    axes = (0, 1, 2)

    def total(product):
        # Reduce an element-wise product over the first three axes.
        return K.sum(product, axes)

    ones = K.ones(K.shape(y_true))
    pred_complement = ones - y_pred
    true_complement = ones - y_true

    numerator = total(y_pred * y_true)
    denominator = (numerator
                   + alpha * total(y_pred * true_complement)
                   + beta * total(pred_complement * y_true))

    ratio_sum = K.sum(numerator / denominator)
    class_count = K.cast(K.shape(y_true)[-1], 'float32')
    return class_count - ratio_sum
def call(self, inputs, training=None):
    """Normalize `inputs` using the moving mean and variance.

    Each per-feature vector is reshaped to broadcast along `self.axis`;
    the result is (inputs - mean) * invstd * gamma + beta. `training` is
    not used.
    """
    shape = K.int_shape(inputs)
    # Ones everywhere except the size of the normalized axis.
    broadcast_shape = [
        shape[self.axis] if i == self.axis % len(shape) else 1
        for i in range(len(shape))
    ]

    moving_mean, moving_variance, gamma, beta = (
        K.reshape(vector, broadcast_shape)
        for vector in (self.moving_mean, self.moving_variance,
                       self.gamma, self.beta)
    )

    epsilon = K.constant(self.epsilon, dtype='float32')
    invstd = (K.ones(shape=broadcast_shape, dtype='float32')
              / K.sqrt(moving_variance + epsilon))

    return (inputs - moving_mean) * invstd * gamma + beta
def call(self, inputs, mask=None):
    """Apply the graph convolution to `[features, A_0, A_1, ...]`.

    Args:
        inputs: list whose first element is the feature matrix and whose
            remaining elements are the support (adjacency) matrices.
        mask: not used.

    Returns:
        `self.activation` applied to the convolved output (plus bias when
        `self.bias` is truthy).

    NOTE(review): when `num_bases > 0` this method reassigns `self.W` with
    a reshaped/permuted tensor on every call -- confirm that is intended.
    NOTE(review): `output.T` looks Theano-specific; the inert string below
    holds a backend-neutral alternative.
    """
    features = inputs[0]
    A = inputs[
        1:]  # list of basis functions; features:X, A: adjacent (could also be introducing the parameter c); paper 2.2

    # convolve
    supports = list()
    for i in range(self.support):
        if not self.featureless:
            supports.append(K.dot(A[i], features))  # features and adjacent (A)
        else:
            supports.append(A[i])  # otherwise just A
    # Stack the per-support results side by side along axis 1.
    supports = K.concatenate(supports, axis=1)

    if self.num_bases > 0:
        # Basis decomposition: combine the bases with the W_comp coefficients.
        self.W = K.reshape(
            self.W, (self.num_bases, self.input_dim, self.output_dim))
        self.W = K.permute_dimensions(self.W, (1, 0, 2))
        V = K.dot(self.W_comp,
                  self.W)  # W_comp is the base, W is the coefficients
        V = K.reshape(V, (self.support * self.input_dim, self.output_dim))
        output = K.dot(supports, V)  # supports * V = (1/c * h) * W
    else:
        output = K.dot(supports, self.W)
    # print (supports.shape) # (?, 752)
    # print # self.W dim [1111268,2]

    # if featureless add dropout to output, by elementwise multiplying with column vector of ones,
    # with dropout applied to the vector of ones.
    if self.featureless:
        tmp = K.ones(self.num_nodes)
        tmp_do = Dropout(self.dropout)(tmp)
        output = (output.T * tmp_do).T
        # The bare string below is an inert statement holding an
        # alternative implementation; it is never executed.
        ''' tmp = K.ones((self.num_nodes,)) tmp_do = Dropout(self.dropout)(tmp) output = K.transpose(K.transpose(output) * tmp_do) '''

    if self.bias:
        output += self.b
    return self.activation(output)  # sigma
def _compute_valid_seed_region(self, seq_length):
    """Return a mask of positions at least half a block from both ends.

    The mask is one where half_block <= position < seq_length - half_block
    and zero elsewhere; it gets a leading and a trailing singleton axis.
    """
    half_block = self.block_size // 2
    positions = K.arange(seq_length)

    far_from_start = positions >= half_block
    far_from_end = positions < seq_length - half_block
    inside = K.all(K.stack([far_from_start, far_from_end], axis=-1), axis=-1)

    region = K.switch(inside, K.ones((seq_length,)), K.zeros((seq_length,)))
    region = K.expand_dims(region, axis=0)
    return K.expand_dims(region, axis=-1)
def tverskyMeanW(y_true, y_pred, t0=0.99992, t1=0.00002, t2=0.00006):
    """Tversky ratio over three channels, weighted by t0, t1 and t2.

    Channels 0, 1 and 2 of the last axis are combined with the given
    weights before the ratio is taken; alpha = beta = 0.5.
    """
    alpha = 0.5
    beta = 0.5

    def weighted_total(product):
        # Weighted sum over the first three channels of a 4-D product.
        return K.sum(product[:, :, :, 0] * t0
                     + product[:, :, :, 1] * t1
                     + product[:, :, :, 2] * t2)

    ones = K.ones(K.shape(y_true))
    not_pred = ones - y_pred  # proba that voxels are not class
    not_true = ones - y_true

    num = weighted_total(y_pred * y_true)
    den = (num
           + alpha * weighted_total(y_pred * not_true)
           + beta * weighted_total(not_pred * y_true))
    return K.sum(num / den)
def call(self, inputs, mask=None):
    """Convolve `inputs` and normalize by the patch and kernel norms.

    Returns alpha * conv(inputs, kernel) / (||patch|| * ||kernel||).
    `mask` is not used.
    """
    side = self.kernelSize[0]
    conv_kwargs = dict(strides=self.strides, padding=self.padding)

    # Norm of each input patch: a sliding-window sum of squares, computed
    # with an all-ones kernel, then square-rooted.
    ones_kernel = K.ones((side, side, self.nInputPlane, self.nOutputPlane))
    patch_norm = K.sqrt(K.conv2d(inputs * inputs, ones_kernel, **conv_kwargs))

    response = K.conv2d(inputs, self.kernelWeights, **conv_kwargs)

    # Norm over all kernel weights.
    kernel_norm = K.sqrt(K.sum(self.kernelWeights * self.kernelWeights))
    return self.alpha * response / (patch_norm * kernel_norm)
def get_output_mask(self, train=False):
    """Shift the mask one step to the right.

    The mask is (nb_samples, nb_timesteps) with a one for every unmasked
    datapoint and a zero for every masked one. A column of ones is
    prepended and the last column is dropped.

    Raises:
        Exception: when the backend is tensorflow.
    """
    if K._BACKEND == "tensorflow":
        raise Exception("Masking is Theano-only for the time being.")

    # Training reads this layer's own input mask; otherwise the mask
    # comes from the wrapped sequence layer.
    input_mask = (self.get_input_mask(train)
                  if train
                  else self.sequence_layer.get_output_mask(train))
    if not input_mask:
        return None

    first_column = K.ones((K.shape(input_mask)[0], 1))
    shifted = input_mask[:, :-1]
    return K.concatenate((first_column, shifted), axis=1)
def get_output(self, train=False):
    """Look up word embeddings and optionally sum them.

    Rows of W are dropped at random during training; otherwise they are
    scaled by the retain probability. Positions where the input is 0 are
    zeroed out. With bow_mode "bow" the words axis (axis 2) is summed.
    """
    X = self.get_input(train)
    keep_prob = 1. - self.dropout

    # we zero-out rows of W at random
    if train and self.dropout > 0:
        row_scale = K.random_binomial((self.input_dim,), p=keep_prob)
    else:
        row_scale = K.ones((self.input_dim)) * keep_prob
    scaled_W = self.W * K.expand_dims(row_scale)

    # (samples*input_length, nb_words, dim)
    indices = K.cast(K.reshape(X, (-1, self.nb_words)), 'int32')
    out = K.gather(scaled_W, indices)

    # (samples, input_length, nb_words, dim)
    out = K.reshape(
        out, (-1, self.input_length, self.nb_words, self.output_dim))

    # Zero the embeddings of index-0 entries.
    out = out * K.expand_dims(K.not_equal(X, 0), dim=-1)

    if self.bow_mode == "bow":
        out = K.sum(out, axis=2)
    return out
def kl_loss(self, _, y_pred):
    """KL term computed from the mean and log-sigma channels of `y_pred`.

    Channels 0:2 of the last axis are read as the mean, the rest as
    log sigma.
    """
    mean = y_pred[:, :, :, 0:2]
    log_sigma = y_pred[:, :, :, 2:]

    # compute the degree matrix. If this has already happened
    # should only compute this once!
    # also need to check that this works!
    spatial_shape = log_sigma.get_shape().as_list()[1:]
    ones_volume = K.ones([1] + spatial_shape)

    # 3x3 filter that, per channel, sums the four direct neighbours
    # (up/down and left/right) without mixing channels.
    neighbour_filter = np.zeros((3, 3, 2, 2))
    for channel in range(2):
        neighbour_filter[[0, 2], 1, channel, channel] = 1
        neighbour_filter[1, [0, 2], channel, channel] = 1
    filter_tensor = tf.convert_to_tensor(neighbour_filter, dtype=tf.float32)

    degree = tf.nn.conv2d(ones_volume, filter_tensor, [1, 1, 1, 1], "SAME")
    degree = K.expand_dims(degree, 0)

    sigma_terms = (self.alpha * degree * tf.exp(log_sigma) - log_sigma)
    # note needs 0.5 twice, one here, one below
    prec_terms = 0.5 * self.alpha * self.kl_prec_term_manual(_, mean)

    return 0.5 * tf.reduce_mean(sigma_terms, [1, 2]) + 0.5 * prec_terms
def true_negative_rate(y_true, y_pred):
    """True negative rate, computed as the true positive rate of the
    complemented labels and predictions.

    NOTE(review): the complement uses a hard-coded vector of 32 ones,
    which presumably matches the batch size -- confirm against callers.
    """
    complemented_true = K.ones((32,)) - y_true
    complemented_pred = K.ones((32,)) - y_pred
    return true_positive_rate(complemented_true, complemented_pred, mode='n')
def no_attention_control(args):
    """Return a constant all-ones attention map of shape (1, 32, 15, 15).

    `args` must unpack into exactly two items; neither is used.
    """
    # Unpacking is kept so that a wrongly sized `args` still fails here.
    _x, _dense_2 = args
    return K.ones(shape=(1, 32, 15, 15))
def call(self, inputs, training=None, mask=None):
    """Append normalized coordinate channels to `inputs`.

    One coordinate channel is built per spatial axis (1, 2 or 3 of them,
    selected by `self.rank`). Each holds the position index along its
    axis, scaled to [-1, 1] via index / (dim - 1) * 2 - 1. The channels
    are concatenated to `inputs` along `self.axis`.

    For ranks 2 and 3 with `channels_first` data, `inputs` is permuted to
    channels-last before the concatenation and the result is permuted
    back afterwards. For rank 2 with `self.use_radius`, an extra channel
    sqrt((xx - 0.5)^2 + (yy - 0.5)^2) is appended as well.

    Args:
        inputs: input tensor; its dynamic shape is read with `K.shape`.
        training: not used.
        mask: not used.

    Returns:
        `inputs` with the coordinate channels appended.

    NOTE(review): `outputs` is only assigned inside the three rank
    branches, so any other rank would raise at the final `return`.
    """
    input_shape = K.shape(inputs)

    if self.rank == 1:
        # Inputs are indexed as (batch, dim, channels).
        input_shape = [input_shape[i] for i in range(3)]
        batch_shape, dim, channels = input_shape

        # Position indices 0..dim-1, repeated for every batch element.
        xx_range = K.tile(K.expand_dims(K.arange(0, dim), axis=0),
                          K.stack([batch_shape, 1]))
        xx_range = K.expand_dims(xx_range, axis=-1)

        # Scale the indices to [-1, 1].
        xx_channels = K.cast(xx_range, K.floatx())
        xx_channels = xx_channels / K.cast(dim - 1, K.floatx())
        xx_channels = (xx_channels * 2) - 1.

        outputs = K.concatenate([inputs, xx_channels], axis=self.axis)

    if self.rank == 2:
        # Work in channels-last layout; permuted back at the end of the branch.
        if self.data_format == 'channels_first':
            inputs = K.permute_dimensions(inputs, [0, 2, 3, 1])

        input_shape = [input_shape[i] for i in range(4)]
        batch_shape, dim1, dim2, channels = input_shape

        # Outer product of ones (length dim2) with indices 0..dim1-1
        # gives a grid of dim1 indices.
        xx_ones = K.ones(K.stack([batch_shape, dim2]), dtype='int32')
        xx_ones = K.expand_dims(xx_ones, axis=-1)

        xx_range = K.tile(K.expand_dims(K.arange(0, dim1), axis=0),
                          K.stack([batch_shape, 1]))
        xx_range = K.expand_dims(xx_range, axis=1)
        xx_channels = K.batch_dot(xx_ones, xx_range, axes=[2, 1])
        xx_channels = K.expand_dims(xx_channels, axis=-1)
        xx_channels = K.permute_dimensions(xx_channels, [0, 2, 1, 3])

        # Same construction for the dim2 indices.
        yy_ones = K.ones(K.stack([batch_shape, dim1]), dtype='int32')
        yy_ones = K.expand_dims(yy_ones, axis=1)

        yy_range = K.tile(K.expand_dims(K.arange(0, dim2), axis=0),
                          K.stack([batch_shape, 1]))
        yy_range = K.expand_dims(yy_range, axis=-1)

        yy_channels = K.batch_dot(yy_range, yy_ones, axes=[2, 1])
        yy_channels = K.expand_dims(yy_channels, axis=-1)
        yy_channels = K.permute_dimensions(yy_channels, [0, 2, 1, 3])

        # Scale both index grids to [-1, 1].
        xx_channels = K.cast(xx_channels, K.floatx())
        xx_channels = xx_channels / K.cast(dim1 - 1, K.floatx())
        xx_channels = (xx_channels * 2) - 1.

        yy_channels = K.cast(yy_channels, K.floatx())
        yy_channels = yy_channels / K.cast(dim2 - 1, K.floatx())
        yy_channels = (yy_channels * 2) - 1.

        outputs = K.concatenate([inputs, xx_channels, yy_channels],
                                axis=self.axis)

        if self.use_radius:
            # Distance of each position from the point (0.5, 0.5).
            rr = K.sqrt(K.square(xx_channels - 0.5) +
                        K.square(yy_channels - 0.5))
            outputs = K.concatenate([outputs, rr], axis=-1)

        if self.data_format == 'channels_first':
            outputs = K.permute_dimensions(outputs, [0, 3, 1, 2])

    if self.rank == 3:
        # Work in channels-last layout; permuted back at the end of the branch.
        if self.data_format == 'channels_first':
            inputs = K.permute_dimensions(inputs, [0, 2, 3, 4, 1])

        input_shape = [input_shape[i] for i in range(5)]
        batch_shape, dim1, dim2, dim3, channels = input_shape

        # xx: grid of dim2 indices, then repeated along dim1.
        xx_ones = K.ones(K.stack([batch_shape, dim3]), dtype='int32')
        xx_ones = K.expand_dims(xx_ones, axis=-1)

        xx_range = K.tile(K.expand_dims(K.arange(0, dim2), axis=0),
                          K.stack([batch_shape, 1]))
        xx_range = K.expand_dims(xx_range, axis=1)

        xx_channels = K.batch_dot(xx_ones, xx_range, axes=[2, 1])
        xx_channels = K.expand_dims(xx_channels, axis=-1)
        xx_channels = K.permute_dimensions(xx_channels, [0, 2, 1, 3])

        xx_channels = K.expand_dims(xx_channels, axis=1)
        xx_channels = K.tile(xx_channels, [1, dim1, 1, 1, 1])

        # yy: grid of dim3 indices, then repeated along dim1.
        yy_ones = K.ones(K.stack([batch_shape, dim2]), dtype='int32')
        yy_ones = K.expand_dims(yy_ones, axis=1)

        yy_range = K.tile(K.expand_dims(K.arange(0, dim3), axis=0),
                          K.stack([batch_shape, 1]))
        yy_range = K.expand_dims(yy_range, axis=-1)

        yy_channels = K.batch_dot(yy_range, yy_ones, axes=[2, 1])
        yy_channels = K.expand_dims(yy_channels, axis=-1)
        yy_channels = K.permute_dimensions(yy_channels, [0, 2, 1, 3])

        yy_channels = K.expand_dims(yy_channels, axis=1)
        yy_channels = K.tile(yy_channels, [1, dim1, 1, 1, 1])

        # zz: dim1 indices, tiled over dim2 and dim3.
        zz_range = K.tile(K.expand_dims(K.arange(0, dim1), axis=0),
                          K.stack([batch_shape, 1]))
        zz_range = K.expand_dims(zz_range, axis=-1)
        zz_range = K.expand_dims(zz_range, axis=-1)

        zz_channels = K.tile(zz_range, [1, 1, dim2, dim3])
        zz_channels = K.expand_dims(zz_channels, axis=-1)

        # Scale all three index grids to [-1, 1].
        xx_channels = K.cast(xx_channels, K.floatx())
        xx_channels = xx_channels / K.cast(dim2 - 1, K.floatx())
        xx_channels = xx_channels * 2 - 1.

        yy_channels = K.cast(yy_channels, K.floatx())
        yy_channels = yy_channels / K.cast(dim3 - 1, K.floatx())
        yy_channels = yy_channels * 2 - 1.

        zz_channels = K.cast(zz_channels, K.floatx())
        zz_channels = zz_channels / K.cast(dim1 - 1, K.floatx())
        zz_channels = zz_channels * 2 - 1.

        outputs = K.concatenate([inputs, zz_channels, xx_channels, yy_channels],
                                axis=self.axis)

        if self.data_format == 'channels_first':
            outputs = K.permute_dimensions(outputs, [0, 4, 1, 2, 3])

    return outputs
def one(shape, name=None):
    """Initializer that returns the result of `K.ones` for `shape`.

    Args:
        shape: shape passed through to `K.ones`.
        name: optional name passed through to `K.ones`.
    """
    ones_tensor = K.ones(shape, name=name)
    return ones_tensor