def call(self, input):
    """Apply ``self.num_layer`` stacked cross layers and flatten the result.

    Each layer computes
    ``sum(W[i] * batch_dot(reshape(prev, (-1, input_dim, 1)), input), axis=1)
    + bias[i] + input``, where ``prev`` is the previous layer's output (the
    raw ``input`` for the first layer).

    Args:
        input: Layer input tensor; presumably shaped (batch, 1, input_dim)
            so the batch_dot yields one (input_dim, input_dim) matrix per
            sample -- TODO confirm against the caller.

    Returns:
        ``Flatten()`` applied to the last cross layer's output, or to
        ``input`` itself when ``self.num_layer`` is 0.
    """
    # The first layer is the general rule with prev == input, so one code
    # path covers every layer.  Starting from ``input`` also keeps ``cross``
    # defined when ``self.num_layer`` is 0 (previously UnboundLocalError).
    # NOTE(review): the residual term is ``input`` rather than the previous
    # layer's output; confirm this is the intended cross-network variant.
    cross = input
    for i in range(self.num_layer):
        # ``i`` is bound as a default so the lambda keeps this layer's
        # weights even if Keras re-invokes it after the loop has advanced.
        cross = Lambda(
            lambda x, i=i: Add()([
                K.sum(
                    self.W[i] * K.batch_dot(
                        K.reshape(x, (-1, self.input_dim, 1)), input),
                    1, keepdims=True),
                self.bias[i],
                input,
            ])
        )(cross)
    return Flatten()(cross)
def content_mean_square_error(self, y_true, y_pred):
    """Return the summed squared difference between two feature maps.

    Both inputs are wrapped in backend variables and reshaped to
    (filters, x_pos * y_pos), using the shape ``self.get_shape`` reports
    for the prediction, before the squared difference is summed.
    """
    prediction = K.variable(value=y_pred)
    target = K.variable(value=y_true)
    # The first of the four reported dimensions is not used.
    _, n_filters, n_x, n_y = self.get_shape(prediction)
    flat_shape = (n_filters, n_x * n_y)
    prediction = K.reshape(prediction, flat_shape)
    target = K.reshape(target, flat_shape)
    squared_error = K.square(prediction - target)
    return K.sum(squared_error)
def call(self, x):
    """Run multi-head attention on a list of input tensors.

    Args:
        x: ``[Q_seq, K_seq, V_seq]`` to attend without masking, or
            ``[Q_seq, K_seq, V_seq, Q_len, V_len]`` to additionally mask
            the surplus positions through ``self.Mask``.

    Returns:
        Tensor reshaped to (-1, query steps, ``self.output_dim``).

    Raises:
        ValueError: If ``x`` does not contain exactly 3 or 5 elements.
    """
    if len(x) == 3:
        Q_seq, K_seq, V_seq = x
        Q_len, V_len = None, None
    elif len(x) == 5:
        Q_seq, K_seq, V_seq, Q_len, V_len = x
    else:
        # Previously this fell through and failed further down with an
        # UnboundLocalError that hid the real cause.
        raise ValueError(
            'Attention layer expects 3 or 5 inputs, got ' + str(len(x)))
    # Linear projections of Q, K and V; each is split into heads and
    # permuted to (batch, head, step, size_per_head).
    Q_seq = K.dot(Q_seq, self.WQ)
    Q_seq = K.reshape(
        Q_seq, (-1, K.shape(Q_seq)[1], self.nb_head, self.size_per_head))
    Q_seq = K.permute_dimensions(Q_seq, (0, 2, 1, 3))
    K_seq = K.dot(K_seq, self.WK)
    K_seq = K.reshape(
        K_seq, (-1, K.shape(K_seq)[1], self.nb_head, self.size_per_head))
    K_seq = K.permute_dimensions(K_seq, (0, 2, 1, 3))
    V_seq = K.dot(V_seq, self.WV)
    V_seq = K.reshape(
        V_seq, (-1, K.shape(V_seq)[1], self.nb_head, self.size_per_head))
    V_seq = K.permute_dimensions(V_seq, (0, 2, 1, 3))
    # Inner product, then mask, then softmax.  The permutes around the
    # mask call swap axes 1 and 3 and swap them back afterwards.
    A = K.batch_dot(Q_seq, K_seq, axes=[3, 3])
    A = K.permute_dimensions(A, (0, 3, 2, 1))
    A = self.Mask(A, V_len, 'add')
    A = K.permute_dimensions(A, (0, 3, 2, 1))
    A = K.softmax(A)
    # Weighted sum of the values, merge the heads, then mask the output.
    O_seq = K.batch_dot(A, V_seq, axes=[3, 2])
    O_seq = K.permute_dimensions(O_seq, (0, 2, 1, 3))
    O_seq = K.reshape(O_seq, (-1, K.shape(O_seq)[1], self.output_dim))
    O_seq = self.Mask(O_seq, Q_len, 'mul')
    return O_seq
def get_output(self, train=False):
    """Return log occupancies, optionally normalized for steric hindrance.

    The layer input is negated and passed to ``theano_calc_log_occs``
    together with ``self.chem_affinity``.  When
    ``self.steric_hindrance_win_len`` is non-zero, a log normalization
    factor computed over a window of ``2 * win_len - 1`` positions is
    subtracted.

    Args:
        train: Forwarded to ``self.get_input``.

    Returns:
        Tensor reshaped to (X.shape[0], 2 * X.shape[1], 1, X.shape[3]),
        where ``X`` is the layer input.
    """
    # Debug trace; formatted so it prints the same text on Python 2 and 3
    # (the former ``print`` statement was a syntax error on Python 3).
    print("LogNormalizedOccupancy %s" % (self.output_shape,))
    X = self.get_input(train)
    # calculate the log occupancies
    log_occs = theano_calc_log_occs(-X, self.chem_affinity)
    # reshape the output so that the forward and reverse complement
    # occupancies are viewed as different tracks
    log_occs = K.reshape(
        log_occs, (X.shape[0], 1, 2*X.shape[1], X.shape[3]))
    if self.steric_hindrance_win_len == 0:
        log_norm_factor = 0
    else:
        # correct occupancies for overlapping binding sites
        occs = K.exp(log_occs)
        kernel = K.ones(
            (1, 1, 1, 2*self.steric_hindrance_win_len-1), dtype='float32')
        win_occ_sum = K.conv2d(
            occs, kernel, border_mode='same').sum(axis=2, keepdims=True)
        win_prb_all_unbnd = TT.exp(
            K.conv2d(K.log(1-occs), kernel, border_mode='same')
        ).sum(axis=2, keepdims=True)
        log_norm_factor = TT.log(win_occ_sum + win_prb_all_unbnd)
    rv = (log_occs - log_norm_factor)
    return K.reshape(
        rv, (X.shape[0], 2*X.shape[1], 1, X.shape[3]))
def normalize_inference():
    """Normalize ``inputs`` with the stored moving statistics.

    Reads ``inputs``, ``needs_broadcasting`` and ``broadcast_shape`` from
    the enclosing scope.
    """
    if not needs_broadcasting:
        return K.batch_normalization(
            inputs,
            self.moving_mean,
            self.moving_variance,
            self.beta,
            self.gamma,
            epsilon=self.epsilon)

    # Broadcasting is required: every parameter is reshaped explicitly.
    mean = K.reshape(self.moving_mean, broadcast_shape)
    variance = K.reshape(self.moving_variance, broadcast_shape)
    # beta / gamma are passed as None when centering / scaling is disabled.
    beta = K.reshape(self.beta, broadcast_shape) if self.center else None
    gamma = K.reshape(self.gamma, broadcast_shape) if self.scale else None
    return K.batch_normalization(
        inputs, mean, variance, beta, gamma, epsilon=self.epsilon)
def make_patches_grid(x, patch_size, patch_stride):
    '''Split image `x` into a 2D grid of square patches.

    input shape: (channels, rows, cols)
    output shape: (rows, cols, channels, patch_rows, patch_cols)

    Returns a `(patches, patches_norm)` pair; `patches_norm` is the L2 norm
    of every patch taken over its channel and pixel axes (kept as
    singleton dimensions).
    '''
    from theano.tensor.nnet.neighbours import images2neibs  # TODO: all K, no T
    batched = K.expand_dims(x, 0)
    shape = K.shape(batched)
    num_rows = 1 + (shape[-2] - patch_size) // patch_stride
    num_cols = 1 + (shape[-1] - patch_size) // patch_stride
    num_channels = shape[-3]
    neibs = images2neibs(
        batched,
        (patch_size, patch_size),
        (patch_stride, patch_stride),
        mode='valid')
    # images2neibs output is ordered channel by channel, so split on the
    # channel axis first and then move the patch index in front of it.
    per_channel = K.reshape(
        neibs,
        (num_channels, K.shape(neibs)[0] // num_channels,
         patch_size, patch_size))
    per_patch = K.permute_dimensions(per_channel, (1, 0, 2, 3))
    # lay the patches out as (rows, cols, channels, px, py)
    patches = K.reshape(
        per_patch,
        (num_rows, num_cols, num_channels, patch_size, patch_size))
    energy = K.sum(K.square(patches), axis=(2, 3, 4), keepdims=True)
    return patches, K.sqrt(energy)
def call(self, x, mask=None):
    """Normalize ``x`` with the stored running statistics.

    Args:
        x: Input tensor.
        mask: Unused.

    Returns:
        The output of ``K.batch_normalization`` applied to ``x`` with
        ``running_mean``, ``running_std``, ``beta`` and ``gamma``.
    """
    assert self.built, 'Layer must be built before being called'
    input_shape = K.int_shape(x)

    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self.axis] = input_shape[self.axis]

    # ``list(...)`` matters: on Python 3 a list never compares equal to a
    # ``range`` object, so the fast path below was never taken.
    if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:
        # Normalized axis is the last one: parameters broadcast as-is.
        x_normed = K.batch_normalization(
            x, self.running_mean, self.running_std,
            self.beta, self.gamma,
            epsilon=self.epsilon)
    else:
        # need broadcasting
        broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
        broadcast_running_std = K.reshape(self.running_std, broadcast_shape)
        broadcast_beta = K.reshape(self.beta, broadcast_shape)
        broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
        x_normed = K.batch_normalization(
            x, broadcast_running_mean, broadcast_running_std,
            broadcast_beta, broadcast_gamma,
            epsilon=self.epsilon)
    return x_normed
def call(self, x, mask=None):
    """Convolve every input channel with the real and/or complex filters.

    The input is permuted with (0, 2, 1), flattened to rows of length
    ``self.input_length`` and given two singleton axes before the 2D
    convolutions run.  Complex filters contribute the magnitude
    ``sqrt(c1**2 + c2**2 + epsilon)`` of their two responses.
    ``mask`` is unused.
    """
    signal = K.permute_dimensions(x, (0, 2, 1))
    signal = K.reshape(signal, (-1, self.input_length))
    signal = K.expand_dims(K.expand_dims(signal, 1), -1)

    def convolve(filters):
        return K.conv2d(signal, filters, strides=self.subsample,
                        border_mode=self.border_mode,
                        dim_ordering='th')

    # Without real filters the prepared signal itself takes their place.
    real_part = signal if self.real_filts is None else convolve(self.W_r)

    if self.complex_filts is None:
        output = real_part
    else:
        first = convolve(self.W_c1)
        second = convolve(self.W_c2)
        magnitude = K.sqrt(K.square(first) + K.square(second) + K.epsilon())
        output = K.concatenate((real_part, magnitude), axis=1)

    output_shape = self.get_output_shape_for(
        (None, self.input_length, self.input_dim))
    output = K.squeeze(output, 3)  # drop the dummy trailing dimension
    output = K.permute_dimensions(output, (2, 1, 0))
    merged = output.shape[1] * output.shape[2]
    return K.reshape(output, (-1, output_shape[1], merged))
def get_output(self, train=False):
    """Convolve the input with the motifs on both strands.

    The forward result uses the kernel ``W`` as-is; the reverse-complement
    result uses ``W[:, ::-1, :, ::-1]``.  Both get the bias added and are
    concatenated along axis 2.

    ``self.W`` and ``self.b`` are pairs: element 0 is the parameter and
    element 1 is either ``None`` or an index used to select from it.

    Args:
        train: Forwarded to ``self.get_input``.

    Returns:
        The concatenated forward / reverse-complement convolution output.
    """
    # Debug traces; written so they print the same text on Python 2 and 3
    # (the former ``print`` statements were syntax errors on Python 3).
    print("Input Shape %s" % (self.input_shape,))
    print("ConvolutionDNASequenceBinding %s" % (self.output_shape,))
    X = self.get_input(train)
    if self.use_three_base_encoding:
        # Forward strand drops the first channel, reverse keeps the first
        # three -- presumably a 4-channel encoding, TODO confirm.
        X_fwd = X[:, 1:, :, :]
        X_rc = X[:, :3, :, :]
    else:
        X_fwd = X
        X_rc = X

    print(self.W)
    print(self.b)
    if self.W[1] is not None:
        W = self.W[0][self.W[1], :, :, :]
    else:
        W = self.W[0]
    if self.b[1] is not None:
        b = self.b[0][self.b[1]]
    else:
        b = self.b[0]

    fwd_rv = K.conv2d(X_fwd, W, border_mode='valid') \
        + K.reshape(b, (1, self.nb_motifs, 1, 1))
    rc_rv = K.conv2d(X_rc, W[:, ::-1, :, ::-1], border_mode='valid') \
        + K.reshape(b, (1, self.nb_motifs, 1, 1))
    rv = K.concatenate((fwd_rv, rc_rv), axis=2)
    return rv
def time_distributed_dense(x, w, b=None, dropout=None,
                           input_dim=None, output_dim=None, timesteps=None,
                           activation='linear'):
    '''Apply y.w + b for every temporal slice y of x.

    # Arguments
        x: input tensor, indexed as (samples, timesteps, input_dim).
        w: weight matrix; its second dimension is the output size.
        b: optional bias added after the matrix product.
        dropout: optional rate in (0, 1); the same dropout pattern is
            applied at every timestep, in the training phase only.
        input_dim, output_dim, timesteps: optional static sizes; when
            omitted they are read from the symbolic shapes.
        activation: name or callable resolved via `activations.get`.

    # Returns
        Tensor reshaped to (-1, timesteps, output_dim).
    '''
    activation = activations.get(activation)

    if not input_dim:
        # won't work with TensorFlow
        input_dim = K.shape(x)[2]
    if not timesteps:
        # won't work with TensorFlow
        timesteps = K.shape(x)[1]
    if not output_dim:
        # won't work with TensorFlow
        output_dim = K.shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    # Compare against None explicitly: truth-testing a tensor is
    # backend-dependent (it raises on TensorFlow).
    if b is not None:
        x = x + b
    # reshape to 3D tensor
    x = K.reshape(activation(x), (-1, timesteps, output_dim))
    return x
def call(self, position):
    """Build a 3x3 transformation matrix for every position.

    The positions are converted with ``self.toChw``; columns 0-3 of the
    result are used as centre x, centre y, height and width.  The centre
    is jittered uniformly within ``[-distortion, +distortion]`` and the
    sides are enlarged by ``self.context`` and floored at ``self.minSide``.

    Returns:
        Tensor with the leading dimensions of ``position`` followed by
        (3, 3).
    """
    rank = K.ndim(position)
    original_shape = K.shape(position)
    target_dim = original_shape[-1]
    flat = K.reshape(position, (-1, target_dim))
    count = K.shape(flat)[0]
    theta = THT.zeros((count, 3, 3))

    chw = K.reshape(self.toChw(flat), (count, target_dim))

    # Random jitter; x is drawn before y.
    jitter_x = -self.distortion + 2.0 * self.distortion * self.srng.uniform((count,))
    jitter_y = -self.distortion + 2.0 * self.distortion * self.srng.uniform((count,))
    centre_x = chw[:, 0] + jitter_x
    centre_y = chw[:, 1] + jitter_y
    grow = 1.0 + self.context
    side_h = K.maximum(chw[:, 2] * grow, self.minSide)
    side_w = K.maximum(chw[:, 3] * grow, self.minSide)

    # (row, column, value) entries of the matrix: scale on the diagonal,
    # translation in the last column, 1 in the bottom-right corner.
    entries = (
        (0, 0, side_w / 2.0),
        (1, 1, side_h / 2.0),
        (0, 2, centre_x),
        (1, 2, centre_y),
        (2, 2, 1.0),
    )
    for row, col, value in entries:
        theta = THT.set_subtensor(theta[:, row, col], value)

    # Restore the leading dimensions that were flattened above.
    full_shape = K.concatenate([original_shape[:-1], K.shape(theta)[-2:]])
    return THT.reshape(theta, full_shape, ndim=rank + 1)
def call(self, inputs):
    """Rearrange channel blocks into space, upscaling by ``self.size``.

    Args:
        inputs: Rank-4 tensor laid out according to ``self.data_format``.

    Returns:
        Tensor whose height and width are multiplied by ``rh`` and ``rw``
        and whose channel count is divided by ``rh * rw``.  ``None`` is
        returned when ``self.data_format`` is neither 'channels_first'
        nor 'channels_last' (unchanged legacy behaviour).

    Raises:
        ValueError: If ``inputs`` is not rank 4.
    """
    input_shape = K.int_shape(inputs)
    if len(input_shape) != 4:
        # Build one message string; the stray comma used to hand
        # ValueError two arguments, so the error showed up as a tuple.
        raise ValueError('Inputs should have rank ' + str(4) +
                         '; Received input shape: ' + str(input_shape))

    if self.data_format == 'channels_first':
        batch_size, c, h, w = input_shape
        if batch_size is None:
            # Unknown batch size: let reshape infer it.
            batch_size = -1
        rh, rw = self.size
        oh, ow = h * rh, w * rw
        oc = c // (rh * rw)

        out = K.reshape(inputs, (batch_size, rh, rw, oc, h, w))
        out = K.permute_dimensions(out, (0, 3, 4, 1, 5, 2))
        out = K.reshape(out, (batch_size, oc, oh, ow))
        return out

    elif self.data_format == 'channels_last':
        batch_size, h, w, c = input_shape
        if batch_size is None:
            # Unknown batch size: let reshape infer it.
            batch_size = -1
        rh, rw = self.size
        oh, ow = h * rh, w * rw
        oc = c // (rh * rw)

        out = K.reshape(inputs, (batch_size, h, w, rh, rw, oc))
        out = K.permute_dimensions(out, (0, 1, 3, 2, 4, 5))
        out = K.reshape(out, (batch_size, oh, ow, oc))
        return out
def call(self, X):
    """Attenuate a frame outside an enlarged rectangular region.

    Args:
        X: ``[frame, position]``.  Columns 0-3 of ``position`` are used as
            (x1, y1, x2, y2) on a [-1, 1] grid.

    Returns:
        The frame multiplied by a mask that is 1 inside the box and
        ``self.alpha`` elsewhere, in the frame's original shape.
    """
    if type(X) is not list or len(X) != 2:
        raise Exception("SquareAttention must be called on a list of two tensors. Got: " + str(X))
    frame, position = X

    # Flatten the leading (time) dimensions away.
    frameShape = K.shape(frame)
    (chans, height, width) = frameShape[-3:]
    targetDim = K.shape(position)[-1]
    frame = K.reshape(frame, (-1, chans, height, width))
    position = K.reshape(position, (-1, targetDim))

    # Grow the box by ``self.scale`` and clamp the tensor to [-1, 1].
    half_w = THT.abs_(position[:, 2] - position[:, 0]) * self.scale / 2.0
    half_h = THT.abs_(position[:, 3] - position[:, 1]) * self.scale / 2.0
    position = THT.maximum(
        THT.set_subtensor(position[:, 0], position[:, 0] - half_w), -1.0)
    position = THT.minimum(
        THT.set_subtensor(position[:, 2], position[:, 2] + half_w), 1.0)
    position = THT.maximum(
        THT.set_subtensor(position[:, 1], position[:, 1] - half_h), -1.0)
    position = THT.minimum(
        THT.set_subtensor(position[:, 3], position[:, 3] + half_h), 1.0)

    def column(index):
        # One box coordinate per sample, broadcastable against the grid.
        return position[:, index].dimshuffle(0, 'x')

    grid_x = Data.linspace(-1.0, 1.0, width)
    grid_y = Data.linspace(-1.0, 1.0, height)
    inside_x = THT.gt(grid_x, column(0)) * THT.le(grid_x, column(2))
    inside_y = THT.gt(grid_y, column(1)) * THT.le(grid_y, column(3))
    m = inside_y.dimshuffle(0, 1, 'x') * inside_x.dimshuffle(0, 'x', 1)
    # Positive cells stay at 1, everything else becomes alpha.
    m = m + self.alpha - THT.gt(m, 0.) * self.alpha
    frame = frame * m.dimshuffle(0, 'x', 1, 2)

    # Put the leading (time) dimensions back.
    return K.reshape(frame, frameShape)
def call(self, X):
    """Weight a frame with a Gaussian window centred on a box.

    Args:
        X: ``[frame, position]``.  Columns 0-3 of ``position`` are used as
            (x1, y1, x2, y2) on a [-1, 1] grid.

    Returns:
        The frame multiplied by ``min(gaussian + self.alpha, 1)``, in the
        frame's original shape.
    """
    if type(X) is not list or len(X) != 2:
        raise Exception("GaussianAttention must be called on a list of two tensors. Got: " + str(X))
    frame, position = X

    # Flatten the leading (time) dimensions away.
    frameShape = K.shape(frame)
    (chans, height, width) = frameShape[-3:]
    targetDim = K.shape(position)[-1]
    frame = K.reshape(frame, (-1, chans, height, width))
    position = K.reshape(position, (-1, targetDim))

    # Box centre, and a spread of 0.60 times the half extent.
    centre_x = (position[:, 0] + position[:, 2]) / 2.0
    centre_y = (position[:, 1] + position[:, 3]) / 2.0
    sigma_x = (position[:, 2] - centre_x) * 0.60
    sigma_y = (position[:, 3] - centre_y) * 0.60

    grid_x = Data.linspace(-1.0, 1.0, width)
    grid_y = Data.linspace(-1.0, 1.0, height)
    # epsilon keeps the denominator positive for zero-sized boxes.
    window_x = K.exp(
        -(grid_x - centre_x.dimshuffle(0, 'x')) ** 2
        / (2.0 * (sigma_x.dimshuffle(0, 'x') ** 2 + self.epsilon)))
    window_y = K.exp(
        -(grid_y - centre_y.dimshuffle(0, 'x')) ** 2
        / (2.0 * (sigma_y.dimshuffle(0, 'x') ** 2 + self.epsilon)))
    m = (window_y.dimshuffle(0, 1, 'x') * window_x.dimshuffle(0, 'x', 1))
    m = m + self.alpha
    # Cap the mask at 1.0.
    m = m - K.greater(m, 1.0) * (m - 1.0)
    frame = frame * m.dimshuffle(0, 'x', 1, 2)

    # Put the leading (time) dimensions back.
    return K.reshape(frame, frameShape)
def get_model(inputdim, outputdim, regularization_strength=0.01, lr=0.000, cosine=False, **kwargs):
    """Build and compile a two-input ``Graph`` model scoring embedding pairs.

    Both inputs go through one shared ``Dense`` layer (identity init,
    ``Orthogonal`` constraint); the first ``outputdim`` columns are kept.
    The second branch is negated, so the 'sum' merge below yields the
    difference of the two projections.

    ``regularization_strength``, ``lr`` and ``kwargs`` are accepted but
    not used in this function.

    Returns:
        The compiled model; its single output is named 'y'.
    """
    transformation = Dense(inputdim, init='identity', W_constraint=Orthogonal())
    model = Graph()
    model.add_input(name='embeddings1', input_shape=(inputdim,))
    model.add_input(name='embeddings2', input_shape=(inputdim,))
    # One shared layer transforms both inputs.
    model.add_shared_node(transformation, name='transformation', inputs=['embeddings1', 'embeddings2'], outputs=['transformed1', 'transformed2'])
    # Keep the first ``outputdim`` columns; the second branch is negated.
    model.add_node(Lambda(lambda x: x[:, :outputdim]), input='transformed1', name='projected1')
    model.add_node(Lambda(lambda x: -x[:, :outputdim]), input='transformed2', name='negprojected2')
    if cosine:
        # Scale each row to unit L2 norm, multiply the two branches
        # element-wise and sum per row.  Because branch 2 is negated this
        # is the negative dot product of the normalized rows.
        model.add_node(Lambda(lambda x: x / K.reshape(K.sqrt(K.sum(x * x, axis=1)), (x.shape[0], 1))), name='normalized1', input='projected1')
        model.add_node(Lambda(lambda x: x / K.reshape(K.sqrt(K.sum(x * x, axis=1)), (x.shape[0], 1))), name='negnormalized2', input='negprojected2')
        model.add_node(Lambda(lambda x: K.reshape(K.sum(x, axis=1), (x.shape[0], 1))), name='distances', inputs=['normalized1', 'negnormalized2'], merge_mode='mul')
    else:
        # L2 norm of (projected1 + negprojected2), one value per row.
        model.add_node(Lambda(lambda x: K.reshape(K.sqrt(K.sum(x * x, axis=1)), (x.shape[0], 1))), name='distances', inputs=['projected1', 'negprojected2'], merge_mode='sum')
    model.add_output(name='y', input='distances')
    # The loss is the mean of target * distance.
    model.compile(loss={'y': lambda y, d: K.mean(y * d)}, optimizer=SimpleSGD())
    return model
def call(self, x, mask=None):
    """Return ``gamma * x + beta`` broadcast along ``self.axis``.

    ``mask`` is unused.
    """
    full_shape = self.input_spec[0].shape
    # Singleton everywhere except along the scaled axis.
    param_shape = [1] * len(full_shape)
    param_shape[self.axis] = full_shape[self.axis]
    scale = K.reshape(self.gamma, param_shape)
    shift = K.reshape(self.beta, param_shape)
    return scale * x + shift
def image_categorical_crossentropy(output, target, from_logits=False):
    """Mean categorical cross-entropy over rows of 256 values.

    ``output`` is clipped to ``[_EPSILON, 1 - _EPSILON]``, both tensors
    are reshaped to rows of 256 entries, and the per-row cross-entropy is
    averaged first per sample and then over the batch.
    ``from_logits`` is accepted but not used.
    """
    clipped = T.clip(output, _EPSILON, 1.0 - _EPSILON)
    predicted_rows = K.reshape(clipped, (-1, 256))
    target_rows = K.reshape(target, (-1, 256))
    losses = T.nnet.categorical_crossentropy(predicted_rows, target_rows)
    # Regroup the per-row losses by sample before averaging.
    per_sample = K.reshape(losses, (K.shape(clipped)[0], -1))
    return T.mean(T.mean(per_sample, axis=1))
def call(self, x, mask=None):
    """Max-pool ``x`` over a pyramid of grids and concatenate the results.

    For every entry ``n`` of ``self.pool_list`` the spatial extent is
    divided into an ``n x n`` grid of regions (edges rounded to integers)
    and the maximum over each region is taken.  ``mask`` is unused.
    """
    input_shape = K.shape(x)

    if self.dim_ordering == 'th':
        num_rows = input_shape[2]
        num_cols = input_shape[3]
    elif self.dim_ordering == 'tf':
        num_rows = input_shape[1]
        num_cols = input_shape[2]

    row_length = [K.cast(num_rows, 'float32') / i for i in self.pool_list]
    col_length = [K.cast(num_cols, 'float32') / i for i in self.pool_list]

    channels_first = self.dim_ordering == 'th'
    outputs = []
    for pool_num, num_pool_regions in enumerate(self.pool_list):
        region_w = col_length[pool_num]
        region_h = row_length[pool_num]
        for ix in range(num_pool_regions):
            for jy in range(num_pool_regions):
                # Fractional region edges, rounded to pixel indices.
                left = ix * region_w
                right = ix * region_w + region_w
                top = jy * region_h
                bottom = jy * region_h + region_h

                x1 = K.cast(K.round(left), 'int32')
                x2 = K.cast(K.round(right), 'int32')
                y1 = K.cast(K.round(top), 'int32')
                y2 = K.cast(K.round(bottom), 'int32')

                if channels_first:
                    new_shape = [input_shape[0], input_shape[1],
                                 y2 - y1, x2 - x1]
                    x_crop = x[:, :, y1:y2, x1:x2]
                    spatial_axes = (2, 3)
                else:
                    new_shape = [input_shape[0], y2 - y1,
                                 x2 - x1, input_shape[3]]
                    x_crop = x[:, y1:y2, x1:x2, :]
                    spatial_axes = (1, 2)

                xm = K.reshape(x_crop, new_shape)
                outputs.append(K.max(xm, axis=spatial_axes))

    return K.concatenate(outputs)
def _transform(theta, input, downsample_factor):
    """Resample ``input`` on a grid transformed by ``theta``.

    ``theta`` is reshaped to (-1, 2, 3) matrices and applied to the grid
    produced by ``_meshgrid``; the resulting coordinates are handed to
    ``_interpolate``.  ``input`` is unpacked as
    (batch, channels, height, width) and the output uses the same axis
    order, with height and width floor-divided by ``downsample_factor``.
    """
    num_batch, num_channels, height, width = input.shape
    affine = K.reshape(theta, (-1, 2, 3))

    # grid of (x_t, y_t, 1), eq (1) in ref [2]
    target_h = K.cast(K.cast(height, 'float32') // downsample_factor, 'int64')
    target_w = K.cast(K.cast(width, 'float32') // downsample_factor, 'int64')
    grid = _meshgrid(target_h, target_w)

    # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s)
    source = K.dot(affine, grid)
    xs_flat = source[:, 0].flatten()
    ys_flat = source[:, 1].flatten()

    # move channels last: (bs, height, width, channels)
    channels_last = input.transpose(0, 2, 3, 1)
    sampled = _interpolate(
        channels_last, xs_flat, ys_flat, downsample_factor)

    sampled = K.reshape(
        sampled, (num_batch, target_h, target_w, num_channels))
    # back to (bs, channels, height, width)
    return sampled.transpose(0, 3, 1, 2)
def step(self, x, states):
    """Advance the convolutional LSTM by one timestep.

    Args:
        x: Input for the current step.
        states: Sequence whose first two items are the previous hidden
            state and the previous cell state.

    Returns:
        ``(h, [h, c])``: the new hidden state and the updated state list.
    """
    prev_h = states[0]
    prev_c = states[1]

    def conv(tensor, kernel):
        return K.conv2d(tensor, kernel, border_mode="same")

    def bias(vector):
        # Broadcast along axis 1 only.
        return K.reshape(vector, (1, -1, 1, 1))

    # Input-to-state and state-to-state convolutions.
    in_i, in_f = conv(x, self.W_i), conv(x, self.W_f)
    in_c, in_o = conv(x, self.W_c), conv(x, self.W_o)
    rec_i, rec_f = conv(prev_h, self.U_i), conv(prev_h, self.U_f)
    rec_c, rec_o = conv(prev_h, self.U_c), conv(prev_h, self.U_o)

    # All three peephole terms read the *previous* cell state.
    peep_i = self.C_i * prev_c
    peep_f = self.C_f * prev_c
    peep_o = self.C_o * prev_c

    gate_i = self.inner_activation(in_i + rec_i + peep_i + bias(self.b_i))
    gate_f = self.inner_activation(in_f + rec_f + peep_f + bias(self.b_f))
    cell = gate_f * prev_c + gate_i * self.activation(
        in_c + rec_c + bias(self.b_c))
    gate_o = self.inner_activation(in_o + rec_o + peep_o + bias(self.b_o))

    hidden = gate_o * self.activation(cell)
    return hidden, [hidden, cell]
def call(self, x, mask=None):
    """Collapse the step axis of ``x`` with learned attention weights.

    A score per step is computed as ``tanh(x . W [+ b])``, exponentiated,
    optionally masked and normalized over the steps; the weighted sum of
    ``x`` over axis 1 is returned.
    """
    n_features = self.features_dim
    n_steps = self.step_dim

    # K.dot(x, self.W) is not supported on the TF backend, so the product
    # is done on a 2D view and folded back to (-1, steps).
    flat_scores = K.dot(K.reshape(x, (-1, n_features)),
                        K.reshape(self.W, (n_features, 1)))
    scores = K.reshape(flat_scores, (-1, n_steps))
    if self.bias:
        scores = scores + self.b
    weights = K.exp(K.tanh(scores))

    # Masking happens after the exp; the division below re-normalizes.
    if mask is not None:
        # cast to floatX to avoid float64 upcasting in theano
        weights = weights * K.cast(mask, K.floatx())

    # The sum may be almost zero early in training, hence the epsilon.
    normalizer = K.cast(
        K.sum(weights, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    weights = K.expand_dims(weights / normalizer)
    return K.sum(x * weights, axis=1)
def yolo_head(feats, anchors, num_classes, input_shape):
    """Turn final-layer features into box centres, sizes and scores.

    Returns:
        ``(box_xy, box_wh, box_confidence, box_class_probs)``; centres are
        divided by the reversed grid shape and sizes by the reversed
        ``input_shape``.
    """
    num_anchors = len(anchors)
    # Anchors shaped for broadcasting: batch, height, width, anchor, 2.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_h, grid_w = grid_shape[0], grid_shape[1]
    rows = K.reshape(K.arange(0, stop=grid_h), [-1, 1, 1, 1])
    grid_y = K.tile(rows, [1, grid_w, 1, 1])
    cols = K.reshape(K.arange(0, stop=grid_w), [1, -1, 1, 1])
    grid_x = K.tile(cols, [grid_h, 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_h, grid_w, num_anchors, num_classes + 5])

    # Last-axis layout: 0:2 centre, 2:4 size, 4 objectness, 5: classes.
    raw_xy = K.sigmoid(feats[..., :2])
    raw_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (raw_xy + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
    box_wh = raw_wh * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))

    return box_xy, box_wh, box_confidence, box_class_probs
def simple_context(X, mask, n=activation_rnn_size):
    """Reduce the input to its headline part, augmented with context.

    ``X`` is split on the time axis at ``maxlend`` into description and
    headline.  The first ``n`` features of every output drive the
    attention weights; the remaining features are the "words".  For each
    headline step the result concatenates the attention-weighted average
    of the description words with that step's own words.
    """
    description = X[:, :maxlend, :]
    headline = X[:, maxlend:, :]
    head_keys, head_words = headline[:, :, :n], headline[:, :, n:]
    desc_keys, desc_words = description[:, :, :n], description[:, :, n:]

    # energy for every (headline step, description step) pair
    energies = K.batch_dot(head_keys, desc_keys, axes=(2, 2))

    # masked-out description steps get a huge negative energy, so the
    # softmax gives them practically no weight
    masked_out = 1. - K.cast(mask[:, :maxlend], 'float32')
    energies = energies + -1e20 * K.expand_dims(masked_out, 1)

    # softmax over description steps, done on a 2D view
    weights = K.softmax(K.reshape(energies, (-1, maxlend)))
    weights = K.reshape(weights, (-1, maxlenh, maxlend))

    # weighted average of description words for every headline step
    context = K.batch_dot(weights, desc_words, axes=(2, 1))
    return K.concatenate((context, head_words))
def call(self, x, mask=None):
    """Apply the wrapped layer to the last two dimensions of ``x``.

    All leading dimensions are folded into one before ``self.layer.call``
    runs; the result is reshaped to ``self.get_output_shape_for`` of the
    static input shape and cast to ``K.floatx()``.
    """
    # Prefer the static shape recorded on the tensor, when available.
    static_shape = (x._keras_shape if hasattr(x, '_keras_shape')
                    else self._input_shape)

    # (batch * d1 * ... * dn-2, dn-1, dn)
    folded = K.reshape(x, (-1,) + static_shape[-2:])
    if mask is not None:
        # give the mask the same first dimension as the folded input
        mask = K.reshape(mask, (K.shape(folded)[0], -1))

    inner = self.layer.call(folded, mask)
    target_shape = self.get_output_shape_for(static_shape)
    return K.cast(K.reshape(inner, target_shape), K.floatx())
def get_output(self, train=False):
    """Apply ``self.layer`` to every timestep of the input.

    The first two input dimensions are merged, the wrapped layer is
    called on the result, and the two dimensions are split back out.

    Args:
        train: Forwarded to ``self.get_input``.

    Returns:
        The wrapped layer's output with the input's first two dimensions
        restored in front.
    """
    def format_shape(shape):
        # On TensorFlow, convert entries to plain ints where possible.
        if K._BACKEND == 'tensorflow':
            def trf(x):
                try:
                    return int(x)
                except TypeError:
                    return x
            # Must be a real list: the result is indexed and sliced
            # below, which a Python 3 ``map`` iterator does not support.
            return [trf(dim) for dim in shape]
        return shape

    X = self.get_input(train)

    in_shape = format_shape(K.shape(X))
    batch_flatten_len = K.prod(in_shape[:2])
    cast_in_shape = (batch_flatten_len, ) + tuple(
        in_shape[i] for i in range(2, K.ndim(X)))

    pre_outs = self.layer(K.reshape(X, cast_in_shape))

    out_shape = format_shape(K.shape(pre_outs))
    cast_out_shape = (in_shape[0], in_shape[1]) + tuple(
        out_shape[i] for i in range(1, K.ndim(pre_outs)))

    outputs = K.reshape(pre_outs, cast_out_shape)
    return outputs
def euclidDist( inputs ):
    """Return the batch-wise dot product of ``inputs[0] - inputs[1]`` with itself.

    No square root is applied, so the value is the squared distance
    between the two inputs along axis 1.

    Args:
        inputs: Sequence of exactly two tensors of matching shape.

    Returns:
        The result of ``K.batch_dot(x, x, axes=1)`` for the difference ``x``.
    """
    assert len( inputs ) == 2, "euclidDist requires 2 inputs"
    l1 = inputs[ 0 ]
    l2 = inputs[ 1 ]
    x = l1 - l2
    # A former ``K.reshape(output, (1,))`` call was dropped here: its
    # result was never assigned, so it had no effect on the return value.
    # NOTE(review): if a reshaped output was intended, assign it explicitly.
    output = K.batch_dot( x, x, axes = 1 )
    return output
def gram_matrix_mean_squared_error(self, y_true, y_pred):
    """Return the scaled squared difference of two Gram matrices.

    Both inputs are wrapped in backend variables and reshaped to
    (filters, x_pos * y_pos); the element-wise squared difference of
    their ``self.gram_matrix`` results is divided by
    ``(2 * filters * x_pos * y_pos) ** 2``.

    NOTE(review): no reduction is applied, so the result has the shape of
    the Gram matrix rather than being a scalar -- confirm this is intended.
    """
    prediction = K.variable(value=y_pred)
    target = K.variable(value=y_true)
    _, n_filters, n_x, n_y = self.get_shape(prediction)
    scale = K.variable(value=(2 * n_filters * (n_x * n_y)) ** 2)
    flat_shape = (n_filters, n_x * n_y)
    prediction = K.reshape(prediction, flat_shape)
    target = K.reshape(target, flat_shape)
    gram_difference = self.gram_matrix(prediction) - self.gram_matrix(target)
    return K.square(gram_difference) / scale
def get_output(self, train=False):
    """Apply ``self.model`` to the input and restore the time axis.

    Args:
        train: Forwarded to ``self.get_input`` so upstream layers run in
            the requested phase.  It used to be dropped, which always
            evaluated the input in its default phase.

    Returns:
        Tensor reshaped to (sample, time, dim_out).
    """
    X = self.get_input(train)
    batch_size, time_len = X.shape[:2]
    # Flatten to 2D, then to the shape the wrapped model expects
    # (``self.reshape_dim``; presumably (sample*time, dim1, dim2, ...)
    # -- TODO confirm).
    X = X.flatten(ndim=2)
    X = K.reshape(X, self.reshape_dim)
    Y = apply_model(self.model, X)
    Y = K.reshape(Y, (batch_size, time_len, -1))  # (sample, time, dim_out)
    return Y
def call(self, x, mask=None):
    """Apply the wrapped layer along the last dimension of ``x``.

    All leading dimensions are folded into one before ``self.layer.call``
    runs, and the result is reshaped to ``self.get_output_shape_for`` of
    the static input shape.

    Args:
        x: Input tensor.
        mask: Optional mask; reshaped to share the folded first dimension
            when given, passed through as ``None`` otherwise.

    Returns:
        The wrapped layer's output in the layer's output shape.
    """
    input_shape = self.input_spec[0].shape
    # (batch * d1 * ... * dn-2 * dn-1, dn)
    x = K.reshape(x, (-1,) + input_shape[-1:])
    # Only reshape a mask that was actually supplied; reshaping the
    # default ``None`` used to fail inside the backend.
    if mask is not None:
        mask_shape = (K.shape(x)[0], -1)
        mask = K.reshape(mask, mask_shape)  # give it the same first dim
    y = self.layer.call(x, mask)
    output_shape = self.get_output_shape_for(input_shape)
    return K.reshape(y, output_shape)
def max_margin2(y_true, y_pred):
    """Sum of ``max(0, 1 - p + c)`` over positive/corrupt score pairs.

    ``y_true`` is expected to be 1 for positive samples and 0 for corrupt
    ones.  The batch may have any even size (it was hard-coded to 128).

    NOTE(review): the reshape to (2, -1) pairs element ``j`` with element
    ``j + half``, i.e. the batch is treated as two consecutive halves.
    An interleaved (p, c, p, c, ...) layout would not be paired this way
    -- confirm which layout the data pipeline produces.
    """
    # Positives contribute -p, corrupt samples contribute +c.
    v = - y_pred * y_true + y_pred * (1.0 - y_true)
    # Two rows holding the first and second half of the batch; -1 lets
    # the backend infer the half size instead of assuming 64.
    v = K.reshape(v, (2, -1))
    v = 1. + K.sum(v, axis=0)  # 1 + v[j] + v[j + half]
    v = K.reshape(v, (-1,))
    v = K.maximum(0., v)       # hinge
    return K.sum(v)
def add_dim(tensor):
    """Reshape a scalar (shape ``()``) tensor to (1, 1); pass others through."""
    if K.int_shape(tensor) != ():
        return tensor
    to_matrix = KL.Lambda(lambda t: K.reshape(t, [1, 1]))
    return to_matrix(tensor)
def call(self, inputs):
    """Multi-head attention with optional padding and causal/custom masks.

    Args:
        inputs: ``[q, k, v]`` optionally followed by ``v_mask`` and then
            ``q_mask``.  Masks have shape [batch_size, seq_len] or
            [batch_size, seq_len, 1].

    Returns:
        Tensor reshaped to (-1, query length, ``self.out_dim``).

    Side effects:
        Stores the attention weights in ``self.a`` and, when a custom
        ``mask_right`` matrix is used, the expanded mask in ``self.mask``.
    """
    q, k, v = inputs[:3]
    v_mask, q_mask = None, None
    if len(inputs) > 3:
        v_mask = inputs[3]
        if len(inputs) > 4:
            q_mask = inputs[4]
    # Linear projections
    qw = self.reuse(self.q_dense, q)
    kw = self.reuse(self.k_dense, k)
    vw = self.reuse(self.v_dense, v)
    # Split into heads
    qw = K.reshape(qw, (-1, K.shape(qw)[1], self.heads, self.key_size))
    kw = K.reshape(kw, (-1, K.shape(kw)[1], self.heads, self.key_size))
    vw = K.reshape(vw, (-1, K.shape(vw)[1], self.heads, self.size_per_head))
    # Move the head axis in front of the sequence axis
    qw = K.permute_dimensions(qw, (0, 2, 1, 3))
    kw = K.permute_dimensions(kw, (0, 2, 1, 3))
    vw = K.permute_dimensions(vw, (0, 2, 1, 3))
    # Scaled dot-product attention; the permutes around ``to_mask`` swap
    # axes 1 and 3 and swap them back afterwards.
    a = K.batch_dot(qw, kw, [3, 3]) / self.key_size**0.5
    a = K.permute_dimensions(a, (0, 3, 2, 1))
    a = to_mask(a, v_mask, 'add')
    a = K.permute_dimensions(a, (0, 3, 2, 1))
    # ``and`` (not ``or``): the old condition was always true, so a
    # ``mask_right`` of False/None still reached the custom-mask branch
    # and K.constant() was called on it.
    if (self.mask_right is not False) and (self.mask_right is not None):
        if self.mask_right is True:
            # Causal mask: penalize everything above the diagonal.
            ones = K.ones_like(a[:1, :1])
            mask = (ones - K.tf.matrix_band_part(ones, -1, 0)) * 1e10
            a = a - mask
        else:
            # mask_right is an externally supplied 0/1 matrix with
            # shape [q_len, k_len]
            mask = (1 - K.constant(self.mask_right)) * 1e10
            mask = K.expand_dims(K.expand_dims(mask, 0), 0)
            self.mask = mask
            a = a - mask
    a = K.softmax(a)
    self.a = a
    # Weighted sum of values, merge heads, mask the output
    o = K.batch_dot(a, vw, [3, 2])
    o = K.permute_dimensions(o, (0, 2, 1, 3))
    o = K.reshape(o, (-1, K.shape(o)[1], self.out_dim))
    o = to_mask(o, q_mask, 'mul')
    return o
def local_conv3d(self, inputs, kernel, kernel_size, strides, output_shape, data_format=None):
    """Apply 3D conv with un-shared weights.

    # Arguments
        inputs: 5D tensor, indexed as
                (batch, channels, rows, cols, depth)
                if data_format='channels_first'
                or (batch, rows, cols, depth, channels)
                if data_format='channels_last'.
        kernel: the unshared weight for convolution,
                with shape (output_items, feature_dim, filters)
        kernel_size: a tuple of 3 integers, the extent of the
                     convolution window along rows, cols and depth.
        strides: a tuple of 3 integers, the strides of the
                 convolution along rows, cols and depth.
        output_shape: a tuple with (output_row, output_col, output_z)
        data_format: the data format, channels_first or channels_last;
                     defaults to `K.image_data_format()`.

    # Returns
        A 5D tensor permuted to
        (batch, filters, output_row, output_col, output_z)
        if data_format='channels_first'
        or (batch, output_row, output_col, output_z, filters)
        if data_format='channels_last'.

    # Raises
        ValueError: if `data_format` is neither
                    `channels_last` or `channels_first`.
    """
    if data_format is None:
        data_format = K.image_data_format()
    if data_format not in {'channels_first', 'channels_last'}:
        raise ValueError('Unknown data_format: ' + str(data_format))
    channels_first = data_format == 'channels_first'

    stride_row, stride_col, stride_z = strides
    output_row, output_col, output_z = output_shape
    _, feature_dim, filters = K.int_shape(kernel)

    # One flattened window per output position, in (row, col, z) order.
    windows = []
    for i in range(output_row):
        rows = slice(i * stride_row, i * stride_row + kernel_size[0])
        for j in range(output_col):
            cols = slice(j * stride_col, j * stride_col + kernel_size[1])
            for k in range(output_z):
                depth = slice(k * stride_z, k * stride_z + kernel_size[2])
                if channels_first:
                    window = inputs[:, :, rows, cols, depth]
                else:
                    window = inputs[:, rows, cols, depth, :]
                windows.append(K.reshape(window, (1, -1, feature_dim)))

    # Each output position has its own weight slice in ``kernel``.
    stacked = K.concatenate(windows, axis=0)
    output = K.batch_dot(stacked, kernel)
    output = K.reshape(output,
                       (output_row, output_col, output_z, -1, filters))

    # Move the batch axis to the front and place filters per data_format.
    if channels_first:
        order = (3, 4, 0, 1, 2)
    else:
        order = (3, 0, 1, 2, 4)
    return K.permute_dimensions(output, order)
def __call__(self, y_sing_pred):
    """Decode one prediction tensor into NMS-filtered detections.

    The last axis of ``y_sing_pred`` is read as (xy, wh, objectness,
    class logits).  Boxes are decoded to corner form, scores at or below
    ``self.detection_threshold`` are zeroed, and non-max suppression runs
    once per class.

    Returns:
        ``process_outs`` applied to the selected boxes, scores and
        (float-cast) class indices.
    """
    anchor_values = self.config["constants"]["anchors"]
    grid_size = self.config["model"]["grid_size"]
    anchors = np.reshape(anchor_values,
                         [1, 1, 1, len(anchor_values) // 2, 2])

    # Boxes are predicted in grid units; dividing by the grid size
    # converts them to image coordinates.
    centre = (K.sigmoid(y_sing_pred[..., 0:2]) + self.c_grid[0]) / grid_size
    extent = (K.exp(y_sing_pred[..., 2:4]) * anchors[0]) / grid_size
    top_left = centre - extent / 2.
    bottom_right = centre + extent / 2.
    boxes = K.concatenate([top_left, bottom_right], axis=-1)

    # zero out scores that do not exceed the detection threshold
    scores_all = K.sigmoid(y_sing_pred[..., 4:5]) * K.softmax(
        y_sing_pred[..., 5:])
    keep = scores_all > self.detection_threshold
    scores_all = scores_all * K.cast(keep, np.float32)

    # best class and its score per box
    classes = K.argmax(scores_all, axis=-1)
    scores = K.max(scores_all, axis=-1)

    # total number of boxes: grid cells times anchors
    S2B = grid_size * grid_size * len(anchor_values) // 2

    # flatten everything for NMS
    flatten_boxes = K.reshape(boxes, shape=(S2B, 4))
    flatten_scores = K.reshape(scores, shape=(S2B, ))
    flatten_classes = K.reshape(classes, shape=(S2B, ))

    # multiclass NMS: one pass per class
    per_class_indices = []
    for class_id in range(self.num_classes):
        # only boxes of this class with a positive score take part
        is_class = K.cast(K.equal(flatten_classes, class_id), np.float32)
        is_scored = K.cast(flatten_scores > 0, np.float32)
        active = is_class * is_scored

        per_class_indices.append(
            tf.image.non_max_suppression(
                flatten_boxes,
                flatten_scores * active,
                max_output_size=self.max_boxes,
                iou_threshold=self.nms_threshold,
                score_threshold=0.))

    # winners of all classes together
    selected_indices = K.concatenate(per_class_indices, axis=-1)

    selected_boxes = K.gather(flatten_boxes, selected_indices)
    selected_scores = K.gather(flatten_scores, selected_indices)
    selected_classes = K.gather(flatten_classes, selected_indices)

    return process_outs(selected_boxes, selected_scores,
                        K.cast(selected_classes, np.float32))
def attention(self, pre_q, pre_v, pre_k, out_seq_len: int, d_input: int,
              lengths=None, training=None):
    """
    Computes the attention output once the affine transformations of
    the inputs have been applied.

    :param pre_q: (batch_size, q_seq_len, num_heads, d_model // num_heads)
    :param pre_v: (batch_size, v_seq_len, num_heads, d_model // num_heads)
    :param pre_k: (batch_size, k_seq_len, num_heads, d_model // num_heads)
    :param out_seq_len: the length of the output sequence
    :param d_input: size of the last axis of the returned tensor
    :param lengths: forwarded unchanged to `mask_length_if_provided`
    :param training: Passed by Keras. Should not be defined manually.
      Optional scalar tensor indicating if we're in training
      or inference phase.
    :return: tensor reshaped to (-1, out_seq_len, d_input)
    :raises NotImplementedError: when memory compression and masking are
      both enabled
    """
    head_dim = self.d_model // self.num_heads

    # Q and V become (batch_size, num_heads, seq_len, d_model//heads)
    q = K.permute_dimensions(pre_q, [0, 2, 1, 3])
    v = K.permute_dimensions(pre_v, [0, 2, 1, 3])

    if self.compression_window_size is None:
        # K goes straight to (batch_size, num_heads, d_model//heads, seq_len)
        k_transposed = K.permute_dimensions(pre_k, [0, 2, 3, 1])
    else:
        # Memory-compressed attention from
        # "Generating Wikipedia by Summarizing Long Sequences"
        # (https://arxiv.org/pdf/1801.10198.pdf): keys and values are
        # shortened with a strided 1D convolution, which shrinks the
        # Q * K_transposed product accordingly.
        if self.use_masking:
            raise NotImplementedError(
                "Masked memory-compressed attention has not "
                "been implemented yet")
        window = self.compression_window_size

        def compress(item, kernel, bias):
            # Step 1: fold batch and heads together ->
            # (batch * num_heads, seq_len, d_model//heads)
            folded = K.reshape(
                item, (-1, K.int_shape(item)[-2], head_dim))
            # Step 2: strided convolution shortens the sequence axis
            convolved = K.conv1d(
                folded, kernel,
                strides=window,
                padding='valid',
                data_format='channels_last')
            # Step 3: add the bias and restore the 4D layout
            biased = K.bias_add(
                convolved, bias, data_format='channels_last')
            restored_shape = K.concatenate([
                K.shape(item)[:2],
                [K.int_shape(item)[2] // window, head_dim]
            ])
            return K.reshape(biased, restored_shape)

        k = K.permute_dimensions(pre_k, [0, 2, 1, 3])
        k = compress(k, self.k_conv_kernel, self.k_conv_bias)
        v = compress(v, self.v_conv_kernel, self.v_conv_bias)
        k_transposed = K.permute_dimensions(k, [0, 1, 3, 2])

    sqrt_d = K.constant(np.sqrt(head_dim), dtype=K.floatx())
    q_shape = K.int_shape(q)
    k_t_shape = K.int_shape(k_transposed)
    v_shape = K.int_shape(v)

    # Every operand of batch_dot is flattened to 3D
    # (batch_size * num_heads, rows, cols) so that batch_dot behaves the
    # same on all backends.
    q_3d = K.reshape(q, (-1, ) + q_shape[-2:])
    k_t_3d = K.reshape(k_transposed, (-1, ) + k_t_shape[-2:])
    scores = K.batch_dot(q_3d, k_t_3d) / sqrt_d

    scores = self.mask_attention_if_needed(scores)
    scores = self.mask_local_if_needed(scores)
    scores = self.mask_length_if_provided(scores, lengths=lengths)
    weights = self.apply_dropout_if_needed(
        K.softmax(scores), training=training)

    v_3d = K.reshape(v, (-1, ) + v_shape[-2:])
    attention_heads = K.reshape(
        K.batch_dot(weights, v_3d),
        (-1, self.num_heads, q_shape[-2], v_shape[-1]))

    # Bring heads next to the feature axis and merge them.
    attention_heads_merged = K.reshape(
        K.permute_dimensions(attention_heads, [0, 2, 1, 3]),
        (-1, self.d_model))
    projected = K.dot(attention_heads_merged, self.output_weights)
    return K.reshape(projected, (-1, out_seq_len, d_input))
def one_hot(self, seq, num_classes):
    """Compare every element of ``seq`` against ``0..num_classes-1``.

    ``seq`` is reshaped to a single column and compared element-wise,
    via broadcasting, with a Theano ``arange`` of length ``num_classes``.

    :param seq: tensor of class indices.
    :param num_classes: number of columns in the result.
    :return: the result of ``K.equal`` on the column and the range.
    """
    import theano.tensor as T

    column = K.reshape(seq, (-1, 1))
    class_ids = T.arange(num_classes)
    return K.equal(column, class_ids)
def build_model(self):
    """Wire up the three-branch attention classifier.

    The sentence, trigger-position and argument-position embeddings are
    concatenated and encoded with a bidirectional GRU. Three attention
    branches (trigger/entity context, trigger type, entity type) each
    produce an attended sentence vector; the three vectors are
    concatenated, passed through dropout and classified with a 9-way
    softmax.

    :return: the output tensor of the final softmax ``Dense`` layer.
    """

    def spread(tensor):
        # A fresh ``liter`` Lambda per call, parameterised by max_len.
        return Lambda(liter,
                      output_shape=liter_output_shape,
                      arguments={'length': self.max_len})(tensor)

    def attend(scores, layer_name):
        # Reduce the per-token scores (masked with the sentence embedding
        # mask) and use them to attend over the encoded sentence.
        reduced = Lambda(
            reduce_dimension,
            output_shape=reduce_dimension_output_shape,
            arguments={'length': self.max_len},
            mask=self.sentence_embedding_layer.get_output_mask_at(0),
            name=layer_name)(scores)
        return Lambda(attention,
                      output_shape=attention_output_shape,
                      arguments={'dim': 600})([reduced, sentence])

    # ----------------- sentence encoder ------------------------
    sentence = Concatenate()([
        self.sen_embedding,
        self.position_t_embedding,
        self.position_a_embedding,
    ])
    encoder = Bidirectional(
        GRU(300,
            activation="relu",
            return_sequences=True,
            recurrent_dropout=0.3,
            dropout=0.3))
    sentence = encoder(sentence)

    # One shared averaging layer for both position masks.
    average_layer = Lambda(average, output_shape=no_change)
    position_mt = average_layer(self.position_mt)
    position_ma = average_layer(self.position_ma)

    trigger = Dot(axes=[1, 1])([sentence, position_mt])
    entity = Dot(axes=[1, 1])([sentence, position_ma])
    triggers = spread(trigger)
    entities = spread(entity)

    # ----------------- trigger/entity attention ----------------
    x1 = Concatenate()([triggers, entities, sentence])
    x1 = Dense(300, activation='tanh')(x1)
    x1 = Dense(1)(x1)
    x1 = attend(x1, 'te_attention')

    # ----------------- type embeddings -------------------------
    # One shared reshape layer for both type embeddings.
    x_layer = Lambda(
        lambda x: K.reshape(x, [-1, self.TRIGGER_TYPE_VEC_DIM]),
        output_shape=output_shape)
    trigger_type = x_layer(self.trigger_type_embedding)
    entity_type = x_layer(self.entity_type_embedding)
    tt = spread(trigger_type)
    et = spread(entity_type)

    # ----------------- trigger-type attention ------------------
    x2 = Concatenate()([tt, sentence])
    x2 = Dense(1)(x2)
    x2 = attend(x2, 'tt_attention')

    # ----------------- entity-type attention -------------------
    x3 = Concatenate()([et, sentence])
    x3 = Dense(1)(x3)
    x3 = attend(x3, 'et_attention')

    # ----------------- classifier ------------------------------
    x = Concatenate()([x1, x2, x3])
    x = Dropout(rate=0.5)(x)
    return Dense(9, activation='softmax')(x)
def reshape_one(c):
    """Reshape ``c`` to (batch * padsize, char_padsize, CHAR_EMBEDDING_DIM).

    ``batch`` is the dynamic size of the first axis of ``c``.
    """
    merged_batch = tf.shape(c)[0] * padsize
    target_shape = (merged_batch, char_padsize, CHAR_EMBEDDING_DIM)
    return K.reshape(c, target_shape)
def call(self, x, mask=None):
    """Pool every region of interest to a ``pool_size`` x ``pool_size`` grid.

    :param x: two-element list ``[img, rois]``. ROI ``i`` is read from
        ``rois[0, i, 0:4]`` as ``(x, y, w, h)``.
    :param mask: unused.
    :return: tensor reshaped to
        ``(1, num_rois, pool_size, pool_size, nb_channels)`` and, for
        ``'channels_first'``, permuted to axis order ``(0, 1, 4, 2, 3)``.
    """
    assert (len(x) == 2)
    img, rois = x[0], x[1]
    input_shape = K.shape(img)
    pool_size = self.pool_size
    ordering = self.dim_ordering

    outputs = []
    for roi_idx in range(self.num_rois):
        roi_x = rois[0, roi_idx, 0]
        roi_y = rois[0, roi_idx, 1]
        roi_w = rois[0, roi_idx, 2]
        roi_h = rois[0, roi_idx, 3]

        row_length = roi_w / float(pool_size)
        col_length = roi_h / float(pool_size)

        # NOTE: the implementation differs per ordering because the
        # channels_first path has no resize op to rely on; it max-pools
        # each cell explicitly, which is far less efficient and leads to
        # long compile times.
        if ordering == 'channels_first':
            for jy in range(pool_size):
                for ix in range(pool_size):
                    left = roi_x + ix * row_length
                    right = left + row_length
                    top = roi_y + jy * col_length
                    bottom = top + col_length

                    x1 = K.cast(left, 'int32')
                    x2 = K.cast(right, 'int32')
                    y1 = K.cast(top, 'int32')
                    y2 = K.cast(bottom, 'int32')

                    # Every cell spans at least one pixel on each axis.
                    x2 = x1 + K.maximum(1, x2 - x1)
                    y2 = y1 + K.maximum(1, y2 - y1)

                    cell_shape = [
                        input_shape[0], input_shape[1], y2 - y1, x2 - x1
                    ]
                    cell = K.reshape(img[:, :, y1:y2, x1:x2], cell_shape)
                    outputs.append(K.max(cell, axis=(2, 3)))
        elif ordering == 'channels_last':
            roi_x = K.cast(roi_x, 'int32')
            roi_y = K.cast(roi_y, 'int32')
            roi_w = K.cast(roi_w, 'int32')
            roi_h = K.cast(roi_h, 'int32')

            crop = img[:, roi_y:roi_y + roi_h, roi_x:roi_x + roi_w, :]
            outputs.append(
                tf.image.resize_images(crop, (pool_size, pool_size)))

    final_output = K.concatenate(outputs, axis=0)
    final_output = K.reshape(
        final_output,
        (1, self.num_rois, pool_size, pool_size, self.nb_channels))

    if ordering == 'channels_first':
        axis_order = (0, 1, 4, 2, 3)
    else:
        axis_order = (0, 1, 2, 3, 4)
    return K.permute_dimensions(final_output, axis_order)
def merge_heads(x):
    """Swap axes 1 and 2 of ``x`` and fold the last two axes into one.

    :param x: 4D tensor.
    :return: tensor whose trailing axis has the product of the two
        trailing sizes of the permuted input.
    """
    swapped = K.permute_dimensions(x, [0, 2, 1, 3])
    dims = shape_list(swapped)
    leading, trailing = dims[:-2], dims[-2:]
    merged_shape = leading + [np.prod(trailing)]
    return K.reshape(swapped, merged_shape)
def split_heads(x, n: int, k: bool = False):
    """Split the last axis of ``x`` into ``n`` heads and move heads to axis 1.

    :param x: tensor whose last axis is split into ``[n, m // n]``.
    :param n: number of heads.
    :param k: when true the result is permuted with ``[0, 2, 3, 1]``
        instead of ``[0, 2, 1, 3]``.
    :return: the reshaped and permuted tensor.
    """
    dims = shape_list(x)
    channels = dims[-1]
    per_head_shape = dims[:-1] + [n, channels // n]
    per_head = K.reshape(x, per_head_shape)
    if k:
        axis_order = [0, 2, 3, 1]
    else:
        axis_order = [0, 2, 1, 3]
    return K.permute_dimensions(per_head, axis_order)
def call(self, x, mask=None):
    """Max-pool every region of interest over a pyramid of grids.

    For each ROI and each entry ``n`` of ``self.pool_list`` the ROI is cut
    into ``n`` x ``n`` bins and each bin is max-pooled over its spatial
    axes.

    The bin width is derived from the ROI width and the bin height from
    the ROI height. Previously the two were swapped (x advanced by
    ``h / n`` and y by ``w / n``), which pooled the wrong area for every
    non-square ROI.

    :param x: two-element list ``[img, rois]``. ROI ``i`` is read from
        ``rois[0, i, 0:4]`` as ``(x, y, w, h)``.
    :param mask: unused.
    :return: tensor reshaped to
        ``(1, num_rois, nb_channels * num_outputs_per_channel)``.
    :raises ValueError: if ``self.dim_ordering`` is neither ``'th'`` nor
        ``'tf'``.
    """
    assert (len(x) == 2)
    img = x[0]
    rois = x[1]

    if self.dim_ordering not in ('th', 'tf'):
        raise ValueError(
            "dim_ordering must be 'th' or 'tf', got %r" %
            (self.dim_ordering, ))
    channels_first = self.dim_ordering == 'th'

    input_shape = K.shape(img)
    outputs = []

    for roi_idx in range(self.num_rois):
        roi_x = rois[0, roi_idx, 0]
        roi_y = rois[0, roi_idx, 1]
        roi_w = rois[0, roi_idx, 2]
        roi_h = rois[0, roi_idx, 3]

        # One bin size per pyramid level: x moves in steps of the bin
        # width, y in steps of the bin height.
        bin_w = [roi_w / i for i in self.pool_list]
        bin_h = [roi_h / i for i in self.pool_list]

        for pool_num, num_pool_regions in enumerate(self.pool_list):
            # ix is the outer loop so the output ordering is unchanged.
            for ix in range(num_pool_regions):
                for jy in range(num_pool_regions):
                    x1 = roi_x + ix * bin_w[pool_num]
                    x2 = x1 + bin_w[pool_num]
                    y1 = roi_y + jy * bin_h[pool_num]
                    y2 = y1 + bin_h[pool_num]

                    x1 = K.cast(K.round(x1), 'int32')
                    x2 = K.cast(K.round(x2), 'int32')
                    y1 = K.cast(K.round(y1), 'int32')
                    y2 = K.cast(K.round(y2), 'int32')

                    if channels_first:
                        new_shape = [
                            input_shape[0], input_shape[1], y2 - y1, x2 - x1
                        ]
                        x_crop = img[:, :, y1:y2, x1:x2]
                        spatial_axes = (2, 3)
                    else:
                        new_shape = [
                            input_shape[0], y2 - y1, x2 - x1, input_shape[3]
                        ]
                        x_crop = img[:, y1:y2, x1:x2, :]
                        spatial_axes = (1, 2)

                    xm = K.reshape(x_crop, new_shape)
                    outputs.append(K.max(xm, axis=spatial_axes))

    final_output = K.concatenate(outputs, axis=0)
    final_output = K.reshape(
        final_output,
        (1, self.num_rois, self.nb_channels * self.num_outputs_per_channel))
    return final_output
def dense2conv(args):
    """Reshape ``args`` to ``(-1, hidden_h, hidden_w, hidden_c)``."""
    target_shape = (-1, hidden_h, hidden_w, hidden_c)
    return K.reshape(args, target_shape)
def tensor_product(self, x):
    """Batch-dot the reshaped second input with the first along axis 1.

    :param x: pair ``(a, b)``; ``b`` is reshaped to
        ``(-1, self.experts, self.target)`` before the product.
    :return: ``K.batch_dot(b, a, axes=1)``.
    """
    first, second = x[0], x[1]
    per_expert = K.reshape(second, (-1, self.experts, self.target))
    return K.batch_dot(per_expert, first, axes=1)
def call(self, x, mask=None):
    """Return ``[1 - r, r]`` where ``r`` is column ``self.axis`` of ``x``.

    The selected column is reshaped to ``(-1, 1)`` and concatenated with
    its complement along ``self.axis``.

    :param x: input tensor.
    :param mask: unused.
    """
    selected = K.reshape(x[:, self.axis], (-1, 1))
    complement = 1 - selected
    return K.concatenate([complement, selected], axis=self.axis)
def reshape_two(c):
    """Reshape ``c`` to (batch // padsize, padsize, CHAR_EMBEDDING_DIM).

    ``batch`` is the dynamic size of the first axis of ``c``.

    Floor division keeps the leading dimension an integer tensor. True
    division (``/``) on an integer tensor produces a floating-point tensor
    under Python 3, which is not a valid reshape dimension.
    """
    groups = tf.shape(c)[0] // padsize
    return K.reshape(c, (groups, padsize, CHAR_EMBEDDING_DIM))
def attention(x, dim):
    """Batch-dot the two inputs over axis 1 and reshape to ``[-1, dim]``.

    :param x: pair of tensors contracted along their axis 1.
    :param dim: size of the last axis of the result.
    """
    left, right = x[0], x[1]
    contracted = K.batch_dot(left, right, axes=[1, 1])
    return K.reshape(contracted, [-1, dim])
def build_model(char_size=27, dim=64, iterations=4, training=True, pca=False):
    """Build the model.

    Rules and the query are embedded with frozen GloVe vectors read from
    ``./data/glove/glove.6B.100d.txt`` and encoded with a shared
    ``ZeroGRU``. ``iterations`` rounds of attention over the rule
    embeddings update an episodic memory state, from which a sigmoid
    prediction is made.

    Args:
        char_size: not used by this function.
        dim: width of the recurrent encoder and attention layers.
        iterations: number of reasoning iterations.
        training: when true (and ``pca`` is false) the model outputs only
            the prediction and is compiled.
        pca: when true the model outputs the rule embeddings instead.

    Returns:
        A Keras ``Model`` over ``[context, query]``. It is compiled only
        in the training configuration.
    """
    # Inputs
    # Context: (rules, preds, chars,)
    context = L.Input(shape=(
        None,
        None,
        None,
    ),
                      name='context',
                      dtype='int32')
    query = L.Input(shape=(None, ), name='query', dtype='int32')

    # Flatten preds to embed entire rules
    var_flat = L.Lambda(lambda x: K.reshape(
        x, K.stack([K.shape(x)[0], -1,
                    K.prod(K.shape(x)[2:])])),
                        name='var_flat')
    flat_ctx = var_flat(context)  # (?, rules, preds*chars)

    print('Found %s texts.' % len(CONTEXT_TEXTS))
    word_index = WORD_INDEX
    print('Found %s unique tokens.' % len(word_index))

    # Load the GloVe vectors. The context manager guarantees the file is
    # closed even if a line fails to parse.
    embeddings_index = {}
    GLOVE_DIR = os.path.abspath('.') + "/data/glove"
    with open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt'),
              'r',
              encoding='utf-8') as f:
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs

    print('Found %s word vectors.' % len(embeddings_index))

    EMBEDDING_DIM = 100
    embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))
    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in embedding index will be all-zeros.
            embedding_matrix[i] = embedding_vector

    # Frozen pretrained word embedding shared by context and query.
    embedding_layer = L.Embedding(len(word_index) + 1,
                                  EMBEDDING_DIM,
                                  weights=[embedding_matrix],
                                  trainable=False)
    embedded_ctx = embedding_layer(flat_ctx)
    embedded_q = embedding_layer(query)

    embed_pred = ZeroGRU(dim, go_backwards=True, name='embed_pred')
    embedded_predq = embed_pred(embedded_q)  # (?, dim)
    # Embed every rule
    embedded_rules = NestedTimeDist(embed_pred,
                                    name='rule_embed')(embedded_ctx)
    # (?, rules, dim)

    # Reused layers over iterations
    repeat_toctx = L.RepeatVector(K.shape(embedded_ctx)[1],
                                  name='repeat_to_ctx')
    diff_sq = L.Lambda(lambda xy: K.square(xy[0] - xy[1]),
                       output_shape=(None, dim),
                       name='diff_sq')
    concat = L.Lambda(lambda xs: K.concatenate(xs, axis=2),
                      output_shape=(None, dim * 5),
                      name='concat')
    att_dense1 = L.TimeDistributed(L.Dense(dim,
                                           activation='tanh',
                                           name='att_dense1'),
                                   name='d_att_dense1')
    att_dense2 = L.TimeDistributed(L.Dense(1,
                                           activation='sigmoid',
                                           name='att_dense2'),
                                   name='d_att_dense2')
    # The layer name is kept exactly as-is because it is part of the
    # model's serialized identity.
    squeeze2 = L.Lambda(lambda x: K.squeeze(x, 2), name='sequeeze2')
    # A rule counts as present when any component of its embedding is
    # non-zero.
    rule_mask = L.Lambda(lambda x: K.cast(
        K.any(K.not_equal(x, 0), axis=-1, keepdims=True), 'float32'),
                         name='rule_mask')(embedded_rules)
    episodic_mem = EpisodicMemory(dim, name='episodic_mem')

    # Reasoning iterations
    state = embedded_predq
    repeated_q = repeat_toctx(embedded_predq)
    outs = []
    for _ in range(iterations):
        # Compute attention between rule and query state
        ctx_state = repeat_toctx(state)  # (?, rules, dim)
        s_s_c = diff_sq([ctx_state, embedded_rules])
        s_m_c = L.multiply([embedded_rules, state])  # (?, rules, dim)
        sim_vec = concat(
            [s_s_c, s_m_c, ctx_state, embedded_rules, repeated_q])
        sim_vec = att_dense1(sim_vec)  # (?, rules, dim)
        sim_vec = att_dense2(sim_vec)  # (?, rules, 1)
        # Zero the attention of rules flagged absent by the mask.
        sim_vec = L.multiply([sim_vec, rule_mask])

        state = episodic_mem([state, sim_vec, embedded_rules])
        sim_vec = squeeze2(sim_vec)  # (?, rules)
        outs.append(sim_vec)

    # Predication
    out = L.Dense(1, activation='sigmoid', name='out')(state)
    if pca:
        model = Model([context, query], [embedded_rules])
    elif training:
        model = Model([context, query], [out])
        model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['acc'])
    else:
        model = Model([context, query], outs + [out])
    return model
def getModel(
    srcVocabTransformer,
    refVocabTransformer,
    embedding_size,
    gru_size,
    src_fastText,
    ref_fastText,
    train_embeddings,
    attention,
    summary_attention,
    use_estimator,
    model_inputs=None,
    verbose=False,
):
    """Build the encoder/decoder quality-estimation model.

    The source sentence is encoded with a bidirectional GRU whose final
    states initialise a bidirectional decoder over the reference
    sentence. The decoder can optionally attend over the encoder states
    (``attention``), be followed by an extra "estimator" GRU
    (``use_estimator``) and be summarised with learned attention weights
    (``summary_attention``). A single ``Dense(1)`` produces the quality
    output.

    :param srcVocabTransformer: provides ``vocab_size()`` for the source.
    :param refVocabTransformer: provides ``vocab_size()`` for the
        reference.
    :param embedding_size: embedding output dimension.
    :param gru_size: number of units of every recurrent layer.
    :param src_fastText: if truthy, passed to ``get_fastText_embeddings``
        to initialise the source embedding weights.
    :param ref_fastText: same, for the reference embedding.
    :param train_embeddings: ``trainable`` flag of both embeddings.
    :param attention: use the ``AttentionGRUCell`` decoder.
    :param summary_attention: summarise the sequence with attention.
    :param use_estimator: stack the "estimator" GRU on the decoder.
    :param model_inputs: optional ``(src_input, ref_input)`` pair to use
        instead of creating new ``Input`` layers.
    :param verbose: log progress and print the model summary.
    :return: the uncompiled Keras ``Model``.
    """
    src_vocab_size = srcVocabTransformer.vocab_size()
    ref_vocab_size = refVocabTransformer.vocab_size()

    def pretrained_kwargs(fast_text, transformer, message):
        # Extra Embedding kwargs carrying pretrained weights, if requested.
        if not fast_text:
            return {}
        logger.info(message)
        return {
            'weights': [
                get_fastText_embeddings(fast_text, transformer,
                                        embedding_size)
            ]
        }

    src_embedding_kwargs = pretrained_kwargs(
        src_fastText, srcVocabTransformer,
        "Loading fastText embeddings for source language")
    ref_embedding_kwargs = pretrained_kwargs(
        ref_fastText, refVocabTransformer,
        "Loading fastText embeddings for target language")

    if verbose:
        logger.info("Creating model")

    if model_inputs:
        src_input, ref_input = model_inputs
    else:
        src_input = Input(shape=(None, ))
        ref_input = Input(shape=(None, ))

    src_embedding = Embedding(output_dim=embedding_size,
                              input_dim=src_vocab_size,
                              mask_zero=True,
                              name="src_embedding",
                              trainable=train_embeddings,
                              **src_embedding_kwargs)(src_input)
    ref_embedding = Embedding(output_dim=embedding_size,
                              input_dim=ref_vocab_size,
                              mask_zero=True,
                              name="ref_embedding",
                              trainable=train_embeddings,
                              **ref_embedding_kwargs)(ref_input)

    encoder = Bidirectional(GRU(gru_size,
                                return_sequences=True,
                                return_state=True),
                            name="encoder")(src_embedding)

    # The decoder must emit sequences (and states) whenever a later stage
    # consumes them.
    return_sequence = (use_estimator or summary_attention)
    decoder_flags = dict(return_sequences=return_sequence,
                         return_state=return_sequence)

    if attention:
        attention_states = TimeDistributedSequential(
            [Dense(gru_size, name="attention_state")], encoder[0])

        with CustomObjectScope({'AttentionGRUCell': AttentionGRUCell}):
            decoder = Bidirectional(
                RNN(AttentionGRUCell(gru_size), **decoder_flags),
                name="decoder")(ref_embedding,
                                constants=attention_states,
                                initial_state=encoder[1:])
    else:
        decoder = Bidirectional(GRU(gru_size, **decoder_flags),
                                name="decoder")(ref_embedding,
                                                initial_state=encoder[1:])

    if use_estimator:
        decoder = Bidirectional(GRU(gru_size,
                                    return_sequences=summary_attention,
                                    return_state=summary_attention),
                                name="estimator")(decoder[0])

    if not summary_attention:
        quality_summary = decoder
    else:
        attention_weights = TimeDistributedSequential([
            Dense(gru_size, activation="tanh"),
            Dense(1, name="attention_weights"),
        ], decoder[0])

        # Drop the trailing size-1 axis while letting the mask through.
        attention_weights = Lambda(
            lambda x: K.reshape(x, (
                x.shape[0],
                -1,
            )),
            output_shape=lambda input_shape: input_shape[:-1],
            mask=lambda inputs, mask: mask,
            name="reshape")(attention_weights)

        attention_weights = Activation(
            "softmax", name="attention_softmax")(attention_weights)

        quality_summary = dot([attention_weights, decoder[0]],
                              axes=(1, 1),
                              name="summary")

    quality = Dense(1, name="quality")(quality_summary)

    model = Model(inputs=[src_input, ref_input], outputs=[quality])

    if verbose:
        _printModelSummary(logger, model, "model")

    return model
def yolo_loss(args,
              anchors,
              num_anchors_per_layer,
              num_classes,
              ignore_thresh=.5,
              print_loss=True):
    """
    Return yolo_loss tensor

    The loss is accumulated over every output layer as the sum of four
    box terms (x, y, w, h, each computed with ``nll_loss`` from the
    predicted mean and sigma), a confidence term and a class term. Each
    term is summed over all positions and divided by the batch size.

    Args:
        args (list): args[:num_output_layers] the output of yolo_body or tiny_yolo_body
            args[num_output_layers:] raw_y_true
        anchors (np.array): shape=(N, 2), wh
        num_anchors_per_layer (int): number of anchors used by each output
            layer; the number of output layers is
            ``len(anchors) // num_anchors_per_layer``.
        num_classes (int): number of classes; the prediction tensor's last
            axis is reshaped to ``num_classes + 9``.
        ignore_thresh (float): the iou threshold whether to ignore object confidence loss
        print_loss: when true, the running loss is wrapped in ``tf.Print``
            after every output layer.

    Returns:
        loss: tensor, shape=(1,)

    """
    num_output_layers = len(anchors) // num_anchors_per_layer
    yolo_outputs = args[:num_output_layers]
    raw_y_trues = args[num_output_layers:]
    # NOTE(review): hard-coded for three output layers of three anchors;
    # anchor_masks[l] below raises IndexError for more than three layers.
    anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    # Input size is derived as 32x the grid of the first output
    # (presumably the coarsest stride — confirm against the model body).
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(raw_y_trues[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(raw_y_trues[0]))
        for l in range(num_output_layers)
    ]
    loss = 0
    batch_size = K.shape(yolo_outputs[0])[0]
    batch_size_f = K.cast(batch_size, K.dtype(yolo_outputs[0]))
    for l in range(num_output_layers):
        grid_shape = grid_shapes[l]
        yolo_output = yolo_outputs[l]
        # presumably 4 box means + 4 box sigmas + 1 confidence + classes
        # per anchor — confirm against y_pred_graph
        raw_y_pred = K.reshape(yolo_output, [
            -1, grid_shape[0], grid_shape[1], num_anchors_per_layer,
            num_classes + 9
        ])
        raw_y_true = raw_y_trues[l]
        anchor_mask = anchor_masks[l]
        # (batch_size, grid_height, grid_width, num_anchors_this_layer, 1)
        object_mask = raw_y_true[..., 4:5]
        # (batch_size, grid_height, grid_width, num_anchors_this_layer, num_classes)
        y_true_class_probs = raw_y_true[..., 5:]
        grid, y_pred_box, y_pred_delta_xy, y_pred_log_wh, y_pred_sigma, y_pred_confidence, y_pred_class_probs = \
            y_pred_graph(raw_y_pred, anchors[anchor_mask], input_shape)
        # Ground-truth targets expressed in the same space as the
        # predictions: offsets within the grid cell and log-scale sizes
        # relative to the anchors.
        y_true_delta_xy = raw_y_true[..., :2] * grid_shapes[l][::-1] - grid
        y_true_log_wh = K.log(raw_y_true[..., 2:4] * input_shape[::-1] /
                              anchors[anchor_mask])
        # Positions without an object get 0 instead of the log of zero.
        y_true_log_wh = K.switch(object_mask, y_true_log_wh,
                                 K.zeros_like(y_true_log_wh))
        # Smaller ground-truth boxes receive a larger weight.
        box_loss_scale = 2 - raw_y_true[..., 2:3] * raw_y_true[..., 3:4]
        ignore_mask = tf.TensorArray(K.dtype(raw_y_trues[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask_):
            # Per-image step: mark predictions whose best IoU with any
            # ground-truth box is below ignore_thresh.
            # (num_gt_boxes, 4)
            gt_box = tf.boolean_mask(raw_y_true[b, ..., 0:4],
                                     object_mask_bool[b, ..., 0])
            # (grid_height, grid_width, num_anchors_this_layer, num_gt_boxes)
            iou = box_iou_graph(y_pred_box[b], gt_box)
            # (grid_height, grid_width, num_anchors_this_layer)
            best_iou = K.max(iou, axis=-1)
            ignore_mask_ = ignore_mask_.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(gt_box)))
            return b + 1, ignore_mask_

        _, ignore_mask = tf.while_loop(lambda b, *largs: b < batch_size,
                                       loop_body, [0, ignore_mask])
        # (batch_size, grid_height, grid_width, num_anchors_this_layer)
        ignore_mask = ignore_mask.stack()
        # (batch_size, grid_height, grid_width, num_anchors_this_layer, 1)
        ignore_mask = K.expand_dims(ignore_mask, -1)
        y_true = tf.concat([y_true_delta_xy, y_true_log_wh], axis=-1)
        y_pred_mu = tf.concat([y_pred_delta_xy, y_pred_log_wh], axis=-1)
        # One nll_loss term per box coordinate, applied only where an
        # object is present.
        x_loss = nll_loss(y_true[..., 0:1], y_pred_mu[..., 0:1],
                          y_pred_sigma[..., 0:1])
        x_loss = object_mask * box_loss_scale * x_loss
        y_loss = nll_loss(y_true[..., 1:2], y_pred_mu[..., 1:2],
                          y_pred_sigma[..., 1:2])
        y_loss = object_mask * box_loss_scale * y_loss
        w_loss = nll_loss(y_true[..., 2:3], y_pred_mu[..., 2:3],
                          y_pred_sigma[..., 2:3])
        w_loss = object_mask * box_loss_scale * w_loss
        h_loss = nll_loss(y_true[..., 3:4], y_pred_mu[..., 3:4],
                          y_pred_sigma[..., 3:4])
        h_loss = object_mask * box_loss_scale * h_loss
        # Background positions contribute only where ignore_mask is set.
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, y_pred_confidence) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, y_pred_confidence) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            y_true_class_probs, y_pred_class_probs)
        x_loss = K.sum(x_loss) / batch_size_f
        y_loss = K.sum(y_loss) / batch_size_f
        w_loss = K.sum(w_loss) / batch_size_f
        h_loss = K.sum(h_loss) / batch_size_f
        confidence_loss = K.sum(confidence_loss) / batch_size_f
        class_loss = K.sum(class_loss) / batch_size_f
        loss += x_loss + y_loss + w_loss + h_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, x_loss, y_loss, w_loss, h_loss, confidence_loss,
                class_loss,
                K.sum(ignore_mask)
            ],
                            message='\nloss: ')
    return loss
def call(self, x):
    """Add ``self.threshold``, reshaped to ``(1, 1, 1, filters)``, to ``x``."""
    broadcast_shape = (1, 1, 1, self.filters)
    offset = K.reshape(self.threshold, broadcast_shape)
    return x + offset
def call(self, x, mask=None):
    """Pool every region of interest to a ``pool_size`` x ``pool_size`` grid.

    Original author's notes (translated): earlier stages derive reasonably
    reliable ROIs and their boxes from the ``model_rpn`` output;
    ``num_rois`` of them are sampled for training and fed to this layer.
    The layer takes a list of two tensors and produces a 5D tensor, which
    is why ``compute_output_shape`` has to be provided. In short, it
    receives ``[feature map, boxes chosen from the RPN]`` and outputs
    ``num_rois`` pooled feature maps.

    :param x: two-element list ``[img, rois]``. ROI ``i`` is read from
        ``rois[0, i, 0:4]`` as ``(x, y, w, h)``.
    :param mask: unused.
    :return: tensor reshaped to
        ``(1, num_rois, pool_size, pool_size, nb_channels)`` and, for
        ``'th'`` ordering, permuted to axis order ``(0, 1, 4, 2, 3)``.
    """
    assert(len(x) == 2)

    img, rois = x[0], x[1]
    input_shape = K.shape(img)
    grid = self.pool_size
    theano_ordering = self.dim_ordering == 'th'

    outputs = []
    for roi_idx in range(self.num_rois):
        box_x = rois[0, roi_idx, 0]
        box_y = rois[0, roi_idx, 1]
        box_w = rois[0, roi_idx, 2]
        box_h = rois[0, roi_idx, 3]

        row_length = box_w / float(grid)
        col_length = box_h / float(grid)

        # NOTE: the RoiPooling implementation differs between theano and
        # tensorflow due to the lack of a resize op in theano. The theano
        # implementation is much less efficient and leads to long compile
        # times.
        if theano_ordering:
            for jy in range(grid):
                for ix in range(grid):
                    x_start = box_x + ix * row_length
                    x_stop = x_start + row_length
                    y_start = box_y + jy * col_length
                    y_stop = y_start + col_length

                    x1 = K.cast(x_start, 'int32')
                    x2 = K.cast(x_stop, 'int32')
                    y1 = K.cast(y_start, 'int32')
                    y2 = K.cast(y_stop, 'int32')

                    # Guarantee a cell of at least one pixel per axis.
                    x2 = x1 + K.maximum(1, x2 - x1)
                    y2 = y1 + K.maximum(1, y2 - y1)

                    new_shape = [input_shape[0], input_shape[1],
                                 y2 - y1, x2 - x1]
                    xm = K.reshape(img[:, :, y1:y2, x1:x2], new_shape)
                    outputs.append(K.max(xm, axis=(2, 3)))
        elif self.dim_ordering == 'tf':
            box_x = K.cast(box_x, 'int32')
            box_y = K.cast(box_y, 'int32')
            box_w = K.cast(box_w, 'int32')
            box_h = K.cast(box_h, 'int32')

            region = img[:, box_y:box_y + box_h, box_x:box_x + box_w, :]
            outputs.append(tf.image.resize_images(region, (grid, grid)))

    final_output = K.concatenate(outputs, axis=0)
    final_output = K.reshape(
        final_output,
        (1, self.num_rois, grid, grid, self.nb_channels))

    if theano_ordering:
        final_output = K.permute_dimensions(final_output, (0, 1, 4, 2, 3))
    else:
        final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4))

    return final_output
def call(self,
         inputs,
         initial_state=None,
         initial_readout=None,
         ground_truth=None,
         mask=None,
         training=None):
    """Run the wrapped recurrent model over ``inputs``.

    Resolves the initial states (from a list-valued ``inputs``, from
    ``initial_state``, from the stored states when stateful, or from
    ``get_initial_state``), appends the readout and teacher-forcing
    states when those features are enabled, runs ``rnn`` with
    ``self.step`` and finally strips the bookkeeping states again.

    :param inputs: input tensor, or a list whose first element is the
        input tensor followed by initial states and, when enabled, the
        initial readout and the ground truth.
    :param initial_state: optional state tensor or list/tuple of them.
    :param initial_readout: optional initial readout tensor; a zero
        tensor shaped after the model output is built when missing.
    :param ground_truth: required when ``self.teacher_force`` is set.
    :param mask: optional mask; only the first element of a list is used.
    :param training: forwarded to ``K.in_train_phase``.
    :return: ``outputs`` if ``self.return_sequences`` else the last
        output; wrapped as ``[y] + states`` when ``self.return_states``.
    :raises Exception: if ``self.model`` is ``None`` or teacher forcing is
        on without a ground truth.
    :raises ValueError: if the number of initial states is wrong, or if
        unrolling is requested with an undefined time dimension.
    """
    # input shape: `(samples, time (padded with zeros), input_dim)`
    # note that the .build() method of subclasses MUST define
    # self.input_spec and self.state_spec with complete input shapes.
    if type(mask) is list:
        mask = mask[0]
    if self.model is None:
        raise Exception('Empty RecurrentModel.')
    num_req_states = self.num_states
    # The readout, when enabled, occupies one of the required states.
    if self.readout:
        num_actual_states = num_req_states - 1
    else:
        num_actual_states = num_req_states
    if type(inputs) is list:
        # Layout: [inputs, *initial_states, initial_readout?, ..., ground_truth?]
        inputs_list = inputs[:]
        inputs = inputs_list.pop(0)
        initial_states = inputs_list[:num_actual_states]
        if len(initial_states) > 0:
            if self._is_optional_input_placeholder(initial_states[0]):
                initial_states = self.get_initial_state(inputs)
        inputs_list = inputs_list[num_actual_states:]
        if self.readout:
            initial_readout = inputs_list.pop(0)
            if self.teacher_force:
                ground_truth = inputs_list.pop()
    else:
        if initial_state is not None:
            if not isinstance(initial_state, (list, tuple)):
                initial_states = [initial_state]
            else:
                initial_states = list(initial_state)
            if self._is_optional_input_placeholder(initial_states[0]):
                initial_states = self.get_initial_state(inputs)
        elif self.stateful:
            initial_states = self.states
        else:
            initial_states = self.get_initial_state(inputs)
    if self.readout:
        if initial_readout is None or self._is_optional_input_placeholder(
                initial_readout):
            # Build an all-zero readout with the batch size of `inputs`
            # and the non-batch shape of the model output.
            output_shape = K.int_shape(_to_list((self.model.output))[0])
            output_ndim = len(output_shape)
            input_ndim = K.ndim(inputs)
            initial_readout = K.zeros_like(inputs)
            slices = [slice(None)] + [0] * (input_ndim - 1)
            initial_readout = initial_readout[slices]  # (batch_size,)
            initial_readout = K.reshape(initial_readout,
                                        (-1, ) + (1, ) * (output_ndim - 1))
            initial_readout = K.tile(initial_readout,
                                     (1, ) + tuple(output_shape[1:]))
        initial_states.append(initial_readout)
        if self.teacher_force:
            if ground_truth is None or self._is_optional_input_placeholder(
                    ground_truth):
                raise Exception(
                    'ground_truth must be provided for RecurrentModel with teacher_force=True.'
                )
            # counter = K.zeros((1,), dtype='int32')
            counter = K.zeros((1, ))
            counter = K.cast(counter, 'int32')
            # Counter and ground truth are inserted just before the
            # readout, which stays last.
            initial_states.insert(-1, counter)
            # NOTE(review): the next line is a bare expression with no
            # effect — looks like leftover debugging; confirm and remove.
            initial_states[-2]
            initial_states.insert(-1, ground_truth)
            num_req_states += 2
    if len(initial_states) != num_req_states:
        raise ValueError('Layer requires ' + str(num_req_states) +
                         ' states but was passed ' +
                         str(len(initial_states)) + ' initial states.')
    input_shape = K.int_shape(inputs)
    if self.unroll and input_shape[1] is None:
        raise ValueError('Cannot unroll a RNN if the '
                         'time dimension is undefined. \n'
                         '- If using a Sequential model, '
                         'specify the time dimension by passing '
                         'an `input_shape` or `batch_input_shape` '
                         'argument to your first layer. If your '
                         'first layer is an Embedding, you can '
                         'also use the `input_length` argument.\n'
                         '- If using the functional API, specify '
                         'the time dimension by passing a `shape` '
                         'or `batch_shape` argument to your Input layer.')
    # NOTE(review): `training=None` is passed here rather than the
    # `training` argument of this method — confirm this is intentional.
    preprocessed_input = self.preprocess_input(inputs, training=None)
    constants = self.get_constants(inputs, training=None)
    if self.decode:
        # In decode mode the real input travels as the first state and the
        # loop iterates over a dummy tensor of length output_length.
        initial_states.insert(0, inputs)
        preprocessed_input = K.zeros((1, self.output_length, 1))
        input_length = self.output_length
    else:
        input_length = input_shape[1]
    if self.uses_learning_phase:
        # Build the loop once per learning phase and select between the
        # two results at run time.
        with learning_phase_scope(0):
            last_output_test, outputs_test, states_test, updates = rnn(
                self.step,
                preprocessed_input,
                initial_states,
                go_backwards=self.go_backwards,
                mask=mask,
                constants=constants,
                unroll=self.unroll,
                input_length=input_length)
        with learning_phase_scope(1):
            last_output_train, outputs_train, states_train, updates = rnn(
                self.step,
                preprocessed_input,
                initial_states,
                go_backwards=self.go_backwards,
                mask=mask,
                constants=constants,
                unroll=self.unroll,
                input_length=input_length)

        last_output = K.in_train_phase(last_output_train,
                                       last_output_test,
                                       training=training)
        outputs = K.in_train_phase(outputs_train,
                                   outputs_test,
                                   training=training)
        states = []
        for state_train, state_test in zip(states_train, states_test):
            states.append(
                K.in_train_phase(state_train, state_test, training=training))

    else:
        last_output, outputs, states, updates = rnn(
            self.step,
            preprocessed_input,
            initial_states,
            go_backwards=self.go_backwards,
            mask=mask,
            constants=constants,
            unroll=self.unroll,
            input_length=input_length)
    states = list(states)
    # Remove the bookkeeping states added above, mirroring their insertion.
    if self.decode:
        states.pop(0)
    if self.readout:
        states.pop()
        if self.teacher_force:
            states.pop()
            states.pop()
    if len(updates) > 0:
        self.add_update(updates)
    if self.stateful:
        # Carry the final states over to the next batch.
        updates = []
        for i in range(len(states)):
            updates.append((self.states[i], states[i]))
        self.add_update(updates, inputs)

    # Properly set learning phase
    if 0 < self.dropout + self.recurrent_dropout:
        last_output._uses_learning_phase = True
        outputs._uses_learning_phase = True

    if self.return_sequences:
        y = outputs
    else:
        y = last_output
    if self.return_states:
        return [y] + states
    else:
        return y
def call(self, x, mask=None):
    """Spatial pyramid pooling over the whole feature map.

    For every entry ``n`` of ``self.pool_list`` the feature map is cut
    into ``n`` x ``n`` bins, each bin is max-pooled over its spatial axes,
    and all pooled values are concatenated.

    The leftover debug ``print`` calls that ran for every bin while the
    graph was built have been removed, and the two per-ordering copies of
    the pooling loop have been merged.

    :param x: 4D feature map in ``'th'`` (channels first) or ``'tf'``
        (channels last) ordering, selected by ``self.dim_ordering``.
    :param mask: unused.
    :return: concatenation (along the default axis of ``K.concatenate``)
        of the pooled value of every bin, ordered by pyramid level, then
        row, then column.
    :raises ValueError: if ``self.dim_ordering`` is neither ``'th'`` nor
        ``'tf'``.
    """
    input_shape = K.shape(x)

    if self.dim_ordering == 'th':
        channels_first = True
        num_rows = input_shape[2]
        num_cols = input_shape[3]
    elif self.dim_ordering == 'tf':
        channels_first = False
        num_rows = input_shape[1]
        num_cols = input_shape[2]
    else:
        raise ValueError(
            "dim_ordering must be 'th' or 'tf', got %r" %
            (self.dim_ordering, ))

    # Bin height/width (as floats) for every pyramid level.
    rows_f = K.cast(num_rows, 'float32')
    cols_f = K.cast(num_cols, 'float32')
    row_length = [rows_f / i for i in self.pool_list]
    col_length = [cols_f / i for i in self.pool_list]

    outputs = []
    for pool_num, num_pool_regions in enumerate(self.pool_list):
        for jy in range(num_pool_regions):
            for ix in range(num_pool_regions):
                x1 = ix * col_length[pool_num]
                x2 = ix * col_length[pool_num] + col_length[pool_num]
                y1 = jy * row_length[pool_num]
                y2 = jy * row_length[pool_num] + row_length[pool_num]

                x1 = K.cast(K.round(x1), 'int32')
                x2 = K.cast(K.round(x2), 'int32')
                y1 = K.cast(K.round(y1), 'int32')
                y2 = K.cast(K.round(y2), 'int32')

                if channels_first:
                    new_shape = [input_shape[0], input_shape[1],
                                 y2 - y1, x2 - x1]
                    x_crop = x[:, :, y1:y2, x1:x2]
                    spatial_axes = (2, 3)
                else:
                    new_shape = [input_shape[0], y2 - y1,
                                 x2 - x1, input_shape[3]]
                    x_crop = x[:, y1:y2, x1:x2, :]
                    spatial_axes = (1, 2)

                xm = K.reshape(x_crop, new_shape)
                outputs.append(K.max(xm, axis=spatial_axes))

    return K.concatenate(outputs)
def build_heatmap(in_tensor, config, names = None):
    """Build per-class Gaussian heatmaps and heatmap-based box scores.

    Every non-empty box in ``in_tensor`` is turned into a 2-D diagonal
    Gaussian centred on the box, the Gaussians are summed per class, and
    each box is then scored against the (max-normalized) class heatmap via
    ``build_mask_routine``.

    Args:
        in_tensor: 4-D tensor indexed [image, class, roi, column]. Columns
            0..3 are read as (y1, x1, y2, x2). A roi counts as empty when
            all of its columns except the last two are zero.
        config: Object providing ``IMAGE_SHAPE``, ``BATCH_SIZE`` and
            ``NUM_CLASSES``.
        names: Non-empty sequence; ``names[0]`` is the base name given to
            the output tensors.

    Returns:
        Tuple ``(gauss_heatmap_norm, gauss_scores, gauss_heatmap,
        gauss_heatmap_L2norm)``. The three heatmaps are transposed to
        [image, height, width, class]. ``gauss_scores`` is ``in_tensor``
        with the columns returned by ``build_mask_routine`` appended,
        followed by the max-normalized and the L2-normalized last score.

    Raises:
        ValueError: If ``names`` is None or empty.
    """
    # Fail before any graph construction; previously the default
    # names=None only crashed at the very end on names[0].
    if names is None or len(names) == 0:
        raise ValueError(
            "build_heatmap() requires a non-empty `names` sequence")

    img_h, img_w = config.IMAGE_SHAPE[:2]
    batch_size = config.BATCH_SIZE
    num_classes = config.NUM_CLASSES

    print('\n ')
    print(' > NEW build_heatmap() for ', names )
    print(' orignal in_tensor shape : ', in_tensor.shape)

    # rois per image is determined by the size of the input tensor
    rois_per_image = (in_tensor.shape)[2]
    print(' num of bboxes per class is : ', rois_per_image )

    #-------------------------------------------------------------------------
    # Stack non-zero bboxes from in_tensor into pt2_dense
    #   pt2_ind   : [?, 3] -> (image index, class index, roi row index)
    #   pt2_dense : one row of in_tensor per non-empty roi
    #-------------------------------------------------------------------------
    pt2_sum = tf.reduce_sum(tf.abs(in_tensor[:,:,:,:-2]), axis=-1)
    print(' pt2_sum shape ',pt2_sum.shape)
    pt2_ind = tf.where(pt2_sum > 0)
    pt2_dense = tf.gather_nd(in_tensor, pt2_ind)
    print(' dense shape ',pt2_dense.get_shape())

    #-------------------------------------------------------------------------
    # Build mesh-grid to hold pixel coordinates; X and Y are (img_h, img_w)
    #-------------------------------------------------------------------------
    X = tf.range(img_w, dtype=tf.int32)
    Y = tf.range(img_h, dtype=tf.int32)
    X, Y = tf.meshgrid(X, Y)

    # repeat X and Y once per non-empty roi
    print(' X/Y shapes :', X.get_shape(), Y.get_shape())
    ones = tf.ones([tf.shape(pt2_dense)[0] , 1, 1], dtype = tf.int32)
    rep_X = ones * X
    rep_Y = ones * Y
    print(' Ones: ', ones.shape)
    print(' ones_exp * X', ones.shape, '*', X.shape, '= ',rep_X.shape)
    print(' ones_exp * Y', ones.shape, '*', Y.shape, '= ',rep_Y.shape)

    # stack the X and Y grids -> (n_boxes, img_h, img_w, 2), then move the
    # box axis next to the coordinate axis so it broadcasts against `means`
    bef_pos = tf.to_float(tf.stack([rep_X,rep_Y], axis = -1))
    print(' before transpse ', bef_pos.get_shape())
    pos_grid = tf.transpose(bef_pos,[1,2,0,3])
    print(' after transpose ', pos_grid.get_shape())

    #-------------------------------------------------------------------------
    # Build mean and covariance tensors for the Multivariate Normal
    #-------------------------------------------------------------------------
    width = pt2_dense[:,3] - pt2_dense[:,1]    # x2 - x1
    height = pt2_dense[:,2] - pt2_dense[:,0]   # y2 - y1
    cx = pt2_dense[:,1] + ( width / 2.0)
    cy = pt2_dense[:,0] + ( height / 2.0)
    means = tf.stack((cx,cy),axis = -1)
    covar = tf.stack((width * 0.5 , height * 0.5), axis = -1)
    covar = tf.sqrt(covar)

    tfd = tf.contrib.distributions
    mvn = tfd.MultivariateNormalDiag( loc = means, scale_diag = covar)
    prob_grid = mvn.prob(pos_grid)
    print(' Prob_grid shape before tanspose: ',prob_grid.get_shape())
    prob_grid = tf.transpose(prob_grid,[2,0,1])   # (n_boxes, img_h, img_w)
    print(' Prob_grid shape after tanspose: ',prob_grid.get_shape())
    print(' >> input to MVN.PROB: pos_grid (meshgrid) shape: ', pos_grid.get_shape())
    print(' << output probabilities shape:' , prob_grid.get_shape())

    #-------------------------------------------------------------------------
    # IMPORTANT: kill distributions of NaN boxes (boxes with zero height or
    # width give a singular scale)
    #-------------------------------------------------------------------------
    prob_grid = tf.where(tf.is_nan(prob_grid), tf.zeros_like(prob_grid), prob_grid)

    # scatter out the probability distributions based on class.
    # prob_grid rows are (img_h, img_w), so the trailing dims of the target
    # shape must be height-then-width (the old width-then-height order only
    # worked for square images).
    print('\n Scatter out the probability distributions based on class --------------')
    gauss_scatt = tf.scatter_nd(pt2_ind, prob_grid,
                                [batch_size, num_classes, rois_per_image, img_h, img_w])
    print(' pt2_ind shape : ', pt2_ind.shape)
    print(' prob_grid shape : ', prob_grid.shape)
    print(' gauss_scatt : ', gauss_scatt.shape)   # batch_sz, num_classes, num_rois, image_h, image_w

    # heatmap: sum the scattered gaussians over the roi axis
    print('\n Reduce sum based on class ---------------------------------------------')
    gauss_sum = tf.reduce_sum(gauss_scatt, axis=2, name='pred_heatmap2')
    gauss_sum = tf.where(gauss_sum > 1e-12, gauss_sum, tf.zeros_like(gauss_sum))
    print(' gaussian_sum shape : ', gauss_sum.get_shape(), 'Keras tensor ', KB.is_keras_tensor(gauss_sum) )

    #-------------------------------------------------------------------------
    # heatmap L2 normalization across the CLASS axis
    #-------------------------------------------------------------------------
    gauss_L2norm = KB.l2_normalize(gauss_sum, axis = +1)
    print(' gauss L2 norm : ', gauss_L2norm.shape ,' Keras tensor ', KB.is_keras_tensor(gauss_L2norm) )

    # max-normalization per (image, class) heatmap; all-zero maps give NaN
    print('\n normalization ------------------------------------------------------')
    gauss_norm = gauss_sum / tf.reduce_max(gauss_sum, axis=[-2,-1], keepdims = True)
    gauss_norm = tf.where(tf.is_nan(gauss_norm), tf.zeros_like(gauss_norm), gauss_norm)
    print(' gauss norm : ', gauss_norm.shape ,' Keras tensor ', KB.is_keras_tensor(gauss_norm) )

    #-------------------------------------------------------------------------
    # Generate scores from the gaussians using bounding box masks.
    # NOTE: scores are computed on the NORMALIZED heatmap (gauss_norm); to
    # score on the unnormalized one, tile gauss_sum below instead.
    #-------------------------------------------------------------------------
    in_shape = tf.shape(in_tensor)
    in_tensor_flattened = tf.reshape(in_tensor, [-1, in_shape[-1]])
    bboxes = tf.to_int32(tf.round(in_tensor_flattened[...,0:4]))
    print(' in_tensor ', in_tensor.shape)
    print(' in_tensorr_flattened is ', in_tensor_flattened.shape)
    print(' boxes shape ', bboxes.shape)
    print(' Rois per image : ', rois_per_image)

    # duplicate the class heatmap once per roi so map_fn sees one
    # (heatmap, bbox) pair per flattened row
    temp = tf.expand_dims(gauss_norm, axis =2)
    temp = tf.tile(temp, [1,1, rois_per_image ,1,1])
    temp_shape = KB.int_shape(temp)
    temp_reshape = KB.reshape(temp, (-1, temp_shape[-2], temp_shape[-1]))
    print(' heatmap original shape : ', gauss_norm.shape)
    print(' heatmap replicated : ', temp_shape)
    print(' heatmap flattened : ', temp_reshape.shape)

    scores = tf.map_fn(build_mask_routine, [temp_reshape, bboxes], dtype=tf.float32)

    # widen the last dimension by the number of score columns
    new_shape = tf.shape(in_tensor)+ [0,0,0, tf.shape(scores)[-1]]
    bbox_scores = tf.concat([in_tensor_flattened, scores], axis = -1)
    bbox_scores = tf.reshape(bbox_scores, new_shape)
    print(' in_tensor_flattened : ', in_tensor_flattened.shape)
    print(' Scores shape : ', scores.shape)
    print(' boxes_scores (rehspaed) : ', bbox_scores.shape)

    #-------------------------------------------------------------------------
    # Normalize the last computed score and append both normalizations
    #-------------------------------------------------------------------------
    scr_L2norm = tf.nn.l2_normalize(bbox_scores[...,-1], axis = -1)
    scr_L2norm = tf.expand_dims(scr_L2norm, axis = -1)

    # Plain max-normalization over the roi axis; NaNs (max of zero) -> 0.
    scr_norm = bbox_scores[...,-1]/ tf.reduce_max(bbox_scores[...,-1], axis = -1, keepdims=True)
    scr_norm = tf.where(tf.is_nan(scr_norm), tf.zeros_like(scr_norm), scr_norm)
    scr_norm = tf.expand_dims(scr_norm, axis = -1)
    bbox_scores = tf.concat([bbox_scores, scr_norm, scr_L2norm], axis = -1)

    gauss_heatmap = KB.identity(tf.transpose(gauss_sum,[0,2,3,1]), name = names[0])
    gauss_heatmap_norm = KB.identity(tf.transpose(gauss_norm,[0,2,3,1]), name = names[0]+'_norm')
    gauss_heatmap_L2norm = KB.identity(tf.transpose(gauss_L2norm,[0,2,3,1]), name = names[0]+'_L2norm')
    gauss_scores = KB.identity(bbox_scores, name = names[0]+'_scores')

    print(' gauss_heatmap final shape : ', gauss_heatmap.shape ,' Keras tensor ', KB.is_keras_tensor(gauss_heatmap) )
    print(' gauss_scores final shape : ', gauss_scores.shape ,' Keras tensor ', KB.is_keras_tensor(gauss_scores) )
    print(' complete')

    return gauss_heatmap_norm, gauss_scores, gauss_heatmap, gauss_heatmap_L2norm
# NOTE(review): scratch fragment kept verbatim. Its first two statements
# (`y = K.tf.matmul(...)`, `return y`) are the tail of a function whose `def`
# line is not part of this chunk.
# `multiply(x, n)` reshapes x to (-1, n, 5) and multiplies it by its own
# transpose over the last two axes, giving a (-1, n, n) result; the loop applies
# it to each of the 10 slices x[:, :, :, i] and the results are concatenated on
# axis 3 before a ReLU.
# NOTE(review): `import tensorflow as tf` and `from keras import backend as K`
# appear after `tf`/`K` are first used here - presumably both are also imported
# earlier; confirm.
# NOTE(review): the final K.reshape asks for (6, 2) = 12 elements from a 3x2
# variable holding 6 - confirm the intended target shape.
y = K.tf.matmul(mat_x, mat_x, transpose_b=True) return y def multiply(x, n=100): x_prime = tf.reshape(x, (-1, n, 5)) x_transpose = tf.transpose(x_prime, perm=[0, 2, 1]) return tf.matmul(x_prime, x_transpose) for i in range(0, 10): mat_x = x[:, :, :, i] final[i] = Lambda(lambda x: multiply(x, n=100), output_shape=(100, 100))( mat_x) #Lambda( matmul,output_shape= (-1,100, 100,1) ) (mat_x) #final[i] = K.dot(mat_x,K.permute_dimensions(mat_x,(0,2,1))) final[i] = K.reshape(final[i], (-1, 100, 100, 1)) y = merge([final[idx] for idx in final], mode='concat', concat_axis=3) #y = Reshape((100,100,10))(y) z = Activation('relu')(y) model = Model([seq_input, ss_input], z) import tensorflow as tf sess = K.get_session() q = K.eval from keras import backend as K #K.set_session(sess) with sess.as_default(): x = [[1, 1], [3, 4], [5, 6]] z = tf.Variable(x) z2 = K.reshape(z, (6, 2))
def get_initial_state(self, x):
    """Build the all-zero initial recurrent states for every layer.

    A zero tensor of shape (samples, nb_channels) is derived from ``x`` (so
    the batch dimension stays symbolic) and projected through zero matrices
    to obtain one zero state per layer for each of the 'r', 'c' and 'a'
    state groups. When ``self.extrap_start_time`` is set, a single 'ahat'
    state (layer 0) and an integer timestep counter are appended.

    Args:
        x: Input tensor; the inline shape note gives it as
            (samples, timesteps) + image_shape.

    Returns:
        List of state tensors ordered 'r' layers, 'c' layers, 'a' layers,
        then (only when extrapolating) 'ahat' and the timestep counter.
    """
    input_shape = self.input_spec[0].shape
    init_nb_row = input_shape[self.row_axis]
    init_nb_col = input_shape[self.column_axis]

    # Collapse x to zeros of shape (samples, nb_channels): sum away the two
    # spatial axes, then the time axis.
    base_initial_state = K.zeros_like(x)  # (samples, timesteps) + image_shape
    non_channel_axis = -1 if self.data_format == 'channels_first' else -2
    for _ in range(2):
        base_initial_state = K.sum(base_initial_state, axis=non_channel_axis)
    base_initial_state = K.sum(base_initial_state, axis=1)  # (samples, nb_channels)

    initial_states = []
    states_to_pass = ['r', 'c', 'a']
    nlayers_to_pass = {u: self.nb_layers for u in states_to_pass}
    if self.extrap_start_time is not None:
        # pass prediction in states so it can be used as actual for t+1
        # when extrapolating
        states_to_pass.append('ahat')
        nlayers_to_pass['ahat'] = 1

    for u in states_to_pass:
        ds_factor = 1
        for l in range(nlayers_to_pass[u]):
            nb_row = init_nb_row // ds_factor
            nb_col = init_nb_col // ds_factor
            # Accumulate the downsampling applied between layer l and l + 1.
            if l < self.nb_layers - 1:
                ds_factor *= self.upsample_size[l]

            if u in ('r', 'c'):
                stack_size = self.R_stack_sizes[l]
            else:  # 'a' and 'ahat' share the per-layer stack sizes
                stack_size = self.stack_sizes[l]

            output_size = stack_size * nb_row * nb_col  # flattened size
            reducer = K.zeros((input_shape[self.channel_axis], output_size))  # (nb_channels, output_size)
            initial_state = K.dot(base_initial_state, reducer)  # (samples, output_size)
            if self.data_format == 'channels_first':
                output_shp = (-1, stack_size, nb_row, nb_col)
            else:
                output_shp = (-1, nb_row, nb_col, stack_size)
            initial_states.append(K.reshape(initial_state, output_shp))

    # Use the public backend query (as done for the timestep counter below)
    # rather than the private K._BACKEND attribute.
    if K.backend() == 'theano':
        from theano import tensor as T
        # There is a known issue in the Theano scan op when dealing with
        # inputs whose shape is 1 along a dimension. This is a problem when
        # training on grayscale images, and the line below fixes it.
        initial_states = [T.unbroadcast(init_state, 0, 1)
                          for init_state in initial_states]

    if self.extrap_start_time is not None:
        # the last state will correspond to the current timestep
        initial_states.append(
            K.variable(0, int if K.backend() != 'tensorflow' else 'int32'))
    return initial_states
def call(self, inputs):
    """Reshape ``inputs`` to ``self.shape``.

    The static input shape is recorded on ``self.in_shape`` with every falsy
    dimension (e.g. ``None``) replaced by -1. If no target shape has been
    set yet, ``self.shape`` is initialised to ``[-1, prod(in_shape[1:])]``
    and kept for later calls.

    Args:
        inputs: Tensor to reshape.

    Returns:
        ``inputs`` reshaped to ``self.shape``.
    """
    static_dims = K.int_shape(inputs)
    self.in_shape = [dim if dim else -1 for dim in static_dims]

    if self.shape is None:
        # Default target: keep the leading axis, flatten everything else.
        flat_size = np.prod(self.in_shape[1:])
        self.shape = [-1, flat_size]

    return K.reshape(inputs, self.shape)
def yolo_loss(args, anchors, num_classes, rescore_confidence=False, print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    args : tuple
        ``(yolo_output, true_boxes, detectors_mask, matching_true_boxes)``:

        yolo_output : tensor
            Final convolutional layer features.
        true_boxes : tensor
            Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
            containing box x_center, y_center, width, height, and class.
        detectors_mask : array
            0/1 mask for detector positions where there is a matching
            ground truth.
        matching_true_boxes : array
            Corresponding ground truth boxes for positive detector
            positions. Already adjusted for conv height and width.
    anchors : tensor
        Anchor boxes for model.
    num_classes : int
        Number of object classes.
    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box
        with the closest matching ground truth box.
    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    total_loss : tensor
        Half the sum of the confidence, classification and coordinate
        squared-error losses over the minibatch.
    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1

    # yolo_head returns (confidence, xy, wh, class_probs) -- unpack in that
    # order. The previous (xy, wh, confidence, class) order silently mixed
    # the confidence tensor up with the box coordinates.
    pred_confidence, pred_xy, pred_wh, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate(
        (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (
        confidence_loss_sum + classification_loss_sum + coordinates_loss_sum)
    if print_loss:
        total_loss = tf.Print(
            total_loss, [
                total_loss, confidence_loss_sum, classification_loss_sum,
                coordinates_loss_sum
            ],
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features (channels-last is assumed when
        reading the grid size from axes 1 and 2).
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_confidence : tensor
        Sigmoid of feature channel 4 for each box.
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_class_probs : tensor
        Softmax over feature channels 5 onward for each box.
    """
    anchor_count = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchor_grid = K.reshape(K.variable(anchors), [1, 1, 1, anchor_count, 2])

    # Dynamic grid size so the head works for fully convolutional models.
    grid_shape = K.shape(feats)[1:3]  # assuming channels last
    grid_h = grid_shape[0]
    grid_w = grid_shape[1]

    # In YOLO the height index is the inner most iteration.
    row_idx = K.tile(K.arange(0, stop=grid_h), [grid_w])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    col_idx = K.expand_dims(K.arange(0, stop=grid_w), 0)
    col_idx = K.tile(col_idx, [grid_h, 1])
    col_idx = K.flatten(K.transpose(col_idx))

    cell_index = K.stack([row_idx, col_idx])
    cell_index = K.transpose(cell_index)
    cell_index = K.reshape(cell_index, [1, grid_h, grid_w, 1, 2])
    cell_index = K.cast(cell_index, K.dtype(feats))

    predictions = K.reshape(
        feats, [-1, grid_h, grid_w, anchor_count, num_classes + 5])
    grid_dims = K.reshape(grid_shape, [1, 1, 1, 1, 2])
    grid_dims = K.cast(grid_dims, K.dtype(predictions))

    box_confidence = K.sigmoid(predictions[..., 4:5])
    raw_xy = K.sigmoid(predictions[..., :2])
    raw_wh = K.exp(predictions[..., 2:4])
    box_class_probs = K.softmax(predictions[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (raw_xy + cell_index) / grid_dims
    box_wh = raw_wh * anchor_grid / grid_dims

    return box_confidence, box_xy, box_wh, box_class_probs