def call(self, x, mask=None):
    """Run the real and/or complex filter banks over every input channel.

    The channel axis is folded into the batch axis, each resulting 1-D
    signal is convolved through a 2-D 'th'-ordered convolution with dummy
    axes, and the result is reshaped back using the shape reported by
    ``get_output_shape_for``.

    `mask` is accepted for interface compatibility and is not used.
    """
    # (batch, length, dim) -> (batch * dim, length), per the reshape below.
    signal = K.permute_dimensions(x, (0, 2, 1))
    signal = K.reshape(signal, (-1, self.input_length))
    # Add dummy axes at positions 1 and -1 for the 2-D convolution.
    signal = K.expand_dims(signal, 1)
    signal = K.expand_dims(signal, -1)

    if self.real_filts is None:
        # No real filters configured: pass the signal through unchanged.
        real_part = signal
    else:
        real_part = K.conv2d(signal, self.W_r, strides=self.subsample,
                             border_mode=self.border_mode, dim_ordering='th')

    if self.complex_filts is None:
        output = real_part
    else:
        part_1 = K.conv2d(signal, self.W_c1, strides=self.subsample,
                          border_mode=self.border_mode, dim_ordering='th')
        part_2 = K.conv2d(signal, self.W_c2, strides=self.subsample,
                          border_mode=self.border_mode, dim_ordering='th')
        # Modulus of the two responses; epsilon keeps sqrt away from 0.
        modulus = K.sqrt(K.square(part_1) + K.square(part_2) + K.epsilon())
        output = K.concatenate((real_part, modulus), axis=1)

    output_shape = self.get_output_shape_for(
        (None, self.input_length, self.input_dim))
    output = K.squeeze(output, 3)  # remove the dummy 3rd dimension
    output = K.permute_dimensions(output, (2, 1, 0))
    merged_dim = output.shape[1] * output.shape[2]
    output = K.reshape(output, (-1, output_shape[1], merged_dim))
    return output
def build(self):
    """Build the encoder/variational/decoder stack and collect regularizers.

    For a single input the stack is run once.  When ``get_input()`` returns
    an ``OrderedDict`` (exactly two entries are required) the stack is run
    on both entries and an extra penalty, ``beta`` times the mean squared
    difference of the max-pooled reconstructions, is appended.
    """
    self.encoder.build()
    self.decoder.build()
    self.variational.build()

    self.regularizers = []

    def _run_stack(X):
        # encode -> record variational regularizer -> sample -> decode
        code = self.encoder(X)
        self.regularizers.append(
            self.variational.get_variational_regularization(code))
        sample = self.variational._get_output(code, train=True)
        return self.decoder(sample)

    input_list = self.get_input()
    if not isinstance(input_list, OrderedDict):
        _run_stack(input_list)
    else:
        assert len(input_list) == 2
        outputs = [_run_stack(X) for X in input_list.values()]
        pool0 = self.max_pool(K.expand_dims(outputs[0], 2))
        pool1 = self.max_pool(K.expand_dims(outputs[1], 2))
        slow = self.beta * ((pool0 - pool1) ** 2).mean()
        self.regularizers.append(LambdaRegularizer(slow))

    self.trainable_weights = (
        self.encoder.trainable_weights
        + self.variational.trainable_weights
        + self.decoder.trainable_weights
    )
def call(self, x):
    """Collapse axis 1 of `x` into an attention-weighted sum.

    A stack of ``attention_depth`` sigmoid layers is applied on the
    transposed input, a final projection with ``Wf`` and ``bias`` followed
    by tanh gives one score per position, and the scores are normalised
    with exp / sum(exp) before weighting `x`.

    Only the TensorFlow backend is supported (asserted).
    """
    assert(K.backend() == 'tensorflow')

    hidden = K.permute_dimensions(x, (0, 2, 1))
    for depth in range(0, self.attention_depth):
        hidden = K.sigmoid(K.dot(hidden, self.Ws[depth]) + self.bs[depth])
    hidden = K.permute_dimensions(hidden, (0, 2, 1))

    projected = K.dot(hidden, K.expand_dims(self.Wf, -1))
    scores = K.tanh(K.squeeze(projected, -1) + self.bias)

    # Turn the scores into "probabilities": exp(x) / sum(exp(xi)),
    # where the sum runs over axis 1 separately for each batch entry.
    weights = K.exp(scores)
    normaliser = K.sum(weights, axis=1, keepdims=True)
    weights = weights / K.cast(normaliser, K.floatx())

    # To inspect the internal probabilities while testing, return
    # `weights` at this point instead of the weighted sum.

    # Scale every hidden value by its probability and sum over axis 1.
    weighted = x * K.expand_dims(weights, -1)
    return K.sum(weighted, axis=1)
def call(self, x):
    """Return ``num`` evenly spaced values from ``start`` to ``stop``.

    The ramp has shape (1, num, 1) and is tiled along axis 0 to the batch
    size of `x`; `x` is used only for that batch size.

    With ``num == 1`` the original divided by ``num - 1 == 0`` and raised
    ZeroDivisionError.  The single sample is now ``start``, matching
    ``numpy.linspace``.
    """
    # Number of intervals between samples; clamp to 1 so num == 1 is valid.
    intervals = max(self.num - 1, 1)
    r = K.cast(K.arange(self.num), K.floatx()) / float(intervals)
    r = self.start + (self.stop - self.start) * r
    # (num,) -> (num, 1) -> (1, num, 1)
    r = K.expand_dims(K.expand_dims(r), axis=0)
    # Re-cast in case start/stop promoted the dtype.
    r = K.cast(r, dtype=K.floatx())
    return K.tile(r, (K.shape(x)[0], 1, 1))
def duel_atari_cnn(input_shape, n_actions, mode='mean'):
    """Build the dueling variant of the Atari CNN.

    Follows the network architecture described in the 2015 Deepmind Nature
    paper with the changes proposed in the Dueling Network paper.

    input_shape: 3D Tensor (channels, height, width) format
    n_actions: int
    mode: 'mean' or 'max'; the statistic of the advantages subtracted
        when combining value and advantages.

    Raises ValueError for any other `mode`.

    The last Dense layer emits ``n_actions + 1`` units: column 0 is the
    state value and columns 1: are the advantages.  The mean/max is taken
    over the action axis (axis=1) of each sample.  Without an explicit
    axis the backend reduces over the whole batch, which couples the
    Q-values of unrelated states.
    """
    # Plain lambdas that reference only the global K, so the Lambda layer
    # carries no closure state.
    if mode == 'mean':
        agg = Lambda(
            lambda a: K.expand_dims(a[:, 0], dim=-1) + a[:, 1:]
            - K.mean(a[:, 1:], axis=1, keepdims=True),
            output_shape=(n_actions,))
    elif mode == 'max':
        agg = Lambda(
            lambda a: K.expand_dims(a[:, 0], dim=-1) + a[:, 1:]
            - K.max(a[:, 1:], axis=1, keepdims=True),
            output_shape=(n_actions,))
    else:
        raise ValueError("mode must be either 'mean' or 'max'")

    frames = Input(shape=input_shape)
    x = Convolution2D(32, 8, 8, subsample=(4, 4), activation='relu')(frames)
    x = Convolution2D(64, 4, 4, subsample=(2, 2), activation='relu')(x)
    x = Convolution2D(64, 3, 3, subsample=(1, 1), activation='relu')(x)
    x = Flatten()(x)
    x = Dense(512, activation='relu')(x)
    x = Dense(n_actions + 1)(x)
    output = agg(x)
    return Model(frames, output)
def accumulate(attend_function, inputs, input_length, mask=None, return_probabilities=False):
    '''Get the running attention over a sequence.

    Given a tensor of at least 3 dims, compute the running attended sum:
    at step i only items 0..i are visible to `attend_function`; later
    items are replaced by zeros.  This is basically a mod on keras' rnn
    implementation.

    attend_function: called as ``attend_function(inputs_i)`` or, when a
        mask is given, ``attend_function(inputs_i, mask_i)``.  It must
        marginalise over the time dimension and return a 2-d tensor, or an
        ``(output, p_vectors)`` pair when `return_probabilities` is set.
    inputs: tensor whose first two axes are swapped before processing.
    input_length: number of steps to iterate over.
    mask: optional tensor with ``ndim`` or ``ndim - 1`` dimensions.
    return_probabilities: also return the stacked p_vectors.

    Returns the stacked outputs with the first two axes swapped back, or
    ``[outputs, p_vectors]`` when `return_probabilities` is true.

    author: bcm
    '''
    ndim = inputs.ndim
    assert ndim >= 3, 'inputs should be at least 3d'
    axes = [1, 0] + list(range(2, ndim))
    inputs = inputs.dimshuffle(axes)

    if mask is not None:
        if mask.ndim == ndim - 1:
            mask = K.expand_dims(mask)
        assert mask.ndim == ndim
        mask = mask.dimshuffle(axes)

    successive_outputs = []
    successive_pvecs = []

    # Zeros substituted for the steps that are not yet "uncovered".
    uncover_mask = K.zeros_like(inputs)
    uncover_indices = K.arange(input_length)
    for _ in range(ndim - 1):
        uncover_indices = K.expand_dims(uncover_indices)

    def make_subset(i, X):
        # Keep steps <= i of X; replace the rest with zeros.
        return K.switch(uncover_indices <= i, X, uncover_mask)

    for i in range(input_length):
        inputs_i = make_subset(i, inputs)
        if mask is not None:
            # Build the mask subset only when a mask exists; doing it
            # unconditionally passed None to K.switch.
            output = attend_function(inputs_i, make_subset(i, mask))
        else:
            output = attend_function(inputs_i)
        if return_probabilities:
            output, p_vectors = output
            successive_pvecs.append(p_vectors)
        assert output.ndim == 2, "Your attention function is malfunctioning; the attention accumulator should return 2 dimensional tensors"
        successive_outputs.append(output)

    # NOTE: the original called K.squeeze(..., -1) here and discarded the
    # result, so it had no effect on the returned tensors; it is omitted.
    outputs = K.pack(successive_outputs)
    outputs = outputs.dimshuffle([1, 0] + list(range(2, outputs.ndim)))

    if return_probabilities:
        out_pvecs = K.pack(successive_pvecs)
        # Use this tensor's own rank rather than reusing the output's axes.
        out_pvecs = out_pvecs.dimshuffle(
            [1, 0] + list(range(2, out_pvecs.ndim)))
        outputs = [outputs, out_pvecs]
    return outputs
def __init__(self, *kargs, **kwargs):
    """Initialise the layer and store a one-hot-at-step-0 multiplier.

    ``self.mult`` is a backend variable of shape (1, input_length, 1)
    holding 1.0 at index 0 of the middle axis and 0.0 elsewhere.
    """
    super(TDD, self).__init__(*kargs, **kwargs)
    first_step_only = np.zeros(self.input_length)
    first_step_only[0] = 1.0
    multiplier = K.variable(value=first_step_only)
    # (input_length,) -> (1, input_length) -> (1, input_length, 1)
    self.mult = K.expand_dims(K.expand_dims(multiplier, 0), 2)
def _get_anchor_positive_triplet_mask(self, y_true: Tensor, pairwise_dist: Tensor) -> Tensor:
    """Return a 0/1 mask of valid (anchor, positive) pairs.

    An entry is 1 when both items carry the same label and their pairwise
    distance is non-zero (a zero distance is treated as anchor == positive).
    The mask has the dtype of `pairwise_dist`.
    """
    dist_dtype = K.dtype(pairwise_dist)
    # Labels compared along both axes.
    same_label = K.equal(K.expand_dims(y_true, 0), K.expand_dims(y_true, 1))
    # Drop pairs at distance exactly zero.
    distinct = K.not_equal(pairwise_dist, 0.0)
    return K.cast(same_label, dist_dtype) * K.cast(distinct, dist_dtype)
def call(self, x, mask=None):
    """Score attachment classes from head, preposition and child encodings.

    x[0]: (batch_size, input_length, input_dim) encoded sentence
    x[1]: (batch_size, 1) indices of prepositions
    Optional: x[2]: (batch_size, input_length - 2) head probabilities,
        read only when ``with_attachment_probs`` is set.

    Returns softmax class probabilities, (batch_size, num_classes).
    """
    assert isinstance(x, (list, tuple))
    encoded_sentence = x[0]
    prep_indices = K.squeeze(x[1], axis=-1)  # (batch_size,)
    batch_indices = K.arange(K.shape(encoded_sentence)[0])  # (batch_size,)

    if not self.with_attachment_probs:
        # The head is taken to be the token right before the preposition.
        head_indices = prep_indices - 1  # (batch_size,)
        head_encoding = encoded_sentence[batch_indices, head_indices, :]  # (batch_size, input_dim)
    else:
        # We're essentially doing K.argmax(x[2]) here, but argmax is not
        # differentiable!
        head_probs = x[2]
        head_probs_padding = K.zeros_like(x[2])[:, :2]  # (batch_size, 2)
        # (batch_size, input_length)
        padded_head_probs = K.concatenate([head_probs, head_probs_padding])
        # (batch_size, 1)
        max_head_probs = K.expand_dims(K.max(padded_head_probs, axis=1))
        # (batch_size, input_length, 1)
        max_head_prob_indices = K.expand_dims(
            K.equal(padded_head_probs, max_head_probs))
        # (batch_size, input_length, input_dim)
        masked_head_encoding = K.switch(max_head_prob_indices,
                                        encoded_sentence,
                                        K.zeros_like(encoded_sentence))
        # (batch_size, input_dim)
        head_encoding = K.sum(masked_head_encoding, axis=1)

    prep_encoding = encoded_sentence[batch_indices, prep_indices, :]  # (batch_size, input_dim)
    child_encoding = encoded_sentence[batch_indices, prep_indices + 1, :]  # (batch_size, input_dim)
    '''
    prep_indices = x[1]
    sentence_mask = mask[0]
    if sentence_mask is not None:
        if K.ndim(sentence_mask) > 2:
            # This means this layer came after a Bidirectional layer. Keras has this bug which
            # concatenates input masks instead of output masks.
            # TODO: Fix Bidirectional instead.
            sentence_mask = K.any(sentence_mask, axis=(-2, -1))
    head_encoding, prep_encoding, child_encoding = self.get_split_averages(encoded_sentence, sentence_mask,
                                                                           prep_indices)
    '''
    # Each projection is (batch_size, proj_dim).
    head_projection = K.dot(head_encoding, self.proj_head)
    prep_projection = K.dot(prep_encoding, self.proj_prep)
    child_projection = K.dot(child_encoding, self.proj_child)

    # Choose what gets summed before the outer tanh.
    if self.composition_type == 'HPCT':
        pre_activation = head_projection + prep_projection + child_projection
    elif self.composition_type == 'HPC':
        pre_activation = head_projection + K.tanh(prep_projection + child_projection)
    else:
        # Composition type in HC
        pre_activation = head_projection + child_projection
    composed_projection = K.tanh(pre_activation)  # (batch_size, proj_dim)

    for hidden_layer in self.hidden_layers:
        composed_projection = K.tanh(K.dot(composed_projection, hidden_layer))  # (batch_size, proj_dim)

    # (batch_size, num_classes)
    return K.softmax(K.dot(composed_projection, self.scorer))
def _build(self, model): """ Instantiates the layer with the given backend. """ backend = model.get_backend() if backend.get_name() == 'keras': import keras.layers as L # pylint: disable=import-error import keras.backend as K # pylint: disable=import-error target_dim = self.dimension if target_dim >= 0: target_dim += 1 def expand_shape(input_shape): """ Computes the expanded shape. """ dim = target_dim if dim < 0: dim += len(input_shape) + 1 return input_shape[:dim] + (1,) + input_shape[dim:] if backend.keras_version() == 1: func = lambda x: K.expand_dims(x, dim=target_dim) else: func = lambda x: K.expand_dims(x, axis=target_dim) yield L.Lambda( func, expand_shape, name=self.name ) elif backend.get_name() == 'pytorch': import torch # pylint: disable=import-error def connect(inputs): """ Connects the layer. """ assert len(inputs) == 1 dim = self.dimension if dim < 0: dim += len(inputs[0]['shape']) + 1 dim += 1 return { 'shape' : self.shape([inputs[0]['shape']]), 'layer' : model.data.add_operation( lambda x: torch.unsqueeze(x, dim) )(inputs[0]['layer']) } yield connect else: raise ValueError('Unknown or unsupported backend: {}'.format(backend))
def _batch_all_triplet_loss(self, y_true: Tensor, pairwise_dist: Tensor) -> Tensor:
    """Average the hinge triplet loss over all triplets with positive loss.

    For every (anchor, positive, negative) index triple the loss is
    ``d(a, p) - d(a, n) + margin``, zeroed where ``_get_triplet_mask``
    marks the triple invalid, and clipped below at 0.  The sum is divided
    by the number of triplets whose loss exceeds 1e-16.
    """
    # Broadcasting (i, j, 1) against (i, 1, k) enumerates every triple.
    anchor_positive = K.expand_dims(pairwise_dist, 2)
    anchor_negative = K.expand_dims(pairwise_dist, 1)
    raw_loss = anchor_positive - anchor_negative + self.margin

    valid = self._get_triplet_mask(y_true, pairwise_dist)
    hinge = K.clip(valid * raw_loss, 0.0, None)

    is_active = K.cast(K.greater(hinge, 1e-16), K.dtype(hinge))
    # 1e-16 avoids a division by zero when no triplet is active.
    return K.sum(hinge) / (K.sum(is_active) + 1e-16)
def lookup(self, x, W, memory_length):
    """Embed the indices in `x` with table `W` and return (X, mask).

    x: indices, reshaped to (batch*memory_length, input_length) and cast
        to int32.
    W: embedding table passed to ``K.gather``.
    memory_length: size of the memory axis of the returned tensor.

    Returns
        X: reshaped to (-1, memory_length, output_dim).  In "bow" mode
            ``self.Te`` is added and the word axis is summed out first.
        mask: (batch*memory_length, input_length, 1), true where the
            index is non-zero.
    """
    # shape: (batch*memory_length, input_length)
    x = K.cast(K.reshape(x, (-1, self.input_length)), 'int32')
    # Compare with an integer zero: x is int32, and a float literal cannot
    # be converted to int32 by the TensorFlow backend.
    mask = K.expand_dims(K.not_equal(x, 0), dim=-1)
    # shape: (batch*memory_length, input_length, output_dim)
    X = K.gather(W, x)
    if self.bow_mode == "bow":
        # shape: (batch*memory_length, output_dim)
        X = K.sum(X + K.expand_dims(self.Te, 0), axis=1)
    # shape: (batch, memory_length, output_dim)
    X = K.reshape(X, (-1, memory_length, self.output_dim))
    return X, mask
def __init__(self, model, policy=None, test_policy=None, enable_double_dqn=True, enable_dueling_network=False,
             dueling_type='avg', *args, **kwargs):
    """Create a DQN agent around `model`.

    model: Keras model with a single output of shape (None, nb_actions).
    policy / test_policy: exploration policies; default to
        ``EpsGreedyQPolicy()`` and ``GreedyQPolicy()``.
    enable_double_dqn: stored on the agent.
    enable_dueling_network: when true, the model's last layer is replaced
        by a dueling head of the given `dueling_type`.
    dueling_type: one of 'avg', 'max', 'naive'.

    Raises ValueError when the model has several outputs or the wrong
    output shape.
    """
    super(DQNAgent, self).__init__(*args, **kwargs)

    # Validate (important) input.
    if hasattr(model.output, '__len__') and len(model.output) > 1:
        raise ValueError('Model "{}" has more than one output. DQN expects a model that has a single output.'.format(model))
    if model.output._keras_shape != (None, self.nb_actions):
        raise ValueError('Model output "{}" has invalid shape. DQN expects a model that has one dimension for each action, in this case {}.'.format(model.output, self.nb_actions))

    # Parameters.
    self.enable_double_dqn = enable_double_dqn
    self.enable_dueling_network = enable_dueling_network
    self.dueling_type = dueling_type
    if self.enable_dueling_network:
        # get the second last layer of the model, abandon the last layer
        layer = model.layers[-2]
        nb_action = model.output._keras_shape[-1]
        # layer y has a shape (nb_action+1,)
        # y[:,0] represents V(s;theta)
        # y[:,1:] represents A(s,a;theta)
        y = Dense(nb_action + 1, activation='linear')(layer.output)
        # calculate the Q(s,a;theta)
        # dueling_type == 'avg'
        # Q(s,a;theta) = V(s;theta) + (A(s,a;theta)-Avg_a(A(s,a;theta)))
        # dueling_type == 'max'
        # Q(s,a;theta) = V(s;theta) + (A(s,a;theta)-max_a(A(s,a;theta)))
        # dueling_type == 'naive'
        # Q(s,a;theta) = V(s;theta) + A(s,a;theta)
        #
        # The average / maximum is taken over the action axis (axis=1) of
        # each sample.  Without an explicit axis the backend reduces over
        # the whole batch, mixing the advantages of unrelated states.
        if self.dueling_type == 'avg':
            outputlayer = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:] - K.mean(a[:, 1:], axis=1, keepdims=True), output_shape=(nb_action,))(y)
        elif self.dueling_type == 'max':
            outputlayer = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:] - K.max(a[:, 1:], axis=1, keepdims=True), output_shape=(nb_action,))(y)
        elif self.dueling_type == 'naive':
            outputlayer = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:], output_shape=(nb_action,))(y)
        else:
            assert False, "dueling_type must be one of {'avg','max','naive'}"

        model = Model(inputs=model.input, outputs=outputlayer)

    # Related objects.
    self.model = model
    if policy is None:
        policy = EpsGreedyQPolicy()
    if test_policy is None:
        test_policy = GreedyQPolicy()
    self.policy = policy
    self.test_policy = test_policy

    # State.
    self.reset_states()
def iou(x_true, y_true, w_true, h_true, x_pred, y_pred, w_pred, h_pred, t, pred_confid_tf):
    """Compute IoU statistics between true and predicted boxes.

    All box tensors get a trailing axis added at position 2.  Predictions
    are zeroed wherever the boolean tensor `t` is false.  Widths are
    scaled by 256 and heights by 160; cell offsets come from an 8 x 5 grid
    flattened row by row (40 cells).

    Returns the 12-tuple
        (ave_iou, recall, precision, obj_count, intersection, union,
         ow, oh, x, y, w, h).

    Fix: the vertical overlap scaled the true height by 256 while the
    union (and the predicted height) use 160; it now uses 160 as well.
    """
    grid_w, grid_h = 8, 5        # cells per row / number of rows
    img_w, img_h = 256., 160.    # scale applied to widths / heights

    x_true = K.expand_dims(x_true, 2)
    y_true = K.expand_dims(y_true, 2)
    w_true = K.expand_dims(w_true, 2)
    h_true = K.expand_dims(h_true, 2)
    x_pred = K.expand_dims(x_pred, 2)
    y_pred = K.expand_dims(y_pred, 2)
    w_pred = K.expand_dims(w_pred, 2)
    h_pred = K.expand_dims(h_pred, 2)

    # Column index 0..7 repeated for each of the 5 rows, and row index
    # 0..4 repeated 8 times: the same values as the former literal tables.
    xoffset = K.expand_dims(tf.convert_to_tensor(
        np.tile(np.arange(grid_w, dtype=np.float32), grid_h)), 1)
    yoffset = K.expand_dims(tf.convert_to_tensor(
        np.repeat(np.arange(grid_h, dtype=np.float32), grid_w)), 1)

    # Keep predictions only where `t` is set.
    x = tf.where(t, x_pred, K.zeros_like(x_pred))
    y = tf.where(t, y_pred, K.zeros_like(y_pred))
    w = tf.where(t, w_pred, K.zeros_like(w_pred))
    h = tf.where(t, h_pred, K.zeros_like(h_pred))

    ow = overlap(x + xoffset, w * img_w, x_true + xoffset, w_true * img_w)
    oh = overlap(y + yoffset, h * img_h, y_true + yoffset, h_true * img_h)

    # Negative overlaps mean the boxes do not intersect.
    ow = tf.where(K.greater(ow, 0), ow, K.zeros_like(ow))
    oh = tf.where(K.greater(oh, 0), oh, K.zeros_like(oh))
    intersection = ow * oh
    union = (w * img_w * h * img_h
             + w_true * img_w * h_true * img_h
             - intersection
             + K.epsilon())  # prevent div 0

    # iouall shape=(-1, bnum*gridcells)
    iouall = intersection / union
    obj_count = K.sum(tf.where(t, K.ones_like(x_true), K.zeros_like(x_true)))

    ave_iou = K.sum(iouall) / (obj_count + 0.0000001)
    recall_t = K.greater(iouall, 0.5)

    fid_t = K.greater(pred_confid_tf, 0.3)
    recall_count_all = K.sum(
        tf.where(fid_t, K.ones_like(iouall), K.zeros_like(iouall)))

    # Cells that are both confident (> 0.3) and well localised (IoU > 0.5).
    obj_fid_t = tf.logical_and(fid_t, recall_t)
    effevtive_iou_count = K.sum(
        tf.where(obj_fid_t, K.ones_like(iouall), K.zeros_like(iouall)))

    recall = effevtive_iou_count / (obj_count + 0.00000001)
    precision = effevtive_iou_count / (recall_count_all + 0.0000001)
    return ave_iou, recall, precision, obj_count, intersection, union, ow, oh, x, y, w, h
def call(self, x, mask=None):
    """Add or concatenate sinusoidal position encodings to `x`.

    In 'sum' mode (or when ``size`` is unset) the encoding size is taken
    from the last dimension of `x` and stored on ``self.size``.  In
    'concat' mode the encoding is concatenated in front of `x` on axis 2.

    Raises ValueError for any other mode; the original fell through and
    returned None.  `mask` is not used.
    """
    if self.size is None or self.mode == 'sum':
        self.size = int(x.shape[-1])

    # Integer division: the range bound is a count of frequencies.
    position_j = 1. / K.pow(
        10000., 2 * K.arange(self.size // 2, dtype='float32') / self.size)
    position_j = K.expand_dims(position_j, 0)

    # K.arange does not support variable lengths, so build the position
    # index 0..seq_len-1 with a cumulative sum of ones instead.
    position_i = K.cumsum(K.ones_like(x[:, :, 0]), 1) - 1
    position_i = K.expand_dims(position_i, 2)

    position_ij = K.dot(position_i, position_j)
    position_ij = K.concatenate([K.cos(position_ij), K.sin(position_ij)], 2)

    if self.mode == 'sum':
        return position_ij + x
    if self.mode == 'concat':
        return K.concatenate([position_ij, x], 2)
    raise ValueError(
        "mode must be either 'sum' or 'concat', got {!r}".format(self.mode))
def sparse_amsoftmax_loss(y_true, y_pred, scale=30, margin=0.35):
    """Additive-margin softmax loss for integer class labels.

    y_true: labels; only column 0 is read and it is cast to int32.
    y_pred: per-class scores (the original comment notes they are
        cosines in [-1, 1], which is why they are scaled).
    scale: multiplier applied to the scores.
    margin: amount subtracted from the target-class score.
    """
    # Force labels to shape (None, 1) and dtype int32.
    labels = K.cast(K.expand_dims(y_true[:, 0], 1), 'int32')

    # Pair every row index with its label and pick the target score
    # with tf.gather_nd.
    row_ids = K.expand_dims(K.arange(0, K.shape(labels)[0]), 1)
    gather_idxs = K.concatenate([row_ids, labels], 1)
    target_score = K.expand_dims(K.tf.gather_nd(y_pred, gather_idxs), 1)

    # Subtract the margin from the target score.
    target_with_margin = target_score - margin

    # Partition function over all scores plus the margin-adjusted target.
    # logsumexp keeps the gradient from vanishing.
    scaled = K.concatenate([y_pred, target_with_margin], 1) * scale
    log_z = K.logsumexp(scaled, 1, keepdims=True)
    # Remove exp(scale * target_score) from Z so the target is counted
    # only in its margin-adjusted form.
    log_z = log_z + K.log(1 - K.exp(scale * target_score - log_z))

    return -target_with_margin * scale + log_z
def call(self, inputs, training=None):
    """Transform the input capsules with W and run dynamic routing.

    inputs.shape = [None, input_num_capsule, input_dim_vector]
    Returns a tensor reshaped to [-1, num_capsule, dim_vector].

    Fix: the routing logits are kept in a local variable.  The original
    did ``self.bias += ...`` inside the loop, which rebound the layer's
    ``bias`` attribute to the routed logits of this call.

    `training` is accepted for interface compatibility and is not used.
    """
    # Expand dims to [None, input_num_capsule, 1, 1, input_dim_vector]
    inputs_expand = K.expand_dims(K.expand_dims(inputs, 2), 2)

    # Replicate num_capsule dimension to prepare being multiplied by W
    # Now it has shape = [None, input_num_capsule, num_capsule, 1, input_dim_vector]
    inputs_tiled = K.tile(inputs_expand, [1, 1, self.num_capsule, 1, 1])

    # Compute `inputs * W` by scanning inputs_tiled on dimension 0. This is
    # faster but requires Tensorflow.  (The alternative of tiling W over
    # the batch and calling K.batch_dot is slower and needs batch_size.)
    # inputs_hat.shape = [None, input_num_capsule, num_capsule, 1, dim_vector]
    inputs_hat = tf.scan(
        lambda ac, x: K.batch_dot(x, self.W, [3, 2]),
        elems=inputs_tiled,
        initializer=K.zeros([self.input_num_capsule, self.num_capsule, 1, self.dim_vector]))

    # Routing by plain iteration (a tf.while_loop formulation was noted
    # in the original as performing about the same).
    assert self.num_routing > 0, 'The num_routing should be > 0.'
    routing_logits = self.bias
    for i in range(self.num_routing):
        c = tf.nn.softmax(routing_logits, dim=2)  # dim=2 is the num_capsule dimension
        # outputs.shape=[None, 1, num_capsule, 1, dim_vector]
        outputs = squash(K.sum(c * inputs_hat, 1, keepdims=True))

        # The last iteration need not update the logits: they would not
        # be passed to the graph any more anyway.
        if i != self.num_routing - 1:
            routing_logits = routing_logits + K.sum(
                inputs_hat * outputs, -1, keepdims=True)

    return K.reshape(outputs, [-1, self.num_capsule, self.dim_vector])
def call(self, x, mask=None):
    """Attention-pool `x` over axis 1.

    Scores are ``tanh(dot_product(x, W) [+ b])``, exponentiated,
    optionally masked, and normalised over axis 1.  Returns the weighted
    sum, or ``[result, weights]`` when ``return_attention`` is set.
    """
    scores = dot_product(x, self.W)
    if self.bias:
        scores = scores + self.b
    weights = K.exp(K.tanh(scores))

    # Mask after the exp; the normalisation below rescales what is left.
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        weights = weights * K.cast(mask, K.floatx())

    # Early in training the sum can be almost zero, which produces NaNs;
    # a small positive epsilon in the denominator works around that.
    denominator = K.cast(
        K.sum(weights, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    weights = weights / denominator

    result = K.sum(x * K.expand_dims(weights), axis=1)
    if self.return_attention:
        return [result, weights]
    return result
def get_output(self, train=False, get_tuple=False):
    """Run the attention RNN over the layer input.

    input shape: (nb_samples, time (padded with zeros), input_dim)

    Returns the full output sequence when ``return_sequences`` is set,
    otherwise the last output.  Also stores ``other_outputs`` on the
    layer and, for stateful layers, the state updates.

    `get_tuple` is accepted for interface compatibility and is not used.

    Fix: the mask is tested with ``is not None``.  Truth-testing a tensor
    is backend dependent and can raise.
    """
    X = self.get_input(train)
    assert K.ndim(X) == 3

    mask = self.get_output_mask(train)
    masking = mask is not None
    if masking:
        # apply mask
        X = X * K.cast(K.expand_dims(mask), X.dtype)

    if self.stateful:
        initial_states = self.states
    else:
        initial_states = self.get_initial_states(X)

    last_output, outputs, other_outputs, states = LX.rnn(
        self.attention_step, X, initial_states, self.contexts,
        truncate_gradient=self.truncate_gradient,
        go_backwards=self.go_backwards,
        masking=masking)
    self.other_outputs = other_outputs

    if self.stateful:
        # Pair every stored state with its freshly computed value.
        self.updates = list(zip(self.states, states))

    if self.return_sequences:
        return outputs
    return last_output
def compute_mask(self, x, mask=None):
    """Return the output mask(s) for this layer.

    Without ``return_probabilities`` the input mask is passed through.
    Otherwise a two-element list is returned: the input mask and, when a
    mask exists, that mask reduced with ``K.all`` over its last axis and
    given a trailing axis.
    """
    if not self.return_probabilities:
        return mask
    if mask is None:
        # No mask for either output.
        return [mask, mask]
    return [mask, K.expand_dims(K.all(mask, axis=-1))]
def call(self, X, mask=None):
    """Apply a 1-D transposed convolution via ``tf.nn.conv2d_transpose``.

    The input gets a dummy axis so the 2-D op can be used, and the dummy
    axis is removed again at the end.  Shapes are printed at each stage
    for debugging.  `mask` is not used.
    """
    # 1D -> 2D
    batch = K.shape(X)[0]
    width = deconv_output_length(K.shape(X)[1], self.filter_length,
                                 self.padding, self.strides[2])
    print("Output width: ", width)
    print("Input shape: ", K.shape(X))

    expanded = K.expand_dims(X, 2)
    print("Input shape after expand: ", K.shape(expanded))

    # Move the dummy axis in front of the original axis 1.
    expanded = K.permute_dimensions(expanded, (0, 2, 1, 3))
    print("Input shape after permute: ", K.shape(expanded))

    deconv_shape = tf.pack([batch, 1, width, self.nb_filter])
    print("Deconv shape: ", deconv_shape)

    conv_out = tf.nn.conv2d_transpose(expanded, self.W,
                                      strides=self.strides,
                                      padding=self.padding.upper(),
                                      output_shape=deconv_shape)
    bias = K.reshape(self.b, (1, 1, 1, self.W_shape[2]))
    output = conv_out + bias
    print("Output shape: ", K.shape(output))

    output = K.permute_dimensions(output, (0, 2, 1, 3))
    print("Output shape after permute: ", K.shape(output))

    # 2D -> 1D
    output = K.squeeze(output, 2)
    print("Output shape after squeeze: ", K.shape(output))
    return output
def simple_context(X, mask, n=activation_rnn_size):
    """Reduce the input to its headline part plus an attended description.

    The first `maxlend` steps of `X` are the description and the rest the
    headline.  The first `n` features of each step are used only to
    compute attention weights; the remaining features are the "words".
    For every headline step the result concatenates a weighted average of
    the description words with the headline words of that step.
    """
    description = X[:, :maxlend, :]
    headline = X[:, maxlend:, :]
    head_activations = headline[:, :, :n]
    head_words = headline[:, :, n:]
    desc_activations = description[:, :, :n]
    desc_words = description[:, :, n:]

    # Energy between every headline step and every description step.
    # See theano.tensor.batched_tensordot for the axes convention.
    energies = K.batch_dot(head_activations, desc_activations, axes=(2, 2))

    # Push masked-out description steps to a huge negative energy so the
    # softmax gives them (almost) no weight.
    masked_out = 1. - K.cast(mask[:, :maxlend], 'float32')
    energies = energies + -1e20 * K.expand_dims(masked_out, 1)

    # Softmax over description steps, one row per headline step.
    flat_energies = K.reshape(energies, (-1, maxlend))
    weights = K.softmax(flat_energies)
    weights = K.reshape(weights, (-1, maxlenh, maxlend))

    # Weighted average of description words for every headline step.
    desc_avg_word = K.batch_dot(weights, desc_words, axes=(2, 1))
    return K.concatenate((desc_avg_word, head_words))
def conv_step(self, x, W, b=None, border_mode="valid", filter_shape=None, mask_type='b'):
    """Convolve `x` with a channel-masked copy of `W` (Theano conv2d).

    Before the convolution, entry [0, 0] of the last two filter axes is
    zeroed for two blocks of (output, input) channel thirds.  The
    convolution always uses ``border_mode='half'`` and no filter flip;
    the `border_mode` and `mask_type` arguments are not read.

    Returns the convolution output with its last (dummy) axis removed.
    """
    in_third = self.input_dim // 3
    out_third = self.nb_filter // 3

    channel_mask = np.ones(filter_shape, dtype=_FLOATX)
    # First third of outputs: drop inputs from the second third onward.
    channel_mask[:out_third, in_third:, 0, 0] = 0
    # Second third of outputs: drop inputs in the last third.
    channel_mask[out_third:2 * out_third, 2 * in_third:, 0, 0] = 0
    masked_W = W * channel_mask

    dims = self.shuffeled_dims
    x = K.expand_dims(x, -1)  # add a dimension of the right
    conv_out = T.nnet.conv2d(
        x, masked_W,
        subsample=self.subsample,
        border_mode='half',
        filter_flip=False,
        input_shape=(dims[0], dims[2], dims[3], 1),
        filter_shape=filter_shape)
    if b:
        conv_out = conv_out + K.reshape(b, (1, filter_shape[0], 1, 1))
    return K.squeeze(conv_out, 3)  # remove the dummy 3rd dimension
def create_neural_network(self):
    """Build, compile and return the dueling Q-network.

    Two 100-unit ReLU layers feed a dueling head (max variant).  The
    model is compiled with MSE loss and Adam at learning rate
    ``self.alpha``, and its architecture is written to 'cartpole.json'
    in the current working directory.

    Fix: the maximum advantage is taken per sample over the action axis
    (axis=1).  Without an explicit axis the backend reduces over the
    whole batch.
    """
    model = Sequential()
    model.add(Dense(100, input_dim=self.nstates, activation='relu'))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(self.nactions, activation='linear'))

    # get second last layer of the model, abandon the last layer
    layer = model.layers[-2]
    nb_action = model.output._keras_shape[-1]

    # layer y has a shape (nb_action+1)
    # y[:,0] represents V(s;theta)
    # y[:,1:] represents A(s,a;theta)
    y = Dense(nb_action + 1, activation='linear')(layer.output)

    # calculate the Q(s,a;theta)
    # dueling type average -> Q(s,a;theta) = V(s;theta) + (A(s,a;theta)-Average_a(A(s,a;theta)))
    # outputlayer = Lambda(lambda a: K.expand_dims(a[:,0], -1) + a[:,1:] - K.mean(a[:,1:], axis=1, keepdims=True), output_shape=(nb_action,))(y)
    # dueling type max -> Q(s,a;theta) = V(s;theta) + (A(s,a;theta)-Max_a(A(s,a;theta)))
    outputlayer = Lambda(
        lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:]
        - K.max(a[:, 1:], axis=1, keepdims=True),
        output_shape=(nb_action,))(y)
    # dueling type naive -> Q(s,a;theta) = V(s;theta) + A(s,a;theta)
    # outputlayer = Lambda(lambda a: K.expand_dims(a[:,0], -1) + a[:,1:], output_shape=(nb_action,))(y)

    # connect
    model = Model(input=model.input, output=outputlayer)

    model.compile(loss='mse', optimizer=Adam(lr=self.alpha))
    model_json = model.to_json()
    with open('cartpole.json', 'w') as json_file:
        json_file.write(model_json)
    return model
def recurrence(y_i, h):
    """Attend over `h`, join the context with `y_i` and run it through a GRU.

    Reads `s`, `encoding_dim` and `output_dim` from the enclosing scope.
    A new GRU model is constructed on every call.
    """
    # (batch_size, encoding_dim, input_length)
    h_transposed = K.permute_dimensions(h, [0, 2, 1])

    # eqn 6: normalised alignment scores, (batch_size, input_length)
    scores = K.batch_dot(h_transposed, s, axes=1)
    e = K.l2_normalize(scores, axis=1)

    # eqn 5: attention weights, (batch_size, input_length)
    alpha = K.softmax(e)

    # Context vector, (batch_size, encoding_dim)
    context = K.batch_dot(h, alpha, axes=1)

    # (batch_size, 1, 2 * encoding_dim)
    joined = K.concatenate([context, y_i], axis=1)
    recurrence_result = K.expand_dims(joined, dim=1)

    step_shape = (1, 2 * encoding_dim)
    expanded_h = Input(shape=step_shape, name='expanded_h')
    gru = Sequential([
        GRU(output_dim, return_sequences=False, input_shape=step_shape)
    ])
    # (batch_size, 1, output_dim)
    model = Model(input=[expanded_h], output=[gru(expanded_h)])
    return model(recurrence_result)
def get_initial_states(self, x):
    """Return one all-zero initial state per entry in ``self.states``.

    The zero state is derived from `x` so that it follows the batch and
    spatial sizes of the input: `x` is zeroed, axes 1 and 2 are summed
    out, and a new axis 1 of size ``nb_filter`` is inserted.  The same
    tensor object is reused for every state.
    """
    # (samples, num_steps, input_channel, h, w) -> (samples, h, w)
    zeros = K.sum(K.zeros_like(x), [1, 2])
    # -> (samples, 1, h, w) -> (samples, nb_filter, h, w)
    zeros = K.repeat_elements(K.expand_dims(zeros, 1), self.nb_filter, 1)
    return [zeros] * len(self.states)
def compute_mask(self, inputs, mask=None):
    """Combine the input masks according to the merge mode.

    inputs: list of input tensors.
    mask: list of masks (entries may be None), or None.

    Returns None when there is no mask at all and for 'cos' / 'dot'.
    For 'sum' / 'mul' / 'ave' the non-None masks are AND-ed together.
    For 'concat' the masks are concatenated and reduced with ``K.all``.
    For a callable mode ``self._output_mask`` is called or returned.

    Raises Exception for an unknown mode.

    Fix: in 'concat' mode an input without a mask used
    ``inputs[i][:-1]`` where ``i`` was already the input tensor, i.e. the
    list was indexed by a tensor.  Such inputs now get an all-true mask
    built from the input itself.
    """
    if mask is None or not any(m is not None for m in mask):
        return None

    assert hasattr(mask, '__len__') and len(mask) == len(inputs)

    if self.mode in ['sum', 'mul', 'ave']:
        bool_type = 'bool' if K._BACKEND == 'tensorflow' else 'int32'
        masks = [K.cast(m, bool_type) for m in mask if m is not None]
        combined = masks[0]
        for m in masks[1:]:
            combined = combined & m
        return combined
    elif self.mode in ['concat']:
        # Unmasked inputs contribute an all-true mask that drops the
        # input's last axis, like a regular mask.
        masks = [K.all(K.ones_like(x), axis=-1) if m is None else m
                 for x, m in zip(inputs, mask)]
        expanded_dims = [K.expand_dims(m) for m in masks]
        concatenated = K.concatenate(expanded_dims, axis=self.concat_axis)
        return K.all(concatenated, axis=-1, keepdims=False)
    elif self.mode in ['cos', 'dot']:
        return None
    elif hasattr(self.mode, '__call__'):
        if hasattr(self._output_mask, '__call__'):
            return self._output_mask(mask)
        else:
            return self._output_mask
    else:
        # this should have been caught earlier
        raise Exception('Invalid merge mode: {}'.format(self.mode))
def criterion_GAN(output, target, use_lsgan=True):
    """Adversarial loss between discriminator `output` and `target`.

    use_lsgan=True: per-sample mean squared error (averaged over every
        axis except the batch axis), with a new leading axis added.
    use_lsgan=False: binary cross-entropy averaged over all elements;
        1e-12 guards the logarithms.

    Both branches are losses to minimise.  Fix: the cross-entropy branch
    returned the log-likelihood without the leading minus sign, so
    minimising it pushed `output` away from `target`.
    """
    if use_lsgan:
        diff = output - target
        # Reduce over every axis except the batch axis.
        dims = list(range(1, K.ndim(diff)))
        return K.expand_dims((K.mean(diff ** 2, dims)), 0)
    log_likelihood = (K.log(output + 1e-12) * target
                      + K.log(1 - output + 1e-12) * (1 - target))
    return -K.mean(log_likelihood)
def call(self, x, mask=None):
    """Attention-pool `x` over axis 1 using a single weight vector.

    `x` is flattened to (-1, features_dim), multiplied by ``W`` reshaped
    to (features_dim, 1), and reshaped to (-1, step_dim) to give one
    score per step.  Scores go through tanh and exp, are optionally
    masked, normalised over axis 1, and used to weight `x`.
    """
    features_dim = self.features_dim
    step_dim = self.step_dim

    # K.dot(x, self.W) is not supported on the TF backend for these
    # shapes, hence the explicit 2-D reshape around the product.
    flat_x = K.reshape(x, (-1, features_dim))
    column_W = K.reshape(self.W, (features_dim, 1))
    scores = K.reshape(K.dot(flat_x, column_W), (-1, step_dim))

    if self.bias:
        scores = scores + self.b
    weights = K.exp(K.tanh(scores))

    # Mask after the exp; the normalisation below rescales what is left.
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        weights = weights * K.cast(mask, K.floatx())

    # Early in training the sum may be almost zero, hence the epsilon.
    weights = weights / K.cast(
        K.sum(weights, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    return K.sum(x * K.expand_dims(weights), axis=1)
def make_patches_grid(x, patch_size, patch_stride):
    '''Break image `x` up into a grid of patches.

    input shape: (channels, rows, cols)
    output shape: (rows, cols, channels, patch_rows, patch_cols)

    Returns ``(patches, patches_norm)`` where `patches_norm` is the L2
    norm of each patch over its last three axes (kept as size-1 axes).
    '''
    from theano.tensor.nnet.neighbours import images2neibs  # TODO: all K, no T

    batched = K.expand_dims(x, 0)
    shape = K.shape(batched)
    num_channels = shape[-3]
    num_rows = 1 + (shape[-2] - patch_size) // patch_stride
    num_cols = 1 + (shape[-1] - patch_size) // patch_stride

    window = (patch_size, patch_size)
    step = (patch_stride, patch_stride)
    neibs = images2neibs(batched, window, step, mode='valid')

    # neibs are sorted per-channel
    per_channel = K.reshape(
        neibs,
        (num_channels, K.shape(neibs)[0] // num_channels, patch_size, patch_size))
    per_patch = K.permute_dimensions(per_channel, (1, 0, 2, 3))

    # arrange in a 2d-grid (rows, cols, channels, px, py)
    patches = K.reshape(
        per_patch, (num_rows, num_cols, num_channels, patch_size, patch_size))
    patches_norm = K.sqrt(
        K.sum(K.square(patches), axis=(2, 3, 4), keepdims=True))
    return patches, patches_norm
def find_path(argmin_table, best_idx):
    """Step function: look up the next index from `argmin_table`.

    Uses column 0 of the first element of `best_idx` to pick one entry
    per row via ``gather_each_row``.  Returns the result with a trailing
    axis both as the output and as the single new state.
    """
    current = best_idx[0][:, 0]
    following = K.expand_dims(gather_each_row(argmin_table, current))
    if K.backend() == 'theano':
        # Mark axis 1 as non-broadcastable on the Theano backend.
        following = K.T.unbroadcast(following, 1)
    return following, [following]
def call(self, inputs, **kwargs):
    """Return the norm of `inputs` over the last axis, kept as a size-1 axis.

    A 5-D input must have size 1 on its second-to-last axis (asserted);
    that axis is squeezed out before the norm is taken.
    """
    static_shape = inputs.get_shape()
    if static_shape.ndims == 5:
        assert static_shape[-2].value == 1, 'Error: Must have num_capsules = 1 going into Length'
        inputs = K.squeeze(inputs, axis=-2)
    lengths = tf.norm(inputs, axis=-1)
    return K.expand_dims(lengths, axis=-1)
def call(self, x, mask=None):
    """Compute the quadratic advantage term A = -0.5 * (a - mu)^T P (a - mu).

    `x` must be a sequence of exactly three tensors `(L_flat, mu, a)`.
    Depending on `self.mode`, `L_flat` is expanded per batch element into:

    * 'full': a lower-triangular matrix L whose diagonal is exponentiated
      (plus epsilon); then P = L . L^T.
    * 'diag': a diagonal matrix P built directly from the entries of `L_flat`.

    Separate code paths exist for the Theano and TensorFlow backends; any
    other backend raises RuntimeError.

    NOTE(review): if `self.mode` is neither 'full' nor 'diag', `P` is never
    assigned and the `assert P is not None` below fails with a NameError
    rather than a descriptive message - presumably the mode is validated
    elsewhere; confirm.

    :param x: sequence `(L_flat, mu, a)`.
    :param mask: unused.
    :return: tensor of rank 2 holding A for each batch element.
    """
    # TODO: validate input shape

    assert (len(x) == 3)
    L_flat = x[0]
    mu = x[1]
    a = x[2]

    if self.mode == 'full':
        # Create L and L^T matrix, which we use to construct the positive-definite matrix P.
        L = None
        LT = None
        if K.backend() == 'theano':
            import theano.tensor as T
            import theano

            def fn(x, L_acc, LT_acc):
                # Scatter the flat entries into the lower triangle, then replace the
                # diagonal by its exponential (plus epsilon). The accumulator
                # arguments are not read.
                x_ = K.zeros((self.nb_actions, self.nb_actions))
                x_ = T.set_subtensor(x_[np.tril_indices(self.nb_actions)], x)
                diag = K.exp(T.diag(x_)) + K.epsilon()
                x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)], diag)
                return x_, x_.T

            outputs_info = [
                K.zeros((self.nb_actions, self.nb_actions)),
                K.zeros((self.nb_actions, self.nb_actions)),
            ]
            results, _ = theano.scan(fn=fn, sequences=L_flat, outputs_info=outputs_info)
            L, LT = results
        elif K.backend() == 'tensorflow':
            import tensorflow as tf

            # Number of elements in a triangular matrix.
            nb_elems = (self.nb_actions * self.nb_actions + self.nb_actions) // 2

            # Create mask for the diagonal elements in L_flat. This is used to exponentiate
            # only the diagonal elements, which is done before gathering.
            diag_indeces = [0]
            for row in range(1, self.nb_actions):
                diag_indeces.append(diag_indeces[-1] + (row + 1))
            diag_mask = np.zeros(1 + nb_elems)  # +1 for the leading zero
            diag_mask[np.array(diag_indeces) + 1] = 1
            diag_mask = K.variable(diag_mask)

            # Add leading zero element to each element in the L_flat. We use this zero
            # element when gathering L_flat into a lower triangular matrix L.
            nb_rows = tf.shape(L_flat)[0]
            zeros = tf.expand_dims(tf.tile(K.zeros((1,)), [nb_rows]), 1)
            try:
                # Old TF behavior.
                L_flat = tf.concat(1, [zeros, L_flat])
            except TypeError:
                # New TF behavior
                L_flat = tf.concat([zeros, L_flat], 1)

            # Create mask that can be used to gather elements from L_flat and put them
            # into a lower triangular matrix.
            tril_mask = np.zeros((self.nb_actions, self.nb_actions), dtype='int32')
            tril_mask[np.tril_indices(self.nb_actions)] = range(1, nb_elems + 1)

            # Finally, process each element of the batch.
            init = [
                K.zeros((self.nb_actions, self.nb_actions)),
                K.zeros((self.nb_actions, self.nb_actions)),
            ]

            def fn(a, x):
                # `a` is the scan accumulator and is not read.
                # Exponentiate everything. This is much easier than only exponentiating
                # the diagonal elements, and, usually, the action space is relatively low.
                x_ = K.exp(x) + K.epsilon()
                # Only keep the diagonal elements.
                x_ *= diag_mask
                # Add the original, non-diagonal elements.
                x_ += x * (1. - diag_mask)
                # Finally, gather everything into a lower triangular matrix.
                L_ = tf.gather(x_, tril_mask)
                return [L_, tf.transpose(L_)]

            tmp = tf.scan(fn, L_flat, initializer=init)
            if isinstance(tmp, (list, tuple)):
                # TensorFlow 0.10 now returns a tuple of tensors.
                L, LT = tmp
            else:
                # Old TensorFlow < 0.10 returns a shared tensor.
                L = tmp[:, 0, :, :]
                LT = tmp[:, 1, :, :]
        else:
            raise RuntimeError('Unknown Keras backend "{}".'.format(K.backend()))
        assert L is not None
        assert LT is not None
        P = K.batch_dot(L, LT)
    elif self.mode == 'diag':
        if K.backend() == 'theano':
            import theano.tensor as T
            import theano

            def fn(x, P_acc):
                # Place the flat entries on the diagonal of a zero matrix.
                # The accumulator argument is not read.
                x_ = K.zeros((self.nb_actions, self.nb_actions))
                x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)], x)
                return x_

            outputs_info = [
                K.zeros((self.nb_actions, self.nb_actions)),
            ]
            P, _ = theano.scan(fn=fn, sequences=L_flat, outputs_info=outputs_info)
        elif K.backend() == 'tensorflow':
            import tensorflow as tf

            # Create mask that can be used to gather elements from L_flat and put them
            # into a diagonal matrix.
            diag_mask = np.zeros((self.nb_actions, self.nb_actions), dtype='int32')
            diag_mask[np.diag_indices(self.nb_actions)] = range(1, self.nb_actions + 1)

            # Add leading zero element to each element in the L_flat. We use this zero
            # element when gathering L_flat into a diagonal matrix: every off-diagonal
            # mask entry is index 0 and therefore picks up this zero.
            nb_rows = tf.shape(L_flat)[0]
            zeros = tf.expand_dims(tf.tile(K.zeros((1,)), [nb_rows]), 1)
            try:
                # Old TF behavior.
                L_flat = tf.concat(1, [zeros, L_flat])
            except TypeError:
                # New TF behavior
                L_flat = tf.concat([zeros, L_flat], 1)

            # Finally, process each element of the batch.
            def fn(a, x):
                # `a` is the scan accumulator and is not read.
                x_ = tf.gather(x, diag_mask)
                return x_

            P = tf.scan(fn, L_flat, initializer=K.zeros((self.nb_actions, self.nb_actions)))
        else:
            raise RuntimeError('Unknown Keras backend "{}".'.format(K.backend()))
    assert P is not None
    assert K.ndim(P) == 3

    # Combine a, mu and P into a scalar (over the batches). What we compute here is
    # -.5 * (a - mu)^T * P * (a - mu), where * denotes the dot-product. Unfortunately
    # TensorFlow handles vector * P slightly suboptimal, hence we convert the vectors to
    # 1xd/dx1 matrices and finally flatten the resulting 1x1 matrix into a scalar. All
    # operations happen over the batch size, which is dimension 0.
    prod = K.batch_dot(K.expand_dims(a - mu, 1), P)
    prod = K.batch_dot(prod, K.expand_dims(a - mu, -1))
    A = -.5 * K.batch_flatten(prod)
    assert K.ndim(A) == 2
    return A
def class_accuracy(y_true, y_pred):
    """Masked classification accuracy.

    Channels `[0, dataset.num_classes)` of the last axis are treated as
    class scores; channel `dataset.num_classes` of `y_true` is a flag, and
    only positions where it equals 1.0 are counted. The denominator is
    clamped to at least 1.0, so an all-zero mask yields 0.
    """
    n_classes = dataset.num_classes

    is_counted = K.equal(y_true[..., n_classes], 1.0)
    mask = K.cast(is_counted, K.floatx())

    true_label = K.argmax(y_true[..., 0:n_classes], axis=-1)
    pred_label = K.argmax(y_pred[..., 0:n_classes], axis=-1)
    acc = K.cast(K.equal(true_label, pred_label), K.floatx())

    if K.backend() == "cntk":
        acc = K.expand_dims(acc)

    return K.sum(acc * mask) / K.maximum(K.sum(mask), 1.0)
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    args: list of tensor; the first len(anchors)//3 entries are the network
        outputs (yolo_outputs, the output of yolo_body or tiny_yolo_body),
        the remaining entries are y_true (the output of preprocess_true_boxes)
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss
    print_loss: bool, when true the running loss is wrapped in tf.Print
        after every output layer

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    layer_count = len(anchors) // 3  # default setting
    yolo_outputs, y_true = args[:layer_count], args[layer_count:]
    if layer_count == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]

    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[idx])[1:3], K.dtype(y_true[0]))
        for idx in range(layer_count)
    ]
    batch = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    batch_f = K.cast(batch, K.dtype(yolo_outputs[0]))

    loss = 0
    for l in range(layer_count):
        truth = y_true[l]
        object_mask = truth[..., 4:5]
        true_class_probs = truth[..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes,
            input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = truth[..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(truth[..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        # Cells without an object would give log(0) = -inf; zero them instead.
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))
        box_loss_scale = 2 - truth[..., 2:3] * truth[..., 3:4]

        # Find ignore mask, iterate over each of batch.
        object_mask_bool = K.cast(object_mask, 'bool')
        mask_array = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)

        def loop_body(b, acc):
            true_box = tf.boolean_mask(truth[b, ..., 0:4], object_mask_bool[b, ..., 0])
            best_iou = K.max(box_iou(pred_box[b], true_box), axis=-1)
            acc = acc.write(b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, acc

        _, mask_array = K.control_flow_ops.while_loop(
            lambda b, *_: b < batch, loop_body, [0, mask_array])
        ignore_mask = K.expand_dims(mask_array.stack(), -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        conf_bce = K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True)
        noobj_bce = K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True)

        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * conf_bce + (1 - object_mask) * noobj_bce * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / batch_f
        wh_loss = K.sum(wh_loss) / batch_f
        confidence_loss = K.sum(confidence_loss) / batch_f
        class_loss = K.sum(class_loss) / batch_f

        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(
                loss,
                [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)],
                message='loss: ')
    return loss
def get_model():
    """Build and compile the two-input, two-output IGLOO + GRU model.

    Inputs:
      * 'input'         - signal tensor of shape `input_shape`, fed to a
                          causal-convolution front end and the IGLOO stack.
      * 'decoder_input' - 9 integer tokens, embedded with the frozen
                          `balloony` weights and passed through two GRUs.

    Outputs:
      * a single-unit sigmoid head (binary normal/abnormal classification),
      * a per-step distribution-sized tensor of `vocab_size` units.

    All hyper-parameters (`input_shape`, `padding`, `mDR`, `C1D_K`,
    `nb_patches*`, `vocab_size`, `balloony`, `cross_entropy2`, ...) are
    read from module scope.

    Fixes relative to the previous revision:
      * the binary head used `softmax` over a single unit, which is
        identically 1.0 and cannot be trained with binary cross-entropy;
        it now uses `sigmoid`;
      * two stray `.` tokens after `Permute(...)` calls (syntax errors)
        were removed.

    :return: the compiled Keras `Model`.
    """
    CONC = []
    IGLOO_V = []

    ### STRUCTURE FROM IGLOO PAPER BEGINS
    inin = Input(shape=input_shape, name='input')
    # inin=Lambda(lambda q: q[:,1:,:]) (inin)

    # first set of convolutions
    a = Conv1D(40, 2, padding="causal")(inin)
    b = Conv1D(40, 4, padding="causal")(inin)
    c = Conv1D(40, 8, padding="causal")(inin)
    x = Concatenate(axis=-1)([a, b, c])
    x = Activation("relu")(x)
    x = BatchNormalization(axis=-1)(x)

    # second set of convolutions
    a = Conv1D(40, 2, padding="causal")(x)
    b = Conv1D(40, 4, padding="causal")(x)
    c = Conv1D(40, 8, padding="causal")(x)
    x = Concatenate(axis=-1)([a, b, c])
    x = Activation("relu")(x)
    x = BatchNormalization(axis=-1)(x)

    # third set of convolutions
    a = Conv1D(40, 2, padding="causal")(x)
    b = Conv1D(40, 4, padding="causal")(x)
    c = Conv1D(40, 8, padding="causal")(x)
    x = Concatenate(axis=-1)([a, b, c])
    x = Activation("relu")(x)
    x = BatchNormalization(axis=-1)(x)

    # Drop the first time step before the IGLOO stack.
    x = Lambda(lambda q: q[:, 1:, :])(x)

    x = Conv1D(64, 1, strides=1, padding=padding)(x)
    x = BatchNormalization(axis=-1)(x)
    x = Activation("relu")(x)
    x = SpatialDropout1D(mDR)(x)

    IGLOO_V.append(IGLOO_RETURNFULLSEQ(
        x, nb_patches_FULL, Conv1D_dim_full_seq, patch_size=patch_size,
        padding_style=padding, stretch_factor=stretch_factor, l2reg=igloo_l2reg,
        add_residual=add_residual, nb_stacks=nb_stacks_full,
        build_backbone=build_backbone))

    CONC.append(IGLOO_V[0])

    for kk in range(5):
        # IGLOO patches
        x = Conv1D(C1D_K, 1, strides=1, padding=padding)(CONC[kk])
        x = BatchNormalization(axis=-1)(x)
        x = Activation("relu")(x)
        x = SpatialDropout1D(mDR)(x)

        IGLOO_V.append(IGLOO_RETURNFULLSEQ(
            x, nb_patches_FULL, Conv1D_dim_full_seq, patch_size=patch_size,
            padding_style=padding, stretch_factor=stretch_factor, l2reg=igloo_l2reg,
            add_residual=add_residual, nb_stacks=nb_stacks_full,
            build_backbone=build_backbone))

        ### second residual connection
        co = Add()([IGLOO_V[kk + 1], CONC[kk]])
        CONC.append(Activation("relu")(co))

    x = Conv1D(C1D_K, 1, strides=1, padding=padding)(CONC[-1])
    x = BatchNormalization(axis=-1)(x)
    x = Activation("relu")(x)
    x = SpatialDropout1D(mDR)(x)

    y = IGLOO(x, nb_patches, CONV1D_dim, patch_size=patch_size,
              return_sequences=False, l2reg=igloo_l2reg, padding_style=padding,
              nb_stacks=nb_stacks, DR=mDR, max_pooling_kernel=MAXPOOL_size)
    #### Structure from IGLOO Paper ends

    y = Dense(64, activation='relu')(y)
    y = Dropout(0.4)(y)
    # First output: binary classification of normal or abnormal. A single
    # unit needs a sigmoid; softmax over one unit is constant 1.0.
    output_1 = Dense(1, activation='sigmoid')(y)

    word_input = Input(shape=(9,), name='decoder_input')
    # Embedding is frozen and initialised from `balloony`.
    embedded_word = Embedding(input_dim=1149, output_dim=500, name='word_embedding',
                              input_length=9, trainable=False,
                              weights=[balloony])(word_input)
    input_ = embedded_word
    # input_ = BatchNormalization(axis=-1)(input_)

    # first gru layer
    gru_out = GRU(700, activation='tanh', recurrent_activation='sigmoid',
                  dropout=0.22, return_sequences=True, return_state=False,
                  unroll=False, reset_after=True)(input_)

    input_ = gru_out
    input_ = BatchNormalization(axis=-1)(input_)

    # second gru layer
    gru_out = GRU(700, activation='tanh', recurrent_activation='sigmoid',
                  dropout=0.22, return_sequences=True, return_state=False,
                  unroll=False, reset_after=True)(input_)

    input_ = gru_out

    ## attention mechanism begins
    features = Permute((2, 1))(x)
    part1 = Dense(700)(features)
    gru_out = Permute((2, 1))(gru_out)
    shape = K.int_shape(part1)
    # change output to dimensions in order to add
    part2 = Dense(shape[1])(gru_out)
    part2 = Permute((2, 1))(part2)
    part3 = Add()([part1, part2])
    score = Activation("tanh")(part3)
    part4 = Dense(1)(score)
    attention_weights = Lambda(lambda x: softmax(x, axis=1))(part4)
    context_vector = multiply([attention_weights, features])  # weighting the pixels
    context_vector = Lambda(lambda x: K.sum(x, axis=1))(context_vector)
    context_vector_mod = Dense(600)(context_vector)
    context_vector_mod = Lambda(lambda x: K.expand_dims(x, -1))(context_vector_mod)
    context_vector_mod = Permute((2, 1))(context_vector_mod)
    ## attention mechanism ends

    gru_out_mod = Dense(600)(gru_out)
    input_ = Concatenate(axis=1)([context_vector_mod, gru_out_mod])
    input_ = Activation("tanh")(input_)
    input_ = BatchNormalization(axis=-1)(input_)
    gru_out = GRU(9, activation='tanh', recurrent_activation='sigmoid',
                  dropout=0.22, return_sequences=True, return_state=False,
                  unroll=False, reset_after=True)(input_)
    gru_out = Permute((2, 1))(gru_out)
    gru_out = Activation("tanh")(gru_out)
    # final word is generated
    sequence_output = TimeDistributed(Dense(units=vocab_size))(gru_out)

    opt = optimizers.Adam(lr=0.0005, clipnorm=1.0, decay=0.001)
    model = Model(inputs=[inin, word_input], outputs=[output_1, sequence_output])
    model.compile(loss=['binary_crossentropy', cross_entropy2], optimizer=opt,
                  metrics=['accuracy'], loss_weights=[100000, 1])
    # return model
    # model.fit_generator(Mygenerator(2),epochs=30)
    # model.save('my_eeg_model_final.h5')
    return model
def MyAddAxes(x):
    """Append two trailing axes of size 1 to `x`."""
    with_one_axis = K.expand_dims(x, axis=-1)
    with_two_axes = K.expand_dims(with_one_axis, axis=-1)
    return with_two_axes
def yolo_loss(args,
              anchors,
              num_anchors_per_layer,
              num_classes,
              ignore_thresh=.5,
              print_loss=True):
    """
    Return yolo_loss tensor

    The per-coordinate box terms use `nll_loss` with a predicted mean and
    sigma; confidence and class terms use binary cross-entropy.

    Args:
        args (list): args[:num_output_layers] the output of yolo_body or tiny_yolo_body
            args[num_output_layers:] raw_y_true
        anchors (np.array): shape=(N, 2), wh
        num_anchors_per_layer (int): used to derive the number of output
            layers as len(anchors) // num_anchors_per_layer
        num_classes (int): number of classes; each anchor prediction is
            expected to carry num_classes + 9 channels
        ignore_thresh (float): the iou threshold whether to ignore object confidence loss
        print_loss (bool): when true, wrap the running loss in tf.Print
            after every output layer

    Returns:
        loss: tensor, shape=(1,)

    """
    num_output_layers = len(anchors) // num_anchors_per_layer  # num_layers
    yolo_outputs = args[:num_output_layers]
    raw_y_trues = args[num_output_layers:]  # y_true
    anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(raw_y_trues[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(raw_y_trues[0]))
        for l in range(num_output_layers)
    ]
    loss = 0
    batch_size = K.shape(yolo_outputs[0])[0]  # m
    batch_size_f = K.cast(batch_size, K.dtype(yolo_outputs[0]))
    for l in range(num_output_layers):
        yolo_output = yolo_outputs[l]
        raw_y_true = raw_y_trues[l]
        anchor_mask = anchor_masks[l]

        # tf.reshape accepts at most one -1, so the grid height/width are
        # taken from the dynamic shape of the output instead of being left
        # as -1. The last two axes follow the anchors used for this layer
        # and the per-anchor channel count (num_classes + 9).
        dynamic_shape = K.shape(yolo_output)
        raw_y_pred = tf.reshape(
            yolo_output,
            [-1, dynamic_shape[1], dynamic_shape[2],
             len(anchor_mask), num_classes + 9])

        # (batch_size, grid_height, grid_width, num_anchors_this_layer, 1)
        object_mask = raw_y_true[..., 4:5]
        # (batch_size, grid_height, grid_width, num_anchors_this_layer, num_classes)
        y_true_class_probs = raw_y_true[..., 5:]
        grid, y_pred_box, y_pred_delta_xy, y_pred_log_wh, y_pred_sigma, y_pred_confidence, y_pred_class_probs = \
            y_pred_graph(raw_y_pred, anchors[anchor_mask], input_shape)
        y_true_delta_xy = raw_y_true[..., :2] * grid_shapes[l][::-1] - grid  # raw_true_xy
        y_true_log_wh = K.log(raw_y_true[..., 2:4] * input_shape[::-1] /
                              anchors[anchor_mask])
        # Cells without an object would produce log(0); zero them out.
        y_true_log_wh = K.switch(object_mask, y_true_log_wh,
                                 K.zeros_like(y_true_log_wh))  # raw_true_wh
        box_loss_scale = 2 - raw_y_true[..., 2:3] * raw_y_true[..., 3:4]
        ignore_mask = tf.TensorArray(K.dtype(raw_y_trues[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask_):
            # (num_gt_boxes, 4)
            gt_box = tf.boolean_mask(raw_y_true[b, ..., 0:4],
                                     object_mask_bool[b, ..., 0])
            # (grid_height, grid_width, num_anchors_this_layer, num_gt_boxes)
            iou = box_iou_graph(y_pred_box[b], gt_box)
            # (grid_height, grid_width, num_anchors_this_layer)
            best_iou = K.max(iou, axis=-1)
            ignore_mask_ = ignore_mask_.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(gt_box)))
            return b + 1, ignore_mask_

        _, ignore_mask = tf.while_loop(lambda b, *largs: b < batch_size,
                                       loop_body, [0, ignore_mask])
        # (batch_size, grid_height, grid_width, num_anchors_this_layer)
        ignore_mask = ignore_mask.stack()
        # (batch_size, grid_height, grid_width, num_anchors_this_layer, 1)
        ignore_mask = K.expand_dims(ignore_mask, -1)

        y_true = tf.concat([y_true_delta_xy, y_true_log_wh], axis=-1)
        y_pred_mu = tf.concat([y_pred_delta_xy, y_pred_log_wh], axis=-1)
        x_loss = nll_loss(y_true[..., 0:1], y_pred_mu[..., 0:1],
                          y_pred_sigma[..., 0:1])
        x_loss = object_mask * box_loss_scale * x_loss
        y_loss = nll_loss(y_true[..., 1:2], y_pred_mu[..., 1:2],
                          y_pred_sigma[..., 1:2])
        y_loss = object_mask * box_loss_scale * y_loss
        w_loss = nll_loss(y_true[..., 2:3], y_pred_mu[..., 2:3],
                          y_pred_sigma[..., 2:3])
        w_loss = object_mask * box_loss_scale * w_loss
        h_loss = nll_loss(y_true[..., 3:4], y_pred_mu[..., 3:4],
                          y_pred_sigma[..., 3:4])
        h_loss = object_mask * box_loss_scale * h_loss

        # Non-object cells only contribute confidence loss where their best
        # IoU with any ground-truth box is below ignore_thresh.
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, y_pred_confidence) + \
                          (1 - object_mask) * K.binary_crossentropy(object_mask, y_pred_confidence) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            y_true_class_probs, y_pred_class_probs)

        x_loss = K.sum(x_loss) / batch_size_f
        y_loss = K.sum(y_loss) / batch_size_f
        w_loss = K.sum(w_loss) / batch_size_f
        h_loss = K.sum(h_loss) / batch_size_f
        confidence_loss = K.sum(confidence_loss) / batch_size_f
        class_loss = K.sum(class_loss) / batch_size_f
        loss += x_loss + y_loss + w_loss + h_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, x_loss, y_loss, w_loss, h_loss, confidence_loss,
                class_loss,
                K.sum(ignore_mask)
            ],
                            message='\nloss: ')
    return loss
def build(self):
    """Assemble the VAE graph, compile the training model and build helper functions.

    Sets three attributes on `self`:
      * `model`            - compiled Keras model mapping the input to the
                             decoder output averaged over `n_sampler` samples;
      * `ll_function`      - backend function of `[ll_input, latent_sampler]`
                             returning the likelihood term;
      * `sampler_function` - backend function of `[ll_input, latent_values]`
                             returning the averaged decoder output.

    The likelihood is looked up by name (`self.likelihood`) in the
    `likelihoods` module.
    """
    # build encoder network
    input_layer = Input(shape=self.input_shape, name='input')
    encoder_output = self.encoder(input_layer)

    # build latent network
    # A single dense layer emits 2 * n_latent values: the first half is the
    # (clipped) log-variance, the second half is the mean.
    latent_params = Dense(self.n_latent*2, name='latent_params')(encoder_output)
    logvar = Lambda(lambda x: K.clip(x[:, :self.n_latent], -5, 5), name='logvar')(latent_params)
    mu = Lambda(lambda x: x[:, self.n_latent:], name='mu')(latent_params)
    var = Lambda(lambda x: K.exp(x), name='var')(logvar)
    std = Lambda(lambda x: K.sqrt(x), name='std')(var)
    # Standard-normal noise of shape (batch, n_sampler, n_latent); the input
    # layer is only used to obtain the dynamic batch size.
    gaussian_sampler = Lambda(lambda x: K.random_normal((K.shape(x)[0], self.n_sampler, self.n_latent)), name='gaussian_sampler')(input_layer)
    # Reparameterisation: noise * std + mu, broadcast over the sampler axis.
    latent_sampler = Lambda(lambda x: x[0]*K.expand_dims(x[2], axis=1) + K.expand_dims(x[1], axis=1), name='latent_sampler')([gaussian_sampler, mu, std])
    # Fold the sampler axis into the batch axis before decoding.
    latent_values = Lambda(lambda x: K.reshape(x, (-1, self.n_latent)), name='latent')(latent_sampler)

    # build decoder network
    decoder_output = self.decoder(latent_values)
    # Restore the sampler axis and average over it.
    # NOTE(review): `[-1, self.n_sampler] + self.input_shape` is list
    # concatenation, so self.input_shape must be a list (a tuple would
    # raise TypeError) - confirm against the constructor.
    output = Lambda(lambda x: K.mean(K.reshape(x, [-1, self.n_sampler] + self.input_shape), axis=1), name='output_mean')(decoder_output)

    # define loss functions
    def kl_loss(y_true, y_pred):
        # Ignores its arguments; uses the `var` and `mu` tensors from the
        # enclosing scope.
        KL = 0.5*K.sum(var + K.square(mu) - 1 - K.log(var), axis=1)
        return KL

    def sequence_nll_loss(y_true, y_pred):
        # Flatten to (-1, last_dim), take categorical cross-entropy per row,
        # then sum over axis 1 of the original shape.
        y_shape = K.shape(y_true)
        y_true = K.reshape(y_true, (-1, y_shape[-1]))
        y_pred = K.reshape(y_pred, (-1, y_shape[-1]))
        NLL = K.categorical_crossentropy(y_true, y_pred)
        NLL = K.sum(K.reshape(NLL, (-1, y_shape[1])), axis=1)
        return NLL

    def sequence_accuracy(y_true, y_pred):
        # Fraction of positions along axis 1 whose argmax matches.
        return K.mean(K.cast(K.equal(K.argmax(y_true, axis=-1), K.argmax(y_pred, axis=-1)), K.floatx()), axis=1)

    def sequence_vae_loss(y_true, y_pred):
        # Defined but not referenced by the compile() call below.
        return sequence_nll_loss(y_true, y_pred) + kl_loss(y_true, y_pred)

    def nll_loss(y_true, y_pred):
        NLL = K.categorical_crossentropy(y_true, y_pred)
        return NLL

    #def vae_loss(y_true, y_pred):
    #    return nll_loss(y_true, y_pred) + kl_loss(y_true, y_pred)

    import likelihoods
    ll = getattr(likelihoods, self.likelihood)
    def vae_loss(y_true, y_pred):
        # Negative likelihood term plus the KL term.
        return -ll(y_true, y_pred) + kl_loss(y_true, y_pred)

    # build training model
    model = Model(inputs=[input_layer], outputs=[output])
    model.compile(loss=vae_loss, metrics=[sequence_accuracy, kl_loss, nll_loss], optimizer='Adam')
    self.model = model

    # build log likelihood function
    ll_input = Input(shape=self.input_shape, name='ll_input')
    ll_output = ll(ll_input, output)
    self.ll_function = K.function([ll_input, latent_sampler], [ll_output])

    # build function for generating new samples
    self.sampler_function = K.function([ll_input, latent_values], [output])
def __init__(self,
             model,
             policy=None,
             enable_double_dqn=True,
             enable_dueling_network=False,
             dueling_type='avg',
             *args,
             **kwargs):
    """Set up the DQN agent and optionally wrap `model` in a dueling head.

    :param model: Keras model with exactly one output of shape
        `(None, self.nb_actions)`.
    :param policy: action-selection policy; defaults to `EpsGreedyQPolicy()`.
    :param enable_double_dqn: stored on the instance.
    :param enable_dueling_network: when true, the model's last layer is
        discarded and replaced by a dense layer of `nb_action + 1` units
        followed by a dueling aggregation.
    :param dueling_type: one of 'avg', 'max', 'naive' (see below).
    :raises ValueError: if the model has several outputs or the output
        shape does not match `self.nb_actions`.

    The dueling aggregation reduces over the action axis (`axis=1`) so
    that each state's advantages are centred on that state's own
    mean/max. The earlier code omitted `axis`, which reduced over the
    whole batch and coupled unrelated samples.
    """
    super(DQNAgent, self).__init__(*args, **kwargs)

    # Validate (important) input.
    if hasattr(model.output, '__len__') and len(model.output) > 1:
        raise ValueError(
            'Model "{}" has more than one output. DQN expects a model that has a single output.'
            .format(model))
    if model.output._keras_shape != (None, self.nb_actions):
        raise ValueError(
            'Model output "{}" has invalid shape. DQN expects a model that has one dimension for each action, in this case {}.'
            .format(model.output, self.nb_actions))

    # Parameters.
    self.enable_double_dqn = enable_double_dqn
    self.enable_dueling_network = enable_dueling_network
    self.dueling_type = dueling_type
    if self.enable_dueling_network:
        # get the second last layer of the model, abandon the last layer
        layer = model.layers[-2]
        nb_action = model.output._keras_shape[-1]
        # layer y has a shape (nb_action+1,)
        # y[:,0] represents V(s;theta)
        # y[:,1:] represents A(s,a;theta)
        y = Dense(nb_action + 1, activation='linear')(layer.output)
        # calculate the Q(s,a;theta)
        # dueling_type == 'avg'
        # Q(s,a;theta) = V(s;theta) + (A(s,a;theta)-Avg_a(A(s,a;theta)))
        # dueling_type == 'max'
        # Q(s,a;theta) = V(s;theta) + (A(s,a;theta)-max_a(A(s,a;theta)))
        # dueling_type == 'naive'
        # Q(s,a;theta) = V(s;theta) + A(s,a;theta)
        if self.dueling_type == 'avg':
            outputlayer = Lambda(
                lambda a: K.expand_dims(a[:, 0], dim=-1) + a[:, 1:]
                - K.mean(a[:, 1:], axis=1, keepdims=True),
                output_shape=(nb_action, ))(y)
        elif self.dueling_type == 'max':
            outputlayer = Lambda(
                lambda a: K.expand_dims(a[:, 0], dim=-1) + a[:, 1:]
                - K.max(a[:, 1:], axis=1, keepdims=True),
                output_shape=(nb_action, ))(y)
        elif self.dueling_type == 'naive':
            outputlayer = Lambda(
                lambda a: K.expand_dims(a[:, 0], dim=-1) + a[:, 1:],
                output_shape=(nb_action, ))(y)
        else:
            assert False, "dueling_type must be one of {'avg','max','naive'}"

        model = Model(input=model.input, output=outputlayer)

    # Related objects.
    self.model = model
    if policy is None:
        policy = EpsGreedyQPolicy()
    self.policy = policy

    # State.
    self.reset_states()
def build(self):
    '''Build and compile the bidirectional-decoding seq2seq model.

    The layout below is dataset specific.

    Basic information part: session_info, holding four categorical fields
    (user level, user age, product category, order status).

        field              value range    cardinality
        user level         0 ~ 5          6
        user age           0 ~ 7          8
        product category   0 ~ 2484       2485
        order status       0 ~ 3          4

    The model takes [session_info, x_in, yl_in, yr_in, z_in], outputs the
    forward and backward decoder distributions [yl, yr], and carries its
    loss via `add_loss`.
    '''
    session_info = Input(shape=(None,))
    # The bare string below is an expression statement, not a comment. It
    # reads: "Standard information part: question input x_in, forward answer
    # yl_in, backward answer yr_in, history question z_in".
    ''' 标准信息部分: 问题输入标量: x_in 正向回答标量: yl_in 逆向回答标量: yr_in 历史问题标量: z_in '''
    x_in = Input(shape=(None,))
    yl_in = Input(shape=(None,))
    yr_in = Input(shape=(None,))
    z_in = Input(shape=(None,))
    x, yl,yr, z = x_in, yl_in, yr_in, z_in

    # Split the four session fields (columns 0..3) and embed each one with
    # char_size // 4 dimensions.
    session_level = Lambda(lambda x: x[:,0])(session_info)
    session_years = Lambda(lambda x: x[:,1])(session_info)
    session_kinds = Lambda(lambda x: x[:,2])(session_info)
    session_station = Lambda(lambda x: x[:,3])(session_info)

    session_level = Embedding(6, self.char_size // 4)(session_level)
    session_years = Embedding(8, self.char_size // 4)(session_years)
    session_kinds = Embedding(2485, self.char_size // 4)(session_kinds)
    session_station = Embedding(4, self.char_size // 4)(session_station)

    session = Concatenate()([session_level, session_years, session_kinds, session_station])
    # Insert an axis at position 1 so the session vector can be fed to the LSTMs below.
    session = Lambda(lambda x: K.expand_dims(x, 1))(session)

    # Masks are 1.0 where the token id is > 0, with a trailing axis added.
    # NOTE: y_mask is derived from yl only and is reused for yr in the loss.
    x_mask = Lambda(lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(x)
    y_mask = Lambda(lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(yl)
    z_mask = Lambda(lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(z)

    x_one_hot = Lambda(self.to_one_hot)([x, x_mask])
    z_one_hot = Lambda(self.to_one_hot)([z, z_mask])
    xz_one_hot = Lambda(lambda x: K.cast(K.greater(x[0] + x[1], 0.5), 'float32'))([x_one_hot, z_one_hot])
    xz_prior = ScaleShift()(xz_one_hot)  # learn the prior distribution of the output

    if self.word2vec != None:
        embedding = Embedding(len(self.word2vec.W2Vembedding),  # vocabulary size
                              self.char_size,  # word-vector length (100)
                              weights=[self.word2vec.W2Vembedding],  # key point: pretrained word-vector weights
                              trainable=True  # whether to update the word vectors during training
                              )
    else:
        embedding = Embedding(self.chars_num + 4, self.char_size)

    x = embedding(x)
    z = embedding(z)

    # encoder: two-layer bidirectional LSTM
    x = LayerNormalization()(x)
    x = OurBidirectional(CuDNNLSTM(self.z_dim // 2, return_sequences=True))([x, x_mask])
    x = LayerNormalization()(x)
    x = OurBidirectional(CuDNNLSTM(self.z_dim // 2, return_sequences=True))([x, x_mask])

    z = LayerNormalization()(z)
    z = OurBidirectional(CuDNNLSTM(self.z_dim // 2, return_sequences=True))([z, z_mask])
    z = LayerNormalization()(z)
    z = OurBidirectional(CuDNNLSTM(self.z_dim // 2, return_sequences=True))([z, z_mask])

    x_max = Lambda(self.seq_maxpool)([x, x_mask])

    # Session branch: three stacked unidirectional LSTMs.
    session = LayerNormalization()(session)
    session = CuDNNLSTM(self.z_dim // 4, return_sequences=True)(session)
    session = LayerNormalization()(session)
    session = CuDNNLSTM(self.z_dim // 4, return_sequences=True)(session)
    session = LayerNormalization()(session)
    session = CuDNNLSTM(self.z_dim // 4, return_sequences=True)(session)

    # forward decoder: unidirectional LSTM
    y = embedding(yl)
    y = SelfModulatedLayerNormalization(self.z_dim // 4)([y, x_max])
    y = CuDNNLSTM(self.z_dim, return_sequences=True)(y)
    y = SelfModulatedLayerNormalization(self.z_dim // 4)([y, x_max])
    y = CuDNNLSTM(self.z_dim, return_sequences=True)(y)
    yl = SelfModulatedLayerNormalization(self.z_dim // 4)([y, x_max])

    # backward decoder: unidirectional LSTM
    y = embedding(yr)
    y = SelfModulatedLayerNormalization(self.z_dim // 4)([y, x_max])
    y = CuDNNLSTM(self.z_dim, return_sequences=True)(y)
    y = SelfModulatedLayerNormalization(self.z_dim // 4)([y, x_max])
    y = CuDNNLSTM(self.z_dim, return_sequences=True)(y)
    yr = SelfModulatedLayerNormalization(self.z_dim // 4)([y, x_max])

    # alignment attention + retrieval attention (forward decoder)
    yl_ = Attention(8, 16, mask_right=True)([yl, yr, yr])
    ylx = Attention(8, 16)([yl, x, x, x_mask])
    ylz = Attention(8, 16)([yl, z, z, z_mask])
    yls = Attention(8, 16, mask_right=True)([yl, session, session])
    yl = Concatenate()([yl, yl_, ylx, ylz, yls])

    # alignment attention + retrieval attention (backward decoder)
    # NOTE: `yl` here is already the concatenated tensor from the block above.
    yr_ = Attention(8, 16, mask_right=True)([yr, yl, yl])
    yrx = Attention(8, 16)([yr, x, x, x_mask])
    yrz = Attention(8, 16)([yr, z, z, z_mask])
    yrs = Attention(8, 16, mask_right=True)([yr, session, session])
    yr = Concatenate()([yr, yr_, yrx, yrz, yrs])

    # final output classifier (weights shared between the two directions)
    classifier = Dense(len(self.data_info.chars) + 4)

    yl = Dense(self.data_info.char_size)(yl)
    yl = LeakyReLU(0.2)(yl)
    yl = classifier(yl)
    yl = Lambda(lambda x: (x[0] + x[1]) / 2)([yl, xz_prior])  # average with the prior
    yl = Activation('softmax')(yl)

    yr = Dense(self.data_info.char_size)(yr)
    yr = LeakyReLU(0.2)(yr)
    yr = classifier(yr)
    yr = Lambda(lambda x: (x[0] + x[1]) / 2)([yr, xz_prior])  # average with the prior
    yr = Activation('softmax')(yr)

    # cross-entropy as the loss, with the padding positions masked out;
    # targets are the inputs shifted by one step
    cross_entropy_1 = K.sparse_categorical_crossentropy(yl_in[:, 1:], yl[:, :-1])
    cross_entropy_1 = K.sum(cross_entropy_1 * y_mask[:, 1:, 0]) / K.sum(y_mask[:, 1:, 0])
    cross_entropy_2 = K.sparse_categorical_crossentropy(yr_in[:, 1:], yr[:, :-1])
    cross_entropy_2 = K.sum(cross_entropy_2 * y_mask[:, 1:, 0]) / K.sum(y_mask[:, 1:, 0])
    cross_entropy = (cross_entropy_1 + cross_entropy_2) / 2

    model = Model([session_info, x_in, yl_in, yr_in, z_in], [yl, yr])
    model.add_loss(cross_entropy)
    model.compile(optimizer = Adam(self.learning_rate))
    # print(model.summary())
    return model
def reg_accuracy(y_true, y_pred):
    """Masked mean of the squared regression error.

    Channel `dataset.num_classes` of `y_true` is a flag; only positions
    where it equals 1.0 contribute. The error is the sum of squared
    differences over the two channels following the flag. The denominator
    is clamped to at least 1.0.
    """
    flag_idx = dataset.num_classes
    lo, hi = flag_idx + 1, flag_idx + 3

    mask = K.cast(K.equal(y_true[..., flag_idx], 1.0), K.floatx())

    delta = y_true[..., lo:hi] - y_pred[..., lo:hi]
    reg = K.sum(K.square(delta), axis=-1)

    if K.backend() == "cntk":
        reg = K.expand_dims(reg)

    return K.sum(reg * mask) / K.maximum(K.sum(mask), 1.0)
# Model inputs. x1_in / x2_in are the two inputs passed to bert_model;
# h_in is used as the gather index for seq_gather; the remaining inputs are
# declared here but not consumed in this section.
x1_in = Input(shape=(None,), dtype='int32')
x2_in = Input(shape=(None,))
xm_in = Input(shape=(None,))
h_in = Input(shape=(None,), dtype='int32')
hm_in = Input(shape=(None,))
sel_in = Input(shape=(None,), dtype='int32')
conn_in = Input(shape=(1,), dtype='int32')
csel_in = Input(shape=(None,), dtype='int32')
cop_in = Input(shape=(None,), dtype='int32')

x1, x2, xm, h, hm, sel, conn, csel, cop = (
    x1_in, x2_in, xm_in, h_in, hm_in, sel_in, conn_in, csel_in, cop_in
)

hm = Lambda(lambda x: K.expand_dims(x, 1))(hm) # header mask, shape=(None, 1, h_len)

x = bert_model([x1_in, x2_in])
# Position 0 of the encoded sequence feeds the connective classifier.
x4conn = Lambda(lambda x: x[:, 0])(x)
pconn = Dense(num_cond_conn_op, activation='softmax')(x4conn)

# Gather the encoded vectors at the positions given by h.
x4h = Lambda(seq_gather)([x, h])
psel = Dense(num_agg, activation='softmax')(x4h)

pcop = Dense(num_op, activation='softmax')(x)

# Add singleton axes (axis 2 on x, axis 1 on x4h) so the two Dense(1)
# projections broadcast against each other when summed.
x = Lambda(lambda x: K.expand_dims(x, 2))(x)
x4h = Lambda(lambda x: K.expand_dims(x, 1))(x4h)
pcsel_1 = Dense(1)(x)
pcsel_2 = Dense(1)(x4h)
pcsel = Lambda(lambda x: x[0] + x[1])([pcsel_1, pcsel_2])
def expand_item(X):
    """Insert a new axis of size 1 at position 2 of `X`."""
    expanded = K.expand_dims(X, 2)
    return expanded
def call(self, x, mask=None):
    """Max-pool `x` over axis 1, zeroing masked steps first.

    When a mask is given it is cast to floatx, given a trailing axis and
    multiplied into `x`. Masked steps therefore become 0 and still take
    part in the max; they are not excluded from it.
    """
    if mask is None:
        return K.max(x, axis=1)
    keep = K.expand_dims(K.cast(mask, K.floatx()))
    return K.max(x * keep, axis=1)
def expand_rate(X):
    """Insert a new axis of size 1 at position 1 of `X`."""
    expanded = K.expand_dims(X, 1)
    return expanded
def call(self, x):
    """Radial-basis activation: exp(-betas * squared distance to each centre).

    The difference between `self.centers` (with a trailing axis added) and
    the transposed input is transposed back, squared and summed over
    axis 1 before the exponential is applied.
    """
    centres = K.expand_dims(self.centers)
    offsets = K.transpose(centres - K.transpose(x))
    squared_distance = K.sum(offsets**2, axis=1)
    return K.exp(-self.betas * squared_distance)
def build_model(config,
                n_users,
                vocab_len,
                pretrained_emb,
                emb_dim_user_id=50,
                emb_dim_pref_query=200,
                emb_dim_words=300,
                n_filters_cnn=400,
                dropout_p=0.2,
                **kwargs):
    """Build the personalised-attention news recommender (train + test models).

    :param config: object providing `max_len_title`, `max_len_hist` and
        `neg_sample_ratio`.
    :param n_users: size of the user-id embedding table.
    :param vocab_len: size of the word embedding table.
    :param pretrained_emb: initial word-embedding weights, or a falsy value
        (None, False, empty) for random initialisation.
    :param emb_dim_user_id: user-id embedding width.
    :param emb_dim_pref_query: width of the user preference-query vectors.
    :param emb_dim_words: word embedding width.
    :param n_filters_cnn: number of 1D convolution filters.
    :param dropout_p: dropout rate.
    :param kwargs: accepted and ignored.
    :return: `(model, model_test)` - the training model scores
        `1 + config.neg_sample_ratio` candidates with a softmax; the test
        model scores a single candidate with a sigmoid.

    `pretrained_emb` used to be tested with `if pretrained_emb:`, which
    raises ValueError for a numpy array with more than one element. The
    check is now size-aware for objects exposing an integer `.size` and
    falls back to plain truthiness for everything else.
    """
    ##user embedding - word & article level
    user_id = Input(shape=(1, ), dtype='int32')
    user_embedding_layer = Embedding(n_users, emb_dim_user_id, trainable=True)
    user_embedding = user_embedding_layer(user_id)
    user_embedding_word = Dense(emb_dim_pref_query,
                                activation='relu')(user_embedding)
    user_embedding_word = Flatten()(user_embedding_word)
    user_embedding_news = Dense(emb_dim_pref_query,
                                activation='relu')(user_embedding)
    user_embedding_news = Flatten()(user_embedding_news)

    ##news encoder
    news_input = Input(shape=(config.max_len_title, ), dtype='int32')

    # Arrays cannot be used in a boolean context, so go through `.size`
    # when it is available; otherwise keep the plain truthiness test.
    emb_size = getattr(pretrained_emb, 'size', None)
    if isinstance(emb_size, int):
        use_pretrained = emb_size > 0
    else:
        use_pretrained = bool(pretrained_emb)

    if use_pretrained:
        embedding_layer = Embedding(
            vocab_len,
            emb_dim_words,
            weights=[pretrained_emb],
            trainable=True)
    else:
        embedding_layer = Embedding(vocab_len, emb_dim_words,
                                    trainable=True)  # random initialisation

    embedded_sequences = embedding_layer(news_input)
    embedded_sequences = Dropout(dropout_p)(embedded_sequences)

    cnnouput = Convolution1D(nb_filter=n_filters_cnn,
                             filter_length=3,
                             padding='same',
                             activation='relu',
                             strides=1)(
                                 embedded_sequences)  # original nb_filter=400
    cnnouput = Dropout(dropout_p)(cnnouput)

    # personalised attention - word level
    attention_a = Dot((2, 1))([
        cnnouput,
        Dense(n_filters_cnn, activation='tanh')(user_embedding_word)
    ])
    attention_weight = Activation('softmax')(attention_a)
    news_rep = keras.layers.Dot((1, 1))([cnnouput, attention_weight])
    newsEncoder = Model([news_input, user_id], news_rep)

    # browsing history as concatenation of MAX_SENTS articles
    all_news_input = [
        keras.Input((config.max_len_title, ), dtype='int32')
        for _ in range(config.max_len_hist)
    ]
    browsed_news_rep = [
        newsEncoder([news, user_id]) for news in all_news_input
    ]
    # Stack the per-article vectors along a new axis 1.
    browsed_news_rep = concatenate([
        Lambda(lambda x: K.expand_dims(x, axis=1))(news)
        for news in browsed_news_rep
    ],
                                   axis=1)

    ## user encoder
    # personalised attention - article level
    attention_news = keras.layers.Dot((2, 1))([
        browsed_news_rep,
        Dense(n_filters_cnn, activation='tanh')(user_embedding_news)
    ])
    attention_weight_news = Activation('softmax')(attention_news)
    user_rep = keras.layers.Dot(
        (1, 1))([browsed_news_rep, attention_weight_news])

    # candidate items - as pseudo K + 1 classification task
    candidates = [
        keras.Input((config.max_len_title, ), dtype='int32')
        for _ in range(1 + config.neg_sample_ratio)
    ]
    candidate_vecs = [
        newsEncoder([candidate, user_id]) for candidate in candidates
    ]

    # logits
    scores_raw = [
        keras.layers.dot([user_rep, candidate_vec], axes=-1)
        for candidate_vec in candidate_vecs
    ]
    # normalised probs
    softm_probs = keras.layers.Activation(keras.activations.softmax)(
        keras.layers.concatenate(scores_raw))

    model = Model(candidates + all_news_input + [user_id], softm_probs)
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(lr=0.001),
                  metrics=['acc'])

    # Single-candidate scoring model sharing all layers with `model`.
    candidate_one = keras.Input((config.max_len_title, ))
    candidate_one_vec = newsEncoder([candidate_one, user_id])
    score_raw = keras.layers.dot([user_rep, candidate_one_vec], axes=-1)
    score_sigmoid = keras.layers.Activation(
        keras.activations.sigmoid)(score_raw)
    model_test = keras.Model(inputs=[candidate_one] + all_news_input +
                             [user_id],
                             outputs=score_sigmoid)

    return model, model_test
def call(self, inputs, states, constants):
    """Run one step of the LSTM cell with a sentinel gate and visual attention.

    Per the original author's note, this method is invoked (and wrapped) by
    the enclosing RNN layer, which is what makes ``constants`` available.

    :param inputs: [wt; v_g], of dimension self.input_dim
    :param states: previous step's [ht, mt]
    :param constants: cnn_encoder outputs; only ``constants[0]`` is used
    :return: ``(ht + ct, [ht, mt])`` - the step output and the new states
    """
    h_tm = states[0]  # last hidden state
    m_tm = states[1]  # last memory cell
    # [self.cnn_encoder_k, self.units]; self.units = cnn_encoder_d
    self.v_seq = constants[0]

    # f-gate
    ft = activations.sigmoid(
        K.dot(h_tm, self.W_f) + K.dot(inputs, self.U_f) + self.b_f)
    # i-gate
    it = activations.sigmoid(
        K.dot(h_tm, self.W_i) + K.dot(inputs, self.U_i) + self.b_i)
    # o-gate
    ot = activations.sigmoid(
        K.dot(h_tm, self.W_o) + K.dot(inputs, self.U_o) + self.b_o)
    # g-gate (sentinel gate)
    gt = activations.sigmoid(
        K.dot(h_tm, self.W_g) + K.dot(inputs, self.U_g) + self.b_g)
    # at - candidate (renewed) input
    at = activations.tanh(
        K.dot(h_tm, self.W_a) + K.dot(inputs, self.U_a) + self.b_a)

    # mt - memory cell
    mt = m_tm * ft + it * at
    # ht - hidden state
    ht = ot * activations.tanh(mt)
    # st - visual sentinel
    st = gt * activations.tanh(mt)

    # ct - visual context
    st = K.expand_dims(st, axis=1)
    # Append st to the visual features so the attention weights are computed
    # over all of them jointly: [?, k+1, d], d = self.units. This differs
    # slightly from the paper.
    self.v_expand = K.concatenate([self.v_seq, st], axis=1)
    vtt = K.dot(self.v_expand, self.W_z)
    dtt = K.repeat(K.dot(ht, self.U_z),
                   self.cnn_encoder_k + 1)  # (?, k + 1, k + 1)
    tantt = K.tanh(vtt + dtt)
    zt = K.dot(tantt, self.W_h)

    # Fix: zt is (?, k + 1, 1) - the batch_dot over axis 1 followed by
    # squeeze(axis=1) below requires the last axis to be 1. Softmax over the
    # default last axis therefore yields the constant 1 for every candidate;
    # normalise over the k + 1 candidates (axis 1) instead.
    alpha_t = activations.softmax(zt, axis=1)  # (?, k + 1, 1)

    # Weighted sum directly over st, v1, ..., vk (slightly different from the
    # paper): batch_dot contracts the k + 1 axis, output is (?, units).
    ct = K.squeeze(K.batch_dot(alpha_t, self.v_expand, axes=1), axis=1)

    ht_plus_ct = ht + ct
    return ht_plus_ct, [ht, mt]
# NOTE(review): the next four statements are the body of a function whose
# `def` line is not visible in this chunk (it matches `seq_and_vec(x)`
# defined further down) - confirm against the full file.
    x, v = x
    v = K.expand_dims(v, 1)
    v = K.tile(v, [1, K.shape(x)[1], 1])
    return K.concatenate([x, v], 2)


# Model inputs. The second axis is left unspecified (None) for every input.
q1_in = Input(shape=(None, ))  # question character-id input
q2_in = Input(shape=(None, word_size))  # question word-vector input
p1_in = Input(shape=(None, ))  # passage character-id input
p2_in = Input(shape=(None, word_size))  # passage word-vector input
a1_in = Input(shape=(None, ))  # answer left-boundary input
a2_in = Input(shape=(None, ))  # answer right-boundary input

q1, q2, p1, p2, a1, a2 = q1_in, q2_in, p1_in, p2_in, a1_in, a2_in

# Float masks with a trailing singleton axis: 1.0 where the id is > 0, else
# 0.0 (presumably id 0 is padding - TODO confirm).
q_mask = Lambda(
    lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(q1)
p_mask = Lambda(
    lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(p1)

# One embedding layer instance is shared by the question and the passage.
embeddings = MixEmbedding(len(char2id) + 2, char_size)

q = embeddings([q1, q2])
q = Dropout(0.1)(q)
p = embeddings([p1, p2])
p = Dropout(0.1)(p)

# Question encoder: three gated convolutions (rates 1, 2, 1), then pooled by
# AttentionPooling1D into `qv`.
q = DilatedGatedConv1D(rate=1, drop_gate=0.1)([q, q_mask])
q = DilatedGatedConv1D(rate=2, drop_gate=0.1)([q, q_mask])
q = DilatedGatedConv1D(rate=1, drop_gate=0.1)([q, q_mask])
qv = AttentionPooling1D()([q, q_mask])
def _fit(self, X, y):
    """Train ``self.model`` in two stages on randomly shifted sequences.

    Stage one fits the model as it is currently compiled. Stage two reloads
    the best checkpoint, recompiles with the custom DSSP / stability losses
    defined below and different loss weights, fits again, and finally
    reloads the best checkpoint. The temporary checkpoint file is always
    removed, even if training raises.

    Args:
        X: pandas object; element ``[0]`` of every entry of ``X.values`` is
            stacked into the input array and a trailing axis is appended.
        y: pandas object; element ``[0]`` of every entry of ``y.values`` is
            the stability target and element ``[1]`` the DSSP target.

    Returns:
        None. ``self.model`` is trained in place.
    """
    # Pulling out the zeroth item from each element because X, y are dataframes and
    # so each item in _.values is a list of length 1. Same for _predict, below.
    X = np.expand_dims(np.stack([x[0] for x in X.values]), 3)
    # TODO: figure out why y is a Series and X is a pandas dataframe
    y_stability = np.stack([x[0] for x in y.values], axis=1).T
    y_dssp = np.squeeze(np.stack([x[1] for x in y.values], axis=1))
    y = [y_stability, y_dssp]

    # Hold out the last 10% of the samples for validation. Slicing by an
    # explicit split index (instead of X[-val_size:] / X[:-val_size]) keeps
    # the split correct when val_size == 0, where the negative-index form
    # would put everything in validation and nothing in training.
    val_size = int(X.shape[0] * .1)
    split = X.shape[0] - val_size
    Xv = X[split:, :, :, :]
    yv = [y[0][split:, :], y[1][split:, :, :]]
    X = X[:split, :, :, :]
    y = [y[0][:split, :], y[1][:split, :, :]]

    # 'X' means nothing, 'J' means beginning, 'O' means end
    amino_dict = {
        code: index
        for index, code in enumerate('ARNDCEQGHILKMFPSTWYVXJO')
    }

    def data_gen(batch_size):
        """Yield endless batches of randomly chosen, randomly rolled samples."""
        # NOTE(review): sampling only from the first n - batch_size rows is
        # kept from the original; max(1, ...) merely avoids the ValueError
        # randrange raises when the training set is not larger than a batch.
        index_bound = max(1, X.shape[0] - batch_size)
        while True:
            x_ret = []
            y_ret = [[], []]
            for _ in range(batch_size):
                xi = randrange(index_bound)
                x = X[xi, :, :, :]
                y0 = y[0][xi, :]
                y1 = y[1][xi, :, :]
                minshift = np.argmax(
                    x[amino_dict['O'], :, :]) - x.shape[1] + self.padding
                maxshift = np.argmax(x[amino_dict['J'], :, :]) - self.padding
                # +1 is because we want to be able to shift maxshift (putting
                # the 'J' at the beginning) but not minshift (putting the 'O'
                # wrapped around and at the beginning - we want the farthest
                # rightward shift possible to put the 'O' at the end)
                shift = randrange(minshift, maxshift) + 1
                x_ret.append(np.roll(x, shift, axis=1))
                y_ret[0].append(y0)
                y_ret[1].append(np.roll(y1, shift, axis=0))
            yield np.stack(x_ret), [np.stack(y_ret[0]), np.stack(y_ret[1])]

    # Keras expects an integer step count; the original passed the float
    # n / batch_size. Rounding up keeps the number of steps actually run.
    steps_per_epoch = max(
        1, int(np.ceil(X.shape[0] / float(self.batch_size))))

    checkpoint_filepath = 'sequence_only_cnn_v2_{}.best.hdf5'.format(
        str(randint(1000000000, 9999999999)))
    checkpoint_callback = ModelCheckpoint(checkpoint_filepath,
                                          monitor='val_loss',
                                          save_best_only=True)
    stopping_callback = EarlyStopping(monitor='val_loss',
                                      min_delta=0.0,
                                      patience=5)
    callbacks_list = [checkpoint_callback, stopping_callback]

    # Based on permissible transitions between DSSP codes
    transition_kernels = K.constant([[[1, 0, 0, 0, 0, 0],
                                      [-1, -1, -1, 0, 0, -1]],
                                     [[0, 1, 0, 0, 0, 0],
                                      [-1, -1, 0, 0, 0, 0]],
                                     [[0, 0, 1, 0, 0, 0],
                                      [-1, 0, -1, 0, 0, 0]],
                                     [[0, 0, 0, 1, 0, 0],
                                      [0, 0, 0, -1, -1, 0]],
                                     [[0, 0, 0, 0, 1, 0],
                                      [-1, 0, 0, 0, 0, 0]],
                                     [[0, 0, 0, 0, 0, 1],
                                      [0, 0, 0, -1, 0, 0]]])
    transition_kernels = K.permute_dimensions(transition_kernels, (1, 2, 0))
    transition_kernels = K.expand_dims(transition_kernels, -2)

    def custom_loss_dssp(y_true, y_pred):
        """Cross-entropy plus penalties from the transition kernels."""
        y_pred_one_hot = K.one_hot(K.argmax(y_pred), 6)

        def conv_loss(pred):
            # Fix: convolve the tensor passed in. The original ignored
            # `pred` and always used y_pred_one_hot, so the two
            # conv_loss(y_pred) terms below duplicated the one-hot terms.
            return K.max(K.clip(
                K.conv2d(K.expand_dims(pred, -1), transition_kernels), 0.0,
                1.0),
                         axis=-1)

        return (
            K.mean(losses.categorical_crossentropy(y_true, y_pred))
            # inner max is over filters, which is important to only pick the most-activated filter at each site -
            # this will be the filter that matches the identity of the DSSP code.
            + 0.8 * K.mean(conv_loss(y_pred_one_hot)) +
            0.4 * K.max(conv_loss(y_pred_one_hot)) +
            0.4 * K.mean(conv_loss(y_pred)) + 0.2 * K.max(conv_loss(y_pred)))

    def coeff_determination(y_true, y_pred):
        """R^2 computed over the whole batch tensor."""
        SS_res = K.sum(K.square(y_true - y_pred))
        SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
        return 1 - SS_res / (SS_tot + K.epsilon())

    def custom_loss_stability(y_true, y_pred):
        """Per-sample RMSE minus three times the batch R^2."""
        return K.sqrt(
            K.mean(K.square(y_pred - y_true),
                   axis=-1)) - 3.0 * coeff_determination(y_true, y_pred)

    loss = {
        "model_dssp": custom_loss_dssp,
        "model_stability": custom_loss_stability,
    }
    loss_weights = {"model_stability": 0.1, "model_dssp": 0.05}

    try:
        # Stage one: train with the model's existing compile settings.
        self.model.fit_generator(data_gen(self.batch_size),
                                 epochs=self.epochs,
                                 steps_per_epoch=steps_per_epoch,
                                 validation_data=(Xv, yv),
                                 callbacks=callbacks_list,
                                 verbose=self.verbose)

        # Stage two: restart from the best weights with re-weighted losses.
        self.model.load_weights(checkpoint_filepath)
        self.model.compile(optimizer='adadelta',
                           loss=loss,
                           loss_weights=loss_weights)
        self.model.fit_generator(data_gen(self.batch_size),
                                 epochs=self.epochs,
                                 steps_per_epoch=steps_per_epoch,
                                 validation_data=(Xv, yv),
                                 callbacks=callbacks_list,
                                 verbose=self.verbose)
        self.model.load_weights(checkpoint_filepath)
    finally:
        # The checkpoint is a scratch file; never leave it behind, including
        # when one of the training stages raises.
        if os.path.exists(checkpoint_filepath):
            os.remove(checkpoint_filepath)
def seq_and_vec(x):
    """Append a per-sample vector to every position of a sequence tensor.

    ``x`` is a pair ``(seq, vec)``. ``vec`` gets a new axis 1, is repeated
    along it ``K.shape(seq)[1]`` times, and is concatenated to ``seq`` on
    axis 2.
    """
    seq, vec = x
    steps = K.shape(seq)[1]
    tiled_vec = K.tile(K.expand_dims(vec, 1), [1, steps, 1])
    return K.concatenate([seq, tiled_vec], 2)
def co_attention(self, Q, E, mask, axis=1):
    """Attend over ``Q`` using the masked scores ``E``.

    ``mask`` is expanded at ``axis``; positions where it is 0 have 1e30
    subtracted from their score before the softmax, which is taken over
    axis ``3 - axis``. The resulting weights are batch-dotted with ``Q``
    (weights axis ``3 - axis`` against axis 1 of ``Q``).
    """
    softmax_axis = 3 - axis
    expanded_mask = K.expand_dims(mask, axis=axis)
    # Zero where the mask is 1, a huge value where the mask is 0.
    penalty = (1 - expanded_mask) * 1e30
    weights = K.softmax(E - penalty, axis=softmax_axis)
    return K.batch_dot(weights, Q, axes=[softmax_axis, 1])
def backend_expand_dims_1(x):
    """Return ``x`` with a new size-1 axis inserted at position 1."""
    expanded = K.expand_dims(x, 1)
    return expanded
def conv_loss(pred):
    """Clipped transition-kernel response of ``pred``, maxed over filters.

    ``pred`` gets a trailing channel axis, is convolved with
    ``transition_kernels``, clipped to [0, 1], and reduced with a max over
    the last (filter) axis.
    """
    # Fix: use the argument. The original ignored `pred` and always
    # convolved the enclosing `y_pred_one_hot`, so every call returned the
    # same tensor regardless of what was passed in.
    responses = K.conv2d(K.expand_dims(pred, -1), transition_kernels)
    return K.max(K.clip(responses, 0.0, 1.0), axis=-1)
def backend_expand_dims_last(x):
    """Return ``x`` with a new size-1 axis appended as the last axis."""
    expanded = K.expand_dims(x, -1)
    return expanded
def sequence_only_cnn_v2(max_residues, padding):
    """Build and wrap the two-output (stability + DSSP) sequence CNN.

    The network applies three Conv2D/Dropout stages to a one-hot sequence
    image, predicts a per-position 6-way DSSP softmax, and feeds the
    flattened conv features together with the flattened DSSP prediction
    into a dense stack producing two values plus their minimum.

    Args:
        max_residues: maximum sequence length; the input width is
            ``max_residues + 2 + 2 * padding``.
        padding: padding on each side of the sequence; also passed to the
            returned wrapper.

    Returns:
        A ``KerasRegressionTwoDimensional`` wrapping the compiled model.
    """
    seq_len = max_residues + 2 + 2 * padding

    # 20 amino acids plus null/beginning/end
    amino_inputs = Input(shape=(23, seq_len, 1))

    amino_model = amino_inputs
    for n_filters, kernel_size in ((400, (23, 5)), (200, (1, 9)),
                                   (100, (1, 17))):
        amino_model = Conv2D(n_filters,
                             kernel_size,
                             kernel_regularizer=l2(.0),
                             activation='relu')(amino_model)
        amino_model = Dropout(0.3)(amino_model)

    model = Flatten()(amino_model)

    # DSSP head: one 6-way softmax per sequence position.
    model_dssp = Dense(seq_len * 6)(model)
    model_dssp = Reshape((seq_len, 6))(model_dssp)
    # softmax default axis is last axis
    model_dssp = Activation('softmax', name='model_dssp')(model_dssp)
    model_dssp_flat = Flatten()(model_dssp)

    # Stability head: conv features plus the DSSP prediction.
    model = Concatenate()([model, model_dssp_flat])
    model = Dense(80, activation='elu', kernel_regularizer=l2(.0))(model)
    model = Dense(40, activation='elu', kernel_regularizer=l2(.0))(model)
    model = Dense(2, activation='linear', kernel_regularizer=l2(.0))(model)
    # Append the row-wise minimum of the two outputs as a third column.
    model_stability = Lambda(
        lambda x: K.concatenate([x, K.min(x, axis=1, keepdims=True)], axis=1),
        name='model_stability')(model)

    comp_model = Model(inputs=amino_inputs,
                       outputs=[model_stability, model_dssp])

    # Based on permissible transitions between DSSP codes
    transition_kernels = K.constant([[[1, 0, 0, 0, 0, 0],
                                      [-1, -1, -1, 0, 0, -1]],
                                     [[0, 1, 0, 0, 0, 0],
                                      [-1, -1, 0, 0, 0, 0]],
                                     [[0, 0, 1, 0, 0, 0],
                                      [-1, 0, -1, 0, 0, 0]],
                                     [[0, 0, 0, 1, 0, 0],
                                      [0, 0, 0, -1, -1, 0]],
                                     [[0, 0, 0, 0, 1, 0],
                                      [-1, 0, 0, 0, 0, 0]],
                                     [[0, 0, 0, 0, 0, 1],
                                      [0, 0, 0, -1, 0, 0]]])
    transition_kernels = K.permute_dimensions(transition_kernels, (1, 2, 0))
    transition_kernels = K.expand_dims(transition_kernels, -2)

    def custom_loss_dssp(y_true, y_pred):
        """Cross-entropy plus penalties from the transition kernels."""
        y_pred_one_hot = K.one_hot(K.argmax(y_pred), 6)

        def conv_loss(pred):
            # Fix: convolve the tensor passed in. The original ignored
            # `pred` and always used y_pred_one_hot, so the two
            # conv_loss(y_pred) terms below duplicated the one-hot terms.
            return K.max(K.clip(
                K.conv2d(K.expand_dims(pred, -1), transition_kernels), 0.0,
                1.0),
                         axis=-1)

        return (
            K.mean(losses.categorical_crossentropy(y_true, y_pred))
            # inner max is over filters, which is important to only pick the most-activated filter at each site -
            # this will be the filter that matches the identity of the DSSP code.
            + 0.8 * K.mean(conv_loss(y_pred_one_hot)) +
            0.4 * K.max(conv_loss(y_pred_one_hot)) +
            0.4 * K.mean(conv_loss(y_pred)) + 0.2 * K.max(conv_loss(y_pred)))

    def coeff_determination(y_true, y_pred):
        """R^2 computed over the whole batch tensor."""
        SS_res = K.sum(K.square(y_true - y_pred))
        SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
        return 1 - SS_res / (SS_tot + K.epsilon())

    def custom_loss_stability(y_true, y_pred):
        """Per-sample RMSE minus three times the batch R^2."""
        return K.sqrt(
            K.mean(K.square(y_pred - y_true),
                   axis=-1)) - 3.0 * coeff_determination(y_true, y_pred)

    loss = {
        "model_dssp": custom_loss_dssp,
        "model_stability": custom_loss_stability,
    }
    loss_weights = {"model_stability": 0.2, "model_dssp": 1.2}
    comp_model.compile(optimizer='adadelta',
                       loss=loss,
                       loss_weights=loss_weights)

    th_model = KerasRegressionTwoDimensional(
        model=comp_model,
        model_author="Jed",
        model_description=
        'Sequence CNN v2 regressor: 400x5->200x9->100x17->80->40->1',
        batch_size=128,
        epochs=50,
        padding=padding)
    return th_model
def recursion(self,
              input_energy,
              mask=None,
              go_backwards=False,
              return_sequences=True,
              return_logZ=True,
              input_length=None):
    r"""Forward (alpha) or backward (beta) recursion

    If `return_logZ = True`, compute the logZ, the normalization constant:

    \[ Z = \sum_{y1, y2, y3} exp(-E) # energy
      = \sum_{y1, y2, y3} exp(-(u1' y1 + y1' W y2 + u2' y2 + y2' W y3 + u3' y3))
      = sum_{y2, y3} (exp(-(u2' y2 + y2' W y3 + u3' y3))
      sum_{y1} exp(-(u1' y1' + y1' W y2))) \]

    Denote:
        \[ S(y2) := sum_{y1} exp(-(u1' y1 + y1' W y2)), \]
        \[ Z = sum_{y2, y3} exp(log S(y2) - (u2' y2 + y2' W y3 + u3' y3)) \]
        \[ logS(y2) = log S(y2) = log_sum_exp(-(u1' y1' + y1' W y2)) \]
    Note that:
          yi's are one-hot vectors
          u1, u3: boundary energies have been merged

    If `return_logZ = False`, compute the Viterbi's best path lookup table.

    Args:
        input_energy: per-step energies, iterated over axis 1 by ``K.rnn``.
        mask: optional mask aligned with ``input_energy`` on axes 0 and 1;
            it is shifted by one step (a zero column is appended) and cast
            to ``K.floatx()`` before being passed to the step function.
        go_backwards: if True, ``input_energy`` and ``mask`` are reversed
            along axis 1 before the scan, and a returned sequence is
            reversed back.
        return_sequences: return the per-step values if True, otherwise
            only the value from the last step.
        return_logZ: forwarded to ``self.step``.
        input_length: forwarded to ``K.rnn``.

    Returns:
        The sequence of step outputs, or the last step output, as selected
        by ``return_sequences``.
    """
    # The docstring is a raw string because it contains backslashes
    # (\[, \], \sum) that are not valid escape sequences.

    # shape=(1, F, F): F=num of output features. 1st F is for t-1, 2nd F for t
    chain_energy = K.expand_dims(self.chain_kernel, 0)
    # shape=(B, F), dtype=float32
    prev_target_val = K.zeros_like(input_energy[:, 0, :])

    if go_backwards:
        input_energy = K.reverse(input_energy, 1)
        if mask is not None:
            mask = K.reverse(mask, 1)

    initial_states = [prev_target_val, K.zeros_like(prev_target_val[:, :1])]
    constants = [chain_energy]

    if mask is not None:
        # Mask shifted left by one step, padded with a trailing zero column.
        mask2 = K.cast(
            K.concatenate([mask, K.zeros_like(mask[:, :1])], axis=1),
            K.floatx())
        constants.append(mask2)

    def _step(input_energy_i, states):
        return self.step(input_energy_i, states, return_logZ)

    target_val_last, target_val_seq, _ = K.rnn(_step,
                                               input_energy,
                                               initial_states,
                                               constants=constants,
                                               input_length=input_length,
                                               unroll=self.unroll)

    if not return_sequences:
        return target_val_last
    if go_backwards:
        # Undo the earlier reversal so outputs line up with the inputs.
        target_val_seq = K.reverse(target_val_seq, 1)
    return target_val_seq
def weight_expand(x):
    """Return ``backend.expand_dims(x)`` (called with its default axis)."""
    expanded = backend.expand_dims(x)
    return expanded
def softmaxLayer(x):
    """Sum ``x`` over axis 3, apply ``K.softmax``, and append a size-1 axis."""
    return K.expand_dims(K.softmax(K.sum(x, axis=3)), axis=-1)