def call(self, inputs, states, constants):
    # Unpack attention keys/values from `constants`.
    if not isinstance(constants, (list, tuple)):
        keys = values = constants
    elif len(constants) == 1:
        keys = values = constants[0]
    elif len(constants) == 2:
        keys, values = constants
    else:
        raise ValueError(
            '`constants` must be either a list holding keys and values '
            'or a single tensor of attention vectors')
    if not isinstance(states, (list, tuple)):
        query = states
    else:
        query = states[0]
    query = self._query_transformation(query)
    # Tile the query across timesteps for additive (Bahdanau-style) scoring.
    repeated_query = K.repeat(query, K.shape(keys)[1])
    logits = self._attention_logits_dense(K.tanh(repeated_query + keys))
    attention_weights = keras.activations.softmax(logits, axis=1)
    attention_context = K.sum(attention_weights * values, axis=1, keepdims=False)
    inputs = inputs + attention_context
    return self._cell.call(inputs, states)
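# A minimal standalone sketch (not from the source) of the shape arithmetic
# behind the additive-attention step above: K.repeat tiles the 2-D query to
# (batch, timesteps, units) so it can be combined elementwise with the keys.
# All sizes are illustrative.
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K

batch, timesteps, units = 2, 5, 8
query = K.constant(np.random.rand(batch, units))            # (batch, units)
keys = K.constant(np.random.rand(batch, timesteps, units))  # (batch, time, units)

repeated_query = K.repeat(query, timesteps)                 # (batch, time, units)
scores = K.tanh(repeated_query + keys)                      # additive energies
weights = tf.keras.activations.softmax(scores, axis=1)      # normalize over time
print(K.int_shape(weights))  # (2, 5, 8)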
def build_generator(self):
    """U-Net Generator"""

    def conv2d(layer_input, filters, f_size=4):
        """Layers used during downsampling"""
        d = Conv2D(filters, kernel_size=f_size, strides=2, padding='same')(layer_input)
        d = LeakyReLU(alpha=0.2)(d)
        d = InstanceNormalization()(d)
        return d

    def deconv2d(layer_input, skip_input, filters, f_size=4, dropout_rate=0):
        """Layers used during upsampling"""
        u = UpSampling2D(size=2)(layer_input)
        u = Conv2D(filters, kernel_size=f_size, strides=1, padding='same',
                   activation='relu')(u)
        if dropout_rate:
            u = Dropout(dropout_rate)(u)
        u = InstanceNormalization()(u)
        u = Concatenate()([u, skip_input])
        return u

    # Image input
    input_img = Input(shape=self.img_shape)
    # Class-label input, broadcast to a (H, W, class_num) map and
    # concatenated with the image channels.
    inp_c = Input(shape=(self.class_num,))
    c = Lambda(lambda x: K.repeat(x, self.img_width * self.img_height))(inp_c)
    c = Reshape((self.img_width, self.img_height, self.class_num))(c)
    d0 = Concatenate()([input_img, c])

    # Downsampling
    d1 = conv2d(d0, self.gf)
    d2 = conv2d(d1, self.gf * 2)
    d3 = conv2d(d2, self.gf * 4)
    d4 = conv2d(d3, self.gf * 8)

    # Upsampling
    u1 = deconv2d(d4, d3, self.gf * 4)
    u2 = deconv2d(u1, d2, self.gf * 2)
    u3 = deconv2d(u2, d1, self.gf)

    u4 = UpSampling2D(size=2)(u3)
    output_img = Conv2D(self.img_channel, kernel_size=4, strides=1,
                        padding='same', activation='tanh')(u4)

    return Model([input_img, inp_c], output_img)
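# A hedged, self-contained sketch (illustrative sizes, not the source model)
# of the conditioning trick above: K.repeat plus a reshape broadcasts a
# one-hot class vector into a constant (H, W, class_num) map that can be
# concatenated with the image channels.
import numpy as np
from tensorflow.keras import backend as K

batch, H, W, n_classes = 2, 4, 4, 3
labels = K.constant(np.eye(n_classes)[[0, 2]])  # (batch, n_classes) one-hot
tiled = K.repeat(labels, H * W)                 # (batch, H*W, n_classes)
class_map = K.reshape(tiled, (batch, H, W, n_classes))
print(K.int_shape(class_map))  # (2, 4, 4, 3)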
def call(self, x, mask=None):
    if mask is not None:
        # mask (batch, time)
        mask = K.cast(mask, K.floatx())
        # mask (batch, x_dim, time)
        mask = K.repeat(mask, x.shape[-1])
        # mask (batch, time, x_dim)
        mask = tf.transpose(mask, [0, 2, 1])
        x = x * mask
        return K.sum(x, axis=1) / K.sum(mask, axis=1)
def call(self, x, mask=None):
    """1. The mask is a bool tensor, so it needs casting before compute.
    2. The mask is two-dimensional: (batch_size, time_steps).
    """
    if mask is not None:
        mask = K.repeat(mask, x.shape[-1])
        mask = tf.transpose(mask, [0, 2, 1])
        mask = tf.cast(mask, tf.float32)
        x = x * mask
        return K.sum(x, axis=1) / K.sum(mask, axis=1)
    else:
        return K.mean(x, axis=1)
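# A small numeric check (illustrative, not from the source) that the
# sum / mask-count division above equals the mean over only the valid
# timesteps: the padded third step contributes nothing.
import tensorflow as tf
from tensorflow.keras import backend as K

x = K.constant([[[1., 1.], [3., 3.], [9., 9.]]])  # (1, time=3, dim=2); step 3 is padding
mask = K.constant([[True, True, False]], dtype='bool')

m = K.repeat(K.cast(mask, K.floatx()), 2)  # (1, dim, time)
m = tf.transpose(m, [0, 2, 1])             # (1, time, dim)
masked_mean = K.sum(x * m, axis=1) / K.sum(m, axis=1)
print(masked_mean.numpy())  # [[2. 2.]] -- mean of the two valid steps only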
def _time_distributed_dense(x, w, b=None, dropout=None,
                            input_dim=None, output_dim=None,
                            timesteps=None, training=None):
    """Apply `y . w + b` for every temporal slice y of x.

    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: whether to apply dropout (same dropout mask
            for every temporal slice of the input).
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.

    # Returns
        Output tensor.
    """
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.int_shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)
    # reshape to 3D tensor
    if K.backend() == 'tensorflow':
        x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
        x.set_shape([None, None, output_dim])
    else:
        x = K.reshape(x, (-1, timesteps, output_dim))
    return x
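# A hedged usage sketch of the helper above (TensorFlow backend and
# illustrative shapes assumed): the same (input_dim, output_dim) weight
# matrix is applied to every timestep in one batched matmul.
import numpy as np
from tensorflow.keras import backend as K

batch, timesteps, input_dim, output_dim = 2, 5, 8, 3
x = K.constant(np.random.rand(batch, timesteps, input_dim))
w = K.constant(np.random.rand(input_dim, output_dim))
b = K.constant(np.zeros(output_dim))

y = _time_distributed_dense(x, w, b, input_dim=input_dim,
                            output_dim=output_dim, timesteps=timesteps)
print(K.int_shape(y))  # (2, 5, 3)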
def call(self, x, mask=None):
    if mask is not None:
        # mask (batch, time)
        mask = K.cast(mask, K.floatx())
        if K.ndim(x) != K.ndim(mask):
            mask = K.repeat(mask, x.shape[-1])
            mask = tf.transpose(mask, [0, 2, 1])
        x = x * mask
        if K.ndim(x) == 2:
            x = K.expand_dims(x)
        return K.sum(x, axis=self.axis)
    else:
        if K.ndim(x) == 2:
            x = K.expand_dims(x)
        return K.sum(x, axis=self.axis)
def select_best_leaf(self, y_pred):
    if self.N > self.num_leaves:
        # if there are more total nodes than leaf nodes in the hierarchy
        # (should always be the case, but allowed to work either way),
        # pad with a zero for each non-leaf node in the taxonomy
        y_pred = self._pad(y_pred)

    # propagate the probabilities (algo 1)
    propagated_probabilities = K.transpose(K.dot(self.A, K.transpose(y_pred)))

    # grab the mask vector for root and repeat it <batch size> times
    root = K.repeat(self.root, K.shape(y_pred)[0])
    # reshape into (<batch size>, N)
    predictions = K.reshape(root, (K.shape(y_pred)[0], self.N))

    # each branch will walk further out toward leaf nodes (and loops on leaf nodes)
    for _ in range(self.depth):
        predictions = self._branch(propagated_probabilities, predictions)
    return predictions
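# A toy, self-contained illustration (the real self.A is not shown in the
# source) of the propagation step above: with A as a subtree-indicator
# matrix, A . y^T sums each node's probability into all of its ancestors.
import numpy as np

# Node 0 is the root; nodes 1 and 2 are its leaf children.
# A[i, j] = 1 if node j lies in the subtree rooted at node i (including i).
A = np.array([[1., 1., 1.],
              [0., 1., 0.],
              [0., 0., 1.]])
y_pred = np.array([[0.0, 0.3, 0.7]])  # leaf probabilities, zero-padded at the root

propagated = (A @ y_pred.T).T
print(propagated)  # [[1.  0.3 0.7]] -- the root accumulates its subtree's mass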
def call(self, x, mask=None):
    # `mask` is the mask passed down from the previous layer
    # x: [batch_size, seq_len, embedding_size]
    if self.supports_masking:
        # mask: [batch_size, seq_len]
        if mask is None:
            # check which positions are non-zero, then OR over the embedding
            # axis to recover each sequence's valid (non-padding) length
            mask = K.any(K.not_equal(x, 0), -1)
        # [batch_size, seq_len]
        mask = K.cast(mask, K.floatx())
        return K.sum(x, axis=1) / K.sum(mask, axis=1, keepdims=True)
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        # [batch_size, embedding_size, seq_len]
        mask = K.repeat(mask, x.shape[-1].value)
        # [batch_size, seq_len, embedding_size]
        mask = tf.transpose(mask, [0, 2, 1])
        x = x * mask
        return K.sum(x, axis=1) / K.sum(mask, axis=1)
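# A tiny illustration of the mask-recovery trick above: timesteps whose
# embedding vector is all zeros (i.e. padding) receive mask 0. Values are
# illustrative, not from the source.
from tensorflow.keras import backend as K

x = K.constant([[[0.5, -0.2], [0.0, 0.0]]])  # (1, time=2, emb=2); step 2 is zero padding
mask = K.any(K.not_equal(x, 0), -1)          # (1, time) -> [[True, False]]
print(K.cast(mask, K.floatx()).numpy())      # [[1. 0.]]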
def test_sequence_example_into_input_layer(self):
    examples = [_make_sequence_example().SerializeToString()] * 100
    ctx_cols, seq_cols = self._build_feature_columns()

    def _parse_example(example):
        ctx, seq = parsing_ops.parse_single_sequence_example(
            example,
            context_features=fc.make_parse_example_spec_v2(ctx_cols),
            sequence_features=fc.make_parse_example_spec_v2(seq_cols))
        ctx.update(seq)
        return ctx

    ds = dataset_ops.Dataset.from_tensor_slices(examples)
    ds = ds.map(_parse_example)
    ds = ds.batch(20)

    # Test on a single batch
    features = dataset_ops.make_one_shot_iterator(ds).get_next()

    # Tile the context features across the sequence features
    sequence_input_layer = ksfc.SequenceFeatures(seq_cols)
    seq_input, _ = sequence_input_layer(features)
    dense_input_layer = dense_features.DenseFeatures(ctx_cols)
    ctx_input = dense_input_layer(features)
    ctx_input = backend.repeat(ctx_input, array_ops.shape(seq_input)[1])
    concatenated_input = merge.concatenate([seq_input, ctx_input])

    rnn_layer = recurrent.RNN(recurrent.SimpleRNNCell(10))
    output = rnn_layer(concatenated_input)

    with self.cached_session() as sess:
        sess.run(variables.global_variables_initializer())
        features_r = sess.run(features)
        self.assertAllEqual(features_r['int_list'].dense_shape, [20, 3, 6])
        output_r = sess.run(output)
        self.assertAllEqual(output_r.shape, [20, 10])
def call(self, inputs):
    return K.repeat(inputs, self.n)
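# The one-line call above presumably sits inside a RepeatVector-style custom
# layer. A hedged sketch of a complete enclosing class follows; the name
# RepeatLayer and its config handling are assumptions, not from the source.
import tensorflow as tf
from tensorflow.keras import backend as K

class RepeatLayer(tf.keras.layers.Layer):
    """Tiles a (batch, dim) input n times along a new time axis,
    yielding (batch, n, dim) -- analogous to keras.layers.RepeatVector."""

    def __init__(self, n, **kwargs):
        super().__init__(**kwargs)
        self.n = n

    def call(self, inputs):
        return K.repeat(inputs, self.n)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.n, input_shape[1])

    def get_config(self):
        config = super().get_config()
        config.update({'n': self.n})
        return config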
def call(self, inputs, states, training=None):
    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    h_tm1 = states[0]  # previous memory state
    c_tm1 = states[1]  # previous carry state

    # alignment model
    h_att = K.repeat(h_tm1, self.timestep_dim)
    att = _time_distributed_dense(inputs, self.attention_weights,
                                  self.attention_bias,
                                  input_dim=self.input_dim,
                                  output_dim=self.units,
                                  timesteps=self.timestep_dim)
    attention_ = self.attention_activation(
        K.dot(h_att, self.attention_recurrent_weights) + att)  # energy
    attention_ = K.squeeze(
        K.dot(attention_, self.attention_recurrent_bias), 2)  # energy
    alpha = K.exp(attention_)

    if dp_mask is not None:
        alpha *= dp_mask[0]

    alpha /= K.sum(alpha, axis=1, keepdims=True)
    alpha_r = K.repeat(alpha, self.input_dim)
    alpha_r = K.permute_dimensions(alpha_r, (0, 2, 1))

    # make context vector (soft attention after Bahdanau et al.)
    z_hat = inputs * alpha_r
    context_sequence = z_hat
    z_hat = K.sum(z_hat, axis=1)

    if self.implementation == 1:
        if 0 < self.dropout < 1.:
            inputs_i = inputs * dp_mask[0]
            inputs_f = inputs * dp_mask[1]
            inputs_c = inputs * dp_mask[2]
            inputs_o = inputs * dp_mask[3]
        else:
            inputs_i = inputs
            inputs_f = inputs
            inputs_c = inputs
            inputs_o = inputs
        x_i = K.dot(inputs_i, self.kernel_i)
        x_f = K.dot(inputs_f, self.kernel_f)
        x_c = K.dot(inputs_c, self.kernel_c)
        x_o = K.dot(inputs_o, self.kernel_o)
        if self.use_bias:
            x_i = K.bias_add(x_i, self.bias_i)
            x_f = K.bias_add(x_f, self.bias_f)
            x_c = K.bias_add(x_c, self.bias_c)
            x_o = K.bias_add(x_o, self.bias_o)

        if 0 < self.recurrent_dropout < 1.:
            h_tm1_i = h_tm1 * rec_dp_mask[0]
            h_tm1_f = h_tm1 * rec_dp_mask[1]
            h_tm1_c = h_tm1 * rec_dp_mask[2]
            h_tm1_o = h_tm1 * rec_dp_mask[3]
        else:
            h_tm1_i = h_tm1
            h_tm1_f = h_tm1
            h_tm1_c = h_tm1
            h_tm1_o = h_tm1
        i = self.recurrent_activation(
            x_i + K.dot(h_tm1_i, self.recurrent_kernel_i) +
            K.dot(z_hat, self.attention_i))
        f = self.recurrent_activation(
            x_f + K.dot(h_tm1_f, self.recurrent_kernel_f) +
            K.dot(z_hat, self.attention_f))
        c = f * c_tm1 + i * self.activation(
            x_c + K.dot(h_tm1_c, self.recurrent_kernel_c) +
            K.dot(z_hat, self.attention_c))
        o = self.recurrent_activation(
            x_o + K.dot(h_tm1_o, self.recurrent_kernel_o) +
            K.dot(z_hat, self.attention_o))
    else:
        if 0. < self.dropout < 1.:
            inputs *= dp_mask[0]
        z = K.dot(inputs, self.kernel)
        if 0. < self.recurrent_dropout < 1.:
            h_tm1 *= rec_dp_mask[0]
        z += K.dot(h_tm1, self.recurrent_kernel)
        z += K.dot(z_hat, self.attention_kernel)
        if self.use_bias:
            z = K.bias_add(z, self.bias)

        z0 = z[:, :self.units]
        z1 = z[:, self.units:2 * self.units]
        z2 = z[:, 2 * self.units:3 * self.units]
        z3 = z[:, 3 * self.units:]

        i = self.recurrent_activation(z0)
        f = self.recurrent_activation(z1)
        c = f * c_tm1 + i * self.activation(z2)
        o = self.recurrent_activation(z3)

    h = o * self.activation(c)
    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            h._uses_learning_phase = True
    return h, [h, c]
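# An illustrative sketch (sizes assumed) of the soft-attention reweighting
# used above: K.repeat plus K.permute_dimensions broadcasts the per-timestep
# weights alpha across the feature axis before the weighted sum.
import numpy as np
from tensorflow.keras import backend as K

batch, timesteps, input_dim = 2, 4, 3
inputs = K.constant(np.random.rand(batch, timesteps, input_dim))
alpha = K.constant(np.full((batch, timesteps), 0.25))  # uniform attention weights

alpha_r = K.repeat(alpha, input_dim)                # (batch, input_dim, time)
alpha_r = K.permute_dimensions(alpha_r, (0, 2, 1))  # (batch, time, input_dim)
z_hat = K.sum(inputs * alpha_r, axis=1)             # context vector: (batch, input_dim)
print(K.int_shape(z_hat))  # (2, 3)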