Example #1
    def call(self, x, mask=None):
        x = K.permute_dimensions(x, (0, 2, 1))
        x = K.reshape(x, (-1, self.input_length))
        x = K.expand_dims(x, 1)
        x = K.expand_dims(x, -1)
        if self.real_filts is not None:
            conv_out_r = K.conv2d(x, self.W_r, strides=self.subsample,
                                  border_mode=self.border_mode,
                                  dim_ordering='th')
        else:
            conv_out_r = x

        if self.complex_filts is not None:
            conv_out_c1 = K.conv2d(x, self.W_c1, strides=self.subsample,
                                   border_mode=self.border_mode,
                                   dim_ordering='th')
            conv_out_c2 = K.conv2d(x, self.W_c2, strides=self.subsample,
                                   border_mode=self.border_mode,
                                   dim_ordering='th')
            conv_out_c = K.sqrt(K.square(conv_out_c1) + K.square(conv_out_c2) + K.epsilon())
            output = K.concatenate((conv_out_r, conv_out_c), axis=1)
        else:
            output = conv_out_r

        output_shape = self.get_output_shape_for((None, self.input_length, self.input_dim))
        output = K.squeeze(output, 3)  # remove the dummy 3rd dimension
        output = K.permute_dimensions(output, (2, 1, 0))
        output = K.reshape(output, (-1, output_shape[1], output.shape[1]*output.shape[2]))
        return output
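To make the shape gymnastics in this call concrete, here is a small numpy sketch of the same permute/reshape/expand_dims sequence; the sizes are invented stand-ins for the batch size, input_length and input_dim.

import numpy as np

batch, input_length, input_dim = 2, 16, 3          # assumed sizes, for illustration only
x = np.zeros((batch, input_length, input_dim))

x = np.transpose(x, (0, 2, 1))       # K.permute_dimensions(x, (0, 2, 1)) -> (batch, input_dim, input_length)
x = x.reshape((-1, input_length))    # K.reshape(...)                     -> (batch*input_dim, input_length)
x = np.expand_dims(x, 1)             # K.expand_dims(x, 1)                -> (batch*input_dim, 1, input_length)
x = np.expand_dims(x, -1)            # K.expand_dims(x, -1)               -> (batch*input_dim, 1, input_length, 1)
print(x.shape)                       # (6, 1, 16, 1): a 4-D, 'th'-ordered tensor ready for conv2d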
Example #2
    def build(self):
        self.encoder.build()
        self.decoder.build()
        self.variational.build()

        outputs = []
        self.regularizers = []
        input_list = self.get_input()
        if isinstance(input_list, OrderedDict):
            assert len(input_list) == 2
            for X in input_list.values():
                Y = self.encoder(X)
                reg = self.variational.get_variational_regularization(Y)
                self.regularizers.append(reg)
                Y = self.variational._get_output(Y, train=True)
                Y = self.decoder(Y)
                outputs.append(Y)
            pool0 = self.max_pool(K.expand_dims(outputs[0], 2))
            pool1 = self.max_pool(K.expand_dims(outputs[1], 2))
            slow = self.beta * ((pool0 - pool1) ** 2).mean()
            self.regularizers.append(LambdaRegularizer(slow))
        else:
            Y = self.encoder(input_list)
            reg = self.variational.get_variational_regularization(Y)
            self.regularizers.append(reg)
            Y = self.variational._get_output(Y, train=True)
            Y = self.decoder(Y)

        self.trainable_weights = (
            self.encoder.trainable_weights + self.variational.trainable_weights + self.decoder.trainable_weights
        )
    def call(self, x):
        assert(K.backend() == 'tensorflow')
        temp = K.permute_dimensions(x, (0, 2, 1))
        for i in range(0, self.attention_depth):
            temp = K.sigmoid(K.dot(temp, self.Ws[i]) + self.bs[i])
        temp = K.permute_dimensions(temp, (0, 2, 1))
        estimated_weight = K.squeeze(K.dot(temp, K.expand_dims(self.Wf, -1)), -1)
        biased_weight = estimated_weight + self.bias
        non_linear_weight = K.tanh(biased_weight)

        # For each hidden state calculate how much it should contribute
        # to the context vector. This is the main part of attention.
        # To convert the weights to "probabilities" use a softmax:
        # exp(x) / sum(exp(xi)).
        prob = K.exp(non_linear_weight)
        # Compute the total sum for each batch.
        total_sum = K.sum(prob, axis=1, keepdims=True)
        prob /= K.cast(total_sum, K.floatx())

        # Enable this if you want access to internal probabilities.
        # Should only be used for testing that Attention works as expected.
        # return prob

        # Multiply each hidden value by the corresponding probability.
        prob = K.expand_dims(prob, -1)
        new_hidden_values = x * prob
        return K.sum(new_hidden_values, axis=1)
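As a rough numpy illustration of the normalization and weighting steps above (not the layer itself), with assumed shapes of batch=2, time=4, features=3:

import numpy as np

x = np.random.rand(2, 4, 3)              # hidden states (batch, time, features)
w = np.random.rand(2, 4)                 # non_linear_weight, one scalar per time step

prob = np.exp(w)
prob /= prob.sum(axis=1, keepdims=True)  # exp(x) / sum(exp(xi)) over the time axis

prob = np.expand_dims(prob, -1)          # (2, 4, 1) so it broadcasts over the feature axis
context = (x * prob).sum(axis=1)         # (2, 3) weighted sum of the hidden states
print(context.shape)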
Example #4
 def call(self, x):
     r = K.cast(K.arange(self.num), K.floatx()) / float(self.num - 1)
     r = self.start + (self.stop - self.start) * r
     r = K.expand_dims(K.expand_dims(r), axis=0)
     r = K.cast(r, dtype=K.floatx())
     r = K.tile(r, (K.shape(x)[0], 1, 1))
     return r
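A quick numpy sketch of what this layer produces, with assumed values for start, stop, num and the batch size:

import numpy as np

start, stop, num, batch = 0.0, 1.0, 5, 3                 # assumed parameters
r = np.arange(num, dtype='float32') / float(num - 1)
r = start + (stop - start) * r                           # evenly spaced values in [start, stop]
r = np.expand_dims(np.expand_dims(r, -1), 0)             # (1, num, 1)
r = np.tile(r, (batch, 1, 1))                            # (batch, num, 1), one copy per sample
print(r.shape, r[0, :, 0])                               # (3, 5, 1) [0.   0.25 0.5  0.75 1.  ]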
def duel_atari_cnn(input_shape, n_actions, mode='mean'):
    """
    Follows the network architecture described in the 2015 Deepmind Nature paper
    with the changes proposed in Dueling Network paper.

    input_shape: 3D Tensor (channels, height, width) format
    n_actions: int
    """

    agg = None
    if mode == 'mean':
        agg = Lambda(lambda a: K.expand_dims(a[:,0], dim=-1) + a[:,1:] - K.mean(a[:, 1:], keepdims=True), output_shape=(n_actions,))
    elif mode == 'max':
        agg = Lambda(lambda a: K.expand_dims(a[:,0], dim=-1) + a[:,1:] - K.max(a[:, 1:], keepdims=True), output_shape=(n_actions,))
    else:
        raise ValueError("mode must be either 'mean' or 'max'")

    input = Input(shape=input_shape)
    x = Convolution2D(32, 8, 8, subsample=(4,4), activation='relu')(input)
    x = Convolution2D(64, 4, 4, subsample=(2,2), activation='relu')(x)
    x = Convolution2D(64, 3, 3, subsample=(1,1), activation='relu')(x)
    x = Flatten()(x)

    x = Dense(512, activation='relu')(x)
    x = Dense(n_actions+1)(x)
    output = agg(x)

    return Model(input, output)
Example #6
def accumulate(attend_function, inputs, input_length,
                                mask=None, return_probabilities=False):
    '''get the running attention over a sequence. 

    given a 3-dim tensor where the 1st dim is time, calculate the running attended sum.
    in other words, at the first time step, you only have that item.
                    at the second time step, attend over the first two items.
                    at the third, the first three. and so on.

    this is basically a mod on keras' rnn implementation
    author: bcm
    '''

    ndim = inputs.ndim
    assert ndim >= 3, 'inputs should be at least 3d'

    axes = [1,0] + list(range(2, ndim))
    inputs = inputs.dimshuffle(axes)

    indices = list(range(input_length))

    successive_outputs = []
    if mask is not None:
        if mask.ndim == ndim-1:
            mask = K.expand_dims(mask)
        assert mask.ndim == ndim
        mask = mask.dimshuffle(axes)
        prev_output = None

    successive_outputs = []
    successive_pvecs = []
    uncover_mask = K.zeros_like(inputs)
    uncover_indices = K.arange(input_length)
    for _ in range(ndim-1):
        uncover_indices = K.expand_dims(uncover_indices)
    make_subset = lambda i,X: K.switch(uncover_indices <= i, X, uncover_mask)
    for i in indices:
        inputs_i = make_subset(i, inputs)
        if mask is not None:
            mask_i = make_subset(i, mask)
            output = attend_function(inputs_i, mask_i) # this should not output the time dimension; it should be marginalized over.
        else:
            output = attend_function(inputs_i) # this should not output the time dimension; it should be marginalized over.
        if return_probabilities:
            output, p_vectors = output
            successive_pvecs.append(p_vectors)
        assert output.ndim == 2, "Your attention function is malfunctioning; the attention accumulator should return 2 dimensional tensors"
        successive_outputs.append(output)
    outputs = K.pack(successive_outputs)
    K.squeeze(outputs, -1)
    axes = [1, 0] + list(range(2, outputs.ndim))
    outputs = outputs.dimshuffle(axes)

    if return_probabilities:
        out_pvecs = K.pack(successive_pvecs)
        K.squeeze(out_pvecs, -1)
        out_pvecs = out_pvecs.dimshuffle(axes)
        outputs = [outputs, out_pvecs]

    return outputs
Example #7
 def __init__(self, *kargs, **kwargs):
     super(TDD, self).__init__(*kargs, **kwargs)
     mult = np.zeros(self.input_length)
     mult[0] = 1.0
     t = K.variable(value=mult)
     t = K.expand_dims(t, 0)
     t = K.expand_dims(t, 2)
     self.mult = t
 def _get_anchor_positive_triplet_mask(self, y_true: Tensor, pairwise_dist: Tensor) -> Tensor:
     # mask label(a) != label(p)
     mask1 = K.equal(K.expand_dims(y_true, 0), K.expand_dims(y_true, 1))
     mask1 = K.cast(mask1, K.dtype(pairwise_dist))
     # mask a == p
     mask2 = K.not_equal(pairwise_dist, 0.0)
     mask2 = K.cast(mask2, K.dtype(pairwise_dist))
     return mask1 * mask2
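A minimal numpy sketch of how the two expand_dims calls build the pairwise label-equality mask; the labels are made up for illustration:

import numpy as np

y_true = np.array([0, 1, 0, 2])                              # hypothetical labels
eq = np.expand_dims(y_true, 0) == np.expand_dims(y_true, 1)  # (1, 4) vs (4, 1) broadcasts to (4, 4)
print(eq.astype(np.float32))
# entry [i, j] is 1.0 exactly when label(i) == label(j), which is what
# K.equal(K.expand_dims(y_true, 0), K.expand_dims(y_true, 1)) computes.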
 def call(self, x, mask=None):
     # x[0]: (batch_size, input_length, input_dim)
     # x[1]: (batch_size, 1) indices of prepositions
     # Optional: x[2]: (batch_size, input_length - 2)
     assert isinstance(x, list) or isinstance(x, tuple)
     encoded_sentence = x[0]
     prep_indices = K.squeeze(x[1], axis=-1)  #(batch_size,)
     batch_indices = K.arange(K.shape(encoded_sentence)[0])  # (batch_size,)
     if self.with_attachment_probs:
         # We're essentially doing K.argmax(x[2]) here, but argmax is not differentiable!
         head_probs = x[2]
         head_probs_padding = K.zeros_like(x[2])[:, :2]  # (batch_size, 2)
         # (batch_size, input_length)
         padded_head_probs = K.concatenate([head_probs, head_probs_padding])
         # (batch_size, 1)
         max_head_probs = K.expand_dims(K.max(padded_head_probs, axis=1))
         # (batch_size, input_length, 1)
         max_head_prob_indices = K.expand_dims(K.equal(padded_head_probs, max_head_probs))
         # (batch_size, input_length, input_dim)
         masked_head_encoding = K.switch(max_head_prob_indices, encoded_sentence, K.zeros_like(encoded_sentence))
         # (batch_size, input_dim)
         head_encoding = K.sum(masked_head_encoding, axis=1)
     else:
         head_indices = prep_indices - 1  # (batch_size,)
         head_encoding = encoded_sentence[batch_indices, head_indices, :]  # (batch_size, input_dim)
     prep_encoding = encoded_sentence[batch_indices, prep_indices, :]  # (batch_size, input_dim)
     child_encoding = encoded_sentence[batch_indices, prep_indices+1, :]  # (batch_size, input_dim)
     '''
     prep_indices = x[1]
     sentence_mask = mask[0]
     if sentence_mask is not None:
         if K.ndim(sentence_mask) > 2:
             # This means this layer came after a Bidirectional layer. Keras has this bug which
             # concatenates input masks instead of output masks.
             # TODO: Fix Bidirectional instead.
             sentence_mask = K.any(sentence_mask, axis=(-2, -1))
     head_encoding, prep_encoding, child_encoding = self.get_split_averages(encoded_sentence, sentence_mask,
                                                                            prep_indices)
     '''
     head_projection = K.dot(head_encoding, self.proj_head)  # (batch_size, proj_dim)
     prep_projection = K.dot(prep_encoding, self.proj_prep)  # (batch_size, proj_dim)
     child_projection = K.dot(child_encoding, self.proj_child)  # (batch_size, proj_dim)
     #(batch_size, proj_dim)
     if self.composition_type == 'HPCT':
         composed_projection = K.tanh(head_projection + prep_projection + child_projection)
     elif self.composition_type == 'HPC':
         prep_child_projection = K.tanh(prep_projection + child_projection)  # (batch_size, proj_dim)
         composed_projection = K.tanh(head_projection + prep_child_projection)
     else:
         # Composition type in HC
         composed_projection = K.tanh(head_projection + child_projection)
     for hidden_layer in self.hidden_layers:
         composed_projection = K.tanh(K.dot(composed_projection, hidden_layer))  # (batch_size, proj_dim)
     # (batch_size, num_classes)
     class_scores = K.dot(composed_projection, self.scorer)
     label_probabilities = K.softmax(class_scores)
     return label_probabilities
Example #10
	def _build(self, model):
		""" Instantiates the layer with the given backend.
		"""
		backend = model.get_backend()
		if backend.get_name() == 'keras':

			import keras.layers as L			# pylint: disable=import-error
			import keras.backend as K			# pylint: disable=import-error

			target_dim = self.dimension
			if target_dim >= 0:
				target_dim += 1

			def expand_shape(input_shape):
				""" Computes the expanded shape.
				"""
				dim = target_dim
				if dim < 0:
					dim += len(input_shape) + 1
				return input_shape[:dim] + (1,) + input_shape[dim:]

			if backend.keras_version() == 1:
				func = lambda x: K.expand_dims(x, dim=target_dim)
			else:
				func = lambda x: K.expand_dims(x, axis=target_dim)

			yield L.Lambda(
				func,
				expand_shape,
				name=self.name
			)

		elif backend.get_name() == 'pytorch':

			import torch						# pylint: disable=import-error

			def connect(inputs):
				""" Connects the layer.
				"""
				assert len(inputs) == 1
				dim = self.dimension
				if dim < 0:
					dim += len(inputs[0]['shape']) + 1
				dim += 1
				return {
					'shape' : self.shape([inputs[0]['shape']]),
					'layer' : model.data.add_operation(
						lambda x: torch.unsqueeze(x, dim)
					)(inputs[0]['layer'])
				}

			yield connect

		else:
			raise ValueError('Unknown or unsupported backend: {}'.format(backend))
 def _batch_all_triplet_loss(self, y_true: Tensor, pairwise_dist: Tensor) -> Tensor:
     anchor_positive_dist = K.expand_dims(pairwise_dist, 2)
     anchor_negative_dist = K.expand_dims(pairwise_dist, 1)
     triplet_loss = anchor_positive_dist - anchor_negative_dist + self.margin
     mask = self._get_triplet_mask(y_true, pairwise_dist)
     triplet_loss = mask * triplet_loss
     triplet_loss = K.clip(triplet_loss, 0.0, None)
     valid_triplets = K.cast(K.greater(triplet_loss, 1e-16), K.dtype(triplet_loss))
     num_positive_triplets = K.sum(valid_triplets)
     triplet_loss = K.sum(triplet_loss) / (num_positive_triplets + 1e-16)
     return triplet_loss
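The broadcasting trick above is easiest to see in numpy: expanding the pairwise distance matrix on two different axes yields every (anchor, positive, negative) combination. Sizes are arbitrary:

import numpy as np

pairwise_dist = np.random.rand(4, 4)         # hypothetical (batch, batch) distances
ap = np.expand_dims(pairwise_dist, 2)        # (batch, batch, 1): d(anchor, positive)
an = np.expand_dims(pairwise_dist, 1)        # (batch, 1, batch): d(anchor, negative)
margin = 0.2
triplet = ap - an + margin                   # (batch, batch, batch)
print(triplet.shape)                         # (4, 4, 4); entry [i, j, k] = d(i, j) - d(i, k) + margin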
Example #12
 def lookup(self, x, W, memory_length):
     # shape: (batch*memory_length, input_length)
     x = K.cast(K.reshape(x, (-1, self.input_length)), 'int32')
     mask = K.expand_dims(K.not_equal(x, 0.), dim=-1)
     # shape: (batch*memory_length, input_length, output_dim)
     X = K.gather(W, x)
     if self.bow_mode == "bow":
         # shape: (batch*memory_length, output_dim)
         X = K.sum(X + K.expand_dims(self.Te, 0), axis=1)
     # shape: (batch, memory_length, output_dim)
     X = K.reshape(X, (-1, memory_length, self.output_dim))
     return X, mask
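A small numpy sketch of the padding mask built above: non-zero word ids get a 1, and the trailing axis lets the mask broadcast against the embedded vectors. The ids are made up:

import numpy as np

x = np.array([[3, 7, 0, 0],
              [5, 0, 0, 0]])                             # (batch*memory_length, input_length) word ids
mask = np.expand_dims(x != 0, -1).astype('float32')      # (batch*memory_length, input_length, 1)
print(mask[:, :, 0])
# [[1. 1. 0. 0.]
#  [1. 0. 0. 0.]]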
Example #13
    def __init__(self, model, policy=None, test_policy=None, enable_double_dqn=True, enable_dueling_network=False,
                 dueling_type='avg', *args, **kwargs):
        super(DQNAgent, self).__init__(*args, **kwargs)

        # Validate (important) input.
        if hasattr(model.output, '__len__') and len(model.output) > 1:
            raise ValueError('Model "{}" has more than one output. DQN expects a model that has a single output.'.format(model))
        if model.output._keras_shape != (None, self.nb_actions):
            raise ValueError('Model output "{}" has invalid shape. DQN expects a model that has one dimension for each action, in this case {}.'.format(model.output, self.nb_actions))

        # Parameters.
        self.enable_double_dqn = enable_double_dqn
        self.enable_dueling_network = enable_dueling_network
        self.dueling_type = dueling_type
        if self.enable_dueling_network:
            # get the second last layer of the model, abandon the last layer
            layer = model.layers[-2]
            nb_action = model.output._keras_shape[-1]
            # layer y has a shape (nb_action+1,)
            # y[:,0] represents V(s;theta)
            # y[:,1:] represents A(s,a;theta)
            y = Dense(nb_action + 1, activation='linear')(layer.output)
            # calculate the Q(s,a;theta)
            # dueling_type == 'avg'
            # Q(s,a;theta) = V(s;theta) + (A(s,a;theta)-Avg_a(A(s,a;theta)))
            # dueling_type == 'max'
            # Q(s,a;theta) = V(s;theta) + (A(s,a;theta)-max_a(A(s,a;theta)))
            # dueling_type == 'naive'
            # Q(s,a;theta) = V(s;theta) + A(s,a;theta)
            if self.dueling_type == 'avg':
                outputlayer = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:] - K.mean(a[:, 1:], keepdims=True), output_shape=(nb_action,))(y)
            elif self.dueling_type == 'max':
                outputlayer = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:] - K.max(a[:, 1:], keepdims=True), output_shape=(nb_action,))(y)
            elif self.dueling_type == 'naive':
                outputlayer = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:], output_shape=(nb_action,))(y)
            else:
                assert False, "dueling_type must be one of {'avg','max','naive'}"

            model = Model(inputs=model.input, outputs=outputlayer)

        # Related objects.
        self.model = model
        if policy is None:
            policy = EpsGreedyQPolicy()
        if test_policy is None:
            test_policy = GreedyQPolicy()
        self.policy = policy
        self.test_policy = test_policy

        # State.
        self.reset_states()
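A per-row numpy sketch of the 'avg' dueling aggregation used above, with arbitrary values; the first column plays the role of V(s) and the rest of A(s,a):

import numpy as np

a = np.array([[1.0, 0.2, -0.3, 0.5]])            # one row of y: [V, A_1, A_2, A_3]
v = np.expand_dims(a[:, 0], -1)                  # (batch, 1) state value
adv = a[:, 1:]                                   # (batch, nb_actions) advantages
q = v + adv - adv.mean(axis=1, keepdims=True)    # Q(s,a) = V(s) + (A(s,a) - mean_a A(s,a))
print(q)                                         # approximately [[1.0667 0.5667 1.3667]]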
def iou(x_true, y_true, w_true, h_true, x_pred, y_pred, w_pred, h_pred, t, pred_confid_tf):
    x_true = K.expand_dims(x_true, 2)
    y_true = K.expand_dims(y_true, 2)
    w_true = K.expand_dims(w_true, 2)
    h_true = K.expand_dims(h_true, 2)
    x_pred = K.expand_dims(x_pred, 2)
    y_pred = K.expand_dims(y_pred, 2)
    w_pred = K.expand_dims(w_pred, 2)
    h_pred = K.expand_dims(h_pred, 2)

    xoffset = K.expand_dims(tf.convert_to_tensor(np.asarray([0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7], dtype=np.float32)),1)
    yoffset = K.expand_dims(tf.convert_to_tensor(np.asarray([0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4], dtype=np.float32)),1)


    # xoffset = K.cast_to_floatx((np.tile(np.arange(side),side)))
    # yoffset = K.cast_to_floatx((np.repeat(np.arange(side),side)))
    x = tf.where(t, x_pred, K.zeros_like(x_pred))
    y = tf.where(t, y_pred, K.zeros_like(y_pred))
    w = tf.where(t, w_pred, K.zeros_like(w_pred))
    h = tf.where(t, h_pred, K.zeros_like(h_pred))

    ow = overlap(x + xoffset, w * 256. , x_true + xoffset, w_true * 256.)
    oh = overlap(y + yoffset, h * 160., y_true + yoffset, h_true * 160.)

    ow = tf.where(K.greater(ow, 0), ow, K.zeros_like(ow))
    oh = tf.where(K.greater(oh, 0), oh, K.zeros_like(oh))
    intersection = ow * oh
    union = w * 256. * h * 160. + w_true * 256. * h_true * 160.  - intersection + K.epsilon()  # prevent div 0

    #
    # find best iou among bboxs
    # iouall shape=(-1, bnum*gridcells)
    iouall = intersection / union
    obj_count = K.sum(tf.where(t, K.ones_like(x_true), K.zeros_like(x_true)))

    ave_iou = K.sum(iouall) / (obj_count + 0.0000001)
    recall_t = K.greater(iouall, 0.5)
    # recall_count = K.sum(tf.select(recall_t, K.ones_like(iouall), K.zeros_like(iouall)))

    fid_t = K.greater(pred_confid_tf, 0.3)
    recall_count_all = K.sum(tf.where(fid_t, K.ones_like(iouall), K.zeros_like(iouall)))

    #  
    obj_fid_t = tf.logical_and(fid_t, t)
    obj_fid_t = tf.logical_and(fid_t, recall_t)
    effective_iou_count = K.sum(tf.where(obj_fid_t, K.ones_like(iouall), K.zeros_like(iouall)))

    recall = effective_iou_count / (obj_count + 0.00000001)
    precision = effective_iou_count / (recall_count_all + 0.0000001)
    return ave_iou, recall, precision, obj_count, intersection, union, ow, oh, x, y, w, h
 def call(self, x, mask=None):
     if (self.size is None) or (self.mode == 'sum'):
         self.size = int(x.shape[-1])
     batch_size, seq_len = K.shape(x)[0], K.shape(x)[1]
     position_j = 1. / K.pow(10000., 2 * K.arange(self.size / 2, dtype='float32') / self.size)
     position_j = K.expand_dims(position_j, 0)
     position_i = K.cumsum(K.ones_like(x[:, :, 0]), 1) - 1  # K.arange does not support variable length, so build the positions with cumsum instead
     position_i = K.expand_dims(position_i, 2)
     position_ij = K.dot(position_i, position_j)
     position_ij = K.concatenate([K.cos(position_ij), K.sin(position_ij)], 2)
     if self.mode == 'sum':
         return position_ij + x
     elif self.mode == 'concat':
         return K.concatenate([position_ij, x], 2)
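A numpy sketch of the sinusoidal position signal above for a single sample, with an assumed size of 6 and sequence length of 5 (the layer itself builds the positions with cumsum so they work for variable-length batches):

import numpy as np

size, seq_len = 6, 5                                                           # assumed sizes
position_j = 1. / np.power(10000., 2 * np.arange(size // 2) / float(size))     # (size/2,) inverse frequencies
position_j = np.expand_dims(position_j, 0)                                     # (1, size/2)
position_i = np.expand_dims(np.arange(seq_len, dtype='float32'), 1)            # (seq_len, 1) positions
position_ij = position_i @ position_j                                          # (seq_len, size/2) outer product
position_ij = np.concatenate([np.cos(position_ij), np.sin(position_ij)], -1)   # (seq_len, size)
print(position_ij.shape)                                                       # (5, 6)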
def sparse_amsoftmax_loss(y_true, y_pred, scale=30, margin=0.35):
    y_true = K.expand_dims(y_true[:, 0], 1) # ensure y_true has shape (None, 1)
    y_true = K.cast(y_true, 'int32') # ensure y_true has dtype int32
    batch_idxs = K.arange(0, K.shape(y_true)[0])
    batch_idxs = K.expand_dims(batch_idxs, 1)
    idxs = K.concatenate([batch_idxs, y_true], 1)
    y_true_pred = K.tf.gather_nd(y_pred, idxs) # target scores, extracted with tf.gather_nd
    y_true_pred = K.expand_dims(y_true_pred, 1)
    y_true_pred_margin = y_true_pred - margin # subtract the margin
    _Z = K.concatenate([y_pred, y_true_pred_margin], 1) # for computing the partition function
    _Z = _Z * scale # scale up, since pred is a cosine value in [-1, 1]
    logZ = K.logsumexp(_Z, 1, keepdims=True) # use logsumexp so the gradient does not vanish
    logZ = logZ + K.log(1 - K.exp(scale * y_true_pred - logZ)) # subtract exp(scale * y_true_pred) from Z
    return - y_true_pred_margin * scale + logZ
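A numpy sketch of how the (row, class) index pairs for gather_nd are assembled above; the class indices are invented:

import numpy as np

y_true = np.array([[2], [0], [1]])                           # (batch, 1) target class per row
batch_idxs = np.expand_dims(np.arange(y_true.shape[0]), 1)   # (batch, 1) row indices
idxs = np.concatenate([batch_idxs, y_true], 1)               # (batch, 2) pairs [row, class]
print(idxs)
# [[0 2]
#  [1 0]
#  [2 1]]
# tf.gather_nd(y_pred, idxs) then picks y_pred[row, class] for every row.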
Example #17
    def call(self, inputs, training=None):
        # inputs.shape=[None, input_num_capsule, input_dim_vector]
        # Expand dims to [None, input_num_capsule, 1, 1, input_dim_vector]
        inputs_expand = K.expand_dims(K.expand_dims(inputs, 2), 2)

        # Replicate num_capsule dimension to prepare being multiplied by W
        # Now it has shape = [None, input_num_capsule, num_capsule, 1, input_dim_vector]
        inputs_tiled = K.tile(inputs_expand, [1, 1, self.num_capsule, 1, 1])

        """  
        # Compute `inputs * W` by expanding the first dim of W. This is more time-consuming and needs batch_size.
        # Now W has shape  = [batch_size, input_num_capsule, num_capsule, input_dim_vector, dim_vector]
        w_tiled = K.tile(K.expand_dims(self.W, 0), [self.batch_size, 1, 1, 1, 1])
        
        # Transformed vectors, inputs_hat.shape = [None, input_num_capsule, num_capsule, 1, dim_vector]
        inputs_hat = K.batch_dot(inputs_tiled, w_tiled, [4, 3])
        """
        # Compute `inputs * W` by scanning inputs_tiled on dimension 0. This is faster but requires Tensorflow.
        # inputs_hat.shape = [None, input_num_capsule, num_capsule, 1, dim_vector]
        inputs_hat = tf.scan(lambda ac, x: K.batch_dot(x, self.W, [3, 2]),
                             elems=inputs_tiled,
                             initializer=K.zeros([self.input_num_capsule, self.num_capsule, 1, self.dim_vector]))
        """
        # Routing algorithm V1. Use tf.while_loop in a dynamic way.
        def body(i, b, outputs):
            c = tf.nn.softmax(self.bias, dim=2)  # dim=2 is the num_capsule dimension
            outputs = squash(K.sum(c * inputs_hat, 1, keepdims=True))
            b = b + K.sum(inputs_hat * outputs, -1, keepdims=True)
            return [i-1, b, outputs]

        cond = lambda i, b, inputs_hat: i > 0
        loop_vars = [K.constant(self.num_routing), self.bias, K.sum(inputs_hat, 1, keepdims=True)]
        _, _, outputs = tf.while_loop(cond, body, loop_vars)
        """
        # Routing algorithm V2. Use iteration. V2 and V1 both work without much difference on performance
        assert self.num_routing > 0, 'The num_routing should be > 0.'

        for i in range(self.num_routing):
            c = tf.nn.softmax(self.bias, dim=2)  # dim=2 is the num_capsule dimension
            # outputs.shape=[None, 1, num_capsule, 1, dim_vector]
            outputs = squash(K.sum(c * inputs_hat, 1, keepdims=True))

            # the last iteration does not need to update the bias, which will not be passed to the graph anyway.
            if i != self.num_routing - 1:
                # self.bias = K.update_add(self.bias, K.sum(inputs_hat * outputs, [0, -1], keepdims=True))
                self.bias += K.sum(inputs_hat * outputs, -1, keepdims=True)
            # tf.summary.histogram('BigBee', self.bias)  # for debugging
        return K.reshape(outputs, [-1, self.num_capsule, self.dim_vector])
    def call(self, x, mask=None):
        eij = dot_product(x, self.W)

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        weighted_input = x * K.expand_dims(a)

        result = K.sum(weighted_input, axis=1)

        if self.return_attention:
            return [result, a]
        return result
Example #19
    def get_output(self, train = False, get_tuple = False):
        # input shape: (nb_samples, time (padded with zeros), input_dim)
        X = self.get_input(train)
        assert K.ndim(X) == 3
        
        mask = self.get_output_mask(train)
        if mask:
            # apply mask
            X *= K.cast(K.expand_dims(mask), X.dtype)
            masking = True
        else:
            masking = False

        if self.stateful:
            initial_states = self.states
        else:
            initial_states = self.get_initial_states(X)

        last_output, outputs, other_outputs, states = LX.rnn(self.attention_step, X, initial_states, self.contexts,
                                              truncate_gradient=self.truncate_gradient,
                                              go_backwards=self.go_backwards,
                                              masking=masking)
        self.other_outputs = other_outputs
        
        if self.stateful:
            self.updates = []
            for i in range(len(states)):
                self.updates.append((self.states[i], states[i]))

        if self.return_sequences:
            return outputs
        else:
            return last_output
Example #20
 def compute_mask(self, x, mask=None):
     if self.return_probabilities:
         mask2 = mask
         if mask is not None:
             mask2 = K.expand_dims(K.all(mask2, axis=-1))
         return [mask, mask2]
     return mask
    def call(self, X, mask=None):
        # 1D -> 2D
        batch = K.shape(X)[0]
        width = deconv_output_length(K.shape(X)[1],
                                    self.filter_length,
                                    self.padding,
                                    self.strides[2])

        print("Output width: ", width)

        print("Input shape: ", K.shape(X))
        X = K.expand_dims(X,2)
        print("Input shape after expand: ", K.shape(X))
        # X = K.permute_dimensions(X, (0, 2, 3, 1))
        X = K.permute_dimensions(X, (0, 2, 1, 3))
        print("Input shape after permute: ", K.shape(X))
        deconv_shape = tf.pack([batch, 1, width, self.nb_filter])
        print("Deconv shape: ", deconv_shape)
        conv_out = tf.nn.conv2d_transpose(X, self.W, strides=self.strides,
                                          padding=self.padding.upper(),
                                          output_shape=deconv_shape)

        output = conv_out + K.reshape(self.b, (1, 1, 1, self.W_shape[2]))
        print("Output shape: ", K.shape(output))
        # output =  K.permute_dimensions(output, (0, 3, 1, 2))
        output =  K.permute_dimensions(output, (0, 2, 1, 3))
        print("Output shape after permute: ", K.shape(output))
        # 2D -> 1D
        output = K.squeeze(output,2)
        print("Output shape after squeeze: ", K.shape(output))
        return output
Example #22
    def simple_context(X, mask, n=activation_rnn_size):
        """Reduce the input just to its headline part (second half).

        For each word in this part it concatenates the output of the previous layer (RNN)
        with a weighted average of the outputs of the description part.
        Only the last `rnn_size - activation_rnn_size` units of each output are used here.
        The first `activation_rnn_size` units are used to compute the weights for the averaging.
        """
        desc, head = X[:, :maxlend, :], X[:, maxlend:, :]
        head_activations, head_words = head[:, :, :n], head[:, :, n:]
        desc_activations, desc_words = desc[:, :, :n], desc[:, :, n:]

        # RTFM http://deeplearning.net/software/theano/library/tensor/basic.html#theano.tensor.batched_tensordot
        # activation for every head word and every desc word
        activation_energies = K.batch_dot(head_activations, desc_activations, axes=(2, 2))
        # make sure we don't use description words that are masked out
        activation_energies = activation_energies + -1e20 * K.expand_dims(
            1. - K.cast(mask[:, :maxlend], 'float32'), 1)

        # for every head word compute weights for every desc word
        activation_energies = K.reshape(activation_energies, (-1, maxlend))
        activation_weights = K.softmax(activation_energies)
        activation_weights = K.reshape(activation_weights, (-1, maxlenh, maxlend))

        # for every head word compute weighted average of desc words
        desc_avg_word = K.batch_dot(activation_weights, desc_words, axes=(2, 1))
        return K.concatenate((desc_avg_word, head_words))
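A numpy sketch of the attention energies and the masking step above, with invented shapes (batch=2, maxlend=3, maxlenh=2, n=4):

import numpy as np

head_activations = np.random.rand(2, 2, 4)                    # (batch, maxlenh, n)
desc_activations = np.random.rand(2, 3, 4)                    # (batch, maxlend, n)

# K.batch_dot(head_activations, desc_activations, axes=(2, 2)) contracts the feature axis:
energies = np.einsum('bhn,bdn->bhd', head_activations, desc_activations)
print(energies.shape)                                         # (2, 2, 3): one score per (head word, desc word)

# masked-out description words get a huge negative energy before the softmax:
mask = np.array([[1, 1, 0], [1, 0, 0]], dtype='float32')      # (batch, maxlend)
energies = energies + -1e20 * np.expand_dims(1. - mask, 1)    # expand_dims broadcasts over the head axis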
Example #23
    def conv_step(self, x, W, b=None, border_mode="valid", filter_shape=None, mask_type='b'):

        mask = np.ones(filter_shape, dtype=_FLOATX)

        in_third = self.input_dim//3
        out_third = self.nb_filter//3
        mask[:out_third,in_third:,0,0] = 0
        mask[out_third:2*out_third,2*in_third:,0,0] = 0

        W = W * mask

        input_shape = self.shuffeled_dims

        x = K.expand_dims(x, -1)  # add a dimension on the right

        conv_out = T.nnet.conv2d(x, W, subsample=self.subsample,
                                 border_mode='half',
                                 filter_flip=False,
                                 input_shape=(input_shape[0],
                                              input_shape[2],
                                              input_shape[3],
                                              1),
                                 filter_shape=filter_shape)
        if b:
            conv_out = conv_out + K.reshape(b, (1, filter_shape[0], 1, 1))

        conv_out = K.squeeze(conv_out, 3)  # remove the dummy 3rd dimension

        return conv_out
Example #24
 def create_neural_network(self):
     model = Sequential()
     model.add(Dense(100, input_dim=self.nstates, activation='relu'))
     model.add(Dense(100, activation='relu'))
     model.add(Dense(self.nactions,activation='linear'))
     
     # get the second last layer of the model, abandon the last layer
     layer = model.layers[-2]
     nb_action = model.output._keras_shape[-1]
    
     # layer y has shape (nb_action+1,)
     # y[:,0] represents V(s;theta)
     # y[:,1:] represents A(s,a;theta)
     y = Dense(nb_action+1, activation='linear')(layer.output)
    
     # calculate the Q(s,a;theta)
     #dueling type average -> Q(s,a;theta) = V(s;theta) + (A(s,a;theta)-Average_a(A(s,a;theta)))
     #outputlayer = Lambda(lambda a:K.expand_dims(a[:,0], -1) + a[:,1:] - K.mean(a[:,1:], keepdims=True), output_shape=(nb_action,))(y)
     #dueling type max     -> Q(s,a;theta) = V(s;theta) + (A(s,a;theta)-Max_a(A(s,a;theta)))
     outputlayer = Lambda(lambda a:K.expand_dims(a[:,0], -1) + a[:,1:] - K.max(a[:,1:,], keepdims=True), output_shape=(nb_action,))(y)
     #dueling type naive   -> Q(s,a;theta) = V(s;theta) + A(s,a;theta)
     #outputlayer = Lambda(lambda a: K.expand_dims(a[:,0], -1) + a[:,1:], output_shape=(nb_action,))(y)
    
     #connect
     model = Model(input=model.input, output=outputlayer)
    
     model.compile(loss='mse', optimizer=Adam(lr=self.alpha))
     model_json = model.to_json()
     with open('cartpole.json','w') as json_file:
         json_file.write(model_json)
     return model
Example #25
def recurrence(y_i, h):
    h_permute = K.permute_dimensions(h, [0, 2, 1])  # (batch_size, encoding_dim, input_length)
    e = K.l2_normalize(
        K.batch_dot(h_permute, s, axes=1),  # (batch_size, input_length)
        axis=1)  # (batch_size, input_length)

    # eqn 6
    alpha = K.softmax(e)  # (batch_size, input_length)

    # eqn 5
    c = K.batch_dot(h, alpha, axes=1)  # (batch_size, encoding_dim)

    recurrence_result = K.expand_dims(
        K.concatenate([c, y_i], axis=1),
        dim=1)  # (batch_size, 1, 2 * encoding_dim)

    expanded_h = Input(shape=(1, 2 * encoding_dim),
                       name='expanded_h')
    gru = Sequential([
        GRU(output_dim,
            return_sequences=False,
            input_shape=(1, 2 * encoding_dim))
    ])
    model = Model(input=[expanded_h],
                  output=[gru(expanded_h)])  # (batch_size, 1, output_dim)
    return model(recurrence_result)
Example #26
 def get_initial_states(self, x):
     initial_state = K.zeros_like(x)  # (samples, num_steps, input_channel, h, w)
     initial_state = K.sum(initial_state, [1, 2])  # (samples, h, w)
     initial_state = K.expand_dims(initial_state, 1)
     initial_state = K.repeat_elements(initial_state, self.nb_filter, 1)
     initial_states = [initial_state for _ in range(len(self.states))]
     return initial_states
Example #27
    def compute_mask(self, inputs, mask=None):

        if mask is None or not any([m is not None for m in mask]):
            return None

        assert hasattr(mask, '__len__') and len(mask) == len(inputs)

        if self.mode in ['sum', 'mul', 'ave']:
            bool_type = 'bool' if K._BACKEND == 'tensorflow' else 'int32'
            masks = [K.cast(m, bool_type) for m in mask if m is not None]
            mask = masks[0]
            for m in masks[1:]:
                mask = mask & m
            return mask
        elif self.mode in ['concat']:
            masks = [K.ones_like(inputs[i][:-1]) if m is None else m for i, m in enumerate(mask)]
            expanded_dims = [K.expand_dims(m) for m in masks]
            concatenated = K.concatenate(expanded_dims, axis=self.concat_axis)
            return K.all(concatenated, axis=-1, keepdims=False)
        elif self.mode in ['cos', 'dot']:
            return None
        elif hasattr(self.mode, '__call__'):
            if hasattr(self._output_mask, '__call__'):
                return self._output_mask(mask)
            else:
                return self._output_mask
        else:
            # this should have been caught earlier
            raise Exception('Invalid merge mode: {}'.format(self.mode))
Example #28
def criterion_GAN(output, target, use_lsgan=True):
    if use_lsgan:
        diff = output - target
        dims = list(range(1, K.ndim(diff)))
        return K.expand_dims((K.mean(diff ** 2, dims)), 0)
    else:
        return K.mean(K.log(output + 1e-12) * target + K.log(1 - output + 1e-12) * (1 - target))
    def call(self, x, mask=None):
        # eij = K.dot(x, self.W) TF backend doesn't support it

        # features_dim = self.W.shape[0]
        # step_dim = x._keras_shape[1]

        features_dim = self.features_dim
        step_dim = self.step_dim

        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)), K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        # print weighted_input.shape
        return K.sum(weighted_input, axis=1)
def make_patches_grid(x, patch_size, patch_stride):
    '''Break image `x` up into a grid of patches.

    input shape: (channels, rows, cols)
    output shape: (rows, cols, channels, patch_rows, patch_cols)
    '''
    from theano.tensor.nnet.neighbours import images2neibs  # TODO: all K, no T
    x = K.expand_dims(x, 0)
    xs = K.shape(x)
    num_rows = 1 + (xs[-2] - patch_size) // patch_stride
    num_cols = 1 + (xs[-1] - patch_size) // patch_stride
    num_channels = xs[-3]
    patches = images2neibs(
        x, (patch_size, patch_size), (patch_stride, patch_stride),
        mode='valid')
    # neibs are sorted per-channel
    patches = K.reshape(patches,
                        (num_channels, K.shape(patches)[0] // num_channels,
                         patch_size, patch_size))
    patches = K.permute_dimensions(patches, (1, 0, 2, 3))
    # arrange in a 2d-grid (rows, cols, channels, px, py)
    patches = K.reshape(
        patches, (num_rows, num_cols, num_channels, patch_size, patch_size))
    patches_norm = K.sqrt(
        K.sum(K.square(patches), axis=(2, 3, 4), keepdims=True))
    return patches, patches_norm
Example #31
 def find_path(argmin_table, best_idx):
     next_best_idx = gather_each_row(argmin_table, best_idx[0][:, 0])
     next_best_idx = K.expand_dims(next_best_idx)
     if K.backend() == 'theano':
         next_best_idx = K.T.unbroadcast(next_best_idx, 1)
     return next_best_idx, [next_best_idx]
 def call(self, inputs, **kwargs):
     if inputs.get_shape().ndims == 5:
         assert inputs.get_shape()[-2].value == 1, 'Error: Must have num_capsules = 1 going into Length'
         inputs = K.squeeze(inputs, axis=-2)
     return K.expand_dims(tf.norm(inputs, axis=-1), axis=-1)
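A shape-only numpy sketch of what this Length-style call returns, with assumed capsule dimensions:

import numpy as np

inputs = np.random.rand(2, 5, 8)                                # (batch, num_capsules, dim_vector), assumed
lengths = np.expand_dims(np.linalg.norm(inputs, axis=-1), -1)   # (2, 5, 1): one norm per capsule
print(lengths.shape)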
    def call(self, x, mask=None):
        # TODO: validate input shape

        assert (len(x) == 3)
        L_flat = x[0]
        mu = x[1]
        a = x[2]

        if self.mode == 'full':
            # Create L and L^T matrix, which we use to construct the positive-definite matrix P.
            L = None
            LT = None
            if K.backend() == 'theano':
                import theano.tensor as T
                import theano

                def fn(x, L_acc, LT_acc):
                    x_ = K.zeros((self.nb_actions, self.nb_actions))
                    x_ = T.set_subtensor(x_[np.tril_indices(self.nb_actions)], x)
                    diag = K.exp(T.diag(x_)) + K.epsilon()
                    x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)], diag)
                    return x_, x_.T

                outputs_info = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]
                results, _ = theano.scan(fn=fn, sequences=L_flat, outputs_info=outputs_info)
                L, LT = results
            elif K.backend() == 'tensorflow':
                import tensorflow as tf

                # Number of elements in a triangular matrix.
                nb_elems = (self.nb_actions * self.nb_actions + self.nb_actions) // 2

                # Create mask for the diagonal elements in L_flat. This is used to exponentiate
                # only the diagonal elements, which is done before gathering.
                diag_indeces = [0]
                for row in range(1, self.nb_actions):
                    diag_indeces.append(diag_indeces[-1] + (row + 1))
                diag_mask = np.zeros(1 + nb_elems)  # +1 for the leading zero
                diag_mask[np.array(diag_indeces) + 1] = 1
                diag_mask = K.variable(diag_mask)

                # Add leading zero element to each element in the L_flat. We use this zero
                # element when gathering L_flat into a lower triangular matrix L.
                nb_rows = tf.shape(L_flat)[0]
                zeros = tf.expand_dims(tf.tile(K.zeros((1,)), [nb_rows]), 1)
                try:
                    # Old TF behavior.
                    L_flat = tf.concat(1, [zeros, L_flat])
                except TypeError:
                    # New TF behavior
                    L_flat = tf.concat([zeros, L_flat], 1)

                # Create mask that can be used to gather elements from L_flat and put them
                # into a lower triangular matrix.
                tril_mask = np.zeros((self.nb_actions, self.nb_actions), dtype='int32')
                tril_mask[np.tril_indices(self.nb_actions)] = range(1, nb_elems + 1)

                # Finally, process each element of the batch.
                init = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]

                def fn(a, x):
                    # Exponentiate everything. This is much easier than only exponentiating
                    # the diagonal elements, and, usually, the action space is relatively low.
                    x_ = K.exp(x) + K.epsilon()
                    # Only keep the diagonal elements.
                    x_ *= diag_mask
                    # Add the original, non-diagonal elements.
                    x_ += x * (1. - diag_mask)
                    # Finally, gather everything into a lower triangular matrix.
                    L_ = tf.gather(x_, tril_mask)
                    return [L_, tf.transpose(L_)]

                tmp = tf.scan(fn, L_flat, initializer=init)
                if isinstance(tmp, (list, tuple)):
                    # TensorFlow 0.10 now returns a tuple of tensors.
                    L, LT = tmp
                else:
                    # Old TensorFlow < 0.10 returns a shared tensor.
                    L = tmp[:, 0, :, :]
                    LT = tmp[:, 1, :, :]
            else:
                raise RuntimeError('Unknown Keras backend "{}".'.format(K.backend()))
            assert L is not None
            assert LT is not None
            P = K.batch_dot(L, LT)
        elif self.mode == 'diag':
            if K.backend() == 'theano':
                import theano.tensor as T
                import theano

                def fn(x, P_acc):
                    x_ = K.zeros((self.nb_actions, self.nb_actions))
                    x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)], x)
                    return x_

                outputs_info = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]
                P, _ = theano.scan(fn=fn, sequences=L_flat, outputs_info=outputs_info)
            elif K.backend() == 'tensorflow':
                import tensorflow as tf

                # Create mask that can be used to gather elements from L_flat and put them
                # into a diagonal matrix.
                diag_mask = np.zeros((self.nb_actions, self.nb_actions), dtype='int32')
                diag_mask[np.diag_indices(self.nb_actions)] = range(1, self.nb_actions + 1)

                # Add leading zero element to each element in the L_flat. We use this zero
                # element when gathering L_flat into a lower triangular matrix L.
                nb_rows = tf.shape(L_flat)[0]
                zeros = tf.expand_dims(tf.tile(K.zeros((1,)), [nb_rows]), 1)
                try:
                    # Old TF behavior.
                    L_flat = tf.concat(1, [zeros, L_flat])
                except TypeError:
                    # New TF behavior
                    L_flat = tf.concat([zeros, L_flat], 1)

                # Finally, process each element of the batch.
                def fn(a, x):
                    x_ = tf.gather(x, diag_mask)
                    return x_

                P = tf.scan(fn, L_flat, initializer=K.zeros((self.nb_actions, self.nb_actions)))
            else:
                raise RuntimeError('Unknown Keras backend "{}".'.format(K.backend()))
        assert P is not None
        assert K.ndim(P) == 3

        # Combine a, mu and P into a scalar (over the batches). What we compute here is
        # -.5 * (a - mu)^T * P * (a - mu), where * denotes the dot-product. Unfortunately
        # TensorFlow handles vector * P slightly suboptimal, hence we convert the vectors to
        # 1xd/dx1 matrices and finally flatten the resulting 1x1 matrix into a scalar. All
        # operations happen over the batch size, which is dimension 0.
        prod = K.batch_dot(K.expand_dims(a - mu, 1), P)
        prod = K.batch_dot(prod, K.expand_dims(a - mu, -1))
        A = -.5 * K.batch_flatten(prod)
        assert K.ndim(A) == 2
        return A
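A numpy sketch of the final quadratic form: (a - mu) is expanded to 1xd and dx1 matrices, batch-multiplied with P, and the 1x1 result flattened, as described in the comment above. Sizes are arbitrary:

import numpy as np

batch, nb_actions = 2, 3                                 # assumed sizes
a = np.random.rand(batch, nb_actions)
mu = np.random.rand(batch, nb_actions)
P = np.random.rand(batch, nb_actions, nb_actions)

diff = a - mu
prod = np.matmul(np.expand_dims(diff, 1), P)             # (batch, 1, nb_actions)
prod = np.matmul(prod, np.expand_dims(diff, -1))         # (batch, 1, 1)
A = -.5 * prod.reshape(batch, -1)                        # (batch, 1), like K.batch_flatten
print(A.shape)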
	def class_accuracy(y_true, y_pred):
		mask = K.cast( K.equal(y_true[...,dataset.num_classes], 1.0  ), K.floatx()  )
		acc = K.cast(K.equal(K.argmax(y_true[...,0:dataset.num_classes], axis=-1), K.argmax(y_pred[...,0:dataset.num_classes], axis=-1)), K.floatx())
		if K.backend() == "cntk":
			acc = K.expand_dims(acc)
		return K.sum(acc * mask) / K.maximum(K.sum(mask), 1.0)
Example #35
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors)//3 # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0] # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
             anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')
        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
            return b+1, ignore_mask
        _, ignore_mask = K.control_flow_ops.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[...,0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh-raw_pred[...,2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)], message='loss: ')
    return loss
Example #36
def get_model(): 
 
    CONC=[] 
    IGLOO_V=[] ###STRUCTURE FROM IGLOO PAPER BEGINS
 
    inin = Input(shape=input_shape, name='input')
    
    #inin=Lambda(lambda q: q[:,1:,:]) (inin) 

    a=Conv1D(40,2,padding="causal")(inin) #first set of convolutions
    b=Conv1D(40,4,padding="causal")(inin)
    c=Conv1D(40,8,padding="causal")(inin)

    x=Concatenate(axis=-1)([a,b,c])
    x=Activation("relu")(x)
    x= BatchNormalization(axis=-1)(x)

    a=Conv1D(40,2,padding="causal")(x) #second set of convolutions
    b=Conv1D(40,4, padding="causal")(x)
    c=Conv1D(40,8, padding="causal")(x)

    x=Concatenate(axis=-1)([a,b,c])
    x=Activation("relu")(x)
    x= BatchNormalization(axis=-1)(x)

    a=Conv1D(40,2,padding="causal")(x) #third set of convolutions
    b=Conv1D(40,4,padding="causal")(x)
    c=Conv1D(40,8, padding="causal")(x)

    x=Concatenate(axis=-1)([a,b,c])
    x=Activation("relu")(x)
    x= BatchNormalization(axis=-1)(x)
    
    x=Lambda(lambda q: q[:,1:,:]) (x)
 
    x=Conv1D(64, 1,strides=1,padding=padding)  (x) 
    x = BatchNormalization(axis=-1) (x) 
    x = Activation("relu") (x) 
    x = SpatialDropout1D(mDR) (x)

    IGLOO_V.append(IGLOO_RETURNFULLSEQ(x,nb_patches_FULL,Conv1D_dim_full_seq,patch_size=patch_size,padding_style=padding,stretch_factor=stretch_factor,l2reg=igloo_l2reg,
                                      add_residual=add_residual,nb_stacks=nb_stacks_full,build_backbone=build_backbone)) 
 

    CONC.append(IGLOO_V[0]) 
 
    for kk in range(5):  #IGLOO patches
 
        x=Conv1D(C1D_K, 1,strides=1,padding=padding)  (CONC[kk]) 
        x = BatchNormalization(axis=-1) (x) 
        x = Activation("relu") (x) 
        x = SpatialDropout1D(mDR) (x) 
 
        IGLOO_V.append(IGLOO_RETURNFULLSEQ(x,nb_patches_FULL,Conv1D_dim_full_seq,patch_size=patch_size,padding_style=padding,stretch_factor=stretch_factor,l2reg=igloo_l2reg,
                                           add_residual=add_residual,nb_stacks=nb_stacks_full,build_backbone=build_backbone)) 
 
 
        ###second residual connection 
        co=Add() ([IGLOO_V[kk+1],CONC[kk]]) 
        CONC.append(Activation("relu") (co)) 
 
 
    x=Conv1D(C1D_K, 1,strides=1,padding=padding)  (CONC[-1]) 
    x = BatchNormalization(axis=-1) (x) 
    x = Activation("relu") (x) 
    x = SpatialDropout1D(mDR) (x) 
 
    y=IGLOO(x,nb_patches,CONV1D_dim,patch_size=patch_size,return_sequences=False,l2reg=igloo_l2reg,padding_style=padding,nb_stacks=nb_stacks,DR=mDR,max_pooling_kernel=MAXPOOL_size) 
    #### Structure from IGLOO Paper ends
    
    y=Dense(64,activation='relu') (y) 
    y=Dropout(0.4) (y)
    output_1=Dense(1,activation='softmax') (y) #first output, a binary classification of normal or abnormal

    word_input = Input(shape=(9,), name='decoder_input')
    
 
    embedded_word=Embedding(input_dim=1149, output_dim=500, name='word_embedding',input_length=9,trainable=False, weights=[balloony])(word_input) #trainable is false, weight=ballooney
   


    input_=embedded_word
    

    #input_ = BatchNormalization(axis=-1)(input_)
    gru_out=GRU(700, activation='tanh', recurrent_activation='sigmoid', 
    dropout=0.22,return_sequences=True, return_state=False,unroll=False,reset_after=True)(input_) #first gru layer
    
    input_=gru_out
    
    input_ = BatchNormalization(axis=-1)(input_)
    gru_out=GRU(700, activation='tanh', recurrent_activation='sigmoid',  #second gru layer
    dropout=0.22,return_sequences=True, return_state=False,unroll=False,reset_after=True)(input_)
    input_ = gru_out
    
    features = Permute((2,1))(x)  ## attention mechanism begins
 
    part1=Dense(700)(features)
    gru_out=Permute((2,1))(gru_out)
    
    shape= K.int_shape(part1) 
    
    part2=Dense(shape[1])(gru_out) #change output to dimensions in order to add
    part2=Permute((2,1))(part2) 
    part3= Add()([part1,part2])
    
    score = Activation("tanh")(part3) 
    part4= Dense(1)(score)
    
    attention_weights=Lambda(lambda x: softmax(x,axis=1))(part4)
    
    context_vector=multiply([attention_weights,features]) #weighting the pixels
    context_vector=Lambda(lambda x: K.sum(x,axis=1))(context_vector)
    
    context_vector_mod=Dense(600)(context_vector)
    context_vector_mod = Lambda(lambda x: K.expand_dims(x, -1))(context_vector_mod)
    context_vector_mod = Permute((2,1))(context_vector_mod)  ## attention mechanism ends
    
    gru_out_mod=Dense(600)(gru_out)

    
    input_=Concatenate(axis=1)([context_vector_mod, gru_out_mod])
    input_=Activation("tanh")(input_)


    input_ = BatchNormalization(axis=-1)(input_)
    gru_out=GRU(9, activation='tanh', recurrent_activation='sigmoid', dropout=0.22,return_sequences=True, return_state=False,unroll=False,reset_after=True)(input_)
   
    gru_out=Permute((2,1))(gru_out)
  
  
    gru_out=Activation("tanh")(gru_out)
    sequence_output = TimeDistributed(Dense(units=vocab_size))(gru_out) ##final word is generated
    
    
 
    opt = optimizers.Adam(lr=0.0005, clipnorm=1.0, decay=0.001) 
    model = Model(inputs=[inin,word_input],outputs=[output_1,sequence_output]) 
    

 
    model.compile(loss=['binary_crossentropy',cross_entropy2],optimizer=opt, metrics=['accuracy'],loss_weights=[100000,1]) 
 
    #return model
    #model.fit_generator(Mygenerator(2),epochs=30)
    
    #model.save('my_eeg_model_final.h5')
    return model
def MyAddAxes(x):
    return K.expand_dims(K.expand_dims(x, axis=-1), axis=-1)
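A quick shape check for MyAddAxes, using numpy as a stand-in for the backend call:

import numpy as np

x = np.zeros((8, 10))
y = np.expand_dims(np.expand_dims(x, axis=-1), axis=-1)
print(y.shape)   # (8, 10, 1, 1): two trailing singleton axes appended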
Example #38
def yolo_loss(args,
              anchors,
              num_anchors_per_layer,
              num_classes,
              ignore_thresh=.5,
              print_loss=True):
    """
    Return yolo_loss tensor

    Args:
        args (list): args[:num_output_layers] the output of yolo_body or tiny_yolo_body
            args[num_output_layers:] raw_y_true
        anchors (np.array): shape=(N, 2), wh
        num_anchors_per_layer (int):
        num_classes (int):
        ignore_thresh (float): the iou threshold whether to ignore object confidence loss
        print_loss:

    Returns:
        loss: tensor, shape=(1,)

    """
    num_output_layers = len(anchors) // num_anchors_per_layer  # num_layers
    yolo_outputs = args[:num_output_layers]
    raw_y_trues = args[num_output_layers:]  # y_true
    anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(raw_y_trues[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(raw_y_trues[0]))
        for l in range(num_output_layers)
    ]
    loss = 0
    batch_size = K.shape(yolo_outputs[0])[0]  # m
    batch_size_f = K.cast(batch_size, K.dtype(yolo_outputs[0]))

    for l in range(num_output_layers):
        grid_shape = grid_shapes[l]
        yolo_output = yolo_outputs[l]
        #raw_y_pred = K.reshape(yolo_output, [-1, grid_shape[0], grid_shape[1], num_anchors_per_layer, num_classes + 9])
        raw_y_pred = tf.reshape(yolo_output, [-1, K.shape(yolo_output)[1], K.shape(yolo_output)[2], num_anchors_per_layer, num_classes + 9])
        raw_y_true = raw_y_trues[l]
        anchor_mask = anchor_masks[l]
        # (batch_size, grid_height, grid_width, num_anchors_this_layer, 1)
        object_mask = raw_y_true[..., 4:5]
        # (batch_size, grid_height, grid_width, num_anchors_this_layer, num_classes)
        y_true_class_probs = raw_y_true[..., 5:]
        grid, y_pred_box, y_pred_delta_xy, y_pred_log_wh, y_pred_sigma, y_pred_confidence, y_pred_class_probs = \
            y_pred_graph(raw_y_pred, anchors[anchor_mask], input_shape)
        y_true_delta_xy = raw_y_true[
            ..., :2] * grid_shapes[l][::-1] - grid  # raw_true_xy
        y_true_log_wh = K.log(raw_y_true[..., 2:4] * input_shape[::-1] /
                              anchors[anchor_mask])
        y_true_log_wh = K.switch(object_mask, y_true_log_wh,
                                 K.zeros_like(y_true_log_wh))  # raw_true_wh
        box_loss_scale = 2 - raw_y_true[..., 2:3] * raw_y_true[..., 3:4]
        ignore_mask = tf.TensorArray(K.dtype(raw_y_trues[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask_):
            # (num_gt_boxes, 4)
            gt_box = tf.boolean_mask(raw_y_true[b, ..., 0:4],
                                     object_mask_bool[b, ..., 0])
            # (grid_height, grid_width, num_anchors_this_layer, num_gt_boxes)
            iou = box_iou_graph(y_pred_box[b], gt_box)
            # (grid_height, grid_width, num_anchors_this_layer)
            best_iou = K.max(iou, axis=-1)
            ignore_mask_ = ignore_mask_.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(gt_box)))
            return b + 1, ignore_mask_

        _, ignore_mask = tf.while_loop(lambda b, *largs: b < batch_size,
                                       loop_body, [0, ignore_mask])
        # (batch_size, grid_height, grid_width, num_anchors_this_layer)
        ignore_mask = ignore_mask.stack()
        # (batch_size, grid_height, grid_width, num_anchors_this_layer, 1)
        ignore_mask = K.expand_dims(ignore_mask, -1)

        y_true = tf.concat([y_true_delta_xy, y_true_log_wh], axis=-1)
        y_pred_mu = tf.concat([y_pred_delta_xy, y_pred_log_wh], axis=-1)
        x_loss = nll_loss(y_true[..., 0:1], y_pred_mu[..., 0:1],
                          y_pred_sigma[..., 0:1])
        x_loss = object_mask * box_loss_scale * x_loss
        y_loss = nll_loss(y_true[..., 1:2], y_pred_mu[..., 1:2],
                          y_pred_sigma[..., 1:2])
        y_loss = object_mask * box_loss_scale * y_loss
        w_loss = nll_loss(y_true[..., 2:3], y_pred_mu[..., 2:3],
                          y_pred_sigma[..., 2:3])
        w_loss = object_mask * box_loss_scale * w_loss
        h_loss = nll_loss(y_true[..., 3:4], y_pred_mu[..., 3:4],
                          y_pred_sigma[..., 3:4])
        h_loss = object_mask * box_loss_scale * h_loss
        #####
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, y_pred_confidence) + \
                          (1 - object_mask) * K.binary_crossentropy(object_mask, y_pred_confidence) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            y_true_class_probs, y_pred_class_probs)
        #####
        x_loss = K.sum(x_loss) / batch_size_f
        y_loss = K.sum(y_loss) / batch_size_f
        w_loss = K.sum(w_loss) / batch_size_f
        h_loss = K.sum(h_loss) / batch_size_f
        confidence_loss = K.sum(confidence_loss) / batch_size_f
        class_loss = K.sum(class_loss) / batch_size_f
        loss += x_loss + y_loss + w_loss + h_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, x_loss, y_loss, w_loss, h_loss, confidence_loss,
                class_loss,
                K.sum(ignore_mask)
            ],
                            message='\nloss: ')
    return loss
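The loss above depends on helpers defined elsewhere in the file (y_pred_graph, box_iou_graph, nll_loss). As a point of reference, nll_loss is presumably a per-coordinate Gaussian negative log-likelihood; a hedged sketch of that form, not the original implementation:

import math
from keras import backend as K

def gaussian_nll(y_true, mu, sigma, eps=1e-7):
    # -log N(y_true | mu, sigma^2), elementwise; eps keeps the log and division stable
    sigma = K.maximum(sigma, eps)
    return 0.5 * K.log(2.0 * math.pi * K.square(sigma)) + \
           K.square(y_true - mu) / (2.0 * K.square(sigma))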
Beispiel #39
    def build(self):
        # build encoder network
        input_layer = Input(shape=self.input_shape, name='input')
        encoder_output = self.encoder(input_layer)
        # build latent network
        latent_params = Dense(self.n_latent*2, name='latent_params')(encoder_output)
        logvar = Lambda(lambda x: K.clip(x[:, :self.n_latent], -5, 5), name='logvar')(latent_params)
        mu = Lambda(lambda x: x[:, self.n_latent:], name='mu')(latent_params)
        var = Lambda(lambda x: K.exp(x), name='var')(logvar)
        std = Lambda(lambda x: K.sqrt(x), name='std')(var)
        gaussian_sampler = Lambda(lambda x: K.random_normal((K.shape(x)[0], self.n_sampler, self.n_latent)),
            name='gaussian_sampler')(input_layer)
        latent_sampler = Lambda(lambda x: x[0]*K.expand_dims(x[2], axis=1) + K.expand_dims(x[1], axis=1),
            name='latent_sampler')([gaussian_sampler, mu, std])
        latent_values = Lambda(lambda x: K.reshape(x, (-1, self.n_latent)), name='latent')(latent_sampler)
        # build decoder network
        decoder_output = self.decoder(latent_values)
        output = Lambda(lambda x: K.mean(K.reshape(x, [-1, self.n_sampler] + self.input_shape), axis=1),
            name='output_mean')(decoder_output)
        # define loss functions
        def kl_loss(y_true, y_pred):
            KL = 0.5*K.sum(var + K.square(mu) - 1 - K.log(var), axis=1)
            return KL
            
        def sequence_nll_loss(y_true, y_pred):
            y_shape = K.shape(y_true)
            y_true = K.reshape(y_true, (-1, y_shape[-1]))
            y_pred = K.reshape(y_pred, (-1, y_shape[-1]))
            NLL = K.categorical_crossentropy(y_true, y_pred)
            NLL = K.sum(K.reshape(NLL, (-1, y_shape[1])), axis=1)
            return NLL
        
        def sequence_accuracy(y_true, y_pred):
            return K.mean(K.cast(K.equal(K.argmax(y_true, axis=-1),
                        K.argmax(y_pred, axis=-1)),
                        K.floatx()), axis=1)
        
        def sequence_vae_loss(y_true, y_pred):
            return sequence_nll_loss(y_true, y_pred) + kl_loss(y_true, y_pred)

        def nll_loss(y_true, y_pred):
            NLL = K.categorical_crossentropy(y_true, y_pred)
            return NLL
                
        #def vae_loss(y_true, y_pred):
        #    return nll_loss(y_true, y_pred) + kl_loss(y_true, y_pred)

        import likelihoods
        ll = getattr(likelihoods, self.likelihood)
        def vae_loss(y_true, y_pred):
            return -ll(y_true, y_pred) + kl_loss(y_true, y_pred)
        
        # build training model
        model = Model(inputs=[input_layer], outputs=[output])
        model.compile(loss=vae_loss,
            metrics=[sequence_accuracy, kl_loss, nll_loss],
            optimizer='Adam')
        self.model = model
        # build log likelihood function
        ll_input = Input(shape=self.input_shape, name='ll_input')
        ll_output = ll(ll_input, output)
        self.ll_function = K.function([ll_input, latent_sampler], [ll_output])
        # build function for generating new samples
        self.sampler_function = K.function([ll_input, latent_values], [output])
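The latent_sampler/kl_loss pair above is the usual reparameterization trick plus the closed-form KL to a standard normal. A minimal standalone restatement of the two pieces (function names here are illustrative, not from the original code):

from keras import backend as K

def sample_latent(mu, std, n_sampler):
    # reparameterization trick: z = mu + std * eps, eps ~ N(0, I), drawn n_sampler times
    eps = K.random_normal((K.shape(mu)[0], n_sampler, K.shape(mu)[-1]))
    return K.expand_dims(mu, axis=1) + K.expand_dims(std, axis=1) * eps

def kl_to_standard_normal(mu, var):
    # closed form: KL( N(mu, var) || N(0, I) ) = 0.5 * sum(var + mu^2 - 1 - log var)
    return 0.5 * K.sum(var + K.square(mu) - 1.0 - K.log(var), axis=-1)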
    def __init__(self,
                 model,
                 policy=None,
                 enable_double_dqn=True,
                 enable_dueling_network=False,
                 dueling_type='avg',
                 *args,
                 **kwargs):
        super(DQNAgent, self).__init__(*args, **kwargs)

        # Validate (important) input.
        if hasattr(model.output, '__len__') and len(model.output) > 1:
            raise ValueError(
                'Model "{}" has more than one output. DQN expects a model that has a single output.'
                .format(model))
        if model.output._keras_shape != (None, self.nb_actions):
            raise ValueError(
                'Model output "{}" has invalid shape. DQN expects a model that has one dimension for each action, in this case {}.'
                .format(model.output, self.nb_actions))

        # Parameters.
        self.enable_double_dqn = enable_double_dqn
        self.enable_dueling_network = enable_dueling_network
        self.dueling_type = dueling_type
        if self.enable_dueling_network:
            # get the second last layer of the model, abandon the last layer
            layer = model.layers[-2]
            nb_action = model.output._keras_shape[-1]
            # layer y has a shape (nb_action+1,)
            # y[:,0] represents V(s;theta)
            # y[:,1:] represents A(s,a;theta)
            y = Dense(nb_action + 1, activation='linear')(layer.output)
            # caculate the Q(s,a;theta)
            # dueling_type == 'avg'
            # Q(s,a;theta) = V(s;theta) + (A(s,a;theta)-Avg_a(A(s,a;theta)))
            # dueling_type == 'max'
            # Q(s,a;theta) = V(s;theta) + (A(s,a;theta)-max_a(A(s,a;theta)))
            # dueling_type == 'naive'
            # Q(s,a;theta) = V(s;theta) + A(s,a;theta)
            if self.dueling_type == 'avg':
                outputlayer = Lambda(
                    lambda a: K.expand_dims(a[:, 0], dim=-1) + a[:, 1:] -
                    K.mean(a[:, 1:], axis=1, keepdims=True),
                    output_shape=(nb_action, ))(y)
            elif self.dueling_type == 'max':
                outputlayer = Lambda(
                    lambda a: K.expand_dims(a[:, 0], dim=-1) + a[:, 1:] -
                    K.max(a[:, 1:], axis=1, keepdims=True),
                    output_shape=(nb_action, ))(y)
            elif self.dueling_type == 'naive':
                outputlayer = Lambda(
                    lambda a: K.expand_dims(a[:, 0], dim=-1) + a[:, 1:],
                    output_shape=(nb_action, ))(y)
            else:
                assert False, "dueling_type must be one of {'avg','max','naive'}"

            model = Model(input=model.input, output=outputlayer)

        # Related objects.
        self.model = model
        if policy is None:
            policy = EpsGreedyQPolicy()
        self.policy = policy

        # State.
        self.reset_states()
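For intuition, the 'avg' dueling aggregation above can be checked with plain numpy (hypothetical numbers; note the per-sample mean over the action axis):

import numpy as np

def dueling_avg(y):
    # y: (batch, nb_action + 1); y[:, 0] is V(s), y[:, 1:] is A(s, a)
    v, a = y[:, :1], y[:, 1:]
    return v + a - a.mean(axis=1, keepdims=True)

y = np.array([[1.0, 0.5, 1.5, 1.0]])   # V = 1, A = [0.5, 1.5, 1.0], mean(A) = 1
print(dueling_avg(y))                  # [[0.5 1.5 1. ]]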
    def build(self):
        '''
        Defined according to the dataset at hand.
        Basic profile information: session_info
            user level / user age / product category / order status
            field               value range     embedding vocab size
            user level:         0 ~ 5           :6
            user age:           0 ~ 7           :8
            product category:   0 ~ 2484        :2485
            order status:       0 ~ 3           :4
        '''
        
        session_info = Input(shape=(None,))
        '''
        Standard inputs:
            question input:          x_in
            forward answer input:    yl_in
            backward answer input:   yr_in
            history question input:  z_in
        '''
        x_in = Input(shape=(None,))
        yl_in = Input(shape=(None,))
        yr_in = Input(shape=(None,))
        z_in = Input(shape=(None,))
        x, yl,yr, z = x_in, yl_in, yr_in, z_in

        session_level = Lambda(lambda x: x[:,0])(session_info)
        session_years = Lambda(lambda x: x[:,1])(session_info)
        session_kinds = Lambda(lambda x: x[:,2])(session_info)
        session_station = Lambda(lambda x: x[:,3])(session_info)

        session_level = Embedding(6, self.char_size // 4)(session_level)
        session_years = Embedding(8, self.char_size // 4)(session_years)
        session_kinds = Embedding(2485, self.char_size // 4)(session_kinds)
        session_station = Embedding(4, self.char_size // 4)(session_station)

        session = Concatenate()([session_level, session_years, session_kinds, session_station])
        session = Lambda(lambda x: K.expand_dims(x, 1))(session)

        x_mask = Lambda(lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(x)
        y_mask = Lambda(lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(yl)
        z_mask = Lambda(lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(z)

        x_one_hot = Lambda(self.to_one_hot)([x, x_mask])
        z_one_hot = Lambda(self.to_one_hot)([z, z_mask])
        xz_one_hot = Lambda(lambda x: K.cast(K.greater(x[0] + x[1], 0.5), 'float32'))([x_one_hot, z_one_hot])
        xz_prior = ScaleShift()(xz_one_hot)  # learn a prior distribution over the output

        if self.word2vec is not None:
            embedding = Embedding(len(self.word2vec.W2Vembedding),  # vocabulary size
                                    self.char_size,  # word-vector dimension (100)
                                    weights=[self.word2vec.W2Vembedding],  # key point: pretrained word-vector weights
                                    trainable=True  # whether to keep updating the word vectors during training
                                    )
        else:
            embedding = Embedding(self.chars_num + 4, self.char_size)

        x = embedding(x)
        z = embedding(z)

        # encoder: two stacked bidirectional LSTM layers
        x = LayerNormalization()(x)
        x = OurBidirectional(CuDNNLSTM(self.z_dim // 2, return_sequences=True))([x, x_mask])
        x = LayerNormalization()(x)
        x = OurBidirectional(CuDNNLSTM(self.z_dim // 2, return_sequences=True))([x, x_mask])

        z = LayerNormalization()(z)
        z = OurBidirectional(CuDNNLSTM(self.z_dim // 2, return_sequences=True))([z, z_mask])
        z = LayerNormalization()(z)
        z = OurBidirectional(CuDNNLSTM(self.z_dim // 2, return_sequences=True))([z, z_mask])

        x_max = Lambda(self.seq_maxpool)([x, x_mask])

        session = LayerNormalization()(session)
        session = CuDNNLSTM(self.z_dim // 4, return_sequences=True)(session)
        session = LayerNormalization()(session)
        session = CuDNNLSTM(self.z_dim // 4, return_sequences=True)(session)
        session = LayerNormalization()(session)
        session = CuDNNLSTM(self.z_dim // 4, return_sequences=True)(session)

        # forward decoder: unidirectional LSTM
        y = embedding(yl)
        y = SelfModulatedLayerNormalization(self.z_dim // 4)([y, x_max])
        y = CuDNNLSTM(self.z_dim, return_sequences=True)(y)
        y = SelfModulatedLayerNormalization(self.z_dim // 4)([y, x_max])
        y = CuDNNLSTM(self.z_dim, return_sequences=True)(y)
        yl = SelfModulatedLayerNormalization(self.z_dim // 4)([y, x_max])

        # backward decoder: unidirectional LSTM
        y = embedding(yr)
        y = SelfModulatedLayerNormalization(self.z_dim // 4)([y, x_max])
        y = CuDNNLSTM(self.z_dim, return_sequences=True)(y)
        y = SelfModulatedLayerNormalization(self.z_dim // 4)([y, x_max])
        y = CuDNNLSTM(self.z_dim, return_sequences=True)(y)
        yr = SelfModulatedLayerNormalization(self.z_dim // 4)([y, x_max])

        # alignment attention + retrieval attention
        yl_ = Attention(8, 16, mask_right=True)([yl, yr, yr])
        ylx = Attention(8, 16)([yl, x, x, x_mask])
        ylz = Attention(8, 16)([yl, z, z, z_mask])
        yls = Attention(8, 16, mask_right=True)([yl, session, session])
        yl = Concatenate()([yl, yl_, ylx, ylz, yls])
        # alignment attention + retrieval attention
        yr_ = Attention(8, 16, mask_right=True)([yr, yl, yl])
        yrx = Attention(8, 16)([yr, x, x, x_mask])
        yrz = Attention(8, 16)([yr, z, z, z_mask])
        yrs = Attention(8, 16, mask_right=True)([yr, session, session])
        yr = Concatenate()([yr, yr_, yrx, yrz, yrs])

        # final output classifier (weights shared between the two decoders)
        classifier = Dense(len(self.data_info.chars) + 4)
        yl = Dense(self.data_info.char_size)(yl)
        yl = LeakyReLU(0.2)(yl)
        yl = classifier(yl)
        yl = Lambda(lambda x: (x[0] + x[1]) / 2)([yl, xz_prior])  # average with the prior
        yl = Activation('softmax')(yl)
        yr = Dense(self.data_info.char_size)(yr)
        yr = LeakyReLU(0.2)(yr)
        yr = classifier(yr)
        yr = Lambda(lambda x: (x[0] + x[1]) / 2)([yr, xz_prior])  # average with the prior
        yr = Activation('softmax')(yr)

        # cross entropy as the loss, with the padding positions masked out
        cross_entropy_1 = K.sparse_categorical_crossentropy(yl_in[:, 1:], yl[:, :-1])
        cross_entropy_1 = K.sum(cross_entropy_1 * y_mask[:, 1:, 0]) / K.sum(y_mask[:, 1:, 0])
        cross_entropy_2 = K.sparse_categorical_crossentropy(yr_in[:, 1:], yr[:, :-1])
        cross_entropy_2 = K.sum(cross_entropy_2 * y_mask[:, 1:, 0]) / K.sum(y_mask[:, 1:, 0])
        cross_entropy = (cross_entropy_1 + cross_entropy_2) / 2

        model = Model([session_info, x_in, yl_in, yr_in, z_in], [yl, yr])
        model.add_loss(cross_entropy)
        model.compile(optimizer = Adam(self.learning_rate))
        # print(model.summary())
        return model
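The loss above averages the token-level cross entropy only over non-padding positions; the same idea as a small standalone helper (illustrative, not taken from the original code):

from keras import backend as K

def masked_sparse_ce(y_true_ids, y_pred_probs, mask):
    # mask is 1.0 for real tokens, 0.0 for padding; average only over real tokens
    ce = K.sparse_categorical_crossentropy(y_true_ids, y_pred_probs)
    return K.sum(ce * mask) / K.sum(mask)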
	def reg_accuracy(y_true, y_pred):
		mask = K.cast( K.equal(y_true[...,dataset.num_classes], 1.0  ), K.floatx()  )
		reg = K.sum(K.square(y_true[...,dataset.num_classes+1:dataset.num_classes+3] - y_pred[...,dataset.num_classes+1:dataset.num_classes+3]), axis=-1)
		if K.backend() == "cntk":
			reg = K.expand_dims(reg)
		return K.sum(reg * mask) / K.maximum(K.sum(mask), 1.0)
Beispiel #43
x1_in = Input(shape=(None,), dtype='int32')
x2_in = Input(shape=(None,))
xm_in = Input(shape=(None,))
h_in = Input(shape=(None,), dtype='int32')
hm_in = Input(shape=(None,))
sel_in = Input(shape=(None,), dtype='int32')
conn_in = Input(shape=(1,), dtype='int32')
csel_in = Input(shape=(None,), dtype='int32')
cop_in = Input(shape=(None,), dtype='int32')

x1, x2, xm, h, hm, sel, conn, csel, cop = (
    x1_in, x2_in, xm_in, h_in, hm_in, sel_in, conn_in, csel_in, cop_in
)

hm = Lambda(lambda x: K.expand_dims(x, 1))(hm)  # header mask, shape=(None, 1, h_len)

x = bert_model([x1_in, x2_in])
x4conn = Lambda(lambda x: x[:, 0])(x)
pconn = Dense(num_cond_conn_op, activation='softmax')(x4conn)

x4h = Lambda(seq_gather)([x, h])
psel = Dense(num_agg, activation='softmax')(x4h)

pcop = Dense(num_op, activation='softmax')(x)

x = Lambda(lambda x: K.expand_dims(x, 2))(x)
x4h = Lambda(lambda x: K.expand_dims(x, 1))(x4h)
pcsel_1 = Dense(1)(x)
pcsel_2 = Dense(1)(x4h)
pcsel = Lambda(lambda x: x[0] + x[1])([pcsel_1, pcsel_2])
Beispiel #44
def expand_item(X):
    return K.expand_dims(X, 2)
Beispiel #45
 def call(self, x, mask=None):
     if mask is not None:
         mask = K.cast(mask, K.floatx())
         mask = K.expand_dims(mask)
         x = x * mask
     return K.max(x, axis=1)
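This masked max-pooling zeroes out padded timesteps before taking the max over the time axis, which implicitly assumes non-negative activations; a quick numpy illustration (hypothetical values):

import numpy as np

x = np.array([[[1.0, 2.0], [3.0, 0.5], [9.0, 9.0]]])   # (batch=1, time=3, dim=2)
mask = np.array([[1.0, 1.0, 0.0]])                      # last timestep is padding
masked = x * mask[..., None]                            # zero out padded steps
print(masked.max(axis=1))                               # [[3. 2.]]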
Beispiel #46
def expand_rate(X):
    return K.expand_dims(X, 1)
Beispiel #47
    def call(self, x):

        C = K.expand_dims(self.centers)
        H = K.transpose(C - K.transpose(x))
        return K.exp(-self.betas * K.sum(H**2, axis=1))
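This is a radial basis function layer: for input x, centre c_i and width beta_i the output is exp(-beta_i * ||x - c_i||^2). The same computation in plain numpy (shapes assumed: x is (batch, dim), centers is (n_centers, dim)):

import numpy as np

def rbf(x, centers, betas):
    # x: (batch, dim), centers: (n_centers, dim), betas: (n_centers,)
    diff = x[:, None, :] - centers[None, :, :]           # (batch, n_centers, dim)
    return np.exp(-betas * np.sum(diff ** 2, axis=-1))   # (batch, n_centers)

x = np.array([[0.0, 0.0], [1.0, 1.0]])
centers = np.array([[0.0, 0.0], [2.0, 2.0]])
print(rbf(x, centers, np.array([1.0, 0.5])))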
Beispiel #48
def build_model(config,
                n_users,
                vocab_len,
                pretrained_emb,
                emb_dim_user_id=50,
                emb_dim_pref_query=200,
                emb_dim_words=300,
                n_filters_cnn=400,
                dropout_p=0.2,
                **kwargs):

    ##user embedding - word & article level
    user_id = Input(shape=(1, ), dtype='int32')
    user_embedding_layer = Embedding(n_users, emb_dim_user_id, trainable=True)
    user_embedding = user_embedding_layer(user_id)
    user_embedding_word = Dense(emb_dim_pref_query,
                                activation='relu')(user_embedding)
    user_embedding_word = Flatten()(user_embedding_word)
    user_embedding_news = Dense(emb_dim_pref_query,
                                activation='relu')(user_embedding)
    user_embedding_news = Flatten()(user_embedding_news)

    ##news encoder
    news_input = Input(shape=(config.max_len_title, ), dtype='int32')

    if pretrained_emb:
        embedding_layer = Embedding(
            vocab_len, emb_dim_words, weights=[pretrained_emb],
            trainable=True)  # weights=[pretrained_emb],
    else:
        embedding_layer = Embedding(vocab_len, emb_dim_words,
                                    trainable=True)  # random initialisation

    embedded_sequences = embedding_layer(news_input)
    embedded_sequences = Dropout(dropout_p)(embedded_sequences)

    cnnouput = Convolution1D(filters=n_filters_cnn,
                             kernel_size=3,
                             padding='same',
                             activation='relu',
                             strides=1)(
                                 embedded_sequences)  # original filters=400
    cnnouput = Dropout(dropout_p)(cnnouput)

    # personalised attention - word level
    attention_a = Dot((2, 1))([
        cnnouput,
        Dense(n_filters_cnn, activation='tanh')(user_embedding_word)
    ])
    attention_weight = Activation('softmax')(attention_a)
    news_rep = keras.layers.Dot((1, 1))([cnnouput, attention_weight])
    newsEncoder = Model([news_input, user_id], news_rep)

    # browsing history as concatenation of MAX_SENTS articles
    all_news_input = [
        keras.Input((config.max_len_title, ), dtype='int32')
        for _ in range(config.max_len_hist)
    ]
    browsed_news_rep = [
        newsEncoder([news, user_id]) for news in all_news_input
    ]
    browsed_news_rep = concatenate([
        Lambda(lambda x: K.expand_dims(x, axis=1))(news)
        for news in browsed_news_rep
    ],
                                   axis=1)

    ## user encoder
    # personalised attention - article level
    attention_news = keras.layers.Dot((2, 1))([
        browsed_news_rep,
        Dense(n_filters_cnn, activation='tanh')(user_embedding_news)
    ])
    attention_weight_news = Activation('softmax')(attention_news)
    user_rep = keras.layers.Dot(
        (1, 1))([browsed_news_rep, attention_weight_news])

    # candidate items - as pseudo K + 1 classification task
    candidates = [
        keras.Input((config.max_len_title, ), dtype='int32')
        for _ in range(1 + config.neg_sample_ratio)
    ]
    candidate_vecs = [
        newsEncoder([candidate, user_id]) for candidate in candidates
    ]
    # logits
    scores_raw = [
        keras.layers.dot([user_rep, candidate_vec], axes=-1)
        for candidate_vec in candidate_vecs
    ]
    # normalised probs
    softm_probs = keras.layers.Activation(keras.activations.softmax)(
        keras.layers.concatenate(scores_raw))

    model = Model(candidates + all_news_input + [user_id], softm_probs)
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(lr=0.001),
                  metrics=['acc'])

    candidate_one = keras.Input((config.max_len_title, ))
    candidate_one_vec = newsEncoder([candidate_one, user_id])
    score_raw = keras.layers.dot([user_rep, candidate_one_vec], axes=-1)
    score_sigmoid = keras.layers.Activation(
        keras.activations.sigmoid)(score_raw)
    model_test = keras.Model(inputs=[candidate_one] + all_news_input +
                             [user_id],
                             outputs=score_sigmoid)

    return model, model_test
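Both attention blocks above follow the same personalised attention pattern: score each position against a user-conditioned query, softmax the scores, and take the weighted sum. An unbatched numpy sketch of that pooling step (names are illustrative):

import numpy as np

def personalised_attention_pool(H, q):
    # H: (seq_len, dim) position representations, q: (dim,) user-conditioned query
    scores = H @ q                           # (seq_len,)
    weights = np.exp(scores - scores.max())
    weights /= weights.sum()                 # softmax over positions
    return H.T @ weights                     # (dim,) attention-weighted representation

H = np.random.randn(5, 8)
q = np.random.randn(8)
print(personalised_attention_pool(H, q).shape)   # (8,)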
    def call(self, inputs, states, constants):
        '''
        This call method is invoked (and wrapped) by the enclosing RNN layer, so the constants argument is available here.
        :param inputs: [wt; v_g], of dimension self.input_dim
        :param states: previous step's ht, mt
        :param constants: cnn_encoder outputs
        :return:
        '''
        h_tm = states[0]  # last hidden state
        m_tm = states[1]  # last memory cell
        self.v_seq = constants[
            0]  # [self.cnn_encoder_k, self.units] self.units=cnn_encoder_d
        """
            f-gate
        """
        ft = activations.sigmoid(
            K.dot(h_tm, self.W_f) + K.dot(inputs, self.U_f) + self.b_f)
        """
            i-gate
        """
        it = activations.sigmoid(
            K.dot(h_tm, self.W_i) + K.dot(inputs, self.U_i) + self.b_i)
        """
            o-gate
        """
        ot = activations.sigmoid(
            K.dot(h_tm, self.W_o) + K.dot(inputs, self.U_o) + self.b_o)
        """
            g-gate (sentinel gate)
        """
        gt = activations.sigmoid(
            K.dot(h_tm, self.W_g) + K.dot(inputs, self.U_g) + self.b_g)
        """
            at-renew input
        """
        at = activations.tanh(
            K.dot(h_tm, self.W_a) + K.dot(inputs, self.U_a) + self.b_a)
        """
            mt-memory cell
        """
        mt = m_tm * ft + it * at
        """
            ht-hidden state
        """
        ht = ot * activations.tanh(mt)
        """
            st-visual sentinel
        """
        st = gt * activations.tanh(mt)
        """
            ct-visual context
        """
        st = K.expand_dims(st, axis=1)
        # include st so the attention weights are computed over [?, k+1, d], d=self.units; differs slightly from the paper
        self.v_expand = K.concatenate([self.v_seq, st], axis=1)
        # one_matrix = K.ones((self.cnn_encoder_k + 1, 1))
        vtt = K.dot(self.v_expand, self.W_z)
        dtt = K.repeat(K.dot(ht, self.U_z),
                       self.cnn_encoder_k + 1)  # (?, k + 1, k + 1)
        tantt = K.tanh(vtt + dtt)

        zt = K.dot(tantt, self.W_h)

        alpha_t = activations.softmax(zt)  # (?, k + 1, 1)
        # alpha_t = K.expand_dims(alpha_t)  # (?, k + 1, 1)
        # weighted sum taken directly over st, v1, ..., vk; differs slightly from the paper (?, k + 1, units)
        # output: (?, units)
        ct = K.squeeze(K.batch_dot(alpha_t, self.v_expand, axes=1),
                       axis=1)  # batch_dot over the k + 1 axis
        ht_plus_ct = ht + ct

        return ht_plus_ct, [ht, mt]
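A custom cell like this is normally wrapped in keras.layers.RNN, which forwards the constants argument (the CNN encoder features) to every step. A hedged wiring sketch; the cell class name, its constructor arguments, and all shapes are assumptions:

from keras.layers import Input, RNN

step_inputs = Input(shape=(20, 812))     # 20 timesteps of [wt; v_g] vectors (assumed input_dim)
cnn_features = Input(shape=(49, 512))    # cnn_encoder outputs, passed to the cell as constants

# cell = AdaptiveAttentionCell(units=512)                                            # hypothetical class
# outputs = RNN(cell, return_sequences=True)(step_inputs, constants=[cnn_features])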
Beispiel #50
def seq_and_vec(x):
    x, v = x
    v = K.expand_dims(v, 1)
    v = K.tile(v, [1, K.shape(x)[1], 1])
    return K.concatenate([x, v], 2)


q1_in = Input(shape=(None, ))  # question char-id input
q2_in = Input(shape=(None, word_size))  # question word-vector input
p1_in = Input(shape=(None, ))  # passage char-id input
p2_in = Input(shape=(None, word_size))  # passage word-vector input
a1_in = Input(shape=(None, ))  # answer left-boundary input
a2_in = Input(shape=(None, ))  # answer right-boundary input

q1, q2, p1, p2, a1, a2 = q1_in, q2_in, p1_in, p2_in, a1_in, a2_in
q_mask = Lambda(
    lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(q1)
p_mask = Lambda(
    lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(p1)

embeddings = MixEmbedding(len(char2id) + 2, char_size)

q = embeddings([q1, q2])
q = Dropout(0.1)(q)
p = embeddings([p1, p2])
p = Dropout(0.1)(p)

q = DilatedGatedConv1D(rate=1, drop_gate=0.1)([q, q_mask])
q = DilatedGatedConv1D(rate=2, drop_gate=0.1)([q, q_mask])
q = DilatedGatedConv1D(rate=1, drop_gate=0.1)([q, q_mask])
qv = AttentionPooling1D()([q, q_mask])
    def _fit(self, X, y):
        # Pulling out the zeroth item from each element because X, y are dataframes and
        # so each item in _.values is a list of length 1. Same for _predict, below.
        X = np.expand_dims(np.stack([x[0] for x in X.values]), 3)
        # TODO: figure out why y is a Series and X is a pandas dataframe
        y_stability = np.stack([x[0] for x in y.values], axis=1).T
        y_dssp = np.squeeze(np.stack([x[1] for x in y.values], axis=1))
        y = [y_stability, y_dssp]
        val_size = int(X.shape[0] * .1)
        Xv = X[-val_size:, :, :, :]
        yv = [y[0][-val_size:, :], y[1][-val_size:, :, :]]
        X = X[:-val_size, :, :, :]
        y = [y[0][:-val_size, :], y[1][:-val_size, :, :]]

        def data_gen(batch_size):
            batch_ind = 0
            amino_dict = dict(
                zip([
                    'A', 'R', 'N', 'D', 'C', 'E', 'Q', 'G', 'H', 'I', 'L', 'K',
                    'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V', 'X', 'J', 'O'
                ], range(23))
            )  # 'X' means nothing, 'J' means beginning, 'O' means end
            while True:
                xi = randrange(X.shape[0] - batch_size)
                if batch_ind % batch_size == 0:
                    batch_ind = 0
                    x_ret = []
                    y_ret = [[], []]
                x = X[xi, :, :, :]
                y0 = y[0][xi, :]
                y1 = y[1][xi, :, :]
                minshift = np.argmax(
                    x[amino_dict['O'], :, :]) - x.shape[1] + self.padding
                maxshift = np.argmax(x[amino_dict['J'], :, :]) - self.padding
                # +1 because we want to be able to shift by maxshift (putting the 'J' at the
                # beginning) but not by minshift (which would wrap the 'O' around to the
                # beginning); the farthest rightward shift should put the 'O' at the end.
                shift = randrange(minshift, maxshift) + 1
                x_ret += [np.roll(x, shift, axis=1)]
                y_ret[0] += [y0]
                y_ret[1] += [np.roll(y1, shift, axis=0)]
                batch_ind += 1
                if batch_ind % batch_size == 0:
                    yield np.stack(x_ret), [
                        np.stack(y_ret[0]),
                        np.stack(y_ret[1])
                    ]

        checkpoint_filepath = 'sequence_only_cnn_v2_{}.best.hdf5'.format(
            str(randint(1000000000, 9999999999)))
        checkpoint_callback = ModelCheckpoint(checkpoint_filepath,
                                              monitor='val_loss',
                                              save_best_only=True)
        stopping_callback = EarlyStopping(monitor='val_loss',
                                          min_delta=0.0,
                                          patience=5)
        callbacks_list = [checkpoint_callback, stopping_callback]
        self.model.fit_generator(data_gen(self.batch_size),
                                 epochs=self.epochs,
                                 steps_per_epoch=1 * X.shape[0] /
                                 self.batch_size,
                                 validation_data=(Xv, yv),
                                 callbacks=callbacks_list,
                                 verbose=self.verbose)

        # Based on permissible transitions between DSSP codes
        transition_kernels = K.constant([[[1, 0, 0, 0, 0, 0],
                                          [-1, -1, -1, 0, 0, -1]],
                                         [[0, 1, 0, 0, 0, 0],
                                          [-1, -1, 0, 0, 0, 0]],
                                         [[0, 0, 1, 0, 0, 0],
                                          [-1, 0, -1, 0, 0, 0]],
                                         [[0, 0, 0, 1, 0, 0],
                                          [0, 0, 0, -1, -1, 0]],
                                         [[0, 0, 0, 0, 1, 0],
                                          [-1, 0, 0, 0, 0, 0]],
                                         [[0, 0, 0, 0, 0, 1],
                                          [0, 0, 0, -1, 0, 0]]])
        transition_kernels = K.permute_dimensions(transition_kernels,
                                                  (1, 2, 0))
        transition_kernels = K.expand_dims(transition_kernels, -2)

        def custom_loss_dssp(y_true, y_pred):
            y_pred_one_hot = K.one_hot(K.argmax(y_pred), 6)

            def conv_loss(pred):
                # flag impermissible DSSP transitions in whichever prediction is passed in
                return K.max(K.clip(
                    K.conv2d(K.expand_dims(pred, -1),
                             transition_kernels), 0.0, 1.0),
                             axis=-1)

            return (
                K.mean(losses.categorical_crossentropy(y_true, y_pred))
                # inner max is over filters, which is important to only pick the most-activated filter at each site -
                # this will be the filter that matches the identity of the DSSP code.
                + 0.8 * K.mean(conv_loss(y_pred_one_hot)) +
                0.4 * K.max(conv_loss(y_pred_one_hot)) +
                0.4 * K.mean(conv_loss(y_pred)) +
                0.2 * K.max(conv_loss(y_pred)))

        def coeff_determination(y_true, y_pred):
            SS_res = K.sum(K.square(y_true - y_pred))
            SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
            return 1 - SS_res / (SS_tot + K.epsilon())

        def custom_loss_stability(y_true, y_pred):
            return K.sqrt(
                K.mean(K.square(y_pred - y_true),
                       axis=-1)) - 3.0 * coeff_determination(y_true, y_pred)

        loss = {
            "model_dssp": custom_loss_dssp,
            "model_stability": custom_loss_stability,
        }
        loss_weights = {"model_stability": 0.1, "model_dssp": 0.05}
        self.model.load_weights(checkpoint_filepath)
        self.model.compile(optimizer='adadelta',
                           loss=loss,
                           loss_weights=loss_weights)
        self.model.fit_generator(data_gen(self.batch_size),
                                 epochs=self.epochs,
                                 steps_per_epoch=1 * X.shape[0] /
                                 self.batch_size,
                                 validation_data=(Xv, yv),
                                 callbacks=callbacks_list,
                                 verbose=self.verbose)
        self.model.load_weights(checkpoint_filepath)

        os.remove(checkpoint_filepath)
Beispiel #52
def seq_and_vec(x):
    x, v = x
    v = K.expand_dims(v, 1)
    v = K.tile(v, [1, K.shape(x)[1], 1])
    return K.concatenate([x, v], 2)
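seq_and_vec tiles a per-sample vector v along the time axis of x and concatenates it to every timestep (it is meant to be used inside a Lambda layer). A quick numpy shape check (shapes assumed):

import numpy as np

x = np.random.randn(2, 7, 16)     # (batch, seq_len, dim_x)
v = np.random.randn(2, 32)        # (batch, dim_v)
v_tiled = np.tile(v[:, None, :], (1, x.shape[1], 1))
print(np.concatenate([x, v_tiled], axis=2).shape)   # (2, 7, 48)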
 def co_attention(self, Q, E, mask, axis=1):
     mask = K.expand_dims(mask, axis=axis)
     E_beta = E - (1 - mask) * 1e30
     beta_weights = K.softmax(E_beta, axis=3 - axis)
     beta = K.batch_dot(beta_weights, Q, axes=[3 - axis, 1])
     return beta
def backend_expand_dims_1(x):
    return K.expand_dims(x, axis=1)
 def conv_loss(pred):
     return K.max(K.clip(
         K.conv2d(K.expand_dims(pred, -1),
                  transition_kernels), 0.0, 1.0),
                  axis=-1)
def backend_expand_dims_last(x):
    return K.expand_dims(x, axis=-1)
def sequence_only_cnn_v2(max_residues, padding):
    amino_inputs = Input(shape=(23, max_residues + 2 + 2 * padding,
                                1))  # 20 amino acids plus null/beginning/end

    amino_model = Conv2D(400, (23, 5),
                         kernel_regularizer=l2(.0),
                         activation='relu')(amino_inputs)
    amino_model = Dropout(0.3)(amino_model)
    amino_model = Conv2D(200, (1, 9),
                         kernel_regularizer=l2(.0),
                         activation='relu')(amino_model)
    amino_model = Dropout(0.3)(amino_model)
    amino_model = Conv2D(100, (1, 17),
                         kernel_regularizer=l2(.0),
                         activation='relu')(amino_model)
    amino_model = Dropout(0.3)(amino_model)

    model = Flatten()(amino_model)

    model_dssp = Dense((max_residues + 2 + 2 * padding) * 6)(model)
    model_dssp = Reshape(((max_residues + 2 + 2 * padding), 6))(model_dssp)
    model_dssp = Activation('softmax', name='model_dssp')(
        model_dssp)  # softmax default axis is last axis
    model_dssp_flat = Flatten()(model_dssp)
    model = Concatenate()([model, model_dssp_flat])

    model = Dense(80, activation='elu', kernel_regularizer=l2(.0))(model)
    model = Dense(40, activation='elu', kernel_regularizer=l2(.0))(model)
    model = Dense(2, activation='linear', kernel_regularizer=l2(.0))(model)
    model_stability = Lambda(
        lambda x: K.concatenate([x, K.min(x, axis=1, keepdims=True)], axis=1),
        name='model_stability')(model)
    comp_model = Model(inputs=amino_inputs,
                       outputs=[model_stability, model_dssp])

    # Based on permissible transitions between DSSP codes
    transition_kernels = K.constant([[[1, 0, 0, 0, 0, 0],
                                      [-1, -1, -1, 0, 0, -1]],
                                     [[0, 1, 0, 0, 0, 0], [-1, -1, 0, 0, 0,
                                                           0]],
                                     [[0, 0, 1, 0, 0, 0], [-1, 0, -1, 0, 0,
                                                           0]],
                                     [[0, 0, 0, 1, 0, 0], [0, 0, 0, -1, -1,
                                                           0]],
                                     [[0, 0, 0, 0, 1, 0], [-1, 0, 0, 0, 0, 0]],
                                     [[0, 0, 0, 0, 0, 1], [0, 0, 0, -1, 0,
                                                           0]]])
    transition_kernels = K.permute_dimensions(transition_kernels, (1, 2, 0))
    transition_kernels = K.expand_dims(transition_kernels, -2)

    def custom_loss_dssp(y_true, y_pred):
        y_pred_one_hot = K.one_hot(K.argmax(y_pred), 6)

        def conv_loss(pred):
            # flag impermissible DSSP transitions in whichever prediction is passed in
            return K.max(K.clip(
                K.conv2d(K.expand_dims(pred, -1),
                         transition_kernels), 0.0, 1.0),
                         axis=-1)

        return (
            K.mean(losses.categorical_crossentropy(y_true, y_pred))
            # inner max is over filters, which is important to only pick the most-activated filter at each site -
            # this will be the filter that matches the identity of the DSSP code.
            + 0.8 * K.mean(conv_loss(y_pred_one_hot)) +
            0.4 * K.max(conv_loss(y_pred_one_hot)) +
            0.4 * K.mean(conv_loss(y_pred)) + 0.2 * K.max(conv_loss(y_pred)))

    def coeff_determination(y_true, y_pred):
        SS_res = K.sum(K.square(y_true - y_pred))
        SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
        return 1 - SS_res / (SS_tot + K.epsilon())

    def custom_loss_stability(y_true, y_pred):
        return K.sqrt(
            K.mean(K.square(y_pred - y_true),
                   axis=-1)) - 3.0 * coeff_determination(y_true, y_pred)

    loss = {
        "model_dssp": custom_loss_dssp,
        "model_stability": custom_loss_stability,
    }
    loss_weights = {"model_stability": 0.2, "model_dssp": 1.2}
    comp_model.compile(optimizer='adadelta',
                       loss=loss,
                       loss_weights=loss_weights)

    th_model = KerasRegressionTwoDimensional(
        model=comp_model,
        model_author="Jed",
        model_description=
        'Sequence CNN v2 regressor: 400x5->200x9->100x17->80->40->1',
        batch_size=128,
        epochs=50,
        padding=padding)
    return th_model
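The transition kernels above encode, via their -1 entries, which DSSP class may follow each class; conv_loss then yields 1 exactly where an impermissible transition occurs. The same check in plain numpy, with the permissible-successor sets read off the constants above (illustrative only):

import numpy as np

# permissible next DSSP classes, read off the -1 rows of transition_kernels above
allowed_next = {0: {0, 1, 2, 5}, 1: {0, 1}, 2: {0, 2}, 3: {3, 4}, 4: {0}, 5: {3}}

def impermissible(labels):
    # 1.0 wherever labels[t] -> labels[t+1] is not a permissible transition
    return np.array([0.0 if labels[t + 1] in allowed_next[labels[t]] else 1.0
                     for t in range(len(labels) - 1)])

print(impermissible([0, 1, 0, 4, 0]))   # [0. 0. 1. 0.]: only the 0 -> 4 step is flagged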
Beispiel #58
    def recursion(self,
                  input_energy,
                  mask=None,
                  go_backwards=False,
                  return_sequences=True,
                  return_logZ=True,
                  input_length=None):
        """Forward (alpha) or backward (beta) recursion

        If `return_logZ = True`, compute the logZ, the normalization constant:

        \[ Z = \sum_{y_1, y_2, y_3} \exp(-E)   (E is the energy)
             = \sum_{y_1, y_2, y_3} \exp(-(u_1' y_1 + y_1' W y_2 + u_2' y_2 + y_2' W y_3 + u_3' y_3))
             = \sum_{y_2, y_3} \exp(-(u_2' y_2 + y_2' W y_3 + u_3' y_3))
               \sum_{y_1} \exp(-(u_1' y_1 + y_1' W y_2)) \]

        Denote:
            \[ S(y_2) := \sum_{y_1} \exp(-(u_1' y_1 + y_1' W y_2)) \]
            \[ Z = \sum_{y_2, y_3} \exp(\log S(y_2) - (u_2' y_2 + y_2' W y_3 + u_3' y_3)) \]
            \[ \log S(y_2) = \operatorname{logsumexp}_{y_1}(-(u_1' y_1 + y_1' W y_2)) \]
        Note that:
              yi's are one-hot vectors
              u1, u3: boundary energies have been merged

        If `return_logZ = False`, compute the Viterbi's best path lookup table.
        """
        chain_energy = self.chain_kernel
        # shape=(1, F, F): F=num of output features. 1st F is for t-1, 2nd F for t
        chain_energy = K.expand_dims(chain_energy, 0)
        # shape=(B, F), dtype=float32
        prev_target_val = K.zeros_like(input_energy[:, 0, :])

        if go_backwards:
            input_energy = K.reverse(input_energy, 1)
            if mask is not None:
                mask = K.reverse(mask, 1)

        initial_states = [
            prev_target_val,
            K.zeros_like(prev_target_val[:, :1])
        ]
        constants = [chain_energy]

        if mask is not None:
            mask2 = K.cast(
                K.concatenate([mask, K.zeros_like(mask[:, :1])], axis=1),
                K.floatx())
            constants.append(mask2)

        def _step(input_energy_i, states):
            return self.step(input_energy_i, states, return_logZ)

        target_val_last, target_val_seq, _ = K.rnn(_step,
                                                   input_energy,
                                                   initial_states,
                                                   constants=constants,
                                                   input_length=input_length,
                                                   unroll=self.unroll)

        if return_sequences:
            if go_backwards:
                target_val_seq = K.reverse(target_val_seq, 1)
            return target_val_seq
        else:
            return target_val_last
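The forward recursion above accumulates the log-partition with a log-sum-exp at every step. A small unbatched numpy sketch of the same logZ computation, verified against brute-force enumeration (toy sizes, names illustrative):

import itertools
import numpy as np

def lse(a, axis=None):
    # numerically stable log-sum-exp
    m = np.max(a, axis=axis, keepdims=True)
    return (m + np.log(np.sum(np.exp(a - m), axis=axis, keepdims=True))).squeeze(axis)

def chain_log_z(unary, chain):
    # unary: (T, F) energies u_t' y_t; chain: (F, F) energies y_{t-1}' W y_t
    alpha = -unary[0]
    for t in range(1, unary.shape[0]):
        alpha = lse(alpha[:, None] - chain, axis=0) - unary[t]
    return lse(alpha)

unary, chain = np.random.randn(4, 3), np.random.randn(3, 3)
energies = [unary[np.arange(4), list(y)].sum() + sum(chain[y[i], y[i + 1]] for i in range(3))
            for y in itertools.product(range(3), repeat=4)]
print(chain_log_z(unary, chain), lse(-np.array(energies)))   # the two values should agree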
Beispiel #59
def weight_expand(x):
    return backend.expand_dims(x)
Beispiel #60
def softmaxLayer(x):
    channel_sum = K.sum(x, axis=3)
    softmax = K.expand_dims(K.softmax(channel_sum), axis=-1)
    return softmax