Example 1
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
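
A minimal usage sketch (not part of the original snippet; it assumes TF 2.x eager execution and that `K` is the Keras backend): run a dummy 13x13 feature map with 3 anchors and 80 classes through yolo_head and inspect the output shapes.

import numpy as np
from tensorflow.keras import backend as K  # assumption: the snippet's `K` is the Keras backend

dummy_feats = K.constant(np.random.rand(1, 13, 13, 3 * (80 + 5)).astype("float32"))
anchors = [(10, 13), (16, 30), (33, 23)]                      # illustrative anchor sizes
box_xy, box_wh, box_conf, box_cls = yolo_head(
    dummy_feats, anchors, num_classes=80, input_shape=K.constant([416.0, 416.0]))
print(K.int_shape(box_xy), K.int_shape(box_wh))               # (1, 13, 13, 3, 2) each
print(K.int_shape(box_conf), K.int_shape(box_cls))            # (1, 13, 13, 3, 1), (1, 13, 13, 3, 80)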
Example 2
    def call(self, x, mask=None):
        '''
        shape = (batch_size, new_time_step, filters)
        x_cont = Tensor("layer_dropout_5/cond/Identity:0",  shape=(None, None, 128), dtype=float32)
        x_ques = Tensor("layer_dropout_11/cond/Identity:0", shape=(None, None, 128), dtype=float32)
        c_mask = Tensor("batch_slice_4/Slice:0", shape=(None, None), dtype=bool)
        q_mask = Tensor("batch_slice_5/Slice:0", shape=(None, None), dtype=bool)
        '''
        x_cont, x_ques, c_mask, q_mask = x
        # get similarity matrix S
        # Shape change of K.dot(x_cont, self.W0): [batch_size, time_step, dim] * [dim, 1] = [batch_size, time_step, 1]
        subres0 = K.tile(K.dot(x_cont, self.W0), [1, 1, self.q_maxlen])
        subres1 = K.tile(
            K.permute_dimensions(K.dot(x_ques, self.W1), pattern=(0, 2, 1)),
            [1, self.c_maxlen, 1])
        subres2 = K.batch_dot(x_cont * self.W2,
                              K.permute_dimensions(x_ques, pattern=(0, 2, 1)))
        S = subres0 + subres1 + subres2
        S += self.bias
        q_mask = tf.expand_dims(q_mask, 1)
        # softmax defaults to the last axis, i.e. axis=-1
        S_ = tf.nn.softmax(self.mask_logits(S, q_mask))
        c_mask = tf.expand_dims(c_mask, 2)
        S_T = K.permute_dimensions(
            tf.nn.softmax(self.mask_logits(S, c_mask), axis=1), (0, 2, 1))
        c2q = tf.matmul(S_, x_ques)
        q2c = tf.matmul(tf.matmul(S_, S_T), x_cont)
        result = K.concatenate([x_cont, c2q, x_cont * c2q, x_cont * q2c],
                               axis=-1)

        return result
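
For readers tracing the tiling above, here is a small standalone restatement (plain TensorFlow, toy shapes, random weights; not the original layer) of the trilinear similarity S[i, j] = w0·c_i + w1·q_j + w2·(c_i ⊙ q_j) that the three subres terms implement.

import tensorflow as tf

batch, c_len, q_len, dim = 2, 5, 4, 8
c = tf.random.normal((batch, c_len, dim))      # context
q = tf.random.normal((batch, q_len, dim))      # question
W0 = tf.random.normal((dim, 1))
W1 = tf.random.normal((dim, 1))
W2 = tf.random.normal((1, 1, dim))

part0 = tf.tile(tf.einsum('bcd,de->bce', c, W0), [1, 1, q_len])                          # (batch, c_len, q_len)
part1 = tf.tile(tf.transpose(tf.einsum('bqd,de->bqe', q, W1), [0, 2, 1]), [1, c_len, 1])  # (batch, c_len, q_len)
part2 = tf.matmul(c * W2, tf.transpose(q, [0, 2, 1]))                                     # (batch, c_len, q_len)
S = part0 + part1 + part2
print(S.shape)                                                                            # (2, 5, 4)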
Example 3
def DeltaLayer(encoded_l, encoded_r, negateDiffs=False):
    """
  A Layer which computes all possible absolute differences of
  all pixels. Input are two feature volumes, e.g. result of a conv layer
  Hints:
  - The Reshape reshapes a matrix row-wise, that means,

    Reshape( (6,1) ) ([ 1 2 3
                      4 5 6]) is

                      1
                      2
                      3
                      4
                      5
                      6
  - Algorithm:
    - The left  leg is reshaped to a w*h x 1  column vector (for each channel)
    - The right leg is reshaped to a  1 x w*h row vector (for each channel)
    - The left is tiled along column axis, so from w*h x 1 to w*h x w*h (per channel)
    - The right is tiled along row axis, so from 1 x w*h to w*h x w*h
    - The absolute difference is calculated
  Args:
      encoded_l, encoded_r : left and right image tensor (batchsize,w,h,channels)
                             must have same size
      negateDiffs: if True then not abs(diffs), but -abs(diffs) is returned.
                   Default: False
  Returns:
      difference tensor, has size (batchsize, w*h, w*h, channels)
  """
    w = encoded_l.shape[1]
    h = encoded_l.shape[2]
    chan = encoded_l.shape[3]
    reshapel = Reshape((w * h, 1, chan))  # reshape layer
    reshaped_l = reshapel(encoded_l)
    reshaper = Reshape((1, w * h, chan))
    reshaped_r = reshaper(encoded_r)

    # Four dimensions because the first one is the batch dimension, i.e. Reshape already outputs a 4-D tensor
    tiled_l = Lambda(lambda x: K.tile(x, [1, 1, w * h, 1]))(reshaped_l)
    tiled_r = Lambda(lambda x: K.tile(x, [1, w * h, 1, 1]))(reshaped_r)

    if negateDiffs:
        diff = Lambda(lambda x: -K.abs(x[0] - x[1]))([tiled_l, tiled_r])
    else:
        diff = Lambda(lambda x: K.abs(x[0] - x[1]))([tiled_l, tiled_r])

    # print("diff类型+++++++++++++", diff)

    return diff
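
A minimal wiring sketch (assumed tf.keras imports, not from the original file): feed two small feature volumes through DeltaLayer and confirm the (batch, w*h, w*h, channels) output shape.

from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Lambda, Reshape
from tensorflow.keras.models import Model

left = Input(shape=(4, 4, 3))
right = Input(shape=(4, 4, 3))
diff = DeltaLayer(left, right)        # expected shape (None, 16, 16, 3)
model = Model([left, right], diff)
model.summary()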
Example 4
    def call(self, inputs, **kwargs):
        inputs, memory_length = inputs
        memory_length = K.cast(memory_length[0][0], 'int32')
        batch_size = K.cast(K.shape(inputs)[0], 'int32')
        seq_len = K.cast(K.shape(inputs)[1], 'int32')

        # Build new memory
        pad = K.tile(inputs[0:1, ...], (self.batch_size - batch_size, 1, 1))
        padded = K.concatenate([inputs, pad], axis=0)              # (self.batch_size, seq_len, output_dim)
        new_memory = K.concatenate([self.memory, padded], axis=1)  # (self.batch_size, self.memory_len + self.target_len + seq_len, ...)
        new_memory = tf.slice(                                     # (self.batch_size, self.memory_len + self.target_len, output_dim)
            new_memory,
            (0, seq_len, 0),
            (self.batch_size, self.memory_len + self.target_len, self.output_dim),
        )
        self.add_update(K.update(self.memory, new_memory), inputs)

        # Build output
        old_memory = tf.slice(                                     # (batch_size, memory_length, output_dim)
            new_memory,
            (0, K.maximum(0, self.memory_len + self.target_len - seq_len - memory_length), 0),
            (batch_size, K.minimum(self.memory_len, memory_length), self.output_dim),
        )

        return old_memory
Example 5
 def _pad(self, y):
     if self.N > self.num_leaves:
         # pads the encoding with zeros in the place of non-leaf nodes
         # cast in case our labels are ints
         y = tf.cast(y, self.p.dtype)
         P = K.tile(self.p, (K.shape(y)[0], 1))
         return K.concatenate((y, P))
 def create_inital_state(inputs, hidden_size):
     # We are not using initial states, but need to pass something to the K.rnn function
     fake_state = K.zeros_like(inputs)  # <= (batch_size, enc_seq_len, latent_dim)
     fake_state = K.sum(fake_state, axis=[1, 2])  # <= (batch_size)
     fake_state = K.expand_dims(fake_state)  # <= (batch_size, 1)
     fake_state = K.tile(fake_state, [1, hidden_size])  # <= (batch_size, latent_dim)
     return fake_state
    def call(self, x):
        print(x)
        features_dim = x.shape[-1].value
        step_dim = x.shape[-2].value
        print(K.reshape(self.kernel, (-1, features_dim)))  # n, d
        print(K.reshape(self.W, (features_dim, 1)))  # w= dx1
        print(K.dot(K.reshape(self.kernel, (-1, features_dim)), K.reshape(self.W, (features_dim, 1))))  # nx1

        eij = K.reshape(K.dot(K.reshape(self.kernel, (-1, features_dim)), K.reshape(self.W, (features_dim, 1))),
                        (-1, step_dim))  # batch,step
        print(eij)

        eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)


        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        a = tf.transpose(a,(1,0))
        print(a)

        print("x:")
        print(self.kernel)
        weighted_input = self.kernel * a  # broadcast to matching dimensions and multiply: N x T x K
        print(weighted_input.shape)
        temp = K.sum(weighted_input, axis=0)  # N x K, sum of the weighted inputs
        temp = K.tile(K.expand_dims(temp, 0), [step_dim, 1])
        temp = keras.layers.concatenate([self.kernel, temp])
        temp = K.dot(temp, self.W2) + self.b2
        return x + temp
Example 8
 def call(self, inputs, **kwargs):
     memory, speaker_embedding = inputs
     tiled_speaker_embeddings = K.tile(
         K.expand_dims(speaker_embedding, axis=1),
         [1, K.shape(memory)[1], 1])
     conditioned_memory = K.concatenate([memory, tiled_speaker_embeddings],
                                        axis=-1)
     return conditioned_memory
    def call(self, inputs):
        batch_size = K.shape(inputs)[0]
        num_rows = K.int_shape(inputs)[1]
        num_cols = K.int_shape(inputs)[2]
        num_channels = K.int_shape(inputs)[3]
        n = num_rows * num_cols
        X = K.reshape(inputs, (batch_size, num_channels, n))
        factor = K.cast(1 / n, K.floatx())
        I_hat = factor * (K.eye(n) - factor * K.ones((n, n)))
        I_hat = K.tile(
            K.expand_dims(I_hat, axis=0),
            (batch_size, 1, 1))  # One identity matrix per sample in batch
        Sigma = K.batch_dot(K.batch_dot(X, I_hat),
                            K.permute_dimensions(X, (0, 2, 1)))

        # Pre-normalization
        trace = K.sum(K.sum(K.eye(num_channels) * Sigma, axis=1,
                            keepdims=True),
                      axis=2,
                      keepdims=True)
        A = Sigma / trace

        # Newton-Schulz Iteration
        Y = A
        Z = K.eye(num_channels)
        Z = K.tile(K.expand_dims(Z, axis=0), (batch_size, 1, 1))
        I3 = 3 * K.eye(num_channels)
        I3 = K.tile(K.expand_dims(I3, axis=0), (batch_size, 1, 1))
        for i in range(self.num_iter):
            Y = 0.5 * K.batch_dot(Y, I3 - K.batch_dot(Z, Y))
            Z = 0.5 * K.batch_dot(I3 - K.batch_dot(Z, Y), Z)

        # Post-compensation
        C = K.sqrt(trace) * Y

        # Extract upper triangular matrix as vector
        ones = K.ones((num_channels, num_channels))
        mask = tf.matrix_band_part(ones, 0,
                                   -1)  # Upper triangular matrix of 0s and 1s
        mask = K.cast(mask, 'bool')  # Convert integer mask to boolean mask
        triuvec = tf.boolean_mask(
            C, mask, axis=1)  # Apply mask to 2nd and 3rd dimension
        triuvec.set_shape((None, num_channels * (num_channels + 1) //
                           2))  # Set correct shape manually (integer division)

        return triuvec
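
A small NumPy sanity check of the pre-normalize / Newton-Schulz / post-compensate pipeline above (written in the standard coupled form with a single shared update term T, so it differs slightly from the interleaved Y/Z updates in the layer): Y converges to the matrix square root of the trace-normalized SPD matrix.

import numpy as np

rng = np.random.default_rng(0)
Q, _ = np.linalg.qr(rng.normal(size=(4, 4)))
Sigma = Q @ np.diag([1.0, 0.5, 0.25, 0.1]) @ Q.T   # SPD test matrix
trace = np.trace(Sigma)
A = Sigma / trace                                   # pre-normalization, as in the layer
Y, Z = A.copy(), np.eye(4)
for _ in range(10):
    T = 0.5 * (3.0 * np.eye(4) - Z @ Y)             # shared update term
    Y, Z = Y @ T, T @ Z
C = np.sqrt(trace) * Y                              # post-compensation
print(np.allclose(C @ C, Sigma, atol=1e-5))         # True: C approximates sqrtm(Sigma)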
Example 10
        def create_inital_state(inputs, hidden_size):
            fake_state = K.zeros_like(
                inputs)  # (batch_size, enc_seq_len, latent_dim)
            fake_state = K.sum(fake_state, axis=[1, 2])  # (batch_size)
            fake_state = K.expand_dims(fake_state)  # (batch_size, 1)
            fake_state = K.tile(fake_state,
                                [1, hidden_size])  # (batch_size, latent_dim)

            return fake_state
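
A quick standalone check (assuming the helper is reachable at module level and its `K` is the Keras backend) of what the zeros-like/sum/expand/tile chain produces: an all-zero tensor of shape (batch_size, hidden_size) whose batch dimension follows `inputs`.

import tensorflow as tf
from tensorflow.keras import backend as K   # assumption about the helper's `K`

enc_out = tf.zeros((3, 10, 32))                      # (batch, enc_seq_len, latent_dim)
state = create_inital_state(enc_out, hidden_size=64)
print(state.shape, float(tf.reduce_sum(state)))      # (3, 64) 0.0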
Example 11
    def call(self, x, mask=None):
        features_dim = self.features_dim
        step_dim = self.step_dim

        t1 = x[:, 0, :]
        t1 = K.expand_dims(t1, 1)
        # t1 = K.tile(t1, [1, step_dim, 1])
        print(t1)
        eij = K.batch_dot(x, t1, (2, 2))  #(?,500,1)
        # eij = K.tile(eij, [1, 1, features_dim])
        print(eij)
        a = K.exp(eij)
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        print(a)
        weighted_input = x * a
        temp = K.sum(weighted_input, axis=1)
        temp = K.expand_dims(temp, 1)
        temp = K.tile(temp, [1, 1, features_dim])
        print(temp)
        alltemp = temp

        for i in range(1, step_dim):
            t1 = x[:, i, :]
            t1 = K.expand_dims(t1, 1)
            # t1 = K.tile(t1, [1, 2, 1])
            eij = K.batch_dot(x, t1, (2, 2))
            # eij = K.tile(eij, [1, 1, features_dim])
            a = K.exp(eij)
            a /= K.cast(
                K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
            weighted_input = x * a
            temp = K.sum(weighted_input, axis=1)
            temp = K.expand_dims(temp, 1)
            temp = K.tile(temp, [1, 1, features_dim])
            alltemp = keras.layers.concatenate([alltemp, temp], 1)

        temp = keras.layers.concatenate([x, alltemp])
        return temp
Example 12
    def _generate_recurrent_dropout_mask(self, inputs, training=None):
        if 0 < self.recurrent_dropout < 1:
            ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1)))
            ones = K.tile(ones, (1, self.units))

            def dropped_inputs():
                return K.dropout(ones, self.recurrent_dropout)

            self._recurrent_dropout_mask = [
                K.in_train_phase(dropped_inputs, ones, training=training)
                for _ in range(4)
            ]
        else:
            self._recurrent_dropout_mask = None
Example 13
 def call(self, inputs, **kwargs):
     length = K.shape(inputs[0])[1] + K.shape(inputs[1])[1]
     inputs = K.tile(
         K.expand_dims(K.arange(length - 1, -1, -1, dtype=K.floatx()), axis=0),
         [K.shape(inputs[0])[0], 1],
     )
     if self.clamp_len is not None:
         inputs = K.clip(inputs, min_value=0, max_value=self.clamp_len)
     inputs = K.expand_dims(inputs, axis=-1)
     output_dim = K.cast(self.output_dim, K.floatx())
     ranges = K.expand_dims(K.arange(0.0, self.output_dim, 2.0), axis=0) / output_dim
     inverse = 1.0 / K.pow(10000.0, ranges)
     positions = inputs * inverse
     return K.concatenate([K.sin(positions), K.cos(positions)], axis=-1)
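
A toy restatement (plain TensorFlow; length 5 and output_dim 8 are values assumed for illustration) of the sinusoidal table built above: positions counted down from length-1 to 0, each expanded into sin/cos pairs.

import tensorflow as tf

length, output_dim = 5, 8
pos = tf.reverse(tf.range(length, dtype=tf.float32), axis=[0])[None, :, None]  # (1, 5, 1)
ranges = tf.range(0.0, float(output_dim), 2.0)[None, :] / output_dim            # (1, 4)
angles = pos * (1.0 / tf.pow(10000.0, ranges))                                  # (1, 5, 4)
emb = tf.concat([tf.sin(angles), tf.cos(angles)], axis=-1)                      # (1, 5, 8)
print(emb.shape)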
Example 14
    def get_initial_state(self, inputs):
        initial_state = K.zeros_like(inputs)
        initial_state = K.sum(initial_state, axis=(1, 2))
        initial_state = K.expand_dims(initial_state)
        initial_state = K.tile(initial_state, [1, self.units])  # (samples, output_dim)
        n = K.identity(initial_state)
        d = K.identity(initial_state)
        h = K.identity(initial_state)

        dtype = initial_state.dtype.name
        min_value = np.array([1E38]).astype(dtype).item()
        a_max = K.identity(initial_state) - min_value
        h = h + self.cell.recurrent_activation(K.expand_dims(self.cell.initial_attention, axis=0))

        return [n, d, h, a_max]
Example 15
 def call(self, inputs):
     filter = np.array(self.filter, np.float32)
     if filter.ndim == 1:
         filter = filter[:, np.newaxis] * filter[np.newaxis, :]
     if self.normalize:
         filter /= np.sum(filter)
     filter = filter[:, :, np.newaxis, np.newaxis]
     filter = K.constant(filter, dtype=inputs.dtype, name='filter')
     filter = K.tile(filter, [1, 1, K.shape(inputs)[-1], 1])
     outputs = nn.depthwise_conv2d(
         inputs,
         filter,
         strides=(1, self.stride, self.stride, 1),
         padding='SAME')
     return outputs
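
A tiny NumPy illustration of how the 1-D filter above turns into the 2-D smoothing kernel (outer product, then normalization): for example [1, 2, 1] becomes the 3x3 binomial kernel that sums to 1.

import numpy as np

f = np.array([1.0, 2.0, 1.0], np.float32)
kernel = f[:, np.newaxis] * f[np.newaxis, :]   # outer product -> 3x3
kernel /= np.sum(kernel)                       # normalize so the blur preserves brightness
print(kernel)                                  # rows: [0.0625 0.125 0.0625], center value 0.25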
Example 16
  def get_initial_state(self, inputs):
    # (samples, timesteps, rows, cols, z, filters)
    initial_state = K.zeros_like(inputs)
    d = [1,2,1,1,1] if self.cell.data_format == 'channels_first' else [1,1,1,1,2]
    initial_state = K.tile(initial_state, d)
    shape = list(self.cell.kernel_shape)
    shape[-1] = self.cell.filters
    initial_state = self.cell.input_conv(initial_state,
                                         array_ops.zeros(tuple(shape)),
                                         padding=self.cell.padding)

    if hasattr(self.cell.state_size, '__len__'):
      return [initial_state for _ in self.cell.state_size]
    else:
      return [initial_state]
Example 17
        def create_inital_state(inputs, hidden_size):  # hidden_size=64
            # print("inputs",inputs)
            # print("hidden_size",hidden_size)
            # print("type(hidden_size)", type(hidden_size))
            # We are not using initial states, but need to pass something to the K.rnn function
            fake_state = K.zeros_like(
                inputs)  # [b,64,512]<= (batch_size, enc_seq_len, latent_dim)
            fake_state = K.sum(fake_state, axis=[1, 2])  # <= (batch_size)
            fake_state = K.expand_dims(fake_state)  # <= (batch_size, 1)
            # print(fake_state)
            # print("------")
            # print(tf.shape(fake_state))
            # print("hidden_size:",hidden_size)

            fake_state = K.tile(
                fake_state,
                [1, hidden_size])  # <= (batch_size, latent_dim) (b,64)
            return fake_state
Example 18
 def call(self, inputs, **kwargs):
     # calculate the mean value for each pixel position across the batch (axis=0)
     mean = K.mean(inputs, axis=0, keepdims=True)
     # calculate the squared differences between pixel values and mean
     squ_diffs = K.square(inputs - mean)
     # calculate the average of the squared differences (variance)
     mean_sq_diff = K.mean(squ_diffs, axis=0, keepdims=True)
     # add a small value to avoid a blow-up when we calculate stdev
     mean_sq_diff += 1e-8
     # square root of the variance (stdev)
     stdev = K.sqrt(mean_sq_diff)
     # calculate the mean standard deviation across each pixel coord
     mean_pix = K.mean(stdev, keepdims=True)
     # scale this up to be the size of one input feature map for each sample
     shape = K.shape(inputs)
     output = K.tile(mean_pix, (shape[0], shape[1], shape[2], 1))
     # concatenate with the output
     combined = K.concatenate([inputs, output], axis=-1)
     return combined
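
A plain-TensorFlow restatement (a sketch with toy shapes, not the layer itself) of the statistic computed above: one scalar, the batch-wise standard deviation averaged over all positions and channels, appended as an extra feature map.

import tensorflow as tf

x = tf.random.normal((4, 8, 8, 3))                                   # (batch, H, W, C)
var = tf.reduce_mean(tf.square(x - tf.reduce_mean(x, axis=0, keepdims=True)),
                     axis=0, keepdims=True) + 1e-8
mean_pix = tf.reduce_mean(tf.sqrt(var), keepdims=True)               # single scalar, shape (1, 1, 1, 1)
extra = tf.tile(mean_pix, (4, 8, 8, 1))                              # one constant feature map per sample
print(tf.concat([x, extra], axis=-1).shape)                          # (4, 8, 8, 4)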
Example 19
 def _reshape_mask(self, mask):
     seq_len = K.shape(mask)[1]
     mask = K.expand_dims(mask, axis=1)
     mask = K.tile(mask, [1, self.num_head, 1])
     return K.reshape(mask, (-1, seq_len))
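
A shape sketch with toy values of the per-head mask expansion above: (batch, seq_len) becomes (batch * num_head, seq_len) by repeating each row once per attention head.

import tensorflow as tf

mask = tf.constant([[1, 1, 0], [1, 0, 0]])                           # (batch=2, seq_len=3)
num_head = 4
expanded = tf.reshape(tf.tile(mask[:, None, :], [1, num_head, 1]), (-1, 3))
print(expanded.shape)                                                 # (8, 3)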
Example 20
    def _attention_layer(self, memory_plus_inputs, ns_plus_one, rpe_out,
                         rpe_neighbor1, rpe_neighbor2, ws):
        from_length = self.num_memory_slots + 1
        to_length = self.num_memory_slots + 1

        q_bias, k_bias, v_bias = array_ops.split(ws["attention_bias"],
                                                 3,
                                                 axis=0)

        # [B, F, C]
        query_layer = memory_plus_inputs * K.expand_dims(K.sqrt(ns_plus_one))
        # [B, F, N*H]
        query_layer = K.dot(query_layer,
                            ws["attention_kernel"][:, :self.units])
        # [B, F, N*H]
        query_layer = K.bias_add(query_layer, q_bias)
        # [B, F, N, H]
        query_layer = array_ops.reshape(
            query_layer,
            [-1, from_length, self.num_attention_heads, self.size_per_head])
        # [B, N, F, H]
        query_layer = array_ops.transpose(query_layer, perm=[0, 2, 1, 3])
        # [B*N, F, H]
        query_layer = array_ops.reshape(
            query_layer, shape=[-1, from_length, self.size_per_head])

        # [B, T, C]
        key_layer = memory_plus_inputs * K.expand_dims(K.sqrt(ns_plus_one))
        # [B, T, N*H]
        key_layer = K.dot(key_layer,
                          ws["attention_kernel"][:, self.units:self.units * 2])
        # [B, T, N*H]
        key_layer = K.bias_add(key_layer, k_bias)
        # [B, T, N, H]
        key_layer = array_ops.reshape(
            key_layer,
            [-1, to_length, self.num_attention_heads, self.size_per_head])
        # [B, N, T, H]
        key_layer = array_ops.transpose(key_layer, perm=[0, 2, 1, 3])
        # [B*N, T, H]
        key_layer = array_ops.reshape(
            key_layer, shape=[-1, to_length, self.size_per_head])

        # [B, T, N*H]
        value_layer = K.dot(
            memory_plus_inputs,
            ws["attention_kernel"][:, self.units * 2:self.units * 3])
        # [B, T, N*H]
        value_layer = K.bias_add(value_layer, v_bias)
        # [B, T, N, H]
        value_layer = array_ops.reshape(
            value_layer,
            [-1, to_length, self.num_attention_heads, self.size_per_head])
        # [B, N, T, H]
        value_layer = array_ops.transpose(value_layer, perm=[0, 2, 1, 3])
        # [B*N, T, H]
        value_layer = array_ops.reshape(
            value_layer, shape=[-1, to_length, self.size_per_head])

        # [B*N, 1, T]
        attention_scores = K.batch_dot(query_layer[:, -1:, :],
                                       key_layer,
                                       axes=[2, 2])

        if self.use_relative_position:
            # [B, T, N*H]
            r = K.dot(rpe_out, ws["rel_kernel"])
            # [B, T, N, H]
            r = array_ops.reshape(
                r,
                [-1, to_length, self.num_attention_heads, self.size_per_head])
            # [B, N, T, H]
            r = array_ops.transpose(r, perm=[0, 2, 1, 3])
            # [B*N, T, H]
            r = array_ops.reshape(r, [-1, to_length, self.size_per_head])
            # [B*N, 1, T]
            bd = tf.einsum("bfh,bth->bft", query_layer[:, -1:, :], r)
            # [B*N, 1, T]
            attention_scores += bd

        # [B*N, 1, T]
        attention_scores = attention_scores / K.cast(self.size_per_head,
                                                     tf.float32)

        # [B, N, T]
        mask = K.tile(
            K.expand_dims(K.cast(ns_plus_one > 0, tf.float32), axis=1),
            [1, self.num_attention_heads, 1])

        # [B*N, 1, T]
        mask = array_ops.reshape(mask, [-1, 1, to_length])

        # [B*N, 1, T]
        attention_scores -= (1.0 - mask) * 10000.0

        # [B*N, 1, T]
        attention_probs = K.softmax(attention_scores)

        # [B*N, 1, H]
        context_layer = K.batch_dot(attention_probs, value_layer, axes=[2, 1])

        # [B, N, H]
        context_layer = array_ops.reshape(
            context_layer, [-1, self.num_attention_heads, self.size_per_head])

        # [B, N*H]
        context_layer = array_ops.reshape(
            context_layer, [-1, self.num_attention_heads * self.size_per_head])

        # -----------------------------

        # [B*N, F, H]
        neighbor_score = (query_layer[:, 1:, :] * key_layer[:, :-1, :] +
                          query_layer[:, :-1, :] * key_layer[:, 1:, :])

        if self.use_relative_position:
            # [B, F, N*H]
            r = K.dot(rpe_neighbor1, ws["rel_kernel"])
            # [B, F, N, H]
            r = array_ops.reshape(r, [
                -1, self.num_memory_slots, self.num_attention_heads,
                self.size_per_head
            ])
            # [B, N, F, H]
            r = array_ops.transpose(r, perm=[0, 2, 1, 3])
            # [B*N, F, H]
            r = array_ops.reshape(
                r, [-1, self.num_memory_slots, self.size_per_head])
            # [B*N, F, H]
            bd = query_layer[:, 1:, :] * r
            # [B*N, F, H]
            neighbor_score += bd

            # [B, F, N*H]
            r = K.dot(rpe_neighbor2, ws["rel_kernel"])
            # [B, F, N, H]
            r = array_ops.reshape(r, [
                -1, self.num_memory_slots, self.num_attention_heads,
                self.size_per_head
            ])
            # [B, N, F, H]
            r = array_ops.transpose(r, perm=[0, 2, 1, 3])
            # [B*N, F, H]
            r = array_ops.reshape(
                r, [-1, self.num_memory_slots, self.size_per_head])
            # [B*N, F, H]
            bd = query_layer[:, :-1, :] * r
            # [B*N, F, H]
            neighbor_score += bd

        # [B*N, F]
        neighbor_score = K.sum(neighbor_score, axis=-1)
        # [B, N, F]
        neighbor_score = array_ops.reshape(
            neighbor_score,
            [-1, self.num_attention_heads, self.num_memory_slots])
        # [B, F]
        neighbor_score = K.sum(neighbor_score, axis=1)

        # [B, F]
        mask = K.cast(ns_plus_one[:, :-1] > 0, tf.float32)

        # [B, F]
        neighbor_score += (1.0 - mask) * 10000.0

        return context_layer, neighbor_score
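
As a side note on the `attention_scores -= (1.0 - mask) * 10000.0` step above, here is a standalone toy illustration of that additive masking trick: subtracting a large constant from invalid positions pushes their softmax probability to effectively zero.

import tensorflow as tf

scores = tf.constant([[2.0, 1.0, 0.5]])
mask = tf.constant([[1.0, 1.0, 0.0]])            # last position is invalid
masked = scores - (1.0 - mask) * 10000.0
print(tf.nn.softmax(masked).numpy())             # third probability is ~0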
Example 21
 def broadcast(x):
     return K.tile(x[:, np.newaxis], [1, dlatent_broadcast, 1])
    def call(self, x, mask=None):
        '''
        Return an anchor box tensor based on the shape of the input tensor.

        The logic implemented here is identical to the logic in the module `ssd_box_encode_decode_utils.py`.

        Note that this tensor does not participate in any graph computations at runtime. It is being created
        as a constant once during graph creation and is just being output along with the rest of the model output
        during runtime. Because of this, all logic is implemented as Numpy array operations and it is sufficient
        to convert the resulting Numpy array into a Keras tensor at the very end before outputting it.

        Arguments:
            x (tensor): 4D tensor of shape `(batch, channels, height, width)` if `dim_ordering = 'th'`
                or `(batch, height, width, channels)` if `dim_ordering = 'tf'`. The input for this
                layer must be the output of the localization predictor layer.
        '''

        # Compute box width and height for each aspect ratio
        # The shorter side of the image will be used to compute `w` and `h` using `scale` and `aspect_ratios`.
        size = min(self.img_height, self.img_width)
        # Compute the box widths and heights for all aspect ratios
        wh_list = []
        for ar in self.aspect_ratios:
            if (ar == 1):
                # Compute the regular anchor box for aspect ratio 1.
                box_height = box_width = self.this_scale * size
                wh_list.append((box_width, box_height))
                if self.two_boxes_for_ar1:
                    # Compute one slightly larger version using the geometric mean of this scale value and the next.
                    box_height = box_width = np.sqrt(
                        self.this_scale * self.next_scale) * size
                    wh_list.append((box_width, box_height))
            else:
                box_height = self.this_scale * size / np.sqrt(ar)
                box_width = self.this_scale * size * np.sqrt(ar)
                wh_list.append((box_width, box_height))
        wh_list = np.array(wh_list)
        """
        # We need the shape of the input tensor
        if K.image_dim_ordering() == 'tf':
            batch_size, feature_map_height, feature_map_width, feature_map_channels = x._keras_shape
        else: # Not yet relevant since TensorFlow is the only supported backend right now, but it can't harm to have this in here for the future
            batch_size, feature_map_channels, feature_map_height, feature_map_width = x._keras_shape

        # Compute the grid of box center points. They are identical for all aspect ratios.

        # Compute the step sizes, i.e. how far apart the anchor box center points will be vertically and horizontally.
        """
        batch_size, feature_map_height, feature_map_width, feature_map_channels = K.int_shape(
            x)  #

        if (self.this_steps is None):
            step_height = self.img_height / feature_map_height
            step_width = self.img_width / feature_map_width
        else:
            if isinstance(self.this_steps,
                          (list, tuple)) and (len(self.this_steps) == 2):
                step_height = self.this_steps[0]
                step_width = self.this_steps[1]
            elif isinstance(self.this_steps, (int, float)):
                step_height = self.this_steps
                step_width = self.this_steps
        # Compute the offsets, i.e. at what pixel values the first anchor box center point will be from the top and from the left of the image.
        if (self.this_offsets is None):
            offset_height = 0.5
            offset_width = 0.5
        else:
            if isinstance(self.this_offsets,
                          (list, tuple)) and (len(self.this_offsets) == 2):
                offset_height = self.this_offsets[0]
                offset_width = self.this_offsets[1]
            elif isinstance(self.this_offsets, (int, float)):
                offset_height = self.this_offsets
                offset_width = self.this_offsets
        # Now that we have the offsets and step sizes, compute the grid of anchor box center points.
        cy = np.linspace(offset_height * step_height,
                         (offset_height + feature_map_height - 1) *
                         step_height, feature_map_height)
        cx = np.linspace(offset_width * step_width,
                         (offset_width + feature_map_width - 1) * step_width,
                         feature_map_width)
        cx_grid, cy_grid = np.meshgrid(cx, cy)
        cx_grid = np.expand_dims(
            cx_grid, -1
        )  # This is necessary for np.tile() to do what we want further down
        cy_grid = np.expand_dims(
            cy_grid, -1
        )  # This is necessary for np.tile() to do what we want further down

        # Create a 4D tensor template of shape `(feature_map_height, feature_map_width, n_boxes, 4)`
        # where the last dimension will contain `(cx, cy, w, h)`
        boxes_tensor = np.zeros(
            (feature_map_height, feature_map_width, self.n_boxes, 4))

        boxes_tensor[:, :, :, 0] = np.tile(cx_grid,
                                           (1, 1, self.n_boxes))  # Set cx
        boxes_tensor[:, :, :, 1] = np.tile(cy_grid,
                                           (1, 1, self.n_boxes))  # Set cy
        boxes_tensor[:, :, :, 2] = wh_list[:, 0]  # Set w
        boxes_tensor[:, :, :, 3] = wh_list[:, 1]  # Set h

        # Convert `(cx, cy, w, h)` to `(xmin, xmax, ymin, ymax)`
        boxes_tensor = convert_coordinates(boxes_tensor,
                                           start_index=0,
                                           conversion='centroids2corners')

        # If `clip_boxes` is enabled, clip the coordinates to lie within the image boundaries
        if self.clip_boxes:
            x_coords = boxes_tensor[:, :, :, [0, 2]]
            x_coords[x_coords >= self.img_width] = self.img_width - 1
            x_coords[x_coords < 0] = 0
            boxes_tensor[:, :, :, [0, 2]] = x_coords
            y_coords = boxes_tensor[:, :, :, [1, 3]]
            y_coords[y_coords >= self.img_height] = self.img_height - 1
            y_coords[y_coords < 0] = 0
            boxes_tensor[:, :, :, [1, 3]] = y_coords

        # If `normalize_coords` is enabled, normalize the coordinates to be within [0,1]
        if self.normalize_coords:
            boxes_tensor[:, :, :, [0, 2]] /= self.img_width
            boxes_tensor[:, :, :, [1, 3]] /= self.img_height

        # TODO: Implement box limiting directly for `(cx, cy, w, h)` so that we don't have to unnecessarily convert back and forth.
        if self.coords == 'centroids':
            # Convert `(xmin, ymin, xmax, ymax)` back to `(cx, cy, w, h)`.
            boxes_tensor = convert_coordinates(boxes_tensor,
                                               start_index=0,
                                               conversion='corners2centroids',
                                               border_pixels='half')
        elif self.coords == 'minmax':
            # Convert `(xmin, ymin, xmax, ymax)` to `(xmin, xmax, ymin, ymax)`.
            boxes_tensor = convert_coordinates(boxes_tensor,
                                               start_index=0,
                                               conversion='corners2minmax',
                                               border_pixels='half')

        # Create a tensor to contain the variances and append it to `boxes_tensor`. This tensor has the same shape
        # as `boxes_tensor` and simply contains the same 4 variance values for every position in the last axis.
        variances_tensor = np.zeros_like(
            boxes_tensor
        )  # Has shape `(feature_map_height, feature_map_width, n_boxes, 4)`
        variances_tensor += self.variances  # Long live broadcasting
        # Now `boxes_tensor` becomes a tensor of shape `(feature_map_height, feature_map_width, n_boxes, 8)`
        boxes_tensor = np.concatenate((boxes_tensor, variances_tensor),
                                      axis=-1)

        # Now prepend one dimension to `boxes_tensor` to account for the batch size and tile it along
        # The result will be a 5D tensor of shape `(batch_size, feature_map_height, feature_map_width, n_boxes, 8)`
        boxes_tensor = np.expand_dims(boxes_tensor, axis=0)
        boxes_tensor = K.tile(K.constant(boxes_tensor, dtype='float32'),
                              (K.shape(x)[0], 1, 1, 1, 1))

        return boxes_tensor
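
A compact NumPy walk-through (toy feature-map size and step values, chosen only for illustration) of the center-grid construction used above: linspace per axis, meshgrid, then tiling each center once per box.

import numpy as np

fm_h, fm_w, n_boxes = 3, 4, 2
step_h, step_w, offset = 8.0, 8.0, 0.5
cy = np.linspace(offset * step_h, (offset + fm_h - 1) * step_h, fm_h)
cx = np.linspace(offset * step_w, (offset + fm_w - 1) * step_w, fm_w)
cx_grid, cy_grid = np.meshgrid(cx, cy)
boxes = np.zeros((fm_h, fm_w, n_boxes, 4))
boxes[..., 0] = np.tile(cx_grid[..., np.newaxis], (1, 1, n_boxes))   # cx
boxes[..., 1] = np.tile(cy_grid[..., np.newaxis], (1, 1, n_boxes))   # cy
print(boxes[..., :2].reshape(-1, 2)[:4])                             # first few anchor centers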
Example 23
    def call(self, x, mask=None):
        size = min(self.img_height, self.img_width)

        # Compute the width and height for each aspect ratio
        w_h_list = []
        for ar in self.aspect_ratios:
            if (ar == 1):
                box_height = box_width = self.this_scale * size
                w_h_list.append((box_width, box_height))

                if (self.two_boxes_for_ar1):
                    # Compute a slightly larger version using this scale value and the next
                    box_height = box_width = np.sqrt(
                        self.this_scale * self.next_scale) * size
                    w_h_list.append((box_width, box_height))
            else:
                box_height = self.this_scale * size / np.sqrt(ar)
                box_width = self.this_scale * size * np.sqrt(ar)
                w_h_list.append((box_width, box_height))
        w_h_list = np.array(w_h_list)

        batch_size, feature_map_height, feature_map_width, feature_map_channels = x.shape

        if self.this_steps is None:
            step_height = self.img_height / feature_map_height
            step_width = self.img_width / feature_map_width
        else:
            if isinstance(self.this_steps,
                          (list, tuple)) and (len(self.this_steps) == 2):
                step_height = self.this_steps[0]
                step_width = self.this_steps[1]
            elif isinstance(self.this_steps, (int, float)):
                step_height = self.this_steps
                step_width = self.this_steps
        # Compute the offsets
        if self.this_offsets is None:
            offset_height = 0.5
            offset_width = 0.5
        else:
            if isinstance(self.this_offsets,
                          (list, tuple)) and (len(self.this_offsets) == 2):
                offset_height = self.this_offsets[0]
                offset_width = self.this_offsets[1]
            elif isinstance(self.this_offsets, (int, float)):
                offset_height = self.this_offsets
                offset_width = self.this_offsets

        cy = np.linspace(offset_height * step_height,
                         (offset_height + feature_map_height - 1) *
                         step_height, feature_map_height)
        cx = np.linspace(offset_width * step_width,
                         (offset_width + feature_map_width - 1) * step_width,
                         feature_map_width)

        cx_grid, cy_grid = np.meshgrid(cx, cy)

        cx_grid = np.expand_dims(cx_grid, -1)
        cy_grid = np.expand_dims(cy_grid, -1)

        boxes_tensor = np.zeros(
            (feature_map_height, feature_map_width, self.n_boxes, 4))
        boxes_tensor[:, :, :, 0] = np.tile(cx_grid, (1, 1, self.n_boxes))
        boxes_tensor[:, :, :, 1] = np.tile(cy_grid, (1, 1, self.n_boxes))
        boxes_tensor[:, :, :, 2] = w_h_list[:, 0]
        boxes_tensor[:, :, :, 3] = w_h_list[:, 1]

        boxes_tensor = convert_coordinates(boxes_tensor,
                                           start_index=0,
                                           conversion="centroids2corners")
        if self.clip_boxes:
            x_coords = boxes_tensor[:, :, :, [0, 2]]
            x_coords[x_coords >= self.img_width] = self.img_width - 1
            x_coords[x_coords < 0] = 0
            boxes_tensor[:, :, :, [0, 2]] = x_coords
            y_coords = boxes_tensor[:, :, :, [1, 3]]
            y_coords[y_coords >= self.img_height] = self.img_height - 1
            y_coords[y_coords < 0] = 0
            boxes_tensor[:, :, :, [1, 3]] = y_coords

        if self.normalize_coords:
            boxes_tensor[:, :, :, [0, 2]] /= self.img_width
            boxes_tensor[:, :, :, [1, 3]] /= self.img_height

        if self.coords == "centroids":
            boxes_tensor = convert_coordinates(boxes_tensor,
                                               start_index=0,
                                               conversion="corners2centroids",
                                               border_pixels="half")

        variances_tensor = np.zeros_like(boxes_tensor)
        variances_tensor += self.variances

        boxes_tensor = np.concatenate((boxes_tensor, variances_tensor),
                                      axis=-1)

        boxes_tensor = np.expand_dims(boxes_tensor, axis=0)

        boxes_tensor = K.tile(K.constant(boxes_tensor, dtype="float32"),
                              (K.shape(x)[0], 1, 1, 1, 1))

        #shape = (feature_map_height,feature_map_width,n_boxes,8)
        return boxes_tensor
Example 24
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tf.Tensor
        Final convolutional layer features.
    anchors : np.array, list
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_xy: tf.Tensor
        (x, y) box predictions adjusted by spatial location in conv layer.
    box_wh: tf.Tensor
        (w, h) box predictions adjusted by anchors and conv spatial resolution.
    box_conf: tf.Tensor
        Probability estimate for whether each box contains any object.
    box_class_pred: tf.Tensor
        Probability distribution estimate for each box over class labels.

    """

    num_anchors = len(anchors)

    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Static implementation for fixed models.
    # TODO: Remove or add option for static implementation.
    # _, conv_height, conv_width, _ = K.int_shape(feats)
    # conv_dims = K.variable([conv_width, conv_height])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last

    # In YOLO the height index is the innermost iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    # TODO: Repeat_elements and tf.split don't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
    conv_width_index = K.tile(K.expand_dims(conv_width_index, 0),
                              [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # Static generation of conv_index:
    # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)])
    # conv_index = conv_index[:, [1, 0]]  # swap columns for YOLO ordering.
    # conv_index = K.variable(
    #     conv_index.reshape(1, conv_height, conv_width, 1, 2))
    # feats = Reshape(
    #     (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats)

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_confidence, box_xy, box_wh, box_class_probs
    def call(self, input_tensor, training=None):
        input_transposed = tf.transpose(input_tensor, [3, 0, 1, 2, 4])
        input_shape = K.shape(input_transposed)
        input_tensor_reshaped = K.reshape(input_transposed, [
            input_shape[1] * input_shape[0], self.input_height,
            self.input_width, self.input_num_atoms
        ])
        input_tensor_reshaped.set_shape(
            (None, self.input_height, self.input_width, self.input_num_atoms))

        if self.upsamp_type == 'resize':
            upsamp = K.resize_images(input_tensor_reshaped, self.scaling,
                                     self.scaling, 'channels_last')
            outputs = K.conv2d(upsamp,
                               kernel=self.W,
                               strides=(1, 1),
                               padding=self.padding,
                               data_format='channels_last')
        elif self.upsamp_type == 'subpix':
            conv = K.conv2d(input_tensor_reshaped,
                            kernel=self.W,
                            strides=(1, 1),
                            padding='same',
                            data_format='channels_last')
            outputs = tf.depth_to_space(conv, self.scaling)
        else:
            batch_size = input_shape[1] * input_shape[0]

            # Infer the dynamic output shape:
            out_height = deconv_output_length(input_length=self.input_height,
                                              stride=self.scaling,
                                              filter_size=self.kernel_size,
                                              padding=self.padding)
            out_width = deconv_output_length(input_length=self.input_width,
                                             stride=self.scaling,
                                             filter_size=self.kernel_size,
                                             padding=self.padding)
            output_shape = (batch_size, out_height, out_width,
                            self.num_capsule * self.num_atoms)

            outputs = K.conv2d_transpose(input_tensor_reshaped,
                                         self.W,
                                         output_shape,
                                         (self.scaling, self.scaling),
                                         padding=self.padding,
                                         data_format='channels_last')

        votes_shape = K.shape(outputs)
        _, conv_height, conv_width, _ = outputs.get_shape()

        votes = K.reshape(outputs, [
            input_shape[1], input_shape[0], votes_shape[1], votes_shape[2],
            self.num_capsule, self.num_atoms
        ])
        votes.set_shape((None, self.input_num_capsule, conv_height.value,
                         conv_width.value, self.num_capsule, self.num_atoms))

        logit_shape = K.stack([
            input_shape[1], input_shape[0], votes_shape[1], votes_shape[2],
            self.num_capsule
        ])
        biases_replicated = K.tile(self.b,
                                   [votes_shape[1], votes_shape[2], 1, 1])

        activations = update_routing(votes=votes,
                                     biases=biases_replicated,
                                     logit_shape=logit_shape,
                                     num_dims=6,
                                     input_dim=self.input_num_capsule,
                                     output_dim=self.num_capsule,
                                     num_routing=self.routings)

        return activations
Example 26
    def call(self, inputs, training=None):
        #inputs_hat.shape[None,input_num_capsule,num_capsule,dim_capsule]
        inputs_hat = tf.tensordot(inputs, self.reweight_W, axes=(-1, 0))
        inputs_hat = K.permute_dimensions(inputs_hat, (0, 2, 1, 3))

        b = K.expand_dims(self.routing_init, 0)
        b = K.tile(b, [K.shape(inputs_hat)[0], 1, 1])

        assert self.routings > 0, 'The routings should be > 0.'
        for i in range(self.routings):
            # c.shape=[batch_size, num_capsule, input_num_capsule]
            c = softmax(b, axis=1)

            # c.shape =  [batch_size, num_capsule, input_num_capsule]
            # inputs_hat.shape=[None, num_capsule, input_num_capsule , dim_capsule]
            # The first two dimensions as `batch` dimension,
            # then matmul: [input_num_capsule] x [input_num_capsule, dim_capsule] -> [dim_capsule].
            # outputs.shape=[None, num_capsule, dim_capsule]
            outputs = squash(caps_batch_dot(c, inputs_hat, transpose=False))

            # outputs.shape =  [None, num_capsule, dim_capsule]
            # inputs_hat.shape=[None, num_capsule, input_num_capsule, dim_capsule]
            # The first two dimensions as `batch` dimension,
            # then matmul: [dim_capsule] x [input_num_capsule, dim_capsule]^T -> [input_num_capsule].
            # b.shape=[batch_size, num_capsule, input_num_capsule]
            # b_add = caps_batch_dot(outputs, inputs_hat,transpose=True)
            # norm = (K.max(b,axis=1)-K.min(b_add,axis=1))/(b_add - K.min(b_add,axis=1))
            b += caps_batch_dot(outputs, inputs_hat, transpose=True)

        # End: Routing algorithm -----------------------------------------------------------------------#

        c = softmax(b, axis=1)

        routing_score = K.expand_dims(c, -1)

        attention_output = routing_score * inputs_hat

        field_wise_embeds_list = [
            K.squeeze(embeds, 1) for embeds in tf.split(
                attention_output, attention_output.shape[1], axis=1)
        ]
        # HiFM module
        square_of_sum_list = [
            tf.square(reduce_sum(field_i_vectors, axis=1, keep_dims=True))
            for field_i_vectors in field_wise_embeds_list
        ]

        sum_of_square_list = [
            reduce_sum(field_i_vectors * field_i_vectors,
                       axis=1,
                       keep_dims=True)
            for field_i_vectors in field_wise_embeds_list
        ]

        field_fm = tf.concat([
            square_of_sum - sum_of_square for square_of_sum, sum_of_square in
            zip(square_of_sum_list, sum_of_square_list)
        ], 1)

        hi_fm = reduce_sum(field_fm, axis=1)
        hi_fm = reduce_sum(field_fm * self.kernel_fm, axis=1)
        hi_fm = tf.nn.bias_add(hi_fm, self.bias_fm)

        # mf
        field_wise_vectors = reduce_sum(attention_output,
                                        axis=2,
                                        keep_dims=False)

        left = []
        right = []

        for i, j in itertools.combinations(list(range(self.num_fields)), 2):
            left.append(i)
            right.append(j)

        embeddings_left = tf.gather(params=field_wise_vectors,
                                    indices=left,
                                    axis=1)
        embeddings_right = tf.gather(params=field_wise_vectors,
                                     indices=right,
                                     axis=1)

        embeddings_prod = embeddings_left * embeddings_right

        field_weighted_embedding = embeddings_prod * self.kernel_mf

        h_mf = reduce_sum(field_weighted_embedding, axis=1)
        h_mf = tf.nn.bias_add(h_mf, self.bias_mf)

        # self-attention
        for _ in range(self.self_attention_layer):
            field_wise_vectors = InteractingLayer(self.self_attention_factor,
                                                  self.head_num,
                                                  True)(field_wise_vectors)
        high_int = reduce_sum(field_wise_vectors * self.kernel_highint, axis=1)
        high_int = tf.nn.bias_add(high_int, self.bias_highint)

        return concat_func([hi_fm, h_mf, high_int]), routing_score
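
A bare-bones sketch of the agreement-based routing loop used above (standalone, with a local squash helper and random prediction vectors; the field-wise FM/MF parts are omitted).

import tensorflow as tf

def squash(v, axis=-1):
    s2 = tf.reduce_sum(tf.square(v), axis=axis, keepdims=True)
    return s2 / (1.0 + s2) * v / tf.sqrt(s2 + 1e-9)

inputs_hat = tf.random.normal((2, 3, 5, 4))                # (batch, num_capsule, input_num_capsule, dim)
b = tf.zeros((2, 3, 5))                                    # routing logits
for _ in range(3):
    c = tf.nn.softmax(b, axis=1)                           # routing weights over num_capsule
    outputs = squash(tf.einsum('bni,bnid->bnd', c, inputs_hat))
    b += tf.einsum('bnd,bnid->bni', outputs, inputs_hat)   # agreement update
print(outputs.shape)                                       # (2, 3, 4)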
Example 27
 def _zip(foo):
   y_val, x_val = foo
   bar = backend.tile(y_val, array_ops.shape(x_val))
   return array_ops.stack([bar, x_val], axis=1)
Example 28
    def call(self, x, mask=None):
        '''
        Return an anchor box tensor based on the shape of the input tensor.
        Arguments:
            x (tensor): 4D tensor of shape `(batch, height, width, channels)`. The input for this
                layer must be the output of the localization predictor layer.
        '''
        #===================== Only the block below needs to change for a different anchor strategy =====================
        # Compute the width and height for each aspect ratio.
        # The shorter side of the image is used to compute `w` and `h` from `scale` and `aspect_ratios`.
        size = min(self.img_height, self.img_width)
        # Compute the box widths and heights for all aspect ratios
        wh_list = []
        for ar in self.aspect_ratios:
            if (ar == 1):
                # Compute the regular anchor box for aspect ratio 1.
                box_height = box_width = self.this_scale * size
                wh_list.append((box_width, box_height))
                if self.two_boxes_for_ar1:
                    # Compute one slightly larger version using the geometric mean of this scale value and the next.
                    box_height = box_width = np.sqrt(self.this_scale * self.next_scale) * size
                    wh_list.append((box_width, box_height))
            else:
                box_height = self.this_scale * size / np.sqrt(ar)
                box_width = self.this_scale * size * np.sqrt(ar)
                wh_list.append((box_width, box_height))
        wh_list = np.array(wh_list)
        #=====================================================================================================
        # We need the shape of the input tensor
        batch_size, feature_map_height, feature_map_width, feature_map_channels = x._keras_shape

        # Get the step sizes.
        step_height = self.this_steps
        step_width = self.this_steps
        # Get the offsets.
        offset_height = self.this_offsets
        offset_width = self.this_offsets
        # Now that we have the offsets and step sizes, compute the grid of anchor box center points.
        cy = np.linspace(offset_height * step_height, (offset_height + feature_map_height - 1) * step_height, feature_map_height)
        cx = np.linspace(offset_width * step_width, (offset_width + feature_map_width - 1) * step_width, feature_map_width)
        cx_grid, cy_grid = np.meshgrid(cx, cy)
        cx_grid = np.expand_dims(cx_grid, -1)
        cy_grid = np.expand_dims(cy_grid, -1)

        # Create a 4D template of shape `(feature_map_height, feature_map_width, n_boxes, 4)` where the last dimension contains `(cx, cy, w, h)`
        boxes_tensor = np.zeros((feature_map_height, feature_map_width, self.n_boxes, 4))

        boxes_tensor[:, :, :, 0] = np.tile(cx_grid, (1, 1, self.n_boxes)) # Set cx
        boxes_tensor[:, :, :, 1] = np.tile(cy_grid, (1, 1, self.n_boxes)) # Set cy
        boxes_tensor[:, :, :, 2] = wh_list[:, 0] # Set w
        boxes_tensor[:, :, :, 3] = wh_list[:, 1] # Set h

        # Convert `(cx, cy, w, h)` to `(xmin, xmax, ymin, ymax)`
        boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='centroids2corners')

        # Normalize the coordinates so that all values lie in [0, 1].
        if self.normalize_coords:
            boxes_tensor[:, :, :, [0, 2]] /= self.img_width
            boxes_tensor[:, :, :, [1, 3]] /= self.img_height

        if self.coords == 'centroids':
            # Convert `(xmin, ymin, xmax, ymax)` back to `(cx, cy, w, h)`.
            boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='corners2centroids', border_pixels='half')
        elif self.coords == 'minmax':
            # Convert `(xmin, ymin, xmax, ymax)` to `(xmin, xmax, ymin, ymax)`.
            boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='corners2minmax', border_pixels='half')

        variances_tensor = np.zeros_like(boxes_tensor) # Has shape `(feature_map_height, feature_map_width, n_boxes, 4)`
        variances_tensor += self.variances
        # Now `boxes_tensor` becomes a tensor of shape `(feature_map_height, feature_map_width, n_boxes, 8)`.
        boxes_tensor = np.concatenate((boxes_tensor, variances_tensor), axis=-1)

        boxes_tensor = np.expand_dims(boxes_tensor, axis=0)
        boxes_tensor = K.tile(K.constant(boxes_tensor, dtype='float32'), (K.shape(x)[0], 1, 1, 1, 1))

        return boxes_tensor
Example 29
 def constant(input_batch, size):
     batch_size = K.shape(input_batch)[0]
     return K.tile(K.ones((1, size)), (batch_size, 1))
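
A two-line demo (a sketch, assuming the Keras backend is imported as K): constant() just broadcasts a row of ones to the batch size of input_batch.

import tensorflow as tf
from tensorflow.keras import backend as K

batch = tf.zeros((5, 7))
print(constant(batch, size=3).shape)   # (5, 3), all ones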
Example 30
 def msssim(self, y_true, y_pred):
     '''
     Compute multiscale ssim according to Zhao 2016.
     
     Has only been tested with tensorflow backend (channels last) so far!
     
     Uses convolutions to do the calculations in one go.
     
     This function takes proper 2D Keras Tensors (NHWC or NCHW)
     
     # Arguments
         y_true: Keras Tensor with Rank 4: Image to compare to
         y_pred: Keras Tensor with Rank 4: Image to compare
     '''
     # some useful inits
     channels = self.__int_shape(y_pred)[self.channel_dim]
             
     # repeat kernel for each channel
     kernel = K.tile(self.kernels, [1, 1, channels, 1])
     
     # compute means
     mu_true = K.depthwise_conv2d(y_true, kernel, padding='same')
     mu_pred = K.depthwise_conv2d(y_pred, kernel, padding='same')
     
     # compute mean squares
     mu_true_sq = K.square(mu_true)
     mu_pred_sq = K.square(mu_pred)
     mu_true_pred = mu_true * mu_pred
     
     # compute input square
     y_true_sq = K.square(y_true)
     y_pred_sq = K.square(y_pred)
     y_true_pred = y_true * y_pred
     
     # compute variances/covariance
     sigma_true_sq = K.depthwise_conv2d(y_true_sq, kernel, padding='same')
     sigma_pred_sq = K.depthwise_conv2d(y_pred_sq, kernel, padding='same')
     sigma_true_pred = K.depthwise_conv2d(y_true_pred, kernel, padding='same')
     
     # centered squares of variances
     sigma_true_sq -= mu_true_sq
     sigma_pred_sq -= mu_pred_sq
     sigma_true_pred -= mu_true_pred
     
     # compute luminance term (l), select only maximum kernel for each channel
     l = (2 * mu_true_pred + self.c1) / (mu_true_sq + mu_pred_sq + self.c1)
     if self.dim_ordering == 'channels_last':
         l_max = l[:,:,:,(self.num - 1)::self.num]
     else:
         l_max = l[:,(self.num - 1)::self.num,:,:]
             
     # compute contrast-structure term (cs)
     cs = (2 * sigma_true_pred + self.c2) / (sigma_true_sq + sigma_pred_sq +
                                             self.c2)
     
     # compute product of different scale cs
     if self.dim_ordering == 'channels_last':
         pcs = [K.prod(cs[:,:,:,i*self.num:(i+1)*self.num], axis=-1, 
                       keepdims=True) for i in range(channels)]
     else:
         pcs = [K.prod(cs[:,i*self.num:(i+1)*self.num,:,:], axis=1,
                       keepdims=True) for i in range(channels)]
     pcs = K.concatenate(pcs, axis=self.channel_dim)
     
     # compute msssim map
     msssim = l_max * pcs # do normalization?
     return msssim