Example #1
 def call(self, input):
     for i in range(self.num_layer):
         if i == 0:
             cross = Lambda(lambda x: Add()([K.sum(self.W[i] * K.batch_dot(K.reshape(x, (-1, self.input_dim, 1)), x), 1, keepdims = True), self.bias[i], x]))(input)
         else:
             cross = Lambda(lambda x: Add()([K.sum(self.W[i] * K.batch_dot(K.reshape(x, (-1, self.input_dim, 1)), input), 1, keepdims = True), self.bias[i], input]))(cross)
     return Flatten()(cross)
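This Lambda appears to implement the Deep & Cross Network cross step, x_{l+1} = x_0 * (x_l . w_l) + b_l + x_l. Below is a minimal NumPy sketch of that formula with hypothetical shapes; it illustrates the math, not the layer's actual code.

import numpy as np

def cross_step(x0, xl, w, b):
    # x0, xl: (batch, input_dim); w, b: (input_dim,)  -- hypothetical shapes
    xlw = xl @ w                         # (batch,) inner products x_l . w
    return x0 * xlw[:, None] + b + xl    # (batch, input_dim)

x0 = np.random.rand(4, 8)
print(cross_step(x0, x0, np.random.rand(8), np.zeros(8)).shape)  # (4, 8)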
Example #2
 def content_mean_square_error(self, y_true, y_pred):
     y_pred = K.variable(value=y_pred)
     y_true = K.variable(value=y_true)
     _, filters, x_pos, y_pos = self.get_shape(y_pred)
     y_pred = K.reshape(y_pred, (filters, x_pos * y_pos))
     y_true = K.reshape(y_true, (filters, x_pos * y_pos))
     return K.sum(K.square(y_pred - y_true))
Example #3
 def call(self, x):
     #If only Q_seq, K_seq, V_seq are passed in, no mask is applied
     #If Q_seq, K_seq, V_seq, Q_len, V_len are all passed in, the padded positions are masked
     if len(x) == 3:
         Q_seq,K_seq,V_seq = x
         Q_len,V_len = None,None
     elif len(x) == 5:
         Q_seq,K_seq,V_seq,Q_len,V_len = x
     #linear transformations of Q, K and V
     Q_seq = K.dot(Q_seq, self.WQ)
     Q_seq = K.reshape(Q_seq, (-1, K.shape(Q_seq)[1], self.nb_head, self.size_per_head))
     Q_seq = K.permute_dimensions(Q_seq, (0,2,1,3))
     K_seq = K.dot(K_seq, self.WK)
     K_seq = K.reshape(K_seq, (-1, K.shape(K_seq)[1], self.nb_head, self.size_per_head))
     K_seq = K.permute_dimensions(K_seq, (0,2,1,3))
     V_seq = K.dot(V_seq, self.WV)
     V_seq = K.reshape(V_seq, (-1, K.shape(V_seq)[1], self.nb_head, self.size_per_head))
     V_seq = K.permute_dimensions(V_seq, (0,2,1,3))
     #compute the dot products, then mask, then softmax
     A = K.batch_dot(Q_seq, K_seq, axes=[3,3])
     A = K.permute_dimensions(A, (0,3,2,1))
     A = self.Mask(A, V_len, 'add')
     A = K.permute_dimensions(A, (0,3,2,1))    
     A = K.softmax(A)
     #compute the output and mask it
     O_seq = K.batch_dot(A, V_seq, axes=[3,2])
     O_seq = K.permute_dimensions(O_seq, (0,2,1,3))
     O_seq = K.reshape(O_seq, (-1, K.shape(O_seq)[1], self.output_dim))
     O_seq = self.Mask(O_seq, Q_len, 'mul')
     return O_seq
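The reshape/permute pairs above split the last axis into attention heads and move the head axis ahead of the sequence axis. A minimal NumPy sketch of that shape manipulation, with hypothetical sizes:

import numpy as np

batch, seq_len, nb_head, size_per_head = 2, 5, 4, 3
Q = np.random.rand(batch, seq_len, nb_head * size_per_head)
Q = Q.reshape(batch, seq_len, nb_head, size_per_head)  # split the feature axis into heads
Q = Q.transpose(0, 2, 1, 3)                            # (batch, nb_head, seq_len, size_per_head)
print(Q.shape)  # (2, 4, 5, 3)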
Example #4
 def get_output(self, train=False):
     print "LogNormalizedOccupancy", self.output_shape
     X = self.get_input(train)
     # calculate the log occupancies
     log_occs = theano_calc_log_occs(-X, self.chem_affinity)
     # reshape the output so that the forward and reverse complement 
     # occupancies are viewed as different tracks 
     log_occs = K.reshape(log_occs, (X.shape[0], 1, 2*X.shape[1], X.shape[3]))
     if self.steric_hindrance_win_len == 0:
         log_norm_factor = 0
     else:
         # correct occupancies for overlapping binding sites
         occs = K.exp(log_occs)
         kernel = K.ones((1, 1, 1, 2*self.steric_hindrance_win_len-1), dtype='float32')
         win_occ_sum = K.conv2d(occs, kernel, border_mode='same').sum(axis=2, keepdims=True)
         win_prb_all_unbnd = TT.exp(
             K.conv2d(K.log(1-occs), kernel, border_mode='same')).sum(axis=2, keepdims=True)
         log_norm_factor = TT.log(win_occ_sum + win_prb_all_unbnd)
     #start = max(0, self.steric_hindrance_win_len-1)
     #stop = min(self.output_shape[3], 
     #           self.output_shape[3]-(self.steric_hindrance_win_len-1))
     #rv = log_occs[:,:,:,start:stop] - log_norm_factor
     rv = (log_occs - log_norm_factor)
     return K.reshape(
         rv, 
         (X.shape[0], 2*X.shape[1], 1, X.shape[3])
     )
Example #5
 def normalize_inference():
     if needs_broadcasting:
         # In this case we must explicitly broadcast all parameters.
         broadcast_moving_mean = K.reshape(self.moving_mean,
                                           broadcast_shape)
         broadcast_moving_variance = K.reshape(self.moving_variance,
                                               broadcast_shape)
         if self.center:
             broadcast_beta = K.reshape(self.beta, broadcast_shape)
         else:
             broadcast_beta = None
         if self.scale:
             broadcast_gamma = K.reshape(self.gamma,
                                         broadcast_shape)
         else:
             broadcast_gamma = None
         return K.batch_normalization(
             inputs,
             broadcast_moving_mean,
             broadcast_moving_variance,
             broadcast_beta,
             broadcast_gamma,
             epsilon=self.epsilon)
     else:
         return K.batch_normalization(
             inputs,
             self.moving_mean,
             self.moving_variance,
             self.beta,
             self.gamma,
             epsilon=self.epsilon)
Example #6
def make_patches_grid(x, patch_size, patch_stride):
    '''Break image `x` up into a grid of patches.

    input shape: (channels, rows, cols)
    output shape: (rows, cols, channels, patch_rows, patch_cols)
    '''
    from theano.tensor.nnet.neighbours import images2neibs  # TODO: all K, no T
    x = K.expand_dims(x, 0)
    xs = K.shape(x)
    num_rows = 1 + (xs[-2] - patch_size) // patch_stride
    num_cols = 1 + (xs[-1] - patch_size) // patch_stride
    num_channels = xs[-3]
    patches = images2neibs(
        x, (patch_size, patch_size), (patch_stride, patch_stride),
        mode='valid')
    # neibs are sorted per-channel
    patches = K.reshape(patches,
                        (num_channels, K.shape(patches)[0] // num_channels,
                         patch_size, patch_size))
    patches = K.permute_dimensions(patches, (1, 0, 2, 3))
    # arrange in a 2d-grid (rows, cols, channels, px, py)
    patches = K.reshape(
        patches, (num_rows, num_cols, num_channels, patch_size, patch_size))
    patches_norm = K.sqrt(
        K.sum(K.square(patches), axis=(2, 3, 4), keepdims=True))
    return patches, patches_norm
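For intuition, here is a plain NumPy sketch (hypothetical sizes, explicit loops instead of images2neibs) of the same layout: (channels, rows, cols) -> (grid_rows, grid_cols, channels, patch, patch).

import numpy as np

channels, rows, cols, size, stride = 3, 8, 8, 4, 4
x = np.random.rand(channels, rows, cols)
nr = 1 + (rows - size) // stride
nc = 1 + (cols - size) // stride
patches = np.stack([
    np.stack([x[:, i*stride:i*stride+size, j*stride:j*stride+size] for j in range(nc)])
    for i in range(nr)
])
print(patches.shape)  # (2, 2, 3, 4, 4)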
Example #7
    def call(self, x, mask=None):

        assert self.built, 'Layer must be built before being called'
        input_shape = K.int_shape(x)

        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]

        if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:
            x_normed = K.batch_normalization(
                x, self.running_mean, self.running_std,
                self.beta, self.gamma,
                epsilon=self.epsilon)
        else:
            # need broadcasting
            broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
            broadcast_running_std = K.reshape(self.running_std, broadcast_shape)
            broadcast_beta = K.reshape(self.beta, broadcast_shape)
            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
            x_normed = K.batch_normalization(
                x, broadcast_running_mean, broadcast_running_std,
                broadcast_beta, broadcast_gamma,
                epsilon=self.epsilon)

        return x_normed
Example #8
    def call(self, x, mask=None):
        x = K.permute_dimensions(x, (0, 2, 1))
        x = K.reshape(x, (-1, self.input_length))
        x = K.expand_dims(x, 1)
        x = K.expand_dims(x, -1)
        if self.real_filts is not None:
            conv_out_r = K.conv2d(x, self.W_r, strides=self.subsample,
                                  border_mode=self.border_mode,
                                  dim_ordering='th')
        else:
            conv_out_r = x

        if self.complex_filts is not None:
            conv_out_c1 = K.conv2d(x, self.W_c1, strides=self.subsample,
                                   border_mode=self.border_mode,
                                   dim_ordering='th')
            conv_out_c2 = K.conv2d(x, self.W_c2, strides=self.subsample,
                                   border_mode=self.border_mode,
                                   dim_ordering='th')
            conv_out_c = K.sqrt(K.square(conv_out_c1) + K.square(conv_out_c2) + K.epsilon())
            output = K.concatenate((conv_out_r, conv_out_c), axis=1)
        else:
            output = conv_out_r

        output_shape = self.get_output_shape_for((None, self.input_length, self.input_dim))
        output = K.squeeze(output, 3)  # remove the dummy 3rd dimension
        output = K.permute_dimensions(output, (2, 1, 0))
        output = K.reshape(output, (-1, output_shape[1], output.shape[1]*output.shape[2]))
        return output
Example #9
    def get_output(self, train=False):
        print "Input Shape", self.input_shape
        print "ConvolutionDNASequenceBinding", self.output_shape
        X = self.get_input(train)
        if self.use_three_base_encoding:
            X_fwd = X[:,1:,:,:]
            X_rc = X[:,:3,:,:]
        else:
            X_fwd = X
            X_rc = X

        print self.W
        print self.b
        if self.W[1] is not None:
            W = self.W[0][self.W[1],:,:,:]
        else:
            W = self.W[0]
        if self.b[1] is not None:
            b = self.b[0][self.b[1]]
        else:
            b = self.b[0]
        
        fwd_rv = K.conv2d(X_fwd, W, border_mode='valid') \
                 + K.reshape(b, (1, self.nb_motifs, 1, 1))
        rc_rv = K.conv2d(X_rc, W[:,::-1,:,::-1], border_mode='valid') \
                + K.reshape(b, (1, self.nb_motifs, 1, 1))
        rv = K.concatenate((fwd_rv, rc_rv), axis=2)            
        #return rv.dimshuffle((0,3,2,1))
        return rv # K.permute_dimensions(rv, (0,3,2,1))
Example #10
def time_distributed_dense(x, w, b=None, dropout=None,
                           input_dim=None, output_dim=None, timesteps=None, activation='linear'):
    '''Apply y.w + b for every temporal slice y of x.
    '''
    activation = activations.get(activation)

    if not input_dim:
        # won't work with TensorFlow
        input_dim = K.shape(x)[2]
    if not timesteps:
        # won't work with TensorFlow
        timesteps = K.shape(x)[1]
    if not output_dim:
        # won't work with TensorFlow
        output_dim = K.shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))

    x = K.dot(x, w)
    if b is not None:
        x = x + b
    # reshape to 3D tensor
    x = K.reshape(activation(x), (-1, timesteps, output_dim))
    return x
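The core trick is to fold the batch and time axes together, apply a single dense map, and unfold again. A minimal NumPy sketch with hypothetical sizes (dropout and the activation are omitted):

import numpy as np

batch, timesteps, input_dim, output_dim = 2, 4, 3, 5
x = np.random.rand(batch, timesteps, input_dim)
w = np.random.rand(input_dim, output_dim)
b = np.zeros(output_dim)
# collapse (batch, time) into one axis, apply the dense map, then restore the time axis
y = (x.reshape(-1, input_dim) @ w + b).reshape(batch, timesteps, output_dim)
print(y.shape)  # (2, 4, 5)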
Example #11
 def call(self, position):
     inputDim = K.ndim(position)
     positionShape = K.shape(position)
     targetDim = positionShape[-1]
     position = K.reshape(position, (-1, targetDim))
     samples = K.shape(position)[0]
     theta = THT.zeros((samples, 3, 3))
     
     chw = self.toChw(position)
     chw = K.reshape(chw, (samples, targetDim))
     dx = -self.distortion + 2.0 * self.distortion * self.srng.uniform((samples,)) 
     dy = -self.distortion + 2.0 * self.distortion * self.srng.uniform((samples,))
     cX = chw[:, 0] + dx
     cY = chw[:, 1] + dy
     h = K.maximum(chw[:, 2] * (1.0 + self.context), self.minSide)
     w = K.maximum(chw[:, 3] * (1.0 + self.context), self.minSide)
     
     # Calculating the parameters of the transformation
     tx = cX
     ty = cY
     sx = w / 2.0 # Scale x
     sy = h / 2.0 # Scale y
     
     # Setting transformation
     theta = THT.set_subtensor(theta[:, 0, 0], sx)
     theta = THT.set_subtensor(theta[:, 1, 1], sy)
     theta = THT.set_subtensor(theta[:, 0, 2], tx)
     theta = THT.set_subtensor(theta[:, 1, 2], ty)
     theta = THT.set_subtensor(theta[:, 2, 2], 1.0)
     
     thetaShape = K.concatenate([positionShape[:-1], K.shape(theta)[-2:]])
     theta = THT.reshape(theta, thetaShape, ndim=inputDim + 1)
     
     return theta
Example #12
    def call(self, inputs):

        input_shape = K.int_shape(inputs)
        if len(input_shape) != 4:
            raise ValueError('Inputs should have rank ' +
                             str(4) +
                             '; Received input shape:', str(input_shape))

        if self.data_format == 'channels_first':
            batch_size, c, h, w = input_shape
            if batch_size is None:
                batch_size = -1
            rh, rw = self.size
            oh, ow = h * rh, w * rw
            oc = c // (rh * rw)

            out = K.reshape(inputs, (batch_size, rh, rw, oc, h, w))
            out = K.permute_dimensions(out, (0, 3, 4, 1, 5, 2))
            out = K.reshape(out, (batch_size, oc, oh, ow))
            return out

        elif self.data_format == 'channels_last':
            batch_size, h, w, c = input_shape
            if batch_size is None:
                batch_size = -1
            rh, rw = self.size
            oh, ow = h * rh, w * rw
            oc = c // (rh * rw)

            out = K.reshape(inputs, (batch_size, h, w, rh, rw, oc))
            out = K.permute_dimensions(out, (0, 1, 3, 2, 4, 5))
            out = K.reshape(out, (batch_size, oh, ow, oc))
            return out
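This is the usual sub-pixel (depth-to-space) rearrangement. A minimal NumPy sketch of the channels_last branch, with hypothetical sizes:

import numpy as np

batch, h, w, rh, rw, oc = 1, 2, 3, 2, 2, 4
x = np.random.rand(batch, h, w, oc * rh * rw)
out = x.reshape(batch, h, w, rh, rw, oc)
out = out.transpose(0, 1, 3, 2, 4, 5)        # (batch, h, rh, w, rw, oc)
out = out.reshape(batch, h * rh, w * rw, oc)
print(out.shape)  # (1, 4, 6, 4)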
Example #13
 def call(self, X):
     if type(X) is not list or len(X) != 2:
         raise Exception("SquareAttention must be called on a list of two tensors. Got: " + str(X))
         
     frame, position  = X[0], X[1]
     
     # Reshaping the input to exclude the time dimension
     frameShape = K.shape(frame)
     positionShape = K.shape(position)
     (chans, height, width) = frameShape[-3:]
     targetDim = positionShape[-1]
     frame = K.reshape(frame, (-1, chans, height, width))
     position = K.reshape(position, (-1, ) + (targetDim, ))
     
     # Applying the attention
     hw = THT.abs_(position[:, 2] - position[:, 0]) * self.scale / 2.0
     hh = THT.abs_(position[:, 3] - position[:, 1]) * self.scale / 2.0
     position = THT.maximum(THT.set_subtensor(position[:, 0], position[:, 0] - hw), -1.0)
     position = THT.minimum(THT.set_subtensor(position[:, 2], position[:, 2] + hw), 1.0)
     position = THT.maximum(THT.set_subtensor(position[:, 1], position[:, 1] - hh), -1.0)
     position = THT.minimum(THT.set_subtensor(position[:, 3], position[:, 3] + hh), 1.0)
     rX = Data.linspace(-1.0, 1.0, width)
     rY = Data.linspace(-1.0, 1.0, height)
     FX = THT.gt(rX, position[:,0].dimshuffle(0,'x')) * THT.le(rX, position[:,2].dimshuffle(0,'x'))
     FY = THT.gt(rY, position[:,1].dimshuffle(0,'x')) * THT.le(rY, position[:,3].dimshuffle(0,'x'))
     m = FY.dimshuffle(0, 1, 'x') * FX.dimshuffle(0, 'x', 1)
     m = m + self.alpha - THT.gt(m, 0.) * self.alpha
     frame = frame * m.dimshuffle(0, 'x', 1, 2)
     
     # Reshaping the frame to include time dimension
     output = K.reshape(frame, frameShape)
     
     return output
Example #14
 def call(self, X):
     if type(X) is not list or len(X) != 2:
         raise Exception("GaussianAttention must be called on a list of two tensors. Got: " + str(X))
     
     frame, position  = X[0], X[1]
     
     # Reshaping the input to exclude the time dimension
     frameShape = K.shape(frame)
     positionShape = K.shape(position)
     (chans, height, width) = frameShape[-3:]
     targetDim = positionShape[-1]
     frame = K.reshape(frame, (-1, chans, height, width))
     position = K.reshape(position, (-1, ) + (targetDim, ))
     
     cx = (position[:, 0] + position[:, 2]) / 2.0
     cy = (position[:, 1] + position[:, 3]) / 2.0
     sx = (position[:, 2] - cx) * 0.60
     sy = (position[:, 3] - cy) * 0.60
     rX = Data.linspace(-1.0, 1.0, width)
     rY = Data.linspace(-1.0, 1.0, height)
     FX = K.exp(-(rX - cx.dimshuffle(0, 'x')) ** 2 / (2.0 * (sx.dimshuffle(0, 'x') ** 2 + self.epsilon)))
     FY = K.exp(-(rY - cy.dimshuffle(0, 'x')) ** 2 / (2.0 * (sy.dimshuffle(0, 'x') ** 2 + self.epsilon)))
     m = (FY.dimshuffle(0, 1, 'x') * FX.dimshuffle(0, 'x', 1))
     m = m + self.alpha
     m = m - K.greater(m, 1.0) * (m - 1.0)
     
     frame = frame * m.dimshuffle(0, 'x', 1, 2)
     
     # Reshaping the frame to include time dimension
     output = K.reshape(frame, frameShape)
     
     return output
Example #15
def get_model(inputdim, outputdim, regularization_strength=0.01, lr=0.000, cosine=False, **kwargs):
    transformation = Dense(inputdim, init='identity',
                           W_constraint=Orthogonal())

    model = Graph()
    model.add_input(name='embeddings1', input_shape=(inputdim,))
    model.add_input(name='embeddings2', input_shape=(inputdim,))
    model.add_shared_node(transformation, name='transformation',
                          inputs=['embeddings1', 'embeddings2'],
                          outputs=['transformed1', 'transformed2'])
    model.add_node(Lambda(lambda x: x[:, :outputdim]), input='transformed1', name='projected1')
    model.add_node(Lambda(lambda x: -x[:, :outputdim]), input='transformed2', name='negprojected2')

    if cosine:
        model.add_node(Lambda(lambda x:  x / K.reshape(K.sqrt(K.sum(x * x, axis=1)), (x.shape[0], 1))),
                       name='normalized1', input='projected1')
        model.add_node(Lambda(lambda x:  x / K.reshape(K.sqrt(K.sum(x * x, axis=1)), (x.shape[0], 1))),
                       name='negnormalized2', input='negprojected2')
        model.add_node(Lambda(lambda x: K.reshape(K.sum(x, axis=1), (x.shape[0], 1))),
                       name='distances', inputs=['normalized1', 'negnormalized2'], merge_mode='mul')
    else:
        model.add_node(Lambda(lambda x: K.reshape(K.sqrt(K.sum(x * x, axis=1)), (x.shape[0], 1))),
                       name='distances', inputs=['projected1', 'negprojected2'], merge_mode='sum')

    model.add_output(name='y', input='distances')
    model.compile(loss={'y': lambda y, d: K.mean(y * d)}, optimizer=SimpleSGD())
    return model
Example #16
    def call(self, x, mask=None):
        input_shape = self.input_spec[0].shape
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]

        out = K.reshape(self.gamma, broadcast_shape) * x + K.reshape(self.beta, broadcast_shape)
        return out
Example #17
def image_categorical_crossentropy(output, target, from_logits=False):
    output = T.clip(output, _EPSILON, 1.0 - _EPSILON)
    output_ = K.reshape(output, (-1, 256))
    target_ = K.reshape(target, (-1, 256))
    out = T.nnet.categorical_crossentropy(output_, target_)
    out = K.reshape(out,(K.shape(output)[0],-1))
    return T.mean(T.mean(out, axis=1))
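The loss flattens both tensors to (pixels, 256), takes a per-pixel categorical cross-entropy, and averages per image and then over the batch. A minimal NumPy sketch with hypothetical sizes:

import numpy as np

batch, pixels, bins = 2, 6, 256
output = np.random.rand(batch, pixels, bins)
output /= output.sum(-1, keepdims=True)                       # per-pixel distributions
target = np.eye(bins)[np.random.randint(bins, size=(batch, pixels))]
ce = -(target.reshape(-1, bins) * np.log(output.reshape(-1, bins))).sum(-1)
print(ce.reshape(batch, -1).mean(axis=1).mean())              # scalar loss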
Example #18
    def call(self, x, mask=None):

        input_shape = K.shape(x)

        if self.dim_ordering == 'th':
            num_rows = input_shape[2]
            num_cols = input_shape[3]
        elif self.dim_ordering == 'tf':
            num_rows = input_shape[1]
            num_cols = input_shape[2]

        row_length = [K.cast(num_rows, 'float32') / i for i in self.pool_list]
        col_length = [K.cast(num_cols, 'float32') / i for i in self.pool_list]

        outputs = []

        if self.dim_ordering == 'th':
            for pool_num, num_pool_regions in enumerate(self.pool_list):
                for ix in range(num_pool_regions):
                    for jy in range(num_pool_regions):
                        x1 = ix * col_length[pool_num]
                        x2 = ix * col_length[pool_num] + col_length[pool_num]
                        y1 = jy * row_length[pool_num]
                        y2 = jy * row_length[pool_num] + row_length[pool_num]

                        x1 = K.cast(K.round(x1), 'int32')
                        x2 = K.cast(K.round(x2), 'int32')
                        y1 = K.cast(K.round(y1), 'int32')
                        y2 = K.cast(K.round(y2), 'int32')

                        new_shape = [input_shape[0], input_shape[1],
                                     y2 - y1, x2 - x1]
                        x_crop = x[:, :, y1:y2, x1:x2]
                        xm = K.reshape(x_crop, new_shape)
                        pooled_val = K.max(xm, axis=(2, 3))
                        outputs.append(pooled_val)

        elif self.dim_ordering == 'tf':
            for pool_num, num_pool_regions in enumerate(self.pool_list):
                for ix in range(num_pool_regions):
                    for jy in range(num_pool_regions):
                        x1 = ix * col_length[pool_num]
                        x2 = ix * col_length[pool_num] + col_length[pool_num]
                        y1 = jy * row_length[pool_num]
                        y2 = jy * row_length[pool_num] + row_length[pool_num]

                        x1 = K.cast(K.round(x1), 'int32')
                        x2 = K.cast(K.round(x2), 'int32')
                        y1 = K.cast(K.round(y1), 'int32')
                        y2 = K.cast(K.round(y2), 'int32')

                        new_shape = [input_shape[0], y2 - y1,
                                     x2 - x1, input_shape[3]]
                        x_crop = x[:, y1:y2, x1:x2, :]
                        xm = K.reshape(x_crop, new_shape)
                        pooled_val = K.max(xm, axis=(1, 2))
                        outputs.append(pooled_val)

        outputs = K.concatenate(outputs)
        return outputs
Example #19
def _transform(theta, input, downsample_factor):
    num_batch, num_channels, height, width = input.shape
    theta = K.reshape(theta, (-1, 2, 3))

    # grid of (x_t, y_t, 1), eq (1) in ref [2]
    height_f = K.cast(height, 'float32')
    width_f = K.cast(width, 'float32')
    out_height = K.cast(height_f // downsample_factor, 'int64')
    out_width = K.cast(width_f // downsample_factor, 'int64')
    grid = _meshgrid(out_height, out_width)

    # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s)
    T_g = K.dot(theta, grid)
    x_s, y_s = T_g[:, 0], T_g[:, 1]
    x_s_flat = x_s.flatten()
    y_s_flat = y_s.flatten()

    # dimshuffle input to  (bs, height, width, channels)
    #input_dim = input.dimshuffle(0, 2, 3, 1)
    input_dim = input.transpose(0, 2, 3, 1)
    input_transformed = _interpolate(
        input_dim, x_s_flat, y_s_flat,
        downsample_factor)

    output = K.reshape(input_transformed,
                       (num_batch, out_height, out_width, num_channels))
    output = output.transpose(0, 3, 1, 2)
    return output
Example #20
    def step(self, x, states):
        h_tm1 = states[0]
        c_tm1 = states[1]

        x_i = K.conv2d(x, self.W_i, border_mode="same")
        x_f = K.conv2d(x, self.W_f, border_mode="same")
        x_c = K.conv2d(x, self.W_c, border_mode="same")
        x_o = K.conv2d(x, self.W_o, border_mode="same")

        h_i = K.conv2d(h_tm1, self.U_i, border_mode="same")
        h_f = K.conv2d(h_tm1, self.U_f, border_mode="same")
        h_c = K.conv2d(h_tm1, self.U_c, border_mode="same")
        h_o = K.conv2d(h_tm1, self.U_o, border_mode="same")

        c_i = self.C_i * c_tm1
        c_f = self.C_f * c_tm1
        c_o = self.C_o * c_tm1

        b_i = K.reshape(self.b_i, (1, -1, 1, 1))
        b_f = K.reshape(self.b_f, (1, -1, 1, 1))
        b_c = K.reshape(self.b_c, (1, -1, 1, 1))
        b_o = K.reshape(self.b_o, (1, -1, 1, 1))

        i = self.inner_activation(x_i + h_i + c_i + b_i)
        f = self.inner_activation(x_f + h_f + c_f + b_f)
        c = f * c_tm1 + i * self.activation(x_c + h_c + b_c)
        o = self.inner_activation(x_o + h_o + c_o + b_o)
        h = o * self.activation(c)

        return h, [h, c]
Example #21
    def call(self, x, mask=None):
        # eij = K.dot(x, self.W) TF backend doesn't support it

        # features_dim = self.W.shape[0]
        # step_dim = x._keras_shape[1]

        features_dim = self.features_dim
        step_dim = self.step_dim

        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)), K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases especially in the early stages of training the sum may be almost zero
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        # print weigthted_input.shape
        return K.sum(weighted_input, axis=1)
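The reshapes above score every timestep with a single weight vector and then take a weighted sum over time. A minimal NumPy sketch with hypothetical sizes (bias and mask omitted):

import numpy as np

batch, steps, features = 2, 5, 8
x = np.random.rand(batch, steps, features)
W = np.random.rand(features)
eij = (x.reshape(-1, features) @ W.reshape(features, 1)).reshape(batch, steps)
a = np.exp(np.tanh(eij))
a /= a.sum(axis=1, keepdims=True) + 1e-7
weighted = (x * a[..., None]).sum(axis=1)
print(weighted.shape)  # (2, 8)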
Example #22
def yolo_head(feats, anchors, num_classes, input_shape):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3] # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (box_xy + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
    box_wh = box_wh * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))

    return box_xy, box_wh, box_confidence, box_class_probs
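The grid construction above tiles two aranges into a (height, width, 1, 2) tensor of per-cell offsets. A minimal NumPy sketch with hypothetical sizes:

import numpy as np

h, w = 3, 4
grid_y = np.tile(np.arange(h).reshape(-1, 1, 1, 1), [1, w, 1, 1])
grid_x = np.tile(np.arange(w).reshape(1, -1, 1, 1), [h, 1, 1, 1])
grid = np.concatenate([grid_x, grid_y], axis=-1)
print(grid.shape)  # (3, 4, 1, 2)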
Example #23
    def simple_context(X, mask, n=activation_rnn_size):
        """Reduce the input just to its headline part (second half).

        For each word in this part it concatenates the output of the previous layer (RNN)
        with a weighted average of the outputs of the description part.
        Only the last `rnn_size - activation_rnn_size` units of each output are used here.
        The first `activation_rnn_size` units are used to compute the weights for the averaging.
        """
        desc, head = X[:, :maxlend, :], X[:, maxlend:, :]
        head_activations, head_words = head[:, :, :n], head[:, :, n:]
        desc_activations, desc_words = desc[:, :, :n], desc[:, :, n:]

        # RTFM http://deeplearning.net/software/theano/library/tensor/basic.html#theano.tensor.batched_tensordot
        # activation for every head word and every desc word
        activation_energies = K.batch_dot(head_activations, desc_activations, axes=(2, 2))
        # make sure we don't use description words that are masked out
        activation_energies = activation_energies + -1e20 * K.expand_dims(
            1. - K.cast(mask[:, :maxlend], 'float32'), 1)

        # for every head word compute weights for every desc word
        activation_energies = K.reshape(activation_energies, (-1, maxlend))
        activation_weights = K.softmax(activation_energies)
        activation_weights = K.reshape(activation_weights, (-1, maxlenh, maxlend))

        # for every head word compute weighted average of desc words
        desc_avg_word = K.batch_dot(activation_weights, desc_words, axes=(2, 1))
        return K.concatenate((desc_avg_word, head_words))
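A minimal NumPy sketch of the attention described in the docstring, with hypothetical sizes (einsum stands in for batch_dot, and the mask term is omitted):

import numpy as np

def softmax(z):
    e = np.exp(z - z.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

batch, maxlend, maxlenh, n, rest = 2, 6, 4, 3, 5
head_act = np.random.rand(batch, maxlenh, n)
desc_act = np.random.rand(batch, maxlend, n)
desc_words = np.random.rand(batch, maxlend, rest)
energies = np.einsum('bhn,bdn->bhd', head_act, desc_act)    # (batch, maxlenh, maxlend)
weights = softmax(energies)                                 # attention over desc words
desc_avg = np.einsum('bhd,bdr->bhr', weights, desc_words)   # (batch, maxlenh, rest)
print(desc_avg.shape)  # (2, 4, 5)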
Example #24
    def call(self, x, mask=None):
        if hasattr(x, '_keras_shape'):
            input_shape = x._keras_shape
        else:
            input_shape = self._input_shape
        x = K.reshape(x, (-1,) + input_shape[-2:])  # (batch * d1 * ... * dn-2, dn-1, dn)
        if mask is not None:
            mask_shape = (K.shape(x)[0], -1)
            mask = K.reshape(mask, mask_shape)  # give it the same first dim
        y = self.layer.call(x, mask)
        output_shape = self.get_output_shape_for(input_shape)
        return K.cast(K.reshape(y, output_shape), K.floatx())
Example #25
    def get_output(self, train=False):
        def format_shape(shape):
            if K._BACKEND == 'tensorflow':
                def trf(x):
                    try:
                        return int(x)
                    except TypeError:
                        return x

                return map(trf, shape)
            return shape

        X = self.get_input(train)

        in_shape = format_shape(K.shape(X))
        batch_flatten_len = K.prod(in_shape[:2])
        cast_in_shape = (batch_flatten_len, ) + tuple(in_shape[i] for i in range(2, K.ndim(X)))
        
        pre_outs = self.layer(K.reshape(X, cast_in_shape))
        
        out_shape = format_shape(K.shape(pre_outs))
        cast_out_shape = (in_shape[0], in_shape[1]) + tuple(out_shape[i] for i in range(1, K.ndim(pre_outs)))
        
        outputs = K.reshape(pre_outs, cast_out_shape)
        return outputs
Example #26
def euclidDist( inputs ):
    assert len( inputs ) == 2, "euclidDist requires 2 inputs"
    l1 = inputs[ 0 ]
    l2 = inputs[ 1 ]
    x = l1 - l2
    output = K.batch_dot( x, x, axes = 1 )
    output = K.reshape( output, (1,) )
    return output
Example #27
 def gram_matrix_mean_squared_error(self, y_true, y_pred):
     y_pred = K.variable(value=y_pred)
     y_true = K.variable(value=y_true)
     _, filters, x_pos, y_pos = self.get_shape(y_pred)
     denominator = K.variable(value=(2 * filters * (x_pos * y_pos)) ** 2)
     y_pred = K.reshape(y_pred, (filters, x_pos * y_pos))
     y_true = K.reshape(y_true, (filters, x_pos * y_pos))
     return K.square(self.gram_matrix(y_pred) - self.gram_matrix(y_true)) / denominator
Example #28
 def get_output(self, train=False):
     X = self.get_input()
     batch_size, time_len = X.shape[:2]
     X = X.flatten(ndim=2)  # (sample*time, dim)
     X = K.reshape(X, self.reshape_dim)  # (sample*time, dim1, dim2, ...)
     Y = apply_model(self.model, X)
     Y = K.reshape(Y, (batch_size, time_len, -1))  # (sample, time, dim_out)
     return Y
Example #29
 def call(self, x, mask=None):
     input_shape = self.input_spec[0].shape
     x = K.reshape(x, (-1,) + input_shape[-1:]) # (batch * d1 * ... * dn-2*dn-1, dn)
     if mask is not None:
         mask_shape = (K.shape(x)[0], -1)
         mask = K.reshape(mask, mask_shape) # give it the same first dim
     y = self.layer.call(x, mask)
     output_shape = self.get_output_shape_for(input_shape)
     return K.reshape(y, output_shape)
Example #30
def max_margin2(y_true, y_pred):
    # assumes the samples are interleaved positive and corrupt (p, c, p, c, ...)
    v = - y_pred * y_true + y_pred * (1.0 - y_true) # (-p, c, -p, c,...)
    v = K.reshape(v, (-1, 2)) # ([-p, c], [-p, c], ...)
    v = 1. + K.sum(v, axis=1) # (1 - p + c, 1 - p + c, ...)
    v = K.maximum(0., v) # (max(0, 1 - p + c), max(0, 1 - p + c), ...)
    return K.sum(v)
Example #31
 def add_dim(tensor):
     """Add a dimension to tensors that don't have any."""
     if K.int_shape(tensor) == ():
         return KL.Lambda(lambda t: K.reshape(t, [1, 1]))(tensor)
     return tensor
Example #32
    def call(self, inputs):
        q, k, v = inputs[:3]
        v_mask, q_mask = None, None
        # here mask.shape = [batch_size, seq_len] or [batch_size, seq_len, 1]
        if len(inputs) > 3:
            v_mask = inputs[3]
            if len(inputs) > 4:
                q_mask = inputs[4]
        # linear transformations
        qw = self.reuse(self.q_dense, q)
        kw = self.reuse(self.k_dense, k)
        vw = self.reuse(self.v_dense, v)
        # reshape
        qw = K.reshape(qw, (-1, K.shape(qw)[1], self.heads, self.key_size))
        kw = K.reshape(kw, (-1, K.shape(kw)[1], self.heads, self.key_size))
        vw = K.reshape(vw,
                       (-1, K.shape(vw)[1], self.heads, self.size_per_head))
        # permute dimensions
        qw = K.permute_dimensions(qw, (0, 2, 1, 3))
        kw = K.permute_dimensions(kw, (0, 2, 1, 3))
        vw = K.permute_dimensions(vw, (0, 2, 1, 3))
        # Attention
        a = K.batch_dot(qw, kw, [3, 3]) / self.key_size**0.5
        a = K.permute_dimensions(a, (0, 3, 2, 1))
        a = to_mask(a, v_mask, 'add')
        a = K.permute_dimensions(a, (0, 3, 2, 1))
        if (self.mask_right is not False) and (self.mask_right is not None):
            if self.mask_right is True:
                ones = K.ones_like(a[:1, :1])
                mask = (ones - K.tf.matrix_band_part(ones, -1, 0)) * 1e10
                a = a - mask
            else:
                # in this case mask_right is an externally supplied 0/1 matrix with shape [q_len, k_len]
                mask = (1 - K.constant(self.mask_right)) * 1e10
                mask = K.expand_dims(K.expand_dims(mask, 0), 0)
                self.mask = mask
                a = a - mask
        a = K.softmax(a)
        self.a = a
        # produce the output
        o = K.batch_dot(a, vw, [3, 2])
        o = K.permute_dimensions(o, (0, 2, 1, 3))
        o = K.reshape(o, (-1, K.shape(o)[1], self.out_dim))
        o = to_mask(o, q_mask, 'mul')
        return o
Example #33
    def local_conv3d(self, inputs, kernel, kernel_size, strides, output_shape, data_format=None):
        """Apply 3D conv with un-shared weights.
        # Arguments
            inputs: 4D tensor with shape:
                    (batch_size, filters, new_rows, new_cols)
                    if data_format='channels_first'
                    or 4D tensor with shape:
                    (batch_size, new_rows, new_cols, filters)
                    if data_format='channels_last'.
            kernel: the unshared weight for convolution,
                    with shape (output_items, feature_dim, filters)
            kernel_size: a tuple of 2 integers, specifying the
                        width and height of the 3D convolution window.
            strides: a tuple of 2 integers, specifying the strides
                    of the convolution along the width and height.
            output_shape: a tuple with (output_row, output_col)
            data_format: the data format, channels_first or channels_last
        # Returns
            A 4d tensor with shape:
            (batch_size, filters, new_rows, new_cols)
            if data_format='channels_first'
            or 4D tensor with shape:
            (batch_size, new_rows, new_cols, filters)
            if data_format='channels_last'.
        # Raises
            ValueError: if `data_format` is neither
                        `channels_last` or `channels_first`.
        """
        if data_format is None:
            data_format = K.image_data_format()
        if data_format not in {'channels_first', 'channels_last'}:
            raise ValueError('Unknown data_format: ' + str(data_format))

        stride_row, stride_col, stride_z = strides
        output_row, output_col, output_z = output_shape
        kernel_shape = K.int_shape(kernel)
        _, feature_dim, filters = kernel_shape

        xs = []
        for i in range(output_row):
            for j in range(output_col):
                for k in range(output_z):
                    slice_row = slice(i * stride_row,
                                    i * stride_row + kernel_size[0])
                    slice_col = slice(j * stride_col,
                                    j * stride_col + kernel_size[1])
                    slice_z = slice(k * stride_z,
                                    k * stride_z + kernel_size[2])
                    if data_format == 'channels_first':
                        xs.append(K.reshape(inputs[:, :, slice_row, slice_col, slice_z],
                                        (1, -1, feature_dim)))
                    else:
                        xs.append(K.reshape(inputs[:, slice_row, slice_col, slice_z, :],
                                        (1, -1, feature_dim)))

        x_aggregate = K.concatenate(xs, axis=0)
        output = K.batch_dot(x_aggregate, kernel)
        output = K.reshape(output,
                        (output_row, output_col, output_z, -1, filters))

        if data_format == 'channels_first':
            output = K.permute_dimensions(output, (3, 4, 0, 1, 2))
        else:
            output = K.permute_dimensions(output, (3, 0, 1, 2, 4))
        return output
Example #34
    def __call__(self, y_sing_pred):

        anchors = np.reshape(
            self.config["constants"]["anchors"],
            [1, 1, 1,
             len(self.config["constants"]["anchors"]) // 2, 2])

        # need to convert b's from GRID_SIZE units into IMG coords. Divide by grid here.
        b_xy = (K.sigmoid(y_sing_pred[..., 0:2]) +
                self.c_grid[0]) / self.config["model"]["grid_size"]
        b_wh = (K.exp(y_sing_pred[..., 2:4]) *
                anchors[0]) / self.config["model"]["grid_size"]
        b_xy1 = b_xy - b_wh / 2.
        b_xy2 = b_xy + b_wh / 2.
        boxes = K.concatenate([b_xy1, b_xy2], axis=-1)

        # filter out scores below detection threshold
        scores_all = K.sigmoid(y_sing_pred[..., 4:5]) * K.softmax(
            y_sing_pred[..., 5:])
        indicator_detection = scores_all > self.detection_threshold
        scores_all = scores_all * K.cast(indicator_detection, np.float32)

        # compute detected classes and scores
        classes = K.argmax(scores_all, axis=-1)
        scores = K.max(scores_all, axis=-1)

        # flattened tensor length
        S2B = self.config["model"]["grid_size"] * self.config["model"][
            "grid_size"] * len(self.config["constants"]["anchors"]) // 2

        # flatten boxes, scores for NMS
        flatten_boxes = K.reshape(boxes, shape=(S2B, 4))
        flatten_scores = K.reshape(scores, shape=(S2B, ))
        flatten_classes = K.reshape(classes, shape=(S2B, ))

        inds = []

        # apply multiclass NMS
        for c in range(self.num_classes):

            # only include boxes of the current class, with > 0 confidence
            class_mask = K.cast(K.equal(flatten_classes, c), np.float32)
            score_mask = K.cast(flatten_scores > 0, np.float32)
            mask = class_mask * score_mask

            # compute class NMS
            nms_inds = tf.image.non_max_suppression(
                flatten_boxes,
                flatten_scores * mask,
                max_output_size=self.max_boxes,
                iou_threshold=self.nms_threshold,
                score_threshold=0.)

            inds.append(nms_inds)

        # combine winning box indices of all classes
        selected_indices = K.concatenate(inds, axis=-1)

        # gather corresponding boxes, scores, class indices
        selected_boxes = K.gather(flatten_boxes, selected_indices)
        selected_scores = K.gather(flatten_scores, selected_indices)
        selected_classes = K.gather(flatten_classes, selected_indices)

        return process_outs(selected_boxes, selected_scores,
                            K.cast(selected_classes, np.float32))
Example #35
    def attention(self,
                  pre_q,
                  pre_v,
                  pre_k,
                  out_seq_len: int,
                  d_input: int,
                  lengths=None,
                  training=None):
        """
        Calculates the output of the attention once the affine transformations
        of the inputs are done. Here are the shapes of the arguments:
        :param pre_q: (batch_size, q_seq_len, num_heads, d_model // num_heads)
        :param pre_v: (batch_size, v_seq_len, num_heads, d_model // num_heads)
        :param pre_k: (batch_size, k_seq_len, num_heads, d_model // num_heads)
        :param out_seq_len: the length of the output sequence
        :param d_input: dimensionality of the model input (d_model in the paper)
        :param training: Passed by Keras. Should not be defined manually.
          Optional scalar tensor indicating if we're in training
          or inference phase.
        """
        # shaping Q and V into (batch_size, num_heads, seq_len, d_model//heads)
        q = K.permute_dimensions(pre_q, [0, 2, 1, 3])
        v = K.permute_dimensions(pre_v, [0, 2, 1, 3])

        if self.compression_window_size is None:
            k_transposed = K.permute_dimensions(pre_k, [0, 2, 3, 1])
        else:
            # Memory-compressed attention described in paper
            # "Generating Wikipedia by Summarizing Long Sequences"
            # (https://arxiv.org/pdf/1801.10198.pdf)
            # It compresses keys and values using 1D-convolution which reduces
            # the size of Q * K_transposed from roughly seq_len^2
            # to convoluted_seq_len^2. If we use strided convolution with
            # window size = 3 and stride = 3, memory requirements of such
            # memory-compressed attention will be 9 times smaller than
            # that of the original version.
            if self.use_masking:
                raise NotImplementedError(
                    "Masked memory-compressed attention has not "
                    "been implemented yet")
            k = K.permute_dimensions(pre_k, [0, 2, 1, 3])
            k, v = [
                K.reshape(
                    # Step 3: Return the result to its original dimensions
                    # (batch_size, num_heads, seq_len, d_model//heads)
                    K.bias_add(
                        # Step 3: ... and add bias
                        K.conv1d(
                            # Step 2: we "compress" K and V using strided conv
                            K.reshape(
                                # Step 1: we reshape K and V to
                                # (batch + num_heads,  seq_len, d_model//heads)
                                item,
                                (-1, K.int_shape(item)[-2],
                                 self.d_model // self.num_heads)),
                            kernel,
                            strides=self.compression_window_size,
                            padding='valid',
                            data_format='channels_last'),
                        bias,
                        data_format='channels_last'),
                    # new shape
                    K.concatenate([
                        K.shape(item)[:2],
                        #[-1, d_model // self.num_heads]]))
                        [
                            K.int_shape(item)[2] //
                            self.compression_window_size,
                            self.d_model // self.num_heads
                        ]
                    ])) for item, kernel, bias in ((k, self.k_conv_kernel,
                                                    self.k_conv_bias),
                                                   (v, self.v_conv_kernel,
                                                    self.v_conv_bias))
            ]
            k_transposed = K.permute_dimensions(k, [0, 1, 3, 2])
        # shaping K into (batch_size, num_heads, d_model//heads, seq_len)
        # for further matrix multiplication
        sqrt_d = K.constant(np.sqrt(self.d_model // self.num_heads),
                            dtype=K.floatx())
        q_shape = K.int_shape(q)
        k_t_shape = K.int_shape(k_transposed)
        v_shape = K.int_shape(v)
        # before performing batch_dot all tensors are being converted to 3D
        # shape (batch_size * num_heads, rows, cols) to make sure batch_dot
        # performs identically on all backends
        attention_heads = K.reshape(
            K.batch_dot(
                self.apply_dropout_if_needed(K.softmax(
                    self.mask_length_if_provided(self.mask_local_if_needed(
                        self.mask_attention_if_needed(
                            K.batch_dot(
                                K.reshape(q, (-1, ) + q_shape[-2:]),
                                K.reshape(k_transposed,
                                          (-1, ) + k_t_shape[-2:])) / sqrt_d)),
                                                 lengths=lengths)),
                                             training=training),
                K.reshape(v, (-1, ) + v_shape[-2:])),
            (-1, self.num_heads, q_shape[-2], v_shape[-1]))
        attention_heads_merged = K.reshape(
            K.permute_dimensions(attention_heads, [0, 2, 1, 3]),
            (-1, self.d_model))
        attention_out = K.reshape(
            K.dot(attention_heads_merged, self.output_weights),
            (-1, out_seq_len, d_input))
        return attention_out
Example #36
 def  one_hot(self, seq, num_classes):
     import theano.tensor as T
     return K.equal(K.reshape(seq, (-1, 1)), T.arange(num_classes))
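The same broadcasted-equality trick in plain NumPy, with hypothetical values:

import numpy as np

seq = np.array([0, 2, 1, 3])
num_classes = 4
onehot = (seq.reshape(-1, 1) == np.arange(num_classes)).astype('float32')
print(onehot.shape)  # (4, 4), one-hot rows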
Example #37
    def build_model(self):
        sentence = Concatenate()([
            self.sen_embedding,
            # self.sen_entity_type_embedding,
            self.position_t_embedding,
            self.position_a_embedding
        ])

        sentence = Bidirectional(
            GRU(300,
                activation="relu",
                return_sequences=True,
                recurrent_dropout=0.3,
                dropout=0.3))(sentence)

        average_layer = Lambda(average, output_shape=no_change)
        position_mt = average_layer(self.position_mt)
        position_ma = average_layer(self.position_ma)

        trigger = Dot(axes=[1, 1])([sentence, position_mt])
        entity = Dot(axes=[1, 1])([sentence, position_ma])

        triggers = Lambda(liter,
                          output_shape=liter_output_shape,
                          arguments={'length':
                                     self.max_len})(trigger)  # (?, 125, 300)
        entities = Lambda(liter,
                          output_shape=liter_output_shape,
                          arguments={'length':
                                     self.max_len})(entity)  # (?, 125, 300)

        # ----------------- trigger attention ------------------------
        x1 = Concatenate()([triggers, entities, sentence])  # (?, 125, 900)
        x1 = Dense(300, activation='tanh')(x1)  # (?, 82, 600)
        x1 = Dense(1)(x1)  # (?, 125, 1)
        x1 = Lambda(reduce_dimension,
                    output_shape=reduce_dimension_output_shape,
                    arguments={'length': self.max_len},
                    mask=self.sentence_embedding_layer.get_output_mask_at(0),
                    name='te_attention')(x1)  # (?, 125)
        x1 = Lambda(attention,
                    output_shape=attention_output_shape,
                    arguments={'dim': 600})([x1, sentence])  # (?, 600)
        # -----------------------------------------------------------

        x_layer = Lambda(
            lambda x: K.reshape(x, [-1, self.TRIGGER_TYPE_VEC_DIM]),
            output_shape=output_shape)
        trigger_type = x_layer(self.trigger_type_embedding)
        entity_type = x_layer(self.entity_type_embedding)

        tt = Lambda(liter,
                    output_shape=liter_output_shape,
                    arguments={'length':
                               self.max_len})(trigger_type)  # (?, 125, 50)
        et = Lambda(liter,
                    output_shape=liter_output_shape,
                    arguments={'length':
                               self.max_len})(entity_type)  # (?, 125, 50)

        # ----------------- argument attention ------------------------
        x2 = Concatenate()([tt, sentence])  # (?, 125, 350)
        # x2 = Dense(300, activation='tanh')(x2)   # (?, 82, 600)
        x2 = Dense(1)(x2)  # (?, 125, 1)
        x2 = Lambda(reduce_dimension,
                    output_shape=reduce_dimension_output_shape,
                    arguments={'length': self.max_len},
                    mask=self.sentence_embedding_layer.get_output_mask_at(0),
                    name='tt_attention')(x2)  # (?, 125)
        x2 = Lambda(attention,
                    output_shape=attention_output_shape,
                    arguments={'dim': 600})([x2, sentence])  # (?, 600)
        # -----------------------------------------------------------

        # ----------------- argument attention ------------------------
        x3 = Concatenate()([et, sentence])  # (?, 125, 350)
        # x3 = Dense(300, activation='tanh')(x3)   # (?, 82, 600)
        x3 = Dense(1)(x3)  # (?, 125, 1)
        x3 = Lambda(reduce_dimension,
                    output_shape=reduce_dimension_output_shape,
                    arguments={'length': self.max_len},
                    mask=self.sentence_embedding_layer.get_output_mask_at(0),
                    name='et_attention')(x3)  # (?, 125)
        x3 = Lambda(attention,
                    output_shape=attention_output_shape,
                    arguments={'dim': 600})([x3, sentence])  # (?, 600)
        # -----------------------------------------------------------

        x = Concatenate()([x1, x2, x3])
        x = Dropout(rate=0.5)(x)
        output = Dense(9, activation='softmax')(x)

        return output
Example #38
def reshape_one(c):
    return K.reshape(c, (tf.shape(c)[0] * padsize, char_padsize, CHAR_EMBEDDING_DIM))
Example #39
    def call(self, x, mask=None):

        assert (len(x) == 2)

        img = x[0]
        rois = x[1]

        input_shape = K.shape(img)

        outputs = []

        for roi_idx in range(self.num_rois):

            x = rois[0, roi_idx, 0]
            y = rois[0, roi_idx, 1]
            w = rois[0, roi_idx, 2]
            h = rois[0, roi_idx, 3]

            row_length = w / float(self.pool_size)
            col_length = h / float(self.pool_size)

            num_pool_regions = self.pool_size

            #NOTE: the RoiPooling implementation differs between theano and tensorflow due to the lack of a resize op
            # in theano. The theano implementation is much less efficient and leads to long compile times

            if self.dim_ordering == 'channels_first':
                for jy in range(num_pool_regions):
                    for ix in range(num_pool_regions):
                        x1 = x + ix * row_length
                        x2 = x1 + row_length
                        y1 = y + jy * col_length
                        y2 = y1 + col_length

                        x1 = K.cast(x1, 'int32')
                        x2 = K.cast(x2, 'int32')
                        y1 = K.cast(y1, 'int32')
                        y2 = K.cast(y2, 'int32')

                        x2 = x1 + K.maximum(1, x2 - x1)
                        y2 = y1 + K.maximum(1, y2 - y1)

                        new_shape = [
                            input_shape[0], input_shape[1], y2 - y1, x2 - x1
                        ]

                        x_crop = img[:, :, y1:y2, x1:x2]
                        xm = K.reshape(x_crop, new_shape)
                        pooled_val = K.max(xm, axis=(2, 3))
                        outputs.append(pooled_val)

            elif self.dim_ordering == 'channels_last':
                x = K.cast(x, 'int32')
                y = K.cast(y, 'int32')
                w = K.cast(w, 'int32')
                h = K.cast(h, 'int32')

                rs = tf.image.resize_images(img[:, y:y + h, x:x + w, :],
                                            (self.pool_size, self.pool_size))
                outputs.append(rs)

        final_output = K.concatenate(outputs, axis=0)
        final_output = K.reshape(final_output,
                                 (1, self.num_rois, self.pool_size,
                                  self.pool_size, self.nb_channels))

        if self.dim_ordering == 'channels_first':
            final_output = K.permute_dimensions(final_output, (0, 1, 4, 2, 3))
        else:
            final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4))

        return final_output
Example #40
def merge_heads(x):
    new_x = K.permute_dimensions(x, [0, 2, 1, 3])
    x_shape = shape_list(new_x)
    new_x_shape = x_shape[:-2] + [np.prod(x_shape[-2:])]
    return K.reshape(new_x, new_x_shape)
Example #41
def split_heads(x, n: int, k: bool = False):  # B, L, C
    x_shape = shape_list(x)
    m = x_shape[-1]
    new_x_shape = x_shape[:-1] + [n, m // n]
    new_x = K.reshape(x, new_x_shape)
    return K.permute_dimensions(new_x, [0, 2, 3, 1] if k else [0, 2, 1, 3])
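merge_heads and split_heads (with k=False) are inverses of each other. A minimal NumPy sketch of the round trip, with hypothetical sizes:

import numpy as np

def split_heads_np(x, n, k=False):
    b, l, c = x.shape
    x = x.reshape(b, l, n, c // n)
    return x.transpose(0, 2, 3, 1) if k else x.transpose(0, 2, 1, 3)

def merge_heads_np(x):
    x = x.transpose(0, 2, 1, 3)
    b, l, n, d = x.shape
    return x.reshape(b, l, n * d)

x = np.random.rand(2, 5, 8)
assert np.allclose(merge_heads_np(split_heads_np(x, n=4)), x)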
Example #42
    def call(self, x, mask=None):

        assert (len(x) == 2)

        img = x[0]
        rois = x[1]

        input_shape = K.shape(img)

        outputs = []

        for roi_idx in range(self.num_rois):

            x = rois[0, roi_idx, 0]
            y = rois[0, roi_idx, 1]
            w = rois[0, roi_idx, 2]
            h = rois[0, roi_idx, 3]

            row_length = [w / i for i in self.pool_list]
            col_length = [h / i for i in self.pool_list]

            if self.dim_ordering == 'th':
                for pool_num, num_pool_regions in enumerate(self.pool_list):
                    for ix in range(num_pool_regions):
                        for jy in range(num_pool_regions):
                            x1 = x + ix * col_length[pool_num]
                            x2 = x1 + col_length[pool_num]
                            y1 = y + jy * row_length[pool_num]
                            y2 = y1 + row_length[pool_num]

                            x1 = K.cast(K.round(x1), 'int32')
                            x2 = K.cast(K.round(x2), 'int32')
                            y1 = K.cast(K.round(y1), 'int32')
                            y2 = K.cast(K.round(y2), 'int32')

                            new_shape = [
                                input_shape[0], input_shape[1], y2 - y1,
                                x2 - x1
                            ]
                            x_crop = img[:, :, y1:y2, x1:x2]
                            xm = K.reshape(x_crop, new_shape)
                            pooled_val = K.max(xm, axis=(2, 3))
                            outputs.append(pooled_val)

            elif self.dim_ordering == 'tf':
                for pool_num, num_pool_regions in enumerate(self.pool_list):
                    for ix in range(num_pool_regions):
                        for jy in range(num_pool_regions):
                            x1 = x + ix * col_length[pool_num]
                            x2 = x1 + col_length[pool_num]
                            y1 = y + jy * row_length[pool_num]
                            y2 = y1 + row_length[pool_num]

                            x1 = K.cast(K.round(x1), 'int32')
                            x2 = K.cast(K.round(x2), 'int32')
                            y1 = K.cast(K.round(y1), 'int32')
                            y2 = K.cast(K.round(y2), 'int32')

                            new_shape = [
                                input_shape[0], y2 - y1, x2 - x1,
                                input_shape[3]
                            ]
                            x_crop = img[:, y1:y2, x1:x2, :]
                            xm = K.reshape(x_crop, new_shape)
                            pooled_val = K.max(xm, axis=(1, 2))
                            outputs.append(pooled_val)

        final_output = K.concatenate(outputs, axis=0)
        final_output = K.reshape(final_output,
                                 (1, self.num_rois, self.nb_channels *
                                  self.num_outputs_per_channel))

        return final_output
Example #43
def dense2conv(args):
    x_hat = args
    return K.reshape(x_hat, (-1, hidden_h, hidden_w, hidden_c))
Example #44
 def tensor_product(self, x):
     a = x[0]
     b = x[1]
     b = K.reshape(b, (-1, self.experts, self.target))
     y = K.batch_dot(b, a, axes=1)
     return y
Example #45
 def call(self, x, mask=None):
     response = K.reshape(x[:, self.axis], (-1, 1))
     return K.concatenate([1 - response, response], axis=self.axis)
Example #46
def reshape_two(c):
    return K.reshape(c, (tf.shape(c)[0] // padsize, padsize, CHAR_EMBEDDING_DIM))
Example #47
def attention(x, dim):
    res = K.batch_dot(x[0], x[1], axes=[1, 1])
    return K.reshape(res, [-1, dim])
Example #48
def build_model(char_size=27, dim=64, iterations=4, training=True, pca=False):
    """Build the model."""
    # Inputs
    # Context: (rules, preds, chars,)
    context = L.Input(shape=(
        None,
        None,
        None,
    ),
                      name='context',
                      dtype='int32')
    query = L.Input(shape=(None, ), name='query', dtype='int32')

    # Flatten preds to embed entire rules
    var_flat = L.Lambda(lambda x: K.reshape(
        x, K.stack([K.shape(x)[0], -1,
                    K.prod(K.shape(x)[2:])])),
                        name='var_flat')
    flat_ctx = var_flat(context)  # (?, rules, preds*chars)

    print('Found %s texts.' % len(CONTEXT_TEXTS))
    word_index = WORD_INDEX
    print('Found %s unique tokens.' % len(word_index))

    embeddings_index = {}
    GLOVE_DIR = os.path.abspath('.') + "/data/glove"
    f = open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt'),
             'r',
             encoding='utf-8')
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
    f.close()

    print('Found %s word vectors.' % len(embeddings_index))

    EMBEDDING_DIM = 100

    embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))
    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in embedding index will be all-zeros.
            embedding_matrix[i] = embedding_vector

    # Onehot embedding
    # Contextual embeddeding of symbols
    # onehot_weights = np.eye(char_size)
    # onehot_weights[0, 0] = 0 # Clear zero index
    # onehot = L.Embedding(char_size, char_size,
    #                      trainable=False,
    #                      weights=[onehot_weights],
    #                      name='onehot')
    embedding_layer = L.Embedding(len(word_index) + 1,
                                  EMBEDDING_DIM,
                                  weights=[embedding_matrix],
                                  trainable=False)
    embedded_ctx = embedding_layer(
        flat_ctx)  # (?, rules, preds*chars*char_size)
    embedded_q = embedding_layer(query)  # (?, chars, char_size)

    embed_pred = ZeroGRU(dim, go_backwards=True, name='embed_pred')
    embedded_predq = embed_pred(embedded_q)  # (?, dim)
    # Embed every rule
    embedded_rules = NestedTimeDist(embed_pred,
                                    name='rule_embed')(embedded_ctx)
    # (?, rules, dim)

    # Reused layers over iterations
    repeat_toctx = L.RepeatVector(K.shape(embedded_ctx)[1],
                                  name='repeat_to_ctx')
    diff_sq = L.Lambda(lambda xy: K.square(xy[0] - xy[1]),
                       output_shape=(None, dim),
                       name='diff_sq')
    concat = L.Lambda(lambda xs: K.concatenate(xs, axis=2),
                      output_shape=(None, dim * 5),
                      name='concat')
    att_dense1 = L.TimeDistributed(L.Dense(dim,
                                           activation='tanh',
                                           name='att_dense1'),
                                   name='d_att_dense1')
    att_dense2 = L.TimeDistributed(L.Dense(1,
                                           activation='sigmoid',
                                           name='att_dense2'),
                                   name='d_att_dense2')
    squeeze2 = L.Lambda(lambda x: K.squeeze(x, 2), name='squeeze2')
    # expand = L.Lambda(lambda x: K.expand_dims(x, axis=2), name='expand')
    rule_mask = L.Lambda(lambda x: K.cast(
        K.any(K.not_equal(x, 0), axis=-1, keepdims=True), 'float32'),
                         name='rule_mask')(embedded_rules)
    episodic_mem = EpisodicMemory(dim, name='episodic_mem')

    # Reasoning iterations
    state = embedded_predq
    repeated_q = repeat_toctx(embedded_predq)
    outs = list()
    for _ in range(iterations):
        # Compute attention between rule and query state
        ctx_state = repeat_toctx(state)  # (?, rules, dim)
        s_s_c = diff_sq([ctx_state, embedded_rules])
        s_m_c = L.multiply([embedded_rules, state])  # (?, rules, dim)
        sim_vec = concat([s_s_c, s_m_c, ctx_state, embedded_rules, repeated_q])
        sim_vec = att_dense1(sim_vec)  # (?, rules, dim)
        sim_vec = att_dense2(sim_vec)  # (?, rules, 1)
        # sim_vec = squeeze2(sim_vec) # (?, rules)
        # sim_vec = L.Softmax(axis=1)(sim_vec)
        # sim_vec = expand(sim_vec) # (?, rules, 1)
        sim_vec = L.multiply([sim_vec, rule_mask])

        state = episodic_mem([state, sim_vec, embedded_rules])
        sim_vec = squeeze2(sim_vec)  # (?, rules)
        outs.append(sim_vec)

    # Prediction
    out = L.Dense(1, activation='sigmoid', name='out')(state)
    if pca:
        model = Model([context, query], [embedded_rules])
    elif training:
        model = Model([context, query], [out])
        model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['acc'])
    else:
        model = Model([context, query], outs + [out])
    return model
Example #49
0
def getModel(
    srcVocabTransformer,
    refVocabTransformer,
    embedding_size,
    gru_size,
    src_fastText,
    ref_fastText,
    train_embeddings,
    attention,
    summary_attention,
    use_estimator,
    model_inputs=None,
    verbose=False,
):
    src_vocab_size = srcVocabTransformer.vocab_size()
    ref_vocab_size = refVocabTransformer.vocab_size()

    src_embedding_kwargs = {}
    ref_embedding_kwargs = {}

    if src_fastText:
        logger.info("Loading fastText embeddings for source language")
        src_embedding_kwargs['weights'] = [
            get_fastText_embeddings(src_fastText, srcVocabTransformer,
                                    embedding_size)
        ]

    if ref_fastText:
        logger.info("Loading fastText embeddings for target language")
        ref_embedding_kwargs['weights'] = [
            get_fastText_embeddings(ref_fastText, refVocabTransformer,
                                    embedding_size)
        ]

    if verbose:
        logger.info("Creating model")

    if model_inputs:
        src_input, ref_input = model_inputs
    else:
        src_input = Input(shape=(None, ))
        ref_input = Input(shape=(None, ))

    src_embedding = Embedding(output_dim=embedding_size,
                              input_dim=src_vocab_size,
                              mask_zero=True,
                              name="src_embedding",
                              trainable=train_embeddings,
                              **src_embedding_kwargs)(src_input)

    ref_embedding = Embedding(output_dim=embedding_size,
                              input_dim=ref_vocab_size,
                              mask_zero=True,
                              name="ref_embedding",
                              trainable=train_embeddings,
                              **ref_embedding_kwargs)(ref_input)

    encoder = Bidirectional(GRU(gru_size,
                                return_sequences=True,
                                return_state=True),
                            name="encoder")(src_embedding)

    return_sequence = (use_estimator or summary_attention)
    if attention:
        attention_states = TimeDistributedSequential(
            [Dense(gru_size, name="attention_state")], encoder[0])

        with CustomObjectScope({'AttentionGRUCell': AttentionGRUCell}):
            decoder = Bidirectional(RNN(AttentionGRUCell(gru_size),
                                        return_sequences=return_sequence,
                                        return_state=return_sequence),
                                    name="decoder")(ref_embedding,
                                                    constants=attention_states,
                                                    initial_state=encoder[1:])
    else:
        decoder = Bidirectional(GRU(gru_size,
                                    return_sequences=return_sequence,
                                    return_state=return_sequence),
                                name="decoder")(ref_embedding,
                                                initial_state=encoder[1:])

    if use_estimator:
        decoder = Bidirectional(GRU(gru_size,
                                    return_sequences=summary_attention,
                                    return_state=summary_attention),
                                name="estimator")(decoder[0])

    if summary_attention:
        attention_weights = TimeDistributedSequential([
            Dense(gru_size, activation="tanh"),
            Dense(1, name="attention_weights"),
        ], decoder[0])

        # attention_weights = Reshape((-1,))(attention_weights)
        attention_weights = Lambda(
            lambda x: K.reshape(x, (
                x.shape[0],
                -1,
            )),
            output_shape=lambda input_shape: input_shape[:-1],
            mask=lambda inputs, mask: mask,
            name="reshape")(attention_weights)

        attention_weights = Activation(
            "softmax", name="attention_softmax")(attention_weights)

        quality_summary = dot([attention_weights, decoder[0]],
                              axes=(1, 1),
                              name="summary")
    else:
        quality_summary = decoder

    quality = Dense(1, name="quality")(quality_summary)

    model = Model(inputs=[src_input, ref_input], outputs=[quality])

    if verbose:
        _printModelSummary(logger, model, "model")

    return model
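A toy NumPy sketch (not from the original source) of what the summary-attention branch above computes: a softmax over the per-timestep attention scores, followed by a weighted sum of the decoder states (the dot with axes=(1, 1)).

import numpy as np

decoder_states = np.array([[[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]])  # (batch=1, time=3, units=2)
scores = np.array([[0.1, 0.2, 0.7]])                               # (batch=1, time=3)
weights = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)
summary = np.einsum('bt,btd->bd', weights, decoder_states)         # weighted sum over time
print(summary)   # one summary vector per batch item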
Example #50
0
def yolo_loss(args,
              anchors,
              num_anchors_per_layer,
              num_classes,
              ignore_thresh=.5,
              print_loss=True):
    """
    Return yolo_loss tensor

    Args:
        args (list): args[:num_output_layers] the output of yolo_body or tiny_yolo_body
            args[num_output_layers:] raw_y_true
        anchors (np.array): shape=(N, 2), wh
        num_anchors_per_layer (int):
        num_classes (int):
        ignore_thresh (float): the iou threshold whether to ignore object confidence loss
        print_loss:

    Returns:
        loss: tensor, shape=(1,)

    """
    num_output_layers = len(anchors) // num_anchors_per_layer
    yolo_outputs = args[:num_output_layers]
    raw_y_trues = args[num_output_layers:]
    anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(raw_y_trues[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(raw_y_trues[0]))
        for l in range(num_output_layers)
    ]
    loss = 0
    batch_size = K.shape(yolo_outputs[0])[0]
    batch_size_f = K.cast(batch_size, K.dtype(yolo_outputs[0]))

    for l in range(num_output_layers):
        grid_shape = grid_shapes[l]
        yolo_output = yolo_outputs[l]
        raw_y_pred = K.reshape(yolo_output, [
            -1, grid_shape[0], grid_shape[1], num_anchors_per_layer,
            num_classes + 9
        ])
        raw_y_true = raw_y_trues[l]
        anchor_mask = anchor_masks[l]
        # (batch_size, grid_height, grid_width, num_anchors_this_layer, 1)
        object_mask = raw_y_true[..., 4:5]
        # (batch_size, grid_height, grid_width, num_anchors_this_layer, num_classes)
        y_true_class_probs = raw_y_true[..., 5:]
        grid, y_pred_box, y_pred_delta_xy, y_pred_log_wh, y_pred_sigma, y_pred_confidence, y_pred_class_probs = \
            y_pred_graph(raw_y_pred, anchors[anchor_mask], input_shape)
        y_true_delta_xy = raw_y_true[..., :2] * grid_shapes[l][::-1] - grid
        y_true_log_wh = K.log(raw_y_true[..., 2:4] * input_shape[::-1] /
                              anchors[anchor_mask])
        y_true_log_wh = K.switch(object_mask, y_true_log_wh,
                                 K.zeros_like(y_true_log_wh))
        box_loss_scale = 2 - raw_y_true[..., 2:3] * raw_y_true[..., 3:4]
        ignore_mask = tf.TensorArray(K.dtype(raw_y_trues[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask_):
            # (num_gt_boxes, 4)
            gt_box = tf.boolean_mask(raw_y_true[b, ..., 0:4],
                                     object_mask_bool[b, ..., 0])
            # (grid_height, grid_width, num_anchors_this_layer, num_gt_boxes)
            iou = box_iou_graph(y_pred_box[b], gt_box)
            # (grid_height, grid_width, num_anchors_this_layer)
            best_iou = K.max(iou, axis=-1)
            ignore_mask_ = ignore_mask_.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(gt_box)))
            return b + 1, ignore_mask_

        _, ignore_mask = tf.while_loop(lambda b, *largs: b < batch_size,
                                       loop_body, [0, ignore_mask])
        # (batch_size, grid_height, grid_width, num_anchors_this_layer)
        ignore_mask = ignore_mask.stack()
        # (batch_size, grid_height, grid_width, num_anchors_this_layer, 1)
        ignore_mask = K.expand_dims(ignore_mask, -1)

        y_true = tf.concat([y_true_delta_xy, y_true_log_wh], axis=-1)
        y_pred_mu = tf.concat([y_pred_delta_xy, y_pred_log_wh], axis=-1)
        x_loss = nll_loss(y_true[..., 0:1], y_pred_mu[..., 0:1],
                          y_pred_sigma[..., 0:1])
        x_loss = object_mask * box_loss_scale * x_loss
        y_loss = nll_loss(y_true[..., 1:2], y_pred_mu[..., 1:2],
                          y_pred_sigma[..., 1:2])
        y_loss = object_mask * box_loss_scale * y_loss
        w_loss = nll_loss(y_true[..., 2:3], y_pred_mu[..., 2:3],
                          y_pred_sigma[..., 2:3])
        w_loss = object_mask * box_loss_scale * w_loss
        h_loss = nll_loss(y_true[..., 3:4], y_pred_mu[..., 3:4],
                          y_pred_sigma[..., 3:4])
        h_loss = object_mask * box_loss_scale * h_loss
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, y_pred_confidence) + \
                          (1 - object_mask) * K.binary_crossentropy(object_mask, y_pred_confidence) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            y_true_class_probs, y_pred_class_probs)
        x_loss = K.sum(x_loss) / batch_size_f
        y_loss = K.sum(y_loss) / batch_size_f
        w_loss = K.sum(w_loss) / batch_size_f
        h_loss = K.sum(h_loss) / batch_size_f
        confidence_loss = K.sum(confidence_loss) / batch_size_f
        class_loss = K.sum(class_loss) / batch_size_f
        loss += x_loss + y_loss + w_loss + h_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, x_loss, y_loss, w_loss, h_loss, confidence_loss,
                class_loss,
                K.sum(ignore_mask)
            ],
                            message='\nloss: ')
    return loss
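nll_loss, y_pred_graph and box_iou_graph above come from elsewhere in that project and are not shown here. Purely as a hedged illustration of the kind of term nll_loss denotes, a generic Gaussian negative log-likelihood (the exact form used by the original repository may differ):

import numpy as np

def gaussian_nll(y_true, mu, sigma, eps=1e-7):
    # -log N(y_true; mu, sigma^2) = 0.5*log(2*pi*sigma^2) + (y_true - mu)^2 / (2*sigma^2)
    return 0.5 * np.log(2.0 * np.pi * sigma ** 2 + eps) + (y_true - mu) ** 2 / (2.0 * sigma ** 2 + eps)

print(gaussian_nll(np.array([0.4]), np.array([0.5]), np.array([0.2])))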
Example #51
0
 def call(self, x):
     return x + K.reshape(self.threshold, (1, 1, 1, self.filters))
Example #52
0
    def call(self, x, mask=None):
        # Earlier computations have already extracted reasonably reliable rois and their
        # bounding boxes from the model_rpn output. num_rois of those rois are sampled
        # for training and fed into this RoI pooling layer.
        # The RoI pooling layer takes a list of 2 tensors and outputs a 5D tensor, so
        # compute_output_shape(self, input_shape) has to be configured accordingly,
        # where input_shape describes those two inputs.
        # In short, roipooling takes a list of [feature map, boxes selected by the RPN
        # (1:1 positive/negative samples)] and outputs num_rois feature maps of
        # size 7 x 7 x channels.

        assert(len(x) == 2)

        img = x[0]
        rois = x[1]

        input_shape = K.shape(img)

        outputs = []

        for roi_idx in range(self.num_rois):

            x = rois[0, roi_idx, 0]
            y = rois[0, roi_idx, 1]
            w = rois[0, roi_idx, 2]
            h = rois[0, roi_idx, 3]
            
            row_length = w / float(self.pool_size)
            col_length = h / float(self.pool_size)

            num_pool_regions = self.pool_size

            #NOTE: the RoiPooling implementation differs between theano and tensorflow due to the lack of a resize op
            # in theano. The theano implementation is much less efficient and leads to long compile times

            if self.dim_ordering == 'th':
                for jy in range(num_pool_regions):
                    for ix in range(num_pool_regions):
                        x1 = x + ix * row_length
                        x2 = x1 + row_length
                        y1 = y + jy * col_length
                        y2 = y1 + col_length

                        x1 = K.cast(x1, 'int32')
                        x2 = K.cast(x2, 'int32')
                        y1 = K.cast(y1, 'int32')
                        y2 = K.cast(y2, 'int32')

                        x2 = x1 + K.maximum(1,x2-x1)
                        y2 = y1 + K.maximum(1,y2-y1)
                        
                        new_shape = [input_shape[0], input_shape[1],
                                     y2 - y1, x2 - x1]

                        x_crop = img[:, :, y1:y2, x1:x2]
                        xm = K.reshape(x_crop, new_shape)
                        pooled_val = K.max(xm, axis=(2, 3))
                        outputs.append(pooled_val)

            elif self.dim_ordering == 'tf':
                x = K.cast(x, 'int32')
                y = K.cast(y, 'int32')
                w = K.cast(w, 'int32')
                h = K.cast(h, 'int32')

                rs = tf.image.resize_images(img[:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size))
                outputs.append(rs)

        final_output = K.concatenate(outputs, axis=0)
        final_output = K.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))

        if self.dim_ordering == 'th':
            final_output = K.permute_dimensions(final_output, (0, 1, 4, 2, 3))
        # 'tf' ordering is already (batch, num_rois, rows, cols, channels); no permute needed

        return final_output
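A toy NumPy sketch (not from the original source) of what each pooling cell in the 'th' branch above computes: crop one region of the feature map and take the per-channel max.

import numpy as np

feat = np.arange(2 * 6 * 6).reshape(1, 2, 6, 6)   # (batch, channels, rows, cols), toy values
x1, x2, y1, y2 = 1, 4, 2, 5                        # one crop window of the ROI
x_crop = feat[:, :, y1:y2, x1:x2]
pooled_val = x_crop.max(axis=(2, 3))               # per-channel max, shape (1, 2)
print(pooled_val)                                  # [[27 63]]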
Example #53
0
    def call(self,
             inputs,
             initial_state=None,
             initial_readout=None,
             ground_truth=None,
             mask=None,
             training=None):
        # input shape: `(samples, time (padded with zeros), input_dim)`
        # note that the .build() method of subclasses MUST define
        # self.input_spec and self.state_spec with complete input shapes.
        if type(mask) is list:
            mask = mask[0]
        if self.model is None:
            raise Exception('Empty RecurrentModel.')
        num_req_states = self.num_states
        if self.readout:
            num_actual_states = num_req_states - 1
        else:
            num_actual_states = num_req_states
        if type(inputs) is list:
            inputs_list = inputs[:]
            inputs = inputs_list.pop(0)
            initial_states = inputs_list[:num_actual_states]
            if len(initial_states) > 0:
                if self._is_optional_input_placeholder(initial_states[0]):
                    initial_states = self.get_initial_state(inputs)
            inputs_list = inputs_list[num_actual_states:]
            if self.readout:
                initial_readout = inputs_list.pop(0)
                if self.teacher_force:
                    ground_truth = inputs_list.pop()
        else:
            if initial_state is not None:
                if not isinstance(initial_state, (list, tuple)):
                    initial_states = [initial_state]
                else:
                    initial_states = list(initial_state)
                if self._is_optional_input_placeholder(initial_states[0]):
                    initial_states = self.get_initial_state(inputs)

            elif self.stateful:
                initial_states = self.states
            else:
                initial_states = self.get_initial_state(inputs)
        if self.readout:
            if initial_readout is None or self._is_optional_input_placeholder(
                    initial_readout):
                output_shape = K.int_shape(_to_list((self.model.output))[0])
                output_ndim = len(output_shape)
                input_ndim = K.ndim(inputs)
                initial_readout = K.zeros_like(inputs)
                slices = [slice(None)] + [0] * (input_ndim - 1)
                initial_readout = initial_readout[slices]  # (batch_size,)
                initial_readout = K.reshape(initial_readout,
                                            (-1, ) + (1, ) * (output_ndim - 1))
                initial_readout = K.tile(initial_readout,
                                         (1, ) + tuple(output_shape[1:]))
            initial_states.append(initial_readout)
            if self.teacher_force:
                if ground_truth is None or self._is_optional_input_placeholder(
                        ground_truth):
                    raise Exception(
                        'ground_truth must be provided for RecurrentModel with teacher_force=True.'
                    )
                # counter = K.zeros((1,), dtype='int32')
                counter = K.zeros((1, ))
                counter = K.cast(counter, 'int32')
                initial_states.insert(-1, counter)
                initial_states.insert(-1, ground_truth)
                num_req_states += 2
        if len(initial_states) != num_req_states:
            raise ValueError('Layer requires ' + str(num_req_states) +
                             ' states but was passed ' +
                             str(len(initial_states)) + ' initial states.')
        input_shape = K.int_shape(inputs)
        if self.unroll and input_shape[1] is None:
            raise ValueError('Cannot unroll a RNN if the '
                             'time dimension is undefined. \n'
                             '- If using a Sequential model, '
                             'specify the time dimension by passing '
                             'an `input_shape` or `batch_input_shape` '
                             'argument to your first layer. If your '
                             'first layer is an Embedding, you can '
                             'also use the `input_length` argument.\n'
                             '- If using the functional API, specify '
                             'the time dimension by passing a `shape` '
                             'or `batch_shape` argument to your Input layer.')
        preprocessed_input = self.preprocess_input(inputs, training=None)
        constants = self.get_constants(inputs, training=None)
        if self.decode:
            initial_states.insert(0, inputs)
            preprocessed_input = K.zeros((1, self.output_length, 1))
            input_length = self.output_length
        else:
            input_length = input_shape[1]
        if self.uses_learning_phase:
            with learning_phase_scope(0):
                last_output_test, outputs_test, states_test, updates = rnn(
                    self.step,
                    preprocessed_input,
                    initial_states,
                    go_backwards=self.go_backwards,
                    mask=mask,
                    constants=constants,
                    unroll=self.unroll,
                    input_length=input_length)
            with learning_phase_scope(1):
                last_output_train, outputs_train, states_train, updates = rnn(
                    self.step,
                    preprocessed_input,
                    initial_states,
                    go_backwards=self.go_backwards,
                    mask=mask,
                    constants=constants,
                    unroll=self.unroll,
                    input_length=input_length)

            last_output = K.in_train_phase(last_output_train,
                                           last_output_test,
                                           training=training)
            outputs = K.in_train_phase(outputs_train,
                                       outputs_test,
                                       training=training)
            states = []
            for state_train, state_test in zip(states_train, states_test):
                states.append(
                    K.in_train_phase(state_train,
                                     state_test,
                                     training=training))

        else:
            last_output, outputs, states, updates = rnn(
                self.step,
                preprocessed_input,
                initial_states,
                go_backwards=self.go_backwards,
                mask=mask,
                constants=constants,
                unroll=self.unroll,
                input_length=input_length)
        states = list(states)
        if self.decode:
            states.pop(0)
        if self.readout:
            states.pop()
            if self.teacher_force:
                states.pop()
                states.pop()
        if len(updates) > 0:
            self.add_update(updates)
        if self.stateful:
            updates = []
            for i in range(len(states)):
                updates.append((self.states[i], states[i]))
            self.add_update(updates, inputs)

        # Properly set learning phase
        if 0 < self.dropout + self.recurrent_dropout:
            last_output._uses_learning_phase = True
            outputs._uses_learning_phase = True

        if self.return_sequences:
            y = outputs
        else:
            y = last_output
        if self.return_states:
            return [y] + states
        else:
            return y
    def call(self, x, mask=None):
#        print("call is called")
        input_shape = K.shape(x)
#        print("Input Shape",input_shape.shape)
        if self.dim_ordering == 'th':
            num_rows = input_shape[2]
            num_cols = input_shape[3]
        elif self.dim_ordering == 'tf':
            num_rows = input_shape[1]
            num_cols = input_shape[2]
            
        print(num_rows, num_cols)
        
        row_length = [K.cast(num_rows, 'float32') / i for i in self.pool_list]
        col_length = [K.cast(num_cols, 'float32') / i for i in self.pool_list]
        
#        print("row_length", row_length)
#        print("col_length", col_length)
        
        outputs = []

        if self.dim_ordering == 'th':
            for pool_num, num_pool_regions in enumerate(self.pool_list):
#                print("num_pool_regions:",num_pool_regions)
#                print("pool_num: ",pool_num)
                for jy in range(num_pool_regions):
                    print("jy: ",jy)
                    for ix in range(num_pool_regions):
                        print("ix: ",ix)
                        x1 = ix * col_length[pool_num]
                        x2 = ix * col_length[pool_num] + col_length[pool_num]
                        y1 = jy * row_length[pool_num]
                        y2 = jy * row_length[pool_num] + row_length[pool_num]

                        x1 = K.cast(K.round(x1), 'int32')
                        x2 = K.cast(K.round(x2), 'int32')
                        y1 = K.cast(K.round(y1), 'int32')
                        y2 = K.cast(K.round(y2), 'int32')
                        new_shape = [input_shape[0], input_shape[1],
                                     y2 - y1, x2 - x1]
                        x_crop = x[:, :, y1:y2, x1:x2]
                        xm = K.reshape(x_crop, new_shape)
                        pooled_val = K.max(xm, axis=(2, 3))
                        outputs.append(pooled_val)

        elif self.dim_ordering == 'tf':
            for pool_num, num_pool_regions in enumerate(self.pool_list):
#                print("pool_num", pool_num)
#                print("num_pool_regions", num_pool_regions)
                for jy in range(num_pool_regions):
                    for ix in range(num_pool_regions):
                        
                        
                        x1 = ix * col_length[pool_num]
                        x2 = ix * col_length[pool_num] + col_length[pool_num]
                        y1 = jy * row_length[pool_num]
                        y2 = jy * row_length[pool_num] + row_length[pool_num]

                        x1 = K.cast(K.round(x1), 'int32')
                        x2 = K.cast(K.round(x2), 'int32')
                        y1 = K.cast(K.round(y1), 'int32')
                        y2 = K.cast(K.round(y2), 'int32')

                        new_shape = [input_shape[0], y2 - y1,
                                     x2 - x1, input_shape[3]]

                        x_crop = x[:, y1:y2, x1:x2, :]
                        xm = K.reshape(x_crop, new_shape)
#                        print("xm.shape",xm.shape)
                        pooled_val = K.max(xm, axis=(1, 2))
#                        print("pooled_val",pooled_val)
                        outputs.append(pooled_val)

        if self.dim_ordering == 'th':
            outputs = K.concatenate(outputs)
        elif self.dim_ordering == 'tf':
            #outputs = K.concatenate(outputs,axis = 1)
            outputs = K.concatenate(outputs)
            #outputs = K.reshape(outputs,(len(self.pool_list),self.num_outputs_per_channel,input_shape[0],input_shape[1]))
            #outputs = K.permute_dimensions(outputs,(3,1,0,2))
            #outputs = K.reshape(outputs,(input_shape[0], self.num_outputs_per_channel * self.nb_channels))
#        print("outputs.shape",outputs.shape)
        return outputs
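A minimal sketch (not from the original source) of where num_outputs_per_channel comes from in the spatial pyramid pooling layers above; pool_list and nb_channels are assumed example values.

pool_list = [1, 2, 4]
num_outputs_per_channel = sum(i * i for i in pool_list)   # 1 + 4 + 16 = 21 bins per channel
nb_channels = 512
print(num_outputs_per_channel * nb_channels)              # 10752-dim pooled descriptor per image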
def build_heatmap(in_tensor, config, names = None):
 
    num_detections  = config.DETECTION_MAX_INSTANCES
    img_h, img_w    = config.IMAGE_SHAPE[:2]
    batch_size      = config.BATCH_SIZE
    num_classes     = config.NUM_CLASSES  
    print('\n ')
    print('  > NEW build_heatmap() for ', names )
    print('    original in_tensor shape : ', in_tensor.shape)
    # rois per image is determined by size of input tensor 
    #   detection mode:   config.TRAIN_ROIS_PER_IMAGE 
    #   ground_truth  :   config.DETECTION_MAX_INSTANCES
    rois_per_image  = (in_tensor.shape)[2] 
    # strt_cls        = 0 if rois_per_image == 32 else 1
    print('    num of bboxes per class is : ', rois_per_image )

    #-----------------------------------------------------------------------------    
    ## Stack non_zero bboxes from in_tensor into pt2_dense 
    #-----------------------------------------------------------------------------
    # pt2_ind shape is [?, 3]. 
    #   pt2_ind[0] corresponds to image_index 
    #   pt2_ind[1] corresponds to class_index 
    #   pt2_ind[2] corresponds to roi row_index 
    # pt2_dense shape is [?, 6]
    #    pt2_dense[0] is image index
    #    pt2_dense[1:4]  roi coordinates 
    #    pt2_dense[5]    is class id 
    #-----------------------------------------------------------------------------
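    # Illustration (not from the original source): if, for image 0, only class 3
    # has two non-zero rois sitting in rows 5 and 9 of in_tensor, then
    # tf.where returns pt2_ind = [[0, 3, 5], [0, 3, 9]] and tf.gather_nd
    # stacks those two roi rows into pt2_dense.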
    pt2_sum = tf.reduce_sum(tf.abs(in_tensor[:,:,:,:-2]), axis=-1)
    print('    pt2_sum shape ',pt2_sum.shape)
    # print(pt2_sum[0].eval())
    pt2_ind = tf.where(pt2_sum > 0)

    ## replaced the two operations below with the one above - 15-05-2018
    # pt2_mask = tf.greater(pt2_sum , 0)
    # pt2_ind  = tf.where(pt2_mask)
    # print(' pt2_mask shape ', pt2_mask.get_shape())
    # print(pt2_mask.eval())
    # print('    pt2_ind shape ', pt2_ind.get_shape())
    # print(pt2_ind.eval())

    pt2_dense = tf.gather_nd( in_tensor, pt2_ind)
    print('    dense shape ',pt2_dense.get_shape())

    #-----------------------------------------------------------------------------
    ## Build mesh-grid to hold pixel coordinates  
    #-----------------------------------------------------------------------------
    X = tf.range(img_w, dtype=tf.int32)
    Y = tf.range(img_h, dtype=tf.int32)
    X, Y = tf.meshgrid(X, Y)

    # duplicate (repeat) X and Y into a  batch_size x rois_per_image tensor
    print('    X/Y shapes :',  X.get_shape(), Y.get_shape())
    ones = tf.ones([tf.shape(pt2_dense)[0] , 1, 1], dtype = tf.int32)
    rep_X = ones * X
    rep_Y = ones * Y 
    print('    Ones:    ', ones.shape)                
    print('    ones_exp * X', ones.shape, '*', X.shape, '= ',rep_X.shape)
    print('    ones_exp * Y', ones.shape, '*', Y.shape, '= ',rep_Y.shape)

    # # stack the X and Y grids 
    bef_pos = tf.to_float(tf.stack([rep_X,rep_Y], axis = -1))
    print('    before transpose ', bef_pos.get_shape())
    pos_grid = tf.transpose(bef_pos,[1,2,0,3])
    print('    after transpose ', pos_grid.get_shape())    

    #-----------------------------------------------------------------------------
    ##  Build mean and covariance tensors for Multivariate Normal Distribution 
    #-----------------------------------------------------------------------------
    width  = pt2_dense[:,3] - pt2_dense[:,1]      # x2 - x1
    height = pt2_dense[:,2] - pt2_dense[:,0]
    cx     = pt2_dense[:,1] + ( width  / 2.0)
    cy     = pt2_dense[:,0] + ( height / 2.0)
    means  = tf.stack((cx,cy),axis = -1)
    covar  = tf.stack((width * 0.5 , height * 0.5), axis = -1)
    covar  = tf.sqrt(covar)

    tfd = tf.contrib.distributions
    mvn = tfd.MultivariateNormalDiag( loc  = means,  scale_diag = covar)
    prob_grid = mvn.prob(pos_grid)
    print('     Prob_grid shape before transpose: ',prob_grid.get_shape())
    prob_grid = tf.transpose(prob_grid,[2,0,1])
    print('     Prob_grid shape after transpose: ',prob_grid.get_shape())
    print('    >> input to MVN.PROB: pos_grid (meshgrid) shape: ', pos_grid.get_shape())
    print('    << output probabilities shape:' , prob_grid.get_shape())

    #--------------------------------------------------------------------------------
    ## IMPORTANT: kill distributions of NaN boxes (resulting from bboxes with height/width of zero,
    ## which cause singular sigma cov matrices)
    #--------------------------------------------------------------------------------
    prob_grid = tf.where(tf.is_nan(prob_grid),  tf.zeros_like(prob_grid), prob_grid)


    # scatter out the probability distributions based on class --------------------------
    print('\n    Scatter out the probability distributions based on class --------------') 
    gauss_scatt   = tf.scatter_nd(pt2_ind, prob_grid, [batch_size, num_classes, rois_per_image, img_w, img_h])
    print('    pt2_ind shape   : ', pt2_ind.shape)  
    print('    prob_grid shape : ', prob_grid.shape)  
    print('    gauss_scatt     : ', gauss_scatt.shape)   # batch_sz , num_classes, num_rois, image_h, image_w
    
    # heatmap: sum gauss_scattered based on class ---------------------------------------
    print('\n    Reduce sum based on class ---------------------------------------------')         
    gauss_sum = tf.reduce_sum(gauss_scatt, axis=2, name='pred_heatmap2')
    gauss_sum = tf.where(gauss_sum > 1e-12, gauss_sum, tf.zeros_like(gauss_sum))
    
    print('    gaussian_sum shape     : ', gauss_sum.get_shape(), 'Keras tensor ', KB.is_keras_tensor(gauss_sum) )      
    
    ##---------------------------------------------------------------------------------------------
    ## heatmap L2 normalization
    ## Normalization using the  `gauss_sum` (batchsize , num_classes, height, width) 
    ## 17-05-2018 (New method, replaces the previous method that used the transposed gauss sum)
    ## 17-05-2018 Replaced with normalization across the CLASS axis 
    ##---------------------------------------------------------------------------------------------

    # print('\n    L2 normalization ------------------------------------------------------')   
    gauss_L2norm   = KB.l2_normalize(gauss_sum, axis = +1)   # normalize along the CLASS axis 
    print('    gauss L2 norm   : ', gauss_L2norm.shape   ,' Keras tensor ', KB.is_keras_tensor(gauss_L2norm) )

    print('\n    normalization ------------------------------------------------------')   
    gauss_norm    = gauss_sum / tf.reduce_max(gauss_sum, axis=[-2,-1], keepdims = True)
    gauss_norm    = tf.where(tf.is_nan(gauss_norm),  tf.zeros_like(gauss_norm), gauss_norm)
    print('    gauss norm   : ', gauss_norm.shape   ,' Keras tensor ', KB.is_keras_tensor(gauss_norm) )
    
    ##--------------------------------------------------------------------------------------------
    ## generate score based on gaussian using bounding box masks 
    ## NOTE: Score is generated on NORMALIZED gaussian distributions (GAUSS_NORM)
    ##       If want to do this on NON-NORMALIZED, we need to apply it on GAUSS_SUM
    ##--------------------------------------------------------------------------------------------
    # flatten gaussian scattered and input_tensor, and pass on to build_bbox_score routine 
    in_shape = tf.shape(in_tensor)
    in_tensor_flattened  = tf.reshape(in_tensor, [-1, in_shape[-1]])
    bboxes = tf.to_int32(tf.round(in_tensor_flattened[...,0:4]))
    print('    in_tensor               ', in_tensor.shape)
    print('    in_tensor_flattened is  ', in_tensor_flattened.shape)
    print('    boxes shape             ', bboxes.shape)
    print('    Rois per image        : ', rois_per_image)


    #--------------------------------------------------------------------------------------------------------------------------
    # duplicate GAUSS_NORM <num_roi> times to pass along with bboxes to map_fn function
    #   Here we have a choice to calculate scores using the GAUSS_SUM (unnormalized) or GAUSS_NORM (normalized)
    #   after looking at the scores and ratios for each option, I decided to go with the normalized 
    #   as the numbers are larger
    #
    # Examples>
    #   Using GAUSS_SUM
    # [   3.660313    3.513489   54.475536   52.747402    1.          0.999997    4.998889 2450.          0.00204     0.444867]
    # [   7.135149    1.310972   50.020126   44.779854    1.          0.999991    4.981591 1892.          0.002633    0.574077]
    # [  13.401865    0.         62.258957   46.636948    1.          0.999971    4.957398 2303.          0.002153    0.469335]
    # [   0.          0.         66.42349    56.123024    1.          0.999908    4.999996 3696.          0.001353    0.294958]
    # [   0.          0.         40.78952    60.404335    1.          0.999833    4.586552 2460.          0.001864    0.406513]    
    #
    #   Using GAUSS_NORM:
    # [   3.660313    3.513489   54.475536   52.747402    1.          0.999997 1832.9218   2450.          0.748131    0.479411]
    # [   7.135149    1.310972   50.020126   44.779854    1.          0.999991 1659.3965   1892.          0.877059    0.56203 ]
    # [  13.401865    0.         62.258957   46.636948    1.          0.999971 1540.4974   2303.          0.668909    0.428645]
    # [   0.          0.         66.42349    56.123024    1.          0.999908 1925.3267   3696.          0.520922    0.333813]
    # [   0.          0.         40.78952    60.404335    1.          0.999833 1531.321    2460.          0.622488    0.398898]
    # 
    #  to change the source, change the following line gauss_norm <--> gauss_sum
    #---------------------------------------------------------------------------------------------------------------------------
    temp = tf.expand_dims(gauss_norm, axis =2)
    temp = tf.tile(temp, [1,1, rois_per_image ,1,1])
    temp_shape   = KB.int_shape(temp)
    temp_reshape = KB.reshape(temp, (-1, temp_shape[-2], temp_shape[-1]))
    print('    heatmap original shape  : ', gauss_norm.shape)
    print('    heatmap replicated      : ', temp_shape)
    print('    heatmap flattened       : ', temp_reshape.shape)

    scores = tf.map_fn(build_mask_routine, [temp_reshape, bboxes], dtype=tf.float32)


    # consider the two new columns for reshaping the gaussian_bbox_scores
    new_shape   = tf.shape(in_tensor)+ [0,0,0, tf.shape(scores)[-1]]        
    bbox_scores = tf.concat([in_tensor_flattened, scores], axis = -1)
    bbox_scores = tf.reshape(bbox_scores, new_shape)
    # print('    new shape is            : ', new_shape.eval())
    print('    in_tensor_flattened     : ', in_tensor_flattened.shape)
    print('    Scores shape            : ', scores.shape)   # [(num_batches x num_class x num_rois ), 3]
    print('    bbox_scores (reshaped)  : ', bbox_scores.shape)

    ##--------------------------------------------------------------------------------------------
    ## Normalize computed score above, and add it to the heatmap_score tensor as last column
    ##--------------------------------------------------------------------------------------------
    scr_L2norm   = tf.nn.l2_normalize(bbox_scores[...,-1], axis = -1)   # shape (num_imgs, num_class, num_rois)
    scr_L2norm   = tf.expand_dims(scr_L2norm, axis = -1)

    ##--------------------------------------------------------------------------------------------
    # shape of tf.reduce_max(bbox_scores[...,-1], axis = -1, keepdims=True) is (num_imgs, num_class, 1)
    #  This is a regular normalization that moves everything between [0, 1]. 
    #  This causes negative values to move to -inf, which is a problem in FCN scoring. 
    # To address this a normalization between [-1 and +1] was introduced in FCN.
    # Not sure how this will work with training tho.
    ##--------------------------------------------------------------------------------------------
    scr_norm     = bbox_scores[...,-1]/ tf.reduce_max(bbox_scores[...,-1], axis = -1, keepdims=True)
    scr_norm     = tf.where(tf.is_nan(scr_norm),  tf.zeros_like(scr_norm), scr_norm)     
    
    #--------------------------------------------------------------------------------------------
    # this normalization moves values to [-1, +1] which we use in FCN, but not here. 
    #--------------------------------------------------------------------------------------------    
    # reduce_max = tf.reduce_max(bbox_scores[...,-1], axis = -1, keepdims=True)
    # reduce_min = tf.reduce_min(bbox_scores[...,-1], axis = -1, keepdims=True)  ## epsilon    = tf.ones_like(reduce_max) * 1e-7
    # scr_norm  = (2* (bbox_scores[...,-1] - reduce_min) / (reduce_max - reduce_min)) - 1     

    scr_norm     = tf.where(tf.is_nan(scr_norm),  tf.zeros_like(scr_norm), scr_norm)  
    scr_norm     = tf.expand_dims(scr_norm, axis = -1)                             # shape (num_imgs, num_class, 32, 1)
    bbox_scores  = tf.concat([bbox_scores, scr_norm, scr_L2norm], axis = -1)
    
    gauss_heatmap        = KB.identity(tf.transpose(gauss_sum,[0,2,3,1]), name = names[0])
    gauss_heatmap_norm   = KB.identity(tf.transpose(gauss_norm,[0,2,3,1]), name = names[0]+'_norm')
    gauss_heatmap_L2norm = KB.identity(tf.transpose(gauss_L2norm,[0,2,3,1]), name = names[0]+'_L2norm')
    gauss_scores         = KB.identity(bbox_scores, name = names[0]+'_scores') 
    
    print('    gauss_heatmap final shape : ', gauss_heatmap.shape   ,' Keras tensor ', KB.is_keras_tensor(gauss_heatmap) )  
    print('    gauss_scores  final shape : ', gauss_scores.shape ,' Keras tensor ', KB.is_keras_tensor(gauss_scores) )  
    print('    complete')

    return   gauss_heatmap_norm, gauss_scores, gauss_heatmap,gauss_heatmap_L2norm    # [gauss_sum, gauss_scatt, means, covar]    
def matmul(mat_x):
    y = K.tf.matmul(mat_x, mat_x, transpose_b=True)
    return y


def multiply(x, n=100):
    # Batched Gram matrix: reshape to (batch, n, 5) and compute x . x^T per batch item.
    x_prime = tf.reshape(x, (-1, n, 5))
    x_transpose = tf.transpose(x_prime, perm=[0, 2, 1])
    return tf.matmul(x_prime, x_transpose)


for i in range(0, 10):
    mat_x = x[:, :, :, i]
    final[i] = Lambda(lambda x: multiply(x, n=100), output_shape=(100, 100))(
        mat_x)  #Lambda( matmul,output_shape= (-1,100, 100,1) ) (mat_x)
    #final[i] =  K.dot(mat_x,K.permute_dimensions(mat_x,(0,2,1)))
    final[i] = K.reshape(final[i], (-1, 100, 100, 1))

y = merge([final[idx] for idx in final], mode='concat', concat_axis=3)
#y = Reshape((100,100,10))(y)
z = Activation('relu')(y)
model = Model([seq_input, ss_input], z)

import tensorflow as tf
from keras import backend as K

sess = K.get_session()
q = K.eval
#K.set_session(sess)
with sess.as_default():
    x = [[1, 1], [3, 4], [5, 6]]
    z = tf.Variable(x)
    z2 = K.reshape(z, (6, 1))   # 3x2 = 6 elements, so the target shape must also hold 6
Example #57
0
    def get_initial_state(self, x):
        input_shape = self.input_spec[0].shape
        init_nb_row = input_shape[self.row_axis]
        init_nb_col = input_shape[self.column_axis]

        base_initial_state = K.zeros_like(
            x)  # (samples, timesteps) + image_shape
        non_channel_axis = -1 if self.data_format == 'channels_first' else -2
        for _ in range(2):
            base_initial_state = K.sum(base_initial_state,
                                       axis=non_channel_axis)
        base_initial_state = K.sum(base_initial_state,
                                   axis=1)  # (samples, nb_channels)

        initial_states = []
        states_to_pass = ['r', 'c', 'a']
        nlayers_to_pass = {u: self.nb_layers for u in states_to_pass}
        if self.extrap_start_time is not None:
            states_to_pass.append(
                'ahat'
            )  # pass prediction in states so can use as actual for t+1 when extrapolating
            nlayers_to_pass['ahat'] = 1
        for u in states_to_pass:
            ds_factor = 1
            for l in range(nlayers_to_pass[u]):
                nb_row = init_nb_row // ds_factor
                nb_col = init_nb_col // ds_factor
                if l < self.nb_layers - 1:
                    ds_factor *= self.upsample_size[l]
                if u in ['r', 'c']:
                    stack_size = self.R_stack_sizes[l]
                elif u == 'a':
                    stack_size = self.stack_sizes[l]
                elif u == 'ahat':
                    stack_size = self.stack_sizes[l]
                output_size = stack_size * nb_row * nb_col  # flattened size

                reducer = K.zeros((input_shape[self.channel_axis],
                                   output_size))  # (nb_channels, output_size)
                initial_state = K.dot(base_initial_state,
                                      reducer)  # (samples, output_size)
                if self.data_format == 'channels_first':
                    output_shp = (-1, stack_size, nb_row, nb_col)
                else:
                    output_shp = (-1, nb_row, nb_col, stack_size)
                initial_state = K.reshape(initial_state, output_shp)
                initial_states += [initial_state]

        if K._BACKEND == 'theano':
            from theano import tensor as T
            # There is a known issue in the Theano scan op when dealing with inputs whose shape is 1 along a dimension.
            # In our case, this is a problem when training on grayscale images, and the below line fixes it.
            initial_states = [
                T.unbroadcast(init_state, 0, 1)
                for init_state in initial_states
            ]

        if self.extrap_start_time is not None:
            initial_states += [
                K.variable(0, int if K.backend() != 'tensorflow' else 'int32')
            ]  # the last state will correspond to the current timestep
        return initial_states
Example #58
0
 def call(self, inputs):
     self.in_shape = [i or -1 for i in K.int_shape(inputs)]
     if self.shape is None:
         self.shape = [-1, np.prod(self.in_shape[1:])]
     return K.reshape(inputs, self.shape)
Example #59
0
def yolo_loss(args,
              anchors,
              num_classes,
              rescore_confidence=False,
              print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    yolo_output : tensor
        Final convolutional layer features.

    true_boxes : tensor
        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
        containing box x_center, y_center, width, height, and class.

    detectors_mask : array
        0/1 mask for detector positions where there is a matching ground truth.

    matching_true_boxes : array
        Corresponding ground truth boxes for positive detector positions.
        Already adjusted for conv height and width.

    anchors : tensor
        Anchor boxes for model.

    num_classes : int
        Number of object classes.

    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.

    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    mean_loss : float
        mean localization loss across minibatch
    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate((K.sigmoid(feats[..., 0:2]), feats[..., 2:4]),
                               axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (confidence_loss_sum + classification_loss_sum +
                        coordinates_loss_sum)
    if print_loss:
        total_loss = tf.Print(
            total_loss, [
                total_loss, confidence_loss_sum, classification_loss_sum,
                coordinates_loss_sum
            ],
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss
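A toy NumPy check (not from the original source) of the IOU computation used above, with one predicted and one true box given as centers and sizes.

import numpy as np

pred_xy, pred_wh = np.array([2.0, 2.0]), np.array([2.0, 2.0])   # pred box: corners (1,1)-(3,3)
true_xy, true_wh = np.array([3.0, 2.0]), np.array([2.0, 2.0])   # true box: corners (2,1)-(4,3)
pred_mins, pred_maxes = pred_xy - pred_wh / 2., pred_xy + pred_wh / 2.
true_mins, true_maxes = true_xy - true_wh / 2., true_xy + true_wh / 2.
intersect_wh = np.maximum(np.minimum(pred_maxes, true_maxes) - np.maximum(pred_mins, true_mins), 0.)
intersect_area = intersect_wh[0] * intersect_wh[1]
union_area = pred_wh[0] * pred_wh[1] + true_wh[0] * true_wh[1] - intersect_area
print(intersect_area / union_area)   # 0.333..., the boxes overlap by half their width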
Example #60
0
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features.
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_confidence : tensor
        Probability estimate for whether each box contains any object.
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_class_probs : tensor
        Probability distribution estimate for each box over class labels.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])
    # Static implementation for fixed models.
    # TODO: Remove or add option for static implementation.
    # _, conv_height, conv_width, _ = K.int_shape(feats)
    # conv_dims = K.variable([conv_width, conv_height])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the inner most iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
    conv_width_index = K.tile(K.expand_dims(conv_width_index, 0),
                              [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # Static generation of conv_index:
    # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)])
    # conv_index = conv_index[:, [1, 0]]  # swap columns for YOLO ordering.
    # conv_index = K.variable(
    #     conv_index.reshape(1, conv_height, conv_width, 1, 2))
    # feats = Reshape(
    #     (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats)

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_confidence, box_xy, box_wh, box_class_probs
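A minimal NumPy sketch (not from the original source) of the usual next step after yolo_head: turning the centre/size predictions into corner boxes for filtering; the values are made up.

import numpy as np

box_xy = np.array([[0.5, 0.5]])   # example box centre, normalized to [0, 1]
box_wh = np.array([[0.2, 0.4]])   # example box width/height
box_mins = box_xy - box_wh / 2.0
box_maxes = box_xy + box_wh / 2.0
# (y_min, x_min, y_max, x_max) ordering, as downstream filtering code typically expects
boxes = np.concatenate([box_mins[..., 1:2], box_mins[..., 0:1],
                        box_maxes[..., 1:2], box_maxes[..., 0:1]], axis=-1)
print(boxes)   # [[0.3 0.4 0.7 0.6]]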