    def call(self, x, mask=None):
        b, xb = 0., 0.
        if self.data_format == 'channels_first':
            kernel_sum_axes = [1, 2, 3]
            if self.use_bias:
                b = K.reshape(self.b, (self.filters, 1, 1, 1))
                xb = 1.
        elif self.data_format == 'channels_last':
            kernel_sum_axes = [0, 1, 2]
            if self.use_bias:
                b = K.reshape(self.b, (1, 1, 1, self.filters))
                xb = 1.

        # L2 norm of each kernel (the bias is folded in when use_bias is set).
        Wnorm = K.sqrt(
            K.sum(K.square(self.W), axis=kernel_sum_axes, keepdims=True) +
            K.square(b) + K.epsilon())
        # Per-patch L2 norm of the input, obtained by convolving the squared
        # input with self.kernel_norm.
        xnorm = K.sqrt(
            K.conv2d(K.square(x),
                     self.kernel_norm,
                     strides=self.strides,
                     padding=self.padding,
                     data_format=self.data_format,
                     filter_shape=self.kernel_norm_shape) + xb + K.epsilon())

        # Normalize the kernel; together with the division by xnorm below this
        # turns each output into a cosine similarity.
        W = self.W / Wnorm

        output = K.conv2d(x,
                          W,
                          strides=self.strides,
                          padding=self.padding,
                          data_format=self.data_format,
                          filter_shape=self.kernel_shape)

        if K.backend() == 'theano':
            xnorm = K.pattern_broadcast(xnorm, [False, True, False, False])

        output /= xnorm

        if self.use_bias:
            b /= Wnorm
            if self.data_format == 'channels_first':
                b = K.reshape(b, (1, self.filters, 1, 1))
            elif self.data_format == 'channels_last':
                b = K.reshape(b, (1, 1, 1, self.filters))
            else:
                raise ValueError('Invalid data_format:', self.data_format)
            b /= xnorm
            output += b
        output = self.activation(output)
        return output
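
The layer above normalizes both the kernel and each input patch, so every activation is a cosine similarity. A minimal NumPy sketch of that arithmetic for a single patch (shapes, names, and epsilon value are illustrative, not taken from the layer):

import numpy as np

rng = np.random.default_rng(0)
w = rng.standard_normal((3, 3, 16))      # one 3x3 kernel over 16 channels
patch = rng.standard_normal((3, 3, 16))  # one input patch of the same shape

eps = 1e-7
w_norm = np.sqrt(np.sum(np.square(w)) + eps)
x_norm = np.sqrt(np.sum(np.square(patch)) + eps)
cosine_response = np.sum(w * patch) / (w_norm * x_norm)
print(cosine_response)  # always lies in [-1, 1]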
Example #2
    def set_output(self, X, train=False):

        input_shape = (self.batch_size, self.num_lstm)
        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]
        if train:
            m = K.mean(X, axis=reduction_axes)
            brodcast_m = K.reshape(m, broadcast_shape)
            std = K.mean(K.square(X - brodcast_m) + self.epsilon,
                         axis=reduction_axes)
            std = K.sqrt(std)
            brodcast_std = K.reshape(std, broadcast_shape)
            mean_update = self.momentum * self.running_mean + (
                1 - self.momentum) * m
            std_update = self.momentum * self.running_std + (
                1 - self.momentum) * std
            self.updates = [(self.running_mean, mean_update),
                            (self.running_std, std_update)]
            X_normed = (X - brodcast_m) / (brodcast_std + self.epsilon)
        else:
            brodcast_m = K.reshape(self.running_mean, broadcast_shape)
            brodcast_std = K.reshape(self.running_std, broadcast_shape)
            X_normed = ((X - brodcast_m) / (brodcast_std + self.epsilon))
        out = K.reshape(self.gamma, broadcast_shape) * X_normed + K.reshape(
            self.beta, broadcast_shape)

        return out
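
The example above is feature-wise batch normalization with exponentially decayed running statistics. A minimal NumPy sketch of the same update rule (function name, shapes, and values are illustrative, independent of the class):

import numpy as np

def batchnorm_step(X, gamma, beta, running_mean, running_std,
                   momentum=0.99, epsilon=1e-10, train=True):
    if train:
        m = X.mean(axis=0)
        std = np.sqrt(np.mean((X - m) ** 2 + epsilon, axis=0))
        running_mean = momentum * running_mean + (1 - momentum) * m
        running_std = momentum * running_std + (1 - momentum) * std
    else:
        m, std = running_mean, running_std
    X_normed = (X - m) / (std + epsilon)
    return gamma * X_normed + beta, running_mean, running_std

X = np.random.default_rng(0).standard_normal((8, 4))
out, rm, rs = batchnorm_step(X, gamma=np.ones(4), beta=np.zeros(4),
                             running_mean=np.zeros(4), running_std=np.ones(4))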
Example #3
    def get_updates(self, params, loss):
        grads = self.get_gradients(loss, params)
        self.updates = [(self.iterations, self.iterations+1.)]

        t = self.iterations + 1
        beta_2t = K.sqrt(1 - K.pow(self.beta_2, t))
        lr_t = self.lr * beta_2t / (1 - K.pow(self.beta_1, t))

        for p, g, m, v in zip(params, grads, self.m, self.v):

            beta_1t = self.beta_1 * K.pow(self.lda, t-1)
            m_t = (beta_1t * m) + (1 - beta_1t) * g
            v_t = (self.beta_2 * v) + (1 - self.beta_2) * K.square(g)
            p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon * beta_2t)

            self.updates.append((m, m_t))
            self.updates.append((v, v_t))
            self.updates.append((p, p_t))
        return self.updates
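
The update rule above is the Adam formulation with the first-moment decay rate beta_1 annealed by lda**(t - 1) and the bias correction folded into lr_t. A minimal NumPy sketch of one step (hyper-parameter values and names here are illustrative assumptions):

import numpy as np

def adam_step(p, g, m, v, t, lr=0.001, beta_1=0.9, beta_2=0.999,
              lda=1. - 1e-8, epsilon=1e-8):
    beta_2t = np.sqrt(1. - beta_2 ** t)
    lr_t = lr * beta_2t / (1. - beta_1 ** t)
    beta_1t = beta_1 * lda ** (t - 1)        # beta_1 decays over time
    m = beta_1t * m + (1. - beta_1t) * g     # first-moment estimate
    v = beta_2 * v + (1. - beta_2) * g ** 2  # second-moment estimate
    p = p - lr_t * m / (np.sqrt(v) + epsilon * beta_2t)
    return p, m, v

p, m, v = np.array([1.0, -2.0]), np.zeros(2), np.zeros(2)
for t in range(1, 4):
    g = 2 * p                                # gradient of sum(p**2)
    p, m, v = adam_step(p, g, m, v, t)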
Example #4
def batchnorm(X,
              batch_size,
              hidden_dim,
              gamma,
              beta,
              running_mean,
              running_std,
              epsilon=1e-10,
              axis=1,
              momentum=0.99,
              train=False):

    X = K.reshape(X, (batch_size, hidden_dim))
    input_shape = (batch_size, hidden_dim)  # (1, 512)
    reduction_axes = list(range(len(input_shape)))  # [0, 1]
    del reduction_axes[axis]  # [0]
    broadcast_shape = [1] * len(input_shape)  # [1, 1]
    broadcast_shape[axis] = input_shape[axis]  # [1, 512]
    if train:
        # Batch statistics (note: K.mean collapses a 1-d input to a scalar
        # even when axis=0 is given).
        m = K.mean(X, axis=reduction_axes)  # shape (hidden_dim,)
        brodcast_m = K.reshape(m, broadcast_shape)  # shape (1, hidden_dim)
        std = K.mean(K.square(X - brodcast_m) + epsilon,
                     axis=reduction_axes)  # biased variance over the batch
        std = K.sqrt(std)  # standard deviation, shape (hidden_dim,)
        brodcast_std = K.reshape(std, broadcast_shape)  # shape (1, hidden_dim)
        mean_update = momentum * running_mean + (1 - momentum) * m
        std_update = momentum * running_std + (1 - momentum) * std
        X_normed = (X - brodcast_m) / (brodcast_std + epsilon)
    else:
        brodcast_m = K.reshape(running_mean, broadcast_shape)
        brodcast_std = K.reshape(running_std, broadcast_shape)
        X_normed = (X - brodcast_m) / (brodcast_std + epsilon)
        # At inference time the running statistics are returned unchanged;
        # without this the return statement below raises a NameError.
        mean_update, std_update = running_mean, running_std
    out = K.reshape(gamma, broadcast_shape) * X_normed + K.reshape(
        beta, broadcast_shape)  # (1, 512)

    return out, mean_update, std_update
Example #5
    def __call__(self, loss):
        output = self.layer.get_output(True)
        loss += self.l1 * K.sum(K.mean(K.abs(output), axis=0))
        loss += self.l2 * K.sum(K.mean(K.square(output), axis=0))
        return loss
Example #6
    def __call__(self, loss):
        loss += K.sum(K.abs(self.p)) * self.l1 / 2.
        loss += K.sum(K.square(self.p)) * self.l2 / 2.
        return loss
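
The two regularizers above simply add scaled L1/L2 norms of the activations or weights to the loss. A tiny worked NumPy example of the weight penalty (weight values and factors are illustrative only):

import numpy as np

p = np.array([0.5, -1.0, 2.0])   # illustrative weight vector
l1, l2 = 0.01, 0.001             # illustrative regularization factors
penalty = np.sum(np.abs(p)) * l1 / 2. + np.sum(np.square(p)) * l2 / 2.
print(penalty)                   # 3.5 * 0.005 + 5.25 * 0.0005 = 0.020125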
Example #7
    def CompileAndUpdate(self, Params):
        self.regularizerS = []
        # for par_name, par_value in Params:
        #     regularizer = regularizers.WeightRegularizer(l1=0., l2=self.options['l2_decay'])
        #     regularizer.set_param(par_value.get_value())
        #     self.regularizerS.append(regularizer)

        weight = self.options['weight']

        fea = T.tensor4(name='input_features', dtype=theano.config.floatX)
        att = T.tensor4(name='input_att', dtype='float32')
        pos_fea = T.tensor4(name='pos_fea', dtype='float32')
        pos_att = T.tensor4(name='pos_att', dtype='float32')
        neg_fea = T.tensor4(name='neg_fea', dtype='float32')
        neg_att = T.tensor4(name='neg_att', dtype='float32')
        TT = [fea, att, pos_fea, pos_att, neg_fea, neg_att]

        LSTM = lstm_simple(fea, att, self.options, Params)
        LSTMproj = LSTM.set_output()
        LSTMC = ComputeCode(fea, self.options, Params, LSTMproj)
        frame, featurepart = LSTMC.set_output()

        LSTM_pos = lstm_simple(pos_fea, pos_att, self.options, Params)
        LSTMproj_pos = LSTM_pos.set_output()
        LSTMC_pos = ComputeCode(pos_fea, self.options, Params, LSTMproj_pos)
        frame_pos, featurepart_pos = LSTMC_pos.set_output()

        LSTM_neg = lstm_simple(neg_fea, neg_att, self.options, Params)
        LSTMproj_neg = LSTM_neg.set_output()
        LSTMC_neg = ComputeCode(neg_fea, self.options, Params, LSTMproj_neg)
        frame_neg, featurepart_neg = LSTMC_neg.set_output()

        self.params = LSTM.get_Params()

        steps = self.options['steps']
        self.loss_1 = self.loss_2 = self.loss_3 = 0.
        loss = 0.
        # for i in range(self.options['batch_size']):
        AA = K.sigmoid(frame)
        BB = K.sigmoid(frame_pos)
        CC = K.sigmoid(frame_neg)

        Code = AA * featurepart
        Code_pos = BB * featurepart_pos
        Code_neg = CC * featurepart_neg

        Code_ = (Code >= 0).astype('float32')
        Code_pos_ = (Code_pos >= 0).astype('float32')
        Code_neg_ = (Code_neg >= 0).astype('float32')
        # Code = Code / T.sqrt(T.sum(T.sqr(featurepart)))
        # Code_pos = Code_pos / T.sqrt(T.sum(T.sqr(Code_pos)))
        # Code_neg = Code_neg / T.sqrt(T.sum(T.sqr(Code_neg)))

        # Triplet hinge on the real-valued codes.
        self.loss_2 = T.max(
            (0, 2. - T.sqrt(T.sum(T.sqr(Code - Code_neg))) / 32. +
             T.sqrt(T.sum(T.sqr(Code - Code_pos))) / 32.))
        # Triplet hinge on the binarized codes, accumulated over the 32 entries.
        for i in range(32):
            self.loss_3 += T.max(
                (0, 2. - T.sqrt(T.sum(T.sqr(Code_[0][i] - Code_neg_[0][i]))) +
                 T.sqrt(T.sum(T.sqr(Code_[0][i] - Code_pos_[0][i])))))

        loss = self.loss_2 + 0.1 * self.loss_3

        for par in Params.values():
            loss += K.sum(K.square(par)) * self.options['l2_decay'] / 2.
        # def Regularize(Params):
        #   for par_name, par_value in Params:
        #       Params[par_name] += self.options['l2_decay'] * K.sum(K.mean(K.square(par_value.get_value()), axis=0))
        #     return Params
        # Params = Regularize(Params)

        # opt = optimizer.Adam(self.params, lr=self.options['lrate'])
        # updates = opt.get_updates(self.params, loss)

        # train_graph = theano.function([fea, att, pos_fea, pos_att, neg_fea, neg_att], loss, on_unused_input='warn', allow_input_downcast=True)
        # self.test_graph = theano.function([fea, att, pos_fea, pos_att, neg_fea, neg_att], loss, on_unused_input='warn')
        my_H_last = Code
        encoder = theano.function(
            [fea, att, pos_fea, pos_att, neg_fea, neg_att],
            my_H_last,
            on_unused_input='ignore',
            allow_input_downcast=True)

        return loss, fea, att, pos_fea, pos_att, neg_fea, neg_att, Code, encoder
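
The loss assembled above is a triplet hinge: the anchor code should end up closer to the positive code than to the negative one by a margin of 2, with Euclidean distances scaled by 32 (apparently the code length), plus an L2 penalty on all parameters. A minimal NumPy sketch of one such hinge term (margin, scale, and shapes are illustrative assumptions):

import numpy as np

rng = np.random.default_rng(0)
anchor, positive, negative = rng.standard_normal((3, 32))

margin = 2.
hinge = max(0., margin
            - np.linalg.norm(anchor - negative) / 32.
            + np.linalg.norm(anchor - positive) / 32.)
print(hinge)  # zero once the negative is sufficiently farther away than the positive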
Example #8
    def step(self, cell_p, hid_p, mean_p, std_p):

        embed = T.reshape(T.dot(self.attribute[:, 0], self.params['W_ctx_3']),
                          [self.batch_size, 10])
        hidP = T.dot(hid_p, self.params['W_ctx_2'])  # (25, 10)
        embedd = T.repeat(self.params['W_ctx_1'], self.batch_size, 0) * T.tanh(
            embed + hidP +
            T.repeat(self.params['b_ctx'], self.batch_size, 0))  # (25, 10)
        alpha_base = T.reshape(T.exp(embedd),
                               [self.batch_size, 10, 1])  # (25, 10, 1)
        alpha_base = alpha_base / alpha_base.sum()
        att = T.reshape(self.attribute[:, 0],
                        [self.batch_size, 10, self.att_frame])
        ctx = (alpha_base * att /
               T.reshape(alpha_base.sum(axis=1), [self.batch_size, 1, 1])).sum(
                   axis=1)  # (25, 300)
        ctx = T.reshape(ctx, [self.batch_size, self.att_frame])
        # ctx += T.dot(hid_p, self.params['W_att']) + T.repeat(self.params['b_att'], self.batch_size, 0)

        input_to = T.dot(ctx, self.params['W_in']) + T.repeat(
            self.params['b'], self.batch_size, 0)  # (25, 2048)
        # input_to_i = T.dot(ctx, self.params['W_in_i']) + T.repeat(self.params['b_i'], self.batch_size, 0)
        # input_to_f = T.dot(ctx, self.params['W_in_f']) + T.repeat(self.params['b_f'], self.batch_size, 0)
        # input_to_o = T.dot(ctx, self.params['W_in_o']) + T.repeat(self.params['b_o'], self.batch_size, 0)
        # input_to_c = T.dot(ctx, self.params['W_in_c']) + T.repeat(self.params['b_c'], self.batch_size, 0)
        gate = input_to + T.dot(hid_p, self.params['W_hid'])
        # gate_i = input_to_i + T.dot(hid_p, self.params['W_hid_i'])
        # gate_f = input_to_f + T.dot(hid_p, self.params['W_hid_f'])
        # gate_o = input_to_o + T.dot(hid_p, self.params['W_hid_o'])
        # gate_c = input_to_c + T.dot(hid_p, self.params['W_hid_c'])

        # Apply nonlinearities
        ingate = T.nnet.sigmoid(
            self._slice(gate, 0, self.hidden_dim) +
            cell_p * T.repeat(self.params['W_cell'][0], self.batch_size, 0))
        forgetgate = T.nnet.sigmoid(
            self._slice(gate, 1, self.hidden_dim) +
            cell_p * T.repeat(self.params['W_cell'][1], self.batch_size, 0))
        cell_input = T.tanh(self._slice(gate, 2, self.hidden_dim))

        # Compute new cell value
        cell = forgetgate * cell_p + ingate * cell_input

        # BatchNormalization
        input_shape = (self.batch_size, self.hidden_dim)  # (1, 512)
        cell = K.reshape(cell, input_shape)
        reduction_axes = list(range(len(input_shape)))  # [0, 1]
        del reduction_axes[self.axis_bn]  # [0]
        broadcast_shape = [1] * len(input_shape)  # [1, 1]
        broadcast_shape[self.axis_bn] = input_shape[self.axis_bn]  # [1, 512]
        # m = K.mean(cell, axis=reduction_axes) # m.shape = (1, 512), note that if matrix is 1-d then mean function will return one number even if axis=0
        m = K.mean(cell, axis=0)
        brodcast_m = K.reshape(m, [1, self.hidden_dim])  # m.shape = (1, 512)
        # brodcast_m = m
        std = K.mean(K.square(cell - brodcast_m) + self.epsilon,
                     axis=reduction_axes)  # biased variance over the batch
        std = K.sqrt(std)  # standard deviation
        brodcast_std = K.reshape(std, broadcast_shape)  # (1, 512)
        mean_update = self.momentum * mean_p + (1 - self.momentum) * m
        std_update = self.momentum * std_p + (1 - self.momentum) * std
        cell_normed = (cell - brodcast_m) / (brodcast_std + self.epsilon)
        cell_bn = (K.reshape(self.params['gamma'], broadcast_shape) * cell_normed +
                   K.reshape(self.params['beta'], broadcast_shape))  # (1, 512)

        # cell_bn, mean, std = batchnorm(cell, self.batch_size, self.hidden_dim, self.params['gamma'], self.params['beta'], mean_p, std_p, train=True)

        outgate = T.nnet.sigmoid(
            self._slice(gate, 3, self.hidden_dim) +
            cell_bn * T.repeat(self.params['W_cell'][2], self.batch_size, 0))

        # Compute new hidden unit activation
        hid = outgate * T.tanh(cell_bn)
        return T.reshape(
            cell_bn, [self.batch_size, self.hidden_dim]), T.reshape(
                hid,
                [self.batch_size, self.hidden_dim]), mean_update, std_update
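
The step above is a peephole LSTM cell with attention over attribute vectors and batch normalization applied to the cell state before the output gate. A minimal NumPy sketch of just the gating arithmetic, with the attention and batch-normalization parts omitted (all names, shapes, and weights here are illustrative):

import numpy as np

def sigmoid(z):
    return 1. / (1. + np.exp(-z))

hidden_dim = 4
rng = np.random.default_rng(0)
gate = rng.standard_normal((1, 4 * hidden_dim))  # stands in for input + recurrent pre-activations
cell_prev = np.zeros((1, hidden_dim))
w_cell = 0.1 * np.ones((3, hidden_dim))          # peephole weights for the i, f, o gates

ingate = sigmoid(gate[:, 0 * hidden_dim:1 * hidden_dim] + cell_prev * w_cell[0])
forgetgate = sigmoid(gate[:, 1 * hidden_dim:2 * hidden_dim] + cell_prev * w_cell[1])
cell_input = np.tanh(gate[:, 2 * hidden_dim:3 * hidden_dim])
cell = forgetgate * cell_prev + ingate * cell_input
# The step above batch-normalizes `cell` before the output gate; that
# normalization is omitted here for brevity.
outgate = sigmoid(gate[:, 3 * hidden_dim:4 * hidden_dim] + cell * w_cell[2])
hid = outgate * np.tanh(cell)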