Example #1
    def get_updates(self, params, gparams):
        # Adam: one shared slot per parameter for the first (m) and second (v)
        # moment estimates, initialised to zero.
        self._ms_ = []
        self._vs_ = []
        for param in params:
            self._ms_ += [K.shared(np.zeros_like(param.get_value()))]
            self._vs_ += [K.shared(np.zeros_like(param.get_value()))]

        updates = []
        
        # Fold both Adam bias corrections into the step size.
        t = self._iter_ + 1
        alpha_t = self._alpha_ * (K.sqrt(1. - K.power(self._beta2_, t)) /
                                  (1. - K.power(self._beta1_, t)))
        
        for p, g, m, v in zip(params, gparams, self._ms_, self._vs_):
            # Exponential moving averages of the gradient and squared gradient.
            m_new = self._beta1_ * m + (1. - self._beta1_) * g
            updates.append((m, m_new))

            v_new = self._beta2_ * v + (1. - self._beta2_) * K.sqr(g)
            updates.append((v, v_new))

            p_new = p - alpha_t * m_new / (K.sqrt(v_new) + self._eps_)
            updates.append((p, p_new))

        updates.append((self._iter_, self._iter_ + 1))
        
        return updates
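A side note on the alpha_t above: it folds both of Adam's bias corrections into the step size. A minimal NumPy sketch (illustrative values only) showing that the folded form reproduces the paper's explicit m_hat/v_hat form exactly when epsilon is scaled by sqrt(1 - beta2**t); the code above leaves epsilon unscaled, while Example #6 below applies this scaling:

import numpy as np

alpha, beta1, beta2, eps = 0.001, 0.9, 0.999, 1e-8
t, m, v = 3, 0.05, 0.002  # illustrative scalars, not taken from the code above

# Explicit bias correction, as written in the Adam paper.
m_hat = m / (1. - beta1 ** t)
v_hat = v / (1. - beta2 ** t)
step_paper = alpha * m_hat / (np.sqrt(v_hat) + eps)

# Folded form: the corrections move into the step size alpha_t.
alpha_t = alpha * np.sqrt(1. - beta2 ** t) / (1. - beta1 ** t)
step_folded = alpha_t * m / (np.sqrt(v) + eps * np.sqrt(1. - beta2 ** t))

assert np.isclose(step_paper, step_folded)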
Example #2

    def call(self, x, mask=None):
        # Cosine-normalised convolution: the kernel and each input patch are
        # rescaled to unit norm, so the raw response lies in [-1, 1].
        b, xb = 0., 0.
        if self.data_format == 'channels_first':
            kernel_sum_axes = [1, 2, 3]
            if self.use_bias:
                b = K.reshape(self.b, (self.filters, 1, 1, 1))
                xb = 1.
        elif self.data_format == 'channels_last':
            kernel_sum_axes = [0, 1, 2]
            if self.use_bias:
                b = K.reshape(self.b, (1, 1, 1, self.filters))
                xb = 1.

        Wnorm = K.sqrt(
            K.sum(K.square(self.W), axis=kernel_sum_axes, keepdims=True) +
            K.square(b) + K.epsilon())
        xnorm = K.sqrt(
            K.conv2d(K.square(x),
                     self.kernel_norm,
                     strides=self.strides,
                     padding=self.padding,
                     data_format=self.data_format,
                     filter_shape=self.kernel_norm_shape) + xb + K.epsilon())

        W = self.W / Wnorm

        output = K.conv2d(x,
                          W,
                          strides=self.strides,
                          padding=self.padding,
                          data_format=self.data_format,
                          filter_shape=self.kernel_shape)

        if K.backend() == 'theano':
            xnorm = K.pattern_broadcast(xnorm, [False, True, False, False])

        output /= xnorm

        if self.use_bias:
            b /= Wnorm
            if self.data_format == 'channels_first':
                b = K.reshape(b, (1, self.filters, 1, 1))
            elif self.data_format == 'channels_last':
                b = K.reshape(b, (1, 1, 1, self.filters))
            else:
                raise ValueError('Invalid data_format:', self.data_format)
            b /= xnorm
            output += b
        output = self.activation(output)
        return output
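For intuition, here is what one output position of the layer above computes, ignoring the bias term; a NumPy sketch under the assumption that self.kernel_norm is an all-ones kernel that sums the squared inputs over each receptive field:

import numpy as np

rng = np.random.default_rng(0)
w = rng.normal(size=(3, 3, 8))  # one 3x3 kernel over 8 channels (assumed shape)
x = rng.normal(size=(3, 3, 8))  # the input patch under that kernel

eps = 1e-7
out = np.sum(w * x) / (np.sqrt(np.sum(w ** 2) + eps) *
                       np.sqrt(np.sum(x ** 2) + eps))

# The result is the cosine similarity of the flattened patch and kernel.
cos = np.dot(w.ravel(), x.ravel()) / (np.linalg.norm(w) * np.linalg.norm(x))
assert np.isclose(out, cos, atol=1e-3)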
Example #3
    def get_updates(self, params, gparams):
        # Create the moment accumulators lazily, on the first call only.
        if not self._ms:
            for param in params:
                self._ms += [K.shared(np.zeros_like(param.get_value()))]
                self._vs += [K.shared(np.zeros_like(param.get_value()))]

        update_params = []
        update_ms = []
        update_vs = []

        for i1 in range(len(params)):
            m_new = self._beta1 * self._ms[i1] + (1 - self._beta1) * gparams[i1]
            v_new = self._beta2 * self._vs[i1] + (1 - self._beta2) * gparams[i1]**2
            # Explicit bias correction, using the epoch counter as the timestep.
            m_unbias = m_new / (1 - K.power(self._beta1, self._epoch))
            v_unbias = v_new / (1 - K.power(self._beta2, self._epoch))
            param_new = params[i1] - self._alpha * m_unbias / (
                K.sqrt(v_unbias) + self._eps)
            update_ms += [(self._ms[i1], m_new)]
            update_vs += [(self._vs[i1], v_new)]
            update_params += [(params[i1], param_new)]

        update_epoch = [(self._epoch, self._epoch + 1.)]

        updates = update_params + update_ms + update_vs + update_epoch
        return updates
Example #4
    def set_output(self, X, train=False):

        input_shape = (self.batch_size, self.num_lstm)
        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]
        if train:
            # Batch statistics over the reduction axes.
            m = K.mean(X, axis=reduction_axes)
            broadcast_m = K.reshape(m, broadcast_shape)
            std = K.mean(K.square(X - broadcast_m) + self.epsilon,
                         axis=reduction_axes)
            std = K.sqrt(std)
            broadcast_std = K.reshape(std, broadcast_shape)
            # Exponential moving averages of the running statistics.
            mean_update = self.momentum * self.running_mean + (
                1 - self.momentum) * m
            std_update = self.momentum * self.running_std + (
                1 - self.momentum) * std
            self.updates = [(self.running_mean, mean_update),
                            (self.running_std, std_update)]
            X_normed = (X - broadcast_m) / (broadcast_std + self.epsilon)
        else:
            # At inference time, normalise with the stored running statistics.
            broadcast_m = K.reshape(self.running_mean, broadcast_shape)
            broadcast_std = K.reshape(self.running_std, broadcast_shape)
            X_normed = (X - broadcast_m) / (broadcast_std + self.epsilon)
        out = K.reshape(self.gamma, broadcast_shape) * X_normed + K.reshape(
            self.beta, broadcast_shape)

        return out
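The same normalisation written out in plain NumPy for a (batch, features) input; a sketch assuming axis=1, so statistics are taken over the batch axis, with the epsilon placement mirroring the code above:

import numpy as np

X = np.random.randn(32, 512).astype('float32')  # assumed (batch_size, num_lstm)
gamma, beta = np.ones(512), np.zeros(512)
running_mean, running_std = np.zeros(512), np.ones(512)
momentum, epsilon = 0.99, 1e-10

# Training branch: normalise with batch statistics...
m = X.mean(axis=0)
std = np.sqrt(((X - m) ** 2).mean(axis=0) + epsilon)
X_normed = (X - m) / (std + epsilon)
out = gamma * X_normed + beta

# ...and move the running statistics toward them.
running_mean = momentum * running_mean + (1 - momentum) * m
running_std = momentum * running_std + (1 - momentum) * std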
Example #6
    def get_updates(self, params, loss):
        grads = self.get_gradients(loss, params)
        self.updates = [(self.iterations, self.iterations + 1.)]

        # Fold both Adam bias corrections into the step size lr_t.
        t = self.iterations + 1
        beta_2t = K.sqrt(1 - K.pow(self.beta_2, t))
        lr_t = self.lr * beta_2t / (1 - K.pow(self.beta_1, t))

        for p, g, m, v in zip(params, grads, self.m, self.v):
            # beta_1 decays as lda**(t - 1); note epsilon is scaled by beta_2t.
            beta_1t = self.beta_1 * K.pow(self.lda, t - 1)
            m_t = (beta_1t * m) + (1 - beta_1t) * g
            v_t = (self.beta_2 * v) + (1 - self.beta_2) * K.square(g)
            p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon * beta_2t)

            self.updates.append((m, m_t))
            self.updates.append((v, v_t))
            self.updates.append((p, p_t))
        return self.updates
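The self.lda factor is the beta_1 decay schedule from the Adam paper's convergence analysis, beta_1,t = beta_1 * lambda**(t - 1) with lambda slightly below 1. A quick sketch of how slowly it acts at a typical value (the value of lda is an assumption for illustration):

beta_1, lda = 0.9, 1. - 1e-8  # lambda just below 1

for t in (1, 1000, 1000000):
    beta_1t = beta_1 * lda ** (t - 1)
    print(t, beta_1t)  # decays toward 0, but imperceptibly at practical t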
Example #8
    def get_updates(self, params, gparams):
        # Adadelta: one accumulator for squared gradients and one for
        # squared parameter updates.
        self._accumulators_ = []
        self._delta_accumulators_ = []
        for param in params:
            self._accumulators_ += [K.shared(np.zeros_like(param.get_value()))]
            self._delta_accumulators_ += [K.shared(np.zeros_like(param.get_value()))]

        updates = []

        for p, g, a, d_a in zip(params, gparams, self._accumulators_, self._delta_accumulators_):
            a_new = self._rou_ * a + (1. - self._rou_) * K.sqr(g)
            updates.append((a, a_new))

            # The RMS ratio of past updates to past gradients sets the step
            # size, so no global learning rate is needed.
            p_delta = -g * K.sqrt(d_a + self._eps_) / K.sqrt(a_new + self._eps_)
            p_new = p + p_delta
            updates.append((p, p_new))

            d_a_new = self._rou_ * d_a + (1. - self._rou_) * K.sqr(p_delta)
            updates.append((d_a, d_a_new))

        return updates
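For comparison, one Adadelta step on a plain NumPy vector (illustrative values only); the ratio of the two accumulators supplies the step size, which is why no learning-rate hyperparameter appears above:

import numpy as np

rho, eps = 0.95, 1e-6
g = np.array([0.1, -0.3])   # current gradient (illustrative)
acc = np.zeros(2)           # running average of squared gradients
delta_acc = np.zeros(2)     # running average of squared updates

acc = rho * acc + (1. - rho) * g ** 2
step = -g * np.sqrt(delta_acc + eps) / np.sqrt(acc + eps)
delta_acc = rho * delta_acc + (1. - rho) * step ** 2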
Example #9
    def get_updates(self, params, gparams):
        # Adagrad: accumulate the squared gradients of each parameter.
        self._accumulators_ = []
        for param in params:
            self._accumulators_.append(K.shared(np.zeros_like(K.get_value(param))))

        updates = []

        for p, g, a in zip(params, gparams, self._accumulators_):
            a_new = a + K.sqr(g)
            p_new = p - self._lr_ * g / (K.sqrt(a_new) + self._eps_)
            updates.append((a, a_new))
            updates.append((p, p_new))

        return updates
Example #10
    def get_updates(self, params, gparams):
        if len(self._Gs) == 0:
            for param in params:
                self._Gs.append(K.shared(np.zeros_like(K.get_value(param))))

        update_params = []
        update_Gs = []

        for i1 in range(len(params)):
            G_new = self._Gs[i1] + gparams[i1]**2
            update_Gs.append((self._Gs[i1], G_new))
            # Placing epsilon inside the square root avoids a zero division
            # on the very first step.
            update_params.append(
                (params[i1], params[i1] -
                 self._lr * gparams[i1] / K.sqrt(G_new + self._eps)))

        return update_params + update_Gs
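Both Adagrad variants above shrink the effective learning rate of frequently updated parameters; they differ only in where epsilon sits. A NumPy sketch with illustrative values:

import numpy as np

lr, eps = 0.01, 1e-8
g = np.array([0.5, -0.2])  # current gradient (illustrative)
G = np.zeros(2)            # lifetime sum of squared gradients

G = G + g ** 2
step_v9 = lr * g / (np.sqrt(G) + eps)   # epsilon outside, as in Example #9
step_v10 = lr * g / np.sqrt(G + eps)    # epsilon inside, as in Example #10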
Example #11
def batchnorm(X,
              batch_size,
              hidden_dim,
              gamma,
              beta,
              running_mean,
              running_std,
              epsilon=1e-10,
              axis=1,
              momentum=0.99,
              train=False):

    X = K.reshape(X, (batch_size, hidden_dim))
    input_shape = (batch_size, hidden_dim)  # (1, 512)
    reduction_axes = list(range(len(input_shape)))  # [0, 1]
    del reduction_axes[axis]  # [0]
    broadcast_shape = [1] * len(input_shape)  # [1, 1]
    broadcast_shape[axis] = input_shape[axis]  # [1, 512]
    if train:
        # Note: if X is 1-D, K.mean returns a scalar even when axis=0.
        m = K.mean(X, axis=reduction_axes)
        broadcast_m = K.reshape(m, broadcast_shape)  # e.g. (1, 512)
        std = K.mean(K.square(X - broadcast_m) + epsilon,
                     axis=reduction_axes)
        std = K.sqrt(std)  # batch standard deviation, e.g. (1, 512)
        broadcast_std = K.reshape(std, broadcast_shape)
        # Exponential moving averages of the running statistics.
        mean_update = momentum * running_mean + (1 - momentum) * m
        std_update = momentum * running_std + (1 - momentum) * std
        X_normed = (X - broadcast_m) / (broadcast_std + epsilon)
    else:
        broadcast_m = K.reshape(running_mean, broadcast_shape)
        broadcast_std = K.reshape(running_std, broadcast_shape)
        X_normed = (X - broadcast_m) / (broadcast_std + epsilon)
        # Return the running statistics unchanged at inference time, so the
        # function always yields a well-defined triple.
        mean_update = running_mean
        std_update = running_std
    out = K.reshape(gamma, broadcast_shape) * X_normed + K.reshape(
        beta, broadcast_shape)

    return out, mean_update, std_update
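A hypothetical call sequence for the function above; the NumPy stand-ins and shapes are assumptions for illustration (the real function expects backend tensors), and the caller is responsible for writing the returned statistics back into its running buffers:

import numpy as np

# Hypothetical stand-ins; the real code would pass backend tensors/variables.
X = np.random.randn(1, 512).astype('float32')
gamma, beta = np.ones(512), np.zeros(512)
mean, std = np.zeros(512), np.ones(512)

# Training step: normalise with batch statistics and refresh the EMAs.
out, mean, std = batchnorm(X, 1, 512, gamma, beta, mean, std, train=True)

# Inference step: the running statistics are used and returned unchanged.
out, mean, std = batchnorm(X, 1, 512, gamma, beta, mean, std, train=False)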
Example #12
def test_knn_cpu(algorithm, dist):
    # Eight random 3-D points; d holds the pairwise distances used as the
    # ground truth for the kNN graph checks below.
    x = th.randn(8, 3).to(F.cpu())
    kg = dgl.nn.KNNGraph(3)
    if dist == 'euclidean':
        d = th.cdist(x, x).to(F.cpu())
    else:
        # Shift the points, then use the cosine distance 1 - <x_i, x_j>
        # of the L2-normalised features.
        x = x + th.randn(1).item()
        tmp_x = x / (1e-5 + F.sqrt(F.sum(x * x, dim=1, keepdims=True)))
        d = 1 - F.matmul(tmp_x, tmp_x.T).to(F.cpu())

    def check_knn(g, x, start, end, k):
        assert g.device == x.device
        for v in range(start, end):
            src, _ = g.in_edges(v)
            src = set(src.numpy())
            i = v - start
            src_ans = set(
                th.topk(d[start:end,
                          start:end][i], k, largest=False)[1].numpy() + start)
            assert src == src_ans

    # check knn with 2d input
    g = kg(x, algorithm, dist)
    check_knn(g, x, 0, 8, 3)

    # check knn with 3d input
    g = kg(x.view(2, 4, 3), algorithm, dist)
    check_knn(g, x, 0, 4, 3)
    check_knn(g, x, 4, 8, 3)

    # check segmented knn
    kg = dgl.nn.SegmentedKNNGraph(3)
    g = kg(x, [3, 5], algorithm, dist)
    check_knn(g, x, 0, 3, 3)
    check_knn(g, x, 3, 8, 3)

    # check k > num_points
    kg = dgl.nn.KNNGraph(10)
    with pytest.warns(DGLWarning):
        g = kg(x, algorithm, dist)
    check_knn(g, x, 0, 8, 8)

    with pytest.warns(DGLWarning):
        g = kg(x.view(2, 4, 3), algorithm, dist)
    check_knn(g, x, 0, 4, 4)
    check_knn(g, x, 4, 8, 4)

    kg = dgl.nn.SegmentedKNNGraph(5)
    with pytest.warns(DGLWarning):
        g = kg(x, [3, 5], algorithm, dist)
    check_knn(g, x, 0, 3, 3)
    check_knn(g, x, 3, 8, 3)

    # check k == 0
    kg = dgl.nn.KNNGraph(0)
    with pytest.raises(DGLError):
        g = kg(x, algorithm, dist)
    kg = dgl.nn.SegmentedKNNGraph(0)
    with pytest.raises(DGLError):
        g = kg(x, [3, 5], algorithm, dist)

    # check empty
    x_empty = th.tensor([])
    kg = dgl.nn.KNNGraph(3)
    with pytest.raises(DGLError):
        g = kg(x_empty, algorithm, dist)
    kg = dgl.nn.SegmentedKNNGraph(3)
    with pytest.raises(DGLError):
        g = kg(x_empty, [3, 5], algorithm, dist)
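The ground truth the test compares against can be reproduced in plain NumPy; for the cosine branch, the k nearest neighbours of each row are the k smallest entries of 1 - x_hat x_hat^T, matching the th.topk(..., largest=False) call above:

import numpy as np

x = np.random.randn(8, 3)
x_hat = x / (1e-5 + np.sqrt((x * x).sum(axis=1, keepdims=True)))
d = 1. - x_hat @ x_hat.T              # pairwise cosine distance

k = 3
knn = np.argsort(d, axis=1)[:, :k]    # indices of the k nearest points per row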
Example #13
    def step(self, cell_p, hid_p, mean_p, std_p):
        # One LSTM step: soft attention over 10 attribute slots, followed by
        # batch normalisation of the cell state before the output gate.
        embed = T.reshape(T.dot(self.attribute[:, 0], self.params['W_ctx_3']),
                          [self.batch_size, 10])
        hidP = T.dot(hid_p, self.params['W_ctx_2'])  # (25, 10)
        embedd = T.repeat(self.params['W_ctx_1'], self.batch_size, 0) * T.tanh(
            embed + hidP +
            T.repeat(self.params['b_ctx'], self.batch_size, 0))  # (25, 10)
        alpha_base = T.reshape(T.exp(embedd),
                               [self.batch_size, 10, 1])  # (25, 10, 1)
        alpha_base = alpha_base / alpha_base.sum()
        att = T.reshape(self.attribute[:, 0],
                        [self.batch_size, 10, self.att_frame])
        ctx = (alpha_base * att /
               T.reshape(alpha_base.sum(axis=1), [self.batch_size, 1, 1])).sum(
                   axis=1)  # (25, 300)
        ctx = T.reshape(ctx, [self.batch_size, self.att_frame])
        # ctx += T.dot(hid_p, self.params['W_att']) + T.repeat(self.params['b_att'], self.batch_size, 0)

        input_to = T.dot(ctx, self.params['W_in']) + T.repeat(
            self.params['b'], self.batch_size, 0)  # (25, 2048)
        # input_to_i = T.dot(ctx, self.params['W_in_i']) + T.repeat(self.params['b_i'], self.batch_size, 0)
        # input_to_f = T.dot(ctx, self.params['W_in_f']) + T.repeat(self.params['b_f'], self.batch_size, 0)
        # input_to_o = T.dot(ctx, self.params['W_in_o']) + T.repeat(self.params['b_o'], self.batch_size, 0)
        # input_to_c = T.dot(ctx, self.params['W_in_c']) + T.repeat(self.params['b_c'], self.batch_size, 0)
        gate = input_to + T.dot(hid_p, self.params['W_hid'])
        # gate_i = input_to_i + T.dot(hid_p, self.params['W_hid_i'])
        # gate_f = input_to_f + T.dot(hid_p, self.params['W_hid_f'])
        # gate_o = input_to_o + T.dot(hid_p, self.params['W_hid_o'])
        # gate_c = input_to_c + T.dot(hid_p, self.params['W_hid_c'])

        # Apply nonlinearities
        ingate = T.nnet.sigmoid(
            self._slice(gate, 0, self.hidden_dim) +
            cell_p * T.repeat(self.params['W_cell'][0], self.batch_size, 0))
        forgetgate = T.nnet.sigmoid(
            self._slice(gate, 1, self.hidden_dim) +
            cell_p * T.repeat(self.params['W_cell'][1], self.batch_size, 0))
        cell_input = T.tanh(self._slice(gate, 2, self.hidden_dim))

        # Compute new cell value
        cell = forgetgate * cell_p + ingate * cell_input

        # BatchNormalization
        input_shape = (self.batch_size, self.hidden_dim)  # (1, 512)
        cell = K.reshape(cell, input_shape)
        reduction_axes = list(range(len(input_shape)))  # [0, 1]
        del reduction_axes[self.axis_bn]  # [0]
        broadcast_shape = [1] * len(input_shape)  # [1, 1]
        broadcast_shape[self.axis_bn] = input_shape[self.axis_bn]  # [1, 512]
        # m = K.mean(cell, axis=reduction_axes) # m.shape = (1, 512), note that if matrix is 1-d then mean function will return one number even if axis=0
        m = K.mean(cell, axis=0)
        broadcast_m = K.reshape(m, [1, self.hidden_dim])
        # broadcast_m = m
        std = K.mean(K.square(cell - broadcast_m) + self.epsilon,
                     axis=reduction_axes)
        std = K.sqrt(std)  # batch standard deviation, e.g. (1, 512)
        broadcast_std = K.reshape(std, broadcast_shape)
        # Exponential moving averages of the running statistics.
        mean_update = self.momentum * mean_p + (1 - self.momentum) * m
        std_update = self.momentum * std_p + (1 - self.momentum) * std
        cell_normed = (cell - broadcast_m) / (broadcast_std + self.epsilon)
        cell_bn = K.reshape(
            self.params['gamma'], broadcast_shape) * cell_normed + K.reshape(
                self.params['beta'], broadcast_shape)  # (1, 512)

        # cell_bn, mean, std = batchnorm(cell, self.batch_size, self.hidden_dim, self.params['gamma'], self.params['beta'], mean_p, std_p, train=True)

        outgate = T.nnet.sigmoid(
            self._slice(gate, 3, self.hidden_dim) +
            cell_bn * T.repeat(self.params['W_cell'][2], self.batch_size, 0))

        # Compute new hidden unit activation
        hid = outgate * T.tanh(cell_bn)
        return T.reshape(
            cell_bn, [self.batch_size, self.hidden_dim]), T.reshape(
                hid,
                [self.batch_size, self.hidden_dim]), mean_update, std_update
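The gate layout implied by the _slice calls above packs the input, forget, cell-candidate and output blocks along the last axis of the fused gate tensor. The helper itself is not shown in this excerpt; a minimal sketch of what it presumably does:

    def _slice(self, gate, n, dim):
        # Hypothetical reconstruction: block n of width dim along the last
        # axis (0 = input gate, 1 = forget gate, 2 = cell candidate,
        # 3 = output gate).
        return gate[:, n * dim:(n + 1) * dim]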