Example #1
    def set_output(self, X, train=False):

        input_shape = (self.batch_size, self.num_lstm)
        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]
        if train:
            m = K.mean(X, axis=reduction_axes)
            broadcast_m = K.reshape(m, broadcast_shape)
            std = K.mean(K.square(X - broadcast_m) + self.epsilon,
                         axis=reduction_axes)
            std = K.sqrt(std)
            broadcast_std = K.reshape(std, broadcast_shape)
            mean_update = self.momentum * self.running_mean + (1 - self.momentum) * m
            std_update = self.momentum * self.running_std + (1 - self.momentum) * std
            self.updates = [(self.running_mean, mean_update),
                            (self.running_std, std_update)]
            X_normed = (X - broadcast_m) / (broadcast_std + self.epsilon)
        else:
            broadcast_m = K.reshape(self.running_mean, broadcast_shape)
            broadcast_std = K.reshape(self.running_std, broadcast_shape)
            X_normed = (X - broadcast_m) / (broadcast_std + self.epsilon)
        out = (K.reshape(self.gamma, broadcast_shape) * X_normed
               + K.reshape(self.beta, broadcast_shape))

        return out
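
The pattern above keeps exponential moving averages of the batch statistics in `self.updates` for use at inference time. As a quick sanity check, here is a minimal NumPy sketch of the same normalization math (the shapes and coefficients below are made-up placeholders, not taken from the example):

import numpy as np

def batchnorm_step(X, running_mean, running_std, gamma, beta,
                   momentum=0.99, epsilon=1e-10, train=True):
    # NumPy sketch of the normalization above; features along axis 1
    if train:
        m = X.mean(axis=0)                                   # per-feature batch mean
        std = np.sqrt(((X - m) ** 2).mean(axis=0) + epsilon)
        running_mean = momentum * running_mean + (1 - momentum) * m
        running_std = momentum * running_std + (1 - momentum) * std
        X_normed = (X - m) / (std + epsilon)
    else:
        X_normed = (X - running_mean) / (running_std + epsilon)
    return gamma * X_normed + beta, running_mean, running_std

X = np.random.randn(8, 4)
out, rm, rs = batchnorm_step(X, np.zeros(4), np.ones(4), np.ones(4), np.zeros(4))
print(out.mean(axis=0))  # approximately zero per feature after normalization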
Example #2
def test_simple_readout():
    g1 = dgl.DGLGraph()
    g1.add_nodes(3)
    g2 = dgl.DGLGraph()
    g2.add_nodes(4)  # no edges
    g1.add_edges([0, 1, 2], [2, 0, 1])

    n1 = F.randn((3, 5))
    n2 = F.randn((4, 5))
    e1 = F.randn((3, 5))
    s1 = F.sum(n1, 0)  # node sums
    s2 = F.sum(n2, 0)
    se1 = F.sum(e1, 0)  # edge sums
    m1 = F.mean(n1, 0)  # node means
    m2 = F.mean(n2, 0)
    me1 = F.mean(e1, 0)  # edge means
    w1 = F.randn((3, ))
    w2 = F.randn((4, ))
    max1 = F.max(n1, 0)
    max2 = F.max(n2, 0)
    maxe1 = F.max(e1, 0)
    ws1 = F.sum(n1 * F.unsqueeze(w1, 1), 0)
    ws2 = F.sum(n2 * F.unsqueeze(w2, 1), 0)
    wm1 = F.sum(n1 * F.unsqueeze(w1, 1), 0) / F.sum(F.unsqueeze(w1, 1), 0)
    wm2 = F.sum(n2 * F.unsqueeze(w2, 1), 0) / F.sum(F.unsqueeze(w2, 1), 0)
    g1.ndata['x'] = n1
    g2.ndata['x'] = n2
    g1.ndata['w'] = w1
    g2.ndata['w'] = w2
    g1.edata['x'] = e1

    assert F.allclose(dgl.sum_nodes(g1, 'x'), s1)
    assert F.allclose(dgl.sum_nodes(g1, 'x', 'w'), ws1)
    assert F.allclose(dgl.sum_edges(g1, 'x'), se1)
    assert F.allclose(dgl.mean_nodes(g1, 'x'), m1)
    assert F.allclose(dgl.mean_nodes(g1, 'x', 'w'), wm1)
    assert F.allclose(dgl.mean_edges(g1, 'x'), me1)
    assert F.allclose(dgl.max_nodes(g1, 'x'), max1)
    assert F.allclose(dgl.max_edges(g1, 'x'), maxe1)

    g = dgl.batch([g1, g2])
    s = dgl.sum_nodes(g, 'x')
    m = dgl.mean_nodes(g, 'x')
    max_bg = dgl.max_nodes(g, 'x')
    assert F.allclose(s, F.stack([s1, s2], 0))
    assert F.allclose(m, F.stack([m1, m2], 0))
    assert F.allclose(max_bg, F.stack([max1, max2], 0))
    ws = dgl.sum_nodes(g, 'x', 'w')
    wm = dgl.mean_nodes(g, 'x', 'w')
    assert F.allclose(ws, F.stack([ws1, ws2], 0))
    assert F.allclose(wm, F.stack([wm1, wm2], 0))
    s = dgl.sum_edges(g, 'x')
    m = dgl.mean_edges(g, 'x')
    max_bg_e = dgl.max_edges(g, 'x')
    assert F.allclose(s, F.stack([se1, F.zeros(5)], 0))
    assert F.allclose(m, F.stack([me1, F.zeros(5)], 0))
    assert F.allclose(max_bg_e, F.stack([maxe1, F.zeros(5)], 0))
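
The weighted readouts exercised above pair each node feature with a scalar weight: `sum_nodes(g, 'x', 'w')` computes the weighted sum and `mean_nodes(g, 'x', 'w')` divides it by the weight total. A minimal NumPy sketch of the reference values the test constructs:

import numpy as np

n1 = np.random.randn(3, 5)              # node features
w1 = np.random.randn(3)                 # per-node weights
ws1 = (n1 * w1[:, None]).sum(axis=0)    # weighted sum readout
wm1 = ws1 / w1.sum()                    # weighted mean readout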
Example #3
def test_simple_pool():
    ctx = F.ctx()
    g = dgl.DGLGraph(nx.path_graph(15))

    sum_pool = nn.SumPooling()
    avg_pool = nn.AvgPooling()
    max_pool = nn.MaxPooling()
    sort_pool = nn.SortPooling(10) # k = 10
    print(sum_pool, avg_pool, max_pool, sort_pool)

    # test#1: basic
    h0 = F.randn((g.number_of_nodes(), 5))
    if F.gpu_ctx():
        sum_pool = sum_pool.to(ctx)
        avg_pool = avg_pool.to(ctx)
        max_pool = max_pool.to(ctx)
        sort_pool = sort_pool.to(ctx)
        h0 = h0.to(ctx)
    h1 = sum_pool(g, h0)
    assert F.allclose(h1, F.sum(h0, 0))
    h1 = avg_pool(g, h0)
    assert F.allclose(h1, F.mean(h0, 0))
    h1 = max_pool(g, h0)
    assert F.allclose(h1, F.max(h0, 0))
    h1 = sort_pool(g, h0)
    assert h1.shape[0] == 10 * 5 and h1.dim() == 1

    # test#2: batched graph
    g_ = dgl.DGLGraph(nx.path_graph(5))
    bg = dgl.batch([g, g_, g, g_, g])
    h0 = F.randn((bg.number_of_nodes(), 5))
    if F.gpu_ctx():
        h0 = h0.to(ctx)

    h1 = sum_pool(bg, h0)
    truth = th.stack([F.sum(h0[:15], 0),
                      F.sum(h0[15:20], 0),
                      F.sum(h0[20:35], 0),
                      F.sum(h0[35:40], 0),
                      F.sum(h0[40:55], 0)], 0)
    assert F.allclose(h1, truth)

    h1 = avg_pool(bg, h0)
    truth = th.stack([F.mean(h0[:15], 0),
                      F.mean(h0[15:20], 0),
                      F.mean(h0[20:35], 0),
                      F.mean(h0[35:40], 0),
                      F.mean(h0[40:55], 0)], 0)
    assert F.allclose(h1, truth)

    h1 = max_pool(bg, h0)
    truth = th.stack([F.max(h0[:15], 0),
                      F.max(h0[15:20], 0),
                      F.max(h0[20:35], 0),
                      F.max(h0[35:40], 0),
                      F.max(h0[40:55], 0)], 0)
    assert F.allclose(h1, truth)

    h1 = sort_pool(bg, h0)
    assert h1.shape[0] == 5 and h1.shape[1] == 10 * 5 and h1.dim() == 2
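
SortPooling returns a fixed-size vector regardless of graph size, which is why the assertions only check shapes: with k = 10 and 5-dimensional features the flattened output has 10 * 5 = 50 entries per graph. A rough NumPy sketch of the idea behind it (sorting nodes by their last feature channel and keeping the top k, as in the SortPooling paper; padding for graphs with fewer than k nodes is omitted):

import numpy as np

def sort_pool(h, k):
    # h: (num_nodes, d) node features; returns a flat (k * d,) vector
    order = np.argsort(h[:, -1])[::-1]   # sort nodes by last channel, descending
    return h[order[:k]].reshape(-1)      # keep the top-k nodes and flatten

h = np.random.randn(15, 5)
print(sort_pool(h, 10).shape)  # (50,)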
Example #4
def batchnorm(X,
              batch_size,
              hidden_dim,
              gamma,
              beta,
              running_mean,
              running_std,
              epsilon=1e-10,
              axis=1,
              momentum=0.99,
              train=False):

    X = K.reshape(X, (batch_size, hidden_dim))
    input_shape = (batch_size, hidden_dim)  # (1, 512)
    reduction_axes = list(range(len(input_shape)))  # [0, 1]
    del reduction_axes[axis]  # [0]
    broadcast_shape = [1] * len(input_shape)  # [1, 1]
    broadcast_shape[axis] = input_shape[axis]  # [1, 512]
    if train:
        # m.shape = (1, 512); note that if the input is 1-d, mean() returns a
        # scalar even with axis=0
        m = K.mean(X, axis=reduction_axes)
        broadcast_m = K.reshape(m, broadcast_shape)  # (1, 512)
        std = K.mean(K.square(X - broadcast_m) + epsilon,
                     axis=reduction_axes)
        std = K.sqrt(std)  # (1, 512)
        broadcast_std = K.reshape(std, broadcast_shape)  # (1, 512)
        mean_update = momentum * running_mean + (1 - momentum) * m  # (1, 512)
        std_update = momentum * running_std + (1 - momentum) * std  # (1, 512)
        X_normed = (X - broadcast_m) / (broadcast_std + epsilon)  # (1, 512)
    else:
        broadcast_m = K.reshape(running_mean, broadcast_shape)
        broadcast_std = K.reshape(running_std, broadcast_shape)
        X_normed = (X - broadcast_m) / (broadcast_std + epsilon)
        # keep the running statistics unchanged at inference time; without
        # these two lines the return statement below raises a NameError
        mean_update = running_mean
        std_update = running_std
    out = K.reshape(gamma, broadcast_shape) * X_normed + K.reshape(
        beta, broadcast_shape)  # (1, 512)

    return out, mean_update, std_update
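
The running statistics returned here are exponential moving averages: with `momentum=0.99` each new batch contributes only 1% to the tracked value, so the estimate drifts slowly toward the batch statistics. A one-line numeric check:

running_mean, m = 0.0, 10.0              # tracked mean, current batch mean
print(0.99 * running_mean + 0.01 * m)    # 0.1 -- one batch moves the estimate by 1%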
Example #5
def test_simple_pool():
    g = dgl.DGLGraph(nx.path_graph(15))

    sum_pool = nn.SumPooling()
    avg_pool = nn.AvgPooling()
    max_pool = nn.MaxPooling()
    sort_pool = nn.SortPooling(10)  # k = 10
    print(sum_pool, avg_pool, max_pool, sort_pool)

    # test#1: basic
    h0 = F.randn((g.number_of_nodes(), 5))
    h1 = sum_pool(g, h0)
    check_close(F.squeeze(h1, 0), F.sum(h0, 0))
    h1 = avg_pool(g, h0)
    check_close(F.squeeze(h1, 0), F.mean(h0, 0))
    h1 = max_pool(g, h0)
    check_close(F.squeeze(h1, 0), F.max(h0, 0))
    h1 = sort_pool(g, h0)
    assert h1.shape[0] == 1 and h1.shape[1] == 10 * 5 and h1.ndim == 2

    # test#2: batched graph
    g_ = dgl.DGLGraph(nx.path_graph(5))
    bg = dgl.batch([g, g_, g, g_, g])
    h0 = F.randn((bg.number_of_nodes(), 5))
    h1 = sum_pool(bg, h0)
    truth = mx.nd.stack(F.sum(h0[:15], 0),
                        F.sum(h0[15:20], 0),
                        F.sum(h0[20:35], 0),
                        F.sum(h0[35:40], 0),
                        F.sum(h0[40:55], 0),
                        axis=0)
    check_close(h1, truth)

    h1 = avg_pool(bg, h0)
    truth = mx.nd.stack(F.mean(h0[:15], 0),
                        F.mean(h0[15:20], 0),
                        F.mean(h0[20:35], 0),
                        F.mean(h0[35:40], 0),
                        F.mean(h0[40:55], 0),
                        axis=0)
    check_close(h1, truth)

    h1 = max_pool(bg, h0)
    truth = mx.nd.stack(F.max(h0[:15], 0),
                        F.max(h0[15:20], 0),
                        F.max(h0[20:35], 0),
                        F.max(h0[35:40], 0),
                        F.max(h0[40:55], 0),
                        axis=0)
    check_close(h1, truth)

    h1 = sort_pool(bg, h0)
    assert h1.shape[0] == 5 and h1.shape[1] == 10 * 5 and h1.ndim == 2
Example #6
def is_divergence(y_pred, y_gt):
    y_pred = K.clip(y_pred, _EPSILON, np.inf)
    y_gt = K.clip(y_gt, _EPSILON, np.inf)
    is_mat = y_gt / y_pred - K.log(y_gt / y_pred) - 1
    return K.mean(K.sum(is_mat, axis=-1))
Example #7
def kl_divergence(y_pred, y_gt):
    y_pred = K.clip(y_pred, _EPSILON, np.inf)
    y_gt = K.clip(y_gt, _EPSILON, np.inf)
    kl_mat = y_gt * K.log(y_gt / y_pred) - y_gt + y_pred
    return K.mean(K.sum(kl_mat, axis=-1))
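
Both this generalized KL divergence and the Itakura-Saito divergence above are non-negative and vanish exactly when the prediction matches the target, which makes for an easy spot check with plain NumPy stand-ins for the `K` backend calls:

import numpy as np

y_gt = np.array([0.2, 0.5, 0.3])
y_pred = np.array([0.25, 0.45, 0.3])
kl = (y_gt * np.log(y_gt / y_pred) - y_gt + y_pred).sum()
is_div = (y_gt / y_pred - np.log(y_gt / y_pred) - 1).sum()
print(kl >= 0, is_div >= 0)  # True True; both are exactly 0 when y_pred == y_gt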
Example #8
def norm_lp(y_pred, y_gt, norm):
    return K.mean(K.sum(K.power(K.abs(y_pred - y_gt), norm), axis=-1))
Example #9
def mse(y_pred, y_gt):
    return K.mean(K.sqr(y_pred - y_gt))
Example #10
def binary_crossentropy(p_y_pred, y_gt):
    p_y_pred = K.clip(p_y_pred, _EPSILON, 1. - _EPSILON)    
    return K.mean(K.mean(K.binary_crossentropy(p_y_pred, y_gt), axis=-1))
Example #11
def udf_mean(nodes):
    return {'r2': F.mean(nodes.mailbox['m'], 1)}
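
This is a DGL user-defined reduce function: `nodes.mailbox['m']` holds the incoming messages as a `(num_nodes, num_messages, feat)` tensor, so averaging over axis 1 reproduces the built-in mean reducer. A hedged usage sketch with PyTorch tensors (the graph, feature names, and the torch-based variant of the UDF are illustrative, not from the example):

import dgl
import dgl.function as fn
import torch

def udf_mean_torch(nodes):
    # torch equivalent of the backend-agnostic udf_mean above
    return {'r2': nodes.mailbox['m'].mean(1)}

g = dgl.graph(([0, 1, 2], [2, 0, 1]))
g.ndata['h'] = torch.randn(3, 5)
g.update_all(fn.copy_u('h', 'm'), udf_mean_torch)  # message, then reduce
print(g.ndata['r2'].shape)  # torch.Size([3, 5])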
Example #12
 def __call__(self, loss):
     output = self.layer.get_output(True)
     loss += self.l1 * K.sum(K.mean(K.abs(output), axis=0))
     loss += self.l2 * K.sum(K.mean(K.square(output), axis=0))
     return loss
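
This is the activity-regularizer pattern from early Keras: an L1 plus L2 penalty on a layer's output, averaged over the batch (axis 0) and summed over units, added onto the training loss. The penalty itself is easy to reproduce in NumPy (the coefficients below are arbitrary placeholders):

import numpy as np

output = np.random.randn(32, 64)   # (batch, units) activations
l1, l2 = 0.01, 0.01
penalty = (l1 * np.abs(output).mean(axis=0).sum()
           + l2 * np.square(output).mean(axis=0).sum())
print(penalty)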
Example #13
    def step(self, cell_p, hid_p, mean_p, std_p):

        embed = T.reshape(T.dot(self.attribute[:, 0], self.params['W_ctx_3']),
                          [self.batch_size, 10])
        hidP = T.dot(hid_p, self.params['W_ctx_2'])  # (25, 10)
        embedd = T.repeat(self.params['W_ctx_1'], self.batch_size, 0) * T.tanh(
            embed + hidP +
            T.repeat(self.params['b_ctx'], self.batch_size, 0))  # (25, 10)
        alpha_base = T.reshape(T.exp(embedd),
                               [self.batch_size, 10, 1])  # (25, 10, 1)
        alpha_base = alpha_base / alpha_base.sum()
        att = T.reshape(self.attribute[:, 0],
                        [self.batch_size, 10, self.att_frame])
        ctx = (alpha_base * att /
               T.reshape(alpha_base.sum(axis=1), [self.batch_size, 1, 1])).sum(
                   axis=1)  # (25, 300)
        ctx = T.reshape(ctx, [self.batch_size, self.att_frame])
        # ctx += T.dot(hid_p, self.params['W_att']) + T.repeat(self.params['b_att'], self.batch_size, 0)

        input_to = T.dot(ctx, self.params['W_in']) + T.repeat(
            self.params['b'], self.batch_size, 0)  # (25, 2048)
        # input_to_i = T.dot(ctx, self.params['W_in_i']) + T.repeat(self.params['b_i'], self.batch_size, 0)
        # input_to_f = T.dot(ctx, self.params['W_in_f']) + T.repeat(self.params['b_f'], self.batch_size, 0)
        # input_to_o = T.dot(ctx, self.params['W_in_o']) + T.repeat(self.params['b_o'], self.batch_size, 0)
        # input_to_c = T.dot(ctx, self.params['W_in_c']) + T.repeat(self.params['b_c'], self.batch_size, 0)
        gate = input_to + T.dot(hid_p, self.params['W_hid'])
        # gate_i = input_to_i + T.dot(hid_p, self.params['W_hid_i'])
        # gate_f = input_to_f + T.dot(hid_p, self.params['W_hid_f'])
        # gate_o = input_to_o + T.dot(hid_p, self.params['W_hid_o'])
        # gate_c = input_to_c + T.dot(hid_p, self.params['W_hid_c'])

        # Apply nonlinearities
        ingate = T.nnet.sigmoid(
            self._slice(gate, 0, self.hidden_dim) +
            cell_p * T.repeat(self.params['W_cell'][0], self.batch_size, 0))
        forgetgate = T.nnet.sigmoid(
            self._slice(gate, 1, self.hidden_dim) +
            cell_p * T.repeat(self.params['W_cell'][1], self.batch_size, 0))
        cell_input = T.tanh(self._slice(gate, 2, self.hidden_dim))

        # Compute new cell value
        cell = forgetgate * cell_p + ingate * cell_input

        # BatchNormalization
        input_shape = (self.batch_size, self.hidden_dim)  # (1, 512)
        cell = K.reshape(cell, input_shape)
        reduction_axes = list(range(len(input_shape)))  # [0, 1]
        del reduction_axes[self.axis_bn]  # [0]
        broadcast_shape = [1] * len(input_shape)  # [1, 1]
        broadcast_shape[self.axis_bn] = input_shape[self.axis_bn]  # [1, 512]
        # m = K.mean(cell, axis=reduction_axes) # m.shape = (1, 512), note that if matrix is 1-d then mean function will return one number even if axis=0
        m = K.mean(cell, axis=0)
        broadcast_m = K.reshape(m, [1, self.hidden_dim])  # m.shape = (1, 512)
        # broadcast_m = m
        std = K.mean(K.square(cell - broadcast_m) + self.epsilon,
                     axis=reduction_axes)
        std = K.sqrt(std)  # (1, 512)
        broadcast_std = K.reshape(std, broadcast_shape)  # (1, 512)
        mean_update = self.momentum * mean_p + (1 - self.momentum) * m  # (1, 512)
        std_update = self.momentum * std_p + (1 - self.momentum) * std  # (1, 512)
        cell_normed = (cell - broadcast_m) / (broadcast_std + self.epsilon)  # (1, 512)
        cell_bn = (K.reshape(self.params['gamma'], broadcast_shape) * cell_normed
                   + K.reshape(self.params['beta'], broadcast_shape))  # (1, 512)

        # cell_bn, mean, std = batchnorm(cell, self.batch_size, self.hidden_dim, self.params['gamma'], self.params['beta'], mean_p, std_p, train=True)

        outgate = T.nnet.sigmoid(
            self._slice(gate, 3, self.hidden_dim) +
            cell_bn * T.repeat(self.params['W_cell'][2], self.batch_size, 0))

        # Compute new hidden unit activation
        hid = outgate * T.tanh(cell_bn)
        return T.reshape(
            cell_bn, [self.batch_size, self.hidden_dim]), T.reshape(
                hid,
                [self.batch_size, self.hidden_dim]), mean_update, std_update
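
All four gate preactivations are packed into the single `gate` matrix, and `self._slice(gate, n, self.hidden_dim)` is evidently meant to cut out the n-th `hidden_dim`-wide block (input, forget, cell, and output gates in slots 0-3). A minimal sketch of such a helper, offered as an assumption about the undefined method rather than code from the source:

def _slice(x, n, dim):
    # return columns [n*dim, (n+1)*dim) of a (batch, 4*dim) matrix -- assumed helper
    return x[:, n * dim:(n + 1) * dim]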
Example #14
def mse(y_pred, y_gt):
    return K.mean(K.sum(K.sqr(y_pred - y_gt), axis=-1))
Example #15
def beta_divergence(y_pred, y_gt, beta):
    y_pred = K.clip(y_pred, _EPSILON, np.inf)
    y_gt = K.clip(y_gt, _EPSILON, np.inf)
    beta_mat = 1. / (beta * (beta - 1)) * (K.power(y_gt, beta)
                                           + (beta - 1) * K.power(y_pred, beta)
                                           - beta * y_gt * K.power(y_pred, beta - 1))
    return K.mean(K.sum(beta_mat, axis=-1))
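
The beta divergence interpolates between the other reconstruction costs in this set: it tends to the generalized KL divergence as beta approaches 1, to the Itakura-Saito divergence as beta approaches 0, and at beta = 2 it equals half the squared Euclidean distance. The last case is easy to verify in NumPy:

import numpy as np

y_gt = np.array([1.0, 2.0, 3.0])
y_pred = np.array([1.5, 1.5, 2.0])
beta = 2.0
beta_mat = 1. / (beta * (beta - 1)) * (y_gt**beta + (beta - 1) * y_pred**beta
                                       - beta * y_gt * y_pred**(beta - 1))
print(np.allclose(beta_mat.sum(), 0.5 * ((y_gt - y_pred) ** 2).sum()))  # True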
Example #16
def categorical_error(p_y_pred, y_gt):
    y_pred_sparse = K.argmax(p_y_pred, axis=-1)
    y_gt_sparse = K.argmax(y_gt, axis=-1)
    return K.mean(K.neq(y_pred_sparse, y_gt_sparse))
Example #17
def categorical_crossentropy(p_y_pred, y_gt):
    p_y_pred = K.clip(p_y_pred, _EPSILON, 1. - _EPSILON)
    return K.mean(K.categorical_crossentropy(p_y_pred, y_gt))
Example #18
def sparse_categorical_crossentropy(p_y_pred, y_gt):
    p_y_pred = K.clip(p_y_pred, _EPSILON, 1. - _EPSILON)
    y_gt = K.to_one_hot(y_gt, )  # the number-of-classes argument is elided in the source
    return K.mean(K.categorical_crossentropy(p_y_pred, y_gt))