Example #1
0
 def forward(self, x):
     """Advance the recurrent state by one step for input ``x``.

     Overwrites ``self.c`` and ``self.h`` and returns the new ``self.h``.
     """
     # One affine map yields all four pre-activations; they are cut apart
     # below in consecutive ranges of width ``self.out_size``.
     pre_act = self.wxh @ x + self.whh @ self.h + self.bh
     width = self.out_size
     in_gate = F.sigmoid(F.slice(pre_act, 0, 0, width))
     forget_gate = F.sigmoid(F.slice(pre_act, 0, width, 2 * width))
     out_gate = F.sigmoid(F.slice(pre_act, 0, 2 * width, 3 * width))
     candidate = F.tanh(F.slice(pre_act, 0, 3 * width, 4 * width))
     # New cell value mixes the gated candidate with the gated old cell.
     self.c = in_gate * candidate + forget_gate * self.c
     self.h = out_gate * F.tanh(self.c)
     return self.h
Example #2
0
 def forward(self, x):
     """Compute one recurrent step for ``x`` and return the updated ``self.h_``.

     ``self.c_`` and ``self.h_`` are both replaced with this step's values.
     """
     stacked = self.wxh_ @ x + self.whh_ @ self.h_ + self.bh_
     n = self.out_size_

     def chunk(k):
         # k-th consecutive range of width n, with 0 as F.slice's second argument.
         return F.slice(stacked, 0, k * n, (k + 1) * n)

     gate_i = F.sigmoid(chunk(0))
     gate_f = F.sigmoid(chunk(1))
     gate_o = F.sigmoid(chunk(2))
     proposal = F.tanh(chunk(3))
     self.c_ = gate_i * proposal + gate_f * self.c_
     self.h_ = gate_o * F.tanh(self.c_)
     return self.h_
Example #3
0
 def forward(self, x):
     """Run a single step on ``x``.

     Updates ``self.c`` and ``self.h`` in place of their previous values and
     returns the new ``self.h``.
     """
     # Gate width is read from the second entry of pwhh's shape.
     width = self.pwhh.shape()[1]
     end_i, end_f, end_o, end_j = width, 2 * width, 3 * width, 4 * width
     pre = self.wxh @ x + self.whh @ self.h + self.bh
     in_gate = F.sigmoid(F.slice(pre, 0, 0, end_i))
     forget_gate = F.sigmoid(F.slice(pre, 0, end_i, end_f))
     out_gate = F.sigmoid(F.slice(pre, 0, end_f, end_o))
     update = F.tanh(F.slice(pre, 0, end_o, end_j))
     self.c = in_gate * update + forget_gate * self.c
     self.h = out_gate * F.tanh(self.c)
     return self.h
Example #4
0
def main():
    """Fit a small two-layer tanh network with SGD, printing progress.

    The training set is four two-dimensional +/-1 samples whose label is the
    product of the two inputs. Each of the 100 epochs builds a fresh graph,
    prints the four predictions and the mean squared error, then applies one
    SGD update (learning rate 0.1).
    """
    with DefaultScopeDevice(CPUDevice()):
        pw1 = Parameter("w1", [8, 2], I.XavierUniform())
        pb1 = Parameter("b1", [8], I.Constant(0))
        pw2 = Parameter("w2", [1, 8], I.XavierUniform())
        pb2 = Parameter("b2", [], I.Constant(0))

        trainer = T.SGD(0.1)

        trainer.add_parameter(pw1)
        trainer.add_parameter(pb1)
        trainer.add_parameter(pw2)
        trainer.add_parameter(pb2)

        input_data = np.array(
            [
                [1, 1],  # Sample 1
                [1, -1],  # Sample 2
                [-1, 1],  # Sample 3
                [-1, -1],  # Sample 4
            ],
            dtype=np.float32)

        output_data = np.array(
            [
                1,  # Label 1
                -1,  # Label 2
                -1,  # Label 3
                1,  # Label 4
            ],
            dtype=np.float32)

        for i in range(100):
            g = Graph()
            with DefaultScopeGraph(g):
                # Builds a computation graph.
                # Explicit-shape alternative (left disabled in the original):
                #x = F.input(shape=Shape([2], 4), data=input_data)
                x = F.input(data=input_data)
                w1 = F.input(param=pw1)
                b1 = F.input(param=pb1)
                w2 = F.input(param=pw2)
                b2 = F.input(param=pb2)
                h = F.tanh(F.matmul(w1, x) + b1)
                y = F.matmul(w2, h) + b2

                # Calculates values.
                y_val = g.forward(y).to_list()
                print("epoch ", i, ":")
                for j in range(4):
                    print("  [", j, "]: ", y_val[j])

                # Extends the graph with the loss. The target node is created
                # once per epoch; it used to sit inside the print loop above,
                # which added four nodes and used only the last one.
                # Explicit-shape alternative (left disabled in the original):
                #t = F.input(shape=Shape([], 4), data=output_data)
                t = F.input(data=output_data)
                diff = t - y
                loss = F.batch.mean(diff * diff)
                loss_val = g.forward(loss).to_list()[0]
                print("  loss: ", loss_val)

                # Updates parameters.
                trainer.reset_gradients()
                g.backward(loss)
                trainer.update()
Example #5
0
 def decode_step(self, trg_words, train):
     """Run one decoder step for ``trg_words`` and return ``wjy_ @ feed_ + by_``.

     ``self.feed_`` is replaced with this step's value before returning.
     ``train`` is forwarded unchanged to both ``F.dropout`` calls.
     """
     embedded = F.pick(self.trg_lookup_, trg_words, 1)
     embedded = F.dropout(embedded, self.dropout_rate_, train)
     # The previous feed vector is concatenated onto the embedding.
     lstm_in = F.concat([embedded, self.feed_], 0)
     hidden = self.trg_lstm_.forward(lstm_in)
     hidden = F.dropout(hidden, self.dropout_rate_, train)
     # Softmax of the scores, then used to weight concat_fb_.
     weights = F.softmax(self.t_concat_fb_ @ hidden, 0)
     context = self.concat_fb_ @ weights
     merged = F.concat([hidden, context], 0)
     self.feed_ = F.tanh(self.whj_ @ merged + self.bj_)
     return self.wjy_ @ self.feed_ + self.by_
Example #6
0
 def decode_step(self, trg_words, train):
     """Decode a single step and return the output computed from the new feed.

     Stores the new feed vector in ``self.feed``; ``train`` is passed through
     to every ``F.dropout`` call.
     """
     rate = self.dropout_rate
     word_vec = F.dropout(F.pick(self.trg_lookup, trg_words, 1), rate, train)
     state = F.dropout(
         self.trg_lstm.forward(F.concat([word_vec, self.feed], 0)),
         rate, train)
     # Softmax of t_concat_fb @ state gives the weights applied to concat_fb.
     probs = F.softmax(self.t_concat_fb @ state, 0)
     summary = self.concat_fb @ probs
     self.feed = F.tanh(self.whj @ F.concat([state, summary], 0) + self.bj)
     return self.wjy @ self.feed + self.by
Example #7
0
def train_func(trainer):
    """Train a two-layer tanh network for ten updates and return its weights.

    A ``D.Naive(12345)`` device and a fresh ``Graph`` are installed as the
    defaults, four parameters are registered with ``trainer``, and ten
    update steps are run on a fixed four-sample batch.

    Returns:
        A list of four lists: the values of the first-layer weight and bias
        followed by the second-layer weight and bias.
    """
    # The device and graph stay bound to locals for the whole function.
    device = D.Naive(12345)
    Device.set_default(device)
    graph = Graph()
    Graph.set_default(graph)

    # Creation order is kept as-is.
    hidden_w = Parameter([8, 2], I.XavierUniform())
    hidden_b = Parameter([8], I.Constant(0))
    out_w = Parameter([1, 8], I.XavierUniform())
    out_b = Parameter([1], I.Constant(0))
    params = (hidden_w, hidden_b, out_w, out_b)

    for param in params:
        trainer.add_parameter(param)

    # Four 2-d samples, flattened, and their four targets.
    samples = [1, 1, 1, -1, -1, 1, -1, -1]
    targets = [1, -1, -1, 1]

    for _ in range(10):
        graph.clear()
        x = F.input(samples, Shape([2], 4))
        w1 = F.parameter(hidden_w)
        b1 = F.parameter(hidden_b)
        w2 = F.parameter(out_w)
        b2 = F.parameter(out_b)
        hidden = F.tanh(w1 @ x + b1)
        predicted = w2 @ hidden + b2

        expected = F.input(targets, Shape([], 4))
        err = expected - predicted
        loss = F.batch.mean(err * err)

        trainer.reset_gradients()
        loss.backward()
        trainer.update()

    return [param.value.to_list() for param in params]
Example #8
0
    def forward(self, xs):
        """Run the recurrence over the sequence ``xs``.

        All elements of ``xs`` are concatenated and multiplied by ``self.w_``
        once; the per-step loop then only slices the precomputed values.

        Returns:
            A list with one output per element of ``xs``, in order.
        """
        joined = F.concat(xs, 1)
        projected = self.w_ @ joined
        size = self.out_size_
        steps = len(xs)

        candidates = F.slice(projected, 0, 0, size)
        forget_pre = (F.slice(projected, 0, size, 2 * size) +
                      F.broadcast(self.bf_, 1, steps))
        forget_all = F.sigmoid(forget_pre)
        # NOTE(review): this gate adds self.bf_, the same bias as the gate
        # above -- confirm that sharing the bias is intended.
        reset_pre = (F.slice(projected, 0, 2 * size, 3 * size) +
                     F.broadcast(self.bf_, 1, steps))
        reset_all = F.sigmoid(reset_pre)

        cell = F.zeros([self.out_size_])
        outputs = []
        for t, x_t in enumerate(xs):
            cand_t = F.slice(candidates, 1, t, t + 1)
            forget_t = F.slice(forget_all, 1, t, t + 1)
            reset_t = F.slice(reset_all, 1, t, t + 1)
            cell = forget_t * cell + (1 - forget_t) * cand_t
            # Blend the activated cell with the raw input for this step.
            outputs.append(reset_t * F.tanh(cell) + (1 - reset_t) * x_t)

        return outputs
Example #9
0
def main():
    """Train a two-layer tanh network with SGD for ten epochs, printing progress.

    Each epoch rebuilds the graph, prints the four predictions and the mean
    squared error, and then applies one optimizer update.
    """
    device = D.Naive()  # or D.CUDA(gpuid)
    Device.set_default(device)

    # Model parameters, created in a fixed order.
    hidden_w = Parameter([8, 2], I.XavierUniform())
    hidden_b = Parameter([8], I.Constant(0))
    out_w = Parameter([1, 8], I.XavierUniform())
    out_b = Parameter([], I.Constant(0))

    # SGD optimizer with every parameter registered.
    optimizer = O.SGD(0.1)
    for param in (hidden_w, hidden_b, out_w, out_b):
        optimizer.add_parameter(param)

    # Training data: four samples and their labels, one array per sample.
    input_data = [
        np.array(sample, dtype=np.float32)
        for sample in ([1, 1], [1, -1], [-1, 1], [-1, -1])
    ]
    output_data = [
        np.array([label], dtype=np.float32)
        for label in (1, -1, -1, 1)
    ]

    graph = Graph()
    Graph.set_default(graph)

    for epoch in range(10):
        graph.clear()

        # Forward part of the computation graph.
        x = F.input(input_data)
        w1 = F.parameter(hidden_w)
        b1 = F.parameter(hidden_b)
        w2 = F.parameter(out_w)
        b2 = F.parameter(out_b)
        hidden = F.tanh(w1 @ x + b1)
        predicted = w2 @ hidden + b2

        # Report the current predictions.
        predictions = predicted.to_list()
        print("epoch ", epoch, ":")
        for idx in range(4):
            print("  [", idx, "]: ", predictions[idx])

        # Loss part of the graph: mean over the batch of the squared error.
        expected = F.input(output_data)
        err = expected - predicted
        loss = F.batch.mean(err * err)

        print("  loss: ", loss.to_float())

        # One optimizer step.
        optimizer.reset_gradients()
        loss.backward()
        optimizer.update()