Example #1
 def reset(self, init_c = Node(), init_h = Node()):
     """Initializes internal states."""
     out_size = self._pwhh.shape()[1]
     self._wxh = F.parameter(self._pwxh)
     self._whh = F.parameter(self._pwhh)
     self._bh = F.parameter(self._pbh)
     self._c = init_c if init_c.valid() else F.zeros([out_size])
     self._h = init_h if init_h.valid() else F.zeros([out_size])
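reset() above either zero-initializes the cell states from the shape of _pwhh or adopts the given nodes; a hedged usage sketch, assuming `cell` is an instance of the (unshown) class and prev_c/prev_h are valid Node objects from an earlier run:

cell.reset()                    # start from zero state vectors
cell.reset(prev_c, prev_h)      # or carry over explicit initial states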
Example #2
    def make_graph(inputs, train):
        x = F.input(inputs)

        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        h = F.relu(w1 @ x + b1)

        h = F.dropout(h, .5, train)

        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        return w2 @ h + b2
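make_graph() above only builds the network for one batch; below is a hedged driver sketch, assuming `g`, `optimizer`, `batch_inputs` (a flat list of input values), and `labels` (per-sample class IDs) are set up as in the other examples on this page, and that F.softmax_cross_entropy is available for the classification loss:

# Hedged driver sketch; all names except make_graph() are assumed to exist.
g.clear()
y = make_graph(batch_inputs, True)          # train mode
loss = F.batch.mean(F.softmax_cross_entropy(y, labels, 0))
optimizer.reset_gradients()
loss.backward()
optimizer.update()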
Example #3
 def make_graph(inputs):
     # We first store input values explicitly on GPU 0.
     x = F.input(inputs, device=dev0)
     w1 = F.parameter(pw1)
     b1 = F.parameter(pb1)
     w2 = F.parameter(pw2)
     b2 = F.parameter(pb2)
     # The hidden layer is calculated and implicitly stored on GPU 0.
     h_on_gpu0 = F.relu(w1 @ x + b1)
     # `copy()` transfers the hidden layer to GPU 1.
     h_on_gpu1 = F.copy(h_on_gpu0, dev1)
     # The output layer is calculated and implicitly stored on GPU 1.
     return w2 @ h_on_gpu1 + b2
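The two-device example above relies on dev0 and dev1 being created beforehand; a minimal setup sketch follows, where the GPU IDs are assumptions and D.Naive() can stand in when no GPU is present:

from primitiv import Device, Graph
from primitiv import devices as D

# Assumed device setup for the example above.
dev0 = D.CUDA(0)   # or D.Naive()
dev1 = D.CUDA(1)   # or D.Naive()
Device.set_default(dev0)
g = Graph()
Graph.set_default(g)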
Example #4
 def forward(self, inputs):
     batch_size = len(inputs[0])
     wlookup = F.parameter(self.pwlookup)
     wxs = F.parameter(self.pwxs)
     wsy = F.parameter(self.pwsy)
     s = F.zeros(Shape([NUM_HIDDEN_UNITS], batch_size))
     outputs = []
     for i in range(len(inputs) - 1):
         w = F.pick(wlookup, inputs[i], 1)
         x = w + s
         s = F.sigmoid(wxs @ x)
         outputs.append(wsy @ s)
     return outputs
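forward() above returns one output node per time step; below is a hedged loss sketch, assuming inputs[i + 1] holds the gold word IDs for step i and that F.softmax_cross_entropy is available:

# Hypothetical companion method; not part of the original class.
def loss(self, outputs, inputs):
    losses = [
        F.softmax_cross_entropy(outputs[i], inputs[i + 1], 0)
        for i in range(len(outputs))
    ]
    total = losses[0]
    for l in losses[1:]:
        total = total + l
    return F.batch.mean(total)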
Example #5
    def encode(self, src_batch, train):
        """Encodes source sentences and prepares internal states."""
        # Reversed encoding.
        src_lookup = F.parameter(self.psrc_lookup)
        self.src_lstm.restart()
        for it in src_batch:
            x = F.pick(src_lookup, it, 1)
            x = F.dropout(x, self.dropout_rate, train)
            self.src_lstm.forward(x)

        # Initializes decoder states.
        self.trg_lookup = F.parameter(self.ptrg_lookup)
        self.why = F.parameter(self.pwhy)
        self.by = F.parameter(self.pby)
        self.trg_lstm.restart(self.src_lstm.get_c(), self.src_lstm.get_h())
Example #6
def train_func(optimizer):
    dev = D.Naive(12345)
    Device.set_default(dev)
    g = Graph()
    Graph.set_default(g)

    pw1 = Parameter([8, 2], I.XavierUniform())
    pb1 = Parameter([8], I.Constant(0))
    pw2 = Parameter([1, 8], I.XavierUniform())
    pb2 = Parameter([1], I.Constant(0))

    optimizer.add(pw1, pb1, pw2, pb2)

    input_data = [1, 1, 1, -1, -1, 1, -1, -1]
    output_data = [1, -1, -1, 1]

    for i in range(10):
        g.clear()
        x = F.raw_input(Shape([2], 4), input_data)
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        h = F.tanh(w1 @ x + b1)
        y = w2 @ h + b2

        t = F.raw_input(Shape([], 4), output_data)
        diff = t - y
        loss = F.batch.mean(diff * diff)

        optimizer.reset_gradients()
        loss.backward()
        optimizer.update()

    return [
        pw1.value.to_list(),
        pb1.value.to_list(),
        pw2.value.to_list(),
        pb2.value.to_list()
    ]
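train_func() above takes the optimizer as an argument but is never invoked in the snippet; a minimal hedged driver, reusing the SGD learning rate seen elsewhere on this page:

from primitiv import optimizers as O

# Assumed driver; the original learning rate may differ.
optimizer = O.SGD(0.1)
print(train_func(optimizer))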
Example #7
    def encode(self, src_batch, train):
        # Embedding lookup.
        src_lookup = F.parameter(self.psrc_lookup_)
        e_list = []
        for x in src_batch:
            e = F.pick(src_lookup, x, 1)
            e = F.dropout(e, self.dropout_rate_, train)
            e_list.append(e)

        # Forward encoding
        self.src_fw_lstm_.reset()
        f_list = []
        for e in e_list:
            f = self.src_fw_lstm_.forward(e)
            f = F.dropout(f, self.dropout_rate_, train)
            f_list.append(f)

        # Backward encoding
        self.src_bw_lstm_.reset()
        b_list = []
        for e in reversed(e_list):
            b = self.src_bw_lstm_.forward(e)
            b = F.dropout(b, self.dropout_rate_, train)
            b_list.append(b)
        b_list.reverse()

        # Concatenates RNN states.
        fb_list = [F.concat([f_list[i], b_list[i]], 0) for i in range(len(src_batch))]
        self.concat_fb = F.concat(fb_list, 1)
        self.t_concat_fb = F.transpose(self.concat_fb)

        # Initializes decoder states.
        self.wfbw_ = F.parameter(self.pwfbw_)
        self.whw_ = F.parameter(self.pwhw_)
        self.wwe_ = F.parameter(self.pwwe_)
        self.trg_lookup_ = F.parameter(self.ptrg_lookup_)
        self.whj_ = F.parameter(self.pwhj_)
        self.bj_ = F.parameter(self.pbj_)
        self.wjy_ = F.parameter(self.pwjy_)
        self.by_ = F.parameter(self.pby_)
        self.trg_lstm_.reset()
Example #8
    def forward(self, inputs, train):
        batch_size = len(inputs[0])
        lookup = F.parameter(self.plookup)
        self.rnn1.restart()
        self.rnn2.restart()
        self.hy.reset()

        outputs = []
        for i in range(len(inputs) - 1):
            x = F.pick(lookup, inputs[i], 1)
            x = F.dropout(x, DROPOUT_RATE, train)
            h1 = self.rnn1.forward(x)
            h1 = F.dropout(h1, DROPOUT_RATE, train)
            h2 = self.rnn2.forward(h1)
            h2 = F.dropout(h2, DROPOUT_RATE, train)
            outputs.append(self.hy.forward(h2))

        return outputs
Example #9
    def forward(self, inputs, train):
        batch_size = len(inputs[0])
        lookup = F.parameter(self.plookup)
        self.rnn1.restart()
        self.rnn2.restart()
        self.hy.reset()

        xs = [
            F.dropout(F.pick(lookup, inputs[i], 1), DROPOUT_RATE, train)
            for i in range(len(inputs) - 1)
        ]
        hs1 = self.rnn1.forward(xs)
        for i in range(len(inputs) - 1):
            hs1[i] = F.dropout(hs1[i], DROPOUT_RATE, train)
        hs2 = self.rnn2.forward(hs1)
        outputs = [
            self.hy.forward(F.dropout(hs2[i], DROPOUT_RATE, train))
            for i in range(len(inputs) - 1)
        ]

        return outputs
Example #10
    def encode(self, src_batch, train):
        """Encodes source sentences and prepares internal states."""
        # Embedding lookup.
        src_lookup = F.parameter(self.psrc_lookup)
        e_list = []
        for x in src_batch:
            e = F.pick(src_lookup, x, 1)
            e = F.dropout(e, self.dropout_rate, train)
            e_list.append(e)

        # Forward encoding
        self.src_fw_lstm.restart()
        f_list = []
        for e in e_list:
            f = self.src_fw_lstm.forward(e)
            f = F.dropout(f, self.dropout_rate, train)
            f_list.append(f)

        # Backward encoding
        self.src_bw_lstm.restart()
        b_list = []
        for e in reversed(e_list):
            b = self.src_bw_lstm.forward(e)
            b = F.dropout(b, self.dropout_rate, train)
            b_list.append(b)

        b_list.reverse()

        # Sums forward/backward RNN states, then concatenates them over time.
        fb_list = [f_list[i] + b_list[i] for i in range(len(src_batch))]
        self.concat_fb = F.concat(fb_list, 1)
        self.t_concat_fb = F.transpose(self.concat_fb)

        # Initializes decoder states.
        embed_size = self.psrc_lookup.shape()[0]
        self.trg_lookup = F.parameter(self.ptrg_lookup)
        self.whj = F.parameter(self.pwhj)
        self.bj = F.parameter(self.pbj)
        self.wjy = F.parameter(self.pwjy)
        self.by = F.parameter(self.pby)
        self.feed = F.zeros([embed_size])
        self.trg_lstm.restart(
            self.src_fw_lstm.get_c() + self.src_bw_lstm.get_c(),
            self.src_fw_lstm.get_h() + self.src_bw_lstm.get_h())
Example #11
 def make_graph(inputs, train):
     # Input and parameters.
     #x = F.input(Shape([IMAGE_HEIGHT, IMAGE_WIDTH], BATCH_SIZE), inputs)
     x = F.input(inputs)
     w_cnn1 = F.parameter(pw_cnn1)
     w_cnn2 = F.parameter(pw_cnn2)
     w_fc1 = F.parameter(pw_fc1)
     w_fc2 = F.parameter(pw_fc2)
     b_fc1 = F.parameter(pb_fc1)
     b_fc2 = F.parameter(pb_fc2)
     # CNNs
     h_cnn1 = F.relu(F.conv2d(x, w_cnn1, PADDING1, PADDING1, 1, 1, 1, 1))
     h_pool1 = F.max_pool2d(h_cnn1, 2, 2, 0, 0, 2, 2)
     h_cnn2 = F.relu(
         F.conv2d(h_pool1, w_cnn2, PADDING2, PADDING2, 1, 1, 1, 1))
     h_pool2 = F.max_pool2d(h_cnn2, 2, 2, 0, 0, 2, 2)
     # FC layers
     x_fc = F.dropout(F.flatten(h_pool2), .5, train)
     h_fc = F.dropout(F.relu(F.matmul(w_fc1, x_fc) + b_fc1), .5, train)
     return F.matmul(w_fc2, h_fc) + b_fc2
Example #12
 def restart(self):
     self.w = F.parameter(self.pw)
     self.bf = F.parameter(self.pbf)
     self.br = F.parameter(self.pbr)
Example #13
 def reset(self):
     self.w = F.parameter(self.pw)
     self.b = F.parameter(self.pb)
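reset() above only rebuilds the parameter nodes; a plausible companion forward() for such an affine layer is sketched below (an assumption, the original class is not shown):

# Hypothetical companion method for the layer above.
def forward(self, x):
    return self.w @ x + self.b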
Example #14
    def primitiv_xor_test(self):
        dev = D.Naive()
        Device.set_default(dev)
        g = Graph()
        Graph.set_default(g)

        input_data = [
            np.array([[1], [1]]),
            np.array([[-1], [1]]),
            np.array([[-1], [-1]]),
            np.array([[1], [-1]]),
        ]

        label_data = [
            np.array([1]),
            np.array([-1]),
            np.array([1]),
            np.array([-1]),
        ]

        N = 8
        pw = Parameter([1, N], I.XavierUniform())
        pb = Parameter([], I.Constant(0))
        pu = Parameter([N, 2], I.XavierUniform())
        pc = Parameter([N], I.Constant(0))
        if all(os.path.isfile('output/xor/{}.data'.format(name))
               for name in ('pw', 'pb', 'pu', 'pc')):
            pw.load('output/xor/pw.data')
            pb.load('output/xor/pb.data')
            pu.load('output/xor/pu.data')
            pc.load('output/xor/pc.data')

        optimizer = O.SGD(0.01)
        optimizer.add(pw, pb, pu, pc)

        for epoch in range(1000):
            print(epoch, end=' ')

            g.clear()

            x = F.input(input_data)
            w = F.parameter(pw)
            b = F.parameter(pb)
            u = F.parameter(pu)
            c = F.parameter(pc)
            h = F.tanh(u @ x + c)
            y = F.tanh(w @ h + b)

            for val in y.to_list():
                print('{:+.6f},'.format(val), end=' ')

            loss = self.calc_loss(y, label_data)
            print('loss={:.6f}'.format(loss.to_float()))

            optimizer.reset_gradients()
            loss.backward()
            optimizer.update()

        pw.save('output/xor/pw.data')
        pb.save('output/xor/pb.data')
        pu.save('output/xor/pu.data')
        pc.save('output/xor/pc.data')

        return y.to_list()
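The test above relies on self.calc_loss(), which is not shown; a plausible squared-error version, matching the loss used in the other XOR examples on this page (an assumption, not the original helper):

    # Hypothetical helper; mirrors the squared-error loss used in Examples #6 and #16.
    def calc_loss(self, y, label_data):
        t = F.input(label_data)
        diff = t - y
        return F.batch.mean(diff * diff)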
Example #15
 def restart(self):
     self.wxh = F.parameter(self.pwxh)
     self.whh = F.parameter(self.pwhh)
     self.bh = F.parameter(self.pbh)
     self.h = self.c = F.zeros([self.out_size])
Example #16
def main():
    dev = D.Naive()  # or D.CUDA(gpuid)
    Device.set_default(dev)

    # Parameters
    pw1 = Parameter([8, 2], I.XavierUniform())
    pb1 = Parameter([8], I.Constant(0))
    pw2 = Parameter([1, 8], I.XavierUniform())
    pb2 = Parameter([], I.Constant(0))

    # Optimizer
    optimizer = O.SGD(0.1)

    # Registers parameters.
    optimizer.add(pw1, pb1, pw2, pb2)

    # Training data
    input_data = [
        np.array([1, 1], dtype=np.float32),  # Sample 1
        np.array([1, -1], dtype=np.float32),  # Sample 2
        np.array([-1, 1], dtype=np.float32),  # Sample 3
        np.array([-1, -1], dtype=np.float32),  # Sample 4
    ]
    output_data = [
        np.array([1], dtype=np.float32),  # Label 1
        np.array([-1], dtype=np.float32),  # Label 2
        np.array([-1], dtype=np.float32),  # Label 3
        np.array([1], dtype=np.float32),  # Label 4
    ]

    g = Graph()
    Graph.set_default(g)

    for i in range(10):
        g.clear()

        # Builds a computation graph.
        x = F.input(input_data)
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        h = F.tanh(w1 @ x + b1)
        y = w2 @ h + b2

        # Obtains values.
        y_val = y.to_list()
        print("epoch ", i, ":")
        for j in range(4):
            print("  [", j, "]: ", y_val[j])

        # Extends the computation graph to calculate loss values.
        t = F.input(output_data)
        diff = t - y
        loss = F.batch.mean(diff * diff)

        # Obtains the loss.
        loss_val = loss.to_float()
        print("  loss: ", loss_val)

        # Updates parameters.
        optimizer.reset_gradients()
        loss.backward()
        optimizer.update()
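The listing stops inside main(); the usual entry-point guard (assumed here, not part of the original snippet) would run it:

if __name__ == '__main__':
    main()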