def reset(self, init_c=Node(), init_h=Node()):
    """Initializes internal states."""
    out_size = self._pwhh.shape()[1]
    self._wxh = F.parameter(self._pwxh)
    self._whh = F.parameter(self._pwhh)
    self._bh = F.parameter(self._pbh)
    self._c = init_c if init_c.valid() else F.zeros([out_size])
    self._h = init_h if init_h.valid() else F.zeros([out_size])
def make_graph(inputs, train):
    x = F.input(inputs)
    # Hidden layer: ReLU followed by dropout (active only when train is True).
    w1 = F.parameter(pw1)
    b1 = F.parameter(pb1)
    h = F.relu(w1 @ x + b1)
    h = F.dropout(h, .5, train)
    # Output layer.
    w2 = F.parameter(pw2)
    b2 = F.parameter(pb2)
    return w2 @ h + b2
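# A minimal usage sketch for make_graph() above, not taken from the original
# source: it assumes pw1, pb1, pw2, pb2 are module-level Parameters already
# registered with an optimizer, and it reuses only calls that appear elsewhere
# in these snippets (F.input, F.batch.mean, Graph.clear, Optimizer.update).
def train_one_epoch(g, optimizer, input_data, output_data):
    g.clear()
    y = make_graph(input_data, True)   # train=True enables dropout.
    diff = F.input(output_data) - y
    loss = F.batch.mean(diff * diff)   # Squared error averaged over the batch.
    optimizer.reset_gradients()
    loss.backward()
    optimizer.update()
    return loss.to_float()

def predict(g, input_data):
    g.clear()
    # train=False disables dropout at inference time.
    return make_graph(input_data, False).to_list()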
def make_graph(inputs):
    # We first store input values explicitly on GPU 0.
    x = F.input(inputs, device=dev0)
    w1 = F.parameter(pw1)
    b1 = F.parameter(pb1)
    w2 = F.parameter(pw2)
    b2 = F.parameter(pb2)
    # The hidden layer is calculated and implicitly stored on GPU 0.
    h_on_gpu0 = F.relu(w1 @ x + b1)
    # `copy()` transfers the hidden layer to GPU 1.
    h_on_gpu1 = F.copy(h_on_gpu0, dev1)
    # The output layer is calculated and implicitly stored on GPU 1.
    return w2 @ h_on_gpu1 + b2
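# One possible setup for the two devices used above; the GPU IDs, layer sizes,
# and initializers here are illustrative assumptions, not part of the original
# snippet. Parameters are created while the matching device is the default, so
# each layer's weights live on the GPU that computes it.
dev0 = D.CUDA(0)   # holds x, w1, b1 and the hidden layer
dev1 = D.CUDA(1)   # holds w2, b2 and the output layer

Device.set_default(dev0)
pw1 = Parameter([8, 2], I.XavierUniform())
pb1 = Parameter([8], I.Constant(0))

Device.set_default(dev1)
pw2 = Parameter([1, 8], I.XavierUniform())
pb2 = Parameter([], I.Constant(0))

Device.set_default(dev0)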
def forward(self, inputs):
    batch_size = len(inputs[0])
    wlookup = F.parameter(self.pwlookup)
    wxs = F.parameter(self.pwxs)
    wsy = F.parameter(self.pwsy)
    s = F.zeros(Shape([NUM_HIDDEN_UNITS], batch_size))
    outputs = []
    for i in range(len(inputs) - 1):
        w = F.pick(wlookup, inputs[i], 1)
        x = w + s
        s = F.sigmoid(wxs @ x)
        outputs.append(wsy @ s)
    return outputs
def encode(self, src_batch, train): """Encodes source sentences and prepares internal states.""" # Reversed encoding. src_lookup = F.parameter(self.psrc_lookup) self.src_lstm.restart() for it in src_batch: x = F.pick(src_lookup, it, 1) x = F.dropout(x, self.dropout_rate, train) self.src_lstm.forward(x) # Initializes decoder states. self.trg_lookup = F.parameter(self.ptrg_lookup) self.why = F.parameter(self.pwhy) self.by = F.parameter(self.pby) self.trg_lstm.restart(self.src_lstm.get_c(), self.src_lstm.get_h())
def train_func(optimizer):
    dev = D.Naive(12345)
    Device.set_default(dev)
    g = Graph()
    Graph.set_default(g)

    pw1 = Parameter([8, 2], I.XavierUniform())
    pb1 = Parameter([8], I.Constant(0))
    pw2 = Parameter([1, 8], I.XavierUniform())
    pb2 = Parameter([1], I.Constant(0))
    optimizer.add(pw1, pb1, pw2, pb2)

    input_data = [1, 1, 1, -1, -1, 1, -1, -1]
    output_data = [1, -1, -1, 1]

    for i in range(10):
        g.clear()
        x = F.raw_input(Shape([2], 4), input_data)
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        h = F.tanh(w1 @ x + b1)
        y = w2 @ h + b2
        t = F.raw_input(Shape([], 4), output_data)
        diff = t - y
        loss = F.batch.mean(diff * diff)
        optimizer.reset_gradients()
        loss.backward()
        optimizer.update()

    return [
        pw1.value.to_list(), pb1.value.to_list(),
        pw2.value.to_list(), pb2.value.to_list()
    ]
def encode(self, src_batch, train):
    # Embedding lookup.
    src_lookup = F.parameter(self.psrc_lookup_)
    e_list = []
    for x in src_batch:
        e = F.pick(src_lookup, x, 1)
        e = F.dropout(e, self.dropout_rate_, train)
        e_list.append(e)

    # Forward encoding.
    self.src_fw_lstm_.reset()
    f_list = []
    for e in e_list:
        f = self.src_fw_lstm_.forward(e)
        f = F.dropout(f, self.dropout_rate_, train)
        f_list.append(f)

    # Backward encoding.
    self.src_bw_lstm_.reset()
    b_list = []
    for e in reversed(e_list):
        b = self.src_bw_lstm_.forward(e)
        b = F.dropout(b, self.dropout_rate_, train)
        b_list.append(b)
    b_list.reverse()

    # Concatenates RNN states.
    fb_list = [
        F.concat([f_list[i], b_list[i]], 0) for i in range(len(src_batch))
    ]
    self.concat_fb = F.concat(fb_list, 1)
    self.t_concat_fb = F.transpose(self.concat_fb)

    # Initializes decoder states.
    self.wfbw_ = F.parameter(self.pwfbw_)
    self.whw_ = F.parameter(self.pwhw_)
    self.wwe_ = F.parameter(self.pwwe_)
    self.trg_lookup_ = F.parameter(self.ptrg_lookup_)
    self.whj_ = F.parameter(self.pwhj_)
    self.bj_ = F.parameter(self.pbj_)
    self.wjy_ = F.parameter(self.pwjy_)
    self.by_ = F.parameter(self.pby_)
    self.trg_lstm_.reset()
def forward(self, inputs, train):
    batch_size = len(inputs[0])
    lookup = F.parameter(self.plookup)
    self.rnn1.restart()
    self.rnn2.restart()
    self.hy.reset()
    outputs = []
    for i in range(len(inputs) - 1):
        x = F.pick(lookup, inputs[i], 1)
        x = F.dropout(x, DROPOUT_RATE, train)
        h1 = self.rnn1.forward(x)
        h1 = F.dropout(h1, DROPOUT_RATE, train)
        h2 = self.rnn2.forward(h1)
        h2 = F.dropout(h2, DROPOUT_RATE, train)
        outputs.append(self.hy.forward(h2))
    return outputs
def forward(self, inputs, train):
    batch_size = len(inputs[0])
    lookup = F.parameter(self.plookup)
    self.rnn1.restart()
    self.rnn2.restart()
    self.hy.reset()
    xs = [
        F.dropout(F.pick(lookup, inputs[i], 1), DROPOUT_RATE, train)
        for i in range(len(inputs) - 1)
    ]
    hs1 = self.rnn1.forward(xs)
    for i in range(len(inputs) - 1):
        hs1[i] = F.dropout(hs1[i], DROPOUT_RATE, train)
    hs2 = self.rnn2.forward(hs1)
    outputs = [
        self.hy.forward(F.dropout(hs2[i], DROPOUT_RATE, train))
        for i in range(len(inputs) - 1)
    ]
    return outputs
def encode(self, src_batch, train): """Encodes source sentences and prepares internal states.""" # Embedding lookup. src_lookup = F.parameter(self.psrc_lookup) e_list = [] for x in src_batch: e = F.pick(src_lookup, x, 1) e = F.dropout(e, self.dropout_rate, train) e_list.append(e) # Forward encoding self.src_fw_lstm.restart() f_list = [] for e in e_list: f = self.src_fw_lstm.forward(e) f = F.dropout(f, self.dropout_rate, train) f_list.append(f) # Backward encoding self.src_bw_lstm.restart() b_list = [] for e in reversed(e_list): b = self.src_bw_lstm.forward(e) b = F.dropout(b, self.dropout_rate, train) b_list.append(b) b_list.reverse() # Concatenates RNN states. fb_list = [f_list[i] + b_list[i] for i in range(len(src_batch))] self.concat_fb = F.concat(fb_list, 1) self.t_concat_fb = F.transpose(self.concat_fb) # Initializes decode states. embed_size = self.psrc_lookup.shape()[0] self.trg_lookup = F.parameter(self.ptrg_lookup) self.whj = F.parameter(self.pwhj) self.bj = F.parameter(self.pbj) self.wjy = F.parameter(self.pwjy) self.by = F.parameter(self.pby) self.feed = F.zeros([embed_size]) self.trg_lstm.restart( self.src_fw_lstm.get_c() + self.src_bw_lstm.get_c(), self.src_fw_lstm.get_h() + self.src_bw_lstm.get_h())
def make_graph(inputs, train):
    # Input and parameters.
    #x = F.input(Shape([IMAGE_HEIGHT, IMAGE_WIDTH], BATCH_SIZE), inputs)
    x = F.input(inputs)
    w_cnn1 = F.parameter(pw_cnn1)
    w_cnn2 = F.parameter(pw_cnn2)
    w_fc1 = F.parameter(pw_fc1)
    w_fc2 = F.parameter(pw_fc2)
    b_fc1 = F.parameter(pb_fc1)
    b_fc2 = F.parameter(pb_fc2)

    # CNNs
    h_cnn1 = F.relu(F.conv2d(x, w_cnn1, PADDING1, PADDING1, 1, 1, 1, 1))
    h_pool1 = F.max_pool2d(h_cnn1, 2, 2, 0, 0, 2, 2)
    h_cnn2 = F.relu(
        F.conv2d(h_pool1, w_cnn2, PADDING2, PADDING2, 1, 1, 1, 1))
    h_pool2 = F.max_pool2d(h_cnn2, 2, 2, 0, 0, 2, 2)

    # FC layers
    x_fc = F.dropout(F.flatten(h_pool2), .5, train)
    h_fc = F.dropout(F.relu(F.matmul(w_fc1, x_fc) + b_fc1), .5, train)
    return F.matmul(w_fc2, h_fc) + b_fc2
def restart(self):
    self.w = F.parameter(self.pw)
    self.bf = F.parameter(self.pbf)
    self.br = F.parameter(self.pbr)
def reset(self):
    self.w = F.parameter(self.pw)
    self.b = F.parameter(self.pb)
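# A minimal sketch of the forward step that would pair with reset() above; this
# is an assumption, not code from the original source. It treats the layer as a
# plain affine transform, matching how `self.hy.forward(h2)` is used in the
# language-model forward() snippets above.
def forward(self, x):
    return self.w @ x + self.b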
def primitiv_xor_test(self):
    dev = D.Naive()
    Device.set_default(dev)
    g = Graph()
    Graph.set_default(g)

    input_data = [
        np.array([[1], [1]]),
        np.array([[-1], [1]]),
        np.array([[-1], [-1]]),
        np.array([[1], [-1]]),
    ]
    label_data = [
        np.array([1]),
        np.array([-1]),
        np.array([1]),
        np.array([-1]),
    ]

    N = 8
    pw = Parameter([1, N], I.XavierUniform())
    pb = Parameter([], I.Constant(0))
    pu = Parameter([N, 2], I.XavierUniform())
    pc = Parameter([N], I.Constant(0))

    param_files = ('output/xor/pw.data', 'output/xor/pb.data',
                   'output/xor/pu.data', 'output/xor/pc.data')
    if all(os.path.isfile(path) for path in param_files):
        pw.load('output/xor/pw.data')
        pb.load('output/xor/pb.data')
        pu.load('output/xor/pu.data')
        pc.load('output/xor/pc.data')

    optimizer = O.SGD(0.01)
    optimizer.add(pw, pb, pu, pc)

    for epoch in range(1000):
        print(epoch, end=' ')
        g.clear()
        x = F.input(input_data)
        w = F.parameter(pw)
        b = F.parameter(pb)
        u = F.parameter(pu)
        c = F.parameter(pc)
        h = F.tanh(u @ x + c)
        y = F.tanh(w @ h + b)
        for val in y.to_list():
            print('{:+.6f},'.format(val), end=' ')
        loss = self.calc_loss(y, label_data)
        print('loss={:.6f}'.format(loss.to_float()))
        optimizer.reset_gradients()
        loss.backward()
        optimizer.update()

    pw.save('output/xor/pw.data')
    pb.save('output/xor/pb.data')
    pu.save('output/xor/pu.data')
    pc.save('output/xor/pc.data')
    return y.to_list()
def restart(self):
    self.wxh = F.parameter(self.pwxh)
    self.whh = F.parameter(self.pwhh)
    self.bh = F.parameter(self.pbh)
    self.h = self.c = F.zeros([self.out_size])
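# A sketch of the step this restart() could prepare; this is an assumption, not
# code from the original source. It supposes pwxh/pwhh pack the four LSTM gates
# along the first axis and that F.slice takes (x, dim, lower, upper).
def forward(self, x):
    u = self.wxh @ x + self.whh @ self.h + self.bh
    i = F.sigmoid(F.slice(u, 0, 0, self.out_size))                       # input gate
    f = F.sigmoid(F.slice(u, 0, self.out_size, 2 * self.out_size))       # forget gate
    o = F.sigmoid(F.slice(u, 0, 2 * self.out_size, 3 * self.out_size))   # output gate
    j = F.tanh(F.slice(u, 0, 3 * self.out_size, 4 * self.out_size))      # candidate cell
    self.c = i * j + f * self.c
    self.h = o * F.tanh(self.c)
    return self.h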
def main():
    dev = D.Naive()  # or D.CUDA(gpuid)
    Device.set_default(dev)

    # Parameters
    pw1 = Parameter([8, 2], I.XavierUniform())
    pb1 = Parameter([8], I.Constant(0))
    pw2 = Parameter([1, 8], I.XavierUniform())
    pb2 = Parameter([], I.Constant(0))

    # Optimizer
    optimizer = O.SGD(0.1)

    # Registers parameters.
    optimizer.add(pw1, pb1, pw2, pb2)

    # Training data
    input_data = [
        np.array([1, 1], dtype=np.float32),    # Sample 1
        np.array([1, -1], dtype=np.float32),   # Sample 2
        np.array([-1, 1], dtype=np.float32),   # Sample 3
        np.array([-1, -1], dtype=np.float32),  # Sample 4
    ]
    output_data = [
        np.array([1], dtype=np.float32),   # Label 1
        np.array([-1], dtype=np.float32),  # Label 2
        np.array([-1], dtype=np.float32),  # Label 3
        np.array([1], dtype=np.float32),   # Label 4
    ]

    g = Graph()
    Graph.set_default(g)

    for i in range(10):
        g.clear()

        # Builds a computation graph.
        x = F.input(input_data)
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        h = F.tanh(w1 @ x + b1)
        y = w2 @ h + b2

        # Obtains values.
        y_val = y.to_list()
        print("epoch ", i, ":")
        for j in range(4):
            print("  [", j, "]: ", y_val[j])

        # Extends the computation graph to calculate loss values.
        t = F.input(output_data)
        diff = t - y
        loss = F.batch.mean(diff * diff)

        # Obtains the loss.
        loss_val = loss.to_float()
        print("  loss: ", loss_val)

        # Updates parameters.
        optimizer.reset_gradients()
        loss.backward()
        optimizer.update()