def test_pytrainer_not_implemented(self):
    """Every abstract Trainer hook must raise NotImplementedError."""
    device = D.Naive()
    Device.set_default(device)
    trainer = IncompleteTrainer()
    param = Parameter(Shape([]))
    uint_configs = {'Trainer.epoch': 1}
    float_configs = {
        'Trainer.clip_threshold': 0.0,
        'Trainer.lr_scale': 1.0,
        'Trainer.l2_strength': 0.0,
    }
    # Exercise each unimplemented hook in turn; all must fail identically.
    failing_calls = [
        lambda: trainer.add_parameter(param),
        lambda: trainer.update(),
        lambda: Trainer.get_configs(trainer),
        lambda: Trainer.set_configs(trainer, uint_configs, float_configs),
    ]
    for call in failing_calls:
        with self.assertRaises(NotImplementedError):
            call()
def test_pyoptimizer_not_implemented(self):
    """Every abstract Optimizer hook must raise NotImplementedError."""
    device = D.Naive()
    Device.set_default(device)
    optimizer = IncompleteOptimizer()
    param = Parameter()
    uint_configs = {'Optimizer.epoch': 1}
    float_configs = {
        'Optimizer.clip_threshold': 0.0,
        'Optimizer.lr_scale': 1.0,
        'Optimizer.l2_strength': 0.0,
    }
    # Exercise each unimplemented hook in turn; all must fail identically.
    failing_calls = [
        lambda: optimizer.add(param),
        lambda: optimizer.update(),
        lambda: Optimizer.get_configs(optimizer),
        lambda: Optimizer.set_configs(optimizer, uint_configs, float_configs),
    ]
    for call in failing_calls:
        with self.assertRaises(NotImplementedError):
            call()
class LSTM(Model):
    """LSTM cell."""

    def __init__(self):
        # Parameters start empty and get their shapes later in init();
        # scan_attributes() registers them with the Model base class.
        self._pwxh = Parameter()
        self._pwhh = Parameter()
        self._pbh = Parameter()
        self.scan_attributes()

    def init(self, in_size, out_size):
        """Creates a new LSTM."""
        # All four gates (input/forget/output/candidate) are packed into a
        # single matrix of 4 * out_size rows and sliced apart in forward().
        self._pwxh.init([4 * out_size, in_size], I.XavierUniform())
        self._pwhh.init([4 * out_size, out_size], I.XavierUniform())
        self._pbh.init([4 * out_size], I.Constant(0))

    def reset(self, init_c = Node(), init_h = Node()):
        """Initializes internal states."""
        # NOTE(review): the default Node() objects are created once at
        # definition time; harmless here since they are only probed via valid().
        out_size = self._pwhh.shape()[1]
        self._wxh = F.parameter(self._pwxh)
        self._whh = F.parameter(self._pwhh)
        self._bh = F.parameter(self._pbh)
        # Fall back to zero vectors when no initial states are supplied.
        self._c = init_c if init_c.valid() else F.zeros([out_size])
        self._h = init_h if init_h.valid() else F.zeros([out_size])

    def forward(self, x):
        """One step forwarding."""
        out_size = self._pwhh.shape()[1]
        u = self._wxh @ x + self._whh @ self._h + self._bh
        # Slice the packed preactivation into the four gate blocks.
        i = F.sigmoid(F.slice(u, 0, 0, out_size))
        f = F.sigmoid(F.slice(u, 0, out_size, 2 * out_size))
        o = F.sigmoid(F.slice(u, 0, 2 * out_size, 3 * out_size))
        j = F.tanh(F.slice(u, 0, 3 * out_size, 4 * out_size))
        self._c = i * j + f * self._c
        self._h = o * F.tanh(self._c)
        return self._h

    def get_c(self):
        """Retrieves current internal cell state."""
        return self._c

    def get_h(self):
        """Retrieves current hidden value."""
        return self._h
def test_pyoptimizer_propagate_exception(self):
    """Exceptions raised inside overridden hooks must reach the caller intact."""
    device = D.Naive()
    Device.set_default(device)
    optimizer = ExceptionOptimizer()
    param = Parameter()
    uint_configs = {'Optimizer.epoch': 1}
    float_configs = {
        'Optimizer.clip_threshold': 0.0,
        'Optimizer.lr_scale': 1.0,
        'Optimizer.l2_strength': 0.0,
    }
    # Each hook raises TestException carrying the hook's own name.
    cases = [
        (lambda: optimizer.add(param), "configure_parameter"),
        (lambda: optimizer.update(), "update_parameter"),
        (lambda: Optimizer.get_configs(optimizer), "get_configs"),
        (lambda: Optimizer.set_configs(optimizer, uint_configs, float_configs),
         "set_configs"),
    ]
    for call, message in cases:
        with self.assertRaises(TestException) as ctx:
            call()
        self.assertEqual(str(ctx.exception), message)
def test_model_invalid_operation(self):
    """Invalid lookups and deletions on Model must raise TypeError."""
    parent = Model()
    child = Model()
    parent.add("m", child)
    param = Parameter()
    parent.add("p", param)
    # Unknown names report a fixed error message.
    with self.assertRaises(TypeError) as e:
        parent["notfound"]
    self.assertEqual(
        str(e.exception),
        "'name' is not a name of neither parameter nor submodel")
    # Registered items can never be deleted, whatever the key is.
    with self.assertRaises(TypeError):
        del parent["p"]
    with self.assertRaises(TypeError):
        del parent["m"]
    with self.assertRaises(TypeError):
        del parent[0]
    # Non-string, non-str-tuple keys are rejected on lookup as well.
    with self.assertRaises(TypeError):
        parent[(0, 1)]
    with self.assertRaises(TypeError):
        parent[[0, 1]]
def test_ModelTest_CheckGetSubmodelRecursiveByTuple(self):
    """Submodels are reachable through tuple keys, one hop per element."""
    root = Model()
    child_a = Model()
    child_b = Model()
    grandchild = Model()
    param = Parameter()
    root.add("p", param)
    root.add("sm1", child_a)
    root.add("sm2", child_b)
    child_a.add("ssm", grandchild)
    # Direct and recursive lookups return the registered objects themselves.
    self.assertIs(child_a, root["sm1"])
    self.assertIs(child_b, root["sm2"])
    self.assertIs(grandchild, root["sm1", "ssm"])
    self.assertIs(grandchild, child_a["ssm"])
    root["p"]  # parameter lookup succeeds as well
    # Names that do not exist on the addressed model must fail.
    for bad_key in ("ssm", ("sm2", "ssm"), "x"):
        with self.assertRaises(TypeError):
            root[bad_key]
def load(name, prefix):
    """Loads all parameters and submodels of an EncoderDecoder from files.

    Reads from ``prefix + name + <suffix>`` paths as written by the matching
    save() routine and returns the reconstructed EncoderDecoder.
    """
    # __new__ bypasses __init__: every attribute is restored from disk instead.
    encdec = EncoderDecoder.__new__(EncoderDecoder)
    encdec.name_ = name
    encdec.psrc_lookup_ = Parameter.load(prefix + name + "_src_lookup.param")
    encdec.ptrg_lookup_ = Parameter.load(prefix + name + "_trg_lookup.param")
    encdec.pwhj_ = Parameter.load(prefix + name + "_whj.param")
    encdec.pbj_ = Parameter.load(prefix + name + "_bj.param")
    encdec.pwjy_ = Parameter.load(prefix + name + "_wjy.param")
    encdec.pby_ = Parameter.load(prefix + name + "_by.param")
    encdec.src_fw_lstm_ = LSTM.load(name + "_src_fw_lstm", prefix)
    encdec.src_bw_lstm_ = LSTM.load(name + "_src_bw_lstm", prefix)
    encdec.trg_lstm_ = LSTM.load(name + "_trg_lstm", prefix)
    # The embedding size is recoverable from the bj bias shape.
    encdec.embed_size_ = encdec.pbj_.shape()[0]
    with open(prefix + name + ".config", "r", encoding="utf-8") as f:
        encdec.dropout_rate_ = float(f.readline())
    return encdec
def main(): dev = D.Naive() # or D.CUDA(gpuid) Device.set_default(dev) # Parameters pw1 = Parameter([8, 2], I.XavierUniform()) pb1 = Parameter([8], I.Constant(0)) pw2 = Parameter([1, 8], I.XavierUniform()) pb2 = Parameter([], I.Constant(0)) # Optimizer optimizer = O.SGD(0.1) # Registers parameters. optimizer.add_parameter(pw1) optimizer.add_parameter(pb1) optimizer.add_parameter(pw2) optimizer.add_parameter(pb2) # Training data input_data = [ np.array([1, 1], dtype=np.float32), # Sample 1 np.array([1, -1], dtype=np.float32), # Sample 2 np.array([-1, 1], dtype=np.float32), # Sample 3 np.array([-1, -1], dtype=np.float32), # Sample 4 ] output_data = [ np.array([1], dtype=np.float32), # Label 1 np.array([-1], dtype=np.float32), # Label 2 np.array([-1], dtype=np.float32), # Label 3 np.array([1], dtype=np.float32), # Label 4 ] g = Graph() Graph.set_default(g) for i in range(10): g.clear() # Builds a computation graph. x = F.input(input_data) w1 = F.parameter(pw1) b1 = F.parameter(pb1) w2 = F.parameter(pw2) b2 = F.parameter(pb2) h = F.tanh(w1 @ x + b1) y = w2 @ h + b2 # Obtains values. y_val = y.to_list() print("epoch ", i, ":") for j in range(4): print(" [", j, "]: ", y_val[j]) # Extends the computation graph to calculate loss values. t = F.input(output_data) diff = t - y loss = F.batch.mean(diff * diff) # Obtains the loss. loss_val = loss.to_float() print(" loss: ", loss_val) # Updates parameters. optimizer.reset_gradients() loss.backward() optimizer.update()
class EncoderDecoder(object):
    """Plain encoder-decoder translation model (no attention)."""

    def __init__(self, name, src_vocab_size, trg_vocab_size, embed_size, hidden_size, dropout_rate):
        self.name_ = name
        self.dropout_rate_ = dropout_rate
        # Embedding tables and the output projection.
        self.psrc_lookup_ = Parameter([embed_size, src_vocab_size], I.XavierUniform())
        self.ptrg_lookup_ = Parameter([embed_size, trg_vocab_size], I.XavierUniform())
        self.pwhy_ = Parameter([trg_vocab_size, hidden_size], I.XavierUniform())
        self.pby_ = Parameter([trg_vocab_size], I.Constant(0))
        self.src_lstm_ = LSTM(name + "_src_lstm", embed_size, hidden_size)
        self.trg_lstm_ = LSTM(name + "_trg_lstm", embed_size, hidden_size)

    # Loads all parameters.
    @staticmethod
    def load(name, prefix):
        # __new__ bypasses __init__: all attributes are restored from disk.
        encdec = EncoderDecoder.__new__(EncoderDecoder)
        encdec.name_ = name
        encdec.psrc_lookup_ = Parameter.load(prefix + name + "_src_lookup.param")
        encdec.ptrg_lookup_ = Parameter.load(prefix + name + "_trg_lookup.param")
        encdec.pwhy_ = Parameter.load(prefix + name + "_why.param")
        encdec.pby_ = Parameter.load(prefix + name + "_by.param")
        encdec.src_lstm_ = LSTM.load(name + "_src_lstm", prefix)
        encdec.trg_lstm_ = LSTM.load(name + "_trg_lstm", prefix)
        with open(prefix + name + ".config", "r") as ifs:
            encdec.dropout_rate_ = float(ifs.readline())
        return encdec

    # Saves all parameters.
    def save(self, prefix):
        self.psrc_lookup_.save(prefix + self.name_ + "_src_lookup.param")
        self.ptrg_lookup_.save(prefix + self.name_ + "_trg_lookup.param")
        self.pwhy_.save(prefix + self.name_ + "_why.param")
        self.pby_.save(prefix + self.name_ + "_by.param")
        self.src_lstm_.save(prefix)
        self.trg_lstm_.save(prefix)
        with open(prefix + self.name_ + ".config", "w") as ofs:
            print(self.dropout_rate_, file=ofs)

    # Adds parameters to the trainer.
    def register_training(self, trainer):
        trainer.add_parameter(self.psrc_lookup_)
        trainer.add_parameter(self.ptrg_lookup_)
        trainer.add_parameter(self.pwhy_)
        trainer.add_parameter(self.pby_)
        self.src_lstm_.register_training(trainer)
        self.trg_lstm_.register_training(trainer)

    # Encodes source sentences and prepare internal states.
    def encode(self, src_batch, train):
        # Reversed encoding.
        # NOTE(review): src_batch is presumably pre-reversed by the caller —
        # confirm against the data pipeline.
        src_lookup = F.parameter(self.psrc_lookup_)
        self.src_lstm_.init()
        for it in src_batch:
            x = F.pick(src_lookup, it, 1)
            x = F.dropout(x, self.dropout_rate_, train)
            self.src_lstm_.forward(x)
        # Initializes decoder states from the encoder's final states.
        self.trg_lookup_ = F.parameter(self.ptrg_lookup_)
        self.why_ = F.parameter(self.pwhy_)
        self.by_ = F.parameter(self.pby_)
        self.trg_lstm_.init(self.src_lstm_.get_c(), self.src_lstm_.get_h())

    # One step decoding.
    def decode_step(self, trg_words, train):
        x = F.pick(self.trg_lookup_, trg_words, 1)
        x = F.dropout(x, self.dropout_rate_, train)
        h = self.trg_lstm_.forward(x)
        h = F.dropout(h, self.dropout_rate_, train)
        return self.why_ @ h + self.by_

    # Calculates the loss function over given target sentences.
    def loss(self, trg_batch, train):
        # Each position predicts the next token, hence len - 1 steps.
        losses = []
        for i in range(len(trg_batch) - 1):
            y = self.decode_step(trg_batch[i], train)
            losses.append(F.softmax_cross_entropy(y, trg_batch[i + 1], 0))
        return F.batch.mean(F.sum(losses))
def __init__(self):
    # A 5-element parameter with a fixed, known gradient so the tests that
    # use this fixture can check gradient-dependent behavior exactly.
    self.param = Parameter([5], I.Constant(0))
    self.param.gradient = tF.raw_input([5], [1, 2, 3, 4, 5])
    self.scan_attributes()
def __init__(self):
    # Empty parameters; shapes are assigned later. add_all_parameters()
    # must run after all attributes exist so every one gets registered.
    self.pwxh = Parameter()
    self.pwhh = Parameter()
    self.pbh = Parameter()
    self.add_all_parameters()
def main():
    """Trains an MLP on MNIST using the legacy scope-based primitiv API."""
    # NOTE(review): this example mixes F.input(data=...) and F.input(param=...)
    # and uses DefaultScopeDevice/DefaultScopeGraph — an older primitiv API;
    # confirm against the installed library version.
    # Loads data
    train_inputs = load_images("data/train-images-idx3-ubyte", NUM_TRAIN_SAMPLES)
    train_labels = load_labels("data/train-labels-idx1-ubyte", NUM_TRAIN_SAMPLES)
    test_inputs = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES)
    test_labels = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES)

    # Uses GPU.
    #dev = CUDADevice(0)
    with DefaultScopeDevice(CPUDevice()):
        # Parameters for the multilayer perceptron.
        pw1 = Parameter("w1", [NUM_HIDDEN_UNITS, NUM_INPUT_UNITS], XavierUniform())
        pb1 = Parameter("b1", [NUM_HIDDEN_UNITS], Constant(0))
        pw2 = Parameter("w2", [NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS], XavierUniform())
        pb2 = Parameter("b2", [NUM_OUTPUT_UNITS], Constant(0))

        # Parameters for batch normalization.
        #Parameter pbeta("beta", {NUM_HIDDEN_UNITS}, Constant(0));
        #Parameter pgamma("gamma", {NUM_HIDDEN_UNITS}, Constant(1));

        # Trainer
        trainer = SGD(.5)
        trainer.add_parameter(pw1)
        trainer.add_parameter(pb1)
        trainer.add_parameter(pw2)
        trainer.add_parameter(pb2)
        #trainer.add_parameter(&pbeta);
        #trainer.add_parameter(&pgamma);

        # Helper lambda to construct the predictor network.
        def make_graph(inputs, train):
            # Stores input values.
            x = F.input(data=inputs)
            # Calculates the hidden layer.
            w1 = F.input(param=pw1)
            b1 = F.input(param=pb1)
            h = F.relu(F.matmul(w1, x) + b1)
            # Batch normalization
            #Node beta = F::input(pbeta);
            #Node gamma = F::input(pgamma);
            #h = F::batch::normalize(h) * gamma + beta;
            # Dropout
            h = F.dropout(h, .5, train)
            # Calculates the output layer.
            w2 = F.input(param=pw2)
            b2 = F.input(param=pb2)
            return F.matmul(w2, h) + b2

        ids = list(range(NUM_TRAIN_SAMPLES))
        for epoch in range(MAX_EPOCH):
            # Shuffles sample IDs.
            random.shuffle(ids)
            # Training loop
            for batch in range(NUM_TRAIN_BATCHES):
                print("\rTraining... %d / %d" % (batch + 1, NUM_TRAIN_BATCHES), end="")
                inputs = train_inputs[ids[batch * BATCH_SIZE:(batch + 1) * BATCH_SIZE]]
                labels = train_labels[ids[batch * BATCH_SIZE:(batch + 1) * BATCH_SIZE]]
                trainer.reset_gradients()
                # Constructs the graph.
                g = Graph()
                with DefaultScopeGraph(g):
                    y = make_graph(inputs, True)
                    loss = F.softmax_cross_entropy(y, labels, 0)
                    avg_loss = F.batch.mean(loss)
                    # Dump computation graph at the first time.
                    #if (epoch == 0 && batch == 0) g.dump();
                    # Forward, backward, and updates parameters.
                    g.forward(avg_loss)
                    g.backward(avg_loss)
                    trainer.update()
            print()
            match = 0
            # Test loop
            for batch in range(NUM_TEST_BATCHES):
                print("\rTesting... %d / %d" % (batch + 1, NUM_TEST_BATCHES), end="")
                # Makes a test minibatch.
                inputs = test_inputs[batch * BATCH_SIZE:(batch + 1) * BATCH_SIZE]
                # Constructs the graph.
                with Graph() as g:
                    y = make_graph(inputs, False)
                    # Gets outputs, argmax, and compares them with the label.
                    y_val = g.forward(y).to_list()
                    for i in range(BATCH_SIZE):
                        maxval = -1e10
                        argmax = -1
                        for j in range(NUM_OUTPUT_UNITS):
                            v = y_val[j + i * NUM_OUTPUT_UNITS]
                            if (v > maxval):
                                maxval = v
                                argmax = j
                        if argmax == test_labels[i + batch * BATCH_SIZE]:
                            match += 1
            accuracy = 100.0 * match / NUM_TEST_SAMPLES
            print("\nepoch %d: accuracy: %.2f%%\n" % (epoch, accuracy))
def __init__(self, in_size, out_size):
    self.out_size = out_size
    # 3 * out_size rows — presumably three gate/candidate blocks packed
    # together and sliced in the forward pass; confirm against forward().
    self.pw = Parameter([3 * out_size, in_size], I.Uniform(-0.1, 0.1))
    self.pbf = Parameter([out_size], I.Constant(0))
    self.pbr = Parameter([out_size], I.Constant(0))
    self.scan_attributes()
def __init__(self):
    # Empty parameters; shapes are assigned later in init().
    # scan_attributes() must run last so every attribute gets registered.
    self._pwxh = Parameter()
    self._pwhh = Parameter()
    self._pbh = Parameter()
    self.scan_attributes()
class EncoderDecoder(object):
    """Bidirectional encoder-decoder translation model with dot-attention."""

    def __init__(self, name, src_vocab_size, trg_vocab_size, embed_size, hidden_size, dropout_rate):
        self.name_ = name
        self.embed_size_ = embed_size
        self.dropout_rate_ = dropout_rate
        self.psrc_lookup_ = Parameter([embed_size, src_vocab_size], I.XavierUniform())
        self.ptrg_lookup_ = Parameter([embed_size, trg_vocab_size], I.XavierUniform())
        # whj maps [decoder state ; context] (2 * hidden) back to embed size.
        self.pwhj_ = Parameter([embed_size, 2 * hidden_size], I.XavierUniform())
        self.pbj_ = Parameter([embed_size], I.Constant(0))
        self.pwjy_ = Parameter([trg_vocab_size, embed_size], I.XavierUniform())
        self.pby_ = Parameter([trg_vocab_size], I.Constant(0))
        self.src_fw_lstm_ = LSTM(name + "_src_fw_lstm", embed_size, hidden_size)
        self.src_bw_lstm_ = LSTM(name + "_src_bw_lstm", embed_size, hidden_size)
        # Decoder input is [embedding ; feed vector], hence embed_size * 2.
        self.trg_lstm_ = LSTM(name + "_trg_lstm", embed_size * 2, hidden_size)

    # Loads all parameters.
    @staticmethod
    def load(name, prefix):
        # __new__ bypasses __init__: all attributes are restored from disk.
        encdec = EncoderDecoder.__new__(EncoderDecoder)
        encdec.name_ = name
        encdec.psrc_lookup_ = Parameter.load(prefix + name + "_src_lookup.param")
        encdec.ptrg_lookup_ = Parameter.load(prefix + name + "_trg_lookup.param")
        encdec.pwhj_ = Parameter.load(prefix + name + "_whj.param")
        encdec.pbj_ = Parameter.load(prefix + name + "_bj.param")
        encdec.pwjy_ = Parameter.load(prefix + name + "_wjy.param")
        encdec.pby_ = Parameter.load(prefix + name + "_by.param")
        encdec.src_fw_lstm_ = LSTM.load(name + "_src_fw_lstm", prefix)
        encdec.src_bw_lstm_ = LSTM.load(name + "_src_bw_lstm", prefix)
        encdec.trg_lstm_ = LSTM.load(name + "_trg_lstm", prefix)
        # The embedding size is recoverable from the bj bias shape.
        encdec.embed_size_ = encdec.pbj_.shape()[0]
        with open(prefix + name + ".config", "r", encoding="utf-8") as f:
            encdec.dropout_rate_ = float(f.readline())
        return encdec

    # Saves all parameters.
    def save(self, prefix):
        self.psrc_lookup_.save(prefix + self.name_ + "_src_lookup.param")
        self.ptrg_lookup_.save(prefix + self.name_ + "_trg_lookup.param")
        self.pwhj_.save(prefix + self.name_ + "_whj.param")
        self.pbj_.save(prefix + self.name_ + "_bj.param")
        self.pwjy_.save(prefix + self.name_ + "_wjy.param")
        self.pby_.save(prefix + self.name_ + "_by.param")
        self.src_fw_lstm_.save(prefix)
        self.src_bw_lstm_.save(prefix)
        self.trg_lstm_.save(prefix)
        with open(prefix + self.name_ + ".config", "w", encoding="utf-8") as f:
            print(self.dropout_rate_, file=f)

    # Adds parameters to the trainer.
    def register_training(self, trainer):
        trainer.add_parameter(self.psrc_lookup_)
        trainer.add_parameter(self.ptrg_lookup_)
        trainer.add_parameter(self.pwhj_)
        trainer.add_parameter(self.pbj_)
        trainer.add_parameter(self.pwjy_)
        trainer.add_parameter(self.pby_)
        self.src_fw_lstm_.register_training(trainer)
        self.src_bw_lstm_.register_training(trainer)
        self.trg_lstm_.register_training(trainer)

    # Encodes source sentences and prepare internal states.
    def encode(self, src_batch, train):
        # Embedding lookup.
        src_lookup = F.parameter(self.psrc_lookup_)
        e_list = []
        for x in src_batch:
            e = F.pick(src_lookup, x, 1)
            e = F.dropout(e, self.dropout_rate_, train)
            e_list.append(e)
        # Forward encoding
        self.src_fw_lstm_.init()
        f_list = []
        for e in e_list:
            f = self.src_fw_lstm_.forward(e)
            f = F.dropout(f, self.dropout_rate_, train)
            f_list.append(f)
        # Backward encoding
        self.src_bw_lstm_.init()
        b_list = []
        for e in reversed(e_list):
            b = self.src_bw_lstm_.forward(e)
            b = F.dropout(b, self.dropout_rate_, train)
            b_list.append(b)
        b_list.reverse()
        # Sums forward/backward states element-wise, then concatenates the
        # per-position results along the time axis (axis 1).
        fb_list = [f_list[i] + b_list[i] for i in range(len(src_batch))]
        self.concat_fb_ = F.concat(fb_list, 1)
        self.t_concat_fb_ = F.transpose(self.concat_fb_)
        # Initializes decode states.
        self.trg_lookup_ = F.parameter(self.ptrg_lookup_)
        self.whj_ = F.parameter(self.pwhj_)
        self.bj_ = F.parameter(self.pbj_)
        self.wjy_ = F.parameter(self.pwjy_)
        self.by_ = F.parameter(self.pby_)
        self.feed_ = F.zeros([self.embed_size_])
        self.trg_lstm_.init(
            self.src_fw_lstm_.get_c() + self.src_bw_lstm_.get_c(),
            self.src_fw_lstm_.get_h() + self.src_bw_lstm_.get_h())

    # One step decoding.
    def decode_step(self, trg_words, train):
        e = F.pick(self.trg_lookup_, trg_words, 1)
        e = F.dropout(e, self.dropout_rate_, train)
        # Input-feeding: previous attention output rides along with the token.
        h = self.trg_lstm_.forward(F.concat([e, self.feed_], 0))
        h = F.dropout(h, self.dropout_rate_, train)
        # Dot-attention over the encoder states.
        atten_probs = F.softmax(self.t_concat_fb_ @ h, 0)
        c = self.concat_fb_ @ atten_probs
        self.feed_ = F.tanh(self.whj_ @ F.concat([h, c], 0) + self.bj_)
        return self.wjy_ @ self.feed_ + self.by_

    # Calculates the loss function over given target sentences.
    def loss(self, trg_batch, train):
        # Each position predicts the next token, hence len - 1 steps.
        losses = []
        for i in range(len(trg_batch) - 1):
            y = self.decode_step(trg_batch[i], train)
            loss = F.softmax_cross_entropy(y, trg_batch[i + 1], 0)
            losses.append(loss)
        return F.batch.mean(F.sum(losses))
def __init__(self, dropout_rate):
    self.dropout_rate_ = dropout_rate
    # Empty parameters/submodels; shapes are assigned later in init().
    # scan_attributes() must run last so every attribute gets registered.
    self.psrc_lookup_ = Parameter()
    self.ptrg_lookup_ = Parameter()
    self.pwfbw_ = Parameter()
    self.pwhw_ = Parameter()
    self.pwwe_ = Parameter()
    self.pwhj_ = Parameter()
    self.pbj_ = Parameter()
    self.pwjy_ = Parameter()
    self.pby_ = Parameter()
    self.src_fw_lstm_ = LSTM()
    self.src_bw_lstm_ = LSTM()
    self.trg_lstm_ = LSTM()
    self.scan_attributes()
class EncoderDecoder(Model):
    """Bidirectional encoder-decoder with an MLP attention mechanism."""

    def __init__(self, dropout_rate):
        self.dropout_rate_ = dropout_rate
        # Empty parameters/submodels; shapes are assigned later in init().
        self.psrc_lookup_ = Parameter()
        self.ptrg_lookup_ = Parameter()
        self.pwfbw_ = Parameter()
        self.pwhw_ = Parameter()
        self.pwwe_ = Parameter()
        self.pwhj_ = Parameter()
        self.pbj_ = Parameter()
        self.pwjy_ = Parameter()
        self.pby_ = Parameter()
        self.src_fw_lstm_ = LSTM()
        self.src_bw_lstm_ = LSTM()
        self.trg_lstm_ = LSTM()
        self.scan_attributes()

    def init(self, src_vocab_size, trg_vocab_size, embed_size, hidden_size):
        self.psrc_lookup_.init([embed_size, src_vocab_size], I.XavierUniform())
        self.ptrg_lookup_.init([embed_size, trg_vocab_size], I.XavierUniform())
        # Attention MLP: scores encoder states (2 * hidden) against the
        # decoder state, producing one weight per source position.
        self.pwfbw_.init([2*hidden_size, hidden_size], I.XavierUniform())
        self.pwhw_.init([hidden_size, hidden_size], I.XavierUniform())
        self.pwwe_.init([hidden_size], I.XavierUniform())
        self.pwhj_.init([embed_size, hidden_size], I.XavierUniform())
        self.pbj_.init([embed_size], I.Constant(0))
        self.pwjy_.init([trg_vocab_size, embed_size], I.XavierUniform())
        self.pby_.init([trg_vocab_size], I.Constant(0))
        self.src_fw_lstm_.init(embed_size, hidden_size)
        self.src_bw_lstm_.init(embed_size, hidden_size)
        # Decoder input is [embedding ; context (2 * hidden)].
        self.trg_lstm_.init(embed_size+hidden_size*2, hidden_size)

    def encode(self, src_batch, train):
        # Embedding lookup.
        src_lookup = F.parameter(self.psrc_lookup_)
        e_list = []
        for x in src_batch:
            e = F.pick(src_lookup, x, 1)
            e = F.dropout(e, self.dropout_rate_, train)
            e_list.append(e)
        # Forward encoding
        self.src_fw_lstm_.reset()
        f_list = []
        for e in e_list:
            f = self.src_fw_lstm_.forward(e)
            f = F.dropout(f, self.dropout_rate_, train)
            f_list.append(f)
        # Backward encoding
        self.src_bw_lstm_.reset()
        b_list = []
        for e in reversed(e_list):
            b = self.src_bw_lstm_.forward(e)
            b = F.dropout(b, self.dropout_rate_, train)
            b_list.append(b)
        b_list.reverse()
        # Concatenates RNN states: [forward ; backward] per position, then
        # all positions along the time axis (axis 1).
        fb_list = [F.concat([f_list[i], b_list[i]], 0) for i in range(len(src_batch))]
        self.concat_fb = F.concat(fb_list, 1)
        self.t_concat_fb = F.transpose(self.concat_fb)
        # Initializes decode states.
        self.wfbw_ = F.parameter(self.pwfbw_)
        self.whw_ = F.parameter(self.pwhw_)
        self.wwe_ = F.parameter(self.pwwe_)
        self.trg_lookup_ = F.parameter(self.ptrg_lookup_)
        self.whj_ = F.parameter(self.pwhj_)
        self.bj_ = F.parameter(self.pbj_)
        self.wjy_ = F.parameter(self.pwjy_)
        self.by_ = F.parameter(self.pby_)
        self.trg_lstm_.reset()

    # One step decoding.
    def decode_step(self, trg_words, train):
        sentence_len = self.concat_fb.shape()[1]
        # Score each source position with a one-hidden-layer MLP.
        b = self.whw_ @ self.trg_lstm_.get_h()
        b = F.reshape(b, Shape([1, b.shape()[0]]))
        b = F.broadcast(b, 0, sentence_len)
        x = F.tanh(self.t_concat_fb @ self.wfbw_ + b)
        atten_prob = F.softmax(x @ self.wwe_, 0)
        c = self.concat_fb @ atten_prob
        e = F.pick(self.trg_lookup_, trg_words, 1)
        e = F.dropout(e, self.dropout_rate_, train)
        h = self.trg_lstm_.forward(F.concat([e, c], 0))
        h = F.dropout(h, self.dropout_rate_, train)
        j = F.tanh(self.whj_ @ h + self.bj_)
        return self.wjy_ @ j + self.by_

    # Calculates the loss function over given target sentences.
    def loss(self, trg_batch, train):
        # Each position predicts the next token, hence len - 1 steps.
        losses = []
        for i in range(len(trg_batch)-1):
            y = self.decode_step(trg_batch[i], train)
            loss = F.softmax_cross_entropy(y, trg_batch[i+1], 0)
            losses.append(loss)
        return F.batch.mean(F.sum(losses))
def __init__(self):
    self.dropout_rate = DROPOUT_RATE
    # Empty parameters/submodels; shapes are assigned later. The two
    # registration calls must run after all attributes exist.
    self.psrc_lookup = Parameter()
    self.ptrg_lookup = Parameter()
    self.pwhj = Parameter()
    self.pbj = Parameter()
    self.pwjy = Parameter()
    self.pby = Parameter()
    self.src_fw_lstm = LSTM()
    self.src_bw_lstm = LSTM()
    self.trg_lstm = LSTM()
    self.add_all_parameters()
    self.add_all_submodels()
class AttentionalEncoderDecoder(Model):
    """Encoder-decoder translation model with dot-attention."""

    def __init__(self):
        self.dropout_rate = DROPOUT_RATE
        # Empty parameters/submodels; shapes are assigned later in init().
        self.psrc_lookup = Parameter()
        self.ptrg_lookup = Parameter()
        self.pwhj = Parameter()
        self.pbj = Parameter()
        self.pwjy = Parameter()
        self.pby = Parameter()
        self.src_fw_lstm = LSTM()
        self.src_bw_lstm = LSTM()
        self.trg_lstm = LSTM()
        self.add_all_parameters()
        self.add_all_submodels()

    def init(self, src_vocab_size, trg_vocab_size, embed_size, hidden_size):
        """Creates a new AttentionalEncoderDecoder object."""
        self.psrc_lookup.init([embed_size, src_vocab_size], I.XavierUniform())
        self.ptrg_lookup.init([embed_size, trg_vocab_size], I.XavierUniform())
        # whj maps [decoder state ; context] (2 * hidden) back to embed size.
        self.pwhj.init([embed_size, 2 * hidden_size], I.XavierUniform())
        self.pbj.init([embed_size], I.Constant(0))
        self.pwjy.init([trg_vocab_size, embed_size], I.XavierUniform())
        self.pby.init([trg_vocab_size], I.Constant(0))
        self.src_fw_lstm.init(embed_size, hidden_size)
        self.src_bw_lstm.init(embed_size, hidden_size)
        # Decoder input is [embedding ; feed vector], hence 2 * embed_size.
        self.trg_lstm.init(2 * embed_size, hidden_size)

    def encode(self, src_batch, train):
        """Encodes source sentences and prepares internal states."""
        # Embedding lookup.
        src_lookup = F.parameter(self.psrc_lookup)
        e_list = []
        for x in src_batch:
            e = F.pick(src_lookup, x, 1)
            e = F.dropout(e, self.dropout_rate, train)
            e_list.append(e)
        # Forward encoding
        self.src_fw_lstm.restart()
        f_list = []
        for e in e_list:
            f = self.src_fw_lstm.forward(e)
            f = F.dropout(f, self.dropout_rate, train)
            f_list.append(f)
        # Backward encoding
        self.src_bw_lstm.restart()
        b_list = []
        for e in reversed(e_list):
            b = self.src_bw_lstm.forward(e)
            b = F.dropout(b, self.dropout_rate, train)
            b_list.append(b)
        b_list.reverse()
        # Sums forward/backward states element-wise, then concatenates the
        # per-position results along the time axis (axis 1).
        fb_list = [f_list[i] + b_list[i] for i in range(len(src_batch))]
        self.concat_fb = F.concat(fb_list, 1)
        self.t_concat_fb = F.transpose(self.concat_fb)
        # Initializes decode states.
        embed_size = self.psrc_lookup.shape()[0]
        self.trg_lookup = F.parameter(self.ptrg_lookup)
        self.whj = F.parameter(self.pwhj)
        self.bj = F.parameter(self.pbj)
        self.wjy = F.parameter(self.pwjy)
        self.by = F.parameter(self.pby)
        self.feed = F.zeros([embed_size])
        self.trg_lstm.restart(
            self.src_fw_lstm.get_c() + self.src_bw_lstm.get_c(),
            self.src_fw_lstm.get_h() + self.src_bw_lstm.get_h())

    def decode_step(self, trg_words, train):
        """One step decoding."""
        e = F.pick(self.trg_lookup, trg_words, 1)
        e = F.dropout(e, self.dropout_rate, train)
        # Input-feeding: previous attention output rides along with the token.
        h = self.trg_lstm.forward(F.concat([e, self.feed], 0))
        h = F.dropout(h, self.dropout_rate, train)
        # Dot-attention over the encoder states.
        atten_probs = F.softmax(self.t_concat_fb @ h, 0)
        c = self.concat_fb @ atten_probs
        self.feed = F.tanh(self.whj @ F.concat([h, c], 0) + self.bj)
        return self.wjy @ self.feed + self.by

    def loss(self, trg_batch, train):
        """Calculates loss values."""
        # Each position predicts the next token, hence len - 1 steps.
        losses = []
        for i in range(len(trg_batch) - 1):
            y = self.decode_step(trg_batch[i], train)
            loss = F.softmax_cross_entropy(y, trg_batch[i + 1], 0)
            losses.append(loss)
        return F.batch.mean(F.sum(losses))
def main():
    """Trains a two-layer CNN + two FC layers on MNIST using one CUDA device."""
    # Loads data
    train_inputs = load_images("data/train-images-idx3-ubyte", NUM_TRAIN_SAMPLES)
    train_labels = load_labels("data/train-labels-idx1-ubyte", NUM_TRAIN_SAMPLES)
    test_inputs = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES)
    test_labels = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES)

    dev = D.CUDA(0)
    Device.set_default(dev)
    g = Graph()
    Graph.set_default(g)

    # Parameters of CNNs
    # Shape: {kernel_height, kernel_width, in_channels, out_channels}
    pw_cnn1 = Parameter(
        Shape([KERNEL_SIZE1, KERNEL_SIZE1, 1, NUM_CHANNELS1]),
        I.XavierUniformConv2D())
    pw_cnn2 = Parameter(
        Shape([KERNEL_SIZE2, KERNEL_SIZE2, NUM_CHANNELS1, NUM_CHANNELS2]),
        I.XavierUniformConv2D())

    # Parameters of FC layers
    pw_fc1 = Parameter(Shape([NUM_HIDDEN_UNITS, NUM_INPUT_UNITS]), I.XavierUniform())
    pw_fc2 = Parameter(Shape([NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS]), I.XavierUniform())
    pb_fc1 = Parameter(Shape([NUM_HIDDEN_UNITS]), I.Constant(0))
    pb_fc2 = Parameter(Shape([NUM_OUTPUT_UNITS]), I.Constant(0))

    # Optimizer
    optimizer = O.SGD(.1)
    optimizer.add(pw_cnn1, pw_cnn2, pw_fc1, pw_fc2, pb_fc1, pb_fc2)

    # Helper lambda to construct the predictor network.
    def make_graph(inputs, train):
        # Input and parameters.
        #x = F.input(Shape([IMAGE_HEIGHT, IMAGE_WIDTH], BATCH_SIZE), inputs)
        x = F.input(inputs)
        w_cnn1 = F.parameter(pw_cnn1)
        w_cnn2 = F.parameter(pw_cnn2)
        w_fc1 = F.parameter(pw_fc1)
        w_fc2 = F.parameter(pw_fc2)
        b_fc1 = F.parameter(pb_fc1)
        b_fc2 = F.parameter(pb_fc2)
        # CNNs: conv -> relu -> 2x2 max-pool, twice.
        h_cnn1 = F.relu(F.conv2d(x, w_cnn1, PADDING1, PADDING1, 1, 1, 1, 1))
        h_pool1 = F.max_pool2d(h_cnn1, 2, 2, 0, 0, 2, 2)
        h_cnn2 = F.relu(
            F.conv2d(h_pool1, w_cnn2, PADDING2, PADDING2, 1, 1, 1, 1))
        h_pool2 = F.max_pool2d(h_cnn2, 2, 2, 0, 0, 2, 2)
        # FC layers
        x_fc = F.dropout(F.flatten(h_pool2), .5, train)
        h_fc = F.dropout(F.relu(F.matmul(w_fc1, x_fc) + b_fc1), .5, train)
        return F.matmul(w_fc2, h_fc) + b_fc2

    # Batch randomizer
    ids = list(range(NUM_TRAIN_SAMPLES))

    for epoch in range(MAX_EPOCH):
        # Shuffles sample IDs.
        random.shuffle(ids)
        # Training loop
        for batch in range(NUM_TRAIN_BATCHES):
            print("\rTraining... %d / %d" % (batch + 1, NUM_TRAIN_BATCHES), end="")
            # Makes a minibatch for training.
            inputs = [
                train_inputs[ids[batch * BATCH_SIZE + i]]
                for i in range(BATCH_SIZE)
            ]
            labels = [
                train_labels[ids[batch * BATCH_SIZE + i]]
                for i in range(BATCH_SIZE)
            ]
            # Constructs the graph.
            g.clear()
            y = make_graph(inputs, True)
            loss = F.softmax_cross_entropy(y, labels, 0)
            avg_loss = F.batch.mean(loss)
            # Dump computation graph at the first time.
            # if epoch == 0 and batch == 0:
            #     print(g.dump("dot"))
            # Implicit forward, backward, and updates parameters.
            optimizer.reset_gradients()
            avg_loss.backward()
            optimizer.update()
        print()
        match = 0
        # Test loop
        for batch in range(NUM_TEST_BATCHES):
            print("\rTesting... %d / %d" % (batch + 1, NUM_TEST_BATCHES), end="")
            # Makes a test minibatch.
            inputs = [
                test_inputs[batch * BATCH_SIZE + i]
                for i in range(BATCH_SIZE)
            ]
            # Constructs the graph.
            g.clear()
            y = make_graph(inputs, False)
            # Gets outputs, argmax, and compares them with the label.
            y_val = y.to_list()
            for i in range(BATCH_SIZE):
                maxval = -1e10
                argmax = -1
                for j in range(NUM_OUTPUT_UNITS):
                    v = y_val[j + i * NUM_OUTPUT_UNITS]
                    if v > maxval:
                        maxval = v
                        argmax = j
                if argmax == test_labels[i + batch * BATCH_SIZE]:
                    match += 1
        accuracy = 100.0 * match / NUM_TEST_SAMPLES
        print("epoch %d: accuracy: %.2f%%" % (epoch, accuracy))
    return 0
def __init__(self, in_size, out_size, trainer):
    # Affine layer parameters, registered with the trainer immediately.
    self.pw_ = Parameter([out_size, in_size], I.Uniform(-0.1, 0.1))
    self.pb_ = Parameter([out_size], I.Constant(0))
    trainer.add_parameter(self.pw_)
    trainer.add_parameter(self.pb_)
def setUp(self):
    # Fresh Naive device per test so parameter memory is isolated;
    # the parameter holds known values 1..8 for exact assertions.
    self.dev = D.Naive()
    Device.set_default(self.dev)
    self.p = Parameter([8], I.Constant(0))
    self.p.value.reset_by_vector([1, 2, 3, 4, 5, 6, 7, 8])
def main():
    """Trains an MNIST MLP split across two GPUs (layer 1 on GPU 0, layer 2 on GPU 1)."""
    # Loads data
    train_inputs = load_images("data/train-images-idx3-ubyte", NUM_TRAIN_SAMPLES)
    train_labels = load_labels("data/train-labels-idx1-ubyte", NUM_TRAIN_SAMPLES)
    test_inputs = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES)
    test_labels = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES)

    # Initializes 2 device objects which manage different GPUs.
    dev0 = D.CUDA(0)
    dev1 = D.CUDA(1)

    # Parameters on GPU 0.
    pw1 = Parameter([NUM_HIDDEN_UNITS, NUM_INPUT_UNITS], I.XavierUniform(), dev0)
    pb1 = Parameter([NUM_HIDDEN_UNITS], I.Constant(0), dev0)
    # Parameters on GPU 1.
    pw2 = Parameter([NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS], I.XavierUniform(), dev1)
    pb2 = Parameter([NUM_OUTPUT_UNITS], I.Constant(0), dev1)

    trainer = T.SGD(.1)
    trainer.add_parameter(pw1)
    trainer.add_parameter(pb1)
    trainer.add_parameter(pw2)
    trainer.add_parameter(pb2)

    def make_graph(inputs):
        # We first store input values explicitly on GPU 0.
        x = F.input(inputs, device=dev0)
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        # The hidden layer is calculated and implicitly stored on GPU 0.
        h_on_gpu0 = F.relu(w1 @ x + b1)
        # `copy()` transfers the hidden layer to GPU 1.
        h_on_gpu1 = F.copy(h_on_gpu0, dev1)
        # The output layer is calculated and implicitly stored on GPU 1.
        return w2 @ h_on_gpu1 + b2

    ids = list(range(NUM_TRAIN_SAMPLES))
    g = Graph()
    Graph.set_default(g)
    for epoch in range(MAX_EPOCH):
        random.shuffle(ids)
        # Training loop
        for batch in range(NUM_TRAIN_BATCHES):
            print("\rTraining... %d / %d" % (batch + 1, NUM_TRAIN_BATCHES), end="")
            inputs = [train_inputs[ids[batch * BATCH_SIZE + i]]
                      for i in range(BATCH_SIZE)]
            labels = [train_labels[ids[batch * BATCH_SIZE + i]]
                      for i in range(BATCH_SIZE)]
            g.clear()
            y = make_graph(inputs)
            loss = F.softmax_cross_entropy(y, labels, 0)
            avg_loss = F.batch.mean(loss)
            trainer.reset_gradients()
            avg_loss.backward()
            trainer.update()
        print()
        match = 0
        # Test loop
        for batch in range(NUM_TEST_BATCHES):
            print("\rTesting... %d / %d" % (batch + 1, NUM_TEST_BATCHES), end="")
            inputs = [test_inputs[batch * BATCH_SIZE + i]
                      for i in range(BATCH_SIZE)]
            g.clear()
            y = make_graph(inputs)
            # Gets outputs, argmax, and compares them with the label.
            y_val = y.to_list()
            for i in range(BATCH_SIZE):
                maxval = -1e10
                argmax = -1
                for j in range(NUM_OUTPUT_UNITS):
                    v = y_val[j + i * NUM_OUTPUT_UNITS]
                    if (v > maxval):
                        maxval = v
                        argmax = j
                if argmax == test_labels[i + batch * BATCH_SIZE]:
                    match += 1
        accuracy = 100.0 * match / NUM_TEST_SAMPLES
        print("\nepoch %d: accuracy: %.2f%%\n" % (epoch, accuracy))
def test_Parameter_argument(self):
    """Checks every accepted combination of shape/init constructor arguments."""
    # shape w/o data
    p = Parameter(Shape([2, 3]))
    self.assertEqual(p.shape(), Shape([2, 3]))
    # shape w/ Initializer
    p = Parameter(Shape([4, 3]), I.Constant(1))
    self.assertEqual(p.shape(), Shape([4, 3]))
    self.assertEqual(p.value.to_list(), [1] * 12)
    # shape w/ list[float]
    p = Parameter(Shape([4, 3]), self.list_data[:12])
    self.assertEqual(p.shape(), Shape([4, 3]))
    self.assertEqual(p.value.to_list(), self.list_data[:12])
    # ndarray w/o shape: the shape is inferred from the array itself.
    p = Parameter(init=self.ndarray_data[0])
    self.assertEqual(p.shape(), Shape([4, 3]))
    self.assertEqual(p.value.to_list(), self.list_data[:12])
    # ndarray w/ shape: explicit shape wins over the array's own shape.
    p = Parameter(Shape([2, 6]), init=self.ndarray_data[0])
    self.assertEqual(p.shape(), Shape([2, 6]))
    self.assertEqual(p.value.to_list(), self.list_data[:12])
    # list[float] w/o shape is ambiguous and must be rejected.
    self.assertRaises(TypeError, lambda: Parameter(init=self.list_data[:12]))
def test_model_load_save(self):
    """Round-trips a two-level model hierarchy through save()/load()."""
    # Source hierarchy with known parameter values.
    submodel = TestModel()
    sp1 = Parameter([2, 4], I.Constant(0))
    sp1.value = tF.input(np.array([[0, 1, 2, 3], [4, 5, 6, 7]]))
    sp2 = Parameter([2, 4], I.Constant(0))
    sp2.value = tF.input(np.array([[9, 8, 7, 6], [5, 4, 3, 2]]))
    submodel.add("sp1", sp1)
    submodel.add("sp2", sp2)
    parentmodel = TestModel()
    p1 = Parameter([4, 2], I.Constant(0))
    p1.value = tF.input(np.array([[0, 1], [2, 3], [4, 5], [6, 7]]))
    p2 = Parameter([4, 2], I.Constant(0))
    p2.value = tF.input(np.array([[9, 8], [7, 6], [5, 4], [3, 2]]))
    parentmodel.add("p1", p1)
    parentmodel.add("p2", p2)
    parentmodel.add("sub", submodel)
    # Destination hierarchy with uninitialized parameters of the same layout.
    submodel_load = TestModel()
    sp1 = Parameter()
    sp2 = Parameter()
    submodel_load.add("sp1", sp1)
    submodel_load.add("sp2", sp2)
    parentmodel_load = TestModel()
    p1 = Parameter()
    p2 = Parameter()
    parentmodel_load.add("p1", p1)
    parentmodel_load.add("p2", p2)
    parentmodel_load.add("sub", submodel_load)
    # Save to a temporary file and load it into the destination hierarchy.
    with tempfile.NamedTemporaryFile() as fp:
        parentmodel.save(fp.name)
        parentmodel_load.load(fp.name)
        # All values, including those of the nested submodel, must survive.
        self.assertTrue(
            (parentmodel_load["p1"].value.to_ndarrays()[0] == np.array(
                [[0, 1], [2, 3], [4, 5], [6, 7]])).all())
        self.assertTrue(
            (parentmodel_load["p2"].value.to_ndarrays()[0] == np.array(
                [[9, 8], [7, 6], [5, 4], [3, 2]])).all())
        self.assertTrue(
            (parentmodel_load["sub", "sp1"].value.to_ndarrays()[0] == np.array(
                [[0, 1, 2, 3], [4, 5, 6, 7]])).all())
        self.assertTrue(
            (parentmodel_load["sub", "sp2"].value.to_ndarrays()[0] == np.array(
                [[9, 8, 7, 6], [5, 4, 3, 2]])).all())
def __init__(self, name, src_vocab_size, trg_vocab_size, embed_size,
             hidden_size, dropout_rate):
    """Creates all parameters and sub-LSTMs of the encoder-decoder."""
    self.name_ = name
    self.embed_size_ = embed_size
    self.dropout_rate_ = dropout_rate

    # Embedding lookup tables for the source and target vocabularies.
    self.psrc_lookup_ = Parameter(
        [embed_size, src_vocab_size], I.XavierUniform())
    self.ptrg_lookup_ = Parameter(
        [embed_size, trg_vocab_size], I.XavierUniform())

    # Projections: 2*hidden (bidirectional) -> embed -> target vocabulary.
    self.pwhj_ = Parameter([embed_size, 2 * hidden_size], I.XavierUniform())
    self.pbj_ = Parameter([embed_size], I.Constant(0))
    self.pwjy_ = Parameter([trg_vocab_size, embed_size], I.XavierUniform())
    self.pby_ = Parameter([trg_vocab_size], I.Constant(0))

    # Recurrent cells: forward/backward source encoders and the decoder,
    # whose input is the concatenation of two embeddings (embed_size * 2).
    self.src_fw_lstm_ = LSTM(name + "_src_fw_lstm", embed_size, hidden_size)
    self.src_bw_lstm_ = LSTM(name + "_src_bw_lstm", embed_size, hidden_size)
    self.trg_lstm_ = LSTM(name + "_trg_lstm", embed_size * 2, hidden_size)
def __init__(self, name, in_size, out_size):
    """Creates the LSTM parameters; the four gates are stacked row-wise."""
    self.name_ = name
    self.out_size_ = out_size
    # One 4*out_size matrix/bias holds all four gate blocks at once.
    self.pwxh_ = Parameter([4 * out_size, in_size], I.XavierUniform())
    self.pwhh_ = Parameter([4 * out_size, out_size], I.XavierUniform())
    self.pbh_ = Parameter([4 * out_size], I.Constant(0))
def __init__(self, in_size, out_size):
    """Creates the cell parameters and registers them with the model."""
    self.out_size = out_size
    # Single stacked weight (3 * out_size rows) plus two bias vectors;
    # presumably one block per internal gate — confirm against forward().
    self.pw = Parameter([3 * out_size, in_size], I.Uniform(-0.1, 0.1))
    self.pbf = Parameter([out_size], I.Constant(0))
    self.pbr = Parameter([out_size], I.Constant(0))
    self.add_all_parameters()
def test_model_parameter(self):
    """A parameter added under a name is retrievable by str or tuple key."""
    model = Model()
    param = Parameter()
    model.add("p", param)
    # Both the plain-string key and the 1-tuple key resolve to the same
    # object (identity, not just equality).
    self.assertIs(model["p"], param)
    self.assertIs(model[("p",)], param)
def main():
    """Trains a two-layer MLP on MNIST and prints test accuracy per epoch."""
    # Loads data.
    train_inputs = load_images("data/train-images-idx3-ubyte", NUM_TRAIN_SAMPLES)
    train_labels = load_labels("data/train-labels-idx1-ubyte", NUM_TRAIN_SAMPLES)
    test_inputs = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES)
    test_labels = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES)

    dev = D.Naive()  # or D.CUDA(gpuid)
    Device.set_default(dev)

    # Network parameters: input -> hidden -> output.
    pw1 = Parameter([NUM_HIDDEN_UNITS, NUM_INPUT_UNITS], I.XavierUniform())
    pb1 = Parameter([NUM_HIDDEN_UNITS], I.Constant(0))
    pw2 = Parameter([NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS], I.XavierUniform())
    pb2 = Parameter([NUM_OUTPUT_UNITS], I.Constant(0))

    optimizer = O.SGD(.5)
    optimizer.add(pw1, pb1, pw2, pb2)

    def make_graph(inputs, train):
        """Builds the forward graph; dropout is active only when training."""
        x = F.input(inputs)
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        h = F.relu(w1 @ x + b1)
        h = F.dropout(h, .5, train)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        return w2 @ h + b2

    ids = list(range(NUM_TRAIN_SAMPLES))

    g = Graph()
    Graph.set_default(g)

    for epoch in range(MAX_EPOCH):
        random.shuffle(ids)

        # Training loop.
        for batch in range(NUM_TRAIN_BATCHES):
            print("\rTraining... %d / %d" % (batch + 1, NUM_TRAIN_BATCHES),
                  end="")
            chosen = ids[batch * BATCH_SIZE:(batch + 1) * BATCH_SIZE]
            inputs = [train_inputs[k] for k in chosen]
            labels = [train_labels[k] for k in chosen]

            g.clear()
            y = make_graph(inputs, True)
            loss = F.softmax_cross_entropy(y, labels, 0)
            avg_loss = F.batch.mean(loss)

            optimizer.reset_gradients()
            avg_loss.backward()
            optimizer.update()

        print()

        match = 0

        # Test loop.
        for batch in range(NUM_TEST_BATCHES):
            print("\rTesting... %d / %d" % (batch + 1, NUM_TEST_BATCHES),
                  end="")
            base = batch * BATCH_SIZE
            inputs = [test_inputs[base + i] for i in range(BATCH_SIZE)]

            g.clear()
            y = make_graph(inputs, False)
            y_val = y.to_list()

            for i in range(BATCH_SIZE):
                # Manual argmax over this sample's scores; strict '>' keeps
                # the first index on ties, matching the original scan.
                scores = y_val[i * NUM_OUTPUT_UNITS:(i + 1) * NUM_OUTPUT_UNITS]
                best_val, best_idx = -1e10, -1
                for j, v in enumerate(scores):
                    if v > best_val:
                        best_val, best_idx = v, j
                if best_idx == test_labels[base + i]:
                    match += 1

        accuracy = 100.0 * match / NUM_TEST_SAMPLES
        print("\nepoch %d: accuracy: %.2f%%\n" % (epoch, accuracy))