def test_ModelTest_CheckSaveLoad_Insufficient(self):
    shape = Shape([2, 2])
    values1 = [1, 2, 3, 4]
    values2 = [5, 6, 7, 8]
    tmp = tempfile.NamedTemporaryFile()

    m1 = Model()
    m2 = Model()
    p1 = Parameter(shape, I.Constant(0))
    p1.value += tF.raw_input(shape, values1)
    p2 = Parameter(shape, I.Constant(0))
    p2.value += tF.raw_input(shape, values2)
    m1.add("p", p1)
    m2.add("p", p2)
    m1.add("sm", m2)
    m1.save(tmp.name)

    m1 = Model()
    m2 = Model()
    p1 = Parameter()
    m1.add("p", p1)
    m1.add("sm", m2)
    with self.assertRaises(RuntimeError):
        m1.load(tmp.name)
def main():
    with DefaultScopeDevice(CPUDevice()):
        pw1 = Parameter("w1", [8, 2], I.XavierUniform())
        pb1 = Parameter("b1", [8], I.Constant(0))
        pw2 = Parameter("w2", [1, 8], I.XavierUniform())
        pb2 = Parameter("b2", [], I.Constant(0))

        trainer = T.SGD(0.1)
        trainer.add_parameter(pw1)
        trainer.add_parameter(pb1)
        trainer.add_parameter(pw2)
        trainer.add_parameter(pb2)

        input_data = np.array(
            [
                [1, 1],    # Sample 1
                [1, -1],   # Sample 2
                [-1, 1],   # Sample 3
                [-1, -1],  # Sample 4
            ], dtype=np.float32)
        output_data = np.array(
            [
                1,   # Label 1
                -1,  # Label 2
                -1,  # Label 3
                1,   # Label 4
            ], dtype=np.float32)

        for i in range(100):
            g = Graph()
            with DefaultScopeGraph(g):
                # Builds a computation graph.
                # x = F.input(shape=Shape([2], 4), data=input_data)
                x = F.input(data=input_data)
                w1 = F.input(param=pw1)
                b1 = F.input(param=pb1)
                w2 = F.input(param=pw2)
                b2 = F.input(param=pb2)
                h = F.tanh(F.matmul(w1, x) + b1)
                y = F.matmul(w2, h) + b2

                # Calculates values.
                y_val = g.forward(y).to_list()
                print("epoch ", i, ":")
                for j in range(4):
                    print("  [", j, "]: ", y_val[j])

                # t = F.input(shape=Shape([], 4), data=output_data)
                t = F.input(data=output_data)
                diff = t - y
                loss = F.batch.mean(diff * diff)
                loss_val = g.forward(loss).to_list()[0]
                print("  loss: ", loss_val)

                trainer.reset_gradients()
                g.backward(loss)
                trainer.update()
def test_ModelTest_CheckSaveLoad_Same(self):
    shape = Shape([2, 2])
    values1 = [1, 2, 3, 4]
    values2 = [5, 6, 7, 8]
    tmp = tempfile.NamedTemporaryFile()

    m1 = Model()
    m2 = Model()
    p1 = Parameter(shape, I.Constant(0))
    p1.value += tF.raw_input(shape, values1)
    p2 = Parameter(shape, I.Constant(0))
    p2.value += tF.raw_input(shape, values2)
    m1.add("p", p1)
    m2.add("p", p2)
    m1.add("sm", m2)
    m1.save(tmp.name)

    m1 = Model()
    m2 = Model()
    p1 = Parameter()
    p2 = Parameter()
    m1.add("p", p1)
    m2.add("p", p2)
    m1.add("sm", m2)
    m1.load(tmp.name)

    self.assertTrue(p1.valid())
    self.assertTrue(p2.valid())
    self.assertEqual(shape, p1.shape())
    self.assertEqual(shape, p2.shape())
    self.assertEqual(values1, p1.value.to_list())
    self.assertEqual(values2, p2.value.to_list())
def __init__(self, in_size, out_size, trainer):
    self.out_size_ = out_size
    self.pw_ = Parameter([3 * out_size, in_size], I.Uniform(-0.1, 0.1))
    self.pbf_ = Parameter([out_size], I.Constant(0))
    self.pbr_ = Parameter([out_size], I.Constant(0))
    trainer.add_parameter(self.pw_)
    trainer.add_parameter(self.pbf_)
    trainer.add_parameter(self.pbr_)
def init(self, src_vocab_size, trg_vocab_size, embed_size, hidden_size): """Creates a new AttentionalEncoderDecoder object.""" self.psrc_lookup.init([embed_size, src_vocab_size], I.XavierUniform()) self.ptrg_lookup.init([embed_size, trg_vocab_size], I.XavierUniform()) self.pwhj.init([embed_size, 2 * hidden_size], I.XavierUniform()) self.pbj.init([embed_size], I.Constant(0)) self.pwjy.init([trg_vocab_size, embed_size], I.XavierUniform()) self.pby.init([trg_vocab_size], I.Constant(0)) self.src_fw_lstm.init(embed_size, hidden_size) self.src_bw_lstm.init(embed_size, hidden_size) self.trg_lstm.init(2 * embed_size, hidden_size)
def test_model_load_save(self):
    submodel = TestModel()
    submodel.sp1 = Parameter([2, 4], I.Constant(0))
    submodel.sp1.value = tF.input(np.array([[0, 1, 2, 3], [4, 5, 6, 7]]))
    submodel.sp2 = Parameter([2, 4], I.Constant(0))
    submodel.sp2.value = tF.input(np.array([[9, 8, 7, 6], [5, 4, 3, 2]]))
    submodel.add("sp1", submodel.sp1)
    submodel.add("sp2", submodel.sp2)

    parentmodel = TestModel()
    parentmodel.p1 = Parameter([4, 2], I.Constant(0))
    parentmodel.p1.value = tF.input(
        np.array([[0, 1], [2, 3], [4, 5], [6, 7]]))
    parentmodel.p2 = Parameter([4, 2], I.Constant(0))
    parentmodel.p2.value = tF.input(
        np.array([[9, 8], [7, 6], [5, 4], [3, 2]]))
    parentmodel.sub = submodel
    parentmodel.add("p1", parentmodel.p1)
    parentmodel.add("p2", parentmodel.p2)
    parentmodel.add("sub", parentmodel.sub)

    submodel_load = TestModel()
    submodel_load.sp1 = Parameter()
    submodel_load.sp2 = Parameter()
    submodel_load.add("sp1", submodel_load.sp1)
    submodel_load.add("sp2", submodel_load.sp2)

    parentmodel_load = TestModel()
    parentmodel_load.p1 = Parameter()
    parentmodel_load.p2 = Parameter()
    parentmodel_load.sub = submodel_load
    parentmodel_load.add("p1", parentmodel_load.p1)
    parentmodel_load.add("p2", parentmodel_load.p2)
    parentmodel_load.add("sub", parentmodel_load.sub)

    with tempfile.NamedTemporaryFile() as fp:
        parentmodel.save(fp.name)
        parentmodel_load.load(fp.name)

    self.assertTrue(
        (parentmodel_load.p1.value.to_ndarrays()[0]
         == np.array([[0, 1], [2, 3], [4, 5], [6, 7]])).all())
    self.assertTrue(
        (parentmodel_load.p2.value.to_ndarrays()[0]
         == np.array([[9, 8], [7, 6], [5, 4], [3, 2]])).all())
    self.assertTrue(
        (parentmodel_load.sub.sp1.value.to_ndarrays()[0]
         == np.array([[0, 1, 2, 3], [4, 5, 6, 7]])).all())
    self.assertTrue(
        (parentmodel_load.sub.sp2.value.to_ndarrays()[0]
         == np.array([[9, 8, 7, 6], [5, 4, 3, 2]])).all())
def init(self, src_vocab_size, trg_vocab_size, embed_size, hidden_size):
    self.psrc_lookup_.init([embed_size, src_vocab_size], I.XavierUniform())
    self.ptrg_lookup_.init([embed_size, trg_vocab_size], I.XavierUniform())
    self.pwfbw_.init([2 * hidden_size, hidden_size], I.XavierUniform())
    self.pwhw_.init([hidden_size, hidden_size], I.XavierUniform())
    self.pwwe_.init([hidden_size], I.XavierUniform())
    self.pwhj_.init([embed_size, hidden_size], I.XavierUniform())
    self.pbj_.init([embed_size], I.Constant(0))
    self.pwjy_.init([trg_vocab_size, embed_size], I.XavierUniform())
    self.pby_.init([trg_vocab_size], I.Constant(0))
    self.src_fw_lstm_.init(embed_size, hidden_size)
    self.src_bw_lstm_.init(embed_size, hidden_size)
    self.trg_lstm_.init(embed_size + hidden_size * 2, hidden_size)
def test_device_instance(self):
    dev = Device.get_default()
    self.assertIs(dev, self.device)

    tensor = tF.raw_input([], [0])
    dev = tensor.device()
    self.assertIs(dev, self.device)

    node = F.raw_input([], [0])
    dev = node.device()
    self.assertIs(dev, self.device)

    my_device = Naive()
    self.assertIsNot(my_device, self.device)
    node = F.raw_input([], [0], device=my_device)
    dev = node.device()
    self.assertIs(dev, my_device)
    dev = self.graph.get_device(node)
    self.assertIs(dev, my_device)

    param = Parameter([], I.Constant(1))
    dev = param.device()
    self.assertIs(dev, self.device)
def test_Parameter_argument(self):
    # shape w/o data
    p = Parameter(Shape([2, 3]))
    self.assertEqual(p.shape(), Shape([2, 3]))

    # shape w/ Initializer
    p = Parameter(Shape([4, 3]), I.Constant(1))
    self.assertEqual(p.shape(), Shape([4, 3]))
    self.assertEqual(p.value.to_list(), [1] * 12)

    # shape w/ list[float]
    p = Parameter(Shape([4, 3]), self.list_data[:12])
    self.assertEqual(p.shape(), Shape([4, 3]))
    self.assertEqual(p.value.to_list(), self.list_data[:12])

    # ndarray w/o shape
    p = Parameter(init=self.ndarray_data[0])
    self.assertEqual(p.shape(), Shape([4, 3]))
    self.assertEqual(p.value.to_list(), self.list_data[:12])

    # ndarray w/ shape
    p = Parameter(Shape([2, 6]), init=self.ndarray_data[0])
    self.assertEqual(p.shape(), Shape([2, 6]))
    self.assertEqual(p.value.to_list(), self.list_data[:12])

    # list[float] w/o shape
    self.assertRaises(TypeError,
                      lambda: Parameter(init=self.list_data[:12]))
def __init__(self, in_size, out_size, trainer):
    self.out_size_ = out_size
    self.pwxh_ = Parameter([4 * out_size, in_size], I.Uniform(-0.1, 0.1))
    self.pwhh_ = Parameter([4 * out_size, out_size], I.Uniform(-0.1, 0.1))
    self.pbh_ = Parameter([4 * out_size], I.Constant(0))
    trainer.add_parameter(self.pwxh_)
    trainer.add_parameter(self.pwhh_)
    trainer.add_parameter(self.pbh_)
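# The sketch below is illustrative and not part of the original source: a
# minimal LSTM step matching the packed parameters above, assuming the gate
# order [input, forget, output, candidate] along the first axis. F.slice,
# F.sigmoid, F.tanh, and F.zeros are standard primitiv operations; the
# restart/forward names and the h_/c_ state attributes are assumptions.
def restart(self):
    # Loads the parameters into the active graph and zeroes the state.
    self.wxh_ = F.parameter(self.pwxh_)
    self.whh_ = F.parameter(self.pwhh_)
    self.bh_ = F.parameter(self.pbh_)
    self.h_ = self.c_ = F.zeros([self.out_size_])

def forward(self, x):
    # One affine transform computes all four gates; slices split them apart.
    u = self.wxh_ @ x + self.whh_ @ self.h_ + self.bh_
    i = F.sigmoid(F.slice(u, 0, 0, self.out_size_))
    f = F.sigmoid(F.slice(u, 0, self.out_size_, 2 * self.out_size_))
    o = F.sigmoid(F.slice(u, 0, 2 * self.out_size_, 3 * self.out_size_))
    j = F.tanh(F.slice(u, 0, 3 * self.out_size_, 4 * self.out_size_))
    self.c_ = i * j + f * self.c_
    self.h_ = o * F.tanh(self.c_)
    return self.h_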
def init(self, src_vocab_size, trg_vocab_size, embed_size, hidden_size): """Creates a new EncoderDecoder object.""" self.psrc_lookup.init([embed_size, src_vocab_size], I.XavierUniform()) self.ptrg_lookup.init([embed_size, trg_vocab_size], I.XavierUniform()) self.pwhy.init([trg_vocab_size, hidden_size], I.XavierUniform()) self.pby.init([trg_vocab_size], I.Constant(0)) self.src_lstm.init(embed_size, hidden_size) self.trg_lstm.init(embed_size, hidden_size)
def test_Parameter_argument(self):
    # no argument
    p = Parameter()
    self.assertFalse(p.valid())

    # shape w/ Initializer
    p = Parameter(Shape([4, 3]), I.Constant(1))
    self.assertEqual(p.shape(), Shape([4, 3]))
    self.assertEqual(p.value.to_list(), [1] * 12)
def test_ModelTest_CheckSaveLoadWithStats(self):
    shape = Shape([2, 2])
    values1 = [1, 2, 3, 4]
    values2 = [5, 6, 7, 8]
    stats1 = [10, 20, 30, 40]
    stats2 = [50, 60, 70, 80]
    tmp = tempfile.NamedTemporaryFile()

    m1 = Model()
    m2 = Model()
    p1 = Parameter(shape, I.Constant(0))
    p1.value += tF.raw_input(shape, values1)
    p2 = Parameter(shape, I.Constant(0))
    p2.value += tF.raw_input(shape, values2)
    p1.add_stats("a", shape)
    p2.add_stats("b", shape)
    p1.stats["a"].reset_by_vector(stats1)
    p2.stats["b"].reset_by_vector(stats2)
    m1.add("p", p1)
    m2.add("p", p2)
    m1.add("sm", m2)
    m1.save(tmp.name)

    m1 = Model()
    m2 = Model()
    p1 = Parameter()
    p2 = Parameter()
    m1.add("p", p1)
    m2.add("p", p2)
    m1.add("sm", m2)
    m1.load(tmp.name)

    self.assertTrue(p1.valid())
    self.assertTrue(p2.valid())
    self.assertEqual(shape, p1.shape())
    self.assertEqual(shape, p2.shape())
    self.assertEqual(values1, p1.value.to_list())
    self.assertEqual(values2, p2.value.to_list())
    self.assertTrue("a" in p1.stats)
    self.assertTrue("b" in p2.stats)
    self.assertEqual(stats1, p1.stats["a"].to_list())
    self.assertEqual(stats2, p2.stats["b"].to_list())
def __init__(self, name, src_vocab_size, trg_vocab_size, embed_size,
             hidden_size, dropout_rate):
    self.name_ = name
    self.embed_size_ = embed_size
    self.dropout_rate_ = dropout_rate
    self.psrc_lookup_ = Parameter([embed_size, src_vocab_size],
                                  I.XavierUniform())
    self.ptrg_lookup_ = Parameter([embed_size, trg_vocab_size],
                                  I.XavierUniform())
    self.pwhj_ = Parameter([embed_size, 2 * hidden_size], I.XavierUniform())
    self.pbj_ = Parameter([embed_size], I.Constant(0))
    self.pwjy_ = Parameter([trg_vocab_size, embed_size], I.XavierUniform())
    self.pby_ = Parameter([trg_vocab_size], I.Constant(0))
    self.src_fw_lstm_ = LSTM(name + "_src_fw_lstm", embed_size, hidden_size)
    self.src_bw_lstm_ = LSTM(name + "_src_bw_lstm", embed_size, hidden_size)
    self.trg_lstm_ = LSTM(name + "_trg_lstm", embed_size * 2, hidden_size)
def train_func(trainer):
    dev = D.Naive(12345)
    Device.set_default(dev)
    g = Graph()
    Graph.set_default(g)

    pw1 = Parameter([8, 2], I.XavierUniform())
    pb1 = Parameter([8], I.Constant(0))
    pw2 = Parameter([1, 8], I.XavierUniform())
    pb2 = Parameter([1], I.Constant(0))

    trainer.add_parameter(pw1)
    trainer.add_parameter(pb1)
    trainer.add_parameter(pw2)
    trainer.add_parameter(pb2)

    input_data = [1, 1, 1, -1, -1, 1, -1, -1]
    output_data = [1, -1, -1, 1]

    for i in range(10):
        g.clear()
        x = F.input(input_data, Shape([2], 4))
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        h = F.tanh(w1 @ x + b1)
        y = w2 @ h + b2

        t = F.input(output_data, Shape([], 4))
        diff = t - y
        loss = F.batch.mean(diff * diff)

        trainer.reset_gradients()
        loss.backward()
        trainer.update()

    return [
        pw1.value.to_list(),
        pb1.value.to_list(),
        pw2.value.to_list(),
        pb2.value.to_list(),
    ]
def test_optimizer_add(self):
    model = TestModel()
    p = Parameter([5], I.Constant(0))
    p.gradient = tF.raw_input([5], [1, 2, 3, 4, 5])

    optimizer = O.Adam()
    optimizer.set_weight_decay(1e-6)
    optimizer.set_gradient_clipping(5)
    optimizer.add(model)
    optimizer.add(p)

    self.assertEqual(p.gradient.to_list(), [1, 2, 3, 4, 5])
    self.assertEqual(model.param.gradient.to_list(), [1, 2, 3, 4, 5])

    optimizer.reset_gradients()
    self.assertEqual(p.gradient.to_list(), [0, 0, 0, 0, 0])
    self.assertEqual(model.param.gradient.to_list(), [0, 0, 0, 0, 0])
def test_tensor_instance(self):
    param = Parameter([], I.Constant(1))
    t_origin = param.gradient
    t = param.gradient
    self.assertIs(t, t_origin)

    # A copy has equal contents but a distinct identity.
    t = Tensor(t_origin)
    self.assertEqual(t.to_list(), t_origin.to_list())
    self.assertIsNot(t, t_origin)

    # In-place ops keep the identity; binary ops create a new object.
    t = t_origin
    t *= 2
    self.assertIs(t, t_origin)
    t = t * 2
    self.assertIsNot(t, t_origin)
def __init__(self, in_size, out_size):
    self.out_size = out_size
    self.pw = Parameter([3 * out_size, in_size], I.Uniform(-0.1, 0.1))
    self.pbf = Parameter([out_size], I.Constant(0))
    self.pbr = Parameter([out_size], I.Constant(0))
    self.add_all_parameters()
def __init__(self, name, in_size, out_size):
    self.name_ = name
    self.out_size_ = out_size
    self.pwxh_ = Parameter([4 * out_size, in_size], I.XavierUniform())
    self.pwhh_ = Parameter([4 * out_size, out_size], I.XavierUniform())
    self.pbh_ = Parameter([4 * out_size], I.Constant(0))
def init(self, in_size, out_size):
    """Creates a new LSTM."""
    self._pwxh.init([4 * out_size, in_size], I.XavierUniform())
    self._pwhh.init([4 * out_size, out_size], I.XavierUniform())
    self._pbh.init([4 * out_size], I.Constant(0))
def __init__(self, in_size, out_size):
    self.out_size = out_size
    self.pw = Parameter([3 * out_size, in_size], I.Uniform(-0.1, 0.1))
    self.pbf = Parameter([out_size], I.Constant(0))
    self.pbr = Parameter([out_size], I.Constant(0))
    self.scan_attributes()
def main(): # Loads data train_inputs = load_images("data/train-images-idx3-ubyte", NUM_TRAIN_SAMPLES) train_labels = load_labels("data/train-labels-idx1-ubyte", NUM_TRAIN_SAMPLES) test_inputs = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES) test_labels = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES) # Initializes 2 device objects which manage different GPUs. dev0 = D.CUDA(0) dev1 = D.CUDA(1) # Parameters on GPU 0. pw1 = Parameter([NUM_HIDDEN_UNITS, NUM_INPUT_UNITS], I.XavierUniform(), dev0) pb1 = Parameter([NUM_HIDDEN_UNITS], I.Constant(0), dev0) # Parameters on GPU 1. pw2 = Parameter([NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS], I.XavierUniform(), dev1) pb2 = Parameter([NUM_OUTPUT_UNITS], I.Constant(0), dev1) trainer = T.SGD(.1) trainer.add_parameter(pw1) trainer.add_parameter(pb1) trainer.add_parameter(pw2) trainer.add_parameter(pb2) def make_graph(inputs): # We first store input values explicitly on GPU 0. x = F.input(inputs, device=dev0) w1 = F.parameter(pw1) b1 = F.parameter(pb1) w2 = F.parameter(pw2) b2 = F.parameter(pb2) # The hidden layer is calculated and implicitly stored on GPU 0. h_on_gpu0 = F.relu(w1 @ x + b1) # `copy()` transfers the hiddne layer to GPU 1. h_on_gpu1 = F.copy(h_on_gpu0, dev1) # The output layer is calculated and implicitly stored on GPU 1. return w2 @ h_on_gpu1 + b2 ids = list(range(NUM_TRAIN_SAMPLES)) g = Graph() Graph.set_default(g) for epoch in range(MAX_EPOCH): random.shuffle(ids) # Training loop for batch in range(NUM_TRAIN_BATCHES): print("\rTraining... %d / %d" % (batch + 1, NUM_TRAIN_BATCHES), end="") inputs = [train_inputs[ids[batch * BATCH_SIZE + i]] for i in range(BATCH_SIZE)] labels = [train_labels[ids[batch * BATCH_SIZE + i]] for i in range(BATCH_SIZE)] g.clear() y = make_graph(inputs) loss = F.softmax_cross_entropy(y, labels, 0) avg_loss = F.batch.mean(loss) trainer.reset_gradients() avg_loss.backward() trainer.update() print() match = 0 # Test loop for batch in range(NUM_TEST_BATCHES): print("\rTesting... %d / %d" % (batch + 1, NUM_TEST_BATCHES), end="") inputs = [test_inputs[batch * BATCH_SIZE + i] for i in range(BATCH_SIZE)] g.clear() y = make_graph(inputs) y_val = y.to_list() for i in range(BATCH_SIZE): maxval = -1e10 argmax = -1 for j in range(NUM_OUTPUT_UNITS): v = y_val[j + i * NUM_OUTPUT_UNITS] if (v > maxval): maxval = v argmax = j if argmax == test_labels[i + batch * BATCH_SIZE]: match += 1 accuracy = 100.0 * match / NUM_TEST_SAMPLES print("\nepoch %d: accuracy: %.2f%%\n" % (epoch, accuracy))
def __init__(self):
    self.param = Parameter([5], I.Constant(0))
    self.param.gradient = tF.raw_input([5], [1, 2, 3, 4, 5])
    self.scan_attributes()
def main():
    dev = D.Naive()  # or D.CUDA(gpuid)
    Device.set_default(dev)

    # Parameters
    pw1 = Parameter([8, 2], I.XavierUniform())
    pb1 = Parameter([8], I.Constant(0))
    pw2 = Parameter([1, 8], I.XavierUniform())
    pb2 = Parameter([], I.Constant(0))

    # Optimizer
    optimizer = O.SGD(0.1)

    # Registers parameters.
    optimizer.add_parameter(pw1)
    optimizer.add_parameter(pb1)
    optimizer.add_parameter(pw2)
    optimizer.add_parameter(pb2)

    # Training data
    input_data = [
        np.array([1, 1], dtype=np.float32),    # Sample 1
        np.array([1, -1], dtype=np.float32),   # Sample 2
        np.array([-1, 1], dtype=np.float32),   # Sample 3
        np.array([-1, -1], dtype=np.float32),  # Sample 4
    ]
    output_data = [
        np.array([1], dtype=np.float32),   # Label 1
        np.array([-1], dtype=np.float32),  # Label 2
        np.array([-1], dtype=np.float32),  # Label 3
        np.array([1], dtype=np.float32),   # Label 4
    ]

    g = Graph()
    Graph.set_default(g)

    for i in range(10):
        g.clear()

        # Builds a computation graph.
        x = F.input(input_data)
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        h = F.tanh(w1 @ x + b1)
        y = w2 @ h + b2

        # Obtains values.
        y_val = y.to_list()
        print("epoch ", i, ":")
        for j in range(4):
            print("  [", j, "]: ", y_val[j])

        # Extends the computation graph to calculate loss values.
        t = F.input(output_data)
        diff = t - y
        loss = F.batch.mean(diff * diff)

        # Obtains the loss.
        loss_val = loss.to_float()
        print("  loss: ", loss_val)

        # Updates parameters.
        optimizer.reset_gradients()
        loss.backward()
        optimizer.update()
def primitiv_xor_test(self):
    dev = D.Naive()
    Device.set_default(dev)
    g = Graph()
    Graph.set_default(g)

    input_data = [
        np.array([[1], [1]]),
        np.array([[-1], [1]]),
        np.array([[-1], [-1]]),
        np.array([[1], [-1]]),
    ]
    label_data = [
        np.array([1]),
        np.array([-1]),
        np.array([1]),
        np.array([-1]),
    ]

    N = 8
    pw = Parameter([1, N], I.XavierUniform())
    pb = Parameter([], I.Constant(0))
    pu = Parameter([N, 2], I.XavierUniform())
    pc = Parameter([N], I.Constant(0))

    if (os.path.isfile('output/xor/pw.data')
            and os.path.isfile('output/xor/pb.data')
            and os.path.isfile('output/xor/pu.data')
            and os.path.isfile('output/xor/pc.data')):
        pw.load('output/xor/pw.data')
        pb.load('output/xor/pb.data')
        pu.load('output/xor/pu.data')
        pc.load('output/xor/pc.data')

    optimizer = O.SGD(0.01)
    optimizer.add(pw, pb, pu, pc)

    for epoch in range(1000):
        print(epoch, end=' ')
        g.clear()
        x = F.input(input_data)
        w = F.parameter(pw)
        b = F.parameter(pb)
        u = F.parameter(pu)
        c = F.parameter(pc)
        h = F.tanh(u @ x + c)
        y = F.tanh(w @ h + b)
        for val in y.to_list():
            print('{:+.6f},'.format(val), end=' ')
        loss = self.calc_loss(y, label_data)
        print('loss={:.6f}'.format(loss.to_float()))

        optimizer.reset_gradients()
        loss.backward()
        optimizer.update()

    pw.save('output/xor/pw.data')
    pb.save('output/xor/pb.data')
    pu.save('output/xor/pu.data')
    pc.save('output/xor/pc.data')

    return y.to_list()
def main(): # Loads data train_inputs = load_images("data/train-images-idx3-ubyte", NUM_TRAIN_SAMPLES) train_labels = load_labels("data/train-labels-idx1-ubyte", NUM_TRAIN_SAMPLES) test_inputs = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES) test_labels = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES) dev = D.Naive() # or D.CUDA(gpuid) Device.set_default(dev) pw1 = Parameter([NUM_HIDDEN_UNITS, NUM_INPUT_UNITS], I.XavierUniform()) pb1 = Parameter([NUM_HIDDEN_UNITS], I.Constant(0)) pw2 = Parameter([NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS], I.XavierUniform()) pb2 = Parameter([NUM_OUTPUT_UNITS], I.Constant(0)) optimizer = O.SGD(.5) optimizer.add(pw1, pb1, pw2, pb2) def make_graph(inputs, train): x = F.input(inputs) w1 = F.parameter(pw1) b1 = F.parameter(pb1) h = F.relu(w1 @ x + b1) h = F.dropout(h, .5, train) w2 = F.parameter(pw2) b2 = F.parameter(pb2) return w2 @ h + b2 ids = list(range(NUM_TRAIN_SAMPLES)) g = Graph() Graph.set_default(g) for epoch in range(MAX_EPOCH): random.shuffle(ids) # Training loop for batch in range(NUM_TRAIN_BATCHES): print("\rTraining... %d / %d" % (batch + 1, NUM_TRAIN_BATCHES), end="") inputs = [train_inputs[ids[batch * BATCH_SIZE + i]] for i in range(BATCH_SIZE)] labels = [train_labels[ids[batch * BATCH_SIZE + i]] for i in range(BATCH_SIZE)] g.clear() y = make_graph(inputs, True) loss = F.softmax_cross_entropy(y, labels, 0) avg_loss = F.batch.mean(loss) optimizer.reset_gradients() avg_loss.backward() optimizer.update() print() match = 0 # Test loop for batch in range(NUM_TEST_BATCHES): print("\rTesting... %d / %d" % (batch + 1, NUM_TEST_BATCHES), end="") inputs = [test_inputs[batch * BATCH_SIZE + i] for i in range(BATCH_SIZE)] g.clear() y = make_graph(inputs, False) y_val = y.to_list() for i in range(BATCH_SIZE): maxval = -1e10 argmax = -1 for j in range(NUM_OUTPUT_UNITS): v = y_val[j + i * NUM_OUTPUT_UNITS] if (v > maxval): maxval = v argmax = j if argmax == test_labels[i + batch * BATCH_SIZE]: match += 1 accuracy = 100.0 * match / NUM_TEST_SAMPLES print("\nepoch %d: accuracy: %.2f%%\n" % (epoch, accuracy))
def setUp(self):
    self.dev = D.Naive()
    Device.set_default(self.dev)
    self.p = Parameter([8], I.Constant(0))
    self.p.value.reset_by_vector([1, 2, 3, 4, 5, 6, 7, 8])
def main(): # Loads data train_inputs = load_images("data/train-images-idx3-ubyte", NUM_TRAIN_SAMPLES) train_labels = load_labels("data/train-labels-idx1-ubyte", NUM_TRAIN_SAMPLES) test_inputs = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES) test_labels = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES) dev = D.CUDA(0) Device.set_default(dev) g = Graph() Graph.set_default(g) # Parameters of CNNs # Shape: {kernel_height, kernel_width, in_channels, out_channels} pw_cnn1 = Parameter(Shape([KERNEL_SIZE1, KERNEL_SIZE1, 1, NUM_CHANNELS1]), I.XavierUniformConv2D()) pw_cnn2 = Parameter( Shape([KERNEL_SIZE2, KERNEL_SIZE2, NUM_CHANNELS1, NUM_CHANNELS2]), I.XavierUniformConv2D()) # Parameters of FC layers pw_fc1 = Parameter(Shape([NUM_HIDDEN_UNITS, NUM_INPUT_UNITS]), I.XavierUniform()) pw_fc2 = Parameter(Shape([NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS]), I.XavierUniform()) pb_fc1 = Parameter(Shape([NUM_HIDDEN_UNITS]), I.Constant(0)) pb_fc2 = Parameter(Shape([NUM_OUTPUT_UNITS]), I.Constant(0)) # Optimizer optimizer = O.SGD(.1) optimizer.add(pw_cnn1, pw_cnn2, pw_fc1, pw_fc2, pb_fc1, pb_fc2) # Helper lambda to construct the predictor network. def make_graph(inputs, train): # Input and parameters. #x = F.input(Shape([IMAGE_HEIGHT, IMAGE_WIDTH], BATCH_SIZE), inputs) x = F.input(inputs) w_cnn1 = F.parameter(pw_cnn1) w_cnn2 = F.parameter(pw_cnn2) w_fc1 = F.parameter(pw_fc1) w_fc2 = F.parameter(pw_fc2) b_fc1 = F.parameter(pb_fc1) b_fc2 = F.parameter(pb_fc2) # CNNs h_cnn1 = F.relu(F.conv2d(x, w_cnn1, PADDING1, PADDING1, 1, 1, 1, 1)) h_pool1 = F.max_pool2d(h_cnn1, 2, 2, 0, 0, 2, 2) h_cnn2 = F.relu( F.conv2d(h_pool1, w_cnn2, PADDING2, PADDING2, 1, 1, 1, 1)) h_pool2 = F.max_pool2d(h_cnn2, 2, 2, 0, 0, 2, 2) # FC layers x_fc = F.dropout(F.flatten(h_pool2), .5, train) h_fc = F.dropout(F.relu(F.matmul(w_fc1, x_fc) + b_fc1), .5, train) return F.matmul(w_fc2, h_fc) + b_fc2 # Batch randomizer ids = list(range(NUM_TRAIN_SAMPLES)) for epoch in range(MAX_EPOCH): # Shuffles sample IDs. random.shuffle(ids) # Training loop for batch in range(NUM_TRAIN_BATCHES): print("\rTraining... %d / %d" % (batch + 1, NUM_TRAIN_BATCHES), end="") # Makes a minibatch for training. inputs = [ train_inputs[ids[batch * BATCH_SIZE + i]] for i in range(BATCH_SIZE) ] labels = [ train_labels[ids[batch * BATCH_SIZE + i]] for i in range(BATCH_SIZE) ] # Constructs the graph. g.clear() y = make_graph(inputs, True) loss = F.softmax_cross_entropy(y, labels, 0) avg_loss = F.batch.mean(loss) # Dump computation graph at the first time. # if epoch == 0 and batch == 0: # print(g.dump("dot")) # Implicit forward, backward, and updates parameters. optimizer.reset_gradients() avg_loss.backward() optimizer.update() print() match = 0 # Test loop for batch in range(NUM_TEST_BATCHES): print("\rTesting... %d / %d" % (batch + 1, NUM_TEST_BATCHES), end="") # Makes a test minibatch. inputs = [ test_inputs[batch * BATCH_SIZE + i] for i in range(BATCH_SIZE) ] # Constructs the graph. g.clear() y = make_graph(inputs, False) # Gets outputs, argmax, and compares them with the label. y_val = y.to_list() for i in range(BATCH_SIZE): maxval = -1e10 argmax = -1 for j in range(NUM_OUTPUT_UNITS): v = y_val[j + i * NUM_OUTPUT_UNITS] if v > maxval: maxval = v argmax = j if argmax == test_labels[i + batch * BATCH_SIZE]: match += 1 accuracy = 100.0 * match / NUM_TEST_SAMPLES print("epoch %d: accuracy: %.2f%%" % (epoch, accuracy)) return 0
def init(self, d_model):
    self.pgain.init([1, d_model], I.Constant(1))
    self.pbias.init([1, d_model], I.Constant(0))
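# An illustrative layer-normalization forward pass matching the parameters
# above; not part of the original source. Assumes a row-vector input of shape
# [1, d_model], that init() also stored `self.d_model = d_model`, and an
# epsilon of 1e-6. F.sum, F.broadcast, F.sqrt, and F.parameter are standard
# primitiv operations.
def forward(self, x):
    gain = F.parameter(self.pgain)
    bias = F.parameter(self.pbias)
    # Mean and variance over the feature axis (dim 1), broadcast back
    # to [1, d_model] so the elementwise ops line up.
    m = F.broadcast(F.sum(x, 1), 1, self.d_model) / self.d_model
    v = F.broadcast(F.sum((x - m) * (x - m), 1), 1, self.d_model) / self.d_model
    return gain * (x - m) / F.sqrt(v + 1e-6) + bias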