def predict(self, queries):
    """Run the model on ``queries`` in batches and return its outputs.

    Args:
        queries: raw inputs handed to ``utils.dataset.transform_images``
            together with ``self._image_size`` and ``mode='RGB'``.

    Returns:
        list: the per-batch model outputs concatenated along axis 0 and
        converted with ``tolist()``; an empty list if no batch was run.
    """
    image_size = self._image_size
    batch_size = self._batch_size
    max_image_size = 224

    images, _ = utils.dataset.transform_images(queries,
                                               image_size=image_size,
                                               mode='RGB')
    # NOTE: utils.dataset.normalize_images (self._normalize_mean /
    # self._normalize_std) is currently not applied to the images.
    # Move the last axis to position 1 so the layout matches the
    # (batch, 3, H, W) input buffer below.
    images = np.transpose(np.asarray(images, dtype=np.float32), [0, 3, 1, 2])

    tx = tensor.Tensor((batch_size, 3, max_image_size, max_image_size),
                       self.dev, tensor.float32)
    num_batch = int(np.ceil(images.shape[0] / batch_size))

    # Evaluation phase
    self._model.eval()
    batch_outputs = []
    for b in tqdm(range(num_batch)):
        x = images[b * batch_size:(b + 1) * batch_size]
        if self._model.dimension == 4:
            if x.shape[2] != self._model.input_size:
                x = resize_dataset(x, self._model.input_size)
        # NOTE(review): the last batch may hold fewer than batch_size rows;
        # confirm copy_from_numpy accepts that for the fixed-size buffer.
        tx.copy_from_numpy(x)
        out = self._model(tx)
        batch_outputs.append(tensor.to_numpy(out))

    # Collecting the batches in a list avoids truth-testing an ndarray
    # (``not probs``), which raises ValueError for multi-element arrays.
    if not batch_outputs:
        return []
    return np.concatenate(batch_outputs, axis=0).tolist()
def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
          use_cpu=False):
    """Train ``net`` with momentum SGD and evaluate it after every epoch.

    Args:
        data: tuple ``(train_x, train_y, test_x, test_y)`` of indexable arrays.
        net: network exposing ``to_device``, ``param_names``, ``param_specs``,
            ``param_values``, ``train``, ``evaluate`` and ``save``.
        max_epoch: number of epochs to run.
        get_lr: callable mapping the epoch index to a learning rate.
        weight_decay: weight decay passed to the SGD optimizer.
        batch_size: samples per mini-batch; trailing samples that do not
            fill a whole batch are skipped.
        use_cpu: run on the default device instead of a CUDA GPU.
    """
    print('Start intialization............')
    if not use_cpu:
        print('Using GPU')
        dev = device.create_cuda_gpu()
    else:
        print('Using CPU')
        dev = device.get_default_device()

    net.to_device(dev)
    opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
    for name, spec in zip(net.param_names(), net.param_specs()):
        opt.register(name, spec)

    # Device-side buffers that are refilled for every mini-batch.
    tx = tensor.Tensor((batch_size, 3, 32, 32), dev)
    ty = tensor.Tensor((batch_size, ), dev, core_pb2.kInt)

    train_x, train_y, test_x, test_y = data
    n_train = train_x.shape[0] // batch_size
    n_test = test_x.shape[0] // batch_size
    order = np.arange(train_x.shape[0], dtype=np.int32)

    for epoch in range(max_epoch):
        np.random.shuffle(order)
        loss, acc = 0.0, 0.0
        print('Epoch %d' % epoch)
        for b in range(n_train):
            picked = order[b * batch_size:(b + 1) * batch_size]
            tx.copy_from_numpy(train_x[picked])
            ty.copy_from_numpy(train_y[picked])
            grads, (l, a) = net.train(tx, ty)
            loss += l
            acc += a
            for (s, p, g) in zip(net.param_names(), net.param_values(),
                                 grads):
                opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s), b)
            # update progress bar
            utils.update_progress(
                b * 1.0 / n_train,
                'training loss = %f, accuracy = %f' % (l, a))
        info = '\ntraining loss = %f, training accuracy = %f, lr = %f' \
            % ((loss / n_train), (acc / n_train), get_lr(epoch))
        print(info)

        # Evaluation pass over the test set, read in its stored order.
        loss, acc = 0.0, 0.0
        for b in range(n_test):
            window = slice(b * batch_size, (b + 1) * batch_size)
            tx.copy_from_numpy(test_x[window])
            ty.copy_from_numpy(test_y[window])
            l, a = net.evaluate(tx, ty)
            loss += l
            acc += a
        print('test loss = %f, test accuracy = %f' %
              ((loss / n_test), (acc / n_test)))
    net.save('model', 20)  # save model params into checkpoint file
def test_Adam_const_lr(self, dev=cpu_dev):
    """Check one Adam step (lr=0.1) against a hand-computed reference."""
    # NOTE(review): graph mode is disabled on cpu_dev even when ``dev`` is
    # a different device - confirm this is intended.
    cpu_dev.EnableGraph(False)
    adam = opt.Adam(lr=0.1)

    shape = (2, 3)
    w = tensor.Tensor(shape, device=dev).set_value(1.0)
    g = tensor.Tensor(shape, device=dev).set_value(0.1)

    # Reference update for the first step:
    #   m := beta_1 * m + (1 - beta_1) * grad
    #   v := beta_2 * v + (1 - beta_2) * grad * grad
    #   m_norm = m / (1 - beta_1 ^ step)
    #   v_norm = v / (1 - beta_2 ^ step)
    #   param := param - (lr * m_norm) / (sqrt(v_norm) + epsilon)
    # The constants 0.1 and 0.001 are presumably 1 - beta_1 and 1 - beta_2
    # for opt.Adam's default betas - confirm against opt.Adam.
    m = 0.1 * g
    v = 0.001 * tensor.square(g)
    m_norm = m / 0.1
    v_norm = v / 0.001
    denom = tensor.sqrt(v_norm) + 1e-8
    w_step1 = w - 0.1 * (m_norm / denom)

    adam.apply(w.name, w, g)
    assertTensorEqual(w, w_step1, decimal=5)
def inference(self, data, batchsize=1, model_path='model'):
    """Run embed -> lstm -> dense -> sft in eval mode and return column 1.

    Args:
        data: input sequences, passed to ``rm_padding`` and ``convert``.
        batchsize: number of sequences in ``data``.
        model_path: not used by this method.

    Returns:
        Column 1 of the numpy output of ``self.sft``.
    """
    seq_lens = rm_padding(data)
    encoded = convert(data, batchsize, self.seq_length, self.vocab_size,
                      self.dev)
    # Swap the first two axes, then flatten to one row per
    # (step, sample) pair.
    flat_shape = (batchsize * self.seq_length, self.vocab_size)
    encoded = np.swapaxes(encoded, 0, 1).reshape(flat_shape)
    inputs = tensor.from_numpy(encoded)
    inputs.to_device(self.dev)
    embed = self.embed.forward(model_pb2.kEval, inputs)

    # Cut the embedding output into one (batchsize, embed_size) tensor per
    # time step.
    step_elems = batchsize * self.embed_size
    lstm_inputs = []
    for step in range(self.seq_length):
        piece = tensor.Tensor((batchsize, self.embed_size), self.dev)
        tensor.copy_data_to_from(piece, embed, step_elems, 0,
                                 step * step_elems)
        lstm_inputs.append(piece)
    lstm_inputs.append(tensor.Tensor())  # hx
    lstm_inputs.append(tensor.Tensor())  # cx
    hidden = self.lstm.forward(model_pb2.kEval, lstm_inputs)

    # For every sample, take its row from the hidden state at that sample's
    # last step (``seq_lens[row] - 1``).
    hidden_batch = tensor.Tensor((batchsize, self.hidden_size), self.dev)
    for row in range(batchsize):
        offset = row * self.hidden_size
        tensor.copy_data_to_from(hidden_batch, hidden[seq_lens[row] - 1],
                                 self.hidden_size, offset, offset)

    act = self.dense.forward(model_pb2.kEval, hidden_batch)
    probs = tensor.to_numpy(self.sft.forward(model_pb2.kEval, act))
    return probs[:, 1]
def test_retraining(self):
    """Train one step, export to ONNX, re-import and train one more step."""

    def sgd_step(loss_tensor):
        # One plain SGD update over every (param, grad) pair of the loss.
        sgd = opt.SGD(lr=0.01)
        for param, grad in autograd.backward(loss_tensor):
            sgd.update(param, grad)
        sgd.step()

    # forward
    x = tensor.Tensor(shape=(2, 3, 3, 3), device=gpu_dev)
    x.gaussian(0.0, 1.0)
    conv_out1 = autograd.Conv2d(3, 1, 2)(x)
    conv_out2 = autograd.Conv2d(1, 1, 2)(conv_out1)
    y = autograd.Flatten()(conv_out2)[0]
    y_t = tensor.Tensor(shape=(2, 1), device=gpu_dev)
    y_t.gaussian(0.0, 1.0)
    loss = autograd.MeanSquareError()(y, y_t)[0]
    # backward
    sgd_step(loss)

    # frontend
    model = sonnx.to_onnx([x], [y])

    # backend
    sg_ir = sonnx.prepare(model, device=gpu_dev)
    # Mark every imported tensor as trainable before running again.
    for key, imported in sg_ir.tensor_map.items():
        imported.requires_grad = True
        imported.stores_grad = True
        sg_ir.tensor_map[key] = imported

    # forward
    y_o = sg_ir.run([x])[0]
    # backward
    loss = autograd.MeanSquareError()(y_o, y_t)[0]
    sgd_step(loss)
def train(data_dir, net, num_epoch=20, batch_size=250):
    """Train ``net`` on a CUDA GPU and evaluate it after every epoch.

    The print calls use a single pre-formatted string and the batch counts
    use floor division, so the function behaves the same on Python 2 and
    Python 3.

    Args:
        data_dir: directory passed to ``load_dataset``.
        net: network exposing ``to_device``, ``param_values``,
            ``param_specs``, ``train``, ``evaluate`` and ``save``.
        num_epoch: number of epochs to run.
        batch_size: samples per mini-batch; trailing samples that do not
            fill a whole batch are skipped.
    """
    print('Start intialization............')
    cuda = device.create_cuda_gpu()
    net.to_device(cuda)
    opt = optimizer.SGD(momentum=0.9, weight_decay=0.04)
    for (p, specs) in zip(net.param_values(), net.param_specs()):
        filler = specs.filler
        if filler.type == 'gaussian':
            initializer.gaussian(p, filler.mean, filler.std)
        else:
            p.set_value(0)
        opt.register(p, specs)
        print('%s %s %s' % (specs.name, filler.type, p.l1()))

    print('Loading data ..................')
    train_x, train_y = load_dataset(data_dir, 1)
    test_x, test_y = load_dataset(data_dir, 2)

    # Device-side buffers that are refilled for every mini-batch.
    tx = tensor.Tensor((batch_size, 3), cuda)
    ty = tensor.Tensor((batch_size,), cuda, core_pb2.kInt)

    # Floor division keeps the counts integral (required by range()).
    num_train_batch = train_x.shape[0] // batch_size
    num_test_batch = test_x.shape[0] // batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)
    test_idx = np.arange(test_x.shape[0], dtype=np.int32)

    for epoch in range(num_epoch):
        np.random.shuffle(idx)
        loss, acc = 0.0, 0.0
        print('Epoch %d' % epoch)
        for b in range(num_train_batch):
            batch = idx[b * batch_size:(b + 1) * batch_size]
            tx.copy_from_numpy(train_x[batch])
            ty.copy_from_numpy(train_y[batch])
            grads, (l, a) = net.train(tx, ty)
            loss += l
            acc += a
            for (s, p, g) in zip(net.param_specs(), net.param_values(),
                                 grads):
                # get_lr is resolved from the enclosing module.
                opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s.name))
            # update progress bar
            utils.update_progress(
                b * 1.0 / num_train_batch,
                'training loss = %f, accuracy = %f' % (l, a))
        info = '\ntraining loss = %f, training accuracy = %f' \
            % (loss / num_train_batch, acc / num_train_batch)
        print(info)

        loss, acc = 0.0, 0.0
        # NOTE(review): the shuffled test order is never used to index the
        # test data; the call is kept so the numpy RNG stream is unchanged.
        np.random.shuffle(test_idx)
        for b in range(num_test_batch):
            x = test_x[b * batch_size:(b + 1) * batch_size]
            y = test_y[b * batch_size:(b + 1) * batch_size]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            l, a = net.evaluate(tx, ty)
            loss += l
            acc += a
        print('test loss = %f, test accuracy = %f'
              % (loss / num_test_batch, acc / num_test_batch))

    net.save('model.bin')  # save model params into checkpoint file
def train(model, x, y, epochs=1, batch_size=64,
          dev=device.get_default_device()):
    """Train ``model`` with plain SGD (lr=0.001) on ``x`` / ``y``.

    Args:
        model: object whose ``forward`` maps an input batch to outputs.
        x: input samples, sliced along axis 0 into mini-batches.
        y: targets aligned with ``x``.
        epochs: number of passes over the data.
        batch_size: samples per mini-batch; trailing samples that do not
            fill a whole batch are skipped.
        dev: device for the batch tensors. The default is evaluated once,
            when the function is defined.

    Returns:
        tuple: ``(x_batch, output_batch)`` of the last processed batch, or
        ``(None, None)`` when no batch was processed.
    """
    batch_number = x.shape[0] // batch_size
    # The optimizer does not depend on the batch, so build it once.
    sgd = opt.SGD(lr=0.001)
    # Pre-bind the results so an empty run returns (None, None) instead of
    # raising UnboundLocalError.
    x_batch = output_batch = None

    for _ in range(epochs):
        for b in range(batch_number):
            l_idx = b * batch_size
            r_idx = (b + 1) * batch_size

            x_batch = tensor.Tensor(device=dev, data=x[l_idx:r_idx])
            target_batch = tensor.Tensor(device=dev, data=y[l_idx:r_idx])

            output_batch = model.forward(x_batch)

            loss = autograd.softmax_cross_entropy(output_batch, target_batch)
            accuracy_rate = accuracy(tensor.to_numpy(output_batch),
                                     tensor.to_numpy(target_batch))

            for p, gp in autograd.backward(loss):
                sgd.update(p, gp)
            sgd.step()

            # Report on every 100th batch.
            if b % 100 == 0:
                print("acc %6.2f loss, %6.2f" %
                      (accuracy_rate, tensor.to_numpy(loss)[0]))
    print("training completed")
    return x_batch, output_batch
def test_transfer_learning(self):
    """Train, export to ONNX, then train a new head on the imported model."""

    def sgd_step(loss_tensor):
        # One plain SGD update over every (param, grad) pair of the loss.
        sgd = opt.SGD(lr=0.01)
        for param, grad in autograd.backward(loss_tensor):
            sgd.update(param, grad)
        sgd.step()

    # forward
    x = tensor.Tensor(shape=(2, 3, 3, 3), device=gpu_dev)
    x.gaussian(0.0, 1.0)
    conv_out = autograd.Conv2d(3, 1, 2)(x)
    y = autograd.Flatten()(conv_out)[0]
    y_t = tensor.Tensor(shape=(2, 4), device=gpu_dev)
    y_t.gaussian(0.0, 1.0)
    loss = autograd.MeanSquareError()(y, y_t)[0]
    # backward
    sgd_step(loss)

    # frontend
    model = sonnx.to_onnx([x], [y])

    # backend
    sg_ir = sonnx.prepare(model, device=gpu_dev)

    # forward: run the imported graph with ``last_layers=-1`` and attach a
    # fresh Conv2d + Flatten head to its output.
    imported_out = sg_ir.run([x], last_layers=-1)[0]
    head_out = autograd.Conv2d(1, 1, 2)(imported_out)
    y_o = autograd.Flatten()(head_out)[0]

    # backward
    y_ot = tensor.Tensor(shape=(2, 1), device=gpu_dev)
    y_ot.gaussian(0.0, 1.0)
    loss = autograd.MeanSquareError()(y_o, y_ot)[0]
    sgd_step(loss)
def transfer_learning(sg_ir, x, y, epochs=1, batch_size=64,
                      dev=device.get_default_device()):
    """Wrap ``sg_ir`` in a ``Trans`` model and train it with SGD (lr=0.07).

    Args:
        sg_ir: imported model representation handed to ``Trans(sg_ir, -1)``.
        x: input samples, sliced along axis 0 into mini-batches.
        y: targets aligned with ``x``.
        epochs: number of passes over the data.
        batch_size: samples per mini-batch; trailing samples that do not
            fill a whole batch are skipped.
        dev: device for the batch tensors. The default is evaluated once,
            when the function is defined.

    Returns:
        The trained ``Trans`` model.
    """
    batch_number = x.shape[0] // batch_size
    trans_model = Trans(sg_ir, -1)
    # The optimizer does not depend on the batch, so build it once.
    sgd = opt.SGD(lr=0.07)

    for _ in range(epochs):
        for b in range(batch_number):
            l_idx = b * batch_size
            r_idx = (b + 1) * batch_size

            x_batch = tensor.Tensor(device=dev, data=x[l_idx:r_idx])
            target_batch = tensor.Tensor(device=dev, data=y[l_idx:r_idx])
            output_batch = trans_model.forward(x_batch)

            loss = autograd.softmax_cross_entropy(output_batch, target_batch)
            accuracy_rate = accuracy(tensor.to_numpy(output_batch),
                                     tensor.to_numpy(target_batch))

            for p, gp in autograd.backward(loss):
                sgd.update(p, gp)
            sgd.step()

            # Report on every 100th batch.
            if b % 100 == 0:
                print("acc %6.2f loss, %6.2f" %
                      (accuracy_rate, tensor.to_numpy(loss)[0]))
    print("transfer-learning completed")
    return trans_model
def test_dnnl_pooling_avg(self):
    """Check CPU pooling forward/backward on a 1x3x2x2 input.

    The handle is built with a 2x2 kernel, stride 1, no padding and
    ``False`` as its last argument; the expected values are per-channel
    means.
    """
    dev = cpu_dev
    N, C, H, W = 1, 3, 2, 2
    data_shape = [N, C, H, W]

    values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    x0 = np.array(values, dtype=np.float32).reshape(data_shape)
    x0_ct = tensor.Tensor(device=dev, data=x0).data

    dy0 = np.array([1, 2, 3], dtype=np.float32).reshape([1, 3, 1, 1])
    dy0_ct = tensor.Tensor(device=dev, data=dy0).data

    hndl = singa_api.PoolingHandle(x0_ct, [2, 2], [1, 1], [0, 0], False)

    # Forward: one value per channel.
    y0_ct = singa_api.CpuPoolingForward(hndl, x0_ct)
    expected_y = np.array([[[[2.5000]], [[6.5000]], [[10.5000]]]])
    actual_y = tensor.to_numpy(_cTensor_to_pyTensor(y0_ct))
    np.testing.assert_array_almost_equal(actual_y, expected_y)

    # Backward: each upstream gradient is spread evenly over its 2x2 window.
    dx0_ct = singa_api.CpuPoolingBackward(hndl, dy0_ct, x0_ct, y0_ct)
    expected_dx = np.array([[[[0.2500, 0.2500], [0.2500, 0.2500]],
                             [[0.5000, 0.5000], [0.5000, 0.5000]],
                             [[0.7500, 0.7500], [0.7500, 0.7500]]]])
    actual_dx = tensor.to_numpy(_cTensor_to_pyTensor(dx0_ct))
    np.testing.assert_array_almost_equal(actual_dx, expected_dx)
def _as_type_helper(self, dev):
    """Check ``AsType`` round-trips float32 -> int -> float32 on ``dev``.

    The expected values come from the same casts done with numpy's
    ``astype``.
    """
    # Random floats in [-5, 5) and their numpy-cast references.
    np1 = np.random.random([3]).astype(np.float32)
    np1 = np1 * 10 - 5
    np2 = np1.astype(np.int32)
    np3 = np2.astype(np.float32)

    # The tensor is built once (the original constructed it twice).
    t1 = tensor.Tensor(device=dev, data=np1)
    t1_ct = t1.data
    self.assertEqual(t1_ct.data_type(), singa_api.kFloat32)

    # float32 -> int
    t1_ct = t1_ct.AsType(singa_api.kInt)
    self.assertEqual(t1_ct.data_type(), singa_api.kInt)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(_cTensor_to_pyTensor(t1_ct)), np2)

    # int -> float32
    t1_ct = t1_ct.AsType(singa_api.kFloat32)
    self.assertEqual(t1_ct.data_type(), singa_api.kFloat32)
    np.testing.assert_array_almost_equal(
        tensor.to_numpy(_cTensor_to_pyTensor(t1_ct)), np3)
def train_resnet(DIST=True, graph=True, sequential=False, verbosity=0):
    """Benchmark ResNet-50 training throughput on one random batch.

    Args:
        DIST: wrap the optimizer in ``opt.DistOpt`` when equal to ``True``.
        graph: forwarded to ``model.compile`` as ``use_graph``.
        sequential: overwritten below (True when distributed, else False).
            NOTE(review): the caller's value is never used - confirm this
            is intended.
        verbosity: forwarded to ``dev.SetVerbosity``.
    """
    # Hyper-parameters for this benchmark
    niters = 100
    batch_size = 32
    IMG_SIZE = 224
    sgd = opt.SGD(lr=0.1, momentum=0.9, weight_decay=1e-5)

    # For distributed training, sequential has better throughput in the
    # current version
    if DIST == True:
        sgd = opt.DistOpt(sgd)
        world_size, local_rank, global_rank = (sgd.world_size,
                                               sgd.local_rank,
                                               sgd.global_rank)
        sequential = True
    else:
        local_rank, world_size, global_rank = 0, 1, 0
        sequential = False

    dev = device.create_cuda_gpu_on(local_rank)

    # One fixed random batch is reused for every iteration.
    input_shape = (batch_size, 3, IMG_SIZE, IMG_SIZE)
    tx = tensor.Tensor(input_shape, dev)
    ty = tensor.Tensor((batch_size,), dev, tensor.int32)
    x = np.random.randn(*input_shape).astype(np.float32)
    y = np.random.randint(0, 1000, batch_size, dtype=np.int32)
    tx.copy_from_numpy(x)
    ty.copy_from_numpy(y)

    dev.SetVerbosity(verbosity)
    dev.SetSkipIteration(5)

    # construct the model
    from model import resnet
    model = resnet.resnet50(num_channels=3, num_classes=1000)

    model.train()
    model.set_optimizer(sgd)
    model.compile([tx], is_train=True, use_graph=graph,
                  sequential=sequential)

    # train model; the device is synced on both sides of the timed region
    dev.Sync()
    start = time.time()
    with trange(niters) as t:
        for _ in t:
            model(tx, ty, dist_option='fp32', spars=None)

    dev.Sync()
    end = time.time()
    elapsed = end - start
    titer = elapsed / float(niters)
    throughput = float(niters * batch_size * world_size) / elapsed
    if global_rank == 0:
        print("Throughput = {} per second".format(throughput), flush=True)
        print("TotalTime={}".format(elapsed), flush=True)
        print("Total={}".format(titer), flush=True)
        dev.PrintTimeProfiling()
def __init__(self, vocab_size, hidden_size=32):
    """Create the LSTM, the output projection, the optimizer and the states.

    Args:
        vocab_size: LSTM input size and output size of the dense layer.
        hidden_size: LSTM hidden size; also the width of ``hx`` / ``cx``.
    """
    super(CharRNN, self).__init__()
    self.rnn = autograd.LSTM(vocab_size, hidden_size)
    self.dense = autograd.Linear(hidden_size, vocab_size)
    self.optimizer = opt.SGD(0.01)
    self.hidden_size = hidden_size
    self.vocab_size = vocab_size
    # State tensors of shape (1, hidden_size).
    state_shape = (1, self.hidden_size)
    self.hx = tensor.Tensor(state_shape)
    self.cx = tensor.Tensor(state_shape)
def test_concat(self):
    """Concatenate a (2, 3) and a (1, 3) tensor on axis 0 and check the sum."""
    t1 = tensor.Tensor((2, 3))
    t2 = tensor.Tensor((1, 3))
    t1.set_value(1)
    t2.set_value(2)
    lyr = layer.Concat('concat', 0, [t1.shape, t2.shape])
    t = lyr.forward(model_pb2.kTrain, [t1, t2])
    tnp = tensor.to_numpy(t[0])
    # 6 ones + 3 twos = 12. assertEqual replaces the deprecated
    # assertEquals alias, which was removed in Python 3.12.
    self.assertEqual(np.sum(tnp), 12)
def train(self):
    """Alternate discriminator and generator updates for the GAN.

    Every ``self.interval`` iterations the model is switched to eval mode,
    an image is saved and both losses are logged.
    """
    train_data, _, _, _, _, _ = load_data(self.dataset_filepath)
    dev = device.create_cuda_gpu_on(0)
    # Fixed seeds for the device and for numpy.
    dev.SetRandSeed(0)
    np.random.seed(0)

    # Adam is used here; the variable keeps the name ``sgd``.
    sgd = opt.Adam(lr=self.learning_rate)

    label_shape = (self.batch_size, 1)
    noise = tensor.Tensor((self.batch_size, self.noise_size), dev,
                          tensor.float32)
    real_images = tensor.Tensor((self.batch_size, self.feature_size), dev,
                                tensor.float32)
    real_labels = tensor.Tensor(label_shape, dev, tensor.float32)
    fake_labels = tensor.Tensor(label_shape, dev, tensor.float32)

    # attach model to graph
    self.model.set_optimizer(sgd)
    self.model.compile([noise],
                       is_train=True,
                       use_graph=False,
                       sequential=True)

    real_labels.set_value(1.0)
    fake_labels.set_value(0.0)

    for iteration in range(self.iterations):
        picked = np.random.randint(0, train_data.shape[0], self.batch_size)
        real_images.copy_from_numpy(train_data[picked])

        self.model.train()

        # Discriminator: one update on real images, one on generated ones.
        _, d_loss_real = self.model.train_one_batch_dis(
            real_images, real_labels)
        noise.uniform(-1, 1)
        fake_images = self.model.forward_gen(noise)
        _, d_loss_fake = self.model.train_one_batch_dis(
            fake_images, fake_labels)
        d_loss = (tensor.to_numpy(d_loss_real)[0] +
                  tensor.to_numpy(d_loss_fake)[0])

        # Generator: fresh noise, trained against the "real" labels.
        noise.uniform(-1, 1)
        _, g_loss_tensor = self.model.train_one_batch(noise, real_labels)
        g_loss = tensor.to_numpy(g_loss_tensor)[0]

        if iteration % self.interval == 0:
            self.model.eval()
            self.save_image(iteration)
            print_log(' The {} iteration, G_LOSS: {}, D_LOSS: {}'.format(
                iteration, g_loss, d_loss))
def _concatenate_helper(self, dev):
    """Check ``tensor.concatenate`` on axis 3 against ``np.concatenate``."""
    left = np.random.random([5, 6, 7, 8]).astype(np.float32)
    right = np.random.random([5, 6, 7, 1]).astype(np.float32)
    expected = np.concatenate((left, right), axis=3)

    left_t = tensor.Tensor(device=dev, data=left)
    right_t = tensor.Tensor(device=dev, data=right)
    joined = tensor.concatenate((left_t, right_t), 3)

    np.testing.assert_array_almost_equal(tensor.to_numpy(joined), expected)
def test_sgd_const_lr(self, dev=cpu_dev):
    """Check one plain SGD step (lr=0.1) against ``w - lr * g``."""
    # NOTE(review): graph mode is disabled on cpu_dev even when ``dev`` is
    # a different device - confirm this is intended.
    cpu_dev.EnableGraph(False)
    sgd = opt.SGD(lr=0.1)

    shape = (2, 3)
    w = tensor.Tensor(shape, device=dev).set_value(0.1)
    g = tensor.Tensor(shape, device=dev).set_value(0.1)

    # The expected value is computed before the in-place update below.
    expected = w - 0.1 * g

    sgd.apply(w.name, w, g)
    assertTensorEqual(w, expected)
def _run_test(dev, singa_op, np_op, s1, s2):
    """Compare a binary SINGA op with its numpy counterpart on random data.

    Args:
        dev: device on which the input tensors are created.
        singa_op: SINGA function invoked through ``tensor._call_singa_func``.
        np_op: numpy function producing the expected result.
        s1: shape of the first operand.
        s2: shape of the second operand.
    """
    lhs = np.random.random(s1).astype(np.float32)
    rhs = np.random.random(s2).astype(np.float32)
    lhs_t = tensor.Tensor(device=dev, data=lhs)
    rhs_t = tensor.Tensor(device=dev, data=rhs)

    result = tensor._call_singa_func(singa_op, lhs_t.data, rhs_t.data)
    result.to_host()

    np.testing.assert_array_almost_equal(tensor.to_numpy(result),
                                         np_op(lhs, rhs))
def test_sgd_const_lr_momentum_weight_decay(self, dev=cpu_dev):
    """Check one SGD step with weight decay (lr=0.1, weight_decay=0.2).

    Despite the test name, no momentum is configured on the optimizer.
    """
    sgd = opt.SGD(lr=0.1, weight_decay=0.2)

    shape = (2, 3)
    w = tensor.Tensor(shape, device=dev).set_value(0.1)
    g = tensor.Tensor(shape, device=dev).set_value(0.01)

    # Reference: the decay term 0.2 * w is added to the gradient.
    decayed_grad = g + 0.2 * w
    expected = w - 0.1 * decayed_grad

    sgd.apply(w.name, w, g)
    assertTensorEqual(w, expected)
def test_sgd_const_lr_momentum_nesterov(self, dev=cpu_dev):
    """Check the first Nesterov-momentum SGD step (lr=0.1, momentum=0.9)."""
    sgd = opt.SGD(lr=0.1, momentum=0.9, nesterov=True)

    shape = (2, 3)
    w = tensor.Tensor(shape, device=dev).set_value(0.1)
    g = tensor.Tensor(shape, device=dev).set_value(0.1)

    # The reference uses the gradient itself as the momentum buffer.
    momentum_buf = g
    expected = w - 0.1 * (g + 0.9 * momentum_buf)

    sgd.apply(w.name, w, g)
    assertTensorEqual(w, expected)
def test_mult_inputs(self):
    """Feed two named inputs through two ReLU layers merged by one layer."""
    ffn = net.FeedForwardNet(loss.SoftmaxCrossEntropy())
    relu1 = ffn.add(layer.Activation('relu1', input_sample_shape=(2, )), [])
    relu2 = ffn.add(layer.Activation('relu2', input_sample_shape=(2, )), [])
    ffn.add(layer.Merge('merge', input_sample_shape=(2, )), [relu1, relu2])

    x1 = tensor.Tensor((2, 2))
    x1.set_value(1.1)
    x2 = tensor.Tensor((2, 2))
    x2.set_value(0.9)

    feed = {'relu1': x1, 'relu2': x2}
    merged = tensor.to_numpy(ffn.forward(False, feed))
    # The expected average is 1.1 + 0.9 = 2.
    self.assertAlmostEqual(np.average(merged), 2)
def singa_to_onnx(epochs, use_cpu=False, batchsize=32):
    """Train a small CNN for ``epochs`` epochs and save it as ``cnn.onnx``.

    Args:
        epochs: number of training epochs.
        use_cpu: forwarded to ``common`` to pick the data and device.
        batchsize: samples per mini-batch.
    """
    sgd = opt.SGD(lr=0.1)

    # operations initialization
    conv1 = autograd.Conv2d(1, 8, 3, 2, padding=1)  # 28 - 14
    conv2 = autograd.Conv2d(8, 4, 3, 2, padding=1)  # 14 - 7
    pooling = autograd.MaxPool2d(3, 2, padding=1)  # 7 - 4
    linear = autograd.Linear(64, 10)

    def forward(x, t):
        # conv -> relu -> conv -> relu -> pool -> flatten -> linear
        hidden = autograd.relu(conv1(x))
        hidden = autograd.relu(conv2(hidden))
        hidden = autograd.flatten(pooling(hidden))
        y = linear(hidden)
        return autograd.softmax_cross_entropy(y, t), y

    autograd.training = True
    (x_train, y_train), (x_test, y_test), dev = common(use_cpu)

    niter = 1  # x_train.shape[0] // batchsize
    for epoch in range(epochs):
        accuracy_rate = 0.0
        loss_rate = 0.0
        for i in range(niter):
            batch = slice(i * batchsize, (i + 1) * batchsize)
            inputs = tensor.Tensor(
                device=dev,
                data=x_train[batch],
                stores_grad=False,
                name="input",
            )
            targets = tensor.Tensor(
                device=dev,
                data=y_train[batch],
                requires_grad=False,
                stores_grad=False,
                name="target",
            )
            loss, y = forward(inputs, targets)
            accuracy_rate += accuracy(tensor.to_numpy(y), y_train[batch])
            loss_rate += tensor.to_numpy(loss)[0]
            for p, gp in autograd.backward(loss):
                sgd.update(p, gp)
        print("accuracy is {}, loss is {}".format(accuracy_rate / niter,
                                                  loss_rate / niter))

    # Export using the tensors of the last processed batch.
    model = sonnx.to_onnx_model([inputs], [y])
    sonnx.save(model, "cnn.onnx")
def test_lstm_model(self, dev=gpu_dev):
    """Fit an ``LSTMModel`` to a tiny hand-written dataset for 1000 steps."""
    hidden_size = 3
    seq_length = 2
    batch_size = 4
    bidirectional = False
    num_layers = 2
    return_sequences = False
    batch_first = True
    rnn_mode = "lstm"

    # manual test case; laid out as (batch, seq, feature), where the
    # feature size equals hidden_size here
    x_data = np.array(
        [[[0, 0, 1], [0, 1, 0]], [[0, 1, 0], [1, 0, 0]],
         [[0, 0, 1], [0, 1, 0]], [[1, 0, 0], [0, 0, 1]]],
        dtype=np.float32).reshape(batch_size, seq_length, hidden_size)

    if not return_sequences:
        # one target row per sample: (batch, hidden)
        y_data = np.array(
            [[1, 0, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0]],
            dtype=np.float32).reshape(batch_size, hidden_size)
    else:
        y_data = np.array(
            [[[0, 1, 0], [1, 0, 0]], [[1, 0, 0], [0, 0, 1]],
             [[0, 1, 0], [1, 0, 0]], [[0, 0, 1], [0, 1, 0]]],
            dtype=np.float32).reshape(batch_size, seq_length, hidden_size)
        # NOTE(review): the reshaped array is discarded, so y_data keeps
        # its 3-D shape - confirm whether an assignment was intended.
        y_data.reshape(batch_size, -1)

    x = tensor.Tensor(device=dev, data=x_data)
    y_t = tensor.Tensor(device=dev, data=y_data)

    m = LSTMModel(hidden_size, seq_length, batch_size, bidirectional,
                  num_layers, return_sequences, rnn_mode, batch_first)
    m.compile([x], is_train=True, use_graph=False, sequential=False)

    m.train()
    for step in range(1000):
        y = m.forward(x)
        assert y.shape == y_t.shape
        loss = autograd.softmax_cross_entropy(y, y_t)
        if step % 100 == 0:
            print("loss", loss)
        m.optimizer(loss)

    m.eval()
    y = m.forward(x)
    loss = autograd.softmax_cross_entropy(y, y_t)
    print("eval loss", loss)
def test_concat(self):
    """Check forward and backward of ``layer.Concat`` on axis 0."""
    t1 = tensor.Tensor((2, 3))
    t2 = tensor.Tensor((1, 3))
    t1.set_value(1)
    t2.set_value(2)
    lyr = layer.Concat('concat', 0, [(3, ), (3, )])

    # Forward: 6 ones + 3 twos = 12. assertEqual replaces the deprecated
    # assertEquals alias, which was removed in Python 3.12.
    t = lyr.forward(model_pb2.kTrain, [t1, t2])
    tnp = tensor.to_numpy(t)
    self.assertEqual(np.sum(tnp), 12)

    # Backward: the first returned gradient belongs to t1 (6 elements),
    # each equal to the upstream value 1.5.
    t3 = tensor.Tensor((3, 3))
    t3.set_value(1.5)
    grads, _ = lyr.backward(model_pb2.kTrain, [t3])
    gnp = tensor.to_numpy(grads[0])
    self.assertEqual(np.sum(gnp), 6 * 1.5)
def test_transpose_and_mul(self):
    """Multiply a fully transposed GPU tensor with another tensor.

    The result is compared with numpy's ``x.transpose() * y``.
    """
    shape = [3, 2, 1, 1]
    lhs = np.random.random(shape).astype(np.float32)
    rhs = np.random.random(shape).astype(np.float32)

    lhs_t = tensor.Tensor(device=gpu_dev, data=lhs)
    rhs_t = tensor.Tensor(device=gpu_dev, data=rhs)

    # Reverse all four axes, mirroring numpy's argument-less transpose().
    lhs_rev = lhs_t.transpose([3, 2, 1, 0])
    product = lhs_rev * rhs_t

    np.testing.assert_array_almost_equal(tensor.to_numpy(product),
                                         lhs.transpose() * rhs)
def test_const_decay_scheduler(self, dev):
    """Check ``opt.Constant(0.2)`` yields 0.2 at step 0 and at step 1."""
    schedule = opt.Constant(0.2)
    step = tensor.Tensor((1, ), device=dev).set_value(0)

    # Result unused; the assertions below evaluate the schedule again.
    lr_val = schedule(step)

    np.testing.assert_array_almost_equal(tensor.to_numpy(schedule(step)),
                                         [0.2])
    step += 1
    np.testing.assert_array_almost_equal(tensor.to_numpy(schedule(step)),
                                         [0.2])
def predict(img, gender, model, args=None):
    """Predict bone age from ``img`` for the given ``gender``.

    Args:
        img: image passed to ``image2array``.
        gender: ``'female'`` or ``'male'``; selects output 0 or 1.
        model: callable applied to the input tensor.
        args: not used by this function.

    Returns:
        dict: ``{'predicted bone age': float}``.

    Raises:
        AssertionError: if ``gender`` is neither ``'female'`` nor ``'male'``.
    """
    assert gender in ('female', 'male'), 'please input gender(female or male)'
    autograd.training = False

    img_array = image2array(img)
    # ``dev`` is resolved from the enclosing module.
    inputs = tensor.Tensor(device=dev,
                           data=img_array,
                           requires_grad=False,
                           stores_grad=False)
    outputs = tensor.to_numpy(model(inputs))[0]

    # Non-positive outputs are replaced by 1.
    outputs[outputs <= 0] = 1

    column = 0 if gender == 'female' else 1
    return {'predicted bone age': float(outputs[column])}
def _cTensor_to_pyTensor(cTensor):
    """Wrap a raw C tensor in a fresh ``tensor.Tensor``.

    The wrapper's ``shape``, ``device`` and ``dtype`` are copied from the
    wrapped tensor.
    """
    wrapper = tensor.Tensor()
    wrapper.data = cTensor
    raw = wrapper.data
    wrapper.shape = tuple(raw.shape())
    wrapper.device = raw.device()
    wrapper.dtype = raw.data_type()
    return wrapper
def _concat_helper(self, dev):
    """Check ``singa_api.ConcatOn`` on axis 3 against ``np.concatenate``."""
    left = np.random.random([5, 6, 7, 8]).astype(np.float32)
    right = np.random.random([5, 6, 7, 1]).astype(np.float32)
    expected = np.concatenate((left, right), axis=3)

    left_t = tensor.Tensor(device=dev, data=left)
    right_t = tensor.Tensor(device=dev, data=right)

    # ConcatOn takes a VecTensor of raw C tensors.
    ctensors = singa_api.VecTensor()
    for py_tensor in (left_t, right_t):
        ctensors.append(py_tensor.data)

    joined_ct = singa_api.ConcatOn(ctensors, 3)

    np.testing.assert_array_almost_equal(
        tensor.to_numpy(_cTensor_to_pyTensor(joined_ct)), expected)
def handle_odd_pad_fwd(x, odd_padding):
    """Zero-pad a raw tensor on axes 2 and 3 (odd padding, forward pass).

    Args:
        x: the raw input tensor.
        odd_padding: pad widths, consumed in the order (axis 2 left,
            axis 2 right, axis 3 left, axis 3 right); zero entries are
            skipped.

    Returns:
        The raw data of the padded tensor.
    """
    padded = tensor.from_raw_tensor(x)
    # (axis, True for left padding / False for right padding)
    sides = ((2, True), (2, False), (3, True), (3, False))
    for (axis, left), pad in zip(sides, odd_padding):
        if pad != 0:
            # Same shape as the current tensor, except ``pad`` along ``axis``.
            block_shape = list(padded.data.shape())
            block_shape[axis] = pad
            zeros = tensor.Tensor(device=x.device(),
                                  data=np.zeros(block_shape,
                                                dtype=np.float32))
            parts = (zeros, padded) if left else (padded, zeros)
            padded = tensor.concatenate(parts, axis)
    return padded.data