def test_rnn_with_seq_lengths(self, dev=gpu_dev):
    """Smoke-test LSTMModel3 training and evaluation with explicit sequence lengths.

    Runs ten optimisation steps followed by one evaluation pass and prints the
    MSE loss each time. Nothing is asserted; the test passes if no call raises.
    """
    bs, seq_length, hidden_size, em_size = 2, 3, 2, 2

    inputs_np = np.array([[[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
                          [[0.3, 0.3], [0.4, 0.4], [0.0, 0.0]]]).astype(np.float32)
    targets_np = np.array([[0.4, 0.4], [0.5, 0.5]]).astype(np.float32)
    lengths_np = np.array([3, 2]).astype(np.int32)

    x = tensor.from_numpy(inputs_np)
    x.to_device(dev)
    y = tensor.from_numpy(targets_np)
    y.to_device(dev)
    # Unlike x and y, the lengths tensor is not moved to `dev`.
    seq_lengths = tensor.from_numpy(lengths_np)

    m = LSTMModel3(hidden_size)
    m.compile([x, seq_lengths],
              is_train=True,
              use_graph=False,
              sequential=False)

    def run_and_report(tag):
        # One forward pass plus loss computation, with the loss echoed to stdout.
        prediction = m.forward(x, seq_lengths)
        current_loss = autograd.mse_loss(prediction, y)
        print(tag, tensor.to_numpy(current_loss))
        return current_loss

    m.train()
    for _ in range(10):
        m.optimizer(run_and_report("train l:"))

    m.eval()
    run_and_report(" eval l:")
def test_batch_norm(self):
    """Export a batch-norm op to ONNX, run it through the backend and compare outputs."""
    x_np = np.array([[[[-1, 0, 1]], [[2, 3, 4]]]]).astype(np.float32)
    scale_np = np.array([1.0, 1.5]).astype(np.float32)
    bias_np = np.array([0, 1]).astype(np.float32)
    mean_np = np.array([0, 3]).astype(np.float32)
    var_np = np.array([1, 1.5]).astype(np.float32)

    # Wrap every operand as a tensor and move it to the GPU device.
    operands = []
    for arr in (x_np, scale_np, bias_np, mean_np, var_np):
        t = tensor.from_numpy(arr)
        t.to_device(gpu_dev)
        operands.append(t)
    x, s, bias, mean, var = operands

    handle = singa.CudnnBatchNormHandle(0.9, x.data)
    y = autograd.batchnorm_2d(handle, x, s, bias, mean, var)

    # frontend: export the recorded graph
    model = sonnx.to_onnx([x, s, bias, mean, var], [y])

    # backend: re-import the model and run it on the same inputs
    sg_ir = sonnx.prepare(model, device=gpu_dev)
    y_t = sg_ir.run([x, s, bias, mean, var])

    expected = tensor.to_numpy(y)
    actual = tensor.to_numpy(y_t[0])
    np.testing.assert_array_almost_equal(expected, actual, decimal=5)
def sample(model, data, dev, nsamples=100, use_max=False):
    """Interactively generate text from a character-level model.

    Loops until the user answers 'n'. For every other answer a seed string is
    read, fed to the model as one-hot vectors, and `nsamples` further characters
    are written to stdout.

    Args:
        model: callable taking a list of tensors; must provide `reset_states`.
        data: object exposing `vocab_size`, `char_to_idx` and `idx_to_char`.
        dev: device the input tensors are moved to.
        nsamples: number of characters generated after the seed.
        use_max: pick the most probable character instead of sampling.
    """

    def one_hot(index):
        # Build a (1, vocab_size) one-hot tensor on `dev` for a character index.
        vec = np.zeros((1, data.vocab_size), dtype=np.float32)
        vec[0, index] = 1
        encoded = tensor.from_numpy(vec)
        encoded.to_device(dev)
        return encoded

    while True:
        answer = input('Do you want to sample text from the model [y/n]')
        if answer == 'n':
            return

        seed = input('Please input some seeding text, e.g., #include <c: ')
        inputs = [one_hot(data.char_to_idx[ch]) for ch in seed]
        model.reset_states(dev)
        outputs = model(inputs)
        y = tensor.softmax(outputs[-1])
        sys.stdout.write(seed)

        for _ in range(nsamples):
            prob = tensor.to_numpy(y)[0]
            if use_max:
                cur = np.argmax(prob)
            else:
                cur = np.random.choice(data.vocab_size, 1, p=prob)[0]
            sys.stdout.write(data.idx_to_char[cur])
            # Feed the character just emitted back in as the next input.
            outputs = model([one_hot(cur)])
            y = tensor.softmax(outputs[-1])
def test_momentum(self):
    """Compare three steps of opt.SGD(momentum=0.9) with the NumPy reference np_momentum."""
    lr = 0.1
    n, m = 2, 2
    shape = (n, m)

    p1 = np.random.rand(*shape)
    p2 = np.random.rand(*shape)
    g1 = np.random.rand(*shape) * 0.01
    g2 = np.random.rand(*shape) * 0.01
    v1 = np.zeros(shape)
    v2 = np.zeros(shape)

    # Tensors are created before the reference update touches the arrays.
    t1 = tensor.from_numpy(p1)
    t2 = tensor.from_numpy(p2)
    tg1 = tensor.from_numpy(g1)
    tg2 = tensor.from_numpy(g2)

    steps = range(1, 4)
    for step in steps:
        np_momentum([p1, p2], [g1, g2], [v1, v2], lr, step)

    momentum = opt.SGD(lr, momentum=0.9)
    for step in steps:
        momentum.apply(0, tg1, t1, 'p1', step)
        momentum.apply(0, tg2, t2, 'p2', step)

    actual_values = [tensor.to_numpy(t1), tensor.to_numpy(t2)]
    for actual, expected in zip(actual_values, [p1, p2]):
        for row in range(n):
            for col in range(m):
                self.assertAlmostEqual(actual[row, col], expected[row, col], 2)
def test_adagrad_cuda(self):
    """Compare three steps of opt.AdaGrad with the NumPy reference np_adagrad.

    `self.to_cuda()` is invoked before the optimizer is applied; what it moves
    is defined by the enclosing test class.
    """
    lr = 0.1
    n, m = 2, 2
    shape = (n, m)

    p1 = np.random.rand(*shape)
    p2 = np.random.rand(*shape)
    g1 = np.random.rand(*shape) * 0.01
    g2 = np.random.rand(*shape) * 0.01
    v1 = np.zeros(shape)
    v2 = np.zeros(shape)

    t1 = tensor.from_numpy(p1)
    t2 = tensor.from_numpy(p2)
    tg1 = tensor.from_numpy(g1)
    tg2 = tensor.from_numpy(g2)

    steps = range(1, 4)
    for step in steps:
        np_adagrad([p1, p2], [g1, g2], [v1, v2], lr, step)

    adagrad = opt.AdaGrad(lr=lr)
    self.to_cuda()
    for step in steps:
        adagrad.apply(0, tg1, t1, 'p1', step)
        adagrad.apply(0, tg2, t2, 'p2', step)

    actual_values = [tensor.to_numpy(t1), tensor.to_numpy(t2)]
    for actual, expected in zip(actual_values, [p1, p2]):
        for row in range(n):
            for col in range(m):
                self.assertAlmostEqual(actual[row, col], expected[row, col], 2)
def test_adam(self):
    """Compare nine steps of opt.Adam with the NumPy reference np_adam (6 decimal places)."""
    lr = 0.1
    n, m = 4, 6
    shape = (n, m)

    p1 = np.random.rand(*shape)
    p2 = np.random.rand(*shape)
    g1 = np.random.rand(*shape) * 0.01
    g2 = np.random.rand(*shape) * 0.01
    # Moment buffers used only by the NumPy reference implementation.
    m1 = np.zeros(shape)
    m2 = np.zeros(shape)
    v1 = np.zeros(shape)
    v2 = np.zeros(shape)

    t1 = tensor.from_numpy(p1)
    t2 = tensor.from_numpy(p2)
    tg1 = tensor.from_numpy(g1)
    tg2 = tensor.from_numpy(g2)

    steps = range(1, 10)
    for step in steps:
        np_adam([p1, p2], [g1, g2], [m1, m2], [v1, v2], lr, step)

    adam = opt.Adam(lr=lr)
    for step in steps:
        adam.apply(0, tg1, t1, 'p1', step)
        adam.apply(0, tg2, t2, 'p2', step)

    actual_values = [tensor.to_numpy(t1), tensor.to_numpy(t2)]
    for actual, expected in zip(actual_values, [p1, p2]):
        for row in range(n):
            for col in range(m):
                self.assertAlmostEqual(actual[row, col], expected[row, col], 6)
def setUp(self):
    """Create the score/label tensors and the top-2 Recall metric shared by the tests."""
    scores = np.asarray([[0.7, 0.2, 0.1],
                         [0.2, 0.4, 0.5],
                         [0.2, 0.4, 0.4]],
                        dtype=np.float32)
    truth = np.asarray([[1, 0, 1],
                        [1, 1, 1],
                        [1, 0, 0]],
                       dtype=np.int32)
    self.recall = metric.Recall(top_k=2)
    self.x, self.y = tensor.from_numpy(scores), tensor.from_numpy(truth)
def test_numpy_convert(self):
    """Round-trip integer and float32 arrays through tensor.from_numpy / to_numpy."""
    # `np.int` was only an alias of the builtin `int`; it was deprecated in
    # NumPy 1.20 and removed in 1.24, so the builtin is used directly.
    a = np.asarray([[1, 0, 0], [0, 1, 0]], dtype=int)
    t = tensor.from_numpy(a)
    b = tensor.to_numpy(t)
    self.assertEqual(np.sum(a - b), 0)

    a = np.asarray([[1, 0, 0], [0, 1, 0]], dtype=np.float32)
    t = tensor.from_numpy(a)
    b = tensor.to_numpy(t)
    self.assertEqual(np.sum(a - b), 0.)
def setUp(self):
    """Store the NumPy fixtures and their tensor counterparts on the test case."""
    self.x_np = np.asarray([[0.9, 0.2, 0.1],
                            [0.1, 0.4, 0.5],
                            [0.2, 0.4, 0.4]],
                           dtype=np.float32)
    self.y_np = np.asarray([[1, 0, 1],
                            [0, 1, 1],
                            [1, 0, 0]],
                           dtype=np.float32)
    self.x, self.y = (tensor.from_numpy(arr) for arr in (self.x_np, self.y_np))
def test_numpy_convert(self):
    """Check that from_numpy followed by to_numpy reproduces int and float32 arrays."""
    # The `np.int` alias (deprecated in NumPy 1.20, removed in 1.24) was just
    # the builtin `int`, so using the builtin keeps the dtype unchanged.
    a = np.asarray([[1, 0, 0], [0, 1, 0]], dtype=int)
    t = tensor.from_numpy(a)
    b = tensor.to_numpy(t)
    self.assertEqual(np.sum(a - b), 0)

    a = np.asarray([[1, 0, 0], [0, 1, 0]], dtype=np.float32)
    t = tensor.from_numpy(a)
    b = tensor.to_numpy(t)
    self.assertEqual(np.sum(a - b), 0.)
def setUp(self):
    """Prepare float32 input/label arrays and wrap each one as a tensor."""
    x_values = [[0.9, 0.2, 0.1], [0.1, 0.4, 0.5], [0.2, 0.4, 0.4]]
    y_values = [[1, 0, 1], [0, 1, 1], [1, 0, 0]]

    self.x_np = np.asarray(x_values, dtype=np.float32)
    self.y_np = np.asarray(y_values, dtype=np.float32)

    self.x = tensor.from_numpy(self.x_np)
    self.y = tensor.from_numpy(self.y_np)
def load_states(self, fpath):
    """Load the model states and auxiliary states from disk.

    The archive is unpacked into a private temporary directory that is
    removed again when loading finishes, whether it succeeds or raises.

    Usage:
        m = MyModel()
        m.compile(...)
        aux_states = m.load_states('mymodel.zip')

    Args:
        fpath: path of the zip archive to read; it must name an existing
            file (the path is used as given, nothing is appended to it).

    Returns:
        dict mapping the name of every auxiliary state to its tensor.
        Model states are not returned; they are applied through
        `self.set_states`.

    Raises:
        AssertionError: if `fpath` is not an existing file.
    """
    import tempfile

    assert os.path.isfile(fpath), (
        "Failed to load states, %s is not exist." % fpath)

    model_states = dict()
    aux_states = dict()

    # A unique, automatically cleaned-up directory avoids name collisions and
    # leftover files when extraction or parsing fails half-way.
    with tempfile.TemporaryDirectory(prefix='singa_load_states_') as tmp_dir:
        with zipfile.ZipFile(fpath, 'r') as zf:
            zf.extractall(tmp_dir)

        # Plain concatenation is kept from the original code: the filename
        # constants are presumably written with a leading path separator.
        tensor_dict_fp = tmp_dir + self.TENSOR_DICT_FILENAME
        states_attr_fp = tmp_dir + self.STATES_ATTR_FILENAME

        with open(states_attr_fp) as f:
            states_attr = json.load(f)

        # restore singa tensor from numpy; the archive handle is closed
        # before the directory is deleted
        with np.load(tensor_dict_fp) as tensor_dict:
            for k in tensor_dict.files:
                state_type = states_attr[k]['state_type']
                if state_type == self.MODEL_STATE_TYPE:
                    model_states[k] = tensor.from_numpy(tensor_dict[k])
                elif state_type == self.AUX_STATE_TYPE:
                    aux_states[k] = tensor.from_numpy(tensor_dict[k])
                # entries of any other state type are ignored

    # restore model_states
    self.set_states(model_states)
    return aux_states
def test_MeanSquareError(self):
    """Check the MSE loss value and the shape of its input gradient on the GPU."""
    pred_np = np.array([4.3, 5.4, 3.3, 3.6, 5.7, 6.0]).reshape(3, 2).astype(np.float32)
    target_np = np.array([4.4, 5.3, 3.2, 3.7, 5.4, 6.3]).reshape(3, 2).astype(np.float32)

    x = tensor.from_numpy(pred_np)
    t = tensor.from_numpy(target_np)
    for operand in (x, t):
        operand.to_device(gpu_dev)

    loss = autograd.mse_loss(x, t)
    # First element of the backward result is checked as the gradient of x.
    dx = loss.creator.backward()[0]

    self.assertAlmostEqual(tensor.to_numpy(loss), 0.0366666, places=4)
    self.check_shape(dx.shape(), (3, 2))
def numpy2tensors(npx, npy, dev):
    '''Split two arrays into per-step tensor lists placed on `dev`.

    batch, seq, dim -- > seq, batch, dim

    Returns a pair (inputs, labels), each a list with one tensor per index of
    the swapped leading axis of `npx`.
    '''
    seq_major_x = np.swapaxes(npx, 0, 1)
    seq_major_y = np.swapaxes(npy, 0, 1)

    def on_device(arr):
        # Wrap one slice as a tensor and move it to the target device.
        wrapped = tensor.from_numpy(arr)
        wrapped.to_device(dev)
        return wrapped

    inputs, labels = [], []
    for step in range(seq_major_x.shape[0]):
        inputs.append(on_device(seq_major_x[step]))
        labels.append(on_device(seq_major_y[step]))
    return inputs, labels
def test_MeanSquareError(self):
    """Verify the scalar MSE loss and the gradient shape for a 3x2 GPU input."""
    pred_np = np.array([4.3, 5.4, 3.3, 3.6, 5.7, 6.0]).reshape(3, 2).astype(np.float32)
    target_np = np.array([4.4, 5.3, 3.2, 3.7, 5.4, 6.3]).reshape(3, 2).astype(np.float32)

    x = tensor.from_numpy(pred_np)
    t = tensor.from_numpy(target_np)
    x.to_device(gpu_dev)
    t.to_device(gpu_dev)

    loss = autograd.mse_loss(x, t)
    dx = loss.creator.backward()[0]

    # The loss comes back as an array; its first element is the value compared.
    loss_value = tensor.to_numpy(loss)[0]
    self.assertAlmostEqual(loss_value, 0.0366666, places=4)
    self.check_shape(dx.shape(), (3, 2))
def gradients_check(self, func, param, autograds, h=0.0005, df=1):
    """Check analytic gradients against central finite differences.

    Every element of `param` is perturbed by +h and -h in turn, `func` is
    re-evaluated, and the numerical gradient is compared to `autograds`
    (absolute difference, 2 decimal places).

    Args:
        func: zero-argument callable returning the output tensor computed
            from the current value of `param`.
        param: tensor perturbed in place; it is restored after every probe.
        autograds: numpy array of analytic gradients, indexed like `param`.
        h: perturbation size.
        df: factor multiplied element-wise with the output difference
            before summation.
    """
    p = tensor.to_numpy(param)
    it = np.nditer(p, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index

        # One-hot perturbation of size h at the current index.
        diff = np.zeros_like(p)
        diff[idx] += h
        diff = tensor.from_numpy(diff)
        diff.to_device(gpu_dev)

        param += diff  # param == p + h
        pos = tensor.to_numpy(func())

        param -= diff
        param -= diff  # param == p - h
        neg = tensor.to_numpy(func())

        # Undo the perturbation. Without this the element stays at p - h, so
        # later probes would run at a shifted point and the caller's
        # parameter would be left modified.
        param += diff

        numerical_grad = np.sum((pos - neg) * df) / (2 * h)
        # Absolute tolerance: equal when rounded to 2 decimal places.
        self.assertAlmostEqual(
            autograds[idx] - numerical_grad, 0., places=2)
        it.iternext()
def inference(self, data, batchsize=1, model_path='model'):
    """Run the embed -> lstm -> dense -> softmax pipeline in eval mode.

    Args:
        data: raw batch passed to `rm_padding` and `convert`.
        batchsize: number of sequences in `data`.
        model_path: accepted but not used by this method.

    Returns:
        Column 1 of the softmax output as a numpy array.
    """
    lens = rm_padding(data)

    arr = convert(data, batchsize, self.seq_length, self.vocab_size,
                  self.dev)
    flat_shape = (batchsize * self.seq_length, self.vocab_size)
    arr = np.swapaxes(arr, 0, 1).reshape(flat_shape)
    inputs = tensor.from_numpy(arr)
    inputs.to_device(self.dev)
    embed = self.embed.forward(model_pb2.kEval, inputs)

    # Cut the flat embedding output into one (batchsize, embed_size) tensor
    # per time step.
    step_size = batchsize * self.embed_size
    lstm_inputs = []
    for step in range(self.seq_length):
        piece = tensor.Tensor((batchsize, self.embed_size), self.dev)
        tensor.copy_data_to_from(piece, embed, step_size, 0,
                                 step * step_size)
        lstm_inputs.append(piece)
    lstm_inputs.append(tensor.Tensor())  # hx
    lstm_inputs.append(tensor.Tensor())  # cx

    hidden = self.lstm.forward(model_pb2.kEval, lstm_inputs)

    # For each sample take the hidden output at index lens[sample] - 1.
    hidden_batch = tensor.Tensor((batchsize, self.hidden_size), self.dev)
    for sample in range(batchsize):
        offset = sample * self.hidden_size
        tensor.copy_data_to_from(hidden_batch, hidden[lens[sample] - 1],
                                 self.hidden_size, offset, offset)

    act = self.dense.forward(model_pb2.kEval, hidden_batch)
    probs = tensor.to_numpy(self.sft.forward(model_pb2.kEval, act))
    return probs[:, 1]
def gradients_check(self, func, param, autograds, h=0.0005, df=1):
    """Compare analytic gradients with central finite differences.

    Each element of `param` is shifted by +h and by -h, `func` is evaluated
    at both points, and the resulting numerical gradient must match
    `autograds` to 2 decimal places (absolute difference).

    Args:
        func: zero-argument callable returning the output tensor for the
            current value of `param`.
        param: tensor modified in place during probing and restored after
            each probe.
        autograds: numpy array of analytic gradients, indexed like `param`.
        h: perturbation size.
        df: factor multiplied element-wise with the output difference
            before summation.
    """
    p = tensor.to_numpy(param)
    it = np.nditer(p, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index

        # Perturbation tensor: h at the probed index, zero elsewhere.
        diff = np.zeros_like(p)
        diff[idx] += h
        diff = tensor.from_numpy(diff)
        diff.to_device(gpu_dev)

        param += diff  # evaluate at p + h
        pos = tensor.to_numpy(func())

        param -= diff
        param -= diff  # evaluate at p - h
        neg = tensor.to_numpy(func())

        # Bring the element back to its original value; otherwise it would
        # stay at p - h for all later probes and after the check returns.
        param += diff

        numerical_grad = np.sum((pos - neg) * df) / (2 * h)
        # Absolute tolerance: equal when rounded to 2 decimal places.
        self.assertAlmostEqual(autograds[idx] - numerical_grad, 0., places=2)
        it.iternext()
def predict(net, dev, synset_list, topk=5):
    '''Interactively predict the labels of images given by path.

    Keeps prompting for an image path until the user enters 'quit'. Each
    existing path is read with `read_image`, run through the net, and the
    `topk` most probable labels are printed.

    Args:
        net: a pretrained neural net providing `predict`
        dev: the device the input tensor is moved to
        synset_list: the synset of labels, indexed by class id
        topk: number of labels printed for each image
    '''
    while True:
        # Read the path as plain text. It must not be passed through eval():
        # that would execute arbitrary user input and would raise NameError
        # for an unquoted path or the word quit.
        img_path = input("Enter input image path('quit' to exit): ")
        if img_path == 'quit':
            return
        if not os.path.exists(img_path):
            print('Path is invalid')
            continue
        img = read_image(img_path)
        # Add a leading batch axis of size one.
        x = tensor.from_numpy(img.astype(np.float32)[np.newaxis, :])
        x.to_device(dev)
        y = net.predict(x)
        y.to_host()
        prob = tensor.to_numpy(y)
        lbl = np.argsort(-prob[0])  # sort prob in descending order
        print([synset_list[lbl[i]] for i in range(topk)])
def serve(agent, use_cpu, parameter_file, topk=5):
    """Serve image-classification requests pulled from `agent` until told to stop.

    Args:
        agent: message endpoint providing `pull()` and `push(type, payload)`.
        use_cpu: run on the default device instead of a CUDA GPU.
        parameter_file: parameter file handed to `create_net`.
        topk: number of labels reported per request.
    """
    if not use_cpu:
        print("runing with gpu")
        dev = device.create_cuda_gpu()
    else:
        print('running with cpu')
        dev = device.get_default_device()
        layer.engine = 'singacpp'

    print('Start intialization............')
    net = create_net((3, 224, 224), parameter_file)
    net.to_device(dev)
    print('End intialization............')

    labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
    channel_means = (123.68, 116.779, 103.939)

    while True:
        key, val = agent.pull()
        if key is None:
            # Nothing pending; wait briefly before polling again.
            time.sleep(0.1)
            continue

        msg_type = MsgType.parse(key)
        if msg_type.is_request():
            try:
                img = imread(val['image'], mode='RGB').astype(np.float32)
                height, width = img.shape[:2]
                for channel, mean in enumerate(channel_means):
                    img[:, :, channel] -= mean
                # Reverse the channel order, then go channel-first.
                img[:, :, [0, 1, 2]] = img[:, :, [2, 1, 0]]
                img = img.transpose((2, 0, 1))
                # Central 224x224 crop.
                top, bottom = (height - 224) // 2, (height + 224) // 2
                left, right = (width - 224) // 2, (width + 224) // 2
                img = img[:, top:bottom, left:right]
                images = np.expand_dims(img, axis=0)
                x = tensor.from_numpy(images.astype(np.float32))
                x.to_device(dev)
                y = net.predict(x)
                prob = np.average(tensor.to_numpy(y), 0)
                # indices of the topk largest probabilities, best first
                best = np.argsort(-prob)[0:topk]
                response = "".join(
                    "%s:%s<br/>" % (labels[i], prob[i]) for i in best)
            except Exception:
                traceback.print_exc()
                response = "Sorry, system error during prediction."
            except SystemExit:
                traceback.print_exc()
                response = "Sorry, error triggered sys.exit() during prediction."
            agent.push(MsgType.kResponse, response)
        elif MsgType.kCommandStop.equal(msg_type):
            print('get stop command')
            agent.push(MsgType.kStatus, "success")
            break
        else:
            print('get unsupported message %s' % str(msg_type))
            agent.push(MsgType.kStatus, "Unknown command")
            break
    # while loop
    print("server stop")
def predict(net, dev, synset_list, topk=5):
    '''Interactively predict the labels of images given by path.

    Keeps prompting for an image path until the user enters 'quit'. Each
    existing path is read with `read_image`, run through the net, and the
    `topk` most probable labels are printed. Written to run unchanged on
    both Python 2 and Python 3.

    Args:
        net: a pretrained neural net providing `predict`
        dev: the device the input tensor is moved to
        synset_list: the synset of labels, indexed by class id
        topk: number of labels printed for each image
    '''
    # `raw_input` exists only on Python 2; on Python 3 `input` already
    # returns the raw string without evaluating it.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    while True:
        img_path = read_line("Enter input image path('quit' to exit): ")
        if img_path == 'quit':
            return
        if not os.path.exists(img_path):
            # Single-argument print(...) gives the same output on 2 and 3.
            print('Path is invalid')
            continue
        img = read_image(img_path)
        # Add a leading batch axis of size one.
        x = tensor.from_numpy(img.astype(np.float32)[np.newaxis, :])
        x.to_device(dev)
        y = net.predict(x)
        y.to_host()
        prob = tensor.to_numpy(y)
        lbl = np.argsort(-prob[0])  # sort prob in descending order
        print([synset_list[lbl[i]] for i in range(topk)])
def _kint_kint_bc(self, dev=gpu_dev):
    """Subtract a (4, 5) int32 tensor from a (3, 4, 5) one and compare with NumPy broadcasting."""
    lhs_np = np.array(
        [[[17, 4, 9, 22, 18], [-9, 9, -1, -1, 4], [1, 14, 7, 1, 4],
          [3, 14, -2, 3, -8]],
         [[-25, 6, 8, -7, 22], [-14, 0, -1, 15, 14], [1, 3, -8, -19, -3],
          [1, 12, 12, -3, -3]],
         [[-10, -14, -17, 19, -5], [-4, -12, 7, -16, -2],
          [-8, 3, -5, -11, 0], [4, 0, 3, -6, -3]]],
        dtype=np.int32)
    rhs_np = np.array(
        [[-6, -3, -8, -17, 1], [-4, -16, 4, -9, 0], [7, 1, 11, -12, 4],
         [-6, -8, -5, -3, 0]],
        dtype=np.int32)

    lhs = tensor.from_numpy(lhs_np)
    rhs = tensor.from_numpy(rhs_np)
    for operand in (lhs, rhs):
        operand.to_device(dev)

    difference = lhs - rhs
    np.testing.assert_array_almost_equal(tensor.to_numpy(difference),
                                         lhs_np - rhs_np)
def serve(agent, use_cpu, parameter_file, topk=5):
    """Serve image-classification requests pulled from `agent` until told to stop.

    Args:
        agent: message endpoint providing `pull()` and `push(type, payload)`.
        use_cpu: run on the default device instead of a CUDA GPU.
        parameter_file: parameter file handed to `create_net`.
        topk: number of labels reported per request.
    """
    if use_cpu:
        print('running with cpu')
        dev = device.get_default_device()
        layer.engine = 'singacpp'
    else:
        print("runing with gpu")
        dev = device.create_cuda_gpu()

    print('Start intialization............')
    net = create_net((3, 224, 224), parameter_file)
    net.to_device(dev)
    print('End intialization............')

    labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
    while True:
        key, val = agent.pull()
        if key is None:
            # Nothing pending; wait briefly before polling again.
            time.sleep(0.1)
            continue
        msg_type = MsgType.parse(key)
        if msg_type.is_request():
            try:
                response = ""
                img = imread(val['image'], mode='RGB').astype(np.float32)
                height, width = img.shape[:2]
                # Per-channel mean subtraction, then channel-order reversal.
                img[:, :, 0] -= 123.68
                img[:, :, 1] -= 116.779
                img[:, :, 2] -= 103.939
                img[:, :, [0, 1, 2]] = img[:, :, [2, 1, 0]]
                img = img.transpose((2, 0, 1))
                # Central 224x224 crop.
                img = img[:, (height - 224) // 2:(height + 224) // 2,
                          (width - 224) // 2:(width + 224) // 2]
                images = np.expand_dims(img, axis=0)
                x = tensor.from_numpy(images.astype(np.float32))
                x.to_device(dev)
                y = net.predict(x)
                prob = np.average(tensor.to_numpy(y), 0)
                # sort and reverse
                idx = np.argsort(-prob)[0:topk]
                for i in idx:
                    response += "%s:%s<br/>" % (labels[i], prob[i])
            except (Exception, SystemExit):
                # A failed prediction (including one that calls sys.exit())
                # must not take the server down, but KeyboardInterrupt is
                # deliberately left to propagate so Ctrl-C still works.
                traceback.print_exc()
                response = "Sorry, system error during prediction."
            agent.push(MsgType.kResponse, response)
        elif MsgType.kCommandStop.equal(msg_type):
            print('get stop command')
            agent.push(MsgType.kStatus, "success")
            break
        else:
            print('get unsupported message %s' % str(msg_type))
            agent.push(MsgType.kStatus, "Unknown command")
            break
    # while loop
    print("server stop")
def matmul_high_dim_helper(self, dev):
    """Check autograd.matmul against np.matmul for 4-D x 4-D and 3-D x 2-D operands."""
    configs = [
        [(1, 12, 7, 64), (1, 12, 64, 7)],
        [(1, 7, 768), (768, 768)],
    ]
    print()
    for x_shape, w_shape in configs:
        x_np = np.random.random(x_shape).astype(np.float32)
        x = tensor.from_numpy(x_np)
        x.to_device(dev)

        w_np = np.random.random(w_shape).astype(np.float32)
        w = tensor.from_numpy(w_np)
        w.to_device(dev)

        expected = np.matmul(x_np, w_np)
        result = autograd.matmul(x, w)
        np.testing.assert_array_almost_equal(tensor.to_numpy(result),
                                             expected, 3)
def predict(net, images, cuda, topk=8):
    """Average the net's predictions over `images` and return the best classes.

    Returns:
        (labels, probs): the `topk` class indices with the highest averaged
        probability, best first, and their averaged probabilities.
    """
    batch = tensor.from_numpy(images.astype(np.float32))
    batch.to_device(cuda)
    output = net.predict(batch)
    output.to_host()

    # Average over axis 0, then order the indices by descending probability.
    prob = np.average(tensor.to_numpy(output), 0)
    ranked = np.argsort(prob)[::-1]
    best = ranked[0:topk]
    return best, prob[best]
def forward(self, flag, x):
    '''pad zeros

    Copies `x` into a zero array of shape `add_to_tuple(x.shape)`, leaving the
    last index of the two trailing axes as zeros, and returns it on x's device.
    '''
    source = tensor.to_numpy(x)
    padded = np.zeros(add_to_tuple(x.shape))
    padded[:, :, :-1, :-1] = source
    result = tensor.from_numpy(padded)
    result.to_device(x.device)
    return result
def forward(self, flag, x):
    '''pad zeros

    The output has shape `add_to_tuple(x.shape)`; the values of `x` occupy
    everything except the last index of the two trailing axes.
    '''
    values = tensor.to_numpy(x)
    out_shape = add_to_tuple(x.shape)
    canvas = np.zeros(out_shape)
    canvas[:, :, :-1, :-1] = values

    padded = tensor.from_numpy(canvas)
    padded.to_device(x.device)
    return padded
def numpy2tensors(num, dev):
    '''Split an array into a list of per-step tensors placed on `dev`.

    batch, seq, dim -- > seq, batch, dim
    '''
    seq_major = np.swapaxes(num, 0, 1)

    def on_device(step_values):
        # Wrap one slice as a tensor and move it to the target device.
        wrapped = tensor.from_numpy(step_values)
        wrapped.to_device(dev)
        return wrapped

    return [on_device(seq_major[step]) for step in range(seq_major.shape[0])]
def test_reshape(self):
    """Reshape a (2, 2, 3) tensor to (2, 6) and compare shape and values with NumPy."""
    source = np.array([[[1.1, 1.1, 1.4], [1.1, 1.1, 1.1]],
                       [[1.1, 1.1, 1.3], [1.6, 1.1, 1.2]]])
    reshaped = tensor.reshape(tensor.from_numpy(source), [2, 6])

    rows, cols = reshaped.shape[0], reshaped.shape[1]
    self.assertAlmostEqual(rows, 2., places=3)
    self.assertAlmostEqual(cols, 6., places=3)
    np.testing.assert_array_almost_equal(tensor.to_numpy(reshaped),
                                         source.reshape((2, 6)))
def _kint_float(self, dev=gpu_dev):
    """Add a Python float to an integer tensor; the result must be float32 and match NumPy."""
    np.random.seed(0)
    ints_np = np.random.randint(0, 10, (2, 3))
    x = tensor.from_numpy(ints_np)
    x.to_device(dev)

    # Random scalar in [0, 100), drawn after the integer matrix.
    scalar = np.random.random((1,))[0] * 100
    total = x + scalar

    self.assertEqual(total.dtype, tensor.float32)
    np.testing.assert_array_almost_equal(tensor.to_numpy(total),
                                         ints_np + scalar)
def test_slice(self):
    """Slice a 3x3 input at column 2 and check the average of both outputs."""
    t = np.zeros((3, 3))
    t[:, :2] = float(2)
    t[:, 2] = float(1)
    lyr = layer.Slice('slice', 1, [2], t.shape)
    out = lyr.forward(model_pb2.kTrain, [tensor.from_numpy(t)])
    t1 = tensor.to_numpy(out[0])
    t2 = tensor.to_numpy(out[1])
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(np.average(t1), 2)
    self.assertEqual(np.average(t2), 1)
def test_Abs(self):
    """Check autograd.abs values on the GPU and the shape returned by its backward."""
    values = np.array([0.8, -1.2, 3.3, -3.6, -0.5, 0.5]).reshape(3, 2).astype(np.float32)
    expected = np.array([0.8, 1.2, 3.3, 3.6, 0.5, 0.5]).reshape(3, 2).astype(np.float32)

    x = tensor.from_numpy(values)
    x.to_device(gpu_dev)

    result = autograd.abs(x)
    # x.data is passed as the incoming gradient of the backward call.
    dx = result.creator.backward(x.data)

    np.testing.assert_array_almost_equal(tensor.to_numpy(result), expected)
    self.check_shape(dx.shape(), (3, 2))
def _4d_matmul_helper(self, dev):
    """Check autograd.matmul against np.matmul for two pairs of 4-D operand shapes."""
    shape_pairs = (
        ((2, 12, 256, 64), (2, 12, 64, 256)),
        ((2, 12, 256, 64), (2, 12, 64, 1024)),
    )
    for lhs_shape, rhs_shape in shape_pairs:
        lhs_np = np.random.randn(*lhs_shape).astype(np.float32)
        rhs_np = np.random.randn(*rhs_shape).astype(np.float32)

        lhs = tensor.from_numpy(lhs_np)
        lhs.to_device(dev)
        rhs = tensor.from_numpy(rhs_np)
        rhs.to_device(dev)

        product = autograd.matmul(lhs, rhs)
        expected = np.matmul(lhs_np, rhs_np)
        np.testing.assert_array_almost_equal(tensor.to_numpy(product),
                                             expected)
def test_concat(self):
    """Export a Concat of two GPU tensors to ONNX, re-run it and compare outputs."""
    first_np = np.random.randn(3, 4, 5).astype(np.float32)
    second_np = np.random.randn(3, 4, 5).astype(np.float32)

    x1 = tensor.from_numpy(first_np)
    x2 = tensor.from_numpy(second_np)
    for operand in (x1, x2):
        operand.to_device(gpu_dev)

    y = autograd.Concat()(x1, x2)[0]

    # frontend: export the recorded graph
    model = sonnx.to_onnx([x1, x2], [y])

    # backend: re-import and execute on the same inputs
    sg_ir = sonnx.prepare(model, device=gpu_dev)
    y_t = sg_ir.run([x1, x2])

    expected = tensor.to_numpy(y)
    actual = tensor.to_numpy(y_t[0])
    np.testing.assert_array_almost_equal(expected, actual, decimal=5)
def test_sum(self):
    """Export a two-input Sum to ONNX, re-run it with the backend and compare outputs."""
    first_np = np.array([0.1, -1.0, 0.4, 4.0, -0.9, 9.0]).reshape(3, 2).astype(np.float32)
    second_np = np.array([0.1, 1.0, 0.4, 4.0, 0.9, 9.0]).reshape(3, 2).astype(np.float32)

    # The inputs are not moved to gpu_dev here; only the backend is prepared on it.
    x = tensor.from_numpy(first_np)
    x1 = tensor.from_numpy(second_np)
    y = autograd.Sum()(x, x1)[0]

    # frontend: export the recorded graph
    model = sonnx.to_onnx([x, x1], [y])

    # backend: re-import and execute on the same inputs
    sg_ir = sonnx.prepare(model, device=gpu_dev)
    y_t = sg_ir.run([x, x1])

    expected = tensor.to_numpy(y)
    actual = tensor.to_numpy(y_t[0])
    np.testing.assert_array_almost_equal(expected, actual, decimal=5)
def numpy2tensors(npx, npy, dev, inputs=None, labels=None):
    '''Convert arrays to tensors, reusing caller-provided tensors when given.

    batch, seq, dim -- > seq, batch, dim

    Args:
        npx: input array; one tensor is produced per index of its swapped
            leading axis.
        npy: label array, flattened to a single column after the axis swap.
        dev: device for newly created tensors.
        inputs: optional list of tensors to overwrite in place.
        labels: optional tensor to overwrite in place.

    Returns:
        (inputs, labels)
    '''
    label_column = np.swapaxes(npy, 0, 1).reshape((-1, 1))
    if not labels:
        labels = tensor.from_numpy(label_column)
        labels.to_device(dev)
    else:
        labels.copy_from_numpy(label_column)

    seq_major = np.swapaxes(npx, 0, 1)
    steps = range(seq_major.shape[0])

    if inputs:
        # Refill the existing tensors instead of allocating new ones.
        for step in steps:
            inputs[step].copy_from_numpy(seq_major[step])
        return inputs, labels

    created = []
    for step in steps:
        x = tensor.from_numpy(seq_major[step])
        x.to_device(dev)
        created.append(x)
    return created, labels
def test_single_input_output(self):
    """Evaluate a two-ReLU feed-forward net and check the summed softmax cross-entropy."""
    ffn = net.FeedForwardNet(loss.SoftmaxCrossEntropy())
    ffn.add(layer.Activation('relu1', input_sample_shape=(2,)))
    ffn.add(layer.Activation('relu2'))

    samples = np.array([[-1, 1], [1, 1], [-1, -2]], dtype=np.float32)
    x = tensor.from_numpy(samples)
    y = tensor.Tensor((3,))
    y.set_value(0)

    out, _ = ffn.evaluate(x, y)

    # `out` is compared after multiplying by 3, the number of samples.
    expected_total = (- math.log(1.0 / (1 + math.exp(1)))
                      - math.log(0.5) - math.log(0.5))
    self.assertAlmostEqual(out * 3, expected_total, 5)
def test_transpose(self):
    """Compare tensor.transpose with np.transpose, with and without explicit axes."""
    flat = [1.1, 1.1, 1.1, 1.1, 1.4, 1.3, 1.1, 1.6, 1.1, 1.1, 1.1, 1.2]
    a = np.reshape(np.array(flat), (2, 3, 2))
    ta = tensor.from_numpy(a)

    # default axes
    expected_default = np.transpose(a)
    actual_default = tensor.to_numpy(tensor.transpose(ta))

    # swap the last two axes
    axes = [0, 2, 1]
    expected_swapped = np.transpose(a, axes)
    actual_swapped = tensor.to_numpy(tensor.transpose(ta, axes))

    self.assertAlmostEqual(np.sum(actual_default - expected_default), 0., places=3)
    self.assertAlmostEqual(np.sum(actual_swapped - expected_swapped), 0., places=3)
def test_einsum(self):
    """Compare tensor.einsum with np.einsum for an element-wise and a contracting spec."""
    flat = [1.1, 1.1, 1.1, 1.1, 1.4, 1.3, 1.1, 1.6, 1.1, 1.1, 1.1, 1.2]
    a = np.reshape(np.array(flat), (2, 3, 2))
    ta = tensor.from_numpy(a)

    elementwise = 'kij,kij->kij'
    expected_elementwise = np.einsum(elementwise, a, a)
    actual_elementwise = tensor.to_numpy(tensor.einsum(elementwise, ta, ta))

    contracting = 'kij,kih->kjh'
    expected_contracting = np.einsum(contracting, a, a)
    actual_contracting = tensor.to_numpy(tensor.einsum(contracting, ta, ta))

    self.assertAlmostEqual(
        np.sum(actual_elementwise - expected_elementwise), 0., places=3)
    self.assertAlmostEqual(
        np.sum(actual_contracting - expected_contracting), 0., places=3)
def test_repeat(self):
    """Compare tensor.repeat with np.repeat for axis=None and axis=1."""
    flat = [1.1, 1.1, 1.1, 1.1, 1.4, 1.3, 1.1, 1.6, 1.1, 1.1, 1.1, 1.2]
    a = np.reshape(np.array(flat), (2, 3, 2))
    ta = tensor.from_numpy(a)

    # repeat every element twice without an axis
    tensor_flat = tensor.repeat(ta, 2, axis=None)
    expected_flat = np.repeat(a, 2, axis=None)
    actual_flat = tensor.to_numpy(tensor_flat)

    # repeat four times along axis 1
    tensor_axis1 = tensor.repeat(ta, 4, axis=1)
    expected_axis1 = np.repeat(a, 4, axis=1)
    actual_axis1 = tensor.to_numpy(tensor_axis1)

    self.assertAlmostEqual(np.sum(actual_flat - expected_flat), 0., places=3)
    self.assertAlmostEqual(np.sum(actual_axis1 - expected_axis1), 0., places=3)
def test_tensordot(self):
    """Compare tensor.tensordot with np.tensordot for an integer and an explicit axes spec."""
    flat = [1.1, 1.1, 1.1, 1.1, 1.4, 1.3, 1.1, 1.6, 1.1, 1.1, 1.1, 1.2]
    a = np.reshape(np.array(flat), (2, 3, 2))
    ta = tensor.from_numpy(a)

    expected_single = np.tensordot(a, a, axes=1)
    actual_single = tensor.to_numpy(tensor.tensordot(ta, ta, axes=1))

    axis_pairs = ([0, 1], [2, 1])
    expected_pairs = np.tensordot(a, a, axes=axis_pairs)
    actual_pairs = tensor.to_numpy(tensor.tensordot(ta, ta, axes=axis_pairs))

    self.assertAlmostEqual(np.sum(actual_single - expected_single), 0., places=3)
    self.assertAlmostEqual(np.sum(actual_pairs - expected_pairs), 0., places=3)
def test_slice(self):
    """Check the Slice layer's forward outputs and the sum of its backward gradient."""
    source = np.zeros((3, 3))
    source[:, :2] = float(2)
    source[:, 2] = float(1)

    lyr = layer.Slice('slice', 1, [2], (3,))
    out = lyr.forward(model_pb2.kTrain, [tensor.from_numpy(source)])
    left = tensor.to_numpy(out[0])
    right = tensor.to_numpy(out[1])
    self.assertEqual(np.average(left), 2)
    self.assertEqual(np.average(right), 1)

    # Gradients: a 3x2 block of ones and a 3x1 block of twos, 6 + 6 = 12 in total.
    grad_left = tensor.Tensor((3, 2))
    grad_right = tensor.Tensor((3, 1))
    grad_left.set_value(1)
    grad_right.set_value(2)
    grad, _ = lyr.backward(model_pb2.kTrain, [grad_left, grad_right])
    self.assertEqual(np.sum(tensor.to_numpy(grad)), 12)
def train(self):
    """Alternately train the discriminator and the generator for `self.epochs` epochs.

    Each epoch runs `self.d_steps` discriminator updates (a real batch with
    target 1.0, then a generated batch with target -1.0) followed by
    `self.g_steps` updates of the generator parameters through
    `self.combined_net` with target 0.0. Every `self.interval` epochs an
    image is saved and the latest losses are logged.
    """
    # Only the first value returned by load_data is used.
    train_data, _, _, _, _, _ = load_data(self.dataset_filepath)
    opt_0 = optimizer.Adam(lr=self.learning_rate) # optimizer for discriminator
    opt_1 = optimizer.Adam(lr=self.learning_rate) # optimizer for generator, aka the combined model
    for (p, specs) in zip(self.dis_net.param_names(), self.dis_net.param_specs()):
        opt_0.register(p, specs)
    for (p, specs) in zip(self.gen_net.param_names(), self.gen_net.param_specs()):
        opt_1.register(p, specs)
    for epoch in range(self.epochs):
        for d_step in range(self.d_steps):
            # Random mini-batch of training samples (indices drawn with replacement).
            idx = np.random.randint(0, train_data.shape[0], self.batch_size)
            real_imgs = train_data[idx]
            real_imgs = tensor.from_numpy(real_imgs)
            real_imgs.to_device(self.dev)
            # Uniform noise in [-1, 1) fed to the generator.
            noise = tensor.Tensor((self.batch_size, self.noise_size))
            noise.uniform(-1, 1)
            noise.to_device(self.dev)
            fake_imgs = self.gen_net.forward(flag=False, x=noise)
            # Target column for the real batch.
            substrahend = tensor.Tensor((real_imgs.shape[0], 1))
            substrahend.set_value(1.0)
            substrahend.to_device(self.dev)
            grads, (d_loss_real, _) = self.dis_net.train(real_imgs, substrahend)
            for (s, p ,g) in zip(self.dis_net.param_names(), self.dis_net.param_values(), grads):
                opt_0.apply_with_lr(epoch, self.learning_rate, g, p, str(s), epoch)
            # Same target tensor reused with value -1.0 for the generated batch.
            substrahend.set_value(-1.0)
            grads, (d_loss_fake, _) = self.dis_net.train(fake_imgs, substrahend)
            for (s, p ,g) in zip(self.dis_net.param_names(), self.dis_net.param_values(), grads):
                opt_0.apply_with_lr(epoch, self.learning_rate, g, p, str(s), epoch)
            d_loss = d_loss_real + d_loss_fake
        for g_step in range(self.g_steps):
            noise = tensor.Tensor((self.batch_size, self.noise_size))
            noise.uniform(-1, 1)
            noise.to_device(self.dev)
            # NOTE(review): real_imgs comes from the discriminator loop above,
            # so this line needs d_steps >= 1 — confirm that is guaranteed.
            substrahend = tensor.Tensor((real_imgs.shape[0], 1))
            substrahend.set_value(0.0)
            substrahend.to_device(self.dev)
            grads, (g_loss, _) = self.combined_net.train(noise, substrahend)
            # Only the generator's parameters are updated from these gradients.
            for (s, p ,g) in zip(self.gen_net.param_names(), self.gen_net.param_values(), grads):
                opt_1.apply_with_lr(epoch, self.learning_rate, g, p, str(s), epoch)
        if epoch % self.interval == 0:
            self.save_image(epoch)
            # Logs the losses of the last generator / discriminator step of this epoch.
            print_log('The {} epoch, G_LOSS: {}, D_LOSS: {}'.format(epoch, g_loss, d_loss))
def predict(net, images, dev, topk=5):
    '''Predict the label of each image.

    Args:
        net, a pretrained neural net
        images, a batch of images [batch_size, 3, 32, 32], which have been
            pre-processed
        dev, the training device
        topk, return the topk labels for each image.

    Returns:
        an integer array with one row per image holding the indices of its
        topk most probable classes, most probable first.
    '''
    x = tensor.from_numpy(images.astype(np.float32))
    x.to_device(dev)
    y = net.predict(x)
    y.to_host()
    prob = tensor.to_numpy(y)
    # Rank the classes of every image by descending probability. The former
    # np.flipud(np.argsort(prob)) reversed the image (row) order of the 2-D
    # array while leaving each row in ascending order, so the least probable
    # classes of the wrong image were returned.
    labels = np.argsort(-prob, axis=1)
    return labels[:, 0:topk]
def test_sum(self):
    """Compare tensor.sum with np.sum over all elements, axis 1 and axis 2."""
    flat = [1.1, 1.1, 1.1, 1.1, 1.4, 1.3, 1.1, 1.6, 1.1, 1.1, 1.1, 1.2]
    a = np.reshape(np.array(flat), (2, 3, 2))
    ta = tensor.from_numpy(a)

    # full reduction
    expected_all = np.sum(a)
    actual_all = tensor.to_numpy(tensor.sum(ta))

    # reduction along axis 1
    expected_axis1 = np.sum(a, axis=1)
    actual_axis1 = tensor.to_numpy(tensor.sum(ta, axis=1))

    # reduction along axis 2
    expected_axis2 = np.sum(a, axis=2)
    actual_axis2 = tensor.to_numpy(tensor.sum(ta, axis=2))

    comparisons = (
        (expected_all, actual_all),
        (expected_axis1, actual_axis1),
        (expected_axis2, actual_axis2),
    )
    for expected, actual in comparisons:
        self.assertAlmostEqual(np.sum(expected - actual), 0., places=3)
def serve(agent, net, use_cpu, parameter_file, topk=5):
    """Serve Inception image-classification requests pulled from `agent`.

    Args:
        agent: message endpoint providing `pull()` and `push(type, payload)`.
        net: 'v3' selects inception_v3; any other value selects inception_v4.
        use_cpu: run on the default device instead of a CUDA GPU.
        parameter_file: pickled parameter file loaded into the net.
        topk: number of labels reported per request.
    """
    if use_cpu:
        print('running with cpu')
        dev = device.get_default_device()
        layer.engine = 'singacpp'
    else:
        print("runing with gpu")
        dev = device.create_cuda_gpu()

    print('Start intialization............')
    # fix the bug when creating net
    if net == 'v3':
        model = inception_v3
    else:
        model = inception_v4
    # From here on `net` is the network object, no longer the version string.
    net, _ = model.create_net(is_training=False)
    net.load(parameter_file, use_pickle=True)
    net.to_device(dev)
    print('End intialization............')

    labels = np.loadtxt('synset_words.txt', str, delimiter='\t').tolist()
    # Extra entry at index 0, so every label from the file shifts by one.
    labels.insert(0, 'empty background')
    while True:
        key, val = agent.pull()
        if key is None:
            # Nothing pending; wait briefly before polling again.
            time.sleep(0.1)
            continue
        msg_type = MsgType.parse(key)
        if msg_type.is_request():
            try:
                response = ""
                ratio = 0.875
                img = image_tool.load_img(val['image'])
                height, width = img.size[0], img.size[1]
                print(img.size)
                crop_h, crop_w = int(height * ratio), int(width * ratio)
                # Centre crop, resize to 299x299, scale to [0, 1].
                img = np.array(
                    image_tool.crop(img, (crop_h, crop_w),
                                    'center').resize(
                                        (299, 299))).astype(
                                            np.float32) / float(255)
                # Map [0, 1] to [-1, 1].
                img -= 0.5
                img *= 2
                # img[:,:,[0,1,2]] = img[:,:,[2,1,0]]
                img = img.transpose((2, 0, 1))
                images = np.expand_dims(img, axis=0)
                x = tensor.from_numpy(images.astype(np.float32))
                x.to_device(dev)
                y = net.predict(x)
                prob = np.average(tensor.to_numpy(y), 0)
                # sort and reverse
                idx = np.argsort(-prob)[0:topk]
                for i in idx:
                    response += "%s:%s<br/>" % (labels[i], prob[i])
            except (Exception, SystemExit):
                # A failed prediction (including one that calls sys.exit())
                # must not take the server down, but KeyboardInterrupt is
                # deliberately left to propagate so Ctrl-C still works.
                traceback.print_exc()
                response = "Sorry, system error during prediction."
            agent.push(MsgType.kResponse, response)
        elif MsgType.kCommandStop.equal(msg_type):
            print('get stop command')
            agent.push(MsgType.kStatus, "success")
            break
        else:
            print('get unsupported message %s' % str(msg_type))
            agent.push(MsgType.kStatus, "Unknown command")
            break
    # while loop
    print("server stop")
def setUp(self):
    """Create a four-element weight vector and gradient, each as NumPy array and tensor."""
    weights = [0.1, 0.2, 0.3, 0.4]
    gradients = [0.1, 0.3, 0.1, 0.2]

    self.np_W = np.array(weights, dtype=np.float32)
    self.W = tensor.from_numpy(self.np_W)

    self.np_g = np.array(gradients, dtype=np.float32)
    self.g = tensor.from_numpy(self.np_g)
def test_conv2D_forward_backward(self):
    """Check Conv2D forward outputs and backward gradients against hand-computed values.

    A 3x3 input holding 1..9 is run through a layer whose nine weights are set
    explicitly and whose bias is 1.0. The four outputs, the nine input
    gradients and the nine weight gradients are each compared with expressions
    written out from `w`, `raw_x` and `dy`.
    """
    in_sample_shape = (1, 3, 3)
    # Positional arguments are presumably (name, kernel count, kernel size,
    # stride) — TODO confirm against layer.Conv2D.
    conv = layer.Conv2D('conv', 1, 3, 2, W_specs=self.w, b_specs=self.b, pad=1, input_sample_shape=in_sample_shape)
    # GPU variant (disabled):
    # cuda = device.create_cuda_gpu()
    # conv.to_device(cuda)
    params = conv.param_values()

    raw_x = np.arange(9, dtype=np.float32) + 1
    x = tensor.from_numpy(raw_x)
    # The return value is discarded, so the reshape is relied on to act on
    # `x` in place — NOTE(review): confirm for the tensor API in use.
    x.reshape((1, 1, 3, 3))
    w = np.array([1, 1, 0, 0, 0, -1, 0, 1, 0], dtype=np.float32)
    params[0].copy_from_numpy(w)
    params[1].set_value(1.0)

    # x.to_device(cuda)
    y = conv.forward(model_pb2.kTrain, x)
    # y.to_host()
    npy = tensor.to_numpy(y).flatten()

    # Forward: the four output values.
    self.assertAlmostEqual(3.0, npy[0])
    self.assertAlmostEqual(7.0, npy[1])
    self.assertAlmostEqual(-3.0, npy[2])
    self.assertAlmostEqual(12.0, npy[3])

    dy = np.asarray([0.1, 0.2, 0.3, 0.4], dtype=np.float32).reshape(y.shape)
    grad = tensor.from_numpy(dy)
    # grad.to_device(cuda)
    (dx, [dw, db]) = conv.backward(model_pb2.kTrain, grad)
    dx.to_host()
    dw.to_host()
    dx = tensor.to_numpy(dx).flatten()
    dw = tensor.to_numpy(dw).flatten()
    dy = dy.flatten()

    # Backward w.r.t. the input: each dx entry combines the output gradients
    # with the weights listed in the expression.
    self.assertAlmostEqual(dy[0] * w[4], dx[0])
    self.assertAlmostEqual(dy[0] * w[5] + dy[1] * w[3], dx[1])
    self.assertAlmostEqual(dy[1] * w[4], dx[2])
    self.assertAlmostEqual(dy[0] * w[7] + dy[2] * w[1], dx[3])
    self.assertAlmostEqual( dy[0] * w[8] + dy[1] * w[6] + dy[2] * w[2] + dy[3] * w[0], dx[4])
    self.assertAlmostEqual(dy[1] * w[7] + dy[3] * w[1], dx[5])
    self.assertAlmostEqual(dy[2] * w[4], dx[6])
    self.assertAlmostEqual(dy[2] * w[5] + dy[3] * w[3], dx[7])
    self.assertAlmostEqual(dy[3] * w[4], dx[8])

    # Backward w.r.t. the weights: each dw entry combines the output gradients
    # with the input values listed in the expression.
    self.assertAlmostEqual(dy[3] * raw_x[4], dw[0])
    self.assertAlmostEqual(dy[3] * raw_x[5] + dy[2] * raw_x[3], dw[1])
    self.assertAlmostEqual(dy[2] * raw_x[4], dw[2])
    self.assertAlmostEqual(dy[1] * raw_x[1] + dy[3] * raw_x[7], dw[3])
    # Only this four-term sum is compared to 5 places instead of the default 7.
    self.assertAlmostEqual( dy[0] * raw_x[0] + dy[1] * raw_x[2] + dy[2] * raw_x[6] + dy[3] * raw_x[8], dw[4], 5)
    self.assertAlmostEqual(dy[0] * raw_x[1] + dy[2] * raw_x[7], dw[5])
    self.assertAlmostEqual(dy[1] * raw_x[4], dw[6])
    self.assertAlmostEqual(dy[0] * raw_x[3] + dy[1] * raw_x[5], dw[7])
    self.assertAlmostEqual(dy[0] * raw_x[4], dw[8])
def backward(self, falg, dy):
    '''remove paddings

    Drops the last index of the two trailing axes of `dy` and returns the
    result on dy's device together with an empty parameter-gradient list.
    '''
    grad_np = tensor.to_numpy(dy)
    cropped = grad_np[:, :, :-1, :-1]
    dx = tensor.from_numpy(cropped)
    dx.to_device(dy.device)
    return dx, []
def sample(model_path, nsamples=100, seed_text='', do_sample=True):
    """Generate text on a CUDA GPU from a pickled character-level LSTM checkpoint.

    Args:
        model_path: path of the pickled checkpoint dictionary.
        nsamples: number of characters generated after the seed.
        seed_text: text used to warm up the LSTM state; it is echoed first.
        do_sample: draw from the predicted distribution when True, otherwise
            take the most probable character.
    """
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    with open(model_path, 'rb') as fd:
        d = pickle.load(fd)
    rnn_w = tensor.from_numpy(d['rnn_w'])
    idx_to_char = d['idx_to_char']
    char_to_idx = d['char_to_idx']
    vocab_size = len(idx_to_char)
    dense_w = tensor.from_numpy(d['dense_w'])
    dense_b = tensor.from_numpy(d['dense_b'])
    hidden_size = d['hidden_size']
    num_stacks = d['num_stacks']
    dropout = d['dropout']

    cuda = device.create_cuda_gpu()

    rnn = layer.LSTM(name='lstm',
                     hidden_size=hidden_size,
                     num_stacks=num_stacks,
                     dropout=dropout,
                     input_sample_shape=(vocab_size,))
    rnn.to_device(cuda)
    rnn.param_values()[0].copy_data(rnn_w)

    dense = layer.Dense('dense', vocab_size, input_sample_shape=(hidden_size,))
    dense.to_device(cuda)
    dense.param_values()[0].copy_data(dense_w)
    dense.param_values()[1].copy_data(dense_b)

    # Zero-initialised hidden and cell states.
    state_shape = (num_stacks, 1, hidden_size)
    hx = tensor.Tensor(state_shape, cuda)
    cx = tensor.Tensor(state_shape, cuda)
    hx.set_value(0.0)
    cx.set_value(0.0)

    def step(char_index, h_state, c_state):
        # Feed one one-hot character; return (probabilities, new hx, new cx).
        one_hot = np.zeros((1, vocab_size), dtype=np.float32)
        one_hot[0, char_index] = 1
        tx = tensor.from_numpy(one_hot)
        tx.to_device(cuda)
        outputs = rnn.forward(False, [tx, h_state, c_state])
        probs = tensor.softmax(dense.forward(False, outputs[0]))
        return probs, outputs[1], outputs[2]

    if seed_text:
        for ch in seed_text:
            y, hx, cx = step(char_to_idx[ch], hx, cx)
        sys.stdout.write(seed_text)
    else:
        # Without a seed, start from a uniform distribution over the vocabulary.
        y = tensor.Tensor((1, vocab_size), cuda)
        y.set_value(1.0 / vocab_size)

    for _ in range(nsamples):
        y.to_host()
        prob = tensor.to_numpy(y)[0]
        if do_sample:
            cur = np.random.choice(vocab_size, 1, p=prob)[0]
        else:
            cur = np.argmax(prob)
        sys.stdout.write(idx_to_char[cur])
        y, hx, cx = step(cur, hx, cx)
    print('')
def train(data_file, use_gpu, num_epoch=10, batch_size=100):
    """Train a visible/hidden weight matrix with positive and negative phases.

    NOTE(review): the structure (visible and hidden biases, sampled hidden
    states, reconstruction error) looks like a restricted Boltzmann
    machine trained with a single reconstruction step -- confirm.

    Each epoch iterates over whole mini-batches of the training set,
    updates the weights and both biases with SGD, prints the summed
    squared reconstruction error, and then prints the same error for the
    whole validation set.

    Args:
        data_file: passed to ``load_train_data`` to obtain the training
            and validation arrays.
        use_gpu: if true, run on a CUDA device; otherwise on the default
            device.
        num_epoch: number of passes over the training data.
        batch_size: number of rows per mini-batch. Trailing rows that do
            not fill a whole batch are skipped.
    """
    print('Start intialization............')
    lr = 0.1  # Learning rate
    weight_decay = 0.0002
    hdim = 1000
    vdim = 784

    tweight = tensor.Tensor((vdim, hdim))
    tweight.gaussian(0.0, 0.1)
    tvbias = tensor.from_numpy(np.zeros(vdim, dtype=np.float32))
    thbias = tensor.from_numpy(np.zeros(hdim, dtype=np.float32))
    opt = optimizer.SGD(momentum=0.5, weight_decay=weight_decay)

    print('Loading data ..................')
    train_x, valid_x = load_train_data(data_file)

    if use_gpu:
        dev = device.create_cuda_gpu()
    else:
        dev = device.get_default_device()

    for t in [tweight, tvbias, thbias]:
        t.to_device(dev)

    # Floor division keeps the batch count an int so range() accepts it
    # under both Python 2 and Python 3.
    num_train_batch = train_x.shape[0] // batch_size
    print("num_train_batch = %d " % (num_train_batch))

    # The per-sample learning rate does not change between updates.
    scaled_lr = lr / batch_size

    for epoch in range(num_epoch):
        trainerrorsum = 0.0
        print('Epoch %d' % epoch)
        for b in range(num_train_batch):
            # positive phase
            tdata = tensor.from_numpy(
                train_x[(b * batch_size):((b + 1) * batch_size), :])
            tdata.to_device(dev)
            tposhidprob = tensor.mult(tdata, tweight)
            tposhidprob.add_row(thbias)
            tposhidprob = tensor.sigmoid(tposhidprob)
            # Compare the probabilities with uniform noise in [0, 1) to
            # obtain the hidden sample.
            tposhidrandom = tensor.Tensor(tposhidprob.shape, dev)
            tposhidrandom.uniform(0.0, 1.0)
            tposhidsample = tensor.gt(tposhidprob, tposhidrandom)

            # negative phase
            tnegdata = tensor.mult(tposhidsample, tweight.T())
            tnegdata.add_row(tvbias)
            tnegdata = tensor.sigmoid(tnegdata)

            tneghidprob = tensor.mult(tnegdata, tweight)
            tneghidprob.add_row(thbias)
            tneghidprob = tensor.sigmoid(tneghidprob)
            error = tensor.sum(tensor.square((tdata - tnegdata)))
            trainerrorsum = error + trainerrorsum

            # Gradients: negative-phase statistics minus positive-phase.
            tgweight = (tensor.mult(tnegdata.T(), tneghidprob) -
                        tensor.mult(tdata.T(), tposhidprob))
            tgvbias = tensor.sum(tnegdata, 0) - tensor.sum(tdata, 0)
            tghbias = tensor.sum(tneghidprob, 0) - tensor.sum(tposhidprob, 0)

            opt.apply_with_lr(epoch, scaled_lr, tgweight, tweight, 'w')
            opt.apply_with_lr(epoch, scaled_lr, tgvbias, tvbias, 'vb')
            opt.apply_with_lr(epoch, scaled_lr, tghbias, thbias, 'hb')

        print('training errorsum = %f' % (trainerrorsum))

        # Reconstruction error on the whole validation set, one pass.
        tvaliddata = tensor.from_numpy(valid_x)
        tvaliddata.to_device(dev)
        tvalidposhidprob = tensor.mult(tvaliddata, tweight)
        tvalidposhidprob.add_row(thbias)
        tvalidposhidprob = tensor.sigmoid(tvalidposhidprob)
        # Draw the noise the same way as in the training phase so both
        # phases sample from the identical [0, 1) range.
        tvalidposhidrandom = tensor.Tensor(tvalidposhidprob.shape, dev)
        tvalidposhidrandom.uniform(0.0, 1.0)
        tvalidposhidsample = tensor.gt(tvalidposhidprob, tvalidposhidrandom)

        tvalidnegdata = tensor.mult(tvalidposhidsample, tweight.T())
        tvalidnegdata.add_row(tvbias)
        tvalidnegdata = tensor.sigmoid(tvalidnegdata)

        validerrorsum = tensor.sum(
            tensor.square((tvaliddata - tvalidnegdata)))
        print('valid errorsum = %f' % (validerrorsum))