def test_bprop(self):
    """
    Compare `bprop` results for cpu and gpu backends when the softmax
    input is produced by a RepeatBlock.
    """
    r = []
    for i in xrange(self.N):
        repeats = self.rng.random_integers(42)
        axis = self.rng.randint(2)
        input_dim, output_dim = self.rng.random_integers(2000, size=2)
        x = self.get_normal_matrix(input_dim, output_dim)
        # repeating along axis 0 multiplies the number of rows
        input_dim = input_dim if axis else input_dim * repeats
        true_labels = self.rng.randint(output_dim, size=(input_dim, 1)).astype(np.int32)
        device_id = 0

        output = {}
        for processor_type in ['gpu', 'cpu']:
            quagga.processor_type = processor_type
            qx = Connector(Matrix.from_npa(x), device_id)
            qtrue_labels = Connector(Matrix.from_npa(true_labels))
            repeat_block = RepeatBlock(qx, repeats, axis)
            sce_block = SoftmaxCeBlock(repeat_block.output, qtrue_labels)
            qx.fprop()
            qtrue_labels.fprop()
            repeat_block.fprop()
            sce_block.fprop()
            sce_block.bprop()
            repeat_block.bprop()
            output[processor_type] = qx.backward_matrix.to_host()

        r.append(np.allclose(output['gpu'], output['cpu']))

    self.assertEqual(sum(r), len(r))

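# `get_normal_matrix` is not defined in this section; it comes from the test
# case's base class. A minimal sketch consistent with its use above (an
# assumption, the real helper may differ):
def get_normal_matrix(self, nrows, ncols):
    # float32 matrix of i.i.d. standard-normal entries
    return self.rng.normal(size=(nrows, ncols)).astype(np.float32)
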
def test_bprop_vector(self):
    """
    Compare `bprop` results for cpu and gpu backends when the softmax
    input consists of rows sliced out of an embedding matrix.
    """
    r = []
    for _ in xrange(self.N):
        embd_dim = self.rng.random_integers(10000)
        batch_size, output_dim = self.rng.random_integers(2000, size=2)
        W = self.get_orthogonal_matrix(embd_dim, output_dim)
        row_idxs = self.rng.randint(embd_dim, size=(batch_size, 1)).astype(np.int32)
        true_labels = self.rng.randint(output_dim, size=(batch_size, 1)).astype(np.int32)
        device_id = 0

        output = {}
        for processor_type in ['gpu', 'cpu']:
            quagga.processor_type = processor_type
            qrow_idxs = Connector(Matrix.from_npa(row_idxs))
            qtrue_labels = Connector(Matrix.from_npa(true_labels))
            qW = Connector(Matrix.from_npa(W), device_id)
            row_slicing_block = RowSlicingBlock(qW, qrow_idxs)
            sce_block = SoftmaxCeBlock(row_slicing_block.output, qtrue_labels)
            qW.fprop()
            qrow_idxs.fprop()
            row_slicing_block.fprop()
            sce_block.fprop()
            sce_block.bprop()
            row_slicing_block.bprop()
            # fold the accumulated gradient into W so the updated
            # matrices can be compared across backends
            qW.add(Context(), qW.backward_matrix)
            output[processor_type] = qW.to_host()

        r.append(np.allclose(output['gpu'], output['cpu']))

    self.assertEqual(sum(r), len(r))

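# `get_orthogonal_matrix` is likewise inherited from the base class and not
# shown here. A common construction (a sketch under that assumption) takes the
# orthonormal factor of an SVD of a random Gaussian matrix:
def get_orthogonal_matrix(self, nrows, ncols):
    a = self.rng.normal(0.0, 1.0, (nrows, ncols))
    u, _, v = np.linalg.svd(a, full_matrices=False)
    # pick whichever factor has the requested shape
    q = u if u.shape == (nrows, ncols) else v
    return q.astype(np.float32)
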
def test_bprop(self):
    """
    Compare `bprop` results for cpu and gpu backends when vertically
    stacked matrices feed the softmax block; only bpropagable inputs
    are checked.
    """
    r = []
    for i in xrange(self.N):
        matrices = []
        ncols = self.rng.random_integers(1, 3000)
        nrows = [0]
        row_slices = []
        device_ids = []
        for _ in xrange(self.rng.random_integers(1, 10)):
            _nrows = self.rng.random_integers(1, 2000)
            nrows.append(nrows[-1] + _nrows)
            if self.rng.choice([True, False]):
                device_ids.append(0)
                row_slices.append((nrows[-2], nrows[-1]))
            else:
                device_ids.append(None)
            matrices.append(self.rng.rand(_nrows, ncols).astype(np.float32))
        true_labels = self.rng.randint(ncols, size=(nrows[-1], 1)).astype(np.int32)
        # `row_slices` is non-empty iff at least one input is bpropagable;
        # otherwise there are no gradients to compare
        if not row_slices:
            r.append(True)
            continue

        output = {}
        for processor_type in ['gpu', 'cpu']:
            quagga.processor_type = processor_type
            qmatrices = [Connector(Matrix.from_npa(m), d_id)
                         for m, d_id in izip(matrices, device_ids)]
            qtrue_labels = Connector(Matrix.from_npa(true_labels))
            vstack_block = VerticalStackBlock(*qmatrices)
            sce_block = SoftmaxCeBlock(vstack_block.output, qtrue_labels)
            for m in qmatrices:
                m.fprop()
            qtrue_labels.fprop()
            vstack_block.fprop()
            sce_block.fprop()
            sce_block.bprop()
            vstack_block.bprop()
            output[processor_type] = [m.backward_matrix.to_host()
                                      for m in qmatrices if m.bpropagable]

        for dL_dm_gpu, dL_dm_cpu in izip(output['gpu'], output['cpu']):
            if not np.allclose(dL_dm_gpu, dL_dm_cpu):
                r.append(False)
                break
        else:
            r.append(True)

    self.assertEqual(sum(r), self.N)

def test_theano_grad(self):
    """
    Compare the gradient w.r.t. the softmax input against Theano, with
    sparse (one-hot) and dense label encodings, with and without a mask.
    """
    quagga.processor_type = 'gpu'
    r = []
    for i in xrange(self.N):
        for sparse in [True, False]:
            batch_size, dim = self.rng.random_integers(2000, size=2)
            if sparse:
                true_labels = np.zeros((batch_size, dim), np.float32)
                for k, j in enumerate(self.rng.randint(dim, size=batch_size)):
                    true_labels[k, j] = 1.0
            else:
                true_labels = self.rng.randint(dim, size=(batch_size, 1)).astype(np.int32)
            x = self.rng.randn(batch_size, dim).astype(np.float32)
            mask = (self.rng.rand(batch_size, 1) < 0.8).astype(np.float32)
            device_id = 0

            for with_mask in [False, True]:
                # Theano model
                th_x = T.fmatrix()
                th_mask = T.fcol()
                th_true_labels = T.fmatrix() if sparse else T.ivector()
                if with_mask:
                    probs = T.nnet.softmax(th_mask * th_x)
                else:
                    probs = T.nnet.softmax(th_x)
                loss = T.mean(T.nnet.categorical_crossentropy(probs, th_true_labels))
                if with_mask:
                    get_theano_grads = theano.function([th_x, th_true_labels, th_mask],
                                                       T.grad(loss, wrt=th_x))
                    th_dL_dx = get_theano_grads(x, true_labels if sparse else true_labels[:, 0], mask)
                else:
                    get_theano_grads = theano.function([th_x, th_true_labels],
                                                       T.grad(loss, wrt=th_x))
                    th_dL_dx = get_theano_grads(x, true_labels if sparse else true_labels[:, 0])

                # quagga model
                x_gpu = Connector(Matrix.from_npa(x), device_id)
                true_labels_gpu = Connector(Matrix.from_npa(true_labels))
                mask_gpu = Connector(Matrix.from_npa(mask)) if with_mask else None
                softmax_ce_block = SoftmaxCeBlock(x_gpu, true_labels_gpu, mask_gpu)
                x_gpu.fprop()
                true_labels_gpu.fprop()
                if with_mask:
                    mask_gpu.fprop()
                softmax_ce_block.fprop()
                softmax_ce_block.bprop()
                q_dL_dx = x_gpu.backward_matrix.to_host()

                r.append(np.allclose(th_dL_dx, q_dL_dx))

    self.assertEqual(sum(r), len(r))

def test_bprop(self):
    """
    Compare `bprop` results for cpu and gpu backends when horizontally
    stacked matrices feed the softmax block; only bpropagable inputs
    are checked.
    """
    r = []
    for i in xrange(self.N):
        matrices = []
        nrows = self.rng.random_integers(1, 3000)
        ncols = [0]
        col_slices = []
        device_ids = []
        for _ in xrange(self.rng.random_integers(1, 10)):
            _ncols = self.rng.random_integers(1, 2000)
            ncols.append(ncols[-1] + _ncols)
            if self.rng.choice([True, False]):
                device_ids.append(0)
                col_slices.append((ncols[-2], ncols[-1]))
            else:
                device_ids.append(None)
            matrices.append(self.rng.rand(nrows, _ncols).astype(np.float32))
        true_labels = self.rng.randint(ncols[-1], size=(nrows, 1)).astype(np.int32)
        # `col_slices` is non-empty iff at least one input is bpropagable;
        # otherwise there are no gradients to compare
        if not col_slices:
            r.append(True)
            continue

        output = {}
        for processor_type in ['gpu', 'cpu']:
            quagga.processor_type = processor_type
            qmatrices = [Connector(Matrix.from_npa(m), d_id)
                         for m, d_id in izip(matrices, device_ids)]
            qtrue_labels = Connector(Matrix.from_npa(true_labels))
            hstack_block = HorizontalStackBlock(*qmatrices)
            sce_block = SoftmaxCeBlock(hstack_block.output, qtrue_labels)
            for m in qmatrices:
                m.fprop()
            qtrue_labels.fprop()
            hstack_block.fprop()
            sce_block.fprop()
            sce_block.bprop()
            hstack_block.bprop()
            output[processor_type] = [m.backward_matrix.to_host()
                                      for m in qmatrices if m.bpropagable]

        for dL_dm_gpu, dL_dm_cpu in izip(output['gpu'], output['cpu']):
            if not np.allclose(dL_dm_gpu, dL_dm_cpu):
                r.append(False)
                break
        else:
            r.append(True)

    self.assertEqual(sum(r), self.N)

def test_bprop(self):
    """
    Compare `bprop` results for cpu and gpu backends.
    """
    r = []
    for i in xrange(self.N):
        for sparse in [True, False]:
            batch_size, dim = self.rng.random_integers(2000, size=2)
            if sparse:
                true_labels = np.zeros((batch_size, dim), np.float32)
                for k, j in enumerate(self.rng.randint(dim, size=batch_size)):
                    true_labels[k, j] = 1.0
            else:
                true_labels = self.rng.randint(dim, size=(batch_size, 1)).astype(np.int32)
            x = self.rng.randn(batch_size, dim).astype(np.float32)
            mask = (self.rng.rand(batch_size, 1) < 0.8).astype(np.float32)
            device_id = 0

            for with_mask in [False, True]:
                quagga.processor_type = 'gpu'
                x_gpu = Connector(Matrix.from_npa(x), device_id)
                true_labels_gpu = Connector(Matrix.from_npa(true_labels))
                mask_gpu = Connector(Matrix.from_npa(mask)) if with_mask else None
                softmax_ce_block = SoftmaxCeBlock(x_gpu, true_labels_gpu, mask_gpu)
                x_gpu.fprop()
                true_labels_gpu.fprop()
                if with_mask:
                    mask_gpu.fprop()
                softmax_ce_block.fprop()
                softmax_ce_block.bprop()
                dL_dx_gpu = x_gpu.backward_matrix.to_host()

                quagga.processor_type = 'cpu'
                x_cpu = Connector(Matrix.from_npa(x), device_id)
                true_labels_cpu = Connector(Matrix.from_npa(true_labels))
                mask_cpu = Connector(Matrix.from_npa(mask)) if with_mask else None
                softmax_ce_block = SoftmaxCeBlock(x_cpu, true_labels_cpu, mask_cpu)
                x_cpu.fprop()
                true_labels_cpu.fprop()
                if with_mask:
                    mask_cpu.fprop()
                softmax_ce_block.fprop()
                softmax_ce_block.bprop()
                dL_dx_cpu = x_cpu.backward_matrix.to_host()

                r.append(np.allclose(dL_dx_gpu, dL_dx_cpu))

    self.assertEqual(sum(r), len(r))

def test_theano_bprop_vector(self):
    """
    Compare the embedding-matrix update against a Theano reference
    implementation of row slicing followed by softmax cross-entropy.
    """
    r = []
    for _ in xrange(self.N):
        embd_dim = self.rng.random_integers(10000)
        batch_size, output_dim = self.rng.random_integers(2000, size=2)
        W = self.get_orthogonal_matrix(embd_dim, output_dim)
        row_idxs = self.rng.randint(embd_dim, size=(batch_size, 1)).astype(np.int32)
        true_labels = self.rng.randint(output_dim, size=(batch_size, 1)).astype(np.int32)
        device_id = 0

        # quagga model
        quagga.processor_type = 'gpu'
        qrow_idxs = Connector(Matrix.from_npa(row_idxs))
        qW = Connector(Matrix.from_npa(W), device_id)
        qtrue_labels = Connector(Matrix.from_npa(true_labels))
        row_slicing_block = RowSlicingBlock(qW, qrow_idxs)
        sce_block = SoftmaxCeBlock(row_slicing_block.output, qtrue_labels)
        qtrue_labels.fprop()
        qW.fprop()
        qrow_idxs.fprop()
        row_slicing_block.fprop()
        sce_block.fprop()
        sce_block.bprop()
        row_slicing_block.bprop()
        # fold the accumulated gradient into W, mirroring the Theano update
        qW.add(Context(), qW.backward_matrix)

        # Theano model
        th_row_idxs = T.ivector()
        th_true_labels = T.ivector()
        row_slicing_layer = RowSlicingLayer(W)
        toutput = row_slicing_layer.get_output_expr(th_row_idxs)
        loss = SoftmaxLayer.get_loss(toutput, th_true_labels)
        dL_dW = T.grad(loss, row_slicing_layer.W)
        fun = theano.function([th_row_idxs, th_true_labels],
                              updates=[(row_slicing_layer.W,
                                        row_slicing_layer.W + dL_dW)])
        fun(row_idxs[:, 0], true_labels[:, 0])

        r.append(np.allclose(qW.to_host(), row_slicing_layer.W.get_value()))

    self.assertEqual(sum(r), len(r))

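# `RowSlicingLayer` and `SoftmaxLayer` are Theano reference implementations
# defined elsewhere in the test utilities. Minimal sketches consistent with
# their use above (assumptions, not the actual reference code):
class RowSlicingLayer(object):
    def __init__(self, W):
        # embedding matrix as a shared variable so T.grad can reach it
        self.W = theano.shared(W)

    def get_output_expr(self, row_idxs):
        # advanced indexing selects one embedding row per index
        return self.W[row_idxs]


class SoftmaxLayer(object):
    @staticmethod
    def get_loss(x, true_labels):
        probs = T.nnet.softmax(x)
        return T.mean(T.nnet.categorical_crossentropy(probs, true_labels))
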
def test_theano_bprop(self):
    """
    Compare the gradient w.r.t. the repeated input against Theano's
    `T.tile` followed by softmax cross-entropy.
    """
    r = []
    for i in xrange(self.N):
        repeats = self.rng.random_integers(42)
        axis = self.rng.randint(2)
        input_dim, output_dim = self.rng.random_integers(2000, size=2)
        x = self.get_normal_matrix(input_dim, output_dim)
        # repeating along axis 0 multiplies the number of rows
        input_dim = input_dim if axis else input_dim * repeats
        true_labels = self.rng.randint(output_dim, size=(input_dim, 1)).astype(np.int32)
        device_id = 0

        # quagga model
        quagga.processor_type = 'gpu'
        qx = Connector(Matrix.from_npa(x), device_id)
        qtrue_labels = Connector(Matrix.from_npa(true_labels))
        repeat_block = RepeatBlock(qx, repeats, axis)
        sce_block = SoftmaxCeBlock(repeat_block.output, qtrue_labels)
        qx.fprop()
        qtrue_labels.fprop()
        repeat_block.fprop()
        sce_block.fprop()
        sce_block.bprop()
        repeat_block.bprop()
        q_dL_dx = qx.backward_matrix.to_host()

        # Theano model
        th_x = T.fmatrix()
        th_true_labels = T.ivector()
        reps = [1, 1]
        reps[axis] = repeats
        th_output = T.tile(th_x, reps)
        th_output = T.nnet.softmax(th_output)
        loss = T.mean(T.nnet.categorical_crossentropy(th_output, th_true_labels))
        get_grads = theano.function([th_x, th_true_labels], T.grad(loss, th_x))
        th_dL_dx = get_grads(x, true_labels[:, 0])

        r.append(np.allclose(q_dL_dx, th_dL_dx))

    self.assertEqual(sum(r), len(r))