Ejemplo n.º 1
0
    def test_bprop(self):
        r = []
        for i in xrange(self.N):
            matrices = []
            nrows = self.rng.random_integers(1, 3000)
            ncols = [0]
            col_slices = []
            device_ids = []
            for _ in xrange(self.rng.random_integers(1, 10)):
                _ncols = self.rng.random_integers(1, 2000)
                ncols.append(ncols[-1] + _ncols)
                if self.rng.choice([True, False]):
                    device_ids.append(0)
                    col_slices.append((ncols[-2], ncols[-1]))
                else:
                    device_ids.append(None)
                matrices.append(self.rng.rand(nrows, _ncols).astype(np.float32))
            true_labels = self.rng.randint(ncols[-1], size=(nrows, 1)).astype(np.int32)
            if not col_slices:
                r.append(True)
                continue

            output = {}
            for processor_type in ['gpu', 'cpu']:
                quagga.processor_type = processor_type
                qmatrices = [Connector(Matrix.from_npa(m), d_id) for m, d_id in izip(matrices, device_ids)]
                qtrue_labels = Connector(Matrix.from_npa(true_labels))
                hstack_block = HorizontalStackBlock(*qmatrices)
                sce_block = SoftmaxCeBlock(hstack_block.output, qtrue_labels)

                for m in qmatrices:
                    m.fprop()
                qtrue_labels.fprop()
                hstack_block.fprop()
                sce_block.fprop()
                sce_block.bprop()
                hstack_block.bprop()

                output[processor_type] = [m.backward_matrix.to_host()
                                          for m in qmatrices if m.bpropagable]

            for dL_dm_gpu, dL_dm_cpu in izip(output['gpu'], output['cpu']):
                if not np.allclose(dL_dm_gpu, dL_dm_cpu):
                    r.append(False)
                    break
            else:
                r.append(True)
        self.assertEqual(sum(r), self.N)
Ejemplo n.º 2
0
    def test_bprop(self):
        """
        compare `bprop` results for cpu and gpu backends
        """
        r = []
        for i in xrange(self.N):
            for sparse in [True, False]:
                batch_size, dim = self.rng.random_integers(2000, size=2)
                if sparse:
                    true_labels = np.zeros((batch_size, dim), np.float32)
                    for k, j in enumerate(self.rng.randint(dim, size=batch_size)):
                        true_labels[k, j] = 1.0
                else:
                    true_labels = self.rng.randint(dim, size=(batch_size, 1)).astype(np.int32)
                x = self.rng.randn(batch_size, dim).astype(np.float32)
                mask = (self.rng.rand(batch_size, 1) < 0.8).astype(np.float32)
                device_id = 0
                for with_mask in [False, True]:
                    quagga.processor_type = 'gpu'
                    x_gpu = Connector(Matrix.from_npa(x), device_id)
                    true_labels_gpu = Connector(Matrix.from_npa(true_labels))
                    mask_gpu = Connector(Matrix.from_npa(mask)) if with_mask else None
                    softmax_ce_block = SoftmaxCeBlock(x_gpu, true_labels_gpu, mask_gpu)
                    x_gpu.fprop()
                    true_labels_gpu.fprop()
                    if with_mask:
                        mask_gpu.fprop()
                    softmax_ce_block.fprop()
                    softmax_ce_block.bprop()
                    dL_dx_gpu = x_gpu.backward_matrix.to_host()

                    quagga.processor_type = 'cpu'
                    x_cpu = Connector(Matrix.from_npa(x), device_id)
                    true_labels_cpu = Connector(Matrix.from_npa(true_labels))
                    mask_cpu = Connector(Matrix.from_npa(mask)) if with_mask else None
                    softmax_ce_block = SoftmaxCeBlock(x_cpu, true_labels_cpu, mask_cpu)
                    x_cpu.fprop()
                    true_labels_cpu.fprop()
                    if with_mask:
                        mask_cpu.fprop()
                    softmax_ce_block.fprop()
                    softmax_ce_block.bprop()
                    dL_dx_cpu = x_cpu.backward_matrix.to_host()

                    r.append(np.allclose(dL_dx_gpu, dL_dx_cpu))

        self.assertEqual(sum(r), len(r))
Ejemplo n.º 3
0
    def test_bprop_vector(self):
        r = []
        for _ in xrange(self.N):
            embd_dim = self.rng.random_integers(10000)
            batch_size, output_dim = self.rng.random_integers(2000, size=2)
            W = self.get_orthogonal_matrix(embd_dim, output_dim)
            row_idxs = self.rng.randint(embd_dim, size=(batch_size, 1)).astype(np.int32)
            true_labels = self.rng.randint(output_dim, size=(batch_size, 1)).astype(np.int32)
            device_id = 0

            output = {}
            for processor_type in ['gpu', 'cpu']:
                quagga.processor_type = processor_type
                qrow_idxs = Connector(Matrix.from_npa(row_idxs))
                qtrue_labels = Connector(Matrix.from_npa(true_labels))
                qW = Connector(Matrix.from_npa(W), device_id)
                row_slicing_block = RowSlicingBlock(qW, qrow_idxs)
                sce_block = SoftmaxCeBlock(row_slicing_block.output, qtrue_labels)
                qW.fprop()
                qrow_idxs.fprop()
                row_slicing_block.fprop()
                sce_block.fprop()
                sce_block.bprop()
                row_slicing_block.bprop()
                qW.add(Context(), qW.backward_matrix)
                output[processor_type] = qW.to_host()

            r.append(np.allclose(output['gpu'], output['cpu']))

        self.assertEqual(sum(r), len(r))
Ejemplo n.º 4
0
    def test_bprop(self):
        r = []
        for i in xrange(self.N):
            repeats = self.rng.random_integers(42)
            axis = self.rng.randint(2)
            input_dim, output_dim = self.rng.random_integers(2000, size=2)
            x = self.get_normal_matrix(input_dim, output_dim)
            input_dim = input_dim if axis else input_dim * repeats
            true_labels = self.rng.randint(output_dim, size=(input_dim, 1)).astype(np.int32)
            device_id = 0

            output = {}
            for processor_type in ['gpu', 'cpu']:
                quagga.processor_type = processor_type
                qx = Connector(Matrix.from_npa(x), device_id)
                qtrue_labels = Connector(Matrix.from_npa(true_labels))
                repeat_block = RepeatBlock(qx, repeats, axis)
                sce_block = SoftmaxCeBlock(repeat_block.output, qtrue_labels)
                qx.fprop()
                qtrue_labels.fprop()
                repeat_block.fprop()
                sce_block.fprop()
                sce_block.bprop()
                repeat_block.bprop()
                output[processor_type] = qx.backward_matrix.to_host()

            r.append(np.allclose(output['gpu'], output['cpu']))

        self.assertEqual(sum(r), len(r))
Ejemplo n.º 5
0
    def test_bprop(self):
        r = []
        for i in xrange(self.N):
            matrices = []
            ncols = self.rng.random_integers(1, 3000)
            nrows = [0]
            row_slices = []
            device_ids = []
            for _ in xrange(self.rng.random_integers(1, 10)):
                _nrows = self.rng.random_integers(1, 2000)
                nrows.append(nrows[-1] + _nrows)
                if self.rng.choice([True, False]):
                    device_ids.append(0)
                    row_slices.append((nrows[-2], nrows[-1]))
                else:
                    device_ids.append(None)
                matrices.append(
                    self.rng.rand(_nrows, ncols).astype(np.float32))
            true_labels = self.rng.randint(ncols, size=(nrows[-1],
                                                        1)).astype(np.int32)
            if not row_slices:
                r.append(True)
                continue

            output = {}
            for processor_type in ['gpu', 'cpu']:
                quagga.processor_type = processor_type
                qmatrices = [
                    Connector(Matrix.from_npa(m), d_id)
                    for m, d_id in izip(matrices, device_ids)
                ]
                qtrue_labels = Connector(Matrix.from_npa(true_labels))
                vstack_block = VerticalStackBlock(*qmatrices)
                sce_block = SoftmaxCeBlock(vstack_block.output, qtrue_labels)

                for m in qmatrices:
                    m.fprop()
                qtrue_labels.fprop()
                vstack_block.fprop()
                sce_block.fprop()
                sce_block.bprop()
                vstack_block.bprop()

                output[processor_type] = [
                    m.backward_matrix.to_host() for m in qmatrices
                    if m.bpropagable
                ]

            for dL_dm_gpu, dL_dm_cpu in izip(output['gpu'], output['cpu']):
                if not np.allclose(dL_dm_gpu, dL_dm_cpu):
                    r.append(False)
                    break
            else:
                r.append(True)
        self.assertEqual(sum(r), self.N)
Ejemplo n.º 6
0
    def test_fprop(self):
        """
        compare `fprop` results for cpu and gpu backends
        """
        r = []
        for i in xrange(self.N):
            for sparse in [False, True]:
                batch_size, dim = self.rng.random_integers(2000, size=2)
                if sparse:
                    true_labels = np.zeros((batch_size, dim), np.float32)
                    for k, j in enumerate(self.rng.randint(dim, size=batch_size)):
                        true_labels[k, j] = 1.0
                else:
                    true_labels = self.rng.randint(dim, size=(batch_size, 1)).astype(np.int32)
                x = self.rng.randn(batch_size, dim).astype(np.float32)
                mask = (self.rng.rand(batch_size, 1) < 0.8).astype(np.float32)
                for with_mask in [False, True]:
                    quagga.processor_type = 'gpu'
                    x_gpu = Connector(Matrix.from_npa(x))
                    true_labels_gpu = Connector(Matrix.from_npa(true_labels))
                    mask_gpu = Connector(Matrix.from_npa(mask)) if with_mask else None
                    softmax_ce_block = SoftmaxCeBlock(x_gpu, true_labels_gpu, mask_gpu)
                    x_gpu.fprop()
                    true_labels_gpu.fprop()
                    if with_mask:
                        mask_gpu.fprop()
                    softmax_ce_block.fprop()
                    probs_gpu = softmax_ce_block.probs.to_host()

                    quagga.processor_type = 'cpu'
                    x_cpu = Connector(Matrix.from_npa(x))
                    true_labels_cpu = Connector(Matrix.from_npa(true_labels))
                    mask_cpu = Connector(Matrix.from_npa(mask)) if with_mask else None
                    softmax_ce_block = SoftmaxCeBlock(x_cpu, true_labels_cpu, mask_cpu)
                    x_cpu.fprop()
                    true_labels_cpu.fprop()
                    if with_mask:
                        mask_cpu.fprop()
                    softmax_ce_block.fprop()
                    probs_cpu = softmax_ce_block.probs.to_host()

                    r.append(np.allclose(probs_gpu, probs_cpu))

        self.assertEqual(sum(r), len(r))
Ejemplo n.º 7
0
    def test_theano_grad(self):
        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            for sparse in [True, False]:
                batch_size, dim = self.rng.random_integers(2000, size=2)
                if sparse:
                    true_labels = np.zeros((batch_size, dim), np.float32)
                    for k, j in enumerate(self.rng.randint(dim, size=batch_size)):
                        true_labels[k, j] = 1.0
                else:
                    true_labels = self.rng.randint(dim, size=(batch_size, 1)).astype(np.int32)
                x = self.rng.randn(batch_size, dim).astype(np.float32)
                mask = (self.rng.rand(batch_size, 1) < 0.8).astype(np.float32)
                device_id = 0
                for with_mask in [False, True]:
                    # Theano model
                    th_x = T.fmatrix()
                    th_mask = T.fcol()
                    th_true_labels = T.fmatrix() if sparse else T.ivector()
                    if with_mask:
                        probs = T.nnet.softmax(th_mask * th_x)
                    else:
                        probs = T.nnet.softmax(th_x)
                    loss = T.mean(T.nnet.categorical_crossentropy(probs, th_true_labels))
                    if with_mask:
                        get_theano_grads = theano.function([th_x, th_true_labels, th_mask], T.grad(loss, wrt=th_x))
                        th_dL_dx = get_theano_grads(x, true_labels if sparse else true_labels[:, 0], mask)
                    else:
                        get_theano_grads = theano.function([th_x, th_true_labels], T.grad(loss, wrt=th_x))
                        th_dL_dx = get_theano_grads(x, true_labels if sparse else true_labels[:, 0])

                    # quagga model
                    x_gpu = Connector(Matrix.from_npa(x), device_id)
                    true_labels_gpu = Connector(Matrix.from_npa(true_labels))
                    mask_gpu = Connector(Matrix.from_npa(mask)) if with_mask else None
                    softmax_ce_block = SoftmaxCeBlock(x_gpu, true_labels_gpu, mask_gpu)
                    x_gpu.fprop()
                    true_labels_gpu.fprop()
                    if with_mask:
                        mask_gpu.fprop()
                    softmax_ce_block.fprop()
                    softmax_ce_block.bprop()
                    q_dL_dx = x_gpu.backward_matrix.to_host()

                    r.append(np.allclose(th_dL_dx, q_dL_dx))

        self.assertEqual(sum(r), len(r))
Ejemplo n.º 8
0
    def test_theano_bprop_vector(self):
        r = []
        for _ in xrange(self.N):
            embd_dim = self.rng.random_integers(10000)
            batch_size, output_dim = self.rng.random_integers(2000, size=2)
            W = self.get_orthogonal_matrix(embd_dim, output_dim)
            row_idxs = self.rng.randint(embd_dim, size=(batch_size, 1)).astype(np.int32)
            true_labels = self.rng.randint(output_dim, size=(batch_size, 1)).astype(np.int32)
            device_id = 0

            quagga.processor_type = 'gpu'
            qrow_idxs = Connector(Matrix.from_npa(row_idxs))
            qW = Connector(Matrix.from_npa(W), device_id)
            qtrue_labels = Connector(Matrix.from_npa(true_labels))
            row_slicing_block = RowSlicingBlock(qW, qrow_idxs)
            sce_block = SoftmaxCeBlock(row_slicing_block.output, qtrue_labels)
            qtrue_labels.fprop()
            qW.fprop()
            qrow_idxs.fprop()
            row_slicing_block.fprop()
            sce_block.fprop()
            sce_block.bprop()
            row_slicing_block.bprop()
            qW.add(Context(), qW.backward_matrix)

            th_row_idxs = T.ivector()
            th_true_labels = T.ivector()
            row_slicing_layer = RowSlicingLayer(W)
            toutput = row_slicing_layer.get_output_expr(th_row_idxs)
            loss = SoftmaxLayer.get_loss(toutput, th_true_labels)
            dL_dW = T.grad(loss, row_slicing_layer.W)
            fun = theano.function([th_row_idxs, th_true_labels],
                                  updates=[(row_slicing_layer.W, row_slicing_layer.W + dL_dW)])
            fun(row_idxs[:, 0], true_labels[:, 0])
            r.append(np.allclose(qW.to_host(), row_slicing_layer.W.get_value()))

        self.assertEqual(sum(r), len(r))
Ejemplo n.º 9
0
    def test_theano_bprop(self):
        r = []
        for i in xrange(self.N):
            repeats = self.rng.random_integers(42)
            axis = self.rng.randint(2)
            input_dim, output_dim = self.rng.random_integers(2000, size=2)
            x = self.get_normal_matrix(input_dim, output_dim)
            input_dim = input_dim if axis else input_dim * repeats
            true_labels = self.rng.randint(output_dim, size=(input_dim, 1)).astype(np.int32)
            device_id = 0

            quagga.processor_type = 'gpu'
            qx = Connector(Matrix.from_npa(x), device_id)
            qtrue_labels = Connector(Matrix.from_npa(true_labels))
            repeat_block = RepeatBlock(qx, repeats, axis)
            sce_block = SoftmaxCeBlock(repeat_block.output, qtrue_labels)
            qx.fprop()
            qtrue_labels.fprop()
            repeat_block.fprop()
            sce_block.fprop()
            sce_block.bprop()
            repeat_block.bprop()
            q_dL_dx = qx.backward_matrix.to_host()

            th_x = T.fmatrix()
            th_true_labels = T.ivector()
            reps = [1, 1]
            reps[axis] = repeats
            th_output = T.tile(th_x, reps)
            th_output = T.nnet.softmax(th_output)
            loss = T.mean(T.nnet.categorical_crossentropy(th_output, th_true_labels))
            get_grads = theano.function([th_x, th_true_labels], T.grad(loss, th_x))
            th_dL_dx = get_grads(x, true_labels[:, 0])

            r.append(np.allclose(q_dL_dx, th_dL_dx))

        self.assertEqual(sum(r), len(r))
Ejemplo n.º 10
0
    second_dot_block = DotBlock(W=p['second_dot_block_W'],
                                b=p['second_dot_block_b'],
                                x=first_dropout_block.output,
                                device_id=0)
    second_nonl_block = NonlinearityBlock(x=second_dot_block.output,
                                          nonlinearity='relu',
                                          device_id=0)
    second_dropout_block = DropoutBlock(x=second_nonl_block.output,
                                        dropout_prob=0.5,
                                        device_id=0)
    sce_dot_block = DotBlock(W=p['sce_dot_block_W'],
                             b=p['sce_dot_block_b'],
                             x=second_dropout_block.output,
                             device_id=0)
    sce_block = SoftmaxCeBlock(x=sce_dot_block.output,
                               true_labels=data_block.y,
                               device_id=0)
    model = Model([p, data_block, input_data_dropout_block,
                   first_dot_block, first_nonl_block, first_dropout_block,
                   second_dot_block, second_nonl_block, second_dropout_block,
                   sce_dot_block, sce_block])

    logger = get_logger('train.log')
    learning_rate_policy = FixedValuePolicy(0.01)
    momentum_policy = FixedValuePolicy(0.95)
    train_loss_tracker = TrainLossTracker(model, 200, logger)
    valid_tracker = ValidTracker(model, 200, logger)
    loss_tracker = LossForValidTracker(logger)
    valid_tracker.add_observer(loss_tracker)
    saver = Hdf5Saver(p.parameters, 5000, 'mnist_parameters.hdf5', logger)
    nag_step = NagStep(p.parameters.values(), learning_rate_policy, momentum_policy)