Example 1
    def __init__(self, train_data, valid_data, batch_size, word_dropout_prob, device_id):
        self.train_data = HomogeneousDataIterator(train_data, batch_size, randomize=True, infinite=True)
        self.valid_data = HomogeneousDataIterator(valid_data, batch_size)
        self.train_data_iterator = iter(self.train_data)
        self.valid_data_iterator = iter(self.valid_data)
        self.word_keep_prob = 1.0 - word_dropout_prob
        self.rnd = RandomState(47571)
        self.unk_idx = word_to_idx['<UNK>']

        self.context = Context(device_id)
        c = Counter([len(line) for line in chain(train_data, valid_data)])
        print c.most_common()
        max_len = max([len(line) for line in chain(train_data, valid_data)])

        self.enc_x = Connector(Matrix.empty(batch_size, max_len, 'int', device_id))
        self.enc_lengths = Matrix.empty(self.enc_x.nrows, 1, 'int', device_id)
        self._enc_mask = Matrix.empty(self.enc_x.nrows, self.enc_x.ncols, 'float', device_id)
        self.enc_mask = List([Connector(self._enc_mask[:, i]) for i in xrange(max_len)], self.enc_x.ncols)

        self.dec_x = Connector(Matrix.empty(batch_size, max_len + 1, 'int', device_id))
        self._dec_y = Matrix.empty(batch_size, max_len + 1, 'int', device_id)
        self.dec_y = List([Connector(self._dec_y[:, i]) for i in xrange(max_len + 1)], self._dec_y.ncols)
        self.dec_lengths = Matrix.empty(self.dec_x.nrows, 1, 'int', device_id)
        self._dec_mask = Matrix.empty(self.dec_x.nrows, self.dec_x.ncols, 'float', device_id)
        self.dec_mask = List([Connector(self._dec_mask[:, i]) for i in xrange(max_len + 1)], self.dec_x.ncols)

        self.blocking_contexts = None
        self.training_mode = True
Example 2
    def test_fprop(self):
        """
        compare `fprop` results for cpu and gpu backends
        """
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(512)
            dim = self.rng.random_integers(1500)
            x = [
                self.rng.rand(batch_size, dim).astype(dtype=np.float32)
                for _ in xrange(max_input_sequence_len)
            ]

            state = self.rng.get_state()
            quagga.processor_type = 'gpu'
            x_gpu = List([Connector(Matrix.from_npa(e)) for e in x])
            smean_pooling_block_gpu = SequentialMeanPoolingBlock(x_gpu)
            x_gpu.set_length(sequence_len)
            smean_pooling_block_gpu.fprop()
            output_gpu = smean_pooling_block_gpu.output.to_host()

            self.rng.set_state(state)
            quagga.processor_type = 'cpu'
            x_cpu = List([Connector(Matrix.from_npa(e)) for e in x])
            smean_pooling_block_cpu = SequentialMeanPoolingBlock(x_cpu)
            x_cpu.set_length(sequence_len)
            smean_pooling_block_cpu.fprop()
            output_cpu = smean_pooling_block_cpu.output.to_host()

            r.append(np.allclose(output_gpu, output_cpu))

        self.assertEqual(sum(r), self.N)
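The test above drives the same SequentialMeanPoolingBlock through both backends and compares fprop outputs with np.allclose. As a point of reference, here is a minimal NumPy sketch of what the block appears to compute, judging from the Theano SequentialMeanPoolingLayer later in this listing (a mean over the time axis); the function name is hypothetical:

import numpy as np

def sequential_mean_pooling(x, sequence_len):
    # Hypothetical reference: average the first `sequence_len` steps
    # element-wise, each step being a (batch_size, dim) matrix.
    return np.mean(np.dstack(x[:sequence_len]), axis=2)

rng = np.random.RandomState(0)
x = [rng.rand(4, 5).astype(np.float32) for _ in range(7)]
out = sequential_mean_pooling(x, 3)
assert np.allclose(out, (x[0] + x[1] + x[2]) / 3.0)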
Example 3
 def __init__(self, data, char_to_idx, batch_size, x_device_id,
              y_device_id):
     self.data = HomogeneousDataIterator(data, char_to_idx, batch_size,
                                         True, True)
     self.data_iterator = iter(self.data)
     self.x_context = Context(x_device_id)
     self.y_context = Context(y_device_id)
     max_len = 0
     for sub_line in data:
         cur_len = len(sub_line)
         if cur_len > max_len:
             max_len = cur_len
     print max_len
     self.x = Connector(
         Matrix.empty(batch_size, max_len - 1, 'int', x_device_id))
     self._y = Matrix.empty(batch_size, max_len - 1, 'int', y_device_id)
     self.y = List([Connector(self._y[:, i]) for i in xrange(max_len - 1)],
                   self.x.ncols)
     self.lengths = Matrix.empty(self.x.nrows, 1, 'int', x_device_id)
     self._mask = Matrix.empty(self.x.nrows, self.x.ncols, 'float',
                               x_device_id)
     self.mask = List(
         [Connector(self._mask[:, i]) for i in xrange(max_len - 1)],
         self.x.ncols)
     self.blocking_contexts = None
Example 4
    def test_bprop(self):
        """
        compare `bprop` results for cpu and gpu backends
        """
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(512)
            dim = self.rng.random_integers(1500)
            x = [
                self.rng.rand(batch_size, dim).astype(dtype=np.float32)
                for _ in xrange(max_input_sequence_len)
            ]

            state = self.rng.get_state()
            quagga.processor_type = 'gpu'
            context = Context()
            x_gpu = List(
                [Connector(Matrix.from_npa(e), context, context) for e in x])
            smean_pooling_block_gpu = SequentialMeanPoolingBlock(x_gpu)
            x_gpu.set_length(sequence_len)
            _, dL_doutput = smean_pooling_block_gpu.output.register_usage(
                context, context)
            smean_pooling_block_gpu.fprop()
            random_matrix = self.rng.rand(dL_doutput.nrows, dL_doutput.ncols)
            Matrix.from_npa(random_matrix,
                            'float').copy_to(context, dL_doutput)
            smean_pooling_block_gpu.bprop()
            dL_dmatrices_gpu = [e.backward_matrix.to_host() for e in x_gpu]

            self.rng.set_state(state)
            quagga.processor_type = 'cpu'
            context = Context()
            x_cpu = List(
                [Connector(Matrix.from_npa(e), context, context) for e in x])
            smean_pooling_block_cpu = SequentialMeanPoolingBlock(x_cpu)
            x_cpu.set_length(sequence_len)
            _, dL_doutput = smean_pooling_block_cpu.output.register_usage(
                context, context)
            smean_pooling_block_cpu.fprop()
            random_matrix = self.rng.rand(dL_doutput.nrows, dL_doutput.ncols)
            Matrix.from_npa(random_matrix,
                            'float').copy_to(context, dL_doutput)
            smean_pooling_block_cpu.bprop()
            dL_dmatrices_cpu = [e.backward_matrix.to_host() for e in x_cpu]

            for dL_dmatrix_gpu, dL_dmatrix_cpu in izip(dL_dmatrices_gpu,
                                                       dL_dmatrices_cpu):
                if not np.allclose(dL_dmatrix_gpu, dL_dmatrix_cpu):
                    r.append(False)
                    break
            else:
                r.append(True)

        self.assertEqual(sum(r), self.N)
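test_bprop applies the same GPU-vs-CPU comparison to the backward pass. If the forward pass is a plain mean over the active steps, each active input should receive the output gradient scaled by 1/sequence_len; a small NumPy sketch of that expectation (function name hypothetical):

import numpy as np

def sequential_mean_pooling_bprop(dL_doutput, sequence_len):
    # Every active step contributed with weight 1/sequence_len,
    # so every step receives the same scaled output gradient.
    return [dL_doutput / sequence_len for _ in range(sequence_len)]

dL_dy = np.ones((4, 5), dtype=np.float32)
grads = sequential_mean_pooling_bprop(dL_dy, 4)
assert np.allclose(grads[0], 0.25)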
Example 5
    def test_fprop(self):
        """
        compare `fprop` results for cpu and gpu backends
        """
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(512)
            dim_x, dim_y = self.rng.random_integers(1500, size=2)
            x = [
                self.rng.rand(batch_size, dim_x).astype(dtype=np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            y = [
                self.rng.rand(batch_size, dim_y).astype(dtype=np.float32)
                for _ in xrange(max_input_sequence_len)
            ]

            state = self.rng.get_state()
            quagga.processor_type = 'gpu'
            x_gpu = List([Connector(Matrix.from_npa(e)) for e in x])
            y_gpu = List([Connector(Matrix.from_npa(e)) for e in y])
            seq_hstack_block_gpu = SequentialHorizontalStackBlock(x_gpu, y_gpu)
            x_gpu.length = sequence_len
            y_gpu.length = sequence_len
            seq_hstack_block_gpu.fprop()
            output_sequence_gpu = seq_hstack_block_gpu.output.to_host()

            self.rng.set_state(state)
            quagga.processor_type = 'cpu'
            x_cpu = List([Connector(Matrix.from_npa(e)) for e in x])
            y_cpu = List([Connector(Matrix.from_npa(e)) for e in y])
            seq_hstack_block_cpu = SequentialHorizontalStackBlock(x_cpu, y_cpu)
            x_cpu.length = sequence_len
            y_cpu.length = sequence_len
            seq_hstack_block_cpu.fprop()
            output_sequence_cpu = seq_hstack_block_cpu.output.to_host()

            for out_gpu, out_cpu in izip(output_sequence_gpu,
                                         output_sequence_cpu):
                if not np.allclose(out_gpu, out_cpu):
                    r.append(False)
                    break
            else:
                r.append(True)

        self.assertEqual(sum(r), self.N)
Example 6
    def test_fprop(self):
        """
        compare `fprop` results for cpu and gpu backends
        """

        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(256)
            input_dim, hidden_dim = self.rng.random_integers(1500, size=2)
            x = [
                self.rng.randn(batch_size, input_dim).astype(np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            W = self.get_orthogonal_matrix(input_dim, hidden_dim)
            b = self.rng.rand(1, hidden_dim).astype(np.float32)

            from quagga.cuda import cudart
            cudart.cuda_set_device(1)

            qoutput = {}
            for reverse in [False, True]:
                for with_bias in [False, True]:
                    for processor_type in ['gpu', 'cpu']:
                        quagga.processor_type = processor_type
                        qx = List([Connector(Matrix.from_npa(e)) for e in x])
                        qW = Connector(Matrix.from_npa(W))
                        qb = Connector(
                            Matrix.from_npa(b)) if with_bias else None
                        seq_dot_block = SequencerBlock(block_class=DotBlock,
                                                       params=[qW, qb],
                                                       sequences=[qx],
                                                       output_names=['output'],
                                                       reverse=reverse)
                        qx.length = sequence_len
                        qx.fprop()
                        qW.fprop()
                        if qb:
                            qb.fprop()
                        seq_dot_block.fprop()
                        qoutput[processor_type] = seq_dot_block.output.to_host()

                    for output_gpu, output_cpu in izip(qoutput['gpu'],
                                                       qoutput['cpu']):
                        if not np.allclose(output_gpu, output_cpu, atol=1e-5):
                            r.append(False)
                            break
                    else:
                        r.append(True)

        self.assertEqual(sum(r), len(r))
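Here SequencerBlock unrolls a stateless DotBlock over the sequence, so each active step should reduce to an affine map; `reverse` only changes the unrolling order, and with no recurrent state the per-step results are unchanged. A hedged NumPy sketch of that expectation:

import numpy as np

def sequencer_dot(x, W, b, sequence_len):
    # Hypothetical reference: an affine map applied independently at
    # every active step; a reversed unroll cannot change the result.
    return [e.dot(W) + (b if b is not None else 0.0)
            for e in x[:sequence_len]]

rng = np.random.RandomState(0)
x = [rng.randn(4, 3).astype(np.float32) for _ in range(6)]
W = rng.randn(3, 2).astype(np.float32)
out = sequencer_dot(x, W, None, 6)
assert len(out) == 6 and out[0].shape == (4, 2)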
Example 7
    def __init__(self, x_sequence, y_sequence, device_id=None):
        """
        TODO
        """
        # TODO: add during hsplit, otherwise gradients accumulate incorrectly
        if all(e.bpropagable for e in chain(x_sequence, y_sequence)):
            learning = True
        elif all(not e.bpropagable for e in chain(x_sequence, y_sequence)):
            learning = False
        else:
            raise ValueError('All elements should be bpropagable or '
                             'non-bpropagable. Mixed state is not allowed!')
        x_ncols = x_sequence[0].ncols
        y_ncols = y_sequence[0].ncols
        dtype = x_sequence[0].dtype
        for x, y in izip(x_sequence, y_sequence):
            if x.ncols != x_ncols or y.ncols != y_ncols:
                raise ValueError(
                    "All matrices in the sequence should have the same number of columns!"
                )
            if x.nrows != y.nrows:
                raise ValueError(
                    "Can't stack matrices in sequence with different number of rows!"
                )
            if x.dtype != dtype or y.dtype != dtype:
                raise ValueError("Can't stack matrices with different dtypes!")

        self.context = Context(device_id)
        device_id = self.context.device_id
        if learning:
            self.x_sequence, self.dL_dx_sequences = izip(
                *x_sequence.register_usage(device_id, device_id))
            self.y_sequence, self.dL_dy_sequences = izip(
                *y_sequence.register_usage(device_id, device_id))
            self.dL_dx_sequences = List(self.dL_dx_sequences,
                                        x_sequence.length)
            self.dL_dy_sequences = List(self.dL_dy_sequences,
                                        y_sequence.length)
        else:
            self.x_sequence = x_sequence.register_usage(device_id)
            self.y_sequence = y_sequence.register_usage(device_id)
        self.x_sequence = List(self.x_sequence, x_sequence.length)
        self.y_sequence = List(self.y_sequence, y_sequence.length)
        output = []
        for _ in xrange(x_sequence.length):
            matrix = Matrix.empty(x_sequence[0].nrows, x_ncols + y_ncols,
                                  dtype, device_id)
            output.append(Connector(matrix, device_id))
        self.output = List(output, x_sequence.length)
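On the backward pass, a concatenation block has to route the output gradient back to its two inputs, which is just column slicing. A minimal NumPy sketch under that assumption:

import numpy as np

def hstack_bprop(dL_doutput, x_ncols):
    # Gradient of column-wise concatenation: slice it back apart.
    return dL_doutput[:, :x_ncols], dL_doutput[:, x_ncols:]

dL_dout = np.arange(20, dtype=np.float32).reshape(4, 5)
dL_dx, dL_dy = hstack_bprop(dL_dout, 3)
assert dL_dx.shape == (4, 3) and dL_dy.shape == (4, 2)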
Example 8
    def test_theano_fprop(self):
        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(256)
            input_dim, hidden_dim = self.rng.random_integers(1500, size=2)
            x = [
                self.rng.randn(batch_size, input_dim).astype(np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            W = self.get_orthogonal_matrix(input_dim, hidden_dim)
            b = self.rng.rand(1, hidden_dim).astype(np.float32)

            for reverse in [False, True]:
                for with_bias in [False, True]:
                    qx = List([Connector(Matrix.from_npa(e)) for e in x])
                    qW = Connector(Matrix.from_npa(W))
                    qb = Connector(Matrix.from_npa(b)) if with_bias else None
                    seq_dot_block = SequencerBlock(block_class=DotBlock,
                                                   params=[qW, qb],
                                                   sequences=[qx],
                                                   output_names=['output'],
                                                   reverse=reverse)
                    qx.length = sequence_len
                    qx.fprop()
                    qW.fprop()
                    if qb:
                        qb.fprop()
                    seq_dot_block.fprop()
                    qoutput = seq_dot_block.output.to_host()

                    seq_dot_layer = SequentialDotLayer(
                        W, b if with_bias else None, reverse)
                    th_x = T.ftensor3()
                    get_th_output = theano.function(
                        [th_x], seq_dot_layer.get_output_expr(th_x))
                    th_output = get_th_output(np.dstack(x[:sequence_len]))

                    for i in xrange(th_output.shape[0]):
                        if not np.allclose(qoutput[i], th_output[i]):
                            r.append(False)
                            break
                    else:
                        r.append(True)

        self.assertEqual(sum(r), len(r))
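The Theano comparison packs the list of per-step matrices with np.dstack, which puts the time axis last; a quick check of that layout:

import numpy as np

a = np.zeros((4, 3), dtype=np.float32)  # time step 0
b = np.ones((4, 3), dtype=np.float32)   # time step 1
packed = np.dstack([a, b])
# dstack stacks along a new third axis: (batch, dim, time)
assert packed.shape == (4, 3, 2)
assert np.allclose(packed[..., 1], b)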
Example 9
    def test_theano_bprop_matrix(self):
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(300)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(2, max_input_sequence_len)
            embd_dim = self.rng.random_integers(10000)
            batch_size = self.rng.random_integers(500)
            output_dim = self.rng.random_integers(2000)
            W = self.get_orthogonal_matrix(embd_dim, output_dim)
            row_idxs = self.rng.randint(embd_dim, size=(batch_size, max_input_sequence_len)).astype(np.int32)
            true_labels = [self.rng.randint(output_dim, size=(batch_size, 1)).astype(np.int32) for _ in xrange(max_input_sequence_len)]
            device_id = 0

            quagga.processor_type = 'gpu'
            qrow_idxs = Connector(Matrix.from_npa(row_idxs))
            qtrue_labels = List([Connector(Matrix.from_npa(e)) for e in true_labels], qrow_idxs.ncols)
            qW = Connector(Matrix.from_npa(W), device_id)
            row_slicing_block = RowSlicingBlock(qW, qrow_idxs)
            seq_sce_block = SequencerBlock(block_class=SoftmaxCeBlock,
                                           params=[],
                                           sequences=[row_slicing_block.output, qtrue_labels])
            qW.fprop()
            qrow_idxs.ncols = sequence_len
            qrow_idxs.fprop()
            row_slicing_block.fprop()
            seq_sce_block.fprop()
            seq_sce_block.bprop()
            row_slicing_block.bprop()
            qW.add(Context(), qW.backward_matrix)

            th_row_idxs = T.imatrix()
            th_true_labels = T.imatrix()
            row_slicing_layer = RowSlicingLayer(W)
            toutput = row_slicing_layer.get_output_expr(th_row_idxs)
            loss = SequentialSoftmaxLayer.get_loss(toutput, th_true_labels)
            dL_dW = T.grad(loss, row_slicing_layer.W)
            fun = theano.function([th_row_idxs, th_true_labels],
                                  updates=[(row_slicing_layer.W, row_slicing_layer.W + dL_dW)])
            fun(row_idxs[:, :sequence_len], np.hstack(true_labels[:sequence_len]))

            r.append(np.allclose(qW.to_host(), row_slicing_layer.W.get_value(), atol=1e-5))

        self.assertEqual(sum(r), len(r))
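Both sides of this test end up accumulating softmax cross-entropy gradients into the rows of W that were sliced out as logits. A hedged NumPy sketch of that accumulation, using per-example gradients and ignoring whatever scaling SoftmaxCeBlock applies internally (not shown in the source):

import numpy as np

def row_slicing_softmax_ce_grad(W, row_idxs, true_labels, sequence_len):
    # Rows of W selected by row_idxs act directly as logits; each step's
    # softmax-CE gradient accumulates back into the rows it used.
    # np.add.at is needed because the same row can be picked repeatedly.
    dL_dW = np.zeros_like(W)
    batch_size = row_idxs.shape[0]
    for t in range(sequence_len):
        logits = W[row_idxs[:, t]]                      # (batch, classes)
        e = np.exp(logits - logits.max(axis=1, keepdims=True))
        probs = e / e.sum(axis=1, keepdims=True)
        probs[np.arange(batch_size), true_labels[t][:, 0]] -= 1.0
        np.add.at(dL_dW, row_idxs[:, t], probs)
    return dL_dW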
Example 10
    def __init__(self, block_class, params, sequences, output_names=None, prev_names=None, paddings=None, reverse=False, device_id=None):
        context = Context(device_id)
        device_id = context.device_id
        self.reverse = reverse
        self.prev_names = prev_names
        if prev_names and reverse:
            self.temp_prev = []
            self.dL_dtemp_prev = []
            self.k = None
        self._length = sequences[0]._length
        self.blocks = []
        output_names = output_names if output_names else []
        outputs = [[] for _ in output_names]
        for k in xrange(self._length):
            k = self._length.value - 1 - k if reverse else k
            args = params + [s[k] for s in sequences]
            if prev_names:
                if k == (self._length.value - 1 if reverse else 0):
                    prevs = paddings
                else:
                    prev_block = self.blocks[-1]
                    prevs = [getattr(prev_block, name) for name in prev_names]
                args += prevs
            try:
                self.blocks.append(block_class(*args, device_id=device_id))
            except TypeError:
                self.blocks.append(block_class(*args))
            for i, output_name in enumerate(output_names):
                outputs[i].append(getattr(self.blocks[-1], output_name))
        for output_name, output in izip(output_names, outputs):
            output = output[::-1] if reverse else output
            output = List(output, self._length)
            setattr(self, output_name, output)

        if hasattr(self.blocks[0], 'calculate_loss') and hasattr(self.blocks[0], 'loss'):
            def calculate_loss(context):
                context.wait(*[self.blocks[i].context for i in xrange(self._length)])
                for i in xrange(self._length):
                    self.blocks[i].calculate_loss(context)
            self.calculate_loss = calculate_loss
            self.context = context
            SequencerBlock.loss = property(lambda self: [self.blocks[i].loss for i in xrange(self._length)])
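The one ordering subtlety in SequencerBlock: with reverse set, blocks are constructed from the last active step backwards, the boundary block receives the paddings, and the collected outputs are flipped back to natural order. A stripped-down sketch of just that index arithmetic:

def unroll_order(length, reverse):
    # Yields step indices in construction order; the first index
    # yielded is the step that receives the padding values.
    for k in range(length):
        yield length - 1 - k if reverse else k

assert list(unroll_order(4, False)) == [0, 1, 2, 3]
assert list(unroll_order(4, True)) == [3, 2, 1, 0]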
Example 11
    def __init__(self, ptb_train, ptb_valid, batch_size, sentence_max_len,
                 device_id):
        self.blocking_contexts = None
        self.context = Context(device_id)
        device_id = self.context.device_id
        self.train_offsets = HomogeneousDataGenerator(ptb_train,
                                                      batch_size,
                                                      sentence_max_len,
                                                      randomize=True,
                                                      infinite=True)
        self.valid_offsets = HomogeneousDataGenerator(ptb_valid, batch_size,
                                                      sentence_max_len)

        train_sentences = np.array([self.train_offsets.flatten_sentences])
        valid_sentences = np.array([self.valid_offsets.flatten_sentences])
        self.train_sents = Matrix.from_npa(train_sentences, 'int', device_id)
        self.valid_sents = Matrix.from_npa(valid_sentences, 'int', device_id)
        self._sent_lengths = np.empty((batch_size, 1),
                                      dtype=np.int32,
                                      order='F')[...]
        self.sent_lengths = Matrix.from_npa(self._sent_lengths,
                                            device_id=device_id)

        sentence_batch = Matrix.empty(batch_size, sentence_max_len, 'int',
                                      device_id)
        self.sentence_batch = Connector(sentence_batch, self.context)
        self.sentence_batch.sync_fill(0)

        self._mask = Matrix.empty(sentence_batch.nrows,
                                  self.sentence_batch.ncols, 'float',
                                  device_id)
        self.mask = List(
            [Connector(self._mask[:, i]) for i in xrange(sentence_max_len)],
            self.sentence_batch.ncols)
        self.train_offsets_iterator = iter(self.train_offsets)
        self.valid_offsets_iterator = iter(self.valid_offsets)
        self.training_mode = True
Example 12
    def test_bprop_matrix(self):
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(max_input_sequence_len)
            embd_dim = self.rng.random_integers(10000)
            batch_size = self.rng.random_integers(500)
            output_dim = self.rng.random_integers(2000)
            W = self.get_orthogonal_matrix(embd_dim, output_dim)
            row_idxs = self.rng.randint(embd_dim, size=(batch_size, max_input_sequence_len)).astype(np.int32)
            true_labels = [self.rng.randint(output_dim, size=(batch_size, 1)).astype(np.int32) for _ in xrange(max_input_sequence_len)]
            device_id = 0

            output = {}
            for processor_type in ['gpu', 'cpu']:
                quagga.processor_type = processor_type
                qrow_idxs = Connector(Matrix.from_npa(row_idxs))
                qtrue_labels = List([Connector(Matrix.from_npa(e)) for e in true_labels], qrow_idxs.ncols)
                qW = Connector(Matrix.from_npa(W), device_id)
                row_slicing_block = RowSlicingBlock(qW, qrow_idxs)
                seq_sce_block = SequencerBlock(block_class=SoftmaxCeBlock,
                                               params=[],
                                               sequences=[row_slicing_block.output, qtrue_labels])
                qW.fprop()
                qrow_idxs.ncols = sequence_len
                qrow_idxs.fprop()
                row_slicing_block.fprop()
                seq_sce_block.fprop()
                seq_sce_block.bprop()
                row_slicing_block.bprop()
                qW.add(Context(), qW.backward_matrix)
                output[processor_type] = qW.to_host()

            r.append(np.allclose(output['gpu'], output['cpu']))

        self.assertEqual(sum(r), len(r))
Example 13
    def test_theano_grad(self):
        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(300)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(128)
            input_dim, hidden_dim, class_num = self.rng.random_integers(1500,
                                                                        size=3)

            x = [
                self.rng.randn(batch_size, input_dim).astype(np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            true_labels = [
                self.rng.randint(class_num,
                                 size=(batch_size, 1)).astype(np.int32)
                for _ in xrange(max_input_sequence_len)
            ]
            mask = (self.rng.rand(batch_size, sequence_len) < 0.8).astype(
                np.float32)
            h_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
            c_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
            W_z = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_i = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_f = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_o = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W = np.hstack((W_z, W_i, W_f, W_o))
            R_z = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_i = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_f = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_o = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R = np.hstack((R_z, R_i, R_f, R_o))
            lr_W = self.get_orthogonal_matrix(hidden_dim, class_num)
            lr_b = self.rng.rand(1, class_num).astype(dtype=np.float32)
            device_id = 0

            for reverse in [False, True]:
                for with_mask in [False, True]:
                    for learn_inital_states in [False, True]:
                        # quagga model
                        context = Context()
                        qx = List([
                            Connector(Matrix.from_npa(e), device_id) for e in x
                        ])
                        qtrue_labels = List([
                            Connector(Matrix.from_npa(e)) for e in true_labels
                        ], qx.length)
                        qmask = Matrix.empty(batch_size, qx.length, 'float')
                        qmask_list = [
                            Connector(qmask[:, i]) for i in xrange(qmask.ncols)
                        ]
                        qmask = Connector(qmask)
                        qh_0 = Connector(
                            Matrix.from_npa(h_0),
                            device_id if learn_inital_states else None)
                        qc_0 = Connector(
                            Matrix.from_npa(c_0),
                            device_id if learn_inital_states else None)
                        qW = Connector(Matrix.from_npa(W), device_id)
                        qR = Connector(Matrix.from_npa(R), device_id)
                        qlr_W = Connector(Matrix.from_npa(lr_W), device_id)
                        qlr_b = Connector(Matrix.from_npa(lr_b), device_id)
                        lstm = SequencerBlock(
                            block_class=LstmBlock,
                            params=[qW, qR],
                            sequences=[
                                qx,
                                qmask_list if with_mask else [None] * len(qx)
                            ],
                            output_names=['h'],
                            prev_names=['c', 'h'],
                            paddings=[qc_0, qh_0],
                            reverse=reverse)
                        seq_dot_block = SequencerBlock(block_class=DotBlock,
                                                       params=[qlr_W, qlr_b],
                                                       sequences=[lstm.h],
                                                       output_names=['output'])
                        seq_sce_block = SequencerBlock(
                            block_class=SoftmaxCeBlock,
                            params=[],
                            sequences=[
                                seq_dot_block.output, qtrue_labels,
                                qmask_list if with_mask else [None] * len(qx)
                            ])
                        qx.length = sequence_len
                        for e in qx:
                            e.fprop()
                        for e in qtrue_labels:
                            e.fprop()
                        qmask.assign_npa(context, mask)
                        qmask.fprop()
                        qlr_W.fprop()
                        qlr_b.fprop()
                        qh_0.fprop()
                        qc_0.fprop()
                        qW.fprop()
                        qR.fprop()
                        lstm.fprop()
                        seq_dot_block.fprop()
                        seq_sce_block.fprop()
                        seq_sce_block.bprop()
                        seq_dot_block.bprop()
                        lstm.bprop()
                        quagga_grads = [
                            qlr_b.backward_matrix.to_host(),
                            qlr_W.backward_matrix.to_host(),
                            qW.backward_matrix.to_host(),
                            qR.backward_matrix.to_host()
                        ]
                        if learn_inital_states:
                            quagga_grads.append(qc_0.backward_matrix.to_host())
                            quagga_grads.append(qh_0.backward_matrix.to_host())
                        quagga_grads.append(
                            [e.backward_matrix.to_host() for e in qx])
                        del qx
                        del qlr_b
                        del qlr_W
                        del qW
                        del qR
                        del qmask
                        del lstm
                        del seq_dot_block
                        del seq_sce_block

                        # theano model
                        th_x = T.ftensor3()
                        th_true_labels = T.imatrix()
                        th_mask = T.fmatrix()
                        lstm_layer = LstmLayer(W, R, c_0, h_0, reverse=reverse)
                        th_h = lstm_layer.get_output_expr(
                            th_x, th_mask if with_mask else None)
                        seq_softmax_layer = SequentialSoftmaxLayer(
                            lr_W, lr_b, reverse)
                        loss = seq_softmax_layer.get_loss(
                            th_h, th_true_labels,
                            th_mask if with_mask else None)
                        wrt = [
                            seq_softmax_layer.b, seq_softmax_layer.W,
                            lstm_layer.W, lstm_layer.R
                        ]
                        if learn_inital_states:
                            wrt.append(lstm_layer.c0)
                            wrt.append(lstm_layer.h0)
                        wrt.append(th_x)
                        grads = T.grad(loss, wrt)
                        if with_mask:
                            get_theano_grads = theano.function(
                                [th_x, th_true_labels, th_mask], grads)
                            theano_grads = get_theano_grads(
                                np.dstack(x[:sequence_len]),
                                np.hstack(true_labels[:sequence_len]),
                                mask[:, :sequence_len])
                        else:
                            get_theano_grads = theano.function(
                                [th_x, th_true_labels], grads)
                            theano_grads = get_theano_grads(
                                np.dstack(x[:sequence_len]),
                                np.hstack(true_labels[:sequence_len]))

                        for quagga_grad, theano_grad in izip(
                                quagga_grads[:-1], theano_grads[:-1]):
                            r.append(
                                np.allclose(quagga_grad,
                                            theano_grad,
                                            atol=1e-6))
                        for i in xrange(theano_grads[-1].shape[-1]):
                            if not np.allclose(quagga_grads[-1][i],
                                               theano_grads[-1][..., i],
                                               atol=1e-6):
                                r.append(False)
                                break
                        else:
                            r.append(True)

        self.assertEqual(sum(r), len(r))
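The test fixes the gate layout by fusing the weights column-wise, W = [W_z W_i W_f W_o] and R likewise. A NumPy sketch of a single LSTM step under that layout, using the standard gate equations (quagga's LstmBlock internals are not shown in the source, so this is an assumption):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step(x_t, h_prev, c_prev, W, R, hidden_dim):
    pre = x_t.dot(W) + h_prev.dot(R)          # (batch, 4 * hidden_dim)
    z = np.tanh(pre[:, 0 * hidden_dim:1 * hidden_dim])  # block input
    i = sigmoid(pre[:, 1 * hidden_dim:2 * hidden_dim])  # input gate
    f = sigmoid(pre[:, 2 * hidden_dim:3 * hidden_dim])  # forget gate
    o = sigmoid(pre[:, 3 * hidden_dim:4 * hidden_dim])  # output gate
    c_t = i * z + f * c_prev
    h_t = o * np.tanh(c_t)
    return h_t, c_t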
Example 14
     sce_dot_block_W={
         'init': Orthogonal(1024, len(vocab)),
         'device_id': 0
     },
     sce_dot_block_b={
         'init': Constant(1, len(vocab)),
         'device_id': 0
     })
 data_block = PtbMiniBatchesGenerator(ptb_train,
                                      ptb_valid,
                                      batch_size=64,
                                      sentence_max_len=100,
                                      device_id=0)
 seq_embd_block = RowSlicingBlock(p['embd_W'], data_block.sentence_batch)
 # remove last in the list
 output = List(seq_embd_block.output[:-1], seq_embd_block.output.length - 1)
 c_fwd_repeat_block = RepeatBlock(p['lstm_fwd_c0'],
                                  data_block.sentence_batch.nrows,
                                  axis=0,
                                  device_id=0)
 h_fwd_repeat_block = RepeatBlock(p['lstm_fwd_h0'],
                                  data_block.sentence_batch.nrows,
                                  axis=0,
                                  device_id=0)
 fwd_lstm_block = SequencerBlock(
     block_class=LstmBlock,
     params=[p['lstm_fwd_W'], p['lstm_fwd_R'], 0.5],
     sequences=[output, data_block.mask],
     output_names=['h'],
     prev_names=['c', 'h'],
     paddings=[c_fwd_repeat_block.output, h_fwd_repeat_block.output],
Example 15
    def test_theano_grad(self):
        class SequentialMeanPoolingLayer(object):
            def get_output_expr(self, input_sequence):
                return T.mean(input_sequence, axis=2)

        class LogisticRegressionLayer(object):
            def __init__(self, W_init, b_init):
                self.W = theano.shared(value=W_init())
                self.b = theano.shared(value=b_init())

            def get_output_expr(self, input_expr):
                return T.nnet.sigmoid(T.dot(input_expr, self.W) + self.b)

        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(512)
            dim = self.rng.random_integers(1500)
            x = [
                self.rng.rand(batch_size, dim).astype(dtype=np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            true_labels = self.rng.randint(1,
                                           size=(batch_size,
                                                 1)).astype(dtype=np.float32)

            W_init = self.get_orthogonal_initializer(dim, 1)
            b_init = lambda: self.rng.rand(1, 1).astype(dtype=np.float32)

            # Theano model
            state = self.rng.get_state()
            th_x = T.ftensor3()
            th_true_labels = T.fmatrix()
            smp_layer = SequentialMeanPoolingLayer()
            lr_layer = LogisticRegressionLayer(W_init, lambda: b_init()[0])
            probs = lr_layer.get_output_expr(smp_layer.get_output_expr(th_x))
            loss = T.mean(T.nnet.binary_crossentropy(probs, th_true_labels))
            grad_x = T.grad(loss, wrt=th_x)
            get_grad_x = theano.function([th_x, th_true_labels], grad_x)

            # quagga model
            self.rng.set_state(state)
            context = Context()
            x = List(
                [Connector(Matrix.from_npa(e), context, context) for e in x])
            true_labels = Connector(Matrix.from_npa(true_labels))
            smp_block = SequentialMeanPoolingBlock(x)
            dot_block = DotBlock(W_init, b_init, smp_block.output)
            sce_block = SigmoidCeBlock(dot_block.output, true_labels)
            x.set_length(sequence_len)
            smp_block.fprop()
            dot_block.fprop()
            sce_block.fprop()
            sce_block.bprop()
            dot_block.bprop()
            smp_block.bprop()

            dL_dx = [e.backward_matrix.to_host() for e in x]
            dL_dx_th = get_grad_x(np.dstack([e.to_host() for e in x]),
                                  true_labels.to_host())
            for i in xrange(dL_dx_th.shape[-1]):
                if not np.allclose(dL_dx[i], dL_dx_th[..., i]):
                    r.append(False)
                    break
            else:
                r.append(True)

        self.assertEqual(sum(r), self.N)
Example 16
    def test_theano_grad(self):
        class AttentionLayer(object):
            def __init__(self, u, mask=None):
                self.u = theano.shared(value=u)
                self.mask = mask

            def get_output_expr(self, input_expr):
                input_expr = input_expr.dimshuffle(0, 2, 1)
                pre_a = T.dot(input_expr, self.u)[:, :, 0]
                if self.mask:
                    pre_a = self.mask * pre_a - \
                            (1 - self.mask) * 3.402823466e+38
                a = T.nnet.softmax(pre_a)[:, :, np.newaxis]
                return T.sum(a * input_expr, axis=1)

        class LogisticRegressionLayer(object):
            def __init__(self, W, b):
                self.W = theano.shared(value=W)
                if b is not None:
                    self.b = theano.shared(value=b[0])

            def get_output_expr(self, input_expr):
                if hasattr(self, 'b'):
                    return T.nnet.sigmoid(T.dot(input_expr, self.W) + self.b)
                else:
                    return T.nnet.sigmoid(T.dot(input_expr, self.W))

        r = []
        for i in xrange(self.N):
            batch_size = self.rng.random_integers(500)
            x_dim = self.rng.random_integers(3000)
            n_ts = self.rng.random_integers(100)
            x = [
                self.rng.rand(batch_size, x_dim).astype(np.float32)
                for _ in xrange(n_ts)
            ]
            u = self.get_orthogonal_matrix(x_dim, 1)
            lr_dot_W = self.get_orthogonal_matrix(x_dim, 1)
            lr_dot_b = self.rng.rand(1, 1).astype(
                np.float32) if self.rng.randint(2) else None
            true_labels = self.rng.randint(2, size=(batch_size,
                                                    1)).astype(np.float32)
            mask = self.rng.randint(2, size=(batch_size, n_ts)).astype(
                np.float32) if self.rng.randint(2) else None
            device_id = 0

            # Theano model
            state = self.rng.get_state()
            th_x = T.ftensor3()
            th_mask = T.fmatrix() if mask is not None else None

            th_true_labels = T.fmatrix()
            attnt_layer = AttentionLayer(u, th_mask)
            lr_layer = LogisticRegressionLayer(lr_dot_W, lr_dot_b)
            probs = th_x
            for layer in [attnt_layer, lr_layer]:
                probs = layer.get_output_expr(probs)
            loss = T.mean(T.nnet.binary_crossentropy(probs, th_true_labels))

            params = [lr_layer.W, attnt_layer.u, th_x]
            if hasattr(lr_layer, 'b'):
                params.append(lr_layer.b)
            th_grads = T.grad(loss, wrt=params)
            get_theano_grads = theano.function(
                [th_x, th_true_labels] +
                ([th_mask] if mask is not None else []), th_grads)
            th_grads = get_theano_grads(
                *([np.dstack(x), true_labels] +
                  ([mask] if mask is not None else [])))

            # quagga model
            self.rng.set_state(state)
            x = List([Connector(Matrix.from_npa(e), device_id) for e in x])
            u = Connector(Matrix.from_npa(u), device_id)
            lr_dot_W = Connector(Matrix.from_npa(lr_dot_W), device_id)
            lr_dot_b = Connector(
                Matrix.from_npa(lr_dot_b),
                device_id) if lr_dot_b is not None else lr_dot_b
            true_labels = Connector(Matrix.from_npa(true_labels))
            if mask is not None:
                mask = Connector(Matrix.from_npa(mask))

            attnt_block = AttentionBlock(x, u, mask)
            lrdot_block = DotBlock(lr_dot_W, lr_dot_b, attnt_block.output)
            sce_block = SigmoidCeBlock(lrdot_block.output, true_labels)

            x.fprop()
            true_labels.fprop()
            u.fprop()
            lr_dot_W.fprop()
            if lr_dot_b:
                lr_dot_b.fprop()
            attnt_block.fprop()
            lrdot_block.fprop()
            sce_block.fprop()
            sce_block.bprop()
            lrdot_block.bprop()
            attnt_block.bprop()
            q_grads = [
                lr_dot_W.backward_matrix.to_host(),
                u.backward_matrix.to_host(),
                np.dstack([e.backward_matrix.to_host() for e in x])
            ]
            if lr_dot_b:
                q_grads.append(lr_dot_b.backward_matrix.to_host())

            for th_grad, q_grad in izip(th_grads, q_grads):
                r.append(np.allclose(th_grad, q_grad, atol=1.e-7))
                print r[-1]

        self.assertEqual(sum(r), len(r))
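The Theano AttentionLayer defined inside this test doubles as a specification for AttentionBlock. A direct NumPy transcription (same float32-max masking constant, same softmax-weighted sum over time; the function name is hypothetical):

import numpy as np

def attention_pool(x, u, mask=None):
    # x: list of (batch, dim) steps; u: (dim, 1) attention vector.
    X = np.dstack(x).transpose(0, 2, 1)       # (batch, time, dim)
    pre_a = X.dot(u)[:, :, 0]                 # (batch, time)
    if mask is not None:
        pre_a = mask * pre_a - (1 - mask) * 3.402823466e+38
    e = np.exp(pre_a - pre_a.max(axis=1, keepdims=True))
    a = (e / e.sum(axis=1, keepdims=True))[:, :, np.newaxis]
    return np.sum(a * X, axis=1)              # (batch, dim)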
Example 17
    def test_theano_bprop(self):
        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(256)
            input_dim, hidden_dim = self.rng.random_integers(1500, size=2)
            x = [
                self.rng.randn(batch_size, input_dim).astype(np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            true_labels = [
                self.rng.randint(hidden_dim,
                                 size=(batch_size, 1)).astype(np.int32)
                for _ in xrange(max_input_sequence_len)
            ]
            W = self.get_orthogonal_matrix(input_dim, hidden_dim)
            b = self.rng.rand(1, hidden_dim).astype(np.float32)
            device_id = 0

            for reverse in [False, True]:
                for with_bias in [False, True]:
                    qx = List(
                        [Connector(Matrix.from_npa(e), device_id) for e in x])
                    qtrue_labels = List(
                        [Connector(Matrix.from_npa(e)) for e in true_labels],
                        len(qx))
                    qW = Connector(Matrix.from_npa(W), device_id)
                    qb = Connector(Matrix.from_npa(b),
                                   device_id) if with_bias else None
                    seq_dot_block = SequencerBlock(block_class=DotBlock,
                                                   params=[qW, qb],
                                                   sequences=[qx],
                                                   output_names=['output'],
                                                   reverse=reverse)
                    seq_sce_block = SequencerBlock(
                        block_class=SoftmaxCeBlock,
                        params=[],
                        sequences=[seq_dot_block.output, qtrue_labels],
                        reverse=reverse)
                    qx.length = sequence_len
                    qx.fprop()
                    qtrue_labels.fprop()
                    qW.fprop()
                    if qb:
                        qb.fprop()
                    seq_dot_block.fprop()
                    seq_sce_block.fprop()
                    seq_sce_block.bprop()
                    seq_dot_block.bprop()
                    quagga_grads = [qW.backward_matrix.to_host()]
                    if with_bias:
                        quagga_grads.append(qb.backward_matrix.to_host())
                    quagga_grads.append(
                        [e.backward_matrix.to_host() for e in qx])

                    seq_dot_layer = SequentialDotLayer(
                        W, b if with_bias else None, reverse)
                    seq_sce_layer = SequentialSoftmaxLayer()
                    th_x = T.ftensor3()
                    th_true_labels = T.imatrix()
                    loss = seq_sce_layer.get_loss(
                        seq_dot_layer.get_output_expr(th_x), th_true_labels)
                    wrt = [seq_dot_layer.W]
                    if with_bias:
                        wrt.append(seq_dot_layer.b)
                    wrt.append(th_x)
                    grads = T.grad(loss, wrt)
                    get_theano_grads = theano.function([th_x, th_true_labels],
                                                       grads)
                    theano_grads = get_theano_grads(
                        np.dstack(x[:sequence_len]),
                        np.hstack(true_labels[:sequence_len]))

                    for quagga_grad, theano_grad in izip(
                            quagga_grads[:-1], theano_grads[:-1]):
                        r.append(
                            np.allclose(quagga_grad, theano_grad, atol=1e-5))
                    for i in xrange(theano_grads[-1].shape[-1]):
                        if not np.allclose(quagga_grads[-1][i],
                                           theano_grads[-1][..., i],
                                           atol=1e-5):
                            r.append(False)
                            break
                    else:
                        r.append(True)

        self.assertEqual(sum(r), len(r))
Example 18
    def test_bprop(self):
        """
        compare `bprop` results for cpu and gpu backends
        """

        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(256)
            input_dim, hidden_dim = self.rng.random_integers(1500, size=2)

            x = [
                self.rng.randn(batch_size, input_dim).astype(np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            true_labels = [
                self.rng.randint(2, size=(batch_size, 1)).astype(np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            mask = (self.rng.rand(batch_size, sequence_len) < 0.8).astype(
                np.float32)
            h_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
            c_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
            W_z = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_i = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_f = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_o = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W = np.hstack((W_z, W_i, W_f, W_o))
            R_z = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_i = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_f = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_o = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R = np.hstack((R_z, R_i, R_f, R_o))
            lr_W = self.get_orthogonal_matrix(hidden_dim, 1)
            lr_b = self.rng.rand(1, 1).astype(dtype=np.float32)
            device_id = 0

            quagga_grads = {}
            for reverse in [False, True]:
                for with_mask in [False, True]:
                    for learn_inital_states in [False, True]:
                        for processor_type in ['gpu', 'cpu']:
                            quagga.processor_type = processor_type
                            context = Context()
                            qx = List([
                                Connector(Matrix.from_npa(e), device_id)
                                for e in x
                            ])
                            qtrue_labels = List([
                                Connector(Matrix.from_npa(e))
                                for e in true_labels
                            ], len(qx))
                            qmask = Matrix.empty(batch_size, len(qx), 'float')
                            qh_0 = Connector(
                                Matrix.from_npa(h_0),
                                device_id if learn_inital_states else None)
                            qc_0 = Connector(
                                Matrix.from_npa(c_0),
                                device_id if learn_inital_states else None)
                            qW = Connector(Matrix.from_npa(W), device_id)
                            qR = Connector(Matrix.from_npa(R), device_id)
                            qlr_W = Connector(Matrix.from_npa(lr_W), device_id)
                            qlr_b = Connector(Matrix.from_npa(lr_b), device_id)
                            sequences = [qx]
                            if with_mask:
                                sequences.append(
                                    List([
                                        Connector(qmask[:, i])
                                        for i in xrange(len(qx))
                                    ], len(qx)))
                                qmask.assign_npa(context, mask)
                                qmask = sequences[-1]
                            else:
                                sequences.append([None] * len(qx))
                            lstm = SequencerBlock(block_class=LstmBlock,
                                                  params=[qW, qR],
                                                  sequences=sequences,
                                                  output_names=['h'],
                                                  prev_names=['c', 'h'],
                                                  paddings=[qc_0, qh_0],
                                                  reverse=reverse)
                            seq_dot_block = SequencerBlock(
                                block_class=DotBlock,
                                params=[qlr_W, qlr_b],
                                sequences=[lstm.h],
                                output_names=['output'])
                            seq_sce_block = SequencerBlock(
                                block_class=SigmoidCeBlock,
                                params=[],
                                sequences=[seq_dot_block.output, qtrue_labels
                                           ] + ([qmask] if with_mask else []))
                            qx.length = sequence_len
                            qx.fprop()
                            qtrue_labels.fprop()
                            if with_mask:
                                qmask.fprop()
                            qlr_W.fprop()
                            qlr_b.fprop()
                            qh_0.fprop()
                            qc_0.fprop()
                            qW.fprop()
                            qR.fprop()
                            lstm.fprop()
                            seq_dot_block.fprop()
                            seq_sce_block.fprop()
                            seq_sce_block.bprop()
                            seq_dot_block.bprop()
                            lstm.bprop()
                            quagga_grads[processor_type] = [
                                qlr_b.backward_matrix.to_host(),
                                qlr_W.backward_matrix.to_host(),
                                qW.backward_matrix.to_host(),
                                qR.backward_matrix.to_host()
                            ]
                            if learn_inital_states:
                                quagga_grads[processor_type].append(
                                    qc_0.backward_matrix.to_host())
                                quagga_grads[processor_type].append(
                                    qh_0.backward_matrix.to_host())
                            quagga_grads[processor_type].extend(
                                e.backward_matrix.to_host() for e in qx)

                        for grad_gpu, grad_cpu in izip(quagga_grads['gpu'],
                                                       quagga_grads['cpu']):
                            r.append(np.allclose(grad_gpu, grad_cpu,
                                                 atol=1e-6))

        self.assertEqual(sum(r), len(r))
Example 19
    def test_bprop(self):
        """
        compare `bprop` results for cpu and gpu backends
        """

        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(256)
            input_dim, hidden_dim = self.rng.random_integers(1500, size=2)
            x = [
                self.rng.randn(batch_size, input_dim).astype(np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            true_labels = [
                self.rng.randint(hidden_dim,
                                 size=(batch_size, 1)).astype(np.int32)
                for _ in xrange(max_input_sequence_len)
            ]
            W = self.get_orthogonal_matrix(input_dim, hidden_dim)
            b = self.rng.rand(1, hidden_dim).astype(np.float32)
            device_id = 0

            quagga_grads = {}
            for reverse in [False, True]:
                for with_bias in [False, True]:
                    for processor_type in ['gpu', 'cpu']:
                        quagga.processor_type = processor_type
                        qx = List([
                            Connector(Matrix.from_npa(e), device_id) for e in x
                        ])
                        qtrue_labels = List([
                            Connector(Matrix.from_npa(e)) for e in true_labels
                        ], len(qx))
                        qW = Connector(Matrix.from_npa(W), device_id)
                        qb = Connector(Matrix.from_npa(b),
                                       device_id) if with_bias else None
                        seq_dot_block = SequencerBlock(block_class=DotBlock,
                                                       params=[qW, qb],
                                                       sequences=[qx],
                                                       output_names=['output'],
                                                       reverse=reverse)
                        seq_sce_block = SequencerBlock(
                            block_class=SoftmaxCeBlock,
                            params=[],
                            sequences=[seq_dot_block.output, qtrue_labels],
                            reverse=reverse)
                        qx.length = sequence_len
                        qx.fprop()
                        qtrue_labels.fprop()
                        qW.fprop()
                        if qb:
                            qb.fprop()
                        seq_dot_block.fprop()
                        seq_sce_block.fprop()
                        seq_sce_block.bprop()
                        seq_dot_block.bprop()
                        quagga_grads[processor_type] = [
                            qW.backward_matrix.to_host()
                        ]
                        if with_bias:
                            quagga_grads[processor_type].append(
                                qb.backward_matrix.to_host())
                        quagga_grads[processor_type].extend(
                            e.backward_matrix.to_host() for e in qx)

                    for grad_gpu, grad_cpu in izip(quagga_grads['gpu'],
                                                   quagga_grads['cpu']):
                        r.append(np.allclose(grad_gpu, grad_cpu, atol=1e-5))

        self.assertEqual(sum(r), len(r))
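
For reference, the computation being cross-checked in this test is an affine map followed by softmax cross-entropy, applied independently at every time step. A minimal NumPy sketch (not quagga code; the function name and the averaging convention are illustrative) of one step and the standard gradient of the loss with respect to the logits:

import numpy as np

def dot_softmax_ce_step(x, W, b, labels):
    # affine map, as DotBlock computes (b may be absent)
    logits = x.dot(W) + (b if b is not None else 0.0)
    # row-wise softmax, shifted by the row max for numerical stability
    e = np.exp(logits - logits.max(axis=1, keepdims=True))
    probs = e / e.sum(axis=1, keepdims=True)
    n = x.shape[0]
    idx = labels.ravel()
    loss = -np.log(probs[np.arange(n), idx]).mean()
    # softmax-CE gradient: predicted probabilities minus one-hot targets
    dlogits = probs.copy()
    dlogits[np.arange(n), idx] -= 1.0
    return loss, dlogits / n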
Example n. 20
    def test_theano_fprop(self):
        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(256)
            input_dim, hidden_dim = self.rng.random_integers(1500, size=2)
            x = [
                self.rng.randn(batch_size, input_dim).astype(np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            mask = (self.rng.rand(batch_size, sequence_len) < 0.8).astype(
                np.float32)
            h_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
            c_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
            W_z = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_i = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_f = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_o = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W = np.hstack((W_z, W_i, W_f, W_o))
            R_z = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_i = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_f = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_o = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R = np.hstack((R_z, R_i, R_f, R_o))

            for reverse in [False, True]:
                for with_mask in [False, True]:
                    context = Context()
                    qx = List([Connector(Matrix.from_npa(e)) for e in x])
                    qmask = Connector(
                        Matrix.empty(batch_size, len(qx), 'float'))
                    qh_0 = Connector(Matrix.from_npa(h_0))
                    qc_0 = Connector(Matrix.from_npa(c_0))
                    qW = Connector(Matrix.from_npa(W))
                    qR = Connector(Matrix.from_npa(R))
                    lstm = SequencerBlock(block_class=LstmBlock,
                                          params=[qW, qR],
                                          sequences=[qx] +
                                          ([qmask] if with_mask else []),
                                          output_names=['h'],
                                          prev_names=['c', 'h'],
                                          paddings=[qc_0, qh_0],
                                          reverse=reverse)

                    qx.length = sequence_len
                    for e in qx:
                        e.fprop()
                    qmask.assign_npa(context, mask)
                    qmask.fprop()
                    qh_0.fprop()
                    qc_0.fprop()
                    qW.fprop()
                    qR.fprop()
                    lstm.fprop()
                    q_h = lstm.h.to_host()

                    th_x = T.ftensor3()
                    lstm_layer = LstmLayer(W, R, c_0, h_0, reverse)
                    if with_mask:
                        th_mask = T.fmatrix()
                        get_th_h = theano.function([th_x, th_mask],
                                                   lstm_layer.get_output_expr(
                                                       th_x, th_mask))
                        th_h = get_th_h(np.dstack(x[:sequence_len]),
                                        mask[:, :sequence_len])
                    else:
                        get_th_h = theano.function(
                            [th_x], lstm_layer.get_output_expr(th_x))
                        th_h = get_th_h(np.dstack(x[:sequence_len]))

                    for t in xrange(th_h.shape[0]):
                        if not np.allclose(q_h[t], th_h[t]):
                            r.append(False)
                            break
                    else:
                        r.append(True)

        self.assertEqual(sum(r), len(r))
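
The recurrence both implementations must reproduce is the standard LSTM, with the four gate weight blocks stacked column-wise exactly as W and R are assembled above. A minimal NumPy sketch of one step, assuming a tanh block input, logistic gates, and the convention that masked-out rows carry the previous state through (the mask semantics are an assumption, not read off the test):

import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def lstm_step(x_t, h_prev, c_prev, W, R, mask_t=None):
    n = h_prev.shape[1]
    pre = x_t.dot(W) + h_prev.dot(R)  # columns ordered [z, i, f, o]
    z = np.tanh(pre[:, :n])           # block input
    i = sigmoid(pre[:, n:2 * n])      # input gate
    f = sigmoid(pre[:, 2 * n:3 * n])  # forget gate
    o = sigmoid(pre[:, 3 * n:])       # output gate
    c_t = f * c_prev + i * z
    h_t = o * np.tanh(c_t)
    if mask_t is not None:
        m = mask_t.reshape(-1, 1)
        c_t = m * c_t + (1.0 - m) * c_prev
        h_t = m * h_t + (1.0 - m) * h_prev
    return h_t, c_t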
Example n. 21
    def test_theano_grad(self):
        device_id = 0

        class SequentialHorizontalStackLayer(object):
            def get_output_expr(self, x_sequence, y_sequence):
                return T.concatenate((x_sequence, y_sequence), axis=1)

        class SequentialMeanPoolingLayer(object):
            def get_output_expr(self, input_sequence):
                return T.mean(input_sequence, axis=2)

        class LogisticRegressionLayer(object):
            def __init__(self, W_init, b_init):
                self.W = theano.shared(value=W_init())
                self.b = theano.shared(value=b_init())

            def get_output_expr(self, input_expr):
                return T.nnet.sigmoid(T.dot(input_expr, self.W) + self.b)

        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(256)
            dim_x, dim_y = self.rng.random_integers(1280, size=2)
            x = [
                self.rng.rand(batch_size, dim_x).astype(dtype=np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            y = [
                self.rng.rand(batch_size, dim_y).astype(dtype=np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            true_labels = self.rng.randint(2,
                                           size=(batch_size,
                                                 1)).astype(dtype=np.float32)

            W_init = self.get_orthogonal_initializer(dim_x + dim_y, 1)
            b_init = lambda: self.rng.rand(1, 1).astype(dtype=np.float32)

            # Theano model
            state = self.rng.get_state()
            th_x = T.ftensor3()
            th_y = T.ftensor3()
            th_true_labels = T.fmatrix()
            shs_layer = SequentialHorizontalStackLayer()
            smp_layer = SequentialMeanPoolingLayer()
            lr_layer = LogisticRegressionLayer(W_init, lambda: b_init()[0])
            probs = shs_layer.get_output_expr(th_x, th_y)
            probs = lr_layer.get_output_expr(smp_layer.get_output_expr(probs))
            loss = T.mean(T.nnet.binary_crossentropy(probs, th_true_labels))
            grads = T.grad(loss, wrt=[th_x, th_y])
            get_grads = theano.function([th_x, th_y, th_true_labels], grads)
            dL_dx_sequence_th, dL_dy_sequence_th = get_grads(
                np.dstack(x[:sequence_len]), np.dstack(y[:sequence_len]),
                true_labels)

            # quagga model
            self.rng.set_state(state)
            W = Connector(Matrix.from_npa(W_init(), device_id=device_id),
                          device_id)
            b = Connector(Matrix.from_npa(b_init(), device_id=device_id),
                          device_id)
            x = List([Connector(Matrix.from_npa(e), device_id) for e in x])
            y = List([Connector(Matrix.from_npa(e), device_id) for e in y])
            true_labels = Connector(Matrix.from_npa(true_labels))
            shs_block = SequentialHorizontalStackBlock(x, y)
            smp_block = SequentialMeanPoolingBlock(shs_block.output)
            dot_block = DotBlock(W, b, smp_block.output)
            sce_block = SigmoidCeBlock(dot_block.output, true_labels)
            x.length = sequence_len
            y.length = sequence_len
            shs_block.fprop()
            smp_block.fprop()
            dot_block.fprop()
            sce_block.fprop()
            sce_block.bprop()
            dot_block.bprop()
            smp_block.bprop()
            shs_block.bprop()
            dL_dx_sequence = [e.backward_matrix.to_host() for e in x]
            dL_dy_sequence = [e.backward_matrix.to_host() for e in y]

            for t in xrange(dL_dx_sequence_th.shape[-1]):
                if not np.allclose(dL_dx_sequence[t],
                                   dL_dx_sequence_th[..., t],
                                   atol=1.e-6):
                    r.append(False)
                    break
            else:
                r.append(True)
            for t in xrange(dL_dy_sequence_th.shape[-1]):
                if not np.allclose(dL_dy_sequence[t],
                                   dL_dy_sequence_th[..., t],
                                   atol=1.e-6):
                    r.append(False)
                    break
            else:
                r.append(True)

        self.assertEqual(sum(r), self.N * 2)
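
The two non-parametric blocks checked above have trivial backward rules: horizontal stacking routes the output gradient back to its left and right slices, and mean pooling over T steps scales it by 1/T. A NumPy sketch of both rules (illustrative only, not the quagga implementation):

import numpy as np

def hstack_bprop(d_out, dim_x):
    # gradient of concatenate(axis=1): split back into the two operands
    return d_out[:, :dim_x], d_out[:, dim_x:]

def mean_pooling_bprop(d_mean, sequence_len):
    # every time step receives an equal 1/T share of the pooled gradient
    return [d_mean / float(sequence_len) for _ in xrange(sequence_len)]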
Example n. 22
    def test_fprop(self):
        """
        compare `fprop` results for cpu and gpu backends
        """

        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(256)
            input_dim, hidden_dim = self.rng.random_integers(1500, size=2)
            x = [
                self.rng.randn(batch_size, input_dim).astype(np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            mask = (self.rng.rand(batch_size, sequence_len) < 0.8).astype(
                np.float32)
            h_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
            c_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
            W_z = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_i = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_f = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_o = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W = np.hstack((W_z, W_i, W_f, W_o))
            R_z = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_i = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_f = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_o = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R = np.hstack((R_z, R_i, R_f, R_o))

            qh = {}
            for reverse in [False, True]:
                for with_mask in [False, True]:
                    for processor_type in ['gpu', 'cpu']:
                        quagga.processor_type = processor_type
                        context = Context()
                        qx = List([Connector(Matrix.from_npa(e)) for e in x])
                        qmask = Matrix.empty(batch_size, len(qx), 'float')
                        qh_0 = Connector(Matrix.from_npa(h_0))
                        qc_0 = Connector(Matrix.from_npa(c_0))
                        qW = Connector(Matrix.from_npa(W))
                        qR = Connector(Matrix.from_npa(R))
                        sequences = [qx]
                        if with_mask:
                            sequences.append(
                                List([
                                    Connector(qmask[:, i])
                                    for i in xrange(len(qx))
                                ], len(qx)))
                            qmask.assign_npa(context, mask)
                            qmask = sequences[-1]
                        else:
                            sequences.append([None] * len(qx))
                        lstm = SequencerBlock(block_class=LstmBlock,
                                              params=[qW, qR],
                                              sequences=sequences,
                                              output_names=['h'],
                                              prev_names=['c', 'h'],
                                              paddings=[qc_0, qh_0],
                                              reverse=reverse)
                        qx.length = sequence_len
                        if with_mask:
                            qmask.fprop()
                        qx.fprop()
                        qh_0.fprop()
                        qc_0.fprop()
                        qW.fprop()
                        qR.fprop()
                        lstm.fprop()
                        qh[processor_type] = lstm.h.to_host()

                    for h_gpu, h_cpu in izip(qh['gpu'], qh['cpu']):
                        if not np.allclose(h_gpu, h_cpu, rtol=1e-7, atol=1e-3):
                            r.append(False)
                            break
                    else:
                        r.append(True)

        self.assertEqual(sum(r), len(r))
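
Note the looser tolerance (atol=1e-3) compared with the single-block tests: over a recurrence of up to 500 steps, rounding differences between the CUDA and CPU kernels compound, so per-step outputs can only be expected to agree approximately. The comparison loop amounts to a helper like this sketch (illustrative):

import numpy as np
from itertools import izip

def sequences_close(hs_a, hs_b, rtol=1e-7, atol=1e-3):
    # per-time-step comparison; a single mismatch fails the whole pair
    return all(np.allclose(a, b, rtol=rtol, atol=atol)
               for a, b in izip(hs_a, hs_b))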
Example n. 23
    def test_bprop(self):
        """
        compare `bprop` results for cpu and gpu backends
        """
        device_id = 0
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(256)
            dim_x, dim_y = self.rng.random_integers(1280, size=2)
            x = [
                self.rng.rand(batch_size, dim_x).astype(dtype=np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            y = [
                self.rng.rand(batch_size, dim_y).astype(dtype=np.float32)
                for _ in xrange(max_input_sequence_len)
            ]

            state = self.rng.get_state()
            quagga.processor_type = 'gpu'
            context = Context()
            x_gpu = List([Connector(Matrix.from_npa(e), device_id) for e in x])
            y_gpu = List([Connector(Matrix.from_npa(e), device_id) for e in y])
            seq_hstack_block_gpu = SequentialHorizontalStackBlock(x_gpu, y_gpu)
            x_gpu.length = sequence_len
            y_gpu.length = sequence_len
            _, dL_doutput_sequence = izip(
                *seq_hstack_block_gpu.output.register_usage(
                    device_id, device_id))
            seq_hstack_block_gpu.fprop()
            for dL_doutput in dL_doutput_sequence:
                random_matrix = self.rng.rand(dL_doutput.nrows,
                                              dL_doutput.ncols)
                dL_doutput.assign(context,
                                  Matrix.from_npa(random_matrix, 'float'))
            seq_hstack_block_gpu.bprop()
            dL_dx_matrices_gpu = [e.backward_matrix.to_host() for e in x_gpu]
            dL_dy_matrices_gpu = [e.backward_matrix.to_host() for e in y_gpu]

            self.rng.set_state(state)
            quagga.processor_type = 'cpu'
            context = Context()
            x_cpu = List([Connector(Matrix.from_npa(e), device_id) for e in x])
            y_cpu = List([Connector(Matrix.from_npa(e), device_id) for e in y])
            seq_hstack_block_cpu = SequentialHorizontalStackBlock(x_cpu, y_cpu)
            x_cpu.length = sequence_len
            y_cpu.length = sequence_len
            _, dL_doutput_sequence = izip(
                *seq_hstack_block_cpu.output.register_usage(
                    device_id, device_id))
            seq_hstack_block_cpu.fprop()
            for dL_doutput in dL_doutput_sequence:
                random_matrix = self.rng.rand(dL_doutput.nrows,
                                              dL_doutput.ncols)
                dL_doutput.assign(context,
                                  Matrix.from_npa(random_matrix, 'float'))
            seq_hstack_block_cpu.bprop()
            dL_dx_matrices_cpu = [e.backward_matrix.to_host() for e in x_cpu]
            dL_dy_matrices_cpu = [e.backward_matrix.to_host() for e in y_cpu]

            for dL_dx_gpu, dL_dx_cpu in izip(dL_dx_matrices_gpu,
                                             dL_dx_matrices_cpu):
                if not np.allclose(dL_dx_gpu, dL_dx_cpu):
                    r.append(False)
                    break
            else:
                r.append(True)
            for dL_dy_gpu, dL_dy_cpu in izip(dL_dy_matrices_gpu,
                                             dL_dy_matrices_cpu):
                if not np.allclose(dL_dy_gpu, dL_dy_cpu):
                    r.append(False)
                    break
            else:
                r.append(True)

            del x_gpu
            del y_gpu
            del seq_hstack_block_gpu
            del dL_dx_matrices_gpu
            del dL_dy_matrices_gpu

        self.assertEqual(sum(r), self.N * 2)
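
All of the bprop tests in this listing share one harness: run fprop on both backends, seed identical random dL/doutput buffers obtained via register_usage, run bprop, and compare the per-input gradients on the host. A condensed sketch of that pattern; build_block is a hypothetical callable returning the block under test, its input connectors, and its writable output-gradient buffers, and the quagga module paths in the imports are assumptions, so treat this as a restatement rather than code from the test suite:

import numpy as np
from itertools import izip

import quagga
from quagga.context import Context  # module paths assumed
from quagga.matrix import Matrix    # module paths assumed

def crosscheck_bprop(build_block, rng):
    grads = {}
    state = rng.get_state()
    for backend in ['gpu', 'cpu']:
        rng.set_state(state)  # identical random gradients on both backends
        quagga.processor_type = backend
        context = Context()
        block, inputs, d_outputs = build_block()
        block.fprop()
        for d_out in d_outputs:
            # seed dL/doutput with random values before back-propagating
            d_out.assign(context, Matrix.from_npa(
                rng.rand(d_out.nrows, d_out.ncols), 'float'))
        block.bprop()
        grads[backend] = [e.backward_matrix.to_host() for e in inputs]
    return all(np.allclose(g_gpu, g_cpu)
               for g_gpu, g_cpu in izip(grads['gpu'], grads['cpu']))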