Esempio n. 1
0
    def test_fprop(self):
        """
        compare `fprop` results for cpu and gpu backends
        """

        verdicts = []
        for trial in xrange(self.N):
            # random problem sizes; the first trial always runs at full length
            max_input_sequence_len = self.rng.random_integers(500)
            if trial == 0:
                sequence_len = max_input_sequence_len
            else:
                sequence_len = self.rng.random_integers(max_input_sequence_len)
            batch_size = self.rng.random_integers(256)
            input_dim, hidden_dim = self.rng.random_integers(1500, size=2)
            x = [self.rng.randn(batch_size, input_dim).astype(np.float32)
                 for _ in xrange(max_input_sequence_len)]
            W = self.get_orthogonal_matrix(input_dim, hidden_dim)
            b = self.rng.rand(1, hidden_dim).astype(np.float32)

            from quagga.cuda import cudart
            cudart.cuda_set_device(1)

            host_output = {}
            for reverse in [False, True]:
                for with_bias in [False, True]:
                    # run the identical graph on both backends
                    for processor_type in ['gpu', 'cpu']:
                        quagga.processor_type = processor_type
                        qx = List([Connector(Matrix.from_npa(e)) for e in x])
                        qW = Connector(Matrix.from_npa(W))
                        qb = Connector(Matrix.from_npa(b)) if with_bias else None
                        seq_dot_block = SequencerBlock(block_class=DotBlock,
                                                       params=[qW, qb],
                                                       sequences=[qx],
                                                       output_names=['output'],
                                                       reverse=reverse)
                        qx.length = sequence_len
                        qx.fprop()
                        qW.fprop()
                        if qb:
                            qb.fprop()
                        seq_dot_block.fprop()
                        host_output[processor_type] = seq_dot_block.output.to_host()

                    # one verdict per (reverse, with_bias) combination
                    verdicts.append(all(np.allclose(o_gpu, o_cpu, atol=1e-5)
                                        for o_gpu, o_cpu
                                        in izip(host_output['gpu'],
                                                host_output['cpu'])))

        self.assertEqual(sum(verdicts), len(verdicts))
    def test_bprop(self):
        """
        compare `bprop` results for cpu and gpu backends
        """

        r = []
        for i in xrange(self.N):
            # random problem sizes; the first trial exercises the full length
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(max_input_sequence_len)
            batch_size = self.rng.random_integers(256)
            input_dim, hidden_dim = self.rng.random_integers(1500, size=2)
            x = [self.rng.randn(batch_size, input_dim).astype(np.float32) for _ in xrange(max_input_sequence_len)]
            # integer class labels for the softmax cross-entropy head
            true_labels = [self.rng.randint(hidden_dim, size=(batch_size, 1)).astype(np.int32) for _ in xrange(max_input_sequence_len)]
            W = self.get_orthogonal_matrix(input_dim, hidden_dim)
            b = self.rng.rand(1, hidden_dim).astype(np.float32)
            device_id = 0

            quagga_grads = {}
            for reverse in [False, True]:
                for with_bias in [False, True]:
                    # build the identical graph on each backend and collect grads
                    for processor_type in ['gpu', 'cpu']:
                        quagga.processor_type = processor_type
                        qx = List([Connector(Matrix.from_npa(e), device_id) for e in x])
                        qtrue_labels = List([Connector(Matrix.from_npa(e)) for e in true_labels], len(qx))
                        qW = Connector(Matrix.from_npa(W), device_id)
                        qb = Connector(Matrix.from_npa(b), device_id) if with_bias else None
                        seq_dot_block = SequencerBlock(block_class=DotBlock,
                                                       params=[qW, qb],
                                                       sequences=[qx],
                                                       output_names=['output'],
                                                       reverse=reverse)
                        seq_sce_block = SequencerBlock(block_class=SoftmaxCeBlock,
                                                       params=[],
                                                       sequences=[seq_dot_block.output, qtrue_labels],
                                                       reverse=reverse)
                        # truncate to the sampled length, then forward/backward
                        qx.length = sequence_len
                        qx.fprop()
                        qtrue_labels.fprop()
                        qW.fprop()
                        if qb:
                            qb.fprop()
                        seq_dot_block.fprop()
                        seq_sce_block.fprop()
                        seq_sce_block.bprop()
                        seq_dot_block.bprop()
                        # gradients in a fixed order: dL/dW, optionally dL/db,
                        # then dL/dx for every time step
                        quagga_grads[processor_type] = [qW.backward_matrix.to_host()]
                        if with_bias:
                            quagga_grads[processor_type].append(qb.backward_matrix.to_host())
                        quagga_grads[processor_type].extend(e.backward_matrix.to_host() for e in qx)

                    for grad_gpu, grad_cpu in izip(quagga_grads['gpu'], quagga_grads['cpu']):
                        r.append(np.allclose(grad_gpu, grad_cpu, atol=1e-5))

        self.assertEqual(sum(r), len(r))
Esempio n. 3
0
    def test_theano_fprop(self):
        """
        compare `fprop` results of the quagga SequencerBlock(DotBlock)
        against the theano reference implementation (SequentialDotLayer)
        """
        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            # random problem sizes; the first trial uses the maximal length
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(256)
            input_dim, hidden_dim = self.rng.random_integers(1500, size=2)
            x = [
                self.rng.randn(batch_size, input_dim).astype(np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            W = self.get_orthogonal_matrix(input_dim, hidden_dim)
            b = self.rng.rand(1, hidden_dim).astype(np.float32)

            for reverse in [False, True]:
                for with_bias in [False, True]:
                    qx = List([Connector(Matrix.from_npa(e)) for e in x])
                    qW = Connector(Matrix.from_npa(W))
                    qb = Connector(Matrix.from_npa(b)) if with_bias else None
                    seq_dot_block = SequencerBlock(block_class=DotBlock,
                                                   params=[qW, qb],
                                                   sequences=[qx],
                                                   output_names=['output'],
                                                   reverse=reverse)
                    qx.length = sequence_len
                    qx.fprop()
                    qW.fprop()
                    if qb:
                        qb.fprop()
                    seq_dot_block.fprop()
                    qoutput = seq_dot_block.output.to_host()

                    # theano reference: same weights, bias and direction
                    seq_dot_layer = SequentialDotLayer(
                        W, b if with_bias else None, reverse)
                    th_x = T.ftensor3()
                    get_th_output = theano.function(
                        [th_x], seq_dot_layer.get_output_expr(th_x))
                    th_output = get_th_output(np.dstack(x[:sequence_len]))

                    # fix: use `t` for the time-step index so it no longer
                    # shadows the outer trial counter `i`
                    for t in xrange(th_output.shape[0]):
                        if not np.allclose(qoutput[t], th_output[t]):
                            r.append(False)
                            break
                    else:
                        r.append(True)

        self.assertEqual(sum(r), len(r))
    def test_fprop(self):
        """
        compare `fprop` results for cpu and gpu backends
        """

        outcomes = []
        for trial_idx in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            # full-length sequence on the first trial, random truncation after
            sequence_len = (max_input_sequence_len if trial_idx == 0
                            else self.rng.random_integers(max_input_sequence_len))
            batch_size = self.rng.random_integers(256)
            input_dim, hidden_dim = self.rng.random_integers(1500, size=2)
            x = [self.rng.randn(batch_size, input_dim).astype(np.float32)
                 for _ in xrange(max_input_sequence_len)]
            W = self.get_orthogonal_matrix(input_dim, hidden_dim)
            b = self.rng.rand(1, hidden_dim).astype(np.float32)

            from quagga.cuda import cudart
            cudart.cuda_set_device(1)

            backend_output = {}
            for reverse in [False, True]:
                for with_bias in [False, True]:
                    for processor_type in ['gpu', 'cpu']:
                        quagga.processor_type = processor_type
                        qx = List([Connector(Matrix.from_npa(e)) for e in x])
                        qW = Connector(Matrix.from_npa(W))
                        qb = Connector(Matrix.from_npa(b)) if with_bias else None
                        seq_dot_block = SequencerBlock(block_class=DotBlock,
                                                       params=[qW, qb],
                                                       sequences=[qx],
                                                       output_names=['output'],
                                                       reverse=reverse)
                        qx.length = sequence_len
                        qx.fprop()
                        qW.fprop()
                        if qb:
                            qb.fprop()
                        seq_dot_block.fprop()
                        backend_output[processor_type] = seq_dot_block.output.to_host()

                    # True iff every per-step output pair agrees
                    outcomes.append(all(np.allclose(g, c, atol=1e-5)
                                        for g, c in izip(backend_output['gpu'],
                                                         backend_output['cpu'])))

        self.assertEqual(sum(outcomes), len(outcomes))
    def test_theano_fprop(self):
        """
        compare `fprop` results of the quagga SequencerBlock(DotBlock)
        against the theano reference implementation (SequentialDotLayer)
        """
        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            # random problem sizes; the first trial uses the maximal length
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(max_input_sequence_len)
            batch_size = self.rng.random_integers(256)
            input_dim, hidden_dim = self.rng.random_integers(1500, size=2)
            x = [self.rng.randn(batch_size, input_dim).astype(np.float32) for _ in xrange(max_input_sequence_len)]
            W = self.get_orthogonal_matrix(input_dim, hidden_dim)
            b = self.rng.rand(1, hidden_dim).astype(np.float32)

            for reverse in [False, True]:
                for with_bias in [False, True]:
                    qx = List([Connector(Matrix.from_npa(e)) for e in x])
                    qW = Connector(Matrix.from_npa(W))
                    qb = Connector(Matrix.from_npa(b)) if with_bias else None
                    seq_dot_block = SequencerBlock(block_class=DotBlock,
                                                   params=[qW, qb],
                                                   sequences=[qx],
                                                   output_names=['output'],
                                                   reverse=reverse)
                    qx.length = sequence_len
                    qx.fprop()
                    qW.fprop()
                    if qb:
                        qb.fprop()
                    seq_dot_block.fprop()
                    qoutput = seq_dot_block.output.to_host()

                    # theano reference: same weights, bias and direction
                    seq_dot_layer = SequentialDotLayer(W, b if with_bias else None, reverse)
                    th_x = T.ftensor3()
                    get_th_output = theano.function([th_x], seq_dot_layer.get_output_expr(th_x))
                    th_output = get_th_output(np.dstack(x[:sequence_len]))

                    # fix: use `t` for the time-step index so it no longer
                    # shadows the outer trial counter `i`
                    for t in xrange(th_output.shape[0]):
                        if not np.allclose(qoutput[t], th_output[t]):
                            r.append(False)
                            break
                    else:
                        r.append(True)

        self.assertEqual(sum(r), len(r))
Esempio n. 6
0
    def test_theano_bprop_matrix(self):
        """
        compare the embedding-matrix gradient produced by quagga's
        RowSlicingBlock against a theano reference implementation
        """
        r = []
        for i in xrange(self.N):
            # random problem sizes; first trial uses the maximal length
            max_input_sequence_len = self.rng.random_integers(300)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(2, max_input_sequence_len)
            embd_dim = self.rng.random_integers(10000)
            batch_size = self.rng.random_integers(500)
            output_dim = self.rng.random_integers(2000)
            W = self.get_orthogonal_matrix(embd_dim, output_dim)
            # one row index per (sample, time step) into the embedding matrix
            row_idxs = self.rng.randint(embd_dim, size=(batch_size, max_input_sequence_len)).astype(np.int32)
            true_labels = [self.rng.randint(output_dim, size=(batch_size, 1)).astype(np.int32) for _ in xrange(max_input_sequence_len)]
            device_id = 0

            quagga.processor_type = 'gpu'
            qrow_idxs = Connector(Matrix.from_npa(row_idxs))
            qtrue_labels = List([Connector(Matrix.from_npa(e)) for e in true_labels], qrow_idxs.ncols)
            qW = Connector(Matrix.from_npa(W), device_id)
            row_slicing_block = RowSlicingBlock(qW, qrow_idxs)
            seq_sce_block = SequencerBlock(block_class=SoftmaxCeBlock,
                                           params=[],
                                           sequences=[row_slicing_block.output, qtrue_labels])
            qW.fprop()
            # truncate the index matrix to the sampled length before fprop
            qrow_idxs.ncols = sequence_len
            qrow_idxs.fprop()
            row_slicing_block.fprop()
            seq_sce_block.fprop()
            seq_sce_block.bprop()
            row_slicing_block.bprop()
            # fold the gradient into W (mirrors the theano update W + dL_dW
            # below, so the final matrices can be compared directly)
            qW.add(Context(), qW.backward_matrix)

            th_row_idxs = T.imatrix()
            th_true_labels = T.imatrix()
            row_slicing_layer = RowSlicingLayer(W)
            toutput = row_slicing_layer.get_output_expr(th_row_idxs)
            loss = SequentialSoftmaxLayer.get_loss(toutput, th_true_labels)
            dL_dW = T.grad(loss, row_slicing_layer.W)
            # apply the same gradient step on the theano side
            fun = theano.function([th_row_idxs, th_true_labels],
                                  updates=[(row_slicing_layer.W, row_slicing_layer.W + dL_dW)])
            fun(row_idxs, np.hstack(true_labels[:sequence_len]))

            r.append(np.allclose(qW.to_host(), row_slicing_layer.W.get_value(), atol=1e-5))

        self.assertEqual(sum(r), len(r))
Esempio n. 7
0
    def test_bprop_matrix(self):
        """
        compare the gradient-updated embedding matrix for cpu and gpu backends
        """
        verdicts = []
        for trial in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            if trial == 0:
                sequence_len = max_input_sequence_len
            else:
                sequence_len = self.rng.random_integers(max_input_sequence_len)
            embd_dim = self.rng.random_integers(10000)
            batch_size = self.rng.random_integers(500)
            output_dim = self.rng.random_integers(2000)
            W = self.get_orthogonal_matrix(embd_dim, output_dim)
            row_idxs = self.rng.randint(embd_dim, size=(batch_size, max_input_sequence_len)).astype(np.int32)
            true_labels = [self.rng.randint(output_dim, size=(batch_size, 1)).astype(np.int32)
                           for _ in xrange(max_input_sequence_len)]
            device_id = 0

            updated_W = {}
            for processor_type in ['gpu', 'cpu']:
                quagga.processor_type = processor_type
                qrow_idxs = Connector(Matrix.from_npa(row_idxs))
                qtrue_labels = List([Connector(Matrix.from_npa(e)) for e in true_labels], qrow_idxs.ncols)
                qW = Connector(Matrix.from_npa(W), device_id)
                row_slicing_block = RowSlicingBlock(qW, qrow_idxs)
                seq_sce_block = SequencerBlock(block_class=SoftmaxCeBlock,
                                               params=[],
                                               sequences=[row_slicing_block.output, qtrue_labels])
                qW.fprop()
                # truncate the index matrix before forward/backward
                qrow_idxs.ncols = sequence_len
                qrow_idxs.fprop()
                row_slicing_block.fprop()
                seq_sce_block.fprop()
                seq_sce_block.bprop()
                row_slicing_block.bprop()
                # fold the gradient into W so the backends can be compared
                qW.add(Context(), qW.backward_matrix)
                updated_W[processor_type] = qW.to_host()

            verdicts.append(np.allclose(updated_W['gpu'], updated_W['cpu']))

        self.assertEqual(sum(verdicts), len(verdicts))
Esempio n. 8
0
                        x_device_id=1,
                        y_device_id=0)
 embd_block = RowSlicingBlock(W=p['embd_W'], row_indexes=data_block.x)
 f_c_repeat_block = RepeatBlock(p['f_lstm_c0'],
                                data_block.x.nrows,
                                axis=0,
                                device_id=1)
 f_h_repeat_block = RepeatBlock(p['f_lstm_h0'],
                                data_block.x.nrows,
                                axis=0,
                                device_id=1)
 f_lstm_rnn_block = SequencerBlock(
     block_class=LstmBlock,
     params=[p['f_lstm_W'], p['f_lstm_R'], None],
     sequences=[embd_block.output, data_block.mask],
     output_names=['h'],
     prev_names=['c', 'h'],
     paddings=[f_c_repeat_block.output, f_h_repeat_block.output],
     reverse=False,
     device_id=1)
 s_c_repeat_block = RepeatBlock(p['s_lstm_c0'],
                                data_block.x.nrows,
                                axis=0,
                                device_id=1)
 s_h_repeat_block = RepeatBlock(p['s_lstm_h0'],
                                data_block.x.nrows,
                                axis=0,
                                device_id=1)
 s_lstm_rnn_block = SequencerBlock(
     block_class=LstmBlock,
     params=[p['s_lstm_W'], p['s_lstm_R'], None],
Esempio n. 9
0
    def test_bprop(self):
        """
        compare `bprop` results for cpu and gpu backends
        """

        r = []
        for i in xrange(self.N):
            # random problem sizes; the first trial uses the maximal length
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(256)
            input_dim, hidden_dim = self.rng.random_integers(1500, size=2)

            x = [
                self.rng.randn(batch_size, input_dim).astype(np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            # binary targets for the sigmoid cross-entropy head
            true_labels = [
                self.rng.randint(2, size=(batch_size, 1)).astype(np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            # ~80% of positions active; only covers the truncated length
            mask = (self.rng.rand(batch_size, sequence_len) < 0.8).astype(
                np.float32)
            h_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
            c_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
            # the four LSTM gate weight matrices are packed side by side
            W_z = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_i = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_f = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_o = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W = np.hstack((W_z, W_i, W_f, W_o))
            R_z = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_i = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_f = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_o = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R = np.hstack((R_z, R_i, R_f, R_o))
            # logistic-regression head on top of the LSTM hidden states
            lr_W = self.get_orthogonal_matrix(hidden_dim, 1)
            lr_b = self.rng.rand(1, 1).astype(dtype=np.float32)
            device_id = 0

            quagga_grads = {}
            for reverse in [False, True]:
                for with_mask in [False, True]:
                    for learn_inital_states in [False, True]:
                        # identical graph on both backends; grads are compared
                        for processor_type in ['gpu', 'cpu']:
                            quagga.processor_type = processor_type
                            context = Context()
                            qx = List([
                                Connector(Matrix.from_npa(e), device_id)
                                for e in x
                            ])
                            qtrue_labels = List([
                                Connector(Matrix.from_npa(e))
                                for e in true_labels
                            ], len(qx))
                            # raw device matrix; wrapped into Connectors below
                            # only when the mask branch is taken
                            qmask = Matrix.empty(batch_size, len(qx))
                            # initial states get a backward matrix only when
                            # they are being learned (device_id is not None)
                            qh_0 = Connector(
                                Matrix.from_npa(h_0),
                                device_id if learn_inital_states else None)
                            qc_0 = Connector(
                                Matrix.from_npa(c_0),
                                device_id if learn_inital_states else None)
                            qW = Connector(Matrix.from_npa(W), device_id)
                            qR = Connector(Matrix.from_npa(R), device_id)
                            qlr_W = Connector(Matrix.from_npa(lr_W), device_id)
                            qlr_b = Connector(Matrix.from_npa(lr_b), device_id)
                            sequences = [qx]
                            if with_mask:
                                # per-time-step column views over qmask
                                sequences.append(
                                    List([
                                        Connector(qmask[:, i])
                                        for i in xrange(len(qx))
                                    ], len(qx)))
                                qmask.assign_npa(context, mask)
                                # rebind qmask to the List of Connectors so the
                                # fprop call below hits the connected views
                                qmask = sequences[-1]
                            else:
                                # placeholder sequence: no mask inputs
                                sequences.append([None] * len(qx))
                            lstm = SequencerBlock(block_class=LstmBlock,
                                                  params=[qW, qR],
                                                  sequences=sequences,
                                                  output_names=['h'],
                                                  prev_names=['c', 'h'],
                                                  paddings=[qc_0, qh_0],
                                                  reverse=reverse)
                            seq_dot_block = SequencerBlock(
                                block_class=DotBlock,
                                params=[qlr_W, qlr_b],
                                sequences=[lstm.h],
                                output_names=['output'])
                            seq_sce_block = SequencerBlock(
                                block_class=SigmoidCeBlock,
                                params=[],
                                sequences=[seq_dot_block.output, qtrue_labels
                                           ] + ([qmask] if with_mask else []))
                            # truncate, then run the full forward/backward pass
                            qx.length = sequence_len
                            qx.fprop()
                            qtrue_labels.fprop()
                            if with_mask:
                                qmask.fprop()
                            qlr_W.fprop()
                            qlr_b.fprop()
                            qh_0.fprop()
                            qc_0.fprop()
                            qW.fprop()
                            qR.fprop()
                            lstm.fprop()
                            seq_dot_block.fprop()
                            seq_sce_block.fprop()
                            seq_sce_block.bprop()
                            seq_dot_block.bprop()
                            lstm.bprop()
                            # gradients in a fixed order so gpu/cpu lists align
                            quagga_grads[processor_type] = [
                                qlr_b.backward_matrix.to_host(),
                                qlr_W.backward_matrix.to_host(),
                                qW.backward_matrix.to_host(),
                                qR.backward_matrix.to_host()
                            ]
                            if learn_inital_states:
                                quagga_grads[processor_type].append(
                                    qc_0.backward_matrix.to_host())
                                quagga_grads[processor_type].append(
                                    qh_0.backward_matrix.to_host())
                            quagga_grads[processor_type].extend(
                                e.backward_matrix.to_host() for e in qx)

                        for grad_gpu, grad_cpu in izip(quagga_grads['gpu'],
                                                       quagga_grads['cpu']):
                            r.append(np.allclose(grad_gpu, grad_cpu,
                                                 atol=1e-6))

        self.assertEqual(sum(r), len(r))
Esempio n. 10
0
 seq_embd_block = RowSlicingBlock(p['embd_W'], data_block.sentence_batch)
 # remove last in the list
 output = List(seq_embd_block.output[:-1], seq_embd_block.output.length - 1)
 c_fwd_repeat_block = RepeatBlock(p['lstm_fwd_c0'],
                                  data_block.sentence_batch.nrows,
                                  axis=0,
                                  device_id=0)
 h_fwd_repeat_block = RepeatBlock(p['lstm_fwd_h0'],
                                  data_block.sentence_batch.nrows,
                                  axis=0,
                                  device_id=0)
 fwd_lstm_block = SequencerBlock(
     block_class=LstmBlock,
     params=[p['lstm_fwd_W'], p['lstm_fwd_R'], 0.5],
     sequences=[output, data_block.mask],
     output_names=['h'],
     prev_names=['c', 'h'],
     paddings=[c_fwd_repeat_block.output, h_fwd_repeat_block.output],
     reverse=False,
     device_id=0)
 # remove first in the list
 output = List(seq_embd_block.output[1:], seq_embd_block.output.length - 1)
 c_bwd_repeat_block = RepeatBlock(p['lstm_bwd_c0'],
                                  data_block.sentence_batch.nrows,
                                  axis=0,
                                  device_id=0)
 h_bwd_repeat_block = RepeatBlock(p['lstm_bwd_h0'],
                                  data_block.sentence_batch.nrows,
                                  axis=0,
                                  device_id=0)
 bwd_lstm_block = SequencerBlock(
Esempio n. 11
0
    def test_theano_fprop(self):
        """
        compare `fprop` results of the quagga SequencerBlock(LstmBlock)
        against the theano reference implementation (LstmLayer)
        """
        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            # random problem sizes; the first trial uses the maximal length
            max_input_sequence_len = self.rng.random_integers(500)
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(256)
            input_dim, hidden_dim = self.rng.random_integers(1500, size=2)
            x = [
                self.rng.randn(batch_size, input_dim).astype(np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            # ~80% of positions active; only covers the truncated length
            mask = (self.rng.rand(batch_size, sequence_len) < 0.8).astype(
                np.float32)
            h_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
            c_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
            # the four LSTM gate weight matrices are packed side by side
            W_z = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_i = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_f = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_o = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W = np.hstack((W_z, W_i, W_f, W_o))
            R_z = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_i = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_f = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_o = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R = np.hstack((R_z, R_i, R_f, R_o))

            for reverse in [False, True]:
                for with_mask in [False, True]:
                    context = Context()
                    qx = List([Connector(Matrix.from_npa(e)) for e in x])
                    qmask = Connector(
                        Matrix.empty(batch_size, len(qx), 'float'))
                    qh_0 = Connector(Matrix.from_npa(h_0))
                    qc_0 = Connector(Matrix.from_npa(c_0))
                    qW = Connector(Matrix.from_npa(W))
                    qR = Connector(Matrix.from_npa(R))
                    lstm = SequencerBlock(block_class=LstmBlock,
                                          params=[qW, qR],
                                          sequences=[qx] +
                                          ([qmask] if with_mask else []),
                                          output_names=['h'],
                                          prev_names=['c', 'h'],
                                          paddings=[qc_0, qh_0],
                                          reverse=reverse)

                    # truncate, then run the forward pass
                    qx.length = sequence_len
                    for e in qx:
                        e.fprop()
                    qmask.assign_npa(context, mask)
                    qmask.fprop()
                    qh_0.fprop()
                    qc_0.fprop()
                    qW.fprop()
                    qR.fprop()
                    lstm.fprop()
                    q_h = lstm.h.to_host()

                    # theano reference with identical weights and direction
                    th_x = T.ftensor3()
                    lstm_layer = LstmLayer(W, R, c_0, h_0, reverse)
                    if with_mask:
                        th_mask = T.fmatrix()
                        get_th_h = theano.function([th_x, th_mask],
                                                   lstm_layer.get_output_expr(
                                                       th_x, th_mask))
                        th_h = get_th_h(np.dstack(x[:sequence_len]),
                                        mask[:, :sequence_len])
                    else:
                        get_th_h = theano.function(
                            [th_x], lstm_layer.get_output_expr(th_x))
                        th_h = get_th_h(np.dstack(x[:sequence_len]))

                    # fix: use `t` for the time-step index so it no longer
                    # shadows the outer trial counter `i`
                    for t in xrange(th_h.shape[0]):
                        if not np.allclose(q_h[t], th_h[t]):
                            r.append(False)
                            break
                    else:
                        r.append(True)

        self.assertEqual(sum(r), len(r))
Esempio n. 12
0
    def test_theano_bprop(self):
        """
        Compare `bprop` gradients of a SequencerBlock(DotBlock) followed by a
        SequencerBlock(SoftmaxCeBlock) against gradients computed by an
        equivalent theano graph, for all (reverse, with_bias) combinations.
        """
        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            # first trial always uses the full buffer length; later trials a random prefix
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(256)
            input_dim, hidden_dim = self.rng.random_integers(1500, size=2)
            x = [
                self.rng.randn(batch_size, input_dim).astype(np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            true_labels = [
                self.rng.randint(hidden_dim,
                                 size=(batch_size, 1)).astype(np.int32)
                for _ in xrange(max_input_sequence_len)
            ]
            W = self.get_orthogonal_matrix(input_dim, hidden_dim)
            b = self.rng.rand(1, hidden_dim).astype(np.float32)
            device_id = 0

            for reverse in [False, True]:
                for with_bias in [False, True]:
                    # --- quagga graph: Dot -> SoftmaxCe over the sequence ---
                    qx = List(
                        [Connector(Matrix.from_npa(e), device_id) for e in x])
                    qtrue_labels = List(
                        [Connector(Matrix.from_npa(e)) for e in true_labels],
                        len(qx))
                    qW = Connector(Matrix.from_npa(W), device_id)
                    qb = Connector(Matrix.from_npa(b),
                                   device_id) if with_bias else None
                    seq_dot_block = SequencerBlock(block_class=DotBlock,
                                                   params=[qW, qb],
                                                   sequences=[qx],
                                                   output_names=['output'],
                                                   reverse=reverse)
                    seq_sce_block = SequencerBlock(
                        block_class=SoftmaxCeBlock,
                        params=[],
                        sequences=[seq_dot_block.output, qtrue_labels],
                        reverse=reverse)
                    qx.length = sequence_len
                    qx.fprop()
                    qtrue_labels.fprop()
                    qW.fprop()
                    if qb:
                        qb.fprop()
                    seq_dot_block.fprop()
                    seq_sce_block.fprop()
                    seq_sce_block.bprop()
                    seq_dot_block.bprop()
                    # parameter grads first, then the per-step input grads as one list
                    quagga_grads = [qW.backward_matrix.to_host()]
                    if with_bias:
                        quagga_grads.append(qb.backward_matrix.to_host())
                    quagga_grads.append(
                        [e.backward_matrix.to_host() for e in qx])

                    # --- reference theano graph with identical parameters ---
                    seq_dot_layer = SequentialDotLayer(
                        W, b if with_bias else None, reverse)
                    seq_sce_layer = SequentialSoftmaxLayer()
                    th_x = T.ftensor3()
                    th_true_labels = T.imatrix()
                    loss = seq_sce_layer.get_loss(
                        seq_dot_layer.get_output_expr(th_x), th_true_labels)
                    # `wrt` is ordered to match `quagga_grads`: W, (b), x
                    wrt = [seq_dot_layer.W]
                    if with_bias:
                        wrt.append(seq_dot_layer.b)
                    wrt.append(th_x)
                    grads = T.grad(loss, wrt)
                    get_theano_grads = theano.function([th_x, th_true_labels],
                                                       grads)
                    theano_grads = get_theano_grads(
                        np.dstack(x[:sequence_len]),
                        np.hstack(true_labels[:sequence_len]))

                    # parameter gradients: compare pairwise
                    for quagga_grad, theano_grad in izip(
                            quagga_grads[:-1], theano_grads[:-1]):
                        r.append(
                            np.allclose(quagga_grad, theano_grad, atol=1e-5))
                    # input gradients: theano stacks time along the last axis.
                    # Use `t`, not `i`, so the outer trial counter is not shadowed.
                    for t in xrange(theano_grads[-1].shape[-1]):
                        if not np.allclose(quagga_grads[-1][t],
                                           theano_grads[-1][..., t],
                                           atol=1e-5):
                            r.append(False)
                            break
                    else:
                        r.append(True)

        self.assertEqual(sum(r), len(r))
Esempio n. 13
0
    def test_bprop(self):
        """
        compare `bprop` results for cpu and gpu backends
        """

        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            # first trial always uses the full buffer length; later trials a random prefix
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(256)
            input_dim, hidden_dim = self.rng.random_integers(1500, size=2)
            x = [
                self.rng.randn(batch_size, input_dim).astype(np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            true_labels = [
                self.rng.randint(hidden_dim,
                                 size=(batch_size, 1)).astype(np.int32)
                for _ in xrange(max_input_sequence_len)
            ]
            W = self.get_orthogonal_matrix(input_dim, hidden_dim)
            b = self.rng.rand(1, hidden_dim).astype(np.float32)
            device_id = 0

            # run the identical graph on both backends and collect gradients
            quagga_grads = {}
            for reverse in [False, True]:
                for with_bias in [False, True]:
                    for processor_type in ['gpu', 'cpu']:
                        quagga.processor_type = processor_type
                        qx = List([
                            Connector(Matrix.from_npa(e), device_id) for e in x
                        ])
                        qtrue_labels = List([
                            Connector(Matrix.from_npa(e)) for e in true_labels
                        ], len(qx))
                        qW = Connector(Matrix.from_npa(W), device_id)
                        qb = Connector(Matrix.from_npa(b),
                                       device_id) if with_bias else None
                        seq_dot_block = SequencerBlock(block_class=DotBlock,
                                                       params=[qW, qb],
                                                       sequences=[qx],
                                                       output_names=['output'],
                                                       reverse=reverse)
                        seq_sce_block = SequencerBlock(
                            block_class=SoftmaxCeBlock,
                            params=[],
                            sequences=[seq_dot_block.output, qtrue_labels],
                            reverse=reverse)
                        qx.length = sequence_len
                        qx.fprop()
                        qtrue_labels.fprop()
                        qW.fprop()
                        if qb:
                            qb.fprop()
                        seq_dot_block.fprop()
                        seq_sce_block.fprop()
                        seq_sce_block.bprop()
                        seq_dot_block.bprop()
                        # gradient order: W, (b), then each per-step input grad
                        quagga_grads[processor_type] = [
                            qW.backward_matrix.to_host()
                        ]
                        if with_bias:
                            quagga_grads[processor_type].append(
                                qb.backward_matrix.to_host())
                        quagga_grads[processor_type].extend(
                            e.backward_matrix.to_host() for e in qx)

                    for grad_gpu, grad_cpu in izip(quagga_grads['gpu'],
                                                   quagga_grads['cpu']):
                        r.append(np.allclose(grad_gpu, grad_cpu, atol=1e-5))

        self.assertEqual(sum(r), len(r))
Esempio n. 14
0
                        dec_lstm_W={'init': H5pyInitializer(model_file_name, 'dec_lstm_W'),
                                    'device_id': 0},
                        dec_lstm_R={'init': H5pyInitializer(model_file_name, 'dec_lstm_R'),
                                    'device_id': 0},
                        sce_dot_block_W={'init': H5pyInitializer(model_file_name, 'sce_dot_block_W'),
                                         'device_id': 0},
                        sce_dot_block_b={'init': H5pyInitializer(model_file_name, 'sce_dot_block_b'),
                                         'device_id': 0})
 data_block = DataBlock(train_data, valid_data, 64, word_dropout_prob=0.99, device_id=0)
 enc_embd_block = RowSlicingBlock(p['embd_W'], data_block.enc_x)
 enc_c_repeat_block = RepeatBlock(p['enc_lstm_c0'], data_block.enc_x.nrows, axis=0, device_id=0)
 enc_h_repeat_block = RepeatBlock(p['enc_lstm_h0'], data_block.enc_x.nrows, axis=0, device_id=0)
 enc_lstm_block = SequencerBlock(block_class=LstmBlock,
                                 params=[p['enc_lstm_W'], p['enc_lstm_R'], 0.25],
                                 sequences=[enc_embd_block.output, data_block.enc_mask],
                                 output_names=['h'],
                                 prev_names=['c', 'h'],
                                 paddings=[enc_c_repeat_block.output, enc_h_repeat_block.output],
                                 reverse=False,
                                 device_id=0)
 dec_embd_block = RowSlicingBlock(p['embd_W'], data_block.dec_x)
 dec_c_repeat_block = RepeatBlock(p['dec_lstm_c0'], data_block.enc_x.nrows, axis=0, device_id=0)
 last_selector_block = LastSelectorBlock(enc_lstm_block.h)
 l2_reg_block = L2RegularizationBlock(last_selector_block.output, 0.001)
 dec_lstm_block = SequencerBlock(block_class=LstmBlock,
                                 params=[p['dec_lstm_W'], p['dec_lstm_R'], 0.25],
                                 sequences=[dec_embd_block.output, data_block.dec_mask],
                                 output_names=['h'],
                                 prev_names=['c', 'h'],
                                 paddings=[dec_c_repeat_block.output, last_selector_block.output],
                                 reverse=False,
                                 device_id=0)
Esempio n. 15
0
    def test_theano_bprop(self):
        """
        Compare `bprop` gradients of a SequencerBlock(DotBlock) followed by a
        SequencerBlock(SoftmaxCeBlock) against gradients computed by an
        equivalent theano graph, for all (reverse, with_bias) combinations.
        """
        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            # first trial always uses the full buffer length; later trials a random prefix
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(max_input_sequence_len)
            batch_size = self.rng.random_integers(256)
            input_dim, hidden_dim = self.rng.random_integers(1500, size=2)
            x = [self.rng.randn(batch_size, input_dim).astype(np.float32) for _ in xrange(max_input_sequence_len)]
            true_labels = [self.rng.randint(hidden_dim, size=(batch_size, 1)).astype(np.int32) for _ in xrange(max_input_sequence_len)]
            W = self.get_orthogonal_matrix(input_dim, hidden_dim)
            b = self.rng.rand(1, hidden_dim).astype(np.float32)
            device_id = 0

            for reverse in [False, True]:
                for with_bias in [False, True]:
                    # --- quagga graph: Dot -> SoftmaxCe over the sequence ---
                    qx = List([Connector(Matrix.from_npa(e), device_id) for e in x])
                    qtrue_labels = List([Connector(Matrix.from_npa(e)) for e in true_labels], len(qx))
                    qW = Connector(Matrix.from_npa(W), device_id)
                    qb = Connector(Matrix.from_npa(b), device_id) if with_bias else None
                    seq_dot_block = SequencerBlock(block_class=DotBlock,
                                                   params=[qW, qb],
                                                   sequences=[qx],
                                                   output_names=['output'],
                                                   reverse=reverse)
                    seq_sce_block = SequencerBlock(block_class=SoftmaxCeBlock,
                                                   params=[],
                                                   sequences=[seq_dot_block.output, qtrue_labels],
                                                   reverse=reverse)
                    qx.length = sequence_len
                    qx.fprop()
                    qtrue_labels.fprop()
                    qW.fprop()
                    if qb:
                        qb.fprop()
                    seq_dot_block.fprop()
                    seq_sce_block.fprop()
                    seq_sce_block.bprop()
                    seq_dot_block.bprop()
                    # gradient order: W, (b), then the per-step input grads as one list
                    quagga_grads = [qW.backward_matrix.to_host()]
                    if with_bias:
                        quagga_grads.append(qb.backward_matrix.to_host())
                    quagga_grads.append([e.backward_matrix.to_host() for e in qx])

                    # --- reference theano graph with identical parameters ---
                    seq_dot_layer = SequentialDotLayer(W, b if with_bias else None, reverse)
                    seq_sce_layer = SequentialSoftmaxLayer()
                    th_x = T.ftensor3()
                    th_true_labels = T.imatrix()
                    loss = seq_sce_layer.get_loss(seq_dot_layer.get_output_expr(th_x), th_true_labels)
                    # `wrt` is ordered to match `quagga_grads`: W, (b), x
                    wrt = [seq_dot_layer.W]
                    if with_bias:
                        wrt.append(seq_dot_layer.b)
                    wrt.append(th_x)
                    grads = T.grad(loss, wrt)
                    get_theano_grads = theano.function([th_x, th_true_labels], grads)
                    theano_grads = get_theano_grads(np.dstack(x[:sequence_len]), np.hstack(true_labels[:sequence_len]))

                    # parameter gradients: compare pairwise
                    for quagga_grad, theano_grad in izip(quagga_grads[:-1], theano_grads[:-1]):
                        r.append(np.allclose(quagga_grad, theano_grad, atol=1e-5))
                    # input gradients: theano stacks time along the last axis.
                    # Use `t`, not `i`, so the outer trial counter is not shadowed.
                    for t in xrange(theano_grads[-1].shape[-1]):
                        if not np.allclose(quagga_grads[-1][t], theano_grads[-1][..., t], atol=1e-5):
                            r.append(False)
                            break
                    else:
                        r.append(True)

        self.assertEqual(sum(r), len(r))
Esempio n. 16
0
    def test_fprop(self):
        """
        compare `fprop` results for cpu and gpu backends
        """

        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(500)
            # first trial always uses the full buffer length; later trials a random prefix
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(256)
            input_dim, hidden_dim = self.rng.random_integers(1500, size=2)
            x = [
                self.rng.randn(batch_size, input_dim).astype(np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            # binary mask (~80% ones) over the active part of the sequence
            mask = (self.rng.rand(batch_size, sequence_len) < 0.8).astype(
                np.float32)
            h_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
            c_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
            # LSTM input weights for the four gates, concatenated column-wise: z, i, f, o
            W_z = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_i = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_f = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_o = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W = np.hstack((W_z, W_i, W_f, W_o))
            # recurrent weights, same gate order as W
            R_z = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_i = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_f = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_o = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R = np.hstack((R_z, R_i, R_f, R_o))

            # per-backend hidden states, keyed by processor type
            qh = {}
            for reverse in [False, True]:
                for with_mask in [False, True]:
                    for processor_type in ['gpu', 'cpu']:
                        quagga.processor_type = processor_type
                        context = Context()
                        qx = List([Connector(Matrix.from_npa(e)) for e in x])
                        qmask = Matrix.empty(batch_size, len(qx), 'float')
                        qh_0 = Connector(Matrix.from_npa(h_0))
                        qc_0 = Connector(Matrix.from_npa(c_0))
                        qW = Connector(Matrix.from_npa(W))
                        qR = Connector(Matrix.from_npa(R))
                        sequences = [qx]
                        if with_mask:
                            # wrap each mask column as a Connector, fill the raw
                            # matrix, then rebind qmask to the Connector list
                            sequences.append(
                                List([
                                    Connector(qmask[:, i])
                                    for i in xrange(len(qx))
                                ], len(qx)))
                            qmask.assign_npa(context, mask)
                            qmask = sequences[-1]
                        else:
                            # no mask: SequencerBlock receives None for each step
                            sequences.append([None] * len(qx))
                        lstm = SequencerBlock(block_class=LstmBlock,
                                              params=[qW, qR],
                                              sequences=sequences,
                                              output_names=['h'],
                                              prev_names=['c', 'h'],
                                              paddings=[qc_0, qh_0],
                                              reverse=reverse)
                        qx.length = sequence_len
                        if with_mask:
                            qmask.fprop()
                        qx.fprop()
                        qh_0.fprop()
                        qc_0.fprop()
                        qW.fprop()
                        qR.fprop()
                        lstm.fprop()
                        qh[processor_type] = lstm.h.to_host()

                    # compare per-step hidden states between backends
                    for h_gpu, h_cpu in izip(qh['gpu'], qh['cpu']):
                        if not np.allclose(h_gpu, h_cpu, rtol=1e-7, atol=1e-3):
                            r.append(False)
                            break
                    else:
                        r.append(True)

        self.assertEqual(sum(r), len(r))
Esempio n. 17
0
    def test_theano_grad(self):
        """
        Compare gradients of an LSTM -> Dot -> SoftmaxCe quagga graph against
        gradients computed by an equivalent theano model, for all combinations
        of (reverse, with_mask, learn_inital_states).
        """
        quagga.processor_type = 'gpu'
        r = []
        for i in xrange(self.N):
            max_input_sequence_len = self.rng.random_integers(300)
            # first trial always uses the full buffer length; later trials a random prefix
            sequence_len = max_input_sequence_len if i == 0 else self.rng.random_integers(
                max_input_sequence_len)
            batch_size = self.rng.random_integers(128)
            input_dim, hidden_dim, class_num = self.rng.random_integers(1500,
                                                                        size=3)

            x = [
                self.rng.randn(batch_size, input_dim).astype(np.float32)
                for _ in xrange(max_input_sequence_len)
            ]
            true_labels = [
                self.rng.randint(class_num,
                                 size=(batch_size, 1)).astype(np.int32)
                for _ in xrange(max_input_sequence_len)
            ]
            # binary mask (~80% ones) over the active part of the sequence
            mask = (self.rng.rand(batch_size, sequence_len) < 0.8).astype(
                np.float32)
            h_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
            c_0 = self.rng.randn(batch_size, hidden_dim).astype(np.float32)
            # LSTM input weights for the four gates, concatenated column-wise: z, i, f, o
            W_z = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_i = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_f = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W_o = self.get_orthogonal_matrix(input_dim, hidden_dim)
            W = np.hstack((W_z, W_i, W_f, W_o))
            # recurrent weights, same gate order as W
            R_z = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_i = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_f = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R_o = self.get_orthogonal_matrix(hidden_dim, hidden_dim)
            R = np.hstack((R_z, R_i, R_f, R_o))
            # logistic-regression (output projection) parameters
            lr_W = self.get_orthogonal_matrix(hidden_dim, class_num)
            lr_b = self.rng.rand(1, class_num).astype(dtype=np.float32)
            device_id = 0

            for reverse in [False, True]:
                for with_mask in [False, True]:
                    for learn_inital_states in [False, True]:
                        # quagga model
                        context = Context()
                        qx = List([
                            Connector(Matrix.from_npa(e), device_id) for e in x
                        ])
                        qtrue_labels = List([
                            Connector(Matrix.from_npa(e)) for e in true_labels
                        ], qx.length)
                        qmask = Matrix.empty(batch_size, qx.length, 'float')
                        # NOTE(review): in Python 2 this comprehension's `i`
                        # leaks and clobbers the outer trial counter — harmless
                        # here because the outer loop rebinds `i`, but fragile.
                        qmask_list = [
                            Connector(qmask[:, i]) for i in xrange(qmask.ncols)
                        ]
                        qmask = Connector(qmask)
                        # initial states get a device_id (and hence gradients)
                        # only when they are being learned
                        qh_0 = Connector(
                            Matrix.from_npa(h_0),
                            device_id if learn_inital_states else None)
                        qc_0 = Connector(
                            Matrix.from_npa(c_0),
                            device_id if learn_inital_states else None)
                        qW = Connector(Matrix.from_npa(W), device_id)
                        qR = Connector(Matrix.from_npa(R), device_id)
                        qlr_W = Connector(Matrix.from_npa(lr_W), device_id)
                        qlr_b = Connector(Matrix.from_npa(lr_b), device_id)
                        lstm = SequencerBlock(
                            block_class=LstmBlock,
                            params=[qW, qR],
                            sequences=[
                                qx,
                                qmask_list if with_mask else [None] * len(qx)
                            ],
                            output_names=['h'],
                            prev_names=['c', 'h'],
                            paddings=[qc_0, qh_0],
                            reverse=reverse)
                        seq_dot_block = SequencerBlock(block_class=DotBlock,
                                                       params=[qlr_W, qlr_b],
                                                       sequences=[lstm.h],
                                                       output_names=['output'])
                        seq_sce_block = SequencerBlock(
                            block_class=SoftmaxCeBlock,
                            params=[],
                            sequences=[
                                seq_dot_block.output, qtrue_labels,
                                qmask_list if with_mask else [None] * len(qx)
                            ])
                        qx.length = sequence_len
                        for e in qx:
                            e.fprop()
                        for e in qtrue_labels:
                            e.fprop()
                        # upload the host mask into device memory before fprop
                        qmask.assign_npa(context, mask)
                        qmask.fprop()
                        qlr_W.fprop()
                        qlr_b.fprop()
                        qh_0.fprop()
                        qc_0.fprop()
                        qW.fprop()
                        qR.fprop()
                        lstm.fprop()
                        seq_dot_block.fprop()
                        seq_sce_block.fprop()
                        seq_sce_block.bprop()
                        seq_dot_block.bprop()
                        lstm.bprop()
                        # gradient order matches the theano `wrt` list below:
                        # b, W (projection), W, R (lstm), (c0, h0), then inputs
                        quagga_grads = [
                            qlr_b.backward_matrix.to_host(),
                            qlr_W.backward_matrix.to_host(),
                            qW.backward_matrix.to_host(),
                            qR.backward_matrix.to_host()
                        ]
                        if learn_inital_states:
                            quagga_grads.append(qc_0.backward_matrix.to_host())
                            quagga_grads.append(qh_0.backward_matrix.to_host())
                        quagga_grads.append(
                            [e.backward_matrix.to_host() for e in qx])
                        # release device objects before building the theano model
                        # (presumably to free GPU memory — TODO confirm)
                        del qx
                        del qlr_b
                        del qlr_W
                        del qW
                        del qR
                        del qmask
                        del lstm
                        del seq_dot_block
                        del seq_sce_block

                        # theano model
                        th_x = T.ftensor3()
                        th_true_labels = T.imatrix()
                        th_mask = T.fmatrix()
                        lstm_layer = LstmLayer(W, R, c_0, h_0, reverse=reverse)
                        th_h = lstm_layer.get_output_expr(
                            th_x, th_mask if with_mask else None)
                        seq_softmax_layer = SequentialSoftmaxLayer(
                            lr_W, lr_b, reverse)
                        loss = seq_softmax_layer.get_loss(
                            th_h, th_true_labels,
                            th_mask if with_mask else None)
                        wrt = [
                            seq_softmax_layer.b, seq_softmax_layer.W,
                            lstm_layer.W, lstm_layer.R
                        ]
                        if learn_inital_states:
                            wrt.append(lstm_layer.c0)
                            wrt.append(lstm_layer.h0)
                        wrt.append(th_x)
                        grads = T.grad(loss, wrt)
                        # compile with or without the mask input
                        if with_mask:
                            get_theano_grads = theano.function(
                                [th_x, th_true_labels, th_mask], grads)
                            theano_grads = get_theano_grads(
                                np.dstack(x[:sequence_len]),
                                np.hstack(true_labels[:sequence_len]),
                                mask[:, :sequence_len])
                        else:
                            get_theano_grads = theano.function(
                                [th_x, th_true_labels], grads)
                            theano_grads = get_theano_grads(
                                np.dstack(x[:sequence_len]),
                                np.hstack(true_labels[:sequence_len]))

                        # parameter gradients: compare pairwise
                        for quagga_grad, theano_grad in izip(
                                quagga_grads[:-1], theano_grads[:-1]):
                            r.append(
                                np.allclose(quagga_grad,
                                            theano_grad,
                                            atol=1e-6))
                        # input gradients: theano stacks time along the last
                        # axis. NOTE(review): this `i` shadows the trial counter
                        # (harmless, but a distinct name would be safer).
                        for i in xrange(theano_grads[-1].shape[-1]):
                            if not np.allclose(quagga_grads[-1][i],
                                               theano_grads[-1][..., i],
                                               atol=1e-6):
                                r.append(False)
                                break
                        else:
                            r.append(True)

        self.assertEqual(sum(r), len(r))