Example 1
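These snippets read like test and benchmark helpers for a Chainer-based SRU implementation. They rely on module-level imports and globals that are not shown; a plausible preamble is sketched below, where the project-local names (`SRU`, `NaiveSRU`, `autograd`, `gpu_device`, `xp`) and their module path are assumptions, not part of the source.

import time

import numpy as np
import chainer
from chainer import cuda, functions, links
import torch  # only needed for the PyTorch comparison in Example 4

# Assumed project-local pieces used by the examples below (module path is a guess):
# from sru import SRU, NaiveSRU, autograd
# gpu_device = 0  # CUDA device id passed to cuda.to_gpu / layer.to_gpu
# xp = np         # array module used in the assertions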
def benchmark_chainer_sru(batchsize, seq_length, feature_dimension, repeat=50):
    layer = SRU(feature_dimension)
    x_data = np.random.normal(0,
                              1,
                              size=(batchsize, feature_dimension,
                                    seq_length)).astype(np.float32)
    x_data = cuda.to_gpu(x_data)
    layer.to_gpu()

    with chainer.no_backprop_mode(), chainer.using_config("train", False):
        # forward
        start_time = time.time()
        for i in range(repeat):
            output, cell, last_cell = layer(x_data, None)
        forward_time_mean = (time.time() - start_time) / repeat

    with chainer.using_config("train", True):
        # backward
        start_time = time.time()
        for i in range(repeat):
            output, cell, last_cell = layer(x_data, None)
            layer.cleargrads()
            functions.sum(output).backward()
        backward_time_mean = (time.time() - start_time) / repeat

    return forward_time_mean, backward_time_mean
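A minimal driver for the benchmark above might look like this; the batch size, sequence lengths, and feature dimension are illustrative values, not taken from the source.

if __name__ == "__main__":
    for seq_length in (16, 64, 256):
        fwd, bwd = benchmark_chainer_sru(batchsize=32,
                                         seq_length=seq_length,
                                         feature_dimension=128)
        print("seq_length=%3d  forward %.5f s/iter  backward %.5f s/iter" %
              (seq_length, fwd, bwd))

Note that the timed loops contain no explicit device synchronization, so on GPU the reported times may partly reflect kernel launch overhead rather than completed execution.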
Example 2
def check_dropout_forward(batchsize, feature_dimension, seq_length, use_tanh):
    x_cpu_data = np.random.normal(
        0, 1, size=(batchsize, feature_dimension, seq_length * 3)).astype(
            np.float32) * 10
    x_gpu_data = cuda.to_gpu(x_cpu_data, gpu_device)

    with chainer.using_config("train", True):
        # get true output
        layer = SRU(feature_dimension,
                    feature_dimension,
                    use_tanh=use_tanh,
                    dropout=0.5)
        mask_x = layer.generate_dropout_mask(x_cpu_data)
        output_true, cell_true, last_cell_true = autograd(
            x_cpu_data[..., :seq_length], layer.W, layer.B, None,
            layer.use_tanh, mask_x)
        output_true, cell_true, last_cell_true = autograd(
            x_cpu_data[..., seq_length:seq_length * 2], layer.W, layer.B,
            last_cell_true, layer.use_tanh, mask_x)
        output_true, cell_true, last_cell_true = autograd(
            x_cpu_data[..., seq_length * 2:], layer.W, layer.B, last_cell_true,
            layer.use_tanh, mask_x)

        # get cuda output
        layer.to_gpu(gpu_device)
        output, cell, last_cell = layer(x_gpu_data[..., :seq_length], None,
                                        cuda.to_gpu(mask_x))
        output, cell, last_cell = layer(
            x_gpu_data[..., seq_length:seq_length * 2], last_cell,
            cuda.to_gpu(mask_x))
        output, cell, last_cell = layer(x_gpu_data[..., seq_length * 2:],
                                        last_cell, cuda.to_gpu(mask_x))

    threshold = 1e-5
    assert (xp.mean(abs(output_true.data - cuda.to_cpu(output.data))) <=
            threshold), xp.mean(
                abs(output_true.data - cuda.to_cpu(output.data)))
    assert (xp.mean(abs(cell_true.data - cuda.to_cpu(cell.data))) <=
            threshold), xp.mean(abs(cell_true.data - cuda.to_cpu(cell.data)))
    assert (xp.mean(abs(last_cell_true.data - cuda.to_cpu(last_cell.data))) <=
            threshold), xp.mean(
                abs(last_cell_true.data - cuda.to_cpu(last_cell.data)))
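A small sweep over both activations is enough to exercise the check above; the concrete sizes are assumptions chosen only for illustration.

for use_tanh in (True, False):
    check_dropout_forward(batchsize=16,
                          feature_dimension=64,
                          seq_length=35,
                          use_tanh=use_tanh)
print("check_dropout_forward: OK")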
Example 3
def check_outputs():
    seq_length = 50
    batchsize = 48
    feature_dimension = 128
    data_cpu = np.random.normal(0,
                                1,
                                size=(batchsize, feature_dimension,
                                      seq_length)).astype(np.float32)
    data_gpu = cuda.to_gpu(data_cpu, gpu_device)

    # CPU
    layer = SRU(feature_dimension, feature_dimension)
    h_cpu, c_cpu = layer(data_cpu)
    layer.reset_state()

    # GPU
    layer.to_gpu(gpu_device)
    h_gpu, c_gpu = layer(data_gpu)
    layer.reset_state()

    print(np.mean(abs(c_cpu.data - cuda.to_cpu(c_gpu.data))))
    print(np.mean(abs(h_cpu.data - cuda.to_cpu(h_gpu.data))))
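`check_outputs` takes no arguments and only prints the mean absolute CPU/GPU differences, so exercising it is a one-liner; values close to zero indicate the two code paths agree.

check_outputs()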
Example 4
def _profile():
    seq_length = 50
    batchsize = 48
    feature_dimension = 128
    data_cpu = np.random.normal(0,
                                1,
                                size=(batchsize, feature_dimension,
                                      seq_length)).astype(np.float32)
    data_gpu = cuda.to_gpu(data_cpu, gpu_device)

    # CPU
    layer = SRU(feature_dimension, feature_dimension)
    for _ in range(100):
        h_cpu, c_cpu = layer(data_cpu)
        layer.reset_state()

    # GPU (define-by-run)
    layer = NaiveSRU(feature_dimension, feature_dimension)
    layer.to_gpu(gpu_device)
    for _ in range(100):
        h, c = layer(data_gpu)
        layer.reset_state()

    # GPU (CUDA Kernel)
    layer = SRU(feature_dimension, feature_dimension)
    layer.to_gpu(gpu_device)
    for _ in range(100):
        h_gpu, c_gpu = layer(data_gpu)
        layer.reset_state()

    # GPU (PyTorch)
    with torch.cuda.device(gpu_device):
        from cuda_functional import SRU as PyTorchSRU
        data_gpu_torch = torch.FloatTensor(seq_length, batchsize,
                                           feature_dimension).cuda()
        rnn = PyTorchSRU(128,
                         128,
                         num_layers=1,
                         dropout=0.0,
                         rnn_dropout=0.0,
                         use_tanh=0,
                         bidirectional=False)
        rnn.cuda()
        for _ in range(100):
            output, hidden = rnn(torch.autograd.Variable(data_gpu_torch))

    # LSTM (Chainer)
    layer = links.LSTM(feature_dimension, feature_dimension)
    layer.to_gpu(gpu_device)
    for _ in range(100):
        for t in range(seq_length):
            h = layer(data_gpu[..., t])
        layer.reset_state()

    print(h_cpu)
    print(h_gpu)
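`_profile` performs no timing of its own; it only runs the forward loops, so it is presumably meant to be inspected with an external profiler. One way to do that (the invocation is an assumption, not from the source):

import cProfile
cProfile.run("_profile()", sort="cumtime")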
Example 5
def check_dropout_backward(batchsize, feature_dimension, seq_length, use_tanh):
    x_cpu_data = np.random.normal(
        0, 1, size=(batchsize, feature_dimension, seq_length * 3)).astype(
            np.float32) * 10
    x_gpu_data = cuda.to_gpu(x_cpu_data, gpu_device)
    x_cpu = chainer.Variable(x_cpu_data)
    x_gpu = chainer.Variable(x_gpu_data)

    with chainer.using_config("train", True):
        # get true output
        layer = SRU(feature_dimension, use_tanh=use_tanh, dropout=0.5)
        mask_x = layer.generate_dropout_mask(x_cpu_data)
        output_true, cell_true, last_cell_true = autograd(
            x_cpu[..., :seq_length], layer.W, layer.B, None, layer.use_tanh,
            mask_x)
        output_true, cell_true, last_cell_true = autograd(
            x_cpu[..., seq_length:seq_length * 2], layer.W, layer.B,
            last_cell_true, layer.use_tanh, mask_x)
        output_true, cell_true, last_cell_true = autograd(
            x_cpu[..., seq_length * 2:], layer.W, layer.B, last_cell_true,
            layer.use_tanh, mask_x)

        layer.cleargrads()
        functions.sum(output_true).backward()

        b_grad_true = layer.B.grad.copy()
        w_grad_true = layer.W.grad.copy()
        x_grad_true = x_cpu.grad.copy()

        # print("last_cell_true")
        # print(last_cell_true)

        layer.to_gpu(gpu_device)
        output, cell, last_cell = layer(x_gpu[..., :seq_length], None,
                                        cuda.to_gpu(mask_x))
        output, cell, last_cell = layer(x_gpu[..., seq_length:seq_length * 2],
                                        last_cell, cuda.to_gpu(mask_x))
        output, cell, last_cell = layer(x_gpu[..., seq_length * 2:], last_cell,
                                        cuda.to_gpu(mask_x))

        layer.cleargrads()
        functions.sum(output).backward()
    # print("last_cell")
    # print(last_cell)

    # print("layer.W.data")
    # print(layer.W.data)

    # print("b_grad")
    # print(b_grad)
    # print("b_grad")
    # print(layer.B.grad)

    # print("w_grad")
    # print(w_grad)
    # print("w_grad")
    # print(layer.W.grad)

    # print("x_grad")
    # print(x_cpu.grad)
    # print("x_grad")
    # print(x_gpu.grad)

    threshold = 1e-3
    assert (xp.mean(abs(b_grad_true - cuda.to_cpu(layer.B.grad))) <=
            threshold), xp.mean(abs(b_grad_true - cuda.to_cpu(layer.B.grad)))
    assert (xp.mean(abs(w_grad_true - cuda.to_cpu(layer.W.grad))) <=
            threshold), xp.mean(abs(w_grad_true - cuda.to_cpu(layer.W.grad)))
    assert (xp.mean(abs(x_grad_true - cuda.to_cpu(x_gpu.grad))) <=
            threshold), xp.mean(abs(x_grad_true - cuda.to_cpu(x_gpu.grad)))
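The gradient check uses a looser threshold (1e-3) than the forward check, since backpropagation accumulates more floating-point error. A driver analogous to the forward one, with illustrative sizes:

for use_tanh in (True, False):
    for batchsize in (1, 16):
        check_dropout_backward(batchsize, 64, 35, use_tanh)
print("check_dropout_backward: OK")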