Example #1
import torch
from torch.autograd import Variable


# Benchmark, make_params, over, and this SRU variant (which accepts a
# use_kernel flag) are assumed to come from the local benchmark harness.
class SRUTest(Benchmark):
    default_params = dict()
    params = make_params(use_kernel=over(False, True))

    def prepare(self, p):
        # input has length 20, batch size 32 and dimension 128
        self.x = Variable(torch.rand(20, 32, 128).cuda())
        input_size, hidden_size = 128, 128

        self.rnn = SRU(
            input_size,
            hidden_size,
            num_layers=2,  # number of stacking RNN layers
            dropout=0.00001,  # dropout applied between RNN layers
            rnn_dropout=0.0001,  # variational dropout applied on linear transformation
            use_tanh=1,  # use tanh?
            use_relu=0,  # use ReLU?
            bidirectional=False,  # bidirectional RNN?
            use_kernel=p.use_kernel,
        )
        self.rnn.cuda()

    def time_sru(self, p):
        output, hidden = self.rnn(self.x)  # forward pass
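
The harness calls time_sru once per measured iteration. For reference, here is a minimal standalone sketch of the same measurement, assuming only torch and the standalone sru package (not the harness's SRU variant), timed with CUDA events:

import torch
from torch.autograd import Variable
from sru import SRU  # assumed: the standalone SRU package

# Minimal timing sketch (not the harness itself): time one forward pass.
x = Variable(torch.rand(20, 32, 128).cuda())
rnn = SRU(128, 128, num_layers=2).cuda()

start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)
start.record()
output, hidden = rnn(x)  # forward pass
end.record()
torch.cuda.synchronize()  # wait for GPU work to finish before reading the timer
print('forward: {:.3f} ms'.format(start.elapsed_time(end)))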
Example #2
import gc

import torch
from torch.autograd import Variable


# Bench, benchmark_init, and this SRU variant (which accepts use_kernel and
# jit flags) are assumed to come from the local benchmark harness.
def run_sru(cpu=0,
            gpu=0,
            jit=False,
            use_kernel=False,
            backward=False,
            warmup=10,
            benchmark=20):
    # The JIT path and the custom CUDA kernel are mutually exclusive.
    assert not (jit and use_kernel)
    benchmark_init(0, 0, True)

    # input has length 20, batch size 32 and dimension 128
    x = Variable(torch.rand(20, 32, 128).cuda())
    input_size, hidden_size = 128, 128

    rnn = SRU(
        input_size,
        hidden_size,
        num_layers=2,  # number of stacking RNN layers
        dropout=0.00001,  # dropout applied between RNN layers
        rnn_dropout=0.0001,  # variational dropout applied on linear transformation
        use_tanh=1,  # use tanh?
        use_relu=0,  # use ReLU?
        bidirectional=False,  # bidirectional RNN?
        use_kernel=use_kernel,
        jit=jit,
    )
    rnn.cuda()

    kernel_tag = '_kernel' if use_kernel else ''
    backward_tag = '_training' if backward else '_forward'
    jit_tag = '_jit' if jit else ''
    name = 'sru{}{}{}'.format(backward_tag, kernel_tag, jit_tag)
    iter_timer = Bench(cuda=True, name=name, warmup_iters=warmup)

    for _ in range(warmup + benchmark):
        gc.collect()
        with iter_timer:
            output, hidden = rnn(x)  # forward pass
            if backward:
                output.sum().backward()
        # output is (length, batch size, hidden size * number of directions)
        # hidden is (layers, batch size, hidden size * number of directions)
    return iter_timer
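
A hypothetical driver for run_sru might sweep the flag combinations, skipping the jit+use_kernel pair that the assert forbids:

# Hypothetical driver: sweep the benchmark variants run_sru exposes.
for use_kernel in (False, True):
    for jit in (False, True):
        if jit and use_kernel:
            continue  # forbidden combination (see the assert above)
        for backward in (False, True):
            timer = run_sru(jit=jit, use_kernel=use_kernel,
                            backward=backward)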
Example #3
import numpy as np
import torch
from torch.autograd import Variable

# SRU (the pure-PyTorch implementation) and CuSRU (the CUDA-kernel variant)
# are assumed to be imported from the local SRU modules.

if torch.cuda.is_available():
    torch.cuda.manual_seed(1)

x0 = np.random.random_sample((2, 4, 6))
x = Variable(torch.FloatTensor(x0))

input_size, hidden_size = 6, 8

rnn1 = CuSRU(input_size, hidden_size, num_layers=2, bidirectional=True)
states = rnn1.state_dict()
o1, c1 = rnn1(x)

rnn2 = CuSRU(input_size, hidden_size, num_layers=2, bidirectional=True)
rnn2.load_state_dict(states)
if torch.cuda.is_available():
    rnn2.cuda()
    x2 = x.cuda()
else:
    x2 = x
o2, c2 = rnn2(x2)

rnn3 = SRU(input_size, hidden_size, num_layers=2, bidirectional=True)
rnn3.load_state_dict(states)
o3, c3 = rnn3(x)

rnn4 = SRU(input_size, hidden_size, num_layers=2, bidirectional=True)
rnn4.load_state_dict(states)
if torch.cuda.is_available():
    rnn4.cuda()
    x4 = x.cuda()
else:
    x4 = x
o4, c4 = rnn4(x4)
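
Since all four models load the same state_dict, their outputs should agree up to floating-point error. A sketch of that check, using the names defined above and moving GPU results back to the CPU first:

# Sketch: compare each output against the CPU CuSRU baseline o1.
for name, o in [('CuSRU/gpu', o2), ('SRU/cpu', o3), ('SRU/gpu', o4)]:
    diff = (o1 - o.cpu()).abs().max()
    print(name, 'max abs diff:', diff)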