class SRUTest(Benchmark):
    """Benchmark the forward pass of a 2-layer SRU on CUDA.

    Parametrized over ``use_kernel`` (custom CUDA kernel vs. the
    pure-PyTorch SRU cell).
    """
    default_params = dict()
    params = make_params(use_kernel=over(False, True))

    def prepare(self, p):
        """Build the input tensor and the SRU module on the GPU.

        Args:
            p: parameter bundle; only ``p.use_kernel`` is read here.
        """
        # Input has sequence length 20, batch size 32 and dimension 128.
        # NOTE: torch.autograd.Variable is deprecated since PyTorch 0.4 —
        # plain tensors participate in autograd directly, so construct
        # the tensor without the wrapper.
        self.x = torch.rand(20, 32, 128).cuda()
        input_size, hidden_size = 128, 128
        self.rnn = SRU(
            input_size,
            hidden_size,
            num_layers=2,        # number of stacking RNN layers
            dropout=0.00001,     # dropout applied between RNN layers
            rnn_dropout=0.0001,  # variational dropout applied on linear transformation
            use_tanh=1,          # use tanh?
            use_relu=0,          # use ReLU?
            bidirectional=False, # bidirectional RNN?
            use_kernel=p.use_kernel,
        )
        self.rnn.cuda()

    def time_sru(self, p):
        """Timed region: a single SRU forward pass."""
        output, hidden = self.rnn(self.x)  # forward pass
def run_sru(cpu=0, gpu=0, jit=False, use_kernel=False, backward=False, warmup=10, benchmark=20):
    """Benchmark a 2-layer SRU forward (and optionally backward) pass on CUDA.

    Args:
        cpu, gpu: accepted for interface parity with sibling runners; this
            benchmark always initializes via ``benchmark_init(0, 0, True)``.
        jit: use the JIT-compiled SRU cell (mutually exclusive with
            ``use_kernel``).
        use_kernel: use the custom CUDA-kernel SRU cell.
        backward: also run the backward pass inside the timed region.
        warmup: iterations discarded before timing begins.
        benchmark: timed iterations.

    Returns:
        The populated ``Bench`` iteration timer.
    """
    assert not (jit and use_kernel)
    benchmark_init(0, 0, True)

    # Input has sequence length 20, batch size 32 and dimension 128.
    # NOTE: torch.autograd.Variable is deprecated since PyTorch 0.4 —
    # a plain tensor participates in autograd directly.
    x = torch.rand(20, 32, 128).cuda()
    input_size, hidden_size = 128, 128
    rnn = SRU(
        input_size,
        hidden_size,
        num_layers=2,        # number of stacking RNN layers
        dropout=0.00001,     # dropout applied between RNN layers
        rnn_dropout=0.0001,  # variational dropout applied on linear transformation
        use_tanh=1,          # use tanh?
        use_relu=0,          # use ReLU?
        bidirectional=False, # bidirectional RNN?
        use_kernel=use_kernel,
        jit=jit,
    )
    rnn.cuda()

    # Encode the configuration into the benchmark name, e.g.
    # 'sru_training_kernel' or 'sru_forward_jit'.
    kernel_tag = '_kernel' if use_kernel else ''
    backward_tag = '_training' if backward else '_forward'
    jit_tag = '_jit' if jit else ''
    name = 'sru{}{}{}'.format(backward_tag, kernel_tag, jit_tag)
    iter_timer = Bench(cuda=True, name=name, warmup_iters=warmup)

    for _ in range(warmup + benchmark):
        # Collect outside the timed region so GC pauses don't pollute timings.
        gc.collect()
        with iter_timer:
            output, hidden = rnn(x)  # forward pass
            if backward:
                output.sum().backward()
            # output is (length, batch size, hidden size * number of directions)
            # hidden is (layers, batch size, hidden size * number of directions)

    return iter_timer
if torch.cuda.is_available(): torch.cuda.manual_seed(1) x0 = np.random.random_sample((2, 4, 6)) x = Variable(torch.FloatTensor(x0)) input_size, hidden_size = 6, 8 rnn1 = CuSRU(input_size, hidden_size, num_layers=2, bidirectional=True) states = rnn1.state_dict() o1, c1 = rnn1(x) rnn2 = CuSRU(input_size, hidden_size, num_layers=2, bidirectional=True) rnn2.load_state_dict(states) if torch.cuda.is_available(): rnn2.cuda() x2 = x.cuda() else: x2 = x o2, c2 = rnn2(x2) rnn3 = SRU(input_size, hidden_size, num_layers=2, bidirectional=True) rnn3.load_state_dict(states) o3, c3 = rnn3(x) rnn4 = SRU(input_size, hidden_size, num_layers=2, bidirectional=True) rnn4.load_state_dict(states) if torch.cuda.is_available(): rnn4.cuda() x4 = x.cuda() else: