def test_rnn_n():
    """Smoke-test one SDNC train step: forward, backward, clip, optimize.

    Builds a small SDNC, runs a single step on generated copy-task data,
    and checks the shapes of the controller hidden state and read vectors.
    """
    T.manual_seed(1111)

    # Model / memory hyper-parameters.
    input_size = 100
    hidden_size = 100
    rnn_type = 'gru'
    num_layers = 3
    num_hidden_layers = 5
    dropout = 0.2
    nr_cells = 200
    cell_size = 17
    read_heads = 2
    sparse_reads = 4
    temporal_reads = 3
    gpu_id = -1
    debug = True

    # Training hyper-parameters.
    lr = 0.001
    sequence_max_length = 10
    batch_size = 10
    cuda = gpu_id
    clip = 20
    length = 13

    rnn = SDNC(
        input_size=input_size,
        hidden_size=hidden_size,
        rnn_type=rnn_type,
        num_layers=num_layers,
        num_hidden_layers=num_hidden_layers,
        dropout=dropout,
        nr_cells=nr_cells,
        cell_size=cell_size,
        read_heads=read_heads,
        sparse_reads=sparse_reads,
        temporal_reads=temporal_reads,
        gpu_id=gpu_id,
        debug=debug)

    optimizer = optim.Adam(rnn.parameters(), lr=lr)
    optimizer.zero_grad()

    input_data, target_output = generate_data(batch_size, length, input_size, cuda)
    # Tests below expect time-major targets: (seq_len, batch, input_size).
    target_output = target_output.transpose(0, 1).contiguous()

    output, (chx, mhx, rv), v = rnn(input_data, None)
    output = output.transpose(0, 1)

    loss = criterion(output, target_output)
    loss.backward()

    T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
    optimizer.step()

    assert target_output.size() == T.Size([27, 10, 100])
    assert chx[0].size() == T.Size([num_hidden_layers, 10, 100])
    # assert mhx['memory'].size() == T.Size([10,12,17])
    # Read vectors: (batch, read_heads * cell_size) = (10, 2 * 17).
    assert rv.size() == T.Size([10, 34])
nr_cells=mem_slot, cell_size=mem_size, read_heads=read_heads, gpu_id=args.cuda, debug=args.visdom, batch_first=True, independent_linears=True) elif args.memory_type == 'sdnc': rnn = SDNC(input_size=args.input_size, hidden_size=args.nhid, rnn_type=args.rnn_type, num_layers=args.nlayer, num_hidden_layers=args.nhlayer, dropout=args.dropout, nr_cells=mem_slot, cell_size=mem_size, sparse_reads=args.sparse_reads, temporal_reads=args.temporal_reads, read_heads=args.read_heads, gpu_id=args.cuda, debug=args.visdom, batch_first=True, independent_linears=False) elif args.memory_type == 'sam': rnn = SAM(input_size=args.input_size, hidden_size=args.nhid, rnn_type=args.rnn_type, num_layers=args.nlayer, num_hidden_layers=args.nhlayer, dropout=args.dropout, nr_cells=mem_slot,
def test_rnn_no_memory_pass():
    """Train-step smoke test with memory pass-through disabled.

    Runs the SDNC several times with ``pass_through_memory=False``,
    accumulates the outputs, backprops once, and checks that the read
    vectors stay ``None`` (memory was never read).
    """
    T.manual_seed(1111)

    # Model / memory hyper-parameters.
    input_size = 100
    hidden_size = 100
    rnn_type = 'gru'
    num_layers = 3
    num_hidden_layers = 5
    dropout = 0.2
    nr_cells = 5000
    cell_size = 17
    sparse_reads = 3
    temporal_reads = 4
    gpu_id = -1
    debug = True

    # Training hyper-parameters.
    lr = 0.001
    sequence_max_length = 10
    batch_size = 10
    cuda = gpu_id
    clip = 20
    length = 13

    rnn = SDNC(
        input_size=input_size,
        hidden_size=hidden_size,
        rnn_type=rnn_type,
        num_layers=num_layers,
        num_hidden_layers=num_hidden_layers,
        dropout=dropout,
        nr_cells=nr_cells,
        cell_size=cell_size,
        sparse_reads=sparse_reads,
        temporal_reads=temporal_reads,
        gpu_id=gpu_id,
        debug=debug)

    optimizer = optim.Adam(rnn.parameters(), lr=lr)
    optimizer.zero_grad()

    input_data, target_output = generate_data(batch_size, length, input_size, cuda)
    target_output = target_output.transpose(0, 1).contiguous()

    # Feed the same input repeatedly, threading the hidden state through
    # but skipping the external memory entirely.
    (chx, mhx, rv) = (None, None, None)
    outputs = []
    for x in range(6):
        output, (chx, mhx, rv), v = rnn(
            input_data, (chx, mhx, rv), pass_through_memory=False)
        output = output.transpose(0, 1)
        outputs.append(output)

    output = functools.reduce(lambda x, y: x + y, outputs)
    loss = criterion(output, target_output)
    loss.backward()

    T.nn.utils.clip_grad_norm_(rnn.parameters(), clip)
    optimizer.step()

    assert target_output.size() == T.Size([27, 10, 100])
    assert chx[0].size() == T.Size([num_hidden_layers, 10, 100])
    # assert mhx['memory'].size() == T.Size([10,12,17])
    # FIX: use identity comparison. `rv == None` on a tensor is an
    # elementwise comparison and makes the assert ambiguous; `is None`
    # is the unambiguous check that memory was never read.
    assert rv is None
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, nhlayers, dropout=0.5,
             dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False,
             nr_cells=5, read_heads=2, sparse_reads=10, cell_size=10, gpu_id=-1,
             independent_linears=False, debug=True):
    """Build an AWD-LSTM-style language model with a pluggable recurrent core.

    Args:
        rnn_type: one of 'LSTM', 'QRNN', 'DNC', 'SDNC' (case-insensitive
            for the DNC variants).
        ntoken: vocabulary size (embedding rows / decoder columns).
        ninp: embedding size (also the last layer's output size).
        nhid: hidden size of intermediate recurrent layers.
        nlayers: number of recurrent layers.
        nhlayers: hidden layers per DNC/SDNC controller cell.
        dropout / dropouth / dropouti / dropoute: output, between-layer,
            input, and embedding dropout rates.
        wdrop: weight-drop rate applied to recurrent weights.
        tie_weights: share decoder weights with the encoder embedding.
        nr_cells, read_heads, sparse_reads, cell_size, gpu_id,
        independent_linears, debug: forwarded to the DNC/SDNC memory.
    """
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    self.debug = debug

    assert rnn_type in ['LSTM', 'QRNN', 'DNC', 'SDNC'], 'RNN type is not supported'

    if rnn_type == 'LSTM':
        # Stack of single-layer LSTMs; the last layer projects back to
        # the embedding size so decoder weights can be tied.
        self.rnns = [
            torch.nn.LSTM(ninp if l == 0 else nhid,
                          nhid if l != nlayers - 1 else ninp, 1, dropout=0)
            for l in range(nlayers)
        ]
        if wdrop:
            self.rnns = [
                WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
                for rnn in self.rnns
            ]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [
            QRNNLayer(input_size=ninp if l == 0 else nhid,
                      hidden_size=nhid if l != nlayers - 1 else ninp,
                      save_prev_x=True,
                      zoneout=0,
                      window=2 if l == 0 else 1,
                      # FIX: was `True if l != nlayers - 1 else True`, a dead
                      # conditional — both branches were True. Simplified to
                      # the constant; behavior unchanged.
                      output_gate=True)
            for l in range(nlayers)
        ]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    elif rnn_type.lower() == 'sdnc':
        self.rnns = []
        # NOTE(review): dropout is hard-coded to 0 here while the DNC branch
        # uses wdrop — looks intentional but verify against training config.
        self.rnns.append(
            SDNC(input_size=ninp,
                 hidden_size=nhid,
                 num_layers=nlayers,
                 num_hidden_layers=nhlayers,
                 rnn_type='lstm',
                 nr_cells=nr_cells,
                 read_heads=read_heads,
                 sparse_reads=sparse_reads,
                 cell_size=cell_size,
                 gpu_id=gpu_id,
                 independent_linears=independent_linears,
                 debug=debug,
                 dropout=0))
    elif rnn_type.lower() == 'dnc':
        self.rnns = []
        self.rnns.append(
            DNC(input_size=ninp,
                hidden_size=nhid,
                num_layers=nlayers,
                num_hidden_layers=nhlayers,
                rnn_type='lstm',
                nr_cells=nr_cells,
                read_heads=read_heads,
                cell_size=cell_size,
                gpu_id=gpu_id,
                independent_linears=independent_linears,
                debug=debug,
                dropout=wdrop))

    # NOTE(review): debug leftover — consider removing or routing to a logger.
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(ninp, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language
    #  Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        #if nhid != ninp:
        #  raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()

    # Cache hyper-parameters for use by forward()/regularization elsewhere.
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute