def __init__(self, ninp, nhid, nlayers, num_blocks, top_k, use_inactive, blocked_grad,
             step_att=True, do_gru=False):
    super(Blocks, self).__init__()

    self.nhid = nhid
    self.ninp = ninp
    self.top_k = top_k
    self.step_att = step_att
    self.do_gru = do_gru
    self.nlayers = nlayers
    self.num_blocks = num_blocks
    self.use_inactive = use_inactive
    self.blocked_grad = blocked_grad

    print("Number of Layers: ", nlayers)
    print("Input Dimension: ", ninp)
    print("Hidden Dimensions: ", nhid)
    print("Number of Blocks: ", num_blocks)
    print("Top k Blocks: ", top_k)
    print('Is the model using inactive blocks for higher representations? ', use_inactive)
    print('Is the model blocking gradients down inactive blocks? ', blocked_grad)

    self.bc_lst = []
    self.dropout_lst = []

    for i in range(nlayers):
        if i == 0:
            self.bc_lst.append(BlocksCore(ninp, nhid[i], 1, num_blocks[i], top_k[i], True, do_gru=do_gru))
        else:
            self.bc_lst.append(BlocksCore(nhid[i - 1], nhid[i], 1, num_blocks[i], top_k[i], True, do_gru=do_gru))

    self.bc_lst = nn.ModuleList(self.bc_lst)
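# Illustrative construction of the Blocks stack above (sizes are arbitrary, not from
# the original source): nhid, num_blocks and top_k are per-layer lists of length nlayers.
blocks = Blocks(ninp=512, nhid=[512, 512], nlayers=2, num_blocks=[4, 4], top_k=[2, 2],
                use_inactive=True, blocked_grad=False, do_gru=False)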
class BlockWrapper(nn.Module):

    def __init__(self, ntokens, nhid, dropout=0.0, num_blocks=4, update_topk=4):
        super(BlockWrapper, self).__init__()
        #self.myrnn = rnn_models.RNNModel("LSTM", ntokens, nhid, nhid,
        #                                 nlayers=1, dropout=dropout, tie_weights=False,
        #                                 use_cudnn_version=False, use_adaptive_softmax=False,
        #                                 cutoffs=[10000], discrete_input=False,
        #                                 num_blocks=num_blocks, topk=update_topk, use_gru=True).cuda()
        self.myrnn = BlocksCore(nhid, num_blocks_in=1, num_blocks_out=num_blocks,
                                topkval=update_topk, step_att=True, do_gru=True)
        #self.myrnn = nn.GRU(ntokens, nhid)
        self.nhid = nhid
        print('using blocks wrapper!')

    def forward(self, inp, h):
        self.myrnn.blockify_params()
        hlst = []
        h = h[0]
        for step in range(inp.shape[0]):
            cx = torch.zeros_like(h)
            h, cx, mask = self.myrnn(inp[step], h, cx)
            hlst.append(h)
        output = torch.stack(hlst)
        return output, h.unsqueeze(0)
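# Minimal usage sketch for BlockWrapper (illustrative, not from the original source).
# Assumptions: the per-step input already has dimension nhid, and the initial hidden
# state carries a leading layer dimension of 1, mirroring the h[0] indexing and the
# h.unsqueeze(0) return in forward above. ntokens is unused on the active code path,
# so it is simply set to nhid here; all sizes are arbitrary.
import torch

seq_len, batch, nhid = 10, 8, 512
wrapper = BlockWrapper(ntokens=nhid, nhid=nhid, num_blocks=4, update_topk=2)
inp = torch.randn(seq_len, batch, nhid)
h0 = torch.zeros(1, batch, nhid)
output, h_T = wrapper(inp, h0)
print(output.shape, h_T.shape)  # expected: (seq_len, batch, nhid) and (1, batch, nhid)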
    for idx_layer in range(self.nlayers):
        hx_, cx_, mask = self.bc_lst[idx_layer](inp_use, hx[idx_layer], cx[idx_layer], step)
        hx_new.append(hx_)
        cx_new.append(cx_)
        mask_new.append(mask)

        # Choose what the next layer sees: with use_inactive, the full hidden state
        # (including inactive blocks); otherwise only the active blocks (mask * hx_).
        # blocked_grad additionally stops gradients from flowing into inactive blocks
        # (a sketch of such a Function follows below).
        if self.use_inactive:
            if self.blocked_grad:
                inp_use = blocked_grad.apply(hx_, mask)
            else:
                inp_use = hx_
        else:
            if self.blocked_grad:
                inp_use = blocked_grad.apply(mask * hx_, mask)
            else:
                inp_use = mask * hx_

    return hx_new, cx_new, mask_new


if __name__ == "__main__":
    bc = BlocksCore(512, 1, 4, 4)

    inp = torch.randn(10, 512)
    hx = torch.randn(10, 512)
    cx = torch.randn(10, 512)

    hx, cx, mask = bc(inp, hx, cx)
    print('hx cx shape', hx.shape, cx.shape)
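# Sketch of the kind of autograd.Function that blocked_grad is assumed to be
# (illustrative, not the original implementation): activations pass through
# unchanged in the forward pass, while the backward pass multiplies the incoming
# gradient by the block mask so that inactive blocks receive no gradient signal.
import torch

class blocked_grad(torch.autograd.Function):

    @staticmethod
    def forward(ctx, x, mask):
        ctx.save_for_backward(mask)
        return x

    @staticmethod
    def backward(ctx, grad_output):
        (mask,) = ctx.saved_tensors
        # Gradient flows only through active blocks; no gradient w.r.t. the mask.
        return grad_output * mask, None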
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5,
             tie_weights=False, use_cudnn_version=True, use_adaptive_softmax=False,
             cutoffs=None, discrete_input=True, num_blocks=[6], topk=[4],
             do_gru=False, use_inactive=False, blocked_grad=False,
             layer_dilation=-1, block_dilation=-1, num_modules_read_input=2):
    super(RNNModel, self).__init__()

    self.topk = topk
    print('Top k Blocks: ', topk)
    self.use_cudnn_version = use_cudnn_version
    self.drop = nn.Dropout(dropout)
    print('Number of Inputs, ninp: ', ninp)

    if discrete_input:
        self.encoder = nn.Embedding(ntoken, ninp)
    else:
        self.encoder = nn.Linear(ntoken, ninp)

    self.num_blocks = num_blocks
    print('Number of Blocks: ', self.num_blocks)
    self.nhid = nhid
    print('Dimensions of Hidden Layers: ', nhid)
    self.discrete_input = discrete_input
    self.sigmoid = nn.Sigmoid()
    self.sm = nn.Softmax(dim=1)
    self.use_inactive = use_inactive
    self.blocked_grad = blocked_grad
    print('Is the model using inactive blocks for higher representations? ', use_inactive)

    if layer_dilation == -1:
        self.layer_dilation = [1] * nlayers
    else:
        self.layer_dilation = layer_dilation

    if block_dilation == -1:
        self.block_dilation = [1] * nlayers
    else:
        self.block_dilation = block_dilation

    num_blocks_in = [1 for i in topk]
    self.bc_lst = []
    self.dropout_lst = []
    print("Dropout rate", dropout)

    for i in range(nlayers):
        if i == 0:
            self.bc_lst.append(
                BlocksCore(ninp, nhid[i], num_blocks_in[i], num_blocks[i], topk[i], True,
                           do_gru=do_gru, num_modules_read_input=num_modules_read_input))
        else:
            self.bc_lst.append(
                BlocksCore(nhid[i - 1], nhid[i], num_blocks_in[i], num_blocks[i], topk[i], True,
                           do_gru=do_gru, num_modules_read_input=num_modules_read_input))

    for i in range(nlayers - 1):
        self.dropout_lst.append(nn.Dropout(dropout))

    self.bc_lst = nn.ModuleList(self.bc_lst)
    self.dropout_lst = nn.ModuleList(self.dropout_lst)

    self.use_adaptive_softmax = use_adaptive_softmax
    self.decoder = nn.Linear(nhid[-1], ntoken)

    if tie_weights:
        if nhid[-1] != ninp:
            raise ValueError('When using the tied flag, nhid must be equal to emsize')
        else:
            self.decoder.weight = self.encoder.weight

    self.init_weights()

    self.rnn_type = rnn_type
    self.nhid = nhid
    self.nlayers = nlayers
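# Illustrative construction of the hierarchical RNNModel above (values are arbitrary,
# not from the original source). Note that nhid, num_blocks and topk are per-layer
# lists of equal length nlayers.
model = RNNModel('LSTM', ntoken=10000, ninp=300, nhid=[600, 600], nlayers=2,
                 dropout=0.5, num_blocks=[6, 6], topk=[4, 4],
                 do_gru=False, use_inactive=True, blocked_grad=True)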
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers=2, dropout=0.5,
             tie_weights=False, use_cudnn_version=True, use_adaptive_softmax=False,
             cutoffs=None, discrete_input=True, num_blocks=[6], topk=[4],
             do_gru=False, use_inactive=False, lstm_layers=1, block_layers=1,
             blocked_grad=False):
    super(RNNModel, self).__init__()

    self.topk = topk
    print('Top k Blocks: ', topk)
    self.use_cudnn_version = use_cudnn_version
    self.drop = nn.Dropout(dropout)
    print('Number of Inputs, ninp: ', ninp)

    if discrete_input:
        self.encoder = nn.Embedding(ntoken, ninp)
    else:
        self.encoder = nn.Linear(ntoken, ninp)

    self.num_blocks = num_blocks
    self.nhid = nhid
    print('Number of Blocks: ', self.num_blocks)
    self.discrete_input = discrete_input
    self.sigmoid = nn.Sigmoid()
    self.sm = nn.Softmax(dim=1)
    self.use_inactive = use_inactive
    self.blocked_grad = blocked_grad

    # nhid is passed as a list; this variant uses a single hidden size for all layers.
    nhid = nhid[0]

    print('Is the model using inactive blocks for higher representations? ', use_inactive)

    num_blocks_in = [1 for i in topk]
    self.lstm_layers = lstm_layers
    self.block_layers = block_layers
    self.bc_lst = []
    self.dropout_lst = []
    print("Dropout rate", dropout)

    # Plain LSTMCell layers first, then BlocksCore layers stacked on top.
    for i in range(lstm_layers):
        self.bc_lst.append(nn.LSTMCell(ninp, nhid))
    for i in range(block_layers):
        self.bc_lst.append(
            BlocksCore(nhid, nhid, num_blocks_in[i], num_blocks[i], topk[i], True, do_gru=do_gru))

    for i in range(nlayers - 1):
        self.dropout_lst.append(nn.Dropout(dropout))

    self.bc_lst = nn.ModuleList(self.bc_lst)
    self.dropout_lst = nn.ModuleList(self.dropout_lst)

    self.use_adaptive_softmax = use_adaptive_softmax
    self.decoder = nn.Linear(nhid, ntoken)

    if tie_weights:
        print('tying weights!')
        if nhid != ninp:
            raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()

    self.rnn_type = rnn_type
    self.nhid = nhid
    self.nlayers = nlayers
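# Illustrative construction of the LSTM+Blocks variant above (arbitrary values, not
# from the original source): lstm_layers plain LSTMCell layers followed by
# block_layers BlocksCore layers, all using hidden size nhid[0].
model = RNNModel('LSTM', ntoken=10000, ninp=300, nhid=[600], nlayers=2,
                 dropout=0.5, num_blocks=[6], topk=[4],
                 lstm_layers=1, block_layers=1, use_inactive=False)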