Example #1
class Blocks(nn.Module):
    def __init__(self, ninp, nhid, nlayers, num_blocks, top_k, use_inactive, blocked_grad, step_att=True, do_gru=False):
        super(Blocks, self).__init__()
        self.nhid = nhid
        self.ninp = ninp
        self.top_k = top_k
        self.step_att = step_att
        self.do_gru = do_gru
        self.nlayers = nlayers
        self.num_blocks = num_blocks
        self.use_inactive = use_inactive
        self.blocked_grad = blocked_grad

        print("Number of Layers: ", nlayers)
        print("Input Dimension: ", ninp)
        print("Hidden Dimensions: ", nhid)
        print("Number of Blocks: ", num_blocks)
        print("Top k Blocks: ", top_k)
        print('Is the model using inactive blocks for higher representations? ', use_inactive)
        print('Is the model blocking gradients down inactive blocks? ', blocked_grad)

        self.bc_lst = []
        self.dropout_lst = []

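        # One BlocksCore per layer: layer 0 reads the ninp-dimensional input,
        # later layers read the previous layer's nhid[i-1]-dimensional state.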
        for i in range(nlayers):
            if i==0:
                self.bc_lst.append(BlocksCore(ninp, nhid[i], 1, num_blocks[i], top_k[i], True, do_gru=do_gru))
            else:
                self.bc_lst.append(BlocksCore(nhid[i-1], nhid[i], 1, num_blocks[i], top_k[i], True, do_gru=do_gru))

        self.bc_lst = nn.ModuleList(self.bc_lst)
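
A minimal construction sketch for this module (hypothetical sizes; assumes BlocksCore is importable from the surrounding project). Note that nhid, num_blocks and top_k are per-layer lists, while ninp is a single scalar:

blocks = Blocks(ninp=300, nhid=[600, 600], nlayers=2,
                num_blocks=[6, 6], top_k=[4, 4],
                use_inactive=False, blocked_grad=False)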
Example #2
class BlockWrapper(nn.Module):
    def __init__(self,
                 ntokens,
                 nhid,
                 dropout=0.0,
                 num_blocks=4,
                 update_topk=4):
        super(BlockWrapper, self).__init__()
        #self.myrnn = rnn_models.RNNModel("LSTM", ntokens, nhid, nhid,
        #                    nlayers=1, dropout=dropout, tie_weights=False,
        #                    use_cudnn_version=False, use_adaptive_softmax=False,
        #                    cutoffs=[10000], discrete_input=False, num_blocks=num_blocks, topk=update_topk, use_gru=True).cuda()

        self.myrnn = BlocksCore(nhid,
                                num_blocks_in=1,
                                num_blocks_out=num_blocks,
                                topkval=update_topk,
                                step_att=True,
                                do_gru=True)

        #self.myrnn = nn.GRU(ntokens, nhid)
        self.nhid = nhid

        print('using blocks wrapper!')
Example #3
class BlockWrapper(nn.Module):
    def __init__(self,
                 ntokens,
                 nhid,
                 dropout=0.0,
                 num_blocks=4,
                 update_topk=4):
        super(BlockWrapper, self).__init__()
        #self.myrnn = rnn_models.RNNModel("LSTM", ntokens, nhid, nhid,
        #                    nlayers=1, dropout=dropout, tie_weights=False,
        #                    use_cudnn_version=False, use_adaptive_softmax=False,
        #                    cutoffs=[10000], discrete_input=False, num_blocks=num_blocks, topk=update_topk, use_gru=True).cuda()

        self.myrnn = BlocksCore(nhid,
                                num_blocks_in=1,
                                num_blocks_out=num_blocks,
                                topkval=update_topk,
                                step_att=True,
                                do_gru=True)

        #self.myrnn = nn.GRU(ntokens, nhid)
        self.nhid = nhid

        print('using blocks wrapper!')

    def forward(self, inp, h):
        self.myrnn.blockify_params()
        hlst = []
        # only the first element of the incoming hidden state is used
        h = h[0]
        for step in range(inp.shape[0]):
            # the cell state is re-zeroed every step (unused in GRU mode)
            cx = torch.zeros_like(h)
            h, cx, mask = self.myrnn(inp[step], h, cx)
            hlst.append(h)
        output = torch.stack(hlst)

        return output, h.unsqueeze(0)
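
A hypothetical usage sketch for the wrapper above (dimension values are illustrative; assumes the input feature size equals nhid and that nhid is divisible by num_blocks). The hidden state is passed with a leading layer dimension, which forward() strips via h = h[0]:

import torch

seq_len, batch, nhid = 12, 8, 64
wrapper = BlockWrapper(ntokens=nhid, nhid=nhid, num_blocks=4, update_topk=2)
x = torch.randn(seq_len, batch, nhid)
h0 = torch.zeros(1, batch, nhid)
out, hn = wrapper(x, h0)
print(out.shape, hn.shape)   # (seq_len, batch, nhid) and (1, batch, nhid)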
Example #4
        for idx_layer in range(self.nlayers):
            hx_, cx_, mask = self.bc_lst[idx_layer](inp_use, hx[idx_layer], cx[idx_layer], step)

            hx_new.append(hx_)
            cx_new.append(cx_)
            mask_new.append(mask)

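            # Decide what the layer above sees: with use_inactive, even the
            # inactive blocks' states are passed upward; otherwise they are
            # masked out with `mask`. With blocked_grad, gradients are stopped
            # from flowing back into blocks that were inactive at this step.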
            if self.use_inactive:
                if self.blocked_grad:
                    inp_use = blocked_grad.apply(hx_, mask)
                else:
                    inp_use = hx_
            else:
                if self.blocked_grad:
                    inp_use = blocked_grad.apply((mask)*hx_, mask)
                else:
                    inp_use = (mask)*hx_

        return hx_new, cx_new, mask_new


if __name__ == "__main__":
    bc = BlocksCore(512, 1, 4, 4)

    inp = torch.randn(10, 512)
    hx = torch.randn(10, 512)
    cx = torch.randn(10, 512)

    hx, cx, mask = bc(inp, hx, cx)

    print('hx cx shape', hx.shape, cx.shape)
Example #5
class RNNModel(nn.Module):
    def __init__(self,
                 rnn_type,
                 ntoken,
                 ninp,
                 nhid,
                 nlayers,
                 dropout=0.5,
                 tie_weights=False,
                 use_cudnn_version=True,
                 use_adaptive_softmax=False,
                 cutoffs=None,
                 discrete_input=True,
                 num_blocks=[6],
                 topk=[4],
                 do_gru=False,
                 use_inactive=False,
                 blocked_grad=False,
                 layer_dilation=-1,
                 block_dilation=-1,
                 num_modules_read_input=2):

        super(RNNModel, self).__init__()

        self.topk = topk
        print('Top k Blocks: ', topk)

        self.use_cudnn_version = use_cudnn_version
        self.drop = nn.Dropout(dropout)

        print('Number of Inputs, ninp: ', ninp)
        if discrete_input:
            self.encoder = nn.Embedding(ntoken, ninp)
        else:
            self.encoder = nn.Linear(ntoken, ninp)

        self.num_blocks = num_blocks
        print('Number of Blocks: ', self.num_blocks)

        self.nhid = nhid
        print('Dimensions of Hidden Layers: ', nhid)

        self.discrete_input = discrete_input
        self.sigmoid = nn.Sigmoid()
        self.sm = nn.Softmax(dim=1)

        self.use_inactive = use_inactive
        self.blocked_grad = blocked_grad
        print(
            'Is the model using inactive blocks for higher representations? ',
            use_inactive)

        if layer_dilation == -1:
            self.layer_dilation = [1] * nlayers
        else:
            self.layer_dilation = layer_dilation

        if block_dilation == -1:
            self.block_dilation = [1] * nlayers
        else:
            self.block_dilation = block_dilation

        num_blocks_in = [1 for i in topk]

        self.bc_lst = []
        self.dropout_lst = []

        print("Dropout rate", dropout)

        for i in range(nlayers):
            if i == 0:
                self.bc_lst.append(
                    BlocksCore(ninp,
                               nhid[i],
                               num_blocks_in[i],
                               num_blocks[i],
                               topk[i],
                               True,
                               do_gru=do_gru,
                               num_modules_read_input=num_modules_read_input))
            else:
                self.bc_lst.append(
                    BlocksCore(nhid[i - 1],
                               nhid[i],
                               num_blocks_in[i],
                               num_blocks[i],
                               topk[i],
                               True,
                               do_gru=do_gru,
                               num_modules_read_input=num_modules_read_input))
        for i in range(nlayers - 1):
            self.dropout_lst.append(nn.Dropout(dropout))

        self.bc_lst = nn.ModuleList(self.bc_lst)
        self.dropout_lst = nn.ModuleList(self.dropout_lst)

        self.use_adaptive_softmax = use_adaptive_softmax
        self.decoder = nn.Linear(nhid[-1], ntoken)
        if tie_weights:
            if nhid[-1] != ninp:
                raise ValueError(
                    'When using the tied flag, nhid must be equal to emsize')
            else:
                self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.nhid = nhid
        self.nlayers = nlayers
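
A hypothetical construction sketch for this variant (values are illustrative). nhid, num_blocks and topk are per-layer lists, and layer_dilation / block_dilation fall back to all-ones when left at -1:

model = RNNModel('LSTM', ntoken=10000, ninp=300,
                 nhid=[600, 600], nlayers=2,
                 num_blocks=[6, 6], topk=[4, 4],
                 do_gru=False, use_inactive=False, blocked_grad=False)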
Example #6
class RNNModel(nn.Module):
    def __init__(self,
                 rnn_type,
                 ntoken,
                 ninp,
                 nhid,
                 nlayers=2,
                 dropout=0.5,
                 tie_weights=False,
                 use_cudnn_version=True,
                 use_adaptive_softmax=False,
                 cutoffs=None,
                 discrete_input=True,
                 num_blocks=[6],
                 topk=[4],
                 do_gru=False,
                 use_inactive=False,
                 lstm_layers=1,
                 block_layers=1,
                 blocked_grad=False):
        super(RNNModel, self).__init__()
        self.topk = topk
        print('Top k Blocks: ', topk)
        self.use_cudnn_version = use_cudnn_version
        self.drop = nn.Dropout(dropout)
        print('Number of Inputs, ninp: ', ninp)
        if discrete_input:
            self.encoder = nn.Embedding(ntoken, ninp)
        else:
            self.encoder = nn.Linear(ntoken, ninp)
        self.num_blocks = num_blocks
        self.nhid = nhid
        print('Number of Blocks: ', self.num_blocks)
        self.discrete_input = discrete_input
        self.sigmoid = nn.Sigmoid()
        self.sm = nn.Softmax(dim=1)
        self.use_inactive = use_inactive
        self.blocked_grad = blocked_grad
        nhid = nhid[0]
        print(
            'Is the model using inactive blocks for higher representations? ',
            use_inactive)

        num_blocks_in = [1 for i in topk]
        self.lstm_layers = lstm_layers
        self.block_layers = block_layers

        self.bc_lst = []
        self.dropout_lst = []

        print("Dropout rate", dropout)

        for i in range(lstm_layers):
            self.bc_lst.append(nn.LSTMCell(ninp, nhid))

        for i in range(block_layers):
            self.bc_lst.append(
                BlocksCore(nhid,
                           nhid,
                           num_blocks_in[i],
                           num_blocks[i],
                           topk[i],
                           True,
                           do_gru=do_gru))

        for i in range(nlayers - 1):
            self.dropout_lst.append(nn.Dropout(dropout))

        self.bc_lst = nn.ModuleList(self.bc_lst)
        self.dropout_lst = nn.ModuleList(self.dropout_lst)

        self.use_adaptive_softmax = use_adaptive_softmax
        self.decoder = nn.Linear(nhid, ntoken)
        if tie_weights:
            print('tying weights!')
            if nhid != ninp:
                raise ValueError(
                    'When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.nhid = nhid
        self.nlayers = nlayers
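
A hypothetical construction sketch for this hybrid variant (illustrative values), which stacks lstm_layers plain nn.LSTMCell layers below block_layers BlocksCore layers and uses only nhid[0] as the hidden size:

model = RNNModel('LSTM', ntoken=10000, ninp=300,
                 nhid=[600], nlayers=2,
                 num_blocks=[6], topk=[4],
                 lstm_layers=1, block_layers=1,
                 do_gru=False, use_inactive=False)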