Example #1
    def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False, alpha=2, beta=1, bsz=20):
        super(RNNModel, self).__init__()
        self.bsz = bsz
        self.ntoken = ntoken
        self.rnn_type = rnn_type
        self.ninp = ninp
        self.nhid = nhid
        self.nlayers = nlayers
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropouth = dropouth
        self.dropoute = dropoute
        self.tie_weights = tie_weights
        self.alpha = alpha
        self.beta = beta
        self.metrics = [self.acc, self.perplexity]

        self.lockdrop = LockedDropout()
        self.idrop = nn.Dropout(dropouti)
        self.hdrop = nn.Dropout(dropouth)
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)

        assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
        if rnn_type == 'LSTM':
            self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]
            if wdrop:
                self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
        if rnn_type == 'GRU':
            self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l in range(nlayers)]
            if wdrop:
                self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
            for rnn in self.rnns:
                rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
        print(self.rnns)
        self.rnns = torch.nn.ModuleList(self.rnns)
        self.decoder = nn.Linear(nhid, ntoken)


        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            #if nhid != ninp:
            #    raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights()


        # Build the SplitCrossEntropyLoss criterion here
        self.build_criterion()

        self.hidden = None
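A minimal standalone sketch (plain PyTorch, illustrative sizes) of the per-layer sizing used in the LSTM branch above: the first layer consumes ninp-dimensional embeddings, middle layers run at nhid, and the last layer shrinks back to ninp when tie_weights is set so the decoder can share the embedding matrix.

import torch
import torch.nn as nn

ninp, nhid, nlayers, tie_weights = 400, 1150, 3, True
rnns = nn.ModuleList([
    nn.LSTM(ninp if l == 0 else nhid,
            nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
            1)
    for l in range(nlayers)
])

x = torch.randn(35, 20, ninp)  # (seq_len, batch, emb_size)
for rnn in rnns:
    x, _ = rnn(x)
print(x.shape)  # torch.Size([35, 20, 400]), i.e. back at the embedding size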
Example #2
    def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5, dropouti=0.5, dropoute=0.1,
                 wdrop=0, tie_weights=False):
        super(RNNModel, self).__init__()
        self.lockdrop = LockedDropout()
        self.idrop = nn.Dropout(dropouti)
        self.hdrop = nn.Dropout(dropouth)
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
        if rnn_type == 'LSTM':
            self.rnns = [
                torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
                              1, dropout=0) for l in range(nlayers)]
            if wdrop:
                self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
        if rnn_type == 'GRU':
            self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l
                         in range(nlayers)]
            if wdrop:
                self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid,
                                   hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
                                   save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in
                         range(nlayers)]
            for rnn in self.rnns:
                rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)

        print(self.rnns)
        self.rnns = torch.nn.ModuleList(self.rnns)
        self.decoder = nn.Linear(nhid, ntoken)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            # if nhid != ninp:
            #    raise ValueError('When using the tied flag, nhid must be equal to emsize')
            # NOTE: this only overwrites the decoder's weight tensor, so the module's
            # printed dimensions still show the original decoder shape even though the
            # weights have been tied to the encoder. If your models aren't displaying
            # correctly, this is why.
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.ninp = ninp
        self.nhid = nhid
        self.nlayers = nlayers
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropouth = dropouth
        self.dropoute = dropoute
        self.tie_weights = tie_weights
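The NOTE above concerns weight tying; a minimal standalone sketch (plain PyTorch, sizes are illustrative) of what the tying does: both modules end up pointing at the same Parameter, while nn.Linear's repr keeps the dimensions it was constructed with.

import torch.nn as nn

ntoken, ninp = 10000, 400
encoder = nn.Embedding(ntoken, ninp)
decoder = nn.Linear(ninp, ntoken)

decoder.weight = encoder.weight  # both modules now share one (ntoken, ninp) Parameter
assert decoder.weight.data_ptr() == encoder.weight.data_ptr()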
Example #3
    def __init__(self,vocab_obj, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False):
        super(RNNModel, self).__init__()
        self.lockdrop = LockedDropout()
        self.idrop = nn.Dropout(dropouti)
        self.hdrop = nn.Dropout(dropouth)
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        embed_matrix_tensor = torch.from_numpy(vocab_obj.embed_matrix).cuda()
        self.encoder.load_state_dict({'weight': embed_matrix_tensor})
        assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
        if rnn_type == 'LSTM':
            self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]
            if wdrop:
                self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
        if rnn_type == 'GRU':
            self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l in range(nlayers)]
            if wdrop:
                self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
            for rnn in self.rnns:
                rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
        self.decoder = nn.Linear(nhid, ntoken)
        self.rnns = torch.nn.ModuleList(self.rnns)
        

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            #if nhid != ninp:
            #    raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.ninp = ninp
        self.nhid = nhid
        self.nlayers = nlayers
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropouth = dropouth
        self.dropoute = dropoute
        self.tie_weights = tie_weights
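A standalone sketch of the embedding initialization Example #3 performs with vocab_obj.embed_matrix; the matrix here is random stand-in data and the .cuda() call is omitted so the snippet runs on CPU.

import numpy as np
import torch
import torch.nn as nn

ntoken, ninp = 1000, 300
embed_matrix = np.random.rand(ntoken, ninp).astype(np.float32)  # stand-in for a pretrained matrix

encoder = nn.Embedding(ntoken, ninp)
encoder.load_state_dict({'weight': torch.from_numpy(embed_matrix)})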
Example #4
    def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False, joint_emb=None, joint_emb_depth=0, joint_emb_dense=False, joint_emb_dual=True,  joint_dropout=0.2,  joint_emb_activation='Sigmoid',  joint_locked_dropout=False, joint_residual_prev=False, joint_noresid=False):
        super(RNNModel, self).__init__()
        self.use_dropout = True
        self.lockdrop = LockedDropout()
        self.idrop = nn.Dropout(dropouti if self.use_dropout else 0)
        self.hdrop = nn.Dropout(dropouth if self.use_dropout else 0)
        self.drop = nn.Dropout(dropout if self.use_dropout else 0)
        self.encoder = nn.Embedding(ntoken, ninp)
        assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
        if rnn_type == 'LSTM':
            self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights or (joint_emb is not None) else nhid), 1, dropout=0) for l in range(nlayers)]
            if wdrop:
                self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop if self.use_dropout else 0) for rnn in self.rnns]
        if rnn_type == 'GRU':
            self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l in range(nlayers)]
            if wdrop:
                self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop  if self.use_dropout else 0) for rnn in self.rnns]
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
            for rnn in self.rnns:
                rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop if self.use_dropout else 0)
        print(self.rnns)
        self.rnns = torch.nn.ModuleList(self.rnns)

        if joint_emb is None:
            if tie_weights:
                if nhid != ninp:
                    raise ValueError('When using the tied flag, nhid must be equal to emsize')
                self.decoder = nn.Linear(ninp, ntoken)
                self.decoder.weight = self.encoder.weight
            else:
                self.decoder = nn.Linear(nhid, ntoken)
        else:
            self.dropjoint = nn.Dropout(joint_dropout if self.use_dropout else 0)

            # Define the first layer of the label encoder network
            if joint_emb_activation != "Linear":
                self.joint_encoder_proj_0 = nn.Sequential(nn.Linear(ninp, joint_emb, bias=True), eval("nn.%s()" % joint_emb_activation))
            else:
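Example #4 is cut off after the else: branch. The construct on its last complete line, a projection whose activation is chosen by name, can be sketched like this (using getattr(nn, ...) instead of eval; names and sizes are illustrative).

import torch.nn as nn

ninp, joint_emb, joint_emb_activation = 400, 400, 'Sigmoid'
joint_encoder_proj_0 = nn.Sequential(
    nn.Linear(ninp, joint_emb, bias=True),
    getattr(nn, joint_emb_activation)())   # e.g. nn.Sigmoid()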
Example #5
    def __init__(self,
                 rnn_type,
                 ntoken,
                 ninp,
                 nhid,
                 proplstm,
                 nlayers,
                 dropout=0.5,
                 dropouth=0.5,
                 dropouti=0.5,
                 dropoute=0.1,
                 wdrop=0,
                 tie_weights=False,
                 params={}):
        super(RNNModel, self).__init__()
        self.params = params
        self.lockdrop = LockedDropout()
        self.idrop = nn.Dropout(dropouti)
        self.hdrop = nn.Dropout(dropouth)
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        assert rnn_type in [
            'LSTM', 'QRNN', 'GRU', 'MYLSTM', 'MYFASTLSTM', 'SIMPLEPLASTICLSTM',
            'FASTPLASTICLSTM', 'PLASTICLSTM', 'SPLITLSTM', 'FWMRNNv2'
        ], 'RNN type is not supported'
        if rnn_type == 'LSTM':
            self.rnns = [
                torch.nn.LSTM(ninp if l == 0 else nhid,
                              nhid if l != nlayers - 1 else
                              (ninp if tie_weights else nhid),
                              1,
                              dropout=0) for l in range(nlayers)
            ]

            #for rr in self.rnns:
            #    rr.flatten_parameters()
            if wdrop:
                print("Using WeightDrop!")
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
                    for rnn in self.rnns
                ]

        elif rnn_type == 'MYLSTM':
            self.rnns = [
                mylstm.MyLSTM(
                    ninp if l == 0 else nhid, nhid if l != nlayers - 1 else
                    (ninp if tie_weights else nhid)) for l in range(nlayers)
            ]

        elif rnn_type == 'MYFASTLSTM':
            self.rnns = [
                mylstm.MyFastLSTM(
                    ninp if l == 0 else nhid, nhid if l != nlayers - 1 else
                    (ninp if tie_weights else nhid)) for l in range(nlayers)
            ]

        elif rnn_type == 'PLASTICLSTM':
            self.rnns = [
                mylstm.PlasticLSTM(
                    ninp if l == 0 else nhid, nhid if l != nlayers - 1 else
                    (ninp if tie_weights else nhid), params)
                for l in range(nlayers)
            ]

        elif rnn_type == 'SIMPLEPLASTICLSTM':
            # Note that this one ignores the 'params' argument, which is only kept to preserve identical signature with PlasticLSTM
            self.rnns = [
                mylstm.SimplePlasticLSTM(
                    ninp if l == 0 else nhid, nhid if l != nlayers - 1 else
                    (ninp if tie_weights else nhid), params)
                for l in range(nlayers)
            ]

        elif rnn_type == 'FASTPLASTICLSTM':
            self.rnns = [
                mylstm.MyFastPlasticLSTM(
                    ninp if l == 0 else nhid, nhid if l != nlayers - 1 else
                    (ninp if tie_weights else nhid), params)
                for l in range(nlayers)
            ]

        elif rnn_type == 'SPLITLSTM':  # Not used
            self.rnns = [
                mylstm.SplitLSTM(
                    ninp if l == 0 else nhid, nhid if l != nlayers - 1 else
                    (ninp if tie_weights else nhid), proplstm, params)
                for l in range(nlayers)
            ]

        elif rnn_type == 'GRU':
            self.rnns = [
                torch.nn.GRU(ninp if l == 0 else nhid,
                             nhid if l != nlayers - 1 else ninp,
                             1,
                             dropout=0) for l in range(nlayers)
            ]
            if wdrop:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
                    for rnn in self.rnns
                ]
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [
                QRNNLayer(input_size=ninp if l == 0 else nhid,
                          hidden_size=nhid if l != nlayers - 1 else
                          (ninp if tie_weights else nhid),
                          save_prev_x=True,
                          zoneout=0,
                          window=2 if l == 0 else 1,
                          output_gate=True) for l in range(nlayers)
            ]
            for rnn in self.rnns:
                rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
        elif rnn_type == 'FWMRNNv2':
            self.rnns = [
                myfastweights_v2.FWMRNN(isize=ninp if l == 0 else nhid,
                                        hsize=nhid if l != nlayers - 1 else
                                        (ninp if tie_weights else nhid),
                                        withFWM=l == nlayers - 1,
                                        params=params,
                                        wdrop=wdrop) for l in range(nlayers)
            ]
        print(self.rnns)
        self.rnns = torch.nn.ModuleList(self.rnns)
        self.decoder = nn.Linear(nhid, ntoken)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            #if nhid != ninp:
            #    raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.ninp = ninp
        self.nhid = nhid
        self.proplstm = proplstm
        self.nlayers = nlayers
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropouth = dropouth
        self.dropoute = dropoute
        self.tie_weights = tie_weights
    def __init__(self,
                 rnn_type,
                 ntoken,
                 ninp,
                 nhid,
                 nlayers,
                 dropout=0.5,
                 dropouth=0.5,
                 dropouti=0.5,
                 dropoute=0.1,
                 wdrop=0,
                 tie_weights=False,
                 use_pre=False,
                 use_demo=False,
                 useone=None,
                 demoembs=None,
                 demouse=None,
                 mainmatrix=0,
                 printfunc=None):  #, match_input_size=False
        super(RNNModel, self).__init__()
        self.lockdrop = LockedDropout()
        self.idrop = nn.Dropout(dropouti)
        self.hdrop = nn.Dropout(dropouth)
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        # self.match_input_size = match_input_size

        assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
        ninp_mod = ninp  # make this bigger if we want to concatenate demographic embeddings
        if use_demo:
            assert len(demoembs[0][0][1]) == ninp
            if demouse == 'cat':
                if useone is not None:
                    ninp_mod = ninp * 2
                else:
                    ninp_mod = ninp * 5

        # Modify the input size for concatenated embeddings; the output size stays equal to the word embedding size (ninp)
        if rnn_type == 'LSTM':
            self.rnns = [
                torch.nn.LSTM(ninp_mod if l == 0 else nhid,
                              nhid if l != nlayers - 1 else
                              (ninp if tie_weights else nhid),
                              1,
                              dropout=0) for l in range(nlayers)
            ]
            if wdrop:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
                    for rnn in self.rnns
                ]
        if rnn_type == 'GRU':
            self.rnns = [
                torch.nn.GRU(ninp_mod if l == 0 else nhid,
                             nhid if l != nlayers - 1 else ninp,
                             1,
                             dropout=0) for l in range(nlayers)
            ]
            if wdrop:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
                    for rnn in self.rnns
                ]
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [
                QRNNLayer(input_size=ninp_mod if l == 0 else nhid,
                          hidden_size=nhid if l != nlayers - 1 else
                          (ninp if tie_weights else nhid),
                          save_prev_x=True,
                          zoneout=0,
                          window=2 if l == 0 else 1,
                          output_gate=True) for l in range(nlayers)
            ]
            for rnn in self.rnns:
                rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
        self.rnns = torch.nn.ModuleList(self.rnns)
        self.cprint = printfunc
        self.cprint(self.rnns)

        self.decoder = nn.Linear(nhid, ntoken)
        # self.age_decode = nn.Linear(nhid, len(demoembs[0]))
        # self.location_decode = nn.Linear(nhid, len(demoembs[1]))
        # self.religion_decode = nn.Linear(nhid, len(demoembs[2]))
        # self.gender_decode = nn.Linear(nhid, len(demoembs[3]))

        if useone is not None:
            self.cprint('Using one demographic input: ' + str(useone))
        elif use_demo:
            self.cprint('Using all four demographic inputs')

        if use_demo and useone in ['age', None]:
            self.age_embed = torch.nn.ModuleList(
                [nn.Embedding(ntoken, ninp) for i in DEMOVARS['AGE']])
        if use_demo and useone in ['location', None]:
            self.location_embed = torch.nn.ModuleList(
                [nn.Embedding(ntoken, ninp) for i in DEMOVARS['LOCATION']])
        if use_demo and useone in ['religion', None]:
            self.religion_embed = torch.nn.ModuleList(
                [nn.Embedding(ntoken, ninp) for i in DEMOVARS['RELIGION']])
        if use_demo and useone in ['gender', None]:
            self.gender_embed = torch.nn.ModuleList(
                [nn.Embedding(ntoken, ninp) for i in DEMOVARS['GENDER']])

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            #if nhid != ninp:
            #    raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight
            # self.age_decode.weight = self.age_embed.weight
            # self.location_decode.weight = self.location_embed.weight
            # self.religion_decode.weight = self.religion_embed.weight
            # self.gender_decode.weight = self.gender_embed.weight

        self.use_pre = use_pre
        self.use_demo = use_demo
        self.useone = useone
        self.demouse = demouse
        self.init_weights(demoembs, mainmatrix)

        self.rnn_type = rnn_type
        self.ninp = ninp
        self.nhid = nhid
        self.nlayers = nlayers
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropouth = dropouth
        self.dropoute = dropoute
        self.tie_weights = tie_weights
Example #7
    def __init__(self,
                 rnn_type,
                 ntoken,
                 ninp,
                 nhid,
                 nlayers,
                 dropout=0.5,
                 dropouth=0.5,
                 dropouti=0.5,
                 dropoute=0.1,
                 wdrop=0,
                 tie_weights=False,
                 byte=False):
        super(RNNModel, self).__init__()
        self.lockdrop = LockedDropout()
        self.idrop = nn.Dropout(dropouti)
        self.hdrop = nn.Dropout(dropouth)
        self.drop = nn.Dropout(dropout)

        if byte:
            if ninp != 256:
                raise ValueError('wrong embedding size for bytes: %d -> 256' %
                                 ninp)
            assert ninp == 256
            ntoken = 256
            self.encoder = nn.Embedding(ntoken, ninp)
            self.encoder.weight.data.copy_(torch.eye(256))
            self.encoder.weight.requires_grad = False
            print(self.encoder.weight.data)
        else:
            self.encoder = nn.Embedding(ntoken, ninp)

        assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
        if rnn_type == 'LSTM':
            self.rnns = [
                torch.nn.LSTM(ninp if l == 0 else nhid,
                              nhid if l != nlayers - 1 else
                              (ninp if tie_weights else nhid),
                              1,
                              dropout=0) for l in range(nlayers)
            ]
            if wdrop:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
                    for rnn in self.rnns
                ]
        if rnn_type == 'GRU':
            self.rnns = [
                torch.nn.GRU(ninp if l == 0 else nhid,
                             nhid if l != nlayers - 1 else ninp,
                             1,
                             dropout=0) for l in range(nlayers)
            ]
            if wdrop:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
                    for rnn in self.rnns
                ]
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [
                QRNNLayer(input_size=ninp if l == 0 else nhid,
                          hidden_size=nhid if l != nlayers - 1 else
                          (ninp if tie_weights else nhid),
                          save_prev_x=True,
                          zoneout=0,
                          window=2 if l == 0 else 1,
                          output_gate=True) for l in range(nlayers)
            ]
            for rnn in self.rnns:
                rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
        print(self.rnns)
        self.rnns = torch.nn.ModuleList(self.rnns)
        self.decoder = nn.Linear(nhid, ntoken)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            #if nhid != ninp:
            #    raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights(byte)

        self.rnn_type = rnn_type
        self.ninp = ninp
        self.nhid = nhid
        self.nlayers = nlayers
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropouth = dropouth
        self.dropoute = dropoute
        self.tie_weights = tie_weights
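A standalone sketch (plain PyTorch) of the frozen one-hot byte encoder used in the byte=True branch above: a 256x256 identity matrix with gradients disabled.

import torch
import torch.nn as nn

encoder = nn.Embedding(256, 256)
encoder.weight.data.copy_(torch.eye(256))
encoder.weight.requires_grad = False

byte_ids = torch.tensor([[72, 105]])   # raw byte values for "Hi"
one_hot = encoder(byte_ids)            # shape (1, 2, 256); each row is one-hot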
    def __init__(self,
                 rnn_type,
                 ntoken,
                 ninp,
                 nhid,
                 nlayers,
                 dropout=0.5,
                 dropouth=0.5,
                 dropouti=0.5,
                 dropoute=0.1,
                 wdrop=0,
                 tie_weights=False,
                 mu=0.9,
                 epsilon=0.1,
                 mus=0.999,
                 restart=0):
        super(RNNModel, self).__init__()
        self.lockdrop = LockedDropout()
        self.idrop = nn.Dropout(dropouti)
        self.hdrop = nn.Dropout(dropouth)
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        assert rnn_type in ['LSTM', 'QRNN', 'GRU', 'MLSTM', 'NLSTM',
                            'ALSTM'], 'RNN type is not supported'
        if rnn_type == 'LSTM':
            self.rnns = [
                LSTM(ninp if l == 0 else nhid,
                     nhid if l != nlayers - 1 else
                     (ninp if tie_weights else nhid),
                     bias=True) for l in range(nlayers)
            ]
            if wdrop:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh'], dropout=wdrop)
                    for rnn in self.rnns
                ]
        if rnn_type == 'MLSTM':
            self.rnns = [
                MomentumLSTM(ninp if l == 0 else nhid,
                             nhid if l != nlayers - 1 else
                             (ninp if tie_weights else nhid),
                             mu=mu,
                             epsilon=epsilon,
                             bias=True) for l in range(nlayers)
            ]
            if wdrop:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh'], dropout=wdrop)
                    for rnn in self.rnns
                ]
        if rnn_type == 'ALSTM':
            self.rnns = [
                AdamLSTM(ninp if l == 0 else nhid,
                         nhid if l != nlayers - 1 else
                         (ninp if tie_weights else nhid),
                         mu=mu,
                         epsilon=epsilon,
                         mus=mus,
                         bias=True) for l in range(nlayers)
            ]
            if wdrop:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh'], dropout=wdrop)
                    for rnn in self.rnns
                ]
        if rnn_type == 'NLSTM':
            self.rnns = [
                NesterovLSTM(ninp if l == 0 else nhid,
                             nhid if l != nlayers - 1 else
                             (ninp if tie_weights else nhid),
                             epsilon=epsilon,
                             restart=restart,
                             bias=True) for l in range(nlayers)
            ]
            if wdrop:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh'], dropout=wdrop)
                    for rnn in self.rnns
                ]
        if rnn_type == 'GRU':
            self.rnns = [
                GRU(ninp if l == 0 else nhid,
                    nhid if l != nlayers - 1 else ninp,
                    bias=True) for l in range(nlayers)
            ]
            if wdrop:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh'], dropout=wdrop)
                    for rnn in self.rnns
                ]
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [
                QRNNLayer(input_size=ninp if l == 0 else nhid,
                          hidden_size=nhid if l != nlayers - 1 else
                          (ninp if tie_weights else nhid),
                          save_prev_x=True,
                          zoneout=0,
                          window=2 if l == 0 else 1,
                          output_gate=True) for l in range(nlayers)
            ]
            for rnn in self.rnns:
                rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
        print(self.rnns)
        self.rnns = torch.nn.ModuleList(self.rnns)
        self.decoder = nn.Linear(nhid, ntoken)

        if tie_weights:
            #if nhid != ninp:
            #    raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.ninp = ninp
        self.nhid = nhid
        self.nlayers = nlayers
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropouth = dropouth
        self.dropoute = dropoute
        self.tie_weights = tie_weights
        self.mu = mu
        self.epsilon = epsilon
        self.mus = mus
        self.restart = restart
    def __init__(self,
                 rnn_type,
                 ntoken,
                 ninp,
                 nhid,
                 nlayers,
                 dropout=0.5,
                 dropouth=0.5,
                 dropouti=0.5,
                 dropoute=0.1,
                 wdrop=0,
                 tie_weights=False,
                 prior_numstates=5,
                 use_fixed_uniform_prior=False,
                 cuda=False,
                 dictionary=None,
                 latent_plot_typ='kw',
                 infer_nw_arch_type='linear1',
                 inference_pretrained_model_path=None,
                 infer_nw_skip_first_token=False,
                 infer_nw_ignore_token_type='default',
                 infer_nw_share_encoder=True,
                 inference_nw_frozen=False,
                 inference_nw_uniform_distribution=False,
                 emotion_type='basic',
                 inference_pretrained_model_path_extractinference=False,
                 inference_nw_first_word_distribution=False):

        super(RNNModel, self).__init__()
        self.lockdrop = LockedDropout()
        self.idrop = nn.Dropout(dropouti)
        self.hdrop = nn.Dropout(dropouth)
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        self.use_cuda = cuda
        assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
        if rnn_type == 'LSTM':
            self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 \
                    else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]
            if wdrop:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
                    for rnn in self.rnns
                ]
        if rnn_type == 'GRU':
            self.rnns = [
                torch.nn.GRU(ninp if l == 0 else nhid,
                             nhid if l != nlayers - 1 else ninp,
                             1,
                             dropout=0) for l in range(nlayers)
            ]
            if wdrop:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
                    for rnn in self.rnns
                ]
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 \
                else (ninp if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 \
                if l == 0 else 1, output_gate=True) for l in range(nlayers)]
            for rnn in self.rnns:
                rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
        print(self.rnns)
        self.rnns = torch.nn.ModuleList(self.rnns)
        self.decoder = nn.Linear(nhid, ntoken)
        self.inference_pretrained_model_path_extractinference = inference_pretrained_model_path_extractinference
        # self.decoder_nw_frozen = decoder_nw_frozen

        #################### WEIGHT TYING
        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            #if nhid != ninp:
            #    raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight
            # self.decoder_prior.weight = self.encoder.weight
            print("******* tied weights ******")
        print("**self.decoder = ", self.decoder)

        #################### PRIOR
        self.latent_plot_typ = latent_plot_typ
        provided_prior_decoder = None
        emotion_vocab_list = None
        self.emotion_type = emotion_type
        if latent_plot_typ == 'kw':
            provided_prior_decoder = self.decoder
        elif latent_plot_typ == 'emotion':
            self.emotion_vocab_list = emotion_vocab_list = self.get_emotion_vocab_list(
                dictionary)
        self.prior_model = PriorModel(
            typ=latent_plot_typ,
            ntoken=ntoken,
            ninp=ninp,
            nhid=nhid,
            tie_weights=tie_weights,
            prior_numstates=None,
            decoder=provided_prior_decoder,
            cuda=False,
            emotion_vocab_list=emotion_vocab_list,
            use_fixed_uniform_prior=use_fixed_uniform_prior)

        #################### INFERENCE NETWORK
        self.infer_nw_share_encoder = infer_nw_share_encoder
        self.inference_nw_uniform_distribution = inference_nw_uniform_distribution
        self.inference_nw_first_word_distribution = inference_nw_first_word_distribution
        if infer_nw_share_encoder:
            infer_nw_encoder = self.encoder
        else:
            infer_nw_encoder = None
        self.inference_nw = InferenceNW(
            typ=latent_plot_typ,
            ninp=ninp,
            ntoken=ntoken,
            encoder=infer_nw_encoder,
            prior_numstates=prior_numstates,
            use_cuda=cuda,
            arch_type=infer_nw_arch_type,
            skip_first_token=infer_nw_skip_first_token,
            dictionary=dictionary,
            ignore_token_type=infer_nw_ignore_token_type,
            nw_frozen=inference_nw_frozen,
            emotion_vocab_list=emotion_vocab_list,
            uniform_distribution=inference_nw_uniform_distribution,
            first_word_distribution=inference_nw_first_word_distribution,
            emotion_type=emotion_type)
        self.inference_pretrained_model_path = inference_pretrained_model_path  # will call in init_weights
        self.inference_nw_frozen = inference_nw_frozen

        #################### INIT AND SAVE HYPERPARAMS
        self.init_weights()
        self.rnn_type = rnn_type
        self.ninp = ninp
        self.nhid = nhid
        self.nlayers = nlayers
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropouth = dropouth
        self.dropoute = dropoute
        self.tie_weights = tie_weights
        self.wdrop = wdrop
        self.use_fixed_uniform_prior = use_fixed_uniform_prior
Example #10
    def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, 
            tie_weights=False, binarized=False, collect_stats=False, no_md=False, split_cross=False):
        super(RNNModel, self).__init__()
        self.binarized = binarized
        self.collect_stats = collect_stats
        self.lockdrop = LockedDropout()
        self.idrop = nn.Dropout(dropouti)
        self.hdrop = nn.Dropout(dropouth)
        self.drop = nn.Dropout(dropout)
        self.ctx = ctx = candle.TernaryQuantizeContext()
        self.scale = nn.Parameter(torch.Tensor([0]))
        self.nout = ninp
        self.no_md = no_md
        self.se = split_cross
        # self.ternary = ctx.activation(k=8)
        self.encoder = ctx.bypass(nn.Embedding(ntoken, ninp))
        # self.mdC = []
        # self.mdH = []
        # for _ in range(nlayers):
        #     td = candle.UniformTiedGenerator()
        #     self.mdC.append(candle.LinearMarkovDropout(0.6, min_length=0.4, tied_generator=td, tied_root=True, tied=True, rescale=False))
        #     self.mdH.append(candle.LinearMarkovDropout(0.6, min_length=0.4, tied_generator=td, tied=True, rescale=False))
        if binarized:
            self.decode_bn = ctx.bypass(nn.BatchNorm1d(ninp))
        elif collect_stats:
            self.encode_bn = ctx.moment_stat(name="encoder")
        assert rnn_type in ['LSTM', 'QRNN', 'GRU', 'LSTM-MD'], 'RNN type is not supported'
        if rnn_type == 'LSTM':
            self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]
            if wdrop:
                self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
        elif rnn_type == 'LSTM-MD':
            self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]
            if wdrop:
                self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], ['weight_hh_l0', 'weight_ih_l0', 'bias_hh_l0', 'bias_ih_l0'], dropout=wdrop, md=(0.6, 0.4)) for rnn in self.rnns]
        if rnn_type == 'GRU':
            self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l in range(nlayers)]
            if wdrop:
                self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), save_prev_x=True, 
                    zoneout=0, window=2 if l == 0 else 1, output_gate=True, binarized=binarized, ctx=ctx, 
                    collect_stats=collect_stats, no_md=no_md, scale=self.scale) for l in range(nlayers)]
            for rnn in self.rnns:
                if binarized:
                    rnn.linear.hook_weight(candle.WeightDrop, p=wdrop)
                    # rnn.linear.hook_weight(candle.SignFlip, p=wdrop)
                else:
                    rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
        # print(self.rnns)
        self.rnns = torch.nn.ModuleList(self.rnns)
        # self.decoder = ctx.wrap(nn.Linear(nhid, ntoken), soft=True, scale=self.scale) if binarized else ctx.bypass(nn.Linear(nhid, ntoken))
        self.decoder = ctx.bypass(nn.Linear(nhid, ntoken))

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            #if nhid != ninp:
            #    raise ValueError('When using the tied flag, nhid must be equal to emsize')
            if binarized:
                self.decoder.weight = self.encoder.weight
                # self.decoder.tie_weight(self.encoder.weight)
            else:
                self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.ninp = ninp
        self.nhid = nhid
        self.nlayers = nlayers
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropouth = dropouth
        self.dropoute = dropoute
        self.tie_weights = tie_weights
Example #11
    def __init__(self,
                 rnn_type,
                 ntoken,
                 ninp,
                 nhid,
                 nlayers,
                 dropout=0.5,
                 dropouth=0.5,
                 dropouti=0.5,
                 dropoute=0.1,
                 wdrop=0,
                 tie_weights=False,
                 use_pre=False,
                 use_ind=False,
                 indembs=None,
                 induse=None,
                 printfunc=None):
        super(RNNModel, self).__init__()
        self.lockdrop = LockedDropout()
        self.idrop = nn.Dropout(dropouti)
        self.hdrop = nn.Dropout(dropouth)
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)

        assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
        ninp_mod = ninp
        if use_ind:
            if indembs[0] is not None:
                assert len(indembs[0][0]) == ninp
            if induse == 'cat':
                ninp_mod = ninp * 2

        # Modify the input size for concatenated embeddings; the output size stays equal to the word embedding size (ninp)
        if rnn_type == 'LSTM':
            self.rnns = [
                torch.nn.LSTM(ninp_mod if l == 0 else nhid,
                              nhid if l != nlayers - 1 else
                              (ninp if tie_weights else nhid),
                              1,
                              dropout=0) for l in range(nlayers)
            ]
            if wdrop:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
                    for rnn in self.rnns
                ]
        if rnn_type == 'GRU':
            self.rnns = [
                torch.nn.GRU(ninp_mod if l == 0 else nhid,
                             nhid if l != nlayers - 1 else ninp,
                             1,
                             dropout=0) for l in range(nlayers)
            ]
            if wdrop:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
                    for rnn in self.rnns
                ]
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [
                QRNNLayer(input_size=ninp_mod if l == 0 else nhid,
                          hidden_size=nhid if l != nlayers - 1 else
                          (ninp if tie_weights else nhid),
                          save_prev_x=True,
                          zoneout=0,
                          window=2 if l == 0 else 1,
                          output_gate=True) for l in range(nlayers)
            ]
            for rnn in self.rnns:
                rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
        self.rnns = torch.nn.ModuleList(self.rnns)
        self.cprint = printfunc
        self.cprint(self.rnns)

        self.decoder = nn.Linear(nhid, ntoken)

        if use_ind:
            self.user_embed = nn.Embedding(ntoken, ninp)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            #if nhid != ninp:
            #    raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.use_pre = use_pre
        self.use_ind = use_ind
        self.induse = induse
        self.init_weights(indembs)

        self.rnn_type = rnn_type
        self.ninp = ninp
        self.nhid = nhid
        self.nlayers = nlayers
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropouth = dropouth
        self.dropoute = dropoute
        self.tie_weights = tie_weights
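A standalone sketch of the induse == 'cat' path above: a second embedding concatenated onto the word embedding doubles the first layer's input size (ninp_mod = ninp * 2). Sizes and the vocabulary of the second embedding are illustrative.

import torch
import torch.nn as nn

ntoken, ninp, nhid = 1000, 300, 1150
word_embed = nn.Embedding(ntoken, ninp)
ind_embed = nn.Embedding(ntoken, ninp)   # individual/user embedding, same width as words

words = torch.randint(0, ntoken, (35, 20))   # (seq_len, batch)
inds = torch.randint(0, ntoken, (35, 20))
x = torch.cat([word_embed(words), ind_embed(inds)], dim=-1)  # (35, 20, 2 * ninp)
out, _ = nn.LSTM(2 * ninp, nhid, 1)(x)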
Example #12
    def __init__(self,
                 rnn_type,
                 ntoken,
                 emsize,
                 nhid,
                 nlayers,
                 dropoute=0.2,
                 dropouti=0.2,
                 dropoutrnn=0.2,
                 dropout=0.2,
                 wdrop=0.5,
                 tie_weights=False):
        super(RNNModel, self).__init__()
        self.lockdrop = LockedDropout()
        self.encoder = nn.Embedding(ntoken, emsize)
        assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
        if rnn_type == 'LSTM':
            self.rnns = torch.nn.LSTM(emsize,
                                      nhid,
                                      nlayers,
                                      dropout=dropoutrnn)
        if rnn_type == 'GRU':
            self.rnns = torch.nn.GRU(emsize, nhid, nlayers, dropout=dropoutrnn)
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [
                QRNNLayer(input_size=emsize if l == 0 else nhid,
                          hidden_size=nhid if l != nlayers - 1 else
                          (emsize if tie_weights else nhid),
                          save_prev_x=True,
                          zoneout=0,
                          window=2 if l == 0 else 1,
                          output_gate=True) for l in range(nlayers)
            ]
        if wdrop:
            self.rnns = WeightDrop(
                self.rnns, ['weight_hh_l{}'.format(i) for i in range(nlayers)],
                wdrop)
        print(self.rnns)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            if nhid != emsize:
                raise ValueError(
                    'When using the tied flag, nhid must be equal to emsize')
            self.decoder = nn.Linear(nhid, ntoken, bias=False)
            self.decoder.weight = self.encoder.weight
        else:
            self.decoder = nn.Linear(nhid, ntoken)

        self.ninp = emsize
        self.nhid = nhid
        self.nlayers = nlayers
        self.dropoute = dropoute
        self.dropouti = dropouti
        self.dropout = dropout
        self.tie_weights = tie_weights

        self.init_weights()
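Since this example builds one stacked nn.LSTM and passes every 'weight_hh_l{i}' name to WeightDrop, here is a quick standalone check (plain PyTorch) that those parameter names exist on a multi-layer LSTM.

import torch.nn as nn

rnn = nn.LSTM(400, 1150, num_layers=3)
hh_names = [name for name, _ in rnn.named_parameters() if name.startswith('weight_hh')]
print(hh_names)   # ['weight_hh_l0', 'weight_hh_l1', 'weight_hh_l2']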
Example #13
    def __init__(self, config):
        super(BiRNNLanguageModel, self).__init__()
        self.config = config

        self.tie_weights = config.get('tie_weights', True)
        self.embedding_dim = config.get('embedding_dim', LM_HIDDEN_DIM)
        self.hidden_dim = self.embedding_dim if self.tie_weights else config.get(
            'hidden_dim', LM_HIDDEN_DIM)
        self.dropout_emb = config.get('emb_dropout', .2)
        self.dropout_i = config.get('lock_drop', .5)
        self.dropout_h = config.get('h_dropout', .5)
        self.dropout_w = config.get('w_dropout', 0)
        self.num_words = config.get('num_words', LM_VOCAB_SIZE)
        self.rnn_type = config.get('rnn_type', 'SRU')
        self.n_layers = config.get('n_layers', 6)
        self.dropout_rnn = config.get('rnn_dropout', .2)
        self.highway_bias = config.get('highway_bias', -3)
        self.use_adasoft = config.get('use_adasoft', True)
        self.adasoft_cutoffs = config.get(
            'adasoft_cutoffs', [LM_VOCAB_SIZE // 2, LM_VOCAB_SIZE // 2])

        assert self.rnn_type in ['LSTM', 'GRU', 'SRU', 'QRNN']

        self.encoder = nn.Embedding(self.num_words, self.embedding_dim)
        self.lockdrop = to_gpu(LockedDropout())

        # for the time being, weight drop is broken
        if self.rnn_type == 'LSTM':
            self.rnns = [
                nn.LSTM(
                    self.embedding_dim if layer_ix == 0 else self.hidden_dim,
                    self.hidden_dim // 2,
                    bidirectional=True,
                    dropout=self.dropout_rnn)
                for layer_ix in range(self.n_layers)
            ]
            if self.dropout_w:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh_l0'], dropout=self.dropout_w)
                    for rnn in self.rnns
                ]
        elif self.rnn_type == 'GRU':
            self.rnns = [
                nn.GRU(
                    self.embedding_dim if layer_ix == 0 else self.hidden_dim,
                    self.hidden_dim // 2,
                    bidirectional=True,
                    dropout=self.dropout_rnn)
                for layer_ix in range(self.n_layers)
            ]
            if self.dropout_w:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh_l0'], dropout=self.dropout_w)
                    for rnn in self.rnns
                ]
        elif self.rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [
                QRNNLayer(
                    self.embedding_dim if layer_ix == 0 else self.hidden_dim,
                    self.hidden_dim // 2,
                    bidirectional=True) for layer_ix in range(self.n_layers)
            ]
            if self.dropout_w:
                for rnn in self.rnns:
                    rnn.linear = WeightDrop(rnn.linear, ['weight'],
                                            dropout=self.dropout_w)
        else:
            from sru import SRU
            self.rnns = [
                to_gpu(
                    SRU(self.embedding_dim
                        if layer_ix == 0 else self.hidden_dim,
                        self.hidden_dim // 2,
                        num_layers=1,
                        rnn_dropout=self.dropout_rnn,
                        dropout=self.dropout_w,
                        rescale=False,
                        highway_bias=self.highway_bias,
                        use_tanh=0,
                        bidirectional=True,
                        v1=True)) for layer_ix in range(self.n_layers)
            ]

        self.rnns = nn.ModuleList(self.rnns)
        self.decoder = nn.Linear(
            self.embedding_dim if self.tie_weights else self.hidden_dim,
            self.num_words)

        # Adaptive softmax
        self.use_adasoft = config.get('use_adasoft', True)

        if self.use_adasoft:
            if 'adasoft_cutoffs' in config:
                splits = config['adasoft_cutoffs']
            else:
                splits = []
                if self.num_words >= 500000:
                    # One Billion
                    # This produces fairly even matrix mults for the buckets:
                    # 0: 11723136, 1: 10854630, 2: 11270961, 3: 11219422
                    splits = [4200, 35000, 180000]
                elif self.num_words >= 75000:
                    # WikiText-103
                    splits = [2800, 20000, 76000]
                elif self.num_words >= 20000:
                    splits = [2000, 4000, 10000]
                else:
                    splits = [self.num_words // 3, self.num_words // 3]

                config['adasoft_cutoffs'] = splits

            # print('Cross Entropy Splits: Using', splits)

            self.adasoft = SplitCrossEntropyLoss(self.hidden_dim,
                                                 splits,
                                                 ignore_index=0)
        else:
            self.adasoft = None

        # Weight tying
        if self.tie_weights:
            self.decoder.weight = self.encoder.weight

        self.init_weights()
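The split selection above picks adaptive-softmax cutoffs from the vocabulary size; pulled out as a standalone helper it reads as follows (threshold values copied from the example, function name is made up).

def adasoft_splits(num_words):
    if num_words >= 500000:      # One Billion Word scale
        return [4200, 35000, 180000]
    if num_words >= 75000:       # WikiText-103 scale
        return [2800, 20000, 76000]
    if num_words >= 20000:
        return [2000, 4000, 10000]
    return [num_words // 3, num_words // 3]

print(adasoft_splits(100000))    # -> [2800, 20000, 76000]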
Example #14
    def __init__(self,
                 rnn_type,
                 ntoken,
                 ninp,
                 nhid,
                 nlayers,
                 nhlayers,
                 dropout=0.5,
                 dropouth=0.5,
                 dropouti=0.5,
                 dropoute=0.1,
                 wdrop=0,
                 tie_weights=False,
                 nr_cells=5,
                 read_heads=2,
                 sparse_reads=10,
                 cell_size=10,
                 gpu_id=-1,
                 independent_linears=False,
                 debug=True):
        super(RNNModel, self).__init__()
        self.lockdrop = LockedDropout()
        self.idrop = nn.Dropout(dropouti)
        self.hdrop = nn.Dropout(dropouth)
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        self.debug = debug
        assert rnn_type in ['LSTM', 'QRNN', 'DNC',
                            'SDNC'], 'RNN type is not supported'
        if rnn_type == 'LSTM':
            self.rnns = [
                torch.nn.LSTM(ninp if l == 0 else nhid,
                              nhid if l != nlayers - 1 else ninp,
                              1,
                              dropout=0) for l in range(nlayers)
            ]
            if wdrop:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
                    for rnn in self.rnns
                ]
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [
                QRNNLayer(input_size=ninp if l == 0 else nhid,
                          hidden_size=nhid if l != nlayers - 1 else ninp,
                          save_prev_x=True,
                          zoneout=0,
                          window=2 if l == 0 else 1,
                          output_gate=True)
                for l in range(nlayers)
            ]
            for rnn in self.rnns:
                rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
        elif rnn_type.lower() == 'sdnc':
            self.rnns = []
            self.rnns.append(
                SDNC(input_size=ninp,
                     hidden_size=nhid,
                     num_layers=nlayers,
                     num_hidden_layers=nhlayers,
                     rnn_type='lstm',
                     nr_cells=nr_cells,
                     read_heads=read_heads,
                     sparse_reads=sparse_reads,
                     cell_size=cell_size,
                     gpu_id=gpu_id,
                     independent_linears=independent_linears,
                     debug=debug,
                     dropout=0))
        elif rnn_type.lower() == 'dnc':
            self.rnns = []
            self.rnns.append(
                DNC(input_size=ninp,
                    hidden_size=nhid,
                    num_layers=nlayers,
                    num_hidden_layers=nhlayers,
                    rnn_type='lstm',
                    nr_cells=nr_cells,
                    read_heads=read_heads,
                    cell_size=cell_size,
                    gpu_id=gpu_id,
                    independent_linears=independent_linears,
                    debug=debug,
                    dropout=wdrop))
        print(self.rnns)
        self.rnns = torch.nn.ModuleList(self.rnns)
        self.decoder = nn.Linear(ninp, ntoken)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            #if nhid != ninp:
            #    raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.ninp = ninp
        self.nhid = nhid
        self.nlayers = nlayers
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropouth = dropouth
        self.dropoute = dropoute
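As a hypothetical usage sketch (placeholder values throughout, assuming the rest of the class is defined as in the surrounding examples), the memory-augmented variant above is selected simply by passing rnn_type='SDNC' or 'DNC', which swaps the stacked LSTM/QRNN layers for a single differentiable-neural-computer module:

# Hypothetical call, mirroring only the signature of the __init__ above.
model = RNNModel(rnn_type='SDNC', ntoken=10000, ninp=400, nhid=400,
                 nlayers=1, nhlayers=2, dropout=0.4, dropouth=0.25,
                 dropouti=0.4, dropoute=0.1, wdrop=0, tie_weights=False,
                 nr_cells=5, read_heads=2, sparse_reads=10, cell_size=10,
                 gpu_id=-1)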
Beispiel #15
0
    def __init__(self,
                 rnn_type,
                 ntoken,
                 nemb,
                 nhid,
                 nhidlast,
                 nlayer,
                 dropout=0.5,
                 dropouth=0.5,
                 dropouti=0.5,
                 dropoute=0.1,
                 wdrop=0,
                 spectrum_control=False):
        super(Dai, self).__init__()
        self.use_dropout = True
        self.lockdrop = LockedDropout()
        if spectrum_control:
            self.encoder = SvdEmbed(nemb, ntoken)
        else:
            self.encoder = nn.Embedding(ntoken, nemb)
        self.wdropped = True
        if rnn_type == 'LSTM':
            self.rnns = [
                torch.nn.LSTM(nemb if l == 0 else nhid,
                              nhid if l != nlayer - 1 else nhidlast,
                              1,
                              dropout=0) for l in range(nlayer)
            ]
            if wdrop:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh_l0'],
                               dropout=wdrop if self.use_dropout else 0)
                    for rnn in self.rnns
                ]
            else:
                self.wdropped = False
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [
                QRNNLayer(input_size=nemb if l == 0 else nhid,
                          hidden_size=nhid if l != nlayer - 1 else nhidlast,
                          save_prev_x=True,
                          zoneout=0,
                          window=2 if l == 0 else 1,
                          output_gate=True) for l in range(nlayer)
            ]
            for rnn in self.rnns:
                rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)

        self.rnns = torch.nn.ModuleList(self.rnns)

        self.rnn_type = rnn_type
        self.nemb = nemb
        self.nhid = nhid
        self.nhidlast = nhidlast
        self.nlayer = nlayer
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropouth = dropouth
        self.dropoute = dropoute
        self.ntoken = ntoken
        self.spectrum_control = spectrum_control

        self.init_weights()
Beispiel #16
0
    def __init__(self,
                 rnn_type,
                 input_dim,
                 hidden_dim,
                 nlayers,
                 rnn_out_dropout=.5,
                 dropouth=.5,
                 wdrop=0,
                 tie_weights=False):
        """
        Adapted from Salesforce awd-lstm-lm, with a few modifications.

        Aimed at time series, so the embedding-related code has been removed.

        Parameters
        ----------
        rnn_type : str
            One of 'LSTM', 'GRU' or 'QRNN'.
        input_dim : int
            Input dimension.
        hidden_dim : int
            Hidden layer size.
        nlayers : int
            Number of RNN layers.
        rnn_out_dropout : float, optional
            Locked dropout, i.e. variational dropout rate, applied to the
            output of the last RNN layer.
        dropouth : float, optional
            Variational dropout rate between RNN layers.
        wdrop : float, optional
            Weight dropout rate for the recurrent weights inside an RNN layer.
        tie_weights : bool, optional
            Default False. If True, the hidden size of the last RNN layer is
            set to the input size.
        """
        # super(AWDRNN, self).__init__()
        super().__init__()

        self.lockdrop = LockedDropout()
        # self.idrop = nn.Dropout(dropouti)
        # self.hdrop = nn.Dropout(dropouth)
        # self.drop = nn.Dropout(dropout)
        # self.encoder = nn.Embedding(ntoken, input_dim)

        assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
        if rnn_type == 'LSTM':
            self.rnns = [torch.nn.LSTM(input_dim if ll == 0 else hidden_dim,
                                       hidden_dim if ll != nlayers - 1
                                       else (input_dim if tie_weights
                                             else hidden_dim),
                                       1,
                                       dropout=0)
                         for ll in range(nlayers)]
            if wdrop:
                self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
                             for rnn in self.rnns]
        if rnn_type == 'GRU':
            self.rnns = [torch.nn.GRU(input_dim if ll == 0 else hidden_dim,
                                      hidden_dim if ll != nlayers - 1
                                      else (input_dim if tie_weights
                                            else hidden_dim),
                                      1,
                                      dropout=0)
                         for ll in range(nlayers)]
            if wdrop:
                self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
                             for rnn in self.rnns]
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [QRNNLayer(input_size=input_dim if ll == 0
                                   else hidden_dim,
                                   hidden_size=(hidden_dim if ll != nlayers - 1
                                                else (input_dim if tie_weights
                                                      else hidden_dim)),
                                   save_prev_x=True,
                                   zoneout=0,
                                   window=2 if ll == 0 else 1,
                                   output_gate=True)
                         for ll in range(nlayers)]
            for rnn in self.rnns:
                rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
        print(self.rnns)
        self.rnns = nn.ModuleList(self.rnns)

        # self.decoder = nn.Linear(hidden_dim, ntoken)
        #
        # # Optionally tie weights as in:
        # # "Using the Output Embedding to Improve Language Models"
        # # (Press & Wolf 2016)
        # # https://arxiv.org/abs/1608.05859
        # # and
        # # "Tying Word Vectors and Word Classifiers: A Loss Framework for
        # # Language Modeling" (Inan et al. 2016)
        # # https://arxiv.org/abs/1611.01462
        # if tie_weights:
        #     # if hidden_dim != input_dim:
        #     #    raise ValueError('When using the tied flag, hidden_dim '
        #     #                     'must be equal to emsize')
        #     self.decoder.weight = self.encoder.weight
        # self.init_weights()

        self.rnn_type = rnn_type
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.nlayers = nlayers
        self.rnn_out_dropout = rnn_out_dropout
        # self.dropouti = dropouti
        self.dropouth = dropouth
        # self.dropoute = dropoute
        self.tie_weights = tie_weights
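The docstring above distinguishes weight dropout on the recurrent matrices (WeightDrop) from the locked, i.e. variational, dropout applied to layer outputs. The sketch below is a generic, self-contained illustration of that distinction and is not code from this repository; LockedDropoutSketch is a hypothetical stand-in for the LockedDropout used above.

import torch
import torch.nn as nn

class LockedDropoutSketch(nn.Module):
    """Variational dropout: one mask shared across all time steps."""
    def forward(self, x, dropout=0.5):
        if not self.training or dropout == 0:
            return x
        # x is (seq_len, batch, features); draw a single mask per batch element
        mask = x.new_empty((1, x.size(1), x.size(2))).bernoulli_(1 - dropout)
        return x * mask / (1 - dropout)

lstm = nn.LSTM(8, 16, 1)
# weight dropout would instead wrap the recurrent matrix, e.g.:
# lstm = WeightDrop(lstm, ['weight_hh_l0'], dropout=0.5)
out, _ = lstm(torch.randn(20, 4, 8))
out = LockedDropoutSketch()(out, dropout=0.4)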
Beispiel #17
0
    def __init__(self,
                 rnn_type,
                 ntoken,
                 ninp,
                 nhid,
                 nlayers,
                 dropout=0.5,
                 dropouth=0.5,
                 dropouti=0.5,
                 dropoute=0.1,
                 wdrop=0,
                 tie_weights=False):
        super(RNNModel, self).__init__()
        self.lockdrop = LockedDropout()
        self.idrop = nn.Dropout(dropouti)
        self.hdrop = nn.Dropout(dropouth)
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(ntoken, ninp)
        assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
        if rnn_type == 'LSTM':
            self.rnns = [
                torch.nn.LSTM(ninp if l == 0 else nhid,
                              nhid if l != nlayers - 1 else
                              (ninp if tie_weights else nhid),
                              1,
                              dropout=0) for l in range(nlayers)
            ]

            ## start edit: steps for the MTS model bias assignments

            ## STEP 1: make a list of the hidden layer sizes - useful for the init step
            hid_dim = [
                nhid if l != nlayers - 1 else (ninp if tie_weights else nhid)
                for l in range(nlayers)
            ]

            ## STEP 2: create bias values depending on the type of init we want

            chrono_bias = [np.zeros(hid_dim[l]) for l in range(nlayers)]
            multi_timescale = True

            if multi_timescale:
                # layer 0: half the units with timescale 3, half with timescale 4
                half_length = int(0.5 * hid_dim[0])
                timescale_first_half, timescale_second_half = 3, 4
                #calculate bias values from timescale and store in an array
                chrono_bias[0][:half_length] = -1 * np.log(
                    np.exp(1 / timescale_first_half) - 1)
                chrono_bias[0][half_length:] = -1 * np.log(
                    np.exp(1 / timescale_second_half) - 1)

                # layer 1 with timescales sampled from an inverse gamma distribution;
                # np.linspace(0, 1, 1151)[1:] yields 1150 values (the q=0 point maps
                # to infinity and is dropped), so this assumes hid_dim[1] == 1150
                timescale_invgamma = scipy.stats.invgamma.isf(
                    np.linspace(0, 1, 1151), a=0.56, scale=1)[1:]
                # calculate bias values from the timescales and store in an array
                chrono_bias[1] = -1 * np.log(np.exp(1 / timescale_invgamma) - 1)

            ## STEP 3: assign the bias values to the layers - the first half is the input gate bias, the second half the forget gate bias, for both input-to-hidden and hidden-to-hidden

            for l in range(nlayers - 1):  # assign biases only for the first nlayers - 1 layers
                # zero out the input-to-hidden input/forget gate biases
                self.rnns[l].bias_ih_l0.data[0:hid_dim[l] * 2] = torch.tensor(
                    np.zeros(hid_dim[l] * 2), dtype=torch.float)
                # hidden-to-hidden biases: -chrono_bias for the input gate,
                # +chrono_bias for the forget gate
                self.rnns[l].bias_hh_l0.data[0:hid_dim[l] * 2] = torch.from_numpy(
                    np.hstack((-1 * chrono_bias[l], chrono_bias[l])).astype(np.float32))

            ## STEP 4: freeze the biases - if we want them fixed rather than only initialized
            fixed_weights = True
            if fixed_weights:
                for l in range(nlayers - 1):
                    print(l)
                    self.rnns[l].bias_ih_l0.requires_grad = False
                    self.rnns[l].bias_hh_l0.requires_grad = False

            ##end edit
            ###
            if wdrop:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
                    for rnn in self.rnns
                ]
        if rnn_type == 'GRU':
            self.rnns = [
                torch.nn.GRU(ninp if l == 0 else nhid,
                             nhid if l != nlayers - 1 else ninp,
                             1,
                             dropout=0) for l in range(nlayers)
            ]
            if wdrop:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
                    for rnn in self.rnns
                ]
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [
                QRNNLayer(input_size=ninp if l == 0 else nhid,
                          hidden_size=nhid if l != nlayers - 1 else
                          (ninp if tie_weights else nhid),
                          save_prev_x=True,
                          zoneout=0,
                          window=2 if l == 0 else 1,
                          output_gate=True) for l in range(nlayers)
            ]
            for rnn in self.rnns:
                rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
        print(self.rnns)
        self.rnns = torch.nn.ModuleList(self.rnns)
        self.decoder = nn.Linear(nhid, ntoken)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            #if nhid != ninp:
            #    raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.ninp = ninp
        self.nhid = nhid
        self.nlayers = nlayers
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropouth = dropouth
        self.dropoute = dropoute
        self.tie_weights = tie_weights
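The multi-timescale bias above relies on a simple identity: with a forget-gate bias of b = -log(exp(1/T) - 1), the gate opens to sigmoid(b) = exp(-1/T), so a unit parameterized with timescale T retains roughly that fraction of its state per step. A standalone numeric check (purely illustrative) is sketched below.

# Not part of the model code: verify that sigmoid(-log(exp(1/T) - 1)) == exp(-1/T).
import numpy as np

for T in (3, 4, 20):
    b = -np.log(np.exp(1.0 / T) - 1.0)
    forget_gate = 1.0 / (1.0 + np.exp(-b))
    print(T, round(b, 3), round(forget_gate, 4), round(np.exp(-1.0 / T), 4))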
Beispiel #18
0
    def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5,
                 dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False,
                 num_features=0, feature_dim=0, feature_relu_bias=2.0):
        super(RNNModel, self).__init__()
        self.lockdrop = LockedDropout()
        self.idrop = nn.Dropout(dropouti)
        self.hdrop = nn.Dropout(dropouth)
        self.drop = nn.Dropout(dropout)
        self.num_features = num_features
        self.feature_dims = feature_dim
        if self.num_features == 0:
            # self.encoder = nn.Embedding(ntoken, ninp)
            self.encoder = nn.Parameter(torch.FloatTensor(ntoken, ninp))
        else:
            self.word_emb = nn.Parameter(torch.FloatTensor(ntoken, feature_dim))
            self.feature_emb = nn.Parameter(torch.FloatTensor(num_features, feature_dim))
            self.feature_relu_bias = nn.Parameter(torch.FloatTensor([feature_relu_bias]), requires_grad=False)
            self.encoder = nn.Parameter(torch.FloatTensor(num_features, ninp))
            self.word_emb_cache = None
            self.feature_emb_cache = None

        assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
        if rnn_type == 'LSTM':
            self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]
            if wdrop:
                self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
        if rnn_type == 'GRU':
            self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else ninp, 1, dropout=0) for l in range(nlayers)]
            if wdrop:
                self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid, hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), save_prev_x=True, zoneout=0, window=2 if l == 0 else 1, output_gate=True) for l in range(nlayers)]
            for rnn in self.rnns:
                rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
        logging.info(self.rnns)
        self.rnns = torch.nn.ModuleList(self.rnns)
        self.decoder = nn.Linear(nhid, ntoken)

        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        if tie_weights:
            assert self.num_features == 0, "Tying weights together with the feature model is not supported right now."
            #if nhid != ninp:
            #    raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder.weight = self.encoder

        self.init_weights()

        self.rnn_type = rnn_type
        self.ninp = ninp
        self.nhid = nhid
        self.nlayers = nlayers
        self.dropout = dropout
        self.dropouti = dropouti
        self.dropouth = dropouth
        self.dropoute = dropoute
        self.tie_weights = tie_weights

        if self.num_features == 0:
            logging.info('Using normal encoder model')
            self._input_layer_fn = self.normal_encoder
        else:
            logging.info('Using feature encoder model %s %s', self.num_features, self.feature_dims)
            self._input_layer_fn = self.feature_encoder
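Since this variant stores the encoder as a raw nn.Parameter rather than an nn.Embedding, the tying at the end reduces to sharing one (ntoken, dim) tensor; a minimal shape check, with made-up sizes, is sketched below.

# Illustrative shape check for the tie_weights branch above: the decoder weight
# (ntoken, nhid) and the encoder parameter (ntoken, ninp) must match exactly,
# i.e. nhid == ninp; the feature-based encoder stores (num_features, ninp)
# instead, which is why tying is asserted away in that mode.
import torch
import torch.nn as nn

ntoken, ninp, nhid = 1000, 400, 400            # placeholder sizes, nhid == ninp
encoder = nn.Parameter(torch.empty(ntoken, ninp))
decoder = nn.Linear(nhid, ntoken)
assert decoder.weight.shape == encoder.shape   # (ntoken, nhid) == (ntoken, ninp)
decoder.weight = encoder                       # the two now share one tensor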
Beispiel #19
0
    def __init__(self, config, x_embed):
        super().__init__()

        self.num_layers_rnn = 1
        self.x_embed = x_embed.x_embed

        self.wdrop = config.wdrop
        self.dropoute = config.dropoute
        self.encoder_out_size = config.rnn_cell_size
        self.rnn_cell_type = config.rnn_cell_type

        self.training = True

        import warnings
        warnings.filterwarnings("ignore")

        self.model = None
        if self.rnn_cell_type.lower() == "lstm":
            self.rnn = nn.LSTM(input_size=x_embed.embedding_dim,
                               hidden_size=config.rnn_cell_size,
                               num_layers=self.num_layers_rnn,
                               bidirectional=False,
                               dropout=config.dropout,
                               batch_first=True,
                               bias=True)
            self.model = WeightDrop(self.rnn, ['weight_hh_l0'], dropout=self.wdrop)

        elif self.rnn_cell_type.lower() == "gru":
            self.rnn = nn.GRU(input_size=x_embed.embedding_dim,
                              hidden_size=config.rnn_cell_size,
                              num_layers=self.num_layers_rnn,
                              bidirectional=False,
                              dropout=config.dropout,
                              batch_first=True,
                              bias=True)
            self.model = WeightDrop(self.rnn, ['weight_hh_l0'], dropout=self.wdrop)

        elif self.rnn_cell_type.lower() == "qrnn":
            from torchqrnn import QRNNLayer
            self.model = QRNNLayer(input_size=x_embed.embedding_dim,
                                   hidden_size=config.rnn_cell_size,
                                   save_prev_x=True,
                                   zoneout=0,
                                   window=1,
                                   output_gate=True,
                                   use_cuda=config.use_gpu)
            self.model.linear = WeightDrop(self.model.linear, ['weight'], dropout=self.wdrop)
            # self.encoders.reset()

        self.lockdrop = LockedDropout()
        self.dropouti = 0.1

        # temporal averaging
        self.beta_ema = config.beta_ema
        if self.beta_ema > 0:
            self.avg_param = deepcopy(list(p.data for p in self.parameters()))
            if config.use_gpu:
                self.avg_param = [a.cuda() for a in self.avg_param]
            self.steps_ema = 0.

        return
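The beta_ema branch above keeps a running copy of the parameters for temporal (Polyak-style) averaging. The snippet below is a minimal, hypothetical sketch of the update and bias-correction steps such a copy is typically paired with; none of these method names are taken from the code above.

import torch
import torch.nn as nn
from copy import deepcopy

class EmaSketch(nn.Module):
    def __init__(self, beta_ema=0.99):
        super().__init__()
        self.layer = nn.Linear(4, 4)
        self.beta_ema = beta_ema
        self.avg_param = deepcopy([p.data for p in self.parameters()])
        self.steps_ema = 0.

    def update_ema(self):
        # exponential moving average of the raw parameters, called after each step
        self.steps_ema += 1
        for p, avg_p in zip(self.parameters(), self.avg_param):
            avg_p.mul_(self.beta_ema).add_(p.data, alpha=1 - self.beta_ema)

    def ema_params(self):
        # bias-corrected averages, useful for evaluation
        return [avg_p / (1 - self.beta_ema ** self.steps_ema)
                for avg_p in self.avg_param]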
Beispiel #20
0
    def __init__(self,
                 rnn_type,
                 ninp,
                 nhid,
                 nlayers,
                 dropout=0.5,
                 dropouth=0.5,
                 wdrop=0,
                 tie_weights=False,
                 class_num=4):
        super(RNNModel, self).__init__()
        self.lockdrop = LockedDropout()
        #        self.idrop = nn.Dropout(dropouti)
        self.hdrop = nn.Dropout(dropouth)
        self.drop = nn.Dropout(dropout)

        assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
        if rnn_type == 'LSTM':
            self.rnns = [
                torch.nn.LSTM(ninp if l == 0 else nhid,
                              nhid if l != nlayers - 1 else
                              (ninp if tie_weights else nhid),
                              1,
                              dropout=0) for l in range(nlayers)
            ]
            if wdrop:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
                    for rnn in self.rnns
                ]
        if rnn_type == 'GRU':
            self.rnns = [
                torch.nn.GRU(ninp if l == 0 else nhid,
                             nhid if l != nlayers - 1 else ninp,
                             1,
                             dropout=0) for l in range(nlayers)
            ]
            if wdrop:
                self.rnns = [
                    WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
                    for rnn in self.rnns
                ]
        elif rnn_type == 'QRNN':
            from torchqrnn import QRNNLayer
            self.rnns = [
                QRNNLayer(input_size=ninp if l == 0 else nhid,
                          hidden_size=nhid if l != nlayers - 1 else
                          (ninp if tie_weights else nhid),
                          save_prev_x=True,
                          zoneout=0,
                          window=2 if l == 0 else 1,
                          output_gate=True) for l in range(nlayers)
            ]
            for rnn in self.rnns:
                rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
        print(self.rnns)
        self.rnns = torch.nn.ModuleList(self.rnns)
        ############# whether to use a BN layer - LRP cannot propagate through BN ##############
        #        self.bn = nn.BatchNorm1d(nhid, momentum=0.5)
        #########################################################################
        ############# whether to use a bias - using a bias affects the LRP results #############
        #        self.decoder = nn.Linear(nhid, class_num, bias=False)
        self.decoder = nn.Linear(nhid, class_num)
        #########################################################################
        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        #        if tie_weights:
        #            #if nhid != ninp:
        #            #    raise ValueError('When using the tied flag, nhid must be equal to emsize')
        #            self.decoder.weight = self.encoder.weight

        self.init_weights()

        self.rnn_type = rnn_type
        self.ninp = ninp
        self.nhid = nhid
        self.nlayers = nlayers
        self.dropout = dropout
        self.dropouth = dropouth
        self.tie_weights = tie_weights