def __init__(self, input_size, hidden_size, output_size, numclass=0,
             dropouti=0.05, wdrop=0.2, dropouto=0.05):
    super(WeightDropBiLSTM, self).__init__()
    self.rnn1 = nn.LSTM(input_size, hidden_size, bidirectional=True)
    self.linear_rnn = nn.Linear(hidden_size * 2, hidden_size)
    self.rnn2 = nn.LSTM(hidden_size, hidden_size, bidirectional=True)
    self.linear = nn.Linear(hidden_size * 2 + hidden_size, numclass)
    self.lockdrop = LockedDropout()
    self.weight_drop1 = WeightDrop(self.rnn1, ['weight_hh_l0'], dropout=wdrop)
    self.weight_drop2 = WeightDrop(self.rnn2, ['weight_hh_l0'], dropout=wdrop)
    # Keep the constructor arguments: the original immediately re-assigned
    # hard-coded 0.05 values here, silently discarding whatever the caller passed.
    self.dropouti = dropouti
    self.dropouto = dropouto
    initrange = 0.1
    self.linear_rnn.weight.data.uniform_(-initrange, initrange)
    self.linear_rnn.bias.data.fill_(0)
    self.linear.weight.data.uniform_(-initrange, initrange)
    self.linear.bias.data.fill_(0)
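# ---------------------------------------------------------------------------
# Reference sketch: every constructor in this collection assumes a WeightDrop
# wrapper and, in most cases, a LockedDropout module are importable. The
# minimal versions below follow the widely copied salesforce/awd-lstm-lm
# style; the actual helpers in each source repo may differ in detail (e.g.
# variational masks or per-weight dropout rates), and recent PyTorch releases
# that cache flattened cuDNN weights may need an adapted approach.
# ---------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F

class WeightDrop(nn.Module):
    """DropConnect on named weight matrices of a wrapped module."""
    def __init__(self, module, weights, dropout=0.0):
        super(WeightDrop, self).__init__()
        self.module, self.weights, self.dropout = module, weights, dropout
        for name in self.weights:
            w = getattr(self.module, name)
            del self.module._parameters[name]
            # Keep the raw parameter; a dropped copy is rebuilt each forward.
            self.module.register_parameter(name + '_raw', nn.Parameter(w.data))

    def forward(self, *args):
        for name in self.weights:
            raw_w = getattr(self.module, name + '_raw')
            setattr(self.module, name,
                    F.dropout(raw_w, p=self.dropout, training=self.training))
        return self.module(*args)

class LockedDropout(nn.Module):
    """Variational dropout: one mask per batch element, shared over time."""
    def forward(self, x, dropout=0.5):
        if not self.training or not dropout:
            return x
        # x is (seq_len, batch, features); sample one mask for all timesteps.
        mask = x.data.new(1, x.size(1), x.size(2)).bernoulli_(1 - dropout)
        return mask.div_(1 - dropout).expand_as(x) * x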
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False,
             alpha=2, beta=1, bsz=20):
    super(RNNModel, self).__init__()
    self.bsz = bsz
    self.ntoken = ntoken
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
    self.alpha = alpha
    self.beta = beta
    self.metrics = [self.acc, self.perplexity]
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid,
                                   nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
                                   1, dropout=0)
                     for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    if rnn_type == 'GRU':
        self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid,
                                  nhid if l != nlayers - 1 else ninp,
                                  1, dropout=0)
                     for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid,
                               hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
                               save_prev_x=True, zoneout=0,
                               window=2 if l == 0 else 1, output_gate=True)
                     for l in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(nhid, ntoken)
    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight
    self.init_weights()
    # Build the SplitCrossEntropyLoss criterion here
    self.build_criterion()
    self.hidden = None
def __init__(self, vocab_size, embed_size, hid_size, arc_size, stag_size, window_size,
             wordembed=None, dropout=0.2, dropoute=0.1, dropoutr=0.1):
    super(distance_parser, self).__init__()
    self.vocab_size = vocab_size
    self.embed_size = embed_size
    self.hid_size = hid_size
    self.arc_size = arc_size
    self.stag_size = stag_size
    self.window_size = window_size
    self.drop = nn.Dropout(dropout)
    self.dropoute = dropoute
    self.dropoutr = dropoutr
    self.encoder = nn.Embedding(vocab_size, embed_size)
    if wordembed is not None:
        self.encoder.weight.data = torch.FloatTensor(wordembed)
    self.tag_encoder = nn.Embedding(stag_size, embed_size)
    self.word_rnn = nn.LSTM(2 * embed_size, hid_size, num_layers=2,
                            batch_first=True, dropout=dropout, bidirectional=True)
    self.word_rnn = WeightDrop(self.word_rnn, ['weight_hh_l0', 'weight_hh_l1'],
                               dropout=dropoutr)
    self.conv1 = nn.Sequential(
        nn.Dropout(dropout),
        nn.Conv1d(hid_size * 2, hid_size, window_size),
        nn.ReLU(),
    )
    self.arc_rnn = nn.LSTM(hid_size, hid_size, num_layers=2,
                           batch_first=True, dropout=dropout, bidirectional=True)
    self.arc_rnn = WeightDrop(self.arc_rnn, ['weight_hh_l0', 'weight_hh_l1'],
                              dropout=dropoutr)
    self.distance = nn.Sequential(
        nn.Dropout(dropout),
        nn.Linear(hid_size * 2, hid_size),
        nn.ReLU(),
        nn.Dropout(dropout),
        nn.Linear(hid_size, 1),
    )
    self.terminal = nn.Sequential(
        nn.Dropout(dropout),
        nn.Linear(hid_size * 2, hid_size),
        nn.ReLU(),
    )
    self.non_terminal = nn.Sequential(
        nn.Dropout(dropout),
        nn.Linear(hid_size * 2, hid_size),
        nn.ReLU(),
    )
    self.arc = nn.Sequential(
        nn.Dropout(dropout),
        nn.Linear(hid_size, arc_size),
    )
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid,
                                   nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
                                   1, dropout=0)
                     for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    if rnn_type == 'GRU':
        self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid,
                                  nhid if l != nlayers - 1 else ninp,
                                  1, dropout=0)
                     for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid,
                               hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
                               save_prev_x=True, zoneout=0,
                               window=2 if l == 0 else 1, output_gate=True)
                     for l in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(nhid, ntoken)
    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        # NOTE: Tying only overwrites the decoder's weight tensor, so the printed
        # module still shows the original decoder dimensions even though the
        # weights are shared. If your model summary looks wrong, this is why.
        self.decoder.weight = self.encoder.weight
    self.init_weights()
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
def convert_to_custom(self):
    # Note: this relies on a module-level `args` (for args.wdrop) and a
    # torch_lstm_to_custom() helper defined elsewhere in the source.
    for ind, rnn in enumerate(self.rnns):
        if type(rnn) == WeightDrop:
            rnn.module, type_changed = torch_lstm_to_custom(rnn.module)
            if type_changed:
                self.rnns[ind] = WeightDrop(rnn.module, ['weight_hh'], dropout=args.wdrop)
        else:
            self.rnns[ind], _ = torch_lstm_to_custom(rnn)
            self.rnns[ind] = WeightDrop(self.rnns[ind], ['weight_hh'], dropout=args.wdrop)
    return self
def __init__(self, vocab_obj, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5,
             dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    embed_matrix_tensor = torch.from_numpy(vocab_obj.embed_matrix).cuda()
    self.encoder.load_state_dict({'weight': embed_matrix_tensor})
    assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid,
                                   nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
                                   1, dropout=0)
                     for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    if rnn_type == 'GRU':
        self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid,
                                  nhid if l != nlayers - 1 else ninp,
                                  1, dropout=0)
                     for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid,
                               hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
                               save_prev_x=True, zoneout=0,
                               window=2 if l == 0 else 1, output_gate=True)
                     for l in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'], dropout=wdrop)
    self.decoder = nn.Linear(nhid, ntoken)
    self.rnns = torch.nn.ModuleList(self.rnns)
    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight
    self.init_weights()
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
def __init__(self, ntoken, ninp, dropout=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, k=0):
    super(RNNModel, self).__init__()
    self.idrop = fixMaskDropout(dropouti)
    self.drop = fixMaskDropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp, padding_idx=0)
    self.embedded_dropout = fixMaskEmbeddedDropout(self.encoder, dropoute)
    self.lstm_cell = WeightDrop(torch.nn.LSTMCell(ninp, ninp), ['weight_hh'], dropout=wdrop)
    # self.lstm = WeightDrop(torch.nn.LSTM(ninp, ninp), ['weight_hh_l0'], dropout=wdrop)
    self.decoder = nn.Linear(ninp, ntoken)
    self.decoder.weight = self.encoder.weight_raw
    self.ninp = ninp
    self.dropoute = dropoute
    self.k = k
    if k > 0:
        self.w_mi = nn.Linear(ninp, 1)
        self.w_mh = nn.Linear(ninp, 1)
        self.w_hh = nn.Linear(ninp, ninp)
        self.w_hm = nn.Linear(ninp, ninp)
    self.init_weights()
def __init__(self, rnn_type, ntoken, ninp, nhid, nhidlast, nlayers, dropout=0.5,
             dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False,
             ldropout=0.5, n_experts=10):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.encoder = nn.Embedding(ntoken, ninp)
    self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid,
                               nhid if l != nlayers - 1 else nhidlast,
                               1, dropout=0)
                 for l in range(nlayers)]
    if wdrop:
        self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.head = MoShead(ntoken, ninp, nhid, nhidlast, self.encoder,
                        self.lockdrop, tie_weights, n_experts)
    self.init_weights()
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nhidlast = nhidlast
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.ldropout = ldropout
    self.dropoutl = ldropout
    self.n_experts = n_experts
    self.ntoken = ntoken

    size = 0
    for p in self.parameters():
        size += p.nelement()
    print('Param size: {}'.format(size))
def __init__(self, input_size=configs.frame_size, layer_num=configs.rnn_layer_num):
    super().__init__()
    self.input_size = input_size
    self.hidden_size = (configs.rnn_hidden_size // 2
                        if configs.uses_bi_rnn else configs.rnn_hidden_size)
    self.layer_num = layer_num
    self.direction_num = 2 if configs.uses_bi_rnn else 1
    self.rnn_type = nn.LSTM if configs.rnn_type == 'lstm' else nn.GRU
    self.rnn = self.rnn_type(
        input_size=self.input_size,
        hidden_size=self.hidden_size,
        num_layers=self.layer_num,
        bidirectional=configs.uses_bi_rnn,
        # dropout=configs.dropout_prob
    )
    self.cached_rnn_weights = {}
    self.state_shape = [self.layer_num * self.direction_num, 1, self.hidden_size]
    self.batch_dim = 1
    if self.rnn_type is nn.LSTM:
        self.initial_hidden_state = nn.Parameter(torch.randn(*self.state_shape))
        self.initial_cell_state = nn.Parameter(torch.randn(*self.state_shape))
    elif self.rnn_type is nn.GRU:
        self.initial_hidden_state = nn.Parameter(torch.randn(*self.state_shape))
    if configs.uses_weight_dropped_rnn:
        from weight_drop import WeightDrop
        # Collect every weight matrix (input-to-hidden and hidden-to-hidden,
        # across all layers and directions) for weight dropping.
        weight_names = [name for name, param in self.rnn.named_parameters()
                        if 'weight' in name]
        self.rnn = WeightDrop(module=self.rnn, weights=weight_names,
                              dropout=configs.rnn_weights_dropout_prob)
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    # Note: dropout=dropouth on a single-layer nn.LSTM has no effect (PyTorch
    # only applies it between stacked layers) and merely triggers a warning.
    self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid,
                               nhid if l != nlayers - 1 else ninp,
                               1, dropout=dropouth)
                 for l in range(nlayers)]
    print(self.rnns)
    if wdrop:
        self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(nhid, ntoken)
    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight
    self.init_weights()
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
def __init__(self, char_embedding_size, vocab_size, hidden_dim, layer_num,
             weight_dropout_in=0, weight_dropout_hidden=0, char_dropout_prob=0):
    super(WordNLM, self).__init__()
    # Hyperparameters
    self.char_embedding_size = char_embedding_size
    self.vocab_size = vocab_size
    self.hidden_dim = hidden_dim
    self.layer_num = layer_num
    self.weight_dropout_in = weight_dropout_in
    self.weight_dropout_hidden = weight_dropout_hidden
    self.char_dropout_prob = char_dropout_prob
    # Model architecture
    self.char_embeddings = nn.Embedding(num_embeddings=self.vocab_size,
                                        embedding_dim=self.char_embedding_size)
    self.char_dropout = nn.Dropout2d(p=self.char_dropout_prob)
    self.rnn = nn.LSTM(self.char_embedding_size, self.hidden_dim, self.layer_num)
    # Flattening happens before wrapping; WeightDrop re-registers the weights,
    # which defeats the cuDNN flattening anyway.
    self.rnn.flatten_parameters()
    # get_weigh_drop_parameters [sic] is defined elsewhere in the source.
    weight_drop_params = self.get_weigh_drop_parameters()
    self.rnn_drop = WeightDrop(self.rnn, weight_drop_params)
    self.output = nn.Linear(self.hidden_dim, self.vocab_size)
def __init__(self, code_hidden_size, hidden_size, time_step, regression=True):
    super(AttnDecoder, self).__init__()
    self.code_hidden_size = code_hidden_size
    self.hidden_size = hidden_size
    self.T = time_step
    self.attn1 = nn.Linear(in_features=2 * hidden_size, out_features=code_hidden_size)
    self.attn2 = nn.Linear(in_features=code_hidden_size, out_features=code_hidden_size)
    self.tanh = nn.Tanh()
    self.attn3 = nn.Linear(in_features=code_hidden_size, out_features=1)
    self.lstm = nn.LSTM(input_size=1, hidden_size=self.hidden_size)
    self.wdrnn = WeightDrop(self.lstm, ['weight_hh_l0', 'weight_ih_l0'],
                            dropout=config.DROP_OUT)
    self.tilde = nn.Linear(in_features=self.code_hidden_size + 1, out_features=1)
    self.fc1 = nn.Linear(in_features=code_hidden_size + hidden_size,
                         out_features=hidden_size)
    if regression:
        # regression model
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=1)
    else:
        # classification model
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=2)
def __init__(self, nb_words, hidden_size=128, embedding_size=128, n_layers=1,
             wdrop=0.25, odrop=0.25, edrop=0.1, idrop=0.25, variational=False,
             standard_dropout=False, batch_first=True):
    super(Model, self).__init__()
    self.standard_dropout = standard_dropout
    self.lockdrop = LockedDropout(batch_first=batch_first)
    self.odrop = odrop
    self.idrop = idrop
    self.edrop = edrop
    self.n_layers = n_layers
    self.embedding = nn.Embedding(nb_words, embedding_size)
    self.rnns = [
        nn.LSTM(embedding_size if l == 0 else hidden_size, hidden_size,
                num_layers=1, batch_first=batch_first)
        for l in range(n_layers)
    ]
    if wdrop:
        self.rnns = [
            WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop, variational=variational)
            for rnn in self.rnns
        ]
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.output_layer = nn.Linear(hidden_size, 1)
    self.init_weights()
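# A hypothetical instantiation of the model above (values are illustrative,
# not from the source). In the awd-lstm-lm wrapper that this snippet's
# `variational` flag mirrors, variational=True drops entire rows of the
# recurrent weight matrix rather than individual entries.
model = Model(nb_words=20000, hidden_size=128, embedding_size=128,
              n_layers=2, wdrop=0.25, variational=True, batch_first=True)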
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False, pooling=False):
    super(LMmodel, self).__init__()
    self.lockdrop = LockedDropout()
    self.ntoken = ntoken  # <-- Temporary; probably <NUM>, <MIX_NUM> in another dataset.
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(self.ntoken, ninp)
    # The pre-trained model doesn't use batch_first.
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid,
                                   nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
                                   1, dropout=0)
                     for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
    self.pooling = pooling
def __init__(self, rnn_type, ntoken, ninp, nhid, nhidlast, nlayers, dropout=0.5,
             dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False,
             ldropout=0.6, n_experts=10, num4embed=0, num4first=0, num4second=0):
    super(RNNModel, self).__init__()
    self.model_embeddings_source = ModelEmbeddings(ninp, vocab.src)
    self.lockdrop = LockedDropout()
    self.encoder = nn.Embedding(ntoken, ninp)
    self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid,
                               nhid if l != nlayers - 1 else nhidlast,
                               1, dropout=0)
                 for l in range(nlayers)]
    if wdrop:
        self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.all_experts = n_experts + num4embed + num4first + num4second
    self.prior = nn.Linear(nhidlast, self.all_experts, bias=False)
    self.latent = nn.Linear(nhidlast, n_experts * ninp)
    if num4embed > 0:
        self.weight4embed = nn.Linear(ninp, num4embed * ninp)
    if num4first > 0:
        self.weight4first = nn.Linear(nhid, num4first * ninp)
    if num4second > 0:
        self.weight4second = nn.Linear(nhid, num4second * ninp)
    self.decoder = nn.Linear(ninp, ntoken)
    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight
    self.num4embed = num4embed
    self.num4first = num4first
    self.num4second = num4second
    self.init_weights()
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nhidlast = nhidlast
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.dropoutl = ldropout
    self.n_experts = n_experts
    self.ntoken = ntoken

    size = 0
    for p in self.parameters():
        size += p.nelement()
    print('param size: {}'.format(size))
def __init__(self, input_sz: int, hidden_sz: int, dropout=0, variational=False,
             recycle_hid=False):
    super().__init__()
    self.dropout = dropout
    self.variational = variational
    self.input_size = input_sz
    self.hidden_size = hidden_sz
    self.recycle_hid = recycle_hid  # flag to recycle hidden parameters
    self.lstm_cell = nn.LSTMCell(input_size=input_sz, hidden_size=hidden_sz)
    if dropout:
        self.lstm_cell = WeightDrop(self.lstm_cell, ['weight_hh'], dropout=dropout)
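# A hypothetical sanity check for the weight-dropped cell used above (not from
# the source): nn.LSTMCell names its recurrent matrix 'weight_hh' (no layer
# suffix, unlike nn.LSTM's 'weight_hh_l0'), and the wrapped cell is stepped
# manually over the sequence.
cell = WeightDrop(nn.LSTMCell(input_size=10, hidden_size=20), ['weight_hh'], dropout=0.5)
x = torch.randn(5, 3, 10)                         # (seq_len, batch, input)
state = (torch.zeros(3, 20), torch.zeros(3, 20))  # initial (h, c)
for t in range(x.size(0)):
    state = cell(x[t], state)                     # returns the new (h, c)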
def __init__(self, model, vocsize, embsize, hiddensize, n_layers, dropout=0.5,
             dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=True,
             ldropout=0.5, n_experts=5, uncertain='gp', position=1):
    super(RNNLM, self).__init__()
    self.model = model.lower()
    self.lockdrop = LockedDropout()
    self.encoder = nn.Embedding(vocsize, embsize)
    self.rnns = []
    for l in range(n_layers):
        if l == 0:
            if uncertain == 'gp':
                self.rnns.append(GPLSTM(embsize, hiddensize if l != n_layers - 1 else embsize, position))
            elif uncertain == 'bayes':
                self.rnns.append(BayesLSTM(embsize, hiddensize if l != n_layers - 1 else embsize, position))
            else:
                self.rnns.append(torch.nn.LSTM(embsize, hiddensize if l != n_layers - 1 else embsize, 1, dropout=0))
        else:
            self.rnns.append(torch.nn.LSTM(hiddensize, hiddensize if l != n_layers - 1 else embsize, 1, dropout=0))
    if wdrop:
        # Wrap every layer except GPLSTM. The original comprehension passed a
        # stray size argument to WeightDrop (which takes module, weight names,
        # dropout) and, by filtering inside the comprehension, removed GPLSTM
        # layers from self.rnns entirely instead of leaving them unwrapped.
        self.rnns = [rnn if rnn.__class__.__name__ == "GPLSTM"
                     else WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
                     for rnn in self.rnns]
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.prior = nn.Linear(embsize, n_experts, bias=False)
    self.latent = nn.Sequential(nn.Linear(embsize, n_experts * embsize), nn.Tanh())
    self.decoder_bias = nn.Parameter(torch.empty(vocsize))
    if tie_weights:
        # Optionally tie weights as in:
        # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
        # https://arxiv.org/abs/1608.05859
        # and
        # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
        # https://arxiv.org/abs/1611.01462
        self.decoder_weight = self.encoder.weight
    else:
        self.decoder_weight = nn.Parameter(torch.empty(vocsize, embsize))
    self.vocsize = vocsize
    self.embsize = embsize
    self.hiddensize = hiddensize
    self.n_layers = n_layers
    self.tie_weights = tie_weights
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.ldropout = ldropout
    self.dropoutl = ldropout
    self.n_experts = n_experts
    self.init_parameters()
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False, joint_emb=None,
             joint_emb_depth=0, joint_emb_dense=False, joint_emb_dual=True,
             joint_dropout=0.2, joint_emb_activation='Sigmoid',
             joint_locked_dropout=False, joint_residual_prev=False, joint_noresid=False):
    super(RNNModel, self).__init__()
    self.use_dropout = True
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti if self.use_dropout else 0)
    self.hdrop = nn.Dropout(dropouth if self.use_dropout else 0)
    self.drop = nn.Dropout(dropout if self.use_dropout else 0)
    self.encoder = nn.Embedding(ntoken, ninp)
    assert rnn_type in ['LSTM', 'QRNN', 'GRU'], 'RNN type is not supported'
    if rnn_type == 'LSTM':
        self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid,
                                   nhid if l != nlayers - 1 else (ninp if tie_weights or (joint_emb is not None) else nhid),
                                   1, dropout=0)
                     for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'],
                                    dropout=wdrop if self.use_dropout else 0)
                         for rnn in self.rnns]
    if rnn_type == 'GRU':
        self.rnns = [torch.nn.GRU(ninp if l == 0 else nhid,
                                  nhid if l != nlayers - 1 else ninp,
                                  1, dropout=0)
                     for l in range(nlayers)]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'],
                                    dropout=wdrop if self.use_dropout else 0)
                         for rnn in self.rnns]
    elif rnn_type == 'QRNN':
        from torchqrnn import QRNNLayer
        self.rnns = [QRNNLayer(input_size=ninp if l == 0 else nhid,
                               hidden_size=nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
                               save_prev_x=True, zoneout=0,
                               window=2 if l == 0 else 1, output_gate=True)
                     for l in range(nlayers)]
        for rnn in self.rnns:
            rnn.linear = WeightDrop(rnn.linear, ['weight'],
                                    dropout=wdrop if self.use_dropout else 0)
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    if joint_emb is None:
        if tie_weights:
            if nhid != ninp:
                raise ValueError('When using the tied flag, nhid must be equal to emsize')
            self.decoder = nn.Linear(ninp, ntoken)
            self.decoder.weight = self.encoder.weight
        else:
            self.decoder = nn.Linear(nhid, ntoken)
    else:
        self.dropjoint = nn.Dropout(joint_dropout if self.use_dropout else 0)
        # Define the first layer of the label encoder network
        if joint_emb_activation != "Linear":
            self.joint_encoder_proj_0 = nn.Sequential(
                nn.Linear(ninp, joint_emb, bias=True),
                eval("nn.%s()" % joint_emb_activation))
        else:
            # The snippet is truncated here in the source; the linear branch
            # presumably just drops the nonlinearity, e.g.:
            self.joint_encoder_proj_0 = nn.Linear(ninp, joint_emb, bias=True)
def __init__(self, rnn_type, ntoken, ninp, nhid, nhidlast, nlayers, dropout=0.25,
             dropouth=0.25, dropouti=0.25, dropoute=0.1, wdrop=0, tie_weights=False,
             ldropout=0.25, n_classes=10, class_count=[]):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.encoder = nn.Embedding(ntoken, ninp)
    self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid,
                               nhid if l != nlayers - 1 else nhidlast,
                               1, dropout=0)
                 for l in range(nlayers)]
    if wdrop:
        self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
    self.rnns = torch.nn.ModuleList(self.rnns)
    # Separate the hidden state?
    self.word_class = nn.Linear(int(nhidlast / 2), n_classes, bias=False)
    # self.latent = nn.Sequential(nn.Linear(nhidlast, n_experts * ninp), nn.Tanh())
    self.latent = nn.Sequential(nn.Linear(int(nhidlast / 2), ninp), nn.Tanh())
    # self.decoder = nn.Linear(ninp, ntoken + n_classes)
    self.decoder = nn.Linear(ninp, ntoken)
    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        # if nhid != ninp:
        #     raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight
    self.init_weights()
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nhidlast = nhidlast
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.ldropout = ldropout
    self.dropoutl = ldropout
    self.n_classes = n_classes
    self.ntoken = ntoken
    self.class_count = class_count

    size = 0
    for p in self.parameters():
        size += p.nelement()
    print('param size: {}'.format(size))
def __init__(self, input_size, hidden_size, time_step):
    super(AttnEncoder, self).__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.T = time_step
    self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=1)
    self.wdrnn = WeightDrop(self.lstm, ['weight_hh_l0', 'weight_ih_l0'],
                            dropout=config.DROP_OUT)
    self.attn1 = nn.Linear(in_features=2 * hidden_size, out_features=self.T)
    self.attn2 = nn.Linear(in_features=input_size, out_features=input_size)
    self.tanh = nn.Tanh()
    self.attn3 = nn.Linear(in_features=self.T, out_features=1)
def __init__(self, ntoken, ninp, dropout=0.5, dropouti=0.5, dropoute=0.1, wdrop=0,
             tie_weights=False):
    super(RNNModel, self).__init__()
    self.idrop = fixMaskDropout(dropouti)
    self.drop = fixMaskDropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp, padding_idx=0)
    self.embedded_dropout = fixMaskEmbeddedDropout(self.encoder, dropoute)
    self.lstm = WeightDrop(torch.nn.LSTM(ninp, ninp), ['weight_hh_l0'], dropout=wdrop)
    self.decoder = nn.Linear(ninp, ntoken)
    self.decoder.weight = self.encoder.weight_raw
    self.W = nn.Linear(ninp, ninp)
    self.init_weights()
    self.ninp = ninp
    self.dropoute = dropoute
def __init__(self, isize, hsize, withFWM, params, wdrop=0.5):
    super().__init__()
    s_size = params["s_size"]
    r_size = params["r_size"]
    t_size = params["t_size"]
    self.rnn = nn.LSTM(isize, hsize, 1, dropout=0)
    if withFWM:
        self.fwm = FWM(hsize, s_size, r_size, t_size)
        self.linear = nn.Linear(t_size, hsize)
    self.isize = isize
    self.hsize = hsize
    self.hasFWM = withFWM
    self.rnn = WeightDrop(self.rnn, ['weight_hh_l0'], dropout=wdrop)
def __init__(self, ntoken, ninp, nhid, nlayers, dropout=0.5, dropouth=0.5,
             dropouti=0.5, dropoute=0.1, wdrop=0, tie_weights=False):
    super(WeightDropLSTM, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    self.rnns = [
        torch.nn.LSTM(ninp if l == 0 else nhid,
                      nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
                      1, dropout=0, batch_first=True)
        for l in range(nlayers)
    ]
    if wdrop:
        self.rnns = [
            WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
            for rnn in self.rnns
        ]
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(nhid, ntoken)
    if tie_weights:
        # Optionally tie weights
        self.decoder.weight = self.encoder.weight
    self.init_weights()
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.tie_weights = tie_weights
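# A hypothetical instantiation of WeightDropLSTM above with AWD-LSTM-style
# hyperparameters (values illustrative, not from the source). Note that with
# tie_weights=True this constructor would require nhid == ninp for the shared
# weight to fit the decoder.
model = WeightDropLSTM(ntoken=10000, ninp=400, nhid=1150, nlayers=3,
                       dropout=0.4, dropouth=0.25, dropouti=0.4, dropoute=0.1,
                       wdrop=0.5, tie_weights=False)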
def __init__(self, ntoken, args):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.drop = nn.Dropout(args.dropout)
    self.ninp = args.emsize
    self.encoder = nn.Embedding(ntoken, self.ninp)
    self.nhid = args.nhid
    self.nlayers = args.nlayers
    self.dropout = args.dropout
    self.dropouti = args.dropouti
    self.dropouth = args.dropouth
    self.dropoute = args.dropoute
    self._max_span_length_ = args.max_span_length
    self.wdrop = args.wdrop
    self.tie_weights = args.tie_weights
    self.max_span_length = args.max_span_length
    self._cxt_size_ = args.cxtsize
    self._rrnn_size_ = args.rrnn_size
    self.nonlinearity = torch.tanh
    self.rnns = []
    for l in range(self.nlayers):
        in_size = self.ninp if l == 0 else self.nhid
        out_size = self.nhid if l != self.nlayers - 1 else (self.ninp if self.tie_weights else self.nhid)
        self.rnns.append(torch.nn.LSTM(in_size, out_size, 1, dropout=0, batch_first=False))
    if self.wdrop:
        self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=self.wdrop, variational=False)
                     for rnn in self.rnns]
    self._att_ = SpanScorer(input_size=self.nhid,
                            hidden_size=args.parser_size,
                            rrnn_size=self._rrnn_size_,
                            context_size=self._cxt_size_,
                            drop=self.dropouth,
                            max_span_length=self.max_span_length)
    self._hidden_layer_ = nn.Linear(self._cxt_size_, self.nhid, bias=True)
    self._hidden_gate_ = nn.Linear(self.nhid + self._cxt_size_, self.nhid, bias=True)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(self.nhid, ntoken)
    if self.tie_weights:
        self.decoder.weight = self.encoder.weight
    self.init_weights()
def __init__(self, rnn_type, ntoken, nemoji, ninp, nhid, nlayers, dropout=0.5,
             dropouth=0.5, dropouti=0.5, dropoute=0.1, wdrop=0):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    self.rnns = [
        torch.nn.LSTM(ninp if l == 0 else nhid, nhid, 1, dropout=0)
        for l in range(nlayers)
    ]
    if wdrop:
        self.rnns = [
            WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop)
            for rnn in self.rnns
        ]
    print(self.rnns)
    self.rnns = torch.nn.ModuleList(self.rnns)
    self.decoder = nn.Linear(nhid, nemoji)
    self.rnn_type = rnn_type
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
def __init__(self, **kwargs):
    super(BaselineLSTM, self).__init__()
    # Instantiate embeddings
    embedding_weights = kwargs.get('embeddings')
    self.hidden_dim = kwargs.get('hidden_dim', 50)
    self.vocab_size = embedding_weights.shape[0]
    self.word_embedding_dim = embedding_weights.shape[1]
    self.embeddings = Embedding(self.vocab_size, self.word_embedding_dim)
    self.embeddings.weight = Parameter(torch.zeros(self.vocab_size, self.word_embedding_dim),
                                       requires_grad=False)
    self.embeddings.weight.data = torch.from_numpy(embedding_weights)
    # Initialize LSTM
    lstm_in_dim = self.word_embedding_dim
    layers = kwargs.get("layers", 1)
    self.lstm = LSTM(lstm_in_dim, self.hidden_dim, bidirectional=True,
                     num_layers=layers, batch_first=True)
    # Classify from last-out (each direction)
    self.fc1 = Linear(self.hidden_dim * 2, 20)
    self.fc2 = Linear(20, 1)
    positive_weight = kwargs.get('positive_weight', 0.5)
    self.pos_weight = (1 - positive_weight) / positive_weight
    self.criterion = BCEWithLogitsLoss()
    self._initialize_biases(self.lstm, is_GRU=False)
    self._initialize_lstm_weights(self.lstm, orthogonal=kwargs.get("orthogonal"))
    self._init_fc_parameters(self.fc1)
    self._init_fc_parameters(self.fc2)
    drop_connect = kwargs.get('drop_connect', .5)
    if drop_connect > 0.:
        # Drop-connect every hidden-to-hidden matrix, forward and reverse;
        # e.g. for layers=2 this yields ['weight_hh_l0', 'weight_hh_l1',
        # 'weight_hh_l0_reverse', 'weight_hh_l1_reverse'].
        base_weights = ['weight_hh_l{}', 'weight_hh_l{}_reverse']
        hh_weights = [w.format(l) for w in base_weights for l in range(layers)]
        self.lstm = WeightDrop(self.lstm, hh_weights, drop_connect)
    dropout = kwargs.get('dropout', .5)
    self.dropout = Dropout(dropout, inplace=True)
def __init__(self, ntoken, ninp, nhid, nlayers, dropout, dropout_h, dropout_i,
             dropout_e, weight_drop, weight_tying):
    super().__init__()
    self.ninp = ninp
    self.nhid = nhid
    self.nlayers = nlayers
    self.weight_tying = weight_tying
    self.dropout = dropout
    self.dropout_h = dropout_h
    self.dropout_i = dropout_i
    self.dropout_e = dropout_e
    self.variational_dropout = VariationalDropout()
    self.encoder = nn.Embedding(ntoken, ninp)
    self.rnns = nn.ModuleList([
        WeightDrop(nn.LSTM(self.get_input_size(i), self.get_hidden_size(i)),
                   ["weight_hh_l0"], weight_drop)
        for i in range(nlayers)
    ])
    self.decoder = nn.Linear(ninp if weight_tying else nhid, ntoken)
    if self.weight_tying:
        self.decoder.weight = self.encoder.weight
    self.init_weights()
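# The constructor above delegates per-layer sizing to helpers defined
# elsewhere in its source. Given the sizing pattern used throughout the other
# snippets, they presumably reduce to something like this (an assumption, not
# source code):
def get_input_size(self, layer):
    # The first layer consumes embeddings; deeper layers consume hidden states.
    return self.ninp if layer == 0 else self.nhid

def get_hidden_size(self, layer):
    # The last layer must emit embedding-sized states when weights are tied.
    if layer == self.nlayers - 1:
        return self.ninp if self.weight_tying else self.nhid
    return self.nhid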
itos.append(" ")
print(itos)
stoi = dict([(itos[i], i) for i in range(len(itos))])

halfSequenceLength = int(args.sequence_length / 2)

import random
import torch
print(torch.__version__)

from weight_drop import WeightDrop

# Create the neural model
rnn = torch.nn.LSTM(args.char_embedding_size, args.hidden_dim, args.layer_num).cuda()
rnn_parameter_names = [name for name, _ in rnn.named_parameters()]
print(rnn_parameter_names)

# This WeightDrop variant takes (name, dropout_rate) pairs, allowing separate
# rates for input-to-hidden and hidden-to-hidden matrices.
rnn_drop = WeightDrop(rnn,
                      [(name, args.weight_dropout_in)
                       for name, _ in rnn.named_parameters()
                       if name.startswith("weight_ih_")] +
                      [(name, args.weight_dropout_hidden)
                       for name, _ in rnn.named_parameters()
                       if name.startswith("weight_hh_")])

output = torch.nn.Linear(args.hidden_dim, len(itos) - 1 + 3).cuda()  # -1, because whitespace doesn't actually appear
char_embeddings = torch.nn.Embedding(num_embeddings=len(itos) - 1 + 3,
                                     embedding_dim=args.char_embedding_size).cuda()

logsoftmax = torch.nn.LogSoftmax(dim=2)

train_loss = torch.nn.NLLLoss(ignore_index=0)
# reduction='none' replaces the deprecated size_average=False, reduce=False.
print_loss = torch.nn.NLLLoss(reduction='none', ignore_index=0)
char_dropout = torch.nn.Dropout2d(p=args.char_dropout_prob)

modules = [rnn, output, char_embeddings]

def parameters():
    for module in modules:
        for param in module.parameters():
            yield param
def __init__(self, ntoken, ninp, nhid, dropout=0.5, dropouth=0.5, dropouti=0.5,
             dropoute=0.1, wdrop=0.5, nsamples=10, temperature=65, frequencies=None,
             bias=True, bias_reg=1., dist_fn='eucl', activation_fn='logsoftmax'):
    super(RNNModel, self).__init__()
    self.lockdrop = LockedDropout()
    self.idrop = nn.Dropout(dropouti)
    self.hdrop = nn.Dropout(dropouth)
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    self.rnn = torch.nn.RNN(ninp, nhid, 1, dropout=0)
    self.rnn = WeightDrop(self.rnn, ['weight_hh_l0'], dropout=wdrop)
    print(self.rnn)
    # Initialize bias
    self.bias_reg = bias_reg
    if bias:
        self.decoder = nn.Linear(nhid, ntoken)
        self.bias = self.decoder.bias
    else:
        self.bias = None
    self.init_weights(bias)
    # Store input arguments
    self.ninp = ninp
    self.nhid = nhid
    self.dropout = dropout
    self.dropouti = dropouti
    self.dropouth = dropouth
    self.dropoute = dropoute
    self.wdrop = wdrop
    # Nonlinearity needs to be the same as for the RNN!
    self.nonlinearity = nn.Tanh()
    self.nsamples = nsamples
    self.temp = temperature
    self.ntoken = ntoken
    self.sampler = NegativeSampler(
        self.nsamples,
        torch.ones(self.ntoken) if frequencies is None else frequencies)
    # Set activation
    if activation_fn == 'logsoftmax':
        self.activation = log_softmax
    elif activation_fn == 'logsigmoid':
        self.activation = log_sigmoid
    else:
        self.activation = None
    # Set distance function
    if dist_fn == 'eucl':
        self.dist_fn = eucl_distance
    elif dist_fn == 'dot':
        self.dist_fn = dot_distance
    elif dist_fn == 'poinc':
        self.dist_fn = pairwise_poinc_distance
    else:
        self.dist_fn = cone_distance
from weight_drop import WeightDrop

# Input is (seq, batch, input)
x = torch.autograd.Variable(torch.randn(2, 1, 10)).cuda()
h0 = None

###

print('Testing WeightDrop')
print('=-=-=-=-=-=-=-=-=-=')

###

print('Testing WeightDrop with Linear')
lin = WeightDrop(torch.nn.Linear(10, 10), ['weight'], dropout=0.9)
lin.cuda()
# Two forward passes sample two different DropConnect masks, so the outputs
# should differ. (Loop variable renamed from x to y to avoid shadowing the input.)
run1 = [y.sum() for y in lin(x).data]
run2 = [y.sum() for y in lin(x).data]
print('All items should be different')
print('Run 1:', run1)
print('Run 2:', run2)
assert run1[0] != run2[0]
assert run1[1] != run2[1]
print('---')

###
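# A sketch extending the same check to a weight-dropped LSTM (mirrors the
# Linear test above; illustrative, not verbatim from the source). The first
# timestep depends only on weight_ih (the initial hidden state is zero), so it
# should match across runs, while later timesteps pass through the dropped
# weight_hh_l0 and should differ.
print('Testing WeightDrop with LSTM')
wdrnn = WeightDrop(torch.nn.LSTM(10, 10), ['weight_hh_l0'], dropout=0.9)
wdrnn.cuda()
run1 = [y.sum() for y in wdrnn(x, h0)[0].data]
run2 = [y.sum() for y in wdrnn(x, h0)[0].data]
print('First timesteps should be equal, all others should differ')
print('Run 1:', run1)
print('Run 2:', run2)
assert run1[0] == run2[0]
assert run1[1] != run2[1]
print('---')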