# Assumed imports for this module; LSTM is taken to be torch.nn.LSTM
# (or a drop-in custom LSTM provided elsewhere in the repo).
import pickle

import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import LSTM


class lstmwrapper(nn.Module):
    def __init__(self, input_size=66529, output_size=5952, hidden_size=52, num_layers=16,
                 batch_first=True, dropout=0.1):
        super(lstmwrapper, self).__init__()
        self.lstm = LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                         batch_first=batch_first, dropout=dropout)
        self.output = nn.Linear(hidden_size, output_size)
        self.bn = nn.BatchNorm1d(input_size)
        self.reset_parameters()

    def reset_parameters(self):
        self.lstm.reset_parameters()
        self.output.reset_parameters()

    def forward(self, input, hx=None):
        # note: nn.BatchNorm1d normalizes over dim 1, so a (batch, seq, feature)
        # input needs its feature axis moved to position 1 first
        # (lstmwrapperJ and PriorLSTM below permute before the batch norm)
        input = self.bn(input)
        output, statetuple = self.lstm(input, hx)
        return self.output(output)
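# Shape sketch (illustration only, not part of the original module): nn.BatchNorm1d
# expects the feature/channel axis in dim 1, i.e. (N, C) or (N, C, L). A batch_first
# sequence arrives as (batch, seq_len, features), so the wrappers below permute to
# (batch, features, seq_len) before the batch norm and permute back afterwards.
def _demo_batchnorm_on_sequences():
    bn = nn.BatchNorm1d(8)                       # 8 features per timestep
    seq = torch.randn(4, 10, 8)                  # (batch, seq_len, features)
    normed = bn(seq.permute(0, 2, 1))            # (batch, features, seq_len)
    return normed.permute(0, 2, 1).contiguous()  # back to (batch, seq_len, features)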
class LSTM_vocab(nn.Module):
    def __init__(self, vocab_size=50000, vocab_embed_d=512, output_size=12, hidden_size=256,
                 *args, **kwargs):
        super(LSTM_vocab, self).__init__()
        self.src_word_emb = nn.Embedding(vocab_size, vocab_embed_d, padding_idx=0)
        self.lstm = LSTM(input_size=vocab_embed_d, hidden_size=hidden_size, *args, **kwargs)
        self.output = nn.Linear(hidden_size, output_size)
        self.reset_parameters()

    def reset_parameters(self):
        self.lstm.reset_parameters()
        self.output.reset_parameters()

    def forward(self, input, hx=None):
        input = self.src_word_emb(input)
        output, statetuple = self.lstm(input, hx)
        # this is a design decision that can be experimented with
        output = self.output(output)
        # output = torch.max(output, dim=1)[0]
        output = output[:, -1, :]
        return output
class LSTMWrapper(nn.Module):
    def __init__(self, output_size=12, hidden_size=256, *args, **kwargs):
        super(LSTMWrapper, self).__init__()
        self.lstm = LSTM(hidden_size=hidden_size, *args, **kwargs)
        self.output = nn.Linear(hidden_size, output_size)
        self.reset_parameters()

    def reset_parameters(self):
        self.lstm.reset_parameters()
        self.output.reset_parameters()

    def forward(self, input, hx=None):
        output, statetuple = self.lstm(input, hx)
        # this is a design decision that can be experimented with
        output = self.output(output)
        # output = torch.max(output, dim=1)[0]
        output = output[:, -1, :]
        return output
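# Usage sketch (not part of the original module): drives LSTMWrapper on a toy batch
# and contrasts the last-timestep readout used in forward() with the max-over-time
# pooling left commented out there. The input_size/seq_len values are made up.
def _demo_lstmwrapper_readout():
    model = LSTMWrapper(output_size=12, hidden_size=256,
                        input_size=32, num_layers=1, batch_first=True)
    x = torch.randn(4, 20, 32)                   # (batch, seq_len, input_size)
    last_step = model(x)                         # forward() keeps the final timestep -> (4, 12)
    seq_logits = model.output(model.lstm(x)[0])  # per-timestep logits -> (4, 20, 12)
    max_pool = seq_logits.max(dim=1)[0]          # the commented-out max-over-time alternative
    return last_step, max_pool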
class lstmwrapperJ(nn.Module):
    def __init__(self, input_size=52686, output_size=2976, hidden_size=128, num_layers=16,
                 batch_first=True, dropout=0.1):
        super(lstmwrapperJ, self).__init__()
        self.lstm = LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                         batch_first=batch_first, dropout=dropout)
        self.bn = nn.BatchNorm1d(input_size)
        self.output = nn.Linear(hidden_size, output_size)
        self.reset_parameters()
        for name, param in self.named_parameters():
            print(name, param.data.shape)

    def reset_parameters(self):
        self.lstm.reset_parameters()
        self.output.reset_parameters()

    def forward(self, input, hx=None):
        # BatchNorm1d wants (batch, features, seq_len), so move the feature axis to dim 1
        input = input.permute(0, 2, 1).contiguous()
        try:
            bnout = self.bn(input)
            # zero out any NaNs the batch norm produced (x != x is true only for NaN)
            bnout[(bnout != bnout).detach()] = 0
        except ValueError:
            # BatchNorm1d cannot compute batch statistics on a single training sample
            if input.shape[0] == 1:
                print("Somehow the batch size is one for this input")
                bnout = input
            else:
                raise
        input = bnout.permute(0, 2, 1).contiguous()
        output, statetuple = self.lstm(input, hx)
        output = self.output(output)  # (batch_size, seq_len, target_dim)
        # pdb.set_trace()
        # output = output.sum(1)
        output = output.max(1)[0]  # max-over-time pooling across the sequence
        return output
class Stock_LSTM(nn.Module):
    """
    I prefer using this Stock LSTM for numerical stability.
    """

    def __init__(self, x, R, W, h, L, v_t):
        super(Stock_LSTM, self).__init__()
        self.x = x
        self.R = R
        self.W = W
        self.h = h
        self.L = L
        self.v_t = v_t
        self.LSTM = LSTM(input_size=self.x + self.R * self.W, hidden_size=h, num_layers=L,
                         batch_first=True, dropout=0.1)
        self.last = nn.Linear(self.h, self.v_t)
        self.st = None

    def forward(self, input_x):
        """
        :param input_x: input and memory values
        :return:
        """
        assert (self.st is not None)
        o, st = self.LSTM(input_x, self.st)
        if (st[0] != st[0]).any():
            # dump the model and the offending input for offline debugging
            with open("debug/lstm.pkl", "wb") as f:
                pickle.dump(self, f)
                pickle.dump(input_x, f)
            raise ValueError("LSTM produced a NaN, objects dumped.")
        return self.last(o), st

    def reset_parameters(self):
        self.LSTM.reset_parameters()
        self.last.reset_parameters()

    def assign_states_tuple(self, states_tuple):
        self.st = states_tuple
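# Usage sketch (hypothetical sizes, assuming LSTM is torch.nn.LSTM): Stock_LSTM
# requires its recurrent state to be assigned via assign_states_tuple() before
# forward() is called, and hands the new state back to the caller instead of
# storing it itself.
def _demo_stock_lstm_state_handling():
    model = Stock_LSTM(x=16, R=2, W=8, h=32, L=2, v_t=10)
    batch, seq_len = 4, 5
    h0 = torch.zeros(2, batch, 32)                 # (num_layers, batch, hidden_size)
    c0 = torch.zeros(2, batch, 32)
    model.assign_states_tuple((h0, c0))
    inp = torch.randn(batch, seq_len, 16 + 2 * 8)  # input_size = x + R * W
    out, st = model(inp)                           # out: (batch, seq_len, v_t)
    model.assign_states_tuple(st)                  # carry the state to the next call
    return out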
class PriorLSTM(nn.Module):
    def __init__(self, prior, input_size=52686, output_size=2976, hidden_size=128, num_layers=16,
                 batch_first=True, dropout=0.1):
        super(PriorLSTM, self).__init__()
        self.lstm = LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                         batch_first=batch_first, dropout=dropout)
        self.bn = nn.BatchNorm1d(input_size)
        self.output = nn.Linear(hidden_size, output_size)
        self.reset_parameters()

        '''prior'''
        # this is the prior probability of each label predicting true
        # this is added to the logit
        self.prior = prior
        if isinstance(self.prior, np.ndarray):
            self.prior = torch.from_numpy(self.prior).float()
            self.prior = Variable(self.prior, requires_grad=False)
        elif isinstance(self.prior, torch.Tensor):
            self.prior = Variable(self.prior, requires_grad=False)
        else:
            assert (isinstance(self.prior, Variable))

        # transform to logits
        # because we are using sigmoid, not softmax, self.prior = log(P(y)) - log(P(not y))
        # sigmoid_input = z + self.prior
        # z = log(P(x|y)) - log(P(x|not y))
        # sigmoid output is the posterior positive
        self.prior = self.prior.clamp(1e-8, 1 - 1e-8)
        self.prior = torch.log(self.prior) - torch.log(1 - self.prior)
        a = Variable(torch.Tensor([0]))
        self.prior = torch.cat((a, self.prior))
        self.prior = self.prior.cuda()

        for name, param in self.named_parameters():
            print(name, param.data.shape)
        print("Using prior lstm")

    def reset_parameters(self):
        self.lstm.reset_parameters()
        self.output.reset_parameters()

    def forward(self, input, hx=None):
        input = input.permute(0, 2, 1).contiguous()
        bnout = self.bn(input)
        # zero out any NaNs produced by the batch norm
        bnout[(bnout != bnout).detach()] = 0
        input = bnout.permute(0, 2, 1).contiguous()
        output, statetuple = self.lstm(input, hx)
        output = self.output(output)  # (batch_size, seq_len, target_dim)
        # pdb.set_trace()
        # output = output.sum(1)
        output = output.max(1)[0]
        output = output + self.prior
        return output
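# Worked sketch (toy numbers, not part of the original module) of the prior-to-logit
# trick in PriorLSTM: with a per-label prior p, the bias log(p) - log(1 - p) is added
# to the model logit z, so sigmoid(z + bias) recovers the posterior when z approximates
# the log-likelihood ratio log P(x|y) - log P(x|not y).
def _demo_prior_logit_shift():
    p = torch.tensor([0.01, 0.25, 0.90]).clamp(1e-8, 1 - 1e-8)
    prior_logit = torch.log(p) - torch.log(1 - p)  # per-label prior log-odds
    z = torch.zeros(3)                             # uninformative likelihood ratio
    posterior = torch.sigmoid(z + prior_logit)     # recovers p itself: [0.01, 0.25, 0.90]
    return posterior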