def __init__(self, input_size, hidden_size, memory_size, output_size):
    """Create one ``mod.GD_LSTM`` layer and a trainable ``w_out`` matrix.

    Args:
        input_size: First positional argument forwarded to ``mod.GD_LSTM``.
        hidden_size: Second and fourth positional argument forwarded to
            ``mod.GD_LSTM``; also the number of columns of ``w_out``.
        memory_size: Third positional argument forwarded to ``mod.GD_LSTM``.
        output_size: Number of rows of ``w_out``.
    """
    super(Single_LSTM, self).__init__()

    # Define model
    self.lstm = mod.GD_LSTM(input_size, hidden_size, memory_size, hidden_size)

    # Trainable (output_size, hidden_size) matrix filled by torch.rand.
    # Presumably the hidden-to-output projection; its use is outside this
    # method -- confirm against forward().
    # requires_grad is a boolean flag, so pass True rather than the integer 1.
    self.w_out = Parameter(
        torch.rand(output_size, hidden_size), requires_grad=True
    )
def __init__(self, input_size, hidden_size, memory_size, output_size):
    """Create three ``mod.GD_LSTM`` layers and three trainable output matrices.

    Args:
        input_size: First positional argument of the first ``mod.GD_LSTM``.
        hidden_size: Used for the remaining size arguments of every
            ``mod.GD_LSTM`` (including the first argument of the second and
            third layers) and as the number of columns of each ``w_out*``.
        memory_size: Third positional argument of every ``mod.GD_LSTM``.
        output_size: Number of rows of each ``w_out*`` matrix.
    """
    super(Stacked_LSTM, self).__init__()

    # Define model
    # Only the first layer receives input_size; the later layers are built
    # with hidden_size as their first argument.
    self.lstm1 = mod.GD_LSTM(input_size, hidden_size, memory_size, hidden_size)
    self.lstm2 = mod.GD_LSTM(hidden_size, hidden_size, memory_size, hidden_size)
    self.lstm3 = mod.GD_LSTM(hidden_size, hidden_size, memory_size, hidden_size)

    # One trainable (output_size, hidden_size) matrix per layer, each filled
    # by torch.rand. requires_grad is a boolean flag, so pass True rather
    # than the integer 1.
    self.w_out1 = Parameter(
        torch.rand(output_size, hidden_size), requires_grad=True
    )
    self.w_out2 = Parameter(
        torch.rand(output_size, hidden_size), requires_grad=True
    )
    self.w_out3 = Parameter(
        torch.rand(output_size, hidden_size), requires_grad=True
    )
def __init__(self, input_size, hidden_size, memory_size, output_size, n_vocab):
    """Create an embedding, two ``mod.GD_LSTM`` layers, dropout and ``w_out2``.

    After the sub-modules are created, every tensor yielded by
    ``self.parameters()`` is re-initialised with Kaiming-normal init.

    Args:
        input_size: Not used by this constructor (see the review note on
            ``embedding_dim`` below).
        hidden_size: Used for the size arguments of both ``mod.GD_LSTM``
            layers (other than the first layer's first argument) and as the
            number of columns of ``w_out2``.
        memory_size: Third positional argument of both ``mod.GD_LSTM`` layers.
        output_size: Number of rows of ``w_out2``.
        n_vocab: The embedding table is created with ``n_vocab + 1`` rows.
    """
    super(Stacked_LSTM, self).__init__()

    # Define model
    # NOTE(review): `embedding_dim` is neither a parameter nor a local of this
    # method, so it has to resolve from an enclosing (e.g. module) scope, while
    # `input_size` is never read. Confirm which of the two was intended.
    # The extra row (n_vocab + 1) presumably reserves one additional index
    # (e.g. padding/unknown) -- TODO confirm.
    self.embeddings = nn.Embedding(n_vocab + 1, embedding_dim)
    self.lstm1 = mod.GD_LSTM(embedding_dim, hidden_size, memory_size, hidden_size)
    self.lstm2 = mod.GD_LSTM(hidden_size, hidden_size, memory_size, hidden_size)
    self.dropout1 = nn.Dropout(0.1)
    self.dropout2 = nn.Dropout(0.1)

    # requires_grad is a boolean flag, so pass True rather than the integer 1.
    self.w_out2 = Parameter(
        torch.rand(output_size, hidden_size), requires_grad=True
    )

    # `torch.nn.init.kaiming_normal` is a deprecated alias of the in-place
    # `kaiming_normal_`. Prefer the current name and fall back to the old one
    # only when the installed torch does not provide it. The lookup is done
    # once, outside the loop.
    kaiming_init = getattr(torch.nn.init, "kaiming_normal_", None)
    if kaiming_init is None:
        kaiming_init = torch.nn.init.kaiming_normal

    # The original left xavier_normal, orthogonal and sparse(sparsity=0.5)
    # commented out as alternative initialisers.
    # NOTE(review): Kaiming init needs tensors with at least 2 dimensions, so
    # this loop assumes every registered parameter (including those inside
    # mod.GD_LSTM) is at least 2-D -- confirm. It also overwrites the
    # torch.rand values of w_out2 and the default embedding weights.
    for param in self.parameters():
        kaiming_init(param)