def __init__(self, input_dim=None, hidden_dim=200, output_dim=100,
             batch_size=1, p_dropout=0.2, num_layers=2):
    super(LSTMModel, self).__init__()
    self.hidden_dim = hidden_dim
    self.batch_size = batch_size
    self.bidirectional = True
    self.num_layers = num_layers
    self.bidir_mult = 2 if self.bidirectional else 1
    self.dimension_mult = self.num_layers * self.bidir_mult
    # The LSTM takes sequences of spectrograms/MFCCs as inputs and outputs hidden states
    # with dimensionality hidden_dim.
    self.lstm = to_gpu(nn.LSTM(input_dim,
                               hidden_dim,
                               bidirectional=self.bidirectional,
                               num_layers=self.num_layers,
                               dropout=p_dropout))
    self.dropout_1 = nn.Dropout(p_dropout)
    # The linear layer that maps from hidden state space to tag space
    self.hidden2tag = to_gpu(nn.Linear(hidden_dim * self.bidir_mult, output_dim))
    self.reset_hidden()
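# Standalone sketch (not part of LSTMModel above, and using plain torch rather than
# the repo's to_gpu helper) showing why hidden2tag takes hidden_dim * bidir_mult
# input features: a bidirectional LSTM concatenates the forward and backward
# hidden states along the feature axis. The dimensions below are assumptions
# chosen for illustration.
#
# import torch
# import torch.nn as nn
#
# input_dim, hidden_dim, num_layers = 40, 200, 2
# lstm = nn.LSTM(input_dim, hidden_dim, bidirectional=True, num_layers=num_layers)
#
# seq_len, batch = 17, 4
# x = torch.randn(seq_len, batch, input_dim)  # default layout: (seq, batch, feature)
# out, (h, c) = lstm(x)
#
# print(out.shape)  # torch.Size([17, 4, 400]) -> hidden_dim * 2, what hidden2tag consumes
# print(h.shape)    # torch.Size([4, 4, 200])  -> num_layers * num_directions = dimension_mult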
def __getitem__(self, idx):
    if idx < self.batches:
        # a random input, labelled with the wrapped model's own output
        x = to_gpu(torch.randn(self.x_shape))
        y = to_gpu(self.model(Variable(x)).data)
        return (x, y)
    else:
        raise StopIteration()
def init_hidden(self, batch_size=None):
    if batch_size is None:
        batch_size = self.batch_size
    # Before we've done anything, we don't have any hidden state.
    # Refer to the PyTorch documentation to see exactly
    # why it has this dimensionality.
    # The axes semantics are (num_layers * num_directions, batch_size, hidden_dim),
    # which is what dimension_mult captures.
    return (autograd.Variable(
                to_gpu(torch.zeros(self.dimension_mult, batch_size, self.hidden_dim))),
            autograd.Variable(
                to_gpu(torch.zeros(self.dimension_mult, batch_size, self.hidden_dim))))
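# Standalone sketch of how such an initial (h0, c0) pair is consumed by nn.LSTM;
# the sizes are assumptions, and to_gpu/autograd.Variable are omitted since plain
# tensors suffice in current PyTorch.
#
# import torch
# import torch.nn as nn
#
# num_layers, hidden_dim, input_dim, batch_size = 2, 200, 40, 3
# dimension_mult = num_layers * 2  # bidirectional -> 2 directions
#
# lstm = nn.LSTM(input_dim, hidden_dim, num_layers=num_layers, bidirectional=True)
# h0 = torch.zeros(dimension_mult, batch_size, hidden_dim)
# c0 = torch.zeros(dimension_mult, batch_size, hidden_dim)
#
# x = torch.randn(10, batch_size, input_dim)
# out, (hn, cn) = lstm(x, (h0, c0))
# print(hn.shape)  # torch.Size([4, 3, 200])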
def forward(self, last_action=None, last_action_pos=None):
    '''
    One step of the RNN model.
    :param last_action: batch of ints, all equaling None for the first step
    :param last_action_pos: ignored, used by the attention decoder, here just to
        keep the signature consistent
    :return: logits of shape batch_size x max_seq_length x output_feature_size
    Note: the encoder output self.enc_output has shape batch x z_size, so
    sequences of encodings are not supported.
    '''
    if self.hidden is None:
        # first step after reset; done here because the batch size
        # might differ from one sequence to the next
        self.hidden = self.init_hidden(batch_size=self.batch_size)
        self.one_hot_action = to_gpu(
            torch.zeros(self.batch_size, self.output_feature_size))
    encoded = self.encode(self.enc_output, last_action)
    # copy the latent state along the length of the sequence, instead of sampling inputs
    embedded = F.relu(self.fc_input(self.batch_norm(encoded))) \
        .view(self.batch_size, 1, self.hidden_n) \
        .repeat(1, self.max_seq_length, 1)
    embedded = self.dropout_1(embedded)
    # run the GRU on it
    out_3, self.hidden = self.gru_1(embedded, self.hidden)
    # tmp has shape (batch_size*seq_len) x hidden_n, so the linear transform
    # can be applied to it directly
    tmp = self.dropout_2(out_3.contiguous().view(-1, self.hidden_n))
    out = self.fc_out(tmp).view(self.batch_size,
                                self.max_seq_length,
                                self.output_feature_size)
    # just return the logits
    return out
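# Standalone sketch of the decoding pattern used in forward() above: a single
# latent vector is repeated max_seq_length times, fed to a GRU, and a linear
# layer maps every time step to logits. All layer sizes are assumptions, and
# batch_norm/dropout/encode are omitted for brevity.
#
# import torch
# import torch.nn as nn
# import torch.nn.functional as F
#
# batch_size, hidden_n, max_seq_length, output_feature_size = 4, 64, 15, 30
#
# fc_input = nn.Linear(hidden_n, hidden_n)
# gru = nn.GRU(hidden_n, hidden_n, batch_first=True)
# fc_out = nn.Linear(hidden_n, output_feature_size)
#
# z = torch.randn(batch_size, hidden_n)          # the encoded latent state
# embedded = F.relu(fc_input(z)) \
#     .view(batch_size, 1, hidden_n) \
#     .repeat(1, max_seq_length, 1)              # copy along the sequence axis
# out, hidden = gru(embedded)                    # initial hidden defaults to zeros
# logits = fc_out(out.contiguous().view(-1, hidden_n)) \
#     .view(batch_size, max_seq_length, output_feature_size)
# print(logits.shape)  # torch.Size([4, 15, 30])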
def encode(self, x):
    '''
    :param x: a numpy array, batch x seq x feature
    :return: the model outputs as a numpy array on the CPU
    '''
    out, hidden = self.forward(to_gpu(Variable(FloatTensor(x))))
    return out.data.cpu().numpy()
def gen():
    # TODO: cast to float earlier?
    # Iterate directly instead of calling next() in a while loop, so an
    # exhausted iterable ends the generator cleanly (PEP 479) rather than
    # leaking StopIteration.
    for x in self.iterable:
        x = to_gpu(x.float())
        # autoencoder-style pairs: the target is the input itself
        yield (x, x)
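# Standalone usage sketch of such (x, x) pairs feeding a reconstruction loss.
# The identity-style model, the data, and the MSE objective are placeholders,
# not taken from the repo.
#
# import torch
#
# def pairs(iterable):
#     # mirror of gen(): yield (input, target) where the target is the input itself
#     for x in iterable:
#         x = x.float()
#         yield (x, x)
#
# data = [torch.randn(8, 10) for _ in range(3)]
# model = torch.nn.Linear(10, 10)
# loss_fn = torch.nn.MSELoss()
#
# for x, target in pairs(data):
#     loss = loss_fn(model(x), target)
#     print(loss.item())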
def init_hidden(self, batch_size):
    h1 = Variable(to_gpu(torch.zeros(self.dimension_mult, batch_size, self.hidden_n)),
                  requires_grad=False)
    return h1
def init_hidden(self, batch_size):
    # NOTE: assumes a unidirectional RNN, so the first dimension is just num_layers
    h1 = Variable(to_gpu(torch.zeros(self.num_layers, batch_size, self.hidden_n)),
                  requires_grad=False)
    return h1