def train_batch(self, batch) -> BatchResult:
    X, y = batch
    if self.device:
        X = X.to(self.device)
        y = y.to(self.device)

    # TODO: Train the PyTorch model on one batch of data.
    # - Forward pass
    # - Backward pass
    # - Optimize params
    # - Calculate number of correct predictions
    # ====== YOUR CODE: ======
    # We'll start with the forward pass, which will also give us our predictions
    y_pred = self.model(X)
    # Zero the gradients left over from the previous step
    self.optimizer.zero_grad()
    # Compute the loss with respect to the true labels
    loss = self.loss_fn(y_pred, y)
    # Run the backward pass
    loss.backward()
    # Perform the optimization step
    self.optimizer.step()
    # All that's left is to compute the number of correct classifications
    model_pred = torch.argmax(y_pred, dim=1)
    num_correct = torch.sum(model_pred == y).item()
    loss = loss.item()
    # ========================

    return BatchResult(loss, num_correct)
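# --- Example (illustrative, not part of the original trainers) ---
# Every train_batch/test_batch here shares one contract: consume a single batch
# and return a BatchResult(loss, num_correct). The real BatchResult is defined
# elsewhere in the codebase; this is a minimal sketch of the assumed contract
# and a driver loop, with all names (fit_epoch, trainer) hypothetical.
from collections import namedtuple

BatchResult = namedtuple("BatchResult", ["loss", "num_correct"])

def fit_epoch(trainer, dataloader):
    """Drive train_batch over one epoch; aggregate mean loss and accuracy."""
    losses, num_correct, num_samples = [], 0, 0
    for batch in dataloader:
        res = trainer.train_batch(batch)
        losses.append(res.loss)
        num_correct += res.num_correct
        num_samples += batch[1].shape[0]  # batch = (X, y)
    return sum(losses) / len(losses), num_correct / num_samples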
def train_batch(self, batch) -> BatchResult:
    X, y = batch

    # TODO: Train the Block model on one batch of data.
    # - Forward pass
    # - Backward pass
    # - Optimize params
    # - Calculate number of correct predictions
    # ====== YOUR CODE: ======
    # We'll start with the forward pass, which will also give us our predictions
    y_pred = self.model.forward(X)
    # Now we can compute the loss with respect to the true labels
    loss = self.loss_fn.forward(y_pred, y)
    # Now we can compute the gradients with respect to the loss
    loss_grad = self.loss_fn.backward(loss)
    grads = self.model.backward(loss_grad)
    # And finally perform the optimization step
    self.optimizer.step()
    # All that's left is to compute the number of correct classifications
    model_pred = torch.argmax(y_pred, dim=1)
    num_correct = torch.sum(model_pred == y).item()
    # ========================

    return BatchResult(loss, num_correct)
def test_batch(self, batch) -> BatchResult:
    x, y = batch
    x = x.to(self.device, dtype=torch.float)  # (B,S,V)
    y = y.to(self.device, dtype=torch.long)   # (B,S)
    seq_len = y.shape[1]

    with torch.no_grad():
        # TODO: Evaluate the RNN model on one batch of data.
        # - Forward pass
        # - Loss calculation
        # - Calculate number of correct predictions
        # ====== YOUR CODE: ======
        # Forward pass, carrying the hidden state across batches
        y_hat, self.hidden_train = self.model(x, self.hidden_train)
        # Flatten scores to (B*S, V) and labels to (B*S,) for the loss
        y = y.view(-1)
        y_hat = y_hat.view(-1, y_hat.shape[-1])
        loss = self.loss_fn(y_hat, y)
        # Calculate number of correct predictions
        num_correct = (y_hat.argmax(dim=1) == y).sum()
        # ========================

    return BatchResult(loss.item(), num_correct.item() / seq_len)
def train_batch(self, batch) -> BatchResult:
    x, y = batch
    x = x.to(self.device, dtype=torch.float)  # (B,S,V)
    y = y.to(self.device, dtype=torch.long)   # (B,S)
    seq_len = y.shape[1]

    # TODO: Train the RNN model on one batch of data.
    # - Forward pass
    # - Calculate total loss over sequence
    # - Backward pass (BPTT)
    # - Update params
    # - Calculate number of correct char predictions
    # ====== YOUR CODE: ======
    y_scores, self.hidden_state = self.model(x, hidden_state=self.hidden_state)
    # Transpose to (B,V,S) to match the shape the loss function
    # (CrossEntropy in the notebook) expects
    y_scores = torch.transpose(y_scores, 1, 2)
    loss = self.loss_fn(y_scores, y)
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
    # Detach the hidden state so the next batch does not backprop through this one
    self.hidden_state = self.hidden_state.detach()
    y_pred = torch.argmax(y_scores, dim=1)
    num_correct = torch.sum(y == y_pred)
    # ========================

    # Note: scaling num_correct by seq_len because each sample has seq_len
    # different predictions.
    return BatchResult(loss.item(), num_correct.item() / seq_len)
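# --- Example (illustrative) ---
# Two equivalent CrossEntropyLoss layouts appear throughout the RNN snippets:
# transposing the scores to (B,V,S) so the class dim comes second, or
# flattening them to (B*S,V). With the default 'mean' reduction both give the
# same loss, which a quick self-contained check confirms:
import torch
import torch.nn as nn

B, S, V = 4, 10, 8
scores = torch.randn(B, S, V)
y = torch.randint(0, V, (B, S))
loss_fn = nn.CrossEntropyLoss()

loss_t = loss_fn(scores.transpose(1, 2), y)             # (B,V,S) vs (B,S)
loss_f = loss_fn(scores.reshape(-1, V), y.reshape(-1))  # (B*S,V) vs (B*S,)
assert torch.allclose(loss_t, loss_f)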
def test_batch(self, batch) -> BatchResult:
    X, y = batch
    if self.device:
        X = X.to(self.device)
        y = y.to(self.device)

    with torch.no_grad():
        # TODO: Evaluate the PyTorch model on one batch of data.
        # - Forward pass
        # - Calculate number of correct predictions
        # ====== YOUR CODE: ======
        # We'll start with the forward pass, which will also give us our predictions
        y_pred = self.model(X)
        # Now we can compute the loss with respect to the true labels
        loss = self.loss_fn(y_pred, y).item()
        # All that's left is to compute the number of correct classifications
        model_pred = torch.argmax(y_pred, dim=1)
        num_correct = torch.sum(model_pred == y).item()
        # ========================

    return BatchResult(loss, num_correct)
def train_batch(self, batch) -> BatchResult:
    X, y = batch

    # TODO: Train the Block model on one batch of data.
    # - Forward pass
    # - Backward pass
    # - Optimize params
    # - Calculate number of correct predictions
    # ====== YOUR CODE: ======
    # Flatten each sample and run the forward pass
    x = X.reshape(X.shape[0], -1)
    scores = self.model(x)
    loss = self.loss_fn(scores, y)
    # Backward pass through the loss and the model, then optimize
    self.optimizer.zero_grad()
    dout = self.loss_fn.backward(loss)
    self.model.backward(dout)
    self.optimizer.step()
    # Count correct predictions from the same forward pass
    y_pred = torch.argmax(scores, dim=1)
    num_correct = torch.sum(y == y_pred).item()
    # ========================

    return BatchResult(loss.item(), num_correct)
def train_batch(self, batch) -> BatchResult:
    x, _ = batch
    x = x.to(self.device)  # Image batch (N,C,H,W)

    # TODO: Train a VAE on one batch.
    # ====== YOUR CODE: ======
    # Forward pass
    xr, mu, log_sigma2 = self.model(x)
    # Compute the loss
    loss, data_loss, kldiv_loss = self.loss_fn(
        x=x, xr=xr, z_mu=mu, z_log_sigma2=log_sigma2
    )
    # Zero gradients, perform a backward pass, and update the weights
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
    # ========================

    return BatchResult(loss.item(), 1 / data_loss.item())
def test_batch(self, batch) -> BatchResult:
    x, y = batch
    x = x.to(self.device, dtype=torch.float)  # (B,S,V)
    y = y.to(self.device, dtype=torch.long)   # (B,S)
    seq_len = y.shape[1]

    with torch.no_grad():
        # TODO: Evaluate the RNN model on one batch of data.
        # - Forward pass
        # - Loss calculation
        # - Calculate number of correct predictions
        # ====== YOUR CODE: ======
        if self.h is not None:
            self.h = self.h.detach()
        # Forward pass
        y_pred, self.h = self.model(x, self.h)
        # Loss calculation, flattening scores to (B*S, V) and labels to (B*S,)
        loss = self.loss_fn(y_pred.view(-1, x.shape[-1]), y.view(-1))
        # Calculate number of correct char predictions
        y_pred_idx = y_pred.argmax(dim=2)
        num_correct = torch.sum(y == y_pred_idx)
        # ========================

    return BatchResult(loss.item(), num_correct.item() / seq_len)
def train_batch(self, batch) -> BatchResult:
    x, y = batch
    x = x.to(self.device, dtype=torch.float)  # (B,S,V)
    y = y.to(self.device, dtype=torch.long)   # (B,S)
    seq_len = y.shape[1]

    # TODO: Train the RNN model on one batch of data.
    # - Forward pass
    # - Calculate total loss over sequence
    # - Backward pass (BPTT)
    # - Update params
    # - Calculate number of correct char predictions
    # ====== YOUR CODE: ======
    self.optimizer.zero_grad()
    logits, hidden_state = self.model(x, self.hidden_state)
    # Detach the carried hidden state so BPTT stops at the batch boundary
    self.hidden_state = hidden_state.detach()
    # Flatten scores to (B*S, V) and labels to (B*S,) for the loss
    logits = logits.view(-1, x.shape[-1])
    y = y.view(-1)
    loss = self.loss_fn(logits, y)
    loss.backward()
    self.optimizer.step()
    num_correct = torch.sum(torch.argmax(logits, dim=1) == y)
    # ========================

    # Note: scaling num_correct by seq_len because each sample has seq_len
    # different predictions.
    return BatchResult(loss.item(), num_correct.item() / seq_len)
def train_batch(self, batch) -> BatchResult:
    X, y = batch
    if self.device:
        X = X.to(self.device)
        y = y.to(self.device)

    # TODO: Train the PyTorch model on one batch of data.
    # - Forward pass
    # - Backward pass
    # - Optimize params
    # - Calculate number of correct predictions
    # ====== YOUR CODE: ======
    # Forward pass and loss
    scores = self.model(X)
    loss = self.loss_fn(scores, y)
    # Zero gradients, perform a backward pass, and update the weights
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
    # Count correct predictions from the same forward pass
    y_pred = torch.argmax(scores, dim=1)
    num_correct = torch.sum(y == y_pred).item()
    loss = loss.item()
    # ========================

    return BatchResult(loss, num_correct)
def train_batch(self, batch) -> BatchResult:
    X, y = batch
    if self.device:
        X = X.to(self.device)
        y = y.to(self.device)

    # TODO: Train the PyTorch model on one batch of data.
    # - Forward pass
    # - Backward pass
    # - Optimize params
    # - Calculate number of correct predictions
    # ====== YOUR CODE: ======
    self.optimizer.zero_grad()
    # Forward pass
    y_hat = self.model(X)
    loss = self.loss_fn(y_hat, y)
    # Backward pass
    loss.backward()
    # Optimize params
    self.optimizer.step()
    # Calculate number of correct predictions
    num_correct = (y_hat.argmax(dim=1) == y).sum().item()
    # ========================

    return BatchResult(loss.item(), num_correct)
def test_batch(self, batch) -> BatchResult:
    x, y = batch
    x = x.to(self.device, dtype=torch.float)  # (B,S,V)
    y = y.to(self.device, dtype=torch.long)   # (B,S)
    seq_len = y.shape[1]

    with torch.no_grad():
        # TODO: Evaluate the RNN model on one batch of data.
        # - Forward pass
        # - Loss calculation
        # - Calculate number of correct predictions
        # ====== YOUR CODE: ======
        y_scores, self.h = self.model(x, self.h)
        # Flatten scores to (B*S, V) and labels to (B*S,) for the loss
        y_scores = y_scores.reshape(-1, y_scores.shape[-1])
        y = y.reshape(-1)
        loss = self.loss_fn(y_scores, y)
        y_pred = torch.argmax(y_scores, dim=1)
        num_correct = torch.sum(y_pred == y).float()
        # ========================

    return BatchResult(loss.item(), num_correct.item() / seq_len)
def train_batch(self, batch) -> BatchResult:
    X, y = batch
    if self.device:
        X = X.to(self.device)
        y = y.to(self.device)

    # TODO: Train the PyTorch model on one batch of data.
    # - Forward pass
    # - Backward pass
    # - Optimize params
    # - Calculate number of correct predictions
    # ====== YOUR CODE: ======
    self.optimizer.zero_grad()
    out = self.model(X)
    loss = self.loss_fn(out, y)
    # The max-scoring class per sample is the prediction
    _, y_pred = out.max(1)
    num_correct = torch.sum(y_pred == y).item()
    loss.backward()
    self.optimizer.step()
    loss = loss.item()
    # ========================

    return BatchResult(loss, num_correct)
def test_batch(self, batch) -> BatchResult:
    x, y = batch
    x = x.to(self.device, dtype=torch.float)  # (B,S,V)
    y = y.to(self.device, dtype=torch.long)   # (B,S)
    seq_len = y.shape[1]

    with torch.no_grad():
        # TODO: Evaluate the RNN model on one batch of data.
        # - Forward pass
        # - Loss calculation
        # - Calculate number of correct predictions
        # ====== YOUR CODE: ======
        y_scores, self.hidden_state = self.model(x, self.hidden_state)
        # Transpose to (B,V,S) for CrossEntropy
        y_scores = torch.transpose(y_scores, 1, 2)
        # Compute the loss
        loss = self.loss_fn(y_scores, y)
        y_pred = torch.argmax(y_scores, dim=1)
        num_correct = torch.sum(y == y_pred)
        # ========================

    return BatchResult(loss.item(), num_correct.item() / seq_len)
def train_batch(self, batch) -> BatchResult:
    x, y = batch
    x = x.to(self.device, dtype=torch.float)  # (B,S,V)
    y = y.to(self.device, dtype=torch.long)   # (B,S)
    seq_len = y.shape[1]

    # TODO: Train the RNN model on one batch of data.
    # - Forward pass
    # - Calculate total loss over sequence
    # - Backward pass (BPTT)
    # - Update params
    # - Calculate number of correct char predictions
    # ====== YOUR CODE: ======
    y_scores, self.hidden_state = self.model(x, self.hidden_state)
    # Transpose to (B,V,S) for CrossEntropy
    y_scores = torch.transpose(y_scores, 1, 2)
    # Compute the loss
    loss = self.loss_fn(y_scores, y)
    # Zero gradients, perform a backward pass, and update the weights
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
    # Detach the hidden state so the next batch does not backprop through this one
    self.hidden_state.detach_()
    y_pred = torch.argmax(y_scores, dim=1)
    num_correct = torch.sum(y == y_pred)
    # ========================

    # Note: scaling num_correct by seq_len because each sample has seq_len
    # different predictions.
    return BatchResult(loss.item(), num_correct.item() / seq_len)
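# --- Example (illustrative) ---
# Several of the RNN variants above detach the carried hidden state between
# batches. Without that, the next backward() would try to propagate through the
# previous batch's (already freed) graph and raise a runtime error. A
# self-contained toy version of the pattern; the GRU and linear head are
# stand-ins for the char-level models above:
import torch
import torch.nn as nn

model = nn.GRU(input_size=8, hidden_size=16, batch_first=True)
head = nn.Linear(16, 8)
optimizer = torch.optim.SGD(
    list(model.parameters()) + list(head.parameters()), lr=0.1
)
loss_fn = nn.CrossEntropyLoss()

hidden = None
for _ in range(3):  # three consecutive batches of the same stream
    x = torch.randn(4, 10, 8)          # (B,S,V)
    y = torch.randint(0, 8, (4, 10))   # (B,S)
    optimizer.zero_grad()
    out, hidden = model(x, hidden)
    logits = head(out).reshape(-1, 8)  # (B*S,V) for CrossEntropyLoss
    loss = loss_fn(logits, y.reshape(-1))
    loss.backward()
    optimizer.step()
    hidden = hidden.detach()           # truncate BPTT at the batch boundary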
def train_batch(self, batch) -> BatchResult:
    X, y = batch
    if self.device:
        X = X.to(self.device)
        y = y.to(self.device)

    # TODO: Train the PyTorch model on one batch of data.
    # - Forward pass
    # - Backward pass
    # - Optimize params
    # - Calculate number of correct predictions
    # ====== YOUR CODE: ======
    # Zero the parameter gradients
    self.optimizer.zero_grad()
    # Forward + backward + optimize
    outputs = self.model(X)
    loss = self.loss_fn(outputs, y)
    loss.backward()
    self.optimizer.step()
    # Softmax is monotonic, so argmax over the raw scores picks the same class;
    # count the matches directly instead of looping over the batch
    num_correct = (outputs.argmax(dim=1) == y).sum().item()
    # ========================

    return BatchResult(loss.item(), num_correct)
def train_batch(self, batch) -> BatchResult:
    x, y = batch
    x = x.to(self.device, dtype=torch.float)  # (B,S,V)
    y = y.to(self.device, dtype=torch.long)   # (B,S)
    seq_len = y.shape[1]

    # TODO: Train the RNN model on one batch of data.
    # - Forward pass
    # - Calculate total loss over sequence
    # - Backward pass (BPTT)
    # - Update params
    # - Calculate number of correct char predictions
    # ====== YOUR CODE: ======
    self.optimizer.zero_grad()
    # (B,S,V) scores, (B, num_layers, h_dim) hidden state
    pred, self.hidden_state = self.model(x, self.hidden_state)
    pred = pred.permute(0, 2, 1)         # (B,V,S) for CrossEntropyLoss
    y_pred = torch.argmax(pred, dim=-2)  # (B,S)
    loss = self.loss_fn(pred, y)
    loss.backward()
    self.optimizer.step()
    num_correct = torch.sum(y == y_pred)
    # Detach the hidden state so the next batch does not backprop through this one
    self.hidden_state.detach_()
    # ========================

    # Note: scaling num_correct by seq_len because each sample has seq_len
    # different predictions.
    return BatchResult(loss.item(), num_correct.item() / seq_len)
def train_batch(self, batch) -> BatchResult:
    X, y = batch
    X = X.to(self.device, dtype=torch.float)  # (B,S,V)
    y = y.to(self.device, dtype=torch.long)   # (B,S)
    seq_len = y.shape[1]

    # TODO: Train the RNN model on one batch of data.
    # - Forward pass
    # - Calculate total loss over sequence
    # - Backward pass (BPTT)
    # - Update params
    # - Calculate number of correct char predictions
    self.optimizer.zero_grad()
    chars_scores, self.h = self.model(X, hidden_state=self.h)
    # Transpose to (B,V,S) to match the shape CrossEntropyLoss expects
    scores = chars_scores.transpose(1, 2)
    loss = self.loss_fn(scores, y)
    loss.backward()
    self.optimizer.step()
    # Detach the hidden state so the next batch does not backprop through this one
    self.h.detach_()
    y_hat = torch.argmax(scores, dim=1)
    num_correct = torch.sum(y_hat == y)

    # Note: scaling num_correct by seq_len because each sample has seq_len
    # different predictions.
    return BatchResult(loss.item(), num_correct.item() / seq_len)
def train_batch(self, batch) -> BatchResult:
    x, y = batch
    x = x.to(self.device, dtype=torch.float)  # (B,S,V)
    y = y.to(self.device, dtype=torch.long)   # (B,S)
    seq_len = y.shape[1]

    # TODO: Train the RNN model on one batch of data.
    # - Forward pass
    # - Calculate total loss over sequence
    # - Backward pass (BPTT)
    # - Update params
    # - Calculate number of correct char predictions
    # ====== YOUR CODE: ======
    self.optimizer.zero_grad()
    output, self.hidden_state = self.model(x, self.hidden_state)
    # Total loss over the sequence: sum of the per-timestep losses
    loss = sum(
        self.loss_fn(output[:, i, :], y[:, i]) for i in range(seq_len)
    )
    loss.backward()
    self.optimizer.step()
    # Detach the hidden state so the next batch does not backprop through this one
    self.hidden_state = self.hidden_state.detach()
    p = output.argmax(dim=-1)
    num_correct = torch.sum(p == y, (0, 1))
    # ========================

    # Note: scaling num_correct by seq_len because each sample has seq_len
    # different predictions.
    return BatchResult(loss.item(), num_correct.item() / seq_len)
def test_batch(self, batch) -> BatchResult:
    x, y = batch
    x = x.to(self.device, dtype=torch.float)  # (B,S,V)
    y = y.to(self.device, dtype=torch.long)   # (B,S)
    seq_len = y.shape[1]
    batch_size = y.shape[0]

    with torch.no_grad():
        # TODO: Evaluate the RNN model on one batch of data.
        # - Forward pass
        # - Loss calculation
        # - Calculate number of correct predictions
        # ====== YOUR CODE: ======
        loss = 0
        num_correct = 0
        # Feed one sequence at a time, carrying the hidden state
        for seq_idx in range(batch_size):
            output, h_state = self.model(
                x[seq_idx, :, :].unsqueeze(0), self.last_hidden_state
            )
            self.last_hidden_state = h_state
            # Accumulate the loss over the batch
            loss += self.loss_fn(output[0, :, :], y[seq_idx, :])
            # Count correct results
            _, max_indices = output[0, :, :].max(1)
            num_correct += (max_indices == y[seq_idx, :]).sum().item()
        # ========================

    return BatchResult(loss.item(), num_correct / seq_len)
def train_batch(self, batch) -> BatchResult:
    x, y = batch
    x = x.to(self.device, dtype=torch.float)  # (B,S,V)
    y = y.to(self.device, dtype=torch.long)   # (B,S)
    seq_len = y.shape[1]

    # TODO: Train the RNN model on one batch of data.
    # - Forward pass
    # - Calculate total loss over sequence
    # - Backward pass (BPTT)
    # - Update params
    # - Calculate number of correct char predictions
    # ====== YOUR CODE: ======
    # Option 1 (not used): truncated BPTT over the characters of the batch,
    # detaching the hidden state every k1 = 80 steps and optimizing per step.
    # Option 2 (below): one batch is a short enough window for full BPTT.

    # Forward pass
    self.optimizer.zero_grad()
    if self.h is not None:
        self.h = self.h.detach()
    y_pred, self.h = self.model(x, self.h)
    # Backward pass
    loss = self.loss_fn(y_pred.view(-1, x.shape[-1]), y.view(-1))
    loss.backward()
    # Weight updates
    self.optimizer.step()
    # Calculate number of correct char predictions
    y_pred_idx = y_pred.argmax(dim=2)
    num_correct = torch.sum(y == y_pred_idx)
    # ========================

    # Note: scaling num_correct by seq_len because each sample has seq_len
    # different predictions.
    return BatchResult(loss.item(), num_correct.item() / seq_len)
def train_batch(self, batch) -> BatchResult:
    x, _ = batch
    x = x.to(self.device)  # Image batch (N,C,H,W)

    # TODO: Train a VAE on one batch.
    # ====== YOUR CODE: ======
    self.optimizer.zero_grad()
    # Forward pass: reconstruction and latent-posterior parameters
    xr, mu, log_sigma2 = self.model(x)
    loss, data_loss, _ = self.loss_fn(x, xr, mu, log_sigma2)
    loss.backward()
    self.optimizer.step()
    # ========================

    return BatchResult(loss.item(), 1 / data_loss.item())
def test_batch(self, batch) -> BatchResult:
    X, y = batch

    # TODO: Evaluate the Block model on one batch of data.
    # - Forward pass
    # - Calculate number of correct predictions
    class_scores = self.model.forward(X)
    y_hat = torch.argmax(class_scores, dim=1)
    num_correct = torch.sum(y_hat == y).item()
    loss = self.loss_fn.forward(class_scores, y)

    return BatchResult(loss, num_correct)
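# --- Example (illustrative) ---
# The Block variants above assume a manual-autograd interface: forward caches
# what it needs, loss_fn.backward produces the upstream gradient, and
# model.backward pushes it down while accumulating parameter gradients. A
# minimal sketch of that assumed interface; LinearBlock is a hypothetical name,
# not one of the original course classes:
import torch

class LinearBlock:
    def __init__(self, in_dim, out_dim):
        self.w = torch.randn(out_dim, in_dim) * 0.01
        self.dw = torch.zeros_like(self.w)  # gradient accumulator

    def forward(self, x):
        self._x = x  # cache the input for the backward pass
        return x @ self.w.t()

    def backward(self, dout):
        self.dw += dout.t() @ self._x  # accumulate the parameter gradient
        return dout @ self.w           # gradient w.r.t. the block's input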
def test_batch(self, batch) -> BatchResult:
    x, _ = batch
    x = x.to(self.device)  # Image batch (N,C,H,W)

    with torch.no_grad():
        # TODO: Evaluate a VAE on one batch.
        # ====== YOUR CODE: ======
        xr, mu, log_sigma2 = self.model(x)
        loss, data_loss, _ = self.loss_fn(x, xr, mu, log_sigma2)
        # ========================

    return BatchResult(loss.item(), 1 / data_loss.item())
def test_batch(self, batch) -> BatchResult:
    X, y = batch

    # TODO: Evaluate the Block model on one batch of data.
    # - Forward pass
    # - Calculate number of correct predictions
    # ====== YOUR CODE: ======
    class_scores = self.model.forward(X)
    loss = self.loss_fn.forward(class_scores, y)
    y_hat = torch.argmax(class_scores, dim=1)
    num_correct = torch.sum(y_hat == y).item()
    # ========================

    return BatchResult(loss, num_correct)
def train_batch(self, batch) -> BatchResult:
    x, y = batch
    x = x.to(self.device, dtype=torch.float)  # (B,S,V)
    y = y.to(self.device, dtype=torch.long)   # (B,S)
    seq_len = y.shape[1]
    batch_size = y.shape[0]

    # TODO: Train the RNN model on one batch of data.
    # - Forward pass
    # - Calculate total loss over sequence
    # - Backward pass (BPTT)
    # - Update params
    # - Calculate number of correct char predictions
    # ====== YOUR CODE: ======
    self.optimizer.zero_grad()
    loss = 0
    num_correct = 0
    # Forward pass over the whole batch, carrying the hidden state;
    # detach it so the next batch does not backprop through this one
    output, h_state = self.model.forward(x, self.last_hidden_state)
    self.last_hidden_state = h_state.detach()
    for seq_idx in range(batch_size):
        # Accumulate the per-sample loss over the sequence
        loss += self.loss_fn(output[seq_idx, :, :], y[seq_idx, :])
        # Count correct results
        _, max_indices = output[seq_idx, :, :].max(1)
        num_correct += (max_indices == y[seq_idx, :]).sum().item()
    loss.backward()
    self.optimizer.step()
    # ========================

    # Note: scaling num_correct by seq_len because each sample has seq_len
    # different predictions.
    return BatchResult(loss.item(), num_correct / seq_len)
def test_batch(self, batch) -> BatchResult:
    x, _ = batch
    x = x.to(self.device)  # Image batch (N,C,H,W)

    with torch.no_grad():
        # TODO: Evaluate a VAE on one batch.
        # ====== YOUR CODE: ======
        xr, mu, log_sigma2 = self.model(x)
        loss, data_loss, _ = self.loss_fn(x, xr, mu, log_sigma2)
        # ========================

    return BatchResult(loss.item(), 1 / data_loss.item())
def train_batch(self, batch) -> BatchResult:
    X, y = batch

    # TODO: Train the Block model on one batch of data.
    # - Forward pass
    # - Backward pass
    # - Optimize params
    # - Calculate number of correct predictions
    # ====== YOUR CODE: ======
    # Forward pass through the model and the loss
    y_pred = self.model.forward(X)
    loss = self.loss_fn.forward(y_pred, y)
    # Backward pass through the loss and the model, then optimize
    loss_grad = self.loss_fn.backward(loss)
    self.model.backward(loss_grad)
    self.optimizer.step()
    # Calculate number of correct predictions
    y_hat = torch.argmax(y_pred, dim=1)
    num_correct = torch.sum(y_hat == y).item()
    # ========================

    return BatchResult(loss, num_correct)
def test_batch(self, batch) -> BatchResult:
    X, y = batch

    # ====== YOUR CODE: ======
    # Forward pass
    y_hat = self.model.forward(X)
    loss = self.loss_fn(y_hat, y)
    # Calculate number of correct predictions
    num_correct = (y_hat.argmax(dim=1) == y).sum().item()
    # ========================

    return BatchResult(loss, num_correct)
def train_batch(self, batch) -> BatchResult:
    x, _ = batch
    x = x.to(self.device)  # Image batch (N,C,H,W)

    # TODO: Train a VAE on one batch.
    # ====== YOUR CODE: ======
    self.optimizer.zero_grad()
    # Forward pass: reconstruction and latent-posterior parameters
    xr, mu, log_sigma2 = self.model(x)
    loss, data_loss, _ = self.loss_fn(x, xr, mu, log_sigma2)
    loss.backward()
    self.optimizer.step()
    # ========================

    return BatchResult(loss.item(), 1 / data_loss.item())
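# --- Example (illustrative) ---
# The VAE snippets assume a loss_fn returning a (total, reconstruction, KL)
# triple, which is why train/test report 1/data_loss as the score. A hedged
# sketch of such a function under the usual Gaussian decoder and standard-normal
# prior; the argument names mirror the calls above, but the exact weighting in
# the original codebase may differ:
import torch

def vae_loss(x, xr, z_mu, z_log_sigma2, x_sigma2=1.0):
    N = x.shape[0]
    # Batch-averaged squared reconstruction error, scaled by the decoder variance
    data_loss = torch.sum((x - xr) ** 2) / (N * x_sigma2)
    # Analytic KL(q(z|x) || N(0, I)) for a diagonal-Gaussian posterior
    kldiv_loss = 0.5 * torch.sum(
        z_log_sigma2.exp() + z_mu ** 2 - 1.0 - z_log_sigma2
    ) / N
    loss = data_loss + kldiv_loss
    return loss, data_loss, kldiv_loss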