import random

import minitorch


def train(self, data, learning_rate, max_epochs=500, log_fn=default_log_fn):
    self.model = Network(self.hidden_layers, self.backend)
    optim = minitorch.SGD(self.model.parameters(), learning_rate)
    BATCH = 10
    losses = []

    for epoch in range(1, max_epochs + 1):
        total_loss = 0.0
        # Reshuffle the dataset each epoch so minibatches differ between epochs.
        c = list(zip(data.X, data.y))
        random.shuffle(c)
        X_shuf, y_shuf = zip(*c)

        for i in range(0, len(X_shuf), BATCH):
            optim.zero_grad()
            X = minitorch.tensor(X_shuf[i : i + BATCH], backend=self.backend)
            y = minitorch.tensor(y_shuf[i : i + BATCH], backend=self.backend)

            # Forward
            out = self.model.forward(X).view(y.shape[0])
            prob = (out * y) + (out - 1.0) * (y - 1.0)
            loss = -prob.log()
            (loss / y.shape[0]).sum().view(1).backward()
            total_loss += loss.sum().view(1)[0]

            # Update
            optim.step()

        losses.append(total_loss)

        # Logging
        if epoch % 10 == 0 or epoch == max_epochs:
            X = minitorch.tensor(data.X, backend=self.backend)
            y = minitorch.tensor(data.y, backend=self.backend)
            out = self.model.forward(X).view(y.shape[0])
            y2 = minitorch.tensor(data.y)
            correct = int(((out.get_data() > 0.5) == y2).sum()[0])
            log_fn(epoch, total_loss, correct, losses)
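# The `prob` expression above is the standard selector for binary labels:
# with y in {0, 1}, (out * y) + (out - 1.0) * (y - 1.0) reduces to `out`
# when y == 1 and to `1 - out` when y == 0, so -prob.log() is per-example
# binary cross-entropy on probabilities. A float-only sanity check:
for out_v, y_v in [(0.9, 1.0), (0.9, 0.0), (0.2, 1.0), (0.2, 0.0)]:
    p = (out_v * y_v) + (out_v - 1.0) * (y_v - 1.0)
    assert abs(p - (out_v if y_v == 1.0 else 1.0 - out_v)) < 1e-12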
def train(self, data, learning_rate, max_epochs=500, log_fn=default_log_fn):
    self.learning_rate = learning_rate
    self.max_epochs = max_epochs
    self.model = Network(self.hidden_layers)
    optim = minitorch.SGD(self.model.parameters(), learning_rate)

    losses = []
    for epoch in range(1, self.max_epochs + 1):
        total_loss = 0.0
        correct = 0
        optim.zero_grad()

        # Forward: one backward call per sample. Gradients are zeroed only
        # once per epoch, so the per-sample (loss / N) gradients accumulate
        # into a full-batch gradient before the single optim.step() below.
        for i in range(data.N):
            x_1, x_2 = data.X[i]
            y = data.y[i]
            x_1 = minitorch.Scalar(x_1)
            x_2 = minitorch.Scalar(x_2)
            out = self.model.forward((x_1, x_2))

            if y == 1:
                prob = out
                correct += 1 if out.data > 0.5 else 0
            else:
                prob = -out + 1.0
                correct += 1 if out.data < 0.5 else 0

            loss = -prob.log()
            (loss / data.N).backward()
            total_loss += loss.data

        losses.append(total_loss)

        # Update
        optim.step()

        # Logging
        if epoch % 10 == 0 or epoch == max_epochs:
            log_fn(epoch, total_loss, correct, losses)
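# Hypothetical driver for the scalar trainer above, assuming the project's
# usual scaffolding (a ScalarTrain wrapper class and the minitorch.datasets
# registry); none of these names appear in this excerpt.
if __name__ == "__main__":
    PTS = 50     # number of 2-D training points
    HIDDEN = 2   # hidden-layer size passed through to Network
    RATE = 0.5   # SGD learning rate
    data = minitorch.datasets["Simple"](PTS)
    ScalarTrain(HIDDEN).train(data, RATE)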
def train(self, data, learning_rate, max_epochs=500, log_fn=default_log_fn):
    self.learning_rate = learning_rate
    self.max_epochs = max_epochs
    self.model = Network(self.hidden_layers)
    optim = minitorch.SGD(self.model.parameters(), learning_rate)
    X = minitorch.tensor(data.X)
    y = minitorch.tensor(data.y)

    losses = []
    for epoch in range(1, self.max_epochs + 1):
        total_loss = 0.0
        correct = 0
        optim.zero_grad()

        # Forward
        out = self.model.forward(X).view(data.N)
        prob = (out * y) + (out - 1.0) * (y - 1.0)
        loss = -prob.log()
        (loss / data.N).sum().view(1).backward()
        total_loss = loss.sum().view(1)[0]
        losses.append(total_loss)

        # Update
        optim.step()

        # Logging
        if epoch % 10 == 0 or epoch == max_epochs:
            y2 = minitorch.tensor(data.y)
            correct = int(((out.get_data() > 0.5) == y2).sum()[0])
            log_fn(epoch, total_loss, correct, losses)
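# The three loops above all call log_fn(epoch, total_loss, correct, losses).
# A minimal stand-in with that assumed signature (the project's real
# default_log_fn is not shown in this excerpt and may print more):
def default_log_fn(epoch, total_loss, correct, losses):
    print("Epoch", epoch, " loss", total_loss, " correct", correct)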
def train(
    self,
    data_train,
    learning_rate,
    batch_size=10,
    max_epochs=500,
    data_val=None,
    log_fn=default_log_fn,
):
    model = self.model
    (X_train, y_train) = data_train
    n_training_samples = len(X_train)
    optim = minitorch.SGD(self.model.parameters(), learning_rate)
    losses = []
    train_accuracy = []
    validation_accuracy = []

    for epoch in range(1, max_epochs + 1):
        total_loss = 0.0
        model.train()
        train_predictions = []
        batch_size = min(batch_size, n_training_samples)

        for batch_num, example_num in enumerate(
            range(0, n_training_samples, batch_size)
        ):
            y = minitorch.tensor(
                y_train[example_num : example_num + batch_size], backend=BACKEND
            )
            x = minitorch.tensor(
                X_train[example_num : example_num + batch_size], backend=BACKEND
            )
            x.requires_grad_(True)
            y.requires_grad_(True)

            # Forward: mean binary cross-entropy over the batch.
            optim.zero_grad()  # reset any stale gradients before backward
            out = model.forward(x)
            prob = (out * y) + (out - 1.0) * (y - 1.0)
            loss = -(prob.log() / y.shape[0]).sum()
            loss.view(1).backward()

            # Save train predictions
            train_predictions += get_predictions_array(y, out)
            total_loss += loss[0]

            # Update
            optim.step()

        # Evaluate on the validation set at the end of the epoch
        validation_predictions = []
        if data_val is not None:
            (X_val, y_val) = data_val
            model.eval()
            y = minitorch.tensor(y_val, backend=BACKEND)
            x = minitorch.tensor(X_val, backend=BACKEND)
            out = model.forward(x)
            validation_predictions += get_predictions_array(y, out)
            validation_accuracy.append(get_accuracy(validation_predictions))
            model.train()

        train_accuracy.append(get_accuracy(train_predictions))
        losses.append(total_loss)
        log_fn(
            epoch,
            total_loss,
            losses,
            train_predictions,
            train_accuracy,
            validation_predictions,
            validation_accuracy,
        )
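# The sentiment loop above relies on two helpers not shown in this excerpt.
# Plausible sketches, with signatures inferred from the call sites only:
# `out` is assumed to hold per-example probabilities and `y` hard 0/1 labels.
def get_predictions_array(y, out):
    predictions = []
    for j in range(out.shape[0]):
        predicted = 1.0 if out[j] > 0.5 else 0.0
        predictions.append((y[j], predicted, out[j]))
    return predictions


def get_accuracy(predictions):
    correct = sum(1 for true_label, predicted, _ in predictions if true_label == predicted)
    return correct / max(len(predictions), 1)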
def train(
    self, data_train, data_val, learning_rate, max_epochs=500, log_fn=default_log_fn
):
    (X_train, y_train) = data_train
    (X_val, y_val) = data_val
    self.model = Network()
    model = self.model
    n_training_samples = len(X_train)
    optim = minitorch.SGD(self.model.parameters(), learning_rate)
    losses = []

    for epoch in range(1, max_epochs + 1):
        total_loss = 0.0
        model.train()

        for batch_num, example_num in enumerate(
            range(0, n_training_samples, BATCH)
        ):
            # Drop the final batch (which may be ragged) so the view to
            # (BATCH, 1, H, W) stays valid.
            if n_training_samples - example_num <= BATCH:
                continue
            y = minitorch.tensor(
                y_train[example_num : example_num + BATCH], backend=BACKEND
            )
            x = minitorch.tensor(
                X_train[example_num : example_num + BATCH], backend=BACKEND
            )
            x.requires_grad_(True)
            y.requires_grad_(True)

            # Forward: y is one-hot over C classes, so (out * y).sum(1)
            # picks out the score of the true class for each example.
            optim.zero_grad()  # reset any stale gradients before backward
            out = model.forward(x.view(BATCH, 1, H, W)).view(BATCH, C)
            prob = (out * y).sum(1)
            loss = -(prob / y.shape[0]).sum()
            assert loss.backend == BACKEND
            loss.view(1).backward()
            total_loss += loss[0]
            losses.append(total_loss)

            # Update
            optim.step()

            if batch_num % 5 == 0:
                model.eval()
                # Evaluate on one held-out validation batch
                correct = 0
                for val_example_num in range(0, 1 * BATCH, BATCH):
                    y = minitorch.tensor(
                        y_val[val_example_num : val_example_num + BATCH],
                        backend=BACKEND,
                    )
                    x = minitorch.tensor(
                        X_val[val_example_num : val_example_num + BATCH],
                        backend=BACKEND,
                    )
                    out = model.forward(x.view(BATCH, 1, H, W)).view(BATCH, C)
                    # Argmax over class scores, then compare to the one-hot label.
                    for i in range(BATCH):
                        m = -1000
                        ind = -1
                        for j in range(C):
                            if out[i, j] > m:
                                ind = j
                                m = out[i, j]
                        if y[i, ind] == 1.0:
                            correct += 1
                log_fn(epoch, total_loss, correct, losses, model)

                total_loss = 0.0
                model.train()
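# The MNIST loop above references module-level constants that are not
# defined in this excerpt. A plausible set for MNIST-sized inputs (the
# backend choice and batch size are assumptions; C, H, W follow MNIST):
BACKEND = minitorch.TensorBackend(minitorch.FastOps)
BATCH = 16        # minibatch size
C = 10            # number of classes (digits 0-9)
H, W = 28, 28     # image height and width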