def train(train_loader, model, criterion, optimizer, epoch):
    """Run one train epoch.

    Iterates over ``train_loader``, doing a forward/backward pass and one
    SGD step per batch; every 10 batches it prints progress and tracks the
    model's weights and gradients via aim's ``track``.
    """
    for i, (input, target) in enumerate(train_loader):
        # NOTE(review): the three assignments below are no-ops kept verbatim
        # from the original; `input` also shadows the builtin of that name.
        target = target
        input_var = input
        target_var = target

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss = loss.float()

        # aim - Track model loss function (disabled)
        # track(aim.loss, 'loss', loss.item())

        if i % 10 == 0:
            print('Epoch: [{0}][{1}/{2}]\t'.format(
                epoch, i, len(train_loader)))

            # aim - Track last layer correlation (disabled)
            # track(aim.label_correlation, 'corr', output, labels=[
            #     'airplane',
            #     'automobile',
            #     'bird',
            #     'cat',
            #     'deer',
            #     'dog',
            #     'frog',
            #     'horse',
            #     'ship',
            #     'truck',
            # ])

            # aim - Track model weights and gradients every 10 batches
            track(aim.weights, model)
            track(aim.gradients, model)
# Forward pass outputs = model(images) loss = criterion(outputs, labels) # Backward and optimize optimizer.zero_grad() loss.backward() optimizer.step() if (i + 1) % 100 == 0: print('Epoch [{}/{}], Step [{}/{}], ' 'Loss: {:.4f}'.format(epoch + 1, num_epochs, i + 1, total_step, loss.item())) # aim - Track model loss function track(aim.label_correlation, 'corr', outputs, labels=[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ])
print('Epoch: [{0}][{1}/{2}]\t'.format( epoch, i, len(train_loader))) # aim - Track last layer correlation # track(aim.label_correlation, 'corr', output, labels=[ # 'airplane', # 'automobile', # 'bird', # 'cat', # 'deer', # 'dog', # 'frog', # 'horse', # 'ship', # 'truck', # ]) track(aim.weights, model) track(aim.gradients, model) # track(aim.checkpoint, # 'checkpoint_test', 'chp_epoch_{}'.format(epoch), # model, epoch, meta={'iteration': i}) for epoch in range(5): train(train_loader, model, criterion, optimizer, epoch) lr_scheduler.step(epoch) track(aim.checkpoint, 'checkpoint_test', 'chp_epoch_{}'.format(epoch), model, epoch, )
from torch.optim.lr_scheduler import ReduceLROnPlateau # Device configuration device = torch.device('cpu') # Hyper parameters num_epochs = 5 num_classes = 10 batch_size = 50 learning_rate = 0.01 # aim - Track hyper parameters track( aim.hyperparams, { 'num_epochs': num_epochs, 'num_classes': num_classes, 'batch_size': batch_size, 'learning_rate': learning_rate, }) # MNIST dataset train_dataset = torchvision.datasets.MNIST(root='./data/', train=True, transform=transforms.ToTensor(), download=True) test_dataset = torchvision.datasets.MNIST(root='./data/', train=False, transform=transforms.ToTensor()) # Data loader
# Forward pass outputs = model(images) loss = criterion(outputs, labels) # Backward and optimize optimizer.zero_grad() loss.backward() optimizer.step() if i % 30 == 0: print('Epoch [{}/{}], Step [{}/{}], ' 'Loss: {:.4f}'.format(epoch + 1, num_epochs, i + 1, total_step, loss.item())) # aim - Track model loss function aim.track(loss.item(), name='loss', epoch=epoch) correct = 0 total = 0 _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item() # aim - Track metrics aim.track(100 * correct / total, name='accuracy', epoch=epoch) # Test the model model.eval() with torch.no_grad(): correct = 0 total = 0
# NOTE(review): interior of a training loop, followed by the evaluation
# section; indentation reconstructed.
optimizer.zero_grad()
loss.backward()
optimizer.step()

if (i + 1) % 100 == 0:
    print('Epoch [{}/{}], Step [{}/{}], '
          'Loss: {:.4f}'.format(epoch + 1, num_epochs, i + 1,
                                total_step, loss.item()))

# Record up to 50 misclassified images for the whole run; `saved_img` is the
# running count kept in the enclosing scope.
for l in range(len(labels)):
    if labels[l].item() != outputs[l].argmax().item():
        if saved_img < 50:
            saved_img += 1

            # aim - Track misclassified images with true vs. predicted label
            img = track(aim.image, images[l])
            track(aim.misclassification, 'miscls',
                  img,
                  labels[l].item(),
                  outputs[l].argmax().item())

# Test the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
import aim

aim.init()

# Hyper-parameters, recorded under the 'params' namespace.
training_params = {
    'num_epochs': 10,
    'fc_units': 128,
}
aim.track(training_params, namespace='params')

# Dataset metadata, recorded under the 'dataset' namespace.
dataset_info = {
    'name': 'Dataset name',
    'version': 'Dataset version',
}
aim.track(dataset_info, namespace='dataset')

# A map tracked without an explicit namespace (lands in the default one).
aim.track({
    'foo': 'bar',
})
lr=0.001, momentum=0.9)
# ^ NOTE(review): orphaned tail of an optimizer constructor — the call's
#   head lies outside this fragment.

# Reduce the LR by 10x as soon as the monitored loss stops improving
# (patience=0 -> react on the first non-improving step).
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=0)

# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    false_positives = 0
    false_negatives = 0
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}]'.format(epoch + 1, num_epochs,
                                                       i + 1, total_step))

    # assumes the schedule steps once per epoch on the last batch's loss —
    # TODO confirm placement against the original file
    scheduler.step(loss)

    # Track learning rates of each param group
    track(aim.learning_rate, optimizer)
import aim

if __name__ == '__main__':
    # Record the run's hyper-parameters up front.
    aim.track({
        'num_epochs': 5,
        'lr': 10,
    }, namespace='hparams')

    # Emit synthetic loss/accuracy curves for two subtasks ('lm' then
    # 'nmt'): every step goes to the train subset, every 10th step also to
    # the validation subset. The subtask-outer loop reproduces the exact
    # call order of the original twin loops.
    for subtask in ('lm', 'nmt'):
        for epoch in range(5):
            for step in range(50):
                aim.track(step, name='loss', epoch=epoch,
                          subset='train', subtask=subtask)
                aim.track(step, name='acc', epoch=epoch,
                          subset='train', subtask=subtask)
                if step % 10 == 0:
                    aim.track(step, name='loss', epoch=epoch,
                              subset='val', subtask=subtask)
                    aim.track(step, name='acc', epoch=epoch,
                              subset='val', subtask=subtask)
import tensorflow as tf
import os

import aim
from aim import track

# A single numeric feature and a linear classifier over it.
input_column = tf.feature_column.numeric_column("x")
estimator = tf.estimator.LinearClassifier(feature_columns=[input_column])


def input_fn():
    """Return a tiny toy dataset of four labelled scalars, batched."""
    features = {"x": [1., 2., 3., 4.]}
    targets = [1, 1, 0, 0]
    dataset = tf.data.Dataset.from_tensor_slices((features, targets))
    return dataset.repeat(200).shuffle(64).batch(16)


estimator.train(input_fn)

# Build the serving input receiver from the same feature columns.
feature_spec = tf.feature_column.make_parse_example_spec([input_column])
serving_input_fn = \
    tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)

# aim - Persist the trained estimator as a checkpoint.
track(aim.checkpoint,
      'checkpoint-test', 'iris',
      estimator, 0,
      fn=serving_input_fn)
from aim import track
import aim

aim.init(True)

# Stress-test metric tracking: emit 100k sequential values.
step = 0
while step < 100000:
    track(aim.metric, 'metric', step)
    step += 1
# Run optimization op (backprop) sess.run(train_op, feed_dict={X: batch_x, Y: batch_y}) if step % display_step == 0 or step == 1: # Calculate batch loss and accuracy loss, acc = sess.run([loss_op, accuracy], feed_dict={ X: batch_x, Y: batch_y, }) print("Step " + str(step) + ", Epoch " + str(e + 1) + ", Minibatch Loss= " + "{:.4f}".format(loss) + ", Training Accuracy= " + "{:.3f}".format(acc)) track(aim.checkpoint, 'checkpoint_test', 'chp_epoch_{}'.format(e), sess, e, lr_rate=learning_rate, meta={ 'learning_rate': learning_rate, 'batch_size': batch_size, 'classes': num_classes, }) learning_rate = learning_rate / 2.0 print("Optimization Finished!") # Calculate accuracy for MNIST test images print("Testing Accuracy:", \ sess.run(accuracy, feed_dict={X: mnist.test.images, Y: mnist.test.labels}))
import torchvision.transforms as transforms # Device configuration device = torch.device('cpu') # Hyper parameters num_epochs = 5 num_classes = 10 batch_size = 50 learning_rate = 0.01 # aim - Track hyper parameters track( aim.hyperparams, { 'num_epochs': num_epochs, 'num_classes': num_classes, 'batch_size': batch_size, 'learning_rate': learning_rate, }) # MNIST dataset train_dataset = torchvision.datasets.MNIST(root='./data/', train=True, transform=transforms.ToTensor(), download=True) test_dataset = torchvision.datasets.MNIST(root='./data/', train=False, transform=transforms.ToTensor()) # Data loader
# Forward pass outputs = model(images) loss = criterion(outputs, labels) # Backward and optimize optimizer.zero_grad() loss.backward() optimizer.step() if (i + 1) % 100 == 0: print('Epoch [{}/{}], Step [{}/{}], ' 'Loss: {:.4f}'.format(epoch + 1, num_epochs, i + 1, total_step, loss.item())) # aim - Track model loss function aim.track(loss.item(), name='loss', epoch=epoch) correct = 0 total = 0 _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item() # aim - Track metrics aim.track(100 * correct / total, name='accuracy') aim.track(random.random(), name='random', epoch=epoch) aim.track(random.random() * 10, name='random-md', epoch=epoch) aim.track(math.ceil(random.random() * 100), name='random-lg', epoch=epoch)
# Standard ImageNet-style preprocessing: resize, center-crop, to-tensor,
# then channel-wise normalization applied separately so the unnormalized
# tensor can be tracked as an image.
trf = T.Compose([
    T.Resize(256),
    T.CenterCrop(224),
    T.ToTensor(),
])
trf_norm = T.Normalize(mean=[0.485, 0.456, 0.406],
                       std=[0.229, 0.224, 0.225])

for img in images:
    inp = trf(img)
    norm_inp = trf_norm(inp).unsqueeze(0)

    # Forward through the (presumably FCN segmentation) model; per-pixel
    # class map via argmax over the class dimension.
    out = fcn(norm_inp)['out']
    om = torch.argmax(out.squeeze(), dim=0).detach().cpu().numpy()

    aim_img = track(aim.image, inp)

    # Deliberately corrupt the mask in three progressively smaller regions
    # (demo data for the segmentation UI); only the first is tracked below.
    bad_om = om.copy()
    bad_om[0:150, 00:150] = 0

    bad_om_1 = om.copy()
    bad_om_1[0:150, 0:120] = 0

    bad_om_2 = om.copy()
    bad_om_2[0:150, 0:80] = 0

    # aim - Track the image with its (corrupted) segmentation mask.
    track(aim.segmentation, 'seg', aim_img,
          mask=bad_om.tolist(),
          class_labels=labels,
          epoch=1)
# Forward pass outputs = model(images) loss = criterion(outputs, labels) # Backward and optimize optimizer.zero_grad() loss.backward() optimizer.step() if i % 30 == 0: print('Epoch [{}/{}], Step [{}/{}], ' 'Loss: {:.4f}'.format(epoch + 1, num_epochs, i + 1, total_step, loss.item())) # aim - Track model loss function aim.track(loss.item(), name='loss', epoch=epoch, subset='train') correct = 0 total = 0 _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item() acc = 100 * correct / total # aim - Track metrics aim.track(acc, name='accuracy', epoch=epoch, subset='train') # TODO: Do actual validation if i % 300 == 0: aim.track(loss.item(), name='loss', epoch=epoch, subset='val') aim.track(acc, name='accuracy', epoch=epoch, subset='val')
import torchvision
import torchvision.transforms as transforms

# Device configuration — CPU-only demo.
device = torch.device('cpu')

# Hyper parameters
num_epochs = 5
num_classes = 10
batch_size = 50
learning_rate = 0.01

# aim - Track hyper parameters under the 'params' namespace
aim.track({
    'num_epochs': num_epochs,
    'num_classes': num_classes,
    'batch_size': batch_size,
    'learning_rate': learning_rate,
}, namespace='params')

# MNIST dataset — downloaded into ./data/ on first run.
train_dataset = torchvision.datasets.MNIST(root='./data/',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='./data/',
                                          train=False,
                                          transform=transforms.ToTensor())

# Data loader
# NOTE(review): statement truncated by the chunking; the argument list
# continues outside this fragment.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
# Load MNIST and scale pixel values from [0, 255] to [0, 1].
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# Create model
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10)
])

# aim - Checkpoint the (still untrained) model at step 0.
track(aim.checkpoint, 'checkpoint-test', 'mnist-0', model, 0, meta={
    'classes': 10,
})

# Sanity-check one forward pass; the softmax result is discarded.
predictions = model(x_train[:1]).numpy()
tf.nn.softmax(predictions).numpy()

# Logits output above, so from_logits=True here.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model.compile(optimizer='adam',
              loss=loss_fn,
              metrics=['accuracy'])

meta = {'classes': 10}

# Train model with checkpoints callbacks
# Forward pass outputs = model(images) loss = criterion(outputs, labels) # Backward and optimize optimizer.zero_grad() loss.backward() optimizer.step() if (i + 1) % 100 == 0: print('Epoch [{}/{}], Step [{}/{}], ' 'Loss: {:.4f}'.format(epoch + 1, num_epochs, i + 1, total_step, loss.item())) # aim - Track model track(aim.weights, model) track(aim.gradients, model) # Test the model model.eval() with torch.no_grad(): correct = 0 total = 0 for images, labels in test_loader: images = images.to(device) labels = labels.to(device) outputs = model(images) _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item()
labels = labels.to(device) # Forward pass outputs = model(images) loss = criterion(outputs, labels) # Backward and optimize optimizer.zero_grad() loss.backward() optimizer.step() if (i + 1) % 100 == 0: print('Epoch [{}/{}], Step [{}/{}], ' 'Loss: {:.4f}'.format(epoch + 1, num_epochs, i + 1, total_step, loss.item())) # track images if saved_img < 50: saved_img += 1 img_0 = track(aim.image, images[0]) track(aim.image_set, 'set_1', img_0, meta={ 'img_idx': saved_img, 'label': 1, }) img_1 = track(aim.image, images[1]) track(aim.image_set, 'set_2', img_1)
# Count false negatives over the current batch's predictions.
# NOTE(review): despite the "Count fn" comment, every mismatch is counted as
# a false negative regardless of class — confirm intent against the full file.
for l in range(len(labels)):
    if labels[l].item() != outputs[l].argmax().item():
        # Count fn
        false_negatives += 1

# Manual LR decay: halve each epoch (presumably the optimizer is updated
# elsewhere with this value — verify against the caller).
learning_rate /= 2

# aim - Track model checkpoints
track(aim.checkpoint,
      'checkpoint_test', 'chp_epoch_{}'.format(epoch),
      model, epoch,
      lr_rate=learning_rate,
      meta={
          'learning_rate': learning_rate,
          'false_positives': false_positives,
          'false_negatives': false_negatives,
          'drop_out': 0.5,
          'batch_size': 10,
          'kernel_size': 2,
          'stride': 2,
      })

# Test the model
# NOTE(review): evaluation loop truncated by the chunking.
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
import torchvision
import torchvision.transforms as transforms

# Device configuration — CPU-only demo.
device = torch.device('cpu')

# Hyper parameters
num_epochs = 5
num_classes = 10
batch_size = 50
learning_rate = 0.01

# aim - Track hyper parameters (default namespace)
aim.track({
    'num_epochs': num_epochs,
    'num_classes': num_classes,
    'batch_size': batch_size,
    'learning_rate': learning_rate,
})

# MNIST dataset — downloaded into ./data/ on first run.
train_dataset = torchvision.datasets.MNIST(root='./data/',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='./data/',
                                          train=False,
                                          transform=transforms.ToTensor())

# Data loader
# NOTE(review): statement truncated by the chunking; the argument list
# continues outside this fragment.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
# Forward pass outputs = model(images) loss = criterion(outputs, labels) # Backward and optimize optimizer.zero_grad() loss.backward() optimizer.step() if (i + 1) % 100 == 0: print('Epoch [{}/{}], Step [{}/{}], ' 'Loss: {:.4f}'.format(epoch + 1, num_epochs, i + 1, total_step, loss.item())) # aim - Track model loss function track(aim.loss, 'loss', loss.item(), epoch) # aim - Track model loss function correct = 0 total = 0 _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item() track(aim.accuracy, 'accuracy', 100 * correct / total, epoch) track(aim.metric, 'random', random.random(), epoch) track(aim.metric, 'random-md', random.random() * 10, epoch) track(aim.metric, 'random-lg', math.ceil(random.random() * 100), epoch) track(aim.metric_group, 'random', [