def train_model(model, criterion, train_loader, ps_stub):
    """Run one epoch of parameter-server-style training.

    For every batch: fetch the current weights from the parameter server,
    run forward/backward locally, then push the resulting gradients back.
    The optimizer step happens on the server side, not here (the local
    optimizer calls are intentionally commented out).

    Args:
        model: torch.nn.Module to train; moved to GPU when available.
        criterion: loss function, e.g. nn.CrossEntropyLoss.
        train_loader: DataLoader yielding (input, target) batches.
        ps_stub: parameter-server stub exposing GetModel(ModelRequest) and
            UpdateGradients(proto) — presumably a gRPC stub; TODO confirm.
    """
    start_time = time.time()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)
    model = model.to(device)

    # Training for 1 epoch
    cum_loss = 0.0
    correct = 0
    model.train()
    for x, y in tqdm(train_loader):
        x, y = x.to(device), y.to(device)
        # optimizer.zero_grad()
        print("Fetching model")
        model_proto = ps_stub.GetModel(ModelRequest())
        load_proto(model, model_proto)
        print("Model fetched")

        # FIX: backward() ACCUMULATES into .grad; the original never zeroed
        # gradients (optimizer.zero_grad() is commented out and load_proto
        # replaces weights, not .grad), so stale gradients from earlier
        # batches were mixed into every update sent to the server.
        model.zero_grad()

        outputs = model(x)
        loss = criterion(outputs, y)
        loss.backward()
        # optimizer.step()
        print("Sending gradients")
        ps_stub.UpdateGradients(gradients_to_proto(model))
        print("Gradients sent")
        with torch.no_grad():
            _, pred = outputs.max(1)
            correct += (pred == y).sum().item()
            cum_loss += loss.item()

    # NOTE(review): cum_loss is a sum of per-batch MEAN losses, so dividing
    # by the dataset size under-reports the average loss by a factor of the
    # batch size — kept as-is to preserve the original metric.
    n_train = len(train_loader.dataset)
    print(f"Finished in {time.time() - start_time} seconds.")
    print(f"Train acc={correct / n_train}, train loss={cum_loss / n_train}.")
# MNIST data loaders; num_workers=0 keeps loading in the main process.
train_loader = DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=0)
val_loader = DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=0)

### MODEL DEFINITION
model = create_model()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()

# In-process stand-in for the parameter server; seeded with its own fresh
# model copy and the same learning rate as the local optimizers.
mock_ps = MockPS(create_model(), lr=0.001)

# Train for 1 epoch
# NOTE(review): these calls pass 5 arguments (model, criterion, optimizer,
# loader, ps) while the train_model defined elsewhere in this file takes 4
# (no optimizer) — confirm which definition is in scope for this cell.
train_model(model, criterion, optimizer, train_loader, mock_ps)

# Create new model and train
model2 = create_model()
optimizer2 = optim.SGD(model2.parameters(), lr=0.001, momentum=0.9)
train_model(model2, criterion, optimizer2, train_loader, mock_ps)

# Third model starts from the server's current weights, then trains twice.
model3 = create_model()
optimizer3 = optim.SGD(model3.parameters(), lr=0.001, momentum=0.9)
load_proto(model3, mock_ps.get_model())
train_model(model3, criterion, optimizer3, train_loader, mock_ps)
train_model(model3, criterion, optimizer3, train_loader, mock_ps)
# Inspect parameter count/shape and current gradients of the model.
# NOTE(review): `params` is defined in an earlier cell — presumably
# list(model.parameters()); confirm.
print(len(params))
print(params[0].shape)
# for param in model.parameters():
print("====Gradients")
print(params[0].grad)

# Minimal autograd demo: build a tiny graph and backprop through it.
x = torch.ones(2, 2, requires_grad=True)
print(x)
y = x * x + 2
print(y)
print(y.grad_fn)
out = y.mean()
print(out.grad_fn)
out.backward()
print(x.grad)
# NOTE(review): y is a non-leaf tensor, so y.grad prints None (PyTorch
# warns) — only leaf tensors retain .grad by default.
print(y.grad)

# Gradients survive a pickle round-trip — the basis for shipping them
# inside protobuf messages.
print(pickle.dumps(x.grad))
print(pickle.loads(pickle.dumps(x.grad)))

# Round-trip the model through its protobuf representation.
proto = model_to_proto(model)
print(len(proto.weights))
load_proto(model, proto)
# print(dir(pickle.loads(model_proto.weights[0].value)))
### MODEL DEFINITION model = create_model() optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9) criterion = nn.CrossEntropyLoss() print(model) model_proto = model_to_proto(model, log=True) # print(len(model_proto.weights)) # print(pickle.loads(model_proto.weights[0].value)) print([name for name, _ in model.named_parameters()]) # Train for 1 epoch and save to proto train_model(model, criterion, optimizer, train_loader) model_proto = model_to_proto(model) # Create new model and load from protobuf model2 = create_model() optimizer2 = optim.SGD(model2.parameters(), lr=0.001, momentum=0.9) load_proto(model2, model_proto) print("Models are the same?", compare_models(model, model2)) train_model(model2, criterion, optimizer2, train_loader) # Reset to 1 epoch completed load_proto(model2, model_proto) train_model(model2, criterion, optimizer2, train_loader) # Train for another epoch train_model(model2, criterion, optimizer2, train_loader)