import torch
import ps_functions              # repo-local helpers for copying/averaging models
from tqdm import tqdm

for r in tqdm(range(runs)):
    # index of the worker doing local SGD
    w_index = w_index % N_w
    # each worker computes gradients on one mini-batch and snapshots its weights
    for worker in range(N_w):
        for data in trainloaders[worker]:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizers[worker].zero_grad()
            preds = nets[worker](inputs)
            loss = criterions[worker](preds, labels)
            loss.backward()
            ps_functions.synch_weight(reserveNets[worker], nets[worker])
            break
    # each worker rebuilds its model as the average of 5 snapshots
    # (its own and those of workers at offsets -4, -2, +2, +4 mod N_w),
    # then applies its local gradient step on the averaged weights
    for worker in range(N_w):
        ps_functions.initialize_zero(nets[worker])
        index = worker - 4
        if index < 0:
            index += N_w
        for i in range(5):
            ps_functions.weight_accumulate(
                reserveNets[int((index + (i * 2)) % N_w)], nets[worker], 5)
        optimizers[worker].step()
    # w_index sends its model to other workers;
    # other workers upon receiving the model take the average
    for n in range(N_w):
        if n != w_index:
            ps_functions.average_model(nets[n], nets[w_index])
    if (r % 100) == 0 and r != 0:
            preds = nets[worker](inputs)
            loss = criterions[worker](preds, labels)
            loss.backward()
            optimizers[worker].step()
            wcounter += 1
            if wcounter == tau:
                break
    # after tau local steps, average one pair of workers this round ...
    avg_index = int((r % (N_w / 2)) * 2)
    ps_functions.average_model(nets[avg_index], nets[avg_index + 1])
    ps_functions.synch_weight(nets[avg_index + 1], nets[avg_index])
    # ... and let every other worker mix with the freshly averaged pair
    for n in range(N_w):
        if n != avg_index and n != avg_index + 1:
            ps_functions.average_model2(nets[n], nets[avg_index])
    if (r * tau) % 120 == 0:
        # evaluate the average of all worker models on the test set
        ps_functions.initialize_zero(ps_model)
        for n in range(N_w):
            ps_functions.weight_accumulate(nets[n], ps_model, N_w)
        correct = 0
        total = 0
        with torch.no_grad():
            for data in testloader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)
                outputs = ps_model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        print('Accuracy of the network on the 10000 test images: %d %%'
              % (100 * correct / total))
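# The ps_functions weight helpers called above live elsewhere in this repo and
# are not shown here. What follows is a minimal sketch of plausible
# implementations, assuming the argument order used above (destination first;
# for weight_accumulate: source, accumulator, denominator). This is an
# assumption, not the repository's actual code, and it ignores non-parameter
# buffers such as BatchNorm running statistics.
import torch


def synch_weight(target_net, source_net):
    # Copy the source model's parameters into the target model.
    with torch.no_grad():
        for t, s in zip(target_net.parameters(), source_net.parameters()):
            t.copy_(s)


def initialize_zero(net):
    # Zero all parameters so the model can serve as an accumulator.
    with torch.no_grad():
        for p in net.parameters():
            p.zero_()


def weight_accumulate(source_net, accum_net, denom):
    # Add source / denom into the accumulator; accumulating `denom` models
    # this way leaves their average in accum_net.
    with torch.no_grad():
        for a, s in zip(accum_net.parameters(), source_net.parameters()):
            a.add_(s, alpha=1.0 / denom)


def average_model(target_net, source_net):
    # Replace the target's parameters with the element-wise mean of both models.
    # (average_model2 presumably uses a different mixing weight; not sketched.)
    with torch.no_grad():
        for t, s in zip(target_net.parameters(), source_net.parameters()):
            t.mul_(0.5).add_(s, alpha=0.5)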
        if wcounter == 4:
            break
    # w_index sends its model to other workers;
    # other workers upon receiving the model take the average
    for n in range(N_w):
        if n != w_index:
            ps_functions.average_model(nets[n], nets[w_index])
    # averaging the momentums
    for n in range(N_w):
        if n != w_index:
            ps_functions.average_momentum(optimizers[n], optimizers[w_index])
    if (r % 100) == 0 and r != 0:
        # reset the extra model that acts as an accumulator
        ps_functions.initialize_zero(avg_model)    # model
        ps_functions.momentum_zero(avg_Optimizer)  # momentum
        # take the average over all workers
        for worker in range(N_w):
            ps_functions.weight_accumulate(nets[worker], avg_model, N_w)              # model
            ps_functions.momentum_accumulate(avg_Optimizer, optimizers[worker], N_w)  # momentum
        # assign the average back to all worker models
        for worker in range(N_w):
            ps_functions.synch_weight(nets[worker], avg_model)            # model
            ps_functions.momentum_Avg(avg_Optimizer, optimizers[worker])  # momentum
    if r % 100 == 0:
        ps_functions.initialize_zero(ps_model)
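# Sketch of the ps_functions momentum helpers used above. This is an assumption
# about their semantics, not the repository's actual code: it treats them as
# operating on the 'momentum_buffer' entries that torch.optim.SGD keeps per
# parameter, with the same averaging convention as the weight helpers, and it
# assumes every optimizer has taken at least one step so the buffers exist.
import torch


def _momentum_buffers(optimizer):
    # Yield the momentum buffer of every parameter that already has one.
    for group in optimizer.param_groups:
        for p in group['params']:
            state = optimizer.state[p]
            if 'momentum_buffer' in state:
                yield state['momentum_buffer']


def momentum_zero(optimizer):
    # Reset the accumulator optimizer's buffers before averaging.
    with torch.no_grad():
        for buf in _momentum_buffers(optimizer):
            buf.zero_()


def momentum_accumulate(accum_optimizer, source_optimizer, denom):
    # Add source buffers / denom into the accumulator's buffers.
    with torch.no_grad():
        for a, s in zip(_momentum_buffers(accum_optimizer),
                        _momentum_buffers(source_optimizer)):
            a.add_(s, alpha=1.0 / denom)


def average_momentum(target_optimizer, source_optimizer):
    # In-place mean of the two optimizers' buffers, stored in the target.
    with torch.no_grad():
        for t, s in zip(_momentum_buffers(target_optimizer),
                        _momentum_buffers(source_optimizer)):
            t.mul_(0.5).add_(s, alpha=0.5)


def momentum_Avg(avg_optimizer, worker_optimizer):
    # Copy the averaged buffers back into a worker's optimizer.
    with torch.no_grad():
        for a, w in zip(_momentum_buffers(avg_optimizer),
                        _momentum_buffers(worker_optimizer)):
            w.copy_(a)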