                                     w_decay=weight_decay)
        for n in range(num_w_per_cluster)
    ] for c in range(num_cl)
]
#############
epochs = 300
period = 6
iter_ind = 1
#############
warm_up_epoch = 5
max_ind = 50000 * warm_up_epoch / (mini_batch * num_w_per_cluster * num_cl)
##############
old_nets = deepcopy(nets)
for c in range(num_cl):
    for n in range(num_w_per_cluster):
        ps_functions.grad_init(old_nets[c][n], x_init, y_init)
        ps_functions.synch_weight(nets[c][n], ps_model)

ps_functions.warmup_lr(optimizers, num_cl, num_w_per_cluster, lr, iter_ind,
                       max_ind)  # initialize lr for warmup phase

# Result vector
results = np.empty([1, 150])
res_ind = 0

# training
print('=======> training')
for e in tqdm(range(epochs)):
    # user
    i = 0
    # cluster
    c = 0
    # period
    per = 0
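# --- hedged sketch (not part of the original scripts) -----------------------
# ps_functions.synch_weight(target, source) is a repo-local helper whose body
# is not shown in this excerpt. A minimal implementation consistent with how
# it is used above (copying the ps_model parameters into each worker model)
# might look like the following; the actual helper in ps_functions may differ.
import torch

def synch_weight_sketch(target_model, source_model):
    # copy every parameter of source_model into target_model, in place
    with torch.no_grad():
        for p_t, p_s in zip(target_model.parameters(),
                            source_model.parameters()):
            p_t.copy_(p_s)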
lr = 1e-1
momentum = 0.9
weight_decay = 1e-4
alpha = 0.45

criterions = [nn.CrossEntropyLoss() for n in range(N_w)]
optimizers = [
    SGD_custom.define_optimizer(nets[n], lr, momentum, weight_decay)
    for n in range(N_w)
]
avg_Optimizer = SGD_custom.define_optimizer(avg_model, lr, momentum,
                                            weight_decay)

# initialize all weights equally
[ps_functions.synch_weight(nets[i], ps_model) for i in range(N_w)]
ps_functions.synch_weight(ps_model, avg_model)

for r in tqdm(range(runs)):
    # index of the worker doing local SGD
    w_index = w_index % N_w
    for worker in range(N_w):
        wcounter = 0
        for data in trainloaders[worker]:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizers[worker].zero_grad()
            preds = nets[worker](inputs)
            loss = criterions[worker](preds, labels)
            loss.backward()
            optimizers[worker].step()
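# --- hedged sketch (not part of the original scripts) -----------------------
# SGD_custom.define_optimizer is another repo-local wrapper that is only
# called, never defined, in this excerpt. Given how it is used above, a
# plausible minimal equivalent is a thin wrapper around torch.optim.SGD;
# the real SGD_custom module may do more than this.
import torch.optim as optim

def define_optimizer_sketch(model, lr, momentum, w_decay):
    return optim.SGD(model.parameters(), lr=lr, momentum=momentum,
                     weight_decay=w_decay)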
criterions = [nn.CrossEntropyLoss() for n in range(N_w)]
optimizers = [
    SGD_custom.define_optimizer(nets[n], lr, momentum, weight_decay)
    for n in range(N_w)
]
reserveOptimers = [
    SGD_custom.define_optimizer(reserveNets[n], lr, momentum, weight_decay)
    for n in range(N_w)
]
avg_Optimizer = SGD_custom.define_optimizer(avg_model, lr, momentum,
                                            weight_decay)

# initialize all weights equally
[ps_functions.synch_weight(nets[i], ps_model) for i in range(N_w)]
[ps_functions.synch_weight(reserveNets[i], ps_model) for i in range(N_w)]
ps_functions.synch_weight(ps_model, avg_model)

runs = int(20000)
for r in tqdm(range(runs)):
    # index of the worker doing local SGD
    w_index = w_index % N_w
    for worker in range(N_w):
        for data in trainloaders[worker]:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizers[worker].zero_grad()
            preds = nets[worker](inputs)
            loss = criterions[worker](preds, labels)
            loss.backward()
                                     momentum, w_decay=weight_decay)
        for n in range(num_w_per_cluster)
    ] for c in range(num_cl)
]
#############
epochs = 300
period = 6
iter_ind = 1
#############
warm_up_epoch = 5
max_ind = 50000 * warm_up_epoch / (mini_batch * num_w_per_cluster * num_cl)
##############
for c in range(num_cl):
    for n in range(num_w_per_cluster):
        ps_functions.synch_weight(weightsTilde[c][n], wref)

ps_functions.warmup_lr(optimizers, num_cl, num_w_per_cluster, lr, iter_ind,
                       max_ind)  # initialize lr for warmup phase

# Result vector
results = np.empty([1, 150])
res_ind = 0

# training
print('=======> training')
for e in tqdm(range(epochs)):
    # user
    i = 0
    # cluster
    c = 0
    # period
    per = 0
lr = 1e-1
momentum = 0
weight_decay = 1e-4

criterions = [nn.CrossEntropyLoss() for n in range(N_w)]
optimizers = [
    SGD_custom.define_optimizer(nets[n], lr, momentum, weight_decay)
    for n in range(N_w)
]
avg_Optimizer = SGD_custom.define_optimizer(avg_model, lr, momentum,
                                            weight_decay)

# initialize all weights equally
[ps_functions.synch_weight(nets[i], ps_model) for i in range(N_w)]
[ps_functions.synch_weight(netsCurrent[i], ps_model) for i in range(N_w)]
[ps_functions.synch_weight(netsOLD[i], ps_model) for i in range(N_w)]
[ps_functions.synch_weight(netsDif[i], ps_model) for i in range(N_w)]
[ps_functions.synch_weight(netsAvg[i], ps_model) for i in range(N_w)]
ps_functions.synch_weight(ps_model, avg_model)

runs = int(10000)
for r in tqdm(range(runs)):  # 2
    # index of the worker doing local SGD
    w_index = w_index % N_w
    for worker in range(N_w):  # 3
        for data in trainloaders[worker]:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizers[worker].zero_grad()
# optimizers = [define_optimizer(nets[n], lr, momentum, w_decay=weight_decay)
#               for n in range(num_workers)]
optimizers = [
    SGD_custom2.define_optimizer(nets[n], lr, momentum, w_decay=weight_decay)
    for n in range(num_workers)
]
#######################
epochs = 300
warm_up_epoch = 5
max_ind = 50000 * warm_up_epoch / (mini_batch * num_workers)
iter_ind = 1
#######################
[ps_functions.synch_weight(nets[n], ps_model) for n in range(num_workers)]
#######################################
ps_functions.warmup_lr_nc(optimizers, num_workers, lr, iter_ind,
                          max_ind)  # initialize lr for warmup phase

# Result vector
results = np.empty([1, 150])
res_ind = 0

# training
for e in tqdm(range(epochs)):
    i = 0
    for data in trainloader:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        # index of the worker
        index = i % num_workers
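# --- hedged sketch (not part of the original scripts) -----------------------
# ps_functions.warmup_lr_nc(optimizers, num_workers, lr, iter_ind, max_ind) is
# only called here, not defined. A common way to implement such a warm-up is a
# linear ramp from a small learning rate up to lr over max_ind iterations; the
# sketch below assumes that behaviour and is not the repo's actual code. The
# clustered variant ps_functions.warmup_lr would do the same over
# num_cl * num_w_per_cluster optimizers.
def warmup_lr_nc_sketch(optimizers, num_workers, lr, iter_ind, max_ind):
    warm_lr = lr * min(iter_ind / max_ind, 1.0)  # linear ramp, capped at lr
    for n in range(num_workers):
        for group in optimizers[n].param_groups:
            group['lr'] = warm_lr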
weight_decay = 1e-4
alpha = 0.45

criterions = [nn.CrossEntropyLoss() for n in range(N_w)]
optimizers = [
    SGD_custom.define_optimizer(nets[n], lr, momentum, weight_decay)
    for n in range(N_w)
]
avg_Optimizer = SGD_custom.define_optimizer(avg_model, lr, momentum,
                                            weight_decay)
bcast_Optimizer = SGD_custom.define_optimizer(bcast_model, lr, momentum,
                                              weight_decay)

# initialize all weights equally
[ps_functions.synch_weight(nets[i], ps_model) for i in range(N_w)]
ps_functions.synch_weight(ps_model, avg_model)
ps_functions.synch_weight(avg_model, bcast_model)

runs = int(20000)
for r in tqdm(range(runs)):
    # index of the worker doing local SGD
    w_index = w_index % N_w
    ts = time.time()
    for worker in range(N_w):
        for data in trainloaders[worker]:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            optimizers[worker].zero_grad()
            preds = nets[worker](inputs)
            loss = criterions[worker](preds, labels)
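# --- hedged sketch (not part of the original scripts) -----------------------
# The avg_model and bcast_model objects suggest that a model-averaging /
# broadcast step follows the local updates, but that step lies outside this
# excerpt. One plausible form of the averaging step, shown only as an
# assumption and not as the repo's actual code:
import torch

def average_into_sketch(avg_model, worker_models):
    # write the element-wise mean of the worker parameters into avg_model
    with torch.no_grad():
        for avg_p, *worker_ps in zip(avg_model.parameters(),
                                     *[m.parameters() for m in worker_models]):
            avg_p.copy_(torch.stack(worker_ps).mean(dim=0))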