예제 #1
0
# Main loop: one iteration per run, with progress reported through tqdm.
# Each iteration has three phases:
#   1. every worker computes gradients on a single batch,
#   2. every worker's weights are rebuilt from five entries of reserveNets,
#      followed by an optimizer step,
#   3. the model of worker `w_index` is averaged into every other worker.
# NOTE(review): the ps_functions helpers are defined elsewhere; the comments
# about what they do are inferred from their names and call sites and should
# be confirmed against their implementation.
for r in tqdm(range(runs)):
    # index of the worker doing local SGD
    # (reduced modulo N_w; it is not advanced anywhere in this excerpt)
    w_index = w_index % N_w
    for worker in range(N_w):
        for data in trainloaders[worker]:
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            # Clear old gradients, then forward + backward on this batch.
            # No optimizer step is taken inside this loop.
            optimizers[worker].zero_grad()
            preds = nets[worker](inputs)
            loss = criterions[worker](preds, labels)
            loss.backward()
            # presumably snapshots nets[worker] into reserveNets[worker]
            # (destination first) -- TODO confirm
            ps_functions.synch_weight(reserveNets[worker], nets[worker])
            # Unconditional break: only the first batch of the loader is used.
            break

    for worker in range(N_w):
        # presumably zeroes the worker's weights before accumulation -- confirm
        ps_functions.initialize_zero(nets[worker])
        # Start four positions before this worker, kept non-negative.
        index = worker - 4
        if index < 0:
            index += N_w
        # Visit five reserve models, stepping by two and wrapping modulo N_w.
        # The trailing 5 matches the number of models visited; presumably it
        # is the divisor that turns the accumulation into a mean -- confirm.
        for i in range(5):
            ps_functions.weight_accumulate(
                reserveNets[int((index + (i * 2)) % N_w)], nets[worker], 5)
        # Applies the update using the gradients produced by loss.backward()
        # above, assuming the helpers leave the .grad buffers untouched.
        optimizers[worker].step()

    # w_index sends its model to other workers;
    # other workers, upon receiving the model, take the average
    for n in range(N_w):
        if n != w_index:
            ps_functions.average_model(nets[n], nets[w_index])

    if (r % 100) == 0 and r != 0:
예제 #2
0
            preds = nets[worker](inputs)
            loss = criterions[worker](preds, labels)
            loss.backward()
            optimizers[worker].step()
            wcounter += 1
            if wcounter == tau:
                break

    # Pick the pair of workers (avg_index, avg_index + 1) to average this
    # round. `r` and `tau` come from code outside this excerpt.
    # N_w / 2 is float division; for even N_w this gives 0, 2, ..., N_w - 2.
    # NOTE(review): for odd N_w the float modulo can make avg_index + 1 equal
    # N_w (e.g. N_w = 5, r = 2), which would index past nets if it holds N_w
    # models -- confirm N_w is always even.
    avg_index = int((r % (N_w / 2)) * 2)
    # presumably averages the pair into nets[avg_index], then copies the
    # result into nets[avg_index + 1] -- confirm against ps_functions
    ps_functions.average_model(nets[avg_index], nets[avg_index + 1])
    ps_functions.synch_weight(nets[avg_index + 1], nets[avg_index])
    # Every worker outside the pair is combined with nets[avg_index] through
    # average_model2 (a different helper from average_model above).
    for n in range(N_w):
        if n != avg_index and n != avg_index + 1:
            ps_functions.average_model2(nets[n], nets[avg_index])
    # Periodic evaluation whenever r * tau is a multiple of 120
    # (this includes r == 0).
    if (r * tau) % 120 == 0:
        # Rebuild ps_model from all worker models; the N_w argument is
        # presumably the divisor that makes the accumulation a mean -- confirm.
        ps_functions.initialize_zero(ps_model)
        for n in range(N_w):
            ps_functions.weight_accumulate(nets[n], ps_model, N_w)
        correct = 0
        total = 0
        # Gradient tracking is disabled for the evaluation pass.
        with torch.no_grad():
            for data in testloader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)
                outputs = ps_model(images)
                # Predicted label = index of the largest output along dim 1.
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # %d truncates the percentage to an integer before printing.
        print('Accuracy of the network on the 10000 test images: %d %%' %
              (100 * correct / total))
예제 #3
0
            if wcounter == 4:
                break
    # w_index sends its model to other workers;
    # other workers, upon receiving the model, take the average.
    # NOTE(review): the ps_functions helpers are defined elsewhere; what they
    # do is inferred from names and call sites -- confirm before relying on it.
    for n in range(N_w):
        if n != w_index:
            ps_functions.average_model(nets[n], nets[w_index])

    # Average the optimizer momentums the same way: every other worker's
    # optimizer is combined with the optimizer of w_index.
    for n in range(N_w):
        if n != w_index:
            ps_functions.average_momentum(optimizers[n], optimizers[w_index])

    # Every 100th iteration, except the very first (r == 0), do a full
    # synchronisation through avg_model / avg_Optimizer.
    if (r % 100) == 0 and r != 0:
        # Reset the auxiliary model and its optimizer momentum.
        ps_functions.initialize_zero(avg_model)  # model
        ps_functions.momentum_zero(avg_Optimizer)

        # Accumulate all workers into the auxiliary model/optimizer; the N_w
        # argument is presumably the divisor that makes this a mean -- confirm.
        # NOTE(review): weight_accumulate is called with the worker first and
        # the aggregate second, while momentum_accumulate is called with the
        # aggregate first -- verify both against the helper signatures.
        for worker in range(N_w):
            ps_functions.weight_accumulate(nets[worker], avg_model,
                                           N_w)  # model
            ps_functions.momentum_accumulate(avg_Optimizer, optimizers[worker],
                                             N_w)
        # Push the aggregated model and momentum back to every worker.
        for worker in range(N_w):
            ps_functions.synch_weight(nets[worker], avg_model)  # model
            ps_functions.momentum_Avg(avg_Optimizer,
                                      optimizers[worker])  # momentum
    if r % 100 == 0:
        ps_functions.initialize_zero(ps_model)