import pickle as pkl

import matplotlib.pyplot as plt
import numpy as np
import torch as t
from sklearn.decomposition import PCA

# `args`, `dataset`, and accumulate_grads_over_epochs come from the
# surrounding script (a sketch of the helper follows this block).

paradigms = args.paradigms
batches = args.batch_sizes
dpi = args.dpi
rows, cols = args.rows, args.cols
device = t.device("cuda:{}".format(2))

for paradigm in paradigms:
    fig = plt.figure(figsize=(5 * cols, (4 * rows) - 1))
    c = 1
    for batch in batches:
        file = '../ckpts/{}_centralized/history/clf_fcn_noise_None' \
               '_paradigm_{}_lr_0.01_decay_1e-05_batch_{}.pkl'.format(
                   dataset, paradigm, batch)
        with open(file, 'rb') as f:
            x_ax, acc_train, acc_test, l_train, l_test, grad = pkl.load(f)
        # Average the per-batch gradients of every epoch, then treat each
        # epoch's flattened weight/bias gradient as one observation.
        grad = accumulate_grads_over_epochs(grad, device)
        grad0 = t.stack([_[0].flatten() for _ in grad], dim=0).T
        grad1 = t.stack([_[1].flatten() for _ in grad], dim=0).T
        # Explained-variance ratios of the top 10 principal components,
        # computed separately for the weight and bias gradients.
        pca = PCA()
        pca.fit(grad0.cpu().numpy())
        exp = pca.explained_variance_ratio_[:10]
        pca.fit(grad1.cpu().numpy())
        exp1 = pca.explained_variance_ratio_[:10]
        ax = fig.add_subplot(100 * rows + 10 * cols + c)
        ax.bar(np.array(list(range(1, len(exp) + 1))) - 0.25, exp, color='b',
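# A minimal sketch of the accumulate_grads_over_epochs helper assumed
# above (the repository's actual implementation may differ, e.g. it
# could sum rather than average): it collapses the per-batch gradient
# lists of each epoch into one per-parameter gradient list per epoch.
import torch


def accumulate_grads_over_epochs(grad, device):
    # grad: list of epochs; each epoch is a list of batches; each batch
    # is a list of per-parameter gradient tensors.
    accumulated = []
    for epoch_grads in grad:
        per_param = []
        for param_grads in zip(*epoch_grads):  # regroup batches by parameter
            stacked = torch.stack([g.to(device) for g in param_grads], dim=0)
            per_param.append(stacked.mean(dim=0))
        accumulated.append(per_param)
    return accumulated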
wait = 0
for epoch in range(1, args.epochs + 1):
    gradi = []
    correcti = 0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        loss = loss_fn(output, target)
        optim.zero_grad()
        loss.backward()
        # Keep a copy of every batch's gradients; the optimizer step is
        # taken once per epoch with their accumulated value.
        gradi.append([p.grad.clone() for p in model.parameters()])
        predi = output.argmax(1, keepdim=True)
        correcti += predi.eq(target.view_as(predi)).sum().item()
    grad_accum = accumulate_grads_over_epochs([gradi], device)
    for p, g in zip(model.parameters(), grad_accum[0]):
        p.grad = g.clone()
    optim.step()
    grad.append([[ii.cpu() for ii in i] for i in gradi])
    x_ax.append(epoch)
    acc_i = correcti / len(train_loader.dataset)
    acc_train.append(acc_i)
    l_train.append(loss.item())  # loss of the last batch in the epoch
    acc, loss = test(args, model, device, test_loader, best, epoch,
                     loss_type, False)
    acc_test.append(acc)
    l_test.append(loss)
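# A minimal sketch of the test() routine called above, under assumed
# semantics (the real one presumably also handles checkpointing and
# early stopping via `best`, `loss_type`, and the final flag, ignored
# here): evaluate the model and return (accuracy, mean loss).
import torch
import torch.nn.functional as F


def test(args, model, device, test_loader, best, epoch, loss_type, stop):
    model.eval()
    total_loss, correct = 0.0, 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # cross-entropy is an assumption; the script picks the loss
            # from loss_type
            total_loss += F.cross_entropy(output, target,
                                          reduction='sum').item()
            pred = output.argmax(1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    model.train()
    n = len(test_loader.dataset)
    return correct / n, total_loss / n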
for data, target in train_loader:
    data, target = data.to(device), target.to(device)
    output = model(data)
    loss = loss_fn(output, target)
    optim.zero_grad()
    loss.backward()
    # Once k gradients have been collected, switch to the kgrad paradigm
    # and replace the exact gradients with their approximation.
    if len(grad_w) == args.kgrads:
        curr_paradigm = 'kgrad'
        gradient_approximation(model, [grad_w, grad_b], device)
    gradi.append([p.grad.clone() for p in model.parameters()])
    optim.step()
    predi = output.argmax(1, keepdim=True)
    correcti += predi.eq(target.view_as(predi)).sum().item()
# While fewer than k gradients are stored, append this epoch's
# accumulated weight/bias gradients to the buffers.
if len(grad_w) < args.kgrads:
    grad_tmp = accumulate_grads_over_epochs([gradi], device)
    grad_w.append(grad_tmp[0][0].flatten())
    grad_b.append(grad_tmp[0][1].flatten())
# Freeze the buffers into (k, numel) matrices once they are full.
if len(grad_w) == args.kgrads and isinstance(grad_w, list):
    grad_w = torch.stack(grad_w, dim=0)
    grad_b = torch.stack(grad_b, dim=0)
grad.append(gradi)
x_ax.append(epoch)
acc_i = correcti / len(train_loader.dataset)
acc_train.append(acc_i)
l_train.append(loss.item())  # loss of the last batch in the epoch
acc, loss = test(args, model, device, test_loader, best, epoch,
                 loss_type, False)
acc_test.append(acc)
l_test.append(loss)
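# A minimal sketch of gradient_approximation, assuming it replaces each
# parameter's gradient with its least-squares projection onto the span
# of the k stored flattened gradients; the repository's actual
# approximation scheme may differ.
import torch


def gradient_approximation(model, stored, device):
    # stored: one (k, numel) matrix per parameter, e.g. [grad_w, grad_b].
    for p, G in zip(model.parameters(), stored):
        g = p.grad.flatten().to(device)
        basis = G.to(device).T                 # (numel, k)
        coeffs = torch.linalg.pinv(basis) @ g  # (k,) projection coefficients
        p.grad = (basis @ coeffs).view_as(p.grad).to(p.grad.device)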