def test_multi_step_multi_gamma_lr():
    """MultiStepMultiGammaLR rejects bad inputs and applies cumulative gammas at milestones."""
    dummy_tensor = torch.zeros(3, 3, 3, requires_grad=True)
    dummy_optimizer = Optimizer([dummy_tensor], {'lr': 0.1})

    # Input validation: milestones must be sorted and match gammas in length.
    invalid_configs = [
        ([60, 30, 80], [0.1, 0.1, 0.2]),  # unsorted milestones
        ([30, 60], [0.1, 0.1, 0.2]),      # fewer milestones than gammas
        ([30, 60, 80], [0.1, 0.1]),       # fewer gammas than milestones
    ]
    for milestones, gammas in invalid_configs:
        with pytest.raises(ValueError):
            MultiStepMultiGammaLR(dummy_optimizer, milestones=milestones, gammas=gammas)

    # Functionality: LR is multiplied by each gamma once its milestone is reached.
    scheduler = MultiStepMultiGammaLR(dummy_optimizer, milestones=[30, 60, 80],
                                      gammas=[0.1, 0.1, 0.2])
    expected_gammas = [1, 1 * 0.1, 1 * 0.1 * 0.1, 1 * 0.1 * 0.1 * 0.2]
    expected_lrs = [0.1 * gamma for gamma in expected_gammas]
    assert scheduler.multiplicative_gammas == expected_gammas

    # Pairs of (epoch passed to step(), index of the expected LR at that epoch).
    step_expectations = [
        (0, 0), (15, 0),
        (30, 1), (33, 1),
        (60, 2), (79, 2),
        (80, 3), (100, 3),
    ]
    for epoch, lr_idx in step_expectations:
        scheduler.step(epoch)
        assert dummy_optimizer.param_groups[0]['lr'] == expected_lrs[lr_idx]
def set_world_properties(self, n_actions: int, size: int, dtype: str) -> None:
    """
    Initiate parameters that depend on world parameters.

    :param n_actions: available number of actions
    :param size: board size
    :param dtype: optimization algorithm type, 'RQL' or 'DQN'
    """
    # set action range
    self._actions = range(n_actions)
    # set state data type
    self._dtype = dtype
    # Initiate according to data type.
    # Bug fix: compare strings with `==`, not `is` — identity comparison of
    # string literals depends on CPython interning and is not guaranteed to work.
    if self._dtype == 'DQN':
        # initiate neural network; computes V(s_t), expected values of actions for given state
        # noinspection PyUnresolvedReferences
        self.policy_net = DQN(inputs=size, outputs=n_actions,
                              hidden_depth=self._n_hidden,
                              hidden_dim=self._w_hidden).to(device=dev)
        # noinspection PyUnresolvedReferences
        self.target_net = DQN(inputs=size, outputs=n_actions,
                              hidden_depth=self._n_hidden,
                              hidden_dim=self._w_hidden).to(device=dev)
        # Target network starts as an exact copy of the policy network and is
        # kept in eval mode (it is never trained directly).
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.target_net.eval()
        # set optimizer
        # noinspection PyUnresolvedReferences
        self._optimizer = Optimizer(self.policy_net.parameters(), lr=self._lr,
                                    alpha=self._alpha)
    elif self._dtype == 'RQL':
        # Tabular RQL: lazily-filled Q-value and best-action tables.
        self._q = {}
        self._best_action = {}
# Select device, set up TensorBoard logging and CPU thread count.
device = t.device('cuda' if t.cuda.is_available() else 'cpu')
writer = SummaryWriter(args.tensorboard)
t.set_num_threads(args.num_threads)

# Data and model; embeddings are loaded from a pre-computed numpy file.
loader = Dataloader('./data/')
model = Model(args.num_layers, args.num_heads, args.dropout,
              max_len=loader.max_len,
              embeddings_path='./data/embeddings.npy')
model.to(device)

optimizer = Optimizer(model.learnable_parameters(), lr=0.0002, amsgrad=True)
# Fixed grammar of the status message ("Model have initialized").
print('Model initialized')

# Plain NLL-minimization training loop.
for i in range(args.num_iterations):
    optimizer.zero_grad()
    model.train()
    # Renamed from `input` to avoid shadowing the builtin of the same name.
    batch_input, target = loader.next_batch(args.batch_size, 'train', device)
    nll = model(batch_input, target)
    nll.backward()
    optimizer.step()
# Adam hyper-parameters shared by the single- and multi-GPU construction paths.
adam_kwargs = dict(lr=init_lr, betas=adam_betas_default, eps=ieps_adam_default,
                   weight_decay=cnfg.weight_decay, amsgrad=use_ams)

if multi_gpu:
    # Let the model build its own (possibly sharded) optimizer.
    optimizer = mymodel.build_optimizer(
        Optimizer, multi_gpu_optimizer=multi_gpu_optimizer,
        contiguous_parameters=contiguous_parameters, **adam_kwargs)
else:
    optimizer = Optimizer(
        get_model_parameters(mymodel, contiguous_parameters=contiguous_parameters),
        **adam_kwargs)
optimizer.zero_grad(set_to_none=optm_step_zero_grad_set_none)

lrsch = LRScheduler(optimizer, cnfg.isize, cnfg.warm_step, scale=cnfg.lr_scale)

# Bundle everything that must be checkpointed; skipped when no state file or
# state count was configured.
if statesf is None and cnt_states is None:
    state_holder = None
else:
    state_holder = Holder(optm=optimizer, lrsch=lrsch, pyrand=PyRandomState(),
                          thrand=THRandomState(use_cuda=use_cuda))
# Optionally warm-start the encoder/decoder embedding tables from
# pre-trained embedding files given in the configuration.
if cnfg.src_emb is not None:
    logger.info("Load source embedding from: " + cnfg.src_emb)
    load_emb(cnfg.src_emb, mymodel.enc.wemb.weight, nwordi, cnfg.scale_down_emb, cnfg.freeze_srcemb)
if cnfg.tgt_emb is not None:
    logger.info("Load target embedding from: " + cnfg.tgt_emb)
    load_emb(cnfg.tgt_emb, mymodel.dec.wemb.weight, nwordt, cnfg.scale_down_emb, cnfg.freeze_tgtemb)

# Move model and loss to the primary CUDA device before the optimizer is
# created, so optimizer state lives on the same device as the parameters.
if use_cuda:
    mymodel.to(cuda_device)
    lossf.to(cuda_device)

optimizer = Optimizer(mymodel.parameters(), lr=init_lr, betas=adam_betas_default, eps=ieps_adam_default, weight_decay=cnfg.weight_decay, amsgrad=use_ams)
optimizer.zero_grad(set_to_none=True)

# Automatic mixed precision only makes sense on CUDA.
use_amp = cnfg.use_amp and use_cuda
scaler = GradScaler() if use_amp else None

# Wrap model and criterion for multi-GPU data parallelism.
# NOTE(review): the DataParallelCriterion call continues beyond this chunk.
if multi_gpu:
    mymodel = DataParallelMT(mymodel, device_ids=cuda_devices, output_device=cuda_device.index, host_replicate=True, gather_output=False)
    lossf = DataParallelCriterion(lossf, device_ids=cuda_devices,
def main():
    """Entry point: parse CLI arguments, build model/optimizer, and train."""
    # Training settings
    arg_parser = argparse.ArgumentParser(
        description='Gradient Quantization Samples')
    arg_parser.add_argument('--network', type=str, default='fcn',
                            choices=network_choices.keys())
    arg_parser.add_argument('--dataset', type=str, default='mnist',
                            choices=data_loaders.keys())
    arg_parser.add_argument('--num-classes', type=int, default=10,
                            choices=classes_choices.values())
    arg_parser.add_argument('--logdir', type=str, default='./log/tmp',
                            help='For Saving the logs')
    arg_parser.add_argument('--batch-size', type=int, default=32, metavar='N',
                            help='input batch size for training (default: 32)')
    arg_parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                            help='input batch size for testing (default: 1000)')
    arg_parser.add_argument('--epochs', type=int, default=150, metavar='N',
                            help='number of epochs to train (default: 150)')
    arg_parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                            help='SGD momentum (default: 0.9)')
    arg_parser.add_argument('--weight-decay', type=float, default=5e-4, metavar='M',
                            help='weight decay momentum (default: 5e-4)')
    arg_parser.add_argument('--no-cuda', action='store_true', default=False,
                            help='disables CUDA training')
    arg_parser.add_argument('--seed', type=int, default=1, metavar='S',
                            help='random seed (default: 1)')
    arg_parser.add_argument('--log-epoch', type=int, default=1, metavar='N',
                            help='logging training status at each epoch')
    arg_parser.add_argument('--save-model', action='store_true', default=False,
                            help='For Saving the current Model')
    arg_parser.add_argument('--m', type=int, default=1, help='')
    arg_parser.add_argument('--depth', type=int, default=1, help='')
    args = arg_parser.parse_args()
    get_config(args)

    torch.manual_seed(args.seed)
    device = torch.device("cpu" if args.no_cuda else "cuda")

    train_loader, test_loader = DATASET_LOADER(args)
    model = NETWORK(num_classes=args.num_classes).to(device)
    optimizer = Optimizer(model.parameters(), lr=0.1,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    # Piecewise-constant LR schedule: epoch -> new learning rate.
    if args.dataset == 'mnist':
        lr_schedule = {10: 0.01, 20: 0.001}
        args.epochs = 30
    else:
        lr_schedule = {51: 0.01, 71: 0.005}
        args.epochs = 150

    for epoch in range(1, args.epochs + 2):
        if epoch in lr_schedule:
            # A fresh optimizer is built at each milestone (resets momentum),
            # matching the original behavior.
            optimizer = Optimizer(model.parameters(), lr=lr_schedule[epoch],
                                  momentum=args.momentum,
                                  weight_decay=args.weight_decay)
        origin_train(args, model, device, train_loader, optimizer, epoch,
                     test_loader)

    if args.save_model:
        filename = "saved_{}_{}.pt".format(args.network, datetime.now())
        torch.save(model.state_dict(), filename)
def main():
    """Run DPVI inference on one income split and persist models and params.

    Reads from ``sys.argv``: [1] DP noise multiplier sigma, [2] income split
    ("rich" selects the Target==1 subset, anything else the Target==0 subset),
    [3] random seed. Writes a log file plus pickled results under ./res/.
    """
    # Set DPVI params
    T = 80000      # number of optimization iterations
    C = 2.0        # gradient clipping threshold
    lr = .0005
    q = 0.005      # sampling fraction per iteration
    sigma = float(sys.argv[1])
    income = sys.argv[2]
    seed = int(sys.argv[3])
    torch.manual_seed(seed)
    npr.seed(seed)

    # Set number of mixture components
    k = 10
    # Per-variable parameter shapes; presumably `data` / `variable_types` are
    # module-level globals — defined outside this chunk.
    param_dims = OrderedDict()
    for key, value in variable_types.items():
        if key == 'pi_unconstrained':
            param_dims[key] = [k - 1]
        else:
            if value == 'Bernoulli':
                param_dims[key] = [k]
            elif value == 'Categorical':
                param_dims[key] = [k, len(np.unique(data[key]))]
            elif value == 'Beta':
                param_dims[key] = [2, k]
    input_dim = int(np.sum([np.prod(value) for value in param_dims.values()]))
    flat_param_dims = np.array(
        [np.prod(value) for value in param_dims.values()])

    rich_data = data[data['Target'] == 1]
    batch_size_rich = int(q * len(rich_data))
    poor_data = data[data['Target'] == 0]
    batch_size_poor = int(q * len(poor_data))

    ### Save log
    date = datetime.date.today().isoformat()
    wall_start = time.time()
    # Fix: time.clock() was removed in Python 3.8; process_time() is the
    # supported CPU-time replacement.
    cpu_start = time.process_time()
    out_file = open("out_file_{}_{}_{}.txt".format(income, date, sigma), "a")
    sys.stdout = out_file
    print("Sigma : {}".format(sigma))

    ## Containers for models
    models = []
    from torch.optim import Adam as Optimizer
    from dpvi import DPVI
    if income == "rich":
        rich_model = ReparamXpand(batch_size_rich, input_dim, param_dims,
                                  flat_param_dims)
        optimizer_rich = Optimizer(rich_model.parameters(), lr=lr)
        # Init mixture fractions to N(0, exp(-2.0))
        rich_model.reparam.bias.data[:, -(k - 1):] = 0.0 * torch.ones_like(
            rich_model.reparam.bias.data[:, -(k - 1):])
        rich_model.reparam.weight.data[:, -(k - 1):] = -2.0 * torch.ones_like(
            rich_model.reparam.weight.data[:, -(k - 1):])
        rich_model_ = DPVI(rich_model, T, rich_data, batch_size_rich,
                           optimizer_rich, C, sigma, variable_types)
        models.append(rich_model_)
    else:
        poor_model = ReparamXpand(batch_size_poor, input_dim, param_dims,
                                  flat_param_dims)
        optimizer_poor = Optimizer(poor_model.parameters(), lr=lr)
        # Init mixture fractions to N(0, exp(-2.0))
        poor_model.reparam.bias.data[:, -(k - 1):] = 0.0 * torch.ones_like(
            poor_model.reparam.bias.data[:, -(k - 1):])
        poor_model.reparam.weight.data[:, -(k - 1):] = -2.0 * torch.ones_like(
            poor_model.reparam.weight.data[:, -(k - 1):])
        poor_model_ = DPVI(poor_model, T, poor_data, batch_size_poor,
                           optimizer_poor, C, sigma, variable_types)
        models.append(poor_model_)

    wall_end = time.time()
    cpu_end = time.process_time()
    print('Wall time {}'.format(wall_end - wall_start))
    print('CPU time {}'.format(cpu_end - cpu_start))

    ## Compute privacy budget
    from privacy.analysis.compute_dp_sgd_privacy import compute_rdp, get_privacy_spent
    delta = 1e-5
    rdp_orders = range(2, 500)
    rdp_eps = compute_rdp(q, sigma, T, rdp_orders)
    epsilon = 2 * get_privacy_spent(
        rdp_orders, rdp_eps, target_delta=delta / 2)[0]

    # Fix: close the output files deterministically (the originals leaked the
    # handles returned by open()).
    with open('./res/models_{}_{}_{}_{}.p'.format(income, date, sigma, seed),
              'wb') as f:
        pickle.dump(models, f)
    params = {
        'T': T, 'C': C, 'lr': lr, 'k': k, 'q': q,
        'sigma': sigma, 'epsilon': epsilon, 'seed': seed
    }
    with open('./res/params_{}_{}_{}_{}.p'.format(income, date, sigma, seed),
              'wb') as f:
        pickle.dump(params, f)
    out_file.close()