def test_multi_step_multi_gamma_lr():
    """MultiStepMultiGammaLR: argument validation plus the multiplicative decay schedule."""
    dummy_tensor = torch.zeros(3, 3, 3, requires_grad=True)
    dummy_optimizer = Optimizer([dummy_tensor], {'lr': 0.1})

    # The constructor must reject malformed milestone/gamma combinations.
    bad_configs = [
        ([60, 30, 80], [0.1, 0.1, 0.2]),  # milestones not ascending
        ([30, 60], [0.1, 0.1, 0.2]),      # fewer milestones than gammas
        ([30, 60, 80], [0.1, 0.1]),       # fewer gammas than milestones
    ]
    for milestones, gammas in bad_configs:
        with pytest.raises(ValueError):
            MultiStepMultiGammaLR(dummy_optimizer, milestones=milestones, gammas=gammas)

    # Gammas accumulate multiplicatively across milestones.
    scheduler = MultiStepMultiGammaLR(dummy_optimizer, milestones=[30, 60, 80], gammas=[0.1, 0.1, 0.2])
    expected_gammas = [1, 1 * 0.1, 1 * 0.1 * 0.1, 1 * 0.1 * 0.1 * 0.2]
    expected_lrs = [0.1 * gamma for gamma in expected_gammas]
    assert scheduler.multiplicative_gammas == expected_gammas

    # Stepping to an epoch applies the LR of the last milestone reached.
    for epoch, lr_idx in [(0, 0), (15, 0), (30, 1), (33, 1),
                          (60, 2), (79, 2), (80, 3), (100, 3)]:
        scheduler.step(epoch)
        assert dummy_optimizer.param_groups[0]['lr'] == expected_lrs[lr_idx]
Beispiel #2
0
    def set_world_properties(self, n_actions: int, size: int, dtype: str) -> None:
        """
        Initiate parameters that depend on world parameters.

        :param n_actions: available number of actions
        :param size: board size (used as the network input width for DQN)
        :param dtype: optimization algorithm type, 'RQL' or 'DQN'
        """
        # set action range
        self._actions = range(n_actions)

        # set state data type
        self._dtype = dtype

        # Initiate according to data type.
        # Bug fix: the original used `is 'DQN'` / `is 'RQL'`, which compares
        # object identity and only happens to work because CPython interns
        # short string literals — an implementation detail. Use `==` for
        # value equality.
        if self._dtype == 'DQN':
            # initiate neural network, computes V(s_t) expected values of actions for given state
            # noinspection PyUnresolvedReferences
            self.policy_net = DQN(inputs=size, outputs=n_actions,
                                  hidden_depth=self._n_hidden, hidden_dim=self._w_hidden).to(device=dev)
            # noinspection PyUnresolvedReferences
            self.target_net = DQN(inputs=size, outputs=n_actions,
                                  hidden_depth=self._n_hidden, hidden_dim=self._w_hidden).to(device=dev)
            # Target net starts as a copy of the policy net and is put in
            # eval mode (it is not trained directly).
            self.target_net.load_state_dict(self.policy_net.state_dict())
            self.target_net.eval()
            # set optimizer
            # noinspection PyUnresolvedReferences
            self._optimizer = Optimizer(self.policy_net.parameters(), lr=self._lr, alpha=self._alpha)
        elif self._dtype == 'RQL':
            # Tabular path: start with empty lookup tables (presumably keyed
            # by state — confirm against callers).
            self._q = {}
            self._best_action = {}
Beispiel #3
0
    # NOTE(review): fragment of a training entry point — the enclosing `def`
    # and the imports (`t` is presumably torch; `Model`, `Dataloader`,
    # `Optimizer`, `SummaryWriter` come from elsewhere) are outside this view.
    device = t.device('cuda' if t.cuda.is_available() else 'cpu')

    # TensorBoard writer; NOTE(review): `writer` is never used in the visible
    # span — logging presumably happens past the chunk boundary.
    writer = SummaryWriter(args.tensorboard)

    t.set_num_threads(args.num_threads)
    loader = Dataloader('./data/')

    # Model sized from the dataset; initialized from pre-trained embeddings
    # on disk.
    model = Model(args.num_layers,
                  args.num_heads,
                  args.dropout,
                  max_len=loader.max_len,
                  embeddings_path='./data/embeddings.npy')
    model.to(device)

    # Only the model's learnable (non-frozen) parameters are optimized.
    optimizer = Optimizer(model.learnable_parameters(),
                          lr=0.0002,
                          amsgrad=True)

    print('Model have initialized')

    # One batch per iteration; `nll` is presumably the negative log-likelihood
    # loss returned by the model — confirm against Model.forward.
    for i in range(args.num_iterations):

        optimizer.zero_grad()

        model.train()

        input, target = loader.next_batch(args.batch_size, 'train', device)
        nll = model(input, target)
        nll.backward()

        optimizer.step()
Beispiel #4
0
# NOTE(review): top-level fragment of a training script; `multi_gpu`,
# `mymodel`, `cnfg`, `Optimizer`, `LRScheduler`, `Holder`, `statesf`,
# `cnt_states` and the RNG-state wrappers are defined elsewhere in the file.
if multi_gpu:
    # Multi-GPU path: the model builds its own optimizer so parameters can
    # be handled per device (optionally as contiguous buffers).
    optimizer = mymodel.build_optimizer(
        Optimizer,
        lr=init_lr,
        betas=adam_betas_default,
        eps=ieps_adam_default,
        weight_decay=cnfg.weight_decay,
        amsgrad=use_ams,
        multi_gpu_optimizer=multi_gpu_optimizer,
        contiguous_parameters=contiguous_parameters)
else:
    # Single-device path: an Adam-style optimizer (betas/eps/amsgrad) over
    # the model's parameters.
    optimizer = Optimizer(get_model_parameters(
        mymodel, contiguous_parameters=contiguous_parameters),
                          lr=init_lr,
                          betas=adam_betas_default,
                          eps=ieps_adam_default,
                          weight_decay=cnfg.weight_decay,
                          amsgrad=use_ams)
optimizer.zero_grad(set_to_none=optm_step_zero_grad_set_none)

# Learning-rate schedule scaled by model size and warm-up steps
# (Noam-style — TODO confirm against LRScheduler's implementation).
lrsch = LRScheduler(optimizer, cnfg.isize, cnfg.warm_step, scale=cnfg.lr_scale)

# Bundle everything whose state must be checkpointed/restored together;
# skipped entirely when neither a state file nor a state count is configured.
state_holder = None if statesf is None and cnt_states is None else Holder(
    **{
        "optm": optimizer,
        "lrsch": lrsch,
        "pyrand": PyRandomState(),
        "thrand": THRandomState(use_cuda=use_cuda)
    })
Beispiel #5
0
# NOTE(review): top-level fragment; the DataParallelCriterion call at the end
# is cut off by the chunk boundary, so its remaining arguments are not
# visible here. Code below is preserved byte-for-byte.
if cnfg.src_emb is not None:
    logger.info("Load source embedding from: " + cnfg.src_emb)
    load_emb(cnfg.src_emb, mymodel.enc.wemb.weight, nwordi,
             cnfg.scale_down_emb, cnfg.freeze_srcemb)
if cnfg.tgt_emb is not None:
    logger.info("Load target embedding from: " + cnfg.tgt_emb)
    load_emb(cnfg.tgt_emb, mymodel.dec.wemb.weight, nwordt,
             cnfg.scale_down_emb, cnfg.freeze_tgtemb)

# Move model and loss to the GPU before the optimizer below captures
# references to the parameters.
if use_cuda:
    mymodel.to(cuda_device)
    lossf.to(cuda_device)

# Adam-style optimizer (betas/eps/amsgrad) over all model parameters.
optimizer = Optimizer(mymodel.parameters(),
                      lr=init_lr,
                      betas=adam_betas_default,
                      eps=ieps_adam_default,
                      weight_decay=cnfg.weight_decay,
                      amsgrad=use_ams)
optimizer.zero_grad(set_to_none=True)

# Mixed precision is only enabled together with CUDA; the GradScaler guards
# reduced-precision gradients.
use_amp = cnfg.use_amp and use_cuda
scaler = GradScaler() if use_amp else None

# Replicate model and criterion across devices for data parallelism.
if multi_gpu:
    mymodel = DataParallelMT(mymodel,
                             device_ids=cuda_devices,
                             output_device=cuda_device.index,
                             host_replicate=True,
                             gather_output=False)
    lossf = DataParallelCriterion(lossf,
                                  device_ids=cuda_devices,
Beispiel #6
0
def main():
    """Parse CLI settings, build the model and optimizer, train, optionally save."""
    # Training settings
    parser = argparse.ArgumentParser(
        description='Gradient Quantization Samples')

    # Model / dataset selection.
    parser.add_argument('--network', type=str, default='fcn',
                        choices=network_choices.keys())
    parser.add_argument('--dataset', type=str, default='mnist',
                        choices=data_loaders.keys())
    parser.add_argument('--num-classes', type=int, default=10,
                        choices=classes_choices.values())

    # Logging and batch/epoch settings.
    parser.add_argument('--logdir', type=str, default='./log/tmp',
                        help='For Saving the logs')
    parser.add_argument('--batch-size', type=int, default=32, metavar='N',
                        help='input batch size for training (default: 32)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=150, metavar='N',
                        help='number of epochs to train (default: 150)')

    # Optimizer and runtime knobs.
    parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument('--weight-decay', type=float, default=5e-4, metavar='M',
                        help='weight decay momentum (default: 5e-4)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-epoch', type=int, default=1, metavar='N',
                        help='logging training status at each epoch')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    parser.add_argument('--m', type=int, default=1, help='')
    parser.add_argument('--depth', type=int, default=1, help='')

    args = parser.parse_args()
    get_config(args)

    torch.manual_seed(args.seed)
    device = torch.device("cpu" if args.no_cuda else "cuda")

    train_loader, test_loader = DATASET_LOADER(args)
    model = NETWORK(num_classes=args.num_classes).to(device)

    def _make_optimizer(lr):
        # NOTE(review): rebuilding the optimizer at each decay point (instead
        # of editing param_group['lr']) also discards optimizer state —
        # presumably intentional; confirm.
        return Optimizer(model.parameters(),
                         lr=lr,
                         momentum=args.momentum,
                         weight_decay=args.weight_decay)

    optimizer = _make_optimizer(0.1)

    # Hard-coded learning-rate decay schedule per dataset.
    if args.dataset == 'mnist':
        decay_epochs, decay_lrs, args.epochs = [10, 20], [0.01, 0.001], 30
    else:
        decay_epochs, decay_lrs, args.epochs = [51, 71], [0.01, 0.005], 150

    for epoch in range(1, args.epochs + 2):
        for decay_epoch, decay_lr in zip(decay_epochs, decay_lrs):
            if epoch == decay_epoch:
                optimizer = _make_optimizer(decay_lr)

        origin_train(args, model, device, train_loader, optimizer, epoch,
                     test_loader)

    if args.save_model:
        filename = "saved_{}_{}.pt".format(args.network, datetime.now())
        torch.save(model.state_dict(), filename)
Beispiel #7
0
def main():
    """
    Run DPVI inference for one income group ('rich' or 'poor') of a mixture
    model, then pickle the fitted model, print timing, and record the
    differential-privacy budget.

    Command line: sys.argv[1] = sigma (DP noise multiplier),
    sys.argv[2] = income group ('rich'/'poor'), sys.argv[3] = RNG seed.
    """
    # Set DPVI params
    T = 80000          # number of optimization iterations (steps for the RDP accountant)
    C = 2.0            # clipping bound passed to DPVI — presumably gradient norm; confirm
    lr = .0005
    q = 0.005          # sampling fraction per iteration
    batch_size = int(q * N)
    sigma = float(sys.argv[1])
    income = sys.argv[2]
    seed = int(sys.argv[3])
    torch.manual_seed(seed)
    npr.seed(seed)
    # Set number of mixture components
    k = 10
    # Per-variable parameter shapes, driven by each variable's distribution
    # family; 'pi_unconstrained' holds the k-1 free mixture weights.
    param_dims = OrderedDict()
    for key, value in variable_types.items():
        if key == 'pi_unconstrained':
            param_dims[key] = [k - 1]
        else:
            if value == 'Bernoulli':
                param_dims[key] = [k]
            elif value == 'Categorical':
                param_dims[key] = [k, len(np.unique(data[key]))]
            elif value == 'Beta':
                param_dims[key] = [2, k]

    input_dim = int(np.sum([np.prod(value) for value in param_dims.values()]))
    flat_param_dims = np.array(
        [np.prod(value) for value in param_dims.values()])

    # Split the data by target label; batch sizes keep the same sampling
    # fraction q within each share.
    rich_data = data[data['Target'] == 1]
    batch_size_rich = int(q * len(rich_data))
    poor_data = data[data['Target'] == 0]
    batch_size_poor = int(q * len(poor_data))

    ### Save log
    date = datetime.date.today().isoformat()
    wall_start = time.time()
    # Bug fix: time.clock() was deprecated since Python 3.3 and removed in
    # 3.8; time.process_time() is the CPU-time replacement.
    cpu_start = time.process_time()
    out_file = open("out_file_{}_{}_{}.txt".format(income, date, sigma), "a")
    sys.stdout = out_file  # redirect all subsequent prints into the log file
    print("Sigma : {}".format(sigma))

    ## Containers for models
    models = []

    from torch.optim import Adam as Optimizer
    from dpvi import DPVI
    # NOTE(review): an older comment here said "repeat inference 10 times",
    # but a single model is fit per invocation.
    if income == "rich":
        rich_model = ReparamXpand(batch_size_rich, input_dim, param_dims,
                                  flat_param_dims)
        optimizer_rich = Optimizer(rich_model.parameters(), lr=lr)
        # Init mixture fractions to N(0, exp(-2.0))
        rich_model.reparam.bias.data[:, -(k - 1):] = 0.0 * torch.ones_like(
            rich_model.reparam.bias.data[:, -(k - 1):])
        rich_model.reparam.weight.data[:, -(k - 1):] = -2.0 * torch.ones_like(
            rich_model.reparam.weight.data[:, -(k - 1):])
        rich_model_ = DPVI(rich_model, T, rich_data, batch_size_rich,\
          optimizer_rich, C, sigma, variable_types)
        models.append(rich_model_)
    else:
        poor_model = ReparamXpand(batch_size_poor, input_dim, param_dims,
                                  flat_param_dims)
        optimizer_poor = Optimizer(poor_model.parameters(), lr=lr)
        # Init mixture fractions to N(0, exp(-2.0))
        poor_model.reparam.bias.data[:, -(k - 1):] = 0.0 * torch.ones_like(
            poor_model.reparam.bias.data[:, -(k - 1):])
        poor_model.reparam.weight.data[:, -(k - 1):] = -2.0 * torch.ones_like(
            poor_model.reparam.weight.data[:, -(k - 1):])

        poor_model_ = DPVI(poor_model, T, poor_data, batch_size_poor,\
          optimizer_poor, C, sigma, variable_types)
        models.append(poor_model_)
    wall_end = time.time()
    cpu_end = time.process_time()  # see note at cpu_start
    print('Wall time {}'.format(wall_end - wall_start))
    print('CPU time {}'.format(cpu_end - cpu_start))

    ## Compute privacy budget
    from privacy.analysis.compute_dp_sgd_privacy import compute_rdp, get_privacy_spent
    delta = 1e-5
    rdp_orders = range(2, 500)
    rdp_eps = compute_rdp(q, sigma, T, rdp_orders)
    # The factor 2 and delta/2 presumably account for composing the two data
    # shares — TODO confirm against the accompanying privacy analysis.
    epsilon = 2 * get_privacy_spent(
        rdp_orders, rdp_eps, target_delta=delta / 2)[0]

    # Bug fix: the output files were opened without ever being closed; use
    # context managers so the pickles are flushed to disk reliably.
    with open('./res/models_{}_{}_{}_{}.p'.format(income, date, sigma, seed),
              'wb') as models_file:
        pickle.dump(models, models_file)
    params = {
        'T': T,
        'C': C,
        'lr': lr,
        'k': k,
        'q': q,
        'sigma': sigma,
        'epsilon': epsilon,
        'seed': seed
    }
    with open('./res/params_{}_{}_{}_{}.p'.format(income, date, sigma, seed),
              'wb') as params_file:
        pickle.dump(params, params_file)
    out_file.close()