Example #1
    # Assumes module-level imports (not shown in this excerpt): theano,
    # theano.tensor as T, and a project-local `adam` module whose
    # Adam(loss, params) returns an updates dictionary.
    def get_functions(self):
        x = T.ftensor4('x')  # input
        y = T.fvector('y')  # expected output
        lr = T.scalar('lr')  # learning rate

        # zero-pad the input by self.pad pixels on each spatial side
        pad_x = T.zeros([1, self.input_channels] +
                        [2 * self.pad + self.input_dim] * 2)
        pad_x = T.set_subtensor(
            pad_x[:, :, self.pad:-self.pad, self.pad:-self.pad], x)

        out, locations, wl_grad = self.full_network(pad_x)  # wl_grad drives the W_l update below
        loss = self.loss(y, out)
        reward = self.reward(loss)

        # REINFORCE-style update for W_l: advantage (reward minus baseline)
        # times the location gradient, normalized by its L2 norm
        rl_gradient = ((reward - self.base_reward).sum() * wl_grad
                       / T.sqrt(T.sum(wl_grad ** 2)))
        # baseline update: minimize the squared advantage
        br_gradient, = T.grad(T.sqr(reward - self.base_reward).sum(),
                              [self.base_reward])

        train_updates = adam.Adam(loss, self.nn_params)
        train_updates[self.W_l] = self.W_l + lr * rl_gradient  # gradient ascent
        train_updates[self.base_reward] = self.base_reward - lr * br_gradient

        train = theano.function(
            inputs=[x, y, lr],
            outputs=[out, loss, reward],
            updates=train_updates)

        predict = theano.function(
            inputs=[x],
            outputs=[out, locations])

        return train, predict
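
A minimal usage sketch for the two compiled functions; `model`, `x_batch`, `y_batch`, and the shapes are illustrative assumptions, not from the original source:

import numpy as np

train, predict = model.get_functions()
x_batch = np.zeros((1, 3, 32, 32), dtype='float32')  # hypothetical input batch
y_batch = np.zeros((10,), dtype='float32')           # hypothetical target vector
out, loss, reward = train(x_batch, y_batch, 1e-3)    # one update step at lr=1e-3
out, locations = predict(x_batch)                    # inference, no parameter updates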
Example #2
import torch


def get_optimizer(model, args):
    import sys
    sys.path.insert(0, '/home/ubuntu/skin_demo/RoP/AMSGrad/')  # local optimizer variants (adam, adam_amsgrad, adamw) live here

    if args.optimizer == 'sgd':
        return torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), args.lr,
                               momentum=args.momentum, nesterov=args.nesterov,
                               weight_decay=args.weight_decay)
    elif args.optimizer == 'rmsprop':
        # note: unlike the other branches, this one does not filter out
        # parameters with requires_grad=False
        return torch.optim.RMSprop(model.parameters(), args.lr,
                                   alpha=args.alpha,
                                   weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        return torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), args.lr,
                                # betas=(args.beta1, args.beta2),
                                weight_decay=args.weight_decay)

    elif args.optimizer == 'adam_1':
        import adam
        optimizer = adam.Adam([{'params': model.parameters(), 'lr': args.lr}
                               ], lr=args.lr, weight_decay=0.0001, amsgrad=True)
        return optimizer

    elif args.optimizer == 'adam_2':
        import adam_amsgrad
        optimizer = adam_amsgrad.Adam([{'params': model.parameters(), 'lr': args.lr}
                                       ], lr=args.lr, weight_decay=0.0001, amsgrad=True)
        return optimizer

    elif args.optimizer == 'adam_3':
        import adamw
        optimizer = adamw.Adam([{'params': model.parameters(), 'lr': args.lr}
                                ], lr=args.lr, weight_decay=0.0001)
        return optimizer

    else:
        raise NotImplementedError
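
A usage sketch; the argparse fields mirror the attributes the function reads, and all values are illustrative, not from the original source:

import argparse

args = argparse.Namespace(optimizer='sgd', lr=0.01, momentum=0.9,
                          nesterov=True, weight_decay=1e-4,
                          alpha=0.99)  # alpha is only read by the 'rmsprop' branch
optimizer = get_optimizer(model, args)  # `model` is any torch.nn.Module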
Example #3
def Adam(grads, lr=0.0002, b1=0.1, b2=0.001, e=1e-8):
    # Thin wrapper around the project's `adam` module; b1/b2 appear to follow
    # the "1 - beta" convention used by some Theano-style Adam implementations
    # (i.e. b1=0.1 corresponds to beta1=0.9).
    return adam.Adam(grads, lr, b1, b2, e)
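
Assuming the "1 - beta" convention noted above, the defaults map onto the Adam paper's standard hyperparameters; this sketch makes that mapping explicit and is not part of the original source:

beta1 = 1.0 - 0.1    # 0.9, the paper-standard beta1
beta2 = 1.0 - 0.001  # 0.999, the paper-standard beta2
updates = Adam(grads)  # `grads` is a hypothetical placeholder for the wrapped module's first argument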
Example #4
        return action


# In[16]:

current_model = CnnDQN(env.observation_space.shape, env.action_space.n)
# current_model.load_state_dict(torch.load('current.ckpt'))
target_model = CnnDQN(env.observation_space.shape, env.action_space.n)
# target_model.load_state_dict(torch.load('target.ckpt'))

if USE_CUDA:
    current_model = current_model.cuda()
    target_model = target_model.cuda()

# optimizer = optim.Adam(current_model.parameters(), lr=0.00001)
optimizer = adam.Adam(current_model.parameters(), lr=0.00001)  # project-local `adam` module, not torch.optim

replay_initial = 10000
replay_buffer = ReplayBuffer(100000)

update_target(current_model, target_model)
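
update_target is not defined in this excerpt; a common implementation, assumed here rather than taken from the source, is a hard copy of the online network's weights into the target network:

def update_target(current_model, target_model):
    # hard update: overwrite the target net with the online net's parameters
    target_model.load_state_dict(current_model.state_dict())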

# In[17]:

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000

epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)
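
Sample values of the schedule, computed from the formula above (and assuming `math` is imported earlier in the notebook), show the exponential decay from 1.0 toward the 0.01 floor:

print(epsilon_by_frame(0))       # 1.0
print(epsilon_by_frame(30000))   # ~0.374, i.e. 0.01 + 0.99 * exp(-1)
print(epsilon_by_frame(120000))  # ~0.028, i.e. 0.01 + 0.99 * exp(-4)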