Example #1
def run():
    options = parse_options()
    print(options)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    os.makedirs(options.data_dir, exist_ok=True)
    os.makedirs(options.output_dir, exist_ok=True)
    os.makedirs(options.model_dir, exist_ok=True)

    with open(os.path.join(options.output_dir, 'options.json'), 'w') as f:
        json.dump(vars(options), f, indent=4)

    if options.restore:
        # map_location lets checkpoints saved on GPU be restored on a CPU-only machine
        generator = torch.load(os.path.join(options.model_dir, 'generator.pt'), map_location=device)
        critic = torch.load(os.path.join(options.model_dir, 'critic.pt'), map_location=device)
    else:
        generator = Generator(options.image_size, options.state_size)
        critic = Critic(options.image_size)

        generator.apply(init_weights)
        critic.apply(init_weights)
    generator = generator.to(device)
    critic = critic.to(device)

    transform = transforms.Compose([
        transforms.Resize((options.image_size, options.image_size)),
        transforms.CenterCrop(options.image_size),  # no-op: Resize above already returns an image_size x image_size image
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    if options.dataset == 'lsun':
        training_class = options.image_class + '_train'
        dataset = datasets.LSUN(options.data_dir,
                                classes=[training_class],
                                transform=transform)
    else:
        dataset = datasets.ImageFolder(root=options.data_dir,
                                       transform=transform)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=options.batch_size,
                                             num_workers=4,
                                             shuffle=True,
                                             drop_last=True,
                                             pin_memory=True)

    train(generator, critic, dataloader, device, options)
Example #2
class AgentDDPG:
    """Deep Deterministic Policy Gradient implementation for continuous action space reinforcement learning tasks"""
    def __init__(self,
                 state_size,
                 hidden_size,
                 action_size,
                 actor_learning_rate=1e-4,
                 critic_learning_rate=1e-3,
                 gamma=0.99,
                 tau=1e-2,
                 use_cuda=False,
                 actor_path=None,
                 critic_path=None):
        # Params
        self.state_size, self.hidden_size, self.action_size = state_size, hidden_size, action_size
        self.gamma, self.tau = gamma, tau
        self.use_cuda = use_cuda

        # Networks
        self.actor = Actor(state_size, hidden_size, action_size)
        self.actor_target = Actor(state_size, hidden_size, action_size)

        self.critic = Critic(state_size + action_size, hidden_size,
                             action_size)
        self.critic_target = Critic(state_size + action_size, hidden_size,
                                    action_size)

        # Load model state_dicts from saved file
        if actor_path and path.exists(actor_path):
            self.actor.load_state_dict(torch.load(actor_path))

        if critic_path and path.exists(critic_path):
            self.critic.load_state_dict(torch.load(critic_path))

        # Hard copy params from original networks to target networks
        copy_params(self.actor, self.actor_target)
        copy_params(self.critic, self.critic_target)

        if self.use_cuda:
            self.actor.cuda()
            self.actor_target.cuda()
            self.critic.cuda()
            self.critic_target.cuda()

        # Create replay buffer for storing experience
        self.replay_buffer = ReplayBuffer(cache_size=int(1e6))

        # Training
        self.critic_criterion = nn.MSELoss()
        self.actor_optimizer = optim.Adam(self.actor.parameters(),
                                          lr=actor_learning_rate)
        self.critic_optimizer = optim.Adam(self.critic.parameters(),
                                           lr=critic_learning_rate)

    def save_to_file(self, actor_file, critic_file):
        # Save the state_dict's of the Actor and Critic networks
        torch.save(self.actor.state_dict(), actor_file)
        torch.save(self.critic.state_dict(), critic_file)

    def get_action(self, state):
        """Select action with respect to state according to current policy and exploration noise"""
        state = Variable(torch.from_numpy(state).float())

        if self.use_cuda:
            state = state.cuda()

        a = self.actor(state)

        if self.use_cuda:
            return a.detach().cpu().numpy()

        return a.detach().numpy()

    def save_experience(self, state_t, action_t, reward_t, state_t1):
        self.replay_buffer.add_sample(state_t, action_t, reward_t, state_t1)

    def update(self, batch_size):
        states, actions, rewards, next_states = self.replay_buffer.get_samples(
            batch_size)
        states = torch.FloatTensor(states)
        actions = torch.FloatTensor(actions)
        rewards = torch.FloatTensor(rewards)
        next_states = torch.FloatTensor(next_states)

        if self.use_cuda:
            states = states.cuda()
            next_states = next_states.cuda()
            actions = actions.cuda()
            rewards = rewards.cuda()

        # Critic loss: TD target built from the target networks (detached so no
        # gradients flow into them)
        Qvals = self.critic(states, actions)
        next_actions = self.actor_target(next_states)
        next_Q = self.critic_target(next_states, next_actions.detach())
        Qprime = rewards + self.gamma * next_Q.detach()
        critic_loss = self.critic_criterion(Qvals, Qprime)

        # Update critic
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        self.critic_optimizer.step()

        # Actor loss
        policy_loss = -self.critic(states, self.actor(states)).mean()

        # Update actor
        self.actor_optimizer.zero_grad()
        policy_loss.backward()
        self.actor_optimizer.step()

        # update target networks
        soft_copy_params(self.actor, self.actor_target, self.tau)
        soft_copy_params(self.critic, self.critic_target, self.tau)

    def add_noise_to_weights(self, amount=0.1):
        self.actor.apply(
            lambda x: _add_noise_to_weights(x, amount, self.use_cuda))
        self.critic.apply(
            lambda x: _add_noise_to_weights(x, amount, self.use_cuda))
        self.actor_target.apply(
            lambda x: _add_noise_to_weights(x, amount, self.use_cuda))
        self.critic_target.apply(
            lambda x: _add_noise_to_weights(x, amount, self.use_cuda))
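A minimal usage sketch for the agent above, assuming a classic Gym-style environment with a continuous action space. The environment id, hidden size, episode count, batch size, and file names are illustrative placeholders, and action scaling/shapes depend on how Actor and ReplayBuffer are defined.

# Illustrative only: interaction loop for AgentDDPG with a Gym-style environment.
# Assumes the classic gym API (reset() returns obs, step() returns (obs, reward, done, info)).
import gym

env = gym.make("Pendulum-v1")  # placeholder continuous-control task
agent = AgentDDPG(state_size=env.observation_space.shape[0],
                  hidden_size=256,
                  action_size=env.action_space.shape[0])

batch_size = 128
total_steps = 0
for episode in range(100):
    state = env.reset()
    done = False
    while not done:
        action = agent.get_action(state)
        next_state, reward, done, _ = env.step(action)
        agent.save_experience(state, action, reward, next_state)
        total_steps += 1
        if total_steps >= batch_size:  # wait until the buffer holds at least one batch
            agent.update(batch_size)
        state = next_state

agent.save_to_file("actor.pt", "critic.pt")  # placeholder file names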
Example #3
transform = transforms.Compose([
    transforms.Resize(64),
    transforms.CenterCrop(64),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

dataset = datasets.ImageFolder(root=data_path, transform=transform)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

critic = Critic(n_channels, width_shape).to(device)
generator = Generator(n_dimension, width_shape, n_channels).to(device)

generator = generator.apply(weights_init)
critic = critic.apply(weights_init)

opt_critic = optim.Adam(critic.parameters(),
                        lr=learning_rate,
                        betas=(0.0, 0.9))
opt_gen = optim.Adam(generator.parameters(),
                     lr=learning_rate,
                     betas=(0.0, 0.9))

fixed_sample = torch.randn(batch_size, n_dimension, 1, 1).to(device)

if Load == "True":
    print("Load Weights...")
    critic.load_state_dict(torch.load("critic_weights.pt"))
    generator.load_state_dict(torch.load("gen_weights.pt"))
Example #4
'''Initialize weights:
   Here, we initialize the weights from a normal distribution
   with mean 0 and standard deviation 0.02.
'''


def initialize_weights(m):
    # Conv / ConvTranspose / BatchNorm weights ~ N(0, 0.02), as described above
    if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
        nn.init.normal_(m.weight, 0.0, 0.02)
    if isinstance(m, nn.BatchNorm2d):
        nn.init.normal_(m.weight, 0.0, 0.02)
        nn.init.constant_(m.bias, 0)


gen = gen.apply(initialize_weights)
crit = crit.apply(initialize_weights)

######################### Train WGAN-GP ###############################
""" 
    Finally, we can train the WGAN-GP model! For each epoch, we will process the entire dataset in batches. 
    For every batch, we will update the discriminator and generator.

"""

####### Note #######
"""  WGAN-GP isn't necessarily meant to improve overall performance of a GAN, 
      but just **increases stability** and **avoids mode collapse**. 
      In general, a WGAN will be able to train in a much more stable way than the vanilla DCGAN, 
      though it will generally run a bit slower. Also, we  should train WGAN model for more epochs without it collapsing.
"""