Example #1
File: trainer.py Project: onewarmheart/HER
    def __init__(self, args, state_dim, action_dim, action_lim, ram):
        """
		:param state_dim: Dimensions of state (int)
		:param action_dim: Dimension of action (int)
		:param action_lim: Used to limit action in [-action_lim,action_lim]
		:param ram: replay memory buffer object
		:return:
		"""
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.action_lim = action_lim
        self.ram = ram
        self.iter = 0
        self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)
        self.args = args

        self.actor = model.Actor(self.state_dim, self.action_dim,
                                 self.action_lim)
        self.target_actor = model.Actor(self.state_dim, self.action_dim,
                                        self.action_lim)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                self.args.learning_rate)

        self.critic = model.Critic(self.state_dim, self.action_dim)
        self.target_critic = model.Critic(self.state_dim, self.action_dim)
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 self.args.learning_rate)

        utils.hard_update(self.target_actor, self.actor)
        utils.hard_update(self.target_critic, self.critic)
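
Several of these snippets call utils.hard_update to copy the online networks' weights into the freshly created target networks, but the helper itself is not shown. A minimal sketch under the (target, source) argument order used above; the body is an assumption, not code from the project:

import torch


def hard_update(target, source):
    # Copy every parameter of the online (source) network into the target network.
    with torch.no_grad():
        for target_param, param in zip(target.parameters(), source.parameters()):
            target_param.copy_(param)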
Example #2
    def __init__(self, hp):
        """Initialize an Agent object.
        
        Params
        ======
            hp: hyper parameters
        """
        self.hp = hp

        # Actor Network (w/ Target Network)
        self.actor_local = model.Actor(self.hp.state_size, self.hp.action_size,
                                       self.hp.random_seed).to(device)
        self.actor_target = model.Actor(self.hp.state_size,
                                        self.hp.action_size,
                                        self.hp.random_seed).to(device)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=self.hp.lr_actor)

        # Critic Network (w/ Target Network)
        self.critic_local = model.Critic(self.hp.state_size,
                                         self.hp.action_size,
                                         self.hp.random_seed).to(device)
        self.critic_target = model.Critic(self.hp.state_size,
                                          self.hp.action_size,
                                          self.hp.random_seed).to(device)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=self.hp.lr_critic,
                                           weight_decay=self.hp.weight_decay)
        self.soft_update(self.critic_local, self.critic_target, 1)
        self.soft_update(self.actor_local, self.actor_target, 1)

        # Noise process
        self.noise = ounoise.OUNoise(self.hp.action_size, self.hp.random_seed)
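
The OUNoise / OrnsteinUhlenbeckActionNoise objects used for exploration are also defined outside these snippets. A minimal sketch of an Ornstein-Uhlenbeck noise process with the (size, seed) style constructor seen here; the mu, theta and sigma defaults are assumptions:

import numpy as np


class OUNoise:
    """Ornstein-Uhlenbeck process producing temporally correlated action noise."""

    def __init__(self, size, seed, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.rng = np.random.default_rng(seed)
        self.reset()

    def reset(self):
        # Restart the process from its long-run mean at the start of an episode.
        self.state = self.mu.copy()

    def sample(self):
        # x_{t+1} = x_t + theta * (mu - x_t) + sigma * N(0, 1)
        dx = self.theta * (self.mu - self.state) \
            + self.sigma * self.rng.standard_normal(self.mu.shape)
        self.state = self.state + dx
        return self.state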
Example #3
    def __init__(self, state_dim, action_dim, action_lim, ram, device='cpu'):
        """
        :param state_dim: Dimensions of state (int)
        :param action_dim: Dimension of action (int)
        :param action_lim: Used to limit action in [-action_lim,action_lim]
        :param ram: replay memory buffer object
        :param device: torch device on which the networks are placed
        :return:
        """
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.action_lim = action_lim
        self.ram = ram
        self.iter = 0
        self.device = device
        # self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)

        self.actor = model.Actor(self.state_dim, self.action_dim,
                                 self.action_lim).to(device)
        self.target_actor = model.Actor(self.state_dim, self.action_dim,
                                        self.action_lim).to(device)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                LEARNING_RATE)

        self.critic = model.Critic(self.state_dim, self.action_dim).to(device)
        self.target_critic = model.Critic(self.state_dim,
                                          self.action_dim).to(device)
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 LEARNING_RATE)

        utils.hard_update(self.target_actor, self.actor)
        utils.hard_update(self.target_critic, self.critic)
Example #4
    def __init__(self, config, state_size, action_size, num_agents, seed):
        """Initialize an Agent object.
        
        Params
        ======
            config: configuration object carrying hyperparameters and the device
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            num_agents (int): number of parallel agents
            seed (int): random seed
        """

        self.config = config

        self.state_size = state_size
        self.action_size = action_size
        self.num_agents = num_agents
        self.seed = random.seed(seed)

        # Initialize the Actor and Critic Networks
        self.actor = model.Actor(state_size, action_size,
                                 seed).to(self.config.device)
        self.actor_target = model.Actor(state_size, action_size,
                                        seed).to(self.config.device)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                self.config.LR_actor)

        self.critic = model.Critic(state_size, action_size,
                                   seed).to(self.config.device)
        self.critic_target = model.Critic(state_size, action_size,
                                          seed).to(self.config.device)
        self.critic_optimizer = torch.optim.Adam(
            self.critic.parameters(),
            self.config.LR_critic,
            weight_decay=self.config.weight_decay)

        # Initialize the random-noise-process for action-noise
        self.is_training = True
        self.randomer = OUNoise((self.num_agents, self.action_size), seed)

        # Hard update the target networks to have the same parameters as the local networks
        for target_param, param in zip(self.actor_target.parameters(),
                                       self.actor.parameters()):
            target_param.data.copy_(param.data)
        for target_param, param in zip(self.critic_target.parameters(),
                                       self.critic.parameters()):
            target_param.data.copy_(param.data)

        # Initialize replay-buffer
        self.memory = ReplayBuffer(self.config.BUFFER_SIZE,
                                   self.config.BATCH_SIZE, seed,
                                   self.config.device)
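
The ReplayBuffer constructed at the end of this example (and in example #5) is likewise external. A minimal sketch of a uniform replay buffer matching the (buffer_size, batch_size, seed, device) constructor used above; the add/sample interface is an assumption:

import random
from collections import deque, namedtuple

import numpy as np
import torch

Experience = namedtuple("Experience",
                        ["state", "action", "reward", "next_state", "done"])


class ReplayBuffer:
    def __init__(self, buffer_size, batch_size, seed, device):
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        self.device = device
        random.seed(seed)

    def add(self, state, action, reward, next_state, done):
        self.memory.append(Experience(state, action, reward, next_state, done))

    def sample(self):
        # Uniformly sample a mini-batch and stack it into tensors on the agent's device.
        batch = random.sample(self.memory, k=self.batch_size)
        to_tensor = lambda xs: torch.from_numpy(np.vstack(xs)).float().to(self.device)
        states = to_tensor([e.state for e in batch])
        actions = to_tensor([e.action for e in batch])
        rewards = to_tensor([e.reward for e in batch])
        next_states = to_tensor([e.next_state for e in batch])
        dones = to_tensor([float(e.done) for e in batch])
        return states, actions, rewards, next_states, dones

    def __len__(self):
        return len(self.memory)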
Example #5
    def __init__(self,
                 state_size,
                 action_size,
                 random_seed,
                 num_envs=1,
                 checkpt_folder="checkpt"):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            random_seed (int): random seed
            num_envs (int): number of parallel environment copies
            checkpt_folder (str): folder used for saving/loading checkpoints
        """
        self.state_size = state_size
        self.num_envs = num_envs
        self.action_size = action_size
        self.seed = random.seed(random_seed)
        self.CHECKPOINT_FOLDER = checkpt_folder

        # Actor Network (w/ Target Network)
        self.actor_local = model.Actor(state_size, action_size,
                                       random_seed).to(DEVICE)
        self.actor_target = model.Actor(state_size, action_size,
                                        random_seed).to(DEVICE)
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                          lr=LR_ACTOR)

        # Critic Network (w/ Target Network)
        self.critic_local = model.Critic(state_size, action_size,
                                         random_seed).to(DEVICE)
        self.critic_target = model.Critic(state_size, action_size,
                                          random_seed).to(DEVICE)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                           lr=LR_CRITIC,
                                           weight_decay=WEIGHT_DECAY)
        '''if os.path.isfile(self.CHECKPOINT_FOLDER + 'checkpoint_actor.pth') and os.path.isfile(self.CHECKPOINT_FOLDER + 'checkpoint_critic.pth'):
            self.actor_local.load_state_dict(torch.load(self.CHECKPOINT_FOLDER + 'checkpoint_actor.pth'))
            self.actor_target.load_state_dict(torch.load(self.CHECKPOINT_FOLDER + 'checkpoint_actor.pth'))

            self.critic_local.load_state_dict(torch.load(self.CHECKPOINT_FOLDER + 'checkpoint_critic.pth'))
            self.critic_target.load_state_dict(torch.load(self.CHECKPOINT_FOLDER + 'checkpoint_critic.pth'))'''

        # Noise process
        self.noise = OUNoise((num_envs, action_size), random_seed)

        # Replay memory
        self.memory = ReplayBuffer(DEVICE, action_size, BUFFER_SIZE,
                                   BATCH_SIZE, random_seed)
Example #6
    def __init__(self, env, results_path, tok, episode_len=20):
        super(Seq2SeqAgent, self).__init__(env, results_path)
        self.tok = tok
        self.episode_len = episode_len
        self.feature_size = self.env.feature_size

        # Models
        self.glove_dim = 300
        with open('img_features/objects/object_vocab.txt', 'r') as f_ov:
            self.obj_vocab = [k.strip() for k in f_ov.readlines()]
        glove_matrix = get_glove_matrix(self.obj_vocab, self.glove_dim)
        self.objencoder = ObjEncoder(glove_matrix.size(0), glove_matrix.size(1), glove_matrix).cuda()

        enc_hidden_size = args.rnn_dim//2 if args.bidir else args.rnn_dim

        self.encoder = model.EncoderLSTM(tok.vocab_size(), args.wemb, enc_hidden_size, padding_idx,
                                         args.dropout, bidirectional=args.bidir).cuda()
        self.decoder = model.AttnDecoderLSTM_Graph(args.aemb, args.rnn_dim, args.dropout, feature_size=self.feature_size + args.angle_feat_size).cuda()
        self.critic = model.Critic().cuda()
        self.models = (self.encoder, self.decoder, self.critic)

        # Optimizers
        self.encoder_optimizer = args.optimizer(self.encoder.parameters(), lr=args.lr)
        self.decoder_optimizer = args.optimizer(self.decoder.parameters(), lr=args.lr)
        self.critic_optimizer = args.optimizer(self.critic.parameters(), lr=args.lr)
        self.optimizers = (self.encoder_optimizer, self.decoder_optimizer, self.critic_optimizer)

        # Evaluations
        self.losses = []
        self.criterion = nn.CrossEntropyLoss(ignore_index=args.ignoreid, size_average=False)

        # Logs
        sys.stdout.flush()
        self.logs = defaultdict(list)
Example #7
    def __init__(self, env, results_path, tok, episode_len=20):
        super(Seq2SeqAgent, self).__init__(env, results_path)
        self.tok = tok
        self.episode_len = episode_len
        self.feature_size = self.env.feature_size

        # Models
        enc_hidden_size = args.rnn_dim//2 if args.bidir else args.rnn_dim
        self.encoder = model.EncoderLSTM(tok.vocab_size(), args.wemb, enc_hidden_size, padding_idx,
                                         args.dropout, bidirectional=args.bidir).cuda()
        self.decoder = model.AttnDecoderLSTM(args.aemb, args.rnn_dim, args.dropout, feature_size=self.feature_size + args.angle_feat_size).cuda()
        self.critic = model.Critic().cuda()
        self.models = (self.encoder, self.decoder, self.critic)

        # Optimizers
        self.encoder_optimizer = args.optimizer(self.encoder.parameters(), lr=args.lr)
        self.decoder_optimizer = args.optimizer(self.decoder.parameters(), lr=args.lr)
        self.critic_optimizer = args.optimizer(self.critic.parameters(), lr=args.lr)
        self.optimizers = (self.encoder_optimizer, self.decoder_optimizer, self.critic_optimizer)

        # Evaluations
        self.losses = []
        self.criterion = nn.CrossEntropyLoss(ignore_index=args.ignoreid, size_average=False)

        # Logs
        sys.stdout.flush()
        self.logs = defaultdict(list)
Example #8
    def __init__(self, state_dim, action_dim, action_lim, ram):
        """Special method for object initialisation.

		:param state_dim: Dimensions of state.
		:type state_dim: int.
		:param action_dim: Dimension of action.
		:type action_dim: int.
		:param action_lim: Used to limit action in [-action_lim, action_lim].
		:type action_lim: float.
		:param ram: replay memory buffer object.
		:type ram: buffer.
		"""

        # Set the parameters.
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.action_lim = action_lim
        self.ram = ram
        self.iter = 0

        # Set the noise function.
        self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)

        # Set the actor.
        self.actor = model.Actor(self.state_dim, self.action_dim,
                                 self.action_lim)
        self.target_actor = model.Actor(self.state_dim, self.action_dim,
                                        self.action_lim)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                LEARNING_RATE)

        # Set the critic.
        self.critic = model.Critic(self.state_dim, self.action_dim)
        self.target_critic = model.Critic(self.state_dim, self.action_dim)
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 LEARNING_RATE)

        # Update the actor and critic networks
        self.hard_update(self.target_actor, self.actor)
        self.hard_update(self.target_critic, self.critic)

        return
Example #9
def c_graph(sess, phc):
    Critic = model.Critic()

    Y_value = Critic.build(phc['states_c'], phc['is_training_c'])
    loss_op = tf.reduce_mean(tf.square(Y_value - phc['values_c']))
    reg_loss = tf.reduce_sum(Critic.reg_loss)
    loss_op += reg_loss
    # update_op = tf.train.MomentumOptimizer(LR, MOMENTUM).minimize(loss_op, var_list=Critic.vars)
    update_op = tf.train.AdamOptimizer(1e-3).minimize(loss_op,
                                                      var_list=Critic.vars)

    return loss_op, Y_value, update_op, Critic.vars
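
c_graph only assembles the TF1 graph; the caller is expected to run the returned ops inside the session. A hedged usage sketch, assuming phc is a dict of placeholders and that states_batch and value_targets are NumPy arrays prepared elsewhere:

import tensorflow as tf  # TF1-style graph mode, matching tf.train.AdamOptimizer above

loss_op, y_value, update_op, critic_vars = c_graph(sess, phc)
sess.run(tf.global_variables_initializer())

# One critic update on a batch of (state, target value) pairs.
loss, _ = sess.run(
    [loss_op, update_op],
    feed_dict={
        phc['states_c']: states_batch,    # assumed: batch of states as a NumPy array
        phc['values_c']: value_targets,   # assumed: regression targets for the critic
        phc['is_training_c']: True,
    })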
Example #10
    def __init__(self, state_dim, action_dim, ram):
        """
		Initialize actor and critic networks
		"""
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.ram = ram
        self.iter = 0
        self.noise = utils.OrnsteinUhlenbeckActionNoise(self.action_dim)

        self.actor = model.Actor(self.state_dim, self.action_dim)
        self.target_actor = model.Actor(self.state_dim, self.action_dim)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                LEARNING_RATE)

        self.critic = model.Critic(self.state_dim, self.action_dim)
        self.target_critic = model.Critic(self.state_dim, self.action_dim)
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 LEARNING_RATE)

        # copy parameters to target networks
        utils.hard_update(self.target_actor, self.actor)
        utils.hard_update(self.target_critic, self.critic)
Example #11
File: iris_RL.py Project: zbenic/mobrob
    def __init__(self, env, state_vector_size, action_num, action_limit, ram):
        """
        :param env: Gym environment
        :param state_dim: Dimensions of state (int)
        :param action_dim: Dimension of action (int)
        :param action_lim: Used to limit action in [-action_lim,action_lim]
        :param ram: replay memory buffer object
        :return:
        """
        self.env = env
        self.state_dim = state_vector_size
        self.action_dim = action_num
        self.action_lim = action_limit
        self.ram = ram
        self.iter = 0
        self.noise = OrnsteinUhlenbeckActionNoise(self.action_dim)

        self.actor = model.Actor(self.state_dim, self.action_dim,
                                 self.action_lim)
        self.target_actor = model.Actor(self.state_dim, self.action_dim,
                                        self.action_lim)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                LEARNING_RATE)

        self.critic = model.Critic(self.state_dim, self.action_dim)
        self.target_critic = model.Critic(self.state_dim, self.action_dim)
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 LEARNING_RATE)

        utils.hard_update(self.target_actor, self.actor)
        utils.hard_update(self.target_critic, self.critic)

        self.state_vector_size = state_vector_size
        self.action_num = action_num
        self.action_limit = action_limit
        self.controller = DQNAgent(env, state_vector_size, action_num,
                                   action_limit)
Example #12
    def __init__(self, state_dim, action_dim, ram, LR_actor, LR_critic, gamma,
                 tau, batchsize, expl_rate, version):
        """
		:param state_dim: Dimensions of state (int)
		:param action_dim: Dimension of action (int)
		:param action_lim: Used to limit action in [-action_lim,action_lim]
		:param ram: replay memory buffer object
		:return:
		"""
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.LR_actor = LR_actor
        self.LR_critic = LR_critic
        self.gamma = gamma
        self.tau = tau
        self.ram = ram
        self.batchsize = batchsize
        self.iter = 0
        self.noise = utils.OrnsteinUhlenbeckActionNoise(
            self.action_dim, 0, 0.15, expl_rate)
        self.action_lim = 1.0

        self.actor = model.Actor(self.state_dim, self.action_dim,
                                 self.action_lim)
        self.target_actor = model.Actor(self.state_dim, self.action_dim,
                                        self.action_lim)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                self.LR_actor)

        self.critic = model.Critic(self.state_dim, self.action_dim)
        self.target_critic = model.Critic(self.state_dim, self.action_dim)
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 self.LR_critic)

        utils.hard_update(self.target_actor, self.actor)
        utils.hard_update(self.target_critic, self.critic)
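
This constructor stores gamma, tau and batchsize, but the learning step that consumes them is not shown. A minimal sketch of the standard DDPG update they usually feed, assuming ram.sample returns (state, action, reward, next_state, done) tensors, that the critic's forward takes (state, action), and that a utils.soft_update counterpart to hard_update exists:

import torch
import torch.nn.functional as F


def optimize(self):
    # Assumed sample() signature; shapes are taken to be (batch, dim) throughout.
    s, a, r, s2, done = self.ram.sample(self.batchsize)

    # Critic: regress Q(s, a) toward r + gamma * Q'(s2, pi'(s2)) on non-terminal steps.
    with torch.no_grad():
        target_q = r + self.gamma * (1.0 - done) * self.target_critic(s2, self.target_actor(s2))
    critic_loss = F.mse_loss(self.critic(s, a), target_q)
    self.critic_optimizer.zero_grad()
    critic_loss.backward()
    self.critic_optimizer.step()

    # Actor: ascend the critic's estimate of the actor's own actions.
    actor_loss = -self.critic(s, self.actor(s)).mean()
    self.actor_optimizer.zero_grad()
    actor_loss.backward()
    self.actor_optimizer.step()

    # Polyak-average the targets with coefficient tau (soft_update assumed available).
    utils.soft_update(self.target_actor, self.actor, self.tau)
    utils.soft_update(self.target_critic, self.critic, self.tau)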
Example #13
    def __init__(self, env, results_path, tok, episode_len=20):
        super(Seq2PolicyAgent, self).__init__(env, results_path)
        self._iter = 0
        self.tok = tok
        self.episode_len = episode_len
        self.feature_size = self.env.feature_size

        # Models
        enc_hidden_size = args.rnn_dim//2 if args.bidir else args.rnn_dim
        self.encoder = model.EncoderLSTM(tok.vocab_size(), args.wemb, enc_hidden_size, padding_idx,
                                         args.dropout, bidirectional=args.bidir).cuda()
        if args.original_decoder:
            self.decoder = model.AttnDecoderLSTM(args.aemb, args.rnn_dim, args.dropout, feature_size=self.feature_size + args.angle_feat_size).cuda()
        else:
            self.decoder = model.AttnPolicyLSTM(args.aemb, args.rnn_dim, args.dropout, feature_size=self.feature_size + args.angle_feat_size, latent_dim=args.vae_latent_dim).cuda()
        if args.fix_vae:
            print("fix the parameters in sub policy")
            for param in self.decoder.policy.parameters():
                param.requires_grad = False

        self.critic = model.Critic().cuda()
        self.models = (self.encoder, self.decoder, self.critic)

        # Optimizers
        self.encoder_optimizer = args.optimizer(self.encoder.parameters(), lr=args.lr)
        self.decoder_optimizer = args.optimizer(self.decoder.parameters(), lr=args.lr)
        self.critic_optimizer = args.optimizer(self.critic.parameters(), lr=args.lr)
        self.optimizers = (self.encoder_optimizer, self.decoder_optimizer, self.critic_optimizer)

        # Evaluations
        self.losses = []
        self.criterion = nn.CrossEntropyLoss(ignore_index=args.ignoreid, size_average=False)

        # Logs
        sys.stdout.flush()
        self.logs = defaultdict(list)
Example #14
    return scores, mean_scores_window


# In[19]:

config = Config(seed=6)

config.num_agents = len(env_info.agents)
config.state_size = state_size
config.action_size = action_size

config.actor_fn = lambda: model.Actor(config.state_size, config.action_size,
                                      128, 128)
config.actor_opt_fn = lambda params: optim.Adam(params, lr=1e-3)

config.critic_fn = lambda: model.Critic(config.state_size, config.action_size,
                                        1, 128, 128)
config.critic_opt_fn = lambda params: optim.Adam(params, lr=2e-3)

config.replay_fn = lambda: Replay(
    config.action_size, buffer_size=int(1e6), batch_size=128)
config.noise_fn = lambda: OUNoise(
    config.action_size, mu=0., theta=0.15, sigma=0.1, seed=config.seed)

config.discount = 0.99
config.target_mix = 3e-3

config.max_episodes = 3000
config.max_steps = int(1e6)
config.goal_score = 1

config.CHECKPOINT_FOLDER = "MultiAgentCheckPt"
Example #15
torch.manual_seed(args.seed)

train_loader = dataloader.train_loader('mnist', args.data_directory,
                                       args.batch_size)

input_size, hidden_size, latent_size, k, l = args.parameters

if args.load_model != '000000000000':
    critic = torch.load(args.log_directory + '/' + args.load_model +
                        '/critic.pt')
    generator = torch.load(args.log_directory + '/' + args.load_model +
                           '/generator.pt')
    args.time_stamp = args.load_model
else:
    critic = model.Critic()
    generator = model.Generator()
    critic = critic.to(args.device)
    generator = generator.to(args.device)

writer = SummaryWriter(args.log_directory + '/' + args.time_stamp + '/')

critic_optimizer = optim.Adam(critic.parameters(), lr=args.lr, betas=(0, 0.9))
generator_optimizer = optim.Adam(generator.parameters(),
                                 lr=args.lr,
                                 betas=(0, 0.9))


def train(epoch):
    critic.train()
    generator.train()
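
train() is cut off right after switching both networks to training mode. A hedged sketch of the critic update that a WGAN-GP style loop (suggested by the Adam betas=(0, 0.9)) typically performs at this point; real_images, gp_weight and the way latent_size is consumed are assumptions rather than code from this project:

def critic_step(real_images, gp_weight=10.0):
    batch = real_images.size(0)
    noise = torch.randn(batch, latent_size, device=args.device)
    fake_images = generator(noise).detach()  # assumes generator takes (batch, latent_size) noise

    # Wasserstein critic loss: push real scores up and fake scores down.
    loss = critic(fake_images).mean() - critic(real_images).mean()

    # Gradient penalty on random interpolates between real and fake samples (4-D image batches).
    eps = torch.rand(batch, 1, 1, 1, device=args.device)
    interp = (eps * real_images + (1 - eps) * fake_images).requires_grad_(True)
    grad = torch.autograd.grad(critic(interp).sum(), interp, create_graph=True)[0]
    loss = loss + gp_weight * ((grad.view(batch, -1).norm(2, dim=1) - 1) ** 2).mean()

    critic_optimizer.zero_grad()
    loss.backward()
    critic_optimizer.step()
    return loss.item()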
Example #16
                              batch_size=args.batch_size,
                              is_cuda=use_cuda,
                              evaluation=True)

# ##############################################################################
# Build model
# ##############################################################################
import model
from const import PAD
from optim import Optim

encode = model.Encode(use_cuda)
actor = model.Actor(args.vocab_size, args.dec_hsz, args.rnn_layers,
                    args.batch_size, args.max_len, args.dropout, use_cuda)

critic = model.Critic(args.vocab_size, args.dec_hsz, args.rnn_layers,
                      args.batch_size, args.max_len, args.dropout, use_cuda)

optim_pre_A = Optim(actor.parameters(), args.pre_lr, True)
optim_pre_C = Optim(critic.parameters(), args.pre_lr, True)

optim_A = Optim(actor.parameters(), args.lr, False, args.new_lr)
optim_C = Optim(critic.parameters(), args.lr, False, args.new_lr)

criterion_A = torch.nn.CrossEntropyLoss(ignore_index=PAD)
criterion_C = torch.nn.MSELoss()

if use_cuda:
    actor = actor.cuda()
    critic = critic.cuda()

# ##############################################################################
Example #17
    def update_target(self, source, target):
        new_target_param = parameters_to_vector(source.parameters()) * self.tau + \
          (1 - self.tau) * parameters_to_vector(target.parameters())
        vector_to_parameters(new_target_param, target.parameters())
        return target


if __name__ == '__main__':
    env = gym.make("CartPole-v0")

    global state_size, action_size
    state_size = int(np.product(env.observation_space.shape))
    action_size = int(env.action_space.n)
    num_episode = 800
    critic = model.Critic(state_size, action_size)
    actor = model.Actor(state_size, action_size)

    # actor.eval()
    # critic.eval()

    # target network
    target_critic = deepcopy(critic)
    target_actor = deepcopy(actor)

    ddpg = DDPG(env,
                actor,
                critic,
                target_actor,
                target_critic,
                num_episode,
Example #18
File: train.py Project: Savio666/anime
# Function to keep track of gradients for visualization purposes
def make_grad_hook():
    grads = []

    def grad_hook(m):
        if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
            grads.append(m.weight.grad)

    return grads, grad_hook


if __name__ == "__main__":
    # specify the parameters
    gen = model.Generator(config.z_dim).to(config.device)
    crit = model.Critic().to(config.device)
    batch_size = config.batch_size
    gen_opt = torch.optim.Adam(gen.parameters(),
                               lr=config.lr,
                               betas=(config.beta_1, config.beta_2))
    crit_opt = torch.optim.Adam(crit.parameters(),
                                lr=config.lr,
                                betas=(config.beta_1, config.beta_2))
    gen = gen.apply(weights_init)
    crit = crit.apply(weights_init)
    mean_generator_loss = 0
    cur_step = 0
    generator_losses = []
    critic_losses = []
    # training the model
    for e in range(config.n_epochs):
Example #19
def main(management, hps):
    # Setup Experiment
    task = hps.task
    config_logger(management.log_file, saving=management.save_logs)
    logger = logging.getLogger('Exp')
    train_logger = logging.getLogger('Exp.Train')
    eval_logger = logging.getLogger('Exp.Eval')
    try:
        state = State(hps.seed, management)
    except SingletonError:
        State.instance = None
        state = State(hps.seed, management)

    logger.info(
        f"Initializing experiment `{management.exp_name}` with hyperparameters:\n%s",
        repr(hps))
    stats = accumulator()

    # Setup Data
    if task == 'dirac':

        def train_data():
            dirac = State().convert(torch.Tensor(1, 1).fill_(hps.dirac_target))
            while True:
                yield dirac

        train_iter = train_data()
    else:
        dataset_cfg = dict(
            type=task,
            root=management.data_path,
            download=True,
            preload_to_gpu=management.preload_to_gpu,
            num_threads=management.num_workers,
            batch_size=hps.batch_size,
        )
        train_data = Dataset(**dataset_cfg, mode='train')
        eval_data = Dataset(**dataset_cfg, mode='test')
        train_iter = train_data.sampler(infinite=True, project=0)

    # Setup Generator
    if task == 'dirac':
        generator = dirac.DiracGenerator()
        stats.g_params.append(generator.param.clone().detach().cpu())
    else:
        generator = model.Generator(dimz=hps.generator_dimz,
                                    dimh=hps.generator_dimh,
                                    default_batch_size=hps.batch_size)
    test_generator = generator
    if hps.generator_alpha_ema is not None:
        test_generator = deepcopy(generator)
        test_generator.to(device=State().device)
        test_generator.train()
    generator.to(device=State().device)
    generator.train()
    generator_optim = optim.init_optimizer(generator.parameters(),
                                           type=hps.optimizer,
                                           lr=hps.generator_lr,
                                           betas=hps.generator_betas,
                                           wd=hps.generator_wd)
    logger.info("Generator:\n%s", generator)

    # Setup Critic
    if task == 'dirac':
        critic = dirac.DiracCritic()
        stats.c_params.append(critic.param.clone().detach().cpu())
    else:
        critic = model.Critic(dimh=hps.critic_dimh, sn=hps.critic_use_sn)
    critic.to(device=State().device)
    critic.train()
    critic_optim = optim.init_optimizer(critic.parameters(),
                                        type=hps.optimizer,
                                        lr=hps.critic_lr,
                                        betas=hps.critic_betas,
                                        wd=hps.critic_wd)
    logger.info("Critic:\n%s", critic)

    # Train
    step = 0
    train_loss_meter = running_average_meter()
    train_step = gan.make_train_step(hps.loss_type,
                                     critic_inner_iters=hps.critic_inner_iters,
                                     reg_type=hps.critic_reg_type,
                                     reg_cf=hps.critic_reg_cf,
                                     alpha_ema=hps.generator_alpha_ema)
    if task != 'dirac':
        eval_step = gan.make_eval_step(os.path.join(
            management.exp_path, task + '_inception_stats.npz'),
                                       eval_data.sampler(infinite=False,
                                                         project=0),
                                       hps.generator_dimz,
                                       persisting_Z=100,
                                       device=State().device)

    logger.info("Training")
    while True:
        if step >= hps.max_iters: break
        step += 1
        train_loss = train_step(train_iter, critic, critic_optim, generator,
                                test_generator, generator_optim)
        train_loss_meter.update(train_loss.clone().detach())

        if step % management.log_every == 0 and task != 'dirac':
            train_logger.info("step %d | loss(%s) %.3f (%.3f)", step,
                              hps.loss_type, train_loss_meter.avg.item(),
                              train_loss_meter.val.item())
        if task == 'dirac':
            stats.g_params.append(test_generator.param.clone().detach().cpu())
            stats.c_params.append(critic.param.clone().detach().cpu())

        if step % management.eval_every == 0 and task != 'dirac':
            eval_iter = eval_data.sampler(infinite=False, project=0)
            samples, results = eval_step(eval_iter, critic, test_generator)
            if management.viz:
                from IPython.display import clear_output, display, update_display
                grid_img = torchvision.utils.make_grid(samples,
                                                       nrow=10,
                                                       normalize=True,
                                                       value_range=(-1., 1.),
                                                       padding=0)
                plt.imshow(grid_img.permute(1, 2, 0).cpu())
                display(plt.gcf())
            eval_logger.info(
                "step %d | " +
                ' | '.join([f'{k} {v:.3f}' for k, v in results.items()]), step)
            torchvision.utils.save_image(samples.cpu(),
                                         os.path.join(management.log_path,
                                                      f'samples-{step}.png'),
                                         nrow=10,
                                         normalize=True,
                                         value_range=(-1., 1.),
                                         padding=0)

    logger.info("Final Evaluation")
    if task == 'dirac':
        g_params = torch.stack(stats.g_params)
        c_params = torch.stack(stats.c_params)
        trajectory = torch.cat([c_params, g_params], dim=-1).numpy()
        logger.info(f"Final point in parameter space: {trajectory[-1]}")
        anima = dirac.animate(trajectory, hps)
        if management.viz:
            from IPython.display import HTML, display
            display(HTML(anima.to_html5_video()))
        anima.save(os.path.join(management.log_path, 'evolution.mp4'))
    else:
        eval_iter = eval_data.sampler(infinite=False, project=0)
        samples, results = eval_step(eval_iter, critic, test_generator)
        logger.info(
            "step %d | " +
            ' | '.join([f'{k} {v:.3f}' for k, v in results.items()]), step)
        torchvision.utils.save_image(samples,
                                     os.path.join(management.log_path,
                                                  f'samples-final.png'),
                                     nrow=10,
                                     normalize=True,
                                     value_range=(-1., 1.),
                                     padding=0)
Example #20
if __name__ == '__main__':
    # set unity environment path (file_name)
    env = UnityEnvironment(file_name=config.env_name)
    # env = UnityEnvironment(file_name=config.env_name, worker_id=np.random.randint(100000))

    # setting brain for unity
    default_brain = env.brain_names[0]
    brain = env.brains[default_brain]

    train_mode = config.train_mode

    device = config.device

    actor = model.Actor(config.action_size, "main").to(device)
    target_actor = model.Actor(config.action_size, "target").to(device)
    critic = model.Critic(config.action_size, "main").to(device)
    target_critic = model.Critic(config.action_size, "target").to(device)

    optimizer_actor = optim.Adam(actor.parameters(), lr=config.actor_lr)
    optimizer_critic = optim.Adam(critic.parameters(), lr=config.critic_lr)

    algorithm = "_DDPG"
    agent = agent.DDPGAgent(actor, critic, target_actor, target_critic,
                            optimizer_actor, optimizer_critic, device,
                            algorithm)

    # Initialize target networks
    agent.hard_update_target()

    step = 0
    episode = 0