def __init__(self, batch_size=64, env_batch=1, max_step=40, \
                 tau=0.001, discount=0.9, rmsize=800, \
                 writer=None, resume=None, output_path=None):

        self.max_step = max_step
        self.env_batch = env_batch
        self.batch_size = batch_size        

        self.actor = ResNet(9, 18, 65)  # target, canvas, stepnum, coordconv 3 + 3 + 1 + 2
        self.actor_target = ResNet(9, 18, 65)
        self.critic = ResNet_wobn(3 + 9, 18, 1)  # add the last canvas for better prediction
        self.critic_target = ResNet_wobn(3 + 9, 18, 1)

        self.actor_optim = Adam(self.actor.parameters(), lr=1e-2)
        self.critic_optim = Adam(self.critic.parameters(), lr=1e-2)

        if resume is not None:
            self.load_weights(resume)

        hard_update(self.actor_target, self.actor)
        hard_update(self.critic_target, self.critic)
        
        # Create replay buffer
        self.memory = rpm(rmsize * max_step)

        # Hyper-parameters
        self.tau = tau
        self.discount = discount

        # Tensorboard
        self.writer = writer
        self.log = 0
        
        self.state = [None] * self.env_batch  # Most recent state
        self.action = [None] * self.env_batch  # Most recent action
        self.choose_device()
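
Each of these constructors initializes the target networks with hard_update and stores tau for the soft updates performed during training. A minimal sketch of both helpers, assuming the conventional DDPG definitions (the actual implementations in this codebase may differ):

def hard_update(target, source):
    # Copy the source parameters into the target verbatim
    for t, s in zip(target.parameters(), source.parameters()):
        t.data.copy_(s.data)

def soft_update(target, source, tau):
    # Polyak averaging: target <- (1 - tau) * target + tau * source
    for t, s in zip(target.parameters(), source.parameters()):
        t.data.copy_((1.0 - tau) * t.data + tau * s.data)
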
Example 2
    def __init__(
        self,
        batch_size=64,
        env_batch=1,
        max_step=40,
        tau=0.001,
        discount=0.9,
        rmsize=800,
        writer=None,
        resume=None,
        output_path=None,
    ):

        self.max_step = max_step
        self.env_batch = env_batch
        self.batch_size = batch_size

        self.actor = ResNet(
            9, 18, (action_dim + 3) * n_frames_per_step
        )  # target, canvas, stepnum, coordconv 3 + 3 + 1 + 2
        self.actor_target = ResNet(9, 18, (action_dim + 3) * n_frames_per_step)
        self.critic = ResNet_wobn(
            3 + 9, 18, 1
        )  # add the last canvas for better prediction
        self.critic_target = ResNet_wobn(3 + 9, 18, 1)

        self.actor_optim = Adam(self.actor.parameters(), lr=1e-2)
        self.critic_optim = Adam(self.critic.parameters(), lr=1e-2)

        if resume is not None:
            self.load_weights(resume)

        hard_update(self.actor_target, self.actor)
        hard_update(self.critic_target, self.critic)

        # Create replay buffer
        self.memory = rpm(rmsize * max_step)

        # Hyper-parameters
        self.tau = tau
        self.discount = discount

        # Tensorboard
        self.writer = writer
        self.log = 0
        
        self.state = [None] * self.env_batch  # Most recent state
        self.action = [None] * self.env_batch  # Most recent action
        self.choose_device()
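
rpm(rmsize * max_step) sizes the replay buffer to hold rmsize full episodes of max_step transitions each. A minimal ring-buffer sketch with the interface these agents appear to rely on; the method names append and sample_batch are assumptions, and the real rpm class may differ:

import random

class rpm:
    def __init__(self, buffer_size):
        self.buffer_size = buffer_size
        self.buffer = []
        self.index = 0

    def append(self, obs):
        if len(self.buffer) < self.buffer_size:
            self.buffer.append(obs)
        else:
            self.buffer[self.index] = obs  # overwrite the oldest entry once full
        self.index = (self.index + 1) % self.buffer_size

    def sample_batch(self, batch_size):
        # Uniform sampling without replacement from the stored transitions
        return random.sample(self.buffer, batch_size)
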
Example 3
    def __init__(self, batch_size=64, env_batch=1, max_step=40, \
                 tau=0.001, discount=0.9, rmsize=800, \
                 writer=None, resume=None, output_path=None):

        self.max_step = max_step
        self.env_batch = env_batch
        self.batch_size = batch_size
        self.alpha = 0.001

        self.entropy_tuning = True
        if self.entropy_tuning:
            self.entropy_target = -torch.Tensor([65.]).to(device)
            self.log_alpha = torch.tensor([0.],
                                          requires_grad=True,
                                          device=device)
            self.alpha_optimizer = Adam([self.log_alpha], lr=1e-3)

        self.pi = ResGaussianActor(
            9, 18, 65)  # target, canvas, stepnum, coordconv 3 + 3 + 1 + 2
        self.q1 = ResNet_wobn(3 + 9, 18,
                              1)  # add the last canvas for better prediction
        self.q1_target = ResNet_wobn(3 + 9, 18, 1)
        self.q2 = ResNet_wobn(3 + 9, 18, 1)
        self.q2_target = ResNet_wobn(3 + 9, 18, 1)

        self.pi_optim = Adam(self.pi.parameters(), lr=1e-2)
        self.q1_optim = Adam(self.q1.parameters(), lr=1e-2)
        self.q2_optim = Adam(self.q2.parameters(), lr=1e-2)

        if resume is not None:
            self.load_weights(resume)

        hard_update(self.q1_target, self.q1)
        hard_update(self.q2_target, self.q2)

        # Create replay buffer
        self.memory = rpm(rmsize * max_step)

        # Hyper-parameters
        self.tau = tau
        self.discount = discount

        # Tensorboard
        self.writer = writer
        self.log = 0

        self.state = [None] * self.env_batch  # Most recent state
        self.action = [None] * self.env_batch  # Most recent action
        self.choose_device()
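
Example 3 is a soft actor-critic variant: with entropy_tuning enabled, log_alpha is trained so the policy entropy tracks entropy_target (here -65.0, the negative of the 65-dimensional action output). A sketch of the standard temperature update, assuming log_prob holds the log-probabilities of the sampled actions; everything except log_alpha, entropy_target, and alpha_optimizer is illustrative:

# Inside the learning step: the loss raises alpha when the policy entropy
# drops below the target and lowers it when the entropy exceeds the target.
alpha_loss = -(self.log_alpha * (log_prob + self.entropy_target).detach()).mean()
self.alpha_optimizer.zero_grad()
alpha_loss.backward()
self.alpha_optimizer.step()
self.alpha = self.log_alpha.exp().item()
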
Example 4
    def __init__(self, opt, writer=None):
        self.opt = opt
        self.max_step = opt.max_step
        self.env_batch = opt.env_batch
        self.batch_size = opt.batch_size
        self.loss_fcn = opt.loss_fcn
        self.use_multiple_renderers = opt.use_multiple_renderers

        state_size = 9
        if opt.loss_fcn in ('cm', 'cml1') and opt.built_in_cm:
            state_size = 10

        self.actor = ResNet(
            state_size, 18,
            13 * n_strokes)  # target, canvas, stepnum, coordconv 3 + 3 + 1 + 2
        self.actor_target = ResNet(state_size, 18, 13 * n_strokes)
        self.critic = ResNet_wobn(
            3 + state_size, 18, 1)  # add the last canvas for better prediction
        self.critic_target = ResNet_wobn(3 + state_size, 18, 1)

        if not opt.use_multiple_renderers:
            Decoder.load_state_dict(torch.load(opt.renderer))

        self.actor_optim = Adam(self.actor.parameters(), lr=1e-2)
        self.critic_optim = Adam(self.critic.parameters(), lr=1e-2)

        if opt.resume is not None:
            self.load_weights(opt.resume)

        hard_update(self.actor_target, self.actor)
        hard_update(self.critic_target, self.critic)

        # Create replay buffer
        self.memory = rpm(opt.rmsize * opt.max_step)

        # Hyper-parameters
        self.tau = opt.tau
        self.discount = opt.discount

        # Tensorboard
        self.writer = writer
        self.log = 0

        self.state = [None] * self.env_batch  # Most recent state
        self.action = [None] * self.env_batch  # Most recent action
        self.choose_device()
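
Example 4 loads pre-trained weights into a module-level Decoder (the neural renderer) when a single renderer is used. If the renderer serves only as a fixed differentiable rasterizer, it is common to freeze it after loading; a hedged sketch (the original code may handle this elsewhere):

Decoder.load_state_dict(torch.load(opt.renderer))
Decoder.eval()  # fixed inference behavior for any norm/dropout layers
for p in Decoder.parameters():
    p.requires_grad = False  # exclude the renderer from gradient updates
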
Example 5
    def __init__(self, state_dim, merged_state_dim, act_dim, batch_size=64, env_batch=1, max_step=40, \
                 tau=0.001, discount=0.9, rmsize=800, \
                 writer=None, resume=None, output_path=None):

        self.state_dim = state_dim
        self.merged_state_dim = merged_state_dim
        self.act_dim = act_dim
        self.max_step = max_step
        self.env_batch = env_batch
        self.batch_size = batch_size

        # input channel (state_dim): canvas, gt, parameters, stepnum  3 + 3 + 2 + 1
        self.actor = ResNet(self.state_dim, 18, self.act_dim)
        self.actor_target = ResNet(self.state_dim, 18, self.act_dim)

        # input channel (merged_state_dim): canvas, parameters, stepnum, coordconv  3 + 2 + 1 + 2
        self.critic = ResNet_wobn(self.merged_state_dim, 18, 1, self.act_dim)
        self.critic_target = ResNet_wobn(self.merged_state_dim, 18, 1,
                                         self.act_dim)

        self.actor_optim = Adam(self.actor.parameters(), lr=1e-2)
        self.critic_optim = Adam(self.critic.parameters(), lr=1e-2)

        if resume is not None:
            self.load_weights(resume)

        hard_update(self.actor_target, self.actor)
        hard_update(self.critic_target, self.critic)

        # Create replay buffer
        self.memory = rpm(rmsize * max_step)

        # Hyper-parameters
        self.tau = tau
        self.discount = discount

        # Tensorboard
        self.writer = writer
        self.log = 0

        self.state = [None] * self.env_batch  # Most recent state
        self.action = [None] * self.env_batch  # Most recent action
        self.choose_device()
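
Example 5 parameterizes the channel counts instead of hard-coding them. Using the breakdown in its comments (state: 3 + 3 + 2 + 1 = 9 channels; merged state: 3 + 2 + 1 + 2 = 8 channels) and borrowing the 65-dimensional action from the other examples, construction might look like this; the enclosing class name is not shown above, so Agent is a placeholder:

agent = Agent(
    state_dim=9,          # canvas, gt, parameters, stepnum: 3 + 3 + 2 + 1
    merged_state_dim=8,   # canvas, parameters, stepnum, coordconv: 3 + 2 + 1 + 2
    act_dim=65,           # assumed from the other examples
    batch_size=64,
    rmsize=800,
)
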
Example 6
    def __init__(self, batch_size=64, env_batch=1, max_step=40, \
                 tau=0.001, discount=0.9, rmsize=800, \
                 writer=None, resume=None, output_path=None):

        self.max_step = max_step
        self.env_batch = env_batch
        self.batch_size = batch_size

        self.actor = ResNet(
            9, 18, 65)  # target, canvas, stepnum, coordconv 3 + 3 + 1 + 2
        self.actor_target = ResNet(9, 18, 65)
        self.critic = ResNet_wobn(9, 18, 1)
        self.critic_target = ResNet_wobn(9, 18, 1)

        self.actor_optim = Adam(self.actor.parameters(), lr=1e-2)
        self.critic_optim = Adam(self.critic.parameters(), lr=1e-2)

        if resume is not None:
            self.load_weights(resume)

        hard_update(self.actor_target, self.actor)
        hard_update(self.critic_target, self.critic)

        # Create replay buffer
        self.memory = rpm(rmsize * max_step)

        # Hyper-parameters
        self.tau = tau
        self.discount = discount

        # Tensorboard
        self.writer = writer
        self.log = 0

        self.state = [None] * self.env_batch  # Most recent state
        self.action = [None] * self.env_batch  # Most recent action
        self.cuda = torch.cuda.is_available()
        self.choose_device()
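
Example 6 additionally records torch.cuda.is_available() before calling choose_device(). A minimal sketch of what such a method typically does, assuming the network attributes above; the real implementation may differ:

import torch

def choose_device(self):
    # Move every network to the GPU when available, otherwise stay on the CPU
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    for net in (self.actor, self.actor_target, self.critic, self.critic_target):
        net.to(device)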