Example #1
# note: method excerpt from an agent class; it relies on the module-level
# imports below and on the project's cnn_net, mlp_net, mlp_dpp_net and
# ZFilter helpers
import copy
import os

import numpy as np
import torch.optim as optim

def __init__(self, envs, args):
    self.envs = envs
    self.args = args
    # build the policy network for the given environment type
    if self.args.env_type == 'atari':
        self.net = cnn_net(envs.action_space.n)
    elif self.args.env_type == 'mujoco':
        self.net = mlp_net(envs.observation_space.shape[0],
                           envs.action_space.shape[0], self.args.dist)
        # define the dpp network used for the intrinsic reward
        self.intrinsic_net = mlp_dpp_net(envs.observation_space.shape[0])
    self.old_net = copy.deepcopy(self.net)
    # move the networks to the GPU if CUDA is enabled
    if self.args.cuda:
        self.net.cuda()
        self.old_net.cuda()
        # the intrinsic network only exists for mujoco environments
        if self.args.env_type == 'mujoco':
            self.intrinsic_net.cuda()
    # define the optimizers
    self.optimizer = optim.Adam(self.net.parameters(),
                                self.args.lr,
                                eps=self.args.eps)
    if self.args.env_type == 'mujoco':
        self.intrinsic_optimizer = optim.Adam(self.intrinsic_net.parameters(),
                                              self.args.lr_in,
                                              eps=self.args.eps)
        # running filter that normalizes the mujoco observations
        num_states = self.envs.observation_space.shape[0]
        self.running_state = ZFilter((num_states, ), clip=5)
    # create the saving folder if it does not exist
    if not os.path.exists(self.args.save_dir):
        os.mkdir(self.args.save_dir)
    # per-environment model folder
    self.model_path = os.path.join(self.args.save_dir, self.args.env_name)
    if not os.path.exists(self.model_path):
        os.mkdir(self.model_path)
    # allocate the observation buffers
    self.batch_ob_shape = (self.args.num_workers * self.args.nsteps,
                           ) + self.envs.observation_space.shape
    self.obs = np.zeros(
        (self.args.num_workers, ) + self.envs.observation_space.shape,
        dtype=self.envs.observation_space.dtype.name)
    if self.args.env_type == 'mujoco':
        self.obs[:] = np.expand_dims(self.running_state(self.envs.reset()),
                                     0)
    else:
        self.obs[:] = self.envs.reset()
    self.dones = [False for _ in range(self.args.num_workers)]
    # init the state (populated later)
    self.state_optims = None
    # create the folders used to save the log data
    if not os.path.exists(self.args.log_data_dir):
        os.mkdir(self.args.log_data_dir)
    self.intrinsic_data_path = '{}/reward_delay_{}'.format(
        self.args.log_data_dir, self.args.reward_delay_freq)
    if not os.path.exists(self.intrinsic_data_path):
        os.mkdir(self.intrinsic_data_path)
    self.intrinsic_data_path = '{}/seed_{}'.format(
        self.intrinsic_data_path, self.args.seed)
    if not os.path.exists(self.intrinsic_data_path):
        os.mkdir(self.intrinsic_data_path)
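
A minimal usage sketch for the constructor above, assuming the method belongs to a class named ppo_agent and that the project provides get_args() and an environment factory (ppo_agent and make_envs are assumed names, not confirmed by the excerpt):

args = get_args()
# hypothetical factory that builds the vectorized environments
envs = make_envs(args.env_name, args.num_workers)
agent = ppo_agent(envs, args)  # runs the __init__ shown above
# training would then be started with something like agent.learn()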
Example #2

import os

import gym
import torch

# project-local helpers: get_args, make_atari, wrap_deepmind, cnn_net, mlp_net

if __name__ == '__main__':
    # get the arguments
    args = get_args()
    # create the environment
    if args.env_type == 'atari':
        env = make_atari(args.env_name)
        env = wrap_deepmind(env, frame_stack=True)
    elif args.env_type == 'mujoco':
        env = gym.make(args.env_name)
    # build the path of the saved model
    model_path = os.path.join(args.save_dir, args.env_name, 'model.pt')
    # create the network and load the trained weights
    if args.env_type == 'atari':
        network = cnn_net(env.action_space.n)
        network.load_state_dict(
            torch.load(model_path, map_location=lambda storage, loc: storage))
        filters = None
    elif args.env_type == 'mujoco':
        network = mlp_net(env.observation_space.shape[0],
                          env.action_space.shape[0], args.dist)
        # the mujoco checkpoint stores the weights and the observation filter
        net_models, filters = torch.load(
            model_path, map_location=lambda storage, loc: storage)
        # load the models
        network.load_state_dict(net_models)
    # start to play the demo
    obs = env.reset()
    reward_total = 0
    # run a single episode
    while True:
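        # NOTE: the loop body below is a minimal sketch to complete the demo;
        # the distribution handling and the select_action helper are
        # assumptions, since the excerpt does not show what the networks
        # return or how actions are drawn from them.
        env.render()
        with torch.no_grad():
            if args.env_type == 'mujoco' and filters is not None:
                # assumed: the saved filter follows the common ZFilter
                # interface and is callable with its statistics frozen
                obs = filters(obs, update=False)
            obs_tensor = torch.tensor(obs, dtype=torch.float32).unsqueeze(0)
            # assumed: the network returns (value, action distribution)
            _, pi = network(obs_tensor)
            action = select_action(pi)  # hypothetical helper
        obs, reward, done, _ = env.step(action)
        reward_total += reward
        if done:
            break
    print('total reward of this episode: {}'.format(reward_total))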