Example #1
    def __init__(self, args, env, env_params):
        self.args = args

        # path to save the model
        self.exp_name = '_'.join((self.args.env_name, self.args.alg, 
                    str(self.args.seed), datetime.now().isoformat()))
        self.data_path = os.path.join(self.args.save_dir, 
                '_'.join((self.args.env_name, self.args.alg)),
                self.exp_name)
        self.logger = EpochLogger(output_dir=self.data_path, exp_name=self.exp_name)
        self.logger.save_config(args)

        self.env = env
        self.env_params = env_params
        # create the network
        self.actor_network = actor(env_params)
        self.critic_network = critic(env_params)
        # sync the networks across the cpus
        sync_networks(self.actor_network)
        sync_networks(self.critic_network)
        # build up the target network
        self.actor_target_network = actor(env_params)
        self.critic_target_network = critic(env_params)
        # load the weights into the target networks
        self.actor_target_network.load_state_dict(self.actor_network.state_dict())
        self.critic_target_network.load_state_dict(self.critic_network.state_dict())

        # if use gpu
        self.rank = MPI.COMM_WORLD.Get_rank()
        if args.cuda:
            device = 'cuda:{}'.format(self.rank % torch.cuda.device_count())
        else:
            device = 'cpu'
        self.device = torch.device(device)

        if self.args.cuda:
            self.actor_network.cuda(self.device)
            self.critic_network.cuda(self.device)
            self.actor_target_network.cuda(self.device)
            self.critic_target_network.cuda(self.device)
        # create the optimizer
        self.actor_optim = torch.optim.Adam(self.actor_network.parameters(), lr=self.args.lr_actor)
        self.critic_optim = torch.optim.Adam(self.critic_network.parameters(), lr=self.args.lr_critic)
        # her sampler
        self.her_module = her_sampler(self.args.replay_strategy, self.args.replay_k, self.env.compute_reward)
        # create the replay buffer
        self.buffer = replay_buffer(self.env_params, self.args.buffer_size, self.her_module.sample_her_transitions)
        # create the normalizer
        self.o_norm = normalizer(size=env_params['obs'], default_clip_range=self.args.clip_range)
        self.g_norm = normalizer(size=env_params['goal'], default_clip_range=self.args.clip_range)

        self.logger.setup_pytorch_saver(self.actor_network)
Example #2
    def __init__(self, args, env, env_params):
        self.args = args
        self.env = env
        self.env_params = env_params
        # create the network
        self.actor_network = actor(env_params)
        self.critic_network = critic(env_params)
        # sync the networks across the cpus
        sync_networks(self.actor_network)
        sync_networks(self.critic_network)
        # build up the target network
        self.actor_target_network = actor(env_params)
        self.critic_target_network = critic(env_params)
        # load the weights into the target networks
        self.actor_target_network.load_state_dict(self.actor_network.state_dict())
        self.critic_target_network.load_state_dict(self.critic_network.state_dict())
        # if use gpu
        if self.args.cuda:
            self.actor_network.cuda()
            self.critic_network.cuda()
            self.actor_target_network.cuda()
            self.critic_target_network.cuda()
        # create the optimizer
        if self.args.optimizer_type == 'SGD':
            self.actor_optim = torch.optim.SGD(self.actor_network.parameters(), lr=self.args.lr_actor)
            self.critic_optim = torch.optim.SGD(self.critic_network.parameters(), lr=self.args.lr_critic)
        elif self.args.optimizer_type == 'adam':
            self.actor_optim = torch.optim.Adam(self.actor_network.parameters(), lr=self.args.lr_actor)
            self.critic_optim = torch.optim.Adam(self.critic_network.parameters(), lr=self.args.lr_critic)
        # her sampler
        self.her_module = her_sampler(self.args.replay_strategy, self.args.replay_k, self.env.compute_reward)
        # create the replay buffer
        self.buffer = replay_buffer(self.env_params, self.args.buffer_size, self.her_module.sample_her_transitions)
        # create the normalizer
        self.o_norm = normalizer(size=env_params['obs'], default_clip_range=self.args.clip_range)
        self.g_norm = normalizer(size=env_params['goal'], default_clip_range=self.args.clip_range)
        self.scales = []

        # create the directory to store the model
        if MPI.COMM_WORLD.Get_rank() == 0:
            if not os.path.exists(self.args.save_dir):
                os.mkdir(self.args.save_dir)
            # path to save the model
            self.model_path = os.path.join(self.args.save_dir, self.args.env_name)
            if not os.path.exists(self.model_path):
                os.mkdir(self.model_path)

            self.result_dir = f'./learning_curves/{args.env_name}/{self.args.run_name}'
            if not os.path.isdir(self.result_dir):
                os.makedirs(self.result_dir, exist_ok=True)
                print(f'creating {self.result_dir}')
            self.writer = SummaryWriter(logdir=self.result_dir)
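
The SummaryWriter above is only created on MPI rank 0, so any later logging calls need the same guard; a minimal sketch of recording an evaluation metric once per epoch (success_rate and epoch are illustrative names, not taken from this snippet):

if MPI.COMM_WORLD.Get_rank() == 0:
    # log the evaluation result for TensorBoard, rank 0 only
    self.writer.add_scalar('test/success_rate', success_rate, epoch)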
Example #3
 def __init__(self, args, env, env_params):
     self.args = args
     self.env = env
     self.env_params = env_params
     # create the network
     self.actor_network = actor(env_params)
     self.critic_network = critic(env_params)
     # sync the networks across the cpus
     sync_networks(self.actor_network)
     sync_networks(self.critic_network)
     # build up the target network
     self.actor_target_network = actor(env_params)
     self.critic_target_network = critic(env_params)
     # load the weights into the target networks
     self.actor_target_network.load_state_dict(
         self.actor_network.state_dict())
     self.critic_target_network.load_state_dict(
         self.critic_network.state_dict())
     # if use gpu
     if self.args.cuda:
         self.actor_network.cuda()
         self.critic_network.cuda()
         self.actor_target_network.cuda()
         self.critic_target_network.cuda()
     # create the optimizer
     self.actor_optim = torch.optim.Adam(self.actor_network.parameters(),
                                         lr=self.args.lr_actor)
     self.critic_optim = torch.optim.Adam(self.critic_network.parameters(),
                                          lr=self.args.lr_critic)
     # her sampler
     self.her_module = her_sampler(self.args.replay_strategy,
                                   self.args.replay_k,
                                   self.env.compute_reward)
     # create the replay buffer
     self.buffer = replay_buffer(self.env_params, self.args.buffer_size,
                                 self.her_module.sample_her_transitions)
     # create the normalizer
     self.o_norm = normalizer(size=env_params['obs'],
                              default_clip_range=self.args.clip_range)
     self.g_norm = normalizer(size=env_params['goal'],
                              default_clip_range=self.args.clip_range)
     # create the directory to store the model
     if MPI.COMM_WORLD.Get_rank() == 0:
         if not os.path.exists(self.args.save_dir):
             os.mkdir(self.args.save_dir)
         # path to save the model
         self.model_path = os.path.join(self.args.save_dir,
                                        self.args.env_name)
         if not os.path.exists(self.model_path):
             os.mkdir(self.model_path)
Example #4
    def __init__(self, args, env, env_params):
        self.args = args
        self.env = env
        self.env_params = env_params
        # create the network
        self.actor_network = actor(args, env_params)
        self.critic_network = critic(args, env_params)
        # sync the networks across the cpus
        sync_networks(self.actor_network)
        sync_networks(self.critic_network)
        # build up the target network
        self.actor_target_network = actor(args, env_params)
        self.critic_target_network = critic(args, env_params)
        # load the weights into the target networks
        self.actor_target_network.load_state_dict(self.actor_network.state_dict())
        self.critic_target_network.load_state_dict(self.critic_network.state_dict())
        # if use gpu
        if self.args.cuda:
            self.actor_network.cuda()
            self.critic_network.cuda()
            self.actor_target_network.cuda()
            self.critic_target_network.cuda()
        # create the optimizer
        self.actor_optim = torch.optim.Adam(self.actor_network.parameters(), lr=self.args.lr_actor)
        self.critic_optim = torch.optim.Adam(self.critic_network.parameters(), lr=self.args.lr_critic)
        # her sampler
        self.her_module = her_sampler(self.args.replay_strategy, self.args.replay_k, self.env.compute_reward)
        # create the replay buffer
        self.buffer = replay_buffer(self.env_params, self.args.buffer_size, self.her_module.sample_her_transitions)
        # create the normalizer
        self.o_norm = normalizer(size=env_params['obs'], default_clip_range=self.args.clip_range)
        self.g_norm = normalizer(size=env_params['goal'], default_clip_range=self.args.clip_range)
        # create the directory to store the model
        if MPI.COMM_WORLD.Get_rank() == 0:
            if not os.path.exists(self.args.save_dir):
                os.mkdir(self.args.save_dir)
            # path to save the model
            self.model_path = os.path.join(self.args.save_dir, self.args.env_name)
            if not os.path.exists(self.model_path):
                os.mkdir(self.model_path)

            # added
            log_dir = create_env_folder(args.env_name, args.network_class, test=args.test)
            save_kwargs(vars(args), log_dir)
            tabular_log_path = osp.join(log_dir, 'progress.csv')
            text_log_path = osp.join(log_dir, 'debug.log')
            logger.add_text_output(text_log_path)
            logger.add_tabular_output(tabular_log_path)
            exp_name = f'{args.env_name}'
            logger.push_prefix("[%s] " % exp_name)
Example #5
def demo_2_envs(env, args, env_id):
    # load the model param
    model_path = args.save_dir + args.env1_name + args.env2_name + '/' + args.save_name
    o_mean, o_std, g_mean, g_std, model = torch.load(model_path, map_location=lambda storage, loc: storage)

    # get the env param
    observation = env.reset()
    # get the environment params
    env_params = {'obs': ddpg_agent.inject_obs(observation['observation'], env_id, args).shape[0],
                  'goal': observation['desired_goal'].shape[0],
                  'action': env.action_space.shape[0],
                  'action_max': env.action_space.high[0],
                  }

    try:
        # create the actor network
        actor_network = actor(env_params)
        actor_network.load_state_dict(model)
        actor_network.eval()
    except Exception:
        # loading failed: the checkpoint was trained with the opposite
        # observation-injection setting, so flip the flag and rebuild
        # the network with the adjusted observation size
        args.dont_inject_observation = not args.dont_inject_observation
        # get the environment params
        env_params['obs'] = ddpg_agent.inject_obs(observation['observation'], env_id, args).shape[0]
        # create the actor network
        actor_network = actor(env_params)
        actor_network.load_state_dict(model)
        actor_network.eval()

    for i in range(args.demo_length):
        observation = env.reset()
        # start to do the demo
        obs = ddpg_agent.inject_obs(observation['observation'], env_id, args)
        g = observation['desired_goal']
        for t in range(env._max_episode_steps):
            env.render()
            inputs = process_inputs(obs, g, o_mean, o_std, g_mean, g_std, args)
            with torch.no_grad():
                pi = actor_network(inputs)
            action = pi.detach().numpy().squeeze()
            # put actions into the environment
            observation_new, reward, _, info = env.step(action)
            obs = ddpg_agent.inject_obs(observation_new['observation'], env_id, args)
        print('the episode is: {}, is success: {}'.format(i, info['is_success']))
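
Example #5 relies on a process_inputs helper to turn the raw observation and goal into the actor's input using the saved normalizer statistics; a minimal sketch of that helper, assuming args provides clip_obs and clip_range as in the training code (this follows the common pattern of these HER demo scripts, not necessarily this repository's exact implementation):

import numpy as np
import torch

def process_inputs(o, g, o_mean, o_std, g_mean, g_std, args):
    # clip the raw observation/goal, normalize with the stored statistics,
    # clip again to the normalizer range, then concatenate into one tensor
    o_clip = np.clip(o, -args.clip_obs, args.clip_obs)
    g_clip = np.clip(g, -args.clip_obs, args.clip_obs)
    o_norm = np.clip((o_clip - o_mean) / o_std, -args.clip_range, args.clip_range)
    g_norm = np.clip((g_clip - g_mean) / g_std, -args.clip_range, args.clip_range)
    inputs = np.concatenate([o_norm, g_norm])
    return torch.tensor(inputs, dtype=torch.float32)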
Example #6
 model_path = args.save_dir + args.env_name + '/model.pt'
 o_mean, o_std, g_mean, g_std, model = torch.load(
     model_path, map_location=lambda storage, loc: storage)
 # create the environment
 env = gym.make(args.env_name)
 # get the env param
 observation = env.reset()
 # get the environment params
 env_params = {
     'obs': observation['observation'].shape[0],
     'goal': observation['desired_goal'].shape[0],
     'action': env.action_space.shape[0],
     'action_max': env.action_space.high[0],
 }
 # create the actor network
 actor_network = actor(env_params)
 actor_network.load_state_dict(model)
 actor_network.eval()
 safe_path('./images')
 for i in range(args.demo_length):
     observation = env.reset()
     # start to do the demo
     obs = observation['observation']
     g = observation['desired_goal']
     epi_path = safe_path('./images/epi{}'.format(i))
     for t in range(env._max_episode_steps):
         # env.render()
         path = os.path.join(epi_path, 'img_{}.jpg'.format(t))
         img = env.sim.render(mode='offscreen',
                              camera_name='external_camera_0',
                              width=256,
Example #7
    def __init__(self, args, env, env_params):
        self.args = args
        self.env = env
        self.env_params = env_params
        sim = self.env.sim
        self.viewer = MjRenderContextOffscreen(sim)
        # self.viewer.cam.fixedcamid = 3
        # self.viewer.cam.type = const.CAMERA_FIXED
        self.critic_loss = []
        self.actor_loss = []
        self.viewer.cam.distance = 1.2
        self.viewer.cam.azimuth = 180
        self.viewer.cam.elevation = -25
        env.env._viewers['rgb_array'] = self.viewer

        self.env_params = env_params
        self.image_based = bool(args.image)
        print("Training image-based RL?: {}".format(self.image_based))
        # create the network
        if not self.image_based:
            self.actor_network = actor(env_params)
        else:
            self.actor_network = new_actor(env_params)
            #self.actor_network = resnet_actor(env_params)
        self.critic_network = critic(env_params)

        # sync the networks across the cpus
        sync_networks(self.actor_network)
        sync_networks(self.critic_network)
        # build up the target network
        if not self.image_based:
            self.actor_target_network = actor(env_params)
        else:
            #self.actor_target_network = resnet_actor(env_params)
            self.actor_target_network = new_actor(env_params)

        self.critic_target_network = critic(env_params)
        # load the weights into the target networks
        self.actor_target_network.load_state_dict(
            self.actor_network.state_dict())
        self.critic_target_network.load_state_dict(
            self.critic_network.state_dict())
        # if use gpu
        if self.args.cuda:
            print("use the GPU")
            self.actor_network.cuda(MPI.COMM_WORLD.Get_rank())
            self.critic_network.cuda(MPI.COMM_WORLD.Get_rank())
            self.actor_target_network.cuda(MPI.COMM_WORLD.Get_rank())
            self.critic_target_network.cuda(MPI.COMM_WORLD.Get_rank())

        # create the optimizer
        self.actor_optim = torch.optim.Adam(self.actor_network.parameters(),
                                            lr=self.args.lr_actor)
        self.critic_optim = torch.optim.Adam(self.critic_network.parameters(),
                                             lr=self.args.lr_critic)
        # her sampler
        self.her_module = her_sampler(self.args.replay_strategy,
                                      self.args.replay_k,
                                      self.env.compute_reward,
                                      self.image_based)
        # create the replay buffer
        self.buffer = replay_buffer(self.env_params, self.args.buffer_size,
                                    self.her_module.sample_her_transitions,
                                    self.image_based)
        # create the normalizer
        self.o_norm = normalizer(size=env_params['obs'],
                                 default_clip_range=self.args.clip_range)
        self.g_norm = normalizer(size=env_params['goal'],
                                 default_clip_range=self.args.clip_range)
        # create the directory to store the model
        if MPI.COMM_WORLD.Get_rank() == 0:
            if not os.path.exists(self.args.save_dir):
                os.mkdir(self.args.save_dir)
            # path to save the model
            self.model_path = os.path.join(self.args.save_dir,
                                           self.args.env_name)
            if not os.path.exists(self.model_path):
                os.mkdir(self.model_path)
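
Example #7's offscreen render context is what image-based observations would be read from; a minimal sketch of grabbing a frame through the mujoco_py API (the 100x100 size and the helper name are illustrative, not taken from this snippet):

import numpy as np

def render_offscreen(viewer, width=100, height=100):
    # render into the offscreen context, read the pixels back, and flip
    # vertically because mujoco_py returns frames with the origin at the bottom
    viewer.render(width, height)
    rgb = viewer.read_pixels(width, height, depth=False)
    return np.flipud(rgb)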
Example #8
    def __init__(self, args, env1, env2, env1_params, env2_params):
        self.args = args
        self.env1 = env1
        self.env2 = env2
        self.env1_params = env1_params
        self.env2_params = env2_params
        if not self.args.dont_inject_observation:
            self.env2_params['obs'] += 1
            self.env1_params['obs'] += 1

        self.train_mode = TrainMode(args.training_mode)

        # store weights and biases API key if in args
        if self.args.wandb_api_key is not None:
            os.environ["WANDB_API_KEY"] = self.args.wandb_api_key
        # if key is present set a flag to enable the functionality
        self.use_wandb_log = os.environ.get("WANDB_API_KEY") is not None

        # create the network
        assert env1_params == env2_params  # TODO: make sure to check for equality
        self.actor_network = actor(env1_params)

        self.critic_network = critic(env1_params)

        # sync the networks across the cpus
        sync_networks(self.actor_network)
        sync_networks(self.critic_network)

        # build up the target network
        self.actor_target_network = actor(env1_params)
        self.critic_target_network = critic(env1_params)

        # load the weights into the target networks
        self.actor_target_network.load_state_dict(self.actor_network.state_dict())
        self.critic_target_network.load_state_dict(self.critic_network.state_dict())

        # if use gpu
        if self.args.cuda:
            self.actor_network.cuda()
            self.critic_network.cuda()
            self.actor_target_network.cuda()
            self.critic_target_network.cuda()

        # create the optimizer
        self.actor_optim = torch.optim.Adam(self.actor_network.parameters(), lr=self.args.lr_actor)
        self.critic_optim = torch.optim.Adam(self.critic_network.parameters(), lr=self.args.lr_critic)
        
        # setup dual critic if applicable
        self.use_two_critics = self.args.dual_critic
        if self.use_two_critics:
            self.critic_network2 = critic(env1_params)
            sync_networks(self.critic_network2)
            self.critic_target_network2 = critic(env1_params)
            self.critic_target_network2.load_state_dict(self.critic_network2.state_dict())
            self.critic2_optim = torch.optim.Adam(self.critic_network2.parameters(), lr=self.args.lr_critic)
            
            if self.args.cuda:
                self.critic_network2.cuda()
                self.critic_target_network2.cuda()

        # her sampler
        self.her_module1 = her_sampler(self.args.replay_strategy, self.args.replay_k, self.env1.compute_reward)
        self.her_module2 = her_sampler(self.args.replay_strategy, self.args.replay_k, self.env2.compute_reward)

        # create the replay buffer
        self.buffer1 = replay_buffer(self.env1_params, self.args.buffer_size, self.her_module1.sample_her_transitions)
        self.buffer2 = replay_buffer(self.env2_params, self.args.buffer_size, self.her_module2.sample_her_transitions)

        # create the normalizer
        self.o_norm = normalizer(size=env1_params['obs'], default_clip_range=self.args.clip_range)
        self.g_norm = normalizer(size=env1_params['goal'], default_clip_range=self.args.clip_range)

        # create the directory for storing the model
        if MPI.COMM_WORLD.Get_rank() == 0:
            if not os.path.exists(self.args.save_dir):
                os.mkdir(self.args.save_dir)

            # path to save the model
            self.model_path = os.path.join(self.args.save_dir, self.args.env1_name + self.args.env2_name)
            if not os.path.exists(self.model_path):
                os.mkdir(self.model_path)
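
Example #8's optional second critic is set up but not used in this snippet; one common way two critics are combined is the TD3-style clipped double-Q target, sketched here purely as an illustration of that technique (the function and argument names are assumptions, not this repository's confirmed update rule):

import torch

def clipped_double_q_target(critic1, critic2, inputs_next, actions_next, rewards, gamma):
    # evaluate both target critics and keep the smaller estimate to damp
    # overestimation before forming the Bellman target
    with torch.no_grad():
        q_next = torch.min(critic1(inputs_next, actions_next),
                           critic2(inputs_next, actions_next))
        return rewards + gamma * q_next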
Example #9
    def __init__(self, args, env, env_params):
        self.args = args
        self.env = env
        self.env_params = env_params
        # check whether to continue training or start new
        if args.continue_training is None:
            self.continueTraining = False
        else:
            self.continueTraining = args.continue_training
        # create the network
        self.actor_network = actor(env_params)
        self.critic_network = critic(env_params)
        # sync the networks across the cpus
        sync_networks(self.actor_network)
        sync_networks(self.critic_network)

        # build up the target network
        self.actor_target_network = actor(env_params)
        self.critic_target_network = critic(env_params)
        dash = "-" * 42
        if MPI.COMM_WORLD.Get_rank() == 0:
            print("env.spec.id: ", env.spec.id)
            print("args: ")
            d_args = vars(args)
            print(dash)
            print("{:<25s}{:<15s}".format("ARGS", "VALUE"))
            for key in d_args:
                if d_args[key] is not None:
                    print("|{:<22s} | {:<15}|".format(key, d_args[key]))
            print(dash)
            print("env_inits: ")
            print("{:<25s}{:<15s}".format("ENV_INIT", "VALUE"))
            for key in env.env.inits:
                print("|{:<22s} | {:<15}|".format(key, env.env.inits[key]))
            print(dash)
            print("env_dimensions: ")
            for key in env_params:
                print("|{:<22s} | {:<15}|".format(key, env_params[key]))
            print(dash)

            #print("env_params", env_params)
        if self.continueTraining:
            if MPI.COMM_WORLD.Get_rank() == 0:
                print("CONTINUE TRAINING...")
            env_name = env.spec.id
            saved_dicts = load_saved_state_dicts(
                args.save_dir, env_name, MPI.COMM_WORLD.Get_rank())
            self.actor_network.load_state_dict(saved_dicts['actor'])
            self.critic_network.load_state_dict(saved_dicts['critic'])

            self.critic_target_network.load_state_dict(
                saved_dicts['critic_target'])
            self.actor_target_network.load_state_dict(
                saved_dicts['actor_target'])
        else:

            # load the weights into the target networks
            self.actor_target_network.load_state_dict(
                self.actor_network.state_dict())
            self.critic_target_network.load_state_dict(
                self.critic_network.state_dict())

        # if use gpu
        if self.args.cuda:
            self.actor_network.cuda()
            self.critic_network.cuda()
            self.actor_target_network.cuda()
            self.critic_target_network.cuda()
        # create the optimizer
        self.actor_optim = torch.optim.Adam(
            self.actor_network.parameters(), lr=self.args.lr_actor)
        self.critic_optim = torch.optim.Adam(
            self.critic_network.parameters(), lr=self.args.lr_critic)
        # her sampler
        self.her_module = her_sampler(
            self.args.replay_strategy, self.args.replay_k, self.env.compute_reward)
        # create the replay buffer
        self.buffer = replay_buffer(
            self.env_params, self.args.buffer_size, self.her_module.sample_her_transitions)
        # create the normalizer
        self.o_norm = normalizer(
            size=env_params['obs'], default_clip_range=self.args.clip_range)
        self.g_norm = normalizer(
            size=env_params['goal'], default_clip_range=self.args.clip_range)
        # create the directory to store the model
        if MPI.COMM_WORLD.Get_rank() == 0:
            if not os.path.exists(self.args.save_dir):
                os.mkdir(self.args.save_dir)
            # path to save the model
            self.model_path = os.path.join(
                self.args.save_dir, self.args.env_name)
            if not os.path.exists(self.model_path):
                os.mkdir(self.model_path)
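
Every __init__ variant above takes the same (args, env, env_params) triple; a minimal sketch of building env_params and instantiating the agent, following the dictionary already shown in Example #6 (get_args, max_timesteps, and learn are assumptions about the surrounding training script):

import gym

args = get_args()                      # assumed argparse helper from the training script
env = gym.make(args.env_name)
observation = env.reset()
env_params = {
    'obs': observation['observation'].shape[0],
    'goal': observation['desired_goal'].shape[0],
    'action': env.action_space.shape[0],
    'action_max': env.action_space.high[0],
    'max_timesteps': env._max_episode_steps,  # assumed: HER rollouts usually need the horizon
}
agent = ddpg_agent(args, env, env_params)
agent.learn()                          # assumed entry point for the training loop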