Example no. 1
    def __init__(self, args, env, env_params):
        self.args = args
        self.env = env
        self.env_params = env_params
        # create the network
        self.actor_network = actor(env_params)
        self.critic_network = critic(env_params)
        # sync the networks across the cpus
        sync_networks(self.actor_network)
        sync_networks(self.critic_network)
        # build up the target network
        self.actor_target_network = actor(env_params)
        self.critic_target_network = critic(env_params)
        # load the weights into the target networks
        self.actor_target_network.load_state_dict(
            self.actor_network.state_dict())
        self.critic_target_network.load_state_dict(
            self.critic_network.state_dict())
        # move the networks to the gpu if enabled
        if self.args.cuda and torch.cuda.is_available():
            self.actor_network.cuda()
            self.critic_network.cuda()
            self.actor_target_network.cuda()
            self.critic_target_network.cuda()
        # create the optimizer
        self.actor_optim = torch.optim.Adam(self.actor_network.parameters(),
                                            lr=self.args.lr_actor)
        self.critic_optim = torch.optim.Adam(self.critic_network.parameters(),
                                             lr=self.args.lr_critic)
        # her sampler
        self.her_module = her_sampler(self.args.replay_strategy,
                                      self.args.replay_k,
                                      self.env.compute_reward)
        # create the replay buffer
        self.buffer = replay_buffer(self.env_params, self.args.buffer_size,
                                    self.her_module.sample_her_transitions)
        # create the normalizer
        self.o_norm = normalizer(size=env_params['obs'],
                                 default_clip_range=self.args.clip_range)
        self.g_norm = normalizer(size=env_params['goal'],
                                 default_clip_range=self.args.clip_range)

        # optional tensorboardX writer (left commented out below)

        # create the directory to store the model
        if MPI.COMM_WORLD.Get_rank() == 0:
            if not os.path.exists(self.args.save_dir):
                os.mkdir(self.args.save_dir)
            # path to save the model
            self.model_path = os.path.join(self.args.save_dir,
                                           self.args.env_name)
            if not os.path.exists(self.model_path):
                os.mkdir(self.model_path)
            print('model path', self.model_path)

        # self.writer = SummaryWriter('./logs')
        self.reward_list = []
        self.reward_record = []
        self.success_rate_list = []
        self.success_list = []
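
All of these constructors build target copies of the actor and critic but never show how the targets are refreshed. Below is a minimal sketch of the standard DDPG soft (polyak) update, under the assumption that these agents follow the usual rule; the helper name and the tau value are illustrative, not taken from the excerpts:

# A minimal sketch of the soft (polyak) target update applied after each
# training step. The name and tau value are assumptions for illustration.
def _soft_update_target_network(target, source, tau=0.05):
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_((1 - tau) * target_param.data +
                                tau * param.data)

A typical call site would be _soft_update_target_network(self.actor_target_network, self.actor_network) right after each optimizer step.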
Example no. 2
    def __init__(self, args, env, env_params, image=True):
        self.args = args
        self.env = env
        self.env_params = env_params
        self.image = image

        # create the network
        if self.image:
            self.actor_network = actor_image(env_params, env_params['obs'])
            self.critic_network = critic_image(env_params, env_params['obs'] + env_params['action'])
        else:
            self.actor_network = actor(env_params, env_params['obs'])
            self.critic_network = critic(env_params, env_params['obs'] + env_params['action'])

        # load a pretrained model if load_dir is set
        if self.args.load_dir != '':
            actor_load_path = self.args.load_dir + '/actor.pt'
            model = torch.load(actor_load_path)
            self.actor_network.load_state_dict(model)
            critic_load_path = self.args.load_dir + '/critic.pt'
            model = torch.load(critic_load_path)
            self.critic_network.load_state_dict(model)

        # sync the networks across the cpus
        # sync_networks(self.actor_network)
        # sync_networks(self.critic_network)
        # build up the target network
        # if self.image:
        #     self.actor_target_network = actor_image(env_params, env_params['obs'])
        # else:
        #     self.actor_target_network = actor(env_params, env_params['obs'])
        # # load the weights into the target networks
        # self.actor_target_network.load_state_dict(self.actor_network.state_dict())

        # move the networks to the gpu if enabled
        if self.args.cuda:
            self.actor_network.cuda()
            # self.actor_target_network.cuda()
            self.critic_network.cuda()
        # create the optimizer
        self.actor_optim = torch.optim.Adam(self.actor_network.parameters(), lr=self.args.lr_actor)
        self.critic_optim = torch.optim.Adam(self.critic_network.parameters(), lr=self.args.lr_critic)
        # her sampler
        self.her_module = her_sampler(self.args.replay_strategy, self.args.replay_k, self.env().compute_reward)
        # create the replay buffer
        self.buffer = replay_buffer(self.env_params, self.args.buffer_size, self.her_module.sample_her_transitions, image=self.image)

        # path to save the model
        self.model_path = os.path.join(self.args.save_dir, self.args.env_name)
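
Several of these examples call sync_networks(...) before training, while this one comments it out for single-process use. A common implementation broadcasts rank 0's flattened parameters so every MPI worker starts from identical weights; the sketch below is written under that assumption and is not the exact helper these repos import:

import numpy as np
import torch
from mpi4py import MPI

# Sketch: rank 0 broadcasts its parameters to all MPI workers.
def sync_networks(network):
    comm = MPI.COMM_WORLD
    flat_params = np.concatenate(
        [p.data.cpu().numpy().flatten() for p in network.parameters()])
    comm.Bcast(flat_params, root=0)
    # copy the broadcast values back, parameter by parameter
    pointer = 0
    for p in network.parameters():
        numel = p.data.numel()
        p.data.copy_(torch.tensor(
            flat_params[pointer:pointer + numel]).view_as(p.data))
        pointer += numel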
Example no. 3
    def __init__(self, args, env, env_params):
        self.args = args
        self.env = env
        self.env_params = env_params

        # create the network
        self.actor_network = actor(env_params)
        self.critic_network = critic(env_params)
        # sync the networks across the cpus
        #sync_networks(self.actor_network)
        #sync_networks(self.critic_network)
        # build up the target network
        self.actor_target_network = actor(env_params)
        self.critic_target_network = critic(env_params)

        # Load the model if required
        if args.load_path is not None:
            o_mean, o_std, g_mean, g_std, load_actor_model, load_critic_model = torch.load(
                args.load_path, map_location=lambda storage, loc: storage)
            self.actor_network.load_state_dict(load_actor_model)
            self.critic_network.load_state_dict(load_critic_model)

        # load the weights into the target networks
        self.actor_target_network.load_state_dict(
            self.actor_network.state_dict())
        self.critic_target_network.load_state_dict(
            self.critic_network.state_dict())
        # move the networks to the gpu if enabled
        if self.args.cuda:
            self.actor_network.cuda()
            self.critic_network.cuda()
            self.actor_target_network.cuda()
            self.critic_target_network.cuda()
        # create the optimizer
        self.actor_optim = torch.optim.Adam(self.actor_network.parameters(),
                                            lr=self.args.lr_actor)
        self.critic_optim = torch.optim.Adam(self.critic_network.parameters(),
                                             lr=self.args.lr_critic)
        # her sampler
        self.her_module = her_sampler(self.args.replay_strategy,
                                      self.args.replay_k,
                                      self.env.compute_reward)
        # create the replay buffer
        self.buffer = replay_buffer(self.env_params, self.args.buffer_size,
                                    self.her_module.sample_her_transitions)
        # create the normalizer
        self.o_norm = normalizer(size=env_params['obs'],
                                 default_clip_range=self.args.clip_range)
        self.g_norm = normalizer(size=env_params['goal'],
                                 default_clip_range=self.args.clip_range)
        # create the directory to store the model
        #if MPI.COMM_WORLD.Get_rank() == 0:
        if not os.path.exists(self.args.save_dir):
            os.mkdir(self.args.save_dir)

        # make up a suffix for the model path to indicate which method was used for training
        #self.folder_suffix = '_' + self.args.replay_strategy + '_' + self.args.env_params.reward_type
        # path to save the model
        self.model_path = os.path.join(self.args.save_dir, self.args.env_name)
        if not os.path.exists(self.model_path):
            os.mkdir(self.model_path)
        self.model_path = os.path.join(self.model_path,
                                       'seed_' + str(self.args.seed))
        if not os.path.exists(self.model_path):
            os.mkdir(self.model_path)
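
Every example constructs normalizer(size=..., default_clip_range=...) for observations and goals. Below is a minimal sketch of such a running mean/std normalizer with clipping; the field and method names are assumptions about the interface, and the MPI synchronization of statistics these repos typically perform is omitted:

import numpy as np

class normalizer:
    def __init__(self, size, default_clip_range=np.inf, eps=1e-2):
        self.size = size
        self.default_clip_range = default_clip_range
        self.eps = eps
        self.sum = np.zeros(size, dtype=np.float32)
        self.sumsq = np.zeros(size, dtype=np.float32)
        self.count = 0.0
        self.mean = np.zeros(size, dtype=np.float32)
        self.std = np.ones(size, dtype=np.float32)

    def update(self, v):
        # accumulate running statistics from a batch of vectors
        v = v.reshape(-1, self.size)
        self.sum += v.sum(axis=0)
        self.sumsq += (v ** 2).sum(axis=0)
        self.count += v.shape[0]
        self.mean = self.sum / self.count
        self.std = np.sqrt(np.maximum(
            self.eps ** 2, self.sumsq / self.count - self.mean ** 2))

    def normalize(self, v, clip_range=None):
        clip = self.default_clip_range if clip_range is None else clip_range
        return np.clip((v - self.mean) / self.std, -clip, clip)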
Example no. 4
    def __init__(self, args, env, env_params):
        self.savetime = 0
        self.args = args
        self.env = env
        self.env_params = env_params
        # create the network
        self.actor_network = actor(env_params)
        self.critic_network = critic(env_params)
        # sync the networks across the cpus
        sync_networks(self.actor_network)
        sync_networks(self.critic_network)
        # build up the target network
        self.actor_target_network = actor(env_params)
        self.critic_target_network = critic(env_params)
        # load the weights into the target networks
        self.actor_target_network.load_state_dict(
            self.actor_network.state_dict())
        self.critic_target_network.load_state_dict(
            self.critic_network.state_dict())
        # move the networks to the gpu if enabled
        if self.args.cuda:
            self.actor_network.cuda()
            self.critic_network.cuda()
            self.actor_target_network.cuda()
            self.critic_target_network.cuda()
        # create the optimizer
        self.actor_optim = torch.optim.Adam(self.actor_network.parameters(),
                                            lr=self.args.lr_actor)
        self.critic_optim = torch.optim.Adam(self.critic_network.parameters(),
                                             lr=self.args.lr_critic)
        # her sampler
        self.her_module = her_sampler(self.args.replay_strategy,
                                      self.args.replay_k,
                                      self.env.compute_reward)
        # create the replay buffer
        self.buffer = replay_buffer(self.env_params, self.args.buffer_size,
                                    self.her_module.sample_her_transitions)
        # whether to add demonstration data
        if self.args.add_demo:
            # initialize the replay buffer with demonstrations
            self._init_demo_buffer()
        # create the normalizer
        self.o_norm = normalizer(size=env_params['obs'],
                                 default_clip_range=self.args.clip_range)
        self.g_norm = normalizer(size=env_params['goal'],
                                 default_clip_range=self.args.clip_range)
        # load the data to continue the training
        # model_path = "saved_models/bmirobot-v3/125_True12_model.pt"
        # model_path = args.save_dir + args.env_name + '/' + str(args.seed) + '_' + str(args.add_demo) + '_model.pt'
        # o_mean, o_std, g_mean, g_std, model = torch.load(model_path, map_location=lambda storage, loc: storage)
        # self.actor_network.load_state_dict(model)
        # self.o_norm.mean = o_mean
        # self.o_norm.std = o_std
        # self.g_norm.mean = g_mean
        # self.g_norm.std = g_std
        self.success_rates = []  # record the success rate of each epoch
        # create the directory to store the model
        if MPI.COMM_WORLD.Get_rank() == 0:
            if not os.path.exists(self.args.save_dir):
                os.mkdir(self.args.save_dir)
            # path to save the model
            self.model_path = os.path.join(self.args.save_dir,
                                           self.args.env_name)
            if not os.path.exists(self.model_path):
                os.mkdir(self.model_path)
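
Each agent passes env.compute_reward into her_sampler and hands its sample_her_transitions method to the replay buffer. The sketch below shows the 'future' relabeling such a sampler typically implements: with probability future_p = 1 - 1/(1 + replay_k), a transition's goal is replaced by an achieved goal observed later in the same episode, and the reward is recomputed. This is simplified and illustrative, not the exact class constructed above:

import numpy as np

class her_sampler:
    def __init__(self, replay_strategy, replay_k, reward_func):
        self.replay_strategy = replay_strategy
        self.future_p = (1 - 1.0 / (1 + replay_k)
                         if replay_strategy == 'future' else 0)
        self.reward_func = reward_func

    def sample_her_transitions(self, episode_batch, batch_size):
        n_episodes, T = episode_batch['actions'].shape[:2]
        ep_idxs = np.random.randint(0, n_episodes, batch_size)
        t_idxs = np.random.randint(T, size=batch_size)
        transitions = {k: episode_batch[k][ep_idxs, t_idxs].copy()
                       for k in episode_batch.keys()}
        # pick which transitions get relabeled with a future achieved goal
        her_mask = np.where(np.random.uniform(size=batch_size) < self.future_p)
        future_offset = (np.random.uniform(size=batch_size) *
                         (T - t_idxs)).astype(int)
        future_t = (t_idxs + 1 + future_offset)[her_mask]
        transitions['g'][her_mask] = episode_batch['ag'][ep_idxs[her_mask],
                                                         future_t]
        # recompute rewards against the (possibly replaced) goals
        transitions['r'] = np.expand_dims(
            self.reward_func(transitions['ag_next'], transitions['g'], None), 1)
        return transitions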
Example no. 5
    def __init__(self, args, env, env_params):
        self.args = args
        self.env = env
        self.env_params = env_params

        # create the network
        self.actor_network = actor(env_params)
        self.critic_network = critic(env_params)
        # build up the target network
        self.actor_target_network = actor(env_params)
        self.critic_target_network = critic(env_params)

        # Load the model if required
        if args.load_path is not None:
            o_mean, o_std, g_mean, g_std, load_actor_model, load_critic_model = torch.load(
                args.load_path, map_location=lambda storage, loc: storage)
            self.actor_network.load_state_dict(load_actor_model)
            self.critic_network.load_state_dict(load_critic_model)

        # load the weights into the target networks
        self.actor_target_network.load_state_dict(
            self.actor_network.state_dict())
        self.critic_target_network.load_state_dict(
            self.critic_network.state_dict())
        # move the networks to the gpu if enabled
        if self.args.cuda:
            self.actor_network.cuda()
            self.critic_network.cuda()
            self.actor_target_network.cuda()
            self.critic_target_network.cuda()
        # create the optimizer
        self.actor_optim = torch.optim.Adam(self.actor_network.parameters(),
                                            lr=self.args.lr_actor)
        self.critic_optim = torch.optim.Adam(self.critic_network.parameters(),
                                             lr=self.args.lr_critic)
        # her sampler
        self.her_module = her_sampler(self.args.replay_strategy,
                                      self.args.replay_k,
                                      self.env.compute_reward)
        # create the replay buffer
        if self.args.replay_strategy == 'future':
            self.buffer = replay_buffer(self.env_params, self.args.buffer_size,
                                        self.her_module.sample_her_transitions)
        else:
            self.buffer = replay_buffer(
                self.env_params, self.args.buffer_size,
                self.her_module.sample_normal_transitions)
        # create the normalizer
        self.o_norm = normalizer(size=env_params['obs'],
                                 default_clip_range=self.args.clip_range)
        self.g_norm = normalizer(size=env_params['goal'],
                                 default_clip_range=self.args.clip_range)
        # create the directory to store the model
        if not os.path.exists(self.args.save_dir):
            os.mkdir(self.args.save_dir)

        # make up a suffix for the model path to indicate which method was used for training
        buffer_len_epochs = int(
            self.args.buffer_size /
            (env_params['max_timesteps'] * self.args.num_rollouts_per_cycle *
             self.args.n_cycles))
        if self.args.exploration_strategy == 'pgg':
            name_add_on = ('_final_distance_based_goal_generation'
                           if self.args.pgg_strategy == 'final'
                           else '_distance_based_goal_generation')
            if self.args.replay_strategy != 'future':
                name_add_on += '_withoutHER'
        else:
            name_add_on = ('_originalHER'
                           if self.args.replay_strategy == 'future'
                           else '_originalDDPG')
        name_add_on += '_buffer' + str(buffer_len_epochs) + 'epochs'

        # path to save the model
        self.model_path = os.path.join(self.args.save_dir,
                                       self.args.env_name + name_add_on)

        if not os.path.exists(self.model_path):
            os.mkdir(self.model_path)
        self.model_path = os.path.join(self.model_path,
                                       'seed_' + str(self.args.seed))
        if not os.path.exists(self.model_path):
            os.mkdir(self.model_path)
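
This example chooses between sample_her_transitions and sample_normal_transitions when constructing the buffer. The episode-based buffer itself usually just stores whole trajectories and delegates batch construction to whichever sample_func it was given; below is a simplified sketch with illustrative field names, not the exact class used above:

import numpy as np

class replay_buffer:
    def __init__(self, env_params, buffer_size, sample_func):
        self.T = env_params['max_timesteps']
        self.size = buffer_size // self.T   # capacity measured in episodes
        self.sample_func = sample_func
        self.n_stored = 0                   # total episodes ever written
        self.current_size = 0               # episodes currently held
        self.buffers = {
            'obs': np.empty([self.size, self.T + 1, env_params['obs']]),
            'ag': np.empty([self.size, self.T + 1, env_params['goal']]),
            'g': np.empty([self.size, self.T, env_params['goal']]),
            'actions': np.empty([self.size, self.T, env_params['action']]),
        }

    def store_episode(self, mb_obs, mb_ag, mb_g, mb_actions):
        idx = self.n_stored % self.size     # overwrite the oldest episode
        self.buffers['obs'][idx] = mb_obs
        self.buffers['ag'][idx] = mb_ag
        self.buffers['g'][idx] = mb_g
        self.buffers['actions'][idx] = mb_actions
        self.n_stored += 1
        self.current_size = min(self.n_stored, self.size)

    def sample(self, batch_size):
        # expose next-step views, then let the injected sampler build the batch
        temp = {k: v[:self.current_size] for k, v in self.buffers.items()}
        temp['obs_next'] = temp['obs'][:, 1:, :]
        temp['ag_next'] = temp['ag'][:, 1:, :]
        return self.sample_func(temp, batch_size)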
Example no. 6
    def __init__(self,
                 args,
                 envs_lst,
                 env_params,
                 expert_lst_dir,
                 recurrent=True,
                 ee_reward=True,
                 image=True):
        self.args = args
        self.envs_lst = envs_lst
        self.env_params = env_params
        self.recurrent = recurrent
        self.ee_reward = ee_reward
        self.image = image

        # initialize expert
        self.expert_lst = []
        for expert_dir in expert_lst_dir:
            expert_load_path = expert_dir + '/model.pt'
            o_mean, o_std, g_mean, g_std, model = torch.load(expert_load_path)
            expert_model = actor(env_params,
                                 env_params['obs'] + env_params['goal'])
            expert_model.load_state_dict(model)
            self.expert_lst.append({
                "model": expert_model,
                "o_mean": o_mean,
                "o_std": o_std,
                "g_mean": g_mean,
                "g_std": g_std
            })

        # create the network
        if self.recurrent:
            self.actor_network = actor_recurrent(
                env_params,
                env_params['obs'] + env_params['goal'] + env_params['action'],
                env_params['goal'])
            # self.critic_network = critic_recurrent(env_params, env_params['obs'] + env_params['goal'] + 2 * env_params['action'])
        else:
            self.actor_network = actor(
                env_params,
                env_params['obs'] + env_params['goal'] + env_params['action'],
                env_params['goal'])
        self.critic_network = critic(
            env_params,
            env_params['obs'] + 2 * env_params['goal'] + env_params['action'])

        # create the normalizer
        self.o_norm = normalizer(size=env_params['obs'],
                                 default_clip_range=self.args.clip_range)
        self.g_norm = normalizer(size=env_params['goal'],
                                 default_clip_range=self.args.clip_range)
        self.sg_norm = normalizer(size=env_params['action'],
                                  default_clip_range=self.args.clip_range)

        # load model if load_path is not None
        if self.args.load_dir != '':
            load_path = self.args.load_dir + '/model.pt'
            # o_mean, o_std, g_mean, g_std, sg_mean, sg_std, model = torch.load(load_path)
            o_mean, o_std, g_mean, g_std, model = torch.load(load_path)
            self.o_norm.mean = o_mean
            self.o_norm.std = o_std
            self.g_norm.mean = g_mean
            self.g_norm.std = g_std
            # self.sg_norm.mean = sg_mean
            # self.sg_norm.std = sg_std
            self.actor_network.load_state_dict(model)

        # sync the networks across the cpus
        sync_networks(self.actor_network)
        sync_networks(self.critic_network)
        # build up the target network
        if self.recurrent:
            self.actor_target_network = actor_recurrent(
                env_params,
                env_params['obs'] + env_params['goal'] + env_params['action'],
                env_params['goal'])
            # self.critic_target_network = critic_recurrent(env_params, env_params['obs'] + env_params['goal'] + 2 * env_params['action'])
        else:
            self.actor_target_network = actor(
                env_params,
                env_params['obs'] + env_params['goal'] + env_params['action'],
                env_params['goal'])
        self.critic_target_network = critic(
            env_params,
            env_params['obs'] + 2 * env_params['goal'] + env_params['action'])
        # load the weights into the target networks
        self.actor_target_network.load_state_dict(
            self.actor_network.state_dict())
        self.critic_target_network.load_state_dict(
            self.critic_network.state_dict())

        # move the networks to the gpu if enabled
        if self.args.cuda:
            self.actor_network.cuda()
            self.critic_network.cuda()
            self.actor_target_network.cuda()
            self.critic_target_network.cuda()
        # create the optimizer
        self.actor_optim = torch.optim.Adam(self.actor_network.parameters(),
                                            lr=self.args.lr_actor)
        self.critic_optim = torch.optim.Adam(self.critic_network.parameters(),
                                             lr=self.args.lr_critic)
        # her sampler
        self.her_module_lst = [
            her_sampler(self.args.replay_strategy, self.args.replay_k,
                        env.compute_reward) for env in self.envs_lst
        ]
        # create the replay buffer
        self.buffer_lst = [
            replay_buffer(self.env_params,
                          self.args.buffer_size,
                          her_module.sample_her_transitions,
                          ee_reward=True) for her_module in self.her_module_lst
        ]

        # path to save the model
        self.model_path = os.path.join(self.args.save_dir, self.args.env_name)
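
Examples 4 and 6 unpack checkpoints as o_mean, o_std, g_mean, g_std, model = torch.load(path), which implies a save site shaped like the sketch below. The function name is an assumption; the save code itself is not part of these excerpts:

import os
import torch

# Hypothetical save routine matching the five-element load pattern above.
def save_checkpoint(model_path, o_norm, g_norm, actor_network):
    torch.save([o_norm.mean, o_norm.std, g_norm.mean, g_norm.std,
                actor_network.state_dict()],
               os.path.join(model_path, 'model.pt'))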