Example #1
0
 def __init__(self, name="", pubsub_params=None):
     """Initialize the stats object, optionally wiring up a pub/sub backend.

     :param name: identifier used to derive the pub/sub channel name
     :param pubsub_params: memory-backend parameters; when given, a pub/sub
         backend is created and a daemon subscriber thread is started on
         channel "channel-srs-<name>"
     """
     self.name = name
     self.pubsub = None
     if not pubsub_params:
         return
     self.channel = "channel-srs-{}".format(self.name)
     # imported lazily, as in the rest of this module, to avoid a hard
     # dependency when no pub/sub backend is requested
     from rl_coach.memories.backend.memory_impl import get_memory_backend
     self.pubsub = get_memory_backend(pubsub_params)
     listener = SharedRunningStatsSubscribe(self)
     listener.daemon = True  # don't block interpreter shutdown
     listener.start()
    def __init__(self, params: KubernetesParameters):
        """
        :param params: The Kubernetes parameters which are used for deploying the components in Coach. These parameters
        include namespace and kubeconfig.
        """

        super().__init__(params)
        self.params = params

        # Cluster credentials: an explicit kubeconfig wins; otherwise assume
        # we are running inside the cluster itself.
        if self.params.kubeconfig:
            k8sconfig.load_kube_config()
        else:
            k8sconfig.load_incluster_config()

        # No namespace given -> fall back to the current kubeconfig context.
        if not self.params.namespace:
            _, current_context = k8sconfig.list_kube_config_contexts()
            self.params.namespace = current_context['context']['namespace']

        proxy = os.environ.get('http_proxy')
        if proxy:
            # NOTE(review): pokes a private attribute of the k8s client config
            k8sclient.Configuration._default.proxy = proxy

        # Memory backend and data store each get their own orchestrator
        # parameter dict (deliberately not shared between the two).
        self.params.memory_backend_parameters.orchestrator_params = {
            'namespace': self.params.namespace
        }
        self.memory_backend = get_memory_backend(
            self.params.memory_backend_parameters)

        self.params.data_store_params.orchestrator_params = {
            'namespace': self.params.namespace
        }
        self.params.data_store_params.namespace = self.params.namespace
        self.data_store = get_data_store(self.params.data_store_params)

        if self.params.data_store_params.store_type == "s3":
            # S3 credentials come from an INI-style creds file when provided,
            # otherwise from the environment.
            self.s3_access_key = None
            self.s3_secret_key = None
            creds_file = self.params.data_store_params.creds_file
            if creds_file:
                parser = ConfigParser()
                parser.read(creds_file)
                try:
                    self.s3_access_key = parser.get('default',
                                                    'aws_access_key_id')
                    self.s3_secret_key = parser.get('default',
                                                    'aws_secret_access_key')
                except Error as e:
                    # best-effort: keys stay None if the file is malformed
                    screen.print("Error when reading S3 credentials file: %s",
                                 e)
            else:
                self.s3_access_key = os.environ.get('ACCESS_KEY_ID')
                self.s3_secret_key = os.environ.get('SECRET_ACCESS_KEY')
 def __init__(self,
              replicated_device=None,
              epsilon=1e-2,
              name="",
              create_ops=True,
              pubsub_params=None):
     """Set up the shared running statistics state.

     :param replicated_device: TF device on which the stats ops are created
     :param epsilon: small constant used by the stats computation
     :param name: identifier, also used to derive the pub/sub channel name
     :param create_ops: when True, build the TF ops immediately
     :param pubsub_params: memory-backend parameters; when given, a pub/sub
         backend is created and a daemon subscriber thread is started
     """
     self.sess = None
     self.name = name
     self.replicated_device = replicated_device
     self.epsilon = epsilon
     self.ops_were_created = False
     if create_ops:
         # pin op creation to the replicated (parameter-server) device
         with tf.device(replicated_device):
             self.create_ops()
     self.pubsub = None
     if not pubsub_params:
         return
     self.channel = "channel-srs-{}".format(self.name)
     self.pubsub = get_memory_backend(pubsub_params)
     listener = SharedRunningStatsSubscribe(self)
     listener.daemon = True  # don't block interpreter shutdown
     listener.start()
Example #4
0
 def setup_memory_backend(self) -> None:
     """Create the memory backend if the agent's memory declares backend params.

     No-op when `memory_backend_params` is absent from the memory parameters.
     """
     missing = object()  # sentinel: distinguishes "absent" from a None value
     backend_params = getattr(
         self.agent_params.memory, 'memory_backend_params', missing)
     if backend_params is not missing:
         self.memory_backend = get_memory_backend(backend_params)
Example #5
0
    def __init__(self, agent_parameters: AgentParameters, parent: Union['LevelManager', 'CompositeAgent']=None):
        """
        Base agent construction: sets up memory (with the distributed shared-memory
        protocol when applicable), devices, filters, signals and all internal counters.

        :param agent_parameters: A AgentParameters class instance with all the agent parameters
        :param parent: the containing 'LevelManager' or 'CompositeAgent' that owns this agent, if any
        """
        super().__init__()
        self.ap = agent_parameters
        # task index identifies this worker in a distributed run; worker 0 acts as chief
        self.task_id = self.ap.task_parameters.task_index
        self.is_chief = self.task_id == 0
        # shared memory is only relevant when running distributed AND the memory opts in
        self.shared_memory = type(agent_parameters.task_parameters) == DistributedTaskParameters \
                             and self.ap.memory.shared_memory
        if self.shared_memory:
            self.shared_memory_scratchpad = self.ap.task_parameters.shared_memory_scratchpad
        self.name = agent_parameters.name
        self.parent = parent
        self.parent_level_manager = None
        self.full_name_id = agent_parameters.full_name_id = self.name

        if type(agent_parameters.task_parameters) == DistributedTaskParameters:
            screen.log_title("Creating agent - name: {} task id: {} (may take up to 30 seconds due to "
                             "tensorflow wake up time)".format(self.full_name_id, self.task_id))
        else:
            screen.log_title("Creating agent - name: {}".format(self.full_name_id))
        self.imitation = False
        self.agent_logger = Logger()
        self.agent_episode_logger = EpisodeLogger()

        # get the memory
        # - distributed training + shared memory:
        #   * is chief?  -> create the memory and add it to the scratchpad
        #   * not chief? -> wait for the chief to create the memory and then fetch it
        # - non distributed training / not shared memory:
        #   * create memory
        # memory path format is "<module path>:<class name>" -- we key the scratchpad
        # by "<full agent name>.<memory class name>"
        memory_name = self.ap.memory.path.split(':')[1]
        self.memory_lookup_name = self.full_name_id + '.' + memory_name
        if self.shared_memory and not self.is_chief:
            self.memory = self.shared_memory_scratchpad.get(self.memory_lookup_name)
        else:
            # modules
            self.memory = dynamic_import_and_instantiate_module_from_params(self.ap.memory)

            if hasattr(self.ap.memory, 'memory_backend_params'):
                self.memory_backend = get_memory_backend(self.ap.memory.memory_backend_params)

                # NOTE(review): presumably the trainer consumes the backend differently,
                # so only non-trainer roles attach it to the memory -- confirm upstream
                if self.ap.memory.memory_backend_params.run_type != 'trainer':
                    self.memory.set_memory_backend(self.memory_backend)

            if agent_parameters.memory.load_memory_from_file_path:
                screen.log_title("Loading replay buffer from pickle. Pickle path: {}"
                                 .format(agent_parameters.memory.load_memory_from_file_path))
                self.memory.load(agent_parameters.memory.load_memory_from_file_path)

            # chief publishes the freshly-created memory for the other workers
            if self.shared_memory and self.is_chief:
                self.shared_memory_scratchpad.add(self.memory_lookup_name, self.memory)

        # set devices
        if type(agent_parameters.task_parameters) == DistributedTaskParameters:
            self.has_global = True
            self.replicated_device = agent_parameters.task_parameters.device
            self.worker_device = "/job:worker/task:{}".format(self.task_id)
        else:
            self.has_global = False
            self.replicated_device = None
            self.worker_device = ""
        if agent_parameters.task_parameters.use_cpu:
            self.worker_device += "/cpu:0"
        else:
            self.worker_device += "/device:GPU:0"

        # filters
        self.input_filter = self.ap.input_filter
        self.output_filter = self.ap.output_filter
        self.pre_network_filter = self.ap.pre_network_filter
        # filters live on the replicated device when one exists, else on the worker
        device = self.replicated_device if self.replicated_device else self.worker_device
        if hasattr(self.ap.memory, 'memory_backend_params') and self.ap.algorithm.distributed_coach_synchronization_type:
            self.input_filter.set_device(device, memory_backend_params=self.ap.memory.memory_backend_params)
            self.output_filter.set_device(device, memory_backend_params=self.ap.memory.memory_backend_params)
            self.pre_network_filter.set_device(device, memory_backend_params=self.ap.memory.memory_backend_params)
        else:
            self.input_filter.set_device(device)
            self.output_filter.set_device(device)
            self.pre_network_filter.set_device(device)

        # initialize all internal variables
        self._phase = RunPhase.HEATUP
        self.total_shaped_reward_in_current_episode = 0
        self.total_reward_in_current_episode = 0
        self.total_steps_counter = 0
        self.running_reward = None
        self.training_iteration = 0
        self.last_target_network_update_step = 0
        self.last_training_phase_step = 0
        self.current_episode = self.ap.current_episode = 0
        self.curr_state = {}
        self.current_hrl_goal = None
        self.current_episode_steps_counter = 0
        self.episode_running_info = {}
        self.last_episode_evaluation_ran = 0
        self.running_observations = []
        self.agent_logger.set_current_time(self.current_episode)
        self.exploration_policy = None
        self.networks = {}
        self.last_action_info = None
        self.running_observation_stats = None
        self.running_reward_stats = None
        self.accumulated_rewards_across_evaluation_episodes = 0
        self.accumulated_shaped_rewards_across_evaluation_episodes = 0
        self.num_successes_across_evaluation_episodes = 0
        self.num_evaluation_episodes_completed = 0
        self.current_episode_buffer = Episode(discount=self.ap.algorithm.discount, n_step=self.ap.algorithm.n_step)
        # TODO: add agents observation rendering for debugging purposes (not the same as the environment rendering)

        # environment parameters
        self.spaces = None
        self.in_action_space = self.ap.algorithm.in_action_space

        # signals
        self.episode_signals = []
        self.step_signals = []
        self.loss = self.register_signal('Loss')
        self.curr_learning_rate = self.register_signal('Learning Rate')
        self.unclipped_grads = self.register_signal('Grads (unclipped)')
        self.reward = self.register_signal('Reward', dump_one_value_per_episode=False, dump_one_value_per_step=True)
        self.shaped_reward = self.register_signal('Shaped Reward', dump_one_value_per_episode=False, dump_one_value_per_step=True)
        self.discounted_return = self.register_signal('Discounted Return')
        if isinstance(self.in_action_space, GoalsSpace):
            self.distance_from_goal = self.register_signal('Distance From Goal', dump_one_value_per_step=True)
        # use seed
        if self.ap.task_parameters.seed is not None:
            random.seed(self.ap.task_parameters.seed)
            np.random.seed(self.ap.task_parameters.seed)
        else:
            # we need to seed the RNG since the different processes are initialized with the same parent seed
            random.seed()
            np.random.seed()