Code Example #1
    def read_variable_values(self, variables):
        # For test compatibility.
        if isinstance(variables, dict):
            ret = {}
            for name, var in variables.items():
                ret[name] = Component.read_variable(var)
            return ret
        elif isinstance(variables, list):
            return [Component.read_variable(var) for var in variables]
        else:
            # Attempt to read as single var.
            return Component.read_variable(variables)
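
The method simply dispatches on the container type of `variables`. Below is a self-contained sketch (not part of the original file) of that behavior, using a plain NumPy stand-in for Component.read_variable, which normally requires a built graph:

import numpy as np

def read_values(variables, read_one=np.asarray):
    # Mirrors the dispatch above: dict in -> dict of values out,
    # list in -> list of values out, anything else -> single value.
    if isinstance(variables, dict):
        return {name: read_one(var) for name, var in variables.items()}
    elif isinstance(variables, list):
        return [read_one(var) for var in variables]
    return read_one(variables)

print(read_values({"w": [1.0, 2.0], "b": [0.5]}))  # dict -> dict
print(read_values([[1.0, 2.0], [0.5]]))            # list -> list
print(read_values(3.0))                            # single var -> single value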
Code Example #2
    def test_copying_a_component(self):
        # Flatten a simple 2x2 FloatBox to (4,).
        space = FloatBox(shape=(2, 2), add_batch_rank=False)

        flatten_orig = ReShape(flatten=True, scope="A")
        flatten_copy = flatten_orig.copy(scope="B")
        container = Component(flatten_orig, flatten_copy)

        @rlgraph_api(component=container)
        def flatten1(self, input_):
            return self.sub_components["A"].apply(input_)

        @rlgraph_api(component=container)
        def flatten2(self, input_):
            return self.sub_components["B"].apply(input_)

        test = ComponentTest(component=container,
                             input_spaces=dict(input_=space))

        input_ = dict(input1=np.array([[0.5, 2.0], [1.0, 2.0]]),
                      input2=np.array([[1.0, 2.0], [3.0, 4.0]]))
        expected = dict(output1=np.array([0.5, 2.0, 1.0, 2.0]),
                        output2=np.array([1.0, 2.0, 3.0, 4.0]))
        for i in range_(1, 3):
            test.test(("flatten" + str(i), input_["input" + str(i)]),
                      expected_outputs=expected["output" + str(i)])
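
For reference, the expected outputs in this test are just the row-major flattening of each (2, 2) input to shape (4,); a quick NumPy check of the same values (illustrative only):

import numpy as np

input1 = np.array([[0.5, 2.0], [1.0, 2.0]])
input2 = np.array([[1.0, 2.0], [3.0, 4.0]])

# ReShape(flatten=True) is expected to behave like a row-major reshape to (4,).
assert np.array_equal(input1.reshape(-1), np.array([0.5, 2.0, 1.0, 2.0]))
assert np.array_equal(input2.reshape(-1), np.array([1.0, 2.0, 3.0, 4.0]))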
Code Example #3
    def test_sync_functionality(self):
        # Two Components, one with Synchronizable dropped in:
        # A: Can only push out values.
        # B: To be synced by A's values.
        sync_from = MyCompWithVars(scope="sync-from")
        sync_to = MyCompWithVars(initializer1=8.0,
                                 initializer2=7.0,
                                 scope="sync-to",
                                 synchronizable=True)

        # Create a dummy test component that contains our two Synchronizables.
        container = Component(name="container")
        container.add_components(sync_from, sync_to)

        @rlgraph_api(component=container)
        def execute_sync(self):
            values_ = sync_from._variables()
            return sync_to.sync(values_)

        test = ComponentTest(component=container)

        # Test syncing the variable from->to and check them before and after the sync.
        # Before the sync.
        test.variable_test(
            sync_to.get_variables(VARIABLE_NAMES), {
                "sync-to/" + VARIABLE_NAMES[0]:
                np.full(shape=sync_from.space.shape, fill_value=8.0),
                "sync-to/" + VARIABLE_NAMES[1]:
                np.full(shape=sync_from.space.shape, fill_value=7.0)
            })

        # Now sync and re-check.
        test.test("execute_sync", expected_outputs=None)

        # After the sync.
        test.variable_test(
            sync_to.get_variables(VARIABLE_NAMES), {
                "sync-to/" + VARIABLE_NAMES[0]:
                np.zeros(shape=sync_from.space.shape),
                "sync-to/" + VARIABLE_NAMES[1]:
                np.ones(shape=sync_from.space.shape)
            })
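
MyCompWithVars and VARIABLE_NAMES are fixtures defined elsewhere in the test module. A hedged reconstruction of what such a fixture might look like is sketched below; the (4, 5) shape, the exact import paths, and the create_variables/get_variable usage are assumptions, not taken from this file:

from rlgraph.components import Component, Synchronizable
from rlgraph.spaces import FloatBox

VARIABLE_NAMES = ["variable_to_sync1", "variable_to_sync2"]

class MyCompWithVars(Component):
    # Holds two variables filled with initializer1/initializer2 (defaults 0.0 and 1.0,
    # which matches the post-sync expectations in the test above).
    def __init__(self, initializer1=0.0, initializer2=1.0, synchronizable=False, **kwargs):
        super(MyCompWithVars, self).__init__(**kwargs)
        self.space = FloatBox(shape=(4, 5))  # arbitrary shape, for illustration only
        self.initializer1 = initializer1
        self.initializer2 = initializer2
        if synchronizable:
            # Dropping in Synchronizable exposes a `sync` API method on this component.
            self.add_components(Synchronizable(), expose_apis="sync")

    def create_variables(self, input_spaces, action_space=None):
        # Two variables whose names match VARIABLE_NAMES used in the assertions.
        self.var1 = self.get_variable(name=VARIABLE_NAMES[0], from_space=self.space,
                                      initializer=self.initializer1, trainable=True)
        self.var2 = self.get_variable(name=VARIABLE_NAMES[1], from_space=self.space,
                                      initializer=self.initializer2, trainable=True)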
Code Example #4
    def test_sync_socket_between_2_identical_comps_that_have_vars_only_in_their_sub_comps(
            self):
        """
        Similar to the Policy scenario, where the Policy Component owns a NeuralNetwork (which has vars)
        and has to be synced with other Policies.
        """
        # Create 2x: A custom Component (with vars) that holds another Component (with vars).
        # Then sync between them.
        comp1 = MyCompWithVars(scope="A")
        comp1.add_components(MyCompWithVars(scope="sub-of-A-with-vars"))

        comp2_writable = MyCompWithVars(scope="B",
                                        initializer1=3.0,
                                        initializer2=4.2,
                                        synchronizable=True)
        comp2_writable.add_components(
            MyCompWithVars(scope="sub-of-B-with-vars",
                           initializer1=5.0,
                           initializer2=6.2))

        container = Component(comp1, comp2_writable, scope="container")

        @rlgraph_api(component=container)
        def execute_sync(self):
            values_ = comp1._variables()
            return comp2_writable.sync(values_)

        test = ComponentTest(component=container)

        # Before the sync.
        test.variable_test(
            comp2_writable.get_variables([
                "container/B/variable_to_sync1",
                "container/B/variable_to_sync2",
                "container/B/sub-of-B-with-vars/variable_to_sync1",
                "container/B/sub-of-B-with-vars/variable_to_sync2"
            ]), {
                "container/B/variable_to_sync1":
                np.full(
                    shape=comp1.space.shape, fill_value=3.0, dtype=np.float32),
                "container/B/variable_to_sync2":
                np.full(
                    shape=comp1.space.shape, fill_value=4.2, dtype=np.float32),
                "container/B/sub-of-B-with-vars/variable_to_sync1":
                np.full(
                    shape=comp1.space.shape, fill_value=5.0, dtype=np.float32),
                "container/B/sub-of-B-with-vars/variable_to_sync2":
                np.full(
                    shape=comp1.space.shape, fill_value=6.2, dtype=np.float32)
            })

        # Now sync and re-check.
        test.test(("execute_sync", None), expected_outputs=None)

        # After the sync.
        test.variable_test(
            comp2_writable.get_variables([
                "container/B/variable_to_sync1",
                "container/B/variable_to_sync2",
                "container/B/sub-of-B-with-vars/variable_to_sync1",
                "container/B/sub-of-B-with-vars/variable_to_sync2"
            ]), {
                "container/B/variable_to_sync1":
                np.zeros(shape=comp1.space.shape, dtype=np.float32),
                "container/B/variable_to_sync2":
                np.ones(shape=comp1.space.shape, dtype=np.float32),
                "container/B/sub-of-B-with-vars/variable_to_sync1":
                np.zeros(shape=comp1.space.shape, dtype=np.float32),
                "container/B/sub-of-B-with-vars/variable_to_sync2":
                np.ones(shape=comp1.space.shape, dtype=np.float32)
            })
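
Conceptually, the sync assigns the source component's variable values onto the target component's variables pairwise. The pairing-by-sorted-key rule below is an assumption about how the sync works, chosen to be consistent with the expected values in the test above; shapes and scope strings are illustrative. A minimal, framework-free sketch:

import numpy as np

SHAPE = (4, 5)  # illustrative; the real shape comes from MyCompWithVars' space

source = {
    "container/A/variable_to_sync1": np.zeros(SHAPE),
    "container/A/variable_to_sync2": np.ones(SHAPE),
    "container/A/sub-of-A-with-vars/variable_to_sync1": np.zeros(SHAPE),
    "container/A/sub-of-A-with-vars/variable_to_sync2": np.ones(SHAPE),
}
target = {
    "container/B/variable_to_sync1": np.full(SHAPE, 3.0),
    "container/B/variable_to_sync2": np.full(SHAPE, 4.2),
    "container/B/sub-of-B-with-vars/variable_to_sync1": np.full(SHAPE, 5.0),
    "container/B/sub-of-B-with-vars/variable_to_sync2": np.full(SHAPE, 6.2),
}

# Pair variables by sorting both dicts' keys and zipping them in order
# (shapes must line up), then assign source values onto the target.
for target_key, source_value in zip(sorted(target), (source[k] for k in sorted(source))):
    target[target_key] = source_value.copy()

assert (target["container/B/variable_to_sync1"] == 0.0).all()
assert (target["container/B/sub-of-B-with-vars/variable_to_sync2"] == 1.0).all()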
Code Example #5
    def __init__(self,
                 state_space,
                 action_space,
                 discount=0.98,
                 preprocessing_spec=None,
                 network_spec=None,
                 internal_states_space=None,
                 policy_spec=None,
                 value_function_spec=None,
                 exploration_spec=None,
                 execution_spec=None,
                 optimizer_spec=None,
                 value_function_optimizer_spec=None,
                 observe_spec=None,
                 update_spec=None,
                 summary_spec=None,
                 saver_spec=None,
                 auto_build=True,
                 name="agent"):
        """
        Args:
            state_space (Union[dict,Space]): Spec dict for the state Space or a direct Space object.
            action_space (Union[dict,Space]): Spec dict for the action Space or a direct Space object.

            preprocessing_spec (Optional[list,PreprocessorStack]): The spec list for the different necessary states
                preprocessing steps or a PreprocessorStack object itself.

            discount (float): The discount factor (gamma).

            network_spec (Optional[list,NeuralNetwork]): Spec list for a NeuralNetwork Component or the NeuralNetwork
                object itself.

            internal_states_space (Optional[Union[dict,Space]]): Spec dict for the internal-states Space or a direct
                Space object for the Space(s) of the internal (RNN) states.

            policy_spec (Optional[dict]): An optional dict for further kwargs passing into the Policy c'tor.
            value_function_spec (list): Neural network specification for baseline.

            exploration_spec (Optional[dict]): The spec-dict to create the Exploration Component.
            execution_spec (Optional[dict,Execution]): The spec-dict specifying execution settings.
            optimizer_spec (Optional[dict,Optimizer]): The spec-dict to create the Optimizer for this Agent.

            value_function_optimizer_spec (dict): Optimizer config for the value function optimizer. If None, the optimizer
                spec for the policy is used (same learning rate and optimizer type).

            observe_spec (Optional[dict]): Spec-dict to specify `Agent.observe()` settings.
            update_spec (Optional[dict]): Spec-dict to specify `Agent.update()` settings.
            summary_spec (Optional[dict]): Spec-dict to specify summary settings.
            saver_spec (Optional[dict]): Spec-dict to specify saver settings.

            auto_build (Optional[bool]): If True (default), immediately builds the graph using the agent's
                graph builder. If false, users must separately call agent.build(). Useful for debugging or analyzing
                components before building.

            name (str): Some name for this Agent object.
        """
        super(Agent, self).__init__()

        self.name = name
        self.auto_build = auto_build
        self.graph_built = False
        self.logger = logging.getLogger(__name__)

        self.state_space = Space.from_spec(state_space).with_batch_rank(False)
        self.flat_state_space = self.state_space.flatten() if isinstance(
            self.state_space, ContainerSpace) else None
        self.logger.info("Parsed state space definition: {}".format(
            self.state_space))
        self.action_space = Space.from_spec(action_space).with_batch_rank(
            False)
        self.flat_action_space = self.action_space.flatten() if isinstance(
            self.action_space, ContainerSpace) else None
        self.logger.info("Parsed action space definition: {}".format(
            self.action_space))

        self.discount = discount
        self.build_options = {}

        # The agent's root-Component.
        self.root_component = Component(name=self.name, nesting_level=0)

        # Define the input-Spaces:
        # Tag the input-Space to `self.set_weights` as equal to whatever the variables-Space will be for
        # the Agent's policy Component.
        self.input_spaces = dict(states=self.state_space.with_batch_rank(), )

        # Construct the Preprocessor.
        self.preprocessor = PreprocessorStack.from_spec(preprocessing_spec)
        self.preprocessed_state_space = self.preprocessor.get_preprocessed_space(
            self.state_space)
        self.preprocessing_required = preprocessing_spec is not None and len(
            preprocessing_spec) > 0
        if self.preprocessing_required:
            self.logger.info("Preprocessing required.")
            self.logger.info(
                "Parsed preprocessed-state space definition: {}".format(
                    self.preprocessed_state_space))
        else:
            self.logger.info("No preprocessing required.")

        # Construct the Policy network.
        policy_spec = policy_spec or dict()
        if "network_spec" not in policy_spec:
            policy_spec["network_spec"] = network_spec
        if "action_space" not in policy_spec:
            policy_spec["action_space"] = self.action_space
        self.policy_spec = policy_spec
        # The behavioral policy of the algorithm. Also the one that gets updated.
        self.policy = Policy.from_spec(self.policy_spec)
        # Done by default.
        self.policy.add_components(Synchronizable(), expose_apis="sync")

        # Create non-shared baseline network.
        self.value_function = None
        if value_function_spec is not None:
            self.value_function = ValueFunction(
                network_spec=value_function_spec)
            self.value_function.add_components(Synchronizable(),
                                               expose_apis="sync")
            self.vars_merger = ContainerMerger("policy",
                                               "vf",
                                               scope="variable-dict-merger")
            self.vars_splitter = ContainerSplitter(
                "policy", "vf", scope="variable-container-splitter")
        else:
            self.vars_merger = ContainerMerger("policy",
                                               scope="variable-dict-merger")
            self.vars_splitter = ContainerSplitter(
                "policy", scope="variable-container-splitter")

        self.internal_states_space = Space.from_spec(internal_states_space)

        # An object implementing the loss function interface is only strictly needed
        # if automatic device strategies like multi-gpu are enabled. This is because
        # the device strategy needs to know the name of the loss function to infer the appropriate
        # operations.
        self.loss_function = None

        self.exploration = Exploration.from_spec(exploration_spec)
        self.execution_spec = parse_execution_spec(execution_spec)

        # Python-side experience buffer for better performance (may be disabled).
        self.default_env = "env_0"

        def factory_(i):
            if i < 2:
                return []
            return tuple([[] for _ in range(i)])

        self.states_buffer = defaultdict(
            list)  # partial(fact_, len(self.flat_state_space)))
        self.actions_buffer = defaultdict(
            partial(factory_, len(self.flat_action_space or [])))
        self.internals_buffer = defaultdict(list)
        self.rewards_buffer = defaultdict(list)
        self.next_states_buffer = defaultdict(
            list)  # partial(fact_, len(self.flat_state_space)))
        self.terminals_buffer = defaultdict(list)

        self.observe_spec = parse_observe_spec(observe_spec)

        # Global time step counter.
        self.timesteps = 0

        # Create the Agent's optimizer based on optimizer_spec and execution strategy.
        self.optimizer = None
        if optimizer_spec is not None:
            # Save spec in case agent needs to create more optimizers e.g. for baseline.
            self.optimizer_spec = optimizer_spec
            self.optimizer = Optimizer.from_spec(optimizer_spec)

        self.value_function_optimizer = None
        if self.value_function is not None:
            if value_function_optimizer_spec is None:
                vf_optimizer_spec = self.optimizer_spec
            else:
                vf_optimizer_spec = value_function_optimizer_spec
            vf_optimizer_spec["scope"] = "value-function-optimizer"
            self.value_function_optimizer = Optimizer.from_spec(
                vf_optimizer_spec)

        # Update-spec dict tells the Agent how to update (e.g. memory batch size).
        self.update_spec = parse_update_spec(update_spec)

        # Create our GraphBuilder and -Executor.
        self.graph_builder = GraphBuilder(action_space=self.action_space,
                                          summary_spec=summary_spec)
        self.graph_executor = GraphExecutor.from_spec(
            get_backend(),
            graph_builder=self.graph_builder,
            execution_spec=self.execution_spec,
            saver_spec=saver_spec)  # type: GraphExecutor
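
For orientation, here is a hedged sketch of how a concrete subclass of this Agent might be configured through the kwargs documented above. The subclass name, the layer and optimizer spec keys, and all concrete values are illustrative assumptions, not taken from this file:

from rlgraph.spaces import FloatBox, IntBox

# Hypothetical configuration for some concrete Agent subclass.
agent_kwargs = dict(
    state_space=FloatBox(shape=(4,)),
    action_space=IntBox(2),  # two discrete actions (assumed single-arg IntBox form)
    discount=0.99,
    # Spec lists/dicts are resolved via the respective from_spec() calls in __init__.
    network_spec=[dict(type="dense", units=32, activation="relu")],
    optimizer_spec=dict(type="adam", learning_rate=0.001),
    update_spec=dict(batch_size=32),
    auto_build=True,
)
# agent = SomeConcreteAgent(**agent_kwargs)  # subclass name is hypothetical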
Code Example #6
File: agent.py  Project: mugenZebra/rlgraph
    def __init__(
        self,
        state_space,
        action_space,
        discount=0.98,
        preprocessing_spec=None,
        network_spec=None,
        internal_states_space=None,
        action_adapter_spec=None,
        exploration_spec=None,
        execution_spec=None,
        optimizer_spec=None,
        observe_spec=None,
        update_spec=None,
        summary_spec=None,
        saver_spec=None,
        auto_build=True,
        name="agent"
    ):
        """
        Args:
            state_space (Union[dict,Space]): Spec dict for the state Space or a direct Space object.
            action_space (Union[dict,Space]): Spec dict for the action Space or a direct Space object.
            preprocessing_spec (Optional[list,PreprocessorStack]): The spec list for the different necessary states
                preprocessing steps or a PreprocessorStack object itself.
            discount (float): The discount factor (gamma).
            network_spec (Optional[list,NeuralNetwork]): Spec list for a NeuralNetwork Component or the NeuralNetwork
                object itself.
            internal_states_space (Optional[Union[dict,Space]]): Spec dict for the internal-states Space or a direct
                Space object for the Space(s) of the internal (RNN) states.
            action_adapter_spec (Optional[dict,ActionAdapter]): The spec-dict for the ActionAdapter Component or the
                ActionAdapter object itself.
            exploration_spec (Optional[dict]): The spec-dict to create the Exploration Component.
            execution_spec (Optional[dict,Execution]): The spec-dict specifying execution settings.
            optimizer_spec (Optional[dict,Optimizer]): The spec-dict to create the Optimizer for this Agent.
            observe_spec (Optional[dict]): Spec-dict to specify `Agent.observe()` settings.
            update_spec (Optional[dict]): Spec-dict to specify `Agent.update()` settings.
            summary_spec (Optional[dict]): Spec-dict to specify summary settings.
            saver_spec (Optional[dict]): Spec-dict to specify saver settings.
            auto_build (Optional[bool]): If True (default), immediately builds the graph using the agent's
                graph builder. If false, users must separately call agent.build(). Useful for debugging or analyzing
                components before building.
            name (str): Some name for this Agent object.
        """
        super(Agent, self).__init__()

        self.name = name
        self.auto_build = auto_build
        self.graph_built = False
        self.logger = logging.getLogger(__name__)

        self.state_space = Space.from_spec(state_space).with_batch_rank(False)
        self.logger.info("Parsed state space definition: {}".format(self.state_space))
        self.action_space = Space.from_spec(action_space).with_batch_rank(False)
        self.logger.info("Parsed action space definition: {}".format(self.action_space))

        self.discount = discount

        # The agent's root-Component.
        self.root_component = Component(name=self.name)

        # Define the input-Spaces:
        # Tag the input-Space to `self.set_policy_weights` as equal to whatever the variables-Space will be for
        # the Agent's policy Component.
        self.input_spaces = dict(
            states=self.state_space.with_batch_rank(),
        )

        # Construct the Preprocessor.
        self.preprocessor = PreprocessorStack.from_spec(preprocessing_spec)
        self.preprocessed_state_space = self.preprocessor.get_preprocessed_space(self.state_space)
        self.preprocessing_required = preprocessing_spec is not None and len(preprocessing_spec) > 1
        if self.preprocessing_required:
            self.logger.info("Preprocessing required.")
            self.logger.info("Parsed preprocessed-state space definition: {}".format(self.preprocessed_state_space))
        else:
            self.logger.info("No preprocessing required.")

        # Construct the Policy network.
        self.neural_network = None
        if network_spec is not None:
            self.neural_network = NeuralNetwork.from_spec(network_spec)
        self.action_adapter_spec = action_adapter_spec

        self.internal_states_space = internal_states_space

        # An object implementing the loss function interface is only strictly needed
        # if automatic device strategies like multi-gpu are enabled. This is because
        # the device strategy needs to know the name of the loss function to infer the appropriate
        # operations.
        self.loss_function = None

        # The action adapter mapping raw NN output to (shaped) actions.
        action_adapter_dict = dict(action_space=self.action_space)
        if self.action_adapter_spec is None:
            self.action_adapter_spec = action_adapter_dict
        else:
            self.action_adapter_spec.update(action_adapter_dict)

        # The behavioral policy of the algorithm. Also the one that gets updated.
        self.policy = Policy(
            network_spec=self.neural_network,
            action_adapter_spec=self.action_adapter_spec
        )

        self.exploration = Exploration.from_spec(exploration_spec)
        self.execution_spec = parse_execution_spec(execution_spec)

        # Python-side experience buffer for better performance (may be disabled).
        self.default_env = "env_0"
        self.states_buffer = defaultdict(list)
        self.actions_buffer = defaultdict(list)
        self.internals_buffer = defaultdict(list)
        self.rewards_buffer = defaultdict(list)
        self.next_states_buffer = defaultdict(list)
        self.terminals_buffer = defaultdict(list)

        self.observe_spec = parse_observe_spec(observe_spec)
        if self.observe_spec["buffer_enabled"]:
            self.reset_env_buffers()

        # Global time step counter.
        self.timesteps = 0

        # Create the Agent's optimizer based on optimizer_spec and execution strategy.
        self.optimizer = None
        if optimizer_spec is not None:
            self.optimizer = Optimizer.from_spec(optimizer_spec)  #get_optimizer_from_device_strategy(
                #optimizer_spec, self.execution_spec.get("device_strategy", 'default')
        # Update-spec dict tells the Agent how to update (e.g. memory batch size).
        self.update_spec = parse_update_spec(update_spec)

        # Create our GraphBuilder and -Executor.
        self.graph_builder = GraphBuilder(action_space=self.action_space, summary_spec=summary_spec)
        self.graph_executor = GraphExecutor.from_spec(
            get_backend(),
            graph_builder=self.graph_builder,
            execution_spec=self.execution_spec,
            saver_spec=saver_spec
        )  # type: GraphExecutor