def setup_preprocessor(self, preprocessing_spec, in_space):
    if preprocessing_spec is not None:
        # TODO: Move in-graph for python component assembly.
        preprocessing_spec = deepcopy(preprocessing_spec)
        in_space = deepcopy(in_space)
        # Store each sub-preprocessor's scope (each spec dict must carry a "scope" key).
        scopes = [preprocessor["scope"] for preprocessor in preprocessing_spec]
        # Set backend to python.
        for spec in preprocessing_spec:
            spec["backend"] = "python"
        processor_stack = PreprocessorStack(*preprocessing_spec, backend="python")
        # Build the sub-preprocessors one by one, feeding each the output space of its predecessor.
        build_space = in_space
        for sub_comp_scope in scopes:
            processor_stack.sub_components[sub_comp_scope].create_variables(
                input_spaces=dict(preprocessing_inputs=build_space), action_space=None
            )
            build_space = processor_stack.sub_components[sub_comp_scope].get_preprocessed_space(build_space)
        processor_stack.reset()
        return processor_stack
    else:
        return None
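# Below: a minimal, hedged sketch (not part of the original source) of a spec that
# `setup_preprocessor` accepts. Each spec dict must include a "scope" key, since the
# scopes are read out before the stack is built; the preprocessor types and the
# FloatBox in-space are illustrative assumptions.
example_preprocessing_spec = [
    dict(type="convert_type", to_dtype="float", scope="convert-type"),
    dict(type="divide", divisor=10, scope="divide")
]
example_in_space = FloatBox(shape=(4,), add_batch_rank=True)
# processor_stack = agent.setup_preprocessor(example_preprocessing_spec, example_in_space)
# (`agent` is a hypothetical instance of the class defining `setup_preprocessor`.)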
def __init__(self, preprocessor_spec, policy_spec, exploration_spec=None, **kwargs):
    """
    Args:
        preprocessor_spec (Union[list,dict,PreprocessorSpec]):
            - A dict if the state from the Env will come in as a ContainerSpace (e.g. Dict). In this
              case, each key in this dict specifies which value in the incoming dict should go
              through which PreprocessorStack.
            - A list with layer specs.
            - A PreprocessorStack object.
        policy_spec (Union[dict,Policy]): A specification dict for a Policy object or a Policy
            object directly.
        exploration_spec (Union[dict,Exploration]): A specification dict for an Exploration object
            or an Exploration object directly.
    """
    super(ActorComponent, self).__init__(scope=kwargs.pop("scope", "actor-component"), **kwargs)

    self.preprocessor = PreprocessorStack.from_spec(preprocessor_spec)
    self.policy = Policy.from_spec(policy_spec)
    self.num_nn_inputs = self.policy.neural_network.num_inputs
    self.exploration = Exploration.from_spec(exploration_spec)

    self.tuple_merger = ContainerMerger(is_tuple=True, merge_tuples_into_one=True)
    self.tuple_splitter = ContainerSplitter(tuple_length=self.num_nn_inputs)

    self.add_components(
        self.policy, self.exploration, self.preprocessor, self.tuple_merger, self.tuple_splitter
    )
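# A minimal construction sketch (the config path and action space are assumptions, not
# from the original source): all three arguments may be passed as spec dicts/lists
# instead of ready-made objects, since the constructor routes each through from_spec().
actor_component = ActorComponent(
    preprocessor_spec=[dict(type="convert_type", to_dtype="float")],
    policy_spec=dict(network_spec="configs/test_simple_nn.json", action_space=IntBox(10)),
    exploration_spec=None  # optional; omit for no exploration
)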
def __init__(self, preprocessor_spec, policy_spec, exploration_spec, max_likelihood=None, **kwargs):
    """
    Args:
        preprocessor_spec (Union[list,dict,PreprocessorSpec]):
            - A dict if the state from the Env will come in as a ContainerSpace (e.g. Dict). In this
              case, each key in this dict specifies which value in the incoming dict should go
              through which PreprocessorStack.
            - A list with layer specs.
            - A PreprocessorStack object.
        policy_spec (Union[dict,Policy]): A specification dict for a Policy object or a Policy
            object directly.
        exploration_spec (Union[dict,Exploration]): A specification dict for an Exploration object
            or an Exploration object directly.
        max_likelihood (Optional[bool]): See Policy's property `max_likelihood`.
            If not None, overwrites the equally named setting in the Policy object (defined by
            `policy_spec`).
    """
    super(ActorComponent, self).__init__(scope=kwargs.pop("scope", "actor-component"), **kwargs)

    self.preprocessor = PreprocessorStack.from_spec(preprocessor_spec)
    self.policy = Policy.from_spec(policy_spec)
    self.exploration = Exploration.from_spec(exploration_spec)
    self.max_likelihood = max_likelihood

    self.add_components(self.policy, self.exploration, self.preprocessor)
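# Sketch of the `max_likelihood` override described in the docstring above (all specs are
# assumptions): the flag is stored on the ActorComponent and, when not None, takes
# precedence over the equally named setting inside the Policy built from `policy_spec`.
actor_component = ActorComponent(
    preprocessor_spec=[dict(type="convert_type", to_dtype="float")],
    policy_spec=dict(network_spec="configs/test_simple_nn.json", action_space=IntBox(10)),
    exploration_spec=dict(epsilon_spec=dict(decay_spec=dict(type="linear_decay", from_=1.0, to_=0.1))),
    max_likelihood=False  # force sampling, regardless of the Policy's own setting
)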
def test_actor_component_with_lstm_network(self):
    # State space and internal-states space.
    state_space = FloatBox(shape=(2,), add_batch_rank=True, add_time_rank=True, time_major=False)
    internal_states_space = Tuple(FloatBox(shape=(3,)), FloatBox(shape=(3,)), add_batch_rank=True)
    time_percentages_space = FloatBox()
    # action_space.
    action_space = IntBox(2, add_batch_rank=True, add_time_rank=True)

    preprocessor = PreprocessorStack.from_spec(
        [dict(type="convert_type", to_dtype="float"), dict(type="divide", divisor=10)]
    )
    policy = Policy(network_spec=config_from_path("configs/test_lstm_nn.json"), action_space=action_space)
    exploration = Exploration(epsilon_spec=dict(decay_spec=dict(
        type="linear_decay", from_=1.0, to_=0.1
    )))
    actor_component = ActorComponent(preprocessor, policy, exploration)

    test = ComponentTest(
        component=actor_component,
        input_spaces=dict(
            states=state_space,
            other_nn_inputs=Tuple(internal_states_space, add_batch_rank=True),
            time_percentage=time_percentages_space
        ),
        action_space=action_space
    )

    # Some state inputs (batch size=2, seq-len=1000; batch-major).
    np.random.seed(10)
    states = state_space.sample(size=(1000, 2))
    initial_internal_states = internal_states_space.zeros(size=2)  # only batch
    time_percentages = time_percentages_space.sample(1000)

    # Run a single time-step n times to simulate acting and env interaction with an LSTM.
    preprocessed_states = np.ndarray(shape=(1000, 2, 2), dtype=np.float32)
    actions = np.ndarray(shape=(1000, 2, 1), dtype=np.int32)
    for i, time_percentage in enumerate(time_percentages):
        ret = test.test((
            "get_preprocessed_state_and_action",
            # Expand the time dim at axis 1, as time-major is False.
            [np.expand_dims(states[i], 1), tuple([initial_internal_states]), time_percentage]
        ))
        preprocessed_states[i] = ret["preprocessed_state"][:, 0, :]  # squeeze out the time-rank again
        actions[i] = ret["action"]
        # Check c/h-state shapes.
        self.assertEqual(ret["nn_outputs"][1][0].shape, (2, 3))  # batch-size=2, LSTM units=3
        self.assertEqual(ret["nn_outputs"][1][1].shape, (2, 3))

    # Check all preprocessed states (easy: just divided by 10).
    expected_preprocessed_state = states / 10
    recursive_assert_almost_equal(preprocessed_states, expected_preprocessed_state)

    # Check the exploration functionality over the actions.
    # Not checking the mean, as we are mostly in the non-exploratory region; that's also why
    # the stddev should be small.
    stddev_actions = actions.std()
    self.assertGreater(stddev_actions, 0.4)
    self.assertLess(stddev_actions, 0.6)
def __init__(self, preprocessors, **kwargs):
    """
    Args:
        preprocessors (dict): A dict whose keys indicate which value of an incoming container
            state should be preprocessed by which PreprocessorStack (given as the spec under
            that key).

    Raises:
        RLGraphError: If a sub-component is not a PreprocessLayer object.
    """
    # Create one separate PreprocessorStack per given key.
    # All other keys in an input will be passed through un-preprocessed.
    self.flattened_preprocessors = flatten_op(preprocessors)
    for i, (flat_key, spec) in enumerate(self.flattened_preprocessors.items()):
        self.flattened_preprocessors[flat_key] = PreprocessorStack.from_spec(
            spec, scope="preprocessor-stack-{}".format(i)
        )

    # NOTE: No automatic API-methods. Define them all ourselves.
    kwargs["api_methods"] = {}
    default_dict(kwargs, dict(scope=kwargs.pop("scope", "dict-preprocessor-stack")))
    super(DictPreprocessorStack, self).__init__(*list(self.flattened_preprocessors.values()), **kwargs)
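# A minimal usage sketch for DictPreprocessorStack (the key names and sub-specs are
# assumptions): each key gets its own PreprocessorStack; any other keys arriving in the
# input dict pass through un-preprocessed.
dict_preprocessor = DictPreprocessorStack(dict(
    image=[dict(type="convert_type", to_dtype="float"), dict(type="divide", divisor=255)],
    health=[dict(type="convert_type", to_dtype="float")]
))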
def test_simple_actor_component(self):
    # state_space (NN is a simple single fc-layer relu network (2 units), random biases, random weights).
    state_space = FloatBox(shape=(5,), add_batch_rank=True)
    # action_space.
    action_space = IntBox(10)

    preprocessor = PreprocessorStack.from_spec([
        dict(type="convert_type", to_dtype="float"),
        dict(type="multiply", factor=2)
    ])
    policy = Policy(network_spec=config_from_path("configs/test_simple_nn.json"), action_space=action_space)
    exploration = Exploration()  # no exploration
    actor_component = ActorComponent(preprocessor, policy, exploration)

    test = ComponentTest(
        component=actor_component,
        input_spaces=dict(states=state_space),
        action_space=action_space
    )

    # Get and check some actions.
    actor_component_params = test.read_variable_values(actor_component.variables)

    # Some state inputs (5 input nodes, batch size=2).
    states = state_space.sample(2)
    # Expected NN-output.
    expected_nn_output = np.matmul(
        states * 2,
        actor_component_params["actor-component/policy/test-network/hidden-layer/dense/kernel"]
    )
    # Raw action-layer output.
    expected_action_layer_output = np.matmul(
        expected_nn_output,
        actor_component_params["actor-component/policy/action-adapter-0/action-network/action-layer/dense/kernel"]
    )
    # Final actions (max-likelihood/greedy pick).
    expected_actions = np.argmax(expected_action_layer_output, axis=-1)
    expected_preprocessed_state = states * 2
    test.test(
        ("get_preprocessed_state_and_action", states),
        expected_outputs=dict(preprocessed_state=expected_preprocessed_state, action=expected_actions)
    )

    # Get actions and action-probs by calling a different API-method.
    states = state_space.sample(5)
    # Get and check some actions.
    actor_component_params = test.read_variable_values(actor_component.variables)
    # Expected NN-output.
    expected_nn_output = np.matmul(
        states * 2,
        actor_component_params["actor-component/policy/test-network/hidden-layer/dense/kernel"]
    )
    # Raw action-layer output.
    expected_action_layer_output = np.matmul(
        expected_nn_output,
        actor_component_params["actor-component/policy/action-adapter-0/action-network/action-layer/dense/kernel"]
    )
    # No reshape necessary (simple action space); softmax to get probs.
    expected_action_probs = softmax(expected_action_layer_output)
    # Final actions (max-likelihood/greedy pick).
    expected_actions = np.argmax(expected_action_layer_output, axis=-1)
    expected_preprocessed_state = states * 2
    test.test(
        ("get_preprocessed_state_action_and_action_probs", states),
        expected_outputs=dict(
            preprocessed_state=expected_preprocessed_state,
            action=expected_actions,
            action_probs=expected_action_probs
        )
    )