def check_input_spaces(self, input_spaces, action_space=None):
    """
    Sanity checks the "time_step" input Space.

    The Space is passed to `sanity_check_space` with: IntBox as the only allowed type, rank 0,
    `must_have_batch_rank=False` and `must_have_categories=False`.

    Args:
        input_spaces (dict): The input Spaces by name. Must contain the key "time_step".
        action_space: Not used by this check.
    """
    sanity_check_space(
        input_spaces["time_step"],
        allowed_types=[IntBox],
        must_have_batch_rank=False,
        must_have_categories=False,
        rank=0
    )
def create_variables(self, input_spaces, action_space=None):
    """
    Creates the wrapped FIFOQueue object from the flattened record Space.

    If `self.record_space` has not been set yet, it is taken from `input_spaces["records"]`.
    Each flattened sub-Space is sanity checked and contributes one name, shape and dtype to the queue.

    Args:
        input_spaces (dict): The input Spaces by name. "records" is read if `self.record_space` is None.
        action_space: Not used here.
    """
    # Overwrite parent's method as we don't need a custom registry.
    if self.record_space is None:
        self.record_space = input_spaces["records"]

    # Collect one (name, shape, dtype) entry per flattened record component.
    queue_specs = []
    for flat_key, sub_space in self.record_space.flatten().items():
        # TODO: what if single items come in without a time-rank? Then this check here will fail.
        # We are expecting single items. The incoming batch-rank is actually a time-rank: Add the batch rank.
        sanity_check_space(sub_space, must_have_batch_rank=self.only_insert_single_records is False)
        queue_specs.append((
            flat_key,
            sub_space.get_shape(with_time_rank=sub_space.has_time_rank),
            dtype_(sub_space.dtype)
        ))

    names = [spec[0] for spec in queue_specs]
    shapes = [spec[1] for spec in queue_specs]
    dtypes = [spec[2] for spec in queue_specs]

    # Construct the wrapped FIFOQueue object.
    if get_backend() == "tf":
        if self.reuse_variable_scope:
            # Append our own scope (if any) to the reused variable scope.
            scope_suffix = "/" + self.scope if self.scope else ""
            shared_name = self.reuse_variable_scope + scope_suffix
        else:
            shared_name = self.global_scope

        self.queue = tf.FIFOQueue(
            capacity=self.capacity,
            dtypes=dtypes,
            shapes=shapes,
            names=names,
            shared_name=shared_name
        )
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Do some sanity checking on the incoming Spaces (only done for the "tf" backend):
    Each "inputs[i]" Space must be a FloatBox or IntBox with a batch rank, and all of them must agree with
    "inputs[0]" on whether they have a batch rank and whether they have a time rank.

    Also stores "inputs[0]" in `self.in_space_0` and its `time_major` setting in `self.time_major`.

    Args:
        input_spaces (dict): The input Spaces by name. Keys "inputs[0]", "inputs[1]", ... are checked
            consecutively until the first missing index.
        action_space: Not used by this check.

    Raises:
        AssertionError: If some "inputs[i]" differs from "inputs[0]" in its batch-/time-rank structure.
    """
    # TODO also check spaces for pytorch once unified space management
    if get_backend() != "tf":
        return
    # Nothing to compare against (and nothing to iterate over) without a 0th input.
    if "inputs[0]" not in input_spaces:
        return

    self.in_space_0 = input_spaces["inputs[0]"]
    self.time_major = self.in_space_0.time_major

    idx = 0
    while True:
        key = "inputs[{}]".format(idx)
        if key not in input_spaces:
            break
        in_space = input_spaces[key]
        sanity_check_space(in_space, allowed_types=[FloatBox, IntBox], must_have_batch_rank=True)
        # Make sure all concat inputs have same batch-/time-ranks.
        # The format args follow the order of the placeholders in the message:
        # scope, 0th batch-rank, 0th time-rank, idx, idx-th batch-rank, idx-th time-rank.
        assert self.in_space_0.has_batch_rank == in_space.has_batch_rank and \
            self.in_space_0.has_time_rank == in_space.has_time_rank, \
            "ERROR: Input spaces to '{}' must have same batch-/time-rank structure! " \
            "0th input is batch-rank={} time-rank={}, but {}st input is batch-rank={} " \
            "time-rank={}.".format(
                self.global_scope, self.in_space_0.has_batch_rank, self.in_space_0.has_time_rank,
                idx, in_space.has_batch_rank, in_space.has_time_rank
            )
        idx += 1
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Stores the given action Space and sanity checks it.

    The action Space is checked via `sanity_check_space` with IntBox as the only allowed type and
    `must_have_categories=True`.

    Args:
        input_spaces (dict): The input Spaces by name (not inspected here).
        action_space: The action Space. Must not be None.

    Raises:
        AssertionError: If `action_space` is None.
    """
    assert action_space is not None
    self.action_space = action_space
    # Check for IntBox and num_categories.
    sanity_check_space(self.action_space, allowed_types=[IntBox], must_have_categories=True)
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Sanity checks the "text_inputs" Space: Only TextBox is allowed and a batch rank is required.

    Args:
        input_spaces (dict): The input Spaces by name. Must contain the key "text_inputs".
        action_space: Not used by this check.
    """
    text_space = input_spaces["text_inputs"]
    sanity_check_space(text_space, allowed_types=[TextBox], must_have_batch_rank=True)
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Runs the parent's checks, then, if needed, checks the categories of "preprocessing_inputs".

    The category check only runs if `self.flatten` is True, the input Space is an IntBox and
    `self.flatten_categories` is True. It is done via `sanity_check_space` with
    `must_have_categories=True` and `num_categories=(2, 10000)`.

    Args:
        input_spaces (dict): The input Spaces by name. Must contain the key "preprocessing_inputs".
        action_space: Passed on to the parent's check.
    """
    super(ReShape, self).check_input_spaces(input_spaces, action_space)

    space = input_spaces["preprocessing_inputs"]  # type: Space
    needs_category_check = \
        self.flatten is True and isinstance(space, IntBox) and self.flatten_categories is True
    if needs_category_check:
        sanity_check_space(space, must_have_categories=True, num_categories=(2, 10000))
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Requires a batch rank on a fixed set of input Spaces and stores the flattened "env_actions" Space.

    Args:
        input_spaces (dict): The input Spaces by name. Must contain the keys "states", "actions",
            "env_actions", "preprocessed_states", "rewards" and "terminals".
        action_space: Not used by this check.
    """
    batched_inputs = ("states", "actions", "env_actions", "preprocessed_states", "rewards", "terminals")
    for name in batched_inputs:
        sanity_check_space(input_spaces[name], must_have_batch_rank=True)

    self.env_action_space = input_spaces["env_actions"].flatten()
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Sanity checks the "labels" Space: It must have a batch rank and be an IntBox if `self.sparse` is True,
    a FloatBox otherwise.

    Args:
        input_spaces (dict): The input Spaces by name. Must contain the key "labels".
        action_space: Not used by this check.
    """
    labels_space = input_spaces["labels"]
    expected_type = IntBox if self.sparse is True else FloatBox
    # `allowed_types` is given as a list, like in all other `sanity_check_space` calls.
    sanity_check_space(labels_space, allowed_types=[expected_type], must_have_batch_rank=True)
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Runs the parent's checks, then requires "text_inputs" to have a batch rank, no time rank and rank 0.

    Args:
        input_spaces (dict): The input Spaces by name. Must contain the key "text_inputs".
        action_space: Passed on to the parent's check.
    """
    super(StringToHashBucket, self).check_input_spaces(input_spaces, action_space)

    # Make sure there is only a batch rank (single text items).
    # tf.string_split does not support more complex shapes.
    text_space = input_spaces["text_inputs"]
    sanity_check_space(text_space, must_have_batch_rank=True, must_have_time_rank=False, rank=0)
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Stores the action Space (plain and flattened), sanity checks it and derives `self.ranks_to_reduce`.

    The action Space is checked with `must_have_categories=True` and IntBox as the only allowed sub-type.
    `self.ranks_to_reduce` is the length of the action Space's shape (including the batch rank) minus 1.

    Args:
        input_spaces (dict): The input Spaces by name (not inspected here).
        action_space: The action Space. Must not be None.

    Raises:
        AssertionError: If `action_space` is None.
    """
    assert action_space is not None
    self.action_space = action_space
    self.flat_action_space = action_space.flatten()

    # Check for IntBox and num_categories.
    sanity_check_space(self.action_space, must_have_categories=True, allowed_sub_types=[IntBox])

    shape_with_batch = self.action_space.get_shape(with_batch_rank=True)
    self.ranks_to_reduce = len(shape_with_batch) - 1
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Sanity checks the "time_percentage" input.

    If the input equals "flex" (i.e. was left out), the backend must be "tf" and `self.max_time_steps`
    must not be None. Otherwise, the Space must be a FloatBox of rank 0.

    Args:
        input_spaces (dict): The input Spaces by name. Must contain the key "time_percentage".
        action_space: Not used by this check.

    Raises:
        AssertionError: If "time_percentage" is "flex" and one of the two conditions above is violated.
    """
    time_pct_space = input_spaces["time_percentage"]

    # Time percentage is only not needed, iff backend=tf and we have a max_timesteps property with which we
    # can derive the percentage from the tf GLOBAL_TIMESTEP variable.
    if time_pct_space == "flex":
        assert get_backend() == "tf", "ERROR: `time_percentage` can only be left out if using tf as backend!"
        assert self.max_time_steps is not None, \
            "ERROR: `time_percentage` can only be left out if `self.max_time_steps` is not None!"
        return

    sanity_check_space(time_pct_space, allowed_types=[FloatBox], rank=0)
def get_preprocessed_space(self, space):
    """
    Builds the Space that results from applying this component's reshape settings to `space`.

    Each flattened sub-Space is mapped to a new Space object; the result is re-nested via `unflatten_op`.

    Args:
        space (Space): The incoming Space.

    Returns:
        The re-nested (via `unflatten_op`) collection of the reshaped sub-Spaces.

    Raises:
        AssertionError: If an IntBox is to be flattened with its categories, but `self.flatten_categories`
            is None.
    """
    reshaped = {}
    for key, single_space in space.flatten().items():
        class_ = type(single_space)

        # Determine the actual shape (not batch/time ranks).
        if self.flatten is not True:
            new_shape = self.new_shape[key] if isinstance(self.new_shape, dict) else self.new_shape
        elif type(single_space) == IntBox and self.flatten_categories is not False:
            assert self.flatten_categories is not None,\
                "ERROR: `flatten_categories` must not be None if `flatten` is True and input is IntBox!"
            new_shape = (self.get_num_categories(key, single_space),)
            # IntBoxes flattened together with their categories become FloatBoxes.
            class_ = FloatBox
        else:
            new_shape = (single_space.flat_dim,)

        # Check the batch/time rank options and collect the rank settings for the new Space.
        if self.fold_time_rank is True:
            sanity_check_space(single_space, must_have_batch_rank=True, must_have_time_rank=True)
            rank_settings = dict(add_batch_rank=True, add_time_rank=False)
        # Time rank should be unfolded from batch rank with the given dimension.
        elif self.unfold_time_rank is True:
            sanity_check_space(single_space, must_have_batch_rank=True, must_have_time_rank=False)
            rank_settings = dict(
                add_batch_rank=True,
                add_time_rank=True,
                time_major=self.time_major if self.time_major is not None else False
            )
        # Only change the actual shape (leave batch/time ranks as is).
        else:
            rank_settings = dict(
                add_batch_rank=single_space.has_batch_rank,
                add_time_rank=single_space.has_time_rank,
                time_major=single_space.time_major
            )

        # Without a new shape, the original shape is kept.
        out_shape = single_space.shape if new_shape is None else new_shape
        reshaped[key] = class_(shape=out_shape, **rank_settings)

    return unflatten_op(reshaped)
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Stores and sanity checks the action Space, derives `self.ranks_to_reduce` and creates the
    distribution object.

    The action Space is checked with IntBox and FloatBox as allowed types and `must_have_categories=False`.
    `self.ranks_to_reduce` is the length of the action Space's shape (including the batch rank) minus 1.

    Args:
        input_spaces (dict): The input Spaces by name (not inspected here).
        action_space: The action Space. Must not be None.

    Raises:
        AssertionError: If `action_space` is None.
    """
    assert action_space is not None
    self.action_space = action_space

    # Check for IntBox and FloatBox.?
    sanity_check_space(self.action_space, allowed_types=[IntBox, FloatBox], must_have_categories=False)

    shape_with_batch = self.action_space.get_shape(with_batch_rank=True)
    self.ranks_to_reduce = len(shape_with_batch) - 1

    # TODO: Make this flexible with different distributions.
    self.distribution = Categorical()
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Runs the parent's checks, then validates the optional "internal_states" and the "inputs" Spaces.

    "internal_states" (if present) must be a Tuple of len 2. "inputs" is stored in `self.in_space` and
    must have both a batch and a time rank.

    Args:
        input_spaces (dict): The input Spaces by name. Must contain "inputs"; may contain
            "internal_states".
        action_space: Passed on to the parent's check.

    Raises:
        AssertionError: If "internal_states" is given, but its len is not 2.
    """
    super(LSTMLayer, self).check_input_spaces(input_spaces, action_space)

    # Check correct tuple-internal-states format (if not None, in which case we assume all 0.0s).
    if "internal_states" in input_spaces:
        states_space = input_spaces["internal_states"]
        sanity_check_space(states_space, allowed_types=[Tuple])
        assert len(states_space) == 2,\
            "ERROR: If internal_states are provided (which is the case), an LSTMLayer requires the len of " \
            "this Tuple to be 2 (c- and h-states). Your Space is '{}'.".format(states_space)

    # Check for batch AND time-rank.
    self.in_space = input_spaces["inputs"]
    sanity_check_space(self.in_space, must_have_batch_rank=True, must_have_time_rank=True)
def get_preprocessed_space(self, space):
    """
    Builds the Space that results from applying this component's reshape settings to `space`.

    Each flattened sub-Space is mapped to a new Space object; the result is re-nested via `unflatten_op`.

    Args:
        space (Space): The incoming Space.

    Returns:
        The re-nested (via `unflatten_op`) collection of the reshaped sub-Spaces.
    """
    reshaped = dict()
    for key, single_space in space.flatten().items():
        class_ = type(single_space)

        # Determine the actual shape (not batch/time ranks).
        if self.flatten is not True:
            new_shape = self.new_shape[key] if isinstance(self.new_shape, dict) else self.new_shape
        elif self.flatten_categories is not False and type(single_space) == IntBox:
            # `flatten_categories` is either True (use the Space's own category-aware flat dim) or holds
            # the number of categories itself.
            if self.flatten_categories is True:
                new_shape = (single_space.flat_dim_with_categories,)
            else:
                new_shape = (self.flatten_categories,)
            # IntBoxes flattened together with their categories become FloatBoxes.
            class_ = FloatBox
        else:
            new_shape = (single_space.flat_dim,)

        # Check the batch/time rank options and collect the rank settings for the new Space.
        if self.fold_time_rank is True:
            sanity_check_space(single_space, must_have_batch_rank=True, must_have_time_rank=True)
            rank_settings = dict(add_batch_rank=True, add_time_rank=False)
        # Time rank should be unfolded from batch rank with the given dimension.
        elif self.unfold_time_rank is True:
            sanity_check_space(single_space, must_have_batch_rank=True, must_have_time_rank=False)
            rank_settings = dict(
                add_batch_rank=True,
                add_time_rank=True,
                time_major=self.time_major if self.time_major is not None else False
            )
        # Only change the actual shape (leave batch/time ranks as is).
        else:
            # Do we flip batch and time ranks?
            if self.flip_batch_and_time_rank is False:
                time_major = single_space.time_major
            else:
                time_major = not single_space.time_major
            rank_settings = dict(
                add_batch_rank=single_space.has_batch_rank,
                add_time_rank=single_space.has_time_rank,
                time_major=time_major
            )

        # Without a new shape, the original shape is kept.
        out_shape = single_space.shape if new_shape is None else new_shape
        reshaped[key] = class_(shape=out_shape, **rank_settings)

    return unflatten_op(reshaped)
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Runs the parent's checks, then validates the optional "internal_states" and the "inputs" Spaces.

    "internal_states" (if present) must be a Tuple whose first `self.num_lstms` items are each a Tuple of
    len 2 with a batch rank and without a time rank. "inputs" is stored in `self.in_space` and must have
    both a batch and a time rank.

    Args:
        input_spaces (dict): The input Spaces by name. Must contain "inputs"; may contain
            "internal_states".
        action_space: Passed on to the parent's check.

    Raises:
        AssertionError: If one of the per-layer internal-states Tuples does not have len 2.
    """
    super(MultiLSTMLayer, self).check_input_spaces(input_spaces, action_space)

    # Check correct tuple-internal-states format (if not None, in which case we assume all 0.0s).
    if "internal_states" in input_spaces:
        # Check that main space is a Tuple (one item for each layer).
        sanity_check_space(input_spaces["internal_states"], allowed_types=[Tuple])

        # Check that each layer gets a tuple of 2 values: c- and h-states.
        for layer_idx in range(self.num_lstms):
            layer_states = input_spaces["internal_states"][layer_idx]
            sanity_check_space(
                layer_states, allowed_types=[Tuple], must_have_batch_rank=True, must_have_time_rank=False
            )
            assert len(layer_states) == 2,\
                "ERROR: If internal_states are provided (which is the case), an LSTMLayer requires the len of " \
                "this Tuple to be 2 (c- and h-states). Your Space is '{}'.".format(layer_states)

    # Check for batch AND time-rank.
    self.in_space = input_spaces["inputs"]
    sanity_check_space(self.in_space, must_have_batch_rank=True, must_have_time_rank=True)
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Sanity checks the "parameters" Space: Must be a Tuple of len 2 (loc and scale), both items FloatBoxes.

    Args:
        input_spaces (dict): The input Spaces by name. Must contain the key "parameters".
        action_space: Not used by this check.

    Raises:
        AssertionError: If the "parameters" Tuple does not have len 2.
    """
    parameters_space = input_spaces["parameters"]
    sanity_check_space(parameters_space, allowed_types=[Tuple])
    assert len(parameters_space) == 2, "ERROR: Expected Tuple of len=2 as input Space to Normal!"

    for item_idx in (0, 1):
        sanity_check_space(parameters_space[item_idx], allowed_types=[FloatBox])
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Sanity checks the "nn_output" Space and this component's action Space.

    "nn_output" must not be a ContainerSpace. `self.action_space` must have a batch rank and - if it is
    an IntBox - categories.

    Args:
        input_spaces (dict): The input Spaces by name. Must contain the key "nn_output".
        action_space: Not used by this check (`self.action_space` is checked instead).
    """
    # The input Space.
    sanity_check_space(input_spaces["nn_output"], non_allowed_types=[ContainerSpace])

    # The action Space.
    sanity_check_space(self.action_space, must_have_batch_rank=True)
    if isinstance(self.action_space, IntBox):
        sanity_check_space(self.action_space, must_have_categories=True)
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Sanity checks the "parameters" Space: Must be a Tuple of len 2 (mean and stddev OR mean and full
    co-variance matrix), both items FloatBoxes.

    If `self.parameterize_via_diagonal` is set, the last dimensions of both items' shapes must match.

    Args:
        input_spaces (dict): The input Spaces by name. Must contain the key "parameters".
        action_space: Not used by this check.

    Raises:
        AssertionError: If the Tuple does not have len 2 or the last dimensions do not match (diagonal case).
    """
    parameters_space = input_spaces["parameters"]
    sanity_check_space(parameters_space, allowed_types=[Tuple])
    assert len(parameters_space) == 2, "ERROR: Expected Tuple of len=2 as input Space to MultivariateNormal!"

    for item_idx in (0, 1):
        sanity_check_space(parameters_space[item_idx], allowed_types=[FloatBox])

    if self.parameterize_via_diagonal:
        # Make sure mean and stddev have the same last rank.
        mean_last_dim = parameters_space[0].shape[-1]
        stddev_last_dim = parameters_space[1].shape[-1]
        assert mean_last_dim == stddev_last_dim,\
            "ERROR: `parameters` in_space must have the same last rank for mean as for (diagonal) stddev values!"
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Sanity checks the "nn_output" Space and this component's action Space.

    "nn_output" must not be a ContainerSpace. `self.action_space` must have a batch rank and - if it is
    an IntBox - categories. Other action Space types are not checked any further.

    Args:
        input_spaces (dict): The input Spaces by name. Must contain the key "nn_output".
        action_space: Not used by this check (`self.action_space` is checked instead).
    """
    # The input Space.
    sanity_check_space(input_spaces["nn_output"], non_allowed_types=[ContainerSpace])

    # The action Space.
    sanity_check_space(self.action_space, must_have_batch_rank=True)
    if isinstance(self.action_space, IntBox):
        sanity_check_space(self.action_space, must_have_categories=True)
    # Fixme: Are there other restraints on continuous action spaces? E.g. no dueling layers?
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Sanity checks the batch/time ranks and shapes of the incoming Spaces.

    The checks run group by group, in this order: batch rank without time rank, shape == (),
    shape == (1,).

    Args:
        input_spaces (dict): The input Spaces by name. Must contain the keys "log_probs_sampled",
            "log_probs_next_sampled", "q_values", "q_values_sampled", "q_values_next_sampled", "rewards",
            "terminals" and "alpha".
        action_space: Not used by this check.
    """
    check_groups = (
        # All the following need a batch rank.
        (
            ("log_probs_sampled", "log_probs_next_sampled", "q_values", "q_values_sampled",
             "q_values_next_sampled", "rewards", "terminals"),
            dict(must_have_batch_rank=True, must_have_time_rank=False)
        ),
        # All the following need shape==().
        (("alpha", "rewards", "terminals"), dict(shape=())),
        # All the following need shape==(1,).
        (("q_values", "q_values_sampled", "q_values_next_sampled"), dict(shape=(1,))),
    )
    for space_names, requirements in check_groups:
        for space_name in space_names:
            sanity_check_space(input_spaces[space_name], **requirements)
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Sanity checks the "actions" input and the action Space against the configured exploration type.

    For the "tf" backend, "actions" must have a batch rank. The action Space is stored (plain and
    flattened). With epsilon exploration, it must have categories (`num_categories=(1, None)`) and only
    IntBox sub-types; with a noise component, only FloatBox sub-types are allowed.

    Args:
        input_spaces (dict): The input Spaces by name. Must contain the key "actions".
        action_space: The action Space. Must not be None.

    Raises:
        AssertionError: If `action_space` is None.
        RLGraphError: If both epsilon exploration and a noise component are configured.
    """
    sampled_actions_space = input_spaces["actions"]
    if get_backend() == "tf":
        sanity_check_space(sampled_actions_space, must_have_batch_rank=True)

    assert action_space is not None
    self.action_space = action_space
    self.flat_action_space = action_space.flatten()

    if self.epsilon_exploration and self.noise_component:
        # Check again at graph creation? This is currently redundant to the check in __init__
        raise RLGraphError("Cannot use both epsilon exploration and a noise component at the same time.")

    if self.epsilon_exploration:
        sanity_check_space(
            self.action_space,
            must_have_categories=True,
            num_categories=(1, None),
            allowed_sub_types=[IntBox]
        )
    elif self.noise_component:
        sanity_check_space(self.action_space, allowed_sub_types=[FloatBox])
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Stores the "sample" Space (plain and flattened) and - for the "tf" backend - requires it to have a
    batch rank.

    Args:
        input_spaces (dict): The input Spaces by name. Must contain the key "sample".
        action_space: Not used by this check.
    """
    sample_space = input_spaces["sample"]
    self.sample_space = sample_space
    self.flat_sample_space = self.sample_space.flatten()

    # Require at least a batch-rank in the incoming samples.
    if get_backend() == "tf":
        sanity_check_space(self.sample_space, must_have_batch_rank=True)
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Sanity checks the "ids" Space: A batch rank is required and only IntBox sub-types are allowed.

    Args:
        input_spaces (dict): The input Spaces by name. Must contain the key "ids".
        action_space: Not used by this check.
    """
    sanity_check_space(input_spaces["ids"], must_have_batch_rank=True, allowed_sub_types=[IntBox])
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Sanity checks the "nn_input" Space: It must not be a ContainerSpace.

    Args:
        input_spaces (dict): The input Spaces by name. Must contain the key "nn_input".
        action_space: Not used by this check.
    """
    sanity_check_space(input_spaces["nn_input"], non_allowed_types=[ContainerSpace])
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Runs the parent's checks, then requires this component's action Space to have categories.

    Args:
        input_spaces (dict): The input Spaces by name; passed on to the parent's check.
        action_space: Passed on to the parent's check (`self.action_space` is checked here).
    """
    parent = super(CategoricalDistributionAdapter, self)
    parent.check_input_spaces(input_spaces, action_space)

    # IntBoxes must have categories.
    sanity_check_space(self.action_space, must_have_categories=True)
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Sanity checks the "ids" Space: For now, both a batch and a time rank are required.

    Args:
        input_spaces (dict): The input Spaces by name. Must contain the key "ids".
        action_space: Not used by this check.
    """
    sanity_check_space(input_spaces["ids"], must_have_batch_rank=True, must_have_time_rank=True)
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Sanity checks the "ids" Space: A batch rank is required.

    Args:
        input_spaces (dict): The input Spaces by name. Must contain the key "ids".
        action_space: Not used by this check.
    """
    sanity_check_space(input_spaces["ids"], must_have_batch_rank=True)
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Runs the parent's checks, then requires "preprocessing_inputs" to not have a time rank.

    Args:
        input_spaces (dict): The input Spaces by name. Must contain the key "preprocessing_inputs".
        action_space: Passed on to the parent's check.
    """
    super(Sequence, self).check_input_spaces(input_spaces, action_space)

    # No time rank allowed (batch rank doesn't matter).
    sanity_check_space(input_spaces["preprocessing_inputs"], must_have_time_rank=False)
def check_input_spaces(self, input_spaces, action_space=None):
    """
    Sanity checks the "parameters" and "labels" Spaces (in this order): Both must have a batch rank.

    Args:
        input_spaces (dict): The input Spaces by name. Must contain the keys "parameters" and "labels".
        action_space: Not used by this check.
    """
    for space_name in ("parameters", "labels"):
        sanity_check_space(input_spaces[space_name], must_have_batch_rank=True)