def _graph_fn_apply(self, preprocessing_inputs):
    if self.backend == "python" or get_backend() == "python":
        if isinstance(preprocessing_inputs, list):
            preprocessing_inputs = np.asarray(preprocessing_inputs)
        return preprocessing_inputs.astype(dtype=util.dtype(self.to_dtype, to="np"))
    elif get_backend() == "pytorch":
        return torch.tensor(preprocessing_inputs, dtype=util.dtype(self.to_dtype, to="pytorch"))
    elif get_backend() == "tf":
        to_dtype = util.dtype(self.to_dtype, to="tf")
        if preprocessing_inputs.dtype != to_dtype:
            return tf.cast(x=preprocessing_inputs, dtype=to_dtype)
        else:
            return preprocessing_inputs
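
# Illustrative sketch (not part of the original module): the python-backend path above
# just wraps lists into an ndarray and casts. A minimal NumPy-only analogue, assuming a
# plain dtype string in place of util.dtype():
import numpy as np

def convert_dtype_np(inputs, to_dtype="float32"):
    # Lists are first wrapped into an ndarray, then cast to the target dtype.
    if isinstance(inputs, list):
        inputs = np.asarray(inputs)
    return inputs.astype(np.dtype(to_dtype))

assert convert_dtype_np([1, 2, 3]).dtype == np.float32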
def _graph_fn_get_noise(self):
    # Ornstein-Uhlenbeck update: the drift term pulls the state back towards `mu`,
    # the diffusion term adds Gaussian noise scaled by `sigma`.
    drift = self.theta * (self.mu - self.ou_state)
    if get_backend() == "tf":
        diffusion = self.sigma * tf.random_normal(
            shape=self.action_space.shape, dtype=dtype(self.action_space.dtype)
        )
        delta = drift + diffusion
        return tf.assign_add(ref=self.ou_state, value=delta)
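
# A stand-alone NumPy sketch of the same Ornstein-Uhlenbeck recursion (the helper name
# and the theta/mu/sigma values are illustrative, not part of the framework):
import numpy as np

def ou_step(state, theta=0.15, mu=0.0, sigma=0.2):
    # delta = drift (mean reversion towards mu) + diffusion (Gaussian noise).
    drift = theta * (mu - state)
    diffusion = sigma * np.random.normal(size=state.shape)
    return state + drift + diffusion

state = np.zeros(2)
for _ in range(5):
    state = ou_step(state)  # mirrors the in-graph tf.assign_add(ou_state, delta)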
def _graph_fn_get_distribution(self, parameters):
    """
    Args:
        parameters (DataOp): The p value (the probability that the distribution returns True).
    """
    if get_backend() == "tf":
        return tf.distributions.Bernoulli(probs=parameters, dtype=util.dtype("bool"))
    elif get_backend() == "pytorch":
        return torch.distributions.Bernoulli(probs=parameters)
def create_variables(self, input_spaces, action_space=None):
    # Create the weights matrix (and maybe a biases vector).
    shape = (self.vocab_size, self.embed_dim)
    self.initializer = Initializer.from_spec(shape=shape, specification=self.initializer_spec)

    # TODO: For IMPALA, a partitioner is not needed. Do this later.
    self.embedding_matrix = self.get_variable(
        name="embedding-matrix", shape=shape, dtype=dtype("float"),
        initializer=self.initializer.initializer,
        #partitioner=self.partitioners,
        regularizer=self.regularizers,
        trainable=self.trainable
    )

    self.ids_space = input_spaces["ids"]
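
# For reference, an embedding layer is just a row lookup into the (vocab_size x embed_dim)
# matrix created above. A NumPy sketch with made-up sizes:
import numpy as np

vocab_size, embed_dim = 10, 4
embedding_matrix = np.random.randn(vocab_size, embed_dim).astype(np.float32)
ids = np.array([3, 0, 7])
embeddings = embedding_matrix[ids]  # shape (3, 4): one embedding row per id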
def _graph_fn_pick(self, use_exploration, epsilon_decisions, sample):
    """
    Exploration for discrete action spaces.
    Either picks a random action (if `use_exploration` and `epsilon_decisions` are True)
    or returns the non-exploratory action.

    Args:
        use_exploration (DataOp): The master switch determining whether to use exploration or not.
        epsilon_decisions (DataOp): The bool coming from the epsilon-exploration component specifying
            whether to use exploration or not (per batch item).
        sample (DataOp): The output from a distribution's "sample_deterministic" OR "sample_stochastic".

    Returns:
        DataOp: The DataOp representing the action. This will match the shape of self.action_space.
    """
    if get_backend() == "tf":
        random_actions = tf.random_uniform(
            shape=tf.shape(sample),
            maxval=self.action_space.num_categories,
            dtype=dtype("int")
        )

        if use_exploration is False:
            return sample
        else:
            return tf.where(
                # `use_exploration` given as actual bool or as tensor?
                condition=epsilon_decisions if use_exploration is True else tf.logical_and(
                    use_exploration, epsilon_decisions
                ),
                x=random_actions,
                y=sample
            )
    elif get_backend() == "pytorch":
        # N.b. different order than the TF branch because we don't want to execute the
        # sampling below if no exploration is needed.
        if use_exploration is False:
            return sample

        if self.sample_obj is None:
            # Don't create new sample objects every time.
            self.sample_obj = torch.distributions.Uniform(0, self.action_space.num_categories)

        random_actions = self.sample_obj.sample(sample.shape).int()
        if use_exploration is True:
            return torch.where(epsilon_decisions, random_actions, sample)
        else:
            if not isinstance(use_exploration, torch.ByteTensor):
                use_exploration = use_exploration.byte()
            if not isinstance(epsilon_decisions, torch.ByteTensor):
                epsilon_decisions = epsilon_decisions.byte()
            return torch.where(use_exploration & epsilon_decisions, random_actions, sample)
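
# A minimal NumPy sketch of the batched epsilon-greedy selection above: wherever the
# per-item epsilon decision is True, a uniformly random action replaces the sampled one.
# (The helper name, num_categories and example arrays are illustrative only.)
import numpy as np

def epsilon_pick(use_exploration, epsilon_decisions, sample, num_categories):
    if not use_exploration:
        return sample
    random_actions = np.random.randint(0, num_categories, size=sample.shape)
    return np.where(epsilon_decisions, random_actions, sample)

# First item explores (random action), second keeps the greedy sample.
actions = epsilon_pick(True, np.array([True, False]), np.array([1, 1]), num_categories=4)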
def test_specifiable_server(self):
    action_space = IntBox(2)
    state_space = FloatBox()
    env_spec = dict(type="random_env", state_space=state_space, action_space=action_space, deterministic=True)

    # Create the server, but don't start it yet. This will be done fully automatically by the tf-Session.
    specifiable_server = SpecifiableServer(
        Environment, env_spec,
        dict(step_for_env_stepper=[state_space, float, bool]),
        "terminate"
    )

    # `ret1`/`ret2` are now ops in the graph.
    ret1 = specifiable_server.step_for_env_stepper(action_space.sample())
    ret2 = specifiable_server.step_for_env_stepper(action_space.sample())

    # Check all 3 outputs of the Env step (next state, reward, terminal).
    for ret in (ret1, ret2):
        self.assertEqual(ret[0].shape, ())
        self.assertEqual(ret[0].dtype, dtype("float32"))
        self.assertEqual(ret[1].shape, ())
        self.assertEqual(ret[1].dtype, dtype("float32"))
        self.assertEqual(ret[2].shape, ())
        self.assertEqual(ret[2].dtype, dtype("bool"))

    # Start the session and run the ops, then check their actual values.
    with tf.train.SingularMonitoredSession(hooks=[SpecifiableServerHook()]) as sess:
        out1 = sess.run(ret1)
        out2 = sess.run(ret2)

        # Next state.
        self.assertAlmostEqual(out1[0], 0.7713, places=4)
        self.assertAlmostEqual(out2[0], 0.7488, places=4)
        # Reward.
        self.assertAlmostEqual(out1[1], 0.0208, places=4)
        self.assertAlmostEqual(out2[1], 0.4985, places=4)
        # Terminal.
        self.assertFalse(out1[2])
        self.assertFalse(out2[2])
def _graph_fn_sample_deterministic(self, distribution):
    if get_backend() == "tf":
        return tf.argmax(input=distribution.probs, axis=-1, output_type=util.dtype("int"))
    elif get_backend() == "pytorch":
        return torch.argmax(distribution.probs, dim=-1).int()
def _graph_fn_get_distribution(self, parameters):
    if get_backend() == "tf":
        return tf.distributions.Categorical(probs=parameters, dtype=util.dtype("int"))
    elif get_backend() == "pytorch":
        return torch.distributions.Categorical(probs=parameters)
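
# Deterministic vs. stochastic sampling from a categorical distribution, sketched in
# NumPy: the deterministic sample (argmax, as in _graph_fn_sample_deterministic above)
# picks the mode, while a stochastic sample draws according to the probabilities.
import numpy as np

probs = np.array([0.1, 0.7, 0.2])
deterministic = np.argmax(probs)                    # always 1 (the mode)
stochastic = np.random.choice(len(probs), p=probs)  # 1 with probability 0.7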
def get_variable(self, name, is_input_feed=False, add_batch_rank=None, add_time_rank=None,
                 time_major=None, is_python=False, local=False, **kwargs):
    add_batch_rank = self.has_batch_rank if add_batch_rank is None else add_batch_rank
    batch_rank = () if add_batch_rank is False else (None,) if add_batch_rank is True else (add_batch_rank,)

    add_time_rank = self.has_time_rank if add_time_rank is None else add_time_rank
    time_rank = () if add_time_rank is False else (None,) if add_time_rank is True else (add_time_rank,)

    time_major = self.time_major if time_major is None else time_major

    if time_major is False:
        shape = batch_rank + time_rank + self.shape
    else:
        shape = time_rank + batch_rank + self.shape

    if is_python is True or get_backend() == "python":
        if isinstance(add_batch_rank, int):
            if isinstance(add_time_rank, int):
                if time_major:
                    var = [[0 for _ in range_(add_batch_rank)] for _ in range_(add_time_rank)]
                else:
                    var = [[0 for _ in range_(add_time_rank)] for _ in range_(add_batch_rank)]
            else:
                var = [0 for _ in range_(add_batch_rank)]
        elif isinstance(add_time_rank, int):
            var = [0 for _ in range_(add_time_rank)]
        else:
            var = []

        # TODO: un-indent and just directly construct via PyTorch?
        if get_backend() == "pytorch" and is_input_feed:
            # Convert to a PyTorch tensor because PyTorch cannot use placeholders
            # (a zeros tensor stands in for the input feed).
            return torch.zeros(shape)
        else:
            # TODO: also convert?
            return var

    elif get_backend() == "tf":
        import tensorflow as tf
        # TODO: re-evaluate the cutting of a leading '/_?' (tf doesn't like it).
        name = re.sub(r'^/_?', "", name)
        if is_input_feed:
            return tf.placeholder(dtype=dtype(self.dtype), shape=shape, name=name)
        else:
            init_spec = kwargs.pop("initializer", None)
            # Bools should be initializable via 0 or not 0.
            if self.dtype == np.bool_ and isinstance(init_spec, (int, float)):
                init_spec = (init_spec != 0)

            if self.dtype == np.str_ and init_spec == 0:
                initializer = None
            else:
                initializer = Initializer.from_spec(shape=shape, specification=init_spec).initializer

            return tf.get_variable(
                name, shape=shape, dtype=dtype(self.dtype), initializer=initializer,
                collections=[tf.GraphKeys.GLOBAL_VARIABLES if local is False else tf.GraphKeys.LOCAL_VARIABLES],
                **kwargs
            )
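
# How the final variable shape is composed above: batch and time ranks (either unknown
# size -> None, or fixed ints) are prepended to the Space's own shape, in an order
# controlled by `time_major`. A pure-Python sketch with fixed example ranks:
batch_rank, time_rank, value_shape = (4,), (2,), (3,)
assert batch_rank + time_rank + value_shape == (4, 2, 3)  # time_major=False
assert time_rank + batch_rank + value_shape == (2, 4, 3)  # time_major=True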
def contains(self, sample):
    if self.shape == ():
        return isinstance(sample, (bool, np.bool_))
    else:
        return dtype(sample.dtype, "np") == np.bool_
def get_space_from_op(op):
    """
    Tries to re-create a Space object given some DataOp.
    This is useful for shape inference when passing a Socket's ops through a GraphFunction
    and auto-inferring the resulting shape/Space.

    Args:
        op (DataOp): The op to create a corresponding Space for.

    Returns:
        Space: The inferred Space object.
    """
    # a Dict
    if isinstance(op, dict):  # DataOpDict
        spec = {}
        add_batch_rank = False
        add_time_rank = False
        for key, value in op.items():
            spec[key] = get_space_from_op(value)
            if spec[key].has_batch_rank:
                add_batch_rank = True
            if spec[key].has_time_rank:
                add_time_rank = True
        return Dict(spec, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank)
    # a Tuple
    elif isinstance(op, tuple):  # DataOpTuple
        spec = []
        add_batch_rank = False
        add_time_rank = False
        for i in op:
            space = get_space_from_op(i)
            if space == 0:
                return 0
            spec.append(space)
            if spec[-1].has_batch_rank:
                add_batch_rank = True
            if spec[-1].has_time_rank:
                add_time_rank = True
        return Tuple(spec, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank)
    # primitive Space -> infer from op dtype and shape
    else:
        # Simple constant value DataOp (python type or an np.ndarray).
        assert not hasattr(op, "constant_value")  # we should be done with this by now
        #if isinstance(op, SingleDataOp) and op.constant_value is not None:
        #    value = op.constant_value
        #    if isinstance(value, np.ndarray):
        #        return BoxSpace.from_spec(spec=dtype(str(value.dtype), "np"), shape=value.shape)

        # Op itself is a single value, simple python type.
        if isinstance(op, (bool, int, float)):
            return BoxSpace.from_spec(spec=type(op), shape=())
        # A single numpy array.
        elif isinstance(op, np.ndarray):
            return BoxSpace.from_spec(spec=dtype(str(op.dtype), "np"), shape=op.shape)
        # No Space: e.g. the tf.no_op, a distribution (anything that's not a tensor).
        # PyTorch Tensors do not have `get_shape`, so we must check the backend.
        elif hasattr(op, "dtype") is False or (get_backend() == "tf" and not hasattr(op, "get_shape")):
            return 0
        # Some tensor: can be converted into a BoxSpace.
        else:
            shape = get_shape(op)
            # Unknown shape (e.g. a cond op).
            if shape is None:
                return 0

            add_batch_rank = False
            add_time_rank = False
            time_major = False
            new_shape = list(shape)

            # New way: Detect via op._batch_rank and op._time_rank properties where these ranks are.
            if hasattr(op, "_batch_rank") and isinstance(op._batch_rank, int):
                add_batch_rank = True
                new_shape[op._batch_rank] = -1
            # elif get_backend() == "pytorch":
            #     if isinstance(op, torch.Tensor):
            #         if op.dim() > 1 and shape[0] == 1:
            #             add_batch_rank = True
            #             new_shape[0] = 1
            if hasattr(op, "_time_rank") and isinstance(op._time_rank, int):
                add_time_rank = True
                if op._time_rank == 0:
                    time_major = True
                new_shape[op._time_rank] = -1
            shape = tuple(n for n in new_shape if n != -1)

            # Old way: Detect automatically whether the first rank(s) are batch and/or time rank.
            if add_batch_rank is False and add_time_rank is False and shape != () and shape[0] is None:
                if len(shape) > 1 and shape[1] is None:
                    #raise RLGraphError(
                    #    "ERROR: Cannot determine time-major flag if both batch- and time-ranks are in an op "
                    #    "w/o saying which rank goes to which position!"
                    #)
                    shape = shape[2:]
                    add_time_rank = True
                else:
                    shape = shape[1:]
                    add_batch_rank = True

            base_dtype = op.dtype.base_dtype if hasattr(op.dtype, "base_dtype") else op.dtype
            # PyTorch does not have a bool type.
            if get_backend() == "pytorch":
                if op.dtype is torch.uint8:
                    base_dtype = bool
            base_dtype_str = str(base_dtype)

            # a FloatBox
            if "float" in base_dtype_str:
                return FloatBox(shape=shape, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank,
                                time_major=time_major, dtype=dtype(base_dtype, "np"))
            # an IntBox
            elif "int" in base_dtype_str:
                return IntBox(shape=shape, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank,
                              time_major=time_major, dtype=dtype(base_dtype, "np"))
            # a BoolBox
            elif "bool" in base_dtype_str:
                return BoolBox(shape=shape, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank,
                               time_major=time_major)
            # a TextBox
            elif "string" in base_dtype_str:
                return TextBox(shape=shape, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank,
                               time_major=time_major)

    raise RLGraphError("ERROR: Cannot derive Space from op '{}' (unknown type?)!".format(op))
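
# The dtype-string dispatch at the end of get_space_from_op, sketched stand-alone:
# mapping a NumPy array's dtype name to a box-type label (the helper and labels are
# hypothetical; the real function returns Space objects).
import numpy as np

def infer_box_kind(arr):
    name = str(arr.dtype)
    if "float" in name:
        return "FloatBox"
    elif "int" in name:
        return "IntBox"
    elif "bool" in name:
        return "BoolBox"
    raise ValueError("Cannot derive Space from dtype '{}'".format(name))

assert infer_box_kind(np.zeros(3)) == "FloatBox"
assert infer_box_kind(np.array([True])) == "BoolBox"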
def call(*args):
    if isinstance(self.output_spaces, dict):
        assert method_name in self.output_spaces, \
            "ERROR: Method '{}' not specified in output_spaces: {}!".format(method_name, self.output_spaces)
        specs = self.output_spaces[method_name]
    else:
        specs = self.output_spaces(method_name)

    if specs is None:
        raise RLGraphError(
            "No Space information received for method '{}:{}'".format(self.class_.__name__, method_name)
        )

    dtypes = []
    shapes = []
    return_slots = []
    for i, space in enumerate(force_list(specs)):
        assert not isinstance(space, ContainerSpace)
        # Expecting an op (space 0).
        if space == 0:
            dtypes.append(0)
            shapes.append(0)
            return_slots.append(i)
        # Expecting a tensor.
        elif space is not None:
            dtypes.append(dtype(space.dtype))
            shapes.append(space.shape)
            return_slots.append(i)

    if get_backend() == "tf":
        # This function will send the method-call command via the out-pipe to the remote (server)
        # Specifiable object - all in-graph - and return the results to be used further by other graph ops.
        def py_call(*args_):
            args_ = [arg.decode('UTF-8') if isinstance(arg, bytes) else arg for arg in args_]
            try:
                self.out_pipe.send(args_)
                result_ = self.out_pipe.recv()
                # If an error occurred, it'll be passed back through the pipe.
                if isinstance(result_, Exception):
                    raise result_
                elif result_ is not None:
                    return result_
            except Exception as e:
                if isinstance(e, IOError):
                    raise StopIteration()  # Clean exit.
                else:
                    raise

        results = tf.py_func(py_call, (method_name,) + tuple(args), dtypes, name=method_name)

        # Force known shapes on the returned tensors.
        for result, shape in zip(results, shapes):
            # Not an op (ops have shape=0).
            if shape != 0:
                result.set_shape(shape)
    else:
        raise NotImplementedError

    return results[0] if len(dtypes) == 1 else tuple(results)
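
# The in-graph server call above boils down to a blocking request/response over a pipe:
# send (method_name, *args), receive either a result or an Exception. A minimal
# multiprocessing sketch of that protocol (the "double"/"terminate" handlers are
# hypothetical; on spawn-based platforms run this under `if __name__ == "__main__":`).
from multiprocessing import Pipe, Process

def server(conn):
    while True:
        msg = conn.recv()
        if msg[0] == "terminate":
            break
        try:
            conn.send(msg[1] * 2)  # handle the "double" method
        except Exception as e:
            conn.send(e)           # errors travel back through the pipe

parent, child = Pipe()
p = Process(target=server, args=(child,))
p.start()
parent.send(("double", 21))
assert parent.recv() == 42
parent.send(("terminate",))
p.join()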
def _graph_fn_decayed_value(self, time_step):
    """
    Args:
        time_step (DataOp): The int-type DataOp that holds the current global time_step.

    Returns:
        DataOp: The decayed value depending on the current time step.
    """
    if get_backend() == "tf":
        smaller_than_start = time_step <= self.start_timestep
        shape = tf.shape(time_step)
        # `time_step` comes in as a time-sequence of time-steps.
        if shape.shape[0] > 0:
            return tf.where(
                condition=smaller_than_start,
                # We are still in pre-decay time.
                x=tf.tile(tf.constant([self.from_]), multiples=shape),
                # We are past pre-decay time.
                y=tf.where(
                    condition=(time_step >= self.start_timestep + self.num_timesteps),
                    # We are in post-decay time.
                    x=tf.tile(tf.constant([self.to_]), multiples=shape),
                    # We are inside the decay time window.
                    y=self._graph_fn_decay(
                        tf.cast(x=time_step - self.start_timestep, dtype=util.dtype("float"))
                    ),
                    name="cond-past-end-time"
                ),
                name="cond-before-start-time"
            )
        # Single 0D time step.
        else:
            return tf.cond(
                pred=smaller_than_start,
                # We are still in pre-decay time.
                true_fn=lambda: self.from_,
                # We are past pre-decay time.
                false_fn=lambda: tf.cond(
                    pred=(time_step >= self.start_timestep + self.num_timesteps),
                    # We are in post-decay time.
                    true_fn=lambda: self.to_,
                    # We are inside the decay time window.
                    false_fn=lambda: self._graph_fn_decay(
                        tf.cast(x=time_step - self.start_timestep, dtype=util.dtype("float"))
                    )
                )
            )
    elif get_backend() == "pytorch":
        if time_step is None:
            time_step = torch.tensor([0])
        smaller_than_start = time_step <= self.start_timestep
        if time_step.dim() == 0:
            time_step = time_step.unsqueeze(-1)
        shape = time_step.shape
        # `time_step` comes in as a time-sequence of time-steps.
        # TODO: tile shape is confusing -> num tiles should be shape[0], not shape?
        if shape[0] > 0:
            past_decay = torch.where(
                (time_step >= self.start_timestep + self.num_timesteps),
                # We are in post-decay time.
                pytorch_tile(torch.tensor([self.to_]), shape),
                # We are inside the decay time window.
                torch.tensor(
                    self._graph_fn_decay(torch.FloatTensor([time_step - self.start_timestep]))
                )
            )
            return torch.where(
                smaller_than_start,
                # We are still in pre-decay time.
                pytorch_tile(torch.tensor([self.from_]), shape),
                # We are past pre-decay time.
                past_decay
            )
        # Single 0D time step.
        else:
            if smaller_than_start:
                return self.from_
            elif time_step >= self.start_timestep + self.num_timesteps:
                return self.to_
            else:
                return self._graph_fn_decay(torch.FloatTensor([time_step - self.start_timestep]))
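
# The three phases above (pre-decay / decay window / post-decay), sketched as a
# plain-Python linear decay. The concrete _graph_fn_decay shape is component-specific;
# linear interpolation is just one example, and all parameter values are illustrative.
def decayed_value(t, from_=1.0, to_=0.1, start=1000, num=9000):
    if t <= start:
        return from_                    # pre-decay: constant `from_`
    if t >= start + num:
        return to_                      # post-decay: constant `to_`
    frac = (t - start) / float(num)     # inside the window: interpolate
    return from_ + frac * (to_ - from_)

assert decayed_value(0) == 1.0 and decayed_value(10000) == 0.1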
def _graph_fn_get_noise(self):
    if get_backend() == "tf":
        return tf.random_normal(
            shape=(1,) + self.action_space.shape,
            mean=self.mean,
            stddev=self.stddev,
            dtype=dtype(self.action_space.dtype)
        )
def __init__(self, shape, specification=None, **kwargs):
    """
    Args:
        shape (tuple): The shape of the Variables to initialize.
        specification (any): A spec that determines the nature of this initializer.

    Raises:
        RLGraphError: If a fixed shape in `specification` does not match `shape`.
    """
    super(Initializer, self).__init__()

    # The shape of the variable to be initialized.
    self.shape = shape
    # The actual underlying initializer object.
    self.initializer = None

    # Truncated Normal.
    if specification == "truncated_normal":
        # Use the first dimension (num_rows or batch rank) to figure out the stddev.
        stddev = 1 / math.sqrt(shape[0] if isinstance(shape, (tuple, list)) and len(shape) > 0 else 1.0)
        if get_backend() == "tf":
            self.initializer = tf.truncated_normal_initializer(stddev=stddev)
        elif get_backend() == "pytorch":
            self.initializer = lambda t: torch.nn.init.normal_(tensor=t, std=stddev)

    # No spec -> Leave initializer as None for TF (will then use the default; e.g. for
    # tf weights: Xavier uniform). For PyTorch, we still have to set Xavier explicitly.
    # TODO: the "is None or is False" check is very unclean because TF and PT have different
    # defaults -> change to clean default values for weights and biases.
    elif specification is None or specification is False:
        if get_backend() == "tf":
            pass
        elif get_backend() == "pytorch":
            self.initializer = torch.nn.init.xavier_uniform_

    # Fixed values spec -> Use them, just do sanity checking.
    else:
        # Constant value across the variable.
        if isinstance(specification, (float, int)):
            pass
        # A 1D initializer (e.g. for biases).
        elif isinstance(specification, list):
            array = np.asarray(specification, dtype=dtype("float32", "np"))
            if array.shape != self.shape:
                raise RLGraphError(
                    "ERROR: Number/shape of given items ({}) not identical with shape ({})!".format(
                        array.shape, self.shape
                    )
                )
        # An nD initializer (numpy-array).
        elif isinstance(specification, np.ndarray):
            if specification.shape != self.shape:
                raise RLGraphError(
                    "ERROR: Shape of given items ({}) not identical with shape ({})!".format(
                        specification.shape, self.shape
                    )
                )
        # Unknown type.
        else:
            raise RLGraphError(
                "ERROR: Bad specification given ({}) for Initializer object!".format(specification)
            )

        # Create the backend initializer object.
        if get_backend() == "tf":
            self.initializer = tf.constant_initializer(value=specification, dtype=dtype("float32"))
        elif get_backend() == "pytorch":
            self.initializer = lambda t: torch.nn.init.constant_(tensor=t, val=specification)
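
# The truncated-normal branch above scales stddev by 1/sqrt(fan_in), with fan_in taken
# from the first shape dimension. A NumPy sketch of that scaling rule (clipping at two
# stddevs here only approximates true resampling-based truncation):
import math
import numpy as np

def truncated_normal_init(shape):
    stddev = 1 / math.sqrt(shape[0] if len(shape) > 0 else 1.0)
    values = np.random.normal(0.0, stddev, size=shape)
    return np.clip(values, -2 * stddev, 2 * stddev)

weights = truncated_normal_init((256, 64))  # stddev = 1/16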