def true_fn():
    """Build TF ops that draw one uniform-random action (batch).

    Each primitive component of the (possibly nested) action space is
    sampled independently; a batch dimension is added when the
    distribution inputs carry one.
    """
    batch_size = 1
    req = force_tuple(
        action_dist.required_model_output_shape(
            self.action_space, getattr(self.model, "model_config", None)))
    # One extra leading dim on the dist inputs vs. the required model
    # output shape -> inputs are batched; read the batch size off them.
    if len(action_dist.inputs.shape) == len(req) + 1:
        batch_size = tf.shape(action_dist.inputs)[0]

    # Sample one primitive component: (Multi)Discrete, Box, or Simplex.
    def random_component(component):
        if isinstance(component, Discrete):
            # Uniform ints in [0, n).
            return tf.random.uniform(
                shape=(batch_size, ) + component.shape,
                maxval=component.n,
                dtype=component.dtype)
        elif isinstance(component, MultiDiscrete):
            # One uniform-int column per sub-space, concatenated on axis 1.
            columns = [
                tf.random.uniform(
                    shape=(batch_size, 1), maxval=n, dtype=component.dtype)
                for n in component.nvec
            ]
            return tf.concat(columns, axis=1)
        elif isinstance(component, Box):
            fully_bounded = component.bounded_above.all() and \
                component.bounded_below.all()
            if fully_bounded:
                # Integer dtypes need scalar bounds -> use first low/high.
                if component.dtype.name.startswith("int"):
                    return tf.random.uniform(
                        shape=(batch_size, ) + component.shape,
                        minval=component.low.flat[0],
                        maxval=component.high.flat[0],
                        dtype=component.dtype)
                return tf.random.uniform(
                    shape=(batch_size, ) + component.shape,
                    minval=component.low,
                    maxval=component.high,
                    dtype=component.dtype)
            # (Partially) unbounded Box -> standard-normal samples.
            return tf.random.normal(
                shape=(batch_size, ) + component.shape,
                dtype=component.dtype)
        else:
            assert isinstance(component, Simplex), \
                "Unsupported distribution component '{}' for random " \
                "sampling!".format(component)
            # Softmax of uniforms yields a point on the simplex.
            uniforms = tf.random.uniform(
                shape=(batch_size, ) + component.shape,
                minval=0.0,
                maxval=1.0,
                dtype=component.dtype)
            return tf.nn.softmax(uniforms)

    return tree.map_structure(random_component, self.action_space_struct)
def get_torch_exploration_action(self, action_dist, explore):
    """Return a torch action plus an all-zeros logp tensor.

    With `explore`, a single sample is drawn from the action space and
    (when the distribution inputs are batched) given a leading batch
    dim; otherwise the distribution's deterministic sample is used.
    """
    if not explore:
        action = action_dist.deterministic_sample()
    else:
        # NOTE: The unsqueeze becomes unnecessary once batch/time-aware
        # Spaces are supported.
        sample = self.action_space.sample()
        required = force_tuple(
            action_dist.required_model_output_shape(
                self.action_space, self.model.model_config))
        # Dist inputs carry one extra (batch) dim -> add one to the
        # sampled action as well.
        if len(action_dist.inputs.shape) == len(required) + 1:
            sample = np.expand_dims(sample, 0)
        action = torch.from_numpy(sample).to(self.device)
    logp = torch.zeros(
        (action.size()[0], ), dtype=torch.float32, device=self.device)
    return action, logp
def get_torch_exploration_action(self, action_dist, explore):
    """Random (if `explore`) or deterministic torch action; zero logp.

    With exploration on, one action per batch row is sampled uniformly
    from the action space; the batch size is read off the distribution
    inputs when they carry a batch dim.
    """
    if not explore:
        action = action_dist.deterministic_sample()
    else:
        required = force_tuple(
            action_dist.required_model_output_shape(
                self.action_space, self.model.model_config))
        # One extra leading dim on the dist inputs -> batched: draw one
        # space sample per batch row and stack them.
        if len(action_dist.inputs.shape) == len(required) + 1:
            num_rows = action_dist.inputs.shape[0]
            samples = np.stack(
                [self.action_space.sample() for _ in range(num_rows)])
        else:
            samples = self.action_space.sample()
        # Hand the numpy sample(s) over to torch on the right device.
        action = torch.from_numpy(samples).to(self.device)
    logp = torch.zeros(
        (action.size()[0], ), dtype=torch.float32, device=self.device)
    return action, logp
def true_fn():
    """Build TF ops that draw one uniform-random action (batch).

    Fixes vs. the previous version:
    * Integer-dtype bounded ``Box`` spaces no longer pass array
      low/high to ``tf.random.uniform`` (which requires scalar bounds
      for integer dtypes); the first low/high value is used instead,
      matching the sibling implementation in this file.
    * Unsupported space components now raise a ``ValueError`` instead
      of silently returning ``None``.
    """
    batch_size = 1
    req = force_tuple(
        action_dist.required_model_output_shape(
            self.action_space, self.model.model_config))
    # Add a batch dimension? (dist inputs carry one extra leading dim
    # vs. the required model output shape)
    if len(action_dist.inputs.shape) == len(req) + 1:
        batch_size = tf.shape(action_dist.inputs)[0]

    # Function to produce random samples from primitive space
    # components: (Multi)Discrete or Box.
    def random_component(component):
        if isinstance(component, Discrete):
            # Uniform ints in [0, n).
            return tf.random.uniform(
                shape=(batch_size, ) + component.shape,
                maxval=component.n,
                dtype=component.dtype)
        elif isinstance(component, MultiDiscrete):
            # One uniform-int column per sub-space, concatenated on axis 1.
            return tf.concat(
                [
                    tf.random.uniform(
                        shape=(batch_size, 1),
                        maxval=n,
                        dtype=component.dtype) for n in component.nvec
                ],
                axis=1)
        elif isinstance(component, Box):
            if component.bounded_above.all() and \
                    component.bounded_below.all():
                # Integer dtypes need scalar bounds for
                # `tf.random.uniform` -> use the first low/high values
                # (assumes uniform bounds across the space).
                if component.dtype.name.startswith("int"):
                    return tf.random.uniform(
                        shape=(batch_size, ) + component.shape,
                        minval=component.low.flat[0],
                        maxval=component.high.flat[0],
                        dtype=component.dtype)
                return tf.random.uniform(
                    shape=(batch_size, ) + component.shape,
                    minval=component.low,
                    maxval=component.high,
                    dtype=component.dtype)
            else:
                # (Partially) unbounded Box -> standard-normal samples.
                return tf.random.normal(
                    shape=(batch_size, ) + component.shape,
                    dtype=component.dtype)
        else:
            # Previously fell through and silently returned None.
            raise ValueError(
                "Unsupported distribution component '{}' for random "
                "sampling!".format(component))

    actions = tree.map_structure(random_component, self.action_space_struct)
    return actions