def try_space_inference_from_list(list_op):
    """
    Attempts to infer a Space from a list op. A list op may be the result of
    fetching state from a python-based memory.

    Args:
        list_op (list): List with arbitrary sub-structure.

    Returns:
        Space: Inferred Space object represented by the list.
    """
    if get_backend() == "pytorch":
        batch_shape = len(list_op)
        if batch_shape > 0:
            # Try to infer more by looking inside the list.
            elem = list_op[0]
            if isinstance(elem, torch.Tensor):
                list_type = elem.dtype
                inner_shape = elem.shape
                return BoxSpace.from_spec(
                    spec=convert_dtype(list_type, "np"),
                    shape=(batch_shape,) + inner_shape,
                    add_batch_rank=True
                )
            elif isinstance(elem, list):
                inner_shape = len(elem)
                return BoxSpace.from_spec(
                    spec=convert_dtype(float, "np"),
                    shape=(batch_shape, inner_shape),
                    add_batch_rank=True
                )
            else:
                # Most general guess is a FloatBox.
                return FloatBox(shape=(batch_shape,))
    else:
        raise ValueError("List inference should only be attempted on the python backend.")
def _graph_fn_call(self, inputs):
    if self.backend == "python" or get_backend() == "python":
        if isinstance(inputs, list):
            inputs = np.asarray(inputs)
        return inputs.astype(dtype=util.convert_dtype(self.to_dtype, to="np"))
    elif get_backend() == "pytorch":
        torch_dtype = util.convert_dtype(self.to_dtype, to="pytorch")
        # Note: `torch.float` is an alias of `torch.float32` (same for int/int32).
        if torch_dtype in (torch.float, torch.float32):
            return inputs.float()
        elif torch_dtype in (torch.int, torch.int32):
            return inputs.int()
        elif torch_dtype == torch.uint8:
            return inputs.byte()
    elif get_backend() == "tf":
        in_space = get_space_from_op(inputs)
        to_dtype = util.convert_dtype(self.to_dtype, to="tf")
        if inputs.dtype != to_dtype:
            ret = tf.cast(x=inputs, dtype=to_dtype)
            if in_space.has_batch_rank is True:
                ret._batch_rank = 0 if in_space.time_major is False else 1
            if in_space.has_time_rank is True:
                ret._time_rank = 0 if in_space.time_major is True else 1
            return ret
        else:
            return inputs
def try_space_inference_from_list(list_op, dtype=None, **low_high):
    """
    Attempts to infer a Space from a list op. A list op may be the result of
    fetching state from a python-based memory.

    Args:
        list_op (list): List with arbitrary sub-structure.
        dtype (Optional[str]): An optional dtype hint for the Space to infer.
        **low_high: Optional `low` and/or `high` kwargs passed through to the Space's constructor.

    Returns:
        Space: Inferred Space object represented by the list.
    """
    shape = len(list_op)
    if shape > 0:
        # Try to infer more by looking inside the list.
        elem = list_op[0]
        if (get_backend() == "pytorch" and isinstance(elem, torch.Tensor)) or \
                (get_backend() == "tf" and isinstance(elem, tf.Tensor)):
            list_type = dtype or elem.dtype
            inner_shape = elem.shape
            return BoxSpace.from_spec(
                spec=convert_dtype(list_type, "np"), shape=(shape,) + inner_shape,
                add_batch_rank=True, **low_high
            )
        elif isinstance(elem, list):
            inner_shape = len(elem)
            return BoxSpace.from_spec(
                spec=convert_dtype(dtype or float, "np"), shape=(shape, inner_shape),
                add_batch_rank=True, **low_high
            )
        # IntBox -> elem must be int and the dtype hint must match (or be None).
        elif isinstance(elem, int) and (dtype is None or dtype == "int"):
            # In case of missing decimal points, check all other items in the list for floats.
            # If there is one float in there -> FloatBox, otherwise -> IntBox.
            has_floats = any(isinstance(el, float) for el in list_op)
            if has_floats is False:
                return IntBox.from_spec(shape=(shape,), add_batch_rank=True, **low_high)
            else:
                return FloatBox.from_spec(shape=(shape,), add_batch_rank=True, **low_high)
        # FloatBox -> elem must be float (or int) and the dtype hint must match (or be None).
        elif isinstance(elem, (float, int)) and (dtype is None or dtype == "float"):
            return FloatBox.from_spec(shape=(shape,), add_batch_rank=True, **low_high)
    # Most general guess is a FloatBox.
    return FloatBox(shape=(shape,), **low_high)
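# A few hedged usage examples for the inference branches above (results shown in the
# comments follow the branch logic; they are illustrative, not verified output):
#
#   try_space_inference_from_list([0, 2, 1])                   # all ints -> IntBox, shape=(3,)
#   try_space_inference_from_list([0, 2.0, 1])                 # one float among ints -> FloatBox, shape=(3,)
#   try_space_inference_from_list([[0.1, 0.2], [0.3, 0.4]])    # nested lists -> float BoxSpace, shape=(2, 2)
#   try_space_inference_from_list([0, 1, 1], dtype="int", high=2)  # hints are passed through to the Space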
def update(self, batch=None):
    # In Ape-X, syncing is based on the number of steps trained, not steps sampled.
    sync_call = None
    # Apex uses train time steps for syncing. Note: `batch` is assumed to be given here,
    # since the bookkeeping below indexes into it.
    self.steps_since_target_net_sync += len(batch["terminals"])
    if self.steps_since_target_net_sync >= self.update_spec["sync_interval"]:
        sync_call = "sync_target_qnet"
        self.steps_since_target_net_sync = 0
    return_ops = [0, 1]
    self.num_updates += 1
    if batch is None:
        # Add some additional return-ops to pull (left out normally for performance reasons).
        ret = self.graph_executor.execute(("update_from_memory", None, return_ops), sync_call)

        # Remove unnecessary return dicts (e.g. sync-op).
        if isinstance(ret, dict):
            ret = ret["update_from_memory"]
        if self.store_last_q_table is True:
            q_table = dict(
                states=ret[3]["states"],
                q_values=ret[4]
            )
            self.last_q_table = q_table
        return ret[1]
    else:
        # Add some additional return-ops to pull (left out normally for performance reasons).
        pps_dtype = self.preprocessed_state_space.dtype
        batch_input = [
            np.asarray(batch["states"], dtype=util.convert_dtype(dtype=pps_dtype, to='np')),
            batch["actions"],
            batch["rewards"],
            batch["terminals"],
            np.asarray(batch["next_states"], dtype=util.convert_dtype(dtype=pps_dtype, to='np')),
            batch["importance_weights"],
            True
        ]
        ret = self.graph_executor.execute(("update_from_external_batch", batch_input), sync_call)

        # Remove unnecessary return dicts (e.g. sync-op).
        if isinstance(ret, dict):
            ret = ret["update_from_external_batch"]
        if self.store_last_q_table is True:
            q_table = dict(
                states=batch["states"],
                q_values=ret[3]
            )
            self.last_q_table = q_table

        # Return [1]=total loss, [2]=loss-per-item (skip [0]=update op).
        return ret[1], ret[2]
def _batch_process_sample(self, states, actions, rewards, next_states, terminals):
    """
    Post-processes a sample batch, e.g. by computing priority weights and compressing.

    Args:
        states (list): List of states.
        actions (list,dict): List of actions, or dict of lists for container actions.
        rewards (list): List of rewards.
        next_states (list): List of next states.
        terminals (list): List of terminals.

    Returns:
        dict: Sample batch dict (plus the batch size).
    """
    weights = np.ones_like(rewards)

    # Compute loss-per-item.
    if self.worker_executes_postprocessing:
        # Next states were just collected, we batch-process them here.
        _, loss_per_item = self.agent.post_process(dict(
            states=states, actions=actions, rewards=rewards, terminals=terminals,
            next_states=next_states, importance_weights=weights
        ))
        weights = np.abs(loss_per_item) + SMALL_NUMBER

    env_dtype = self.vector_env.state_space.dtype
    compressed_states = [
        ray_compress(np.asarray(state, dtype=util.convert_dtype(dtype=env_dtype, to='np')))
        for state in states
    ]
    compressed_next_states = compressed_states[self.n_step_adjustment:] + [
        ray_compress(np.asarray(next_s, dtype=util.convert_dtype(dtype=env_dtype, to='np')))
        for next_s in next_states[-self.n_step_adjustment:]
    ]
    if self.container_actions:
        for name in self.action_space.keys():
            actions[name] = np.array(actions[name])
    else:
        actions = np.array(actions)
    return dict(
        states=compressed_states,
        actions=actions,
        rewards=np.array(rewards),
        terminals=np.array(terminals),
        next_states=compressed_next_states,
        importance_weights=np.array(weights)
    ), len(rewards)
def _graph_fn_get_records(self, num_records=1):
    if get_backend() == "tf":
        size = self.read_variable(self.size)
        # Sample and retrieve a random range, including terminals.
        index = self.read_variable(self.index)
        indices = tf.random_uniform(shape=(num_records,), maxval=size, dtype=tf.int32)
        indices = (index - 1 - indices) % self.capacity
        # Return a default importance weight of one.
        return self._read_records(indices=indices), indices, tf.ones_like(tensor=indices, dtype=tf.float32)
    elif get_backend() == "pytorch":
        indices = []
        if self.size > 0:
            indices = np.random.choice(np.arange(0, self.size), size=int(num_records))
        # Wrap in np.asarray so the arithmetic also works for the empty-memory case.
        indices = (self.index - 1 - np.asarray(indices)) % self.capacity
        records = OrderedDict()
        for name, variable in self.memory.items():
            records[name] = self.read_variable(
                variable, indices,
                dtype=util.convert_dtype(self.flat_record_space[name].dtype, to="pytorch"),
                shape=self.flat_record_space[name].shape
            )
        records = define_by_run_unflatten(records)
        weights = torch.ones(indices.shape, dtype=torch.float32) if len(indices) > 0 \
            else torch.ones(1, dtype=torch.float32)
        return records, indices, weights
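# The `(index - 1 - indices) % capacity` arithmetic above maps uniform offsets to buffer
# positions counted backwards from the most recently written slot, wrapping around the
# ring buffer. A minimal standalone check of that arithmetic, with assumed values:

import numpy as np

capacity, index = 8, 3           # next write goes to slot 3; the newest record sits at slot 2
offsets = np.array([0, 1, 2])    # uniform samples in [0, size)
print((index - 1 - offsets) % capacity)  # -> [2 1 0]: newest, second-newest, third-newest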
def _graph_fn_sample_deterministic(self, distribution):
    if get_backend() == "tf":
        return tf.argmax(input=distribution.probs, axis=-1, output_type=util.convert_dtype("int"))
    elif get_backend() == "pytorch":
        return torch.argmax(distribution.probs, dim=-1).int()
def _graph_fn_get_noise(self):
    if get_backend() == "tf":
        return tf.random_normal(
            shape=(1,) + self.action_space.shape,
            mean=self.mean,
            stddev=self.stddev,
            dtype=convert_dtype(self.action_space.dtype)
        )
def _graph_fn_get_records(self, num_records=1):
    available_records = min(num_records, self.size)
    indices = []
    prob_sum = self.merged_segment_tree.sum_segment_tree.get_sum(0, self.size - 1)
    samples = np.random.random(size=(available_records,)) * prob_sum
    for sample in samples:
        indices.append(self.merged_segment_tree.sum_segment_tree.index_of_prefixsum(prefix_sum=sample))

    sum_prob = self.merged_segment_tree.sum_segment_tree.get_sum() + SMALL_NUMBER
    min_prob = self.merged_segment_tree.min_segment_tree.get_min_value() / sum_prob
    max_weight = (min_prob * self.size) ** (-self.beta)
    weights = []
    for index in indices:
        sample_prob = self.merged_segment_tree.sum_segment_tree.get(index) / sum_prob
        weight = (sample_prob * self.size) ** (-self.beta)
        weights.append(weight / max_weight)

    if get_backend() == "pytorch":
        indices = torch.tensor(indices)
        weights = torch.tensor(weights)
    else:
        indices = np.asarray(indices)
        weights = np.asarray(weights)

    records = OrderedDict()
    for name, variable in self.record_registry.items():
        records[name] = self.read_variable(
            variable, indices,
            dtype=util.convert_dtype(self.flat_record_space[name].dtype, to="pytorch")
        )
    records = define_by_run_unflatten(records)
    return records, indices, weights
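# The weight computation above is the prioritized-replay importance-sampling correction
# w_i = (N * P(i)) ** (-beta), normalized by the largest weight so all weights lie in (0, 1].
# A minimal NumPy sketch of that arithmetic with made-up priorities (the segment trees above
# only make the prefix-sum lookups O(log N)):

import numpy as np

priorities = np.array([0.1, 0.4, 0.2, 0.3])  # hypothetical per-record priorities
beta = 0.5
n = len(priorities)

probs = priorities / priorities.sum()          # P(i)
max_weight = (probs.min() * n) ** (-beta)      # weight of the rarest record
weights = (probs * n) ** (-beta) / max_weight  # normalized IS weights
print(weights)  # approximately [1.0, 0.5, 0.707, 0.577]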
def _process_policy_trajectories(self, states, actions, rewards, terminals, sequence_indices):
    """
    Post-processes policy trajectories.
    """
    if self.worker_executes_postprocessing:
        rewards = self.agent.post_process(dict(
            states=states, rewards=rewards, terminals=terminals, sequence_indices=sequence_indices
        ))

    if self.compress:
        env_dtype = self.vector_env.state_space.dtype
        states = [
            ray_compress(np.asarray(state, dtype=util.convert_dtype(dtype=env_dtype, to='np')))
            for state in states
        ]
    return dict(states=states, actions=actions, rewards=rewards, terminals=terminals), len(rewards)
def _graph_fn_get_noise(self):
    drift = self.theta * (self.mu - self.ou_state)
    if get_backend() == "tf":
        diffusion = self.sigma * tf.random_normal(
            shape=self.action_space.shape, dtype=convert_dtype(self.action_space.dtype)
        )
        delta = drift + diffusion
        return tf.assign_add(ref=self.ou_state, value=delta)
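# For reference, the Ornstein-Uhlenbeck update implemented above is
# x_{t+1} = x_t + theta * (mu - x_t) + sigma * eps, with eps ~ N(0, 1).
# A backend-free NumPy sketch with assumed parameter values (typical DDPG-style settings):

import numpy as np

theta, mu, sigma = 0.15, 0.0, 0.3  # assumed parameters
ou_state = np.zeros(2)             # one noise value per action dimension

def ou_step(state):
    drift = theta * (mu - state)                             # pull back towards mu
    diffusion = sigma * np.random.normal(size=state.shape)   # random kick
    return state + drift + diffusion

for _ in range(3):
    ou_state = ou_step(ou_state)
print(ou_state)  # temporally correlated noise, unlike i.i.d. Gaussian noise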
def update(self, batch=None, time_percentage=None, sequence_indices=None, apply_postprocessing=True):
    """
    Args:
        batch (Optional[dict]): An external batch to update from. If None, update from the agent's memory.
        time_percentage (Optional[float]): Fraction of the maximum number of time steps already passed.
        sequence_indices (Optional[np.ndarray,list]): Sequence indices are used in multi-env batches where
            partial episode fragments may be concatenated within the trajectory. For a single env, these
            are equal to terminals. If None are given, terminals will be used as sequence indices. A
            sequence index is True where an episode fragment ends and False otherwise. Separate indices
            are necessary so that e.g. in GAE discounting, correct bootstrapping is applied depending on
            whether a true terminal state was reached, or a partial episode fragment of an environment ended.

            Example: If env_1 has terminals [0 0 0] for an episode fragment and env_2 has terminals
            [0 0 1], we may pass them in as one combined array [0 0 0 0 0 1] with sequence indices
            showing where each episode ends: [0 0 1 0 0 1].
        apply_postprocessing (Optional[bool]): If True, apply post-processing such as generalised advantage
            estimation to the collected batch in-graph. If False, the update assumes post-processing has
            already been applied. The purpose of internal versus external post-processing is to be able
            to off-load post-processing in large-scale distributed scenarios.
    """
    # TODO: Move update_spec to Worker. Agent should not hold these execution details.
    if time_percentage is None:
        time_percentage = self.timesteps / self.update_spec.get("max_timesteps", 1e6)

    # [0] = the loss; [1] = loss-per-item; [2] = vf-loss; [3] = vf-loss-per-item
    return_ops = [0, 1, 2, 3]
    if batch is None:
        ret = self.graph_executor.execute(("update_from_memory", [True, time_percentage], return_ops))

        # Remove unnecessary return dicts (e.g. sync-op).
        if isinstance(ret, dict):
            ret = ret["update_from_memory"]
    else:
        # No sequence indices means terminals are used in their place.
        if sequence_indices is None:
            sequence_indices = batch["terminals"]
        pps_dtype = self.preprocessed_state_space.dtype
        batch["states"] = np.asarray(batch["states"], dtype=util.convert_dtype(dtype=pps_dtype, to='np'))
        ret = self.graph_executor.execute(("update_from_external_batch", [
            batch["states"], batch["actions"], batch["rewards"], batch["terminals"],
            sequence_indices, apply_postprocessing, time_percentage
        ], return_ops))

        # Remove unnecessary return dicts (e.g. sync-op).
        if isinstance(ret, dict):
            ret = ret["update_from_external_batch"]

    # [0] loss, [1] loss-per-item
    return ret[0], ret[1]
def _graph_fn_get_distribution(self, parameters):
    """
    Args:
        parameters (DataOp): The p value (probability that the distribution returns True).
    """
    if get_backend() == "tf":
        return tf.distributions.Bernoulli(probs=parameters, dtype=util.convert_dtype("bool"))
    elif get_backend() == "pytorch":
        return torch.distributions.Bernoulli(probs=parameters)
def create_variables(self, input_spaces, action_space=None):
    # Create weights matrix and (maybe) biases vector.
    shape = (self.vocab_size, self.embed_dim)
    self.initializer = Initializer.from_spec(shape=shape, specification=self.initializer_spec)
    # TODO: For IMPALA, a partitioner is not needed. Do this later.
    self.embedding_matrix = self.get_variable(
        name="embedding-matrix", shape=shape, dtype=convert_dtype("float"),
        initializer=self.initializer.initializer,
        # partitioner=self.partitioners,
        regularizer=self.regularizers,
        trainable=self.trainable
    )

    self.ids_space = input_spaces["ids"]
def _graph_fn_get_episodes(self, num_episodes=1):
    if get_backend() == "tf":
        stored_episodes = self.read_variable(self.num_episodes)
        available_episodes = tf.minimum(x=num_episodes, y=stored_episodes)

        # Say we have two episodes with this layout:
        # terminals = [0 0 1 0 1]
        # episode_indices = [2, 4]
        # If we want to fetch the most recent episode, the start index is:
        # stored_episodes - 1 - num_episodes = 2 - 1 - 1 = 0, which points to buffer index 2.
        # The next episode starts one element after this, hence + 1.
        # However, this points to index -1 if stored_episodes == available_episodes,
        # in which case we want start = 0 to get everything.
        start = tf.cond(
            pred=tf.equal(x=stored_episodes, y=available_episodes),
            true_fn=lambda: 0,
            false_fn=lambda: self.episode_indices[stored_episodes - available_episodes - 1] + 1
        )
        # The end index is just the pointer to the most recent episode.
        limit = self.episode_indices[stored_episodes - 1]
        limit += tf.where(condition=(start < limit), x=0, y=self.capacity - 1)
        # limit = tf.Print(limit, [stored_episodes, start, limit], summarize=100, message="start | limit")
        indices = tf.range(start=start, limit=limit + 1) % self.capacity
        return self._read_records(indices=indices)
    elif get_backend() == "pytorch":
        stored_episodes = self.num_episodes
        available_episodes = min(num_episodes, self.num_episodes)

        if stored_episodes == available_episodes:
            start = 0
        else:
            start = self.episode_indices[stored_episodes - available_episodes - 1] + 1

        # The end index is just the pointer to the most recent episode.
        limit = self.episode_indices[stored_episodes - 1]
        if start >= limit:
            limit += self.capacity - 1
        indices = torch.arange(start, limit + 1) % self.capacity
        records = DataOpDict()
        for name, variable in self.memory.items():
            records[name] = self.read_variable(
                variable, indices,
                dtype=util.convert_dtype(self.flat_record_space[name].dtype, to="pytorch"),
                shape=self.flat_record_space[name].shape
            )
        records = define_by_run_unflatten(records)
        return records
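# The start/limit arithmetic in both branches can be checked with the layout from the
# comment above. A standalone sketch mirroring the PyTorch branch (names and values assumed):

capacity = 5
episode_indices = [2, 4]   # terminals = [0, 0, 1, 0, 1] -> episodes end at slots 2 and 4
stored_episodes = 2

def episode_buffer_indices(num_episodes):
    available = min(num_episodes, stored_episodes)
    if stored_episodes == available:
        start = 0
    else:
        start = episode_indices[stored_episodes - available - 1] + 1
    limit = episode_indices[stored_episodes - 1]
    if start >= limit:  # wrap-around case
        limit += capacity - 1
    return [i % capacity for i in range(start, limit + 1)]

print(episode_buffer_indices(1))  # [3, 4] -> only the most recent episode
print(episode_buffer_indices(2))  # [0, 1, 2, 3, 4] -> both episodes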
def _graph_fn_pick(self, key, use_exploration, epsilon_decisions, sample):
    """
    Exploration for discrete action spaces. Either picks a random action (if `use_exploration`
    and the respective `epsilon_decisions` entry are True), or returns the non-exploratory action.

    Args:
        key (str): The flat key of the action-space component to explore.
        use_exploration (DataOp): The master switch determining whether to use exploration or not.
        epsilon_decisions (DataOp): The bool coming from the epsilon-exploration component specifying
            whether to use exploration or not (per batch item).
        sample (DataOp): The output from a distribution's "sample_deterministic" OR "sample_stochastic".

    Returns:
        DataOp: The DataOp representing the action. This will match the shape of self.action_space.
    """
    if get_backend() == "tf":
        if use_exploration is False:
            return sample
        else:
            random_actions = tf.random_uniform(
                shape=tf.shape(sample),
                maxval=self.flat_action_space[key].num_categories,
                dtype=convert_dtype("int")
            )
            return tf.where(
                # `use_exploration` given as actual bool or as tensor?
                condition=epsilon_decisions if use_exploration is True else tf.logical_and(
                    use_exploration, epsilon_decisions
                ),
                x=random_actions,
                y=sample
            )
    elif get_backend() == "pytorch":
        # N.b.: Different order versus TF because we don't want to execute the sampling below
        # unless necessary.
        if use_exploration is False:
            return sample
        if self.sample_obj is None:
            # Don't create new sample objects every time.
            self.sample_obj = torch.distributions.Uniform(0, self.flat_action_space[key].num_categories)

        random_actions = self.sample_obj.sample(sample.shape).int()
        if use_exploration is True:
            return torch.where(epsilon_decisions, random_actions, sample)
        else:
            if not isinstance(use_exploration, torch.ByteTensor):
                use_exploration = use_exploration.byte()
            if not isinstance(epsilon_decisions, torch.ByteTensor):
                epsilon_decisions = epsilon_decisions.byte()
            return torch.where(use_exploration & epsilon_decisions, random_actions, sample)
def _graph_fn_get_records(self, num_records=1):
    if get_backend() == "tf":
        index = self.read_variable(self.index)
        indices = tf.range(start=index - num_records, limit=index) % self.capacity
        return self._read_records(indices=indices)
    elif get_backend() == "pytorch":
        indices = np.arange(self.index - num_records, self.index) % self.capacity
        records = OrderedDict()
        for name, variable in self.record_registry.items():
            records[name] = self.read_variable(
                variable, indices,
                dtype=util.convert_dtype(self.record_space[name].dtype, to="pytorch")
            )
        return records
def _graph_fn_get_records(self, num_records=1):
    if get_backend() == "tf":
        stored_records = self.read_variable(self.size)
        available_records = tf.minimum(x=num_records, y=stored_records)
        index = self.read_variable(self.index)
        indices = tf.range(start=index - available_records, limit=index) % self.capacity
        return self._read_records(indices=indices)
    elif get_backend() == "pytorch":
        available_records = min(num_records, self.size)
        indices = np.arange(self.index - available_records, self.index) % self.capacity
        records = DataOpDict()
        for name, variable in self.memory.items():
            records[name] = self.read_variable(
                variable, indices,
                dtype=util.convert_dtype(self.flat_record_space[name].dtype, to="pytorch"),
                shape=self.flat_record_space[name].shape
            )
        records = define_by_run_unflatten(records)
        return records
def test_specifiable_server(self):
    action_space = IntBox(2)
    state_space = FloatBox()
    env_spec = dict(type="random_env", state_space=state_space, action_space=action_space, deterministic=True)
    # Create the server, but don't start it yet. This will be done fully automatically by the tf-Session.
    specifiable_server = SpecifiableServer(
        Environment, env_spec, dict(step_flow=[state_space, float, bool]), "terminate"
    )

    # `ret1`/`ret2` are ops now in the graph.
    ret1 = specifiable_server.step_flow(action_space.sample())
    ret2 = specifiable_server.step_flow(action_space.sample())

    # Check all 3 outputs of the Env step (next state, reward, terminal).
    self.assertEqual(ret1[0].shape, ())
    self.assertEqual(ret1[0].dtype, convert_dtype("float32"))
    self.assertEqual(ret1[1].shape, ())
    self.assertEqual(ret1[1].dtype, convert_dtype("float32"))
    self.assertEqual(ret1[2].shape, ())
    self.assertEqual(ret1[2].dtype, convert_dtype("bool"))

    self.assertEqual(ret2[0].shape, ())
    self.assertEqual(ret2[0].dtype, convert_dtype("float32"))
    self.assertEqual(ret2[1].shape, ())
    self.assertEqual(ret2[1].dtype, convert_dtype("float32"))
    self.assertEqual(ret2[2].shape, ())
    self.assertEqual(ret2[2].dtype, convert_dtype("bool"))

    # Start the session and run the ops, then check their actual values.
    with tf.train.SingularMonitoredSession(hooks=[SpecifiableServerHook()]) as sess:
        out1 = sess.run(ret1)
        out2 = sess.run(ret2)

    # Next state.
    self.assertAlmostEqual(out1[0], 0.7713, places=4)
    self.assertAlmostEqual(out2[0], 0.7488, places=4)
    # Reward.
    self.assertAlmostEqual(out1[1], 0.0208, places=4)
    self.assertAlmostEqual(out2[1], 0.4985, places=4)
    # Terminal.
    self.assertTrue(out1[2] is np.bool_(False))
    self.assertTrue(out2[2] is np.bool_(False))
def get_variable(self, name, is_input_feed=False, add_batch_rank=None, add_time_rank=None,
                 time_major=None, is_python=False, local=False, **kwargs):
    add_batch_rank = self.has_batch_rank if add_batch_rank is None else add_batch_rank
    batch_rank = () if add_batch_rank is False else (None,) if add_batch_rank is True else (add_batch_rank,)

    add_time_rank = self.has_time_rank if add_time_rank is None else add_time_rank
    time_rank = () if add_time_rank is False else (None,) if add_time_rank is True else (add_time_rank,)

    time_major = self.time_major if time_major is None else time_major
    if time_major is False:
        shape = batch_rank + time_rank + self.shape
    else:
        shape = time_rank + batch_rank + self.shape

    if is_python is True or get_backend() == "python":
        if isinstance(add_batch_rank, int):
            if isinstance(add_time_rank, int):
                if time_major:
                    var = [[0 for _ in range_(add_batch_rank)] for _ in range_(add_time_rank)]
                else:
                    var = [[0 for _ in range_(add_time_rank)] for _ in range_(add_batch_rank)]
            else:
                var = [0 for _ in range_(add_batch_rank)]
        elif isinstance(add_time_rank, int):
            var = [0 for _ in range_(add_time_rank)]
        else:
            var = []

        # Un-indent and just directly construct pytorch?
        if get_backend() == "pytorch" and is_input_feed:
            # Convert to a PyTorch tensor as a faux placeholder.
            return torch.zeros(shape, dtype=convert_dtype(dtype=self.dtype, to="pytorch"))
        else:
            # TODO: also convert?
            return var

    elif get_backend() == "tf":
        # TODO: re-evaluate the cutting of a leading '/_?' (tf doesn't like it)
        name = re.sub(r'^/_?', "", name)
        if is_input_feed:
            variable = tf.placeholder(dtype=convert_dtype(self.dtype), shape=shape, name=name)
        else:
            init_spec = kwargs.pop("initializer", None)
            # Bools should be initializable via 0 or not 0.
            if self.dtype == np.bool_ and isinstance(init_spec, (int, float)):
                init_spec = (init_spec != 0)

            if self.dtype == np.str_ and init_spec == 0:
                initializer = None
            else:
                initializer = Initializer.from_spec(shape=shape, specification=init_spec).initializer

            variable = tf.get_variable(
                name, shape=shape, dtype=convert_dtype(self.dtype), initializer=initializer,
                collections=[tf.GraphKeys.GLOBAL_VARIABLES if local is False else tf.GraphKeys.LOCAL_VARIABLES],
                **kwargs
            )
        # Add batch/time rank flags to the op.
        if self.has_batch_rank:
            variable._batch_rank = 0 if self.time_major is False else 1
        if self.has_time_rank:
            variable._time_rank = 1 if self.time_major is False else 0
        return variable
def _graph_fn_decayed_value(self, time_step):
    """
    Args:
        time_step (DataOp): The int-type DataOp that holds the current global time step.

    Returns:
        DataOp: The decayed value depending on the current time step.
    """
    if get_backend() == "tf":
        smaller_than_start = time_step <= self.start_timestep
        shape = tf.shape(time_step)
        # `time_step` comes in as a time-sequence of time steps.
        if shape.shape[0] > 0:
            return tf.where(
                condition=smaller_than_start,
                # We are still in pre-decay time.
                x=tf.tile(tf.constant([self.from_]), multiples=shape),
                # We are past pre-decay time.
                y=tf.where(
                    condition=(time_step >= self.start_timestep + self.num_timesteps),
                    # We are in post-decay time.
                    x=tf.tile(tf.constant([self.to_]), multiples=shape),
                    # We are inside the decay time window.
                    y=self._graph_fn_decay(
                        tf.cast(x=time_step - self.start_timestep, dtype=util.convert_dtype("float"))
                    ),
                    name="cond-past-end-time"
                ),
                name="cond-before-start-time"
            )
        # Single 0D time step.
        else:
            return tf.cond(
                pred=smaller_than_start,
                # We are still in pre-decay time.
                true_fn=lambda: self.from_,
                # We are past pre-decay time.
                false_fn=lambda: tf.cond(
                    pred=(time_step >= self.start_timestep + self.num_timesteps),
                    # We are in post-decay time.
                    true_fn=lambda: self.to_,
                    # We are inside the decay time window.
                    false_fn=lambda: self._graph_fn_decay(
                        tf.cast(x=time_step - self.start_timestep, dtype=util.convert_dtype("float"))
                    ),
                ),
            )
    elif get_backend() == "pytorch":
        if time_step is None:
            time_step = torch.tensor([0])
        smaller_than_start = time_step <= self.start_timestep
        if time_step.dim() == 0:
            time_step = time_step.unsqueeze(-1)
        shape = time_step.shape
        # `time_step` comes in as a time-sequence of time steps.
        # TODO: tile shape is confusing -> the number of tiles should be shape[0], not shape?
        if shape[0] > 0:
            past_decay = torch.where(
                (time_step >= self.start_timestep + self.num_timesteps),
                # We are in post-decay time.
                pytorch_tile(torch.tensor([self.to_]), shape),
                # We are inside the decay time window.
                torch.tensor(self._graph_fn_decay(
                    torch.FloatTensor([time_step - self.start_timestep])
                ))
            )
            return torch.where(
                smaller_than_start,
                # We are still in pre-decay time.
                pytorch_tile(torch.tensor([self.from_]), shape),
                # We are past pre-decay time.
                past_decay
            )
        # Single 0D time step.
        else:
            if smaller_than_start:
                return self.from_
            else:
                if time_step >= self.start_timestep + self.num_timesteps:
                    return self.to_
                else:
                    return self._graph_fn_decay(torch.FloatTensor([time_step - self.start_timestep]))
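# `_graph_fn_decay` is supplied by subclasses. For intuition, here is what the three-phase
# schedule above computes for a linear decay, as a plain-Python sketch with assumed parameters:

def linear_decayed_value(time_step, from_=1.0, to_=0.1, start_timestep=100, num_timesteps=1000):
    if time_step <= start_timestep:                  # still in pre-decay time
        return from_
    if time_step >= start_timestep + num_timesteps:  # past the decay window
        return to_
    # Inside the decay window: linear interpolation (one possible `_graph_fn_decay`).
    frac = (time_step - start_timestep) / num_timesteps
    return from_ + frac * (to_ - from_)

print(linear_decayed_value(50))    # 1.0
print(linear_decayed_value(600))   # 0.55
print(linear_decayed_value(2000))  # 0.1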
def __init__(self, shape, specification=None, **kwargs):
    """
    Args:
        shape (tuple): The shape of the Variables to initialize.
        specification (any): A spec that determines the nature of this initializer.

    Raises:
        RLGraphError: If a fixed shape in `specification` does not match `shape`.
    """
    super(Initializer, self).__init__()

    # The shape of the variable to be initialized.
    self.shape = shape
    # The actual underlying initializer object.
    self.initializer = None

    # Truncated Normal.
    if specification == "truncated_normal":
        if get_backend() == "tf":
            # Use the first dimension (num_rows or batch rank) to figure out the stddev.
            stddev = 1 / math.sqrt(shape[0] if isinstance(shape, (tuple, list)) and len(shape) > 0 else 1.0)
            self.initializer = tf.truncated_normal_initializer(stddev=stddev)
        elif get_backend() == "pytorch":
            stddev = 1 / math.sqrt(shape[0] if isinstance(shape, (tuple, list)) and len(shape) > 0 else 1.0)
            self.initializer = lambda t: torch.nn.init.normal_(tensor=t, std=stddev)

    # No spec -> Leave initializer as None for TF (will then use the default;
    # e.g. for tf weights: Xavier uniform). For PyTorch, we still have to set Xavier.
    # TODO: `is None or is False` is very unclean because TF and PT have different defaults ->
    # change to clean default values for weights and biases.
    elif specification is None or specification is False:
        if get_backend() == "tf":
            pass
        elif get_backend() == "pytorch":
            self.initializer = torch.nn.init.xavier_uniform_

    # Fixed values spec -> Use them, just do sanity checking.
    else:
        # Constant value across the variable.
        if isinstance(specification, (float, int)):
            pass
        # A 1D initializer (e.g. for biases).
        elif isinstance(specification, list):
            array = np.asarray(specification, dtype=convert_dtype("float32", "np"))
            if array.shape != self.shape:
                raise RLGraphError("ERROR: Number/shape of given items ({}) not identical with shape ({})!".
                                   format(array.shape, self.shape))
        # An nD initializer (numpy array).
        elif isinstance(specification, np.ndarray):
            if specification.shape != self.shape:
                raise RLGraphError("ERROR: Shape of given items ({}) not identical with shape ({})!".
                                   format(specification.shape, self.shape))
        # Unknown type.
        else:
            raise RLGraphError("ERROR: Bad specification given ({}) for Initializer object!".format(specification))

        # Create the backend initializer object.
        if get_backend() == "tf":
            self.initializer = tf.constant_initializer(value=specification, dtype=convert_dtype("float32"))
        elif get_backend() == "pytorch":
            self.initializer = lambda t: torch.nn.init.constant_(tensor=t, val=specification)
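# The truncated-normal branch above uses the heuristic stddev = 1 / sqrt(fan_in), taking the
# first shape dimension as fan-in. A quick standalone check of that arithmetic:

import math

def truncated_normal_stddev(shape):
    fan_in = shape[0] if isinstance(shape, (tuple, list)) and len(shape) > 0 else 1.0
    return 1 / math.sqrt(fan_in)

print(truncated_normal_stddev((256, 64)))  # 0.0625
print(truncated_normal_stddev(()))         # 1.0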
def call(*args):
    if isinstance(self.output_spaces, dict):
        assert method_name in self.output_spaces, \
            "ERROR: Method '{}' not specified in output_spaces: {}!".format(method_name, self.output_spaces)
        specs = self.output_spaces[method_name]
    else:
        specs = self.output_spaces(method_name)

    if specs is None:
        raise RLGraphError("No Space information received for method '{}:{}'".format(
            self.specifiable_class.__name__, method_name
        ))

    dtypes = []
    shapes = []
    return_slots = []
    for i, space in enumerate(force_list(specs)):
        assert not isinstance(space, ContainerSpace)
        # Expecting an op (space 0).
        if space == 0:
            dtypes.append(0)
            shapes.append(0)
            return_slots.append(i)
        # Expecting a tensor.
        elif space is not None:
            dtypes.append(convert_dtype(space.dtype))
            shapes.append(space.shape)
            return_slots.append(i)

    if get_backend() == "tf":
        # This function will send the method-call command via the out-pipe to the remote (server)
        # Specifiable object - all in-graph - and return the results to be used further by other graph ops.
        def py_call(*call_args):
            call_args = [arg.decode('UTF-8') if isinstance(arg, bytes) else arg for arg in call_args]
            try:
                self.out_pipe.send(call_args)
                received_results = self.out_pipe.recv()

                # If an error occurred, it'll be passed back through the pipe.
                if isinstance(received_results, Exception):
                    raise received_results
                elif received_results is not None:
                    return received_results
            except Exception as e:
                if isinstance(e, IOError):
                    raise StopIteration()  # Clean exit.
                else:
                    print("ERROR: Sent={} Exception={}".format(call_args, e))
                    raise

        results = tf.py_func(py_call, (method_name,) + tuple(args), dtypes, name=method_name)

        # Force known shapes on the returned tensors.
        for i, (result, shape) in enumerate(zip(results, shapes)):
            # Not an op (ops have shape=0).
            if shape != 0:
                result.set_shape(shape)
    else:
        raise NotImplementedError

    return results[0] if len(dtypes) == 1 else tuple(results)
def _graph_fn_get_distribution(self, parameters):
    if get_backend() == "tf":
        return tf.distributions.Categorical(probs=parameters, dtype=util.convert_dtype("int"))
    elif get_backend() == "pytorch":
        return torch.distributions.Categorical(probs=parameters)
def get_space_from_op(op):
    """
    Tries to re-create a Space object given some DataOp (e.g. a tf op).
    This is useful for shape inference on returned ops after having run through a graph_fn.

    Args:
        op (DataOp): The op to create a corresponding Space for.

    Returns:
        Space: The inferred Space object.
    """
    # a Dict
    if isinstance(op, dict):  # DataOpDict
        spec = {}
        add_batch_rank = False
        add_time_rank = False
        for key, value in op.items():
            spec[key] = get_space_from_op(value)
            if spec[key].has_batch_rank:
                add_batch_rank = True
            if spec[key].has_time_rank:
                add_time_rank = True
        return Dict(spec, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank)
    # a Tuple
    elif isinstance(op, tuple):  # DataOpTuple
        spec = []
        add_batch_rank = False
        add_time_rank = False
        for i in op:
            space = get_space_from_op(i)
            if space == 0:
                return 0
            spec.append(space)
            if spec[-1].has_batch_rank:
                add_batch_rank = True
            if spec[-1].has_time_rank:
                add_time_rank = True
        return Tuple(spec, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank)
    # primitive Space -> infer from op dtype and shape
    else:
        # Op itself is a single value of a simple python type.
        if isinstance(op, (bool, int, float)):
            return BoxSpace.from_spec(spec=type(op), shape=())
        elif isinstance(op, str):
            raise RLGraphError("Cannot derive Space from non-allowed op ({})!".format(op))
        # A single numpy array.
        elif isinstance(op, np.ndarray):
            return BoxSpace.from_spec(spec=convert_dtype(str(op.dtype), "np"), shape=op.shape)
        elif isinstance(op, list):
            return try_space_inference_from_list(op)
        # No Space: e.g. the tf.no_op, a distribution (anything that's not a tensor).
        # PyTorch Tensors do not have get_shape, so we must check the backend.
        elif hasattr(op, "dtype") is False or (get_backend() == "tf" and not hasattr(op, "get_shape")):
            return 0
        # Some tensor: can be converted into a BoxSpace.
        else:
            shape = get_shape(op)
            # Unknown shape (e.g. a cond op).
            if shape is None:
                return 0
            add_batch_rank = False
            add_time_rank = False
            time_major = False
            new_shape = list(shape)

            # New way: Detect via op._batch_rank and op._time_rank properties where these ranks are.
            if hasattr(op, "_batch_rank") and isinstance(op._batch_rank, int):
                add_batch_rank = True
                new_shape[op._batch_rank] = -1
            # elif get_backend() == "pytorch":
            #     if isinstance(op, torch.Tensor):
            #         if op.dim() > 1 and shape[0] == 1:
            #             add_batch_rank = True
            #             new_shape[0] = 1
            if hasattr(op, "_time_rank") and isinstance(op._time_rank, int):
                add_time_rank = True
                if op._time_rank == 0:
                    time_major = True
                new_shape[op._time_rank] = -1
            shape = tuple(n for n in new_shape if n != -1)

            # Old way: Detect automatically whether the first rank(s) are batch and/or time rank.
            if add_batch_rank is False and add_time_rank is False and shape != () and shape[0] is None:
                if len(shape) > 1 and shape[1] is None:
                    # raise RLGraphError(
                    #     "ERROR: Cannot determine time-major flag if both batch- and time-ranks are in an op "
                    #     "w/o saying which rank goes to which position!"
                    # )
                    shape = shape[2:]
                    add_time_rank = True
                else:
                    shape = shape[1:]
                    add_batch_rank = True
            # TODO: If op._batch_rank and/or op._time_rank are not set, set them now.

            base_dtype = op.dtype.base_dtype if hasattr(op.dtype, "base_dtype") else op.dtype
            # PyTorch does not have a bool tensor type (uses uint8).
            if get_backend() == "pytorch":
                if op.dtype is torch.uint8:
                    base_dtype = bool
            base_dtype_str = str(base_dtype)
            # a FloatBox
            if "float" in base_dtype_str:
                return FloatBox(shape=shape, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank,
                                time_major=time_major, dtype=convert_dtype(base_dtype, "np"))
            # an IntBox
            elif "int" in base_dtype_str:
                high = getattr(op, "_num_categories", None)
                return IntBox(high, shape=shape, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank,
                              time_major=time_major, dtype=convert_dtype(base_dtype, "np"))
            # a BoolBox
            elif "bool" in base_dtype_str:
                return BoolBox(shape=shape, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank,
                               time_major=time_major)
            # a TextBox
            elif "string" in base_dtype_str:
                return TextBox(shape=shape, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank,
                               time_major=time_major)

    raise RLGraphError("ERROR: Cannot derive Space from op '{}' (unknown type?)!".format(op))
def contains(self, sample):
    if self.shape == ():
        return isinstance(sample, (bool, np.bool_))
    else:
        return convert_dtype(sample.dtype, "np") == np.bool_
def get_space_from_op(op, read_key_hints=False, dtype=None, low=None, high=None):
    """
    Tries to re-create a Space object given some DataOp (e.g. a tf op).
    This is useful for shape inference on returned ops after having run through a graph_fn.

    Args:
        op (DataOp): The op to create a corresponding Space for.
        read_key_hints (bool): If True, tries to read type- and low/high-hints from the pattern of the
            Dict keys (str).
            - Preceding "I_": IntBox, "F_": FloatBox, "B_": BoolBox.
            - Succeeding "_low=0.0": Low value.
            - Succeeding "_high=1.0": High value.

            E.g. Dict key "F_somekey_low=0.0_high=2.0" indicates a FloatBox with low=0.0 and high=2.0.
            Dict key "I_somekey" indicates an IntBox with no limits.
            Dict key "I_somekey_high=5" indicates an IntBox with high=5 (values 0-4).

            Default: False.
        dtype (Optional[str]): An optional hint as to what the `dtype` of a BoxSpace should be.
        low (Optional[int,float]): An optional hint as to what the `low` property of a BoxSpace should be.
        high (Optional[int,float]): An optional hint as to what the `high` property of a BoxSpace should be.

    Returns:
        Space: The inferred Space object.
    """
    # a Dict
    if isinstance(op, dict):  # DataOpDict
        spec = {}
        add_batch_rank = False
        add_time_rank = False
        for key, value in op.items():
            # Try to infer hints from the key.
            if read_key_hints is True:
                dtype, low, high = get_space_hints_from_dict_key(key)
            spec[key] = get_space_from_op(value, dtype=dtype, low=low, high=high)
            # Return 0 if the value's Space could not be inferred.
            if spec[key] == 0:
                return 0
            if spec[key].has_batch_rank:
                add_batch_rank = True
            if spec[key].has_time_rank:
                add_time_rank = True
        return Dict(spec, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank)
    # a Tuple
    elif isinstance(op, tuple):  # DataOpTuple
        spec = []
        add_batch_rank = False
        add_time_rank = False
        for i in op:
            space = get_space_from_op(i)
            if space == 0:
                return 0
            spec.append(space)
            if spec[-1].has_batch_rank:
                add_batch_rank = True
            if spec[-1].has_time_rank:
                add_time_rank = True
        return Tuple(spec, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank)
    # primitive Space -> infer from op dtype and shape
    else:
        low_high = {}
        if high is not None:
            low_high["high"] = high
        if low is not None:
            low_high["low"] = low
        # Op itself is a single value of a simple python type.
        if isinstance(op, (bool, int, float)):
            return BoxSpace.from_spec(spec=(dtype or type(op)), shape=(), **low_high)
        elif isinstance(op, str):
            raise RLGraphError("Cannot derive Space from non-allowed op ({})!".format(op))
        # A single numpy array.
        elif isinstance(op, np.ndarray):
            return BoxSpace.from_spec(spec=convert_dtype(str(op.dtype), "np"), shape=op.shape, **low_high)
        elif isinstance(op, list):
            return try_space_inference_from_list(op, dtype=dtype, **low_high)
        # No Space: e.g. the tf.no_op, a distribution (anything that's not a tensor).
        # PyTorch Tensors do not have get_shape, so we must check the backend.
        elif hasattr(op, "dtype") is False or (get_backend() == "tf" and not hasattr(op, "get_shape")):
            return 0
        # Some tensor: can be converted into a BoxSpace.
        else:
            shape = get_shape(op)
            # Unknown shape (e.g. a cond op).
            if shape is None:
                return 0
            add_batch_rank = False
            add_time_rank = False
            time_major = False
            new_shape = list(shape)

            # New way: Detect via op._batch_rank and op._time_rank properties where these ranks are.
            if hasattr(op, "_batch_rank") and isinstance(op._batch_rank, int):
                add_batch_rank = True
                new_shape[op._batch_rank] = -1
            # elif get_backend() == "pytorch":
            #     if isinstance(op, torch.Tensor):
            #         if op.dim() > 1 and shape[0] == 1:
            #             add_batch_rank = True
            #             new_shape[0] = 1
            if hasattr(op, "_time_rank") and isinstance(op._time_rank, int):
                add_time_rank = True
                if op._time_rank == 0:
                    time_major = True
                new_shape[op._time_rank] = -1
            shape = tuple(n for n in new_shape if n != -1)

            # Old way: Detect automatically whether the first rank(s) are batch and/or time rank.
            if add_batch_rank is False and add_time_rank is False and shape != () and shape[0] is None:
                if len(shape) > 1 and shape[1] is None:
                    # raise RLGraphError(
                    #     "ERROR: Cannot determine time-major flag if both batch- and time-ranks are in an op "
                    #     "w/o saying which rank goes to which position!"
                    # )
                    shape = shape[2:]
                    add_time_rank = True
                else:
                    shape = shape[1:]
                    add_batch_rank = True
            # TODO: If op._batch_rank and/or op._time_rank are not set, set them now.

            base_dtype = op.dtype.base_dtype if hasattr(op.dtype, "base_dtype") else op.dtype
            # PyTorch does not have a bool tensor type (uses uint8).
            if get_backend() == "pytorch":
                if op.dtype is torch.uint8:
                    base_dtype = bool
            base_dtype_str = str(base_dtype)
            # a FloatBox
            if "float" in base_dtype_str:
                return FloatBox(shape=shape, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank,
                                time_major=time_major, dtype=convert_dtype(base_dtype, "np"))
            # an IntBox
            elif "int" in base_dtype_str:
                high_ = high or getattr(op, "_num_categories", None)
                return IntBox(high_, shape=shape, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank,
                              time_major=time_major, dtype=convert_dtype(base_dtype, "np"))
            # a BoolBox
            elif "bool" in base_dtype_str:
                return BoolBox(shape=shape, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank,
                               time_major=time_major)
            # a TextBox
            elif "string" in base_dtype_str:
                return TextBox(shape=shape, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank,
                               time_major=time_major)

    raise RLGraphError("ERROR: Cannot derive Space from op '{}' (unknown type?)!".format(op))