def _graph_fn_sync(self, values_):
    """
    Generates the op that syncs this Synchronizable's parent's variable values from another Synchronizable
    Component.

    Incoming values and the parent's variables are both sorted by key and then paired position by position
    (key equality is currently not enforced).

    Args:
        values_ (DataOpDict): The dict of variable values (coming from the "variables"-Socket of any other
            Component) that need to be assigned to this Component's parent's variables.
            The keys in the dict refer to the names of our parent's variables and must match their names.

    Returns:
        DataOp: The op that executes the syncing (tf backend). For the pytorch backend, the values are
            assigned immediately through the variable refs and None is returned.

    Raises:
        RLGraphError: If the number of incoming values differs from the number of the parent's variables, or
            (tf backend only) if the shapes of a paired source/target variable differ.
    """
    backend = get_backend()

    if backend == "tf":
        parents_vars = self.parent_component.get_variables(
            collections=self.collections, custom_scope_separator="-"
        )
        syncs_from, syncs_to = (sorted(values_.items()), sorted(parents_vars.items()))
        if len(syncs_from) != len(syncs_to):
            raise RLGraphError("ERROR: Number of Variables to sync must match! "
                               "We have {} syncs_from and {} syncs_to.".format(len(syncs_from), len(syncs_to)))

        # Collect one assign op per (source, target) pair.
        syncs = []
        for (key_from, var_from), (key_to, var_to) in zip(syncs_from, syncs_to):
            # TODO: Also check that the names' ends match (without the global scope)?
            if get_shape(var_from) != get_shape(var_to):
                raise RLGraphError("ERROR: Variable shapes for syncing must match! "
                                   "Shape mismatch between from={} ({}) and to={} ({}).".
                                   format(key_from, get_shape(var_from), key_to, get_shape(var_to)))
            syncs.append(self.assign_variable(var_to, var_from))

        # Bundle everything into one "sync"-op.
        with tf.control_dependencies(syncs):
            return tf.no_op(name="sync-to-{}".format(self.parent_component.name))

    elif backend == "pytorch":
        # Get refs(!) so that the parent's variables can be overwritten in place.
        parents_vars = self.parent_component.get_variables(
            collections=self.collections, custom_scope_separator="-", get_ref=True
        )
        syncs_from, sync_to_ref = (sorted(values_.items()), sorted(parents_vars.items()))
        # zip() would silently stop at the shorter list and sync only a subset of the variables:
        # Fail loudly instead, exactly like the tf branch does.
        if len(syncs_from) != len(sync_to_ref):
            raise RLGraphError("ERROR: Number of Variables to sync must match! "
                               "We have {} syncs_from and {} syncs_to.".format(len(syncs_from), len(sync_to_ref)))

        # Assign parameters of layers.
        for (_, var_from), (_, ref_to) in zip(syncs_from, sync_to_ref):
            ref_to.set_value(var_from)
        return None
def _graph_fn_apply(self, preprocessing_inputs):
    """
    Sequences (stitches) together the incoming inputs by using our buffer (with stored older records).
    Sequencing happens within the last rank if `self.add_rank` is False, otherwise a new rank is added at the end for
    the sequencing.

    Args:
        preprocessing_inputs (FlattenedDataOp): The FlattenedDataOp to be sequenced.
            One sequence is generated separately for each SingleDataOp in api_methods.

    Returns:
        FlattenedDataOp: The FlattenedDataOp holding the sequenced SingleDataOps as values (tf backend).
            The python and pytorch branches return the single stacked/concatenated array or tensor built from
            `self.deque` instead.
    """
    # Python (numpy) backend: Records are kept in `self.deque`.
    # NOTE(review): presumably `self.deque` is bounded to `self.sequence_length` items - confirm where it is created.
    if self.backend == "python" or get_backend() == "python":
        # After a reset (index is -1), fill the deque with `self.sequence_length` copies of the input.
        if self.index == -1:
            for _ in range_(self.sequence_length):
                self.deque.append(preprocessing_inputs)
        # A normal (index != -1) insert.
        else:
            self.deque.append(preprocessing_inputs)
        self.index = (self.index + 1) % self.sequence_length

        # Add the sequence-rank to the end of our inputs.
        if self.add_rank:
            sequence = np.stack(self.deque, axis=-1)
        # Concat the sequence items in the last rank.
        else:
            sequence = np.concatenate(self.deque, axis=-1)

        # TODO move into transpose component.
        # Swaps axes 1 and 3 (this requires a rank-4 `sequence`).
        if self.in_data_format == "channels_last" and self.out_data_format == "channels_first":
            sequence = sequence.transpose((0, 3, 2, 1))

        return sequence

    elif get_backend() == "pytorch":
        # After a reset (index is -1), fill the deque `self.sequence_length` times with the input
        # (for dict inputs: with each of the dict's values).
        if self.index == -1:
            for _ in range_(self.sequence_length):
                if isinstance(preprocessing_inputs, dict):
                    for key, value in preprocessing_inputs.items():
                        self.deque.append(value)
                else:
                    self.deque.append(preprocessing_inputs)
        else:
            # NOTE(review): As laid out here, the index is only advanced on this non-reset path and once per
            # appended dict value - confirm against the original indentation that this is intended.
            if isinstance(preprocessing_inputs, dict):
                for key, value in preprocessing_inputs.items():
                    self.deque.append(value)
                    self.index = (self.index + 1) % self.sequence_length
            else:
                self.deque.append(preprocessing_inputs)
                self.index = (self.index + 1) % self.sequence_length

        # Add the sequence-rank to the end of our inputs.
        if self.add_rank:
            sequence = torch.stack(torch.tensor(self.deque), dim=-1)
        # Concat the sequence items in the last rank.
        else:
            # torch.cat needs tensors: Convert all non-tensor records first.
            data = []
            for t in self.deque:
                if isinstance(t, torch.Tensor):
                    data.append(t)
                else:
                    data.append(torch.tensor(t))
            sequence = torch.cat(data, dim=-1)

        # TODO remove when transpose component implemented.
        if self.in_data_format == "channels_last" and self.out_data_format == "channels_first":
            # Problem: PyTorch does not have data format options in conv layers ->
            # only channels first supported.
            # -> Confusingly have to transpose.
            # B W H C -> B C W H
            # e.g. atari: [4 84 84 4] -> [4 4 84 84]
            sequence = sequence.permute(0, 3, 2, 1)

        return sequence

    elif get_backend() == "tf":
        # Assigns the input_ into the buffer at the current time index.
        def normal_assign():
            assigns = list()
            for key_, value in preprocessing_inputs.items():
                assign_op = self.assign_variable(ref=self.buffer[key_][self.index], value=value)
                assigns.append(assign_op)
            return assigns

        # After a reset (time index is -1), fill the entire buffer with `self.sequence_length` x input_.
        def after_reset_assign():
            assigns = list()
            for key_, value in preprocessing_inputs.items():
                # Tile only along the new leading (sequence) axis; all other axes are repeated once.
                multiples = (self.sequence_length,) + tuple([1] * get_rank(value))
                input_ = tf.expand_dims(input=value, axis=0)
                assign_op = self.assign_variable(
                    ref=self.buffer[key_], value=tf.tile(input=input_, multiples=multiples)
                )
                assigns.append(assign_op)
            return assigns

        # Insert the input at the correct index or fill empty buffer entirely with input.
        insert_inputs = tf.cond(pred=(self.index >= 0), true_fn=normal_assign, false_fn=after_reset_assign)

        # Make sure the input has been inserted.
        with tf.control_dependencies(control_inputs=force_list(insert_inputs)):
            # Then increase index by 1.
            index_plus_1 = self.assign_variable(ref=self.index, value=((self.index + 1) % self.sequence_length))

        # Then gather the output.
        with tf.control_dependencies(control_inputs=[index_plus_1]):
            sequences = FlattenedDataOp()
            # Collect the correct previous inputs from the buffer to form the output sequence.
            for key in preprocessing_inputs.keys():
                # Read `self.sequence_length` consecutive buffer slots, starting at `self.index` and wrapping
                # around at the end of the buffer.
                n_in = [self.buffer[key][(self.index + n) % self.sequence_length]
                        for n in range_(self.sequence_length)]

                # Add the sequence-rank to the end of our inputs.
                if self.add_rank:
                    sequence = tf.stack(values=n_in, axis=-1)
                # Concat the sequence items in the last rank.
                else:
                    sequence = tf.concat(values=n_in, axis=-1)

                # Must pass the sequence through a placeholder_with_default dummy to set back the
                # batch rank to '?', instead of 1 (1 would confuse the auto Space inference).
                sequences[key] = tf.placeholder_with_default(
                    sequence, shape=(None,) + tuple(get_shape(sequence)[1:])
                )
            # TODO implement transpose
            return sequences
def _graph_fn_call(self, inputs):
    """
    Gray-scales images of arbitrary rank.
    Normally, the images' rank is 3 (width/height/colors), but can also be: batch/width/height/colors, or any other.
    However, the last rank must be of size: len(self.weights).

    The python and pytorch branches convert via `cv2.cvtColor(..., cv2.COLOR_RGB2GRAY)` (rank-4 inputs are
    treated as a batch and converted image by image); only the tf branch uses `self.weights`.
    `self.keep_rank` is honored for batched as well as for single images in all backends.

    Args:
        inputs (tensor): Single image or a batch of images to be gray-scaled (last rank=n colors, where
            n=len(self.weights)).

    Returns:
        DataOp: The op for processing the images (tf backend), a numpy array (python backend) or a torch
            tensor (pytorch backend) holding the gray-scaled image(s). The color rank is kept with size 1 if
            `self.keep_rank` is True and dropped otherwise.

    Raises:
        AssertionError: If the size of the last rank of `inputs` does not equal `self.last_rank`.
    """
    if isinstance(inputs, list):
        inputs = np.asarray(inputs)
    images_shape = get_shape(inputs)
    assert images_shape[-1] == self.last_rank,\
        "ERROR: Given image's shape ({}) does not match number of weights (last rank must be {})!".\
        format(images_shape, self.last_rank)

    if self.backend == "python" or get_backend() == "python":
        if inputs.ndim == 4:
            # cv2 converts one image at a time: Go through the batch sample by sample.
            scaled_images = np.asarray([cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) for image in inputs])
        else:
            scaled_images = cv2.cvtColor(inputs, cv2.COLOR_RGB2GRAY)

        # Keep last dim (cvtColor drops the color rank), for single images as well as for batches.
        if self.keep_rank:
            scaled_images = scaled_images[..., np.newaxis]

        return scaled_images

    elif get_backend() == "pytorch":
        if len(inputs.shape) == 4:
            # cv2 needs numpy arrays and converts one image at a time.
            scaled_images = np.asarray([cv2.cvtColor(image.numpy(), cv2.COLOR_RGB2GRAY) for image in inputs])
        else:
            scaled_images = cv2.cvtColor(inputs.numpy(), cv2.COLOR_RGB2GRAY)

        # Keep last dim (cvtColor drops the color rank), for single images as well as for batches.
        if self.keep_rank:
            scaled_images = scaled_images[..., np.newaxis]

        return torch.tensor(scaled_images)

    elif get_backend() == "tf":
        # The reshaped weights used for the grayscale operation: All ranks are 1 except for the last one, so
        # that the weights broadcast against the images' color rank.
        weights_reshaped = np.reshape(
            self.weights, newshape=tuple([1] * (get_rank(inputs) - 1)) + (self.last_rank, )
        )

        # Do we need to convert?
        # The dangerous thing is that multiplying an int tensor (image) with float weights results in an all
        # 0 tensor).
        if "int" in str(dtype_(inputs.dtype)):
            weighted = weights_reshaped * tf.cast(inputs, dtype=dtype_("float"))
        else:
            weighted = weights_reshaped * inputs

        reduced = tf.reduce_sum(weighted, axis=-1, keepdims=self.keep_rank)

        # Cast back to original dtype.
        if "int" in str(dtype_(inputs.dtype)):
            reduced = tf.cast(reduced, dtype=inputs.dtype)

        return reduced
def get_space_from_op(op):
    """
    Tries to re-create a Space object given some DataOp (e.g. a tf op).
    This is useful for shape inference on returned ops after having run through a graph_fn.

    Args:
        op (DataOp): The op to create a corresponding Space for.

    Returns:
        Space: The inferred Space object, or 0 if no Space can be derived from `op` (an op without a `dtype`,
            a tf op without `get_shape`, an op of unknown shape, or a dict/tuple containing any such op).

    Raises:
        RLGraphError: If `op` is a str or if its dtype cannot be mapped to a Float-/Int-/Bool-/TextBox.
    """
    # a Dict
    if isinstance(op, dict):  # DataOpDict
        spec = {}
        add_batch_rank = False
        add_time_rank = False
        for key, value in op.items():
            space = get_space_from_op(value)
            # One child without a Space (0) means no Space for the entire container (same as for Tuples).
            # Without this check, the rank-flag lookups below would fail on the int 0.
            if space == 0:
                return 0
            spec[key] = space
            if space.has_batch_rank:
                add_batch_rank = True
            if space.has_time_rank:
                add_time_rank = True
        return Dict(spec, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank)
    # a Tuple
    elif isinstance(op, tuple):  # DataOpTuple
        spec = []
        add_batch_rank = False
        add_time_rank = False
        for i in op:
            space = get_space_from_op(i)
            if space == 0:
                return 0
            spec.append(space)
            if spec[-1].has_batch_rank:
                add_batch_rank = True
            if spec[-1].has_time_rank:
                add_time_rank = True
        return Tuple(spec, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank)

    # primitive Space -> infer from op dtype and shape
    else:
        # Op itself is a single value, simple python type.
        if isinstance(op, (bool, int, float)):
            return BoxSpace.from_spec(spec=type(op), shape=())
        elif isinstance(op, str):
            raise RLGraphError("Cannot derive Space from non-allowed op ({})!".format(op))
        # A single numpy array.
        elif isinstance(op, np.ndarray):
            return BoxSpace.from_spec(spec=convert_dtype(str(op.dtype), "np"), shape=op.shape)
        elif isinstance(op, list):
            return try_space_inference_from_list(op)
        # No Space: e.g. the tf.no_op, a distribution (anything that's not a tensor).
        # PyTorch Tensors do not have get_shape so must check backend.
        elif hasattr(op, "dtype") is False or (get_backend() == "tf" and not hasattr(op, "get_shape")):
            return 0
        # Some tensor: can be converted into a BoxSpace.
        else:
            shape = get_shape(op)
            # Unknown shape (e.g. a cond op).
            if shape is None:
                return 0
            add_batch_rank = False
            add_time_rank = False
            time_major = False
            new_shape = list(shape)

            # New way: Detect via op._batch_rank and op._time_rank properties where these ranks are.
            # The marked positions are set to -1 and then removed from the shape.
            if hasattr(op, "_batch_rank") and isinstance(op._batch_rank, int):
                add_batch_rank = True
                new_shape[op._batch_rank] = -1

            if hasattr(op, "_time_rank") and isinstance(op._time_rank, int):
                add_time_rank = True
                if op._time_rank == 0:
                    time_major = True
                new_shape[op._time_rank] = -1
            shape = tuple(n for n in new_shape if n != -1)

            # Old way: Detect automatically whether the first rank(s) are batch and/or time rank.
            if add_batch_rank is False and add_time_rank is False and shape != () and shape[0] is None:
                # Two leading unknown dims: Treat them as batch- and time-rank (time_major stays False).
                if len(shape) > 1 and shape[1] is None:
                    shape = shape[2:]
                    add_time_rank = True
                else:
                    shape = shape[1:]
                add_batch_rank = True

            # TODO: If op._batch_rank and/or op._time_rank are not set, set them now.

            base_dtype = op.dtype.base_dtype if hasattr(op.dtype, "base_dtype") else op.dtype
            # PyTorch does not have a bool type
            if get_backend() == "pytorch":
                if op.dtype is torch.uint8:
                    base_dtype = bool
            base_dtype_str = str(base_dtype)

            # FloatBox
            if "float" in base_dtype_str:
                return FloatBox(shape=shape, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank,
                                time_major=time_major, dtype=convert_dtype(base_dtype, "np"))
            # IntBox
            elif "int" in base_dtype_str:
                high = getattr(op, "_num_categories", None)
                return IntBox(high, shape=shape, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank,
                              time_major=time_major, dtype=convert_dtype(base_dtype, "np"))
            # a BoolBox
            elif "bool" in base_dtype_str:
                return BoolBox(shape=shape, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank,
                               time_major=time_major)
            # a TextBox
            elif "string" in base_dtype_str:
                return TextBox(shape=shape, add_batch_rank=add_batch_rank, add_time_rank=add_time_rank,
                               time_major=time_major)

    raise RLGraphError("ERROR: Cannot derive Space from op '{}' (unknown type?)!".format(op))
def get_space_from_op(op, read_key_hints=False, dtype=None, low=None, high=None):
    """
    Infers a Space object from a given DataOp (e.g. a tf op).
    Used for shape inference on the ops returned by a graph_fn.

    Args:
        op (DataOp): The op for which a matching Space should be built.

        read_key_hints (bool): Whether to parse type- and low/high-hints from the (str) keys of a Dict op.
            - Prefix "I_": IntBox, "F_": FloatBox, "B_": BoolBox.
            - Suffix "_low=0.0": Low value.
            - Suffix "_high=1.0": High value.
            Examples: Key "F_somekey_low=0.0_high=2.0" means a FloatBox with low=0.0 and high=2.0.
            Key "I_somekey" means an IntBox without limits.
            Key "I_somekey_high=5" means an IntBox with high=5 (values 0-4).
            Default: False.

        dtype (Optional[str]): Optional hint for the `dtype` of a BoxSpace.
        low (Optional[int,float]): Optional hint for the `low` property of a BoxSpace.
        high (Optional[int,float]): Optional hint for the `high` property of a BoxSpace.

    Returns:
        Space: The inferred Space object, or 0 if no Space can be derived from `op`.
    """
    # Container case 1: DataOpDict -> Dict Space built from the Spaces of all values.
    if isinstance(op, dict):
        sub_spaces = {}
        any_batch_rank = False
        any_time_rank = False
        for name, sub_op in op.items():
            # Hints parsed from the key replace whichever hints are currently in effect.
            if read_key_hints is True:
                dtype, low, high = get_space_hints_from_dict_key(name)
            sub_space = get_space_from_op(sub_op, dtype=dtype, low=low, high=high)
            sub_spaces[name] = sub_space
            # A value without a Space makes the entire dict Space-less.
            if sub_space == 0:
                return 0
            if sub_space.has_batch_rank:
                any_batch_rank = True
            if sub_space.has_time_rank:
                any_time_rank = True
        return Dict(sub_spaces, add_batch_rank=any_batch_rank, add_time_rank=any_time_rank)

    # Container case 2: DataOpTuple -> Tuple Space built from the Spaces of all items (no hints passed on).
    if isinstance(op, tuple):
        item_spaces = []
        any_batch_rank = False
        any_time_rank = False
        for item in op:
            item_space = get_space_from_op(item)
            if item_space == 0:
                return 0
            item_spaces.append(item_space)
            if item_space.has_batch_rank:
                any_batch_rank = True
            if item_space.has_time_rank:
                any_time_rank = True
        return Tuple(item_spaces, add_batch_rank=any_batch_rank, add_time_rank=any_time_rank)

    # From here on: A primitive op. Only those limits that were actually given are forwarded.
    limits = {name: value for name, value in (("high", high), ("low", low)) if value is not None}

    # A plain python scalar.
    if isinstance(op, (bool, int, float)):
        return BoxSpace.from_spec(spec=(dtype or type(op)), shape=(), **limits)
    if isinstance(op, str):
        raise RLGraphError("Cannot derive Space from non-allowed op ({})!".format(op))
    # A numpy array.
    if isinstance(op, np.ndarray):
        return BoxSpace.from_spec(spec=convert_dtype(str(op.dtype), "np"), shape=op.shape, **limits)
    if isinstance(op, list):
        return try_space_inference_from_list(op, dtype=dtype, **limits)
    # Not a tensor at all (e.g. tf.no_op or a distribution) -> no Space.
    # The `get_shape` check only applies to tf, as PyTorch tensors do not have that method.
    if not hasattr(op, "dtype") or (get_backend() == "tf" and not hasattr(op, "get_shape")):
        return 0

    # Some tensor: Build a BoxSpace from its shape and dtype.
    shape = get_shape(op)
    # Shape unknown (e.g. a cond op) -> no Space.
    if shape is None:
        return 0

    has_batch = False
    has_time = False
    is_time_major = False
    dims = list(shape)

    # Preferred detection: Explicit `_batch_rank`/`_time_rank` int markers on the op.
    # Marked positions are flagged with -1 and filtered out of the shape afterwards.
    batch_pos = getattr(op, "_batch_rank", None)
    if isinstance(batch_pos, int):
        has_batch = True
        dims[batch_pos] = -1

    time_pos = getattr(op, "_time_rank", None)
    if isinstance(time_pos, int):
        has_time = True
        is_time_major = time_pos == 0
        dims[time_pos] = -1

    shape = tuple(dim for dim in dims if dim != -1)

    # Fallback detection: No markers found, but the leading dim(s) are unknown (None).
    if not (has_batch or has_time) and shape != () and shape[0] is None:
        has_batch = True
        if len(shape) > 1 and shape[1] is None:
            # Two leading unknown dims: Batch- plus time-rank.
            has_time = True
            shape = shape[2:]
        else:
            shape = shape[1:]

    # TODO: If op._batch_rank and/or op._time_rank are not set, set them now.

    base_dtype = getattr(op.dtype, "base_dtype", op.dtype)
    # uint8 stands in for bool on the pytorch backend.
    if get_backend() == "pytorch" and op.dtype is torch.uint8:
        base_dtype = bool
    dtype_name = str(base_dtype)

    # Arguments shared by all Box constructors below.
    box_kwargs = dict(
        shape=shape, add_batch_rank=has_batch, add_time_rank=has_time, time_major=is_time_major
    )

    if "float" in dtype_name:
        return FloatBox(dtype=convert_dtype(base_dtype, "np"), **box_kwargs)
    if "int" in dtype_name:
        # An explicit `high` hint wins over the op's `_num_categories` marker.
        high_ = high or getattr(op, "_num_categories", None)
        return IntBox(high_, dtype=convert_dtype(base_dtype, "np"), **box_kwargs)
    if "bool" in dtype_name:
        return BoolBox(**box_kwargs)
    if "string" in dtype_name:
        return TextBox(**box_kwargs)

    raise RLGraphError("ERROR: Cannot derive Space from op '{}' (unknown type?)!".format(op))