Beispiel #1
0
def planned(cell,
            objective_fn,
            embedded,
            prev_action,
            planner,
            context=1,
            length=20,
            amount=1000,
            debug=False):
    """Run `context` observed steps, then append a planned state sequence.

    The first `context` steps of `embedded` and `prev_action` are unrolled
    through `cell` with the observation flag set everywhere. The final RNN
    state of that unroll is handed to `planner`, and the state sequence the
    planner returns is concatenated after the closed loop states on axis 1.

    Args:
      cell: RNN cell given to `tf.compat.v1.nn.dynamic_rnn` and to `planner`.
      objective_fn: Forwarded unchanged to `planner`.
      embedded: Tensor indexed along three axes; only the first `context`
          entries of axis 1 are used.
      prev_action: Tensor whose first `context` entries of axis 1 are used.
      planner: Callable returning a 3-tuple; the second element is used as
          the planned state sequence and the third as the return.
      context: Number of leading steps unrolled with observations.
      length: Passed to `planner` as `horizon`.
      amount: Passed to `planner` as `amount`.
      debug: When true, assert that the combined state spans
          `context + length` steps on axis 1.

    Returns:
      Tuple `(state, return_)` of the concatenated states and the planner's
      third return value.
    """
    flag_shape = tf.shape(input=embedded[:, :context, :1])[:3]
    use_obs = tf.ones(flag_shape, tf.bool)
    context_inputs = (embedded[:, :context], prev_action[:, :context],
                      use_obs)
    rnn_outputs, last_state = tf.compat.v1.nn.dynamic_rnn(
        cell, context_inputs, dtype=tf.float32)
    _, closed_state = rnn_outputs

    planner_result = planner(
        cell,
        objective_fn,
        last_state,
        obs_shape=shape.shape(embedded)[2:],
        action_shape=shape.shape(prev_action)[2:],
        horizon=length,
        amount=amount)
    _, plan_state, return_ = planner_result

    def _join_in_time(closed, plan):
        # Closed loop steps come first, planned steps follow on axis 1.
        return tf.concat([closed, plan], 1)

    state = nested.map(_join_in_time, closed_state, plan_state)
    if debug:
        total_steps = tf.shape(input=nested.flatten(state)[0])[1]
        steps_match = tf.compat.v1.assert_equal(total_steps, context + length)
        with tf.control_dependencies([steps_match]):
            # Identity ops tie the returned tensors to the assertion.
            state = nested.map(tf.identity, state)
            return_ = tf.identity(return_)
    return state, return_
Beispiel #2
0
def chunk_sequence(sequence, chunk_length, randomize=True, num_chunks=None):
    """Split a nested dict of sequence tensors into a batch of chunks.

    This function does not expect a batch of sequences, but a single
    sequence. The result always carries a `length` entry holding
    `chunk_length` for every chunk. Frames outside the selected chunks are
    discarded. The `sequence` dict passed in by the caller is not modified.

    Args:
      sequence: Nested dict of tensors with time as the leading dimension. An
          optional `length` entry gives the sequence length; otherwise the
          leading dimension of the first flattened tensor is used.
      chunk_length: Size of chunks the sequence will be split into.
      randomize: Start chunking from a random offset in the sequence. Unless
          `num_chunks` is given, one chunk fewer than would fit is extracted
          (but at least one), and the offset is drawn uniformly from all
          positions that keep the selected chunks inside the sequence.
      num_chunks: Optionally specify the exact number of chunks to be
          extracted from the sequence. Requires input to be long enough.

    Returns:
      Nested dict of sequence tensors reshaped to
      `[num_chunks, chunk_length, ...]`, plus the `length` entry.
    """
    with tf.device('/cpu:0'):
        if 'length' in sequence:
            # Pop from a shallow copy so the caller's dict keeps its entry.
            sequence = sequence.copy()
            length = sequence.pop('length')
        else:
            length = tf.shape(nested.flatten(sequence)[0])[0]

        if num_chunks is not None:
            # Adding `0 * length` leaves the value unchanged; presumably it
            # is there to turn a Python int into a tensor like `length`.
            num_chunks = num_chunks + 0 * length
        elif randomize:
            num_chunks = tf.maximum(1, length // chunk_length - 1)
        else:
            num_chunks = length // chunk_length
        used_length = num_chunks * chunk_length

        if randomize:
            # `maxval` is exclusive for integer sampling, hence the `+ 1`.
            # The sequence must be at least `used_length` long, otherwise
            # the sampling range is empty.
            max_offset = length - used_length
            offset = tf.random_uniform(
                (), 0, max_offset + 1, dtype=tf.int32)
        else:
            offset = 0

        # Keep only the frames that make up whole chunks.
        clipped = nested.map(
            lambda tensor: tensor[offset:offset + used_length], sequence)
        # Split the leading time axis into (chunk, step-within-chunk).
        chunks = nested.map(
            lambda tensor: tf.reshape(
                tensor,
                [num_chunks, chunk_length] + tensor.shape[1:].as_list()),
            clipped)
        chunks['length'] = chunk_length * tf.ones(
            (num_chunks, ), dtype=tf.int32)
        return chunks
Beispiel #3
0
def chunk_sequence(sequence, chunk_length, randomize=True, num_chunks=None):
    """Split a nested dict of sequence tensors into a batch of chunks.

    This function does not expect a batch of sequences, but a single
    sequence. The result always carries a `length` entry holding
    `chunk_length` for every chunk. Frames outside the selected chunks are
    discarded. The `sequence` dict passed in by the caller is not modified.

    Args:
      sequence: Nested dict of tensors with time as the leading dimension. An
          optional `length` entry gives the sequence length; otherwise the
          leading dimension of the first flattened tensor is used.
      chunk_length: Size of chunks the sequence will be split into.
      randomize: Start chunking from a random offset in the sequence. Unless
          `num_chunks` is given, one chunk fewer than would fit is extracted
          (but at least one), and the offset is drawn uniformly from all
          positions that keep the selected chunks inside the sequence.
      num_chunks: Optionally specify the exact number of chunks to be
          extracted from the sequence. Requires input to be long enough.

    Returns:
      Nested dict of sequence tensors reshaped to
      `[num_chunks, chunk_length, ...]`, plus the `length` entry.
    """
    with tf.device('/cpu:0'):
        if 'length' in sequence:
            # Pop from a shallow copy so the caller's dict keeps its entry.
            sequence = sequence.copy()
            length = sequence.pop('length')
        else:
            length = tf.shape(nested.flatten(sequence)[0])[0]

        if num_chunks is not None:
            # Adding `0 * length` leaves the value unchanged; presumably it
            # is there to turn a Python int into a tensor like `length`.
            num_chunks = num_chunks + 0 * length
        elif randomize:
            num_chunks = tf.maximum(1, length // chunk_length - 1)
        else:
            num_chunks = length // chunk_length
        used_length = num_chunks * chunk_length

        if randomize:
            # `maxval` is exclusive for integer sampling, hence the `+ 1`.
            # The sequence must be at least `used_length` long, otherwise
            # the sampling range is empty.
            max_offset = length - used_length
            offset = tf.random_uniform((), 0, max_offset + 1, dtype=tf.int32)
        else:
            offset = 0

        # Keep only the frames that make up whole chunks.
        clipped = nested.map(
            lambda tensor: tensor[offset:offset + used_length], sequence)
        # Split the leading time axis into (chunk, step-within-chunk).
        chunks = nested.map(
            lambda tensor: tf.reshape(tensor, [num_chunks, chunk_length] +
                                      tensor.shape[1:].as_list()), clipped)
        chunks['length'] = chunk_length * tf.ones(
            (num_chunks, ), dtype=tf.int32)
        return chunks
Beispiel #4
0
def closed_loop(cell, embedded, prev_action, debug=False):
    """Unroll `cell` over the whole sequence with observations enabled.

    Args:
      cell: RNN cell given to `tf.compat.v1.nn.dynamic_rnn`; its per-step
          output is unpacked as a `(prior, posterior)` pair.
      embedded: Tensor indexed along three axes; fed to the cell together
          with `prev_action` and an all-true observation flag.
      prev_action: Tensor fed to the cell alongside `embedded`.
      debug: When true, assert that the posterior has as many entries on
          axis 1 as `embedded`.

    Returns:
      Tuple `(prior, posterior)` of the nested RNN outputs.
    """
    flag_shape = tf.shape(input=embedded[:, :, :1])[:3]
    use_obs = tf.ones(flag_shape, tf.bool)
    rnn_inputs = (embedded, prev_action, use_obs)
    rnn_outputs, _ = tf.compat.v1.nn.dynamic_rnn(
        cell, rnn_inputs, dtype=tf.float32)
    prior, posterior = rnn_outputs
    if not debug:
        return prior, posterior

    posterior_steps = tf.shape(input=nested.flatten(posterior)[0])[1]
    embedded_steps = tf.shape(input=embedded)[1]
    steps_match = tf.compat.v1.assert_equal(posterior_steps, embedded_steps)
    with tf.control_dependencies([steps_match]):
        # Identity ops tie the returned tensors to the assertion.
        prior = nested.map(tf.identity, prior)
        posterior = nested.map(tf.identity, posterior)
    return prior, posterior
Beispiel #5
0
def open_loop(cell, embedded, prev_action, context=1, debug=False):
    """Unroll `context` observed steps, then continue without observations.

    The first `context` steps run with the observation flag set. The
    remaining steps start from the final state of that unroll, receive
    zeroed embeddings and have the observation flag cleared. Both state
    sequences are concatenated on axis 1.

    Args:
      cell: RNN cell given to `tf.compat.v1.nn.dynamic_rnn`; the second
          element of its per-step output pair is kept as the state.
      embedded: Tensor indexed along three axes, split at `context` on
          axis 1.
      prev_action: Tensor split at `context` on axis 1.
      context: Number of leading steps unrolled with observations.
      debug: When true, assert that the combined state has as many entries
          on axis 1 as `embedded`.

    Returns:
      Nested structure of states covering the closed and open loop steps.
    """
    observed_flags = tf.ones(
        tf.shape(embedded[:, :context, :1])[:3], tf.bool)
    closed_inputs = (embedded[:, :context], prev_action[:, :context],
                     observed_flags)
    closed_outputs, last_state = tf.compat.v1.nn.dynamic_rnn(
        cell, closed_inputs, dtype=tf.float32)
    _, closed_state = closed_outputs

    blind_flags = tf.zeros(
        tf.shape(input=embedded[:, context:, :1])[:3], tf.bool)
    open_inputs = (0 * embedded[:, context:], prev_action[:, context:],
                   blind_flags)
    open_outputs, _ = tf.compat.v1.nn.dynamic_rnn(
        cell, open_inputs, initial_state=last_state)
    _, open_state = open_outputs

    def _join_in_time(closed, opened):
        # Closed loop steps come first, open loop steps follow on axis 1.
        return tf.concat([closed, opened], 1)

    state = nested.map(_join_in_time, closed_state, open_state)
    if debug:
        state_steps = tf.shape(input=nested.flatten(state)[0])[1]
        embedded_steps = tf.shape(input=embedded)[1]
        steps_match = tf.compat.v1.assert_equal(state_steps, embedded_steps)
        with tf.control_dependencies([steps_match]):
            # Identity ops tie the returned tensors to the assertion.
            state = nested.map(tf.identity, state)
    return state
Beispiel #6
0
def overshooting(cell,
                 target,
                 embedded,
                 prev_action,
                 length,
                 amount,
                 ignore_input=False):
    """Perform open loop rollouts from the posteriors at every step.

    First, the model is unrolled in closed loop over the embedded inputs to
    obtain posterior states for every time step. Then, the inputs are shifted
    left `amount` times and open loop rollouts are computed from the
    posteriors, giving `amount + 1` rows per starting step.

    Note that the actions should be those leading to the current time step. So
    under common convention, it contains the last actions while observations
    are the current ones.

    Input:

      target, embedded:
        [A B C D E F] [A B C D E  ]

      prev_action:
        [0 A B C D E] [0 A B C D  ]

      length:
        [6 5]

      amount:
        3

    Output:

      prior, posterior, target:
        [A B C D E F] [A B C D E  ]
        [B C D E F  ] [B C D E    ]
        [C D E F    ] [C D E      ]
        [D E F      ] [D E        ]

      mask:
        [1 1 1 1 1 1] [1 1 1 1 1 0]
        [1 1 1 1 1 0] [1 1 1 1 0 0]
        [1 1 1 1 0 0] [1 1 1 0 0 0]
        [1 1 1 0 0 0] [1 1 0 0 0 0]

    Args:
      cell: RNN cell. Its private attributes `_state_size` and `_cell` are
          read to build the zero initial state.
      target: Nested structure shifted and masked alongside the inputs.
      embedded: Embedded observations fed to the closed loop unroll.
      prev_action: Actions fed to both unrolls.
      length: Sequence lengths, used for the RNN and for the mask.
      amount: Number of shift steps performed by `tf.scan`.
      ignore_input: When true, the observation flag of the closed loop unroll
          is zeroed.

    Returns:
      Tuple `(target, prior, posterior, mask)` after `_restore_batch_dim` has
      been applied to each; `mask` is cast to `tf.bool`.
    """
    # Closed loop unroll to get posterior states, which are the starting points
    # for open loop unrolls.
    # NOTE(review): the original comment said the last time step is not
    # needed, but the full sequences are passed below -- confirm.
    use_obs = tf.ones(
        tf.shape(nested.flatten(embedded)[0][:, :, :1])[:3], tf.bool)
    use_obs = tf.cond(tf.convert_to_tensor(ignore_input),
                      lambda: tf.zeros_like(use_obs, tf.bool), lambda: use_obs)

    # Explicit all-zero starting state, sized by the static first dimension of
    # `use_obs`; `int(...)` presumably requires that dimension to be known at
    # graph construction time.
    initial_state = {
        'mean': tf.zeros((int(use_obs.shape[0]), cell._state_size)),
        'stddev': tf.zeros((int(use_obs.shape[0]), cell._state_size)),
        'sample': tf.zeros((int(use_obs.shape[0]), cell._state_size)),
        'belief': tf.zeros((int(use_obs.shape[0]), cell._state_size)),
        'rnn_state': cell._cell.initial_state(int(use_obs.shape[0])),
    }
    # NOTE(review): `time_major=True` declares the inputs as
    # [time, batch, ...], yet the code below treats axis 1 as time
    # (`max_length`, `progress_fn`) and `initial_state` is sized from axis 0.
    # Confirm the intended layout.
    (prior, posterior), _ = tf.nn.dynamic_rnn(cell,
                                              (embedded, prev_action, use_obs),
                                              length,
                                              dtype=tf.float32,
                                              initial_state=initial_state,
                                              swap_memory=False,
                                              time_major=True)

    # Arrange inputs for every iteration in the open loop unroll. Every loop
    # iteration below corresponds to one row in the docstring illustration.
    max_length = shape.shape(nested.flatten(embedded)[0])[1]
    first_output = {
        'observ': embedded,
        'prev_action': prev_action,
        'posterior': posterior,
        'target': target,
        'mask': tf.sequence_mask(length, max_length, tf.int32),
    }
    # Shift one step to the left along axis 1 and pad the end with zeros.
    progress_fn = lambda tensor: tf.concat([tensor[:, 1:], 0 * tensor[:, :1]],
                                           1)
    # Apply the shift `amount` times, keeping every intermediate result.
    other_outputs = tf.scan(
        lambda past_output, _: nested.map(progress_fn, past_output),
        tf.range(amount), first_output)
    # Prepend the unshifted row, giving `amount + 1` rows on a new axis 0.
    sequences = nested.map(lambda lhs, rhs: tf.concat([lhs[None], rhs], 0),
                           first_output, other_outputs)

    # Merge batch and time dimensions of steps to compute unrolls from every
    # time step as one batch. The time dimension becomes the number of
    # overshooting distances.
    sequences = nested.map(lambda tensor: _merge_dims(tensor, [1, 2]),
                           sequences)
    # Swap the first two axes so the merged batch axis comes first.
    sequences = nested.map(
        lambda tensor: tf.transpose(tensor, [1, 0] + list(
            range(2, tensor.shape.ndims))), sequences)
    # Number of valid overshooting steps per merged batch entry.
    merged_length = tf.reduce_sum(sequences['mask'], 1)

    # Mask out padding frames; unnecessary if the input is already masked.
    sequences = nested.map(
        lambda tensor: tensor * tf.cast(
            _pad_dims(sequences['mask'], tensor.shape.ndims), tensor.dtype),
        sequences)

    # Compute open loop rollouts.
    use_obs = tf.zeros(tf.shape(sequences['mask']), tf.bool)[..., None]
    # Shift the posteriors right by one step (zeros first) so that each
    # rollout starts from the state preceding its first step.
    prev_state = nested.map(
        lambda tensor: tf.concat([0 * tensor[:, :1], tensor[:, :-1]], 1),
        posterior)
    prev_state = nested.map(lambda tensor: _merge_dims(tensor, [0, 1]),
                            prev_state)
    # Positional arguments: `merged_length` is the sequence length and
    # `prev_state` the initial state.
    (priors, _), _ = tf.nn.dynamic_rnn(
        cell, (sequences['observ'], sequences['prev_action'], use_obs),
        merged_length, prev_state)

    # Restore batch dimension.
    target, prior, posterior, mask = nested.map(
        functools.partial(_restore_batch_dim,
                          batch_size=shape.shape(length)[0]),
        (sequences['target'], priors, sequences['posterior'],
         sequences['mask']))

    mask = tf.cast(mask, tf.bool)
    return target, prior, posterior, mask
Beispiel #7
0
 def test_value_ordering(self):
     """Flattening a dict returns its values as a tuple in a fixed order."""
     structure = {'a': 1, 'b': 2, 'c': 3}
     expected = (1, 2, 3)
     self.assertEqual(expected, nested.flatten(structure))
Beispiel #8
0
 def test_mixed_structure(self):
     """Leaves of mixed list/tuple/dict nesting come out as one flat tuple."""
     structure = [(1, 2), 3, {'foo': [4]}]
     expected = (1, 2, 3, 4)
     self.assertEqual(expected, nested.flatten(structure))
Beispiel #9
0
 def test_convert_type(self):
     """A flat list is returned as a tuple with the same elements."""
     structure = [1, 2, 3]
     expected = (1, 2, 3)
     self.assertEqual(expected, nested.flatten(structure))
Beispiel #10
0
 def test_base_case(self):
     """A non-container value is wrapped in a one-element tuple."""
     flattened = nested.flatten(1)
     self.assertEqual((1,), flattened)
Beispiel #11
0
 def test_empty(self):
     """An empty dict flattens to an empty tuple."""
     flattened = nested.flatten({})
     self.assertEqual((), flattened)
Beispiel #12
0
 def test_scalar(self):
     """A single scalar flattens to a one-element tuple holding it."""
     flattened = nested.flatten(42)
     self.assertEqual((42,), flattened)
Beispiel #13
0
def overshooting(cell,
                 target,
                 embedded,
                 prev_action,
                 length,
                 amount,
                 ignore_input=False):
    """Perform open loop rollouts from the posteriors at every step.

    First, the model is unrolled in closed loop over the embedded inputs to
    obtain posterior states for every time step. Then, the inputs are shifted
    left `amount` times and open loop rollouts are computed from the
    posteriors, giving `amount + 1` rows per starting step.

    Note that the actions should be those leading to the current time step. So
    under common convention, it contains the last actions while observations
    are the current ones.

    Input:

      target, embedded:
        [A B C D E F] [A B C D E  ]

      prev_action:
        [0 A B C D E] [0 A B C D  ]

      length:
        [6 5]

      amount:
        3

    Output (columns run along the chunk, rows along the overshooting
    distance):

      prior, posterior, target:
        [A B C D E F] [A B C D E  ]
        [B C D E F  ] [B C D E    ]
        [C D E F    ] [C D E      ]
        [D E F      ] [D E        ]

      mask:
        [1 1 1 1 1 1] [1 1 1 1 1 0]
        [1 1 1 1 1 0] [1 1 1 1 0 0]
        [1 1 1 1 0 0] [1 1 1 0 0 0]
        [1 1 1 0 0 0] [1 1 0 0 0 0]

    Args:
      cell: RNN cell given to `tf.nn.dynamic_rnn` for both unrolls.
      target: Nested structure shifted and masked alongside the inputs.
      embedded: Embedded observations fed to the closed loop unroll.
      prev_action: Actions fed to both unrolls.
      length: Sequence lengths, used for the RNN and for the mask.
      amount: Number of shift steps performed by `tf.scan`.
      ignore_input: When true, the observation flag of the closed loop unroll
          is zeroed.

    Returns:
      Tuple `(target, prior, posterior, mask)` after `_restore_batch_dim` has
      been applied to each; `mask` is cast to `tf.bool`.

    The shapes quoted in the comments below are example values carried over
    from the original annotations (batch 40, chunk length 50, embedding size
    1024, amount 50); they are not verified here.
    """
    # Closed loop unroll to get posterior states, which are the starting points
    # for open loop unrolls.
    # NOTE(review): the original comment said the last time step is not
    # needed, but the full sequences are passed below -- confirm.
    use_obs = tf.ones(
        tf.shape(nested.flatten(embedded)[0][:, :, :1])[:3],
        tf.bool)  # e.g. (40, 50, 1024) -> (40, 50, 1)
    use_obs = tf.cond(tf.convert_to_tensor(ignore_input),
                      lambda: tf.zeros_like(use_obs, tf.bool), lambda: use_obs)
    (prior, posterior), _ = tf.nn.dynamic_rnn(
        cell,
        (embedded, prev_action, use_obs),
        length,
        dtype=tf.
        float32,  # inputs e.g. (40, 50, ?); `length` e.g. (40,)
        swap_memory=True)  # closed loop pass producing prior and posterior

    # Arrange inputs for every iteration in the open loop unroll. Every loop
    # iteration below corresponds to one row in the docstring illustration.
    max_length = shape.shape(nested.flatten(embedded)[0])[1]  # e.g. 50
    first_output = {
        'observ': embedded,  # e.g. (40, 50, 1024)
        'prev_action': prev_action,  # e.g. (40, 50, 2)
        'posterior': posterior,  # e.g. {'mean': (40, 50, 30), ...}
        'target': target,  # e.g. {'reward': (40, 50), ...}
        'mask': tf.sequence_mask(length, max_length, tf.int32),  # e.g. (40, 50)
    }
    progress_fn = lambda tensor: tf.concat([
        tensor[:, 1:], 0 * tensor[:, :1]
    ], 1)  # shift left by one step along axis 1 and pad the end with zeros
    other_outputs = tf.scan(  # stacks `amount` shifted copies on a new axis 0
        lambda past_output, _: nested.map(
            progress_fn, past_output
        ),  # each step shifts the previous result once more
        tf.range(amount),
        first_output)  # initial value for the scan, e.g. 'observ': (40, 50, 1024)
    sequences = nested.map(
        lambda lhs, rhs: tf.concat([lhs[None], rhs], 0),  # prepend unshifted row
        first_output,
        other_outputs)  # `amount + 1` rows, e.g. 'observ': (51, 40, 50, 1024)

    # Merge batch and time dimensions of steps to compute unrolls from every
    # time step as one batch. The time dimension becomes the number of
    # overshooting distances.
    sequences = nested.map(
        lambda tensor: _merge_dims(
            tensor, [1, 2]),  # e.g. 'observ': (51, 2000, 1024)
        sequences)
    sequences = nested.map(
        lambda tensor: tf.transpose(tensor, [1, 0] + list(
            range(2, tensor.shape.ndims))),  # swap the first two axes
        sequences)  # e.g. 'observ': (2000, 51, 1024)
    merged_length = tf.reduce_sum(sequences['mask'],
                                  1)  # valid steps per row, e.g. (2000,)

    # Mask out padding frames; unnecessary if the input is already masked.
    sequences = nested.map(
        lambda tensor: tensor * tf.cast(
            _pad_dims(
                sequences['mask'], tensor.shape.ndims
            ),  # presumably pads the mask with trailing axes to match `tensor`
            tensor.dtype),  # e.g. (2000, 51, 1024) * (2000, 51, 1)
        sequences)

    # Compute open loop rollouts.
    use_obs = tf.zeros(tf.shape(sequences['mask']), tf.bool)[..., None]
    prev_state = nested.map(
        lambda tensor: tf.concat(
            [0 * tensor[:, :1], tensor[:, :-1]], 1
        ),  # shift right by one step: (s1, ..., sT) -> (0, s1, ..., sT-1)
        posterior)
    prev_state = nested.map(lambda tensor: _merge_dims(tensor, [0, 1]),
                            prev_state)  # e.g. {'mean': (2000, 30), ...}
    (priors, _), _ = tf.nn.dynamic_rnn(
        cell, (sequences['observ'], sequences['prev_action'], use_obs),
        merged_length, prev_state
    )  # positional: sequence length, then initial state; open loop priors

    # Restore batch dimension.
    target, prior, posterior, mask = nested.map(
        functools.partial(_restore_batch_dim,
                          batch_size=shape.shape(length)[0]),
        (sequences['target'], priors, sequences['posterior'],
         sequences['mask']))

    mask = tf.cast(mask, tf.bool)
    return target, prior, posterior, mask