Esempio n. 1
0
def planned(cell,
            objective_fn,
            embedded,
            prev_action,
            planner,
            context=1,
            length=20,
            amount=1000,
            debug=False):
    """Filter a short observed context, then append a planned rollout.

    The first `context` steps of `embedded` and `prev_action` are run through
    `cell` with the observation flag set everywhere. The final RNN state is
    handed to `planner`, and the state sequence the planner returns is
    concatenated after the closed loop states along axis 1.

    Args:
      cell: RNN cell accepted by `tf.compat.v1.nn.dynamic_rnn`. It is fed
        `(embedded, prev_action, use_obs)` tuples and must emit a pair of
        outputs, the second of which is kept as the closed loop states.
      objective_fn: Forwarded to `planner` unchanged.
      embedded: Tensor sliced as `[batch, time, ...]` (rank of at least 3).
      prev_action: Tensor sliced as `[batch, time, ...]`.
      planner: Callable invoked as `planner(cell, objective_fn, state,
        obs_shape=..., action_shape=..., horizon=..., amount=...)`. It must
        return three values; the last two are used as the planned states and
        the return estimate.
      context: Number of leading time steps that are filtered.
      length: Forwarded to `planner` as `horizon`.
      amount: Forwarded to `planner` as `amount`.
      debug: If true, add a run time assertion that the combined state has
        `context + length` entries along axis 1.

    Returns:
      Tuple `(state, return_)`: the concatenated state structure and the
      value returned by the planner.
    """
    # One boolean per (batch, step) of the context window, all set to True.
    flag_shape = tf.shape(input=embedded[:, :context, :1])[:3]
    use_obs = tf.ones(flag_shape, tf.bool)
    context_inputs = (embedded[:, :context], prev_action[:, :context], use_obs)
    (_, closed_state), last_state = tf.compat.v1.nn.dynamic_rnn(
        cell, context_inputs, dtype=tf.float32)

    # Everything after the batch and time axes describes a single step.
    obs_shape = shape.shape(embedded)[2:]
    action_shape = shape.shape(prev_action)[2:]
    _, plan_state, return_ = planner(
        cell,
        objective_fn,
        last_state,
        obs_shape=obs_shape,
        action_shape=action_shape,
        horizon=length,
        amount=amount)

    def _append_plan(closed, plan):
        # Planned steps follow the filtered steps on the time axis.
        return tf.concat([closed, plan], 1)

    state = nested.map(_append_plan, closed_state, plan_state)
    if not debug:
        return state, return_

    # Tie the outputs to the length check so it runs whenever they are used.
    num_steps = tf.shape(input=nested.flatten(state)[0])[1]
    length_check = tf.compat.v1.assert_equal(num_steps, context + length)
    with tf.control_dependencies([length_check]):
        state = nested.map(tf.identity, state)
        return_ = tf.identity(return_)
    return state, return_
Esempio n. 2
0
def image_summaries(dist, target, name='image', max_batch=10):
    """Build image strip and GIF summaries comparing a prediction to a target.

    Args:
      dist: Object exposing `mode()`; its mode is used as the predicted images.
      target: Tensor of reference images. The transpose below uses a
        five-element permutation, so rank 5 is required; presumably the axes
        are `[batch, time, height, width, channels]` -- TODO confirm.
      name: Variable scope under which the summary ops are created.
      max_batch: Maximum number of leading batch entries to visualise.

    Returns:
      List of four summaries, in order: 'prediction', 'change', 'error' and
      'animation'.
    """
    with tf.variable_scope(name):
        # All-zero stand-in for the frame-to-frame change of the first step.
        blank_frame = 0 * target[:max_batch, :1]
        predicted = dist.mode()[:max_batch]
        truth = target[:max_batch]
        delta = tf.concat(
            [blank_frame, predicted[:, 1:] - predicted[:, :-1]], 1)
        residual = predicted - truth
        # Signed quantities are shifted and halved before display (maps
        # [-1, 1] onto [0, 1]; assumes pixel values in [0, 1] -- TODO confirm).
        summaries = [
            image_strip_summary.image_strip_summary('prediction', predicted),
            image_strip_summary.image_strip_summary('change', (delta + 1) / 2),
            image_strip_summary.image_strip_summary('error',
                                                    (residual + 1) / 2),
        ]
        # Join target and prediction along axis 2, then move the batch axis
        # next to axis 3 so both can be folded into one axis by the reshape.
        video = tf.concat([truth, predicted], 2)
        video = tf.transpose(video, [1, 2, 0, 3, 4])
        dims = shapelib.shape(video)
        video = tf.reshape(video,
                           [dims[0], dims[1], dims[2] * dims[3], dims[4]])
        animation = gif_summary.gif_summary('animation',
                                            video[None],
                                            max_outputs=1,
                                            fps=20)
        summaries.append(animation)
    return summaries
Esempio n. 3
0
def overshooting(cell,
                 target,
                 embedded,
                 prev_action,
                 length,
                 amount,
                 ignore_input=False):
    """Perform open loop rollouts from the posteriors at every step.

    A closed loop pass over the already embedded inputs yields posterior
    states for every time step. From each of them an open loop rollout is
    computed, giving `amount + 1` overshooting distances in total (the
    unshifted sequence plus `amount` shifted copies).

    All inputs are batch-major, i.e. indexed as `[batch, time, ...]`.

    Note that the actions should be those leading to the current time step.
    So under common convention, it contains the last actions while
    observations are the current ones.

    Input:

      target, embedded:
        [A B C D E F] [A B C D E  ]

      prev_action:
        [0 A B C D E] [0 A B C D  ]

      length:
        [6 5]

      amount:
        3

    Output:

      prior, posterior, target:
        [A B C D E F] [A B C D E  ]
        [B C D E F  ] [B C D E    ]
        [C D E F    ] [C D E      ]
        [D E F      ] [D E        ]

      mask:
        [1 1 1 1 1 1] [1 1 1 1 1 0]
        [1 1 1 1 1 0] [1 1 1 1 0 0]
        [1 1 1 1 0 0] [1 1 1 0 0 0]
        [1 1 1 0 0 0] [1 1 0 0 0 0]

    Args:
      cell: RNN cell fed with `(observ, prev_action, use_obs)` tuples that
        emits `(prior, posterior)` pairs. Its private attributes
        `_state_size` and `_cell` are read to build the initial state.
      target: Nested structure of tensors aligned with `embedded`.
      embedded: Nested structure of embedded observations.
      prev_action: Actions leading to each step.
      length: Integer vector with the valid length of every batch entry.
      amount: Number of shifted copies produced by the scan.
      ignore_input: Boolean (or boolean tensor); when true the closed loop
        pass runs with the observation flag cleared.

    Returns:
      Tuple `(target, prior, posterior, mask)` with the batch dimension
      restored by `_restore_batch_dim`; `mask` is cast to `tf.bool`.
    """
    # Closed loop unroll to get posterior states, which are the starting
    # points for open loop unrolls.
    use_obs = tf.ones(
        tf.shape(nested.flatten(embedded)[0][:, :, :1])[:3], tf.bool)
    use_obs = tf.cond(tf.convert_to_tensor(ignore_input),
                      lambda: tf.zeros_like(use_obs, tf.bool), lambda: use_obs)

    # The explicit initial state needs a statically known batch size.
    batch_size = int(use_obs.shape[0])
    state_shape = (batch_size, cell._state_size)
    initial_state = {
        'mean': tf.zeros(state_shape),
        'stddev': tf.zeros(state_shape),
        'sample': tf.zeros(state_shape),
        'belief': tf.zeros(state_shape),
        'rnn_state': cell._cell.initial_state(batch_size),
    }
    # The inputs are batch-major: `initial_state` is sized by axis 0, while
    # the mask and every shift below treat axis 1 as time. The RNN must
    # therefore run with the default `time_major=False`; unrolling with
    # `time_major=True` would iterate over the batch axis instead.
    (prior, posterior), _ = tf.nn.dynamic_rnn(cell,
                                              (embedded, prev_action, use_obs),
                                              length,
                                              dtype=tf.float32,
                                              initial_state=initial_state,
                                              swap_memory=False)

    # Arrange inputs for every iteration in the open loop unroll. Every loop
    # iteration below corresponds to one row in the docstring illustration.
    max_length = shape.shape(nested.flatten(embedded)[0])[1]
    first_output = {
        'observ': embedded,
        'prev_action': prev_action,
        'posterior': posterior,
        'target': target,
        'mask': tf.sequence_mask(length, max_length, tf.int32),
    }
    # Shift one step to the left along time and pad a zero step at the end.
    progress_fn = lambda tensor: tf.concat([tensor[:, 1:], 0 * tensor[:, :1]],
                                           1)
    other_outputs = tf.scan(
        lambda past_output, _: nested.map(progress_fn, past_output),
        tf.range(amount), first_output)
    # Put the unshifted row in front of the `amount` shifted rows.
    sequences = nested.map(lambda lhs, rhs: tf.concat([lhs[None], rhs], 0),
                           first_output, other_outputs)

    # Merge batch and time dimensions of steps to compute unrolls from every
    # time step as one batch. The time dimension becomes the number of
    # overshooting distances.
    sequences = nested.map(lambda tensor: _merge_dims(tensor, [1, 2]),
                           sequences)
    sequences = nested.map(
        lambda tensor: tf.transpose(tensor, [1, 0] + list(
            range(2, tensor.shape.ndims))), sequences)
    # Number of valid overshooting distances per merged batch entry.
    merged_length = tf.reduce_sum(sequences['mask'], 1)

    # Mask out padding frames; unnecessary if the input is already masked.
    sequences = nested.map(
        lambda tensor: tensor * tf.cast(
            _pad_dims(sequences['mask'], tensor.shape.ndims), tensor.dtype),
        sequences)

    # Compute open loop rollouts: observations are never used, and each
    # rollout starts from the posterior of the preceding time step (zeros
    # for the very first step).
    use_obs = tf.zeros(tf.shape(sequences['mask']), tf.bool)[..., None]
    prev_state = nested.map(
        lambda tensor: tf.concat([0 * tensor[:, :1], tensor[:, :-1]], 1),
        posterior)
    prev_state = nested.map(lambda tensor: _merge_dims(tensor, [0, 1]),
                            prev_state)
    (priors, _), _ = tf.nn.dynamic_rnn(
        cell, (sequences['observ'], sequences['prev_action'], use_obs),
        merged_length, prev_state)

    # Restore batch dimension.
    target, prior, posterior, mask = nested.map(
        functools.partial(_restore_batch_dim,
                          batch_size=shape.shape(length)[0]),
        (sequences['target'], priors, sequences['posterior'],
         sequences['mask']))

    mask = tf.cast(mask, tf.bool)
    return target, prior, posterior, mask
Esempio n. 4
0
def _restore_batch_dim(tensor, batch_size):
    """Reshape `[batch_size * n, ...]` into `[batch_size, n, ...]`.

    Args:
      tensor: Tensor whose leading dimension holds `batch_size * n` entries.
      batch_size: Size of the batch dimension to split off the front.

    Returns:
      The reshaped tensor; trailing dimensions are left unchanged.
    """
    dims = shape.shape(tensor)
    merged, trailing = dims[0], dims[1:]
    split_shape = [batch_size, merged // batch_size] + trailing
    return tf.reshape(tensor, split_shape)
Esempio n. 5
0
def overshooting(cell,
                 target,
                 embedded,
                 prev_action,
                 length,
                 amount,
                 ignore_input=False):
    """Roll the model forward without observations from every posterior.

    A closed loop pass over the already embedded inputs produces a posterior
    state for every time step. The inputs are then shifted `amount` times
    along the time axis, so that together with the unshifted copy there are
    `amount + 1` overshooting distances, and an open loop pass computes the
    priors for all of them in a single batch.

    `prev_action` holds the actions leading to each step: under the common
    convention these are the previous actions, while observations are the
    current ones.

    Illustration for two sequences of length 6 and 5 with `amount = 3`
    (columns are time steps, rows are overshooting distances):

      target, embedded:
        [A B C D E F] [A B C D E  ]

      prev_action:
        [0 A B C D E] [0 A B C D  ]

      length:
        [6 5]

      prior, posterior, target (output):
        [A B C D E F] [A B C D E  ]
        [B C D E F  ] [B C D E    ]
        [C D E F    ] [C D E      ]
        [D E F      ] [D E        ]

      mask (output):
        [1 1 1 1 1 1] [1 1 1 1 1 0]
        [1 1 1 1 1 0] [1 1 1 1 0 0]
        [1 1 1 1 0 0] [1 1 1 0 0 0]
        [1 1 1 0 0 0] [1 1 0 0 0 0]

    Shape notes in the comments below use B for the batch size, T for the
    padded number of time steps and A for `amount`.

    Args:
      cell: RNN cell fed with `(observ, prev_action, use_obs)` tuples that
        emits `(prior, posterior)` pairs.
      target: Nested structure of tensors indexed as `[B, T, ...]`.
      embedded: Nested structure of embedded observations, `[B, T, ...]`.
      prev_action: Actions leading to each step, `[B, T, ...]`.
      length: Integer vector `[B]` with the valid length of each sequence.
      amount: Number of shifted copies produced by the scan.
      ignore_input: Boolean (or boolean tensor); when true the closed loop
        pass runs with the observation flag cleared.

    Returns:
      Tuple `(target, prior, posterior, mask)` after `_restore_batch_dim`
      has split the batch dimension back out; `mask` is cast to `tf.bool`.
    """
    # Closed loop pass. The observation flag is one boolean per (B, T, 1),
    # all true unless `ignore_input` clears it.
    first_embedded = nested.flatten(embedded)[0]
    observe_all = tf.ones(tf.shape(first_embedded[:, :, :1])[:3], tf.bool)
    use_obs = tf.cond(
        tf.convert_to_tensor(ignore_input),
        lambda: tf.zeros_like(observe_all, tf.bool),
        lambda: observe_all)
    (_, posterior), _ = tf.nn.dynamic_rnn(
        cell,
        (embedded, prev_action, use_obs),
        sequence_length=length,
        dtype=tf.float32,
        swap_memory=True)

    # Row zero of the illustration: the unshifted inputs plus a 0/1 mask of
    # shape [B, T] marking the valid steps.
    max_length = shape.shape(first_embedded)[1]
    unshifted = {
        'observ': embedded,
        'prev_action': prev_action,
        'posterior': posterior,
        'target': target,
        'mask': tf.sequence_mask(length, max_length, tf.int32),
    }

    def _shift_left(tensor):
        # Drop the first time step and pad a zero step at the end.
        return tf.concat([tensor[:, 1:], 0 * tensor[:, :1]], 1)

    def _next_row(previous_row, _):
        # Each scan iteration shifts the previous row by one more step.
        return nested.map(_shift_left, previous_row)

    # Remaining rows, stacked on a new leading axis: [A, B, T, ...].
    shifted = tf.scan(_next_row, tf.range(amount), unshifted)

    def _prepend_row(first_row, later_rows):
        return tf.concat([first_row[None], later_rows], 0)

    # All rows together: [A + 1, B, T, ...].
    sequences = nested.map(_prepend_row, unshifted, shifted)

    # Treat every (batch entry, time step) pair as its own batch element, so
    # the overshooting distance becomes the time axis of the open loop pass.
    # `_merge_dims` presumably fuses axes 1 and 2 into [A + 1, B * T, ...];
    # the transpose then yields [B * T, A + 1, ...].
    def _merge_batch_and_time(tensor):
        return _merge_dims(tensor, [1, 2])

    def _distance_to_time_axis(tensor):
        trailing_axes = list(range(2, tensor.shape.ndims))
        return tf.transpose(tensor, [1, 0] + trailing_axes)

    sequences = nested.map(_merge_batch_and_time, sequences)
    sequences = nested.map(_distance_to_time_axis, sequences)
    step_mask = sequences['mask']
    # Valid number of overshooting distances per merged batch element.
    merged_length = tf.reduce_sum(step_mask, 1)

    def _zero_padding(tensor):
        # Expand the mask to the rank of `tensor` so it can be multiplied in.
        expanded_mask = _pad_dims(step_mask, tensor.shape.ndims)
        return tensor * tf.cast(expanded_mask, tensor.dtype)

    # Zero out padding frames; redundant if the input is already masked.
    sequences = nested.map(_zero_padding, sequences)

    # Open loop pass: observations are never used, and each rollout starts
    # from the posterior of the preceding step (zeros for the first step).
    use_obs = tf.zeros(tf.shape(sequences['mask']), tf.bool)[..., None]

    def _delay_one_step(tensor):
        return tf.concat([0 * tensor[:, :1], tensor[:, :-1]], 1)

    prev_state = nested.map(_delay_one_step, posterior)
    prev_state = nested.map(lambda tensor: _merge_dims(tensor, [0, 1]),
                            prev_state)
    (priors, _), _ = tf.nn.dynamic_rnn(
        cell,
        (sequences['observ'], sequences['prev_action'], use_obs),
        sequence_length=merged_length,
        initial_state=prev_state)

    # Split the merged leading axis back into batch and time.
    batch_size = shape.shape(length)[0]

    def _split_batch(tensor):
        return _restore_batch_dim(tensor, batch_size=batch_size)

    out_target, out_prior, out_posterior, out_mask = nested.map(
        _split_batch,
        (sequences['target'], priors, sequences['posterior'],
         sequences['mask']))
    return out_target, out_prior, out_posterior, tf.cast(out_mask, tf.bool)