Example #1

# Common setup assumed by every example below (TensorFlow 2.x / tf.keras).
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Lambda, RepeatVector
from tensorflow.keras.losses import mean_squared_error
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
# get_ins, slice_skips, repeat_skips and kl_unit_normal are project-specific
# helpers assumed to be importable from the surrounding repository; Ec, Eo, A,
# Do, Da, L and La are encoder/decoder/LSTM sub-models supplied by the caller.

def adr_vp_feedback_frames(frames,
                           actions,
                           states,
                           context_frames,
                           Ec,
                           Eo,
                           A,
                           Do,
                           Da,
                           L,
                           La=None,
                           gaussian_a=False,
                           use_seq_len=12,
                           lstm_a_units=256,
                           lstm_a_layers=1,
                           lstm_units=256,
                           lstm_layers=2,
                           learning_rate=0.001,
                           random_window=False):

    bs, seq_len, w, h, c = [int(s) for s in frames.shape]
    assert seq_len >= use_seq_len

    frame_inputs, action_state, initial_state_a, initial_state, ins = get_ins(
        frames,
        actions,
        states,
        use_seq_len=use_seq_len,
        random_window=random_window,
        gaussian=gaussian_a,
        a_units=lstm_a_units,
        a_layers=lstm_a_layers,
        units=lstm_units,
        layers=lstm_layers,
        lstm=True)
    # context frames at the beginning
    xc_0 = tf.slice(frame_inputs, (0, 0, 0, 0, 0),
                    (-1, context_frames, -1, -1, -1))
    n_frames = use_seq_len

    # ===== Build the model
    hc_0, skips_0 = Ec(xc_0)
    hc_0 = tf.slice(hc_0, (0, context_frames - 1, 0), (-1, 1, -1))
    skips_0 = slice_skips(skips_0, start=context_frames - 1, length=1)
    skips = repeat_skips(skips_0, n_frames)

    ha = A(action_state)
    hc_repeat = RepeatVector(n_frames)(tf.squeeze(hc_0, axis=1))
    hc_ha = K.concatenate([hc_repeat, ha], axis=-1)

    if gaussian_a:
        _, za, _, _ = La([hc_ha, initial_state_a])  # za taken as the mean
        hc_ha = K.concatenate([hc_repeat, ha, za], axis=-1)

    x_rec_a = Da([hc_ha, skips])  # agent only prediction

    # x_err_pos = K.relu(frame_inputs - x_rec_a)
    # x_err_neg = K.relu(x_rec_a - frame_inputs)
    # xo_rec_a = K.concatenate([x_err_pos, x_err_neg], axis=-1)  # ground truth error components

    # ho, _ = Eo(xo_rec_a)

    x_pred = []
    prev_state = initial_state
    hc_t = hc_0

    ha_t, _ = tf.split(ha, [-1, 1], axis=1)  # remove last step
    _, ha_tp1 = tf.split(ha, [1, -1], axis=1)  # remove first step
    _, xa_tp1 = tf.split(x_rec_a, [1, -1], axis=1)
    x = frame_inputs
    xa = x_rec_a

    for i in range(n_frames - 1):
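        # Closed-loop rollout: each tf.split below pops the leading timestep
        # off a sliding buffer, and once the context window is exhausted the
        # model's own prediction replaces the ground-truth frame.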

        xa_t, xa = tf.split(xa, [1, -1], axis=1)
        xa_pred, xa_tp1 = tf.split(xa_tp1, [1, -1], axis=1)
        x_t, x = tf.split(x, [1, -1], axis=1)

        if i >= context_frames:
            x_t = x_pred_t

        x_xa_t = K.concatenate([x_t, xa_t], axis=-1)
        ho_t, _ = Eo(x_xa_t)

        _ha_t, ha_t = tf.split(ha_t, [1, -1], axis=1)
        _ha_tp1, ha_tp1 = tf.split(ha_tp1, [1, -1], axis=1)

        h = tf.concat([hc_t, _ha_t, _ha_tp1, ho_t], axis=-1)

        ho_pred, state = L([h, prev_state])

        h_pred_t = tf.concat([hc_t, _ha_tp1, ho_pred], axis=-1)

        x_err_pred_t = Do([h_pred_t, skips_0])
        x_err_pred_pos = x_err_pred_t[:, :, :, :, :3]
        x_err_pred_neg = x_err_pred_t[:, :, :, :, 3:]
        x_pred_t = xa_pred + x_err_pred_pos - x_err_pred_neg
        x_pred.append(x_pred_t)

        prev_state = state

    # Obtain predicted frames
    x_pred = tf.squeeze(tf.stack(x_pred, axis=1), axis=2)
    _, x_target = tf.split(frame_inputs, [1, -1], axis=1)

    outs = [x_pred, x_pred, x_pred, x_rec_a,
            x_target]  # repetitions to match teacher forcing version

    model = Model(inputs=ins, outputs=outs, name='vp_model')

    rec_pred = mean_squared_error(y_pred=x_pred, y_true=x_target)
    model.add_metric(rec_pred, name='rec_pred', aggregation='mean')

    rec_A = mean_squared_error(y_pred=x_rec_a, y_true=frame_inputs)
    model.add_metric(rec_A, name='rec_A', aggregation='mean')

    model.add_loss(K.mean(rec_pred))

    model.compile(optimizer=Adam(learning_rate=learning_rate))

    return model
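
All four constructors attach their objectives with add_loss and compile with
only an optimizer, so fit() is called without explicit targets. A minimal
sketch of that Keras pattern on a toy model (all names hypothetical, TF 2.x
assumed):

import numpy as np
from tensorflow.keras.layers import Dense, Input

toy_in = Input(shape=(8,))
toy_out = Dense(8)(toy_in)
toy = Model(inputs=toy_in, outputs=toy_out)

# The loss lives in the graph itself, so compile() takes no `loss=` and
# fit() takes no y.
toy_rec = K.mean(K.square(toy_in - toy_out))
toy.add_loss(toy_rec)
toy.add_metric(toy_rec, name='rec', aggregation='mean')
toy.compile(optimizer=Adam(learning_rate=1e-3))
toy.fit(np.random.rand(32, 8).astype('float32'), epochs=1, verbose=0)
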
Example #2
def adr(frames,
        actions,
        states,
        context_frames,
        Ec,
        Eo,
        A,
        Do,
        Da,
        La=None,
        gaussian_a=False,
        use_seq_len=12,
        lstm_units=256,
        lstm_layers=1,
        learning_rate=0.001,
        random_window=True,
        reconstruct_random_frame=True):

    bs, seq_len, w, h, c = [int(s) for s in frames.shape]
    assert seq_len >= use_seq_len

    frame_inputs, action_state, initial_state, _, ins = get_ins(
        frames,
        actions,
        states,
        use_seq_len=use_seq_len,
        random_window=random_window,
        gaussian=gaussian_a,
        a_units=lstm_units,
        a_layers=lstm_layers)
    # context frames at the beginning
    xc_0 = tf.slice(frame_inputs, (0, 0, 0, 0, 0),
                    (-1, context_frames, -1, -1, -1))
    x_to_recover = frame_inputs
    n_frames = use_seq_len

    # ===== Build the model
    hc_0, skips_0 = Ec(xc_0)
    hc_0 = tf.slice(hc_0, (0, context_frames - 1, 0), (-1, 1, -1))
    skips = slice_skips(skips_0, start=context_frames - 1, length=1)

    if reconstruct_random_frame:
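        # Reconstruct a single randomly chosen frame: keep the action/state
        # prefix up to that frame and use that frame alone as the target.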
        a_s_dim = action_state.shape[-1]
        rand_index_1 = tf.random.uniform((),
                                         minval=0,
                                         maxval=use_seq_len,
                                         dtype='int32')
        action_state = tf.slice(action_state, (0, 0, 0),
                                (bs, rand_index_1 + 1, a_s_dim))
        x_to_recover = tf.slice(frame_inputs, (0, rand_index_1, 0, 0, 0),
                                (bs, 1, w, h, c))
        n_frames = rand_index_1 + 1
    else:
        skips = repeat_skips(skips, use_seq_len)

    ha = A(action_state)
    hc_repeat = RepeatVector(n_frames)(tf.squeeze(hc_0, axis=1))
    hc_ha = K.concatenate([hc_repeat, ha], axis=-1)

    if gaussian_a:
        _, za, _, _ = La([hc_ha, initial_state])
        hc_ha = K.concatenate([hc_repeat, ha, za], axis=-1)

    if reconstruct_random_frame:
        _, hc_ha = tf.split(hc_ha, [-1, 1], axis=1)
        _, ha = tf.split(ha, [-1, 1], axis=1)
        hc_repeat = hc_0

    x_rec_a = Da([hc_ha, skips])

    # --> Eo input changed from the error image to the full frame
    #     concatenated with the action-only prediction
    x_rec_a_pos = K.relu(x_to_recover - x_rec_a)
    x_rec_a_neg = K.relu(x_rec_a - x_to_recover)

    # xo_rec_a = K.concatenate([x_rec_a_pos, x_rec_a_neg], axis=-1)
    xo_rec_a = K.concatenate([x_to_recover, x_rec_a], axis=-1)

    ho, _ = Eo(xo_rec_a)
    # ho = Eo(xo_rec_a)

    h = K.concatenate([hc_repeat, ha, ho], axis=-1)  # multiple reconstruction

    x_err = Do([h, skips])

    x_err_pos = x_err[:, :, :, :, :3]
    x_err_neg = x_err[:, :, :, :, 3:]
    x_recovered = x_err_pos - x_err_neg
    x_target = x_to_recover - x_rec_a
    x_target_pos = x_rec_a_pos
    x_target_neg = x_rec_a_neg

    # == Autoencoder
    model = Model(inputs=ins, outputs=x_recovered)

    rec_loss = mean_squared_error(x_target, x_recovered)
    model.add_metric(K.mean(rec_loss), name='rec_loss', aggregation='mean')

    rec_loss_pos = mean_squared_error(x_target_pos, x_err_pos)
    model.add_metric(rec_loss_pos, name='rec_loss_pos', aggregation='mean')

    rec_loss_neg = mean_squared_error(x_target_neg, x_err_neg)
    model.add_metric(rec_loss_neg, name='rec_loss_neg', aggregation='mean')

    rec_action_only_loss = mean_squared_error(x_rec_a, x_to_recover)
    model.add_metric(rec_action_only_loss, name='rec_A', aggregation='mean')

    model.add_loss(
        K.mean(rec_loss) + (K.mean(rec_loss_pos) + K.mean(rec_loss_neg)))

    model.compile(optimizer=Adam(learning_rate=learning_rate))

    return model
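
The pos/neg split used throughout stores a signed residual image as two
non-negative tensors; subtracting them recovers the residual exactly. A quick
standalone check of that encoding (plain TensorFlow, no project code):

residual_target = tf.constant([[0.2, 0.8], [0.5, 0.1]])
residual_approx = tf.constant([[0.5, 0.5], [0.5, 0.5]])

# Non-negative components, as in x_rec_a_pos / x_rec_a_neg above.
pos = tf.nn.relu(residual_target - residual_approx)
neg = tf.nn.relu(residual_approx - residual_target)

# pos - neg == target - approx, which is why x_err_pos - x_err_neg is
# regressed onto x_to_recover - x_rec_a.
tf.debugging.assert_near(pos - neg, residual_target - residual_approx)
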
Example #3
def adr_vp_teacher_forcing(frames,
                           actions,
                           states,
                           context_frames,
                           Ec,
                           Eo,
                           A,
                           Do,
                           Da,
                           L,
                           La=None,
                           gaussian_a=False,
                           use_seq_len=12,
                           lstm_a_units=256,
                           lstm_a_layers=1,
                           lstm_units=256,
                           lstm_layers=2,
                           learning_rate=0.001,
                           random_window=False):

    bs, seq_len, w, h, c = [int(s) for s in frames.shape]
    assert seq_len >= use_seq_len

    frame_inputs, action_state, initial_state_a, initial_state, ins = get_ins(
        frames,
        actions,
        states,
        use_seq_len=use_seq_len,
        random_window=random_window,
        gaussian=gaussian_a,
        a_units=lstm_a_units,
        a_layers=lstm_a_layers,
        units=lstm_units,
        layers=lstm_layers,
        lstm=True)

    # context frames at the beginning
    xc_0 = tf.slice(frame_inputs, (0, 0, 0, 0, 0),
                    (-1, context_frames, -1, -1, -1))
    n_frames = use_seq_len

    # ===== Build the model
    hc_0, skips_0 = Ec(xc_0)
    hc_0 = tf.slice(hc_0, (0, context_frames - 1, 0), (-1, 1, -1))
    skips_0 = slice_skips(skips_0, start=context_frames - 1, length=1)
    skips = repeat_skips(skips_0, n_frames)

    ha = A(action_state)
    hc_repeat = RepeatVector(n_frames)(tf.squeeze(hc_0, axis=1))
    hc_ha = K.concatenate([hc_repeat, ha], axis=-1)

    if gaussian_a:
        _, za, _, _ = La([hc_ha, initial_state_a])  # za taken as the mean
        hc_ha = K.concatenate([hc_repeat, ha, za], axis=-1)

    x_rec_a = Da([hc_ha, skips])  # agent only prediction

    x_err_pos = K.relu(frame_inputs - x_rec_a)
    x_err_neg = K.relu(x_rec_a - frame_inputs)

    # xo_rec_a = K.concatenate([frame_inputs, x_rec_a], axis=-1)  # -->  Here the action only image is not needed
    xo_rec_a = K.concatenate([x_err_pos, x_err_neg],
                             axis=-1)  # ground truth error components

    # Lambda wrappers that split off the first / last timestep along the
    # time axis
    remove_first_step = Lambda(lambda _x: tf.split(_x, [1, -1], axis=1))
    remove_last_step = Lambda(lambda _x: tf.split(_x, [-1, 1], axis=1))

    ho, _ = Eo(xo_rec_a)

    hc = RepeatVector(n_frames - 1)(K.squeeze(hc_0, axis=1))
    skips = repeat_skips(skips_0, ntimes=n_frames - 1)

    ha_t, _ = remove_last_step(ha)  # steps [0 .. T-2]
    _, ha_tp1 = remove_first_step(ha)  # steps [1 .. T-1]
    ho_t, _ = remove_last_step(ho)  # steps [0 .. T-2]

    h = tf.concat([hc, ha_t, ha_tp1, ho_t], axis=-1)  # steps [0 .. T-2]

    ho_pred, _ = L([h, initial_state])  # steps [1 .. T-1]
    _, ho_tp1 = remove_first_step(ho)  # steps [1 .. T-1], target for the LSTM outputs

    x_rec_a_t, _ = remove_last_step(x_rec_a)  # steps [0 .. T-2], used for x_curr
    _, x_rec_a_tp1 = remove_first_step(x_rec_a)  # steps [1 .. T-1], used for x_pred

    _, x_target_pred = remove_first_step(frame_inputs)  # target for the Do prediction
    _, x_err_pos_target = remove_first_step(x_err_pos)  # target for the Do prediction
    _, x_err_neg_target = remove_first_step(x_err_neg)  # target for the Do prediction

    # reconstruct current step
    h = tf.concat([hc, ha_t, ho_t], axis=-1)
    x_err_curr = Do([h, skips])

    x_target_curr, _ = remove_last_step(frame_inputs)  # steps [0 .. T-2], target for x_curr
    x_err_curr_pos = x_err_curr[:, :, :, :, :3]
    x_err_curr_neg = x_err_curr[:, :, :, :, 3:]
    x_curr = x_rec_a_t + x_err_curr_pos - x_err_curr_neg

    # predict one step ahead
    h = tf.concat([hc, ha_tp1, ho_pred], axis=-1)
    x_err_pred = Do([h, skips])

    x_err_pred_pos = x_err_pred[:, :, :, :, :3]
    x_err_pred_neg = x_err_pred[:, :, :, :, 3:]
    x_pred = x_rec_a_tp1 + x_err_pred_pos - x_err_pred_neg

    model = Model(inputs=ins,
                  outputs=[ho_pred, x_curr, x_pred, x_rec_a, x_target_pred],
                  name='vp_model')

    ho_mse = mean_squared_error(y_pred=ho_pred, y_true=ho_tp1)
    model.add_metric(K.mean(ho_mse), name='ho_mse', aggregation='mean')

    rec_curr = mean_squared_error(y_pred=x_curr, y_true=x_target_curr)
    model.add_metric(rec_curr, name='rec_curr', aggregation='mean')

    rec_pred = mean_squared_error(y_pred=x_pred, y_true=x_target_pred)
    model.add_metric(rec_pred, name='rec_pred', aggregation='mean')

    rec_pos = mean_squared_error(y_pred=x_err_pred_pos,
                                 y_true=x_err_pos_target)
    rec_neg = mean_squared_error(y_pred=x_err_pred_neg,
                                 y_true=x_err_neg_target)

    rec_A = mean_squared_error(y_pred=x_rec_a, y_true=frame_inputs)
    model.add_metric(rec_A, name='rec_A', aggregation='mean')

    # Earlier variants also weighted ho_mse and rec_curr into the loss:
    # model.add_loss(0.5*K.mean(ho_mse) + 0.125*K.mean(rec_curr) + 0.125*K.mean(rec_pred)
    #                + 0.125*K.mean(rec_pos) + 0.125*K.mean(rec_neg))
    # model.add_loss(0.5*K.mean(ho_mse) + 0.5/3*K.mean(rec_pred) + K.mean(rec_pos) + K.mean(rec_neg))
    model.add_loss(K.mean(rec_pred) + K.mean(rec_pos) + K.mean(rec_neg))

    model.compile(optimizer=Adam(learning_rate=learning_rate))

    return model
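
The teacher-forcing variant pairs step t with step t+1 by dropping the last,
respectively first, timestep of the same tensor. A toy illustration of how the
two tf.split calls line the sequences up:

seq = tf.reshape(tf.range(2 * 5 * 3, dtype='float32'), (2, 5, 3))  # (batch, T, feat)
seq_t, _ = tf.split(seq, [-1, 1], axis=1)    # steps [0 .. T-2], shape (2, 4, 3)
_, seq_tp1 = tf.split(seq, [1, -1], axis=1)  # steps [1 .. T-1], shape (2, 4, 3)

# seq_t[:, i] and seq_tp1[:, i] are consecutive steps of the original sequence.
tf.debugging.assert_near(seq_tp1[:, 0], seq[:, 1])
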
Example #4
def adr_ao(frames,
           actions,
           states,
           context_frames,
           Ec,
           A,
           D,
           learning_rate=0.01,
           gaussian=False,
           kl_weight=None,
           L=None,
           use_seq_len=12,
           lstm_units=None,
           lstm_layers=None,
           training=True,
           reconstruct_random_frame=False,
           random_window=True):

    bs, seq_len, w, h, c = [int(s) for s in frames.shape]
    assert seq_len >= use_seq_len
    frame_inputs, action_state, initial_state, _, ins = get_ins(
        frames,
        actions,
        states,
        use_seq_len=use_seq_len,
        random_window=random_window,
        gaussian=gaussian,
        a_units=lstm_units,
        a_layers=lstm_layers)

    rand_index_1 = tf.random.uniform(shape=(),
                                     minval=0,
                                     maxval=use_seq_len - context_frames + 1,
                                     dtype='int32')

    # Random xc_0, as an artificial way of augmenting the dataset
    xc_0 = tf.slice(frame_inputs, (0, rand_index_1, 0, 0, 0),
                    (-1, context_frames, -1, -1, -1))
    xc_1 = tf.slice(frame_inputs, (0, 0, 0, 0, 0),
                    (-1, context_frames, -1, -1, -1))

    x_to_recover = frame_inputs
    n_frames = use_seq_len

    # ===== Build the model
    hc_0, skips_0 = Ec(xc_0)
    hc_1, _ = Ec(xc_1)

    hc_0 = tf.slice(hc_0, (0, context_frames - 1, 0), (-1, 1, -1))
    hc_1 = tf.slice(hc_1, (0, context_frames - 1, 0), (-1, 1, -1))
    skips = slice_skips(skips_0, start=context_frames - 1, length=1)

    if reconstruct_random_frame:
        action_state_len = action_state.shape[-1]
        rand_index_2 = tf.random.uniform(shape=(),
                                         minval=0,
                                         maxval=use_seq_len,
                                         dtype='int32')
        action_state = tf.slice(action_state, (0, 0, 0),
                                (bs, rand_index_2 + 1, action_state_len))
        x_to_recover = tf.slice(frame_inputs, (0, rand_index_2, 0, 0, 0),
                                (bs, 1, w, h, c))
        n_frames = rand_index_2 + 1
    else:
        skips = repeat_skips(skips, use_seq_len)

    ha = A(action_state)
    hc_repeat = RepeatVector(n_frames)(tf.squeeze(hc_0, axis=1))
    hc_ha = K.concatenate([hc_repeat, ha], axis=-1)

    if gaussian:
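        # Variational branch: L returns (z, mu, logvar, state); at inference
        # time the posterior mean is used in place of a sample.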
        z, mu, logvar, state = L([hc_ha, initial_state])
        z = z if training else mu  # use the posterior mean at inference time
        hc_ha = K.concatenate([hc_repeat, ha, z], axis=-1)

    if reconstruct_random_frame:
        _, hc_ha = tf.split(hc_ha, [-1, 1], axis=1)
        if gaussian:
            _, mu = tf.split(mu, [-1, 1], axis=1)
            _, logvar = tf.split(logvar, [-1, 1], axis=1)

    x_recovered = D([hc_ha, skips])

    rec_loss = mean_squared_error(x_to_recover, x_recovered)
    sim_loss = mean_squared_error(hc_0, hc_1)

    if gaussian:
        ED = Model(inputs=ins, outputs=[x_recovered, x_to_recover, mu, logvar])
    else:
        ED = Model(inputs=ins, outputs=[x_recovered, x_to_recover])
    ED.add_metric(rec_loss, name='rec_loss', aggregation='mean')
    ED.add_metric(sim_loss, name='sim_loss', aggregation='mean')

    if gaussian:
        kl_loss = kl_unit_normal(mu, logvar)
        ED.add_metric(kl_loss, name='kl_loss', aggregation='mean')
        ED.add_loss(
            K.mean(rec_loss) + K.mean(sim_loss) + kl_weight * K.mean(kl_loss))
    else:
        ED.add_loss(K.mean(rec_loss) + K.mean(sim_loss))

    ED.compile(optimizer=Adam(learning_rate=learning_rate))

    return ED
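
kl_unit_normal is a project helper; a standard closed form for the KL
divergence between N(mu, exp(logvar)) and a unit normal, which is presumably
what it computes, is the usual VAE term (sketch, not the repository's code):

def kl_unit_normal_sketch(mu, logvar):
    # KL(N(mu, sigma^2) || N(0, 1)) with sigma^2 = exp(logvar), summed over
    # the latent dimension.
    return -0.5 * K.sum(1.0 + logvar - K.square(mu) - K.exp(logvar), axis=-1)

# Sanity check: a posterior equal to the prior has zero divergence.
print(kl_unit_normal_sketch(tf.zeros((2, 4)), tf.zeros((2, 4))))  # all zeros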