コード例 #1
0
    # Show the encoding held in `encoded_1` before the update; it is assigned
    # above this excerpt, so this print is the baseline for the comparison below.
    print("Before:", encoded_1)

    # Update the encoder with the two states, their rewards, and the
    # `wasserstein` value (all defined above this excerpt).
    # NOTE(review): presumably a single training step -- confirm against the
    # definition of encoder.update.
    encoder.update(state_1, state_2, reward_1, reward_2, wasserstein)

    # Re-encode the same input so the output can be compared with "Before:".
    encoded_1 = encoder.predict(state=state_1)

    # Nothing is asserted; the before/after values are compared by eye.
    print("After:", encoded_1)
    #################################################################################

    ############################ Test Dynamics ######################################
    # Manual check of the dynamics model: predict, update on one transition,
    # predict again, and print both predictions. Nothing is asserted here; the
    # before/after outputs are compared by eye.

    # Build a random transition (state, action, next_state). Assuming `np` is
    # NumPy, each call returns a 1-D array of the given length with values
    # drawn uniformly from [0, 1).
    state = np.random.rand(state_dim)
    next_state = np.random.rand(state_dim)
    action = np.random.rand(action_dim)

    # The dynamics model is fed encoder outputs rather than the raw states, so
    # both ends of the transition are encoded first.
    encoded_state = encoder.predict(state=state)
    encoded_next_state = encoder.predict(state=next_state)

    # Baseline prediction from (encoded_state, action) before the update.
    next_state_from_dynamics = dynamics.predict(state=encoded_state,
                                                action=action)

    print("Before:", next_state_from_dynamics)

    # Update the dynamics model on the transition, with the encoded next state
    # passed as `next_state`.
    # NOTE(review): presumably a single training step -- confirm against the
    # definition of dynamics.update.
    dynamics.update(state=encoded_state,
                    action=action,
                    next_state=encoded_next_state)

    # Same inputs as the baseline call, so any change in the printed value
    # comes from the update above.
    next_state_from_dynamics = dynamics.predict(state=encoded_state,
                                                action=action)

    print("After:", next_state_from_dynamics)
    #################################################################################