Example #1
def test():
    # gym, time, Actor, Critic and the checkpoint settings (checkpoint_path,
    # restore_training_num, restore_epoch) are expected to be defined at
    # module level.
    env = gym.make('HumanoidDeepMimicWalkBulletEnv-v1')
    env.render()

    # Learning rates, discount factor and soft-update rate
    # (previously tried values kept in the trailing comments).
    actor_learning_rate = 0.00001
    critic_learning_rate = 0.0001
    gamma = .99  # .95
    tau = .001  # .125

    actor = Actor(env, actor_learning_rate, tau)
    critic = Critic(env, critic_learning_rate, gamma, tau)
    # Restore the actor/critic networks and their target networks from the
    # requested checkpoint.
    actor.model.load_weights(
        checkpoint_path.format(training_num=restore_training_num,
                               model='actor_model',
                               epoch=restore_epoch))
    actor.target_model.load_weights(
        checkpoint_path.format(training_num=restore_training_num,
                               model='actor_target',
                               epoch=restore_epoch))
    critic.model.load_weights(
        checkpoint_path.format(training_num=restore_training_num,
                               model='critic_model',
                               epoch=restore_epoch))
    critic.target_model.load_weights(
        checkpoint_path.format(training_num=restore_training_num,
                               model='critic_target',
                               epoch=restore_epoch))

    cur_state = env.reset()

    # Run the restored policy for up to 100000 steps, rendering in real time.
    for _ in range(100000):
        action = actor.act(
            cur_state.reshape((1, env.observation_space.shape[0])))
        new_state, reward, done, _ = env.step(
            action.reshape((1, env.action_space.shape[0]))[0])
        cur_state = new_state
        time.sleep(.1)
        if done:
            break
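
The function above relies on several module-level names that the listing does not show: checkpoint_path, restore_training_num and restore_epoch, as well as the Actor and Critic classes. A minimal sketch of what that configuration might look like; the directory layout, file extension and values below are assumptions for illustration, not part of the original example.

# Hypothetical module-level configuration assumed by test() above.
import os

restore_training_num = 3    # which training run to restore (assumed)
restore_epoch = 400         # which checkpoint epoch to load (assumed)

# One weight file per network ('actor_model', 'actor_target', 'critic_model',
# 'critic_target') per epoch.
checkpoint_path = os.path.join(
    'checkpoints',
    'training_{training_num}',
    '{model}_epoch_{epoch}.h5')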
Example #2
import time
from composer import Composer
from actor import Actor

if __name__ == '__main__':
    # File where actions will be stored
    actions = "actions.act"
    # Composer that will record inputs for 5 seconds
    cp = Composer(actions, time=5)
    print("**Composing**")
    # Start composing
    cp.record()
    while cp.is_running():
        cp.update()
        time.sleep(0.1)
        # Do other stuff here...

    # Actor that will play the act
    ac = Actor(actions)
    print("\n**Acting**")
    # Start acting
    ac.act()
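
The Composer and Actor classes here come from the example's own project, so only their public interface is visible: Composer(path, time=...), record(), is_running(), update(), and Actor(path).act(). A rough sketch of one way that record-and-replay interface could be implemented; the event representation and file format below are assumptions for illustration.

# Hypothetical minimal versions of the interface used above.
import json
import time


class Composer:
    """Records timestamped events for a fixed duration and writes them to a file."""

    def __init__(self, path, time=5):
        self.path = path
        self.duration = time
        self.events = []
        self.start = None

    def record(self):
        self.start = time.time()

    def is_running(self):
        return self.start is not None and time.time() - self.start < self.duration

    def update(self):
        # The real project would poll the input device here; this placeholder
        # just logs the elapsed time and keeps the file on disk up to date.
        self.events.append(time.time() - self.start)
        with open(self.path, 'w') as f:
            json.dump(self.events, f)


class Actor:
    """Replays the recorded events with their original relative timing."""

    def __init__(self, path):
        with open(path) as f:
            self.events = json.load(f)

    def act(self):
        previous = 0.0
        for timestamp in self.events:
            time.sleep(max(0.0, timestamp - previous))
            previous = timestamp
            # Replaying the actual recorded input would happen here.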
Example #3
def main():
    env = gym.make('HumanoidDeepMimicWalkBulletEnv-v1')

    # Learning rates, discount factor and soft-update rate
    # (previously tried values kept in the trailing comments).
    actor_learning_rate = 0.00001
    critic_learning_rate = 0.0001
    gamma = .99  # .95
    tau = .001  # .125

    actor = Actor(env, actor_learning_rate, tau)
    critic = Critic(env, critic_learning_rate, gamma, tau)
    buffer = Buffer()

    # Resume from a saved checkpoint if one was requested.
    if restore_epoch >= 0:
        actor.model.load_weights(
            checkpoint_path.format(training_num=restore_training_num,
                                   model='actor_model',
                                   epoch=restore_epoch))
        actor.target_model.load_weights(
            checkpoint_path.format(training_num=restore_training_num,
                                   model='actor_target',
                                   epoch=restore_epoch))
        critic.model.load_weights(
            checkpoint_path.format(training_num=restore_training_num,
                                   model='critic_model',
                                   epoch=restore_epoch))
        critic.target_model.load_weights(
            checkpoint_path.format(training_num=restore_training_num,
                                   model='critic_target',
                                   epoch=restore_epoch))

    num_epochs = 50000
    epoch_len = 1000

    if render:
        env.render()

    for epoch in range(num_epochs):

        print("Epoch {}".format(epoch))
        cur_state = env.reset()

        max_train_reward = 0
        max_reward = 0

        for j in range(epoch_len):

            action = actor.act(
                cur_state.reshape((1, env.observation_space.shape[0])))

            new_state, reward, done, _ = env.step(
                action.reshape((1, env.action_space.shape[0]))[0])
            buffer.remember(cur_state, action, reward, new_state, done)

            max_reward = max(max_reward, reward)

            # Advance the environment state regardless of whether a training
            # step runs on this iteration.
            cur_state = new_state

            # Train once enough transitions are stored, then soft-update the
            # target networks.
            if len(buffer.memory) > batch_size:
                avg_reward = run_training(actor, critic, buffer)
                max_train_reward = max(max_train_reward, avg_reward)

                actor.update_target_network()
                critic.update_target_network()

            if done:
                break

        print('Max_reward {}\nAvg_max_train_reward {}\n'.format(
            max_reward, max_train_reward / epoch_len))

        # Periodically save weights for all four networks.
        if make_checkpoints and (epoch % checkpoints_freq == 0):
            actor.model.save_weights(
                checkpoint_path.format(training_num=training_num,
                                       model='actor_model',
                                       epoch=epoch))
            actor.target_model.save_weights(
                checkpoint_path.format(training_num=training_num,
                                       model='actor_target',
                                       epoch=epoch))
            critic.model.save_weights(
                checkpoint_path.format(training_num=training_num,
                                       model='critic_model',
                                       epoch=epoch))
            critic.target_model.save_weights(
                checkpoint_path.format(training_num=training_num,
                                       model='critic_target',
                                       epoch=epoch))

        if make_checkpoints and (epoch % hyperparam_freq == 0):
            file_path = hyperparam_path.format(training_num=training_num)
            with open(file_path, 'a') as f:
                f.write('{},{}\n'.format(max_reward,
                                         max_train_reward / epoch_len))
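
The training loop in main() also depends on a replay Buffer and a run_training() helper that do not appear in the listing, along with globals such as batch_size, render, make_checkpoints, checkpoints_freq, hyperparam_freq and training_num. A minimal sketch of the buffer and training step for a DDPG-style update; the train(...) methods and the batch_size value are placeholders, since the real update logic lives inside the project's Actor and Critic classes.

# Hypothetical replay buffer and training step assumed by main() above.
import random
from collections import deque

batch_size = 64  # assumed value


class Buffer:
    """Fixed-size replay memory of (state, action, reward, next_state, done) tuples."""

    def __init__(self, max_size=100000):
        self.memory = deque(maxlen=max_size)

    def remember(self, state, action, reward, new_state, done):
        self.memory.append((state, action, reward, new_state, done))

    def sample(self, n):
        return random.sample(self.memory, n)


def run_training(actor, critic, buffer):
    # Draw one mini-batch and hand it to the actor/critic update methods.
    # The train(...) calls below are placeholders for the project's own
    # update logic.
    batch = buffer.sample(batch_size)
    critic.train(batch)
    actor.train(batch, critic)
    # main() treats the return value as the batch's average reward.
    return sum(transition[2] for transition in batch) / batch_size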
Example #4
# Diagonal wall (disabled)
"""d = 5
for i in range(d, size):
    environment.obstacles[(i, i - d)] = '#'
del environment.obstacles[(7, 2)]"""

# Specific obstacle points (disabled)
"""environment.obstacles[(9, 4)] = '#'
environment.obstacles[(8, 6)] = '#'"""

# DEBUG
# print("\n" * 100)

# Move the actor toward the end position and perform its actions.
actor.move(end)
actor.act()

# Dump the planned actions (disabled)
"""for i in actor.actions:
    print(i.position.as_tuple())"""

environment.print_state()

# Step-by-step animation of the actor (disabled)
"""while actor.can_act():
    sleep(0.10)
    a = actor.act()
    actor.state = a
    print('\n' * 50)
    print(a.as_tuple())
    environment.print_state()
    if not actor.can_act():
        break"""
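
Example #4 presupposes an environment object with an obstacles dictionary keyed by (x, y) tuples and a print_state() method, plus an actor with move(), act(), can_act() and a state, none of which are shown. A minimal sketch of the grid part of that interface; the class below and its rendering are assumptions based only on the attributes the snippet touches.

# Hypothetical minimal grid environment matching the attributes used above.
class Environment:
    def __init__(self, size):
        self.size = size
        self.obstacles = {}  # maps (x, y) -> '#' for blocked cells

    def print_state(self):
        # Print the grid row by row: '#' for obstacles, '.' for free cells.
        for y in range(self.size):
            print(''.join(self.obstacles.get((x, y), '.')
                          for x in range(self.size)))


# Rebuild the diagonal wall from the disabled block above.
size = 10
environment = Environment(size)
d = 5
for i in range(d, size):
    environment.obstacles[(i, i - d)] = '#'
environment.print_state()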