def test():
    """Load saved DDPG weights and roll out the policy with rendering.

    Runs the actor greedily (no training) in the HumanoidDeepMimic walk
    environment for up to 100000 steps or until the episode terminates.
    Relies on module-level ``checkpoint_path``, ``restore_training_num``
    and ``restore_epoch`` to locate the weight files.
    """
    env = gym.make('HumanoidDeepMimicWalkBulletEnv-v1')
    env.render()

    actor_lr = 0.00001
    critic_lr = 0.0001
    discount = .99  # .95
    soft_tau = .001  # .125

    policy = Actor(env, actor_lr, soft_tau)
    value_net = Critic(env, critic_lr, discount, soft_tau)

    # Restore all four networks (online + target for actor and critic)
    # from the same training run / epoch.
    networks = (
        (policy.model, 'actor_model'),
        (policy.target_model, 'actor_target'),
        (value_net.model, 'critic_model'),
        (value_net.target_model, 'critic_target'),
    )
    for net, tag in networks:
        net.load_weights(
            checkpoint_path.format(training_num=restore_training_num,
                                   model=tag,
                                   epoch=restore_epoch))

    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]

    state = env.reset()
    for _ in range(100000):
        action = policy.act(state.reshape((1, obs_dim)))
        state, reward, done, _ = env.step(action.reshape((1, act_dim))[0])
        time.sleep(.1)  # slow the loop down so the render is watchable
        if done:
            break
import time

from composer import Composer
from actor import Actor

if __name__ == '__main__':
    # Path of the file the recorded actions are written to / read from.
    act_file = "actions.act"

    # Record inputs for 5 seconds into the act file.
    recorder = Composer(act_file, time=5)
    print("**Composing**")
    recorder.record()
    while recorder.is_running():
        recorder.update()
        time.sleep(0.1)
        # Do other stuff here...

    # Replay the act that was just recorded.
    player = Actor(act_file)
    print("\n**Acting**")
    player.act()
def main():
    """Train a DDPG agent (actor/critic + replay buffer) on HumanoidDeepMimic walk.

    Optionally resumes from module-level ``restore_epoch``/``restore_training_num``
    checkpoints, and periodically saves weights and reward stats controlled by
    the module-level ``make_checkpoints``, ``checkpoints_freq``,
    ``hyperparam_freq`` and path templates.
    """
    env = gym.make('HumanoidDeepMimicWalkBulletEnv-v1')

    actor_lr = 0.00001
    critic_lr = 0.0001
    discount = .99  # .95
    soft_tau = .001  # .125

    policy = Actor(env, actor_lr, soft_tau)
    value_net = Critic(env, critic_lr, discount, soft_tau)
    replay = Buffer()

    # Online + target networks for both actor and critic share one
    # (training_num, epoch) checkpoint naming scheme.
    def _networks():
        return ((policy.model, 'actor_model'),
                (policy.target_model, 'actor_target'),
                (value_net.model, 'critic_model'),
                (value_net.target_model, 'critic_target'))

    # A negative restore_epoch means "start from scratch".
    if restore_epoch >= 0:
        for net, tag in _networks():
            net.load_weights(
                checkpoint_path.format(training_num=restore_training_num,
                                       model=tag,
                                       epoch=restore_epoch))

    num_epochs = 50000
    epoch_len = 1000

    if render:
        env.render()

    for epoch in range(num_epochs):
        print("Epoch {}".format(epoch))
        state = env.reset()
        best_train_reward = 0
        best_env_reward = 0

        for _ in range(epoch_len):
            action = policy.act(
                state.reshape((1, env.observation_space.shape[0])))
            next_state, reward, done, _ = env.step(
                action.reshape((1, env.action_space.shape[0]))[0])
            replay.remember(state, action, reward, next_state, done)
            best_env_reward = max(best_env_reward, reward)

            # Only start gradient updates once a full batch is available.
            if len(replay.memory) > batch_size:
                avg_reward = run_training(policy, value_net, replay)
                best_train_reward = max(best_train_reward, avg_reward)

            state = next_state
            # Soft-update the target networks every environment step.
            policy.update_target_network()
            value_net.update_target_network()
            if done:
                break

        print('Max_reward {}\nAvg_max_train_reward {}\n'.format(
            best_env_reward, best_train_reward / epoch_len))

        if make_checkpoints and (epoch % checkpoints_freq == 0):
            for net, tag in _networks():
                net.save_weights(
                    checkpoint_path.format(training_num=training_num,
                                           model=tag,
                                           epoch=epoch))

        if make_checkpoints and (epoch % hyperparam_freq == 0):
            # Append this epoch's reward stats for later plotting.
            with open(hyperparam_path.format(training_num=training_num),
                      'a') as f:
                f.write('{},{}\n'.format(best_env_reward,
                                         best_train_reward / epoch_len))
# NOTE(review): removed large swaths of commented-out / triple-quoted debug
# code (Python-2-era `xrange` and `print` statements). The disabled snippets
# were no-op string expressions at runtime; the alternative obstacle layouts
# they sketched are summarised below for reference:
#   - diagonal wall:    environment.obstacles[(i, i - d)] = '#' for i in d..size
#   - specific points:  environment.obstacles[(9, 4)] = '#', (8, 6), ...

# Presumably: plan a move to `end`, execute it, then display the grid —
# `actor`, `environment` and `end` are defined elsewhere in this file.
actor.move(end)
actor.act()
environment.print_state()