/
main.py
174 lines (146 loc) · 5.37 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# -*- coding: utf-8 -*-
"""
Created on Fri May 17 21:03:14 2019
@author: shane
"""
import gym
import numpy as np

# Compact, wide numpy output so printed arrays stay readable in the console
np.set_printoptions(precision=3, linewidth=120)

# Setup GPU TF stability
import tensorflow as tf

# Limit this process to a fraction (0.8) of GPU memory. The options object
# only has an effect when it is handed to the session config, so it is passed
# to ConfigProto explicitly (it was previously created but never used).
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
sess = tf.Session(config=tf.ConfigProto(
    gpu_options=gpu_options,
    allow_soft_placement=True,
    log_device_placement=True))
# NOTE(review): `sess` is not referenced again in this script. If the agents
# build their models through Keras, this session may need to be registered as
# the backend session for the config to apply -- confirm against agents/.

import gc
# Garbage collection is enabled by default; this call just makes it explicit.
gc.enable()
"""
# Create an environment (note: no random seed is set in this script)
"""
# Index of the Gym environment to run; must be a key of ENV_NAMES below.
selectedEnvironment = 8

# Selection index -> Gym environment id, grouped by state/action space type.
ENV_NAMES = {
    # Toy Text - Discrete state and action space
    0: 'Taxi-v2',
    # Classic Control - Continuous State and Discrete Action Spaces
    1: 'MountainCar-v0',  # needs Discretized or better
    2: 'Acrobot-v1',  # needs Discretized, Tile Encoding or better
    3: 'CartPole-v1',  # needs Deep Q Learning to do well?
    # Box 2D - Continuous State, Discrete Actions
    4: 'LunarLander-v2',  # discrete actions, continuous state
    # Classic Control - Continuous State and Action Spaces
    5: 'Pendulum-v0',  # continuous only
    6: 'MountainCarContinuous-v0',  # continuous only
    # Box 2D - Continuous State and Action Spaces
    7: 'LunarLanderContinuous-v2',  # continuous only
    8: 'BipedalWalker-v2',  # continuous only
    # Box 2D - Image State and Continuous Action Spaces
    9: 'CarRacing-v0',  # image input, actions [steer, gas, brake]
}

# Fail fast with a clear message instead of passing a placeholder value to
# gym.make() when the selection does not match any known environment.
if selectedEnvironment not in ENV_NAMES:
    raise ValueError(
        'Unknown selectedEnvironment {!r}; expected one of {}'.format(
            selectedEnvironment, sorted(ENV_NAMES)))
envName = ENV_NAMES[selectedEnvironment]

# Initialize the environment
env = gym.make(envName)
env.reset()
# Derive the result file names from the environment name.
from datetime import datetime

from visuals import (
    examine_environment,
    examine_environment_MountainCar_discretized,
    examine_environment_Acrobat_tiled,
)
# Uncomment to inspect the environment before training:
#examine_environment(env)

FORMAT = '%Y%m%d%H%M%S'
run_stamp = datetime.now().strftime(FORMAT)

# The training file name is prefixed with the run timestamp; the test file
# name is not, so it is the same for every run of a given environment.
file_output_train = run_stamp + envName + '_train.txt'
file_output_test = envName + '_test.txt'

print('-----------------------------------------------------------')
print('New Experiment, training output file name: ', file_output_train)
"""
# Create Agent
"""
# Index of the agent to train:
#   0 - Q-learning on a discretized state space
#   1 - Q-learning on a tile-coded state space
#   2 - DDPG, continuous state and action
#   3 - DDPG, image state and continuous action
selectedAgent = 2

if selectedAgent in (0, 1):
    # Tabular Q-learning agents. `action_space.n` is read below, so these
    # presumably require a discrete action space -- verify against the
    # selected environment.
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    print("env.observation_space.shape[0]", state_size)
    print("env.action_space", action_size)
    if selectedAgent == 0:
        # create the agent discretized state space Q Learning
        from agents import QLearningAgentDiscretized as qlad
        agent = qlad.QLearningAgent(env)
        # examine_environment_MountainCar_discretized(env)
    else:
        # create the agent for tiled state space Q Learning
        from agents import QLearningAgentDiscretizedTiles as qlat
        agent = qlat.QLearningAgentDisTiles(env)
        # examine_environment_Acrobat_tiled(env, n_bins)
elif selectedAgent in (2, 3):
    # Create DDPG network agent
    obsSpace = env.observation_space.shape
    print("env.observation_space: ", obsSpace)
    from agents.DDPG import DDPG
    # The mode strings are passed through verbatim to DDPG (including the
    # existing spelling of "continousStateAction").
    if selectedAgent == 2:
        agent = DDPG(env, "continousStateAction")
    else:
        agent = DDPG(env, "imageStateContinuousAction")
else:
    # Fail fast instead of leaving a placeholder value in `agent` that would
    # only break later, inside the training loop.
    raise ValueError(
        'Unknown selectedAgent {!r}; expected 0, 1, 2 or 3'.format(
            selectedAgent))
"""
# run the simulation
"""
# Train the agent, writing results to the training output file.
import interact as sim

num_episodes = 500
sim.interact(
    agent,
    env,
    num_episodes,
    mode='train',
    file_output=file_output_train,
    renderSkip=100,
)

# Plot training scores obtained per episode
from visuals import plot_q_table, plot_score_from_file

plot_score_from_file(file_output_train, -300, 300, 1)
# Only the Q-learning agents (0 and 1) are asked for a Q-table to plot.
if selectedAgent in (0, 1):
    plot_q_table(agent.q_table)
"""
# save model and architecture to single file
# https://machinelearningmastery.com/save-load-keras-deep-learning-models/
"""
# Persist the four DDPG networks, one .h5 file per network, each named
# <training output file name> + <network attribute name> + ".h5".
if selectedAgent in (2, 3):
    import sys
    sys.setrecursionlimit(1000000)  # need to be big enough to handle whole weight list
    for network in ("actor_local", "actor_target",
                    "critic_local", "critic_target"):
        getattr(agent, network).model.save(
            file_output_train + network + ".h5")
    print("Saved model to disk")
"""
# Run in test mode and analyze scores obtained
"""
# Run three episodes in test mode, then plot the scores from the test file.
print("[TEST] Training Done, now running tests...")
test_scores = sim.interact(
    agent,
    env,
    num_episodes=3,
    mode='test',
    file_output=file_output_test,
)
plot_score_from_file(file_output_test, -300, 300, 1)
"""
# Watch Agent
"""
# Watch the agent for one rendered episode (at most 5000 steps), then close
# the environment. The try/finally makes sure env.close() still runs when
# rendering, stepping or the agent raises, or the run is interrupted.
try:
    state = env.reset()
    score = 0
    for _step in range(5000):
        # get action from agent
        action = agent.act(state, mode='test')
        # show environment and step it forward
        env.render()
        state, reward, done, _ = env.step(action)
        score += reward
        if done:
            break
    print('Final score:', score)
finally:
    # Exit Environment
    env.close()