def main(lr=0.001, episodeMemory=100, replaySize=64, gamma=0.95):
    """Train a DQN agent on MountainCar-v0 and plot the results.

    Parameters
    ----------
    lr : float
        Adam learning rate for the Q-network.
    episodeMemory : int
        Replay-memory capacity expressed in episodes; the memory holds
        200 * episodeMemory transitions (MountainCar episodes cap at 200
        steps).
    replaySize : int
        Mini-batch size drawn from replay memory on each replay step.
    gamma : float
        Discount factor passed to the DQN.
    """
    np.random.seed(0)
    env = gym.make('MountainCar-v0')

    # Q-network. input_dim=3 presumably encodes (position, velocity, action)
    # with a single scalar Q-value output — TODO confirm against DQN's
    # state/action encoding.
    model = keras.Sequential()
    model.add(Dense(128, activation="relu", input_dim=3,
                    kernel_initializer='normal'))
    model.add(Dense(52, activation="relu"))
    model.add(Dense(1, kernel_initializer='normal', activation="linear"))
    # NOTE(review): `lr=` is deprecated in recent Keras (`learning_rate=`);
    # kept as-is for compatibility with the Keras version this file targets.
    adam = keras.optimizers.Adam(lr=lr)
    model.compile(loss='mean_squared_error', optimizer=adam)

    memorySize = 200 * episodeMemory
    dqn = DQN(model, gamma, memorySize, replaysize=replaySize, _env=env)
    dqnScore = dqnScorerMountainCar(dqn, _env=env)

    nrofEpisodes = 1001
    for episode in range(nrofEpisodes):
        env.reset()
        action = 0
        obs, _, done, _ = env.step(action)

        # Periodic progress report; `start_time` is defined at module level.
        if (episode % 100) == 10:
            print("episode ", episode)
            dqnScore.printDistance()
            print("--- %s seconds ---" % (time.time() - start_time))

        # `step_count` renamed from `iter`, which shadowed the builtin.
        step_count = 0
        while not done:
            step_count += 1
            action = dqn.action(obs)
            new_obs, reward, done, _ = env.step(action)
            # Reward shaping: finishing before the 200-step limit earns a
            # bonus proportional to how quickly the goal was reached.
            if done and step_count < 199:
                reward = (200 - step_count) / 10
                print("****Success*****", -step_count)
            dqn.add(action, obs, new_obs, reward)
            obs = new_obs
            # NOTE(review): replay is performed once per environment step —
            # confirm this matches the intended training cadence.
            dqn.replay()

        env.reset()
        dqnScore.updateResult(step_count)

    title = "eps_%d_mem_%d_rep_%d_gamma_%d" % (
        nrofEpisodes, episodeMemory, replaySize, gamma * 100)
    dqnScore.plotResults(title)
    dqnScore.plot_cost_to_go_mountain_car(title)
Q = model(obs, save=False) #print Q[0] if epsilon > rand() or step < 100: action = randint(0, 2) else: action = np.argmax(Q[0]) epsilon -= 2e-4 if epsilon < 0.: epsilon = 0. obs, reward, done, _ = env.step(action) reward = 0. if done: reward = -1. episode += 1 Memory.add(last_obs, action, reward, obs, done) if done: obs = env.reset() last_obs = deepcopy(obs) if done and episode % 100 == 0: print 'episode:', episode, 'step:', step, 'eps:', epsilon, 'ave:', time / 100., 'Q:', Q[ 0] time = 0. #t = deepcopy(Q) if step < 100: continue sample = [Memory.ReplayMemory[(Memory.count - 1) % 10**6]] #sample(16) #sample = [] #for i in range(10):