# Shared imports assumed by all the script variants in this file; DQNAgent,
# Tetris, DQNAlgorithm and CustomTensorBoard come from the repo's own modules.
from datetime import datetime
from statistics import mean, stdev
from time import sleep, time

import matplotlib.pyplot as plt
from tqdm import tqdm


def dqn():
    env = Tetris()
    episodes = 2000
    max_steps = None
    epsilon_stop_episode = 1500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 50
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons, activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                     discount=discount, replay_start_size=replay_start_size)

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        # if render_every and episode % render_every == 0:
        #     render = True
        # else:
        render = False

        actions = []

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1], render=render,
                                    render_delay=render_delay)

            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            actions.append(best_action)
            steps += 1

        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Logs
        if log_every and episode and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])
            log.log(episode, avg_score=avg_score, min_score=min_score,
                    max_score=max_score)

    # NOTE: Keras model.evaluate() expects inputs *and* targets; predict() is
    # almost certainly what was intended for scoring a single state here.
    print(agent.model.evaluate(current_state))
    agent.model.save_weights("ia_tetris_weights.h5")

    # Replay forever with the trained weights, rendering every game
    while True:
        current_state = env.reset()
        done = False
        steps = 0
        render = True

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1], render=render,
                                    render_delay=render_delay)

            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            actions.append(best_action)
            steps += 1

        scores.append(env.get_game_score())
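# Every variant in this file drives the same small agent surface: best_state()
# picks the highest-valued afterstate (epsilon-greedily), add_to_memory()
# stores (state, next_state, reward, done) transitions, and train() fits the
# value network on replayed samples. The real DQNAgent lives elsewhere in the
# repo; this is only a minimal sketch of that assumed interface, with a
# hypothetical Keras value network, so the loops can be read self-contained.
import random
from collections import deque

import numpy as np
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential


class DQNAgentSketch:
    def __init__(self, state_size, n_neurons=(32, 32),
                 activations=('relu', 'relu', 'linear'), mem_size=20000,
                 discount=0.95, epsilon=1.0, epsilon_stop_episode=1500,
                 replay_start_size=2000):
        self.state_size = state_size
        self.memory = deque(maxlen=mem_size)
        self.discount = discount
        self.epsilon = epsilon
        self.epsilon_decay = epsilon / epsilon_stop_episode
        self.replay_start_size = replay_start_size

        layers = [Dense(n_neurons[0], input_dim=state_size, activation=activations[0])]
        for n, a in zip(n_neurons[1:], activations[1:-1]):
            layers.append(Dense(n, activation=a))
        layers.append(Dense(1, activation=activations[-1]))  # scalar state value
        self.model = Sequential(layers)
        self.model.compile(optimizer='adam', loss='mse')

    def best_state(self, states):
        # Epsilon-greedy over afterstates: with probability epsilon pick a
        # random candidate board, otherwise the one the network values highest.
        states = list(states)
        if random.random() <= self.epsilon:
            return random.choice(states)
        values = self.model.predict(np.array(states), verbose=0).flatten()
        return states[int(np.argmax(values))]

    def add_to_memory(self, current_state, next_state, reward, done):
        self.memory.append((current_state, next_state, reward, done))

    def train(self, batch_size=512, epochs=1):
        # Wait until the replay buffer is warm enough to sample from.
        if len(self.memory) < self.replay_start_size or len(self.memory) < batch_size:
            return
        batch = random.sample(self.memory, batch_size)
        next_values = self.model.predict(
            np.array([b[1] for b in batch]), verbose=0).flatten()
        x = np.array([b[0] for b in batch])
        y = np.array([r + (0 if d else self.discount * nv)
                      for (_, _, r, d), nv in zip(batch, next_values)])
        self.model.fit(x, y, batch_size=batch_size, epochs=epochs, verbose=0)
        if self.epsilon > 0:
            self.epsilon = max(0.0, self.epsilon - self.epsilon_decay)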
def dqn():
    env = Tetris()
    episodes = 2000
    max_steps = 1000000000
    epsilon_stop_episode = 1750
    mem_size = 20000
    discount = 0.95
    batch_size = 1024
    epochs = 1
    render_every = 1
    log_every = 1
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = 0.01
    activations = ['relu', 'relu', 'linear']

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons, activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                     discount=discount, replay_start_size=replay_start_size)

    log_dir = f'logs/tetris-eps={episodes}-e-stop={epsilon_stop_episode}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []
    steps_list = []

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        # Render (and record) every render_every episodes, and always the last one
        if (render_every and episode % render_every == 0) or episode == (episodes - 1):
            render = True
            record = True
        else:
            render = False
            record = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1], episode,
                                    render=render, render_delay=render_delay,
                                    record=record)

            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())
        steps_list.append(steps)

        # Train
        if episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Logs
        if log_every and episode and episode % log_every == 0:
            score = scores[-log_every]
            steps = steps_list[-log_every]
            log.log(episode, score=score, steps=steps)
update_target_every = None

agent = DQNAgent(env.get_state_size(),
                 n_neurons=n_neurons, activations=activations,
                 add_batch_norm=add_batch_norm, epsilon=epsilon,
                 epsilon_min=epsilon_min, use_target_model=use_target_model,
                 update_target_every=update_target_every,
                 epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                 discount=discount, replay_start_size=replay_start_size)

log_dir = f'logs/tetris-epsilon={epsilon}-epsilon_min={epsilon_min}-epsilon_stop_episode={epsilon_stop_episode}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
log = CustomTensorBoard(log_dir=log_dir)

start_time = time()
wall_time = []
scores = []
val_scores = []
val_steps = []

dqn()

#%% Analyze
plt.plot(range(len(val_scores)), val_scores)
plt.plot(range(len(scores)), scores)
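#%% Smoothed curves (optional sketch, not part of the original cells)
# Per-episode scores are noisy, so the raw curves above are hard to read; a
# simple rolling mean exposes the trend. rolling_mean is a hypothetical helper
# added here purely for illustration.
import numpy as np

def rolling_mean(xs, window=50):
    xs = np.asarray(xs, dtype=float)
    if len(xs) < window:
        return xs
    # Moving average via convolution with a uniform kernel
    return np.convolve(xs, np.ones(window) / window, mode='valid')

plt.plot(rolling_mean(scores), label='train score (rolling mean)')
plt.plot(rolling_mean(val_scores), label='val score (rolling mean)')
plt.legend()
plt.show()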
def dqn():
    env = Tetris()
    episodes = 2000
    max_steps = None
    epsilon_stop_episode = 500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 50
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons, activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                     discount=discount, replay_start_size=replay_start_size)

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []

    # Feature toggles: which hand-crafted board statistics form the state
    _max_height = True
    _min_height = True
    _current_piece = False
    _next_piece = False
    _max_bumpiness = False
    _lines = False
    _holes = True
    _total_bumpiness = True
    _sum_height = False

    for episode in tqdm(range(episodes)):
        current_state = env.reset(_max_height, _min_height, _current_piece,
                                  _next_piece, _max_bumpiness, _lines, _holes,
                                  _total_bumpiness, _sum_height)
        done = False
        steps = 0

        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            # No params for default
            next_states = env.get_next_states(_max_height, _min_height,
                                              _current_piece, _next_piece,
                                              _max_bumpiness, _lines, _holes,
                                              _total_bumpiness, _sum_height)
            best_state = agent.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1], render=render,
                                    render_delay=render_delay)

            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Logs
        if log_every and episode and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])
            cleared_lines = env.get_lines()
            log.log(episode, avg_score=avg_score, min_score=min_score,
                    max_score=max_score, cleared_lines=cleared_lines)
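# The underscore flags above toggle which hand-crafted board statistics the
# env packs into the state vector. The env computes them internally; purely
# for illustration, here is a sketch of how such features are typically
# derived from a binary board (rows stacked top-down). board_features is a
# hypothetical helper, not the env's actual implementation.
import numpy as np

def board_features(board):
    board = np.asarray(board, dtype=int)
    rows, _ = board.shape
    # column height = distance from the topmost filled cell down to the floor
    heights = np.where(board.any(axis=0), rows - np.argmax(board, axis=0), 0)
    # holes = empty cells with at least one filled cell somewhere above them
    covered = np.cumsum(board, axis=0) > 0
    holes = int(np.sum(covered & (board == 0)))
    # bumpiness = height differences between neighbouring columns
    diffs = np.abs(np.diff(heights))
    return {
        'sum_height': int(heights.sum()),
        'max_height': int(heights.max()),
        'min_height': int(heights.min()),
        'holes': holes,
        'total_bumpiness': int(diffs.sum()),
        'max_bumpiness': int(diffs.max()) if diffs.size else 0,
    }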
def dqn():
    env = Tetris()
    episodes = 2000
    max_steps = None
    epsilon_stop_episode = 1500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 50
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    n_neurons = [64, 32, 16]
    render_delay = None
    activations = ['relu', 'relu', 'relu', 'linear']

    agent = DQNAgent(
        env.get_state_size(),
        epsilon=0,
        n_neurons=n_neurons,
        activations=activations,
        epsilon_stop_episode=epsilon_stop_episode,
        mem_size=mem_size,
        discount=discount,
        replay_start_size=replay_start_size,
    )

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []
    scores_sum = 0
    score_max = 0

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            # print('\n\n', next_states)
            best_state = agent.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1], render=render,
                                    render_delay=render_delay)

            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            steps += 1

        score = env.get_game_score()
        scores.append(score)
        scores_sum += score
        if score > score_max:
            score_max = score

        if episode != 0 and episode % render_every == 0:
            # print('SCORES SUM:', scores_sum, 'AVG:', scores_sum / render_every, 'MAX:', score_max)
            scores_sum = 0
            score_max = 0

        # Train (disabled: this variant runs a fixed greedy agent, epsilon=0)
        # if episode % train_every == 0:
        #     agent.train(batch_size=batch_size, epochs=epochs)
        print('Done!')
        sleep(30)

        # Logs
        if log_every and episode and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])
            log.log(episode, avg_score=avg_score, min_score=min_score,
                    max_score=max_score)
def dqn(conf: AgentConf):
    env = Tetris()
    agent = DQNAgent(env.get_state_size(),
                     n_neurons=conf.n_neurons, activations=conf.activations,
                     epsilon=conf.epsilon, epsilon_min=conf.epsilon_min,
                     epsilon_stop_episode=conf.epsilon_stop_episode,
                     mem_size=conf.mem_size, discount=conf.discount,
                     replay_start_size=conf.replay_start_size)

    timestamp_str = datetime.now().strftime("%Y%m%d-%H%M%S")
    # conf.mem_size = mem_size
    # conf.epochs = epochs
    # conf.epsilon_stop_episode = epsilon_stop_episode
    # conf.discount = discount
    log_dir = f'logs/tetris-{timestamp_str}-ms{conf.mem_size}-e{conf.epochs}-ese{conf.epsilon_stop_episode}-d{conf.discount}'
    log = CustomTensorBoard(log_dir=log_dir)
    print(f"AGENT_CONF = {log_dir}")

    scores = []

    episodes_wrapped: Iterable[int] = tqdm(range(conf.episodes))
    for episode in episodes_wrapped:
        current_state = env.reset()
        done = False
        steps = 0

        # update render flag
        render = True if conf.render_every and episode % conf.render_every == 0 else False

        # game
        while not done and (not conf.max_steps or steps < conf.max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            # find the action that corresponds to the best state
            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.hard_drop([best_action[0], 0], best_action[1],
                                         render=render)

            agent.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            steps += 1

        # just record the score
        scores.append(env.get_game_score())

        # train
        if episode % conf.train_every == 0:
            # n = len(agent.memory)
            # print(f" agent.memory.len: {n}")
            agent.train(batch_size=conf.batch_size, epochs=conf.epochs)

        # logs
        if conf.log_every and episode and episode % conf.log_every == 0:
            avg_score = mean(scores[-conf.log_every:])
            min_score = min(scores[-conf.log_every:])
            max_score = max(scores[-conf.log_every:])
            log.log(episode, avg_score=avg_score, min_score=min_score,
                    max_score=max_score)

    # save_model
    save_model(agent.model, f'{log_dir}/model.hdf', overwrite=True,
               include_optimizer=True)
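# This variant reads its hyperparameters from an AgentConf object defined
# elsewhere in the repo. A minimal sketch of the fields the function above
# actually touches; the field names come from the usages, while the defaults
# are assumptions mirroring the other scripts in this file.
from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class AgentConfSketch:
    episodes: int = 2000
    max_steps: Optional[int] = None
    epsilon: float = 1.0
    epsilon_min: float = 0.0
    epsilon_stop_episode: int = 1500
    mem_size: int = 20000
    discount: float = 0.95
    batch_size: int = 512
    epochs: int = 1
    render_every: Optional[int] = 50
    log_every: Optional[int] = 50
    replay_start_size: int = 2000
    train_every: int = 1
    n_neurons: List[int] = field(default_factory=lambda: [32, 32])
    activations: List[str] = field(default_factory=lambda: ['relu', 'relu', 'linear'])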
def dqn():
    episodes = 10000
    max_steps = None
    epsilon_stop_episode = 7000
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 1000
    log_every = 20
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']

    env = Tetris()

    '''
    with open(r"saved_agents/pickled_new_agent_10000_7000", "rb") as input_file:
        agent = pickle.load(input_file)
    agent.epsilon = 0
    '''

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons, activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                     discount=discount, replay_start_size=replay_start_size)
    agent.epsilon = 0

    '''
    hateris = DQNAgent(env.get_state_size() + 1,
                       n_neurons=n_neurons, activations=activations,
                       epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                       discount=discount, replay_start_size=replay_start_size)
    # env.hater = hateris
    '''

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []
    tot_max_score = 0

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states(env.current_piece)
            best_state = agent.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1], render=render,
                                    render_delay=render_delay)

            # agent.add_to_memory(current_state, next_states[best_action], reward, done)
            # hateris.add_to_memory(current_state + [env.current_piece],
            #                       next_states[best_action] + [env.current_piece],
            #                       -reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())

        # Train
        # if episode % train_every == 0:
        #     agent.train(batch_size=batch_size, epochs=epochs)
        #     hateris.train(batch_size=batch_size, epochs=epochs)

        # Logs
        # if log_every and episode and episode % log_every == 0 and episode > 101:
        if log_every and episode and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])
            print(str(episode) + " " + str(avg_score) + " " + str(min_score)
                  + " " + str(max_score))

            '''
            if (tot_max_score < max_score):
                agent.save("dqnAgentMax10000.h5", episode)
                tot_max_score = max_score
            '''
            # agent.save("dqnAgent10000.h5", episode)

    # with open("saved_agents/pickled_new_agent_10000_7000", "wb") as input_file:
    #     pickle.dump(agent, input_file)

    plt.plot(scores)
    plt.show()
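# The commented blocks above persist the entire agent with pickle (weights,
# replay memory and epsilon schedule included), unlike the .h5 files, which
# hold only Keras weights. A sketch of that round trip, shown out of context
# (`agent` is the trained DQNAgent; whether the agent object is picklable
# depends on the Keras version, so treat this as an assumption):
import pickle

# save after training
with open("saved_agents/pickled_new_agent_10000_7000", "wb") as f:
    pickle.dump(agent, f)

# reload for greedy evaluation
with open("saved_agents/pickled_new_agent_10000_7000", "rb") as f:
    agent = pickle.load(f)
agent.epsilon = 0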
def dqn():
    trainingAgent = False
    trainingHater = False
    env = Tetris(trainingAgent or trainingHater)
    episodes = 2000
    max_steps = None
    epsilon_stop_episode = 1500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 200 if (trainingAgent or trainingHater) else 10
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']

    agent_save_filepath = "keras_saved_maxbump.h5"
    # hater_save_filepath = "hater_changed_reward.h5"
    hater_save_filepath = "hater_best.h5"

    # Avg 135 || reward function = 1 + (lines_cleared ** 2)*self.BOARD_WIDTH - (.1)*self._bumpiness(self.board)[0]/self.BOARD_WIDTH
    # 200 death penalty
    # agent_save_filepath = "keras_saved_maxbump.h5"

    # Avg 25 || reward function = 1 + (lines_cleared ** 2)*self.BOARD_WIDTH
    # 2 death penalty
    # agent_save_filepath = "keras_saved.h5"

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons, activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                     discount=discount, replay_start_size=replay_start_size,
                     training=trainingAgent, agent_save_filepath=agent_save_filepath)

    hateris = DQNAgent(env.get_state_size(),
                       n_neurons=n_neurons, activations=activations,
                       epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                       discount=discount, replay_start_size=replay_start_size,
                       training=trainingHater, agent_save_filepath=hater_save_filepath)
    env.hater = hateris

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1], render=render,
                                    render_delay=render_delay)

            # Append the relevant piece id when the stored state is one short
            if len(current_state) == env.get_state_size() - 1 and trainingAgent:
                toBeAdded = current_state + [env.next_piece]
            elif len(current_state) == env.get_state_size() - 1 and trainingHater:
                toBeAdded = current_state + [env.current_piece]
            else:
                toBeAdded = current_state

            if trainingAgent:
                agent.add_to_memory(toBeAdded, next_states[best_action], reward, done)
            if trainingHater:
                # The hater learns from the negated reward
                hateris.add_to_memory(toBeAdded, next_states[best_action], -reward, done)

            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0 and trainingAgent:
            agent.train(batch_size=batch_size, epochs=epochs)
        if episode % train_every == 0 and trainingHater:
            hateris.train(batch_size=batch_size, epochs=epochs)

        # Logs
        if log_every and episode and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])
            std_score = stdev(scores[-log_every:])
            print(str(episode) + " Avg: " + str(avg_score) + " Min: "
                  + str(min_score) + " Max: " + str(max_score) + " Std: "
                  + str(round(std_score, 2)))

        if episode == epsilon_stop_episode and trainingAgent:
            agent.save_agent("agent_stopEps.h5")
        if episode == epsilon_stop_episode and trainingHater:
            hateris.save_agent("hater_stopEps.h5")

    if trainingAgent:
        agent.save_agent("real_agent.h5")
    if trainingHater:
        hateris.save_agent("real_hater.h5")
    plt.plot(scores)
    plt.show()
def dqn():
    training = False
    env = Tetris(training)
    episodes = 2000
    max_steps = None
    epsilon_stop_episode = 1500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 200 if training else 10
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']

    # Must stay defined: it is passed to DQNAgent below
    agent_save_filepath = "keras_saved_maxbump.h5"

    # with open("saved_agent", "rb") as input_file:
    #     agent = pickle.load(input_file)
    # agent.epsilon = 0

    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons, activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode, mem_size=mem_size,
                     discount=discount, replay_start_size=replay_start_size,
                     training=training, agent_save_filepath=agent_save_filepath)

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1], render=render,
                                    render_delay=render_delay)

            if len(current_state) == env.get_state_size() - 1:
                toBeAdded = current_state + [env.next_piece]
            else:
                toBeAdded = current_state

            if training:
                agent.add_to_memory(toBeAdded, next_states[best_action], reward, done)

            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0 and training:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Logs
        if log_every and episode and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])
            std_score = stdev(scores[-log_every:])
            print(str(episode) + " Avg: " + str(avg_score) + " Min: "
                  + str(min_score) + " Max: " + str(max_score) + " Std: "
                  + str(round(std_score, 2)))

        if episode == epsilon_stop_episode:
            agent.save_agent("keras_saved_stopEps.h5")

    if training:
        agent.save_agent("keras_saved.h5")

    plt.plot(scores)
    plt.show()
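# To watch a trained agent without exploration, reload the saved weights and
# zero epsilon before running the game loop. A sketch shown out of context,
# assuming save_agent() wrote plain Keras weights (if it saved a full model,
# keras.models.load_model would be the counterpart instead):
env = Tetris(False)
agent = DQNAgent(env.get_state_size(),
                 n_neurons=[32, 32], activations=['relu', 'relu', 'linear'],
                 epsilon_stop_episode=1500, mem_size=20000, discount=0.95,
                 replay_start_size=2000, training=False,
                 agent_save_filepath="keras_saved.h5")
agent.model.load_weights("keras_saved.h5")
agent.epsilon = 0  # fully greedy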
def dqn():
    env = Tetris()
    episodes = 4000
    max_steps = None
    batch_size = 512
    epochs = 1
    render_every = 50
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    render_delay = None

    algo = DQNAlgorithm(env.get_state_size())

    log_dir = f'logs/tetris-nn={str([32, 32])}-mem={20000}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []
    times = []

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = algo.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0], best_action[1], render=render,
                                    render_delay=render_delay)

            algo.add_to_memory(current_state, next_states[best_action], reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())
        times.append(env.get_game_time())

        # Train
        if episode % train_every == 0:
            algo.train(batch_size=batch_size, epochs=epochs)

        # Logs
        if log_every and episode and episode % log_every == 0:
            recent_scores = scores[-log_every:]
            recent_times = times[-log_every:]  # renamed to avoid shadowing time()
            avg_score = mean(recent_scores)
            min_score = min(recent_scores)
            max_score = max(recent_scores)
            avg_time = mean(recent_times)
            min_time = min(recent_times)
            max_time = max(recent_times)
            log.log(episode, avg_score=avg_score, min_score=min_score,
                    max_score=max_score, avg_time=avg_time, min_time=min_time,
                    max_time=max_time)
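# All of these scripts log through a CustomTensorBoard wrapper whose only used
# method is log(step, **scalars). Its implementation lives elsewhere in the
# repo; a minimal sketch of that assumed interface using tf.summary (the real
# class may wrap keras.callbacks.TensorBoard instead):
import tensorflow as tf

class CustomTensorBoardSketch:
    def __init__(self, log_dir):
        self.writer = tf.summary.create_file_writer(log_dir)

    def log(self, step, **stats):
        # one scalar series per keyword, viewable with `tensorboard --logdir logs`
        with self.writer.as_default():
            for name, value in stats.items():
                tf.summary.scalar(name, value, step=step)
            self.writer.flush()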