# Create the game window and configure keyboard input.
gameDisplay = pygame.display.set_mode(DISPLAY_SHAPE)
pygame.display.set_caption('Bouncing Balls')
# Enable key repeat with delay=1 and interval=1 so a held key keeps
# generating KEYDOWN events.
pygame.key.set_repeat(1, 1)

# The second argument is 1/FPS -- presumably the per-frame time step; confirm
# against GameEnvironment.
env = GameEnvironment(DISPLAY_SHAPE, 1.0 / float(FPS))


def action_vector(a):
    """Return a one-hot vector of length 9 with entry ``int(a)`` set to 1.0.

    Args:
        a: Action index; it is converted with ``int()`` before indexing.

    Returns:
        A ``numpy`` float array of shape ``(9,)`` that is zero everywhere
        except at position ``int(a)``.

    Raises:
        IndexError: If ``int(a)`` is not a valid index into a 9-element array.
    """
    res = np.zeros(9)
    res[int(a)] = 1.0
    return res


# Define Experience Replay
if SAVE_EXPERIENCE:
    # NOTE(review): the original indentation was lost; the fallback below is
    # assumed to belong to the SAVE_EXPERIENCE branch -- confirm.
    er = ExperienceReplay.load(EXP_REPLAY_FILE)
    # ``load`` may return None; in that case start from an empty buffer.
    # Identity comparison is the correct test for the None singleton.
    if er is None:
        er = ExperienceReplay(BUFFER_SIZE)


def gameover(hero_score):
    """Blank the window, draw "GAME OVER" and pause for 3000 ms.

    Args:
        hero_score: Not used by the visible body; kept so existing callers
            keep working.
    """
    gameDisplay.fill(WHITE)
    font = pygame.font.SysFont(None, 42)
    text = font.render("GAME OVER", True, BLACK)
    # Place the text one third of the way across and down the display.
    gameDisplay.blit(text, (DISPLAY_SHAPE[0] / 3, DISPLAY_SHAPE[1] / 3))
    pygame.display.update()
    pygame.time.delay(3000)
import sys
import os

from ddq_model import Qnet
from experience_replay import ExperienceReplay
from utils import Config

# Build the run configuration from the command-line arguments (script name
# excluded).
argv = sys.argv[1:]
config = Config(argv)

env = gym.make('GazeboTurtlebotMazeColor-v0')

replay = ExperienceReplay(config.args.output_dir, config.args.replay_buffer_size)
qnet = Qnet(env.num_state, env.num_action)

# Either resume a previous run (network and replay are both loaded from
# ``continue_from``) or load only the network from ``from_pretrain``.
if config.args.continue_from is not None:
    qnet.load(config.args.continue_from)
    replay.load(config.args.continue_from)
elif config.args.from_pretrain is not None:
    qnet.load(config.args.from_pretrain)

epsilon = config.args.start_epsilon
# Size of one annealing step: the epsilon range divided by annealing_steps.
# Presumably subtracted from ``epsilon`` further down the loop -- confirm.
epsilon_decay = (config.args.start_epsilon - config.args.end_epsilon) / config.args.annealing_steps

# One iteration per episode; the rest of the loop body lies beyond this excerpt.
while True:
    state = env.reset()
    # A separate replay object is created for each episode.
    replay_ep = ExperienceReplay(config.args.output_dir, config.args.replay_buffer_size)
    total_reward = 0
    num_random_step = 0
    start_step = config.total_step