def __init__(self, environment, agent, config, output_dir):
    """Set up the run state, optional observation log, replays and display.

    Args:
        environment: Object providing ``initial_state()``; stored as
            ``self.environment``.
        agent: The learning agent; stored as ``self.agent``.
        config: Mapping read for the keys ``"fps"``, ``"show"``,
            ``"max_time"``, ``"debug_mode"``, ``"log_observations"``,
            ``"replay_observations"`` and (only when replaying)
            ``"replay_count"``.
        output_dir: Directory path; ``observations.json`` is created inside
            it when ``config["log_observations"]`` is truthy.
    """
    self.environment = environment
    self.s = self.environment.initial_state()
    # Positive and non-positive rewards are accumulated separately.
    self.total_pos_r = 0.0
    self.total_neg_r = 0.0
    self.agent = agent
    self.fps = config["fps"]
    self.show = config["show"]
    self.max_time = config["max_time"]
    self.output_dir = output_dir
    self.oserializer = ObservationSerializer()
    self.debug_mode = config["debug_mode"]

    # The log handle is kept open on the instance for later writes; this
    # function does not close it.
    if config["log_observations"]:
        self.obs_log_file = open(
            "{}/observations.json".format(output_dir), "w")
    else:
        self.obs_log_file = None

    # Feed previously recorded observations to the agent, repeating the
    # whole list of files ``replay_count`` times.
    if config["replay_observations"]:
        for _ in range(config["replay_count"]):
            for fn in config["replay_observations"]:
                self.replay_observations(fn)

    if self.show:
        # Imported lazily so pygame is only required when rendering.
        # The former function-level ``from pygame.locals import *`` was
        # dropped: it is a SyntaxError on Python 3 and none of the names it
        # bound were used in this function.
        import pygame
        pygame.init()
        self.font = pygame.font.SysFont(None, 28)
        self.screen = pygame.display.set_mode(Globals.SCREEN_DIMS, 0, 32)
        self.clock = pygame.time.Clock()
        pygame.display.set_caption("Tetris")
        self.draw()
class Engine(object):
    """Run an agent against an environment, optionally rendering via pygame.

    The engine repeatedly asks the agent for an action, applies it to the
    environment, accumulates rewards, optionally logs each transition as a
    JSON line, and feeds the transition back to the agent.
    """

    # Side length, in pixels, of one grid cell (also used as the margin).
    CELL_SIZE = 20
    # Frame rate used while stepping through debug bitmaps.
    DEBUG_FPS = 6
    # The agent's model is saved every this many iterations.
    SAVE_INTERVAL = 1000

    # pygame module handle; populated by __init__ only when rendering.
    _pygame = None

    def __init__(self, environment, agent, config, output_dir):
        """Set up the run state, optional observation log, replays, display.

        Args:
            environment: Object providing ``initial_state()`` and
                ``next_state_and_reward(state, action)``.
            agent: Object providing ``act``, ``observe_sars_tuple`` and
                ``save_model``.
            config: Mapping read for the keys ``"fps"``, ``"show"``,
                ``"max_time"``, ``"debug_mode"``, ``"log_observations"``,
                ``"replay_observations"`` and (only when replaying)
                ``"replay_count"``.
            output_dir: Directory path used for ``observations.json`` and
                the periodic model checkpoints.
        """
        self.environment = environment
        self.s = self.environment.initial_state()
        # Positive and non-positive rewards are accumulated separately.
        self.total_pos_r = 0.0
        self.total_neg_r = 0.0
        self.agent = agent
        self.fps = config["fps"]
        self.show = config["show"]
        self.max_time = config["max_time"]
        self.output_dir = output_dir
        self.oserializer = ObservationSerializer()
        self.debug_mode = config["debug_mode"]
        self._pygame = None

        if config["log_observations"]:
            # Kept open for the engine's lifetime; release it with close().
            self.obs_log_file = open(
                "{}/observations.json".format(output_dir), "w")
        else:
            self.obs_log_file = None

        # Feed previously recorded observations to the agent, repeating the
        # whole list of files ``replay_count`` times.
        if config["replay_observations"]:
            for _ in range(config["replay_count"]):
                for fn in config["replay_observations"]:
                    self.replay_observations(fn)

        if self.show:
            # Imported lazily so headless runs do not need pygame. The module
            # is stored on the instance because a function-level import is
            # not visible to the other methods.
            import pygame
            self._pygame = pygame
            pygame.init()
            self.font = pygame.font.SysFont(None, 28)
            self.screen = pygame.display.set_mode(Globals.SCREEN_DIMS, 0, 32)
            self.clock = pygame.time.Clock()
            pygame.display.set_caption("Tetris")
            self.draw()

    def close(self):
        """Close the observation log file, if one is open."""
        if self.obs_log_file is not None:
            self.obs_log_file.close()
            self.obs_log_file = None

    def replay_observations(self, fn):
        """Replay transitions stored one JSON document per line in ``fn``.

        Each line is decoded, deserialized by ``self.oserializer`` and passed
        to ``self.agent.observe_sars_tuple``. Blank lines are skipped.
        """
        with open(fn) as fin:
            print("Replaying from file: {}...".format(fn))
            for line in fin:
                if not line.strip():
                    continue
                s, a, r, sprime, pfbm = self.oserializer.deserialize_json(
                    json.loads(line))
                self.agent.observe_sars_tuple(s, a, r, sprime, pfbm=pfbm)

    def detect_quit(self):
        """Exit the process if a pygame QUIT event is pending (display only)."""
        if not self.show:
            return
        pygame = self._pygame
        if pygame.event.peek(pygame.QUIT):
            pygame.quit()
            sys.exit()

    @staticmethod
    def _mean_active_column(b):
        """Return the mean column index of non-zero cells in ``b`` (0 if none)."""
        if b.sum() == 0:
            return 0
        return b.nonzero()[1].mean()

    def _draw_debug_bitmaps(self, pfbms):
        """Show each debug bitmap in turn, ordered by mean active column."""
        for pfbm in sorted(pfbms, key=self._mean_active_column):
            self.clock.tick(self.DEBUG_FPS)
            self.draw_bitmap(pfbm)
            self._pygame.display.update()
        # NOTE(review): the collapsed original does not show whether this
        # final tick was inside or after the loop -- confirm intended pacing.
        self.clock.tick(self.DEBUG_FPS)

    def loop(self):
        """Run the act/step/observe cycle for ``self.max_time`` iterations.

        Every ``SAVE_INTERVAL`` iterations the agent's model is saved under
        ``self.output_dir``. A runtime and reward summary is printed at the
        end, and the observation log (if any) is flushed.
        """
        self.draw()
        # time.clock was removed in Python 3.8; use it only where
        # perf_counter is unavailable (Python 2).
        timer = getattr(time, "perf_counter", None) or time.clock
        t = 0
        start = timer()
        while True:
            t += 1
            if t % self.SAVE_INTERVAL == 0:
                self.agent.save_model("{}/model.{:06d}iters".format(
                    self.output_dir, t))
            if t > self.max_time:
                break
            if self.show:
                self.clock.tick(self.fps)
            self.detect_quit()

            a, debug_info = self.agent.act(self.s, debug_mode=self.debug_mode)
            # Debug rendering needs the display objects created when
            # ``show`` is enabled, so it is skipped in headless runs.
            if self.debug_mode and self.show:
                self._draw_debug_bitmaps(debug_info["pfbms"])

            sprime, r, pfbm, rcounts = self.environment.next_state_and_reward(
                self.s, a)
            if "rows_cleared" in rcounts:
                logging.info("ROWS_CLEARED: {}".format(
                    rcounts["rows_cleared"]))
            if "game_over" in rcounts:
                logging.info("GAME_OVER")

            if r > 0:
                self.total_pos_r += r
            else:
                self.total_neg_r += r

            if self.obs_log_file:
                self.obs_log_file.write("{}\n".format(
                    json.dumps(
                        self.oserializer.serialize_json(
                            self.s, a, r, sprime, pfbm=pfbm))))

            self.agent.observe_sars_tuple(self.s, a, r, sprime, pfbm=pfbm)
            self.s = sprime
            self.draw()

        duration = timer() - start
        if self.obs_log_file:
            # Make sure logged transitions reach disk once the run is over.
            self.obs_log_file.flush()
        print("Runtime={:.2f}s T={} Total Reward: {:.2f} {:.2f}".format(
            duration, t, self.total_pos_r, self.total_neg_r))

    def _cell_rect(self, r, c):
        """Return the ``(x, y, width, height)`` pixel rect for cell (r, c)."""
        w = self.CELL_SIZE
        return (w + (w * c), w + (w * r), w, w)

    def draw(self):
        """Redraw the whole frame: reward text, arena bitmap, current shape.

        Does nothing when the display is disabled.
        """
        if not self.show:
            return
        self.screen.fill(Colors.BLACK)
        b = self.s.arena.bitmap
        ls = self.s.lshape
        text = self.font.render(
            "Total Reward: {:.2f} {:.2f}".format(self.total_pos_r,
                                                 self.total_neg_r),
            True, Colors.WHITE, Colors.BLUE)
        text_rect = text.get_rect()
        # Centre the text 250 px past the grid's pixel width.
        text_rect.centerx = (self.CELL_SIZE * b.shape[1]) + 250
        text_rect.centery = self.screen.get_rect().centery
        self.screen.blit(text, text_rect)
        self.draw_bitmap(b)
        self.draw_lshape(ls)
        self._pygame.display.update()

    def draw_bitmap(self, b):
        """Draw every cell of 2-D bitmap ``b``: green if truthy, gray if not."""
        for r in range(b.shape[0]):
            for c in range(b.shape[1]):
                color = Colors.GREEN if b[r, c] else Colors.GRAY
                self._pygame.draw.rect(
                    self.screen, color, self._cell_rect(r, c))

    def draw_lshape(self, ls):
        """Draw each ``(row, col)`` coordinate of ``ls.coords()`` in blue."""
        for coord in ls.coords():
            r, c = (coord[0], coord[1])
            self._pygame.draw.rect(
                self.screen, Colors.BLUE, self._cell_rect(r, c))