class Board(GameObject):
    """Grid of a 2048-style sliding-tile game.

    The board keeps two parallel ``rows x cols`` grids:

    * ``self.m`` holds the numeric value of every cell (``0`` = empty) and
      is updated immediately when a move is made;
    * ``self.tiles`` holds the tile objects produced by ``TileFactory``
      (``None`` = no tile) and is updated as tiles are told to move.

    Merges are resolved in two phases: ``handle_move`` queues the two
    merging tiles for destruction and the merged value for spawning, and
    ``handle_post_move`` applies the queue once the delay of
    ``TILE_MOVE_DURATION_MS`` has elapsed.
    """

    # Directions
    LEFT = 0
    RIGHT = 1
    UP = 2
    DOWN = 3

    TILE_MOVE_DURATION_MS = 100
    # Cell value that completes the game (see ``is_complete``).
    WIN_VALUE = 2048

    def __init__(self, rows, cols, iterable=None):
        """Create an empty board, optionally pre-filled from ``iterable``.

        Args:
            rows: Number of rows.
            cols: Number of columns.
            iterable: Optional cell values in row-major order. Falsy values
                leave the cell empty. If it yields fewer than
                ``rows * cols`` values the remaining cells stay empty.
        """
        super().__init__(gs.BOARD_POS)
        self.rows = rows
        self.cols = cols
        self.m = [[0 for _ in range(cols)] for _ in range(rows)]
        self.tiles = [[None for _ in range(cols)] for _ in range(rows)]
        self.tiles_to_destroy = []
        self.tiles_to_spawn = []
        self.should_wait_for_move_finished = False
        self.tile_factory = TileFactory(self)
        self.scorer = Scorer((10, 5))

        board_image = pygame.image.load("data/images/board.png")
        board_image_size = (gs.BOARD_WIDTH + gs.BOARD_BORDER,
                            gs.BOARD_HEIGHT + gs.BOARD_BORDER)
        self.board_image = pygame.transform.scale(board_image,
                                                  board_image_size)

        if iterable is not None:
            for (i, j), val in zip(self._cells(), iterable):
                if val:
                    # Go through spawn() so that initial tiles get the same
                    # setup (including ``parent``) as tiles spawned later.
                    self.spawn(val, i, j)

    def _cells(self):
        """Iterate over every ``(row, col)`` pair in row-major order."""
        return itertools.product(range(self.rows), range(self.cols))

    def val_spawner(self):
        """Default value generator for new tiles: returns 2 or 4."""
        return 2 * random.randint(1, 2)

    def pos_selector(self, select_from):
        """Default position picker: a random element of ``select_from``."""
        return select_from[random.randint(0, len(select_from) - 1)]

    def __repr__(self):
        rows = [
            "".join(f"{value}, " for value in self.m[i][:self.cols]) + "\n"
            for i in range(self.rows)
        ]
        return "Board:\n" + "".join(rows)

    def get_rect(self):
        """Return the board rectangle built from the ``gs`` settings."""
        return pygame.Rect(gs.BOARD_POS, gs.BOARD_SIZE)

    def get_state(self):
        """Return all cell values as a flat, row-major list."""
        return [self.m[i][j] for i, j in self._cells()]

    def check_state(self, state):
        """Return True when ``state`` equals the current flat board state.

        A ``state`` of a different length never matches.
        """
        return self.get_state() == list(state)

    def is_deadend(self):
        """Return True when no move is possible.

        That is the case when there is no empty cell and no two
        horizontally or vertically adjacent cells hold the same value.
        """
        m = self.m
        # check empty cells
        if any(m[i][j] == 0 for i, j in self._cells()):
            return False
        # check adjacent cells
        for i, j in self._cells():
            if j + 1 < self.cols and m[i][j] == m[i][j + 1]:
                return False
            if i + 1 < self.rows and m[i][j] == m[i + 1][j]:
                return False
        return True

    def is_complete(self):
        """Return True when any cell holds ``WIN_VALUE``."""
        return any(self.m[i][j] == self.WIN_VALUE for i, j in self._cells())

    def spawn(self, value, row, col):
        """Put ``value`` into the cell and create its tile, parented here."""
        self.m[row][col] = value
        self.tiles[row][col] = self.tile_factory.create(value, row, col)
        self.tiles[row][col].parent = self

    def spawn_random(self, val_spawner=None, pos_selector=None):
        """Spawn one tile in an empty cell.

        Args:
            val_spawner: Callable returning the new value; defaults to
                ``self.val_spawner``.
            pos_selector: Callable taking the list of empty ``(row, col)``
                cells and returning one of them; defaults to
                ``self.pos_selector``.
        """
        if val_spawner is None:
            val_spawner = self.val_spawner
        if pos_selector is None:
            pos_selector = self.pos_selector

        empty_cells = [(i, j) for i, j in self._cells() if self.m[i][j] == 0]
        assert empty_cells, "spawn_random() called on a full board"

        row, col = pos_selector(empty_cells)
        self.spawn(val_spawner(), row, col)

    def delayed_call(self, duration, call):
        """Register a process that invokes ``call`` after ``duration``.

        The process is stored in ``self.processes`` under a fresh id taken
        from ``GameObject.next_proc_id``. Each time it is run with an
        elapsed ``dtime`` it counts down, returning ``"INPROGRESS"`` until
        the time is used up, then calls ``call`` and returns ``"DONE"``.
        NOTE(review): presumably ``GameObject`` drives these processes and
        ``dtime`` is in the same unit as ``duration`` -- confirm there.
        """
        time_left = duration

        def delayed_call_process(dtime):
            nonlocal time_left
            time_left -= dtime
            if time_left <= 0:
                call()
                return "DONE"
            return "INPROGRESS"

        GameObject.next_proc_id += 1
        self.processes[GameObject.next_proc_id] = delayed_call_process

    def handle_post_move(self):
        """Finish a move: resolve queued merges and spawn a random tile."""
        for tile in self.tiles_to_destroy:
            self.scorer.add(tile.value)
            tile.destroy()
        self.tiles_to_destroy.clear()

        for val, row, col in self.tiles_to_spawn:
            self.spawn(val, row, col)
        self.tiles_to_spawn.clear()

        self.spawn_random()
        self.should_wait_for_move_finished = False

    def draw(self, surface):
        """Blit the board image, offset by 10 px up and left of ``gpos``."""
        surface.blit(self.board_image, (self.gpos.x - 10, self.gpos.y - 10))

    def move(self, direction):
        """Slide all tiles in ``direction`` (one of the class constants).

        Algorithm:
            1. Take line
            2. Collapse
            3. Replace line with collapsed one

        The call is ignored while a previous move is still being finished.
        """
        if self.should_wait_for_move_finished:
            return

        lines = self.get_lines(direction)
        new_lines, moves = self.collapse(lines)
        self.handle_moves(moves, direction)
        self.update_lines(new_lines, direction)

    def get_positions(self, direction):
        """Return, per line, the ``(row, col)`` cells in travel order.

        Index 0 of every line is the cell tiles slide towards. LEFT/RIGHT
        give one line per row, UP/DOWN one line per column. An unknown
        direction yields an empty list.
        """
        rows = range(self.rows)
        cols = range(self.cols)
        if direction == Board.LEFT:
            return [[(r, c) for c in cols] for r in rows]
        if direction == Board.RIGHT:
            return [[(r, c) for c in reversed(cols)] for r in rows]
        if direction == Board.UP:
            return [[(r, c) for r in rows] for c in cols]
        if direction == Board.DOWN:
            return [[(r, c) for r in reversed(rows)] for c in cols]
        return []

    def get_lines(self, direction):
        """Return copies of the cell values, laid out like get_positions."""
        return [[self.m[r][c] for r, c in line_positions]
                for line_positions in self.get_positions(direction)]

    def handle_moves(self, moves, direction):
        """Apply the per-line ``(from, to)`` index moves to the tiles.

        ``moves`` is the second value returned by ``collapse``; indices are
        relative to the lines of ``get_positions(direction)``. If at least
        one tile moved, ``handle_post_move`` is scheduled after
        ``TILE_MOVE_DURATION_MS``.
        """
        for line_positions, line_moves in zip(self.get_positions(direction),
                                              moves):
            for step in line_moves:
                self.handle_move(line_positions[step[0]],
                                 line_positions[step[1]])

        if self.should_wait_for_move_finished:
            self.delayed_call(Board.TILE_MOVE_DURATION_MS,
                              self.handle_post_move)

    def handle_move(self, cell_from, cell_to):
        """Move the tile at ``cell_from`` to ``cell_to``, merging if taken.

        When ``cell_to`` already holds a tile, both tiles are queued for
        destruction and a tile of twice the moving tile's value is queued
        to be spawned at ``cell_to``.
        """
        self.should_wait_for_move_finished = True
        tiles = self.tiles
        from_row, from_col = cell_from[0], cell_from[1]
        to_row, to_col = cell_to[0], cell_to[1]

        moving = tiles[from_row][from_col]
        resident = tiles[to_row][to_col]

        moving.move_to(to_row, to_col, Board.TILE_MOVE_DURATION_MS)
        tiles[from_row][from_col] = None

        if resident is not None:
            # Merge: the cell stays empty until handle_post_move spawns the
            # combined tile.
            tiles[to_row][to_col] = None
            self.tiles_to_destroy.append(moving)
            self.tiles_to_destroy.append(resident)
            self.tiles_to_spawn.append((moving.value * 2, to_row, to_col))
        else:
            tiles[to_row][to_col] = moving

    def update_lines(self, new_lines, direction):
        """Write collapsed lines back into ``self.m`` for ``direction``."""
        for line_positions, new_line in zip(self.get_positions(direction),
                                            new_lines):
            for (row, col), value in zip(line_positions, new_line):
                self.m[row][col] = value

    def collapse(self, lines):
        """Collapse every line; return ``(new_lines, moves_per_line)``."""
        new_lines = []
        moves = []
        for line in lines:
            new_line, line_moves = self.collapse_one_line(line)
            new_lines.append(new_line)
            moves.append(line_moves)
        return new_lines, moves

    def collapse_one_line(self, line):
        """Slide and merge one line towards index 0.

        Examples:
            [0, 2, 2, 4] -> [2, 0, 2, 4] -> [4, 0, 0, 4] -> [4, 4, 0, 0]
            [2, 4, 4, 2] -> [2, 4, 4, 2] -> [2, 8, 0, 2] -> [2, 8, 2, 0]
            [2, 4, 2, 4] -> [2, 4, 2, 4] (not a move)

        Args:
            line: Cell values in travel order; it is not modified.

        Returns:
            ``(new_line, moves)`` where ``moves`` is a list of
            ``(from_index, to_index)`` pairs in the order they happened.
        """
        line = list(line)  # work on a copy so the caller's list is untouched
        moves = []
        target_cell = 0
        for i in range(1, len(line)):
            if not line[i]:
                continue
            if line[target_cell] == 0:
                # move to empty cell
                line[target_cell], line[i] = line[i], line[target_cell]
                moves.append((i, target_cell))
            elif line[target_cell] == line[i]:
                # move and collapse; a merged cell cannot merge again
                line[target_cell] *= 2
                line[i] = 0
                moves.append((i, target_cell))
                target_cell += 1
            else:
                # blocked: the next free slot is right after the target
                target_cell += 1
                if target_cell != i:
                    line[target_cell], line[i] = line[i], line[target_cell]
                    moves.append((i, target_cell))
        return line, moves

    def test_collapse_algo(self):
        """Self-check of ``collapse_one_line``.

        Prints a dot per passing case and stops with a message at the
        first mismatch.
        """
        lines = [[0, 2, 2, 4], [2, 4, 4, 2], [2, 4, 2, 4], [2, 2, 2, 2],
                 [2, 2, 4, 4], [0, 0, 4, 2], [2, 0, 0, 2], [0, 2, 0, 2]]
        expected_lines = [[4, 4, 0, 0], [2, 8, 2, 0], [2, 4, 2, 4],
                          [4, 4, 0, 0], [4, 8, 0, 0], [4, 2, 0, 0],
                          [4, 0, 0, 0], [4, 0, 0, 0]]
        for line, check in zip(lines, expected_lines):
            new_line, _ = self.collapse_one_line(line)
            for i, v in enumerate(new_line):
                if v != check[i]:
                    print(
                        f"Board/test_collapse: {new_line} != {check} at pos {i}"
                    )
                    return
            print(".", end="")
def train_icfat(self):
    """Train StarGAN within a single dataset.

    Each batch from ``self.celeba_loader`` is a triple
    ``(batchA, batchP, batchN)``.
    NOTE(review): presumably anchor / positive (same identity) / negative
    (different identity) images -- confirm against the dataset class.

    Per iteration the routine:

    1. updates ``D_cls`` and ``D_src`` on a margin-ranking loss over the
       identity embeddings plus the real/fake adversarial loss;
    2. applies a gradient-penalty step to ``D_src``;
    3. every ``self.n_critic`` iterations updates ``G`` on adversarial,
       reconstruction and ranking losses.

    Training can resume from the JSON status file named by
    ``self.resume_training``. The final weights of ``G``, ``D_src`` and
    ``D_cls`` are written to ``self.model_save_dir`` when training ends.
    """
    # TODO: the original carried a large TODO banner here -- this routine
    # is still work in progress.

    def set_learning_rates(new_g_lr, new_d_lr):
        """Write the given learning rates into the live optimizers."""
        for group in self.g_optimizer.param_groups:
            group['lr'] = new_g_lr
        for optimizer in (self.dsrc_optimizer, self.dcls_optimizer):
            for group in optimizer.param_groups:
                group['lr'] = new_d_lr

    # Learning rate cache for decaying.
    g_lr = self.g_lr
    d_lr = self.d_lr

    # Start training from scratch or resume training.
    # FIXME this will need to change
    start_epoch = 0
    start_iters = 0
    if self.resume_training is not None:
        # recover status
        with open(self.resume_training, 'r') as f:
            status = json.load(f)['status']
        start_epoch = status['epoch']
        start_iters = status['iteration']
        g_lr = status['g_lr']
        d_lr = status['d_lr']
        # reload models
        self.restore_model(start_epoch, start_iters)
        # Make the restored rates effective instead of only caching them.
        set_learning_rates(g_lr, d_lr)

    # Collect up to 50 fixed (A, N) image pairs used for debugging images.
    fixed_x = []
    total = 50
    for i, (batchA, batchP, batchN) in enumerate(self.celeba_loader):
        for imageA, imageP, imageN in zip(batchA, batchP, batchN):
            print('Reading debugging images', i, total)
            fixed_x.append((imageA.unsqueeze(0), imageN.unsqueeze(0)))
            total -= 1
            if total == 0:
                break
        if total == 0:
            break

    scorer = Scorer(self.batch_size,
                    variables=('D_cls/distance_same',
                               'D_cls/distance_different',
                               'G/distance_same',
                               'G/distance_different'))

    criterion = torch.nn.MarginRankingLoss(margin=self.margin)

    self.data_loader = self.celeba_loader
    iters_per_epoch = len(self.data_loader)

    # Start training.
    print('Start training...')
    start_time = time.time()
    for e in range(start_epoch, self.num_epochs):
        for i, (batchA, batchP, batchN) in enumerate(self.data_loader):
            # current_iteration is used to keep the global iteration in
            # the case of resuming training
            if e == start_epoch:
                current_iteration = i + start_iters + 1
            else:
                current_iteration = i

            if e == start_epoch and \
                    i > (len(self.data_loader) - start_iters):
                break

            imageA = batchA.to(self.device)
            imageP = batchP.to(self.device)
            imageN = batchN.to(self.device)

            # ================== Train D_cls on CelebA ================== #
            # Compute loss with real images
            _, idA = self.D_cls(imageA)
            _, idP = self.D_cls(imageP)
            _, idN = self.D_cls(imageN)

            dist_different = F.pairwise_distance(idA, idN)
            dist_same = F.pairwise_distance(idA, idP)
            # The target must have exactly the shape of the distances;
            # ones_like guarantees that whatever pairwise_distance returns.
            d_loss_cls = criterion(dist_different, dist_same,
                                   torch.ones_like(dist_different))

            # Compute classification accuracy of D_cls
            d = {
                'D_cls/distance_same': torch.mean(dist_same).item(),
                'D_cls/distance_different':
                    torch.mean(dist_different).item()
            }
            scorer.add(d)

            if (i + 1) % self.log_step == 0:
                print('Classification distances (same/different): ',
                      end='')
                print([d['D_cls/distance_same'],
                       d['D_cls/distance_different']])

            # Logging (this dict collects every loss of the iteration)
            loss = OrderedDict()
            loss['D_cls/loss_cls'] = d_loss_cls.item()

            # ================== Train D_src on CelebA ================== #
            # Compute loss with real images
            out_srcA, _ = self.D_src(imageA)
            d_loss_real = -torch.mean(out_srcA)

            # Compute loss with fake images
            fake_x = self.G(imageA, idN)
            out_src_fake, _ = self.D_src(fake_x)
            d_loss_fake = torch.mean(out_src_fake)

            # ================== Optimization =========================== #
            # Backward + Optimize D's
            d_loss = d_loss_real + d_loss_fake
            e_loss = d_loss + self.lambda_cls * d_loss_cls
            self.reset_grad()
            e_loss.backward()
            self.dsrc_optimizer.step()
            self.dcls_optimizer.step()

            # Compute gradient penalty
            alpha = torch.rand(imageA.size(0), 1, 1, 1).to(self.device)
            x_hat = (alpha * imageA.data +
                     (1 - alpha) * fake_x.data).requires_grad_(True)
            out_src, _ = self.D_src(x_hat)
            d_loss_gp = self.gradient_penalty(out_src, x_hat)

            # FIXME don't we have to optimize D_cls as well with GP?!
            # FIXME if not, it could explain why at the end of training
            #       the GP error increases so much!
            _, out_cls = self.D_cls(x_hat)
            d_loss_gp += self.gradient_penalty(out_cls, x_hat)

            # Backward + Optimize
            d_loss = self.lambda_gp * d_loss_gp
            self.reset_grad()
            d_loss.backward()
            self.dsrc_optimizer.step()

            # Logging (keep adding to the same dict so that
            # 'D_cls/loss_cls' is not thrown away)
            loss['D_src/loss_real'] = d_loss_real.item()
            loss['D_src/loss_fake'] = d_loss_fake.item()
            loss['D/loss_gp'] = d_loss_gp.item()

            # ================== Train G ================== #
            if (i + 1) % self.n_critic == 0:
                # FIXME calling the network here is unnecessary
                _, idA = self.D_cls(imageA)
                _, idP = self.D_cls(imageP)
                _, idN = self.D_cls(imageN)

                # Original-to-target and target-to-original domain
                fake_c = idN
                real_c = idP
                fake_x = self.G(imageA, fake_c)
                rec_x = self.G(fake_x, real_c)

                # Compute losses
                out_src, _ = self.D_src(fake_x)
                _, idG = self.D_cls(fake_x)
                g_loss_fake = -torch.mean(out_src)
                g_loss_rec = torch.mean(torch.abs(imageA - rec_x))

                dist_to_source = F.pairwise_distance(idG, idA)
                dist_to_target = F.pairwise_distance(idG, idN)
                g_loss_cls = criterion(dist_to_source, dist_to_target,
                                       torch.ones_like(dist_to_source))

                # Backward + Optimize
                g_loss = g_loss_fake \
                    + self.lambda_rec * g_loss_rec \
                    + self.lambda_cls * g_loss_cls
                self.reset_grad()
                g_loss.backward()
                self.g_optimizer.step()

                # Logging
                loss['G/loss_fake'] = g_loss_fake.item()
                loss['G/loss_rec'] = g_loss_rec.item()
                loss['G/loss_cls'] = g_loss_cls.item()

                # Compute classification accuracy of the discriminator
                # FIXME I think there's a problem here, shouldn't it be
                # positive_distance the one between idN and idG?
                positive_distance = torch.sum(
                    F.pairwise_distance(idP, idG)).item()
                negative_distance = torch.sum(
                    F.pairwise_distance(idA, idG)).item()
                d = {
                    'G/distance_same': positive_distance / len(idG),
                    'G/distance_different': negative_distance / len(idG)
                }
                scorer.add(d)

            # Print log info
            if (i + 1) % self.log_step == 0:
                elapsed = time.time() - start_time
                elapsed = str(datetime.timedelta(seconds=elapsed))
                log = "Elapsed [{}], Epoch [{}/{}], Iter [{}/{}]".format(
                    elapsed, e + 1, self.num_epochs, i + 1,
                    iters_per_epoch)
                for tag, value in loss.items():
                    log += ", {}: {:.4f}".format(tag, value)
                print(log)

                scores = scorer.get_scores()
                for key, value in scores.items():
                    loss[key] = value

                if self.use_tensorboard:
                    for tag, value in loss.items():
                        self.logger.scalar_summary(
                            tag, value,
                            e * iters_per_epoch + current_iteration + 1)

            # save model
            if not self.save_epochs and \
                    (i + 1) % self.model_save_step == 0:
                self.generate_debugging_images(fixed_x, e,
                                               current_iteration)
                self.generate_validation_images(e, current_iteration)
                # Store the *current* rates so that resuming continues the
                # decay schedule instead of restarting it.
                self.save_training(e, current_iteration, g_lr, d_lr)
                print("Saved models..!")

        # ================== Debugging images ================== #
        if (e + 1) % self.sample_step == 0:
            self.generate_debugging_images(fixed_x, e, current_iteration)
            self.generate_validation_images(e, current_iteration)

        # ================== Checkpoints and lr decay ================== #
        if (e + 1) > (self.num_epochs - self.num_epochs_decay):
            if (e - (self.num_epochs - self.num_epochs_decay)) % \
                    self.decay_step == 0:
                g_lr -= (self.g_lr / float(self.decay_rate))
                d_lr -= (self.d_lr / float(self.decay_rate))
                print("Decay learning rate to g_lr: {}, d_lr: {}".format(
                    g_lr, d_lr))
                assert g_lr > 0.0
                assert d_lr > 0.0
                # Without this the decayed values were only printed and
                # never reached the optimizers.
                set_learning_rates(g_lr, d_lr)

        # Save model checkpoints
        # NOTE(review): this epoch-level save reuses the per-iteration
        # condition (``not self.save_epochs`` and the last ``i``); it looks
        # like it was meant to be ``self.save_epochs`` with an epoch
        # counter -- confirm the intent before changing it.
        if not self.save_epochs and \
                (i + 1) % self.model_save_step == 0:
            self.save_training(e, 0, g_lr, d_lr)
            print("Saved models..!")

    # Save model checkpoints when training is done
    torch.save(
        self.G.state_dict(),
        os.path.join(self.model_save_dir,
                     "{}_{}_G.pth".format(e + 1, i + 1)))
    torch.save(
        self.D_src.state_dict(),
        os.path.join(self.model_save_dir,
                     "{}_{}_D_src.pth".format(e + 1, i + 1)))
    torch.save(
        self.D_cls.state_dict(),
        os.path.join(self.model_save_dir,
                     "{}_{}_D_cls.pth".format(e + 1, i + 1)))
    print("Saved models..!")

    print("Train finished")