def extract_episodes(self):
    """Collect fresh episodes and convert them into tensor-based records.

    Episodes are gathered by ``collect_episodes`` using a ``ReinforcementAI``
    built around ``self.judge``; ``episode_cnt`` and ``episode_length`` are
    names resolved outside this method.

    Returns:
        A list with one dict per collected episode, holding:
            'obs':  the encoded boards of the episode, concatenated along
                    a new leading dimension.
            'idx':  a ``torch.long`` tensor of the encoded move indices.
            'prob': the entries of the first output of
                    ``self.judge.forward`` picked at ``[step, idx[step]]``
                    (presumably the probability the judge assigns to each
                    played move -- TODO confirm against the judge model).
            'ext':  the episode's 'extrinsic' entry, passed through as is.
            'over': result of ``is_game_over()`` on the episode's last board.
    """
    raw_episodes = collect_episodes(
        ReinforcementAI(self.judge), episode_cnt, episode_length)

    records = []
    for episode in raw_episodes:
        boards = episode['boards']

        # Give every encoded board a leading axis of size 1 so that
        # concatenating along dim 0 yields one row per board.
        encoded_boards = [
            Translator.encode_board(board).unsqueeze(0) for board in boards]
        observations = torch.cat(encoded_boards, 0)

        move_indices = torch.tensor(
            [Translator.encode_move_idx(move) for move in episode['moves']],
            dtype=torch.long)

        # Pair row i with move_indices[i] to pick one value per step from
        # the first output of the judge.
        first_output = self.judge.forward(observations)[0]
        step_rows = torch.arange(move_indices.shape[0], dtype=torch.long)
        selected = first_output[step_rows, move_indices]

        extrinsic = episode['extrinsic']
        finished = boards[-1].is_game_over()

        records.append({
            'obs': observations,
            'idx': move_indices,
            'prob': selected,
            'ext': extrinsic,
            'over': finished,
        })
    return records