def main():
    """Train a DQN-family agent on an Atari ROM with periodic evaluation.

    Builds a training and an evaluation environment, fills the replay memory
    until it holds ``MEMORY_WARMUP_SIZE`` entries, then runs training episodes
    until ``args.train_total_steps`` steps have been consumed. Each time the
    step counter crosses a multiple of ``args.test_every_steps`` the agent is
    evaluated for three episodes and the mean reward is logged.

    Raises:
        ValueError: if ``args.algo`` is not 'DQN', 'Dueling' or 'DDQN'.
    """
    env = get_player(
        args.rom, image_size=IMAGE_SIZE, train=True, frame_skip=FRAME_SKIP)
    test_env = get_player(
        args.rom,
        image_size=IMAGE_SIZE,
        frame_skip=FRAME_SKIP,
        context_len=CONTEXT_LEN)
    rpm = ReplayMemory(MEMORY_SIZE, IMAGE_SIZE, CONTEXT_LEN)
    act_dim = env.action_space.n

    model = AtariModel(act_dim, args.algo)
    if args.algo == 'DDQN':
        algorithm = parl.algorithms.DDQN(model, act_dim=act_dim, gamma=GAMMA)
    elif args.algo in ['DQN', 'Dueling']:
        algorithm = parl.algorithms.DQN(model, act_dim=act_dim, gamma=GAMMA)
    else:
        # Fail with a clear message instead of an unbound-name error below.
        raise ValueError('Unsupported algorithm: {}'.format(args.algo))
    agent = AtariAgent(
        algorithm,
        act_dim=act_dim,
        start_lr=LEARNING_RATE,
        total_step=args.train_total_steps,
        update_freq=UPDATE_FREQ)

    with tqdm(
            total=MEMORY_WARMUP_SIZE, desc='[Replay Memory Warm Up]') as pbar:
        while rpm.size() < MEMORY_WARMUP_SIZE:
            _, steps, _ = run_train_episode(env, agent, rpm)
            pbar.update(steps)

    # train
    test_flag = 0
    total_steps = 0
    # The context manager closes the bar even if an episode raises.
    with tqdm(total=args.train_total_steps) as pbar:
        while total_steps < args.train_total_steps:
            # start epoch
            total_reward, steps, loss = run_train_episode(env, agent, rpm)
            total_steps += steps
            pbar.set_description('[train]exploration:{}'.format(
                agent.exploration))
            summary.add_scalar('dqn/score', total_reward, total_steps)
            summary.add_scalar('dqn/loss', loss,
                               total_steps)  # mean of total loss
            summary.add_scalar('dqn/exploration', agent.exploration,
                               total_steps)
            pbar.update(steps)

            if total_steps // args.test_every_steps >= test_flag:
                # Skip past every evaluation threshold crossed by this
                # episode so only one evaluation runs.
                while total_steps // args.test_every_steps >= test_flag:
                    test_flag += 1
                pbar.write("testing")
                eval_rewards = []
                for _ in tqdm(range(3), desc='eval agent'):
                    eval_rewards.append(run_evaluate_episode(test_env, agent))
                mean_eval_reward = np.mean(eval_rewards)
                logger.info(
                    "eval_agent done, (steps, eval_reward): ({}, {})".format(
                        total_steps, mean_eval_reward))
                summary.add_scalar('dqn/eval', mean_eval_reward, total_steps)
def main():
    """Train a DQN, Dueling or Double DQN agent and log evaluation metrics.

    The replay memory is first filled up to ``MEMORY_WARMUP_SIZE`` entries.
    A fixed batch of observations is then drawn from it so the Q value can
    be tracked on the same inputs during training. Training runs until
    ``args.train_total_steps`` steps have elapsed, evaluating for three
    episodes whenever a multiple of ``args.test_every_steps`` is crossed.
    """
    train_env = get_player(
        args.rom, image_size=IMAGE_SIZE, train=True, frame_skip=FRAME_SKIP)
    eval_env = get_player(
        args.rom,
        image_size=IMAGE_SIZE,
        frame_skip=FRAME_SKIP,
        context_len=CONTEXT_LEN)
    rpm = ReplayMemory(MEMORY_SIZE, IMAGE_SIZE, CONTEXT_LEN)
    act_dim = train_env.action_space.n
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    model = AtariModel(CONTEXT_LEN, act_dim, args.algo)
    if args.algo in ('DQN', 'Dueling'):
        algorithm = DQN(model, gamma=GAMMA, lr=args.lr)
    elif args.algo == 'Double':
        algorithm = DDQN(model, gamma=GAMMA, lr=args.lr)
    agent = AtariAgent(algorithm, act_dim=act_dim)

    with tqdm(
            total=MEMORY_WARMUP_SIZE, desc='[Replay Memory Warm Up]') as bar:
        while rpm.size() < MEMORY_WARMUP_SIZE:
            _, warm_steps, _ = run_train_episode(train_env, agent, rpm)
            bar.update(warm_steps)

    # Get fixed obs to check value function.
    sampled_obs = get_fixed_obs(rpm, args.batch_size)
    fixed_obs = torch.tensor(sampled_obs, dtype=torch.float, device=device)

    # train
    test_flag = 0
    total_steps = 0
    with tqdm(total=args.train_total_steps, desc='[Training Model]') as bar:
        while total_steps < args.train_total_steps:
            episode_reward, episode_steps, episode_loss = run_train_episode(
                train_env, agent, rpm)
            total_steps += episode_steps
            bar.update(episode_steps)

            if total_steps // args.test_every_steps < test_flag:
                continue
            while total_steps // args.test_every_steps >= test_flag:
                test_flag += 1

            eval_rewards = [
                run_evaluate_episode(eval_env, agent) for _ in range(3)
            ]

            # NOTE(review): SOURCE's indentation was lost; all scalars are
            # assumed to be logged only on evaluation rounds - confirm.
            summary.add_scalar('dqn/eval', np.mean(eval_rewards), total_steps)
            summary.add_scalar('dqn/score', episode_reward, total_steps)
            summary.add_scalar('dqn/loss', episode_loss, total_steps)
            summary.add_scalar('dqn/exploration', agent.exploration,
                               total_steps)
            fixed_q = evaluate_fixed_Q(agent, fixed_obs)
            summary.add_scalar('dqn/Q value', fixed_q, total_steps)
            grad_norm = get_grad_norm(agent.alg.model)
            summary.add_scalar('dqn/grad_norm', grad_norm, total_steps)
def get_match_history(self, request):
    """
    Retrieves a match's history.

    Players may only view the history of their own matches; access to
    another player's history is not granted.

    Returns:
        MatchForm -- the match details including the history

    Raises:
        ForbiddenException -- raised when the match does not exist or when
                              an attempt is made to view another player's
                              history
    """
    # Get the requested match and player from the datastore
    match = get_by_urlsafe(request.urlsafe_key, Match)
    player = get_player(request.user_name)

    # A missing match is rejected the same way cancel_match rejects it,
    # instead of failing on the attribute access below
    if not match:
        raise endpoints.ForbiddenException('Match does not exist.')

    # If match player and request player do not match, deny history access
    if match.player != player.key:
        raise endpoints.ForbiddenException(
            'You may not view another player\'s history')

    # Return history as part of a MatchForm representation
    return match.to_form(history=True)
def main():
    """Either train from stored replay memory or collect new experience.

    The replay memory is loaded from ``memory.npz`` and an existing
    ``./model.ckpt`` checkpoint is restored when present. With ``args.train``
    set, the agent trains on the stored memory and the model is saved;
    otherwise experience is collected from the environment and the memory is
    written back out.
    """
    checkpoint_path = './model.ckpt'
    memory_path = "memory.npz"

    env = get_player(
        args.rom, image_size=IMAGE_SIZE, train=True, frame_skip=FRAME_SKIP)
    # load replay memory data from file
    rpm = ReplayMemory(
        MEMORY_SIZE,
        IMAGE_SIZE,
        CONTEXT_LEN,
        load_file=True,
        file_path=memory_path)

    act_dim = env.action_space.n
    algorithm = DQN(
        AtariModel(act_dim),
        act_dim=act_dim,
        gamma=GAMMA,
        lr=LEARNING_RATE * gpu_num)
    agent = AtariAgent(
        algorithm, act_dim=act_dim, total_step=args.train_total_steps)

    if os.path.isfile(checkpoint_path):
        logger.info("load model from file")
        agent.restore(checkpoint_path)

    if not args.train:
        logger.info("collect experience")
        collect_exp(env, rpm, agent)
        rpm.save_memory()
        logger.info("finish collecting, save successfully")
        return

    logger.info("train with memory data")
    run_train_step(agent, rpm)
    logger.info("finish training. Save the model.")
    agent.save(checkpoint_path)
async def leave(
    client,
    event,
):
    """Disconnect the player resolved for this client and event."""
    voice_player = get_player(client, event)
    await voice_player.disconnect()
    return 'Left voice channel.'
async def move_player(
    client,
    event,
    channel: ('channel_group_connectable', 'Select a channel.'),
):
    """Move the player to the selected channel and confirm the move."""
    voice_player = get_player(client, event)
    await voice_player.move_to(channel)
    confirmation = f'Player moved to {channel:m}.'
    return confirmation
async def volume_(
    client,
    event,
    volume: P('number', 'Volume percentage', min_value=0, max_value=200),
):
    """Set the player's volume from a percentage.

    The percentage is divided by 100 before it is handed to the player, so
    100 maps to 1.0.
    """
    voice_player = get_player(client, event)
    volume_fraction = volume / 100.0
    await voice_player.set_volume(volume_fraction)
    return f'Volume set to: {volume}%.'
async def pause(
    client,
    event,
):
    """Pause playback unless the player is already paused."""
    voice_player = get_player(client, event)
    already_paused = voice_player.is_paused()
    if not already_paused:
        await voice_player.pause()

    # The same confirmation is returned whether or not a pause was needed.
    return 'Playing paused.'
async def resume(
    client,
    event,
):
    """Resume playback if the player is currently paused."""
    voice_player = get_player(client, event)
    currently_paused = voice_player.is_paused()
    if currently_paused:
        await voice_player.resume()

    # The same confirmation is returned whether or not a resume was needed.
    return 'Playing resumed.'
def get_player_profile(self, request):
    """
    Retrieves a player's profile.

    The player is looked up by the user name supplied in the request.

    Returns:
        ProfileForm -- the player's public profile as a ProfileForm
    """
    # Look the player up and hand back its ProfileForm representation
    return get_player(request.user_name).to_profileform()
def get_user_score(self, request):
    """
    Get a player's score.

    Included only to fulfil the Udacity course project requirements; the
    same information is part of a player's profile (see
    get_player_profile).

    Returns:
        ScoreForm -- a player's score in ScoreForm representation
    """
    # Look the player up by the requested user name
    requested_player = get_player(request.user_name)
    user_name = requested_player.user_name
    score = requested_player.score
    return ScoreForm(user_name=user_name, score=score)
def get_user_ranking(self, request):
    """
    Get a player's ranking.

    Included only to fulfil the Udacity course project requirements; the
    same information is part of a player's profile (see
    get_player_profile).

    Returns:
        ScoreForm -- a player's ranking (batting average) in ScoreForm
                     representation
    """
    # Look the player up by the requested user name
    requested_player = get_player(request.user_name)
    user_name = requested_player.user_name
    batting_avg = requested_player.batting_avg
    return ScoreForm(user_name=user_name, batting_avg=batting_avg)
async def move_track(
    client,
    event,
    old_position: ('int', 'The position of the track.'),
    new_position: ('int', 'The new position for the track.'),
):
    """Move a queued track from one position to another.

    Reports the moved track, or that nothing was moved when the player
    returns ``None`` for the request.
    """
    voice_player = get_player(client, event)
    moved = voice_player.move_track(old_position, new_position)
    if moved is not None:
        return f'Track moved: {create_track_repr(moved, None)}'

    return 'Nothing was moved.'
def create_match(self, request):
    """
    Creates a new match.

    This function allows players to create a new match. A match is an
    'instance' of a game. This allows multiple users to play the same game
    at the same time or different times. If the game is not in play mode
    yet, its creator can enable play mode as part of this request.

    Returns:
        the form representation produced by the new match's to_form()

    Raises:
        ForbiddenException -- raised when the game does not exist
        BadRequestException -- raised when game has no questions or
                               when game is not in play mode and the
                               requester is not its creator or
                               when play mode enabling is required, but
                               was not requested
    """
    # Get game and player from datastore
    game = get_by_urlsafe(request.urlsafe_game_key, Game)
    player = get_player(request.user_name)

    # A missing game is rejected the same way set_playmode rejects it,
    # instead of failing on the attribute access below
    if not game:
        raise endpoints.ForbiddenException('The game does not exist.')

    # Check if game has questions. A game with no questions is not playable
    if not game.questions:
        raise endpoints.BadRequestException(
            'The game has no questions yet and cannot be played.')

    # Check if game has been changed from editing to play mode
    if not game.play_mode:
        # Only game creators can put a game into play mode
        if player.key != game.creator:
            raise endpoints.BadRequestException(
                'Only the game creator can put it into play mode.')
        # Check if creator has explicitly requested to start the game
        if not request.start_game:
            raise endpoints.BadRequestException(
                'Play mode enabling was not requested.')
        # Put game into play mode and save the change
        game.play_mode = True
        game.put()

    # Create the match
    match = Match.create_match(player=player.key, game=game)

    # Return a confirmation of the match creation
    return match.to_form()
def create_game(self, request):
    """
    Creates a new game.

    The requesting player becomes the player passed to Game.create_game,
    together with the requested title. (NB: Games are just shells for
    questions. See the supplied readme for more information.)

    Returns:
        GameForm -- a GameForm representation of the created game
    """
    # Resolve the key of the player creating the game
    creator_key = get_player(request.user_name).key

    # Create the game and return its form as confirmation
    new_game = Game.create_game(player=creator_key, title=request.title)
    return new_game.to_form()
async def next_(
    client,
    event,
):
    """Skip the currently playing track.

    Only the user who added the current track may skip it. The request is
    aborted when nothing is playing or when another user asks for the skip.
    """
    player = get_player(client, event)

    track = player.get_current()
    if track is None:
        abort('Nothing to skip.')

    if track.user is not event.user:
        # The message names the user who added the track (``track.user``),
        # not the requester, and must be an f-string for the placeholder to
        # be rendered instead of sent literally.
        abort(
            f'Sorry, the track was added by {track.user:m}, so only they can skip.'
        )

    await player.skip()
    return f'Track skipped: {create_track_repr(track, None)}'
def set_playmode(self, request):
    """
    Set a game's play mode

    Puts a game into play mode. Only the game's creator may do so, and only
    when the request explicitly asks for it.

    Returns:
        StringMessage -- a confirmation that the play mode is enabled

    Raises:
        ForbiddenException -- raised if player requesting play mode is not
                              the game's creator or the game does not exist
        BadRequestException -- raised when the game has no questions or
                               when the user has not explicitly requested
                               the game be put into play mode
    """
    # Load the game and the requesting player from the datastore
    game = get_by_urlsafe(request.urlsafe_key, Game)
    player = get_player(request.user_name)

    # Guard: the game must exist
    if not game:
        raise endpoints.ForbiddenException(
            'The game does not exist.')

    # Guard: a game without questions cannot be played
    if game.questions == []:
        raise endpoints.BadRequestException(
            'The game has no questions and cannot be played.')

    # Guard: only game creators can put a game into play mode
    requester_is_creator = player.key == game.creator
    if not requester_is_creator:
        raise endpoints.ForbiddenException(
            'Only the game creator can put it into play mode.')

    # Guard: the creator has to ask for play mode explicitly
    if not request.start_game:
        raise endpoints.BadRequestException(
            'Play mode enabling was not requested.')

    # Put game into play mode and save the change
    game.play_mode = True
    game.put()

    # Return confirmation that play mode is now enabled
    return StringMessage(message="Game is now in play mode.")
async def repeat(
    client,
    event,
    option: (['track', 'queue', 'disable'], 'Repeat option'),
):
    """Enable repeat for the current track or the queue, or disable both.

    'track' and 'queue' each switch on only their own repeat flag; any other
    option switches both flags off.
    """
    voice_player = get_player(client, event)

    if option == 'track':
        voice_player.set_repeat_current(True)
        reply = 'Repeating track.'
    elif option == 'queue':
        voice_player.set_repeat_queue(True)
        reply = 'Repeating queue.'
    else:
        voice_player.set_repeat_current(False)
        voice_player.set_repeat_queue(False)
        reply = 'Repeating disabled.'

    return reply
async def seek(
    client,
    event,
    seconds: (float, 'Where to seek?'),
):
    """Seek within the current track.

    The request is aborted when no track is playing or when ``seconds`` is
    negative or greater than the track's duration.
    """
    voice_player = get_player(client, event)

    current = voice_player.get_current()
    if current is None:
        abort('No songs are being played right now!')

    duration = current.duration
    before_start = seconds < 0.0
    if before_start or (seconds > duration):
        abort(
            f'Cannot seek to {seconds:.2f} seconds. Please define a value between `0` and {duration:.0f}.'
        )

    await voice_player.seek(seconds)
    return 'Seeked the current track!'
def get_user_matches(self, request):
    """
    Lists a player's matches.

    Delivers one page of the matches stored under the requested player.
    (NB: Change QUERY_LIMIT to increase/decrease results per page)

    Returns:
        MatchForms -- a list of matches in MatchForm representation
    """
    # Translate the requested page into a limit and an offset
    limit, offset = get_limit_offset(request.page)

    # Query the matches whose ancestor is the specified player
    owner = get_player(request.user_name)
    page_query = Match.query(ancestor=owner.key)
    page = page_query.fetch(offset=offset, limit=limit)

    # Convert every match to its form and wrap them up
    return MatchForms(matches=[entry.to_form() for entry in page])
def minimax(state, max_player, depth):
    """Return the minimax score of ``state`` searched to ``depth`` plies.

    At an end state or at depth 0 the state is scored from the point of view
    of the previous player. Otherwise every child state is searched
    recursively, and the best-scoring child (highest for the maximising
    player, lowest otherwise) is stored in ``choice['choice']``. Nested
    calls write to the same entry, so after the outermost call returns it
    holds that call's selection.
    """
    # Base case: score with the previous player
    if is_end_state(state) or depth == 0:
        return get_score(state, not max_player)

    mover = get_player(max_player)
    children = []
    values = []
    for child in get_possible_states(state, mover):
        values.append(minimax(child, not max_player, depth - 1))
        children.append(child)

    # Maximiser takes the highest value, minimiser the lowest; on ties the
    # first child with that value wins.
    select = max if max_player else min
    best_index = values.index(select(values))
    choice['choice'] = children[best_index]
    return values[best_index]
def cancel_match(self, request):
    """
    Deletes a match.

    Players may cancel their own (and only their own) matches. Cancelled
    matches are deleted; finished matches cannot be cancelled.

    Returns:
        StringMessage -- a confirmation of the match deletion

    Raises:
        ForbiddenException -- raised if the match does not exist, if a
                              player tries to delete another player's
                              match, or if the match has already finished
    """
    # Load the match and make sure it exists
    match = get_by_urlsafe(request.urlsafe_key, Match)
    if not match:
        raise endpoints.ForbiddenException('Match does not exist.')

    # Only the player the match belongs to may delete it
    requester = get_player(request.user_name)
    owned_by_requester = match.player == requester.key
    if not owned_by_requester:
        raise endpoints.ForbiddenException(
            'You cannot delete other players\'s matches.')

    # Finished matches stay in the datastore
    if match.match_over:
        raise endpoints.ForbiddenException(
            'You cannot delete a finished match.')

    # Delete the match and confirm
    match.key.delete()
    return StringMessage(message='Your match was successfully deleted.')
def main():
    """Train a prioritized-replay DQN or Double DQN agent on an Atari ROM.

    Warm-up episodes run until ``MEMORY_WARMUP_SIZE`` steps have been taken,
    and the collected transitions are loaded into the prioritized replay
    memory. Training then runs until ``args.train_total_steps`` steps have
    elapsed; each time a multiple of ``args.test_every_steps`` is crossed
    the agent is evaluated for three episodes and the mean reward is logged.

    Raises:
        ValueError: if ``args.alg`` is neither 'ddqn' nor 'dqn'.
    """
    # Prepare environments
    env = get_player(
        args.rom, image_size=IMAGE_SIZE, train=True, frame_skip=FRAME_SKIP)
    test_env = get_player(
        args.rom,
        image_size=IMAGE_SIZE,
        frame_skip=FRAME_SKIP,
        context_len=CONTEXT_LEN)

    # Init Prioritized Replay Memory
    per = ProportionalPER(alpha=0.6, seg_num=args.batch_size, size=MEMORY_SIZE)

    # Prepare PARL agent
    act_dim = env.action_space.n
    model = AtariModel(act_dim)
    if args.alg == 'ddqn':
        algorithm = PrioritizedDoubleDQN(
            model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)
    elif args.alg == 'dqn':
        algorithm = PrioritizedDQN(
            model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)
    else:
        # Fail with a clear message instead of an unbound-name error below.
        raise ValueError('Unsupported algorithm: {}'.format(args.alg))
    agent = AtariAgent(algorithm, act_dim=act_dim, update_freq=UPDATE_FREQ)

    # Replay memory warmup
    total_step = 0
    # The bar's total matches the loop's stopping condition so it reaches
    # 100% exactly when warm-up ends.
    with tqdm(
            total=MEMORY_WARMUP_SIZE, desc='[Replay Memory Warm Up]') as pbar:
        mem = []
        while total_step < MEMORY_WARMUP_SIZE:
            _, steps, _ = run_episode(env, agent, per, mem=mem, warmup=True)
            total_step += steps
            pbar.update(steps)
    per.elements.from_list(mem[:int(MEMORY_WARMUP_SIZE)])

    # Scalar tags are prefixed with the ROM file name without its extension.
    env_name = args.rom.split('/')[-1].split('.')[0]

    test_flag = 0
    total_steps = 0
    # The context manager closes the bar even if an episode raises.
    with tqdm(total=args.train_total_steps) as pbar:
        while total_steps < args.train_total_steps:
            # start epoch
            total_reward, steps, loss = run_episode(
                env, agent, per, train=True)
            total_steps += steps
            pbar.set_description('[train]exploration:{}'.format(
                agent.exploration))
            summary.add_scalar('{}/score'.format(env_name), total_reward,
                               total_steps)
            summary.add_scalar('{}/loss'.format(env_name), loss,
                               total_steps)  # mean of total loss
            summary.add_scalar('{}/exploration'.format(env_name),
                               agent.exploration, total_steps)
            pbar.update(steps)

            if total_steps // args.test_every_steps >= test_flag:
                # Skip past every evaluation threshold crossed by this
                # episode so only one evaluation runs.
                while total_steps // args.test_every_steps >= test_flag:
                    test_flag += 1
                pbar.write("testing")
                test_rewards = []
                for _ in tqdm(range(3), desc='eval agent'):
                    test_rewards.append(run_evaluate_episode(test_env, agent))
                eval_reward = np.mean(test_rewards)
                logger.info(
                    "eval_agent done, (steps, eval_reward): ({}, {})".format(
                        total_steps, eval_reward))
                summary.add_scalar('{}/eval'.format(env_name), eval_reward,
                                   total_steps)
while True: action = agent.predict(obs) test_env.render() obs, reward, isOver, info = env.step(action) total_reward += reward if isOver: break return total_reward if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--game_name', default='Phoenix-v0') test_env = get_player('Phoenix-v0', image_size=IMAGE_SIZE, context_len=CONTEXT_LEN) save_path = './dqn_model.ckpt' act_dim = test_env.action_space.n model = AtariModel(act_dim) algorithm = parl.algorithms.DQN(model, act_dim=act_dim, gamma=GAMMA) agent = AtariAgent(algorithm, act_dim=act_dim, start_lr=LEARNING_RATE, total_step=test_number, update_freq=UPDATE_FREQ) agent.restore(save_path)
def submit_answer(self, request):
    """
    Allows players to submit answers to questions

    This function allows players to submit answers to questions in a match.
    It also makes sure that the answer is submitted for the match's current
    round, which counteracts the accidental submission of answers several
    times in a row. A correct answer adds 100 to the match score; once the
    last round is over the match score is transferred to the player and the
    match is closed.

    Returns:
        AnswerMessage -- information about whether the player has answered
                         the question correctly or incorrectly

    Raises:
        ForbiddenException -- raised if the match or the question does not
                              exist, or if the match to which the answer
                              was submitted has finished already
        BadRequestException -- raised if the player tried to submit an
                               answer to a round which is not the match's
                               current round
    """
    # Get the match from the datastore
    match = get_by_urlsafe(request.urlsafe_match_key, Match)

    # A missing match is rejected the same way cancel_match rejects it,
    # instead of failing on the attribute access below
    if not match:
        raise endpoints.ForbiddenException('Match does not exist.')

    # Check if match is still ongoing
    if match.match_over:
        raise endpoints.ForbiddenException(
            'Match is over, answering is not possible.')

    # Check that answer submitted is for the current round
    if match.current_round != request.round:
        raise endpoints.BadRequestException(
            "That's not the right round.")

    # Get the question and the player info from the datastore
    question = get_by_urlsafe(request.urlsafe_question_key, Question)
    if not question:
        raise endpoints.ForbiddenException('Question does not exist.')
    player = get_player(request.user_name)
    # NOTE(review): unlike get_match_history and cancel_match, this does not
    # verify that the match belongs to the submitting player - confirm
    # whether that is intended.

    # Check if answer was correct and apply the v basic scoring system
    correct_answer = request.answer == question.correct_answer
    if correct_answer:
        logger.debug('Player has guessed correct answer.')
        player.correct_answers += 1
        match.score += 100
    else:
        logger.debug('Player has guessed an incorrect answer.')

    # Increase number of questions answered by player
    # for v basic ranking system
    player.total_questions += 1

    # Add answer to the history
    history = History(question=question.question,
                      correct_answer=correct_answer,
                      answer=request.answer)
    match.history.append(history)

    # Advance one round
    match.current_round += 1
    logger.debug('New current round: %s' % match.current_round)

    # If last round is over transfer score to player and close the match
    if match.current_round == len(match.questions):
        match.match_over = True
        player.score += match.score

    # Commit match and player changes to datastore
    player.put()
    match.put()

    # Return whether answer was answered correctly or incorrectly
    return AnswerMessage(correct_answer=correct_answer)