Example #1
0
def main():
    """Train an Atari agent with a PARL DQN-family algorithm.

    Builds a training and an evaluation environment, fills the replay
    memory until it holds ``MEMORY_WARMUP_SIZE`` entries, and then runs
    training episodes until ``args.train_total_steps`` steps were taken.
    Score, loss and exploration are written to ``summary`` after every
    episode.  Each time another ``args.test_every_steps`` boundary is
    crossed the agent plays three evaluation episodes and the mean reward
    is logged.

    Raises:
        ValueError: if ``args.algo`` is not 'DQN', 'Dueling' or 'DDQN'.
    """
    env = get_player(args.rom,
                     image_size=IMAGE_SIZE,
                     train=True,
                     frame_skip=FRAME_SKIP)
    test_env = get_player(args.rom,
                          image_size=IMAGE_SIZE,
                          frame_skip=FRAME_SKIP,
                          context_len=CONTEXT_LEN)
    rpm = ReplayMemory(MEMORY_SIZE, IMAGE_SIZE, CONTEXT_LEN)
    act_dim = env.action_space.n

    model = AtariModel(act_dim, args.algo)
    if args.algo == 'DDQN':
        algorithm = parl.algorithms.DDQN(model, act_dim=act_dim, gamma=GAMMA)
    elif args.algo in ['DQN', 'Dueling']:
        algorithm = parl.algorithms.DQN(model, act_dim=act_dim, gamma=GAMMA)
    else:
        # Fail with a clear message instead of an unbound `algorithm` below.
        raise ValueError(
            'Unsupported algorithm: {!r}'.format(args.algo))
    agent = AtariAgent(algorithm,
                       act_dim=act_dim,
                       start_lr=LEARNING_RATE,
                       total_step=args.train_total_steps,
                       update_freq=UPDATE_FREQ)

    with tqdm(total=MEMORY_WARMUP_SIZE,
              desc='[Replay Memory Warm Up]') as pbar:
        while rpm.size() < MEMORY_WARMUP_SIZE:
            total_reward, steps, _ = run_train_episode(env, agent, rpm)
            pbar.update(steps)

    # train
    test_flag = 0
    total_steps = 0
    # The context manager closes the bar even if an episode raises.
    with tqdm(total=args.train_total_steps) as pbar:
        while total_steps < args.train_total_steps:
            # start epoch
            total_reward, steps, loss = run_train_episode(env, agent, rpm)
            total_steps += steps
            pbar.set_description(
                '[train]exploration:{}'.format(agent.exploration))
            summary.add_scalar('dqn/score', total_reward, total_steps)
            # mean of total loss
            summary.add_scalar('dqn/loss', loss, total_steps)
            summary.add_scalar('dqn/exploration', agent.exploration,
                               total_steps)
            pbar.update(steps)

            if total_steps // args.test_every_steps >= test_flag:
                # Skip every boundary the last episode has already passed so
                # a long episode triggers a single evaluation only.
                while total_steps // args.test_every_steps >= test_flag:
                    test_flag += 1
                pbar.write("testing")
                eval_rewards = []
                for _ in tqdm(range(3), desc='eval agent'):
                    eval_rewards.append(run_evaluate_episode(test_env, agent))
                eval_test = np.mean(eval_rewards)
                logger.info(
                    "eval_agent done, (steps, eval_reward): ({}, {})".format(
                        total_steps, eval_test))
                summary.add_scalar('dqn/eval', eval_test, total_steps)
Example #2
0
def main():
    """Train an Atari agent with the torch DQN / Double DQN algorithms.

    Creates the training and evaluation environments, fills the replay
    memory until it holds ``MEMORY_WARMUP_SIZE`` entries, samples a fixed
    batch of observations used to track the value function, and then
    trains until ``args.train_total_steps`` steps were taken.  Each time
    another ``args.test_every_steps`` boundary is crossed the agent plays
    three evaluation episodes and the evaluation mean, last training score,
    loss, exploration, fixed-observation Q value and gradient norm are
    written to ``summary``.

    Raises:
        ValueError: if ``args.algo`` is not 'DQN', 'Dueling' or 'Double'.
    """
    env = get_player(args.rom,
                     image_size=IMAGE_SIZE,
                     train=True,
                     frame_skip=FRAME_SKIP)
    test_env = get_player(args.rom,
                          image_size=IMAGE_SIZE,
                          frame_skip=FRAME_SKIP,
                          context_len=CONTEXT_LEN)
    rpm = ReplayMemory(MEMORY_SIZE, IMAGE_SIZE, CONTEXT_LEN)
    act_dim = env.action_space.n
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = AtariModel(CONTEXT_LEN, act_dim, args.algo)
    if args.algo in ['DQN', 'Dueling']:
        algorithm = DQN(model, gamma=GAMMA, lr=args.lr)
    elif args.algo == 'Double':
        algorithm = DDQN(model, gamma=GAMMA, lr=args.lr)
    else:
        # Fail with a clear message instead of an unbound `algorithm` below.
        raise ValueError(
            'Unsupported algorithm: {!r}'.format(args.algo))
    agent = AtariAgent(algorithm, act_dim=act_dim)

    with tqdm(total=MEMORY_WARMUP_SIZE,
              desc='[Replay Memory Warm Up]') as pbar:
        while rpm.size() < MEMORY_WARMUP_SIZE:
            total_reward, steps, _ = run_train_episode(env, agent, rpm)
            pbar.update(steps)

    # Get fixed obs to check value function.
    fixed_obs = get_fixed_obs(rpm, args.batch_size)
    fixed_obs = torch.tensor(fixed_obs, dtype=torch.float, device=device)

    # train
    test_flag = 0
    total_steps = 0

    with tqdm(total=args.train_total_steps, desc='[Training Model]') as pbar:
        while total_steps < args.train_total_steps:
            total_reward, steps, loss = run_train_episode(env, agent, rpm)
            total_steps += steps
            pbar.update(steps)
            if total_steps // args.test_every_steps >= test_flag:
                # Skip every boundary the last episode has already passed so
                # a long episode triggers a single evaluation only.
                while total_steps // args.test_every_steps >= test_flag:
                    test_flag += 1

                eval_rewards = [
                    run_evaluate_episode(test_env, agent) for _ in range(3)
                ]

                summary.add_scalar('dqn/eval', np.mean(eval_rewards),
                                   total_steps)
                summary.add_scalar('dqn/score', total_reward, total_steps)
                summary.add_scalar('dqn/loss', loss, total_steps)
                summary.add_scalar('dqn/exploration', agent.exploration,
                                   total_steps)
                summary.add_scalar('dqn/Q value',
                                   evaluate_fixed_Q(agent, fixed_obs),
                                   total_steps)
                summary.add_scalar('dqn/grad_norm',
                                   get_grad_norm(agent.alg.model), total_steps)
Example #3
0
    def get_match_history(self, request):
        """ Retrieves a match's history.

        This function allows players to see the history of their matches.
        Players are only allowed to view the history of their own matches;
        access to other players' history is not granted.

        Args:
            request -- the API request; `urlsafe_key` identifies the match
                       and `user_name` identifies the requesting player

        Returns:
            MatchForm -- the match details including the history

        Raises:
            ForbiddenException -- raised when the match does not exist or
                                  when an attempt is made to view another
                                  player's history
        """
        # Get the requested match from the datastore
        match = get_by_urlsafe(request.urlsafe_key, Match)

        # A missing match would otherwise fail with an AttributeError below;
        # report it the same way cancel_match does
        if not match:
            raise endpoints.ForbiddenException('Match does not exist.')

        # Get the requesting player from the datastore
        player = get_player(request.user_name)

        # If match player and request player do not match, deny history access
        if match.player != player.key:
            raise endpoints.ForbiddenException(
                'You may not view another player\'s history')

        # Return history as part of a MatchForm representation
        return match.to_form(history=True)
Example #4
0
def main():
    """Either train from stored replay memory or collect new experience.

    The replay memory is loaded from ``memory.npz`` and a previously saved
    model is restored from ``./model.ckpt`` when that file exists.  With
    ``args.train`` set, the agent is trained on the memory data and the
    model is saved; otherwise experience is collected with the environment
    and the replay memory is saved.
    """
    checkpoint = './model.ckpt'

    env = get_player(args.rom,
                     image_size=IMAGE_SIZE,
                     train=True,
                     frame_skip=FRAME_SKIP)
    # load_file=True: load replay memory data from file
    rpm = ReplayMemory(MEMORY_SIZE,
                       IMAGE_SIZE,
                       CONTEXT_LEN,
                       load_file=True,
                       file_path="memory.npz")
    act_dim = env.action_space.n

    algorithm = DQN(AtariModel(act_dim),
                    act_dim=act_dim,
                    gamma=GAMMA,
                    lr=LEARNING_RATE * gpu_num)
    agent = AtariAgent(algorithm,
                       act_dim=act_dim,
                       total_step=args.train_total_steps)

    # Continue from an earlier checkpoint when one is present.
    if os.path.isfile(checkpoint):
        logger.info("load model from file")
        agent.restore(checkpoint)

    if not args.train:
        logger.info("collect experience")
        collect_exp(env, rpm, agent)
        rpm.save_memory()
        logger.info("finish collecting, save successfully")
        return

    logger.info("train with memory data")
    run_train_step(agent, rpm)
    logger.info("finish training. Save the model.")
    agent.save(checkpoint)
Example #5
0
async def leave(client, event):
    """Leaves me from the channel."""
    # Resolve the player for this client/event pair and disconnect it.
    await get_player(client, event).disconnect()
    return 'Left voice channel.'
Example #6
0
async def move_player(
    client, event,
    channel: ('channel_group_connectable', 'Select a channel.'),
):
    """Change channel of the player."""
    # Resolve the player for this client/event pair and move it to `channel`.
    await get_player(client, event).move_to(channel)
    return f'Player moved to {channel:m}.'
Example #7
0
async def volume_(
    client, event,
    volume: P('number', 'Volume percentage', min_value=0, max_value=200),
):
    """Sets the player's volume."""
    player = get_player(client, event)
    # `volume` is given as a percentage; the player receives it as a fraction.
    fraction = volume / 100.0
    await player.set_volume(fraction)
    return f'Volume set to: {volume}%.'
Example #8
0
async def pause(client, event):
    """Pauses the currently playing track."""
    player = get_player(client, event)

    # Only issue the pause when the player is not paused already; the reply
    # is the same either way.
    already_paused = player.is_paused()
    if not already_paused:
        await player.pause()

    return 'Playing paused.'
Example #9
0
async def resume(client, event):
    """Resumes the currently playing track."""
    player = get_player(client, event)

    # Only issue the resume when the player is currently paused; the reply
    # is the same either way.
    currently_paused = player.is_paused()
    if currently_paused:
        await player.resume()

    return 'Playing resumed.'
Example #10
0
    def get_player_profile(self, request):
        """ Retrieves a player's profile.

        Looks up the player named by `request.user_name` and converts it
        to its profile form.

        Returns:
            ProfileForm -- the player's public profile as a ProfileForm
        """
        # Fetch the player and hand back its ProfileForm representation
        return get_player(request.user_name).to_profileform()
Example #11
0
    def get_user_score(self, request):
        """ Get a player's score.

        This function serves no real purpose and was just included to fulfil
        the Udacity course project requirements. This information is also
        included in a player's profile (see get_player_profile).

        Returns:
            ScoreForm -- a player's score in ScoreForm representation
        """
        # Look up the player named in the request
        player = get_player(request.user_name)

        # Report the player's name together with their score
        name = player.user_name
        points = player.score
        return ScoreForm(user_name=name, score=points)
Example #12
0
    def get_user_ranking(self, request):
        """ Get a player's ranking.

        This function serves no real purpose and was just included to fulfil
        the Udacity course project requirements. This information is also
        included in a player's profile (see get_player_profile).

        Returns:
            ScoreForm -- a player's ranking in ScoreForm representation
        """
        # Look up the player named in the request
        player = get_player(request.user_name)

        # Report the player's name together with their batting average
        name = player.user_name
        average = player.batting_avg
        return ScoreForm(user_name=name, batting_avg=average)
Example #13
0
async def move_track(
    client, event,
    old_position: ('int', 'The position of the track.'),
    new_position: ('int', 'The new position for the track.'),
):
    """Change position of a track."""
    player = get_player(client, event)

    # `move_track` hands back the moved track, or None when nothing moved.
    moved = player.move_track(old_position, new_position)
    if moved is not None:
        return f'Track moved: {create_track_repr(moved, None)}'

    return 'Nothing was moved.'
Example #14
0
    def create_match(self, request):
        """ Creates a new match.

        This function allows players to create a new match. A match is an
        'instance' of a game. This allows multiple users to play the same
        game at the same time or different times. If the game is not yet in
        play mode, its creator can enable play mode as part of this request
        by setting `start_game`.

        Args:
            request -- the API request; `urlsafe_game_key` identifies the
                       game, `user_name` the player and `start_game` states
                       whether play mode should be enabled

        Returns:
            MatchForm -- the newly created match, as returned by
                         match.to_form()

        Raises:
            ForbiddenException -- raised when the game does not exist
            BadRequestException -- raised when game has no questions or when
                                   game is not in play mode or when play mode
                                   enabling is required, but was not requested
        """
        # Get game and player from datastore
        game = get_by_urlsafe(request.urlsafe_game_key, Game)
        player = get_player(request.user_name)

        # A missing game would otherwise fail with an AttributeError below;
        # report it the same way set_playmode does
        if not game:
            raise endpoints.ForbiddenException(
                'The game does not exist.')

        # Check if game has questions. A game with no questions is not playable
        if not game.questions:
            raise endpoints.BadRequestException(
                'The game has no questions yet and cannot be played.')

        # Check if game has been changed from editing to play mode
        if not game.play_mode:
            # Only game creators can put a game into play mode
            if player.key != game.creator:
                raise endpoints.BadRequestException(
                    'Only the game creator can put it into play mode.')

            # Check if creator has explicitly requested to start the game
            if not request.start_game:
                raise endpoints.BadRequestException(
                    'Play mode enabling was not requested.')

            # Put game into play mode and save the change
            game.play_mode = True
            game.put()

        # Create the match
        match = Match.create_match(player=player.key, game=game)

        # Return the form representation of the new match
        return match.to_form()
Example #15
0
    def create_game(self, request):
        """ Creates a new game.

        The game is created for the player named by `request.user_name`
        and titled `request.title`.
        (NB: Games are just shells for questions. See the supplied readme
        for more information.)

        Returns:
            GameForm -- a GameForm representation of the created game
        """
        # Look up the player who will own the game
        creator = get_player(request.user_name)

        # Create the game and return its form representation
        new_game = Game.create_game(player=creator.key, title=request.title)
        return new_game.to_form()
Example #16
0
async def next_(
    client,
    event,
):
    """Plays the next song."""
    player = get_player(client, event)

    track = player.get_current()
    if track is None:
        abort('Nothing to skip.')

    # Only the user stored on the track may skip it.
    if track.user is not event.user:
        # The message must be an f-string (the placeholder used to be sent
        # verbatim) and has to name the user who added the track, not the
        # user issuing the command.
        abort(
            f'Sorry, the track was added by {track.user:m}, so only they can skip.'
        )

    await player.skip()
    return f'Track skipped: {create_track_repr(track, None)}'
Example #17
0
    def set_playmode(self, request):
        """ Set a game's play mode

        Puts a game into play mode. Games in play mode allow the creation
        of matches which players can then play.
        (NB: Only game creators may put a game into play mode)

        Returns:
            StringMessage -- a confirmation that the play mode is enabled

        Raises:
            ForbiddenException -- raised if player requesting play mode is not
                                  the game's creator or the game does not exist
            BadRequestException -- raised when the game has no questions or
                                   when the user has not explicitly requested
                                   the game be put into play mode
        """
        # Fetch the game and the requesting player
        game = get_by_urlsafe(request.urlsafe_key, Game)
        player = get_player(request.user_name)

        # Reject unknown games
        if not game:
            raise endpoints.ForbiddenException(
                'The game does not exist.')

        # Reject games that have nothing to play
        if game.questions == []:
            raise endpoints.BadRequestException(
                'The game has no questions and cannot be played.')

        # Reject anyone but the game's creator
        if player.key != game.creator:
            raise endpoints.ForbiddenException(
                'Only the game creator can put it into play mode.')

        # The creator has to ask for play mode explicitly
        if not request.start_game:
            raise endpoints.BadRequestException(
                'Play mode enabling was not requested.')

        # All checks passed: enable play mode and persist the change
        game.play_mode = True
        game.put()

        return StringMessage(message="Game is now in play mode.")
Example #18
0
async def repeat(
    client, event,
    option: (['track', 'queue', 'disable'], 'Repeat option'),
):
    """Set repeat for track or queue or disable both."""
    player = get_player(client, event)

    if option == 'track':
        player.set_repeat_current(True)
        reply = 'Repeating track.'
    elif option == 'queue':
        player.set_repeat_queue(True)
        reply = 'Repeating queue.'
    else:
        # Any other option switches both repeat modes off.
        player.set_repeat_current(False)
        player.set_repeat_queue(False)
        reply = 'Repeating disabled.'

    return reply
Example #19
0
async def seek(
    client, event,
    seconds: (float, 'Where to seek?'),
):
    """Seek the track."""
    player = get_player(client, event)

    current = player.get_current()
    if current is None:
        abort('No songs are being played right now!')

    # The target has to lie within the current track.
    duration = current.duration
    out_of_range = seconds < 0.0 or seconds > duration
    if out_of_range:
        abort(
            f'Cannot seek to {seconds:.2f} seconds. Please define a value between `0` and {duration:.0f}.'
        )

    await player.seek(seconds)
    return 'Seeked the current track!'
Example #20
0
    def get_user_matches(self, request):
        """ Lists a player's matches.

        Delivers one page of the matches stored under the player named by
        `request.user_name`; `request.page` selects the page.
        (NB: Change QUERY_LIMIT to increase/decrease results per page)

        Returns:
            MatchForms -- a list of matches in MatchForm representation
        """
        # Translate the requested page into a limit/offset pair
        limit, offset = get_limit_offset(request.page)

        # Query the matches that have the player as ancestor
        player = get_player(request.user_name)
        query = Match.query(ancestor=player.key)
        forms = [entry.to_form()
                 for entry in query.fetch(offset=offset, limit=limit)]

        # Wrap the forms for the response
        return MatchForms(matches=forms)
Example #21
0
 def minimax(state, max_player, depth):
     """Return the minimax score of `state`, searching `depth` levels deep.

     Child states are scored recursively with the roles swapped.  The child
     picked at each level is stored in the shared `choice['choice']`, so
     after the outermost call it holds the state chosen at the top level.
     """
     # Base case: the state is final or the depth budget is used up.
     if is_end_state(state) or depth == 0:
         # Score from the point of view of the player who moved last.
         return get_score(state, not max_player)

     # Score every reachable child state, keeping states and scores aligned.
     mover = get_player(max_player)
     children = []
     values = []
     for candidate in get_possible_states(state, mover):
         values.append(minimax(candidate, not max_player, depth - 1))
         children.append(candidate)

     # The maximising player takes the highest score, the other the lowest;
     # ties resolve to the first child carrying that score.
     pick = max if max_player else min
     best_index = values.index(pick(values))
     choice['choice'] = children[best_index]
     return values[best_index]
Example #22
0
    def cancel_match(self, request):
        """ Deletes a match.

        Players may cancel their own (and only their own) matches.
        Cancelled matches are deleted and cannot be recovered.
        Only unfinished matches can be deleted.

        Returns:
            StringMessage -- a confirmation of the match deletion

        Raises:
            ForbiddenException -- raised if a player tries to
                                  delete another player's match,
                                  if the match does not exist
                                  or if the match has already finished
        """
        # Fetch the match and reject the request when there is none
        match = get_by_urlsafe(request.urlsafe_key, Match)
        if not match:
            raise endpoints.ForbiddenException('Match does not exist.')

        # Only the player stored on the match may delete it
        requester = get_player(request.user_name)
        if match.player != requester.key:
            raise endpoints.ForbiddenException(
                'You cannot delete other players\'s matches.')

        # Finished matches are kept
        if match.match_over:
            raise endpoints.ForbiddenException(
                'You cannot delete a finished match.')

        # Delete the match and confirm the deletion
        match.key.delete()
        return StringMessage(message='Your match was successfully deleted.')
Example #23
0
def main():
    """Train an Atari agent with prioritized experience replay.

    Builds the training and evaluation environments, runs warm-up episodes
    until ``MEMORY_WARMUP_SIZE`` steps were taken and loads the collected
    transitions into the prioritized replay memory, then trains until
    ``args.train_total_steps`` steps were taken.  Score, loss and
    exploration are written to ``summary`` after every episode under a
    prefix derived from the ROM file name.  Each time another
    ``args.test_every_steps`` boundary is crossed the agent plays three
    evaluation episodes and the mean reward is logged.

    Raises:
        ValueError: if ``args.alg`` is neither 'dqn' nor 'ddqn'.
    """
    # Prepare environments
    env = get_player(
        args.rom, image_size=IMAGE_SIZE, train=True, frame_skip=FRAME_SKIP)
    test_env = get_player(
        args.rom,
        image_size=IMAGE_SIZE,
        frame_skip=FRAME_SKIP,
        context_len=CONTEXT_LEN)

    # Init Prioritized Replay Memory
    per = ProportionalPER(alpha=0.6, seg_num=args.batch_size, size=MEMORY_SIZE)

    # Prepare PARL agent
    act_dim = env.action_space.n
    model = AtariModel(act_dim)
    if args.alg == 'ddqn':
        algorithm = PrioritizedDoubleDQN(
            model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)
    elif args.alg == 'dqn':
        algorithm = PrioritizedDQN(
            model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)
    else:
        # Fail with a clear message instead of an unbound `algorithm` below.
        raise ValueError('Unsupported algorithm: {!r}'.format(args.alg))
    agent = AtariAgent(algorithm, act_dim=act_dim, update_freq=UPDATE_FREQ)

    # Replay memory warmup
    total_step = 0
    # The loop below stops at MEMORY_WARMUP_SIZE, so that is the bar's total.
    with tqdm(total=MEMORY_WARMUP_SIZE,
              desc='[Replay Memory Warm Up]') as pbar:
        mem = []
        while total_step < MEMORY_WARMUP_SIZE:
            total_reward, steps, _ = run_episode(
                env, agent, per, mem=mem, warmup=True)
            total_step += steps
            pbar.update(steps)
    per.elements.from_list(mem[:int(MEMORY_WARMUP_SIZE)])

    # Summary prefix: ROM file name without directory and extension
    env_name = args.rom.split('/')[-1].split('.')[0]

    test_flag = 0
    total_steps = 0
    # The context manager closes the bar even if an episode raises.
    with tqdm(total=args.train_total_steps) as pbar:
        while total_steps < args.train_total_steps:
            # start epoch
            total_reward, steps, loss = run_episode(
                env, agent, per, train=True)
            total_steps += steps
            pbar.set_description(
                '[train]exploration:{}'.format(agent.exploration))
            summary.add_scalar('{}/score'.format(env_name), total_reward,
                               total_steps)
            summary.add_scalar('{}/loss'.format(env_name), loss,
                               total_steps)  # mean of total loss
            summary.add_scalar('{}/exploration'.format(env_name),
                               agent.exploration, total_steps)
            pbar.update(steps)

            if total_steps // args.test_every_steps >= test_flag:
                # Skip every boundary the last episode has already passed so
                # a long episode triggers a single evaluation only.
                while total_steps // args.test_every_steps >= test_flag:
                    test_flag += 1
                pbar.write("testing")
                test_rewards = []
                for _ in tqdm(range(3), desc='eval agent'):
                    test_rewards.append(run_evaluate_episode(test_env, agent))
                eval_reward = np.mean(test_rewards)
                logger.info(
                    "eval_agent done, (steps, eval_reward): ({}, {})".format(
                        total_steps, eval_reward))
                summary.add_scalar('{}/eval'.format(env_name), eval_reward,
                                   total_steps)
Example #24
0
    while True:
        action = agent.predict(obs)
        test_env.render()
        obs, reward, isOver, info = env.step(action)
        total_reward += reward
        if isOver:
            break
    return total_reward


if __name__ == '__main__':
    # Script entry point: build the evaluation environment and agent, then
    # restore the saved model parameters.

    # --game_name selects the environment; it defaults to the game that
    # used to be hard-coded here.
    parser = argparse.ArgumentParser()
    parser.add_argument('--game_name', default='Phoenix-v0')
    args = parser.parse_args()

    test_env = get_player(args.game_name,
                          image_size=IMAGE_SIZE,
                          context_len=CONTEXT_LEN)
    save_path = './dqn_model.ckpt'

    act_dim = test_env.action_space.n

    model = AtariModel(act_dim)
    algorithm = parl.algorithms.DQN(model, act_dim=act_dim, gamma=GAMMA)

    # NOTE(review): `test_number` is not defined in this block; it is
    # presumably a module-level constant - confirm.
    agent = AtariAgent(algorithm,
                       act_dim=act_dim,
                       start_lr=LEARNING_RATE,
                       total_step=test_number,
                       update_freq=UPDATE_FREQ)

    # Load the previously saved model parameters into the agent.
    agent.restore(save_path)
Example #25
0
    def submit_answer(self, request):
        """ Allows players to submit answers to questions

        Records a player's answer to a question in a match. The answer is
        only accepted when it is submitted for the match's current round,
        which counteracts the accidental submission of answers several
        times in a row.

        Returns:
            AnswerMessage -- information about whether the player has answered
                             the question correctly or incorrectly

        Raises:
            ForbiddenException -- raised if the match to which the answer was
                                  submitted has finished already
            BadRequestException -- raised if the player tried to submit an
                                   answer to a round which is not the match's
                                   current round
        """
        # Fetch the match and make sure it still accepts answers
        match = get_by_urlsafe(request.urlsafe_match_key, Match)
        if match.match_over:
            raise endpoints.ForbiddenException(
                'Match is over, answering is not possible.')

        # The answer has to target the round the match is currently in
        if match.current_round != request.round:
            raise endpoints.BadRequestException(
                "That's not the right round.")

        # Fetch the question being answered and the answering player
        question = get_by_urlsafe(request.urlsafe_question_key, Question)
        player = get_player(request.user_name)

        # Judge the answer; a correct one counts for the player's statistics
        # and adds 100 points to the match score (very basic scoring system)
        if request.answer == question.correct_answer:
            logger.debug('Player has guessed correct answer.')
            correct_answer = True
            player.correct_answers += 1
            match.score += 100
        else:
            logger.debug('Player has guessed an incorrect answer.')
            correct_answer = False

        # Every answer counts towards the player's answered questions
        # (used by the very basic ranking system)
        player.total_questions += 1

        # Record the answer in the match history
        match.history.append(History(question=question.question,
                                     correct_answer=correct_answer,
                                     answer=request.answer))

        # Advance one round; once the last round is over, close the match
        # and transfer its score to the player
        match.current_round += 1
        logger.debug('New current round: %s' % match.current_round)
        if match.current_round == len(match.questions):
            match.match_over = True
            player.score += match.score

        # Commit player and match changes to the datastore
        player.put()
        match.put()

        # Report whether the question was answered correctly or incorrectly
        return AnswerMessage(correct_answer=correct_answer)