Example #1
@contextmanager  # requires: from contextlib import contextmanager
def timed(msg):
    # only the MPI rank-0 worker prints timing information
    if rank == 0:
        print(colorize(msg, color='magenta'))
        tstart = time.time()
        yield
        print(colorize("done in %.3f seconds" % (time.time() - tstart), color='magenta'))
    else:
        yield
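Usage note: timed is a yield-based context manager. A minimal, self-contained sketch of the same pattern, without the MPI rank check or baselines' colorize (both are assumed to be defined elsewhere in the real source):

import time
from contextlib import contextmanager

@contextmanager
def timed(msg):
    # print the message, run the wrapped block, then report the elapsed time
    print(msg)
    tstart = time.time()
    yield
    print("done in %.3f seconds" % (time.time() - tstart))

with timed("sampling trajectories"):
    time.sleep(0.1)  # placeholder for real work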
Example #2
@contextmanager  # requires: from contextlib import contextmanager
def timed(msg):
    # print("/algo/trpo_mpi/200lines")  # executed 12 times per iteration; this is just an informational logging step
    if rank == 0:
        print(colorize(msg, color='red'))
        tstart = time.time()
        yield
        print(
            colorize("done in %.3f seconds" % (time.time() - tstart),
                     color='magenta'))
    else:
        yield
Example #3
def traj_segment_generator(pi, env, horizon, stochastic, visualize=False):
    # Initialize state variables
    t = 0
    ac = env.action_space.sample()
    new = True
    rew = 0.0
    ob = env.reset()

    cur_ep_ret = 0
    cur_ep_len = 0
    ep_rets = []
    ep_lens = []

    # Initialize history arrays
    obs = np.array([ob for _ in range(horizon)])
    rews = np.zeros(horizon, 'float32')
    vpreds = np.zeros(horizon, 'float32')
    news = np.zeros(horizon, 'int32')
    acs = np.array([ac for _ in range(horizon)])
    prevacs = acs.copy()

    save = False
    playerScore = opponentScore = 0
    wins = losses = ties = gamesTotal = totalPlayer = totalOpponent = 0

    while True:
        prevac = ac
        ac, vpred = pi.act(stochastic, ob)
        # Slight weirdness here because we need value function at time T
        # before returning segment [0, T-1] so we get the correct
        # terminal value
        if t > 0 and t % horizon == 0:
            yield {
                "ob": obs,
                "rew": rews,
                "vpred": vpreds,
                "new": news,
                "ac": acs,
                "prevac": prevacs,
                "nextvpred": vpred * (1 - new),
                "ep_rets": ep_rets,
                "ep_lens": ep_lens
            }
            _, vpred = pi.act(stochastic, ob)
            # Be careful!!! if you change the downstream algorithm to aggregate
            # several of these batches, then be sure to do a deepcopy
            ep_rets = []
            ep_lens = []
        i = t % horizon
        obs[i] = ob
        vpreds[i] = vpred
        news[i] = new
        acs[i] = ac
        prevacs[i] = prevac

        ob, rew, new, _ = env.step(ac)
        if visualize:
            env.render()
        if rew > 0:
            playerScore += abs(rew)
        else:
            opponentScore += abs(rew)
        rews[i] = rew

        cur_ep_ret += rew
        cur_ep_len += 1
        if new:
            msg = format("End of game: score %d - %d" %
                         (playerScore, opponentScore))
            print(colorize(msg, color='red'))
            gamesTotal += 1

            if playerScore - opponentScore > 100:
                save = True

            if playerScore > opponentScore:
                wins += 1
            elif opponentScore > playerScore:
                losses += 1
            else:
                ties += 1

            totalPlayer += playerScore
            totalOpponent += opponentScore

            playerScore = opponentScore = 0

            msg = format(
                "Status so far: \nGames played - %d wins - %d losses - %d ties - %d\n Total score: %d - %d"
                % (gamesTotal, wins, losses, ties, totalPlayer, totalOpponent))
            print(colorize(msg, color='red'))

            ep_rets.append(cur_ep_ret)
            ep_lens.append(cur_ep_len)
            cur_ep_ret = 0
            cur_ep_len = 0
            ob = env.reset()
        t += 1
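The "slight weirdness" comment above hints at how the yielded segment is consumed: in Baselines-style TRPO/PPO code, the "new", "vpred", and "nextvpred" fields typically feed a generalized advantage estimation (GAE) pass. A hedged sketch of such a consumer is below; add_vtarg_and_adv, gamma, and lam are assumptions about the downstream code and are not part of this example.

import numpy as np

def add_vtarg_and_adv(seg, gamma, lam):
    # append a bootstrap value so vpred[t + 1] exists for the final step of the segment
    new = np.append(seg["new"], 0)
    vpred = np.append(seg["vpred"], seg["nextvpred"])
    T = len(seg["rew"])
    seg["adv"] = gaelam = np.empty(T, 'float32')
    rew = seg["rew"]
    lastgaelam = 0
    for t in reversed(range(T)):
        nonterminal = 1 - new[t + 1]
        delta = rew[t] + gamma * vpred[t + 1] * nonterminal - vpred[t]
        gaelam[t] = lastgaelam = delta + gamma * lam * nonterminal * lastgaelam
    seg["tdlamret"] = seg["adv"] + seg["vpred"]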
Example #4
def traj_episode_generator(pi, env, horizon, stochastic, render, downsample):
    t = 0
    ac = env.action_space.sample()  # not used, just so we have the datatype
    new = True  # marks if we're on first timestep of an episode

    ob = env.reset()
    cur_ep_ret = 0  # return in current episode
    cur_ep_len = 0  # len of current episode

    # Initialize history arrays
    obs = []
    rews = []
    news = []
    acs = []

    playerScore = opponentScore = 0
    wins = losses = ties = gamesTotal = totalPlayer = totalOpponent = 0

    while True:
        prevac = ac
        ac, vpred = pi.act(stochastic, ob)

        if not downsample and hasattr(env.unwrapped, 'ale'):
            # store the full-resolution ALE screen rather than the (downsampled) observation
            obs.append(env.unwrapped.ale.getScreenRGB2())
        else:
            obs.append(ob)

        news.append(new)
        acs.append(ac)
        ob, rew, new, _ = env.step(ac)
        rews.append(rew)

        cur_ep_ret += rew
        cur_ep_len += 1

        if render:
            env.render()

        if t > 0 and (new or t % horizon == 0):
            msg = format("End of game: score %d - %d" %
                         (playerScore, opponentScore))
            print(colorize(msg, color='red'))
            gamesTotal += 1
            if playerScore > opponentScore:
                wins += 1
            elif opponentScore > playerScore:
                losses += 1
            else:
                ties += 1

            totalPlayer += playerScore
            totalOpponent += opponentScore

            playerScore = opponentScore = 0

            msg = format(
                "Status so far: \nGames played - %d wins - %d losses - %d ties - %d\n Total score: %d - %d"
                % (gamesTotal, wins, losses, ties, totalPlayer, totalOpponent))
            print(colorize(msg, color='red'))

            # convert list into numpy array
            obs = np.array(obs)
            rews = np.array(rews)
            news = np.array(news)
            acs = np.array(acs)
            yield {
                "ob": obs,
                "rew": rews,
                "new": news,
                "ac": acs,
                "ep_ret": cur_ep_ret,
                "ep_len": cur_ep_len
            }
            ob = env.reset()
            cur_ep_ret = 0
            cur_ep_len = 0
            t = 0

            # Initialize history arrays
            obs = []
            rews = []
            news = []
            acs = []
        t += 1
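A minimal consumption sketch for the episode generator; pi and env stand for a trained policy and a Gym environment and are not defined here (assumptions for illustration only):

ep_gen = traj_episode_generator(pi, env, horizon=1024, stochastic=True,
                                render=False, downsample=True)
episodes = [next(ep_gen) for _ in range(5)]  # collect five episodes
mean_return = sum(ep["ep_ret"] for ep in episodes) / len(episodes)
print("mean episode return: %.2f" % mean_return)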
Example #5
def playAtari(env,
              agent,
              U,
              modelPath,
              transpose=True,
              stochastic=False,
              fps=30,
              zoom=None,
              delay=None):
    """
    Plays an Atari games' agent from trained model
    :param env: Atari environment
    :param agent: agent's policy model (neural network)
    :param U: baseline's tf_util
    :param modelPath: path to trained model
    :param transpose: transpose video
    :param stochastic: environment type
    :param fps:
    :param zoom:
    :param delay: start delay
    :return:
    """
    global obs

    U.initialize()
    U.load_state(modelPath)

    tempEnv = env
    while not isinstance(tempEnv, ActionWrapper):
        try:
            tempEnv = tempEnv.env
        except AttributeError:
            # reached the innermost wrapper without finding an ActionWrapper
            break
    # using ActionWrapper:
    if isinstance(tempEnv, ActionWrapper):
        obs_s = tempEnv.screen_space
    else:
        obs_s = env.observation_space

    # obs_s = env.observation_space
    # if len(env.observation_space.shape) < 3:
    #     obs_s = env.env.screen_space

    assert type(obs_s) == Box
    assert len(obs_s.shape) == 2 or (len(obs_s.shape) == 3
                                     and obs_s.shape[2] in [1, 3])

    if zoom is None:
        zoom = 1

    video_size = int(obs_s.shape[0] * zoom), int(obs_s.shape[1] * zoom)

    if transpose:
        video_size = tuple(reversed(video_size))

    # setup the screen using pygame
    flags = RESIZABLE | HWSURFACE | DOUBLEBUF
    screen = pygame.display.set_mode(video_size, flags)
    pygame.event.set_blocked(pygame.MOUSEMOTION)
    clock = pygame.time.Clock()

    # =================================================================================================================

    observation = env.reset()

    running = True
    envDone = False

    playerScore = opponentScore = 0
    wins = losses = ties = gamesTotal = totalPlayer = totalOpponent = 0

    print("Get ready...")
    if delay is not None:
        time.sleep(delay)

    while running:
        pygame.event.get()
        if envDone:
            # results of game:
            msg = format("End of game: score %d - %d" %
                         (playerScore, opponentScore))
            print(colorize(msg, color='red'))
            gamesTotal += 1
            if playerScore > opponentScore:
                wins += 1
            elif opponentScore > playerScore:
                losses += 1
            else:
                ties += 1

            totalPlayer += playerScore
            totalOpponent += opponentScore

            playerScore = opponentScore = 0

            msg = format(
                "Status so far: \nGames played - %d wins - %d losses - %d ties - %d\n Total score: %d - %d"
                % (gamesTotal, wins, losses, ties, totalPlayer, totalOpponent))
            print(colorize(msg, color='red'))

            # if gamesTotal == 3:
            #     running = False

            envDone = False
            observation = env.reset()

        else:
            action, value_predicted = agent.act(stochastic, observation)

            if not isinstance(action, list):
                action = np.atleast_1d(action)

            observation, reward, envDone, info = env.step(action)

            # keep track of the player score:
            if reward > 0:
                playerScore += abs(reward)
            else:
                opponentScore += abs(reward)

        # observations are RAM states, so we need screen data to display the game with pygame:
        if hasattr(env, 'getImage'):
            obs = env.getImage()
        elif hasattr(env.unwrapped, 'ale'):
            obs = env.unwrapped.ale.getScreenRGB2()

        if obs is not None:
            if len(obs.shape) == 2:
                obs = obs[:, :, None]
            if obs.shape[2] == 1:
                obs = obs.repeat(3, axis=2)
            display_arr(screen, obs, video_size, transpose)

        pygame.display.flip()
        clock.tick(fps)
    pygame.quit()
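The display_arr helper used above is not shown in this example. A sketch of what it might look like, modeled on Gym's play utility (this is an assumption, not the example's own definition):

import pygame

def display_arr(screen, arr, video_size, transpose):
    # normalize the frame to 0-255, build a pygame surface, scale it to the window and blit it
    arr_min, arr_max = arr.min(), arr.max()
    arr = 255.0 * (arr - arr_min) / (arr_max - arr_min)
    pyg_img = pygame.surfarray.make_surface(arr.swapaxes(0, 1) if transpose else arr)
    pyg_img = pygame.transform.scale(pyg_img, video_size)
    screen.blit(pyg_img, (0, 0))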
Example #6
def replayACS(env, modelPath, transpose=True, fps=30, zoom=None):
    """
    Replays a game from recorded trajectories using actions
    This method is not precise though, because it indirectly recovers environment states from actions.
    Sometimes it gets asynchronous and distorts the real trajectory.
    :param env: Atari environment
    :param modelPath: path to trained model
    :param transpose:
    :param fps:
    :param zoom:
    :return:
    """
    global obs
    with open(modelPath, 'rb') as rfp:
        trajectories = pkl.load(rfp)

    U.make_session(num_cpu=1).__enter__()

    U.initialize()

    tempEnv = env
    while not isinstance(tempEnv, ActionWrapper):
        try:
            tempEnv = tempEnv.env
        except AttributeError:
            # reached the innermost wrapper without finding an ActionWrapper
            break
    # using ActionWrapper:
    if isinstance(tempEnv, ActionWrapper):
        obs_s = tempEnv.screen_space
    else:
        obs_s = env.observation_space

    # assert type(obs_s) == Box
    assert len(obs_s.shape) == 2 or (len(obs_s.shape) == 3 and obs_s.shape[2] in [1, 3])

    if zoom is None:
        zoom = 1

    video_size = int(obs_s.shape[0] * zoom), int(obs_s.shape[1] * zoom)

    if transpose:
        video_size = tuple(reversed(video_size))

    # setup the screen using pygame
    flags = RESIZABLE | HWSURFACE | DOUBLEBUF
    screen = pygame.display.set_mode(video_size, flags)
    pygame.event.set_blocked(pygame.MOUSEMOTION)
    clock = pygame.time.Clock()

    # =================================================================================================================

    running = True
    envDone = False

    playerScore = opponentScore = 0
    wins = losses = ties = gamesTotal = totalPlayer = totalOpponent = 0

    while running:
        trl = len(trajectories)

        for i in range(trl):
            obs = env.reset()
            print("\nRunning trajectory {}".format(i))
            print("Length {}".format(len(trajectories[i]['ac'])))

            for ac in tqdm(trajectories[i]['ac']):
                if not isinstance(ac, list):
                    ac = np.atleast_1d(ac)

                obs, reward, envDone, info = env.step(ac)

                # keep track of the player score:
                if reward > 0:
                    playerScore += abs(reward)
                else:
                    opponentScore += abs(reward)

                if hasattr(env, 'getImage'):
                    obs = env.getImage()

                if obs is not None:
                    if len(obs.shape) == 2:
                        obs = obs[:, :, None]
                    if obs.shape[2] == 1:
                        obs = obs.repeat(3, axis=2)
                    display_arr(screen, obs, video_size, transpose)

                    pygame.display.flip()
                    clock.tick(fps)

            msg = format("End of game: score %d - %d" % (playerScore, opponentScore))
            print(colorize(msg, color='red'))
            gamesTotal += 1
            if playerScore > opponentScore:
                wins += 1
            elif opponentScore > playerScore:
                losses += 1
            else:
                ties += 1

            totalPlayer += playerScore
            totalOpponent += opponentScore

            playerScore = opponentScore = 0

            msg = format("Status so far: \nGames played - %d wins - %d losses - %d ties - %d\n Total score: %d - %d" % (
                gamesTotal, wins, losses, ties, totalPlayer, totalOpponent))
            print(colorize(msg, color='red'))
    pygame.quit()
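replayACS expects modelPath to point to a pickle file holding a list of per-episode trajectory dicts, each with at least an "ac" sequence of actions. A hypothetical sketch of writing such a file (keys and values are illustrative only):

import pickle as pkl
import numpy as np

trajectories = [
    {"ac": np.array([0, 2, 3, 1])},  # episode 0: recorded actions (illustrative values)
    {"ac": np.array([1, 1, 0])},     # episode 1
]
with open("trajectories.pkl", "wb") as wfp:
    pkl.dump(trajectories, wfp)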