Beispiel #1
0
def main():
    """Load a trained policy and run it greedily on the remote contest env forever."""

    # Remote contest environment (talks to the evaluator over a unix socket).
    env = grc.RemoteEnv('tmp/sock')

    #FIXME: DEBUG
    #import retro
    #env = retro.make(game='SonictheHedgehog-Genesis', state='GreenHillZone.Act1')

    # Restore the trained policy from its checkpoint (recover=True).
    name = 'learner_global'
    state = process_state(env.reset())
    test_policy = Policy(state.shape,
                         env.action_space.n,
                         name,
                         act_int=False,
                         recover=True,
                         sess=tf.Session(),
                         pull_scope=name)

    # Act greedily (explore=False) until the process is killed.
    lstm_state = test_policy.lstm_init_state
    while True:
        action, _, _, lstm_state = test_policy.act(state,
                                                   lstm_state,
                                                   explore=False)
        state, reward, done, _ = env.step(action)
        state = process_state(state)
        if done:
            # Bug fix: the reset observation was previously discarded, so the
            # stale terminal state and LSTM state leaked into the next episode.
            state = process_state(env.reset())
            lstm_state = test_policy.lstm_init_state
Beispiel #2
0
def make_remote_env(stack=2,
                    scale_rew=True,
                    color=False,
                    exp_type='obs',
                    exp_const=0.002,
                    socket_dir='/tmp'):
    """Connect to the remote contest env and apply the standard wrapper stack."""
    wrapped = AllowBacktracking(
        SonicDiscretizer(BackupOriginalData(grc.RemoteEnv(socket_dir))))

    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped, color)

    # Optional exploration bonus, keyed on observations or on x-position.
    if exp_const > 0:
        if exp_type == 'obs':
            wrapped = ObsExplorationReward(wrapped, exp_const, game_specific=False)
        elif exp_type == 'x':
            wrapped = XExplorationReward(wrapped, exp_const, game_specific=False)

    if stack > 1:
        wrapped = FrameStack(wrapped, stack)

    return EpisodeInfo(ScaledFloatFrame(wrapped))
Beispiel #3
0
def make_vec_env(extra_wrap_fn=None):
    """Vectorized env: subprocess envs when running locally, one remote env otherwise."""
    if 'RETRO_ROOT_DIR' not in os.environ:
        make_remote = lambda: wrap_env(grc.RemoteEnv('tmp/sock'),
                                       extra_wrap_fn=extra_wrap_fn)
        return DummyVecEnv([('tmp/sock', make_remote)])
    subenv_ids, subenvs = build_envs(extra_wrap_fn=extra_wrap_fn)
    return SubprocessVecEnv(zip(subenv_ids, subenvs))
def main():
    """Run JERK on the attached environment."""
    # Remote contest environment; TrackedEnv records actions and rewards so
    # the best sequence can be replayed later.
    env = grc.RemoteEnv('tmp/sock')
    # env = make(game='SonicTheHedgehog-Genesis',
    #                      state='GreenHillZone.Act1',
    #                      scenario='scenario.json')
    #                      )
    env = TrackedEnv(env)

    # new_ep is True whenever a fresh episode should begin.
    new_ep = True
    # solutions: list of ([rewards...], action_sequence) pairs from past episodes.
    solutions = []
    while True:
        if new_ep:
            # Exploit more as total step budget is consumed.
            if (solutions and
                    random.random() < EXPLOIT_BIAS + env.total_steps_ever / TOTAL_TIMESTEPS):
                # Replay the best-known sequence (ranked by mean recorded reward).
                solutions = sorted(solutions, key=lambda x: np.mean(x[0]))
                best_pair = solutions[-1]
                new_rew = exploit(env, best_pair[1])
                best_pair[0].append(new_rew)
                print('replayed best with reward %f' % new_rew)
                continue
            else:
                env.reset()
                new_ep = False
        rew, new_ep = move(env, 100, False, 0.1)
        env.render()
        # NOTE(review): the message says "negative reward" but the trigger is
        # rew <= 50 in this variant — confirm the threshold is intentional.
        if not new_ep and rew <= 50:
            print('backtracking due to negative reward: %f' % rew)
            _, new_ep = move(env, 200, left=True)
        if new_ep:
            # Episode ended: record its peak reward with its action sequence.
            solutions.append(([max(env.reward_history)], env.best_sequence()))
Beispiel #5
0
def main():
    """Run JERK on the attached environment."""
    env = TrackedEnv(grc.RemoteEnv('tmp/sock'))
    needs_reset = True
    solutions = []
    while True:
        if needs_reset:
            # Probability of replaying grows as the step budget is consumed.
            exploit_prob = EXPLOIT_BIAS + env.total_steps_ever / TOTAL_TIMESTEPS
            if solutions and random.random() < exploit_prob:
                solutions = sorted(solutions, key=lambda pair: np.mean(pair[0]))
                rewards, sequence = solutions[-1]
                replay_rew = exploit(env, sequence)
                rewards.append(replay_rew)
                print('replayed best with reward %f' % replay_rew)
                continue
            env.reset()
            needs_reset = False
        rew, needs_reset = move(env, 100)
        if not needs_reset and rew <= 0:
            print('backtracking due to negative reward: %f' % rew)
            _, needs_reset = move(env, 70, left=True)
        if needs_reset:
            solutions.append(([max(env.reward_history)], env.best_sequence()))
Beispiel #6
0
def make_sonic_env(
    game,
    state,
    remote_env=False,
    scale_rew=True,
    video_dir="",
    short_life=False,
    backtracking=False,
):
    """Build a Sonic env (remote or local) with the standard wrapper stack."""
    # Remote env ignores game/state; local env records bk2 videos to video_dir.
    base = grc.RemoteEnv("tmp/sock") if remote_env else make(
        game=game, state=state, bk2dir=video_dir)
    env = SonicDiscretizer(base)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    # if stack:
    #     env = FrameStack(env, 4)
    if short_life:
        env = ShortLife(env)
    if backtracking:
        env = AllowBacktracking(env)
    return env
Beispiel #7
0
 def __init__(self, is_remote, game, state, max_timesteps, env_wrapper, do_render, monitor_path):
   # Build either the remote contest env or a local retro env, both passed
   # through env_wrapper(env, timestep_limit, do_render, monitor_path).
   # The remote env gets an unlimited timestep budget (float('inf')).
   if is_remote:
     import gym_remote.client as grc
     self.env = env_wrapper(grc.RemoteEnv('tmp/sock'), float('inf'), do_render, monitor_path)
   else:
     from retro_contest.local import make
     self.env = env_wrapper(make(game, state), max_timesteps, do_render, monitor_path)
Beispiel #8
0
def main():
    """Run the JERK loop: alternate random rollouts with replays of the best one."""
    # Set up a new TrackedEnv that can keep track of total timestamps and store
    # previous best solutions.
    env = grc.RemoteEnv('tmp/sock')
    env = TrackedEnv(env)

    # new_ep will keep track of if a new episode should be started.
    new_ep = True
    # solutions is an array of successful gameplay sequences as well as the
    # total reward earned from them.
    solutions = []
    while True:
        if new_ep:
            # Exploit probability grows as the total step budget is used up.
            if (solutions and random.random() <
                    EXPLOIT_BIAS + env.total_steps_ever / TOTAL_TIMESTEPS):
                # Replay the best sequence (ranked by mean of its recorded rewards).
                solutions = sorted(solutions, key=lambda x: np.mean(x[0]))
                best_pair = solutions[-1]
                new_rew = exploit(env, best_pair[1])
                best_pair[0].append(new_rew)
                print('replayed best with reward %f' % new_rew)
                continue
            else:
                env.reset()
                new_ep = False
        rew, new_ep = move(env, 100)
        # Non-positive progress: back up briefly before trying forward again.
        if not new_ep and rew <= 0:
            print('backtracking due to negative reward: %f' % rew)
            _, new_ep = move(env, 70, left=True)
        if new_ep:
            # Episode ended: store its peak reward alongside the action sequence.
            solutions.append(([max(env.reward_history)], env.best_sequence()))
def make_env(stack=True, scale_rew=True, local=False, level_choice=None):
    """
    Create an environment with some standard wrappers.

    Args:
        stack: stack 4 frames when True.
        scale_rew: apply RewardScaler when True.
        local: build a local retro env (random or chosen level) instead of
            connecting to the remote contest env.
        level_choice: optional index into the level list; None picks randomly.
    """
    print(stack, scale_rew, local)
    if local:  # Select Random Level if local
        from retro_contest.local import make
        levels = [
            'SpringYardZone.Act3', 'SpringYardZone.Act2', 'GreenHillZone.Act3',
            'GreenHillZone.Act1', 'StarLightZone.Act2', 'StarLightZone.Act1',
            'MarbleZone.Act2', 'MarbleZone.Act1', 'MarbleZone.Act3',
            'ScrapBrainZone.Act2', 'LabyrinthZone.Act2', 'LabyrinthZone.Act1',
            'LabyrinthZone.Act3'
        ]
        # Bug fixes: the random branch hard-coded the list length (13), and
        # `if not level_choice` made index 0 unselectable by callers.
        if level_choice is None:
            level_choice = random.choice(levels)
        else:
            level_choice = levels[level_choice]
        env = make(game='SonicTheHedgehog-Genesis', state=level_choice)
    else:
        print('connecting to remote environment')
        env = grc.RemoteEnv('tmp/sock')
        print('starting episode')
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, 4)
    return env
def get_environment():
    """Return a local retro env when is_local is set, otherwise the remote env."""
    if not is_local:
        import gym_remote.exceptions as gre
        import gym_remote.client as grc
        return grc.RemoteEnv('tmp/sock')
    from retro_contest.local import make
    return make(game='SonicTheHedgehog-Genesis', state='LabyrinthZone.Act1')
Beispiel #11
0
def main():
    """Launch one remote retro-contest server per process and attach a JerkAgent to each."""
    #env = grc.RemoteEnv('tmp/sock')
    #env_data = get_training_envs()
    if len(sys.argv) < 4:
        print('Usage: python jerk_agent_train.py game state process_count')
        sys.exit()

    game = sys.argv[1]
    state = sys.argv[2]
    process_count = sys.argv[3]

    # One socket directory (and one server subprocess) per requested worker.
    sockets = []
    for index in range(int(process_count)):
        #game, state = random.choice(env_data)
        # retro-contest-remote run -s tmp/sock -m monitor -d SonicTheHedgehog-Genesis GreenHillZone.Act1
        state_directory_name = state + '-' + str(index)
        base_dir = './remotes/'
        os.makedirs(base_dir, exist_ok=True)
        os.makedirs(base_dir + state_directory_name, exist_ok=True)
        socket_dir = base_dir + "{}/sock".format(state_directory_name)
        os.makedirs(socket_dir, exist_ok=True)
        monitor_dir = base_dir + "{}/monitor".format(state_directory_name)
        os.makedirs(monitor_dir, exist_ok=True)
        # Fire-and-forget server process; stdout captured to keep logs quiet.
        subprocess.Popen([
            "retro-contest-remote", "run", "-s", socket_dir, '-m', monitor_dir,
            '-d', game, state
        ],
                         stdout=subprocess.PIPE)
        print('launched {} ({})'.format(state, index))

        sockets.append(socket_dir)
        #envs.append(lambda: make_env(socket_dir=state + '/sock'))
    print('remote processes launched')
    #env = lambda: make_training_env('SonicTheHedgehog-Genesis', 'GreenHillZone.Act1', stack=True, scale_rew=True)
    #env = MultigameEnvWrapper
    #load_path = '/root/compo/trained_on_images_nature_cnn.joblib'
    #load_path = './saved_weights.joblib'
    #logger.configure(dir='./logs', format_strs=['stdout', 'tensorboard'])

    print('training...')

    # Shared across all agents so they can learn from each other's solutions.
    global_solutions = []

    # Connect one JerkAgent to each server socket.
    agents = []
    for socket_dir in sockets:
        env = grc.RemoteEnv(socket_dir)
        agent = JerkAgent(env, global_solutions)
        agents.append(agent)

    # JerkAgent appears to run as a thread/process — start all, then join all.
    for agent in agents:
        agent.start()

    print('Created {} agents'.format(len(agents)))

    for agent in agents:
        agent.join()
Beispiel #12
0
def make_env(stack=True,
             scale_rew=True,
             frame_wrapper=WarpFrame,
             reward_type=None):
    """Connect to the remote contest env and hand it to wrap_env for preprocessing."""
    return wrap_env(grc.RemoteEnv('tmp/sock'),
                    stack, scale_rew, frame_wrapper, reward_type)
Beispiel #13
0
def get_environment(environment):
    """ Return a local or remote environment as requested """
    # 'aws' and 'local' both mean a locally-built retro env; anything else
    # connects to the remote contest socket.
    if environment not in ('aws', 'local'):
        import gym_remote.exceptions as gre
        import gym_remote.client as grc
        return grc.RemoteEnv('tmp/sock')
    from retro_contest.local import make
    return make(game='SonicTheHedgehog-Genesis', state='LabyrinthZone.Act1')
Beispiel #14
0
def make_env(extra_wrap_fn=None):
    """Build a single env: a recording local env when RETRO_RECORD is set,
    otherwise the remote contest env. Returns (env_id, wrapped_env)."""
    if 'RETRO_RECORD' in os.environ:
        from retro_contest.local import make
        game = os.environ['RETRO_GAME']
        state = os.environ['RETRO_STATE']
        env_id = game + "-" + state
        env = make(game=game, state=state, bk2dir=os.environ['RETRO_RECORD'])
    else:
        env_id = 'tmp/sock'
        env = grc.RemoteEnv('tmp/sock')
    # Bug fix: extra_wrap_fn was accepted but never forwarded, unlike the
    # sibling make_vec_env/make_batched_env which pass it to wrap_env.
    return env_id, wrap_env(env, extra_wrap_fn=extra_wrap_fn)
Beispiel #15
0
def create_env(env_name, env_state, contest=False, human=False):
    """Build a SonicDiscretizer-wrapped env: raw retro, local contest, or remote."""
    if human:
        base = retro.make(env_name,
                          env_state,
                          scenario="contest",
                          use_restricted_actions=retro.ACTIONS_FILTERED)
    elif contest:
        base = grc.RemoteEnv('tmp/sock')
    else:
        base = make(env_name, env_state)
    return SonicDiscretizer(base)
Beispiel #16
0
def main():
    """Greedy-memory random agent: remember rewarded actions, replay the best run."""
    # Connect to remote environment (from simple-agent.py)
    print('connecting to remote environment')
    env = grc.RemoteEnv('tmp/sock')
    print('starting episode')
    env.reset()

    # This bot is just a kludge that tries to remember things if it moves forward, and tries to remember the highest reward sequence of actions.
    #obs = env.reset()
    c = 0  # step index within the current episode
    lenActions = len(env.action_space.sample())
    # jerkQueue: best known action sequence, replayed at the start of each episode.
    jerkQueue = np.reshape(env.action_space.sample(), (1, lenActions))
    # myActions: rewarded actions collected during the current episode.
    myActions = jerkQueue
    bestReward = 0.
    rewAcc = 0.
    avgReward = 0.
    # Removed dead locals oldX / numRuns: they were assigned but never read.
    while True:
        if (c < len(jerkQueue)):
            # Replay the stored best sequence first.
            act = jerkQueue[c, ...]
        else:
            # Then act randomly, always holding button index 7.
            act = env.action_space.sample()
            #if (np.random.random()>0.05):
            act[7] = 1

        obs, reward, done, info = env.step(act)

        rewAcc += reward
        if (reward > 0):
            # Remember action when rewarded
            myActions = np.append(myActions,
                                  np.reshape(act, (1, lenActions)),
                                  axis=0)

        c += 1
        if done:
            print('episode complete')
            if (rewAcc > bestReward):
                bestReward = rewAcc
                jerkQueue = myActions
                #print('new best reward: %.3f based on a %i action JERK queue'%(bestReward,len(jerkQueue)))
            # Exponential moving average of episode reward (for the debug print above).
            avgReward = 0.95 * avgReward + 0.05 * rewAcc
            #print('avg. reward: %.3f, acc. reward: %.3f, best reward: %.3f, current JERK queue %i vs actions length %i'%(avgReward, rewAcc, bestReward, len(jerkQueue), len(myActions)))

            #Attenuate best reward (to diminish the effect of lucky runs)
            bestReward *= 0.95  #* bestReward
            env.reset()
            rewAcc = 0.
            c = 0
            myActions = np.reshape(env.action_space.sample(), (1, lenActions))
def make_env(stack=True, scale_rew=True):
    """Remote contest env with discretized actions, warped frames, optional scaling/stacking."""
    wrapped = CustomSonicDiscretizer(grc.RemoteEnv('tmp/sock'))
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    return FrameStack(wrapped, 4) if stack else wrapped
def main():
    """Random agent that always holds button index 7; resets on episode end."""
    print('connecting to remote environment')
    env = grc.RemoteEnv('tmp/sock')
    print('starting episode')
    env.reset()
    while True:
        sampled = env.action_space.sample()
        sampled[7] = 1
        done = env.step(sampled)[2]
        if done:
            print('episode complete')
            env.reset()
Beispiel #19
0
def make_remote_env(stack=True, scale_rew=True, socket_dir='/tmp'):
    """Remote contest env with the standard preprocessing plus episode-info tracking."""
    wrapped = SonicDiscretizer(grc.RemoteEnv(socket_dir))
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    if stack:
        wrapped = FrameStack(wrapped, 4)
    return EpisodeInfo(wrapped)
def make_env(stack=True, scale_rew=True):
    """Remote contest env with discretized actions, warped frames, optional scaling/stacking."""
    wrapped = SonicDiscretizer(grc.RemoteEnv('tmp/sock'))
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    return FrameStack(wrapped, 4) if stack else wrapped
Beispiel #21
0
def make_batched_env(extra_wrap_fn=None):
    """Batched env: async subprocess envs when running locally, one remote env otherwise."""
    if 'RETRO_ROOT_DIR' not in os.environ:
        remote = wrap_env(grc.RemoteEnv('tmp/sock'), extra_wrap_fn=extra_wrap_fn)
        env = BatchedGymEnv([[remote]])
        env.env_ids = ['tmp/sock']
        return env
    subenv_ids, subenvs = build_envs(extra_wrap_fn=extra_wrap_fn)
    env = batched_gym_env(subenvs, sync=False)
    env.env_ids = subenv_ids
    return env
Beispiel #22
0
def make_env(game, state, stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.

    NOTE(review): `game` and `state` are currently unused — the remote env
    override below ignores them (see the commented local-make line). Confirm
    this is intentional before relying on these parameters.
    """
    #env = AllowBacktracking(make(game=game, state=state))
    env = AllowBacktracking(grc.RemoteEnv('tmp/sock'))
    env = SonicDiscretizer(env)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, 4)
    return env
def make_env(stack=True, scale_rew=True):
    """Remote contest env with discretized actions, custom frame warp, and normalization."""
    wrapped = SonicDiscretizer(grc.RemoteEnv('tmp/sock'))
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    # FrameStack is intentionally not applied here: it currently uses a
    # different Box (Box(0,255,w,h) compared to Box(0,1,w,h)).
    wrapped = CustomWarpFrame(wrapped)
    return NormalizedEnv(wrapped)
Beispiel #24
0
def main():
    """Random agent (always holds button 7) that reports per-episode reward."""
    print('connecting to remote environment')
    env = grc.RemoteEnv('tmp/sock')
    print('starting episode')
    env.reset()

    # Bug fix: rewAcc was reset at the top of every step, so the "episode
    # complete" message reported only the final step's reward. Accumulate
    # across the episode and reset only after it ends.
    rewAcc = 0
    while True:
        action = env.action_space.sample()
        action[7] = 1
        ob, reward, done, _ = env.step(action)
        rewAcc += reward
        if done:
            print('episode complete with reward = %.3f'%rewAcc)
            env.reset()
            rewAcc = 0
Beispiel #25
0
def make(stack=True, scale_rew=True):
    """Remote contest env (fixed socket path) with the standard preprocessing."""
    #env = retro.make(game, state)
    print("Connecting to remote environment")
    wrapped = grc.RemoteEnv('/root/compo/tmp/sock')
    print("Starting preproc on env")
    wrapped = SonicDiscretizer(wrapped)
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    return FrameStack(wrapped, 4) if stack else wrapped
Beispiel #26
0
def launch_env(game, state):
    """Spawn a retro-contest-remote server for (game, state) and connect to it."""
    #game, state = random.choice(env_data)
    # retro-contest-remote run -s tmp/sock -m monitor -d SonicTheHedgehog-Genesis GreenHillZone.Act1
    # Per-state working directories: ./remotes/<state>/{sock,monitor}.
    base_dir = './remotes/'
    os.makedirs(base_dir, exist_ok=True)
    os.makedirs(base_dir + state, exist_ok=True)
    socket_dir = base_dir + "{}/sock".format(state)
    os.makedirs(socket_dir, exist_ok=True)
    monitor_dir = base_dir + "{}/monitor".format(state)
    os.makedirs(monitor_dir, exist_ok=True)
    # Fire-and-forget server process; stdout is captured to keep logs quiet.
    subprocess.Popen([
        "retro-contest-remote", "run", "-s", socket_dir, '-m', monitor_dir,
        '-d', game, state
    ],
                     stdout=subprocess.PIPE)
    # NOTE(review): no wait/handshake before connecting — presumably RemoteEnv
    # blocks until the server's socket appears; confirm.
    return grc.RemoteEnv(socket_dir)
Beispiel #27
0
def make_sonic_test(stack=True, scale_rew=True):
    """Remote contest env with the standard test-time preprocessing."""
    wrapped = SonicDiscretizer(grc.RemoteEnv('tmp/sock'))
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    return FrameStack(wrapped, 4) if stack else wrapped
def main():
    """Run JERK on the attached environment."""
    wrapped = FrameStack(
        WarpFrame(RewardScaler(SonicDiscretizer(grc.RemoteEnv('tmp/sock')))), 4)

    print('action space')
    print(wrapped.action_space)
    print('observation space')
    print(wrapped.observation_space)

    JerkAgent(wrapped).train()
def test():
    """Evaluate a saved CNQAgent on the remote contest env, acting forever."""
    # 12 = size of the raw retro action vector; AGENT_NAME identifies the model.
    agent = CNQAgent(12, AGENT_NAME)
    agent.load_model()
    agent.epsilon = 0.1  # keep a small amount of exploration at test time
    print('connecting to remote environment')
    env = grc.RemoteEnv('tmp/sock')
    # env = make(game='SonicTheHedgehog-Genesis', state='SpringYardZone.Act3')
    print('starting episode')
    state = env.reset()
    reward = 0
    action = np.zeros(12)
    while True:
        # Add a batch dimension; assumes raw RGB frames of 224x320x3 — TODO confirm
        # against the remote env's observation space.
        state = np.reshape(state, (1, 224, 320, 3))
        action = agent.act(state, action, reward)
        state, reward, done, _ = env.step(action)
        if done:
            # NOTE(review): the reset observation is discarded here, so the next
            # loop iteration reshapes the stale terminal state — verify intent.
            print('episode complete')
            env.reset()
def main():
    """Run JERK on the attached environment."""
    #env = launch_env('SonicTheHedgehog-Genesis', 'SpringYardZone.Act1')
    wrapped = SonicDiscretizer(grc.RemoteEnv('tmp/sock'))
    wrapped = FrameStack(WarpFrame(RewardScaler(wrapped)), 4)

    print('action space')
    print(wrapped.action_space)
    print('observation space')
    print(wrapped.observation_space)

    JerkAgent(wrapped).train()