Example #1
def state(game):
    errors = []
    states = retro.list_states(game)
    if not states:
        return [], []

    rom = retro.get_romfile_path(game)
    path = retro.get_game_path(game)
    emu = retro.RetroEmulator(rom)
    for statefile in states:
        try:
            with gzip.open(os.path.join(path, statefile + '.state'),
                           'rb') as fh:
                state = fh.read()
        except (IOError, zlib.error):
            errors.append((game, 'state failed to decode: %s' % statefile))
            continue

        emu.set_state(state)
        emu.step()

    del emu
    gc.collect()

    return [], errors
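A hypothetical driver for the checker above, shown only as a usage sketch; the loop over retro.list_games() is an assumption, not part of the original example:

import retro

all_errors = []
for game in retro.list_games():
    _, errors = state(game)
    all_errors.extend(errors)
for game, message in all_errors:
    print('%s: %s' % (game, message))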
Example #2
def getEnvFns(bk2dir=None):
    env_fns = []
    for game in [
            'SonicTheHedgehog-Genesis', 'SonicTheHedgehog2-Genesis',
            'SonicAndKnuckles3-Genesis'
    ]:
        for state in retro.list_states(game):
            env_fns.append(makeEnvFn(game, state, bk2dir=bk2dir))
    return env_fns
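makeEnvFn is defined elsewhere in the source project. A plausible sketch, assuming it returns a zero-argument factory as vectorized-environment helpers usually expect; the record argument passed to retro.make is also an assumption:

def makeEnvFn(game, state, bk2dir=None):
    def _thunk():
        # record .bk2 movies to bk2dir when one is given (assumption)
        if bk2dir:
            return retro.make(game=game, state=state, record=bk2dir)
        return retro.make(game=game, state=state)
    return _thunk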
Example #3
def list_states(args):
    if args.game:
        games = args.game
    else:
        games = retro.list_games()
        games.sort()
    for game in games:
        states = retro.list_states(game)
        print(game + ':')
        states.sort()
        for state in states:
            print('  ' + state)
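Hypothetical wiring for the helper above (not from the source). Because the function iterates over args.game directly, --game needs to produce a list, hence nargs='*':

import argparse
import retro

parser = argparse.ArgumentParser(description='List the savestates available for each game')
parser.add_argument('--game', nargs='*',
                    help='restrict output to these games (default: all installed games)')
list_states(parser.parse_args())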
Example #4
def scan_missing():
    missing = []
    for game in retro.list_games():
        gamedir = retro.get_game_path(game)
        if not os.path.isfile(os.path.join(gamedir, 'data.json')):
            missing.append((game, 'data.json'))
        if not os.path.isfile(os.path.join(gamedir, 'scenario.json')):
            missing.append((game, 'scenario.json'))
        if not os.path.isfile(os.path.join(gamedir, 'metadata.json')):
            missing.append((game, 'metadata.json'))
        if not retro.list_states(game):
            missing.append((game, '*.state'))
        if not os.path.isfile(os.path.join(gamedir, 'rom.sha')):
            missing.append((game, 'rom.sha'))
    return missing
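A minimal way such a scan might be reported, included here only as a usage sketch:

for game, filename in scan_missing():
    print('%s is missing %s' % (game, filename))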
Example #5
    def __init__(self, csv, init=None):
        #games, states read from csv
        #self.game_states = self._csv_dict(csv)

        games = [
            'SonicTheHedgehog-Genesis', 'SonicTheHedgehog2-Genesis',
            'SonicAndKnuckles3-Genesis'
        ]
        self.all_game_states = {k: retro.list_states(k) for k in games}
        #58 total states

        #states yet to be added
        self.game_states = {}  #current states used for training
        self.available_states = []
        for game in games:
            for state in self.all_game_states[game]:
                self.available_states.append((game, state))

        #seed the training pool with an initial set of env states
        add_n_games = 44  #FIXME: steps / 10k + 2
        for _ in range(add_n_games):
            self._add_state()

        if init:
            self.env = retro_contest.local.make(game=init[0], state=init[1])
        self.act_int = False
        self.action_space = self.env.action_space
        self.done = False

        #FIXME: every x steps add an env state
        #progressively add
        self.keep_env = False
        self.switch_interval = 10000  #steps before adding a state
        self.switch_steps = 0
        self.add_interval = 300000  #350000 is ~ 1h at 3 batch/sec
        self.add_steps = 0
        # ~60 hrs to full dataset

        self.episode_steps = 0
        self.episode_len = 200  #limit length of episodes while training

        self._cur_r = 0.0
        self._max_r = 0.0
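The _add_state helper called above is defined elsewhere in the source class. Given the bookkeeping in __init__, a plausible sketch (an assumption, not the original code) is that it moves a random (game, state) pair from available_states into the training pool and points self.env at it:

    def _add_state(self):
        import random  # assumed to be imported in the original module
        game, state = self.available_states.pop(
            random.randrange(len(self.available_states)))
        self.game_states.setdefault(game, []).append(state)
        self.env = retro_contest.local.make(game=game, state=state)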
Example #6
def verify_default_state(game, raw=None):
    file = os.path.join(game, 'metadata.json')
    try:
        if not raw:
            with open(retro.get_game_path(file)) as f:
                metadata = json.load(f)
        else:
            metadata = json.loads(raw)
    except json.JSONDecodeError:
        return [], [(file, 'fail decode')]
    except IOError:
        return [], []

    errors = []
    state = metadata.get('default_state')
    if not state:
        return [], [(file, 'default state missing')]
    if state not in retro.list_states(game):
        errors.append((file, 'invalid default state %s' % state))

    return [], errors
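A hypothetical check over every installed game, mirroring scan_missing above (not part of the original example):

bad_defaults = []
for game in retro.list_games():
    _, errors = verify_default_state(game)
    bad_defaults.extend(errors)
for file, message in bad_defaults:
    print('%s: %s' % (file, message))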
Example #7
def main():

    start_time = time.time()

    games = [
        "SonicTheHedgehog-Genesis", "SonicTheHedgehog2-Genesis",
        "SonicAndKnuckles3-Genesis"
    ]
    game = np.random.choice(games, 1)[0]
    state = np.random.choice(retro.list_states(game), 1)[0]
    #env = retro.make(game, state)
    env = AllowBacktracking(make(game, state))  #contest version
    env = SonicDiscretizer(env)  #contest version
    print(game, '-', state)

    # Parameters
    timesteps = 4500
    memory = deque(maxlen=30000)
    epsilon = 1  #probability of doing a random move
    max_random = 1
    min_random = 0.1  #minimum randomness #r12
    rand_decay = 1e-3  #reduce the randomness by decay/loops
    gamma = 0.99  #discount for future reward
    mb_size = 256  #learning minibatch size
    loops = 45  #loop through the different game levels
    sub_loops = 100
    hold_action = 1  #number of frames to hold each action (4 for normal, 1 for contest)
    learning_rate = 5e-5
    max_reward = 0
    min_reward = 10000
    #action_threshold = 1
    target_step_interval = 10
    reward_clip = 200
    resize_to = [128, 128]

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # pylint: disable=E1101
    with tf.Session(config=config) as sess:
        #input observation state, output Q of actions
        model = Sequential()
        model.add(
            Conv2D(32,
                   kernel_size=(8, 8),
                   strides=4,
                   activation="relu",
                   input_shape=(128, 128, 3)))
        model.add(Conv2D(64, kernel_size=(4, 4), strides=2, activation="relu"))
        model.add(Conv2D(64, (3, 3), activation="relu"))
        model.add(Flatten())
        model.add(Dense(512, activation="relu"))
        model.add(
            Dense(env.action_space.n,
                  kernel_initializer="uniform",
                  activation="linear"))

        if os.path.isfile("sonic_model.h5"):
            model.load_weights("sonic_model.h5")

        model.compile(loss="mse",
                      optimizer=optimizers.Adam(lr=learning_rate),
                      metrics=["accuracy"])

        tensorboard = TensorBoard(
            log_dir=
            "logs/sonic_modmemdecayrdq18_reshape_64x512mb256_resc_target_interval_{}_memory_30000_lr_{}_decay_{}.{}"
            .format(target_step_interval, learning_rate, rand_decay,
                    time.time()))
        tensorboard.set_model(model)
        train_names = ["Loss", "Accuracy"]

        # serialize model to JSON
        model_json = model.to_json()
        with open("sonic_model.json", "w") as json_file:
            json_file.write(model_json)
        # serialize weights to HDF5
        model.save_weights("sonic_model.h5")
        model.save_weights("sonic_target_model.h5")

        env.close()

    for training_loop in range(loops):
        with tf.Session(config=config) as sess:
            model = model_from_json(model_json)
            model.load_weights("sonic_model.h5")
            model.compile(loss="mse",
                          optimizer=optimizers.Adam(lr=learning_rate),
                          metrics=["accuracy"])

            target_model = model_from_json(model_json)
            target_model.load_weights("sonic_target_model.h5")
            target_model.trainable = False
            target_model.compile(loss="mse",
                                 optimizer=optimizers.Adam(lr=learning_rate),
                                 metrics=["accuracy"])

            for sub_training_loop in range(sub_loops):
                loop_start_time = time.time()
                game = np.random.choice(games, 1)[0]
                state = np.random.choice(retro.list_states(game), 1)[0]
                print("Playing", game, "-", state)
                #env = AllowBacktracking(retro.make(game, state))
                env = AllowBacktracking(make(game, state))  #contest version
                env = SonicDiscretizer(env)  #contest version
                obs = env.reset()  #game start
                obs_resized = resize(obs, resize_to)
                diff_obs = obs_resized  #difference between obs_new and obs to capture velocity

                done = False
                total_raw_reward = 0.0

                #Observation
                for t in range(timesteps):
                    #env.render() #display training
                    if np.random.rand() <= epsilon:
                        #pick a random action
                        action = env.action_space.sample()

                        reward_hold = np.zeros(hold_action)
                        for h in range(hold_action):
                            obs_new, reward_hold[h], done, info = env.step(
                                action)  # result of action
                        reward = sum(reward_hold)
                        reward_ = min(reward, reward_clip)

                        obs_new_resized = resize(obs_new, resize_to)
                        diff_obs_new = obs_new_resized - obs_resized

                        #Bellman double Q
                        Q = model.predict(
                            diff_obs[np.newaxis, :])  # Q-values predictions

                        Q_ = model.predict(diff_obs_new[np.newaxis, :])
                        Q_target = target_model.predict(
                            diff_obs_new[np.newaxis, :])

                        target_ = copy.copy(Q)

                        if done:
                            target_[0, action] = reward_ - reward_clip
                        else:
                            target_[0, action] = reward_ + gamma * Q_target[
                                0, :][np.argmax(Q_[0, :])]

                        distance_from_target = mean_squared_error(Q, target_)
                    else:
                        Q = model.predict(
                            diff_obs[np.newaxis, :])  # Q-values predictions

                        action = np.argmax(Q)

                        reward_hold = np.zeros(hold_action)
                        for h in range(hold_action):
                            obs_new, reward_hold[h], done, info = env.step(
                                action)  # result of action
                        reward = sum(reward_hold)
                        reward_ = min(reward, reward_clip)

                        obs_new_resized = resize(obs_new, resize_to)
                        diff_obs_new = obs_new_resized - obs_resized

                        #Bellman double Q
                        Q_ = model.predict(diff_obs_new[np.newaxis, :])
                        Q_target = target_model.predict(
                            diff_obs_new[np.newaxis, :])

                        target_ = copy.copy(Q)

                        if done:
                            target_[0, action] = reward_ - reward_clip
                        else:
                            target_[0, action] = reward_ + gamma * Q_target[
                                0, :][np.argmax(Q_[0, :])]

                        distance_from_target = mean_squared_error(Q, target_)
                        #print("distance from target",distance_from_target)

                    total_raw_reward += reward

                    #memory.append((diff_obs, action, reward, diff_obs_new, done))
                    max_reward = max(reward, max_reward)
                    min_reward = min(reward, min_reward)

                    # store transitions the model predicts poorly, plus terminal steps
                    if distance_from_target > 25 or done:
                        memory.append(
                            (diff_obs, action, reward, diff_obs_new, done))

                    # save obs state
                    obs_resized = obs_new_resized
                    diff_obs = diff_obs_new

                    if done:
                        obs = env.reset()  #restart game if done
                        obs_resized = resize(obs, resize_to)
                        diff_obs = obs_resized  #difference between obs_new and obs to capture velocity

                epsilon = min_random + (max_random - min_random) * np.exp(
                    -rand_decay *
                    (training_loop * sub_loops + sub_training_loop + 1))
                print("Total reward: {}".format(round(total_raw_reward)))
                print("Observation Finished", sub_training_loop + 1, "x",
                      training_loop + 1, "out of", sub_loops, "x", loops)

                # Learning
                if len(memory) >= mb_size:
                    minibatch_train_start_time = time.time()

                    #sample memory
                    minibatch = random.sample(memory, mb_size)

                    inputs_shape = (mb_size, ) + obs_resized.shape
                    inputs = np.zeros(inputs_shape)
                    targets = np.zeros((mb_size, env.action_space.n))

                    #double Q: fix the target for a time to stabilise the model
                    # and training_loop*sub_loops + sub_training_loop+1 >= 100: #r12 chase score first
                    if (sub_training_loop + 1) % target_step_interval == 0:
                        #with tf.device('/cpu:0'):
                        # serialize weights to HDF5
                        #model.save_weights("sonic_model.h5")
                        model.save_weights("sonic_target_model.h5")
                        # checkpoint the online network every 500 observation loops
                        loop_count = training_loop * sub_loops + sub_training_loop + 1
                        if loop_count % 500 == 0 and loop_count <= 4500:
                            model.save_weights("sonic_model_%d.h5" % loop_count)
                        # create model from json and do inference on cpu
                        target_model = model_from_json(model_json)
                        target_model.load_weights("sonic_target_model.h5")
                        target_model.trainable = False
                        target_model.compile(
                            loss="mse",
                            optimizer=optimizers.Adam(lr=learning_rate),
                            metrics=["accuracy"])

                    for i in range(0, mb_size):
                        diff_obs = minibatch[i][0]
                        action = minibatch[i][1]
                        reward = minibatch[i][2]
                        diff_obs_new = minibatch[i][3]
                        done = minibatch[i][4]

                        #reward clipping
                        reward = min(reward, reward_clip)

                        #Bellman double Q
                        inputs[i] = diff_obs[np.newaxis, :]
                        Q = model.predict(diff_obs[np.newaxis, :])
                        Q_ = model.predict(diff_obs_new[np.newaxis, :])
                        Q_target = target_model.predict(
                            diff_obs_new[np.newaxis, :])

                        targets[i] = copy.copy(Q)

                        if done:
                            targets[i, action] = reward - reward_clip
                        else:
                            targets[i, action] = reward + gamma * Q_target[
                                0, :][np.argmax(Q_[0, :])]

                    #train network on constructed inputs,targets
                    logs = model.train_on_batch(inputs, targets)
                    write_log(tensorboard, train_names, logs,
                              training_loop * sub_loops + sub_training_loop)

                    model.save_weights("sonic_model.h5")

                    print(
                        "Model minibatch training lasted:",
                        str(
                            timedelta(seconds=time.time() -
                                      minibatch_train_start_time)),
                        "dd:hh:mm:ss")
                    print("Learning Finished", sub_training_loop + 1, "x",
                          training_loop + 1, "out of", sub_loops, "x", loops)

                env.close()

                print("Loop lasted:",
                      str(timedelta(seconds=time.time() - loop_start_time)),
                      "dd:hh:mm:ss")
                print("Training lasted:",
                      str(timedelta(seconds=time.time() - start_time)),
                      "dd:hh:mm:ss")
                print("Rewards between", min_reward, "and", max_reward)
                print("Pourcentage of random movements set to", epsilon * 100,
                      "%\n")
Example #8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--game', help='retro game to use')
    parser.add_argument('--state', help='retro state to start from')
    parser.add_argument('--scenario',
                        help='scenario to use',
                        default='scenario')
    args = parser.parse_args()

    if args.game is None:
        print('Please specify a game with --game <game>')
        print('Available games:')
        for game in sorted(retro.list_games()):
            print(game)
        sys.exit(1)

    if args.state is None:
        print('Please specify a state with --state <state>')
        print('Available states:')
        for state in sorted(retro.list_states(args.game)):
            print(state)
        sys.exit(1)

    env = retro.make(game=args.game,
                     state=args.state,
                     use_restricted_actions=retro.ACTIONS_ALL,
                     scenario=args.scenario)
    obs = env.reset()
    screen_height, screen_width = obs.shape[:2]

    random.seed(0)

    key_handler = pyglet.window.key.KeyStateHandler()
    win_width = 2000
    win_height = win_width * screen_height // screen_width
    win = pyglet.window.Window(width=win_width, height=win_height, vsync=False)

    pixel_scale = 1.0
    if hasattr(win.context, '_nscontext'):
        pixel_scale = win.context._nscontext.view().backingScaleFactor()

    win.width = win.width // pixel_scale
    win.height = win.height // pixel_scale

    joysticks = pyglet.input.get_joysticks()
    if len(joysticks) > 0:
        joystick = joysticks[0]
        joystick.open()
    else:
        joystick = None

    win.push_handlers(key_handler)

    key_previous_states = {}
    button_previous_states = {}

    steps = 0
    recorded_actions = []
    recorded_states = []

    pyglet.app.platform_event_loop.start()

    fps_display = pyglet.clock.ClockDisplay()
    clock.set_fps_limit(60)

    glEnable(GL_TEXTURE_2D)
    texture_id = GLuint(0)
    glGenTextures(1, ctypes.byref(texture_id))
    glBindTexture(GL_TEXTURE_2D, texture_id)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST)
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, screen_width, screen_height, 0,
                 GL_RGB, GL_UNSIGNED_BYTE, None)

    while not win.has_exit:
        win.dispatch_events()

        win.clear()

        keys_clicked = set()
        keys_pressed = set()
        for key_code, pressed in key_handler.items():
            if pressed:
                keys_pressed.add(key_code)

            if not key_previous_states.get(key_code, False) and pressed:
                keys_clicked.add(key_code)
            key_previous_states[key_code] = pressed

        buttons_clicked = set()
        buttons_pressed = set()
        if joystick is not None:
            for button_code, pressed in enumerate(joystick.buttons):
                if pressed:
                    buttons_pressed.add(button_code)

                if not button_previous_states.get(button_code,
                                                  False) and pressed:
                    buttons_clicked.add(button_code)
                button_previous_states[button_code] = pressed

        if keycodes.R in keys_clicked or buttoncodes.LEFT_BUMPER in buttons_clicked:
            if len(recorded_states) > 1:
                recorded_states.pop()
                steps, save_state = recorded_states.pop()
                recorded_states = recorded_states[:steps]
                recorded_actions = recorded_actions[:steps]
                env.em.set_state(save_state)

        if keycodes.ESCAPE in keys_pressed or buttoncodes.XBOX in buttons_clicked:
            # record all the actions so far to a bk2 and exit
            i = 0
            while True:
                movie_filename = 'human/%s/%s/%s-%s-%04d.bk2' % (
                    args.game, args.scenario, args.game, args.state, i)
                if not os.path.exists(movie_filename):
                    break
                i += 1
            os.makedirs(os.path.dirname(movie_filename), exist_ok=True)
            env.record_movie(movie_filename)
            env.reset()
            for step, act in enumerate(recorded_actions):
                if step % 1000 == 0:
                    print('saving %d/%d' % (step, len(recorded_actions)))
                env.step(act)
            env.stop_record()
            print('complete')
            sys.exit(1)

        inputs = {
            'A':
            keycodes.Z in keys_pressed or buttoncodes.A in buttons_pressed,
            'B':
            keycodes.X in keys_pressed or buttoncodes.B in buttons_pressed,
            'C':
            keycodes.C in keys_pressed,
            'X':
            keycodes.A in keys_pressed or buttoncodes.X in buttons_pressed,
            'Y':
            keycodes.S in keys_pressed or buttoncodes.Y in buttons_pressed,
            'Z':
            keycodes.D in keys_pressed,
            'UP':
            keycodes.UP in keys_pressed or buttoncodes.D_UP in buttons_pressed,
            'DOWN':
            keycodes.DOWN in keys_pressed
            or buttoncodes.D_DOWN in buttons_pressed,
            'LEFT':
            keycodes.LEFT in keys_pressed
            or buttoncodes.D_LEFT in buttons_pressed,
            'RIGHT':
            keycodes.RIGHT in keys_pressed
            or buttoncodes.D_RIGHT in buttons_pressed,
            'MODE':
            keycodes.TAB in keys_pressed
            or buttoncodes.SELECT in buttons_pressed,
            'START':
            keycodes.ENTER in keys_pressed
            or buttoncodes.START in buttons_pressed,
        }
        action = [inputs[b] for b in env.BUTTONS]

        if steps % SAVE_PERIOD == 0:
            recorded_states.append((steps, env.em.get_state()))
        obs, rew, done, info = env.step(action)
        recorded_actions.append(action)
        steps += 1

        glBindTexture(GL_TEXTURE_2D, texture_id)
        video_buffer = ctypes.cast(obs.tobytes(),
                                   ctypes.POINTER(ctypes.c_short))
        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, obs.shape[1], obs.shape[0],
                        GL_RGB, GL_UNSIGNED_BYTE, video_buffer)

        x = 0
        y = 0
        h = win.height
        w = win.width

        pyglet.graphics.draw(
            4,
            pyglet.gl.GL_QUADS,
            ('v2f', [x, y, x + w, y, x + w, y + h, x, y + h]),
            ('t2f', [0, 1, 1, 1, 1, 0, 0, 0]),
        )

        fps_display.draw()

        win.flip()

        # process joystick events
        timeout = clock.get_sleep_time(False)
        pyglet.app.platform_event_loop.step(timeout)

        clock.tick()

    pyglet.app.platform_event_loop.stop()
Example #9
"""
Environments and wrappers for Sonic training.
"""

import gym
import numpy as np
import retro

from atari_wrappers import WarpFrame, FrameStack

count = 0
envs = []
for game in retro.list_games():
    if "Sonic" in game:
        for state in retro.list_states(game):
            envs.append((game, state))
            count += 1


def make_envs(stack=True, scale_rew=True, backtracking=True, num=count):
    return [
        make_env(stack=stack,
                 scale_rew=scale_rew,
                 backtracking=backtracking,
                 i=i) for i in range(num)
    ]


def make_env(stack=True, scale_rew=True, backtracking=True, i=0):
    """
    Create an environment with some standard wrappers.
    """
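    # The original example is truncated at this point. What follows is only a
    # plausible sketch of how the body might continue, inferred from the imports
    # above (WarpFrame, FrameStack) -- it is an assumption, not the source's code.
    # (The scale_rew and backtracking wrappers are not shown in this excerpt.)
    game, state = envs[i]
    env = retro.make(game=game, state=state)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, 4)
    return env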
Example #10
def gen_vae_data(game, samples_per_stage=10000, threads=2, size=(320, 320), **kwargs):
    training_data_dir = kwargs.get('data_dir') or \
        '{}/PycharmProjects/jjabrams_rl/data/training_data/'.format(expanduser("~"))
    gen_stage_data_args = [(game, stage, samples_per_stage, training_data_dir, size)
                           for stage in retro.list_states(game)]
    # close worker processes once the stage data has been generated
    with multiprocessing.Pool(threads) as p:
        data_added = sum(p.map(gen_stage_data, gen_stage_data_args))
    return data_added
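gen_stage_data is defined elsewhere in the source project. Because Pool.map passes each tuple as a single argument, its signature presumably unpacks that tuple; a hypothetical stub:

def gen_stage_data(args):
    game, stage, samples_per_stage, training_data_dir, size = args
    # ...collect samples_per_stage frames from (game, stage), resize them to
    # the requested size, and write them under training_data_dir...
    return samples_per_stage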
Example #11
try:
    import retro

    for game in retro.list_games():
        is_installed = True
        try:
            retro.make(game).close()
        except FileNotFoundError:
            is_installed = False
        if is_installed:
            name = 'retro.' + game

            def make_retro_make(id_:str):
                def make(state, *args, **kwargs):
                    return retro.make(id_, *args, state=state, **kwargs)
                return make

            all_environments[name] = Maker(
                name=name,
                make=make_retro_make(game),
                states=retro.list_states(game),
            )
except ImportError:
    pass

# User-defined environments
objects = read_objects()
envs = objects['environment']

modules = import_from_objects(envs)
for name, module in modules.items():
    all_environments[name] = module
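A hypothetical use of the registry built above; Maker, read_objects and import_from_objects come from the surrounding project and are not shown in this excerpt, and attribute access on Maker is assumed:

maker = all_environments.get('retro.SonicTheHedgehog-Genesis')
if maker is not None:
    # make(state, ...) forwards to retro.make(game, state=state) per make_retro_make
    env = maker.make(maker.states[0])
    env.reset()
    env.close()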