def state(game):
    """Check that every saved state of ``game`` loads into the emulator.

    Each ``<name>.state`` file returned by ``retro.list_states`` is
    gzip-decompressed, handed to one ``retro.RetroEmulator`` built from the
    game's ROM, and stepped once.

    Args:
        game: Name of the game whose state files are checked.

    Returns:
        A pair of lists. The first is always empty; the second holds one
        ``(game, message)`` tuple for each state file that could not be read
        or decompressed.
    """
    errors = []
    states = retro.list_states(game)
    if not states:
        return [], []

    rom = retro.get_romfile_path(game)
    path = retro.get_game_path(game)
    emu = retro.RetroEmulator(rom)
    try:
        for statefile in states:
            try:
                with gzip.open(os.path.join(path, statefile + '.state'), 'rb') as fh:
                    state_data = fh.read()
            except (IOError, EOFError, zlib.error):
                # gzip signals a truncated stream with EOFError, which is not
                # an IOError; report it like any other undecodable file.
                errors.append((game, 'state failed to decode: %s' % statefile))
                continue

            emu.set_state(state_data)
            emu.step()
    finally:
        # Release the emulator even when set_state()/step() raises.
        # NOTE(review): presumably needed so a later emulator can be created
        # in the same process -- confirm against the retro API.
        del emu
        gc.collect()

    return [], errors
def getEnvFns(bk2dir=None):
    """Return one ``makeEnvFn`` result per saved state of three Sonic games.

    Args:
        bk2dir: Forwarded unchanged to ``makeEnvFn`` as ``bk2dir``.

    Returns:
        A list built by calling ``makeEnvFn(game, state, bk2dir=bk2dir)`` for
        every state of every game, games in the order listed below.
    """
    sonic_titles = (
        'SonicTheHedgehog-Genesis',
        'SonicTheHedgehog2-Genesis',
        'SonicAndKnuckles3-Genesis',
    )
    return [
        makeEnvFn(title, level, bk2dir=bk2dir)
        for title in sonic_titles
        for level in retro.list_states(title)
    ]
def list_states(args):
    """Print each game name followed by its state names in sorted order.

    Args:
        args: Object with a ``game`` attribute. When it is truthy it is used
            as the list of games as-is; otherwise every game reported by
            ``retro.list_games()`` is listed in sorted order.
    """
    titles = args.game or sorted(retro.list_games())
    for title in titles:
        found = retro.list_states(title)
        print(title + ':')
        for name in sorted(found):
            print(' ' + name)
def scan_missing():
    """Report files that are absent from each game's data directory.

    Returns:
        A list of ``(game, filename)`` tuples. For every game from
        ``retro.list_games()`` the checks run in this order: ``data.json``,
        ``scenario.json``, ``metadata.json``, at least one state
        (reported as ``'*.state'``), and ``rom.sha``.
    """
    json_files = ('data.json', 'scenario.json', 'metadata.json')

    def lacks(directory, filename):
        # True when `filename` is not a regular file inside `directory`.
        return not os.path.isfile(os.path.join(directory, filename))

    missing = []
    for title in retro.list_games():
        game_dir = retro.get_game_path(title)
        missing.extend(
            (title, filename)
            for filename in json_files
            if lacks(game_dir, filename)
        )
        if not retro.list_states(title):
            missing.append((title, '*.state'))
        if lacks(game_dir, 'rom.sha'):
            missing.append((title, 'rom.sha'))
    return missing
def __init__(self, csv, init=None):
    """Set up the pool of Sonic (game, state) pairs and training counters.

    Args:
        csv: Not used by this constructor (reading games and states from a
            csv file was disabled in the original code).
        init: Optional ``(game, state)`` pair. When truthy, an environment
            for it is created with ``retro_contest.local.make`` and stored
            as ``self.env``.
    """
    games = [
        'SonicTheHedgehog-Genesis',
        'SonicTheHedgehog2-Genesis',
        'SonicAndKnuckles3-Genesis',
    ]

    # Every state of every game (original note: 58 states in total).
    self.all_game_states = {title: retro.list_states(title) for title in games}
    # States currently used for training.
    self.game_states = {}
    # (game, state) pairs that have not been added yet.
    self.available_states = [
        (title, level)
        for title in games
        for level in self.all_game_states[title]
    ]

    # Seed the training set with a fixed number of states.
    # FIXME (from original): should be steps / 10k + 2.
    add_n_games = 44
    for _ in range(add_n_games):
        self._add_state()

    if init:
        self.env = retro_contest.local.make(game=init[0], state=init[1])

    self.act_int = False
    # NOTE(review): relies on self.env existing; when `init` is falsy it
    # presumably has to be set by _add_state() -- confirm.
    self.action_space = self.env.action_space
    self.done = False

    # FIXME (from original): every x steps add an env state, progressively.
    self.keep_env = False
    self.switch_interval = 10000  # steps before adding a state
    self.switch_steps = 0
    self.add_interval = 300000  # original note: 350000 is ~1h at 3 batch/sec
    self.add_steps = 0  # original note: ~60 hrs to full dataset

    self.episode_steps = 0
    self.episode_len = 200  # limit length of episodes while training
    self._cur_r = self._max_r = 0.0
def verify_default_state(game, raw=None):
    """Validate the ``default_state`` entry of a game's ``metadata.json``.

    Args:
        game: Game name; also the directory part of the metadata path.
        raw: Optional JSON text. When truthy it is parsed instead of reading
            the metadata file from the game's data directory.

    Returns:
        A pair of lists. The first is always empty; the second holds
        ``(path, message)`` tuples describing a decode failure, a missing
        default state, or a default state that is not among the game's
        states. An unreadable metadata file yields two empty lists.
    """
    meta_path = os.path.join(game, 'metadata.json')
    try:
        if raw:
            metadata = json.loads(raw)
        else:
            with open(retro.get_game_path(meta_path)) as handle:
                metadata = json.load(handle)
    except IOError:
        # A metadata file that cannot be opened is not reported here.
        return [], []
    except json.JSONDecodeError:
        return [], [(meta_path, 'fail decode')]

    default = metadata.get('default_state')
    if not default:
        return [], [(meta_path, 'default state missing')]
    if default in retro.list_states(game):
        return [], []
    return [], [(meta_path, 'invalid default state %s' % default)]
def main():
    """Train a convolutional Q-network on randomly chosen Sonic levels.

    Outline of what the code below does:

    1. Build the network once (loading ``sonic_model.h5`` if present), then
       write its architecture to ``sonic_model.json`` and its weights to
       ``sonic_model.h5`` and ``sonic_target_model.h5``.
    2. For each of ``loops`` outer iterations, open a fresh TF session and
       rebuild the online and target models from those files.
    3. For each of ``sub_loops`` inner iterations, play ``timesteps`` steps
       of a random game/state with an epsilon-greedy policy, storing
       selected transitions in a bounded replay memory, then train on one
       minibatch sampled from that memory and save the weights.

    Takes no arguments and returns nothing; results are the weight files,
    the TensorBoard logs and the progress lines printed to stdout.
    """
    start_time = time.time()
    games = [
        "SonicTheHedgehog-Genesis", "SonicTheHedgehog2-Genesis",
        "SonicAndKnuckles3-Genesis"
    ]
    # This first environment is only used to read the action-space size
    # while building the network; it is closed before training starts.
    game = np.random.choice(games, 1)[0]
    state = np.random.choice(retro.list_states(game), 1)[0]
    # Non-contest alternative: env = retro.make(game, state)
    env = AllowBacktracking(make(game, state))  # contest version
    env = SonicDiscretizer(env)  # contest version
    print(game, '-', state)

    # Parameters
    timesteps = 4500
    memory = deque(maxlen=30000)
    epsilon = 1  # probability of doing a random move
    max_random = 1
    min_random = 0.1  # minimum randomness (r12)
    rand_decay = 1e-3  # reduce the randomness by decay/loops
    gamma = 0.99  # discount for future reward
    mb_size = 256  # learning minibatch size
    loops = 45  # loop through the different game levels
    sub_loops = 100
    hold_action = 1  # nb frames during which we hold (4 for normal, 1 for contest)
    learning_rate = 5e-5
    max_reward = 0
    min_reward = 10000
    # action_threshold = 1
    target_step_interval = 10
    reward_clip = 200
    resize_to = [128, 128]

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # pylint: disable=E1101

    with tf.Session(config=config) as sess:
        # Input: observation state; output: one Q-value per action.
        model = Sequential()
        model.add(
            Conv2D(32,
                   kernel_size=(8, 8),
                   strides=4,
                   activation="relu",
                   input_shape=(128, 128, 3)))
        model.add(Conv2D(64, kernel_size=(4, 4), strides=2, activation="relu"))
        model.add(Conv2D(64, (3, 3), activation="relu"))
        model.add(Flatten())
        model.add(Dense(512, activation="relu"))
        model.add(
            Dense(env.action_space.n,
                  kernel_initializer="uniform",
                  activation="linear"))

        # Resume from earlier weights when a previous run left them behind.
        if os.path.isfile("sonic_model.h5"):
            model.load_weights("sonic_model.h5")

        model.compile(loss="mse",
                      optimizer=optimizers.Adam(lr=learning_rate),
                      metrics=["accuracy"])

        tensorboard = TensorBoard(
            log_dir=
            "logs/sonic_modmemdecayrdq18_reshape_64x512mb256_resc_target_interval_{}_memory_30000_lr_{}_decay_{}.{}"
            .format(target_step_interval, learning_rate, rand_decay,
                    time.time()))
        tensorboard.set_model(model)
        train_names = ["Loss", "Accuracy"]

        # Serialize model to JSON so it can be rebuilt in later sessions.
        model_json = model.to_json()
        with open("sonic_model.json", "w") as json_file:
            json_file.write(model_json)
        # Serialize weights to HDF5; the target model starts as a copy.
        model.save_weights("sonic_model.h5")
        model.save_weights("sonic_target_model.h5")

    env.close()

    for training_loop in range(loops):
        # Each outer loop gets its own session; both networks are restored
        # from the files written above or by the previous iteration.
        with tf.Session(config=config) as sess:
            model = model_from_json(model_json)
            model.load_weights("sonic_model.h5")
            model.compile(loss="mse",
                          optimizer=optimizers.Adam(lr=learning_rate),
                          metrics=["accuracy"])

            target_model = model_from_json(model_json)
            target_model.load_weights("sonic_target_model.h5")
            target_model.trainable = False
            target_model.compile(loss="mse",
                                 optimizer=optimizers.Adam(lr=learning_rate),
                                 metrics=["accuracy"])

            for sub_training_loop in range(sub_loops):
                loop_start_time = time.time()
                game = np.random.choice(games, 1)[0]
                state = np.random.choice(retro.list_states(game), 1)[0]
                print("Playing", game, "-", state)
                # Non-contest alternative:
                # env = AllowBacktracking(retro.make(game, state))
                env = AllowBacktracking(make(game, state))  # contest version
                env = SonicDiscretizer(env)  # contest version
                obs = env.reset()  # game start
                obs_resized = resize(obs, resize_to)
                # The network input is the difference between consecutive
                # resized frames (to capture velocity); the first input of
                # an episode is the frame itself.
                diff_obs = obs_resized
                done = False
                total_raw_reward = 0.0

                # Observation
                for t in range(timesteps):
                    # env.render()  # display training
                    if np.random.rand() <= epsilon:
                        # Pick a random action.
                        action = env.action_space.sample()
                        reward_hold = np.zeros(hold_action)
                        for h in range(hold_action):
                            obs_new, reward_hold[h], done, info = env.step(
                                action)  # result of action
                        reward = sum(reward_hold)
                        reward_ = min(reward, reward_clip)
                        obs_new_resized = resize(obs_new, resize_to)
                        diff_obs_new = obs_new_resized - obs_resized
                        # Bellman double Q: the online model picks the next
                        # action, the target model supplies its value.
                        Q = model.predict(
                            diff_obs[np.newaxis, :])  # Q-values predictions
                        Q_ = model.predict(diff_obs_new[np.newaxis, :])
                        Q_target = target_model.predict(
                            diff_obs_new[np.newaxis, :])
                        target_ = copy.copy(Q)
                        if done:
                            # Terminal step: the clip value is subtracted
                            # from the clipped reward.
                            target_[0, action] = reward_ - reward_clip
                        else:
                            target_[0, action] = reward_ + gamma * Q_target[
                                0, :][np.argmax(Q_[0, :])]
                        distance_from_target = mean_squared_error(Q, target_)
                    else:
                        # Greedy action from the online model.
                        Q = model.predict(
                            diff_obs[np.newaxis, :])  # Q-values predictions
                        action = np.argmax(Q)
                        reward_hold = np.zeros(hold_action)
                        for h in range(hold_action):
                            obs_new, reward_hold[h], done, info = env.step(
                                action)  # result of action
                        reward = sum(reward_hold)
                        reward_ = min(reward, reward_clip)
                        obs_new_resized = resize(obs_new, resize_to)
                        diff_obs_new = obs_new_resized - obs_resized
                        # Bellman double Q (same computation as above).
                        Q_ = model.predict(diff_obs_new[np.newaxis, :])
                        Q_target = target_model.predict(
                            diff_obs_new[np.newaxis, :])
                        target_ = copy.copy(Q)
                        if done:
                            target_[0, action] = reward_ - reward_clip
                        else:
                            target_[0, action] = reward_ + gamma * Q_target[
                                0, :][np.argmax(Q_[0, :])]
                        distance_from_target = mean_squared_error(Q, target_)

                    # print("distance from target", distance_from_target)
                    total_raw_reward += reward
                    # Previously every transition was stored:
                    # memory.append((diff_obs, action, reward, diff_obs_new, done))
                    max_reward = max(reward, max_reward)
                    min_reward = min(reward, min_reward)

                    # Only keep transitions whose prediction is far from the
                    # target, plus every terminal transition. The unclipped
                    # reward is stored; clipping is applied again at replay.
                    if distance_from_target > 25:
                        memory.append(
                            (diff_obs, action, reward, diff_obs_new, done))
                    elif done:
                        memory.append(
                            (diff_obs, action, reward, diff_obs_new, done))

                    # Save obs state for the next step.
                    obs_resized = obs_new_resized
                    diff_obs = diff_obs_new

                    if done:
                        obs = env.reset()  # restart game if done
                        obs_resized = resize(obs, resize_to)
                        # Back to a plain frame at the start of an episode.
                        diff_obs = obs_resized

                # Exponential decay of epsilon with the global sub-loop count,
                # bounded below by min_random.
                epsilon = min_random + (max_random - min_random) * np.exp(
                    -rand_decay *
                    (training_loop * sub_loops + sub_training_loop + 1))
                print("Total reward: {}".format(round(total_raw_reward)))
                print("Observation Finished", sub_training_loop + 1, "x",
                      training_loop + 1, "out of", sub_loops, "x", loops)

                # Learning
                if len(memory) >= mb_size:
                    minibatch_train_start_time = time.time()

                    # Sample memory.
                    minibatch = random.sample(memory, mb_size)

                    inputs_shape = (mb_size, ) + obs_resized.shape
                    inputs = np.zeros(inputs_shape)
                    targets = np.zeros((mb_size, env.action_space.n))

                    # Double Q: fix the target for a time to stabilise the
                    # model. Every target_step_interval sub-loops the online
                    # weights are written out and the target model is
                    # rebuilt from them.
                    # (Disabled extra condition from the original, "r12 chase
                    # score first": global sub-loop count >= 100.)
                    if (
                            sub_training_loop + 1
                    ) % target_step_interval == 0:
                        # with tf.device('/cpu:0'):
                        # Serialize weights to HDF5.
                        # model.save_weights("sonic_model.h5")
                        model.save_weights("sonic_target_model.h5")

                        # Extra checkpoints at fixed global sub-loop counts.
                        if training_loop * sub_loops + sub_training_loop + 1 == 500:
                            model.save_weights("sonic_model_500.h5")
                        elif training_loop * sub_loops + sub_training_loop + 1 == 1000:
                            model.save_weights("sonic_model_1000.h5")
                        elif training_loop * sub_loops + sub_training_loop + 1 == 1500:
                            model.save_weights("sonic_model_1500.h5")
                        elif training_loop * sub_loops + sub_training_loop + 1 == 2000:
                            model.save_weights("sonic_model_2000.h5")
                        elif training_loop * sub_loops + sub_training_loop + 1 == 2500:
                            model.save_weights("sonic_model_2500.h5")
                        elif training_loop * sub_loops + sub_training_loop + 1 == 3000:
                            model.save_weights("sonic_model_3000.h5")
                        elif training_loop * sub_loops + sub_training_loop + 1 == 3500:
                            model.save_weights("sonic_model_3500.h5")
                        elif training_loop * sub_loops + sub_training_loop + 1 == 4000:
                            model.save_weights("sonic_model_4000.h5")
                        elif training_loop * sub_loops + sub_training_loop + 1 == 4500:
                            model.save_weights("sonic_model_4500.h5")

                        # Create model from json (original note: "do
                        # inference on cpu"; the device pin is commented out).
                        target_model = model_from_json(model_json)
                        target_model.load_weights("sonic_target_model.h5")
                        target_model.trainable = False
                        target_model.compile(
                            loss="mse",
                            optimizer=optimizers.Adam(lr=learning_rate),
                            metrics=["accuracy"])

                    # Build the training batch one transition at a time.
                    # This rebinds diff_obs/action/reward/done; they are
                    # reassigned at the start of the next sub-loop.
                    for i in range(0, mb_size):
                        diff_obs = minibatch[i][0]
                        action = minibatch[i][1]
                        reward = minibatch[i][2]
                        diff_obs_new = minibatch[i][3]
                        done = minibatch[i][4]

                        # Reward clipping.
                        reward = min(reward, reward_clip)

                        # Bellman double Q
                        inputs[i] = diff_obs[np.newaxis, :]
                        Q = model.predict(diff_obs[np.newaxis, :])
                        Q_ = model.predict(diff_obs_new[np.newaxis, :])
                        Q_target = target_model.predict(
                            diff_obs_new[np.newaxis, :])

                        targets[i] = copy.copy(Q)
                        if done:
                            targets[i, action] = reward - reward_clip
                        else:
                            targets[i, action] = reward + gamma * Q_target[
                                0, :][np.argmax(Q_[0, :])]

                    # Train network on constructed inputs, targets.
                    logs = model.train_on_batch(inputs, targets)
                    write_log(tensorboard, train_names, logs,
                              training_loop * sub_loops + sub_training_loop)

                    model.save_weights("sonic_model.h5")
                    print(
                        "Model minibatch training lasted:",
                        str(
                            timedelta(seconds=time.time() -
                                      minibatch_train_start_time)),
                        "dd:hh:mm:ss")
                    print("Learning Finished", sub_training_loop + 1, "x",
                          training_loop + 1, "out of", sub_loops, "x", loops)

                env.close()

                print("Loop lasted:",
                      str(timedelta(seconds=time.time() - loop_start_time)),
                      "dd:hh:mm:ss")
                print("Training lasted:",
                      str(timedelta(seconds=time.time() - start_time)),
                      "dd:hh:mm:ss")
                print("Rewards between", min_reward, "and", max_reward)
                print("Pourcentage of random movements set to", epsilon * 100,
                      "%\n")
def main():
    """Play a retro game interactively and record the session to a .bk2 file.

    Command-line options: ``--game``, ``--state`` and ``--scenario``
    (default ``'scenario'``). When ``--game`` or ``--state`` is missing, the
    available choices are printed and the process exits with status 1.

    The main loop reads keyboard and joystick state every frame, maps it to
    the environment's buttons, steps the environment and draws the returned
    frame in a pyglet window. Emulator snapshots are taken every
    ``SAVE_PERIOD`` steps (a constant defined elsewhere in the file). R or
    the left bumper rewinds to an earlier snapshot; ESCAPE or the XBOX
    button replays all recorded actions into a new movie file and exits.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--game', help='retro game to use')
    parser.add_argument('--state', help='retro state to start from')
    parser.add_argument('--scenario', help='scenario to use', default='scenario')
    args = parser.parse_args()

    if args.game is None:
        print('Please specify a game with --game <game>')
        print('Available games:')
        for game in sorted(retro.list_games()):
            print(game)
        sys.exit(1)

    if args.state is None:
        print('Please specify a state with --state <state>')
        print('Available states:')
        for state in sorted(retro.list_states(args.game)):
            print(state)
        sys.exit(1)

    env = retro.make(game=args.game, state=args.state, use_restricted_actions=retro.ACTIONS_ALL, scenario=args.scenario)
    obs = env.reset()
    screen_height, screen_width = obs.shape[:2]

    random.seed(0)

    key_handler = pyglet.window.key.KeyStateHandler()
    # Fixed window width; the height keeps the frame's aspect ratio.
    win_width = 2000
    win_height = win_width * screen_height // screen_width
    win = pyglet.window.Window(width=win_width, height=win_height, vsync=False)

    # NOTE(review): `_nscontext` looks like a macOS-only attribute used to
    # undo the display's backing scale factor -- confirm.
    if hasattr(win.context, '_nscontext'):
        pixel_scale = win.context._nscontext.view().backingScaleFactor()
        win.width = win.width // pixel_scale
        win.height = win.height // pixel_scale

    # Use the first joystick when one is attached, otherwise keyboard only.
    joysticks = pyglet.input.get_joysticks()
    if len(joysticks) > 0:
        joystick = joysticks[0]
        joystick.open()
    else:
        joystick = None

    win.push_handlers(key_handler)

    # Previous-frame state, used to detect the frame a key/button goes down.
    key_previous_states = {}
    button_previous_states = {}

    steps = 0
    recorded_actions = []  # one action per step taken
    recorded_states = []  # (step index, emulator state) snapshots

    pyglet.app.platform_event_loop.start()

    fps_display = pyglet.clock.ClockDisplay()
    clock.set_fps_limit(60)

    # Allocate one texture sized to the emulator frame; each frame is
    # uploaded into it and drawn as a quad covering the window.
    glEnable(GL_TEXTURE_2D)
    texture_id = GLuint(0)
    glGenTextures(1, ctypes.byref(texture_id))
    glBindTexture(GL_TEXTURE_2D, texture_id)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST)
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, screen_width, screen_height, 0, GL_RGB, GL_UNSIGNED_BYTE, None)

    while not win.has_exit:
        win.dispatch_events()

        win.clear()

        # "pressed" = currently held; "clicked" = held now but not on the
        # previous frame.
        keys_clicked = set()
        keys_pressed = set()
        for key_code, pressed in key_handler.items():
            if pressed:
                keys_pressed.add(key_code)

            if not key_previous_states.get(key_code, False) and pressed:
                keys_clicked.add(key_code)
            key_previous_states[key_code] = pressed

        buttons_clicked = set()
        buttons_pressed = set()
        if joystick is not None:
            for button_code, pressed in enumerate(joystick.buttons):
                if pressed:
                    buttons_pressed.add(button_code)

                if not button_previous_states.get(button_code, False) and pressed:
                    buttons_clicked.add(button_code)
                button_previous_states[button_code] = pressed

        # Rewind: discard the newest snapshot, restore the one before it and
        # cut both histories back to that snapshot's step count.
        if keycodes.R in keys_clicked or buttoncodes.LEFT_BUMPER in buttons_clicked:
            if len(recorded_states) > 1:
                recorded_states.pop()
                steps, save_state = recorded_states.pop()
                recorded_states = recorded_states[:steps]
                recorded_actions = recorded_actions[:steps]
                env.em.set_state(save_state)

        if keycodes.ESCAPE in keys_pressed or buttoncodes.XBOX in buttons_clicked:
            # Record all the actions so far to a bk2 and exit.
            # Pick the first numbered filename that does not exist yet.
            i = 0
            while True:
                movie_filename = 'human/%s/%s/%s-%s-%04d.bk2' % (
                    args.game, args.scenario, args.game, args.state, i)
                if not os.path.exists(movie_filename):
                    break
                i += 1
            os.makedirs(os.path.dirname(movie_filename), exist_ok=True)
            env.record_movie(movie_filename)
            # Replay the kept actions from a fresh reset so the movie holds
            # exactly the actions in recorded_actions.
            env.reset()
            for step, act in enumerate(recorded_actions):
                if step % 1000 == 0:
                    print('saving %d/%d' % (step, len(recorded_actions)))
                env.step(act)
            env.stop_record()
            print('complete')
            # NOTE(review): exits with status 1 even though the recording
            # completed -- confirm this is intended.
            sys.exit(1)

        # Map keyboard keys / joystick buttons to emulator button names.
        inputs = {
            'A': keycodes.Z in keys_pressed or buttoncodes.A in buttons_pressed,
            'B': keycodes.X in keys_pressed or buttoncodes.B in buttons_pressed,
            'C': keycodes.C in keys_pressed,
            'X': keycodes.A in keys_pressed or buttoncodes.X in buttons_pressed,
            'Y': keycodes.S in keys_pressed or buttoncodes.Y in buttons_pressed,
            'Z': keycodes.D in keys_pressed,

            'UP': keycodes.UP in keys_pressed or buttoncodes.D_UP in buttons_pressed,
            'DOWN': keycodes.DOWN in keys_pressed or buttoncodes.D_DOWN in buttons_pressed,
            'LEFT': keycodes.LEFT in keys_pressed or buttoncodes.D_LEFT in buttons_pressed,
            'RIGHT': keycodes.RIGHT in keys_pressed or buttoncodes.D_RIGHT in buttons_pressed,

            'MODE': keycodes.TAB in keys_pressed or buttoncodes.SELECT in buttons_pressed,
            'START': keycodes.ENTER in keys_pressed or buttoncodes.START in buttons_pressed,
        }
        # Order the booleans the way the environment lists its buttons.
        action = [inputs[b] for b in env.BUTTONS]

        # Snapshot the emulator before the step so a rewind lands here.
        if steps % SAVE_PERIOD == 0:
            recorded_states.append((steps, env.em.get_state()))
        obs, rew, done, info = env.step(action)
        recorded_actions.append(action)
        steps += 1

        # Upload the new frame into the texture and draw it window-sized.
        glBindTexture(GL_TEXTURE_2D, texture_id)
        video_buffer = ctypes.cast(obs.tobytes(), ctypes.POINTER(ctypes.c_short))
        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, obs.shape[1], obs.shape[0], GL_RGB, GL_UNSIGNED_BYTE, video_buffer)

        x = 0
        y = 0
        h = win.height
        w = win.width

        # The texture's v coordinate is flipped relative to the vertex order
        # (bottom vertices use v=1, top vertices use v=0).
        pyglet.graphics.draw(
            4,
            pyglet.gl.GL_QUADS,
            ('v2f', [x, y, x + w, y, x + w, y + h, x, y + h]),
            ('t2f', [0, 1, 1, 1, 1, 0, 0, 0]),
        )

        fps_display.draw()

        win.flip()

        # Process joystick events.
        timeout = clock.get_sleep_time(False)
        pyglet.app.platform_event_loop.step(timeout)

        clock.tick()

    pyglet.app.platform_event_loop.stop()
""" Environments and wrappers for Sonic training. """ import gym import numpy as np import retro from atari_wrappers import WarpFrame, FrameStack count = 0 envs = [] for game in retro.list_games(): if "Sonic" in game: for state in retro.list_states(game): envs.append((game, state)) count += 1 def make_envs(stack=True, scale_rew=True, backtracking=True, num=count): return [ make_env(stack=stack, scale_rew=scale_rew, backtracking=backtracking, i=i) for i in range(num) ] def make_env(stack=True, scale_rew=True, backtracking=True, i=0): """ Create an environment with some standard wrappers.
def gen_vae_data(game, samples_per_stage=10000, threads=2, size=(320, 320), **kwargs):
    """Generate training data for every state of ``game`` in parallel.

    One ``gen_stage_data`` task is run per state returned by
    ``retro.list_states(game)``, spread over a pool of worker processes.

    Args:
        game: Name of the retro game.
        samples_per_stage: Passed through to ``gen_stage_data`` for each
            state.
        threads: Number of worker processes in the pool.
        size: Passed through to ``gen_stage_data`` for each state.
        **kwargs: ``data_dir`` overrides the default output directory when
            it is truthy; other keys are ignored.

    Returns:
        The sum of the values returned by ``gen_stage_data`` for all states.
    """
    training_data_dir = kwargs.get('data_dir') or \
        '{}/PycharmProjects/jjabrams_rl/data/training_data/'.format(expanduser("~"))

    # Build the task list before the pool exists, so a failure while
    # listing states cannot leave worker processes behind.
    gen_stage_data_args = [
        (game, stage, samples_per_stage, training_data_dir, size)
        for stage in retro.list_states(game)
    ]

    # map() blocks until every task has finished, so the pool can safely be
    # torn down when the `with` block exits. Previously it was never
    # closed, leaking the workers.
    with multiprocessing.Pool(threads) as pool:
        data_added = sum(pool.map(gen_stage_data, gen_stage_data_args))
    return data_added
import retro for game in retro.list_games(): is_installed = True try: retro.make(game).close() except FileNotFoundError: is_installed = False if is_installed: name = 'retro.' + game def make_retro_make(id_:str): def make(state, *args, **kwargs): return retro.make(id_, *args, state=state, **kwargs) return make all_environments[name] = Maker( name=name, make=make_retro_make(game), states=retro.list_states(game), ) except ImportError: pass # User-defined environments objects = read_objects() envs = objects['environment'] modules = import_from_objects(envs) for name, module in modules.items(): all_environments[name] = module