def __init__(self, env, model, nsteps=5, gamma=0.99):
    """Set up the A2C runner; optionally prepare the imitation featurizer."""
    super().__init__(env=env, model=model, nsteps=nsteps)
    self.gamma = gamma
    # -1 stands in for the dynamic (None) batch dimension when reshaping.
    self.batch_action_shape = [
        -1 if dim is None else dim
        for dim in model.train_model.action.shape.as_list()
    ]
    self.ob_dtype = model.train_model.X.dtype.as_numpy_dtype
    if USE_IMMITATION_ENV:
        print("a2c runner")
        self.featurizer = TDCFeaturizer(
            92, 92, 84, 84,
            feature_vector_size=1024,
            learning_rate=0,
            experiment_name='PE')
        self.featurizer.load("zelda")
        video_dataset = generate_dataset(
            'zelda', framerate=60, width=84, height=84)[0]
        self.featurized_dataset = self.featurizer.featurize(video_dataset)
        self.checkpoint_indexes = None  # [0] * nenvs
        self.rewards = 0
        self.counter = 0
def __init__(self, *args, **kwargs):
    """Wrap MontezumaRevenge in the imitation-reward wrapper."""
    print("MontezumaImmitationEnv")
    print(args)
    print(kwargs)
    featurizer = TDCFeaturizer(
        92, 92, 84, 84,
        feature_vector_size=1024,
        learning_rate=0,
        experiment_name='default')
    featurizer.load()
    # NOTE(review): 30 / 15 == 2.0 — presumably a frame-subsampling rate;
    # confirm against generate_dataset's contract.
    video_dataset = generate_dataset(
        'default', framerate=30 / 15, width=84, height=84)[0]
    featurized_dataset = featurizer.featurize(video_dataset)
    self._env = ImmitationWrapper(
        gym.make('MontezumaRevengeNoFrameskip-v4'),
        featurizer=featurizer,
        featurized_dataset=featurized_dataset)
    # Mirror the wrapped env's public attributes so this object can stand in
    # for a plain gym env.
    self.observation_space = self._env.unwrapped.observation_space
    self.action_space = self._env.unwrapped.action_space
    self.np_random = self._env.unwrapped.np_random
    self.ale = self._env.unwrapped.ale
def __init__(self, env_fns, spaces=None):
    """
    envs: list of gym environments to run in subprocesses
    """
    self.waiting = False
    self.closed = False
    nenvs = len(env_fns)
    self.nenvs = nenvs
    self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
    self.ps = []
    for work_remote, remote, env_fn in zip(self.work_remotes,
                                           self.remotes, env_fns):
        proc = Process(target=worker,
                       args=(work_remote, remote, CloudpickleWrapper(env_fn)))
        # if the main process crashes, we should not cause things to hang
        proc.daemon = True
        proc.start()
        self.ps.append(proc)
    # The parent only talks over `remotes`; close its copies of the worker ends.
    for remote in self.work_remotes:
        remote.close()
    self.remotes[0].send(('get_spaces', None))
    observation_space, action_space = self.remotes[0].recv()
    self.viewer = None
    VecEnv.__init__(self, len(env_fns), observation_space, action_space)
    if USE_IMMITATION_ENV:
        self.featurizer = TDCFeaturizer(
            92, 92, 84, 84,
            feature_vector_size=1024,
            learning_rate=0,
            experiment_name='default')
        self.featurizer.load()
        video_dataset = generate_dataset(
            'default', framerate=30 / 15, width=84, height=84)[0]
        self.featurized_dataset = self.featurizer.featurize(video_dataset)
        self.checkpoint_indexes = [0] * nenvs
        self.rewards = 0
        self.counter = 0
def visualize_features(videos_path, featurizer_type, initial_width,
                       initial_height, desired_width, desired_height, costar):
    """Featurize frames from the first two videos under *videos_path* and plot
    their embeddings.

    featurizer_type selects the model class: 'vae', 'tdc', or 'forward_model'.
    Raises TypeError for any other value.

    NOTE(review): unlike the other call sites in this project, the featurizer's
    weights are never loaded here (no featurizer.load(...) call), so the
    embeddings come from a freshly initialized network — confirm this is
    intentional.
    """
    if featurizer_type == 'vae':
        featurizer_class = VAEFeaturizer
    elif featurizer_type == 'tdc':
        featurizer_class = TDCFeaturizer
    elif featurizer_type == 'forward_model':
        featurizer_class = ForwardModelFeaturizer
    else:
        # Fix: the bare `raise TypeError` carried no diagnostic; same
        # exception type is kept so existing callers' handlers still match.
        raise TypeError(
            "unknown featurizer_type: {!r} (expected 'vae', 'tdc', or "
            "'forward_model')".format(featurizer_type))
    featurizer = featurizer_class(initial_width, initial_height,
                                  desired_width, desired_height, costar,
                                  feature_vector_size=1024,
                                  learning_rate=0.0001,
                                  experiment_name='default')
    d = generate_dataset(videos_path, 10, initial_width, initial_height, costar)
    features_all = [featurizer.featurize(d[0]), featurizer.featurize(d[1])]
    visualize_embeddings(features_all)
def __init__(self, env_fns, spaces=None, level_selector=None, experiment=None, dataset=None):
    """
    Arguments:

    env_fns: iterable of callables that build environments
    """
    self.envs = [fn() for fn in env_fns]
    self.num_envs = len(env_fns)
    env = self.envs[0]
    VecEnv.__init__(self, len(env_fns), env.observation_space, env.action_space)
    obs_space = env.observation_space
    self.keys, shapes, dtypes = obs_space_info(obs_space)
    self.buf_obs = {
        k: np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k])
        for k in self.keys
    }
    # Fix: np.bool is a removed alias for the builtin; use bool directly.
    self.buf_dones = np.zeros((self.num_envs,), dtype=bool)
    self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32)
    self.buf_infos = [{} for _ in range(self.num_envs)]
    self.actions = None
    self.spec = self.envs[0].spec
    # NOTE(review): attribute name is misspelled ("finsihed") but kept as-is,
    # since code outside this view may read it.
    self.finsihed = [False for _ in range(self.num_envs)]
    self.last_mes = [None for _ in range(self.num_envs)]
    self.level_selector = level_selector
    if USE_IMMITATION_ENV:
        self.featurizer = TDCFeaturizer(
            92, 92, 84, 84,
            feature_vector_size=1024,
            learning_rate=0,
            experiment_name=experiment)
        self.featurizer.load(dataset)
        video_dataset = generate_dataset(
            experiment, framerate=60, width=84, height=84)[0]
        self.featurized_dataset = self.featurizer.featurize(video_dataset)
        # Bug fix: original read undefined `nenvs` here (NameError whenever
        # USE_IMMITATION_ENV is set); the env count lives in self.num_envs.
        self.checkpoint_indexes = [0] * self.num_envs
        self.rewards = 0
        self.counter = 0
def __init__(self, *args, **kwargs):
    """Build the GVGAI Zelda env wrapped with imitation rewards."""
    print("ZeldaImmitationEnv")
    featurizer = TDCFeaturizer(
        84, 84, 84, 84,
        feature_vector_size=1024,
        learning_rate=0,
        experiment_name='PE')
    featurizer.load("zelda")
    video_dataset = generate_dataset('PE', framerate=60, width=84, height=84)[0]
    featurized_dataset = featurizer.featurize(video_dataset)
    self._env = ImmitationWrapper(
        gym.make('gvgai-zelda-lvl0-v0'),
        featurizer=featurizer,
        featurized_dataset=featurized_dataset)
    # Expose the wrapped env's spaces so this behaves like a plain gym env.
    self.observation_space = self._env.unwrapped.observation_space
    self.action_space = self._env.unwrapped.action_space
def __init__(self, *, env, model, nsteps, gamma, lam):
    """PPO runner: gamma is the discount factor, lam the GAE lambda."""
    super().__init__(env=env, model=model, nsteps=nsteps)
    self.lam = lam
    self.gamma = gamma
    if USE_IMMITATION_ENV:
        self.featurizer = TDCFeaturizer(
            92, 92, 84, 84,
            feature_vector_size=1024,
            learning_rate=0,
            experiment_name='default',
            is_variational=False)
        self.featurizer.load()
        video_dataset = generate_dataset(
            'default', framerate=30 / 15, width=84, height=84)[0]
        self.featurized_dataset = self.featurizer.featurize(video_dataset)
        self.checkpoint_indexes = None  # [0] * nenvs
        self.rewards = 0
        self.counter = 0
from train_featurizer import generate_dataset
from embedding_visualization import visualize_embeddings
from TDCFeaturizer import TDCFeaturizer

# Embed three Montezuma video clips with a pre-trained TDC featurizer and
# show them in a single embedding visualization.
featurizer = TDCFeaturizer(92, 92, 84, 84)
featurizer.load('montezuma')

dataset = generate_dataset('montezuma.txt', .25, 84, 84)
features_all = [featurizer.featurize(clip) for clip in dataset[:3]]
visualize_embeddings(features_all, experiment_name='montezuma')