def __init__(self, venv, directory, record_video_trigger, video_length=200):
    """Record videos of ``venv`` rollouts into ``directory``.

    # Arguments
        venv: VecEnv to wrap
        directory: Where to save videos
        record_video_trigger: Function that defines when to start recording.
            The function takes the current number of step, and returns
            whether we should start recording or not.
        video_length: Length of recorded video
    """
    VecEnvWrapper.__init__(self, venv)
    self.record_video_trigger = record_video_trigger
    self.video_recorder = None
    self.directory = os.path.abspath(directory)
    # Bug fix: os.mkdir raised if a parent directory was missing and was
    # racy behind an exists() check; makedirs(exist_ok=True) handles both.
    os.makedirs(self.directory, exist_ok=True)
    self.file_prefix = "vecenv"
    self.file_infix = '{}'.format(os.getpid())
    self.step_id = 0
    self.video_length = video_length
    self.recording = False
    self.recorded_frames = 0
def __init__(self, env_fns, nstack, spaces=None):
    # Build a SubprocVecEnv whose worker loop is replaced (monkey-patched)
    # with a custom one supporting extra commands, then frame-stack it.
    import baselines.common.vec_env.subproc_vec_env as VecEnv

    def worker(remote, parent_remote, env_fn_wrapper):
        # Child-process loop: close the parent's pipe end, build the env,
        # then serve commands until 'close'.
        parent_remote.close()
        env = env_fn_wrapper.x()
        while True:
            cmd, data = remote.recv()
            if cmd == 'step':
                ob, reward, done, info = env.step(data)
                if done:
                    # Auto-reset so the caller always receives a fresh
                    # observation after an episode ends.
                    ob = env.reset()
                remote.send((ob, reward, done, info))
            elif cmd == 'reset':
                ob = env.reset()
                remote.send(ob)
            elif cmd == 'render':
                remote.send(env.render(mode='rgb_array'))
            elif cmd == 'close':
                remote.close()
                break
            elif cmd == 'get_spaces':
                remote.send((env.observation_space, env.action_space))
            elif cmd == 'is_human':
                # Custom command: expose the unwrapped env's is_human flag.
                remote.send(env.unwrapped.is_human)
            elif cmd == 'get_game_state':
                # Custom command: expose the unwrapped env's game state.
                remote.send(env.unwrapped.game_state)
            else:
                raise NotImplementedError

    # NOTE(review): this rebinds the module-level worker for EVERY
    # SubprocVecEnv created afterwards, not just this instance.
    VecEnv.worker = worker
    venv = VecEnv.SubprocVecEnv(env_fns, spaces)
    venv = VecFrameStack(venv, nstack)
    VecEnvWrapper.__init__(self, venv)
def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8, use_tf=False):
    """Per-agent observation/return normalizer.

    :param venv: vectorized multi-agent env (must expose ``num_agent``)
    :param ob: keep running stats for observations if True
    :param ret: keep running stats for returns if True
    :param clipob: clipping bound for normalized observations
    :param cliprew: clipping bound for normalized rewards
    :param gamma: discount factor for the return accumulator
    :param epsilon: numerical-stability constant
    :param use_tf: use TensorFlow-backed running statistics
    """
    VecEnvWrapper.__init__(self, venv)
    # Bug fix: num_agent must be assigned BEFORE the comprehensions below,
    # which read self.num_agent; previously it was assigned at the very
    # end, so the comprehensions accessed a not-yet-existing attribute.
    self.num_agent = venv.num_agent
    if use_tf:
        # NOTE(review): package is spelled 'baseline' (singular) here while
        # sibling code imports 'baselines' — confirm which name is correct.
        from baseline.common.running_mean_std import TfRunningMeanStd
        self.ob_rms = [
            TfRunningMeanStd(shape=self.observation_space[i].shape,
                             scope='ob_rms')
            for i in range(self.num_agent)
        ] if ob else None
        self.ret_rms = [
            TfRunningMeanStd(shape=(), scope='ret_rms')
            for _ in range(self.num_agent)
        ] if ret else None
    else:
        from baseline.common.running_mean_std import RunningMeanStd
        self.ob_rms = [
            RunningMeanStd(shape=self.observation_space[i].shape)
            for i in range(self.num_agent)
        ] if ob else None
        self.ret_rms = [
            RunningMeanStd(shape=())
            for _ in range(self.num_agent)
        ] if ret else None
    self.clipob = clipob
    self.cliprew = cliprew
    # One discounted-return accumulator per env, per agent.
    self.ret = [np.zeros(self.num_envs) for _ in range(self.num_agent)]
    self.gamma = gamma
    self.epsilon = epsilon
def __init__(self, venv, num_models, model_dir):
    """Load an ensemble of preference-learning reward models (CPU-only).

    :param venv: VecEnv to wrap
    :param num_models: number of ensemble members to restore
    :param model_dir: directory holding ``model_<i>.ckpt`` checkpoints
    """
    VecEnvWrapper.__init__(self, venv)
    self.graph = tf.Graph()
    config = tf.ConfigProto(device_count={'GPU': 0})  # Run on CPU
    #config.gpu_options.allow_growth = True
    self.sess = tf.Session(graph=self.graph, config=config)
    with self.graph.as_default():
        with self.sess.as_default():
            # Make the repository root importable for preference_learning.
            import os, sys
            dir_path = os.path.dirname(os.path.realpath(__file__))
            sys.path.append(os.path.join(dir_path, '..', '..', '..', '..'))
            from preference_learning import Model
            print(os.path.realpath(model_dir))
            self.models = []
            for i in range(num_models):
                # Separate variable scope per member so checkpoints restore
                # into disjoint variable sets.
                with tf.variable_scope('model_%d' % i):
                    model = Model(self.venv.observation_space.shape[0])
                    model.saver.restore(self.sess,
                                        model_dir + '/model_%d.ckpt' % (i))
                    self.models.append(model)
    # Bug fix: the original ended with a dangling triple-quote (a stray,
    # unterminated string literal); removed here.
def __init__(self, venv, pretrained_reward_net_path, chain_path, embedding_dim, env_name):
    """Reward wrapper whose last layer is the mean of an MCMC chain.

    :param venv: VecEnv to wrap
    :param pretrained_reward_net_path: path to pretrained EmbeddingNet weights
    :param chain_path: CSV file of MCMC samples of the last-layer weights
    :param embedding_dim: dimensionality of the reward embedding
    :param env_name: environment id, stored for later use
    """
    VecEnvWrapper.__init__(self, venv)
    self.reward_net = EmbeddingNet(embedding_dim)
    # load the pretrained weights
    self.reward_net.load_state_dict(torch.load(pretrained_reward_net_path))
    self.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")
    # load the mean of the MCMC chain
    burn = 5000  # discard the first `burn` samples (warm-up)
    skip = 20    # thin the chain, keeping every `skip`-th sample
    # Bug fix: the chain file was opened but never closed; a context
    # manager releases the handle even if parsing raises.
    # Each line is comma-separated; the trailing field is dropped, as
    # in the original parser.
    with open(chain_path) as reader:
        data = [[float(s) for s in line.strip().split(',')[:-1]]
                for line in reader]
    data = np.array(data)
    # Average across the burned-in, thinned chain to get the last layer.
    mean_weight = np.mean(data[burn::skip, :], axis=0)
    # Last layer just outputs the scalar reward = w^T \phi(s).
    self.reward_net.fc2 = nn.Linear(embedding_dim, 1, bias=False)
    new_linear = torch.from_numpy(mean_weight)
    print("new linear", new_linear)
    print(new_linear.size())
    with torch.no_grad():
        # unsqueeze since nn.Linear wants a 2-d tensor for weights
        new_linear = new_linear.unsqueeze(0)
        self.reward_net.fc2.weight.data = new_linear.float().to(self.device)
    #TODO: print out last layer to make sure it stuck...
    print("USING MEAN WEIGHTS FROM MCMC")
    self.reward_net.to(self.device)
    self.rew_rms = RunningMeanStd(shape=())
    self.epsilon = 1e-8
    self.cliprew = 10.
    self.env_name = env_name
def __init__(self, venv, directory, video_name, record_video_trigger, video_length=200):
    """Record named videos of ``venv`` rollouts into ``directory``.

    # Arguments
        venv: VecEnv to wrap
        directory: Where to save videos
        video_name: Base name used for the recorded video files
        record_video_trigger: Function that defines when to start recording.
            The function takes the current number of step, and returns
            whether we should start recording or not.
        video_length: Length of recorded video
    """
    VecEnvWrapper.__init__(self, venv)
    self.record_video_trigger = record_video_trigger
    self.video_recorder = None
    self.directory = os.path.abspath(directory)
    # Bug fix: os.mkdir raised if a parent directory was missing and was
    # racy behind an exists() check; makedirs(exist_ok=True) handles both.
    os.makedirs(self.directory, exist_ok=True)
    self.video_name = video_name
    self.file_prefix = "vecenv"
    self.file_infix = '{}'.format(os.getpid())
    self.step_id = 0
    self.video_length = video_length
    self.recording = False
    self.recorded_frames = 0
def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8):
    """Normalizer for multi-agent envs with either a list-like or a
    gym ``Tuple``-style container of per-agent observation spaces.

    :param venv: VecEnv to wrap
    :param ob: keep per-agent running stats for observations if True
    :param ret: keep running stats for returns if True
    :param clipob: clipping bound for normalized observations
    :param cliprew: clipping bound for normalized rewards
    :param gamma: discount factor for the return accumulator
    :param epsilon: numerical-stability constant
    """
    VecEnvWrapper.__init__(self, venv)
    try:
        # observation_space indexes directly (list/tuple of spaces).
        self.num_agents = num_agents = len(self.observation_space)
        self.ob_rms = [
            RunningMeanStd(shape=self.observation_space[k].shape)
            for k in range(num_agents)
        ] if ob else None
    except Exception:
        # Bug fix: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit; narrowed to Exception.
        # Fall back to a gym.spaces container with a .spaces attribute.
        self.num_agents = num_agents = len(self.observation_space.spaces)
        self.ob_rms = [
            RunningMeanStd(shape=self.observation_space.spaces[k].shape)
            for k in range(num_agents)
        ] if ob else None
    self.ret_rms = RunningMeanStd(shape=()) if ret else None
    #[RunningMeanStd(shape=()) for k in range(num_agents)] if ret else None
    self.clipob = clipob
    self.cliprew = cliprew
    # self.ret = [np.zeros(self.num_envs) for _ in range(num_agents)]
    self.ret = np.zeros(self.num_envs)
    self.gamma = gamma
    self.epsilon = epsilon
def __init__(self, venv, norm_obs=True, norm_reward=True, clip_obs=10., clip_reward=10., gamma=0.99, epsilon=1e-8):
    """
    A rolling-average, normalizing, vectorized environment wrapper.

    :param venv: ([Gym Environment]) the environments to vectorize and normalize
    :param norm_obs: (bool) whether to normalize observations
    :param norm_reward: (bool) whether to normalize rewards with discounting
        (r = sum(r_old) * gamma + r_new)
    :param clip_obs: (float) clipping value for normalized observations
    :param clip_reward: (float) clipping value for normalized rewards
    :param gamma: (float) discount factor
    :param epsilon: (float) small constant to avoid arithmetic issues
    """
    VecEnvWrapper.__init__(self, venv)
    if norm_obs:
        self.ob_rms = RunningMeanStd(shape=self.observation_space.shape)
    else:
        self.ob_rms = None
    if norm_reward:
        self.ret_rms = RunningMeanStd(shape=())
    else:
        self.ret_rms = None
    self.clip_obs = clip_obs
    self.clip_reward = clip_reward
    # One discounted-return accumulator per sub-environment.
    self.ret = np.zeros(self.num_envs)
    self.gamma = gamma
    self.epsilon = epsilon
def __init__(self, venv, render=False):
    """Wrap ``venv``, widening observations by one extra (goal) channel."""
    # self.reward_range = (-float('inf'), float('inf'))
    VecEnvWrapper.__init__(self, venv)
    height, width, channels = venv.observation_space.shape
    # Same H x W image, but with one additional channel appended.
    self.observation_space = gym.spaces.Box(
        0, 255, shape=[height, width, channels + 1])
    # init Goal manager: one per sub-environment.
    self.gms = [Goal(idx) for idx in range(self.num_envs)]
    self.m_Render = render
def __init__(self, venv, nstack):
    """Stack the last ``nstack`` observations along the final axis."""
    self.venv = venv
    self.nstack = nstack
    wrapped_space = venv.observation_space  # ob space of the wrapped env
    stacked_low = np.repeat(wrapped_space.low, self.nstack, axis=-1)
    stacked_high = np.repeat(wrapped_space.high, self.nstack, axis=-1)
    # Rolling buffer holding the stacked frames for every sub-env.
    self.stackedobs = np.zeros((venv.num_envs,) + stacked_low.shape,
                               stacked_low.dtype)
    stacked_space = spaces.Box(low=stacked_low, high=stacked_high,
                               dtype=venv.observation_space.dtype)
    VecEnvWrapper.__init__(self, venv, observation_space=stacked_space)
def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8):
    """Observation normalizer; return normalization is disabled."""
    VecEnvWrapper.__init__(self, venv)
    if ob:
        self.ob_rms = RunningMeanStd(shape=self.observation_space.shape)
    else:
        self.ob_rms = None
    # NOTE(review): ret_rms is hard-wired to None, so the `ret` flag has
    # no effect here — presumably intentional; confirm.
    self.ret_rms = None
    self.clipob = clipob
    self.cliprew = cliprew
    # One discounted-return accumulator per sub-environment.
    self.ret = np.zeros(self.num_envs)
    self.gamma = gamma
    self.epsilon = epsilon
def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8):
    """Running-statistics normalizer for observations and returns."""
    VecEnvWrapper.__init__(self, venv)
    if ob:
        self.ob_rms = RunningMeanStd(shape=self.observation_space.shape)
    else:
        self.ob_rms = None
    if ret:
        self.ret_rms = RunningMeanStd(shape=())
    else:
        self.ret_rms = None
    self.clipob = clipob
    self.cliprew = cliprew
    # One discounted-return accumulator per sub-environment.
    self.ret = np.zeros(self.num_envs)
    self.gamma = gamma
    self.epsilon = epsilon
def __init__(self, env, *, encoder):
    """Expose the encoder's embedding vector as the observation space."""
    VecEnvWrapper.__init__(self, env)
    self.encoder = encoder
    # Unbounded float32 box sized to the encoder's embedding dimension.
    float32_info = np.finfo(np.float32)
    self.observation_space = spaces.Box(
        shape=(self.encoder.d_embedding,),
        low=float32_info.min,
        high=float32_info.max,
    )
    print("Wrapping with encoder")
def __init__(self, venv, filename=None, keep_buf=0, info_keywords=()):
    """Episode-statistics monitor for a multi-agent VecEnv.

    NOTE(review): ``filename`` is accepted but never used here (no results
    writer is created), unlike the single-agent variant — confirm intent.
    """
    VecEnvWrapper.__init__(self, venv)
    self.eprets = None
    self.eplens = None
    self.epcount = 0
    self.tstart = time.time()
    self.info_keywords = info_keywords
    self.keep_buf = keep_buf
    self.num_agent = venv.num_agent
    if self.keep_buf:
        # Ring buffers of the most recent episode returns and lengths.
        self.epret_buf = deque([], maxlen=keep_buf)
        self.eplen_buf = deque([], maxlen=keep_buf)
def __init__(self, venv, reward_net_path, combo_param):
    """Learned-reward wrapper that blends IRL and RL rewards."""
    VecEnvWrapper.__init__(self, venv)
    self.reward_net = AtariNet()
    self.reward_net.load_state_dict(torch.load(reward_net_path))
    use_cuda = torch.cuda.is_available()
    self.device = torch.device("cuda:0" if use_cuda else "cpu")
    self.reward_net.to(self.device)
    # How much weight to give IRL versus RL; combo_param is in [0, 1]
    # with 0 being pure RL and 1 being pure IRL.
    self.lamda = combo_param
    self.rew_rms = RunningMeanStd(shape=())
    self.epsilon = 1e-8
    self.cliprew = 10.
def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8, reward_scale=1., update=True):
    """Normalizer with an extra reward scale and a statistics-update toggle."""
    VecEnvWrapper.__init__(self, venv)
    self.ob_rms = None
    if ob:
        self.ob_rms = RunningMeanStd(shape=self.observation_space.shape)
    self.ret_rms = None
    if ret:
        self.ret_rms = RunningMeanStd(shape=())
    self.clipob = clipob
    self.cliprew = cliprew
    # One discounted-return accumulator per sub-environment.
    self.ret = np.zeros(self.num_envs)
    self.gamma = gamma
    self.epsilon = epsilon
    # Attribute names to persist when saving/restoring this wrapper.
    self.variables_name_save = ['clipob', 'cliprew', 'ret', 'gamma', 'epsilon']
    self.reward_scale = reward_scale
    self.update = update
def __init__(self, venv, reward_net_path, env_name):
    """Replace env rewards with the output of a learned AtariNet model."""
    VecEnvWrapper.__init__(self, venv)
    self.reward_net = AtariNet()
    self.reward_net.load_state_dict(torch.load(reward_net_path))
    use_cuda = torch.cuda.is_available()
    self.device = torch.device("cuda:0" if use_cuda else "cpu")
    self.reward_net.to(self.device)
    # Running statistics for normalizing the learned reward.
    self.rew_rms = RunningMeanStd(shape=())
    self.epsilon = 1e-8
    self.cliprew = 10.
    self.env_name = env_name
def __init__(self, venv, nstack, device):
    """Frame stack whose buffer is a torch tensor on ``device``."""
    self.venv = venv
    self.nstack = nstack
    wrapped_space = venv.observation_space  # wrapped ob space
    self.shape_dim0 = wrapped_space.low.shape[0]  # channels of one frame
    low = np.repeat(wrapped_space.low, self.nstack, axis=0)
    high = np.repeat(wrapped_space.high, self.nstack, axis=0)
    # Zero-initialized rolling buffer, moved to the requested device.
    buffer = np.zeros((venv.num_envs, ) + low.shape)
    self.stackedobs = torch.from_numpy(buffer).float().to(device)
    observation_space = gym.spaces.Box(
        low=low, high=high, dtype=venv.observation_space.dtype)
    VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
def __init__(self, venv, nstack, device=None):
    """Torch-based frame stack; defaults to CPU when no device is given."""
    self.venv = venv
    self.nstack = nstack
    wrapped_space = venv.observation_space  # wrapped ob space
    self.shape_dim0 = wrapped_space.shape[0]
    low = np.repeat(wrapped_space.low, self.nstack, axis=0)
    high = np.repeat(wrapped_space.high, self.nstack, axis=0)
    device = torch.device("cpu") if device is None else device
    # Rolling buffer of stacked frames, one row per sub-environment.
    self.stacked_obs = torch.zeros((venv.num_envs,) + low.shape).to(device)
    observation_space = gym.spaces.Box(
        low=low, high=high, dtype=venv.observation_space.dtype)
    VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
def __init__(self, venv):
    """Reward wrapper emitting the mean of N(0,1) noise per observation."""
    VecEnvWrapper.__init__(self, venv)
    self.graph = tf.Graph()
    sess_config = tf.ConfigProto(device_count={'GPU': 0})  # Run on CPU
    # sess_config.gpu_options.allow_growth = True
    self.sess = tf.Session(graph=self.graph, config=sess_config)
    with self.graph.as_default(), self.sess.as_default():
        # Batch of 84x84x4 stacked frames.
        self.obs = tf.placeholder(tf.float32, [None, 84, 84, 4])
        # Per-observation reward: mean of standard-normal noise shaped
        # like the observation.
        noise = tf.random_normal(tf.shape(self.obs))
        self.rewards = tf.reduce_mean(noise, axis=[1, 2, 3])
def __init__(self, venv, nstack):
    """Frame stack whose buffer is moved to GPU via the ``cuda`` helper."""
    self.venv = venv
    self.nstack = nstack
    wrapped_space = venv.observation_space  # wrapped ob space
    self.shape_dim0 = wrapped_space.shape[0]
    low = np.repeat(wrapped_space.low, self.nstack, axis=0)
    high = np.repeat(wrapped_space.high, self.nstack, axis=0)
    # Rolling buffer of stacked frames, one row per sub-environment.
    self.stacked_obs = cuda(torch.zeros((venv.num_envs, ) + low.shape))
    observation_space = gym.spaces.Box(
        low=low, high=high, dtype=venv.observation_space.dtype)
    VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
def __init__(self, venv, filename=None, keep_buf=0, info_keywords=()):
    """Episode monitor; optionally logs episode stats to ``filename``."""
    VecEnvWrapper.__init__(self, venv)
    self.eprets = np.zeros(self.num_envs, 'f')  # running episode returns
    self.eplens = np.zeros(self.num_envs, 'i')  # running episode lengths
    self.epcount = 0
    self.tstart = time.time()
    self.results_writer = ResultsWriter(
        filename,
        header={'t_start': self.tstart},
        extra_keys=info_keywords,
    ) if filename else None
    self.info_keywords = info_keywords
    self.keep_buf = keep_buf
    if self.keep_buf:
        # Ring buffers of the most recent episode returns and lengths.
        self.epret_buf = deque([], maxlen=keep_buf)
        self.eplen_buf = deque([], maxlen=keep_buf)
def __init__(self, venv, model_dir, ctrl_coeff=0., alive_bonus=0.):
    # Restore an ensemble of learned RewardNet models (CPU-only TF
    # session) described by the args.txt saved alongside the checkpoints.
    VecEnvWrapper.__init__(self, venv)
    ob_shape = venv.observation_space.shape
    # Discrete action spaces report n; continuous ones use the last
    # dimension of the action shape.
    ac_dims = venv.action_space.n if venv.action_space.dtype == int else venv.action_space.shape[
        -1]
    self.ctrl_coeff = ctrl_coeff
    self.alive_bonus = alive_bonus
    self.graph = tf.Graph()
    config = tf.ConfigProto(device_count={'GPU': 0})  # Run on CPU
    #config.gpu_options.allow_growth = True
    self.sess = tf.Session(graph=self.graph, config=config)
    with self.graph.as_default():
        with self.sess.as_default():
            # Make the repository root importable for `utils`.
            import os, sys
            from argparse import Namespace
            from pathlib import Path
            dir_path = os.path.dirname(os.path.realpath(__file__))
            sys.path.append(os.path.join(dir_path, '..', '..', '..', '..'))
            from utils import Model, RewardNet
            print(os.path.realpath(model_dir))
            # SECURITY NOTE(review): eval() of the args file executes
            # arbitrary code — only safe if model_dir is trusted.
            with open(str(Path(model_dir) / 'args.txt')) as f:
                args = eval(f.read())
            models = []
            for i in range(args.num_models):
                # Separate variable scope per ensemble member so each
                # checkpoint restores into its own variable set.
                with tf.variable_scope('model_%d' % i):
                    net = RewardNet(args.include_action,
                                    ob_shape[-1],
                                    ac_dims,
                                    num_layers=args.num_layers,
                                    embedding_dims=args.embedding_dims)
                    model = Model(net, batch_size=1)
                    model.saver.restore(
                        self.sess,
                        os.path.join(model_dir, 'model_%d.ckpt' % i))
                    models.append(model)
            self.models = models
def __init__(self, venv, n_stack):
    """
    Vectorized frame-stacking wrapper.

    :param venv: ([Gym Environment]) the environments to vectorize and stack
    :param n_stack: how many consecutive frames to stack
    """
    self.venv = venv
    self.n_stack = n_stack
    base_space = venv.observation_space
    lows = np.repeat(base_space.low, self.n_stack, axis=-1)
    highs = np.repeat(base_space.high, self.n_stack, axis=-1)
    # Rolling buffer of stacked frames, one row per sub-environment.
    self.stackedobs = np.zeros((venv.num_envs, ) + lows.shape, lows.dtype)
    stacked_space = spaces.Box(low=lows, high=highs,
                               dtype=venv.observation_space.dtype)
    VecEnvWrapper.__init__(self, venv, observation_space=stacked_space)
def __init__(self, venv, ob_rms, ret_rms, clipob=10., cliprew=10., gamma=0.99, epsilon=1e-8):
    """Normalizer initialized from pre-computed running statistics."""
    VecEnvWrapper.__init__(self, venv)
    # Rehydrate the supplied statistics into RMS objects.
    self.ob_rms = self.RMS(*ob_rms)
    self.ret_rms = self.RMS(*ret_rms)
    self.clipob = clipob
    self.cliprew = cliprew
    # One discounted-return accumulator per sub-environment.
    self.ret = np.zeros(self.num_envs)
    self.gamma = gamma
    self.epsilon = epsilon
    # Sanity check: supplied stats must match the observation shape.
    expected_shape = self.observation_space.shape
    assert self.ob_rms.mean.shape == self.ob_rms.var.shape == expected_shape
def __init__(self, venv, priors, device=None):
    """Wrap a frame-stacked env, widening observations by 3 prior channels.

    :param venv: frame-stacked VecEnv (must expose ``nstack``)
    :param priors: prior maps appended as extra observation channels
    :param device: torch device for the observation buffer (CPU if None)
    """
    self.venv = venv
    self.nstack = venv.nstack
    self.addit_ch = 3  # number of extra channels appended to each obs
    self.priors = priors
    wos = venv.observation_space  # wrapped ob space
    w_shape = wos.shape
    low = np.zeros((w_shape[0] + self.addit_ch, w_shape[1], w_shape[2]))
    high = np.full((w_shape[0] + self.addit_ch, w_shape[1], w_shape[2]),
                   255)
    if device is None:
        device = torch.device('cpu')
    # Bug fix: the original set self.device only inside the None branch
    # (leaving it undefined when a device WAS passed) and then called
    # .to(device) with the local still None. Normalize the local first
    # and always record it.
    self.device = device
    self.full_obs = torch.zeros((venv.num_envs, ) + low.shape).to(device)
    observation_space = gym.spaces.Box(
        low=low, high=high, dtype=venv.observation_space.dtype)
    VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
def __init__(self, venv, nstack):
    """Frame stack that also supports Dict observation spaces."""
    self.venv = venv
    self.nstack = nstack
    self.dict_obs = isinstance(venv.observation_space, gym.spaces.Dict)
    if self.dict_obs:
        # Stack only the 'observation' entry of the Dict space.
        wos = venv.observation_space.spaces['observation']
    else:
        wos = venv.observation_space  # wrapped ob space
    low = np.repeat(wos.low, self.nstack, axis=-1)
    high = np.repeat(wos.high, self.nstack, axis=-1)
    self.stackedobs = np.zeros((venv.num_envs, ) + low.shape, low.dtype)
    observation_space = spaces.Box(low=low, high=high,
                                   dtype=venv.observation_space.dtype)
    if self.dict_obs:
        # NOTE(review): this aliases (and thus mutates) the wrapped env's
        # Dict space in place rather than copying it — confirm intended.
        _observation_space = venv.observation_space
        _observation_space.spaces['observation'] = observation_space
        observation_space = _observation_space
    VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
def __init__(self, venv, nstack, device=None):
    """Torch frame stack for image observations (e.g. 1x84x84 -> 4x84x84)."""
    self.venv = venv
    self.nstack = nstack
    base_space = venv.observation_space  # should be 1 x 84 x 84
    self.shape_dim0 = base_space.shape[0]  # channels of a single frame
    # Repeat the per-frame bounds nstack times along the channel axis:
    # low is all zeros, high is all 255s, each becoming nstack x H x W.
    low = np.repeat(base_space.low, self.nstack, axis=0)
    high = np.repeat(base_space.high, self.nstack, axis=0)
    if device is None:
        device = torch.device('cpu')
    # num_envs x (nstack * C) x H x W rolling buffer of stacked frames.
    self.stacked_obs = torch.zeros((venv.num_envs, ) + low.shape).to(device)
    observation_space = gym.spaces.Box(
        low=low, high=high, dtype=venv.observation_space.dtype)
    VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
def __init__(
    self,
    venv: Env,
    ob: bool = True,
    ret: bool = True,
    clipob: float = 10.0,
    cliprew: float = 10.0,
    gamma: float = 0.99,
    epsilon: float = 1e-8,
    first_n: int = None,
) -> None:
    """
    Modified init function of VecNormalize.

    ``first_n`` restricts observation normalization to the leading
    ``obs[:first_n]`` slice of a flat (1-D) observation; when it is
    None the full observation is normalized.
    """
    VecEnvWrapper.__init__(self, venv)
    # Note: the gate is `ob is None`, not truthiness, matching the
    # original contract.
    if ob is None:
        self.ob_rms = None
    elif first_n is None:
        self.ob_rms = RunningMeanStd(shape=self.observation_space.shape)
    elif len(self.observation_space.shape) == 1:
        self.ob_rms = RunningMeanStd(shape=(first_n, ))
    else:
        # Partial normalization only defined for flat observations.
        raise NotImplementedError
    self.ret_rms = RunningMeanStd(shape=()) if ret else None
    self.clipob = clipob
    self.cliprew = cliprew
    self.ret = np.zeros(self.num_envs)
    self.gamma = gamma
    self.epsilon = epsilon
    self.first_n = first_n
def __init__(self, venv):
    """Thin pass-through wrapper around ``venv``."""
    VecEnvWrapper.__init__(self, venv)
    self.venv = venv
def __init__(self, venv):
    """Wrapper tracking per-episode returns and lengths (lazily set up)."""
    VecEnvWrapper.__init__(self, venv)
    # Initialized lazily elsewhere (e.g. on the first reset).
    self.episodic_rets = None
    self.episodic_lens = None
def __init__(self, env, *, reward_network):
    """Wrap ``env``, replacing its rewards with ``reward_network`` output."""
    VecEnvWrapper.__init__(self, env)
    self.reward_network = reward_network
    # Previous observation; populated once stepping begins.
    self.prev_obs = None