def __init__(self, env_fns, spaces=None, context='spawn'):
    """
    If you don't specify observation_space, we'll have to create a dummy
    environment to get it.
    """
    ctx = mp.get_context(context)
    if spaces:
        observation_space, action_space = spaces
    else:
        logger.log('Creating dummy env object to get spaces')
        with logger.scoped_configure(format_strs=[]):
            dummy = env_fns[0]()
            observation_space, action_space = dummy.observation_space, dummy.action_space
            dummy.close()
            del dummy
    VecEnv.__init__(self, len(env_fns), observation_space, action_space)
    self.obs_keys, self.obs_shapes, self.obs_dtypes = obs_space_info(observation_space)
    self.obs_bufs = [
        {k: ctx.Array(_NP_TO_CT[self.obs_dtypes[k].type], int(np.prod(self.obs_shapes[k])))
         for k in self.obs_keys}
        for _ in env_fns]
    self.parent_pipes = []
    self.procs = []
    with clear_mpi_env_vars():
        for env_fn, obs_buf in zip(env_fns, self.obs_bufs):
            wrapped_fn = CloudpickleWrapper(env_fn)
            parent_pipe, child_pipe = ctx.Pipe()
            proc = ctx.Process(target=_subproc_worker,
                               args=(child_pipe, parent_pipe, wrapped_fn, obs_buf,
                                     self.obs_shapes, self.obs_dtypes, self.obs_keys))
            proc.daemon = True
            self.procs.append(proc)
            self.parent_pipes.append(parent_pipe)
            proc.start()
            child_pipe.close()
    self.waiting_step = False
    self.viewer = None
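# A minimal usage sketch for the constructor above, assuming it belongs to
# baselines' ShmemVecEnv (the env id and worker count below are illustrative
# assumptions, not taken from this snippet).
import gym
from baselines.common.vec_env.shmem_vec_env import ShmemVecEnv

def make_thunk(env_id):
    # Each worker receives a zero-argument callable that builds its own env.
    return lambda: gym.make(env_id)

venv = ShmemVecEnv([make_thunk('CartPole-v1') for _ in range(4)])
obs = venv.reset()  # one stacked observation per worker, passed via shared memory
venv.close()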
def __init__(self, env_fns, spaces=None):
    """
    If you don't specify observation_space, we'll have to create a dummy
    environment to get it.
    """
    if spaces:
        observation_space, action_space = spaces
    else:
        logger.log('Creating dummy env object to get spaces')
        with logger.scoped_configure(format_strs=[]):
            dummy = env_fns[0]()
            observation_space, action_space = dummy.observation_space, dummy.action_space
            dummy.close()
            del dummy
    VecEnv.__init__(self, len(env_fns), observation_space, action_space)
    self.obs_keys, self.obs_shapes, self.obs_dtypes = obs_space_info(observation_space)
    self.obs_bufs = [
        {k: Array(_NP_TO_CT[self.obs_dtypes[k].type], int(np.prod(self.obs_shapes[k])))
         for k in self.obs_keys}
        for _ in env_fns]
    self.parent_pipes = []
    self.procs = []
    for env_fn, obs_buf in zip(env_fns, self.obs_bufs):
        wrapped_fn = CloudpickleWrapper(env_fn)
        parent_pipe, child_pipe = Pipe()
        proc = Process(target=_subproc_worker,
                       args=(child_pipe, parent_pipe, wrapped_fn, obs_buf,
                             self.obs_shapes, self.obs_dtypes, self.obs_keys))
        proc.daemon = True
        self.procs.append(proc)
        self.parent_pipes.append(parent_pipe)
        proc.start()
        child_pipe.close()
    self.waiting_step = False
    # Note: this instantiates every env a second time just to read .spec and
    # never closes those instances; cheap for most gym envs, costly for
    # heavyweight ones.
    self.specs = [f().spec for f in env_fns]
    self.viewer = None
def __init__(self, env_fns, spaces=None, context="spawn"): """ If you don't specify observation_space, we'll have to create a dummy environment to get it. """ ctx = mp.get_context(context) if spaces: observation_space, action_space = spaces else: logger.log("Creating dummy env object to get spaces") with logger.scoped_configure(format_strs=[]): dummy = env_fns[0]() observation_space, action_space = ( dummy.observation_space, dummy.action_space, ) dummy.close() del dummy VecEnv.__init__(self, len(env_fns), observation_space, action_space) self.obs_keys, self.obs_shapes, self.obs_dtypes = obs_space_info( observation_space ) self.obs_bufs = [ { k: ctx.Array( _NP_TO_CT[self.obs_dtypes[k].type], int(np.prod(self.obs_shapes[k])) * (400000 if self.obs_dtypes[k].type == np.str_ else 1), ) for k in self.obs_keys } for _ in env_fns ] self.parent_pipes = [] self.procs = [] with clear_mpi_env_vars(): for env_fn, obs_buf in zip(env_fns, self.obs_bufs): wrapped_fn = CloudpickleWrapper(env_fn) parent_pipe, child_pipe = ctx.Pipe() proc = ctx.Process( target=_subproc_worker, args=( child_pipe, parent_pipe, wrapped_fn, obs_buf, self.obs_shapes, self.obs_dtypes, self.obs_keys, ), ) proc.daemon = True self.procs.append(proc) self.parent_pipes.append(parent_pipe) proc.start() child_pipe.close() self.waiting_step = False self.viewer = None
def __init__(self, env_fns, spaces=None, context='spawn'):
    """
    If you don't specify observation_space, we'll have to create a dummy
    environment to get it.
    """
    ctx = mp.get_context(context)
    if spaces:
        observation_space, action_space = spaces
    else:
        logger.log('Creating dummy env object to get spaces')
        with logger.scoped_configure(format_strs=[]):
            dummy = env_fns[0]()
            observation_space, action_space = dummy.observation_space, dummy.action_space
            self.obs_space, self.act_space = observation_space, action_space
            self.pixel_shape, self.non_pixel_obs, self.non_pixel_input_size = parse_obs_space(self.obs_space)
            self.action_spaces, self.action_spaces_name = dddqn_parse_action_space(self.act_space)
            self.num_branches = len(self.action_spaces)
            self.action_template = dummy.action_space.noop()
            dummy.close()
            del dummy
    VecEnv.__init__(self, len(env_fns), observation_space, action_space)
    # e.g. obs_keys: compass, inventory.dirt, pov
    #      obs_shapes: (), (), (64, 64, 3)
    #      obs_dtypes: float64, int64, ?
    self.obs_keys, self.obs_shapes, self.obs_dtypes = obs_space_info(observation_space)
    # One shared Array per key per env: {k: Array(ctype, flattened size)}
    self.obs_bufs = [
        {k: ctx.Array(_NP_TO_CT[self.obs_dtypes[k].type], int(np.prod(self.obs_shapes[k])))
         for k in self.obs_keys}
        for _ in env_fns]
    self.parent_pipes = []
    self.procs = []
    with clear_mpi_env_vars():
        for env_fn, obs_buf in zip(env_fns, self.obs_bufs):
            wrapped_fn = CloudpickleWrapper(env_fn)
            parent_pipe, child_pipe = ctx.Pipe()
            # NoDaemonProcess (not daemonized) so the worker may spawn
            # child processes of its own.
            proc = NoDaemonProcess(target=_subproc_worker,
                                   args=(child_pipe, parent_pipe, wrapped_fn, obs_buf,
                                         self.obs_shapes, self.obs_dtypes, self.obs_keys))
            self.procs.append(proc)
            self.parent_pipes.append(parent_pipe)
            proc.start()
            child_pipe.close()
    self.waiting_step = False
    self.viewer = None
def get_experiment_environment(**args):
    from utils import setup_tensorflow_session
    from baselines.common import set_global_seeds
    from gym.utils.seeding import hash_seed

    process_seed = args["seed"] + 1000 * 0  # single-process variant: rank is always 0
    process_seed = hash_seed(process_seed, max_bytes=4)
    set_global_seeds(process_seed)
    logger_context = logger.scoped_configure(
        dir=None, format_strs=['stdout', 'log', 'csv'])
    tf_context = setup_tensorflow_session()
    return logger_context, tf_context
def __init__(self, env_fns, spaces=None, context='spawn'):
    """
    If you don't specify observation_space, we'll have to create a dummy
    environment to get it.
    """
    ctx = mp.get_context(context)
    if spaces:
        observation_space, action_space = spaces
    else:
        logger.log('Creating dummy env object to get spaces')
        with logger.scoped_configure(format_strs=[]):
            dummy = env_fns[0]()
            observation_space, action_space = dummy.observation_space, dummy.action_space
            dummy.close()
            try:
                self.visionnet_input = dummy.env.env.env.visionnet_input
                self.nn = dummy.env.env.env.nn
                self.xml_path = dummy.env.env.env.xml_path
                if dummy.env.env.env.unity:
                    dummy.env.env.env.close()  # HACK>>>
            except Exception as e:
                print(e)
            del dummy
    VecEnv.__init__(self, len(env_fns), observation_space, action_space)
    self.obs_keys, self.obs_shapes, self.obs_dtypes = obs_space_info(observation_space)
    self.obs_bufs = [{
        k: ctx.Array(_NP_TO_CT[self.obs_dtypes[k].type], int(np.prod(self.obs_shapes[k])))
        for k in self.obs_keys
    } for _ in env_fns]
    self.parent_pipes = []
    self.procs = []
    with clear_mpi_env_vars():
        for env_fn, obs_buf in zip(env_fns, self.obs_bufs):
            wrapped_fn = CloudpickleWrapper(env_fn)
            parent_pipe, child_pipe = ctx.Pipe()
            proc = ctx.Process(target=_subproc_worker,
                               args=(child_pipe, parent_pipe, wrapped_fn, obs_buf,
                                     self.obs_shapes, self.obs_dtypes, self.obs_keys))
            proc.daemon = True
            self.procs.append(proc)
            self.parent_pipes.append(parent_pipe)
            proc.start()
            child_pipe.close()
    self.waiting_step = False
    self.viewer = None
def get_experiment_environment(**args):
    process_seed = 1234 + 1000 * MPI.COMM_WORLD.Get_rank()
    process_seed = hash_seed(process_seed, max_bytes=4)
    # Note: the global seed is fixed at 1234 here; the per-rank process_seed
    # computed above is not passed on.
    set_global_seeds(1234)
    setup_mpi_gpus()
    logger_context = logger.scoped_configure(
        dir='C:/Users/Elias/Desktop/savedunc/' + MODE + '_'
            + datetime.now().strftime('%Y_%m_%d_%H_%M_%S'),
        format_strs=['stdout', 'log', 'csv', 'tensorboard']
        if MPI.COMM_WORLD.Get_rank() == 0 else ['log'])
    tf_context = setup_tensorflow_session()
    return logger_context, tf_context
def get_experiment_environment(**args):
    from curiosity.utils import setup_mpi_gpus, setup_tensorflow_session
    from baselines.common import set_global_seeds
    from gym.utils.seeding import hash_seed

    process_seed = args["seed"] + 1000 * MPI.COMM_WORLD.Get_rank()
    process_seed = hash_seed(process_seed, max_bytes=4)
    set_global_seeds(process_seed)
    setup_mpi_gpus()
    logger_context = logger.scoped_configure(
        dir=None,
        format_strs=['stdout', 'log', 'csv'] if MPI.COMM_WORLD.Get_rank() == 0 else ['log'])
    tf_context = setup_tensorflow_session()
    return logger_context, tf_context
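# Hedged usage sketch for the variant above: both returned objects are
# context managers meant to wrap the whole run (the seed argument and the
# placeholder training call below are illustrative assumptions).
logger_context, tf_context = get_experiment_environment(seed=0)
with logger_context, tf_context:
    pass  # build the trainer and call trainer.train() here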
def start_experiment(**args):
    # create environment; the coinrun environment is already vectorized
    env, test_env = make_env_all_params(args=args)

    # set random seeds for reproducibility
    utils.set_global_seeds(seed=args['seed'])

    # create tf.Session
    tf_sess = utils.setup_tensorflow_session()

    if args['server_type'] == 'local':
        logger_context = logger.scoped_configure(dir=args['log_dir'],
                                                 format_strs=['stdout', 'csv'])
    else:
        logger_context = logger.scoped_configure(dir=args['log_dir'],
                                                 format_strs=['csv'])

    with logger_context, tf_sess:
        print("logging directory: {}".format(args['log_dir']))
        # create trainer
        trainer = Trainer(env=env, test_env=test_env, args=args)
        if args['evaluation'] == 1:
            # load_path is changed to model_path
            print('run.py, def start_experiment, evaluating model: {}'.format(
                args['load_path']))
            trainer.eval()
        elif args['visualize'] == 1:
            # this is for visualizing the loss landscape
            print('running visualization...')
            trainer.visualize()
        else:
            print('run.py, def start_experiment, training begins...')
            trainer.train()
def get_experiment_environment(**args):
    log_directory = osp.join(
        './output/' + datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M-%S-%f"))
    process_seed = args["seed"] + 1000 * MPI.COMM_WORLD.Get_rank()
    process_seed = hash_seed(process_seed, max_bytes=4)
    set_global_seeds(process_seed)
    # TODO: turn this back on once GPU support is available!
    # setup_mpi_gpus()
    logger_context = logger.scoped_configure(
        dir=log_directory,
        format_strs=['stdout', 'log', 'csv'] if MPI.COMM_WORLD.Get_rank() == 0 else ['log'])
    tf_context = setup_tensorflow_session()
    return logger_context, tf_context
def __init__(self, env_fns, spaces=None):
    """
    If you don't specify observation_space, we'll have to create a dummy
    environment to get it.
    """
    if spaces:
        observation_space, action_space = spaces
    else:
        logger.log("Creating dummy env object to get spaces")
        with logger.scoped_configure(format_strs=[]):
            dummy = env_fns[0]()
            observation_space, action_space = (
                dummy.observation_space,
                dummy.action_space,
            )
            dummy.close()
            del dummy
    VecEnv.__init__(self, len(env_fns), observation_space, action_space)
    obs_spaces = (observation_space.spaces
                  if isinstance(self.observation_space, gym.spaces.Tuple)
                  else (self.observation_space,))
    self.obs_bufs = [
        tuple(Array(_NP_TO_CT[s.dtype.type], int(np.prod(s.shape))) for s in obs_spaces)
        for _ in env_fns
    ]
    self.obs_shapes = [s.shape for s in obs_spaces]
    self.obs_dtypes = [s.dtype for s in obs_spaces]
    self.parent_pipes = []
    self.procs = []
    for env_fn, obs_buf in zip(env_fns, self.obs_bufs):
        wrapped_fn = CloudpickleWrapper(env_fn)
        parent_pipe, child_pipe = Pipe()
        proc = Process(
            target=_subproc_worker,
            args=(child_pipe, parent_pipe, wrapped_fn, obs_buf, self.obs_shapes),
        )
        proc.daemon = True
        self.procs.append(proc)
        self.parent_pipes.append(parent_pipe)
        proc.start()
        child_pipe.close()
    self.waiting_step = False
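# Hedged illustration of the Tuple handling above: a gym.spaces.Tuple is
# unpacked into one shared Array per sub-space (the spaces below are
# illustrative assumptions, not from the snippet).
import numpy as np
from gym import spaces

obs_space = spaces.Tuple((
    spaces.Box(low=0, high=255, shape=(84, 84, 3), dtype=np.uint8),
    spaces.Discrete(4),
))
for s in obs_space.spaces:
    # Box -> 84*84*3 = 21168 uint8 cells; Discrete has shape (), so one int64 cell
    print(s.dtype, int(np.prod(s.shape)))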
def get_experiment_environment(**args):
    from utils import setup_mpi_gpus, setup_tensorflow_session
    from baselines.common import set_global_seeds
    from gym.utils.seeding import hash_seed
    from baselines import logger

    process_seed = args["seed"] + 1000 * MPI.COMM_WORLD.Get_rank()
    process_seed = hash_seed(process_seed, max_bytes=4)
    set_global_seeds(process_seed)
    setup_mpi_gpus()
    logdir = args['logdir']
    logger_context = logger.scoped_configure(
        dir='./' + logdir + '/' + datetime.datetime.now().strftime(
            args["expID"] + "-openai-%Y-%m-%d-%H-%M-%S-%f"),
        format_strs=['stdout', 'log', 'csv', 'tensorboard']
        if MPI.COMM_WORLD.Get_rank() == 0 else ['log'])
    tf_context = setup_tensorflow_session()
    return logger_context, tf_context
def start_experiment(**args):
    make_env = partial(make_env_all_params, add_monitor=True, args=args)
    logdir = osp.join("/result", args['env'], args['exp_name'],
                      datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f"))
    log = logger.scoped_configure(
        dir=logdir,
        format_strs=['stdout', 'log', 'csv'] if MPI.COMM_WORLD.Get_rank() == 0 else ['log'])
    trainer = Trainer(make_env=make_env,
                      num_timesteps=args['num_timesteps'],
                      hps=args,
                      envs_per_process=args['envs_per_process'],
                      logdir=logdir)
    tf_sess = get_experiment_environment(**args)
    with log, tf_sess:
        print("results will be saved to ", logdir)
        with open("{}/args.txt".format(logdir), 'w') as argfile:
            print("saving arguments...")
            for k, v in args.items():
                argfile.write(str(k) + ' >>> ' + str(v) + '\n')
        trainer.train()
def thunk_plus():
    import torch
    import random
    import numpy as np
    from baselines import logger
    from proj.utils.tqdm_util import tqdm_out

    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.set_num_threads(4)

    with tqdm_out(), logger.scoped_configure(log_dir, format_strs):
        from proj.common.log_utils import save_config
        logger.set_level(logger.WARN)
        save_config({"exp_name": exp_name, "alg": thunk})
        thunk(**kwargs)
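# thunk_plus takes no parameters: seed, log_dir, format_strs, exp_name, thunk,
# and kwargs are all closed over from an enclosing scope. A hedged sketch of
# such an enclosing launcher (every name here is an illustrative assumption):
def make_runner(thunk, exp_name, seed, log_dir, format_strs, **kwargs):
    def thunk_plus():
        import random
        random.seed(seed)          # each free variable resolves to the
        print(exp_name, log_dir)   # binding captured at definition time
        thunk(**kwargs)
    return thunk_plus              # e.g. handed to multiprocessing.Process

runner = make_runner(print, 'demo', seed=0, log_dir='./runs', format_strs=['stdout'])
runner()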
def test_mpi_weighted_mean():
    comm = MPI.COMM_WORLD
    with logger.scoped_configure(comm=comm):
        if comm.rank == 0:
            name2valcount = {'a': (10, 2), 'b': (20, 3)}
        elif comm.rank == 1:
            name2valcount = {'a': (19, 1), 'c': (42, 3)}
        else:
            raise NotImplementedError

        d = mpi_util.mpi_weighted_mean(comm, name2valcount)
        correctval = {'a': (10 * 2 + 19) / 3.0, 'b': 20, 'c': 42}
        if comm.rank == 0:
            assert d == correctval, '{} != {}'.format(d, correctval)

        for name, (val, count) in name2valcount.items():
            for _ in range(count):
                logger.logkv_mean(name, val)
        d2 = logger.dumpkvs()
        if comm.rank == 0:
            assert d2 == correctval
def test_mpi_weighted_mean():
    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    with logger.scoped_configure(comm=comm):
        if comm.rank == 0:
            name2valcount = {'a': (10, 2), 'b': (20, 3)}
        elif comm.rank == 1:
            name2valcount = {'a': (19, 1), 'c': (42, 3)}
        else:
            raise NotImplementedError

        d = mpi_util.mpi_weighted_mean(comm, name2valcount)
        correctval = {'a': (10 * 2 + 19) / 3.0, 'b': 20, 'c': 42}
        if comm.rank == 0:
            assert d == correctval, '{} != {}'.format(d, correctval)

        for name, (val, count) in name2valcount.items():
            for _ in range(count):
                logger.logkv_mean(name, val)
        d2 = logger.dumpkvs()
        if comm.rank == 0:
            assert d2 == correctval
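# Both test variants above assume exactly two MPI ranks (any other rank raises
# NotImplementedError). A typical invocation is hedged here, since the exact
# flags and test file name depend on the setup:
#   mpirun -np 2 python -m pytest test_mpi_util.py
# The expected value for 'a' is the count-weighted mean across ranks:
#   (10 * 2 + 19 * 1) / (2 + 1) = 13.0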
def get_experiment_environment(**args):
    from utils import setup_mpi_gpus, setup_tensorflow_session
    from baselines.common import set_global_seeds
    from gym.utils.seeding import hash_seed

    process_seed = args["seed"] + 1000 * MPI.COMM_WORLD.Get_rank()
    process_seed = hash_seed(process_seed, max_bytes=4)
    set_global_seeds(process_seed)
    setup_mpi_gpus()
    time = datetime.datetime.now().strftime("%m-%d-%H-%M-%S")
    path_with_args = './logs/' + '_'.join([
        time, args['exp_name'], args['env_kind'], args['feature_space'],
        str(args['envs_per_process']), str(args['train_discriminator']),
        str(args['discriminator_weighted'])
    ])
    format_strs = ['stdout', 'log', 'csv', 'tensorboard'
                   ] if MPI.COMM_WORLD.Get_rank() == 0 else ['log']
    logger_context = logger.scoped_configure(dir=path_with_args, format_strs=format_strs)
    tf_context = setup_tensorflow_session()
    return logger_context, tf_context
def get_experiment_environment(**args):
    # initialize MPI-related state
    from utils import setup_mpi_gpus, setup_tensorflow_session
    from baselines.common import set_global_seeds
    from gym.utils.seeding import hash_seed

    process_seed = args["seed"] + 1000 * MPI.COMM_WORLD.Get_rank()
    process_seed = hash_seed(process_seed, max_bytes=4)
    set_global_seeds(process_seed)
    setup_mpi_gpus()
    logger_dir = './logs/' + datetime.datetime.now().strftime(
        args["env"] + "-" + args["reward_type"] + "-" + str(args["nepochs_dvae"])
        + "-" + str(args["stickyAtari"]) + "-%Y-%m-%d-%H-%M-%S-%f")
    logger_context = logger.scoped_configure(
        dir=logger_dir,
        format_strs=['stdout', 'log', 'csv'] if MPI.COMM_WORLD.Get_rank() == 0 else ['log'])
    tf_context = setup_tensorflow_session()
    # bai: add a saver for checkpointing weights. Note: tf.train.Saver()
    # raises unless variables already exist in the default graph, so the
    # model must be built before this function is called.
    saver = tf.train.Saver()
    return logger_context, tf_context, saver, logger_dir
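# Hedged usage sketch for the four-value variant above (the session retrieval
# and checkpoint name are illustrative assumptions):
logger_context, tf_context, saver, logger_dir = get_experiment_environment(**args)
with logger_context, tf_context:
    ...  # training loop
    saver.save(tf.get_default_session(), logger_dir + '/model.ckpt')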