Example 1
 def __init__(self, env_fns, spaces=None, context='spawn'):
     """
     If you don't specify observation_space, we'll have to create a dummy
     environment to get it.
     """
     ctx = mp.get_context(context)
     if spaces:
         observation_space, action_space = spaces
     else:
         logger.log('Creating dummy env object to get spaces')
         with logger.scoped_configure(format_strs=[]):
             dummy = env_fns[0]()
             observation_space, action_space = dummy.observation_space, dummy.action_space
             dummy.close()
             del dummy
     VecEnv.__init__(self, len(env_fns), observation_space, action_space)
     self.obs_keys, self.obs_shapes, self.obs_dtypes = obs_space_info(observation_space)
     self.obs_bufs = [
         {k: ctx.Array(_NP_TO_CT[self.obs_dtypes[k].type], int(np.prod(self.obs_shapes[k]))) for k in self.obs_keys}
         for _ in env_fns]
     self.parent_pipes = []
     self.procs = []
     with clear_mpi_env_vars():
         for env_fn, obs_buf in zip(env_fns, self.obs_bufs):
             wrapped_fn = CloudpickleWrapper(env_fn)
             parent_pipe, child_pipe = ctx.Pipe()
             proc = ctx.Process(target=_subproc_worker,
                         args=(child_pipe, parent_pipe, wrapped_fn, obs_buf, self.obs_shapes, self.obs_dtypes, self.obs_keys))
             proc.daemon = True
             self.procs.append(proc)
             self.parent_pipes.append(parent_pipe)
             proc.start()
             child_pipe.close()
     self.waiting_step = False
     self.viewer = None
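
For orientation, a minimal sketch of how a constructor like this one is typically driven. The environment id and worker count are illustrative, and the import path assumes the OpenAI baselines layout:

import gym
from baselines.common.vec_env.shmem_vec_env import ShmemVecEnv

def make_env():
    return gym.make("PongNoFrameskip-v4")  # illustrative env id

# four lock-step workers, each writing observations into a shared-memory buffer
venv = ShmemVecEnv([make_env for _ in range(4)])
obs = venv.reset()                                  # batched: leading dimension 4
actions = [venv.action_space.sample() for _ in range(4)]
obs, rews, dones, infos = venv.step(actions)
venv.close()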
Example 2
 def __init__(self, env_fns, spaces=None):
     """
     If you don't specify observation_space, we'll have to create a dummy
     environment to get it.
     """
     if spaces:
         observation_space, action_space = spaces
     else:
         logger.log('Creating dummy env object to get spaces')
         with logger.scoped_configure(format_strs=[]):
             dummy = env_fns[0]()
             observation_space, action_space = dummy.observation_space, dummy.action_space
             dummy.close()
             del dummy
     VecEnv.__init__(self, len(env_fns), observation_space, action_space)
     self.obs_keys, self.obs_shapes, self.obs_dtypes = obs_space_info(observation_space)
     self.obs_bufs = [
         {k: Array(_NP_TO_CT[self.obs_dtypes[k].type], int(np.prod(self.obs_shapes[k]))) for k in self.obs_keys}
         for _ in env_fns]
     self.parent_pipes = []
     self.procs = []
     for env_fn, obs_buf in zip(env_fns, self.obs_bufs):
         wrapped_fn = CloudpickleWrapper(env_fn)
         parent_pipe, child_pipe = Pipe()
         proc = Process(target=_subproc_worker,
                        args=(child_pipe, parent_pipe, wrapped_fn, obs_buf, self.obs_shapes, self.obs_dtypes, self.obs_keys))
         proc.daemon = True
         self.procs.append(proc)
         self.parent_pipes.append(parent_pipe)
         proc.start()
         child_pipe.close()
     self.waiting_step = False
     self.specs = [f().spec for f in env_fns]  # note: instantiates every env once just to read .spec; the instances are never closed
     self.viewer = None
Example 3
 def __init__(self, env_fns, spaces=None, context="spawn"):
     """
     If you don't specify observation_space, we'll have to create a dummy
     environment to get it.
     """
     ctx = mp.get_context(context)
     if spaces:
         observation_space, action_space = spaces
     else:
         logger.log("Creating dummy env object to get spaces")
         with logger.scoped_configure(format_strs=[]):
             dummy = env_fns[0]()
             observation_space, action_space = (
                 dummy.observation_space,
                 dummy.action_space,
             )
             dummy.close()
             del dummy
     VecEnv.__init__(self, len(env_fns), observation_space, action_space)
     self.obs_keys, self.obs_shapes, self.obs_dtypes = obs_space_info(
         observation_space
     )
     self.obs_bufs = [
         {
             k: ctx.Array(
                 _NP_TO_CT[self.obs_dtypes[k].type],
                int(np.prod(self.obs_shapes[k]))
                # oversize string-typed buffers by a fixed 400000x factor; their
                # per-element width isn't known when the buffer is allocated
                * (400000 if self.obs_dtypes[k].type == np.str_ else 1),
            )
             for k in self.obs_keys
         }
         for _ in env_fns
     ]
     self.parent_pipes = []
     self.procs = []
     with clear_mpi_env_vars():
         for env_fn, obs_buf in zip(env_fns, self.obs_bufs):
             wrapped_fn = CloudpickleWrapper(env_fn)
             parent_pipe, child_pipe = ctx.Pipe()
             proc = ctx.Process(
                 target=_subproc_worker,
                 args=(
                     child_pipe,
                     parent_pipe,
                     wrapped_fn,
                     obs_buf,
                     self.obs_shapes,
                     self.obs_dtypes,
                     self.obs_keys,
                 ),
             )
             proc.daemon = True
             self.procs.append(proc)
             self.parent_pipes.append(parent_pipe)
             proc.start()
             child_pipe.close()
     self.waiting_step = False
     self.viewer = None
Example 4
    def __init__(self, env_fns, spaces=None, context='spawn'):
        """
        If you don't specify observation_space, we'll have to create a dummy
        environment to get it.
        """
        # envs = [_thunk, _thunk]
        ctx = mp.get_context(context)
        if spaces:
            observation_space, action_space = spaces
        else:
            logger.log('Creating dummy env object to get spaces')
            with logger.scoped_configure(format_strs=[]):
                dummy = env_fns[0]()
                observation_space, action_space = dummy.observation_space, dummy.action_space

                self.obs_space, self.act_space = observation_space, action_space
                self.pixel_shape, self.non_pixel_obs, self.non_pixel_input_size = parse_obs_space(self.obs_space)
                self.action_spaces, self.action_spaces_name = dddqn_parse_action_space(self.act_space)
                self.num_branches = len(self.action_spaces)
                self.action_template = dummy.action_space.noop()

                dummy.close()
                del dummy
        VecEnv.__init__(self, len(env_fns), observation_space, action_space)


        self.obs_keys, self.obs_shapes, self.obs_dtypes = obs_space_info(observation_space)
        # self.obs_keys: compass, inventory.dirt, pov
        # self.obs_shapes: (), (), (64,64,3)
        # self.obs_dtypes: float64, np.int64, ?
        #print("test 1:,\n", self.obs_keys,'\n',self.obs_shapes, '\n', self.obs_dtypes)
        #for k in self.obs_keys:
            #print("1:,",_NP_TO_CT[self.obs_dtypes[k].type])
            #print("2:,",int(np.prod(self.obs_shapes[k])))
        self.obs_bufs = [
            {k: ctx.Array(_NP_TO_CT[self.obs_dtypes[k].type], int(np.prod(self.obs_shapes[k]))) for k in self.obs_keys}
            for _ in env_fns]
        # self.obs_bufs[0] :{k: array(type, size)}
        self.parent_pipes = []
        self.procs = []
        with clear_mpi_env_vars():
            for env_fn, obs_buf in zip(env_fns, self.obs_bufs):
                wrapped_fn = CloudpickleWrapper(env_fn)
                parent_pipe, child_pipe = ctx.Pipe()
                proc = NoDaemonProcess(target=_subproc_worker,
                            args=(child_pipe, parent_pipe, wrapped_fn, obs_buf, self.obs_shapes, self.obs_dtypes, self.obs_keys))
                #proc.daemon = True
                self.procs.append(proc)
                self.parent_pipes.append(parent_pipe)
                proc.start()
                child_pipe.close()
        self.waiting_step = False
        self.viewer = None
Example 5
def get_experiment_environment(**args):
    from utils import setup_tensorflow_session
    from baselines.common import set_global_seeds
    from gym.utils.seeding import hash_seed
    process_seed = args["seed"] + 1000 * 0  # rank term hardcoded to 0 (cf. the Get_rank() variants below)
    process_seed = hash_seed(process_seed, max_bytes=4)
    set_global_seeds(process_seed)

    logger_context = logger.scoped_configure(
        dir=None, format_strs=['stdout', 'log', 'csv'])
    tf_context = setup_tensorflow_session()
    return logger_context, tf_context
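
Both returned objects are context managers; a sketch of the intended call pattern (only the `seed` key is read from `args` here):

log_ctx, tf_ctx = get_experiment_environment(seed=0)
with log_ctx, tf_ctx:
    pass  # build the model and train; logger calls now go to stdout/log/csv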
Example 6
    def __init__(self, env_fns, spaces=None, context='spawn'):
        """
        If you don't specify observation_space, we'll have to create a dummy
        environment to get it.
        """

        ctx = mp.get_context(context)
        if spaces:
            observation_space, action_space = spaces
        else:
            logger.log('Creating dummy env object to get spaces')
            with logger.scoped_configure(format_strs=[]):
                dummy = env_fns[0]()
                observation_space, action_space = dummy.observation_space, dummy.action_space
                dummy.close()
                try:
                    self.visionnet_input = dummy.env.env.env.visionnet_input
                    self.nn = dummy.env.env.env.nn
                    self.xml_path = dummy.env.env.env.xml_path
                    if dummy.env.env.env.unity:
                        dummy.env.env.env.close()  ## HACK>>>
                except Exception as e:
                    print(e)
                del dummy

        VecEnv.__init__(self, len(env_fns), observation_space, action_space)
        self.obs_keys, self.obs_shapes, self.obs_dtypes = obs_space_info(
            observation_space)
        self.obs_bufs = [{
            k: ctx.Array(_NP_TO_CT[self.obs_dtypes[k].type],
                         int(np.prod(self.obs_shapes[k])))
            for k in self.obs_keys
        } for _ in env_fns]
        self.parent_pipes = []
        self.procs = []
        with clear_mpi_env_vars():
            for env_fn, obs_buf in zip(env_fns, self.obs_bufs):
                wrapped_fn = CloudpickleWrapper(env_fn)
                parent_pipe, child_pipe = ctx.Pipe()
                proc = ctx.Process(target=_subproc_worker,
                                   args=(child_pipe, parent_pipe, wrapped_fn,
                                         obs_buf, self.obs_shapes,
                                         self.obs_dtypes, self.obs_keys))
                proc.daemon = True
                self.procs.append(proc)
                self.parent_pipes.append(parent_pipe)
                proc.start()
                child_pipe.close()
        self.waiting_step = False
        self.viewer = None
Example 7
def get_experiment_environment(**args):
    process_seed = 1234 + 1000 * MPI.COMM_WORLD.Get_rank()
    process_seed = hash_seed(process_seed, max_bytes=4)
    set_global_seeds(1234)  # note: seeds with the constant, not the hashed per-rank value above
    setup_mpi_gpus()

    logger_context = logger.scoped_configure(
        dir='C:/Users/Elias/Desktop/savedunc/' + MODE + '_' +
        datetime.now().strftime('%Y_%m_%d_%H_%M_%S'),
        format_strs=['stdout', 'log', 'csv', 'tensorboard']
        if MPI.COMM_WORLD.Get_rank() == 0 else ['log'])

    tf_context = setup_tensorflow_session()
    return logger_context, tf_context
Example 8
def get_experiment_environment(**args):
    from curiosity.utils import setup_mpi_gpus, setup_tensorflow_session
    from baselines.common import set_global_seeds
    from gym.utils.seeding import hash_seed
    process_seed = args["seed"] + 1000 * MPI.COMM_WORLD.Get_rank()
    process_seed = hash_seed(process_seed, max_bytes=4)
    set_global_seeds(process_seed)
    setup_mpi_gpus()

    logger_context = logger.scoped_configure(dir=None,
                                             format_strs=['stdout', 'log',
                                                          'csv'] if MPI.COMM_WORLD.Get_rank() == 0 else ['log'])
    tf_context = setup_tensorflow_session()
    return logger_context, tf_context
Example 9
def start_experiment(**args):
    # create environment
    # coinrun environment is already vectorized
    env, test_env = make_env_all_params(args=args)

    # set random seeds for reproducibility
    utils.set_global_seeds(seed=args['seed'])

    # create tf.session
    tf_sess = utils.setup_tensorflow_session()

    if args['server_type'] == 'local':
        logger_context = logger.scoped_configure(dir=args['log_dir'],
                                                 format_strs=['stdout', 'csv'])
    else:
        logger_context = logger.scoped_configure(dir=args['log_dir'],
                                                 format_strs=['csv'])

    with logger_context, tf_sess:
        print("logging directory: {}".format(args['log_dir']))

        # create trainer
        trainer = Trainer(env=env, test_env=test_env, args=args)

        if args['evaluation'] == 1:
            # load_path is changed to model_path
            print('run.py, def start_experiment, evaluating model: {}'.format(
                args['load_path']))
            trainer.eval()

        # this is for visualizing the loss landscape
        elif args['visualize'] == 1:
            print('running visualization...')
            trainer.visualize()
        else:
            print('run.py, def start_experiment, training begins...')
            trainer.train()
Example 10
def get_experiment_environment(**args):
    log_directory = osp.join(
        './output/' +
        datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M-%S-%f"))
    process_seed = args["seed"] + 1000 * MPI.COMM_WORLD.Get_rank()
    process_seed = hash_seed(process_seed, max_bytes=4)
    set_global_seeds(process_seed)
    # TODO: turn this back up when have GPU support!
    # setup_mpi_gpus()

    logger_context = logger.scoped_configure(
        dir=log_directory,
        format_strs=['stdout', 'log', 'csv']
        if MPI.COMM_WORLD.Get_rank() == 0 else ['log'])
    tf_context = setup_tensorflow_session()
    return logger_context, tf_context
Example 11
    def __init__(self, env_fns, spaces=None):
        """
        If you don't specify observation_space, we'll have to create a dummy
        environment to get it.
        """
        if spaces:
            observation_space, action_space = spaces
        else:
            logger.log("Creating dummy env object to get spaces")
            with logger.scoped_configure(format_strs=[]):
                dummy = env_fns[0]()
                observation_space, action_space = (
                    dummy.observation_space,
                    dummy.action_space,
                )
                dummy.close()
                del dummy
        VecEnv.__init__(self, len(env_fns), observation_space, action_space)

        obs_spaces = (observation_space.spaces
                      if isinstance(self.observation_space, gym.spaces.Tuple)
                      else (self.observation_space,))
        self.obs_bufs = [
            tuple(
                Array(_NP_TO_CT[s.dtype.type], int(np.prod(s.shape)))
                for s in obs_spaces) for _ in env_fns
        ]
        self.obs_shapes = [s.shape for s in obs_spaces]
        self.obs_dtypes = [s.dtype for s in obs_spaces]

        self.parent_pipes = []
        self.procs = []
        for env_fn, obs_buf in zip(env_fns, self.obs_bufs):
            wrapped_fn = CloudpickleWrapper(env_fn)
            parent_pipe, child_pipe = Pipe()
            proc = Process(
                target=_subproc_worker,
                args=(child_pipe, parent_pipe, wrapped_fn, obs_buf,
                      self.obs_shapes),
            )
            proc.daemon = True
            self.procs.append(proc)
            self.parent_pipes.append(parent_pipe)
            proc.start()
            child_pipe.close()
        self.waiting_step = False
Example 12
def get_experiment_environment(**args):
    from utils import setup_mpi_gpus, setup_tensorflow_session
    from baselines.common import set_global_seeds
    from gym.utils.seeding import hash_seed
    from baselines import logger

    process_seed = args["seed"] + 1000 * MPI.COMM_WORLD.Get_rank()
    process_seed = hash_seed(process_seed, max_bytes=4)
    set_global_seeds(process_seed)
    setup_mpi_gpus()
    logdir = args['logdir']

    logger_context = logger.scoped_configure(
        dir='./' + logdir + '/' + datetime.datetime.now().strftime(
            args["expID"] + "-openai-%Y-%m-%d-%H-%M-%S-%f"),
        format_strs=['stdout', 'log', 'csv', 'tensorboard']
        if MPI.COMM_WORLD.Get_rank() == 0 else ['log'])
    tf_context = setup_tensorflow_session()
    return logger_context, tf_context
Example 13
def start_experiment(**args):
    make_env = partial(make_env_all_params, add_monitor=True, args=args)
    logdir = osp.join("/result", args['env'], args['exp_name'], datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f"))
    log = logger.scoped_configure(dir=logdir, format_strs=['stdout', 'log', 'csv'] if MPI.COMM_WORLD.Get_rank() == 0 else ['log'])

    trainer = Trainer(make_env=make_env,
                      num_timesteps=args['num_timesteps'], hps=args,
                      envs_per_process=args['envs_per_process'],
                      logdir=logdir)
    tf_sess = get_experiment_environment(**args)
    with log, tf_sess:
        print("results will be saved to ", logdir)
        with open("{}/args.txt".format(logdir), 'w') as argfile:
            print("saving argments...")
            for k, v in args.items():
                argfile.write(str(k) + ' >>> ' + str(v) + '\n')

        trainer.train()
Example 14
    def thunk_plus():
        import torch
        import random
        import numpy as np
        from baselines import logger
        from proj.utils.tqdm_util import tqdm_out

        np.random.seed(seed)
        random.seed(seed)
        torch.manual_seed(seed)

        torch.set_num_threads(4)

        with tqdm_out(), logger.scoped_configure(log_dir, format_strs):
            from proj.common.log_utils import save_config

            logger.set_level(logger.WARN)
            save_config({"exp_name": exp_name, "alg": thunk})
            thunk(**kwargs)
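
A closure like `thunk_plus` is typically handed off to a fresh process so the seeding and the scoped logger configuration stay isolated per run; a minimal dispatch sketch (the helper name is illustrative, not from the source):

import multiprocessing as mp

def launch(thunk_plus):
    # one fresh interpreter per experiment: torch/numpy/random seeds and the
    # scoped logger configuration cannot leak between runs
    proc = mp.Process(target=thunk_plus)
    proc.start()
    proc.join()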
Example 15
def test_mpi_weighted_mean():
    comm = MPI.COMM_WORLD
    with logger.scoped_configure(comm=comm):
        if comm.rank == 0:
            name2valcount = {'a': (10, 2), 'b': (20, 3)}
        elif comm.rank == 1:
            name2valcount = {'a': (19, 1), 'c': (42, 3)}
        else:
            raise NotImplementedError
        d = mpi_util.mpi_weighted_mean(comm, name2valcount)
        correctval = {'a': (10 * 2 + 19) / 3.0, 'b': 20, 'c': 42}
        if comm.rank == 0:
            assert d == correctval, '{} != {}'.format(d, correctval)

        for name, (val, count) in name2valcount.items():
            for _ in range(count):
                logger.logkv_mean(name, val)
        d2 = logger.dumpkvs()
        if comm.rank == 0:
            assert d2 == correctval
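
The expected values follow from the definition of a weighted mean: 'a' is reported by both ranks with weights 2 and 1, while 'b' and 'c' each come from a single rank and pass through unchanged. A quick check of the arithmetic, plus the two-rank launch the test assumes:

expected_a = (10 * 2 + 19 * 1) / (2 + 1)
assert expected_a == 13.0

# the assertions require exactly two ranks, e.g.:
#   mpiexec -n 2 python -m pytest -k test_mpi_weighted_mean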
Example 16
def test_mpi_weighted_mean():
    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    with logger.scoped_configure(comm=comm):
        if comm.rank == 0:
            name2valcount = {'a': (10, 2), 'b': (20, 3)}
        elif comm.rank == 1:
            name2valcount = {'a': (19, 1), 'c': (42, 3)}
        else:
            raise NotImplementedError

        d = mpi_util.mpi_weighted_mean(comm, name2valcount)
        correctval = {'a': (10 * 2 + 19) / 3.0, 'b': 20, 'c': 42}
        if comm.rank == 0:
            assert d == correctval, '{} != {}'.format(d, correctval)

        for name, (val, count) in name2valcount.items():
            for _ in range(count):
                logger.logkv_mean(name, val)
        d2 = logger.dumpkvs()
        if comm.rank == 0:
            assert d2 == correctval
Example 17
def get_experiment_environment(**args):
    from utils import setup_mpi_gpus, setup_tensorflow_session
    from baselines.common import set_global_seeds
    from gym.utils.seeding import hash_seed
    process_seed = args["seed"] + 1000 * MPI.COMM_WORLD.Get_rank()
    process_seed = hash_seed(process_seed, max_bytes=4)
    set_global_seeds(process_seed)
    setup_mpi_gpus()

    time = datetime.datetime.now().strftime("%m-%d-%H-%M-%S")
    path_with_args = './logs/' + '_'.join([
        time, args['exp_name'], args['env_kind'], args['feature_space'],
        str(args['envs_per_process']),
        str(args['train_discriminator']),
        str(args['discriminator_weighted'])
    ])

    format_strs = (['stdout', 'log', 'csv', 'tensorboard']
                   if MPI.COMM_WORLD.Get_rank() == 0 else ['log'])
    logger_context = logger.scoped_configure(dir=path_with_args,
                                             format_strs=format_strs)
    tf_context = setup_tensorflow_session()
    return logger_context, tf_context
Example 18
def get_experiment_environment(**args):
    # initialize the MPI-related state
    from utils import setup_mpi_gpus, setup_tensorflow_session
    from baselines.common import set_global_seeds
    from gym.utils.seeding import hash_seed
    process_seed = args["seed"] + 1000 * MPI.COMM_WORLD.Get_rank()
    process_seed = hash_seed(process_seed, max_bytes=4)
    set_global_seeds(process_seed)
    setup_mpi_gpus()

    logger_dir = './logs/' + datetime.datetime.now().strftime(
        args["env"] + "-" + args["reward_type"] + "-" +
        str(args["nepochs_dvae"]) + "-" + str(args["stickyAtari"]) +
        "-%Y-%m-%d-%H-%M-%S-%f")
    logger_context = logger.scoped_configure(
        dir=logger_dir,
        format_strs=['stdout', 'log', 'csv']
        if MPI.COMM_WORLD.Get_rank() == 0 else ['log'])
    tf_context = setup_tensorflow_session()

    # bai.      added a Saver for checkpointing the weights
    saver = tf.train.Saver()
    return logger_context, tf_context, saver, logger_dir
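
Unlike the other variants, this one also returns the Saver and the log directory. A hedged sketch of how the four return values might be consumed; note that as written, `tf.train.Saver()` raises unless variables already exist in the default graph when `get_experiment_environment` is called:

log_ctx, tf_sess, saver, logger_dir = get_experiment_environment(**args)
with log_ctx, tf_sess as sess:
    train(sess)  # hypothetical training loop
    saver.save(sess, logger_dir + '/model.ckpt')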