def load_running_means(self):
    '''
        @brief:
            Restore the observation running-mean info from the checkpoint
            (when @self.args.ckpt_name is given) and, unless the transfer
            setting is 'Nothing2Nothing', remap it from the source
            environment to the target environment.
    '''
    args = self.args

    # step 1: reload the normalizer that was saved next to the checkpoint
    if args.ckpt_name is not None:
        ckpt_dir = os.path.join(init_path.get_base_dir(), 'checkpoint')
        logger.info('[LOAD_CKPT] loading observation normalizer info')
        normalizer_file = os.path.join(
            ckpt_dir, args.ckpt_name + '_normalizer.npy')
        self.running_mean_info = model_saver.load_numpy_model(
            normalizer_file, numpy_var_list=self.running_mean_info)
        self.running_mean_info['transfer_env'] = args.transfer_env

    # step 2: nothing to remap when no transfer task is requested
    if args.transfer_env == 'Nothing2Nothing':
        return

    if args.mlp_raw_transfer == 0:
        assert 'shared' in args.gnn_embedding_option

    # transfer_env has the form '<source>2<target>'; both get the '-v1' tag
    source_env, target_env = [
        name + '-v1' for name in args.transfer_env.split('2')
    ]
    self.running_mean_info = structure_mapper.map_transfer_env_running_mean(
        source_env, target_env, self.running_mean_info,
        self.observation_size,
        args.gnn_node_option, args.root_connection_option,
        args.gnn_output_option, args.gnn_embedding_option
    )
def __init__(self, args, observation_size, action_size, task_q, result_q,
             name_scope='trpo_agent'):
    '''
        @brief:
            Set up the agent process: record the queues, the configuration
            and the problem sizes, and declare the network handles that are
            filled in later.
        @input:
            @args: the configuration object of the agent
            @observation_size, @action_size: stored as given
            @task_q, @result_q: the queues handed in by the caller
            @name_scope: the name scope recorded for this agent
    '''
    # the multiprocessing initialization comes first
    multiprocessing.Process.__init__(self)

    # the queues and the configuration of the agent
    self.task_q, self.result_q = task_q, result_q
    self.args = args

    # the network parameters
    self.name_scope = name_scope
    self.observation_size, self.action_size = observation_size, action_size

    # placeholders, so that the attributes exist before the networks are
    # actually built
    self.policy_network = self.policy_var_list = None
    self.tf_var_list = self.iteration = None

    # the gnn parameters
    if self.args.use_gnn_as_policy:
        self.gnn_parameter_initialization()

    self.base_path = init_path.get_base_dir()
def __init__(self):
    '''
        @brief:
            Locate the (modified) 'WalkersKangaroo.xml' asset and initialize
            the mujoco environment with it.
    '''
    # the asset name is first passed through modify_xml
    asset_name = modify_xml('WalkersKangaroo.xml')

    # <base_dir>/environments/assets/<asset_name>, as an absolute str path
    asset_file = os.path.join(
        init_path.get_base_dir(), 'environments', 'assets', asset_name)
    xml_path = str(os.path.abspath(asset_file))

    # NOTE(review): the 4 is presumably the frame skip -- confirm against
    # the MujocoEnv signature
    mujoco_env.MujocoEnv.__init__(self, xml_path, 4)
    utils.EzPickle.__init__(self)
def __init__(self, num=None):
    '''
        @brief:
            Locate the (modified) 'WalkersFullcheetah.xml' asset and
            initialize the mujoco environment with it.
        @input:
            @num: forwarded to modify_xml and stored as self.num
    '''
    asset_name = modify_xml('WalkersFullcheetah.xml', num)

    # <base_dir>/environments/assets/<asset_name>, as an absolute str path
    asset_file = os.path.join(
        init_path.get_base_dir(), 'environments', 'assets', asset_name)
    xml_path = str(os.path.abspath(asset_file))

    # self.num is recorded before the parent initializers are run
    self.num = num

    # NOTE(review): the 4 is presumably the frame skip -- confirm against
    # the MujocoEnv signature
    mujoco_env.MujocoEnv.__init__(self, xml_path, 4)
    utils.EzPickle.__init__(self)
def get_output_path(self):
    '''
        @brief:
            Return the path of the csv log file, creating the 'csv_log'
            directory if it does not exist yet.
        @output:
            <root>/csv_log/<task>_<time_id>.csv, where <root> is the
            absolute form of @self.args.output_dir, or of the project base
            dir when no output dir is configured
    '''
    root = self.args.output_dir
    if root is None:
        root = init_path.get_base_dir()

    log_dir = os.path.join(os.path.abspath(root), 'csv_log')
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    file_name = self.args.task + '_' + self.args.time_id + '.csv'
    return os.path.join(log_dir, file_name)
def get_output_path(self, save=True):
    '''
        @brief:
            Return the model path used for saving or for loading.
        @input:
            @save: if True, return <root>/checkpoint/<experiment_name>
                (the 'checkpoint' directory is created when missing);
                otherwise return @self.args.ckpt_name untouched
    '''
    if not save:
        # loading: the checkpoint name is used as it is
        return self.args.ckpt_name

    root = self.args.output_dir
    if root is None:
        root = init_path.get_base_dir()

    ckpt_dir = os.path.join(os.path.abspath(root), 'checkpoint')
    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir)

    return os.path.join(ckpt_dir, self.get_experiment_name())
def wrap_env_monitor(self):
    '''
        @brief:
            When monitoring is allowed, wrap @self.env into a gym Monitor
            that writes into <root>/video/<task>_<time_id>. Nothing is done
            when @self.allow_monitor is false.
    '''
    if not self.allow_monitor:
        return

    def record_episode(episode):
        # true for the first 6 episodes of every @video_freq episodes
        return episode % self.args.video_freq < 6

    root = self.args.output_dir
    if root is None:
        root = init_path.get_base_dir()

    run_tag = self.args.task + '_' + self.args.time_id
    video_dir = os.path.abspath(os.path.join(root, 'video', run_tag))
    if not os.path.exists(video_dir):
        os.makedirs(video_dir)

    self.env = gym.wrappers.Monitor(
        self.env, video_dir, video_callable=record_episode)
def __init__(self, pod_number=2):
    '''
        @brief:
            Locate the 'Reacher<num_str>.xml' asset, set the reward
            coefficients, and initialize the mujoco environment.
        @input:
            @pod_number: selects the xml asset (via get_env_num_str) and
                sets the number of bodies to pod_number + 1
    '''
    # get the path of the environment's xml
    asset_name = 'Reacher' + self.get_env_num_str(pod_number) + '.xml'
    asset_file = os.path.join(
        init_path.get_base_dir(), 'environments', 'assets', asset_name)
    xml_path = str(os.path.abspath(asset_file))

    # the environment coeff
    self.num_body = pod_number + 1
    self._task_indicator = -1.0
    # NOTE(review): `self.num_body / 2` floors under Python 2 for an int
    # pod_number but not under Python 3 -- confirm the intended interpreter
    self._ctrl_coeff = 2.0 / (self.num_body / 2 + 1)
    # norm the max penalty to be 1, max norm is self.num_body * 0.1 * 2
    self._dist_coeff = 2.0 / self.num_body

    # NOTE(review): the 2 is presumably the frame skip -- confirm against
    # the MujocoEnv signature
    mujoco_env.MujocoEnv.__init__(self, xml_path, 2)
    utils.EzPickle.__init__(self)
def __init__(self, sess, summary_name, enable=True, summary_dir=None):
    '''
        @brief:
            Create the summary writer under <root>/summary/<summary_name>.
        @input:
            @sess: the session whose graph is handed to the writer
            @summary_name: the sub-directory used by this writer
            @enable: if False, only the basic attributes are set and no
                writer (and no directory) is created
            @summary_dir: the <root>; the project base dir is used when it
                is None
    '''
    # the interface we need
    self.summary = None
    self.sess = sess
    self.enable = enable

    if not enable:
        # the summary handler is disabled
        return

    root = init_path.get_base_dir() if summary_dir is None else summary_dir
    summary_root = os.path.abspath(os.path.join(root, 'summary'))
    if not os.path.exists(summary_root):
        os.makedirs(summary_root)

    self.path = os.path.join(summary_root, summary_name)
    self.train_writer = tf.summary.FileWriter(self.path, self.sess.graph)

    logger.info(
        'summary write initialized, writing to {}'.format(self.path))
def __init__(self, CentipedeLegNum=4, is_crippled=False):
    '''
        @brief:
            Locate the centipede xml asset, set the cost coefficients and
            the body index tables, and initialize the mujoco environment.
        @input:
            @CentipedeLegNum: selects the xml asset (via get_env_num_str)
                and scales the cost coefficients
            @is_crippled: if True, the 'CpCentipede' asset is used instead
                of the 'Centipede' one
    '''
    # get the path of the environment's xml
    prefix = 'CpCentipede' if is_crippled else 'Centipede'
    xml_name = prefix + self.get_env_num_str(CentipedeLegNum) + '.xml'
    xml_path = str(os.path.abspath(os.path.join(
        init_path.get_base_dir(), 'environments', 'assets', xml_name)))

    # one body for every two legs, rounded up
    self.num_body = int(np.ceil(CentipedeLegNum / 2.0))

    # both cost coefficients are scaled by 4 / CentipedeLegNum
    self._control_cost_coeff = .5 * 4 / CentipedeLegNum
    self._contact_cost_coeff = 0.5 * 1e-3 * 4 / CentipedeLegNum

    body_index = np.array(range(self.num_body))
    self.torso_geom_id = 1 + body_index * 5

    # make sure the centipede is not born to be end of episode
    self.body_qpos_id = 6 + 6 + body_index * 6
    self.body_qpos_id[-1] = 5

    # NOTE(review): the 5 is presumably the frame skip -- confirm against
    # the MujocoEnv signature
    mujoco_env.MujocoEnv.__init__(self, xml_path, 5)
    utils.EzPickle.__init__(self)
# 3. add support for reacher, pendulum # 4. MAJOR UPDATE Aug. 21, 2017: now the root only absorbs the joints that # are not motors. # 5. MAJOR UPDATE, removing all the geom nodes, Sept. 10th, 2017 # ----------------------------------------------------------------------------- import os import numpy as np from bs4 import BeautifulSoup as bs from tool import init_path from util import logger from environments import register __all__ = ['parse_mujoco_graph'] XML_ASSERT_DIR = os.path.join(init_path.get_base_dir(), 'environments', 'assets') ''' Definition of nodes: @root: The 'root' type is the combination of the top level 'body' node and the top level free 'joint' (two nodes combined) Also, additional input will be assigned to the root node (e.g. the position of the target). For different tasks, we should have different MLP for each root. @geom, @body, @joint: The structure defined in the xml files. Ideally, the MLP for input, propagation, and output could be shared among different models. '''
#!/usr/bin/env python2 # ----------------------------------------------------------------------------- # @author: # Tingwu Wang, Jun 23rd, 2017 # ----------------------------------------------------------------------------- import tool.init_path as init_path from util import logger import graph_util.mujoco_parser as mujoco_parser import numpy as np _BASE_DIR = init_path.get_base_dir() def map_output(transfer_env, i_value, added_constant, gnn_option_list): ''' @brief: i_value could be the logstd (1, num_action), policy_output/w (64, num_action), policy_output/b (1, num_action) ''' assert len(gnn_option_list) == 4 i_value = np.transpose(i_value) # make the num_action to the front ienv, oenv = [env + '-v1' for env in transfer_env.split('2')] ienv_info = mujoco_parser.parse_mujoco_graph( ienv, gnn_node_option=gnn_option_list[0], root_connection_option=gnn_option_list[1], gnn_output_option=gnn_option_list[2], gnn_embedding_option=gnn_option_list[3]) oenv_info = mujoco_parser.parse_mujoco_graph( oenv,
# are not motors. # 5. MAJOR UPDATE, removing all the geom nodes, Sept. 10th, 2017 # ----------------------------------------------------------------------------- import tool.init_path as init_path import os import numpy as np from bs4 import BeautifulSoup as bs from util import logger from environments import register __all__ = ['parse_mujoco_graph'] XML_ASSERT_DIR = os.path.join(init_path.get_base_dir(), 'environments', 'assets') ''' Definition of nodes: @root: The 'root' type is the combination of the top level 'body' node and the top level free 'joint' (two nodes combined) Also, additional input will be assigned to the root node (e.g. the position of the target). For different tasks, we should have different MLP for each root. @geom, @body, @joint: The structure defined in the xml files. Ideally, the MLP for input,