def load_running_means(self):
        '''
            @brief:
                Load the observation running mean. When a checkpoint name is
                given, the normalizer info is read from
                <base_dir>/checkpoint/<ckpt_name>_normalizer.npy and tagged
                with the current transfer setting. For a transfer task
                (anything but 'Nothing2Nothing') the running mean is then
                mapped from the source environment to the target one.
        '''
        ckpt_name = self.args.ckpt_name
        if ckpt_name is not None:
            ckpt_dir = os.path.join(init_path.get_base_dir(), 'checkpoint')
            logger.info('[LOAD_CKPT] loading observation normalizer info')
            normalizer_path = os.path.join(ckpt_dir,
                                           ckpt_name + '_normalizer.npy')
            self.running_mean_info = model_saver.load_numpy_model(
                normalizer_path, numpy_var_list=self.running_mean_info
            )
            self.running_mean_info['transfer_env'] = self.args.transfer_env

        transfer_env = self.args.transfer_env
        if transfer_env == 'Nothing2Nothing':
            # not a transfer task, the running mean is used as it is
            return

        if self.args.mlp_raw_transfer == 0:
            assert 'shared' in self.args.gnn_embedding_option

        # 'A2B' names the source env 'A-v1' and the target env 'B-v1'
        source_env, target_env = [
            name + '-v1' for name in transfer_env.split('2')
        ]
        self.running_mean_info = \
            structure_mapper.map_transfer_env_running_mean(
                source_env, target_env, self.running_mean_info,
                self.observation_size,
                self.args.gnn_node_option,
                self.args.root_connection_option,
                self.args.gnn_output_option,
                self.args.gnn_embedding_option
            )
Beispiel #2
0
    def __init__(self, args, observation_size, action_size,
                 task_q, result_q, name_scope='trpo_agent'):
        '''
            @brief:
                Set up the agent process: store the task / result queues,
                the configuration and the network sizes. The networks
                themselves are not built here, their attributes are only
                declared as None.
            @input:
                args: the configurations for the agent
                observation_size, action_size: stored as given
                task_q, result_q: the queues stored on the process
                name_scope: stored as self.name_scope
        '''
        # the multiprocessing initialization comes before anything else
        multiprocessing.Process.__init__(self)
        self.task_q, self.result_q = task_q, result_q

        # the configurations for the agent
        self.args = args

        # the network parameters
        self.name_scope = name_scope
        self.observation_size, self.action_size = \
            observation_size, action_size

        # the variables and networks to be used, init them before use them
        self.policy_network = self.policy_var_list = None
        self.tf_var_list = self.iteration = None

        # the gnn parameters are only needed when the policy is a gnn
        if self.args.use_gnn_as_policy:
            self.gnn_parameter_initialization()

        self.base_path = init_path.get_base_dir()
Beispiel #3
0
    def __init__(self):
        '''
            @brief:
                Build the kangaroo walker environment from the xml returned
                by modify_xml('WalkersKangaroo.xml'), looked up under
                <base_dir>/environments/assets.
        '''
        # get the path of the environments
        asset_file = modify_xml('WalkersKangaroo.xml')
        asset_dir = os.path.join(init_path.get_base_dir(), 'environments',
                                 'assets')
        xml_path = str(os.path.abspath(os.path.join(asset_dir, asset_file)))

        mujoco_env.MujocoEnv.__init__(self, xml_path, 4)
        utils.EzPickle.__init__(self)
Beispiel #4
0
    def __init__(self, num=None):
        '''
            @brief:
                Build the full-cheetah walker environment from the xml
                returned by modify_xml('WalkersFullcheetah.xml', num),
                looked up under <base_dir>/environments/assets.
            @input:
                num: forwarded to modify_xml and kept as self.num
        '''
        self.num = num

        asset_file = modify_xml('WalkersFullcheetah.xml', num)
        asset_dir = os.path.join(init_path.get_base_dir(), 'environments',
                                 'assets')
        xml_path = str(os.path.abspath(os.path.join(asset_dir, asset_file)))

        mujoco_env.MujocoEnv.__init__(self, xml_path, 4)
        utils.EzPickle.__init__(self)
Beispiel #5
0
    def get_output_path(self):
        '''
            @brief:
                Return the path of the csv log of this run. The 'csv_log'
                directory is created on demand.
            @return:
                <root>/csv_log/<task>_<time_id>.csv, where <root> is the
                absolute args.output_dir, or the project base dir when
                args.output_dir is None.
        '''
        if self.args.output_dir is None:
            root_dir = init_path.get_base_dir()
        else:
            root_dir = self.args.output_dir
        base_path = os.path.join(os.path.abspath(root_dir), 'csv_log')

        # create first and check afterwards: testing os.path.exists before
        # os.makedirs lets two processes race on the same directory
        try:
            os.makedirs(base_path)
        except OSError:
            if not os.path.isdir(base_path):
                raise

        return os.path.join(
            base_path, self.args.task + '_' + self.args.time_id + '.csv')
    def get_output_path(self, save=True):
        '''
            @brief:
                Return the path of the model checkpoint.
            @input:
                save: when True, the path is
                    <root>/checkpoint/<experiment_name> (the 'checkpoint'
                    directory is created on demand), where <root> is the
                    absolute args.output_dir, or the project base dir when
                    args.output_dir is None. When False, args.ckpt_name is
                    returned untouched.
        '''
        if not save:
            return self.args.ckpt_name

        if self.args.output_dir is None:
            root_dir = init_path.get_base_dir()
        else:
            root_dir = self.args.output_dir
        base_path = os.path.join(os.path.abspath(root_dir), 'checkpoint')

        # create first and check afterwards: testing os.path.exists before
        # os.makedirs lets two processes race on the same directory
        try:
            os.makedirs(base_path)
        except OSError:
            if not os.path.isdir(base_path):
                raise

        return os.path.join(base_path, self.get_experiment_name())
Beispiel #7
0
    def wrap_env_monitor(self):
        '''
            @brief:
                Wrap self.env with the gym Monitor. Nothing is done when
                self.allow_monitor is false. The monitor writes to
                <root>/video/<task>_<time_id>, where <root> is
                args.output_dir, or the project base dir when
                args.output_dir is None.
        '''
        if not self.allow_monitor:
            return

        def video_callback(episode):
            # true for the first 6 episodes of every video_freq episodes
            return episode % self.args.video_freq < 6

        if self.args.output_dir is None:
            base_path = init_path.get_base_dir()
        else:
            base_path = self.args.output_dir

        path = os.path.abspath(
            os.path.join(base_path, 'video',
                         self.args.task + '_' + self.args.time_id))

        # create first and check afterwards: testing os.path.exists before
        # os.makedirs lets two processes race on the same directory
        try:
            os.makedirs(path)
        except OSError:
            if not os.path.isdir(path):
                raise

        self.env = gym.wrappers.Monitor(self.env,
                                        path,
                                        video_callable=video_callback)
Beispiel #8
0
    def __init__(self, pod_number=2):
        '''
            @brief:
                Build the reacher environment from
                'Reacher<env_num_str>.xml' under
                <base_dir>/environments/assets, and set the reward
                coefficients from the number of bodies.
            @input:
                pod_number: self.num_body is pod_number + 1
        '''
        # get the path of the environments
        xml_name = 'Reacher' + self.get_env_num_str(pod_number) + '.xml'
        asset_dir = os.path.join(init_path.get_base_dir(), 'environments',
                                 'assets')
        xml_path = str(os.path.abspath(os.path.join(asset_dir, xml_name)))

        # the environment coeff
        self.num_body = pod_number + 1
        self._task_indicator = -1.0

        # norm the max penalty to be 1, max norm is self.num_body * 0.1 * 2
        self._dist_coeff = 2.0 / self.num_body
        self._ctrl_coeff = 2.0 / (self.num_body / 2 + 1)

        mujoco_env.MujocoEnv.__init__(self, xml_path, 2)
        utils.EzPickle.__init__(self)
    def __init__(self, sess, summary_name, enable=True, summary_dir=None):
        '''
            @brief:
                Set up the summary writer of a tensorflow session.
            @input:
                sess: the session whose graph is handed to the writer
                summary_name: the summary is written to
                    <root>/summary/<summary_name>
                enable: when False only the basic attributes are set, no
                    directory or writer is created
                summary_dir: the <root> above; the project base dir is used
                    when it is None
        '''
        # the interface we need
        self.summary = None
        self.sess = sess
        self.enable = enable
        if not self.enable:  # the summary handler is disabled
            return

        if summary_dir is None:
            root_dir = init_path.get_base_dir()
        else:
            root_dir = summary_dir
        summary_root = os.path.abspath(os.path.join(root_dir, 'summary'))

        # create first and check afterwards: testing os.path.exists before
        # os.makedirs lets two processes race on the same directory
        try:
            os.makedirs(summary_root)
        except OSError:
            if not os.path.isdir(summary_root):
                raise

        self.path = os.path.join(summary_root, summary_name)

        self.train_writer = tf.summary.FileWriter(self.path, self.sess.graph)

        logger.info('summary write initialized, writing to {}'.format(
            self.path))
Beispiel #10
0
    def __init__(self, CentipedeLegNum=4, is_crippled=False):
        '''
            @brief:
                Build the centipede environment from
                '[Cp]Centipede<env_num_str>.xml' under
                <base_dir>/environments/assets, and set the cost
                coefficients and the index arrays from the number of legs.
            @input:
                CentipedeLegNum: the number of legs, two legs per body
                is_crippled: use the 'CpCentipede' xml instead of the
                    'Centipede' one
        '''
        # get the path of the environments
        prefix = 'CpCentipede' if is_crippled else 'Centipede'
        xml_name = prefix + self.get_env_num_str(CentipedeLegNum) + '.xml'
        xml_path = str(os.path.abspath(
            os.path.join(init_path.get_base_dir(), 'environments', 'assets',
                         xml_name)))

        self.num_body = int(np.ceil(CentipedeLegNum / 2.0))
        self._control_cost_coeff = .5 * 4 / CentipedeLegNum
        self._contact_cost_coeff = 0.5 * 1e-3 * 4 / CentipedeLegNum

        body_index = np.array(range(self.num_body))
        self.torso_geom_id = 1 + body_index * 5
        # make sure the centipede is not born to be end of episode
        self.body_qpos_id = 6 + 6 + body_index * 6
        self.body_qpos_id[-1] = 5

        mujoco_env.MujocoEnv.__init__(self, xml_path, 5)

        utils.EzPickle.__init__(self)
Beispiel #11
0
#       3. add support for reacher, pendulum
#       4. MAJOR UPDATE Aug. 21, 2017: now the root only absorbs the joints
#           that are not motors.
#       5. MAJOR UPDATE, removing all the geom nodes, Sept. 10th, 2017
# -----------------------------------------------------------------------------

import os
import numpy as np
from bs4 import BeautifulSoup as bs
from tool import init_path
from util import logger
from environments import register

# the only name exported by `from <this module> import *`
__all__ = ['parse_mujoco_graph']

# <base_dir>/environments/assets, resolved once at import time
# NOTE(review): 'ASSERT' looks like a typo for 'ASSET'; the name is kept as
# it is since other modules may refer to it -- confirm before renaming
XML_ASSERT_DIR = os.path.join(init_path.get_base_dir(), 'environments',
                              'assets')
'''
    Definition of nodes:
    @root:
        The 'root' type is the combination of the top level 'body' node and
        the top level free 'joint' (two nodes combined)
        Also, additional input will be assigned to the root node
        (e.g. the position of the target).

        For different tasks, we should have different MLP for each root.

    @geom, @body, @joint:
        The structure defined in the xml files. Ideally, the MLP for input,
        propagation, and output could be shared among different models.
'''
Beispiel #12
0
#!/usr/bin/env python2
# -----------------------------------------------------------------------------
#   @author:
#       Tingwu Wang, Jun 23rd, 2017
# -----------------------------------------------------------------------------

import tool.init_path as init_path
from util import logger
import graph_util.mujoco_parser as mujoco_parser
import numpy as np

# the project base directory, resolved once at import time
_BASE_DIR = init_path.get_base_dir()


def map_output(transfer_env, i_value, added_constant, gnn_option_list):
    '''
        @brief:
            i_value could be the logstd (1, num_action), policy_output/w
            (64, num_action), policy_output/b (1, num_action)
    '''
    assert len(gnn_option_list) == 4
    i_value = np.transpose(i_value)  # make the num_action to the front
    ienv, oenv = [env + '-v1' for env in transfer_env.split('2')]
    ienv_info = mujoco_parser.parse_mujoco_graph(
        ienv,
        gnn_node_option=gnn_option_list[0],
        root_connection_option=gnn_option_list[1],
        gnn_output_option=gnn_option_list[2],
        gnn_embedding_option=gnn_option_list[3])
    oenv_info = mujoco_parser.parse_mujoco_graph(
        oenv,
#           are not motors.
#       5. MAJOR UPDATE, removing all the geom nodes, Sept. 10th, 2017
# -----------------------------------------------------------------------------


import tool.init_path as init_path
import os
import numpy as np
from bs4 import BeautifulSoup as bs
from util import logger
from environments import register


# the only name exported by `from <this module> import *`
__all__ = ['parse_mujoco_graph']

# <base_dir>/environments/assets, resolved once at import time
# NOTE(review): 'ASSERT' looks like a typo for 'ASSET'; the name is kept as
# it is since other modules may refer to it -- confirm before renaming
XML_ASSERT_DIR = os.path.join(init_path.get_base_dir(),
                              'environments',
                              'assets')

'''
    Definition of nodes:
    @root:
        The 'root' type is the combination of the top level 'body' node and
        the top level free 'joint' (two nodes combined)
        Also, additional input will be assigned to the root node
        (e.g. the position of the target).

        For different tasks, we should have different MLP for each root.

    @geom, @body, @joint:
        The structure defined in the xml files. Ideally, the MLP for input,