def __init__(self, args, observation_size, action_size,
             action_distribution, network_type, task_queue, result_queue,
             worker_id, name_scope='worker'):
    '''
        @brief:
            Initialise the worker process, store its configuration, and
            build its environment via `self._build_env()`.
        @input:
            @args: configuration object; `args.seed` and
                `args.cache_environments` are read here
            @observation_size, @action_size, @action_distribution,
            @network_type: stored unchanged on the instance
            @task_queue, @result_queue: queue objects stored unchanged
            @worker_id: id of this worker, also used to offset the seed
            @name_scope: name scope string stored on the instance
    '''
    # the multiprocessing initialization
    multiprocessing.Process.__init__(self)

    # identity / configuration of this worker
    self.args = args
    self._name_scope = name_scope
    self._worker_id = worker_id
    self._network_type = network_type

    # every worker draws from its own stream: shared seed + worker id
    self._npr = np.random.RandomState(args.seed + worker_id)

    # problem description
    self._observation_size = observation_size
    self._action_size = action_size
    self._action_distribution = action_distribution

    # communication channels
    self._task_queue = task_queue
    self._result_queue = result_queue

    # environment bookkeeping
    self._num_envs_required = 1
    self._env_start_index = 0
    self._envs = []
    self._environments_cache = []
    self._episodes_so_far = 0

    logger.info('Worker {} online'.format(worker_id))

    self._base_dir = init_path.get_base_dir()
    self._build_env()

    control_info = {
        'use_default_goal': True,
        'use_default_states': True,
        'use_cached_environments': self.args.cache_environments,
        'rollout_model': 'final',
    }
    self.control_info = control_info
def __init__(self, *args, **kwargs):
    '''
        @brief:
            Forward all arguments to the parent constructor, record the
            project base directory, then build the preprocessing and the
            output parts inside the variable scope named `self.name`.
    '''
    super(network, self).__init__(*args, **kwargs)

    base_dir = init_path.get_base_dir()
    self._base_dir = base_dir

    # `self.name` / `self.reuse` are expected to be set by the parent class
    scope = tf.variable_scope(self.name, reuse=self.reuse)
    with scope:
        self._build_preprocess()
        self._build_outputs()
def __init__(self, CentipedeLegNum=4, is_crippled=False):
    '''
        @brief:
            Locate the centipede xml asset for the requested number of legs
            and initialise the mujoco environment with it.
        @input:
            @CentipedeLegNum: number of legs, used to pick the xml file and
                to scale the cost coefficients
            @is_crippled: if True, the 'CpCentipede*' asset is used instead
                of the 'Centipede*' one
    '''
    # get the path of the environments
    name_prefix = 'CpCentipede' if is_crippled else 'Centipede'
    xml_name = name_prefix + self.get_env_num_str(CentipedeLegNum) + '.xml'
    asset_path = os.path.join(
        init_path.get_base_dir(), 'environments', 'assets', xml_name
    )
    xml_path = str(os.path.abspath(asset_path))

    self.num_body = int(np.ceil(CentipedeLegNum / 2.0))

    # both coefficients are scaled by 4 / CentipedeLegNum
    self._control_cost_coeff = 0.5 * 4 / CentipedeLegNum
    self._contact_cost_coeff = 0.5 * 1e-3 * 4 / CentipedeLegNum

    body_index = np.array(range(self.num_body))
    self.torso_geom_id = 1 + body_index * 5

    # make sure the centipede is not born to be end of episode
    self.body_qpos_id = 12 + body_index * 6
    self.body_qpos_id[-1] = 5

    mujoco_env.MujocoEnv.__init__(self, xml_path, 5)
    utils.EzPickle.__init__(self)
def burst_images(args, local_dir, spc_candidates):
    '''
        @brief:
            Run the `env/visualize_species.py` script once per selected
            species, waiting for each run to finish.
        @input:
            @args: configuration object; only `args.video` is read
            @local_dir: directory holding the 'species_topology' and
                'species_video' sub-directories
            @spc_candidates: container of species ids to visualise
        @output:
            None
    '''
    visualize_script = os.path.join(init_path.get_base_dir(),
                                    'env', 'visualize_species.py')

    # NOTE: the equality test (rather than `not args.video`) is kept on
    # purpose so that `args.video is None` still selects the video branch,
    # exactly as before.
    if args.video == False:  # noqa: E712
        topology_dir = os.path.join(local_dir, 'species_topology')
        for spc_id in spc_candidates:
            topology_file = '%s/%d.npy' % (topology_dir, spc_id)
            # argument list + no shell: paths containing spaces or shell
            # metacharacters are passed through verbatim
            process = subprocess.Popen(
                ['python', visualize_script, '-i', topology_file, '-v', '0']
            )
            process.wait()
    else:
        video_dir = os.path.join(local_dir, 'species_video')
        for filename in glob.glob(video_dir + '/*'):
            # file names look like '<gen_id>_<spc_id>_<ep_num>.<ext>'
            v_id = os.path.basename(filename).split('.')[0]
            gen_id, spc_id, ep_num = [int(x) for x in v_id.split('_')]

            # only the episode-5 recording of a selected species is used
            if ep_num != 5:
                continue
            if spc_id not in spc_candidates:
                continue

            process = subprocess.Popen(
                ['python', visualize_script, '-i', filename,
                 '-v', '1', '-l', '1000']
            )
            process.wait()

    return None
def test_perturb_hierarchy(args, max_evo_step=7, body_part_num=3):
    '''
        @brief:
            Interactive check of the species mutation: for every evolution
            step, dump the current xml, record one episode to
            'evo<i>.mp4', mutate the species and print the mutation type.
        @input:
            @args: configuration object forwarded to the species and to the
                episode runner
            @max_evo_step: number of mutate-and-record iterations
            @body_part_num: forwarded as `body_num` to the species
    '''
    spc = hierarchy_model.Species(args, body_num=body_part_num)
    file_path = os.path.join(init_path.get_base_dir(),
                             'env/assets/gen/test_hierarchy_perturb.xml')

    for i in range(max_evo_step):
        print('Evolution @ %d' % i)
        # `width` / `height` are expected to be defined at module level
        videoh = cv2.VideoWriter(
            'evo' + str(i) + '.mp4',
            cv2.VideoWriter_fourcc(*'mp4v'),
            40,
            (width * 2, height)
        )
        try:
            adj_mat, _ = spc.get_gene()
            _, xml_str = spc.get_xml()
            model_gen_util.xml_string_to_file(xml_str, file_path)
            test_func.run_one_ep_given_model(
                args, adj_mat, xml_str,
                videoh=videoh,
                max_time_step=30
            )
        finally:
            # always close the writer so that the file handle is not leaked,
            # even when the episode fails
            videoh.release()

        debug_info = spc.mutate()
        # NOTE(review): deliberate breakpoint of this manual test; it blocks
        # any non-interactive run -- confirm it is still wanted
        import pdb; pdb.set_trace()
        print('Mutate option: %s' % debug_info['op'])
def __init__(self, args, observation_size, action_size, task_q, result_q, name_scope='trpo_agent'):
    '''
        @brief:
            Initialise the agent process and store its configuration. The
            networks themselves are not built here; the related attributes
            are only created as None placeholders.
        @input:
            @args: configuration object; `args.use_gnn_as_policy` is read
            @observation_size, @action_size: stored unchanged
            @task_q, @result_q: queue objects stored unchanged
            @name_scope: name scope string stored unchanged

        NOTE(review): the statement layout was reconstructed from a
        flattened source line; confirm that `observation_size` is really
        assigned and that `base_path` is set outside the gnn branch.
    '''
    # the multiprocessing initialization
    multiprocessing.Process.__init__(self)
    self.task_q = task_q
    self.result_q = result_q

    # the configurations for the agent
    self.args = args

    # the network parameters
    self.name_scope = name_scope

    # the observation / action sizes handed in by the caller
    self.observation_size = observation_size
    self.action_size = action_size

    # the variables and networks to be used, init them before use them
    self.policy_network = None
    self.policy_var_list = None
    self.tf_var_list = None
    self.iteration = None

    # the gnn parameters
    if self.args.use_gnn_as_policy:
        self.gnn_parameter_initialization()

    # root directory of the project, as reported by init_path
    self.base_path = init_path.get_base_dir()
def load_running_means(self):
    '''
        @brief:
            Optionally load the observation normalizer info from the
            checkpoint directory and, when a transfer task is configured,
            map it from the source environment to the target environment.

        NOTE(review): the nesting below was reconstructed from a flattened
        source line; confirm which statements belong to the checkpoint
        branch.
    '''
    # load the observation running mean
    if self.args.ckpt_name is not None:
        base_path = os.path.join(init_path.get_base_dir(), 'checkpoint')
        logger.info('[LOAD_CKPT] loading observation normalizer info')
        # the normalizer is stored next to the checkpoint as
        # '<ckpt_name>_normalizer.npy'
        self.running_mean_info = model_saver.load_numpy_model(
            os.path.join(base_path, self.args.ckpt_name + '_normalizer.npy'),
            numpy_var_list=self.running_mean_info)
        self.running_mean_info['transfer_env'] = self.args.transfer_env

    # 'Nothing2Nothing' is the value that disables the transfer mapping
    if not self.args.transfer_env == 'Nothing2Nothing':
        if self.args.mlp_raw_transfer == 0:
            assert 'shared' in self.args.gnn_embedding_option

        # transfer_env has the form '<source>2<target>'; '-v1' is appended
        # to each half to obtain the environment names
        ienv, oenv = [
            env + '-v1' for env in self.args.transfer_env.split('2')
        ]
        self.running_mean_info = \
            structure_mapper.map_transfer_env_running_mean(
                ienv, oenv, self.running_mean_info, self.observation_size,
                self.args.gnn_node_option, self.args.root_connection_option,
                self.args.gnn_output_option, self.args.gnn_embedding_option
            )
def save(self, sess):
    '''
        @brief:
            Write a checkpoint named 'tigan_<step>.ckpt' into the
            'checkpoint' directory below the project base directory.
        @input:
            @sess: the session handed to `self.saver.save`
    '''
    ckpt_name = 'tigan_' + str(self.step) + '.ckpt'
    path = os.path.join(init_path.get_base_dir(), 'checkpoint', ckpt_name)

    self.saver.save(sess, path)
    logger.info('checkpoint saved to {}'.format(path))
def __init__(self, sess):
    '''
        @brief:
            Create a summary file writer that writes into the 'checkpoint'
            directory below the project base directory.
        @input:
            @sess: the session whose graph is handed to the writer
    '''
    # the interface we need
    self.summary = None
    self.sess = sess

    log_dir = os.path.join(init_path.get_base_dir(), 'checkpoint')
    self.path = log_dir
    self.train_writer = tf.summary.FileWriter(log_dir, sess.graph)

    message = 'summary write initialized, writing to {}'.format(self.path)
    logger.info(message)
def __init__(self, num=None):
    '''
        @brief:
            Initialise the mujoco environment from the (modified)
            'WalkersFullcheetah.xml' asset.
        @input:
            @num: forwarded to `modify_xml` and stored as `self.num`
    '''
    # `modify_xml` returns the name of the xml file that is actually loaded
    xml_name = modify_xml('WalkersFullcheetah.xml', num)
    asset_path = os.path.join(
        init_path.get_base_dir(), 'environments', 'assets', xml_name
    )
    xml_path = str(os.path.abspath(asset_path))

    self.num = num
    mujoco_env.MujocoEnv.__init__(self, xml_path, 4)
    utils.EzPickle.__init__(self)
def __init__(self):
    '''
        @brief:
            Initialise the mujoco environment from the (modified)
            'WalkersKangaroo.xml' asset.
    '''
    # get the path of the environments
    xml_name = modify_xml('WalkersKangaroo.xml')
    asset_path = os.path.join(
        init_path.get_base_dir(), 'environments', 'assets', xml_name
    )
    xml_path = str(os.path.abspath(asset_path))

    mujoco_env.MujocoEnv.__init__(self, xml_path, 4)
    utils.EzPickle.__init__(self)
def __init__(self, pod_number=3):
    '''
        @brief:
            Initialise the mujoco environment from the 'InvPendulum*' xml
            asset that matches the requested number of pods.
        @input:
            @pod_number: selects the xml file and is stored as
                `self.num_body`
    '''
    # get the path of the environments
    xml_name = 'InvPendulum' + self.get_env_num_str(pod_number) + '.xml'
    asset_path = os.path.join(
        init_path.get_base_dir(), 'environments', 'assets', xml_name
    )
    xml_path = str(os.path.abspath(asset_path))

    self.num_body = pod_number
    mujoco_env.MujocoEnv.__init__(self, xml_path, 5)
    utils.EzPickle.__init__(self)
def save_all(self):
    '''
        @brief:
            Save the network parameters as 'dqn_<step>.ckpt' in the
            'checkpoint' directory, then save the experience shop under the
            same path.
    '''
    ckpt_name = 'dqn_' + str(self.step) + '.ckpt'
    path = os.path.join(init_path.get_base_dir(), 'checkpoint', ckpt_name)

    # the network parameters
    self.saver.save(self.sess, path)
    logger.info('checkpoint saved to {}'.format(path))

    # save the experience shop
    self.exp_shop.save(path)
def get_output_path(args, env_str='deepmind'):
    '''
        @brief:
            Build the absolute video output directory
            '<base>/video/[test_]<env_str>/<task>_<time_id>' and make sure
            it exists.
        @input:
            @args: configuration object; `output_dir`, `test_env`, `task`
                and `time_id` are read. When `output_dir` is None the
                project base directory is used as <base>.
            @env_str: name of the environment family, used as folder name
        @output:
            the absolute path of the (now existing) directory
    '''
    if args.output_dir is None:
        base_path = init_path.get_base_dir()
    else:
        base_path = args.output_dir

    is_test_env = 'test_' if args.test_env else ''
    path = os.path.abspath(
        os.path.join(base_path, 'video', is_test_env + env_str,
                     args.task + '_' + args.time_id)
    )

    # create-then-check instead of check-then-create: another process may
    # create the directory between an `exists` test and `makedirs`
    try:
        os.makedirs(path)
    except OSError:
        if not os.path.isdir(path):
            raise
    return path
def play_game_and_save(self):
    '''
        @brief:
            Disabled entry point. It used to create a
            'video/<time>dqn_<step>_<env_name>' directory and to toggle the
            environment monitor for 10 videos, but it has been marked as
            not usable.
        @raises:
            AssertionError, always. The error is raised explicitly (instead
            of using an `assert` statement) so that the guard is still
            active when python runs with -O.
    '''
    logger.error('Not usable')
    raise AssertionError('Not usable')
def __init__(self, args, input_tensor_dict, output_distribution,
             input_state_size, input_goal_size, output_goal_size,
             maximum_dimension, random_state, batch_length, batch_size,
             lookahead, name, is_manager=False, reuse=False):
    '''
        @brief:
            Store the configuration of the network. No tensor is built
            here; `_tensor`, `outputs` and `states` start as empty dicts.
        @input:
            @args: configuration object; `use_state_preprocessing` and
                `use_state_embedding` decide how the sizes are chosen
            @input_tensor_dict, @output_distribution: stored unchanged
            @input_state_size, @input_goal_size, @output_goal_size: the
                candidate sizes, see the branch below
            @maximum_dimension, @batch_length, @batch_size, @lookahead,
            @random_state, @name, @is_manager, @reuse: stored unchanged
    '''
    self.args = args
    self.reuse = reuse
    self._input_tensor = input_tensor_dict
    self._distribution = output_distribution

    # the state size is the same in both modes
    self._input_state_size = input_state_size

    uses_processed_state = \
        args.use_state_preprocessing or args.use_state_embedding
    if uses_processed_state:
        self._input_goal_size = input_goal_size
        self._output_size = output_goal_size
    else:
        # if using raw state inputs, the goal has the size of the state
        self._input_goal_size = input_state_size
        self._output_size = \
            input_state_size if is_manager else output_goal_size

    self._maximum_dimension = maximum_dimension
    self._batch_dimension = batch_length
    self._batch_size = batch_size
    self._lookahead_range = lookahead

    self._base_dir = init_path.get_base_dir()
    self._npr = random_state
    self.name = name
    self._is_manager = is_manager

    # containers filled when the network is built
    self._tensor = {}
    self.outputs = {}
    self.states = {}
def get_output_path(self, save=True):
    '''
        @brief:
            Return the path of the model file.
        @input:
            @save: if True, the path is
                '<root>/checkpoint/<experiment name>', where <root> is
                `args.output_dir` (or the project base directory when that
                is None), and the 'checkpoint' directory is created when
                missing. If False, `args.ckpt_name` is returned unchanged.
        @output:
            the model path
    '''
    if not save:
        return self.args.ckpt_name

    if self.args.output_dir is None:
        root = os.path.abspath(init_path.get_base_dir())
    else:
        root = os.path.abspath(self.args.output_dir)
    base_path = os.path.join(root, 'checkpoint')

    # create-then-check instead of check-then-create: another process may
    # create the directory between an `exists` test and `makedirs`
    try:
        os.makedirs(base_path)
    except OSError:
        if not os.path.isdir(base_path):
            raise

    return os.path.join(base_path, self.get_experiment_name())
def save_generated_imgs(self, fake_img, text, dataset_name):
    '''
        @brief:
            Save the generated images as '<text[i]>.jpg' into
            'data/data_dir/<dataset_name>/sample/tiGAN<step>' below the
            project base directory.
        @input:
            @fake_img: indexable batch of images; the values are mapped
                with (x + 1) * 255 / 2 and cast to uint8 before saving
                (presumably the generator output lies in [-1, 1] -- TODO
                confirm)
            @text: sequence of strings, one file name stem per image
            @dataset_name: name of the dataset folder
    '''
    save_path = os.path.join(init_path.get_base_dir(), 'data', 'data_dir',
                             dataset_name, 'sample',
                             'tiGAN' + str(self.step))

    if not os.path.exists(save_path):
        # make a dir for the new samples; `makedirs` also creates missing
        # parents, and a directory created concurrently is not an error
        try:
            os.makedirs(save_path)
        except OSError:
            if not os.path.isdir(save_path):
                raise
        logger.info('Making new directory {}'.format(save_path))

    fake_img = (fake_img + 1.0) * 255.0 / 2.0
    fake_img = fake_img.astype('uint8')

    for i_img, caption in enumerate(text):
        sio.imsave(os.path.join(save_path, caption + '.jpg'),
                   fake_img[i_img])
    logger.info('Generated images are saved to {}'.format(save_path))
    return
def wrap_env_monitor(self):
    '''
        @brief:
            When `self.allow_monitor` is set, wrap `self.env` in a gym
            Monitor that writes into
            '<base>/video/<task>_<time_id>', where <base> is
            `args.output_dir` (or the project base directory when that is
            None). Otherwise nothing happens.
    '''
    if not self.allow_monitor:
        return

    def video_callback(episode):
        # record the first 6 episodes of every `video_freq` episodes
        return episode % self.args.video_freq < 6

    if self.args.output_dir is None:
        base_path = init_path.get_base_dir()
    else:
        base_path = self.args.output_dir

    path = os.path.abspath(
        os.path.join(base_path, 'video',
                     self.args.task + '_' + self.args.time_id)
    )

    # create-then-check instead of check-then-create: another process may
    # create the directory between an `exists` test and `makedirs`
    try:
        os.makedirs(path)
    except OSError:
        if not os.path.isdir(path):
            raise

    self.env = gym.wrappers.Monitor(
        self.env, path, video_callable=video_callback
    )
def __init__(self, pod_number=3, is_crippled=False):
    '''
        @brief:
            Initialise the mujoco environment from the snake xml asset that
            matches the requested number of pods.
        @input:
            @pod_number: selects the xml file, is stored as
                `self.num_body` and scales the control cost coefficient
            @is_crippled: if True, the 'CrippledSnake*' asset is used
                instead of the 'Snake*' one
    '''
    # get the path of the environments
    name_prefix = 'CrippledSnake' if is_crippled else 'Snake'
    xml_name = name_prefix + self.get_env_num_str(pod_number) + '.xml'
    asset_path = os.path.join(
        init_path.get_base_dir(), 'environments', 'assets', xml_name
    )
    xml_path = str(os.path.abspath(asset_path))

    self.num_body = pod_number
    self._direction = 0
    self.ctrl_cost_coeff = 0.0001 / pod_number * 3

    mujoco_env.MujocoEnv.__init__(self, xml_path, 4)
    utils.EzPickle.__init__(self)
def __init__(self, pod_number=2):
    '''
        @brief:
            Initialise the mujoco environment from the 'Reacher*' xml asset
            that matches the requested number of pods.
        @input:
            @pod_number: selects the xml file; `self.num_body` is set to
                pod_number + 1

        NOTE(review): `num_body / 2` below is an integer division under
        python 2 and a true division under python 3 -- confirm which one is
        intended.
    '''
    # get the path of the environments
    xml_name = 'Reacher' + self.get_env_num_str(pod_number) + '.xml'
    asset_path = os.path.join(
        init_path.get_base_dir(), 'environments', 'assets', xml_name
    )
    xml_path = str(os.path.abspath(asset_path))

    # the environment coeff
    num_body = pod_number + 1
    self.num_body = num_body
    self._task_indicator = -1.0
    self._ctrl_coeff = 2.0 / (num_body / 2 + 1)
    # norm the max penalty to be 1, max norm is self.num_body * 0.1 * 2
    self._dist_coeff = 2.0 / num_body

    mujoco_env.MujocoEnv.__init__(self, xml_path, 2)
    utils.EzPickle.__init__(self)
def init_summary(self, sess):
    '''
        @brief:
            Create one scalar summary per loss, merge them into a generator
            group and a discriminator group, and open a file writer on the
            'summary' directory below the project base directory.
        @input:
            @sess: the session whose graph is handed to the writer
    '''
    scalar = tf.summary.scalar

    # one scalar summary per loss term
    self.loss_d_sum = scalar('discriminator_loss', self.loss_d)
    self.loss_g_sum = scalar('generator_loss', self.loss_g)
    self.loss_real_sum = scalar('real_pair_loss', self.loss_r)
    self.loss_w_sum = scalar('fake_text_real_img_loss', self.loss_w)
    self.loss_f_sum = scalar('real_text_fake_img_loss', self.loss_f)

    # merged groups: generator side / discriminator side
    generator_summaries = [self.loss_g_sum, self.loss_f_sum, self.loss_w_sum]
    discriminator_summaries = [self.loss_d_sum, self.loss_real_sum]
    self.g_sum = tf.summary.merge(generator_summaries)
    self.d_sum = tf.summary.merge(discriminator_summaries)

    path = os.path.join(init_path.get_base_dir(), 'summary')
    self.train_writer = tf.summary.FileWriter(path, sess.graph)
    logger.info('summary write initialized, writing to {}'.format(path))
def __init__(self, sess, summary_name, enable=True, summary_dir=None):
    '''
        @brief:
            Create a summary file writer that writes into
            '<root>/summary/<summary_name>', where <root> is `summary_dir`
            (or the project base directory when that is None). The
            'summary' directory is created when missing.
        @input:
            @sess: the session whose graph is handed to the writer
            @summary_name: name of the sub-folder used by the writer
            @enable: if False, only `summary`, `sess` and `enable` are set
                and no writer is created
            @summary_dir: optional root directory of the summaries
    '''
    # the interface we need
    self.summary = None
    self.sess = sess
    self.enable = enable
    if not self.enable:  # the summary handler is disabled
        return

    if summary_dir is None:
        summary_root = os.path.join(init_path.get_base_dir(), 'summary')
    else:
        summary_root = os.path.join(summary_dir, 'summary')
    summary_root = os.path.abspath(summary_root)

    # create-then-check instead of check-then-create: another process may
    # create the directory between an `exists` test and `makedirs`
    try:
        os.makedirs(summary_root)
    except OSError:
        if not os.path.isdir(summary_root):
            raise

    self.path = os.path.join(summary_root, summary_name)
    self.train_writer = tf.summary.FileWriter(self.path, self.sess.graph)

    logger.info('summary write initialized, writing to {}'.format(
        self.path))
def __init__(self, args, session, name_scope, observation_size, action_size,
             action_distribution):
    '''
        @brief:
            Store the configuration and create the empty containers that
            are filled when the networks are built.
        @input:
            @args: configuration object; `args.seed` seeds the random state
            @session: session to use; when None, the default tensorflow
                session is used instead
            @name_scope, @observation_size, @action_size,
            @action_distribution: stored unchanged
    '''
    self._session = \
        session if session is not None else tf.get_default_session()

    self.args = args
    self._name_scope = name_scope
    self._observation_size = observation_size
    self._action_size = action_size
    self._action_distribution = action_distribution
    self._base_dir = init_path.get_base_dir()

    # the whitening (normalization) bookkeeping
    self._whitening_operator = {}
    self._whitening_variable = []

    self._npr = np.random.RandomState(args.seed)

    # containers for placeholders, tensors, agents and update operators
    self._input_ph = {}
    self._tensor = {}
    self._agents = {}
    self._update_operator = {}
def sync_dir(remote_dir, synced_flag=False):
    '''
        @brief:
            Print the rsync command that would mirror `remote_dir` into the
            local 'evolution_data' directory and return the local path of
            the session. The command itself is currently NOT executed.
        @input:
            @remote_dir: remote session directory; its last '/'-separated
                component is used as session name
            @synced_flag: currently without effect, since the sync step is
                disabled
        @output:
            '<base>/evolution_data/<session name>'
    '''
    session_name = remote_dir.split('/')[-1]
    data_root = os.path.join(init_path.get_base_dir(), 'evolution_data')
    print('Local directory', data_root)

    command = 'rsync -avz %s %s/ --exclude=\'*mp4\' --exclude=\'*png\' --delete' % \
        (remote_dir, data_root)
    print(command)

    start_time = time.time()
    # momentarily switched off: when `synced_flag` is False the command
    # above used to be run through subprocess and waited for
    print('Syncing takes', time.time() - start_time)

    return os.path.join(data_root, session_name)
# 3. add support for reacher, pendulum # 4. MAJOR UPDATE Aug. 21, 2017: now the root only absorbs the joints that # are not motors. # 5. MAJOR UPDATE, removing all the geom nodes, Sept. 10th, 2017 # ----------------------------------------------------------------------------- import init_path import os import numpy as np from bs4 import BeautifulSoup as bs from util import logger from environments import register __all__ = ['parse_mujoco_graph'] XML_ASSERT_DIR = os.path.join(init_path.get_base_dir(), 'environments', 'assets') ''' Definition of nodes: @root: The 'root' type is the combination of the top level 'body' node and the top level free 'joint' (two nodes combined) Also, additional input will be assigned to the root node (e.g. the position of the target). For different tasks, we should have different MLP for each root. @geom, @body, @joint: The structure defined in the xml files. Ideally, the MLP for input, propagation, and output could be shared among different models. '''
# are not motors. # 5. MAJOR UPDATE, removing all the geom nodes, Sept. 10th, 2017 # ----------------------------------------------------------------------------- import init_path import os import numpy as np from bs4 import BeautifulSoup as bs from util import logger from environments import register __all__ = ['parse_mujoco_graph'] XML_ASSERT_DIR = os.path.join(init_path.get_base_dir(), 'environments', 'assets') ''' Definition of nodes: @root: The 'root' type is the combination of the top level 'body' node and the top level free 'joint' (two nodes combined) Also, additional input will be assigned to the root node (e.g. the position of the target). For different tasks, we should have different MLP for each root. @geom, @body, @joint: The structure defined in the xml files. Ideally, the MLP for input,
# init the logger, just save the network ---------------------------------- if not args.dcgan: logger.set_file_handler(prefix='TIGAN_') gan_net = TI_GAN(config) logger.info('Training TIGAN') else: logger.set_file_handler(prefix='DCGAN_') gan_net = DC_GAN(config) logger.info('Training DCGAN') # build the network and data loader --------------------------------------- sess = tf.Session() # tf.device('/gpu:' + str(args.gpu)) logger.info('Session starts, using gpu: {}'.format(str(args.gpu))) gan_net.build_models() gan_net.init_training(sess, args.restore) # get the data reader dataset_dir = os.path.join(init_path.get_base_dir(), 'data', 'data_dir') data_reader = tiGAN_data_reader(dataset_name='bird', dataset_dir=dataset_dir, stage='train', debug=True) if args.restore is not None: data_reader.active_shuffle() # train the network logger.info('Training starts, using gpu: {}'.format(str(args.gpu))) gan_net.train_net(sess, data_reader)
import os import init_path import glob import json import argparse import subprocess from shutil import copyfile # local imports from html_visual import vis_tree from html_visual import vis_spc_tree GENEALOGY_HTML = \ os.path.join(init_path.get_base_dir(), 'html_visual/genealogy.html') EVOLUTION_HTML = \ os.path.join(init_path.get_base_dir(), 'html_visual/evolution.html') GENEALOGY_HTML = \ os.path.join(init_path.get_base_dir(), 'html_visual/expand_genealogy.html') def get_config(): ''' ''' def post_process(args): ''' ''' return args parser = argparse.ArgumentParser(description='Set up genealogy visualization')
#!/usr/bin/env python2 # ----------------------------------------------------------------------------- # @author: # Tingwu Wang, Jun 23rd, 2017 # ----------------------------------------------------------------------------- import init_path from util import logger import graph_util.mujoco_parser import numpy as np import graph_util.mujoco_parser _BASE_DIR = init_path.get_base_dir() def map_output(transfer_env, i_value, added_constant, gnn_option_list): ''' @brief: i_value could be the logstd (1, num_action), policy_output/w (64, num_action), policy_output/b (1, num_action) ''' assert len(gnn_option_list) == 4 i_value = np.transpose(i_value) # make the num_action to the front ienv, oenv = [env + '-v1' for env in transfer_env.split('2')] ienv_info = mujoco_parser.parse_mujoco_graph( ienv, gnn_node_option=gnn_option_list[0], root_connection_option=gnn_option_list[1], gnn_output_option=gnn_option_list[2], gnn_embedding_option=gnn_option_list[3]) oenv_info = mujoco_parser.parse_mujoco_graph(