def __init__(self, name, env, ob_env_name, primitives, config):
    """Meta policy that selects among a set of pre-trained primitives.

    A throwaway instance of the primitive environment is created only to
    read the observation layout; the trainable graph is built unless the
    oracle meta policy is configured.
    """
    self.name = name
    self._config = config

    # Network hyperparameters for the meta controller.
    self._hid_size = config.meta_hid_size
    self._num_hid_layers = config.meta_num_hid_layers
    self._activation = ops.activation(config.meta_activation)

    # Probe the primitive env for its observation spec, then discard it.
    ref_env = make_env(ob_env_name, config)
    self._ob_shape = ref_env.ob_shape
    self.ob_type = sorted(ref_env.ob_type)
    # Accelerometer entries are unconditionally excluded from the meta obs.
    if 'acc' in self._ob_shape:
        del self._ob_shape['acc']
        self.ob_type.remove('acc')
    ref_env.close()

    self._env = env
    # Flattened observation dimensionality across all observation parts.
    self._ob_space = np.sum(
        [np.prod(shape) for shape in self._ob_shape.values()])

    self.num_primitives = len(primitives)
    self.primitive_names = primitives

    # The oracle meta policy needs no trainable network.
    if not config.meta_oracle:
        self._build()
def __init__(self, name, env, ob_env_name, config):
    """RL (primitive) policy network.

    Derives the observation/action spec from a temporary instance of the
    primitive environment, then builds the graph inside this policy's
    own variable scope.
    """
    # args
    self.name = name

    # training hyperparameters
    self._hid_size = config.rl_hid_size
    self._num_hid_layers = config.rl_num_hid_layers
    self._gaussian_fixed_var = config.rl_fixed_var
    self._activation = ops.activation(config.rl_activation)
    self._include_acc = config.primitive_include_acc

    # properties: probe the primitive env for observation/action specs.
    # FIX: pass `config` like every other make_env call site in this file;
    # this one previously omitted it, inconsistently with its siblings.
    primitive_env = make_env(ob_env_name, config)
    self._ob_shape = primitive_env.ob_shape
    self.ob_type = sorted(primitive_env.ob_type)
    # Optionally drop accelerometer readings from the observation.
    if not self._include_acc and 'acc' in self.ob_type:
        self._ob_shape.pop('acc')
        self.ob_type.remove('acc')
    self._env = env
    # Flattened observation dimensionality.
    self._ob_space = np.sum([np.prod(ob) for ob in self._ob_shape.values()])
    self._ac_space = primitive_env.action_space
    primitive_env.close()

    # Build the network under a dedicated variable scope.
    with tf.variable_scope(self.name):
        self._scope = tf.get_variable_scope().name
        self._build()
def __init__(self, name, env, ob_env_name, num_primitives,
             trans_term_activation='softmax', config=None):
    """Transition policy that learns to connect primitive skills.

    The observation spec comes from a live primitive environment (kept
    around as ``self.primitive_env``); actions are taken in ``env``'s
    action space.
    """
    # configs
    self.term_activation = trans_term_activation
    self._config = config

    # args
    self.name = name
    # Environment id is the prefix before the first '.'.
    self.env_name = self.name.split('.')[0]

    # Network hyperparameters.
    self._hid_size = config.trans_hid_size
    self._num_hid_layers = config.trans_num_hid_layers
    self._gaussian_fixed_var = config.trans_fixed_var
    self._activation = ops.activation(config.trans_activation)
    self._include_acc = config.trans_include_acc

    # Observation layout is taken from the primitive environment.
    prim_env = make_env(ob_env_name, config)
    self._ob_shape = prim_env.ob_shape
    self.ob_type = sorted(prim_env.ob_type)
    self.primitive_env = prim_env  # kept open; not closed here
    # Optionally drop accelerometer readings from the observation.
    if not self._include_acc and 'acc' in self.ob_type:
        del self._ob_shape['acc']
        self.ob_type.remove('acc')

    self._env = env
    # Flattened observation dimensionality.
    self._ob_space = np.sum(
        [np.prod(shape) for shape in self._ob_shape.values()])
    self._ac_space = env.action_space
    self._num_primitives = num_primitives

    # Build the graph inside this policy's own variable scope.
    with tf.variable_scope(self.name):
        self._scope = tf.get_variable_scope().name
        self._build()
def __init__(self, name, env, ob_env_name, config=None):
    """Primitive policy wrapper (trainable or hard-coded).

    Reads specs from the primitive environment; the env is kept alive
    only when its termination signal is needed, and no graph is built
    for hard-coded primitives.
    """
    # configs
    self._config = config

    # args
    self.name = name
    # Environment id is the prefix before the first '-'.
    self.env_name = self.name.split('-')[0]

    # Network hyperparameters.
    self._hid_size = config.primitive_hid_size
    self._num_hid_layers = config.primitive_num_hid_layers
    self._gaussian_fixed_var = config.primitive_fixed_var
    self._activation = ops.activation(config.primitive_activation)
    self._include_acc = config.primitive_include_acc

    # Probe the primitive environment for its specs.
    self.ob_env_name = ob_env_name
    prim_env = make_env(ob_env_name, config)
    self.hard_coded = prim_env.hard_coded
    self._ob_shape = prim_env.ob_shape
    self.ob_type = sorted(prim_env.ob_type)
    # Optionally drop accelerometer readings from the observation.
    if not self._include_acc and 'acc' in self.ob_type:
        del self._ob_shape['acc']
        self.ob_type.remove('acc')

    self._env = env
    # Flattened observation dimensionality.
    self._ob_space = np.sum(
        [np.prod(shape) for shape in self._ob_shape.values()])
    self._ac_space = prim_env.action_space

    # Keep the env alive only when its termination signal is used.
    if config.primitive_use_term:
        self.primitive_env = prim_env
    else:
        prim_env.close()

    # Hard-coded primitives have no trainable network.
    if not self.hard_coded:
        with tf.variable_scope(self.name):
            self._scope = tf.get_variable_scope().name
            self._build()
def __init__(self, name, path, env, ob_env_name, is_train=True,
             use_traj_portion_start=0.0, use_traj_portion_end=1.0,
             config=None):
    """Discriminator-style proximity predictor for one primitive.

    Scores how close a state is to the primitive's success states:
    trained to output 1 on collected success states and 0 on failure
    states, with either an LSGAN or WGAN-style loss.

    Args:
        name: predictor name; the prefix before '.' is the env id.
        path: primitive directory (first path component) that holds the
            collected ``state/*.hdf5`` files.
        env: NOTE(review): never used in this method — the observation
            env is rebuilt from ``ob_env_name``; kept for interface
            compatibility.
        ob_env_name: env id used to derive the observation layout.
        is_train: when True (or when evaluation is requested), load the
            collected success states into the replay buffer.
        use_traj_portion_start: fraction of each trajectory to skip
            from the start.
        use_traj_portion_end: fraction of each trajectory to keep up to.
        config: experiment configuration object.

    Raises:
        ValueError: if ``config.proximity_loss_type`` is neither
            'lsgan' nor 'wgan'.
    """
    self._scope = 'proximity_predictor/' + name
    self.env_name = name.split('.')[0]
    self._config = config

    # make primitive env only to read the observation layout
    self._env = make_env(ob_env_name, config)
    self._include_acc = config.proximity_include_acc
    self._ob_shape = self._env.unwrapped.ob_shape
    self.ob_type = sorted(self._env.unwrapped.ob_type)
    if not self._include_acc and 'acc' in self.ob_type:
        self._ob_shape.pop('acc')
        self.ob_type.remove('acc')
    self.obs_norm = config.proximity_obs_norm
    self.observation_shape = np.sum(
        [np.prod(ob) for ob in self._ob_shape.values()])

    # replay buffers holding [ob, label] rows
    self.fail_buffer = Replay(max_size=config.proximity_replay_size,
                              name='fail_buffer')
    self.success_buffer = Replay(max_size=config.proximity_replay_size,
                                 name='success_buffer')

    # build the architecture and input placeholders
    self._num_hidden_layer = config.proximity_num_hid_layers
    self._hidden_size = config.proximity_hid_size
    self._activation_fn = activation(config.proximity_activation_fn)
    self._build_ph()

    logger.info('===== Proximity_predictor for {} ====='.format(
        self._scope))

    # load collected states into the success buffer
    if is_train or config.evaluate_proximity_predictor:
        self._load_success_states(
            path, use_traj_portion_start, use_traj_portion_end)

    # build graph: success states should score 1, failure states 0
    fail_logits, fail_target_value, success_logits, success_target_value = \
        self._build_graph(self.fail_obs_ph, self.success_obs_ph,
                          reuse=False)
    fake_prob = tf.reduce_mean(fail_logits)  # should go to 0
    real_prob = tf.reduce_mean(success_logits)  # should go to 1

    # compute loss
    if config.proximity_loss_type == 'lsgan':
        self.fake_loss = tf.reduce_mean(
            (fail_logits - fail_target_value)**2)
        self.real_loss = tf.reduce_mean(
            (success_logits - success_target_value)**2)
    elif config.proximity_loss_type == 'wgan':
        self.fake_loss = tf.reduce_mean(
            tf.abs(fail_logits - fail_target_value))
        self.real_loss = tf.reduce_mean(
            tf.abs(success_logits - success_target_value))
    else:
        # Fail fast here instead of a confusing AttributeError on
        # self.fake_loss a few lines below.
        raise ValueError('Unknown proximity_loss_type: {}'.format(
            config.proximity_loss_type))

    # loss + accuracy terms
    self.total_loss = self.fake_loss + self.real_loss
    self.losses = {
        "fake_loss": self.fake_loss,
        "real_loss": self.real_loss,
        "fake_prob": fake_prob,
        "real_prob": real_prob,
        "total_loss": self.total_loss
    }

    # predict proximity: clipped success logit
    self._proximity_op = tf.clip_by_value(success_logits, 0, 1)[:, 0]

def _load_success_states(self, path, use_traj_portion_start,
                         use_traj_portion_end):
    """Load collected success states from hdf5 into the success buffer.

    Each stored observation is truncated to ``observation_shape`` and
    labeled 1.0. Trajectories flagged as unsuccessful are skipped;
    files without a 'success' flag are loaded entirely (portion-sliced).
    """
    config = self._config
    state_file_path = osp.join(config.primitive_dir,
                               path.split('/')[0], 'state')
    logger.info('Search state files from: {}'.format(
        config.primitive_dir))
    state_file_list = glob.glob(osp.join(state_file_path, '*.hdf5'))
    logger.info('Candidate state files: {}'.format(' '.join(
        [f.split('/')[-1] for f in state_file_list])))
    state_file = {}
    try:
        logger.info('Use state files: {}'.format(
            state_file_list[0].split('/')[-1]))
        state_file = h5py.File(state_file_list[0], 'r')
    except (IndexError, OSError):
        # IndexError: no candidate file; OSError: unreadable hdf5.
        logger.warn(
            "No collected state hdf5 file is located at {}".format(
                state_file_path))
    logger.info('Use traj portion: {} to {}'.format(
        use_traj_portion_start, use_traj_portion_end))

    if config.proximity_keep_collected_obs:
        add_obs = self.success_buffer.add_collected_obs
    else:
        add_obs = self.success_buffer.add

    for k in list(state_file.keys()):
        # NOTE(review): Dataset.value is removed in h5py 3.x; the
        # equivalent is state_file[k]['obs'][()] — confirm the pinned
        # h5py version before changing.
        traj_state = state_file[k]['obs'].value
        start_idx = int(traj_state.shape[0] * use_traj_portion_start)
        end_idx = int(traj_state.shape[0] * use_traj_portion_end)
        try:
            if state_file[k]['success'].value == 1:
                traj_state = traj_state[start_idx:end_idx]
            else:
                continue  # skip failed trajectories
        except KeyError:
            # Legacy files without a 'success' flag: keep everything.
            traj_state = traj_state[start_idx:end_idx]
        for t in range(traj_state.shape[0]):
            ob = traj_state[t][:self.observation_shape]
            # [ob, label]
            add_obs(np.concatenate((ob, [1.0]), axis=0))
        # shape [num_state, dim_state]
    logger.info('Size of collected state: {}'.format(
        self.success_buffer.size()))
    logger.info('Average of collected state: {}'.format(
        np.mean(self.success_buffer.list(), axis=0)))