예제 #1
0
    def __init__(self, name, env, ob_env_name, primitives, config):
        """Store hyper-parameters and the observation layout, then build.

        Args:
            name: Stored as ``self.name``.
            env: Stored as ``self._env``.
            ob_env_name: Name handed to ``make_env`` to create a temporary
                environment that is only queried for ``ob_shape`` and
                ``ob_type`` and is closed again before this method returns.
            primitives: Stored as ``self.primitive_names``; its length is
                stored as ``self.num_primitives``.
            config: Object providing ``meta_hid_size``,
                ``meta_num_hid_layers``, ``meta_activation`` and
                ``meta_oracle``.
        """
        # constructor arguments
        self.name = name
        self._config = config

        # network hyper-parameters
        self._hid_size = config.meta_hid_size
        self._num_hid_layers = config.meta_num_hid_layers
        self._activation = ops.activation(config.meta_activation)

        # observation layout, read from a throw-away environment
        ob_env = make_env(ob_env_name, config)
        self._ob_shape = ob_env.ob_shape
        self.ob_type = sorted(ob_env.ob_type)
        # the 'acc' entry is always stripped here when it is present
        if 'acc' in self._ob_shape:
            self._ob_shape.pop('acc')
            self.ob_type.remove('acc')
        ob_env.close()

        self._env = env
        # total observation size: sum over entries of the product of each shape
        per_entry_sizes = [np.prod(dims) for dims in self._ob_shape.values()]
        self._ob_space = np.sum(per_entry_sizes)
        self.num_primitives = len(primitives)
        self.primitive_names = primitives

        # _build() is skipped entirely when config.meta_oracle is truthy
        if config.meta_oracle:
            return
        self._build()
예제 #2
0
    def __init__(self, name, env, ob_env_name, config):
        """Store hyper-parameters, observation/action layout and build.

        Args:
            name: Stored as ``self.name`` and used as the TensorFlow variable
                scope under which ``_build()`` runs.
            env: Stored as ``self._env``.
            ob_env_name: Name handed to ``make_env`` to create a temporary
                environment that supplies ``ob_shape``, ``ob_type`` and
                ``action_space`` and is closed afterwards.
            config: Object providing ``rl_hid_size``, ``rl_num_hid_layers``,
                ``rl_fixed_var``, ``rl_activation`` and
                ``primitive_include_acc``.
        """
        # constructor arguments
        self.name = name

        # network hyper-parameters
        self._hid_size = config.rl_hid_size
        self._num_hid_layers = config.rl_num_hid_layers
        self._gaussian_fixed_var = config.rl_fixed_var
        self._activation = ops.activation(config.rl_activation)
        self._include_acc = config.primitive_include_acc

        # observation layout, read from a throw-away environment
        ob_env = make_env(ob_env_name)
        self._ob_shape = ob_env.ob_shape
        self.ob_type = sorted(ob_env.ob_type)
        # strip the 'acc' entry unless the config asks to keep it
        strip_acc = not self._include_acc and 'acc' in self.ob_type
        if strip_acc:
            self._ob_shape.pop('acc')
            self.ob_type.remove('acc')

        self._env = env
        # total observation size: sum over entries of the product of each shape
        per_entry_sizes = [np.prod(dims) for dims in self._ob_shape.values()]
        self._ob_space = np.sum(per_entry_sizes)
        # the action space comes from the temporary env, not from `env`
        self._ac_space = ob_env.action_space
        ob_env.close()

        with tf.variable_scope(self.name):
            # remember the fully-qualified scope name before building
            self._scope = tf.get_variable_scope().name
            self._build()
예제 #3
0
    def __init__(self,
                 name,
                 env,
                 ob_env_name,
                 num_primitives,
                 trans_term_activation='softmax',
                 config=None):
        """Store hyper-parameters, observation/action layout and build.

        Args:
            name: Stored as ``self.name``; the part before the first ``'.'``
                is stored as ``self.env_name``. Also used as the TensorFlow
                variable scope under which ``_build()`` runs.
            env: Stored as ``self._env``; its ``action_space`` becomes
                ``self._ac_space``.
            ob_env_name: Name handed to ``make_env``. The created environment
                is kept as ``self.primitive_env`` and is not closed here.
            num_primitives: Stored as ``self._num_primitives``.
            trans_term_activation: Stored as ``self.term_activation``.
            config: Object providing ``trans_hid_size``,
                ``trans_num_hid_layers``, ``trans_fixed_var``,
                ``trans_activation`` and ``trans_include_acc``. Although the
                default is ``None``, the attributes are read unconditionally,
                so a real config object must be supplied.
        """
        # configuration
        self.term_activation = trans_term_activation
        self._config = config

        # constructor arguments
        self.name = name
        name_parts = self.name.split('.')
        self.env_name = name_parts[0]

        # network hyper-parameters
        self._hid_size = config.trans_hid_size
        self._num_hid_layers = config.trans_num_hid_layers
        self._gaussian_fixed_var = config.trans_fixed_var
        self._activation = ops.activation(config.trans_activation)
        self._include_acc = config.trans_include_acc

        # observation layout, read from an environment that is retained
        ob_env = make_env(ob_env_name, config)
        self._ob_shape = ob_env.ob_shape
        self.ob_type = sorted(ob_env.ob_type)
        self.primitive_env = ob_env

        # strip the 'acc' entry unless the config asks to keep it
        strip_acc = not self._include_acc and 'acc' in self.ob_type
        if strip_acc:
            self._ob_shape.pop('acc')
            self.ob_type.remove('acc')

        self._env = env
        # total observation size: sum over entries of the product of each shape
        per_entry_sizes = [np.prod(dims) for dims in self._ob_shape.values()]
        self._ob_space = np.sum(per_entry_sizes)
        # unlike the observation layout, the action space is taken from `env`
        self._ac_space = env.action_space
        self._num_primitives = num_primitives

        with tf.variable_scope(self.name):
            # remember the fully-qualified scope name before building
            self._scope = tf.get_variable_scope().name
            self._build()
예제 #4
0
    def __init__(self, name, env, ob_env_name, config=None):
        """Store hyper-parameters, observation/action layout and build.

        Args:
            name: Stored as ``self.name``; the part before the first ``'-'``
                is stored as ``self.env_name``. Also used as the TensorFlow
                variable scope when the network is built.
            env: Stored as ``self._env``.
            ob_env_name: Stored as ``self.ob_env_name`` and handed to
                ``make_env``; the created environment supplies
                ``hard_coded``, ``ob_shape``, ``ob_type`` and
                ``action_space``.
            config: Object providing ``primitive_hid_size``,
                ``primitive_num_hid_layers``, ``primitive_fixed_var``,
                ``primitive_activation``, ``primitive_include_acc`` and
                ``primitive_use_term``. Although the default is ``None``, the
                attributes are read unconditionally, so a real config object
                must be supplied.
        """
        # configuration
        self._config = config

        # constructor arguments
        self.name = name
        name_parts = self.name.split('-')
        self.env_name = name_parts[0]

        # network hyper-parameters
        self._hid_size = config.primitive_hid_size
        self._num_hid_layers = config.primitive_num_hid_layers
        self._gaussian_fixed_var = config.primitive_fixed_var
        self._activation = ops.activation(config.primitive_activation)
        self._include_acc = config.primitive_include_acc

        # observation layout, read from a dedicated environment
        self.ob_env_name = ob_env_name
        ob_env = make_env(ob_env_name, config)
        self.hard_coded = ob_env.hard_coded
        self._ob_shape = ob_env.ob_shape
        self.ob_type = sorted(ob_env.ob_type)

        # strip the 'acc' entry unless the config asks to keep it
        strip_acc = not self._include_acc and 'acc' in self.ob_type
        if strip_acc:
            self._ob_shape.pop('acc')
            self.ob_type.remove('acc')

        self._env = env
        # total observation size: sum over entries of the product of each shape
        per_entry_sizes = [np.prod(dims) for dims in self._ob_shape.values()]
        self._ob_space = np.sum(per_entry_sizes)
        self._ac_space = ob_env.action_space

        # the dedicated env is only kept alive when primitive_use_term is set
        if not config.primitive_use_term:
            ob_env.close()
        else:
            self.primitive_env = ob_env

        # nothing is built when the environment reports hard_coded
        if self.hard_coded:
            return
        with tf.variable_scope(self.name):
            # remember the fully-qualified scope name before building
            self._scope = tf.get_variable_scope().name
            self._build()
예제 #5
0
    def __init__(self,
                 name,
                 path,
                 env,
                 ob_env_name,
                 is_train=True,
                 use_traj_portion_start=0.0,
                 use_traj_portion_end=1.0,
                 config=None):
        """Build the proximity predictor and preload collected states.

        Args:
            name: Predictor name. The scope becomes
                ``'proximity_predictor/' + name`` and the part before the
                first ``'.'`` is stored as ``self.env_name``.
            path: Path string; its first ``'/'``-separated component selects
                the sub-directory of ``config.primitive_dir`` whose
                ``state/*.hdf5`` files are searched.
            env: Not used by this constructor; the observation environment is
                created from ``ob_env_name`` instead.
            ob_env_name: Name handed to ``make_env``; the result is stored as
                ``self._env``.
            is_train: Collected states are loaded when this is truthy or when
                ``config.evaluate_proximity_predictor`` is truthy.
            use_traj_portion_start: Fraction of each trajectory's length at
                which the loaded slice starts.
            use_traj_portion_end: Fraction of each trajectory's length at
                which the loaded slice ends.
            config: Object providing the ``proximity_*`` settings,
                ``primitive_dir`` and ``evaluate_proximity_predictor``.
                Although the default is ``None``, the attributes are read
                unconditionally, so a real config object must be supplied.

        Raises:
            ValueError: If ``config.proximity_loss_type`` is neither
                ``'lsgan'`` nor ``'wgan'``.
        """
        self._scope = 'proximity_predictor/' + name
        self.env_name = name.split('.')[0]
        self._config = config

        # make primitive env for observation
        self._env = make_env(ob_env_name, config)
        self._include_acc = config.proximity_include_acc
        self._ob_shape = self._env.unwrapped.ob_shape
        self.ob_type = sorted(self._env.unwrapped.ob_type)
        if not self._include_acc and 'acc' in self.ob_type:
            self._ob_shape.pop('acc')
            self.ob_type.remove('acc')

        self.obs_norm = config.proximity_obs_norm
        # total observation size: sum over entries of the product of each shape
        self.observation_shape = np.sum(
            [np.prod(ob) for ob in self._ob_shape.values()])

        # replay buffers
        self.fail_buffer = Replay(max_size=config.proximity_replay_size,
                                  name='fail_buffer')
        self.success_buffer = Replay(max_size=config.proximity_replay_size,
                                     name='success_buffer')

        # build the architecture
        self._num_hidden_layer = config.proximity_num_hid_layers
        self._hidden_size = config.proximity_hid_size
        self._activation_fn = activation(config.proximity_activation_fn)
        self._build_ph()

        logger.info('===== Proximity_predictor for {} ====='.format(
            self._scope))
        # load collected states
        if is_train or config.evaluate_proximity_predictor:
            state_file_path = osp.join(config.primitive_dir,
                                       path.split('/')[0], 'state')
            logger.info('Search state files from: {}'.format(
                config.primitive_dir))
            state_file_list = glob.glob(osp.join(state_file_path, '*.hdf5'))
            logger.info('Candidate state files: {}'.format(' '.join(
                [f.split('/')[-1] for f in state_file_list])))

            # Loading is best-effort: a missing or unreadable file only
            # produces a warning and leaves the success buffer empty.
            state_file = None
            try:
                logger.info('Use state files: {}'.format(
                    state_file_list[0].split('/')[-1]))
                state_file = h5py.File(state_file_list[0], 'r')
            except (IndexError, OSError):
                # IndexError: no candidate file; OSError: file cannot be opened
                logger.warn(
                    "No collected state hdf5 file is located at {}".format(
                        state_file_path))
            logger.info('Use traj portion: {} to {}'.format(
                use_traj_portion_start, use_traj_portion_end))

            if self._config.proximity_keep_collected_obs:
                add_obs = self.success_buffer.add_collected_obs
            else:
                add_obs = self.success_buffer.add

            if state_file is not None:
                try:
                    for k in list(state_file.keys()):
                        # `dataset[()]` reads the whole dataset into memory;
                        # it replaces the `.value` accessor that newer h5py
                        # releases no longer provide.
                        traj_state = state_file[k]['obs'][()]
                        start_idx = int(
                            traj_state.shape[0] * use_traj_portion_start)
                        end_idx = int(
                            traj_state.shape[0] * use_traj_portion_end)
                        try:
                            succeeded = state_file[k]['success'][()] == 1
                        except KeyError:
                            # trajectories without a 'success' entry are kept
                            succeeded = True
                        if not succeeded:
                            continue
                        for state in traj_state[start_idx:end_idx]:
                            ob = state[:self.observation_shape]
                            # [ob, label]
                            add_obs(np.concatenate((ob, [1.0]), axis=0))
                finally:
                    # everything needed has been copied into memory, so the
                    # file handle is released even if loading fails midway
                    state_file.close()

            # shape [num_state, dim_state]
            logger.info('Size of collected state: {}'.format(
                self.success_buffer.size()))
            logger.info('Average of collected state: {}'.format(
                np.mean(self.success_buffer.list(), axis=0)))

        # build graph
        fail_logits, fail_target_value, success_logits, success_target_value = \
            self._build_graph(self.fail_obs_ph, self.success_obs_ph, reuse=False)

        # compute prob
        fake_prob = tf.reduce_mean(fail_logits)  # should go to 0
        real_prob = tf.reduce_mean(success_logits)  # should go to 1

        # compute loss
        if config.proximity_loss_type == 'lsgan':
            self.fake_loss = tf.reduce_mean(
                (fail_logits - fail_target_value)**2)
            self.real_loss = tf.reduce_mean(
                (success_logits - success_target_value)**2)
        elif config.proximity_loss_type == 'wgan':
            self.fake_loss = tf.reduce_mean(
                tf.abs(fail_logits - fail_target_value))
            self.real_loss = tf.reduce_mean(
                tf.abs(success_logits - success_target_value))
        else:
            # fail with a clear message instead of an AttributeError on
            # self.fake_loss a few lines further down
            raise ValueError(
                "Unsupported proximity_loss_type: {!r} "
                "(expected 'lsgan' or 'wgan')".format(
                    config.proximity_loss_type))

        # loss + accuracy terms
        self.total_loss = self.fake_loss + self.real_loss
        self.losses = {
            "fake_loss": self.fake_loss,
            "real_loss": self.real_loss,
            "fake_prob": fake_prob,
            "real_prob": real_prob,
            "total_loss": self.total_loss
        }

        # predict proximity
        self._proximity_op = tf.clip_by_value(success_logits, 0, 1)[:, 0]