def __compute_reward(self, action, preact_interp_prd_logits, episode_step_count, action_shape):
    """Compute a per-pixel reward tensor for the taken action.

    Rewards pixels where the acted-upon prediction matches the ground
    truth, penalizes certain-but-wrong pixels, and applies a step-dependent
    weight from ``self.reward_dict`` to pixels where no action was taken.

    Arguments:
        action: binary action map (0 = no action / "uncertain").
        preact_interp_prd_logits: prediction logits at full input resolution.
        episode_step_count: current step index into the reward schedule.
        action_shape: spatial shape the masks are resized to.

    Returns:
        A gradient-stopped reward tensor with the channel axis squeezed out.
    """
    interp_action = layers.Interp(
        x=action,
        shape=self.inputs_shape_spatial,
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    # Shift argmax classes by +1 so that 0 unambiguously means "no action".
    interp_action_prd = tf.stop_gradient(
        interp_action * tf.expand_dims(
            (tf.argmax(preact_interp_prd_logits, axis=-1) + 1), axis=-1))
    # NOTE(review): the original wrote ``mask = x=tf.to_float(...)`` on the
    # next three assignments, chain-assigning into a spurious local ``x``
    # (apparently a copy-paste of a ``layers.Interp(x=...)`` keyword).
    # The stray ``x=`` targets are removed; values are unchanged.
    certain_region_mask = tf.to_float(tf.not_equal(action, 0))
    uncertain_region_mask = tf.to_float(tf.equal(action, 0))
    uncertain_region_sum = uncertain_region_mask
    # Pixels where the shifted prediction equals the (also shifted) label.
    pos_mask = tf.to_float(tf.equal(interp_action_prd, self.ground_truth + 1))
    neg_mask = 1.0 - pos_mask
    pos_mask = layers.Interp(x=pos_mask, shape=action_shape)
    neg_mask = layers.Interp(x=neg_mask, shape=action_shape)
    # Exclude ignored ground-truth pixels (label 255) from the penalty term.
    unignored_mask = layers.Interp(
        x=tf.to_float(tf.not_equal(self.ground_truth, 255)),
        shape=action_shape,
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    pos_certreg_sum = pos_mask
    neg_certreg_sum = unignored_mask * certain_region_mask * neg_mask
    # Clamp the step index into the reward schedule (same as the original
    # ``a if a < len-1 else len-1`` conditional, written idiomatically).
    reward_dict_idx = min(episode_step_count, len(self.reward_dict) - 1)
    reward = (pos_certreg_sum - neg_certreg_sum
              + self.reward_dict[reward_dict_idx] * uncertain_region_sum)
    return tf.stop_gradient(tf.squeeze(reward, axis=-1))
def __inference_once(self):
    """Run one inference step: state value, policy, and segmentation logits.

    Builds the policy/value heads and the classification head over the
    current ``self.dynamic_envs``, samples an action, and refreshes
    ``self.dynamic_envs`` from the acted-upon prediction.

    NOTE(review): this class defines ``__inference_once`` twice; this earlier
    zero-argument version is shadowed by the later ``__inference_once(self,
    step)`` definition and appears to be dead code — confirm before reuse.

    Returns:
        [state_value, policy, preact_interp_policy, interp_policy, action,
         NS_action, preact_interp_prd_logits, NS_action_interp_smooth]
    """
    state_value, _policys = layers.pspnet_top_plc(
        inputs=self.features_priors,
        dynamic_envs=self.dynamic_envs,
        inputs_shape_spatial=self.inputs_shape_spatial,
        is_training=self.is_training)
    _preact_prd_logits = layers.pspnet_top_cls(
        inputs=self.features_priors,
        dynamic_envs=self.dynamic_envs,
        nb_classes=self.nb_classes,
        activation='softmax',
        is_training=self.is_training)
    _preact_policy, policy, preact_interp_policy, interp_policy = _policys
    preact_interp_prd_logits = layers.Interp(
        x=_preact_prd_logits, shape=self.inputs_shape_spatial)
    # Sampled (stochastic) action from the softmax policy.
    action = self.__gen_action(policy)
    # Non-stochastic (greedy) action, and a smoothed full-resolution variant.
    NS_action = tf.expand_dims(tf.argmax(policy, axis=-1), axis=-1)
    NS_action_interp_smooth = tf.expand_dims(
        tf.argmax(
            layers.Interp(x=policy,
                          shape=self.inputs_shape_spatial,
                          method=tf.image.ResizeMethod.BILINEAR),
            axis=-1),
        axis=-1)
    # Action times (argmax class + 1): 0 marks pixels with no action.
    # (The original also built an identical ``NS_action_prd`` from the
    # greedy action; it was never used and has been removed.)
    _action_prd = tf.stop_gradient(
        action * tf.expand_dims(
            (tf.argmax(_preact_prd_logits, axis=-1) + 1), axis=-1))
    self.dynamic_envs = self.__new_envs(_action_prd)
    return [state_value, policy, preact_interp_policy, interp_policy,
            action, NS_action, preact_interp_prd_logits,
            NS_action_interp_smooth]
def __new_envs_soft(self, prd_softmax):
    """Return a gradient-stopped, float-cast resize of a softmax prediction.

    The prediction is bilinearly resized to the dynamic-environment
    spatial shape; no one-hot encoding is applied (soft update).
    """
    resized = layers.Interp(
        x=prd_softmax,
        shape=self.dynamic_envs_shape_spatial,
        method=tf.image.ResizeMethod.BILINEAR)
    return tf.stop_gradient(tf.to_float(resized))
def __new_knowledges(self, action_prd):
    """Build the new knowledge tensor from an action-prediction map.

    The map is nearest-neighbor resized to the knowledge spatial shape,
    its channel axis is squeezed, and the labels are one-hot encoded over
    ``self.total_classes``. The result is float-cast and gradient-stopped.
    """
    resized = layers.Interp(
        x=action_prd,
        shape=self.knowledges_shape_spatial,
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    labels = tf.squeeze(resized, axis=-1)
    encoded = tf.one_hot(indices=labels, depth=self.total_classes)
    return tf.stop_gradient(tf.to_float(encoded))
def __new_envs(self, prd):
    """Build the new dynamic-environment tensor from a hard prediction map.

    The map is nearest-neighbor resized to the dynamic-environment spatial
    shape, its channel axis is squeezed, and the labels are one-hot encoded
    over ``self.nb_classes``. The result is float-cast and gradient-stopped.
    """
    resized = layers.Interp(
        x=prd,
        shape=self.dynamic_envs_shape_spatial,
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    labels = tf.squeeze(resized, axis=-1)
    encoded = tf.one_hot(indices=labels, depth=self.nb_classes)
    return tf.stop_gradient(tf.to_float(encoded))
def __inference_once(self, step):
    """Run one inference step with a soft dynamic-environment update.

    Predicts segmentation logits (knowledge is empty on the first step),
    softly updates ``self.dynamic_envs`` from the softmax prediction,
    evaluates the policy/value heads, samples an action, and refreshes
    ``self.knowledges`` from the acted-upon prediction.

    (An earlier hard-argmax implementation kept inside a ``'''...'''``
    string literal, plus two commented-out lines, were dead code and have
    been removed.)

    Arguments:
        step: integer episode step; step 0 starts with empty knowledge.

    Returns:
        [state_value, policy, preact_interp_policy, interp_policy, action,
         NS_action, preact_interp_prd_logits, NS_action_interp_smooth,
         element_wise_comp]
    """
    _preact_prd_logits = self._get_preact_prd_logits(
        is_knowledge_empty=(step == 0))
    preact_interp_prd_logits = layers.Interp(
        x=_preact_prd_logits, shape=self.original_inputs_shape_spatial)
    _preact_prd_logits_argmax = tf.expand_dims(
        tf.argmax(_preact_prd_logits, axis=-1), axis=-1)
    # Soft environment update from the softmax probabilities (no argmax).
    self.dynamic_envs = self.__new_envs_soft(
        prd_softmax=tf.nn.softmax(_preact_prd_logits, axis=-1))
    state_value, _policys = self._get_stval_plc()
    _preact_policy, policy, preact_interp_policy, interp_policy = _policys
    action = self.__gen_action(policy)  # input softmax policy to explore more.
    # Non-stochastic (greedy) action, and a smoothed full-resolution variant.
    NS_action = tf.expand_dims(tf.argmax(policy, axis=-1), axis=-1)
    NS_action_interp_smooth = tf.expand_dims(
        tf.argmax(
            layers.Interp(x=policy,
                          shape=self.original_inputs_shape_spatial,
                          method=tf.image.ResizeMethod.BILINEAR),
            axis=-1),
        axis=-1)
    # Action times (argmax class + 1): 0 marks pixels with no action.
    _action_prd = tf.stop_gradient(
        action * (_preact_prd_logits_argmax + 1))
    self.knowledges = self.__new_knowledges(_action_prd)
    # Per-pixel agreement between the argmax prediction and the ground
    # truth resized to the prediction's spatial shape.
    element_wise_comp = tf.equal(
        _preact_prd_logits_argmax,
        layers.Interp(
            x=self.ground_truth,
            shape=_preact_prd_logits_argmax.shape.as_list()[1:-1],
            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR))
    return [
        state_value, policy, preact_interp_policy, interp_policy, action,
        NS_action, preact_interp_prd_logits, NS_action_interp_smooth,
        tf.stop_gradient(element_wise_comp)
    ]