def unit_test_reset_belief( pomdp ):
    """Build a POMDP result dictionary that only carries a reset belief.

    tbh_pomdp_util.reset_belief( pomdp ) is called once; its result is stored
    under both 'belief_for_policy' and 'final_belief'.  No machine action
    is chosen ('new_machine_action' is None) and 'reset_belief' is True.

    Returns:
        dict with the keys 'belief_for_policy', 'final_belief',
        'new_machine_action' and 'reset_belief'.
    """
    fresh_belief = tbh_pomdp_util.reset_belief( pomdp )

    # Both belief entries refer to the very same object
    return dict( belief_for_policy = fresh_belief,
                 final_belief = fresh_belief,
                 new_machine_action = None,
                 reset_belief = True )
# Example #2
# 0
def pomdp_choose_action( pomdp, Q, belief_for_policy, action_code_list ):
    """Choose the next machine action for a belief and report the outcome.

    The action number returned by tbh_pomdp_util.solve_qmdp is used as an
    index into action_code_list.  determine_reset_belief then decides, from
    the chosen action, whether the belief is replaced by the result of
    tbh_pomdp_util.reset_belief( pomdp ) or kept as belief_for_policy.

    Returns:
        dict with the keys
          'belief_for_policy'  -- the belief passed in, unchanged
          'final_belief'       -- the reset belief, or belief_for_policy
          'new_machine_action' -- the entry picked from action_code_list
          'reset_belief'       -- the value returned by determine_reset_belief
    """
    action_number = tbh_pomdp_util.solve_qmdp( belief_for_policy, pomdp, Q )
    chosen_action = action_code_list[action_number]

    # The chosen action decides whether the belief has to be reset
    must_reset = determine_reset_belief( chosen_action )
    next_belief = ( tbh_pomdp_util.reset_belief( pomdp )
                    if must_reset else belief_for_policy )

    return { 'belief_for_policy': belief_for_policy,
             'final_belief': next_belief,
             'new_machine_action': chosen_action,
             'reset_belief': must_reset }
    def process_nbest_handler( self , param_dict ):
        
        # ----- COMMON TO ALL DMs ----- #                        
        # Fill in action parts away from defaults: 
        # Using confidence scores 
        feature_set_list = []

        # if we want all 10 hypotheses:
        # use this for computing confidence scores

        ### custom for this experiment ###
        ### for computing the confidence score
        action_code_list = []
        for index, nbest_item in enumerate( param_dict['nbest_list'] ):
            feature_set = \
                tbh_compute_text_feature_set.compute_feature_set( \
                [ nbest_item ], self.feature_type_list )
            action_code = self.determine_action_code( feature_set )
            action_code_list.append( action_code )

        if len( action_code_list ) > 0:
            top_action_code = action_code_list[0]
            confidence_score = 0
            for code in action_code_list:
                if code == top_action_code:
                    confidence_score += 0.1
            confidence_score = confidence_score / float( len( action_code_list ) ) * 10 - 0.03
        else:
            confidence_score = 0

        print "CONFIDENCE SCORE:", confidence_score

        ### end of computing confidence score



        #### LOAD ENTRIES INTO PARAM_DICT
        param_dict['confidence_score'] = confidence_score
        feature_set = \
            tbh_compute_text_feature_set.compute_feature_set( \
            param_dict['nbest_list'] , self.feature_type_list )
        # add feature set to param_dict for storage and other purposes
        param_dict[ 'feature_set' ] = feature_set


        # for the pomdp
        # Search over the features to determine the right action

        # --- CUSTOM TO YOUR DM --- #
        # Adjust things based on this action dictionary and send to
        # everyone who might want to use it

        # ----- COMMON TO ALL DMs ----- #
        # Send the action_dict to everyone that might need it: common
        # to all dialog managers; you probably don't want to change
        # this part of the code.

        #if action_dict['skype_manager_phone_action'] is not None:
        #    self._process_phone_event( action_dict['skype_manager_phone_action'] )


        if self.current_experiment_log.experiment_name[0] == "POMDP":
            action_dict = self.determine_action_confidence( param_dict )
            self.machine_action = action_dict['pomdp_dict']['new_machine_action']
            self.belief_for_policy = action_dict['pomdp_dict']['belief_for_policy']

            print "\n================="
            print "CURRENT POLICY"
            print self.belief_for_policy
            print "================="

            #print "==========BELIEF=========="
            #print self.belief_for_policy
            #print "=========================="

            # Update the belief as appropriate
            self.pomdp.belief = action_dict['pomdp_dict']['final_belief']

            current_turn = tbh_experiment_controller.experiment_turn( \
                time = time.time(), \
                    utterance_file = None, \
                    param_dict = param_dict, \
                    utterance_hypothesis = param_dict['nbest_list'][0]['text'],
                    hypothesis_code = \
                    self.determine_action_code( param_dict['feature_set'] ), \
                    confidence_score = confidence_score, \
                    system_response = \
                    action_dict['pomdp_dict']['new_machine_action'] )

        elif self.current_experiment_log.experiment_name[0] == "THRESHOLD":
            threshold = 0.75
            action_dict = self.determine_action_threshold( param_dict, threshold )
            self.belief_for_policy = tbh_pomdp_util.reset_belief( self.pomdp )
            self.pomdp.belief = tbh_pomdp_util.reset_belief( self.pomdp )

            if confidence_score < threshold:
                self.machine_action = action_dict['pomdp_dict']['new_machine_action']

                current_turn = tbh_experiment_controller.experiment_turn( \
                    time = time.time(), \
                        utterance_file = None, \
                        param_dict = param_dict, \
                        utterance_hypothesis = param_dict['nbest_list'][0]['text'],
                        hypothesis_code = \
                        self.determine_action_code( param_dict['feature_set'] ), \
                        confidence_score = confidence_score, \
                        system_response = \
                        action_dict['pomdp_dict']['new_machine_action'] )

            else:
                self.machine_action = action_dict['pomdp_dict']['new_machine_action']

                current_turn = tbh_experiment_controller.experiment_turn( \
                    time = time.time(), \
                        utterance_file = None, \
                        param_dict = param_dict, \
                        utterance_hypothesis = param_dict['nbest_list'][0]['text'],
                        hypothesis_code = \
                        self.determine_action_code( param_dict['feature_set'] ), \
                        confidence_score = confidence_score, \
                        system_response = \
                        self.determine_action_code( param_dict['feature_set'] ) )

        else:
            raise NameError( 'neither option as name of experiment' )


        self.current_experiment_log.turn_list.append( current_turn )

        # print self.current_experiment_log.turn_list

        # add this to the param_dict
        param_dict[ 'action_dict' ] = action_dict      
        
        if len( self.current_experiment_log.turn_list ) > 0:
            for turn in self.current_experiment_log.turn_list:
                print turn.time, turn.utterance_file, turn.utterance_hypothesis, turn.hypothesis_code, turn.confidence_score, turn.system_response

        self._processed_nbest_event( param_dict )