def unit_test_reset_belief(pomdp):
    """Return a pomdp_dict describing a belief that has just been reset.

    The belief is obtained from ``tbh_pomdp_util.reset_belief(pomdp)`` and
    is stored under both 'belief_for_policy' and 'final_belief'.  No
    machine action is chosen ('new_machine_action' is None) and the
    'reset_belief' flag is True.
    """
    fresh_belief = tbh_pomdp_util.reset_belief(pomdp)
    return {
        'belief_for_policy': fresh_belief,
        'final_belief': fresh_belief,
        'new_machine_action': None,
        'reset_belief': True,
    }
def pomdp_choose_action(pomdp, Q, belief_for_policy, action_code_list):
    """Pick the next machine action and the belief to carry forward.

    ``tbh_pomdp_util.solve_qmdp`` is called with the current belief, the
    pomdp and Q; its result is used as an index into ``action_code_list``
    to obtain the machine action.  ``determine_reset_belief`` is then asked
    whether that action calls for a reset: if so the final belief comes
    from ``tbh_pomdp_util.reset_belief(pomdp)``, otherwise the incoming
    belief is kept unchanged.

    Returns a dict with the keys 'belief_for_policy' (the belief passed
    in), 'final_belief', 'new_machine_action' and 'reset_belief' (the value
    returned by ``determine_reset_belief``).
    """
    action_index = tbh_pomdp_util.solve_qmdp(belief_for_policy, pomdp, Q)
    chosen_action = action_code_list[action_index]

    # Determine if we need to reset the policy.
    should_reset = determine_reset_belief(chosen_action)
    belief_after_action = (
        tbh_pomdp_util.reset_belief(pomdp) if should_reset
        else belief_for_policy
    )

    return {
        'belief_for_policy': belief_for_policy,
        'final_belief': belief_after_action,
        'new_machine_action': chosen_action,
        'reset_belief': should_reset,
    }
def process_nbest_handler( self , param_dict ): # ----- COMMON TO ALL DMs ----- # # Fill in action parts away from defaults: # Using confidence scores feature_set_list = [] # if we want all 10 hypotheses: # use this for computing confidence scores ### custom for this experiment ### ### for computing the confidence score action_code_list = [] for index, nbest_item in enumerate( param_dict['nbest_list'] ): feature_set = \ tbh_compute_text_feature_set.compute_feature_set( \ [ nbest_item ], self.feature_type_list ) action_code = self.determine_action_code( feature_set ) action_code_list.append( action_code ) if len( action_code_list ) > 0: top_action_code = action_code_list[0] confidence_score = 0 for code in action_code_list: if code == top_action_code: confidence_score += 0.1 confidence_score = confidence_score / float( len( action_code_list ) ) * 10 - 0.03 else: confidence_score = 0 print "CONFIDENCE SCORE:", confidence_score ### end of computing confidence score #### LOAD ENTRIES INTO PARAM_DICT param_dict['confidence_score'] = confidence_score feature_set = \ tbh_compute_text_feature_set.compute_feature_set( \ param_dict['nbest_list'] , self.feature_type_list ) # add feature set to param_dict for storage and other purposes param_dict[ 'feature_set' ] = feature_set # for the pomdp # Search over the features to determine the right action # --- CUSTOM TO YOUR DM --- # # Adjust things based on this action dictionary and send to # everyone who might want to use it # ----- COMMON TO ALL DMs ----- # # Send the action_dict to everyone that might need it: common # to all dialog managers; you probably don't want to change # this part of the code. 
#if action_dict['skype_manager_phone_action'] is not None: # self._process_phone_event( action_dict['skype_manager_phone_action'] ) if self.current_experiment_log.experiment_name[0] == "POMDP": action_dict = self.determine_action_confidence( param_dict ) self.machine_action = action_dict['pomdp_dict']['new_machine_action'] self.belief_for_policy = action_dict['pomdp_dict']['belief_for_policy'] print "\n=================" print "CURRENT POLICY" print self.belief_for_policy print "=================" #print "==========BELIEF==========" #print self.belief_for_policy #print "==========================" # Update the belief as appropriate self.pomdp.belief = action_dict['pomdp_dict']['final_belief'] current_turn = tbh_experiment_controller.experiment_turn( \ time = time.time(), \ utterance_file = None, \ param_dict = param_dict, \ utterance_hypothesis = param_dict['nbest_list'][0]['text'], hypothesis_code = \ self.determine_action_code( param_dict['feature_set'] ), \ confidence_score = confidence_score, \ system_response = \ action_dict['pomdp_dict']['new_machine_action'] ) elif self.current_experiment_log.experiment_name[0] == "THRESHOLD": threshold = 0.75 action_dict = self.determine_action_threshold( param_dict, threshold ) self.belief_for_policy = tbh_pomdp_util.reset_belief( self.pomdp ) self.pomdp.belief = tbh_pomdp_util.reset_belief( self.pomdp ) if confidence_score < threshold: self.machine_action = action_dict['pomdp_dict']['new_machine_action'] current_turn = tbh_experiment_controller.experiment_turn( \ time = time.time(), \ utterance_file = None, \ param_dict = param_dict, \ utterance_hypothesis = param_dict['nbest_list'][0]['text'], hypothesis_code = \ self.determine_action_code( param_dict['feature_set'] ), \ confidence_score = confidence_score, \ system_response = \ action_dict['pomdp_dict']['new_machine_action'] ) else: self.machine_action = action_dict['pomdp_dict']['new_machine_action'] current_turn = tbh_experiment_controller.experiment_turn( \ time = 
time.time(), \ utterance_file = None, \ param_dict = param_dict, \ utterance_hypothesis = param_dict['nbest_list'][0]['text'], hypothesis_code = \ self.determine_action_code( param_dict['feature_set'] ), \ confidence_score = confidence_score, \ system_response = \ self.determine_action_code( param_dict['feature_set'] ) ) else: raise NameError( 'neither option as name of experiment' ) self.current_experiment_log.turn_list.append( current_turn ) # print self.current_experiment_log.turn_list # add this to the param_dict param_dict[ 'action_dict' ] = action_dict if len( self.current_experiment_log.turn_list ) > 0: for turn in self.current_experiment_log.turn_list: print turn.time, turn.utterance_file, turn.utterance_hypothesis, turn.hypothesis_code, turn.confidence_score, turn.system_response self._processed_nbest_event( param_dict )