Esempio n. 1
0
def test_gp_sarsa_funcs():
    """Walk a PomdpGpSarsaPolicy through one simulated turn, printing each step.

    A fresh belief state is summarised and handed to the policy for an
    initial action. A system action / user utterance pair matching that
    action is then fabricated and used to update the belief state, after
    which the policy is asked for its next action (with -1 as the first
    argument, presumably the turn reward - confirm against the policy).
    Finally a reward of 10 is reported and an initial action is requested
    again for a brand new belief state.
    """
    rule = '--------------------------------------'

    def show(*items):
        # Print every item between two separator lines
        print(rule)
        for item in items:
            print(str(item))
        print(rule)

    knowledge = Knowledge()
    belief = HISBeliefState(knowledge)
    show(belief)

    policy = PomdpGpSarsaPolicy(knowledge)
    summary = SummaryState(belief)
    action = policy.get_initial_action(summary)
    show(action)

    # Fabricate the system prompt and a plausible user reply for the action
    room_query = {'patient': 'ray', 'location': '3512'}
    if action == 'confirm_action':
        system_action = SystemAction('confirm_action', 'searchroom', room_query)
        utterance = Utterance('affirm', None, None)
    else:
        if action == 'request_missing_param':
            system_action = SystemAction('request_missing_param', 'searchroom')
        else:
            system_action = SystemAction('repeat_goal')
        utterance = Utterance('inform_full', 'searchroom', room_query)
    show(system_action, utterance)

    belief.update(system_action, [utterance])
    show(belief)

    summary = SummaryState(belief)
    action = policy.get_next_action(-1, summary)
    show(action)

    policy.update_final_reward(10)

    # Start over with an untouched belief state
    belief = HISBeliefState(knowledge)
    show(belief)

    summary = SummaryState(belief)
    action = policy.get_initial_action(summary)
    show(action)
Esempio n. 2
0
    def __init__(self,
                 parser,
                 grounder,
                 policy,
                 u_input,
                 output,
                 parse_depth=10):
        """Store the agent's collaborators and create its initial dialog state.

        The five collaborators and ``parse_depth`` are kept as attributes
        (``u_input`` is stored as ``self.input``). A new ``Knowledge`` object,
        a belief state built from it and an initial 'repeat_goal' system
        action are created as well.
        """
        # Collaborators handed in by the caller
        self.parser, self.grounder, self.policy = parser, grounder, policy
        self.input, self.output = u_input, output
        self.parse_depth = parse_depth

        # Nothing has been heard yet
        self.state = None
        self.n_best_utterances = None

        self.knowledge = Knowledge()
        self.belief_state = HISBeliefState(self.knowledge)
        self.previous_system_action = SystemAction('repeat_goal')

        # Names of the dialog actions, and the handler for each one at the
        # same position in the second list
        self.dialog_actions = self.knowledge.system_dialog_actions
        self.dialog_action_functions = [
            self.request_user_initiative,
            self.confirm_action,
            self.request_missing_param,
        ]
    def confirm_action(self, args):
        """Ask the user to confirm the action in ``args[0]`` and process the reply.

        ``args[0]`` must expose ``name`` (the goal) and ``params`` (positional
        values). Values are matched to parameter names through
        ``self.knowledge.param_order`` when the goal is listed there. The
        prompt and the reply are logged; a reply of 'stop' (ignoring case and
        surrounding whitespace) sets ``self.dialogue_stopped`` instead of
        updating the state.
        """
        candidate = args[0]
        goal = candidate.name

        system_action = SystemAction('confirm_action', goal)
        system_action.referring_params = dict()
        self.previous_system_action = system_action

        if goal in self.knowledge.param_order:
            ordered_names = self.knowledge.param_order[goal]
            for position, param_name in enumerate(ordered_names):
                # Only positions the candidate actually has a value for
                if position < len(candidate.params):
                    system_action.referring_params[param_name] = \
                        candidate.params[position]

        prompt = self.response_generator.get_sentence(system_action)
        self.output.say(prompt)
        reply = self.get_user_input()
        self.cur_turn_log = [system_action, reply]
        self.dialog_objects_log.append(self.cur_turn_log)

        if reply.lower().strip() != 'stop':
            self.update_state(reply)
        else:
            self.dialogue_stopped = True
    def request_missing_param(self, args):
        """Ask the user for the parameter at position ``args[0]`` of the likeliest goal.

        The goal with the highest value in ``self.state.user_action_belief``
        is used to phrase the question (the first one wins on ties). The
        prompt and the reply are logged; a reply of 'stop' (ignoring case and
        surrounding whitespace) ends the dialogue. Any other reply is stored
        in ``self.parser_train_data`` under the requested parameter name and
        then used to update the state.
        """
        param_position = args[0]

        # Crude approximation used only to produce an understandable prompt:
        # talk about whichever goal currently has the highest belief.
        beliefs = self.state.user_action_belief
        likeliest_goal = None
        for candidate in beliefs:
            if likeliest_goal is not None and not (
                    beliefs[likeliest_goal] < beliefs[candidate]):
                continue
            likeliest_goal = candidate

        system_action = SystemAction('request_missing_param', likeliest_goal)
        self.previous_system_action = system_action
        ordered_names = self.knowledge.param_order[likeliest_goal]
        system_action.extra_data = [ordered_names[param_position]]
        print('%s %s' % ('self.previous_system_action.extra_data = ',
                         system_action.extra_data))

        prompt = self.response_generator.get_sentence(system_action)
        self.output.say(prompt)
        reply = self.get_user_input()
        self.cur_turn_log = [system_action, reply]
        self.dialog_objects_log.append(self.cur_turn_log)

        if reply.lower().strip() == 'stop':
            self.dialogue_stopped = True
            return

        # File the reply under the parameter that was asked about
        theme = system_action.extra_data[0]
        if theme not in self.parser_train_data:
            self.parser_train_data[theme] = []
        self.parser_train_data[theme].append(reply)

        self.update_state(reply)
Esempio n. 5
0
    def initiate_dialog_to_get_action(self, u):
        """Converse with the user, starting from ``u``, until the policy yields an action.

        Each round increments the belief state's turn counter and asks the
        policy to resolve the current state to an action. While it returns
        None, the policy is asked for a dialog action instead; a matching
        ``SystemAction`` is recorded in ``self.previous_system_action`` and
        the handler registered for that dialog action is invoked.

        Returns the first non-None action produced by the policy. Exits the
        process via ``sys.exit`` if the policy names an unknown dialog action.
        """
        self.state = StaticDialogState.StaticDialogState()
        self.update_state_from_user_initiative(u)

        while True:
            self.belief_state.increment_dialog_turns()
            print("dialog state: " + str(self.state))  # DEBUG
            action = self.policy.resolve_state_to_action(self.state)

            print('%s %s' % ('Belief state: ', str(self.belief_state)))

            if action is not None:
                return action

            # The state does not pin down a user goal yet, so take a dialog
            # action that moves towards one.
            selection = self.policy.select_dialog_action(self.state)
            dialog_action, dialog_action_args, user_goal_object = selection
            if dialog_action not in self.dialog_actions:
                sys.exit("ERROR: unrecognized dialog action '" +
                         dialog_action +
                         "' returned by policy for state " +
                         str(self.state))
            self.state.previous_action = [dialog_action, dialog_action_args]

            # Object describing the system action about to be performed;
            # dialog_action is its name/type.
            system_action = SystemAction(dialog_action)
            self.previous_system_action = system_action
            print('\nCreated system action\n')

            # user_goal_object is an Action describing what the user wants:
            # .name is the goal, .params the positional parameter values.
            goal_is_named = (user_goal_object is not None
                             and user_goal_object.name is not None)
            if goal_is_named:
                system_action.referring_goal = user_goal_object.name

            known_values = []
            if (dialog_action is not None and goal_is_named
                    and user_goal_object.params is not None):
                known_values = [
                    (position, value)
                    for position, value in enumerate(user_goal_object.params)
                    if value is not None
                ]
            if known_values:
                ordered_names = self.knowledge.param_order[
                    user_goal_object.name]
                system_action.referring_params = dict(
                    (ordered_names[position], value)
                    for position, value in known_values)

            # Dispatch to the handler stored at the same index as the name
            handler = self.dialog_action_functions[
                self.dialog_actions.index(dialog_action)]
            handler(dialog_action_args)
    def initEnvironment(self):
        """Reset the environment, then build the units and record a port file for each.

        ``self.ports`` is split on whitespace into individual ports. In
        "docker" mode ``buildContainers.sh`` is run and one detached
        container is started per port; in "vagrant" mode ``buildVMs.sh`` is
        run and the output of a ``vagrant port`` pipeline is captured per
        port. Both modes write ``./SystemAction/port_<port>`` for every port.
        Whatever the mode, ``SystemAction.ActionsForSetup`` is called last.
        """
        Logger.log("Resetting Environment...", "warning")
        self.resetEnvironment()
        Logger.log("Initing Environment...", "warning")
        # Idea: the flag could be passed into the container as a parameter
        ports = self.ports.split()
        images = ["one", "two", "three"]
        print(ports)

        if self.mode == "docker":
            # Create the docker containers for the game pieces
            build_args = str(self.ports) + " " + str(self.units)
            print("------going into buildCons " + build_args)
            os.system("bash ./buildContainers.sh " + build_args)
            for position, port in enumerate(ports):
                os.system("touch ./SystemAction/ports")
                image = images[position]
                Runner.Run([
                    "docker", "run", "-d",
                    "--cidfile=./units/" + image + ".id",
                    "-p", port + ":" + port,
                    "hc/" + image,
                ])
                port_file = "./SystemAction/port_" + str(port)
                os.system("echo " + str(port) + " > " + port_file)
                # Running a copy command when the container starts is
                # problematic; other solutions still need to be explored.
        elif self.mode == "vagrant":
            Runner.Run(["pwd"])
            os.system("bash ./buildVMs.sh " + str(self.ports) + " " +
                      str(self.units))
            for vm_number, port in enumerate(ports, 1):
                Logger.log("Vagrant: per port - " + port, "okblue")

                os.system("touch ./SystemAction/ports")
                # Pipeline that prints one port of the vm_number-th
                # virtualbox machine listed by vagrant
                port_query = (
                    "vagrant port $(vagrant global-status | grep virtualbox"
                    " | awk '{print $1}' | sed -n " + str(vm_number) +
                    "p) | tail -n1 | awk '{print $4}'")
                Logger.log(port_query, "okblue")
                os.system(port_query + " > ./SystemAction/port_" + str(port))
                # Running a copy command when the machine starts is
                # problematic; other solutions still need to be explored.
        SystemAction.ActionsForSetup(images, self.mode, ports)
 def request_user_initiative(self, args):
     """Ask the user to state their goal again and process the reply.

     ``args`` is accepted for a uniform handler signature but not used.
     The prompt and the reply are logged; a reply of 'stop' (ignoring case
     and surrounding whitespace) sets ``self.dialogue_stopped``. Any other
     reply is stored under 'full' in ``self.parser_train_data`` and used
     to update the state.
     """
     print('In request_user_initiative')
     system_action = SystemAction('repeat_goal')
     self.previous_system_action = system_action
     prompt = self.response_generator.get_sentence(system_action)
     self.output.say(prompt)
     reply = self.get_user_input()
     self.cur_turn_log = [system_action, reply]
     self.dialog_objects_log.append(self.cur_turn_log)
     if reply.lower().strip() != 'stop':
         # Keep the complete utterance as parser training data
         if 'full' not in self.parser_train_data:
             self.parser_train_data['full'] = []
         self.parser_train_data['full'].append(reply)
         self.update_state(reply)
     else:
         self.dialogue_stopped = True
Esempio n. 8
0
def test_gp_sarsa_resolving_summary_action():
    """Print what the policy resolves each summary action type to.

    The belief state is first updated with a 'repeat_goal' system action
    answered by a full 'searchroom' utterance. Each of the four summary
    action types is then passed to
    ``PomdpGpSarsaPolicy.get_system_action_requirements`` and the result is
    printed element by element, or 'None' when nothing is returned.
    """
    knowledge = Knowledge()
    belief = HISBeliefState(knowledge)
    system_action = SystemAction('repeat_goal')
    utterance = Utterance('inform_full', 'searchroom',
                          {'patient': 'ray', 'location': '3512'})
    belief.update(system_action, [utterance])
    print(str(belief))

    summary = SummaryState(belief)
    policy = PomdpGpSarsaPolicy(knowledge)
    action_types = ('repeat_goal', 'request_missing_param',
                    'confirm_action', 'take_action')
    for action_type in action_types:
        print('%s %s' % ('a = ', action_type))
        requirements = policy.get_system_action_requirements(
            action_type, summary)
        if requirements is not None:
            for item in requirements:
                print(str(item))
        else:
            print('None')
        print('------------------------')
    def __init__(self,
                 parser,
                 grounder,
                 policy,
                 u_input,
                 output,
                 parse_depth=10):
        """Initialise the base ``DialogAgent`` and the extra state this agent keeps.

        All arguments are forwarded unchanged to ``DialogAgent.__init__``.
        On top of that the agent gets a template-based response generator,
        its own ``Knowledge`` object, a belief state built from it, and a
        set of logging / parser-retraining attributes that start out unset.
        """
        DialogAgent.__init__(self, parser, grounder, policy, u_input, output,
                             parse_depth)
        self.policy = policy

        # Turns system actions into understandable English
        self.response_generator = TemplateBasedGenerator()
        # Source of typechecking information
        self.knowledge = Knowledge()

        self.state = HISBeliefState(self.knowledge)
        self.previous_system_action = SystemAction('repeat_goal')
        self.n_best_utterances = None
        self.first_turn = True

        # Parser retraining: the flag controls retraining at the end of
        # every dialogue; the remaining fields hold the data it would use
        self.retrain_parser = False
        self.max_parses_examined_in_retraining = 10
        self.parser_train_data = self.max_prob_user_utterances = None

        # Logs kept for future use
        self.final_action_log = self.lexical_addition_log = None
        # Per-turn record of the system action and the user response (plus
        # other details useful for retraining), meant to be pickled
        self.dialog_objects_logfile = self.dialog_objects_log = None
        self.cur_turn_log = None
        self.log_header = 'pomdp'
Esempio n. 10
0
    def get_system_action_requirements(self, action_type, state):
        """Turn a summary action type into a fully specified system action.

        Args:
            action_type: one of 'repeat_goal', 'take_action',
                'confirm_action' or 'request_missing_param'.
            state: summary state exposing ``knowledge`` (with
                ``goal_actions`` and ``param_order``), ``top_hypothesis``
                and ``second_hypothesis``. Element 0 of a hypothesis is read
                for ``possible_goals`` and ``possible_param_values``.

        Returns:
            For 'take_action', whatever ``self.resolve_state_to_goal(state)``
            returns. For the other known types, a ``SystemAction`` whose
            goal, ``referring_params`` and ``extra_data`` are filled in as
            far as the hypotheses allow. ``None`` for an unknown type.

        Random choices are drawn with ``np.random.uniform``.
        """

        def pick(options):
            # Uniform random element; truncating a draw from [0, len) gives
            # a valid index for any non-empty sequence.
            return options[int(np.random.uniform(0, len(options)))]

        if action_type == 'repeat_goal':
            return SystemAction(action_type)

        if action_type == 'take_action':
            return self.resolve_state_to_goal(state)

        if action_type == 'confirm_action':
            if state.top_hypothesis is None or state.top_hypothesis[0] is None:
                # No hypothesis to verify, so confirm a random goal
                return SystemAction(action_type,
                                    pick(state.knowledge.goal_actions))

            top = state.top_hypothesis[0]
            if len(top.possible_goals) > 0:
                goal = pick(top.possible_goals)
            else:
                # The top hypothesis has no goals: verify any known goal.
                # (Indexing the empty possible_goals list here, as was done
                # before, could only ever raise IndexError.)
                goal = pick(state.knowledge.goal_actions)

            system_action = SystemAction(action_type, goal)
            param_order = state.knowledge.param_order[goal]
            partition_params = top.possible_param_values
            if partition_params is None:
                return system_action

            # Only confirm a param when the hypothesis allows exactly one
            # value for it
            params = dict()
            for param_name in param_order:
                if (param_name in partition_params
                        and len(partition_params[param_name]) == 1):
                    params[param_name] = partition_params[param_name][0]
            system_action.referring_params = params
            return system_action

        if action_type == 'request_missing_param':
            if state.top_hypothesis is None or state.top_hypothesis[0] is None:
                # No hypothesis at all: any param of any goal can be asked
                goal = pick(state.knowledge.goal_actions)
                system_action = SystemAction(action_type, goal)
                system_action.extra_data = [
                    pick(state.knowledge.param_order[goal])
                ]
                return system_action

            top = state.top_hypothesis[0]
            if len(top.possible_goals) > 0:
                goal = top.possible_goals[0]
            else:
                # Same fallback as for confirm_action instead of failing on
                # an empty list
                goal = pick(state.knowledge.goal_actions)

            system_action = SystemAction(action_type, goal)
            param_order = state.knowledge.param_order[goal]
            partition_params = top.possible_param_values
            if partition_params is None:
                return system_action

            params = dict()
            uncertain_params = list()
            for param_name in param_order:
                if (param_name in partition_params
                        and len(partition_params[param_name]) == 1):
                    params[param_name] = partition_params[param_name][0]
                else:
                    uncertain_params.append(param_name)
            system_action.referring_params = params

            if len(uncertain_params) > 0:
                # The top partition is unsure about a param: ask for one
                system_action.extra_data = [pick(uncertain_params)]
                return system_action

            # The top partition has a single value for every param, but its
            # confidence may still be too low.
            second = state.second_hypothesis
            if (second is None or second[0] is None
                    or second[0].possible_param_values is None):
                # Nothing to compare against. Ask about a random param so
                # that repeated calls do not get stuck on the same one.
                system_action.extra_data = [pick(param_order)]
                return system_action

            # Heuristic: prefer a param for which the second hypothesis
            # still allows several values, the top value among them.
            second_params = second[0].possible_param_values
            for param_name in param_order:
                top_param_value = partition_params[param_name][0]
                if (param_name in second_params
                        and top_param_value in second_params[param_name]
                        and len(second_params[param_name]) != 1):
                    system_action.extra_data = [param_name]
                    return system_action

            # Probably an inappropriate action here; ask about a random param
            system_action.extra_data = [pick(param_order)]
            return system_action

        return None
Esempio n. 11
0
 def train_policy_from_single_old_log(self, conv_log_name, dialogue_successful=True):
     """Train the policy by replaying one tab-separated conversation log.

     Lines whose first field is 'USER' are treated as user responses:
     when a system action is pending, the state is updated with the
     (vocabulary-mapped) response and the policy is trained on the
     resulting transition with ``self.knowledge.per_turn_reward``. Other
     lines with exactly six fields set the pending system action; lines
     with any other field count are skipped. After the last line the
     policy is trained on a terminal 'take_action' step.

     Args:
         conv_log_name: path of the log file to replay.
         dialogue_successful: selects ``correct_action_reward`` (True) or
             ``wrong_action_reward`` (False) for the terminal step.

     A user turn that raises is counted in ``self.num_not_parsed`` and
     its traceback is printed; replay then continues with the next line.
     """
     divider = '---------------------------------------'

     def debug(label, value):
         # Same output as the Python 2 statement ``print label, value``
         print('%s %s' % (label, value))

     # The context manager closes the log even if training raises part-way
     with open(conv_log_name, 'r') as conv_log:
         self.max_prob_user_utterances = list()
         self.parser_train_data = dict()
         self.state = HISBeliefState(self.knowledge)
         for line in conv_log:
             parts = line.split('\t')
             print(divider)  # DEBUG
             debug('parts = ', parts)  # DEBUG
             print(divider)  # DEBUG

             if parts[0] == 'USER':
                 if self.previous_system_action is not None:
                     try:
                         prev_state = SummaryState(self.state)
                         response = self.map_text_to_desired_vocab(parts[1])
                         debug('Updating state for response :', response)  # DEBUG
                         self.update_state(response)
                         next_state = SummaryState(self.state)
                         print(divider)  # DEBUG
                         print(divider)  # DEBUG
                         debug('prev_state : ', str(prev_state))  # DEBUG
                         print(divider)  # DEBUG
                         debug('action : ', str(self.previous_system_action))  # DEBUG
                         print(divider)  # DEBUG
                         debug('next_state : ', str(next_state))  # DEBUG
                         print(divider)  # DEBUG
                         self.policy.train(
                             prev_state,
                             self.previous_system_action.action_type,
                             next_state, self.knowledge.per_turn_reward)
                         self.previous_system_action = None
                     except Exception:
                         # Best effort: skip turns that cannot be processed,
                         # but let KeyboardInterrupt/SystemExit through.
                         self.num_not_parsed += 1
                         print("Exception in user code:")  # DEBUG
                         print('-' * 60)  # DEBUG
                         traceback.print_exc(file=sys.stdout)  # DEBUG
                         print('-' * 60)  # DEBUG
                 continue

             if len(parts) != 6:
                 continue
             self.previous_system_action = \
                 self._system_action_from_old_log_fields(parts)
             print(divider)  # DEBUG
             debug('action : ', str(self.previous_system_action))  # DEBUG
             print(divider)  # DEBUG

     # The policy still needs to be fed the terminal reward
     prev_state = SummaryState(self.state)
     if dialogue_successful:
         reward = self.knowledge.correct_action_reward
     else:
         reward = self.knowledge.wrong_action_reward
     self.policy.train(prev_state, 'take_action', None, reward)

 def _system_action_from_old_log_fields(self, parts):
     """Map the six fields of a system log line to a SystemAction, or None."""
     if parts[4] == 'user_initiative':
         return SystemAction('repeat_goal')
     if parts[4] != 'system_initiative':
         # Not a useful dialogue step (mostly robot politeness)
         return None

     # Work out whether this was a confirmation or a request for a param
     details = ast.literal_eval(parts[3])
     goal = None
     params = dict()
     if details[0] == 'at':
         goal = 'at'
         if details[2] is not None:
             params['location'] = self.get_mapped_value(details[2].strip())
     elif details[0] == 'served':
         goal = 'bring'
         if details[1] is not None:
             params['patient'] = self.get_mapped_value(details[1].strip())
         if details[2] is not None:
             params['recipient'] = self.get_mapped_value(details[2].strip())

     target = parts[5].strip()
     details_header = ['action', 'patient', 'recipient']
     if target not in details_header:
         return None
     # A targeted slot with no value yet was being requested; one that
     # already has a value was being confirmed
     if details[details_header.index(target)] is None:
         action_type = 'request_missing_param'
     else:
         action_type = 'confirm_action'
     return SystemAction(action_type, goal, params)
    def initiate_dialog_to_get_action(self, u):
        """Run a dialog, starting from utterance ``u``, until an action is chosen.

        The opening utterance is stored as parser training data under 'full'
        and used to update a fresh dialog state. While the policy cannot
        resolve the state to an action, it is asked for a dialog action and
        the handler registered for that action is invoked. Once an action is
        found the user is asked whether it was right, the dialog log is
        saved when ``self.dialog_objects_logfile`` is set, and the parser is
        retrained if the action was confirmed and ``self.retrain_parser`` is
        true.

        Returns the chosen action, or None if a handler set
        ``self.dialogue_stopped``. Exits the process via ``sys.exit`` if the
        policy names an unknown dialog action.
        """
        self.dialogue_stopped = False
        self.parser_train_data = dict()
        self.parser_train_data['full'] = [u]

        self.state = UtteranceBasedStaticDialogState()

        # The first logged turn holds the fresh state, the implicit
        # 'repeat_goal' prompt and the opening utterance
        self.cur_turn_log = [self.state, 'repeat_goal', u]
        self.dialog_objects_log = [self.cur_turn_log]
        self.previous_system_action = SystemAction('repeat_goal')
        self.update_state(u)

        # select next action from state
        action = None
        while action is None:
            self.cur_turn_log = []
            print "dialog state: " + str(self.state)  # DEBUG
            action = self.policy.resolve_state_to_action(self.state)

            # if the state does not clearly define a user goal, take a dialog action to move towards it
            if action is None:
                dialog_action, dialog_action_args = self.policy.select_dialog_action(
                    self.state)
                self.cur_turn_log.append(str(dialog_action))
                if dialog_action not in self.dialog_actions:
                    sys.exit("ERROR: unrecognized dialog action '" +
                             dialog_action +
                             "' returned by policy for state " +
                             str(self.state))
                self.state.previous_action = [
                    dialog_action, dialog_action_args
                ]
                # Handlers are stored at the same index as their action name
                self.dialog_action_functions[self.dialog_actions.index(
                    dialog_action)](dialog_action_args)
                # NOTE(review): the handlers visible elsewhere in this file
                # replace self.cur_turn_log and append it to the log
                # themselves; if those are the handlers used here, this
                # records the turn a second time - confirm.
                self.dialog_objects_log.append(self.cur_turn_log)
                if self.dialogue_stopped:
                    # A handler flagged the dialogue as stopped: save what
                    # was logged (no action, success False) and give up
                    print 'Dialogue stopped'
                    complete_log_object = (self.log_header,
                                           self.dialog_objects_log, None,
                                           False, self.parser_train_data)
                    if self.dialog_objects_logfile is not None:
                        save_obj_general(complete_log_object,
                                         self.dialog_objects_logfile)
                    return None

        success = False
        # The loop above only ends with a non-None action (or returns), so
        # this check always passes
        if action is not None:
            self.output.say(
                self.response_generator.get_action_sentence(
                    action, self.final_action_log) +
                ' Was this the right action? ')
            response = self.get_user_input()
            # '<ERROR/>' is counted as success, as are 'y' and 'yes' in any
            # letter case
            if response == '<ERROR/>' or response.lower(
            ) == 'y' or response.lower() == 'yes':
                success = True

        complete_log_object = (self.log_header, self.dialog_objects_log,
                               action, success, self.parser_train_data)
        if self.dialog_objects_logfile is not None:
            save_obj_general(complete_log_object, self.dialog_objects_logfile)

        if action is not None and success and self.retrain_parser:
            self.train_parser_from_dialogue(action)

        return action