Ejemplo n.º 1
0
    def act_on(self, domainInControl, state):
        '''
        Provides the next system action based on the domain in control and the belief state.

        The belief state is mapped to an abstract representation which is used for all committee members.
        Every committee member contributes its Q estimates over the abstract executable actions; these are
        combined by ``_bayes_committee_calculator`` and the selected abstract action is converted back into a
        summary action and finally into a master action of the domain in control.

        If a member returns something other than a dict from ``getMeanVar_for_executable_actions``, that value
        is taken directly as the summary action and the committee calculation is skipped. No abstract action
        exists on that path, so ``actToBeRecorded`` of the controlling policy is set to ``None``.

        Side effects: updates ``lastSystemAction``, ``summaryAct``, ``prevbelief`` and ``actToBeRecorded`` on the
        policy of the domain in control, may boot up not yet loaded member domains and, for multiagent learning,
        stores the abstract state/action on ``self``.

        :param domainInControl: the domain unique identifier string of the domain in control
        :type domainInControl: str
        :param state: the belief state to act on
        :type state: :class:`~utils.DialogueState.DialogueState`
        :returns: the next system action, wrapped in a ``DiaAct.DiaAct``
        '''
        belief = state.getDomainState(domainInControl)

        domainPolicies = self.manager.domainPolicies

        if not isinstance(domainPolicies[domainInControl], CommitteeMember):
            logger.error(
                "Committee member is not of type CommitteeMember: {}".format(
                    domainInControl))

        controlPolicy = domainPolicies[domainInControl]

        # 1. Abstract the belief state (belief) based on domainInControl:
        abstracted_state = controlPolicy.get_State(belief)

        # 2. Determine the domains non-executable actions and then abstract them:
        nonExecutableActions = controlPolicy.actions.getNonExecutable(
            belief, controlPolicy.lastSystemAction)
        nonExecutableActions = controlPolicy.abstract_actions(
            nonExecutableActions)

        # 3. Get the statistics needed for BCM decision:
        # 3.1 - Q(b,*) for all abstract executable actions for each committee member
        domainQs = {}
        just_return_bye = False
        # Stays None when a member short-circuits the decision below; without this
        # default the bookkeeping in step 7 would hit an unbound local on that path.
        abstractAct = None
        for dstring in self.members:
            if domainPolicies[dstring] is None:
                self.manager.bootup(dstring)
            if isinstance(domainPolicies[dstring], CommitteeMember):
                # method exists as type must be GPPolicy or derived
                padded_state = domainPolicies[dstring].get_State(
                    abstracted_state)
                domain_QmeanVar = domainPolicies[
                    dstring].getMeanVar_for_executable_actions(
                        belief, padded_state, nonExecutableActions)
                if not isinstance(domain_QmeanVar, dict):
                    # have returned the bye action --
                    just_return_bye = True
                    summaryAct = domain_QmeanVar
                    break
                else:
                    domainQs[dstring] = domain_QmeanVar
            else:
                logger.warning(
                    'Skipping policy committee member %s as policy is not a GP'
                    % dstring)

        # Looked up again after the loop, as bootup() is called on the manager
        # in between (presumably it fills entries of this mapping -- TODO confirm).
        controlPolicy = domainPolicies[domainInControl]

        if not just_return_bye:
            # 3.2 - get domain priors over acts:
            # NOTE(review): domainQs only holds members that are CommitteeMember
            # instances, so this lookup fails with KeyError if the domain in
            # control did not contribute any Q values.
            priors = {}
            for abs_act in domainQs[domainInControl]:
                priors[abs_act] = controlPolicy.getPriorVar(belief, abs_act)

            # 4. form BCM decision
            # ----- get BCM abstract summary action
            abstractAct = self._bayes_committee_calculator(
                domainQs, priors, domainInControl,
                controlPolicy.learner._scale)
            logger.info('BCM: abstract action: %s' % abstractAct)
            # 5. Convert abstract action back to real action:
            summaryAct = controlPolicy.unabstract_action(abstractAct)
        logger.info('BCM: summary action: %s' % summaryAct)

        # 6. Finally convert to master action:
        systemAct = controlPolicy.actions.Convert(
            belief, summaryAct, controlPolicy.lastSystemAction)

        # 7. Bookkeeping:
        controlPolicy.lastSystemAction = systemAct
        controlPolicy.summaryAct = summaryAct  # TODO -check- not sure this is correct
        controlPolicy.prevbelief = belief
        controlPolicy.actToBeRecorded = abstractAct

        # 8. Multiagent learning
        if self.learning_method == "multiagent":
            abstract_GPAction = controlPolicy.get_Action(summaryAct)
            self.domainInControl = domainInControl
            self.multiagent_abstract_state = abstracted_state
            self.multiagent_abstract_action = abstract_GPAction

        _systemAct = DiaAct.DiaAct(systemAct)

        return _systemAct
Ejemplo n.º 2
0
    def transform(self, sysAct):
        '''
        Renders a system act given in semantic form as text using the rules of the generator.

        Every rule in ``self.rules`` is applied to the parsed act. Only rules reporting at least one matched act
        type are candidates. The candidate with the fewest missing items wins; ties are broken first by the
        larger number of matched act types and then by the larger number of matched items. The search stops
        early at a chosen rule with exactly one type match, no missing items and as many matches as the act
        has items.

        :param sysAct: input system action (semantic form).
        :type sysAct: str
        :returns: (str) natural language
        '''
        utterance = DiaAct.DiaAct(sysAct)

        # FIXME hack to transform system acts with slot op "!=" to "=" and add slot-value pair other=true which is needed by NLG rule base
        # assumption: "!=" only appears if there are no further alternatives, ie, inform(name=none, name!=place!, ...)
        negated_items = [entry for entry in utterance.items if entry.op == "!="]
        for entry in negated_items:
            entry.op = u"="
        if negated_items:
            utterance.items.append(dact.DactItem(u'other', u'=', u'true'))

        # Scores and output of the best rule seen so far.
        winner = None
        winner_out = None
        winner_map = None
        fewest_missing = 1000
        most_type_matches = 0
        most_matches = 0
        for rule in self.rules:
            logger.debug('Checking Rule %s' % str(rule))
            out, matches, missing, type_match, non_term_map = rule.generate(
                utterance)
            if not type_match > 0:
                # A rule without any matched act type is never a candidate.
                continue
            logger.debug(
                'Checking Rule %s: type_match=%d, missing=%d, matches=%d, output=%s'
                % (str(rule), type_match, missing, matches, ' '.join(out)))

            # Fewer missing items first, then more type matches, then more item matches.
            is_better = missing < fewest_missing or (
                missing == fewest_missing and (
                    type_match > most_type_matches or (
                        type_match == most_type_matches
                        and matches > most_matches)))
            if not is_better:
                continue

            winner, winner_out, winner_map = rule, out, non_term_map
            fewest_missing = missing
            most_type_matches = type_match
            most_matches = matches

            # Nothing can beat a rule that covers the whole act with a single type match.
            if type_match == 1 and missing == 0 and matches == len(
                    utterance.items):
                break

        if winner is None:
            logger.debug('No rule used.')
        elif fewest_missing > 0:
            logger.warning(
                'While transforming %s, there were missing items.' %
                sysAct)

        # NOTE(review): when no rule was chosen both arguments are None here --
        # confirm that compute_ftn copes with that.
        return ' '.join(self.compute_ftn(winner_out, winner_map))
Ejemplo n.º 3
0
    def _getTurnReward(self, turnInfo):
        '''
        Computes the turn reward regarding turnInfo.

        Every turn starts with a reward of ``-self.penalise_all_turns``. If turnInfo is a dict containing both
        'usermodel' and 'sys_act', the reward is adjusted as follows:

        * ``self.wrong_venue_penalty`` is subtracted if the system named a venue which ``_isValidVenue`` rejects
          for the goal constraints (with any ``name=`` constraint relaxed to 'dontcare'), or if the system
          informed name=none while the act has a value conflicting with those constraints.
        * ``self.not_mentioned_value_penalty`` is subtracted if a 'select' or 'confirm' act uses a value of a
          system requestable slot that is not in ``self.mentioned_values``.
        * ``self.reward_venue_recommended`` is added if ``self.venue_recommended`` is set and none of the goal
          requests is still ``None``.

        Side effects: updates ``self.user_goal``, ``self.last_venue_recomended``, ``self.venue_recommended`` and
        the mentioned values; if ``self.using_tasks`` is set, the system act is appended to ``self.DM_history``.

        :param turnInfo: parameters necessary for computing the turn reward, eg., system act or model of the simulated user.
        :type turnInfo: dict
        :return: the turn reward.
        '''

        # Immediate reward for each turn.
        reward = -self.penalise_all_turns

        # isinstance() is False for None as well, so no separate None check is needed.
        if not isinstance(turnInfo, dict):
            return reward

        if 'usermodel' in turnInfo and 'sys_act' in turnInfo:
            um = turnInfo['usermodel']
            self.user_goal = um.goal.constraints

            # Work on a copy of the goal constraints in which a concrete venue name
            # is relaxed, so that the user's goal itself is left untouched.
            prev_consts = copy.deepcopy(um.goal.constraints)
            for item in prev_consts:
                if item.slot == 'name' and item.op == '=':
                    item.val = 'dontcare'
            requests = um.goal.requests
            sys_act = DiaAct.DiaAct(turnInfo['sys_act'])
            user_act = um.lastUserAct

            # Check if the most recent venue satisfies constraints.
            name = sys_act.get_value('name', negate=False)
            if name not in ('none', None):
                # Venue is recommended.
                self.last_venue_recomended = name
                if self._isValidVenue(name, prev_consts):
                    # Success except if the next user action is reqalts.
                    if user_act.act != 'reqalts':
                        logger.debug('Correct venue is recommended.')
                        self.venue_recommended = True  # Correct venue is recommended.
                    else:
                        logger.debug(
                            'Correct venue is recommended but the user has changed his mind.'
                        )
                else:
                    # Previous venue did not match.
                    logger.debug('Venue is not correct.')
                    self.venue_recommended = False
                    logger.debug(
                        'Goal constraints: {}'.format(prev_consts))
                    reward -= self.wrong_venue_penalty

            # If system inform(name=none) but it was not right decision based on wrong values.
            if name == 'none' and sys_act.has_conflicting_value(prev_consts):
                reward -= self.wrong_venue_penalty

            # Check if the system used slot values previously not mentioned for 'select' and 'confirm'.
            not_mentioned = sys_act.act in ('select', 'confirm') and any(
                set(sys_act.get_values(slot)) - self.mentioned_values[slot]
                for slot in
                Ontology.global_ontology.get_system_requestable_slots(
                    self.domainString))
            if not_mentioned:
                reward -= self.not_mentioned_value_penalty

            # If the correct venue has been recommended and all requested slots are filled,
            # check if this dialogue is successful.
            if self.venue_recommended and None not in requests.values():
                reward += self.reward_venue_recommended

            # Update mentioned values.
            self._update_mentioned_value(sys_act)
            self._update_mentioned_value(user_act)

        if 'sys_act' in turnInfo and self.using_tasks:
            self.DM_history.append(turnInfo['sys_act'])

        return reward
Ejemplo n.º 4
0
 def read_from_stream(self, scanner):
     '''
     Builds a dialogue act from the tokens at the current position of the scanner.

     The text of successive tokens (``scanner.cur[1]``) is concatenated until a ';' or ':' token or the
     tokenizer end marker is reached. The terminating token is left as the scanner's current token.

     :param scanner: token source exposing ``cur`` (indexable: [0] token type, [1] token text) and ``next()``
     :returns: ``DiaAct.DiaAct`` built from the concatenated token text
     '''
     pieces = []
     while True:
         token = scanner.cur
         text = token[1]
         if text == ';' or token[0] == tokenize.ENDMARKER or text == ':':
             break
         pieces.append(text)
         scanner.next()
     return DiaAct.DiaAct(''.join(pieces))