def parse_action(self, action_list):
    """
    Turn the wizard's act annotation of one turn into a list of dialog turns.

    :param action_list: Raw act annotation; passed unchanged to
        `_parse_action_aggregated`.
    :return: The actions sorted by their `sort_key`, with consecutive
        repetitions removed. If `self.add_status_slots` is set, a
        `DialogSlot` named `<domain>_status` is placed next to certain
        actions. Returns `[]` (after logging `unclear_booking_domain`) if an
        action of the `booking` domain occurs while `self._domain_substitute`
        is not set.
    """
    actions = self._parse_action_aggregated(action_list)

    # Substitute booking domain
    for action in actions:
        if action.domain != "booking":
            continue
        if not self._domain_substitute:
            log_problem({"type": "unclear_booking_domain"})
            return []
        action.domain = self._domain_substitute
        if action.name in ("advise", "reply"):
            # Let `reply_booking_DOMAIN` be just `reply_DOMAIN`
            action.tags = None
        elif action.tags:
            action.tags.append("booking")
        else:
            action.tags = ["booking"]
        # Register the action again, since its domain/tags just changed
        action.store_in_domain_info()

    # Sort actions
    actions.sort(key=lambda a: a.sort_key)

    # Post-process
    processed = []
    # String form of the most recently kept *action*. Comparing against
    # `processed[-1]` would be wrong after `book`/`offerbooked`, because the
    # last element is then the status slot and a repeated action would
    # slip through.
    previous_action = None
    for action in actions:
        # Remove repetitions of actions
        current = action.to_string()
        if current == previous_action:
            continue
        previous_action = current

        if not self.add_status_slots:
            processed.append(action)
            continue

        # Insert status slots when appropriate
        is_booking_inform = (
            action.name == "inform"
            and action.tags
            and "booking" in action.tags
        )
        if is_booking_inform or action.name == "offerbook":
            status, slot_first = "unique", True
        elif action.name in ("nobook", "nooffer"):
            status, slot_first = "NA", True
        elif action.name in ("book", "offerbooked"):
            # Here the slot is set *after* the action
            status, slot_first = "booked", False
        else:
            processed.append(action)
            continue

        slot = DialogSlot({f"{action.domain}_status": status})
        slot.store_in_domain_info()
        processed.extend([slot, action] if slot_first else [action, slot])

    return processed
def _parse_action_aggregated(self, action_list): action_infos = [] if type(action_list) is not dict: if type(action_list) is str and action_list == "No Annotation": log_problem({"type": "no_annotation"}) else: log_problem({ "type": "bad_action_list_type", "action_list": action_list }) return [] for base_action, slots in action_list.items(): # Parse key (basic action and domain) if "-" in base_action: domain, action_name = base_action.split("-") else: # Single-letter actions like "A" or "N" are actually never requested log_problem({ "type": "bad_action_format", "base_action": base_action }) return [] action_name = action_name.lower() # Parse value (slots) for slot in slots: if slot == ["none", "none"]: spec = "" else: slot_name = slot[0].lower() spec = slot_name.strip() action_info = { "activity": action_name, "spec": spec, "domain": domain.lower() } action_infos += [action_info] # Combine all `action_infos` entries to strings of aggregated actions # for each domain and activity action_groups = {} for info in action_infos: head = info["domain"] + "_" + info["activity"] if head not in action_groups: action_groups[head] = set() action_groups[head].update([info["spec"]]) actions = [] for head, specs in action_groups.items(): domain, activity = head.split("_") string = " - " + activity string += "_" + domain action = DialogTurn.from_string(string) action.store_in_domain_info() actions.append(action) return actions
def find_differences(self, baseline, dataset, results, lookups, branch=""):
    """
    Walk `baseline` and `dataset` in lockstep (depth first) and record where
    they differ.

    :param baseline: Reference structure made of dict/list/str objects
    :param dataset: Structure to compare; must have the same type as
        `baseline` at every visited position
    :param results: List that receives `<branch>_<dataset value>` for every
        string that differs
    :param lookups: List that is extended in place with the result of
        `self.parse_lookup` whenever a list in `dataset` is longer than its
        counterpart in `baseline`
    :param branch: Path of dict keys (joined by `_`) leading to this position
    :raises TypeError: If an object is neither dict, list nor str
    """
    assert type(baseline) is type(dataset)
    kind = type(baseline)

    if kind is str:
        if baseline != dataset:
            results.append(branch + "_" + dataset)
        return

    if kind is dict:
        for key, expected in baseline.items():
            self.find_differences(
                expected, dataset[key], results, lookups,
                "_".join((branch, str(key)))
            )
        return

    if kind is not list:
        raise TypeError("Dataset contains objects that is not dict/list/str.")

    baseline_size, dataset_size = len(baseline), len(dataset)
    if baseline_size > dataset_size:
        # Nothing is compared in this case, the mismatch is only reported
        log_problem({
            "type": "long_baseline",
            "branch": branch,
            "baseline": baseline,
            "dataset": dataset
        })
        return

    # List entries do not extend the branch name
    for expected, actual in zip(baseline, dataset):
        self.find_differences(expected, actual, results, lookups, branch)

    if baseline_size < dataset_size:
        # The Wizard looked up something (a booking was made)
        lookups += self.parse_lookup(branch, baseline, dataset)
def parse_story(self, name, verbose=0, infuse_chitchat_callback=None, chitchat_variability=1):
    """
    Parse a MultiWOZ story.

    :param name: Name of the story (e.g. MUL0129.json)
    :param verbose: Level of output (0 = no print, 1 = print parsed story,
        2 = also print utterances)
    :param infuse_chitchat_callback: Function that takes the current story
        name, present and maximum number of turns and returns the number of
        chitchats that should be infused at this point.
    :param chitchat_variability: How many different chitchat
        intents/actions should be created
    :return: The story as a string, or `None` if at least one problem was
        added to `multiwoz.domain_info.problems` while parsing
    """
    initial_num_problems = len(multiwoz.domain_info.problems)
    log = self.data[name]["log"]
    num_turns = len(log)
    parse_intent = IntentParser(self.slot_parser,
                                add_status_slots=self.add_status_slots)

    # Drop the last five characters of the name (the `.json` of e.g.
    # `MUL0129.json`); `self.acts` is indexed by the shortened name
    name = name[:-5]
    header = f"## story_{name}"
    # The lines of the story are collected and joined once at the end
    lines = [header]
    if verbose > 0:
        print(colored(header, "green"))

    # If we infuse chitchat, then add the chitchat action to the domain file
    # The intent is added in the parse_stories.py script
    if infuse_chitchat_callback is not None:
        if chitchat_variability > 1:
            for v in range(chitchat_variability):
                DialogAction(f"chitchat_{v + 1}", "general").store_in_domain_info()
        else:
            DialogAction("chitchat", "general").store_in_domain_info()

    count_use = 0  # How often the user spoke
    count_wiz = 0  # How often the wizard replied (consecutive actions count as one)
    self._domain_substitute = None

    for step in log:
        if not step["metadata"]:
            # User-texts don't have metadata

            # Possibly infuse a chitchat detour
            if infuse_chitchat_callback is not None:
                # Determine how many chitchats should be added
                req_num_chitchat = infuse_chitchat_callback(
                    name, count_use + count_wiz + 1, num_turns)
                # Create the intent/action pairs and add them to the story
                for _ in range(req_num_chitchat):
                    # Determine the chitchat type
                    if chitchat_variability > 1:
                        cc_name = f"chitchat_{random.randint(1, chitchat_variability)}"
                    else:
                        cc_name = "chitchat"
                    lines.append(DialogIntent(cc_name).to_string())
                    lines.append(DialogAction(cc_name, "general").to_string())

            # User's text
            if verbose > 1:
                print("U: " + step["text"])
            count_use += 1
            continue

        turns_from_wizard = []
        count_wiz += 1

        # Infer user intent from new information
        # This includes the user intent + possible additional slots that
        # come from the wizard looking up information during booking
        turns_from_user, domain_substitute = parse_intent(step["metadata"])
        if domain_substitute:
            self._domain_substitute = domain_substitute

        # If this is the end of the dialog, then we assume the user's
        # `inform{}` (no slots) is actually a `bye`
        if (count_wiz * 2 == len(log) and len(turns_from_user) == 1
                and not turns_from_user[0].slots):
            turns_from_user[0].name = "bye"

        if verbose > 0:
            for turn in turns_from_user:
                print(colored(turn.to_string(), "blue"))

        # Wizard's text
        if verbose > 1:
            print("W: " + step["text"])

        # Wizard's information
        if str(count_wiz) in self.acts[name]:
            action = self.acts[name][str(count_wiz)]
            turns_from_wizard = self.parse_action(action)
            if verbose > 0:
                for turn in turns_from_wizard:
                    print(colored(turn.to_string(), "red"))
        else:
            log_problem({
                "type": "no_action",
                "count_wiz": count_wiz,
                "actions": self.acts[name]
            })

        # Merge adjacent slots: a run of slot/intent turns is folded into
        # the first turn of the run
        all_turns = []
        last_slot = None
        for turn in turns_from_user + turns_from_wizard:
            if turn.is_slot or turn.is_intent:
                if not last_slot:
                    last_slot = turn
                elif not last_slot.slots:
                    last_slot.slots = turn.slots
                elif turn.slots:
                    # Only merge when there is something to merge;
                    # `update(None)` would raise a TypeError
                    last_slot.slots.update(turn.slots)
            else:
                if last_slot:
                    all_turns.append(last_slot)
                    last_slot = None
                all_turns.append(turn)
        if last_slot:
            all_turns.append(last_slot)

        lines.extend(turn.to_string() for turn in all_turns)

    if len(multiwoz.domain_info.problems) > initial_num_problems:
        return None

    # Every line is terminated by a newline and the story is closed by an
    # empty line
    return "\n".join(lines) + "\n\n"
def parse_story_e2e(self, name, verbose=0):
    """
    Parse a MultiWOZ story for end-to-end training.

    The (cleaned) user text is used in place of an intent name and the
    (cleaned) wizard text in place of an action name. As long as no problem
    was logged for this story, the actions parsed from the wizard's
    annotation are stored under the action name in
    `multiwoz.domain_info.e2e_actions`.

    :param name: Name of the story (e.g. MUL0129.json)
    :param verbose: Level of output (0 = no print, 1 = print parsed story,
        2 = also print utterances)
    :return: The story as a string, or `None` if at least one problem was
        added to `multiwoz.domain_info.problems` while parsing
    """

    def clean_text(text):
        # Replace runs of special characters by a blank, remove line breaks,
        # trim the ends and collapse repeated whitespace
        text = re.sub(r"[/:\"'`#]+", " ", text)
        text = text.replace("\n", "").strip(" \t/")
        return re.sub(r"\s\s+", " ", text)

    initial_num_problems = len(multiwoz.domain_info.problems)
    log = self.data[name]["log"]
    parse_intent = IntentParser(self.slot_parser,
                                add_status_slots=self.add_status_slots)

    # Drop the last five characters of the name (the `.json` of e.g.
    # `MUL0129.json`); `self.acts` is indexed by the shortened name
    name = name[:-5]
    header = f"## story_{name}"
    # The lines of the story are collected and joined once at the end
    lines = [header]
    if verbose > 0:
        print(colored(header, "green"))

    count_wiz = 0  # How often the wizard replied (consecutive actions count as one)
    self._domain_substitute = None
    intent_name = None  # Text of the latest user step

    for step in log:
        if not step["metadata"]:
            # User-texts don't have metadata
            if verbose > 0:
                print("U: " + step["text"].strip())
            intent_name = step["text"]
            continue

        turns_from_wizard = []
        count_wiz += 1

        # Infer user intent from new information
        # This includes the user intent + possible additional slots that
        # come from the wizard looking up information during booking
        turns_from_user, domain_substitute = parse_intent(step["metadata"])
        if domain_substitute:
            self._domain_substitute = domain_substitute

        if verbose > 1:
            for turn in turns_from_user:
                print(colored(turn.to_string(), "blue"))

        # Wizard's information
        if str(count_wiz) in self.acts[name]:
            action = self.acts[name][str(count_wiz)]
            turns_from_wizard = self.parse_action(action)
            if verbose > 1:
                for turn in turns_from_wizard:
                    print(colored(turn.to_string(), "red"))
        else:
            log_problem({
                "type": "no_action",
                "count_wiz": count_wiz,
                "story": name,
                "actions": self.acts[name]
            })

        if self.add_status_slots:
            # Merge all slots of this exchange and keep only those whose
            # name ends with "status"
            all_slots = DialogSlot({})
            for turn in turns_from_user + turns_from_wizard:
                if not turn.is_action and turn.slots:
                    all_slots.slots.update(turn.slots)
            status_slots = DialogSlot({
                k: v
                for k, v in all_slots.slots.items() if k.endswith("status")
            })
        else:
            status_slots = None

        # Substitute entities
        intent_name = self._substitute_entity(intent_name)
        action_name = self._substitute_entity(step["text"])

        # Remove line breaks, `/`, and redundant whitespaces or tabs
        intent_name = clean_text(intent_name)
        action_name = clean_text(action_name)

        if verbose > 0:
            print(f"U: {intent_name}")
            print(f"W: {step['text'].strip()}")
            print(f"W: {action_name}")

        # Store action for domain (if no errors occurred)
        if len(multiwoz.domain_info.problems) == initial_num_problems:
            # NOTE(review): `[5:]` cuts the first five characters of each
            # action string, presumably its list prefix - confirm against
            # `to_string`
            multiwoz.domain_info.e2e_actions.update({
                action_name: sorted(
                    a.to_string()[5:] for a in turns_from_wizard
                    if a.is_action
                )
            })

        lines.append(f"* {intent_name}")
        if status_slots is not None and status_slots.slots:
            lines.append(status_slots.to_string())
        lines.append(f" - {action_name}")

    if len(multiwoz.domain_info.problems) > initial_num_problems:
        return None

    # Every line is terminated by a newline and the story is closed by an
    # empty line
    return "\n".join(lines) + "\n\n"