def get_schema_intents(
        split: Literal['train', 'test', 'dev']) -> Dict[str, Set[str]]:
    """Group the intent names found in a split's schema by intent type.

    Parameters
    ----------
    split
        The split whose schema is scanned for intents.

    Returns
    -------
    intents
        A mapping with exactly two keys, ``'transactional'`` and ``'search'``,
        each holding the set of intent names of that type found in `split`.
        An intent is filed under ``'transactional'`` when its
        ``'is_transactional'`` field is truthy and under ``'search'``
        otherwise.
    """
    names_by_type = {'transactional': set(), 'search': set()}
    for service_schema in schema_iterator(split):
        for intent_spec in service_schema['intents']:
            # Select the bucket first, then record the name in it.
            intent_type = (
                'transactional' if intent_spec['is_transactional']
                else 'search'
            )
            names_by_type[intent_type].add(intent_spec['name'])
    return names_by_type
def count_intents() -> int:
    """Count the distinct intent names defined across all dataset splits.

    Returns
    -------
    The number of unique intent names found in the schemas of every split
    listed in ``_SPLIT_NAMES``. An intent name that appears in several
    services or several splits is counted once.
    """
    # NOTE(review): this previously iterated over ``SPLIT_NAMES``; every other
    # function in this module that walks the splits uses ``_SPLIT_NAMES``, so
    # the same constant is used here for consistency.
    intent_names = {
        intent_dict['name']
        for split in _SPLIT_NAMES
        for service in schema_iterator(split)
        for intent_dict in service['intents']
    }
    return len(intent_names)
def get_services(split: Literal['train', 'test', 'dev']) -> Set[str]:
    """Collect the names of the services listed in a split's schema.

    Parameters
    ----------
    split
        The split for which service names are to be returned.

    Returns
    -------
    Set containing the ``'service_name'`` of every service yielded by the
    schema iterator for `split`.
    """
    service_names = set()
    for service_schema in schema_iterator(split):
        service_names.add(service_schema['service_name'])
    return service_names
def get_categorical_slots(
        binary_slots_by_service: Dict[str, List[str]]
) -> Tuple[List[str], Dict[str, Dict[str, List[str]]]]:
    """Gather the categorical slots of every service in the corpus.

    Parameters
    ----------
    binary_slots_by_service
        A mapping from service names to the names of slots that take only
        ``True``/``False`` and ``0`` and ``1`` values. Services missing from
        the mapping are treated as having no binary slots.

    Returns
    -------
    all_cat_slots
        A list with the unique names of all categorical slots found across
        the splits. The list is built from a set, so its order is arbitrary.
    cat_slots_by_service
        A mapping with the structure::

            {
                'service_name': {'slot_name': List[str], values of the slot}
            }

        where the slot names are categorical slots (i.e., take a finite
        number of values).

    Raises
    ------
    AssertionError
        If a service occurs more than once and a slot recorded at its first
        occurrence is missing, or has different values, in a later one.
    """
    slots_per_service = {}
    unique_slot_names = set()
    for split in _SPLIT_NAMES:
        for service in schema_iterator(split):
            name = service['service_name']
            service_cat_slots = _get_service_categorical_slots(
                service, binary_slots_by_service.get(name, []))
            unique_slot_names.update(service_cat_slots.keys())
            if name not in slots_per_service:
                # First time this service is seen: record its slots as-is.
                slots_per_service[name] = service_cat_slots
                continue
            # The service was already seen (e.g., in an earlier split): the
            # slots recorded then must reappear here with identical values.
            for slot_name, values in slots_per_service[name].items():
                assert slot_name in service_cat_slots
                assert values == service_cat_slots[slot_name]
    return list(unique_slot_names), slots_per_service
def get_binary_slots() -> Tuple[List[str], Dict[str, List[str]]]:
    """Get names of slots with binary values.

    For these slots, there is no delexicalisation annotation.

    Returns
    -------
    binary_slots
        List of the unique binary slot names found across all services,
        ordered with ``alphabetical_sort_key``. Empty if no binary slots
        were collected.
    service_binary_slots
        Mapping from services to the binary slots contained in their schemas,
        converted with ``cast_vals_to_sorted_list`` so that it can be written
        to .json.
    """
    service_binary_slots = {}
    for split in _SPLIT_NAMES:
        for service in schema_iterator(split):
            # presumably a mapping {service_name: set of binary slot names};
            # verify against `_find_service_binary_slots`. A service seen in
            # several splits keeps the entry from its last occurrence.
            service_binary_slots.update(_find_service_binary_slots(service))
    # Union onto an empty set: the unbound ``set.union(*values)`` raises a
    # TypeError when there are no values at all, whereas this form simply
    # yields an empty result.
    binary_slots = sorted(
        set().union(*service_binary_slots.values()),
        key=alphabetical_sort_key,
    )
    return binary_slots, cast_vals_to_sorted_list(service_binary_slots)
def _map_intents_to_services() -> Dict[str, Dict[str, List[str]]]:
    """Map every intent to the services that define it, split by split.

    The same intent (e.g., `FindRestaurant`) can be part of multiple service
    APIs (e.g., `Restaurant_1` and `Restaurant_2`).

    Returns
    -------
    intents_to_services
        A nested ``defaultdict`` of the form::

            {
                'split': {
                    'intent_1': ['Service_1'],
                    'intent_2': ['Service_1', 'Service_2'],
                }
            }

        Looking up a missing split or intent creates an empty entry.
    """
    services_by_intent = defaultdict(lambda: defaultdict(list))
    # Lazily flatten the split -> service -> intent nesting into triples.
    occurrences = (
        (split, intent['name'], service)
        for split in _SPLIT_NAMES
        for service in schema_iterator(split)
        for intent in service["intents"]
    )
    for split, intent_name, service in occurrences:
        services_by_intent[split][intent_name].append(service['service_name'])
    return services_by_intent