def parse(self, dai_str):
    """
    Parses the dialogue act item in text format into a structured form.

    The text is expected to look like ``dat(name=value)``; the slot name
    and the value are optional, and the value may be enclosed in quotes.

    Arguments:
        dai_str -- the textual form of the dialogue act item; surrounding
            whitespace is ignored

    Returns this object.

    Raises DialogueActItemException if the opening or the closing
    parenthesis is missing, or if the text between the parentheses does
    not split on '=' into one or two parts.

    """
    dai_str = dai_str.strip()

    try:
        first_par_idx = dai_str.index('(')
    except ValueError:
        raise DialogueActItemException(
            'Parsing error in: "{dai}". Missing opening parenthesis.'
            .format(dai=dai_str))

    # The slice below drops the last character on the assumption that it
    # is the closing parenthesis.  Without this check, a truncated item
    # would silently lose the last character of its slot name or value.
    if not dai_str.endswith(')'):
        raise DialogueActItemException(
            'Parsing error in: "{dai}". Missing closing parenthesis.'
            .format(dai=dai_str))

    self._dat = dai_str[:first_par_idx]

    # Remove the parentheses, parse slot name and value.
    # NOTE(review): a name/value already stored on this object is kept
    # when the text does not carry one -- confirm this is intended.
    dai_nv = dai_str[first_par_idx + 1:-1]
    if dai_nv:
        name_val = split_by(dai_nv, splitter='=', quotes='"')
        if len(name_val) == 1:
            # There is only a slot name.
            self._name = name_val[0]
        elif len(name_val) == 2:
            # There is a slot name and a value.
            self._name, self._value = name_val
            # Strip the enclosing quotes from the value.
            if self._value and self._value[0] in ["'", '"']:
                self._value = self._value[1:-1]
        else:
            raise DialogueActItemException(
                "Parsing error in: {dai_str}: {atval}".format(
                    dai_str=dai_str, atval=name_val))

    # Invalidate the cached string form.
    # NOTE(review): presumably rebuilt lazily elsewhere -- confirm.
    self._str = None
    return self
def parse(self, dai_str):
    """
    Parses the dialogue act item in text format into a structured form.

    The text is expected to look like ``dat(name=value)``; the slot name
    and the value are optional, and the value may be enclosed in quotes.

    Arguments:
        dai_str -- the textual form of the dialogue act item; surrounding
            whitespace is ignored

    Returns this object.

    Raises DialogueActItemException if the opening or the closing
    parenthesis is missing, or if the text between the parentheses does
    not split on '=' into one or two parts.

    """
    dai_str = dai_str.strip()

    try:
        first_par_idx = dai_str.index('(')
    except ValueError:
        raise DialogueActItemException(
            'Parsing error in: "{dai}". Missing opening parenthesis.'
            .format(dai=dai_str))

    # The slice below drops the last character on the assumption that it
    # is the closing parenthesis.  Without this check, a truncated item
    # would silently lose the last character of its slot name or value.
    if not dai_str.endswith(')'):
        raise DialogueActItemException(
            'Parsing error in: "{dai}". Missing closing parenthesis.'
            .format(dai=dai_str))

    self._dat = dai_str[:first_par_idx]

    # Remove the parentheses, parse slot name and value.
    # NOTE(review): a name/value already stored on this object is kept
    # when the text does not carry one -- confirm this is intended.
    dai_nv = dai_str[first_par_idx + 1:-1]
    if dai_nv:
        name_val = split_by(dai_nv, splitter='=', quotes='"')
        if len(name_val) == 1:
            # There is only a slot name.
            self._name = name_val[0]
        elif len(name_val) == 2:
            # There is a slot name and a value.
            self._name, self._value = name_val
            # Strip the enclosing quotes from the value.
            if self._value and self._value[0] in ["'", '"']:
                self._value = self._value[1:-1]
        else:
            raise DialogueActItemException(
                "Parsing error in: {dai_str}: {atval}".format(
                    dai_str=dai_str, atval=name_val))

    # Invalidate the cached string form.
    # NOTE(review): presumably rebuilt lazily elsewhere -- confirm.
    self._str = None
    return self
def parse(self, da_str):
    """
    Builds the items of this dialogue act from its textual form.

    The text is split by split_by on '&', with '(' / ')' passed as the
    parenthesis characters and '"' as the quote character.  Every
    resulting part is turned into a DialogueActItem.  Items stored in
    this DA before the call are discarded.

    Arguments:
        da_str -- the textual form of the dialogue act

    """
    if self._dais:
        # Empty the existing list in place rather than rebinding it.
        del self._dais[:]

    dai_strs = split_by(da_str,
                        splitter='&',
                        opening_parentheses='(',
                        closing_parentheses=')',
                        quotes='"')

    items = self._dais
    for dai_str in dai_strs:
        items.append(DialogueActItem(dai=dai_str))

    # The freshly parsed items are stored in the order they were read.
    self._dais_sorted = False
def load_semantics(file_name):
    """
    Loads a mapping from keys to their semantics from a UTF-8 text file.

    Every non-blank line is expected in the form ``key => semantics``.
    The semantics part is split by split_by on '&' (with '(' / ')' passed
    as the parenthesis characters and '"' as the quote character) and the
    result is stored under the stripped key.  Blank lines are skipped and
    a later line with the same key replaces an earlier one.

    Arguments:
        file_name -- path of the file to read

    Returns a defaultdict(list) mapping each key to the value returned by
    split_by for its semantics.

    Raises IndexError for a non-blank line that does not contain '=>'.

    """
    semantics = defaultdict(list)

    # The context manager closes the file even if a malformed line raises
    # half way through the loop.
    with codecs.open(file_name, encoding='UTF-8') as sem_file:
        for line in sem_file:
            line = line.strip()
            if not line:
                continue

            parts = line.split("=>")
            key = parts[0].strip()
            sem = parts[1].strip()

            semantics[key] = split_by(sem, '&', '(', ')', '"')

    return semantics
def load_semantics(file_name):
    """
    Loads a mapping from keys to their semantics from a UTF-8 text file.

    Every non-blank line is expected in the form ``key => semantics``.
    The semantics part is split by split_by on '&' (with '(' / ')' passed
    as the parenthesis characters and '"' as the quote character) and the
    result is stored under the stripped key.  Blank lines are skipped and
    a later line with the same key replaces an earlier one.

    Arguments:
        file_name -- path of the file to read

    Returns a defaultdict(list) mapping each key to the value returned by
    split_by for its semantics.

    Raises IndexError for a non-blank line that does not contain '=>'.

    """
    semantics = defaultdict(list)

    # The context manager closes the file even if a malformed line raises
    # half way through the loop.
    with codecs.open(file_name, encoding='UTF-8') as sem_file:
        for line in sem_file:
            line = line.strip()
            if not line:
                continue

            parts = line.split("=>")
            key = parts[0].strip()
            sem = parts[1].strip()

            semantics[key] = split_by(sem, '&', '(', ')', '"')

    return semantics
def parse(self, da_str):
    """
    Parses a dialogue act in the CUED text format into dialogue act items.

    The text has the form ``dat(slot,slot,...)``.  The part before the
    first '(' is stored in self.dat and decides how the slots are mapped
    to DialogueActItem objects, which replace the content of self._dais.
    Slot strings are lowercased before they are handed to CUEDSlot, and
    slots for which CUEDSlot raises ValueError are skipped.

    Arguments:
        da_str -- the CUED textual form of a single dialogue act

    Raises:
        ValueError -- if da_str contains no '(' or if split_by_comma does
            not split it into exactly one part
        CuedDialogueActError -- if the dialogue act type is unknown, or
            if a 'select' or 'deny' act has fewer parsable slots than it
            needs

    """
    def inform_or_deny(slot):
        # A negated slot is denied, any other slot is informed.
        dat = 'deny' if slot.negated else 'inform'
        return DialogueActItem(dat, slot.name, slot.value)

    # Get the dialogue act type.
    first_par_idx = da_str.index("(")
    self.dat = da_str[:first_par_idx]

    if len(split_by_comma(da_str)) != 1:
        raise ValueError('Too many (or none -- too few) DAs in CUED DA '
                         'representation.')

    # Drop the enclosing parentheses; slots are processed lowercased.
    slots_str = da_str[first_par_idx:].lower()[1:-1]

    # Always start from an empty item list and a defined (possibly empty)
    # slot list, so that acts without slots, such as 'inform()', do not
    # fail on an undefined name and repeated parsing does not accumulate.
    self._dais = list()
    slots = list()
    if slots_str:
        for slot_str in split_by(slots_str, splitter=',', quotes='"'):
            try:
                slots.append(CUEDSlot(slot_str))
            except ValueError:
                # Skip slots we cannot parse.
                pass

    if self.dat == 'inform':
        for slot in slots:
            self._dais.append(inform_or_deny(slot))

    elif self.dat == 'request':
        for slot in slots:
            if slot.value:
                self._dais.append(inform_or_deny(slot))
            else:
                self._dais.append(
                    DialogueActItem('request', slot.name, slot.value))

    elif self.dat == 'confirm':
        for slot in slots:
            dat = 'inform' if slot.name == 'name' else 'confirm'
            self._dais.append(DialogueActItem(dat, slot.name, slot.value))

    elif self.dat == 'select':
        # XXX We cannot represent DAIS with multiple slots as of now.
        # Therefore, the select DAT is split into two DAIs here.
        if len(slots) < 2:
            raise CuedDialogueActError(
                'CUED DA type "select" needs two slots when parsing '
                '"{da_str}".'.format(da_str=da_str))
        for slot in slots[:2]:
            self._dais.append(
                DialogueActItem('select', slot.name, slot.value))

    elif self.dat in ('silence', 'thankyou', 'ack', 'bye', 'hangup',
                      'repeat', 'help', 'restart', 'null'):
        self._dais.append(DialogueActItem(self.dat))

    elif self.dat in ('hello', 'affirm', 'negate', 'reqalts', 'reqmore'):
        self._dais.append(DialogueActItem(self.dat))
        # Iterate over the parsed slots, not over self._dais, which is
        # being appended to here and holds items without `negated`.
        for slot in slots:
            self._dais.append(inform_or_deny(slot))

    elif self.dat == 'deny':
        if not slots:
            raise CuedDialogueActError(
                'CUED DA type "deny" needs at least one slot when '
                'parsing "{da_str}".'.format(da_str=da_str))
        # The first slot is always denied; the rest follow their own
        # negation flag.
        self._dais.append(
            DialogueActItem('deny', slots[0].name, slots[0].value))
        for slot in slots[1:]:
            self._dais.append(inform_or_deny(slot))

    else:
        raise CuedDialogueActError(
            'Unknown CUED DA type "{dat}" when parsing "{da_str}".'
            .format(dat=self.dat, da_str=da_str))

    self._dais_sorted = False
slts = da.get_slots_and_values() for slt in slts: slots[slt].update(slts[slt]) fo = open( os.path.join(outdir, os.path.basename(fn).replace('.sem', '.grp')), 'w+') for key in sorted(da_clustered): fo.write(key) fo.write(' <=> ') fo.write(str(sorted(list(da_clustered[key]))) + '\n') fo.close() dai_unique = set() for da in sorted(da_clustered): dais = split_by(da, '&', '(', ')', '"') for dai in dais: dai_unique.add(dai) fo = open( os.path.join(outdir, os.path.basename(fn).replace('.sem', '.grp.dais')), 'w+') for dai in sorted(dai_unique): fo.write(dai) fo.write('\n') fo.close() da_reclustered = collections.defaultdict(set) for key in da_clustered: sem_reduced = re.sub(r'([a-z_0-9]+)(="[a-zA-Z0-9_\'! ]+")', r'\1',
slts = da.get_slots_and_values() for slt in slts: slots[slt].update(slts[slt]) fo = open(os.path.join( outdir, os.path.basename(fn).replace('.sem', '.grp')), 'w+') for key in sorted(da_clustered): fo.write(key) fo.write(' <=> ') fo.write(str(sorted(list(da_clustered[key]))) + '\n') fo.close() dai_unique = set() for da in sorted(da_clustered): dais = split_by(da, '&', '(', ')', '"') for dai in dais: dai_unique.add(dai) fo = open(os.path.join( outdir, os.path.basename(fn).replace('.sem', '.grp.dais')), 'w+') for dai in sorted(dai_unique): fo.write(dai) fo.write('\n') fo.close() da_reclustered = collections.defaultdict(set) for key in da_clustered: sem_reduced = re.sub( r'([a-z_0-9]+)(="[a-zA-Z0-9_\'! ]+")', r'\1', key) da_reclustered[sem_reduced].update(da_clustered[key])
def parse(self, da_str):
    """
    Parses a dialogue act in the CUED text format into dialogue act items.

    The text has the form ``dat(slot,slot,...)``.  The part before the
    first '(' is stored in self.dat and decides how the slots are mapped
    to DialogueActItem objects, which replace the content of self._dais.
    Slot strings are lowercased before they are handed to CUEDSlot, and
    slots for which CUEDSlot raises ValueError are skipped.

    Arguments:
        da_str -- the CUED textual form of a single dialogue act

    Raises:
        ValueError -- if da_str contains no '(' or if split_by_comma does
            not split it into exactly one part
        CuedDialogueActError -- if the dialogue act type is unknown, or
            if a 'select' or 'deny' act has fewer parsable slots than it
            needs

    """
    def inform_or_deny(slot):
        # A negated slot is denied, any other slot is informed.
        dat = 'deny' if slot.negated else 'inform'
        return DialogueActItem(dat, slot.name, slot.value)

    # Get the dialogue act type.
    first_par_idx = da_str.index("(")
    self.dat = da_str[:first_par_idx]

    if len(split_by_comma(da_str)) != 1:
        raise ValueError('Too many (or none -- too few) DAs in CUED DA '
                         'representation.')

    # Drop the enclosing parentheses; slots are processed lowercased.
    slots_str = da_str[first_par_idx:].lower()[1:-1]

    # Always start from an empty item list and a defined (possibly empty)
    # slot list, so that acts without slots, such as 'inform()', do not
    # fail on an undefined name and repeated parsing does not accumulate.
    self._dais = list()
    slots = list()
    if slots_str:
        for slot_str in split_by(slots_str, splitter=',', quotes='"'):
            try:
                slots.append(CUEDSlot(slot_str))
            except ValueError:
                # Skip slots we cannot parse.
                pass

    if self.dat == 'inform':
        for slot in slots:
            self._dais.append(inform_or_deny(slot))

    elif self.dat == 'request':
        for slot in slots:
            if slot.value:
                self._dais.append(inform_or_deny(slot))
            else:
                self._dais.append(
                    DialogueActItem('request', slot.name, slot.value))

    elif self.dat == 'confirm':
        for slot in slots:
            dat = 'inform' if slot.name == 'name' else 'confirm'
            self._dais.append(DialogueActItem(dat, slot.name, slot.value))

    elif self.dat == 'select':
        # XXX We cannot represent DAIS with multiple slots as of now.
        # Therefore, the select DAT is split into two DAIs here.
        if len(slots) < 2:
            raise CuedDialogueActError(
                'CUED DA type "select" needs two slots when parsing '
                '"{da_str}".'.format(da_str=da_str))
        for slot in slots[:2]:
            self._dais.append(
                DialogueActItem('select', slot.name, slot.value))

    elif self.dat in ('silence', 'thankyou', 'ack', 'bye', 'hangup',
                      'repeat', 'help', 'restart', 'null'):
        self._dais.append(DialogueActItem(self.dat))

    elif self.dat in ('hello', 'affirm', 'negate', 'reqalts', 'reqmore'):
        self._dais.append(DialogueActItem(self.dat))
        # Iterate over the parsed slots, not over self._dais, which is
        # being appended to here and holds items without `negated`.
        for slot in slots:
            self._dais.append(inform_or_deny(slot))

    elif self.dat == 'deny':
        if not slots:
            raise CuedDialogueActError(
                'CUED DA type "deny" needs at least one slot when '
                'parsing "{da_str}".'.format(da_str=da_str))
        # The first slot is always denied; the rest follow their own
        # negation flag.
        self._dais.append(
            DialogueActItem('deny', slots[0].name, slots[0].value))
        for slot in slots[1:]:
            self._dais.append(inform_or_deny(slot))

    else:
        raise CuedDialogueActError(
            'Unknown CUED DA type "{dat}" when parsing "{da_str}".'
            .format(dat=self.dat, da_str=da_str))

    self._dais_sorted = False