def __init__(self, tagsets, model_dir,
             ratio_thres=0.8, max_num=2,
             slot_prob_thres=0.6, value_prob_thres=0.8,
             mode='hr', bs_mode='enhance', bs_alpha=0.0,
             unified_thres=0.5):
    """Set up the tracker state, thresholds and helper components.

    Args:
        tagsets: Stored on the instance and forwarded to ``DSTC4_rules``
            and ``value_extractor``.
        model_dir: Location handed to ``slot_value_classifier.LoadModel``.
        ratio_thres: Stored as ``self.ratio_thres`` and forwarded to
            ``value_extractor``.
        max_num: Forwarded to ``value_extractor``.
        slot_prob_thres: Stored as ``self.slot_prob_threshold``.
        value_prob_thres: Stored as ``self.value_prob_threshold``.
        mode: Stored as ``self.mode``.
        bs_mode: First argument to ``BeliefState``.
        bs_alpha: Second argument to ``BeliefState``.
        unified_thres: Stored as ``self.unified_thres``; also the value
            whose logarithm is taken to derive the three ``*_factor``
            attributes.

    Raises:
        Exception: If the classifier reports ``is_set`` as false after
            ``LoadModel`` has been called.
    """
    # Plain inputs and containers that start out empty.
    self.tagsets = tagsets
    self.frame = {}
    self.memory = {}
    self.beliefstate = BeliefState(bs_mode, bs_alpha)

    # Thresholds exactly as supplied by the caller.
    self.slot_prob_threshold = slot_prob_thres
    self.value_prob_threshold = value_prob_thres
    self.ratio_thres = ratio_thres
    self.unified_thres = unified_thres

    # Each factor is log(unified_thres) taken in the base of the matching
    # threshold, so that threshold ** factor == unified_thres.
    self.slot_prob_factor = math.log(unified_thres, slot_prob_thres)
    self.value_prob_factor = math.log(unified_thres, value_prob_thres)
    self.ratio_thres_factor = math.log(unified_thres, ratio_thres)

    self.mode = mode

    # Build the classifier and ask it to load its model from model_dir.
    classifier = slot_value_classifier()
    self.svc = classifier
    classifier.LoadModel(model_dir)

    self.tuple_extractor = Tuple_Extractor()
    self.rules = DSTC4_rules(tagsets)
    self.appLogger = logging.getLogger(self.MY_ID)

    # Checked only after the logger exists so the failure can be logged.
    if not self.svc.is_set:
        load_error = 'Error: Fail to load slot_value_classifier model!'
        self.appLogger.error(load_error)
        raise Exception(load_error)

    self.value_extractor = value_extractor(tagsets, ratio_thres, max_num)
def find_stop_words(dataset):
    """Count tokens in the transcripts of utterances labelled with "ACK".

    Walks every ``(log_utter, label_utter)`` pair of every call in
    *dataset*, writing ``session_id:utter_index`` to stderr for each pair.
    When any speech act of the label has "ACK" in its ``attributes``, the
    lower-cased transcript is tokenized and each token is counted.

    Args:
        dataset: Iterable of calls; each call exposes ``log['session_id']``
            and iterates as ``(log_utter, label_utter)`` pairs.

    Returns:
        dict mapping token -> number of occurrences in the selected
        utterances.
    """
    token_counts = {}

    # The classifier is used only as a holder for the feature object,
    # whose tokenizer splits the transcripts.
    classifier = slot_value_classifier()
    classifier.feature = feature(None)

    for call in dataset:
        for log_entry, label_entry in call:
            sys.stderr.write(
                '%d:%d\n' % (call.log['session_id'], log_entry['utter_index'])
            )

            # Build the full list first so every act is inspected, then
            # test whether at least one of them carries "ACK".
            ack_hits = [
                "ACK" in act['attributes']
                for act in label_entry['speech_act']
            ]
            if not any(ack_hits):
                continue

            lowered = log_entry['transcript'].lower()
            for token in classifier.feature.tokenizer.tokenize(lowered):
                token_counts[token] = token_counts.get(token, 0) + 1

    return token_counts