コード例 #1
0
ファイル: msiip_nsvc_tracker.py プロジェクト: liangkai/DSTC4
	def __init__(self, tagsets, model_dir, ratio_thres=0.8, max_num=2,
				slot_prob_thres=0.6, value_prob_thres=0.8,
				mode='hr', bs_mode='enhance', bs_alpha=0.0,
				unified_thres=0.5):
		"""Set up tracker state, load the classifier model and build helpers.

		Args:
			tagsets: stored on the instance and forwarded to ``DSTC4_rules``
				and ``value_extractor``.
			model_dir: passed to ``slot_value_classifier.LoadModel``.
			ratio_thres: stored, and forwarded to ``value_extractor``.
			max_num: forwarded to ``value_extractor``.
			slot_prob_thres: stored as ``slot_prob_threshold``.
			value_prob_thres: stored as ``value_prob_threshold``.
			mode: stored as ``self.mode``.
			bs_mode, bs_alpha: forwarded to ``BeliefState``.
			unified_thres: common target used to derive the ``*_factor``
				exponents below.

		Raises:
			Exception: if the classifier reports ``is_set`` as false after
				``LoadModel`` has been called.
		"""
		# Plain per-instance state.
		self.tagsets = tagsets
		self.mode = mode
		self.frame = {}
		self.memory = {}
		self.beliefstate = BeliefState(bs_mode, bs_alpha)

		# Individual thresholds as supplied by the caller.
		self.slot_prob_threshold = slot_prob_thres
		self.value_prob_threshold = value_prob_thres
		self.ratio_thres = ratio_thres
		self.unified_thres = unified_thres

		# Each factor is log_{threshold}(unified_thres), i.e. the exponent for
		# which threshold ** factor == unified_thres.
		# NOTE: math.log raises for a base of exactly 1.0 or a non-positive
		# argument, so such threshold values are not usable here.
		self.slot_prob_factor = math.log(unified_thres, slot_prob_thres)
		self.value_prob_factor = math.log(unified_thres, value_prob_thres)
		self.ratio_thres_factor = math.log(unified_thres, ratio_thres)

		# Classifier: created first, then asked to load its model.
		self.svc = classifier = slot_value_classifier()
		classifier.LoadModel(model_dir)

		self.tuple_extractor = Tuple_Extractor()
		self.rules = DSTC4_rules(tagsets)
		self.appLogger = logging.getLogger(self.MY_ID)

		# Abort construction when the model did not load.
		if not classifier.is_set:
			message = 'Error: Fail to load slot_value_classifier model!'
			self.appLogger.error(message)
			raise Exception(message)

		self.value_extractor = value_extractor(tagsets, ratio_thres, max_num)
コード例 #2
0
def find_stop_words(dataset):
	"""Count tokens of utterances labelled with an "ACK" speech-act attribute.

	For every (log utterance, label utterance) pair of every call in
	``dataset``, a "session_id:utter_index" progress line is written to
	stderr. When any speech act of the label has "ACK" in its
	``attributes``, the lower-cased transcript is tokenized and each token's
	occurrence count is incremented.

	Args:
		dataset: iterable of calls; each call exposes ``log['session_id']``
			and iterates as (log_utter, label_utter) pairs.

	Returns:
		dict mapping token -> number of occurrences in the matching utterances.
	"""
	counts = {}
	# The classifier object is only used as a holder for the feature
	# object, whose tokenizer is used below.
	classifier = slot_value_classifier()
	classifier.feature = feature(None)

	for call in dataset:
		for (log_utter, label_utter) in call:
			sys.stderr.write('%d:%d\n'%(call.log['session_id'], log_utter['utter_index']))

			# Build the full list (no short-circuit) so that every speech act
			# is inspected, and a malformed act still surfaces as an error.
			has_ack = any(["ACK" in act['attributes'] for act in label_utter['speech_act']])
			if not has_ack:
				continue

			text = log_utter['transcript'].lower()
			for token in classifier.feature.tokenizer.tokenize(text):
				if token in counts:
					counts[token] = counts[token] + 1
				else:
					counts[token] = 1
	return counts