Beispiel #1
0
def do_pos(text, tagger=None):
	"""Tag ``text`` and return its tokens together with their tags.

	Args:
		text: The text handed to ``tagger.tag``.
		tagger: Optional object exposing ``tag(text)`` that yields
			``(token, tag)`` pairs. When omitted, a new ``POSTagger`` is
			created for this call, which is what happened unconditionally
			before this parameter existed. Passing one in lets callers
			reuse a single tagger across many calls.

	Returns:
		A ``(tag_map, order)`` tuple. ``tag_map`` maps each token to its
		tag; when a token occurs more than once, the tag of its last
		occurrence wins. ``order`` lists the tokens in the sequence the
		tagger produced them, repeats included. The ideographic full stop
		token is left out of both.
	"""
	if tagger is None:
		tagger = POSTagger()
	tag_map = {}
	order = []
	for token, tag in tagger.tag(text):
		# Sentence-final full stops carry no information for callers.
		if token == '。':
			continue
		tag_map[token] = tag
		order.append(token)
	return tag_map, order
Beispiel #2
0
class Dialog():
	"""Loads dialog templates and derives per-dialog metadata from them.

	The text handling relies on ``str.decode`` / ``unicode.encode``, i.e. it
	is written for Python 2 byte strings.
	"""

	def __init__(self):
		# Starts empty; none of the methods visible in this class fill it.
		self.context_dialog = {}
		# One tagger instance shared by every difficulty calculation.
		self.tagger = POSTagger()

	def add_dialog_template(self, dialog_template=data_path+'sentence_template.txt'):
		"""Read a template file and register every dialog found in it.

		The file is split into sections on ``==========`` lines. Each section
		consists of a context header, a ``++++++++++`` line and one or more
		dialogs separated by ``----------`` lines. The header has the form
		``location+location|action+action``. Every non-empty dialog is
		passed to ``add_dialog`` with its difficulty and the categories
		found by ``find_associate_category``.

		Args:
			dialog_template: Path of the template file to read.

		Raises:
			IndexError: If a non-empty section lacks the ``++++++++++``
				separator, or its header lacks the ``|`` separator.
		"""
		# Read everything up front so the handle is closed before processing.
		with open(dialog_template, 'r') as fp:
			content = fp.read()

		for section in content.split('==========\n'):
			if section.strip() == '':
				continue
			parts = section.split('++++++++++\n')
			context_fields = parts[0].strip().split('|')
			locations = context_fields[0].split('+')
			actions = context_fields[1].split('+')
			dialogs = parts[1].strip()
			for dialog in dialogs.split('----------\n'):
				dialog = dialog.strip()
				if dialog == '':
					# A stray or trailing separator yields an empty entry;
					# skip it the same way empty sections are skipped.
					continue
				difficulty = self.calculate_dialog_difficulty(dialog)
				action_category, location_category = \
						self.find_associate_category(dialog, locations, actions)
				add_dialog(dialog, difficulty, action_category, location_category)

	def calculate_dialog_difficulty(self, dialog):
		"""Return a difficulty score for ``dialog``.

		Each line of ``dialog`` is read as tab-separated fields and only the
		first field (the Chinese sentence) is scored. The score is
		``(2 * slots + segments) * lines``, where ``slots`` counts the ``[``
		characters and ``segments`` counts the items the tagger returns for
		the sentence with its brackets removed.

		Args:
			dialog: Newline-separated dialog text.

		Returns:
			The integer score described above.
		"""
		sentences = dialog.split('\n')
		num_slots = 0
		num_segments = 0
		for sentence in sentences:
			chinese = sentence.split('\t')[0].strip()
			num_slots += chinese.count('[')
			# Tag the sentence without slot brackets so they are not
			# counted as segments of their own.
			plain = chinese.replace('[', '').replace(']', '')
			num_segments += len(self.tagger.tag(plain.decode('utf-8')))
		return (num_slots*2+num_segments)*len(sentences)

	def find_associate_category(self, dialog, locations, actions):
		"""Collect the action/location categories referenced by a dialog.

		Every ``[slot]`` in the first tab-separated field of each line is
		looked up with ``get_action_concept_category`` and
		``get_location_concept_category``. The combined results are then
		restricted to entries whose first element is among the ids returned
		by ``get_action_ids(actions)`` / ``get_location_ids(locations)``.

		Args:
			dialog: Newline-separated dialog text.
			locations: Location names from the section header.
			actions: Action names from the section header.

		Returns:
			A ``(action_category, location_category)`` tuple of lists.
		"""
		sentences = dialog.split('\n')
		action_category = []
		location_category = []
		location_ids = get_location_ids(locations)
		action_ids = get_action_ids(actions)
		for sentence in sentences:
			chinese = sentence.split('\t')[0].strip()
			# The capture group yields the slot name without its brackets.
			slot_names = re.findall(r'\[(\w+)\]', \
					chinese.decode('utf-8'), flags=re.UNICODE)
			# Look each distinct slot up once per sentence, in order of first
			# appearance so the result order is reproducible.
			seen = set()
			for slot_name in slot_names:
				if slot_name in seen:
					continue
				seen.add(slot_name)
				concept = slot_name.encode('utf-8')
				action_category.extend(get_action_concept_category(concept))
				location_category.extend(get_location_concept_category(concept))
		location_category = [entry for entry in location_category
				if entry[0] in location_ids]
		action_category = [entry for entry in action_category
				if entry[0] in action_ids]
		return (action_category, location_category)
Beispiel #3
0
	def __init__(self):
		"""Create an empty ``context_dialog`` mapping and a ``POSTagger``."""
		self.context_dialog = dict()
		self.tagger = POSTagger()