Example #1
0
def split_text_by_ut_tokens(text, ut_set):
	"""Turn the text of one alternative into a list of grammar tokens.

	``text`` is split on single spaces and the empty pieces produced by
	repeated spaces are dropped. A word equal to the ``text`` attribute of
	any item in ``ut_set`` is built with ``fact.create_unterminal``; every
	other word is built with ``fact.create_terminal``.

	Args:
		text: string of space-separated symbols.
		ut_set: iterable of tokens exposing a ``text`` attribute.

	Returns:
		A new list of tokens in the order the words appear in ``text``;
		an empty list when ``text`` holds no words.
	"""
	words = [word for word in text.split(' ') if word != '']
	# Collected once into a set so each membership test below is O(1).
	ut_texts = set(itm.text for itm in ut_set)
	tokens = []
	for word in words:
		if word in ut_texts:
			tokens.append(fact.create_unterminal(word))
		else:
			tokens.append(fact.create_terminal(word))
	# The statements that used to follow this return could never execute
	# and have been removed.
	return tokens
Example #2
0
def split_text_by_ut_tokens(text, ut_set):
    """Turn the text of one alternative into a list of grammar tokens.

    ``text`` is split on single spaces and the empty pieces produced by
    repeated spaces are dropped. A word equal to the ``text`` attribute of
    any item in ``ut_set`` is built with ``fact.create_unterminal``; every
    other word is built with ``fact.create_terminal``.

    Args:
        text: string of space-separated symbols.
        ut_set: iterable of tokens exposing a ``text`` attribute.

    Returns:
        A new list of tokens in the order the words appear in ``text``;
        an empty list when ``text`` holds no words.
    """
    words = [word for word in text.split(' ') if word != '']
    # Collected once into a set so each membership test below is O(1).
    ut_texts = set(itm.text for itm in ut_set)
    tokens = []
    for word in words:
        if word in ut_texts:
            tokens.append(fact.create_unterminal(word))
        else:
            tokens.append(fact.create_terminal(word))
    # The statements that used to follow this return could never execute
    # and have been removed.
    return tokens
Example #3
0
	def augment(self):
		"""Give the grammar a fresh start token that derives the old one.

		Does nothing when ``self.is_augmented`` is already true. Otherwise a
		new unterminal named ``<old start text>__S`` replaces
		``self.start_token``, an expression ``new start -> old start`` is
		appended to ``self.expresses`` and ``self.is_augmented`` is set.
		"""
		if not self.is_augmented:
			previous_start = self.start_token
			# the new start state needs a token distinct from the old one
			self.start_token = fact.create_unterminal(previous_start.text + "__S")
			start_rule = e_fact.create_simple(self.start_token, [[previous_start]])
			self.expresses.append(start_rule)
			self.is_augmented = True
Example #4
0
 def augment(self):
     """Give the grammar a fresh start token that derives the old one.

     Does nothing when ``self.is_augmented`` is already true. Otherwise a
     new unterminal named ``<old start text>__S`` replaces
     ``self.start_token``, an expression ``new start -> old start`` is
     appended to ``self.expresses`` and ``self.is_augmented`` is set.
     """
     if not self.is_augmented:
         previous_start = self.start_token
         # the new start state needs a token distinct from the old one
         augmented_name = previous_start.text + "__S"
         self.start_token = fact.create_unterminal(augmented_name)
         start_rule = e_fact.create_simple(self.start_token,
                                           [[previous_start]])
         self.expresses.append(start_rule)
         self.is_augmented = True
Example #5
0
	def __init__(self, start, others):
		"""Build the grammar from textual rules.

		Every rule is a string ``LEFT -> alt | alt | ...`` whose alternatives
		are space-separated symbols. Rules sharing a left part are merged
		into a single expression.

		Args:
			start: the rule whose left part becomes ``self.start_token``.
			others: iterable of the remaining rule strings.

		Raises:
			ValueError: if a rule does not split into exactly two parts
				around ``->``.
		"""
		self.is_augmented = False
		self.is_non_left_rec = False
		self.is_expanded = False
		self.normalized_mode = None
		# unterminal tokens, one created per rule's left part
		self.ut_tokens = set()
		self.expresses = list()
		start = start.strip()
		others = [itm.strip() for itm in others]
		for itm in others:
			left, _ = itm.split('->')
			# str.strip() returns a new string; the result has to be used,
			# otherwise "S -> a" registers the token text "S " (trailing
			# space), which can never equal a space-split symbol.
			self.ut_tokens.add(fact.create_unterminal(left.strip()))
		left, _ = start.split('->')
		self.start_token = fact.create_unterminal(left.strip())
		self.ut_tokens.add(self.start_token)

		others.append(start)

		# merge expressions which use the same left part
		left_right_dict = {}
		for itm in others:
			left, right = itm.split('->')
			alternatives = [alt.strip() for alt in right.split('|')]
			# key on the stripped left part so it agrees with ut_tokens
			left_right_dict.setdefault(left.strip(), []).extend(alternatives)

		for left, right_text_list in left_right_dict.items():
			tokens_list = []
			for right_text in right_text_list:
				# Trace output of the original is kept; the single-argument
				# parenthesised form prints the same text under both the
				# print statement and the print function.
				print(right_text)
				tokens = split_text_by_ut_tokens(right_text, self.ut_tokens)
				tokens_list.append(tokens)
				print(tokens)
			self.expresses.append(
				e_fact.create_simple(fact.create_unterminal(left), tokens_list))
Example #6
0
    def __init__(self, start, others):
        """Build the grammar from textual rules.

        Every rule is a string ``LEFT -> alt | alt | ...`` whose alternatives
        are space-separated symbols. Rules sharing a left part are merged
        into a single expression.

        Args:
            start: the rule whose left part becomes ``self.start_token``.
            others: iterable of the remaining rule strings.

        Raises:
            ValueError: if a rule does not split into exactly two parts
                around ``->``.
        """
        self.is_augmented = False
        self.is_non_left_rec = False
        self.is_expanded = False
        self.normalized_mode = None
        # unterminal tokens, one created per rule's left part
        self.ut_tokens = set()
        self.expresses = list()
        start = start.strip()
        others = [itm.strip() for itm in others]
        for itm in others:
            left, _ = itm.split('->')
            # str.strip() returns a new string; the result has to be used,
            # otherwise "S -> a" registers the token text "S " (trailing
            # space), which can never equal a space-split symbol.
            self.ut_tokens.add(fact.create_unterminal(left.strip()))
        left, _ = start.split('->')
        self.start_token = fact.create_unterminal(left.strip())
        self.ut_tokens.add(self.start_token)

        others.append(start)

        # merge expressions which use the same left part
        left_right_dict = {}
        for itm in others:
            left, right = itm.split('->')
            alternatives = [alt.strip() for alt in right.split('|')]
            # key on the stripped left part so it agrees with ut_tokens
            left_right_dict.setdefault(left.strip(), []).extend(alternatives)

        for left, right_text_list in left_right_dict.items():
            tokens_list = []
            for right_text in right_text_list:
                # Trace output of the original is kept; the single-argument
                # parenthesised form prints the same text under both the
                # print statement and the print function.
                print(right_text)
                tokens = split_text_by_ut_tokens(right_text, self.ut_tokens)
                tokens_list.append(tokens)
                print(tokens)
            self.expresses.append(
                e_fact.create_simple(fact.create_unterminal(left),
                                     tokens_list))
Example #7
0
	def eliminate_left_recursive(self):
		"""Split this expression into a pair without direct left recursion.

		Returns:
			``(self, None)`` when ``self.is_left_recursive()`` is false.
			Otherwise ``(rewritten, helper)``. ``rewritten`` keeps
			``self.left_token`` and holds each alternative that does not
			start with it, suffixed with a new unterminal named
			``left_token.text + "'"``. ``helper`` is defined on that new
			unterminal and holds each left-recursive alternative without its
			first token, suffixed with the new unterminal, plus one epsilon
			alternative.
		"""
		if not self.is_left_recursive():
			return (self, None)

		recursive_alts = []
		plain_alts = []
		for alt in self.right_tokens_list:
			head = alt[0]
			# `==` and `!=` are tested independently (no else) so that the
			# token type's own comparison operators decide each list.
			if head == self.left_token:
				recursive_alts.append(alt)
			if head != self.left_token:
				plain_alts.append(alt)
		assert not recursive_alts or len(plain_alts) > 0, \
			'eliminate left recursive failed'

		primed = fact.create_unterminal(self.left_token.text + "'")
		suffixed_alts = [alt + [primed] for alt in plain_alts]
		rewritten = express_factory.create_simple(self.left_token, suffixed_alts)

		tail_alts = [alt[1:] + [primed] for alt in recursive_alts]
		tail_alts.append([fact.create_epsilon()])
		helper = express_factory.create_simple(primed, tail_alts)
		return (rewritten, helper)
Example #8
0
    def eliminate_left_recursive(self):
        """Split this expression into a pair without direct left recursion.

        Returns:
            ``(self, None)`` when ``self.is_left_recursive()`` is false.
            Otherwise ``(rewritten, helper)``. ``rewritten`` keeps
            ``self.left_token`` and holds each alternative that does not
            start with it, suffixed with a new unterminal named
            ``left_token.text + "'"``. ``helper`` is defined on that new
            unterminal and holds each left-recursive alternative without its
            first token, suffixed with the new unterminal, plus one epsilon
            alternative.
        """
        if not self.is_left_recursive():
            return (self, None)

        recursive_alts = []
        plain_alts = []
        for alt in self.right_tokens_list:
            head = alt[0]
            # `==` and `!=` are tested independently (no else) so that the
            # token type's own comparison operators decide each list.
            if head == self.left_token:
                recursive_alts.append(alt)
            if head != self.left_token:
                plain_alts.append(alt)
        assert not recursive_alts or len(plain_alts) > 0, \
            'eliminate left recursive failed'

        primed = fact.create_unterminal(self.left_token.text + "'")
        suffixed_alts = [alt + [primed] for alt in plain_alts]
        rewritten = express_factory.create_simple(self.left_token,
                                                  suffixed_alts)

        tail_alts = [alt[1:] + [primed] for alt in recursive_alts]
        tail_alts.append([fact.create_epsilon()])
        helper = express_factory.create_simple(primed, tail_alts)
        return (rewritten, helper)