def split_text_by_ut_tokens(text, ut_set):
    """Split ``text`` on spaces and turn every piece into a token.

    The text is split on the space character only; empty pieces produced by
    leading, trailing or repeated spaces are dropped.  A piece that equals
    the ``text`` attribute of any member of ``ut_set`` becomes an unterminal
    token, every other piece becomes a terminal token.

    Args:
        text: string to tokenise (callers pass one production alternative).
        ut_set: iterable of objects exposing a ``text`` attribute.

    Returns:
        A list with one token per non-empty piece, in input order, built
        with ``fact.create_unterminal`` / ``fact.create_terminal``.
    """
    pieces = text.split(' ')
    # Collect the known unterminal names once so each lookup is O(1).
    # Assumes the ``text`` attributes are hashable (strings) -- TODO confirm.
    ut_texts = set(itm.text for itm in ut_set)

    tokens = []
    for piece in pieces:
        if piece == '':
            continue
        if piece in ut_texts:
            tokens.append(fact.create_unterminal(piece))
        else:
            tokens.append(fact.create_terminal(piece))
    # The previous version carried a second return statement and a
    # left-most-match scanner after this point; neither could ever run.
    return tokens
def split_text_by_ut_tokens(text, ut_set):
    """Tokenise a space-separated string into terminal/unterminal tokens.

    Only the space character separates pieces, and empty pieces (from
    repeated, leading or trailing spaces) are ignored.  Each remaining piece
    is classified by comparing it with the ``text`` attribute of the members
    of ``ut_set``: a match yields ``fact.create_unterminal(piece)``, anything
    else yields ``fact.create_terminal(piece)``.

    Args:
        text: string to tokenise.
        ut_set: iterable of objects that have a ``text`` attribute.

    Returns:
        List of tokens, one per non-empty piece, in the order they appear.
    """
    # NOTE(review): a definition with the same name appears earlier in this
    # file; if both live in the same scope this later one wins -- consider
    # removing one of them.
    pieces = [piece for piece in text.split(' ') if piece]
    # Hash-based lookup instead of scanning a list for every piece.
    # Assumes the ``text`` attributes are hashable (strings) -- TODO confirm.
    ut_texts = frozenset(token.text for token in ut_set)
    # Unreachable statements that used to follow the first ``return`` (an
    # extra return and an unused substring scanner) have been dropped.
    return [
        fact.create_unterminal(piece) if piece in ut_texts
        else fact.create_terminal(piece)
        for piece in pieces
    ]
def augment(self):
    """Give the grammar a fresh start token, at most once.

    On the first call a new unterminal is created whose text is the current
    start token's text followed by ``"__S"``.  It replaces
    ``self.start_token`` and an expression ``new_start -> old_start`` is
    appended to ``self.expresses``.  ``self.is_augmented`` is then set so
    later calls do nothing.

    Returns:
        None.
    """
    if not self.is_augmented:
        old_start = self.start_token
        # A distinct token is needed for the new start state.
        new_start = fact.create_unterminal(old_start.text + "__S")
        self.start_token = new_start
        # NOTE(review): the new start token is not added to self.ut_tokens
        # here -- confirm that this is intended.
        self.expresses.append(e_fact.create_simple(new_start, [[old_start]]))
        self.is_augmented = True
def __init__(self, start, others):
    """Build the grammar from textual production rules.

    Each rule is a string of the form ``LEFT -> alt | alt | ...``: the part
    before ``->`` names an unterminal and the part after it is split on
    ``|`` into alternatives.  Rules sharing the same left part are merged
    into a single expression.

    Args:
        start: rule whose left part becomes ``self.start_token``.
        others: iterable of the remaining rules; it is not modified.

    Raises:
        ValueError: if a rule does not contain exactly one ``->``.
    """
    self.is_augmented = False
    self.is_non_left_rec = False
    self.is_expanded = False
    self.normalized_mode = None
    # Unterminal tokens known to this grammar.
    self.ut_tokens = set()
    self.expresses = list()

    def split_rule(rule):
        # Unpacking enforces exactly one '->' per rule.
        left, right = rule.split('->')
        # str.strip() returns a new string; the result used to be thrown
        # away, which left whitespace inside unterminal names ("S ") so they
        # never matched the space-separated pieces of a right-hand side.
        return left.strip(), right

    start = start.strip()
    parsed_rules = []
    for rule in [itm.strip() for itm in others]:
        left, right = split_rule(rule)
        self.ut_tokens.add(fact.create_unterminal(left))
        parsed_rules.append((left, right))

    left, right = split_rule(start)
    self.start_token = fact.create_unterminal(left)
    self.ut_tokens.add(self.start_token)
    parsed_rules.append((left, right))

    # Merge the expressions which use the same left part.
    left_right_dict = {}
    for left, right in parsed_rules:
        alternatives = [alt.strip() for alt in right.split('|')]
        left_right_dict.setdefault(left, []).extend(alternatives)

    for left, right_text_list in left_right_dict.items():
        tokens_list = []
        for right_text in right_text_list:
            # Trace output kept from the original implementation; the
            # parenthesised single-argument form prints the same text under
            # Python 2 and Python 3.
            print(right_text)
            tokens = split_text_by_ut_tokens(right_text, self.ut_tokens)
            tokens_list.append(tokens)
            print(tokens)
        self.expresses.append(
            e_fact.create_simple(fact.create_unterminal(left), tokens_list))
def eliminate_left_recursive(self):
    """Rewrite this expression so that it no longer starts with itself.

    Alternatives whose first token equals ``self.left_token`` are moved to a
    new expression whose left token is named ``<left text>'``:

        A  -> A a | b      becomes      A  -> b A'
                                        A' -> a A' | epsilon

    Returns:
        ``(self, None)`` when ``self.is_left_recursive()`` is false.
        Otherwise ``(rewritten, tail)``: ``rewritten`` keeps the original
        left token and ``tail`` is the expression for the new token.

    Raises:
        AssertionError: if there are left-recursive alternatives but no
            other alternative to start from.
    """
    if not self.is_left_recursive():
        return (self, None)

    # Partition in one pass with a single equality test, so every
    # alternative lands in exactly one list even if the token type's
    # ``!=`` is not the exact negation of its ``==``.
    ill_tokens_list = []
    healthy_tokens_list = []
    for tokens in self.right_tokens_list:
        if tokens[0] == self.left_token:
            ill_tokens_list.append(tokens)
        else:
            healthy_tokens_list.append(tokens)

    # Raised explicitly (same exception type as the former ``assert``) so
    # the check is still performed when Python runs with -O.
    if ill_tokens_list and not healthy_tokens_list:
        raise AssertionError('eliminate left recursive failed')

    new_left_token = fact.create_unterminal(self.left_token.text + "'")

    owned_tokens_list = [tokens + [new_left_token]
                         for tokens in healthy_tokens_list]
    self_cpy = express_factory.create_simple(self.left_token,
                                             owned_tokens_list)

    # Drop the leading self-reference and recurse on the right instead.
    new_tokens_list = [tokens[1:] + [new_left_token]
                       for tokens in ill_tokens_list]
    new_tokens_list.append([fact.create_epsilon()])
    new_exp = express_factory.create_simple(new_left_token, new_tokens_list)
    return (self_cpy, new_exp)
def eliminate_left_recursive(self):
    """Remove immediate left recursion from this expression.

    Every alternative beginning with ``self.left_token`` has that leading
    token removed and is attached to a new unterminal (the left token's text
    plus ``'``); the remaining alternatives get the new unterminal appended,
    and the new unterminal also receives an epsilon alternative.

    Returns:
        ``(self, None)`` if ``self.is_left_recursive()`` is false, else a
        pair ``(rewritten_expression, new_expression)``.

    Raises:
        AssertionError: if some alternatives are left-recursive and none
            is not.
    """
    # NOTE(review): a definition with the same name appears earlier in this
    # file; if both live in the same scope this later one wins -- consider
    # removing one of them.
    if not self.is_left_recursive():
        return (self, None)

    head = self.left_token
    recursive, plain = [], []
    for alternative in self.right_tokens_list:
        # One ``==`` decides the bucket, so the two lists are always
        # complementary regardless of how the token type implements ``!=``.
        bucket = recursive if alternative[0] == head else plain
        bucket.append(alternative)

    if recursive and not plain:
        # Explicit raise instead of ``assert`` so the check is not stripped
        # under -O; the exception type callers see is unchanged.
        raise AssertionError('eliminate left recursive failed')

    new_left_token = fact.create_unterminal(head.text + "'")

    rewritten = express_factory.create_simple(
        head, [alternative + [new_left_token] for alternative in plain])

    tail_alternatives = [alternative[1:] + [new_left_token]
                         for alternative in recursive]
    tail_alternatives.append([fact.create_epsilon()])
    tail = express_factory.create_simple(new_left_token, tail_alternatives)

    return (rewritten, tail)