def simplify_alternation_with_common_prefixes(pattern):
    """Factor out common leading terms from an Alternation's clauses.

    Each clause is split into its first term and the (simplified) remainder;
    clauses sharing a first term are grouped into a single "thread".

    Returns a 3-tuple:
      * (new_pattern, True, None)    - at least one prefix was shared; the
        alternation was rebuilt with the shared prefixes factored out.
      * (pattern, False, threads)    - nothing was factored; `threads` maps
        each first term to its remainder pattern (or None for a bare clause).
    """
    threads = {}
    simplified = False
    for clause in pattern.clauses:
        if type(clause) == ebnf_semantic.Concatenation:
            first_term = clause.terms[0]
            next_pattern = simplify_list(
                ebnf_semantic.Concatenation(clause.terms[1:]))
        else:
            # A non-concatenation clause is all prefix with no remainder.
            first_term = clause
            next_pattern = None
        if first_term not in threads:
            threads[first_term] = []
        threads[first_term].append(next_pattern)
    for term, clauses in threads.items():
        if len(clauses) > 1:
            simplified = True
            threads[term] = simplify_list(ebnf_semantic.Alternation(clauses))
        else:
            # BUG FIX: unwrap the single-element list. Previously the raw
            # list was left as the thread value, but every consumer of
            # `threads` (simplify_*_in_threads, the terminal prefix-map case)
            # expects a pattern or None, not a list.
            threads[term] = clauses[0]
    if not simplified:
        return pattern, False, threads
    clauses = []
    # Renamed loop variable (was `pattern`) to avoid shadowing the parameter.
    for prefix, next_pattern in threads.items():
        clause = simplify_list(
            ebnf_semantic.Concatenation([prefix, next_pattern]))
        clauses.append(clause)
    return simplify_list(ebnf_semantic.Alternation(clauses)), simplified, None
def crazy_expansion(term1, term2, ast_info, cache):
    """Build the prefix map for `term1` followed by `term2`.

    Combines term1's prefix map (minus its None entry) with an expanded
    prefix map of term2, producing an Alternation wherever both maps can
    start with the same prefix character.
    """
    map1 = get_prefix_map(term1, ast_info, cache)
    map2 = expand_for_merge(
        get_prefix_map(term2, ast_info, cache), ast_info, cache)
    # Drop the empty-match entry from term1's map before merging.
    map1 = {key: sub for key, sub in map1.items() if key != None}

    merged = {}
    # Prefixes only reachable by skipping straight into term2.
    for key, sub in map2.items():
        if key not in map1:
            merged[key] = sub
    for key, sub in map1.items():
        if key not in map2:
            # Prefix enters term1; term2 still follows.
            merged[key] = ebnf_semantic.Concatenation([sub, term2], sub.offset)
        else:
            # Ambiguous prefix: either term1 then term2, or term2 directly.
            through_both = ebnf_semantic.Concatenation(
                [term1, term2], term1.offset)
            alt = map2[key]
            if type(alt) == ebnf_semantic.Alternation:
                merged[key] = ebnf_semantic.Alternation(
                    [through_both] + list(alt.clauses), term1.offset)
            else:
                merged[key] = ebnf_semantic.Alternation(
                    [through_both, alt], term1.offset)
    return merged
def simplify_list(pattern):
    """Normalize a Concatenation or Alternation node.

    Drops None members, collapses empty nodes to None and singleton nodes
    to their sole member, dedupes/sorts Alternation clauses, and wraps the
    result in Optional when an Alternation contained a None clause.
    Any other pattern type is returned untouched.
    """
    kind = type(pattern)
    if kind == ebnf_semantic.Concatenation:
        kept = [term for term in pattern.terms if term != None]
        if not kept:
            return None
        if len(kept) == 1:
            return kept[0]
        return ebnf_semantic.Concatenation(kept)
    if kind == ebnf_semantic.Alternation:
        # Sort for a canonical clause order so equal alternations compare equal.
        kept = sorted(set(c for c in pattern.clauses if c != None))
        if not kept:
            return None
        result = kept[0] if len(kept) == 1 else ebnf_semantic.Alternation(kept)
        # A None clause means the alternation could match nothing.
        if None in pattern.clauses:
            result = ebnf_semantic.Optional(result)
        return result
    return pattern
def build_state_node_for_concatenation(pattern, cache):
    """Build the state node for a Concatenation.

    Takes the state node of the first term and appends the remaining terms
    to each of its transitions. If the first term can complete (EOF
    transition), the whole pattern is re-expressed as an alternation of the
    extended transitions plus the continuation, and rebuilt from there.
    """
    terms = list(pattern.terms)
    rest = terms[1:]
    head_node = build_state_node(terms[0], cache)

    transitions = {}
    for symbol, next_pattern in head_node.transitions.items():
        if symbol != EOF:
            transitions[symbol] = simplify_list(
                ebnf_semantic.Concatenation([next_pattern] + rest))

    if EOF in head_node.transitions:
        clauses = []
        for symbol, next_pattern in transitions.items():
            # Bare-string symbols stand for terminals; rewrap them.
            head = (ebnf_semantic.Terminal(symbol)
                    if type(symbol) == str else symbol)
            clauses.append(simplify_list(
                ebnf_semantic.Concatenation([head, next_pattern])))
        # The first term may finish immediately, continuing into `rest`.
        clauses.append(simplify_list(
            ebnf_semantic.Concatenation([head_node.transitions[EOF]] + rest)))
        simplified_pattern = simplify_list(ebnf_semantic.Alternation(clauses))
        return build_state_node(simplified_pattern, cache)

    node = StateNode()
    node.transitions = transitions
    return node
def expand_transitions(transitions, cache):
    """Rewrite a transition map into a single expanded pattern.

    Each (symbol, next_pattern) entry is turned into an alternation clause:
      * Identifier           -> wrapped into an ExpandedIdentifier carrying the
                                identifier's rule body and its "thread".
      * ExpandedIdentifier   -> its subpattern's state node is expanded one
                                step; finished subpatterns emit a ReduceAction.
      * ForkAction           -> its threads are collected; pattern passes through.
      * str                  -> treated as a terminal symbol.
      * None                 -> empty prefix; the next pattern passes through.
      * anything else        -> symbol simply prepended to the next pattern.

    If any threads were collected, the result is prefixed with a ForkAction
    holding them. Returns the simplified expanded pattern.
    """
    ast_info = cache["ast_info"]
    clauses = []
    threads = set({})
    for symbol, next_pattern in transitions.items():
        if type(symbol) == ebnf_semantic.Identifier:
            identifier_pattern = ast_info.rules[symbol.identifier]
            # The thread records the un-expanded continuation for this branch.
            thread = simplify_list(
                ebnf_semantic.Concatenation([symbol, next_pattern]))
            clause = simplify_list(
                ebnf_semantic.Concatenation([
                    ExpandedIdentifier(symbol.identifier, identifier_pattern,
                                       thread), next_pattern
                ]))
            threads.add(thread)
        elif type(symbol) == ExpandedIdentifier:
            subnode = build_state_node(symbol.subpattern, cache)
            clauses1 = []
            for symbol1, next_pattern1 in subnode.transitions.items():
                # Bare strings stand for terminal symbols.
                if type(symbol1) == str:
                    symbol2 = ebnf_semantic.Terminal(symbol1)
                else:
                    symbol2 = symbol1
                if next_pattern1 == None:
                    # Subpattern finished: reduce the identifier here.
                    clause1 = simplify_list(
                        ebnf_semantic.Concatenation([
                            symbol2,
                            ReduceAction(symbol.identifier, symbol.thread),
                            next_pattern
                        ]))
                else:
                    # Subpattern continues: stay inside the identifier.
                    clause1 = simplify_list(
                        ebnf_semantic.Concatenation([
                            symbol2,
                            ExpandedIdentifier(symbol.identifier,
                                               next_pattern1, symbol.thread),
                            next_pattern
                        ]))
                clauses1.append(clause1)
            clause = simplify_list(ebnf_semantic.Alternation(clauses1))
        elif type(symbol) == ForkAction:
            clause = next_pattern
            threads.update(symbol.threads)
        elif type(symbol) == str:
            clause = simplify_list(
                ebnf_semantic.Concatenation(
                    [ebnf_semantic.Terminal(symbol), next_pattern]))
        elif symbol == None:
            clause = next_pattern
        else:
            clause = simplify_list(
                ebnf_semantic.Concatenation([symbol, next_pattern]))
        clauses.append(clause)
    expanded_pattern = simplify_list(ebnf_semantic.Alternation(clauses))
    if len(threads) > 0:
        # Record the pending threads ahead of the expanded pattern.
        expanded_pattern = simplify_list(
            ebnf_semantic.Concatenation([ForkAction(threads),
                                         expanded_pattern]))
    return expanded_pattern
def get_prefix_map_for_concatenation(pattern, ast_info, cache):
    """Compute the prefix map of a Concatenation.

    First collapses adjacent redundant terms (a Repetition followed by an
    equal Repetition, or by an Identifier/PartialIdentifier whose body is
    that same Repetition), then builds the prefix map of the first term and
    threads the remaining terms onto it.
    """
    terms = [pattern.terms[0]]
    for term in pattern.terms[1:]:
        last = terms[-1]
        if type(last) == ebnf_semantic.Repetition:
            if type(term) == ebnf_semantic.Repetition:
                # Two adjacent repetitions merge into one alternated repetition.
                terms[-1] = ebnf_semantic.Alternation([last, term], last.offset)
            elif type(term) == ebnf_semantic.Identifier:
                # {x} followed by an identifier whose rule IS {x}: keep the
                # identifier only (it already covers the repetition).
                if ast_info.rules[term.identifier] == last:
                    terms[-1] = term
                else:
                    terms.append(term)
            elif type(term) == PartialIdentifier:
                if term.subpattern == last:
                    terms[-1] = term
                else:
                    terms.append(term)
            else:
                terms.append(term)
        elif type(last) == ebnf_semantic.Identifier and type(
                ast_info.rules[last.identifier]) == ebnf_semantic.Repetition:
            # Identifier-for-a-repetition followed by that same repetition:
            # the trailing repetition is redundant.
            if (type(term) == ebnf_semantic.Repetition
                    and ast_info.rules[last.identifier] == term):
                continue
            terms.append(term)
        elif type(last) == PartialIdentifier and type(
                last.subpattern) == ebnf_semantic.Repetition:
            if (type(term) == ebnf_semantic.Repetition
                    and last.subpattern == term):
                continue
            terms.append(term)
        else:
            terms.append(term)

    if len(terms) == 1:
        return get_prefix_map(terms[0], ast_info, cache)

    # BUG FIX: the original test was
    #   type(terms[1]) in [ebnf_semantic.Identifier or PartialIdentifier]
    # where `or` binds first, so the list was just [Identifier] and
    # PartialIdentifier was never matched. Use a proper membership tuple.
    if type(terms[0]) == ebnf_semantic.Repetition and type(
            terms[1]) in (ebnf_semantic.Identifier, PartialIdentifier):
        submap = crazy_expansion(terms[0], terms[1], ast_info, cache)
        if len(terms) > 2:
            return concatenate_to_map(submap, terms[2:], ast_info, cache)
        return submap

    submap = get_prefix_map(terms[0], ast_info, cache)
    return concatenate_to_map(submap, terms[1:], ast_info, cache)
def simplify_alternations_in_threads(threads):
    """Flatten nested Alternation heads out of a thread map.

    Each (head, tail) thread becomes one clause per head alternative,
    concatenated with its tail; non-alternation heads become a single
    clause. Returns the simplified combined Alternation.
    """
    clauses = []
    for head, tail in threads.items():
        if type(head) == ebnf_semantic.Alternation:
            # Distribute the tail over every alternative of the head.
            for alternative in head.clauses:
                clauses.append(simplify_list(
                    ebnf_semantic.Concatenation([alternative, tail])))
        else:
            clauses.append(simplify_list(
                ebnf_semantic.Concatenation([head, tail])))
    return simplify_list(ebnf_semantic.Alternation(clauses))
def simplify_repetitions_in_threads(threads):
    """Rewrite threads whose head is a Repetition.

    Repetition heads are handed to simplify_concatenation_to_repetition
    together with their tail; all other heads are simply concatenated with
    their tail. Returns the simplified combined Alternation.
    """
    clauses = []
    for head, tail in threads.items():
        if type(head) == ebnf_semantic.Repetition:
            clauses.append(simplify_concatenation_to_repetition(head, tail))
        else:
            clauses.append(simplify_list(
                ebnf_semantic.Concatenation([head, tail])))
    return simplify_list(ebnf_semantic.Alternation(clauses))
def get_prefix_map_for_alternation(pattern, ast_info, cache):
    """Compute the prefix map of an Alternation.

    Repeatedly rewrites the alternation into simpler forms (hoisting
    Optionals/Repetitions, factoring common prefixes, flattening thread
    heads, folding ReduceActions) and recurses until only single-character
    Terminal heads remain, at which point the map {char: continuation} is
    returned directly.
    """
    # If plain normalization changes the pattern, restart from the result.
    pattern1 = simplify_list(pattern)
    if pattern1 != pattern:
        return get_prefix_map(pattern1, ast_info, cache)
    clauses = pattern.clauses
    if ebnf_semantic.Optional in map(type, clauses):
        return get_prefix_map(simplify_alternation_with_optional(pattern),
                              ast_info, cache)
    if ebnf_semantic.Repetition in map(type, clauses):
        return get_prefix_map(simplify_alternation_with_repetition(pattern),
                              ast_info, cache)
    pattern, simplified, threads = simplify_alternation_with_common_prefixes(
        pattern)
    if simplified:
        return get_prefix_map(pattern, ast_info, cache)
    # From here on, work on the head->tail thread map.
    types = set(map(type, threads.keys()))
    if ebnf_semantic.Optional in types:
        return get_prefix_map(simplify_optionals_in_threads(threads),
                              ast_info, cache)
    if ebnf_semantic.Repetition in types:
        return get_prefix_map(simplify_repetitions_in_threads(threads),
                              ast_info, cache)
    if ebnf_semantic.Alternation in types:
        return get_prefix_map(simplify_alternations_in_threads(threads),
                              ast_info, cache)
    if ReduceAction in types:
        reduced_pattern, none_actions = simplify_reduce_actions_in_threads(
            threads)
        prefix_map = get_prefix_map(reduced_pattern, ast_info, cache)
        if none_actions:
            # Merge empty-match reduce actions into the map's None entry.
            if None in prefix_map:
                prefix_map[None] = none_actions | prefix_map[None]
            else:
                prefix_map[None] = none_actions
        return prefix_map
    # Not yet all single-character terminals: expand each head and retry.
    if types != set([ebnf_semantic.Terminal]) or set(
            map(lambda x: len(x.terminal), threads.keys())) != set([1]):
        clauses = []
        for term, next_pattern in threads.items():
            expanded_term = expand_term(term, ast_info, cache)
            clauses.append(simplify_list(
                ebnf_semantic.Concatenation([expanded_term, next_pattern])))
        return get_prefix_map(simplify_list(ebnf_semantic.Alternation(clauses)),
                              ast_info, cache)
    # Base case: every head is a one-character terminal.
    prefix_map = {x.terminal: subpattern for x, subpattern in threads.items()}
    return prefix_map
def build_state_node_for_alternation(pattern, cache):
    """Build the state node for an Alternation.

    Merges the transition maps of all clause state nodes: transitions that
    share a symbol have their continuations combined into one simplified
    Alternation.
    """
    grouped = {}
    for clause in pattern.clauses:
        clause_node = build_state_node(clause, cache)
        for symbol, continuation in clause_node.transitions.items():
            grouped.setdefault(symbol, []).append(continuation)

    node = StateNode()
    node.transitions = {
        symbol: simplify_list(ebnf_semantic.Alternation(continuations))
        for symbol, continuations in grouped.items()
    }
    return node
def expand_term(term, ast_info, cache):
    """Expand a term into an alternation over its prefix map.

    The term's prefix map is computed, bare-string keys are rewrapped as
    Terminals, and each (prefix, continuation) entry becomes one clause of
    the resulting Alternation.
    """
    raw_map = expand_pattern(term, ast_info, cache)
    normalized = {}
    for key, continuation in raw_map.items():
        # Bare strings in the map stand for terminal prefixes.
        head = ebnf_semantic.Terminal(key) if type(key) == str else key
        normalized[head] = continuation
    clauses = [
        simplify_list(ebnf_semantic.Concatenation([head, continuation]))
        for head, continuation in normalized.items()
    ]
    return simplify_list(ebnf_semantic.Alternation(clauses))
def simplify_reduce_actions_in_threads(threads):
    """Fold ReduceAction heads out of a thread map.

    ReduceAction heads whose tail is empty (None) or another ReduceAction
    are collected as "none actions" (actions fired on an empty match);
    any other tail has the action delayed past it. Non-ReduceAction heads
    are concatenated with their tails unchanged.

    Returns (simplified_alternation, none_actions_set) where the set
    contains tuples of the collected ReduceActions.
    """
    # BUG FIX: removed a leftover `import pdb; pdb.set_trace()` debug trap,
    # and an empty `elif type(next_pattern) == ebnf_semantic.Concatenation:`
    # arm that was a syntax error in the original.
    clauses = []
    none_actions = []
    for pattern, next_pattern in threads.items():
        if type(pattern) == ReduceAction:
            if next_pattern == None:
                none_actions.append((pattern,))
            elif type(next_pattern) == ReduceAction:
                none_actions.append((pattern, next_pattern))
            else:
                # NOTE(review): the original had a dedicated (empty)
                # Concatenation arm here; Concatenation tails now fall
                # through to this generic delay branch — confirm that no
                # special handling was intended.
                clauses.append(simplify_list(
                    ebnf_semantic.Concatenation(
                        [next_pattern, delay_reduce_action(pattern)])))
        else:
            clauses.append(simplify_list(
                ebnf_semantic.Concatenation([pattern, next_pattern])))
    return simplify_list(ebnf_semantic.Alternation(clauses)), set(none_actions)
def simplify_alternation_with_repetition(pattern):
    """Unroll Repetition clauses of an Alternation by one step.

    Each Repetition clause {x} is rewritten as x {x}; since a repetition
    also matches nothing, the whole result is wrapped in Optional.
    """
    clauses = []
    for clause in pattern.clauses:
        if type(clause) == ebnf_semantic.Repetition:
            # {x}  ->  x {x}  (the empty match is restored by the Optional).
            clauses.append(simplify_list(
                ebnf_semantic.Concatenation([clause.rhs, clause])))
        else:
            clauses.append(clause)
    return ebnf_semantic.Optional(
        simplify_list(ebnf_semantic.Alternation(clauses)))
def simplify_alternation_with_optional(pattern):
    """Hoist Optional clauses out of an Alternation.

    Optional clauses are replaced by their bodies, and the optionality is
    moved to wrap the whole simplified alternation instead.
    """
    clauses = []
    for clause in pattern.clauses:
        if type(clause) == ebnf_semantic.Optional:
            clauses.append(clause.rhs)
        else:
            clauses.append(clause)
    return ebnf_semantic.Optional(
        simplify_list(ebnf_semantic.Alternation(clauses)))
def simplify_concatenation_to_optional(term1, term2):
    """Expand an optional head followed by a tail.

    [x] y  becomes the alternation (x y) | y: either the optional body
    occurs before the tail, or the tail occurs alone.
    """
    with_body = simplify_list(
        ebnf_semantic.Concatenation([term1.rhs, term2]))
    return simplify_list(ebnf_semantic.Alternation([with_body, term2]))
def merge_maps(prefix_map1, prefix_map2, ast_info, cache):
    """Merge two prefix maps into one.

    Identifier / PartialIdentifier keys can only be merged when both maps
    carry the same one; otherwise the offending map(s) are expanded via
    expand_for_merge and the merge retried. Once both maps have only plain
    keys, entries are unioned: shared keys combine their continuations
    (None + pattern -> Optional; pattern + pattern -> Alternation).
    """
    if ebnf_semantic.Identifier in map(
            type, prefix_map1.keys()) and ebnf_semantic.Identifier in map(
                type, prefix_map2.keys()):
        # Both maps key on an Identifier; find one from each side.
        identkey1 = None
        identkey2 = None
        for c in prefix_map1.keys():
            if type(c) == ebnf_semantic.Identifier:
                identkey1 = c
                break
        for c in prefix_map2.keys():
            if type(c) == ebnf_semantic.Identifier:
                identkey2 = c
                break
        if identkey1 != identkey2:
            # Different identifiers: expand both and retry.
            expanded_map1 = expand_for_merge(prefix_map1, ast_info, cache)
            expanded_map2 = expand_for_merge(prefix_map2, ast_info, cache)
            return merge_maps(expanded_map1, expanded_map2, ast_info, cache)
        # NOTE(review): when identkey1 == identkey2 this branch falls
        # through and the function implicitly returns None — confirm this
        # case is unreachable or intended.
    elif PartialIdentifier in map(
            type, prefix_map1.keys()) and PartialIdentifier in map(
                type, prefix_map2.keys()):
        identkey1 = None
        identkey2 = None
        for c in prefix_map1.keys():
            if type(c) == PartialIdentifier:
                identkey1 = c
                break
        for c in prefix_map2.keys():
            if type(c) == PartialIdentifier:
                identkey2 = c
                break
        if is_finished_partial(identkey1) and is_finished_partial(identkey2):
            # ambiguous. which to return?
            return prefix_map1
        if identkey1 != identkey2:
            expanded_map1 = expand_for_merge(prefix_map1, ast_info, cache)
            expanded_map2 = expand_for_merge(prefix_map2, ast_info, cache)
            return merge_maps(expanded_map1, expanded_map2, ast_info, cache)
    elif ebnf_semantic.Identifier in map(
            type, prefix_map1.keys()) or PartialIdentifier in map(
                type, prefix_map1.keys()):
        # Only map1 has identifier-like keys: expand it and retry.
        expanded_map1 = expand_for_merge(prefix_map1, ast_info, cache)
        return merge_maps(expanded_map1, prefix_map2, ast_info, cache)
    elif ebnf_semantic.Identifier in map(
            type, prefix_map2.keys()) or PartialIdentifier in map(
                type, prefix_map2.keys()):
        expanded_map2 = expand_for_merge(prefix_map2, ast_info, cache)
        return merge_maps(prefix_map1, expanded_map2, ast_info, cache)
    else:
        # Plain keys only: take the union, combining shared entries.
        new_map = {}
        for c in prefix_map1:
            if c in prefix_map2:
                subpattern1 = prefix_map1[c]
                subpattern2 = prefix_map2[c]
                if subpattern1 == None:
                    if subpattern2 == None:
                        new_map[c] = None
                    else:
                        # One side ends here: the continuation is optional.
                        new_map[c] = ebnf_semantic.Optional(
                            subpattern2, subpattern2.offset)
                else:
                    if subpattern2 == None:
                        new_map[c] = ebnf_semantic.Optional(
                            subpattern1, subpattern1.offset)
                    else:
                        # Both continue: alternate, flattening any existing
                        # Alternation clauses on either side.
                        if type(subpattern1) == ebnf_semantic.Alternation:
                            if type(subpattern2) == ebnf_semantic.Alternation:
                                new_map[c] = ebnf_semantic.Alternation(
                                    list(subpattern1.clauses) +
                                    list(subpattern2.clauses),
                                    subpattern1.clauses[0].offset)
                            else:
                                new_map[c] = ebnf_semantic.Alternation(
                                    list(subpattern1.clauses) + [subpattern2],
                                    subpattern1.clauses[0].offset)
                        else:
                            if type(subpattern2) == ebnf_semantic.Alternation:
                                new_map[c] = ebnf_semantic.Alternation(
                                    [subpattern1] + list(subpattern2.clauses),
                                    subpattern1.offset)
                            else:
                                new_map[c] = ebnf_semantic.Alternation(
                                    [subpattern1, subpattern2],
                                    subpattern1.offset)
            else:
                new_map[c] = prefix_map1[c]
        for c in prefix_map2:
            if c not in prefix_map1:
                new_map[c] = prefix_map2[c]
        return new_map
def expand_reduce_action(symbol, next_pattern, cache):
    """Push a (reduce-action) symbol rightward into `next_pattern`.

    Recursively distributes `symbol` across the structure of `next_pattern`
    so the action ends up adjacent to the point where it must fire:
      * EOF                       -> just the symbol.
      * reduce-actions-only tail  -> symbol then tail.
      * ForkAction / Expanded- / plain Identifier -> tail then symbol.
      * Terminal                  -> split after its first character.
      * Optional / Repetition / Concatenation / Alternation -> recurse.

    Raises for any pattern type not covered above.
    """
    if next_pattern == EOF:
        return symbol
    if has_only_reduce_actions(next_pattern):
        return simplify_list(
            ebnf_semantic.Concatenation([symbol, next_pattern]))
    if type(next_pattern) == ForkAction:
        return simplify_list(
            ebnf_semantic.Concatenation([next_pattern, symbol]))
    if type(next_pattern) == ExpandedIdentifier:
        # XXX
        return simplify_list(
            ebnf_semantic.Concatenation([next_pattern, symbol]))
    if type(next_pattern) == ebnf_semantic.Terminal:
        # Split the terminal so the action fires right after its first char.
        first = next_pattern.terminal[0]
        rest = next_pattern.terminal[1:]
        if not rest:
            return simplify_list(
                ebnf_semantic.Concatenation(
                    [ebnf_semantic.Terminal(first), symbol]))
        return simplify_list(
            ebnf_semantic.Concatenation([
                ebnf_semantic.Terminal(first), symbol,
                ebnf_semantic.Terminal(rest)
            ]))
    if type(next_pattern) == ebnf_semantic.Identifier:
        # XXX
        return simplify_list(
            ebnf_semantic.Concatenation([next_pattern, symbol]))
    if type(next_pattern) == ebnf_semantic.Optional:
        # Either the optional part is absent (symbol alone) or present.
        clause1 = symbol
        clause2 = expand_reduce_action(symbol, next_pattern.rhs, cache)
        return simplify_list(ebnf_semantic.Alternation([clause1, clause2]))
    if type(next_pattern) == ebnf_semantic.Repetition:
        # clause1 = symbol
        # XXX clause2 = expand_reduce_action(symbol, simplify_list(ebnf_semantic.Concatenation([next_pattern.rhs, next_pattern])), cache)
        # return simplify_list(ebnf_semantic.Alternation([clause1, clause2]))
        return simplify_list(ebnf_semantic.Alternation([symbol, next_pattern]))
    if type(next_pattern) == ebnf_semantic.Concatenation:
        # Absorb any leading reduce-actions-only terms into the symbol,
        # then recurse on the first "real" term.
        i = 0
        while has_only_reduce_actions(next_pattern.terms[i]):
            i += 1
        if i == 0:
            return simplify_list(
                ebnf_semantic.Concatenation([
                    expand_reduce_action(symbol, next_pattern.terms[0], cache)
                ] + list(next_pattern.terms[1:])))
        new_symbol = simplify_list(
            ebnf_semantic.Concatenation([symbol] +
                                        list(next_pattern.terms[:i])))
        return expand_reduce_action(
            new_symbol,
            simplify_list(ebnf_semantic.Concatenation(next_pattern.terms[i:])),
            cache)
    if type(next_pattern) == ebnf_semantic.Alternation:
        # Distribute the symbol into every alternative.
        clauses = [
            expand_reduce_action(symbol, clause, cache)
            for clause in next_pattern.clauses
        ]
        return simplify_list(ebnf_semantic.Alternation(clauses))
    raise Exception("Shouldn't reach this point.")