Example #1
    def generate_content(self, token):
        code = CodeEmitter(token.stream)
        code.indent()
        
        # Generate the state machine factory
        self.get_state_machine = lambda section: section.dfa
        if self.plugin_options.form == self.plugin_options.NFA_IR:
            nfas = {}
            for section in self.rules_file.sections.values():
                nfas[section] = section.rules[0].nfa.alternate(rule.nfa for rule in section.rules)
            self.get_state_machine = lambda section: nfas[section]

        # Generate vertices
        for id, section in self.rules_file.sections.items():
            formatted_id = self.cache.get_section(id)
            with code.block("subgraph {0} {{".format(formatted_id), "}"):
                code.line('label="{0}"'.format(self.get_section_label(section, id)))
                self.generate_vertices(code, formatted_id, section)
        for vertex in self.exit_vertices.values():
            self.draw_vertex(code, id=vertex, label="exit section", is_shapeless=True)
        
        code.line()
        
        # Generate edges
        for id, section in self.rules_file.sections.items():
            formatted_id = self.cache.get_section(id)
            self.generate_edges(code, formatted_id, section)
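
The method above leans on a CodeEmitter helper for indentation-aware output. Its real implementation is not part of this listing; the sketch below is inferred purely from the call sites (indent(), line(), and the block() context manager) and is an assumed shape, not the project's actual class:

    from contextlib import contextmanager

    class CodeEmitter:
        # Minimal sketch of the emitter interface used by the examples;
        # the names come from the call sites, the bodies are assumptions.
        def __init__(self, stream, indent=0):
            self.stream = stream
            self._depth = indent

        def indent(self, spaces=4):
            self._depth += spaces

        def dedent(self, spaces=4):
            self._depth = max(0, self._depth - spaces)

        def line(self, text=""):
            # code.line() with no argument emits a blank line, as used above.
            if text:
                self.stream.write(" " * self._depth + text)
            self.stream.write("\n")

        @contextmanager
        def block(self, opener, closer=None):
            # Emits `opener`, indents the body, and emits `closer` (if given)
            # on exit, matching `with code.block("subgraph g {", "}"):` above.
            self.line(opener)
            self.indent()
            try:
                yield
            finally:
                self.dedent()
                if closer is not None:
                    self.line(closer)

Under that assumption, the subgraph blocks in generate_content come out as an opener line, an indented body, and a closing brace.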
Example #2
    def generate_content(self, token):
        code = CodeEmitter(token.stream)
        code.indent()

        # Generate the state machine factory
        self.get_state_machine = lambda section: section.dfa
        if self.plugin_options.form == self.plugin_options.NFA_IR:
            nfas = {}
            for section in self.rules_file.sections.values():
                nfas[section] = section.rules[0].nfa.alternate(rule.nfa for rule in section.rules)
            self.get_state_machine = lambda section: nfas[section]

        # Generate vertices
        for id, section in self.rules_file.sections.items():
            formatted_id = self.cache.get_section(id)
            with code.block("subgraph {0} {{".format(formatted_id), "}"):
                code.line('label="{0}"'.format(self.get_section_label(section, id)))
                self.generate_vertices(code, formatted_id, section)
        for vertex in self.exit_vertices.values():
            self.draw_vertex(code, id=vertex, label="exit section", is_shapeless=True)

        code.line()

        # Generate edges
        for id, section in self.rules_file.sections.items():
            formatted_id = self.cache.get_section(id)
            self.generate_edges(code, formatted_id, section)
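
One detail both copies share: get_state_machine is a rebindable factory, so the vertex and edge loops never need to know whether they are drawing each section's DFA or the NFA obtained by alternating the section's rule NFAs. A self-contained toy of that selection pattern, with placeholder strings standing in for the real IR objects:

    class Section:
        # Placeholder for the real section object; only the two IR attributes matter.
        def __init__(self, dfa, nfa):
            self.dfa, self.nfa = dfa, nfa

    sections = {"::main::": Section(dfa="<main dfa>", nfa="<main nfa>")}

    use_nfa_form = True  # stands in for the plugin_options.form == NFA_IR test
    get_state_machine = lambda section: section.dfa
    if use_nfa_form:
        nfas = {section: section.nfa for section in sections.values()}
        get_state_machine = lambda section: nfas[section]

    for section in sections.values():
        print(get_state_machine(section))  # -> <main nfa>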
Example #3
 def process(self, token):
     if token.token == "BASE_CLASS":
         if len(self.lexical_analyzer.sections) > 1:
             token.stream.write(self.formatter.get_mode_stack_class_name())
         else:
             token.stream.write("Object")
     elif token.token == "BASE_FILE_NAME":
         token.stream.write(self.plugin_options.file_name)
     elif token.token == "CLASS_NAME":
         token.stream.write(self.formatter.get_class_name())
     elif token.token == "DEFAULT_ENCODING":
         token.stream.write(self.formatter.get_default_encoding())
     elif token.token == "ENUM_MODE_IDS":
         code = CodeEmitter(token.stream, token.indent)
         for section in self.lexical_analyzer.sections:
             x = self.formatter.get_section_id(section)
             code.line(x)
     elif token.token == "ENUM_TOKEN_IDS":
         code = CodeEmitter(token.stream, token.indent)
         rules = [id for id in self.lexical_analyzer.rule_ids.values() if id is not None]
         rules.append("SkippedToken")
         for rule in sorted(rules):
             code.line(self.formatter.get_token_id(rule))
     elif token.token == "HEADER_GUARD_NAME":
         token.stream.write(
             "{namespace}_{class_name}_BI".format(
                 namespace=self.plugin_options.namespace.upper(), class_name=self.plugin_options.class_name.upper()
             )
         )
     elif token.token == "HEADER_GUARD_MODE_NAME":
         token.stream.write(
             "{namespace}_{class_name}_BI".format(
                 namespace=self.plugin_options.namespace.upper(),
                 class_name=self.formatter.get_mode_stack_class_name().upper(),
             )
         )
     elif token.token == "INITIAL_MODE_ID":
         token.stream.write(self.formatter.get_section_id("::main::"))
     elif token.token == "MODE_INCLUDE":
         if len(self.lexical_analyzer.sections) > 1:
             code = CodeEmitter(token.stream, token.indent)
             code.line(
                 '#include "{include_file}"'.format(include_file=self.plugin_options.file_name + "ModeStack.bi")
             )
     elif token.token == "MODE_SOURCE":
         if len(self.lexical_analyzer.sections) > 1:
             token.stream.write(
                 "../{base_file_name}ModeStack.bas".format(base_file_name=self.plugin_options.file_name)
             )
     elif token.token == "MODE_STACK_CLASS_NAME":
         token.stream.write(self.formatter.get_mode_stack_class_name())
     elif token.token == "NAMESPACE":
         token.stream.write(self.formatter.get_namespace())
     elif token.token == "STACK_DEPTH_ID":
         token.stream.write(
             "{namespace}_{class_name}_STACK_DEPTH".format(
                 namespace=self.plugin_options.namespace.upper(),
                 class_name=self.formatter.get_mode_stack_class_name().upper(),
             )
         )
     elif token.token == "STACK_DEPTH":
         token.stream.write("32")
     elif token.token == "STATE_MACHINE_DECLARATIONS":
         if len(self.lexical_analyzer.sections) > 1:
             code = CodeEmitter(token.stream, token.indent)
             for section in self.lexical_analyzer.sections:
                 code.line(
                     "Declare Function {method_name}() As {token_type}".format(
                         method_name=self.formatter.get_state_machine_method_name(section, True),
                         token_type=self.formatter.get_type("token", True),
                     )
                 )
     elif token.token == "STATE_MACHINES":
         code = CodeEmitter(token.stream, token.indent)
         if len(self.lexical_analyzer.sections) > 1:
             self.StateMachineEmitter.generate_state_machine_switch(code, self.formatter, self.lexical_analyzer)
         for section in self.lexical_analyzer.sections:
             code.line()
             emitter = self.StateMachineEmitter(
                 code=code, formatter=self.formatter, dfa_ir=self.lexical_analyzer, section_id=section
             )
             emitter.generate_state_machine()
     elif token.token == "TOKEN_IDNAMES":
         code = CodeEmitter(token.stream, token.indent)
         filtered_ids = [rule_id for rule_id in self.lexical_analyzer.rule_ids.values() if rule_id is not None]
         filtered_ids.append("SkippedToken")
         for i, rule in enumerate(sorted(filtered_ids)):
             template = '@"{name}"'.format(name=rule)
              # FreeBASIC continuation: trailing "_", with a comma after every entry but the last
              template += ", _" if i < len(filtered_ids) - 1 else " _"
              code.line(template)
     elif token.token == "TOKEN_IDNAMES_LIMIT":
         token.stream.write(str(len(self.lexical_analyzer.rule_ids) + 2))
     elif token.token.startswith("TYPE_REL_"):
         type_name = token.token[len("TYPE_REL_") :].lower()
         token.stream.write(self.formatter.get_type(type_name, True))
     elif token.token.startswith("TYPE_"):
         type_name = token.token[len("TYPE_") :].lower()
         token.stream.write(self.formatter.get_type(type_name, False))
     elif token.token == "UNICODE_CHAR_DEFINES":
         all_range_edges = set()
         for section in self.lexical_analyzer.sections.values():
             for state in section.dfa:
                 for edge in state.edges.values():
                     for minv, maxv in edge:
                         all_range_edges.update((minv, maxv))
         # Sort so the emitted #define lines come out in a stable order
         for codepoint in sorted(all_range_edges):
             name = self.formatter.get_unicode_char_name(codepoint)
             if name[0] != "&":
                 token.stream.write("#define {name} {codepoint}\n".format(name=name, codepoint="&h%02x" % codepoint))
     elif token.token == "UNICODE_HEADER_GUARD_NAME":
         namespace = self.plugin_options.namespace.upper()
         class_name = self.plugin_options.class_name.upper()
         token.stream.write("_".join((namespace, class_name, "UCS_DEFINES")))
     else:
         raise Exception("Token '{id}' not recognized".format(id=token.token))
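
Since process dispatches on token.token alone, the long if/elif ladder could equally be written as a handler table. The sketch below shows that alternative shape with two illustrative entries; it is not how the plugin is actually structured:

    def process(self, token):
        # Hypothetical table-driven variant of the ladder above.
        handlers = {
            "CLASS_NAME": lambda t: t.stream.write(self.formatter.get_class_name()),
            "NAMESPACE": lambda t: t.stream.write(self.formatter.get_namespace()),
            # ...one entry per template token...
        }
        handler = handlers.get(token.token)
        if handler is None:
            raise Exception("Token '{id}' not recognized".format(id=token.token))
        handler(token)

The prefix-matched TYPE_ and TYPE_REL_ tokens would still need startswith() fallbacks, which is one reason the plain ladder is a defensible choice here.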
Example #4
    def process(self, token):
        if token.token == 'BASE_FILE_NAME':
            token.stream.write(self.base_file_name)
        elif token.token == 'CLASS_NAME':
            token.stream.write(self.class_name)
        elif token.token == 'ENUM_TOKEN_IDS':
            ids = sorted(rule for rule in self.dfa_ir.rule_ids.values()
                         if rule is not None)
            ids.insert(0, 'invalidcharacter')
            ids.insert(0, 'endofstream')
            ids.insert(0, 'skippedtoken')
            for i, id in enumerate(ids):
                id_key = self.formatter.get_token_id(id)
                comma = ',' if i != len(ids) - 1 else ''
                token.stream.write('{indent}{id}{comma}\n'.format(
                    indent=' ' * token.indent, id=id_key, comma=comma))
        elif token.token == 'ENUM_SECTION_IDS':
            code = CodeEmitter(token.stream, token.indent)
            code.line()
            if len(self.dfa_ir.sections) > 1:
                code.line('enum Mode')
                with code.block('{', '};'):
                    ids = sorted(self.dfa_ir.sections)
                    for i, id in enumerate(ids):
                        formatted = self.formatter.get_section_id(id)
                        if i == len(ids) - 1:
                            code.line(formatted)
                        else:
                            code.line('{id},'.format(id=formatted))
                code.line()
        elif token.token == 'HEADER_GUARD':
            token.stream.write("{namespace}_{basefile}_H".format(
                namespace=self.namespace.replace(':', ''),
                basefile=self.base_file_name.upper()))
        elif token.token == 'INCLUDES':
            token.stream.write("{indent}#include \"{file_name}.h\"".format(
                indent=' ' * token.indent, file_name=self.base_file_name))
        elif token.token == 'MODE_STACK_DECLARATION':
            token.stream.write('{indent}std::istream* stream;\n'.format(
                indent=' ' * token.indent))
            if len(self.dfa_ir.sections) > 1:
                token.stream.write('{indent}{stack_type} mode;\n'.format(
                    indent=' ' * token.indent,
                    stack_type=self.formatter.get_type('mode_stack',
                                                       is_relative=True)))
        elif token.token == 'MODE_STACK_INCLUDE':
            if len(self.dfa_ir.sections) > 1:
                token.stream.write('#include <stack>\n\n')
        elif token.token == 'NAMESPACE':
            token.stream.write(self.namespace)
        elif token.token == 'PUSH_INITIAL_MODE':
            if len(self.dfa_ir.sections) > 1:
                token.stream.write(
                    '{indent}this->mode.push({initial_mode});\n'.format(
                        indent=' ' * token.indent,
                        initial_mode=self.formatter.get_section_id(
                            '::main::')))
        elif token.token == 'SELECT_ID_STRING':
            ids = sorted(rule for rule in self.dfa_ir.rule_ids.values()
                         if rule is not None)
            code = CodeEmitter(token.stream, token.indent)
            for id in ids:
                with code.block('case {class_name}::Token::{token_id}:'.format(
                        class_name=self.class_name,
                        token_id=self.formatter.get_token_id(id))):
                    code.line('id_string = "{id}";'.format(id=id))
                    code.line('break;')
        elif token.token == 'STATE_MACHINE_METHOD_DECLARATIONS':
            token.stream.write("{indent}{token_type} get_token();\n".format(
                indent=' ' * token.indent,
                token_type=self.formatter.get_type('token', is_relative=True)))
            if len(self.dfa_ir.sections) > 1:
                for section_id in self.dfa_ir.sections:
                    method_name = self.formatter.get_state_machine_method_name(
                        section_id, is_relative=True)
                    token.stream.write(
                        "{indent}{token_type} {method_name}();\n".format(
                            indent=' ' * token.indent,
                            token_type=self.formatter.get_type(
                                'token', is_relative=True),
                            method_name=method_name))
        elif token.token == 'STATE_MACHINES':
            code = CodeEmitter(token.stream, token.indent)
            if len(self.dfa_ir.sections) > 1:
                self.emit_state_machine_switch(code)
            for section in self.dfa_ir.sections:
                state_machine_emitter = self.StateMachineEmitter(
                    self.dfa_ir, section, self.formatter, code)
                state_machine_emitter.emit_state_machine()
        else:
            raise Exception("Unrecognized token: {0}".format(token.token))
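
Every name this C++ plugin emits is delegated to self.formatter, whose interface shows up here only through its call sites. The sketch below satisfies those calls; the method names mirror the usage above, but every naming rule in the bodies is invented for illustration:

    class FormatterSketch:
        # Hypothetical formatter; the signatures match the call sites above,
        # the naming conventions themselves are made up.
        def __init__(self, class_name):
            self.class_name = class_name

        def get_token_id(self, rule_id):
            return rule_id.upper()

        def get_section_id(self, section_id):
            return "MODE_" + section_id.strip(":").upper()

        def get_type(self, type_name, is_relative=False):
            names = {"token": "Token", "mode_stack": "std::stack<Mode>"}
            qualified = names[type_name]
            return qualified if is_relative else self.class_name + "::" + qualified

        def get_state_machine_method_name(self, section_id, is_relative=False):
            method = "get_token_" + section_id.strip(":").lower()
            return method if is_relative else self.class_name + "::" + method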
Example #5
 def process(self, token):
     if token.token == 'BASE_FILE_NAME':
         token.stream.write(self.base_file_name)
     elif token.token == 'CLASS_NAME':
         token.stream.write(self.class_name)
     elif token.token == 'ENUM_TOKEN_IDS':
         ids = sorted(rule for rule in self.dfa_ir.rule_ids.values() if rule is not None)
         ids.insert(0, 'invalidcharacter')
         ids.insert(0, 'endofstream')
         ids.insert(0, 'skippedtoken')
         for i, id in enumerate(ids):
             id_key = self.formatter.get_token_id(id)
             if i != len(ids)-1:
                 token.stream.write('{indent}{id},\n'.format(indent=' '*token.indent, id=id_key))
             else:
                 token.stream.write('{indent}{id}\n'.format(indent=' '*token.indent, id=id_key))
     elif token.token == 'ENUM_SECTION_IDS':
         code = CodeEmitter(token.stream, token.indent)
         code.line()
         if len(self.dfa_ir.sections) > 1:
             code.line('enum Mode')
             with code.block('{', '};'):
                 ids = sorted(self.dfa_ir.sections)
                 for i, id in enumerate(ids):
                     formatted = self.formatter.get_section_id(id)
                     if i == len(ids)-1:
                         code.line(formatted)
                     else:
                         code.line('{id},'.format(id=formatted))
             code.line()
     elif token.token == 'HEADER_GUARD':
         token.stream.write("{namespace}_{basefile}_H".format(
             namespace=self.namespace.replace(':', ''),
             basefile=self.base_file_name.upper()))
     elif token.token == 'INCLUDES':
         token.stream.write("{indent}#include \"{file_name}.h\"".format(
             indent=' '*token.indent,
             file_name=self.base_file_name))
     elif token.token == 'MODE_STACK_DECLARATION':
         token.stream.write('{indent}std::istream* stream;\n'.format(indent=' '*token.indent))
         if len(self.dfa_ir.sections) > 1:
             token.stream.write('{indent}{stack_type} mode;\n'.format(
                 indent=' '*token.indent,
                 stack_type=self.formatter.get_type('mode_stack', is_relative=True)))
     elif token.token == 'MODE_STACK_INCLUDE':
         if len(self.dfa_ir.sections) > 1:
             token.stream.write('#include <stack>\n\n')
     elif token.token == 'NAMESPACE':
         token.stream.write(self.namespace)
     elif token.token == 'PUSH_INITIAL_MODE':
         if len(self.dfa_ir.sections) > 1:
             token.stream.write('{indent}this->mode.push({initial_mode});\n'.format(
                 indent=' '*token.indent,
                 initial_mode=self.formatter.get_section_id('::main::')))
     elif token.token == 'SELECT_ID_STRING':
         ids = sorted(rule for rule in self.dfa_ir.rule_ids.values() if rule is not None)
         code = CodeEmitter(token.stream, token.indent)
         for id in ids:
             with code.block('case {class_name}::Token::{token_id}:'.format(   
                 class_name=self.class_name,
                 token_id=self.formatter.get_token_id(id))):
                 code.line('id_string = "{id}";'.format(id=id))
                 code.line('break;')
     elif token.token == 'STATE_MACHINE_METHOD_DECLARATIONS':
         token.stream.write("{indent}{token_type} get_token();\n".format(
             indent=' '*token.indent,
             token_type=self.formatter.get_type('token', is_relative=True)))
         if len(self.dfa_ir.sections) > 1:
             for section_id in self.dfa_ir.sections:
                 token.stream.write("{indent}{token_type} {method_name}();\n".format(
                     indent=' '*token.indent,
                     token_type=self.formatter.get_type('token', is_relative=True),
                     method_name=self.formatter.get_state_machine_method_name(section_id, is_relative=True)))
     elif token.token == 'STATE_MACHINES':
         code = CodeEmitter(token.stream, token.indent)
         if len(self.dfa_ir.sections) > 1:
             self.emit_state_machine_switch(code)
         for section in self.dfa_ir.sections:
             state_machine_emitter = self.StateMachineEmitter(self.dfa_ir, section, self.formatter, code)
             state_machine_emitter.emit_state_machine()
     else:
         raise Exception("Unrecognized token: {0}".format(token.token))
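
To see concretely what the ENUM_TOKEN_IDS branch writes, here is a self-contained rerun of just that loop against stub inputs; the formatter stub and the rule names are placeholders, not taken from any real grammar:

    import io

    def get_token_id(rule):  # stub for formatter.get_token_id
        return rule.capitalize()

    stream, indent = io.StringIO(), 8
    ids = sorted(["identifier", "number"])  # stand-ins for the non-None rule ids
    ids.insert(0, "invalidcharacter")
    ids.insert(0, "endofstream")
    ids.insert(0, "skippedtoken")
    for i, id in enumerate(ids):
        comma = "," if i != len(ids) - 1 else ""
        stream.write("{indent}{id}{comma}\n".format(
            indent=" " * indent, id=get_token_id(id), comma=comma))

    # Prints an enum body with a comma after every entry except the last,
    # which is exactly what C++ enum syntax needs:
    print(stream.getvalue())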
Example #6
 def process(self, token):
     if token.token == 'BASE_CLASS':
         if len(self.lexical_analyzer.sections) > 1:
             token.stream.write(self.formatter.get_mode_stack_class_name())
         else:
             token.stream.write('Object')
     elif token.token == 'BASE_FILE_NAME':
         token.stream.write(self.plugin_options.file_name)
     elif token.token == 'CLASS_NAME':
         token.stream.write(self.formatter.get_class_name())
     elif token.token == 'DEFAULT_ENCODING':
         token.stream.write(self.formatter.get_default_encoding())
     elif token.token == 'ENUM_MODE_IDS':
         code = CodeEmitter(token.stream, token.indent)
         for section in self.lexical_analyzer.sections:
             x = self.formatter.get_section_id(section)
             code.line(x)
     elif token.token == 'ENUM_TOKEN_IDS':
         code = CodeEmitter(token.stream, token.indent)
         rules = [
             id for id in self.lexical_analyzer.rule_ids.values()
             if id is not None
         ]
         rules.append('SkippedToken')
         for rule in sorted(rules):
             code.line(self.formatter.get_token_id(rule))
     elif token.token == 'HEADER_GUARD_NAME':
         token.stream.write('{namespace}_{class_name}_BI'.format(
             namespace=self.plugin_options.namespace.upper(),
             class_name=self.plugin_options.class_name.upper()))
     elif token.token == 'HEADER_GUARD_MODE_NAME':
         token.stream.write('{namespace}_{class_name}_BI'.format(
             namespace=self.plugin_options.namespace.upper(),
             class_name=self.formatter.get_mode_stack_class_name().upper()))
     elif token.token == 'INITIAL_MODE_ID':
         token.stream.write(self.formatter.get_section_id('::main::'))
     elif token.token == 'MODE_INCLUDE':
         if len(self.lexical_analyzer.sections) > 1:
             code = CodeEmitter(token.stream, token.indent)
             code.line('#include "{include_file}"'.format(
                 include_file=self.plugin_options.file_name +
                 "ModeStack.bi"))
     elif token.token == 'MODE_SOURCE':
         if len(self.lexical_analyzer.sections) > 1:
             token.stream.write("../{base_file_name}ModeStack.bas".format(
                 base_file_name=self.plugin_options.file_name))
     elif token.token == 'MODE_STACK_CLASS_NAME':
         token.stream.write(self.formatter.get_mode_stack_class_name())
     elif token.token == 'NAMESPACE':
         token.stream.write(self.formatter.get_namespace())
     elif token.token == 'STACK_DEPTH_ID':
         token.stream.write('{namespace}_{class_name}_STACK_DEPTH'.format(
             namespace=self.plugin_options.namespace.upper(),
             class_name=self.formatter.get_mode_stack_class_name().upper()))
     elif token.token == 'STACK_DEPTH':
         token.stream.write('32')
     elif token.token == 'STATE_MACHINE_DECLARATIONS':
         if len(self.lexical_analyzer.sections) > 1:
             code = CodeEmitter(token.stream, token.indent)
             for section in self.lexical_analyzer.sections:
                 method_name = self.formatter.get_state_machine_method_name(
                     section, True)
                 code.line("Declare Function {method_name}() As {token_type}".format(
                     method_name=method_name,
                     token_type=self.formatter.get_type('token', True)))
     elif token.token == 'STATE_MACHINES':
         code = CodeEmitter(token.stream, token.indent)
         if len(self.lexical_analyzer.sections) > 1:
             self.StateMachineEmitter.generate_state_machine_switch(
                 code, self.formatter, self.lexical_analyzer)
         for section in self.lexical_analyzer.sections:
             code.line()
             emitter = self.StateMachineEmitter(
                 code=code,
                 formatter=self.formatter,
                 dfa_ir=self.lexical_analyzer,
                 section_id=section)
             emitter.generate_state_machine()
     elif token.token == 'TOKEN_IDNAMES':
         code = CodeEmitter(token.stream, token.indent)
         filtered_ids = [
             rule_id for rule_id in self.lexical_analyzer.rule_ids.values()
             if rule_id is not None
         ]
         filtered_ids.append('SkippedToken')
         for i, rule in enumerate(sorted(filtered_ids)):
             template = '@"{name}"'.format(name=rule)
             # FreeBASIC continuation: trailing "_", with a comma after every entry but the last
             template += ", _" if i < len(filtered_ids) - 1 else " _"
             code.line(template)
     elif token.token == 'TOKEN_IDNAMES_LIMIT':
         token.stream.write(str(len(self.lexical_analyzer.rule_ids) + 2))
     elif token.token.startswith('TYPE_REL_'):
         type_name = token.token[len('TYPE_REL_'):].lower()
         token.stream.write(self.formatter.get_type(type_name, True))
     elif token.token.startswith('TYPE_'):
         type_name = token.token[len('TYPE_'):].lower()
         token.stream.write(self.formatter.get_type(type_name, False))
     elif token.token == 'UNICODE_CHAR_DEFINES':
         all_range_edges = set()
         for section in self.lexical_analyzer.sections.values():
             for state in section.dfa:
                 for edge in state.edges.values():
                     for minv, maxv in edge:
                         all_range_edges.update((minv, maxv))
         # Sort so the emitted #define lines come out in a stable order
         for codepoint in sorted(all_range_edges):
             name = self.formatter.get_unicode_char_name(codepoint)
             if name[0] != '&':
                 token.stream.write("#define {name} {codepoint}\n".format(
                     name=name, codepoint="&h%02x" % codepoint))
     elif token.token == 'UNICODE_HEADER_GUARD_NAME':
         namespace = self.plugin_options.namespace.upper()
         class_name = self.plugin_options.class_name.upper()
         token.stream.write('_'.join(
             (namespace, class_name, 'UCS_DEFINES')))
     else:
         raise Exception(
             "Token '{id}' not recognized".format(id=token.token))