Example #1
    def tokenize(message: list) -> list:
        time_ = time.time()
        if not 3 <= len(message) <= 4:
            return error(
                "Tokenization request format is:\n input: ['tokenize', file_name:str, file_contents:str, binary=False]\n output: ['tokenize', token_ranges:list(list(token_code, first_index, index_after))]"
            )
        file_name = message[1]
        file_contents = message[2]
        if not isinstance(file_name, str):
            return error(
                'Tokenization request: "file_name" arg must be a string.')
        if not isinstance(file_contents, str):
            return error(
                'Tokenization request: "file_contents" arg must be a string.')
        if VERBOSE:
            print("\tfile-name: " + file_name)
            print("\tfile-contents: " +
                  (repr(file_contents) if len(file_contents) < 80 else
                   repr(file_contents[0:80]) + " ..."))
        if len(message) == 4:
            binary = message[3]
            if not isinstance(binary, bool):
                return error(
                    'Tokenization request: "binary" arg must be a boolean.')
        else:
            binary = True

        stream = StringStream(file_contents, name=file_name)

        parser = AnokyParser()

        token_ranges = []
        current_index = 0
        try:
            for token in parser.tokenize(stream, emmit_restart_tokens=True):
                token_first = token.range.first_position.index
                token_after = token.range.position_after.index
                # if token_first > current_index:
                #     token_type = Tokens._TokenTypes.WHITESPACE.value if binary else Tokens._TokenTypes.WHITESPACE.name
                #     token_ranges.append([token_type, current_index, token_first])
                #     current_index = token_first
                # el
                if token_first < current_index:
                    raise Exception(
                        token_first,
                        "Overlapping tokens (%s, %s), something is wrong with the tokenizer!!!"
                        % (current_index, token_first))
                token_type = token.type.value if binary else token.type.name
                token_ranges.append([token_type, current_index, token_after])
                current_index = token_after
        except TokenizingError as e:
            return error(e)

        if len(token_ranges) > 0:
            last_token_range = token_ranges[-1]
            if last_token_range[2] < len(file_contents):
                last_token_range[2] += 1

        print("Tokenization took %s seconds" % (time.time() - time_))
        return pack(['tokenize', token_ranges])
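
A minimal request sketch for the handler above, assuming it is reachable as a plain function; the file name and source text are illustrative only.

# Hypothetical request: tokenize a short source string, asking for numeric token codes.
request = ['tokenize', 'example.aky', 'foo bar\n', True]
reply = tokenize(request)
# On success, `reply` is pack(['tokenize', token_ranges]) where each range is
# [token_code, first_index, index_after]; on malformed input it is error(...).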
Example #2
def tokenize(options):
    try:
        filename = options.filename

        with open(filename, encoding='utf-8') as source_file:
            code = source_file.read()
        stream = StringStream(code)

        parser = AnokyParser()

        if 'output' in options:
            output = options.output
            encoder = options.encoder
            filler_token_value = Tokens.WHITESPACE.value if options.binary else Tokens.WHITESPACE.name
            for token, first_index, index_after in parser.tokenize_with_intervals(stream):
                if token is None:
                    bytes_ = encoder((filler_token_value, first_index, index_after))
                else:
                    token_value = token.type.value if options.binary else token.type.name
                    bytes_ = encoder((token_value, first_index, index_after))
                output.write(bytes_)
        else:
            for token in parser.tokenize(stream):
                print(str(token))

    except CompilerError as e:
        print(e.trace)
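
A minimal sketch of how the `options` object above might be assembled; argparse.Namespace, the file names, and the line-per-token encoder are assumptions for illustration.

import argparse

# Namespace supports the `'output' in options` membership test used above.
options = argparse.Namespace(
    filename='example.aky',        # source file to tokenize (illustrative name)
    binary=False,                  # emit token names rather than numeric codes
    output=open('example.tokens', 'wb'),
    encoder=lambda triple: (repr(triple) + '\n').encode('utf-8'),
)
tokenize(options)
options.output.close()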
Example #3
def generate(options):
    parser = AnokyParser()
    try:
        if "filename" not in options:
            print("No filename specified.")
            return
        filename = options.filename
        stream = FileStream(filename)

        file_node = parser.parse(stream)
        parsed = indented_lisp_printer(file_node)

        expander = DefaultExpander()
        ec = expander.expand_unit(file_node)
        expanded = indented_lisp_printer(file_node)

        generator = DefaultGenerator()
        py_module = generator.generate_unit(
            file_node,
            # provide expansion context to generation context
            EC=ec)

        if options.verbose:
            print(parsed)
            print("\n〰〰〰〰〰〰 After macro expansion 〰〰〰〰〰〰")
            print(expanded)
            print("\n〰〰〰〰〰〰 Generated Python code 〰〰〰〰〰〰\n")
            astpp.parseprint(py_module)
            print("\n〰〰〰〰〰〰 Python retrosource 〰〰〰〰〰〰\n")
            print(ASTFormatter().format(py_module))

        return py_module

    except CompilerError as e:
        print(e.trace)
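
A minimal sketch of running the AST returned by generate(); it mirrors the execute path of the expand() command shown in Example #10, and the options object is an assumption.

import argparse
import ast

options = argparse.Namespace(filename='example.aky', verbose=False)
py_module = generate(options)
if py_module is not None:
    ast.fix_missing_locations(py_module)   # required before compiling a generated ast.Module
    exec(compile(py_module, filename='<ast>', mode='exec'))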
Example #4
    def tokenize(message: list) -> list:
        time_ = time.time()
        if not 3 <= len(message) <= 4:
            return error(
                "Tokenization request format is:\n input: ['tokenize', file_name:str, file_contents:str, binary=False]\n output: ['tokenize', token_ranges:list(list(token_code, first_index, index_after))]")
        file_name = message[1]
        file_contents = message[2]
        if not isinstance(file_name, str):
            return error('Tokenization request: "file_name" arg must be a string.')
        if not isinstance(file_contents, str):
            return error('Tokenization request: "file_contents" arg must be a string.')
        if VERBOSE:
            print("\tfile-name: " + file_name)
            print("\tfile-contents: " + (
            repr(file_contents) if len(file_contents) < 80 else repr(file_contents[0:80]) + " ..."))
        if len(message) == 4:
            binary = message[3]
            if not isinstance(binary, bool):
                return error('Tokenization request: "binary" arg must be a boolean.')
        else:
            binary = True

        stream = StringStream(file_contents, name=file_name)

        parser = AnokyParser()

        token_ranges = []
        current_index = 0
        try:
            for token in parser.tokenize(stream, emmit_restart_tokens=True):
                token_first = token.range.first_position.index
                token_after = token.range.position_after.index
                # if token_first > current_index:
                #     token_type = Tokens._TokenTypes.WHITESPACE.value if binary else Tokens._TokenTypes.WHITESPACE.name
                #     token_ranges.append([token_type, current_index, token_first])
                #     current_index = token_first
                # el
                if token_first < current_index:
                    raise Exception(token_first,
                                    "Overlapping tokens (%s, %s), something is wrong with the tokenizer!!!" % (
                                    current_index, token_first))
                token_type = token.type.value if binary else token.type.name
                token_ranges.append([token_type, current_index, token_after])
                current_index = token_after
        except TokenizingError as e:
            return error(e)

        if len(token_ranges) > 0:
            last_token_range = token_ranges[-1]
            if last_token_range[2] < len(file_contents):
                last_token_range[2] += 1

        print("Tokenization took %s seconds" % (time.time() - time_))
        return pack(['tokenize', token_ranges])
Example #5
    def colorize(message: list) -> list:
        if not 3 <= len(message) <= 4:
            return error(
                "Colorization request format is:\n input: ['colorize', file_name:str, file_contents:str, binary=False]\n output: ['colorize', token_ranges:list(list(color_code, first_index, index_after))]")
        file_name = message[1]
        file_contents = message[2]
        if not isinstance(file_name, str):
            return error('Colorization request: "file_name" arg must be a string.')
        if not isinstance(file_contents, str):
            return error('Colorization request: "file_contents" arg must be a string.')
        if VERBOSE:
            print("\tfile-name: " + file_name)
            print("\tfile-contents: " + (
                repr(file_contents) if len(file_contents) < 80 else repr(file_contents[0:80]) + " ..."))
        if len(message) == 4:
            binary = message[3]
            if not isinstance(binary, bool):
                return error('Colorization request: "binary" arg must be a boolean.')
        else:
            binary = True

        stream = StringStream(file_contents, name=file_name)

        parser = AnokyParser()
        code_expander = DefaultExpander()
        code_generator = DefaultGenerator()

        try:
            node = parser.parse(stream)
            code_expander.expand_unit(node)
            code_generator.generate_unit(node)

            colorized_tokens = []

            def extract_colorized_tokens(element):
                nonlocal colorized_tokens
                if element.color is not None and is_not_none(element, ".range.first_position.index") and is_not_none(
                        element, ".range.position_after.index"):
                    token_color = element.color
                    token_first = element.range.first_position.index
                    token_after = element.range.position_after.index
                    if not isinstance(token_color, int):
                        return error('Colorization request: color of token "%s" was not int!' % element.text)
                    colorized_tokens.append([token_color, token_first, token_after])
                if isinstance(element.code, Node):
                    for subelement in element.code:
                        extract_colorized_tokens(subelement)

            for element in node: extract_colorized_tokens(element)

        except CompilerError as e:
            return error(e)

        return pack(['colorize', colorized_tokens])
Example #6
        def _compile_to_ast(filepath):
            parser = AnokyParser()
            expander = DefaultExpander()
            generator = DefaultGenerator()

            stream = FileStream(filepath)

            file_node = parser.parse(stream)
            expander.expand_unit(file_node)
            py_module = generator.generate_unit(file_node)

            ast.fix_missing_locations(py_module)

            return py_module
Example #7
        def _compile_to_ast(filepath):
            parser = AnokyParser()
            expander = DefaultExpander()
            generator = DefaultGenerator()

            stream = FileStream(filepath)

            file_node = parser.parse(stream)
            expander.expand_unit(file_node)
            py_module = generator.generate_unit(file_node)

            ast.fix_missing_locations(py_module)

            return py_module
Example #8
def arrange(options):
    parser = AnokyParser()
    try:
        if "filename" not in options:
            print("No filename specified.")
            return
        filename = options.filename

        stream = FileStream(filename)

        file_node = parser.parse(stream)
        print(indented_lisp_printer(file_node))

    except CompilerError as e:
        print(e.trace)
Example #9
def arrange(options):
    parser = AnokyParser()
    try:
        if "filename" not in options:
            print("No filename specified.")
            return
        filename = options.filename

        stream = FileStream(filename)

        file_node = parser.parse(stream)
        print(indented_lisp_printer(file_node))

    except CompilerError as e:
        print(e.trace)
Example #10
def expand(options):
    parser = AnokyParser()
    try:
        if "filename" not in options:
            print("No filename specified.")
            return
        filename = options.filename

        stream = FileStream(filename)

        file_node = parser.parse(stream)
        # print(indented_lisp_printer(file_node))

        expander = DefaultExpander()
        ec = expander.expand_unit(file_node)
        # print("\n〰〰〰〰〰〰 After macro expansion 〰〰〰〰〰〰")
        # print(indented_lisp_printer(file_node))

        generator = DefaultGenerator()
        # provide expansion context to generation context
        py_module = generator.generate_unit(file_node, EC=ec)

        # print("\n〰〰〰〰〰〰 Generated Python code 〰〰〰〰〰〰\n")
        # astpp.parseprint(py_module)

        # print("\n〰〰〰〰〰〰 Python retrosource 〰〰〰〰〰〰\n")
        print(ASTFormatter().format(py_module))

        if options.execute:

            ast.fix_missing_locations(py_module)

            compiled_module = compile(py_module,
                                      filename="<ast>",
                                      mode="exec")

            exec(compiled_module)

    except CompilerError as e:
        print(e.trace)
Example #11
def expand(options):
    parser = AnokyParser()
    try:
        if "filename" not in options:
            print("No filename specified.")
            return
        filename = options.filename

        stream = FileStream(filename)

        file_node = parser.parse(stream)
        print(indented_lisp_printer(file_node))

        expander = DefaultExpander()
        ec = expander.expand_unit(file_node)
        print("\nAfter macro expansion:")
        print(indented_lisp_printer(file_node))

    except CompilerError as e:
        print(e.trace)
Example #12
def expand(options):
    parser = AnokyParser()
    try:
        if "filename" not in options:
            print("No filename specified.")
            return
        filename = options.filename

        stream = FileStream(filename)

        file_node = parser.parse(stream)
        print(indented_lisp_printer(file_node))

        expander = DefaultExpander()
        ec = expander.expand_unit(file_node)
        print("\nAfter macro expansion:")
        print(indented_lisp_printer(file_node))

    except CompilerError as e:
        print(e.trace)
Example #13
    async def async_tokenize(id, incomming, outgoing):
        def my_send_message(msg):
            if VERBOSE: print("\treply: " + str(msg))
            return outgoing.push_message(pack(msg))

        def my_error(e):
            nonlocal outgoing
            if VERBOSE: print("\terror: " + str(e))
            return outgoing.push_message(error(e))

        # first message (see below for syntax)
        # It will give us the file name and contents of the written code,
        # the offset of the first character, if it is anything other than zero,
        # and the indentation level at which the code is written
        message = await incomming()

        if not 3 <= len(message) <= 5:
            return outgoing.push_message(
                error(
                    "Async tokenization request format is:\n"
                    " first message: ['async_tokenize', file_name:str, file_contents:str, first_offset:int = 0, indentation_level:int = 0]\n"
                    " first reply: ['async_tokenize', handler_id:int]\n"
                    " following messages: ['async_tokenize_next', handler_id:int]\n"
                    " reply: ['async_tokenize_next', token_code, first_index, index_after]\n"
                    " ending_message: ['close', handler_id:int]\n"
                    " reply: ['close']\n"
                    " at any moment, reply may be:\n"
                    "  ['async_tokenize_error', message:str, first_position?:int, position_after?:int]"
                ))

        file_name = message[1]
        file_contents = message[2]
        if not isinstance(file_name, str):
            return my_error(
                'Async tokenization request: "file_name" arg must be a string.'
            )
        if not isinstance(file_contents, str):
            return my_error(
                'Async tokenization request: "file_contents" arg must be a string.'
            )

        if VERBOSE:
            print("\tfile-name: " + file_name)
            print("\tfile-contents: " +
                  (repr(file_contents) if len(file_contents) < 80 else
                   repr(file_contents[0:80]) + " ..."))
            if len(message) >= 4: print("\toffset: %s " % message[3])
            if len(message) >= 5: print("\tindentation: %s" % message[4])

        # Get global offset of first character, if any
        if len(message) >= 4:
            shift = message[3]
            if not isinstance(shift, int):
                return my_error(
                    'Tokenization request: "first_offset" arg must be an integer.'
                )
        else:
            shift = 0

        # get indentation level of code, if any
        if len(message) >= 5:
            indentation_level = message[4]
            if not isinstance(indentation_level, int):
                return my_error(
                    'Tokenization request: "indentation_level" arg must be an integer.'
                )
        else:
            indentation_level = 0

        # reply with the id of this async tokenization handler
        my_send_message(['async_tokenize', id])

        # Now the tokenization actually begins
        # We will tokenize each token, and between tokens we wait for the request of the next token.

        # First we prepare the stream, with the right shift and indentation level
        stream = StringStream(file_contents, name=file_name)

        if indentation_level > 0:
            stream = IndentedCharacterStream(stream)
            stream.readn(indentation_level)
            stream.push()

        # Then we tokenize the given text,
        parser = AnokyParser()
        current_index = indentation_level
        try:
            for token in parser.tokenize(stream, emmit_restart_tokens=True):
                token_first = token.range.first_position.index
                token_after = token.range.position_after.index
                # if token_first > current_index:
                #     token_type = Tokens._TokenTypes.WHITESPACE.value
                #     # We wait for the next token request, and emit a whitespace filler to the outgoing socket
                #     message = await incomming()
                #     if VERBOSE: print("\tmessage: %s" % message)
                #     assert len(message) >= 2 and message[1] == id
                #     if message[0] == 'close':
                #         my_send_message(['close'])
                #         return
                #     elif message[0] == 'async_tokenize_next':
                #         my_send_message(['async_tokenize_next', token_type, current_index+shift, token_first+shift])
                #     else:
                #         return my_error("Unkown message for async_tokenize handler, '%s'." % message[0])
                #     current_index = token_first
                # el
                if token_first < current_index:
                    raise Exception(
                        token_first,
                        "Overlapping tokens (%s, %s), something is wrong with the tokenizer!!!"
                        % (current_index + shift, token_first + shift))
                token_type = token.type.value

                # Now that we know the next token type, we wait for the next token request,
                # and emit it to the outgoing socket
                message = await incomming()
                if VERBOSE:
                    print("\tmessage: " + str(message))
                assert len(message) >= 2 and message[1] == id
                if message[0] == 'close':
                    my_send_message(['close'])
                    return
                elif message[0] == 'async_tokenize_next':
                    my_send_message([
                        'async_tokenize_next', token_type,
                        current_index + shift, token_after + shift
                    ])
                else:
                    return my_error(
                        "Unknown message for async_tokenize handler, '%s'." %
                        message[0])
                current_index = token_after
        except TokenizingError as e:
            return my_error(e)

        while True:
            message = await incomming()
            if VERBOSE: print("\tmessage: %s" % message)
            assert len(message) >= 2 and message[1] == id
            if message[0] == 'close':
                my_send_message(['close'])
                return
            elif message[0] == 'async_tokenize_next':
                my_send_message(['async_tokenize_next', -1, -1, -1])
            else:
                return my_error(
                    "Unknown message for async_tokenize handler, '%s'." %
                    message[0])

        return
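
A client-side sketch of the message sequence the handler above expects; `send` and `receive` stand in for whatever transport the surrounding server uses and are assumptions.

async def drive_async_tokenize(send, receive, file_name, file_contents):
    # Hypothetical transport: `send(msg)` ships one message, `receive()` awaits one reply.
    await send(['async_tokenize', file_name, file_contents, 0, 0])
    reply = await receive()                 # ['async_tokenize', handler_id]
    handler_id = reply[1]
    token_ranges = []
    while True:
        await send(['async_tokenize_next', handler_id])
        reply = await receive()             # ['async_tokenize_next', code, first, after]
        if reply[0] != 'async_tokenize_next' or reply[1] == -1:
            break                           # [-1, -1, -1] means the token stream is exhausted
        token_ranges.append(reply[1:])
    await send(['close', handler_id])
    await receive()                         # ['close']
    return token_ranges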
Example #14
    def colorize(message: list) -> list:
        if not 3 <= len(message) <= 4:
            return error(
                "Colorization request format is:\n input: ['colorize', file_name:str, file_contents:str, binary=False]\n output: ['colorize', token_ranges:list(list(color_code, first_index, index_after))]"
            )
        file_name = message[1]
        file_contents = message[2]
        if not isinstance(file_name, str):
            return error(
                'Colorization request: "file_name" arg must be a string.')
        if not isinstance(file_contents, str):
            return error(
                'Colorization request: "file_contents" arg must be a string.')
        if VERBOSE:
            print("\tfile-name: " + file_name)
            print("\tfile-contents: " +
                  (repr(file_contents) if len(file_contents) < 80 else
                   repr(file_contents[0:80]) + " ..."))
        if len(message) == 4:
            binary = message[3]
            if not isinstance(binary, bool):
                return error(
                    'Colorization request: "binary" arg must be a boolean.')
        else:
            binary = True

        stream = StringStream(file_contents, name=file_name)

        parser = AnokyParser()
        code_expander = DefaultExpander()
        code_generator = DefaultGenerator()

        try:
            node = parser.parse(stream)
            code_expander.expand_unit(node)
            code_generator.generate_unit(node)

            colorized_tokens = []

            def extract_colorized_tokens(element):
                nonlocal colorized_tokens
                if element.color is not None and is_not_none(
                        element,
                        ".range.first_position.index") and is_not_none(
                            element, ".range.position_after.index"):
                    token_color = element.color
                    token_first = element.range.first_position.index
                    token_after = element.range.position_after.index
                    if not isinstance(token_color, int):
                        return error(
                            'Colorization request: color of token "%s" was not int!'
                            % element.text)
                    colorized_tokens.append(
                        [token_color, token_first, token_after])
                if isinstance(element.code, Node):
                    for subelement in element.code:
                        extract_colorized_tokens(subelement)

            for element in node:
                extract_colorized_tokens(element)

        except CompilerError as e:
            return error(e)

        return pack(['colorize', colorized_tokens])
Example #15
from anoky.syntax.lisp_printer import indented_lisp_printer
from anoky.common.errors import CompilerError, TokenizingError
from anoky.streams.string_stream import StringStream
from anoky.generation.default_special_forms_table import default_special_forms_table
from anoky.expansion.default_macro_table import default_macro_table, default_id_macro_table
from anoky.syntax.token import is_token
from prompt_toolkit.history import InMemoryHistory
from prompt_toolkit import prompt
import argparse
import ast
import astpp
import sys
import traceback
import os
import anoky.syntax.tokens as Tokens
# Module paths assumed for the three classes below; adjust to the actual package layout.
from anoky.parsers.anoky_parser import AnokyParser
from anoky.expansion.expander import DefaultExpander
from anoky.generation.generator import DefaultGenerator

__parser__ = AnokyParser()
__macros__ = default_macro_table()
__id_macros__ = default_id_macro_table()
__special_forms__ = default_special_forms_table()
code_expander = DefaultExpander()
code_generator = DefaultGenerator()


def anoky_tokenize(stream, options):
    tokenized_node = __parser__.tokenize_into_node(stream,
                                                   emmit_restart_tokens=False)
    if options.print_tokens:
        print('\n——›–  Tokenized source  –‹——')
        for token in tokenized_node:
            print(str(token))
    errors = []
Example #16
    async def async_tokenize(id, incomming, outgoing):
        def my_send_message(msg):
            if VERBOSE: print("\treply: " + str(msg))
            return outgoing.push_message(pack(msg))

        def my_error(e):
            nonlocal outgoing
            if VERBOSE: print("\terror: " + str(e))
            return outgoing.push_message(error(e))

        # first message (see below for syntax)
        # It will give us the file name and contents of the written code,
        # the offset of the first character, if it is anything other than zero,
        # and the indentation level at which the code is written
        message = await incomming()

        if not 3 <= len(message) <= 5:
            return outgoing.push_message(error(
                "Async tokenization request format is:\n"
                " first message: ['async_tokenize', file_name:str, file_contents:str, first_offset:int = 0, indentation_level:int = 0]\n"
                " first reply: ['async_tokenize', handler_id:int]\n"

                " following messages: ['async_tokenize_next', handler_id:int]\n"
                " reply: ['async_tokenize_next', token_code, first_index, index_after]\n"

                " ending_message: ['close', handler_id:int]\n"
                " reply: ['close']\n"

                " at any moment, reply may be:\n"
                "  ['async_tokenize_error', message:str, first_position?:int, position_after?:int]"))

        file_name = message[1]
        file_contents = message[2]
        if not isinstance(file_name, str):
            return my_error('Async tokenization request: "file_name" arg must be a string.')
        if not isinstance(file_contents, str):
            return my_error('Async tokenization request: "file_contents" arg must be a string.')

        if VERBOSE:
            print("\tfile-name: " + file_name)
            print("\tfile-contents: " + (
            repr(file_contents) if len(file_contents) < 80 else repr(file_contents[0:80]) + " ..."))
            if len(message) >= 4: print("\toffset: %s " % message[3])
            if len(message) >= 5: print("\tindentation: %s" % message[4])

        # Get global offset of first character, if any
        if len(message) >= 4:
            shift = message[3]
            if not isinstance(shift, int):
                return my_error('Tokenization request: "first_offset" arg must be an integer.')
        else:
            shift = 0

        # get indentation level of code, if any
        if len(message) >= 5:
            indentation_level = message[4]
            if not isinstance(indentation_level, int):
                return my_error('Tokenization request: "indentation_level" arg must be an integer.')
        else:
            indentation_level = 0

        # reply with the id of this async tokenization handler
        my_send_message(['async_tokenize', id])


        # Now the tokenization actually begins
        # We will tokenize each token, and between tokens we wait for the request of the next token.

        # First we prepare the stream, with the right shift and indentation level
        stream = StringStream(file_contents, name=file_name)

        if indentation_level > 0:
            stream = IndentedCharacterStream(stream)
            stream.readn(indentation_level)
            stream.push()

        # Then we tokenize the given text,
        parser = AnokyParser()
        current_index = indentation_level
        try:
            for token in parser.tokenize(stream, emmit_restart_tokens=True):
                token_first = token.range.first_position.index
                token_after = token.range.position_after.index
                # if token_first > current_index:
                #     token_type = Tokens._TokenTypes.WHITESPACE.value
                #     # We wait for the next token request, and emit a whitespace filler to the outgoing socket
                #     message = await incomming()
                #     if VERBOSE: print("\tmessage: %s" % message)
                #     assert len(message) >= 2 and message[1] == id
                #     if message[0] == 'close':
                #         my_send_message(['close'])
                #         return
                #     elif message[0] == 'async_tokenize_next':
                #         my_send_message(['async_tokenize_next', token_type, current_index+shift, token_first+shift])
                #     else:
                #         return my_error("Unkown message for async_tokenize handler, '%s'." % message[0])
                #     current_index = token_first
                # el
                if token_first < current_index:
                    raise Exception(token_first,
                                    "Overlapping tokens (%s, %s), something is wrong with the tokenizer!!!" % (
                                    current_index+shift, token_first+shift))
                token_type = token.type.value

                # Now that we know the next token type, we wait for the next token request,
                # and emit it to the outgoing socket
                message = await incomming()
                if VERBOSE:
                    print("\tmessage: " + str(message))
                assert len(message) >= 2 and message[1] == id
                if message[0] == 'close':
                    my_send_message(['close'])
                    return
                elif message[0] == 'async_tokenize_next':
                    my_send_message(['async_tokenize_next', token_type, current_index+shift, token_after+shift])
                else:
                    return my_error("Unknown message for async_tokenize handler, '%s'." % message[0])
                current_index = token_after
        except TokenizingError as e:
            return my_error(e)

        while True:
            message = await incomming()
            if VERBOSE: print("\tmessage: %s" % message)
            assert len(message) >= 2 and message[1] == id
            if message[0] == 'close':
                my_send_message(['close'])
                return
            elif message[0] == 'async_tokenize_next':
                my_send_message(['async_tokenize_next', -1, -1, -1])
            else:
                return my_error("Unknown message for async_tokenize handler, '%s'." % message[0])

        return