def tokenize(message: list) -> list:
    """Handle a 'tokenize' request message.

    input:  ['tokenize', file_name:str, file_contents:str, binary=False]
    output: packed ['tokenize', token_ranges] where token_ranges is a list of
            [token_code, first_index, index_after] triples covering the file.

    Returns an error packet (via ``error``) for malformed requests or
    tokenizer failures.
    """
    time_ = time.time()
    if not 3 <= len(message) <= 4:
        return error(
            "Tokenization request format is:\n input: ['tokenize', file_name:str, file_contents:str, binary=False]\n output: ['tokenize', token_ranges:list(list(token_code, first_index, index_after))]")
    file_name = message[1]
    file_contents = message[2]
    if not isinstance(file_name, str):
        return error('Tokenization request: "file_name" arg must be a string.')
    if not isinstance(file_contents, str):
        return error('Tokenization request: "file_contents" arg must be a string.')
    if VERBOSE:
        print("\tfile-name: " + file_name)
        print("\tfile-contents: " + (repr(file_contents) if len(file_contents) < 80
                                     else repr(file_contents[0:80]) + " ..."))
    if len(message) == 4:
        binary = message[3]
        # BUG FIX: the original tested isinstance(file_contents, bool), which is
        # never true for the (str) file contents, so every 4-element request was
        # rejected.  Validate the actual "binary" argument, and report the
        # expected type correctly.
        if not isinstance(binary, bool):
            return error('Tokenization request: "binary" arg must be a boolean.')
    else:
        # NOTE(review): the request format above documents binary=False as the
        # default, but the code has always defaulted to True (numeric token
        # codes); kept as-is so existing clients are unaffected — confirm.
        binary = True
    stream = StringStream(file_contents, name=file_name)
    parser = AnokyParser()
    token_ranges = []
    current_index = 0
    try:
        for token in parser.tokenize(stream, emmit_restart_tokens=True):
            token_first = token.range.first_position.index
            token_after = token.range.position_after.index
            if token_first < current_index:
                # Overlap means the tokenizer itself is broken; let this
                # propagate (only TokenizingError is caught below).
                raise Exception(token_first,
                                "Overlapping tokens (%s, %s), something is wrong with the tokenizer!!!"
                                % (current_index, token_first))
            token_type = token.type.value if binary else token.type.name
            # Ranges start at current_index (not token_first) so any gap
            # between tokens is absorbed into the following token's range.
            token_ranges.append([token_type, current_index, token_after])
            current_index = token_after
    except TokenizingError as e:
        return error(e)
    # Extend the last range by one character so the end of the file is covered.
    if len(token_ranges) > 0:
        last_token_range = token_ranges[-1]
        if last_token_range[2] < len(file_contents):
            last_token_range[2] += 1
    # BUG FIX: elapsed time is now - start; the original printed the negation.
    print("Tokenization took %s seconds" % (time.time() - time_))
    return pack(['tokenize', token_ranges])
def tokenize(options):
    """Tokenize the file named by ``options.filename``.

    When options contains an 'output' entry, each (token_value, first_index,
    index_after) triple is run through ``options.encoder`` and written to
    ``options.output`` (gaps between tokens are emitted as whitespace
    fillers); otherwise every token is printed to stdout.

    CompilerError is caught and its trace printed.
    """
    try:
        filename = options.filename
        # BUG FIX: the original opened the file without ever closing it;
        # a context manager guarantees the handle is released.
        with open(filename, encoding='utf-8') as source_file:
            code = source_file.read()
        stream = StringStream(code)
        parser = AnokyParser()
        if 'output' in options:
            output = options.output
            encoder = options.encoder
            # Token code used to fill gaps between real tokens.
            filler_token_value = Tokens.WHITESPACE.value if options.binary else Tokens.WHITESPACE.name
            for token, first_index, index_after in parser.tokenize_with_intervals(stream):
                if token is None:
                    # Interval with no token: emit a whitespace filler record.
                    bytes_ = encoder((filler_token_value, first_index, index_after))
                else:
                    token_value = token.type.value if options.binary else token.type.name
                    bytes_ = encoder((token_value, first_index, index_after))
                output.write(bytes_)
        else:
            for token in parser.tokenize(stream):
                print(str(token))
    except CompilerError as e:
        print(e.trace)
def generate(options):
    """Parse, macro-expand, and generate a Python AST module from options.filename.

    Returns the generated module AST, or None when no filename was supplied
    or a CompilerError occurred (its trace is printed in that case).  With
    options.verbose set, pretty-prints every intermediate stage.
    """
    anoky_parser = AnokyParser()
    try:
        if "filename" not in options:
            print("No filename specified.")
            return
        source_stream = FileStream(options.filename)
        unit_node = anoky_parser.parse(source_stream)
        pretty_parsed = indented_lisp_printer(unit_node)
        macro_expander = DefaultExpander()
        expansion_context = macro_expander.expand_unit(unit_node)
        pretty_expanded = indented_lisp_printer(unit_node)
        code_gen = DefaultGenerator()
        # provide expansion context to generation context
        py_module = code_gen.generate_unit(unit_node, EC=expansion_context)
        if options.verbose:
            print(pretty_parsed)
            print("\n〰〰〰〰〰〰 After macro expansion 〰〰〰〰〰〰")
            print(pretty_expanded)
            print("\n〰〰〰〰〰〰 Generated Python code 〰〰〰〰〰〰\n")
            astpp.parseprint(py_module)
            print("\n〰〰〰〰〰〰 Python retrosource 〰〰〰〰〰〰\n")
            print(ASTFormatter().format(py_module))
        return py_module
    except CompilerError as e:
        print(e.trace)
def tokenize(message: list) -> list:
    """Handle a 'tokenize' request message (duplicate definition in this file).

    input:  ['tokenize', file_name:str, file_contents:str, binary=False]
    output: packed ['tokenize', token_ranges] where token_ranges is a list of
            [token_code, first_index, index_after] triples covering the file.
    """
    time_ = time.time()
    if not 3 <= len(message) <= 4:
        return error(
            "Tokenization request format is:\n input: ['tokenize', file_name:str, file_contents:str, binary=False]\n output: ['tokenize', token_ranges:list(list(token_code, first_index, index_after))]")
    file_name = message[1]
    file_contents = message[2]
    if not isinstance(file_name, str):
        return error('Tokenization request: "file_name" arg must be a string.')
    if not isinstance(file_contents, str):
        return error('Tokenization request: "file_contents" arg must be a string.')
    if VERBOSE:
        print("\tfile-name: " + file_name)
        print("\tfile-contents: " + (repr(file_contents) if len(file_contents) < 80
                                     else repr(file_contents[0:80]) + " ..."))
    if len(message) == 4:
        binary = message[3]
        # BUG FIX: original checked isinstance(file_contents, bool) — always
        # false for a str — so every 4-element request was rejected.  Validate
        # the "binary" argument itself, with a correct error message.
        if not isinstance(binary, bool):
            return error('Tokenization request: "binary" arg must be a boolean.')
    else:
        # NOTE(review): documented default is binary=False, but the code has
        # always defaulted to True; kept for backward compatibility — confirm.
        binary = True
    stream = StringStream(file_contents, name=file_name)
    parser = AnokyParser()
    token_ranges = []
    current_index = 0
    try:
        for token in parser.tokenize(stream, emmit_restart_tokens=True):
            token_first = token.range.first_position.index
            token_after = token.range.position_after.index
            if token_first < current_index:
                # Tokenizer invariant violated; deliberately not caught below.
                raise Exception(token_first,
                                "Overlapping tokens (%s, %s), something is wrong with the tokenizer!!!"
                                % (current_index, token_first))
            token_type = token.type.value if binary else token.type.name
            # Start each range at current_index so gaps between tokens are
            # folded into the following token's range.
            token_ranges.append([token_type, current_index, token_after])
            current_index = token_after
    except TokenizingError as e:
        return error(e)
    # Stretch the final range by one character so the file end is covered.
    if len(token_ranges) > 0:
        last_token_range = token_ranges[-1]
        if last_token_range[2] < len(file_contents):
            last_token_range[2] += 1
    # BUG FIX: print start-to-now, not the negated duration.
    print("Tokenization took %s seconds" % (time.time() - time_))
    return pack(['tokenize', token_ranges])
def colorize(message: list) -> list:
    """Handle a 'colorize' request message.

    input:  ['colorize', file_name:str, file_contents:str, binary=False]
    output: packed ['colorize', colorized_tokens] where each entry is
            [color_code, first_index, index_after].
    """
    if not 3 <= len(message) <= 4:
        return error(
            "Colorization request format is:\n input: ['colorize', file_name:str, file_contents:str, binary=False]\n output: ['colorize', token_ranges:list(list(color_code, first_index, index_after))]")
    file_name = message[1]
    file_contents = message[2]
    if not isinstance(file_name, str):
        return error('Colorization request: "file_name" arg must be a string.')
    if not isinstance(file_contents, str):
        return error('Colorization request: "file_contents" arg must be a string.')
    if VERBOSE:
        print("\tfile-name: " + file_name)
        print("\tfile-contents: " + (repr(file_contents) if len(file_contents) < 80
                                     else repr(file_contents[0:80]) + " ..."))
    if len(message) == 4:
        binary = message[3]
        # BUG FIX: original tested isinstance(file_contents, bool), which always
        # failed for the (str) contents; validate the "binary" arg instead and
        # report the expected type correctly.
        if not isinstance(binary, bool):
            return error('Colorization request: "binary" arg must be a boolean.')
    else:
        binary = True
    # NOTE(review): "binary" is validated but not used below; color codes are
    # always emitted as ints — confirm whether a name/value switch was intended.
    stream = StringStream(file_contents, name=file_name)
    parser = AnokyParser()
    code_expander = DefaultExpander()
    code_generator = DefaultGenerator()
    try:
        node = parser.parse(stream)
        code_expander.expand_unit(node)
        code_generator.generate_unit(node)
        colorized_tokens = []

        def extract_colorized_tokens(element):
            # Recursively collect [color, first, after] for every colored
            # element that carries a complete source range.
            nonlocal colorized_tokens
            if element.color is not None and is_not_none(element, ".range.first_position.index") \
                    and is_not_none(element, ".range.position_after.index"):
                token_color = element.color
                token_first = element.range.first_position.index
                token_after = element.range.position_after.index
                if not isinstance(token_color, int):
                    # NOTE(review): this returns from the helper only; the error
                    # packet is discarded by the caller and traversal of the
                    # remaining elements continues — confirm intent.
                    return error('Colorization request: color of token "%s" was not int!' % element.text)
                colorized_tokens.append([token_color, token_first, token_after])
            if isinstance(element.code, Node):
                for subelement in element.code:
                    extract_colorized_tokens(subelement)

        for element in node:
            extract_colorized_tokens(element)
    except CompilerError as e:
        return error(e)
    return pack(['colorize', colorized_tokens])
def _compile_to_ast(filepath):
    """Parse, macro-expand, and generate a compilable Python AST module
    from the anoky source file at *filepath*."""
    source = FileStream(filepath)
    unit = AnokyParser().parse(source)
    DefaultExpander().expand_unit(unit)
    module_ast = DefaultGenerator().generate_unit(unit)
    # Fill in any missing lineno/col_offset fields so compile() accepts it.
    ast.fix_missing_locations(module_ast)
    return module_ast
def arrange(options):
    """Parse options.filename and pretty-print its arranged node tree.

    Prints a notice when no filename is given; CompilerError traces are
    printed rather than raised.
    """
    anoky_parser = AnokyParser()
    try:
        if "filename" not in options:
            print("No filename specified.")
            return
        node = anoky_parser.parse(FileStream(options.filename))
        print(indented_lisp_printer(node))
    except CompilerError as e:
        print(e.trace)
def expand(options):
    """Parse and macro-expand options.filename, print the generated Python
    retrosource, and — when options.execute is set — compile and run it.

    CompilerError traces are printed rather than raised.
    """
    anoky_parser = AnokyParser()
    try:
        if "filename" not in options:
            print("No filename specified.")
            return
        unit_node = anoky_parser.parse(FileStream(options.filename))
        expansion_context = DefaultExpander().expand_unit(unit_node)
        # provide expansion context to generation context
        py_module = DefaultGenerator().generate_unit(unit_node, EC=expansion_context)
        print(ASTFormatter().format(py_module))
        if options.execute:
            ast.fix_missing_locations(py_module)
            compiled_module = compile(py_module, filename="<ast>", mode="exec")
            exec(compiled_module)
    except CompilerError as e:
        print(e.trace)
def expand(options):
    """Parse options.filename, print its node tree, macro-expand it, and
    print the tree again.

    NOTE(review): another function named ``expand`` exists in this file;
    which definition wins depends on their order at import time — confirm.
    """
    anoky_parser = AnokyParser()
    try:
        if "filename" not in options:
            print("No filename specified.")
            return
        unit_node = anoky_parser.parse(FileStream(options.filename))
        print(indented_lisp_printer(unit_node))
        macro_expander = DefaultExpander()
        ec = macro_expander.expand_unit(unit_node)
        print("\nAfter macro expansion:")
        print(indented_lisp_printer(unit_node))
    except CompilerError as e:
        print(e.trace)
async def async_tokenize(id, incomming, outgoing):
    # Async tokenization handler.  Reads one request from `incomming`, then
    # replies with this handler's id and emits exactly one token per
    # subsequent 'async_tokenize_next' request over `outgoing`, until the
    # client sends 'close'.  The full protocol syntax is in the format error
    # message below.

    def my_send_message(msg):
        # Pack and push a reply onto the outgoing message queue.
        if VERBOSE: print("\treply: " + str(msg))
        return outgoing.push_message(pack(msg))

    def my_error(e):
        # Push an error reply onto the outgoing message queue.
        nonlocal outgoing
        if VERBOSE: print("\terror: " + str(e))
        return outgoing.push_message(error(e))

    # first message (see below for syntax)
    # It will give us the filename name and contents of the written code,
    # and also whether we should mark the first offset as being anything other than zero,
    # and the indentation level at which the code is written
    message = await incomming()
    if not 3 <= len(message) <= 5:
        return outgoing.push_message(error(
            "Async tokenization request format is:\n"
            " first message: ['async_tokenize', file_name:str, file_contents:str, first_offset:int = 0, indentation_level:int = 0]\n"
            " first reply: ['async_tokenize', handler_id:int]\n"
            " following messages: ['async_tokenize_next', handler_id:int]\n"
            " reply: ['async_tokenize_next', token_code, first_index, index_after]\n"
            " ending_message: ['close', handler_id:int]\n"
            " reply: ['close']"
            "at any moment, reply may be:"
            " ['async_tokenize_error', message:str, first_position?:int, position_after?:int]"))
    file_name = message[1]
    file_contents = message[2]
    if not isinstance(file_name, str):
        return my_error('Async tokenization request: "file_name" arg must be a string.')
    if not isinstance(file_contents, str):
        return my_error('Async tokenization request: "file_contents" arg must be a string.')
    if VERBOSE:
        print("\tfile-name: " + file_name)
        print("\tfile-contents: " + (repr(file_contents) if len(file_contents) < 80
                                     else repr(file_contents[0:80]) + " ..."))
        if len(message) >= 4: print("\toffset: %s " % message[3])
        if len(message) >= 5: print("\tindentation: %s" % message[4])
    # Get global offset of first character, if any
    if len(message) >= 4:
        shift = message[3]
        if not isinstance(shift, int):
            return my_error('Tokenization request: "first_offset" arg must be an integer.')
    else:
        shift = 0
    # get indentation level of code, if any
    if len(message) >= 5:
        indentation_level = message[4]
        if not isinstance(indentation_level, int):
            return my_error('Tokenization request: "indentation_level" arg must be an integer.')
    else:
        indentation_level = 0
    # reply with the id of this async tokenization handler
    my_send_message(['async_tokenize', id])
    # Now the tokenization actually begins
    # We will tokenize each token, and between tokens we wait for the request of the next token.
    # First we prepare the stream, with the right shift and indentation level
    stream = StringStream(file_contents, name=file_name)
    if indentation_level > 0:
        stream = IndentedCharacterStream(stream)
        stream.readn(indentation_level)
        stream.push()
    # Then we tokenize the given text,
    parser = AnokyParser()
    current_index = indentation_level
    try:
        for token in parser.tokenize(stream, emmit_restart_tokens=True):
            token_first = token.range.first_position.index
            token_after = token.range.position_after.index
            # (A disabled code path once emitted whitespace fillers for gaps
            # between tokens here.)
            if token_first < current_index:
                # Tokenizer invariant violated; deliberately not caught below.
                raise Exception(token_first,
                                "Overlapping tokens (%s, %s), something is wrong with the tokenizer!!!"
                                % (current_index + shift, token_first + shift))
            token_type = token.type.value
            # Now that we know the next token type, we wait for the next token request,
            # and emit it to the outgoing socket
            message = await incomming()
            if VERBOSE: print("\tmessage: " + str(message))
            assert len(message) >= 2 and message[1] == id
            if message[0] == 'close':
                my_send_message(['close'])
                return
            elif message[0] == 'async_tokenize_next':
                my_send_message(['async_tokenize_next', token_type,
                                 current_index + shift, token_after + shift])
            else:
                return my_error("Unkown message for async_tokenize handler, '%s'." % message[0])
            current_index = token_after
    except TokenizingError as e:
        return my_error(e)
    # All tokens emitted: keep answering requests with a (-1, -1, -1)
    # sentinel until the client sends 'close'.
    while True:
        message = await incomming()
        if VERBOSE: print("\tmessage: %s" % message)
        assert len(message) >= 2 and message[1] == id
        if message[0] == 'close':
            my_send_message(['close'])
            return
        elif message[0] == 'async_tokenize_next':
            my_send_message(['async_tokenize_next', -1, -1, -1])
        else:
            return my_error("Unkown message for async_tokenize handler, '%s'." % message[0])
    return
def colorize(message: list) -> list:
    """Handle a 'colorize' request message (duplicate definition in this file).

    input:  ['colorize', file_name:str, file_contents:str, binary=False]
    output: packed ['colorize', colorized_tokens] where each entry is
            [color_code, first_index, index_after].
    """
    if not 3 <= len(message) <= 4:
        return error(
            "Colorization request format is:\n input: ['colorize', file_name:str, file_contents:str, binary=False]\n output: ['colorize', token_ranges:list(list(color_code, first_index, index_after))]")
    file_name = message[1]
    file_contents = message[2]
    if not isinstance(file_name, str):
        return error('Colorization request: "file_name" arg must be a string.')
    if not isinstance(file_contents, str):
        return error('Colorization request: "file_contents" arg must be a string.')
    if VERBOSE:
        print("\tfile-name: " + file_name)
        print("\tfile-contents: " + (repr(file_contents) if len(file_contents) < 80
                                     else repr(file_contents[0:80]) + " ..."))
    if len(message) == 4:
        binary = message[3]
        # BUG FIX: original tested isinstance(file_contents, bool) — always
        # false for a str — so every 4-element request was rejected.  Validate
        # the "binary" argument and report the expected type correctly.
        if not isinstance(binary, bool):
            return error('Colorization request: "binary" arg must be a boolean.')
    else:
        binary = True
    # NOTE(review): "binary" is validated but unused below; colors are always
    # emitted as ints — confirm whether a name/value switch was intended.
    stream = StringStream(file_contents, name=file_name)
    parser = AnokyParser()
    code_expander = DefaultExpander()
    code_generator = DefaultGenerator()
    try:
        node = parser.parse(stream)
        code_expander.expand_unit(node)
        code_generator.generate_unit(node)
        colorized_tokens = []

        def extract_colorized_tokens(element):
            # Recursively collect [color, first, after] for colored elements
            # that carry a complete source range.
            nonlocal colorized_tokens
            if element.color is not None and is_not_none(element, ".range.first_position.index") \
                    and is_not_none(element, ".range.position_after.index"):
                token_color = element.color
                token_first = element.range.first_position.index
                token_after = element.range.position_after.index
                if not isinstance(token_color, int):
                    # NOTE(review): returns from the helper only; the error
                    # packet is discarded and traversal continues — confirm.
                    return error('Colorization request: color of token "%s" was not int!' % element.text)
                colorized_tokens.append([token_color, token_first, token_after])
            if isinstance(element.code, Node):
                for subelement in element.code:
                    extract_colorized_tokens(subelement)

        for element in node:
            extract_colorized_tokens(element)
    except CompilerError as e:
        return error(e)
    return pack(['colorize', colorized_tokens])
# Module imports: anoky compiler pieces, prompt_toolkit (REPL support), stdlib.
from anoky.syntax.lisp_printer import indented_lisp_printer
from anoky.common.errors import CompilerError, TokenizingError
from anoky.streams.string_stream import StringStream
from anoky.generation.default_special_forms_table import default_special_forms_table
from anoky.expansion.default_macro_table import default_macro_table, default_id_macro_table
from anoky.syntax.token import is_token
from prompt_toolkit.history import InMemoryHistory
from prompt_toolkit import prompt
import argparse
import ast
import astpp
import sys
import traceback
import os
import anoky.syntax.tokens as Tokens

# Module-level singletons shared by the entry points below.
# NOTE(review): AnokyParser, DefaultExpander and DefaultGenerator are not
# imported in this excerpt — presumably imported elsewhere in the file; verify.
__parser__ = AnokyParser()
__macros__ = default_macro_table()
__id_macros__ = default_id_macro_table()
__special_forms__ = default_special_forms_table()
code_expander = DefaultExpander()
code_generator = DefaultGenerator()


def anoky_tokenize(stream, options):
    # Tokenize *stream* into a node, optionally echoing each token when
    # options.print_tokens is set.
    tokenized_node = __parser__.tokenize_into_node(stream, emmit_restart_tokens=False)
    if options.print_tokens:
        print('\n——›– Tokenized source –‹——')
        for token in tokenized_node:
            print(str(token))
    # NOTE(review): the visible chunk ends here with an empty error list;
    # the remainder of this function appears to lie outside this excerpt.
    errors = []
async def async_tokenize(id, incomming, outgoing):
    # Async tokenization handler (duplicate definition in this file).
    # Reads one request from `incomming`, replies with this handler's id, and
    # emits one token per 'async_tokenize_next' request over `outgoing` until
    # the client sends 'close'.  Full protocol syntax is in the format error
    # message below.

    def my_send_message(msg):
        # Pack and push a reply onto the outgoing message queue.
        if VERBOSE: print("\treply: " + str(msg))
        return outgoing.push_message(pack(msg))

    def my_error(e):
        # Push an error reply onto the outgoing message queue.
        nonlocal outgoing
        if VERBOSE: print("\terror: " + str(e))
        return outgoing.push_message(error(e))

    # first message (see below for syntax)
    # It will give us the filename name and contents of the written code,
    # and also whether we should mark the first offset as being anything other than zero,
    # and the indentation level at which the code is written
    message = await incomming()
    if not 3 <= len(message) <= 5:
        return outgoing.push_message(error(
            "Async tokenization request format is:\n"
            " first message: ['async_tokenize', file_name:str, file_contents:str, first_offset:int = 0, indentation_level:int = 0]\n"
            " first reply: ['async_tokenize', handler_id:int]\n"
            " following messages: ['async_tokenize_next', handler_id:int]\n"
            " reply: ['async_tokenize_next', token_code, first_index, index_after]\n"
            " ending_message: ['close', handler_id:int]\n"
            " reply: ['close']"
            "at any moment, reply may be:"
            " ['async_tokenize_error', message:str, first_position?:int, position_after?:int]"))
    file_name = message[1]
    file_contents = message[2]
    if not isinstance(file_name, str):
        return my_error('Async tokenization request: "file_name" arg must be a string.')
    if not isinstance(file_contents, str):
        return my_error('Async tokenization request: "file_contents" arg must be a string.')
    if VERBOSE:
        print("\tfile-name: " + file_name)
        print("\tfile-contents: " + (repr(file_contents) if len(file_contents) < 80
                                     else repr(file_contents[0:80]) + " ..."))
        if len(message) >= 4: print("\toffset: %s " % message[3])
        if len(message) >= 5: print("\tindentation: %s" % message[4])
    # Get global offset of first character, if any
    if len(message) >= 4:
        shift = message[3]
        if not isinstance(shift, int):
            return my_error('Tokenization request: "first_offset" arg must be an integer.')
    else:
        shift = 0
    # get indentation level of code, if any
    if len(message) >= 5:
        indentation_level = message[4]
        if not isinstance(indentation_level, int):
            return my_error('Tokenization request: "indentation_level" arg must be an integer.')
    else:
        indentation_level = 0
    # reply with the id of this async tokenization handler
    my_send_message(['async_tokenize', id])
    # Now the tokenization actually begins
    # We will tokenize each token, and between tokens we wait for the request of the next token.
    # First we prepare the stream, with the right shift and indentation level
    stream = StringStream(file_contents, name=file_name)
    if indentation_level > 0:
        stream = IndentedCharacterStream(stream)
        stream.readn(indentation_level)
        stream.push()
    # Then we tokenize the given text,
    parser = AnokyParser()
    current_index = indentation_level
    try:
        for token in parser.tokenize(stream, emmit_restart_tokens=True):
            token_first = token.range.first_position.index
            token_after = token.range.position_after.index
            # (A disabled code path once emitted whitespace fillers for gaps
            # between tokens here.)
            if token_first < current_index:
                # Tokenizer invariant violated; deliberately not caught below.
                raise Exception(token_first,
                                "Overlapping tokens (%s, %s), something is wrong with the tokenizer!!!"
                                % (current_index+shift, token_first+shift))
            token_type = token.type.value
            # Now that we know the next token type, we wait for the next token request,
            # and emit it to the outgoing socket
            message = await incomming()
            if VERBOSE: print("\tmessage: " + str(message))
            assert len(message) >= 2 and message[1] == id
            if message[0] == 'close':
                my_send_message(['close'])
                return
            elif message[0] == 'async_tokenize_next':
                my_send_message(['async_tokenize_next', token_type,
                                 current_index+shift, token_after+shift])
            else:
                return my_error("Unkown message for async_tokenize handler, '%s'." % message[0])
            current_index = token_after
    except TokenizingError as e:
        return my_error(e)
    # All tokens emitted: keep answering requests with a (-1, -1, -1)
    # sentinel until the client sends 'close'.
    while True:
        message = await incomming()
        if VERBOSE: print("\tmessage: %s" % message)
        assert len(message) >= 2 and message[1] == id
        if message[0] == 'close':
            my_send_message(['close'])
            return
        elif message[0] == 'async_tokenize_next':
            my_send_message(['async_tokenize_next', -1, -1, -1])
        else:
            return my_error("Unkown message for async_tokenize handler, '%s'." % message[0])
    return