def sanity_check_listing(Lexer, numbered_file):
    """
    First pass - This function returns a list of all the line numbers in the file.

    Exits the program (sys.exit(1)) when a duplicate line number is found;
    logs a warning, but continues, when a line number is out of ascending
    order.
    """
    logger.debug("Sanity Checking Numbered Listing.")
    original_line_numbers = []
    # Mirror of original_line_numbers for O(1) duplicate lookups; the
    # original list scan made the duplicate check O(n^2) over the listing.
    seen_line_numbers = set()
    for line_no, line in enumerate(numbered_file):
        tokenized_line = tokenize_line(Lexer, line, line_no)
        if tokenized_line[0].type == "LINE":
            line_number = tokenized_line[0].val
            if original_line_numbers:
                if line_number in seen_line_numbers:
                    # Duplicate Line sanity check
                    logger.critical(
                        "Fatal Error! Line number {} is a duplicate!",
                        line_number,
                    )
                    sys.exit(1)
                if int(line_number) < int(original_line_numbers[-1]):
                    # Out of order check.
                    logger.warning(
                        "Line number {} is out of order.", line_number
                    )
            original_line_numbers.append(line_number)
            seen_line_numbers.add(line_number)
    logger.debug("Original Line Numbers: {}", original_line_numbers)
    return original_line_numbers
def populate_label_data(Lexer, working_file):
    """
    This function populates a dictionary with labels and determines how
    many bytes to assume when replacing a line label.
    """
    logger.debug("Populating Label Dictionary.")
    file_length = len(working_file)
    label_dict = {}
    label_line_count = 0
    # Record every label line (its trailing ":" stripped) and count them.
    for line_no, line in enumerate(working_file):
        tokens = tokenize_line(Lexer, line, line_no)
        if tokens[0].type != "LABEL":
            continue
        label = tokens[0].val.rstrip(":")
        label_dict[label] = 0
        label_line_count += 1
        logger.debug("Label: {}", label)
    lines_total = file_length - label_line_count
    logger.debug("Total number of lines: {}", lines_total)
    # A replaced label becomes a line number, so assume as many bytes as
    # that number can have digits, capped at 5.
    line_replacement = 5
    for digits, upper_bound in enumerate((10, 100, 1000, 10000), start=1):
        if lines_total < upper_bound:
            line_replacement = digits
            break
    logger.debug("Line replacement value: {}", line_replacement)
    logger.debug("Labels: {}", label_dict)
    return label_dict, line_replacement
def extract_jump_targets(Lexer, numbered_file, original_line_numbers):
    """
    Second pass - This function returns a set of jump targets.

    Every NUMBER that follows a FLOW token (directly, or as part of an
    ON <id> GOTO/GOSUB list) is validated against original_line_numbers;
    an unknown target aborts the program via sys.exit(1).
    """
    logger.debug("Getting jump targets.")
    jump_targets = set()
    for line_no, line in enumerate(numbered_file):
        # FLOW token positions already consumed as part of an ON construct.
        handled = []
        tokenized_line = tokenize_line(Lexer, line, line_no)
        tokenized_line_length = len(tokenized_line)
        # Positions of every flow-control token on this line.
        indices = [i for i, x in enumerate(tokenized_line) if x.type == "FLOW"]
        logger.debug("Current Line: {}", line)
        logger.debug("Jump target indices: {}", indices)
        for index in indices:
            if index in handled:
                continue
            if tokenized_line[index].val == "ON":
                # ON <id> GOTO/GOSUB n1, n2, ... : the FLOW token two
                # positions ahead is the jump keyword; mark it handled so
                # the outer loop does not process it again, then collect
                # every NUMBER token after it.
                if (
                    tokenized_line[index + 1].type == "ID"
                    and tokenized_line[index + 2].type == "FLOW"
                ):
                    on_start_index = index + 2
                    handled.append(on_start_index)
                    for i in range(on_start_index + 1, tokenized_line_length):
                        if tokenized_line[i].type == "NUMBER":
                            target = tokenized_line[i].val
                            logger.debug("Jump target: {}", target)
                            if target not in original_line_numbers:
                                logger.critical(
                                    "Fatal Error! Attempt to jump to line number {} is invalid!",
                                    target,
                                )
                                sys.exit(1)
                            jump_targets.add(target)
                        elif tokenized_line[i].val == ",":
                            # NOTE(review): this "continue" is a no-op and the
                            # scan never breaks on other token types, so any
                            # NUMBER later on the line (after the ON list
                            # ends) is also collected — confirm this is the
                            # intended behavior.
                            continue
            # Simple case: a FLOW token directly followed by a NUMBER target.
            if index + 1 < tokenized_line_length:
                if tokenized_line[index + 1].type == "NUMBER":
                    target = tokenized_line[index + 1].val
                    logger.debug("Jump target: {}", target)
                    if target not in original_line_numbers:
                        logger.critical(
                            "Fatal Error! Attempt to jump to line number {} is invalid!",
                            target,
                        )
                        sys.exit(1)
                    jump_targets.add(target)
    logger.debug("Jump targets: {}", jump_targets)
    return jump_targets
def replace_in_labeled_file(Lexer, labeled_file, *, external_dict=None):
    """
    Returns a labeled file with stuff replaced.

    LABEL tokens whose name (trailing ":" stripped) appears in
    external_dict are rewritten to the mapped name; any other token whose
    value appears in external_dict is replaced by its mapping. Spacing
    between the remaining tokens is re-inserted with the same rules used
    elsewhere in this file.

    :param Lexer: lexer passed through to tokenize_line.
    :param labeled_file: iterable of source lines to rewrite.
    :param external_dict: optional mapping of names to replacement text.
    """
    # A mutable default argument ({}) is shared across calls; use None as
    # the sentinel and build a fresh dict per call instead.
    if external_dict is None:
        external_dict = {}
    logger.debug("Replacing label names.")
    final_file = ""
    for line_no, line in enumerate(labeled_file):
        tokenized_line = tokenize_line(Lexer, line, line_no)
        tokenized_line_length = len(tokenized_line)
        for index, token in enumerate(tokenized_line):
            if token.type == "LABEL":
                label_val = token.val.rstrip(":")
                # Labels keep their own lines and their trailing colon.
                if label_val in external_dict:
                    current_value = "\n" + external_dict[label_val] + ":" + "\n"
                else:
                    current_value = "\n" + token.val + "\n"
            elif token.val in external_dict:
                current_value = external_dict[token.val]
            else:
                current_value = token.val
            final_file = final_file + current_value
            if index + 1 < tokenized_line_length:
                # Insert spaces.
                # Function-style IDs ending in "(" hug the next token.
                if token.type == "ID" and token.val.endswith("("):
                    continue
                if (
                    not token.type == "PUNCTUATION"
                    and not tokenized_line[index + 1].type == "PUNCTUATION"
                    and not token.type == "STATEMENT"
                ):
                    final_file = final_file + " "
                # The following elif is not redundant. It fixes formatting errors.
                elif (
                    tokenized_line[index + 1].type == "FLOW"
                    and not token.type == "STATEMENT"
                ):
                    final_file = final_file + " "
        final_file = final_file + "\n"
    return final_file
def output_basic_listing(
    Lexer, numbered_file, jump_targets, basic_type, external_dict=None
):
    """
    Final pass - This function returns the labelled BASIC file.

    Line numbers that are jump targets become "_<number>:" labels (or
    their external_dict mapping); FLOW targets become "_<number>"
    references; statements are split onto their own lines; for CBM BASIC
    variants ("cbm*") tokens are additionally upper-cased.

    :param Lexer: lexer passed through to tokenize_line.
    :param numbered_file: iterable of numbered BASIC source lines.
    :param jump_targets: set of line numbers that need labels.
    :param basic_type: BASIC dialect name (e.g. "cbm...").
    :param external_dict: optional mapping of label names to replacements.
    """
    # A mutable default argument ({}) is shared across calls; use None as
    # the sentinel and build a fresh dict per call instead.
    if external_dict is None:
        external_dict = {}
    logger.debug("Converting to labeled format.")
    labeled_file = ""
    for line_no, line in enumerate(numbered_file):
        # set_on/set_flow track whether the next NUMBER token is a jump
        # target of an ON-list or a plain GOTO/GOSUB respectively.
        set_flow = False
        set_on = False
        tokenized_line = tokenize_line(Lexer, line, line_no)
        tokenized_line_length = len(tokenized_line)
        for index, token in enumerate(tokenized_line):
            current_value = ""
            if token.type == "LINE":
                # Insert a jump target.
                if token.val in jump_targets:
                    logger.debug("Jump target at line number: {}", token.val)
                    check_value = "_" + token.val
                    if check_value in external_dict:
                        labeled_file = (
                            labeled_file
                            + "\n"
                            + external_dict[check_value]
                            + ":"
                            + "\n"
                        )
                    else:
                        labeled_file = (
                            labeled_file + "\n" + "_" + token.val + ":" + "\n"
                        )
                    continue
                continue
            if (
                token.type == "STATEMENT"
                # Bounds guard: a trailing STATEMENT token used to raise
                # IndexError here; it now falls through to the plain
                # STATEMENT branch below.
                and index + 1 < tokenized_line_length
                and tokenized_line[index + 1].type == "COMMENT"
            ):
                # Deal with comments after statements.
                labeled_file = labeled_file + " "
                set_flow = False
                set_on = False
                continue
            if token.type == "STATEMENT":
                # Insert a newline when there's a statement end.
                labeled_file = labeled_file.rstrip() + "\n"
                set_flow = False
                set_on = False
                continue
            if basic_type.startswith("cbm"):
                # Format into upper-case correctly for CBM BASIC.
                # Output valid labels.
                if set_on and token.type == "NUMBER":
                    current_value = "_" + token.val
                    if current_value in external_dict:
                        current_value = external_dict[current_value]
                elif set_flow and not set_on:
                    current_value = "_" + token.val
                    if current_value in external_dict:
                        current_value = external_dict[current_value]
                    set_flow = False
                # ON handling.
                elif (
                    token.type == "FLOW"
                    and token.val == "ON"
                    and index + 1 < tokenized_line_length
                    and tokenized_line[index + 1].type == "ID"
                ):
                    set_on = True
                    current_value = token.val.upper()
                elif (
                    token.type == "FLOW"
                    and index + 1 < tokenized_line_length
                    and tokenized_line[index + 1].type == "NUMBER"
                ):
                    set_flow = True
                    current_value = token.val.upper()
                # Replace REM with '
                elif token.type == "COMMENT" and token.val.islower():
                    current_value = "'" + token.val[3:].upper()
                elif token.type == "COMMENT":
                    current_value = "'" + token.val[3:]
                elif token.type == "DATA" and token.val.islower():
                    current_value = token.val.upper()
                elif token.type == "DATA":
                    current_value = "DATA" + token.val[4:]
                elif token.type == "STRING" and token.val.islower():
                    current_value = token.val.upper()
                elif token.type == "STRING":
                    current_value = token.val
                # Handle question marks as PRINT.
                elif token.type == "PRINT":
                    current_value = "PRINT"
                else:
                    current_value = token.val.upper()
            else:
                # Output valid labels.
                if set_on and token.type == "NUMBER":
                    current_value = "_" + token.val
                    if current_value in external_dict:
                        current_value = external_dict[current_value]
                elif set_flow and not set_on:
                    current_value = "_" + token.val
                    if current_value in external_dict:
                        current_value = external_dict[current_value]
                    set_flow = False
                # ON handling.
                elif (
                    token.type == "FLOW"
                    and token.val == "ON"
                    and index + 1 < tokenized_line_length
                    and tokenized_line[index + 1].type == "ID"
                ):
                    set_on = True
                    current_value = token.val
                elif (
                    token.type == "FLOW"
                    and index + 1 < tokenized_line_length
                    and tokenized_line[index + 1].type == "NUMBER"
                ):
                    set_flow = True
                    current_value = token.val
                # Replace REM with '
                elif token.type == "COMMENT":
                    current_value = "'" + token.val[3:]
                # Handle question marks as PRINT.
                elif token.type == "PRINT":
                    current_value = "PRINT"
                else:
                    current_value = token.val
            labeled_file = labeled_file + current_value
            if index + 1 < tokenized_line_length:
                # Insert spaces.
                if token.type == "ID" and token.val.endswith("("):
                    continue
                if (
                    not token.type == "PUNCTUATION"
                    and not tokenized_line[index + 1].type == "PUNCTUATION"
                    and not token.type == "STATEMENT"
                ):
                    labeled_file = labeled_file + " "
                # The following elif is not redundant. It fixes formatting errors.
                elif (
                    tokenized_line[index + 1].type == "FLOW"
                    and not token.type == "STATEMENT"
                ):
                    labeled_file = labeled_file + " "
        labeled_file = labeled_file + "\n"
    return labeled_file
def renumber_basic_file(Lexer, input_file, basic_defs, label_dict,
                        line_replacement, basic_type):
    """
    The main renumbering routine.

    Walks the label-form listing, assigns line numbers (starting at
    basic_defs.numbering, stepping by basic_defs.increment), optionally
    combines statements onto shared lines up to
    basic_defs.basic_line_length, records each label's line number in
    label_dict, then substitutes labels back into the output and applies
    dialect-specific post-processing. Mutates label_dict in place and
    returns the list of renumbered lines.
    """
    logger.debug("Numbering BASIC listing.")
    output_file = []
    current_line_number = basic_defs.numbering
    # persistent_buffer accumulates the output line currently being built;
    # persistent_line_length tracks its length for the combine checks.
    persistent_buffer, persistent_line_length = start_new_line(
        current_line_number)
    for line_no, line in enumerate(input_file):
        tokenized_line = tokenize_line(Lexer, line, line_no)
        logger.debug("Persistent Buffer: {}", persistent_buffer)
        # routine for jump targets
        if tokenized_line[0].type == "LABEL":
            label = tokenized_line[0].val.rstrip(":")
            if persistent_buffer != str(current_line_number):
                # Buffer already holds statements: flush it and point the
                # label at the NEXT line number.
                label_dict[label] = current_line_number + basic_defs.increment
                persistent_buffer = persistent_buffer.rstrip(
                    basic_defs.statement_joining_character)
                output_file.append(persistent_buffer)
                current_line_number = current_line_number + basic_defs.increment
            else:
                # Buffer is still just the bare line number: the label
                # refers to the current line.
                label_dict[label] = current_line_number
            logger.debug("Persistent Buffer: {}", persistent_buffer)
            persistent_buffer, persistent_line_length = start_new_line(
                current_line_number)
            continue
        # tokenizes lines, determines line length, and sets the current buffer to the tokenized line
        current_buffer_length, current_buffer = crunch_line(
            tokenized_line, label_dict, line_replacement, line_no, basic_defs)
        logger.debug("Current buffer length: {}", current_buffer_length)
        logger.debug("Current Buffer: {}", current_buffer)
        # when lines don't need to be combined
        if not basic_defs.combine:
            persistent_buffer = persistent_buffer + current_buffer
            output_file.append(persistent_buffer)
            current_line_number = current_line_number + basic_defs.increment
            logger.debug("Persistent Buffer: {}", persistent_buffer)
            persistent_buffer, persistent_line_length = start_new_line(
                current_line_number)
        # when lines do need to be combined
        else:
            # Length if current_buffer were joined onto the pending line.
            combined_line_length = (
                persistent_line_length +
                len(basic_defs.statement_joining_character) +
                current_buffer_length)
            logger.debug("Combined line length: {}", combined_line_length)
            need_new_line = check_new_line(tokenized_line)
            # when a new line is mandatory
            if need_new_line:
                if basic_type in ["bbc", "riscos"
                                  ] and current_buffer.startswith("DATA"):
                    # This is to avoid Out of DATA errors in BBC BASIC
                    # it's the same code as the code below, I know,
                    # but using seperate functions for this stuff is trickier than it looks,
                    # and there's other features for this that are worth working on first.
                    persistent_buffer = persistent_buffer.rstrip(
                        basic_defs.statement_joining_character)
                    output_file.append(persistent_buffer)
                    current_line_number = current_line_number + basic_defs.increment
                    logger.debug("Persistent Buffer: {}", persistent_buffer)
                    persistent_buffer, persistent_line_length = start_new_line(
                        current_line_number)
                    # Emit the DATA statement on its own fresh line.
                    persistent_buffer = persistent_buffer + current_buffer
                    output_file.append(persistent_buffer)
                    current_line_number = current_line_number + basic_defs.increment
                    logger.debug("Persistent Buffer: {}", persistent_buffer)
                    persistent_buffer, persistent_line_length = start_new_line(
                        current_line_number)
                elif combined_line_length <= basic_defs.basic_line_length:
                    # Fits on the pending line: join, then flush.
                    persistent_buffer = persistent_buffer + current_buffer
                    output_file.append(persistent_buffer)
                    current_line_number = current_line_number + basic_defs.increment
                    logger.debug("Persistent Buffer: {}", persistent_buffer)
                    persistent_buffer, persistent_line_length = start_new_line(
                        current_line_number)
                elif combined_line_length > basic_defs.basic_line_length:
                    # Too long: flush the pending line first, then emit the
                    # current statement on its own line.
                    persistent_buffer = persistent_buffer.rstrip(
                        basic_defs.statement_joining_character)
                    output_file.append(persistent_buffer)
                    current_line_number = current_line_number + basic_defs.increment
                    logger.debug("Persistent Buffer: {}", persistent_buffer)
                    persistent_buffer, persistent_line_length = start_new_line(
                        current_line_number)
                    persistent_buffer = persistent_buffer + current_buffer
                    output_file.append(persistent_buffer)
                    current_line_number = current_line_number + basic_defs.increment
                    logger.debug("Persistent Buffer: {}", persistent_buffer)
                    persistent_buffer, persistent_line_length = start_new_line(
                        current_line_number)
                continue
            # when a new line is not mandatory
            if combined_line_length <= basic_defs.basic_line_length:
                # Keep accumulating: append the statement plus joiner.
                current_buffer = current_buffer + basic_defs.statement_joining_character
                persistent_buffer = persistent_buffer + current_buffer
                persistent_line_length = len(persistent_buffer)
            elif combined_line_length > basic_defs.basic_line_length:
                # Pending line is full: flush it and start accumulating a
                # new one with the current statement.
                persistent_buffer = persistent_buffer.rstrip(
                    basic_defs.statement_joining_character)
                output_file.append(persistent_buffer)
                current_line_number = current_line_number + basic_defs.increment
                logger.debug("Persistent Buffer: {}", persistent_buffer)
                persistent_buffer, persistent_line_length = start_new_line(
                    current_line_number)
                persistent_buffer = (persistent_buffer + current_buffer +
                                     basic_defs.statement_joining_character)
                persistent_line_length = len(persistent_buffer)
    # NOTE(review): in combine mode a partially-accumulated
    # persistent_buffer is never appended after the loop ends — confirm
    # callers guarantee the input flushes it (e.g. the listing always ends
    # with a line that forces a flush).
    # replace labels with line numbers
    # Longest keys first so a label that is a prefix of another is not
    # clobbered by the shorter key's substitution.
    for key in sorted(label_dict, key=len, reverse=True):
        for index, line in enumerate(output_file):
            # NOTE(review): key is used as a raw regex pattern (no
            # re.escape); labels containing regex metacharacters would
            # misbehave — presumably labels are alphanumeric; verify.
            output_file[index] = re.sub(key + RE_QUOTES,
                                        str(label_dict[key]), line)
    # warn if line is too long
    for index, line in enumerate(output_file):
        if len(line) > basic_defs.basic_line_length:
            # Leading digits of the output line are its line number.
            line_number_match = re.search(r"^\d*", line)
            line_number = line[line_number_match.span()[0]:line_number_match.
                               span()[1]]
            logger.warning("Line number {} may be too long.", line_number)
    # add space in between line number and rest of line for certain basic versions
    if basic_type in ["bascom", "amiga"] or basic_type.startswith("zx"):
        for index, line in enumerate(output_file):
            space_index = re.search(r"^\d*", line)
            output_file[index] = (line[0:space_index.span()[1]] + " " +
                                  line[space_index.span()[1]:])
    return output_file