def sanity_check_listing(Lexer, numbered_file):
    """
    First pass - This function returns a list of all the line numbers in the file.

    Exits the program (sys.exit(1)) when a duplicate line number is found;
    logs a warning, but continues, when a line number is out of ascending
    order.
    """
    logger.debug("Sanity Checking Numbered Listing.")
    original_line_numbers = []
    # Mirror of original_line_numbers for O(1) duplicate lookups; the
    # original list scan made the duplicate check O(n^2) over the listing.
    seen_line_numbers = set()
    for line_no, line in enumerate(numbered_file):
        tokenized_line = tokenize_line(Lexer, line, line_no)
        if tokenized_line[0].type == "LINE":
            line_number = tokenized_line[0].val
            if original_line_numbers:
                if line_number in seen_line_numbers:
                    # Duplicate Line sanity check
                    logger.critical(
                        "Fatal Error! Line number {} is a duplicate!",
                        line_number,
                    )
                    sys.exit(1)
                if int(line_number) < int(original_line_numbers[-1]):
                    # Out of order check.
                    logger.warning(
                        "Line number {} is out of order.", line_number
                    )
            original_line_numbers.append(line_number)
            seen_line_numbers.add(line_number)
    logger.debug("Original Line Numbers: {}", original_line_numbers)
    return original_line_numbers
def populate_label_data(Lexer, working_file):
    """
    This function populates a dictionary with labels and determines how
    many bytes to assume when replacing a line label.
    """
    logger.debug("Populating Label Dictionary.")
    file_length = len(working_file)
    label_dict = {}
    label_line_count = 0
    # Record every label line (its trailing ":" stripped) and count them.
    for line_no, line in enumerate(working_file):
        tokens = tokenize_line(Lexer, line, line_no)
        if tokens[0].type != "LABEL":
            continue
        label = tokens[0].val.rstrip(":")
        label_dict[label] = 0
        label_line_count += 1
        logger.debug("Label: {}", label)
    lines_total = file_length - label_line_count
    logger.debug("Total number of lines: {}", lines_total)
    # A replaced label becomes a line number, so assume as many bytes as
    # that number can have digits, capped at 5.
    line_replacement = 5
    for digits, upper_bound in enumerate((10, 100, 1000, 10000), start=1):
        if lines_total < upper_bound:
            line_replacement = digits
            break
    logger.debug("Line replacement value: {}", line_replacement)
    logger.debug("Labels: {}", label_dict)
    return label_dict, line_replacement
def extract_jump_targets(Lexer, numbered_file, original_line_numbers):
    """
    Second pass - This function returns a set of jump targets.

    Every NUMBER that follows a FLOW token (directly, or as part of an
    ON <id> GOTO/GOSUB list) is validated against original_line_numbers;
    an unknown target aborts the program via sys.exit(1).
    """
    logger.debug("Getting jump targets.")
    jump_targets = set()
    for line_no, line in enumerate(numbered_file):
        # FLOW token positions already consumed as part of an ON construct.
        handled = []
        tokenized_line = tokenize_line(Lexer, line, line_no)
        tokenized_line_length = len(tokenized_line)
        # Positions of every flow-control token on this line.
        indices = [i for i, x in enumerate(tokenized_line) if x.type == "FLOW"]
        logger.debug("Current Line: {}", line)
        logger.debug("Jump target indices: {}", indices)
        for index in indices:
            if index in handled:
                continue
            if tokenized_line[index].val == "ON":
                # ON <id> GOTO/GOSUB n1, n2, ... : the FLOW token two
                # positions ahead is the jump keyword; mark it handled so
                # the outer loop does not process it again, then collect
                # every NUMBER token after it.
                if (
                    tokenized_line[index + 1].type == "ID"
                    and tokenized_line[index + 2].type == "FLOW"
                ):
                    on_start_index = index + 2
                    handled.append(on_start_index)
                    for i in range(on_start_index + 1, tokenized_line_length):
                        if tokenized_line[i].type == "NUMBER":
                            target = tokenized_line[i].val
                            logger.debug("Jump target: {}", target)
                            if target not in original_line_numbers:
                                logger.critical(
                                    "Fatal Error! Attempt to jump to line number {} is invalid!",
                                    target,
                                )
                                sys.exit(1)
                            jump_targets.add(target)
                        elif tokenized_line[i].val == ",":
                            # NOTE(review): this "continue" is a no-op and the
                            # scan never breaks on other token types, so any
                            # NUMBER later on the line (after the ON list
                            # ends) is also collected — confirm this is the
                            # intended behavior.
                            continue
            # Simple case: a FLOW token directly followed by a NUMBER target.
            if index + 1 < tokenized_line_length:
                if tokenized_line[index + 1].type == "NUMBER":
                    target = tokenized_line[index + 1].val
                    logger.debug("Jump target: {}", target)
                    if target not in original_line_numbers:
                        logger.critical(
                            "Fatal Error! Attempt to jump to line number {} is invalid!",
                            target,
                        )
                        sys.exit(1)
                    jump_targets.add(target)
    logger.debug("Jump targets: {}", jump_targets)
    return jump_targets
def replace_in_labeled_file(Lexer, labeled_file, *, external_dict=None):
    """
    Returns a labeled file with stuff replaced.

    LABEL tokens whose name (trailing ":" stripped) appears in
    external_dict are rewritten to the mapped name; any other token whose
    value appears in external_dict is replaced by its mapping. Spacing
    between the remaining tokens is re-inserted with the same rules used
    elsewhere in this file.

    :param Lexer: lexer passed through to tokenize_line.
    :param labeled_file: iterable of source lines to rewrite.
    :param external_dict: optional mapping of names to replacement text.
    """
    # A mutable default argument ({}) is shared across calls; use None as
    # the sentinel and build a fresh dict per call instead.
    if external_dict is None:
        external_dict = {}
    logger.debug("Replacing label names.")
    final_file = ""
    for line_no, line in enumerate(labeled_file):
        tokenized_line = tokenize_line(Lexer, line, line_no)
        tokenized_line_length = len(tokenized_line)
        for index, token in enumerate(tokenized_line):
            if token.type == "LABEL":
                label_val = token.val.rstrip(":")
                # Labels keep their own lines and their trailing colon.
                if label_val in external_dict:
                    current_value = "\n" + external_dict[label_val] + ":" + "\n"
                else:
                    current_value = "\n" + token.val + "\n"
            elif token.val in external_dict:
                current_value = external_dict[token.val]
            else:
                current_value = token.val
            final_file = final_file + current_value
            if index + 1 < tokenized_line_length:
                # Insert spaces.
                # Function-style IDs ending in "(" hug the next token.
                if token.type == "ID" and token.val.endswith("("):
                    continue
                if (
                    not token.type == "PUNCTUATION"
                    and not tokenized_line[index + 1].type == "PUNCTUATION"
                    and not token.type == "STATEMENT"
                ):
                    final_file = final_file + " "
                # The following elif is not redundant. It fixes formatting errors.
                elif (
                    tokenized_line[index + 1].type == "FLOW"
                    and not token.type == "STATEMENT"
                ):
                    final_file = final_file + " "
        final_file = final_file + "\n"
    return final_file
def output_basic_listing(
    Lexer, numbered_file, jump_targets, basic_type, external_dict=None
):
    """
    Final pass - This function returns the labelled BASIC file.

    Line numbers that are jump targets become "_<number>:" labels (or
    their external_dict mapping); FLOW targets become "_<number>"
    references; statements are split onto their own lines; for CBM BASIC
    variants ("cbm*") tokens are additionally upper-cased.

    :param Lexer: lexer passed through to tokenize_line.
    :param numbered_file: iterable of numbered BASIC source lines.
    :param jump_targets: set of line numbers that need labels.
    :param basic_type: BASIC dialect name (e.g. "cbm...").
    :param external_dict: optional mapping of label names to replacements.
    """
    # A mutable default argument ({}) is shared across calls; use None as
    # the sentinel and build a fresh dict per call instead.
    if external_dict is None:
        external_dict = {}
    logger.debug("Converting to labeled format.")
    labeled_file = ""
    for line_no, line in enumerate(numbered_file):
        # set_on/set_flow track whether the next NUMBER token is a jump
        # target of an ON-list or a plain GOTO/GOSUB respectively.
        set_flow = False
        set_on = False
        tokenized_line = tokenize_line(Lexer, line, line_no)
        tokenized_line_length = len(tokenized_line)
        for index, token in enumerate(tokenized_line):
            current_value = ""
            if token.type == "LINE":
                # Insert a jump target.
                if token.val in jump_targets:
                    logger.debug("Jump target at line number: {}", token.val)
                    check_value = "_" + token.val
                    if check_value in external_dict:
                        labeled_file = (
                            labeled_file
                            + "\n"
                            + external_dict[check_value]
                            + ":"
                            + "\n"
                        )
                    else:
                        labeled_file = (
                            labeled_file + "\n" + "_" + token.val + ":" + "\n"
                        )
                    continue
                continue
            if (
                token.type == "STATEMENT"
                # Bounds guard: a trailing STATEMENT token used to raise
                # IndexError here; it now falls through to the plain
                # STATEMENT branch below.
                and index + 1 < tokenized_line_length
                and tokenized_line[index + 1].type == "COMMENT"
            ):
                # Deal with comments after statements.
                labeled_file = labeled_file + " "
                set_flow = False
                set_on = False
                continue
            if token.type == "STATEMENT":
                # Insert a newline when there's a statement end.
                labeled_file = labeled_file.rstrip() + "\n"
                set_flow = False
                set_on = False
                continue
            if basic_type.startswith("cbm"):
                # Format into upper-case correctly for CBM BASIC.
                # Output valid labels.
                if set_on and token.type == "NUMBER":
                    current_value = "_" + token.val
                    if current_value in external_dict:
                        current_value = external_dict[current_value]
                elif set_flow and not set_on:
                    current_value = "_" + token.val
                    if current_value in external_dict:
                        current_value = external_dict[current_value]
                    set_flow = False
                # ON handling.
                elif (
                    token.type == "FLOW"
                    and token.val == "ON"
                    and index + 1 < tokenized_line_length
                    and tokenized_line[index + 1].type == "ID"
                ):
                    set_on = True
                    current_value = token.val.upper()
                elif (
                    token.type == "FLOW"
                    and index + 1 < tokenized_line_length
                    and tokenized_line[index + 1].type == "NUMBER"
                ):
                    set_flow = True
                    current_value = token.val.upper()
                # Replace REM with '
                elif token.type == "COMMENT" and token.val.islower():
                    current_value = "'" + token.val[3:].upper()
                elif token.type == "COMMENT":
                    current_value = "'" + token.val[3:]
                elif token.type == "DATA" and token.val.islower():
                    current_value = token.val.upper()
                elif token.type == "DATA":
                    current_value = "DATA" + token.val[4:]
                elif token.type == "STRING" and token.val.islower():
                    current_value = token.val.upper()
                elif token.type == "STRING":
                    current_value = token.val
                # Handle question marks as PRINT.
                elif token.type == "PRINT":
                    current_value = "PRINT"
                else:
                    current_value = token.val.upper()
            else:
                # Output valid labels.
                if set_on and token.type == "NUMBER":
                    current_value = "_" + token.val
                    if current_value in external_dict:
                        current_value = external_dict[current_value]
                elif set_flow and not set_on:
                    current_value = "_" + token.val
                    if current_value in external_dict:
                        current_value = external_dict[current_value]
                    set_flow = False
                # ON handling.
                elif (
                    token.type == "FLOW"
                    and token.val == "ON"
                    and index + 1 < tokenized_line_length
                    and tokenized_line[index + 1].type == "ID"
                ):
                    set_on = True
                    current_value = token.val
                elif (
                    token.type == "FLOW"
                    and index + 1 < tokenized_line_length
                    and tokenized_line[index + 1].type == "NUMBER"
                ):
                    set_flow = True
                    current_value = token.val
                # Replace REM with '
                elif token.type == "COMMENT":
                    current_value = "'" + token.val[3:]
                # Handle question marks as PRINT.
                elif token.type == "PRINT":
                    current_value = "PRINT"
                else:
                    current_value = token.val
            labeled_file = labeled_file + current_value
            if index + 1 < tokenized_line_length:
                # Insert spaces.
                if token.type == "ID" and token.val.endswith("("):
                    continue
                if (
                    not token.type == "PUNCTUATION"
                    and not tokenized_line[index + 1].type == "PUNCTUATION"
                    and not token.type == "STATEMENT"
                ):
                    labeled_file = labeled_file + " "
                # The following elif is not redundant. It fixes formatting errors.
                elif (
                    tokenized_line[index + 1].type == "FLOW"
                    and not token.type == "STATEMENT"
                ):
                    labeled_file = labeled_file + " "
        labeled_file = labeled_file + "\n"
    return labeled_file
def renumber_basic_file(Lexer, input_file, basic_defs, label_dict,
                        line_replacement, basic_type):
    """
    The main renumbering routine.

    Walks the label-form listing, assigns line numbers (starting at
    basic_defs.numbering, stepping by basic_defs.increment), optionally
    combines statements onto shared lines up to
    basic_defs.basic_line_length, records each label's line number in
    label_dict, then substitutes labels back into the output and applies
    dialect-specific post-processing. Mutates label_dict in place and
    returns the list of renumbered lines.
    """
    logger.debug("Numbering BASIC listing.")
    output_file = []
    current_line_number = basic_defs.numbering
    # persistent_buffer accumulates the output line currently being built;
    # persistent_line_length tracks its length for the combine checks.
    persistent_buffer, persistent_line_length = start_new_line(
        current_line_number)
    for line_no, line in enumerate(input_file):
        tokenized_line = tokenize_line(Lexer, line, line_no)
        logger.debug("Persistent Buffer: {}", persistent_buffer)
        # routine for jump targets
        if tokenized_line[0].type == "LABEL":
            label = tokenized_line[0].val.rstrip(":")
            if persistent_buffer != str(current_line_number):
                # Buffer already holds statements: flush it and point the
                # label at the NEXT line number.
                label_dict[label] = current_line_number + basic_defs.increment
                persistent_buffer = persistent_buffer.rstrip(
                    basic_defs.statement_joining_character)
                output_file.append(persistent_buffer)
                current_line_number = current_line_number + basic_defs.increment
            else:
                # Buffer is still just the bare line number: the label
                # refers to the current line.
                label_dict[label] = current_line_number
            logger.debug("Persistent Buffer: {}", persistent_buffer)
            persistent_buffer, persistent_line_length = start_new_line(
                current_line_number)
            continue
        # tokenizes lines, determines line length, and sets the current buffer to the tokenized line
        current_buffer_length, current_buffer = crunch_line(
            tokenized_line, label_dict, line_replacement, line_no, basic_defs)
        logger.debug("Current buffer length: {}", current_buffer_length)
        logger.debug("Current Buffer: {}", current_buffer)
        # when lines don't need to be combined
        if not basic_defs.combine:
            persistent_buffer = persistent_buffer + current_buffer
            output_file.append(persistent_buffer)
            current_line_number = current_line_number + basic_defs.increment
            logger.debug("Persistent Buffer: {}", persistent_buffer)
            persistent_buffer, persistent_line_length = start_new_line(
                current_line_number)
        # when lines do need to be combined
        else:
            # Length if current_buffer were joined onto the pending line.
            combined_line_length = (
                persistent_line_length +
                len(basic_defs.statement_joining_character) +
                current_buffer_length)
            logger.debug("Combined line length: {}", combined_line_length)
            need_new_line = check_new_line(tokenized_line)
            # when a new line is mandatory
            if need_new_line:
                if basic_type in ["bbc", "riscos"
                                  ] and current_buffer.startswith("DATA"):
                    # This is to avoid Out of DATA errors in BBC BASIC
                    # it's the same code as the code below, I know,
                    # but using seperate functions for this stuff is trickier than it looks,
                    # and there's other features for this that are worth working on first.
                    persistent_buffer = persistent_buffer.rstrip(
                        basic_defs.statement_joining_character)
                    output_file.append(persistent_buffer)
                    current_line_number = current_line_number + basic_defs.increment
                    logger.debug("Persistent Buffer: {}", persistent_buffer)
                    persistent_buffer, persistent_line_length = start_new_line(
                        current_line_number)
                    # Emit the DATA statement on its own fresh line.
                    persistent_buffer = persistent_buffer + current_buffer
                    output_file.append(persistent_buffer)
                    current_line_number = current_line_number + basic_defs.increment
                    logger.debug("Persistent Buffer: {}", persistent_buffer)
                    persistent_buffer, persistent_line_length = start_new_line(
                        current_line_number)
                elif combined_line_length <= basic_defs.basic_line_length:
                    # Fits on the pending line: join, then flush.
                    persistent_buffer = persistent_buffer + current_buffer
                    output_file.append(persistent_buffer)
                    current_line_number = current_line_number + basic_defs.increment
                    logger.debug("Persistent Buffer: {}", persistent_buffer)
                    persistent_buffer, persistent_line_length = start_new_line(
                        current_line_number)
                elif combined_line_length > basic_defs.basic_line_length:
                    # Too long: flush the pending line first, then emit the
                    # current statement on its own line.
                    persistent_buffer = persistent_buffer.rstrip(
                        basic_defs.statement_joining_character)
                    output_file.append(persistent_buffer)
                    current_line_number = current_line_number + basic_defs.increment
                    logger.debug("Persistent Buffer: {}", persistent_buffer)
                    persistent_buffer, persistent_line_length = start_new_line(
                        current_line_number)
                    persistent_buffer = persistent_buffer + current_buffer
                    output_file.append(persistent_buffer)
                    current_line_number = current_line_number + basic_defs.increment
                    logger.debug("Persistent Buffer: {}", persistent_buffer)
                    persistent_buffer, persistent_line_length = start_new_line(
                        current_line_number)
                continue
            # when a new line is not mandatory
            if combined_line_length <= basic_defs.basic_line_length:
                # Keep accumulating: append the statement plus joiner.
                current_buffer = current_buffer + basic_defs.statement_joining_character
                persistent_buffer = persistent_buffer + current_buffer
                persistent_line_length = len(persistent_buffer)
            elif combined_line_length > basic_defs.basic_line_length:
                # Pending line is full: flush it and start accumulating a
                # new one with the current statement.
                persistent_buffer = persistent_buffer.rstrip(
                    basic_defs.statement_joining_character)
                output_file.append(persistent_buffer)
                current_line_number = current_line_number + basic_defs.increment
                logger.debug("Persistent Buffer: {}", persistent_buffer)
                persistent_buffer, persistent_line_length = start_new_line(
                    current_line_number)
                persistent_buffer = (persistent_buffer + current_buffer +
                                     basic_defs.statement_joining_character)
                persistent_line_length = len(persistent_buffer)
    # NOTE(review): in combine mode a partially-accumulated
    # persistent_buffer is never appended after the loop ends — confirm
    # callers guarantee the input flushes it (e.g. the listing always ends
    # with a line that forces a flush).
    # replace labels with line numbers
    # Longest keys first so a label that is a prefix of another is not
    # clobbered by the shorter key's substitution.
    for key in sorted(label_dict, key=len, reverse=True):
        for index, line in enumerate(output_file):
            # NOTE(review): key is used as a raw regex pattern (no
            # re.escape); labels containing regex metacharacters would
            # misbehave — presumably labels are alphanumeric; verify.
            output_file[index] = re.sub(key + RE_QUOTES,
                                        str(label_dict[key]), line)
    # warn if line is too long
    for index, line in enumerate(output_file):
        if len(line) > basic_defs.basic_line_length:
            # Leading digits of the output line are its line number.
            line_number_match = re.search(r"^\d*", line)
            line_number = line[line_number_match.span()[0]:line_number_match.
                               span()[1]]
            logger.warning("Line number {} may be too long.", line_number)
    # add space in between line number and rest of line for certain basic versions
    if basic_type in ["bascom", "amiga"] or basic_type.startswith("zx"):
        for index, line in enumerate(output_file):
            space_index = re.search(r"^\d*", line)
            output_file[index] = (line[0:space_index.span()[1]] + " " +
                                  line[space_index.span()[1]:])
    return output_file