Beispiel #1
0
def test_tokenize_twice(test_examples, keep_comments=False):
    """Check that tokenization survives a detokenize/tokenize round trip.

    For every example the input is tokenized, the tokens are detokenized,
    and the result is tokenized again. Both token sequences must be equal.

    Args:
        test_examples: Iterable of 2-item entries; only the first item of
            each entry (the input passed to ``tokenize_cpp``) is used.
        keep_comments: Forwarded to both ``tokenize_cpp`` calls.

    Raises:
        Exception: If the two tokenizations differ. The message contains
            both sequences followed by a detail line: the last index within
            the common length at which they disagree, or, when they agree
            over the whole common length, the two lengths.
    """
    for source, _ in test_examples:
        first_pass = tokenize_cpp(source, keep_comments=keep_comments)
        round_trip = detokenize_cpp(first_pass)
        second_pass = tokenize_cpp(round_trip, keep_comments=keep_comments)
        if first_pass == second_pass:
            continue

        # zip() stops at the shorter sequence, i.e. at the common length.
        mismatches = [
            idx
            for idx, (want, got) in enumerate(zip(first_pass, second_pass))
            if got != want
        ]
        if mismatches:
            # The last differing position is the one that gets reported.
            last = mismatches[-1]
            detail = f"expected token '{first_pass[last]}' at index {last} but found '{second_pass[last]}'"
        else:
            detail = f"expected length {len(first_pass)}, found {len(second_pass)}"

        raise Exception(
            f"Expected:\n==========\n{first_pass}\nbut found:\n==========\n{second_pass} \n==========\n{detail}")
def test_detokenize_invertible(test_examples):
    """Check that detokenizing the tokenized input gives the input back.

    The comparison ignores leading and trailing whitespace on both sides.
    Tokenization is done with ``keep_comments=False``.

    Args:
        test_examples: Iterable of 2-item entries; only the first item of
            each entry (the input text) is used.

    Raises:
        Exception: If the stripped round-trip result differs from the
            stripped input. The message shows both stripped texts.
    """
    for source, _ in test_examples:
        tokens = tokenize_cpp(source, keep_comments=False)
        restored = detokenize_cpp(tokens).strip()
        expected = source.strip()
        if restored == expected:
            continue
        raise Exception(
            f"Expected:\n==========\n{expected}\nbut found:\n==========\n{restored}"
        )
Beispiel #3
0
def test_tokenizer(test_examples, keep_comments):
    """Check ``tokenize_cpp`` output against the expected tokenization.

    Args:
        test_examples: Iterable of ``(input, expected)`` entries.
        keep_comments: Forwarded to ``tokenize_cpp``.

    Raises:
        Exception: If the produced tokenization differs from the expected
            one. The message starts with the last position (within the
            common length) at which the two differ, or ``-1`` when they
            agree over the whole common length, followed by both values.
    """
    for source, expected in test_examples:
        actual = tokenize_cpp(source, keep_comments=keep_comments)
        if actual == expected:
            continue

        # Walk the common prefix without stopping early so that the final
        # value is the last differing position; -1 means none was found.
        position = -1
        for idx, (want, got) in enumerate(zip(expected, actual)):
            if want != got:
                position = idx

        raise Exception(
            f"Difference at {position}\nExpected:\n==========\n{expected}\nbut found:\n==========\n{actual}")
Beispiel #4
0
def test_detokenize_non_invertible(test_examples):
    """Check the detokenized round trip against an explicit expected output.

    Each input is tokenized with ``keep_comments=False`` and detokenized;
    the result must equal the expected value given alongside the input.

    Args:
        test_examples: Iterable of ``(input, expected_output)`` entries.

    Raises:
        Exception: If the round-trip result differs from the expected
            output. The message contains both values followed by a detail
            line: the last index within the common length at which they
            disagree, or, when they agree over the whole common length,
            the two lengths.
    """
    for source, expected in test_examples:
        actual = detokenize_cpp(tokenize_cpp(source, keep_comments=False))
        if actual == expected:
            continue

        # zip() stops at the shorter value, i.e. at the common length.
        mismatches = [
            idx
            for idx, (got, want) in enumerate(zip(actual, expected))
            if got != want
        ]
        if mismatches:
            # The last differing position is the one that gets reported.
            last = mismatches[-1]
            detail = f"expected character '{expected[last]}' at index {last} but found '{actual[last]}'"
        else:
            detail = f"expected length {len(expected)}, found {len(actual)}"

        raise Exception(
            f"Expected:\n==========\n{expected}\nbut found:\n==========\n{actual} \n==========\n{detail}")