コード例 #1
0
def test_multi_line_comment():
    """Check the tokens ``convert_text`` yields for a Java multi-line comment followed by code."""
    source = '''
/*multi-line MyComment_
*//
_operations
'''
    single = SplitContainer.from_single_token

    # The comment delimiters and everything between them are expected
    # to end up inside one MultilineComment token.
    comment_parts = [
        NonCodeChar('/'),
        NonCodeChar('*'),
        single('multi'),
        NonCodeChar('-'),
        single('line'),
        SplitContainer([Word.from_('My'), Word.from_('Comment'), Underscore()]),
        NewLine(),
        NonCodeChar('*'),
        NonCodeChar('/'),
    ]
    expected_tokens = [MultilineComment(comment_parts)]
    # The extra '/' right after the closing delimiter is expected as a plain operator.
    expected_tokens += [
        Operator('/'),
        NewLine(),
        SplitContainer([Underscore(), Word.from_('operations')]),
        NewLine(),
    ]

    parsed_tokens = list(convert_text(source, 'java'))

    assert expected_tokens == parsed_tokens
コード例 #2
0
def test_floats():
    """Check the tokens ``convert_text`` yields for a Java array of floating-point literals."""
    java_line = '''float[] floats = {-0.43E4f, .58F, 0.d, -9.63e+2D, 0.E-8};'''

    # Left-hand side of the declaration, up to and including '='.
    expected_tokens = [
        KeyWord('float'),
        Operator('['),
        Operator(']'),
        SplitContainer.from_single_token('floats'),
        Operator('='),
    ]
    # Array initializer: a leading minus sign is expected as its own Operator,
    # separate from the Number that follows it.
    expected_tokens += [
        OpeningCurlyBracket(),
        Operator('-'), Number("0.43E4f"), Operator(','),
        Number(".58F"), Operator(','),
        Number("0.d"), Operator(','),
        Operator('-'), Number('9.63e+2D'), Operator(','),
        Number('0.E-8'),
        ClosingCurlyBracket(),
    ]
    expected_tokens += [Semicolon(), NewLine()]

    parsed_tokens = list(convert_text(java_line, 'java'))

    assert expected_tokens == parsed_tokens
コード例 #3
0
def test_one_line_comment():
    """Check the tokens ``convert_text`` yields for a single ``//`` comment line."""
    comment_line = '''// this code won't compile but the preprocessing still has to be done corrrectly'''
    single = SplitContainer.from_single_token

    # "won't" is expected to be split around the apostrophe: 'won', "'", 't'.
    comment_parts = [
        NonCodeChar('/'),
        NonCodeChar('/'),
        single('this'),
        single('code'),
        single('won'),
        NonCodeChar("'"),
    ]
    remaining_words = (
        't', 'compile', 'but', 'the', 'preprocessing', 'still',
        'has', 'to', 'be', 'done', 'corrrectly',
    )
    comment_parts += [single(word) for word in remaining_words]
    # The trailing NewLine is expected inside the comment token.
    comment_parts.append(NewLine())

    expected_tokens = [OneLineComment(comment_parts)]

    parsed_tokens = list(convert_text(comment_line, 'java'))

    assert expected_tokens == parsed_tokens
コード例 #4
0
def test_longs():
    """Check the tokens ``convert_text`` yields for a Java array of hexadecimal long literals."""
    java_line = '''long[] lovely_longs = {0x34a35EL,     0x88bc96fl           , -0x34L};'''

    # Declaration part, up to and including '='.
    expected_tokens = [
        KeyWord('long'),
        Operator('['),
        Operator(']'),
        SplitContainer([Word.from_('lovely'), Underscore(), Word.from_('longs')]),
        Operator('='),
    ]
    # Initializer: the runs of spaces between the literals are expected as Tab tokens.
    expected_tokens += [
        OpeningCurlyBracket(),
        Number("0x34a35EL"),
        Operator(','),
        Tab(),
        Number("0x88bc96fl"),
        Tab(),
        Tab(),
        Operator(','),
        Operator('-'),
        Number("0x34L"),
        ClosingCurlyBracket(),
    ]
    expected_tokens += [Semicolon(), NewLine()]

    parsed_tokens = list(convert_text(java_line, 'java'))

    assert expected_tokens == parsed_tokens
コード例 #5
0
ファイル: test_subtokens.py プロジェクト: mir-am/codeprep
def test_split_string():
    """Check that ``split_string`` breaks a mixed string into numbers, newlines, identifiers and spaces."""
    actual = split_string("123\nAb2cd34Ef000GG     j_89_J")

    # First identifier: expected to be split at case changes and letter/digit boundaries.
    camel_parts = ('Ab', '2', 'cd', '34', 'Ef', '000', 'GG')
    expected = [
        Number('123'),
        NewLine(),
        SplitContainer([Word.from_(part) for part in camel_parts]),
        SpaceInString(5),
    ]
    # Second identifier: underscores are expected as explicit Underscore tokens.
    expected.append(
        SplitContainer([
            Word.from_('j'), Underscore(), Word.from_('89'), Underscore(), Word.from_('J'),
        ])
    )

    assert expected == actual
コード例 #6
0
def to_parsed_token(token: str) -> ParsedToken:
    """Convert a raw string token into the matching parsed-token object.

    The checks run in this order, and the first one that matches wins:

    * ``'\\n'`` becomes ``NewLine()``;
    * ``'\\t'`` becomes ``Tab()``;
    * anything ``is_number`` accepts becomes ``Number(token)``;
    * a token fully matching ``\\w+`` is passed to ``split_identifier``;
    * everything else becomes ``NonCodeChar(token)``.
    """
    if token == '\n':
        return NewLine()
    if token == '\t':
        return Tab()
    if is_number(token):
        return Number(token)

    # Numbers were handled above, so a pure word-character token here is
    # treated as an identifier.
    looks_like_identifier = regex.fullmatch("\\w+", token)
    return split_identifier(token) if looks_like_identifier else NonCodeChar(token)
コード例 #7
0
def test_string_with_spaces():
    """Check the tokens ``convert_text`` yields for a string literal containing runs of spaces."""
    literal_source = '''"hi   dear     world    !"'''
    single = SplitContainer.from_single_token

    # Each run of spaces inside the literal is expected as one SpaceInString
    # carrying the run length.
    literal_parts = [
        NonCodeChar('"'),
        single('hi'),
        SpaceInString(3),
        single('dear'),
        SpaceInString(5),
        single('world'),
        SpaceInString(4),
        NonCodeChar('!'),
        NonCodeChar('"'),
    ]
    expected_tokens = [StringLiteral(literal_parts, 26), NewLine()]

    parsed_tokens = list(convert_text(literal_source, 'java'))

    assert expected_tokens == parsed_tokens
コード例 #8
0
def test_ints():
    """Check the tokens ``convert_text`` yields for a Java array of decimal and hexadecimal ints."""
    java_line = '''int[] _my_favoRite_ints_ = {0x12, 0x1fE, 441, -81, -0xfFf};'''

    expected_tokens = [KeyWord('int'), Operator('['), Operator(']')]
    # The identifier is expected to be split on underscores and on the
    # lower-to-upper case change inside "favoRite".
    expected_tokens.append(
        SplitContainer([
            Underscore(),
            Word.from_('my'),
            Underscore(),
            Word.from_('favo'),
            Word.from_('Rite'),
            Underscore(),
            Word.from_('ints'),
            Underscore(),
        ])
    )
    expected_tokens += [Operator('='), OpeningCurlyBracket()]
    # Literals: a leading minus is expected as a separate Operator before its Number.
    expected_tokens += [
        Number("0x12"), Operator(','),
        Number("0x1fE"), Operator(','),
        Number("441"), Operator(','),
        Operator('-'), Number("81"), Operator(','),
        Operator('-'), Number("0xfFf"),
    ]
    expected_tokens += [ClosingCurlyBracket(), Semicolon(), NewLine()]

    parsed_tokens = list(convert_text(java_line, 'java'))

    assert expected_tokens == parsed_tokens
コード例 #9
0
def test_spaces_in_strings():
    """Check the tokens ``convert_text`` yields for Java code with two string literals, one holding an escaped quote."""
    java_line = '''BigAWESOMEString[] a2y = "a    bc".doSplit("\\"");'''

    def words(*parts):
        # Build a SplitContainer holding one Word per given part.
        return SplitContainer([Word.from_(part) for part in parts])

    single = SplitContainer.from_single_token

    expected_tokens = [
        words('Big', 'AWESOME', 'String'),
        Operator('['),
        Operator(']'),
        words('a', '2', 'y'),
        Operator('='),
        # First literal: the four spaces are expected as a single SpaceInString.
        StringLiteral(
            [
                NonCodeChar('"'),
                single('a'),
                SpaceInString(n_chars=4),
                single('bc'),
                NonCodeChar('"'),
            ],
            9,
        ),
        Operator('.'),
        words('do', 'Split'),
        OpeningBracket(),
        # Second literal: an escaped double quote between the two delimiters.
        StringLiteral(
            [NonCodeChar('"'), NonCodeChar('\\'), NonCodeChar('"'), NonCodeChar('"')],
            4,
        ),
        ClosingBracket(),
        Semicolon(),
        NewLine(),
    ]

    parsed_tokens = list(convert_text(java_line, 'java'))

    assert expected_tokens == parsed_tokens
コード例 #10
0
def test_capitals():
    """Check how ``convert_text`` splits identifiers with different capitalisation styles."""
    source = '''
MyClass Class CONSTANT VAR_WITH_UNDERSCORES
'''
    single = SplitContainer.from_single_token

    expected_tokens = [
        # CamelCase is expected to be split into its capitalised parts.
        SplitContainer([Word.from_(part) for part in ("My", "Class")]),
        # A single capitalised or all-caps word is expected to stay whole.
        single("Class"),
        single("CONSTANT"),
        # Upper snake case is expected to be split on the underscores.
        SplitContainer([
            Word.from_("VAR"),
            Underscore(),
            Word.from_("WITH"),
            Underscore(),
            Word.from_("UNDERSCORES"),
        ]),
        NewLine(),
    ]

    parsed_tokens = list(convert_text(source, 'java'))

    assert expected_tokens == parsed_tokens
コード例 #11
0
def test_string_literal_double():
    """Check the tokens ``convert_text`` yields for a double-quoted string when parsing with the 'py' extension."""
    py_line = '''a = "some_text".split()'''

    def quote():
        # A StringLiteral of length 1 holding only the double-quote character.
        return StringLiteral([NonCodeChar('"')], 1)

    # For this input the quotes and the string body are expected as three
    # separate StringLiteral tokens.
    expected_tokens = [
        SplitContainer.from_single_token("a"),
        Operator('='),
        quote(),
        StringLiteral(
            [SplitContainer([Word.from_("some"), Underscore(), Word.from_("text")])],
            9,
        ),
        quote(),
        Operator('.'),
        SplitContainer.from_single_token("split"),
        OpeningBracket(),
        ClosingBracket(),
        NewLine(),
    ]

    parsed_tokens = list(convert_text(py_line, 'py'))

    assert expected_tokens == parsed_tokens
コード例 #12
0
 def transform(self, value: str) -> List[NewLine]:
     """Return a one-element list holding a new ``NewLine`` token; ``value`` is not used."""
     newline_token = NewLine()
     return [newline_token]
コード例 #13
0
ファイル: text.py プロジェクト: mir-am/codeprep
def remove_trailing_newline(
        prep_tokens: List[ParsedToken]) -> List[ParsedToken]:
    """Return ``prep_tokens`` without its last element when that element equals ``NewLine()``.

    An empty list, or a list whose last element is anything else, is returned
    as-is (the same object). Otherwise a new list without the last element is
    returned; the input list itself is not modified.
    """
    if not prep_tokens:
        return prep_tokens
    if prep_tokens[-1] == NewLine():
        return prep_tokens[:-1]
    return prep_tokens
コード例 #14
0
ファイル: test_to_repr.py プロジェクト: mir-am/codeprep
def test_to_repr_with_enonlycontents1():
    """Check the output and metadata of ``to_repr`` for a token stream whose identifiers are wrapped in ``NonEng``."""
    prep_config = PrepConfig({
        PrepParam.EN_ONLY: 'U',
        PrepParam.COM: 'c',
        PrepParam.STR: '1',
        PrepParam.SPLIT: '2',
        PrepParam.TABS_NEWLINES: '0',
        PrepParam.CASE: 'l',
    })

    def non_eng_word(word):
        # A NonEng token wrapping a container with the single given word.
        return NonEng(SplitContainer([Word.from_(word)]))

    german_words = (
        "ich", "weiss", "nicht", "was", "soll", "es",
        "bedeuten", "dass", "ich", "so", "traurig", "bin",
    )

    tokens = [Number("1.1"), Operator("*"), non_eng_word("dinero")]

    # String literal: opening quote, the words separated by single
    # SpaceInString tokens, closing quote.
    string_parts = [NonCodeChar('"')]
    for index, word in enumerate(german_words):
        if index > 0:
            string_parts.append(SpaceInString())
        string_parts.append(non_eng_word(word))
    string_parts.append(NonCodeChar('"'))
    tokens += [StringLiteral(string_parts, 62), NewLine()]

    # Multi-line comment given as three pieces (opening delimiter, body,
    # closing delimiter), followed by a one-line comment.
    tokens += [
        MultilineComment([NonCodeChar('/'), NonCodeChar('*')]),
        MultilineComment([
            non_eng_word('ц'),
            NonEng(
                SplitContainer([Word.from_("blanco"), Underscore(), Word.from_("english")])
            ),
        ]),
        MultilineComment([NonCodeChar('*'), NonCodeChar('/')]),
        NewLine(),
        Tab(),
        OneLineComment([
            NonCodeChar('/'),
            NonCodeChar('/'),
            NonEng(SplitContainer([Word.from_("DIESELBE"), Word.from_("8")])),
        ]),
    ]

    actual, actual_metadata = to_repr(prep_config, tokens)

    # The number is expected between word-boundary placeholders, split per character.
    expected = [pl['word_start'], '1', '.', '1', pl['word_end'], "*", pl['non_eng'], '"']
    # One non-English placeholder per word inside the string literal.
    expected += [pl["non_eng"] for _ in german_words]
    expected += [
        '"',
        '/', '*', pl['non_eng'], pl['non_eng'],
        '*', '/',
        '/', '/', pl['non_eng'],
        pl['olc_end'],
    ]

    expected_metadata = PreprocessingMetadata(
        {'*', '"', "/"},
        word_boundaries=[0] + list(range(5, 32)),
        token_types=(
            [Number, Operator, NonEng]
            + [StringLiteral] * 14
            + [MultilineComment] * 6
            + [OneLineComment] * 4
        ),
    )

    assert expected == actual
    assert expected_metadata == actual_metadata
コード例 #15
0
ファイル: test_to_repr.py プロジェクト: mir-am/codeprep
tokens = [
    Number('1.1'),
    Operator("*"),
    NonEng(SplitContainer([Word.from_("übersetzen")])),
    StringLiteral([
        NonCodeChar('"'),
        NonEng(
            SplitContainer([
                Word.from_("A"),
                Word.from_("Wirklicä")
            ])
        ),
        SpaceInString(1),
        NonCodeChar('"')
    ], 11),
    NewLine(),
    MultilineComment([NonCodeChar('/'), NonCodeChar('*')]),
    MultilineComment([
        NonEng(
            SplitContainer([Word.from_('ц')]),
        ),
        NonEng(
            SplitContainer([
                Word.from_("blanco"),
                Underscore(),
                Word.from_("english")
            ])
        ),
    ]),
    MultilineComment([NonCodeChar('*'), NonCodeChar('/')]),
    NewLine(), Tab(),
コード例 #16
0
def test_special_characters():
    """Check the tokens ``convert_text`` yields for a block of operators, brackets and other special characters."""
    source = '''
abc1
~-0xFFFFFL=
.0E+5
|=
?
==
!=
**
++
--
+=
-=
/=
*=
%=
$
<=
>=
@
    ^=
    &=
    #
                                                                                 >>
<<
&&
||
+*!/><\t\n
{}[],.-:();&|\\'~%^
'''

    def operators(symbols):
        # One Operator token per character of ``symbols``.
        return [Operator(symbol) for symbol in symbols]

    expected_tokens = [SplitContainer([Word.from_('abc'), Word.from_('1')]), NewLine()]
    expected_tokens += operators('~-') + [Number("0xFFFFFL"), Operator('='), NewLine()]
    expected_tokens += [Number(".0E+5"), NewLine()]

    # Multi-character operators are expected as one Operator per character.
    for symbols in ('|=', '?', '==', '!=', '**', '++', '--', '+=', '-=', '/=', '*=', '%='):
        expected_tokens += operators(symbols) + [NewLine()]
    expected_tokens += [NonCodeChar('$'), NewLine()]
    for symbols in ('<=', '>='):
        expected_tokens += operators(symbols) + [NewLine()]
    expected_tokens += [NonCodeChar('@'), NewLine()]

    # Indented lines: the leading whitespace is expected as Tab tokens.
    for symbols in ('^=', '&='):
        expected_tokens += [Tab()] + operators(symbols) + [NewLine()]
    expected_tokens += [Tab(), NonCodeChar('#'), NewLine()]
    expected_tokens += [Tab() for _ in range(20)]

    for symbols in ('>>', '<<', '&&', '||'):
        expected_tokens += operators(symbols) + [NewLine()]

    # The escaped tab and newline in the source, plus the real line break after them.
    expected_tokens += operators('+*!/><') + [Tab(), NewLine(), NewLine()]

    # Last line: brackets and the semicolon have dedicated token types;
    # the backslash and the apostrophe are expected as NonCodeChar.
    expected_tokens += [OpeningCurlyBracket(), ClosingCurlyBracket()]
    expected_tokens += operators('[],.-:')
    expected_tokens += [OpeningBracket(), ClosingBracket(), Semicolon()]
    expected_tokens += operators('&|')
    expected_tokens += [NonCodeChar('\\'), NonCodeChar("'")]
    expected_tokens += operators('~%^')
    expected_tokens.append(NewLine())

    parsed_tokens = list(convert_text(source, 'java'))

    assert expected_tokens == parsed_tokens