def test_multi_line_comment():
    """Check how a Java ``/* ... */`` comment is tokenised.

    The whole comment is expected as a single ``MultilineComment`` token,
    while the extra ``/`` directly after the closing ``*/`` is expected as
    an ordinary ``Operator`` outside of it.
    """
    # NOTE(review): the expected stream below contains NewLine() tokens (one
    # inside the comment, one after the stray '/'), yet no line break is
    # visible in this literal -- its whitespace looks collapsed. Confirm the
    # fixture text against version control.
    text = ''' /*multi-line MyComment_ *// _operations '''

    comment_parts = [
        NonCodeChar('/'),
        NonCodeChar('*'),
        SplitContainer.from_single_token('multi'),
        NonCodeChar('-'),
        SplitContainer.from_single_token('line'),
        SplitContainer(
            [Word.from_('My'), Word.from_('Comment'), Underscore()]),
        NewLine(),
        NonCodeChar('*'),
        NonCodeChar('/'),
    ]
    expected_result = [
        MultilineComment(comment_parts),
        Operator('/'),
        NewLine(),
        SplitContainer([Underscore(), Word.from_('operations')]),
        NewLine(),
    ]

    actual = list(convert_text(text, 'java'))

    assert expected_result == actual
def test_floats():
    """Check that Java floating-point literals become single ``Number`` tokens.

    The fixture covers exponents, the f/F/d/D suffixes and literals with a
    missing integer or fractional part. A leading minus sign is expected as
    a separate ``Operator`` rather than as part of the number.
    """
    text = '''float[] floats = {-0.43E4f, .58F, 0.d, -9.63e+2D, 0.E-8};'''

    # ``float[] floats =``
    declaration = [
        KeyWord('float'),
        Operator('['),
        Operator(']'),
        SplitContainer.from_single_token('floats'),
        Operator('='),
    ]
    # ``{ ... }``
    initializer = [
        OpeningCurlyBracket(),
        Operator('-'), Number("0.43E4f"), Operator(','),
        Number(".58F"), Operator(','),
        Number("0.d"), Operator(','),
        Operator('-'), Number('9.63e+2D'), Operator(','),
        Number('0.E-8'),
        ClosingCurlyBracket(),
    ]
    expected_result = declaration + initializer + [Semicolon(), NewLine()]

    actual = list(convert_text(text, 'java'))

    assert expected_result == actual
def test_one_line_comment():
    """Check how a Java ``//`` comment is tokenised.

    Everything on the line, including the terminating ``NewLine``, is
    expected inside one ``OneLineComment``. The apostrophe in "won't" is
    expected as a ``NonCodeChar`` that splits the word in two.
    """
    text = '''// this code won't compile but the preprocessing still has to be done corrrectly'''

    words_before_apostrophe = ('this', 'code', 'won')
    words_after_apostrophe = (
        't', 'compile', 'but', 'the', 'preprocessing', 'still', 'has',
        'to', 'be', 'done', 'corrrectly',
    )

    comment_parts = [NonCodeChar('/'), NonCodeChar('/')]
    comment_parts += [
        SplitContainer.from_single_token(word)
        for word in words_before_apostrophe
    ]
    comment_parts.append(NonCodeChar("'"))
    comment_parts += [
        SplitContainer.from_single_token(word)
        for word in words_after_apostrophe
    ]
    comment_parts.append(NewLine())

    expected_result = [OneLineComment(comment_parts)]

    actual = list(convert_text(text, 'java'))

    assert expected_result == actual
def test_longs():
    """Check that Java hex ``long`` literals become single ``Number`` tokens.

    The fixture also carries tab characters between the array elements; each
    one is expected as its own ``Tab`` token, in source order. A leading
    minus sign is expected as a separate ``Operator``.
    """
    # The tabs are written as ``\t`` escapes so that they survive editors and
    # tools that normalise whitespace: one tab after the first comma and two
    # after the second literal, matching the Tab() tokens expected below.
    text = '''long[] lovely_longs = {0x34a35EL,\t0x88bc96fl\t\t, -0x34L};'''

    expected_result = [
        KeyWord('long'),
        Operator('['),
        Operator(']'),
        SplitContainer(
            [Word.from_('lovely'), Underscore(), Word.from_('longs')]),
        Operator('='),
        OpeningCurlyBracket(),
        Number("0x34a35EL"),
        Operator(','),
        Tab(),
        Number("0x88bc96fl"),
        Tab(),
        Tab(),
        Operator(','),
        Operator('-'),
        Number("0x34L"),
        ClosingCurlyBracket(),
        Semicolon(),
        NewLine(),
    ]

    actual = list(convert_text(text, 'java'))

    assert expected_result == actual
def test_split_string():
    """Check ``split_string`` on digits, mixed-case words and a space run.

    Expected: a leading all-digit chunk as ``Number``, the line break as
    ``NewLine``, identifiers split at case/digit boundaries and underscores,
    and the run of spaces as one ``SpaceInString`` carrying its length.
    """
    # The space run is built explicitly so that its length visibly matches
    # the SpaceInString(5) expected below and cannot be silently collapsed.
    space_run = " " * 5
    actual = split_string("123\nAb2cd34Ef000GG" + space_run + "j_89_J")

    expected = [
        Number('123'),
        NewLine(),
        SplitContainer([
            Word.from_('Ab'),
            Word.from_('2'),
            Word.from_('cd'),
            Word.from_('34'),
            Word.from_('Ef'),
            Word.from_('000'),
            Word.from_('GG'),
        ]),
        SpaceInString(5),
        SplitContainer([
            Word.from_('j'),
            Underscore(),
            Word.from_('89'),
            Underscore(),
            Word.from_('J'),
        ]),
    ]

    assert expected == actual
def to_parsed_token(token: str) -> ParsedToken:
    """Convert one raw token string into a parsed token object.

    The checks run in this order and the first match wins:

    1. ``'\\n'`` gives ``NewLine()``
    2. ``'\\t'`` gives ``Tab()``
    3. anything ``is_number`` accepts gives ``Number(token)``
    4. a token made only of word characters gives ``split_identifier(token)``
    5. everything else gives ``NonCodeChar(token)``
    """
    if token == '\n':
        return NewLine()
    if token == '\t':
        return Tab()
    if is_number(token):
        return Number(token)
    if regex.fullmatch("\\w+", token):
        return split_identifier(token)
    # Neither whitespace, a number nor an identifier-like word.
    return NonCodeChar(token)
def test_string_with_spaces():
    """Check that space runs inside a string literal keep their length.

    Each run is expected as one ``SpaceInString(n)`` and the enclosing
    ``StringLiteral`` is expected to carry the literal's total length.
    """
    # The space runs are built explicitly so that their lengths (3, 5, 4)
    # visibly match the SpaceInString counts below; with them the literal is
    # exactly 26 characters long, the length expected for the StringLiteral.
    text = '"hi' + ' ' * 3 + 'dear' + ' ' * 5 + 'world' + ' ' * 4 + '!"'

    expected = [
        StringLiteral([
            NonCodeChar('"'),
            SplitContainer.from_single_token('hi'),
            SpaceInString(3),
            SplitContainer.from_single_token('dear'),
            SpaceInString(5),
            SplitContainer.from_single_token('world'),
            SpaceInString(4),
            NonCodeChar('!'),
            NonCodeChar('"'),
        ], 26),
        NewLine(),
    ]

    actual = list(convert_text(text, 'java'))

    assert expected == actual
def test_ints():
    """Check that Java hex and decimal ``int`` literals become ``Number``.

    A leading minus sign is expected as a separate ``Operator``. The array
    name is expected split at underscores and at the lower-to-upper case
    change, with leading and trailing underscores kept.
    """
    text = '''int[] _my_favoRite_ints_ = {0x12, 0x1fE, 441, -81, -0xfFf};'''

    array_name = SplitContainer([
        Underscore(),
        Word.from_('my'),
        Underscore(),
        Word.from_('favo'),
        Word.from_('Rite'),
        Underscore(),
        Word.from_('ints'),
        Underscore(),
    ])
    # ``{ ... }``
    initializer = [
        OpeningCurlyBracket(),
        Number("0x12"), Operator(','),
        Number("0x1fE"), Operator(','),
        Number("441"), Operator(','),
        Operator('-'), Number("81"), Operator(','),
        Operator('-'), Number("0xfFf"),
        ClosingCurlyBracket(),
    ]
    expected_result = [
        KeyWord('int'),
        Operator('['),
        Operator(']'),
        array_name,
        Operator('='),
    ] + initializer + [Semicolon(), NewLine()]

    actual = list(convert_text(text, 'java'))

    assert expected_result == actual
def test_spaces_in_strings():
    """Check string literals with a space run and with an escaped quote.

    The run of spaces inside the first literal is expected as one
    ``SpaceInString`` carrying its length. The second literal holds only an
    escaped double quote, expected as separate ``NonCodeChar`` tokens for the
    backslash and the quote.
    """
    # The space run is built explicitly so that its length visibly matches
    # SpaceInString(n_chars=4); with it the first literal is 9 characters
    # long, the length expected for its StringLiteral.
    text = ('BigAWESOMEString[] a2y = "a' + ' ' * 4
            + 'bc".doSplit("\\"");')

    expected_result = [
        SplitContainer(
            [Word.from_('Big'), Word.from_('AWESOME'), Word.from_('String')],
        ),
        Operator('['),
        Operator(']'),
        SplitContainer([Word.from_('a'), Word.from_('2'), Word.from_('y')]),
        Operator('='),
        StringLiteral([
            NonCodeChar('"'),
            SplitContainer.from_single_token('a'),
            SpaceInString(n_chars=4),
            SplitContainer.from_single_token('bc'),
            NonCodeChar('"'),
        ], 9),
        Operator('.'),
        SplitContainer([Word.from_('do'), Word.from_('Split')]),
        OpeningBracket(),
        StringLiteral([
            NonCodeChar('"'),
            NonCodeChar('\\'),
            NonCodeChar('"'),
            NonCodeChar('"'),
        ], 4),
        ClosingBracket(),
        Semicolon(),
        NewLine(),
    ]

    actual = list(convert_text(text, 'java'))

    assert expected_result == actual
def test_capitals():
    """Check how differently capitalised identifiers are split.

    ``MyClass`` is expected split at the case change, ``Class`` and
    ``CONSTANT`` are expected as single words, and the upper-case name with
    underscores is expected split at each underscore.
    """
    text = ''' MyClass Class CONSTANT VAR_WITH_UNDERSCORES '''

    camel_case = SplitContainer([Word.from_("My"), Word.from_("Class")])
    upper_snake_case = SplitContainer([
        Word.from_("VAR"),
        Underscore(),
        Word.from_("WITH"),
        Underscore(),
        Word.from_("UNDERSCORES"),
    ])
    expected_result = [
        camel_case,
        SplitContainer.from_single_token("Class"),
        SplitContainer.from_single_token("CONSTANT"),
        upper_snake_case,
        NewLine(),
    ]

    actual = list(convert_text(text, 'java'))

    assert expected_result == actual
def test_string_literal_double():
    """Check a double-quoted string literal parsed with the ``'py'`` setting.

    The literal is expected as three consecutive ``StringLiteral`` tokens:
    the opening quote, the content (split at the underscore) and the closing
    quote, each carrying its own length.
    """
    text = '''a = "some_text".split()'''

    quote = '"'
    literal_content = SplitContainer(
        [Word.from_("some"), Underscore(), Word.from_("text")])
    expected_result = [
        SplitContainer.from_single_token("a"),
        Operator('='),
        StringLiteral([NonCodeChar(quote)], 1),
        StringLiteral([literal_content], 9),
        StringLiteral([NonCodeChar(quote)], 1),
        Operator('.'),
        SplitContainer.from_single_token("split"),
        OpeningBracket(),
        ClosingBracket(),
        NewLine(),
    ]

    actual = list(convert_text(text, 'py'))

    assert expected_result == actual
def transform(self, value: str) -> List[NewLine]:
    """Return a one-element list holding a fresh ``NewLine`` token.

    ``value`` is accepted but never inspected.
    """
    newline_token = NewLine()
    return [newline_token]
def remove_trailing_newline(
        prep_tokens: List[ParsedToken]) -> List[ParsedToken]:
    """Drop one trailing ``NewLine`` token, if there is one.

    Returns a new list without the last element when ``prep_tokens`` is
    non-empty and its last element equals ``NewLine()``. Otherwise the input
    list object itself is returned unchanged. At most one token is removed.
    """
    if len(prep_tokens) == 0:
        return prep_tokens

    ends_with_newline = prep_tokens[-1] == NewLine()
    if ends_with_newline:
        return prep_tokens[:-1]
    return prep_tokens
def test_to_repr_with_enonlycontents1():
    """Check ``to_repr`` output and metadata for tokens wrapped in ``NonEng``.

    With the configuration below, every ``NonEng`` token is expected as the
    ``pl['non_eng']`` placeholder, whether it sits in code, in a string
    literal or in a comment. The ``NewLine`` and ``Tab`` tokens are expected
    to leave no entry in the output.
    """
    prep_config = PrepConfig({
        PrepParam.EN_ONLY: 'U',
        PrepParam.COM: 'c',
        PrepParam.STR: '1',
        PrepParam.SPLIT: '2',
        PrepParam.TABS_NEWLINES: '0',
        PrepParam.CASE: 'l'
    })

    def non_eng_word(*parts):
        # One non-English identifier made of the given sub-words.
        return NonEng(SplitContainer([Word.from_(part) for part in parts]))

    # Content of the string literal: the words separated by single
    # SpaceInString tokens, wrapped in double quotes.
    string_words = (
        "ich", "weiss", "nicht", "was", "soll", "es",
        "bedeuten", "dass", "ich", "so", "traurig", "bin",
    )
    string_parts = [NonCodeChar('"')]
    for position, word in enumerate(string_words):
        if position > 0:
            string_parts.append(SpaceInString())
        string_parts.append(non_eng_word(word))
    string_parts.append(NonCodeChar('"'))

    tokens = [
        Number("1.1"),
        Operator("*"),
        non_eng_word("dinero"),
        StringLiteral(string_parts, 62),
        NewLine(),
        MultilineComment([NonCodeChar('/'), NonCodeChar('*')]),
        MultilineComment([
            non_eng_word('ц'),
            NonEng(
                SplitContainer([
                    Word.from_("blanco"),
                    Underscore(),
                    Word.from_("english")
                ])
            ),
        ]),
        MultilineComment([NonCodeChar('*'), NonCodeChar('/')]),
        NewLine(),
        Tab(),
        OneLineComment([
            NonCodeChar('/'),
            NonCodeChar('/'),
            non_eng_word("DIESELBE", "8"),
        ]),
    ]

    actual, actual_metadata = to_repr(prep_config, tokens)

    non_eng = pl['non_eng']
    expected = (
        # 1.1 * dinero
        [pl['word_start'], '1', '.', '1', pl['word_end'], "*", non_eng]
        # string literal: one placeholder per word, spaces leave no entry
        + ['"'] + [non_eng] * len(string_words) + ['"']
        # multi-line comment
        + ['/', '*', non_eng, non_eng, '*', '/']
        # one-line comment
        + ['/', '/', non_eng, pl['olc_end']]
    )
    expected_metadata = PreprocessingMetadata(
        {'*', '"', '/'},
        word_boundaries=[0] + list(range(5, 32)),
        token_types=(
            [Number, Operator, NonEng]
            + [StringLiteral] * 14
            + [MultilineComment] * 6
            + [OneLineComment] * 4
        ),
    )

    assert expected == actual
    assert expected_metadata == actual_metadata
tokens = [ Number('1.1'), Operator("*"), NonEng(SplitContainer([Word.from_("übersetzen")])), StringLiteral([ NonCodeChar('"'), NonEng( SplitContainer([ Word.from_("A"), Word.from_("Wirklicä") ]) ), SpaceInString(1), NonCodeChar('"') ], 11), NewLine(), MultilineComment([NonCodeChar('/'), NonCodeChar('*')]), MultilineComment([ NonEng( SplitContainer([Word.from_('ц')]), ), NonEng( SplitContainer([ Word.from_("blanco"), Underscore(), Word.from_("english") ]) ), ]), MultilineComment([NonCodeChar('*'), NonCodeChar('/')]), NewLine(), Tab(),
def test_special_characters():
    """Check tokenisation of operators, brackets and other special characters.

    Multi-character operators such as ``>>`` or ``+=`` are expected as one
    ``Operator`` per character. ``$``, ``@``, ``#``, the backslash and the
    apostrophe are expected as ``NonCodeChar``. Curly brackets, round
    brackets and the semicolon are expected as their dedicated token types.
    """
    # NOTE(review): the expected stream below contains a NewLine() after
    # nearly every group and many Tab() tokens, but the only line break and
    # tab visible in this literal are the explicit ``\t\n`` escapes -- its
    # whitespace looks collapsed. Confirm the fixture text against version
    # control.
    text = ''' abc1 ~-0xFFFFFL= .0E+5 |= ? == != ** ++ -- += -= /= *= %= $ <= >= @ ^= &= # >> << && || +*!/><\t\n {}[],.-:();&|\\'~%^ '''

    def ops(chars):
        # One Operator token per character, in order.
        return [Operator(char) for char in chars]

    # One row per group of the fixture.
    expected_result = [
        SplitContainer([Word.from_('abc'), Word.from_('1')]), NewLine(),
        *ops('~-'), Number("0xFFFFFL"), Operator('='), NewLine(),
        Number(".0E+5"), NewLine(),
        *ops('|='), NewLine(),
        *ops('?'), NewLine(),
        *ops('=='), NewLine(),
        *ops('!='), NewLine(),
        *ops('**'), NewLine(),
        *ops('++'), NewLine(),
        *ops('--'), NewLine(),
        *ops('+='), NewLine(),
        *ops('-='), NewLine(),
        *ops('/='), NewLine(),
        *ops('*='), NewLine(),
        *ops('%='), NewLine(),
        NonCodeChar('$'), NewLine(),
        *ops('<='), NewLine(),
        *ops('>='), NewLine(),
        NonCodeChar('@'), NewLine(),
        Tab(), *ops('^='), NewLine(),
        Tab(), *ops('&='), NewLine(),
        Tab(), NonCodeChar('#'), NewLine(),
        # twenty Tab tokens precede '>>'
        *[Tab() for _ in range(20)], *ops('>>'), NewLine(),
        *ops('<<'), NewLine(),
        *ops('&&'), NewLine(),
        *ops('||'), NewLine(),
        *ops('+*!/><'), Tab(), NewLine(), NewLine(),
        OpeningCurlyBracket(), ClosingCurlyBracket(),
        *ops('[],.-:'),
        OpeningBracket(), ClosingBracket(), Semicolon(),
        *ops('&|'),
        NonCodeChar('\\'), NonCodeChar("'"),
        *ops('~%^'), NewLine(),
    ]

    actual = list(convert_text(text, 'java'))

    assert expected_result == actual