예제 #1
0
    def test_2(self):
        """Int array initializer: identifier splitting plus decimal and hex literals."""
        source = "\nint[] _my_favoRite_ints_ = {0x12, 0x1fE, 441, -81, -0xfFf};\n"

        # Leading newline of the snippet, then the `int[]` type prefix.
        expected = [NewLine(), SplitContainer.from_single_token('int'), '[', ']']

        # `_my_favoRite_ints_`: only the 'favo' part is expected to be wrapped in NonEng.
        expected.append(SplitContainer([
            Underscore(), Word.from_('my'),
            Underscore(), NonEng(Word.from_('favo')), Word.from_('Rite'),
            Underscore(), Word.from_('ints'),
            Underscore(),
        ]))
        expected += ['=', '{']

        # Every literal is expected character by character; a leading '-' comes
        # before the HexStart marker.
        expected += [
            Number([HexStart()] + list('12')), ',',
            Number([HexStart()] + list('1fE')), ',',
            Number(list('441')), ',',
            Number(list('-81')), ',',
            Number(['-', HexStart()] + list('fFf')), '}', ';',
        ]
        expected += [NewLine(), NewLine()]

        self.__test_apply_preprocessors(source, expected)
예제 #2
0
    def test_to_repr_with_enonlycontents(self):
        """Check the `to_repr` output for NonEng-heavy tokens when EN_ONLY is 2."""
        config = PrepConfig({
            PrepParam.EN_ONLY: 2,
            PrepParam.COM_STR: 0,
            PrepParam.SPLIT: 3,
            PrepParam.TABS_NEWLINES: 1,
            PrepParam.MARK_LOGS: 1,
            PrepParam.CAPS: 1
        })

        split_config = NgramSplitConfig(
            splitting_type=NgramSplittingType.NUMBERS_AND_CUSTOM,
            sc_splittings={})

        # Words of the string literal; each one is wrapped in NonEng below.
        german_words = [
            "ich", "weiss", "nicht", "was", "soll", "es",
            "bedeuten", "dass", "ich", "so", "traurig", "bin",
        ]

        tokens = [
            Number([1, DecimalPoint(), 1]),
            "*",
            SplitContainer([NonEng(Word.from_("dinero"))]),
            StringLiteral([NonEng(Word.from_(word)) for word in german_words]),
            NewLine(),
            MultilineComment([
                SplitContainer([NonEng(Word.from_('ц'))]),
                SplitContainer([
                    NonEng(Word.from_("blanco")), Underscore(), Word.from_("english"),
                ]),
            ]),
            NewLine(),
            Tab(),
            OneLineComment([
                SplitContainer([NonEng(Word.from_("DIESELBE")), Word.from_("8")]),
            ]),
        ]

        actual = to_repr(config, tokens, split_config)

        # Note that the expected output has no entries for the NewLine/Tab tokens.
        expected = (
            # The number, written out between word boundaries, then the operator.
            [pl['word_start'], '1', '.', '1', pl['word_end'], "*"]
            # Presumably the NonEng-only identifier collapsing to one placeholder.
            + [pl['non_eng']]
            # Quoted section with a single content placeholder.
            + ['"', pl["non_eng_content"], '"']
            # Block-comment section.
            + ['/*', pl['non_eng'],
               pl['word_start'], pl['non_eng'], '_', 'english', pl['word_end'],
               '*/']
            # Line-comment section, closed by the one-line-comment end marker.
            + ['//', pl['word_start'], pl['capitals'], pl['non_eng'], "8",
               pl['word_end'], pl['olc_end']]
        )

        self.assertEqual(expected, actual)
예제 #3
0
def process_number_literal(possible_number):
    """Convert a token string into a ``Number`` when it is a numeric literal.

    The input is returned wrapped in ``ParseableToken`` (unchanged) when
    ``is_number`` rejects it or when it is one of the entries in ``tabs``.
    Otherwise the literal is broken into parts: an optional leading ``'-'``,
    an optional ``HexStart`` for a ``0x`` prefix, and then one part per
    remaining character. ``.`` becomes ``DecimalPoint`` and ``l``/``L``
    becomes ``L``; outside of hex literals ``f``/``d``/``e`` (either case)
    become ``F``/``D``/``E``. Every other character is kept as a plain string.
    """
    if not is_number(possible_number) or possible_number in tabs:
        return ParseableToken(possible_number)

    remaining = possible_number
    parts = []

    if remaining.startswith('-'):
        parts.append('-')
        remaining = remaining[1:]

    is_hex = remaining.startswith("0x")
    if is_hex:
        parts.append(HexStart())
        remaining = remaining[2:]

    # Suffix/exponent letters only have a special meaning outside hex literals,
    # where 'd', 'e' and 'f' are ordinary digits.
    letter_markers = {'f': F, 'F': F, 'd': D, 'D': D, 'e': E, 'E': E}

    for char in remaining:
        if char == '.':
            parts.append(DecimalPoint())
        elif char in ('l', 'L'):
            parts.append(L())
        elif not is_hex and char in letter_markers:
            parts.append(letter_markers[char]())
        else:
            parts.append(char)

    return Number(parts)
예제 #4
0
    def test_simple_log(self):
        """A `log.info("Hi");` token run is expected to become one LogStatement."""

        def hi_literal():
            # Fresh instance per call so input and expectation share no objects.
            return StringLiteral([SplitContainer.from_single_token("Hi")])

        def trailing_number():
            return Number([HexStart()] + list('34a35E') + [L()])

        tokens = [
            NewLine(),
            SplitContainer.from_single_token('log'),
            '.',
            SplitContainer.from_single_token('info'),
            '(', hi_literal(), ')', ';',
            trailing_number(),
        ]

        actual = logs.mark(tokens, None)

        # The '.', parentheses and ';' are absent from the expectation; the
        # number after the statement is expected to stay as it was.
        expected = [
            NewLine(),
            LogStatement(
                SplitContainer.from_single_token('log'),
                SplitContainer.from_single_token('info'),
                INFO,
                [hi_literal()],
            ),
            trailing_number(),
        ]

        self.assertEqual(expected, actual)
예제 #5
0
    def test_tabs_and_newlines_before_semicolon(self):
        """Whitespace tokens between ')' and ';' are expected as LogStatement's last argument."""

        def hi_literal():
            # Fresh instance per call so input and expectation share no objects.
            return StringLiteral([SplitContainer.from_single_token("Hi")])

        def trailing_number():
            return Number([HexStart()] + list('34a35E') + [L()])

        tokens = [
            NewLine(),
            SplitContainer.from_single_token('log'),
            '.',
            SplitContainer.from_single_token('d'),
            '(', hi_literal(), ')',
            NewLine(), NewLine(), Tab(), Tab(),
            ';',
            trailing_number(),
        ]

        actual = logs.mark(tokens, None)

        expected = [
            NewLine(),
            LogStatement(
                SplitContainer.from_single_token('log'),
                SplitContainer.from_single_token('d'),
                DEBUG,
                [hi_literal()],
                # The two newlines and two tabs that preceded the ';'.
                [NewLine(), NewLine(), Tab(), Tab()],
            ),
            trailing_number(),
        ]

        self.assertEqual(expected, actual)
예제 #6
0
    def test_no_dot(self):
        """With no '.' between `log` and `infooooo`, the tokens are expected back unchanged."""
        tokens = [
            NewLine(),
            SplitContainer.from_single_token('log'),
            SplitContainer.from_single_token('infooooo'),
            '(',
            StringLiteral([SplitContainer.from_single_token("Hi")]),
            ')', ';',
            Number([HexStart()] + list('34a35E') + [L()]),
        ]

        marked = logs.mark(tokens, None)

        self.assertEqual(tokens, marked)
예제 #7
0
    def test_no_logs(self):
        """Tokens of a plain array declaration are expected to pass through `logs.mark` unchanged."""
        array_name = SplitContainer([
            Word.from_('lovely'), Underscore(), Word.from_('longs'),
        ])
        tokens = [
            NewLine(),
            SplitContainer.from_single_token('long'), '[', ']',
            array_name,
            '=', '{',
            Number([HexStart()] + list('34a35E') + [L()]),
        ]

        marked = logs.mark(tokens, None)

        self.assertEqual(tokens, marked)
예제 #8
0
    def test_3(self):
        """Float array initializer: decimal points, exponents and f/d suffixes."""
        source = "\nfloat[] floats = {-0.43E4f, .58F, 0.d, -9.63e+2D, 0.E-8};\n"

        expected = [
            NewLine(),
            SplitContainer.from_single_token('float'), '[', ']',
            SplitContainer.from_single_token('floats'), '=', '{',
        ]
        # One Number per literal, in source order, each followed by its separator.
        expected += [
            # -0.43E4f
            Number(['-', '0', DecimalPoint(), '4', '3', E(), '4', F()]), ',',
            # .58F
            Number([DecimalPoint(), '5', '8', F()]), ',',
            # 0.d
            Number(['0', DecimalPoint(), D()]), ',',
            # -9.63e+2D
            Number(['-', '9', DecimalPoint(), '6', '3', E(), '+', '2', D()]), ',',
            # 0.E-8
            Number(['0', DecimalPoint(), E(), '-', '8']), '}', ';',
        ]
        expected += [NewLine(), NewLine()]

        self.__test_apply_preprocessors(source, expected)
예제 #9
0
    def test_1(self):
        text = '''
long[] lovely_longs = {0x34a35EL,     0x88bc96fl           , -0x34L};
'''
        expected_result = [
            NewLine(),
            SplitContainer.from_single_token('long'), '[', ']',
            SplitContainer(
                [Word.from_('lovely'),
                 Underscore(),
                 Word.from_('longs')]), '=', '{',
            Number([HexStart(), '3', '4', 'a', '3', '5', 'E',
                    L()]), ',',
            Tab(),
            Number([HexStart(), '8', '8', 'b', 'c', '9', '6', 'f',
                    L()]),
            Tab(),
            Tab(), ',',
            Number(['-', HexStart(), '3', '4', L()]), '}', ';',
            NewLine(),
            NewLine()
        ]

        self.__test_apply_preprocessors(text, expected_result)
예제 #10
0
    def test_content_length_over_limit(self):
        """A `log.info(...)` call with very long content is expected to be left unmarked.

        The argument is `1*3` wrapped in 32 pairs of parentheses, followed by ';'.
        `logs.mark` is expected to return the token list unchanged, presumably
        because the content exceeds the length limit the test name refers to.
        """
        nesting_depth = 32

        # Every parenthesis and the terminating ';' must be a separate token.
        # Building the runs by repetition avoids adjacent string literals, which
        # Python would silently concatenate (e.g. ')' ')' -> '))', ')' ';' -> ');')
        # if a separating comma were missing.
        tokens = (
            [NewLine(),
             SplitContainer.from_single_token('log'),
             '.',
             SplitContainer.from_single_token('info')]
            + ['('] * nesting_depth
            + ['1', '*', '3']
            + [')'] * nesting_depth
            + [';',
               Number([HexStart(), '3', '4', 'a', '3', '5', 'E', L()])]
        )

        actual = logs.mark(tokens, None)

        self.assertEqual(tokens, actual)
예제 #11
0
    def test_6(self):
        text = '''
9a abc1
~-0xFFFFFL=
.0E+5
|=
?
==
!=
**
++
--
+=
-=
/=
*=
%=
$
<=
>=
@
    ^=
    &=
    #
                                                                                 >>
<<
&&
||
+*!/><\t\n
{}[],.-:();&|\\'~%^
'''

        expected_result = [
            NewLine(),
            SplitContainer([Word.from_('9'), Word.from_('a')]),
            SplitContainer([Word.from_('abc'),
                            Word.from_('1')]),
            NewLine(), '~',
            Number(['-', HexStart(), 'F', 'F', 'F', 'F', 'F',
                    L()]), '=',
            NewLine(),
            Number([DecimalPoint(), '0', E(), '+', '5']),
            NewLine(), '|=',
            NewLine(), '?',
            NewLine(), '==',
            NewLine(), '!=',
            NewLine(), '**',
            NewLine(), '++',
            NewLine(), '--',
            NewLine(), '+=',
            NewLine(), '-=',
            NewLine(), '/=',
            NewLine(), '*=',
            NewLine(), '%=',
            NewLine(), '$',
            NewLine(), '<=',
            NewLine(), '>=',
            NewLine(), '@',
            NewLine(),
            Tab(), '^=',
            NewLine(),
            Tab(), '&=',
            NewLine(),
            Tab(), '#',
            NewLine(),
            Tab(),
            Tab(),
            Tab(),
            Tab(),
            Tab(),
            Tab(),
            Tab(),
            Tab(),
            Tab(),
            Tab(),
            Tab(),
            Tab(),
            Tab(),
            Tab(),
            Tab(),
            Tab(),
            Tab(),
            Tab(),
            Tab(),
            Tab(), '>>',
            NewLine(), '<<',
            NewLine(), '&&',
            NewLine(), '||',
            NewLine(), '+', '*', '!', '/', '>', '<',
            Tab(),
            NewLine(),
            NewLine(), '{', '}', '[', ']', ',', '.', '-', ':', '(', ')', ';',
            '&', '|',
            Backslash(), "'", '~', '%', '^',
            NewLine(),
            NewLine()
        ]

        self.__test_apply_preprocessors(text, expected_result)
예제 #12
0
from logrec.dataprep.model.chars import NewLine, Tab
# TODO write explanations with normal strings
from logrec.dataprep.model.containers import SplitContainer, OneLineComment, MultilineComment, StringLiteral
from logrec.dataprep.model.logging import INFO, LogStatement, LoggableBlock
from logrec.dataprep.model.noneng import NonEng
from logrec.dataprep.model.numeric import DecimalPoint, Number
from logrec.dataprep.model.placeholders import placeholders
from logrec.dataprep.model.word import Word, Underscore
from logrec.dataprep.prepconfig import PrepParam, PrepConfig
from logrec.dataprep.split.ngram import NgramSplittingType, NgramSplitConfig
from logrec.dataprep.to_repr import to_repr

# Short alias for the imported `placeholders` object.
pl = placeholders

tokens = [
    Number([1, DecimalPoint(), 1]), "*",
    SplitContainer([NonEng(Word.from_("dinero"))]),
    StringLiteral(
        [SplitContainer([Word.from_("A"),
                         NonEng(Word.from_("Wirklich"))])]),
    NewLine(),
    MultilineComment([
        SplitContainer([NonEng(Word.from_('ц'))]),
        SplitContainer([
            NonEng(Word.from_("blanco")),
            Underscore(),
            Word.from_("english")
        ])
    ]),
    NewLine(),
    Tab(),
test_cases = {
    "create": (
        [SplitContainer.from_single_token("create")],
        ["create"],
    ),
    "Vector": (
        [SplitContainer.from_single_token("Vector")],
        [placeholders["capital"], "vector"],
    ),
    "players": (
        [SplitContainer.from_single_token("players")],
        [placeholders["word_start"], 'play', 'er', 's', placeholders["word_end"]]
    ),
    "0.345e+4": (
        [Number(["0", DecimalPoint(), "3", "4", "5", E(), "+", "4"])],
        [placeholders["word_start"], "0.", "3", "4", "5", "e+", "4", placeholders["word_end"]]
    ),
    "bestPlayers": (
        [SplitContainer([Word.from_("best"), Word.from_("Players")])],
        [placeholders["word_start"], "best", placeholders["capital"], 'play', "er", "s", placeholders["word_end"]]
    ),
    "test_BestPlayers": (
        [SplitContainer([Word.from_("test"), Underscore(), Word.from_("Best"), Word.from_("Players")])],
        [placeholders["word_start"], "test", '_', placeholders["capital"],
         "best", placeholders["capital"], 'play', "er", "s", placeholders["word_end"]]
    ),
    "test_BestPlayers_modified": (
        [SplitContainer(
            [Word.from_("test"), Underscore(), Word.from_("Best"), Word.from_("Players"), Underscore(),
             Word.from_("modified")]