Python SplitContainer.from_single_tokenの例、logrec.dataprep.model.containers.SplitContainer.from_single_token Pythonの例

コード例 #1

0

ファイルを表示

ファイル: core.py プロジェクト: hlibbabii/log-recommender

    def test_7(self):
        text = '''
/*multi-line MyComment_
*//
_operations
'''

        expected_result = [
            NewLine(),
            MultilineComment([
                SplitContainer.from_single_token('multi'), '-',
                SplitContainer.from_single_token('line'),
                SplitContainer(
                    [Word.from_('My'),
                     Word.from_('Comment'),
                     Underscore()]),
                NewLine()
            ]), '/',
            NewLine(),
            SplitContainer([Underscore(),
                            Word.from_('operations')]),
            NewLine(),
            NewLine()
        ]

        self.__test_apply_preprocessors(text, expected_result)

コード例 #2

0

ファイルを表示

ファイル: noneng.py プロジェクト: hlibbabii/log-recommender

    def test_mark_all_eng(self):
        """
        Every word is English, so ``mark`` is expected to change nothing.
        """
        single = SplitContainer.from_single_token
        word = Word.from_

        tokens = [
            StringLiteral([
                OneLineCommentStart(),
                SplitContainer([word("test"), word("my"), word("class")]),
            ]),
            NewLine(),
            OneLineComment([MultilineCommentEnd(), single("lifeisgood")]),
            NewLine(),
            StringLiteral([MultilineCommentStart(), single("!")]),
            NewLine(),
            MultilineComment([NewLine()]),
            NewLine(),
        ]

        marked = mark(tokens, {})

        # the result is compared against the very list that was passed in
        self.assertEqual(marked, tokens)

コード例 #3

0

ファイルを表示

    def test_log_no_mark_logs(self):
        """Render a LogStatement with MARK_LOGS set to 0 and check the flat tokens."""
        single = SplitContainer.from_single_token

        params = {
            PrepParam.EN_ONLY: 1,
            PrepParam.COM_STR: 0,
            PrepParam.SPLIT: 1,
            PrepParam.TABS_NEWLINES: 0,
            PrepParam.MARK_LOGS: 0,
            PrepParam.CAPS: 1,
        }
        prep_config = PrepConfig(params)
        ngram_config = NgramSplitConfig()

        statement = LogStatement(
            single('LOGGER'),
            single('Info'),
            INFO,
            [StringLiteral([single("Hi")])],
        )

        actual = to_repr(prep_config, [statement], ngram_config)

        expected = [
            pl['capitals'], 'logger',
            '.',
            pl['capital'], 'info',
            '(',
            '"', pl['capital'], 'hi', '"',
            ')',
            ';',
        ]

        self.assertEqual(expected, actual)

コード例 #4

0

ファイルを表示

ファイル: loggable.py プロジェクト: hlibbabii/log-recommender

    def test_class_class(self):
        """Run ``loggable.mark`` on the token sequence ``class A class``."""
        single = SplitContainer.from_single_token
        tokens = [single(name) for name in ('class', 'A', 'class')]

        # NOTE(review): no assertion is visible in this excerpt, so the result
        # is only computed, never checked -- confirm against the full file.
        actual = loggable.mark(tokens, None)

コード例 #5

0

ファイルを表示

    def test_no_dot(self):
        """Tokens with no '.' after 'log' must come back from ``logs.mark`` unchanged."""
        single = SplitContainer.from_single_token

        tokens = [
            NewLine(),
            single('log'),
            single('infooooo'),
            '(',
            StringLiteral([single("Hi")]),
            ')',
            ';',
            Number([HexStart(), '3', '4', 'a', '3', '5', 'E', L()]),
        ]

        actual = logs.mark(tokens, None)

        self.assertEqual(tokens, actual)

コード例 #6

0

ファイルを表示

ファイル: core.py プロジェクト: hlibbabii/log-recommender

    def test_2(self):
        text = '''
int[] _my_favoRite_ints_ = {0x12, 0x1fE, 441, -81, -0xfFf};
'''
        expected_result = [
            NewLine(),
            SplitContainer.from_single_token('int'), '[', ']',
            SplitContainer([
                Underscore(),
                Word.from_('my'),
                Underscore(),
                NonEng(Word.from_('favo')),
                Word.from_('Rite'),
                Underscore(),
                Word.from_('ints'),
                Underscore()
            ]), '=', '{',
            Number([HexStart(), '1', '2']), ',',
            Number([HexStart(), '1', 'f', 'E']), ',',
            Number(['4', '4', '1']), ',',
            Number(['-', '8', '1']), ',',
            Number(['-', HexStart(), 'f', 'F', 'f']), '}', ';',
            NewLine(),
            NewLine()
        ]

        self.__test_apply_preprocessors(text, expected_result)

コード例 #7

0

ファイルを表示

    def test_merges_no_cache(self):
        """BPE splitting of "While" with a single ('w', 'h') merge and an empty merges cache."""
        params = {
            PrepParam.EN_ONLY: 0,
            PrepParam.COM_STR: 0,
            PrepParam.SPLIT: 4,
            PrepParam.TABS_NEWLINES: 0,
            PrepParam.MARK_LOGS: 1,
            PrepParam.CAPS: 1,
        }
        prep_config = PrepConfig(params)

        bpe_config = NgramSplitConfig(
            splitting_type=NgramSplittingType.BPE,
            merges={('w', 'h'): 0},
            merges_cache={},
        )

        word_tokens = [SplitContainer.from_single_token("While")]

        actual = to_repr(prep_config, word_tokens, bpe_config)

        expected = [
            pl['word_start'],
            pl['capital'],
            "wh", "i", "l", "e",
            pl["word_end"],
        ]

        self.assertEqual(expected, actual)

コード例 #8

0

ファイルを表示

    def test_content_length_over_limit(self):
        """A ``log.info(...)`` call with a very long argument must stay unmarked.

        The argument is ``1*3`` wrapped in 32 pairs of parentheses; the test
        expects ``logs.mark`` to return the token list unchanged (judging by
        the test name, because the content exceeds a length limit).
        """
        nesting_depth = 32

        # Every parenthesis and the ';' must be a separate token.  The earlier
        # literal list was missing commas at several line ends, so adjacent
        # string literals were silently concatenated into '))' and ');'.
        tokens = (
            [NewLine(),
             SplitContainer.from_single_token('log'),
             '.', SplitContainer.from_single_token('info')]
            + ['('] * nesting_depth
            + ['1', '*', '3']
            + [')'] * nesting_depth
            + [';',
               Number([HexStart(), '3', '4', 'a', '3', '5', 'E', L()])]
        )

        actual = logs.mark(tokens, None)

        self.assertEqual(tokens, actual)

コード例 #9

0

ファイルを表示

ファイル: core.py プロジェクト: hlibbabii/log-recommender

    def test_capitals(self):
        text = '''
MyClass Class CONSTANT VAR_WITH_UNDERSCORES
'''

        expected_result = [
            NewLine(),
            SplitContainer([Word.from_("My"),
                            Word.from_("Class")]),
            SplitContainer.from_single_token("Class"),
            SplitContainer.from_single_token("CONSTANT"),
            SplitContainer([
                Word.from_("VAR"),
                Underscore(),
                Word.from_("WITH"),
                Underscore(),
                Word.from_("UNDERSCORES")
            ]),
            NewLine(),
            NewLine()
        ]

        self.__test_apply_preprocessors(text, expected_result)

コード例 #10

0

ファイルを表示

    def test_no_logs(self):
        """Tokens containing no log call must come back from ``logs.mark`` unchanged."""
        word = Word.from_

        tokens = [
            NewLine(),
            SplitContainer.from_single_token('long'),
            '[', ']',
            SplitContainer([word('lovely'), Underscore(), word('longs')]),
            '=', '{',
            Number([HexStart(), '3', '4', 'a', '3', '5', 'E', L()]),
        ]

        actual = logs.mark(tokens, None)

        self.assertEqual(tokens, actual)

コード例 #11

0

ファイルを表示

ファイル: core.py プロジェクト: hlibbabii/log-recommender

    def test_3(self):
        text = '''
float[] floats = {-0.43E4f, .58F, 0.d, -9.63e+2D, 0.E-8};
'''
        expected_result = [
            NewLine(),
            SplitContainer.from_single_token('float'), '[', ']',
            SplitContainer.from_single_token('floats'), '=', '{',
            Number(['-', '0',
                    DecimalPoint(), '4', '3',
                    E(), '4',
                    F()]), ',',
            Number([DecimalPoint(), '5', '8', F()]), ',',
            Number(['0', DecimalPoint(), D()]), ',',
            Number(['-', '9',
                    DecimalPoint(), '6', '3',
                    E(), '+', '2',
                    D()]), ',',
            Number(['0', DecimalPoint(), E(), '-', '8']), '}', ';',
            NewLine(),
            NewLine()
        ]

        self.__test_apply_preprocessors(text, expected_result)

コード例 #12

0

ファイルを表示

ファイル: noneng.py プロジェクト: hlibbabii/log-recommender

    def test_mark_with_noneng(self):
        """Some words are expected to come back from ``mark`` wrapped in NonEng."""
        word = Word.from_

        tokens = [
            StringLiteral([
                SplitContainer([word("A"), word("Wirklich")]),
            ]),
            MultilineComment([
                SplitContainer.from_single_token('ц'),
                SplitContainer([word("blanco"), Underscore(), word("english")]),
            ]),
            OneLineComment([
                SplitContainer([word("DIESELBE"), word("8")]),
            ]),
        ]

        actual = mark(tokens, {})

        expected = [
            StringLiteral([
                SplitContainer([word("A"), NonEng(word("Wirklich"))]),
            ]),
            MultilineComment([
                SplitContainer([NonEng(word('ц'))]),
                # The constructor is called by hand here because the split
                # container cannot set the wordStart prefix when the first
                # subword is wrapped in NonEng.
                SplitContainer([
                    NonEng(word("blanco")),
                    Underscore(),
                    word("english"),
                ]),
            ]),
            OneLineComment([
                # Same reason as above: the first subword is wrapped in
                # NonEng, so the container is built by hand.
                SplitContainer([NonEng(word("DIESELBE")), word("8")]),
            ]),
        ]
        self.assertEqual(expected, actual)

コード例 #13

0

ファイルを表示

ファイル: core.py プロジェクト: hlibbabii/log-recommender

    def test_4(self):
        text = '''
BigAWESOMEString[] a2y = "abc".doSplit("\\"");
'''
        expected_result = [
            NewLine(),
            SplitContainer([
                Word.from_('Big'),
                Word.from_('AWESOME'),
                Word.from_('String')
            ]), '[', ']',
            SplitContainer([Word.from_('a'),
                            Word.from_('2'),
                            Word.from_('y')]), '=',
            StringLiteral([SplitContainer.from_single_token('abc')]), '.',
            SplitContainer([Word.from_('do'),
                            Word.from_('Split')]), '(',
            StringLiteral([Backslash(), Quote()]), ')', ';',
            NewLine(),
            NewLine()
        ]

        self.__test_apply_preprocessors(text, expected_result)

コード例 #14

0

ファイルを表示

    def test_simple_log(self):
        """``log.info("Hi");`` is expected to collapse into one INFO LogStatement."""
        single = SplitContainer.from_single_token

        tokens = [
            NewLine(),
            single('log'), '.', single('info'),
            '(',
            StringLiteral([single("Hi")]),
            ')', ';',
            Number([HexStart(), '3', '4', 'a', '3', '5', 'E', L()]),
        ]

        actual = logs.mark(tokens, None)

        expected = [
            NewLine(),
            LogStatement(
                single('log'),
                single('info'),
                INFO,
                [StringLiteral([single("Hi")])],
            ),
            Number([HexStart(), '3', '4', 'a', '3', '5', 'E', L()]),
        ]

        self.assertEqual(expected, actual)

コード例 #15

0

ファイルを表示

    def test_1(self):
        """BPE splitting of "While" when the merges cache already holds 'while'."""
        params = {
            PrepParam.EN_ONLY: 0,
            PrepParam.COM_STR: 0,
            PrepParam.SPLIT: 4,
            PrepParam.TABS_NEWLINES: 0,
            PrepParam.MARK_LOGS: 1,
            PrepParam.CAPS: 1,
        }
        prep_config = PrepConfig(params)

        bpe_config = NgramSplitConfig(
            splitting_type=NgramSplittingType.BPE,
            merges_cache={'while': ['while']},
        )

        word_tokens = [SplitContainer.from_single_token("While")]

        actual = to_repr(prep_config, word_tokens, bpe_config)

        expected = [pl['capital'], "while"]

        self.assertEqual(expected, actual)

コード例 #16

0

ファイルを表示

ファイル: core.py プロジェクト: hlibbabii/log-recommender

    def test_1(self):
        text = '''
long[] lovely_longs = {0x34a35EL,     0x88bc96fl           , -0x34L};
'''
        expected_result = [
            NewLine(),
            SplitContainer.from_single_token('long'), '[', ']',
            SplitContainer(
                [Word.from_('lovely'),
                 Underscore(),
                 Word.from_('longs')]), '=', '{',
            Number([HexStart(), '3', '4', 'a', '3', '5', 'E',
                    L()]), ',',
            Tab(),
            Number([HexStart(), '8', '8', 'b', 'c', '9', '6', 'f',
                    L()]),
            Tab(),
            Tab(), ',',
            Number(['-', HexStart(), '3', '4', L()]), '}', ';',
            NewLine(),
            NewLine()
        ]

        self.__test_apply_preprocessors(text, expected_result)

コード例 #17

0

ファイルを表示

    def test_tabs_and_newlines_before_semicolon(self):
        """Whitespace tokens between ')' and ';' are expected inside the LogStatement."""
        single = SplitContainer.from_single_token

        tokens = [
            NewLine(),
            single('log'), '.', single('d'),
            '(',
            StringLiteral([single("Hi")]),
            ')',
            NewLine(), NewLine(), Tab(), Tab(),
            ';',
            Number([HexStart(), '3', '4', 'a', '3', '5', 'E', L()]),
        ]

        actual = logs.mark(tokens, None)

        expected = [
            NewLine(),
            LogStatement(
                single('log'),
                single('d'),
                DEBUG,
                [StringLiteral([single("Hi")])],
                # the whitespace that preceded the ';'
                [NewLine(), NewLine(), Tab(), Tab()],
            ),
            Number([HexStart(), '3', '4', 'a', '3', '5', 'E', L()]),
        ]

        self.assertEqual(expected, actual)

コード例 #18

0

ファイルを表示

ファイル: loggable.py プロジェクト: hlibbabii/log-recommender

    def test_class_closing_bracket(self):
        """Run ``loggable.mark`` on 'class' directly followed by '}'."""
        tokens = [
            SplitContainer.from_single_token('class'),
            '}',
        ]

        # NOTE(review): no assertion is visible in this excerpt, so the result
        # is only computed, never checked -- confirm against the full file.
        actual = loggable.mark(tokens, None)

コード例 #19

0

ファイルを表示

ファイル: java.py プロジェクト: hlibbabii/log-recommender

    def test_process_comments_and_str_literals(self):
        '''
        Positive scenario.

        The token stream built below corresponds roughly to:

        <start>"//test_myClass"
        //*/
        "/*!"
        /*
        */
        <end>
        '''
        single = SplitContainer.from_single_token
        word = Word.from_

        tokens = [
            # line 1: a string literal that contains a one-line comment start
            Quote(),
            OneLineCommentStart(),
            SplitContainer([
                word("test"), Underscore(), word("my"), word("Class"),
            ]),
            Quote(),
            NewLine(),
            # line 2: a one-line comment that contains a multi-line comment end
            OneLineCommentStart(),
            MultilineCommentEnd(),
            NewLine(),
            # line 3: a string literal that contains a multi-line comment start
            Quote(),
            MultilineCommentStart(),
            single("!"),
            Quote(),
            NewLine(),
            # lines 4-5: a multi-line comment spanning a newline
            MultilineCommentStart(),
            NewLine(),
            MultilineCommentEnd(),
            NewLine(),
        ]

        actual = process_comments_and_str_literals(tokens, {})

        expected = [
            StringLiteral([
                OneLineCommentStart(),
                SplitContainer([
                    word("test"), Underscore(), word("my"), word("Class"),
                ]),
            ]),
            NewLine(),
            OneLineComment([MultilineCommentEnd()]),
            NewLine(),
            StringLiteral([MultilineCommentStart(), single("!")]),
            NewLine(),
            MultilineComment([NewLine()]),
            NewLine(),
        ]

        self.assertEqual(expected, actual)

コード例 #20

0

ファイルを表示

    def test_2_logs(self):
        """Two consecutive log calls are each expected to become a LogStatement."""
        single = SplitContainer.from_single_token

        tokens = [
            NewLine(),
            single('log'), '.', single('t'),
            '(', StringLiteral([single("Hi")]), ')', ';',
            NewLine(),
            single('Logger'), '.', single('SEVERE'),
            '(', StringLiteral([single("Hi")]), ')', ';',
        ]

        actual = logs.mark(tokens, None)

        expected = [
            NewLine(),
            LogStatement(
                single('log'),
                single('t'),
                TRACE,
                [StringLiteral([single("Hi")])],
            ),
            NewLine(),
            LogStatement(
                single('Logger'),
                single('SEVERE'),
                FATAL,
                [StringLiteral([single("Hi")])],
            ),
        ]

        self.assertEqual(expected, actual)

コード例 #21

0

ファイルを表示

ファイル: core.py プロジェクト: hlibbabii/log-recommender

    def test_5(self):
        text = '''
// this code won't compile but the preprocessing still has to be done corrrectly
'''
        expected_result = [
            NewLine(),
            OneLineComment([
                SplitContainer.from_single_token('this'),
                SplitContainer.from_single_token('code'),
                SplitContainer.from_single_token('won'), "'",
                SplitContainer.from_single_token('t'),
                SplitContainer.from_single_token('compile'),
                SplitContainer.from_single_token('but'),
                SplitContainer.from_single_token('the'),
                SplitContainer.from_single_token('preprocessing'),
                SplitContainer.from_single_token('still'),
                SplitContainer.from_single_token('has'),
                SplitContainer.from_single_token('to'),
                SplitContainer.from_single_token('be'),
                SplitContainer.from_single_token('done'),
                SplitContainer.from_single_token('corrrectly')
            ]),
            NewLine(),
            NewLine()
        ]

        self.__test_apply_preprocessors(text, expected_result)

コード例 #22

0

ファイルを表示

ファイル: loggable.py プロジェクト: hlibbabii/log-recommender

    def test_nested_data_class(self):
        """Method, constructor and static-initializer bodies are expected as LoggableBlocks."""
        single = SplitContainer.from_single_token

        tokens = [
            '{', '}',
            MultilineComment([single("class")]),
            single('import'), single("a"),
            NewLine(),
            # class A {
            single('class'), single('A'), '{',
            # void print1() { if (True) {} }
            single('void'), single('print1'), '(', ')',
            '{', single('if'), '(', single('True'), ')', '{', '}', '}',
            # static private class B extends D {
            single('static'), single('private'), single('class'),
            single('B'), single('extends'), single('D'), '{',
            # private String b;
            single('private'), single('String'), single('b'), ';',
            # B() {}
            single('B'), '(', ')', '{', '}',
            # static { c = "class".class }
            single('static'),
            '{', single('c'), '=',
            StringLiteral([single('class')]), '.', single('class'), '}',
            # end of class B
            '}',
            # void print() { if (True) {} }
            single('void'), single('print'), '(', ')',
            '{', single('if'), '(', single('True'), ')', '{', '}', '}',
            # int a; and end of class A
            single('int'), single('a'), ';',
            '}',
        ]

        actual = loggable.mark(tokens, None)

        expected = [
            '{', '}',
            MultilineComment([single("class")]),
            single('import'), single("a"),
            NewLine(),
            single('class'), single('A'), '{',
            single('void'), single('print1'), '(', ')',
            LoggableBlock([
                '{', single('if'), '(', single('True'), ')', '{', '}', '}',
            ]),
            single('static'), single('private'), single('class'),
            single('B'), single('extends'), single('D'), '{',
            single('private'), single('String'), single('b'), ';',
            single('B'), '(', ')',
            LoggableBlock(['{', '}']),
            single('static'),
            LoggableBlock([
                '{', single('c'), '=',
                StringLiteral([single('class')]), '.', single('class'), '}',
            ]),
            '}',
            single('void'), single('print'), '(', ')',
            LoggableBlock([
                '{', single('if'), '(', single('True'), ')', '{', '}', '}',
            ]),
            single('int'), single('a'), ';',
            '}',
        ]

        self.assertEqual(expected, actual)

コード例 #23

0

ファイルを表示

ファイル: subword_separation.py プロジェクト: hlibbabii/log-recommender

from logrec.dataprep.model.noneng import NonEng
from logrec.dataprep.preprocessors.preprocessor_list import pp_params
from logrec.dataprep.preprocessors import apply_preprocessors
from logrec.dataprep.preprocessors.general import from_string
from logrec.dataprep.model.containers import SplitContainer, StringLiteral
from logrec.dataprep.model.logging import LogStatement, INFO
from logrec.dataprep.model.numeric import Number, DecimalPoint, E
from logrec.dataprep.model.placeholders import placeholders
from logrec.dataprep.model.word import Word, Underscore
from logrec.dataprep.prepconfig import PrepConfig
from logrec.dataprep.split.ngram import NgramSplitConfig, NgramSplittingType
from logrec.dataprep.to_repr import to_repr

test_cases = {
    "create": (
        [SplitContainer.from_single_token("create")],
        ["create"],
    ),
    "Vector": (
        [SplitContainer.from_single_token("Vector")],
        [placeholders["capital"], "vector"],
    ),
    "players": (
        [SplitContainer.from_single_token("players")],
        [placeholders["word_start"], 'play', 'er', 's', placeholders["word_end"]]
    ),
    "0.345e+4": (
        [Number(["0", DecimalPoint(), "3", "4", "5", E(), "+", "4"])],
        [placeholders["word_start"], "0.", "3", "4", "5", "e+", "4", placeholders["word_end"]]
    ),
    "bestPlayers": (