def test_tokenize(self):
    """Drive strutil.tokenize() through three tables of fixed cases:

    - quote/escape handling with the default double-quote character,
    - alternative separators (None, multi-char strings),
    - alternative quote characters combined with the preserve flag.
    """

    # (input line, expected token list, human-readable note)
    escape_cases = [
        ('', [''], ''),
        ('a', ['a'], ''),
        ('a b', ['a', 'b'], ''),
        (' a', ['', 'a'], ''),
        ('b ', ['b', ''], ''),
        ('abc def gh', ['abc', 'def', 'gh'], ''),
        ('"ab cd"', ['"ab cd"'], ''),
        ('"ab  cd"', ['"ab  cd"'], 'multiple space inside quotes'),
        ('"ab cd" ', ['"ab cd"', ''], ''),
        (' "ab cd"', ['', '"ab cd"'], ''),
        ('"ab cd" "x', ['"ab cd"'], 'discard incomplete'),
        ('"ab cd" "x y"', ['"ab cd"', '"x y"'], ''),

        ('foo "ab cd" "x y"', ['foo', '"ab cd"', '"x y"'], ''),
        ('foo "ab cd" "x', ['foo', '"ab cd"'], 'discard incomplete'),

        ('foo "\\"ab cd" "x', ['foo', '""ab cd"'], 'escape "'),
        ('foo "\\\\"ab cd" "x', ['foo', '"\\"ab', 'cd" "x'], 'escape \\'),
        ('foo "\\\\\\"ab cd" "x', ['foo', '"\\"ab cd"'], 'escape \\ "'),

        ('a \\"bc "d e" "f', ['a', '"bc', '"d e"'], ''),
        ('a \\\\"bc "d e" "f', ['a', '\\"bc "d', 'e" "f'], ''),
        ('a \\\\\\"bc "d e" "f', ['a', '\\"bc', '"d e"'], ''),

        ('a "bc "d \\"f', ['a', '"bc "d', '"f'], ''),
        ('a "bc "d \\\\"f', ['a', '"bc "d'], ''),
        ('a "bc "d \\\\\\"f', ['a', '"bc "d', '\\"f'], ''),

        ('\\"bc "d "f', ['"bc', '"d "f'], ''),
        ('\\\\"bc "d "f', ['\\"bc "d'], ''),
        ('\\\\\\"bc "d "f', ['\\"bc', '"d "f'], ''),

        ('a "bc "d f\\"', ['a', '"bc "d', 'f"'], ''),
        ('a "bc "d f\\\\"', ['a', '"bc "d'], ''),
        ('a "bc "d f\\\\\\"', ['a', '"bc "d', 'f\\"'], ''),
    ]

    for line, expected, note in escape_cases:
        actual = strutil.tokenize(line, sep=' ', preserve=True)
        # Failure message kept identical to the historical format.
        msg = ('input: {_in}, output: {_out}, expected: {rst},'
               ' message: {_mes}').format(_in=repr(line),
                                          _out=repr(expected),
                                          rst=repr(actual),
                                          _mes=note)
        self.assertEqual(expected, actual, msg)

    # (input line, separator, preserve flag, expected token list)
    sep_cases = [
        ('', None, True, []),
        (' a  b  c ', None, True, ['a', 'b', 'c']),
        (' a  "b  c" ', None, True, ['a', '"b  c"']),
        (' a  "b  c" ', None, False, ['a', 'b  c']),
        ('a b c', None, True, ['a', 'b', 'c']),
        ('"a b c"', None, True, ['"a b c"']),
        ('"a b c"', None, False, ['a b c']),
        ('a b"c d"', None, True, ['a', 'b"c d"']),
        ('a b"c d"', None, False, ['a', 'bc d']),
        ('a bcd', 'bc', True, ['a ', 'd']),
        ('a "bc" d', 'bc', True, ['a "bc" d']),
        ('a "bc" d', 'bc', False, ['a bc d']),
        ('abcd', 'abcd', True, ['', '']),
    ]

    for line, sep, preserve, expected in sep_cases:
        dd('in: ', line, sep)
        actual = strutil.tokenize(line, sep=sep, quote='"', preserve=preserve)
        dd('out: ', actual)
        self.assertEqual(actual, expected)

    # (input line, quote char(s), preserve flag, expected token list)
    quote_cases = [
        ('""', '"', True, ['""']),
        ('""', '"', False, ['']),
        ('abc xd efx gh', 'x', True, ['abc', 'xd efx', 'gh']),
        ('abc xd efx gh', 'x', False, ['abc', 'd ef', 'gh']),
        ('ab cxd efx gh', 'x', True, ['ab', 'cxd efx', 'gh']),
        ('ab cxd efx gh', 'x', False, ['ab', 'cd ef', 'gh']),
        ('ab cxd efxgh', 'x', True, ['ab', 'cxd efxgh']),
        ('ab cxd efxgh', 'x', False, ['ab', 'cd efgh']),
        ('ab cxd yey fx gh', 'xy', True, ['ab', 'cxd yey fx', 'gh']),
        ('ab cxd yey fx gh', 'xy', False, ['ab', 'cd yey f', 'gh']),
        ('ab cxd yey f gh', 'xy', True, ['ab']),
        ('ab cxd yey f gh', 'xy', False, ['ab']),
        ('ab cxd xex f gh', 'x', True, ['ab']),
        ('ab cxd xex f gh', 'x', False, ['ab']),
    ]

    for line, quote, preserve, expected in quote_cases:
        dd('in: ', line, quote, preserve)
        actual = strutil.tokenize(line, sep=' ', quote=quote, preserve=preserve)
        dd('out: ', actual)
        self.assertEqual(actual, expected)
# Example #2
    def test_tokenize(self):
        """Verify strutil.tokenize() against fixed expectations covering
        escape sequences, custom separators and custom quote characters.
        """

        tokenize = strutil.tokenize

        def describe(src, want, got, note):
            # Failure message kept byte-identical to the historical format.
            return ('input: {_in}, output: {_out}, expected: {rst},'
                    ' message: {_mes}').format(_in=repr(src),
                                               _out=repr(want),
                                               rst=repr(got),
                                               _mes=note)

        # (input line, expected token list, note) -- sep=' ', preserve=True
        quoting_table = (
            ('', [''], ''),
            ('a', ['a'], ''),
            ('a b', ['a', 'b'], ''),
            (' a', ['', 'a'], ''),
            ('b ', ['b', ''], ''),
            ('abc def gh', ['abc', 'def', 'gh'], ''),
            ('"ab cd"', ['"ab cd"'], ''),
            ('"ab  cd"', ['"ab  cd"'], 'multiple space inside quotes'),
            ('"ab cd" ', ['"ab cd"', ''], ''),
            (' "ab cd"', ['', '"ab cd"'], ''),
            ('"ab cd" "x', ['"ab cd"'], 'discard incomplete'),
            ('"ab cd" "x y"', ['"ab cd"', '"x y"'], ''),
            ('foo "ab cd" "x y"', ['foo', '"ab cd"', '"x y"'], ''),
            ('foo "ab cd" "x', ['foo', '"ab cd"'], 'discard incomplete'),
            ('foo "\\"ab cd" "x', ['foo', '""ab cd"'], 'escape "'),
            ('foo "\\\\"ab cd" "x', ['foo', '"\\"ab', 'cd" "x'], 'escape \\'),
            ('foo "\\\\\\"ab cd" "x', ['foo', '"\\"ab cd"'], 'escape \\ "'),
            ('a \\"bc "d e" "f', ['a', '"bc', '"d e"'], ''),
            ('a \\\\"bc "d e" "f', ['a', '\\"bc "d', 'e" "f'], ''),
            ('a \\\\\\"bc "d e" "f', ['a', '\\"bc', '"d e"'], ''),
            ('a "bc "d \\"f', ['a', '"bc "d', '"f'], ''),
            ('a "bc "d \\\\"f', ['a', '"bc "d'], ''),
            ('a "bc "d \\\\\\"f', ['a', '"bc "d', '\\"f'], ''),
            ('\\"bc "d "f', ['"bc', '"d "f'], ''),
            ('\\\\"bc "d "f', ['\\"bc "d'], ''),
            ('\\\\\\"bc "d "f', ['\\"bc', '"d "f'], ''),
            ('a "bc "d f\\"', ['a', '"bc "d', 'f"'], ''),
            ('a "bc "d f\\\\"', ['a', '"bc "d'], ''),
            ('a "bc "d f\\\\\\"', ['a', '"bc "d', 'f\\"'], ''),
        )

        for src, want, note in quoting_table:
            got = tokenize(src, sep=' ', preserve=True)
            self.assertEqual(want, got, describe(src, want, got, note))

        # (input line, separator, preserve flag, expected token list)
        separator_table = (
            ('', None, True, []),
            (' a  b  c ', None, True, ['a', 'b', 'c']),
            (' a  "b  c" ', None, True, ['a', '"b  c"']),
            (' a  "b  c" ', None, False, ['a', 'b  c']),
            ('a b c', None, True, ['a', 'b', 'c']),
            ('"a b c"', None, True, ['"a b c"']),
            ('"a b c"', None, False, ['a b c']),
            ('a b"c d"', None, True, ['a', 'b"c d"']),
            ('a b"c d"', None, False, ['a', 'bc d']),
            ('a bcd', 'bc', True, ['a ', 'd']),
            ('a "bc" d', 'bc', True, ['a "bc" d']),
            ('a "bc" d', 'bc', False, ['a bc d']),
            ('abcd', 'abcd', True, ['', '']),
        )

        for src, sep, preserve, want in separator_table:
            dd('in: ', src, sep)
            got = tokenize(src, sep=sep, quote='"', preserve=preserve)
            dd('out: ', got)
            self.assertEqual(got, want)

        # (input line, quote char(s), preserve flag, expected token list)
        quote_char_table = (
            ('""', '"', True, ['""']),
            ('""', '"', False, ['']),
            ('abc xd efx gh', 'x', True, ['abc', 'xd efx', 'gh']),
            ('abc xd efx gh', 'x', False, ['abc', 'd ef', 'gh']),
            ('ab cxd efx gh', 'x', True, ['ab', 'cxd efx', 'gh']),
            ('ab cxd efx gh', 'x', False, ['ab', 'cd ef', 'gh']),
            ('ab cxd efxgh', 'x', True, ['ab', 'cxd efxgh']),
            ('ab cxd efxgh', 'x', False, ['ab', 'cd efgh']),
            ('ab cxd yey fx gh', 'xy', True, ['ab', 'cxd yey fx', 'gh']),
            ('ab cxd yey fx gh', 'xy', False, ['ab', 'cd yey f', 'gh']),
            ('ab cxd yey f gh', 'xy', True, ['ab']),
            ('ab cxd yey f gh', 'xy', False, ['ab']),
            ('ab cxd xex f gh', 'x', True, ['ab']),
            ('ab cxd xex f gh', 'x', False, ['ab']),
        )

        for src, quote, preserve, want in quote_char_table:
            dd('in: ', src, quote, preserve)
            got = tokenize(src, sep=' ', quote=quote, preserve=preserve)
            dd('out: ', got)
            self.assertEqual(got, want)
# Example #3
    # NOTE(review): tail fragment of a Python 2 demo script -- `t` and the
    # enclosing scope are defined before this excerpt begins; print-statement
    # syntax and xrange() confirm Python 2.
    print t

    # presumably colorize() colors the value relative to the second argument
    # (a ceiling) -- TODO confirm against strutil.colorize.
    for p in xrange(0, 200):
        print strutil.colorize(p, 100),
    print 
    for p in xrange(0, 100):
        print strutil.colorize(p, 10),
    print
    # Explicit format string: value right-aligned in 10 columns, '%' appended.
    print strutil.colorize(22, 100, '{0:>10}%')
    
    str_linestr = 'adw\nni\nleng\n'
    # Pad each line of the multi-line string with the same leading text.
    print strutil.line_pad(str_linestr, 'hehe' * 4)

    # NOTE(review): the call continues on the next line inside the parens
    # (legal, but easy to misread as a dangling `sep=`).
    print strutil.format_line([["name:", "age:"], ["drdrxp", "18"], "wow"], sep= 
    " | ", aligns="lll")

    # NOTE(review): += with no separator builds one long string, not distinct
    # records -- confirm that sharding() expects this, or newlines are missing.
    lines = 'cheng yang 180'
    lines += 'zhang xixi 150'
    lines += 'liu xiang 170'
    lines += 'yao ming 160'
    print strutil.sharding(lines, size=160, accuracy=30)
    
    a = [1, 2, 3, 4, 5, 6, 7, 8]
    for l in strutil.struct_repr(a):
        print l
    print

    # Default tokenize splits on whitespace; second call uses multi-char sep.
    print strutil.tokenize(' a\t b\n c\r ')
    print strutil.tokenize('a bxyc d', sep='xy')