Exemple #1
0
    def test_make_program_from_patterns_good(self):
        """Search a buffer with a program built from a pattern list.

        The text ``a+b`` is registered twice: once with default options and
        once with ``fixedString=True``. The expected hits pin one hit per
        keyword, with the offsets and metadata listed below.
        """
        haystack = b'aaaaabb a+b'

        pats = [
            ("a+b", ['UTF-8'], lightgrep.KeyOpts()),
            ("a+b", ['UTF-8'], lightgrep.KeyOpts(fixedString=True)),
        ]

        expected = [
            {'start': 0, 'end': 6, 'keywordIndex': 0,
             'pattern': 'a+b', 'encChain': 'UTF-8'},
            {'start': 8, 'end': 11, 'keywordIndex': 1,
             'pattern': 'a+b', 'encChain': 'UTF-8'},
        ]

        opts = lightgrep.ProgOpts()
        # The context is closed before the program when the block exits.
        with lightgrep.make_program_from_patterns(pats, opts) as prog, \
                lightgrep.Context(prog, lightgrep.CtxOpts()) as ctx:
            collector = lightgrep.HitAccumulator()
            ctx.searchBuffer(haystack, collector)

        self.assertEqual(collector.Hits, expected)
Exemple #2
0
 def test_count_three(self):
     """After three patterns are added, the program's count() is 3."""
     # (pattern text, numeric argument handed to add_pattern)
     additions = (("a+b", 42), ("foo", 1), (".+", 75))
     for expr, num in additions:
         self.pat.parse(expr, lightgrep.KeyOpts())
         self.fsm.add_pattern(self.prog, self.pat, 'UTF-8', num)
     self.assertEqual(self.prog.count(), 3)
Exemple #3
0
    def test_size(self):
        """A compiled one-pattern program reports a positive size.

        The Pattern and Fsm are only needed to build the program, so both are
        closed before the size is checked; the Program stays open for the
        assertion.
        """
        with lightgrep.Program(0) as prog:
            with lightgrep.Pattern() as pat, lightgrep.Fsm(0) as fsm:
                pat.parse("a+b", lightgrep.KeyOpts())
                fsm.add_pattern(prog, pat, 'UTF-8', 42)
                prog.compile(fsm, lightgrep.ProgOpts())

            # assertGreater reports the actual size on failure, whereas
            # assertTrue(size > 0) would only say "False is not true".
            self.assertGreater(prog.size(), 0)
Exemple #4
0
    def setUp(self):
        """Create a compiled program for ``a+b`` and a context bound to it.

        Both objects are stored on the test instance (``self.prog``,
        ``self.ctx``) and closed via ``addCleanup``. unittest runs cleanups
        in reverse registration order, so the context is closed before the
        program.
        """
        program = lightgrep.Program(0)
        self.prog = program
        self.addCleanup(program.close)

        # The Pattern and Fsm are only needed while compiling the program.
        with lightgrep.Pattern() as pattern, lightgrep.Fsm(0) as machine:
            pattern.parse("a+b", lightgrep.KeyOpts())
            machine.add_pattern(program, pattern, 'UTF-8', 42)
            program.compile(machine, lightgrep.ProgOpts())

        context = lightgrep.Context(program, lightgrep.CtxOpts())
        self.ctx = context
        self.addCleanup(context.close)
Exemple #5
0
    def test_write_read(self):
        """A Program built from another's write() output matches it.

        Compiles a one-pattern program, passes the value returned by
        ``write()`` to a second ``Program``, and checks that both report the
        same count and size.
        """
        with lightgrep.Program(0) as original:
            with lightgrep.Pattern() as pattern, lightgrep.Fsm(0) as machine:
                pattern.parse("a+b", lightgrep.KeyOpts())
                machine.add_pattern(original, pattern, 'UTF-8', 42)
                original.compile(machine, lightgrep.ProgOpts())

            written = original.write()

            with lightgrep.Program(written) as restored:
                self.assertEqual(restored.count(), original.count())
                self.assertEqual(restored.size(), original.size())
Exemple #6
0
 def test_flags(self):
     """KeyOpts accessors reflect every combination of constructor flags.

     All eight combinations of ``fixedString``, ``caseInsensitive`` and
     ``unicodeMode`` are checked, each in its own subtest.
     """
     for fixed_string in (False, True):
         for case_insensitive in (False, True):
             for unicode_mode in (False, True):
                 # All three flags go into the subTest parameters so that a
                 # failure report identifies the exact combination; leaving
                 # unicode_mode out would give two subtests the same label.
                 with self.subTest(fixed_string=fixed_string,
                                   case_insensitive=case_insensitive,
                                   unicode_mode=unicode_mode):
                     opts = lightgrep.KeyOpts(
                         fixedString=fixed_string,
                         caseInsensitive=case_insensitive,
                         unicodeMode=unicode_mode)
                     self.assertEqual(opts.isFixed(), fixed_string)
                     # The accessor is phrased as the negation of the
                     # constructor flag.
                     self.assertEqual(opts.isCaseSensitive(),
                                      not case_insensitive)
                     self.assertEqual(opts.isUnicodeMode(), unicode_mode)
import lightgrep as lg

# Using a `with` statement releases Lightgrep resources when the block exits.
# Because Lightgrep initialization is relatively heavyweight, it is better to
# loop over all files/strings inside a single `with` block than to open a new
# one per input.

# Sample text to search. The doubled backslashes are Python escapes, so the
# text itself contains single backslashes.
searchString = "hello, World O'Sullivan, please don't bl0w up Nain s\\09-123/12-002 s\\EU-12-23 s\\AU-13-059 "
testString = "hello, World"
# UTF-8 encoded (bytes) versions of the two strings above.
searchData = searchString.encode('utf-8')
testData = testString.encode('utf-8')
print(f"searchString: {searchString}")
# Each key is a (pattern, list of encodings, KeyOpts) tuple.
keys = [
    ("hello", ["UTF-8", "ISO-8859-1"],
     lg.KeyOpts(fixedString=True, caseInsensitive=False)),
    ("world", ["UTF-8"], lg.KeyOpts(fixedString=True, caseInsensitive=True)),
    # Regex with a digit class; intended to hit "bl0w" in searchString
    ("bl\\dw", ["UTF-8"], lg.KeyOpts(fixedString=False, caseInsensitive=True)),
    ("[^a-z]+", ["UTF-8"], lg.KeyOpts(fixedString=False,
                                      caseInsensitive=True)),
    # Backslash must be escaped once for Python, and again for Lightgrep
    ("s\\\\((A|E)U\\-)?\\d{1,3}-\\d{1,4}[^a-zA-Z0-9]", ["UTF-8"],
     lg.KeyOpts(fixedString=False, caseInsensitive=False)),
    # Or you can use an r-string to avoid the Python level of escaping
    (r'\\[\d-]+', ["UTF-8"],
     lg.KeyOpts(fixedString=False, caseInsensitive=False))
]

# Using `with` to open a Lightgrep object and perform a search,
# passing keys and callback at init.
print("============================")
print("Results using 'with'")
# Accumulator for the 'with' search; presumably used by code following this
# excerpt.
withHits = lg.HitAccumulator()
Exemple #8
0
 def test_add_pattern_good(self):
     """add_pattern() returns 0 for the first pattern added here."""
     self.pat.parse("a+b", lightgrep.KeyOpts())
     self.assertEqual(
         self.fsm.add_pattern(self.prog, self.pat, 'UTF-8', 42), 0)
Exemple #9
0
 def test_add_pattern_bad_args(self):
     """Fuzz add_pattern(): each argument replaced by a bad value must raise."""
     self.pat.parse("a+b", lightgrep.KeyOpts())
     fuzz_it(
         self,
         self.fsm.add_pattern,
         [self.prog, self.pat, 'UTF-8', 42],  # known-good baseline arguments
         (None, 'bogus'),                     # substitutes tried per position
     )
Exemple #10
0
 def test_add_pattern_closed_pat(self):
     """add_pattern() raises RuntimeError when given a closed Pattern."""
     pattern = self.pat
     pattern.parse("a+b", lightgrep.KeyOpts())
     pattern.close()
     self.assertRaises(
         RuntimeError,
         self.fsm.add_pattern, self.prog, pattern, 'UTF-8', 42)
Exemple #11
0
 def test_parse_bad_args(self):
     """Fuzz parse(): each argument replaced by a bad value must raise."""
     fuzz_it(
         self,
         self.pat.parse,
         ["a", lightgrep.KeyOpts()],  # known-good baseline arguments
         (None, '*'),                 # substitutes tried per position
     )
Exemple #12
0
 def test_parse_closed(self):
     """parse() on a closed Pattern raises RuntimeError."""
     self.pat.close()
     self.assertRaises(
         RuntimeError, self.pat.parse, "a", lightgrep.KeyOpts())
Exemple #13
0
 def test_parse_bad(self):
     """parse() raises RuntimeError for the pattern "+"."""
     self.assertRaises(
         RuntimeError, self.pat.parse, "+", lightgrep.KeyOpts())
Exemple #14
0
 def test_pattern_good(self):
     """parse() accepts the pattern "a+b" with default options."""
     default_opts = lightgrep.KeyOpts()
     self.pat.parse("a+b", default_opts)
Exemple #15
0
#!/usr/bin/python3

import array
import ctypes
import mmap
import unittest

import lightgrep

# Module-level pattern list: the text "a+b" over two encodings, once with
# caseInsensitive=True and once with fixedString=True.
PATLIST = [
    (
        "a+b",
        ['UTF-8', 'iso-8859-1'],
        lightgrep.KeyOpts(caseInsensitive=True),
    ),
    (
        "a+b",
        ['UTF-8', 'iso-8859-1'],
        lightgrep.KeyOpts(fixedString=True),
    ),
]


def fuzz_args(arglist, subs):
    """Yield variants of ``arglist`` with one argument replaced at a time.

    For every position in ``arglist`` and every value in ``subs``, yields a
    list equal to ``arglist`` except that the element at that position is the
    substitute. Positions are visited in order; for each position the
    substitutes are tried in the order given.

    Every yielded list is a fresh object. Callers may therefore collect the
    results, or hold on to one (as ``subTest(args=...)`` does until the
    failure report is printed), without a later variant overwriting it.

    Args:
        arglist: Sequence of known-good arguments; it is never modified.
        subs: Iterable of substitute values.

    Yields:
        list: One variant of ``arglist`` per (position, substitute) pair.
    """
    baseline = list(arglist)
    # Materialize once so a one-shot iterable is reused for every position.
    substitutes = tuple(subs)
    for position in range(len(baseline)):
        for substitute in substitutes:
            variant = baseline.copy()
            variant[position] = substitute
            yield variant


def fuzz_it(testobj, func, arglist, subs):
    """Assert that ``func`` raises for every fuzzed variant of ``arglist``.

    Each variant produced by ``fuzz_args(arglist, subs)`` is run in its own
    subtest of ``testobj``, labelled with the arguments used, and must raise
    some ``Exception`` when passed to ``func``.

    Args:
        testobj: Test case providing ``subTest`` and ``assertRaises``.
        func: Callable under test.
        arglist: Baseline argument list handed to ``fuzz_args``.
        subs: Substitute values handed to ``fuzz_args``.
    """
    for candidate in fuzz_args(arglist, subs):
        with testobj.subTest(args=candidate), \
                testobj.assertRaises(Exception):
            func(*candidate)


class PointerTests(unittest.TestCase):