예제 #1
0
 def setUp(self):
     """Create an engine, import the ``net`` package and compile ``net.any``.

     The engine and the compiled pattern are stored on the test instance
     as ``self.engine`` and ``self.net_any``.
     """
     self.engine = rosie.engine(librosiedir)
     self.assertTrue(self.engine)
     # Only the success flag is checked; the package name and the error
     # report returned alongside it are not inspected.
     imported, _pkgname, _import_errs = self.engine.import_pkg(b'net')
     self.assertTrue(imported)
     compiled = self.engine.compile(b'net.any')
     self.net_any, _compile_errs = compiled
     self.assertTrue(self.net_any)
예제 #2
0
 def setUp(self):
     """Build an engine from ``librosiedir`` and compile ``net.any``.

     Stores the engine on ``self.engine`` and the compiled pattern on
     ``self.net_any``; each step is checked with a plain ``assert``.
     """
     self.engine = rosie.engine(librosiedir)
     assert self.engine
     # import_pkg yields three values; only the success flag is checked.
     imported, _pkgname, _import_errs = self.engine.import_pkg('net')
     assert imported
     compiled = self.engine.compile("net.any")
     self.net_any, _compile_errs = compiled
     assert self.net_any
예제 #3
0
 def setUp(self):
     """Load librosie, create an engine and compile two ``net.any`` patterns.

     After this runs the instance holds ``self.engine``, ``self.net_any``
     (compiled from ``net.any``) and ``self.findall_net_any`` (compiled
     from ``findall:net.any``).
     """
     rosie.load(librosiedir, quiet=True)
     self.engine = rosie.engine()
     self.assertTrue(self.engine)
     # Only the success flag of the import is verified.
     imported, _pkgname, _import_errs = self.engine.import_pkg(b'net')
     self.assertTrue(imported)
     plain = self.engine.compile(b"net.any")
     self.net_any, _plain_errs = plain
     self.assertTrue(self.net_any)
     findall = self.engine.compile(b"findall:net.any")
     self.findall_net_any, _findall_errs = findall
     self.assertTrue(self.findall_net_any)
예제 #4
0
 def __init__(self):
     """Create a rosie engine and compile the ``csv.comma`` and ``all.things`` patterns.

     When the ``ROSIE_HOME`` environment variable is set, librosie is
     loaded from ``src/librosie/local`` beneath it; otherwise no explicit
     load is performed before the engine is created.
     """
     # A user may prefer their own rosie installation over one that is
     # already installed on the system.
     custom_home = os.getenv('ROSIE_HOME')
     if custom_home:
         rosie.load(os.path.join(custom_home, 'src/librosie/local'))
     self.engine = rosie.engine()
     # Import results are not checked here.
     for package in (b'all', b'csv'):
         self.engine.import_pkg(package)
     # The error reports returned by compile are discarded.
     self.csv_pattern, _csv_errs = self.engine.compile(b'csv.comma')
     self.all_pattern, _all_errs = self.engine.compile(b'all.things')
예제 #5
0
 def loadRosieEngine(self):
     """Load librosie from ``./lib`` and compile the date-related patterns.

     Sets ``self.date_patterns`` (compiled from ``date.any``) and
     ``self.date_check``, a two-element list of compiled patterns.
     """
     rosie.load('./lib', quiet=True)
     rosie_engine = rosie.engine()
     rosie_engine.import_package("date")
     self.date_patterns = rosie_engine.compile("date.any")
     # short_year must be defined before the second pattern below, which
     # refers to it by name.
     rosie_engine.load("short_year = [0-9]{ 2 }")
     month_prefixed = rosie_engine.compile(
         "{ { \"APR\" / \"OCT\" / \"MARCH\" / \"SEPT\" } [:digit:]* }")
     month_year_or_day_month = rosie_engine.compile(
         "{ { date.month { \"-\" / [/] } short_year} / { date.day { \"-\" / [/] } date.month } }"
     )
     self.date_check = [month_prefixed, month_year_or_day_month]
예제 #6
0
    def test(self):
        """Exercise ``load`` and ``compile`` on the shared engine.

        Covers loading a package from source text, compiling valid and
        invalid expressions, the JSON error reports returned on failure,
        and creating and releasing a second engine.
        """
        loaded, package, report = self.engine.load(b'package x; foo = "foo"')

        self.assertTrue(loaded)
        self.assertTrue(package == b"x")
        self.assertTrue(report == None)

        pattern, report = self.engine.compile(b"x.foo")
        self.assertTrue(pattern.valid())
        self.assertTrue(report == None)

        digits, report = self.engine.compile(b"[:digit:]+")
        self.assertTrue(digits.valid())
        self.assertTrue(report == None)
        self.assertTrue(pattern.id[0] != digits.id[0])

        # An unknown character class must fail and produce a compiler error.
        bad_class, report = self.engine.compile(b"[:foobar:]+")
        self.assertTrue(not bad_class)
        problems = json.loads(report)
        self.assertTrue(len(problems) > 0)
        first_problem = problems[0]
        self.assertTrue(first_problem['message'])
        self.assertTrue(first_problem['who'] == 'compiler')

        # Dropping the reference triggers a call to librosie to gc the
        # compiled pattern.
        pattern = None
        pattern, report = self.engine.compile(b"[:digit:]+")
        # Distinct values for distinct patterns.
        self.assertTrue(pattern.id[0] != digits.id[0])
        self.assertTrue(report == None)

        num_int, report = self.engine.compile(b"num.int")
        self.assertTrue(not num_int)
        first_problem = json.loads(report)[0]
        self.assertTrue(first_problem['message'])
        self.assertTrue(first_problem['who'] == 'compiler')

        # Unterminated string literal: the failure is reported by the parser.
        loaded, package, report = self.engine.load(b'foo = "')
        self.assertTrue(not loaded)
        first_problem = json.loads(report)[0]
        self.assertTrue(first_problem['message'])
        self.assertTrue(first_problem['who'] == 'parser')

        second_engine = rosie.engine()
        self.assertTrue(second_engine)
        self.assertTrue(second_engine != self.engine)
        second_engine = None  # triggers call to librosie to gc the engine
예제 #7
0
def readFile():
    """Infer, per column of ``pattern.csv``, the set of rosie types seen.

    The first row of the file only determines the number of columns. For
    every later cell, the patterns already recorded for that column are
    tried first; when none of them fully matches, ``all.thing`` is tried and
    the type reported by ``rosieSub`` for that match is added to the
    column's set (unless the type is ``all.thing`` itself).

    Returns:
        A list with one set of pattern names per column.
    """
    rosie.load('./lib', quiet=True)
    engine = rosie.engine()
    engine.import_package('all')
    for package_name in ['ts', 'date', 'time', 'net', 'num', 'id', 'word']:
        engine.import_package(package_name)

    column_patterns = None
    with open('pattern.csv', 'r') as csvfile:
        reader = csv.reader(csvfile)
        width = len(next(reader))
        column_patterns = [set() for _ in range(0, width)]
        for record in reader:
            for col in range(0, width):
                cell = record[col]

                # Try the patterns already known for this column, stopping
                # at the first one that fully matches the cell.
                hit = None
                for known in column_patterns[col]:
                    hit = engine.compile(known).fullmatch(cell)
                    if hit != None:
                        break

                # Fall back to the catch-all pattern when nothing matched.
                used_fallback = hit == None
                if used_fallback:
                    hit = engine.compile('all.thing').fullmatch(cell)
                if hit is None:
                    print('NO MATCH', cell)
                    continue
                if not used_fallback:
                    # Matched by an already-recorded pattern: nothing new.
                    continue

                best = rosieSub(hit.rosie_match)
                print(best['type'], ',', best['data'])
                if best != None and best['type'] != 'all.thing':
                    column_patterns[col].add(best['type'])
                    print(column_patterns)

            print()

    return column_patterns
예제 #8
0
    def test(self):
        """Exercise ``load`` and ``compile`` on the shared engine.

        Covers loading a package from source text, compiling valid and
        invalid expressions, the JSON error reports returned on failure,
        and creating and releasing a second engine.
        """
        loaded, package, report = self.engine.load('package x; foo = "foo"')
        assert loaded
        assert package == "x"
        assert report == None

        pattern, report = self.engine.compile("x.foo")
        assert pattern[0] > 0
        assert report == None

        digits, report = self.engine.compile("[:digit:]+")
        assert digits[0] > 0
        assert report == None
        assert pattern[0] != digits[0]

        # An unknown character class must fail and produce a compiler error.
        bad_class, report = self.engine.compile("[:foobar:]+")
        assert not bad_class
        problems = json.loads(report)
        assert len(problems) > 0
        first_problem = problems[0]
        assert first_problem['message']
        assert first_problem['who'] == 'compiler'

        # Dropping the reference triggers a call to librosie to gc the
        # compiled pattern.
        pattern = None
        pattern, report = self.engine.compile("[:digit:]+")
        assert pattern[0] != digits[0]  # distinct values for distinct patterns
        assert report == None

        num_int, report = self.engine.compile("num.int")
        assert not num_int
        first_problem = json.loads(report)[0]
        assert first_problem['message']
        assert first_problem['who'] == 'compiler'

        # Unterminated string literal: the failure is reported by the parser.
        loaded, package, report = self.engine.load('foo = "')
        assert not loaded
        first_problem = json.loads(report)[0]
        assert first_problem['message']
        assert first_problem['who'] == 'parser'

        second_engine = rosie.engine(librosiedir)
        assert second_engine
        assert second_engine != self.engine
        second_engine = None  # triggers call to librosie to gc the engine
예제 #9
0
 def date_format(element):
     """Rewrite a date string into a layout chosen by its rosie date type.

     ``element`` is matched against ``date.any``; the type of the first
     sub-match selects how the captured pieces are reordered and joined.

     Args:
         element: The text to reformat.

     Returns:
         The reformatted string, or ``element`` unchanged when it is not
         matched by ``date.any`` or its type has no rule here.
     """
     # NOTE(review): librosie is loaded and the pattern recompiled on every
     # call, including the recursive one below.
     librosiedir = './lib'
     rosie.load(librosiedir, quiet=True)
     engine = rosie.engine()
     engine.import_package("date")
     date_patterns = engine.compile("date.any")

     matched = date_patterns.fullmatch(element)
     if matched is None:
         # No full match (for instance the tail of an rfc2822 string passed
         # in by the recursive call): leave the text as it is instead of
         # failing on ``None.rosie_match``.
         return element

     top = matched.rosie_match['subs'][0]
     type_of_format = top['type']

     def piece(index):
         # Looked up lazily so formats that never read sub-matches do not
         # require a 'subs' entry.
         return top['subs'][index]['data']

     if type_of_format == "date.us_long":
         # Skip a leading day name when present.
         first = 1 if top['subs'][0]['type'] == "date.day_name" else 0
         return piece(first) + " " + piece(first + 1) + ", " + piece(first + 2)
     if type_of_format == "date.eur":
         return piece(1) + "/" + piece(0) + "/" + piece(2)
     if type_of_format == "date.spaced":
         return element.replace(" ", "/")
     if type_of_format == "date.spaced_en" or (
             type_of_format == "date.rfc2822" and "," not in element):
         return piece(1) + " " + piece(2) + ", " + piece(0)
     if type_of_format == "date.rfc2822" and "," in element:
         # Reformat only what follows the last comma.
         index = element.rindex(",")
         return date_format(element[index + 1:].strip())
     if type_of_format == "date.us_short":
         return piece(1) + " " + piece(0) + ", " + piece(2)
     return element
예제 #10
0
 def setUp(self):
     """Create an engine from ``librosiedir`` and store it on ``self.engine``."""
     fresh_engine = rosie.engine(librosiedir)
     self.engine = fresh_engine
     self.assertTrue(self.engine)
예제 #11
0
 def test(self):
     """Check that constructing an engine from ``librosiedir`` does not raise."""
     # The result is bound only so the engine lives until the method returns.
     created = rosie.engine(librosiedir)
예제 #12
0
def readFile(data_file):
    """Scan a delimited text file for cells a spreadsheet might misread.

    Each data cell is checked for four conditions: it fully matches
    ``date.any`` (``ACTUALDATE``); it is a zero-prefixed or 15+ digit number
    (``BIGNUM``); it looks like a month/day abbreviation that is not meant
    as a date (``NOTADATE``); or it starts with ``=`` (``INJECTION``). Every
    finding is also written as JSON to a sibling ``*_sorted.txt`` file.

    Args:
        data_file: Path of the file to analyse. The output name is built by
            dropping its last four characters and appending ``_sorted.txt``.

    Returns:
        A dict with the keys ``SYLK``, ``DUTCHSEP``, ``ROWS``, ``COLUMNS``,
        ``INJECTION``, ``NOTADATE``, ``BIGNUM`` and ``DATESTAT``. ``DATESTAT``
        holds, per column, ``[not-a-date ratio, actual-date ratio]``; a
        column with no cells gets ``[0.0, 0.0]``.
    """
    # For a local (non-system) librosie, call
    # rosie.load('./lib', quiet=True) before creating the engine.
    engine = rosie.engine()
    engine.import_package("date")
    date_patterns = engine.compile("date.any")
    info = {}
    # Named `findings` rather than `all` to avoid shadowing the builtin.
    findings = []
    with open(data_file, 'r') as csvfile:
        content = csvfile.read()
        info["SYLK"] = content[0:2] == "ID"
        # Sniff on the text already in memory instead of reading it again.
        dial = csv.Sniffer().sniff(content, delimiters=';,| \t')
        info["DUTCHSEP"] = dial.delimiter
        csvfile.seek(0)
        fileRead = csv.reader(csvfile, dialect=dial)
        keys = next(fileRead)
        print("CSV file has these column headings: {}".format(keys))
        cols = len(keys)
        rows = 0
        date_found = []
        num_found = []
        injection_found = []
        date_counter = [0] * cols
        actual_date_counter = [0] * cols
        total_counter = [0] * cols
        date_check = engine.compile(
            "{ { \"APR\" / \"OCT\" / \"MARCH\" / \"SEPT\" } [:digit:]* }")
        num_check = engine.compile("{ [0]+ [:digit:]+ } / [0]+")
        engine.load("short_year = [0-9]{ 2 }")
        excel_check = engine.compile(
            "{ { date.month { \"-\" / [/] } short_year } / { date.day { \"-\" / [/] } date.month } }"
        )
        limit = r"[:digit:]{15,}"
        num_check1 = engine.compile(limit)
        #num_check2 = engine.compile("{ [:digit:]+ {\"e\" / \"E\"} {[\\-]? [:digit:]*}}")
        for row in fileRead:
            rows += 1
            # Slicing tolerates rows shorter than the header (missing cells
            # are skipped) and ignores cells beyond the header width.
            for i, element in enumerate(row[:cols]):
                if element is not None:
                    total_counter[i] += 1
                cell = {"row_no": rows, "col_no": i + 1, "data": element}
                if date_patterns.fullmatch(element) is not None:
                    actual_date_counter[i] += 1
                    findings.append(dict(cell, type="ACTUALDATE"))
                if (num_check.fullmatch(element) is not None
                        or num_check1.fullmatch(element) is not None):
                    num_found.append(dict(cell))
                    findings.append(dict(cell, type="BIGNUM"))
                if (date_check.fullmatch(element.upper()) is not None
                        or excel_check.fullmatch(element) is not None):
                    date_counter[i] += 1
                    date_found.append(dict(cell))
                    findings.append(dict(cell, type="NOTADATE"))
                # startswith is safe on empty cells, where element[0]
                # would raise IndexError.
                if element.startswith("="):
                    injection_found.append(dict(cell))
                    findings.append(dict(cell, type="INJECTION"))
        info["ROWS"] = rows
        info["COLUMNS"] = cols
        info["INJECTION"] = injection_found
        info["NOTADATE"] = date_found
        info["BIGNUM"] = num_found
        # Guard the ratios so a file without data rows does not divide by 0.
        info["DATESTAT"] = [
            [a / c, b / c] if c else [0.0, 0.0]
            for a, b, c in zip(date_counter, actual_date_counter,
                               total_counter)
        ]
        print(info["DATESTAT"])
    with open(data_file[0:len(data_file) - 4] + "_sorted.txt", 'w') as file:
        json.dump(findings, file)
    return info
예제 #13
0
import rosie
# Minimal usage example: import the `net` package, compile `net.any`, and
# match it against an IPv4 address using the "color" output encoder.
engine = rosie.engine(
)  # or rosie.engine(librosiedir) if using a local (non-system) installation
ok, pkgname, errs = engine.import_pkg('net')
net_any, errs = engine.compile("net.any")

match, leftover, abend, t0, t1 = engine.match(net_any, "1.2.3.4", 1, "color")
# Single-argument print(...) calls give the same output under Python 2 and
# Python 3; the former `print x, y` statements are a syntax error on 3.
if leftover != 0:
    print("There were %s characters left over" % (leftover,))
print("Match was: %s" % (match,))
예제 #14
0
 def setUp(self):
     """Load librosie quietly and store a new engine on ``self.engine``."""
     rosie.load(librosiedir, quiet=True)
     fresh_engine = rosie.engine()
     self.engine = fresh_engine
예제 #15
0
 def test(self):
     """Check that an engine can be created and librosie reports its path."""
     rosie.load(librosiedir, quiet=True)
     created = rosie.engine()
     assert created
     library_path = rosie.librosie_path()
     assert library_path
예제 #16
0
 def test(self):
     """Check that an engine built from ``librosiedir`` is truthy."""
     created = rosie.engine(librosiedir)
     assert created
예제 #17
0
 def setUp(self):
     """Store a new engine built from ``librosiedir`` on ``self.engine``."""
     fresh_engine = rosie.engine(librosiedir)
     self.engine = fresh_engine
예제 #18
0
#  © Copyright Jamie A. Jennings 2018.
#  LICENSE: MIT License (https://opensource.org/licenses/mit-license.html)
#  AUTHOR: Jamie A. Jennings

# Example:
# python generic_sloc.py "--" ../../src/core/*.lua

import sys
# The first command-line argument is the comment-start marker; without it
# the script prints a usage line and exits with status -1.
if len(sys.argv) < 2:
    usage = "Usage: " + sys.argv[0] + " <comment_start> [files ...]"
    print(usage)
    sys.exit(-1)

comment_start = sys.argv[1]

import rosie
engine = rosie.engine()
# Pattern built around the comment-start marker given on the command line.
_pattern_text = '!{[:space:]* "' + comment_start + '"/$}'
# bytes(text) without an encoding raises TypeError on Python 3, so encode
# explicitly; on Python 2 the text is already a byte string and is passed
# through unchanged.
source_line, errs = engine.compile(
    _pattern_text if isinstance(_pattern_text, bytes)
    else _pattern_text.encode('utf-8'))
if errs:
    print(str(errs))
    sys.exit(-1)

def is_source(line):
    """Return True when ``line`` is matched by the compiled ``source_line`` pattern.

    Args:
        line: One line of input, as text or as bytes.

    Returns:
        False for an empty or missing line; otherwise the truthiness of the
        match result from the engine's "bool" encoder.
    """
    if not line:
        return False
    # bytes(text) needs an explicit encoding on Python 3, so text lines are
    # encoded here; byte strings are passed through unchanged.
    data = line if isinstance(line, bytes) else line.encode('utf-8')
    match, leftover, abend, t0, t1 = engine.match(source_line, data, 1, b"bool")
    return bool(match)

def count(f):
    """Return how many lines of the iterable ``f`` satisfy ``is_source``."""
    return sum(1 for line in f if is_source(line))