def setUp(self):
    """Create an engine, import the ``net`` package and compile ``net.any``.

    Every step is checked with ``assertTrue``; the compiled pattern is
    kept on ``self.net_any`` for the tests.
    """
    self.engine = rosie.engine(librosiedir)
    self.assertTrue(self.engine)

    imported, _pkgname, _errs = self.engine.import_pkg(b'net')
    self.assertTrue(imported)

    compiled = self.engine.compile(b'net.any')
    self.net_any, _errs = compiled
    self.assertTrue(self.net_any)
def setUp(self):
    """Create an engine, import the ``net`` package and compile ``net.any``.

    Each result is checked with a bare ``assert``; the compiled pattern
    is stored on ``self.net_any``.
    """
    self.engine = rosie.engine(librosiedir)
    assert self.engine

    imported, _pkgname, _errs = self.engine.import_pkg('net')
    assert imported

    compiled = self.engine.compile("net.any")
    self.net_any, _errs = compiled
    assert self.net_any
def setUp(self):
    """Load librosie, create an engine and compile the ``net.any`` patterns.

    Stores the plain pattern on ``self.net_any`` and the ``findall:``
    variant on ``self.findall_net_any``; each step is checked with
    ``assertTrue``.
    """
    rosie.load(librosiedir, quiet=True)
    self.engine = rosie.engine()
    self.assertTrue(self.engine)

    imported, _pkgname, _errs = self.engine.import_pkg(b'net')
    self.assertTrue(imported)

    plain = self.engine.compile(b"net.any")
    self.net_any, _errs = plain
    self.assertTrue(self.net_any)

    findall = self.engine.compile(b"findall:net.any")
    self.findall_net_any, _errs = findall
    self.assertTrue(self.findall_net_any)
def __init__(self):
    """Create a rosie engine and compile the ``csv.comma`` and ``all.things`` patterns.

    When the ``ROSIE_HOME`` environment variable is set to a non-empty
    value, librosie is loaded from ``src/librosie/local`` beneath it, so
    a user can point at their own rosie installation. Otherwise no
    explicit load is performed here.
    """
    custom_home = os.getenv('ROSIE_HOME')
    if custom_home:
        rosie.load(os.path.join(custom_home, 'src/librosie/local'))

    self.engine = rosie.engine()
    for package in (b'all', b'csv'):
        self.engine.import_pkg(package)

    # The second element of each compile result (error info) is not inspected.
    self.csv_pattern, _errs = self.engine.compile(b'csv.comma')
    self.all_pattern, _errs = self.engine.compile(b'all.things')
def loadRosieEngine(self):
    """Load librosie from ``./lib`` and compile the date-related patterns.

    Sets ``self.date_patterns`` (compiled ``date.any``) and
    ``self.date_check``, a two-element list of compiled patterns: one for
    month-abbreviation-plus-digits text and one for short
    month/year or day/month combinations.
    """
    rosie.load('./lib', quiet=True)
    rosie_engine = rosie.engine()
    rosie_engine.import_package("date")
    self.date_patterns = rosie_engine.compile("date.any")

    # short_year must be defined before the second pattern below refers to it.
    rosie_engine.load("short_year = [0-9]{ 2 }")

    month_token = rosie_engine.compile(
        "{ { \"APR\" / \"OCT\" / \"MARCH\" / \"SEPT\" } [:digit:]* }")
    partial_date = rosie_engine.compile(
        "{ { date.month { \"-\" / [/] } short_year} / { date.day { \"-\" / [/] } date.month } }"
    )
    self.date_check = [month_token, partial_date]
def test(self):
    """Check ``load``/``compile`` results, their error reports, and object release.

    Covers loading a package from source text, compiling valid patterns
    (each expected to get a distinct ``id[0]``), compile and parse
    failures that report a JSON error list, and creating a second engine
    that is distinct from ``self.engine``.
    """

    def check_first_error(errs, who):
        # Failures hand back a JSON-encoded list; its first entry must carry
        # a message and name the component that rejected the input.
        errlist = json.loads(errs)
        self.assertGreater(len(errlist), 0)
        err = errlist[0]
        self.assertTrue(err['message'])
        self.assertEqual(err['who'], who)

    ok, pkgname, errs = self.engine.load(b'package x; foo = "foo"')
    self.assertTrue(ok)
    self.assertEqual(pkgname, b"x")
    self.assertIsNone(errs)

    b, errs = self.engine.compile(b"x.foo")
    self.assertTrue(b.valid())
    self.assertIsNone(errs)

    bb, errs = self.engine.compile(b"[:digit:]+")
    self.assertTrue(bb.valid())
    self.assertIsNone(errs)
    self.assertNotEqual(b.id[0], bb.id[0])

    # Unknown character class: compilation must fail with a compiler error.
    b2, errs = self.engine.compile(b"[:foobar:]+")
    self.assertFalse(b2)
    check_first_error(errs, 'compiler')

    b = None  # trigger call to librosie to gc the compiled pattern
    b, errs = self.engine.compile(b"[:digit:]+")
    # distinct values for distinct patterns
    self.assertNotEqual(b.id[0], bb.id[0])
    self.assertIsNone(errs)

    # The num package was never imported, so num.int cannot be compiled.
    num_int, errs = self.engine.compile(b"num.int")
    self.assertFalse(num_int)
    check_first_error(errs, 'compiler')

    # Unterminated string literal: rejected by the parser.
    ok, pkgname, errs = self.engine.load(b'foo = "')
    self.assertFalse(ok)
    check_first_error(errs, 'parser')

    engine2 = rosie.engine()
    self.assertTrue(engine2)
    self.assertNotEqual(engine2, self.engine)
    engine2 = None  # triggers call to librosie to gc the engine
def readFile():
    """Collect, per column of ``pattern.csv``, the rosie pattern types seen.

    The first CSV row only determines the number of columns. For every
    later cell, the pattern names already recorded for its column are
    tried first; if none fully matches, ``all.thing`` is tried and the
    type reported by ``rosieSub`` for that match is recorded (unless it is
    ``all.thing`` itself). Cells that match nothing are reported with
    ``NO MATCH``.

    Returns:
        A list with one set of pattern-type names per column.
    """
    librosiedir = './lib'
    rosie.load(librosiedir, quiet=True)
    engine = rosie.engine()
    engine.import_package('all')
    for pack in ('ts', 'date', 'time', 'net', 'num', 'id', 'word'):
        engine.import_package(pack)

    arrayOfPatterns = None
    with open('pattern.csv', 'r') as csvfile:
        fileRead = csv.reader(csvfile)
        rowOne = next(fileRead)
        cols = len(rowOne)
        arrayOfPatterns = [set() for _ in range(cols)]

        for row in fileRead:
            for i in range(cols):
                element = row[i]

                # Try the patterns already known for this column, stopping
                # at the first full match.
                match = None
                for known in arrayOfPatterns[i]:
                    match = engine.compile(known).fullmatch(element)
                    if match is not None:
                        break
                if match is not None:
                    # A known pattern covers this cell; nothing new to record.
                    continue

                # Fall back to the catch-all pattern to discover a new type.
                match = engine.compile('all.thing').fullmatch(element)
                if match is None:
                    print('NO MATCH', element)
                    continue

                best_match = rosieSub(match.rosie_match)
                print(best_match['type'], ',', best_match['data'])
                if best_match['type'] != 'all.thing':
                    arrayOfPatterns[i].add(best_match['type'])

        # NOTE(review): the original's indentation was lost; the summary is
        # assumed to be printed once, after all rows - confirm.
        print(arrayOfPatterns)
        print()
    return arrayOfPatterns
def test(self):
    """Check ``load``/``compile`` results, their error reports, and object release.

    Covers loading a package from source text, compiling valid patterns
    (each expected to get a distinct first element), compile and parse
    failures that report a JSON error list, and creating a second engine
    that is distinct from ``self.engine``.
    """

    def check_first_error(errs, who):
        # Failures hand back a JSON-encoded list; its first entry must carry
        # a message and name the component that rejected the input.
        errlist = json.loads(errs)
        assert len(errlist) > 0
        err = errlist[0]
        assert err['message']
        assert err['who'] == who

    ok, pkgname, errs = self.engine.load('package x; foo = "foo"')
    assert ok
    assert pkgname == "x"
    assert errs is None

    b, errs = self.engine.compile("x.foo")
    assert b[0] > 0
    assert errs is None

    bb, errs = self.engine.compile("[:digit:]+")
    assert bb[0] > 0
    assert errs is None
    assert b[0] != bb[0]

    # Unknown character class: compilation must fail with a compiler error.
    b2, errs = self.engine.compile("[:foobar:]+")
    assert not b2
    check_first_error(errs, 'compiler')

    b = None  # triggers call to librosie to gc the compiled pattern
    b, errs = self.engine.compile("[:digit:]+")
    assert b[0] != bb[0]  # distinct values for distinct patterns
    assert errs is None

    # The num package was never imported, so num.int cannot be compiled.
    num_int, errs = self.engine.compile("num.int")
    assert not num_int
    check_first_error(errs, 'compiler')

    # Unterminated string literal: rejected by the parser.
    ok, pkgname, errs = self.engine.load('foo = "')
    assert not ok
    check_first_error(errs, 'parser')

    engine2 = rosie.engine(librosiedir)
    assert engine2
    assert engine2 != self.engine
    engine2 = None  # triggers call to librosie to gc the engine
def date_format(element):
    """Rewrite a date string into a normalised layout based on its rosie type.

    The text is matched against ``date.any``; the type of the first
    sub-match selects how the captured pieces are reassembled. Text that
    does not match ``date.any`` at all, or whose type is not handled
    below, is returned unchanged.

    Args:
        element: The date text to reformat.

    Returns:
        The reformatted date string, or ``element`` itself when no
        rewrite applies.
    """
    librosiedir = './lib'
    rosie.load(librosiedir, quiet=True)
    engine = rosie.engine()
    engine.import_package("date")
    date_patterns = engine.compile("date.any")

    found = date_patterns.fullmatch(element)
    if found is None:
        # Not a recognised date: fall back exactly like an unhandled type.
        return element

    top = found.rosie_match['subs'][0]
    type_of_format = top['type']

    def part(index):
        # Captured text of the index-th component of the matched date.
        # Looked up lazily so types that need no components never touch
        # top['subs'].
        return top['subs'][index]['data']

    if type_of_format == "date.us_long":
        # A leading day name shifts the remaining components by one.
        offset = 1 if top['subs'][0]['type'] == "date.day_name" else 0
        return part(offset) + " " + part(offset + 1) + ", " + part(offset + 2)

    if type_of_format == "date.eur":
        return part(1) + "/" + part(0) + "/" + part(2)

    if type_of_format == "date.spaced":
        return element.replace(" ", "/")

    if type_of_format == "date.rfc2822" and "," in element:
        # Drop everything up to the last comma and format the remainder.
        return date_format(element[element.rindex(",") + 1:].strip())

    if type_of_format in ("date.spaced_en", "date.rfc2822"):
        return part(1) + " " + part(2) + ", " + part(0)

    if type_of_format == "date.us_short":
        return part(1) + " " + part(0) + ", " + part(2)

    return element
def setUp(self):
    """Create the engine under test from ``librosiedir`` and check it is truthy."""
    created = rosie.engine(librosiedir)
    self.engine = created
    self.assertTrue(self.engine)
def test(self):
    """Construct an engine from ``librosiedir``; the result is not inspected."""
    _engine = rosie.engine(librosiedir)
def readFile(data_file):
    """Scan a delimited text file for cells matching date, number and formula patterns.

    The dialect is sniffed from the file content, the first row is taken
    as column headings, and every following cell is classified:

    * ``ACTUALDATE`` - fully matches ``date.any``.
    * ``BIGNUM`` - digits with leading zeros, or 15 or more digits.
    * ``NOTADATE`` - month-abbreviation-plus-digits text or a short
      month/year or day/month combination.
    * ``INJECTION`` - starts with ``=``.

    All findings are also written as JSON to a file named after
    ``data_file`` with its last four characters replaced by
    ``_sorted.txt``.

    Args:
        data_file: Path of the file to analyse.

    Returns:
        A dict with the keys ``SYLK``, ``DUTCHSEP`` (the sniffed
        delimiter), ``ROWS``, ``COLUMNS``, ``INJECTION``, ``NOTADATE``,
        ``BIGNUM`` and ``DATESTAT`` (per column: the NOTADATE and
        ACTUALDATE counts, each divided by the number of cells seen).

    Raises:
        csv.Error: If the dialect cannot be determined.
    """
    # NOTE(review): the explicit rosie.load('./lib') call was disabled in the
    # original, so librosie is presumably loaded elsewhere - confirm.
    engine = rosie.engine()
    engine.import_package("date")
    date_patterns = engine.compile("date.any")

    info = {}
    date_found = []
    num_found = []
    injection_found = []
    findings = []
    rows = 0

    with open(data_file, 'r') as csvfile:
        content = csvfile.read()
        info["SYLK"] = content[0:2] == "ID"

        # Sniff on the text already read instead of reading the file again.
        dial = csv.Sniffer().sniff(content, delimiters=';,| \t')
        info["DUTCHSEP"] = dial.delimiter

        csvfile.seek(0)
        fileRead = csv.reader(csvfile, dialect=dial)
        keys = next(fileRead)
        print("CSV file has these column headings: {}".format(keys))
        cols = len(keys)

        date_counter = [0] * cols
        actual_date_counter = [0] * cols
        total_counter = [0] * cols

        date_check = engine.compile(
            "{ { \"APR\" / \"OCT\" / \"MARCH\" / \"SEPT\" } [:digit:]* }")
        num_check = engine.compile("{ [0]+ [:digit:]+ } / [0]+")
        # short_year must exist before excel_check refers to it.
        engine.load("short_year = [0-9]{ 2 }")
        excel_check = engine.compile(
            "{ { date.month { \"-\" / [/] } short_year } / { date.day { \"-\" / [/] } date.month } }"
        )
        limit = r"[:digit:]{15,}"
        num_check1 = engine.compile(limit)

        for row in fileRead:
            rows += 1
            # Slicing tolerates rows shorter than the header; cells beyond
            # the header width are ignored, as before.
            for i, element in enumerate(row[:cols]):
                total_counter[i] += 1
                cell = {"row_no": rows, "col_no": i + 1, "data": element}

                if date_patterns.fullmatch(element) is not None:
                    actual_date_counter[i] += 1
                    findings.append(dict(cell, type="ACTUALDATE"))

                if (num_check.fullmatch(element) is not None
                        or num_check1.fullmatch(element) is not None):
                    num_found.append(dict(cell))
                    findings.append(dict(cell, type="BIGNUM"))

                if (date_check.fullmatch(element.upper()) is not None
                        or excel_check.fullmatch(element) is not None):
                    date_counter[i] += 1
                    date_found.append(dict(cell))
                    findings.append(dict(cell, type="NOTADATE"))

                # startswith() is safe on empty cells, unlike element[0].
                if element.startswith("="):
                    injection_found.append(dict(cell))
                    findings.append(dict(cell, type="INJECTION"))

    info["ROWS"] = rows
    info["COLUMNS"] = cols
    info["INJECTION"] = injection_found
    info["NOTADATE"] = date_found
    info["BIGNUM"] = num_found
    # A column with no cells gets zero ratios instead of dividing by zero.
    info["DATESTAT"] = [
        [a / c, b / c] if c else [0.0, 0.0]
        for a, b, c in zip(date_counter, actual_date_counter, total_counter)
    ]
    print(info["DATESTAT"])

    with open(data_file[0:len(data_file) - 4] + "_sorted.txt", 'w') as file:
        json.dump(findings, file)
    return info
import rosie

# Minimal usage example: match an IP address against net.any and print the
# result. Use rosie.engine(librosiedir) instead when working with a local
# (non-system) installation.
engine = rosie.engine()
ok, pkgname, errs = engine.import_pkg('net')
net_any, errs = engine.compile("net.any")
match, leftover, abend, t0, t1 = engine.match(net_any, "1.2.3.4", 1, "color")
if leftover != 0:
    print("There were", leftover, "characters left over")
# NOTE(review): the original's indentation was lost; the match is assumed to
# be printed unconditionally - confirm.
print("Match was:", match)
def setUp(self):
    """Load librosie quietly from ``librosiedir`` and create the engine under test."""
    rosie.load(librosiedir, quiet=True)
    fresh_engine = rosie.engine()
    self.engine = fresh_engine
def test(self):
    """Check that an engine can be created and that a librosie path is reported."""
    rosie.load(librosiedir, quiet=True)

    created = rosie.engine()
    assert created

    library_path = rosie.librosie_path()
    assert library_path
def test(self):
    """Check that constructing an engine from ``librosiedir`` yields a truthy object."""
    created = rosie.engine(librosiedir)
    assert created
def setUp(self):
    """Create the engine under test from ``librosiedir``."""
    created = rosie.engine(librosiedir)
    self.engine = created
# © Copyright Jamie A. Jennings 2018.
# LICENSE: MIT License (https://opensource.org/licenses/mit-license.html)
# AUTHOR: Jamie A. Jennings

# Example:
# python generic_sloc.py "--" ../../src/core/*.lua

import sys
if len(sys.argv) < 2:
    print("Usage: " + sys.argv[0] + " <comment_start> [files ...]")
    sys.exit(-1)

comment_start = sys.argv[1]

import rosie


def _to_bytes(text):
    """Return *text* as bytes, encoding it as UTF-8 when it is not bytes already.

    ``bytes(some_str)`` raises TypeError on Python 3 because no encoding
    is given, so the conversion is made explicit here.
    """
    if isinstance(text, bytes):
        return text
    return text.encode('utf-8')


engine = rosie.engine()
# The pattern is a negative look-ahead: optional whitespace followed by either
# the comment marker or end of input.
source_line, errs = engine.compile(
    _to_bytes('!{[:space:]* "' + comment_start + '"/$}'))
if errs:
    print(str(errs))
    sys.exit(-1)


def is_source(line):
    """Return True when *line* is non-empty and matches the source-line pattern."""
    if not line:
        return False
    match, leftover, abend, t0, t1 = engine.match(
        source_line, _to_bytes(line), 1, b"bool")
    return bool(match)


def count(f):
    """Return how many lines of the iterable *f* are classified as source lines."""
    return sum(1 for line in f if is_source(line))