def setUp(self):
    """Prepare an engine with the ``net`` package and a compiled ``net.any``.

    Loads librosie from ``librosiedir``, stores a new engine on
    ``self.engine``, imports the ``net`` package and compiles ``net.any``
    into ``self.net_any``.  Each step is asserted to produce a truthy result.
    """
    rosie.load(librosiedir, quiet=True)

    self.engine = rosie.engine()
    self.assertTrue(self.engine)

    # import_pkg returns a 3-tuple; only the success flag is checked here.
    imported, _pkg_name, _import_errs = self.engine.import_pkg(b'net')
    self.assertTrue(imported)

    # compile returns (pattern, errors); the errors are not inspected.
    self.net_any, _compile_errs = self.engine.compile(b'net.any')
    self.assertTrue(self.net_any)
def __init__(self):
    """Create a Rosie engine and compile the CSV and catch-all patterns.

    Sets ``self.engine``, ``self.csv_pattern`` (``csv.comma``) and
    ``self.all_pattern`` (``all.things``).  The error values returned by
    the two compile calls are discarded.
    """
    # A user may point ROSIE_HOME at their own rosie checkout; when it is
    # unset (or empty) no explicit load happens and the rosie that is
    # already installed on the system is used.
    custom_home = os.getenv('ROSIE_HOME')
    if custom_home:
        rosie.load(os.path.join(custom_home, 'src/librosie/local'))

    engine = rosie.engine()
    self.engine = engine

    for pkg_name in (b'all', b'csv'):
        engine.import_pkg(pkg_name)

    self.csv_pattern, _csv_errs = engine.compile(b'csv.comma')
    self.all_pattern, _all_errs = engine.compile(b'all.things')
def loadRosieEngine(self):
    """Build the date-matching patterns used by this object.

    Loads librosie from ``./lib``, imports the ``date`` package and stores:

    * ``self.date_patterns`` - the compiled ``date.any`` pattern;
    * ``self.date_check`` - a two-element list of compiled expressions,
      in this order: the month-abbreviation expression, then the
      month/short-year or day/month expression.
    """
    rosie.load('./lib', quiet=True)
    rpl = rosie.engine()
    rpl.import_package("date")

    self.date_patterns = rpl.compile("date.any")

    # short_year must be defined before the second expression below,
    # which refers to it by name.
    rpl.load("short_year = [0-9]{ 2 }")

    month_then_digits = rpl.compile(
        "{ { \"APR\" / \"OCT\" / \"MARCH\" / \"SEPT\" } [:digit:]* }")
    partial_date = rpl.compile(
        "{ { date.month { \"-\" / [/] } short_year} / { date.day { \"-\" / [/] } date.month } }"
    )
    self.date_check = [month_then_digits, partial_date]
def readFile():
    """Collect, per CSV column, the Rosie pattern types seen in 'pattern.csv'.

    The first row of the file is treated as a header and only determines
    the number of columns.  For every later cell:

    1. If any pattern type already recorded for that column fully matches
       the cell, nothing new is recorded.
    2. Otherwise the cell is matched against ``all.thing``.  When that
       fails, ``NO MATCH`` and the cell are printed.  When it succeeds,
       ``rosieSub`` is applied to the match, its ``type`` and ``data`` are
       printed, and the type is added to the column's set unless it is
       ``all.thing`` itself.

    The resulting list is printed, followed by a blank line.

    Returns:
        list: one ``set`` of pattern type names per header column; an
        empty list when the file has no header row.
    """
    librosiedir = './lib'
    rosie.load(librosiedir, quiet=True)
    engine = rosie.engine()
    # Same import order as before: 'all' first, then the individual packages.
    for pkg_name in ['all', 'ts', 'date', 'time', 'net', 'num', 'id', 'word']:
        engine.import_package(pkg_name)

    # Compilation does not depend on the cell being examined, so the
    # catch-all is compiled once and each learned type at most once.
    catch_all = engine.compile('all.thing')
    compiled_by_type = {}

    # newline='' is what the csv module asks for so that quoted fields
    # containing line breaks are parsed correctly.
    with open('pattern.csv', 'r', newline='') as csvfile:
        rows = csv.reader(csvfile)
        # An empty file yields no header; return [] instead of letting
        # StopIteration escape.
        header = next(rows, None)
        column_types = [set() for _ in (header or [])]

        for row in rows:
            # zip() stops at the shorter side: blank or short rows no longer
            # raise IndexError, and cells beyond the header width are still
            # ignored.
            for element, known_types in zip(row, column_types):
                already_covered = False
                for type_name in known_types:
                    if type_name not in compiled_by_type:
                        compiled_by_type[type_name] = engine.compile(type_name)
                    if compiled_by_type[type_name].fullmatch(element) is not None:
                        already_covered = True
                        break
                if already_covered:
                    continue

                match = catch_all.fullmatch(element)
                if match is None:
                    print('NO MATCH', element)
                    continue

                best_match = rosieSub(match.rosie_match)
                print(best_match['type'], ',', best_match['data'])
                # A bare 'all.thing' result carries no specific type to record.
                if best_match['type'] != 'all.thing':
                    known_types.add(best_match['type'])

    print(column_types)
    print()
    return column_types
def date_format(element):
    """Rewrite a date string according to the Rosie date type it matches.

    Loads librosie from ``./lib``, compiles ``date.any`` and fully matches
    ``element`` against it.  The type of the first sub-match selects how the
    pieces are rearranged:

    * ``date.us_long``  -> ``"<a> <b>, <c>"`` using the three sub-matches
      that follow an optional leading ``date.day_name``;
    * ``date.eur``      -> second, first and third sub-match joined by ``/``;
    * ``date.spaced``   -> ``element`` with spaces replaced by ``/``;
    * ``date.spaced_en`` or comma-free ``date.rfc2822``
                        -> ``"<second> <third>, <first>"``;
    * ``date.rfc2822`` containing a comma -> the text after the last comma
      is stripped and formatted again;
    * ``date.us_short`` -> ``"<second> <first>, <third>"``.

    Args:
        element: the date text to reformat.

    Returns:
        The reformatted string, or ``element`` unchanged when it is not
        matched by ``date.any`` or its type is not one of those above.
    """
    librosiedir = './lib'
    rosie.load(librosiedir, quiet=True)
    engine = rosie.engine()
    engine.import_package("date")
    return _format_matched_date(engine.compile("date.any"), element)


def _format_matched_date(date_patterns, element):
    """Reformat ``element`` using an already compiled ``date.any`` pattern."""
    matched = date_patterns.fullmatch(element)
    if matched is None:
        # fullmatch yields None for text that is not a recognised date;
        # treat it like any other unhandled format instead of raising
        # AttributeError on `.rosie_match`.
        return element

    top = matched.rosie_match['subs'][0]
    type_of_format = top['type']

    def data(position):
        # Looked up lazily so formats that never touch the sub-matches
        # do not require them to exist.
        return top['subs'][position]['data']

    if type_of_format == "date.us_long":
        if top['subs'][0]['type'] == "date.day_name":
            # Skip the leading day name.
            return data(1) + " " + data(2) + ", " + data(3)
        return data(0) + " " + data(1) + ", " + data(2)

    if type_of_format == "date.eur":
        return data(1) + "/" + data(0) + "/" + data(2)

    if type_of_format == "date.spaced":
        return element.replace(" ", "/")

    if type_of_format == "date.rfc2822" and "," in element:
        # Drop everything up to the last comma and format the remainder,
        # reusing the compiled pattern rather than reloading the library.
        remainder = element[element.rindex(",") + 1:].strip()
        return _format_matched_date(date_patterns, remainder)

    # Any date.rfc2822 reaching this point contains no comma.
    if type_of_format in ("date.spaced_en", "date.rfc2822"):
        return data(1) + " " + data(2) + ", " + data(0)

    if type_of_format == "date.us_short":
        return data(1) + " " + data(0) + ", " + data(2)

    return element
def setUp(self):
    """Load librosie from ``librosiedir`` and store a new engine on ``self.engine``."""
    rosie.load(librosiedir, quiet=True)
    new_engine = rosie.engine()
    self.engine = new_engine
def test(self):
    """Smoke test: an engine can be created and a librosie path is reported."""
    rosie.load(librosiedir, quiet=True)

    created = rosie.engine()
    assert created

    reported_path = rosie.librosie_path()
    assert reported_path