def test_database_find_text(self):
    """Generate a database with random info and perform a text search.

    For every available compression type, opens the corresponding test
    database and verifies that searching "Pagina" in report 1 yields the
    expected (report, page, offset) triplets.
    """
    block = Block()  # Generic
    # Expected matches as (report_id, page, offset).
    # NOTE(review): the last offset is 12029 while all previous ones are
    # 12028 -- looks intentional (page 10 differs) but worth confirming.
    esperado = [
        (1, 1, 0), (1, 1, 12028), (1, 2, 0), (1, 2, 12028),
        (1, 3, 0), (1, 3, 12028), (1, 4, 0), (1, 4, 12028),
        (1, 5, 0), (1, 5, 12028), (1, 6, 0), (1, 6, 12028),
        (1, 7, 0), (1, 7, 12028), (1, 8, 0), (1, 8, 12028),
        (1, 9, 0), (1, 9, 12028), (1, 10, 0), (1, 10, 12029)
    ]
    for item in block.compressor.available_types:
        filename = os.path.join(self._repopath, "test.{0}-{1}.oerm".format(item[0], 0))
        db = Database(file=filename, mode="rb")
        matches = db.find_text("Pagina", reports=[1])
        # Materialize results before closing so a lazy iterator can't break.
        actual = [(x[0], x[1], x[2]) for x in matches]
        # Fix: the handle was leaked; test_read_database closes its db too.
        db.close()
        self.assertEqual(actual, esperado)
def test_read_database(self):
    """Read a database with random info and verify the results.

    Iterates every available compression type, reads back all pages of the
    matching test database and compares them to the pages written by the
    fixture.
    """
    block = Block()  # Generic
    for comp_type in block.compressor.available_types:
        db_path = os.path.join(
            self._repopath, "test.{0}-{1}.oerm".format(comp_type[0], 0))
        db = Database(file=db_path, mode="rb")
        read_pages = []
        for rpt in db.reports():
            read_pages.extend(rpt)
        db.close()
        self.assertEqual(self._paginas_escritas, read_pages)
def test_reports_find_text(self):
    """Generate a database with random info and perform a text search.

    Same as test_database_find_text but searches through the Reports
    collection (``db.reports().find_text``) restricted to report 2.
    """
    block = Block()  # Generic
    # Expected matches as (report_id, page, offset) for report 2.
    esperado = [
        (2, 1, 0), (2, 1, 12029), (2, 2, 0), (2, 2, 12029),
        (2, 3, 0), (2, 3, 12029), (2, 4, 0), (2, 4, 12029),
        (2, 5, 0), (2, 5, 12029), (2, 6, 0), (2, 6, 12029),
        (2, 7, 0), (2, 7, 12029), (2, 8, 0), (2, 8, 12029),
        (2, 9, 0), (2, 9, 12029), (2, 10, 0), (2, 10, 12029)
    ]
    for item in block.compressor.available_types:
        filename = os.path.join(self._repopath, "test.{0}-{1}.oerm".format(item[0], 0))
        db = Database(file=filename, mode="rb")
        matches = db.reports().find_text(text="Pagina", search_in_reports=[2])
        # Materialize results before closing so a lazy iterator can't break.
        actual = [(x[0], x[1], x[2]) for x in matches]
        # Fix: the handle was leaked; test_read_database closes its db too.
        db.close()
        self.assertEqual(actual, esperado)
def procces_tree(path, output=None, update=False):
    """Process the path of an Oerm database repository.

    Scans every ``*.oerm`` file under *path*, assigns numeric ids to the
    distinct report names / dates / systems / applications / departments
    (via AutoNum) and hands the collected catalog to *output*.

    Args:
        path (string): Main folder of the repository
        output: Index writer; defaults to a fresh ``SqliteRepoIndex()``
        update (bool): (Optional) Update or fully regenerate the catalog
    """
    # Fix: the original signature used ``output=SqliteRepoIndex()`` -- a
    # mutable default created once at import time and shared by every call.
    if output is None:
        output = SqliteRepoIndex()
    output.create_db(path)
    databases = []
    reports_list = []
    # AutoNum instances map each distinct value to a stable numeric id.
    reportid = AutoNum()
    dateid = AutoNum()
    systemid = AutoNum()
    appid = AutoNum()
    deptid = AutoNum()
    for i, f in enumerate(filesInPath(path, "*.oerm"), 1):
        databases.append((i, f))
        dbpath = os.path.join(path, f)
        d = Database(dbpath, mode="rb")
        print("Procesando {0}...".format(dbpath))
        for report in d.reports():
            elemento = (i,
                        reportid.get(report.nombre),
                        appid.get(report.aplicacion),
                        dateid.get(report.fecha),
                        systemid.get(report.sistema),
                        deptid.get(report.departamento),
                        report.total_pages)
            reports_list.append(elemento)
        # Fix: release the database handle (it was never closed).
        d.close()
    output.write(dateid, systemid, appid, deptid, reportid, databases, reports_list)
    output.close()
# Script body: writes the pages of a fixed test spool file into one Oerm
# database per available compression type.
block = Block()  # Generic
test_file = "d:/mfw/test.txt"
size_test_file = os.path.getsize(test_file)
for item in block.compressor.available_types:
    print("Procesando: [{0}] {1}".format(item[0], item[1]))
    start = time.time()
    paginas = 0
    # Fix: the original literal "out\{0}..." used the invalid escape
    # sequence "\{" (DeprecationWarning today, SyntaxError in future
    # Python). Doubling the backslash yields the exact same string value.
    file_name = "out\\{0}.prueba.oerm".format(item[1])
    db = Database(file=file_name,
                  default_compress_method=item[0],
                  default_encription_method=0,
                  pages_in_container=10)
    db.add_report(reporte="Sin identificar",
                  sistema="n/a",
                  aplicacion="n/a",
                  departamento="n/a")
    with SpoolHostReprint(test_file, buffer_size=102400, encoding="Latin1") as s:
        for page in s:
            paginas = paginas + 1
            db.add_page(page)
    db.close()
def process_file(configfile, inputfile, outputfile, compressiontype, complevel, ciphertype, testall, append, pagesingroups):
    """Compress/encrypt *inputfile* into Oerm databases and benchmark it.

    For every selected (compression, cipher) pair the input spool is written
    to a new ``.oerm`` file, then read back, and timing/size statistics are
    accumulated.

    Args:
        configfile: ReportMatcher configuration file
        inputfile: Input spool file to process
        outputfile: Base name for the generated ``.oerm`` files
        compressiontype: Compression algorithm id to use
        complevel: Compression level
        ciphertype: Cipher algorithm id to use
        testall: If it contains 'c'/'e', test all compressions/ciphers
        append: Append to existing databases instead of overwriting
        pagesingroups: Pages stored per container

    Returns:
        str: A formatted table (tabulate) with the benchmark results.
    """
    block = Block(default_compress_level=complevel)  # Generic
    resultados = []
    size_test_file = os.path.getsize(inputfile)
    compresiones = [e for e in block.compressor.available_types if e[0] == compressiontype]
    encriptados = [e for e in block.cipher.available_types if e[0] == ciphertype]
    if testall:
        if 'e' in testall:
            encriptados = block.cipher.available_types
        if 'c' in testall:
            compresiones = block.compressor.available_types
    mode = "ab" if append else "wb"
    r = ReportMatcher(configfile)
    for encriptado in encriptados:
        for compress in compresiones:
            print("Procesando: {2} Compresión: [{0}] {1} Cifrado: {3}".format(compress[0], compress[1], inputfile, encriptado[1]))
            start = time.time()
            paginas = 0
            file_name = "{0}.{1}.oerm".format(outputfile, slugify("{0}.{1}".format(compress[1], encriptado[1]), "_"))
            db = Database(file=file_name,
                          mode=mode,
                          default_compress_method=compress[0],
                          default_compress_level=complevel,
                          default_encription_method=encriptado[0],
                          pages_in_container=pagesingroups)
            reportname_anterior = ""
            # NOTE(review): newpage_code="NEVADO" looks specific to one test
            # input -- confirm it should not be a parameter.
            spool = SpoolFixedRecordLength(inputfile, buffer_size=102400, encoding="cp500", newpage_code="NEVADO")
            with spool as s:
                for page in s:
                    data = r.match(page)
                    reportname = data[0]
                    if reportname != reportname_anterior:
                        # New logical report detected in the spool stream.
                        db.add_report(reporte=reportname, sistema=data[1], aplicacion=data[2], departamento=data[3], fecha=data[4])
                        reportname_anterior = reportname
                    paginas = paginas + 1
                    db.add_page(page)
            db.close()
            compress_time = time.time() - start
            compress_size = os.path.getsize(file_name)
            # Read-back pass: decompress every page to time decompression.
            start = time.time()
            db = Database(file=file_name, mode="rb")
            for report in db.reports():
                try:
                    for page in report:
                        pass
                except Exception as err:
                    print("Error: {0} al descomprimir reporte".format(err))
                    break
            uncompress_time = time.time() - start
            container_size = compress_size / (db.Index.container_objects + db.Index.metadata_objects)
            # Fix: the read-back handle was never closed (file handle leak
            # per compression/cipher combination).
            db.close()
            resultados.append([
                "[{0}] {1} ({2}p/cont.)".format(compress[0], compress[1], pagesingroups),
                ("" if encriptado[0] == 0 else encriptado[1]),
                float(size_test_file),
                float(compress_size),
                (compress_size / size_test_file) * 100,
                paginas / compress_time,
                paginas / uncompress_time,
                container_size
            ])
    # NOTE(review): override_cols_fmt has 9 entries for 8 columns -- the
    # trailing ",.2f" appears to be a copy-paste leftover; harmless if the
    # tabulate wrapper ignores extras, but worth confirming.
    tablestr = tabulate(
        tabular_data=resultados,
        headers=["Algoritmo", "Encript.", "Real (bytes)", "Compr. (bytes)", "Ratio", "Compr. Pg/Seg", "Descompr. Pg/Seg", "BSize (Prom.)"],
        floatfmt="8.2f",
        tablefmt="psql",
        numalign="right",
        stralign="left",
        override_cols_fmt=[None, None, ",.0f", ",.0f", ",.2f", ",.2f", ",.2f", ",.2f", ",.2f"]
    )
    return tablestr
def _generate_db(cls, compress_method=1, encription_method=0):
    """Generate an Oerm Database with random info in a temporary path.

    Writes two reports into the same file: the first 10 fixture pages in
    "wb" mode, the remaining pages appended in "ab" mode.
    """
    filename = os.path.join(
        cls._repopath,
        "test.{0}-{1}.oerm".format(compress_method, encription_method))
    # One entry per report written: (open mode, report name, system,
    # application, department, pages to store).
    batches = (
        ("wb", cls._reports[0][1], "Sistema 1", "Aplicacion 1",
         "Departamento 1", cls._paginas_escritas[:10]),
        ("ab", cls._reports[1][1], "Sistema 2", "Aplicacion 2",
         "Departamento 2", cls._paginas_escritas[10:]),
    )
    for file_mode, rpt_name, system, app, dept, pages in batches:
        db = Database(file=filename,
                      mode=file_mode,
                      default_compress_method=compress_method,
                      default_encription_method=encription_method,
                      pages_in_container=10)
        db.add_report(reporte=rpt_name,
                      sistema=system,
                      aplicacion=app,
                      departamento=dept)
        for pg in pages:
            db.add_page(pg)
        db.close()
if __name__ == "__main__": cmdparser = init_argparse() try: args = cmdparser.parse_args() except IOError as msg: args.error(str(msg)) filename = args.inputfile if not file_accessible(filename, "rb"): print( _("Error: El archivo {0} no se ha encontrado o no es accesible para lectura" ).format(filename)) sys.exit(-1) d = Database(filename, mode="rb") # Listar reportes en la base oerm if args.listreports or (not args.showpages and not args.searchtext): reports_list = [] for report in d.reports(): reports_list.append((report.id, report.nombre, report.total_pages)) if reports_list: print("") print("Archivo : {0}".format(filename)) print("Reportes : {0}".format(len(reports_list))) print("Páginas : {0}".format(sum([e[2] for e in reports_list]))) print("") tablestr = tabulate(tabular_data=reports_list, headers=["Reporte", "Nombre", "Páginas"],
def add_repo(self, catalog_id, path, update=False):
    """Process the path of an Oerm database repository and generate its
    repo.db (sqlite) index.

    Basically catalogs every database and generates one sqlite file
    (repo.db) in the root directory of the repository, then registers the
    repository url under the given catalog in the client configuration.

    Args:
        catalog_id (string): Id of the catalog this repository is added to
        path (string): Main folder of the repository
        update (bool): (Optional) Update or fully regenerate the catalog
                       (NOTE(review): currently unused in this body)

    Example:
        >>> from openerm.OermClient import OermClient
        >>> c = OermClient("samples/openermcfg.yaml")
        >>> c.add_repo("catalogo1", "/var/repo1")
    """
    dbname = os.path.join(path, 'repo.db')
    # Always rebuild from scratch: drop any previous index file.
    if file_accessible(dbname, "r"):
        os.remove(dbname)
    conn = sqlite3.connect(dbname)
    # conn.text_factory = lambda x: repr(x)
    c = conn.cursor()
    c.execute("CREATE TABLE databases (database_id int, path text)")
    c.execute(
        "CREATE TABLE date (date_id INTEGER PRIMARY KEY ASC, date text)")
    c.execute(
        "CREATE TABLE system (system_id INTEGER PRIMARY KEY ASC, system_name text)"
    )
    c.execute(
        "CREATE TABLE department (department_id INTEGER PRIMARY KEY ASC, department_name text)"
    )
    c.execute(
        "CREATE TABLE report (report_id INTEGER PRIMARY KEY ASC, report_name text)"
    )
    # NOTE(review): "system_id" below carries no declared type (sqlite is
    # dynamically typed so it still works) -- probably meant "system_id int".
    c.execute(
        "CREATE TABLE reports (database_id int, report_id int, date_id int, system_id, department_id int, pages int)"
    )
    # c.execute("CREATE TABLE reports (database_id int, report_id text, report_name text, aplicacion text, fecha text, sistema text, departamento text, pages int)")
    databases = []
    reports_list = []
    # AutoNum instances map each distinct value to a stable numeric id,
    # later dumped via .list() into the dimension tables.
    reportid = AutoNum()
    dateid = AutoNum()
    systemid = AutoNum()
    deptid = AutoNum()
    # Walk every .oerm database in the repository and collect per-report
    # rows keyed by the numeric ids above.
    for i, f in enumerate(filesInPath(path, "*.oerm"), 1):
        databases.append((i, f))
        d = Database(os.path.join(path, f), mode="rb")
        for report in d.reports():
            # print("{0}: {1}".format(report.nombre, reportid.get(report.nombre)))
            elemento = (i,
                        reportid.get(report.nombre),
                        dateid.get(report.fecha),
                        systemid.get(report.sistema),
                        deptid.get(report.departamento),
                        report.total_pages)
            reports_list.append(elemento)
    # Bulk-insert dimension tables and the fact table, then persist.
    c.executemany("INSERT INTO date (date, date_id) VALUES (?,?)",
                  dateid.list())
    c.executemany(
        "INSERT INTO system (system_name, system_id) VALUES (?,?)",
        systemid.list())
    c.executemany(
        "INSERT INTO department (department_name, department_id) VALUES (?,?)",
        deptid.list())
    c.executemany(
        "INSERT INTO report (report_name, report_id) VALUES (?,?)",
        reportid.list())
    c.executemany("INSERT INTO databases (database_id, path) VALUES (?,?)",
                  databases)
    c.executemany(
        "INSERT INTO reports (database_id, report_id, date_id, system_id, department_id, pages) VALUES (?,?,?,?,?,?)",
        reports_list)
    conn.commit()
    conn.close()
    # Register the repository url under the catalog and flush the config.
    # NOTE(review): assumes catalog_id exists -- .get() returns None for an
    # unknown id and the membership test below would raise TypeError.
    d = self.config["catalogs"].get(catalog_id)
    if "urls" not in d:
        d["urls"] = [path]
    else:
        d["urls"].append(path)
    self._flush()
def procces_tree(path, update=False):
    """Process the path of an Oerm database repository and build catalog.db.

    Scans every ``*.oerm`` file under *path*, collects per-report metadata
    (report name, date, system, department, page count, file size) and
    writes it to a sqlite catalog (catalog.db) in the repository root.

    Args:
        path (string): Main folder of the repository
        update (bool): (Optional) Update or fully regenerate the catalog
    """
    dbname = os.path.join(path, 'catalog.db')
    # Always rebuild from scratch: drop any previous catalog file.
    if file_accessible(dbname, "r"):
        os.remove(dbname)
    conn = sqlite3.connect(dbname)
    c = conn.cursor()
    c.execute("CREATE TABLE databases (database_id int, path text, size int)")
    c.execute("CREATE TABLE date (date_id INTEGER PRIMARY KEY ASC, date text)")
    c.execute("CREATE TABLE system (system_id INTEGER PRIMARY KEY ASC, system_name text)")
    c.execute("CREATE TABLE department (department_id INTEGER PRIMARY KEY ASC, department_name text)")
    c.execute("CREATE TABLE report (report_id INTEGER PRIMARY KEY ASC, report_name text)")
    c.execute("CREATE TABLE reports (database_id int, report_id int, date_id int, system_id, department_id int, pages int)")
    databases = []
    reports_list = []
    # AutoNum instances map each distinct value to a stable numeric id.
    reportid = AutoNum()
    dateid = AutoNum()
    systemid = AutoNum()
    deptid = AutoNum()
    for i, f in enumerate(filesInPath(path, "*.oerm"), 1):
        fname = os.path.join(path, f)
        databases.append((i, f, os.stat(fname).st_size))
        d = Database(fname, mode="rb")
        for report in d.reports():
            # Fix: removed a leftover per-report debug print (the add_repo
            # twin of this routine keeps the same line commented out).
            elemento = (i,
                        reportid.get(report.nombre),
                        dateid.get(report.fecha),
                        systemid.get(report.sistema),
                        deptid.get(report.departamento),
                        report.total_pages)
            reports_list.append(elemento)
        # Fix: release the database handle (it was never closed).
        d.close()
    c.executemany("INSERT INTO date (date, date_id) VALUES (?,?)", dateid.list())
    c.executemany("INSERT INTO system (system_name, system_id) VALUES (?,?)", systemid.list())
    c.executemany("INSERT INTO department (department_name, department_id) VALUES (?,?)", deptid.list())
    c.executemany("INSERT INTO report (report_name, report_id) VALUES (?,?)", reportid.list())
    c.executemany("INSERT INTO databases (database_id, path, size) VALUES (?,?,?)", databases)
    c.executemany("INSERT INTO reports (database_id, report_id, date_id, system_id, department_id, pages) VALUES (?,?,?,?,?,?)", reports_list)
    conn.commit()
    conn.close()
def process_file(self, input_file):
    """Compress *input_file* into an Oerm database and return a stats table.

    For every selected (compression, cipher) pair (driven by self.config)
    the input spool is appended to a generated ``.oerm`` file while a
    progress bar tracks bytes processed; per-run size/time statistics are
    returned as a formatted tabulate table.

    Args:
        input_file: Path of the spool file to process.

    Returns:
        str: Formatted benchmark table.
    """
    block = Block(default_compress_level=self.config.compress_level)
    resultados = []
    self.input_file = input_file
    size_test_file = os.path.getsize(self.input_file)
    # Both spool readers are instantiated up-front; only the one matching
    # self.config.file_type is actually entered below.
    self.spool_types = {
        "fixed": SpoolFixedRecordLength(self.input_file,
                                        buffer_size=self.config.buffer_size,
                                        encoding=self.config.encoding,
                                        newpage_code=self.config.EOP),
        "fcfc": SpoolHostReprint(self.input_file,
                                 buffer_size=self.config.buffer_size,
                                 encoding=self.config.encoding)
    }
    compresiones = [
        e for e in block.compressor.available_types
        if e[0] == self.config.compress_type
    ]
    encriptados = [
        e for e in block.cipher.available_types
        if e[0] == self.config.cipher_type
    ]
    # Always append: the output file may already contain earlier runs.
    mode = "ab"
    r = ReportMatcher(self.config.report_cfg)
    reports = []  # distinct report names seen across the whole run
    for encriptado in encriptados:
        for compress in compresiones:
            start = time.time()
            paginas = 0
            # file_name = "{0}.{1}.oerm".format(self.config.output_path, slugify("{0}.{1}".format(compress[1], encriptado[1]), "_"))
            file_name = os.path.join(
                self.config.output_path,
                generate_filename(self.config.file_mask) + ".oerm")
            db = Database(
                file=file_name,
                mode=mode,
                default_compress_method=compress[0],
                default_compress_level=self.config.compress_level,
                default_encription_method=encriptado[0],
                pages_in_container=self.config.pages_in_group)
            # Size before writing, so compress_size below measures only the
            # bytes added by this run.
            file_size = os.path.getsize(file_name)
            reportname_anterior = ""
            widgets = [
                os.path.basename(self.input_file), ': ',
                FormatLabel(
                    '%(value)d bytes de %(max_value)d (%(percentage)0.2f)'
                ),
                Bar(marker='#', left='[', right=']'), ' ',
                ETA(), ' ',
                FileTransferSpeed()
            ]
            p_size = 0  # bytes consumed so far (drives the progress bar)
            with ProgressBar(max_value=size_test_file, widgets=widgets) as bar:
                spool = self.spool_types[self.config.file_type]
                with spool as s:
                    for page in s:
                        p_size += len(page)
                        bar.update(p_size)
                        data = r.match(page)
                        reportname = data[0]
                        if reportname not in reports:
                            reports.append(reportname)
                        if reportname != reportname_anterior:
                            # Reuse the report if it already exists in the
                            # database, otherwise create it.
                            rpt_id = db.get_report(reportname)
                            if rpt_id:
                                db.set_report(reportname)
                            else:
                                db.add_report(reporte=reportname,
                                              sistema=data[1],
                                              departamento=data[2],
                                              fecha=data[3])
                            reportname_anterior = reportname
                        paginas = paginas + 1
                        db.add_page(page)
            db.close()
            compress_time = time.time() - start
            # Net growth of the output file produced by this run only.
            compress_size = os.path.getsize(file_name) - file_size
            resultados.append([
                "[{0}] {1} ({2}p/cont.)".format(
                    compress[0], compress[1], self.config.pages_in_group),
                ("" if encriptado[0] == 0 else encriptado[1]),
                float(size_test_file),
                float(compress_size),
                (compress_size / size_test_file) * 100,
                paginas / compress_time,
                len(reports)
            ])
    tablestr = tabulate(tabular_data=resultados,
                        headers=[
                            "Algoritmo", "Encript.", "Real (bytes)",
                            "Compr. (bytes)", "Ratio", "Compr. Pg/Seg",
                            "Reportes"
                        ],
                        floatfmt="8.2f",
                        tablefmt="psql",
                        numalign="right",
                        stralign="left",
                        override_cols_fmt=[
                            None, None, ",.0f", ",.0f", ",.2f", ",.2f",
                            ",.2f"
                        ])
    return tablestr