class StatementRunner:
    """Executes a list of StatementHelper objects against a database connection.

    :param connection: DB-API connection used for execute/commit/rollback
    :param statements: list of StatementHelper objects (each exposes .sql and .description)
    :param continue_on_error: when True, a failing statement is rolled back and
        logged and processing continues with the next statement
    :param show_statements: accepted for backward compatibility (currently unused)
    """

    logger = logging.getLogger("StatementRunner")

    def __init__(self, connection, statements, continue_on_error=False, show_statements=False):
        self.connection = connection
        self.statements = statements
        self.continue_on_error = continue_on_error
        self.cursor = CursorHelper(connection.cursor())

    def process(self, binds):
        """Run every statement with the given bind variables, committing each one.

        :param binds: dict of bind variables shared by all statements
        """
        for stmt in self.statements:
            self.logger.info(stmt.description)
            if self.continue_on_error:
                try:
                    # FIX: was execute(stmt, binds) — passed the helper object,
                    # not its sql, unlike the non-continue branch below
                    self.cursor.execute(stmt.sql, binds)
                    self.connection.commit()
                except Exception as e:
                    self.connection.rollback()
                    # FIX: previously re-raised here, which defeated
                    # continue_on_error entirely; now log and keep going
                    self.logger.error("statement failed: %s: %s", stmt.description, e)
            else:
                self.cursor.execute(stmt.sql, binds)
                self.connection.commit()  # TODO is per-statement commit a good idea?
def unload_sale_tot(self, etl_file_id: int, compute: bool = False) -> None:
    """
    Emit the sales-total ("ST") record for one etl_file.

    :param etl_file_id: the primary key of etl_file being unloaded
    :param compute: True - compute from actual values
                    False - extract only if record exists
    :return: None
    """
    # TODO Compute
    if self.sale_total_cursor is None:
        self.sale_total_cursor = CursorHelper(self.connection.cursor())
    sql = "select * from etl_sale_tot where etl_file_id = %(ETL_FILE_ID)s"
    rows = self.sale_total_cursor.execute(sql, {"ETL_FILE_ID": etl_file_id})
    if self.sale_total_column_names is None:
        self.sale_total_column_names = [col[0].upper() for col in self.sale_total_cursor.description]
    for db_row in rows:
        record = dict(zip(self.sale_total_column_names, db_row))
        record["HEADER"] = "9999999999"
        record["FILLER_28"] = "".ljust(28)
        record["SALES_REC_CNT"] = int(record["SALES_REC_CNT"])
        # amounts are stored in dollars; the fixed-width layout wants cents
        record["SUM_EXT_NET_AMT"] = int(record["SUM_EXT_NET_AMT"]) * 100
        record["FILLER_86"] = "".ljust(86)
        record["RECORD_TYPE"] = "ST"
        self.emit(field_metadata.format_line(self.record_defs["sales_total"], record))
def unload_inventory_tot(self, etl_file_id: int, compute: bool = False) -> None:
    """
    Emit the inventory-total ("IT") record for one etl_file.

    :param etl_file_id: primary key of the etl_file being unloaded
    :param compute: True - compute the totals rather than extract
                    False - extract if exists, else ignore
    :return: None
    """
    # FIX: dropped logger.setLevel(DEBUG) — level belongs to logging config,
    # not to a method body
    logger = logging.getLogger("unload_inventory_tot")
    if self.inventory_total_cursor is None:
        self.inventory_total_cursor = CursorHelper(self.connection.cursor())
    sql = "select * from etl_inventory_tot where etl_file_id = %(ETL_FILE_ID)s"
    rows = self.inventory_total_cursor.execute(sql, {"ETL_FILE_ID": etl_file_id})
    if self.inventory_total_column_names is None:
        self.inventory_total_column_names = [i[0].upper() for i in self.inventory_total_cursor.description]
    for row in rows:
        data_map = dict(zip(self.inventory_total_column_names, row))
        # FIX: leftover debug print of every column replaced with debug logging
        for k, v in data_map.items():
            logger.debug("inventory_tot k %s v %s type(v) %s", k, v, type(v))
        data_map["HEADER"] = "9999999999"
        data_map["FILLER36"] = "".ljust(36)
        data_map["RECORD_TYPE"] = "IT"
        data_map["RECORD_CNT_REPORTED"] = int(data_map["RECORD_CNT_REPORTED"])
        data_map["FILLER97"] = "".ljust(97)
        logger.debug("record definition %s", self.record_defs["inventory_total"])
        logger.debug("data_map = %s", data_map)
        outrec = field_metadata.format_line(self.record_defs["inventory_total"], data_map, trace=True)
        self.emit(outrec)
def unload_customer_total(self, etl_file_id: int, compute: bool = False) -> None:
    """
    Emit the customer-total ("CT") record for one etl_file.

    :param etl_file_id: primary key of the etl_file being unloaded
    :param compute: True - compute from etl_customer records
                    False - extract only if the record exists
    :return: None
    """
    if self.customer_total_cursor is None:
        self.customer_total_cursor = CursorHelper(self.connection.cursor())
    sql = "select * from etl_customer_tot where etl_file_id = %(ETL_FILE_ID)s"
    rows = self.customer_total_cursor.execute(sql, {"ETL_FILE_ID": etl_file_id})
    if self.customer_total_column_names is None:
        self.customer_total_column_names = [col[0].upper() for col in self.customer_total_cursor.description]
    for db_row in rows:
        record = dict(zip(self.customer_total_column_names, db_row))
        record["HEADER"] = "9999999999"
        record["RECORD_TYPE"] = "CT"
        record["FILLER_127"] = "".ljust(127)
        record["CUSTOMER_COUNT"] = int(record["CUSTOMER_COUNT"])
        record["FILLER_22"] = "".ljust(22)
        self.emit(field_metadata.format_line(self.record_defs["customer_total"], record))
def unload_inventory(self, id: int, by_etl_inventory_id: bool = True) -> None:
    """
    Emit inventory records.

    :param id: etl_inventory_id when by_etl_inventory_id is True, else etl_file_id
    :param by_etl_inventory_id: True - unload one record
                                False - unload all records for the file
    :return: None
    """
    if self.inventory_cursor is None:
        self.inventory_cursor = CursorHelper(self.connection.cursor())
    if by_etl_inventory_id:
        sql = "select * from etl_inventory where etl_inventory_id = %(ETL_INVENTORY_ID)s"
        binds = {"ETL_INVENTORY_ID": id}
    else:
        sql = "select * from etl_inventory where etl_file_id = %(ETL_FILE_ID)s"
        binds = {"ETL_FILE_ID": id}
    rows = self.inventory_cursor.execute(sql, binds)
    if self.inventory_column_names is None:
        self.inventory_column_names = [col[0].upper() for col in self.inventory_cursor.description]
    for row in rows:
        self.emit_inventory(dict(zip(self.inventory_column_names, row)))
def process(self, conn, binds: Dict[str, object], rerun: bool = False, verbose=False) -> None:
    """
    Run condition identification for one etl_file.

    :param conn: DB-API connection
    :param binds: must contain exactly one entry, "ETL_FILE_ID" (int)
    :param rerun: if True, deletes all the other runs (currently disabled)
    :param verbose: accepted for backward compatibility (currently unused)
    :raises ValueError: if binds does not hold exactly one integer ETL_FILE_ID
    :raises Exception: if the etl_file_id does not exist
    """
    self.logger.info("Connection is %s", conn)
    self.logger.info("Rerun %s", rerun)
    # FIX: validation used assert, which is stripped under -O; raise instead
    if len(binds) != 1:
        raise ValueError("binds must contain exactly one entry (ETL_FILE_ID), got %s" % binds)
    etl_file_id = binds["ETL_FILE_ID"]
    if not isinstance(etl_file_id, int):
        raise ValueError("ETL_FILE_ID must be an int, got %r" % etl_file_id)
    # if rerun:
    #     self.delete_run(conn, binds)
    sql = "select count(*) from etl_file where etl_file_id = %(ETL_FILE_ID)s"
    cursor = CursorHelper(conn.cursor())
    row_count = None
    for row in cursor.execute(sql, binds):
        row_count = row[0]
    if row_count == 0:
        raise Exception("No such etl_file_id %s" % etl_file_id)
    processor = ConditionIdentification(conn, self.rules)
    processor.process(binds, verbosity=3)
def test_no_rows(self, sql) -> None:
    """Assert that every row returned by the given count query is zero."""
    cursor = CursorHelper(self.connection.cursor())
    if self.verbose:
        logger.info("testing: %s" % sql)
    for row in cursor.execute(sql):
        assert row[0] == 0
    cursor.close()
class UtProcess:
    """Data-access helper for the ut_process_status / ut_process_log tables.

    :param connection: DB-API connection; two cursors are held, one for
        process-status rows and one for log (step) rows
    """

    def __init__(self, connection):
        self.connection = connection
        self.process_cursor = CursorHelper(connection.cursor())
        self.step_cursor = CursorHelper(connection.cursor())

    def insert_process(self, binds):
        """Insert one ut_process_status row.

        :param binds: dict providing schema_nm, process_nm, thread_nm,
            process_run_nbr, status_msg, status_id, status_ts, ignore_flg
        """
        insert_sql = """
           insert into ut_process_status (
           schema_nm, process_nm, thread_nm, process_run_nbr,
           status_msg, status_id, status_ts, ignore_flg
           ) values (
           %(schema_nm)s, %(process_nm)s, %(thread_nm)s, %(process_run_nbr)s,
           %(status_msg)s, %(status_id)s, %(status_ts)s, %(ignore_flg)s
           )
           """
        self.process_cursor.execute(insert_sql, binds)

    def get_process_for_id(self, id):
        """Fetch the ut_process row for one id.

        FIX: the SQL was incomplete ("where ut_process_id" with no comparison)
        and was never executed; it now binds the id and returns the rows.
        NOTE(review): table name ut_process taken from the original SQL —
        confirm against the schema (other methods use ut_process_status).
        """
        sql = "select * from ut_process where ut_process_id = %(UT_PROCESS_ID)s"
        return self.process_cursor.execute(sql, {"UT_PROCESS_ID": id})

    def insert_process_step(self, binds):
        """Insert one ut_process_log row.

        :param binds: dict providing ut_process_status_id, log_seq_nbr,
            log_msg_id, log_msg, log_msg_ts, caller_name, line_nbr,
            call_stack, log_level
        """
        sql = """
           insert into ut_process_log (
           ut_process_status_id, log_seq_nbr, log_msg_id, log_msg, log_msg_ts,
           caller_name, line_nbr, call_stack, log_level
           ) values (
           %(ut_process_status_id)s, %(log_seq_nbr)s, %(log_msg_id)s, %(log_msg)s,
           %(log_msg_ts)s, %(caller_name)s, %(line_nbr)s, %(call_stack)s, %(log_level)s
           )
           """
        self.step_cursor.execute(sql, binds)
def __init__(self, connection, verbose=False):
    """
    :param connection: a database connection
    :param verbose: when True, callers emit extra logging
    """
    self.connection = connection
    self.cursor = CursorHelper(self.connection.cursor())
    # FIX: the logger was bound to a dead local and discarded; keep it on the
    # instance so the class's methods can use it
    self.logger = logging.getLogger("SeedSalesReportingDatabase")
    self.verbose = verbose
def unload_all(self, output_dir="/tmp/python"):
    """Unload every etl_file to <output_dir>/<etl_file_id>.cds.

    :param output_dir: directory receiving the .cds files (generalized from a
        hard-coded path; default preserves the original behavior)
    """
    cursor = CursorHelper(self.connection.cursor())
    rows = cursor.execute("select etl_file_id from etl_file")
    for row in rows:
        etl_file_id = row[0]
        self.process(etl_file_id, "%s/%s.cds" % (output_dir, etl_file_id), False)
def __init__(self, connection, statements, continue_on_error=False, show_statements=False):
    """Hold the connection, the list of StatementHelper objects, and a shared cursor.

    :param connection: DB-API connection
    :param statements: list of StatementHelper
    :param continue_on_error: whether process() should keep going after a failure
    :param show_statements: accepted for backward compatibility
    """
    self.connection = connection
    self.statements = statements
    self.continue_on_error = continue_on_error
    self.cursor = CursorHelper(connection.cursor())
def unload_all(cls, output_dir="/tmp"):
    """Unload every etl_file to <output_dir>/<etl_file_id>.cds via CdsUnload.

    :param output_dir: target directory (generalized from the hard-coded /tmp;
        default preserves the original behavior)
    """
    sql = "select etl_file_id from etl_file"
    cursor = CursorHelper(cls.connection.cursor())
    for row in cursor.execute(sql):
        unloader = CdsUnload(cls.connection)
        etl_file_id = row[0]
        unloader.process(etl_file_id, "%s/%s.cds" % (output_dir, etl_file_id), False)
def check_customers(cls):
    """Group-count rows for etl_file_id 1 and assert more than one was found.

    NOTE(review): the SQL counts rows in etl_file but the printed label says
    "etl_customer count" — confirm which table was intended.
    """
    # FIX: added the missing space between count(*) and from
    sql = ("select etl_file_id, count(*) from etl_file "
           "where etl_file_id = %(ETL_FILE_ID)s group by etl_file_id")
    cursor = CursorHelper(cls.connection.cursor())
    binds = {"ETL_FILE_ID": 1}
    rows = cursor.execute(sql, binds)
    count = 0
    for row in rows:
        row_count = row[1]
        count += row_count
        print("etl_customer count: %s %s" % (row[0], row_count))
    assert (count > 1)
def __init__(self, connection):
    """Load the post-DML statement list from config/post_dml.yaml next to this module.

    :param connection: DB-API connection used by the shared cursor
    """
    self.connection = connection
    self.cursor = CursorHelper(connection.cursor())
    self.binds = {}
    module_dir = os.path.dirname(os.path.realpath(__file__))
    logger.debug("fdir %s" % module_dir)
    meta_file = os.path.join(module_dir, 'config/post_dml.yaml')
    logger.debug("using meta_file %s" % meta_file)
    self.statements = SqlStatements.from_yaml(meta_file).statements
def conditions(cls):
    """Run CdsDataloadConditions against every etl_file row."""
    logger.info("running conditions")
    processor = CdsDataloadConditions()
    cursor = CursorHelper(cls.connection.cursor())
    for row in cursor.execute("select etl_file_id from etl_file"):
        file_id = row[0]
        logger.info("conditions for %s" % file_id)
        processor.process(cls.connection, binds={"ETL_FILE_ID": file_id})
    cursor.close()
def read_sql(sql, connection, params):
    """
    Execute a select and return a Dataset populated with column names and rows.

    :param sql: select statement using pyformat binds
    :param connection: DB-API connection
    :param params: bind dict (or None)
    :return: Dataset with column_names and rows populated
    """
    retval = Dataset()
    cursor = CursorHelper(connection.cursor())
    # FIX: close the cursor even when execute/fetch raises
    try:
        rows = cursor.execute(sql, params)
        retval.column_names = [column[0] for column in cursor.description]
        for column in retval.column_names:
            Dataset.logger.debug("column: %s" % column)
        retval.rows = list(rows)
    finally:
        cursor.close()
    return retval
def test_one(self):
    """
    Ensure that an insert with a returning clause yields the generated key.
    # TODO test under sqlite
    """
    cursor = CursorHelper(self.connection.cursor())
    stmt = self.statements["etl_file_initial_insert"]
    sql = stmt["sql"]
    returning = stmt["returning"]
    self.assertEqual("returning etl_file_id", stmt["returning"])
    new_id = cursor.execute(sql, binds={"ORG_CD": "TX EXOTIC"}, returning=returning, verbose=False)
    self.assertIsNotNone(new_id)
def from_sql(connection, sql: str, params: Dict[str, object]):
    """
    Execute a select and return a Dataset with columns, rows, and inferred types.

    :param connection: DB-API connection
    :param sql: select statement using pyformat binds
    :param params: bind dict (or None)
    :return: Dataset with column_names, rows, and data types populated
    """
    retval = Dataset()
    cursor = CursorHelper(connection.cursor())
    # FIX: close the cursor even when execute/fetch raises
    try:
        rows = cursor.execute(sql, params)
        retval.column_names = [column[0] for column in cursor.description]
        for column in retval.column_names:
            Dataset.logger.debug("column: %s" % column)
        retval.rows = list(rows)
    finally:
        cursor.close()
    retval.determine_data_types()
    return retval
def unload_customer(self, id: int, by_etl_customer_id: bool) -> None:
    """Emit customer records: one by etl_customer_id, or all for an etl_file.

    :param id: etl_customer_id when by_etl_customer_id is True, else etl_file_id
    :param by_etl_customer_id: selects which key the query filters on
    """
    if self.customer_cursor is None:
        self.customer_cursor = CursorHelper(self.connection.cursor())
    if by_etl_customer_id:
        sql = "select * from etl_customer where etl_customer_id = %(ETL_CUSTOMER_ID)s"
        binds = {"ETL_CUSTOMER_ID": id}
    else:
        sql = "select * from etl_customer where etl_file_id = %(ETL_FILE_ID)s"
        binds = {"ETL_FILE_ID": id}
    rows = self.customer_cursor.execute(sql, binds)
    if self.customer_column_names is None:
        self.customer_column_names = [col[0].upper() for col in self.customer_cursor.description]
    for row in rows:
        self.emit_customer(dict(zip(self.customer_column_names, row)))
def unload_sale(self, id, by_etl_sale_id=True):
    """Emit sale records: one by etl_sale_id, or every sale for an etl_file.

    :param id: etl_sale_id when by_etl_sale_id is True, else etl_file_id
    :param by_etl_sale_id: selects which key the query filters on
    """
    if self.sale_cursor is None:
        self.sale_cursor = CursorHelper(self.connection.cursor())
    if by_etl_sale_id:
        sql = "select * from etl_sale where etl_sale_id = %(ETL_SALE_ID)s"
        binds = {"ETL_SALE_ID": id}
    else:
        sql = "select * from etl_sale where etl_file_id = %(ETL_FILE_ID)s"
        binds = {"ETL_FILE_ID": id}
    rows = self.sale_cursor.execute(sql, binds)
    if self.sale_column_names is None:
        self.sale_column_names = [col[0].upper() for col in self.sale_cursor.description]
    for row in rows:
        self.emit_sale(dict(zip(self.sale_column_names, row)))
def test_wrapped_cursor_with_return(self):
    """Insert with a returning clause and verify the returned id is the max etl_sale_id."""
    cursor = CursorHelper(self.connect.cursor())
    returning_id = cursor.execute(self.insert, self.binds, returning="returning etl_sale_id")
    logger.info("returning id is %s " % returning_id)
    value = None
    for row in cursor.execute("select count(*) from etl_sale"):
        value = row[0]
    actual_id = None
    for row in cursor.execute("select max(etl_sale_id) from etl_sale"):
        actual_id = row[0]
        logger.debug("actual_id %s" % actual_id)
    self.assertEqual(actual_id, returning_id)
    logger.info("value is %s" % value)
def render_cursor(self, workbook, connection, binds):
    """
    Render one sheet: execute self.sql with the given binds and write the
    resulting rows into the workbook.

    :param workbook: target workbook
    :param connection: DB-API connection
    :param binds: bind variables for self.sql
    :return: None
    """
    cursor = CursorHelper(connection.cursor())
    logger.info("sql: " + str(self.sql))  # TODO logging in Dexterous
    logger.info("binds: " + str(binds))
    rows = cursor.execute(self.sql, binds)
    self.populate_dataset_column_index(cursor.description)
    self.infer_metadata(cursor.description)
    self.render_data(workbook, rows)
def seed_distributor(self):
    """Insert the EXOTICTX distributor org and its org_distrib / org_datafeed rows."""
    binds = {"ORG_CD": "EXOTICTX"}
    self.cursor.execute("insert into org(org_cd) values ( %(ORG_CD)s)", binds)
    print('EXOTICTX inserted into org')
    # read the row back so the operator can see what was created
    verify_cursor = CursorHelper(self.connection.cursor())
    for row in verify_cursor.execute('select * from org order by org_cd'):
        print('org_id ' + str(row[0]) + ' org_cd ' + row[1])
    self.cursor.execute(
        "insert into org_distrib(org_id,distrib_id) "
        "select org_id,'EXOTICTX' "
        "from org "
        "where org_cd = %(ORG_CD)s",
        binds)
    self.cursor.execute(
        "insert into org_datafeed(org_id) "
        "select org_id "
        "from org "
        "where org_cd = %(ORG_CD)s",
        binds)
def create_schema(self) -> None:
    """
    Create the test schema (best effort) and make it the connection's current schema.

    The create is allowed to fail (e.g. the schema already exists); the failure
    is logged and the transaction rolled back before setting the schema.
    NOTE(review): self.test_schema is interpolated into DDL — it must come from
    trusted configuration, never from user input.
    """
    if self.test_schema:
        cursor = CursorHelper(self.connection.cursor())
        try:
            try:
                cursor.execute("create schema %s" % self.test_schema)
            except Exception as e:
                # deliberate best-effort: log, roll back, and continue
                logger.warning(e)
                self.connection.rollback()
                logger.warning("create schema %s failed, connection rolled back " % self.test_schema)
            set_schema_sql = "set schema '%s'" % self.test_schema
            logger.info("about to %s" % set_schema_sql)
            cursor.execute(set_schema_sql)
            logger.info("should be new schema")
        finally:
            # FIX: close the cursor even if "set schema" raises
            cursor.close()
def get_line_numbers(self, etl_file_id: int):
    """
    Retrieve (table_name, id, line_number) for every record loaded from the
    original input file, across the etl_* tables, ordered by line_number.

    NOTE(review): etl_sale_tot is not part of the union — confirm whether the
    sales-total record should also be listed.

    :param etl_file_id: primary key of the etl_file whose records are listed
    :return: iterable of (table_name, id, line_number) rows
    """
    if self.line_number_cursor is None:
        self.line_number_cursor = CursorHelper(self.connection.cursor())
    # the *_tot tables use etl_file_id as their id — one total row per file
    rows = self.line_number_cursor.execute("""
       select table_name, id, line_number from (
       select 'etl_customer' table_name, etl_customer_id id, line_number from etl_customer where etl_file_id = %(ETL_FILE_ID)s
       union
       select 'etl_sale', etl_sale_Id, line_number from etl_sale where etl_file_id = %(ETL_FILE_ID)s
       union
       select 'etl_inventory', etl_inventory_id, line_number from etl_inventory where etl_file_id = %(ETL_FILE_ID)s
       union
       select 'etl_inventory_tot', etl_file_id, line_number from etl_inventory_tot where etl_file_id = %(ETL_FILE_ID)s
       union
       select 'etl_customer_tot', etl_file_id, line_number from etl_customer_tot where etl_file_id = %(ETL_FILE_ID)s
       ) as by_line_number
       order by line_number
    """, {"ETL_FILE_ID": etl_file_id})
    return rows
def test_sqlite_returning(self):
    """Verify that a returning clause yields the new key on a sqlite connection."""
    logger = logging.getLogger(__name__ + ":test_sqlite_returning")
    ch = ConnectionHelper(ConnectionHelperTest.yaml_file_path)
    connection = ch.get_named_connection("sqlite3_mem")
    cursor = CursorHelper(connection.cursor())
    cursor.execute("drop table if exists a")
    cursor.execute("create table a (b serial primary key, c numeric)")
    new_id = cursor.execute("insert into a (c) values (%(c)s)", {"c": 3}, returning="returning b")
    self.assertEqual(1, new_id)
def to_sqlite(self, table_name, connection=None, verbose=False):
    """
    Materialize this Dataset into a sqlite table whose columns are inferred
    from the column names and data types.

    :param table_name: name of the table to create and populate
    :param connection: if a connection is specified the table will be created
        using it; otherwise a sqlite3 ":memory:" database is created
    :param verbose: when True, log every bind value
    :return: the connection holding the populated table
    """
    logger = logging.getLogger(__name__ + ":to_sqlite")
    if connection is None:
        connection = sqlite3.connect(":memory:")
    cursor = CursorHelper(connection.cursor())
    ddl_cols = self.get_ddl_columns()
    ddlgenner = DdlUtil(dialects.DIALECT_POSTGRES)  # TODO use dialects
    cursor.execute(ddlgenner.get_create_table(table_name, ddl_cols))
    insert_sql = ddlgenner.get_insert(table_name, ddl_cols)
    logger.debug("\n%s" % insert_sql)
    for row in self.rows:
        binds = {}
        # FIX: manual col_idx counter replaced with enumerate
        for col_idx, col_name in enumerate(self.column_names):
            val = row[col_idx]
            # sqlite has no native Decimal support; store as float
            if val is not None and isinstance(val, decimal.Decimal):
                val = float(val)
            binds[col_name] = val
            if verbose:
                # FIX: was logging.debug (root logger) despite the local logger
                logger.debug("col_name %s value %s type %s" % (col_name, row[col_idx], str(type(row[col_idx]))))
        cursor.execute(insert_sql, binds)
    connection.commit()
    rowcount = None
    for row in cursor.execute("select count(*) from %s" % table_name, None):
        rowcount = row[0]
    Dataset.logger.debug("rowcount %s" % rowcount)
    return connection
class SeedSalesReportingDatabase:
    """Seeds the org reference tables (manufacturers, distributor, datafeed)
    that must exist before a sales-reporting load can run."""

    def __init__(self, connection, verbose=False):
        """
        :param connection: a database connection
        :param verbose: when True, log each manufacturer's binds
        """
        self.connection = connection
        self.cursor = CursorHelper(self.connection.cursor())
        # FIX: logger was a dead local; keep it on the instance
        self.logger = logging.getLogger("SeedSalesReportingDatabase")
        self.verbose = verbose

    def seed_manufacturers(self):
        """
        Populates the tables org and org_mfr with CDS manufacturer identifiers
        :return: None
        """
        # TODO load from resources/cds_manufactures.csv
        mfrs = [
            ("0000000020", "F-L", "Frito-Lay"),
            ("0000000030", "GM", "General Mills"),
            ("0000000040", "HVEND", "Hershey Vending"),
            ("0000000050", "HFUND", "Hershey Fund Raising"),
            ("0000000055", "HCONC", "Hershey Concession"),
            ("0000000060", "SNYDERS", "Snyder's of Hanover"),
            ("0000000080", "KELLOGG", "Kellogg, Keebler"),
            ("0000000115", "KARS", "Kar Nut Product (Kar's)"),
            ("0000000135", "MARS", "Mars Chocolate "),
            ("0000000145", "POORE", "Inventure Group (Poore Brothers)"),
            ("0000000150", "WOW", "WOW Foods"),
            ("0000000160", "CADBURY", "Cadbury Adam USA, LLC"),
            ("0000000170", "MONOGRAM", "Monogram Food"),
            ("0000000185", "JUSTBORN", "Just Born"),
            ("0000000190", "HOSTESS", "Hostess, Dolly Madison"),
            ("0000000210", "SARALEE", "Sara Lee"),
        ]
        for mfr in mfrs:
            binds = dict(zip(("CDS_MFR_ID", "ORG_CD", "ORG_NM"), mfr))
            if self.verbose:
                # FIX: was logging.info on the root logger
                self.logger.info("binds: %s", binds)
            self.cursor.execute(
                "insert into org (org_cd, org_nm) values (%(ORG_CD)s, %(ORG_NM)s)",
                binds)
            self.cursor.execute(
                """
                insert into org_mfr( org_id,cds_mfr_id )
                select org_id, %(CDS_MFR_ID)s
                from org
                where org_cd = %(ORG_CD)s""",
                binds)

    def seed_distributor(self):
        """Insert the EXOTICTX distributor org and its org_distrib / org_datafeed rows."""
        self.cursor.execute("insert into org(org_cd) values ( %(ORG_CD)s)",
                            {"ORG_CD": "EXOTICTX"})
        print('EXOTICTX inserted into org')
        verify_cursor = CursorHelper(self.connection.cursor())
        rows = verify_cursor.execute('select * from org order by org_cd')
        for row in rows:
            print('org_id ' + str(row[0]) + ' org_cd ' + row[1])
        self.cursor.execute(
            "insert into org_distrib(org_id,distrib_id) "
            "select org_id,'EXOTICTX' "
            "from org "
            "where org_cd = %(ORG_CD)s",
            {"ORG_CD": "EXOTICTX"})
        self.cursor.execute(
            "insert into org_datafeed(org_id) "
            "select org_id "
            "from org "
            "where org_cd = %(ORG_CD)s",
            {"ORG_CD": "EXOTICTX"})

    def process(self):
        """
        Populates all the prerequisite tables
        :return: None
        """
        self.seed_manufacturers()
        self.seed_distributor()
from pdsutil.DbUtil import ConnectionHelper, CursorHelper
import sqlite3


def main():
    """Seed one manufacturer org into /tmp/wtf.dbf and dump org_mfr."""
    sql = """
    insert into org_mfr(org_id,cds_mfr_id)
    select org_id, %(CDS_MFR_ID)s from org
    where org_cd = %(ORG_CD)s
    """
    binds = {"CDS_MFR_ID": "0000000020", "ORG_CD": "F-L", "ORG_NM": "Frito-Lay"}
    # FIX: use the sqlite3.connect() factory, not the Connection class directly
    connection = sqlite3.connect("/tmp/wtf.dbf")
    cursor = CursorHelper(connection.cursor())
    cursor.execute(
        "insert into org (org_cd, org_nm) "
        "values (%(ORG_CD)s, %(ORG_NM)s)", binds)
    cursor.execute(sql, binds)
    # FIX: without a commit the inserts were lost when the connection closed
    connection.commit()
    for row in cursor.execute("select * from org_mfr"):
        print(row)


if __name__ == "__main__":
    # FIX: guard the entry point so importing this module has no side effects
    main()
from pdsutil.DbUtil import ConnectionHelper, CursorHelper
import sys
import csv

# valid csv quoting modes, kept for reference
quoting_types = [csv.QUOTE_NONNUMERIC, csv.QUOTE_ALL, csv.QUOTE_MINIMAL, csv.QUOTE_NONE]


def to_csv(self, file, emit_headers: bool = True, dialect: str = "excel",
           delimiter: str = ",", quotechar: str = '"',
           quoting: str = csv.QUOTE_NONNUMERIC):
    """Write this dataset's rows (and optionally a header line) to *file* as CSV.

    FIX: the writer previously ignored every formatting parameter and used
    hard-coded values; the parameters are now passed through. The quotechar
    default is changed from "'" to '"' to match the previously-effective
    hard-coded value, so default-argument callers see identical output.

    :param file: open, writable text file object
    :param emit_headers: when True, write self.column_names as the first row
    """
    writer = csv.writer(file, dialect=dialect, delimiter=delimiter,
                        quotechar=quotechar, quoting=quoting)
    if emit_headers:
        writer.writerow(self.column_names)
    for row in self.rows:
        writer.writerow(row)


if __name__ == "__main__":
    # FIX: argv[0] is the script path; the connection name is the first argument
    connection_name = sys.argv[1]
    connection = ConnectionHelper().get_named_connection(connection_name)
    cursor = CursorHelper(connection.cursor())
    # NOTE(review): `sql` and `binds` were never defined in the original and
    # raise NameError here — confirm where they were meant to come from
    rows = cursor.execute(sql, binds)