def start(self):
    """
    Override: Code called on initial start and subsequent restarts.

    Must set:
    - self.option = Option()
    - self.schedule = Schedule()

    Note: We don't load resources here; resources are loaded on demand.
    """
    # load standard config
    config = ConfigSectionKey('../conf', '../local')
    self.config = config
    config.load('bootstrap.ini', 'bootstrap')
    config.load('init.ini')
    config.load('connect.ini')

    # load project specific config
    self.config.load(self.project_file)

    # load project specific options from optional project specific environ var
    environ_var = just_file_stem(self.project_file).lower()
    self.option = Option(environ_var, options=config('project').options)

    # load project specific schedule
    self.schedule = Schedule(config('schedule'))

    # diagnostics
    self.option.dump()
    self.config('project').dump(False)
    self.config('schedule').dump(False)
def setup(self):
    """Generic setup code."""
    # load standard config
    config = ConfigSectionKey('../conf', '../local')
    self.config = config
    config.load('bootstrap.ini', 'bootstrap')
    config.load('init.ini')
    config.load('connect.ini')

    # load project specific config
    self.config.load(self.project_file)
    self.project = self.config('project')

    # load project specific options from optional project specific environ var
    environ_var = just_file_stem(self.project_file).lower()
    self.option = Option(environ_var, options=config('project').options)

    # load project namespace
    self.namespace = self.config('namespace')

    # load project specific schedule
    self.schedule = Schedule(config('schedule'))

    # job specific folders
    self.state_folder = f'{self.session_folder}/{self.namespace.dataset}/state'
    self.work_folder = f'{self.session_folder}/{self.namespace.dataset}/work'
    self.publish_folder = f'{self.session_folder}/{self.namespace.dataset}/publish'
def main():
    # load standard config
    config = ConfigSectionKey('conf', 'local')
    config.load('bootstrap.ini', 'bootstrap')
    config.load('init.ini')
    config.load('connect.ini')
    setup(config)
def test():
    # configuration driven support
    config = ConfigSectionKey("../conf", "../local")
    config.load("bootstrap.ini", "bootstrap")
    config.load("init.ini")
    config.load("connect.ini")

    bs_test = BlobStore()
    resource = config("resource:bs_test_local")
    bs_test.create(resource)
    bs_test.connect(resource)
    bs_test.remove(resource)
    bs_test.create(resource)

    # good things
    save_text("testfile-1.txt", "test file")
    delete_file("testfile-2.txt", ignore_errors=True)

    # expected ConnectionError exception
    try:
        bs_test.put("testfile-1.txt", "downloads/testfile-1.txt")
    except ConnectionError as e:
        logger.info(f"Non-connected resource raised ConnectionError as expected: {e}")

    bs_test.connect(resource)
    assert bs_test.put("testfile-1.txt", "downloads/testfile-1.txt")
    assert bs_test.put("testfile-1.txt", "downloads/testfile-2.txt")
    assert bs_test.put("testfile-1.txt", "downloads/testfile-3.txt")
    assert bs_test.get("testfile-2.txt", "downloads/testfile-2.txt")

    downloads_folder_only = ["downloads"]
    downloads_folder_files = [
        "downloads/testfile-1.txt",
        "downloads/testfile-2.txt",
        "downloads/testfile-3.txt",
    ]
    # assert bs_test.list() == downloads_folder_only
    # assert bs_test.list('*') == downloads_folder_only
    # assert bs_test.list('/') == downloads_folder_only
    # assert bs_test.list('/downloads') == downloads_folder_files
    # assert bs_test.list('downloads') == downloads_folder_files
    # assert bs_test.list('downloads/') == downloads_folder_files

    bs_test.list("downloads")
    bs_test.list("downloads/")
    bs_test.list("downloads/*")
    bs_test.delete("downloads/testfile-1.txt")
    bs_test.list("downloads/*")

    # bad things
    assert not bs_test.list("bad-path*")
    assert not bs_test.put("bad-file-1.txt", "downloads/bad-file.txt")
    assert not bs_test.get("bad-file-2.txt", "downloads/bad-file.txt")
    assert not bs_test.delete("downloads/bad-file.txt")

    bs_test.clear()
def main():
    from config import ConfigSectionKey

    # test data
    config = ConfigSectionKey('conf', 'local')
    config.load('bootstrap.ini', 'bootstrap')
    config.load('init.ini')
    config.load('connect.ini')

    # for testing purposes:
    # - test with both cloud connection values (*capture and *archive)
    # - these connections have different permissions and will yield different results
    # cloud_connection_name = 'cloud:amc_aws_capture_01_etl'
    cloud_connection_name = 'cloud:udp_aws_archive_01_etl'
    cloud = config(cloud_connection_name)
    capture_objectstore_name = cloud.capture_objectstore
    capture_queue_name = cloud.capture_queue
    cloud.dump()

    # create test files (must have *.zip extension for S3:SQS notification)
    test_folder = 'test_folder_1'
    test_file_1 = f'{test_folder}/test1.zip'
    test_file_2 = f'{test_folder}/test2.zip'
    save_text(f'Test @{now()}', test_file_1)

    # object store put, get, delete
    objectstore = Objectstore(capture_objectstore_name, cloud)
    objectstore.put(test_file_1, 'test/test1.zip')
    objectstore.get(test_file_2, 'test/test1.zip')
    objectstore.delete('test/test1.zip')

    # sleep for 3 seconds to give notification message time to post to queue
    time.sleep(3)

    # queue get, remove
    queue = Queue(capture_queue_name, cloud)
    queue.put('Test message 1')
    time.sleep(2)
    queue.put('Test message 2')
    time.sleep(2)
    while True:
        time.sleep(1)
        response = queue.get()
        notification = ObjectstoreNotification(response)
        queue.delete(notification.message_id)
        if notification.message_id:
            logger.info(f'Test mode: notification message = {notification}')
        else:
            break

    # debugging info
    logger.info(f'Available queues: {queue._list_queue_names()}')
    queue._dump()
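# Generalized sketch of the drain loop in main() above (illustrative only, not part
# of the library): keep reading and deleting notifications until the queue hands back
# a message with an empty message_id. Assumes the same module-level imports and the
# same Queue/ObjectstoreNotification objects used in main().

def drain_queue(queue, pause_seconds=1):
    """Read, log, and delete notifications until the queue is empty."""
    while True:
        time.sleep(pause_seconds)
        notification = ObjectstoreNotification(queue.get())
        queue.delete(notification.message_id)
        if not notification.message_id:
            break
        logger.info(f'notification message = {notification}')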
def test():
    # activate logging
    log_setup()
    log_session_info()

    # load standard config
    config = ConfigSectionKey("../conf", "../local")
    config.load("bootstrap.ini", "bootstrap")
    config.load("init.ini")
    config.load("connect.ini")
    setup(config)
def setup(self):
    """Generic setup code."""
    # load standard config
    self.config = ConfigSectionKey('../conf', '../local')
    self.config.load('bootstrap.ini', 'bootstrap')
    self.config.load('init.ini')
    self.config.load('connect.ini')

    # load utility specific options using
    # env variable = UDP_<SCRIPT-NAME>; Option() retrieves command line options
    self.option = Option(f'udp_{script_name()}')

    # create/clear work folder
    self.work_folder = f'{self.session_folder}/{script_name()}/work'
    create_folder(self.work_folder)

    # display application banner
    # TODO: This should be a banner() method
    print(f'UDP {script_name()} utility')
    print('Alterra Unified Data Platform')
    copyright_year = f'{now():%Y}'
    copyright_message = f'Copyright (c) 2018-{copyright_year} Alterra Mountain Company, Inc.'
    print(copyright_message)
def __call__(self, ad, section_name):
    class_names = 'BlobStore KeyVault StorageQueue'
    config = ConfigSectionKey('../src')
    config.load('cloud.ini')
    resource = config(section_name)
    print(resource)

    resource_type = propercase_by_example(resource.resource_type, class_names)
    resource_class = f'{resource_type}{resource.platform.title()}'
    obj = eval(resource_class)(resource)

    # verify ad.identity has been authenticated
    if ad.identity is None:
        logger.warning('Identity not authenticated')
        raise ResourceException('Identity not authenticated')

    # verify access to resource
    if ad.has_access(obj):
        return obj
    else:
        logger.warning(f'Identity {ad.identity_name} not authorized for {resource.name}')
        return None
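# A minimal sketch (an assumption, not the library's implementation) of how the
# class-name resolution above composes: propercase_by_example() is assumed to recase
# a lowercase resource_type (e.g. 'blobstore') to match one of the example class
# names ('BlobStore'), and the platform ('azure') is title-cased and appended,
# yielding e.g. 'BlobStoreAzure' for eval(). The helper name below is hypothetical.

def resolve_resource_class_name(resource_type, platform,
                                class_names='BlobStore KeyVault StorageQueue'):
    """Illustrative helper: map ('blobstore', 'azure') -> 'BlobStoreAzure'."""
    by_lower = {name.lower(): name for name in class_names.split()}
    proper_type = by_lower.get(resource_type.lower(), resource_type)
    return f'{proper_type}{platform.title()}'


# resolve_resource_class_name('blobstore', 'azure') -> 'BlobStoreAzure'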
def main():
    # initialize the lists that will hold all config and db objects
    config_list = []
    db_list = []

    # Detect and load audit config files into a list of config objects
    # ToDo: Additional logic needed to include project files in local dir
    for project_file in sorted(pathlib.Path('../conf/').glob('project_capture*')):
        config = ConfigSectionKey('../conf', '../local')
        config.load('connect.ini')
        config.load(project_file)

        # conn_config drills through the project file to the connect.ini and returns a DatabaseSection object
        conn_config = config(config('project').database_source)
        if conn_config.platform == 'mssql':
            db_conn = MSSQL(conn_config)
            db = DatabaseAudit(f'{conn_config.platform}_audit', db_conn.conn)
            db.schema = conn_config.schema
        elif conn_config.platform == 'postgresql':
            db_conn = PostgreSQL(conn_config)
            db = DatabaseAudit(f'{conn_config.platform}_audit', db_conn.conn)
            db.schema = conn_config.schema
        else:
            print('platform not found; config file is incorrectly set up')
            continue

        # add db and config objects to respective lists based on project_capture_* files
        config_list.append(config)
        db_list.append(db)

    # add target_db to the db_list[]
    target_conn_config = config('database:amc_dsg_udp_01_stage_dev')
    target_db_conn = MSSQL(target_conn_config)
    target_db = DatabaseAudit('mssql_audit', target_db_conn.conn)
    target_db.use_database('udp_stage_dev')
    db_list.append(target_db)

    # Excel Logic
    environment = 'dev'
    time = datetime.datetime.now()
    file_name = fr'..\output\Audit_Results_{environment}_{time:%Y-%m-%d}.xlsx'

    # create workbook and worksheets
    workbook1 = xlsxwriter.Workbook(file_name)
    worksheet1 = workbook1.add_worksheet('Table Overview')
    worksheet2 = workbook1.add_worksheet('Column Overview')
    worksheet3 = workbook1.add_worksheet('Column Detail')

    # Start the magic
    iterate_row_count(config_list, db_list, workbook1, worksheet1)
    # iterate_null_columns(config_list, db_list, workbook1, worksheet2)
    # iterate_column_min_max(config_list, db_list, workbook1, worksheet3)

    # start it up
    workbook1.close()
    os.startfile(file_name)
def main():
    from config import ConfigSectionKey, ConfigSection

    # load cloud specific values
    config = ConfigSectionKey('conf', 'local')
    # config.load('connect.ini')
    config.load_file('../conf/azure.ini')
    cloud = config('cloud')

    # Load cloud test specific values
    cloud_config = ConfigSection('conf', 'local')
    cloud_config.load_file('../conf/cloud.cfg')
    # test_queue_message = cloud_config('test_queue_message_1')

    """objectstore logic"""
    # objectstore = Objectstore(cloud.account_id, cloud)
    # objectstore.put('C:/test/test.txt', 'test.txt')
    # objectstore.get('../tmp/readwrite_downloaded.txt', 'readwrite.txt')
    # objectstore.get('C:/udp-app-master/dev/tests/working', 'test.txt')
    # objectstore.delete('test.txt')
    # file_check = filecmp.cmp(f1='C:/test/test.txt', f2='C:/test/get/test.txt', shallow=False)
    # print(file_check)

    """Queue logic"""
    queue = Queue(cloud.account_id, cloud)
    # queue_names = queue._list_queue_names()
    # print(queue_names)

    # IMPORTANT: The Queue delete method parameter should now be an
    # ObjectstoreNotification object instead of just the message_id of the object.
    # queue.delete(notification)

    # queue.encode_function = QueueMessageFormat.text_base64encode
    queue_message = '{"Message":"Hello World"}'
    # encoded_message = str(base64.b64encode(queue_message.encode('utf-8')))
    queue_message = {
        "topic": "test_queue_message",
        "subject": "/This/Is/A/Test/Message/TestMessage",
        "eventType": "test_queue_message",
        "eventTime": "",
        "id": "",
        "data": {
            "api": "",
            "clientRequestId": "",
            "requestId": "",
            "eTag": "",
            "contentType": "",
            "contentLength": 0,
            "blobType": "",
            "url": "",
            "sequencer": "",
            "storageDiagnostics": {
                "batchId": ""
            }
        },
        "dataVersion": "",
        "metadataVersion": "1"
    }
    json_queue_message = json.dumps(queue_message)

    # response = queue.get()
    # notification = ObjectstoreNotification(response)

    queue.put(json_queue_message)
    response = queue.get()
    notification = ObjectstoreNotification(response)
    queue.delete(notification)
def set_up_cloud():
    config = ConfigSectionKey('conf', 'local')
    config.load_file('../conf/azure.ini')
    cloud_connection_name = 'cloud'
    cloud = config(cloud_connection_name)
    return cloud
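# Usage sketch (illustrative): the section returned by set_up_cloud() is the same
# 'cloud' object the main() test above builds inline, so it can be handed straight
# to the queue/objectstore helpers, e.g.:
#
#   cloud = set_up_cloud()
#   queue = Queue(cloud.account_id, cloud)
#   objectstore = Objectstore(cloud.account_id, cloud)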
def main():
    # Excel Logic
    time = datetime.datetime.now()
    file_name = fr'..\output\Connection_Results_{time:%Y-%m-%d}.xlsx'

    # create workbook and worksheets
    workbook = xlsxwriter.Workbook(file_name)
    worksheet1 = workbook.add_worksheet('Table Overview')

    # Workbook Formats
    header_format = workbook.add_format({'bold': True, 'underline': True})
    failure_format = workbook.add_format({'font_color': 'red', 'bold': True})
    success_format = workbook.add_format({'font_color': 'green', 'bold': True})
    undefined_format = workbook.add_format({'font_color': 'orange', 'bold': True})

    # Write Headers
    worksheet1.write(0, 0, 'Database', header_format)
    worksheet1.write(0, 1, 'Connection Result', header_format)

    # Config logic
    config = ConfigSectionKey('../conf', '../local')
    config.load('connect.ini')

    # set row index
    row_index = 1
    for database in sorted(db for db in config.sections if 'database:' in db):
        db_name = database.partition(':')[2]
        db_config = config(database)
        worksheet1.write(row_index, 0, db_name)

        if not db_config.database:
            worksheet1.write(row_index, 1, 'Connection Undefined', undefined_format)

        if db_config.platform == 'mssql':
            try:
                db_conn = MSSQL(db_config)
                print(f'{db_name}: Success')
                worksheet1.write(row_index, 1, 'Success', success_format)
            except Exception as e:
                print(f'{db_name}: Failed')
                worksheet1.write(row_index, 1, 'Failed', failure_format)
                worksheet1.write(row_index, 2, str(e))
        elif db_config.platform == 'postgresql':
            try:
                db_conn = PostgreSQL(db_config)
                print(f'{database}: Success')
                worksheet1.write(row_index, 1, 'Success', success_format)
            except Exception as e:
                print(f'{database}: Failed')
                worksheet1.write(row_index, 1, 'Failed', failure_format)
                worksheet1.write(row_index, 2, str(e))

        row_index += 1

    # start it up
    workbook.close()
    file_full_path = full_path(file_name)
    os.startfile(file_full_path)
def main():
    config = ConfigSectionKey('conf', 'local')
    config.load('bootstrap.ini', 'bootstrap')
    config.load('init.ini')
    config.load('connect.ini')

    # SQL Server
    mssql_connection = config('database:udp_aws_stage_01_datalake')
    db = MSSQL(mssql_connection)

    # cursor = conn.cursor()
    # cursor.execute('select top 10 * from udp_stage.amc_heroku_amp_01_sales.addresses;')
    # rows = cursor.fetchall()
    # for row in rows:
    #     print(row)
    # print()

    database = Database('mssql', db.conn)
    print(f'SQL Server current timestamp = {database.current_timestamp()}')
    database.create_database('test123')
    database.use_database('test123')
    database.create_schema('test_schema')

    # # test creating udp_admin tables via create_named_table
    # database.use_database('udp_stage')
    # database.create_schema('udp_admin')
    # table = database.select_table_schema('dbo', 'dimproduct')
    # database.create_table_from_table_schema('udp_admin', 'dimproduct2', table)
    # database.create_named_table('udp_admin', 'nst_lookup')
    # database.create_named_table('udp_admin', 'job_log')
    # database.create_named_table('udp_admin', 'table_log')

    # FUTURE:
    # extend create tables with custom udp* columns defined as a block
    # capture: udp_jobid, udp_timestamp, udp_rowversion
    # stage: udp_pk (identity for non-temp), udp_nk (based on primary key), udp_nstpk, udp_jobpk
    # TODO: Extra columns for #target (no identity) are slightly different than target (pk default identity).

    """
    # test staging management of arriving archived capture files
    database.use_database('udp_stage')
    database.create_schema('udp_catalog')
    database.drop_table('udp_catalog', 'stage_arrival_queue')
    database.drop_table('udp_catalog', 'stage_pending_queue')
    database.create_named_table('udp_catalog', 'stage_arrival_queue')
    database.create_named_table('udp_catalog', 'stage_pending_queue')

    new_file = dict(archive_file_name='amc_heroku_amp_01_sales-1.zip', job_id=1)
    database.insert_into_table('udp_catalog', 'stage_arrival_queue', **new_file)
    new_file = dict(archive_file_name='amc_heroku_amp_01_sales-2.zip', job_id=2)
    database.insert_into_table('udp_catalog', 'stage_arrival_queue', **new_file)
    new_file = dict(archive_file_name='amc_heroku_amp_01_sales-3.zip', job_id=3)
    database.insert_into_table('udp_catalog', 'stage_arrival_queue', **new_file)

    # new_file = dict(archive_file_name='amc_heroku_amp_01_sales-1.zip')
    # database.insert_into_table('udp_catalog', 'stage_pending_queue', **new_file)

    # any new arrivals that we can process? job_id=1 or next job in sequence?
    cursor = database.execute('select_from_stage_arrival_queue')
    row = cursor.fetchone()
    if row:
        # get object_key we should fetch for staging
        print(f'Found next file to stage: {row}')
        archive_file_name = row.archive_file_name
        job_id = int(archive_file_name.split('.')[0].rsplit('-', 1)[-1])
        namespace = archive_file_name.rsplit('-', 1)[0]
        object_key = f'{namespace}/{archive_file_name}'
        print(f'fetch from archives: {object_key}\n')

        # remove the file from both the arrival and pending queues
        database.execute('delete_from_stage_arrival_queue', archive_file_name)
        database.execute('delete_from_stage_pending_queue', archive_file_name)

        # post the next file in sequence for namespace to pending queue
        next_archive_file_name = f'{namespace}-{job_id + 1}.zip'
        next_file = dict(archive_file_name=next_archive_file_name)
        database.insert_into_table('udp_catalog', 'stage_pending_queue', **next_file)
    """

    """
    # Ideas:
    # [create_named_table_udp_catalog_stage_queue]
    # [insert_into_named_table_udp_catalog_stage_queue]
    # [select_from_named_table_udp_catalog_stage_queue]
    # [update_named_table_udp_catalog_stage_queue]
    # namespace, job_id, is_last_staged, is_pending, queued_timestamp
    # archive processes capture file: insert namespace, jobid, 0, 1, now() into stage_queue
    # stage poll - select new jobs in sequence
    # update
    # table.postgresql_to_mssql()
    # table.mssql_to_mssql()
    """

    # PostgreSQL
    postgresql_connection = config('database_amc_heroku_amp_01_sales_prod')
    db = PostgreSQL(postgresql_connection)

    # cursor = conn.cursor()
    # cursor.execute('select * from guests limit 10;')
    # for row in cursor.fetchall():
    #     print(row.email, row)

    database = Database('postgresql', db.conn)
    # print(f'PostgreSQL current timestamp = {database.current_timestamp()}')

    table_name = 'carts'
    table_1 = database.select_table_schema('public', table_name)
    output_stream = open(f'{table_name}.schema', 'wb')
    pickle.dump(table_1, output_stream)
    output_stream.close()

    input_stream = open(f'{table_name}.schema', 'rb')
    table_2 = pickle.load(input_stream)
    input_stream.close()
    for column in table_2.columns:
        print(table_2.columns[column])
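# Sketch of the archive naming convention assumed by the commented-out staging test
# above: capture files are named '<namespace>-<job_id>.zip', and the next file
# expected for a namespace is simply job_id + 1. The helper name below is ours
# (hypothetical), but the parsing mirrors the code in the block above.

def next_expected_archive(archive_file_name):
    """Return the next archive file expected after archive_file_name."""
    namespace = archive_file_name.rsplit('-', 1)[0]
    job_id = int(archive_file_name.split('.')[0].rsplit('-', 1)[-1])
    return f'{namespace}-{job_id + 1}.zip'


# next_expected_archive('amc_heroku_amp_01_sales-2.zip') -> 'amc_heroku_amp_01_sales-3.zip'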
def main(): config = ConfigSectionKey("../conf", "../local") config.load("project_capture_amc_rtp_sales.ini") for table_name in config.keys("table:*"): table = config(table_name) table.dump(dump_blank_values=False)
class Utility:

    def __init__(self):
        """Generic initialization code."""
        # session folder (create if missing)
        self.session_folder = '../sessions'
        create_folder(self.session_folder)

        # log folder (create if missing)
        self.log_folder = f'{self.session_folder}/logs'
        create_folder(self.log_folder)

        # work folder (create and clear)
        self.work_folder = f'{self.session_folder}/{script_name()}/work'
        clear_folder(self.work_folder)

        # configuration engines
        self.config = None
        self.option = None

        # database
        self.database = None
        self.target_db_conn = None

        # parameter driven
        self.dataset_id = ''
        self.table_name = ''

        # since we start logging before we read config/options, we log to a known path vs a dataset specific path
        log_setup(log_file_name=f'{self.log_folder}/{script_name()}.log')
        log_session_info()

    def run(self):
        """Generic workflow."""
        self.setup()
        self.main()
        self.cleanup()

    def setup(self):
        """Generic setup code."""
        # load standard config
        self.config = ConfigSectionKey('../conf', '../local')
        self.config.load('bootstrap.ini', 'bootstrap')
        self.config.load('init.ini')
        self.config.load('connect.ini')

        # load utility specific options using
        # env variable = UDP_<SCRIPT-NAME>; Option() retrieves command line options
        self.option = Option(f'udp_{script_name()}')

        # create/clear work folder
        self.work_folder = f'{self.session_folder}/{script_name()}/work'
        create_folder(self.work_folder)

        # display application banner
        # TODO: This should be a banner() method
        print(f'UDP {script_name()} utility')
        print('Alterra Unified Data Platform')
        copyright_year = f'{now():%Y}'
        copyright_message = f'Copyright (c) 2018-{copyright_year} Alterra Mountain Company, Inc.'
        print(copyright_message)

    def main(self):
        """Subclass with application specific logic."""
        pass

    def cleanup(self):
        """Subclass with application specific logic."""
        pass
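# Minimal usage sketch, assuming only the Utility framework above: a subclass
# overrides main() (and optionally cleanup()) while inheriting __init__() logging,
# folder management, and setup(). The class name below is illustrative, not part
# of the codebase.

class HelloUtility(Utility):

    def main(self):
        # application specific logic; config/options are already loaded by setup()
        print(f'work folder: {self.work_folder}')


# if __name__ == '__main__':
#     HelloUtility().run()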
def main():
    sdlc = 'dev'

    # ref_wb_dict will hold the reference definition workbooks keyed by '<directory>.<table name>'
    ref_wb_dict = {}
    masterdata_directory_list = ['access', 'common', 'reference']

    # Detect and load audit reference definition files into a list
    # ToDo: make glob case insensitive
    for masterdata_directory in masterdata_directory_list:
        for ref_file in sorted(pathlib.Path(f'../ref_docs/{masterdata_directory}/').glob('*.xlsx')):
            ref_wb = openpyxl.load_workbook(ref_file, data_only=True)
            ref_table_name = just_file_stem(str(ref_file))
            # Add file name (Table Name) as key. Add workbook object as value.
            ref_wb_dict.update([(f'{masterdata_directory}.{ref_table_name}', ref_wb)])

    # Delete all output files so new ones can be generated
    for output_file in sorted(pathlib.Path('../ref_docs/ddl_output/').glob('*.sql')):
        os.remove(output_file)

    config = ConfigSectionKey('../conf', '../local')
    config.load('connect.ini')

    # ToDo: Fill in uat and prod connection names when added to connect.ini
    if sdlc == 'dev':
        connection_name = 'database:amc_dsg_udp_01_stage_dev'
    elif sdlc == 'uat':
        connection_name = 'unknown connection point'
    elif sdlc == 'prod':
        connection_name = 'database:amc_dsg_udp_01_stage_prod'
    else:
        connection_name = 'unknown connection point'

    # Instantiate DatabaseExcel object using mssql_excel_upload.cfg as platform and udp_conn_config
    udp_conn_config = config(connection_name)
    udp_conn_config = MSSQL(udp_conn_config)
    udp_db = DatabaseExcel('mssql_excel_upload', udp_conn_config.conn)

    for key, value in ref_wb_dict.items():
        sql_file = open(f"../ref_docs/ddl_output/{key}.sql", "x", encoding='utf8')
        sql_use_statement = udp_db.use_database_sql(f'udp_masterdata_{sdlc}')
        sql_drop_table = udp_db.drop_table_sql(key.split('.')[0], key.split('.')[1])
        sql_create_schema = udp_db.create_schema_sql(key.split('.')[0])
        # sql_create_table = udp_db.create_table_sql(schema_name='udp_ref', table_name=key, workbook=value)
        sql_create_table = udp_db.create_table_sql_v2(
            schema_name=key.split('.')[0],
            table_name=key.split('.')[1],
            worksheet=value.worksheets[0])

        sql_file.write(sql_use_statement)
        # sql_file.write('\n begin transaction \n')
        sql_file.write(sql_create_schema)
        sql_file.write(sql_drop_table)
        sql_file.write(sql_create_table)

        # print(sql_use_statement)
        # udp_db.direct_execute(sql_use_statement)
        # print(sql_create_schema)
        # udp_db.direct_execute(sql_create_schema)
        # print(sql_drop_table)
        # udp_db.direct_execute(sql_drop_table)
        # print(sql_create_table)
        # udp_db.direct_execute(sql_create_table)
        # udp_db.direct_execute('commit')

        for sheet in [x for x in value.worksheets
                      if x.title.lower() not in ('documentation', 'change log', 'changelog')]:
            sql_insert_values = udp_db.insert_into_table_sql(
                schema_name=key.split('.')[0],
                table_name=key.split('.')[1],
                worksheet=sheet)
            sql_file.write(sql_insert_values)
            # print(sql_insert_values)
            # udp_db.direct_execute(sql_insert_values)

        # sql_file.write('\n end transaction \n')
        # sql_file.write('\n commit \n')

        # close so the DDL file is fully written before the publish step below reads it
        sql_file.close()

    # Clear all err_files
    for err_file in sorted(pathlib.Path('../ref_docs/log/').glob('*_error*')):
        os.remove(err_file)

    # Publish directly to udp_reference_<SDLC>
    for ddl_file in sorted(pathlib.Path('../ref_docs/ddl_output/').glob('*.sql')):
        print(f'executing {ddl_file}')
        ddl_sql = open(ddl_file, mode='r', encoding='utf8').read()
        try:
            # print(f'SQL Code: \n {ddl_sql}')
            udp_db.direct_execute(ddl_sql)
            udp_db.direct_execute('\n commit')
            print('execution successful!')
        except Exception as e:
            err_sql_file = open(f'../ref_docs/log/{ddl_file.stem}_error.sql', 'x', encoding='utf8')
            err_log_file = open(f'../ref_docs/log/{ddl_file.stem}_error.log', 'x', encoding='utf8')
            err_sql_file.write(ddl_sql)
            err_log_file.write(str(e))
            err_sql_file.close()
            err_log_file.close()