def get_secret(secret_name="pocdbpw", region_name="eu-west-1", db_name="poc_data"):
    """Fetch database credentials from AWS Secrets Manager.

    The defaults reproduce the previously hard-coded values, so existing
    ``get_secret()`` callers are unaffected.

    Args:
        secret_name: Identifier of the secret to read.
        region_name: AWS region the Secrets Manager client is created for.
        db_name: Database name placed in the last slot of the result.

    Returns:
        A list ``[host, username, password, db_name]``.

    Raises:
        KeyError: If the decoded secret has no ``username``, ``password`` or
            ``host`` key.
        Exception: Errors from the Secrets Manager call or from JSON decoding
            are not caught here and propagate to the caller.
    """
    # Create a Secrets Manager client.
    session = boto3.session.Session()
    client = session.client(service_name='secretsmanager', region_name=region_name)
    response = client.get_secret_value(SecretId=secret_name)

    # The payload is read from 'SecretString' when present, otherwise the
    # 'SecretBinary' field is base64-decoded.
    if 'SecretString' in response:
        raw_secret = response['SecretString']
    else:
        raw_secret = base64.b64decode(response['SecretBinary'])

    secret = json.loads(raw_secret)
    username = secret["username"]
    password = secret["password"]
    host = secret["host"]

    creds = [host, username, password, db_name]
    logger.info("logger import worked!")
    return creds
def save_transaction(self, transformed_list):
    """Insert cleaned transactions into ``clean_transactions`` and commit.

    A progress line with an estimated remaining time is printed every tenth
    row. All rows are committed together after the loop.

    Args:
        transformed_list: Sized sequence of rows; the first eight items of
            each row are used as the INSERT parameters
            (id, date, transaction_time, location, firstname, lastname,
            total_price, method).

    Notes:
        ``self.update_sql`` and ``progress`` are defined elsewhere;
        presumably ``update_sql`` executes the statement on the given
        connection and returns the cursor it used -- verify against the
        helper.
    """
    sql_query = (
        "INSERT INTO clean_transactions "
        "(id, date, transaction_time, location, firstname, lastname, total_price, method) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
    )
    connection = self.get_connection()
    try:
        start = time.time()
        total = len(transformed_list)
        logger.info(f"The number of transactions processed:{total}")
        print(f"The number of transactions processed:{total}")

        # Stays None when there is nothing to insert, so the close below
        # cannot hit an unbound name on empty input.
        cursor = None
        for index, transaction in enumerate(transformed_list):
            cursor = self.update_sql(sql_query, transaction[0:8], connection)
            if index % 10 == 0 and index != 0:
                elapsed = time.time() - start
                percentage = round(index * 100 / total, 2)
                # Derive the estimate from the raw row counts: the rounded
                # percentage is 0.0 for very large inputs and dividing by it
                # raised ZeroDivisionError.
                time_remaining = elapsed * (total - index) / index
                print(
                    f"Progress: {progress(percentage)} [{percentage}%] Estimated time remaining: {round(time_remaining, 2)} seconds",
                    end="\r")

        print(
            f"Progress: {progress(100)} [100%] Load transactions complete! You are king d00p!",
            end="\r")
        connection.commit()
        if cursor is not None:
            cursor.close()
    finally:
        # get_connection() returns None when connecting failed.
        if connection is not None:
            connection.close()
def get_connection(self):
    """Open a MySQL connection configured from environment variables.

    Reads ``DB_HOST2``, ``DB_USER2``, ``DB_PW2`` and ``DB_NAME2`` from the
    environment and passes them to ``pymysql.connect``.

    Returns:
        The connection returned by ``pymysql.connect``, or ``None`` when
        connecting fails (the error is logged and printed, not raised).
    """
    try:
        # Keyword arguments: PyMySQL 1.0+ no longer accepts these
        # positionally, and older releases accept the keyword form too.
        db_connection = pymysql.connect(
            host=environ.get("DB_HOST2"),
            user=environ.get("DB_USER2"),
            password=environ.get("DB_PW2"),
            database=environ.get("DB_NAME2"),
        )
        logger.info("Connection successful LOL")
        return db_connection
    except Exception as error:
        logger.critical(f"Connection failed lol {error}")
        print(f"didn't work lol {error}")
        # Best-effort contract kept from the original: callers get None.
        return None
def save_drink_menu(self, drink_dict):
    """Insert drink menu entries into ``drink_menu`` and commit.

    Args:
        drink_dict: Mapping whose keys are indexable drink features
            (items 0-3 are used as drink_name, drink_size, drink_flavour,
            price) and whose values are the drink ids.

    Notes:
        ``self.update_sql`` is defined elsewhere; presumably it executes the
        statement on the given connection and returns the cursor it used --
        verify against the helper.
    """
    sql_query = "INSERT INTO drink_menu (id, drink_name, drink_size, drink_flavour, price) VALUES (%s, %s, %s, %s, %s)"
    connection = self.get_connection()
    try:
        logger.info(
            f"The number of unique drinks processed: {len(drink_dict)}")
        print(drink_dict)

        # Stays None when nothing was inserted; replaces the previous
        # reliance on catching an unbound-name error at close time.
        cursor = None
        for drink_features, drink_id in drink_dict.items():
            args = (drink_id, drink_features[0], drink_features[1],
                    drink_features[2], drink_features[3])
            cursor = self.update_sql(sql_query, args, connection)

        connection.commit()

        if cursor is None:
            print("no new drinks lol")
        else:
            try:
                cursor.close()
            except Exception as error:
                # Closing stays best-effort, but a real failure is reported
                # as such instead of being mislabelled as "no new drinks".
                logger.warning(f"Could not close drink_menu cursor: {error}")
    finally:
        # get_connection() returns None when connecting failed.
        if connection is not None:
            connection.close()
def save_location_menu(self, new_locations):
    """Insert location entries into ``locations`` and commit.

    A failure on one row is logged and the remaining rows are still
    attempted; whatever succeeded is committed after the loop.

    Args:
        new_locations: Mapping of location name to location id.

    Notes:
        ``self.update_sql`` is defined elsewhere; presumably it executes the
        statement on the given connection and returns the cursor it used --
        verify against the helper.
    """
    sql_query = "INSERT INTO locations (id, location) VALUES (%s, %s)"
    connection = self.get_connection()
    try:
        logger.info(
            f"The number of unique locations processed: {len(new_locations)}")

        # Stays None when no insert succeeded; replaces the previous
        # reliance on catching an unbound-name error at close time.
        cursor = None
        for location, location_id in new_locations.items():
            args = (location_id, location)
            try:
                cursor = self.update_sql(sql_query, args, connection)
            except Exception as error:
                logger.critical(f"DOOP! {error}")

        connection.commit()

        if cursor is None:
            print("no new locals lmao, where's ches??????")
        else:
            try:
                cursor.close()
            except Exception as error:
                # Closing stays best-effort, but a real failure is reported
                # as such instead of being mislabelled as "no new locals".
                logger.warning(f"Could not close locations cursor: {error}")
    finally:
        # get_connection() returns None when connecting failed.
        if connection is not None:
            connection.close()
def get_connection(self):
    """Open a MySQL connection for the load step.

    When the ``ENVIRONMENT`` variable equals ``"prod"`` the credentials come
    from ``get_secret()``; otherwise they are read from the ``DB_HOST2``,
    ``DB_USER2``, ``DB_PW2`` and ``DB_NAME2`` environment variables.

    Returns:
        The connection returned by ``pymysql.connect``, or ``None`` when
        connecting fails (the failure is logged and printed, not raised).
        Errors raised by ``get_secret()`` itself are not caught here.
    """
    if environ.get("ENVIRONMENT") == "prod":
        # get_secret() yields [host, username, password, database].
        host, username, password, db_name = get_secret()
    else:
        host = environ.get("DB_HOST2")
        username = environ.get("DB_USER2")
        password = environ.get("DB_PW2")
        db_name = environ.get("DB_NAME2")

    try:
        # Keyword arguments: PyMySQL 1.0+ no longer accepts these
        # positionally, and older releases accept the keyword form too.
        db_connection = pymysql.connect(
            host=host,
            user=username,
            password=password,
            database=db_name,
        )
        print("Got connection")
        logger.info("Load connection successful LOL")
        return db_connection
    except Exception as error:
        logger.critical("Load connection failed LOL")
        print(f"didn't work lol {error}")
        # Best-effort contract kept from the original: callers get None.
        return None
def run_etl(filename):
    """Run the extract -> transform -> load pipeline for one input file.

    Each phase is timed; the rounded durations are logged and printed, and
    their sum is reported as the total time.

    Args:
        filename: Passed to ``Extract.get_data_from_bucket`` to select the
            data to process.
    """
    logger.info("application ran")
    t_start = time.time()

    # --- Extract ---------------------------------------------------------
    # Alternative sources available on the extractor:
    # load_yesterdays_data() and load_all_data().
    extractor = Extract()
    raw_data_list = extractor.get_data_from_bucket(filename)
    t_extracted = time.time()
    extract_time = round(t_extracted - t_start, 4)
    print(f"Extract time: {extract_time}")
    logger.info(f"Extract time: {extract_time}")

    # --- Transform -------------------------------------------------------
    transformer = Transform()
    transform_result = transformer.transform(raw_data_list)
    transformed_data, new_drinks, new_locations, basket = transform_result
    t_transformed = time.time()
    transform_time = round(t_transformed - t_extracted, 4)
    logger.info(f"Transform time: {transform_time}")
    print(f"Transform time: {transform_time}")

    # --- Load ------------------------------------------------------------
    loader = Load()
    loader.save_transaction(transformed_data)  # cleaned transactions
    loader.save_drink_menu(new_drinks)  # drinks menu
    loader.save_location_menu(new_locations)  # locations menu
    loader.save_basket(basket)  # basket data
    t_loaded = time.time()
    load_time = round(t_loaded - t_transformed, 4)
    logger.info(f"Loading time: {load_time}")

    total_time = extract_time + transform_time + load_time
    logger.info(f"total time: {total_time}")
    print(f"Load time: {load_time}\nTotal time: {total_time}")