def ProcessApps(logger, processArray, folderlocs):
    '''
    ProcessApps processes all the applications that are turned on.
    '''
    FileUtilities.CreateFolder(folderlocs["relativeOutputfolder"])
    try:
        ab = ApplicationBase()
        ev = ab.LoadEnvironmentVariables(logger)
        if "tblEtl" in folderlocs:
            etlUtilities = EtlLoggingUtilities(logger)
            etlUtilities.awsParams = ev.awsParams
            etlUtilities.appschema = folderlocs["tblEtl"]["appschema"]
            etlUtilities.etlSchema = folderlocs["tblEtl"]["schemaName"]
        for proc in processArray:
            module = proc["module"]
            baseName = module.rsplit('.', 1)[1]
            logger.info(baseName + " - Starting module.")
            moduleName = importlib.import_module(module)
            className = getattr(moduleName, baseName)()
            className.Start(logger, baseName, folderlocs)  # For single threading
            if "tblEtl" in folderlocs:
                procid = etlUtilities.GetRunID(folderlocs["tblEtl"]["table"], str(baseName))
                if procid > -1:
                    etlUtilities.CompleteInstance(folderlocs["tblEtl"]["table"], procid, 'C')
            logger.info(baseName + " - module COMPLETED.")
    except:
        logger.exception("Exception processing application modules!")
        raise
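# A minimal sketch (hypothetical, not part of this codebase) of the contract that
# ProcessApps relies on: the "module" entry's last dotted component must name a
# class in that module that exposes Start(logger, moduleName, filelocs).
# ProcessApps imports the module, instantiates the class by name, and calls
# Start; nothing else is required of the module. The class name below is an
# illustrative assumption.
class SampleApp(object):
    '''Smallest possible application module that ProcessApps could drive.'''
    def Start(self, logger, moduleName, filelocs):
        logger.info(moduleName + " - sample module ran its (empty) unit of work.")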
def Start(self, logger, moduleName, filelocs):
    '''
    Start of routine
    '''
    ApplicationBase.Start(self, logger, moduleName, filelocs)
    # At some point this will be part of Start
    ApplicationBase.ProcessInput(self, logger, moduleName, filelocs)
def Start(self, logger, moduleName, filelocs):
    '''
    Start of routine
    '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        for sheet in self.job["ExcelSheets"]:
            self.DownloadAllFiles(sheet)
            self.ConvertExcel2Csv(sheet)
        self.SkipPackAndLoad(self.job["ExcelSheets"])
        if "postETLQueries" in self.job:
            ApplicationBase.CreateTables(self, self.job["postETLQueries"])
    except Exception:
        logger.exception(moduleName + " - Exception!")
        raise
def ProcessApps(logger, processArray, folderlocs):
    '''
    ProcessApps processes all the applications that are turned on.
    '''
    FileUtilities.CreateFolder(folderlocs["relativeOutputfolder"])
    ##
    #  runs holds a list of the processes we are wanting to run
    ##
    runs = []
    ###
    #  if you have a new process make sure you add it here
    ###
    try:
        ab = ApplicationBase()
        ev = ab.LoadEnvironmentVariables(logger)
        if "tblEtl" in folderlocs:
            etlUtilities = EtlLoggingUtilities(logger)
            etlUtilities.awsParams = ev.awsParams
            etlUtilities.appschema = folderlocs["tblEtl"]["appschema"]
            etlUtilities.etlSchema = folderlocs["tblEtl"]["schemaName"]
        for proc in processArray:
            module = proc["module"]
            baseName = module.rsplit('.', 1)[1]
            logger.info(baseName + " - Starting module.")
            moduleName = importlib.import_module(module)
            className = getattr(moduleName, baseName)()
            # className.Start(logger, baseName, folderlocs)  # For single threading
            # For multi-threading
            runs.append(Thread(name=baseName, target=className.Start,
                               args=(logger, baseName, folderlocs)))
        for rn in runs:
            rn.start()
        for rn in runs:
            rn.join()
            if rn.is_alive() is False and "tblEtl" in folderlocs:
                procid = etlUtilities.GetRunID(folderlocs["tblEtl"]["table"], str(rn.name))
                if procid > -1:
                    etlUtilities.CompleteInstance(folderlocs["tblEtl"]["table"], procid, 'C')
    except:
        logger.exception("Exception processing application modules!")
        raise
    logger.info("All threads complete.")
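# Hedged usage sketch: the module path, folder name, and logger setup below are
# illustrative assumptions, not taken from a real job configuration. ProcessApps
# only needs a logger, a list of dicts carrying a "module" entry, and a
# folderlocs dict with "relativeOutputfolder" plus, optionally, the "tblEtl"
# block used above for ETL run logging.
if __name__ == "__main__":
    import logging
    logging.basicConfig(level=logging.INFO)
    sampleProcessArray = [{"module": "Applications.SampleApp"}]  # hypothetical module path
    sampleFolderlocs = {
        "relativeOutputfolder": "./output"
        # Optional: "tblEtl": {"appschema": ..., "schemaName": ..., "table": ...}
    }
    ProcessApps(logging.getLogger("ProcessApps"), sampleProcessArray, sampleFolderlocs)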
def Start(self, logger, moduleName, filelocs):
    '''
    Start of routine
    '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        for srcFileParameter in self.job["srcFileParameters"]:
            self.DownloadAllFiles(srcFileParameter)
            self.UnzipExcel(srcFileParameter)
        self.SkipPackAndLoad(self.job["srcFileParameters"])
        if "postETLQueries" in self.job:
            ApplicationBase.CreateTables(self, self.job["postETLQueries"])
    except Exception:
        logger.exception(moduleName + " - Exception!")
        raise
def AsyncBuildTables(self, proc):
    '''
    build the tables in RedShift
    '''
    try:
        ###
        #  first create the file so we can use it
        ###
        self.logger.info(self.moduleName + " - " + proc["processname"] +
                         " - SQL table creation starting.")
        sqlTableCreationScript = ApplicationBase.BuildTableCreationScriptTable(
            self, proc["sqlscript"], proc["processname"],
            templateFolder="sql", sqlFolder="sql")
        ###
        #  now we create the table from the file created
        ###
        RedshiftUtilities.PSqlExecute(sqlTableCreationScript, self.logger)
        self.logger.info(self.moduleName + " - " + proc["processname"] +
                         " - SQL table creation finished.")
    except:
        self.logger.exception(self.moduleName + " - we had an error in AsyncBuildTables")
        raise
def AsyncBuildCXR(self, proc, basesql):
    '''
    actually build the cross reference tables
    '''
    try:
        self.logger.debug(self.moduleName + " -- " + "AsyncBuildCXR for " + proc["name"] + " starting ")
        ###
        #  first create the file so we can use it
        ###
        sqlTableCreationScript = ApplicationBase.BuildTableCreationScriptTable(
            self, basesql, proc["name"],
            templateFolder="sql", sqlFolder="sql")
        ###
        #  execute DDL so we now have the blank table
        ###
        RedshiftUtilities.PSqlExecute(sqlTableCreationScript, self.logger)
        self.logger.debug(self.moduleName + " -- " + "AsyncBuildCXR for " + proc["name"] + " finished ")
    except:
        self.logger.exception(self.moduleName + " - we had an error in AsyncBuildCXR")
        raise
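# Hedged sketch of a driver for the Async* builders above. The caller is not
# shown in this listing, so the method name, the variable names (procs, basesql)
# and the one-thread-per-table fan-out are assumptions, mirroring the Thread
# pattern already used in the multi-threaded ProcessApps variant.
def AsyncBuildAllCXR(self, procs, basesql):
    threads = [Thread(name=proc["name"], target=self.AsyncBuildCXR, args=(proc, basesql))
               for proc in procs]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()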
def Start(self, logger, moduleName, filelocs):
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.fileUtilities.EmptyFolderContents(self.localTempDirectory + "/QC/")  # delete and recreate the folder
        hqc = HindsightQC(self.logger, self.fileUtilities,
                          self.bcpUtilities, self.localTempDirectory)
        hqc.Get_sql_server_rowcounts("pre")  # Get pre-ETL rowcounts
        # Execute the pre-ETL queries
        for sqlFile in self.job["bcpParameters"].get("preETLQueries"):
            RedshiftUtilities.PSqlExecute(
                self.fileUtilities.GetApplicationDirectory("Hindsight") + sqlFile, logger)
        for subJob in self.job["bcpParameters"]["subJobs"]:
            if subJob.get("destinationSchema") is None:
                subJob["destinationSchema"] = self.job["bcpParameters"]["destinationSchema"]
            self.ProcessSubJob(subJob)
        # Get SQL Server rowcounts
        hqc.Get_sql_server_rowcounts("post")
        # Execute the post-ETL queries to prepare the data prior to loading into the production tables
        for sqlFile in self.job["bcpParameters"].get("postETLQueries"):
            RedshiftUtilities.PSqlExecute(
                self.fileUtilities.GetApplicationDirectory("Hindsight") + sqlFile, logger)
        # Get Redshift rowcounts
        hqc.Get_redshift_rowcounts("post")
        # Execute the post-ETL QC queries
        status = hqc.ValidateETL()  # Check whether the ETL passed the QC
        # Check 1: inter-version counts - are the differences beyond a particular threshold?
        # Check 2: pre-SQL vs. post-Redshift - are the differences beyond a particular threshold?
        # If the ETL does not pass the QC, do not update/insert the prod tables.
        # If the ETL passed the QC, insert into the production tables (data, attributes, history).
        if status is True:
            self.logger.info("ETL good to go")
            for sqlFile in self.job["bcpParameters"].get("FinalLoadQueries"):
                #===========================================================
                # add a process to backup data/attributes history tables
                # Download to S3
                #===========================================================
                RedshiftUtilities.PSqlExecute(
                    self.fileUtilities.GetApplicationDirectory("Hindsight") + sqlFile, logger)
        else:
            self.logger.warning("Bad ETL. No go!")
        print(hqc.TimeElaspsed())
    except:
        logger.exception(moduleName + " - Exception!")
        raise
def Start(self, logger, moduleName, filelocs):
    '''
    main routine
    '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.logger.info(self.moduleName + " - Processing: ")
        outputCSVfileName = self.localTempDirectory + '/PheonixDocuments.csv'
        self.logger.info(self.moduleName + " - Pull documents from Phoenix: ")
        jsonDocuments = self.PullDataFromPhoenix()
        self.logger.info(self.moduleName + " - save contents to CSV file from Phoenix: ")
        self.ExportToCSV(outputCSVfileName, jsonDocuments)
        self.logger.info(self.moduleName + " - push documents csv file to S3: ")
        bucketName, s3TempKey = S3Utilities.UploadFileToS3Temp(self.awsParams.s3, outputCSVfileName)
        self.logger.info(self.moduleName + " - Create document table: ")
        psConnect = self.GetPSConnection()
        self.CreatePostgresTables(psConnect)
        self.logger.info(self.moduleName + " - pull document s3 to database server temp: ")
        postgresTempFile = self.DownloadFromS3ToPSTempDir(psConnect, bucketName, s3TempKey)
        self.logger.info(self.moduleName + " - load documents csv file: ")
        self.LoadDataFromPostgresTempDir(psConnect, postgresTempFile)
        self.logger.info(self.moduleName + " - clean up temp file: ")
        S3Utilities.DeleteFile(self.awsParams.s3, bucketName, s3TempKey)
    except:
        logger.exception(moduleName + " - Exception in start!")
        raise
def Start(self, logger, moduleName, filelocs):
    '''
    Start of routine
    '''
    currProcId = None
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.logger.debug(self.moduleName + " -- " + " starting ")
        currProcId = self.etlUtilities.GetRunID(filelocs["tblEtl"]["table"], self.moduleName)
        ###
        #  set up to run create folder
        ###
        # self.fileUtilities.moduleName = self.moduleName
        # self.fileUtilities.localBaseDirectory = self.localTempDirectory
        # self.fileUtilities.CreateFolders(self.job["folders"])
        ###
        self.ProcessCategories()
        if self.job["cleanlocal"] == "Y":
            self.fileUtilities.RemoveFolder(self.localTempDirectory)
        self.logger.debug(self.moduleName + " -- " + " finished ")
    except Exception as err:
        self.logger.exception(moduleName + " - Exception! Error: " + err.message)
        if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                              currProcId, 'F') is not True:
            self.logger.info(self.moduleName + " - we could not Complete Instance.")
        raise Exception(err.message)
def Start(self, logger, moduleName, filelocs):
    currProcId = None
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.logger.debug(self.moduleName + " -- " + " starting ")
        currProcId = self.etlUtilities.GetRunID(filelocs["tblEtl"]["table"], self.moduleName)
        ###
        #  set up to run create folder
        ###
        self.fileUtilities.moduleName = self.moduleName
        self.fileUtilities.localBaseDirectory = self.localTempDirectory
        self.fileUtilities.CreateFolders(self.job["folders"])
        ###
        self.fromDate = self.GetFromDate()
        for databaseSettings in self.job["Databases"]:
            if databaseSettings["execute"] == 'Y':
                self.ProcessDatabase(databaseSettings)
            else:
                self.logger.debug(self.moduleName + " -- skip database " +
                                  databaseSettings["common"]["name"])
        self.logger.debug(self.moduleName + " -- " + " finished ")
    except Exception as err:
        self.logger.exception(moduleName + " - Exception! Error: " + err.message)
        if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                              currProcId, 'F') is not True:
            self.logger.info(self.moduleName + " - we could not Complete Instance.")
        raise Exception(err.message)
def Start(self, logger, moduleName, filelocs):
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.csvFile = self.localTempDirectory + "/" + self.job["fileNameOut"]
        self.csvFileHistory = self.localTempDirectory + "/" + self.job["fileNameOutHistory"]
        self.GetAndTransform()
        self.UploadToS3()
        self.LoadAllFromS3(self.job["s3ToDirectory"] + '/' + self.job["fileNameOut"] + '.gz',
                           self.job["tableName"])
        self.LoadAllFromS3(self.job["s3ToDirectory"] + '/' + self.job["fileNameOutHistory"] + '.gz',
                           self.job["tableName"] + '_history')
        self.LoadAllFromS3(self.job["xReference"]["s3DataDirectory"],
                           self.job["tableName"] + self.job["xReference"]["tableNameSfx"])
        self.ExecutePostETL()
    except Exception as err:
        self.logger.exception(moduleName + " - Exception! Error: " + err.message)
        raise Exception(err.message)
def Start(self, logger, moduleName, filelocs):
    currProcId = None
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.logger.debug(self.moduleName + " -- " + " starting ")
        currProcId = self.etlUtilities.GetRunID(filelocs["tblEtl"]["table"], self.moduleName)
        self.rawFolder = self.localTempDirectory + "/" + "Raw"
        self.csvFolder = self.localTempDirectory + "/" + "CSV"
        self.CheckWorkingFolders()
        self.BulkDownload()
        self.ProcessFiles()
        self.BulkUploadToS3()
        self.LoadAllFromS3()
        if self.job["cleanlocal"] == "Y":
            self.fileUtilities.RemoveFolder(self.localTempDirectory)
        self.logger.debug(self.moduleName + " -- " + " finished ")
    except Exception as err:
        self.logger.exception(moduleName + " - Exception! Error: " + err.message)
        if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                              currProcId, 'F') is not True:
            self.logger.info(self.moduleName + " - we could not Complete Instance.")
        raise Exception(err.message)
def Start(self, logger, moduleName, filelocs):
    '''
    Start of routine
    '''
    currProcId = None
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.logger.debug(self.moduleName + " -- " + " starting ")
        currProcId = self.etlUtilities.GetRunID(filelocs["tblEtl"]["table"], self.moduleName)
        for table in self.job["tables"]:
            self.ProcessTable(table)
            self.LoadDataFromAthenaIntoRedShiftLocalScripts(table)
            # self.LoadDataFromAthenaIntoRedShiftS3Scripts(table)  # Test: Load all data from Athena
        if self.job["cleanlocal"] == "Y":
            self.fileUtilities.RemoveFolder(self.localTempDirectory)
        self.logger.debug(self.moduleName + " -- " + " finished ")
    except Exception as err:
        self.logger.exception(moduleName + " - Exception! Error: " + err.message)
        if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                              currProcId, 'F') is not True:
            self.logger.info(self.moduleName + " - we could not Complete Instance.")
        raise Exception(err.message)
def Start(self, logger, moduleName, filelocs):
    '''
    main routine
    '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        s3Key = self.job["s3SrcDirectory"] + "/" + self.job["fileToLoad"]
        self.logger.info(self.moduleName + " - Processing file: " + s3Key)
        localFilepath = self.localTempDirectory + "/" + ntpath.basename(s3Key)
        S3Utilities.DownloadFileFromS3(self.awsParams.s3, self.job["bucketName"],
                                       s3Key, localFilepath)
        df = pd.read_excel(localFilepath, "Major Variables", index_col=None,
                           na_values=['NaN'], skiprows=1, parse_cols="C:E,G:I",
                           header=None)
        # Save the data as CSV
        outputCSVfileName = self.localTempDirectory + '/SampleData.csv'
        df.to_csv(outputCSVfileName, sep=str(self.job["delimiter"]),
                  encoding='utf-8', index=False)
        # Upload the CSV file to a temporary S3 location. Postgres will download it
        # from there to its local directory
        bucketName, s3TempKey = S3Utilities.UploadFileToS3Temp(self.awsParams.s3, outputCSVfileName)
        psConnect = self.GetPSConnection()
        # Postgres tables are created using a connection (rather than psql)
        self.CreatePostgresTables(psConnect)
        postgresTempFile = self.DownloadFromS3ToPSTempDir(psConnect, bucketName, s3TempKey)
        self.LoadDataFromPostgresTempDir(psConnect, postgresTempFile)
        S3Utilities.DeleteFile(self.awsParams.s3, bucketName, s3TempKey)
        self.LoadBaseAttributes(psConnect)
        self.LoadBaseData(psConnect, '1000', 'glm_value')
        self.LoadBaseData(psConnect, '2000', 'arima_value')
        self.LoadBaseData(psConnect, '3000', 'lasso_value')
        # self.LoadBaseData(psConnect, '4000', 'nn_value')
        # self.LoadBaseData(psConnect, '5000', 'spectre_value')
        psConnect.close()
        self.logger.debug(" SampleData CSV loaded to RedShift")
    except:
        logger.exception(moduleName + " - Exception in start!")
        raise
def Start(self, logger, moduleName, filelocs):
    '''
    main routine
    '''
    currProcId = None
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.logger.debug(self.moduleName + " -- " + " starting ")
        currProcId = self.etlUtilities.GetRunID(filelocs["tblEtl"]["table"], self.moduleName)
        ###
        #  set up to run create folder
        ###
        self.fileUtilities.moduleName = self.moduleName
        self.fileUtilities.localBaseDirectory = self.localTempDirectory
        self.fileUtilities.CreateFolders(self.job["folders"])
        self.fromDate = self.GetFromDate()
        ###
        localFilepath = self.GetMostRecentFile(self.job["foldertoscan"])
        # localFilepath = r'C:\tmp\IHS Markit Outlook for Global Oil Market Fundamentals - September 2017.xlsx'
        for tables in self.job["tables"]:
            fname = self.fileUtilities.CreateTableSql(tables, self.fileUtilities.sqlFolder)
            RedshiftUtilities.PSqlExecute(fname, self.logger)
            outPutFileName = self.fileUtilities.csvFolder + self.fromDate + "_" + tables["table"] + '.csv'
            outputGZ = self.fileUtilities.gzipFolder + self.fromDate + "_" + tables["table"] + '.csv.gz'
            tableJson = tables
            xl = ExcelUtilities(logger)
            if sys.version[0] == '3':
                csvfile = open(outPutFileName, 'w', newline='')
            elif sys.version[0] == '2':
                csvfile = open(outPutFileName, 'wb')
            csvWriter = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
            if localFilepath is not None:
                self.ProcessFile(xl, localFilepath, csvWriter)
            csvfile.close()
            self.fileUtilities.GzipFile(outPutFileName, outputGZ)
            self.BulkUploadToS3()
            self.LoadData(tableJson)
        if self.job["cleanlocal"] == "Y":
            self.fileUtilities.RemoveFolder(self.localTempDirectory)
        self.logger.debug(self.moduleName + " -- " + " finished ")
    except Exception as err:
        self.logger.exception(moduleName + " - Exception! Error: " + err.message)
        if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                              currProcId, 'F') is not True:
            self.logger.info(self.moduleName + " - we could not Complete Instance.")
        raise Exception(err.message)
def Start(self, logger, moduleName, filelocs):
    '''
    Start of routine
    '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        for srcFileParameter in self.job["srcFileParameters"]:
            self.ProcessS3File(srcFileParameter)
    except:
        logger.exception(moduleName + " - Exception!")
        raise
def Start(self, logger, moduleName, filelocs):
    '''
    Starting point of this Project
    '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.GetFileFromS3()
        Form1(logger, self.fileUtilities, self.localTempDirectory,
              self.job, self.awsParams)  # invoke the Form1 handler
    except:
        self.logger.exception(moduleName + " - Exception!")
        raise
def Start(self, logger, moduleName, filelocs):
    '''
    Application starting point
    '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.DownloadFilesFromS3()
        ISO(logger, self.fileUtilities, self.localTempDirectory,
            self.job, self.awsParams)  # invoke the ISO handler
    except Exception as ex:
        self.logger.exception(moduleName + " - Exception!")
        self.logger.exception("{}".format(str(ex)))
        raise
def Start(self, logger, moduleName, filelocs):
    '''
    start it
    '''
    ApplicationBase.Start(self, logger, moduleName, filelocs)
    seedVal = 100
    logger.info("Starting count with seed %s in Counter" % (seedVal))
    for i in range(seedVal):
        val = i + seedVal
        # logger.info("val value in Counter %s" % (str(val)))
    logger.info("this is the starting seed %s and the end value was %s in Counter" % (seedVal, val))
def Start(self, logger, moduleName, filelocs):
    currProcId = None
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.logger.debug(self.moduleName + " -- " + " starting ")
        currProcId = self.etlUtilities.GetRunID(filelocs["tblEtl"]["table"], self.moduleName)
        paramsList = self.GetParamsList(filelocs["tblEtl"]["table"])
        ###
        #  set up to run create folder
        ###
        self.fileUtilities.moduleName = self.moduleName
        self.fileUtilities.localBaseDirectory = self.localTempDirectory
        self.fileUtilities.CreateFolders(self.job["folders"])
        ###
        for tblJson in self.job["tables"]:
            fname = self.fileUtilities.CreateTableSql(tblJson, self.fileUtilities.sqlFolder)
            RedshiftUtilities.PSqlExecute(fname, self.logger)
        sqlPullDataScript, fromDate = self.CreatePullScript(paramsList)
        outputCSV = self.fileUtilities.csvFolder + fromDate + self.moduleName + ".CSV"
        outputGZ = self.fileUtilities.gzipFolder + fromDate + self.moduleName + '.csv.gz'
        self.BulkExtract(sqlPullDataScript, outputCSV)
        self.fileUtilities.GzipFile(outputCSV, outputGZ)
        self.BulkUploadToS3()
        for tblJson in self.job["tables"]:
            if "s3subfolder" in tblJson:
                self.LoadData(tblJson["s3subfolder"], tblJson)
                maxDate = self.GetMaxUpdateDate(tblJson)
                sMaxDate = maxDate["lastrun"].strftime('%m/%d/%Y')
                if self.etlUtilities.SetInstanceParameters(filelocs["tblEtl"]["table"],
                                                           currProcId,
                                                           json.dumps({"lastrun": sMaxDate})) is not True:
                    self.logger.info(self.moduleName + " - we could not set the instance.")
        self.UpdateTable(filelocs["tblEtl"]["schemaName"], filelocs["tblEtl"]["table"],
                         self.job["tables"], currProcId)
        if self.job["cleanlocal"] == "Y":
            self.fileUtilities.RemoveFolder(self.localTempDirectory)
        self.logger.debug(self.moduleName + " -- " + " finished ")
    except Exception as err:
        self.logger.exception(moduleName + " - Exception! Error: " + err.message)
        if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                              currProcId, 'F') is not True:
            self.logger.info(self.moduleName + " - we could not Complete Instance.")
        raise Exception(err.message)
def Start(self, logger, moduleName, filelocs):
    '''
    main routine
    '''
    currProcId = None
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.logger.debug(self.moduleName + " -- " + " starting ")
        currProcId = self.etlUtilities.GetRunID(filelocs["tblEtl"]["table"], self.moduleName)
        ###
        #  establish connection to Access database
        ###
        conn = self.EstablishConnection()
        cur = conn.cursor()
        sqlline = self.FixSQLStatement()
        cur.execute(sqlline)
        outputfileName = self.localTempDirectory + '/ENPdata.csv'
        self.ConvertToCSV(cur, outputfileName)
        ###
        #  load the CSV to RedShift
        ###
        self.logger.debug(self.moduleName + " - ENP load CSV to RedShift")
        rsConnect = self.etlUtilities.GetAWSConnection(self.awsParams)
        RedshiftUtilities.LoadFileIntoRedshift(
            rsConnect, self.awsParams.s3, self.logger, self.fileUtilities,
            outputfileName, self.job["destinationSchema"], self.job["tableName"],
            self.job["fileFormat"], self.job["dateFormat"], self.job["delimiter"])
        self.logger.debug(self.moduleName + " - ENP CSV loaded to RedShift")
        # Cleanup
        rsConnect.close()
        cur.close()
        conn.close()
        if self.job["cleanlocal"] == "Y":
            self.fileUtilities.RemoveFolder(self.localTempDirectory)
        self.logger.debug(self.moduleName + " -- " + " finished ")
    except Exception as err:
        self.logger.exception(moduleName + " - Exception! Error: " + err.message)
        if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                              currProcId, 'F') is not True:
            self.logger.info(self.moduleName + " - we could not Complete Instance.")
        raise Exception(err.message)
def Start(self, logger, moduleName, filelocs):
    '''
    Start of routine
    '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.CreatePackedFolder()
        fileList = self.DownloadFiles()
        self.ProcessFiles(fileList)
        self.UploadPackedToS3()
        self.LoadTables()
    except Exception:
        logger.exception(moduleName + " - Exception!")
        raise
def Start(self, logger, moduleName, filelocs):
    '''
    main routine starts here
    '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.moduleName = moduleName
        self.CreateTables(self.job["tempTablesScript"])
        self.SetLastLiquidsBalanceFileInfo()
        self.ProcessLiquidBalanceFile()
        self.CreateTables(self.job["unpivotScript"])
        self.CreateTables(self.job["cleanTempTablesScript"])
    except Exception as err:
        self.logger.error(self.moduleName + " - Exception in start.")
        raise err
def Start(self, logger, moduleName, filelocs):
    '''
    Start of routine
    '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.fileUtilities.EmptyFolderContents(self.localTempDirectory)  # delete and recreate the folder
        self.fileUtilities.EmptyFolderContents(self.localTempDirectory + "/cleaned/")  # delete and recreate the folder
        self.DownloadFiles()
        self.ProcessFiles()
    except Exception:
        logger.exception(moduleName + " - Exception!")
        raise
def Start(self, logger, moduleName, filelocs):
    '''
    Starting point of this Project
    '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.dbfUtilities = DBFUtilities(logger)
        self.CreateFolders()
        self.Process()
        self.UploadPackedToS3()
        self.LoadFilesIntoRedshift()
        self.EmptyPackedFolder()
        self.PostLoadETL()
    except:
        self.logger.exception(moduleName + " - Exception!")
        raise
def Start(self, logger, moduleName, filelocs):
    '''
    Main starting routine
    '''
    currProcId = None
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.logger.debug(self.moduleName + " -- " + " starting ")
        currProcId = self.etlUtilities.GetRunID(filelocs["tblEtl"]["table"], self.moduleName)
        ###
        #  set up to run create folder
        ###
        self.fileUtilities.moduleName = self.moduleName
        self.fileUtilities.localBaseDirectory = self.localTempDirectory
        self.fileUtilities.CreateFolders(self.job["folders"])
        ###
        for tblJson in self.job["tables"]:
            fname = self.fileUtilities.CreateTableSql(tblJson, self.fileUtilities.sqlFolder)
            RedshiftUtilities.PSqlExecute(fname, self.logger)
            if "s3subfolder" in tblJson:
                self.s3subFolder = tblJson["s3subfolder"]
        outputFileName = self.ProcessRequest()
        outputCSV = outputFileName
        outputGZ = self.fileUtilities.gzipFolder + self.moduleName + '.csv.gz'
        self.fileUtilities.GzipFile(outputCSV, outputGZ)
        self.BulkUploadToS3(self.s3subFolder)
        for tblJson in self.job["tables"]:
            if "s3subfolder" in tblJson:
                self.LoadData(tblJson["s3subfolder"], tblJson)
        self.UpdateTable(filelocs["tblEtl"]["schemaName"], filelocs["tblEtl"]["table"],
                         self.job["tables"], currProcId)
        if self.job["cleanlocal"] == "Y":
            self.fileUtilities.RemoveFolder(self.localTempDirectory)
        self.logger.debug(self.moduleName + " -- " + " finished ")
    except Exception as err:
        self.logger.exception(moduleName + " - Exception! Error: " + err.message)
        if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                              currProcId, 'F') is not True:
            self.logger.info(self.moduleName + " - we could not Complete Instance.")
        raise Exception(err.message)
def Start(self, logger, moduleName, filelocs):
    '''
    Start of routine
    '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.packedFolder = self.localTempDirectory + "/packed/"
        self.CreateFolders()
        lastModifiedDatetime = self.GetLastModifiedDatetime(filelocs)
        maxModifiedDatetime = self.ProcessFiles(lastModifiedDatetime)
        self.UploadPackedToS3()
        self.LoadErcotTables()
        self.SetLastModifiedDatetime(filelocs, DatetimeUtilities.ConvertToSTR(maxModifiedDatetime))
        self.EmptyPackedFolder()
        self.PostLoadETL()
    except Exception:
        logger.exception(moduleName + " - Exception!")
        raise
def Start(self, logger, moduleName, filelocs):
    '''
    Start of routine
    '''
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        self.tempFolder = self.localTempDirectory + "/Temp"
        self.packedFolder = self.localTempDirectory + "/Packed"
        self.rawDataFolder = self.localTempDirectory + "/RawData"
        self.CleanWorkingFolders()
        self.SynchronizeSourceFolder()
        self.CleanUpAndPack()
        self.UploadPackedToS3()
        self.LoadAirMarketsTables()
    except:
        logger.exception(moduleName + " - Exception!")
        raise
def Start(self, logger, moduleName, filelocs):
    '''
    Start of routine
    '''
    currProcId = None
    try:
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        currProcId = self.etlUtilities.GetRunID(filelocs["tblEtl"]["table"], self.moduleName)
        self.ProcessRequest()
        if self.job["cleanlocal"] == "Y":
            self.fileUtilities.RemoveFolder(self.localTempDirectory)
    except Exception as err:
        self.logger.exception(moduleName + " - Exception! Error: " + err.message)
        if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                              currProcId, 'F') is not True:
            self.logger.info(self.moduleName + " - we could not Complete Instance.")
        raise Exception(err.message)