Beispiel #1
0
def ProcessApps(logger, processArray, folderlocs):
    '''
    Run every configured application module sequentially.

    For each entry in processArray the module is imported, the class named
    after the module's last dotted path component is instantiated, and its
    Start() method is invoked.  When ETL logging is configured via
    folderlocs["tblEtl"], the matching run instance is marked complete
    after each module finishes.

    Parameters:
        logger       -- logger used for progress and error reporting
        processArray -- list of dicts, each with a "module" dotted path
        folderlocs   -- folder/ETL configuration dictionary

    Raises:
        Any exception from a module is logged and re-raised.
    '''
    FileUtilities.CreateFolder(folderlocs["relativeOutputfolder"])
    try:
        ab = ApplicationBase()
        ev = ab.LoadEnvironmentVariables(logger)
        if "tblEtl" in folderlocs:
            etlUtilities = EtlLoggingUtilities(logger)
            etlUtilities.awsParams = ev.awsParams
            etlUtilities.appschema = folderlocs["tblEtl"]["appschema"]
            etlUtilities.etlSchema = folderlocs["tblEtl"]["schemaName"]

        for proc in processArray:
            module = proc["module"]
            baseName = module.rsplit('.', 1)[1]

            logger.info(baseName + " - Starting module.")
            moduleName = importlib.import_module(module)
            className = getattr(moduleName, baseName)()
            className.Start(logger, baseName, folderlocs) # For single threading

            if "tblEtl" in folderlocs:
                procid = etlUtilities.GetRunID(folderlocs["tblEtl"]["table"], str(baseName))
                if procid > -1:
                    etlUtilities.CompleteInstance(folderlocs["tblEtl"]["table"], procid, 'C')
            # Log completion per module.  Previously this was logged once
            # AFTER the loop, which only named the last module and raised
            # NameError when processArray was empty.
            logger.info(baseName + " - module COMPLETED.")

    # Narrowed from a bare except so SystemExit/KeyboardInterrupt are not
    # intercepted; the error is still logged and re-raised.
    except Exception:
        logger.exception("Exception processing application modules!")
        raise
Beispiel #2
0
 def Start(self, logger, moduleName, filelocs):
     '''
     Entry point: run the base-class setup, then process the input.
     '''
     base = ApplicationBase
     base.Start(self, logger, moduleName, filelocs)
     # ProcessInput is expected to be folded into the base Start eventually.
     base.ProcessInput(self, logger, moduleName, filelocs)
Beispiel #3
0
 def Start(self, logger, moduleName, filelocs):
     '''
     Entry point: download the configured Excel sheets, convert them to
     CSV, load everything, then run any post-ETL queries.
     '''
     try:
         ApplicationBase.Start(self, logger, moduleName, filelocs)
         sheets = self.job["ExcelSheets"]
         for sheet in sheets:
             self.DownloadAllFiles(sheet)
             self.ConvertExcel2Csv(sheet)
         self.SkipPackAndLoad(sheets)
         # Post-ETL queries are optional per job configuration.
         if "postETLQueries" in self.job:
             ApplicationBase.CreateTables(self, self.job["postETLQueries"])
     except Exception:
         logger.exception(moduleName + " - Exception!")
         raise
Beispiel #4
0
def ProcessApps(logger, processArray, folderlocs):
    '''
    Run every configured application module, one thread per module.

    Each entry in processArray names a module; the class named after the
    module's last dotted path component is imported, instantiated, and its
    Start() method is scheduled on a worker thread.  After each thread is
    joined, the matching ETL run instance (when folderlocs["tblEtl"] is
    configured) is marked complete.

    Parameters:
        logger       -- logger used for progress and error reporting
        processArray -- list of dicts, each with a "module" dotted path
        folderlocs   -- folder/ETL configuration dictionary

    Raises:
        Any exception raised while scheduling or joining the threads is
        logged and re-raised.
    '''
    FileUtilities.CreateFolder(folderlocs["relativeOutputfolder"])
    # runs holds the worker threads, one per process we want to run.
    # If you have a new process, make sure it appears in processArray.
    runs = []
    try:
        ab = ApplicationBase()
        ev = ab.LoadEnvironmentVariables(logger)
        if "tblEtl" in folderlocs:
            etlUtilities = EtlLoggingUtilities(logger)
            etlUtilities.awsParams = ev.awsParams
            etlUtilities.appschema = folderlocs["tblEtl"]["appschema"]
            etlUtilities.etlSchema = folderlocs["tblEtl"]["schemaName"]

        for proc in processArray:
            module = proc["module"]
            baseName = module.rsplit('.', 1)[1]

            logger.info(baseName + " - Starting module.")
            moduleName = importlib.import_module(module)
            className = getattr(moduleName, baseName)()
            # Multi-threaded: each module's Start runs on its own thread.
            runs.append(Thread(name=baseName, target=className.Start,
                               args=(logger, baseName, folderlocs)))

        for rn in runs:
            rn.start()

        for rn in runs:
            # join() without a timeout only returns once the thread has
            # terminated, so the old `is_alive() is False` check was
            # always true and has been removed.
            rn.join()
            if "tblEtl" in folderlocs:
                procid = etlUtilities.GetRunID(folderlocs["tblEtl"]["table"], str(rn.name))
                if procid > -1:
                    etlUtilities.CompleteInstance(folderlocs["tblEtl"]["table"], procid, 'C')

    # Narrowed from a bare except so SystemExit/KeyboardInterrupt are not
    # intercepted; the error is still logged and re-raised.
    except Exception:
        logger.exception("Exception processing application modules!")
        raise
    logger.info("All threads complete.")
Beispiel #5
0
    def Start(self, logger, moduleName, filelocs):
        '''
        Entry point: download and unzip each configured source file,
        load everything, then run any post-ETL queries.
        '''
        try:
            ApplicationBase.Start(self, logger, moduleName, filelocs)
            params = self.job["srcFileParameters"]
            for srcFileParameter in params:
                self.DownloadAllFiles(srcFileParameter)
                self.UnzipExcel(srcFileParameter)
            self.SkipPackAndLoad(params)
            # Post-ETL queries are optional per job configuration.
            if "postETLQueries" in self.job:
                ApplicationBase.CreateTables(self, self.job["postETLQueries"])
        except Exception:
            logger.exception(moduleName + " - Exception!")
            raise
Beispiel #6
0
 def AsyncBuildTables(self, proc):
     '''
     Build the Redshift tables for one process entry.

     Renders the table-creation SQL from proc["sqlscript"] and executes it
     against Redshift.

     Parameters:
         proc -- dict with "processname" and "sqlscript" keys

     Raises:
         Any exception is logged and re-raised.
     '''
     try:
         self.logger.info(self.moduleName + " - " + proc["processname"] +
                          " - SQL tables starting.")
         # First render the creation script to a file so we can execute it.
         sqlTableCreationScript = ApplicationBase.BuildTableCreationScriptTable(
             self,
             proc["sqlscript"],
             proc["processname"],
             templateFolder="sql",
             sqlFolder="sql")
         # Now create the table(s) from the generated file.
         RedshiftUtilities.PSqlExecute(sqlTableCreationScript, self.logger)
         self.logger.info(self.moduleName + " - " + proc["processname"] +
                          " - SQL tables created finished.")
     # Narrowed from a bare except; also fixed the copy-pasted message that
     # wrongly blamed AsyncBuildCXR for errors raised here.
     except Exception:
         self.logger.exception(self.moduleName +
                               "- we had an error in AsyncBuildTables")
         raise
Beispiel #7
0
 def AsyncBuildCXR(self, proc, basesql):
     '''
     Build one cross-reference table in Redshift.

     Renders the table-creation DDL from basesql for the given process and
     executes it, leaving an empty cross-reference table behind.

     Parameters:
         proc    -- dict with a "name" key identifying the table
         basesql -- base SQL script used to generate the DDL

     Raises:
         Any exception is logged and re-raised.
     '''
     try:
         self.logger.debug(self.moduleName + " -- " + "AsyncBuildCXR for " +
                           proc["name"] + " starting ")
         # First render the creation script to a file so we can execute it.
         sqlTableCreationScript = ApplicationBase.BuildTableCreationScriptTable(
             self,
             basesql,
             proc["name"],
             templateFolder="sql",
             sqlFolder="sql")
         # Execute the DDL so we now have the blank table.
         RedshiftUtilities.PSqlExecute(sqlTableCreationScript, self.logger)
         self.logger.debug(self.moduleName + " -- " + "AsyncBuildCXR for " +
                           proc["name"] + " finished ")
     # Narrowed from a bare except so SystemExit/KeyboardInterrupt are not
     # intercepted; the error is still logged and re-raised.
     except Exception:
         self.logger.exception(self.moduleName +
                               "- we had an error in AsyncBuildCXR")
         raise
Beispiel #8
0
    def Start(self, logger, moduleName, filelocs):
        '''
        Run the Hindsight ETL with pre/post QC row-count validation.

        Flow: reset the QC folder, capture pre-ETL SQL Server row counts,
        run the pre-ETL queries, process every sub job, capture post-ETL
        row counts on SQL Server and Redshift, validate the ETL, and only
        when validation passes run the final load queries.

        Parameters:
            logger     -- logger for progress and error reporting
            moduleName -- name of this module (used in log messages)
            filelocs   -- folder configuration passed to the base class

        Raises:
            Re-raises any exception after logging it.
        '''
        try:
            ApplicationBase.Start(self, logger, moduleName, filelocs)
            self.fileUtilities.EmptyFolderContents(
                self.localTempDirectory +
                "/QC/")  #delete and recreate the folder
            hqc = HindsightQC(self.logger, self.fileUtilities,
                              self.bcpUtilities, self.localTempDirectory)
            hqc.Get_sql_server_rowcounts("pre")  #Get pre-ETL rowcounts

            appDir = self.fileUtilities.GetApplicationDirectory("Hindsight")
            bcpParams = self.job["bcpParameters"]

            # Execute the pre-etl queries.  Default to an empty list so a
            # missing key no longer raises TypeError from iterating None.
            for sqlFile in bcpParams.get("preETLQueries", []):
                RedshiftUtilities.PSqlExecute(appDir + sqlFile, logger)

            # NOTE(review): ProcessSubJob only runs for sub jobs that had no
            # destinationSchema of their own -- confirm this is intentional.
            for subJob in bcpParams["subJobs"]:
                if subJob.get("destinationSchema") is None:
                    subJob["destinationSchema"] = bcpParams["destinationSchema"]
                    self.ProcessSubJob(subJob)

            #Get SQL Server rowcounts
            hqc.Get_sql_server_rowcounts("post")

            #Execute the post-etl queries to prepare the data post-ETL prior to loading into the production tables
            for sqlFile in bcpParams.get("postETLQueries", []):
                RedshiftUtilities.PSqlExecute(appDir + sqlFile, logger)

            #Get Redshift rowcounts
            hqc.Get_redshift_rowcounts("post")

            #Execute the post-etl qc queries
            status = hqc.ValidateETL()

            #Check whether the ETL passed the QC
            #Check 1: inter-version counts. Are the difference beyond a particular threshold
            #Check 2: pre-sql v/s post-redshift. Are the differences beyond a particular threshold
            #If the ETL doesn't pass the QC, do not update/insert the prod tables
            #If the ETL passed the QC, insert into production tables (data, attributes, history)
            if status:  # was `status == True`; truthiness test is idiomatic
                self.logger.info("ETL good to go")
                for sqlFile in bcpParams.get("FinalLoadQueries", []):
                    #===========================================================
                    # add a process to backup data/attributes history tables
                    # Download to S3
                    #===========================================================
                    RedshiftUtilities.PSqlExecute(appDir + sqlFile, logger)
            else:
                self.logger.warning("Bad ETL. No go!")

            # Parenthesized form works on both Python 2 and 3 (this was a
            # Python-2-only print statement).
            print(hqc.TimeElaspsed())
        # Narrowed from a bare except; still logs and re-raises.
        except Exception:
            logger.exception(moduleName + " - Exception!")
            raise
Beispiel #9
0
    def Start(self, logger, moduleName, filelocs):
        '''
        Main routine: pull documents from Phoenix, stage them as CSV in a
        temporary S3 location, then load them into Postgres via the
        database server's temp directory.
        '''
        try:
            ApplicationBase.Start(self, logger, moduleName, filelocs)
            info = self.logger.info
            prefix = self.moduleName
            info(prefix + " - Processing: ")
            outputCSVfileName = self.localTempDirectory + '/PheonixDocuments.csv'

            info(prefix + " - Pull documents from Phoenix: ")
            jsonDocuments = self.PullDataFromPhoenix()
            info(prefix + " - save contents to CSV file from Phoenix: ")
            self.ExportToCSV(outputCSVfileName, jsonDocuments)
            info(prefix + " - push documents csv file to S3: ")
            bucketName, s3TempKey = S3Utilities.UploadFileToS3Temp(
                self.awsParams.s3, outputCSVfileName)

            info(prefix + " - Create document table: ")
            psConnect = self.GetPSConnection()
            self.CreatePostgresTables(psConnect)

            info(prefix + " - pull document s3 to database server temp: ")
            postgresTempFile = self.DownloadFromS3ToPSTempDir(
                psConnect, bucketName, s3TempKey)
            info(prefix + " - load documents csv file: ")
            self.LoadDataFromPostgresTempDir(psConnect, postgresTempFile)
            info(prefix + " - clean up temp file: ")
            S3Utilities.DeleteFile(self.awsParams.s3, bucketName, s3TempKey)
        except:
            logger.exception(moduleName + " - Exception in start!")
            raise
Beispiel #10
0
    def Start(self, logger, moduleName, filelocs):
        '''
        Entry point: register the ETL run, process all categories, and
        optionally remove the local temp folder afterwards.
        '''
        currProcId = None
        try:
            ApplicationBase.Start(self, logger, moduleName, filelocs)
            self.logger.debug(self.moduleName + " -- " + " starting ")
            currProcId = self.etlUtilities.GetRunID(filelocs["tblEtl"]["table"],
                                                    self.moduleName)
            self.ProcessCategories()
            # Wipe the working directory when the job asks for it.
            if self.job["cleanlocal"] == "Y":
                self.fileUtilities.RemoveFolder(self.localTempDirectory)

            self.logger.debug(self.moduleName + " -- " + " finished ")
        except Exception as err:
            self.logger.exception(moduleName + " - Exception! Error: " +
                                  err.message)
            # Mark the run instance as failed before propagating the error.
            if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                                  currProcId, 'F') is not True:
                self.logger.info(self.moduleName +
                                 " - we could not Complete Instance.")
            raise Exception(err.message)
Beispiel #11
0
    def Start(self, logger, moduleName, filelocs):
        '''
        Entry point: register the ETL run, prepare the working folders,
        then process every database flagged for execution.
        '''
        currProcId = None
        try:
            ApplicationBase.Start(self, logger, moduleName, filelocs)
            self.logger.debug(self.moduleName + " -- " + " starting ")
            currProcId = self.etlUtilities.GetRunID(filelocs["tblEtl"]["table"],
                                                    self.moduleName)
            # Prepare the local working folder layout for this module.
            fu = self.fileUtilities
            fu.moduleName = self.moduleName
            fu.localBaseDirectory = self.localTempDirectory
            fu.CreateFolders(self.job["folders"])

            self.fromDate = self.GetFromDate()
            for databaseSettings in self.job["Databases"]:
                if databaseSettings["execute"] != 'Y':
                    self.logger.debug(self.moduleName + " -- skip database " +
                                      databaseSettings["common"]["name"])
                    continue
                self.ProcessDatabase(databaseSettings)

            self.logger.debug(self.moduleName + " -- " + " finished ")
        except Exception as err:
            self.logger.exception(moduleName + " - Exception! Error: " +
                                  err.message)
            # Mark the run instance as failed before propagating the error.
            if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                                  currProcId, 'F') is not True:
                self.logger.info(self.moduleName +
                                 " - we could not Complete Instance.")
            raise Exception(err.message)
Beispiel #12
0
    def Start(self, logger, moduleName, filelocs):
        '''
        Entry point: transform the source data, upload it to S3, load the
        current, history and cross-reference tables, then run post-ETL.
        '''
        try:
            ApplicationBase.Start(self, logger, moduleName, filelocs)

            tempDir = self.localTempDirectory
            self.csvFile = tempDir + "/" + self.job["fileNameOut"]
            self.csvFileHistory = tempDir + "/" + self.job["fileNameOutHistory"]

            self.GetAndTransform()
            self.UploadToS3()

            s3Dir = self.job["s3ToDirectory"]
            tableName = self.job["tableName"]
            xref = self.job["xReference"]
            # Load the current data, then the history, then the xref table.
            self.LoadAllFromS3(s3Dir + '/' + self.job["fileNameOut"] + '.gz',
                               tableName)
            self.LoadAllFromS3(s3Dir + '/' + self.job["fileNameOutHistory"] + '.gz',
                               tableName + '_history')
            self.LoadAllFromS3(xref["s3DataDirectory"],
                               tableName + xref["tableNameSfx"])
            self.ExecutePostETL()
        except Exception as err:
            self.logger.exception(moduleName + " - Exception! Error: " +
                                  err.message)
            raise Exception(err.message)
Beispiel #13
0
    def Start(self, logger, moduleName, filelocs):
        '''
        Entry point: download the raw files, convert them to CSV, upload
        to S3 and load into Redshift, cleaning up locally if configured.
        '''
        currProcId = None
        try:
            ApplicationBase.Start(self, logger, moduleName, filelocs)
            self.logger.debug(self.moduleName + " -- " + " starting ")
            currProcId = self.etlUtilities.GetRunID(filelocs["tblEtl"]["table"],
                                                    self.moduleName)

            base = self.localTempDirectory + "/"
            self.rawFolder = base + "Raw"
            self.csvFolder = base + "CSV"

            self.CheckWorkingFolders()
            self.BulkDownload()
            self.ProcessFiles()
            self.BulkUploadToS3()
            self.LoadAllFromS3()
            # Wipe the working directory when the job asks for it.
            if self.job["cleanlocal"] == "Y":
                self.fileUtilities.RemoveFolder(self.localTempDirectory)
            self.logger.debug(self.moduleName + " -- " + " finished ")
        except Exception as err:
            self.logger.exception(moduleName + " - Exception! Error: " +
                                  err.message)
            # Mark the run instance as failed before propagating the error.
            if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                                  currProcId, 'F') is not True:
                self.logger.info(self.moduleName +
                                 " - we could not Complete Instance.")
            raise Exception(err.message)
Beispiel #14
0
    def Start(self, logger, moduleName, filelocs):
        '''
        Entry point: process every configured table, load its data from
        Athena into Redshift, and optionally clean the local temp folder.
        '''
        currProcId = None
        try:
            ApplicationBase.Start(self, logger, moduleName, filelocs)
            self.logger.debug(self.moduleName + " -- " + " starting ")
            currProcId = self.etlUtilities.GetRunID(filelocs["tblEtl"]["table"],
                                                    self.moduleName)

            for table in self.job["tables"]:
                self.ProcessTable(table)
                # Load via the local scripts (an S3-script variant exists
                # but is not used here).
                self.LoadDataFromAthenaIntoRedShiftLocalScripts(table)

            # Wipe the working directory when the job asks for it.
            if self.job["cleanlocal"] == "Y":
                self.fileUtilities.RemoveFolder(self.localTempDirectory)
            self.logger.debug(self.moduleName + " -- " + " finished ")
        except Exception as err:
            self.logger.exception(moduleName + " - Exception! Error: " +
                                  err.message)
            # Mark the run instance as failed before propagating the error.
            if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],
                                                  currProcId, 'F') is not True:
                self.logger.info(self.moduleName +
                                 " - we could not Complete Instance.")
            raise Exception(err.message)
Beispiel #15
0
    def Start(self, logger, moduleName, filelocs):
        '''
        Main routine.

        Downloads the sample-data Excel workbook from S3, extracts the
        "Major Variables" sheet into a CSV, stages the CSV in a temporary
        S3 location, loads it into Postgres via the database server's temp
        directory, and finally populates the base attribute/data tables.

        Parameters:
            logger     -- logger for progress and error reporting
            moduleName -- name of this module (used in log messages)
            filelocs   -- folder configuration passed to the base class

        Raises:
            Re-raises any exception after logging it.
        '''
        try:
            ApplicationBase.Start(self, logger, moduleName, filelocs)

            s3Key = self.job["s3SrcDirectory"] + "/" + self.job["fileToLoad"]
            self.logger.info(self.moduleName + " - Processing file: " + s3Key)

            localFilepath = self.localTempDirectory + "/" + ntpath.basename(
                s3Key)
            S3Utilities.DownloadFileFromS3(self.awsParams.s3,
                                           self.job["bucketName"], s3Key,
                                           localFilepath)

            # Read the "Major Variables" sheet.  Presumably the data lives
            # in columns C:E and G:I with one header row to skip -- TODO
            # confirm against the workbook layout.
            df = pd.read_excel(localFilepath,
                               "Major Variables",
                               index_col=None,
                               na_values=['NaN'],
                               skiprows=1,
                               parse_cols="C:E,G:I",
                               header=None)

            #  Save the data as CSV
            outputCSVfileName = self.localTempDirectory + '/SampleData.csv'
            df.to_csv(outputCSVfileName,
                      sep=str(self.job["delimiter"]),
                      encoding='utf-8',
                      index=False)

            # Update the CSV file into a temporary S3 location.  Postgres will download it from there to its local directory
            bucketName, s3TempKey = S3Utilities.UploadFileToS3Temp(
                self.awsParams.s3, outputCSVfileName)

            psConnect = self.GetPSConnection()
            # Postgres tables are created using a connection (rather than psql)
            self.CreatePostgresTables(psConnect)

            postgresTempFile = self.DownloadFromS3ToPSTempDir(
                psConnect, bucketName, s3TempKey)
            self.LoadDataFromPostgresTempDir(psConnect, postgresTempFile)

            # Remove the staged CSV from the temporary S3 location.
            S3Utilities.DeleteFile(self.awsParams.s3, bucketName, s3TempKey)

            # Load base attributes, then one data series per model id.
            self.LoadBaseAttributes(psConnect)
            self.LoadBaseData(psConnect, '1000', 'glm_value')
            self.LoadBaseData(psConnect, '2000', 'arima_value')
            self.LoadBaseData(psConnect, '3000', 'lasso_value')
            #           self.LoadBaseData(psConnect,'4000', 'nn_value')
            #            self.LoadBaseData(psConnect,'5000', 'spectre_value')

            psConnect.close()
            # NOTE(review): the message says RedShift but the load above
            # targets Postgres -- confirm which store is meant.
            self.logger.debug(" SampleData CSV loaded to RedShift")

        except:
            logger.exception(moduleName + " - Exception in start!")
            raise
Beispiel #16
0
    def Start(self, logger, moduleName, filelocs):
        '''
        Main routine.

        Registers the ETL run, prepares the working folders, creates the
        destination tables, converts the most recent Excel workbook found
        in the scanned folder into a gzipped CSV, uploads it to S3 and
        loads it into Redshift.

        Parameters:
            logger     -- logger for progress and error reporting
            moduleName -- name of this module (used in log messages)
            filelocs   -- folder/ETL configuration; requires filelocs["tblEtl"]

        Raises:
            Exception -- re-raised after marking the run instance failed.
        '''
        currProcId = None
        try:
            ApplicationBase.Start(self, logger, moduleName, filelocs)
            self.logger.debug(self.moduleName + " -- " + " starting ")
            currProcId = self.etlUtilities.GetRunID(
                filelocs["tblEtl"]["table"], self.moduleName)
            ###
            #  set up to run create folder
            ###
            self.fileUtilities.moduleName = self.moduleName
            self.fileUtilities.localBaseDirectory = self.localTempDirectory
            self.fileUtilities.CreateFolders(self.job["folders"])
            self.fromDate = self.GetFromDate()
            ###
            localFilepath = self.GetMostRecentFile(self.job["foldertoscan"])
            #            localFilepath = r'C:\tmp\IHS Markit Outlook for Global Oil Market Fundamentals - September 2017.xlsx'
            # NOTE(review): outPutFileName, outputGZ and tableJson are
            # overwritten on every iteration, so only the LAST entry of
            # job["tables"] is used by the code after this loop -- confirm
            # this is intentional.
            for tables in self.job["tables"]:
                fname = self.fileUtilities.CreateTableSql(
                    tables, self.fileUtilities.sqlFolder)
                RedshiftUtilities.PSqlExecute(fname, self.logger)
                outPutFileName = self.fileUtilities.csvFolder +\
                                 self.fromDate +\
                                 "_" + tables["table"]  + '.csv'
                outputGZ = self.fileUtilities.gzipFolder + self.fromDate +\
                           "_" + tables["table"]  + '.csv.gz'
                tableJson = tables
            xl = ExcelUtilities(logger)
            # Open the CSV in the mode appropriate for the interpreter
            # version.  NOTE(review): csvfile stays unbound if
            # sys.version[0] is neither '2' nor '3'.
            if sys.version[0] == '3':
                csvfile = open(outPutFileName, 'w', newline='')
            elif sys.version[0] == '2':
                csvfile = open(outPutFileName, 'wb')
            csvWriter = csv.writer(csvfile, quoting=csv.QUOTE_ALL)

            # Convert the workbook (when one was found) into the CSV.
            if localFilepath is not None:
                self.ProcessFile(xl, localFilepath, csvWriter)

            csvfile.close()

            self.fileUtilities.GzipFile(outPutFileName, outputGZ)
            self.BulkUploadToS3()
            self.LoadData(tableJson)

            # Wipe the working directory when the job asks for it.
            if self.job["cleanlocal"] == "Y":
                self.fileUtilities.RemoveFolder(self.localTempDirectory)

            self.logger.debug(self.moduleName + " -- " + " finished ")
        except Exception as err:
            self.logger.exception(moduleName + " - Exception! Error: " +
                                  err.message)
            # Mark the run instance as failed before re-raising.
            if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],\
                                             currProcId, 'F') is not True:
                self.logger.info(self.moduleName +
                                 " - we could not Complete Instance.")
            raise Exception(err.message)
Beispiel #17
0
 def Start(self, logger, moduleName, filelocs):
     '''
     Entry point: process every configured source file from S3.
     '''
     try:
         ApplicationBase.Start(self, logger, moduleName, filelocs)
         for fileParams in self.job["srcFileParameters"]:
             self.ProcessS3File(fileParams)
     except:
         logger.exception(moduleName + " - Exception!")
         raise
Beispiel #18
0
 def Start(self, logger, moduleName, filelocs):
     '''
     Project entry point: fetch the input file from S3 and hand it to the
     Form1 handler for processing.
     '''
     try:
         ApplicationBase.Start(self, logger, moduleName, filelocs)
         self.GetFileFromS3()
         # Form1 performs the actual processing of the downloaded file.
         Form1(logger, self.fileUtilities, self.localTempDirectory, self.job,
               self.awsParams)
     except:
         self.logger.exception(moduleName + " - Exception!")
         raise
Beispiel #19
0
 def Start(self, logger, moduleName, filelocs):
     '''
     Application entry point: download the source files from S3 and
     delegate the processing to the ISO handler.
     '''
     try:
         ApplicationBase.Start(self, logger, moduleName, filelocs)
         self.DownloadFilesFromS3()
         ISO(logger, self.fileUtilities, self.localTempDirectory, self.job,
             self.awsParams)
     except Exception as ex:
         self.logger.exception(moduleName + " - Exception!")
         self.logger.exception("{}".format(str(ex)))
         raise
Beispiel #20
0
    def Start(self, logger, moduleName, filelocs):
        '''
        Demo entry point: count up from a fixed seed and log the final
        value reached.
        '''
        ApplicationBase.Start(self, logger, moduleName, filelocs)
        seedVal = 100
        logger.info("Starting count with seed %s in Counter" % (seedVal))
        for step in range(seedVal):
            val = seedVal + step
        logger.info(
            "this is the starting seed %s and the end value was %s in Counter"
            % (seedVal, val))
Beispiel #21
0
    def Start(self, logger, moduleName, filelocs):
        '''
        Entry point for this ETL module.

        Registers the run, creates the Redshift tables, extracts the data
        bounded by the saved last-run parameters, gzips and uploads it to
        S3, loads it, records the max update date back on the run instance,
        and finally updates the ETL control table.

        Parameters:
            logger     -- logger for progress and error reporting
            moduleName -- name of this module (used in log messages)
            filelocs   -- folder/ETL configuration; requires filelocs["tblEtl"]

        Raises:
            Exception -- re-raised after marking the run instance failed.
        '''
        currProcId = None
        try:
            ApplicationBase.Start(self, logger, moduleName, filelocs)
            self.logger.debug(self.moduleName + " -- " + " starting ")
            currProcId = self.etlUtilities.GetRunID(
                filelocs["tblEtl"]["table"], self.moduleName)

            # Parameters saved by previous runs (e.g. the lastrun date).
            paramsList = self.GetParamsList(filelocs["tblEtl"]["table"])
            ###
            #  set up to run create folder
            ###
            self.fileUtilities.moduleName = self.moduleName
            self.fileUtilities.localBaseDirectory = self.localTempDirectory
            self.fileUtilities.CreateFolders(self.job["folders"])
            ###
            # Create (or recreate) every destination table up front.
            for tblJson in self.job["tables"]:
                fname = self.fileUtilities.CreateTableSql(
                    tblJson, self.fileUtilities.sqlFolder)
                RedshiftUtilities.PSqlExecute(fname, self.logger)

            # Extract the data since the last run into a gzipped CSV.
            sqlPullDataScript, fromDate = self.CreatePullScript(paramsList)
            outputCSV = self.fileUtilities.csvFolder + fromDate + self.moduleName + ".CSV"
            outputGZ = self.fileUtilities.gzipFolder + fromDate + self.moduleName + '.csv.gz'
            self.BulkExtract(sqlPullDataScript, outputCSV)
            self.fileUtilities.GzipFile(outputCSV, outputGZ)
            self.BulkUploadToS3()
            for tblJson in self.job["tables"]:
                if "s3subfolder" in tblJson:
                    self.LoadData(tblJson["s3subfolder"], tblJson)
                    # Save the newest update date on the run instance so the
                    # next run can pull incrementally from that point.
                    maxDate = self.GetMaxUpdateDate(tblJson)
                    sMaxDate = maxDate["lastrun"].strftime('%m/%d/%Y')
                    if self.etlUtilities.SetInstanceParameters(filelocs["tblEtl"]["table"],\
                                                               currProcId,\
                                                               json.dumps({"lastrun":sMaxDate})) is not True:
                        self.logger.info(self.moduleName +
                                         " - we could not set the instance.")
            self.UpdateTable(filelocs["tblEtl"]["schemaName"],
                             filelocs["tblEtl"]["table"], self.job["tables"],
                             currProcId)
            # Wipe the working directory when the job asks for it.
            if self.job["cleanlocal"] == "Y":
                self.fileUtilities.RemoveFolder(self.localTempDirectory)
            self.logger.debug(self.moduleName + " -- " + " finished ")
        except Exception as err:
            self.logger.exception(moduleName + " - Exception! Error: " +
                                  err.message)
            # Mark the run instance as failed before re-raising.
            if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],\
                                             currProcId, 'F') is not True:
                self.logger.info(self.moduleName +
                                 " - we could not Complete Instance.")
            raise Exception(err.message)
Beispiel #22
0
    def Start(self, logger, moduleName, filelocs):
        '''
        Main routine.

        Registers the ETL run, pulls the ENP data out of an Access
        database, writes it to CSV, and loads the CSV into Redshift.

        Parameters:
            logger     -- logger for progress and error reporting
            moduleName -- name of this module (used in log messages)
            filelocs   -- folder/ETL configuration; requires filelocs["tblEtl"]

        Raises:
            Exception -- re-raised after marking the run instance failed.
        '''
        currProcId = None
        try:
            ApplicationBase.Start(self, logger, moduleName, filelocs)
            self.logger.debug(self.moduleName + " -- " + " starting ")
            currProcId = self.etlUtilities.GetRunID(
                filelocs["tblEtl"]["table"], self.moduleName)
            ###
            #  establish connection to Access database
            ###
            conn = self.EstablishConnection()
            cur = conn.cursor()
            sqlline = self.FixSQLStatement()
            cur.execute(sqlline)

            # Dump the result set to a local CSV file.
            outputfileName = self.localTempDirectory + '/ENPdata.csv'
            self.ConvertToCSV(cur, outputfileName)
            ###
            #  load the CSV to RedShift
            ###
            self.logger.debug(self.moduleName + " - ENP load CSV to RedShift")

            rsConnect = self.etlUtilities.GetAWSConnection(self.awsParams)

            RedshiftUtilities.LoadFileIntoRedshift(
                rsConnect, self.awsParams.s3, self.logger, self.fileUtilities,
                outputfileName, self.job["destinationSchema"],
                self.job["tableName"], self.job["fileFormat"],
                self.job["dateFormat"], self.job["delimiter"])

            self.logger.debug(self.moduleName +
                              " - ENP CSV loaded to RedShift")

            # Cleanup
            # NOTE(review): these handles are only closed on the success
            # path; an exception leaves them open until garbage collection
            # -- consider a finally block.
            rsConnect.close()
            cur.close()
            conn.close()
            # Wipe the working directory when the job asks for it.
            if self.job["cleanlocal"] == "Y":
                self.fileUtilities.RemoveFolder(self.localTempDirectory)
            self.logger.debug(self.moduleName + " -- " + " finished ")
        except Exception as err:
            self.logger.exception(moduleName + " - Exception! Error: " +
                                  err.message)
            # Mark the run instance as failed before re-raising.
            if self.etlUtilities.CompleteInstance(filelocs["tblEtl"]["table"],\
                                             currProcId, 'F') is not True:
                self.logger.info(self.moduleName +
                                 " - we could not Complete Instance.")
            raise Exception(err.message)
Beispiel #23
0
 def Start(self, logger, moduleName, filelocs):
     '''
     Entry point: download the source files, process and pack them, push
     the packed output to S3, and load the tables.
     '''
     try:
         ApplicationBase.Start(self, logger, moduleName, filelocs)
         self.CreatePackedFolder()
         downloaded = self.DownloadFiles()
         self.ProcessFiles(downloaded)
         self.UploadPackedToS3()
         self.LoadTables()
     except Exception:
         logger.exception(moduleName + " - Exception!")
         raise
Beispiel #24
0
    def Start(self, logger, moduleName, filelocs):
        '''
        Main routine.

        Builds the temporary tables, locates and processes the latest
        liquids balance file, then runs the unpivot and temp-table cleanup
        scripts.

        Parameters:
            logger     -- logger for progress and error reporting
            moduleName -- name of this module (used in log messages)
            filelocs   -- folder configuration passed to the base class

        Raises:
            Re-raises any exception after logging it with its traceback.
        '''
        try:
            ApplicationBase.Start(self, logger, moduleName, filelocs)

            self.moduleName = moduleName
            self.CreateTables(self.job["tempTablesScript"])
            self.SetLastLiquidsBalanceFileInfo()
            self.ProcessLiquidBalanceFile()
            self.CreateTables(self.job["unpivotScript"])
            self.CreateTables(self.job["cleanTempTablesScript"])
        except Exception:
            # logger.exception records the traceback (logger.error did not),
            # and a bare `raise` preserves the original traceback instead of
            # re-raising with a fresh one -- matching the error-handling
            # convention used by the other modules in this project.
            self.logger.exception(self.moduleName + " - Exception in start.")
            raise
Beispiel #25
0
 def Start(self, logger, moduleName, filelocs):
     '''
     Entry point: reset the working folders, then download and process
     the source files.
     '''
     try:
         ApplicationBase.Start(self, logger, moduleName, filelocs)
         # Both folders are deleted and recreated so each run starts clean.
         for folder in (self.localTempDirectory,
                        self.localTempDirectory + "/cleaned/"):
             self.fileUtilities.EmptyFolderContents(folder)
         self.DownloadFiles()
         self.ProcessFiles()
     except Exception:
         logger.exception(moduleName + " - Exception!")
         raise
Beispiel #26
0
 def Start(self, logger, moduleName, filelocs):
     '''
     Starting point of this Project: create the working folders, process
     the data, upload the packed output to S3, load the files into
     Redshift, empty the packed folder and run the post-load ETL.

     Raises:
         Whatever exception occurs, re-raised after being logged with
         its traceback.
     '''
     try:
         ApplicationBase.Start(self, logger, moduleName, filelocs)
         self.dbfUtilities = DBFUtilities(logger)
         self.CreateFolders()
         self.Process()
         self.UploadPackedToS3()
         self.LoadFilesIntoRedshift()
         self.EmptyPackedFolder()
         self.PostLoadETL()
     except Exception:
         # Narrowed from a bare "except:" so SystemExit/KeyboardInterrupt
         # propagate instead of being logged as module failures.
         self.logger.exception(moduleName + " - Exception!")
         raise
Beispiel #27
0
    def Start(self, logger, moduleName, filelocs):
        '''
        Main starting routine: register an ETL run, create the working
        folders and tables, build the output CSV, gzip it, upload it to S3,
        load the target tables and update the ETL bookkeeping table.

        On failure the registered ETL instance (if any) is marked 'F' and
        an Exception carrying the original message is raised.
        '''
        currProcId = None
        try:
            ApplicationBase.Start(self, logger, moduleName, filelocs)
            self.logger.debug(self.moduleName + " -- " + " starting ")
            currProcId = self.etlUtilities.GetRunID(
                filelocs["tblEtl"]["table"], self.moduleName)
            ###
            #  set up to run create folder
            ###
            self.fileUtilities.moduleName = self.moduleName
            self.fileUtilities.localBaseDirectory = self.localTempDirectory
            self.fileUtilities.CreateFolders(self.job["folders"])
            ###
            for tblJson in self.job["tables"]:
                fname = self.fileUtilities.CreateTableSql(
                    tblJson, self.fileUtilities.sqlFolder)
                RedshiftUtilities.PSqlExecute(fname, self.logger)
                # Remember the last configured S3 subfolder for the upload below.
                if "s3subfolder" in tblJson:
                    self.s3subFolder = tblJson["s3subfolder"]

            outputFileName = self.ProcessRequest()

            outputCSV = outputFileName
            outputGZ = self.fileUtilities.gzipFolder + self.moduleName + '.csv.gz'
            self.fileUtilities.GzipFile(outputCSV, outputGZ)
            self.BulkUploadToS3(self.s3subFolder)
            for tblJson in self.job["tables"]:
                if "s3subfolder" in tblJson:
                    self.LoadData(tblJson["s3subfolder"], tblJson)
            self.UpdateTable(filelocs["tblEtl"]["schemaName"],
                             filelocs["tblEtl"]["table"], self.job["tables"],
                             currProcId)
            if self.job["cleanlocal"] == "Y":
                self.fileUtilities.RemoveFolder(self.localTempDirectory)
            self.logger.debug(self.moduleName + " -- " + " finished ")
        except Exception as err:
            # str(err) instead of err.message: BaseException.message is a
            # Python-2-only attribute and raises AttributeError on Python 3,
            # which would mask the real failure.
            self.logger.exception(moduleName + " - Exception! Error: " +
                                  str(err))
            if self.etlUtilities.CompleteInstance(
                    filelocs["tblEtl"]["table"], currProcId, 'F') is not True:
                self.logger.info(self.moduleName +
                                 " - we could not Complete Instance.")
            raise Exception(str(err))
Beispiel #28
0
 def Start(self, logger, moduleName, filelocs):
     '''
     Entry point for the module: fetch the last recorded modification
     time, process the files against it, load the ERCOT tables, persist
     the newest modification time seen, then clean up and run the
     post-load ETL. Any exception is logged with a traceback and re-raised.
     '''
     try:
         ApplicationBase.Start(self, logger, moduleName, filelocs)
         self.packedFolder = self.localTempDirectory + "/packed/"
         self.CreateFolders()
         # The previously stored timestamp drives which files get processed.
         previousModifiedDatetime = self.GetLastModifiedDatetime(filelocs)
         newestModifiedDatetime = self.ProcessFiles(previousModifiedDatetime)
         self.UploadPackedToS3()
         self.LoadErcotTables()
         # Persist the newest modification time for the next run.
         self.SetLastModifiedDatetime(
             filelocs, DatetimeUtilities.ConvertToSTR(newestModifiedDatetime))
         self.EmptyPackedFolder()
         self.PostLoadETL()
     except Exception:
         logger.exception(moduleName + " - Exception!")
         raise
Beispiel #29
0
    def Start(self, logger, moduleName, filelocs):
        '''
        Start of routine: clean the working folders, synchronize the
        source folder, clean up and pack the data, upload the packed
        output to S3 and load the air-markets tables.

        Raises:
            Whatever exception occurs, re-raised after being logged with
            its traceback.
        '''
        try:
            ApplicationBase.Start(self, logger, moduleName, filelocs)

            # Working sub-folders rooted under the module's temp directory.
            self.tempFolder = self.localTempDirectory + "/Temp"
            self.packedFolder = self.localTempDirectory + "/Packed"
            self.rawDataFolder = self.localTempDirectory + "/RawData"

            self.CleanWorkingFolders()
            self.SynchronizeSourceFolder()
            self.CleanUpAndPack()
            self.UploadPackedToS3()
            self.LoadAirMarketsTables()
        except Exception:
            # Narrowed from a bare "except:" so SystemExit/KeyboardInterrupt
            # propagate instead of being logged as module failures.
            logger.exception(moduleName + " - Exception!")
            raise
Beispiel #30
0
 def Start(self, logger, moduleName, filelocs):
     '''
     Start of routine: register an ETL run, process the request and,
     when configured, remove the local temp folder.

     On failure the registered ETL instance is marked 'F' and an
     Exception carrying the original message is raised.
     '''
     currProcId = None
     try:
         ApplicationBase.Start(self, logger, moduleName, filelocs)
         currProcId = self.etlUtilities.GetRunID(
             filelocs["tblEtl"]["table"], self.moduleName)
         self.ProcessRequest()
         # Optional local cleanup, controlled by the job configuration.
         if self.job["cleanlocal"] == "Y":
             self.fileUtilities.RemoveFolder(self.localTempDirectory)
     except Exception as err:
         # str(err) instead of err.message: BaseException.message is a
         # Python-2-only attribute and raises AttributeError on Python 3,
         # which would mask the real failure.
         self.logger.exception(moduleName + " - Exception! Error: " +
                               str(err))
         if self.etlUtilities.CompleteInstance(
                 filelocs["tblEtl"]["table"], currProcId, 'F') is not True:
             self.logger.info(self.moduleName +
                              " - we could not Complete Instance.")
         raise Exception(str(err))