def Main():
    '''
    Main application that loads the configuration and runs the applications
    '''
    ###
    # Use the config file passed as the first command-line argument,
    # falling back to ProcessStepsConfig.json when none was supplied
    ###
    processParams = GetMainParameters()
    processParams.configfile = sys.argv[1] if len(sys.argv) >= 2 else 'ProcessStepsConfig.json'
    processParams.LoadConfigFile()

    ###
    # Construct the log and output directory relative to the application
    # directory so the application works on Windows and Linux without change
    ###
    outputLocations = processParams.configdata["outputLocations"]
    fileLocation = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + outputLocations["locationSuffix"]
    for folderKey in ("relativeLoggingFolder", "relativeOutputfolder"):
        outputLocations[folderKey] = os.path.join(fileLocation, outputLocations[folderKey])

    logger = FileUtilities.CreateLogger(outputLocations["relativeLoggingFolder"],
                                        processParams.configdata["debuggingLevel"])
    logger.info("*** Starting Main Application.")

    ###
    # Collect every process flagged for execution.  Entries marked with
    # platform 'W' are Windows-only and run solely when the current
    # platform's first letter matches; all other entries always run.
    ###
    cPlat = platform.system().upper()[:1]
    processArray = [proc
                    for proc in processParams.configdata["Processes"]
                    if proc["execute"] == "Y"
                    and (proc["platform"] != 'W' or proc["platform"] == cPlat)]

    ##
    # let's get started
    ##
    ProcessApps(logger, processArray, outputLocations)
    logger.info("*** Main Application Complete.")
def CreateTables(self):
    '''
    Create each table listed in the catalog by generating its SQL file
    and executing it against Redshift.

    Requires self.commonParams to be populated with "moduleName",
    "loggerParams", "sqlFolder" and "cat" (containing a "tables" list),
    and self.fileUtilities to be initialised.

    Raises: re-raises any exception after logging it.
    '''
    try:
        self.moduleName = self.commonParams["moduleName"]
        self.logger = FileUtilities.CreateLogger(
            self.commonParams["loggerParams"])
        for tblJson in self.commonParams["cat"]["tables"]:
            # Generate the CREATE TABLE SQL file for this catalog entry,
            # then run it via psql
            fname = self.fileUtilities.CreateTableSql(
                tblJson, self.commonParams["sqlFolder"])
            RedshiftUtilities.PSqlExecute(fname, self.logger)
    except Exception:
        # Bug fix: message previously said "StartHere" (copy/paste from a
        # sibling routine); it now names the routine that actually failed
        self.logger.exception(self.commonParams["moduleName"] +
                              "- we had an error in CreateTables")
        raise
def StartHere(self):
    '''
    Initial starting routine: pull the zip file named by self.fl to the
    local instance, unzip it into a per-batch folder, process the
    contents of each extracted directory and re-gzip the results.

    Requires self.commonParams to contain "moduleName", "loggerParams",
    "zipFolder" and "localTempDirectory", and self.fl to hold the zip
    file path.

    Raises: re-raises any exception after logging it.
    '''
    try:
        self.moduleName = self.commonParams["moduleName"]
        self.logger = FileUtilities.CreateLogger(
            self.commonParams["loggerParams"])
        self.logger.info("zipfile = " + self.fl + " started " +
                         datetime.now().strftime('%Y-%m-%d %I:%M:%S'))
        ###
        # pull this file to local instance
        ###
        fileName = ntpath.basename(self.fl)
        # batch folder is named after the zip file without its extension
        batchFolderName = re.sub(r'\.zip$', '', fileName)
        ###
        # make sure we have this folder (recreate it empty)
        ###
        self.fileUtilities = FileUtilities(self.logger)
        segZipFolder = self.commonParams[
            "zipFolder"] + batchFolderName + "/"
        self.fileUtilities.RemoveFolder(segZipFolder)
        self.fileUtilities.CreateFolder(segZipFolder)
        localGzipFilepath = self.commonParams[
            "localTempDirectory"] + "/" + fileName
        self.fileUtilities.UnzipFile(localGzipFilepath, segZipFolder)
        zipContentFolder = re.sub(r'\/$', '', segZipFolder)
        # process every directory the archive expanded into
        directories = [
            fName for fName in os.listdir(segZipFolder)
            if os.path.isdir(os.path.join(segZipFolder, fName))
        ]
        for dirs in directories:
            zipContentFolder = os.path.join(segZipFolder, dirs)
            self.ProcessZipContents(zipContentFolder, batchFolderName)
        self.GZipItUp(batchFolderName)
        self.logger.info("zipfile = " + self.fl + " finished " +
                         datetime.now().strftime('%Y-%m-%d %I:%M:%S'))
    except Exception:
        # Bug fix: was a bare "except:", which also swallowed
        # SystemExit/KeyboardInterrupt before re-raising; narrowed to
        # Exception per best practice
        self.logger.exception(self.commonParams["moduleName"] +
                              "- we had an error in StartHere")
        raise
{ "name": "Conc_Gross_LE_8_TDR_Short_Other", "type": "REAL" }, { "name": "Conc_Net_LE_4_TDR_Long_Other", "type": "REAL" }, { "name": "Conc_Net_LE_4_TDR_Short_Other", "type": "REAL" }, { "name": "Conc_Net_LE_8_TDR_Long_Other", "type": "REAL" }, { "name": "Conc_Net_LE_8_TDR_Short_Other", "type": "REAL" }, { "name": "Contract_Units", "type": "VARCHAR", "size": "200" }, { "name": "CFTC_Contract_Market_Code_Quotes", "type": "VARCHAR", "size": "30" }, { "name": "CFTC_Market_Code_Quotes", "type": "VARCHAR", "size": "30" }, { "name": "CFTC_Commodity_Code_Quotes", "type": "VARCHAR", "size": "30" }, { "name": "CFTC_SubGroup_Code", "type": "VARCHAR", "size": "30" }, { "name": "FutOnly_or_Combined", "type": "VARCHAR", "size": "40" } ] }''' table = json.loads(tableString) logger = FileUtilities.CreateLogger("log", 10) os.environ["SPARK_HOME"] = "C:/WorkSpaceEclipse36/SparkWindows/spark" os.environ["HADOOP_HOME"] = "C:/WorkSpaceEclipse36/SparkWindows/hadoop" sc, sqlContext = SparkUtilities.CreateSparkContext(logger) samplejson = '''{ "fields": [ {"metadata": {}, "nullable": true, "name": "sourceset", "type": "string"}, {"metadata": {}, "nullable": true, "name": "sourcesetdesc", "type": "string"}, {"metadata": {}, "nullable": true, "name": "market_and_exchange_names", "type": "string"}, {"metadata": {}, "nullable": true, "name": "as_of_date_in_form_yymmdd", "type": "integer"}, {"metadata": {}, "nullable": true, "name": "report_date_as_yyyy_mm_dd", "type": "string"} ], "type": "struct"
def Main(self, option):
    '''
    Main application that loads the configuration and runs the
    processing step selected by *option*.
    '''
    ###
    # The configuration always comes from jobConfig.json in this
    # module's folder (no command-line override here)
    ###
    self.processParams.configfile = self.location + '/jobConfig.json'
    self.processParams.LoadConfigFile()

    ###
    # Because we are not at the root, the config file identifies the
    # working folder; build the logging/output/input folders under it so
    # the application works on Windows and Linux without change
    ###
    outputLocations = self.processParams.configdata["outputLocations"]
    workRoot = outputLocations["workingFolder"] + outputLocations["locationSuffix"]
    for folderKey in ("relativeLoggingFolder", "relativeOutputfolder", "relativeInputfolder"):
        outputLocations[folderKey] = os.path.join(workRoot, outputLocations[folderKey])

    logger = FileUtilities.CreateLogger(outputLocations["relativeLoggingFolder"])
    ApplicationBase.Start(self, logger, self.moduleName, outputLocations)
    self.location = FileUtilities.PathToForwardSlash(os.path.dirname(os.path.abspath(__file__)))

    # Dispatch on the requested step; unknown options are logged as errors
    if option == 'PD':
        logger.info("*** Magellan PullData started")
        self.PullData()
        logger.info("*** Magellan PullData completed")
    elif option == 'SE':
        logger.info("*** Magellan SetupEnvironment started")
        self.SetupEnvironment()
        logger.info("*** Magellan SetupEnvironment completed")
    elif option == 'PJ':
        # fBatch = "20170516164925+0200-002111-XML.zip"
        fBatch = sys.argv[2]
        logger.info("*** Magellan ProcessJsonFiles started for " + fBatch)
        self.ProcessJsonFiles(fBatch)
        logger.info("*** Magellan ProcessJsonFiles completed for " + fBatch)
    elif option == 'LD':
        logger.info("*** Magellan LoadAllData started")
        self.LoadAllData()
        logger.info("*** Magellan LoadAllData completed")
    elif option == 'CL':
        logger.info("*** Magellan CleanupArea started")
        self.CleanupArea()
        logger.info("*** Magellan CleanupArea completed")
    elif option == 'EL':
        logger.info("*** Magellan Application Complete.")
    elif option == 'SL':
        logger.info("*** Starting Magellan Application.")
    elif option == 'LP':
        logger.info("NOTE: " + sys.argv[2])
    else:
        logger.error("*** Magellan Application invalid option.")