class Process():
    """
    Purpose: Holds the logger, the Netezza and Postgres database objects and
    the process lock that are created together in the constructor.
    """
    # class variables
    # lock, logger and loader type
    m_lock = ""
    m_logger = ""
    m_loaderType = ""

    # Picard PG object
    m_report_pg = ""
    m_report_nz = ""

    def __init__(self, configFile):
        """
        Purpose: Constructor. Creates the logger, the Netezza object (and
        initialises it from the NZ_* environment variables), the Postgres
        object and the process lock. On any failure the error is logged, the
        resources created so far are released and the interpreter exits.
        :param self: class object itself
        :param configFile: Configuration file to use
        """
        # Initialize global logger object
        self.m_logger = Logger(logging.INFO, configFile)
        self.m_logger.addFileHandler(logging.DEBUG)

        try:
            # Add generic information
            fname = inspect.getfile(inspect.currentframe())
            fpath = os.path.dirname(os.path.abspath(fname))
            self.m_logger.addGenericInfo(fpath + "/" + fname)

            # Create NZ Data Warehouse object
            self.m_report_nz = Netezza(configFile, self.m_logger)
            self.m_report_nz.initDatabase(os.environ['NZ_USER'],
                                          os.environ['NZ_PASSWD'],
                                          os.environ['NZ_HOST'],
                                          os.environ['NZ_DATABASE'],
                                          os.environ['NZ_PORT'])

            # Create Picard Postgres Datamart object
            self.m_report_pg = Postgre(configFile, self.m_logger)

            # Create lock for the process
            self.m_lock = Lock(os.environ['LOCK_FILE'], self.m_logger)

            # pre and post processing dictionaries initialization
            self.sql_process_dict = defaultdict(list)

        except Exception as e:
            self.m_logger.error("ERROR: Unable to initialize the process due to: %s" % str(e))
            # The class-level defaults are empty strings, so these checks are
            # false for anything that was not created before the failure.
            if self.m_report_pg:
                self.m_report_pg.closeConnection()
            if self.m_lock:
                self.m_lock.remove()
            sys.exit("ERROR: Unable to initialize the process due to: %s" % str(e))
class Process():
    """
    Purpose: Holds the logger, the Netezza and Postgres database objects and
    the process lock that are created together in the constructor.
    """
    # class variables
    # lock, logger and loader type
    m_lock = ""
    m_logger = ""
    m_loaderType = ""

    # Picard PG object
    m_report_pg = ""
    m_report_nz = ""

    def __init__(self, configFile):
        """
        Purpose: Constructor. Creates the logger, the Netezza object (and
        initialises it from the NZ_* environment variables), the Postgres
        object and the process lock. On any failure the error is logged, the
        resources created so far are released and the interpreter exits.
        :param self: class object itself
        :param configFile: Configuration file to use
        """
        # Initialize global logger object
        self.m_logger = Logger(logging.INFO, configFile)
        self.m_logger.addFileHandler(logging.DEBUG)

        try:
            # Add generic information
            fname = inspect.getfile(inspect.currentframe())
            fpath = os.path.dirname(os.path.abspath(fname))
            self.m_logger.addGenericInfo(fpath + "/" + fname)

            # Create NZ Data Warehouse object
            self.m_report_nz = Netezza(configFile, self.m_logger)
            self.m_report_nz.initDatabase(os.environ['NZ_USER'],
                                          os.environ['NZ_PASSWD'],
                                          os.environ['NZ_HOST'],
                                          os.environ['NZ_DATABASE'],
                                          os.environ['NZ_PORT'])

            # Create Picard Postgres Datamart object
            self.m_report_pg = Postgre(configFile, self.m_logger)

            # Create lock for the process
            self.m_lock = Lock(os.environ['LOCK_FILE'], self.m_logger)

            # pre and post processing dictionaries initialization
            self.sql_process_dict = defaultdict(list)

        except Exception as e:
            self.m_logger.error("ERROR: Unable to initialize the process due to: %s" % str(e))
            # The class-level defaults are empty strings, so these checks are
            # false for anything that was not created before the failure.
            if self.m_report_pg:
                self.m_report_pg.closeConnection()
            if self.m_lock:
                self.m_lock.remove()
            sys.exit("ERROR: Unable to initialize the process due to: %s" % str(e))
def main():
    """
    Purpose - Ad-hoc driver: builds a logger and an S3 object from a fixed
    configuration file, fetches a token, prints the settings read from the
    "S3" section of the S3 object's configuration and uploads the configured
    source file with a multi-part load.
    """
    # Local alternative used during development:
    # /Users/rnarayan/apps/gds_arch/ICE/common/conf/test.ini
    log = Logger(logging.ERROR, "/cif/PY/apps/gds_arch/ICE/common/conf/test_ram_nyse.ini")
    log.addFileHandler(logging.INFO)
    log.addGenericInfo(__file__)

    s3object = S3("/cif/PY/apps/gds_arch/ICE/common/conf/test_ram_nyse.ini", log, True)
    s3object.getToken()

    # Each setting is read and echoed in turn, in the same order as before.
    sourceFileWthPath = s3object.m_configFile["S3"]["source_file"]
    print("sourceFileWthPath =", sourceFileWthPath)

    sourceBaseName = os.path.basename(sourceFileWthPath)
    targetFileWthPath = os.path.join(s3object.m_configFile["S3"]["path"], sourceBaseName)
    print("targetFileWthPath =", targetFileWthPath)

    targetBucket = s3object.m_configFile["S3"]["bucket"]
    print("targetBucket =", targetBucket)

    encryptKeyFlag = s3object.m_configFile["S3"]["encrypt_key"]
    print("encryptKeyFlag =", encryptKeyFlag)

    # 500 MiB per uploaded part
    bytes_per_chunk = 500 * 1024 * 1024

    # Single-part variant kept for reference:
    # s3object.loadDataSinglePart(sourceFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag)
    s3object.loadDataMultiPart(sourceFileWthPath,
                               targetFileWthPath,
                               targetBucket,
                               encryptKeyFlag,
                               bytes_per_chunk)
class DEAExtractor():
    """
    Purpose - Waits for the data files of one market and processing date to
    appear under the configured S3 path, downloads them into the stage
    directory with one worker process per file, records the progress in the
    Oracle status tables and writes a manifest file listing what was fetched.
    """
    #class variables
    m_logger = ""

    #database objects
    m_oracle_db = ""

    #def __init__(self, configFile, mktName, processingDate, debugFlag, forceFlag):
    def __init__(self, configFile, mktName, processingDate, debugFlag):
        """
        Purpose: Constructor. Creates the logger, reads configFile into
        m_configDict and creates the Oracle object. Exits with status 1 when
        the configuration or the Oracle object cannot be created.
        :param self: class object itself
        :param configFile: Configuration file to use
        :param mktName: Market name; used to derive the market specific config file and table name
        :param processingDate: Processing date; used in S3 paths, file names and status records
        :param debugFlag: When true, debug information is written to stdout
        """
        # Initialize m_logger object from class Logger and add Header to the log, using addGenericInfo function
        self.m_logger = Logger(logging.INFO, configFile, processingDate)
        self.m_logger.addFileHandler(logging.DEBUG)
        self.m_logger.addGenericInfo(__file__)

        self.processingDate = processingDate
        self.debugFlag = debugFlag
        #self.forceFlag = forceFlag
        self.configFile = configFile
        self.mktName = mktName

        try:
            # Get configuration to a dictionary
            self.m_configDict = configuration(self.configFile, True).m_dictionary

            #Initialize Oracle instance along with connection
            self.m_oracle_db = Oracle(self.m_configDict, self.m_logger)

        except Exception as exp:
            # An exception occurred
            self.m_logger.error("Unable to initialize the configuration " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def _debug(self, *parts):
        """
        Purpose - Write the given values, separated by single spaces, to
        stdout when debugFlag is set. Arguments are evaluated by the caller
        even when the flag is off, so anything expensive or able to raise
        should be guarded with "if self.debugFlag" at the call site.
        :param parts: Values to print
        :return: None
        """
        if self.debugFlag:
            # Single-argument print() behaves the same on Python 2 and 3
            print(" ".join("%s" % (part,) for part in parts))

    def _renderSql(self, sqlTemplate, paramsDict):
        """
        Purpose - Replace every occurrence of a key of paramsDict inside
        sqlTemplate with the corresponding value.
        NOTE(review): values are inserted as plain text without escaping;
        switching to bind variables needs the Oracle wrapper API - confirm.
        :param sqlTemplate: SQL text containing the placeholder names
        :param paramsDict: Placeholder name -> replacement string
        :return: SQL text with the placeholders substituted
        """
        # Longest names first so a name that is a prefix of another one can never shadow it
        names = sorted(paramsDict, key=len, reverse=True)
        pattern = "(%s)" % "|".join(map(re.escape, names))
        return re.sub(pattern, lambda m: paramsDict[m.group()], sqlTemplate)

    def _runTemplateSql(self, sqlKey, paramsDict):
        """
        Purpose - Render the statement stored under m_configDict["SQL"][sqlKey]
        with paramsDict and run it through the Oracle object.
        :param sqlKey: Key of the statement in the "SQL" section
        :param paramsDict: Placeholder name -> replacement string
        :return: Tuple (rendered sql, value returned by runSqlWthParamsGetOneRow)
        """
        tempSql = self.m_configDict["SQL"][sqlKey]
        mySql = self._renderSql(tempSql, paramsDict)
        returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

        self._debug("tempSql = ", tempSql)
        self._debug("myParamsDict = ", paramsDict)
        self._debug("mySql = ", mySql)
        self._debug("returnStr = ", returnStr)
        return mySql, returnStr

    def _putProcessStatus(self, localDataFile, fileSize, recordCount, pStatus, pComment):
        """
        Purpose - Run the "put_process_status" statement for one data file.
        :param localDataFile: Data file name
        :param fileSize: File size to record
        :param recordCount: Record count to record
        :param pStatus: Status code to record ('P', 'S' or 'F' are used by this class)
        :param pComment: Free text comment to record
        :return: 0 on success, 1 when the statement reports a failure
        """
        myParamsDict = {"datasetName": self.datasetName,
                        "runID": str(self.runID),
                        "fileName": localDataFile,
                        "tDate": str(self.processingDate),
                        "processID": str(os.getpid()),
                        "hostName": socket.gethostname(),
                        "fileSize": str(fileSize),
                        "recordCount": str(recordCount),
                        "status": pStatus,
                        "lcomment": pComment}
        mySql, returnStr = self._runTemplateSql("put_process_status", myParamsDict)

        if returnStr[0] != '0':
            self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
            return 1
        return 0

    def _putDatasetStatus(self, pStatus):
        """
        Purpose - Run the "put_dataset" statement for the current dataset and
        run. Exits with status 1 when the statement reports a failure.
        :param pStatus: Status code to record ('P', 'S' or 'F' are used by this class)
        :return: None
        """
        myParamsDict = {"datasetName": self.datasetName,
                        "runID": str(self.runID),
                        "tDate": str(self.processingDate),
                        "status": pStatus}
        mySql, returnStr = self._runTemplateSql("put_dataset", myParamsDict)

        if returnStr[0] != '0':
            self.m_logger.error("Unable to insert into tb_dxt_dataset_trans table " + mySql + ". Error = " + self.datasetName + " for processing date = " + str(self.processingDate))
            sys.exit(1)

    def _refreshToken(self, tokenRetryTimes, tokenRetryWaitTime):
        """
        Purpose - Call s3object.getToken() until it returns a false value, at
        most tokenRetryTimes times, then remember the time in
        currentEpochTime. Exits with status 1 when every attempt failed.
        :param tokenRetryTimes: Maximum number of attempts
        :param tokenRetryWaitTime: Seconds to sleep between two attempts
        :return: None
        """
        attempts = 0
        while attempts < tokenRetryTimes:
            if not self.s3object.getToken():
                break
            attempts += 1
            # Count first, then compare: this is what makes the give-up branch reachable
            if attempts == tokenRetryTimes:
                self.m_logger.error("Error: Exceeded the max retries " + str(tokenRetryTimes) + " to get AWS Token from FINRA. Please re-try after some time or escalate.. ")
                sys.exit(1)
            time.sleep(tokenRetryWaitTime)

        self.currentEpochTime = int(time.time())

    def readMktConfigFile(self, mktConfigFile):
        """
        Purpose - To read the content of mktConfigFile into the dictionary
        m_mktConfigDict for reference. Exits with status 1 on failure.
        :param mktConfigFile: Market specific configuration file
        :return: None
        """
        try:
            self.m_mktConfigDict = configuration(mktConfigFile, True).m_dictionary

        except Exception as exp:
            # An exception occurred
            self.m_logger.error("Unable to initialize the configuration for logger " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def chkActiveLoads(self):
        """
        Purpose - To check the count of active loads happening at a given
        point and wait until it is at or below the configured maximum.
        :param None: None at this point
        :return: 0 when the check is disabled or the active load count is
                 within active_load_max, 1 when the total wait time exceeded
                 active_load_max_wait_time
        """
        try:
            activeLoadConf = self.m_mktConfigDict["ACTIVE_LOAD"]
            if activeLoadConf["active_load_check_flag"] != 'Y':
                return 0

            # Converted to int (as chkRaceStatus does) so that the comparisons
            # and time.sleep() below work on numbers
            localActiveLoadMax = int(activeLoadConf["active_load_max"])
            localActiveLoadWaitTime = int(activeLoadConf["active_load_wait_time"])
            localActiveLoadMaxWaitTime = int(activeLoadConf["active_load_max_wait_time"])

            mySql = self.m_configDict["SQL"]["get_active_loads"]
            totalActiveWaitTime = 0

            while True:
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                self._debug("mySql = ", mySql)
                self._debug("returnStr = ", returnStr)

                if returnStr[0] != '0':
                    self.m_logger.error("Unable to get active loads using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

                # Parsed only after the status check: on failure returnStr[1] holds the error text
                activeLoads = int(returnStr[1].strip())
                self._debug("chkActiveLoads - Active Loads value = ", activeLoads)

                #Check if actual active loads is <= configured active loads. If so, return out of the fn
                if activeLoads <= localActiveLoadMax:
                    return 0

                #Sleep for time defined by configured value for "active_load_wait_time"
                time.sleep(localActiveLoadWaitTime)
                totalActiveWaitTime += localActiveLoadWaitTime

                #Check if actual Total wait time is > configured total wait time. If so, log an error and return failure
                if totalActiveWaitTime > localActiveLoadMaxWaitTime:
                    self.m_logger.error("In Fn chkActiveLoads. Total Actual Wait Time exceeds the configured value active_load_max_wait_time. Either cleanup orphaned loads or increase the either active_load_max or active_load_max_wait_time. totalActiveWaitTime = " + str(totalActiveWaitTime) + " localActiveMaxWaitTime=" + str(localActiveLoadMaxWaitTime))
                    return 1

        except Exception as exp:
            self.m_logger.error("Failure in chkActiveLoads process for file with the error " + str(exp))
            sys.exit(1)

    def chkRaceStatus(self):
        """
        Purpose - To check if a load is already running for the given dataset
        and wait until it is not.
        :param None: None at this point
        :return: 0 when the check is disabled or the value returned by the
                 "get_race_status" statement is <= 1, 1 when the total wait
                 time exceeded race_status_max_wait_time
        """
        try:
            raceConf = self.m_mktConfigDict["RACE"]
            if raceConf["race_status_check_flag"] != 'Y':
                return 0

            localRaceStatusWaitTime = int(raceConf["race_status_wait_time"])
            localRaceStatusMaxWaitTime = int(raceConf["race_status_max_wait_time"])

            tempSql = self.m_configDict["SQL"]["get_race_status"]
            myParamsDict = {'datasetName': self.datasetName}
            mySql = self._renderSql(tempSql, myParamsDict)

            totalRaceStatusWaitTime = 0

            while True:
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                self._debug("tempSql = ", tempSql)
                self._debug("myParamsDict = ", myParamsDict)
                self._debug("mySql = ", mySql)
                self._debug("returnStr = ", returnStr)

                if returnStr[0] != '0':
                    self.m_logger.error("Unable to get race status using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

                # Parsed only after the status check: on failure returnStr[1] holds the error text
                raceStatus = int(returnStr[1].strip())
                self._debug("chkRaceStatus - ReturnCode = ", raceStatus)

                #Check if the load for this dataset is already running. If not, exit out of the function with normal return value
                if raceStatus <= 1:
                    return 0

                #Sleep for time defined by configured value for "race_status_wait_time"
                time.sleep(localRaceStatusWaitTime)
                totalRaceStatusWaitTime += localRaceStatusWaitTime
                self._debug("totalRaceStatusWaitTime = ", totalRaceStatusWaitTime, "localRaceStatusWaitTime =", localRaceStatusWaitTime)

                #Check if actual Total wait time is > configured total wait time. If so, log an error and return failure
                if totalRaceStatusWaitTime > localRaceStatusMaxWaitTime:
                    self.m_logger.error("In Fn chkRaceStatusLoads. Total Actual Wait Time exceeds the configured value race_status_max_wait_time. Either check if the Dataset is getting loaded or increase the either active_load_max or active_load_max_wait_time. totalRaceStatusWaitTime = " + str(totalRaceStatusWaitTime) + " localRaceStatusMaxWaitTime=" + str(localRaceStatusMaxWaitTime))
                    return 1

        except Exception as exp:
            self.m_logger.error("Failure in chkRaceStatus process for file with the error " + str(exp))
            sys.exit(1)

    def _extractOneFile(self, localDataFile, localDBFlag):
        """
        Purpose - Download one data file into the stage directory, retrying up
        to aws_retries times, and record the 'P' and final 'S'/'F' process
        status when localDBFlag is set.
        :param localDataFile: Data Filename
        :param localDBFlag: Flag indicating if database should be used or not
        :return: 0 on success, non-zero on failure
        """
        self._debug("Inside extractData function")
        self._debug("localDataFile = ", localDataFile)

        # Race status / active load checks are intentionally not done for extracts.

        if localDBFlag:
            # File size and record count are not known before the download: record 0
            if self._putProcessStatus(localDataFile, 0, 0, 'P', 'Load started'):
                return 1

        #Here localFileWthPath is the local stage dir with file name
        localFileWthPath = self.m_configDict["ENV"]["stage_dir"] + "/" + localDataFile

        s3Conf = self.s3object.m_configFile["S3"]
        targetFileDir = s3Conf["path"] + str(self.processingDate) + "/"

        #Here targetFileWthPath is the AWS dir with file name
        targetFileWthPath = os.path.join(targetFileDir, os.path.basename(localDataFile))
        targetBucket = s3Conf["bucket"]
        encryptKeyFlag = s3Conf["encrypt_key"]
        localAWSRetries = int(self.m_mktConfigDict["ENV"]["aws_retries"])

        self._debug("localFileWthPath =", localFileWthPath)
        self._debug("targetFileWthPath =", targetFileWthPath)
        self._debug("targetBucket =", targetBucket)
        self._debug("encryptKeyFlag =", encryptKeyFlag)
        self._debug("localAWSRetries =", localAWSRetries)

        # Start from "failed" so that aws_retries = 0 is reported as a failure
        # instead of leaving these names undefined
        extractReturnValue = 1
        pStatus = 'F'
        pComment = 'Extract failed'

        initCount = 0
        while initCount < localAWSRetries:
            #Call s3 data download to extract the file (single part)
            extractReturnValue = int(self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket))
            self._debug("extractReturnValue = ", extractReturnValue)

            if extractReturnValue == 0:
                pStatus = 'S'
                pComment = 'Extract completed'
                break
            initCount += 1

        # Get the size of the file downloaded; a failed download may leave no
        # file behind, which must not prevent the 'F' status from being recorded
        if os.path.isfile(localFileWthPath):
            localFileSize = os.stat(localFileWthPath).st_size
        else:
            localFileSize = 0

        # No manifest exists to cross check the size or the record count against
        localRecordCount = 0

        if localDBFlag:
            #Call DB to insert 'S' or 'F' in the process status table
            if self._putProcessStatus(localDataFile, localFileSize, localRecordCount, pStatus, pComment):
                return 1

        return extractReturnValue

    def extractData(self,localDataFile, localFileID, localFileIDQueue, localDBFlag):
        """
        Purpose - To extract the given datafile from the S3 bucket specified in the market config file.
        Exactly one (localFileID, result) tuple is put on localFileIDQueue for
        every call, whatever the outcome, so that a parent counting results
        cannot wait forever.
        :param localDataFile: Data Filename
        :param localFileID: Internal File ID assigned to the local datafile
        :param localFileIDQueue: Queue in which, results of the operation is stored (may be None)
        :param localDBFlag: Flag indicating if database should be used or not
        :return: 0 on success, non-zero on failure
        """
        try:
            result = self._extractOneFile(localDataFile, localDBFlag)
        except Exception as exp:
            self.m_logger.error("Failure in extractData process for file with the error " + str(exp))
            result = 1

        if localFileIDQueue is not None:
            localFileIDQueue.put((localFileID, result))
        return result

    def getRecords(self, fileDict, startDateTime, endDateTime):
        """
        Purpose - Generator yielding, in ascending key order, the keys of
        fileDict whose value lies between startDateTime and endDateTime
        (both inclusive).
        :param fileDict : Dictionary whose values are compared against the two date-times
                          (presumably file name -> last modified date; verify against listBucketWPathByLastModified)
        :param startDateTime : Start DateTime in the format '2016-02-01 00:00:00'
        :param endDateTime : End DateTime in the format '2016-02-10 00:00:00'
        """
        try:
            self._debug("fileDict = ", fileDict)
            self._debug("startDateTime = ", startDateTime)
            self._debug("endDateTime = ", endDateTime)

            # Filter on the value and order by the key. Taking positions from
            # the value-sorted list and applying them to the key-sorted list
            # selects the wrong entries whenever the two orders differ.
            selected = sorted(key for key, value in fileDict.items()
                              if startDateTime <= value <= endDateTime)
            self._debug("selected = ", selected)

            for fileName in selected:
                # No pattern to search for in DEA
                yield fileName
                self._debug("fileItem[0] = ", fileName)

        except Exception as exp:
            self.m_logger.error("Failed while executing getRecords to sort the dictionary content of dictionary with Error = " + str(exp))
            sys.exit(1)

    def readManifestFile(self, manifestFileName):
        """
        Purpose - To read the content of a manifest file stored as "key: value" lines into a nested dictionary.
        A line starting with the configured MANIFEST_RECORD_START_PATTERN opens
        a new record; records are numbered from 1 in file order. Blank lines
        are ignored. Exits with status 1 on any failure.
        :param manifestFileName : Manifest file name
        :return: Dictionary record number -> {key: value}
        """
        try:
            manifestRecordStartPattern = self.m_configDict["dxt"]["MANIFEST_RECORD_START_PATTERN"]
            self._debug("manifestRecordStartPattern =", manifestRecordStartPattern)

            manifestFileDict = {}
            recordNo = 0

            with open(manifestFileName) as infile:
                for line in infile:
                    line = line.strip()
                    if not line:
                        continue

                    if line.startswith(manifestRecordStartPattern):
                        recordNo += 1
                        manifestFileDict[recordNo] = {}

                    if not recordNo:
                        raise ValueError("line found before the first record: " + line)

                    var, val = line.split(':', 1)
                    manifestFileDict[recordNo][var.strip()] = val.strip()

            self._debug("manifestFileDict = ", manifestFileDict)
            return manifestFileDict

        except Exception as exp:
            self.m_logger.error("Failed while executing readManifestFile to get FINRA manifest file into nested dictionary, Error = " + str(exp))
            sys.exit(1)

    def getFileList(self, startDateTime, endDateTime, s3Bucket, s3Path, folderPosition):
        """
        Purpose - List the given S3 path and return the names selected by getRecords for the given range.
        :param startDateTime : Start DateTime in the format '2016-02-01 00:00:00'
        :param endDateTime : End DateTime in the format '2016-02-10 00:00:00'
        :param s3Bucket : Bucket passed to listBucketWPathByLastModified
        :param s3Path : Path passed to listBucketWPathByLastModified
        :param folderPosition : Folder position passed to listBucketWPathByLastModified
        :return: List of names on success, the integer 1 on failure
        """
        try:
            self._debug("s3Bucket = ", s3Bucket)
            self._debug("s3Path = ", s3Path)
            self._debug("startDateTime = ", startDateTime)
            self._debug("endDateTime = ", endDateTime)
            self._debug("folderPosition = ", folderPosition)

            fileListDict = self.s3object.listBucketWPathByLastModified(s3Bucket, s3Path, folderPosition)
            self._debug("fileListDict = ", fileListDict)

            fileList = list(self.getRecords(fileListDict, startDateTime, endDateTime))
            self._debug("fileList = ", fileList)

            return fileList

        except Exception as exp:
            self.m_logger.error("Failed while creating AWS manifest file list with Error = " + str(exp))
            return 1

    def processDEAExtractor(self):
        """
        Purpose - Function responsible for getting the AWS token, waiting for
        the files of the processing date to appear in S3, downloading them in
        batches of worker processes and recording the outcome.
        When no file shows up within DEA_FILE_WAIT_TIME a zero byte data file
        and a manifest for it are created and the process exits with status 0.
        Any unexpected error is logged and the process exits with status 1.
        :param : None
        :return: None
        """
        try:
            # The database validation of mktName (validate_market_name) is currently disabled.
            self._debug("MktName from DB = ", self.mktName)

            #Build the string for mktConfigFile based on mktName and configFile info
            configNameParts = os.path.basename(self.configFile).split('.', 1)
            self.mktConfigFile = os.path.dirname(self.configFile) + '/' + configNameParts[0].strip() + '_' + self.mktName.lower() + '.' + configNameParts[1].strip()
            self._debug("mktConfigFile = ", self.mktConfigFile)

            #Validate Market Config file is a valid file
            if not os.path.isfile(self.mktConfigFile):
                self.m_logger.error("Invalid market manifest file " + self.mktConfigFile)
                sys.exit(1)

            # Read Market specific config file and store it in a specific dictionary
            self.readMktConfigFile(self.mktConfigFile)
            self._debug("m_mktConfigDict=", self.m_mktConfigDict)

            # Get RunID
            self.runID = generate_runId()
            self._debug("RunID = ", self.runID)

            # Initialize S3 object and get FINRA cloud service token and establish s3 session
            self.s3object = S3(self.mktConfigFile, self.m_logger, self.debugFlag)

            tokenRetryTimes = int(self.m_configDict["TOKEN"]["token_retry_times"])
            tokenRetryWaitTime = int(self.m_configDict["TOKEN"]["token_retry_wait_time"])
            deaFileWaitTime = int(self.m_configDict["dea"]["DEA_FILE_WAIT_TIME"])
            deaFileSleepTime = int(self.m_configDict["dea"]["DEA_FILE_SLEEP_TIME"])
            s3TimeoutTime = int(self.m_configDict["dea"]["S3_TIMEOUT_TIME"])

            # Place holder for the future handling of zero byte files; read but not used yet
            handleNoDatafileFlag = self.m_configDict["dea"]["HANDLE_NO_DATAFILE_FLAG"]

            procDate = str(self.processingDate)
            stageDir = self.m_configDict["ENV"]["stage_dir"]

            localFileDir = self.s3object.m_configFile["ENV"]["stage_dir"]
            targetFolder = self.s3object.m_configFile["S3"]["path"]
            targetBucket = self.s3object.m_configFile["S3"]["bucket"]
            encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]
            folderPosition = int(self.s3object.m_configFile["S3"]["folder_position"])
            targetFileDir = targetFolder + procDate + "/"

            self._debug("localFileDir = ", localFileDir)
            self._debug("targetFolder = ", targetFolder)
            self._debug("targetBucket = ", targetBucket)
            self._debug("encryptKeyFlag = ", encryptKeyFlag)
            self._debug("self.processingDate = ", self.processingDate)
            self._debug("targetFileDir = ", targetFileDir)

            # Last-modified window: from (one year ago + one day) until tomorrow
            startDate = ((datetime.now() - relativedelta(years=1)) + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
            endDate = (datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")

            deaActualTime = 0
            getTokenFlag = 0
            fileExistFlag = 0
            fileList = []

            while deaActualTime < deaFileWaitTime:
                # Get token the first time and again once the last one is older than s3TimeoutTime
                if not getTokenFlag or (int(time.time()) - self.currentEpochTime) > s3TimeoutTime:
                    getTokenFlag = 1
                    self._refreshToken(tokenRetryTimes, tokenRetryWaitTime)

                # Get the list of files from the AWS folder for the given processing date
                fileList = self.getFileList(startDate, endDate, targetBucket, targetFileDir, folderPosition)

                # getFileList reports a failure by returning 1 instead of a list
                if not isinstance(fileList, list):
                    self.m_logger.error("Unable to get the list of files from " + targetFileDir)
                    sys.exit(1)

                if fileList:
                    self._debug("fileList = ", fileList)
                    fileExistFlag = 1
                    break

                time.sleep(deaFileSleepTime)
                deaActualTime += deaFileSleepTime

                self._debug("deaActualTime =", deaActualTime)
                self._debug("deaFileSleepTime =", deaFileSleepTime)
                self._debug("deaFileWaitTime =", deaFileWaitTime)

                self.m_logger.info("INFO : Waiting for file in FINRA's cloud, " + str(deaFileWaitTime - deaActualTime) + " secs remaining...")
            # End of while

            tblName = self.m_mktConfigDict["dea"]["TARGET_TBL_NAME"] + "_" + self.mktName.upper()
            fatlManifestFile = stageDir + "/" + tblName + "." + procDate + ".manifest"

            # If no files exists for the given day, create a zero byte data file and a manifest file for the same
            if not fileExistFlag:
                deaDummyDataFile = self.m_configDict["dea"]["DEA_DUMMY_DATA_FILE"].replace("PDATE", procDate)
                deaDummyDataFileWthPath = stageDir + "/" + deaDummyDataFile
                open(deaDummyDataFileWthPath, 'a').close()

                self._debug("fileExistFlag = ", fileExistFlag)

                with open(fatlManifestFile, "w") as fh:
                    fileSize = 0
                    self._debug("deaDummyDataFileWthPath = ", deaDummyDataFileWthPath)
                    self._debug("fileSize = ", fileSize)
                    self._debug("tblName = ", tblName, "file = ", deaDummyDataFile, "fileSize = ", fileSize, "mktName = ", self.mktName)
                    fh.write(tblName + "|" + deaDummyDataFile + "|" + str(fileSize) + "|" + "0" + "\n")

                self.m_logger.info("INFO : No File found for processing date " + procDate + ". Creating zero byte data file " + deaDummyDataFileWthPath + " and manifest file " + fatlManifestFile)
                sys.exit(0)

            # tblName is used instead of a dataset name for DEA, as there is no dataset concept or manifest file
            self.datasetName = tblName

            # Insert a record into tb_dxt_dataset_trans with status 'P', saying that the process has started
            self._putDatasetStatus('P')

            process_count = int(self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"])
            if process_count < 1:
                # With no worker allowed the loop below could never make progress
                self.m_logger.error("Invalid active_load_max " + str(process_count) + ". It must be at least 1")
                sys.exit(1)

            threadDelayTime = int(self.m_configDict["dea"]["THREAD_DELAY_TIME"])

            fileID = 1
            dbFlag = 1
            fileIDQueue = Queue()
            doneCounter = 0
            sendCounter = 0
            failureFlag = 0

            while doneCounter < len(fileList) and not failureFlag:
                procs = []

                # Start up to process_count workers, one per file
                while sendCounter < len(fileList) and sendCounter - doneCounter < process_count:
                    if (int(time.time()) - self.currentEpochTime) > s3TimeoutTime:
                        self.m_logger.info("Getting New Token for file : {0}, Total files : {1}".format(sendCounter + 1, len(fileList)))
                        if self.debugFlag:
                            self._debug('Memory usage: %s (kb)' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
                            self._debug("self.currentEpochTime = ", self.currentEpochTime)
                            self._debug("Current Time in Epoch = ", int(time.time()))
                        self._refreshToken(tokenRetryTimes, tokenRetryWaitTime)

                    # Call fn extractData to fetch the file from AWS
                    processHandle = Process(target=DEAExtractor.extractData, args=(self, fileList[sendCounter], fileID, fileIDQueue, dbFlag))

                    time.sleep(threadDelayTime)
                    processHandle.start()
                    procs.append(processHandle)
                    sendCounter += 1
                    fileID += 1

                for p in procs:
                    p.join()

                # Collect the results of the batch that has just finished
                receivedCounter = 0
                while not fileIDQueue.empty():
                    qFileID, qResult = fileIDQueue.get()
                    self._debug("qFileID = ", qFileID, "qResult = ", qResult)
                    receivedCounter += 1
                    if qResult:
                        failureFlag = 1

                # A worker that ended without reporting is a failure; counting
                # it as done keeps this loop from waiting for it forever
                if receivedCounter < len(procs):
                    self.m_logger.error("Only " + str(receivedCounter) + " of " + str(len(procs)) + " extract processes reported a result")
                    failureFlag = 1

                doneCounter += len(procs)
                self._debug("sendCounter = ", sendCounter, "doneCounter = ", doneCounter)

            self._debug("Failure Flag = ", failureFlag)

            if failureFlag:
                pStatus = 'F'
            else:
                pStatus = 'S'

            self._debug("File List = ", fileList)
            self._debug("fatlManifestFile =", fatlManifestFile)

            manifestFailed = False
            try:
                with open(fatlManifestFile, "w") as fh:
                    for dataFile in fileList:
                        sourceFileWthPath = stageDir + "/" + dataFile
                        fileSize = os.stat(sourceFileWthPath).st_size

                        self._debug("sourceFileWthPath = ", sourceFileWthPath)
                        self._debug("fileSize = ", fileSize)
                        self._debug("tblName = ", tblName, "file = ", dataFile, "fileSize = ", fileSize, "mktName = ", self.mktName)
                        fh.write(tblName + "|" + dataFile + "|" + str(fileSize) + "|" + "0" + "\n")
            except (IOError, OSError) as exp:
                # Typically a file that was never downloaded; the 'F' status must still be recorded below
                self.m_logger.error("Unable to write manifest file " + fatlManifestFile + ". Error = " + str(exp))
                pStatus = 'F'
                manifestFailed = True

            # Insert a record into tb_dxt_dataset_trans table with 'S' or 'F'
            self._putDatasetStatus(pStatus)

            if manifestFailed:
                sys.exit(1)

        except Exception as e:
            self.m_logger.error("processDEAExtractor failed with error " + str(e))
            sys.exit(1)
class Loader():
    """
    Upload the data files listed in a manifest to S3 and track the run in Oracle.

    Typical use: construct the object, then call processLoader(), which
    records the run, fans the per-file uploads out to a multiprocessing
    pool (loadData), builds the FINRA "done" file (createFinraManifestFile)
    and uploads it, and finally records the overall status.

    SQL statements are taken from the "SQL" section of the configuration
    dictionary and rendered by textual placeholder substitution.
    NOTE(review): the substituted values (dataset names, file names) are
    pasted into the SQL text unescaped; confirm they can never carry
    untrusted input, or move to bind variables if the Oracle wrapper
    supports them.
    """

    #class variables
    m_logger = ""

    #database objects
    m_oracle_db = ""
    m_netezza_db = ""

    def __init__(self, configFile, tradeDate, debugFlag):
        """
        Purpose: Constructor
        :param self: class object itself
        :param configFile: Configuration file to use
        :param tradeDate: Trade date being processed (also handed to the Logger)
        :param debugFlag: When true, verbose trace output is printed to stdout
        """
        # Initialize m_logger object from class Logger and add Header to the log, using addGenericInfo function
        self.m_logger = Logger(logging.INFO, configFile, tradeDate)
        self.m_logger.addFileHandler(logging.DEBUG)
        self.m_logger.addGenericInfo(__file__)

        self.tradeDate = tradeDate
        self.debugFlag = debugFlag
        self.configFile = configFile

        try:
            # Get configuration to a dictionary
            self.m_configDict = configuration(self.configFile, True).m_dictionary

            # Initialize Oracle instance along with connection
            self.m_oracle_db = Oracle(self.m_configDict, self.m_logger)
        except Exception as exp:
            # An exception occurred
            self.m_logger.error("Unable to initialize the configuration " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def _debug(self, *parts):
        """
        Purpose - Print the given parts, separated by single spaces, when debugFlag is set.
        Produces the same text as the Python 2 statement `print a, b, c`.
        """
        if self.debugFlag:
            print(" ".join("%s" % (part,) for part in parts))

    def _runConfiguredSql(self, sqlKey, params, trace=True):
        """
        Purpose - Render the SQL template stored under m_configDict["SQL"][sqlKey] and run it
        :param sqlKey: Key of the SQL template in the "SQL" configuration section
        :param params: Dictionary of placeholder name -> replacement string
        :param trace: When true, emit the template/params/sql/result debug trace
        :return: Tuple (rendered sql, row returned by runSqlWthParamsGetOneRow)
        """
        tempSql = self.m_configDict["SQL"][sqlKey]
        # Longest placeholder first, so a name that is a prefix of another can never shadow it
        placeholderNames = sorted(params, key=len, reverse=True)
        tempGrp = "(%s)" % "|".join(map(re.escape, placeholderNames))
        mySql = re.sub(tempGrp, lambda m: params[m.group()], tempSql)
        returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
        if trace:
            self._debug("tempSql = ", tempSql)
            self._debug("myParamsDict = ", params)
            self._debug("mySql = ", mySql)
            self._debug("returnStr = ", returnStr)
        return mySql, returnStr

    def _putProcessStatus(self, localDataFile, localFileID, processID, hostName, pStatus, pComment):
        """
        Purpose - Record the status of one file load using the "put_process_status" SQL
        :raises RuntimeError: when the database reports a non-zero return code.
            An exception (rather than sys.exit) is used because this runs inside a
            pool worker, where the caller must still report the failure on the queue.
        """
        myParamsDict = {"datasetName": self.datasetName,
                        "runID": str(self.runID),
                        "fileID": str(localFileID),
                        "fileName": localDataFile,
                        "tDate": str(self.tradeDate),
                        "processID": str(processID),
                        "hostName": hostName,
                        "status": pStatus,
                        "lcomment": pComment,
                        "tidalRunID": str(self.tidalRunID)}
        mySql, returnStr = self._runConfiguredSql("put_process_status", myParamsDict)
        if returnStr[0] != '0':
            self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
            raise RuntimeError("put_process_status failed. Error = " + str(returnStr[1]))

    def _putDatasetStatus(self, pStatus):
        """
        Purpose - Record the dataset-level status ('P', 'S' or 'F') using the "put_dataset" SQL.
        Logs and exits the process with status 1 when the database reports a failure.
        """
        myParamsDict = {"datasetName": self.datasetName,
                        "runID": str(self.runID),
                        "tDate": str(self.tradeDate),
                        "status": pStatus,
                        "tidalRunID": str(self.tidalRunID)}
        mySql, returnStr = self._runConfiguredSql("put_dataset", myParamsDict)
        if returnStr[0] != '0':
            self.m_logger.error("Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
            sys.exit(1)

    def readMktConfigFile(self, mktConfigFile):
        """
        Purpose - To read the content of mktConfigFile into the dictionary m_mktConfigDict for reference
        :param mktConfigFile: Market specific configuration file
        :return: None (exits the process with status 1 on failure)
        """
        try:
            self.m_mktConfigDict = configuration(mktConfigFile, True).m_dictionary
        except Exception as exp:
            # An exception occurred
            self.m_logger.error("Unable to initialize the configuration for logger " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def chkActiveLoads(self):
        """
        Purpose - To wait until the number of active loads drops to the configured maximum
        :return: 0 when the check is disabled or the load may proceed,
                 1 when the configured maximum wait time was exceeded.
                 Exits the process with status 1 on a database or unexpected error.
        """
        try:
            activeLoadCfg = self.m_mktConfigDict["ACTIVE_LOAD"]
            if activeLoadCfg["active_load_check_flag"] != 'Y':
                return 0

            # Configuration values are strings; convert once so the comparisons
            # and the sleep below operate on numbers.
            localActiveLoadMax = int(activeLoadCfg["active_load_max"])
            localActiveLoadWaitTime = int(activeLoadCfg["active_load_wait_time"])
            localActiveLoadMaxWaitTime = int(activeLoadCfg["active_load_max_wait_time"])

            mySql = self.m_configDict["SQL"]["get_active_loads"]
            totalActiveWaitTime = 0
            while True:
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                self._debug("mySql = ", mySql)
                self._debug("returnStr = ", returnStr)

                if returnStr[0] != '0':
                    self.m_logger.error("Unable to get active loads using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

                # Parse the count only after the return code is known to be good;
                # on failure returnStr[1] carries an error message, not a number.
                activeLoads = int(returnStr[1].strip())
                self._debug("chkActiveLoads - Active Loads value = ", activeLoads)

                # Check if actual active loads is <= configured active loads. If so, return out of the fn
                if activeLoads <= localActiveLoadMax:
                    return 0

                # Sleep for time defined by configured value for "active_load_wait_time"
                time.sleep(localActiveLoadWaitTime)
                totalActiveWaitTime += localActiveLoadWaitTime

                # Check if actual Total wait time is > configured total wait time. If so, log an error and fail
                if totalActiveWaitTime > localActiveLoadMaxWaitTime:
                    self.m_logger.error("In Fn chkActiveLoads. Total Actual Wait Time exceeds the configured value active_load_max_wait_time. Either cleanup orphaned loads or increase the either active_load_max or active_load_max_wait_time. totalActiveWaitTime = " + str(totalActiveWaitTime) + " localActiveMaxWaitTime=" + str(localActiveLoadMaxWaitTime))
                    return 1
        except Exception as exp:
            self.m_logger.error("Failure in chkActiveLoads process for file with the error " + str(exp))
            sys.exit(1)

    def chkRaceStatus(self):
        """
        Purpose - To wait until no other load is running for this dataset
        :return: 0 when the check is disabled or the load may proceed,
                 1 when the configured maximum wait time was exceeded.
                 Exits the process with status 1 on a database or unexpected error.
        """
        try:
            raceCfg = self.m_mktConfigDict["RACE"]
            if raceCfg["race_status_check_flag"] != 'Y':
                return 0

            localRaceStatusWaitTime = int(raceCfg["race_status_wait_time"])
            localRaceStatusMaxWaitTime = int(raceCfg["race_status_max_wait_time"])
            myParamsDict = {'datasetName': self.datasetName}

            totalRaceStatusWaitTime = 0
            while True:
                mySql, returnStr = self._runConfiguredSql("get_race_status", myParamsDict)

                if returnStr[0] != '0':
                    self.m_logger.error("Unable to get race status using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

                # Parse the value only after the return code is known to be good
                raceCount = int(returnStr[1].strip())
                self._debug("chkRaceStatus - ReturnCode = ", raceCount)

                # Check if the load for this dataset is already running. If not, exit out of the function with normal return value
                if raceCount <= 1:
                    return 0

                # Sleep for time defined by configured value for "race_status_wait_time"
                time.sleep(localRaceStatusWaitTime)
                totalRaceStatusWaitTime += localRaceStatusWaitTime
                self._debug("totalRaceStatusWaitTime = ", totalRaceStatusWaitTime, "localRaceStatusWaitTime =", localRaceStatusWaitTime)

                # Check if actual Total wait time is > configured total wait time. If so, log an error and fail
                if totalRaceStatusWaitTime > localRaceStatusMaxWaitTime:
                    self.m_logger.error("In Fn chkRaceStatusLoads. Total Actual Wait Time exceeds the configured value race_status_max_wait_time. Either check if the Dataset is getting loaded or increase the either active_load_max or active_load_max_wait_time. totalRaceStatusWaitTime = " + str(totalRaceStatusWaitTime) + " localRaceStatusMaxWaitTime=" + str(localRaceStatusMaxWaitTime))
                    return 1
        except Exception as exp:
            self.m_logger.error("Failure in chkRaceStatus process for file with the error " + str(exp))
            sys.exit(1)

    def loadData(self, localDataFile, localFileID, localFileIDQueue, localDBFlag=1):
        """
        Purpose - To load the given datafile to the S3 bucket configured on self.s3object
        :param localDataFile: Source datafile to be uploaded to S3
        :param localFileID: Internal File ID assigned to the source datafile
        :param localFileIDQueue: Queue in which the result of the operation is stored
                                 as a (fileID, returnCode) tuple when localDBFlag is set
        :param localDBFlag: When true (default) run the race/active-load checks, record the
                            process status in the database and report through the queue.
                            When false, skip all database work and return the upload
                            return code directly (used for the done file).
        :return: With localDBFlag set: 1 when a pre-check failed, otherwise None
                 (the outcome is on the queue). Without it: 0 on success, non-zero on failure.
        """
        try:
            self._debug("Inside loadData function")

            if localDBFlag:
                raceStatusReturnValue = self.chkRaceStatus()
                self._debug("raceStatusReturnValue=", raceStatusReturnValue)
                if raceStatusReturnValue:
                    self.m_logger.error("Failure value returned by chkRaceStatus fn. Return value = " + str(raceStatusReturnValue))
                    localFileIDQueue.put((localFileID, raceStatusReturnValue))
                    return 1

                activeLoadsReturnValue = self.chkActiveLoads()
                if activeLoadsReturnValue:
                    self.m_logger.error("Failure value returned by chkActiveLoads fn. Return value = " + str(activeLoadsReturnValue))
                    # Report the active-load failure itself; the race status is 0 at this point
                    localFileIDQueue.put((localFileID, activeLoadsReturnValue))
                    return 1

                processID = os.getpid()
                hostName = socket.gethostname()

                # Insert Process status into Oracle db
                self._putProcessStatus(localDataFile, localFileID, processID, hostName, 'P', 'Load started')

            # The S3 token is obtained once by the caller (self.s3object), outside the parallel workers
            sourceFileWthPath = localDataFile
            s3Config = self.s3object.m_configFile["S3"]
            targetFileWthPath = os.path.join(s3Config["path"], os.path.basename(sourceFileWthPath))
            targetBucket = s3Config["bucket"]
            encryptKeyFlag = s3Config["encrypt_key"]
            local_aws_retries = int(self.m_mktConfigDict["ENV"]["aws_retries"])

            self._debug("sourceFileWthPath =", sourceFileWthPath)
            self._debug("targetFileWthPath =", targetFileWthPath)
            self._debug("targetBucket =", targetBucket)
            self._debug("encryptKeyFlag =", encryptKeyFlag)
            self._debug("local_aws_retries =", local_aws_retries)

            # Start from "failed" so a retry count of zero is reported as a failure
            # instead of leaving the result undefined.
            loadReturnValue = 1
            pStatus = 'F'
            pComment = 'Load failed'
            for _attempt in range(local_aws_retries):
                # Call s3.dataUpload to load the data (single part load)
                loadReturnValue = self.s3object.loadDataSinglePart(sourceFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag)
                self._debug("loadReturnValue = ", loadReturnValue)
                if loadReturnValue == 0:
                    pStatus = 'S'
                    pComment = 'Load completed'
                    break

            if not localDBFlag:
                return loadReturnValue

            # Record 'S' or 'F' for this file, then hand the result to the parent
            self._putProcessStatus(localDataFile, localFileID, processID, hostName, pStatus, pComment)
            localFileIDQueue.put((localFileID, loadReturnValue))
        except (Exception, SystemExit) as exp:
            # SystemExit is caught as well: chkRaceStatus/chkActiveLoads call sys.exit on
            # database errors, and a pool worker that dies without queueing a result
            # would leave the parent waiting for this file forever.
            self.m_logger.error("Failure in loadData process for file with the error " + str(exp))
            if localDBFlag:
                # The consumer unpacks (fileID, result), so the pair must be one tuple
                localFileIDQueue.put((localFileID, 1))
            else:
                return 1

    def createFinraManifestFile(self, manifestFile):
        """
        Purpose - To build the FINRA "done" file from the manifest file and the defaults file
        :param manifestFile: Manifest file with path; each non-blank line is
                             pipe-delimited with the file name in field 1, the size in
                             field 2 and the row count in field 3 (0-based)
        :return: 0 on success, 1 on failure (the error is logged)
        Sets self.totalRows, self.totalSize, self.fileCount, self.fileDict,
        self.defaultsDict, self.defaultsCount, self.defaultsFileWthPath and
        self.finraManifestFile as a side effect.
        """
        try:
            # Read Manifest file to get info like total rows, total size & other details to populate the done file for FINRA
            self._debug("Inside createFinraManifestFile fuction")
            with open(manifestFile, "r") as fh:
                self.totalRows = 0
                self.totalSize = 0
                self.fileCount = 0
                self.fileDict = {}
                for data in fh:
                    # A blank (e.g. trailing) line carries no file record
                    if not data.strip():
                        continue
                    self.fileCount += 1
                    mylist = data.split("|")
                    # int() tolerates the trailing newline left on the last field
                    fileSize = int(mylist[2])
                    fileRows = int(mylist[3])
                    self.fileDict[self.fileCount] = [mylist[0], os.path.basename(mylist[1]), fileSize, fileRows]
                    self.totalRows += fileRows
                    self.totalSize += fileSize
            self._debug("self.fileDict = ", self.fileDict)
        except Exception as exp:
            self.m_logger.error("Failed while processing readManifest with Error = " + str(exp))
            return 1

        # Pre-set so the error message below is usable even if building the path fails
        self.defaultsFileWthPath = None
        try:
            # Use self.defaultsFile which is populated from the db. No need to get it from config file
            self.defaultsFileWthPath = self.m_mktConfigDict["DATASET"]["defaults_dir"] + "/" + self.defaultsFile
            with open(self.defaultsFileWthPath, "r") as fh:
                self.defaultsDict = {}
                self.defaultsCount = 0
                for data in fh:
                    # Lines keep their newline on purpose: they are copied verbatim into the done file
                    self.defaultsCount += 1
                    self.defaultsDict[self.defaultsCount] = data
            self._debug("After Defaults, self.fileDict = ", self.fileDict)
        except Exception as exp:
            self.m_logger.error("Failed while processing defaults file " + str(self.defaultsFileWthPath) + " with Error = " + str(exp))
            return 1

        # Pre-set for the same reason as above
        self.finraManifestFile = None
        try:
            self.finraManifestFile = self.m_mktConfigDict["ENV"]["donefile_dir"] + "/" + os.path.basename(manifestFile) + ".done"
            with open(self.finraManifestFile, "w") as finraMnFH:
                finraMnFH.write("# AWS RunID : {}\n".format(str(self.runID)))
                finraMnFH.write("# Dataset : {0} , TradeDate : {1}\n".format(str(self.datasetName), str(self.tradeDate)))
                finraMnFH.write("total_compressed={}\n".format(self.totalSize))
                finraMnFH.write("total_rows={}\n".format(self.totalRows))
                finraMnFH.write("no of files={}\n".format(self.fileCount))
                # Iterate in key order: plain dict iteration order is not guaranteed,
                # and both sections must follow the order of the source files.
                for key in sorted(self.fileDict):
                    val = self.fileDict[key]
                    finraMnFH.write("file_{0}={1}\n".format(str(key), val[1]))
                    finraMnFH.write("file_{0}_rows={1}\n".format(str(key), val[3]))
                finraMnFH.write("# Data Attributes\n")
                for key in sorted(self.defaultsDict):
                    finraMnFH.write("{0}".format(str(self.defaultsDict[key])))
            return 0
        except Exception as exp:
            self.m_logger.error("Failed while creating AWS Done file " + str(self.finraManifestFile) + " with Error = " + str(exp))
            return 1

    def processLoader(self, manifestFile, datasetName, tidalRunID):
        """
        Purpose - Function responsible for reading the manifest file, get market name, call multiprocess load and other db calls
        :param manifestFile: Manifest file name (looked up under ENV/manifestfile_dir)
        :param datasetName: Dataset name
        :param tidalRunID: Tidal Run ID
        :return: None. Exits the process with status 1 on any failure.
        """
        try:
            self.datasetName = datasetName
            self.tidalRunID = tidalRunID

            self.manifestFile = self.m_configDict["ENV"]["manifestfile_dir"] + "/" + manifestFile

            # Validate Manifest file
            if not os.path.isfile(self.manifestFile):
                self.m_logger.error("Invalid manifest file " + self.manifestFile)
                sys.exit(1)

            self._debug("Inside processLoader")
            self._debug("DatasetName = ", self.datasetName)
            self._debug("ManifestFile = ", manifestFile)
            self._debug("Self ManifestFile = ", self.manifestFile)
            self._debug("TidalRunID = ", self.tidalRunID)
            self._debug("DebugFlag = ", self.debugFlag)
            self._debug("confDict = ", self.m_configDict)

            # DB_CALL - get market name and defaults file name for the dataset
            mySql, returnStr = self._runConfiguredSql("get_mkt_defaults_filename", {'datasetName': self.datasetName})
            if returnStr[0] != '0':
                self.m_logger.error("Unable to get market info from the database using sql " + mySql + ". Error = " + returnStr[1])
                sys.exit(1)
            mktName = returnStr[2].strip()
            self.defaultsFile = returnStr[3].strip()

            self._debug("MktName from DB = ", mktName)
            self._debug("Defaults = ", self.defaultsFile)

            # Build the string for mktConfigFile based on mktName and configFile info:
            # <dir>/<config base name>_<market, lower case>.<config extension>
            configNameParts = os.path.basename(self.configFile).split('.', 1)
            self.mktConfigFile = os.path.dirname(self.configFile) + '/' + configNameParts[0].strip() + '_' + mktName.lower() + '.' + configNameParts[1].strip()
            self._debug("mktConfigFile = ", self.mktConfigFile)

            # Validate the market config file is a valid file
            if not os.path.isfile(self.mktConfigFile):
                self.m_logger.error("Invalid market manifest file " + self.mktConfigFile)
                sys.exit(1)

            # Read Market specific config file and store it in a specific dictionary
            self.readMktConfigFile(self.mktConfigFile)
            self._debug("m_mktConfigDict=", self.m_mktConfigDict)

            # Read the contents of manifest - dataFileNames into a list - Will validate the datafiles as well
            localManifest = Manifest()
            manifestDelim = self.m_configDict["ENV"]["manifest_delim"]
            manifestFileList = localManifest.readManifest(self.manifestFile, self.m_logger, manifestDelim, self.debugFlag)

            # Get RunID
            self.runID = generate_runId()
            self._debug("RunID = ", self.runID)

            # DB_CALL - record that processing of the dataset has started
            self._putDatasetStatus('P')

            # Insert Manifest data in db, one row per data file
            # NOTE(review): createFinraManifestFile reads field 2 as size and field 3 as rows,
            # while this maps record[2] to noOfRecords and record[3] to fileSize - confirm
            # against Manifest.readManifest that the two agree.
            for dataRecord in manifestFileList:
                myParamsDict = {"datasetName": self.datasetName,
                                "runID": str(self.runID),
                                "tDate": str(self.tradeDate),
                                "dataFileName": dataRecord[1],
                                "manifestFileName": manifestFile,
                                "noOfRecords": str(dataRecord[2]),
                                "fileSize": str(dataRecord[3]),
                                "tidalRunID": str(self.tidalRunID)}
                mySql, returnStr = self._runConfiguredSql("put_manifest", myParamsDict, trace=False)
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put manifest info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

            # Initialize S3 object and get FINRA cloud service token and establish s3 session
            self.s3object = S3(self.mktConfigFile, self.m_logger, self.debugFlag)
            self.s3object.getToken()

            # Number of parallel uploads comes from the active load configuration
            process_count = int(self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"])
            self._debug("localActiveLoadMax = ", process_count)
            self._debug("len(manifestFileList) = ", len(manifestFileList))

            m = multiprocessing.Manager()
            fileIDQueue = m.Queue()
            sendCounter = 0
            doneCounter = 0
            fileID = 1
            failureFlag = 0
            totalFiles = len(manifestFileList)
            self._debug(manifestFileList)

            # NOTE(review): the pool is handed the bound method self.loadData, so the
            # whole Loader instance must be picklable; on Python 2 bound methods are
            # not picklable by default - confirm this path actually dispatches work.
            # A worker that dies without queueing a result would stall this loop.
            pool = multiprocessing.Pool(processes=process_count)
            try:
                while doneCounter < totalFiles:
                    self._debug("Inside while doneCounter = ", doneCounter)
                    self._debug("doneCounter = ", doneCounter, "sendCounter = ", sendCounter)

                    # Keep at most process_count uploads outstanding
                    while sendCounter < totalFiles and sendCounter - doneCounter < process_count:
                        tmpDataFileName = manifestFileList[sendCounter][1]
                        self._debug("Inside sendCounter, manifestFileList[sendCounter] = ", manifestFileList[sendCounter], "manifestFileList[sendCounter][1] = ", tmpDataFileName)
                        pool.apply_async(self.loadData, args=(tmpDataFileName, fileID, fileIDQueue))
                        self._debug("After pool apply_async")
                        time.sleep(2)
                        sendCounter += 1
                        fileID += 1

                    # Process completed results as they arrive
                    while not fileIDQueue.empty():
                        self._debug("Inside Queue")
                        time.sleep(3)
                        qFileID, qResult = fileIDQueue.get()
                        if qResult:
                            failureFlag = 1
                        self._debug("qFileID = ", qFileID, "qResult = ", qResult)
                        doneCounter += 1

                    if failureFlag:
                        break
                    time.sleep(2)
            finally:
                # Let in-flight uploads finish (and record their own status) and
                # release the worker processes.
                pool.close()
                pool.join()

            if failureFlag:
                pStatus = 'F'
            else:
                pStatus = 'S'

            # Generate FINRA Manifest (done) file and push it to AWS
            returnValue = self.createFinraManifestFile(self.manifestFile)
            self._debug("Post createFinraManifestFile fn - return value= ", returnValue)
            if returnValue:
                self.m_logger.error("Unable to generate done file. Please fix the issue the re-run the load")
                failureFlag = 1
                pStatus = 'F'
            else:
                # The done file is uploaded in-process, without database tracking (dbFlag=0, fileID=0)
                dbFlag = 0
                fileID = 0
                finraManifestLoadStatus = self.loadData(self.finraManifestFile, fileID, fileIDQueue, dbFlag)
                if finraManifestLoadStatus:
                    # Keep the exit status consistent with the 'F' recorded in the database
                    failureFlag = 1
                    pStatus = 'F'
                    self.m_logger.error("Unable to load finra manifest file ")

            # DB_CALL - record the final dataset status ('S' or 'F')
            self._putDatasetStatus(pStatus)

            if failureFlag:
                self.m_logger.error("Load failed")
                sys.exit(1)
        except Exception as e:
            self.m_logger.error("ProcessLoader failed with error " + str(e))
            sys.exit(1)
def main(configFile, logLevel, tDate):
    """
    Purpose - Ad-hoc smoke test of the Oracle wrapper: run the internal recon
    query and print the first three columns of the first row returned.

    :param configFile: Configuration file handed to Logger and configuration()
    :param logLevel: Log level handed to Logger
    :param tDate: Trade date handed to Logger
    :return: None. Exits the process with status 1 on any failure.

    NOTE(review): the recon query runs for the hard-coded date '20160212',
    not for tDate. That matches the previous behaviour (which overwrote the
    parameter); confirm whether the query should follow tDate instead.
    Earlier commented-out experiments against other PKG_RFCM_DDY functions
    were removed; they were never executed.
    """
    # Local import: sys.exit is reliable, whereas the bare exit() helper is only
    # installed by the site module and is meant for interactive use.
    import sys

    log = Logger(logLevel, configFile, tDate)
    log.addFileHandler(logging.DEBUG)
    log.addGenericInfo(__file__)

    try:
        m_configDict = configuration(configFile, True).m_dictionary
        myOracle = Oracle(m_configDict, log)

        mySql = "select * from table(PKG_RFCM_DDY.f_ddy_internal_recon('tradeDate')) order by 4"
        # Kept separate from the tDate parameter so the argument is not clobbered
        reconTradeDate = '20160212'
        new_mySql = mySql.rstrip().replace('tradeDate', reconTradeDate)
        print(" ".join(["mySql = ", mySql, "new_mySql = ", new_mySql]))

        returnStrs = myOracle.runSqlWthParamsGetMultipleRows(new_mySql)
        print(" ".join(["Return Strs = ", str(returnStrs)]))

        # An empty result used to surface as an opaque "list index out of range"
        if not returnStrs:
            print(" ".join(["Failed on main", "internal recon query returned no rows"]))
            sys.exit(1)

        firstRow = returnStrs[0]
        print(" ".join(["returnStrs[0][0] = ", str(firstRow[0])]))
        print(" ".join(["returnStrs[0][1] = ", str(firstRow[1])]))
        print(" ".join(["returnStrs[0][2] = ", str(firstRow[2])]))
    except Exception as e:
        print(" ".join(["Failed on main", str(e)]))
        sys.exit(1)
class Recon():
    """
    Purpose - Reconciliation driver. Reads the datasets that have recent process-status
    rows in Oracle, resolves the market of each dataset and lists the acknowledgement
    path of every resolved market's S3 bucket.
    """

    #class variables
    m_logger = ""

    #database objects
    m_oracle_db = ""
    m_netezza_db = ""

    def __init__(self, configFile, tradeDate, debugFlag):
        """
        Purpose: Constructor. Creates the logger, loads the configuration dictionary and
                 creates the Oracle object. Exits the process with status 1 when the
                 configuration or the Oracle object cannot be created.
        :param self: class object itself
        :param configFile: Configuration file to use
        :param tradeDate: Trade date; handed to the Logger and kept on the instance
        :param debugFlag: Kept on the instance and later handed to the S3 helper
        """
        # Initialize m_logger object from class Logger and add Header to the log, using addGenericInfo function
        self.m_logger = Logger(logging.INFO, configFile, tradeDate)
        self.m_logger.addFileHandler(logging.DEBUG)
        self.m_logger.addGenericInfo(__file__)

        self.tradeDate = tradeDate
        self.debugFlag = debugFlag
        self.configFile = configFile

        try:
            # Get configuration to a dictionary
            self.m_configDict = configuration(self.configFile, True).m_dictionary

            # Initialize Oracle instance along with connection
            self.m_oracle_db = Oracle(self.m_configDict, self.m_logger)
        except Exception as exp:
            # An exception occurred
            self.m_logger.error("Unable to initialize the configuration " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def readMktConfigFile(self, mktConfigFile):
        """
        Purpose - To read the content of mktConfigFile into the instance dictionary m_mktConfigDict.
                  Exits the process with status 1 when the file cannot be loaded.
        :param mktConfigFile: Path of the market specific configuration file
        :return: None
        """
        try:
            self.m_mktConfigDict = configuration(mktConfigFile, True).m_dictionary
        except Exception as exp:
            # An exception occurred
            self.m_logger.error("Unable to initialize the configuration for logger " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def _buildMktConfigFile(self, mktName):
        """
        Purpose - Derive the market config file name from the main config file name:
                  <dir>/<name up to first '.'>_<mktName in lower case>.<rest of the name>
        :param mktName: Market name returned by the database
        :return: Path of the market specific configuration file
        """
        # The split is on the FIRST dot, so 'app.dev.cfg' becomes 'app_<mkt>.dev.cfg'
        nameParts = os.path.basename(self.configFile).split('.', 1)
        mktFileName = nameParts[0].strip() + '_' + mktName.lower() + '.' + nameParts[1].strip()
        # os.path.join keeps a relative config file relative (plain '/' concatenation
        # would have produced a path under the filesystem root)
        return os.path.join(os.path.dirname(self.configFile), mktFileName)

    def processRecon(self, tidalRunID):
        """
        Purpose - Function responsible for reading the recent datasets from Oracle, getting the
                  market name of each dataset and listing the acknowledgement path of each
                  market's S3 bucket.
                  The method always terminates the process: status 1 when a market config file
                  is missing, status 0 otherwise (other errors are logged, not propagated).
        :param tidalRunID: Tidal Run ID; stored on the instance
        :return: Does not return
        """
        try:
            self.tidalRunID = tidalRunID

            # DB_CALL
            # NOTE(review): the NOT EXISTS subquery filters on PS.STATUS (outer alias);
            # PS1.STATUS looks like what was intended - confirm before changing the SQL.
            tempSql = ("select DM.DATASET_NAME , PS.FILE_ID, TO_CHAR(PS.TRADE_DATE,'YYYYMMDD'), PS.RUN_ID, PS.FILE_NAME from TB_DDY_PROCESS_STATUS PS"
                       " INNER JOIN TB_DDY_MANIFEST_TRANS MT ON MT.RUN_ID= PS.RUN_ID"
                       " INNER JOIN TB_DDY_DATASET_MASTER DM ON DM.DATASET_ID= PS.DATASET_ID"
                       " WHERE PS.CREATE_TIME > SYSDATE - INTERVAL '1' DAY"
                       " AND NOT EXISTS "
                       " ( SELECT 1 FROM TB_DDY_PROCESS_STATUS PS1 WHERE PS1.RUN_ID = PS.RUN_ID and PS1.FILE_ID = PS.FILE_ID and PS.STATUS = 'R')")
            print(tempSql)

            statusRows = self.m_oracle_db.runSqlWthParamsGetMultipleRows(tempSql)

            # Only the distinct dataset names (first column) are needed below
            datasetNames = []
            seenNames = set()
            for row in statusRows:
                if row[0] not in seenNames:
                    seenNames.add(row[0])
                    datasetNames.append(row[0])

            # Number of datasets whose market lookup returned a negative code
            self._sqlerror_ = 0
            returnMktList = []
            for datasetName in datasetNames:
                tempSql = self.m_configDict["SQL"]["get_mkt_defaults_filename"]
                myParamsDict = {'datasetName': datasetName}
                tempGrp = "(%s)" % "|".join(map(re.escape, myParamsDict.keys()))
                mySql = re.sub(tempGrp, lambda m: myParamsDict[m.group()], tempSql)

                returnList = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                print(returnList[0])

                # Column 0 is the return code, column 2 the market name
                returnCode = int(returnList[0])
                if returnCode == 0:
                    if returnList[2] not in returnMktList:
                        returnMktList.append(returnList[2])
                elif returnCode < 0:
                    self.m_logger.error("Error in Get Market Deafults Filename for Dataset : {1}, SQL : {0}".format(mySql, datasetName))
                    self._sqlerror_ += 1
                else:
                    self.m_logger.error("Warning in Get Market Deafults Filename for Dataset : {1}, SQL : {0}".format(mySql, datasetName))

            print(returnMktList)

            for mktName in returnMktList:
                # Build the string for mktConfigFile based on mktName and configFile info
                mktConfigFile = self._buildMktConfigFile(mktName)
                print("mktConfigFile = ", mktConfigFile)

                # Validate that the market config file is a valid file
                if not os.path.isfile(mktConfigFile):
                    self.m_logger.error("Invalid market manifest file " + mktConfigFile)
                    sys.exit(1)

                self.readMktConfigFile(mktConfigFile)

                self.s3object = S3(mktConfigFile, self.m_logger, self.debugFlag)
                self.s3object.getToken()

                s3Config = self.s3object.m_configFile["S3"]
                print(s3Config)
                bucket = s3Config["bucket"]
                path = s3Config["path"]
                ackpath = s3Config["ack_path"]
                print(bucket, path)
                print(ackpath)

                rs = self.s3object.listBucketWPath(bucket, ackpath)
                print(rs)
        except Exception as exp:
            # 'except Exception' (not a bare except) so the sys.exit(1) above is not
            # swallowed and turned into a successful exit.
            self.m_logger.error("Error while creating S3 recon file Exception : {0}".format(repr(exp)))
            # Not exiting with a failure status at this point

        # NOTE(review): assumed to sit at method level (runs on success and after a
        # logged error) - confirm against the original layout.
        sys.exit(0)
class Extractor():
    """
    Purpose - Extract driver for one market. Downloads the market's manifest files and the
    data files listed in them from S3 into the local stage directory, and records the
    dataset / file status in Oracle.
    """

    #class variables
    m_logger = ""

    #database objects
    m_oracle_db = ""

    def __init__(self, configFile, mktName, tradeDate, debugFlag):
        """
        Purpose: Constructor. Creates the logger, loads the configuration dictionary and
                 creates the Oracle object. Exits the process with status 1 when the
                 configuration or the Oracle object cannot be created.
        :param self: class object itself
        :param configFile: Configuration file to use
        :param mktName: Market name to extract for
        :param tradeDate: Trade date; handed to the Logger and used in the status records
        :param debugFlag: When true, diagnostic values are printed to stdout
        """
        # Initialize m_logger object from class Logger and add Header to the log, using addGenericInfo function
        self.m_logger = Logger(logging.INFO, configFile, tradeDate)
        self.m_logger.addFileHandler(logging.DEBUG)
        self.m_logger.addGenericInfo(__file__)

        self.tradeDate = tradeDate
        self.debugFlag = debugFlag
        self.configFile = configFile
        self.mktName = mktName

        try:
            # Get configuration to a dictionary
            self.m_configDict = configuration(self.configFile, True).m_dictionary

            # Initialize Oracle instance along with connection
            self.m_oracle_db = Oracle(self.m_configDict, self.m_logger)
        except Exception as exp:
            # An exception occurred
            self.m_logger.error("Unable to initialize the configuration " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def readMktConfigFile(self, mktConfigFile):
        """
        Purpose - To read the content of mktConfigFile into the instance dictionary m_mktConfigDict.
                  Exits the process with status 1 when the file cannot be loaded.
        :param mktConfigFile: Path of the market specific configuration file
        :return: None
        """
        try:
            self.m_mktConfigDict = configuration(mktConfigFile, True).m_dictionary
        except Exception as exp:
            # An exception occurred
            self.m_logger.error("Unable to initialize the configuration for logger " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def _debug(self, *parts):
        """
        Purpose - Print the given values on one line, separated by single spaces, when debugFlag is set
        :param parts: Values to print; each one is converted with str()
        :return: None
        """
        if self.debugFlag:
            print(" ".join(str(part) for part in parts))

    def _bindSql(self, sqlKey, myParamsDict):
        """
        Purpose - Fetch the SQL template stored under sqlKey in the SQL section of the main
                  configuration and replace every occurrence of each key of myParamsDict
                  with the corresponding value
        :param sqlKey: Key of the statement in m_configDict["SQL"]
        :param myParamsDict: Placeholder text -> replacement string
        :return: The SQL text with the placeholders replaced
        """
        tempSql = self.m_configDict["SQL"][sqlKey]
        # Longest placeholder first, so a name that is a prefix of another one cannot shadow it
        tempGrp = "(%s)" % "|".join(map(re.escape, sorted(myParamsDict, key=len, reverse=True)))
        # NOTE(review): the values are spliced into the SQL text verbatim (no escaping or
        # bind variables). Some of them come from manifest contents / file names.
        mySql = re.sub(tempGrp, lambda m: myParamsDict[m.group()], tempSql)
        self._debug("tempSql = ", tempSql)
        self._debug("myParamsDict = ", myParamsDict)
        self._debug("mySql = ", mySql)
        return mySql

    def _runConfiguredSql(self, sqlKey, myParamsDict):
        """
        Purpose - Bind the configured statement (see _bindSql) and run it with runSqlWthParamsGetOneRow
        :param sqlKey: Key of the statement in m_configDict["SQL"]
        :param myParamsDict: Placeholder text -> replacement string
        :return: Tuple (mySql, returnStr) - the executed SQL text and the row returned for it
        """
        mySql = self._bindSql(sqlKey, myParamsDict)
        returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
        self._debug("returnStr = ", returnStr)
        return mySql, returnStr

    def _putProcessStatus(self, fileName, processID, hostName, fileSize, recordCount, pStatus, pComment):
        """
        Purpose - Run the configured put_process_status statement for one data file
        :return: Tuple (mySql, returnStr) as returned by _runConfiguredSql
        """
        myParamsDict = {"datasetName": self.datasetName,
                        "runID": str(self.runID),
                        "fileName": fileName,
                        "tDate": str(self.tradeDate),
                        "processID": str(processID),
                        "hostName": hostName,
                        "fileSize": str(fileSize),
                        "recordCount": str(recordCount),
                        "status": pStatus,
                        "lcomment": pComment}
        return self._runConfiguredSql("put_process_status", myParamsDict)

    def _putDatasetStatus(self, pStatus):
        """
        Purpose - Run the configured put_dataset statement for the current dataset / run
        :param pStatus: Status code to record
        :return: Tuple (mySql, returnStr) as returned by _runConfiguredSql
        """
        myParamsDict = {"datasetName": self.datasetName,
                        "runID": str(self.runID),
                        "tDate": str(self.tradeDate),
                        "status": pStatus}
        return self._runConfiguredSql("put_dataset", myParamsDict)

    def _buildMktConfigFile(self):
        """
        Purpose - Derive the market config file name from the main config file name:
                  <dir>/<name up to first '.'>_<mktName in lower case>.<rest of the name>
        :return: Path of the market specific configuration file
        """
        nameParts = os.path.basename(self.configFile).split('.', 1)
        mktFileName = nameParts[0].strip() + '_' + self.mktName.lower() + '.' + nameParts[1].strip()
        # os.path.join keeps a relative config file relative (plain '/' concatenation
        # would have produced a path under the filesystem root)
        return os.path.join(os.path.dirname(self.configFile), mktFileName)

    def chkActiveLoads(self):
        """
        Purpose - To check the count of active loads happening at a given point and wait
                  until it is at or below the configured maximum.
                  The check is skipped (0 is returned) unless active_load_check_flag is 'Y'.
                  Exits the process with status 1 on a database error or any other exception.
        :param None: None at this point
        :return: 0 when the load may proceed, 1 when the configured maximum wait time was exceeded
        """
        try:
            activeLoadCfg = self.m_mktConfigDict["ACTIVE_LOAD"]
            if activeLoadCfg["active_load_check_flag"] != 'Y':
                return 0

            # Converted with int() so the comparisons / sleep below work on numbers
            # (same handling as in chkRaceStatus)
            localActiveLoadMax = int(activeLoadCfg["active_load_max"])
            localActiveLoadWaitTime = int(activeLoadCfg["active_load_wait_time"])
            localActiveLoadMaxWaitTime = int(activeLoadCfg["active_load_max_wait_time"])

            mySql = self.m_configDict["SQL"]["get_active_loads"]
            totalActiveWaitTime = 0
            while True:
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                self._debug("mySql = ", mySql)
                self._debug("returnStr = ", returnStr)

                if returnStr[0] != '0':
                    self.m_logger.error("Unable to get active loads using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

                # Parsed only after the return code check: on an error returnStr[1] is a message, not a count
                activeLoads = int(returnStr[1].strip())
                self._debug("chkActiveLoads - Active Loads value = ", activeLoads)

                # Check if actual active loads is <= configured active loads. If so, return out of the fn
                if activeLoads <= localActiveLoadMax:
                    return 0

                # Sleep for time defined by configured value for "active_load_wait_time"
                time.sleep(localActiveLoadWaitTime)
                totalActiveWaitTime += localActiveLoadWaitTime

                # Check if actual Total wait time is > configured total wait time. If so, log an error and return failure
                if totalActiveWaitTime > localActiveLoadMaxWaitTime:
                    self.m_logger.error("In Fn chkActiveLoads. Total Actual Wait Time exceeds the configured value active_load_max_wait_time. Either cleanup orphaned loads or increase the either active_load_max or active_load_max_wait_time. totalActiveWaitTime = " + str(totalActiveWaitTime) + " localActiveMaxWaitTime=" + str(localActiveLoadMaxWaitTime))
                    return 1
        except Exception as exp:
            self.m_logger.error("Failure in chkActiveLoads process for file with the error " + str(exp))
            sys.exit(1)

    def chkRaceStatus(self):
        """
        Purpose - To check if a load is already running for the current dataset (self.datasetName)
                  and wait until the value returned by get_race_status is <= 1.
                  The check is skipped (0 is returned) unless race_status_check_flag is 'Y'.
                  Exits the process with status 1 on a database error or any other exception.
        :param None: None at this point
        :return: 0 when the load may proceed, 1 when the configured maximum wait time was exceeded
        """
        try:
            raceCfg = self.m_mktConfigDict["RACE"]
            if raceCfg["race_status_check_flag"] != 'Y':
                return 0

            localRaceStatusWaitTime = int(raceCfg["race_status_wait_time"])
            localRaceStatusMaxWaitTime = int(raceCfg["race_status_max_wait_time"])

            mySql = self._bindSql("get_race_status", {'datasetName': self.datasetName})

            totalRaceStatusWaitTime = 0
            while True:
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                self._debug("returnStr = ", returnStr)

                if returnStr[0] != '0':
                    self.m_logger.error("Unable to get race status using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

                # Parsed only after the return code check: on an error returnStr[1] is a message, not a count
                raceStatus = int(returnStr[1].strip())
                self._debug("chkRaceStatus - ReturnCode = ", raceStatus)

                # Check if the load for this dataset is already running. If not, exit out of the function with normal return value
                if raceStatus <= 1:
                    return 0

                # Sleep for time defined by configured value for "race_status_wait_time"
                time.sleep(localRaceStatusWaitTime)
                totalRaceStatusWaitTime += localRaceStatusWaitTime
                self._debug("totalRaceStatusWaitTime = ", totalRaceStatusWaitTime, "localRaceStatusWaitTime =", localRaceStatusWaitTime)

                # Check if actual Total wait time is > configured total wait time. If so, log an error and return failure
                if totalRaceStatusWaitTime > localRaceStatusMaxWaitTime:
                    self.m_logger.error("In Fn chkRaceStatusLoads. Total Actual Wait Time exceeds the configured value race_status_max_wait_time. Either check if the Dataset is getting loaded or increase the either active_load_max or active_load_max_wait_time. totalRaceStatusWaitTime = " + str(totalRaceStatusWaitTime) + " localRaceStatusMaxWaitTime=" + str(localRaceStatusMaxWaitTime))
                    return 1
        except Exception as exp:
            self.m_logger.error("Failure in chkRaceStatus process for file with the error " + str(exp))
            sys.exit(1)

    def extractData(self, localDataRecordList, localFileID, localFileIDQueue, localDBFlag):
        """
        Purpose - To download one data file from the S3 bucket of the market config into the
                  local stage directory and verify its size against the manifest record.
                  The result code is 0 when the download succeeded and the size matches, non-zero otherwise.
        :param localDataRecordList: Manifest record of the data file; index 1 is the file name,
                                    index 2 the file size and index 3 the record count
        :param localFileID: Internal File ID assigned to the data file
        :param localFileIDQueue: Queue on which (localFileID, result code) is put when localDBFlag is set
        :param localDBFlag: Flag indicating if the status should be recorded in the database.
                            When set, the result is delivered through localFileIDQueue
        :return: The result code when localDBFlag is not set; None otherwise (see localFileIDQueue)
        """
        try:
            self._debug("Inside extractData function")

            dataFileName = localDataRecordList[1]
            # Needed for the size check in both modes, so parsed outside the DB branch
            dataFileSize = int(localDataRecordList[2])

            if localDBFlag:
                processID = os.getpid()
                hostName = socket.gethostname()
                dataFileRecordCount = int(localDataRecordList[3])

                # Insert Process status into Oracle db
                mySql, returnStr = self._putProcessStatus(dataFileName, processID, hostName, dataFileSize,
                                                          dataFileRecordCount, 'P', 'Load started')
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    # Report the failure before exiting; the parent counts queue results
                    # and would otherwise never see one for this file
                    localFileIDQueue.put((localFileID, 1))
                    sys.exit(1)

            # Here localFileWthPath is the local stage dir with file name
            localFileWthPath = self.m_configDict["ENV"]["stage_dir"] + "/" + dataFileName
            # Here targetFileWthPath is the AWS dir with file name
            targetFileWthPath = os.path.join(self.s3object.m_configFile["S3"]["path"], os.path.basename(dataFileName))
            targetBucket = self.s3object.m_configFile["S3"]["bucket"]
            encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]
            localAWSRetries = int(self.m_mktConfigDict["ENV"]["aws_retries"])

            self._debug("localFileWthPath =", localFileWthPath)
            self._debug("targetFileWthPath =", targetFileWthPath)
            self._debug("targetBucket =", targetBucket)
            self._debug("encryptKeyFlag =", encryptKeyFlag)
            self._debug("localAWSRetries =", localAWSRetries)

            # Failure is the starting state, so a retry count of 0 is reported as a failed load
            extractReturnValue = 1
            pStatus = 'F'
            pComment = 'Load failed'
            for _ in range(localAWSRetries):
                # Call s3.data download to extract the data file (single part load)
                extractReturnValue = int(self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket))
                self._debug("extractReturnValue = ", extractReturnValue)
                if extractReturnValue == 0:
                    pStatus = 'S'
                    pComment = 'Load completed'
                    break

            # Get the size of the file downloaded (0 when nothing was written)
            localFileSize = os.stat(localFileWthPath).st_size if os.path.isfile(localFileWthPath) else 0

            # Check if the downloaded file size is matching with what is mentioned in manifest file. If not mark it as failed
            if pStatus == 'S' and localFileSize != dataFileSize:
                pStatus = 'F'
                pComment = 'Actual file size != Manifest file size'
                # The caller only sees the result code, so the mismatch must be reflected there too
                extractReturnValue = 1

            if not localDBFlag:
                return extractReturnValue

            # Call DB to insert 'S' or 'F' for this file
            localRecordCount = 0
            mySql, returnStr = self._putProcessStatus(dataFileName, processID, hostName, localFileSize,
                                                      localRecordCount, pStatus, pComment)
            if returnStr[0] != '0':
                self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                localFileIDQueue.put((localFileID, 1))
                sys.exit(1)

            localFileIDQueue.put((localFileID, extractReturnValue))
        except Exception as exp:
            self.m_logger.error("Failure in extractData process for file with the error " + str(exp))
            if localDBFlag:
                localFileIDQueue.put((localFileID, 1))
            else:
                return 1

    def getRecords(self, fileDict, startDateTime, endDateTime):
        """
        Purpose - Generator yielding, in ascending key order, the values of fileDict whose key
                  lies between startDateTime and endDateTime (both inclusive) and which contain
                  the configured pattern_to_search.
                  Exits the process with status 1 on any exception.
        :param fileDict : Dictionary containing Last_modified Date and file name
        :param startDateTime : Start DateTime in the format '2016-02-01 00:00:00'
        :param endDateTime : End DateTime in the format '2016-02-10 00:00:00'
        """
        try:
            patternToSearch = self.m_configDict["ENV"]["pattern_to_search"]
            self._debug("fileDict = ", fileDict)
            self._debug("patternToSearch = ", patternToSearch)
            self._debug("startDateTime = ", startDateTime)
            self._debug("endDateTime = ", endDateTime)

            # Keys are unique, so sorting the items orders them by key
            sorted_items = sorted(fileDict.items())
            sorted_keys = [item[0] for item in sorted_items]
            start = bisect.bisect_left(sorted_keys, startDateTime)
            end = bisect.bisect_right(sorted_keys, endDateTime)
            self._debug("start = ", start)
            self._debug("end = ", end)

            for fileItem in sorted_items[start:end]:
                self._debug("For fileItem = ", fileItem)
                if patternToSearch in fileItem[1]:
                    yield fileItem[1]
        except Exception as exp:
            self.m_logger.error("Failed while executing getRecords to sort the dictionary content of dictionary with Error = " + str(exp))
            sys.exit(1)

    def getManifestFileList(self, startDateTime, endDateTime):
        """
        Purpose - List the configured S3 bucket / path by last modified date and return the
                  manifest files (see getRecords) modified between startDateTime and endDateTime
        :param startDateTime : Start DateTime in the format '2016-02-01 00:00:00'
        :param endDateTime : End DateTime in the format '2016-02-10 00:00:00'
        :return: List of manifest file names, or 1 when the list could not be created
        """
        try:
            # NOTE(review): bucket / path are read from the main config here, while the downloads
            # use the S3 object's config - confirm both point at the same location.
            s3Bucket = self.m_configDict["S3"]["bucket"]
            s3Path = self.m_configDict["S3"]["path"]
            self._debug("S3 Bucket = ", s3Bucket)
            self._debug("S3 Path = ", s3Path)
            self._debug("startDateTime = ", startDateTime)
            self._debug("endDateTime = ", endDateTime)

            fileListDict = self.s3object.listBucketWPathByLastModified(s3Bucket, s3Path)
            manifestFileList = list(self.getRecords(fileListDict, startDateTime, endDateTime))

            self._debug("fileListDict = ", fileListDict)
            self._debug("manifestFileList = ", manifestFileList)
            return manifestFileList
        except Exception as exp:
            self.m_logger.error("Failed while creating AWS manifest file list with Error = " + str(exp))
            return 1

    def processExtractor(self):
        """
        Purpose - Function responsible for validating the market, getting the AWS token, fetching
                  the manifest files from AWS and extracting the data files listed in each of them.
                  For every manifest file: the dataset name is validated, the dataset is recorded
                  with status 'P', the data files are downloaded in worker processes (at most
                  active_load_max at a time) and the dataset is recorded with status 'S' or 'F'.
                  Exits the process with status 1 on the first failure.
        :param : None
        :return: None
        """
        try:
            # DB_CALL
            # Make database call to validate mktName
            mySql, returnStr = self._runConfiguredSql("validate_market_name", {'mktName': self.mktName})
            if returnStr[0] != '0':
                self.m_logger.error("Invalid market name provided " + mySql + ". Error = " + self.mktName)
                sys.exit(1)
            self._debug("MktName from DB = ", self.mktName)

            # Build the string for mktConfigFile based on mktName and configFile info
            self.mktConfigFile = self._buildMktConfigFile()
            self._debug("mktConfigFile = ", self.mktConfigFile)

            # Validate Market Config file is a valid file
            if not os.path.isfile(self.mktConfigFile):
                self.m_logger.error("Invalid market manifest file " + self.mktConfigFile)
                sys.exit(1)

            # Read Market specific config file and store it in a specific dictionary
            self.readMktConfigFile(self.mktConfigFile)
            self._debug("m_mktConfigDict=", self.m_mktConfigDict)

            # Temp value. Needs to be replaced by the DB call for the last modified timestamp (get_last_modified)
            lastModifiedDate = "2015-01-01 00:00:00"

            # Get RunID
            self.runID = generate_runId()
            self._debug("RunID = ", self.runID)

            # Initialize S3 object and get FINRA cloud service token and establish s3 session
            self.s3object = S3(self.mktConfigFile, self.m_logger, self.debugFlag)
            self.s3object.getToken()

            # Get list of Manifest files to be processed. The upper bound is one day ahead of now
            currentDate = (datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
            finraManifestFileList = self.getManifestFileList(lastModifiedDate, currentDate)
            self._debug("finraManifestFileList = ", finraManifestFileList)
            if finraManifestFileList == 1:
                # getManifestFileList signals failure with 1 (it has already logged the cause)
                self.m_logger.error("Unable to get the manifest file list for market " + self.mktName)
                sys.exit(1)

            # Download manifest files in the manifest file list to a specific folder from AWS
            localFileDir = self.s3object.m_configFile["ENV"]["stage_dir"]
            targetFolder = self.s3object.m_configFile["S3"]["path"]
            targetBucket = self.s3object.m_configFile["S3"]["bucket"]

            # Get an instance of the Manifest class
            localManifest = Manifest()
            localAWSRetries = int(self.m_mktConfigDict["ENV"]["aws_retries"])

            for finraManifestFile in finraManifestFileList:
                # NOTE(review): targetFolder is concatenated without a separator - presumably the
                # configured path ends with '/'; confirm.
                targetFileWthPath = targetFolder + finraManifestFile
                localFileWthPath = localFileDir + "/" + finraManifestFile
                self._debug("targetFileWthPath = ", targetFileWthPath)
                self._debug("localFileWthPath = ", localFileWthPath)
                self._debug("finraManifestFile = ", finraManifestFile)

                # Failure is the starting state, so a retry count of 0 is reported as a failed fetch
                extractReturnValue = 1
                for _ in range(localAWSRetries):
                    # Call s3.data download to extract the manifest file (single part load)
                    extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket)
                    self._debug("extractReturnValue = ", extractReturnValue)
                    if not extractReturnValue:
                        break

                if extractReturnValue:
                    self.m_logger.error("Unable to fetch manifestFile = " + finraManifestFile + "from the path = " + targetFileWthPath + " to the local filesystem = " + localFileWthPath)
                    sys.exit(1)

                # get datasetname from the manifest file (second dot separated part of the file name). Need check based on FINRA naming
                self.datasetName = os.path.basename(finraManifestFile).split('.', 3)[1].strip().upper()
                self._debug("datasetName = ", self.datasetName)

                # Validate the manifest file name to make sure that we are expecting it
                mySql, returnStr = self._runConfiguredSql("validate_dataset_name", {'datasetName': self.datasetName})

                # Negative code: error, exit out of the program. Positive code: dataset is not in
                # the list to process, skip it and move to the next file
                validateCode = int(returnStr[0])
                if validateCode < 0:
                    self.m_logger.error("Unable to validate datasetName " + mySql + ". Error = " + self.datasetName)
                    sys.exit(1)
                elif validateCode > 0:
                    self.m_logger.info("Give Dataset is not in the list to process. Skipping it" + mySql + ". Dataset Name = " + self.datasetName)
                    continue

                # Insert a record with status 'P' for the given datasetName, saying that we start the process for this manifest file
                mySql, returnStr = self._putDatasetStatus('P')
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to insert into tb_dxt_dataset_trans table " + mySql + ". Error = " + self.datasetName)
                    sys.exit(1)

                # Read the contents of manifestfile i.e. dataFileNames into a list
                manifestDelim = self.m_configDict["ENV"]["manifest_delim"]
                self._debug("localFileWthPath = ", localFileWthPath)
                manifestFileList = localManifest.readManifest(localFileWthPath, self.m_logger, manifestDelim, self.debugFlag)
                self._debug("manifestDelim = ", manifestDelim)
                self._debug("manifestFileList = ", manifestFileList)

                process_count = int(self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"])
                if process_count < 1:
                    # With no worker slots nothing could ever be launched
                    raise ValueError("active_load_max must be at least 1, got " + str(process_count))

                # Now go into multiprocessing and call extractData function and extract files in
                # batches of at most process_count workers
                fileID = 1
                dbFlag = 1
                fileIDQueue = Queue()
                totalFiles = len(manifestFileList)
                sendCounter = 0
                failureFlag = 0
                while sendCounter < totalFiles and not failureFlag:
                    procs = []
                    while sendCounter < totalFiles and len(procs) < process_count:
                        self._debug("manifestFileList[sendCounter][1]", manifestFileList[sendCounter][1], "fileID = ", fileID)
                        # Pass manifestFileList[sendCounter] as it contains the whole record including the filename, filesize & row count
                        processHandle = Process(target=Extractor.extractData,
                                                args=(self, manifestFileList[sendCounter], fileID, fileIDQueue, dbFlag))
                        processHandle.start()
                        procs.append(processHandle)
                        sendCounter += 1
                        fileID += 1

                    for p in procs:
                        p.join()
                        # A worker that died without queueing a result still counts as a failure
                        if p.exitcode != 0:
                            failureFlag = 1

                    # All workers of the batch have finished; collect what they reported
                    while not fileIDQueue.empty():
                        qFileID, qResult = fileIDQueue.get()
                        self._debug("qFileID = ", qFileID, "qResult = ", qResult)
                        if qResult:
                            failureFlag = 1

                    self._debug("sendCounter = ", sendCounter, "failureFlag = ", failureFlag)

                self._debug("Failure Flag = ", failureFlag)
                if failureFlag:
                    pStatus = 'F'
                else:
                    pStatus = 'S'

                # DB_CALL
                # Record the dataset with 'S' or 'F' based on Failure or Success
                mySql, returnStr = self._putDatasetStatus(pStatus)
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

                if failureFlag:
                    self.m_logger.error("Extract failed for data files for manifest file " + finraManifestFile)
                    sys.exit(1)
            # End of for loop for finraManifestFiles
        except Exception as e:
            self.m_logger.error("ProcessExtractor failed with error " + str(e))
            sys.exit(1)
class Loader():
    """
    Purpose - Upload the data files listed in a manifest file through
    self.s3object, recording progress in the database reached through
    self.m_oracle_db, and finally generate and upload a ".done" file.
    """

    # class variables
    m_logger = ""

    # database objects
    m_oracle_db = ""
    m_netezza_db = ""

    def __init__(self, configFile, tradeDate):
        """
        Purpose: Constructor
        :param self: class object itself
        :param configFile: Configuration file to use
        :param tradeDate: Trade date handed to the logger and stored on the instance
        """
        # Initialize m_logger object from class Logger and add Header to the
        # log, using addGenericInfo function
        self.m_logger = Logger(logging.INFO, configFile, tradeDate)
        self.m_logger.addFileHandler(logging.DEBUG)
        self.m_logger.addGenericInfo(__file__)
        self.tradeDate = tradeDate

        try:
            # Get configuration to a dictionary
            self.m_configDict = configuration(configFile, True).m_dictionary

            # Initialize Oracle instance along with connection
            self.m_oracle_db = Oracle(self.m_configDict, self.m_logger)
        except Exception as exp:
            # An exception occurred
            self.m_logger.error("Unable to initialize the configuration " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def readMktConfigFile(self, mktConfigFile):
        """
        Purpose - To read the content of mktConfigFile into the instance
        dictionary m_mktConfigDict for reference. Exits the process with
        status 1 when the file cannot be read.
        :param mktConfigFile: Market specific configuration file
        :return: None
        """
        try:
            self.m_mktConfigDict = configuration(mktConfigFile, True).m_dictionary
        except Exception as exp:
            # An exception occurred
            self.m_logger.error("Unable to initialize the configuration for logger " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def chkActiveLoads(self):
        """
        Purpose - To check the count of active loads happening at a given
        point and wait until it is within the configured maximum.
        :return: 0 when the check is disabled or the active load count is
                 within active_load_max; 1 when the accumulated wait time
                 exceeds active_load_max_wait_time. Exits the process with
                 status 1 on a database or unexpected error.
        """
        try:
            activeLoadConfig = self.m_mktConfigDict["ACTIVE_LOAD"]
            if activeLoadConfig["active_load_check_flag"] != 'Y':
                return 0

            # Config values are cast to int (as chkRaceStatus does) so the
            # comparisons below are numeric and time.sleep gets a number.
            localActiveLoadMax = int(activeLoadConfig["active_load_max"])
            localActiveLoadWaitTime = int(activeLoadConfig["active_load_wait_time"])
            localActiveLoadMaxWaitTime = int(activeLoadConfig["active_load_max_wait_time"])

            mySql = self.m_configDict["SQL"]["get_active_loads"]
            myParams = ""
            totalActiveWaitTime = 0

            while True:
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
                if returnStr[0] != 0:
                    self.m_logger.error("Unable to get active loads using sql " + mySql + ". Error = " + str(returnStr[1]))
                    sys.exit(1)

                # Check if actual active loads is <= configured active loads.
                # If so, return out of the fn
                if int(str(returnStr[1]).strip()) <= localActiveLoadMax:
                    return 0

                # Sleep for time defined by configured value for "active_load_wait_time"
                time.sleep(localActiveLoadWaitTime)
                totalActiveWaitTime += localActiveLoadWaitTime

                # Check if actual Total wait time is > configured total wait
                # time. If so, log an error and return failure
                if totalActiveWaitTime > localActiveLoadMaxWaitTime:
                    self.m_logger.error("In Fn chkActiveLoads. Total Actual Wait Time exceeds the configured value active_load_max_wait_time. Either cleanup orphaned loads or increase the either active_load_max or active_load_max_wait_time. totalActiveWaitTime = " + str(totalActiveWaitTime) + " localActiveMaxWaitTime=" + str(localActiveLoadMaxWaitTime))
                    return 1
        except Exception as exp:
            self.m_logger.error("Failure in chkActiveLoads process for file with the error " + str(exp))
            sys.exit(1)

    def chkRaceStatus(self):
        """
        Purpose - To check if a load is already running for the given
        dataset (self.datasetName) and wait for it to finish.
        :return: 0 when the check is disabled or the value returned by the
                 get_race_status sql is <= 1; 1 when the accumulated wait
                 time exceeds race_status_max_wait_time. Exits the process
                 with status 1 on a database or unexpected error.
        """
        try:
            raceConfig = self.m_mktConfigDict["RACE"]
            if raceConfig["race_status_check_flag"] != 'Y':
                return 0

            localRaceStatusWaitTime = int(raceConfig["race_status_wait_time"])
            localRaceStatusMaxWaitTime = int(raceConfig["race_status_max_wait_time"])

            myParams = {"datasetName": self.datasetName}
            mySql = self.m_configDict["SQL"]["get_race_status"]
            totalRaceStatusWaitTime = 0

            while True:
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
                if returnStr[0] != 0:
                    self.m_logger.error("Unable to get race status using sql " + mySql + ". Error = " + str(returnStr[1]))
                    sys.exit(1)

                # Check if the load for this dataset is already running. If
                # not, exit out of the function with normal return value
                if int(returnStr[1].strip()) <= 1:
                    return 0

                # Sleep for time defined by configured value for "race_status_wait_time"
                time.sleep(localRaceStatusWaitTime)
                totalRaceStatusWaitTime += localRaceStatusWaitTime

                # Check if actual Total wait time is > configured total wait
                # time. If so, log an error and return failure
                if totalRaceStatusWaitTime > localRaceStatusMaxWaitTime:
                    self.m_logger.error("In Fn chkRaceStatusLoads. Total Actual Wait Time exceeds the configured value race_status_max_wait_time. Either check if the Dataset is getting loaded or increase the either active_load_max or active_load_max_wait_time. totalRaceStatusWaitTime = " + str(totalRaceStatusWaitTime) + " localRaceStatusMaxWaitTime=" + str(localRaceStatusMaxWaitTime))
                    return 1
        except Exception as exp:
            self.m_logger.error("Failure in chkRaceStatus process for file with the error " + str(exp))
            sys.exit(1)

    def _putProcessStatus(self, localFileID, localDataFile, processID, hostName, pStatus, pComment):
        """
        Purpose - Run the put_process_status sql for one data file. Exits the
        process with status 1 when the database call reports a failure.
        """
        mySql = self.m_configDict["SQL"]["put_process_status"]
        myParams = {"datasetName": self.datasetName,
                    "runID": self.runID,
                    "fileID": localFileID,
                    "fileName": localDataFile,
                    "tDate": self.tradeDate,
                    "processID": processID,
                    "hostName": hostName,
                    "status": pStatus,
                    "lcomment": pComment,
                    "tidalRunID": self.tidalRunID}
        returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
        if returnStr[0] != 0:
            self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + str(returnStr[1]))
            sys.exit(1)

    def _putDatasetStatus(self, pStatus):
        """
        Purpose - Run the put_dataset sql with the given status. Exits the
        process with status 1 when the database call reports a failure.
        """
        mySql = self.m_configDict["SQL"]["put_dataset"]
        myParams = {"datasetName": self.datasetName,
                    "runID": self.runID,
                    "tDate": self.tradeDate,
                    "status": pStatus,
                    "tidalRunID": self.tidalRunID}
        returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
        if returnStr[0] != 0:
            self.m_logger.error("Unable to put dataset info into the database using sql " + mySql + ". Error = " + str(returnStr[1]))
            sys.exit(1)

    def loadData(self, localDataFile, localFileID, localFileIDQueue, localDBFlag):
        """
        Purpose - To load the given datafile to the S3 bucket configured on
        self.s3object
        :param localDataFile: Source datafile to be uploaded to S3
        :param localFileID: Internal File ID assigned to the source datafile
        :param localFileIDQueue: Queue in which, results of the operation is
               stored as a (fileID, result) tuple; only used when
               localDBFlag is set
        :param localDBFlag: When true, run the race/active-load checks,
               record process status in the database and report the result
               through localFileIDQueue
        :return: 0 on success, non-zero on failure
        """
        try:
            processID = None
            hostName = None

            if localDBFlag:
                raceStatusReturnValue = self.chkRaceStatus()
                if raceStatusReturnValue:
                    self.m_logger.error("Failure value returned by chkRaceStatus fn. Return value = " + str(raceStatusReturnValue))
                    localFileIDQueue.put((localFileID, raceStatusReturnValue))
                    return 1

                activeLoadsReturnValue = self.chkActiveLoads()
                if activeLoadsReturnValue:
                    self.m_logger.error("Failure value returned by chkActiveLoads fn. Return value = " + str(activeLoadsReturnValue))
                    # Report the active-load result itself; the race status
                    # value is always 0 here and would hide the failure.
                    localFileIDQueue.put((localFileID, activeLoadsReturnValue))
                    return 1

                processID = os.getpid()
                hostName = socket.gethostname()

                # Insert Process status into Oracle db
                self._putProcessStatus(localFileID, localDataFile, processID, hostName, 'P', 'Load started')

            s3Config = self.s3object.m_configFile["S3"]
            sourceFileWthPath = localDataFile
            targetFileWthPath = os.path.join(s3Config["path"], os.path.basename(sourceFileWthPath))
            targetBucket = s3Config["bucket"]
            encryptKeyFlag = s3Config["encrypt_key"]

            # Call s3 loadDataSinglePart to load the data (single part load)
            loadReturnValue = self.s3object.loadDataSinglePart(sourceFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag)

            if localDBFlag:
                if loadReturnValue == 0:
                    pStatus = 'S'
                    pComment = 'Load completed'
                else:
                    pStatus = 'F'
                    pComment = 'Load failed'

                # Call DB to insert 'S' or 'F' process status
                self._putProcessStatus(localFileID, localDataFile, processID, hostName, pStatus, pComment)
                localFileIDQueue.put((localFileID, loadReturnValue))

            return loadReturnValue
        except Exception as exp:
            self.m_logger.error("Failure in loadData process for file with the error " + str(exp))
            if localDBFlag:
                # Must be a single (fileID, result) tuple: the consumer
                # unpacks every queue item into two values.
                localFileIDQueue.put((localFileID, 1))
            return 1

    def createFinraManifestFile(self, manifestFile):
        """
        Purpose - To build the ".done" file for a manifest file: totals and
        per-file row counts taken from the manifest, followed by the content
        of the defaults file.
        :param manifestFile: Manifest file with path; each line is split on
               "|" and fields 1, 2 and 3 are used as file name, size and
               row count
        :return: 0 on success, 1 on failure (the error is logged)
        """
        try:
            # Read Manifest file to get info like total rows, total size &
            # other details to populate the done file
            with open(manifestFile, "r") as fh:
                self.totalRows = 0
                self.totalSize = 0
                self.fileCount = 0
                self.fileDict = {}
                for data in fh:
                    # A blank (e.g. trailing) line carries no file record
                    if not data.strip():
                        continue
                    mylist = data.rstrip("\n").split("|")
                    fileSize = int(mylist[2])
                    rowCount = int(mylist[3])
                    self.fileCount += 1
                    self.fileDict[self.fileCount] = [mylist[0], os.path.basename(mylist[1]), fileSize, rowCount]
                    self.totalRows += rowCount
                    self.totalSize += fileSize
        except Exception as exp:
            self.m_logger.error("Failed while processing readManifest with Error = " + str(exp))
            return 1

        defaultsFileWthPath = ""
        try:
            # Read Default file to get default file structure info.
            # self.defaultsFile is populated from the db by processLoader.
            defaultsFileWthPath = self.m_mktConfigDict["DATASET"]["defaults_dir"] + "/" + self.defaultsFile
            self.defaultsFileWthPath = defaultsFileWthPath
            with open(defaultsFileWthPath, "r") as fh:
                self.defaultsDict = {}
                self.defaultsCount = 0
                for data in fh:
                    # Lines keep their trailing newline on purpose: they are
                    # written back verbatim to the done file below.
                    self.defaultsCount += 1
                    self.defaultsDict[self.defaultsCount] = data
        except Exception as exp:
            self.m_logger.error("Failed while processing defaults file " + defaultsFileWthPath + " with Error = " + str(exp))
            return 1

        doneFile = ""
        try:
            doneFile = self.m_mktConfigDict["ENV"]["donefile_dir"] + "/" + os.path.basename(manifestFile) + ".done"
            self.finraManifestFile = doneFile
            with open(doneFile, "w") as finraMnFH:
                finraMnFH.write("# AWS RunID : {}\n".format(str(self.runID)))
                finraMnFH.write("# Dataset : {0} , TradeDate : {1}\n".format(str(self.datasetName), str(self.tradeDate)))
                finraMnFH.write("total_compressed={}\n".format(self.totalSize))
                finraMnFH.write("total_rows={}\n".format(self.totalRows))
                finraMnFH.write("no of files={}\n".format(self.fileCount))
                # Sorted keys keep the entries in manifest order regardless
                # of the dict implementation.
                for key in sorted(self.fileDict):
                    val = self.fileDict[key]
                    finraMnFH.write("file_{0}={1}\n".format(str(key), val[1]))
                    finraMnFH.write("file_{0}_rows={1}\n".format(str(key), val[3]))
                finraMnFH.write("# Data Attributes\n")
                for key in sorted(self.defaultsDict):
                    finraMnFH.write("{0}".format(str(self.defaultsDict[key])))
            return 0
        except Exception as exp:
            self.m_logger.error("Failed while creating AWS Done file " + doneFile + " with Error = " + str(exp))
            return 1

    def processLoader(self, configFile, manifestFile, datasetName, tidalRunID):
        """
        Purpose - Function responsible for reading the manifest file, get
        market name, call multiprocess load and other db calls
        :param configFile: Configuration File; the market config file name is
               derived from it as <name>_<market>.<extension>
        :param manifestFile: Manifest File name, relative to the configured
               manifestfile_dir
        :param datasetName: Dataset being loaded
        :param tidalRunID: Run id of the calling scheduler job, stored with
               every database record
        :return: None. Exits the process with status 1 on any error.
        """
        try:
            self.datasetName = datasetName
            self.tidalRunID = tidalRunID

            # Validate Manifest file
            self.manifestFile = self.m_configDict["ENV"]["manifestfile_dir"] + "/" + manifestFile
            if not os.path.isfile(self.manifestFile):
                self.m_logger.error("Invalid manifest file " + self.manifestFile)
                sys.exit(1)

            # Get market name and defaults file name for the dataset from the db
            mySql = self.m_configDict["SQL"]["get_mkt_defaults_filename"]
            myParams = {"datasetName": self.datasetName}
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
            if returnStr[0] != 0:
                self.m_logger.error("Unable to get market info from the database using sql " + mySql + ". Error = " + str(returnStr[1]))
                sys.exit(1)

            # str.strip() never returns None, so test for an empty value
            mktName = (returnStr[2] or "").strip()
            if not mktName:
                self.m_logger.error("Invalid Market Name " + str(returnStr[2]))
                sys.exit(1)

            self.defaultsFile = (returnStr[3] or "").strip()
            if not self.defaultsFile:
                self.m_logger.error("Invalid Defaults File " + str(returnStr[3]))
                sys.exit(1)

            # Build the string for mktConfigFile based on mktName and configFile info
            configNameParts = os.path.basename(configFile).split('.', 1)
            self.mktConfigFile = os.path.dirname(configFile) + '/' + configNameParts[0].strip() + '_' + mktName.lower() + '.' + configNameParts[1].strip()

            # Validate market config file is a valid file
            if not os.path.isfile(self.mktConfigFile):
                self.m_logger.error("Invalid market manifest file " + self.mktConfigFile)
                sys.exit(1)

            # Read Market specific config file and store it in m_mktConfigDict
            self.readMktConfigFile(self.mktConfigFile)

            # Read the contents of manifest - dataFileNames into a list
            localManifest = Manifest()
            manifestFileList = localManifest.readManifest(self.manifestFile, self.m_logger)

            # Get RunID
            self.runID = generate_runId()

            # Record that processing of the dataset started
            self._putDatasetStatus('P')

            # Insert Manifest data in db
            mySql = self.m_configDict["SQL"]["put_manifest"]
            for dataRecord in manifestFileList:
                # NOTE(review): createFinraManifestFile reads field 2 of a
                # manifest line as size and field 3 as rows, the opposite of
                # the mapping below - confirm the record layout returned by
                # Manifest.readManifest.
                myParams = {"datasetName": self.datasetName,
                            "runID": self.runID,
                            "tDate": self.tradeDate,
                            "dataFileName": dataRecord[1],
                            "manifestFileName": manifestFile,
                            "noOfRecords": dataRecord[2],
                            "fileSize": dataRecord[3],
                            "tidalRunID": self.tidalRunID}
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
                if returnStr[0] != 0:
                    self.m_logger.error("Unable to put manifest info into the database using sql " + mySql + ". Error = " + str(returnStr[1]))
                    sys.exit(1)

            # Token is fetched once here, outside the parallel loaders
            self.s3object = S3(self.mktConfigFile, self.m_logger)
            self.s3object.getToken()

            # For each datafile, generate fileID and call loadData fn using
            # multiprocess to load data into AWS
            fileIDQueue = Queue()
            dbFlag = 1
            procs = []
            fileID = 1
            for dataRecord in manifestFileList:
                processHandle = Process(target=Loader.loadData, args=(self, dataRecord[1], fileID, fileIDQueue, dbFlag))
                processHandle.start()
                procs.append(processHandle)
                fileID += 1

            for p in procs:
                p.join()

            failureFlag = 0
            while not fileIDQueue.empty():
                qFileID, qResult = fileIDQueue.get()
                if qResult:
                    failureFlag = 1

            # A loader that terminated through sys.exit(1) never queued a
            # result; its non-zero exit code is the only sign of the failure.
            if any(p.exitcode != 0 for p in procs):
                failureFlag = 1

            if failureFlag:
                pStatus = 'F'
            else:
                pStatus = 'S'

            # Generate done file and push it to AWS
            if self.createFinraManifestFile(self.manifestFile):
                self.m_logger.error("Unable to generate done file. Please fix the issue the re-run the load")
                # Do not upload a missing or partially written done file
                pStatus = 'F'
            elif self.loadData(self.finraManifestFile, 0, fileIDQueue, 0):
                pStatus = 'F'
                self.m_logger.error("Unable to load finra manifest file ")

            # Record the final status of the dataset based on Failure or Success
            self._putDatasetStatus(pStatus)
        except Exception as e:
            self.m_logger.error("ProcessLoader failed with error " + str(e))
            sys.exit(1)
class Process():
    """
    Purpose - Holds the shared resources of a process run: logger, the two
    Postgre connections, the process lock, loader type and process name.
    """

    # class variables
    # lock, logger and loader type
    m_lock = ""
    m_logger = ""
    m_loaderType = ""

    # Adama PG object
    m_adamapg = ""

    # Picard PG object
    m_picardpg = ""

    # Process details
    process_name = ""

    def __init__(self, configFile):
        """
        Purpose: Constructor
        :param self: class object itself
        :param configFile: Configuration file to use

        Exits the process (sys.exit with an error message) when any
        resource cannot be initialized, after releasing whatever was
        already acquired.
        """
        # Initialize global logger object
        self.m_logger = Logger(logging.INFO, configFile)
        self.m_logger.addFileHandler(logging.DEBUG)

        try:
            # Add generic information. The frame's file name may already
            # contain a directory, so resolve it once instead of prefixing
            # it with its own directory.
            fname = inspect.getfile(inspect.currentframe())
            self.m_logger.addGenericInfo(os.path.abspath(fname))

            # export all the values from config into environment
            # (called for its effect; the returned object is not needed)
            configuration(configFile, True)

            # Create Adama replica PG db object
            self.m_adamapg = Postgre(os.environ['adama_pg'], self.m_logger)

            # Create Picard Postgres Datamart object
            self.m_picardpg = Postgre(os.environ['picard_pg'], self.m_logger)

            # Create lock for the process
            self.m_lock = Lock(os.environ['LOCK_FILE'], self.m_logger)

            # loader type
            self.m_loaderType = self.getloaderType()

            # process name
            self.process_name = os.environ['process_name']

            self.m_logger.info("Initializing the process, %s" % self.process_name)
        except Exception as e:
            self.m_logger.error("ERROR: Unable to initialize the process due to: %s" % str(e))

            # Recording the failure is best effort: it may depend on the very
            # resources that failed to initialize and must not prevent the
            # cleanup below.
            try:
                self.updateProcessStatus("F")
            except Exception as statusError:
                self.m_logger.error("ERROR: Unable to update the process status due to: %s" % str(statusError))

            if self.m_adamapg:
                self.m_adamapg.closeConnection()
            if self.m_picardpg:
                self.m_picardpg.closeConnection()
            if self.m_lock:
                self.m_lock.remove()

            sys.exit("ERROR: Unable to initialize the process due to: %s" % str(e))
class Extractor(): #class variables m_logger = "" #database objects m_oracle_db = "" def __init__(self, configFile, mktName, tradeDate, debugFlag): """ Purpose: Constructor :param self: class object itself :param configFile: Configuration file to use """ # Initialize m_logger object from class Logger and add Header to the log, using addGenericInfo function self.m_logger = Logger(logging.INFO, configFile, tradeDate) self.m_logger.addFileHandler(logging.DEBUG) self.m_logger.addGenericInfo(__file__) self.tradeDate = tradeDate self.debugFlag = debugFlag self.configFile = configFile self.mktName = mktName try: # Get configuration to a dictionary self.m_configDict = configuration(self.configFile, True).m_dictionary #Initialize Oracle instance along with connection self.m_oracle_db = Oracle(self.m_configDict, self.m_logger) except Exception as exp: # An exception occurred self.m_logger.error("Unable to initialize the configuration " + str(exp)) print("ERROR: Unable to initialize the configuration for logger " + str(exp)) sys.exit(1) def readMktConfigFile(self, mktConfigFile): """ Purpose - To read the content of mktConfigFile into the global dictionary m_mktConfigDict for reference :param mktConfigFile: :return: """ try: self.m_mktConfigDict = configuration(mktConfigFile, True).m_dictionary except Exception as exp: # An exception occurred self.m_logger.error("Unable to initialize the configuration for logger " + str(exp)) print("ERROR: Unable to initialize the configuration for logger " + str(exp)) sys.exit(1) def chkActiveLoads(self): """ Purpose - To check the count of active Active loads happening at a given point :param None: None at this point :return: """ try: if self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_check_flag"] == 'Y': localActiveLoadMax = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"] localActiveLoadWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_wait_time"] localActiveLoadMaxWaitTime = 
self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max_wait_time"] mySql = "" myParams = "" mySql = self.m_configDict["SQL"]["get_active_loads"] activeFlag=1 totalActiveWaitTime=0 while activeFlag: returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql) if self.debugFlag: print "mySql = ", mySql print "returnStr = ", returnStr print "chkActiveLoads - Active Loads value = ", int(returnStr[1].strip()) if returnStr[0] != '0': self.m_logger.error("Unable to get active loads using sql " + mySql + ". Error = " + returnStr[1]) sys.exit(1) #Check if actual active loads is <= configured active loads. If so, return out of the fn if int(returnStr[1].strip()) <= localActiveLoadMax: activeFlag=0 return 0 #Sleep for time defined by configured value for "active_load_wait_time" time.sleep(localActiveLoadWaitTime) totalActiveWaitTime += localActiveLoadWaitTime #Check if actual Total wait time is > configured total wait time. If so, throw an error and exit if totalActiveWaitTime > localActiveMaxWaitTime: self.m_logger.error("In Fn chkActiveLoads. Total Actual Wait Time exceeds the configured value active_load_max_wait_time. Either cleanup orphaned loads or increase the either active_load_max or active_load_max_wait_time. 
totalActiveWaitTime = " + str(totalActiveWaitTime) + " localActiveMaxWaitTime=" + str(localActiveMaxWaitTime)) return 1 else: return 0 #Return failure return 1 except Exception as exp: self.m_logger.error("Failure in chkActiveLoads process for file with the error " + str(exp)) sys.exit(1) def chkRaceStatus(self): """ Purpose - To check if a load is already running for the given dataset :param None: None at this point :return: """ try: if self.m_mktConfigDict["RACE"]["race_status_check_flag"] == 'Y': localRaceStatusWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_wait_time"]) localRaceStatusMaxWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_max_wait_time"]) mySql = "" #myParams = {"datasetName":self.datasetName} tempSql = self.m_configDict["SQL"]["get_race_status"] myParamsDict = { 'datasetName' : self.datasetName } tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys()) ) mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql) raceFlag=1 totalRaceStatusWaitTime=0 while raceFlag: returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql) if self.debugFlag: print "tempSql = ", tempSql print "myParamsDict = ", myParamsDict print "mySql = ", mySql print "returnStr = ", returnStr print "chkRaceStatus - ReturnCode = ", int(returnStr[1].strip()) if returnStr[0] != '0': self.m_logger.error("Unable to get race status using sql " + mySql + ". Error = " + returnStr[1]) sys.exit(1) #Check if the load for this dataset is already running. If not, exit out of the function with normal return value if int(returnStr[1].strip()) <= 1: raceFlag=0 return 0 #Check if actual Total wait time is > configured total wait time. 
def extractData(self, localDataRecordDict, localFileID, localFileIDQueue, localDBFlag):
    """
    Purpose - Fetch one data file named in a manifest record from S3 into the
    local stage directory, check its size against the manifest and, when
    requested, record the progress in the process-status table.

    :param localDataRecordDict: One manifest record; the file name, file size and
                                row count are read through the key names configured
                                under m_configDict["dxt"]
    :param localFileID: Internal File ID assigned to the data file
    :param localFileIDQueue: Queue that receives (localFileID, resultCode) when
                             localDBFlag is true
    :param localDBFlag: Flag indicating if database should be used or not. When true
                        a 'P' row and then an 'S'/'F' row are written and the result
                        is put on the queue; when false the result is returned
    :return: 0 on success, non-zero on failure (only when localDBFlag is false)
    """
    def debug(*parts):
        # Emits the same text as the Python 2 statement `print a, b`, but the
        # call form also parses on Python 3.
        if self.debugFlag:
            print(" ".join(str(part) for part in parts))

    try:
        debug("Inside extractData function")
        debug("localDataRecordDict = ", localDataRecordDict)

        processID = os.getpid()
        hostName = socket.gethostname()

        # The manifest values are needed for the size check below whether or not
        # the database is used, so they are read unconditionally.
        dxtConfig = self.m_configDict["dxt"]
        localDataFile = localDataRecordDict[dxtConfig["DATA_FILE_NAME_STR"]]
        localDataFileSize = int(localDataRecordDict[dxtConfig["DATA_FILE_SIZE_STR"]])
        localDataFileRecordCount = int(localDataRecordDict[dxtConfig["NO_OF_ROWS_STR"]])
        debug("localDataFile = ", localDataFile)
        debug("localDataFileSize = ", localDataFileSize)
        debug("localDataFileRecordCount = ", localDataFileRecordCount)

        def putProcessStatus(fileSize, recordCount, pStatus, pComment):
            # Write one row through the configured "put_process_status" SQL.
            # NOTE(review): the values are spliced into the SQL text; the file name
            # comes from the manifest, so bind parameters would be safer if the
            # Oracle wrapper supports them - confirm.
            tempSql = self.m_configDict["SQL"]["put_process_status"]
            myParamsDict = {"datasetName": self.datasetName,
                            "runID": str(self.runID),
                            "fileName": localDataFile,
                            "tDate": str(self.tradeDate),
                            "processID": str(processID),
                            "hostName": hostName,
                            "fileSize": str(fileSize),
                            "recordCount": str(recordCount),
                            "status": pStatus,
                            "lcomment": pComment}
            tempGrp = "(%s)" % "|".join(map(re.escape, myParamsDict.keys()))
            mySql = re.sub(tempGrp, lambda m: myParamsDict[m.group()], tempSql)
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
            debug("tempSql = ", tempSql)
            debug("myParamsDict = ", myParamsDict)
            debug("mySql = ", mySql)
            debug("returnStr = ", returnStr)
            if returnStr[0] != '0':
                self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                # SystemExit is not caught by the handler below, so report the
                # failure first; otherwise the parent never hears about this file.
                localFileIDQueue.put((localFileID, 1))
                sys.exit(1)

        if localDBFlag:
            putProcessStatus(localDataFileSize, localDataFileRecordCount, 'P', 'Load started')

        # Local stage path and S3 key of the data file
        localFileWthPath = self.m_configDict["ENV"]["stage_dir"] + "/" + localDataFile
        targetFileWthPath = os.path.join(self.s3object.m_configFile["S3"]["path"], os.path.basename(localDataFile))
        targetBucket = self.s3object.m_configFile["S3"]["bucket"]
        encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]
        localAWSRetries = int(self.m_mktConfigDict["ENV"]["aws_retries"])
        debug("localFileWthPath =", localFileWthPath)
        debug("targetFileWthPath =", targetFileWthPath)
        debug("targetBucket =", targetBucket)
        debug("encryptKeyFlag =", encryptKeyFlag)
        debug("localAWSRetries =", localAWSRetries)

        # Start from "failed" so that zero configured attempts cannot look like success
        extractReturnValue = 1
        pStatus = 'F'
        pComment = 'Load failed'
        for _attempt in range(localAWSRetries):
            extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket)
            debug("extractReturnValue = ", extractReturnValue)
            if int(extractReturnValue) == 0:
                pStatus = 'S'
                pComment = 'Load completed'
                break

        localFileSize = 0
        if pStatus == 'S':
            # Only a completed download can be compared with the manifest
            localFileSize = os.stat(localFileWthPath).st_size
            if localFileSize != localDataFileSize:
                pStatus = 'F'
                pComment = 'Actual file size != Manifest file size'
                # The result code must carry the failure as well as the status row
                extractReturnValue = 1

        # The row count of the fetched file is not computed; 0 is recorded
        localRecordCount = 0

        if localDBFlag:
            putProcessStatus(localFileSize, localRecordCount, pStatus, pComment)
            localFileIDQueue.put((localFileID, extractReturnValue))
        else:
            return extractReturnValue

    except Exception as exp:
        self.m_logger.error("Failure in extractData process for file with the error " + str(exp))
        if localDBFlag:
            localFileIDQueue.put((localFileID, 1))
        else:
            return 1
def getRecords(self, fileDict, startDateTime, endDateTime):
    """
    Purpose - Generator that yields, oldest first, the names of the files whose
    last-modified value lies between startDateTime and endDateTime (both
    inclusive) and whose name contains the configured "pattern_to_search"

    :param fileDict : Dictionary mapping file name to its last-modified value
                      (presumably strings in the same format as the two bounds,
                      since they are compared directly - verify against
                      listBucketWPathByLastModified)
    :param startDateTime : Start DateTime in the format '2016-02-01 00:00:00'
    :param endDateTime : End DateTime in the format '2016-02-10 00:00:00'
    :return: yields matching file names; logs and exits with status 1 on error
    """
    def debug(*parts):
        # Emits the same text as the Python 2 statement `print a, b`, but the
        # call form also parses on Python 3.
        if self.debugFlag:
            print(" ".join(str(part) for part in parts))

    try:
        patternToSearch = self.m_configDict["ENV"]["pattern_to_search"]
        debug("fileDict = ", fileDict)
        debug("patternToSearch = ", patternToSearch)
        debug("startDateTime = ", startDateTime)
        debug("endDateTime = ", endDateTime)

        # The window positions are found by bisecting the last-modified values, so
        # the items must be sliced in that same order. Slicing a list sorted by
        # file name (as before) picked unrelated entries. The name is the
        # tie-break so equal timestamps come out in a stable order.
        byModified = sorted(fileDict.items(), key=lambda item: (item[1], item[0]))
        modifiedValues = [modified for _name, modified in byModified]
        start = bisect.bisect_left(modifiedValues, startDateTime)
        end = bisect.bisect_right(modifiedValues, endDateTime)
        debug("start = ", start)
        debug("end = ", end)

        for fileName, _modified in byModified[start:end]:
            if patternToSearch in fileName:
                debug("fileItem[0] = ", fileName)
                yield fileName

    except Exception as exp:
        self.m_logger.error("Failed while executing getRecords to sort the dictionary content of dictionary with Error = " + str(exp))
        sys.exit(1)
def readManifestFile(self, manifestFileName):
    """
    Purpose - To read the key=value content of Finra's manifest file into a
    nested dictionary {recordIndex: {key: value}}. A new record starts at every
    line beginning with the configured MANIFEST_RECORD_START_PATTERN; records are
    numbered 0, 1, 2, ... in file order. Blank lines are ignored.

    :param manifestFileName : Finra's manifestFileName containing data filenames, file size & no of rows
    :return: the nested dictionary; logs and exits with status 1 on any error
    """
    def debug(*parts):
        # Emits the same text as the Python 2 statement `print a, b`, but the
        # call form also parses on Python 3.
        if self.debugFlag:
            print(" ".join(str(part) for part in parts))

    try:
        manifestRecordStartPattern = self.m_configDict["dxt"]["MANIFEST_RECORD_START_PATTERN"]
        debug("manifestRecordStartPattern =", manifestRecordStartPattern)

        manifestFileDict = {}
        recordIndex = None
        with open(manifestFileName) as infile:
            for lineNumber, line in enumerate(infile, 1):
                line = line.strip()
                if not line:
                    # A blank separator or trailing line is not an entry
                    continue
                if line.startswith(manifestRecordStartPattern):
                    recordIndex = len(manifestFileDict)
                    manifestFileDict[recordIndex] = {}
                if recordIndex is None or '=' not in line:
                    # Fail with the offending line instead of an unpack/KeyError
                    raise ValueError("line %d is not a key=value entry of a manifest record: %r" % (lineNumber, line))
                var, val = line.split('=', 1)
                debug("var = ", var, "val = ", val)
                manifestFileDict[recordIndex][var.strip()] = val.strip()

        debug("=====================================")
        debug("manifestFileDict = ", manifestFileDict)
        debug("=====================================")
        return manifestFileDict

    except Exception as exp:
        self.m_logger.error("Failed while executing readManifestFile to get FINRA manifest file into nested dictionary, Error = " + str(exp))
        sys.exit(1)


def getManifestFileList(self, startDateTime, endDateTime, s3Bucket, s3Path, folderPosition):
    """
    Purpose - List the objects under the given S3 bucket/path together with their
    last-modified values and return the names selected by getRecords for the
    given time window

    :param startDateTime : Start DateTime in the format '2016-02-01 00:00:00'
    :param endDateTime : End DateTime in the format '2016-02-10 00:00:00'
    :param s3Bucket : Bucket passed to s3object.listBucketWPathByLastModified
    :param s3Path : Path passed to s3object.listBucketWPathByLastModified
    :param folderPosition : Folder position passed to s3object.listBucketWPathByLastModified
    :return: list of file names, or the integer 1 when the listing fails
             (callers must check for it before iterating)
    """
    def debug(*parts):
        # Emits the same text as the Python 2 statement `print a, b`, but the
        # call form also parses on Python 3.
        if self.debugFlag:
            print(" ".join(str(part) for part in parts))

    try:
        debug("s3Bucket = ", s3Bucket)
        debug("s3Path = ", s3Path)
        debug("startDateTime = ", startDateTime)
        debug("endDateTime = ", endDateTime)
        debug("folderPosition = ", folderPosition)

        fileListDict = self.s3object.listBucketWPathByLastModified(s3Bucket, s3Path, folderPosition)
        debug("fileListDict = ", fileListDict)

        # getRecords is a generator; materialise it so errors surface here
        manifestFileList = list(self.getRecords(fileListDict, startDateTime, endDateTime))
        debug("manifestFileList = ", manifestFileList)
        return manifestFileList

    except Exception as exp:
        self.m_logger.error("Failed while creating AWS manifest file list with Error = " + str(exp))
        return 1
def processExtractor(self):
    """
    Purpose - Run one extraction: validate the market name, read the market
    specific config file, get the AWS token, list the manifest files in the
    last-modified window, and for every manifest file download it, download its
    data files through extractData worker processes, write the local manifest
    file and record the dataset status ('P' then 'S' or 'F') in the database.

    :param : None
    :return: None. The process exits with status 1 on any failure.
    """
    def debug(*parts):
        # Emits the same text as the Python 2 statement `print a, b`, but the
        # call form also parses on Python 3.
        if self.debugFlag:
            print(" ".join(str(part) for part in parts))

    def runConfiguredSql(sqlName, myParamsDict):
        # Substitute the parameter names found in the configured SQL text with
        # their values and run it; returns (sql text, result row).
        tempSql = self.m_configDict["SQL"][sqlName]
        tempGrp = "(%s)" % "|".join(map(re.escape, myParamsDict.keys()))
        mySql = re.sub(tempGrp, lambda m: myParamsDict[m.group()], tempSql)
        returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
        debug("tempSql = ", tempSql)
        debug("myParamsDict = ", myParamsDict)
        debug("mySql = ", mySql)
        debug("returnStr = ", returnStr)
        return mySql, returnStr

    def acquireToken(retryTimes, retryWaitTime):
        # getToken() returning a true value is treated as a failure (as before).
        # The old in-loop exhaustion test could never be true, so a run carried
        # on without a token; its message also added a str to an int.
        for _attempt in range(retryTimes):
            if not self.s3object.getToken():
                return
            time.sleep(retryWaitTime)
        self.m_logger.error("Error: Exceeded the max retries " + str(retryTimes) + " to get AWS Token from FINRA. Please re-try after some time or escalate.. ")
        sys.exit(1)

    try:
        # DB_CALL - validate mktName
        mySql, returnStr = runConfiguredSql("validate_market_name", {'mktName': self.mktName})
        if returnStr[0] != '0':
            self.m_logger.error("Invalid market name provided " + mySql + ". Error = " + self.mktName)
            sys.exit(1)
        debug("MktName from DB = ", self.mktName)

        # Build the market config file name: <dir>/<base>_<mktname>.<extension>
        configNameParts = os.path.basename(self.configFile).split('.', 1)
        self.mktConfigFile = os.path.dirname(self.configFile) + '/' + configNameParts[0].strip() + '_' + self.mktName.lower() + '.' + configNameParts[1].strip()
        debug("mktConfigFile = ", self.mktConfigFile)

        # Validate Market Config file is a valid file
        if not os.path.isfile(self.mktConfigFile):
            self.m_logger.error("Invalid market manifest file " + self.mktConfigFile)
            sys.exit(1)

        # Read Market specific config file and store it in a specific dictionary
        self.readMktConfigFile(self.mktConfigFile)
        debug("m_mktConfigDict=", self.m_mktConfigDict)

        # Fetch the last modified timestamp recorded for the given market
        mySql, returnStr = runConfiguredSql("get_last_modified", {'mktName': self.mktName.upper()})
        if returnStr[0] == '0':
            if returnStr[1]:
                lastModifiedDate = returnStr[1]
            else:
                lastModifiedDate = "2015-01-01 00:00:00"
        else:
            self.m_logger.error("Unable to get last_modified date using the sql " + mySql + ". Error = " + self.mktName)
            sys.exit(1)
        debug("lastModifiedDate=", lastModifiedDate)

        # TODO: temporary override carried over unchanged - it discards the value
        # read from the database above and must be removed before production.
        if self.mktName == 'nyse_mkt':
            lastModifiedDate = "2016-06-03 15:00:00"
        else:
            lastModifiedDate = "2016-06-01 00:00:00"

        # Get RunID
        self.runID = generate_runId()
        debug("RunID = ", self.runID)

        # Initialize S3 object and get FINRA cloud service token
        self.s3object = S3(self.mktConfigFile, self.m_logger, self.debugFlag)
        tokenRetryTimes = int(self.m_configDict["TOKEN"]["token_retry_times"])
        tokenRetryWaitTime = int(self.m_configDict["TOKEN"]["token_retry_wait_time"])
        acquireToken(tokenRetryTimes, tokenRetryWaitTime)
        self.currentEpochTime = int(time.time())

        # Get list of Manifest files to be processed; the window ends one day ahead of now
        currentDate = (datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
        folderPosition = int(self.s3object.m_configFile["S3"]["folder_position"])
        targetBucket = self.s3object.m_configFile["S3"]["bucket"]
        targetFolder = self.s3object.m_configFile["S3"]["path"]
        finraManifestFileList = self.getManifestFileList(lastModifiedDate, currentDate, targetBucket, targetFolder, folderPosition)
        debug("finraManifestFileList = ", finraManifestFileList)
        if not isinstance(finraManifestFileList, list):
            # getManifestFileList returns the integer 1 on failure
            self.m_logger.error("Unable to get the list of manifest files from bucket " + str(targetBucket) + " path " + str(targetFolder))
            sys.exit(1)

        localFileDir = self.s3object.m_configFile["ENV"]["stage_dir"]
        localAWSRetries = int(self.m_mktConfigDict["ENV"]["aws_retries"])
        s3TimeoutTime = int(self.m_configDict["dxt"]["S3_TIMEOUT_TIME"])
        threadDelayTime = int(self.m_configDict["dxt"]["THREAD_DELAY_TIME"])
        dataFileNameKey = self.m_configDict["dxt"]["DATA_FILE_NAME_STR"]
        dataFileSizeKey = self.m_configDict["dxt"]["DATA_FILE_SIZE_STR"]
        noOfRowsKey = self.m_configDict["dxt"]["NO_OF_ROWS_STR"]

        for finraManifestFile in finraManifestFileList:
            # TODO: temporary skip of known bad manifest files - remove before go-live
            if finraManifestFile in ('manifest.TSP_A_20160425.txt', 'manifest.TSP_P_20160425.txt'):
                continue

            targetFileWthPath = targetFolder + finraManifestFile
            localFileWthPath = localFileDir + "/" + finraManifestFile
            debug("targetFileWthPath = ", targetFileWthPath)
            debug("localFileWthPath = ", localFileWthPath)
            debug("finraManifestFile = ", finraManifestFile)

            # Download the manifest file (single part); a true result is a failure
            extractReturnValue = 1
            for _attempt in range(localAWSRetries):
                extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket)
                debug("extractReturnValue = ", extractReturnValue)
                if not extractReturnValue:
                    break
            if extractReturnValue:
                self.m_logger.error("Unable to fetch manifestFile = " + finraManifestFile + "from the path = " + targetFileWthPath + " to the local filesystem = " + localFileWthPath)
                sys.exit(1)

            # Dataset name is the second '_' separated token of the second '.'
            # separated part of the manifest file name (customized for FINRA's latest file)
            self.datasetName = os.path.basename(finraManifestFile).split('.')[1].split('_')[1].strip().upper()
            debug("datasetName = ", self.datasetName)

            # Negative code: error. Positive code: dataset is not expected, skip the file.
            mySql, returnStr = runConfiguredSql("validate_dataset_name", {'datasetName': self.datasetName})
            if int(returnStr[0]) < 0:
                self.m_logger.error("Unable to validate datasetName " + mySql + ". Error = " + self.datasetName)
                sys.exit(1)
            elif int(returnStr[0]) > 0:
                self.m_logger.info("Give Dataset is not in the list to process. Skipping it" + mySql + ". Dataset Name = " + self.datasetName)
                continue

            # Record that processing of this manifest file has started
            mySql, returnStr = runConfiguredSql("put_dataset", {'datasetName': self.datasetName, 'runID': str(self.runID), 'tDate': str(self.tradeDate), 'status': 'P'})
            if returnStr[0] != '0':
                self.m_logger.error("Unable to insert into tb_dxt_dataset_trans table " + mySql + ". Error = " + self.datasetName)
                sys.exit(1)

            manifestDelim = self.m_configDict["ENV"]["manifest_delim"]
            debug("localFileWthPath = ", localFileWthPath)
            manifestFileDict = self.readManifestFile(localFileWthPath)
            debug("manifestDelim = ", manifestDelim)
            debug("manifestFileDict = ", manifestFileDict)

            # At least one worker per batch; 0 would never send anything and never finish
            process_count = max(1, int(self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"]))

            # Extract the data files in batches of up to process_count worker processes
            fileID = 1
            dbFlag = 1
            fileIDQueue = Queue()
            procs = []
            doneCounter = 0
            sendCounter = 0
            failureFlag = 0
            processFlag = 0
            totalFiles = len(manifestFileDict)
            while doneCounter < totalFiles:
                while sendCounter < totalFiles and sendCounter - doneCounter < process_count:
                    debug("manifestFileDict[self.m_configDict[dxt][DATA_FILE_NAME_STR]] = ", manifestFileDict[sendCounter][dataFileNameKey])
                    # manifestFileDict[sendCounter] is the whole record: filename, filesize & row count
                    processHandle = Process(target=Extractor.extractData, args=(self, manifestFileDict[sendCounter], fileID, fileIDQueue, dbFlag))
                    processFlag = 1

                    # Refresh the token once it is older than the configured timeout
                    if (int(time.time()) - self.currentEpochTime) > s3TimeoutTime:
                        self.currentEpochTime = int(time.time())
                        self.m_logger.info("Getting New Token for file : {0}, Total files : {1}".format(sendCounter + 1, totalFiles))
                        if self.debugFlag:
                            debug('Memory usage: %s (kb)' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
                            debug("self.currentEpochTime = ", self.currentEpochTime)
                            debug("Current Time in Epoch = ", int(time.time()))
                        acquireToken(tokenRetryTimes, tokenRetryWaitTime)

                    time.sleep(threadDelayTime)
                    processHandle.start()
                    procs.append(processHandle)
                    sendCounter += 1
                    fileID += 1

                if processFlag:
                    # The queued results are tiny tuples, so joining before
                    # draining the queue does not block the workers.
                    for p in procs:
                        p.join()
                    procs = []
                    processFlag = 0

                while not fileIDQueue.empty():
                    qFileID, qResult = fileIDQueue.get()
                    debug("qFileID = ", qFileID, "qResult = ", qResult)
                    doneCounter += 1
                    if qResult:
                        failureFlag = 1

                if doneCounter < sendCounter:
                    # Every started worker has been joined, so a missing result means
                    # a worker ended without reporting; waiting for it would never end.
                    self.m_logger.error("Extract worker ended without reporting a result. Files sent = " + str(sendCounter) + " results received = " + str(doneCounter))
                    failureFlag = 1

                debug("ProcessFlag = ", processFlag, "sendCounter = ", sendCounter, "doneCounter = ", doneCounter)
                if failureFlag:
                    break

            debug("Failure Flag = ", failureFlag)
            if failureFlag:
                pStatus = 'F'
            else:
                pStatus = 'S'

            # Write the local manifest file: table|file|size|rows|0, one line per data file
            tblName = self.m_mktConfigDict["dxt"]["TARGET_TBL_NAME"] + "_" + self.mktName.upper()
            # NOTE(review): [6:12] keeps six characters of the second name part - confirm
            # this is the intended date portion.
            manifestDate = os.path.basename(finraManifestFile).split('.', 3)[1][6:12]
            fatlManifestFile = self.m_configDict["ENV"]["stage_dir"] + "/" + tblName + "." + manifestDate + ".manifest"
            with open(fatlManifestFile, "w") as fh:
                # sorted() keeps the lines in manifest record order
                for dictRecord in sorted(manifestFileDict):
                    dataFile = manifestFileDict[dictRecord][dataFileNameKey]
                    sourceFileWthPath = self.m_configDict["ENV"]["stage_dir"] + "/" + dataFile
                    dataFileSize = int(manifestFileDict[dictRecord][dataFileSizeKey])
                    dataFileRecordCount = int(manifestFileDict[dictRecord][noOfRowsKey])
                    debug("dataFile = ", dataFile)
                    debug("dataFileSize = ", dataFileSize)
                    debug("dataFileRecordCount = ", dataFileRecordCount)
                    debug("sourceFileWthPath = ", sourceFileWthPath)
                    debug("tblName = ", tblName, "dataFile = ", dataFile, "dataFileSize = ", dataFileSize, "mktName = ", self.mktName)
                    fh.write(tblName + "|" + str(dataFile) + "|" + str(dataFileSize) + "|" + str(dataFileRecordCount) + "|" + "0" + "\n")

            # DB_CALL - record the final 'S' or 'F' status for the dataset
            mySql, returnStr = runConfiguredSql("put_dataset", {"datasetName": self.datasetName, "runID": str(self.runID), "tDate": str(self.tradeDate), "status": pStatus})
            if returnStr[0] != '0':
                self.m_logger.error("Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                sys.exit(1)

            if failureFlag:
                self.m_logger.error("Extract failed for data files for manifest file " + finraManifestFile)
                sys.exit(1)
        # End of for loop for finraManifestFiles

    except Exception as e:
        self.m_logger.error("ProcessExtractor failed with error " + str(e))
        sys.exit(1)
def main(configFile, logLevel, tDate):
    """
    Purpose - Manual check of the Oracle wrapper: runs the market-info query for
    dataset 'DLE_INFO' and the internal recon query for a fixed trade date, and
    prints what comes back.

    :param configFile: Configuration file passed to Logger and configuration
    :param logLevel: Log level passed to Logger
    :param tDate: Trade date passed to Logger
    :return: None. Exits with status 1 when anything fails.
    """
    import sys  # local import: the module-level imports are outside this view

    def emit(*parts):
        # Emits the same text as the Python 2 statement `print a, b`, but the
        # call form also parses on Python 3.
        print(" ".join(str(part) for part in parts))

    log = Logger(logLevel, configFile, tDate)
    log.addFileHandler(logging.DEBUG)
    log.addGenericInfo(__file__)

    try:
        m_configDict = configuration(configFile, True).m_dictionary
        myOracle = Oracle(m_configDict, log)

        # Market info and defaults file name for one dataset
        mySql = "select RETURN_CODE, RETURN_MSG, P_MARKET_IND, P_DEFAULTS_FILENAME from table(pkg_rfcm_ddy.f_ddy_get_makt_info_dflt_fname('datasetName'))"
        pDatasetName = 'DLE_INFO'
        new_mySql = re.sub('datasetName', pDatasetName, mySql.rstrip())
        returnStr = myOracle.runSqlWthParamsGetOneRow(new_mySql)
        emit("Return Value = ", returnStr[0], " Return Code = ", returnStr[1], " Mkt = ", returnStr[2], "Defaults = ", returnStr[3])

        # Internal recon rows for a fixed trade date. A separate name is used so
        # the tDate argument is not overwritten.
        mySql = "select * from table(PKG_RFCM_DDY.f_ddy_internal_recon('tradeDate')) order by 4"
        reconDate = '20160212'
        new_mySql = re.sub('tradeDate', reconDate, mySql.rstrip())
        emit("mySql = ", mySql, "new_mySql = ", new_mySql)
        returnStrs = myOracle.runSqlWthParamsGetMultipleRows(new_mySql)
        emit("Return Strs = ", returnStrs)

    except Exception as e:
        emit("Failed on main", str(e))
        # sys.exit rather than the interactive helper exit(), which the site
        # module may not install
        sys.exit(1)
class Loader():
    """
    Purpose - Push the data files listed in a manifest to S3 and record the
              progress of the load in the Oracle tracking tables.

    processLoader() is the entry point. The class relies on names provided by
    the rest of the project (Logger, configuration, Oracle, S3, Manifest,
    generate_runId) and on the multiprocessing Queue imported by the file.
    """

    #class variables
    m_logger = ""

    #database objects
    m_oracle_db = ""
    m_netezza_db = ""

    # Seconds to wait for the result of a worker that has already been joined
    _QUEUE_RESULT_TIMEOUT = 30

    def __init__(self, configFile, tradeDate, debugFlag, datasetName):
        """
        Purpose: Constructor
        :param self: class object itself
        :param configFile: Configuration file to use
        :param tradeDate: Trade date of the load
        :param debugFlag: When true, debug information is printed to stdout
        :param datasetName: Dataset name (lower-cased for the logger)
        """
        # Initialize m_logger object from class Logger and add Header to the log
        self.m_logger = Logger(logging.INFO, configFile, tradeDate, datasetName.lower())
        self.m_logger.addFileHandler(logging.DEBUG)
        self.m_logger.addGenericInfo(__file__)

        self.tradeDate = tradeDate
        self.debugFlag = debugFlag
        self.configFile = configFile

        try:
            # Get configuration to a dictionary
            self.m_configDict = configuration(self.configFile, True).m_dictionary

            # Initialize Oracle instance along with connection
            self.m_oracle_db = Oracle(self.m_configDict, self.m_logger)
        except Exception as exp:
            # An exception occurred
            self.m_logger.error("Unable to initialize the configuration " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _debug(self, *parts):
        """
        Purpose - Print the given parts, separated by a blank, when debugFlag is set
        :param parts: Values to print; each one is converted with %s
        :return: None
        """
        if self.debugFlag:
            # Single parenthesised argument: prints the same text whether or not
            # print is a statement in the running interpreter
            print(" ".join(["%s" % (part,) for part in parts]))

    def _substituteParams(self, template, paramsDict):
        """
        Purpose - Replace every occurrence of each key of paramsDict in template by its value
        :param template: Text holding the keys as literal placeholders
        :param paramsDict: Placeholder name -> replacement string
        :return: The substituted text
        """
        # NOTE(review): SQL statements are built by plain text substitution; the
        # values are not escaped or bound. Confirm they never carry untrusted text.
        # Longest keys first so a key that is a prefix of another cannot shadow it.
        keys = sorted(paramsDict.keys(), key=len, reverse=True)
        pattern = "(%s)" % "|".join(map(re.escape, keys))
        return re.sub(pattern, lambda m: paramsDict[m.group()], template)

    def _runSqlWithRetry(self, mySql, action, verbose=True):
        """
        Purpose - Run a one-row SQL and retry once, after SQL.delay_time seconds,
                  when the first element of the row is not '0'
        :param mySql: SQL text to run
        :param action: Short description used in the log messages
        :param verbose: When false, the debug output for this call is skipped
        :return: The row of the last attempt; the caller checks row[0] == '0'
        """
        returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
        if verbose:
            self._debug("mySql = ", mySql)
            self._debug("returnStr = ", returnStr)

        if returnStr[0] != '0':
            self.m_logger.info("Retry after delay., Unable to " + action + " using sql " + mySql + ". Error = " + returnStr[1])
            local_delay_time = int(self.m_configDict["SQL"]["delay_time"])
            time.sleep(local_delay_time)
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
            if returnStr[0] != '0':
                self.m_logger.error("Unable to " + action + " using sql " + mySql + ". Error = " + returnStr[1])
        return returnStr

    def _putProcessStatus(self, localDataFile, localFileID, processID, hostName, pStatus, pComment):
        """
        Purpose - Record the status of one file load using SQL.put_process_status
        :return: True when the database reported success, False otherwise
        """
        myParamsDict = {"datasetName": self.datasetName,
                        "runID": str(self.runID),
                        "fileID": str(localFileID),
                        "fileName": localDataFile,
                        "tDate": str(self.tradeDate),
                        "processID": str(processID),
                        "hostName": hostName,
                        "status": pStatus,
                        "lcomment": pComment,
                        "tidalRunID": str(self.tidalRunID)}
        tempSql = self.m_configDict["SQL"]["put_process_status"]
        mySql = self._substituteParams(tempSql, myParamsDict)
        self._debug("localFileID = ", localFileID)
        returnStr = self._runSqlWithRetry(mySql, "put process status info into the database")
        return returnStr[0] == '0'

    def _putDatasetStatus(self, pStatus):
        """
        Purpose - Record the status of the whole dataset load using SQL.put_dataset.
                  Exits the program when the database call fails twice.
        :param pStatus: Status code to store
        :return: None
        """
        myParamsDict = {"datasetName": self.datasetName,
                        "runID": str(self.runID),
                        "tDate": str(self.tradeDate),
                        "status": pStatus,
                        "tidalRunID": str(self.tidalRunID)}
        tempSql = self.m_configDict["SQL"]["put_dataset"]
        mySql = self._substituteParams(tempSql, myParamsDict)
        returnStr = self._runSqlWithRetry(mySql, "put dataset info into the database")
        if returnStr[0] != '0':
            sys.exit(1)

    def _uploadCompleteFile(self, localDataFile, localRecordCount, targetBucket, encryptKeyFlag):
        """
        Purpose - Write the "complete" companion file of a data file
                  (one line: <tradeDate>,<recordCount>) and upload it to S3
        :return: Return value of s3object.loadDataSinglePart
        """
        # Work on the base name so that a dot in a directory name cannot
        # truncate the generated file name
        completeFile = os.path.basename(localDataFile).split(".")[0] + self.compFilePattern
        completeFileWthPath = self.m_mktConfigDict["ENV"]["donefile_dir"] + "/" + completeFile
        self._debug("completeFile =", completeFile)
        self._debug("sourceFileWthPath =", completeFileWthPath)

        with open(completeFileWthPath, "w") as finraMnFH:
            finraMnFH.write("{0},{1}\n".format(str(self.tradeDate), str(localRecordCount)))

        targetFileWthPath = os.path.join(self.s3object.m_configFile["S3"]["path"], os.path.basename(completeFileWthPath))
        sourceSize = os.stat(completeFileWthPath).st_size
        self.m_logger.info("Started Xfer of complete file " + completeFileWthPath + " with size " + str(sourceSize) + " to target " + targetFileWthPath)
        return self.s3object.loadDataSinglePart(completeFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag)

    def _uploadWithRetries(self, localDataFile, dataFileFlag, localRecordCount):
        """
        Purpose - Upload one file (and, for data files, its complete file) to S3,
                  trying at most ENV.aws_retries times
        :return: 0 on success, otherwise the non-zero value of the last attempt
        """
        sourceFileWthPath = localDataFile
        targetFileWthPath = os.path.join(self.s3object.m_configFile["S3"]["path"], os.path.basename(sourceFileWthPath))
        targetBucket = self.s3object.m_configFile["S3"]["bucket"]
        encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]
        local_aws_retries = int(self.m_mktConfigDict["ENV"]["aws_retries"])

        self._debug("sourceFileWthPath =", sourceFileWthPath)
        self._debug("targetFileWthPath =", targetFileWthPath)
        self._debug("targetBucket =", targetBucket)
        self._debug("encryptKeyFlag =", encryptKeyFlag)
        self._debug("local_aws_retries =", local_aws_retries)

        # Files above 4.5 GB go through the multipart upload
        # (original comment: AWS limits a single part upload to 5 GB)
        sourceSize = os.stat(sourceFileWthPath).st_size
        GBFACTOR = float(1 << 30)
        multiPartFlag = (sourceSize / GBFACTOR) > 4.5

        self.m_logger.info("Started Xfer of Source File " + sourceFileWthPath + " with size " + str(sourceSize) + " to target " + targetFileWthPath)

        # Reported as a failure when aws_retries allows no attempt at all
        loadReturnValue = 1
        dataUploaded = False
        init_count = 0
        while init_count < local_aws_retries:
            if not dataUploaded:
                if multiPartFlag:
                    self._debug("Inside Multipart load. File size = ", sourceSize)
                    loadReturnValue = self.s3object.loadDataMultiPart(sourceFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag, self.bytes_per_chunk)
                else:
                    self._debug("Inside Singlepart load. File size = ", sourceSize)
                    loadReturnValue = self.s3object.loadDataSinglePart(sourceFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag)
                self._debug("loadReturnValue = ", loadReturnValue)
                dataUploaded = (loadReturnValue == 0)

            # A data file is only complete once its companion file is uploaded too.
            # The data file keeps its own paths, so a retry never re-sends the
            # complete file in place of the data file.
            if dataUploaded and dataFileFlag:
                if self._uploadCompleteFile(localDataFile, localRecordCount, targetBucket, encryptKeyFlag):
                    loadReturnValue = 1
                else:
                    loadReturnValue = 0

            if loadReturnValue == 0:
                break
            init_count += 1
        return loadReturnValue

    def _runLoads(self, manifestFileList):
        """
        Purpose - Upload every entry of the manifest. The Start of Day file is
                  loaded in this process; every other file is loaded by a worker
                  process, at most ACTIVE_LOAD.active_load_max at a time.
        :param manifestFileList: Manifest records; index 1 is the data file name,
                                 index 3 the record count
        :return: 0 when every load reported success, 1 otherwise
        """
        # Function-scope imports: the module-level name Process may be bound to
        # another class of this file, and Empty lives in a module whose name
        # differs between interpreter versions
        import multiprocessing
        try:
            from queue import Empty
        except ImportError:
            from Queue import Empty

        process_count = int(self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"])
        if process_count < 1:
            # With no worker slot the dispatch loop below could never progress
            raise ValueError("active_load_max must be at least 1, got " + str(process_count))
        s3TimeoutTime = int(self.m_configDict["ddy"]["S3_TIMEOUT_TIME"])
        threadDelayTime = int(self.m_configDict["ddy"]["THREAD_DELAY_TIME"])

        manifestListItems = len(manifestFileList)
        max_batches = int(math.ceil(float(manifestListItems) / process_count))
        self._debug("manifestListItems = ", manifestListItems)

        fileIDQueue = Queue()
        dbFlag = 1
        fileID = 1
        procs = []
        sendCounter = 0
        doneCounter = 0
        batch_count = 0
        sodFileProcessedFlag = 0
        failureFlag = 0

        while doneCounter < manifestListItems and failureFlag == 0:
            # Dispatch until every worker slot is taken or the manifest is exhausted
            while sendCounter < manifestListItems and sendCounter - doneCounter < process_count and failureFlag == 0:
                dataFile = manifestFileList[sendCounter][1]
                if self.sodFilePatternSearch in dataFile and not sodFileProcessedFlag:
                    sodFileLoadStatus = self.loadData(dataFile, fileID, fileIDQueue, dbFlag, False, 0)
                    if sodFileLoadStatus:
                        self.m_logger.error("Unable to push Start of Day file to FINRA. Exiting.. ")
                        sys.exit(1)
                    sodFileProcessedFlag = 1
                    # loadData queued one result as well. Take one result off the
                    # queue; it may belong to a worker, so its status is honoured.
                    qFileID, qResult = fileIDQueue.get()
                    doneCounter += 1
                    if qResult:
                        failureFlag = 1
                else:
                    if self.sodFileCheck == 'Y' and not sodFileProcessedFlag:
                        self.m_logger.error("No Start of day file. Please add SOD file to the generate manifest. Exiting.. ")
                        sys.exit(1)
                    self._debug("manifestFileList[sendCounter][1]", dataFile, "fileID = ", fileID)
                    processHandle = multiprocessing.Process(target=Loader.loadData,
                                                            args=(self, dataFile, fileID, fileIDQueue, dbFlag, True, manifestFileList[sendCounter][3]))

                    # Get another token once S3_TIMEOUT_TIME seconds have passed,
                    # to avoid expiration. Required for large datasets.
                    if (int(time.time()) - self.currentEpochTime) > s3TimeoutTime:
                        self.currentEpochTime = int(time.time())
                        self.m_logger.info("Getting New Token for Batch : {0}, Max batches : {1}".format(batch_count, max_batches))
                        self._debug("Inside get new token - self.currentEpochTime = ", self.currentEpochTime)
                        self.s3object.getToken()

                    time.sleep(threadDelayTime)
                    processHandle.start()
                    procs.append(processHandle)
                sendCounter += 1
                fileID += 1

            if procs:
                batch_count += 1
                self.m_logger.info("Waiting for Batch : {0} to complete. No of active workers : {2}. Max batches : {1}".format(batch_count, max_batches, sendCounter - doneCounter))
                for p in procs:
                    p.join()
                procs = []
                self._debug("Before fileIDQueue - sendCounter = ", sendCounter, "doneCounter = ", doneCounter, "manifestListItems = ", manifestListItems)

                # Every load queues exactly one result, so fetch the known number
                # of outstanding results instead of polling Queue.empty()
                while doneCounter < sendCounter:
                    try:
                        qFileID, qResult = fileIDQueue.get(True, self._QUEUE_RESULT_TIMEOUT)
                    except Empty:
                        self.m_logger.error("A load process ended without reporting its result. Outstanding results = " + str(sendCounter - doneCounter))
                        failureFlag = 1
                        doneCounter = sendCounter
                        break
                    self._debug("qFileID = ", qFileID, "qResult = ", qResult)
                    doneCounter += 1
                    if qResult:
                        failureFlag = 1
                self._debug("After fileIDQueue - sendCounter = ", sendCounter, "doneCounter = ", doneCounter, "manifestListItems = ", manifestListItems, "failureFlag = ", failureFlag)

            if self.debugFlag:
                self._debug('Memory usage: %s (kb)' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
                self._debug("self.currentEpochTime = ", self.currentEpochTime)
                self._debug("Current Time in Epoch = ", int(time.time()))

        return failureFlag

    # ------------------------------------------------------------------
    # Public methods
    # ------------------------------------------------------------------
    def readMktConfigFile(self, mktConfigFile):
        """
        Purpose - To read the content of mktConfigFile into the dictionary m_mktConfigDict for reference.
                  Exits the program when the file cannot be read.
        :param mktConfigFile: Market specific configuration file
        :return: None
        """
        try:
            self.m_mktConfigDict = configuration(mktConfigFile, True).m_dictionary
        except Exception as exp:
            # An exception occurred
            self.m_logger.error("Unable to initialize the configuration for logger " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def chkActiveLoads(self):
        """
        Purpose - To check the count of active loads happening at a given point and wait,
                  up to ACTIVE_LOAD.active_load_max_wait_time, until it is at most
                  ACTIVE_LOAD.active_load_max
        :param None: None at this point
        :return: 0 when the load may proceed (or the check is disabled), 1 on failure or timeout
        """
        try:
            activeLoadConfig = self.m_mktConfigDict["ACTIVE_LOAD"]
            if activeLoadConfig["active_load_check_flag"] != 'Y':
                return 0

            # Configuration values are cast to int, as chkRaceStatus does
            localActiveLoadMax = int(activeLoadConfig["active_load_max"])
            localActiveLoadWaitTime = int(activeLoadConfig["active_load_wait_time"])
            localActiveLoadMaxWaitTime = int(activeLoadConfig["active_load_max_wait_time"])

            mySql = self.m_configDict["SQL"]["get_active_loads"]
            totalActiveWaitTime = 0
            while True:
                returnStr = self._runSqlWithRetry(mySql, "get active loads")
                if returnStr[0] != '0':
                    return 1

                # Check if actual active loads is <= configured active loads. If so, return out of the fn
                if int(returnStr[1].strip()) <= localActiveLoadMax:
                    return 0

                # Sleep for time defined by configured value for "active_load_wait_time"
                time.sleep(localActiveLoadWaitTime)
                totalActiveWaitTime += localActiveLoadWaitTime

                # Check if actual Total wait time is > configured total wait time. If so, log an error and give up
                if totalActiveWaitTime > localActiveLoadMaxWaitTime:
                    self.m_logger.error("In Fn chkActiveLoads. Total Actual Wait Time exceeds the configured value active_load_max_wait_time. Either cleanup orphaned loads or increase the either active_load_max or active_load_max_wait_time. totalActiveWaitTime = " + str(totalActiveWaitTime) + " localActiveMaxWaitTime=" + str(localActiveLoadMaxWaitTime))
                    return 1
        except Exception as exp:
            self.m_logger.error("Failure in chkActiveLoads process for file with the error " + str(exp))
            return 1

    def chkRaceStatus(self):
        """
        Purpose - To check if a load is already running for the given dataset and wait,
                  up to RACE.race_status_max_wait_time, until it is not
        :param None: None at this point
        :return: 0 when the load may proceed (or the check is disabled), 1 on failure or timeout
        """
        try:
            raceConfig = self.m_mktConfigDict["RACE"]
            if raceConfig["race_status_check_flag"] != 'Y':
                return 0

            localRaceStatusWaitTime = int(raceConfig["race_status_wait_time"])
            localRaceStatusMaxWaitTime = int(raceConfig["race_status_max_wait_time"])

            tempSql = self.m_configDict["SQL"]["get_race_status"]
            mySql = self._substituteParams(tempSql, {'datasetName': self.datasetName})

            totalRaceStatusWaitTime = 0
            while True:
                returnStr = self._runSqlWithRetry(mySql, "get race status")
                if returnStr[0] != '0':
                    return 1

                # A value of at most 1 lets the load proceed
                # (presumably the count includes this load - TODO confirm)
                if int(returnStr[1].strip()) <= 1:
                    return 0

                # Sleep for time defined by configured value for "race_status_wait_time"
                time.sleep(localRaceStatusWaitTime)
                totalRaceStatusWaitTime += localRaceStatusWaitTime
                self._debug("totalRaceStatusWaitTime = ", totalRaceStatusWaitTime, "localRaceStatusWaitTime =", localRaceStatusWaitTime)

                # Check if actual Total wait time is > configured total wait time. If so, log an error and give up
                if totalRaceStatusWaitTime > localRaceStatusMaxWaitTime:
                    self.m_logger.error("In Fn chkRaceStatusLoads. Total Actual Wait Time exceeds the configured value race_status_max_wait_time. Either check if the Dataset is getting loaded or increase race_status_max_wait_time. totalRaceStatusWaitTime = " + str(totalRaceStatusWaitTime) + " localRaceStatusMaxWaitTime=" + str(localRaceStatusMaxWaitTime))
                    return 1
        except Exception as exp:
            self.m_logger.error("Failure in chkRaceStatus process for file with the error " + str(exp))
            return 1

    def loadData(self, localDataFile, localFileID, localFileIDQueue, localDBFlag, dataFileFlag, localRecordCount):
        """
        Purpose - To load the given datafile to the S3 bucket specified in the market config file
        :param localDataFile: Source datafile to be uploaded to S3
        :param localFileID: Internal File ID assigned to the source datafile
        :param localFileIDQueue: Queue in which the result of the operation is stored as a
                                 (fileID, result) tuple; only used when localDBFlag is true
        :param localDBFlag: When true, run the race/active-load checks, record the process
                            status in the database and put the result on the queue
        :param dataFileFlag: When true, a complete file is generated and uploaded as well
        :param localRecordCount: Record count written into the complete file
        :return: 0 on success, non-zero on failure (in every mode)
        """
        try:
            self._debug("Inside loadData function")
            processID = os.getpid()
            hostName = socket.gethostname()

            if localDBFlag:
                raceStatusReturnValue = self.chkRaceStatus()
                self._debug("raceStatusReturnValue=", raceStatusReturnValue)
                if raceStatusReturnValue:
                    self.m_logger.error("Failure value returned by chkRaceStatus fn. Return value = " + str(raceStatusReturnValue))
                    localFileIDQueue.put((localFileID, raceStatusReturnValue))
                    return 1

                activeLoadsReturnValue = self.chkActiveLoads()
                if activeLoadsReturnValue:
                    self.m_logger.error("Failure value returned by chkActiveLoads fn. Return value = " + str(activeLoadsReturnValue))
                    localFileIDQueue.put((localFileID, activeLoadsReturnValue))
                    return 1

                # Insert Process status into Oracle db
                if not self._putProcessStatus(localDataFile, localFileID, processID, hostName, 'P', 'Load started'):
                    localFileIDQueue.put((localFileID, 1))
                    return 1

            loadReturnValue = self._uploadWithRetries(localDataFile, dataFileFlag, localRecordCount)
            if loadReturnValue == 0:
                pStatus = 'S'
                pComment = 'Load completed'
            else:
                pStatus = 'F'
                pComment = 'Load failed'

            if localDBFlag:
                # Record 'S' or 'F' for the file
                if not self._putProcessStatus(localDataFile, localFileID, processID, hostName, pStatus, pComment):
                    localFileIDQueue.put((localFileID, 1))
                    return 1

                if self.debugFlag:
                    self._debug("localFileID = ", localFileID)
                    self._debug("loadReturnValue = ", loadReturnValue)
                    self._debug('Memory usage: %s (kb)' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
                localFileIDQueue.put((localFileID, loadReturnValue))

            # Returned in every mode so that a synchronous caller sees upload failures too
            return loadReturnValue
        except Exception as exp:
            self.m_logger.error("Failure in loadData process for file with the error " + str(exp))
            if localDBFlag:
                # The consumer unpacks (fileID, result), so the result must be one tuple
                localFileIDQueue.put((localFileID, 1))
            return 1

    def createFinraManifestFile(self, manifestFile):
        """
        Purpose - To build the done file pushed to FINRA from the manifest file and the defaults file.
                  Sets totalRows, totalSize, fileCount, fileDict, defaultsDict and finraManifestFile.
        :param manifestFile: Manifest file, one "|" separated record per line
                             (index 1 file name, index 2 size, index 3 rows)
        :return: 0 on success, 1 on failure
        """
        try:
            # Read Manifest file to get info like total rows, total size & other details to populate the done file for FINRA
            self._debug("Inside createFinraManifestFile fuction")
            self.totalRows = 0
            self.totalSize = 0
            self.fileCount = 0
            self.fileDict = {}
            with open(manifestFile, "r") as fh:
                for data in fh:
                    # Blank lines carry no record
                    if not data.strip():
                        continue
                    # Exclude any entry with the start of day pattern
                    if self.sodFilePatternSearch in data:
                        continue
                    mylist = data.split("|")
                    fileSize = int(mylist[2])
                    fileRows = int(mylist[3])
                    self.fileCount += 1
                    self.fileDict[self.fileCount] = [mylist[0], os.path.basename(mylist[1]), fileSize, fileRows]
                    self.totalRows += fileRows
                    self.totalSize += fileSize
            self._debug("self.fileDict = ", self.fileDict)
        except Exception as exp:
            self.m_logger.error("Failed while processing readManifest with Error = " + str(exp))
            return 1

        defaultsFileWthPath = ""
        try:
            # self.defaultsFile is populated from the db by processLoader. No need to get it from config file
            defaultsFileWthPath = self.m_mktConfigDict["DATASET"]["defaults_dir"] + "/" + self.defaultsFile
            self.defaultsFileWthPath = defaultsFileWthPath
            self.defaultsDict = {}
            self.defaultsCount = 0
            with open(defaultsFileWthPath, "r") as fh:
                for data in fh:
                    # Lines keep their newline; they are written back verbatim below
                    self.defaultsCount += 1
                    self.defaultsDict[self.defaultsCount] = data
            self._debug("After Defaults, self.fileDict = ", self.fileDict)
        except Exception as exp:
            self.m_logger.error("Failed while processing defaults file " + defaultsFileWthPath + " with Error = " + str(exp))
            return 1

        finraManifestFile = ""
        try:
            # EOD naming convention per Finra's requirement
            self._debug("self.eodFilePattern = ", self.eodFilePattern)
            myParamsDict = {'datasetName': self.datasetName.lower(), 'tradeDate': str(self.tradeDate)}
            eodFileName = self._substituteParams(self.eodFilePattern, myParamsDict)
            self._debug("eodFileName = ", eodFileName)

            finraManifestFile = self.m_mktConfigDict["ENV"]["donefile_dir"] + "/" + eodFileName
            self.finraManifestFile = finraManifestFile
            self._debug("self.finraManifestFile = ", self.finraManifestFile)

            with open(finraManifestFile, "w") as finraMnFH:
                finraMnFH.write("# AWS RunID : {0}\n".format(str(self.runID)))
                finraMnFH.write("# Dataset : {0} , TradeDate : {1}\n".format(str(self.datasetName), str(self.tradeDate)))
                finraMnFH.write("total_compressed={0}\n".format(self.totalSize))
                finraMnFH.write("total_rows={0}\n".format(self.totalRows))
                finraMnFH.write("no of files={0}\n".format(self.fileCount))
                # sorted(): entries are written in manifest order whatever the dict ordering is
                for key in sorted(self.fileDict):
                    val = self.fileDict[key]
                    finraMnFH.write("file_{0}={1}\n".format(str(key), val[1]))
                    finraMnFH.write("file_{0}_rows={1}\n".format(str(key), val[3]))
                finraMnFH.write("# Data Attributes\n")
                for key in sorted(self.defaultsDict):
                    finraMnFH.write("{0}".format(str(self.defaultsDict[key])))
            return 0
        except Exception as exp:
            self.m_logger.error("Failed while creating AWS Done file " + finraManifestFile + " with Error = " + str(exp))
            return 1

    def processLoader(self, manifestFile, datasetName, tidalRunID):
        """
        Purpose - Function responsible for reading the manifest file, get market name,
                  call multiprocess load and other db calls. Exits the program with
                  status 1 on any failure.
        :param manifestFile: Manifest file name, relative to ENV.manifestfile_dir
        :param datasetName: Dataset name
        :param tidalRunID: Tidal Run ID
        :return: None
        """
        try:
            self.datasetName = datasetName
            self.tidalRunID = tidalRunID
            self.manifestFile = self.m_configDict["ENV"]["manifestfile_dir"] + "/" + manifestFile

            # Validate Manifest file
            if not os.path.isfile(self.manifestFile):
                self.m_logger.error("Invalid manifest file " + self.manifestFile)
                sys.exit(1)

            self._debug("Inside processLoader")
            self._debug("DatasetName = ", self.datasetName)
            self._debug("ManifestFile = ", manifestFile)
            self._debug("Self ManifestFile = ", self.manifestFile)
            self._debug("TidalRunID = ", self.tidalRunID)
            self._debug("DebugFlag = ", self.debugFlag)
            self._debug("confDict = ", self.m_configDict)

            # Get market name and defaults file name for the dataset from the database
            tempSql = self.m_configDict["SQL"]["get_mkt_defaults_filename"]
            mySql = self._substituteParams(tempSql, {'datasetName': self.datasetName})
            returnStr = self._runSqlWithRetry(mySql, "get market info from the database")
            if returnStr[0] != '0':
                sys.exit(1)
            mktName = returnStr[2].strip()
            self.defaultsFile = returnStr[3].strip()
            self._debug("MktName from DB = ", mktName)
            self._debug("Defaults = ", self.defaultsFile)

            # Build the string for mktConfigFile based on mktName and configFile info:
            # <dir>/<name>_<market>.<rest of the config file name>
            configNameParts = os.path.basename(self.configFile).split('.', 1)
            self.mktConfigFile = os.path.dirname(self.configFile) + '/' + configNameParts[0].strip() + '_' + mktName.lower() + '.' + configNameParts[1].strip()
            self._debug("mktConfigFile = ", self.mktConfigFile)

            # Validate market config file is a valid file
            if not os.path.isfile(self.mktConfigFile):
                self.m_logger.error("Invalid market manifest file " + self.mktConfigFile)
                sys.exit(1)

            # Read Market specific config file and store it in a specific dictionary
            self.readMktConfigFile(self.mktConfigFile)
            self._debug("m_mktConfigDict=", self.m_mktConfigDict)

            # Read the contents of manifest - dataFileNames into a list
            localManifest = Manifest()
            manifestDelim = self.m_configDict["ENV"]["manifest_delim"]
            manifestFileList = localManifest.readManifest(self.manifestFile, self.m_logger, manifestDelim, self.debugFlag)

            # Get RunID
            self.runID = generate_runId()
            self._debug("RunID = ", self.runID)

            # Record that the dataset load started
            self._putDatasetStatus('P')

            # Insert Manifest data in db, one row per data file
            tempSql = self.m_configDict["SQL"]["put_manifest"]
            for dataRecord in manifestFileList:
                myParamsDict = {"datasetName": self.datasetName,
                                "runID": str(self.runID),
                                "tDate": str(self.tradeDate),
                                "dataFileName": dataRecord[1],
                                "manifestFileName": manifestFile,
                                "noOfRecords": str(dataRecord[3]),
                                "fileSize": str(dataRecord[2]),
                                "tidalRunID": str(self.tidalRunID)}
                mySql = self._substituteParams(tempSql, myParamsDict)
                returnStr = self._runSqlWithRetry(mySql, "put manifest info into the database", verbose=False)
                if returnStr[0] != '0':
                    sys.exit(1)

            # Initialize S3 object and get FINRA cloud service token and establish s3 session
            self.currentEpochTime = int(time.time())
            self.s3object = S3(self.mktConfigFile, self.m_logger, self.debugFlag)
            self.s3object.getToken()
            self._debug("self.currentEpochTime = ", self.currentEpochTime)

            # Following variables are used across the class. Hence, assigned to self variables
            self.bytes_per_chunk = int(self.m_configDict["DATASET"]["bytes_per_chunk"])
            self.sodFilePatternSearch = self.m_configDict["ddy"]["SOD_FILE_PATTERN_SEARCH"]
            self.eodFilePattern = self.m_configDict["ddy"]["EOD_FILE_PATTERN"]
            self.compFilePattern = self.m_configDict["ddy"]["COMP_FILE_PATTERN"]
            self.sodFileCheck = self.m_configDict["ddy"]["SOD_FILE_CHECK"].strip().upper()
            self._debug("bytes_per_chunk = ", self.bytes_per_chunk)
            self._debug("self.sodFilePatternSearch = ", self.sodFilePatternSearch)
            self._debug("self.eodFilePattern = ", self.eodFilePattern)
            self._debug("self.compFilePattern = ", self.compFilePattern)
            self._debug("self.sodFileCheck = ", self.sodFileCheck)

            failureFlag = self._runLoads(manifestFileList)
            if failureFlag:
                pStatus = 'F'
            else:
                pStatus = 'S'

            # Generate FINRA Manifest file and Push it to AWS
            # NOTE(review): this also happens after a failed data load - confirm that is intended
            returnValue = self.createFinraManifestFile(self.manifestFile)
            self._debug("Post createFinraManifestFile fn - return value= ", returnValue)
            if returnValue:
                self.m_logger.error("Unable to generate done file. Please fix the issue the re-run the load")
                failureFlag = 1
                pStatus = 'F'
            else:
                # No database bookkeeping for the done file: dbFlag 0, fileID 0, no queue
                finraManifestLoadStatus = self.loadData(self.finraManifestFile, 0, None, 0, False, 0)
                if finraManifestLoadStatus:
                    # Keep the exit status in line with the 'F' stored in the database
                    failureFlag = 1
                    pStatus = 'F'
                    self.m_logger.error("Unable to load finra manifest file ")

            # Record the final status of the dataset load
            self._putDatasetStatus(pStatus)

            if failureFlag:
                self.m_logger.error("Load failed")
                sys.exit(1)
        except Exception as e:
            # sys.exit() raises SystemExit, which is not an Exception and passes through
            self.m_logger.error("ProcessLoader failed with error " + str(e))
            sys.exit(1)
class Process():
    """
    Purpose - Hold the logger, the Postgres connections and the lock of one process run.
    """

    # class variables
    # lock, logger and loader type
    m_lock = ""
    m_logger = ""
    m_loaderType = ""

    # Adama PG object
    m_adamapg = ""

    # Picard PG object
    m_picardpg = ""

    # Process details
    process_name = ""

    def __init__(self, configFile):
        """
        Purpose: Constructor. Exits the program when the initialization fails.

        :param self:        class object itself
        :param configFile:  Configuration file to use
        """
        # Initialize global logger object
        self.m_logger = Logger(logging.INFO, configFile)
        self.m_logger.addFileHandler(logging.DEBUG)

        try:
            # Add generic information: absolute path of this source file.
            # abspath() is used on its own because the name returned by
            # inspect.getfile() may already carry a directory.
            fname = inspect.getfile(inspect.currentframe())
            self.m_logger.addGenericInfo(os.path.abspath(fname))

            # export all the values from config into environment
            # (presumably a side effect of constructing the object - TODO confirm;
            # the object itself is not used afterwards)
            configuration(configFile, True)

            # Create Adama replica PG db object
            self.m_adamapg = Postgre(os.environ['adama_pg'], self.m_logger)

            # Create Picard Postgres Datamart object
            self.m_picardpg = Postgre(os.environ['picard_pg'], self.m_logger)

            # Create lock for the process
            self.m_lock = Lock(os.environ['LOCK_FILE'], self.m_logger)

            # loader type
            self.m_loaderType = self.getloaderType()

            # process name
            self.process_name = os.environ['process_name']

            self.m_logger.info("Initializing the process, %s" % self.process_name)

        except Exception as e:
            self.m_logger.error("ERROR: Unable to initialize the process due to: %s" % str(e))

            # The status update may itself fail (for instance when a connection was
            # never created); that must not prevent the cleanup below
            try:
                self.updateProcessStatus("F")
            except Exception as statusError:
                self.m_logger.error("ERROR: Unable to update the process status due to: %s" % str(statusError))

            # The class level defaults are empty strings, so these checks are
            # false for anything that was not created
            if self.m_adamapg:
                self.m_adamapg.closeConnection()
            if self.m_picardpg:
                self.m_picardpg.closeConnection()
            if self.m_lock:
                self.m_lock.remove()

            sys.exit("ERROR: Unable to initialize the process due to: %s" % str(e))