Example n. 1
0
class Process():
    # class variables

    # lock, logger and loader type
    m_lock = ""
    m_logger = ""
    m_loaderType = ""

    # Picard PG object
    m_report_pg = ""
    m_report_nz = ""

    def __init__(self, configFile):
        """ Purpose: Constructor

        :param self:        class object itself
        :param configFile:  Configuration file to use

        """
        # Initialize global logger object (console at INFO, file at DEBUG)
        self.m_logger = Logger(logging.INFO, configFile)
        self.m_logger.addFileHandler(logging.DEBUG)
        try:
            # Add generic information: full path of this script in the log header
            fname = inspect.getfile(inspect.currentframe())
            fpath = os.path.dirname(os.path.abspath(fname))
            self.m_logger.addGenericInfo(fpath + "/" + fname)

            # Create NZ Data Warehouse object; connection parameters come
            # from the environment
            self.m_report_nz = Netezza(configFile, self.m_logger)
            self.m_report_nz.initDatabase(os.environ['NZ_USER'],
                                          os.environ['NZ_PASSWD'],
                                          os.environ['NZ_HOST'],
                                          os.environ['NZ_DATABASE'],
                                          os.environ['NZ_PORT'])

            # Create Picard Postgres Datamart object
            self.m_report_pg = Postgre(configFile, self.m_logger)

            # Create lock for the process
            self.m_lock = Lock(os.environ['LOCK_FILE'], self.m_logger)

            # pre and post processing dictionaries initialization
            self.sql_process_dict = defaultdict(list)

        # "except Exception as e" (Python 2.6+) instead of the deprecated
        # comma form, consistent with the other examples in this file
        except Exception as e:
            self.m_logger.error(
                "ERROR: Unable to initialize the process due to: %s" % str(e))
            # BUG FIX: the handler referenced the undefined attribute
            # self.m_reportpg (the class attribute is m_report_pg), which
            # raised AttributeError and masked the original failure.
            if self.m_report_pg:
                self.m_report_pg.closeConnection()
            if self.m_lock:
                self.m_lock.remove()
            sys.exit("ERROR: Unable to initialize the process due to: %s" %
                     str(e))
Example n. 2
0
class Process():
    # class variables

    # lock, logger and loader type
    m_lock = ""
    m_logger = ""
    m_loaderType = ""

    # Picard PG object
    m_report_pg = ""
    m_report_nz = ""

    def __init__(self, configFile):
        """ Purpose: Constructor

        :param self:        class object itself
        :param configFile:  Configuration file to use

        """
        # Initialize global logger object (console at INFO, file at DEBUG)
        self.m_logger = Logger(logging.INFO, configFile)
        self.m_logger.addFileHandler(logging.DEBUG)
        try:
            # Add generic information: full path of this script in the log header
            fname = inspect.getfile(inspect.currentframe())
            fpath = os.path.dirname(os.path.abspath(fname))
            self.m_logger.addGenericInfo(fpath + "/" + fname)

            # Create NZ Data Warehouse object; connection parameters come
            # from the environment.  (The comment above this call was
            # indented with a literal TAB in the original.)
            self.m_report_nz = Netezza(configFile, self.m_logger)
            self.m_report_nz.initDatabase(os.environ['NZ_USER'], os.environ['NZ_PASSWD'], os.environ['NZ_HOST'], os.environ['NZ_DATABASE'], os.environ['NZ_PORT'])

            # Create Picard Postgres Datamart object
            self.m_report_pg = Postgre(configFile, self.m_logger)

            # Create lock for the process
            self.m_lock = Lock(os.environ['LOCK_FILE'], self.m_logger)

            # pre and post processing dictionaries initialization
            self.sql_process_dict = defaultdict(list)

        # "except Exception as e" (Python 2.6+) instead of the deprecated
        # comma form, consistent with the other examples in this file
        except Exception as e:
            self.m_logger.error("ERROR: Unable to initialize the process due to: %s" % str(e))
            # BUG FIX: the handler referenced the undefined attribute
            # self.m_reportpg (the class attribute is m_report_pg), which
            # raised AttributeError and masked the original failure.
            if self.m_report_pg:
                self.m_report_pg.closeConnection()
            if self.m_lock:
                self.m_lock.remove()
            sys.exit("ERROR: Unable to initialize the process due to: %s" % str(e))
Example n. 3
0
def main():
    """Drive a manual S3 multi-part upload using the NYSE test configuration."""
    confFile = "/cif/PY/apps/gds_arch/ICE/common/conf/test_ram_nyse.ini"

    # Logger: console at ERROR, log file at INFO
    log = Logger(logging.ERROR, confFile)
    log.addFileHandler(logging.INFO)
    log.addGenericInfo(__file__)

    # S3 client with debug output enabled; obtain the access token up front
    s3object = S3(confFile, log, True)
    s3object.getToken()

    # Resolve source file, target path/bucket and encryption flag from config
    srcPath = s3object.m_configFile["S3"]["source_file"]
    print("sourceFileWthPath =", srcPath)

    tgtPath = os.path.join(s3object.m_configFile["S3"]["path"], os.path.basename(srcPath))
    print("targetFileWthPath =", tgtPath)

    bucket = s3object.m_configFile["S3"]["bucket"]
    print("targetBucket =", bucket)

    encFlag = s3object.m_configFile["S3"]["encrypt_key"]
    print("encryptKeyFlag =", encFlag)

    # 500 MiB per upload chunk
    chunkBytes = 524288000

    # Multi-part upload (loadDataSinglePart is the single-part alternative)
    s3object.loadDataMultiPart(srcPath, tgtPath, bucket, encFlag, chunkBytes)
Example n. 4
0
File: dea.py Project: tnraman/ddy
class DEAExtractor():
    #class variables
    m_logger = ""

    #database objects
    m_oracle_db = ""

    #def __init__(self, configFile, mktName, processingDate, debugFlag, forceFlag):
    def __init__(self, configFile, mktName, processingDate, debugFlag):
        """
        Purpose: Constructor

        :param self:            class object itself
        :param configFile:      Configuration file to use
        :param mktName:         Market name used to locate market specific settings
        :param processingDate:  Processing date passed through to the logger
        :param debugFlag:       When truthy, diagnostic prints are emitted
        """

        # Set up the logger first so any later failure can be recorded,
        # and add the standard header to the log via addGenericInfo
        self.m_logger = Logger(logging.INFO, configFile, processingDate)
        self.m_logger.addFileHandler(logging.DEBUG)
        self.m_logger.addGenericInfo(__file__)

        # Remember the constructor arguments for the other methods
        self.configFile = configFile
        self.mktName = mktName
        self.processingDate = processingDate
        self.debugFlag = debugFlag

        try:
            # Load the configuration file into a dictionary
            self.m_configDict = configuration(self.configFile, True).m_dictionary

            # Initialize the Oracle database access object
            self.m_oracle_db = Oracle(self.m_configDict, self.m_logger)

        except Exception as exp:
            # Initialization failed: log it, echo to stdout and abort
            self.m_logger.error("Unable to initialize the configuration " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def readMktConfigFile(self, mktConfigFile):
        """
        Purpose - To read the content of mktConfigFile into the global dictionary m_mktConfigDict for reference

        :param mktConfigFile: path of the market specific configuration file
        :return: nothing; exits the process on failure
        """
        try:
            # Parse the market configuration into an instance-level dictionary
            self.m_mktConfigDict = configuration(mktConfigFile, True).m_dictionary
        except Exception as exp:
            # Parsing failed: log it, echo to stdout and abort
            self.m_logger.error("Unable to initialize the configuration for logger " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def chkActiveLoads(self):
        """
        Purpose - To check the count of active Active loads happening at a given point
        :param None: None at this point
        :return:
        """
        try:
            if self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_check_flag"] == 'Y': 
                localActiveLoadMax = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"]
                localActiveLoadWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_wait_time"]
                localActiveLoadMaxWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max_wait_time"]

                mySql = ""
                myParams = ""
                mySql = self.m_configDict["SQL"]["get_active_loads"]
    
                activeFlag=1
                totalActiveWaitTime=0
                while activeFlag:
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if self.debugFlag:
                        print "mySql = ", mySql
                        print "returnStr = ", returnStr
                        print "chkActiveLoads - Active Loads value = ", int(returnStr[1].strip())
                    if returnStr[0] != '0':
                        self.m_logger.error("Unable to get active loads using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

                    #Check if actual active loads is <= configured active loads.  If so, return out of the fn
                    if int(returnStr[1].strip()) <= localActiveLoadMax:
                        activeFlag=0
                        return 0

                    #Sleep for time defined by configured value for "active_load_wait_time"
                    time.sleep(localActiveLoadWaitTime)
                    totalActiveWaitTime += localActiveLoadWaitTime

                    #Check if actual Total wait time is > configured total wait time.  If so, throw an error and exit
                    if totalActiveWaitTime > localActiveMaxWaitTime:
                        self.m_logger.error("In Fn chkActiveLoads.  Total Actual Wait Time exceeds the configured value active_load_max_wait_time.  Either cleanup orphaned loads or increase the either active_load_max or active_load_max_wait_time. totalActiveWaitTime = " + str(totalActiveWaitTime) + " localActiveMaxWaitTime=" + str(localActiveMaxWaitTime))
                        return 1
            else:
                return 0

            #Return failure
            return 1
                    
        except Exception as exp:
            self.m_logger.error("Failure in chkActiveLoads process for file with the error " + str(exp))
            sys.exit(1)

    def chkRaceStatus(self):
        """
        Purpose - To check if a load is already running for the given dataset
        :param None: None at this point
        :return: 0 when no competing load is running (or the check is
                 disabled), 1 when the maximum wait time is exceeded.
                 Exits the process on database errors.

        NOTE(review): relies on self.datasetName having been assigned by the
        caller (processDEAExtractor sets it) before this method is invoked.
        """
        try:
            if self.m_mktConfigDict["RACE"]["race_status_check_flag"] == 'Y':
                # Poll interval and overall timeout, taken from the market config
                localRaceStatusWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_wait_time"])
                localRaceStatusMaxWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_max_wait_time"])

                mySql = ""

                #myParams = {"datasetName":self.datasetName}
                # Substitute the datasetName placeholder in the configured SQL
                # by regex-replacing each dictionary key with its value
                tempSql = self.m_configDict["SQL"]["get_race_status"]
                myParamsDict = { 'datasetName' : self.datasetName }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
    
                raceFlag=1
                totalRaceStatusWaitTime=0
                while raceFlag:
                    # returnStr[0] is the status code ('0' = OK), returnStr[1] the value
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if self.debugFlag:
                        print "tempSql = ", tempSql
                        print "myParamsDict = ", myParamsDict
                        print "mySql = ", mySql
                        print "returnStr = ", returnStr
                        print "chkRaceStatus - ReturnCode = ", int(returnStr[1].strip())
                    if returnStr[0] != '0':
                        self.m_logger.error("Unable to get race status using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

                    #Check if the load for this dataset is already running. If not, exit out of the function with normal return value
                    if int(returnStr[1].strip()) <= 1:
                        raceFlag=0
                        return 0

                    #Check if actual Total wait time is > configured total wait time.  If so, throw an error and exit


                    #Sleep for time defined by configured value for "active_load_wait_time"
                    time.sleep(localRaceStatusWaitTime)
                    #time.sleep(90)
                    totalRaceStatusWaitTime += localRaceStatusWaitTime
                    if self.debugFlag:
                        print "totalRaceStatusWaitTime = ", totalRaceStatusWaitTime, "localRaceStatusWaitTime =", localRaceStatusWaitTime
                    # Give up once the accumulated wait exceeds the configured maximum
                    if totalRaceStatusWaitTime > localRaceStatusMaxWaitTime:
                        self.m_logger.error("In Fn chkRaceStatusLoads.  Total Actual Wait Time exceeds the configured value race_status_max_wait_time.  Either check if the Dataset is getting loaded or increase the either active_load_max or active_load_max_wait_time. totalRaceStatusWaitTime = " + str(totalRaceStatusWaitTime) + " localRaceStatusMaxWaitTime=" + str(localRaceStatusMaxWaitTime))
                        return 1
            else:
                return 0

            #Return failure
            return 1
                    
        except Exception as exp:
            self.m_logger.error("Failure in chkRaceStatus process for file with the error " + str(exp))
            sys.exit(1)

    def extractData(self,localDataFile, localFileID, localFileIDQueue, localDBFlag):
        """
        Purpose - To extract the given datafile from the S3 bucket specified in the global mktConfigFile
        :param localDataFile: Data Filename
        :param localFileID: Internal File ID assigned to the local datafile
        :param localFileIDQueue: Queue in which, results of the operation is stored
        :param localDBFlag: Flag indicating if database should be used or not
        :return: the S3 download return value (0 on success) after queueing it;
                 1 when recording process status in the database fails.
                 NOTE(review): in the exception handler with localDBFlag set,
                 the failure is reported only via the queue and the method
                 implicitly returns None - confirm callers rely on the queue,
                 not the return value, in that path.
        """
        try:
            if self.debugFlag:
                print "Inside extractData function"
                print "localDataFile = ", localDataFile

            if localDBFlag:
                """ Not sure if we need Race Status check for Extract
                raceStatusReturnValue=self.chkRaceStatus()
                if self.debugFlag:
                    print "raceStatusReturnValue=", raceStatusReturnValue

                if raceStatusReturnValue:
                    self.m_logger.error("Failure value returned by chkRaceStatus fn.  Return value = " + str(raceStatusReturnValue))
                    localFileIDQueue.put((localFileID, raceStatusReturnValue))
                    return 1
                """

                """  Need to integrate Active loads with tb_dxt_process_status and tb_dxt_process_status ?
                activeLoadsReturnValue=self.chkActiveLoads()
                if activeLoadsReturnValue:
                    self.m_logger.error("Failure value returned by chkRaceStatus fn.  Return value = " + str(activeLoadsReturnValue))
                    localFileIDQueue.put((localFileID, raceStatusReturnValue))
                    return 1
                """

                # Identify this worker in the process-status record
                processID = os.getpid()
                hostName = socket.gethostname()

                #Insert Process status into Oracle db
                #DB_CALL - sp_dxt_insert_process_status(RUNID, FILE_ID, etc)
                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_process_status"]
                pStatus = 'P'
                pComment = 'Load started'
                # Keep the below vars 0 for now
                localDataFileSize=0
                localDataFileRecordCount=0

                # Build the SQL by regex-replacing each placeholder key with its value
                myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileName":localDataFile, "tDate":str(self.processingDate), "processID":str(processID), "hostName":hostName, "fileSize": str(localDataFileSize), "recordCount" : str(localDataFileRecordCount), "status":pStatus , "lcomment":pComment}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)

                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr
    
                # returnStr[0] is the status code ('0' = OK)
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    return 1

            #Here localFileWthPath is the local stage dir with file name
            localFileWthPath = self.m_configDict["ENV"]["stage_dir"] + "/" + localDataFile

            targetFolder = self.s3object.m_configFile["S3"]["path"]
            targetFileDir = targetFolder + self.processingDate + "/" 
            
            #Here targetFileWthPath is the AWS dir with file name
            targetFileWthPath = os.path.join(targetFileDir, os.path.basename(localDataFile))
            targetBucket = self.s3object.m_configFile["S3"]["bucket"]
            encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]

            localAWSRetries = int(self.m_mktConfigDict["ENV"]["aws_retries"])
            if self.debugFlag:
                print("localFileWthPath =", localFileWthPath)
                print("targetFileWthPath =", targetFileWthPath)
                print("targetBucket =", targetBucket)
                print("encryptKeyFlag =", encryptKeyFlag)
                print("localAWSRetries =", localAWSRetries)
            # Retry the download up to localAWSRetries times
            initCount = 0
            while (initCount < localAWSRetries):
                extractReturnValue = 0

                #Call s3.data download to extract the manifest file (single part load)
                #extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag )
                extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket)

                if self.debugFlag:
                    print "extractReturnValue = ", extractReturnValue

                if int(extractReturnValue) == 0:
                    pStatus = 'S'
                    pComment = 'Extract completed'
                    break
                else:
                    pStatus = 'F'
                    pComment = 'Extract failed'
                    initCount += 1

            # Get the size of the file downloaded 
            localFileSize = os.stat(localFileWthPath).st_size

            # Check if the downloaded file size is matching with what is mentioned in manifest file.  If not mark it as failed
            # Following check is commented as we don't have any manifest file to cross check size
#            if localFileSize != localDataFileSize:
#                pStatus = 'F'
#                pComment = 'Actual file size != Manifest file size'

            localRecordCount = 0
 
            if localDBFlag:
                #Call DB to insert 'S' or 'F' in tb_dxt_process_status
                #localFileIDQueue.put((localFileID, extractReturnValue))
                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_process_status"]
                #myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileID":str(localFileID), "fileName":localDataFile, "tDate":str(self.processingDate), "processID":str(processID), "hostName":hostName, "fileSize": localFileSize, "recordCount" : localRecordCount, "status":pStatus , "lcomment":pComment}
                myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileName":localDataFile, "tDate":str(self.processingDate), "processID":str(processID), "hostName":hostName, "fileSize": str(localFileSize), "recordCount" : str(localRecordCount), "status":pStatus , "lcomment":pComment}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr

                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    return 1

            # Report the outcome to the caller via the queue as well
            localFileIDQueue.put((localFileID,extractReturnValue))
            return extractReturnValue
      
        except Exception as exp:
            self.m_logger.error("Failure in extractData process for file with the error " + str(exp))
            if localDBFlag:
                localFileIDQueue.put((localFileID, 1))
            else:
                return 1

    def getRecords(self, fileDict, startDateTime, endDateTime):
        """
        Purpose - Function to sort the dictionary based on the key and return a sorted list
        :param fileDict : Dictionary containing Last_modified Date and file name
        :param startDateTime : Start DateTime in the format '2016-02-01 00:00:00'
        :param endDateTime : End DateTime in the format '2016-02-10 00:00:00'
        """
        try:
            # No pattern to search for
            #patternToSearch =  self.m_configDict["ENV"]["pattern_to_search"]
            
            if self.debugFlag:
                print "fileDict = ", fileDict
                #print "patternToSearch = ", patternToSearch
                print "startDateTime = ", startDateTime
                print "endDateTime = ", endDateTime
            sorted_values = sorted(fileDict.values())
            start = bisect.bisect_left(sorted_values, startDateTime)
            end = bisect.bisect_right(sorted_values, endDateTime)
            if self.debugFlag:
                print "sorted_values = ", sorted_values
                print "start = ", start
                print "end = ", end
            for fileItem in sorted(fileDict.iteritems())[start:end]:
                # No pattern to search for in DEA
                #if patternToSearch in fileItem[0]:
                    #yield fileItem[0]
                yield fileItem[0]
                if self.debugFlag:
                    print "fileItem[0] = ", fileItem[0]

        except Exception as exp:
            self.m_logger.error("Failed while executing getRecords to sort the dictionary content of dictionary  with Error = " + str(exp))
            sys.exit(1)

    def readManifestFile(self, manifestFileName):
        """
        Purpose - To read the content of Finra's manifest file stored in key-value pair into Nested dictionary 
        :param manifestFileName : Finra's manifestFileName containing data filenames, file size & no of rows
        """
        try:
            manifestRecordStartPattern = self.m_configDict["dxt"]["MANIFEST_RECORD_START_PATTERN"]

            if self.debugFlag:
                print "manifestRecordStartPattern =", manifestRecordStartPattern

            with open(manifestFileName) as infile:
                manifestFileDict = {}
                file = None
                line_count = 0
                for line in infile:
                    line = line.strip()
                    if line.startswith(manifestRecordStartPattern):
                        line_count += 1
                        file = line_count
                        manifestFileDict[file] = {}
                    var, val = line.split(':',1)
                    manifestFileDict[file][var.strip()] = val.strip()

            if self.debugFlag:
                print "manifestFileDict = ", manifestFileDict

            return manifestFileDict

        except Exception as exp:
            self.m_logger.error("Failed while executing readManifestFile to get FINRA manifest file into nested dictionary, Error = " + str(exp))
            sys.exit(1)

    def getFileList(self, startDateTime, endDateTime, s3Bucket, s3Path, folderPosition):
        """
        Purpose - Function to sort the dictionary based on the key and return a sorted list
        :param startDateTime : Start DateTime in the format '2016-02-01 00:00:00'
        :param endDateTime : End DateTime in the format '2016-02-10 00:00:00'
        """
        try:
            if self.debugFlag:
                print "s3Bucket = ", s3Bucket
                print "s3Path = ", s3Path
                print "startDateTime = ", startDateTime
                print "endDateTime = ", endDateTime
                print "folderPosition = ", folderPosition
                
            fileListDict = self.s3object.listBucketWPathByLastModified(s3Bucket, s3Path, folderPosition)
            if self.debugFlag:
                print "fileListDict = ", fileListDict
           
            #endDateTime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            fileList = list(self.getRecords(fileListDict, startDateTime, endDateTime))

            if self.debugFlag:
                print "fileListDict = ", fileListDict
                print "fileList = ", fileList

            return fileList
        except Exception as exp:
            self.m_logger.error("Failed while creating AWS manifest file list with Error = " + str(exp))
            return 1

    def processDEAExtractor(self):
        """
        Purpose - Function responsible for getting the AWS token and reading the last modified date in DB and fetch the list of files from AWS to be processed
        :param : None
        :return:
        """
        try:
            # DB_CALL
            # Make database call sp_dxt_validate_mktName(mktName) to validate mktName

#            tempSql = self.m_configDict["SQL"]["validate_market_name"]
#            myParamsDict = { 'mktName' : self.mktName.upper() }
#            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
#            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
#            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
#
#            if self.debugFlag:
#                print "tempSql = ", tempSql
#                print "myParamsDict = ", myParamsDict
#                print "mySql = ", mySql
#                print "returnStr = ", returnStr
#
#            if returnStr[0] != '0':
#                self.m_logger.error("Invalid market name provided " + mySql + ". Error = " + self.mktName)
#                sys.exit(1)

            if self.debugFlag:
                print "MktName from DB = ", self.mktName
          
            #Build the string for mktConfigFile based on mktName and configFile info
            self.mktConfigFile = os.path.dirname(self.configFile) + '/' + os.path.basename(self.configFile).split('.',1)[0].strip() + '_' + self.mktName.lower() + '.' + os.path.basename(self.configFile).split('.',1)[1].strip()

            if self.debugFlag:
                print("mktConfigFile = ", self.mktConfigFile)

            #Validate Market Config file is a valid file
            if not os.path.isfile(self.mktConfigFile):
                self.m_logger.error("Invalid market manifest file " + self.mktConfigFile)
                sys.exit(1)

            # Read Market specific config file and store it in a specific dictionary
            self.readMktConfigFile(self.mktConfigFile)

            if self.debugFlag:
                print("m_mktConfigDict=",self.m_mktConfigDict)

            # Get RunID
            self.runID = generate_runId()
            if self.debugFlag:
                print("RunID = ", self.runID)

            # Initialize S3 object and get FINRA cloud service token and establish s3 session
            self.s3object = S3(self.mktConfigFile, self.m_logger, self.debugFlag)

            tokenRetryTimes = int(self.m_configDict["TOKEN"]["token_retry_times"])
            tokenRetryWaitTime = int(self.m_configDict["TOKEN"]["token_retry_wait_time"])

            deaFileWaitTime = int(self.m_configDict["dea"]["DEA_FILE_WAIT_TIME"])
            deaFileSleepTime = int(self.m_configDict["dea"]["DEA_FILE_SLEEP_TIME"])
            s3TimeoutTime = int(self.m_configDict["dea"]["S3_TIMEOUT_TIME"])

            #Not sure what to do.  Keep this for a place holder in the future, when FINRA manifest for zero byte files everyday
            handleNoDatafileFlag = self.m_configDict["dea"]["HANDLE_NO_DATAFILE_FLAG"]

            deaActualTime = 0

            # Download manifest files in the manifest file list to a specific folder from AWS
            localFileDir = self.s3object.m_configFile["ENV"]["stage_dir"]
            targetFolder = self.s3object.m_configFile["S3"]["path"]
            targetBucket = self.s3object.m_configFile["S3"]["bucket"]
            encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]
            folderPosition =  int(self.s3object.m_configFile["S3"]["folder_position"])
            targetFileDir = targetFolder + self.processingDate + "/" 

            if self.debugFlag:
                print "localFileDir = ", localFileDir
                print "targetFolder = ", targetFolder
                print "targetBucket = ", targetBucket
                print "encryptKeyFlag = ", encryptKeyFlag
                print "self.processingDate = ", self.processingDate
                print "targetFileDir = ", targetFileDir

            startDate = ((datetime.now() - relativedelta(years=1)) + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
            endDate = (datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")

            getTokenFlag = 0

            fileExistFlag = 0

            while deaActualTime < deaFileWaitTime:
             
                # Get token only the first time or when the time exceed s3TimeoutTime
                if deaActualTime > s3TimeoutTime or not getTokenFlag:
                    getTokenFlag=1
                    initCount = 0
                    while (initCount < tokenRetryTimes):
                        tokenReturnCode = self.s3object.getToken()
                        if tokenReturnCode:
                            if initCount == tokenRetryTimes:
                                self.m_logger.error("Error: Exceeded the max retries " + tokenRetryTimes + " to get AWS Token from FINRA.  Please re-try after some time or escalate.. ")
                                sys.exit(1)
                            initCount += 1
                            time.sleep(tokenRetryWaitTime)
                        else:
                            break

                self.currentEpochTime = int(time.time())

                # Get the list of files from the AWS folder for the given processing date
                fileList = self.getFileList(startDate, endDate, targetBucket, targetFileDir, folderPosition) 

                if len(fileList):
                    if self.debugFlag:
                        print("fileList = ", fileList)
                    fileExistFlag=1
                    break

                time.sleep(deaFileSleepTime)
                deaActualTime += deaFileSleepTime

                if self.debugFlag:
                    print "deaActualTime =", deaActualTime
                    print "deaFileSleepTime =", deaFileSleepTime
                    print "deaFileWaitTime =", deaFileWaitTime
                self.m_logger.info("INFO : Waiting for file in FINRA's cloud, " + str(deaFileWaitTime - deaActualTime) + " secs remaining...")
                    
            # End of while

            tblName = self.m_mktConfigDict["dea"]["TARGET_TBL_NAME"] + "_" + self.mktName.upper()
            # If no files exists for the given day, create a zero byte data file and a manifest file for the same
            if not fileExistFlag:
                #deaDummyDataFile = self.m_configDict["dea"]["DEA_DUMMY_DATA_FILE"].replace("PDATE", self.processingDate)
                deaDummyDataFile = self.m_configDict["dea"]["DEA_DUMMY_DATA_FILE"].replace("PDATE", str(self.processingDate))
                deaDummyDataFileWthPath = self.m_configDict["ENV"]["stage_dir"] + "/" + deaDummyDataFile
                open(deaDummyDataFileWthPath,'a').close()
                fatlManifestFile = self.m_configDict["ENV"]["stage_dir"] + "/" + tblName + "." + self.processingDate + ".manifest"

                if self.debugFlag:
                    print "fileExistFlag = ", fileExistFlag
                with open(fatlManifestFile,"w") as fh:
                    fileSize = 0
                    if self.debugFlag:
                        print "deaDummyDataFileWthPath = ", deaDummyDataFileWthPath
                        print "fileSize = ", fileSize
                        print "tblName = ", tblName, "file = ", deaDummyDataFile, "fileSize = ", fileSize, "mktName = ", self.mktName
                    fh.write(tblName + "|" + deaDummyDataFile + "|" + str(fileSize) + "|" + "0" + "\n")
                self.m_logger.info("INFO : No File found for processing date " + self.processingDate + ". Creating zero byte data file " + deaDummyDataFileWthPath + " and manifest file " + fatlManifestFile)
                sys.exit(0)
                
            fileIDQueue = Queue()
            localAWSRetries = int(self.m_mktConfigDict["ENV"]["aws_retries"])

            # Insert a record into tb_dxt_dataset_trans with status 'P' for the given datasetName, saying that we start the process for this manifest file

            pStatus = 'P'
            # We decided to use tblName instead of dataset for DEA, as we don't have dataset concept or manifest files
            self.datasetName = tblName
            tempSql = self.m_configDict["SQL"]["put_dataset"]
            myParamsDict = {'datasetName':self.datasetName, 'runID': str(self.runID), 'tDate':str(self.processingDate), 'status': pStatus }
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr

            if returnStr[0] != '0':
                self.m_logger.error("Unable to insert into tb_dxt_dataset_trans table " + mySql + ". Error = " + self.datasetName + " for processing date = " + self.processingDate)
                sys.exit(1)

            fileID = 1
            dbFlag = 1
            fileIDQueue = Queue()
            procs = []
            doneCounter = 0
            sendCounter = 0
            failureFlag = 0

            process_count = int(self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"])

            while doneCounter < len(fileList):
                while sendCounter < len(fileList) and sendCounter - doneCounter < process_count:
                    #print "Inside while loop"
                    #print "fileList = ", fileList
                    # Call fn extractData to fetch files from AWS.  Pass manifestFileDict[sendCounter] as it contains the whole record including the filename, filesize & row count
                    processHandle = Process(target=DEAExtractor.extractData, args=(self, fileList[sendCounter],fileID, fileIDQueue, dbFlag))
                    processFlag=1
                    if ((int(time.time()) - self.currentEpochTime) > s3TimeoutTime):
                        self.currentEpochTime = int(time.time())
                        self.m_logger.info("Getting New Token for Batch : {0}, Max batches : {1}".format(batch_count,max_batches))
                        if self.debugFlag:
                            print 'Memory usage: %s (kb)' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
                            print "self.currentEpochTime = ", self.currentEpochTime
                            print "Current Time in Epoch = ", int(time.time())
                        if self.debugFlag:
                            print "Inside get new token - self.currentEpochTime = ", self.currentEpochTime
                        initCount = 0
                        while (initCount < tokenRetryTimes):
                            tokenReturnCode = 0
                            tokenReturnCode = self.s3object.getToken()
                            if tokenReturnCode:
                                if initCount == tokenRetryTimes:
                                    self.m_logger.error("Error: Exceed the max retries " + tokenRetryTimes + " to get AWS Token from FINRA.  Please re-try after some time or escalate.. ")
                                    sys.exit(1)
                                initCount += 1
                                time.sleep(tokenRetryWaitTime)
                            else:
                                break

                    threadDelayTime = int(self.m_configDict["dea"]["THREAD_DELAY_TIME"])
                    time.sleep(threadDelayTime)

                    processHandle.start()
                    procs.append(processHandle)
                    sendCounter += 1
                    fileID += 1
                if processFlag:
                    for p in procs:
                        p.join()
                        procs=[]
                    processFlag=0
                while not fileIDQueue.empty():  # process completed results as they arrive
                    #time.sleep(3)
                    qFileID, qResult = fileIDQueue.get()
                    if self.debugFlag:
                        print("qFileID = ", qFileID, "qResult = ", qResult)
                    doneCounter += 1
                    if qResult:
                        failureFlag = 1
                if self.debugFlag:
                    print "ProcessFlag = ", processFlag, "sendCounter = ", sendCounter, "doneCounter = ", doneCounter
                if failureFlag:
                    break
                            
            if self.debugFlag:
                print "Failure Flag = ", failureFlag
    
            if failureFlag:
                pStatus = 'F'
            else:
                pStatus = 'S'
                fatlManifestFile = self.m_configDict["ENV"]["stage_dir"] + "/" + tblName + "." + self.processingDate + ".manifest"
                if self.debugFlag:
                    print "File List = ", fileList
                    print "fatlManifestFile =", fatlManifestFile
                with open(fatlManifestFile,"w") as fh:
                    counter = 0
                    for file in fileList:
                        sourceFileWthPath = self.m_configDict["ENV"]["stage_dir"] + "/" + file
                        fileSize = os.stat(sourceFileWthPath).st_size
                        if self.debugFlag:
                            print "sourceFileWthPath = ", sourceFileWthPath
                            print "fileSize = ", fileSize
                            print "tblName = ", tblName, "file = ", file, "fileSize = ", fileSize, "mktName = ", self.mktName
                        fh.write(tblName + "|" + file + "|" + str(fileSize) + "|" + "0" + "\n")
                        counter += 1

            # insert a record into tb_dxt_dataset_trans table with 'S' or 'F' record
    
            #Call Oracle fn to insert status 'S' into TB_DDY_DATASET_TRANS with RUNID etc
            #DB_CALL
            # Make database call sp_dxt_insert_dataset_trans and insert data based on Failure or Success
    
            mySql = ""
            myParams = ""
            tempSql = self.m_configDict["SQL"]["put_dataset"]
            
            myParamsDict = {"datasetName":self.datasetName, "runID": str(self.runID), "tDate":str(self.processingDate), "status": pStatus }
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
        
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
        
            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr
       
            if returnStr[0] != '0':
                self.m_logger.error("Unable to insert into tb_dxt_dataset_trans table " + mySql + ". Error = " + self.datasetName + " for processing date = " + self.processingDate)
                sys.exit(1)
    
        except Exception as e:
            self.m_logger.error("processDEAExtractor failed with error " + str(e))
            sys.exit(1)
Esempio n. 5
0
class Loader():
    #class variables
    m_logger = ""

    #database objects
    m_oracle_db = ""
    m_netezza_db = ""

    def __init__(self, configFile, tradeDate, debugFlag):
        """
        Purpose: Constructor

        :param self:        class object itself
        :param configFile:  Configuration file to use
        :param tradeDate:   Trade date this loader run is associated with
        :param debugFlag:   When truthy, methods emit verbose debug prints
        """
        # Build the logger first so any failure below can be recorded.
        self.m_logger = Logger(logging.INFO, configFile, tradeDate)
        self.m_logger.addFileHandler(logging.DEBUG)
        self.m_logger.addGenericInfo(__file__)

        # Stash the constructor arguments for the other methods.
        self.configFile = configFile
        self.tradeDate = tradeDate
        self.debugFlag = debugFlag

        try:
            # Parse the main configuration file into a dictionary ...
            self.m_configDict = configuration(self.configFile, True).m_dictionary
            # ... and open the Oracle connection it describes.
            self.m_oracle_db = Oracle(self.m_configDict, self.m_logger)
        except Exception as exp:
            # Initialization failed -- log it, echo to stdout and abort.
            self.m_logger.error("Unable to initialize the configuration " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def readMktConfigFile(self, mktConfigFile):
        """
        Load a market-specific configuration file.

        Parses *mktConfigFile* and stores the resulting dictionary on the
        instance as ``m_mktConfigDict`` for later reference by the other
        methods; exits the process on any parse failure.

        :param mktConfigFile: path of the market configuration file
        :return: None
        """
        try:
            self.m_mktConfigDict = configuration(mktConfigFile, True).m_dictionary
        except Exception as exp:
            # Parsing failed -- log, echo to stdout, and abort the run.
            self.m_logger.error("Unable to initialize the configuration for logger " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def chkActiveLoads(self):
        """
        Purpose - To check the count of active Active loads happening at a given point
        :param None: None at this point
        :return:
        """
        try:
            if self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_check_flag"] == 'Y': 
                localActiveLoadMax = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"]
                localActiveLoadWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_wait_time"]
                localActiveLoadMaxWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max_wait_time"]

                mySql = ""
                myParams = ""
                mySql = self.m_configDict["SQL"]["get_active_loads"]
    
                activeFlag=1
                totalActiveWaitTime=0
                while activeFlag:
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if self.debugFlag:
                        print "mySql = ", mySql
                        print "returnStr = ", returnStr
                        print "chkActiveLoads - Active Loads value = ", int(returnStr[1].strip())
                    if returnStr[0] != '0':
                        self.m_logger.error("Unable to get active loads using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

                    #Check if actual active loads is <= configured active loads.  If so, return out of the fn
                    if int(returnStr[1].strip()) <= localActiveLoadMax:
                        activeFlag=0
                        return 0

                    #Sleep for time defined by configured value for "active_load_wait_time"
                    time.sleep(localActiveLoadWaitTime)
                    totalActiveWaitTime += localActiveLoadWaitTime

                    #Check if actual Total wait time is > configured total wait time.  If so, throw an error and exit
                    if totalActiveWaitTime > localActiveMaxWaitTime:
                        self.m_logger.error("In Fn chkActiveLoads.  Total Actual Wait Time exceeds the configured value active_load_max_wait_time.  Either cleanup orphaned loads or increase the either active_load_max or active_load_max_wait_time. totalActiveWaitTime = " + str(totalActiveWaitTime) + " localActiveMaxWaitTime=" + str(localActiveMaxWaitTime))
                        return 1
            else:
                return 0

            #Return failure
            return 1
                    
        except Exception as exp:
            self.m_logger.error("Failure in chkActiveLoads process for file with the error " + str(exp))
            sys.exit(1)

    def chkRaceStatus(self):
        """
        Purpose - To check if a load is already running for the given dataset

        When RACE.race_status_check_flag is 'Y', the get_race_status SQL
        (with this run's datasetName substituted into it) is polled until it
        reports at most one load in flight, sleeping race_status_wait_time
        seconds between polls, for at most race_status_max_wait_time seconds.

        :param None: None at this point
        :return: 0 when no competing load remains (or checking is disabled),
                 1 when the maximum wait time is exceeded; exits the process
                 on a database error.
        """
        try:
            if self.m_mktConfigDict["RACE"]["race_status_check_flag"] == 'Y':
                # Poll interval and overall cap; configured as strings, so
                # cast to int up front.
                localRaceStatusWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_wait_time"])
                localRaceStatusMaxWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_max_wait_time"])

                mySql = ""

                # Substitute each parameter-name occurrence in the SQL
                # template with its value (here only datasetName).
                #myParams = {"datasetName":self.datasetName}
                tempSql = self.m_configDict["SQL"]["get_race_status"]
                myParamsDict = { 'datasetName' : self.datasetName }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
    
                raceFlag=1
                totalRaceStatusWaitTime=0
                while raceFlag:
                    # returnStr[0] is a status code ('0' = success);
                    # returnStr[1] is the in-flight load count per the
                    # checks below (or error text on failure).
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if self.debugFlag:
                        print "tempSql = ", tempSql
                        print "myParamsDict = ", myParamsDict
                        print "mySql = ", mySql
                        print "returnStr = ", returnStr
                        print "chkRaceStatus - ReturnCode = ", int(returnStr[1].strip())
                    if returnStr[0] != '0':
                        self.m_logger.error("Unable to get race status using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

                    #Check if the load for this dataset is already running. If not, exit out of the function with normal return value
                    # <= 1 because this run's own record counts as one load.
                    if int(returnStr[1].strip()) <= 1:
                        raceFlag=0
                        return 0

                    #Check if actual Total wait time is > configured total wait time.  If so, throw an error and exit


                    #Sleep for time defined by configured value for "active_load_wait_time"
                    time.sleep(localRaceStatusWaitTime)
                    #time.sleep(90)
                    totalRaceStatusWaitTime += localRaceStatusWaitTime
                    if self.debugFlag:
                        print "totalRaceStatusWaitTime = ", totalRaceStatusWaitTime, "localRaceStatusWaitTime =", localRaceStatusWaitTime
                    # Give up (return 1) once the accumulated wait exceeds
                    # the configured maximum.
                    if totalRaceStatusWaitTime > localRaceStatusMaxWaitTime:
                        self.m_logger.error("In Fn chkRaceStatusLoads.  Total Actual Wait Time exceeds the configured value race_status_max_wait_time.  Either check if the Dataset is getting loaded or increase the either active_load_max or active_load_max_wait_time. totalRaceStatusWaitTime = " + str(totalRaceStatusWaitTime) + " localRaceStatusMaxWaitTime=" + str(localRaceStatusMaxWaitTime))
                        return 1
            else:
                # Race checking disabled in the market config.
                return 0

            #Return failure
            return 1
                    
        except Exception as exp:
            self.m_logger.error("Failure in chkRaceStatus process for file with the error " + str(exp))
            sys.exit(1)

    def loadData(self,localDataFile, localFileID, localFileIDQueue):
        """
        Purpose - To load the given datafile to the S3 bucket specified in the global mktConfigFile

        Flow:
          1. Pre-flight checks: chkRaceStatus (no competing load for this
             dataset) and chkActiveLoads (a free active-load slot exists).
          2. Insert a 'P' (in progress) row via the put_process_status SQL.
          3. Upload with S3.loadDataSinglePart, retrying up to
             ENV.aws_retries times.
          4. Insert the final 'S'/'F' row and push (fileID, returnCode)
             onto localFileIDQueue for the parent process.

        :param localDataFile: Source datafile to be uploaded to S3
        :param localFileID: Internal File ID assigned to the source datafile
        :param localFileIDQueue: Queue in which, results of the operation is stored
        :return: 1 on pre-flight failure; otherwise results are delivered
                 through localFileIDQueue (localDBFlag is always 1 here)
        """
        try:
            # DB bookkeeping is always on; the flag only preserves the old
            # non-DB code path for reference.
            localDBFlag=1
            if self.debugFlag:
                print("Inside loadData function")

            if localDBFlag:
                # Abort early if another load of this dataset is in flight.
                raceStatusReturnValue=self.chkRaceStatus()
                if self.debugFlag:
                    print("raceStatusReturnValue= %s" % str(raceStatusReturnValue))

                if raceStatusReturnValue:
                    self.m_logger.error("Failure value returned by chkRaceStatus fn.  Return value = " + str(raceStatusReturnValue))
                    localFileIDQueue.put((localFileID, raceStatusReturnValue))
                    return 1

                # Abort if no active-load slot frees up in time.
                activeLoadsReturnValue=self.chkActiveLoads()
                if activeLoadsReturnValue:
                    # Bug fix: log chkActiveLoads (the message previously
                    # said chkRaceStatus) and queue the active-loads result
                    # (previously queued raceStatusReturnValue).
                    self.m_logger.error("Failure value returned by chkActiveLoads fn.  Return value = " + str(activeLoadsReturnValue))
                    localFileIDQueue.put((localFileID, activeLoadsReturnValue))
                    return 1

                processID = os.getpid()
                hostName = socket.gethostname()

                #Insert Process status into Oracle db
                tempSql = self.m_configDict["SQL"]["put_process_status"]
                pStatus = 'P'
                pComment = 'Load started'
                myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileID":str(localFileID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "status":pStatus , "lcomment":pComment, "tidalRunID":str(self.tidalRunID)}
                # Substitute each parameter-name occurrence in the SQL
                # template with its value.
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)

                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

                if self.debugFlag:
                    print("tempSql = %s" % tempSql)
                    print("myParamsDict = %s" % str(myParamsDict))
                    print("mySql = %s" % mySql)
                    print("returnStr = %s" % str(returnStr))

                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

            sourceFileWthPath = localDataFile

            # Token acquisition was moved out of this worker: self.s3object
            # is created (and its token fetched) by the caller.
            targetFileWthPath = os.path.join(self.s3object.m_configFile["S3"]["path"], os.path.basename(sourceFileWthPath))
            targetBucket = self.s3object.m_configFile["S3"]["bucket"]
            encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]

            local_aws_retries = int(self.m_mktConfigDict["ENV"]["aws_retries"])
            if self.debugFlag:
                print("sourceFileWthPath =", sourceFileWthPath)
                print("targetFileWthPath =", targetFileWthPath)
                print("targetBucket =", targetBucket)
                print("encryptKeyFlag =", encryptKeyFlag)
                print("local_aws_retries =", local_aws_retries)

            # Robustness fix: pre-set the outcome so it is defined even if
            # aws_retries is misconfigured as 0 (previously NameError).
            loadReturnValue = 1
            pStatus = 'F'
            pComment = 'Load failed'
            init_count = 0
            while (init_count < local_aws_retries):
                #Call s3.dataUpload to load the data (single part load)
                loadReturnValue = self.s3object.loadDataSinglePart(sourceFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag )
                if self.debugFlag:
                    print("loadReturnValue = %s" % str(loadReturnValue))

                if loadReturnValue == 0:
                    pStatus = 'S'
                    pComment = 'Load completed'
                    break
                else:
                    pStatus = 'F'
                    pComment = 'Load failed'
                    init_count += 1

            if localDBFlag:
                #Record the final 'S' or 'F' status for this file
                tempSql = self.m_configDict["SQL"]["put_process_status"]
                myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileID":str(localFileID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "status":pStatus , "lcomment":pComment, "tidalRunID":str(self.tidalRunID)}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                if self.debugFlag:
                    print("tempSql = %s" % tempSql)
                    print("myParamsDict = %s" % str(myParamsDict))
                    print("mySql = %s" % mySql)
                    print("returnStr = %s" % str(returnStr))

                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

                localFileIDQueue.put((localFileID,loadReturnValue))
            else:
                return loadReturnValue

        except Exception as exp:
            self.m_logger.error("Failure in loadData process for file with the error " + str(exp))
            if localDBFlag:
                # Bug fix: deliver the (fileID, failure) pair as one tuple;
                # Queue.put(localFileID, 1) passed 1 as the 'block' argument
                # and put only the bare fileID on the queue.
                localFileIDQueue.put((localFileID, 1))
            else:
                return 1

    def createFinraManifestFile(self, manifestFile):
        try:
            # Read Manifest file to get info like total rows, total size & other details to populate the done file for FINRA
            if self.debugFlag:
                print "Inside createFinraManifestFile fuction"

            with open(manifestFile,"r") as fh:
                self.totalRows = 0
                self.totalSize = 0
                self.fileCount = 0
                self.fileDict = {}
                for data in fh:
                    data.rstrip("\n")
                    mylist = []
                    self.fileCount +=1
                    mylist = data.split("|")
                    self.fileDict[self.fileCount] = [mylist[0],os.path.basename(mylist[1]),int(mylist[2]),int(mylist[3])]
                    self.totalRows += int(mylist[3])
                    self.totalSize += int(mylist[2])
                if self.debugFlag:
                    print "self.fileDict = ", self.fileDict
        except Exception as exp:
            self.m_logger.error("Failed while processing readManifest with Error = " + str(exp))
            return 1

        try:
            #Use self.defautltsFile which is populated from the db later. No need to get it from config file
            self.defaultsFileWthPath = self.m_mktConfigDict["DATASET"]["defaults_dir"] + "/" + self.defaultsFile
            with open(self.defaultsFileWthPath,"r") as fh:
                self.defaultsDict = {}
                self.defaultsCount  = 0
                for data in fh:
                    data.rstrip('\n')
                    self.defaultsCount +=1
                    self.defaultsDict[self.defaultsCount]=data
                if self.debugFlag:
                    print "After Defaults, self.fileDict = ", self.fileDict

        except Exception as exp:
            self.m_logger.error("Failed while processing defaults file " + self.defaultsFileWthPath + " with Error = " + str(exp))
            return 1

        try:
            self.finraManifestFile =  self.m_mktConfigDict["ENV"]["donefile_dir"] + "/" +  os.path.basename(manifestFile) + ".done"
            with open(self.finraManifestFile,"w") as finraMnFH:
                finraMnFH.write("# AWS RunID : {}\n".format(str(self.runID)))
                finraMnFH.write("# Dataset : {0} , TradeDate : {1}\n".format(str(self.datasetName),str(self.tradeDate)))
                finraMnFH.write("total_compressed={}\n".format(self.totalSize))
                finraMnFH.write("total_rows={}\n".format(self.totalRows))
                finraMnFH.write("no of files={}\n".format(self.fileCount))
   
                for key,val in self.fileDict.items():
                    finraMnFH.write("file_{0}={1}\n".format(str(key),val[1]))
                    finraMnFH.write("file_{0}_rows={1}\n".format(str(key),val[3]))
  
                finraMnFH.write("# Data Attributes\n")
                for key,val in self.defaultsDict.items():
                    finraMnFH.write("{0}".format(str(val)))
            return 0
        except Exception as exp:
            self.m_logger.error("Failed while creating AWS Done file " + self.finraManifestFile + " with Error = " + str(exp))
            return 1

    def processLoader(self, manifestFile, datasetName, tidalRunID):
        """
        Purpose - Function responsible for reading the manifest file, get market name, call multiprocess load and other db calls
        :param manifestFile: Manifest File
        :param tradeDate: Trade Date
        :param tidalRunID: Tidal Run ID 
        :return:
        """
        try:
            # Read the manifest filename and get the suffix i.e. datasetname
            # Assumption - Manifest file format - manifest.<datasetName>.<tradeDate>.<tidalRunID>
            # Program will break otherwise

            self.datasetName = datasetName
            self.tidalRunID = tidalRunID

            # DB_CALL
            # Make database call sp_ddy_get_market_info(datasetname) and get market info
            mktName = ''

            self.manifestFile = self.m_configDict["ENV"]["manifestfile_dir"] + "/" + manifestFile

            ##Validate Manifest file
            if not os.path.isfile(self.manifestFile):
                self.m_logger.error("Invalid manifest file " + self.manifestFile)
                sys.exit(1)

            if self.debugFlag:
                print "Inside processLoader"
                print "DatasetName = ", self.datasetName
                print "ManifestFile = ", manifestFile
                print "Self ManifestFile = ", self.manifestFile
                print "TidalRunID = ", self.tidalRunID
                print "DebugFlag = ", self.debugFlag
                print "confDict = ", self.m_configDict

            # Enable this one the proc to get mkt name and default file are ready and test it
            tempSql = self.m_configDict["SQL"]["get_mkt_defaults_filename"]
            myParamsDict = { 'datasetName' : self.datasetName }
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr

            if returnStr[0] == '0':
                mktName = returnStr[2].strip()
                self.defaultsFile = returnStr[3].strip()
            else:
                self.m_logger.error("Unable to get market info from the database using sql " + mySql + ". Error = " + returnStr[1])
                sys.exit(1)

            if self.debugFlag:
                print "MktName from DB = ", mktName
                print "Defaults = ", self.defaultsFile
          
            #Build the string for mktConfigFile based on mktName and configFile info
            self.mktConfigFile = os.path.dirname(self.configFile) + '/' + os.path.basename(self.configFile).split('.',1)[0].strip() + '_' + mktName.lower() + '.' + os.path.basename(self.configFile).split('.',1)[1].strip()

            if self.debugFlag:
                print("mktConfigFile = ", self.mktConfigFile)

            #Validate Manifest file is a valid file
            if not os.path.isfile(self.mktConfigFile):
                self.m_logger.error("Invalid market manifest file " + self.mktConfigFile)
                sys.exit(1)

            #May not need the following section, as we send mktConfigFile to other function not the dictionary self.m_mktConfigDict.  Need to remove it after finishing the loadData part fully

            # Read Market specific config file and store it in a specific dictionary
            self.readMktConfigFile(self.mktConfigFile)

            if self.debugFlag:
                print("m_mktConfigDict=",self.m_mktConfigDict)

            # Read the contents of manifest - dataFileNames into a list - Will validate the datafiles as well

            localManifest = Manifest()
            manifestDelim = self.m_configDict["ENV"]["manifest_delim"]
            manifestFileList = localManifest.readManifest(self.manifestFile, self.m_logger, manifestDelim, self.debugFlag)

            # Get RunID
            self.runID = generate_runId()
            if self.debugFlag:
                print("RunID = ", self.runID)
                #print("manifestFileList = ", manifestFileList)

            #Call Oracle fn to insert status 'P' into TB_DDY_DATASET_TRANS with RUNID etc
            #DB_CALL
            # Make database call sp_ddy_insert_dataset_trans and insert data that process started

            mySql = ""
            myParams = ""
            tempSql = self.m_configDict["SQL"]["put_dataset"]
            pStatus = 'P'

            myParamsDict = {'datasetName':self.datasetName, 'runID': str(self.runID), 'tDate':str(self.tradeDate), 'status': pStatus, 'tidalRunID':str(self.tidalRunID)}
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)

            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr

            if returnStr[0] != '0':
                self.m_logger.error("Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                sys.exit(1)

            # Insert Manifest data in db and call multiprocessing s3 loader process.  Shd we add RUN_ID to manifest table

            #For each datafile, generate fileID and call loadData fn using multiprocess to load data into AWS
            for dataRecord in manifestFileList:
                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_manifest"]
                myParamsDict = {"datasetName":self.datasetName, "runID": str(self.runID), "tDate":str(self.tradeDate), "dataFileName":dataRecord[1], "manifestFileName":manifestFile , "noOfRecords": str(dataRecord[2]), "fileSize":str(dataRecord[3]), "tidalRunID":str(self.tidalRunID)}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
     
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                #if self.debugFlag:
                    #print "tempSql = ", tempSql
                    #print "myParamsDict = ", myParamsDict
                    #print "mySql = ", mySql
                    #print "returnStr = ", returnStr

                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put manifest info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

            # Initialize S3 object and get FINRA cloud service token and establish s3 session
            self.s3object = S3(self.mktConfigFile, self.m_logger, self.debugFlag)
            self.s3object.getToken()

            # Get Active load values from config file
            localActiveLoadCheckFlag = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_check_flag"]
            process_count = int(self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"])
            #localActiveLoadMax = int(self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"])
            #localActiveLoadWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_wait_time"]
            #localActiveLoadMaxWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max_wait_time"]
            if self.debugFlag:
                print("localActiveLoadMax = ", process_count)
                print("len(manifestFileList) = ", len(manifestFileList))

            pool = multiprocessing.Pool(processes=process_count)
            m = multiprocessing.Manager()
            fileIDQueue = m.Queue()
            #dbFlag=1

            sendCounter = 0
            doneCounter = 0
            fileID=1

            failureFlag=0
            print manifestFileList
            while doneCounter < len(manifestFileList):
                print "Inside while doneCounter = ", doneCounter
                print "doneCounter = ", doneCounter, "sendCounter = ", sendCounter
                while sendCounter < len(manifestFileList) and sendCounter - doneCounter < process_count:
                    tmpDataFileName = manifestFileList[sendCounter][1]
                    print "Inside sendCounter, manifestFileList[sendCounter] = ", manifestFileList[sendCounter], "manifestFileList[sendCounter][1] = ", manifestFileList[sendCounter][1]
                    #finraManifestLoadStatus=self.loadData(self.finraManifestFile ,fileID, fileIDQueue, dbFlag)
#                    #processHandle = Process(target=Loader.loadData, args=(self, dataRecord[1],fileID, fileIDQueue, dbFlag))
                     #def loadData(self,localDataFile, localFileID, localFileIDQueue, localDBFlag):
                    #results = mpPool.apply_async(Loader.loadData, (self, manifestFileList[sendCounter][1], fileID, fileIDQueue, dbFlag) )
                    #results = mpPool.apply_async(self.loadData, (tmpDataFileName, fileID, fileIDQueue, dbFlag))

                    results = pool.apply_async(self.loadData, args=(tmpDataFileName, fileID, fileIDQueue))
                    #results = pool.apply_async(Loader.loadData, (tmpDataFileName, fileID, fileIDQueue))
                    print "After pool apply_async"
                    time.sleep(2)
                    sendCounter += 1
                    fileID += 1
            
                while not fileIDQueue.empty():  # process completed results as they arrive
                    print "Inside Queue"
                    time.sleep(3)
                    qFileID, qResult = fileIDQueue.get()
                    if qResult:
                        failureFlag=1
                    if self.debugFlag:
                        print("qFileID = ", qFileID, "qResult = ", qResult)
                    doneCounter += 1
                if failureFlag:
                    break
                time.sleep(2)
                


#            #for dataRecord in manifestFileList:
#                #if self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_check_flag"] == 'Y': 
#                    #processHandle = Process(target=Loader.loadData, args=(self, dataRecord[1],fileID, fileIDQueue, dbFlag))
#                #processHandle.start()
#                #procs.append(processHandle)
#                #fileID += 1
#
#            #for p in procs:
#                #p.join()
#
#            #Without sleep the queue is unreliable and do not return the expected values.  Fixed with procs.append function.  No need for sleep anymore
#            #time.sleep(2)
#
##            failureFlag=0
#            while not fileIDQueue.empty():
#                qFileID, qResult = fileIDQueue.get()
#                if qResult:
#                    failureFlag=1
#                if self.debugFlag:
#                    print("Inside fileIDQueue while")
#                    print("qFileID = ", qFileID, "qResult = ", qResult)
#                    
#            if self.debugFlag:
#                print "Failure Flag = ", failureFlag

            if failureFlag:
                pStatus = 'F'
            else:
                pStatus = 'S'
                """
                #Generate FINRA Manifest file and Push it to AWS 
                """

                # Call Divakar's generate done file function
                returnValue = self.createFinraManifestFile(self.manifestFile)
                if self.debugFlag:
                    print "Post createFinraManifestFile fn - return value= ", returnValue
    
                if returnValue:
                    self.m_logger.error("Unable to generate done file. Please fix the issue the re-run the load")
                    #sys.exit(1)
                    failureFlag=1
                    pStatus = 'F'
                else:

                    dbFlag=0
                    fileID=0
                    # Call the loader function with the manifest file
                    finraManifestLoadStatus=0
                    finraManifestLoadStatus=self.loadData(self.finraManifestFile ,fileID, fileIDQueue, dbFlag)
    
                    if finraManifestLoadStatus:
                        pStatus = 'F'
                        self.m_logger.error("Unable to load finra manifest file ")
                

            #Call Oracle fn to insert status 'S' into TB_DDY_DATASET_TRANS with RUNID etc
            #DB_CALL
            # Make database call sp_ddy_insert_dataset_trans and insert data based on Failure or Success

            mySql = ""
            myParams = ""
            tempSql = self.m_configDict["SQL"]["put_dataset"]
            
            myParamsDict = {"datasetName":self.datasetName, "runID": str(self.runID), "tDate":str(self.tradeDate), "status": pStatus, "tidalRunID":str(self.tidalRunID)}
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)

            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr

            if returnStr[0] != '0':
                self.m_logger.error("Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                sys.exit(1)

            if failureFlag:
                self.m_logger.error("Load failed")
                sys.exit(1)

        except Exception as e:
            self.m_logger.error("ProcessLoader failed with error " + str(e))
            sys.exit(1)
Esempio n. 6
0
def main(configFile, logLevel, tDate):
    log = Logger(logLevel,configFile, tDate)
    log.addFileHandler(logging.DEBUG)
    log.addGenericInfo(__file__)

    try:
        m_configDict = configuration(configFile, True).m_dictionary
        #print "m_configDict = ", m_configDict
        myOracle = Oracle(m_configDict, log)

        #mySql = "select RETURN_CODE||'|'|| RETURN_MSG from table(PKG_RFCM_DDY.F_DDY_INSERT_DATASET_TRANS(:datasetID, :runID, :tDate, :status))"
        #pDatasetID = 2
        #pRunID = 234234234
        # 20151216144156584829
        #pTDate = 20151215
        #pStatus = 'P'
        #myParams = {"datasetID": pDatasetID, "runID": pRunID, "tDate": pTDate, "status": pStatus}

        #select RETURN_CODE||'|'||RETURN_MSG from table(PKG_RFCM_DDY.F_DDY_INSERT_PROCESS_STATUS('DLE_INFO', 20151216144156584829, 1, 'opb1.dat.bz2', 20151215,  111, 'test_hostname', 'P', 'Process Started'))
        #select RETURN_CODE||'|'||RETURN_MSG from table(PKG_RFCM_DDY.F_DDY_INSERT_PROCESS_STATUS('DLE_INFO', 20151216144156584829, 1, 'opb1.dat.bz2', 20151215,  111, 'test_hostname', 'P', 'Process Started'));
        mySql = "select RETURN_CODE||'|'||RETURN_MSG from table(PKG_RFCM_DDY.F_DDY_INSERT_PROCESS_STATUS(:datasetName, :runID, :fileID, :fileName, :tDate, :processID, :hostName, :lstatus , :lcomment))"
        pDatasetName = "DLE_INFO"
        pRunID = 20151216144156584
        pTDate = 20151210
        pStatus = 'P'
        pFileID = 1
        pFileName = 'test.dat.gz'
        pComment = 'Process Started'
        pProcessID = 1234
        pHostName = 'Test_Host'
        
        myParams = {"datasetName": pDatasetName, "runID": pRunID, "fileID": pFileID, "fileName": pFileName, "tDate": pTDate, "processID": pProcessID, "hostName": pHostName, "lstatus": pStatus, "lcomment": pComment}

        #returnStr = self.__cursor.execute("select * from table(PKG_RFCM_DDY.f_ddy_insert_manifest_trans(1, 20151215, 'opb.test1.bz2', 'manifest.opb', 23423, 2342334))")
        #mySql = "select RETURN_CODE||'|'|| RETURN_MSG from table(PKG_RFCM_DDY.f_ddy_insert_manifest_trans(:datasetID, :tDate, :dataFileName, :manifestFileName, :noOfRecords, :fileSize))"
        #pDatasetID = 3
        #pTDate = 20151210
        #pDataFileName = 'opb.test1.bz2'
        #pManifestFileName = 'manifest.opb'
        #pNoOfRecords = 23423
        #pFileSize = 2342334
        #myParams = {"datasetID": pDatasetID, "tDate": pTDate, "dataFileName": pDataFileName, "manifestFileName":pManifestFileName, "noOfRecords":pNoOfRecords, "fileSize":pFileSize}

        #mySql = "select RETURN_CODE||'|'|| RETURN_MSG from table(PKG_RFCM_DDY.f_ddy_get_process_status(20151216144156584829,1));"
        #mySql = "select RETURN_CODE||'|'|| RETURN_MSG from table(PKG_RFCM_DDY.f_ddy_get_process_status(:runID,:fileID))"
        #pRunID = 20151216144156584829
        #pFileID = 1
        #myParams = {"runID":pRunID, "fileID":pFileID}

        #mySql = "select RETURN_CODE||'|'|| RETURN_MSG from table(PKG_RFCM_DDY.f_ddy_get_dataset_status(234234234));"
        #mySql = "select RETURN_CODE||'|'|| RETURN_MSG from table(PKG_RFCM_DDY.f_ddy_get_dataset_status(:runID))"

        #pRunID = 234234234

        #myParams = {"runID":pRunID}

        #mySql = "select RETURN_CODE||'|'|| RETURN_MSG from table(PKG_RFCM_DDY.f_ddy_get_active_loads());"
        #mySql = "select RETURN_CODE||'|'|| RETURN_MSG from table(PKG_RFCM_DDY.f_ddy_get_active_loads())" 

        #myParams = {}

        #mySql = select RETURN_CODE||'|'|| RETURN_MSG from  table(PKG_RFCM_DDY.f_ddy_get_market_info(p_dataset_name => 'ADW_EVENT_LSH_RAW'));
        #mySql = "select RETURN_CODE||'|'|| RETURN_MSG from  table(PKG_RFCM_DDY.f_ddy_get_market_info(:datasetName))"
        #mySql = "select RETURN_CODE, RETURN_MSG from  table(PKG_RFCM_DDY.f_ddy_get_market_info(:datasetName))"

        #pDatasetName = 'ADW_EVENT_LSH_RAW'
        #pDatasetName = 'DLE_INFO'

        myParams = {"datasetName":pDatasetName}

        #mySql = select  RETURN_CODE||'|'|| RETURN_MSG from table(PKG_RFCM_DDY.f_ddy_get_dataset_race('ADW_AMEX_OPT_RAW'));
        #mySql = "select  RETURN_CODE||'|'|| RETURN_MSG from table(PKG_RFCM_DDY.f_ddy_get_dataset_race(:datasetName))"

        #pDatasetName = "ADW_AMEX_OPT_RAW"

        #myParams = {"datasetName":pDatasetName}

        """
        #mySql = "select RETURN_CODE, RETURN_MSG, P_MARKET_IND, P_DEFAULTS_FILENAME  from  table(pkg_rfcm_ddy.f_ddy_get_makt_info_dflt_fname(:datasetName))"
        mySql = "select RETURN_CODE, RETURN_MSG, P_MARKET_IND, P_DEFAULTS_FILENAME  from  table(pkg_rfcm_ddy.f_ddy_get_makt_info_dflt_fname('datasetName'))"
        pDatasetName = 'DLE_INFO'
        new_mySql = re.sub('datasetName', pDatasetName, mySql.rstrip())
        returnStr = myOracle.runSqlWthParamsGetOneRow(new_mySql)
        print "Return Value = ", returnStr[0], " Return Code = ", returnStr[1], " Mkt = ", returnStr[2], "Defaults = ", returnStr[3]
        """
        """
        mySql = "select RETURN_CODE, RETURN_MSG from table(PKG_RFCM_DDY.F_DDY_INSERT_DATASET_TRANS('datasetName', 'runID', 'tDate', 'status', 'tidalRunID'))"
        pDatasetName = 'DLE_INFO'
        pRunID = '20151216144156584829'
        pTDate = '20151215'
        pStatus = 'P'
        pTidalRunID ='20151215'
        myParamsDict = {"datasetName":self.datasetName, "runID": self.runID, "tDate":self.tradeDate, "status": pStatus, "tidalRunID":self.tidalRunID}
        tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
        mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)

        rdict = { 'datasetName' : pDatasetName, 'runID' : pRunID, 'tDate' : pTDate, 'status' : pStatus, 'tidalRunID' : pTidalRunID }
        pat = "(%s)" % "|".join( map(re.escape, rdict.keys())  )
        new_mySql = re.sub( pat, lambda m:rdict[m.group()], mySql)
        print "mySql = ", mySql, "new_mySql = ", new_mySql
        """

        """
        mySql = "select RETURN_CODE,RETURN_MSG from table(PKG_RFCM_DDY.F_DDY_INSERT_PROCESS_STATUS('datasetName', 'runID', 'fileID', 'fileName', 'tDate', 'processID', 'hostName', 'status' , 'lcomment', 'tidalRunID'))"
        pDatasetName = "DLE_INFO"
        pRunID = '20151216144156584'
        pTDate = '20151210'
        pStatus = 'P'
        pFileID = '1'
        pFileName = 'test.dat.gz'
        pComment = 'Process Started'
        pProcessID = '1234'
        pHostName = 'Test_Host'
        pTidalRunID = '20151210'
        rdict = { 'datasetName' : pDatasetName, 'runID' : pRunID, 'fileID' : pFileID, 'fileName' : pFileName, 'tDate' : pTDate, 'processID' : pProcessID, 'hostName' : pHostName, 'status' : pStatus, 'lcomment' : pComment, 'tidalRunID' : pTidalRunID  }
        pat = "(%s)" % "|".join( map(re.escape, rdict.keys())  )
        new_mySql = re.sub( pat, lambda m:rdict[m.group()], mySql)
        print "mySql = ", mySql, "new_mySql = ", new_mySql
        """

        #>>> print re.sub( pat, lambda m:rdict[m.group()], target)
        #select RETURN_CODE, RETURN_MSG, P_MARKET_IND, P_DEFAULTS_FILENAME  from  table(pkg_rfcm_ddy.f_ddy_get_makt_info_dflt_fname('DLE_INFO', pNumber))
        #>>> rdict = { 'datasetName' : pDatasetName, 'dnumber' : pNumber }
        #>>> pat = "(%s)" % "|".join( map(re.escape, rdict.keys())  )
        #>>> print re.sub( pat, lambda m:rdict[m.group()], target)
        #Traceback (most recent call last):
          #File "<stdin>", line 1, in <module>
          #File "/var/opt/icetools/python/python2.7/lib/python2.7/re.py", line 155, in sub
            #return _compile(pattern, flags).sub(repl, string, count)
        #TypeError: sequence item 3: expected string, int found
        #>>>

        #myParams = {"datasetID": pDatasetID, "runID": pRunID, "tDate": pTDate, "status": pStatus}

        mySql = "select * from  table(PKG_RFCM_DDY.f_ddy_internal_recon('tradeDate')) order by 4"
        tDate = '20160212'
        new_mySql = re.sub('tradeDate', tDate, mySql.rstrip())
        print "mySql = ", mySql, "new_mySql = ", new_mySql
        returnStrs = myOracle.runSqlWthParamsGetMultipleRows(new_mySql)
        print "Return Strs = ", returnStrs
        print "returnStrs[0][0] = ", returnStrs[0][0]
        print "returnStrs[0][1] = ", returnStrs[0][1]
        print "returnStrs[0][2] = ", returnStrs[0][2]
        #for returnStr in returnStrs:
            #print "Return Value = ", returnStr[0], " Return Code = ", returnStr[1]


        #print "mySql = ", mySql, "myParams = ", myParams

########### Multiprocessing test code
        """
        fileID=1
        fileIDQueue = Queue()
        for x in range(5):
            processHandle = Process(target=myOracle.worker, args=( mySql, myParams, fileID, fileIDQueue))
            processHandle.start()
            fileID += 1
        
        processHandle.join()
        #Without sleep the queue is unreliable and do not return the expected values
        time.sleep(2)
        
        failureFlag=0
        while not fileIDQueue.empty():
            qFileID, qResult = fileIDQueue.get()
            print("qFileID = ", qFileID, "qResult = ", qResult)
            if qResult:
                failureFlag=1

        print "FailureFlag = ", failureFlag
        """
########## End

    except Exception as e:
        print "Failed on main", str(e)
        exit(1)
Esempio n. 7
0
class Recon():
    """
    Reconciliation driver.

    Finds datasets whose process-status rows were created in the last day,
    resolves each dataset to its market, loads the per-market configuration
    file, and lists the matching S3 acknowledgement path.
    """
    # class variables (placeholders; real objects are bound in __init__)
    m_logger = ""

    # database objects
    m_oracle_db = ""
    m_netezza_db = ""

    def __init__(self, configFile, tradeDate, debugFlag):
        """
        Purpose: Constructor

        :param self:        class object itself
        :param configFile:  Configuration file to use
        :param tradeDate:   Trade date (also used for log-file naming)
        :param debugFlag:   When truthy, emit extra diagnostics
        """
        # Initialize m_logger object from class Logger and add a header to
        # the log so later failures can be recorded.
        self.m_logger = Logger(logging.INFO, configFile, tradeDate)
        self.m_logger.addFileHandler(logging.DEBUG)
        self.m_logger.addGenericInfo(__file__)

        self.tradeDate = tradeDate
        self.debugFlag = debugFlag
        self.configFile = configFile

        try:
            # Get configuration to a dictionary
            self.m_configDict = configuration(self.configFile, True).m_dictionary

            # Initialize Oracle instance along with connection
            self.m_oracle_db = Oracle(self.m_configDict, self.m_logger)

        except Exception as exp:
            # An exception occurred
            self.m_logger.error("Unable to initialize the configuration " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def readMktConfigFile(self, mktConfigFile):
        """
        Purpose - To read the content of mktConfigFile into the global dictionary m_mktConfigDict for reference

        :param mktConfigFile: Path of the market-specific configuration file
        :return: None; exits the process when the file cannot be parsed
        """
        try:
            self.m_mktConfigDict = configuration(mktConfigFile, True).m_dictionary
        except Exception as exp:
            # An exception occurred
            self.m_logger.error("Unable to initialize the configuration for logger " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def processRecon(self, tidalRunID):
        """
        Purpose - Function responsible for reading the datasets, get market name,
                  call AWS ack files and other db calls

        :param tidalRunID: Tidal Run ID
        :return: never returns normally; exits the process with 0, or with 1
                 when a market configuration file is missing
        """
        try:
            self.tidalRunID = tidalRunID

            # Datasets whose process-status rows were created in the last
            # day and that have no row still in 'R' (running) state.
            tempSql = ("select DM.DATASET_NAME , PS.FILE_ID, TO_CHAR(PS.TRADE_DATE,'YYYYMMDD'), PS.RUN_ID, PS.FILE_NAME from TB_DDY_PROCESS_STATUS PS"
                       " INNER JOIN TB_DDY_MANIFEST_TRANS MT ON MT.RUN_ID= PS.RUN_ID"
                       " INNER JOIN TB_DDY_DATASET_MASTER DM ON DM.DATASET_ID= PS.DATASET_ID"
                       " WHERE PS.CREATE_TIME > SYSDATE - INTERVAL '1' DAY"
                       " AND NOT EXISTS "
                       " ( SELECT 1 FROM TB_DDY_PROCESS_STATUS PS1 WHERE PS1.RUN_ID = PS.RUN_ID and PS1.FILE_ID = PS.FILE_ID and PS.STATUS = 'R')")

            print(tempSql)
            returnList = self.m_oracle_db.runSqlWthParamsGetMultipleRows(tempSql)

            # dataset name -> comma-joined remaining columns (0 when col 2 is falsy)
            returnDataDict= {d[0]: ','.join(d[2:]) if d[2] else 0 for d in returnList}
            # Built but currently unused; retained deliberately.
            returnDict = {d[0]+"_"+d[1]+"_"+d[2]: ','.join(d[0:]) if d[1:] else 0 for d in returnList}

            self._sqlerror_ = 0
            returnMktList = []
            for datasetName in returnDataDict:
                # Resolve each dataset to its market indicator via the
                # configured lookup SQL (placeholder substituted by regex).
                tempSql = self.m_configDict["SQL"]["get_mkt_defaults_filename"]
                myParamsDict = { 'datasetName' : datasetName }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
                returnList = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                print(returnList[0])

                if int(returnList[0]) == 0:
                    # Success: collect each distinct market name once.
                    if returnList[2] not in returnMktList:
                        returnMktList.append(returnList[2])
                elif int(returnList[0]) < 0:
                    self.m_logger.error("Error in Get Market Defaults Filename for Dataset : {1}, SQL : {0}".format(mySql,datasetName))
                    self._sqlerror_ += 1
                else:
                    self.m_logger.error("Warning in Get Market Defaults Filename for Dataset : {1}, SQL : {0}".format(mySql,datasetName))

            print(returnMktList)

            for mktName in returnMktList :
                # Build the market config file name: <base>_<mkt lower>.<ext>
                mktConfigFile = os.path.dirname(self.configFile) + '/' + os.path.basename(self.configFile).split('.',1)[0].strip() + '_' + mktName.lower() + '.' + os.path.basename(self.configFile).split('.',1)[1].strip()

                print("mktConfigFile = ", mktConfigFile)
                # Validate the market config file exists before using it.
                if not os.path.isfile(mktConfigFile):
                    self.m_logger.error("Invalid market manifest file " + mktConfigFile)
                    sys.exit(1)

                self.readMktConfigFile(mktConfigFile)

                # Establish the S3 session and list the acknowledgement path.
                self.s3object = S3(mktConfigFile, self.m_logger, self.debugFlag)
                self.s3object.getToken()
                print(self.s3object.m_configFile["S3"])
                bucket = self.s3object.m_configFile["S3"]["bucket"]
                path   = self.s3object.m_configFile["S3"]["path"]
                ackpath= self.s3object.m_configFile["S3"]["ack_path"]
                print(bucket, path)
                print(ackpath)
                encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]

                rs = self.s3object.listBucketWPath(bucket,ackpath)
                print(rs)

        except Exception:
            # BUG FIX: this was a bare "except:", which also caught the
            # SystemExit raised by sys.exit(1) above and let the method fall
            # through to sys.exit(0) — a missing market config file exited
            # with status 0.  "except Exception" lets SystemExit propagate.
            self.m_logger.error("Error while creating S3 recon file Exception : {0}".format(sys.exc_info()[0]))
            # Not exitting at this point
            #sys.exit(1)

        sys.exit(0)
Esempio n. 8
0
class Extractor():
    #class variables
    m_logger = ""

    #database objects
    m_oracle_db = ""

    def __init__(self, configFile, mktName, tradeDate, debugFlag):
        """
        Purpose: Constructor

        :param self:        class object itself
        :param configFile:  Configuration file to use
        :param mktName:     Market name this extractor works for
        :param tradeDate:   Trade date (also used for log-file naming)
        :param debugFlag:   When truthy, emit extra diagnostics
        """
        # Build the logger first so every later failure can be recorded.
        self.m_logger = Logger(logging.INFO, configFile, tradeDate)
        self.m_logger.addFileHandler(logging.DEBUG)
        self.m_logger.addGenericInfo(__file__)

        # Stash the constructor arguments for later use.
        self.configFile = configFile
        self.mktName = mktName
        self.tradeDate = tradeDate
        self.debugFlag = debugFlag

        try:
            # Parse the configuration file and connect to Oracle.
            self.m_configDict = configuration(self.configFile, True).m_dictionary
            self.m_oracle_db = Oracle(self.m_configDict, self.m_logger)
        except Exception as err:
            self.m_logger.error("Unable to initialize the configuration " + str(err))
            print("ERROR: Unable to initialize the configuration for logger " + str(err))
            sys.exit(1)

    def readMktConfigFile(self, mktConfigFile):
        """
        Load the market-specific configuration file into the instance-level
        dictionary self.m_mktConfigDict for later lookups.

        :param mktConfigFile: path of the market configuration file
        :return: None; exits the process when the file cannot be parsed
        """
        try:
            self.m_mktConfigDict = configuration(mktConfigFile, True).m_dictionary
        except Exception as err:
            self.m_logger.error("Unable to initialize the configuration for logger " + str(err))
            print("ERROR: Unable to initialize the configuration for logger " + str(err))
            sys.exit(1)

    def chkActiveLoads(self):
        """
        Purpose - To check the count of active Active loads happening at a given point
        :param None: None at this point
        :return:
        """
        try:
            if self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_check_flag"] == 'Y': 
                localActiveLoadMax = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"]
                localActiveLoadWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_wait_time"]
                localActiveLoadMaxWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max_wait_time"]

                mySql = ""
                myParams = ""
                mySql = self.m_configDict["SQL"]["get_active_loads"]
    
                activeFlag=1
                totalActiveWaitTime=0
                while activeFlag:
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if self.debugFlag:
                        print "mySql = ", mySql
                        print "returnStr = ", returnStr
                        print "chkActiveLoads - Active Loads value = ", int(returnStr[1].strip())
                    if returnStr[0] != '0':
                        self.m_logger.error("Unable to get active loads using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

                    #Check if actual active loads is <= configured active loads.  If so, return out of the fn
                    if int(returnStr[1].strip()) <= localActiveLoadMax:
                        activeFlag=0
                        return 0

                    #Sleep for time defined by configured value for "active_load_wait_time"
                    time.sleep(localActiveLoadWaitTime)
                    totalActiveWaitTime += localActiveLoadWaitTime

                    #Check if actual Total wait time is > configured total wait time.  If so, throw an error and exit
                    if totalActiveWaitTime > localActiveMaxWaitTime:
                        self.m_logger.error("In Fn chkActiveLoads.  Total Actual Wait Time exceeds the configured value active_load_max_wait_time.  Either cleanup orphaned loads or increase the either active_load_max or active_load_max_wait_time. totalActiveWaitTime = " + str(totalActiveWaitTime) + " localActiveMaxWaitTime=" + str(localActiveMaxWaitTime))
                        return 1
            else:
                return 0

            #Return failure
            return 1
                    
        except Exception as exp:
            self.m_logger.error("Failure in chkActiveLoads process for file with the error " + str(exp))
            sys.exit(1)

    def chkRaceStatus(self):
        """
        Purpose - To check if a load is already running for the given dataset
        :param None: None at this point
        :return:
        """
        try:
            if self.m_mktConfigDict["RACE"]["race_status_check_flag"] == 'Y':
                localRaceStatusWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_wait_time"])
                localRaceStatusMaxWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_max_wait_time"])

                mySql = ""

                #myParams = {"datasetName":self.datasetName}
                tempSql = self.m_configDict["SQL"]["get_race_status"]
                myParamsDict = { 'datasetName' : self.datasetName }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
    
                raceFlag=1
                totalRaceStatusWaitTime=0
                while raceFlag:
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if self.debugFlag:
                        print "tempSql = ", tempSql
                        print "myParamsDict = ", myParamsDict
                        print "mySql = ", mySql
                        print "returnStr = ", returnStr
                        print "chkRaceStatus - ReturnCode = ", int(returnStr[1].strip())
                    if returnStr[0] != '0':
                        self.m_logger.error("Unable to get race status using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

                    #Check if the load for this dataset is already running. If not, exit out of the function with normal return value
                    if int(returnStr[1].strip()) <= 1:
                        raceFlag=0
                        return 0

                    #Check if actual Total wait time is > configured total wait time.  If so, throw an error and exit


                    #Sleep for time defined by configured value for "active_load_wait_time"
                    time.sleep(localRaceStatusWaitTime)
                    #time.sleep(90)
                    totalRaceStatusWaitTime += localRaceStatusWaitTime
                    if self.debugFlag:
                        print "totalRaceStatusWaitTime = ", totalRaceStatusWaitTime, "localRaceStatusWaitTime =", localRaceStatusWaitTime
                    if totalRaceStatusWaitTime > localRaceStatusMaxWaitTime:
                        self.m_logger.error("In Fn chkRaceStatusLoads.  Total Actual Wait Time exceeds the configured value race_status_max_wait_time.  Either check if the Dataset is getting loaded or increase the either active_load_max or active_load_max_wait_time. totalRaceStatusWaitTime = " + str(totalRaceStatusWaitTime) + " localRaceStatusMaxWaitTime=" + str(localRaceStatusMaxWaitTime))
                        return 1
            else:
                return 0

            #Return failure
            return 1
                    
        except Exception as exp:
            self.m_logger.error("Failure in chkRaceStatus process for file with the error " + str(exp))
            sys.exit(1)

    def extractData(self,localDataRecordList, localFileID, localFileIDQueue, localDBFlag):
        """
        Purpose - To load the given datafile to the S3 bucket specified in the global mktConfigFile
        :param localDataRecordList: Datafile related info fetched from FINRA's manifest file including filename, filesize, recordcount
        :param localFileID: Internal File ID assigned to the local datafile
        :param localFileIDQueue: Queue in which, results of the operation is stored
        :param localDBFlag: Flag indicating if database should be used or not
        :return:
        """
        try:
            if self.debugFlag:
                print "Inside extractData function"

            if localDBFlag:
                """ Not sure if we need Race Status check for Extract
                raceStatusReturnValue=self.chkRaceStatus()
                if self.debugFlag:
                    print "raceStatusReturnValue=", raceStatusReturnValue

                if raceStatusReturnValue:
                    self.m_logger.error("Failure value returned by chkRaceStatus fn.  Return value = " + str(raceStatusReturnValue))
                    localFileIDQueue.put((localFileID, raceStatusReturnValue))
                    return 1
                """

                """  Need to integrate Active loads with tb_dxt_process_status and tb_dxt_process_status ?
                activeLoadsReturnValue=self.chkActiveLoads()
                if activeLoadsReturnValue:
                    self.m_logger.error("Failure value returned by chkRaceStatus fn.  Return value = " + str(activeLoadsReturnValue))
                    localFileIDQueue.put((localFileID, raceStatusReturnValue))
                    return 1
                """

                processID = os.getpid()
                hostName = socket.gethostname()
                # Need to check the order
                localDataFile = localDataRecordList[1]
                dataFileSize = int(localDataRecordList[2])
                dataFileRecordCount = int(localDataRecordList[3])

                #Insert Process status into Oracle db
                #DB_CALL - sp_dxt_insert_process_status(RUNID, FILE_ID, etc)
                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_process_status"]
                pStatus = 'P'
                pComment = 'Load started'
                #myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileID":str(localFileID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "fileSize": dataFileSize, "recordCount" : dataFileRecordCount, "status":pStatus , "lcomment":pComment}
                myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "fileSize": str(dataFileSize), "recordCount" : str(dataFileRecordCount), "status":pStatus , "lcomment":pComment}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)

                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr
    
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

            # Get the dataFileName file to be extracted from AWS
            dataFileName = localDataRecordList[1]
            #Here localFileWthPath is the local stage dir with file name
            localFileWthPath = self.m_configDict["ENV"]["stage_dir"] + "/" + dataFileName
            
            #Here targetFileWthPath is the AWS dir with file name
            targetFileWthPath = os.path.join(self.s3object.m_configFile["S3"]["path"], os.path.basename(dataFileName))
            targetBucket = self.s3object.m_configFile["S3"]["bucket"]
            encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]

            localAWSRetries = int(self.m_mktConfigDict["ENV"]["aws_retries"])
            if self.debugFlag:
                print("localFileWthPath =", localFileWthPath)
                print("targetFileWthPath =", targetFileWthPath)
                print("targetBucket =", targetBucket)
                print("encryptKeyFlag =", encryptKeyFlag)
                print("localAWSRetries =", localAWSRetries)
            initCount = 0
            while (initCount < localAWSRetries):
                extractReturnValue = 0

                #Call s3.data download to extract the manifest file (single part load)
                #extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag )
                extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket)

                if self.debugFlag:
                    print "extractReturnValue = ", extractReturnValue

                if int(extractReturnValue) == 0:
                    pStatus = 'S'
                    pComment = 'Load completed'
                    break
                else:
                    pStatus = 'F'
                    pComment = 'Load failed'
                    initCount += 1


            # Get the size of the file downloaded 
            localFileSize = os.stat(localFileWthPath).st_size

            # Check if the downloaded file size is matching with what is mentioned in manifest file.  If not mark it as failed
            if localFileSize != dataFileSize:
                pStatus = 'F'
                pComment = 'Actual file size != Manifest file size'


            localRecordCount = 0
 
            if localDBFlag:
                #Call DB to insert 'S' or 'F' in tb_dxt_process_status
                #localFileIDQueue.put((localFileID, extractReturnValue))
                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_process_status"]
                #myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileID":str(localFileID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "fileSize": localFileSize, "recordCount" : localRecordCount, "status":pStatus , "lcomment":pComment}
                myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "fileSize": str(localFileSize), "recordCount" : str(localRecordCount), "status":pStatus , "lcomment":pComment}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr

                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

                localFileIDQueue.put((localFileID,extractReturnValue))
            else:
                return extractReturnValue
      
        except Exception as exp:
            self.m_logger.error("Failure in extractData process for file with the error " + str(exp))
            if localDBFlag:
                localFileIDQueue.put((localFileID, 1))
            else:
                return 1

    def getRecords(self, fileDict, startDateTime, endDateTime):
        """
        Purpose - Function to sort the dictionary based on the key and return a sorted list
        :param fileDict : Dictionary containing Last_modified Date and file name
        :param startDateTime : Start DateTime in the format '2016-02-01 00:00:00'
        :param endDateTime : End DateTime in the format '2016-02-10 00:00:00'
        """
        try:
            patternToSearch =  self.m_configDict["ENV"]["pattern_to_search"]
            
            if self.debugFlag:
                print "fileDict = ", fileDict
                print "patternToSearch = ", patternToSearch
                print "startDateTime = ", startDateTime
                print "endDateTime = ", endDateTime
            sorted_keys = sorted(fileDict.iterkeys())
            start = bisect.bisect_left(sorted_keys, startDateTime)
            end = bisect.bisect_right(sorted_keys, endDateTime)
            if self.debugFlag:
                print "start = ", start
                print "end = ", end
            for fileItem in sorted(fileDict.iteritems())[start:end]:
                print "For fileItem = ", fileItem
                if patternToSearch in fileItem[1]:
                    yield fileItem[1]

        except Exception as exp:
            self.m_logger.error("Failed while executing getRecords to sort the dictionary content of dictionary  with Error = " + str(exp))
            sys.exit(1)

    def getManifestFileList(self, startDateTime, endDateTime):
        """
        Purpose - Function to sort the dictionary based on the key and return a sorted list
        :param startDateTime : Start DateTime in the format '2016-02-01 00:00:00'
        :param endDateTime : End DateTime in the format '2016-02-10 00:00:00'
        """
        try:
            if self.debugFlag:
                print "S3 Bucket = ", self.m_configDict["S3"]["bucket"]
                print "S3 Path = ", self.m_configDict["S3"]["path"]
                print "startDateTime = ", startDateTime
                print "endDateTime = ", endDateTime
            fileListDict = self.s3object.listBucketWPathByLastModified(self.m_configDict["S3"]["bucket"], self.m_configDict["S3"]["path"])
           
            #endDateTime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            manifestFileList = list(self.getRecords(fileListDict, startDateTime, endDateTime))

            if self.debugFlag:
                print "fileListDict = ", fileListDict
                print "manifestFileList = ", manifestFileList

            return manifestFileList
        except Exception as exp:
            self.m_logger.error("Failed while creating AWS manifest file list with Error = " + str(exp))
            return 1

    def processExtractor(self):
        """
        Purpose - Function responsible for getting the AWS token and reading the last modified date in DB and fetch the list of files from AWS to be processed.
                  For every manifest file found it validates the dataset, records
                  start/end status in the database, and fans the datafile
                  downloads out to worker processes via extractData.
        :param : None
        :return:
        """
        try:
            # DB_CALL
            # Make database call sp_dxt_validate_mktName(mktName) to validate mktName

            tempSql = self.m_configDict["SQL"]["validate_market_name"]
            myParamsDict = { 'mktName' : self.mktName }
            # Expand the placeholder names in the SQL template with their values
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr

            if returnStr[0] != '0':
                self.m_logger.error("Invalid market name provided " + mySql + ". Error = " + self.mktName)
                sys.exit(1)

            if self.debugFlag:
                print "MktName from DB = ", self.mktName
          
            #Build the string for mktConfigFile based on mktName and configFile info
            # e.g. /path/base.cfg + mkt "XYZ" -> /path/base_xyz.cfg
            self.mktConfigFile = os.path.dirname(self.configFile) + '/' + os.path.basename(self.configFile).split('.',1)[0].strip() + '_' + self.mktName.lower() + '.' + os.path.basename(self.configFile).split('.',1)[1].strip()

            if self.debugFlag:
                print("mktConfigFile = ", self.mktConfigFile)

            #Validate Market Config file is a valid file
            if not os.path.isfile(self.mktConfigFile):
                self.m_logger.error("Invalid market manifest file " + self.mktConfigFile)
                sys.exit(1)

            # Read Market specific config file and store it in a specific dictionary
            self.readMktConfigFile(self.mktConfigFile)

            if self.debugFlag:
                print("m_mktConfigDict=",self.m_mktConfigDict)

            # Read the table for the given market and fetch the last modified timestamp for the given manifest file
            #tempSql = self.m_configDict["SQL"]["get_last_modified"]
            #myParamsDict = { 'mktName' : self.mktName }
            #tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            #mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
            #returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            #if self.debugFlag:
                #print "tempSql = ", tempSql
                #print "myParamsDict = ", myParamsDict
                #print "mySql = ", mySql
                #print "returnStr = ", returnStr

            #if returnStr[0] == '0':
                #if returnStr[1]:
                    #lastModifiedDate=returnStr[1] 
                #else:
                    #lastModifiedDate="2015-01-01 00:00:00"
            #else:
                #self.m_logger.error("Unable to get last_modified date using the sql " + mySql + ". Error = " + self.mktName)
                #sys.exit(1)

            #Temp call.  Need to enable the previous lines to use DB call
            # NOTE(review): the DB-driven last-modified lookup above is disabled,
            # so every run re-scans from this fixed date — confirm this is intended

            lastModifiedDate="2015-01-01 00:00:00"

            # Get RunID
            self.runID = generate_runId()
            if self.debugFlag:
                print("RunID = ", self.runID)

            # Initialize S3 object and get FINRA cloud service token and establish s3 session
            self.s3object = S3(self.mktConfigFile, self.m_logger, self.debugFlag)
            self.s3object.getToken()

            # Get list of Manifest files to be processed

            #currentDate = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            #currentDate = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            # End of window is tomorrow, so files modified later today are included
            currentDate = (datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
            finraManifestFileList = self.getManifestFileList(lastModifiedDate, currentDate)
            
            if self.debugFlag:
                print("finraManifestFileList = ", finraManifestFileList)

            # Download manifest files in the manifest file list to a specific folder from AWS
            localFileDir = self.s3object.m_configFile["ENV"]["stage_dir"]

            targetFolder = self.s3object.m_configFile["S3"]["path"]
            #targetFileWthPath = os.path.join(self.s3object.m_configFile["S3"]["path"], os.path.basename(localFileWthPath))
            targetBucket = self.s3object.m_configFile["S3"]["bucket"]
            encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]
            #targetFileWthPath="50006/slmm_mnem.007.txt.gz"
            #localFileWthPath="/tmp/slmm_mnem.007.txt.gz"

            # Get an instance of the Manifest class
            localManifest = Manifest()
            fileIDQueue = Queue()
            localAWSRetries = int(self.m_mktConfigDict["ENV"]["aws_retries"])

            for finraManifestFile in finraManifestFileList:
                targetFileWthPath = targetFolder + finraManifestFile
                localFileWthPath = localFileDir + "/" + finraManifestFile
                if self.debugFlag:
                    print "targetFileWthPath = ", targetFileWthPath
                    print "localFileWthPath = ", localFileWthPath
                    print "finraManifestFile = ", finraManifestFile
                
                # Retry the manifest download up to localAWSRetries times
                initCount = 0
                while (initCount < localAWSRetries):
                    extractReturnValue = 0

                    #Call s3.data download to extract the manifest file (single part load)
                    #extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag )
                    extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket)

                    if self.debugFlag:
                        print "extractReturnValue = ", extractReturnValue


                    if extractReturnValue:
                        # Try it again
                        initCount += 1
                    else:
                        # Come out of the loop
                        break
                # End of while loop for AWS Retries
                  
                if extractReturnValue:
                    self.m_logger.error("Unable to fetch manifestFile = " + finraManifestFile + "from the path = " + targetFileWthPath + " to the local filesystem = " + localFileWthPath )
                    sys.exit(1)

                    # NOTE(review): everything below up to the end of this if-block
                    # is unreachable after sys.exit(1) above — dead code candidate
                    """ Not needed
                    if extractReturnValue == 0:
                        pStatus = 'P'
                        pComment = 'Load completed'
                        break
                    else:
                        pStatus = 'F'
                        pComment = 'Load failed'
                    """
                    initCount += 1

                # get datasetname from the manifest file.  Need check based on FINRA naming
                # (second dot-separated token of the basename, upper-cased)

                self.datasetName = os.path.basename(finraManifestFile).split('.',3)[1].strip().upper()
                if self.debugFlag:
                    print "datasetName = ", self.datasetName

                # Need to check DB call, once it is ready

                # Validate the manifest file name to make sure that we are expecting it
                tempSql = self.m_configDict["SQL"]["validate_dataset_name"]
                myParamsDict = { 'datasetName' : self.datasetName }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr
    
                #Check if dataset is there in the tb_dxt_dataset_master, if not, skip it and move to the next file.  For other errors, exit out of the program
                if int(returnStr[0]) < 0:
                    self.m_logger.error("Unable to validate datasetName " + mySql + ". Error = " + self.datasetName)
                    sys.exit(1)
                elif int(returnStr[0]) > 0:
                    self.m_logger.info("Give Dataset is not in the list to process.  Skipping it" + mySql + ". Dataset Name = " + self.datasetName)
                    # Continue to the next file entry in the manifest list
                    continue
                    
                # Insert a record into tb_dxt_dataset_trans with status 'P' for the given datasetName, saying that we start the process for this manifest file

                pStatus = 'P'
                tempSql = self.m_configDict["SQL"]["put_dataset"]
                myParamsDict = {'datasetName':self.datasetName, 'runID': str(self.runID), 'tDate':str(self.tradeDate), 'status': pStatus }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr
    
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to insert into tb_dxt_dataset_trans table " + mySql + ". Error = " + self.datasetName)
                    sys.exit(1)

                # Read the contents of manifestfile i.e. dataFileNames into a list - Will validate the datafiles as well
                manifestDelim = self.m_configDict["ENV"]["manifest_delim"]
                if self.debugFlag:
                    print "localFileWthPath = ", localFileWthPath
                manifestFileList = localManifest.readManifest(localFileWthPath, self.m_logger, manifestDelim, self.debugFlag)
                if self.debugFlag:
                    print "manifestDelim = ", manifestDelim
                    print "manifestFileList = ", manifestFileList

                # Maximum number of concurrent download workers
                process_count = int(self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"])

                # Now go into multiprocessing and call extractData function and extract files ones by one 
                fileID=1
                dbFlag=1
                fileIDQueue = Queue()
                procs = []
                doneCounter = 0
                sendCounter = 0
                failureFlag = 0

                # Fan out: keep at most process_count workers in flight, then
                # drain the result queue until all manifest entries are done
                while doneCounter < len(manifestFileList):
                    while sendCounter < len(manifestFileList) and sendCounter - doneCounter < process_count:
                        if self.debugFlag:
                            print "manifestFileList[sendCounter][1]", manifestFileList[sendCounter][1], "fileID = ", fileID
                        # Call fn extractData to fetch files from AWS.  Pass manifestFileList[sendCounter] as it contains the whole record including the filename, filesize & row count
                        # NOTE(review): `Process` here must be multiprocessing.Process
                        # and `Extractor` the enclosing class — confirm imports, as a
                        # `Process` class is also defined elsewhere in this file
                        processHandle = Process(target=Extractor.extractData, args=(self, manifestFileList[sendCounter],fileID, fileIDQueue, dbFlag))
                        processFlag=1
                        processHandle.start()
                        procs.append(processHandle)
                        sendCounter += 1
                        fileID += 1
                    if processFlag:
                        # NOTE(review): procs is reset inside the loop that
                        # iterates it; iteration continues over the original
                        # list object, but this reads oddly — verify intent
                        for p in procs:
                            p.join()
                            procs=[]
                        processFlag=0
                    while not fileIDQueue.empty():  # process completed results as they arrive
                        #time.sleep(3)
                        qFileID, qResult = fileIDQueue.get()
                        if self.debugFlag:
                            print("qFileID = ", qFileID, "qResult = ", qResult)
                        doneCounter += 1
                        if qResult:
                            failureFlag = 1
                    if self.debugFlag:
                        print "ProcessFlag = ", processFlag, "sendCounter = ", sendCounter, "doneCounter = ", doneCounter
                    if failureFlag:
                        break
                        
                if self.debugFlag:
                    print "Failure Flag = ", failureFlag
    
                if failureFlag:
                    pStatus = 'F'
                else:
                    pStatus = 'S'

                # Move all the data files to inbox  from the stg location.  No need for this step, as Joejo mentioned there will be another Tidal job doing this step

                # Move the manifest file to inbox from the stg location

                # insert a record into tb_dxt_dataset_trans table with 'S' or 'F' record

                #Call Oracle fn to insert status 'S' into TB_DDY_DATASET_TRANS with RUNID etc
                #DB_CALL
                # Make database call sp_dxt_insert_dataset_trans and insert data based on Failure or Success

                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_dataset"]
            
                myParamsDict = {"datasetName":self.datasetName, "runID": str(self.runID), "tDate":str(self.tradeDate), "status": pStatus }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
    
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
    
                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr
    
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

                if failureFlag:
                    # NOTE(review): self.manifestFile is not assigned anywhere in
                    # this method (localFileWthPath is the downloaded manifest) —
                    # this likely raises AttributeError; confirm where it is set
                    self.m_logger.error("Extract failed for data files for manifest file " + self.manifestFile)
                    sys.exit(1)

           # End of for loop for finraManifestFiles

        except Exception as e:
            self.m_logger.error("ProcessExtractor failed with error " + str(e))
            sys.exit(1)
Esempio n. 9
0
class Loader():
    #class variables
    m_logger = ""

    #database objects
    m_oracle_db = ""
    m_netezza_db = ""

    def __init__(self, configFile, tradeDate):
        """
        Purpose: Constructor

        :param self:        class object itself
        :param configFile:  Configuration file to use
        :param tradeDate:   Trade date this loader run is associated with
        """
        # Bring the logger up first so any later failure can be recorded,
        # then stamp the log with the generic file header
        self.m_logger = Logger(logging.INFO, configFile, tradeDate)
        self.m_logger.addFileHandler(logging.DEBUG)
        self.m_logger.addGenericInfo(__file__)

        self.tradeDate = tradeDate

        try:
            # Parse the configuration into a dictionary and open the Oracle
            # connection described by it
            self.m_configDict = configuration(configFile, True).m_dictionary
            self.m_oracle_db = Oracle(self.m_configDict, self.m_logger)
        except Exception as err:
            # Initialization failure is fatal: log it, echo it, and bail out
            self.m_logger.error("Unable to initialize the configuration " + str(err))
            print("ERROR: Unable to initialize the configuration for logger " + str(err))
            sys.exit(1)

    def readMktConfigFile(self, mktConfigFile):
        """
        Purpose - To read the content of mktConfigFile into the global dictionary
                  m_mktConfigDict for reference
        :param mktConfigFile: path of the market-specific configuration file
        :return:
        """
        try:
            # Parse the market config and keep the resulting dictionary on self
            self.m_mktConfigDict = configuration(mktConfigFile, True).m_dictionary
        except Exception as err:
            # A market config that fails to parse is fatal for the run
            self.m_logger.error("Unable to initialize the configuration for logger " + str(err))
            print("ERROR: Unable to initialize the configuration for logger " + str(err))
            sys.exit(1)

    def chkActiveLoads(self):
        """
        Purpose - To check the count of active Active loads happening at a given point
        :param None: None at this point
        :return:
        """
        try:
            if self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_check_flag"] == 'Y': 
                localActiveLoadMax = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"]
                localActiveLoadWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_wait_time"]
                localActiveLoadMaxWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max_wait_time"]

                mySql = ""
                myParams = ""
                mySql = self.m_configDict["SQL"]["get_active_loads"]
    
                activeFlag=1
                totalActiveWaitTime=0
                while activeFlag:
                    #returnVal=0
                    #returnCode=0
                    #returnVal, returnCode = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
                    if returnStr[0] != 0:
                        self.m_logger.error("Unable to get active loads using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

                    #Check if actual active loads is <= configured active loads.  If so, return out of the fn
                    #print "chkActiveLoads - ReturnCode = ", returnCode
                    if returnStr[1] <= localActiveLoadMax:
                        activeFlag=0
                        return 0

                    #Sleep for time defined by configured value for "active_load_wait_time"
                    time.sleep(localActiveLoadWaitTime)
                    totalActiveWaitTime += localActiveLoadWaitTime

                    #Check if actual Total wait time is > configured total wait time.  If so, throw an error and exit
                    if totalActiveWaitTime > localActiveMaxWaitTime:
                        self.m_logger.error("In Fn chkActiveLoads.  Total Actual Wait Time exceeds the configured value active_load_max_wait_time.  Either cleanup orphaned loads or increase the either active_load_max or active_load_max_wait_time. totalActiveWaitTime = " + str(totalActiveWaitTime) + " localActiveMaxWaitTime=" + str(localActiveMaxWaitTime))
                        return 1
            else:
                return 0

            #Return failure
            return 1
                    
        except Exception as exp:
            #self.m_logger.error("Failure in chkActiveLoads process for file " + sourceFileWthPath + " with fileID = " + localFileID + " with the error " + str(exp))
            self.m_logger.error("Failure in chkActiveLoads process for file with the error " + str(exp))
            sys.exit(1)

    def chkRaceStatus(self):
        """
        Purpose - To check if a load is already running for the given dataset.

        Polls the race-status SQL until at most one load (this one) is
        registered for the dataset, sleeping race_status_wait_time seconds
        between polls and giving up after race_status_max_wait_time seconds.

        :return: 0 when no competing load is running (or the check is disabled),
                 1 when the maximum wait time was exceeded.
        """
        try:
            # Skip the check entirely unless enabled in the market config
            if self.m_mktConfigDict["RACE"]["race_status_check_flag"] == 'Y':
                localRaceStatusWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_wait_time"])
                localRaceStatusMaxWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_max_wait_time"])

                mySql = self.m_configDict["SQL"]["get_race_status"]
                myParams = {"datasetName":self.datasetName}

                totalRaceStatusWaitTime = 0
                while True:
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
                    if returnStr[0] != 0:
                        self.m_logger.error("Unable to get race status using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

                    # A count <= 1 means only this load (or none) is registered
                    # for the dataset - no race, return success.
                    if int(returnStr[1].strip()) <= 1:
                        return 0

                    # Another load is still running - wait and poll again
                    time.sleep(localRaceStatusWaitTime)
                    totalRaceStatusWaitTime += localRaceStatusWaitTime

                    # Give up once the accumulated wait exceeds the configured cap
                    if totalRaceStatusWaitTime > localRaceStatusMaxWaitTime:
                        self.m_logger.error("In Fn chkRaceStatus.  Total Actual Wait Time exceeds the configured value race_status_max_wait_time.  Either check if the Dataset is getting loaded or increase either race_status_wait_time or race_status_max_wait_time. totalRaceStatusWaitTime = " + str(totalRaceStatusWaitTime) + " localRaceStatusMaxWaitTime=" + str(localRaceStatusMaxWaitTime))
                        return 1
            else:
                return 0

            # Defensive fallback - should be unreachable
            return 1

        except Exception as exp:
            self.m_logger.error("Failure in chkRaceStatus process for file with the error " + str(exp))
            sys.exit(1)

    def loadData(self,localDataFile, localFileID, localFileIDQueue, localDBFlag):
        """
        Purpose - To load the given datafile to the S3 bucket specified in the global mktConfigFile.

        :param localDataFile: Source datafile to be uploaded to S3
        :param localFileID: Internal File ID assigned to the source datafile
        :param localFileIDQueue: Queue in which the (fileID, status) result is stored
        :param localDBFlag: When truthy, run the race/active-load checks, record
                            per-file status in the database and deliver the
                            result via localFileIDQueue; when falsy, skip all
                            DB work and return the upload status directly.
        :return: 0 on success / 1 on failure when localDBFlag is falsy;
                 otherwise results are delivered through localFileIDQueue.
        """
        try:
            if localDBFlag:
                # Abort early if another load for this dataset is racing with us
                raceStatusReturnValue = self.chkRaceStatus()
                if raceStatusReturnValue:
                    self.m_logger.error("Failure value returned by chkRaceStatus fn.  Return value = " + str(raceStatusReturnValue))
                    localFileIDQueue.put((localFileID, raceStatusReturnValue))
                    return 1

                # Abort if too many loads are already active system-wide
                activeLoadsReturnValue = self.chkActiveLoads()
                if activeLoadsReturnValue:
                    self.m_logger.error("Failure value returned by chkActiveLoads fn.  Return value = " + str(activeLoadsReturnValue))
                    localFileIDQueue.put((localFileID, activeLoadsReturnValue))
                    return 1

                processID = os.getpid()
                hostName = socket.gethostname()

                # Record that this file's load has started ('P' = in progress)
                mySql = self.m_configDict["SQL"]["put_process_status"]
                pStatus = 'P'
                pComment = 'Load started'
                myParams = {"datasetName":self.datasetName, "runID":self.runID, "fileID":localFileID, "fileName":localDataFile, "tDate":self.tradeDate, "processID":processID, "hostName":hostName, "status":pStatus , "lcomment":pComment, "tidalRunID":self.tidalRunID}

                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
                if returnStr[0] != 0:
                    self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

            sourceFileWthPath = localDataFile

            # Resolve S3 target details from the market-specific configuration.
            # The shared token/connection was already established by the caller
            # via self.s3object.getToken(), outside the worker processes.
            targetFileWthPath = os.path.join(self.s3object.m_configFile["S3"]["path"], os.path.basename(sourceFileWthPath))
            targetBucket = self.s3object.m_configFile["S3"]["bucket"]
            encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]

            # Single-part upload; 0 indicates success
            loadReturnValue = self.s3object.loadDataSinglePart(sourceFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag )

            if loadReturnValue == 0:
                pStatus = 'S'
                pComment = 'Load completed'
            else:
                pStatus = 'F'
                pComment = 'Load failed'

            if localDBFlag:
                # Record the final per-file status ('S' or 'F') in the database
                mySql = self.m_configDict["SQL"]["put_process_status"]
                myParams = {"datasetName":self.datasetName, "runID":self.runID, "fileID":localFileID, "fileName":localDataFile, "tDate":self.tradeDate, "processID":processID, "hostName":hostName, "status":pStatus , "lcomment":pComment, "tidalRunID":self.tidalRunID}

                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
                if returnStr[0] != 0:
                    self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

                localFileIDQueue.put((localFileID,loadReturnValue))
            else:
                return loadReturnValue

        except Exception as exp:
            self.m_logger.error("Failure in loadData process for file with the error " + str(exp))
            if localDBFlag:
                # Report failure as a (fileID, status) tuple so the parent
                # process can detect it when draining the queue
                localFileIDQueue.put((localFileID, 1))
            else:
                return 1

    def createFinraManifestFile(self, manifestFile):
        """
        Purpose - Build the FINRA "done" file for a completed load.

        Reads the internal manifest (pipe-delimited: id|filepath|size|rows per
        line) to compute per-file details and totals, reads the dataset
        defaults file for the trailing attribute section, then writes
        <donefile_dir>/<manifest basename>.done.

        :param manifestFile: Path to the internal manifest file to summarise
        :return: 0 on success, 1 on any failure (errors are logged)
        """
        try:
            # Pass 1: summarise the manifest - per-file details plus totals
            with open(manifestFile,"r") as fh:
                self.totalRows = 0
                self.totalSize = 0
                self.fileCount = 0
                self.fileDict = {}
                for data in fh:
                    data = data.rstrip("\n")
                    self.fileCount +=1
                    mylist = data.split("|")
                    # [id, data file basename, compressed size, row count]
                    self.fileDict[self.fileCount] = [mylist[0],os.path.basename(mylist[1]),int(mylist[2]),int(mylist[3])]
                    self.totalRows += int(mylist[3])
                    self.totalSize += int(mylist[2])
        except Exception as exp:
            self.m_logger.error("Failed while processing readManifest with Error = " + str(exp))
            return 1

        try:
            # Pass 2: load the defaults file.  self.defaultsFile is populated
            # from the database (not the config file).  Lines keep their
            # trailing newlines so they can be written out verbatim below.
            self.defaultsFileWthPath = self.m_mktConfigDict["DATASET"]["defaults_dir"] + "/" + self.defaultsFile

            with open(self.defaultsFileWthPath,"r") as fh:
                self.defaultsDict = {}
                self.defaultsCount  = 0
                for data in fh:
                    self.defaultsCount +=1
                    self.defaultsDict[self.defaultsCount]=data

        except Exception as exp:
            self.m_logger.error("Failed while processing defaults file " + self.defaultsFileWthPath + " with Error = " + str(exp))
            return 1

        try:
            # Pass 3: emit the done file next to the configured donefile_dir
            self.finraManifestFile =  self.m_mktConfigDict["ENV"]["donefile_dir"] + "/" +  os.path.basename(manifestFile) + ".done"
            with open(self.finraManifestFile,"w") as finraMnFH:
                finraMnFH.write("# AWS RunID : {}\n".format(str(self.runID)))
                finraMnFH.write("# Dataset : {0} , TradeDate : {1}\n".format(str(self.datasetName),str(self.tradeDate)))
                finraMnFH.write("total_compressed={}\n".format(self.totalSize))
                finraMnFH.write("total_rows={}\n".format(self.totalRows))
                finraMnFH.write("no of files={}\n".format(self.fileCount))

                # One name line and one row-count line per data file
                for key,val in self.fileDict.items():
                    finraMnFH.write("file_{0}={1}\n".format(str(key),val[1]))
                    finraMnFH.write("file_{0}_rows={1}\n".format(str(key),val[3]))

                # Defaults lines already carry their own newlines
                finraMnFH.write("# Data Attributes\n")
                for key,val in self.defaultsDict.items():
                    finraMnFH.write("{0}".format(str(val)))
            return 0
        except Exception as exp:
            self.m_logger.error("Failed while creating AWS Done file " + self.finraManifestFile + " with Error = " + str(exp))
            return 1

    def processLoader(self, configFile, manifestFile, datasetName, tidalRunID):
        """
        Purpose - Orchestrate a full dataset load: resolve market info from the
        database, record run status, fan out one worker process per data file
        to upload to S3, then generate and upload the FINRA done file.

        :param configFile: Base configuration file (used to derive the
                           market-specific config file name)
        :param manifestFile: Manifest file name
                             (format: manifest.<datasetName>.<tradeDate>.<tidalRunID>)
        :param datasetName: Name of the dataset being loaded
        :param tidalRunID: External (Tidal) run identifier
        :return: None; exits the process with status 1 on fatal errors
        """
        try:
            self.datasetName = datasetName
            self.tidalRunID = tidalRunID

            mktName = ''

            # Validate the manifest file exists before doing anything else
            self.manifestFile = self.m_configDict["ENV"]["manifestfile_dir"] + "/" + manifestFile
            if not os.path.isfile(self.manifestFile):
                self.m_logger.error("Invalid manifest file " + self.manifestFile)
                sys.exit(1)

            # Look up the market name and defaults file name for this dataset.
            # Note: str.strip() never returns None, so emptiness (not None-ness)
            # is the correct validity check here.
            mySql = self.m_configDict["SQL"]["get_mkt_defaults_filename"]
            myParams = {"datasetName":self.datasetName}
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
            if returnStr[0] == 0:
                if returnStr[2].strip():
                    mktName = returnStr[2].strip()
                else:
                    self.m_logger.error("Invalid Market Name " + returnStr[2].strip() )
                    sys.exit(1)

                if returnStr[3].strip():
                    self.defaultsFile = returnStr[3].strip()
                else:
                    self.m_logger.error("Invalid Defaults File " + returnStr[3].strip() )
                    sys.exit(1)
            else:
                self.m_logger.error("Unable to get market info from the database using sql " + mySql + ". Error = " + returnStr[1])
                sys.exit(1)

            # Derive the market config file name from the base config file,
            # e.g. loader.cfg + "ICE" -> loader_ice.cfg
            self.mktConfigFile = os.path.dirname(configFile) + '/' + os.path.basename(configFile).split('.',1)[0].strip() + '_' + mktName.lower() + '.' + os.path.basename(configFile).split('.',1)[1].strip()
            if not os.path.isfile(self.mktConfigFile):
                self.m_logger.error("Invalid market manifest file " + self.mktConfigFile)
                sys.exit(1)

            # Load the market-specific configuration into self.m_mktConfigDict
            self.readMktConfigFile(self.mktConfigFile)

            # Read and validate the manifest entries (one per data file)
            localManifest = Manifest()
            manifestFileList = localManifest.readManifest(self.manifestFile, self.m_logger)

            # Internal run identifier for all status records of this run
            self.runID = generate_runId()

            # Record that the dataset load has started ('P' = in progress)
            mySql = self.m_configDict["SQL"]["put_dataset"]
            pStatus = 'P'
            myParams = {"datasetName":self.datasetName, "runID": self.runID, "tDate":self.tradeDate, "status": pStatus, "tidalRunID":self.tidalRunID}
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
            if returnStr[0] != 0:
                self.m_logger.error("Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                sys.exit(1)

            # Record each manifest entry (data file name, record count, size)
            for dataRecord in manifestFileList:
                mySql = self.m_configDict["SQL"]["put_manifest"]
                myParams = {"datasetName":self.datasetName, "runID": self.runID, "tDate":self.tradeDate, "dataFileName":dataRecord[1], "manifestFileName":manifestFile , "noOfRecords": dataRecord[2], "fileSize":dataRecord[3], "tidalRunID":self.tidalRunID}
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
                if returnStr[0] != 0:
                    self.m_logger.error("Unable to put manifest info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

            # Establish the shared S3 session once, outside the worker processes
            self.s3object = S3(self.mktConfigFile, self.m_logger)
            self.s3object.getToken()

            # Fan out one loader process per data file; results come back
            # through fileIDQueue as (fileID, status) tuples
            fileID=1
            fileIDQueue = Queue()
            dbFlag=1
            procs = []
            for dataRecord in manifestFileList:
                processHandle = Process(target=Loader.loadData, args=(self, dataRecord[1],fileID, fileIDQueue, dbFlag))
                processHandle.start()
                procs.append(processHandle)
                fileID += 1

            for p in procs:
                p.join()

            # Any non-zero per-file status marks the whole run as failed
            failureFlag=0
            while not fileIDQueue.empty():
                qFileID, qResult = fileIDQueue.get()
                if qResult:
                    failureFlag=1

            if failureFlag:
                pStatus = 'F'
            else:
                pStatus = 'S'

                # Generate the FINRA done file and push it to AWS
                returnValue = self.createFinraManifestFile(self.manifestFile)
                if returnValue:
                    self.m_logger.error("Unable to generate done file. Please fix the issue the re-run the load")

                # Upload the done file itself (no per-file DB bookkeeping)
                dbFlag=0
                fileID=0
                finraManifestLoadStatus=self.loadData(self.finraManifestFile ,fileID, fileIDQueue, dbFlag)
                if finraManifestLoadStatus:
                    pStatus = 'F'
                    self.m_logger.error("Unable to load finra manifest file ")

            # Record the final dataset status ('S' or 'F')
            mySql = self.m_configDict["SQL"]["put_dataset"]
            myParams = {"datasetName":self.datasetName, "runID": self.runID, "tDate":self.tradeDate, "status": pStatus, "tidalRunID":self.tidalRunID}
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql, myParams)
            if returnStr[0] != 0:
                self.m_logger.error("Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                sys.exit(1)

        except Exception as e:
            self.m_logger.error("ProcessLoader failed with error " + str(e))
            sys.exit(1)
# Esempio n. 10 (Example 10)
# 0
class Process():
    """Driver object that wires up logging, the Adama/Picard Postgres
    connections and the process lock for a loader run."""

    # class variables

    # lock, logger and loader type
    m_lock = ""
    m_logger = ""
    m_loaderType = ""

    # Adama PG object
    m_adamapg = ""
    # Picard PG object
    m_picardpg = ""

    # Process details
    process_name = ""

    def __init__(self, configFile):
        """ Purpose: Constructor

        :param self:        class object itself
        :param configFile:  Configuration file to use

        """
        # Initialize global logger object
        self.m_logger = Logger(logging.INFO, configFile)
        self.m_logger.addFileHandler(logging.DEBUG)
        try:
            # Add generic information
            fname = inspect.getfile(inspect.currentframe())
            fpath = os.path.dirname(os.path.abspath(fname))
            self.m_logger.addGenericInfo(fpath + "/" + fname)

            # export all the values from config into environment
            configObject = configuration(configFile, True)

            # Create Adama replica PG db object
            self.m_adamapg = Postgre(os.environ['adama_pg'], self.m_logger)

            # Create Picard Postgres Datamart object
            self.m_picardpg = Postgre(os.environ['picard_pg'], self.m_logger)

            # Create lock for the process
            self.m_lock = Lock(os.environ['LOCK_FILE'], self.m_logger)

            # loader type
            self.m_loaderType = self.getloaderType()

            # process name
            self.process_name = os.environ['process_name']

            self.m_logger.info("Initializing the process, %s" % self.process_name )

        # "except ... as ..." (not the Python-2-only comma form) for
        # consistency with the rest of the file and forward compatibility
        except Exception as e:
            # Record the failure, release whatever was already acquired and
            # abort - a partially initialized Process is unusable
            self.m_logger.error("ERROR: Unable to initialize the process due to: %s" % str(e))
            self.updateProcessStatus("F")
            if self.m_adamapg:
                self.m_adamapg.closeConnection()
            if self.m_picardpg:
                self.m_picardpg.closeConnection()
            if self.m_lock:
                self.m_lock.remove()
            sys.exit("ERROR: Unable to initialize the process due to: %s" % str(e))
# Esempio n. 11 (Example 11)
# 0
# File: dxt.py Progetto: tnraman/ddy
class Extractor():
    #class variables
    m_logger = ""

    #database objects
    m_oracle_db = ""

    def __init__(self, configFile, mktName, tradeDate, debugFlag):
        """
        Purpose: Constructor

        :param self:        class object itself
        :param configFile:  Configuration file to use
        :param mktName:     Market name for this extract
        :param tradeDate:   Trade date being processed
        :param debugFlag:   When truthy, emit extra diagnostic output
        """

        # Set up the logger first so every later failure can be recorded
        self.m_logger = Logger(logging.INFO, configFile, tradeDate)
        self.m_logger.addFileHandler(logging.DEBUG)
        self.m_logger.addGenericInfo(__file__)

        # Stash the constructor arguments for later use by other methods
        self.configFile = configFile
        self.mktName = mktName
        self.tradeDate = tradeDate
        self.debugFlag = debugFlag

        try:
            # Load the configuration file into a plain dictionary
            self.m_configDict = configuration(self.configFile, True).m_dictionary

            # Open the Oracle connection used by all database helpers
            self.m_oracle_db = Oracle(self.m_configDict, self.m_logger)

        except Exception as exp:
            # An exception occurred
            self.m_logger.error("Unable to initialize the configuration " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def readMktConfigFile(self, mktConfigFile):
        """
        Purpose - Load the market-specific config file into the instance
        dictionary self.m_mktConfigDict for later lookups.

        :param mktConfigFile: Path to the market-specific configuration file
        :return: None; exits the process on failure
        """
        try:
            parsedConfig = configuration(mktConfigFile, True)
            self.m_mktConfigDict = parsedConfig.m_dictionary
        except Exception as exp:
            # An exception occurred
            self.m_logger.error("Unable to initialize the configuration for logger " + str(exp))
            print("ERROR: Unable to initialize the configuration for logger " + str(exp))
            sys.exit(1)

    def chkActiveLoads(self):
        """
        Purpose - To check the count of active Active loads happening at a given point
        :param None: None at this point
        :return:
        """
        try:
            if self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_check_flag"] == 'Y': 
                localActiveLoadMax = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"]
                localActiveLoadWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_wait_time"]
                localActiveLoadMaxWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max_wait_time"]

                mySql = ""
                myParams = ""
                mySql = self.m_configDict["SQL"]["get_active_loads"]
    
                activeFlag=1
                totalActiveWaitTime=0
                while activeFlag:
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if self.debugFlag:
                        print "mySql = ", mySql
                        print "returnStr = ", returnStr
                        print "chkActiveLoads - Active Loads value = ", int(returnStr[1].strip())
                    if returnStr[0] != '0':
                        self.m_logger.error("Unable to get active loads using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

                    #Check if actual active loads is <= configured active loads.  If so, return out of the fn
                    if int(returnStr[1].strip()) <= localActiveLoadMax:
                        activeFlag=0
                        return 0

                    #Sleep for time defined by configured value for "active_load_wait_time"
                    time.sleep(localActiveLoadWaitTime)
                    totalActiveWaitTime += localActiveLoadWaitTime

                    #Check if actual Total wait time is > configured total wait time.  If so, throw an error and exit
                    if totalActiveWaitTime > localActiveMaxWaitTime:
                        self.m_logger.error("In Fn chkActiveLoads.  Total Actual Wait Time exceeds the configured value active_load_max_wait_time.  Either cleanup orphaned loads or increase the either active_load_max or active_load_max_wait_time. totalActiveWaitTime = " + str(totalActiveWaitTime) + " localActiveMaxWaitTime=" + str(localActiveMaxWaitTime))
                        return 1
            else:
                return 0

            #Return failure
            return 1
                    
        except Exception as exp:
            self.m_logger.error("Failure in chkActiveLoads process for file with the error " + str(exp))
            sys.exit(1)

    def chkRaceStatus(self):
        """
        Purpose - To check if a load is already running for the given dataset.

        Polls the database (config sql key "get_race_status") until at most one
        load is reported for self.datasetName, sleeping race_status_wait_time
        seconds between polls, for up to race_status_max_wait_time seconds total.

        :param None: None at this point
        :return: 0 if no competing load is running (or the check is disabled via
                 race_status_check_flag != 'Y'); 1 if the maximum wait time is
                 exceeded.  Exits the process on database errors.
        """
        try:
            # Only perform the race check when explicitly enabled in the market config.
            if self.m_mktConfigDict["RACE"]["race_status_check_flag"] == 'Y':
                localRaceStatusWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_wait_time"])
                localRaceStatusMaxWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_max_wait_time"])

                mySql = ""

                #myParams = {"datasetName":self.datasetName}
                # The SQL template embeds parameter names as plain text; build one
                # alternation regex over the keys and substitute each occurrence
                # with its value to produce the runnable statement.
                tempSql = self.m_configDict["SQL"]["get_race_status"]
                myParamsDict = { 'datasetName' : self.datasetName }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)

                raceFlag=1
                totalRaceStatusWaitTime=0
                while raceFlag:
                    # returnStr[0] is a return code string; returnStr[1] carries the
                    # count of running loads for this dataset (parsed as int below).
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if self.debugFlag:
                        print "tempSql = ", tempSql
                        print "myParamsDict = ", myParamsDict
                        print "mySql = ", mySql
                        print "returnStr = ", returnStr
                        print "chkRaceStatus - ReturnCode = ", int(returnStr[1].strip())
                    if returnStr[0] != '0':
                        self.m_logger.error("Unable to get race status using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

                    #Check if the load for this dataset is already running. If not, exit out of the function with normal return value
                    if int(returnStr[1].strip()) <= 1:
                        raceFlag=0
                        return 0

                    #Check if actual Total wait time is > configured total wait time.  If so, throw an error and exit


                    #Sleep for time defined by configured value for "active_load_wait_time"
                    time.sleep(localRaceStatusWaitTime)
                    #time.sleep(90)
                    totalRaceStatusWaitTime += localRaceStatusWaitTime
                    if self.debugFlag:
                        print "totalRaceStatusWaitTime = ", totalRaceStatusWaitTime, "localRaceStatusWaitTime =", localRaceStatusWaitTime
                    if totalRaceStatusWaitTime > localRaceStatusMaxWaitTime:
                        self.m_logger.error("In Fn chkRaceStatusLoads.  Total Actual Wait Time exceeds the configured value race_status_max_wait_time.  Either check if the Dataset is getting loaded or increase the either active_load_max or active_load_max_wait_time. totalRaceStatusWaitTime = " + str(totalRaceStatusWaitTime) + " localRaceStatusMaxWaitTime=" + str(localRaceStatusMaxWaitTime))
                        return 1
            else:
                return 0

            # NOTE(review): the while loop above only exits via return statements,
            # so this fallback failure return appears unreachable — confirm.
            #Return failure
            return 1

        except Exception as exp:
            self.m_logger.error("Failure in chkRaceStatus process for file with the error " + str(exp))
            sys.exit(1)

    def extractData(self,localDataRecordDict, localFileID, localFileIDQueue, localDBFlag):
        """
        Purpose - To load the given datafile to the S3 bucket specified in the global mktConfigFile
        :param localDataRecordDict: Datafile related info fetched from FINRA's manifest file including filename, filesize, recordcount
        :param localFileID: Internal File ID assigned to the local datafile
        :param localFileIDQueue: Queue in which, results of the operation is stored
        :param localDBFlag: Flag indicating if database should be used or not
        :return:
        """
        try:
            if self.debugFlag:
                print "Inside extractData function"
                print "localDataRecordDict = ", localDataRecordDict

            if localDBFlag:
                """ Not sure if we need Race Status check for Extract
                raceStatusReturnValue=self.chkRaceStatus()
                if self.debugFlag:
                    print "raceStatusReturnValue=", raceStatusReturnValue

                if raceStatusReturnValue:
                    self.m_logger.error("Failure value returned by chkRaceStatus fn.  Return value = " + str(raceStatusReturnValue))
                    localFileIDQueue.put((localFileID, raceStatusReturnValue))
                    return 1
                """

                """  Need to integrate Active loads with tb_dxt_process_status and tb_dxt_process_status ?
                activeLoadsReturnValue=self.chkActiveLoads()
                if activeLoadsReturnValue:
                    self.m_logger.error("Failure value returned by chkRaceStatus fn.  Return value = " + str(activeLoadsReturnValue))
                    localFileIDQueue.put((localFileID, raceStatusReturnValue))
                    return 1
                """

                processID = os.getpid()
                hostName = socket.gethostname()
                # Need to check the order
                test_var = str(self.m_configDict["dxt"]["DATA_FILE_NAME_STR"])
                localDataFile = localDataRecordDict[self.m_configDict["dxt"]["DATA_FILE_NAME_STR"]]
                localDataFileSize = int(localDataRecordDict[self.m_configDict["dxt"]["DATA_FILE_SIZE_STR"]])
                localDataFileRecordCount = int(localDataRecordDict[self.m_configDict["dxt"]["NO_OF_ROWS_STR"]])

                if self.debugFlag:
                    print "localDataFile = ", localDataFile
                    print "localDataFileSize = ", localDataFileSize
                    print "localDataFileRecordCount = ", localDataFileRecordCount

                #Insert Process status into Oracle db
                #DB_CALL - sp_dxt_insert_process_status(RUNID, FILE_ID, etc)
                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_process_status"]
                pStatus = 'P'
                pComment = 'Load started'
                #myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileID":str(localFileID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "fileSize": localDataFileSize, "recordCount" : localDataFileRecordCount, "status":pStatus , "lcomment":pComment}
                myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "fileSize": str(localDataFileSize), "recordCount" : str(localDataFileRecordCount), "status":pStatus , "lcomment":pComment}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)

                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr
    
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

            # Get the dataFileName file to be extracted from AWS
            dataFileName = localDataRecordDict[self.m_configDict["dxt"]["DATA_FILE_NAME_STR"]]
            #Here localFileWthPath is the local stage dir with file name
            localFileWthPath = self.m_configDict["ENV"]["stage_dir"] + "/" + dataFileName
            
            #Here targetFileWthPath is the AWS dir with file name
            targetFileWthPath = os.path.join(self.s3object.m_configFile["S3"]["path"], os.path.basename(dataFileName))
            targetBucket = self.s3object.m_configFile["S3"]["bucket"]
            encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]

            localAWSRetries = int(self.m_mktConfigDict["ENV"]["aws_retries"])
            if self.debugFlag:
                print("localFileWthPath =", localFileWthPath)
                print("targetFileWthPath =", targetFileWthPath)
                print("targetBucket =", targetBucket)
                print("encryptKeyFlag =", encryptKeyFlag)
                print("localAWSRetries =", localAWSRetries)
            initCount = 0
            while (initCount < localAWSRetries):
                extractReturnValue = 0

                #Call s3.data download to extract the manifest file (single part load)
                #extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag )
                extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket)

                if self.debugFlag:
                    print "extractReturnValue = ", extractReturnValue

                if int(extractReturnValue) == 0:
                    pStatus = 'S'
                    pComment = 'Load completed'
                    break
                else:
                    pStatus = 'F'
                    pComment = 'Load failed'
                    initCount += 1


            # Get the size of the file downloaded 
            localFileSize = os.stat(localFileWthPath).st_size

            # Check if the downloaded file size is matching with what is mentioned in manifest file.  If not mark it as failed
            if localFileSize != localDataFileSize:
                pStatus = 'F'
                pComment = 'Actual file size != Manifest file size'


            localRecordCount = 0
 
            if localDBFlag:
                #Call DB to insert 'S' or 'F' in tb_dxt_process_status
                #localFileIDQueue.put((localFileID, extractReturnValue))
                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_process_status"]
                #myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileID":str(localFileID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "fileSize": localFileSize, "recordCount" : localRecordCount, "status":pStatus , "lcomment":pComment}
                myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "fileSize": str(localFileSize), "recordCount" : str(localRecordCount), "status":pStatus , "lcomment":pComment}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr

                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

                localFileIDQueue.put((localFileID,extractReturnValue))
            else:
                return extractReturnValue
      
        except Exception as exp:
            self.m_logger.error("Failure in extractData process for file with the error " + str(exp))
            if localDBFlag:
                localFileIDQueue.put((localFileID, 1))
            else:
                return 1

    def getRecords(self, fileDict, startDateTime, endDateTime):
        """
        Purpose - Function to sort the dictionary based on the key and return a sorted list
        :param fileDict : Dictionary containing Last_modified Date and file name
        :param startDateTime : Start DateTime in the format '2016-02-01 00:00:00'
        :param endDateTime : End DateTime in the format '2016-02-10 00:00:00'
        """
        try:
            patternToSearch =  self.m_configDict["ENV"]["pattern_to_search"]
            
            if self.debugFlag:
                print "fileDict = ", fileDict
                print "patternToSearch = ", patternToSearch
                print "startDateTime = ", startDateTime
                print "endDateTime = ", endDateTime
            sorted_values = sorted(fileDict.values())
            start = bisect.bisect_left(sorted_values, startDateTime)
            end = bisect.bisect_right(sorted_values, endDateTime)
            if self.debugFlag:
                print "start = ", start
                print "end = ", end
            for fileItem in sorted(fileDict.iteritems())[start:end]:
                if patternToSearch in fileItem[0]:
                    if self.debugFlag:
                        print "fileItem[0] = ", fileItem[0]
                    yield fileItem[0]

        except Exception as exp:
            self.m_logger.error("Failed while executing getRecords to sort the dictionary content of dictionary  with Error = " + str(exp))
            sys.exit(1)

    def readManifestFile(self, manifestFileName):
        """
        Purpose - To read the content of Finra's manifest file stored in key-value pair into Nested dictionary 
        :param manifestFileName : Finra's manifestFileName containing data filenames, file size & no of rows
        """
        try:
            manifestRecordStartPattern = self.m_configDict["dxt"]["MANIFEST_RECORD_START_PATTERN"]

            if self.debugFlag:
                print "manifestRecordStartPattern =", manifestRecordStartPattern

            with open(manifestFileName) as infile:
                manifestFileDict = {}
                file = 0
                line_count = 0
                for line in infile:
                    line = line.strip()
                    if line.startswith(manifestRecordStartPattern):
                        file = line_count
                        line_count += 1
                        manifestFileDict[file] = {}
                    var, val = line.split('=',1)
                    if self.debugFlag:
                        print "var = ", var, "val = ", val
                    manifestFileDict[file][var.strip()] = val.strip()

            if self.debugFlag:
                print "====================================="
                print "manifestFileDict = ", manifestFileDict
                print "====================================="

            return manifestFileDict

            #for key, values in manifest.items():
                #if key == 1:
                    #for k,v in values.items():
                        #print k, v

        except Exception as exp:
            self.m_logger.error("Failed while executing readManifestFile to get FINRA manifest file into nested dictionary, Error = " + str(exp))
            sys.exit(1)


    def getManifestFileList(self, startDateTime, endDateTime, s3Bucket, s3Path, folderPosition):
        """
        Purpose - Function to sort the dictionary based on the key and return a sorted list
        :param startDateTime : Start DateTime in the format '2016-02-01 00:00:00'
        :param endDateTime : End DateTime in the format '2016-02-10 00:00:00'
        """
        try:
            if self.debugFlag:
                print "s3Bucket = ", s3Bucket
                print "s3Path = ", s3Path
                print "startDateTime = ", startDateTime
                print "endDateTime = ", endDateTime
                print "folderPosition = ", folderPosition

            fileListDict = self.s3object.listBucketWPathByLastModified(s3Bucket, s3Path, folderPosition)
            if self.debugFlag:
                print "fileListDict = ", fileListDict

           
            #endDateTime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            manifestFileList = list(self.getRecords(fileListDict, startDateTime, endDateTime))

            if self.debugFlag:
                print "fileListDict = ", fileListDict
                print "manifestFileList = ", manifestFileList

            return manifestFileList
        except Exception as exp:
            self.m_logger.error("Failed while creating AWS manifest file list with Error = " + str(exp))
            return 1

    def processExtractor(self):
        """
        Purpose - Driver for the extract flow.  Validates the market name against
        the database, loads the market-specific config file, determines the
        last-modified watermark, obtains an AWS token from FINRA, builds the list
        of manifest files to process and, for each manifest file, downloads its
        data files in parallel (via extractData worker processes), writes a local
        manifest for downstream loading, and records dataset status in Oracle.
        :param : None
        :return: None; exits the process with status 1 on any failure
        """
        try:
            # DB_CALL
            # Make database call sp_dxt_validate_mktName(mktName) to validate mktName

            # SQL templates embed parameter names as plain text; build one
            # alternation regex of the keys and substitute each with its value.
            tempSql = self.m_configDict["SQL"]["validate_market_name"]
            myParamsDict = { 'mktName' : self.mktName }
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr

            if returnStr[0] != '0':
                self.m_logger.error("Invalid market name provided " + mySql + ". Error = " + self.mktName)
                sys.exit(1)

            if self.debugFlag:
                print "MktName from DB = ", self.mktName
          
            #Build the string for mktConfigFile based on mktName and configFile info
            # e.g. /path/config.ini + 'nyse' -> /path/config_nyse.ini
            self.mktConfigFile = os.path.dirname(self.configFile) + '/' + os.path.basename(self.configFile).split('.',1)[0].strip() + '_' + self.mktName.lower() + '.' + os.path.basename(self.configFile).split('.',1)[1].strip()

            if self.debugFlag:
                print("mktConfigFile = ", self.mktConfigFile)

            #Validate Market Config file is a valid file
            if not os.path.isfile(self.mktConfigFile):
                self.m_logger.error("Invalid market manifest file " + self.mktConfigFile)
                sys.exit(1)

            # Read Market specific config file and store it in a specific dictionary
            self.readMktConfigFile(self.mktConfigFile)

            if self.debugFlag:
                print("m_mktConfigDict=",self.m_mktConfigDict)

            # Read the table for the given market and fetch the last modified timestamp for the given manifest file
            tempSql = self.m_configDict["SQL"]["get_last_modified"]
            myParamsDict = { 'mktName' : self.mktName.upper() }
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr

            # Fall back to a fixed epoch-like watermark when the market has no
            # recorded last-modified date yet.
            if returnStr[0] == '0':
                if returnStr[1]:
                    lastModifiedDate=returnStr[1] 
                else:
                    lastModifiedDate="2015-01-01 00:00:00"
            else:
                self.m_logger.error("Unable to get last_modified date using the sql " + mySql + ". Error = " + self.mktName)
                sys.exit(1)
            if self.debugFlag:
                print("lastModifiedDate=",lastModifiedDate)

            #Temp call.  Need to enable the previous lines to use DB call.  Comment them bfr production
            # NOTE(review): the hard-coded dates below override the DB watermark
            # fetched above — temporary test scaffolding; remove before production.

            if self.mktName == 'nyse_mkt': 
                lastModifiedDate="2016-06-03 15:00:00"
            else:
                lastModifiedDate="2016-06-01 00:00:00"

            #print "Ram - Last Modified Date = ", lastModifiedDate, "mktName = ", self.mktName

            # Get RunID
            self.runID = generate_runId()
            if self.debugFlag:
                print("RunID = ", self.runID)

            # Initialize S3 object and get FINRA cloud service token and establish s3 session
            self.s3object = S3(self.mktConfigFile, self.m_logger, self.debugFlag)

            tokenRetryTimes = int(self.m_configDict["TOKEN"]["token_retry_times"])
            tokenRetryWaitTime = int(self.m_configDict["TOKEN"]["token_retry_wait_time"])

            # Retry the token fetch up to tokenRetryTimes times.
            # NOTE(review): initCount == tokenRetryTimes can never be true here
            # (the loop condition guarantees initCount < tokenRetryTimes before the
            # check), so the error branch never fires and exhausting retries falls
            # through silently; also "..." + tokenRetryTimes would raise TypeError
            # (str + int) if it did fire — confirm and fix.
            initCount = 0
            while (initCount < tokenRetryTimes):
                tokenReturnCode = self.s3object.getToken()
                if tokenReturnCode:
                    if initCount == tokenRetryTimes:
                        self.m_logger.error("Error: Exceeded the max retries " + tokenRetryTimes + " to get AWS Token from FINRA.  Please re-try after some time or escalate.. ")
                        sys.exit(1)
                    initCount += 1
                    time.sleep(tokenRetryWaitTime)
                else:
                    break

            # Remember when the token was obtained so it can be refreshed after
            # S3_TIMEOUT_TIME seconds (see the refresh block in the loop below).
            self.currentEpochTime = int(time.time())

            # Get list of Manifest files to be processed

            #currentDate = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            #currentDate = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            # Window end is tomorrow to be sure today's files are included.
            currentDate = (datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")

            folderPosition =  int(self.s3object.m_configFile["S3"]["folder_position"])
            targetBucket = self.s3object.m_configFile["S3"]["bucket"]
            targetFolder = self.s3object.m_configFile["S3"]["path"]
            targetFilePath = targetFolder 

            finraManifestFileList = self.getManifestFileList(lastModifiedDate, currentDate, targetBucket, targetFilePath, folderPosition)
            
            if self.debugFlag:
                print("finraManifestFileList = ", finraManifestFileList)

            # Download manifest files in the manifest file list to a specific folder from AWS
            localFileDir = self.s3object.m_configFile["ENV"]["stage_dir"]

            #targetFileWthPath = os.path.join(self.s3object.m_configFile["S3"]["path"], os.path.basename(localFileWthPath))
            encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]

            # Get an instance of the Manifest class
            fileIDQueue = Queue()
            localAWSRetries = int(self.m_mktConfigDict["ENV"]["aws_retries"])

            for finraManifestFile in finraManifestFileList:
                # Following 2 lines temporarily written to avoid bad manifest files.  Please remove them before go-live
                if finraManifestFile == 'manifest.TSP_A_20160425.txt':
                    continue
                if finraManifestFile == 'manifest.TSP_P_20160425.txt':
                    continue
                
                targetFileWthPath = targetFolder + finraManifestFile
                localFileWthPath = localFileDir + "/" + finraManifestFile
                if self.debugFlag:
                    print "targetFileWthPath = ", targetFileWthPath
                    print "localFileWthPath = ", localFileWthPath
                    print "finraManifestFile = ", finraManifestFile
                
                # Download the manifest file itself, retrying up to localAWSRetries.
                initCount = 0
                while (initCount < localAWSRetries):
                    extractReturnValue = 0

                    #Call s3.data download to extract the manifest file (single part load)
                    #extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag )
                    extractReturnValue = self.s3object.getDataSinglePart(localFileWthPath, targetFileWthPath, targetBucket)

                    if self.debugFlag:
                        print "extractReturnValue = ", extractReturnValue

                    if extractReturnValue:
                        # Try it again
                        initCount += 1
                    else:
                        # Come out of the loop
                        break
                # End of while loop for AWS Retries
                  
                if extractReturnValue:
                    self.m_logger.error("Unable to fetch manifestFile = " + finraManifestFile + "from the path = " + targetFileWthPath + " to the local filesystem = " + localFileWthPath )
                    sys.exit(1)

                    # NOTE(review): everything from here to the end of this if-block
                    # is unreachable after sys.exit(1) above — dead code to remove.
                    """ Not needed
                    if extractReturnValue == 0:
                        pStatus = 'P'
                        pComment = 'Load completed'
                        break
                    else:
                        pStatus = 'F'
                        pComment = 'Load failed'
                    """
                    initCount += 1

                # get datasetname from the manifest file.  Need check based on FINRA naming

                # Original requirement
                #self.datasetName = os.path.basename(finraManifestFile).split('.',3)[1].strip().upper()

                # Customized for FINRA's latest file
                # e.g. manifest.TSP_A_20160425.txt -> 'A...' segment -> uppercased
                self.datasetName = os.path.basename(finraManifestFile).split('.')[1].split('_')[1].strip().upper()
                if self.debugFlag:
                    print "datasetName = ", self.datasetName

                # Need to check DB call, once it is ready

                # Validate the manifest file name to make sure that we are expecting it
                tempSql = self.m_configDict["SQL"]["validate_dataset_name"]
                myParamsDict = { 'datasetName' : self.datasetName }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr
    
                #Check if dataset is there in the tb_dxt_dataset_master, if not, skip it and move to the next file.  For other errors, exit out of the program
                if int(returnStr[0]) < 0:
                    self.m_logger.error("Unable to validate datasetName " + mySql + ". Error = " + self.datasetName)
                    sys.exit(1)
                elif int(returnStr[0]) > 0:
                    self.m_logger.info("Give Dataset is not in the list to process.  Skipping it" + mySql + ". Dataset Name = " + self.datasetName)
                    # Continue to the next file entry in the manifest list
                    continue
                    
                # Insert a record into tb_dxt_dataset_trans with status 'P' for the given datasetName, saying that we start the process for this manifest file

                pStatus = 'P'
                tempSql = self.m_configDict["SQL"]["put_dataset"]
                myParamsDict = {'datasetName':self.datasetName, 'runID': str(self.runID), 'tDate':str(self.tradeDate), 'status': pStatus }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr
    
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to insert into tb_dxt_dataset_trans table " + mySql + ". Error = " + self.datasetName)
                    sys.exit(1)

                # Read the contents of manifestfile i.e. dataFileNames into a list - Will validate the datafiles as well
                manifestDelim = self.m_configDict["ENV"]["manifest_delim"]
                if self.debugFlag:
                    print "localFileWthPath = ", localFileWthPath

                #Need to change the following line to read a nested dictionary from a keyValuePair
                manifestFileDict = self.readManifestFile(localFileWthPath)
                if self.debugFlag:
                    print "manifestDelim = ", manifestDelim
                    print "manifestFileDict = ", manifestFileDict

                # Cap on the number of concurrent extractData worker processes.
                process_count = int(self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"])

                # Now go into multiprocessing and call extractData function and extract files ones by one 
                fileID=1
                dbFlag=1
                fileIDQueue = Queue()
                procs = []
                doneCounter = 0
                sendCounter = 0
                failureFlag = 0
                finraManifestFileCounter=0

                # Producer/consumer loop: spawn up to process_count workers at a
                # time, then drain their (fileID, result) tuples from fileIDQueue.
                while doneCounter < len(manifestFileDict):
                    while sendCounter < len(manifestFileDict) and sendCounter - doneCounter < process_count:
                        if self.debugFlag:
                            print "manifestFileDict[self.m_configDict[dxt][DATA_FILE_NAME_STR]] = ", manifestFileDict[sendCounter]['Datafilename']
                        # Call fn extractData to fetch files from AWS.  Pass manifestFileDict[sendCounter] as it contains the whole record including the filename, filesize & row count
                        processHandle = Process(target=Extractor.extractData, args=(self, manifestFileDict[sendCounter],fileID, fileIDQueue, dbFlag))
                        processFlag=1
    
                        # Refresh the AWS token when it is older than S3_TIMEOUT_TIME.
                        s3TimeoutTime = int(self.m_configDict["dxt"]["S3_TIMEOUT_TIME"])
                        if ((int(time.time()) - self.currentEpochTime) > s3TimeoutTime):
                            self.currentEpochTime = int(time.time())
                            # NOTE(review): batch_count and max_batches are not
                            # defined anywhere in this method — this log call will
                            # raise NameError if the refresh branch is reached.
                            self.m_logger.info("Getting New Token for Batch : {0}, Max batches : {1}".format(batch_count,max_batches))
                            if self.debugFlag:
                                print 'Memory usage: %s (kb)' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
                                print "self.currentEpochTime = ", self.currentEpochTime
                                print "Current Time in Epoch = ", int(time.time())
                            if self.debugFlag:
                                print "Inside get new token - self.currentEpochTime = ", self.currentEpochTime
                            # NOTE(review): same unreachable retry-exhaustion branch
                            # and str + int concatenation issue as the token loop above.
                            initCount = 0
                            while (initCount < tokenRetryTimes):
                                tokenReturnCode = 0
                                tokenReturnCode = self.s3object.getToken()
                                if tokenReturnCode:
                                    if initCount == tokenRetryTimes:
                                        self.m_logger.error("Error: Exceed the max retries " + tokenRetryTimes + " to get AWS Token from FINRA.  Please re-try after some time or escalate.. ")
                                        sys.exit(1)
                                    initCount += 1
                                    time.sleep(tokenRetryWaitTime)
                                else:
                                    break

                        # Stagger worker start-up to avoid hammering the service.
                        threadDelayTime = int(self.m_configDict["dxt"]["THREAD_DELAY_TIME"])
                        time.sleep(threadDelayTime)

                        processHandle.start()
                        procs.append(processHandle)
                        sendCounter += 1
                        fileID += 1
                    # Wait for the current batch of workers to finish.
                    # NOTE(review): procs=[] inside the for loop rebinds the list on
                    # the first iteration; joining still works because the iterator
                    # holds the original list, but the intent would be clearer with
                    # the reset after the loop — confirm.
                    if processFlag:
                        for p in procs:
                            p.join()
                            procs=[]
                        processFlag=0
                    while not fileIDQueue.empty():  # process completed results as they arrive
                        #time.sleep(3)
                        qFileID, qResult = fileIDQueue.get()
                        if self.debugFlag:
                            print("qFileID = ", qFileID, "qResult = ", qResult)
                        doneCounter += 1
                        if qResult:
                            failureFlag = 1
                    if self.debugFlag:
                        print "ProcessFlag = ", processFlag, "sendCounter = ", sendCounter, "doneCounter = ", doneCounter
                    if failureFlag:
                        break
                        
                if self.debugFlag:
                    print "Failure Flag = ", failureFlag
    
                # On success only: write the local manifest consumed by the
                # downstream load job (tblName|file|size|rows|0 per line).
                if failureFlag:
                    pStatus = 'F'
                else:
                    pStatus = 'S'

                    tblName = self.m_mktConfigDict["dxt"]["TARGET_TBL_NAME"] + "_" + self.mktName.upper()
                    manifestDate = os.path.basename(finraManifestFile).split('.',3)[1][6:12]
                    fatlManifestFile = self.m_configDict["ENV"]["stage_dir"] + "/" + tblName + "." + manifestDate + ".manifest"
                    with open(fatlManifestFile,"w") as fh:
                        counter = 0
                        for dictRecord in manifestFileDict:
                            dataFile = manifestFileDict[dictRecord][self.m_configDict["dxt"]["DATA_FILE_NAME_STR"]]
                            sourceFileWthPath = self.m_configDict["ENV"]["stage_dir"] + "/" + dataFile
                            dataFileSize = int(manifestFileDict[dictRecord][self.m_configDict["dxt"]["DATA_FILE_SIZE_STR"]])
                            dataFileRecordCount = int(manifestFileDict[dictRecord][self.m_configDict["dxt"]["NO_OF_ROWS_STR"]])
                            #fileSize = os.stat(sourceFileWthPath).st_size
                            if self.debugFlag:
                                print "dataFile = ", dataFile
                                print "dataFileSize = ", dataFileSize
                                print "dataFileRecordCount = ", dataFileRecordCount
                                print "sourceFileWthPath = ", sourceFileWthPath
                                print "tblName = ", tblName, "dataFile = ", dataFile, "dataFileSize = ", dataFileSize, "mktName = ", self.mktName
                                
                            fh.write(tblName + "|" + str(dataFile) + "|" + str(dataFileSize) + "|" + str(dataFileRecordCount) + "|" + "0" + "\n")
                            counter += 1

                # Move all the data files to inbox  from the stg location.  No need for this step, as Joejo mentioned there will be another Tidal job doing this step

                # Move the manifest file to inbox from the stg location

                # insert a record into tb_dxt_dataset_trans table with 'S' or 'F' record

                #Call Oracle fn to insert status 'S' into TB_DDY_DATASET_TRANS with RUNID etc
                #DB_CALL
                # Make database call sp_dxt_insert_dataset_trans and insert data based on Failure or Success

                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_dataset"]
            
                myParamsDict = {"datasetName":self.datasetName, "runID": str(self.runID), "tDate":str(self.tradeDate), "status": pStatus }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
    
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
    
                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr
    
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

                if failureFlag:
                    self.m_logger.error("Extract failed for data files for manifest file " + finraManifestFile)
                    sys.exit(1)

                finraManifestFileCounter += 1

           # End of for loop for finraManifestFiles

        except Exception as e:
            self.m_logger.error("ProcessExtractor failed with error " + str(e))
            sys.exit(1)
Esempio n. 12
0
def main(configFile, logLevel, tDate):
    log = Logger(logLevel,configFile, tDate)
    log.addFileHandler(logging.DEBUG)
    log.addGenericInfo(__file__)

    try:
        m_configDict = configuration(configFile, True).m_dictionary
        #print "m_configDict = ", m_configDict
        myOracle = Oracle(m_configDict, log)

        #mySql = "select RETURN_CODE||'|'|| RETURN_MSG from table(PKG_RFCM_DDY.F_DDY_INSERT_DATASET_TRANS(:datasetID, :runID, :tDate, :status))"
        #pDatasetID = 2
        #pRunID = 234234234
        # 20151216144156584829
        #pTDate = 20151215
        #pStatus = 'P'
        #myParams = {"datasetID": pDatasetID, "runID": pRunID, "tDate": pTDate, "status": pStatus}

        #select RETURN_CODE||'|'||RETURN_MSG from table(PKG_RFCM_DDY.F_DDY_INSERT_PROCESS_STATUS('DLE_INFO', 20151216144156584829, 1, 'opb1.dat.bz2', 20151215,  111, 'test_hostname', 'P', 'Process Started'))
        #select RETURN_CODE||'|'||RETURN_MSG from table(PKG_RFCM_DDY.F_DDY_INSERT_PROCESS_STATUS('DLE_INFO', 20151216144156584829, 1, 'opb1.dat.bz2', 20151215,  111, 'test_hostname', 'P', 'Process Started'));
        mySql = "select RETURN_CODE||'|'||RETURN_MSG from table(PKG_RFCM_DDY.F_DDY_INSERT_PROCESS_STATUS(:datasetName, :runID, :fileID, :fileName, :tDate, :processID, :hostName, :lstatus , :lcomment))"
        pDatasetName = "DLE_INFO"
        pRunID = 20151216144156584
        pTDate = 20151210
        pStatus = 'P'
        pFileID = 1
        pFileName = 'test.dat.gz'
        pComment = 'Process Started'
        pProcessID = 1234
        pHostName = 'Test_Host'
        
        myParams = {"datasetName": pDatasetName, "runID": pRunID, "fileID": pFileID, "fileName": pFileName, "tDate": pTDate, "processID": pProcessID, "hostName": pHostName, "lstatus": pStatus, "lcomment": pComment}

        #returnStr = self.__cursor.execute("select * from table(PKG_RFCM_DDY.f_ddy_insert_manifest_trans(1, 20151215, 'opb.test1.bz2', 'manifest.opb', 23423, 2342334))")
        #mySql = "select RETURN_CODE||'|'|| RETURN_MSG from table(PKG_RFCM_DDY.f_ddy_insert_manifest_trans(:datasetID, :tDate, :dataFileName, :manifestFileName, :noOfRecords, :fileSize))"
        #pDatasetID = 3
        #pTDate = 20151210
        #pDataFileName = 'opb.test1.bz2'
        #pManifestFileName = 'manifest.opb'
        #pNoOfRecords = 23423
        #pFileSize = 2342334
        #myParams = {"datasetID": pDatasetID, "tDate": pTDate, "dataFileName": pDataFileName, "manifestFileName":pManifestFileName, "noOfRecords":pNoOfRecords, "fileSize":pFileSize}

        #mySql = "select RETURN_CODE||'|'|| RETURN_MSG from table(PKG_RFCM_DDY.f_ddy_get_process_status(20151216144156584829,1));"
        #mySql = "select RETURN_CODE||'|'|| RETURN_MSG from table(PKG_RFCM_DDY.f_ddy_get_process_status(:runID,:fileID))"
        #pRunID = 20151216144156584829
        #pFileID = 1
        #myParams = {"runID":pRunID, "fileID":pFileID}

        #mySql = "select RETURN_CODE||'|'|| RETURN_MSG from table(PKG_RFCM_DDY.f_ddy_get_dataset_status(234234234));"
        #mySql = "select RETURN_CODE||'|'|| RETURN_MSG from table(PKG_RFCM_DDY.f_ddy_get_dataset_status(:runID))"

        #pRunID = 234234234

        #myParams = {"runID":pRunID}

        #mySql = "select RETURN_CODE||'|'|| RETURN_MSG from table(PKG_RFCM_DDY.f_ddy_get_active_loads());"
        #mySql = "select RETURN_CODE||'|'|| RETURN_MSG from table(PKG_RFCM_DDY.f_ddy_get_active_loads())" 

        #myParams = {}

        #mySql = select RETURN_CODE||'|'|| RETURN_MSG from  table(PKG_RFCM_DDY.f_ddy_get_market_info(p_dataset_name => 'ADW_EVENT_LSH_RAW'));
        #mySql = "select RETURN_CODE||'|'|| RETURN_MSG from  table(PKG_RFCM_DDY.f_ddy_get_market_info(:datasetName))"
        #mySql = "select RETURN_CODE, RETURN_MSG from  table(PKG_RFCM_DDY.f_ddy_get_market_info(:datasetName))"

        #pDatasetName = 'ADW_EVENT_LSH_RAW'
        #pDatasetName = 'DLE_INFO'

        myParams = {"datasetName":pDatasetName}

        #mySql = select  RETURN_CODE||'|'|| RETURN_MSG from table(PKG_RFCM_DDY.f_ddy_get_dataset_race('ADW_AMEX_OPT_RAW'));
        #mySql = "select  RETURN_CODE||'|'|| RETURN_MSG from table(PKG_RFCM_DDY.f_ddy_get_dataset_race(:datasetName))"

        #pDatasetName = "ADW_AMEX_OPT_RAW"

        #myParams = {"datasetName":pDatasetName}

        #mySql = "select RETURN_CODE, RETURN_MSG, P_MARKET_IND, P_DEFAULTS_FILENAME  from  table(pkg_rfcm_ddy.f_ddy_get_makt_info_dflt_fname(:datasetName))"
        mySql = "select RETURN_CODE, RETURN_MSG, P_MARKET_IND, P_DEFAULTS_FILENAME  from  table(pkg_rfcm_ddy.f_ddy_get_makt_info_dflt_fname('datasetName'))"
        pDatasetName = 'DLE_INFO'
        new_mySql = re.sub('datasetName', pDatasetName, mySql.rstrip())
        returnStr = myOracle.runSqlWthParamsGetOneRow(new_mySql)
        print "Return Value = ", returnStr[0], " Return Code = ", returnStr[1], " Mkt = ", returnStr[2], "Defaults = ", returnStr[3]



        mySql = "select * from  table(PKG_RFCM_DDY.f_ddy_internal_recon('tradeDate')) order by 4"
        tDate = '20160212'
        new_mySql = re.sub('tradeDate', tDate, mySql.rstrip())
        print "mySql = ", mySql, "new_mySql = ", new_mySql
        returnStrs = myOracle.runSqlWthParamsGetMultipleRows(new_mySql)
        print "Return Strs = ", returnStrs
        #for returnStr in returnStrs:
            #print "Return Value = ", returnStr[0], " Return Code = ", returnStr[1]


        #print "mySql = ", mySql, "myParams = ", myParams

########### Multiprocessing test code
        """
        fileID=1
        fileIDQueue = Queue()
        for x in range(5):
            processHandle = Process(target=myOracle.worker, args=( mySql, myParams, fileID, fileIDQueue))
            processHandle.start()
            fileID += 1
        
        processHandle.join()
        #Without sleep the queue is unreliable and do not return the expected values
        time.sleep(2)
        
        failureFlag=0
        while not fileIDQueue.empty():
            qFileID, qResult = fileIDQueue.get()
            print("qFileID = ", qFileID, "qResult = ", qResult)
            if qResult:
                failureFlag=1

        print "FailureFlag = ", failureFlag
        """
########## End

    except Exception as e:
        print "Failed on main", str(e)
        exit(1)
# Esempio n. 13
class Loader():
    # class variables

    # Logger instance shared by all Loader methods (set in __init__)
    m_logger = ""

    # database objects; m_oracle_db is opened in __init__,
    # m_netezza_db is a placeholder not initialized by this constructor
    m_oracle_db = ""
    m_netezza_db = ""

    def __init__(self, configFile, tradeDate, debugFlag, datasetName):
        """
        Purpose: Constructor.

        Builds the logger, stores the run parameters on the instance, loads the
        configuration dictionary and opens the Oracle connection.

        :param self:        class object itself
        :param configFile:  Configuration file to use
        :param tradeDate:   Trade date of this run (also part of the log name)
        :param debugFlag:   When truthy, methods emit extra diagnostic prints
        :param datasetName: Dataset name (lower-cased for the log file name)
        """
        # Build the logger first so every failure below can be recorded.
        self.m_logger = Logger(logging.INFO, configFile, tradeDate, datasetName.lower())
        self.m_logger.addFileHandler(logging.DEBUG)
        self.m_logger.addGenericInfo(__file__)

        # Keep the run parameters for use by the other methods.
        self.configFile = configFile
        self.tradeDate = tradeDate
        self.debugFlag = debugFlag

        try:
            # Load the configuration file into a dictionary for easy lookup.
            self.m_configDict = configuration(self.configFile, True).m_dictionary

            # Open the Oracle connection used by all database calls below.
            self.m_oracle_db = Oracle(self.m_configDict, self.m_logger)

        except Exception as err:
            # Initialization failure is fatal: log it, echo it, and stop.
            self.m_logger.error("Unable to initialize the configuration " + str(err))
            print("ERROR: Unable to initialize the configuration for logger " + str(err))
            sys.exit(1)

    def readMktConfigFile(self, mktConfigFile):
        """
        Purpose - Load the market-specific configuration file into the instance
        dictionary m_mktConfigDict for later reference.

        :param mktConfigFile: Path of the market configuration file
        :return: None (terminates the process on failure)
        """
        try:
            self.m_mktConfigDict = configuration(mktConfigFile, True).m_dictionary
        except Exception as err:
            # A bad market config is fatal: log it, echo it, and stop.
            self.m_logger.error("Unable to initialize the configuration for logger " + str(err))
            print("ERROR: Unable to initialize the configuration for logger " + str(err))
            sys.exit(1)

    def chkActiveLoads(self):
        """
        Purpose - To check the count of active Active loads happening at a given point
        :param None: None at this point
        :return:
        """
        try:
            if self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_check_flag"] == 'Y': 
                localActiveLoadMax = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"]
                localActiveLoadWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_wait_time"]
                localActiveLoadMaxWaitTime = self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max_wait_time"]

                mySql = ""
                myParams = ""
                mySql = self.m_configDict["SQL"]["get_active_loads"]
    
                activeFlag=1
                totalActiveWaitTime=0
                while activeFlag:
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if self.debugFlag:
                        print "mySql = ", mySql
                        print "returnStr = ", returnStr
                        print "chkActiveLoads - Active Loads value = ", int(returnStr[1].strip())
                    if returnStr[0] != '0':
                        self.m_logger.info("Retry after delay., Unable to get active loads using sql " + mySql + ". Error = " + returnStr[1])
                        local_delay_time = int(self.m_configDict["SQL"]["delay_time"])
                        time.sleep(local_delay_time)
                        returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                        if returnStr[0] != '0':
                            self.m_logger.error("Unable to get active loads using sql " + mySql + ". Error = " + returnStr[1])
                            return 1

                    #Check if actual active loads is <= configured active loads.  If so, return out of the fn
                    if int(returnStr[1].strip()) <= localActiveLoadMax:
                        activeFlag=0
                        return 0

                    #Sleep for time defined by configured value for "active_load_wait_time"
                    time.sleep(localActiveLoadWaitTime)
                    totalActiveWaitTime += localActiveLoadWaitTime

                    #Check if actual Total wait time is > configured total wait time.  If so, throw an error and exit
                    if totalActiveWaitTime > localActiveMaxWaitTime:
                        self.m_logger.error("In Fn chkActiveLoads.  Total Actual Wait Time exceeds the configured value active_load_max_wait_time.  Either cleanup orphaned loads or increase the either active_load_max or active_load_max_wait_time. totalActiveWaitTime = " + str(totalActiveWaitTime) + " localActiveMaxWaitTime=" + str(localActiveMaxWaitTime))
                        return 1
            else:
                return 0

            #Return failure
            return 1
                    
        except Exception as exp:
            self.m_logger.error("Failure in chkActiveLoads process for file with the error " + str(exp))
            return 1

    def chkRaceStatus(self):
        """
        Purpose - To check if a load is already running for the given dataset,
        polling until the dataset is free or a configured maximum wait elapses.

        Reads from the market config (RACE section):
          race_status_check_flag    - 'Y' enables the check, anything else skips it
          race_status_wait_time     - seconds to sleep between polls
          race_status_max_wait_time - total seconds to wait before giving up

        :param None: None at this point
        :return: 0 when no competing load is running (or the check is disabled),
                 1 on failure (query error or maximum wait time exceeded)
        """
        # NOTE(review): relies on self.datasetName, which is assigned in
        # processLoader() - this method must not be called before that.
        try:
            if self.m_mktConfigDict["RACE"]["race_status_check_flag"] == 'Y':
                localRaceStatusWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_wait_time"])
                localRaceStatusMaxWaitTime = int(self.m_mktConfigDict["RACE"]["race_status_max_wait_time"])

                mySql = ""

                #myParams = {"datasetName":self.datasetName}
                # Substitute the dataset name textually into the SQL template
                # (simple regex replacement, not bind variables).
                tempSql = self.m_configDict["SQL"]["get_race_status"]
                myParamsDict = { 'datasetName' : self.datasetName }
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)

                raceFlag=1
                totalRaceStatusWaitTime=0
                while raceFlag:
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if self.debugFlag:
                        print "tempSql = ", tempSql
                        print "myParamsDict = ", myParamsDict
                        print "mySql = ", mySql
                        print "returnStr = ", returnStr
                        print "chkRaceStatus - ReturnCode = ", int(returnStr[1].strip())
                    if returnStr[0] != '0':
                        # Query failed: retry once after a configured delay
                        # before declaring failure.
                        self.m_logger.info("Retry after delay., Unable to get race status using sql " + mySql + ". Error = " + returnStr[1])
                        local_delay_time = int(self.m_configDict["SQL"]["delay_time"])
                        time.sleep(local_delay_time)
                        returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                        if returnStr[0] != '0':
                            self.m_logger.error("Unable to get race status using sql " + mySql + ". Error = " + returnStr[1])
                            return 1

                    #Check if the load for this dataset is already running. If not, exit out of the function with normal return value
                    if int(returnStr[1].strip()) <= 1:
                        raceFlag=0
                        return 0

                    #Check if actual Total wait time is > configured total wait time.  If so, throw an error and exit


                    #Sleep for time defined by configured value for "active_load_wait_time"
                    time.sleep(localRaceStatusWaitTime)
                    #time.sleep(90)
                    totalRaceStatusWaitTime += localRaceStatusWaitTime
                    if self.debugFlag:
                        print "totalRaceStatusWaitTime = ", totalRaceStatusWaitTime, "localRaceStatusWaitTime =", localRaceStatusWaitTime
                    if totalRaceStatusWaitTime > localRaceStatusMaxWaitTime:
                        self.m_logger.error("In Fn chkRaceStatusLoads.  Total Actual Wait Time exceeds the configured value race_status_max_wait_time.  Either check if the Dataset is getting loaded or increase the either active_load_max or active_load_max_wait_time. totalRaceStatusWaitTime = " + str(totalRaceStatusWaitTime) + " localRaceStatusMaxWaitTime=" + str(localRaceStatusMaxWaitTime))
                        return 1
            else:
                return 0

            #Return failure (defensive: the while loop above only exits via return)
            return 1

        except Exception as exp:
            self.m_logger.error("Failure in chkRaceStatus process for file with the error " + str(exp))
            return 1

    def loadData(self,localDataFile, localFileID, localFileIDQueue, localDBFlag, dataFileFlag, localRecordCount):
        """
        Purpose - To load the given datafile to the S3 bucket specified in the global mktConfigFile
        :param localDataFile: Source datafile to be uploaded to S3
        :param localFileID: Internal File ID assigned to the source datafile
        :param localFileIDQueue: Queue in which, results of the operation is stored
        :return:
        """
        try:
            if self.debugFlag:
                print "Inside loadData function"

            if localDBFlag:
                raceStatusReturnValue=self.chkRaceStatus()
                if self.debugFlag:
                    print "raceStatusReturnValue=", raceStatusReturnValue

                if raceStatusReturnValue:
                    self.m_logger.error("Failure value returned by chkRaceStatus fn.  Return value = " + str(raceStatusReturnValue))
                    localFileIDQueue.put((localFileID, raceStatusReturnValue))
                    return 1

                activeLoadsReturnValue=self.chkActiveLoads()
                if activeLoadsReturnValue:
                    self.m_logger.error("Failure value returned by chkActiveLoads fn.  Return value = " + str(activeLoadsReturnValue))
                    localFileIDQueue.put((localFileID, activeLoadsReturnValue))
                    return 1
    
                processID = os.getpid()
                hostName = socket.gethostname()

                #Insert Process status into Oracle db
                #DB_CALL - sp_ddy_insert_process_status(RUNID, FILE_ID, etc)
                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_process_status"]
                pStatus = 'P'
                pComment = 'Load started'
                myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileID":str(localFileID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "status":pStatus , "lcomment":pComment, "tidalRunID":str(self.tidalRunID)}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)

                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr
    
                if returnStr[0] != '0':
                    self.m_logger.info("Retry after delay., Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    local_delay_time = int(self.m_configDict["SQL"]["delay_time"])
                    time.sleep(local_delay_time)
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if returnStr[0] != '0':
                         self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                         localFileIDQueue.put((localFileID, 1))
                         return 1

            #Call s3.gettoken to get the token and establish connection

            sourceFileWthPath = localDataFile
            
            #Commented the following lines to move getToken outside parallel thread
            # Keep it until we test all 93 loads and remove it
            #s3object = S3(self.mktConfigFile, self.m_logger)
            #s3object.getToken()
            ##sourceFileWthPath =  s3object.m_configfile["S3"]["source_file"]

            targetFileWthPath = os.path.join(self.s3object.m_configFile["S3"]["path"], os.path.basename(sourceFileWthPath))
            targetBucket = self.s3object.m_configFile["S3"]["bucket"]
            encryptKeyFlag = self.s3object.m_configFile["S3"]["encrypt_key"]

            local_aws_retries = int(self.m_mktConfigDict["ENV"]["aws_retries"])
            if self.debugFlag:
                print("sourceFileWthPath =", sourceFileWthPath)
                print("targetFileWthPath =", targetFileWthPath)
                print("targetBucket =", targetBucket)
                print("encryptKeyFlag =", encryptKeyFlag)
                print("local_aws_retries =", local_aws_retries)

            #Get size of the file
            sourceSize = os.stat(sourceFileWthPath).st_size

            multiPartFlag=False
            GBFACTOR = float(1<<30)
            #Check if the given file is greater than 4.5 GB.  Limit on AWS > 5 GB on single part upload
            if float(sourceSize/GBFACTOR) > 4.5:
                multiPartFlag=True

            init_count = 0
            self.m_logger.info("Started Xfer of Source File " + sourceFileWthPath  + " with size " + str(sourceSize) + " to target " + targetFileWthPath)
            while (init_count < local_aws_retries):
                loadReturnValue = 0

                #Call s3.dataUpload to load the data (single part load)

                if multiPartFlag:
                    if self.debugFlag:
                        print "Inside Multipart load.  File size = ", sourceSize
                    loadReturnValue = self.s3object.loadDataMultiPart(sourceFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag, self.bytes_per_chunk)
                else:
                    if self.debugFlag:
                        print "Inside Singlepart load.  File size = ", sourceSize
                    loadReturnValue = self.s3object.loadDataSinglePart(sourceFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag )
                if self.debugFlag:
                    print "loadReturnValue = ", loadReturnValue

                #Check if we are sending data file.  If so, we need to generate a complete file and send it along
                if (dataFileFlag) and (loadReturnValue == 0):
                    completeFile = localDataFile.split(".")[0] + self.compFilePattern
   
                    sourceFileWthPath = self.m_mktConfigDict["ENV"]["donefile_dir"] + "/" +  os.path.basename(completeFile)
                    if self.debugFlag:
                        print("completeFile =", completeFile)
                        print("sourceFileWthPath =", sourceFileWthPath)
                
                    with open(sourceFileWthPath,"w") as finraMnFH:
                        finraMnFH.write("{0},{1}\n".format(str(self.tradeDate),str(localRecordCount)))

                    targetFileWthPath = os.path.join(self.s3object.m_configFile["S3"]["path"], os.path.basename(sourceFileWthPath))
                    sourceSize = os.stat(sourceFileWthPath).st_size
                    self.m_logger.info("Started Xfer of complete file " + sourceFileWthPath  + " with size " + str(sourceSize) + " to target " + targetFileWthPath)
                    loadReturnValueCompleteFile = self.s3object.loadDataSinglePart(sourceFileWthPath, targetFileWthPath, targetBucket, encryptKeyFlag )

                    if loadReturnValueCompleteFile:
                        loadReturnValue = 1
                    #End of loadReturnValueCompleteFile If

                if loadReturnValue == 0:
                    pStatus = 'S'
                    pComment = 'Load completed'
                    break
                else:
                    pStatus = 'F'
                    pComment = 'Load failed'
                    init_count += 1


            if localDBFlag:
                #Call DB to insert 'S' or 'F' in tb_ddy_process_status
                #localFileIDQueue.put((localFileID, loadReturnValue))
                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_process_status"]
                myParamsDict = {"datasetName":self.datasetName, "runID":str(self.runID), "fileID":str(localFileID), "fileName":localDataFile, "tDate":str(self.tradeDate), "processID":str(processID), "hostName":hostName, "status":pStatus , "lcomment":pComment, "tidalRunID":str(self.tidalRunID)}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                if self.debugFlag:
                    print "tempSql = ", tempSql
                    print "localFileID = ", localFileID
                    print "myParamsDict = ", myParamsDict
                    print "mySql = ", mySql
                    print "returnStr = ", returnStr

                if returnStr[0] != '0':
                    self.m_logger.info("Retry after delay., Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    local_delay_time = int(self.m_configDict["SQL"]["delay_time"])
                    time.sleep(local_delay_time)
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if returnStr[0] != '0':
                        self.m_logger.error("Unable to put process status info into the database using sql " + mySql + ". Error = " + returnStr[1])
                        localFileIDQueue.put((localFileID, 1))
                        return 1

                if self.debugFlag:
                    print "localFileID = ", localFileID
                    print "loadReturnValue = ", loadReturnValue
                    print 'Memory usage: %s (kb)' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

                localFileIDQueue.put((localFileID,loadReturnValue))
            else:
                return loadReturnValue
      
        except Exception as exp:
            self.m_logger.error("Failure in loadData process for file with the error " + str(exp))
            if localDBFlag:
                localFileIDQueue.put(localFileID, 1)
            else:
                return 1

    def createFinraManifestFile(self, manifestFile):
        try:
            # Read Manifest file to get info like total rows, total size & other details to populate the done file for FINRA
            if self.debugFlag:
                print "Inside createFinraManifestFile fuction"

            with open(manifestFile,"r") as fh:
                self.totalRows = 0
                self.totalSize = 0
                self.fileCount = 0
                self.fileDict = {}
                for data in fh:
                    data.rstrip("\n")
                    # Exclude any entry with the pattern "start-of-day"
                    if self.sodFilePatternSearch in data:
                        continue
                    mylist = []
                    self.fileCount +=1
                    mylist = data.split("|")
                    self.fileDict[self.fileCount] = [mylist[0],os.path.basename(mylist[1]),int(mylist[2]),int(mylist[3])]
                    self.totalRows += int(mylist[3])
                    self.totalSize += int(mylist[2])
                if self.debugFlag:
                    print "self.fileDict = ", self.fileDict
        except Exception as exp:
            self.m_logger.error("Failed while processing readManifest with Error = " + str(exp))
            return 1

        try:
            #Use self.defautltsFile which is populated from the db later. No need to get it from config file
            self.defaultsFileWthPath = self.m_mktConfigDict["DATASET"]["defaults_dir"] + "/" + self.defaultsFile
            with open(self.defaultsFileWthPath,"r") as fh:
                self.defaultsDict = {}
                self.defaultsCount  = 0
                for data in fh:
                    data.rstrip('\n')
                    self.defaultsCount +=1
                    self.defaultsDict[self.defaultsCount]=data
                if self.debugFlag:
                    print "After Defaults, self.fileDict = ", self.fileDict

        except Exception as exp:
            self.m_logger.error("Failed while processing defaults file " + self.defaultsFileWthPath + " with Error = " + str(exp))
            return 1

        try:
            # Not needed as the naming convention is changed
            #self.finraManifestFile =  self.m_mktConfigDict["ENV"]["donefile_dir"] + "/" +  os.path.basename(manifestFile) + ".done"

            #Changing the EOD naming convention per Finra's requirement
            if self.debugFlag:
                print "self.eodFilePattern = ", self.eodFilePattern

            myParamsDict = {'datasetName':self.datasetName.lower(), 'tradeDate':str(self.tradeDate)}
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            eodFileName = re.sub( tempGrp, lambda m:myParamsDict[m.group()], self.eodFilePattern)
            if self.debugFlag:
                print "eodFileName = ", eodFileName

            self.finraManifestFile =  self.m_mktConfigDict["ENV"]["donefile_dir"] + "/" +  eodFileName
            if self.debugFlag:
                print "self.finraManifestFile = ", self.finraManifestFile
            
            with open(self.finraManifestFile,"w") as finraMnFH:
                finraMnFH.write("# AWS RunID : {}\n".format(str(self.runID)))
                finraMnFH.write("# Dataset : {0} , TradeDate : {1}\n".format(str(self.datasetName),str(self.tradeDate)))
                finraMnFH.write("total_compressed={}\n".format(self.totalSize))
                finraMnFH.write("total_rows={}\n".format(self.totalRows))
                finraMnFH.write("no of files={}\n".format(self.fileCount))
   
                for key,val in self.fileDict.items():
                    finraMnFH.write("file_{0}={1}\n".format(str(key),val[1]))
                    finraMnFH.write("file_{0}_rows={1}\n".format(str(key),val[3]))
  
                finraMnFH.write("# Data Attributes\n")
                for key,val in self.defaultsDict.items():
                    finraMnFH.write("{0}".format(str(val)))
            return 0
        except Exception as exp:
            self.m_logger.error("Failed while creating AWS Done file " + self.finraManifestFile + " with Error = " + str(exp))
            return 1

    def processLoader(self, manifestFile, datasetName, tidalRunID):
        """
        Purpose - Function responsible for reading the manifest file, get market name, call multiprocess load and other db calls
        :param manifestFile: Manifest File
        :param tradeDate: Trade Date
        :param tidalRunID: Tidal Run ID 
        :return:
        """
        try:
            # Read the manifest filename and get the suffix i.e. datasetname
            # Assumption - Manifest file format - manifest.<datasetName>.<tradeDate>.<tidalRunID>
            # Program will break otherwise

            self.datasetName = datasetName
            self.tidalRunID = tidalRunID

            # DB_CALL
            # Make database call sp_ddy_get_market_info(datasetname) and get market info
            mktName = ''

            self.manifestFile = self.m_configDict["ENV"]["manifestfile_dir"] + "/" + manifestFile

            ##Validate Manifest file
            if not os.path.isfile(self.manifestFile):
                self.m_logger.error("Invalid manifest file " + self.manifestFile)
                sys.exit(1)

            if self.debugFlag:
                print "Inside processLoader"
                print "DatasetName = ", self.datasetName
                print "ManifestFile = ", manifestFile
                print "Self ManifestFile = ", self.manifestFile
                print "TidalRunID = ", self.tidalRunID
                print "DebugFlag = ", self.debugFlag
                print "confDict = ", self.m_configDict

            # Enable this one the proc to get mkt name and default file are ready and test it
            tempSql = self.m_configDict["SQL"]["get_mkt_defaults_filename"]
            myParamsDict = { 'datasetName' : self.datasetName }
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr

            if returnStr[0] == '0':
                mktName = returnStr[2].strip()
                self.defaultsFile = returnStr[3].strip()
            else:
                self.m_logger.info("Retry after delay., Unable to get market info from the database using sql " + mySql + ". Error = " + returnStr[1])
                local_delay_time = int(self.m_configDict["SQL"]["delay_time"])
                time.sleep(local_delay_time)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                if returnStr[0] == '0':
                    mktName = returnStr[2].strip()
                    self.defaultsFile = returnStr[3].strip()
                else:
                    self.m_logger.error("Unable to get market info from the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

            if self.debugFlag:
                print "MktName from DB = ", mktName
                print "Defaults = ", self.defaultsFile
          
            #Build the string for mktConfigFile based on mktName and configFile info
            self.mktConfigFile = os.path.dirname(self.configFile) + '/' + os.path.basename(self.configFile).split('.',1)[0].strip() + '_' + mktName.lower() + '.' + os.path.basename(self.configFile).split('.',1)[1].strip()

            if self.debugFlag:
                print("mktConfigFile = ", self.mktConfigFile)

            #Validate Manifest file is a valid file
            if not os.path.isfile(self.mktConfigFile):
                self.m_logger.error("Invalid market manifest file " + self.mktConfigFile)
                sys.exit(1)

            # Read Market specific config file and store it in a specific dictionary
            self.readMktConfigFile(self.mktConfigFile)

            if self.debugFlag:
                print("m_mktConfigDict=",self.m_mktConfigDict)

            # Read the contents of manifest - dataFileNames into a list - Will validate the datafiles as well

            localManifest = Manifest()
            manifestDelim = self.m_configDict["ENV"]["manifest_delim"]

            manifestFileList = localManifest.readManifest(self.manifestFile, self.m_logger, manifestDelim, self.debugFlag)

            # Get RunID
            self.runID = generate_runId()
            if self.debugFlag:
                print("RunID = ", self.runID)
                #print("manifestFileList = ", manifestFileList)

            #Call Oracle fn to insert status 'P' into TB_DDY_DATASET_TRANS with RUNID etc
            #DB_CALL
            # Make database call sp_ddy_insert_dataset_trans and insert data that process started

            mySql = ""
            myParams = ""
            tempSql = self.m_configDict["SQL"]["put_dataset"]
            pStatus = 'P'

            myParamsDict = {'datasetName':self.datasetName, 'runID': str(self.runID), 'tDate':str(self.tradeDate), 'status': pStatus, 'tidalRunID':str(self.tidalRunID)}
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)

            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr

            if returnStr[0] != '0':
                self.m_logger.info("Retry after delay., Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                local_delay_time = int(self.m_configDict["SQL"]["delay_time"])
                time.sleep(local_delay_time)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

            # Insert Manifest data in db and call multiprocessing s3 loader process.  Shd we add RUN_ID to manifest table

            #For each datafile, generate fileID and call loadData fn using multiprocess to load data into AWS
            for dataRecord in manifestFileList:
                mySql = ""
                myParams = ""
                tempSql = self.m_configDict["SQL"]["put_manifest"]
                myParamsDict = {"datasetName":self.datasetName, "runID": str(self.runID), "tDate":str(self.tradeDate), "dataFileName":dataRecord[1], "manifestFileName":manifestFile , "noOfRecords": str(dataRecord[3]), "fileSize":str(dataRecord[2]), "tidalRunID":str(self.tidalRunID)}
                tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
                mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)
     
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                #if self.debugFlag:
                    #print "tempSql = ", tempSql
                    #print "myParamsDict = ", myParamsDict
                    #print "mySql = ", mySql
                    #print "returnStr = ", returnStr

                if returnStr[0] != '0':
                    self.m_logger.info("Retry after delay., Unable to put manifest info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    local_delay_time = int(self.m_configDict["SQL"]["delay_time"])
                    time.sleep(local_delay_time)
                    returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                    if returnStr[0] != '0':
                        self.m_logger.error("Unable to put manifest info into the database using sql " + mySql + ". Error = " + returnStr[1])
                        sys.exit(1)

            # Initialize S3 object and get FINRA cloud service token and establish s3 session
            self.currentEpochTime = int(time.time())

            self.s3object = S3(self.mktConfigFile, self.m_logger, self.debugFlag)
            self.s3object.getToken()
            if self.debugFlag:
                print "self.currentEpochTime = ", self.currentEpochTime

            process_count = int(self.m_mktConfigDict["ACTIVE_LOAD"]["active_load_max"])

            fileID=1
            fileIDQueue = Queue()
            dbFlag=1
            procs = []
            doneCounter = 0
            sendCounter = 0
            processFlag=0
            failureFlag=0
            

            #Get chunk size from config file for multipart uploads
            self.bytes_per_chunk = int(self.m_configDict["DATASET"]["bytes_per_chunk"])
            
            self.sodFilePatternSearch = self.m_configDict["ddy"]["SOD_FILE_PATTERN_SEARCH"]
            # Following variables are used across the class.  Hence, assigned to self variables
            self.eodFilePattern = self.m_configDict["ddy"]["EOD_FILE_PATTERN"]
            self.compFilePattern = self.m_configDict["ddy"]["COMP_FILE_PATTERN"]
            self.sodFileCheck = self.m_configDict["ddy"]["SOD_FILE_CHECK"].strip().upper()

            manifestListItems = len(manifestFileList)
            if self.debugFlag:
                print "bytes_per_chunk = ", self.bytes_per_chunk
                print "self.sodFilePatternSearch = ", self.sodFilePatternSearch
                print "self.eodFilePattern = ", self.eodFilePattern
                print "self.compFilePattern = ", self.compFilePattern
                print "self.sodFileCheck = ", self.sodFileCheck
                print "manifestListItems = ", manifestListItems

            dataFileFlag=False
            sodFileProcessedFlag=0
            max_batches= int(math.ceil(float(len(manifestFileList))/process_count)) 
            batch_count=0

            while doneCounter < manifestListItems and failureFlag == 0 :
                while sendCounter < manifestListItems and sendCounter - doneCounter < process_count and failureFlag == 0:
                    if self.sodFilePatternSearch in manifestFileList[sendCounter][1] and not sodFileProcessedFlag:
                        dataFileFlag=False
                        sodFileLoadStatus=self.loadData(manifestFileList[sendCounter][1] ,fileID, fileIDQueue, dbFlag, dataFileFlag, 0)
                        if sodFileLoadStatus:
                            self.m_logger.error("Unable to push Start of Day file to FINRA.  Exiting.. ")
                            sys.exit(1)
                        sodFileProcessedFlag=1
                        sendCounter += 1
                        qFileID = 0
                        qRestult = 0
                        qFileID, qResult = fileIDQueue.get()
                        doneCounter += 1
                        fileID += 1
                    else:
                        if self.sodFileCheck == 'Y':
                            if not sodFileProcessedFlag:
                                self.m_logger.error("No Start of day file. Please add SOD file to the generate manifest.  Exiting.. ") 
                                sys.exit(1)
    
                        dataFileFlag=True
                        if self.debugFlag:
                            print "manifestFileList[sendCounter][1]", manifestFileList[sendCounter][1], "fileID = ", fileID
                        processHandle = Process(target=Loader.loadData, args=(self, manifestFileList[sendCounter][1],fileID, fileIDQueue, dbFlag, dataFileFlag, manifestFileList[sendCounter][3]))
                        processFlag=1
                        
                        s3TimeoutTime = int(self.m_configDict["ddy"]["S3_TIMEOUT_TIME"])
                        if ((int(time.time()) - self.currentEpochTime) > s3TimeoutTime):
                            self.currentEpochTime = int(time.time())
                            self.m_logger.info("Getting New Token for Batch : {0}, Max batches : {1}".format(batch_count,max_batches))
                            if self.debugFlag:
                                print "Inside get new token - self.currentEpochTime = ", self.currentEpochTime
                            self.s3object.getToken()

                        threadDelayTime = int(self.m_configDict["ddy"]["THREAD_DELAY_TIME"])
                        time.sleep(threadDelayTime)
                        processHandle.start()
                        procs.append(processHandle)
                        sendCounter += 1
                        fileID += 1
                        if processFlag and ( sendCounter - doneCounter == process_count or sendCounter == manifestListItems ) : 
                            batch_count += 1
                            self.m_logger.info("Waiting for Batch : {0} to complete. No of active workers : {2}.  Max batches : {1}".format(batch_count,max_batches,sendCounter-doneCounter))
                            for p in procs:
                                p.join()
                            processFlag=0
                        if self.debugFlag:
                            print "Before fileIDQueue - ProcessFlag = ", processFlag, "sendCounter = ", sendCounter, "doneCounter = ", doneCounter, "manifestListItems = ", manifestListItems
                        while not fileIDQueue.empty():  # process completed results as they arrive
                            qFileID = 0
                            qRestult = 0
                            qFileID, qResult = fileIDQueue.get()
                            if self.debugFlag:
                                print("qFileID = ", qFileID, "qResult = ", qResult)
                            doneCounter += 1
                            if qResult:
                                failureFlag = 1
                        if self.debugFlag:
                            print "After fileIDQueue - ProcessFlag = ", processFlag, "sendCounter = ", sendCounter, "doneCounter = ", doneCounter, "manifestListItems = ", manifestListItems, "failureFlag = ", failureFlag
                        if failureFlag:
                            break
                        #Check to see if specified time has passed.  If so get another token to avoid expiration.  Required for large datasets
                        if self.debugFlag:
                            print 'Memory usage: %s (kb)' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
                            print "self.currentEpochTime = ", self.currentEpochTime
                            print "Current Time in Epoch = ", int(time.time())


                    # End of else
                #End of Inner While
            #End of Outer While

            if failureFlag:
                pStatus = 'F'
            else:
                pStatus = 'S'
                """
                #Generate FINRA Manifest file and Push it to AWS 
                """

                # Call Divakar's finra manifest generate function
                returnValue = self.createFinraManifestFile(self.manifestFile)
                if self.debugFlag:
                    print "Post createFinraManifestFile fn - return value= ", returnValue
    
                if returnValue:
                    self.m_logger.error("Unable to generate done file. Please fix the issue the re-run the load")
                    #sys.exit(1)
                    failureFlag=1
                    pStatus = 'F'
                else:

                    dbFlag=0
                    fileID=0
                    # Call the loader function with the manifest file
                    finraManifestLoadStatus=0
                    dataFileFlag=False
                    finraManifestLoadStatus=self.loadData(self.finraManifestFile ,fileID, fileIDQueue, dbFlag, dataFileFlag, 0)
    
                    if finraManifestLoadStatus:
                        pStatus = 'F'
                        self.m_logger.error("Unable to load finra manifest file ")

            #Call Oracle fn to insert status 'S' into TB_DDY_DATASET_TRANS with RUNID etc
            #DB_CALL
            # Make database call sp_ddy_insert_dataset_trans and insert data based on Failure or Success

            mySql = ""
            myParams = ""
            tempSql = self.m_configDict["SQL"]["put_dataset"]
            
            myParamsDict = {"datasetName":self.datasetName, "runID": str(self.runID), "tDate":str(self.tradeDate), "status": pStatus, "tidalRunID":str(self.tidalRunID)}
            tempGrp = "(%s)" % "|".join( map(re.escape, myParamsDict.keys())  )
            mySql = re.sub( tempGrp, lambda m:myParamsDict[m.group()], tempSql)

            returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)

            if self.debugFlag:
                print "tempSql = ", tempSql
                print "myParamsDict = ", myParamsDict
                print "mySql = ", mySql
                print "returnStr = ", returnStr

            if returnStr[0] != '0':
                self.m_logger.info("Retry after delay., Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                local_delay_time = int(self.m_configDict["SQL"]["delay_time"])
                time.sleep(local_delay_time)
                returnStr = self.m_oracle_db.runSqlWthParamsGetOneRow(mySql)
                if returnStr[0] != '0':
                    self.m_logger.error("Unable to put dataset info into the database using sql " + mySql + ". Error = " + returnStr[1])
                    sys.exit(1)

            if failureFlag:
                self.m_logger.error("Load failed")
                sys.exit(1)

        except Exception as e:
            self.m_logger.error("ProcessLoader failed with error " + str(e))
            sys.exit(1)
Esempio n. 14
0
class Process():
    # class variables

    # lock, logger and loader type
    m_lock = ""
    m_logger = ""
    m_loaderType = ""

    # Adama PG object
    m_adamapg = ""
    # Picard PG object
    m_picardpg = ""

    # Process details
    process_name = ""

    def __init__(self, configFile):
        """ Purpose: Constructor

        Initializes the process logger, exports config values into the
        environment, opens the Adama replica and Picard datamart Postgres
        connections, acquires the process lock, and records the loader
        type and process name.  On any failure the process status is set
        to 'F', open resources are released and the process exits.

        :param self:        class object itself
        :param configFile:  Configuration file to use

        """
        # Initialize global logger object (file handler captures DEBUG)
        self.m_logger = Logger(logging.INFO, configFile)
        self.m_logger.addFileHandler(logging.DEBUG)
        try:
            # Add generic information: absolute path of this source file
            fname = inspect.getfile(inspect.currentframe())
            fpath = os.path.dirname(os.path.abspath(fname))
            self.m_logger.addGenericInfo(fpath + "/" + fname)

            # Export all the values from config into environment
            # (constructed for its side effect; the object is not reused)
            configObject = configuration(configFile, True)

            # Create Adama replica PG db object
            self.m_adamapg = Postgre(os.environ['adama_pg'], self.m_logger)

            # Create Picard Postgres Datamart object
            self.m_picardpg = Postgre(os.environ['picard_pg'], self.m_logger)

            # Create lock for the process
            self.m_lock = Lock(os.environ['LOCK_FILE'], self.m_logger)

            # Loader type (resolved by a class helper defined elsewhere)
            self.m_loaderType = self.getloaderType()

            # Process name taken from the environment
            self.process_name = os.environ['process_name']

            self.m_logger.info("Initializing the process, %s" %
                               self.process_name)

        # "except Exception as e" replaces the legacy Python-2-only
        # "except Exception, e" spelling, for consistency with the rest of
        # the file and Python 3 compatibility (valid since Python 2.6).
        except Exception as e:
            self.m_logger.error(
                "ERROR: Unable to initialize the process due to: %s" % str(e))
            # Record failure status before tearing down resources.
            # NOTE(review): this may itself raise if the DB connections
            # above were never established -- confirm updateProcessStatus
            # guards against partially-initialized state.
            self.updateProcessStatus("F")
            if self.m_adamapg:
                self.m_adamapg.closeConnection()
            if self.m_picardpg:
                self.m_picardpg.closeConnection()
            if self.m_lock:
                self.m_lock.remove()
            sys.exit("ERROR: Unable to initialize the process due to: %s" %
                     str(e))