Example #1
0
File: toXML.py Project: twdb/txhis
 def process (self):
     """Render the buffered input message as XML and queue the result.

     Writes the XML declaration and the opening root tag, hands every
     non-comment input line to writeRecordAsXML (without its trailing
     newline), closes the root tag, stores the text in self.msg and appends
     it to self.msgList.

     Returns 0 on success, 1 if anything fails (the error is logged).
     """
     try:
         self.tmpMsg = StringIO.StringIO()
         self.updateAttributesFromMetadata()
         if self.hasHeader == 'y':
             self.fieldNames = self.getHeader()
         self.tmpMsg.write('<?xml version=\"1.0\" encoding=\"' + self.encoding + '\"?>')
         self.tmpMsg.write('\n<' + self.rootTag + '>\n')
         while True:
             line = self.InMsg.readline()
             if not line:
                 # readline() hands back an empty string once the input is exhausted
                 break
             if line[0] == '#':
                 # lines starting with '#' are comments and are not converted
                 continue
             if line[-1] == '\n':
                 line = line[:-1]
             self.writeRecordAsXML(line)
         self.tmpMsg.write('</' + self.rootTag + '>')
         # rewind so the whole document can be read back out of the buffer
         self.tmpMsg.seek(0)
         self.msg = self.tmpMsg.read()
         self.msgList.append(self.msg)
         reticLog.logInfo(self.logList, '(' + self.name + ') ' + "Message process is finished in pipe")
         return 0
     except:
         excType, excValue = sys.exc_info()[:2]
         errorMessage = traceback.format_exception_only(excType, excValue)[0]
         prefix = '(' + self.name + ') '
         reticLog.logError(self.logList, prefix + "Error during message processing in pipe")
         reticLog.logError(self.logList, prefix + errorMessage)
         return 1
Example #2
0
    def adpator_run(self):
        """Run the adaptor once: preprocess, pump source -> pipe, then load every pipe message through the sink."""
        # Preprocessing generates the intermediate files the file source reads.
        # A failure here is logged only; the run carries on regardless.
        try:
            preprocess_batch(direcotryPath,self.logList)
        except:
            import sys
            import traceback
            excType, excValue = sys.exc_info()[:2]
            errorMessage = traceback.format_exception_only(excType, excValue)[0]
            prefix = "( " + self.adtName + " ) "
            reticLog.logError(self.logList, prefix + "Unknown error during initialization in sink")
            reticLog.logError(self.logList, prefix + errorMessage)

        # Source -> pipe: each message the source yields is converted by the pipe.
        self.adaptorSource.start()
        while self.adaptorSource.next()==1:
            self.adaptorPipe.getMsg(self.adaptorSource.msg)
            self.adaptorPipe.process()
            self.adaptorSource.commit()
        self.adaptorSource.commit()

        # Pipe -> sink: the i-th pipe message is paired with the i-th recorded file name.
        for i, pipeMsg in enumerate(self.adaptorPipe.msgList):
            self.adaptorSink.getMsg(pipeMsg)
            recordList = self.adaptorSink.getRecordList()
            fileName = self.adaptorSource.metadata['filename'][i]
            print(str(len(recordList)) + " XML records" + ' generated' + ' for %s' % fileName)
            self.adaptorSink.processSites(recordList, fileName)
            updateList = self.adaptorSink.prepareUpdateObject(recordList,testTPWDmodel.tpwdProcessInfo)
            print(str(len(updateList)) + " database records" + ' to be inserted')
            self.adaptorSink.updateDB(updateList)

#redefine runCommand 
#def 
Example #3
0
    def getMsg (self, messages):
        """
        Index one TCEQ message pair by RFA tag.

        messages[0] (events) and messages[1] (results) are each parsed with
        csv.reader using "|" as the delimiter.  self.basinSegmentInfo is rebuilt
        from scratch as a small hash table:
            key:   the RFA tag id (column RFATAG_COLUMN of the row)
            value: {EVENT_IN_HASHTable: event row,
                    RESULT_IN_HASHTable: [result rows]}
        The result list only exists for tags that have at least one result row.

        Returns 0 on success, 1 on any error (the error is printed and logged).
        """
        try:
            reticLog.logInfo(self.logList, "( " + self.name + " ) Retrieving message for sink : " + self.name)
            # start from an empty table for every new message pair
            segments = {}
            self.basinSegmentInfo = segments
            eventRows = csv.reader(messages[0],delimiter="|")
            resultRows = csv.reader(messages[1],delimiter="|")
            for row in eventRows:
                # rows with three empty leading columns (seen for basin ID 6, year 2010)
                # are shifted left so the columns line up again
                if row[0] == "" and row[1] == "" and row[2] == "":
                    row = row[3:]
                    print(row)
                segments[row[RFATAG_COLUMN]] = {EVENT_IN_HASHTable: row}
            for row in resultRows:
                # a result whose tag has no event raises KeyError, handled below
                segment = segments[row[RFATAG_COLUMN]]
                segment.setdefault(RESULT_IN_HASHTable, []).append(row)
            reticLog.logInfo(self.logList, "( " + self.name + " ) Message retrieved in sink : " + self.name)
            return 0
        except Exception:
            import traceback
            traceback.print_exc(file=sys.stdout)
            excType, excValue = sys.exc_info()[:2]
            errorMessage = traceback.format_exception_only(excType, excValue)[0]
            reticLog.logError(self.logList, "( " + self.name + " ) Error during message retrieval in sink : " + self.name)
            reticLog.logError(self.logList, "( " + self.name + " ) " + errorMessage)
            return 1
Example #4
0
 def start (self):
     """Start the source of the adaptor by calling getMsg().

     Returns 0 on success; on any error the failure is logged and the
     process exits with status 1.
     """
     try:
         reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Starting the source adaptor")
         self.getMsg()
     except:
         excType, excValue = sys.exc_info()[:2]
         errorMessage = traceback.format_exception_only(excType, excValue)[0]
         prefix = '( ' + self.name + ' ) '
         reticLog.logError(self.logList, prefix + "Unknown error on start of source")
         reticLog.logError(self.logList, prefix + errorMessage)
         sys.exit(1)
     else:
         return 0
Example #5
0
 def getMsg (self, message):
     """Store message as the sink's current message (self.msg).

     Returns 0 on success, 1 if an error occurred (the error is logged).
     """
     try:
         reticLog.logInfo(self.logList, "( " + self.name + " ) Retrieving message for sink : " + self.name)
         # clear the previous message before taking the new one
         self.msg = ''
         self.msg = message
         reticLog.logInfo(self.logList, "( " + self.name + " ) Message retrieved in sink : " + self.name)
     except:
         excType, excValue = sys.exc_info()[:2]
         errorMessage = traceback.format_exception_only(excType, excValue)[0]
         reticLog.logError(self.logList, "( " + self.name + " ) Error during message retrieval in sink : " + self.name)
         reticLog.logError(self.logList, "( " + self.name + " ) " + errorMessage)
         return 1
     else:
         return 0
Example #6
0
File: toXML.py Project: twdb/txhis
 def getMsg (self, message):
     """Load message into a fresh input buffer (self.InMsg), rewound to its start.

     Also resets self.msg to an empty string.
     Returns 0 on success, 1 if an error occurred (the error is logged).
     """
     try:
         reticLog.logInfo(self.logList, '(' + self.name + ') ' + "Getting message into pipe")
         buf = self.InMsg = StringIO.StringIO()
         self.msg = ''
         buf.write(message)
         # rewind so that later reads start at the first line
         buf.seek(0)
     except:
         excType, excValue = sys.exc_info()[:2]
         errorMessage = traceback.format_exception_only(excType, excValue)[0]
         prefix = '(' + self.name + ') '
         reticLog.logError(self.logList, prefix + "Error during message retrieval in pipe" )
         reticLog.logError(self.logList, prefix + errorMessage)
         return 1
     else:
         return 0
Example #7
0
File: toXML.py Project: twdb/txhis
 def __init__ (self, args, logger):
     """Configure the pipe from its definition dictionary.

     args   -- pipe parameters.  Always required: 'name', 'msgKind',
               'rootTag', 'recTag', 'encoding'.
               For msgKind 'delimited': 'delimiter' and 'hasHeader', plus
               'fieldNames' when hasHeader is 'n'.
               For msgKind 'fixedLength': 'fieldNames' and 'fieldLength'
               (hasHeader is forced to 'n').
     logger -- log target list handed to reticLog.

     A missing parameter, or any other error, is logged and ends the
     process with exit status 1.
     """
     try:
         self.args = args
         self.logList = logger
         self.name = args['name']
         self.InMsg = ''
         self.msg = ''
         self.msgKind = args['msgKind']
         self.delimiter = ''
         self.fieldNames = []
         self.fieldLength = []
         self.msgList = []
         self.rootTag = args['rootTag']
         self.recTag = args['recTag']
         self.encoding = args['encoding']
         self.metadata = {}
         # NOTE(review): for any other msgKind, hasHeader is never set - confirm that is intended
         if self.msgKind == 'delimited':
             self.delimiter = args['delimiter']
             self.hasHeader = args['hasHeader']
             if self.hasHeader == 'n':
                 self.fieldNames = args['fieldNames']
         elif self.msgKind == 'fixedLength':
             self.fieldNames = args['fieldNames']
             self.fieldLength = args['fieldLength']
             self.hasHeader = 'n'
     except KeyError:
         # 'name' itself may be the missing parameter, so self.name cannot be
         # assumed to exist here; fall back to a placeholder instead of failing
         # with AttributeError inside the handler.
         pipeName = getattr(self, 'name', 'unknown')
         reticLog.logError(self.logList, '(' + pipeName + ') ' + "Error on ToXMLPipe initialization")
         reticLog.logError(self.logList, '(' + pipeName + ') ' + "Parameter " + str(sys.exc_info()[1]) + " is missing on pipe definition. Exiting..." )
         sys.exit(1)
     except:
         errorMessage = traceback.format_exception_only(sys.exc_info()[0],sys.exc_info()[1])[0]
         pipeName = getattr(self, 'name', 'unknown')
         reticLog.logError(self.logList, '(' + pipeName + ') ' + "Unknown error during initialization of pipe : " + pipeName)
         reticLog.logError(self.logList, '(' + pipeName + ') ' + errorMessage)
         sys.exit(1)
Example #8
0
 def __init__ (self, args, logger):
     """Configure the HTTP source from its definition dictionary.

     args   -- source parameters.  Required: 'name', 'URL'.
               Optional: 'pollPeriod' (stored as a float in self.interval),
               'params' (defaults to an empty list) and 'exitOnError'
               (defaults to 'n').
     logger -- log target list handed to reticLog.

     A missing required parameter is logged and ends the process with exit
     status 1; any other error is logged and re-raised.
     """
     try:
         self.logList = logger
         self.name = args['name']
         reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Intitializing HTTPSource : " + self.name)
         self.URL = args['URL']
         self.exitOnError = 'n'
         self.msgList = []
         self.nbMsg = 0
         self.msg = []
         self.msgName = ''
         self.params = []
         self.metadata = {}
         # pollPeriod feeds a wait(interval) call, hence the float conversion.
         # NOTE(review): self.interval stays unset when 'pollPeriod' is absent - confirm callers cope
         if 'pollPeriod' in args:
             self.interval = float(args['pollPeriod'])
         if 'params' in args:
             self.params = args['params']
         if 'exitOnError' in args:
             self.exitOnError = args['exitOnError']
     except KeyError:
         # 'name' itself may be the missing parameter, so do not rely on self.name here
         sourceName = getattr(self, 'name', 'unknown')
         reticLog.logError(self.logList, '( ' + sourceName + ' ) ' + "Error on HTTPSource initialization")
         reticLog.logError(self.logList, '( ' + sourceName + ' ) ' + "Parameter " + str(sys.exc_info()[1]) + " is missing on source definition" )
         sys.exit(1)
     except:
         # format_exception_only returns a list of lines; take the first one so it
         # can be concatenated (joining the list itself raised TypeError here)
         errorMessage = traceback.format_exception_only(sys.exc_info()[0],sys.exc_info()[1])[0]
         sourceName = getattr(self, 'name', 'unknown')
         reticLog.logError(self.logList, '( ' + sourceName + ' ) ' + "Unknown error on HTTPSource initialization")
         reticLog.logError(self.logList, '( ' + sourceName + ' ) ' + errorMessage)
         raise
Example #9
0
 def __init__ (self, args, logList):
     """Configure the file source from its definition dictionary.

     args    -- source parameters.  Required: 'name', 'fileFilter',
                'newExtension', 'filePath', 'pollPeriod'.
                Optional: 'exitOnError' (defaults to 'y').
     logList -- log target list handed to reticLog.

     self.filePath is stored with the platform path separator appended.
     A missing parameter, or any other error, is logged and ends the
     process with exit status 1.
     """
     try:
         self.logList = logList
         self.name = args['name']
         self.exitOnError = 'y'
         reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Intitializing fileSource")
         self.fileFilter = args['fileFilter']
         self.newExtension = args['newExtension']
         self.msgList = []
         self.metadata = {}
         self.nbMsg = 0
         self.msg = ''
         self.msgName = ''
         self.filePath = args['filePath']
         # pollPeriod feeds a wait(interval) call, hence the float conversion
         self.interval = float(args['pollPeriod'])
         # append the OS path separator ('\\' on nt, '/' on linux)
         import os
         self.filePath = self.filePath + os.sep
         if 'exitOnError' in args:
             self.exitOnError = args['exitOnError']
     except KeyError:
         # 'name' itself may be the missing parameter, so do not rely on self.name here
         sourceName = getattr(self, 'name', 'unknown')
         reticLog.logError(self.logList, '( ' + sourceName + ' ) ' + "Error on fileSource initialization")
         reticLog.logError(self.logList, '( ' + sourceName + ' ) ' + "Parameter " + str(sys.exc_info()[1]) + " is missing on source definition" )
         sys.exit(1)
     except:
         errorMessage = traceback.format_exception_only(sys.exc_info()[0],sys.exc_info()[1])[0]
         sourceName = getattr(self, 'name', 'unknown')
         reticLog.logError(self.logList, '( ' + sourceName + ' ) ' + "Unknown error on initialization on source")
         reticLog.logError(self.logList, '( ' + sourceName + ' ) ' + errorMessage)
         sys.exit(1)
Example #10
0
 def connect (self, args):
     """Establish a connection with the database.

     args -- connection parameters: 'dbType', 'user', 'password', 'dsn'
             and 'driverName' (name of the DB driver module to import).

     On success the engine is stored in self.engine.  On any failure the
     error is logged and an Exception("Database Error") is raised.
     """
     try:
         reticLog.logInfo(self.logList, "Intitializing Database Connection : " + args['dsn'])
         # engine string layout: <dbType>://<user>:<password>@<dsn>
         engineStr = ':'.join([args['dbType'],
                               '//' + args['user'],
                               args['password'] + "@" + args['dsn']])
         dbDriverMod = __import__(args['driverName'])
         self.engine = create_engine(engineStr, module=dbDriverMod)
         reticLog.logInfo(self.logList, "DataBase Connection established")
     except:
         errorMessage = traceback.format_exception_only(sys.exc_info()[0],sys.exc_info()[1])[0]
         reticLog.logError(self.logList, "Database error : " + errorMessage )
         # String exceptions are rejected with a TypeError on Python 2.6 and
         # later, so raise a real exception object carrying the same text.
         raise Exception("Database Error")
Example #11
0
 def __init__ (self, args, logger):
     """Configure the SQL sink and open its database session.

     args   -- sink parameters.  Required: 'name', 'dbFactroyArg',
               'inputFormat', 'delimiter', 'hasHeader', 'nbThreads'.
               Optional: 'retries' (default 5), 'fieldNames',
               'fieldLength', 'parallelize' (default 'n'),
               'nbQueriesParal' (default 10).
     logger -- log target list handed to reticLog.

     A missing parameter, or any other error, is logged and ends the
     process with exit status 1.
     """
     try:
         self.args = args
         self.msg = ''
         self.logList = logger
         self.name = args['name']
         self.dbFactoryArg = args['dbFactroyArg']
         self.inputFormat = args['inputFormat']
         self.delimiter = args['delimiter']
         self.hasHeader = args['hasHeader']
         # NOTE(review): self.autocommit is only set when 'autoCommit' is absent;
         # when the key is present the attribute is left untouched - confirm intent
         if 'autoCommit' not in self.args:
             self.autocommit = False
         self.session = None
         self.nbThreads = int(args['nbThreads'])
         # a thread count of 0 is bumped to 1
         if self.nbThreads == 0:
             self.nbThreads = 1
         if 'retries' in args:
             self.retries = int(args['retries'])
         else:
             self.retries = 5
         self.metadata = {}
         self.fieldNames = []
         self.fieldLength = []
         if self.inputFormat == 'delimited':
             if 'fieldNames' in self.args:
                 self.fieldNames = args['fieldNames']
         elif self.inputFormat == 'fixedLength':
             # fixed-length input takes the names only together with the lengths
             if 'fieldNames' in self.args and 'fieldLength' in self.args:
                 self.fieldNames = args['fieldNames']
                 self.fieldLength = args['fieldLength']
         self.connection = dbFactory.dbFactory(self.dbFactoryArg, self.logList)
         self.session = self.makeSession(self.connection)
         self.parallelize = 'n'
         self.nbQueriesParal = 10
         if 'parallelize' in args:
             self.parallelize = args['parallelize']
         if 'nbQueriesParal' in args:
             self.nbQueriesParal = int(args['nbQueriesParal'])
     except KeyError:
         # 'name' itself may be the missing parameter, so do not rely on self.name here
         prefix = "( " + getattr(self, 'name', 'unknown') + " ) "
         reticLog.logError(self.logList, prefix + "Error during SQLSink initialization")
         reticLog.logError(self.logList, prefix + "Parameter " + str(sys.exc_info()[1]) + " is missing in sink definition" )
         sys.exit(1)
     except:
         errorMessage = traceback.format_exception_only(sys.exc_info()[0],sys.exc_info()[1])[0]
         prefix = "( " + getattr(self, 'name', 'unknown') + " ) "
         reticLog.logError(self.logList, prefix + "Unknown error during initialization in sink")
         reticLog.logError(self.logList, prefix + errorMessage)
         sys.exit(1)
Example #12
0
 def commit(self):
     """Commit the current message treatment.

     When messages are queued, removes the file named by self.msgName from
     self.filePath, drops the first entry of self.msgList, decrements
     self.nbMsg and clears self.msg.

     Returns 0 on success; 1 when nothing is queued or the removal fails
     (the failure is logged).
     """
     if not self.nbMsg > 0:
         # nothing queued, nothing to commit
         return 1
     msgName = self.msgList[0]
     reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Commiting msg " + msgName + " on source : " + self.name)
     try:
         shutil.os.remove(self.filePath+self.msgName)
         self.nbMsg -= 1
         self.msgList = self.msgList[1:]
         self.msg = ''
         return 0
     except:
         excType, excValue = sys.exc_info()[:2]
         errorMessage = traceback.format_exception_only(excType, excValue)[0]
         prefix = '( ' + self.name + ' ) '
         reticLog.logError(self.logList, prefix + "Error on commit phase on source - File : " + msgName)
         reticLog.logError(self.logList, prefix + errorMessage)
         return 1
Example #13
0
 def next(self):
     """Get the next message to be processed or report that the source is dry.

     Reads the file at the head of self.msgList into self.msg and records
     its name in the metadata: the part before the first '.' is appended to
     metadata['filename'] and the part after it is stored in
     metadata['extension'] (the full name and '' when there is no usable dot).

     Returns 0 when no message is queued and 1 when a message was loaded.
     On a read error the failure is logged and the result is 0 when
     exitOnError is 'y', otherwise 1.
     """
     if self.nbMsg == 0:
         reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Sources dry on source ")
         return 0
     try:
         reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Messages Left on queue of adaptor : " + str(self.nbMsg))
         reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Retrieving file : " + self.msgList[0])
         self.msgName = self.msgList[0]
         # metadata['filename'] is a list with one entry per retrieved file; both
         # branches append so the entries stay aligned with the messages
         # (the no-extension branch used to replace the list with a string).
         fileNames = self.metadata.setdefault('filename', [])
         if self.msgName.find('.') > 0:
             nameParts = self.msgName.split('.')
             fileNames.append(nameParts[0])
             self.metadata['extension'] = nameParts[1]
         else:
             fileNames.append(self.msgName)
             self.metadata['extension'] = ''
         fullPath = os.path.join(self.filePath, self.msgName)
         # First pass: let reticUtils.istext pick the read mode.  try/finally
         # makes sure the handle is released even when the check fails.
         fp = open(fullPath)
         try:
             isText = reticUtils.istext(fp)
         finally:
             fp.close()
         if isText:
             mode = 'r'
         else:
             mode = 'rb'
         # Second pass: read the whole file in the chosen mode.
         fp = open(fullPath, mode)
         try:
             self.msg = fp.read()
         finally:
             fp.close()
         return 1
     except:
         errorMessage = traceback.format_exception_only(sys.exc_info()[0],sys.exc_info()[1])[0]
         reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Error on message retrieval on source : " + self.name)
         reticLog.logError(self.logList, '( ' + self.name + ' ) ' + errorMessage)
         if self.exitOnError.lower() == 'y':
             return 0
         else:
             return 1
Example #14
0
    def next(self):
        """Get the next message to be processed or report that the source is dry.

        With no params configured the raw URL is fetched once and its body is
        appended to self.msg.  Otherwise self.msg is reset and one GET request
        is issued per entry of self.params; each entry is merged into
        self.metadata and the whole of self.metadata is url-encoded as the
        query string.

        Returns 0 when self.nbMsg is 0, otherwise 1.  When exitOnError is 'y'
        a failed request ends the process with exit status 1.
        """
        if self.nbMsg == 0:
            return 0
        reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Messages Left on queue of adaptor : " + str(self.nbMsg))
        if len(self.params) == 0:
            # No params are provided, processing raw URL. (without GET/POST request)
            reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Retrieving message from : " + self.URL)
            self._fetchMessage(self.URL)
        else:
            self.msg = []
            for param in self.params:
                # Params are provided, processing URL passing them through GET method
                # There are as many calls as there are param lists
                self.metadata.update(param)
                paramLine = urlencode(self.metadata)
                reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Retrieving message from : " + self.URL+"?" + paramLine)
                # NOTE(review): a failing request is retried immediately and without
                # limit unless exitOnError is 'y' - consider a delay or a retry cap
                while not self._fetchMessage("?".join([self.URL,paramLine])):
                    pass
        return 1

    def _fetchMessage(self, url):
        'Fetch url and append its body to self.msg; return True on success, False after logging a failure'
        try:
            connection = urllib2.urlopen(urllib2.Request(url))
            try:
                self.msg.append(connection.read())
            finally:
                # always release the HTTP connection, even when read() fails
                connection.close()
            reticLog.logInfo(self.logList, '( ' + self.name + ' ) ' +  "Message retrieved on adaptor: " + self.name)
            return True
        except Exception:
            # Exception rather than a bare except, so that KeyboardInterrupt and
            # SystemExit can still break out of the caller's retry loop
            errorMessage = traceback.format_exception_only(sys.exc_info()[0],sys.exc_info()[1])[0]
            reticLog.logError(self.logList, '( ' + self.name + ' ) ' + "Error on message retrieval on source : " + self.name)
            reticLog.logError(self.logList, '( ' + self.name + ' ) ' + errorMessage)
            if self.exitOnError.lower() == 'y':
                sys.exit(1)
            return False
Example #15
0
 def __init__ (self, args, logger):
     """Configure the SQL sink, map the database tables and open a session.

     args   -- sink parameters.  Required: 'name', 'dbFactroyArg',
               'inputFormat', 'delimiter', 'hasHeader', 'nbThreads'.
               Optional: 'retries' (default 5), 'fieldNames',
               'fieldLength', 'parallelize' (default 'n'),
               'nbQueriesParal' (default 10).
     logger -- log target list handed to reticLog.

     The 'DataValues', 'Sites', 'Variables' and 'Methods' tables are
     reflected from the database and mapped onto the Python classes of the
     same names.  A missing parameter, or any other error, is logged and
     ends the process with exit status 1.
     """
     try:
         self.args = args
         self.msg = ''
         self.logList = logger
         self.name = args['name']
         self.dbFactoryArg = args['dbFactroyArg']
         self.inputFormat = args['inputFormat']
         self.delimiter = args['delimiter']
         self.hasHeader = args['hasHeader']
         # NOTE(review): self.autocommit is only set when 'autoCommit' is absent;
         # when the key is present the attribute is left untouched - confirm intent
         if 'autoCommit' not in self.args:
             self.autocommit = False
         self.session = None
         self.nbThreads = int(args['nbThreads'])
         # a thread count of 0 is bumped to 1
         if self.nbThreads == 0:
             self.nbThreads = 1
         if 'retries' in args:
             self.retries = int(args['retries'])
         else:
             self.retries = 5
         self.metadata = {}
         self.fieldNames = []
         self.fieldLength = []
         if self.inputFormat == 'delimited':
             if 'fieldNames' in self.args:
                 self.fieldNames = args['fieldNames']
         elif self.inputFormat == 'fixedLength':
             # fixed-length input takes the names only together with the lengths
             if 'fieldNames' in self.args and 'fieldLength' in self.args:
                 self.fieldNames = args['fieldNames']
                 self.fieldLength = args['fieldLength']
         self.connection = dbFactory.dbFactory(self.dbFactoryArg, self.logList)
         # Reflect each table from the live database and map it onto its
         # Python class so that sqlalchemy's ORM can be used on it.
         dbMetadata = schema.MetaData()
         dbMetadata.bind = self.connection.engine
         tableMappings = ((DataValues, 'DataValues'),
                          (Sites, 'Sites'),
                          (Variables, 'Variables'),
                          (Methods, 'Methods'))
         for mappedClass, tableName in tableMappings:
             table = schema.Table(tableName, dbMetadata,
                                  autoload=True, autoload_with=self.connection.engine)
             orm.mapper(mappedClass, table)
         self.session = self.makeSession(self.connection)
         self.parallelize = 'n'
         self.nbQueriesParal = 10
         if 'parallelize' in args:
             self.parallelize = args['parallelize']
         if 'nbQueriesParal' in args:
             self.nbQueriesParal = int(args['nbQueriesParal'])
     except KeyError:
         # 'name' itself may be the missing parameter, so do not rely on self.name here
         prefix = "( " + getattr(self, 'name', 'unknown') + " ) "
         reticLog.logError(self.logList, prefix + "Error during SQLSink initialization")
         reticLog.logError(self.logList, prefix + "Parameter " + str(sys.exc_info()[1]) + " is missing in sink definition" )
         sys.exit(1)
     except:
         errorMessage = traceback.format_exception_only(sys.exc_info()[0],sys.exc_info()[1])[0]
         prefix = "( " + getattr(self, 'name', 'unknown') + " ) "
         reticLog.logError(self.logList, prefix + "Unknown error during initialization in sink")
         reticLog.logError(self.logList, prefix + errorMessage)
         sys.exit(1)
Example #16
0
                    ###################################
                    newSite.Latitude,newSite.Longitude = float(record['start_latitude_num']),float(record['start_longitude_num'])
                    newSite.LatLongDatumID= 2
                    newSite.VerticalDatum = u'Unknown'
                    newSite.State = u'Texas'
                    self.session.add(newSite)
                    self.session.flush()
                    record['SiteID'] = newSite.SiteID
                else:
                    record['SiteID'] = sites_exist.SiteID
            self.session.commit()
            reticLog.logInfo(self.logList, "( " + self.name + " ) sites info processed on sink : " + self.name)
            return 0
        except:
            errorMessage = traceback.format_exception_only(sys.exc_info()[0],sys.exc_info()[1])[0]
            reticLog.logError(self.logList, "( " + self.name + " ) Error during Sites message processing in sink : " + self.name)
            reticLog.logError(self.logList, "( " + self.name + " ) " + errorMessage)
            return 1                    

    
# ============================================================= #
#
#      Private methods (optional) 
#
# ============================================================= #

    #transform the xml/flat/other format of file into a list of record(dictionary)
    #return type : a list of csv records to be inserted into database
    def getRecordList(self):
        """Extraction of the fields and values to map to the SQL statement.
        The method returns a list of dictionnaries"""